From 0e7bbcc104baaade4f64205e9706b7d43c46db7d Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Wed, 27 Jul 2016 09:56:50 +0300 Subject: [PATCH 0001/1715] neigh: allow admin to set NUD_STALE Admin should be able to set any state. Currently, this fails when lladdr is not changed and state is changed from NUD_CONNECTED to NUD_STALE: ip neigh add 192.168.8.1 lladdr 00:11:22:33:44:55 nud perm dev wlan0 ip neigh show to 192.168.8.1 192.168.8.1 dev wlan0 lladdr 00:11:22:33:44:55 PERMANENT ip neigh change 192.168.8.1 lladdr 00:11:22:33:44:55 nud stale dev wlan0 ip neigh show to 192.168.8.1 192.168.8.1 dev wlan0 lladdr 00:11:22:33:44:55 PERMANENT Problem may be from 2.1.X days. Signed-off-by: Julian Anastasov Reviewed-by: Chunhui He Signed-off-by: David S. Miller --- net/core/neighbour.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index cf26e04c4046..2ae929f9bd06 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1148,7 +1148,8 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, } else goto out; } else { - if (lladdr == neigh->ha && new == NUD_STALE) + if (lladdr == neigh->ha && new == NUD_STALE && + !(flags & NEIGH_UPDATE_F_ADMIN)) new = old; } } From 841f60fcc4e7986a4ef3f83a289ab47076872e42 Mon Sep 17 00:00:00 2001 From: Somnath Kotur Date: Wed, 27 Jul 2016 05:26:15 -0400 Subject: [PATCH 0002/1715] be2net: clear vlan-promisc setting before programming the vlan list The Lancer FW has a bug due to which in some cases vlan-promisc setting is cleared eventhough the vlan-list programming did not succeed (via VLAN_CONFIG) cmd. The driver has no way of knowing if the vlan-promisc mode was cleared or not when this cmd fails. To work around this issue, this patch first explicitly clears the vlan-promisc mode via RX_FILTER cmd and then tries to program the vlan list. Signed-off-by: Somnath Kotur Signed-off-by: Sathya Perla Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_main.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 874c7539a79d..a97fc8a80fac 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -1427,6 +1427,11 @@ static int be_vid_config(struct be_adapter *adapter) if (adapter->vlans_added > be_max_vlans(adapter)) return be_set_vlan_promisc(adapter); + if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) { + status = be_clear_vlan_promisc(adapter); + if (status) + return status; + } /* Construct VLAN Table to give to HW */ for_each_set_bit(i, adapter->vids, VLAN_N_VID) vids[num++] = cpu_to_le16(i); @@ -1439,8 +1444,6 @@ static int be_vid_config(struct be_adapter *adapter) addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES) return be_set_vlan_promisc(adapter); - } else if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) { - status = be_clear_vlan_promisc(adapter); } return status; } From 0aff1fbfe72e47412e3213648e972c339af30e4e Mon Sep 17 00:00:00 2001 From: Sathya Perla Date: Wed, 27 Jul 2016 05:26:16 -0400 Subject: [PATCH 0003/1715] be2net: do not remove vids from driver table if be_vid_config() fails. The driver currently removes a new vid from the adapter->vids[] array if be_vid_config() returns an error, which occurs when there is an error in HW/FW. This is wrong. After the HW/FW error is recovered from, we need the complete vids[] array to re-program the vlan list. Signed-off-by: Sathya Perla Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_main.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index a97fc8a80fac..243d43bfd975 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -1463,13 +1463,7 @@ static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid) set_bit(vid, adapter->vids); adapter->vlans_added++; - status = be_vid_config(adapter); - if (status) { - adapter->vlans_added--; - clear_bit(vid, adapter->vids); - } - - return status; + return be_vid_config(adapter); } static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid) From 92fbb1df83ec17f62a46b23507ebb3f06ca10cd3 Mon Sep 17 00:00:00 2001 From: Sriharsha Basavapatna Date: Wed, 27 Jul 2016 05:26:17 -0400 Subject: [PATCH 0004/1715] be2net: Avoid unnecessary firmware updates of multicast list Eachtime the ndo_set_rx_mode() routine is called, the driver programs the multicast list in the adapter without checking if there are any changes to the list. This leads to a flood of RX_FILTER cmds when a number of vlan interfaces are configured over the device, as the ndo_ gets called for each vlan interface. To avoid this, we now use __dev_mc_sync() and __dev_uc_sync() API, but only to detect if there is a change in the mc/uc lists. Now that we use this API, the code has to be-designed to issue these API calls for each invocation of the be_set_rx_mode() call. Signed-off-by: Sriharsha Basavapatna Signed-off-by: Sathya Perla Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be.h | 2 + drivers/net/ethernet/emulex/benet/be_main.c | 169 +++++++++++++++----- 2 files changed, 130 insertions(+), 41 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h index 4555e041ef69..68687127e4fd 100644 --- a/drivers/net/ethernet/emulex/benet/be.h +++ b/drivers/net/ethernet/emulex/benet/be.h @@ -573,6 +573,8 @@ struct be_adapter { u32 uc_macs; /* Count of secondary UC MAC programmed */ unsigned long vids[BITS_TO_LONGS(VLAN_N_VID)]; u16 vlans_added; + bool update_uc_list; + bool update_mc_list; u32 beacon_state; /* for set_phys_id */ diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 243d43bfd975..03f6f6db2d3c 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -1420,8 +1420,8 @@ static int be_vid_config(struct be_adapter *adapter) u16 num = 0, i = 0; int status = 0; - /* No need to further configure vids if in promiscuous mode */ - if (be_in_all_promisc(adapter)) + /* No need to change the VLAN state if the I/F is in promiscuous */ + if (adapter->netdev->flags & IFF_PROMISC) return 0; if (adapter->vlans_added > be_max_vlans(adapter)) @@ -1483,12 +1483,6 @@ static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid) return be_vid_config(adapter); } -static void be_clear_all_promisc(struct be_adapter *adapter) -{ - be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, OFF); - adapter->if_flags &= ~BE_IF_FLAGS_ALL_PROMISCUOUS; -} - static void be_set_all_promisc(struct be_adapter *adapter) { be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON); @@ -1507,42 +1501,144 @@ static void be_set_mc_promisc(struct be_adapter *adapter) adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS; } -static void be_set_mc_list(struct be_adapter *adapter) +static void be_set_uc_promisc(struct be_adapter *adapter) { int status; - status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON); + if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) + return; + + status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, ON); if (!status) - adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS; - else + adapter->if_flags |= BE_IF_FLAGS_PROMISCUOUS; +} + +static void be_clear_uc_promisc(struct be_adapter *adapter) +{ + int status; + + if (!(adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS)) + return; + + status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, OFF); + if (!status) + adapter->if_flags &= ~BE_IF_FLAGS_PROMISCUOUS; +} + +/* The below 2 functions are the callback args for __dev_mc_sync/dev_uc_sync(). + * We use a single callback function for both sync and unsync. We really don't + * add/remove addresses through this callback. But, we use it to detect changes + * to the uc/mc lists. The entire uc/mc list is programmed in be_set_rx_mode(). + */ +static int be_uc_list_update(struct net_device *netdev, + const unsigned char *addr) +{ + struct be_adapter *adapter = netdev_priv(netdev); + + adapter->update_uc_list = true; + return 0; +} + +static int be_mc_list_update(struct net_device *netdev, + const unsigned char *addr) +{ + struct be_adapter *adapter = netdev_priv(netdev); + + adapter->update_mc_list = true; + return 0; +} + +static void be_set_mc_list(struct be_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + bool mc_promisc = false; + int status; + + __dev_mc_sync(netdev, be_mc_list_update, be_mc_list_update); + + if (netdev->flags & IFF_PROMISC) { + adapter->update_mc_list = false; + } else if (netdev->flags & IFF_ALLMULTI || + netdev_mc_count(netdev) > be_max_mc(adapter)) { + /* Enable multicast promisc if num configured exceeds + * what we support + */ + mc_promisc = true; + adapter->update_mc_list = false; + } else if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS) { + /* Update mc-list unconditionally if the iface was previously + * in mc-promisc mode and now is out of that mode. + */ + adapter->update_mc_list = true; + } + + if (mc_promisc) { be_set_mc_promisc(adapter); + } else if (adapter->update_mc_list) { + status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON); + if (!status) + adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS; + else + be_set_mc_promisc(adapter); + + adapter->update_mc_list = false; + } +} + +static void be_clear_mc_list(struct be_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + + __dev_mc_unsync(netdev, NULL); + be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, OFF); } static void be_set_uc_list(struct be_adapter *adapter) { + struct net_device *netdev = adapter->netdev; struct netdev_hw_addr *ha; + bool uc_promisc = false; int i = 1; /* First slot is claimed by the Primary MAC */ - for (; adapter->uc_macs > 0; adapter->uc_macs--, i++) - be_cmd_pmac_del(adapter, adapter->if_handle, - adapter->pmac_id[i], 0); + __dev_uc_sync(netdev, be_uc_list_update, be_uc_list_update); - if (netdev_uc_count(adapter->netdev) > be_max_uc(adapter)) { - be_set_all_promisc(adapter); - return; + if (netdev->flags & IFF_PROMISC) { + adapter->update_uc_list = false; + } else if (netdev_uc_count(netdev) > (be_max_uc(adapter) - 1)) { + uc_promisc = true; + adapter->update_uc_list = false; + } else if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) { + /* Update uc-list unconditionally if the iface was previously + * in uc-promisc mode and now is out of that mode. + */ + adapter->update_uc_list = true; } - netdev_for_each_uc_addr(ha, adapter->netdev) { - adapter->uc_macs++; /* First slot is for Primary MAC */ - be_cmd_pmac_add(adapter, (u8 *)ha->addr, adapter->if_handle, - &adapter->pmac_id[adapter->uc_macs], 0); + if (uc_promisc) { + be_set_uc_promisc(adapter); + } else if (adapter->update_uc_list) { + be_clear_uc_promisc(adapter); + + for (; adapter->uc_macs > 0; adapter->uc_macs--, i++) + be_cmd_pmac_del(adapter, adapter->if_handle, + adapter->pmac_id[i], 0); + + netdev_for_each_uc_addr(ha, adapter->netdev) { + adapter->uc_macs++; /* First slot is for Primary MAC */ + be_cmd_pmac_add(adapter, + (u8 *)ha->addr, adapter->if_handle, + &adapter->pmac_id[adapter->uc_macs], 0); + } + adapter->update_uc_list = false; } } static void be_clear_uc_list(struct be_adapter *adapter) { + struct net_device *netdev = adapter->netdev; int i; + __dev_uc_unsync(netdev, NULL); for (i = 1; i < (adapter->uc_macs + 1); i++) be_cmd_pmac_del(adapter, adapter->if_handle, adapter->pmac_id[i], 0); @@ -1554,27 +1650,17 @@ static void be_set_rx_mode(struct net_device *netdev) struct be_adapter *adapter = netdev_priv(netdev); if (netdev->flags & IFF_PROMISC) { - be_set_all_promisc(adapter); - return; + if (!be_in_all_promisc(adapter)) + be_set_all_promisc(adapter); + } else if (be_in_all_promisc(adapter)) { + /* We need to re-program the vlan-list or clear + * vlan-promisc mode (if needed) when the interface + * comes out of promisc mode. + */ + be_vid_config(adapter); } - /* Interface was previously in promiscuous mode; disable it */ - if (be_in_all_promisc(adapter)) { - be_clear_all_promisc(adapter); - if (adapter->vlans_added) - be_vid_config(adapter); - } - - /* Enable multicast promisc if num configured exceeds what we support */ - if (netdev->flags & IFF_ALLMULTI || - netdev_mc_count(netdev) > be_max_mc(adapter)) { - be_set_mc_promisc(adapter); - return; - } - - if (netdev_uc_count(netdev) != adapter->uc_macs) - be_set_uc_list(adapter); - + be_set_uc_list(adapter); be_set_mc_list(adapter); } @@ -3426,6 +3512,7 @@ static void be_disable_if_filters(struct be_adapter *adapter) adapter->pmac_id[0], 0); be_clear_uc_list(adapter); + be_clear_mc_list(adapter); /* The IFACE flags are enabled in the open path and cleared * in the close path. When a VF gets detached from the host and From b71724147e7307d9d6f89b3f60b92375b304e181 Mon Sep 17 00:00:00 2001 From: Sathya Perla Date: Wed, 27 Jul 2016 05:26:18 -0400 Subject: [PATCH 0005/1715] be2net: replace polling with sleeping in the FW completion path The ndo_set_rx_mode() and ndo_add/del_vxlan_port() calls may be called with BHs disabled. The driver currently issues the required cmds to the FW in these contexts and polls on completions from the FW, while BHs remain disabled. This can cause either packet loss or packet reception to be delayed on that CPU. This patch defers processing of the above cmds to a separate workqueue. With this change, FW cmds are now issued only in process context. Now that the FW cmds are issued only in process context, they can sleep waiting for a completion instead of polling. All the spin_lock_bh(mcc_lock) calls are now replaced with mutex calls. Also a new rx_filter_lock is now needed to protect the RX filtering fields like vids[] between be_vlan_add/rem_vid() and __be_set_rx_mode() contexts. Signed-off-by: Sathya Perla Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be.h | 19 +- drivers/net/ethernet/emulex/benet/be_cmds.c | 202 +++++++-------- drivers/net/ethernet/emulex/benet/be_main.c | 264 +++++++++++++++----- 3 files changed, 327 insertions(+), 158 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h index 68687127e4fd..86780b5c40ef 100644 --- a/drivers/net/ethernet/emulex/benet/be.h +++ b/drivers/net/ethernet/emulex/benet/be.h @@ -508,6 +508,10 @@ struct be_wrb_params { u16 lso_mss; /* MSS for LSO */ }; +struct be_eth_addr { + unsigned char mac[ETH_ALEN]; +}; + struct be_adapter { struct pci_dev *pdev; struct net_device *netdev; @@ -523,7 +527,7 @@ struct be_adapter { struct be_dma_mem mbox_mem_alloced; struct be_mcc_obj mcc_obj; - spinlock_t mcc_lock; /* For serializing mcc cmds to BE card */ + struct mutex mcc_lock; /* For serializing mcc cmds to BE card */ spinlock_t mcc_cq_lock; u16 cfg_num_rx_irqs; /* configured via set-channels */ @@ -570,11 +574,15 @@ struct be_adapter { int if_handle; /* Used to configure filtering */ u32 if_flags; /* Interface filtering flags */ u32 *pmac_id; /* MAC addr handle used by BE card */ + struct be_eth_addr *uc_list;/* list of uc-addrs programmed (not perm) */ u32 uc_macs; /* Count of secondary UC MAC programmed */ + struct be_eth_addr *mc_list;/* list of mcast addrs programmed */ + u32 mc_count; unsigned long vids[BITS_TO_LONGS(VLAN_N_VID)]; u16 vlans_added; bool update_uc_list; bool update_mc_list; + struct mutex rx_filter_lock;/* For protecting vids[] & mc/uc_list[] */ u32 beacon_state; /* for set_phys_id */ @@ -628,6 +636,15 @@ struct be_adapter { u8 phy_state; /* state of sfp optics (functional, faulted, etc.,) */ }; +/* Used for defered FW config cmds. Add fields to this struct as reqd */ +struct be_cmd_work { + struct work_struct work; + struct be_adapter *adapter; + union { + __be16 vxlan_port; + } info; +}; + #define be_physfn(adapter) (!adapter->virtfn) #define be_virtfn(adapter) (adapter->virtfn) #define sriov_enabled(adapter) (adapter->flags & \ diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index 2cc11756859f..fa11a5a8c354 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -571,7 +571,7 @@ int be_process_mcc(struct be_adapter *adapter) /* Wait till no more pending mcc requests are present */ static int be_mcc_wait_compl(struct be_adapter *adapter) { -#define mcc_timeout 120000 /* 12s timeout */ +#define mcc_timeout 12000 /* 12s timeout */ int i, status = 0; struct be_mcc_obj *mcc_obj = &adapter->mcc_obj; @@ -585,7 +585,7 @@ static int be_mcc_wait_compl(struct be_adapter *adapter) if (atomic_read(&mcc_obj->q.used) == 0) break; - udelay(100); + usleep_range(500, 1000); } if (i == mcc_timeout) { dev_err(&adapter->pdev->dev, "FW not responding\n"); @@ -863,7 +863,7 @@ static bool use_mcc(struct be_adapter *adapter) static int be_cmd_lock(struct be_adapter *adapter) { if (use_mcc(adapter)) { - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); return 0; } else { return mutex_lock_interruptible(&adapter->mbox_lock); @@ -874,7 +874,7 @@ static int be_cmd_lock(struct be_adapter *adapter) static void be_cmd_unlock(struct be_adapter *adapter) { if (use_mcc(adapter)) - spin_unlock_bh(&adapter->mcc_lock); + return mutex_unlock(&adapter->mcc_lock); else return mutex_unlock(&adapter->mbox_lock); } @@ -1044,7 +1044,7 @@ int be_cmd_mac_addr_query(struct be_adapter *adapter, u8 *mac_addr, struct be_cmd_req_mac_query *req; int status; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -1073,7 +1073,7 @@ int be_cmd_mac_addr_query(struct be_adapter *adapter, u8 *mac_addr, } err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -1085,7 +1085,7 @@ int be_cmd_pmac_add(struct be_adapter *adapter, u8 *mac_addr, struct be_cmd_req_pmac_add *req; int status; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -1110,7 +1110,7 @@ int be_cmd_pmac_add(struct be_adapter *adapter, u8 *mac_addr, } err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); if (status == MCC_STATUS_UNAUTHORIZED_REQUEST) status = -EPERM; @@ -1128,7 +1128,7 @@ int be_cmd_pmac_del(struct be_adapter *adapter, u32 if_id, int pmac_id, u32 dom) if (pmac_id == -1) return 0; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -1148,7 +1148,7 @@ int be_cmd_pmac_del(struct be_adapter *adapter, u32 if_id, int pmac_id, u32 dom) status = be_mcc_notify_wait(adapter); err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -1411,7 +1411,7 @@ int be_cmd_rxq_create(struct be_adapter *adapter, struct be_dma_mem *q_mem = &rxq->dma_mem; int status; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -1441,7 +1441,7 @@ int be_cmd_rxq_create(struct be_adapter *adapter, } err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -1505,7 +1505,7 @@ int be_cmd_rxq_destroy(struct be_adapter *adapter, struct be_queue_info *q) struct be_cmd_req_q_destroy *req; int status; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -1522,7 +1522,7 @@ int be_cmd_rxq_destroy(struct be_adapter *adapter, struct be_queue_info *q) q->created = false; err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -1590,7 +1590,7 @@ int be_cmd_get_stats(struct be_adapter *adapter, struct be_dma_mem *nonemb_cmd) struct be_cmd_req_hdr *hdr; int status = 0; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -1618,7 +1618,7 @@ int be_cmd_get_stats(struct be_adapter *adapter, struct be_dma_mem *nonemb_cmd) adapter->stats_cmd_sent = true; err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -1634,7 +1634,7 @@ int lancer_cmd_get_pport_stats(struct be_adapter *adapter, CMD_SUBSYSTEM_ETH)) return -EPERM; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -1657,7 +1657,7 @@ int lancer_cmd_get_pport_stats(struct be_adapter *adapter, adapter->stats_cmd_sent = true; err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -1694,7 +1694,7 @@ int be_cmd_link_status_query(struct be_adapter *adapter, u16 *link_speed, struct be_cmd_req_link_status *req; int status; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); if (link_status) *link_status = LINK_DOWN; @@ -1733,7 +1733,7 @@ int be_cmd_link_status_query(struct be_adapter *adapter, u16 *link_speed, } err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -1744,7 +1744,7 @@ int be_cmd_get_die_temperature(struct be_adapter *adapter) struct be_cmd_req_get_cntl_addnl_attribs *req; int status = 0; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -1759,7 +1759,7 @@ int be_cmd_get_die_temperature(struct be_adapter *adapter) status = be_mcc_notify(adapter); err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -1808,7 +1808,7 @@ int be_cmd_get_fat_dump(struct be_adapter *adapter, u32 buf_len, void *buf) if (!get_fat_cmd.va) return -ENOMEM; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); while (total_size) { buf_size = min(total_size, (u32)60*1024); @@ -1848,7 +1848,7 @@ int be_cmd_get_fat_dump(struct be_adapter *adapter, u32 buf_len, void *buf) err: dma_free_coherent(&adapter->pdev->dev, get_fat_cmd.size, get_fat_cmd.va, get_fat_cmd.dma); - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -1859,7 +1859,7 @@ int be_cmd_get_fw_ver(struct be_adapter *adapter) struct be_cmd_req_get_fw_version *req; int status; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -1882,7 +1882,7 @@ int be_cmd_get_fw_ver(struct be_adapter *adapter) sizeof(adapter->fw_on_flash)); } err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -1896,7 +1896,7 @@ static int __be_cmd_modify_eqd(struct be_adapter *adapter, struct be_cmd_req_modify_eq_delay *req; int status = 0, i; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -1919,7 +1919,7 @@ static int __be_cmd_modify_eqd(struct be_adapter *adapter, status = be_mcc_notify(adapter); err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -1946,7 +1946,7 @@ int be_cmd_vlan_config(struct be_adapter *adapter, u32 if_id, u16 *vtag_array, struct be_cmd_req_vlan_config *req; int status; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -1968,7 +1968,7 @@ int be_cmd_vlan_config(struct be_adapter *adapter, u32 if_id, u16 *vtag_array, status = be_mcc_notify_wait(adapter); err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -1979,7 +1979,7 @@ static int __be_cmd_rx_filter(struct be_adapter *adapter, u32 flags, u32 value) struct be_cmd_req_rx_filter *req = mem->va; int status; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -1996,8 +1996,7 @@ static int __be_cmd_rx_filter(struct be_adapter *adapter, u32 flags, u32 value) req->if_flags = (value == ON) ? req->if_flags_mask : 0; if (flags & BE_IF_FLAGS_MULTICAST) { - struct netdev_hw_addr *ha; - int i = 0; + int i; /* Reset mcast promisc mode if already set by setting mask * and not setting flags field @@ -2005,14 +2004,15 @@ static int __be_cmd_rx_filter(struct be_adapter *adapter, u32 flags, u32 value) req->if_flags_mask |= cpu_to_le32(BE_IF_FLAGS_MCAST_PROMISCUOUS & be_if_cap_flags(adapter)); - req->mcast_num = cpu_to_le32(netdev_mc_count(adapter->netdev)); - netdev_for_each_mc_addr(ha, adapter->netdev) - memcpy(req->mcast_mac[i++].byte, ha->addr, ETH_ALEN); + req->mcast_num = cpu_to_le32(adapter->mc_count); + for (i = 0; i < adapter->mc_count; i++) + ether_addr_copy(req->mcast_mac[i].byte, + adapter->mc_list[i].mac); } status = be_mcc_notify_wait(adapter); err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -2043,7 +2043,7 @@ int be_cmd_set_flow_control(struct be_adapter *adapter, u32 tx_fc, u32 rx_fc) CMD_SUBSYSTEM_COMMON)) return -EPERM; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -2063,7 +2063,7 @@ int be_cmd_set_flow_control(struct be_adapter *adapter, u32 tx_fc, u32 rx_fc) status = be_mcc_notify_wait(adapter); err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); if (base_status(status) == MCC_STATUS_FEATURE_NOT_SUPPORTED) return -EOPNOTSUPP; @@ -2082,7 +2082,7 @@ int be_cmd_get_flow_control(struct be_adapter *adapter, u32 *tx_fc, u32 *rx_fc) CMD_SUBSYSTEM_COMMON)) return -EPERM; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -2105,7 +2105,7 @@ int be_cmd_get_flow_control(struct be_adapter *adapter, u32 *tx_fc, u32 *rx_fc) } err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -2186,7 +2186,7 @@ int be_cmd_rss_config(struct be_adapter *adapter, u8 *rsstable, if (!(be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS)) return 0; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -2211,7 +2211,7 @@ int be_cmd_rss_config(struct be_adapter *adapter, u8 *rsstable, status = be_mcc_notify_wait(adapter); err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -2223,7 +2223,7 @@ int be_cmd_set_beacon_state(struct be_adapter *adapter, u8 port_num, struct be_cmd_req_enable_disable_beacon *req; int status; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -2244,7 +2244,7 @@ int be_cmd_set_beacon_state(struct be_adapter *adapter, u8 port_num, status = be_mcc_notify_wait(adapter); err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -2255,7 +2255,7 @@ int be_cmd_get_beacon_state(struct be_adapter *adapter, u8 port_num, u32 *state) struct be_cmd_req_get_beacon_state *req; int status; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -2279,7 +2279,7 @@ int be_cmd_get_beacon_state(struct be_adapter *adapter, u8 port_num, u32 *state) } err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -2303,7 +2303,7 @@ int be_cmd_read_port_transceiver_data(struct be_adapter *adapter, return -ENOMEM; } - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -2325,7 +2325,7 @@ int be_cmd_read_port_transceiver_data(struct be_adapter *adapter, memcpy(data, resp->page_data, PAGE_DATA_LEN); } err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); dma_free_coherent(&adapter->pdev->dev, cmd.size, cmd.va, cmd.dma); return status; } @@ -2342,7 +2342,7 @@ static int lancer_cmd_write_object(struct be_adapter *adapter, void *ctxt = NULL; int status; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); adapter->flash_status = 0; wrb = wrb_from_mccq(adapter); @@ -2384,7 +2384,7 @@ static int lancer_cmd_write_object(struct be_adapter *adapter, if (status) goto err_unlock; - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); if (!wait_for_completion_timeout(&adapter->et_cmd_compl, msecs_to_jiffies(60000))) @@ -2403,7 +2403,7 @@ static int lancer_cmd_write_object(struct be_adapter *adapter, return status; err_unlock: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -2457,7 +2457,7 @@ static int lancer_cmd_delete_object(struct be_adapter *adapter, struct be_mcc_wrb *wrb; int status; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -2475,7 +2475,7 @@ static int lancer_cmd_delete_object(struct be_adapter *adapter, status = be_mcc_notify_wait(adapter); err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -2488,7 +2488,7 @@ int lancer_cmd_read_object(struct be_adapter *adapter, struct be_dma_mem *cmd, struct lancer_cmd_resp_read_object *resp; int status; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -2522,7 +2522,7 @@ int lancer_cmd_read_object(struct be_adapter *adapter, struct be_dma_mem *cmd, } err_unlock: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -2534,7 +2534,7 @@ static int be_cmd_write_flashrom(struct be_adapter *adapter, struct be_cmd_write_flashrom *req; int status; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); adapter->flash_status = 0; wrb = wrb_from_mccq(adapter); @@ -2559,7 +2559,7 @@ static int be_cmd_write_flashrom(struct be_adapter *adapter, if (status) goto err_unlock; - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); if (!wait_for_completion_timeout(&adapter->et_cmd_compl, msecs_to_jiffies(40000))) @@ -2570,7 +2570,7 @@ static int be_cmd_write_flashrom(struct be_adapter *adapter, return status; err_unlock: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -2581,7 +2581,7 @@ static int be_cmd_get_flash_crc(struct be_adapter *adapter, u8 *flashed_crc, struct be_mcc_wrb *wrb; int status; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -2608,7 +2608,7 @@ static int be_cmd_get_flash_crc(struct be_adapter *adapter, u8 *flashed_crc, memcpy(flashed_crc, req->crc, 4); err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -3192,7 +3192,7 @@ int be_cmd_enable_magic_wol(struct be_adapter *adapter, u8 *mac, struct be_cmd_req_acpi_wol_magic_config *req; int status; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -3209,7 +3209,7 @@ int be_cmd_enable_magic_wol(struct be_adapter *adapter, u8 *mac, status = be_mcc_notify_wait(adapter); err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -3224,7 +3224,7 @@ int be_cmd_set_loopback(struct be_adapter *adapter, u8 port_num, CMD_SUBSYSTEM_LOWLEVEL)) return -EPERM; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -3247,7 +3247,7 @@ int be_cmd_set_loopback(struct be_adapter *adapter, u8 port_num, if (status) goto err_unlock; - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); if (!wait_for_completion_timeout(&adapter->et_cmd_compl, msecs_to_jiffies(SET_LB_MODE_TIMEOUT))) @@ -3256,7 +3256,7 @@ int be_cmd_set_loopback(struct be_adapter *adapter, u8 port_num, return status; err_unlock: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -3273,7 +3273,7 @@ int be_cmd_loopback_test(struct be_adapter *adapter, u32 port_num, CMD_SUBSYSTEM_LOWLEVEL)) return -EPERM; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -3299,7 +3299,7 @@ int be_cmd_loopback_test(struct be_adapter *adapter, u32 port_num, if (status) goto err; - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); wait_for_completion(&adapter->et_cmd_compl); resp = embedded_payload(wrb); @@ -3307,7 +3307,7 @@ int be_cmd_loopback_test(struct be_adapter *adapter, u32 port_num, return status; err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -3323,7 +3323,7 @@ int be_cmd_ddr_dma_test(struct be_adapter *adapter, u64 pattern, CMD_SUBSYSTEM_LOWLEVEL)) return -EPERM; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -3357,7 +3357,7 @@ int be_cmd_ddr_dma_test(struct be_adapter *adapter, u64 pattern, } err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -3368,7 +3368,7 @@ int be_cmd_get_seeprom_data(struct be_adapter *adapter, struct be_cmd_req_seeprom_read *req; int status; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -3384,7 +3384,7 @@ int be_cmd_get_seeprom_data(struct be_adapter *adapter, status = be_mcc_notify_wait(adapter); err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -3399,7 +3399,7 @@ int be_cmd_get_phy_info(struct be_adapter *adapter) CMD_SUBSYSTEM_COMMON)) return -EPERM; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -3444,7 +3444,7 @@ int be_cmd_get_phy_info(struct be_adapter *adapter) } dma_free_coherent(&adapter->pdev->dev, cmd.size, cmd.va, cmd.dma); err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -3454,7 +3454,7 @@ static int be_cmd_set_qos(struct be_adapter *adapter, u32 bps, u32 domain) struct be_cmd_req_set_qos *req; int status; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -3474,7 +3474,7 @@ static int be_cmd_set_qos(struct be_adapter *adapter, u32 bps, u32 domain) status = be_mcc_notify_wait(adapter); err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -3581,7 +3581,7 @@ int be_cmd_get_fn_privileges(struct be_adapter *adapter, u32 *privilege, struct be_cmd_req_get_fn_privileges *req; int status; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -3613,7 +3613,7 @@ int be_cmd_get_fn_privileges(struct be_adapter *adapter, u32 *privilege, } err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -3625,7 +3625,7 @@ int be_cmd_set_fn_privileges(struct be_adapter *adapter, u32 privileges, struct be_cmd_req_set_fn_privileges *req; int status; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -3645,7 +3645,7 @@ int be_cmd_set_fn_privileges(struct be_adapter *adapter, u32 privileges, status = be_mcc_notify_wait(adapter); err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -3677,7 +3677,7 @@ int be_cmd_get_mac_from_list(struct be_adapter *adapter, u8 *mac, return -ENOMEM; } - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -3741,7 +3741,7 @@ int be_cmd_get_mac_from_list(struct be_adapter *adapter, u8 *mac, } out: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); dma_free_coherent(&adapter->pdev->dev, get_mac_list_cmd.size, get_mac_list_cmd.va, get_mac_list_cmd.dma); return status; @@ -3801,7 +3801,7 @@ int be_cmd_set_mac_list(struct be_adapter *adapter, u8 *mac_array, if (!cmd.va) return -ENOMEM; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -3823,7 +3823,7 @@ int be_cmd_set_mac_list(struct be_adapter *adapter, u8 *mac_array, err: dma_free_coherent(&adapter->pdev->dev, cmd.size, cmd.va, cmd.dma); - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -3859,7 +3859,7 @@ int be_cmd_set_hsw_config(struct be_adapter *adapter, u16 pvid, CMD_SUBSYSTEM_COMMON)) return -EPERM; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -3900,7 +3900,7 @@ int be_cmd_set_hsw_config(struct be_adapter *adapter, u16 pvid, status = be_mcc_notify_wait(adapter); err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -3914,7 +3914,7 @@ int be_cmd_get_hsw_config(struct be_adapter *adapter, u16 *pvid, int status; u16 vid; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -3961,7 +3961,7 @@ int be_cmd_get_hsw_config(struct be_adapter *adapter, u16 *pvid, } err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -4156,7 +4156,7 @@ int be_cmd_set_ext_fat_capabilites(struct be_adapter *adapter, struct be_cmd_req_set_ext_fat_caps *req; int status; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -4172,7 +4172,7 @@ int be_cmd_set_ext_fat_capabilites(struct be_adapter *adapter, status = be_mcc_notify_wait(adapter); err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -4650,7 +4650,7 @@ int be_cmd_manage_iface(struct be_adapter *adapter, u32 iface, u8 op) if (iface == 0xFFFFFFFF) return -1; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -4667,7 +4667,7 @@ int be_cmd_manage_iface(struct be_adapter *adapter, u32 iface, u8 op) status = be_mcc_notify_wait(adapter); err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -4701,7 +4701,7 @@ int be_cmd_get_if_id(struct be_adapter *adapter, struct be_vf_cfg *vf_cfg, struct be_cmd_resp_get_iface_list *resp; int status; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -4722,7 +4722,7 @@ int be_cmd_get_if_id(struct be_adapter *adapter, struct be_vf_cfg *vf_cfg, } err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -4816,7 +4816,7 @@ int be_cmd_enable_vf(struct be_adapter *adapter, u8 domain) if (BEx_chip(adapter)) return 0; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -4834,7 +4834,7 @@ int be_cmd_enable_vf(struct be_adapter *adapter, u8 domain) req->enable = 1; status = be_mcc_notify_wait(adapter); err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -4905,7 +4905,7 @@ int __be_cmd_set_logical_link_config(struct be_adapter *adapter, struct be_cmd_req_set_ll_link *req; int status; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -4931,7 +4931,7 @@ int __be_cmd_set_logical_link_config(struct be_adapter *adapter, status = be_mcc_notify_wait(adapter); err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -4964,7 +4964,7 @@ int be_roce_mcc_cmd(void *netdev_handle, void *wrb_payload, struct be_cmd_resp_hdr *resp; int status; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -4987,7 +4987,7 @@ int be_roce_mcc_cmd(void *netdev_handle, void *wrb_payload, memcpy(wrb_payload, resp, sizeof(*resp) + resp->response_length); be_dws_le_to_cpu(wrb_payload, sizeof(*resp) + resp->response_length); err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } EXPORT_SYMBOL(be_roce_mcc_cmd); diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 03f6f6db2d3c..f7584d4139ff 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -53,6 +53,10 @@ static const struct pci_device_id be_dev_ids[] = { { 0 } }; MODULE_DEVICE_TABLE(pci, be_dev_ids); + +/* Workqueue used by all functions for defering cmd calls to the adapter */ +struct workqueue_struct *be_wq; + /* UE Status Low CSR */ static const char * const ue_status_low_desc[] = { "CEV", @@ -1453,34 +1457,45 @@ static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid) struct be_adapter *adapter = netdev_priv(netdev); int status = 0; + mutex_lock(&adapter->rx_filter_lock); + /* Packets with VID 0 are always received by Lancer by default */ if (lancer_chip(adapter) && vid == 0) - return status; + goto done; if (test_bit(vid, adapter->vids)) - return status; + goto done; set_bit(vid, adapter->vids); adapter->vlans_added++; - return be_vid_config(adapter); + status = be_vid_config(adapter); +done: + mutex_unlock(&adapter->rx_filter_lock); + return status; } static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid) { struct be_adapter *adapter = netdev_priv(netdev); + int status = 0; + + mutex_lock(&adapter->rx_filter_lock); /* Packets with VID 0 are always received by Lancer by default */ if (lancer_chip(adapter) && vid == 0) - return 0; + goto done; if (!test_bit(vid, adapter->vids)) - return 0; + goto done; clear_bit(vid, adapter->vids); adapter->vlans_added--; - return be_vid_config(adapter); + status = be_vid_config(adapter); +done: + mutex_unlock(&adapter->rx_filter_lock); + return status; } static void be_set_all_promisc(struct be_adapter *adapter) @@ -1551,9 +1566,11 @@ static int be_mc_list_update(struct net_device *netdev, static void be_set_mc_list(struct be_adapter *adapter) { struct net_device *netdev = adapter->netdev; + struct netdev_hw_addr *ha; bool mc_promisc = false; int status; + netif_addr_lock_bh(netdev); __dev_mc_sync(netdev, be_mc_list_update, be_mc_list_update); if (netdev->flags & IFF_PROMISC) { @@ -1572,6 +1589,18 @@ static void be_set_mc_list(struct be_adapter *adapter) adapter->update_mc_list = true; } + if (adapter->update_mc_list) { + int i = 0; + + /* cache the mc-list in adapter */ + netdev_for_each_mc_addr(ha, netdev) { + ether_addr_copy(adapter->mc_list[i].mac, ha->addr); + i++; + } + adapter->mc_count = netdev_mc_count(netdev); + } + netif_addr_unlock_bh(netdev); + if (mc_promisc) { be_set_mc_promisc(adapter); } else if (adapter->update_mc_list) { @@ -1591,6 +1620,7 @@ static void be_clear_mc_list(struct be_adapter *adapter) __dev_mc_unsync(netdev, NULL); be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, OFF); + adapter->mc_count = 0; } static void be_set_uc_list(struct be_adapter *adapter) @@ -1598,8 +1628,9 @@ static void be_set_uc_list(struct be_adapter *adapter) struct net_device *netdev = adapter->netdev; struct netdev_hw_addr *ha; bool uc_promisc = false; - int i = 1; /* First slot is claimed by the Primary MAC */ + int curr_uc_macs = 0, i; + netif_addr_lock_bh(netdev); __dev_uc_sync(netdev, be_uc_list_update, be_uc_list_update); if (netdev->flags & IFF_PROMISC) { @@ -1614,21 +1645,32 @@ static void be_set_uc_list(struct be_adapter *adapter) adapter->update_uc_list = true; } + if (adapter->update_uc_list) { + i = 1; /* First slot is claimed by the Primary MAC */ + + /* cache the uc-list in adapter array */ + netdev_for_each_uc_addr(ha, netdev) { + ether_addr_copy(adapter->uc_list[i].mac, ha->addr); + i++; + } + curr_uc_macs = netdev_uc_count(netdev); + } + netif_addr_unlock_bh(netdev); + if (uc_promisc) { be_set_uc_promisc(adapter); } else if (adapter->update_uc_list) { be_clear_uc_promisc(adapter); - for (; adapter->uc_macs > 0; adapter->uc_macs--, i++) + for (i = 0; i < adapter->uc_macs; i++) be_cmd_pmac_del(adapter, adapter->if_handle, - adapter->pmac_id[i], 0); + adapter->pmac_id[i + 1], 0); - netdev_for_each_uc_addr(ha, adapter->netdev) { - adapter->uc_macs++; /* First slot is for Primary MAC */ - be_cmd_pmac_add(adapter, - (u8 *)ha->addr, adapter->if_handle, - &adapter->pmac_id[adapter->uc_macs], 0); - } + for (i = 0; i < curr_uc_macs; i++) + be_cmd_pmac_add(adapter, adapter->uc_list[i].mac, + adapter->if_handle, + &adapter->pmac_id[i + 1], 0); + adapter->uc_macs = curr_uc_macs; adapter->update_uc_list = false; } } @@ -1639,15 +1681,17 @@ static void be_clear_uc_list(struct be_adapter *adapter) int i; __dev_uc_unsync(netdev, NULL); - for (i = 1; i < (adapter->uc_macs + 1); i++) + for (i = 0; i < adapter->uc_macs; i++) be_cmd_pmac_del(adapter, adapter->if_handle, - adapter->pmac_id[i], 0); + adapter->pmac_id[i + 1], 0); adapter->uc_macs = 0; } -static void be_set_rx_mode(struct net_device *netdev) +static void __be_set_rx_mode(struct be_adapter *adapter) { - struct be_adapter *adapter = netdev_priv(netdev); + struct net_device *netdev = adapter->netdev; + + mutex_lock(&adapter->rx_filter_lock); if (netdev->flags & IFF_PROMISC) { if (!be_in_all_promisc(adapter)) @@ -1662,6 +1706,17 @@ static void be_set_rx_mode(struct net_device *netdev) be_set_uc_list(adapter); be_set_mc_list(adapter); + + mutex_unlock(&adapter->rx_filter_lock); +} + +static void be_work_set_rx_mode(struct work_struct *work) +{ + struct be_cmd_work *cmd_work = + container_of(work, struct be_cmd_work, work); + + __be_set_rx_mode(cmd_work->adapter); + kfree(cmd_work); } static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac) @@ -3546,6 +3601,11 @@ static int be_close(struct net_device *netdev) if (!(adapter->flags & BE_FLAGS_SETUP_DONE)) return 0; + /* Before attempting cleanup ensure all the pending cmds in the + * config_wq have finished execution + */ + flush_workqueue(be_wq); + be_disable_if_filters(adapter); if (adapter->flags & BE_FLAGS_NAPI_ENABLED) { @@ -3670,7 +3730,7 @@ static int be_enable_if_filters(struct be_adapter *adapter) if (adapter->vlans_added) be_vid_config(adapter); - be_set_rx_mode(adapter->netdev); + __be_set_rx_mode(adapter); return 0; } @@ -3944,6 +4004,20 @@ static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs, vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1); } +static void be_if_destroy(struct be_adapter *adapter) +{ + be_cmd_if_destroy(adapter, adapter->if_handle, 0); + + kfree(adapter->pmac_id); + adapter->pmac_id = NULL; + + kfree(adapter->mc_list); + adapter->mc_list = NULL; + + kfree(adapter->uc_list); + adapter->uc_list = NULL; +} + static int be_clear(struct be_adapter *adapter) { struct pci_dev *pdev = adapter->pdev; @@ -3951,6 +4025,8 @@ static int be_clear(struct be_adapter *adapter) be_cancel_worker(adapter); + flush_workqueue(be_wq); + if (sriov_enabled(adapter)) be_vf_clear(adapter); @@ -3968,10 +4044,8 @@ static int be_clear(struct be_adapter *adapter) } be_disable_vxlan_offloads(adapter); - kfree(adapter->pmac_id); - adapter->pmac_id = NULL; - be_cmd_if_destroy(adapter, adapter->if_handle, 0); + be_if_destroy(adapter); be_clear_queues(adapter); @@ -4425,7 +4499,7 @@ static int be_mac_setup(struct be_adapter *adapter) static void be_schedule_worker(struct be_adapter *adapter) { - schedule_delayed_work(&adapter->work, msecs_to_jiffies(1000)); + queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000)); adapter->flags |= BE_FLAGS_WORKER_SCHEDULED; } @@ -4477,6 +4551,22 @@ static int be_if_create(struct be_adapter *adapter) u32 cap_flags = be_if_cap_flags(adapter); int status; + /* alloc required memory for other filtering fields */ + adapter->pmac_id = kcalloc(be_max_uc(adapter), + sizeof(*adapter->pmac_id), GFP_KERNEL); + if (!adapter->pmac_id) + return -ENOMEM; + + adapter->mc_list = kcalloc(be_max_mc(adapter), + sizeof(*adapter->mc_list), GFP_KERNEL); + if (!adapter->mc_list) + return -ENOMEM; + + adapter->uc_list = kcalloc(be_max_uc(adapter), + sizeof(*adapter->uc_list), GFP_KERNEL); + if (!adapter->uc_list) + return -ENOMEM; + if (adapter->cfg_num_rx_irqs == 1) cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS); @@ -4485,7 +4575,10 @@ static int be_if_create(struct be_adapter *adapter) status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags, &adapter->if_handle, 0); - return status; + if (status) + return status; + + return 0; } int be_update_queues(struct be_adapter *adapter) @@ -4614,11 +4707,6 @@ static int be_setup(struct be_adapter *adapter) if (status) goto err; - adapter->pmac_id = kcalloc(be_max_uc(adapter), - sizeof(*adapter->pmac_id), GFP_KERNEL); - if (!adapter->pmac_id) - return -ENOMEM; - status = be_msix_enable(adapter); if (status) goto err; @@ -4812,6 +4900,23 @@ static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, 0, 0, nlflags, filter_mask, NULL); } +static struct be_cmd_work *be_alloc_work(struct be_adapter *adapter, + void (*func)(struct work_struct *)) +{ + struct be_cmd_work *work; + + work = kzalloc(sizeof(*work), GFP_ATOMIC); + if (!work) { + dev_err(&adapter->pdev->dev, + "be_work memory allocation failed\n"); + return NULL; + } + + INIT_WORK(&work->work, func); + work->adapter = adapter; + return work; +} + /* VxLAN offload Notes: * * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't @@ -4826,23 +4931,19 @@ static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, * adds more than one port, disable offloads and don't re-enable them again * until after all the tunnels are removed. */ -static void be_add_vxlan_port(struct net_device *netdev, - struct udp_tunnel_info *ti) +static void be_work_add_vxlan_port(struct work_struct *work) { - struct be_adapter *adapter = netdev_priv(netdev); + struct be_cmd_work *cmd_work = + container_of(work, struct be_cmd_work, work); + struct be_adapter *adapter = cmd_work->adapter; + struct net_device *netdev = adapter->netdev; struct device *dev = &adapter->pdev->dev; - __be16 port = ti->port; + __be16 port = cmd_work->info.vxlan_port; int status; - if (ti->type != UDP_TUNNEL_TYPE_VXLAN) - return; - - if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter)) - return; - if (adapter->vxlan_port == port && adapter->vxlan_port_count) { adapter->vxlan_port_aliases++; - return; + goto done; } if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS) { @@ -4854,7 +4955,7 @@ static void be_add_vxlan_port(struct net_device *netdev, } if (adapter->vxlan_port_count++ >= 1) - return; + goto done; status = be_cmd_manage_iface(adapter, adapter->if_handle, OP_CONVERT_NORMAL_TO_TUNNEL); @@ -4879,29 +4980,26 @@ static void be_add_vxlan_port(struct net_device *netdev, dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n", be16_to_cpu(port)); - return; + goto done; err: be_disable_vxlan_offloads(adapter); +done: + kfree(cmd_work); } -static void be_del_vxlan_port(struct net_device *netdev, - struct udp_tunnel_info *ti) +static void be_work_del_vxlan_port(struct work_struct *work) { - struct be_adapter *adapter = netdev_priv(netdev); - __be16 port = ti->port; - - if (ti->type != UDP_TUNNEL_TYPE_VXLAN) - return; - - if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter)) - return; + struct be_cmd_work *cmd_work = + container_of(work, struct be_cmd_work, work); + struct be_adapter *adapter = cmd_work->adapter; + __be16 port = cmd_work->info.vxlan_port; if (adapter->vxlan_port != port) goto done; if (adapter->vxlan_port_aliases) { adapter->vxlan_port_aliases--; - return; + goto out; } be_disable_vxlan_offloads(adapter); @@ -4911,6 +5009,40 @@ static void be_del_vxlan_port(struct net_device *netdev, be16_to_cpu(port)); done: adapter->vxlan_port_count--; +out: + kfree(cmd_work); +} + +static void be_cfg_vxlan_port(struct net_device *netdev, + struct udp_tunnel_info *ti, + void (*func)(struct work_struct *)) +{ + struct be_adapter *adapter = netdev_priv(netdev); + struct be_cmd_work *cmd_work; + + if (ti->type != UDP_TUNNEL_TYPE_VXLAN) + return; + + if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter)) + return; + + cmd_work = be_alloc_work(adapter, func); + if (cmd_work) { + cmd_work->info.vxlan_port = ti->port; + queue_work(be_wq, &cmd_work->work); + } +} + +static void be_del_vxlan_port(struct net_device *netdev, + struct udp_tunnel_info *ti) +{ + be_cfg_vxlan_port(netdev, ti, be_work_del_vxlan_port); +} + +static void be_add_vxlan_port(struct net_device *netdev, + struct udp_tunnel_info *ti) +{ + be_cfg_vxlan_port(netdev, ti, be_work_add_vxlan_port); } static netdev_features_t be_features_check(struct sk_buff *skb, @@ -4975,6 +5107,16 @@ static int be_get_phys_port_id(struct net_device *dev, return 0; } +static void be_set_rx_mode(struct net_device *dev) +{ + struct be_adapter *adapter = netdev_priv(dev); + struct be_cmd_work *work; + + work = be_alloc_work(adapter, be_work_set_rx_mode); + if (work) + queue_work(be_wq, &work->work); +} + static const struct net_device_ops be_netdev_ops = { .ndo_open = be_open, .ndo_stop = be_close, @@ -5200,7 +5342,7 @@ static void be_worker(struct work_struct *work) reschedule: adapter->work_counter++; - schedule_delayed_work(&adapter->work, msecs_to_jiffies(1000)); + queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000)); } static void be_unmap_pci_bars(struct be_adapter *adapter) @@ -5340,7 +5482,8 @@ static int be_drv_init(struct be_adapter *adapter) } mutex_init(&adapter->mbox_lock); - spin_lock_init(&adapter->mcc_lock); + mutex_init(&adapter->mcc_lock); + mutex_init(&adapter->rx_filter_lock); spin_lock_init(&adapter->mcc_cq_lock); init_completion(&adapter->et_cmd_compl); @@ -5796,6 +5939,12 @@ static int __init be_init_module(void) pr_info(DRV_NAME " : Use sysfs method to enable VFs\n"); } + be_wq = create_singlethread_workqueue("be_wq"); + if (!be_wq) { + pr_warn(DRV_NAME "workqueue creation failed\n"); + return -1; + } + return pci_register_driver(&be_driver); } module_init(be_init_module); @@ -5803,5 +5952,8 @@ module_init(be_init_module); static void __exit be_exit_module(void) { pci_unregister_driver(&be_driver); + + if (be_wq) + destroy_workqueue(be_wq); } module_exit(be_exit_module); From 22fc5388721154649691cc4bff5fe77a973fa68e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 29 Jul 2016 11:30:37 +0200 Subject: [PATCH 0006/1715] net: ipconfig: Add device name to debug messages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This simplifies understanding what happens when there is more than one device. Signed-off-by: Uwe Kleine-König Signed-off-by: David S. Miller --- net/ipv4/ipconfig.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 1d71c40eaaf3..369e4a004850 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -666,14 +666,14 @@ static const u8 ic_bootp_cookie[4] = { 99, 130, 83, 99 }; #ifdef IPCONFIG_DHCP static void __init -ic_dhcp_init_options(u8 *options) +ic_dhcp_init_options(u8 *options, struct ic_device *d) { u8 mt = ((ic_servaddr == NONE) ? DHCPDISCOVER : DHCPREQUEST); u8 *e = options; int len; - pr_debug("DHCP: Sending message type %d\n", mt); + pr_debug("DHCP: Sending message type %d (%s)\n", mt, d->dev->name); memcpy(e, ic_bootp_cookie, 4); /* RFC1048 Magic Cookie */ e += 4; @@ -857,7 +857,7 @@ static void __init ic_bootp_send_if(struct ic_device *d, unsigned long jiffies_d /* add DHCP options or BOOTP extensions */ #ifdef IPCONFIG_DHCP if (ic_proto_enabled & IC_USE_DHCP) - ic_dhcp_init_options(b->exten); + ic_dhcp_init_options(b->exten, d); else #endif ic_bootp_init_ext(b->exten); @@ -1033,8 +1033,8 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str /* Is it a reply to our BOOTP request? */ if (b->op != BOOTP_REPLY || b->xid != d->xid) { - net_err_ratelimited("DHCP/BOOTP: Reply not for us, op[%x] xid[%x]\n", - b->op, b->xid); + net_err_ratelimited("DHCP/BOOTP: Reply not for us on %s, op[%x] xid[%x]\n", + d->dev->name, b->op, b->xid); goto drop_unlock; } @@ -1075,7 +1075,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str } } - pr_debug("DHCP: Got message type %d\n", mt); + pr_debug("DHCP: Got message type %d (%s)\n", mt, d->dev->name); switch (mt) { case DHCPOFFER: From 2647cffb2bc6fbed163d377390eb7ca552c7c1cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 29 Jul 2016 11:30:38 +0200 Subject: [PATCH 0007/1715] net: ipconfig: Support using "delayed" DHCP replies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The dhcp code only waits 1s between sending DHCP requests on different devices and only accepts an answer for the device that sent out the last request. Only the timeout at the end of a loop is increased iteratively which favours only the last device. This makes it impossible to work with a dhcp server that takes little more than 1s connected to a device that is not the last one. Instead of also increasing the inter-device timeout, teach the code to handle delayed replies. To accomplish that, make *ic_dev track the current ic_device instead of the current net_device and adapt all users accordingly. The relevant change then is to reset d to ic_dev on a reply to assert that the followup request goes through the right device. Signed-off-by: Uwe Kleine-König Signed-off-by: David S. Miller --- net/ipv4/ipconfig.c | 29 ++++++++++------------------- 1 file changed, 10 insertions(+), 19 deletions(-) diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 369e4a004850..5af6736bd384 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -188,7 +188,7 @@ struct ic_device { }; static struct ic_device *ic_first_dev __initdata; /* List of open device */ -static struct net_device *ic_dev __initdata; /* Selected device */ +static struct ic_device *ic_dev __initdata; /* Selected device */ static bool __init ic_is_init_dev(struct net_device *dev) { @@ -307,7 +307,7 @@ static void __init ic_close_devs(void) while ((d = next)) { next = d->next; dev = d->dev; - if (dev != ic_dev && !netdev_uses_dsa(dev)) { + if (dev != ic_dev->dev && !netdev_uses_dsa(dev)) { pr_debug("IP-Config: Downing %s\n", dev->name); dev_change_flags(dev, d->flags); } @@ -372,7 +372,7 @@ static int __init ic_setup_if(void) int err; memset(&ir, 0, sizeof(ir)); - strcpy(ir.ifr_ifrn.ifrn_name, ic_dev->name); + strcpy(ir.ifr_ifrn.ifrn_name, ic_dev->dev->name); set_sockaddr(sin, ic_myaddr, 0); if ((err = ic_devinet_ioctl(SIOCSIFADDR, &ir)) < 0) { pr_err("IP-Config: Unable to set interface address (%d)\n", @@ -396,7 +396,7 @@ static int __init ic_setup_if(void) * out, we'll try to muddle along. */ if (ic_dev_mtu != 0) { - strcpy(ir.ifr_name, ic_dev->name); + strcpy(ir.ifr_name, ic_dev->dev->name); ir.ifr_mtu = ic_dev_mtu; if ((err = ic_dev_ioctl(SIOCSIFMTU, &ir)) < 0) pr_err("IP-Config: Unable to set interface mtu to %d (%d)\n", @@ -568,7 +568,7 @@ ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt goto drop_unlock; /* We have a winner! */ - ic_dev = dev; + ic_dev = d; if (ic_myaddr == NONE) ic_myaddr = tip; ic_servaddr = sip; @@ -655,8 +655,6 @@ static struct packet_type bootp_packet_type __initdata = { .func = ic_bootp_recv, }; -static __be32 ic_dev_xid; /* Device under configuration */ - /* * Initialize DHCP/BOOTP extension fields in the request. */ @@ -1038,12 +1036,6 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str goto drop_unlock; } - /* Is it a reply for the device we are configuring? */ - if (b->xid != ic_dev_xid) { - net_err_ratelimited("DHCP/BOOTP: Ignoring delayed packet\n"); - goto drop_unlock; - } - /* Parse extensions */ if (ext_len >= 4 && !memcmp(b->exten, ic_bootp_cookie, 4)) { /* Check magic cookie */ @@ -1130,7 +1122,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str } /* We have a winner! */ - ic_dev = dev; + ic_dev = d; ic_myaddr = b->your_ip; ic_servaddr = b->server_ip; ic_addrservaddr = b->iph.saddr; @@ -1225,9 +1217,6 @@ static int __init ic_dynamic(void) timeout = CONF_BASE_TIMEOUT + (timeout % (unsigned int) CONF_TIMEOUT_RANDOM); for (;;) { #ifdef IPCONFIG_BOOTP - /* Track the device we are configuring */ - ic_dev_xid = d->xid; - if (do_bootp && (d->able & IC_BOOTP)) ic_bootp_send_if(d, jiffies - start_jiffies); #endif @@ -1245,6 +1234,8 @@ static int __init ic_dynamic(void) (ic_proto_enabled & IC_USE_DHCP) && ic_dhcp_msgtype != DHCPACK) { ic_got_reply = 0; + /* continue on device that got the reply */ + d = ic_dev; pr_cont(","); continue; } @@ -1487,7 +1478,7 @@ static int __init ip_auto_config(void) #endif /* IPCONFIG_DYNAMIC */ } else { /* Device selected manually or only one device -> use it */ - ic_dev = ic_first_dev->dev; + ic_dev = ic_first_dev; } addr = root_nfs_parse_addr(root_server_path); @@ -1522,7 +1513,7 @@ static int __init ip_auto_config(void) pr_info("IP-Config: Complete:\n"); pr_info(" device=%s, hwaddr=%*phC, ipaddr=%pI4, mask=%pI4, gw=%pI4\n", - ic_dev->name, ic_dev->addr_len, ic_dev->dev_addr, + ic_dev->dev->name, ic_dev->dev->addr_len, ic_dev->dev->dev_addr, &ic_myaddr, &ic_netmask, &ic_gateway); pr_info(" host=%s, domain=%s, nis-domain=%s\n", utsname()->nodename, ic_domain, utsname()->domainname); From e068853409aa17208e94f4ca628005cc6f51e043 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 29 Jul 2016 11:30:39 +0200 Subject: [PATCH 0008/1715] net: ipconfig: drop inter-device timeout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that ipconfig learned to handle "delayed replies" in the previous commit, there is no reason any more to delay sending a first request per device. Signed-off-by: Uwe Kleine-König Signed-off-by: David S. Miller --- net/ipv4/ipconfig.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 5af6736bd384..42cf629357b5 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -85,7 +85,6 @@ /* Define the timeout for waiting for a DHCP/BOOTP/RARP reply */ #define CONF_OPEN_RETRIES 2 /* (Re)open devices twice */ #define CONF_SEND_RETRIES 6 /* Send six requests per open */ -#define CONF_INTER_TIMEOUT (HZ) /* Inter-device timeout: 1 second */ #define CONF_BASE_TIMEOUT (HZ*2) /* Initial timeout: 2 seconds */ #define CONF_TIMEOUT_RANDOM (HZ) /* Maximum amount of randomization */ #define CONF_TIMEOUT_MULT *7/4 /* Rate of timeout growth */ @@ -1225,9 +1224,11 @@ static int __init ic_dynamic(void) ic_rarp_send_if(d); #endif - jiff = jiffies + (d->next ? CONF_INTER_TIMEOUT : timeout); - while (time_before(jiffies, jiff) && !ic_got_reply) - schedule_timeout_uninterruptible(1); + if (!d->next) { + jiff = jiffies + timeout; + while (time_before(jiffies, jiff) && !ic_got_reply) + schedule_timeout_uninterruptible(1); + } #ifdef IPCONFIG_DHCP /* DHCP isn't done until we get a DHCPACK. */ if ((ic_got_reply & IC_BOOTP) && From a1f4064b1aa34b8176bda36946bbb4a5ceeb4ce7 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Fri, 29 Jul 2016 19:52:56 +0200 Subject: [PATCH 0009/1715] bna: remove useless linked list Remove global variable bnad_list and bnad->list_entry that are used as list of bna driver instances. It is not necessary and useless. Signed-off-by: Ivan Vecera Signed-off-by: David S. Miller --- drivers/net/ethernet/brocade/bna/bnad.c | 3 --- drivers/net/ethernet/brocade/bna/bnad.h | 1 - 2 files changed, 4 deletions(-) diff --git a/drivers/net/ethernet/brocade/bna/bnad.c b/drivers/net/ethernet/brocade/bna/bnad.c index 771cc267f217..696bbae36c88 100644 --- a/drivers/net/ethernet/brocade/bna/bnad.c +++ b/drivers/net/ethernet/brocade/bna/bnad.c @@ -56,7 +56,6 @@ MODULE_PARM_DESC(bna_debugfs_enable, "Enables debugfs feature, default=1," static u32 bnad_rxqs_per_cq = 2; static u32 bna_id; static struct mutex bnad_list_mutex; -static LIST_HEAD(bnad_list); static const u8 bnad_bcast_addr[] __aligned(2) = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; @@ -80,7 +79,6 @@ static void bnad_add_to_list(struct bnad *bnad) { mutex_lock(&bnad_list_mutex); - list_add_tail(&bnad->list_entry, &bnad_list); bnad->id = bna_id++; mutex_unlock(&bnad_list_mutex); } @@ -89,7 +87,6 @@ static void bnad_remove_from_list(struct bnad *bnad) { mutex_lock(&bnad_list_mutex); - list_del(&bnad->list_entry); mutex_unlock(&bnad_list_mutex); } diff --git a/drivers/net/ethernet/brocade/bna/bnad.h b/drivers/net/ethernet/brocade/bna/bnad.h index f4ed816b93ee..46f7b842b39c 100644 --- a/drivers/net/ethernet/brocade/bna/bnad.h +++ b/drivers/net/ethernet/brocade/bna/bnad.h @@ -288,7 +288,6 @@ struct bnad_rx_unmap_q { struct bnad { struct net_device *netdev; u32 id; - struct list_head list_entry; /* Data path */ struct bnad_tx_info tx_info[BNAD_MAX_TX]; From 285eb9c37281c8c34bd604346a48d6627db51007 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Fri, 29 Jul 2016 19:52:57 +0200 Subject: [PATCH 0010/1715] bna: change type of bna_id to atomic_t Change type of bna_id to atomic_t. The bnad_list_mutex is used to prevent a race when bna_id is incremented. After the change the mutex can be removed in the next step. Signed-off-by: Ivan Vecera Signed-off-by: David S. Miller --- drivers/net/ethernet/brocade/bna/bnad.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/brocade/bna/bnad.c b/drivers/net/ethernet/brocade/bna/bnad.c index 696bbae36c88..2bed05032597 100644 --- a/drivers/net/ethernet/brocade/bna/bnad.c +++ b/drivers/net/ethernet/brocade/bna/bnad.c @@ -54,7 +54,7 @@ MODULE_PARM_DESC(bna_debugfs_enable, "Enables debugfs feature, default=1," * Global variables */ static u32 bnad_rxqs_per_cq = 2; -static u32 bna_id; +static atomic_t bna_id; static struct mutex bnad_list_mutex; static const u8 bnad_bcast_addr[] __aligned(2) = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; @@ -79,7 +79,6 @@ static void bnad_add_to_list(struct bnad *bnad) { mutex_lock(&bnad_list_mutex); - bnad->id = bna_id++; mutex_unlock(&bnad_list_mutex); } @@ -3651,6 +3650,7 @@ bnad_pci_probe(struct pci_dev *pdev, bnad = netdev_priv(netdev); bnad_lock_init(bnad); bnad_add_to_list(bnad); + bnad->id = atomic_inc_return(&bna_id) - 1; mutex_lock(&bnad->conf_mutex); /* From 09e3636047c6b44dd4cffdbd7cf95fcd3edd3d2e Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Fri, 29 Jul 2016 19:52:58 +0200 Subject: [PATCH 0011/1715] bna: remove global bnad_list_mutex Remove global bnad_list_mutex as it is not used anymore. This makes bnad_add_to_list() and bnad_remove_from_list() empty so remove them too. Signed-off-by: Ivan Vecera Signed-off-by: David S. Miller --- drivers/net/ethernet/brocade/bna/bnad.c | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/drivers/net/ethernet/brocade/bna/bnad.c b/drivers/net/ethernet/brocade/bna/bnad.c index 2bed05032597..f9df4b5ae90e 100644 --- a/drivers/net/ethernet/brocade/bna/bnad.c +++ b/drivers/net/ethernet/brocade/bna/bnad.c @@ -55,7 +55,6 @@ MODULE_PARM_DESC(bna_debugfs_enable, "Enables debugfs feature, default=1," */ static u32 bnad_rxqs_per_cq = 2; static atomic_t bna_id; -static struct mutex bnad_list_mutex; static const u8 bnad_bcast_addr[] __aligned(2) = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; @@ -75,20 +74,6 @@ do { \ (_res_info)->res_u.mem_info.len = (_size); \ } while (0) -static void -bnad_add_to_list(struct bnad *bnad) -{ - mutex_lock(&bnad_list_mutex); - mutex_unlock(&bnad_list_mutex); -} - -static void -bnad_remove_from_list(struct bnad *bnad) -{ - mutex_lock(&bnad_list_mutex); - mutex_unlock(&bnad_list_mutex); -} - /* * Reinitialize completions in CQ, once Rx is taken down */ @@ -3569,14 +3554,12 @@ bnad_lock_init(struct bnad *bnad) { spin_lock_init(&bnad->bna_lock); mutex_init(&bnad->conf_mutex); - mutex_init(&bnad_list_mutex); } static void bnad_lock_uninit(struct bnad *bnad) { mutex_destroy(&bnad->conf_mutex); - mutex_destroy(&bnad_list_mutex); } /* PCI Initialization */ @@ -3649,7 +3632,6 @@ bnad_pci_probe(struct pci_dev *pdev, } bnad = netdev_priv(netdev); bnad_lock_init(bnad); - bnad_add_to_list(bnad); bnad->id = atomic_inc_return(&bna_id) - 1; mutex_lock(&bnad->conf_mutex); @@ -3804,7 +3786,6 @@ pci_uninit: bnad_pci_uninit(pdev); unlock_mutex: mutex_unlock(&bnad->conf_mutex); - bnad_remove_from_list(bnad); bnad_lock_uninit(bnad); free_netdev(netdev); return err; @@ -3842,7 +3823,6 @@ bnad_pci_remove(struct pci_dev *pdev) bnad_disable_msix(bnad); bnad_pci_uninit(pdev); mutex_unlock(&bnad->conf_mutex); - bnad_remove_from_list(bnad); bnad_lock_uninit(bnad); /* Remove the debugfs node for this bnad */ kfree(bnad->regdata); From 86bc5ed672eea892dce43e8be8b3de6e58c51d5e Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Sat, 30 Jul 2016 16:14:43 +0200 Subject: [PATCH 0012/1715] net: ethernet: octeon: use phydev from struct net_device The private structure contain a pointer to phydev, but the structure net_device already contain such pointer. So we can remove the pointer phydev in the private structure, and update the driver to use the one contained in struct net_device. Signed-off-by: Philippe Reynes Signed-off-by: David S. Miller --- drivers/staging/octeon/ethernet-mdio.c | 48 ++++++++++-------------- drivers/staging/octeon/ethernet-rgmii.c | 2 +- drivers/staging/octeon/ethernet.c | 12 +++--- drivers/staging/octeon/octeon-ethernet.h | 1 - 4 files changed, 26 insertions(+), 37 deletions(-) diff --git a/drivers/staging/octeon/ethernet-mdio.c b/drivers/staging/octeon/ethernet-mdio.c index e13a4ab46977..661b97b9f6d2 100644 --- a/drivers/staging/octeon/ethernet-mdio.c +++ b/drivers/staging/octeon/ethernet-mdio.c @@ -36,36 +36,30 @@ static void cvm_oct_get_drvinfo(struct net_device *dev, static int cvm_oct_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) { - struct octeon_ethernet *priv = netdev_priv(dev); - - if (priv->phydev) - return phy_ethtool_gset(priv->phydev, cmd); + if (dev->phydev) + return phy_ethtool_gset(dev->phydev, cmd); return -EINVAL; } static int cvm_oct_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) { - struct octeon_ethernet *priv = netdev_priv(dev); - if (!capable(CAP_NET_ADMIN)) return -EPERM; - if (priv->phydev) - return phy_ethtool_sset(priv->phydev, cmd); + if (dev->phydev) + return phy_ethtool_sset(dev->phydev, cmd); return -EINVAL; } static int cvm_oct_nway_reset(struct net_device *dev) { - struct octeon_ethernet *priv = netdev_priv(dev); - if (!capable(CAP_NET_ADMIN)) return -EPERM; - if (priv->phydev) - return phy_start_aneg(priv->phydev); + if (dev->phydev) + return phy_start_aneg(dev->phydev); return -EINVAL; } @@ -88,15 +82,13 @@ const struct ethtool_ops cvm_oct_ethtool_ops = { */ int cvm_oct_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) { - struct octeon_ethernet *priv = netdev_priv(dev); - if (!netif_running(dev)) return -EINVAL; - if (!priv->phydev) + if (!dev->phydev) return -EINVAL; - return phy_mii_ioctl(priv->phydev, rq, cmd); + return phy_mii_ioctl(dev->phydev, rq, cmd); } void cvm_oct_note_carrier(struct octeon_ethernet *priv, @@ -119,9 +111,9 @@ void cvm_oct_adjust_link(struct net_device *dev) cvmx_helper_link_info_t link_info; link_info.u64 = 0; - link_info.s.link_up = priv->phydev->link ? 1 : 0; - link_info.s.full_duplex = priv->phydev->duplex ? 1 : 0; - link_info.s.speed = priv->phydev->speed; + link_info.s.link_up = dev->phydev->link ? 1 : 0; + link_info.s.full_duplex = dev->phydev->duplex ? 1 : 0; + link_info.s.speed = dev->phydev->speed; priv->link_info = link_info.u64; /* @@ -130,8 +122,8 @@ void cvm_oct_adjust_link(struct net_device *dev) if (priv->poll) priv->poll(dev); - if (priv->last_link != priv->phydev->link) { - priv->last_link = priv->phydev->link; + if (priv->last_link != dev->phydev->link) { + priv->last_link = dev->phydev->link; cvmx_helper_link_set(priv->port, link_info); cvm_oct_note_carrier(priv, link_info); } @@ -151,9 +143,8 @@ int cvm_oct_common_stop(struct net_device *dev) priv->poll = NULL; - if (priv->phydev) - phy_disconnect(priv->phydev); - priv->phydev = NULL; + if (dev->phydev) + phy_disconnect(dev->phydev); if (priv->last_link) { link_info.u64 = 0; @@ -176,6 +167,7 @@ int cvm_oct_phy_setup_device(struct net_device *dev) { struct octeon_ethernet *priv = netdev_priv(dev); struct device_node *phy_node; + struct phy_device *phydev = NULL; if (!priv->of_node) goto no_phy; @@ -193,14 +185,14 @@ int cvm_oct_phy_setup_device(struct net_device *dev) if (!phy_node) goto no_phy; - priv->phydev = of_phy_connect(dev, phy_node, cvm_oct_adjust_link, 0, - PHY_INTERFACE_MODE_GMII); + phydev = of_phy_connect(dev, phy_node, cvm_oct_adjust_link, 0, + PHY_INTERFACE_MODE_GMII); - if (!priv->phydev) + if (!phydev) return -ENODEV; priv->last_link = 0; - phy_start_aneg(priv->phydev); + phy_start_aneg(phydev); return 0; no_phy: diff --git a/drivers/staging/octeon/ethernet-rgmii.c b/drivers/staging/octeon/ethernet-rgmii.c index 91b148cfcbdb..48846dffc8e1 100644 --- a/drivers/staging/octeon/ethernet-rgmii.c +++ b/drivers/staging/octeon/ethernet-rgmii.c @@ -145,7 +145,7 @@ int cvm_oct_rgmii_open(struct net_device *dev) if (ret) return ret; - if (priv->phydev) { + if (dev->phydev) { /* * In phydev mode, we need still periodic polling for the * preamble error checking, and we also need to call this diff --git a/drivers/staging/octeon/ethernet.c b/drivers/staging/octeon/ethernet.c index e9cd5f242921..45d576361319 100644 --- a/drivers/staging/octeon/ethernet.c +++ b/drivers/staging/octeon/ethernet.c @@ -457,10 +457,8 @@ int cvm_oct_common_init(struct net_device *dev) void cvm_oct_common_uninit(struct net_device *dev) { - struct octeon_ethernet *priv = netdev_priv(dev); - - if (priv->phydev) - phy_disconnect(priv->phydev); + if (dev->phydev) + phy_disconnect(dev->phydev); } int cvm_oct_common_open(struct net_device *dev, @@ -484,10 +482,10 @@ int cvm_oct_common_open(struct net_device *dev, if (octeon_is_simulation()) return 0; - if (priv->phydev) { - int r = phy_read_status(priv->phydev); + if (dev->phydev) { + int r = phy_read_status(dev->phydev); - if (r == 0 && priv->phydev->link == 0) + if (r == 0 && dev->phydev->link == 0) netif_carrier_off(dev); cvm_oct_adjust_link(dev); } else { diff --git a/drivers/staging/octeon/octeon-ethernet.h b/drivers/staging/octeon/octeon-ethernet.h index 6275c15e0035..d533aefe085a 100644 --- a/drivers/staging/octeon/octeon-ethernet.h +++ b/drivers/staging/octeon/octeon-ethernet.h @@ -40,7 +40,6 @@ struct octeon_ethernet { struct sk_buff_head tx_free_list[16]; /* Device statistics */ struct net_device_stats stats; - struct phy_device *phydev; unsigned int last_speed; unsigned int last_link; /* Last negotiated link state */ From 0d5704bf4ebe4d83ab46a2206a74faf2fe3a77af Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Sat, 30 Jul 2016 16:14:44 +0200 Subject: [PATCH 0013/1715] net: ethernet: octeon: use phy_ethtool_{get|set}_link_ksettings There are two generics functions phy_ethtool_{get|set}_link_ksettings, so we can use them instead of defining the same code in the driver. There was a check on CAP_NET_ADMIN in cvm_oct_set_settings, but this check is already done in dev_ethtool, so no need to repeat it before calling the generic function. Signed-off-by: Philippe Reynes Signed-off-by: David S. Miller --- drivers/staging/octeon/ethernet-mdio.c | 23 ++--------------------- 1 file changed, 2 insertions(+), 21 deletions(-) diff --git a/drivers/staging/octeon/ethernet-mdio.c b/drivers/staging/octeon/ethernet-mdio.c index 661b97b9f6d2..1fde9c824948 100644 --- a/drivers/staging/octeon/ethernet-mdio.c +++ b/drivers/staging/octeon/ethernet-mdio.c @@ -34,25 +34,6 @@ static void cvm_oct_get_drvinfo(struct net_device *dev, strlcpy(info->bus_info, "Builtin", sizeof(info->bus_info)); } -static int cvm_oct_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) -{ - if (dev->phydev) - return phy_ethtool_gset(dev->phydev, cmd); - - return -EINVAL; -} - -static int cvm_oct_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) -{ - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - - if (dev->phydev) - return phy_ethtool_sset(dev->phydev, cmd); - - return -EINVAL; -} - static int cvm_oct_nway_reset(struct net_device *dev) { if (!capable(CAP_NET_ADMIN)) @@ -66,10 +47,10 @@ static int cvm_oct_nway_reset(struct net_device *dev) const struct ethtool_ops cvm_oct_ethtool_ops = { .get_drvinfo = cvm_oct_get_drvinfo, - .get_settings = cvm_oct_get_settings, - .set_settings = cvm_oct_set_settings, .nway_reset = cvm_oct_nway_reset, .get_link = ethtool_op_get_link, + .get_link_ksettings = phy_ethtool_get_link_ksettings, + .set_link_ksettings = phy_ethtool_set_link_ksettings, }; /** From 65752dda4b525730746085a351e6e936d29bc586 Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Sat, 30 Jul 2016 16:22:05 +0200 Subject: [PATCH 0014/1715] net: ethernet: greth: use phydev from struct net_device The private structure contain a pointer to phydev, but the structure net_device already contain such pointer. So we can remove the pointer phy in the private structure, and update the driver to use the one contained in struct net_device. Signed-off-by: Philippe Reynes Signed-off-by: David S. Miller --- drivers/net/ethernet/aeroflex/greth.c | 23 +++++++++++------------ drivers/net/ethernet/aeroflex/greth.h | 1 - 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/aeroflex/greth.c b/drivers/net/ethernet/aeroflex/greth.c index bca07c5c94bd..010978bb295f 100644 --- a/drivers/net/ethernet/aeroflex/greth.c +++ b/drivers/net/ethernet/aeroflex/greth.c @@ -1107,8 +1107,7 @@ static void greth_set_msglevel(struct net_device *dev, u32 value) } static int greth_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) { - struct greth_private *greth = netdev_priv(dev); - struct phy_device *phy = greth->phy; + struct phy_device *phy = dev->phydev; if (!phy) return -ENODEV; @@ -1118,8 +1117,7 @@ static int greth_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) static int greth_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) { - struct greth_private *greth = netdev_priv(dev); - struct phy_device *phy = greth->phy; + struct phy_device *phy = dev->phydev; if (!phy) return -ENODEV; @@ -1224,7 +1222,7 @@ static int greth_mdio_write(struct mii_bus *bus, int phy, int reg, u16 val) static void greth_link_change(struct net_device *dev) { struct greth_private *greth = netdev_priv(dev); - struct phy_device *phydev = greth->phy; + struct phy_device *phydev = dev->phydev; unsigned long flags; int status_change = 0; u32 ctrl; @@ -1307,7 +1305,6 @@ static int greth_mdio_probe(struct net_device *dev) greth->link = 0; greth->speed = 0; greth->duplex = -1; - greth->phy = phy; return 0; } @@ -1325,6 +1322,7 @@ static int greth_mdio_init(struct greth_private *greth) { int ret; unsigned long timeout; + struct net_device *ndev = greth->netdev; greth->mdio = mdiobus_alloc(); if (!greth->mdio) { @@ -1349,15 +1347,16 @@ static int greth_mdio_init(struct greth_private *greth) goto unreg_mdio; } - phy_start(greth->phy); + phy_start(ndev->phydev); /* If Ethernet debug link is used make autoneg happen right away */ if (greth->edcl && greth_edcl == 1) { - phy_start_aneg(greth->phy); + phy_start_aneg(ndev->phydev); timeout = jiffies + 6*HZ; - while (!phy_aneg_done(greth->phy) && time_before(jiffies, timeout)) { + while (!phy_aneg_done(ndev->phydev) && + time_before(jiffies, timeout)) { } - phy_read_status(greth->phy); + phy_read_status(ndev->phydev); greth_link_change(greth->netdev); } @@ -1569,8 +1568,8 @@ static int greth_of_remove(struct platform_device *of_dev) dma_free_coherent(&of_dev->dev, 1024, greth->tx_bd_base, greth->tx_bd_base_phys); - if (greth->phy) - phy_stop(greth->phy); + if (ndev->phydev) + phy_stop(ndev->phydev); mdiobus_unregister(greth->mdio); unregister_netdev(ndev); diff --git a/drivers/net/ethernet/aeroflex/greth.h b/drivers/net/ethernet/aeroflex/greth.h index 92dd918e4a83..9c07140a5d8d 100644 --- a/drivers/net/ethernet/aeroflex/greth.h +++ b/drivers/net/ethernet/aeroflex/greth.h @@ -123,7 +123,6 @@ struct greth_private { struct napi_struct napi; spinlock_t devlock; - struct phy_device *phy; struct mii_bus *mdio; unsigned int link; unsigned int speed; From 72582fdb92457a1550dac0e271bde9dd906fda05 Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Sat, 30 Jul 2016 16:22:06 +0200 Subject: [PATCH 0015/1715] net: ethernet: greth: use phy_ethtool_{get|set}_link_ksettings There are two generics functions phy_ethtool_{get|set}_link_ksettings, so we can use them instead of defining the same code in the driver. Signed-off-by: Philippe Reynes Signed-off-by: David S. Miller --- drivers/net/ethernet/aeroflex/greth.c | 23 ++--------------------- 1 file changed, 2 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/aeroflex/greth.c b/drivers/net/ethernet/aeroflex/greth.c index 010978bb295f..f8df8248035e 100644 --- a/drivers/net/ethernet/aeroflex/greth.c +++ b/drivers/net/ethernet/aeroflex/greth.c @@ -1105,25 +1105,6 @@ static void greth_set_msglevel(struct net_device *dev, u32 value) struct greth_private *greth = netdev_priv(dev); greth->msg_enable = value; } -static int greth_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) -{ - struct phy_device *phy = dev->phydev; - - if (!phy) - return -ENODEV; - - return phy_ethtool_gset(phy, cmd); -} - -static int greth_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) -{ - struct phy_device *phy = dev->phydev; - - if (!phy) - return -ENODEV; - - return phy_ethtool_sset(phy, cmd); -} static int greth_get_regs_len(struct net_device *dev) { @@ -1155,12 +1136,12 @@ static void greth_get_regs(struct net_device *dev, struct ethtool_regs *regs, vo static const struct ethtool_ops greth_ethtool_ops = { .get_msglevel = greth_get_msglevel, .set_msglevel = greth_set_msglevel, - .get_settings = greth_get_settings, - .set_settings = greth_set_settings, .get_drvinfo = greth_get_drvinfo, .get_regs_len = greth_get_regs_len, .get_regs = greth_get_regs, .get_link = ethtool_op_get_link, + .get_link_ksettings = phy_ethtool_get_link_ksettings, + .set_link_ksettings = phy_ethtool_set_link_ksettings, }; static struct net_device_ops greth_netdev_ops = { From c6c022e3602a8165ead71d60ac7ac22e07c81d37 Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Sat, 30 Jul 2016 17:42:11 +0200 Subject: [PATCH 0016/1715] net: ethernet: marvell: mvneta: use phydev from struct net_device The private structure contain a pointer to phydev, but the structure net_device already contain such pointer. So we can remove the pointer phy_dev in the private structure, and update the driver to use the one contained in struct net_device. Signed-off-by: Philippe Reynes Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvneta.c | 34 +++++++++++++-------------- 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index d41c28d00b57..d2ccab738acb 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -399,7 +399,6 @@ struct mvneta_port { u16 rx_ring_size; struct mii_bus *mii_bus; - struct phy_device *phy_dev; phy_interface_t phy_interface; struct device_node *phy_node; unsigned int link; @@ -2651,6 +2650,7 @@ static int mvneta_poll(struct napi_struct *napi, int budget) u32 cause_rx_tx; int rx_queue; struct mvneta_port *pp = netdev_priv(napi->dev); + struct net_device *ndev = pp->dev; struct mvneta_pcpu_port *port = this_cpu_ptr(pp->ports); if (!netif_running(pp->dev)) { @@ -2668,7 +2668,7 @@ static int mvneta_poll(struct napi_struct *napi, int budget) (MVNETA_CAUSE_PHY_STATUS_CHANGE | MVNETA_CAUSE_LINK_CHANGE | MVNETA_CAUSE_PSC_SYNC_CHANGE))) { - mvneta_fixed_link_update(pp, pp->phy_dev); + mvneta_fixed_link_update(pp, ndev->phydev); } } @@ -2963,6 +2963,7 @@ static int mvneta_setup_txqs(struct mvneta_port *pp) static void mvneta_start_dev(struct mvneta_port *pp) { int cpu; + struct net_device *ndev = pp->dev; mvneta_max_rx_size_set(pp, pp->pkt_size); mvneta_txq_max_tx_size_set(pp, pp->pkt_size); @@ -2985,15 +2986,16 @@ static void mvneta_start_dev(struct mvneta_port *pp) MVNETA_CAUSE_LINK_CHANGE | MVNETA_CAUSE_PSC_SYNC_CHANGE); - phy_start(pp->phy_dev); + phy_start(ndev->phydev); netif_tx_start_all_queues(pp->dev); } static void mvneta_stop_dev(struct mvneta_port *pp) { unsigned int cpu; + struct net_device *ndev = pp->dev; - phy_stop(pp->phy_dev); + phy_stop(ndev->phydev); for_each_online_cpu(cpu) { struct mvneta_pcpu_port *port = per_cpu_ptr(pp->ports, cpu); @@ -3166,7 +3168,7 @@ static int mvneta_set_mac_addr(struct net_device *dev, void *addr) static void mvneta_adjust_link(struct net_device *ndev) { struct mvneta_port *pp = netdev_priv(ndev); - struct phy_device *phydev = pp->phy_dev; + struct phy_device *phydev = ndev->phydev; int status_change = 0; if (phydev->link) { @@ -3244,7 +3246,6 @@ static int mvneta_mdio_probe(struct mvneta_port *pp) phy_dev->supported &= PHY_GBIT_FEATURES; phy_dev->advertising = phy_dev->supported; - pp->phy_dev = phy_dev; pp->link = 0; pp->duplex = 0; pp->speed = 0; @@ -3254,8 +3255,9 @@ static int mvneta_mdio_probe(struct mvneta_port *pp) static void mvneta_mdio_remove(struct mvneta_port *pp) { - phy_disconnect(pp->phy_dev); - pp->phy_dev = NULL; + struct net_device *ndev = pp->dev; + + phy_disconnect(ndev->phydev); } /* Electing a CPU must be done in an atomic way: it should be done @@ -3495,12 +3497,10 @@ static int mvneta_stop(struct net_device *dev) static int mvneta_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { - struct mvneta_port *pp = netdev_priv(dev); - - if (!pp->phy_dev) + if (!dev->phydev) return -ENOTSUPP; - return phy_mii_ioctl(pp->phy_dev, ifr, cmd); + return phy_mii_ioctl(dev->phydev, ifr, cmd); } /* Ethtool methods */ @@ -3508,19 +3508,17 @@ static int mvneta_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) /* Get settings (phy address, speed) for ethtools */ int mvneta_ethtool_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) { - struct mvneta_port *pp = netdev_priv(dev); - - if (!pp->phy_dev) + if (!dev->phydev) return -ENODEV; - return phy_ethtool_gset(pp->phy_dev, cmd); + return phy_ethtool_gset(dev->phydev, cmd); } /* Set settings (phy address, speed) for ethtools */ int mvneta_ethtool_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) { struct mvneta_port *pp = netdev_priv(dev); - struct phy_device *phydev = pp->phy_dev; + struct phy_device *phydev = dev->phydev; if (!phydev) return -ENODEV; @@ -3557,7 +3555,7 @@ int mvneta_ethtool_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) } } - return phy_ethtool_sset(pp->phy_dev, cmd); + return phy_ethtool_sset(dev->phydev, cmd); } /* Set interrupt coalescing for ethtools */ From 013ad40d3709c12fbe2edf961a7109480a2f550a Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Sat, 30 Jul 2016 17:42:12 +0200 Subject: [PATCH 0017/1715] net: ethernet: marvell: mvneta: use new api ethtool_{get|set}_link_ksettings The ethtool api {get|set}_settings is deprecated. We move the mvneta driver to new api {get|set}_link_ksettings. We use the generic function phy_ethtool_get_link_ksettings, and update old mvneta_ethtool_set_settings to the new api. Signed-off-by: Philippe Reynes Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvneta.c | 34 ++++++++++----------------- 1 file changed, 13 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index d2ccab738acb..8e4252dd9a9d 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -3505,30 +3505,22 @@ static int mvneta_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) /* Ethtool methods */ -/* Get settings (phy address, speed) for ethtools */ -int mvneta_ethtool_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) +/* Set link ksettings (phy address, speed) for ethtools */ +int mvneta_ethtool_set_link_ksettings(struct net_device *ndev, + const struct ethtool_link_ksettings *cmd) { - if (!dev->phydev) - return -ENODEV; - - return phy_ethtool_gset(dev->phydev, cmd); -} - -/* Set settings (phy address, speed) for ethtools */ -int mvneta_ethtool_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) -{ - struct mvneta_port *pp = netdev_priv(dev); - struct phy_device *phydev = dev->phydev; + struct mvneta_port *pp = netdev_priv(ndev); + struct phy_device *phydev = ndev->phydev; if (!phydev) return -ENODEV; - if ((cmd->autoneg == AUTONEG_ENABLE) != pp->use_inband_status) { + if ((cmd->base.autoneg == AUTONEG_ENABLE) != pp->use_inband_status) { u32 val; - mvneta_set_autoneg(pp, cmd->autoneg == AUTONEG_ENABLE); + mvneta_set_autoneg(pp, cmd->base.autoneg == AUTONEG_ENABLE); - if (cmd->autoneg == AUTONEG_DISABLE) { + if (cmd->base.autoneg == AUTONEG_DISABLE) { val = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG); val &= ~(MVNETA_GMAC_CONFIG_MII_SPEED | MVNETA_GMAC_CONFIG_GMII_SPEED | @@ -3545,17 +3537,17 @@ int mvneta_ethtool_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, val); } - pp->use_inband_status = (cmd->autoneg == AUTONEG_ENABLE); + pp->use_inband_status = (cmd->base.autoneg == AUTONEG_ENABLE); netdev_info(pp->dev, "autoneg status set to %i\n", pp->use_inband_status); - if (netif_running(dev)) { + if (netif_running(ndev)) { mvneta_port_down(pp); mvneta_port_up(pp); } } - return phy_ethtool_sset(dev->phydev, cmd); + return phy_ethtool_ksettings_set(ndev->phydev, cmd); } /* Set interrupt coalescing for ethtools */ @@ -3819,8 +3811,6 @@ static const struct net_device_ops mvneta_netdev_ops = { const struct ethtool_ops mvneta_eth_tool_ops = { .get_link = ethtool_op_get_link, - .get_settings = mvneta_ethtool_get_settings, - .set_settings = mvneta_ethtool_set_settings, .set_coalesce = mvneta_ethtool_set_coalesce, .get_coalesce = mvneta_ethtool_get_coalesce, .get_drvinfo = mvneta_ethtool_get_drvinfo, @@ -3833,6 +3823,8 @@ const struct ethtool_ops mvneta_eth_tool_ops = { .get_rxnfc = mvneta_ethtool_get_rxnfc, .get_rxfh = mvneta_ethtool_get_rxfh, .set_rxfh = mvneta_ethtool_set_rxfh, + .get_link_ksettings = phy_ethtool_get_link_ksettings, + .set_link_ksettings = mvneta_ethtool_set_link_ksettings, }; /* Initialize hw */ From a052517a8ff654f5226cf47008ebc4f7936b8f13 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Tue, 2 Aug 2016 18:02:57 +0800 Subject: [PATCH 0018/1715] net/multicast: should not send source list records when have filter mode change Based on RFC3376 5.1 and RFC3810 6.1 If the per-interface listening change that triggers the new report is a filter mode change, then the next [Robustness Variable] State Change Reports will include a Filter Mode Change Record. This applies even if any number of source list changes occur in that period. Old State New State State Change Record Sent --------- --------- ------------------------ INCLUDE (A) EXCLUDE (B) TO_EX (B) EXCLUDE (A) INCLUDE (B) TO_IN (B) So we should not send source-list change if there is a filter-mode change. Here are two scenarios: 1. Group deleted and filter mode is EXCLUDE, which means we need send a TO_IN { }. 2. Not group deleted, but has pcm->crcount, which means we need send a normal filter-mode-change. At the same time, if the type is ALLOW or BLOCK, and have psf->sf_crcount, we stop add records and decrease sf_crcount directly Reference: https://www.ietf.org/mail-archive/web/magma/current/msg01274.html Signed-off-by: Hangbin Liu Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv4/igmp.c | 10 ++++++++++ net/ipv6/mcast.c | 10 ++++++++++ 2 files changed, 20 insertions(+) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 9b4ca87f70ba..606cc3e85d2b 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -472,6 +472,15 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, continue; } + /* Based on RFC3376 5.1. Should not send source-list change + * records when there is a filter mode change. + */ + if (((gdeleted && pmc->sfmode == MCAST_EXCLUDE) || + (!gdeleted && pmc->crcount)) && + (type == IGMPV3_ALLOW_NEW_SOURCES || + type == IGMPV3_BLOCK_OLD_SOURCES) && psf->sf_crcount) + goto decrease_sf_crcount; + /* clear marks on query responses */ if (isquery) psf->sf_gsresp = 0; @@ -499,6 +508,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, scount++; stotal++; if ((type == IGMPV3_ALLOW_NEW_SOURCES || type == IGMPV3_BLOCK_OLD_SOURCES) && psf->sf_crcount) { +decrease_sf_crcount: psf->sf_crcount--; if ((sdeleted || gdeleted) && psf->sf_crcount == 0) { if (psf_prev) diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index d64ee7e83664..75c1fc54f188 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1739,6 +1739,15 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, continue; } + /* Based on RFC3810 6.1. Should not send source-list change + * records when there is a filter mode change. + */ + if (((gdeleted && pmc->mca_sfmode == MCAST_EXCLUDE) || + (!gdeleted && pmc->mca_crcount)) && + (type == MLD2_ALLOW_NEW_SOURCES || + type == MLD2_BLOCK_OLD_SOURCES) && psf->sf_crcount) + goto decrease_sf_crcount; + /* clear marks on query responses */ if (isquery) psf->sf_gsresp = 0; @@ -1766,6 +1775,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, scount++; stotal++; if ((type == MLD2_ALLOW_NEW_SOURCES || type == MLD2_BLOCK_OLD_SOURCES) && psf->sf_crcount) { +decrease_sf_crcount: psf->sf_crcount--; if ((sdeleted || gdeleted) && psf->sf_crcount == 0) { if (psf_prev) From 0caf5b261b6360cac9b320fe7c2f43ed162a3e61 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 2 Aug 2016 13:49:00 +0000 Subject: [PATCH 0019/1715] qed: Use DEFINE_SPINLOCK() for spinlock spinlock can be initialized automatically with DEFINE_SPINLOCK() rather than explicitly calling spin_lock_init(). Signed-off-by: Wei Yongjun Acked-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_dev.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index 0e4f4a9306b5..b8d594a95a65 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -35,8 +35,7 @@ #include "qed_sriov.h" #include "qed_vf.h" -static spinlock_t qm_lock; -static bool qm_lock_init = false; +static DEFINE_SPINLOCK(qm_lock); /* API common to all protocols */ enum BAR_ID { @@ -1004,11 +1003,6 @@ int qed_hw_init(struct qed_dev *cdev, p_hwfn->first_on_engine = (load_code == FW_MSG_CODE_DRV_LOAD_ENGINE); - if (!qm_lock_init) { - spin_lock_init(&qm_lock); - qm_lock_init = true; - } - switch (load_code) { case FW_MSG_CODE_DRV_LOAD_ENGINE: rc = qed_hw_init_common(p_hwfn, p_hwfn->p_main_ptt, From 678a6241c64ef85c0f8acd0d60ca6fd5ff3e6887 Mon Sep 17 00:00:00 2001 From: Michal Soltys Date: Wed, 3 Aug 2016 00:44:54 +0200 Subject: [PATCH 0020/1715] net/sched/sch_hfsc.c: keep fsc and virtual times in sync; fix an old bug This patch simplifies how we update fsc and calculate vt from it - while keeping the expected functionality identical with how hfsc behaves curently. It also fixes a certain issue introduced with a very old patch. The idea is, that instead of correcting cl_vt before fsc curve update (rtsc_min) and correcting cl_vt after calculation (rtsc_y2x) to keep cl_vt local to the current period - we can simply rely on virtual times and curve values always being in sync - analogously to how rsc and usc function, except that we use virtual time here. Why hasn't it been done since the beginning this way ? The likely scenario (basing on the code trying to correct curves whenever possible) was to keep the virtual times as small as possible - as they have tendency to "gallop" forward whenever their siblings and other fair sharing subtrees are idling. On top of that, current code is subtly bugged, so cumulative time (without any corrections) is always kept and used in init_vf() when a new backlog period begins (using cl_cvtoff). Is cumulative value safe ? Generally yes, though corner cases are easy to create. For example consider: 1gbit interface some 100kbit leaf, everything else idle With current tick (64ns) 1s is 15625000 ticks, but the leaf is alone and it's virtual time, so in reality it's 10000 times more. ITOW 38 bits are needed to hold 1 second. 54 - 1 day, 59 - 1 month, 63 - 1 year (all logarithms rounded up). It's getting somewhat dangerous, but also requires setup excusing this kind of values not mentioning permanently backlogged class for a year. In near most extreme case (10gbit, 10kbit leaf), we have "enough" to hold ~13.6 days in 64 bits. Well, the issue remains mostly theoretical and cl_cvtoff has been working fine for all those years. Sensible configuration are de-facto immune to this issue, and not so sensible can solve it with a cronjob and its period inversely proportional to the insanity of such setup =) Now let's explain the subtle bug mentioned earlier. The issue is related to how offsets are kept and how we calculate virtual times and update fair service curve(s). The issue itself is subtle, but easy to observe with long m1 segments. It was introduced in rather old patch: Commit 99296150c7: "[NET_SCHED]: O(1) children vtoff adjustment in HFSC scheduler" (available in git://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.git) Originally when a new backlog period was started, cl_vtoff of each sibling was updated with cl_cvtmax from past period - naturally moving all cl_vt to proper starting point. That patch adjusted it so cumulative offset is kept in the parent, and there is no need for traversing the list (as any subsequent child activation derives new vt from already active sibling(s)). But with this change, cl_vtoff (of each sibling) is no longer persistent across the inactivity periods, as it's calculated from parent's cl_cvtoff on a new backlog period, conflicting with the following curve correction from the previous period: if (cl->cl_virtual.x == vt) { cl->cl_virtual.x -= cl->cl_vtoff; cl->cl_vtoff = 0; } This essentially tries to keep curve as if it was local to the period and resets cl_vtoff (cumulative vt offset of the class) to 0 when possible (read: when we have an intersection or if a new curve is below the old one). But then it's recalculated from cl_cvtoff on next active period. Then rtsc_min() call preceding the above if() doesn't really do what we expect it to do in such scenario - as it calculates the minimum of corrected curve (from the previous backlog period) and the new uncorrected curve (with offset derived from cl_cvtoff). Example: tc class add dev $ife parent 1:0 classid 1:1 hfsc ls m2 100mbit ul m2 100mbit tc class add dev $ife parent 1:1 classid 1:10 hfsc ls m1 80mbit d 10s m2 20mbit tc class add dev $ife parent 1:1 classid 1:11 hfsc ls m2 20mbit start B, keep it backlogged, let it run 6s (30s worth of vt as A is idle) pause B briefly to force cl_cvtoff update in parent (whole 1:1 going idle) start A, let it run 10s pause A briefly to force rtsc_min() At this point we would expect A to continue at 20mbit after a brief moment of 80mbit. But instead A will use 80mbit for full 10s again. It's the effect of first correcting A (during 'start A'), and then - after unpausing - calculating rtsc_min() from old corrected and new uncorrected curve. The patch fixes this bug and keepis vt and fsc in sync (virtual times are cumulative, not local to the backlog period). Signed-off-by: Michal Soltys Signed-off-by: David S. Miller --- net/sched/sch_hfsc.c | 44 ++++++++++++-------------------------------- 1 file changed, 12 insertions(+), 32 deletions(-) diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 3ddc7bd74ecb..6329d7d0d334 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -151,11 +151,8 @@ struct hfsc_class { (monotonic within a period) */ u64 cl_vtadj; /* intra-period cumulative vt adjustment */ - u64 cl_vtoff; /* inter-period cumulative vt offset */ - u64 cl_cvtmax; /* max child's vt in the last period */ - u64 cl_cvtoff; /* cumulative cvtmax of all periods */ - u64 cl_pcvtoff; /* parent's cvtoff at initialization - time */ + u64 cl_cvtoff; /* largest virtual time seen among + the children */ struct internal_sc cl_rsc; /* internal real-time service curve */ struct internal_sc cl_fsc; /* internal fair service curve */ @@ -701,28 +698,16 @@ init_vf(struct hfsc_class *cl, unsigned int len) } else { /* * first child for a new parent backlog period. - * add parent's cvtmax to cvtoff to make a new - * vt (vtoff + vt) larger than the vt in the - * last period for all children. + * initialize cl_vt to the highest value seen + * among the siblings. this is analogous to + * what cur_time would provide in realtime case. */ - vt = cl->cl_parent->cl_cvtmax; - cl->cl_parent->cl_cvtoff += vt; - cl->cl_parent->cl_cvtmax = 0; + cl->cl_vt = cl->cl_parent->cl_cvtoff; cl->cl_parent->cl_cvtmin = 0; - cl->cl_vt = 0; } - cl->cl_vtoff = cl->cl_parent->cl_cvtoff - - cl->cl_pcvtoff; - /* update the virtual curve */ - vt = cl->cl_vt + cl->cl_vtoff; - rtsc_min(&cl->cl_virtual, &cl->cl_fsc, vt, - cl->cl_total); - if (cl->cl_virtual.x == vt) { - cl->cl_virtual.x -= cl->cl_vtoff; - cl->cl_vtoff = 0; - } + rtsc_min(&cl->cl_virtual, &cl->cl_fsc, cl->cl_vt, cl->cl_total); cl->cl_vtadj = 0; cl->cl_vtperiod++; /* increment vt period */ @@ -779,8 +764,7 @@ update_vf(struct hfsc_class *cl, unsigned int len, u64 cur_time) go_passive = 0; /* update vt */ - cl->cl_vt = rtsc_y2x(&cl->cl_virtual, cl->cl_total) - - cl->cl_vtoff + cl->cl_vtadj; + cl->cl_vt = rtsc_y2x(&cl->cl_virtual, cl->cl_total) + cl->cl_vtadj; /* * if vt of the class is smaller than cvtmin, @@ -795,9 +779,9 @@ update_vf(struct hfsc_class *cl, unsigned int len, u64 cur_time) if (go_passive) { /* no more active child, going passive */ - /* update cvtmax of the parent class */ - if (cl->cl_vt > cl->cl_parent->cl_cvtmax) - cl->cl_parent->cl_cvtmax = cl->cl_vt; + /* update cvtoff of the parent class */ + if (cl->cl_vt > cl->cl_parent->cl_cvtoff) + cl->cl_parent->cl_cvtoff = cl->cl_vt; /* remove this class from the vt tree */ vttree_remove(cl); @@ -940,7 +924,7 @@ static void hfsc_change_fsc(struct hfsc_class *cl, struct tc_service_curve *fsc) { sc2isc(fsc, &cl->cl_fsc); - rtsc_init(&cl->cl_virtual, &cl->cl_fsc, cl->cl_vtoff + cl->cl_vt, cl->cl_total); + rtsc_init(&cl->cl_virtual, &cl->cl_fsc, cl->cl_vt, cl->cl_total); cl->cl_flags |= HFSC_FSC; } @@ -1094,7 +1078,6 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (parent->level == 0) hfsc_purge_queue(sch, parent); hfsc_adjust_levels(parent); - cl->cl_pcvtoff = parent->cl_cvtoff; sch_tree_unlock(sch); qdisc_class_hash_grow(sch, &q->clhash); @@ -1482,11 +1465,8 @@ hfsc_reset_class(struct hfsc_class *cl) cl->cl_e = 0; cl->cl_vt = 0; cl->cl_vtadj = 0; - cl->cl_vtoff = 0; cl->cl_cvtmin = 0; - cl->cl_cvtmax = 0; cl->cl_cvtoff = 0; - cl->cl_pcvtoff = 0; cl->cl_vtperiod = 0; cl->cl_parentperiod = 0; cl->cl_f = 0; From 37088f617d5b025b0e3ddd38a48e3b10ca7d89d9 Mon Sep 17 00:00:00 2001 From: Michal Soltys Date: Wed, 3 Aug 2016 00:44:55 +0200 Subject: [PATCH 0021/1715] net/sched/sch_hfsc.c: remove unused cl_myfadj The code using this variable has been commented out in the past as it was causing issues in upperlimited link-sharing scenarios. Signed-off-by: Michal Soltys Signed-off-by: David S. Miller --- net/sched/sch_hfsc.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 6329d7d0d334..000f1d36128e 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -142,8 +142,6 @@ struct hfsc_class { link-sharing, max(myf, cfmin) */ u64 cl_myf; /* my fit-time (calculated from this class's own upperlimit curve) */ - u64 cl_myfadj; /* my fit-time adjustment (to cancel - history dependence) */ u64 cl_cfmin; /* earliest children's fit-time (used with cl_myf to obtain cl_f) */ u64 cl_cvtmin; /* minimal virtual time among the @@ -730,7 +728,6 @@ init_vf(struct hfsc_class *cl, unsigned int len) /* compute myf */ cl->cl_myf = rtsc_y2x(&cl->cl_ulimit, cl->cl_total); - cl->cl_myfadj = 0; } } @@ -797,9 +794,10 @@ update_vf(struct hfsc_class *cl, unsigned int len, u64 cur_time) /* update f */ if (cl->cl_flags & HFSC_USC) { + cl->cl_myf = rtsc_y2x(&cl->cl_ulimit, cl->cl_total); +#if 0 cl->cl_myf = cl->cl_myfadj + rtsc_y2x(&cl->cl_ulimit, cl->cl_total); -#if 0 /* * This code causes classes to stay way under their * limit when multiple classes are used at gigabit @@ -1471,7 +1469,6 @@ hfsc_reset_class(struct hfsc_class *cl) cl->cl_parentperiod = 0; cl->cl_f = 0; cl->cl_myf = 0; - cl->cl_myfadj = 0; cl->cl_cfmin = 0; cl->cl_nactive = 0; From aeec3021043b66a1418df416e1ba83def070cef1 Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Thu, 4 Aug 2016 18:20:51 +0300 Subject: [PATCH 0022/1715] net: ethernet: ti: cpdma: remove used_desc counter The struct cpdma_desc_pool->used_desc field can be safely removed from CPDMA driver (and hot patch) because used_descs counter is used just for pool consistency check at CPDMA deinitialization and now this check can be re-implemnted using gen_pool_size(pool->gen_pool) != gen_pool_avail(pool->gen_pool). More over, this will allow to get rid of warnings in cpdma_desc_pool_destro()-> WARN_ON(pool->used_desc) which may happen because the used_descs is used unprotected, since CPDMA has been switched to use genalloc, and may get wrong values on SMP. Hence, remove used_desc from struct cpdma_desc_pool. Signed-off-by: Grygorii Strashko Reviewed-by: Ivan Khoronzhuk Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/davinci_cpdma.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/ti/davinci_cpdma.c b/drivers/net/ethernet/ti/davinci_cpdma.c index 19e5f32a8a64..cf72b3390d85 100644 --- a/drivers/net/ethernet/ti/davinci_cpdma.c +++ b/drivers/net/ethernet/ti/davinci_cpdma.c @@ -86,7 +86,7 @@ struct cpdma_desc_pool { void __iomem *iomap; /* ioremap map */ void *cpumap; /* dma_alloc map */ int desc_size, mem_size; - int num_desc, used_desc; + int num_desc; struct device *dev; struct gen_pool *gen_pool; }; @@ -148,7 +148,10 @@ static void cpdma_desc_pool_destroy(struct cpdma_desc_pool *pool) if (!pool) return; - WARN_ON(pool->used_desc); + WARN(gen_pool_size(pool->gen_pool) != gen_pool_avail(pool->gen_pool), + "cpdma_desc_pool size %d != avail %d", + gen_pool_size(pool->gen_pool), + gen_pool_avail(pool->gen_pool)); if (pool->cpumap) dma_free_coherent(pool->dev, pool->mem_size, pool->cpumap, pool->phys); @@ -232,21 +235,14 @@ desc_from_phys(struct cpdma_desc_pool *pool, dma_addr_t dma) static struct cpdma_desc __iomem * cpdma_desc_alloc(struct cpdma_desc_pool *pool) { - struct cpdma_desc __iomem *desc = NULL; - - desc = (struct cpdma_desc __iomem *)gen_pool_alloc(pool->gen_pool, - pool->desc_size); - if (desc) - pool->used_desc++; - - return desc; + return (struct cpdma_desc __iomem *) + gen_pool_alloc(pool->gen_pool, pool->desc_size); } static void cpdma_desc_free(struct cpdma_desc_pool *pool, struct cpdma_desc __iomem *desc, int num_desc) { gen_pool_free(pool->gen_pool, (unsigned long)desc, pool->desc_size); - pool->used_desc--; } struct cpdma_ctlr *cpdma_ctlr_create(struct cpdma_params *params) From b37879e6ca7079943542048da37f00a386c30cee Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Thu, 4 Aug 2016 10:42:14 -0700 Subject: [PATCH 0023/1715] hv_netvsc: Add query for initial physical link speed The physical link speed value will be reported by ethtool command. The real speed is available from Windows 2016 host or later. Signed-off-by: Haiyang Zhang Reviewed-by: K. Y. Srinivasan Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc_drv.c | 4 ++-- drivers/net/hyperv/rndis_filter.c | 24 ++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 41bd952cc28d..3e5356b6f825 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -1352,6 +1352,8 @@ static int netvsc_probe(struct hv_device *dev, netif_carrier_off(net); + netvsc_init_settings(net); + net_device_ctx = netdev_priv(net); net_device_ctx->device_ctx = dev; net_device_ctx->msg_enable = netif_msg_init(debug, default_msg); @@ -1410,8 +1412,6 @@ static int netvsc_probe(struct hv_device *dev, netif_set_real_num_tx_queues(net, nvdev->num_chn); netif_set_real_num_rx_queues(net, nvdev->num_chn); - netvsc_init_settings(net); - ret = register_netdev(net); if (ret != 0) { pr_err("Unable to register netdev.\n"); diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 8e830f741d47..dd3b3352a950 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -752,6 +752,28 @@ static int rndis_filter_query_device_link_status(struct rndis_device *dev) return ret; } +static int rndis_filter_query_link_speed(struct rndis_device *dev) +{ + u32 size = sizeof(u32); + u32 link_speed; + struct net_device_context *ndc; + int ret; + + ret = rndis_filter_query_device(dev, RNDIS_OID_GEN_LINK_SPEED, + &link_speed, &size); + + if (!ret) { + ndc = netdev_priv(dev->ndev); + + /* The link speed reported from host is in 100bps unit, so + * we convert it to Mbps here. + */ + ndc->speed = link_speed / 10000; + } + + return ret; +} + int rndis_filter_set_packet_filter(struct rndis_device *dev, u32 new_filter) { struct rndis_request *request; @@ -1044,6 +1066,8 @@ int rndis_filter_device_add(struct hv_device *dev, if (net_device->nvsp_version < NVSP_PROTOCOL_VERSION_5) return 0; + rndis_filter_query_link_speed(rndis_device); + /* vRSS setup */ memset(&rsscap, 0, rsscap_size); ret = rndis_filter_query_device(rndis_device, From 7f5d5af0b2f859f09b3dcb16a00b800fa48d9288 Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Thu, 4 Aug 2016 10:42:15 -0700 Subject: [PATCH 0024/1715] hv_netvsc: Add handler for physical link speed change On Hyper-V host 2016 and later, VMs gets an event message of the physical link speed when vSwitch is changed. This patch handles this message, so the updated link speed can be reported by ethtool. Signed-off-by: Haiyang Zhang Reviewed-by: K. Y. Srinivasan Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 1 + drivers/net/hyperv/netvsc.c | 6 +++++- drivers/net/hyperv/netvsc_drv.c | 25 +++++++++++++++++++------ 3 files changed, 25 insertions(+), 7 deletions(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 467fb8b4d083..c1403fda874c 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -490,6 +490,7 @@ struct nvsp_2_vsc_capability { u64 sriov:1; u64 ieee8021q:1; u64 correlation_id:1; + u64 teaming:1; }; }; } __packed; diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 20e09174ff62..36a7994f2811 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -472,9 +472,13 @@ static int negotiate_nvsp_ver(struct hv_device *device, init_packet->msg.v2_msg.send_ndis_config.mtu = ndev->mtu + ETH_HLEN; init_packet->msg.v2_msg.send_ndis_config.capability.ieee8021q = 1; - if (nvsp_ver >= NVSP_PROTOCOL_VERSION_5) + if (nvsp_ver >= NVSP_PROTOCOL_VERSION_5) { init_packet->msg.v2_msg.send_ndis_config.capability.sriov = 1; + /* Teaming bit is needed to receive link speed updates */ + init_packet->msg.v2_msg.send_ndis_config.capability.teaming = 1; + } + ret = vmbus_sendpacket(device->channel, init_packet, sizeof(struct nvsp_message), (unsigned long)init_packet, diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 3e5356b6f825..82c8f6d47283 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -579,19 +579,32 @@ void netvsc_linkstatus_callback(struct hv_device *device_obj, struct netvsc_reconfig *event; unsigned long flags; - /* Handle link change statuses only */ + net = hv_get_drvdata(device_obj); + + if (!net) + return; + + ndev_ctx = netdev_priv(net); + + /* Update the physical link speed when changing to another vSwitch */ + if (indicate->status == RNDIS_STATUS_LINK_SPEED_CHANGE) { + u32 speed; + + speed = *(u32 *)((void *)indicate + indicate-> + status_buf_offset) / 10000; + ndev_ctx->speed = speed; + return; + } + + /* Handle these link change statuses below */ if (indicate->status != RNDIS_STATUS_NETWORK_CHANGE && indicate->status != RNDIS_STATUS_MEDIA_CONNECT && indicate->status != RNDIS_STATUS_MEDIA_DISCONNECT) return; - net = hv_get_drvdata(device_obj); - - if (!net || net->reg_state != NETREG_REGISTERED) + if (net->reg_state != NETREG_REGISTERED) return; - ndev_ctx = netdev_priv(net); - event = kzalloc(sizeof(*event), GFP_ATOMIC); if (!event) return; From 07bf2e11ad0586855f8be5e5775a4f314ce057fe Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Fri, 5 Aug 2016 13:24:12 +0200 Subject: [PATCH 0025/1715] net/fsl: use of_property_read_bool Use of_property_read_bool to check for the existence of a property. The semantic patch that makes this change is as follows: (http://coccinelle.lip6.fr/) // @@ expression e1,e2,x; @@ - if (of_get_property(e1,e2,NULL)) - x = true; - else - x = false; + x = of_property_read_bool(e1,e2); // Signed-off-by: Julia Lawall Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/xgmac_mdio.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/freescale/xgmac_mdio.c b/drivers/net/ethernet/freescale/xgmac_mdio.c index 7b8fe866f603..e03b30c60dcf 100644 --- a/drivers/net/ethernet/freescale/xgmac_mdio.c +++ b/drivers/net/ethernet/freescale/xgmac_mdio.c @@ -271,11 +271,8 @@ static int xgmac_mdio_probe(struct platform_device *pdev) goto err_ioremap; } - if (of_get_property(pdev->dev.of_node, - "little-endian", NULL)) - priv->is_little_endian = true; - else - priv->is_little_endian = false; + priv->is_little_endian = of_property_read_bool(pdev->dev.of_node, + "little-endian"); ret = of_mdiobus_register(bus, np); if (ret) { From d50736a853b8633ed8c9c64a2a1487e6081b739c Mon Sep 17 00:00:00 2001 From: Raju Lakkaraju Date: Fri, 5 Aug 2016 17:54:21 +0530 Subject: [PATCH 0026/1715] Microsemi VSC 8531/41 PHY Driver Hello, I added all review comments and re-sending for review. >From a5017f5878a92d2acec86a6a29b1498c457cb73a Mon Sep 17 00:00:00 2001 From: Nagaraju Lakkaraju Date: Wed, 3 Aug 2016 18:28:24 +0530 Subject: [PATCH v2] net: phy: Add drivers for Microsemi PHYs Signed-off-by: Nagaraju Lakkaraju Signed-off-by: David S. Miller --- drivers/net/phy/Kconfig | 5 ++ drivers/net/phy/Makefile | 1 + drivers/net/phy/mscc.c | 161 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 167 insertions(+) create mode 100644 drivers/net/phy/mscc.c diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index 47a64342cc16..1b534eaaca3d 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -307,6 +307,11 @@ config MDIO_XGENE This module provides a driver for the MDIO busses found in the APM X-Gene SoC's. +config MICROSEMI_PHY + tristate "Drivers for the Microsemi PHYs" + ---help--- + Currently supports the VSC8531 and VSC8541 PHYs + endif # PHYLIB config MICREL_KS8995MA diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile index 534dfa74d5a2..a713bd4da70a 100644 --- a/drivers/net/phy/Makefile +++ b/drivers/net/phy/Makefile @@ -11,6 +11,7 @@ obj-$(CONFIG_CICADA_PHY) += cicada.o obj-$(CONFIG_LXT_PHY) += lxt.o obj-$(CONFIG_QSEMI_PHY) += qsemi.o obj-$(CONFIG_SMSC_PHY) += smsc.o +obj-$(CONFIG_MICROSEMI_PHY) += mscc.o obj-$(CONFIG_TERANETICS_PHY) += teranetics.o obj-$(CONFIG_VITESSE_PHY) += vitesse.o obj-$(CONFIG_BCM_NET_PHYLIB) += bcm-phy-lib.o diff --git a/drivers/net/phy/mscc.c b/drivers/net/phy/mscc.c new file mode 100644 index 000000000000..2e1de53e3548 --- /dev/null +++ b/drivers/net/phy/mscc.c @@ -0,0 +1,161 @@ +/* + * Driver for Microsemi VSC85xx PHYs + * + * Author: Nagaraju Lakkaraju + * License: Dual MIT/GPL + * Copyright (c) 2016 Microsemi Corporation + */ + +#include +#include +#include +#include +#include + +enum rgmii_rx_clock_delay { + RGMII_RX_CLK_DELAY_0_2_NS = 0, + RGMII_RX_CLK_DELAY_0_8_NS = 1, + RGMII_RX_CLK_DELAY_1_1_NS = 2, + RGMII_RX_CLK_DELAY_1_7_NS = 3, + RGMII_RX_CLK_DELAY_2_0_NS = 4, + RGMII_RX_CLK_DELAY_2_3_NS = 5, + RGMII_RX_CLK_DELAY_2_6_NS = 6, + RGMII_RX_CLK_DELAY_3_4_NS = 7 +}; + +#define MII_VSC85XX_INT_MASK 25 +#define MII_VSC85XX_INT_MASK_MASK 0xa000 +#define MII_VSC85XX_INT_STATUS 26 + +#define MSCC_EXT_PAGE_ACCESS 31 +#define MSCC_PHY_PAGE_STANDARD 0x0000 /* Standard registers */ +#define MSCC_PHY_PAGE_EXTENDED_2 0x0002 /* Extended reg - page 2 */ + +/* Extended Page 2 Registers */ +#define MSCC_PHY_RGMII_CNTL 20 +#define RGMII_RX_CLK_DELAY_MASK 0x0070 +#define RGMII_RX_CLK_DELAY_POS 4 + +/* Microsemi PHY ID's */ +#define PHY_ID_VSC8531 0x00070570 +#define PHY_ID_VSC8541 0x00070770 + +static int vsc85xx_phy_page_set(struct phy_device *phydev, u8 page) +{ + int rc; + + rc = phy_write(phydev, MSCC_EXT_PAGE_ACCESS, page); + return rc; +} + +static int vsc85xx_default_config(struct phy_device *phydev) +{ + int rc; + u16 reg_val; + + mutex_lock(&phydev->lock); + rc = vsc85xx_phy_page_set(phydev, MSCC_PHY_PAGE_EXTENDED_2); + if (rc != 0) + goto out_unlock; + + reg_val = phy_read(phydev, MSCC_PHY_RGMII_CNTL); + reg_val &= ~(RGMII_RX_CLK_DELAY_MASK); + reg_val |= (RGMII_RX_CLK_DELAY_1_1_NS << RGMII_RX_CLK_DELAY_POS); + phy_write(phydev, MSCC_PHY_RGMII_CNTL, reg_val); + rc = vsc85xx_phy_page_set(phydev, MSCC_PHY_PAGE_STANDARD); + +out_unlock: + mutex_unlock(&phydev->lock); + + return rc; +} + +static int vsc85xx_config_init(struct phy_device *phydev) +{ + int rc; + + rc = vsc85xx_default_config(phydev); + if (rc) + return rc; + rc = genphy_config_init(phydev); + + return rc; +} + +static int vsc85xx_ack_interrupt(struct phy_device *phydev) +{ + int rc; + + if (phydev->interrupts == PHY_INTERRUPT_ENABLED) + rc = phy_read(phydev, MII_VSC85XX_INT_STATUS); + + return (rc < 0) ? rc : 0; +} + +static int vsc85xx_config_intr(struct phy_device *phydev) +{ + int rc; + + if (phydev->interrupts == PHY_INTERRUPT_ENABLED) { + rc = phy_write(phydev, MII_VSC85XX_INT_MASK, + MII_VSC85XX_INT_MASK_MASK); + } else { + rc = phy_write(phydev, MII_VSC85XX_INT_MASK, 0); + if (rc < 0) + return rc; + rc = phy_read(phydev, MII_VSC85XX_INT_STATUS); + } + + return rc; +} + +/* Microsemi VSC85xx PHYs */ +static struct phy_driver vsc85xx_driver[] = { +{ + .phy_id = PHY_ID_VSC8531, + .name = "Microsemi VSC8531", + .phy_id_mask = 0xfffffff0, + .features = PHY_GBIT_FEATURES, + .flags = PHY_HAS_INTERRUPT, + .soft_reset = &genphy_soft_reset, + .config_init = &vsc85xx_config_init, + .config_aneg = &genphy_config_aneg, + .aneg_done = &genphy_aneg_done, + .read_status = &genphy_read_status, + .ack_interrupt = &vsc85xx_ack_interrupt, + .config_intr = &vsc85xx_config_intr, + .suspend = &genphy_suspend, + .resume = &genphy_resume, +}, +{ + .phy_id = PHY_ID_VSC8541, + .name = "Microsemi VSC8541 SyncE", + .phy_id_mask = 0xfffffff0, + .features = PHY_GBIT_FEATURES, + .flags = PHY_HAS_INTERRUPT, + .soft_reset = &genphy_soft_reset, + .config_init = &vsc85xx_config_init, + .config_aneg = &genphy_config_aneg, + .aneg_done = &genphy_aneg_done, + .read_status = &genphy_read_status, + .ack_interrupt = &vsc85xx_ack_interrupt, + .config_intr = &vsc85xx_config_intr, + .suspend = &genphy_suspend, + .resume = &genphy_resume, +} + +}; + +module_phy_driver(vsc85xx_driver); + +static struct mdio_device_id __maybe_unused vsc85xx_tbl[] = { + { PHY_ID_VSC8531, 0xfffffff0, }, + { PHY_ID_VSC8541, 0xfffffff0, }, + { } +}; + +MODULE_DEVICE_TABLE(mdio, vsc85xx_tbl); + +MODULE_DESCRIPTION("Microsemi VSC85xx PHY driver"); +MODULE_AUTHOR("Nagaraju Lakkaraju"); +MODULE_LICENSE("Dual MIT/GPL"); From 6cdaf03f8c23fcbfe7d4fa6876e114193bb6b632 Mon Sep 17 00:00:00 2001 From: Nicolas Iooss Date: Fri, 5 Aug 2016 22:11:12 +0200 Subject: [PATCH 0027/1715] RDS: add __printf format attribute to error reporting functions This is helpful to detect at compile-time errors related to format strings. Signed-off-by: Nicolas Iooss Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/ib.h | 1 + net/rds/rds.h | 1 + 2 files changed, 2 insertions(+) diff --git a/net/rds/ib.h b/net/rds/ib.h index 046f7508c06b..45ac8e8e58f4 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -333,6 +333,7 @@ void rds_ib_conn_path_shutdown(struct rds_conn_path *cp); void rds_ib_state_change(struct sock *sk); int rds_ib_listen_init(void); void rds_ib_listen_stop(void); +__printf(2, 3) void __rds_ib_conn_error(struct rds_connection *conn, const char *, ...); int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, struct rdma_cm_event *event); diff --git a/net/rds/rds.h b/net/rds/rds.h index b2d17f0fafa8..fd0bccb2f9f9 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -688,6 +688,7 @@ void __rds_conn_error(struct rds_connection *conn, const char *, ...); #define rds_conn_error(conn, fmt...) \ __rds_conn_error(conn, KERN_WARNING "RDS: " fmt) +__printf(2, 3) void __rds_conn_path_error(struct rds_conn_path *cp, const char *, ...); #define rds_conn_path_error(cp, fmt...) \ __rds_conn_path_error(cp, KERN_WARNING "RDS: " fmt) From 7d0a55339f146a27282f95ab445d6d0324958a40 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Tue, 9 Aug 2016 07:50:46 +0200 Subject: [PATCH 0028/1715] batman-adv: Start new development cycle Signed-off-by: Simon Wunderlich Signed-off-by: Sven Eckelmann --- net/batman-adv/main.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 06a860845434..09af21e27639 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -24,7 +24,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2016.3" +#define BATADV_SOURCE_VERSION "2016.4" #endif /* B.A.T.M.A.N. parameters */ From e61cdfa3343b439590d6b9319fc973b7a1cb97ae Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Tue, 7 Jun 2016 22:44:53 +0200 Subject: [PATCH 0029/1715] batman-adv: Document optional batadv_algo_ops Some operations in batadv_algo_ops are optional and marked as such in the kerneldoc. But some of them miss the "(optional)" in their kerneldoc. These have to also be marked to give an implementor of an algorithm the correct background information without looking in the code calling these function pointers. Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Simon Wunderlich --- net/batman-adv/types.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index a64522c3b45d..0ede27a1e343 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -1396,6 +1396,7 @@ struct batadv_forw_packet { /** * struct batadv_algo_iface_ops - mesh algorithm callbacks (interface specific) * @activate: start routing mechanisms when hard-interface is brought up + * (optional) * @enable: init routing info when hard-interface is enabled * @disable: de-init routing info when hard-interface is disabled * @update_mac: (re-)init mac addresses of the protocol information @@ -1413,6 +1414,7 @@ struct batadv_algo_iface_ops { /** * struct batadv_algo_neigh_ops - mesh algorithm callbacks (neighbour specific) * @hardif_init: called on creation of single hop entry + * (optional) * @cmp: compare the metrics of two neighbors for their respective outgoing * interfaces * @is_similar_or_better: check if neigh1 is equally similar or better than @@ -1435,11 +1437,11 @@ struct batadv_algo_neigh_ops { /** * struct batadv_algo_orig_ops - mesh algorithm callbacks (originator specific) * @free: free the resources allocated by the routing algorithm for an orig_node - * object + * object (optional) * @add_if: ask the routing algorithm to apply the needed changes to the - * orig_node due to a new hard-interface being added into the mesh + * orig_node due to a new hard-interface being added into the mesh (optional) * @del_if: ask the routing algorithm to apply the needed changes to the - * orig_node due to an hard-interface being removed from the mesh + * orig_node due to an hard-interface being removed from the mesh (optional) * @print: print the originator table (optional) */ struct batadv_algo_orig_ops { From 9791860ce5438da65f4f5c38c282fdc45e30dd97 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 10 Jun 2016 23:00:55 +0200 Subject: [PATCH 0030/1715] batman-adv: Define module rtnl link name The batman-adv module can automatically be loaded when operations over the rtnl link are triggered. This requires only the correct rtnl link name in the module header. Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Simon Wunderlich --- net/batman-adv/main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index fe4c5e29f96b..f61479b5a99d 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -638,3 +638,4 @@ MODULE_AUTHOR(BATADV_DRIVER_AUTHOR); MODULE_DESCRIPTION(BATADV_DRIVER_DESC); MODULE_SUPPORTED_DEVICE(BATADV_DRIVER_DEVICE); MODULE_VERSION(BATADV_SOURCE_VERSION); +MODULE_ALIAS_RTNL_LINK("batadv"); From f2c750fedd2e102b95a3b1008703b9f54bbc9b08 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 10 Jun 2016 23:00:56 +0200 Subject: [PATCH 0031/1715] batman-adv: Use rtnl link in device creation example The standard kernel API to add new virtual interfaces and attach other interfaces to it is rtnl-link. batman-adv supports it since v3.10. This functionality should be used instead of the legacy batman-adv-only sysfs interface. Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Simon Wunderlich --- Documentation/networking/batman-adv.txt | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/Documentation/networking/batman-adv.txt b/Documentation/networking/batman-adv.txt index 1b5e7a7f2185..8a8d3d96f6c6 100644 --- a/Documentation/networking/batman-adv.txt +++ b/Documentation/networking/batman-adv.txt @@ -43,10 +43,15 @@ new interfaces to verify the compatibility. There is no need to reload the module if you plug your USB wifi adapter into your ma- chine after batman advanced was initially loaded. -To activate a given interface simply write "bat0" into its -"mesh_iface" file inside the batman_adv subfolder: +The batman-adv soft-interface can be created using the iproute2 +tool "ip" -# echo bat0 > /sys/class/net/eth0/batman_adv/mesh_iface +# ip link add name bat0 type batadv + +To activate a given interface simply attach it to the "bat0" +interface + +# ip link set dev eth0 master bat0 Repeat this step for all interfaces you wish to add. Now batman starts using/broadcasting on this/these interface(s). @@ -56,10 +61,10 @@ By reading the "iface_status" file you can check its status: # cat /sys/class/net/eth0/batman_adv/iface_status # active -To deactivate an interface you have to write "none" into its -"mesh_iface" file: +To deactivate an interface you have to detach it from the +"bat0" interface: -# echo none > /sys/class/net/eth0/batman_adv/mesh_iface +# ip link set dev eth0 nomaster All mesh wide settings can be found in batman's own interface From 77d69d8ce17ce0d2a29d042e27943b0a8cea302d Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Mon, 13 Jun 2016 07:41:30 +0200 Subject: [PATCH 0032/1715] batman-adv: Modify mesh_iface outside sysfs context The legacy sysfs interface to modify interfaces belonging to batman-adv is run inside a region holding s_lock. And to add a net_device, it has to also get the rtnl_lock. This is exactly the other way around than in other virtual net_devices and conflicts with netdevice notifier which executes inside rtnl_lock. The inverted lock situation is currently solved by executing the removal of netdevices via workqueue. The workqueue isn't executed inside rtnl_lock and thus can independently get the s_lock and the rtnl_lock. But this workaround fails when the netdevice notifier creates events in quick succession and the earlier triggered removal of a net_device isn't processed in the workqueue before the adding of the new netdevice (with same name) event is issued. Instead the legacy sysfs interface store events have to be enqueued in a workqueue to loose the s_lock. The worker is then free to get the required locks and the deadlock is avoided. Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Simon Wunderlich --- net/batman-adv/sysfs.c | 107 +++++++++++++++++++++++++++++++---------- net/batman-adv/types.h | 13 +++++ 2 files changed, 94 insertions(+), 26 deletions(-) diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index fe9ca94ddee2..852895946edf 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -37,6 +37,7 @@ #include #include #include +#include #include "bridge_loop_avoidance.h" #include "distributed-arp-table.h" @@ -828,31 +829,31 @@ static ssize_t batadv_show_mesh_iface(struct kobject *kobj, return length; } -static ssize_t batadv_store_mesh_iface(struct kobject *kobj, - struct attribute *attr, char *buff, - size_t count) +/** + * batadv_store_mesh_iface_finish - store new hardif mesh_iface state + * @net_dev: netdevice to add/remove to/from batman-adv soft-interface + * @ifname: name of soft-interface to modify + * + * Changes the parts of the hard+soft interface which can not be modified under + * sysfs lock (to prevent deadlock situations). + * + * Return: 0 on success, 0 < on failure + */ +static int batadv_store_mesh_iface_finish(struct net_device *net_dev, + char ifname[IFNAMSIZ]) { - struct net_device *net_dev = batadv_kobj_to_netdev(kobj); struct net *net = dev_net(net_dev); struct batadv_hard_iface *hard_iface; - int status_tmp = -1; - int ret = count; + int status_tmp; + int ret = 0; + + ASSERT_RTNL(); hard_iface = batadv_hardif_get_by_netdev(net_dev); if (!hard_iface) - return count; + return 0; - if (buff[count - 1] == '\n') - buff[count - 1] = '\0'; - - if (strlen(buff) >= IFNAMSIZ) { - pr_err("Invalid parameter for 'mesh_iface' setting received: interface name too long '%s'\n", - buff); - batadv_hardif_put(hard_iface); - return -EINVAL; - } - - if (strncmp(buff, "none", 4) == 0) + if (strncmp(ifname, "none", 4) == 0) status_tmp = BATADV_IF_NOT_IN_USE; else status_tmp = BATADV_IF_I_WANT_YOU; @@ -861,15 +862,13 @@ static ssize_t batadv_store_mesh_iface(struct kobject *kobj, goto out; if ((hard_iface->soft_iface) && - (strncmp(hard_iface->soft_iface->name, buff, IFNAMSIZ) == 0)) + (strncmp(hard_iface->soft_iface->name, ifname, IFNAMSIZ) == 0)) goto out; - rtnl_lock(); - if (status_tmp == BATADV_IF_NOT_IN_USE) { batadv_hardif_disable_interface(hard_iface, BATADV_IF_CLEANUP_AUTO); - goto unlock; + goto out; } /* if the interface already is in use */ @@ -877,15 +876,71 @@ static ssize_t batadv_store_mesh_iface(struct kobject *kobj, batadv_hardif_disable_interface(hard_iface, BATADV_IF_CLEANUP_AUTO); - ret = batadv_hardif_enable_interface(hard_iface, net, buff); - -unlock: - rtnl_unlock(); + ret = batadv_hardif_enable_interface(hard_iface, net, ifname); out: batadv_hardif_put(hard_iface); return ret; } +/** + * batadv_store_mesh_iface_work - store new hardif mesh_iface state + * @work: work queue item + * + * Changes the parts of the hard+soft interface which can not be modified under + * sysfs lock (to prevent deadlock situations). + */ +static void batadv_store_mesh_iface_work(struct work_struct *work) +{ + struct batadv_store_mesh_work *store_work; + int ret; + + store_work = container_of(work, struct batadv_store_mesh_work, work); + + rtnl_lock(); + ret = batadv_store_mesh_iface_finish(store_work->net_dev, + store_work->soft_iface_name); + rtnl_unlock(); + + if (ret < 0) + pr_err("Failed to store new mesh_iface state %s for %s: %d\n", + store_work->soft_iface_name, store_work->net_dev->name, + ret); + + dev_put(store_work->net_dev); + kfree(store_work); +} + +static ssize_t batadv_store_mesh_iface(struct kobject *kobj, + struct attribute *attr, char *buff, + size_t count) +{ + struct net_device *net_dev = batadv_kobj_to_netdev(kobj); + struct batadv_store_mesh_work *store_work; + + if (buff[count - 1] == '\n') + buff[count - 1] = '\0'; + + if (strlen(buff) >= IFNAMSIZ) { + pr_err("Invalid parameter for 'mesh_iface' setting received: interface name too long '%s'\n", + buff); + return -EINVAL; + } + + store_work = kmalloc(sizeof(*store_work), GFP_KERNEL); + if (!store_work) + return -ENOMEM; + + dev_hold(net_dev); + INIT_WORK(&store_work->work, batadv_store_mesh_iface_work); + store_work->net_dev = net_dev; + strlcpy(store_work->soft_iface_name, buff, + sizeof(store_work->soft_iface_name)); + + queue_work(batadv_event_workqueue, &store_work->work); + + return count; +} + static ssize_t batadv_show_iface_status(struct kobject *kobj, struct attribute *attr, char *buff) { diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 0ede27a1e343..23c9577e1d1b 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -1566,4 +1566,17 @@ enum batadv_tvlv_handler_flags { BATADV_TVLV_HANDLER_OGM_CALLED = BIT(2), }; +/** + * struct batadv_store_mesh_work - Work queue item to detach add/del interface + * from sysfs locks + * @net_dev: netdevice to add/remove to/from batman-adv soft-interface + * @soft_iface_name: name of soft-interface to modify + * @work: work queue item + */ +struct batadv_store_mesh_work { + struct net_device *net_dev; + char soft_iface_name[IFNAMSIZ]; + struct work_struct work; +}; + #endif /* _NET_BATMAN_ADV_TYPES_H_ */ From 569c98504b2004dba207127bcceb8f3aeb9b8e7d Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Mon, 13 Jun 2016 07:41:31 +0200 Subject: [PATCH 0033/1715] batman-adv: Revert "postpone sysfs removal when unregistering" Postponing the removal of the interface breaks the expected behavior of NETDEV_UNREGISTER and NETDEV_PRE_TYPE_CHANGE. This is especially problematic when an interface is removed and added in quick succession. This reverts commit 5bc44dc8458c ("batman-adv: postpone sysfs removal when unregistering"). Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Simon Wunderlich --- net/batman-adv/hard-interface.c | 26 +++---------------- net/batman-adv/soft-interface.c | 44 ++++++++++----------------------- net/batman-adv/types.h | 4 --- 3 files changed, 16 insertions(+), 58 deletions(-) diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 1f9080840566..714af8e7bfa5 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -35,7 +35,6 @@ #include #include #include -#include #include "bat_v.h" #include "bridge_loop_avoidance.h" @@ -625,25 +624,6 @@ out: batadv_hardif_put(primary_if); } -/** - * batadv_hardif_remove_interface_finish - cleans up the remains of a hardif - * @work: work queue item - * - * Free the parts of the hard interface which can not be removed under - * rtnl lock (to prevent deadlock situations). - */ -static void batadv_hardif_remove_interface_finish(struct work_struct *work) -{ - struct batadv_hard_iface *hard_iface; - - hard_iface = container_of(work, struct batadv_hard_iface, - cleanup_work); - - batadv_debugfs_del_hardif(hard_iface); - batadv_sysfs_del_hardif(&hard_iface->hardif_obj); - batadv_hardif_put(hard_iface); -} - static struct batadv_hard_iface * batadv_hardif_add_interface(struct net_device *net_dev) { @@ -676,8 +656,6 @@ batadv_hardif_add_interface(struct net_device *net_dev) INIT_LIST_HEAD(&hard_iface->list); INIT_HLIST_HEAD(&hard_iface->neigh_list); - INIT_WORK(&hard_iface->cleanup_work, - batadv_hardif_remove_interface_finish); spin_lock_init(&hard_iface->neigh_list_lock); @@ -719,7 +697,9 @@ static void batadv_hardif_remove_interface(struct batadv_hard_iface *hard_iface) return; hard_iface->if_status = BATADV_IF_TO_BE_REMOVED; - queue_work(batadv_event_workqueue, &hard_iface->cleanup_work); + batadv_debugfs_del_hardif(hard_iface); + batadv_sysfs_del_hardif(&hard_iface->hardif_obj); + batadv_hardif_put(hard_iface); } void batadv_hardif_remove_interfaces(void) diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 7527c0652dd5..216ac03ab432 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -46,7 +47,6 @@ #include #include #include -#include #include "bat_algo.h" #include "bridge_loop_avoidance.h" @@ -746,34 +746,6 @@ static void batadv_set_lockdep_class(struct net_device *dev) netdev_for_each_tx_queue(dev, batadv_set_lockdep_class_one, NULL); } -/** - * batadv_softif_destroy_finish - cleans up the remains of a softif - * @work: work queue item - * - * Free the parts of the soft interface which can not be removed under - * rtnl lock (to prevent deadlock situations). - */ -static void batadv_softif_destroy_finish(struct work_struct *work) -{ - struct batadv_softif_vlan *vlan; - struct batadv_priv *bat_priv; - struct net_device *soft_iface; - - bat_priv = container_of(work, struct batadv_priv, - cleanup_work); - soft_iface = bat_priv->soft_iface; - - /* destroy the "untagged" VLAN */ - vlan = batadv_softif_vlan_get(bat_priv, BATADV_NO_FLAGS); - if (vlan) { - batadv_softif_destroy_vlan(bat_priv, vlan); - batadv_softif_vlan_put(vlan); - } - - batadv_sysfs_del_meshif(soft_iface); - unregister_netdev(soft_iface); -} - /** * batadv_softif_init_late - late stage initialization of soft interface * @dev: registered network device to modify @@ -791,7 +763,6 @@ static int batadv_softif_init_late(struct net_device *dev) bat_priv = netdev_priv(dev); bat_priv->soft_iface = dev; - INIT_WORK(&bat_priv->cleanup_work, batadv_softif_destroy_finish); /* batadv_interface_stats() needs to be available as soon as * register_netdevice() has been called @@ -1028,8 +999,19 @@ struct net_device *batadv_softif_create(struct net *net, const char *name) void batadv_softif_destroy_sysfs(struct net_device *soft_iface) { struct batadv_priv *bat_priv = netdev_priv(soft_iface); + struct batadv_softif_vlan *vlan; - queue_work(batadv_event_workqueue, &bat_priv->cleanup_work); + ASSERT_RTNL(); + + /* destroy the "untagged" VLAN */ + vlan = batadv_softif_vlan_get(bat_priv, BATADV_NO_FLAGS); + if (vlan) { + batadv_softif_destroy_vlan(bat_priv, vlan); + batadv_softif_vlan_put(vlan); + } + + batadv_sysfs_del_meshif(soft_iface); + unregister_netdevice(soft_iface); } /** diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 23c9577e1d1b..96af6daa4fc9 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -132,7 +132,6 @@ struct batadv_hard_iface_bat_v { * @rcu: struct used for freeing in an RCU-safe manner * @bat_iv: per hard-interface B.A.T.M.A.N. IV data * @bat_v: per hard-interface B.A.T.M.A.N. V data - * @cleanup_work: work queue callback item for hard-interface deinit * @debug_dir: dentry for nc subdir in batman-adv directory in debugfs * @neigh_list: list of unique single hop neighbors via this interface * @neigh_list_lock: lock protecting neigh_list @@ -152,7 +151,6 @@ struct batadv_hard_iface { #ifdef CONFIG_BATMAN_ADV_BATMAN_V struct batadv_hard_iface_bat_v bat_v; #endif - struct work_struct cleanup_work; struct dentry *debug_dir; struct hlist_head neigh_list; /* neigh_list_lock protects: neigh_list */ @@ -1015,7 +1013,6 @@ struct batadv_priv_bat_v { * @forw_bcast_list_lock: lock protecting forw_bcast_list * @tp_list_lock: spinlock protecting @tp_list * @orig_work: work queue callback item for orig node purging - * @cleanup_work: work queue callback item for soft-interface deinit * @primary_if: one of the hard-interfaces assigned to this mesh interface * becomes the primary interface * @algo_ops: routing algorithm used by this mesh interface @@ -1074,7 +1071,6 @@ struct batadv_priv { spinlock_t tp_list_lock; /* protects tp_list */ atomic_t tp_num; struct delayed_work orig_work; - struct work_struct cleanup_work; struct batadv_hard_iface __rcu *primary_if; /* rcu protected pointer */ struct batadv_algo_ops *algo_ops; struct hlist_head softif_vlan_list; From f4acb1086bc972f69e1ae14ec5b4473bc6c02dd5 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Mon, 13 Jun 2016 07:41:32 +0200 Subject: [PATCH 0034/1715] batman-adv: Avoid sysfs name collision for netns moves The kobject_put is only removing the sysfs entry and corresponding entries when its reference counter becomes zero. This tends to lead to collisions when a device is moved between two different network namespaces because some of the sysfs files have to be removed first and then added again to the already moved sysfs entry. WARNING: CPU: 0 PID: 290 at lib/kobject.c:240 kobject_add_internal+0x5ec/0x8a0 kobject_add_internal failed for batman_adv with -EEXIST, don't try to register things with the same name in the same directory. But the caller of kobject_put can already remove the sysfs entry before it does the kobject_put. This removal is done even when the reference counter is not yet zero and thus avoids the problem. Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Simon Wunderlich --- net/batman-adv/sysfs.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index 852895946edf..4e06cb792e5d 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -713,6 +713,8 @@ rem_attr: for (bat_attr = batadv_mesh_attrs; *bat_attr; ++bat_attr) sysfs_remove_file(bat_priv->mesh_obj, &((*bat_attr)->attr)); + kobject_uevent(bat_priv->mesh_obj, KOBJ_REMOVE); + kobject_del(bat_priv->mesh_obj); kobject_put(bat_priv->mesh_obj); bat_priv->mesh_obj = NULL; out: @@ -727,6 +729,8 @@ void batadv_sysfs_del_meshif(struct net_device *dev) for (bat_attr = batadv_mesh_attrs; *bat_attr; ++bat_attr) sysfs_remove_file(bat_priv->mesh_obj, &((*bat_attr)->attr)); + kobject_uevent(bat_priv->mesh_obj, KOBJ_REMOVE); + kobject_del(bat_priv->mesh_obj); kobject_put(bat_priv->mesh_obj); bat_priv->mesh_obj = NULL; } @@ -782,6 +786,10 @@ rem_attr: for (bat_attr = batadv_vlan_attrs; *bat_attr; ++bat_attr) sysfs_remove_file(vlan->kobj, &((*bat_attr)->attr)); + if (vlan->kobj != bat_priv->mesh_obj) { + kobject_uevent(vlan->kobj, KOBJ_REMOVE); + kobject_del(vlan->kobj); + } kobject_put(vlan->kobj); vlan->kobj = NULL; out: @@ -801,6 +809,10 @@ void batadv_sysfs_del_vlan(struct batadv_priv *bat_priv, for (bat_attr = batadv_vlan_attrs; *bat_attr; ++bat_attr) sysfs_remove_file(vlan->kobj, &((*bat_attr)->attr)); + if (vlan->kobj != bat_priv->mesh_obj) { + kobject_uevent(vlan->kobj, KOBJ_REMOVE); + kobject_del(vlan->kobj); + } kobject_put(vlan->kobj); vlan->kobj = NULL; } @@ -1103,6 +1115,8 @@ out: void batadv_sysfs_del_hardif(struct kobject **hardif_obj) { + kobject_uevent(*hardif_obj, KOBJ_REMOVE); + kobject_del(*hardif_obj); kobject_put(*hardif_obj); *hardif_obj = NULL; } From f55a2e844701673dcdb939ea94a289294d1741e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Tue, 14 Jun 2016 22:56:50 +0200 Subject: [PATCH 0035/1715] batman-adv: Remove unused primary_if and bat_priv variables MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes: ef0a937f7a14 ("batman-adv: consider outgoing interface in OGM sending") Signed-off-by: Linus Lüssing Signed-off-by: Marek Lindner Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/bat_iv_ogm.c | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 19b0abd6c640..6af446208b38 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -528,36 +528,25 @@ static void batadv_iv_ogm_send_to_if(struct batadv_forw_packet *forw_packet, static void batadv_iv_ogm_emit(struct batadv_forw_packet *forw_packet) { struct net_device *soft_iface; - struct batadv_priv *bat_priv; - struct batadv_hard_iface *primary_if = NULL; if (!forw_packet->if_incoming) { pr_err("Error - can't forward packet: incoming iface not specified\n"); - goto out; + return; } soft_iface = forw_packet->if_incoming->soft_iface; - bat_priv = netdev_priv(soft_iface); if (WARN_ON(!forw_packet->if_outgoing)) - goto out; + return; if (WARN_ON(forw_packet->if_outgoing->soft_iface != soft_iface)) - goto out; + return; if (forw_packet->if_incoming->if_status != BATADV_IF_ACTIVE) - goto out; - - primary_if = batadv_primary_if_get_selected(bat_priv); - if (!primary_if) - goto out; + return; /* only for one specific outgoing interface */ batadv_iv_ogm_send_to_if(forw_packet, forw_packet->if_outgoing); - -out: - if (primary_if) - batadv_hardif_put(primary_if); } /** From 086869438a73d8213a3f945da6679a548badbabc Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Sun, 3 Jul 2016 12:46:32 +0200 Subject: [PATCH 0036/1715] batman-adv: make the GW selection class algorithm specific The B.A.T.M.A.N. V algorithm uses a different metric compared to its predecessor and for this reason the logic used to compute the best Gateway is also changed. This means that the GW selection class fed to this logic has a semantics that depends on the algorithm being used. Make the parsing and printing routine of the GW selection class routing algorithm specific. Each algorithm can now parse (and print) this value independently. If no API is provided by any algorithm, the default is to use the current mechanism of considering such value like an integer between 1 and 255. Signed-off-by: Antonio Quartulli Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Simon Wunderlich --- net/batman-adv/bat_v.c | 34 ++++++++++++++++++++++++++++++++++ net/batman-adv/sysfs.c | 34 ++++++++++++++++++++++++++++++++-- net/batman-adv/types.h | 13 +++++++++++++ 3 files changed, 79 insertions(+), 2 deletions(-) diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c index 0366cbf5e444..90fd5ee877d1 100644 --- a/net/batman-adv/bat_v.c +++ b/net/batman-adv/bat_v.c @@ -21,8 +21,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -34,6 +36,8 @@ #include "bat_algo.h" #include "bat_v_elp.h" #include "bat_v_ogm.h" +#include "gateway_client.h" +#include "gateway_common.h" #include "hard-interface.h" #include "hash.h" #include "originator.h" @@ -320,6 +324,32 @@ err_ifinfo1: return ret; } +static ssize_t batadv_v_store_sel_class(struct batadv_priv *bat_priv, + char *buff, size_t count) +{ + u32 old_class, class; + + if (!batadv_parse_throughput(bat_priv->soft_iface, buff, + "B.A.T.M.A.N. V GW selection class", + &class)) + return -EINVAL; + + old_class = atomic_read(&bat_priv->gw.sel_class); + atomic_set(&bat_priv->gw.sel_class, class); + + if (old_class != class) + batadv_gw_reselect(bat_priv); + + return count; +} + +static ssize_t batadv_v_show_sel_class(struct batadv_priv *bat_priv, char *buff) +{ + u32 class = atomic_read(&bat_priv->gw.sel_class); + + return sprintf(buff, "%u.%u MBit\n", class / 10, class % 10); +} + static struct batadv_algo_ops batadv_batman_v __read_mostly = { .name = "BATMAN_V", .iface = { @@ -338,6 +368,10 @@ static struct batadv_algo_ops batadv_batman_v __read_mostly = { .orig = { .print = batadv_v_orig_print, }, + .gw = { + .store_sel_class = batadv_v_store_sel_class, + .show_sel_class = batadv_v_show_sel_class, + }, }; /** diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index 4e06cb792e5d..e78bd7f2f276 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -515,6 +515,36 @@ static ssize_t batadv_store_gw_mode(struct kobject *kobj, return count; } +static ssize_t batadv_show_gw_sel_class(struct kobject *kobj, + struct attribute *attr, char *buff) +{ + struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj); + + if (bat_priv->algo_ops->gw.show_sel_class) + return bat_priv->algo_ops->gw.show_sel_class(bat_priv, buff); + + return sprintf(buff, "%i\n", atomic_read(&bat_priv->gw.sel_class)); +} + +static ssize_t batadv_store_gw_sel_class(struct kobject *kobj, + struct attribute *attr, char *buff, + size_t count) +{ + struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj); + + if (buff[count - 1] == '\n') + buff[count - 1] = '\0'; + + if (bat_priv->algo_ops->gw.store_sel_class) + return bat_priv->algo_ops->gw.store_sel_class(bat_priv, buff, + count); + + return __batadv_store_uint_attr(buff, count, 1, BATADV_TQ_MAX_VALUE, + batadv_post_gw_reselect, attr, + &bat_priv->gw.sel_class, + bat_priv->soft_iface); +} + static ssize_t batadv_show_gw_bwidth(struct kobject *kobj, struct attribute *attr, char *buff) { @@ -626,8 +656,8 @@ BATADV_ATTR_SIF_UINT(orig_interval, orig_interval, S_IRUGO | S_IWUSR, 2 * BATADV_JITTER, INT_MAX, NULL); BATADV_ATTR_SIF_UINT(hop_penalty, hop_penalty, S_IRUGO | S_IWUSR, 0, BATADV_TQ_MAX_VALUE, NULL); -BATADV_ATTR_SIF_UINT(gw_sel_class, gw.sel_class, S_IRUGO | S_IWUSR, 1, - BATADV_TQ_MAX_VALUE, batadv_post_gw_reselect); +static BATADV_ATTR(gw_sel_class, S_IRUGO | S_IWUSR, batadv_show_gw_sel_class, + batadv_store_gw_sel_class); static BATADV_ATTR(gw_bandwidth, S_IRUGO | S_IWUSR, batadv_show_gw_bwidth, batadv_store_gw_bwidth); #ifdef CONFIG_BATMAN_ADV_MCAST diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 96af6daa4fc9..deaadba61a2c 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -1449,6 +1449,17 @@ struct batadv_algo_orig_ops { struct batadv_hard_iface *hard_iface); }; +/** + * struct batadv_algo_gw_ops - mesh algorithm callbacks (GW specific) + * @store_sel_class: parse and stores a new GW selection class (optional) + * @show_sel_class: prints the current GW selection class (optional) + */ +struct batadv_algo_gw_ops { + ssize_t (*store_sel_class)(struct batadv_priv *bat_priv, char *buff, + size_t count); + ssize_t (*show_sel_class)(struct batadv_priv *bat_priv, char *buff); +}; + /** * struct batadv_algo_ops - mesh algorithm callbacks * @list: list node for the batadv_algo_list @@ -1456,6 +1467,7 @@ struct batadv_algo_orig_ops { * @iface: callbacks related to interface handling * @neigh: callbacks related to neighbors handling * @orig: callbacks related to originators handling + * @gw: callbacks related to GW mode */ struct batadv_algo_ops { struct hlist_node list; @@ -1463,6 +1475,7 @@ struct batadv_algo_ops { struct batadv_algo_iface_ops iface; struct batadv_algo_neigh_ops neigh; struct batadv_algo_orig_ops orig; + struct batadv_algo_gw_ops gw; }; /** From 34d99cfefaac596adfe9b69f5e7c2cd291af2334 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Sun, 3 Jul 2016 12:46:33 +0200 Subject: [PATCH 0037/1715] batman-adv: make GW election code protocol specific Each routing protocol may have its own specific logic about gateway election which is potentially based on the metric being used. Create two GW specific API functions and move the current election logic in the B.A.T.M.A.N. IV specific code. Signed-off-by: Antonio Quartulli Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Simon Wunderlich --- net/batman-adv/bat_iv_ogm.c | 219 ++++++++++++++++++++++++++++++++ net/batman-adv/gateway_client.c | 217 ++++--------------------------- net/batman-adv/gateway_client.h | 3 + net/batman-adv/gateway_common.c | 5 +- net/batman-adv/types.h | 11 ++ 5 files changed, 263 insertions(+), 192 deletions(-) diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 6af446208b38..d04874fc034d 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -51,6 +51,7 @@ #include "bat_algo.h" #include "bitarray.h" +#include "gateway_client.h" #include "hard-interface.h" #include "hash.h" #include "log.h" @@ -2106,6 +2107,219 @@ static void batadv_iv_iface_activate(struct batadv_hard_iface *hard_iface) batadv_iv_ogm_schedule(hard_iface); } +static struct batadv_gw_node * +batadv_iv_gw_get_best_gw_node(struct batadv_priv *bat_priv) +{ + struct batadv_neigh_node *router; + struct batadv_neigh_ifinfo *router_ifinfo; + struct batadv_gw_node *gw_node, *curr_gw = NULL; + u64 max_gw_factor = 0; + u64 tmp_gw_factor = 0; + u8 max_tq = 0; + u8 tq_avg; + struct batadv_orig_node *orig_node; + + rcu_read_lock(); + hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) { + orig_node = gw_node->orig_node; + router = batadv_orig_router_get(orig_node, BATADV_IF_DEFAULT); + if (!router) + continue; + + router_ifinfo = batadv_neigh_ifinfo_get(router, + BATADV_IF_DEFAULT); + if (!router_ifinfo) + goto next; + + if (!kref_get_unless_zero(&gw_node->refcount)) + goto next; + + tq_avg = router_ifinfo->bat_iv.tq_avg; + + switch (atomic_read(&bat_priv->gw.sel_class)) { + case 1: /* fast connection */ + tmp_gw_factor = tq_avg * tq_avg; + tmp_gw_factor *= gw_node->bandwidth_down; + tmp_gw_factor *= 100 * 100; + tmp_gw_factor >>= 18; + + if ((tmp_gw_factor > max_gw_factor) || + ((tmp_gw_factor == max_gw_factor) && + (tq_avg > max_tq))) { + if (curr_gw) + batadv_gw_node_put(curr_gw); + curr_gw = gw_node; + kref_get(&curr_gw->refcount); + } + break; + + default: /* 2: stable connection (use best statistic) + * 3: fast-switch (use best statistic but change as + * soon as a better gateway appears) + * XX: late-switch (use best statistic but change as + * soon as a better gateway appears which has + * $routing_class more tq points) + */ + if (tq_avg > max_tq) { + if (curr_gw) + batadv_gw_node_put(curr_gw); + curr_gw = gw_node; + kref_get(&curr_gw->refcount); + } + break; + } + + if (tq_avg > max_tq) + max_tq = tq_avg; + + if (tmp_gw_factor > max_gw_factor) + max_gw_factor = tmp_gw_factor; + + batadv_gw_node_put(gw_node); + +next: + batadv_neigh_node_put(router); + if (router_ifinfo) + batadv_neigh_ifinfo_put(router_ifinfo); + } + rcu_read_unlock(); + + return curr_gw; +} + +static bool batadv_iv_gw_is_eligible(struct batadv_priv *bat_priv, + struct batadv_orig_node *curr_gw_orig, + struct batadv_orig_node *orig_node) +{ + struct batadv_neigh_ifinfo *router_orig_ifinfo = NULL; + struct batadv_neigh_ifinfo *router_gw_ifinfo = NULL; + struct batadv_neigh_node *router_gw = NULL; + struct batadv_neigh_node *router_orig = NULL; + u8 gw_tq_avg, orig_tq_avg; + bool ret = false; + + /* dynamic re-election is performed only on fast or late switch */ + if (atomic_read(&bat_priv->gw.sel_class) <= 2) + return false; + + router_gw = batadv_orig_router_get(curr_gw_orig, BATADV_IF_DEFAULT); + if (!router_gw) { + ret = true; + goto out; + } + + router_gw_ifinfo = batadv_neigh_ifinfo_get(router_gw, + BATADV_IF_DEFAULT); + if (!router_gw_ifinfo) { + ret = true; + goto out; + } + + router_orig = batadv_orig_router_get(orig_node, BATADV_IF_DEFAULT); + if (!router_orig) + goto out; + + router_orig_ifinfo = batadv_neigh_ifinfo_get(router_orig, + BATADV_IF_DEFAULT); + if (!router_orig_ifinfo) + goto out; + + gw_tq_avg = router_gw_ifinfo->bat_iv.tq_avg; + orig_tq_avg = router_orig_ifinfo->bat_iv.tq_avg; + + /* the TQ value has to be better */ + if (orig_tq_avg < gw_tq_avg) + goto out; + + /* if the routing class is greater than 3 the value tells us how much + * greater the TQ value of the new gateway must be + */ + if ((atomic_read(&bat_priv->gw.sel_class) > 3) && + (orig_tq_avg - gw_tq_avg < atomic_read(&bat_priv->gw.sel_class))) + goto out; + + batadv_dbg(BATADV_DBG_BATMAN, bat_priv, + "Restarting gateway selection: better gateway found (tq curr: %i, tq new: %i)\n", + gw_tq_avg, orig_tq_avg); + + ret = true; +out: + if (router_gw_ifinfo) + batadv_neigh_ifinfo_put(router_gw_ifinfo); + if (router_orig_ifinfo) + batadv_neigh_ifinfo_put(router_orig_ifinfo); + if (router_gw) + batadv_neigh_node_put(router_gw); + if (router_orig) + batadv_neigh_node_put(router_orig); + + return ret; +} + +/* fails if orig_node has no router */ +static int batadv_iv_gw_write_buffer_text(struct batadv_priv *bat_priv, + struct seq_file *seq, + const struct batadv_gw_node *gw_node) +{ + struct batadv_gw_node *curr_gw; + struct batadv_neigh_node *router; + struct batadv_neigh_ifinfo *router_ifinfo = NULL; + int ret = -1; + + router = batadv_orig_router_get(gw_node->orig_node, BATADV_IF_DEFAULT); + if (!router) + goto out; + + router_ifinfo = batadv_neigh_ifinfo_get(router, BATADV_IF_DEFAULT); + if (!router_ifinfo) + goto out; + + curr_gw = batadv_gw_get_selected_gw_node(bat_priv); + + seq_printf(seq, "%s %pM (%3i) %pM [%10s]: %u.%u/%u.%u MBit\n", + (curr_gw == gw_node ? "=>" : " "), + gw_node->orig_node->orig, + router_ifinfo->bat_iv.tq_avg, router->addr, + router->if_incoming->net_dev->name, + gw_node->bandwidth_down / 10, + gw_node->bandwidth_down % 10, + gw_node->bandwidth_up / 10, + gw_node->bandwidth_up % 10); + ret = seq_has_overflowed(seq) ? -1 : 0; + + if (curr_gw) + batadv_gw_node_put(curr_gw); +out: + if (router_ifinfo) + batadv_neigh_ifinfo_put(router_ifinfo); + if (router) + batadv_neigh_node_put(router); + return ret; +} + +static void batadv_iv_gw_print(struct batadv_priv *bat_priv, + struct seq_file *seq) +{ + struct batadv_gw_node *gw_node; + int gw_count = 0; + + seq_puts(seq, + " Gateway (#/255) Nexthop [outgoingIF]: advertised uplink bandwidth\n"); + + rcu_read_lock(); + hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) { + /* fails if orig_node has no router */ + if (batadv_iv_gw_write_buffer_text(bat_priv, seq, gw_node) < 0) + continue; + + gw_count++; + } + rcu_read_unlock(); + + if (gw_count == 0) + seq_puts(seq, "No gateways in range ...\n"); +} + static struct batadv_algo_ops batadv_batman_iv __read_mostly = { .name = "BATMAN_IV", .iface = { @@ -2126,6 +2340,11 @@ static struct batadv_algo_ops batadv_batman_iv __read_mostly = { .add_if = batadv_iv_ogm_orig_add_if, .del_if = batadv_iv_ogm_orig_del_if, }, + .gw = { + .get_best_gw_node = batadv_iv_gw_get_best_gw_node, + .is_eligible = batadv_iv_gw_is_eligible, + .print = batadv_iv_gw_print, + }, }; int __init batadv_iv_init(void) diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index 63a805d3f96e..ec363f39b6a9 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -80,12 +80,12 @@ static void batadv_gw_node_release(struct kref *ref) * batadv_gw_node_put - decrement the gw_node refcounter and possibly release it * @gw_node: gateway node to free */ -static void batadv_gw_node_put(struct batadv_gw_node *gw_node) +void batadv_gw_node_put(struct batadv_gw_node *gw_node) { kref_put(&gw_node->refcount, batadv_gw_node_release); } -static struct batadv_gw_node * +struct batadv_gw_node * batadv_gw_get_selected_gw_node(struct batadv_priv *bat_priv) { struct batadv_gw_node *gw_node; @@ -164,86 +164,6 @@ void batadv_gw_reselect(struct batadv_priv *bat_priv) atomic_set(&bat_priv->gw.reselect, 1); } -static struct batadv_gw_node * -batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv) -{ - struct batadv_neigh_node *router; - struct batadv_neigh_ifinfo *router_ifinfo; - struct batadv_gw_node *gw_node, *curr_gw = NULL; - u64 max_gw_factor = 0; - u64 tmp_gw_factor = 0; - u8 max_tq = 0; - u8 tq_avg; - struct batadv_orig_node *orig_node; - - rcu_read_lock(); - hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) { - orig_node = gw_node->orig_node; - router = batadv_orig_router_get(orig_node, BATADV_IF_DEFAULT); - if (!router) - continue; - - router_ifinfo = batadv_neigh_ifinfo_get(router, - BATADV_IF_DEFAULT); - if (!router_ifinfo) - goto next; - - if (!kref_get_unless_zero(&gw_node->refcount)) - goto next; - - tq_avg = router_ifinfo->bat_iv.tq_avg; - - switch (atomic_read(&bat_priv->gw.sel_class)) { - case 1: /* fast connection */ - tmp_gw_factor = tq_avg * tq_avg; - tmp_gw_factor *= gw_node->bandwidth_down; - tmp_gw_factor *= 100 * 100; - tmp_gw_factor >>= 18; - - if ((tmp_gw_factor > max_gw_factor) || - ((tmp_gw_factor == max_gw_factor) && - (tq_avg > max_tq))) { - if (curr_gw) - batadv_gw_node_put(curr_gw); - curr_gw = gw_node; - kref_get(&curr_gw->refcount); - } - break; - - default: /* 2: stable connection (use best statistic) - * 3: fast-switch (use best statistic but change as - * soon as a better gateway appears) - * XX: late-switch (use best statistic but change as - * soon as a better gateway appears which has - * $routing_class more tq points) - */ - if (tq_avg > max_tq) { - if (curr_gw) - batadv_gw_node_put(curr_gw); - curr_gw = gw_node; - kref_get(&curr_gw->refcount); - } - break; - } - - if (tq_avg > max_tq) - max_tq = tq_avg; - - if (tmp_gw_factor > max_gw_factor) - max_gw_factor = tmp_gw_factor; - - batadv_gw_node_put(gw_node); - -next: - batadv_neigh_node_put(router); - if (router_ifinfo) - batadv_neigh_ifinfo_put(router_ifinfo); - } - rcu_read_unlock(); - - return curr_gw; -} - /** * batadv_gw_check_client_stop - check if client mode has been switched off * @bat_priv: the bat priv with all the soft interface information @@ -287,12 +207,19 @@ void batadv_gw_election(struct batadv_priv *bat_priv) if (atomic_read(&bat_priv->gw.mode) != BATADV_GW_MODE_CLIENT) goto out; + if (!bat_priv->algo_ops->gw.get_best_gw_node) + goto out; + curr_gw = batadv_gw_get_selected_gw_node(bat_priv); if (!batadv_atomic_dec_not_zero(&bat_priv->gw.reselect) && curr_gw) goto out; - next_gw = batadv_gw_get_best_gw_node(bat_priv); + /* if gw.reselect is set to 1 it means that a previous call to + * gw.is_eligible() said that we have a new best GW, therefore it can + * now be picked from the list and selected + */ + next_gw = bat_priv->algo_ops->gw.get_best_gw_node(bat_priv); if (curr_gw == next_gw) goto out; @@ -360,70 +287,31 @@ out: void batadv_gw_check_election(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node) { - struct batadv_neigh_ifinfo *router_orig_tq = NULL; - struct batadv_neigh_ifinfo *router_gw_tq = NULL; struct batadv_orig_node *curr_gw_orig; - struct batadv_neigh_node *router_gw = NULL; - struct batadv_neigh_node *router_orig = NULL; - u8 gw_tq_avg, orig_tq_avg; + + /* abort immediately if the routing algorithm does not support gateway + * election + */ + if (!bat_priv->algo_ops->gw.is_eligible) + return; curr_gw_orig = batadv_gw_get_selected_orig(bat_priv); if (!curr_gw_orig) goto reselect; - router_gw = batadv_orig_router_get(curr_gw_orig, BATADV_IF_DEFAULT); - if (!router_gw) - goto reselect; - - router_gw_tq = batadv_neigh_ifinfo_get(router_gw, - BATADV_IF_DEFAULT); - if (!router_gw_tq) - goto reselect; - /* this node already is the gateway */ if (curr_gw_orig == orig_node) goto out; - router_orig = batadv_orig_router_get(orig_node, BATADV_IF_DEFAULT); - if (!router_orig) + if (!bat_priv->algo_ops->gw.is_eligible(bat_priv, curr_gw_orig, + orig_node)) goto out; - router_orig_tq = batadv_neigh_ifinfo_get(router_orig, - BATADV_IF_DEFAULT); - if (!router_orig_tq) - goto out; - - gw_tq_avg = router_gw_tq->bat_iv.tq_avg; - orig_tq_avg = router_orig_tq->bat_iv.tq_avg; - - /* the TQ value has to be better */ - if (orig_tq_avg < gw_tq_avg) - goto out; - - /* if the routing class is greater than 3 the value tells us how much - * greater the TQ value of the new gateway must be - */ - if ((atomic_read(&bat_priv->gw.sel_class) > 3) && - (orig_tq_avg - gw_tq_avg < atomic_read(&bat_priv->gw.sel_class))) - goto out; - - batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "Restarting gateway selection: better gateway found (tq curr: %i, tq new: %i)\n", - gw_tq_avg, orig_tq_avg); - reselect: batadv_gw_reselect(bat_priv); out: if (curr_gw_orig) batadv_orig_node_put(curr_gw_orig); - if (router_gw) - batadv_neigh_node_put(router_gw); - if (router_orig) - batadv_neigh_node_put(router_orig); - if (router_gw_tq) - batadv_neigh_ifinfo_put(router_gw_tq); - if (router_orig_tq) - batadv_neigh_ifinfo_put(router_orig_tq); } /** @@ -585,80 +473,31 @@ void batadv_gw_node_free(struct batadv_priv *bat_priv) spin_unlock_bh(&bat_priv->gw.list_lock); } -/* fails if orig_node has no router */ -static int batadv_write_buffer_text(struct batadv_priv *bat_priv, - struct seq_file *seq, - const struct batadv_gw_node *gw_node) -{ - struct batadv_gw_node *curr_gw; - struct batadv_neigh_node *router; - struct batadv_neigh_ifinfo *router_ifinfo = NULL; - int ret = -1; - - router = batadv_orig_router_get(gw_node->orig_node, BATADV_IF_DEFAULT); - if (!router) - goto out; - - router_ifinfo = batadv_neigh_ifinfo_get(router, BATADV_IF_DEFAULT); - if (!router_ifinfo) - goto out; - - curr_gw = batadv_gw_get_selected_gw_node(bat_priv); - - seq_printf(seq, "%s %pM (%3i) %pM [%10s]: %u.%u/%u.%u MBit\n", - (curr_gw == gw_node ? "=>" : " "), - gw_node->orig_node->orig, - router_ifinfo->bat_iv.tq_avg, router->addr, - router->if_incoming->net_dev->name, - gw_node->bandwidth_down / 10, - gw_node->bandwidth_down % 10, - gw_node->bandwidth_up / 10, - gw_node->bandwidth_up % 10); - ret = seq_has_overflowed(seq) ? -1 : 0; - - if (curr_gw) - batadv_gw_node_put(curr_gw); -out: - if (router_ifinfo) - batadv_neigh_ifinfo_put(router_ifinfo); - if (router) - batadv_neigh_node_put(router); - return ret; -} - int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset) { struct net_device *net_dev = (struct net_device *)seq->private; struct batadv_priv *bat_priv = netdev_priv(net_dev); struct batadv_hard_iface *primary_if; - struct batadv_gw_node *gw_node; - int gw_count = 0; primary_if = batadv_seq_print_text_primary_if_get(seq); if (!primary_if) - goto out; + return 0; - seq_printf(seq, - " Gateway (#/255) Nexthop [outgoingIF]: advertised uplink bandwidth ... [B.A.T.M.A.N. adv %s, MainIF/MAC: %s/%pM (%s)]\n", + seq_printf(seq, "[B.A.T.M.A.N. adv %s, MainIF/MAC: %s/%pM (%s %s)]\n", BATADV_SOURCE_VERSION, primary_if->net_dev->name, - primary_if->net_dev->dev_addr, net_dev->name); + primary_if->net_dev->dev_addr, net_dev->name, + bat_priv->algo_ops->name); - rcu_read_lock(); - hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) { - /* fails if orig_node has no router */ - if (batadv_write_buffer_text(bat_priv, seq, gw_node) < 0) - continue; + batadv_hardif_put(primary_if); - gw_count++; + if (!bat_priv->algo_ops->gw.print) { + seq_puts(seq, + "No printing function for this routing protocol\n"); + return 0; } - rcu_read_unlock(); - if (gw_count == 0) - seq_puts(seq, "No gateways in range ...\n"); + bat_priv->algo_ops->gw.print(bat_priv, seq); -out: - if (primary_if) - batadv_hardif_put(primary_if); return 0; } diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h index 582dd8c413c8..4c9edde48914 100644 --- a/net/batman-adv/gateway_client.h +++ b/net/batman-adv/gateway_client.h @@ -39,6 +39,9 @@ void batadv_gw_node_update(struct batadv_priv *bat_priv, void batadv_gw_node_delete(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node); void batadv_gw_node_free(struct batadv_priv *bat_priv); +void batadv_gw_node_put(struct batadv_gw_node *gw_node); +struct batadv_gw_node * +batadv_gw_get_selected_gw_node(struct batadv_priv *bat_priv); int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset); bool batadv_gw_out_of_range(struct batadv_priv *bat_priv, struct sk_buff *skb); enum batadv_dhcp_recipient diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c index d7bc6a87bcc9..21184810d89f 100644 --- a/net/batman-adv/gateway_common.c +++ b/net/batman-adv/gateway_common.c @@ -241,10 +241,9 @@ static void batadv_gw_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, batadv_gw_node_update(bat_priv, orig, &gateway); - /* restart gateway selection if fast or late switching was enabled */ + /* restart gateway selection */ if ((gateway.bandwidth_down != 0) && - (atomic_read(&bat_priv->gw.mode) == BATADV_GW_MODE_CLIENT) && - (atomic_read(&bat_priv->gw.sel_class) > 2)) + (atomic_read(&bat_priv->gw.mode) == BATADV_GW_MODE_CLIENT)) batadv_gw_check_election(bat_priv, orig); } diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index deaadba61a2c..54710c781ca7 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -1453,11 +1453,22 @@ struct batadv_algo_orig_ops { * struct batadv_algo_gw_ops - mesh algorithm callbacks (GW specific) * @store_sel_class: parse and stores a new GW selection class (optional) * @show_sel_class: prints the current GW selection class (optional) + * @get_best_gw_node: select the best GW from the list of available nodes + * (optional) + * @is_eligible: check if a newly discovered GW is a potential candidate for + * the election as best GW (optional) + * @print: print the gateway table (optional) */ struct batadv_algo_gw_ops { ssize_t (*store_sel_class)(struct batadv_priv *bat_priv, char *buff, size_t count); ssize_t (*show_sel_class)(struct batadv_priv *bat_priv, char *buff); + struct batadv_gw_node *(*get_best_gw_node) + (struct batadv_priv *bat_priv); + bool (*is_eligible)(struct batadv_priv *bat_priv, + struct batadv_orig_node *curr_gw_orig, + struct batadv_orig_node *orig_node); + void (*print)(struct batadv_priv *bat_priv, struct seq_file *seq); }; /** From 50164d8f500f1cd211178f7b3d062987d68fe013 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Sun, 3 Jul 2016 12:46:34 +0200 Subject: [PATCH 0038/1715] batman-adv: B.A.T.M.A.N. V - implement GW selection logic Since the GW selection logic has been made routing protocol specific it is now possible for B.A.T.M.A.N V to have its own mechanism by providing the API implementation. Implement the GW specific API in the B.A.T.M.A.N. V protocol in order to provide a working GW selection mechanism. Signed-off-by: Antonio Quartulli Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Simon Wunderlich --- net/batman-adv/bat_v.c | 223 +++++++++++++++++++++++++++++++- net/batman-adv/gateway_client.c | 5 +- net/batman-adv/gateway_client.h | 2 + 3 files changed, 226 insertions(+), 4 deletions(-) diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c index 90fd5ee877d1..1d777b171366 100644 --- a/net/batman-adv/bat_v.c +++ b/net/batman-adv/bat_v.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -40,6 +41,7 @@ #include "gateway_common.h" #include "hard-interface.h" #include "hash.h" +#include "log.h" #include "originator.h" #include "packet.h" @@ -350,6 +352,213 @@ static ssize_t batadv_v_show_sel_class(struct batadv_priv *bat_priv, char *buff) return sprintf(buff, "%u.%u MBit\n", class / 10, class % 10); } +/** + * batadv_v_gw_throughput_get - retrieve the GW-bandwidth for a given GW + * @gw_node: the GW to retrieve the metric for + * @bw: the pointer where the metric will be stored. The metric is computed as + * the minimum between the GW advertised throughput and the path throughput to + * it in the mesh + * + * Return: 0 on success, -1 on failure + */ +static int batadv_v_gw_throughput_get(struct batadv_gw_node *gw_node, u32 *bw) +{ + struct batadv_neigh_ifinfo *router_ifinfo = NULL; + struct batadv_orig_node *orig_node; + struct batadv_neigh_node *router; + int ret = -1; + + orig_node = gw_node->orig_node; + router = batadv_orig_router_get(orig_node, BATADV_IF_DEFAULT); + if (!router) + goto out; + + router_ifinfo = batadv_neigh_ifinfo_get(router, BATADV_IF_DEFAULT); + if (!router_ifinfo) + goto out; + + /* the GW metric is computed as the minimum between the path throughput + * to reach the GW itself and the advertised bandwidth. + * This gives us an approximation of the effective throughput that the + * client can expect via this particular GW node + */ + *bw = router_ifinfo->bat_v.throughput; + *bw = min_t(u32, *bw, gw_node->bandwidth_down); + + ret = 0; +out: + if (router) + batadv_neigh_node_put(router); + if (router_ifinfo) + batadv_neigh_ifinfo_put(router_ifinfo); + + return ret; +} + +/** + * batadv_v_gw_get_best_gw_node - retrieve the best GW node + * @bat_priv: the bat priv with all the soft interface information + * + * Return: the GW node having the best GW-metric, NULL if no GW is known + */ +static struct batadv_gw_node * +batadv_v_gw_get_best_gw_node(struct batadv_priv *bat_priv) +{ + struct batadv_gw_node *gw_node, *curr_gw = NULL; + u32 max_bw = 0, bw; + + rcu_read_lock(); + hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) { + if (!kref_get_unless_zero(&gw_node->refcount)) + continue; + + if (batadv_v_gw_throughput_get(gw_node, &bw) < 0) + goto next; + + if (curr_gw && (bw <= max_bw)) + goto next; + + if (curr_gw) + batadv_gw_node_put(curr_gw); + + curr_gw = gw_node; + kref_get(&curr_gw->refcount); + max_bw = bw; + +next: + batadv_gw_node_put(gw_node); + } + rcu_read_unlock(); + + return curr_gw; +} + +/** + * batadv_v_gw_is_eligible - check if a originator would be selected as GW + * @bat_priv: the bat priv with all the soft interface information + * @curr_gw_orig: originator representing the currently selected GW + * @orig_node: the originator representing the new candidate + * + * Return: true if orig_node can be selected as current GW, false otherwise + */ +static bool batadv_v_gw_is_eligible(struct batadv_priv *bat_priv, + struct batadv_orig_node *curr_gw_orig, + struct batadv_orig_node *orig_node) +{ + struct batadv_gw_node *curr_gw = NULL, *orig_gw = NULL; + u32 gw_throughput, orig_throughput, threshold; + bool ret = false; + + threshold = atomic_read(&bat_priv->gw.sel_class); + + curr_gw = batadv_gw_node_get(bat_priv, curr_gw_orig); + if (!curr_gw) { + ret = true; + goto out; + } + + if (batadv_v_gw_throughput_get(curr_gw, &gw_throughput) < 0) { + ret = true; + goto out; + } + + orig_gw = batadv_gw_node_get(bat_priv, orig_node); + if (!orig_node) + goto out; + + if (batadv_v_gw_throughput_get(orig_gw, &orig_throughput) < 0) + goto out; + + if (orig_throughput < gw_throughput) + goto out; + + if ((orig_throughput - gw_throughput) < threshold) + goto out; + + batadv_dbg(BATADV_DBG_BATMAN, bat_priv, + "Restarting gateway selection: better gateway found (throughput curr: %u, throughput new: %u)\n", + gw_throughput, orig_throughput); + + ret = true; +out: + if (curr_gw) + batadv_gw_node_put(curr_gw); + if (orig_gw) + batadv_gw_node_put(orig_gw); + + return ret; +} + +/* fails if orig_node has no router */ +static int batadv_v_gw_write_buffer_text(struct batadv_priv *bat_priv, + struct seq_file *seq, + const struct batadv_gw_node *gw_node) +{ + struct batadv_gw_node *curr_gw; + struct batadv_neigh_node *router; + struct batadv_neigh_ifinfo *router_ifinfo = NULL; + int ret = -1; + + router = batadv_orig_router_get(gw_node->orig_node, BATADV_IF_DEFAULT); + if (!router) + goto out; + + router_ifinfo = batadv_neigh_ifinfo_get(router, BATADV_IF_DEFAULT); + if (!router_ifinfo) + goto out; + + curr_gw = batadv_gw_get_selected_gw_node(bat_priv); + + seq_printf(seq, "%s %pM (%9u.%1u) %pM [%10s]: %u.%u/%u.%u MBit\n", + (curr_gw == gw_node ? "=>" : " "), + gw_node->orig_node->orig, + router_ifinfo->bat_v.throughput / 10, + router_ifinfo->bat_v.throughput % 10, router->addr, + router->if_incoming->net_dev->name, + gw_node->bandwidth_down / 10, + gw_node->bandwidth_down % 10, + gw_node->bandwidth_up / 10, + gw_node->bandwidth_up % 10); + ret = seq_has_overflowed(seq) ? -1 : 0; + + if (curr_gw) + batadv_gw_node_put(curr_gw); +out: + if (router_ifinfo) + batadv_neigh_ifinfo_put(router_ifinfo); + if (router) + batadv_neigh_node_put(router); + return ret; +} + +/** + * batadv_v_gw_print - print the gateway list + * @bat_priv: the bat priv with all the soft interface information + * @seq: gateway table seq_file struct + */ +static void batadv_v_gw_print(struct batadv_priv *bat_priv, + struct seq_file *seq) +{ + struct batadv_gw_node *gw_node; + int gw_count = 0; + + seq_puts(seq, + " Gateway ( throughput) Nexthop [outgoingIF]: advertised uplink bandwidth\n"); + + rcu_read_lock(); + hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) { + /* fails if orig_node has no router */ + if (batadv_v_gw_write_buffer_text(bat_priv, seq, gw_node) < 0) + continue; + + gw_count++; + } + rcu_read_unlock(); + + if (gw_count == 0) + seq_puts(seq, "No gateways in range ...\n"); +} + static struct batadv_algo_ops batadv_batman_v __read_mostly = { .name = "BATMAN_V", .iface = { @@ -371,6 +580,9 @@ static struct batadv_algo_ops batadv_batman_v __read_mostly = { .gw = { .store_sel_class = batadv_v_store_sel_class, .show_sel_class = batadv_v_show_sel_class, + .get_best_gw_node = batadv_v_gw_get_best_gw_node, + .is_eligible = batadv_v_gw_is_eligible, + .print = batadv_v_gw_print, }, }; @@ -397,7 +609,16 @@ void batadv_v_hardif_init(struct batadv_hard_iface *hard_iface) */ int batadv_v_mesh_init(struct batadv_priv *bat_priv) { - return batadv_v_ogm_init(bat_priv); + int ret = 0; + + ret = batadv_v_ogm_init(bat_priv); + if (ret < 0) + return ret; + + /* set default throughput difference threshold to 5Mbps */ + atomic_set(&bat_priv->gw.sel_class, 50); + + return 0; } /** diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index ec363f39b6a9..a77a17939f1e 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -360,9 +360,8 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv, * * Return: gateway node if found or NULL otherwise. */ -static struct batadv_gw_node * -batadv_gw_node_get(struct batadv_priv *bat_priv, - struct batadv_orig_node *orig_node) +struct batadv_gw_node *batadv_gw_node_get(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig_node) { struct batadv_gw_node *gw_node_tmp, *gw_node = NULL; diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h index 4c9edde48914..6b40432aa1ed 100644 --- a/net/batman-adv/gateway_client.h +++ b/net/batman-adv/gateway_client.h @@ -47,5 +47,7 @@ bool batadv_gw_out_of_range(struct batadv_priv *bat_priv, struct sk_buff *skb); enum batadv_dhcp_recipient batadv_gw_dhcp_recipient_get(struct sk_buff *skb, unsigned int *header_len, u8 *chaddr); +struct batadv_gw_node *batadv_gw_node_get(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig_node); #endif /* _NET_BATMAN_ADV_GATEWAY_CLIENT_H_ */ From a8d8d1de414ee4c393ba43dbbf01eaf21a4f47bd Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Sun, 3 Jul 2016 12:46:35 +0200 Subject: [PATCH 0039/1715] batman-adv: disable sysfs knobs when GW-mode is not implemented Now that the GW-mode code is algorithm specific, batman-adv expects the routing algorithm to implement some APIs to make it work. However, such APIs are not mandatory, therefore we might have algorithms not providing them. In this case all the sysfs knobs related to GW-mode should be deactivated to make sure that settings injected by the user for this feature are rejected. Signed-off-by: Antonio Quartulli Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Simon Wunderlich --- net/batman-adv/sysfs.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index e78bd7f2f276..02d96f224c60 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -429,6 +429,13 @@ static ssize_t batadv_show_gw_mode(struct kobject *kobj, struct attribute *attr, struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj); int bytes_written; + /* GW mode is not available if the routing algorithm in use does not + * implement the GW API + */ + if (!bat_priv->algo_ops->gw.get_best_gw_node || + !bat_priv->algo_ops->gw.is_eligible) + return -ENOENT; + switch (atomic_read(&bat_priv->gw.mode)) { case BATADV_GW_MODE_CLIENT: bytes_written = sprintf(buff, "%s\n", @@ -456,6 +463,13 @@ static ssize_t batadv_store_gw_mode(struct kobject *kobj, char *curr_gw_mode_str; int gw_mode_tmp = -1; + /* toggling GW mode is allowed only if the routing algorithm in use + * provides the GW API + */ + if (!bat_priv->algo_ops->gw.get_best_gw_node || + !bat_priv->algo_ops->gw.is_eligible) + return -EINVAL; + if (buff[count - 1] == '\n') buff[count - 1] = '\0'; @@ -520,6 +534,13 @@ static ssize_t batadv_show_gw_sel_class(struct kobject *kobj, { struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj); + /* GW selection class is not available if the routing algorithm in use + * does not implement the GW API + */ + if (!bat_priv->algo_ops->gw.get_best_gw_node || + !bat_priv->algo_ops->gw.is_eligible) + return -ENOENT; + if (bat_priv->algo_ops->gw.show_sel_class) return bat_priv->algo_ops->gw.show_sel_class(bat_priv, buff); @@ -532,6 +553,13 @@ static ssize_t batadv_store_gw_sel_class(struct kobject *kobj, { struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj); + /* setting the GW selection class is allowed only if the routing + * algorithm in use implements the GW API + */ + if (!bat_priv->algo_ops->gw.get_best_gw_node || + !bat_priv->algo_ops->gw.is_eligible) + return -EINVAL; + if (buff[count - 1] == '\n') buff[count - 1] = '\0'; From 57b125029c0483ae53abf6d5d26c3eaa1b521573 Mon Sep 17 00:00:00 2001 From: Markus Pargmann Date: Sun, 3 Jul 2016 11:07:14 +0200 Subject: [PATCH 0040/1715] batman-adv: iv_ogm, Reduce code duplication The difference between tq1 and tq2 are calculated the same way in two separate functions. This patch moves the common code to a separate function 'batadv_iv_ogm_neigh_diff' which handles everything necessary. The other two functions can then handle errors and use the difference directly. Signed-off-by: Markus Pargmann [sven@narfation.org: rebased on current version, initialize return variable in batadv_iv_ogm_neigh_diff, add kerneldoc, convert to bool return type] Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Simon Wunderlich --- net/batman-adv/bat_iv_ogm.c | 95 ++++++++++++++++++++++--------------- 1 file changed, 56 insertions(+), 39 deletions(-) diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index d04874fc034d..57e0af9b39e4 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -2018,6 +2018,51 @@ static void batadv_iv_neigh_print(struct batadv_priv *bat_priv, seq_puts(seq, "No batman nodes in range ...\n"); } +/** + * batadv_iv_ogm_neigh_diff - calculate tq difference of two neighbors + * @neigh1: the first neighbor object of the comparison + * @if_outgoing1: outgoing interface for the first neighbor + * @neigh2: the second neighbor object of the comparison + * @if_outgoing2: outgoing interface for the second neighbor + * @diff: pointer to integer receiving the calculated difference + * + * The content of *@diff is only valid when this function returns true. + * It is less, equal to or greater than 0 if the metric via neigh1 is lower, + * the same as or higher than the metric via neigh2 + * + * Return: true when the difference could be calculated, false otherwise + */ +static bool batadv_iv_ogm_neigh_diff(struct batadv_neigh_node *neigh1, + struct batadv_hard_iface *if_outgoing1, + struct batadv_neigh_node *neigh2, + struct batadv_hard_iface *if_outgoing2, + int *diff) +{ + struct batadv_neigh_ifinfo *neigh1_ifinfo, *neigh2_ifinfo; + u8 tq1, tq2; + bool ret = true; + + neigh1_ifinfo = batadv_neigh_ifinfo_get(neigh1, if_outgoing1); + neigh2_ifinfo = batadv_neigh_ifinfo_get(neigh2, if_outgoing2); + + if (!neigh1_ifinfo || !neigh2_ifinfo) { + ret = false; + goto out; + } + + tq1 = neigh1_ifinfo->bat_iv.tq_avg; + tq2 = neigh2_ifinfo->bat_iv.tq_avg; + *diff = (int)tq1 - (int)tq2; + +out: + if (neigh1_ifinfo) + batadv_neigh_ifinfo_put(neigh1_ifinfo); + if (neigh2_ifinfo) + batadv_neigh_ifinfo_put(neigh2_ifinfo); + + return ret; +} + /** * batadv_iv_ogm_neigh_cmp - compare the metrics of two neighbors * @neigh1: the first neighbor object of the comparison @@ -2033,27 +2078,13 @@ static int batadv_iv_ogm_neigh_cmp(struct batadv_neigh_node *neigh1, struct batadv_neigh_node *neigh2, struct batadv_hard_iface *if_outgoing2) { - struct batadv_neigh_ifinfo *neigh1_ifinfo, *neigh2_ifinfo; - u8 tq1, tq2; + bool ret; int diff; - neigh1_ifinfo = batadv_neigh_ifinfo_get(neigh1, if_outgoing1); - neigh2_ifinfo = batadv_neigh_ifinfo_get(neigh2, if_outgoing2); - - if (!neigh1_ifinfo || !neigh2_ifinfo) { - diff = 0; - goto out; - } - - tq1 = neigh1_ifinfo->bat_iv.tq_avg; - tq2 = neigh2_ifinfo->bat_iv.tq_avg; - diff = tq1 - tq2; - -out: - if (neigh1_ifinfo) - batadv_neigh_ifinfo_put(neigh1_ifinfo); - if (neigh2_ifinfo) - batadv_neigh_ifinfo_put(neigh2_ifinfo); + ret = batadv_iv_ogm_neigh_diff(neigh1, if_outgoing1, neigh2, + if_outgoing2, &diff); + if (!ret) + return 0; return diff; } @@ -2075,29 +2106,15 @@ batadv_iv_ogm_neigh_is_sob(struct batadv_neigh_node *neigh1, struct batadv_neigh_node *neigh2, struct batadv_hard_iface *if_outgoing2) { - struct batadv_neigh_ifinfo *neigh1_ifinfo, *neigh2_ifinfo; - u8 tq1, tq2; bool ret; + int diff; - neigh1_ifinfo = batadv_neigh_ifinfo_get(neigh1, if_outgoing1); - neigh2_ifinfo = batadv_neigh_ifinfo_get(neigh2, if_outgoing2); - - /* we can't say that the metric is better */ - if (!neigh1_ifinfo || !neigh2_ifinfo) { - ret = false; - goto out; - } - - tq1 = neigh1_ifinfo->bat_iv.tq_avg; - tq2 = neigh2_ifinfo->bat_iv.tq_avg; - ret = (tq1 - tq2) > -BATADV_TQ_SIMILARITY_THRESHOLD; - -out: - if (neigh1_ifinfo) - batadv_neigh_ifinfo_put(neigh1_ifinfo); - if (neigh2_ifinfo) - batadv_neigh_ifinfo_put(neigh2_ifinfo); + ret = batadv_iv_ogm_neigh_diff(neigh1, if_outgoing1, neigh2, + if_outgoing2, &diff); + if (!ret) + return false; + ret = diff > -BATADV_TQ_SIMILARITY_THRESHOLD; return ret; } From 4fd261bf586b0fc932d5fd20d9b1648547b16eca Mon Sep 17 00:00:00 2001 From: kbuild test robot Date: Wed, 6 Jul 2016 10:49:29 +0800 Subject: [PATCH 0041/1715] batman-adv: fix boolreturn.cocci warnings net/batman-adv/bridge_loop_avoidance.c:1105:9-10: WARNING: return of 0/1 in function 'batadv_bla_process_claim' with return type bool Return statements in functions returning bool should use true/false instead of 1/0. Generated by: scripts/coccinelle/misc/boolreturn.cocci Signed-off-by: Fengguang Wu Signed-off-by: Marek Lindner Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/bridge_loop_avoidance.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index ad2ffe16d29f..c75ef648f0fd 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -1148,7 +1148,7 @@ static bool batadv_bla_process_claim(struct batadv_priv *bat_priv, /* Let the loopdetect frames on the mesh in any case. */ if (bla_dst->type == BATADV_CLAIM_TYPE_LOOPDETECT) - return 0; + return false; /* check if it is a claim frame. */ ret = batadv_check_claim_group(bat_priv, primary_if, hw_src, hw_dst, From a65e5481315e0754a20f58aa374423610a311f33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Mon, 20 Jun 2016 21:39:54 +0200 Subject: [PATCH 0042/1715] batman-adv: Introduce forward packet creation helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch abstracts the forward packet creation into the new function batadv_forw_packet_alloc(). The queue counting and interface reference counters are now handled internally within batadv_forw_packet_alloc() and its batadv_forw_packet_free() counterpart. This should reduce the risk of having reference/queue counting bugs again and should increase code readibility. Signed-off-by: Linus Lüssing Signed-off-by: Marek Lindner Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/bat_iv_ogm.c | 38 +++--------- net/batman-adv/send.c | 111 ++++++++++++++++++++++++++---------- net/batman-adv/send.h | 6 ++ net/batman-adv/types.h | 2 + 4 files changed, 98 insertions(+), 59 deletions(-) diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 57e0af9b39e4..a40cdf273625 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -675,19 +675,12 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, struct batadv_forw_packet *forw_packet_aggr; unsigned char *skb_buff; unsigned int skb_size; + atomic_t *queue_left = own_packet ? NULL : &bat_priv->batman_queue_left; - /* own packet should always be scheduled */ - if (!own_packet) { - if (!batadv_atomic_dec_not_zero(&bat_priv->batman_queue_left)) { - batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "batman packet queue full\n"); - return; - } - } - - forw_packet_aggr = kmalloc(sizeof(*forw_packet_aggr), GFP_ATOMIC); + forw_packet_aggr = batadv_forw_packet_alloc(if_incoming, if_outgoing, + queue_left, bat_priv); if (!forw_packet_aggr) - goto out_nomem; + return; if (atomic_read(&bat_priv->aggregated_ogms) && packet_len < BATADV_MAX_AGGREGATION_BYTES) @@ -698,8 +691,11 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, skb_size += ETH_HLEN; forw_packet_aggr->skb = netdev_alloc_skb_ip_align(NULL, skb_size); - if (!forw_packet_aggr->skb) - goto out_free_forw_packet; + if (!forw_packet_aggr->skb) { + batadv_forw_packet_free(forw_packet_aggr); + return; + } + forw_packet_aggr->skb->priority = TC_PRIO_CONTROL; skb_reserve(forw_packet_aggr->skb, ETH_HLEN); @@ -707,12 +703,7 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, forw_packet_aggr->packet_len = packet_len; memcpy(skb_buff, packet_buff, packet_len); - kref_get(&if_incoming->refcount); - kref_get(&if_outgoing->refcount); forw_packet_aggr->own = own_packet; - forw_packet_aggr->if_incoming = if_incoming; - forw_packet_aggr->if_outgoing = if_outgoing; - forw_packet_aggr->num_packets = 0; forw_packet_aggr->direct_link_flags = BATADV_NO_FLAGS; forw_packet_aggr->send_time = send_time; @@ -731,13 +722,6 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, queue_delayed_work(batadv_event_workqueue, &forw_packet_aggr->delayed_work, send_time - jiffies); - - return; -out_free_forw_packet: - kfree(forw_packet_aggr); -out_nomem: - if (!own_packet) - atomic_inc(&bat_priv->batman_queue_left); } /* aggregate a new packet into the existing ogm packet */ @@ -1820,10 +1804,6 @@ static void batadv_iv_send_outstanding_bat_ogm_packet(struct work_struct *work) batadv_iv_ogm_schedule(forw_packet->if_incoming); out: - /* don't count own packet */ - if (!forw_packet->own) - atomic_inc(&bat_priv->batman_queue_left); - batadv_forw_packet_free(forw_packet); } diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index 6191159484df..33d8bd14140c 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -439,6 +439,13 @@ int batadv_send_skb_via_gw(struct batadv_priv *bat_priv, struct sk_buff *skb, BATADV_P_DATA, orig_node, vid); } +/** + * batadv_forw_packet_free - free a forwarding packet + * @forw_packet: The packet to free + * + * This frees a forwarding packet and releases any resources it might + * have claimed. + */ void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet) { kfree_skb(forw_packet->skb); @@ -446,9 +453,73 @@ void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet) batadv_hardif_put(forw_packet->if_incoming); if (forw_packet->if_outgoing) batadv_hardif_put(forw_packet->if_outgoing); + if (forw_packet->queue_left) + atomic_inc(forw_packet->queue_left); kfree(forw_packet); } +/** + * batadv_forw_packet_alloc - allocate a forwarding packet + * @if_incoming: The (optional) if_incoming to be grabbed + * @if_outgoing: The (optional) if_outgoing to be grabbed + * @queue_left: The (optional) queue counter to decrease + * @bat_priv: The bat_priv for the mesh of this forw_packet + * + * Allocates a forwarding packet and tries to get a reference to the + * (optional) if_incoming, if_outgoing and queue_left. If queue_left + * is NULL then bat_priv is optional, too. + * + * Return: An allocated forwarding packet on success, NULL otherwise. + */ +struct batadv_forw_packet * +batadv_forw_packet_alloc(struct batadv_hard_iface *if_incoming, + struct batadv_hard_iface *if_outgoing, + atomic_t *queue_left, + struct batadv_priv *bat_priv) +{ + struct batadv_forw_packet *forw_packet; + const char *qname; + + if (queue_left && !batadv_atomic_dec_not_zero(queue_left)) { + qname = "unknown"; + + if (queue_left == &bat_priv->bcast_queue_left) + qname = "bcast"; + + if (queue_left == &bat_priv->batman_queue_left) + qname = "batman"; + + batadv_dbg(BATADV_DBG_BATMAN, bat_priv, + "%s queue is full\n", qname); + + return NULL; + } + + forw_packet = kmalloc(sizeof(*forw_packet), GFP_ATOMIC); + if (!forw_packet) + goto err; + + if (if_incoming) + kref_get(&if_incoming->refcount); + + if (if_outgoing) + kref_get(&if_outgoing->refcount); + + forw_packet->skb = NULL; + forw_packet->queue_left = queue_left; + forw_packet->if_incoming = if_incoming; + forw_packet->if_outgoing = if_outgoing; + forw_packet->num_packets = 0; + + return forw_packet; + +err: + if (queue_left) + atomic_inc(queue_left); + + return NULL; +} + static void _batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, struct batadv_forw_packet *forw_packet, @@ -487,24 +558,20 @@ int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, struct batadv_bcast_packet *bcast_packet; struct sk_buff *newskb; - if (!batadv_atomic_dec_not_zero(&bat_priv->bcast_queue_left)) { - batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "bcast packet queue full\n"); - goto out; - } - primary_if = batadv_primary_if_get_selected(bat_priv); if (!primary_if) - goto out_and_inc; - - forw_packet = kmalloc(sizeof(*forw_packet), GFP_ATOMIC); + goto err; + forw_packet = batadv_forw_packet_alloc(primary_if, NULL, + &bat_priv->bcast_queue_left, + bat_priv); + batadv_hardif_put(primary_if); if (!forw_packet) - goto out_and_inc; + goto err; newskb = skb_copy(skb, GFP_ATOMIC); if (!newskb) - goto packet_free; + goto err_packet_free; /* as we have a copy now, it is safe to decrease the TTL */ bcast_packet = (struct batadv_bcast_packet *)newskb->data; @@ -513,11 +580,6 @@ int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, skb_reset_mac_header(newskb); forw_packet->skb = newskb; - forw_packet->if_incoming = primary_if; - forw_packet->if_outgoing = NULL; - - /* how often did we send the bcast packet ? */ - forw_packet->num_packets = 0; INIT_DELAYED_WORK(&forw_packet->delayed_work, batadv_send_outstanding_bcast_packet); @@ -525,13 +587,9 @@ int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, _batadv_add_bcast_packet_to_list(bat_priv, forw_packet, delay); return NETDEV_TX_OK; -packet_free: - kfree(forw_packet); -out_and_inc: - atomic_inc(&bat_priv->bcast_queue_left); -out: - if (primary_if) - batadv_hardif_put(primary_if); +err_packet_free: + batadv_forw_packet_free(forw_packet); +err: return NETDEV_TX_BUSY; } @@ -592,7 +650,6 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work) out: batadv_forw_packet_free(forw_packet); - atomic_inc(&bat_priv->bcast_queue_left); } void @@ -633,9 +690,6 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv, if (pending) { hlist_del(&forw_packet->list); - if (!forw_packet->own) - atomic_inc(&bat_priv->bcast_queue_left); - batadv_forw_packet_free(forw_packet); } } @@ -663,9 +717,6 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv, if (pending) { hlist_del(&forw_packet->list); - if (!forw_packet->own) - atomic_inc(&bat_priv->batman_queue_left); - batadv_forw_packet_free(forw_packet); } } diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h index 7cecb7563b45..999f78683d9e 100644 --- a/net/batman-adv/send.h +++ b/net/batman-adv/send.h @@ -28,6 +28,12 @@ struct sk_buff; void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet); +struct batadv_forw_packet * +batadv_forw_packet_alloc(struct batadv_hard_iface *if_incoming, + struct batadv_hard_iface *if_outgoing, + atomic_t *queue_left, + struct batadv_priv *bat_priv); + int batadv_send_skb_to_orig(struct sk_buff *skb, struct batadv_orig_node *orig_node, struct batadv_hard_iface *recv_if); diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 54710c781ca7..72806a3c40df 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -1375,6 +1375,7 @@ struct batadv_skb_cb { * locally generated packet * @if_outgoing: packet where the packet should be sent to, or NULL if * unspecified + * @queue_left: The queue (counter) this packet was applied to */ struct batadv_forw_packet { struct hlist_node list; @@ -1387,6 +1388,7 @@ struct batadv_forw_packet { struct delayed_work delayed_work; struct batadv_hard_iface *if_incoming; struct batadv_hard_iface *if_outgoing; + atomic_t *queue_left; }; /** From 86452f81d200d4d6ad489ef84311030eff84dd84 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sat, 25 Jun 2016 16:44:06 +0200 Subject: [PATCH 0043/1715] batman-adv: use kmem_cache for translation table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The translation table (global, local) is usually the part of batman-adv which has the most dynamical allocated objects. Most of them (tt_local_entry, tt_global_entry, tt_orig_list_entry, tt_change_node, tt_req_node, tt_roam_node) are equally sized. So it makes sense to have them allocated from a kmem_cache for each type. This approach allowed a small wireless router (TP-Link TL-841NDv8; SLUB allocator) to store 34% more translation table entries compared to the current implementation. [1] https://open-mesh.org/projects/batman-adv/wiki/Kmalloc-kmem-cache-tests Reported-by: Linus Lüssing Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Simon Wunderlich --- net/batman-adv/main.c | 16 ++- net/batman-adv/translation-table.c | 169 ++++++++++++++++++++++++++--- net/batman-adv/translation-table.h | 3 + 3 files changed, 169 insertions(+), 19 deletions(-) diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index f61479b5a99d..ef07e5b34415 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -82,6 +82,12 @@ static void batadv_recv_handler_init(void); static int __init batadv_init(void) { + int ret; + + ret = batadv_tt_cache_init(); + if (ret < 0) + return ret; + INIT_LIST_HEAD(&batadv_hardif_list); batadv_algo_init(); @@ -93,9 +99,8 @@ static int __init batadv_init(void) batadv_tp_meter_init(); batadv_event_workqueue = create_singlethread_workqueue("bat_events"); - if (!batadv_event_workqueue) - return -ENOMEM; + goto err_create_wq; batadv_socket_init(); batadv_debugfs_init(); @@ -108,6 +113,11 @@ static int __init batadv_init(void) BATADV_SOURCE_VERSION, BATADV_COMPAT_VERSION); return 0; + +err_create_wq: + batadv_tt_cache_destroy(); + + return -ENOMEM; } static void __exit batadv_exit(void) @@ -123,6 +133,8 @@ static void __exit batadv_exit(void) batadv_event_workqueue = NULL; rcu_barrier(); + + batadv_tt_cache_destroy(); } int batadv_mesh_init(struct net_device *soft_iface) diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 7e6df7a4964a..af2bfef6dca8 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -22,12 +22,14 @@ #include #include #include +#include #include #include #include #include #include #include +#include #include #include #include @@ -54,6 +56,13 @@ #include "soft-interface.h" #include "tvlv.h" +static struct kmem_cache *batadv_tl_cache __read_mostly; +static struct kmem_cache *batadv_tg_cache __read_mostly; +static struct kmem_cache *batadv_tt_orig_cache __read_mostly; +static struct kmem_cache *batadv_tt_change_cache __read_mostly; +static struct kmem_cache *batadv_tt_req_cache __read_mostly; +static struct kmem_cache *batadv_tt_roam_cache __read_mostly; + /* hash class keys */ static struct lock_class_key batadv_tt_local_hash_lock_class_key; static struct lock_class_key batadv_tt_global_hash_lock_class_key; @@ -204,6 +213,20 @@ batadv_tt_global_hash_find(struct batadv_priv *bat_priv, const u8 *addr, return tt_global_entry; } +/** + * batadv_tt_local_entry_free_rcu - free the tt_local_entry + * @rcu: rcu pointer of the tt_local_entry + */ +static void batadv_tt_local_entry_free_rcu(struct rcu_head *rcu) +{ + struct batadv_tt_local_entry *tt_local_entry; + + tt_local_entry = container_of(rcu, struct batadv_tt_local_entry, + common.rcu); + + kmem_cache_free(batadv_tl_cache, tt_local_entry); +} + /** * batadv_tt_local_entry_release - release tt_local_entry from lists and queue * for free after rcu grace period @@ -218,7 +241,7 @@ static void batadv_tt_local_entry_release(struct kref *ref) batadv_softif_vlan_put(tt_local_entry->vlan); - kfree_rcu(tt_local_entry, common.rcu); + call_rcu(&tt_local_entry->common.rcu, batadv_tt_local_entry_free_rcu); } /** @@ -233,6 +256,20 @@ batadv_tt_local_entry_put(struct batadv_tt_local_entry *tt_local_entry) batadv_tt_local_entry_release); } +/** + * batadv_tt_global_entry_free_rcu - free the tt_global_entry + * @rcu: rcu pointer of the tt_global_entry + */ +static void batadv_tt_global_entry_free_rcu(struct rcu_head *rcu) +{ + struct batadv_tt_global_entry *tt_global_entry; + + tt_global_entry = container_of(rcu, struct batadv_tt_global_entry, + common.rcu); + + kmem_cache_free(batadv_tg_cache, tt_global_entry); +} + /** * batadv_tt_global_entry_release - release tt_global_entry from lists and queue * for free after rcu grace period @@ -246,7 +283,8 @@ static void batadv_tt_global_entry_release(struct kref *ref) common.refcount); batadv_tt_global_del_orig_list(tt_global_entry); - kfree_rcu(tt_global_entry, common.rcu); + + call_rcu(&tt_global_entry->common.rcu, batadv_tt_global_entry_free_rcu); } /** @@ -383,6 +421,19 @@ static void batadv_tt_global_size_dec(struct batadv_orig_node *orig_node, batadv_tt_global_size_mod(orig_node, vid, -1); } +/** + * batadv_tt_orig_list_entry_free_rcu - free the orig_entry + * @rcu: rcu pointer of the orig_entry + */ +static void batadv_tt_orig_list_entry_free_rcu(struct rcu_head *rcu) +{ + struct batadv_tt_orig_list_entry *orig_entry; + + orig_entry = container_of(rcu, struct batadv_tt_orig_list_entry, rcu); + + kmem_cache_free(batadv_tt_orig_cache, orig_entry); +} + /** * batadv_tt_orig_list_entry_release - release tt orig entry from lists and * queue for free after rcu grace period @@ -396,7 +447,7 @@ static void batadv_tt_orig_list_entry_release(struct kref *ref) refcount); batadv_orig_node_put(orig_entry->orig_node); - kfree_rcu(orig_entry, rcu); + call_rcu(&orig_entry->rcu, batadv_tt_orig_list_entry_free_rcu); } /** @@ -426,7 +477,7 @@ static void batadv_tt_local_event(struct batadv_priv *bat_priv, bool event_removed = false; bool del_op_requested, del_op_entry; - tt_change_node = kmalloc(sizeof(*tt_change_node), GFP_ATOMIC); + tt_change_node = kmem_cache_alloc(batadv_tt_change_cache, GFP_ATOMIC); if (!tt_change_node) return; @@ -467,8 +518,8 @@ static void batadv_tt_local_event(struct batadv_priv *bat_priv, continue; del: list_del(&entry->list); - kfree(entry); - kfree(tt_change_node); + kmem_cache_free(batadv_tt_change_cache, entry); + kmem_cache_free(batadv_tt_change_cache, tt_change_node); event_removed = true; goto unlock; } @@ -646,7 +697,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr, goto out; } - tt_local = kmalloc(sizeof(*tt_local), GFP_ATOMIC); + tt_local = kmem_cache_alloc(batadv_tl_cache, GFP_ATOMIC); if (!tt_local) goto out; @@ -656,7 +707,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr, net_ratelimited_function(batadv_info, soft_iface, "adding TT local entry %pM to non-existent VLAN %d\n", addr, BATADV_PRINT_VID(vid)); - kfree(tt_local); + kmem_cache_free(batadv_tl_cache, tt_local); tt_local = NULL; goto out; } @@ -959,7 +1010,7 @@ static void batadv_tt_tvlv_container_update(struct batadv_priv *bat_priv) tt_diff_entries_count++; } list_del(&entry->list); - kfree(entry); + kmem_cache_free(batadv_tt_change_cache, entry); } spin_unlock_bh(&bat_priv->tt.changes_list_lock); @@ -1259,7 +1310,7 @@ static void batadv_tt_changes_list_free(struct batadv_priv *bat_priv) list_for_each_entry_safe(entry, safe, &bat_priv->tt.changes_list, list) { list_del(&entry->list); - kfree(entry); + kmem_cache_free(batadv_tt_change_cache, entry); } atomic_set(&bat_priv->tt.local_changes, 0); @@ -1341,7 +1392,7 @@ batadv_tt_global_orig_entry_add(struct batadv_tt_global_entry *tt_global, goto out; } - orig_entry = kzalloc(sizeof(*orig_entry), GFP_ATOMIC); + orig_entry = kmem_cache_zalloc(batadv_tt_orig_cache, GFP_ATOMIC); if (!orig_entry) goto out; @@ -1411,7 +1462,8 @@ static bool batadv_tt_global_add(struct batadv_priv *bat_priv, goto out; if (!tt_global_entry) { - tt_global_entry = kzalloc(sizeof(*tt_global_entry), GFP_ATOMIC); + tt_global_entry = kmem_cache_zalloc(batadv_tg_cache, + GFP_ATOMIC); if (!tt_global_entry) goto out; @@ -2280,7 +2332,7 @@ static void batadv_tt_req_node_release(struct kref *ref) tt_req_node = container_of(ref, struct batadv_tt_req_node, refcount); - kfree(tt_req_node); + kmem_cache_free(batadv_tt_req_cache, tt_req_node); } /** @@ -2367,7 +2419,7 @@ batadv_tt_req_node_new(struct batadv_priv *bat_priv, goto unlock; } - tt_req_node = kmalloc(sizeof(*tt_req_node), GFP_ATOMIC); + tt_req_node = kmem_cache_alloc(batadv_tt_req_cache, GFP_ATOMIC); if (!tt_req_node) goto unlock; @@ -3104,7 +3156,7 @@ static void batadv_tt_roam_list_free(struct batadv_priv *bat_priv) list_for_each_entry_safe(node, safe, &bat_priv->tt.roam_list, list) { list_del(&node->list); - kfree(node); + kmem_cache_free(batadv_tt_roam_cache, node); } spin_unlock_bh(&bat_priv->tt.roam_list_lock); @@ -3121,7 +3173,7 @@ static void batadv_tt_roam_purge(struct batadv_priv *bat_priv) continue; list_del(&node->list); - kfree(node); + kmem_cache_free(batadv_tt_roam_cache, node); } spin_unlock_bh(&bat_priv->tt.roam_list_lock); } @@ -3162,7 +3214,8 @@ static bool batadv_tt_check_roam_count(struct batadv_priv *bat_priv, u8 *client) } if (!ret) { - tt_roam_node = kmalloc(sizeof(*tt_roam_node), GFP_ATOMIC); + tt_roam_node = kmem_cache_alloc(batadv_tt_roam_cache, + GFP_ATOMIC); if (!tt_roam_node) goto unlock; @@ -3865,3 +3918,85 @@ bool batadv_tt_global_is_isolated(struct batadv_priv *bat_priv, return ret; } + +/** + * batadv_tt_cache_init - Initialize tt memory object cache + * + * Return: 0 on success or negative error number in case of failure. + */ +int __init batadv_tt_cache_init(void) +{ + size_t tl_size = sizeof(struct batadv_tt_local_entry); + size_t tg_size = sizeof(struct batadv_tt_global_entry); + size_t tt_orig_size = sizeof(struct batadv_tt_orig_list_entry); + size_t tt_change_size = sizeof(struct batadv_tt_change_node); + size_t tt_req_size = sizeof(struct batadv_tt_req_node); + size_t tt_roam_size = sizeof(struct batadv_tt_roam_node); + + batadv_tl_cache = kmem_cache_create("batadv_tl_cache", tl_size, 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!batadv_tl_cache) + return -ENOMEM; + + batadv_tg_cache = kmem_cache_create("batadv_tg_cache", tg_size, 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!batadv_tg_cache) + goto err_tt_tl_destroy; + + batadv_tt_orig_cache = kmem_cache_create("batadv_tt_orig_cache", + tt_orig_size, 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!batadv_tt_orig_cache) + goto err_tt_tg_destroy; + + batadv_tt_change_cache = kmem_cache_create("batadv_tt_change_cache", + tt_change_size, 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!batadv_tt_change_cache) + goto err_tt_orig_destroy; + + batadv_tt_req_cache = kmem_cache_create("batadv_tt_req_cache", + tt_req_size, 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!batadv_tt_req_cache) + goto err_tt_change_destroy; + + batadv_tt_roam_cache = kmem_cache_create("batadv_tt_roam_cache", + tt_roam_size, 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!batadv_tt_roam_cache) + goto err_tt_req_destroy; + + return 0; + +err_tt_req_destroy: + kmem_cache_destroy(batadv_tt_req_cache); + batadv_tt_req_cache = NULL; +err_tt_change_destroy: + kmem_cache_destroy(batadv_tt_change_cache); + batadv_tt_change_cache = NULL; +err_tt_orig_destroy: + kmem_cache_destroy(batadv_tt_orig_cache); + batadv_tt_orig_cache = NULL; +err_tt_tg_destroy: + kmem_cache_destroy(batadv_tg_cache); + batadv_tg_cache = NULL; +err_tt_tl_destroy: + kmem_cache_destroy(batadv_tl_cache); + batadv_tl_cache = NULL; + + return -ENOMEM; +} + +/** + * batadv_tt_cache_destroy - Destroy tt memory object cache + */ +void batadv_tt_cache_destroy(void) +{ + kmem_cache_destroy(batadv_tl_cache); + kmem_cache_destroy(batadv_tg_cache); + kmem_cache_destroy(batadv_tt_orig_cache); + kmem_cache_destroy(batadv_tt_change_cache); + kmem_cache_destroy(batadv_tt_req_cache); + kmem_cache_destroy(batadv_tt_roam_cache); +} diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h index 7c7e2c006bfe..02b0f85527cc 100644 --- a/net/batman-adv/translation-table.h +++ b/net/batman-adv/translation-table.h @@ -59,4 +59,7 @@ bool batadv_tt_add_temporary_global_entry(struct batadv_priv *bat_priv, bool batadv_tt_global_is_isolated(struct batadv_priv *bat_priv, const u8 *addr, unsigned short vid); +int batadv_tt_cache_init(void); +void batadv_tt_cache_destroy(void); + #endif /* _NET_BATMAN_ADV_TRANSLATION_TABLE_H_ */ From f19dc7770f5d55274ef9821392199daca03469a9 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Mon, 27 Jun 2016 08:15:42 +0200 Subject: [PATCH 0044/1715] batman-adv: Remove orig_node reference handling from send_skb_unicast MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function batadv_send_skb_unicast is not acquiring a reference for an orig_node nor removing it from any datastructure. It still reduces the reference counter for an object which is still in the hands of the caller. This is confusing and can lead in the future to problems in the reference handling of the caller function. Signed-off-by: Sven Eckelmann Acked-by: Linus Lüssing Signed-off-by: Marek Lindner Signed-off-by: Simon Wunderlich --- net/batman-adv/send.c | 25 +++++++++++++++++-------- net/batman-adv/soft-interface.c | 3 +++ 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index 33d8bd14140c..8d4e1f578574 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -315,8 +315,7 @@ out: * * Wrap the given skb into a batman-adv unicast or unicast-4addr header * depending on whether BATADV_UNICAST or BATADV_UNICAST_4ADDR was supplied - * as packet_type. Then send this frame to the given orig_node and release a - * reference to this orig_node. + * as packet_type. Then send this frame to the given orig_node. * * Return: NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise. */ @@ -370,8 +369,6 @@ int batadv_send_skb_unicast(struct batadv_priv *bat_priv, ret = NET_XMIT_SUCCESS; out: - if (orig_node) - batadv_orig_node_put(orig_node); if (ret == NET_XMIT_DROP) kfree_skb(skb); return ret; @@ -403,6 +400,7 @@ int batadv_send_skb_via_tt_generic(struct batadv_priv *bat_priv, struct ethhdr *ethhdr = (struct ethhdr *)skb->data; struct batadv_orig_node *orig_node; u8 *src, *dst; + int ret; src = ethhdr->h_source; dst = ethhdr->h_dest; @@ -414,8 +412,13 @@ int batadv_send_skb_via_tt_generic(struct batadv_priv *bat_priv, } orig_node = batadv_transtable_search(bat_priv, src, dst, vid); - return batadv_send_skb_unicast(bat_priv, skb, packet_type, - packet_subtype, orig_node, vid); + ret = batadv_send_skb_unicast(bat_priv, skb, packet_type, + packet_subtype, orig_node, vid); + + if (orig_node) + batadv_orig_node_put(orig_node); + + return ret; } /** @@ -433,10 +436,16 @@ int batadv_send_skb_via_gw(struct batadv_priv *bat_priv, struct sk_buff *skb, unsigned short vid) { struct batadv_orig_node *orig_node; + int ret; orig_node = batadv_gw_get_selected_orig(bat_priv); - return batadv_send_skb_unicast(bat_priv, skb, BATADV_UNICAST_4ADDR, - BATADV_P_DATA, orig_node, vid); + ret = batadv_send_skb_unicast(bat_priv, skb, BATADV_UNICAST_4ADDR, + BATADV_P_DATA, orig_node, vid); + + if (orig_node) + batadv_orig_node_put(orig_node); + + return ret; } /** diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 216ac03ab432..e508bf5957b3 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -57,6 +57,7 @@ #include "hard-interface.h" #include "multicast.h" #include "network-coding.h" +#include "originator.h" #include "packet.h" #include "send.h" #include "sysfs.h" @@ -377,6 +378,8 @@ dropped: dropped_freed: batadv_inc_counter(bat_priv, BATADV_CNT_TX_DROPPED); end: + if (mcast_single_orig) + batadv_orig_node_put(mcast_single_orig); if (primary_if) batadv_hardif_put(primary_if); return NETDEV_TX_OK; From 4d7de48c797c9207412da4e350c5170617eaf8c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Mon, 11 Jul 2016 11:16:36 +0200 Subject: [PATCH 0045/1715] batman-adv: Use bitwise instead of arithmetic operator for flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This silences the following coccinelle warning: "WARNING: sum of probable bitmasks, consider |" Signed-off-by: Linus Lüssing Signed-off-by: Marek Lindner Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/multicast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index cc915073a753..894df6020f6a 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -528,7 +528,7 @@ update: } return !(mcast_data.flags & - (BATADV_MCAST_WANT_ALL_IPV4 + BATADV_MCAST_WANT_ALL_IPV6)); + (BATADV_MCAST_WANT_ALL_IPV4 | BATADV_MCAST_WANT_ALL_IPV6)); } /** From b5dcbad25219b82408e15e6d775a406be2116af1 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Wed, 29 Jun 2016 23:45:57 +0200 Subject: [PATCH 0046/1715] batman-adv: Fix consistency of update route messages The debug messages of _batadv_update_route were printed before the actual route change is done. At this point it is not really known which curr_router will be replaced. Thus the messages could print the wrong operation. Printing the debug messages after the operation was done avoids this problem. Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Simon Wunderlich --- net/batman-adv/routing.c | 43 ++++++++++++++++------------------------ 1 file changed, 17 insertions(+), 26 deletions(-) diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 7602c001e92b..610f2c45edcd 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -74,11 +74,23 @@ static void _batadv_update_route(struct batadv_priv *bat_priv, if (!orig_ifinfo) return; - rcu_read_lock(); - curr_router = rcu_dereference(orig_ifinfo->router); - if (curr_router && !kref_get_unless_zero(&curr_router->refcount)) - curr_router = NULL; - rcu_read_unlock(); + spin_lock_bh(&orig_node->neigh_list_lock); + /* curr_router used earlier may not be the current orig_ifinfo->router + * anymore because it was dereferenced outside of the neigh_list_lock + * protected region. After the new best neighbor has replace the current + * best neighbor the reference counter needs to decrease. Consequently, + * the code needs to ensure the curr_router variable contains a pointer + * to the replaced best neighbor. + */ + curr_router = rcu_dereference_protected(orig_ifinfo->router, true); + + /* increase refcount of new best neighbor */ + if (neigh_node) + kref_get(&neigh_node->refcount); + + rcu_assign_pointer(orig_ifinfo->router, neigh_node); + spin_unlock_bh(&orig_node->neigh_list_lock); + batadv_orig_ifinfo_put(orig_ifinfo); /* route deleted */ if ((curr_router) && (!neigh_node)) { @@ -100,27 +112,6 @@ static void _batadv_update_route(struct batadv_priv *bat_priv, curr_router->addr); } - if (curr_router) - batadv_neigh_node_put(curr_router); - - spin_lock_bh(&orig_node->neigh_list_lock); - /* curr_router used earlier may not be the current orig_ifinfo->router - * anymore because it was dereferenced outside of the neigh_list_lock - * protected region. After the new best neighbor has replace the current - * best neighbor the reference counter needs to decrease. Consequently, - * the code needs to ensure the curr_router variable contains a pointer - * to the replaced best neighbor. - */ - curr_router = rcu_dereference_protected(orig_ifinfo->router, true); - - /* increase refcount of new best neighbor */ - if (neigh_node) - kref_get(&neigh_node->refcount); - - rcu_assign_pointer(orig_ifinfo->router, neigh_node); - spin_unlock_bh(&orig_node->neigh_list_lock); - batadv_orig_ifinfo_put(orig_ifinfo); - /* decrease refcount of previous best neighbor */ if (curr_router) batadv_neigh_node_put(curr_router); From 275019d2f00ed93e800f505a7b6f9e8ecf396898 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sun, 3 Jul 2016 13:31:33 +0200 Subject: [PATCH 0047/1715] batman-adv: Handle parent interfaces in a different netns batman-adv tries to prevent the user from placing a batX soft interface into another batman mesh as a hard interface. It does this by walking up the devices list of parents and ensures they are all none batX interfaces. iflink can point to an interface in a different namespace, so also retrieve the parents name space when finding the parent and use it when doing the comparison. Signed-off-by: Andrew Lunn [sven@narfation.org: Fix alignments, simplify parent netns retrieval] Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich Signed-off-by: Marek Lindner --- net/batman-adv/hard-interface.c | 50 ++++++++++++++++++++++++++++----- 1 file changed, 43 insertions(+), 7 deletions(-) diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 714af8e7bfa5..43c9a3e02512 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -35,6 +35,8 @@ #include #include #include +#include +#include #include "bat_v.h" #include "bridge_loop_avoidance.h" @@ -83,26 +85,56 @@ out: return hard_iface; } +/** + * batadv_getlink_net - return link net namespace (of use fallback) + * @netdev: net_device to check + * @fallback_net: return in case get_link_net is not available for @netdev + * + * Return: result of rtnl_link_ops->get_link_net or @fallback_net + */ +static const struct net *batadv_getlink_net(const struct net_device *netdev, + const struct net *fallback_net) +{ + if (!netdev->rtnl_link_ops) + return fallback_net; + + if (!netdev->rtnl_link_ops->get_link_net) + return fallback_net; + + return netdev->rtnl_link_ops->get_link_net(netdev); +} + /** * batadv_mutual_parents - check if two devices are each others parent - * @dev1: 1st net_device - * @dev2: 2nd net_device + * @dev1: 1st net dev + * @net1: 1st devices netns + * @dev2: 2nd net dev + * @net2: 2nd devices netns * * veth devices come in pairs and each is the parent of the other! * * Return: true if the devices are each others parent, otherwise false */ static bool batadv_mutual_parents(const struct net_device *dev1, - const struct net_device *dev2) + const struct net *net1, + const struct net_device *dev2, + const struct net *net2) { int dev1_parent_iflink = dev_get_iflink(dev1); int dev2_parent_iflink = dev_get_iflink(dev2); + const struct net *dev1_parent_net; + const struct net *dev2_parent_net; + + dev1_parent_net = batadv_getlink_net(dev1, net1); + dev2_parent_net = batadv_getlink_net(dev2, net2); if (!dev1_parent_iflink || !dev2_parent_iflink) return false; return (dev1_parent_iflink == dev2->ifindex) && - (dev2_parent_iflink == dev1->ifindex); + (dev2_parent_iflink == dev1->ifindex) && + net_eq(dev1_parent_net, net2) && + net_eq(dev2_parent_net, net1); } /** @@ -120,8 +152,9 @@ static bool batadv_mutual_parents(const struct net_device *dev1, */ static bool batadv_is_on_batman_iface(const struct net_device *net_dev) { - struct net_device *parent_dev; struct net *net = dev_net(net_dev); + struct net_device *parent_dev; + const struct net *parent_net; bool ret; /* check if this is a batman-adv mesh interface */ @@ -133,13 +166,16 @@ static bool batadv_is_on_batman_iface(const struct net_device *net_dev) dev_get_iflink(net_dev) == net_dev->ifindex) return false; + parent_net = batadv_getlink_net(net_dev, net); + /* recurse over the parent device */ - parent_dev = __dev_get_by_index(net, dev_get_iflink(net_dev)); + parent_dev = __dev_get_by_index((struct net *)parent_net, + dev_get_iflink(net_dev)); /* if we got a NULL parent_dev there is something broken.. */ if (WARN(!parent_dev, "Cannot find parent device")) return false; - if (batadv_mutual_parents(net_dev, parent_dev)) + if (batadv_mutual_parents(net_dev, net, parent_dev, parent_net)) return false; ret = batadv_is_on_batman_iface(parent_dev); From 94969208c8c7f3dd06c0e5e61155077b573d5d5f Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sun, 3 Jul 2016 13:31:34 +0200 Subject: [PATCH 0048/1715] batman-adv: Suppress debugfs entries for netns's Debugfs is not netns aware. It thus has problems when the same interface name exists in multiple network name spaces. Work around this by not creating entries for interfaces in name spaces other than the default name space. This means meshes in network namespaces cannot be managed via debugfs, but there will soon be a netlink interface which is netns aware. Signed-off-by: Andrew Lunn Signed-off-by: Simon Wunderlich Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner --- net/batman-adv/debugfs.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c index 1d68b6e63b96..b4ffba7dd583 100644 --- a/net/batman-adv/debugfs.c +++ b/net/batman-adv/debugfs.c @@ -31,6 +31,7 @@ #include #include #include +#include #include "bat_algo.h" #include "bridge_loop_avoidance.h" @@ -305,12 +306,16 @@ void batadv_debugfs_destroy(void) */ int batadv_debugfs_add_hardif(struct batadv_hard_iface *hard_iface) { + struct net *net = dev_net(hard_iface->net_dev); struct batadv_debuginfo **bat_debug; struct dentry *file; if (!batadv_debugfs) goto out; + if (net != &init_net) + return 0; + hard_iface->debug_dir = debugfs_create_dir(hard_iface->net_dev->name, batadv_debugfs); if (!hard_iface->debug_dir) @@ -341,6 +346,11 @@ out: */ void batadv_debugfs_del_hardif(struct batadv_hard_iface *hard_iface) { + struct net *net = dev_net(hard_iface->net_dev); + + if (net != &init_net) + return; + if (batadv_debugfs) { debugfs_remove_recursive(hard_iface->debug_dir); hard_iface->debug_dir = NULL; @@ -351,11 +361,15 @@ int batadv_debugfs_add_meshif(struct net_device *dev) { struct batadv_priv *bat_priv = netdev_priv(dev); struct batadv_debuginfo **bat_debug; + struct net *net = dev_net(dev); struct dentry *file; if (!batadv_debugfs) goto out; + if (net != &init_net) + return 0; + bat_priv->debug_dir = debugfs_create_dir(dev->name, batadv_debugfs); if (!bat_priv->debug_dir) goto out; @@ -392,6 +406,10 @@ out: void batadv_debugfs_del_meshif(struct net_device *dev) { struct batadv_priv *bat_priv = netdev_priv(dev); + struct net *net = dev_net(dev); + + if (net != &init_net) + return; batadv_debug_log_cleanup(bat_priv); From 07a3061e0832fe22932e0fa977581e45b9c42431 Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Sun, 3 Jul 2016 13:31:35 +0200 Subject: [PATCH 0049/1715] batman-adv: netlink: add routing_algo query BATADV_CMD_GET_ROUTING_ALGOS is used to get the list of supported routing algorithms. Signed-off-by: Matthias Schiffer Signed-off-by: Andrew Lunn [sven.eckelmann@open-mesh.com: Reduce the number of changes to BATADV_CMD_GET_ROUTING_ALGOS, fix includes] Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich Signed-off-by: Marek Lindner --- include/uapi/linux/batman_adv.h | 2 + net/batman-adv/bat_algo.c | 68 +++++++++++++++++++++++++++++++++ net/batman-adv/bat_algo.h | 3 ++ net/batman-adv/netlink.c | 9 ++++- net/batman-adv/netlink.h | 3 ++ 5 files changed, 84 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h index 0fbf6fd4711b..7afce444a64e 100644 --- a/include/uapi/linux/batman_adv.h +++ b/include/uapi/linux/batman_adv.h @@ -73,6 +73,7 @@ enum batadv_nl_attrs { * @BATADV_CMD_GET_MESH_INFO: Query basic information about batman-adv device * @BATADV_CMD_TP_METER: Start a tp meter session * @BATADV_CMD_TP_METER_CANCEL: Cancel a tp meter session + * @BATADV_CMD_GET_ROUTING_ALGOS: Query the list of routing algorithms. * @__BATADV_CMD_AFTER_LAST: internal use * @BATADV_CMD_MAX: highest used command number */ @@ -81,6 +82,7 @@ enum batadv_nl_commands { BATADV_CMD_GET_MESH_INFO, BATADV_CMD_TP_METER, BATADV_CMD_TP_METER_CANCEL, + BATADV_CMD_GET_ROUTING_ALGOS, /* add new commands above here */ __BATADV_CMD_AFTER_LAST, BATADV_CMD_MAX = __BATADV_CMD_AFTER_LAST - 1 diff --git a/net/batman-adv/bat_algo.c b/net/batman-adv/bat_algo.c index 81dbbf569bd4..f2cc50d354d9 100644 --- a/net/batman-adv/bat_algo.c +++ b/net/batman-adv/bat_algo.c @@ -20,12 +20,18 @@ #include #include #include +#include #include #include +#include #include #include +#include +#include +#include #include "bat_algo.h" +#include "netlink.h" char batadv_routing_algo[20] = "BATMAN_IV"; static struct hlist_head batadv_algo_list; @@ -138,3 +144,65 @@ static struct kparam_string batadv_param_string_ra = { module_param_cb(routing_algo, &batadv_param_ops_ra, &batadv_param_string_ra, 0644); + +/** + * batadv_algo_dump_entry - fill in information about one supported routing + * algorithm + * @msg: netlink message to be sent back + * @portid: Port to reply to + * @seq: Sequence number of message + * @bat_algo_ops: Algorithm to be dumped + * + * Return: Error number, or 0 on success + */ +static int batadv_algo_dump_entry(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_algo_ops *bat_algo_ops) +{ + void *hdr; + + hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, + NLM_F_MULTI, BATADV_CMD_GET_ROUTING_ALGOS); + if (!hdr) + return -EMSGSIZE; + + if (nla_put_string(msg, BATADV_ATTR_ALGO_NAME, bat_algo_ops->name)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +/** + * batadv_algo_dump - fill in information about supported routing + * algorithms + * @msg: netlink message to be sent back + * @cb: Parameters to the netlink request + * + * Return: Length of reply message. + */ +int batadv_algo_dump(struct sk_buff *msg, struct netlink_callback *cb) +{ + int portid = NETLINK_CB(cb->skb).portid; + struct batadv_algo_ops *bat_algo_ops; + int skip = cb->args[0]; + int i = 0; + + hlist_for_each_entry(bat_algo_ops, &batadv_algo_list, list) { + if (i++ < skip) + continue; + + if (batadv_algo_dump_entry(msg, portid, cb->nlh->nlmsg_seq, + bat_algo_ops)) { + i--; + break; + } + } + + cb->args[0] = i; + + return msg->len; +} diff --git a/net/batman-adv/bat_algo.h b/net/batman-adv/bat_algo.h index 860d773dd8fa..3b5b69cdd12b 100644 --- a/net/batman-adv/bat_algo.h +++ b/net/batman-adv/bat_algo.h @@ -22,7 +22,9 @@ #include +struct netlink_callback; struct seq_file; +struct sk_buff; extern char batadv_routing_algo[]; extern struct list_head batadv_hardif_list; @@ -31,5 +33,6 @@ void batadv_algo_init(void); int batadv_algo_register(struct batadv_algo_ops *bat_algo_ops); int batadv_algo_select(struct batadv_priv *bat_priv, char *name); int batadv_algo_seq_print_text(struct seq_file *seq, void *offset); +int batadv_algo_dump(struct sk_buff *msg, struct netlink_callback *cb); #endif /* _NET_BATMAN_ADV_BAT_ALGO_H_ */ diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index 231f8eaf075b..19fb2657e274 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -32,13 +32,14 @@ #include #include +#include "bat_algo.h" #include "hard-interface.h" #include "soft-interface.h" #include "tp_meter.h" struct sk_buff; -static struct genl_family batadv_netlink_family = { +struct genl_family batadv_netlink_family = { .id = GENL_ID_GENERATE, .hdrsize = 0, .name = BATADV_NL_NAME, @@ -399,6 +400,12 @@ static struct genl_ops batadv_netlink_ops[] = { .policy = batadv_netlink_policy, .doit = batadv_netlink_tp_meter_cancel, }, + { + .cmd = BATADV_CMD_GET_ROUTING_ALGOS, + .flags = GENL_ADMIN_PERM, + .policy = batadv_netlink_policy, + .dumpit = batadv_algo_dump, + }, }; /** diff --git a/net/batman-adv/netlink.h b/net/batman-adv/netlink.h index 945653ab58c6..b399f49504df 100644 --- a/net/batman-adv/netlink.h +++ b/net/batman-adv/netlink.h @@ -21,6 +21,7 @@ #include "main.h" #include +#include void batadv_netlink_register(void); void batadv_netlink_unregister(void); @@ -29,4 +30,6 @@ int batadv_netlink_tpmeter_notify(struct batadv_priv *bat_priv, const u8 *dst, u8 result, u32 test_time, u64 total_bytes, u32 cookie); +extern struct genl_family batadv_netlink_family; + #endif /* _NET_BATMAN_ADV_NETLINK_H_ */ From b60620cf567b79da46096a0ba29b39f23b6e7f1c Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Sun, 3 Jul 2016 13:31:36 +0200 Subject: [PATCH 0050/1715] batman-adv: netlink: hardif query BATADV_CMD_GET_HARDIFS will return the list of hardifs (including index, name and MAC address) of all hardifs for a given softif. Signed-off-by: Matthias Schiffer Signed-off-by: Andrew Lunn [sven.eckelmann@open-mesh.com: Reduce the number of changes to BATADV_CMD_GET_HARDIFS, add policy for attributes] Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich Signed-off-by: Marek Lindner --- include/uapi/linux/batman_adv.h | 4 + net/batman-adv/netlink.c | 128 +++++++++++++++++++++++++++++++- 2 files changed, 130 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h index 7afce444a64e..8abcbca14b6a 100644 --- a/include/uapi/linux/batman_adv.h +++ b/include/uapi/linux/batman_adv.h @@ -40,6 +40,7 @@ * @BATADV_ATTR_TPMETER_BYTES: amount of acked bytes during run * @BATADV_ATTR_TPMETER_COOKIE: session cookie to match tp_meter session * @BATADV_ATTR_PAD: attribute used for padding for 64-bit alignment + * @BATADV_ATTR_ACTIVE: Flag indicating if the hard interface is active * @__BATADV_ATTR_AFTER_LAST: internal use * @NUM_BATADV_ATTR: total number of batadv_nl_attrs available * @BATADV_ATTR_MAX: highest attribute number currently defined @@ -60,6 +61,7 @@ enum batadv_nl_attrs { BATADV_ATTR_TPMETER_BYTES, BATADV_ATTR_TPMETER_COOKIE, BATADV_ATTR_PAD, + BATADV_ATTR_ACTIVE, /* add attributes above here, update the policy in netlink.c */ __BATADV_ATTR_AFTER_LAST, NUM_BATADV_ATTR = __BATADV_ATTR_AFTER_LAST, @@ -74,6 +76,7 @@ enum batadv_nl_attrs { * @BATADV_CMD_TP_METER: Start a tp meter session * @BATADV_CMD_TP_METER_CANCEL: Cancel a tp meter session * @BATADV_CMD_GET_ROUTING_ALGOS: Query the list of routing algorithms. + * @BATADV_CMD_GET_HARDIFS: Query list of hard interfaces * @__BATADV_CMD_AFTER_LAST: internal use * @BATADV_CMD_MAX: highest used command number */ @@ -83,6 +86,7 @@ enum batadv_nl_commands { BATADV_CMD_TP_METER, BATADV_CMD_TP_METER_CANCEL, BATADV_CMD_GET_ROUTING_ALGOS, + BATADV_CMD_GET_HARDIFS, /* add new commands above here */ __BATADV_CMD_AFTER_LAST, BATADV_CMD_MAX = __BATADV_CMD_AFTER_LAST - 1 diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index 19fb2657e274..3f872d6eec57 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -26,10 +26,14 @@ #include #include #include +#include +#include +#include #include #include #include #include +#include #include #include "bat_algo.h" @@ -37,8 +41,6 @@ #include "soft-interface.h" #include "tp_meter.h" -struct sk_buff; - struct genl_family batadv_netlink_family = { .id = GENL_ID_GENERATE, .hdrsize = 0, @@ -70,8 +72,24 @@ static struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = { [BATADV_ATTR_TPMETER_TEST_TIME] = { .type = NLA_U32 }, [BATADV_ATTR_TPMETER_BYTES] = { .type = NLA_U64 }, [BATADV_ATTR_TPMETER_COOKIE] = { .type = NLA_U32 }, + [BATADV_ATTR_ACTIVE] = { .type = NLA_FLAG }, }; +/** + * batadv_netlink_get_ifindex - Extract an interface index from a message + * @nlh: Message header + * @attrtype: Attribute which holds an interface index + * + * Return: interface index, or 0. + */ +static int +batadv_netlink_get_ifindex(const struct nlmsghdr *nlh, int attrtype) +{ + struct nlattr *attr = nlmsg_find_attr(nlh, GENL_HDRLEN, attrtype); + + return attr ? nla_get_u32(attr) : 0; +} + /** * batadv_netlink_mesh_info_put - fill in generic information about mesh * interface @@ -381,6 +399,106 @@ out: return ret; } +/** + * batadv_netlink_dump_hardif_entry - Dump one hard interface into a message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @hard_iface: Hard interface to dump + * + * Return: error code, or 0 on success + */ +static int +batadv_netlink_dump_hardif_entry(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_hard_iface *hard_iface) +{ + struct net_device *net_dev = hard_iface->net_dev; + void *hdr; + + hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, NLM_F_MULTI, + BATADV_CMD_GET_HARDIFS); + if (!hdr) + return -EMSGSIZE; + + if (nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX, + net_dev->ifindex) || + nla_put_string(msg, BATADV_ATTR_HARD_IFNAME, + net_dev->name) || + nla_put(msg, BATADV_ATTR_HARD_ADDRESS, ETH_ALEN, + net_dev->dev_addr)) + goto nla_put_failure; + + if (hard_iface->if_status == BATADV_IF_ACTIVE) { + if (nla_put_flag(msg, BATADV_ATTR_ACTIVE)) + goto nla_put_failure; + } + + genlmsg_end(msg, hdr); + return 0; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +/** + * batadv_netlink_dump_hardifs - Dump all hard interface into a messages + * @msg: Netlink message to dump into + * @cb: Parameters from query + * + * Return: error code, or length of reply message on success + */ +static int +batadv_netlink_dump_hardifs(struct sk_buff *msg, struct netlink_callback *cb) +{ + struct net *net = sock_net(cb->skb->sk); + struct net_device *soft_iface; + struct batadv_hard_iface *hard_iface; + int ifindex; + int portid = NETLINK_CB(cb->skb).portid; + int seq = cb->nlh->nlmsg_seq; + int skip = cb->args[0]; + int i = 0; + + ifindex = batadv_netlink_get_ifindex(cb->nlh, + BATADV_ATTR_MESH_IFINDEX); + if (!ifindex) + return -EINVAL; + + soft_iface = dev_get_by_index(net, ifindex); + if (!soft_iface) + return -ENODEV; + + if (!batadv_softif_is_valid(soft_iface)) { + dev_put(soft_iface); + return -ENODEV; + } + + rcu_read_lock(); + + list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { + if (hard_iface->soft_iface != soft_iface) + continue; + + if (i++ < skip) + continue; + + if (batadv_netlink_dump_hardif_entry(msg, portid, seq, + hard_iface)) { + i--; + break; + } + } + + rcu_read_unlock(); + + dev_put(soft_iface); + + cb->args[0] = i; + + return msg->len; +} + static struct genl_ops batadv_netlink_ops[] = { { .cmd = BATADV_CMD_GET_MESH_INFO, @@ -406,6 +524,12 @@ static struct genl_ops batadv_netlink_ops[] = { .policy = batadv_netlink_policy, .dumpit = batadv_algo_dump, }, + { + .cmd = BATADV_CMD_GET_HARDIFS, + .flags = GENL_ADMIN_PERM, + .policy = batadv_netlink_policy, + .dumpit = batadv_netlink_dump_hardifs, + }, }; /** From d34f05507db245bef819b684ad84f9e0f9bb003d Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Sun, 3 Jul 2016 13:31:37 +0200 Subject: [PATCH 0051/1715] batman-adv: netlink: add translation table query This adds the commands BATADV_CMD_GET_TRANSTABLE_LOCAL and BATADV_CMD_GET_TRANSTABLE_GLOBAL, which correspond to the transtable_local and transtable_global debugfs files. The batadv_tt_client_flags enum is moved to the UAPI to expose it as part of the netlink API. Signed-off-by: Matthias Schiffer Signed-off-by: Andrew Lunn [sven.eckelmann@open-mesh.com: add policy for attributes, fix includes] Signed-off-by: Sven Eckelmann [sw@simonwunderlich.de: fix VID attributes content] Signed-off-by: Simon Wunderlich Signed-off-by: Marek Lindner --- include/uapi/linux/batman_adv.h | 56 +++++ net/batman-adv/netlink.c | 23 +- net/batman-adv/netlink.h | 3 + net/batman-adv/packet.h | 36 --- net/batman-adv/translation-table.c | 377 +++++++++++++++++++++++++++++ net/batman-adv/translation-table.h | 4 + 6 files changed, 462 insertions(+), 37 deletions(-) diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h index 8abcbca14b6a..1168d058cd2b 100644 --- a/include/uapi/linux/batman_adv.h +++ b/include/uapi/linux/batman_adv.h @@ -22,6 +22,42 @@ #define BATADV_NL_MCAST_GROUP_TPMETER "tpmeter" +/** + * enum batadv_tt_client_flags - TT client specific flags + * @BATADV_TT_CLIENT_DEL: the client has to be deleted from the table + * @BATADV_TT_CLIENT_ROAM: the client roamed to/from another node and the new + * update telling its new real location has not been received/sent yet + * @BATADV_TT_CLIENT_WIFI: this client is connected through a wifi interface. + * This information is used by the "AP Isolation" feature + * @BATADV_TT_CLIENT_ISOLA: this client is considered "isolated". This + * information is used by the Extended Isolation feature + * @BATADV_TT_CLIENT_NOPURGE: this client should never be removed from the table + * @BATADV_TT_CLIENT_NEW: this client has been added to the local table but has + * not been announced yet + * @BATADV_TT_CLIENT_PENDING: this client is marked for removal but it is kept + * in the table for one more originator interval for consistency purposes + * @BATADV_TT_CLIENT_TEMP: this global client has been detected to be part of + * the network but no nnode has already announced it + * + * Bits from 0 to 7 are called _remote flags_ because they are sent on the wire. + * Bits from 8 to 15 are called _local flags_ because they are used for local + * computations only. + * + * Bits from 4 to 7 - a subset of remote flags - are ensured to be in sync with + * the other nodes in the network. To achieve this goal these flags are included + * in the TT CRC computation. + */ +enum batadv_tt_client_flags { + BATADV_TT_CLIENT_DEL = (1 << 0), + BATADV_TT_CLIENT_ROAM = (1 << 1), + BATADV_TT_CLIENT_WIFI = (1 << 4), + BATADV_TT_CLIENT_ISOLA = (1 << 5), + BATADV_TT_CLIENT_NOPURGE = (1 << 8), + BATADV_TT_CLIENT_NEW = (1 << 9), + BATADV_TT_CLIENT_PENDING = (1 << 10), + BATADV_TT_CLIENT_TEMP = (1 << 11), +}; + /** * enum batadv_nl_attrs - batman-adv netlink attributes * @@ -41,6 +77,14 @@ * @BATADV_ATTR_TPMETER_COOKIE: session cookie to match tp_meter session * @BATADV_ATTR_PAD: attribute used for padding for 64-bit alignment * @BATADV_ATTR_ACTIVE: Flag indicating if the hard interface is active + * @BATADV_ATTR_TT_ADDRESS: Client MAC address + * @BATADV_ATTR_TT_TTVN: Translation table version + * @BATADV_ATTR_TT_LAST_TTVN: Previous translation table version + * @BATADV_ATTR_TT_CRC32: CRC32 over translation table + * @BATADV_ATTR_TT_VID: VLAN ID + * @BATADV_ATTR_TT_FLAGS: Translation table client flags + * @BATADV_ATTR_FLAG_BEST: Flags indicating entry is the best + * @BATADV_ATTR_LAST_SEEN_MSECS: Time in milliseconds since last seen * @__BATADV_ATTR_AFTER_LAST: internal use * @NUM_BATADV_ATTR: total number of batadv_nl_attrs available * @BATADV_ATTR_MAX: highest attribute number currently defined @@ -62,6 +106,14 @@ enum batadv_nl_attrs { BATADV_ATTR_TPMETER_COOKIE, BATADV_ATTR_PAD, BATADV_ATTR_ACTIVE, + BATADV_ATTR_TT_ADDRESS, + BATADV_ATTR_TT_TTVN, + BATADV_ATTR_TT_LAST_TTVN, + BATADV_ATTR_TT_CRC32, + BATADV_ATTR_TT_VID, + BATADV_ATTR_TT_FLAGS, + BATADV_ATTR_FLAG_BEST, + BATADV_ATTR_LAST_SEEN_MSECS, /* add attributes above here, update the policy in netlink.c */ __BATADV_ATTR_AFTER_LAST, NUM_BATADV_ATTR = __BATADV_ATTR_AFTER_LAST, @@ -77,6 +129,8 @@ enum batadv_nl_attrs { * @BATADV_CMD_TP_METER_CANCEL: Cancel a tp meter session * @BATADV_CMD_GET_ROUTING_ALGOS: Query the list of routing algorithms. * @BATADV_CMD_GET_HARDIFS: Query list of hard interfaces + * @BATADV_CMD_GET_TRANSTABLE_LOCAL: Query list of local translations + * @BATADV_CMD_GET_TRANSTABLE_GLOBAL Query list of global translations * @__BATADV_CMD_AFTER_LAST: internal use * @BATADV_CMD_MAX: highest used command number */ @@ -87,6 +141,8 @@ enum batadv_nl_commands { BATADV_CMD_TP_METER_CANCEL, BATADV_CMD_GET_ROUTING_ALGOS, BATADV_CMD_GET_HARDIFS, + BATADV_CMD_GET_TRANSTABLE_LOCAL, + BATADV_CMD_GET_TRANSTABLE_GLOBAL, /* add new commands above here */ __BATADV_CMD_AFTER_LAST, BATADV_CMD_MAX = __BATADV_CMD_AFTER_LAST - 1 diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index 3f872d6eec57..14360ec16513 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -40,6 +40,7 @@ #include "hard-interface.h" #include "soft-interface.h" #include "tp_meter.h" +#include "translation-table.h" struct genl_family batadv_netlink_family = { .id = GENL_ID_GENERATE, @@ -73,6 +74,14 @@ static struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = { [BATADV_ATTR_TPMETER_BYTES] = { .type = NLA_U64 }, [BATADV_ATTR_TPMETER_COOKIE] = { .type = NLA_U32 }, [BATADV_ATTR_ACTIVE] = { .type = NLA_FLAG }, + [BATADV_ATTR_TT_ADDRESS] = { .len = ETH_ALEN }, + [BATADV_ATTR_TT_TTVN] = { .type = NLA_U8 }, + [BATADV_ATTR_TT_LAST_TTVN] = { .type = NLA_U8 }, + [BATADV_ATTR_TT_CRC32] = { .type = NLA_U32 }, + [BATADV_ATTR_TT_VID] = { .type = NLA_U16 }, + [BATADV_ATTR_TT_FLAGS] = { .type = NLA_U32 }, + [BATADV_ATTR_FLAG_BEST] = { .type = NLA_FLAG }, + [BATADV_ATTR_LAST_SEEN_MSECS] = { .type = NLA_U32 }, }; /** @@ -82,7 +91,7 @@ static struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = { * * Return: interface index, or 0. */ -static int +int batadv_netlink_get_ifindex(const struct nlmsghdr *nlh, int attrtype) { struct nlattr *attr = nlmsg_find_attr(nlh, GENL_HDRLEN, attrtype); @@ -530,6 +539,18 @@ static struct genl_ops batadv_netlink_ops[] = { .policy = batadv_netlink_policy, .dumpit = batadv_netlink_dump_hardifs, }, + { + .cmd = BATADV_CMD_GET_TRANSTABLE_LOCAL, + .flags = GENL_ADMIN_PERM, + .policy = batadv_netlink_policy, + .dumpit = batadv_tt_local_dump, + }, + { + .cmd = BATADV_CMD_GET_TRANSTABLE_GLOBAL, + .flags = GENL_ADMIN_PERM, + .policy = batadv_netlink_policy, + .dumpit = batadv_tt_global_dump, + }, }; /** diff --git a/net/batman-adv/netlink.h b/net/batman-adv/netlink.h index b399f49504df..52eb16281aba 100644 --- a/net/batman-adv/netlink.h +++ b/net/batman-adv/netlink.h @@ -23,8 +23,11 @@ #include #include +struct nlmsghdr; + void batadv_netlink_register(void); void batadv_netlink_unregister(void); +int batadv_netlink_get_ifindex(const struct nlmsghdr *nlh, int attrtype); int batadv_netlink_tpmeter_notify(struct batadv_priv *bat_priv, const u8 *dst, u8 result, u32 test_time, u64 total_bytes, diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h index 6b011ff64dd8..6afc0b86950e 100644 --- a/net/batman-adv/packet.h +++ b/net/batman-adv/packet.h @@ -128,42 +128,6 @@ enum batadv_tt_data_flags { BATADV_TT_FULL_TABLE = BIT(4), }; -/** - * enum batadv_tt_client_flags - TT client specific flags - * @BATADV_TT_CLIENT_DEL: the client has to be deleted from the table - * @BATADV_TT_CLIENT_ROAM: the client roamed to/from another node and the new - * update telling its new real location has not been received/sent yet - * @BATADV_TT_CLIENT_WIFI: this client is connected through a wifi interface. - * This information is used by the "AP Isolation" feature - * @BATADV_TT_CLIENT_ISOLA: this client is considered "isolated". This - * information is used by the Extended Isolation feature - * @BATADV_TT_CLIENT_NOPURGE: this client should never be removed from the table - * @BATADV_TT_CLIENT_NEW: this client has been added to the local table but has - * not been announced yet - * @BATADV_TT_CLIENT_PENDING: this client is marked for removal but it is kept - * in the table for one more originator interval for consistency purposes - * @BATADV_TT_CLIENT_TEMP: this global client has been detected to be part of - * the network but no nnode has already announced it - * - * Bits from 0 to 7 are called _remote flags_ because they are sent on the wire. - * Bits from 8 to 15 are called _local flags_ because they are used for local - * computations only. - * - * Bits from 4 to 7 - a subset of remote flags - are ensured to be in sync with - * the other nodes in the network. To achieve this goal these flags are included - * in the TT CRC computation. - */ -enum batadv_tt_client_flags { - BATADV_TT_CLIENT_DEL = BIT(0), - BATADV_TT_CLIENT_ROAM = BIT(1), - BATADV_TT_CLIENT_WIFI = BIT(4), - BATADV_TT_CLIENT_ISOLA = BIT(5), - BATADV_TT_CLIENT_NOPURGE = BIT(8), - BATADV_TT_CLIENT_NEW = BIT(9), - BATADV_TT_CLIENT_PENDING = BIT(10), - BATADV_TT_CLIENT_TEMP = BIT(11), -}; - /** * enum batadv_vlan_flags - flags for the four MSB of any vlan ID field * @BATADV_VLAN_HAS_TAG: whether the field contains a valid vlan tag or not diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index af2bfef6dca8..20804078293c 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -37,20 +37,27 @@ #include #include #include +#include #include #include #include +#include #include #include #include #include #include +#include +#include +#include +#include #include "bridge_loop_avoidance.h" #include "hard-interface.h" #include "hash.h" #include "log.h" #include "multicast.h" +#include "netlink.h" #include "originator.h" #include "packet.h" #include "soft-interface.h" @@ -1108,6 +1115,164 @@ out: return 0; } +/** + * batadv_tt_local_dump_entry - Dump one TT local entry into a message + * @msg :Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @bat_priv: The bat priv with all the soft interface information + * @common: tt local & tt global common data + * + * Return: Error code, or 0 on success + */ +static int +batadv_tt_local_dump_entry(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_priv *bat_priv, + struct batadv_tt_common_entry *common) +{ + void *hdr; + struct batadv_softif_vlan *vlan; + struct batadv_tt_local_entry *local; + unsigned int last_seen_msecs; + u32 crc; + + local = container_of(common, struct batadv_tt_local_entry, common); + last_seen_msecs = jiffies_to_msecs(jiffies - local->last_seen); + + vlan = batadv_softif_vlan_get(bat_priv, common->vid); + if (!vlan) + return 0; + + crc = vlan->tt.crc; + + batadv_softif_vlan_put(vlan); + + hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, + NLM_F_MULTI, + BATADV_CMD_GET_TRANSTABLE_LOCAL); + if (!hdr) + return -ENOBUFS; + + if (nla_put(msg, BATADV_ATTR_TT_ADDRESS, ETH_ALEN, common->addr) || + nla_put_u32(msg, BATADV_ATTR_TT_CRC32, crc) || + nla_put_u16(msg, BATADV_ATTR_TT_VID, common->vid) || + nla_put_u32(msg, BATADV_ATTR_TT_FLAGS, common->flags)) + goto nla_put_failure; + + if (!(common->flags & BATADV_TT_CLIENT_NOPURGE) && + nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS, last_seen_msecs)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +/** + * batadv_tt_local_dump_bucket - Dump one TT local bucket into a message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @bat_priv: The bat priv with all the soft interface information + * @head: Pointer to the list containing the local tt entries + * @idx_s: Number of entries to skip + * + * Return: Error code, or 0 on success + */ +static int +batadv_tt_local_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_priv *bat_priv, + struct hlist_head *head, int *idx_s) +{ + struct batadv_tt_common_entry *common; + int idx = 0; + + rcu_read_lock(); + hlist_for_each_entry_rcu(common, head, hash_entry) { + if (idx++ < *idx_s) + continue; + + if (batadv_tt_local_dump_entry(msg, portid, seq, bat_priv, + common)) { + rcu_read_unlock(); + *idx_s = idx - 1; + return -EMSGSIZE; + } + } + rcu_read_unlock(); + + *idx_s = 0; + return 0; +} + +/** + * batadv_tt_local_dump - Dump TT local entries into a message + * @msg: Netlink message to dump into + * @cb: Parameters from query + * + * Return: Error code, or 0 on success + */ +int batadv_tt_local_dump(struct sk_buff *msg, struct netlink_callback *cb) +{ + struct net *net = sock_net(cb->skb->sk); + struct net_device *soft_iface; + struct batadv_priv *bat_priv; + struct batadv_hard_iface *primary_if = NULL; + struct batadv_hashtable *hash; + struct hlist_head *head; + int ret; + int ifindex; + int bucket = cb->args[0]; + int idx = cb->args[1]; + int portid = NETLINK_CB(cb->skb).portid; + + ifindex = batadv_netlink_get_ifindex(cb->nlh, BATADV_ATTR_MESH_IFINDEX); + if (!ifindex) + return -EINVAL; + + soft_iface = dev_get_by_index(net, ifindex); + if (!soft_iface || !batadv_softif_is_valid(soft_iface)) { + ret = -ENODEV; + goto out; + } + + bat_priv = netdev_priv(soft_iface); + + primary_if = batadv_primary_if_get_selected(bat_priv); + if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) { + ret = -ENOENT; + goto out; + } + + hash = bat_priv->tt.local_hash; + + while (bucket < hash->size) { + head = &hash->table[bucket]; + + if (batadv_tt_local_dump_bucket(msg, portid, cb->nlh->nlmsg_seq, + bat_priv, head, &idx)) + break; + + bucket++; + } + + ret = msg->len; + + out: + if (primary_if) + batadv_hardif_put(primary_if); + if (soft_iface) + dev_put(soft_iface); + + cb->args[0] = bucket; + cb->args[1] = idx; + + return ret; +} + static void batadv_tt_local_set_pending(struct batadv_priv *bat_priv, struct batadv_tt_local_entry *tt_local_entry, @@ -1755,6 +1920,218 @@ out: return 0; } +/** + * batadv_tt_global_dump_subentry - Dump all TT local entries into a message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @common: tt local & tt global common data + * @orig: Originator node announcing a non-mesh client + * @best: Is the best originator for the TT entry + * + * Return: Error code, or 0 on success + */ +static int +batadv_tt_global_dump_subentry(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_tt_common_entry *common, + struct batadv_tt_orig_list_entry *orig, + bool best) +{ + void *hdr; + struct batadv_orig_node_vlan *vlan; + u8 last_ttvn; + u32 crc; + + vlan = batadv_orig_node_vlan_get(orig->orig_node, + common->vid); + if (!vlan) + return 0; + + crc = vlan->tt.crc; + + batadv_orig_node_vlan_put(vlan); + + hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, + NLM_F_MULTI, + BATADV_CMD_GET_TRANSTABLE_GLOBAL); + if (!hdr) + return -ENOBUFS; + + last_ttvn = atomic_read(&orig->orig_node->last_ttvn); + + if (nla_put(msg, BATADV_ATTR_TT_ADDRESS, ETH_ALEN, common->addr) || + nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN, + orig->orig_node->orig) || + nla_put_u8(msg, BATADV_ATTR_TT_TTVN, orig->ttvn) || + nla_put_u8(msg, BATADV_ATTR_TT_LAST_TTVN, last_ttvn) || + nla_put_u32(msg, BATADV_ATTR_TT_CRC32, crc) || + nla_put_u16(msg, BATADV_ATTR_TT_VID, common->vid) || + nla_put_u32(msg, BATADV_ATTR_TT_FLAGS, common->flags)) + goto nla_put_failure; + + if (best && nla_put_flag(msg, BATADV_ATTR_FLAG_BEST)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +/** + * batadv_tt_global_dump_entry - Dump one TT global entry into a message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @bat_priv: The bat priv with all the soft interface information + * @common: tt local & tt global common data + * @sub_s: Number of entries to skip + * + * This function assumes the caller holds rcu_read_lock(). + * + * Return: Error code, or 0 on success + */ +static int +batadv_tt_global_dump_entry(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_priv *bat_priv, + struct batadv_tt_common_entry *common, int *sub_s) +{ + struct batadv_tt_orig_list_entry *orig_entry, *best_entry; + struct batadv_tt_global_entry *global; + struct hlist_head *head; + int sub = 0; + bool best; + + global = container_of(common, struct batadv_tt_global_entry, common); + best_entry = batadv_transtable_best_orig(bat_priv, global); + head = &global->orig_list; + + hlist_for_each_entry_rcu(orig_entry, head, list) { + if (sub++ < *sub_s) + continue; + + best = (orig_entry == best_entry); + + if (batadv_tt_global_dump_subentry(msg, portid, seq, common, + orig_entry, best)) { + *sub_s = sub - 1; + return -EMSGSIZE; + } + } + + *sub_s = 0; + return 0; +} + +/** + * batadv_tt_global_dump_bucket - Dump one TT local bucket into a message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @bat_priv: The bat priv with all the soft interface information + * @head: Pointer to the list containing the global tt entries + * @idx_s: Number of entries to skip + * @sub: Number of entries to skip + * + * Return: Error code, or 0 on success + */ +static int +batadv_tt_global_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_priv *bat_priv, + struct hlist_head *head, int *idx_s, int *sub) +{ + struct batadv_tt_common_entry *common; + int idx = 0; + + rcu_read_lock(); + hlist_for_each_entry_rcu(common, head, hash_entry) { + if (idx++ < *idx_s) + continue; + + if (batadv_tt_global_dump_entry(msg, portid, seq, bat_priv, + common, sub)) { + rcu_read_unlock(); + *idx_s = idx - 1; + return -EMSGSIZE; + } + } + rcu_read_unlock(); + + *idx_s = 0; + *sub = 0; + return 0; +} + +/** + * batadv_tt_global_dump - Dump TT global entries into a message + * @msg: Netlink message to dump into + * @cb: Parameters from query + * + * Return: Error code, or length of message on success + */ +int batadv_tt_global_dump(struct sk_buff *msg, struct netlink_callback *cb) +{ + struct net *net = sock_net(cb->skb->sk); + struct net_device *soft_iface; + struct batadv_priv *bat_priv; + struct batadv_hard_iface *primary_if = NULL; + struct batadv_hashtable *hash; + struct hlist_head *head; + int ret; + int ifindex; + int bucket = cb->args[0]; + int idx = cb->args[1]; + int sub = cb->args[2]; + int portid = NETLINK_CB(cb->skb).portid; + + ifindex = batadv_netlink_get_ifindex(cb->nlh, BATADV_ATTR_MESH_IFINDEX); + if (!ifindex) + return -EINVAL; + + soft_iface = dev_get_by_index(net, ifindex); + if (!soft_iface || !batadv_softif_is_valid(soft_iface)) { + ret = -ENODEV; + goto out; + } + + bat_priv = netdev_priv(soft_iface); + + primary_if = batadv_primary_if_get_selected(bat_priv); + if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) { + ret = -ENOENT; + goto out; + } + + hash = bat_priv->tt.global_hash; + + while (bucket < hash->size) { + head = &hash->table[bucket]; + + if (batadv_tt_global_dump_bucket(msg, portid, + cb->nlh->nlmsg_seq, bat_priv, + head, &idx, &sub)) + break; + + bucket++; + } + + ret = msg->len; + + out: + if (primary_if) + batadv_hardif_put(primary_if); + if (soft_iface) + dev_put(soft_iface); + + cb->args[0] = bucket; + cb->args[1] = idx; + cb->args[2] = sub; + + return ret; +} + /** * _batadv_tt_global_del_orig_entry - remove and free an orig_entry * @tt_global_entry: the global entry to remove the orig_entry from diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h index 02b0f85527cc..783fdba84db2 100644 --- a/net/batman-adv/translation-table.h +++ b/net/batman-adv/translation-table.h @@ -22,8 +22,10 @@ #include +struct netlink_callback; struct net_device; struct seq_file; +struct sk_buff; int batadv_tt_init(struct batadv_priv *bat_priv); bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr, @@ -33,6 +35,8 @@ u16 batadv_tt_local_remove(struct batadv_priv *bat_priv, const char *message, bool roaming); int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset); int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset); +int batadv_tt_local_dump(struct sk_buff *msg, struct netlink_callback *cb); +int batadv_tt_global_dump(struct sk_buff *msg, struct netlink_callback *cb); void batadv_tt_global_del_orig(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, s32 match_vid, const char *message); From f32ed4b54ef4d5c9ad7f8135226bc34bd0dccb5c Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 3 Jul 2016 13:31:38 +0200 Subject: [PATCH 0052/1715] batman-adv: Provide TTVN in the mesh_info netlink msg The TTVN is the main information for the debugging of translation table problems. It is therefore necessary when comparing the global translation tables. Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich Signed-off-by: Marek Lindner --- net/batman-adv/netlink.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index 14360ec16513..0c0d20bb92ce 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -18,6 +18,7 @@ #include "netlink.h" #include "main.h" +#include #include #include #include @@ -121,7 +122,9 @@ batadv_netlink_mesh_info_put(struct sk_buff *msg, struct net_device *soft_iface) nla_put_u32(msg, BATADV_ATTR_MESH_IFINDEX, soft_iface->ifindex) || nla_put_string(msg, BATADV_ATTR_MESH_IFNAME, soft_iface->name) || nla_put(msg, BATADV_ATTR_MESH_ADDRESS, ETH_ALEN, - soft_iface->dev_addr)) + soft_iface->dev_addr) || + nla_put_u8(msg, BATADV_ATTR_TT_TTVN, + (u8)atomic_read(&bat_priv->tt.vn))) goto out; primary_if = batadv_primary_if_get_selected(bat_priv); From 85cf8c859d53f6f53c37bb7f23a41f6171427021 Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Sun, 3 Jul 2016 13:31:39 +0200 Subject: [PATCH 0053/1715] batman-adv: netlink: add originator and neighbor table queries Add BATADV_CMD_GET_ORIGINATORS and BATADV_CMD_GET_NEIGHBORS commands, using handlers bat_orig_dump and bat_neigh_dump in batadv_algo_ops. Will always return -EOPNOTSUPP for now, as no implementations exist yet. Signed-off-by: Matthias Schiffer Signed-off-by: Andrew Lunn [sven@narfation.org: Rewrite based on new algo_ops structures] Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich Signed-off-by: Marek Lindner --- include/uapi/linux/batman_adv.h | 4 + net/batman-adv/netlink.c | 13 +++ net/batman-adv/originator.c | 160 ++++++++++++++++++++++++++++++++ net/batman-adv/originator.h | 4 + net/batman-adv/types.h | 9 ++ 5 files changed, 190 insertions(+) diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h index 1168d058cd2b..3f7a415f5e09 100644 --- a/include/uapi/linux/batman_adv.h +++ b/include/uapi/linux/batman_adv.h @@ -131,6 +131,8 @@ enum batadv_nl_attrs { * @BATADV_CMD_GET_HARDIFS: Query list of hard interfaces * @BATADV_CMD_GET_TRANSTABLE_LOCAL: Query list of local translations * @BATADV_CMD_GET_TRANSTABLE_GLOBAL Query list of global translations + * @BATADV_CMD_GET_ORIGINATORS: Query list of originators + * @BATADV_CMD_GET_NEIGHBORS: Query list of neighbours * @__BATADV_CMD_AFTER_LAST: internal use * @BATADV_CMD_MAX: highest used command number */ @@ -143,6 +145,8 @@ enum batadv_nl_commands { BATADV_CMD_GET_HARDIFS, BATADV_CMD_GET_TRANSTABLE_LOCAL, BATADV_CMD_GET_TRANSTABLE_GLOBAL, + BATADV_CMD_GET_ORIGINATORS, + BATADV_CMD_GET_NEIGHBORS, /* add new commands above here */ __BATADV_CMD_AFTER_LAST, BATADV_CMD_MAX = __BATADV_CMD_AFTER_LAST - 1 diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index 0c0d20bb92ce..8469fc4ec5a3 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -39,6 +39,7 @@ #include "bat_algo.h" #include "hard-interface.h" +#include "originator.h" #include "soft-interface.h" #include "tp_meter.h" #include "translation-table.h" @@ -554,6 +555,18 @@ static struct genl_ops batadv_netlink_ops[] = { .policy = batadv_netlink_policy, .dumpit = batadv_tt_global_dump, }, + { + .cmd = BATADV_CMD_GET_ORIGINATORS, + .flags = GENL_ADMIN_PERM, + .policy = batadv_netlink_policy, + .dumpit = batadv_orig_dump, + }, + { + .cmd = BATADV_CMD_GET_NEIGHBORS, + .flags = GENL_ADMIN_PERM, + .policy = batadv_netlink_policy, + .dumpit = batadv_hardif_neigh_dump, + }, }; /** diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 3940b5d24421..95c85558c530 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -28,11 +28,15 @@ #include #include #include +#include #include #include +#include #include #include #include +#include +#include #include "bat_algo.h" #include "distributed-arp-table.h" @@ -42,8 +46,10 @@ #include "hash.h" #include "log.h" #include "multicast.h" +#include "netlink.h" #include "network-coding.h" #include "routing.h" +#include "soft-interface.h" #include "translation-table.h" /* hash class keys */ @@ -720,6 +726,83 @@ int batadv_hardif_neigh_seq_print_text(struct seq_file *seq, void *offset) return 0; } +/** + * batadv_hardif_neigh_dump - Dump to netlink the neighbor infos for a specific + * outgoing interface + * @msg: message to dump into + * @cb: parameters for the dump + * + * Return: 0 or error value + */ +int batadv_hardif_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb) +{ + struct net *net = sock_net(cb->skb->sk); + struct net_device *soft_iface; + struct net_device *hard_iface = NULL; + struct batadv_hard_iface *hardif = BATADV_IF_DEFAULT; + struct batadv_priv *bat_priv; + struct batadv_hard_iface *primary_if = NULL; + int ret; + int ifindex, hard_ifindex; + + ifindex = batadv_netlink_get_ifindex(cb->nlh, BATADV_ATTR_MESH_IFINDEX); + if (!ifindex) + return -EINVAL; + + soft_iface = dev_get_by_index(net, ifindex); + if (!soft_iface || !batadv_softif_is_valid(soft_iface)) { + ret = -ENODEV; + goto out; + } + + bat_priv = netdev_priv(soft_iface); + + primary_if = batadv_primary_if_get_selected(bat_priv); + if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) { + ret = -ENOENT; + goto out; + } + + hard_ifindex = batadv_netlink_get_ifindex(cb->nlh, + BATADV_ATTR_HARD_IFINDEX); + if (hard_ifindex) { + hard_iface = dev_get_by_index(net, hard_ifindex); + if (hard_iface) + hardif = batadv_hardif_get_by_netdev(hard_iface); + + if (!hardif) { + ret = -ENODEV; + goto out; + } + + if (hardif->soft_iface != soft_iface) { + ret = -ENOENT; + goto out; + } + } + + if (!bat_priv->algo_ops->neigh.dump) { + ret = -EOPNOTSUPP; + goto out; + } + + bat_priv->algo_ops->neigh.dump(msg, cb, bat_priv, hardif); + + ret = msg->len; + + out: + if (hardif) + batadv_hardif_put(hardif); + if (hard_iface) + dev_put(hard_iface); + if (primary_if) + batadv_hardif_put(primary_if); + if (soft_iface) + dev_put(soft_iface); + + return ret; +} + /** * batadv_orig_ifinfo_release - release orig_ifinfo from lists and queue for * free after rcu grace period @@ -1330,6 +1413,83 @@ out: return 0; } +/** + * batadv_orig_dump - Dump to netlink the originator infos for a specific + * outgoing interface + * @msg: message to dump into + * @cb: parameters for the dump + * + * Return: 0 or error value + */ +int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb) +{ + struct net *net = sock_net(cb->skb->sk); + struct net_device *soft_iface; + struct net_device *hard_iface = NULL; + struct batadv_hard_iface *hardif = BATADV_IF_DEFAULT; + struct batadv_priv *bat_priv; + struct batadv_hard_iface *primary_if = NULL; + int ret; + int ifindex, hard_ifindex; + + ifindex = batadv_netlink_get_ifindex(cb->nlh, BATADV_ATTR_MESH_IFINDEX); + if (!ifindex) + return -EINVAL; + + soft_iface = dev_get_by_index(net, ifindex); + if (!soft_iface || !batadv_softif_is_valid(soft_iface)) { + ret = -ENODEV; + goto out; + } + + bat_priv = netdev_priv(soft_iface); + + primary_if = batadv_primary_if_get_selected(bat_priv); + if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) { + ret = -ENOENT; + goto out; + } + + hard_ifindex = batadv_netlink_get_ifindex(cb->nlh, + BATADV_ATTR_HARD_IFINDEX); + if (hard_ifindex) { + hard_iface = dev_get_by_index(net, hard_ifindex); + if (hard_iface) + hardif = batadv_hardif_get_by_netdev(hard_iface); + + if (!hardif) { + ret = -ENODEV; + goto out; + } + + if (hardif->soft_iface != soft_iface) { + ret = -ENOENT; + goto out; + } + } + + if (!bat_priv->algo_ops->orig.dump) { + ret = -EOPNOTSUPP; + goto out; + } + + bat_priv->algo_ops->orig.dump(msg, cb, bat_priv, hardif); + + ret = msg->len; + + out: + if (hardif) + batadv_hardif_put(hardif); + if (hard_iface) + dev_put(hard_iface); + if (primary_if) + batadv_hardif_put(primary_if); + if (soft_iface) + dev_put(soft_iface); + + return ret; +} + int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface, int max_if_num) { diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h index 566306bf05dc..ebc56183f358 100644 --- a/net/batman-adv/originator.h +++ b/net/batman-adv/originator.h @@ -31,7 +31,9 @@ #include "hash.h" +struct netlink_callback; struct seq_file; +struct sk_buff; bool batadv_compare_orig(const struct hlist_node *node, const void *data2); int batadv_originator_init(struct batadv_priv *bat_priv); @@ -61,6 +63,7 @@ batadv_neigh_ifinfo_get(struct batadv_neigh_node *neigh, struct batadv_hard_iface *if_outgoing); void batadv_neigh_ifinfo_put(struct batadv_neigh_ifinfo *neigh_ifinfo); +int batadv_hardif_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb); int batadv_hardif_neigh_seq_print_text(struct seq_file *seq, void *offset); struct batadv_orig_ifinfo * @@ -72,6 +75,7 @@ batadv_orig_ifinfo_new(struct batadv_orig_node *orig_node, void batadv_orig_ifinfo_put(struct batadv_orig_ifinfo *orig_ifinfo); int batadv_orig_seq_print_text(struct seq_file *seq, void *offset); +int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb); int batadv_orig_hardif_seq_print_text(struct seq_file *seq, void *offset); int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface, int max_if_num); diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 72806a3c40df..968023a61598 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -28,6 +28,7 @@ #include #include #include +#include #include /* for linux/wait.h */ #include #include @@ -1418,6 +1419,7 @@ struct batadv_algo_iface_ops { * @is_similar_or_better: check if neigh1 is equally similar or better than * neigh2 for their respective outgoing interface from the metric prospective * @print: print the single hop neighbor list (optional) + * @dump: dump neighbors to a netlink socket (optional) */ struct batadv_algo_neigh_ops { void (*hardif_init)(struct batadv_hardif_neigh_node *neigh); @@ -1430,6 +1432,9 @@ struct batadv_algo_neigh_ops { struct batadv_neigh_node *neigh2, struct batadv_hard_iface *if_outgoing2); void (*print)(struct batadv_priv *priv, struct seq_file *seq); + void (*dump)(struct sk_buff *msg, struct netlink_callback *cb, + struct batadv_priv *priv, + struct batadv_hard_iface *hard_iface); }; /** @@ -1441,6 +1446,7 @@ struct batadv_algo_neigh_ops { * @del_if: ask the routing algorithm to apply the needed changes to the * orig_node due to an hard-interface being removed from the mesh (optional) * @print: print the originator table (optional) + * @dump: dump originators to a netlink socket (optional) */ struct batadv_algo_orig_ops { void (*free)(struct batadv_orig_node *orig_node); @@ -1449,6 +1455,9 @@ struct batadv_algo_orig_ops { int del_if_num); void (*print)(struct batadv_priv *priv, struct seq_file *seq, struct batadv_hard_iface *hard_iface); + void (*dump)(struct sk_buff *msg, struct netlink_callback *cb, + struct batadv_priv *priv, + struct batadv_hard_iface *hard_iface); }; /** From 024f99cb4acc14dab5e55e1ecdf6aad31269ca98 Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Sun, 3 Jul 2016 13:31:40 +0200 Subject: [PATCH 0054/1715] batman-adv: add B.A.T.M.A.N. IV bat_{orig, neigh}_dump implementations Signed-off-by: Matthias Schiffer Signed-off-by: Andrew Lunn [sven.eckelmann@open-mesh.com: Fix function parameter alignments, add policy for attributes, fix includes, fix algo_ops integration] Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich Signed-off-by: Marek Lindner --- include/uapi/linux/batman_adv.h | 4 + net/batman-adv/bat_iv_ogm.c | 366 ++++++++++++++++++++++++++++++++ net/batman-adv/netlink.c | 2 + 3 files changed, 372 insertions(+) diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h index 3f7a415f5e09..ba2359a1f464 100644 --- a/include/uapi/linux/batman_adv.h +++ b/include/uapi/linux/batman_adv.h @@ -85,6 +85,8 @@ enum batadv_tt_client_flags { * @BATADV_ATTR_TT_FLAGS: Translation table client flags * @BATADV_ATTR_FLAG_BEST: Flags indicating entry is the best * @BATADV_ATTR_LAST_SEEN_MSECS: Time in milliseconds since last seen + * @BATADV_ATTR_NEIGH_ADDRESS: Neighbour MAC address + * @BATADV_ATTR_TQ: TQ to neighbour * @__BATADV_ATTR_AFTER_LAST: internal use * @NUM_BATADV_ATTR: total number of batadv_nl_attrs available * @BATADV_ATTR_MAX: highest attribute number currently defined @@ -114,6 +116,8 @@ enum batadv_nl_attrs { BATADV_ATTR_TT_FLAGS, BATADV_ATTR_FLAG_BEST, BATADV_ATTR_LAST_SEEN_MSECS, + BATADV_ATTR_NEIGH_ADDRESS, + BATADV_ATTR_TQ, /* add attributes above here, update the policy in netlink.c */ __BATADV_ATTR_AFTER_LAST, NUM_BATADV_ATTR = __BATADV_ATTR_AFTER_LAST, diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index a40cdf273625..7a8c0f63e2ae 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -48,6 +49,9 @@ #include #include #include +#include +#include +#include #include "bat_algo.h" #include "bitarray.h" @@ -55,6 +59,7 @@ #include "hard-interface.h" #include "hash.h" #include "log.h" +#include "netlink.h" #include "network-coding.h" #include "originator.h" #include "packet.h" @@ -1947,6 +1952,235 @@ next: seq_puts(seq, "No batman nodes in range ...\n"); } +/** + * batadv_iv_ogm_neigh_get_tq_avg - Get the TQ average for a neighbour on a + * given outgoing interface. + * @neigh_node: Neighbour of interest + * @if_outgoing: Outgoing interface of interest + * @tq_avg: Pointer of where to store the TQ average + * + * Return: False if no average TQ available, otherwise true. + */ +static bool +batadv_iv_ogm_neigh_get_tq_avg(struct batadv_neigh_node *neigh_node, + struct batadv_hard_iface *if_outgoing, + u8 *tq_avg) +{ + struct batadv_neigh_ifinfo *n_ifinfo; + + n_ifinfo = batadv_neigh_ifinfo_get(neigh_node, if_outgoing); + if (!n_ifinfo) + return false; + + *tq_avg = n_ifinfo->bat_iv.tq_avg; + batadv_neigh_ifinfo_put(n_ifinfo); + + return true; +} + +/** + * batadv_iv_ogm_orig_dump_subentry - Dump an originator subentry into a + * message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @bat_priv: The bat priv with all the soft interface information + * @if_outgoing: Limit dump to entries with this outgoing interface + * @orig_node: Originator to dump + * @neigh_node: Single hops neighbour + * @best: Is the best originator + * + * Return: Error code, or 0 on success + */ +static int +batadv_iv_ogm_orig_dump_subentry(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_priv *bat_priv, + struct batadv_hard_iface *if_outgoing, + struct batadv_orig_node *orig_node, + struct batadv_neigh_node *neigh_node, + bool best) +{ + void *hdr; + u8 tq_avg; + unsigned int last_seen_msecs; + + last_seen_msecs = jiffies_to_msecs(jiffies - orig_node->last_seen); + + if (!batadv_iv_ogm_neigh_get_tq_avg(neigh_node, if_outgoing, &tq_avg)) + return 0; + + if (if_outgoing != BATADV_IF_DEFAULT && + if_outgoing != neigh_node->if_incoming) + return 0; + + hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, + NLM_F_MULTI, BATADV_CMD_GET_ORIGINATORS); + if (!hdr) + return -ENOBUFS; + + if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN, + orig_node->orig) || + nla_put(msg, BATADV_ATTR_NEIGH_ADDRESS, ETH_ALEN, + neigh_node->addr) || + nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX, + neigh_node->if_incoming->net_dev->ifindex) || + nla_put_u8(msg, BATADV_ATTR_TQ, tq_avg) || + nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS, + last_seen_msecs)) + goto nla_put_failure; + + if (best && nla_put_flag(msg, BATADV_ATTR_FLAG_BEST)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +/** + * batadv_iv_ogm_orig_dump_entry - Dump an originator entry into a message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @bat_priv: The bat priv with all the soft interface information + * @if_outgoing: Limit dump to entries with this outgoing interface + * @orig_node: Originator to dump + * @sub_s: Number of sub entries to skip + * + * This function assumes the caller holds rcu_read_lock(). + * + * Return: Error code, or 0 on success + */ +static int +batadv_iv_ogm_orig_dump_entry(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_priv *bat_priv, + struct batadv_hard_iface *if_outgoing, + struct batadv_orig_node *orig_node, int *sub_s) +{ + struct batadv_neigh_node *neigh_node_best; + struct batadv_neigh_node *neigh_node; + int sub = 0; + bool best; + u8 tq_avg_best; + + neigh_node_best = batadv_orig_router_get(orig_node, if_outgoing); + if (!neigh_node_best) + goto out; + + if (!batadv_iv_ogm_neigh_get_tq_avg(neigh_node_best, if_outgoing, + &tq_avg_best)) + goto out; + + if (tq_avg_best == 0) + goto out; + + hlist_for_each_entry_rcu(neigh_node, &orig_node->neigh_list, list) { + if (sub++ < *sub_s) + continue; + + best = (neigh_node == neigh_node_best); + + if (batadv_iv_ogm_orig_dump_subentry(msg, portid, seq, + bat_priv, if_outgoing, + orig_node, neigh_node, + best)) { + batadv_neigh_node_put(neigh_node_best); + + *sub_s = sub - 1; + return -EMSGSIZE; + } + } + + out: + if (neigh_node_best) + batadv_neigh_node_put(neigh_node_best); + + *sub_s = 0; + return 0; +} + +/** + * batadv_iv_ogm_orig_dump_bucket - Dump an originator bucket into a + * message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @bat_priv: The bat priv with all the soft interface information + * @if_outgoing: Limit dump to entries with this outgoing interface + * @head: Bucket to be dumped + * @idx_s: Number of entries to be skipped + * @sub: Number of sub entries to be skipped + * + * Return: Error code, or 0 on success + */ +static int +batadv_iv_ogm_orig_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_priv *bat_priv, + struct batadv_hard_iface *if_outgoing, + struct hlist_head *head, int *idx_s, int *sub) +{ + struct batadv_orig_node *orig_node; + int idx = 0; + + rcu_read_lock(); + hlist_for_each_entry_rcu(orig_node, head, hash_entry) { + if (idx++ < *idx_s) + continue; + + if (batadv_iv_ogm_orig_dump_entry(msg, portid, seq, bat_priv, + if_outgoing, orig_node, + sub)) { + rcu_read_unlock(); + *idx_s = idx - 1; + return -EMSGSIZE; + } + } + rcu_read_unlock(); + + *idx_s = 0; + *sub = 0; + return 0; +} + +/** + * batadv_iv_ogm_orig_dump - Dump the originators into a message + * @msg: Netlink message to dump into + * @cb: Control block containing additional options + * @bat_priv: The bat priv with all the soft interface information + * @if_outgoing: Limit dump to entries with this outgoing interface + */ +static void +batadv_iv_ogm_orig_dump(struct sk_buff *msg, struct netlink_callback *cb, + struct batadv_priv *bat_priv, + struct batadv_hard_iface *if_outgoing) +{ + struct batadv_hashtable *hash = bat_priv->orig_hash; + struct hlist_head *head; + int bucket = cb->args[0]; + int idx = cb->args[1]; + int sub = cb->args[2]; + int portid = NETLINK_CB(cb->skb).portid; + + while (bucket < hash->size) { + head = &hash->table[bucket]; + + if (batadv_iv_ogm_orig_dump_bucket(msg, portid, + cb->nlh->nlmsg_seq, + bat_priv, if_outgoing, head, + &idx, &sub)) + break; + + bucket++; + } + + cb->args[0] = bucket; + cb->args[1] = idx; + cb->args[2] = sub; +} + /** * batadv_iv_hardif_neigh_print - print a single hop neighbour node * @seq: neighbour table seq_file struct @@ -2043,6 +2277,136 @@ out: return ret; } +/** + * batadv_iv_ogm_neigh_dump_neigh - Dump a neighbour into a netlink message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @hardif_neigh: Neighbour to be dumped + * + * Return: Error code, or 0 on success + */ +static int +batadv_iv_ogm_neigh_dump_neigh(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_hardif_neigh_node *hardif_neigh) +{ + void *hdr; + unsigned int last_seen_msecs; + + last_seen_msecs = jiffies_to_msecs(jiffies - hardif_neigh->last_seen); + + hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, + NLM_F_MULTI, BATADV_CMD_GET_NEIGHBORS); + if (!hdr) + return -ENOBUFS; + + if (nla_put(msg, BATADV_ATTR_NEIGH_ADDRESS, ETH_ALEN, + hardif_neigh->addr) || + nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX, + hardif_neigh->if_incoming->net_dev->ifindex) || + nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS, + last_seen_msecs)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +/** + * batadv_iv_ogm_neigh_dump_hardif - Dump the neighbours of a hard interface + * into a message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @bat_priv: The bat priv with all the soft interface information + * @hard_iface: Hard interface to dump the neighbours for + * @idx_s: Number of entries to skip + * + * This function assumes the caller holds rcu_read_lock(). + * + * Return: Error code, or 0 on success + */ +static int +batadv_iv_ogm_neigh_dump_hardif(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_priv *bat_priv, + struct batadv_hard_iface *hard_iface, + int *idx_s) +{ + struct batadv_hardif_neigh_node *hardif_neigh; + int idx = 0; + + hlist_for_each_entry_rcu(hardif_neigh, + &hard_iface->neigh_list, list) { + if (idx++ < *idx_s) + continue; + + if (batadv_iv_ogm_neigh_dump_neigh(msg, portid, seq, + hardif_neigh)) { + *idx_s = idx - 1; + return -EMSGSIZE; + } + } + + *idx_s = 0; + return 0; +} + +/** + * batadv_iv_ogm_neigh_dump - Dump the neighbours into a message + * @msg: Netlink message to dump into + * @cb: Control block containing additional options + * @bat_priv: The bat priv with all the soft interface information + * @single_hardif: Limit dump to this hard interfaace + */ +static void +batadv_iv_ogm_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb, + struct batadv_priv *bat_priv, + struct batadv_hard_iface *single_hardif) +{ + struct batadv_hard_iface *hard_iface; + int i_hardif = 0; + int i_hardif_s = cb->args[0]; + int idx = cb->args[1]; + int portid = NETLINK_CB(cb->skb).portid; + + rcu_read_lock(); + if (single_hardif) { + if (i_hardif_s == 0) { + if (batadv_iv_ogm_neigh_dump_hardif(msg, portid, + cb->nlh->nlmsg_seq, + bat_priv, + single_hardif, + &idx) == 0) + i_hardif++; + } + } else { + list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, + list) { + if (hard_iface->soft_iface != bat_priv->soft_iface) + continue; + + if (i_hardif++ < i_hardif_s) + continue; + + if (batadv_iv_ogm_neigh_dump_hardif(msg, portid, + cb->nlh->nlmsg_seq, + bat_priv, + hard_iface, &idx)) { + i_hardif--; + break; + } + } + } + rcu_read_unlock(); + + cb->args[0] = i_hardif; + cb->args[1] = idx; +} + /** * batadv_iv_ogm_neigh_cmp - compare the metrics of two neighbors * @neigh1: the first neighbor object of the comparison @@ -2330,9 +2694,11 @@ static struct batadv_algo_ops batadv_batman_iv __read_mostly = { .cmp = batadv_iv_ogm_neigh_cmp, .is_similar_or_better = batadv_iv_ogm_neigh_is_sob, .print = batadv_iv_neigh_print, + .dump = batadv_iv_ogm_neigh_dump, }, .orig = { .print = batadv_iv_ogm_orig_print, + .dump = batadv_iv_ogm_orig_dump, .free = batadv_iv_ogm_orig_free, .add_if = batadv_iv_ogm_orig_add_if, .del_if = batadv_iv_ogm_orig_del_if, diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index 8469fc4ec5a3..0c7940cc1968 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -84,6 +84,8 @@ static struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = { [BATADV_ATTR_TT_FLAGS] = { .type = NLA_U32 }, [BATADV_ATTR_FLAG_BEST] = { .type = NLA_FLAG }, [BATADV_ATTR_LAST_SEEN_MSECS] = { .type = NLA_U32 }, + [BATADV_ATTR_NEIGH_ADDRESS] = { .len = ETH_ALEN }, + [BATADV_ATTR_TQ] = { .type = NLA_U8 }, }; /** From f02a478f518ee5690f279c8c2d3a6222143a7b20 Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Sun, 3 Jul 2016 13:31:41 +0200 Subject: [PATCH 0055/1715] batman-adv: add B.A.T.M.A.N. V bat_{orig, neigh}_dump implementations Dump the algo V originators and neighbours. Signed-off-by: Matthias Schiffer Signed-off-by: Andrew Lunn [sven@narfation.org: Fix includes, fix algo_ops integration] Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich Signed-off-by: Marek Lindner --- include/uapi/linux/batman_adv.h | 2 + net/batman-adv/bat_v.c | 340 ++++++++++++++++++++++++++++++++ net/batman-adv/netlink.c | 1 + 3 files changed, 343 insertions(+) diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h index ba2359a1f464..2e2747fb1311 100644 --- a/include/uapi/linux/batman_adv.h +++ b/include/uapi/linux/batman_adv.h @@ -87,6 +87,7 @@ enum batadv_tt_client_flags { * @BATADV_ATTR_LAST_SEEN_MSECS: Time in milliseconds since last seen * @BATADV_ATTR_NEIGH_ADDRESS: Neighbour MAC address * @BATADV_ATTR_TQ: TQ to neighbour + * @BATADV_ATTR_THROUGHPUT: Estimated throughput to Neighbour * @__BATADV_ATTR_AFTER_LAST: internal use * @NUM_BATADV_ATTR: total number of batadv_nl_attrs available * @BATADV_ATTR_MAX: highest attribute number currently defined @@ -118,6 +119,7 @@ enum batadv_nl_attrs { BATADV_ATTR_LAST_SEEN_MSECS, BATADV_ATTR_NEIGH_ADDRESS, BATADV_ATTR_TQ, + BATADV_ATTR_THROUGHPUT, /* add attributes above here, update the policy in netlink.c */ __BATADV_ATTR_AFTER_LAST, NUM_BATADV_ATTR = __BATADV_ATTR_AFTER_LAST, diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c index 1d777b171366..9dccfaf32115 100644 --- a/net/batman-adv/bat_v.c +++ b/net/batman-adv/bat_v.c @@ -22,17 +22,22 @@ #include #include #include +#include #include #include #include #include #include +#include #include #include #include #include #include #include +#include +#include +#include #include "bat_algo.h" #include "bat_v_elp.h" @@ -42,9 +47,12 @@ #include "hard-interface.h" #include "hash.h" #include "log.h" +#include "netlink.h" #include "originator.h" #include "packet.h" +struct sk_buff; + static void batadv_v_iface_activate(struct batadv_hard_iface *hard_iface) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); @@ -205,6 +213,138 @@ static void batadv_v_neigh_print(struct batadv_priv *bat_priv, seq_puts(seq, "No batman nodes in range ...\n"); } +/** + * batadv_v_neigh_dump_neigh - Dump a neighbour into a message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @hardif_neigh: Neighbour to dump + * + * Return: Error code, or 0 on success + */ +static int +batadv_v_neigh_dump_neigh(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_hardif_neigh_node *hardif_neigh) +{ + void *hdr; + unsigned int last_seen_msecs; + u32 throughput; + + last_seen_msecs = jiffies_to_msecs(jiffies - hardif_neigh->last_seen); + throughput = ewma_throughput_read(&hardif_neigh->bat_v.throughput); + throughput = throughput * 100; + + hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, NLM_F_MULTI, + BATADV_CMD_GET_NEIGHBORS); + if (!hdr) + return -ENOBUFS; + + if (nla_put(msg, BATADV_ATTR_NEIGH_ADDRESS, ETH_ALEN, + hardif_neigh->addr) || + nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX, + hardif_neigh->if_incoming->net_dev->ifindex) || + nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS, + last_seen_msecs) || + nla_put_u32(msg, BATADV_ATTR_THROUGHPUT, throughput)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +/** + * batadv_v_neigh_dump_hardif - Dump the neighbours of a hard interface into + * a message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @bat_priv: The bat priv with all the soft interface information + * @hard_iface: The hard interface to be dumped + * @idx_s: Entries to be skipped + * + * This function assumes the caller holds rcu_read_lock(). + * + * Return: Error code, or 0 on success + */ +static int +batadv_v_neigh_dump_hardif(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_priv *bat_priv, + struct batadv_hard_iface *hard_iface, + int *idx_s) +{ + struct batadv_hardif_neigh_node *hardif_neigh; + int idx = 0; + + hlist_for_each_entry_rcu(hardif_neigh, + &hard_iface->neigh_list, list) { + if (idx++ < *idx_s) + continue; + + if (batadv_v_neigh_dump_neigh(msg, portid, seq, hardif_neigh)) { + *idx_s = idx - 1; + return -EMSGSIZE; + } + } + + *idx_s = 0; + return 0; +} + +/** + * batadv_v_neigh_dump - Dump the neighbours of a hard interface into a + * message + * @msg: Netlink message to dump into + * @cb: Control block containing additional options + * @bat_priv: The bat priv with all the soft interface information + * @single_hardif: Limit dumping to this hard interface + */ +static void +batadv_v_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb, + struct batadv_priv *bat_priv, + struct batadv_hard_iface *single_hardif) +{ + struct batadv_hard_iface *hard_iface; + int i_hardif = 0; + int i_hardif_s = cb->args[0]; + int idx = cb->args[1]; + int portid = NETLINK_CB(cb->skb).portid; + + rcu_read_lock(); + if (single_hardif) { + if (i_hardif_s == 0) { + if (batadv_v_neigh_dump_hardif(msg, portid, + cb->nlh->nlmsg_seq, + bat_priv, single_hardif, + &idx) == 0) + i_hardif++; + } + } else { + list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { + if (hard_iface->soft_iface != bat_priv->soft_iface) + continue; + + if (i_hardif++ < i_hardif_s) + continue; + + if (batadv_v_neigh_dump_hardif(msg, portid, + cb->nlh->nlmsg_seq, + bat_priv, hard_iface, + &idx)) { + i_hardif--; + break; + } + } + } + rcu_read_unlock(); + + cb->args[0] = i_hardif; + cb->args[1] = idx; +} + /** * batadv_v_orig_print - print the originator table * @bat_priv: the bat priv with all the soft interface information @@ -272,6 +412,204 @@ next: seq_puts(seq, "No batman nodes in range ...\n"); } +/** + * batadv_v_orig_dump_subentry - Dump an originator subentry into a + * message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @bat_priv: The bat priv with all the soft interface information + * @if_outgoing: Limit dump to entries with this outgoing interface + * @orig_node: Originator to dump + * @neigh_node: Single hops neighbour + * @best: Is the best originator + * + * Return: Error code, or 0 on success + */ +static int +batadv_v_orig_dump_subentry(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_priv *bat_priv, + struct batadv_hard_iface *if_outgoing, + struct batadv_orig_node *orig_node, + struct batadv_neigh_node *neigh_node, + bool best) +{ + struct batadv_neigh_ifinfo *n_ifinfo; + unsigned int last_seen_msecs; + u32 throughput; + void *hdr; + + n_ifinfo = batadv_neigh_ifinfo_get(neigh_node, if_outgoing); + if (!n_ifinfo) + return 0; + + throughput = n_ifinfo->bat_v.throughput * 100; + + batadv_neigh_ifinfo_put(n_ifinfo); + + last_seen_msecs = jiffies_to_msecs(jiffies - orig_node->last_seen); + + if (if_outgoing != BATADV_IF_DEFAULT && + if_outgoing != neigh_node->if_incoming) + return 0; + + hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, NLM_F_MULTI, + BATADV_CMD_GET_ORIGINATORS); + if (!hdr) + return -ENOBUFS; + + if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN, orig_node->orig) || + nla_put(msg, BATADV_ATTR_NEIGH_ADDRESS, ETH_ALEN, + neigh_node->addr) || + nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX, + neigh_node->if_incoming->net_dev->ifindex) || + nla_put_u32(msg, BATADV_ATTR_THROUGHPUT, throughput) || + nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS, + last_seen_msecs)) + goto nla_put_failure; + + if (best && nla_put_flag(msg, BATADV_ATTR_FLAG_BEST)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +/** + * batadv_v_orig_dump_entry - Dump an originator entry into a message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @bat_priv: The bat priv with all the soft interface information + * @if_outgoing: Limit dump to entries with this outgoing interface + * @orig_node: Originator to dump + * @sub_s: Number of sub entries to skip + * + * This function assumes the caller holds rcu_read_lock(). + * + * Return: Error code, or 0 on success + */ +static int +batadv_v_orig_dump_entry(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_priv *bat_priv, + struct batadv_hard_iface *if_outgoing, + struct batadv_orig_node *orig_node, int *sub_s) +{ + struct batadv_neigh_node *neigh_node_best; + struct batadv_neigh_node *neigh_node; + int sub = 0; + bool best; + + neigh_node_best = batadv_orig_router_get(orig_node, if_outgoing); + if (!neigh_node_best) + goto out; + + hlist_for_each_entry_rcu(neigh_node, &orig_node->neigh_list, list) { + if (sub++ < *sub_s) + continue; + + best = (neigh_node == neigh_node_best); + + if (batadv_v_orig_dump_subentry(msg, portid, seq, bat_priv, + if_outgoing, orig_node, + neigh_node, best)) { + batadv_neigh_node_put(neigh_node_best); + + *sub_s = sub - 1; + return -EMSGSIZE; + } + } + + out: + if (neigh_node_best) + batadv_neigh_node_put(neigh_node_best); + + *sub_s = 0; + return 0; +} + +/** + * batadv_v_orig_dump_bucket - Dump an originator bucket into a + * message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @bat_priv: The bat priv with all the soft interface information + * @if_outgoing: Limit dump to entries with this outgoing interface + * @head: Bucket to be dumped + * @idx_s: Number of entries to be skipped + * @sub: Number of sub entries to be skipped + * + * Return: Error code, or 0 on success + */ +static int +batadv_v_orig_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_priv *bat_priv, + struct batadv_hard_iface *if_outgoing, + struct hlist_head *head, int *idx_s, int *sub) +{ + struct batadv_orig_node *orig_node; + int idx = 0; + + rcu_read_lock(); + hlist_for_each_entry_rcu(orig_node, head, hash_entry) { + if (idx++ < *idx_s) + continue; + + if (batadv_v_orig_dump_entry(msg, portid, seq, bat_priv, + if_outgoing, orig_node, sub)) { + rcu_read_unlock(); + *idx_s = idx - 1; + return -EMSGSIZE; + } + } + rcu_read_unlock(); + + *idx_s = 0; + *sub = 0; + return 0; +} + +/** + * batadv_v_orig_dump - Dump the originators into a message + * @msg: Netlink message to dump into + * @cb: Control block containing additional options + * @bat_priv: The bat priv with all the soft interface information + * @if_outgoing: Limit dump to entries with this outgoing interface + */ +static void +batadv_v_orig_dump(struct sk_buff *msg, struct netlink_callback *cb, + struct batadv_priv *bat_priv, + struct batadv_hard_iface *if_outgoing) +{ + struct batadv_hashtable *hash = bat_priv->orig_hash; + struct hlist_head *head; + int bucket = cb->args[0]; + int idx = cb->args[1]; + int sub = cb->args[2]; + int portid = NETLINK_CB(cb->skb).portid; + + while (bucket < hash->size) { + head = &hash->table[bucket]; + + if (batadv_v_orig_dump_bucket(msg, portid, + cb->nlh->nlmsg_seq, + bat_priv, if_outgoing, head, &idx, + &sub)) + break; + + bucket++; + } + + cb->args[0] = bucket; + cb->args[1] = idx; + cb->args[2] = sub; +} + static int batadv_v_neigh_cmp(struct batadv_neigh_node *neigh1, struct batadv_hard_iface *if_outgoing1, struct batadv_neigh_node *neigh2, @@ -573,9 +911,11 @@ static struct batadv_algo_ops batadv_batman_v __read_mostly = { .cmp = batadv_v_neigh_cmp, .is_similar_or_better = batadv_v_neigh_is_sob, .print = batadv_v_neigh_print, + .dump = batadv_v_neigh_dump, }, .orig = { .print = batadv_v_orig_print, + .dump = batadv_v_orig_dump, }, .gw = { .store_sel_class = batadv_v_store_sel_class, diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index 0c7940cc1968..025f2ec2b27e 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -86,6 +86,7 @@ static struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = { [BATADV_ATTR_LAST_SEEN_MSECS] = { .type = NLA_U32 }, [BATADV_ATTR_NEIGH_ADDRESS] = { .len = ETH_ALEN }, [BATADV_ATTR_TQ] = { .type = NLA_U8 }, + [BATADV_ATTR_THROUGHPUT] = { .type = NLA_U32 }, }; /** From d7129dafcb71adfd1a166d0477ce0f564cf410d5 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 3 Jul 2016 13:31:42 +0200 Subject: [PATCH 0056/1715] batman-adv: netlink: add gateway table queries Add BATADV_CMD_GET_GATEWAYS commands, using handlers bat_gw_dump in batadv_algo_ops. Will always return -EOPNOTSUPP for now, as no implementations exist yet. Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Simon Wunderlich --- include/uapi/linux/batman_adv.h | 8 +++++ net/batman-adv/gateway_client.c | 59 +++++++++++++++++++++++++++++++++ net/batman-adv/gateway_client.h | 2 ++ net/batman-adv/netlink.c | 10 ++++++ net/batman-adv/types.h | 3 ++ 5 files changed, 82 insertions(+) diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h index 2e2747fb1311..a13fc09e8192 100644 --- a/include/uapi/linux/batman_adv.h +++ b/include/uapi/linux/batman_adv.h @@ -88,6 +88,9 @@ enum batadv_tt_client_flags { * @BATADV_ATTR_NEIGH_ADDRESS: Neighbour MAC address * @BATADV_ATTR_TQ: TQ to neighbour * @BATADV_ATTR_THROUGHPUT: Estimated throughput to Neighbour + * @BATADV_ATTR_BANDWIDTH_UP: Reported uplink bandwidth + * @BATADV_ATTR_BANDWIDTH_DOWN: Reported downlink bandwidth + * @BATADV_ATTR_ROUTER: Gateway router MAC address * @__BATADV_ATTR_AFTER_LAST: internal use * @NUM_BATADV_ATTR: total number of batadv_nl_attrs available * @BATADV_ATTR_MAX: highest attribute number currently defined @@ -120,6 +123,9 @@ enum batadv_nl_attrs { BATADV_ATTR_NEIGH_ADDRESS, BATADV_ATTR_TQ, BATADV_ATTR_THROUGHPUT, + BATADV_ATTR_BANDWIDTH_UP, + BATADV_ATTR_BANDWIDTH_DOWN, + BATADV_ATTR_ROUTER, /* add attributes above here, update the policy in netlink.c */ __BATADV_ATTR_AFTER_LAST, NUM_BATADV_ATTR = __BATADV_ATTR_AFTER_LAST, @@ -139,6 +145,7 @@ enum batadv_nl_attrs { * @BATADV_CMD_GET_TRANSTABLE_GLOBAL Query list of global translations * @BATADV_CMD_GET_ORIGINATORS: Query list of originators * @BATADV_CMD_GET_NEIGHBORS: Query list of neighbours + * @BATADV_CMD_GET_GATEWAYS: Query list of gateways * @__BATADV_CMD_AFTER_LAST: internal use * @BATADV_CMD_MAX: highest used command number */ @@ -153,6 +160,7 @@ enum batadv_nl_commands { BATADV_CMD_GET_TRANSTABLE_GLOBAL, BATADV_CMD_GET_ORIGINATORS, BATADV_CMD_GET_NEIGHBORS, + BATADV_CMD_GET_GATEWAYS, /* add new commands above here */ __BATADV_CMD_AFTER_LAST, BATADV_CMD_MAX = __BATADV_CMD_AFTER_LAST - 1 diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index a77a17939f1e..c2928c2287b8 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -20,6 +20,7 @@ #include #include +#include #include #include #include @@ -31,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -39,13 +41,17 @@ #include #include #include +#include +#include #include "gateway_common.h" #include "hard-interface.h" #include "log.h" +#include "netlink.h" #include "originator.h" #include "packet.h" #include "routing.h" +#include "soft-interface.h" #include "sysfs.h" #include "translation-table.h" @@ -500,6 +506,59 @@ int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset) return 0; } +/** + * batadv_gw_dump - Dump gateways into a message + * @msg: Netlink message to dump into + * @cb: Control block containing additional options + * + * Return: Error code, or length of message + */ +int batadv_gw_dump(struct sk_buff *msg, struct netlink_callback *cb) +{ + struct batadv_hard_iface *primary_if = NULL; + struct net *net = sock_net(cb->skb->sk); + struct net_device *soft_iface; + struct batadv_priv *bat_priv; + int ifindex; + int ret; + + ifindex = batadv_netlink_get_ifindex(cb->nlh, + BATADV_ATTR_MESH_IFINDEX); + if (!ifindex) + return -EINVAL; + + soft_iface = dev_get_by_index(net, ifindex); + if (!soft_iface || !batadv_softif_is_valid(soft_iface)) { + ret = -ENODEV; + goto out; + } + + bat_priv = netdev_priv(soft_iface); + + primary_if = batadv_primary_if_get_selected(bat_priv); + if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) { + ret = -ENOENT; + goto out; + } + + if (!bat_priv->algo_ops->gw.dump) { + ret = -EOPNOTSUPP; + goto out; + } + + bat_priv->algo_ops->gw.dump(msg, cb, bat_priv); + + ret = msg->len; + +out: + if (primary_if) + batadv_hardif_put(primary_if); + if (soft_iface) + dev_put(soft_iface); + + return ret; +} + /** * batadv_gw_dhcp_recipient_get - check if a packet is a DHCP message * @skb: the packet to check diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h index 6b40432aa1ed..859166d03561 100644 --- a/net/batman-adv/gateway_client.h +++ b/net/batman-adv/gateway_client.h @@ -23,6 +23,7 @@ #include struct batadv_tvlv_gateway_data; +struct netlink_callback; struct seq_file; struct sk_buff; @@ -43,6 +44,7 @@ void batadv_gw_node_put(struct batadv_gw_node *gw_node); struct batadv_gw_node * batadv_gw_get_selected_gw_node(struct batadv_priv *bat_priv); int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset); +int batadv_gw_dump(struct sk_buff *msg, struct netlink_callback *cb); bool batadv_gw_out_of_range(struct batadv_priv *bat_priv, struct sk_buff *skb); enum batadv_dhcp_recipient batadv_gw_dhcp_recipient_get(struct sk_buff *skb, unsigned int *header_len, diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index 025f2ec2b27e..c68ccb03634d 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -38,6 +38,7 @@ #include #include "bat_algo.h" +#include "gateway_client.h" #include "hard-interface.h" #include "originator.h" #include "soft-interface.h" @@ -87,6 +88,9 @@ static struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = { [BATADV_ATTR_NEIGH_ADDRESS] = { .len = ETH_ALEN }, [BATADV_ATTR_TQ] = { .type = NLA_U8 }, [BATADV_ATTR_THROUGHPUT] = { .type = NLA_U32 }, + [BATADV_ATTR_BANDWIDTH_UP] = { .type = NLA_U32 }, + [BATADV_ATTR_BANDWIDTH_DOWN] = { .type = NLA_U32 }, + [BATADV_ATTR_ROUTER] = { .len = ETH_ALEN }, }; /** @@ -570,6 +574,12 @@ static struct genl_ops batadv_netlink_ops[] = { .policy = batadv_netlink_policy, .dumpit = batadv_hardif_neigh_dump, }, + { + .cmd = BATADV_CMD_GET_GATEWAYS, + .flags = GENL_ADMIN_PERM, + .policy = batadv_netlink_policy, + .dumpit = batadv_gw_dump, + }, }; /** diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 968023a61598..b5f01a36ec34 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -1469,6 +1469,7 @@ struct batadv_algo_orig_ops { * @is_eligible: check if a newly discovered GW is a potential candidate for * the election as best GW (optional) * @print: print the gateway table (optional) + * @dump: dump gateways to a netlink socket (optional) */ struct batadv_algo_gw_ops { ssize_t (*store_sel_class)(struct batadv_priv *bat_priv, char *buff, @@ -1480,6 +1481,8 @@ struct batadv_algo_gw_ops { struct batadv_orig_node *curr_gw_orig, struct batadv_orig_node *orig_node); void (*print)(struct batadv_priv *bat_priv, struct seq_file *seq); + void (*dump)(struct sk_buff *msg, struct netlink_callback *cb, + struct batadv_priv *priv); }; /** From efb766af06e39101456e3c83e98112ce9ab9739c Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sun, 3 Jul 2016 13:31:43 +0200 Subject: [PATCH 0057/1715] batman-adv: add B.A.T.M.A.N. IV bat_gw_dump implementations Dump the list of gateways via the netlink socket. Signed-off-by: Andrew Lunn [sven.eckelmann@open-mesh.com: integrate in batadv_algo_ops] Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich Signed-off-by: Marek Lindner --- net/batman-adv/bat_iv_ogm.c | 105 ++++++++++++++++++++++++++++++++++++ 1 file changed, 105 insertions(+) diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 7a8c0f63e2ae..9ed4f1fc6ac5 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -2681,6 +2681,110 @@ static void batadv_iv_gw_print(struct batadv_priv *bat_priv, seq_puts(seq, "No gateways in range ...\n"); } +/** + * batadv_iv_gw_dump_entry - Dump a gateway into a message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @bat_priv: The bat priv with all the soft interface information + * @gw_node: Gateway to be dumped + * + * Return: Error code, or 0 on success + */ +static int batadv_iv_gw_dump_entry(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_priv *bat_priv, + struct batadv_gw_node *gw_node) +{ + struct batadv_neigh_ifinfo *router_ifinfo = NULL; + struct batadv_neigh_node *router; + struct batadv_gw_node *curr_gw; + int ret = -EINVAL; + void *hdr; + + router = batadv_orig_router_get(gw_node->orig_node, BATADV_IF_DEFAULT); + if (!router) + goto out; + + router_ifinfo = batadv_neigh_ifinfo_get(router, BATADV_IF_DEFAULT); + if (!router_ifinfo) + goto out; + + curr_gw = batadv_gw_get_selected_gw_node(bat_priv); + + hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, + NLM_F_MULTI, BATADV_CMD_GET_GATEWAYS); + if (!hdr) { + ret = -ENOBUFS; + goto out; + } + + ret = -EMSGSIZE; + + if (curr_gw == gw_node) + if (nla_put_flag(msg, BATADV_ATTR_FLAG_BEST)) { + genlmsg_cancel(msg, hdr); + goto out; + } + + if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN, + gw_node->orig_node->orig) || + nla_put_u8(msg, BATADV_ATTR_TQ, router_ifinfo->bat_iv.tq_avg) || + nla_put(msg, BATADV_ATTR_ROUTER, ETH_ALEN, + router->addr) || + nla_put_string(msg, BATADV_ATTR_HARD_IFNAME, + router->if_incoming->net_dev->name) || + nla_put_u32(msg, BATADV_ATTR_BANDWIDTH_DOWN, + gw_node->bandwidth_down) || + nla_put_u32(msg, BATADV_ATTR_BANDWIDTH_UP, + gw_node->bandwidth_up)) { + genlmsg_cancel(msg, hdr); + goto out; + } + + genlmsg_end(msg, hdr); + ret = 0; + +out: + if (router_ifinfo) + batadv_neigh_ifinfo_put(router_ifinfo); + if (router) + batadv_neigh_node_put(router); + return ret; +} + +/** + * batadv_iv_gw_dump - Dump gateways into a message + * @msg: Netlink message to dump into + * @cb: Control block containing additional options + * @bat_priv: The bat priv with all the soft interface information + */ +static void batadv_iv_gw_dump(struct sk_buff *msg, struct netlink_callback *cb, + struct batadv_priv *bat_priv) +{ + int portid = NETLINK_CB(cb->skb).portid; + struct batadv_gw_node *gw_node; + int idx_skip = cb->args[0]; + int idx = 0; + + rcu_read_lock(); + hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) { + if (idx++ < idx_skip) + continue; + + if (batadv_iv_gw_dump_entry(msg, portid, cb->nlh->nlmsg_seq, + bat_priv, gw_node)) { + idx_skip = idx - 1; + goto unlock; + } + } + + idx_skip = idx; +unlock: + rcu_read_unlock(); + + cb->args[0] = idx_skip; +} + static struct batadv_algo_ops batadv_batman_iv __read_mostly = { .name = "BATMAN_IV", .iface = { @@ -2707,6 +2811,7 @@ static struct batadv_algo_ops batadv_batman_iv __read_mostly = { .get_best_gw_node = batadv_iv_gw_get_best_gw_node, .is_eligible = batadv_iv_gw_is_eligible, .print = batadv_iv_gw_print, + .dump = batadv_iv_gw_dump, }, }; From b71bb6f924fe4c77d476738289242b5567269df6 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 3 Jul 2016 13:31:44 +0200 Subject: [PATCH 0058/1715] batman-adv: add B.A.T.M.A.N. V bat_gw_dump implementations Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Simon Wunderlich --- net/batman-adv/bat_v.c | 125 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 125 insertions(+) diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c index 9dccfaf32115..9e872dcc1260 100644 --- a/net/batman-adv/bat_v.c +++ b/net/batman-adv/bat_v.c @@ -897,6 +897,130 @@ static void batadv_v_gw_print(struct batadv_priv *bat_priv, seq_puts(seq, "No gateways in range ...\n"); } +/** + * batadv_v_gw_dump_entry - Dump a gateway into a message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @bat_priv: The bat priv with all the soft interface information + * @gw_node: Gateway to be dumped + * + * Return: Error code, or 0 on success + */ +static int batadv_v_gw_dump_entry(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_priv *bat_priv, + struct batadv_gw_node *gw_node) +{ + struct batadv_neigh_ifinfo *router_ifinfo = NULL; + struct batadv_neigh_node *router; + struct batadv_gw_node *curr_gw; + int ret = -EINVAL; + void *hdr; + + router = batadv_orig_router_get(gw_node->orig_node, BATADV_IF_DEFAULT); + if (!router) + goto out; + + router_ifinfo = batadv_neigh_ifinfo_get(router, BATADV_IF_DEFAULT); + if (!router_ifinfo) + goto out; + + curr_gw = batadv_gw_get_selected_gw_node(bat_priv); + + hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, + NLM_F_MULTI, BATADV_CMD_GET_GATEWAYS); + if (!hdr) { + ret = -ENOBUFS; + goto out; + } + + ret = -EMSGSIZE; + + if (curr_gw == gw_node) { + if (nla_put_flag(msg, BATADV_ATTR_FLAG_BEST)) { + genlmsg_cancel(msg, hdr); + goto out; + } + } + + if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN, + gw_node->orig_node->orig)) { + genlmsg_cancel(msg, hdr); + goto out; + } + + if (nla_put_u32(msg, BATADV_ATTR_THROUGHPUT, + router_ifinfo->bat_v.throughput)) { + genlmsg_cancel(msg, hdr); + goto out; + } + + if (nla_put(msg, BATADV_ATTR_ROUTER, ETH_ALEN, router->addr)) { + genlmsg_cancel(msg, hdr); + goto out; + } + + if (nla_put_string(msg, BATADV_ATTR_HARD_IFNAME, + router->if_incoming->net_dev->name)) { + genlmsg_cancel(msg, hdr); + goto out; + } + + if (nla_put_u32(msg, BATADV_ATTR_BANDWIDTH_DOWN, + gw_node->bandwidth_down)) { + genlmsg_cancel(msg, hdr); + goto out; + } + + if (nla_put_u32(msg, BATADV_ATTR_BANDWIDTH_UP, gw_node->bandwidth_up)) { + genlmsg_cancel(msg, hdr); + goto out; + } + + genlmsg_end(msg, hdr); + ret = 0; + +out: + if (router_ifinfo) + batadv_neigh_ifinfo_put(router_ifinfo); + if (router) + batadv_neigh_node_put(router); + return ret; +} + +/** + * batadv_v_gw_dump - Dump gateways into a message + * @msg: Netlink message to dump into + * @cb: Control block containing additional options + * @bat_priv: The bat priv with all the soft interface information + */ +static void batadv_v_gw_dump(struct sk_buff *msg, struct netlink_callback *cb, + struct batadv_priv *bat_priv) +{ + int portid = NETLINK_CB(cb->skb).portid; + struct batadv_gw_node *gw_node; + int idx_skip = cb->args[0]; + int idx = 0; + + rcu_read_lock(); + hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) { + if (idx++ < idx_skip) + continue; + + if (batadv_v_gw_dump_entry(msg, portid, cb->nlh->nlmsg_seq, + bat_priv, gw_node)) { + idx_skip = idx - 1; + goto unlock; + } + } + + idx_skip = idx; +unlock: + rcu_read_unlock(); + + cb->args[0] = idx_skip; +} + static struct batadv_algo_ops batadv_batman_v __read_mostly = { .name = "BATMAN_V", .iface = { @@ -923,6 +1047,7 @@ static struct batadv_algo_ops batadv_batman_v __read_mostly = { .get_best_gw_node = batadv_v_gw_get_best_gw_node, .is_eligible = batadv_v_gw_is_eligible, .print = batadv_v_gw_print, + .dump = batadv_v_gw_dump, }, }; From 04f3f5bf1883fbe0acba5c1fc698cf5cedebc5c5 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sun, 3 Jul 2016 13:31:45 +0200 Subject: [PATCH 0059/1715] batman-adv: add B.A.T.M.A.N. Dump BLA claims via netlink Dump the list of bridge loop avoidance claims via the netlink socket. Signed-off-by: Andrew Lunn [sven.eckelmann@open-mesh.com: add policy for attributes, fix includes, fix soft_iface reference leak] Signed-off-by: Sven Eckelmann [sw@simonwunderlich.de: fix kerneldoc, fix error reporting] Signed-off-by: Simon Wunderlich Signed-off-by: Marek Lindner --- include/uapi/linux/batman_adv.h | 12 ++ net/batman-adv/bridge_loop_avoidance.c | 169 +++++++++++++++++++++++++ net/batman-adv/bridge_loop_avoidance.h | 10 +- net/batman-adv/netlink.c | 12 ++ 4 files changed, 202 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h index a13fc09e8192..96b37ab2e840 100644 --- a/include/uapi/linux/batman_adv.h +++ b/include/uapi/linux/batman_adv.h @@ -91,6 +91,11 @@ enum batadv_tt_client_flags { * @BATADV_ATTR_BANDWIDTH_UP: Reported uplink bandwidth * @BATADV_ATTR_BANDWIDTH_DOWN: Reported downlink bandwidth * @BATADV_ATTR_ROUTER: Gateway router MAC address + * @BATADV_ATTR_BLA_OWN: Flag indicating own originator + * @BATADV_ATTR_BLA_ADDRESS: Bridge loop avoidance claim MAC address + * @BATADV_ATTR_BLA_VID: BLA VLAN ID + * @BATADV_ATTR_BLA_BACKBONE: BLA gateway originator MAC address + * @BATADV_ATTR_BLA_CRC: BLA CRC * @__BATADV_ATTR_AFTER_LAST: internal use * @NUM_BATADV_ATTR: total number of batadv_nl_attrs available * @BATADV_ATTR_MAX: highest attribute number currently defined @@ -126,6 +131,11 @@ enum batadv_nl_attrs { BATADV_ATTR_BANDWIDTH_UP, BATADV_ATTR_BANDWIDTH_DOWN, BATADV_ATTR_ROUTER, + BATADV_ATTR_BLA_OWN, + BATADV_ATTR_BLA_ADDRESS, + BATADV_ATTR_BLA_VID, + BATADV_ATTR_BLA_BACKBONE, + BATADV_ATTR_BLA_CRC, /* add attributes above here, update the policy in netlink.c */ __BATADV_ATTR_AFTER_LAST, NUM_BATADV_ATTR = __BATADV_ATTR_AFTER_LAST, @@ -146,6 +156,7 @@ enum batadv_nl_attrs { * @BATADV_CMD_GET_ORIGINATORS: Query list of originators * @BATADV_CMD_GET_NEIGHBORS: Query list of neighbours * @BATADV_CMD_GET_GATEWAYS: Query list of gateways + * @BATADV_CMD_GET_BLA_CLAIM: Query list of bridge loop avoidance claims * @__BATADV_CMD_AFTER_LAST: internal use * @BATADV_CMD_MAX: highest used command number */ @@ -161,6 +172,7 @@ enum batadv_nl_commands { BATADV_CMD_GET_ORIGINATORS, BATADV_CMD_GET_NEIGHBORS, BATADV_CMD_GET_GATEWAYS, + BATADV_CMD_GET_BLA_CLAIM, /* add new commands above here */ __BATADV_CMD_AFTER_LAST, BATADV_CMD_MAX = __BATADV_CMD_AFTER_LAST - 1 diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index c75ef648f0fd..aafa88f3e98d 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -45,12 +46,18 @@ #include #include #include +#include +#include +#include +#include #include "hard-interface.h" #include "hash.h" #include "log.h" +#include "netlink.h" #include "originator.h" #include "packet.h" +#include "soft-interface.h" #include "sysfs.h" #include "translation-table.h" @@ -2051,6 +2058,168 @@ out: return 0; } +/** + * batadv_bla_claim_dump_entry - dump one entry of the claim table + * to a netlink socket + * @msg: buffer for the message + * @portid: netlink port + * @seq: Sequence number of netlink message + * @primary_if: primary interface + * @claim: entry to dump + * + * Return: 0 or error code. + */ +static int +batadv_bla_claim_dump_entry(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_hard_iface *primary_if, + struct batadv_bla_claim *claim) +{ + u8 *primary_addr = primary_if->net_dev->dev_addr; + u16 backbone_crc; + bool is_own; + void *hdr; + int ret = -EINVAL; + + hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, + NLM_F_MULTI, BATADV_CMD_GET_BLA_CLAIM); + if (!hdr) { + ret = -ENOBUFS; + goto out; + } + + is_own = batadv_compare_eth(claim->backbone_gw->orig, + primary_addr); + + spin_lock_bh(&claim->backbone_gw->crc_lock); + backbone_crc = claim->backbone_gw->crc; + spin_unlock_bh(&claim->backbone_gw->crc_lock); + + if (is_own) + if (nla_put_flag(msg, BATADV_ATTR_BLA_OWN)) { + genlmsg_cancel(msg, hdr); + goto out; + } + + if (nla_put(msg, BATADV_ATTR_BLA_ADDRESS, ETH_ALEN, claim->addr) || + nla_put_u16(msg, BATADV_ATTR_BLA_VID, claim->vid) || + nla_put(msg, BATADV_ATTR_BLA_BACKBONE, ETH_ALEN, + claim->backbone_gw->orig) || + nla_put_u16(msg, BATADV_ATTR_BLA_CRC, + backbone_crc)) { + genlmsg_cancel(msg, hdr); + goto out; + } + + genlmsg_end(msg, hdr); + ret = 0; + +out: + return ret; +} + +/** + * batadv_bla_claim_dump_bucket - dump one bucket of the claim table + * to a netlink socket + * @msg: buffer for the message + * @portid: netlink port + * @seq: Sequence number of netlink message + * @primary_if: primary interface + * @head: bucket to dump + * @idx_skip: How many entries to skip + * + * Return: always 0. + */ +static int +batadv_bla_claim_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_hard_iface *primary_if, + struct hlist_head *head, int *idx_skip) +{ + struct batadv_bla_claim *claim; + int idx = 0; + + rcu_read_lock(); + hlist_for_each_entry_rcu(claim, head, hash_entry) { + if (idx++ < *idx_skip) + continue; + if (batadv_bla_claim_dump_entry(msg, portid, seq, + primary_if, claim)) { + *idx_skip = idx - 1; + goto unlock; + } + } + + *idx_skip = idx; +unlock: + rcu_read_unlock(); + return 0; +} + +/** + * batadv_bla_claim_dump - dump claim table to a netlink socket + * @msg: buffer for the message + * @cb: callback structure containing arguments + * + * Return: message length. + */ +int batadv_bla_claim_dump(struct sk_buff *msg, struct netlink_callback *cb) +{ + struct batadv_hard_iface *primary_if = NULL; + int portid = NETLINK_CB(cb->skb).portid; + struct net *net = sock_net(cb->skb->sk); + struct net_device *soft_iface; + struct batadv_hashtable *hash; + struct batadv_priv *bat_priv; + int bucket = cb->args[0]; + struct hlist_head *head; + int idx = cb->args[1]; + int ifindex; + int ret = 0; + + ifindex = batadv_netlink_get_ifindex(cb->nlh, + BATADV_ATTR_MESH_IFINDEX); + if (!ifindex) + return -EINVAL; + + soft_iface = dev_get_by_index(net, ifindex); + if (!soft_iface || !batadv_softif_is_valid(soft_iface)) { + ret = -ENODEV; + goto out; + } + + bat_priv = netdev_priv(soft_iface); + hash = bat_priv->bla.claim_hash; + + primary_if = batadv_primary_if_get_selected(bat_priv); + if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) { + ret = -ENOENT; + goto out; + } + + while (bucket < hash->size) { + head = &hash->table[bucket]; + + if (batadv_bla_claim_dump_bucket(msg, portid, + cb->nlh->nlmsg_seq, + primary_if, head, &idx)) + break; + bucket++; + } + + cb->args[0] = bucket; + cb->args[1] = idx; + + ret = msg->len; + +out: + if (primary_if) + batadv_hardif_put(primary_if); + + if (soft_iface) + dev_put(soft_iface); + + return ret; +} + /** * batadv_bla_backbone_table_seq_print_text - print the backbone table in a seq * file diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h index 0f01daeb359e..a80b9e96f28e 100644 --- a/net/batman-adv/bridge_loop_avoidance.h +++ b/net/batman-adv/bridge_loop_avoidance.h @@ -23,6 +23,7 @@ #include struct net_device; +struct netlink_callback; struct seq_file; struct sk_buff; @@ -35,6 +36,7 @@ bool batadv_bla_is_backbone_gw(struct sk_buff *skb, struct batadv_orig_node *orig_node, int hdr_size); int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset); +int batadv_bla_claim_dump(struct sk_buff *msg, struct netlink_callback *cb); int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset); bool batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, u8 *orig, @@ -47,7 +49,7 @@ void batadv_bla_update_orig_address(struct batadv_priv *bat_priv, void batadv_bla_status_update(struct net_device *net_dev); int batadv_bla_init(struct batadv_priv *bat_priv); void batadv_bla_free(struct batadv_priv *bat_priv); - +int batadv_bla_claim_dump(struct sk_buff *msg, struct netlink_callback *cb); #define BATADV_BLA_CRC_INIT 0 #else /* ifdef CONFIG_BATMAN_ADV_BLA */ @@ -112,6 +114,12 @@ static inline void batadv_bla_free(struct batadv_priv *bat_priv) { } +static inline int batadv_bla_claim_dump(struct sk_buff *msg, + struct netlink_callback *cb) +{ + return -EOPNOTSUPP; +} + #endif /* ifdef CONFIG_BATMAN_ADV_BLA */ #endif /* ifndef _NET_BATMAN_ADV_BLA_H_ */ diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index c68ccb03634d..b33675cbaecf 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -38,6 +38,7 @@ #include #include "bat_algo.h" +#include "bridge_loop_avoidance.h" #include "gateway_client.h" #include "hard-interface.h" #include "originator.h" @@ -91,6 +92,11 @@ static struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = { [BATADV_ATTR_BANDWIDTH_UP] = { .type = NLA_U32 }, [BATADV_ATTR_BANDWIDTH_DOWN] = { .type = NLA_U32 }, [BATADV_ATTR_ROUTER] = { .len = ETH_ALEN }, + [BATADV_ATTR_BLA_OWN] = { .type = NLA_FLAG }, + [BATADV_ATTR_BLA_ADDRESS] = { .len = ETH_ALEN }, + [BATADV_ATTR_BLA_VID] = { .type = NLA_U16 }, + [BATADV_ATTR_BLA_BACKBONE] = { .len = ETH_ALEN }, + [BATADV_ATTR_BLA_CRC] = { .type = NLA_U16 }, }; /** @@ -580,6 +586,12 @@ static struct genl_ops batadv_netlink_ops[] = { .policy = batadv_netlink_policy, .dumpit = batadv_gw_dump, }, + { + .cmd = BATADV_CMD_GET_BLA_CLAIM, + .flags = GENL_ADMIN_PERM, + .policy = batadv_netlink_policy, + .dumpit = batadv_bla_claim_dump, + }, }; /** From 8dad6f0db6b4457b1c4b04d4edf62744921c32fd Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 3 Jul 2016 13:31:46 +0200 Subject: [PATCH 0060/1715] batman-adv: Provide bla group in the mesh_info netlink msg The bridge loop avoidange is the main information for the debugging of of bridge loop detection problems. It is therefore necessary when comparing the bla claim tables. Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich Signed-off-by: Marek Lindner --- net/batman-adv/netlink.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index b33675cbaecf..464de9d05135 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -19,6 +19,7 @@ #include "main.h" #include +#include #include #include #include @@ -42,6 +43,7 @@ #include "gateway_client.h" #include "hard-interface.h" #include "originator.h" +#include "packet.h" #include "soft-interface.h" #include "tp_meter.h" #include "translation-table.h" @@ -141,6 +143,12 @@ batadv_netlink_mesh_info_put(struct sk_buff *msg, struct net_device *soft_iface) (u8)atomic_read(&bat_priv->tt.vn))) goto out; +#ifdef CONFIG_BATMAN_ADV_BLA + if (nla_put_u16(msg, BATADV_ATTR_BLA_CRC, + ntohs(bat_priv->bla.claim_dest.group))) + goto out; +#endif + primary_if = batadv_primary_if_get_selected(bat_priv); if (primary_if && primary_if->if_status == BATADV_IF_ACTIVE) { hard_iface = primary_if->net_dev; From ea4152e1171604f325f1a5f080190823a0edbc1f Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Sun, 3 Jul 2016 13:31:47 +0200 Subject: [PATCH 0061/1715] batman-adv: add backbone table netlink support Dump the list of bridge loop avoidance backbones via the netlink socket. Signed-off-by: Simon Wunderlich Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner --- include/uapi/linux/batman_adv.h | 2 + net/batman-adv/bridge_loop_avoidance.c | 164 +++++++++++++++++++++++++ net/batman-adv/bridge_loop_avoidance.h | 7 ++ net/batman-adv/netlink.c | 7 ++ 4 files changed, 180 insertions(+) diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h index 96b37ab2e840..734fe83ab645 100644 --- a/include/uapi/linux/batman_adv.h +++ b/include/uapi/linux/batman_adv.h @@ -157,6 +157,7 @@ enum batadv_nl_attrs { * @BATADV_CMD_GET_NEIGHBORS: Query list of neighbours * @BATADV_CMD_GET_GATEWAYS: Query list of gateways * @BATADV_CMD_GET_BLA_CLAIM: Query list of bridge loop avoidance claims + * @BATADV_CMD_GET_BLA_BACKBONE: Query list of bridge loop avoidance backbones * @__BATADV_CMD_AFTER_LAST: internal use * @BATADV_CMD_MAX: highest used command number */ @@ -173,6 +174,7 @@ enum batadv_nl_commands { BATADV_CMD_GET_NEIGHBORS, BATADV_CMD_GET_GATEWAYS, BATADV_CMD_GET_BLA_CLAIM, + BATADV_CMD_GET_BLA_BACKBONE, /* add new commands above here */ __BATADV_CMD_AFTER_LAST, BATADV_CMD_MAX = __BATADV_CMD_AFTER_LAST - 1 diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index aafa88f3e98d..35ed1d32bab5 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -2283,3 +2283,167 @@ out: batadv_hardif_put(primary_if); return 0; } + +/** + * batadv_bla_backbone_dump_entry - dump one entry of the backbone table + * to a netlink socket + * @msg: buffer for the message + * @portid: netlink port + * @seq: Sequence number of netlink message + * @primary_if: primary interface + * @backbone_gw: entry to dump + * + * Return: 0 or error code. + */ +static int +batadv_bla_backbone_dump_entry(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_hard_iface *primary_if, + struct batadv_bla_backbone_gw *backbone_gw) +{ + u8 *primary_addr = primary_if->net_dev->dev_addr; + u16 backbone_crc; + bool is_own; + int msecs; + void *hdr; + int ret = -EINVAL; + + hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, + NLM_F_MULTI, BATADV_CMD_GET_BLA_BACKBONE); + if (!hdr) { + ret = -ENOBUFS; + goto out; + } + + is_own = batadv_compare_eth(backbone_gw->orig, primary_addr); + + spin_lock_bh(&backbone_gw->crc_lock); + backbone_crc = backbone_gw->crc; + spin_unlock_bh(&backbone_gw->crc_lock); + + msecs = jiffies_to_msecs(jiffies - backbone_gw->lasttime); + + if (is_own) + if (nla_put_flag(msg, BATADV_ATTR_BLA_OWN)) { + genlmsg_cancel(msg, hdr); + goto out; + } + + if (nla_put(msg, BATADV_ATTR_BLA_BACKBONE, ETH_ALEN, + backbone_gw->orig) || + nla_put_u16(msg, BATADV_ATTR_BLA_VID, backbone_gw->vid) || + nla_put_u16(msg, BATADV_ATTR_BLA_CRC, + backbone_crc) || + nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS, msecs)) { + genlmsg_cancel(msg, hdr); + goto out; + } + + genlmsg_end(msg, hdr); + ret = 0; + +out: + return ret; +} + +/** + * batadv_bla_backbone_dump_bucket - dump one bucket of the backbone table + * to a netlink socket + * @msg: buffer for the message + * @portid: netlink port + * @seq: Sequence number of netlink message + * @primary_if: primary interface + * @head: bucket to dump + * @idx_skip: How many entries to skip + * + * Return: always 0. + */ +static int +batadv_bla_backbone_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_hard_iface *primary_if, + struct hlist_head *head, int *idx_skip) +{ + struct batadv_bla_backbone_gw *backbone_gw; + int idx = 0; + + rcu_read_lock(); + hlist_for_each_entry_rcu(backbone_gw, head, hash_entry) { + if (idx++ < *idx_skip) + continue; + if (batadv_bla_backbone_dump_entry(msg, portid, seq, + primary_if, backbone_gw)) { + *idx_skip = idx - 1; + goto unlock; + } + } + + *idx_skip = idx; +unlock: + rcu_read_unlock(); + return 0; +} + +/** + * batadv_bla_backbone_dump - dump backbone table to a netlink socket + * @msg: buffer for the message + * @cb: callback structure containing arguments + * + * Return: message length. + */ +int batadv_bla_backbone_dump(struct sk_buff *msg, struct netlink_callback *cb) +{ + struct batadv_hard_iface *primary_if = NULL; + int portid = NETLINK_CB(cb->skb).portid; + struct net *net = sock_net(cb->skb->sk); + struct net_device *soft_iface; + struct batadv_hashtable *hash; + struct batadv_priv *bat_priv; + int bucket = cb->args[0]; + struct hlist_head *head; + int idx = cb->args[1]; + int ifindex; + int ret = 0; + + ifindex = batadv_netlink_get_ifindex(cb->nlh, + BATADV_ATTR_MESH_IFINDEX); + if (!ifindex) + return -EINVAL; + + soft_iface = dev_get_by_index(net, ifindex); + if (!soft_iface || !batadv_softif_is_valid(soft_iface)) { + ret = -ENODEV; + goto out; + } + + bat_priv = netdev_priv(soft_iface); + hash = bat_priv->bla.backbone_hash; + + primary_if = batadv_primary_if_get_selected(bat_priv); + if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) { + ret = -ENOENT; + goto out; + } + + while (bucket < hash->size) { + head = &hash->table[bucket]; + + if (batadv_bla_backbone_dump_bucket(msg, portid, + cb->nlh->nlmsg_seq, + primary_if, head, &idx)) + break; + bucket++; + } + + cb->args[0] = bucket; + cb->args[1] = idx; + + ret = msg->len; + +out: + if (primary_if) + batadv_hardif_put(primary_if); + + if (soft_iface) + dev_put(soft_iface); + + return ret; +} diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h index a80b9e96f28e..1ae93e46fb98 100644 --- a/net/batman-adv/bridge_loop_avoidance.h +++ b/net/batman-adv/bridge_loop_avoidance.h @@ -39,6 +39,7 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset); int batadv_bla_claim_dump(struct sk_buff *msg, struct netlink_callback *cb); int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset); +int batadv_bla_backbone_dump(struct sk_buff *msg, struct netlink_callback *cb); bool batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, u8 *orig, unsigned short vid); bool batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv, @@ -120,6 +121,12 @@ static inline int batadv_bla_claim_dump(struct sk_buff *msg, return -EOPNOTSUPP; } +static inline int batadv_bla_backbone_dump(struct sk_buff *msg, + struct netlink_callback *cb) +{ + return -EOPNOTSUPP; +} + #endif /* ifdef CONFIG_BATMAN_ADV_BLA */ #endif /* ifndef _NET_BATMAN_ADV_BLA_H_ */ diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index 464de9d05135..c3f68167591d 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -600,6 +600,13 @@ static struct genl_ops batadv_netlink_ops[] = { .policy = batadv_netlink_policy, .dumpit = batadv_bla_claim_dump, }, + { + .cmd = BATADV_CMD_GET_BLA_BACKBONE, + .flags = GENL_ADMIN_PERM, + .policy = batadv_netlink_policy, + .dumpit = batadv_bla_backbone_dump, + }, + }; /** From 4c09a08b47ffac9aa3bc91870aa54c9ae39d9674 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sun, 3 Jul 2016 13:31:48 +0200 Subject: [PATCH 0062/1715] batman-adv: Indicate netlink socket can be used with netns. Set the netnsof flag on the family structure, indicating it can be used with different network name spaces. Signed-off-by: Andrew Lunn Signed-off-by: Simon Wunderlich Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner --- net/batman-adv/netlink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index c3f68167591d..18831e72b0fb 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -54,6 +54,7 @@ struct genl_family batadv_netlink_family = { .name = BATADV_NL_NAME, .version = 1, .maxattr = BATADV_ATTR_MAX, + .netnsok = true, }; /* multicast groups */ From 09537d1869499fb3eac710cc54e700602b6c66c9 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 15 Jul 2016 17:39:16 +0200 Subject: [PATCH 0063/1715] batman-adv: Place kref_get for orig_node_vlan near use It is hard to understand why the refcnt is increased when it isn't done near the actual place the new reference is used. So using kref_get right before the place which requires the reference and in the same function helps to avoid accidental problems caused by incorrect reference counting. Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Simon Wunderlich --- net/batman-adv/originator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 95c85558c530..5108af11a6e9 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -133,9 +133,9 @@ batadv_orig_node_vlan_new(struct batadv_orig_node *orig_node, goto out; kref_init(&vlan->refcount); - kref_get(&vlan->refcount); vlan->vid = vid; + kref_get(&vlan->refcount); hlist_add_head_rcu(&vlan->list, &orig_node->vlan_list); out: From f257b99becf5183c721a3e0e41ad9b9fbbddbd44 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 15 Jul 2016 17:39:17 +0200 Subject: [PATCH 0064/1715] batman-adv: Place kref_get for orig_ifinfo near use It is hard to understand why the refcnt is increased when it isn't done near the actual place the new reference is used. So using kref_get right before the place which requires the reference and in the same function helps to avoid accidental problems caused by incorrect reference counting. Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Simon Wunderlich --- net/batman-adv/originator.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 5108af11a6e9..88289645f3c9 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -386,6 +386,7 @@ batadv_orig_ifinfo_new(struct batadv_orig_node *orig_node, orig_ifinfo->if_outgoing = if_outgoing; INIT_HLIST_NODE(&orig_ifinfo->list); kref_init(&orig_ifinfo->refcount); + kref_get(&orig_ifinfo->refcount); hlist_add_head_rcu(&orig_ifinfo->list, &orig_node->ifinfo_list); From 23f554855911f788474b52ff3b7af656b5b61239 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 15 Jul 2016 17:39:18 +0200 Subject: [PATCH 0065/1715] batman-adv: Place kref_get for tt_orig_list_entry near use It is hard to understand why the refcnt is increased when it isn't done near the actual place the new reference is used. So using kref_get right before the place which requires the reference and in the same function helps to avoid accidental problems caused by incorrect reference counting. Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Simon Wunderlich --- net/batman-adv/translation-table.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 20804078293c..5cc500fb8ddc 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -1567,9 +1567,9 @@ batadv_tt_global_orig_entry_add(struct batadv_tt_global_entry *tt_global, orig_entry->orig_node = orig_node; orig_entry->ttvn = ttvn; kref_init(&orig_entry->refcount); - kref_get(&orig_entry->refcount); spin_lock_bh(&tt_global->list_lock); + kref_get(&orig_entry->refcount); hlist_add_head_rcu(&orig_entry->list, &tt_global->orig_list); spin_unlock_bh(&tt_global->list_lock); From 2e774ac2f7ad4a7fb4289b7aacd9709bf3e2f991 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 15 Jul 2016 17:39:19 +0200 Subject: [PATCH 0066/1715] batman-adv: Place kref_get for neigh_ifinfo near use It is hard to understand why the refcnt is increased when it isn't done near the actual place the new reference is used. So using kref_get right before the place which requires the reference and in the same function helps to avoid accidental problems caused by incorrect reference counting. Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Simon Wunderlich --- net/batman-adv/originator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 88289645f3c9..5e99a6e296e6 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -460,9 +460,9 @@ batadv_neigh_ifinfo_new(struct batadv_neigh_node *neigh, INIT_HLIST_NODE(&neigh_ifinfo->list); kref_init(&neigh_ifinfo->refcount); - kref_get(&neigh_ifinfo->refcount); neigh_ifinfo->if_outgoing = if_outgoing; + kref_get(&neigh_ifinfo->refcount); hlist_add_head_rcu(&neigh_ifinfo->list, &neigh->ifinfo_list); out: From 8427445886d23729cc41305cef580605202f01db Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 15 Jul 2016 17:39:20 +0200 Subject: [PATCH 0067/1715] batman-adv: Place kref_get for neigh_node near use It is hard to understand why the refcnt is increased when it isn't done near the actual place the new reference is used. So using kref_get right before the place which requires the reference and in the same function helps to avoid accidental problems caused by incorrect reference counting. Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Simon Wunderlich --- net/batman-adv/originator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 5e99a6e296e6..0792de869f4e 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -654,8 +654,8 @@ batadv_neigh_node_create(struct batadv_orig_node *orig_node, /* extra reference for return */ kref_init(&neigh_node->refcount); - kref_get(&neigh_node->refcount); + kref_get(&neigh_node->refcount); hlist_add_head_rcu(&neigh_node->list, &orig_node->neigh_list); batadv_dbg(BATADV_DBG_BATMAN, orig_node->bat_priv, From 55db2d590298e8ffe6ee66134157aa710e45faa2 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 15 Jul 2016 17:39:21 +0200 Subject: [PATCH 0068/1715] batman-adv: Place kref_get for orig_node near use It is hard to understand why the refcnt is increased when it isn't done near the actual place the new reference is used. So using kref_get right before the place which requires the reference and in the same function helps to avoid accidental problems caused by incorrect reference counting. Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Simon Wunderlich --- net/batman-adv/bat_iv_ogm.c | 7 ++++--- net/batman-adv/bat_v_ogm.c | 5 ++--- net/batman-adv/gateway_client.c | 2 +- net/batman-adv/network-coding.c | 7 +++---- net/batman-adv/originator.c | 1 - 5 files changed, 10 insertions(+), 12 deletions(-) diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 9ed4f1fc6ac5..3c7900d543ca 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -324,17 +324,18 @@ batadv_iv_ogm_orig_get(struct batadv_priv *bat_priv, const u8 *addr) if (!orig_node->bat_iv.bcast_own_sum) goto free_orig_node; + kref_get(&orig_node->refcount); hash_added = batadv_hash_add(bat_priv->orig_hash, batadv_compare_orig, batadv_choose_orig, orig_node, &orig_node->hash_entry); if (hash_added != 0) - goto free_orig_node; + goto free_orig_node_hash; return orig_node; -free_orig_node: - /* free twice, as batadv_orig_node_new sets refcount to 2 */ +free_orig_node_hash: batadv_orig_node_put(orig_node); +free_orig_node: batadv_orig_node_put(orig_node); return NULL; diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c index 6fbba4eb0617..1aeeadca620c 100644 --- a/net/batman-adv/bat_v_ogm.c +++ b/net/batman-adv/bat_v_ogm.c @@ -73,13 +73,12 @@ struct batadv_orig_node *batadv_v_ogm_orig_get(struct batadv_priv *bat_priv, if (!orig_node) return NULL; + kref_get(&orig_node->refcount); hash_added = batadv_hash_add(bat_priv->orig_hash, batadv_compare_orig, batadv_choose_orig, orig_node, &orig_node->hash_entry); if (hash_added != 0) { - /* orig_node->refcounter is initialised to 2 by - * batadv_orig_node_new() - */ + /* remove refcnt for newly created orig_node and hash entry */ batadv_orig_node_put(orig_node); batadv_orig_node_put(orig_node); orig_node = NULL; diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index c2928c2287b8..b889e1fdba4d 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -339,8 +339,8 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv, if (!gw_node) return; - kref_get(&orig_node->refcount); INIT_HLIST_NODE(&gw_node->list); + kref_get(&orig_node->refcount); gw_node->orig_node = orig_node; gw_node->bandwidth_down = ntohl(gateway->bandwidth_down); gw_node->bandwidth_up = ntohl(gateway->bandwidth_up); diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index 293ef4ffd4e1..3814cfb94846 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -856,14 +856,13 @@ batadv_nc_get_nc_node(struct batadv_priv *bat_priv, if (!nc_node) return NULL; - kref_get(&orig_neigh_node->refcount); - /* Initialize nc_node */ INIT_LIST_HEAD(&nc_node->list); - ether_addr_copy(nc_node->addr, orig_node->orig); - nc_node->orig_node = orig_neigh_node; kref_init(&nc_node->refcount); kref_get(&nc_node->refcount); + ether_addr_copy(nc_node->addr, orig_node->orig); + kref_get(&orig_neigh_node->refcount); + nc_node->orig_node = orig_neigh_node; /* Select ingoing or outgoing coding node */ if (in_coding) { diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 0792de869f4e..0b7d57aad417 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -989,7 +989,6 @@ struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv, /* extra reference for return */ kref_init(&orig_node->refcount); - kref_get(&orig_node->refcount); orig_node->bat_priv = bat_priv; ether_addr_copy(orig_node->orig, addr); From e3387b266cf68b497be5824e02b074d76244e2ec Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 15 Jul 2016 17:39:22 +0200 Subject: [PATCH 0069/1715] batman-adv: Place kref_get for tt_local_entry near use It is hard to understand why the refcnt is increased when it isn't done near the actual place the new reference is used. So using kref_get right before the place which requires the reference and in the same function helps to avoid accidental problems caused by incorrect reference counting. Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Simon Wunderlich --- net/batman-adv/translation-table.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 5cc500fb8ddc..094da1a2c77a 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -734,7 +734,6 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr, if (batadv_is_wifi_netdev(in_dev)) tt_local->common.flags |= BATADV_TT_CLIENT_WIFI; kref_init(&tt_local->common.refcount); - kref_get(&tt_local->common.refcount); tt_local->last_seen = jiffies; tt_local->common.added_at = tt_local->last_seen; tt_local->vlan = vlan; @@ -746,6 +745,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr, is_multicast_ether_addr(addr)) tt_local->common.flags |= BATADV_TT_CLIENT_NOPURGE; + kref_get(&tt_local->common.refcount); hash_added = batadv_hash_add(bat_priv->tt.local_hash, batadv_compare_tt, batadv_choose_tt, &tt_local->common, &tt_local->common.hash_entry); From 15d5ffdea0ddfa78609b1587851b13d9137fbbcd Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 15 Jul 2016 17:39:23 +0200 Subject: [PATCH 0070/1715] batman-adv: Place kref_get for tt_common near use It is hard to understand why the refcnt is increased when it isn't done near the actual place the new reference is used. So using kref_get right before the place which requires the reference and in the same function helps to avoid accidental problems caused by incorrect reference counting. Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Simon Wunderlich --- net/batman-adv/translation-table.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 094da1a2c77a..d94e298fc7e2 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -1645,13 +1645,13 @@ static bool batadv_tt_global_add(struct batadv_priv *bat_priv, if (flags & BATADV_TT_CLIENT_ROAM) tt_global_entry->roam_at = jiffies; kref_init(&common->refcount); - kref_get(&common->refcount); common->added_at = jiffies; INIT_HLIST_HEAD(&tt_global_entry->orig_list); atomic_set(&tt_global_entry->orig_list_count, 0); spin_lock_init(&tt_global_entry->list_lock); + kref_get(&common->refcount); hash_added = batadv_hash_add(bat_priv->tt.global_hash, batadv_compare_tt, batadv_choose_tt, common, From 7282ac396ea617a0a4f8f89389f84690f8431d7d Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 15 Jul 2016 17:39:24 +0200 Subject: [PATCH 0071/1715] batman-adv: Place kref_get for bla_claim near use It is hard to understand why the refcnt is increased when it isn't done near the actual place the new reference is used. So using kref_get right before the place which requires the reference and in the same function helps to avoid accidental problems caused by incorrect reference counting. Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Simon Wunderlich --- net/batman-adv/bridge_loop_avoidance.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 35ed1d32bab5..b0517a0bc8e7 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -718,12 +718,13 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv, claim->lasttime = jiffies; kref_get(&backbone_gw->refcount); claim->backbone_gw = backbone_gw; - kref_init(&claim->refcount); - kref_get(&claim->refcount); + batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_add_claim(): adding new entry %pM, vid %d to hash ...\n", mac, BATADV_PRINT_VID(vid)); + + kref_get(&claim->refcount); hash_added = batadv_hash_add(bat_priv->bla.claim_hash, batadv_compare_claim, batadv_choose_claim, claim, From 4e8389e17a97c6bdd927d33f6e5d505c08a85a4f Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 15 Jul 2016 17:39:25 +0200 Subject: [PATCH 0072/1715] batman-adv: Place kref_get for bla_backbone_gw near use It is hard to understand why the refcnt is increased when it isn't done near the actual place the new reference is used. So using kref_get right before the place which requires the reference and in the same function helps to avoid accidental problems caused by incorrect reference counting. Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Simon Wunderlich --- net/batman-adv/bridge_loop_avoidance.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index b0517a0bc8e7..1db3c12c0be0 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -526,11 +526,9 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, u8 *orig, atomic_set(&entry->wait_periods, 0); ether_addr_copy(entry->orig, orig); INIT_WORK(&entry->report_work, batadv_bla_loopdetect_report); - - /* one for the hash, one for returning */ kref_init(&entry->refcount); - kref_get(&entry->refcount); + kref_get(&entry->refcount); hash_added = batadv_hash_add(bat_priv->bla.backbone_hash, batadv_compare_backbone_gw, batadv_choose_backbone_gw, entry, From 6a51e09d8b5828698217ac4a04b97de1e5415978 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 15 Jul 2016 17:39:26 +0200 Subject: [PATCH 0073/1715] batman-adv: Place kref_get for dat_entry near use It is hard to understand why the refcnt is increased when it isn't done near the actual place the new reference is used. So using kref_get right before the place which requires the reference and in the same function helps to avoid accidental problems caused by incorrect reference counting. Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Simon Wunderlich --- net/batman-adv/distributed-arp-table.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index b1cc8bfe11ac..059bc23da534 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -343,8 +343,8 @@ static void batadv_dat_entry_add(struct batadv_priv *bat_priv, __be32 ip, ether_addr_copy(dat_entry->mac_addr, mac_addr); dat_entry->last_update = jiffies; kref_init(&dat_entry->refcount); - kref_get(&dat_entry->refcount); + kref_get(&dat_entry->refcount); hash_added = batadv_hash_add(bat_priv->dat.hash, batadv_compare_dat, batadv_hash_dat, dat_entry, &dat_entry->hash_entry); From f665fa7e85e9e32deadc8aee35c958334c812d8d Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 15 Jul 2016 17:39:27 +0200 Subject: [PATCH 0074/1715] batman-adv: Place kref_get for gw_node near use It is hard to understand why the refcnt is increased when it isn't done near the actual place the new reference is used. So using kref_get right before the place which requires the reference and in the same function helps to avoid accidental problems caused by incorrect reference counting. Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Simon Wunderlich --- net/batman-adv/gateway_client.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index b889e1fdba4d..4b51b1cf4f76 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -339,14 +339,15 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv, if (!gw_node) return; + kref_init(&gw_node->refcount); INIT_HLIST_NODE(&gw_node->list); kref_get(&orig_node->refcount); gw_node->orig_node = orig_node; gw_node->bandwidth_down = ntohl(gateway->bandwidth_down); gw_node->bandwidth_up = ntohl(gateway->bandwidth_up); - kref_init(&gw_node->refcount); spin_lock_bh(&bat_priv->gw.list_lock); + kref_get(&gw_node->refcount); hlist_add_head_rcu(&gw_node->list, &bat_priv->gw.list); spin_unlock_bh(&bat_priv->gw.list_lock); @@ -357,6 +358,9 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv, ntohl(gateway->bandwidth_down) % 10, ntohl(gateway->bandwidth_up) / 10, ntohl(gateway->bandwidth_up) % 10); + + /* don't return reference to new gw_node */ + batadv_gw_node_put(gw_node); } /** From b2367e46fa0d50fd784be3ec0a5c370bf2f2b61a Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 15 Jul 2016 17:39:28 +0200 Subject: [PATCH 0075/1715] batman-adv: Place kref_get for hard_iface near use It is hard to understand why the refcnt is increased when it isn't done near the actual place the new reference is used. So using kref_get right before the place which requires the reference and in the same function helps to avoid accidental problems caused by incorrect reference counting. Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Simon Wunderlich --- net/batman-adv/hard-interface.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 43c9a3e02512..9284c73f1b48 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -694,6 +694,7 @@ batadv_hardif_add_interface(struct net_device *net_dev) INIT_HLIST_HEAD(&hard_iface->neigh_list); spin_lock_init(&hard_iface->neigh_list_lock); + kref_init(&hard_iface->refcount); hard_iface->num_bcasts = BATADV_NUM_BCASTS_DEFAULT; if (batadv_is_wifi_netdev(net_dev)) @@ -701,11 +702,8 @@ batadv_hardif_add_interface(struct net_device *net_dev) batadv_v_hardif_init(hard_iface); - /* extra reference for return */ - kref_init(&hard_iface->refcount); - kref_get(&hard_iface->refcount); - batadv_check_known_mac_addr(hard_iface->net_dev); + kref_get(&hard_iface->refcount); list_add_tail_rcu(&hard_iface->list, &batadv_hardif_list); return hard_iface; From df28ca6bb3282a4c8dc5b65f60b0136fc190ee52 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 15 Jul 2016 17:39:29 +0200 Subject: [PATCH 0076/1715] batman-adv: Place kref_get for softif_vlan near use It is hard to understand why the refcnt is increased when it isn't done near the actual place the new reference is used. So using kref_get right before the place which requires the reference and in the same function helps to avoid accidental problems caused by incorrect reference counting. Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Simon Wunderlich --- net/batman-adv/soft-interface.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index e508bf5957b3..49e16b6e0ba3 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -594,6 +594,7 @@ int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid) } spin_lock_bh(&bat_priv->softif_vlan_list_lock); + kref_get(&vlan->refcount); hlist_add_head_rcu(&vlan->list, &bat_priv->softif_vlan_list); spin_unlock_bh(&bat_priv->softif_vlan_list_lock); @@ -604,6 +605,9 @@ int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid) bat_priv->soft_iface->dev_addr, vid, BATADV_NULL_IFINDEX, BATADV_NO_MARK); + /* don't return reference to new softif_vlan */ + batadv_softif_vlan_put(vlan); + return 0; } From da7a26af4ae7192e63103aaaf0941a1fd42723df Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 15 Jul 2016 17:39:30 +0200 Subject: [PATCH 0077/1715] batman-adv: Place kref_get for nc_node near use It is hard to understand why the refcnt is increased when it isn't done near the actual place the new reference is used. So using kref_get right before the place which requires the reference and in the same function helps to avoid accidental problems caused by incorrect reference counting. Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Simon Wunderlich --- net/batman-adv/network-coding.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index 3814cfb94846..4f4cfe53f973 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -859,7 +859,6 @@ batadv_nc_get_nc_node(struct batadv_priv *bat_priv, /* Initialize nc_node */ INIT_LIST_HEAD(&nc_node->list); kref_init(&nc_node->refcount); - kref_get(&nc_node->refcount); ether_addr_copy(nc_node->addr, orig_node->orig); kref_get(&orig_neigh_node->refcount); nc_node->orig_node = orig_neigh_node; @@ -878,6 +877,7 @@ batadv_nc_get_nc_node(struct batadv_priv *bat_priv, /* Add nc_node to orig_node */ spin_lock_bh(lock); + kref_get(&nc_node->refcount); list_add_tail_rcu(&nc_node->list, list); spin_unlock_bh(lock); From f489eab5b16b8a2ed8d63b6a725cc31e0c1d6c1a Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 15 Jul 2016 17:39:31 +0200 Subject: [PATCH 0078/1715] batman-adv: Place kref_get for nc_path near use It is hard to understand why the refcnt is increased when it isn't done near the actual place the new reference is used. So using kref_get right before the place which requires the reference and in the same function helps to avoid accidental problems caused by incorrect reference counting. Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Simon Wunderlich --- net/batman-adv/network-coding.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index 4f4cfe53f973..165cd27777cb 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -978,7 +978,6 @@ static struct batadv_nc_path *batadv_nc_get_path(struct batadv_priv *bat_priv, INIT_LIST_HEAD(&nc_path->packet_list); spin_lock_init(&nc_path->packet_list_lock); kref_init(&nc_path->refcount); - kref_get(&nc_path->refcount); nc_path->last_valid = jiffies; ether_addr_copy(nc_path->next_hop, dst); ether_addr_copy(nc_path->prev_hop, src); @@ -988,6 +987,7 @@ static struct batadv_nc_path *batadv_nc_get_path(struct batadv_priv *bat_priv, nc_path->next_hop); /* Add nc_path to hash table */ + kref_get(&nc_path->refcount); hash_added = batadv_hash_add(hash, batadv_nc_hash_compare, batadv_nc_hash_choose, &nc_path_key, &nc_path->hash_entry); From 6913d61be5757dab2327a64a95d87caf7cca0632 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 15 Jul 2016 17:39:32 +0200 Subject: [PATCH 0079/1715] batman-adv: Place kref_get for tvlv_container near use It is hard to understand why the refcnt is increased when it isn't done near the actual place the new reference is used. So using kref_get right before the place which requires the reference and in the same function helps to avoid accidental problems caused by incorrect reference counting. Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Simon Wunderlich --- net/batman-adv/tvlv.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/batman-adv/tvlv.c b/net/batman-adv/tvlv.c index 3d1cf0fb112d..353386780e6d 100644 --- a/net/batman-adv/tvlv.c +++ b/net/batman-adv/tvlv.c @@ -257,8 +257,13 @@ void batadv_tvlv_container_register(struct batadv_priv *bat_priv, spin_lock_bh(&bat_priv->tvlv.container_list_lock); tvlv_old = batadv_tvlv_container_get(bat_priv, type, version); batadv_tvlv_container_remove(bat_priv, tvlv_old); + + kref_get(&tvlv_new->refcount); hlist_add_head(&tvlv_new->list, &bat_priv->tvlv.container_list); spin_unlock_bh(&bat_priv->tvlv.container_list_lock); + + /* don't return reference to new tvlv_container */ + batadv_tvlv_container_put(tvlv_new); } /** From 27d684ec5bc7caf8a72a7a0383fa3e1631ee2b25 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 15 Jul 2016 17:39:33 +0200 Subject: [PATCH 0080/1715] batman-adv: Place kref_get for tvlv_handler near use It is hard to understand why the refcnt is increased when it isn't done near the actual place the new reference is used. So using kref_get right before the place which requires the reference and in the same function helps to avoid accidental problems caused by incorrect reference counting. Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Simon Wunderlich --- net/batman-adv/tvlv.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/batman-adv/tvlv.c b/net/batman-adv/tvlv.c index 353386780e6d..77654f055f24 100644 --- a/net/batman-adv/tvlv.c +++ b/net/batman-adv/tvlv.c @@ -547,8 +547,12 @@ void batadv_tvlv_handler_register(struct batadv_priv *bat_priv, INIT_HLIST_NODE(&tvlv_handler->list); spin_lock_bh(&bat_priv->tvlv.handler_list_lock); + kref_get(&tvlv_handler->refcount); hlist_add_head_rcu(&tvlv_handler->list, &bat_priv->tvlv.handler_list); spin_unlock_bh(&bat_priv->tvlv.handler_list_lock); + + /* don't return reference to new tvlv_handler */ + batadv_tvlv_handler_put(tvlv_handler); } /** From 06d640c9aad6d96713ea2fbe36f5a344428ccd57 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 10 Jul 2016 15:47:57 +0200 Subject: [PATCH 0081/1715] batman-adv: Keep batadv netdev when hardif disappears Switch-like virtual interfaces like bridge or openvswitch don't destroy itself when all their attached netdevices dissappear. Instead they only remove the link to the unregistered device and keep working until they get removed manually. This has the benefit that all configurations for this interfaces are kept and daemons reacting to rtnl events can just add new slave interfaces without going through the complete configuration of the switch-like netdevice. Handling unregister events of client devices similar in batman-adv allows users to drop their current workaround of dummy netdevices attached to batman-adv soft-interfaces. Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Simon Wunderlich --- net/batman-adv/hard-interface.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 9284c73f1b48..08ce36147c4c 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -725,7 +725,7 @@ static void batadv_hardif_remove_interface(struct batadv_hard_iface *hard_iface) /* first deactivate interface */ if (hard_iface->if_status != BATADV_IF_NOT_IN_USE) batadv_hardif_disable_interface(hard_iface, - BATADV_IF_CLEANUP_AUTO); + BATADV_IF_CLEANUP_KEEP); if (hard_iface->if_status != BATADV_IF_NOT_IN_USE) return; From dc1cbd145eecf21209d0322874e1766bcbce3e3f Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sat, 16 Jul 2016 09:31:20 +0200 Subject: [PATCH 0082/1715] batman-adv: Allow to disable debugfs support The files provided by batman-adv via debugfs are currently converted to netlink. Tools which are not yet converted to use the netlink interface may still rely on the old debugfs files. But systems which already upgraded their tools can save some space by disabling this feature. The default configuration of batman-adv on amd64 can reduce the size of the module by around 11% when this feature is disabled. $ size net/batman-adv/batman-adv.ko* text data bss dec hex filename 150507 10395 4160 165062 284c6 net/batman-adv/batman-adv.ko.y 137106 7099 2112 146317 23b8d net/batman-adv/batman-adv.ko.n Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Simon Wunderlich --- net/batman-adv/Kconfig | 15 +++++++++++++-- net/batman-adv/Makefile | 4 ++-- net/batman-adv/bat_algo.c | 2 ++ net/batman-adv/bat_iv_ogm.c | 12 ++++++++++++ net/batman-adv/bat_v.c | 12 ++++++++++++ net/batman-adv/bridge_loop_avoidance.c | 4 ++++ net/batman-adv/debugfs.h | 2 +- net/batman-adv/distributed-arp-table.c | 2 ++ net/batman-adv/gateway_client.c | 2 ++ net/batman-adv/icmp_socket.h | 18 +++++++++++++++++- net/batman-adv/main.c | 2 ++ net/batman-adv/multicast.c | 2 ++ net/batman-adv/network-coding.c | 2 ++ net/batman-adv/originator.c | 4 ++++ net/batman-adv/translation-table.c | 4 ++++ net/batman-adv/types.h | 6 ++++++ 16 files changed, 87 insertions(+), 6 deletions(-) diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig index 833bb145ba3c..f20742cbae6d 100644 --- a/net/batman-adv/Kconfig +++ b/net/batman-adv/Kconfig @@ -73,10 +73,21 @@ config BATMAN_ADV_MCAST reduce the air overhead while improving the reliability of multicast messages. -config BATMAN_ADV_DEBUG - bool "B.A.T.M.A.N. debugging" +config BATMAN_ADV_DEBUGFS + bool "batman-adv debugfs entries" depends on BATMAN_ADV depends on DEBUG_FS + default y + help + Enable this to export routing related debug tables via debugfs. + The information for each soft-interface and used hard-interface can be + found under batman_adv/ + + If unsure, say Y. + +config BATMAN_ADV_DEBUG + bool "B.A.T.M.A.N. debugging" + depends on BATMAN_ADV_DEBUGFS help This is an option for use by developers; most people should say N here. This enables compilation of support for diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile index a83fc6c58d19..f724d3c98a81 100644 --- a/net/batman-adv/Makefile +++ b/net/batman-adv/Makefile @@ -24,14 +24,14 @@ batman-adv-$(CONFIG_BATMAN_ADV_BATMAN_V) += bat_v_elp.o batman-adv-$(CONFIG_BATMAN_ADV_BATMAN_V) += bat_v_ogm.o batman-adv-y += bitarray.o batman-adv-$(CONFIG_BATMAN_ADV_BLA) += bridge_loop_avoidance.o -batman-adv-$(CONFIG_DEBUG_FS) += debugfs.o +batman-adv-$(CONFIG_BATMAN_ADV_DEBUGFS) += debugfs.o batman-adv-$(CONFIG_BATMAN_ADV_DAT) += distributed-arp-table.o batman-adv-y += fragmentation.o batman-adv-y += gateway_client.o batman-adv-y += gateway_common.o batman-adv-y += hard-interface.o batman-adv-y += hash.o -batman-adv-y += icmp_socket.o +batman-adv-$(CONFIG_BATMAN_ADV_DEBUGFS) += icmp_socket.o batman-adv-$(CONFIG_BATMAN_ADV_DEBUG) += log.o batman-adv-y += main.o batman-adv-$(CONFIG_BATMAN_ADV_MCAST) += multicast.o diff --git a/net/batman-adv/bat_algo.c b/net/batman-adv/bat_algo.c index f2cc50d354d9..623d04302aa2 100644 --- a/net/batman-adv/bat_algo.c +++ b/net/batman-adv/bat_algo.c @@ -101,6 +101,7 @@ int batadv_algo_select(struct batadv_priv *bat_priv, char *name) return 0; } +#ifdef CONFIG_BATMAN_ADV_DEBUGFS int batadv_algo_seq_print_text(struct seq_file *seq, void *offset) { struct batadv_algo_ops *bat_algo_ops; @@ -113,6 +114,7 @@ int batadv_algo_seq_print_text(struct seq_file *seq, void *offset) return 0; } +#endif static int batadv_param_set_ra(const char *val, const struct kernel_param *kp) { diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 3c7900d543ca..e2d18d0b1f06 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -1855,6 +1855,7 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb, return NET_RX_SUCCESS; } +#ifdef CONFIG_BATMAN_ADV_DEBUGFS /** * batadv_iv_ogm_orig_print_neigh - print neighbors for the originator table * @orig_node: the orig_node for which the neighbors are printed @@ -1952,6 +1953,7 @@ next: if (batman_count == 0) seq_puts(seq, "No batman nodes in range ...\n"); } +#endif /** * batadv_iv_ogm_neigh_get_tq_avg - Get the TQ average for a neighbour on a @@ -2182,6 +2184,7 @@ batadv_iv_ogm_orig_dump(struct sk_buff *msg, struct netlink_callback *cb, cb->args[2] = sub; } +#ifdef CONFIG_BATMAN_ADV_DEBUGFS /** * batadv_iv_hardif_neigh_print - print a single hop neighbour node * @seq: neighbour table seq_file struct @@ -2232,6 +2235,7 @@ static void batadv_iv_neigh_print(struct batadv_priv *bat_priv, if (batman_count == 0) seq_puts(seq, "No batman nodes in range ...\n"); } +#endif /** * batadv_iv_ogm_neigh_diff - calculate tq difference of two neighbors @@ -2618,6 +2622,7 @@ out: return ret; } +#ifdef CONFIG_BATMAN_ADV_DEBUGFS /* fails if orig_node has no router */ static int batadv_iv_gw_write_buffer_text(struct batadv_priv *bat_priv, struct seq_file *seq, @@ -2681,6 +2686,7 @@ static void batadv_iv_gw_print(struct batadv_priv *bat_priv, if (gw_count == 0) seq_puts(seq, "No gateways in range ...\n"); } +#endif /** * batadv_iv_gw_dump_entry - Dump a gateway into a message @@ -2798,11 +2804,15 @@ static struct batadv_algo_ops batadv_batman_iv __read_mostly = { .neigh = { .cmp = batadv_iv_ogm_neigh_cmp, .is_similar_or_better = batadv_iv_ogm_neigh_is_sob, +#ifdef CONFIG_BATMAN_ADV_DEBUGFS .print = batadv_iv_neigh_print, +#endif .dump = batadv_iv_ogm_neigh_dump, }, .orig = { +#ifdef CONFIG_BATMAN_ADV_DEBUGFS .print = batadv_iv_ogm_orig_print, +#endif .dump = batadv_iv_ogm_orig_dump, .free = batadv_iv_ogm_orig_free, .add_if = batadv_iv_ogm_orig_add_if, @@ -2811,7 +2821,9 @@ static struct batadv_algo_ops batadv_batman_iv __read_mostly = { .gw = { .get_best_gw_node = batadv_iv_gw_get_best_gw_node, .is_eligible = batadv_iv_gw_is_eligible, +#ifdef CONFIG_BATMAN_ADV_DEBUGFS .print = batadv_iv_gw_print, +#endif .dump = batadv_iv_gw_dump, }, }; diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c index 9e872dcc1260..e79f6f01182e 100644 --- a/net/batman-adv/bat_v.c +++ b/net/batman-adv/bat_v.c @@ -129,6 +129,7 @@ batadv_v_hardif_neigh_init(struct batadv_hardif_neigh_node *hardif_neigh) batadv_v_elp_throughput_metric_update); } +#ifdef CONFIG_BATMAN_ADV_DEBUGFS /** * batadv_v_orig_print_neigh - print neighbors for the originator table * @orig_node: the orig_node for which the neighbors are printed @@ -212,6 +213,7 @@ static void batadv_v_neigh_print(struct batadv_priv *bat_priv, if (batman_count == 0) seq_puts(seq, "No batman nodes in range ...\n"); } +#endif /** * batadv_v_neigh_dump_neigh - Dump a neighbour into a message @@ -345,6 +347,7 @@ batadv_v_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb, cb->args[1] = idx; } +#ifdef CONFIG_BATMAN_ADV_DEBUGFS /** * batadv_v_orig_print - print the originator table * @bat_priv: the bat priv with all the soft interface information @@ -411,6 +414,7 @@ next: if (batman_count == 0) seq_puts(seq, "No batman nodes in range ...\n"); } +#endif /** * batadv_v_orig_dump_subentry - Dump an originator subentry into a @@ -827,6 +831,7 @@ out: return ret; } +#ifdef CONFIG_BATMAN_ADV_DEBUGFS /* fails if orig_node has no router */ static int batadv_v_gw_write_buffer_text(struct batadv_priv *bat_priv, struct seq_file *seq, @@ -896,6 +901,7 @@ static void batadv_v_gw_print(struct batadv_priv *bat_priv, if (gw_count == 0) seq_puts(seq, "No gateways in range ...\n"); } +#endif /** * batadv_v_gw_dump_entry - Dump a gateway into a message @@ -1034,11 +1040,15 @@ static struct batadv_algo_ops batadv_batman_v __read_mostly = { .hardif_init = batadv_v_hardif_neigh_init, .cmp = batadv_v_neigh_cmp, .is_similar_or_better = batadv_v_neigh_is_sob, +#ifdef CONFIG_BATMAN_ADV_DEBUGFS .print = batadv_v_neigh_print, +#endif .dump = batadv_v_neigh_dump, }, .orig = { +#ifdef CONFIG_BATMAN_ADV_DEBUGFS .print = batadv_v_orig_print, +#endif .dump = batadv_v_orig_dump, }, .gw = { @@ -1046,7 +1056,9 @@ static struct batadv_algo_ops batadv_batman_v __read_mostly = { .show_sel_class = batadv_v_show_sel_class, .get_best_gw_node = batadv_v_gw_get_best_gw_node, .is_eligible = batadv_v_gw_is_eligible, +#ifdef CONFIG_BATMAN_ADV_DEBUGFS .print = batadv_v_gw_print, +#endif .dump = batadv_v_gw_dump, }, }; diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 1db3c12c0be0..e7f690b571ea 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -1996,6 +1996,7 @@ out: return ret; } +#ifdef CONFIG_BATMAN_ADV_DEBUGFS /** * batadv_bla_claim_table_seq_print_text - print the claim table in a seq file * @seq: seq file to print on @@ -2056,6 +2057,7 @@ out: batadv_hardif_put(primary_if); return 0; } +#endif /** * batadv_bla_claim_dump_entry - dump one entry of the claim table @@ -2219,6 +2221,7 @@ out: return ret; } +#ifdef CONFIG_BATMAN_ADV_DEBUGFS /** * batadv_bla_backbone_table_seq_print_text - print the backbone table in a seq * file @@ -2282,6 +2285,7 @@ out: batadv_hardif_put(primary_if); return 0; } +#endif /** * batadv_bla_backbone_dump_entry - dump one entry of the backbone table diff --git a/net/batman-adv/debugfs.h b/net/batman-adv/debugfs.h index 1ab4e2e63afc..c68ff3dcb926 100644 --- a/net/batman-adv/debugfs.h +++ b/net/batman-adv/debugfs.h @@ -26,7 +26,7 @@ struct net_device; #define BATADV_DEBUGFS_SUBDIR "batman_adv" -#if IS_ENABLED(CONFIG_DEBUG_FS) +#if IS_ENABLED(CONFIG_BATMAN_ADV_DEBUGFS) void batadv_debugfs_init(void); void batadv_debugfs_destroy(void); diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index 059bc23da534..e257efdc5d03 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -795,6 +795,7 @@ void batadv_dat_free(struct batadv_priv *bat_priv) batadv_dat_hash_free(bat_priv); } +#ifdef CONFIG_BATMAN_ADV_DEBUGFS /** * batadv_dat_cache_seq_print_text - print the local DAT hash table * @seq: seq file to print on @@ -846,6 +847,7 @@ out: batadv_hardif_put(primary_if); return 0; } +#endif /** * batadv_arp_get_type - parse an ARP packet and gets the type diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index 4b51b1cf4f76..de055d64debe 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -482,6 +482,7 @@ void batadv_gw_node_free(struct batadv_priv *bat_priv) spin_unlock_bh(&bat_priv->gw.list_lock); } +#ifdef CONFIG_BATMAN_ADV_DEBUGFS int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset) { struct net_device *net_dev = (struct net_device *)seq->private; @@ -509,6 +510,7 @@ int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset) return 0; } +#endif /** * batadv_gw_dump - Dump gateways into a message diff --git a/net/batman-adv/icmp_socket.h b/net/batman-adv/icmp_socket.h index 618d5de06f20..e44a7da51431 100644 --- a/net/batman-adv/icmp_socket.h +++ b/net/batman-adv/icmp_socket.h @@ -26,9 +26,25 @@ struct batadv_icmp_header; #define BATADV_ICMP_SOCKET "socket" -void batadv_socket_init(void); int batadv_socket_setup(struct batadv_priv *bat_priv); + +#ifdef CONFIG_BATMAN_ADV_DEBUGFS + +void batadv_socket_init(void); void batadv_socket_receive_packet(struct batadv_icmp_header *icmph, size_t icmp_len); +#else + +static inline void batadv_socket_init(void) +{ +} + +static inline void +batadv_socket_receive_packet(struct batadv_icmp_header *icmph, size_t icmp_len) +{ +} + +#endif + #endif /* _NET_BATMAN_ADV_ICMP_SOCKET_H_ */ diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index ef07e5b34415..2c017ab47557 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -282,6 +282,7 @@ bool batadv_is_my_mac(struct batadv_priv *bat_priv, const u8 *addr) return is_my_mac; } +#ifdef CONFIG_BATMAN_ADV_DEBUGFS /** * batadv_seq_print_text_primary_if_get - called from debugfs table printing * function that requires the primary interface @@ -317,6 +318,7 @@ batadv_seq_print_text_primary_if_get(struct seq_file *seq) out: return primary_if; } +#endif /** * batadv_max_header_len - calculate maximum encapsulation overhead for a diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index 894df6020f6a..13661f43386f 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -1134,6 +1134,7 @@ void batadv_mcast_init(struct batadv_priv *bat_priv) BATADV_TVLV_HANDLER_OGM_CIFNOTFND); } +#ifdef CONFIG_BATMAN_ADV_DEBUGFS /** * batadv_mcast_flags_print_header - print own mcast flags to debugfs table * @bat_priv: the bat priv with all the soft interface information @@ -1234,6 +1235,7 @@ int batadv_mcast_flags_seq_print_text(struct seq_file *seq, void *offset) return 0; } +#endif /** * batadv_mcast_free - free the multicast optimizations structures diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index 165cd27777cb..e3baf697a35c 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -1881,6 +1881,7 @@ void batadv_nc_mesh_free(struct batadv_priv *bat_priv) batadv_hash_destroy(bat_priv->nc.decoding_hash); } +#ifdef CONFIG_BATMAN_ADV_DEBUGFS /** * batadv_nc_nodes_seq_print_text - print the nc node information * @seq: seq file to print on @@ -1980,3 +1981,4 @@ int batadv_nc_init_debugfs(struct batadv_priv *bat_priv) out: return -ENOMEM; } +#endif diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 0b7d57aad417..5f3bfc41aeb1 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -693,6 +693,7 @@ batadv_neigh_node_get_or_create(struct batadv_orig_node *orig_node, return batadv_neigh_node_create(orig_node, hard_iface, neigh_addr); } +#ifdef CONFIG_BATMAN_ADV_DEBUGFS /** * batadv_hardif_neigh_seq_print_text - print the single hop neighbour list * @seq: neighbour table seq_file struct @@ -726,6 +727,7 @@ int batadv_hardif_neigh_seq_print_text(struct seq_file *seq, void *offset) bat_priv->algo_ops->neigh.print(bat_priv, seq); return 0; } +#endif /** * batadv_hardif_neigh_dump - Dump to netlink the neighbor infos for a specific @@ -1339,6 +1341,7 @@ void batadv_purge_orig_ref(struct batadv_priv *bat_priv) _batadv_purge_orig(bat_priv); } +#ifdef CONFIG_BATMAN_ADV_DEBUGFS int batadv_orig_seq_print_text(struct seq_file *seq, void *offset) { struct net_device *net_dev = (struct net_device *)seq->private; @@ -1412,6 +1415,7 @@ out: batadv_hardif_put(hard_iface); return 0; } +#endif /** * batadv_orig_dump - Dump to netlink the originator infos for a specific diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index d94e298fc7e2..7f663092f6de 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -1047,6 +1047,7 @@ container_register: kfree(tt_data); } +#ifdef CONFIG_BATMAN_ADV_DEBUGFS int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset) { struct net_device *net_dev = (struct net_device *)seq->private; @@ -1114,6 +1115,7 @@ out: batadv_hardif_put(primary_if); return 0; } +#endif /** * batadv_tt_local_dump_entry - Dump one TT local entry into a message @@ -1796,6 +1798,7 @@ batadv_transtable_best_orig(struct batadv_priv *bat_priv, return best_entry; } +#ifdef CONFIG_BATMAN_ADV_DEBUGFS /** * batadv_tt_global_print_entry - print all orig nodes who announce the address * for this global entry @@ -1919,6 +1922,7 @@ out: batadv_hardif_put(primary_if); return 0; } +#endif /** * batadv_tt_global_dump_subentry - Dump all TT local entries into a message diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index b5f01a36ec34..b3dd1a381aad 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -1431,7 +1431,9 @@ struct batadv_algo_neigh_ops { struct batadv_hard_iface *if_outgoing1, struct batadv_neigh_node *neigh2, struct batadv_hard_iface *if_outgoing2); +#ifdef CONFIG_BATMAN_ADV_DEBUGFS void (*print)(struct batadv_priv *priv, struct seq_file *seq); +#endif void (*dump)(struct sk_buff *msg, struct netlink_callback *cb, struct batadv_priv *priv, struct batadv_hard_iface *hard_iface); @@ -1453,8 +1455,10 @@ struct batadv_algo_orig_ops { int (*add_if)(struct batadv_orig_node *orig_node, int max_if_num); int (*del_if)(struct batadv_orig_node *orig_node, int max_if_num, int del_if_num); +#ifdef CONFIG_BATMAN_ADV_DEBUGFS void (*print)(struct batadv_priv *priv, struct seq_file *seq, struct batadv_hard_iface *hard_iface); +#endif void (*dump)(struct sk_buff *msg, struct netlink_callback *cb, struct batadv_priv *priv, struct batadv_hard_iface *hard_iface); @@ -1480,7 +1484,9 @@ struct batadv_algo_gw_ops { bool (*is_eligible)(struct batadv_priv *bat_priv, struct batadv_orig_node *curr_gw_orig, struct batadv_orig_node *orig_node); +#ifdef CONFIG_BATMAN_ADV_DEBUGFS void (*print)(struct batadv_priv *bat_priv, struct seq_file *seq); +#endif void (*dump)(struct sk_buff *msg, struct netlink_callback *cb, struct batadv_priv *priv); }; From bb8082f6913889418b69a54e57b4a39d7128a700 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Tue, 9 Aug 2016 15:12:26 +0200 Subject: [PATCH 0083/1715] ppp: build ifname using unit identifier for rtnl based devices Userspace programs generally need to know the name of the ppp devices they create. Both ioctl and rtnl interfaces use the ppp sheme to name them. But although the suffix used by the ioctl interface can be known by userspace (it's the PPP unit identifier returned by the PPPIOCGUNIT ioctl), the one used by the rtnl is only known by the kernel. This patch brings more consistency between ioctl and rtnl based ppp devices by generating device names using the PPP unit identifer as suffix in both cases. This way, userspace can always infer the name of the devices they create. Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- drivers/net/ppp/ppp_generic.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index f226db4616b7..70cfa06ccd40 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -1103,6 +1103,15 @@ static int ppp_nl_newlink(struct net *src_net, struct net_device *dev, } conf.file = file; + + /* Don't use device name generated by the rtnetlink layer when ifname + * isn't specified. Let ppp_dev_configure() set the device name using + * the PPP unit identifer as suffix (i.e. ppp). This allows + * userspace to infer the device name using to the PPPIOCGUNIT ioctl. + */ + if (!tb[IFLA_IFNAME]) + conf.ifname_is_set = false; + err = ppp_dev_configure(src_net, dev, &conf); out_unlock: From 631fee7d70e8eabb642b4bcc58f08bbe880c91aa Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 9 Aug 2016 06:51:06 -0700 Subject: [PATCH 0084/1715] net: Remove fib_local variable After commit 0ddcf43d5d4a ("ipv4: FIB Local/MAIN table collapse") fib_local is set but not used. Remove it. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 - net/ipv4/fib_frontend.c | 7 ------- 2 files changed, 8 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index d061ffeb1e71..7adf4386ac8f 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -40,7 +40,6 @@ struct netns_ipv4 { #ifdef CONFIG_IP_MULTIPLE_TABLES struct fib_rules_ops *rules_ops; bool fib_has_custom_rules; - struct fib_table __rcu *fib_local; struct fib_table __rcu *fib_main; struct fib_table __rcu *fib_default; #endif diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index ef2ebeb89d0f..317c31939732 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -93,9 +93,6 @@ struct fib_table *fib_new_table(struct net *net, u32 id) return NULL; switch (id) { - case RT_TABLE_LOCAL: - rcu_assign_pointer(net->ipv4.fib_local, tb); - break; case RT_TABLE_MAIN: rcu_assign_pointer(net->ipv4.fib_main, tb); break; @@ -137,9 +134,6 @@ static void fib_replace_table(struct net *net, struct fib_table *old, { #ifdef CONFIG_IP_MULTIPLE_TABLES switch (new->tb_id) { - case RT_TABLE_LOCAL: - rcu_assign_pointer(net->ipv4.fib_local, new); - break; case RT_TABLE_MAIN: rcu_assign_pointer(net->ipv4.fib_main, new); break; @@ -1249,7 +1243,6 @@ static void ip_fib_net_exit(struct net *net) rtnl_lock(); #ifdef CONFIG_IP_MULTIPLE_TABLES - RCU_INIT_POINTER(net->ipv4.fib_local, NULL); RCU_INIT_POINTER(net->ipv4.fib_main, NULL); RCU_INIT_POINTER(net->ipv4.fib_default, NULL); #endif From 0dff88d39fd0b17a5e3ff03d0264b823cab1352c Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Tue, 9 Aug 2016 19:09:45 +0200 Subject: [PATCH 0085/1715] net: dsa: b53: constify b53_io_ops structures The b53_io_ops structures are never modified, so declare them as const. Done with the help of Coccinelle. Signed-off-by: Julia Lawall Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_common.c | 3 ++- drivers/net/dsa/b53/b53_mdio.c | 2 +- drivers/net/dsa/b53/b53_mmap.c | 2 +- drivers/net/dsa/b53/b53_priv.h | 3 ++- drivers/net/dsa/b53/b53_spi.c | 2 +- drivers/net/dsa/b53/b53_srab.c | 2 +- 6 files changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index bda37d336736..38ee10de7884 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1681,7 +1681,8 @@ static int b53_switch_init(struct b53_device *dev) return 0; } -struct b53_device *b53_switch_alloc(struct device *base, struct b53_io_ops *ops, +struct b53_device *b53_switch_alloc(struct device *base, + const struct b53_io_ops *ops, void *priv) { struct dsa_switch *ds; diff --git a/drivers/net/dsa/b53/b53_mdio.c b/drivers/net/dsa/b53/b53_mdio.c index aa87c3fffdac..477a16b5660a 100644 --- a/drivers/net/dsa/b53/b53_mdio.c +++ b/drivers/net/dsa/b53/b53_mdio.c @@ -267,7 +267,7 @@ static int b53_mdio_phy_write16(struct b53_device *dev, int addr, int reg, return mdiobus_write_nested(bus, addr, reg, value); } -static struct b53_io_ops b53_mdio_ops = { +static const struct b53_io_ops b53_mdio_ops = { .read8 = b53_mdio_read8, .read16 = b53_mdio_read16, .read32 = b53_mdio_read32, diff --git a/drivers/net/dsa/b53/b53_mmap.c b/drivers/net/dsa/b53/b53_mmap.c index 77ffc4312808..cc9e6bd83e0e 100644 --- a/drivers/net/dsa/b53/b53_mmap.c +++ b/drivers/net/dsa/b53/b53_mmap.c @@ -208,7 +208,7 @@ static int b53_mmap_write64(struct b53_device *dev, u8 page, u8 reg, return 0; } -static struct b53_io_ops b53_mmap_ops = { +static const struct b53_io_ops b53_mmap_ops = { .read8 = b53_mmap_read8, .read16 = b53_mmap_read16, .read32 = b53_mmap_read32, diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h index 835a744f206e..d268493a5fec 100644 --- a/drivers/net/dsa/b53/b53_priv.h +++ b/drivers/net/dsa/b53/b53_priv.h @@ -182,7 +182,8 @@ static inline int is_cpu_port(struct b53_device *dev, int port) return dev->cpu_port; } -struct b53_device *b53_switch_alloc(struct device *base, struct b53_io_ops *ops, +struct b53_device *b53_switch_alloc(struct device *base, + const struct b53_io_ops *ops, void *priv); int b53_switch_detect(struct b53_device *dev); diff --git a/drivers/net/dsa/b53/b53_spi.c b/drivers/net/dsa/b53/b53_spi.c index 2bda0b5f1578..1c847a161bbd 100644 --- a/drivers/net/dsa/b53/b53_spi.c +++ b/drivers/net/dsa/b53/b53_spi.c @@ -270,7 +270,7 @@ static int b53_spi_write64(struct b53_device *dev, u8 page, u8 reg, u64 value) return spi_write(spi, txbuf, sizeof(txbuf)); } -static struct b53_io_ops b53_spi_ops = { +static const struct b53_io_ops b53_spi_ops = { .read8 = b53_spi_read8, .read16 = b53_spi_read16, .read32 = b53_spi_read32, diff --git a/drivers/net/dsa/b53/b53_srab.c b/drivers/net/dsa/b53/b53_srab.c index 3e2d4a5fcd5a..8a62b6a69703 100644 --- a/drivers/net/dsa/b53/b53_srab.c +++ b/drivers/net/dsa/b53/b53_srab.c @@ -344,7 +344,7 @@ err: return ret; } -static struct b53_io_ops b53_srab_ops = { +static const struct b53_io_ops b53_srab_ops = { .read8 = b53_srab_read8, .read16 = b53_srab_read16, .read32 = b53_srab_read32, From 0184165b2f42c4b032da9dd11546bfbaeb5afd4e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niklas=20S=C3=B6derlund?= Date: Wed, 3 Aug 2016 15:56:47 +0200 Subject: [PATCH 0086/1715] ravb: add sleep PM suspend/resume support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The interface would not function after the system had been woken up after have been suspended (echo mem > /sys/power/state) cycle. The reason for this is that all device registers have been reset to its default values. This patch adds sleep suspend and resume functions that detached the interface at suspend and restore the registers and reattach the interface at resume. Only the registers that are only configured at probe time needs to be explicitly restored by the resume handler. All other registers are reconfigured by either reopening the device in the resume handler (if the device was running when the system was suspended) or when the interface is opened by a user at a later time. Signed-off-by: Niklas Söderlund Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/ravb_main.c | 72 +++++++++++++++++++++--- 1 file changed, 64 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 1e1cc0fad17f..256565ea9cfc 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1876,6 +1876,20 @@ static int ravb_set_gti(struct net_device *ndev) return 0; } +static void ravb_set_config_mode(struct net_device *ndev) +{ + struct ravb_private *priv = netdev_priv(ndev); + + if (priv->chip_id == RCAR_GEN2) { + ravb_modify(ndev, CCC, CCC_OPC, CCC_OPC_CONFIG); + /* Set CSEL value */ + ravb_modify(ndev, CCC, CCC_CSEL, CCC_CSEL_HPB); + } else { + ravb_modify(ndev, CCC, CCC_OPC, CCC_OPC_CONFIG | + CCC_GAC | CCC_CSEL_HPB); + } +} + static int ravb_probe(struct platform_device *pdev) { struct device_node *np = pdev->dev.of_node; @@ -1978,14 +1992,7 @@ static int ravb_probe(struct platform_device *pdev) ndev->ethtool_ops = &ravb_ethtool_ops; /* Set AVB config mode */ - if (chip_id == RCAR_GEN2) { - ravb_modify(ndev, CCC, CCC_OPC, CCC_OPC_CONFIG); - /* Set CSEL value */ - ravb_modify(ndev, CCC, CCC_CSEL, CCC_CSEL_HPB); - } else { - ravb_modify(ndev, CCC, CCC_OPC, CCC_OPC_CONFIG | - CCC_GAC | CCC_CSEL_HPB); - } + ravb_set_config_mode(ndev); /* Set GTI value */ error = ravb_set_gti(ndev); @@ -2097,6 +2104,54 @@ static int ravb_remove(struct platform_device *pdev) } #ifdef CONFIG_PM +static int ravb_runtime_suspend(struct device *dev) +{ + struct net_device *ndev = dev_get_drvdata(dev); + int ret = 0; + + if (netif_running(ndev)) { + netif_device_detach(ndev); + ret = ravb_close(ndev); + } + + return ret; +} + +static int ravb_runtime_resume(struct device *dev) +{ + struct net_device *ndev = dev_get_drvdata(dev); + struct ravb_private *priv = netdev_priv(ndev); + int ret = 0; + + /* All register have been reset to default values. + * Restore all registers which where setup at probe time and + * reopen device if it was running before system suspended. + */ + + /* Set AVB config mode */ + ravb_set_config_mode(ndev); + + /* Set GTI value */ + ret = ravb_set_gti(ndev); + if (ret) + return ret; + + /* Request GTI loading */ + ravb_modify(ndev, GCCR, GCCR_LTI, GCCR_LTI); + + /* Restore descriptor base address table */ + ravb_write(ndev, priv->desc_bat_dma, DBAT); + + if (netif_running(ndev)) { + ret = ravb_open(ndev); + if (ret < 0) + return ret; + netif_device_attach(ndev); + } + + return ret; +} + static int ravb_runtime_nop(struct device *dev) { /* Runtime PM callback shared between ->runtime_suspend() @@ -2110,6 +2165,7 @@ static int ravb_runtime_nop(struct device *dev) } static const struct dev_pm_ops ravb_dev_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(ravb_runtime_suspend, ravb_runtime_resume) SET_RUNTIME_PM_OPS(ravb_runtime_nop, ravb_runtime_nop, NULL) }; From e45a8a9e60ff1dd5ad118c794337a1101b46ab0d Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Tue, 9 Aug 2016 18:27:08 +0200 Subject: [PATCH 0087/1715] xfrm: constify xfrm_replay structures The xfrm_replay structures are never modified, so declare them as const. Done with the help of Coccinelle. Signed-off-by: Julia Lawall Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 2 +- net/xfrm/xfrm_replay.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index adfebd6f243c..d2fdd6d70959 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -187,7 +187,7 @@ struct xfrm_state { struct xfrm_replay_state_esn *preplay_esn; /* The functions for replay detection. */ - struct xfrm_replay *repl; + const struct xfrm_replay *repl; /* internal flag that only holds state for delayed aevent at the * moment diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c index 4fd725a0c500..cdc2e2e71bff 100644 --- a/net/xfrm/xfrm_replay.c +++ b/net/xfrm/xfrm_replay.c @@ -558,7 +558,7 @@ static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq) x->repl->notify(x, XFRM_REPLAY_UPDATE); } -static struct xfrm_replay xfrm_replay_legacy = { +static const struct xfrm_replay xfrm_replay_legacy = { .advance = xfrm_replay_advance, .check = xfrm_replay_check, .recheck = xfrm_replay_check, @@ -566,7 +566,7 @@ static struct xfrm_replay xfrm_replay_legacy = { .overflow = xfrm_replay_overflow, }; -static struct xfrm_replay xfrm_replay_bmp = { +static const struct xfrm_replay xfrm_replay_bmp = { .advance = xfrm_replay_advance_bmp, .check = xfrm_replay_check_bmp, .recheck = xfrm_replay_check_bmp, @@ -574,7 +574,7 @@ static struct xfrm_replay xfrm_replay_bmp = { .overflow = xfrm_replay_overflow_bmp, }; -static struct xfrm_replay xfrm_replay_esn = { +static const struct xfrm_replay xfrm_replay_esn = { .advance = xfrm_replay_advance_esn, .check = xfrm_replay_check_esn, .recheck = xfrm_replay_recheck_esn, From ae3fb6d32140e5c5b491892105ca89066171d217 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 9 Aug 2016 12:16:04 +0200 Subject: [PATCH 0088/1715] xfrm: state: use hlist_for_each_entry_rcu helper This is required once we allow lockless access of bydst/bysrc hash tables. Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_state.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 9895a8c56d8c..904ab4d4ac05 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -76,18 +76,18 @@ static void xfrm_hash_transfer(struct hlist_head *list, h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr, x->props.reqid, x->props.family, nhashmask); - hlist_add_head(&x->bydst, ndsttable+h); + hlist_add_head_rcu(&x->bydst, ndsttable + h); h = __xfrm_src_hash(&x->id.daddr, &x->props.saddr, x->props.family, nhashmask); - hlist_add_head(&x->bysrc, nsrctable+h); + hlist_add_head_rcu(&x->bysrc, nsrctable + h); if (x->id.spi) { h = __xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family, nhashmask); - hlist_add_head(&x->byspi, nspitable+h); + hlist_add_head_rcu(&x->byspi, nspitable + h); } } } @@ -520,10 +520,10 @@ int __xfrm_state_delete(struct xfrm_state *x) x->km.state = XFRM_STATE_DEAD; spin_lock(&net->xfrm.xfrm_state_lock); list_del(&x->km.all); - hlist_del(&x->bydst); - hlist_del(&x->bysrc); + hlist_del_rcu(&x->bydst); + hlist_del_rcu(&x->bysrc); if (x->id.spi) - hlist_del(&x->byspi); + hlist_del_rcu(&x->byspi); net->xfrm.state_num--; spin_unlock(&net->xfrm.xfrm_state_lock); @@ -659,7 +659,7 @@ static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark, unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family); struct xfrm_state *x; - hlist_for_each_entry(x, net->xfrm.state_byspi+h, byspi) { + hlist_for_each_entry_rcu(x, net->xfrm.state_byspi + h, byspi) { if (x->props.family != family || x->id.spi != spi || x->id.proto != proto || @@ -683,7 +683,7 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark, unsigned int h = xfrm_src_hash(net, daddr, saddr, family); struct xfrm_state *x; - hlist_for_each_entry(x, net->xfrm.state_bysrc+h, bysrc) { + hlist_for_each_entry_rcu(x, net->xfrm.state_bysrc + h, bysrc) { if (x->props.family != family || x->id.proto != proto || !xfrm_addr_equal(&x->id.daddr, daddr, family) || @@ -781,7 +781,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, spin_lock_bh(&net->xfrm.xfrm_state_lock); h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family); - hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) { + hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h, bydst) { if (x->props.family == encap_family && x->props.reqid == tmpl->reqid && (mark & x->mark.m) == x->mark.v && @@ -797,7 +797,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, goto found; h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, encap_family); - hlist_for_each_entry(x, net->xfrm.state_bydst+h_wildcard, bydst) { + hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h_wildcard, bydst) { if (x->props.family == encap_family && x->props.reqid == tmpl->reqid && (mark & x->mark.m) == x->mark.v && @@ -852,12 +852,12 @@ found: if (km_query(x, tmpl, pol) == 0) { x->km.state = XFRM_STATE_ACQ; list_add(&x->km.all, &net->xfrm.state_all); - hlist_add_head(&x->bydst, net->xfrm.state_bydst+h); + hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h); h = xfrm_src_hash(net, daddr, saddr, encap_family); - hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h); + hlist_add_head_rcu(&x->bysrc, net->xfrm.state_bysrc + h); if (x->id.spi) { h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, encap_family); - hlist_add_head(&x->byspi, net->xfrm.state_byspi+h); + hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h); } x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires; tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL); @@ -945,16 +945,16 @@ static void __xfrm_state_insert(struct xfrm_state *x) h = xfrm_dst_hash(net, &x->id.daddr, &x->props.saddr, x->props.reqid, x->props.family); - hlist_add_head(&x->bydst, net->xfrm.state_bydst+h); + hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h); h = xfrm_src_hash(net, &x->id.daddr, &x->props.saddr, x->props.family); - hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h); + hlist_add_head_rcu(&x->bysrc, net->xfrm.state_bysrc + h); if (x->id.spi) { h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, x->props.family); - hlist_add_head(&x->byspi, net->xfrm.state_byspi+h); + hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h); } tasklet_hrtimer_start(&x->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL); @@ -1063,9 +1063,9 @@ static struct xfrm_state *__find_acq_core(struct net *net, xfrm_state_hold(x); tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL); list_add(&x->km.all, &net->xfrm.state_all); - hlist_add_head(&x->bydst, net->xfrm.state_bydst+h); + hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h); h = xfrm_src_hash(net, daddr, saddr, family); - hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h); + hlist_add_head_rcu(&x->bysrc, net->xfrm.state_bysrc + h); net->xfrm.state_num++; @@ -1581,7 +1581,7 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high) if (x->id.spi) { spin_lock_bh(&net->xfrm.xfrm_state_lock); h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, x->props.family); - hlist_add_head(&x->byspi, net->xfrm.state_byspi+h); + hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h); spin_unlock_bh(&net->xfrm.xfrm_state_lock); err = 0; From 02efdff7e209859c2755ebe93b3bd0e3d40123ab Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 9 Aug 2016 12:16:05 +0200 Subject: [PATCH 0089/1715] xfrm: state: use atomic_inc_not_zero to increment refcount Once xfrm_state_lookup_byaddr no longer acquires the state lock another cpu might be freeing the state entry at the same time. To detect this we use atomic_inc_not_zero, we then signal -EAGAIN to caller in case our result was stale. Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_state.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 904ab4d4ac05..84c1db6254d5 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -37,6 +37,11 @@ static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; +static inline bool xfrm_state_hold_rcu(struct xfrm_state __rcu *x) +{ + return atomic_inc_not_zero(&x->refcnt); +} + static inline unsigned int xfrm_dst_hash(struct net *net, const xfrm_address_t *daddr, const xfrm_address_t *saddr, @@ -668,7 +673,8 @@ static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark, if ((mark & x->mark.m) != x->mark.v) continue; - xfrm_state_hold(x); + if (!xfrm_state_hold_rcu(x)) + continue; return x; } @@ -692,7 +698,8 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark, if ((mark & x->mark.m) != x->mark.v) continue; - xfrm_state_hold(x); + if (!xfrm_state_hold_rcu(x)) + continue; return x; } @@ -871,10 +878,14 @@ found: } } out: - if (x) - xfrm_state_hold(x); - else + if (x) { + if (!xfrm_state_hold_rcu(x)) { + *err = -EAGAIN; + x = NULL; + } + } else { *err = acquire_in_progress ? -EAGAIN : error; + } spin_unlock_bh(&net->xfrm.xfrm_state_lock); if (to_put) xfrm_state_put(to_put); From df7274eb70b7c8488170ebe8757dd94647a8e1e5 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 9 Aug 2016 12:16:06 +0200 Subject: [PATCH 0090/1715] xfrm: state: delay freeing until rcu grace period has elapsed The hash table backend memory and the state structs are free'd via kfree/vfree. Once we only rely on rcu during lookups we have to make sure no other cpu is currently accessing this before doing the free. Free operations already happen from worker so we can use synchronize_rcu to wait until concurrent readers are done. Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_state.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 84c1db6254d5..8e373876924f 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -146,6 +146,9 @@ static void xfrm_hash_resize(struct work_struct *work) spin_unlock_bh(&net->xfrm.xfrm_state_lock); osize = (ohashmask + 1) * sizeof(struct hlist_head); + + synchronize_rcu(); + xfrm_hash_free(odst, osize); xfrm_hash_free(osrc, osize); xfrm_hash_free(ospi, osize); @@ -369,6 +372,8 @@ static void xfrm_state_gc_task(struct work_struct *work) hlist_move_list(&net->xfrm.state_gc_list, &gc_list); spin_unlock_bh(&xfrm_state_gc_lock); + synchronize_rcu(); + hlist_for_each_entry_safe(x, tmp, &gc_list, gclist) xfrm_state_gc_destroy(x); } From b65e3d7be06fd8ff5236439254f338fe1a8d4bbd Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 9 Aug 2016 12:16:07 +0200 Subject: [PATCH 0091/1715] xfrm: state: add sequence count to detect hash resizes Once xfrm_state_find is lockless we have to cope with a concurrent resize opertion. We use a sequence counter to block in case a resize is in progress and to detect if we might have missed a state that got moved to a new hash table. Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_state.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 8e373876924f..ac4037cf6a29 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -36,6 +36,7 @@ */ static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; +static __read_mostly seqcount_t xfrm_state_hash_generation = SEQCNT_ZERO(xfrm_state_hash_generation); static inline bool xfrm_state_hold_rcu(struct xfrm_state __rcu *x) { @@ -127,6 +128,7 @@ static void xfrm_hash_resize(struct work_struct *work) } spin_lock_bh(&net->xfrm.xfrm_state_lock); + write_seqcount_begin(&xfrm_state_hash_generation); nhashmask = (nsize / sizeof(struct hlist_head)) - 1U; for (i = net->xfrm.state_hmask; i >= 0; i--) @@ -143,6 +145,7 @@ static void xfrm_hash_resize(struct work_struct *work) net->xfrm.state_byspi = nspi; net->xfrm.state_hmask = nhashmask; + write_seqcount_end(&xfrm_state_hash_generation); spin_unlock_bh(&net->xfrm.xfrm_state_lock); osize = (ohashmask + 1) * sizeof(struct hlist_head); @@ -787,10 +790,13 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, struct xfrm_state *best = NULL; u32 mark = pol->mark.v & pol->mark.m; unsigned short encap_family = tmpl->encap_family; + unsigned int sequence; struct km_event c; to_put = NULL; + sequence = read_seqcount_begin(&xfrm_state_hash_generation); + spin_lock_bh(&net->xfrm.xfrm_state_lock); h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family); hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h, bydst) { @@ -894,6 +900,15 @@ out: spin_unlock_bh(&net->xfrm.xfrm_state_lock); if (to_put) xfrm_state_put(to_put); + + if (read_seqcount_retry(&xfrm_state_hash_generation, sequence)) { + *err = -EAGAIN; + if (x) { + xfrm_state_put(x); + x = NULL; + } + } + return x; } From c8406998b80183ef87895ab1de4dbed8bb2d53a0 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 9 Aug 2016 12:16:08 +0200 Subject: [PATCH 0092/1715] xfrm: state: use rcu_deref and assign_pointer helpers Before xfrm_state_find() can use rcu_read_lock instead of xfrm_state_lock we need to switch users of the hash table to assign/obtain the pointers with the appropriate rcu helpers. Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_state.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index ac4037cf6a29..53e7867f9254 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -28,6 +28,9 @@ #include "xfrm_hash.h" +#define xfrm_state_deref_prot(table, net) \ + rcu_dereference_protected((table), lockdep_is_held(&(net)->xfrm.xfrm_state_lock)) + /* Each xfrm_state may be linked to two tables: 1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl) @@ -131,18 +134,17 @@ static void xfrm_hash_resize(struct work_struct *work) write_seqcount_begin(&xfrm_state_hash_generation); nhashmask = (nsize / sizeof(struct hlist_head)) - 1U; + odst = xfrm_state_deref_prot(net->xfrm.state_bydst, net); for (i = net->xfrm.state_hmask; i >= 0; i--) - xfrm_hash_transfer(net->xfrm.state_bydst+i, ndst, nsrc, nspi, - nhashmask); + xfrm_hash_transfer(odst + i, ndst, nsrc, nspi, nhashmask); - odst = net->xfrm.state_bydst; - osrc = net->xfrm.state_bysrc; - ospi = net->xfrm.state_byspi; + osrc = xfrm_state_deref_prot(net->xfrm.state_bysrc, net); + ospi = xfrm_state_deref_prot(net->xfrm.state_byspi, net); ohashmask = net->xfrm.state_hmask; - net->xfrm.state_bydst = ndst; - net->xfrm.state_bysrc = nsrc; - net->xfrm.state_byspi = nspi; + rcu_assign_pointer(net->xfrm.state_bydst, ndst); + rcu_assign_pointer(net->xfrm.state_bysrc, nsrc); + rcu_assign_pointer(net->xfrm.state_byspi, nspi); net->xfrm.state_hmask = nhashmask; write_seqcount_end(&xfrm_state_hash_generation); From d737a5805581c6f99dad4caa9fdf80965d617d1a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 9 Aug 2016 12:16:09 +0200 Subject: [PATCH 0093/1715] xfrm: state: don't use lock anymore unless acquire operation is needed push the lock down, after earlier patches we can rely on rcu to make sure state struct won't go away. Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- include/net/netns/xfrm.h | 6 +++--- net/xfrm/xfrm_state.c | 6 ++++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 24cd3949a9a4..1ab51d188408 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -38,9 +38,9 @@ struct netns_xfrm { * mode. Also, it can be used by ah/esp icmp error handler to find * offending SA. */ - struct hlist_head *state_bydst; - struct hlist_head *state_bysrc; - struct hlist_head *state_byspi; + struct hlist_head __rcu *state_bydst; + struct hlist_head __rcu *state_bysrc; + struct hlist_head __rcu *state_byspi; unsigned int state_hmask; unsigned int state_num; struct work_struct state_hash_work; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 53e7867f9254..1a15b658a79e 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -799,7 +799,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, sequence = read_seqcount_begin(&xfrm_state_hash_generation); - spin_lock_bh(&net->xfrm.xfrm_state_lock); + rcu_read_lock(); h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family); hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h, bydst) { if (x->props.family == encap_family && @@ -870,6 +870,7 @@ found: } if (km_query(x, tmpl, pol) == 0) { + spin_lock_bh(&net->xfrm.xfrm_state_lock); x->km.state = XFRM_STATE_ACQ; list_add(&x->km.all, &net->xfrm.state_all); hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h); @@ -883,6 +884,7 @@ found: tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL); net->xfrm.state_num++; xfrm_hash_grow_check(net, x->bydst.next != NULL); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); } else { x->km.state = XFRM_STATE_DEAD; to_put = x; @@ -899,7 +901,7 @@ out: } else { *err = acquire_in_progress ? -EAGAIN : error; } - spin_unlock_bh(&net->xfrm.xfrm_state_lock); + rcu_read_unlock(); if (to_put) xfrm_state_put(to_put); From 9c706a49d660653625d206f6972541c1f60ea2b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 10 Aug 2016 11:44:17 +0200 Subject: [PATCH 0094/1715] net: ipconfig: fix use after free MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ic_close_devs() calls kfree() for all devices's ic_device. Since commit 2647cffb2bc6 ("net: ipconfig: Support using "delayed" DHCP replies") the active device's ic_device is still used however to print the ipconfig summary which results in an oops if the memory is already changed. So delay freeing until after the autoconfig results are reported. Fixes: 2647cffb2bc6 ("net: ipconfig: Support using "delayed" DHCP replies") Reported-by: Geert Uytterhoeven Signed-off-by: Uwe Kleine-König Tested-by: Geert Uytterhoeven Signed-off-by: David S. Miller --- net/ipv4/ipconfig.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 42cf629357b5..66c2fe602810 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -1492,14 +1492,6 @@ static int __init ip_auto_config(void) if (ic_defaults() < 0) return -1; - /* - * Close all network devices except the device we've - * autoconfigured and set up routes. - */ - ic_close_devs(); - if (ic_setup_if() < 0 || ic_setup_routes() < 0) - return -1; - /* * Record which protocol was actually used. */ @@ -1534,6 +1526,15 @@ static int __init ip_auto_config(void) pr_cont("\n"); #endif /* !SILENT */ + /* + * Close all network devices except the device we've + * autoconfigured and set up routes. + */ + ic_close_devs(); + if (ic_setup_if() < 0 || ic_setup_routes() < 0) + return -1; + + return 0; } From b89b815c32f205a772180969da0188665f7da9e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niklas=20S=C3=B6derlund?= Date: Wed, 10 Aug 2016 13:09:49 +0200 Subject: [PATCH 0095/1715] ravb: use proper names for suspend/resume functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The patch 'ravb: add sleep PM suspend/resume support' used incorrect function names containing 'runtime' for the suspend and resume functions. Reported-by: Sergei Shtylyov Signed-off-by: Niklas Söderlund Acked-by: Sergei Shtylyov Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/ravb_main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 256565ea9cfc..d4809adeb048 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -2104,7 +2104,7 @@ static int ravb_remove(struct platform_device *pdev) } #ifdef CONFIG_PM -static int ravb_runtime_suspend(struct device *dev) +static int ravb_suspend(struct device *dev) { struct net_device *ndev = dev_get_drvdata(dev); int ret = 0; @@ -2117,7 +2117,7 @@ static int ravb_runtime_suspend(struct device *dev) return ret; } -static int ravb_runtime_resume(struct device *dev) +static int ravb_resume(struct device *dev) { struct net_device *ndev = dev_get_drvdata(dev); struct ravb_private *priv = netdev_priv(ndev); @@ -2165,7 +2165,7 @@ static int ravb_runtime_nop(struct device *dev) } static const struct dev_pm_ops ravb_dev_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(ravb_runtime_suspend, ravb_runtime_resume) + SET_SYSTEM_SLEEP_PM_OPS(ravb_suspend, ravb_resume) SET_RUNTIME_PM_OPS(ravb_runtime_nop, ravb_runtime_nop, NULL) }; From e87a8f24c9151d449ab46d82a504c1ebfea210f2 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Wed, 10 Aug 2016 11:03:35 +0200 Subject: [PATCH 0096/1715] net: resolve symbol conflicts with generic hashtable.h This is a preparatory patch for converting qdisc linked list into a hashtable. As we'll need to include hashtable.h in netdevice.h, we first have to make sure that this will not introduce symbol conflicts for any of the netdevice.h users. Reviewed-by: Cong Wang Signed-off-by: Jiri Kosina Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/davinci_emac.c | 14 +++++++------- net/ipv6/ip6_gre.c | 12 ++++++------ net/ipv6/ip6_tunnel.c | 10 +++++----- net/ipv6/ip6_vti.c | 10 +++++----- net/ipv6/sit.c | 10 +++++----- 5 files changed, 28 insertions(+), 28 deletions(-) diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c index 727a79f3c7dd..2d6fc9a0fb21 100644 --- a/drivers/net/ethernet/ti/davinci_emac.c +++ b/drivers/net/ethernet/ti/davinci_emac.c @@ -597,14 +597,14 @@ static u32 hash_get(u8 *addr) } /** - * hash_add - Hash function to add mac addr from hash table + * emac_hash_add - Hash function to add mac addr from hash table * @priv: The DaVinci EMAC private adapter structure * @mac_addr: mac address to delete from hash table * * Adds mac address to the internal hash table * */ -static int hash_add(struct emac_priv *priv, u8 *mac_addr) +static int emac_hash_add(struct emac_priv *priv, u8 *mac_addr) { struct device *emac_dev = &priv->ndev->dev; u32 rc = 0; @@ -613,7 +613,7 @@ static int hash_add(struct emac_priv *priv, u8 *mac_addr) if (hash_value >= EMAC_NUM_MULTICAST_BITS) { if (netif_msg_drv(priv)) { - dev_err(emac_dev, "DaVinci EMAC: hash_add(): Invalid "\ + dev_err(emac_dev, "DaVinci EMAC: emac_hash_add(): Invalid "\ "Hash %08x, should not be greater than %08x", hash_value, (EMAC_NUM_MULTICAST_BITS - 1)); } @@ -639,14 +639,14 @@ static int hash_add(struct emac_priv *priv, u8 *mac_addr) } /** - * hash_del - Hash function to delete mac addr from hash table + * emac_hash_del - Hash function to delete mac addr from hash table * @priv: The DaVinci EMAC private adapter structure * @mac_addr: mac address to delete from hash table * * Removes mac address from the internal hash table * */ -static int hash_del(struct emac_priv *priv, u8 *mac_addr) +static int emac_hash_del(struct emac_priv *priv, u8 *mac_addr) { u32 hash_value; u32 hash_bit; @@ -696,10 +696,10 @@ static void emac_add_mcast(struct emac_priv *priv, u32 action, u8 *mac_addr) switch (action) { case EMAC_MULTICAST_ADD: - update = hash_add(priv, mac_addr); + update = emac_hash_add(priv, mac_addr); break; case EMAC_MULTICAST_DEL: - update = hash_del(priv, mac_addr); + update = emac_hash_del(priv, mac_addr); break; case EMAC_ALL_MULTI_SET: update = 1; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 776d145113e1..b375b5addb76 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -61,12 +61,12 @@ static bool log_ecn_error = true; module_param(log_ecn_error, bool, 0644); MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); -#define HASH_SIZE_SHIFT 5 -#define HASH_SIZE (1 << HASH_SIZE_SHIFT) +#define IP6_GRE_HASH_SIZE_SHIFT 5 +#define IP6_GRE_HASH_SIZE (1 << IP6_GRE_HASH_SIZE_SHIFT) static int ip6gre_net_id __read_mostly; struct ip6gre_net { - struct ip6_tnl __rcu *tunnels[4][HASH_SIZE]; + struct ip6_tnl __rcu *tunnels[4][IP6_GRE_HASH_SIZE]; struct net_device *fb_tunnel_dev; }; @@ -96,12 +96,12 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu); will match fallback tunnel. */ -#define HASH_KEY(key) (((__force u32)key^((__force u32)key>>4))&(HASH_SIZE - 1)) +#define HASH_KEY(key) (((__force u32)key^((__force u32)key>>4))&(IP6_GRE_HASH_SIZE - 1)) static u32 HASH_ADDR(const struct in6_addr *addr) { u32 hash = ipv6_addr_hash(addr); - return hash_32(hash, HASH_SIZE_SHIFT); + return hash_32(hash, IP6_GRE_HASH_SIZE_SHIFT); } #define tunnels_r_l tunnels[3] @@ -1089,7 +1089,7 @@ static void ip6gre_destroy_tunnels(struct net *net, struct list_head *head) for (prio = 0; prio < 4; prio++) { int h; - for (h = 0; h < HASH_SIZE; h++) { + for (h = 0; h < IP6_GRE_HASH_SIZE; h++) { struct ip6_tnl *t; t = rtnl_dereference(ign->tunnels[prio][h]); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 7b0481e3738f..2050217df565 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -64,8 +64,8 @@ MODULE_LICENSE("GPL"); MODULE_ALIAS_RTNL_LINK("ip6tnl"); MODULE_ALIAS_NETDEV("ip6tnl0"); -#define HASH_SIZE_SHIFT 5 -#define HASH_SIZE (1 << HASH_SIZE_SHIFT) +#define IP6_TUNNEL_HASH_SIZE_SHIFT 5 +#define IP6_TUNNEL_HASH_SIZE (1 << IP6_TUNNEL_HASH_SIZE_SHIFT) static bool log_ecn_error = true; module_param(log_ecn_error, bool, 0644); @@ -75,7 +75,7 @@ static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2) { u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2); - return hash_32(hash, HASH_SIZE_SHIFT); + return hash_32(hash, IP6_TUNNEL_HASH_SIZE_SHIFT); } static int ip6_tnl_dev_init(struct net_device *dev); @@ -87,7 +87,7 @@ struct ip6_tnl_net { /* the IPv6 tunnel fallback device */ struct net_device *fb_tnl_dev; /* lists for storing tunnels in use */ - struct ip6_tnl __rcu *tnls_r_l[HASH_SIZE]; + struct ip6_tnl __rcu *tnls_r_l[IP6_TUNNEL_HASH_SIZE]; struct ip6_tnl __rcu *tnls_wc[1]; struct ip6_tnl __rcu **tnls[2]; }; @@ -2031,7 +2031,7 @@ static void __net_exit ip6_tnl_destroy_tunnels(struct net *net) if (dev->rtnl_link_ops == &ip6_link_ops) unregister_netdevice_queue(dev, &list); - for (h = 0; h < HASH_SIZE; h++) { + for (h = 0; h < IP6_TUNNEL_HASH_SIZE; h++) { t = rtnl_dereference(ip6n->tnls_r_l[h]); while (t) { /* If dev is in the same netns, it has already diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index d90a11f14040..cc7e05898307 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -50,14 +50,14 @@ #include #include -#define HASH_SIZE_SHIFT 5 -#define HASH_SIZE (1 << HASH_SIZE_SHIFT) +#define IP6_VTI_HASH_SIZE_SHIFT 5 +#define IP6_VTI_HASH_SIZE (1 << IP6_VTI_HASH_SIZE_SHIFT) static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2) { u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2); - return hash_32(hash, HASH_SIZE_SHIFT); + return hash_32(hash, IP6_VTI_HASH_SIZE_SHIFT); } static int vti6_dev_init(struct net_device *dev); @@ -69,7 +69,7 @@ struct vti6_net { /* the vti6 tunnel fallback device */ struct net_device *fb_tnl_dev; /* lists for storing tunnels in use */ - struct ip6_tnl __rcu *tnls_r_l[HASH_SIZE]; + struct ip6_tnl __rcu *tnls_r_l[IP6_VTI_HASH_SIZE]; struct ip6_tnl __rcu *tnls_wc[1]; struct ip6_tnl __rcu **tnls[2]; }; @@ -1040,7 +1040,7 @@ static void __net_exit vti6_destroy_tunnels(struct vti6_net *ip6n) struct ip6_tnl *t; LIST_HEAD(list); - for (h = 0; h < HASH_SIZE; h++) { + for (h = 0; h < IP6_VTI_HASH_SIZE; h++) { t = rtnl_dereference(ip6n->tnls_r_l[h]); while (t) { unregister_netdevice_queue(t->dev, &list); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 182b6a9be29d..696edeeff8bc 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -62,7 +62,7 @@ For comments look at net/ipv4/ip_gre.c --ANK */ -#define HASH_SIZE 16 +#define IP6_SIT_HASH_SIZE 16 #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF) static bool log_ecn_error = true; @@ -78,9 +78,9 @@ static struct rtnl_link_ops sit_link_ops __read_mostly; static int sit_net_id __read_mostly; struct sit_net { - struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE]; - struct ip_tunnel __rcu *tunnels_r[HASH_SIZE]; - struct ip_tunnel __rcu *tunnels_l[HASH_SIZE]; + struct ip_tunnel __rcu *tunnels_r_l[IP6_SIT_HASH_SIZE]; + struct ip_tunnel __rcu *tunnels_r[IP6_SIT_HASH_SIZE]; + struct ip_tunnel __rcu *tunnels_l[IP6_SIT_HASH_SIZE]; struct ip_tunnel __rcu *tunnels_wc[1]; struct ip_tunnel __rcu **tunnels[4]; @@ -1783,7 +1783,7 @@ static void __net_exit sit_destroy_tunnels(struct net *net, for (prio = 1; prio < 4; prio++) { int h; - for (h = 0; h < HASH_SIZE; h++) { + for (h = 0; h < IP6_SIT_HASH_SIZE; h++) { struct ip_tunnel *t; t = rtnl_dereference(sitn->tunnels[prio][h]); From 59cc1f61f09c26ce82c308e24b76141e1efe99f8 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Wed, 10 Aug 2016 11:05:15 +0200 Subject: [PATCH 0097/1715] net: sched: convert qdisc linked list to hashtable Convert the per-device linked list into a hashtable. The primary motivation for this change is that currently, we're not tracking all the qdiscs in hierarchy (e.g. excluding default qdiscs), as the lookup performed over the linked list by qdisc_match_from_root() is rather expensive. The ultimate goal is to get rid of hidden qdiscs completely, which will bring much more determinism in user experience. Reviewed-by: Cong Wang Signed-off-by: Jiri Kosina Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ++++ include/net/pkt_sched.h | 4 ++-- include/net/sch_generic.h | 2 +- net/core/dev.c | 3 +++ net/sched/sch_api.c | 23 +++++++++++++---------- net/sched/sch_generic.c | 8 +++++--- net/sched/sch_mq.c | 2 +- net/sched/sch_mqprio.c | 2 +- 8 files changed, 30 insertions(+), 18 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 076df5360ba5..96e0b6cd964e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -52,6 +52,7 @@ #include #include #include +#include struct netpoll_info; struct device; @@ -1800,6 +1801,9 @@ struct net_device { unsigned int num_tx_queues; unsigned int real_num_tx_queues; struct Qdisc *qdisc; +#ifdef CONFIG_NET_SCHED + DECLARE_HASHTABLE (qdisc_hash, 4); +#endif unsigned long tx_queue_len; spinlock_t tx_global_lock; int watchdog_timeo; diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 7caa99b482c6..cd334c9584e9 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -90,8 +90,8 @@ int unregister_qdisc(struct Qdisc_ops *qops); void qdisc_get_default(char *id, size_t len); int qdisc_set_default(const char *id); -void qdisc_list_add(struct Qdisc *q); -void qdisc_list_del(struct Qdisc *q); +void qdisc_hash_add(struct Qdisc *q); +void qdisc_hash_del(struct Qdisc *q); struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle); struct Qdisc *qdisc_lookup_class(struct net_device *dev, u32 handle); struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 909aff2db2b3..0d501779cc68 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -61,7 +61,7 @@ struct Qdisc { u32 limit; const struct Qdisc_ops *ops; struct qdisc_size_table __rcu *stab; - struct list_head list; + struct hlist_node hash; u32 handle; u32 parent; void *u32_node; diff --git a/net/core/dev.c b/net/core/dev.c index 4ce07dc25573..936ea0054f57 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7629,6 +7629,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, INIT_LIST_HEAD(&dev->all_adj_list.lower); INIT_LIST_HEAD(&dev->ptype_all); INIT_LIST_HEAD(&dev->ptype_specific); +#ifdef CONFIG_NET_SCHED + hash_init(dev->qdisc_hash); +#endif dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM; setup(dev); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 12ebde845523..25aada7b095c 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -263,33 +264,33 @@ static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle) root->handle == handle) return root; - list_for_each_entry_rcu(q, &root->list, list) { + hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle) { if (q->handle == handle) return q; } return NULL; } -void qdisc_list_add(struct Qdisc *q) +void qdisc_hash_add(struct Qdisc *q) { if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) { struct Qdisc *root = qdisc_dev(q)->qdisc; WARN_ON_ONCE(root == &noop_qdisc); ASSERT_RTNL(); - list_add_tail_rcu(&q->list, &root->list); + hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle); } } -EXPORT_SYMBOL(qdisc_list_add); +EXPORT_SYMBOL(qdisc_hash_add); -void qdisc_list_del(struct Qdisc *q) +void qdisc_hash_del(struct Qdisc *q) { if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) { ASSERT_RTNL(); - list_del_rcu(&q->list); + hash_del_rcu(&q->hash); } } -EXPORT_SYMBOL(qdisc_list_del); +EXPORT_SYMBOL(qdisc_hash_del); struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) { @@ -998,7 +999,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, goto err_out4; } - qdisc_list_add(sch); + qdisc_hash_add(sch); return sch; } @@ -1435,6 +1436,7 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, { int ret = 0, q_idx = *q_idx_p; struct Qdisc *q; + int b; if (!root) return 0; @@ -1449,7 +1451,7 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, goto done; q_idx++; } - list_for_each_entry(q, &root->list, list) { + hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) { if (q_idx < s_q_idx) { q_idx++; continue; @@ -1765,6 +1767,7 @@ static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb, int *t_p, int s_t) { struct Qdisc *q; + int b; if (!root) return 0; @@ -1772,7 +1775,7 @@ static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb, if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0) return -1; - list_for_each_entry(q, &root->list, list) { + hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) { if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0) return -1; } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index e95b67cd5718..18faecc3f13e 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -423,7 +423,6 @@ struct Qdisc noop_qdisc = { .dequeue = noop_dequeue, .flags = TCQ_F_BUILTIN, .ops = &noop_qdisc_ops, - .list = LIST_HEAD_INIT(noop_qdisc.list), .q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock), .dev_queue = &noop_netdev_queue, .running = SEQCNT_ZERO(noop_qdisc.running), @@ -613,7 +612,6 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p); sch->padded = (char *) sch - (char *) p; } - INIT_LIST_HEAD(&sch->list); skb_queue_head_init(&sch->q); spin_lock_init(&sch->busylock); @@ -700,7 +698,7 @@ void qdisc_destroy(struct Qdisc *qdisc) return; #ifdef CONFIG_NET_SCHED - qdisc_list_del(qdisc); + qdisc_hash_del(qdisc); qdisc_put_stab(rtnl_dereference(qdisc->stab)); #endif @@ -788,6 +786,10 @@ static void attach_default_qdiscs(struct net_device *dev) qdisc->ops->attach(qdisc); } } +#ifdef CONFIG_NET_SCHED + if (dev->qdisc) + qdisc_hash_add(dev->qdisc); +#endif } static void transition_one_qdisc(struct net_device *dev, diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index b9439827c172..2bc8d7f8df16 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -88,7 +88,7 @@ static void mq_attach(struct Qdisc *sch) qdisc_destroy(old); #ifdef CONFIG_NET_SCHED if (ntx < dev->real_num_tx_queues) - qdisc_list_add(qdisc); + qdisc_hash_add(qdisc); #endif } diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index 549c66359924..b5c502c78143 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -182,7 +182,7 @@ static void mqprio_attach(struct Qdisc *sch) if (old) qdisc_destroy(old); if (ntx < dev->real_num_tx_queues) - qdisc_list_add(qdisc); + qdisc_hash_add(qdisc); } kfree(priv->qdiscs); priv->qdiscs = NULL; From ab10dccb11608b96b43b557c12a5ad867723e503 Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Tue, 9 Aug 2016 12:38:24 +0800 Subject: [PATCH 0098/1715] rps: Inspect PPTP encapsulated by GRE to get flow hash The PPTP is encapsulated by GRE header with that GRE_VERSION bits must contain one. But current GRE RPS needs the GRE_VERSION must be zero. So RPS does not work for PPTP traffic. In my test environment, there are four MIPS cores, and all traffic are passed through by PPTP. As a result, only one core is 100% busy while other three cores are very idle. After this patch, the usage of four cores are balanced well. Signed-off-by: Gao Feng Reviewed-by: Philip Prindeville Signed-off-by: David S. Miller --- drivers/net/ppp/pptp.c | 36 +---------- include/net/gre.h | 10 ++- include/net/pptp.h | 40 ++++++++++++ include/uapi/linux/if_tunnel.h | 7 ++- net/core/flow_dissector.c | 109 +++++++++++++++++++++++---------- 5 files changed, 133 insertions(+), 69 deletions(-) create mode 100644 include/net/pptp.h diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c index ae0905ed4a32..3e68dbc0af7f 100644 --- a/drivers/net/ppp/pptp.c +++ b/drivers/net/ppp/pptp.c @@ -37,6 +37,7 @@ #include #include #include +#include #include @@ -53,41 +54,6 @@ static struct proto pptp_sk_proto __read_mostly; static const struct ppp_channel_ops pptp_chan_ops; static const struct proto_ops pptp_ops; -#define PPP_LCP_ECHOREQ 0x09 -#define PPP_LCP_ECHOREP 0x0A -#define SC_RCV_BITS (SC_RCV_B7_1|SC_RCV_B7_0|SC_RCV_ODDP|SC_RCV_EVNP) - -#define MISSING_WINDOW 20 -#define WRAPPED(curseq, lastseq)\ - ((((curseq) & 0xffffff00) == 0) &&\ - (((lastseq) & 0xffffff00) == 0xffffff00)) - -#define PPTP_GRE_PROTO 0x880B -#define PPTP_GRE_VER 0x1 - -#define PPTP_GRE_FLAG_C 0x80 -#define PPTP_GRE_FLAG_R 0x40 -#define PPTP_GRE_FLAG_K 0x20 -#define PPTP_GRE_FLAG_S 0x10 -#define PPTP_GRE_FLAG_A 0x80 - -#define PPTP_GRE_IS_C(f) ((f)&PPTP_GRE_FLAG_C) -#define PPTP_GRE_IS_R(f) ((f)&PPTP_GRE_FLAG_R) -#define PPTP_GRE_IS_K(f) ((f)&PPTP_GRE_FLAG_K) -#define PPTP_GRE_IS_S(f) ((f)&PPTP_GRE_FLAG_S) -#define PPTP_GRE_IS_A(f) ((f)&PPTP_GRE_FLAG_A) - -#define PPTP_HEADER_OVERHEAD (2+sizeof(struct pptp_gre_header)) -struct pptp_gre_header { - u8 flags; - u8 ver; - __be16 protocol; - __be16 payload_len; - __be16 call_id; - __be32 seq; - __be32 ack; -} __packed; - static struct pppox_sock *lookup_chan(u16 call_id, __be32 s_addr) { struct pppox_sock *sock; diff --git a/include/net/gre.h b/include/net/gre.h index 7a54a31d1d4c..8962e1e68449 100644 --- a/include/net/gre.h +++ b/include/net/gre.h @@ -7,7 +7,15 @@ struct gre_base_hdr { __be16 flags; __be16 protocol; -}; +} __packed; + +struct gre_full_hdr { + struct gre_base_hdr fixed_header; + __be16 csum; + __be16 reserved1; + __be32 key; + __be32 seq; +} __packed; #define GRE_HEADER_SECTION 4 #define GREPROTO_CISCO 0 diff --git a/include/net/pptp.h b/include/net/pptp.h new file mode 100644 index 000000000000..301d3e2ba1f9 --- /dev/null +++ b/include/net/pptp.h @@ -0,0 +1,40 @@ +#ifndef _NET_PPTP_H +#define _NET_PPTP_H + +#define PPP_LCP_ECHOREQ 0x09 +#define PPP_LCP_ECHOREP 0x0A +#define SC_RCV_BITS (SC_RCV_B7_1|SC_RCV_B7_0|SC_RCV_ODDP|SC_RCV_EVNP) + +#define MISSING_WINDOW 20 +#define WRAPPED(curseq, lastseq)\ + ((((curseq) & 0xffffff00) == 0) &&\ + (((lastseq) & 0xffffff00) == 0xffffff00)) + +#define PPTP_GRE_PROTO 0x880B +#define PPTP_GRE_VER 0x1 + +#define PPTP_GRE_FLAG_C 0x80 +#define PPTP_GRE_FLAG_R 0x40 +#define PPTP_GRE_FLAG_K 0x20 +#define PPTP_GRE_FLAG_S 0x10 +#define PPTP_GRE_FLAG_A 0x80 + +#define PPTP_GRE_IS_C(f) ((f)&PPTP_GRE_FLAG_C) +#define PPTP_GRE_IS_R(f) ((f)&PPTP_GRE_FLAG_R) +#define PPTP_GRE_IS_K(f) ((f)&PPTP_GRE_FLAG_K) +#define PPTP_GRE_IS_S(f) ((f)&PPTP_GRE_FLAG_S) +#define PPTP_GRE_IS_A(f) ((f)&PPTP_GRE_FLAG_A) + +#define PPTP_HEADER_OVERHEAD (2+sizeof(struct pptp_gre_header)) +struct pptp_gre_header { + u8 flags; + u8 ver; + __be16 protocol; + __be16 payload_len; + __be16 call_id; + __be32 seq; + __be32 ack; +} __packed; + + +#endif diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h index 1046f5515174..60dbb200de60 100644 --- a/include/uapi/linux/if_tunnel.h +++ b/include/uapi/linux/if_tunnel.h @@ -24,9 +24,14 @@ #define GRE_SEQ __cpu_to_be16(0x1000) #define GRE_STRICT __cpu_to_be16(0x0800) #define GRE_REC __cpu_to_be16(0x0700) -#define GRE_FLAGS __cpu_to_be16(0x00F8) +#define GRE_ACK __cpu_to_be16(0x0080) +#define GRE_FLAGS __cpu_to_be16(0x0078) #define GRE_VERSION __cpu_to_be16(0x0007) +#define GRE_VERSION_1 __cpu_to_be16(0x0001) +#define GRE_PROTO_PPP __cpu_to_be16(0x880b) +#define GRE_PPTP_KEY_MASK __cpu_to_be32(0xffff) + struct ip_tunnel_parm { char name[IFNAMSIZ]; int link; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 61ad43f61c5e..91028ae2fb01 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -6,6 +6,8 @@ #include #include #include +#include +#include #include #include #include @@ -338,32 +340,42 @@ mpls: ip_proto_again: switch (ip_proto) { case IPPROTO_GRE: { - struct gre_hdr { - __be16 flags; - __be16 proto; - } *hdr, _hdr; + struct gre_base_hdr *hdr, _hdr; + u16 gre_ver; + int offset = 0; hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr); if (!hdr) goto out_bad; - /* - * Only look inside GRE if version zero and no - * routing - */ - if (hdr->flags & (GRE_VERSION | GRE_ROUTING)) + + /* Only look inside GRE without routing */ + if (hdr->flags & GRE_ROUTING) break; - proto = hdr->proto; - nhoff += 4; + /* Only look inside GRE for version 0 and 1 */ + gre_ver = ntohs(hdr->flags & GRE_VERSION); + if (gre_ver > 1) + break; + + proto = hdr->protocol; + if (gre_ver) { + /* Version1 must be PPTP, and check the flags */ + if (!(proto == GRE_PROTO_PPP && (hdr->flags & GRE_KEY))) + break; + } + + offset += sizeof(struct gre_base_hdr); + if (hdr->flags & GRE_CSUM) - nhoff += 4; + offset += sizeof(((struct gre_full_hdr *)0)->csum) + + sizeof(((struct gre_full_hdr *)0)->reserved1); + if (hdr->flags & GRE_KEY) { const __be32 *keyid; __be32 _keyid; - keyid = __skb_header_pointer(skb, nhoff, sizeof(_keyid), + keyid = __skb_header_pointer(skb, nhoff + offset, sizeof(_keyid), data, hlen, &_keyid); - if (!keyid) goto out_bad; @@ -372,32 +384,65 @@ ip_proto_again: key_keyid = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_GRE_KEYID, target_container); - key_keyid->keyid = *keyid; + if (gre_ver == 0) + key_keyid->keyid = *keyid; + else + key_keyid->keyid = *keyid & GRE_PPTP_KEY_MASK; } - nhoff += 4; + offset += sizeof(((struct gre_full_hdr *)0)->key); } + if (hdr->flags & GRE_SEQ) - nhoff += 4; - if (proto == htons(ETH_P_TEB)) { - const struct ethhdr *eth; - struct ethhdr _eth; + offset += sizeof(((struct pptp_gre_header *)0)->seq); - eth = __skb_header_pointer(skb, nhoff, - sizeof(_eth), - data, hlen, &_eth); - if (!eth) + if (gre_ver == 0) { + if (proto == htons(ETH_P_TEB)) { + const struct ethhdr *eth; + struct ethhdr _eth; + + eth = __skb_header_pointer(skb, nhoff + offset, + sizeof(_eth), + data, hlen, &_eth); + if (!eth) + goto out_bad; + proto = eth->h_proto; + offset += sizeof(*eth); + + /* Cap headers that we access via pointers at the + * end of the Ethernet header as our maximum alignment + * at that point is only 2 bytes. + */ + if (NET_IP_ALIGN) + hlen = (nhoff + offset); + } + } else { /* version 1, must be PPTP */ + u8 _ppp_hdr[PPP_HDRLEN]; + u8 *ppp_hdr; + + if (hdr->flags & GRE_ACK) + offset += sizeof(((struct pptp_gre_header *)0)->ack); + + ppp_hdr = skb_header_pointer(skb, nhoff + offset, + sizeof(_ppp_hdr), _ppp_hdr); + if (!ppp_hdr) goto out_bad; - proto = eth->h_proto; - nhoff += sizeof(*eth); - /* Cap headers that we access via pointers at the - * end of the Ethernet header as our maximum alignment - * at that point is only 2 bytes. - */ - if (NET_IP_ALIGN) - hlen = nhoff; + switch (PPP_PROTOCOL(ppp_hdr)) { + case PPP_IP: + proto = htons(ETH_P_IP); + break; + case PPP_IPV6: + proto = htons(ETH_P_IPV6); + break; + default: + /* Could probably catch some more like MPLS */ + break; + } + + offset += PPP_HDRLEN; } + nhoff += offset; key_control->flags |= FLOW_DIS_ENCAPSULATION; if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP) goto out_good; From 27e9e10391259cbe2e52d6f0ac5b16b1d898d901 Mon Sep 17 00:00:00 2001 From: Ivan Khoronzhuk Date: Wed, 10 Aug 2016 02:22:32 +0300 Subject: [PATCH 0099/1715] net: ethernet: ti: cpsw: simplify submit routine As second net dev is created only in case of dual_emac mode, port number can be figured out in simpler way. Also no need to pass redundant ndev struct. Reviewed-by: Mugunthan V N Reviewed-by: Grygorii Strashko Signed-off-by: Ivan Khoronzhuk Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index c51f34693eae..8972bf6870ed 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -1065,19 +1065,11 @@ static int cpsw_common_res_usage_state(struct cpsw_priv *priv) return usage_count; } -static inline int cpsw_tx_packet_submit(struct net_device *ndev, - struct cpsw_priv *priv, struct sk_buff *skb) +static inline int cpsw_tx_packet_submit(struct cpsw_priv *priv, + struct sk_buff *skb) { - if (!priv->data.dual_emac) - return cpdma_chan_submit(priv->txch, skb, skb->data, - skb->len, 0); - - if (ndev == cpsw_get_slave_ndev(priv, 0)) - return cpdma_chan_submit(priv->txch, skb, skb->data, - skb->len, 1); - else - return cpdma_chan_submit(priv->txch, skb, skb->data, - skb->len, 2); + return cpdma_chan_submit(priv->txch, skb, skb->data, skb->len, + priv->emac_port + priv->data.dual_emac); } static inline void cpsw_add_dual_emac_def_ale_entries( @@ -1406,7 +1398,7 @@ static netdev_tx_t cpsw_ndo_start_xmit(struct sk_buff *skb, skb_tx_timestamp(skb); - ret = cpsw_tx_packet_submit(ndev, priv, skb); + ret = cpsw_tx_packet_submit(priv, skb); if (unlikely(ret != 0)) { cpsw_err(priv, tx_err, "desc submit failed\n"); goto fail; From 0a440f8f4f75598235a71a365fb0dbe0056ad75c Mon Sep 17 00:00:00 2001 From: Ivan Khoronzhuk Date: Wed, 10 Aug 2016 02:22:33 +0300 Subject: [PATCH 0100/1715] net: ethernet: ti: cpsw: remove intr dbg msg from poll handlers At poll handler no possibility to figure out which network device is handling packets, as cpdma channels are common for both network devices in dual_emac mode. Currently, the messages are printed only for one device, in fact, there is two. This print msg is incorrect and seems is not very useful, so drop it from poll handler. Reviewed-by: Mugunthan V N Signed-off-by: Ivan Khoronzhuk Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 8972bf6870ed..21cf3671c79e 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -793,9 +793,6 @@ static int cpsw_tx_poll(struct napi_struct *napi_tx, int budget) } } - if (num_tx) - cpsw_dbg(priv, intr, "poll %d tx pkts\n", num_tx); - return num_tx; } @@ -814,9 +811,6 @@ static int cpsw_rx_poll(struct napi_struct *napi_rx, int budget) } } - if (num_rx) - cpsw_dbg(priv, intr, "poll %d rx pkts\n", num_rx); - return num_rx; } From 6f1f58361fa249ce10ca14256e473e6881e4efee Mon Sep 17 00:00:00 2001 From: Ivan Khoronzhuk Date: Wed, 10 Aug 2016 02:22:34 +0300 Subject: [PATCH 0101/1715] net: ethernet: ti: cpsw: remove priv from cpsw_get_slave_port() parameters list There is no need in priv here. Reviewed-by: Mugunthan V N Reviewed-by: Grygorii Strashko Signed-off-by: Ivan Khoronzhuk Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 21cf3671c79e..4f6a4c100810 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -525,7 +525,7 @@ static const struct cpsw_stats cpsw_gstrings_stats[] = { if (priv->data.dual_emac) { \ struct cpsw_slave *slave = priv->slaves + \ priv->emac_port; \ - int slave_port = cpsw_get_slave_port(priv, \ + int slave_port = cpsw_get_slave_port( \ slave->slave_num); \ cpsw_ale_add_mcast(priv->ale, addr, \ 1 << slave_port | ALE_PORT_HOST, \ @@ -537,7 +537,7 @@ static const struct cpsw_stats cpsw_gstrings_stats[] = { } \ } while (0) -static inline int cpsw_get_slave_port(struct cpsw_priv *priv, u32 slave_num) +static inline int cpsw_get_slave_port(u32 slave_num) { return slave_num + 1; } @@ -847,7 +847,7 @@ static void _cpsw_adjust_link(struct cpsw_slave *slave, if (!phy) return; - slave_port = cpsw_get_slave_port(priv, slave->slave_num); + slave_port = cpsw_get_slave_port(slave->slave_num); if (phy->link) { mac_control = priv->data.mac_control; @@ -1118,7 +1118,7 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv) slave->mac_control = 0; /* no link yet */ - slave_port = cpsw_get_slave_port(priv, slave->slave_num); + slave_port = cpsw_get_slave_port(slave->slave_num); if (priv->data.dual_emac) cpsw_add_dual_emac_def_ale_entries(priv, slave, slave_port); @@ -1220,7 +1220,7 @@ static void cpsw_slave_stop(struct cpsw_slave *slave, struct cpsw_priv *priv) { u32 slave_port; - slave_port = cpsw_get_slave_port(priv, slave->slave_num); + slave_port = cpsw_get_slave_port(slave->slave_num); if (!slave->phy) return; From ef4183a1d75b15c3bbd6e7f2b14fc5480d740bff Mon Sep 17 00:00:00 2001 From: Ivan Khoronzhuk Date: Wed, 10 Aug 2016 02:22:35 +0300 Subject: [PATCH 0102/1715] net: ethernet: ti: cpsw: remove clk var from priv There is no need to hold link to clk, it's used only once while probe. Reviewed-by: Mugunthan V N Reviewed-by: Grygorii Strashko Signed-off-by: Ivan Khoronzhuk Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 4f6a4c100810..0b6958d6834e 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -379,7 +379,6 @@ struct cpsw_priv { u32 coal_intvl; u32 bus_freq_mhz; int rx_packet_max; - struct clk *clk; u8 mac_addr[ETH_ALEN]; struct cpsw_slave *slaves; struct cpdma_ctlr *dma; @@ -2177,8 +2176,6 @@ static int cpsw_probe_dual_emac(struct platform_device *pdev, memcpy(ndev->dev_addr, priv_sl2->mac_addr, ETH_ALEN); priv_sl2->slaves = priv->slaves; - priv_sl2->clk = priv->clk; - priv_sl2->coal_intvl = 0; priv_sl2->bus_freq_mhz = priv->bus_freq_mhz; @@ -2256,6 +2253,7 @@ MODULE_DEVICE_TABLE(of, cpsw_of_mtable); static int cpsw_probe(struct platform_device *pdev) { + struct clk *clk; struct cpsw_platform_data *data; struct net_device *ndev; struct cpsw_priv *priv; @@ -2334,14 +2332,14 @@ static int cpsw_probe(struct platform_device *pdev) priv->slaves[0].ndev = ndev; priv->emac_port = 0; - priv->clk = devm_clk_get(&pdev->dev, "fck"); - if (IS_ERR(priv->clk)) { + clk = devm_clk_get(&pdev->dev, "fck"); + if (IS_ERR(clk)) { dev_err(priv->dev, "fck is not found\n"); ret = -ENODEV; goto clean_runtime_disable_ret; } priv->coal_intvl = 0; - priv->bus_freq_mhz = clk_get_rate(priv->clk) / 1000000; + priv->bus_freq_mhz = clk_get_rate(clk) / 1000000; ss_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); ss_regs = devm_ioremap_resource(&pdev->dev, ss_res); From 82b52104a31a96689fe1931180c66dd699ad5fc1 Mon Sep 17 00:00:00 2001 From: Ivan Khoronzhuk Date: Wed, 10 Aug 2016 02:22:36 +0300 Subject: [PATCH 0103/1715] net: ethernet: ti: cpsw: don't check slave num in runtime No need to check const slave num in runtime for every packet, and ndev for slaves w/o ndev is anyway NULL. So remove redundant check and macro. Reviewed-by: Mugunthan V N Signed-off-by: Ivan Khoronzhuk Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 0b6958d6834e..cfbb1f2becdd 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -497,9 +497,6 @@ static const struct cpsw_stats cpsw_gstrings_stats[] = { n; n--) \ (func)(slave++, ##arg); \ } while (0) -#define cpsw_get_slave_ndev(priv, __slave_no__) \ - ((__slave_no__ < priv->data.slaves) ? \ - priv->slaves[__slave_no__].ndev : NULL) #define cpsw_get_slave_priv(priv, __slave_no__) \ (((__slave_no__ < priv->data.slaves) && \ (priv->slaves[__slave_no__].ndev)) ? \ @@ -510,11 +507,11 @@ static const struct cpsw_stats cpsw_gstrings_stats[] = { if (!priv->data.dual_emac) \ break; \ if (CPDMA_RX_SOURCE_PORT(status) == 1) { \ - ndev = cpsw_get_slave_ndev(priv, 0); \ + ndev = priv->slaves[0].ndev; \ priv = netdev_priv(ndev); \ skb->dev = ndev; \ } else if (CPDMA_RX_SOURCE_PORT(status) == 2) { \ - ndev = cpsw_get_slave_ndev(priv, 1); \ + ndev = priv->slaves[1].ndev; \ priv = netdev_priv(ndev); \ skb->dev = ndev; \ } \ @@ -2561,7 +2558,7 @@ static int cpsw_remove(struct platform_device *pdev) } if (priv->data.dual_emac) - unregister_netdev(cpsw_get_slave_ndev(priv, 1)); + unregister_netdev(priv->slaves[1].ndev); unregister_netdev(ndev); cpsw_ale_destroy(priv->ale); @@ -2570,7 +2567,7 @@ static int cpsw_remove(struct platform_device *pdev) pm_runtime_put_sync(&pdev->dev); pm_runtime_disable(&pdev->dev); if (priv->data.dual_emac) - free_netdev(cpsw_get_slave_ndev(priv, 1)); + free_netdev(priv->slaves[1].ndev); free_netdev(ndev); return 0; } From 649a1688c96098a702e827a073542b29739b4f88 Mon Sep 17 00:00:00 2001 From: Ivan Khoronzhuk Date: Wed, 10 Aug 2016 02:22:37 +0300 Subject: [PATCH 0104/1715] net: ethernet: ti: cpsw: create common struct to hold shared driver data This patch simply create holder for common data and as a start moves pdev var to it. Signed-off-by: Ivan Khoronzhuk Reviewed-by: Mugunthan V N Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 62 +++++++++++++++++++++------------- 1 file changed, 39 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index cfbb1f2becdd..3ccf577dd46d 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -363,8 +363,11 @@ static inline void slave_write(struct cpsw_slave *slave, u32 val, u32 offset) __raw_writel(val, slave->regs + offset); } -struct cpsw_priv { +struct cpsw_common { struct platform_device *pdev; +}; + +struct cpsw_priv { struct net_device *ndev; struct napi_struct napi_rx; struct napi_struct napi_tx; @@ -394,6 +397,7 @@ struct cpsw_priv { u32 num_irqs; struct cpts *cpts; u32 emac_port; + struct cpsw_common *cpsw; }; struct cpsw_stats { @@ -484,6 +488,7 @@ static const struct cpsw_stats cpsw_gstrings_stats[] = { #define CPSW_STATS_LEN ARRAY_SIZE(cpsw_gstrings_stats) +#define ndev_to_cpsw(ndev) (((struct cpsw_priv *)netdev_priv(ndev))->cpsw) #define napi_to_priv(napi) container_of(napi, struct cpsw_priv, napi) #define for_each_slave(priv, func, arg...) \ do { \ @@ -1091,6 +1096,7 @@ static void soft_reset_slave(struct cpsw_slave *slave) static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv) { u32 slave_port; + struct cpsw_common *cpsw = priv->cpsw; soft_reset_slave(slave); @@ -1149,7 +1155,7 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv) phy_start(slave->phy); /* Configure GMII_SEL register */ - cpsw_phy_sel(&priv->pdev->dev, slave->phy->interface, slave->slave_num); + cpsw_phy_sel(&cpsw->pdev->dev, slave->phy->interface, slave->slave_num); } static inline void cpsw_add_default_vlan(struct cpsw_priv *priv) @@ -1231,12 +1237,13 @@ static void cpsw_slave_stop(struct cpsw_slave *slave, struct cpsw_priv *priv) static int cpsw_ndo_open(struct net_device *ndev) { struct cpsw_priv *priv = netdev_priv(ndev); + struct cpsw_common *cpsw = priv->cpsw; int i, ret; u32 reg; - ret = pm_runtime_get_sync(&priv->pdev->dev); + ret = pm_runtime_get_sync(&cpsw->pdev->dev); if (ret < 0) { - pm_runtime_put_noidle(&priv->pdev->dev); + pm_runtime_put_noidle(&cpsw->pdev->dev); return ret; } @@ -1313,7 +1320,7 @@ static int cpsw_ndo_open(struct net_device *ndev) */ cpsw_info(priv, ifup, "submitted %d rx descriptors\n", i); - if (cpts_register(&priv->pdev->dev, priv->cpts, + if (cpts_register(&cpsw->pdev->dev, priv->cpts, priv->data.cpts_clock_mult, priv->data.cpts_clock_shift)) dev_err(priv->dev, "error registering cpts device\n"); @@ -1338,7 +1345,7 @@ static int cpsw_ndo_open(struct net_device *ndev) err_cleanup: cpdma_ctlr_stop(priv->dma); for_each_slave(priv, cpsw_slave_stop, priv); - pm_runtime_put_sync(&priv->pdev->dev); + pm_runtime_put_sync(&cpsw->pdev->dev); netif_carrier_off(priv->ndev); return ret; } @@ -1346,6 +1353,7 @@ err_cleanup: static int cpsw_ndo_stop(struct net_device *ndev) { struct cpsw_priv *priv = netdev_priv(ndev); + struct cpsw_common *cpsw = priv->cpsw; cpsw_info(priv, ifdown, "shutting down cpsw device\n"); netif_stop_queue(priv->ndev); @@ -1362,7 +1370,7 @@ static int cpsw_ndo_stop(struct net_device *ndev) cpsw_ale_stop(priv->ale); } for_each_slave(priv, cpsw_slave_stop, priv); - pm_runtime_put_sync(&priv->pdev->dev); + pm_runtime_put_sync(&cpsw->pdev->dev); if (priv->data.dual_emac) priv->slaves[priv->emac_port].open_stat = false; return 0; @@ -1594,6 +1602,7 @@ static int cpsw_ndo_set_mac_address(struct net_device *ndev, void *p) { struct cpsw_priv *priv = netdev_priv(ndev); struct sockaddr *addr = (struct sockaddr *)p; + struct cpsw_common *cpsw = priv->cpsw; int flags = 0; u16 vid = 0; int ret; @@ -1601,9 +1610,9 @@ static int cpsw_ndo_set_mac_address(struct net_device *ndev, void *p) if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; - ret = pm_runtime_get_sync(&priv->pdev->dev); + ret = pm_runtime_get_sync(&cpsw->pdev->dev); if (ret < 0) { - pm_runtime_put_noidle(&priv->pdev->dev); + pm_runtime_put_noidle(&cpsw->pdev->dev); return ret; } @@ -1621,7 +1630,7 @@ static int cpsw_ndo_set_mac_address(struct net_device *ndev, void *p) memcpy(ndev->dev_addr, priv->mac_addr, ETH_ALEN); for_each_slave(priv, cpsw_set_slave_mac, priv); - pm_runtime_put(&priv->pdev->dev); + pm_runtime_put(&cpsw->pdev->dev); return 0; } @@ -1687,14 +1696,15 @@ static int cpsw_ndo_vlan_rx_add_vid(struct net_device *ndev, __be16 proto, u16 vid) { struct cpsw_priv *priv = netdev_priv(ndev); + struct cpsw_common *cpsw = priv->cpsw; int ret; if (vid == priv->data.default_vlan) return 0; - ret = pm_runtime_get_sync(&priv->pdev->dev); + ret = pm_runtime_get_sync(&cpsw->pdev->dev); if (ret < 0) { - pm_runtime_put_noidle(&priv->pdev->dev); + pm_runtime_put_noidle(&cpsw->pdev->dev); return ret; } @@ -1714,7 +1724,7 @@ static int cpsw_ndo_vlan_rx_add_vid(struct net_device *ndev, dev_info(priv->dev, "Adding vlanid %d to vlan filter\n", vid); ret = cpsw_add_vlan_ale_entry(priv, vid); - pm_runtime_put(&priv->pdev->dev); + pm_runtime_put(&cpsw->pdev->dev); return ret; } @@ -1722,14 +1732,15 @@ static int cpsw_ndo_vlan_rx_kill_vid(struct net_device *ndev, __be16 proto, u16 vid) { struct cpsw_priv *priv = netdev_priv(ndev); + struct cpsw_common *cpsw = priv->cpsw; int ret; if (vid == priv->data.default_vlan) return 0; - ret = pm_runtime_get_sync(&priv->pdev->dev); + ret = pm_runtime_get_sync(&cpsw->pdev->dev); if (ret < 0) { - pm_runtime_put_noidle(&priv->pdev->dev); + pm_runtime_put_noidle(&cpsw->pdev->dev); return ret; } @@ -1754,7 +1765,7 @@ static int cpsw_ndo_vlan_rx_kill_vid(struct net_device *ndev, ret = cpsw_ale_del_mcast(priv->ale, priv->ndev->broadcast, 0, ALE_VLAN, vid); - pm_runtime_put(&priv->pdev->dev); + pm_runtime_put(&cpsw->pdev->dev); return ret; } @@ -1797,11 +1808,11 @@ static void cpsw_get_regs(struct net_device *ndev, static void cpsw_get_drvinfo(struct net_device *ndev, struct ethtool_drvinfo *info) { - struct cpsw_priv *priv = netdev_priv(ndev); + struct cpsw_common *cpsw = ndev_to_cpsw(ndev); strlcpy(info->driver, "cpsw", sizeof(info->driver)); strlcpy(info->version, "1.0", sizeof(info->version)); - strlcpy(info->bus_info, priv->pdev->name, sizeof(info->bus_info)); + strlcpy(info->bus_info, cpsw->pdev->name, sizeof(info->bus_info)); } static u32 cpsw_get_msglevel(struct net_device *ndev) @@ -1920,12 +1931,13 @@ static int cpsw_set_pauseparam(struct net_device *ndev, static int cpsw_ethtool_op_begin(struct net_device *ndev) { struct cpsw_priv *priv = netdev_priv(ndev); + struct cpsw_common *cpsw = priv->cpsw; int ret; - ret = pm_runtime_get_sync(&priv->pdev->dev); + ret = pm_runtime_get_sync(&cpsw->pdev->dev); if (ret < 0) { cpsw_err(priv, drv, "ethtool begin failed %d\n", ret); - pm_runtime_put_noidle(&priv->pdev->dev); + pm_runtime_put_noidle(&cpsw->pdev->dev); } return ret; @@ -1936,7 +1948,7 @@ static void cpsw_ethtool_op_complete(struct net_device *ndev) struct cpsw_priv *priv = netdev_priv(ndev); int ret; - ret = pm_runtime_put(&priv->pdev->dev); + ret = pm_runtime_put(&priv->cpsw->pdev->dev); if (ret < 0) cpsw_err(priv, drv, "ethtool complete failed %d\n", ret); } @@ -2155,8 +2167,8 @@ static int cpsw_probe_dual_emac(struct platform_device *pdev, } priv_sl2 = netdev_priv(ndev); + priv_sl2->cpsw = priv->cpsw; priv_sl2->data = *data; - priv_sl2->pdev = pdev; priv_sl2->ndev = ndev; priv_sl2->dev = &ndev->dev; priv_sl2->msg_enable = netif_msg_init(debug_level, CPSW_DEBUG); @@ -2261,9 +2273,13 @@ static int cpsw_probe(struct platform_device *pdev) const struct of_device_id *of_id; struct gpio_descs *mode; u32 slave_offset, sliver_offset, slave_size; + struct cpsw_common *cpsw; int ret = 0, i; int irq; + cpsw = devm_kzalloc(&pdev->dev, sizeof(struct cpsw_common), GFP_KERNEL); + cpsw->pdev = pdev; + ndev = alloc_etherdev(sizeof(struct cpsw_priv)); if (!ndev) { dev_err(&pdev->dev, "error allocating net_device\n"); @@ -2272,7 +2288,7 @@ static int cpsw_probe(struct platform_device *pdev) platform_set_drvdata(pdev, ndev); priv = netdev_priv(ndev); - priv->pdev = pdev; + priv->cpsw = cpsw; priv->ndev = ndev; priv->dev = &ndev->dev; priv->msg_enable = netif_msg_init(debug_level, CPSW_DEBUG); From 56e31bd89334325f918e4b9f4d91f07157400169 Mon Sep 17 00:00:00 2001 From: Ivan Khoronzhuk Date: Wed, 10 Aug 2016 02:22:38 +0300 Subject: [PATCH 0105/1715] net: ethernet: ti: cpsw: replace pdev on dev No need to hold pdev link when only dev is needed. This allows to simplify a bunch of cpsw->pdev->dev now and farther. Signed-off-by: Ivan Khoronzhuk Reviewed-by: Mugunthan V N Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 65 ++++++++++++++++++---------------- 1 file changed, 34 insertions(+), 31 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 3ccf577dd46d..c21cc38834d0 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -364,7 +364,7 @@ static inline void slave_write(struct cpsw_slave *slave, u32 val, u32 offset) } struct cpsw_common { - struct platform_device *pdev; + struct device *dev; }; struct cpsw_priv { @@ -1155,7 +1155,7 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv) phy_start(slave->phy); /* Configure GMII_SEL register */ - cpsw_phy_sel(&cpsw->pdev->dev, slave->phy->interface, slave->slave_num); + cpsw_phy_sel(cpsw->dev, slave->phy->interface, slave->slave_num); } static inline void cpsw_add_default_vlan(struct cpsw_priv *priv) @@ -1241,9 +1241,9 @@ static int cpsw_ndo_open(struct net_device *ndev) int i, ret; u32 reg; - ret = pm_runtime_get_sync(&cpsw->pdev->dev); + ret = pm_runtime_get_sync(cpsw->dev); if (ret < 0) { - pm_runtime_put_noidle(&cpsw->pdev->dev); + pm_runtime_put_noidle(cpsw->dev); return ret; } @@ -1320,7 +1320,7 @@ static int cpsw_ndo_open(struct net_device *ndev) */ cpsw_info(priv, ifup, "submitted %d rx descriptors\n", i); - if (cpts_register(&cpsw->pdev->dev, priv->cpts, + if (cpts_register(cpsw->dev, priv->cpts, priv->data.cpts_clock_mult, priv->data.cpts_clock_shift)) dev_err(priv->dev, "error registering cpts device\n"); @@ -1345,7 +1345,7 @@ static int cpsw_ndo_open(struct net_device *ndev) err_cleanup: cpdma_ctlr_stop(priv->dma); for_each_slave(priv, cpsw_slave_stop, priv); - pm_runtime_put_sync(&cpsw->pdev->dev); + pm_runtime_put_sync(cpsw->dev); netif_carrier_off(priv->ndev); return ret; } @@ -1370,7 +1370,7 @@ static int cpsw_ndo_stop(struct net_device *ndev) cpsw_ale_stop(priv->ale); } for_each_slave(priv, cpsw_slave_stop, priv); - pm_runtime_put_sync(&cpsw->pdev->dev); + pm_runtime_put_sync(cpsw->dev); if (priv->data.dual_emac) priv->slaves[priv->emac_port].open_stat = false; return 0; @@ -1610,9 +1610,9 @@ static int cpsw_ndo_set_mac_address(struct net_device *ndev, void *p) if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; - ret = pm_runtime_get_sync(&cpsw->pdev->dev); + ret = pm_runtime_get_sync(cpsw->dev); if (ret < 0) { - pm_runtime_put_noidle(&cpsw->pdev->dev); + pm_runtime_put_noidle(cpsw->dev); return ret; } @@ -1630,7 +1630,7 @@ static int cpsw_ndo_set_mac_address(struct net_device *ndev, void *p) memcpy(ndev->dev_addr, priv->mac_addr, ETH_ALEN); for_each_slave(priv, cpsw_set_slave_mac, priv); - pm_runtime_put(&cpsw->pdev->dev); + pm_runtime_put(cpsw->dev); return 0; } @@ -1702,9 +1702,9 @@ static int cpsw_ndo_vlan_rx_add_vid(struct net_device *ndev, if (vid == priv->data.default_vlan) return 0; - ret = pm_runtime_get_sync(&cpsw->pdev->dev); + ret = pm_runtime_get_sync(cpsw->dev); if (ret < 0) { - pm_runtime_put_noidle(&cpsw->pdev->dev); + pm_runtime_put_noidle(cpsw->dev); return ret; } @@ -1724,7 +1724,7 @@ static int cpsw_ndo_vlan_rx_add_vid(struct net_device *ndev, dev_info(priv->dev, "Adding vlanid %d to vlan filter\n", vid); ret = cpsw_add_vlan_ale_entry(priv, vid); - pm_runtime_put(&cpsw->pdev->dev); + pm_runtime_put(cpsw->dev); return ret; } @@ -1738,9 +1738,9 @@ static int cpsw_ndo_vlan_rx_kill_vid(struct net_device *ndev, if (vid == priv->data.default_vlan) return 0; - ret = pm_runtime_get_sync(&cpsw->pdev->dev); + ret = pm_runtime_get_sync(cpsw->dev); if (ret < 0) { - pm_runtime_put_noidle(&cpsw->pdev->dev); + pm_runtime_put_noidle(cpsw->dev); return ret; } @@ -1765,7 +1765,7 @@ static int cpsw_ndo_vlan_rx_kill_vid(struct net_device *ndev, ret = cpsw_ale_del_mcast(priv->ale, priv->ndev->broadcast, 0, ALE_VLAN, vid); - pm_runtime_put(&cpsw->pdev->dev); + pm_runtime_put(cpsw->dev); return ret; } @@ -1809,10 +1809,11 @@ static void cpsw_get_drvinfo(struct net_device *ndev, struct ethtool_drvinfo *info) { struct cpsw_common *cpsw = ndev_to_cpsw(ndev); + struct platform_device *pdev = to_platform_device(cpsw->dev); strlcpy(info->driver, "cpsw", sizeof(info->driver)); strlcpy(info->version, "1.0", sizeof(info->version)); - strlcpy(info->bus_info, cpsw->pdev->name, sizeof(info->bus_info)); + strlcpy(info->bus_info, pdev->name, sizeof(info->bus_info)); } static u32 cpsw_get_msglevel(struct net_device *ndev) @@ -1934,10 +1935,10 @@ static int cpsw_ethtool_op_begin(struct net_device *ndev) struct cpsw_common *cpsw = priv->cpsw; int ret; - ret = pm_runtime_get_sync(&cpsw->pdev->dev); + ret = pm_runtime_get_sync(cpsw->dev); if (ret < 0) { cpsw_err(priv, drv, "ethtool begin failed %d\n", ret); - pm_runtime_put_noidle(&cpsw->pdev->dev); + pm_runtime_put_noidle(cpsw->dev); } return ret; @@ -1948,7 +1949,7 @@ static void cpsw_ethtool_op_complete(struct net_device *ndev) struct cpsw_priv *priv = netdev_priv(ndev); int ret; - ret = pm_runtime_put(&priv->cpsw->pdev->dev); + ret = pm_runtime_put(priv->cpsw->dev); if (ret < 0) cpsw_err(priv, drv, "ethtool complete failed %d\n", ret); } @@ -2152,17 +2153,17 @@ no_phy_slave: return 0; } -static int cpsw_probe_dual_emac(struct platform_device *pdev, - struct cpsw_priv *priv) +static int cpsw_probe_dual_emac(struct cpsw_priv *priv) { struct cpsw_platform_data *data = &priv->data; struct net_device *ndev; struct cpsw_priv *priv_sl2; int ret = 0, i; + struct cpsw_common *cpsw = priv->cpsw; ndev = alloc_etherdev(sizeof(struct cpsw_priv)); if (!ndev) { - dev_err(&pdev->dev, "cpsw: error allocating net_device\n"); + dev_err(cpsw->dev, "cpsw: error allocating net_device\n"); return -ENOMEM; } @@ -2177,10 +2178,12 @@ static int cpsw_probe_dual_emac(struct platform_device *pdev, if (is_valid_ether_addr(data->slave_data[1].mac_addr)) { memcpy(priv_sl2->mac_addr, data->slave_data[1].mac_addr, ETH_ALEN); - dev_info(&pdev->dev, "cpsw: Detected MACID = %pM\n", priv_sl2->mac_addr); + dev_info(cpsw->dev, "cpsw: Detected MACID = %pM\n", + priv_sl2->mac_addr); } else { random_ether_addr(priv_sl2->mac_addr); - dev_info(&pdev->dev, "cpsw: Random MACID = %pM\n", priv_sl2->mac_addr); + dev_info(cpsw->dev, "cpsw: Random MACID = %pM\n", + priv_sl2->mac_addr); } memcpy(ndev->dev_addr, priv_sl2->mac_addr, ETH_ALEN); @@ -2211,10 +2214,10 @@ static int cpsw_probe_dual_emac(struct platform_device *pdev, ndev->ethtool_ops = &cpsw_ethtool_ops; /* register the network device */ - SET_NETDEV_DEV(ndev, &pdev->dev); + SET_NETDEV_DEV(ndev, cpsw->dev); ret = register_netdev(ndev); if (ret) { - dev_err(&pdev->dev, "cpsw: error registering net device\n"); + dev_err(cpsw->dev, "cpsw: error registering net device\n"); free_netdev(ndev); ret = -ENODEV; } @@ -2278,7 +2281,7 @@ static int cpsw_probe(struct platform_device *pdev) int irq; cpsw = devm_kzalloc(&pdev->dev, sizeof(struct cpsw_common), GFP_KERNEL); - cpsw->pdev = pdev; + cpsw->dev = &pdev->dev; ndev = alloc_etherdev(sizeof(struct cpsw_priv)); if (!ndev) { @@ -2541,7 +2544,7 @@ static int cpsw_probe(struct platform_device *pdev) &ss_res->start, ndev->irq); if (priv->data.dual_emac) { - ret = cpsw_probe_dual_emac(pdev, priv); + ret = cpsw_probe_dual_emac(priv); if (ret) { cpsw_err(priv, probe, "error probe slave 2 emac interface\n"); goto clean_ale_ret; @@ -2608,7 +2611,7 @@ static int cpsw_suspend(struct device *dev) } /* Select sleep pin state */ - pinctrl_pm_select_sleep_state(&pdev->dev); + pinctrl_pm_select_sleep_state(dev); return 0; } @@ -2620,7 +2623,7 @@ static int cpsw_resume(struct device *dev) struct cpsw_priv *priv = netdev_priv(ndev); /* Select default pin state */ - pinctrl_pm_select_default_state(&pdev->dev); + pinctrl_pm_select_default_state(dev); if (priv->data.dual_emac) { int i; From 5d8d0d4d46ed7bc0cf3e3ccb6c8b6ad7676335bc Mon Sep 17 00:00:00 2001 From: Ivan Khoronzhuk Date: Wed, 10 Aug 2016 02:22:39 +0300 Subject: [PATCH 0106/1715] net: ethernet: ti: cpsw: move links on h/w registers to cpsw_common The pointers on h/w registers are common for every cpsw_private instance, so no need to hold them for every ndev. Signed-off-by: Ivan Khoronzhuk Reviewed-by: Mugunthan V N Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 97 +++++++++++++++++++--------------- 1 file changed, 53 insertions(+), 44 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index c21cc38834d0..5db2a554fc2c 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -365,6 +365,10 @@ static inline void slave_write(struct cpsw_slave *slave, u32 val, u32 offset) struct cpsw_common { struct device *dev; + struct cpsw_ss_regs __iomem *regs; + struct cpsw_wr_regs __iomem *wr_regs; + u8 __iomem *hw_stats; + struct cpsw_host_regs __iomem *host_port_regs; }; struct cpsw_priv { @@ -373,10 +377,6 @@ struct cpsw_priv { struct napi_struct napi_tx; struct device *dev; struct cpsw_platform_data data; - struct cpsw_ss_regs __iomem *regs; - struct cpsw_wr_regs __iomem *wr_regs; - u8 __iomem *hw_stats; - struct cpsw_host_regs __iomem *host_port_regs; u32 msg_enable; u32 version; u32 coal_intvl; @@ -656,8 +656,10 @@ static void cpsw_ndo_set_rx_mode(struct net_device *ndev) static void cpsw_intr_enable(struct cpsw_priv *priv) { - __raw_writel(0xFF, &priv->wr_regs->tx_en); - __raw_writel(0xFF, &priv->wr_regs->rx_en); + struct cpsw_common *cpsw = priv->cpsw; + + __raw_writel(0xFF, &cpsw->wr_regs->tx_en); + __raw_writel(0xFF, &cpsw->wr_regs->rx_en); cpdma_ctlr_int_ctrl(priv->dma, true); return; @@ -665,8 +667,10 @@ static void cpsw_intr_enable(struct cpsw_priv *priv) static void cpsw_intr_disable(struct cpsw_priv *priv) { - __raw_writel(0, &priv->wr_regs->tx_en); - __raw_writel(0, &priv->wr_regs->rx_en); + struct cpsw_common *cpsw = priv->cpsw; + + __raw_writel(0, &cpsw->wr_regs->tx_en); + __raw_writel(0, &cpsw->wr_regs->rx_en); cpdma_ctlr_int_ctrl(priv->dma, false); return; @@ -750,8 +754,9 @@ requeue: static irqreturn_t cpsw_tx_interrupt(int irq, void *dev_id) { struct cpsw_priv *priv = dev_id; + struct cpsw_common *cpsw = priv->cpsw; - writel(0, &priv->wr_regs->tx_en); + writel(0, &cpsw->wr_regs->tx_en); cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_TX); if (priv->quirk_irq) { @@ -766,9 +771,10 @@ static irqreturn_t cpsw_tx_interrupt(int irq, void *dev_id) static irqreturn_t cpsw_rx_interrupt(int irq, void *dev_id) { struct cpsw_priv *priv = dev_id; + struct cpsw_common *cpsw = priv->cpsw; cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_RX); - writel(0, &priv->wr_regs->rx_en); + writel(0, &cpsw->wr_regs->rx_en); if (priv->quirk_irq) { disable_irq_nosync(priv->irqs_table[0]); @@ -783,11 +789,12 @@ static int cpsw_tx_poll(struct napi_struct *napi_tx, int budget) { struct cpsw_priv *priv = napi_to_priv(napi_tx); int num_tx; + struct cpsw_common *cpsw = priv->cpsw; num_tx = cpdma_chan_process(priv->txch, budget); if (num_tx < budget) { napi_complete(napi_tx); - writel(0xff, &priv->wr_regs->tx_en); + writel(0xff, &cpsw->wr_regs->tx_en); if (priv->quirk_irq && priv->tx_irq_disabled) { priv->tx_irq_disabled = false; enable_irq(priv->irqs_table[1]); @@ -801,11 +808,12 @@ static int cpsw_rx_poll(struct napi_struct *napi_rx, int budget) { struct cpsw_priv *priv = napi_to_priv(napi_rx); int num_rx; + struct cpsw_common *cpsw = priv->cpsw; num_rx = cpdma_chan_process(priv->rxch, budget); if (num_rx < budget) { napi_complete(napi_rx); - writel(0xff, &priv->wr_regs->rx_en); + writel(0xff, &cpsw->wr_regs->rx_en); if (priv->quirk_irq && priv->rx_irq_disabled) { priv->rx_irq_disabled = false; enable_irq(priv->irqs_table[0]); @@ -925,10 +933,11 @@ static int cpsw_set_coalesce(struct net_device *ndev, u32 prescale = 0; u32 addnl_dvdr = 1; u32 coal_intvl = 0; + struct cpsw_common *cpsw = priv->cpsw; coal_intvl = coal->rx_coalesce_usecs; - int_ctrl = readl(&priv->wr_regs->int_control); + int_ctrl = readl(&cpsw->wr_regs->int_control); prescale = priv->bus_freq_mhz * 4; if (!coal->rx_coalesce_usecs) { @@ -957,15 +966,15 @@ static int cpsw_set_coalesce(struct net_device *ndev, } num_interrupts = (1000 * addnl_dvdr) / coal_intvl; - writel(num_interrupts, &priv->wr_regs->rx_imax); - writel(num_interrupts, &priv->wr_regs->tx_imax); + writel(num_interrupts, &cpsw->wr_regs->rx_imax); + writel(num_interrupts, &cpsw->wr_regs->tx_imax); int_ctrl |= CPSW_INTPACEEN; int_ctrl &= (~CPSW_INTPRESCALE_MASK); int_ctrl |= (prescale & CPSW_INTPRESCALE_MASK); update_return: - writel(int_ctrl, &priv->wr_regs->int_control); + writel(int_ctrl, &cpsw->wr_regs->int_control); cpsw_notice(priv, timer, "Set coalesce to %d usecs.\n", coal_intvl); if (priv->data.dual_emac) { @@ -1017,6 +1026,7 @@ static void cpsw_get_ethtool_stats(struct net_device *ndev, u32 val; u8 *p; int i; + struct cpsw_common *cpsw = priv->cpsw; /* Collect Davinci CPDMA stats for Rx and Tx Channel */ cpdma_chan_get_stats(priv->rxch, &rx_stats); @@ -1025,7 +1035,7 @@ static void cpsw_get_ethtool_stats(struct net_device *ndev, for (i = 0; i < CPSW_STATS_LEN; i++) { switch (cpsw_gstrings_stats[i].type) { case CPSW_STATS: - val = readl(priv->hw_stats + + val = readl(cpsw->hw_stats + cpsw_gstrings_stats[i].stat_offset); data[i] = val; break; @@ -1164,11 +1174,12 @@ static inline void cpsw_add_default_vlan(struct cpsw_priv *priv) u32 reg; int i; int unreg_mcast_mask; + struct cpsw_common *cpsw = priv->cpsw; reg = (priv->version == CPSW_VERSION_1) ? CPSW1_PORT_VLAN : CPSW2_PORT_VLAN; - writel(vlan, &priv->host_port_regs->port_vlan); + writel(vlan, &cpsw->host_port_regs->port_vlan); for (i = 0; i < priv->data.slaves; i++) slave_write(priv->slaves + i, vlan, reg); @@ -1185,27 +1196,28 @@ static inline void cpsw_add_default_vlan(struct cpsw_priv *priv) static void cpsw_init_host_port(struct cpsw_priv *priv) { - u32 control_reg; u32 fifo_mode; + u32 control_reg; + struct cpsw_common *cpsw = priv->cpsw; /* soft reset the controller and initialize ale */ - soft_reset("cpsw", &priv->regs->soft_reset); + soft_reset("cpsw", &cpsw->regs->soft_reset); cpsw_ale_start(priv->ale); /* switch to vlan unaware mode */ cpsw_ale_control_set(priv->ale, HOST_PORT_NUM, ALE_VLAN_AWARE, CPSW_ALE_VLAN_AWARE); - control_reg = readl(&priv->regs->control); + control_reg = readl(&cpsw->regs->control); control_reg |= CPSW_VLAN_AWARE; - writel(control_reg, &priv->regs->control); + writel(control_reg, &cpsw->regs->control); fifo_mode = (priv->data.dual_emac) ? CPSW_FIFO_DUAL_MAC_MODE : CPSW_FIFO_NORMAL_MODE; - writel(fifo_mode, &priv->host_port_regs->tx_in_ctl); + writel(fifo_mode, &cpsw->host_port_regs->tx_in_ctl); /* setup host port priority mapping */ __raw_writel(CPDMA_TX_PRIORITY_MAP, - &priv->host_port_regs->cpdma_tx_pri_map); - __raw_writel(0, &priv->host_port_regs->cpdma_rx_chan_map); + &cpsw->host_port_regs->cpdma_tx_pri_map); + __raw_writel(0, &cpsw->host_port_regs->cpdma_rx_chan_map); cpsw_ale_control_set(priv->ale, HOST_PORT_NUM, ALE_PORT_STATE, ALE_PORT_STATE_FORWARD); @@ -1278,13 +1290,13 @@ static int cpsw_ndo_open(struct net_device *ndev) cpdma_control_set(priv->dma, CPDMA_RX_BUFFER_OFFSET, 0); /* disable priority elevation */ - __raw_writel(0, &priv->regs->ptype); + __raw_writel(0, &cpsw->regs->ptype); /* enable statistics collection only on all ports */ - __raw_writel(0x7, &priv->regs->stat_port_en); + __raw_writel(0x7, &cpsw->regs->stat_port_en); /* Enable internal fifo flow control */ - writel(0x7, &priv->regs->flow_control); + writel(0x7, &cpsw->regs->flow_control); napi_enable(&priv_sl0->napi_rx); napi_enable(&priv_sl0->napi_tx); @@ -1443,6 +1455,7 @@ static void cpsw_hwtstamp_v1(struct cpsw_priv *priv) static void cpsw_hwtstamp_v2(struct cpsw_priv *priv) { struct cpsw_slave *slave; + struct cpsw_common *cpsw = priv->cpsw; u32 ctrl, mtype; if (priv->data.dual_emac) @@ -1477,7 +1490,7 @@ static void cpsw_hwtstamp_v2(struct cpsw_priv *priv) slave_write(slave, mtype, CPSW2_TS_SEQ_MTYPE); slave_write(slave, ctrl, CPSW2_CONTROL); - __raw_writel(ETH_P_1588, &priv->regs->ts_ltype); + __raw_writel(ETH_P_1588, &cpsw->regs->ts_ltype); } static int cpsw_hwtstamp_set(struct net_device *dev, struct ifreq *ifr) @@ -1980,7 +1993,8 @@ static const struct ethtool_ops cpsw_ethtool_ops = { static void cpsw_slave_init(struct cpsw_slave *slave, struct cpsw_priv *priv, u32 slave_reg_ofs, u32 sliver_reg_ofs) { - void __iomem *regs = priv->regs; + struct cpsw_common *cpsw = priv->cpsw; + void __iomem *regs = cpsw->regs; int slave_num = slave->slave_num; struct cpsw_slave_data *data = priv->data.slave_data + slave_num; @@ -2190,11 +2204,6 @@ static int cpsw_probe_dual_emac(struct cpsw_priv *priv) priv_sl2->slaves = priv->slaves; priv_sl2->coal_intvl = 0; priv_sl2->bus_freq_mhz = priv->bus_freq_mhz; - - priv_sl2->regs = priv->regs; - priv_sl2->host_port_regs = priv->host_port_regs; - priv_sl2->wr_regs = priv->wr_regs; - priv_sl2->hw_stats = priv->hw_stats; priv_sl2->dma = priv->dma; priv_sl2->txch = priv->txch; priv_sl2->rxch = priv->rxch; @@ -2363,7 +2372,7 @@ static int cpsw_probe(struct platform_device *pdev) ret = PTR_ERR(ss_regs); goto clean_runtime_disable_ret; } - priv->regs = ss_regs; + cpsw->regs = ss_regs; /* Need to enable clocks with runtime PM api to access module * registers @@ -2373,13 +2382,13 @@ static int cpsw_probe(struct platform_device *pdev) pm_runtime_put_noidle(&pdev->dev); goto clean_runtime_disable_ret; } - priv->version = readl(&priv->regs->id_ver); + priv->version = readl(&cpsw->regs->id_ver); pm_runtime_put_sync(&pdev->dev); res = platform_get_resource(pdev, IORESOURCE_MEM, 1); - priv->wr_regs = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(priv->wr_regs)) { - ret = PTR_ERR(priv->wr_regs); + cpsw->wr_regs = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(cpsw->wr_regs)) { + ret = PTR_ERR(cpsw->wr_regs); goto clean_runtime_disable_ret; } @@ -2388,9 +2397,9 @@ static int cpsw_probe(struct platform_device *pdev) switch (priv->version) { case CPSW_VERSION_1: - priv->host_port_regs = ss_regs + CPSW1_HOST_PORT_OFFSET; + cpsw->host_port_regs = ss_regs + CPSW1_HOST_PORT_OFFSET; priv->cpts->reg = ss_regs + CPSW1_CPTS_OFFSET; - priv->hw_stats = ss_regs + CPSW1_HW_STATS; + cpsw->hw_stats = ss_regs + CPSW1_HW_STATS; dma_params.dmaregs = ss_regs + CPSW1_CPDMA_OFFSET; dma_params.txhdp = ss_regs + CPSW1_STATERAM_OFFSET; ale_params.ale_regs = ss_regs + CPSW1_ALE_OFFSET; @@ -2402,9 +2411,9 @@ static int cpsw_probe(struct platform_device *pdev) case CPSW_VERSION_2: case CPSW_VERSION_3: case CPSW_VERSION_4: - priv->host_port_regs = ss_regs + CPSW2_HOST_PORT_OFFSET; + cpsw->host_port_regs = ss_regs + CPSW2_HOST_PORT_OFFSET; priv->cpts->reg = ss_regs + CPSW2_CPTS_OFFSET; - priv->hw_stats = ss_regs + CPSW2_HW_STATS; + cpsw->hw_stats = ss_regs + CPSW2_HW_STATS; dma_params.dmaregs = ss_regs + CPSW2_CPDMA_OFFSET; dma_params.txhdp = ss_regs + CPSW2_STATERAM_OFFSET; ale_params.ale_regs = ss_regs + CPSW2_ALE_OFFSET; From 2c836bd9a247132ff478857ec7b41a740df5ab64 Mon Sep 17 00:00:00 2001 From: Ivan Khoronzhuk Date: Wed, 10 Aug 2016 02:22:40 +0300 Subject: [PATCH 0107/1715] net: ethernet: ti: cpsw: move cpdma resources to cpsw_common Every net device private struct holds links to shared cpdma resources. No need to save and every time synchronize these resources per net dev. So, move it to common driver struct. Signed-off-by: Ivan Khoronzhuk Reviewed-by: Mugunthan V N Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 97 +++++++++++++++++----------------- 1 file changed, 48 insertions(+), 49 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 5db2a554fc2c..6d99d1e614f5 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -369,6 +369,8 @@ struct cpsw_common { struct cpsw_wr_regs __iomem *wr_regs; u8 __iomem *hw_stats; struct cpsw_host_regs __iomem *host_port_regs; + struct cpdma_ctlr *dma; + struct cpdma_chan *txch, *rxch; }; struct cpsw_priv { @@ -384,8 +386,6 @@ struct cpsw_priv { int rx_packet_max; u8 mac_addr[ETH_ALEN]; struct cpsw_slave *slaves; - struct cpdma_ctlr *dma; - struct cpdma_chan *txch, *rxch; struct cpsw_ale *ale; bool rx_pause; bool tx_pause; @@ -654,25 +654,21 @@ static void cpsw_ndo_set_rx_mode(struct net_device *ndev) } } -static void cpsw_intr_enable(struct cpsw_priv *priv) +static void cpsw_intr_enable(struct cpsw_common *cpsw) { - struct cpsw_common *cpsw = priv->cpsw; - __raw_writel(0xFF, &cpsw->wr_regs->tx_en); __raw_writel(0xFF, &cpsw->wr_regs->rx_en); - cpdma_ctlr_int_ctrl(priv->dma, true); + cpdma_ctlr_int_ctrl(cpsw->dma, true); return; } -static void cpsw_intr_disable(struct cpsw_priv *priv) +static void cpsw_intr_disable(struct cpsw_common *cpsw) { - struct cpsw_common *cpsw = priv->cpsw; - __raw_writel(0, &cpsw->wr_regs->tx_en); __raw_writel(0, &cpsw->wr_regs->rx_en); - cpdma_ctlr_int_ctrl(priv->dma, false); + cpdma_ctlr_int_ctrl(cpsw->dma, false); return; } @@ -700,6 +696,7 @@ static void cpsw_rx_handler(void *token, int len, int status) struct net_device *ndev = skb->dev; struct cpsw_priv *priv = netdev_priv(ndev); int ret = 0; + struct cpsw_common *cpsw = priv->cpsw; cpsw_dual_emac_src_port_detect(status, priv, ndev, skb); @@ -745,8 +742,8 @@ static void cpsw_rx_handler(void *token, int len, int status) } requeue: - ret = cpdma_chan_submit(priv->rxch, new_skb, new_skb->data, - skb_tailroom(new_skb), 0); + ret = cpdma_chan_submit(cpsw->rxch, new_skb, new_skb->data, + skb_tailroom(new_skb), 0); if (WARN_ON(ret < 0)) dev_kfree_skb_any(new_skb); } @@ -757,7 +754,7 @@ static irqreturn_t cpsw_tx_interrupt(int irq, void *dev_id) struct cpsw_common *cpsw = priv->cpsw; writel(0, &cpsw->wr_regs->tx_en); - cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_TX); + cpdma_ctlr_eoi(cpsw->dma, CPDMA_EOI_TX); if (priv->quirk_irq) { disable_irq_nosync(priv->irqs_table[1]); @@ -773,7 +770,7 @@ static irqreturn_t cpsw_rx_interrupt(int irq, void *dev_id) struct cpsw_priv *priv = dev_id; struct cpsw_common *cpsw = priv->cpsw; - cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_RX); + cpdma_ctlr_eoi(cpsw->dma, CPDMA_EOI_RX); writel(0, &cpsw->wr_regs->rx_en); if (priv->quirk_irq) { @@ -791,7 +788,7 @@ static int cpsw_tx_poll(struct napi_struct *napi_tx, int budget) int num_tx; struct cpsw_common *cpsw = priv->cpsw; - num_tx = cpdma_chan_process(priv->txch, budget); + num_tx = cpdma_chan_process(cpsw->txch, budget); if (num_tx < budget) { napi_complete(napi_tx); writel(0xff, &cpsw->wr_regs->tx_en); @@ -810,7 +807,7 @@ static int cpsw_rx_poll(struct napi_struct *napi_rx, int budget) int num_rx; struct cpsw_common *cpsw = priv->cpsw; - num_rx = cpdma_chan_process(priv->rxch, budget); + num_rx = cpdma_chan_process(cpsw->rxch, budget); if (num_rx < budget) { napi_complete(napi_rx); writel(0xff, &cpsw->wr_regs->rx_en); @@ -1020,17 +1017,16 @@ static void cpsw_get_strings(struct net_device *ndev, u32 stringset, u8 *data) static void cpsw_get_ethtool_stats(struct net_device *ndev, struct ethtool_stats *stats, u64 *data) { - struct cpsw_priv *priv = netdev_priv(ndev); struct cpdma_chan_stats rx_stats; struct cpdma_chan_stats tx_stats; u32 val; u8 *p; int i; - struct cpsw_common *cpsw = priv->cpsw; + struct cpsw_common *cpsw = ndev_to_cpsw(ndev); /* Collect Davinci CPDMA stats for Rx and Tx Channel */ - cpdma_chan_get_stats(priv->rxch, &rx_stats); - cpdma_chan_get_stats(priv->txch, &tx_stats); + cpdma_chan_get_stats(cpsw->rxch, &rx_stats); + cpdma_chan_get_stats(cpsw->txch, &tx_stats); for (i = 0; i < CPSW_STATS_LEN; i++) { switch (cpsw_gstrings_stats[i].type) { @@ -1073,7 +1069,9 @@ static int cpsw_common_res_usage_state(struct cpsw_priv *priv) static inline int cpsw_tx_packet_submit(struct cpsw_priv *priv, struct sk_buff *skb) { - return cpdma_chan_submit(priv->txch, skb, skb->data, skb->len, + struct cpsw_common *cpsw = priv->cpsw; + + return cpdma_chan_submit(cpsw->txch, skb, skb->data, skb->len, priv->emac_port + priv->data.dual_emac); } @@ -1260,7 +1258,7 @@ static int cpsw_ndo_open(struct net_device *ndev) } if (!cpsw_common_res_usage_state(priv)) - cpsw_intr_disable(priv); + cpsw_intr_disable(cpsw); netif_carrier_off(ndev); reg = priv->version; @@ -1286,8 +1284,8 @@ static int cpsw_ndo_open(struct net_device *ndev) int buf_num; /* setup tx dma to fixed prio and zero offset */ - cpdma_control_set(priv->dma, CPDMA_TX_PRIO_FIXED, 1); - cpdma_control_set(priv->dma, CPDMA_RX_BUFFER_OFFSET, 0); + cpdma_control_set(cpsw->dma, CPDMA_TX_PRIO_FIXED, 1); + cpdma_control_set(cpsw->dma, CPDMA_RX_BUFFER_OFFSET, 0); /* disable priority elevation */ __raw_writel(0, &cpsw->regs->ptype); @@ -1311,7 +1309,7 @@ static int cpsw_ndo_open(struct net_device *ndev) enable_irq(priv->irqs_table[0]); } - buf_num = cpdma_chan_get_rx_buf_num(priv->dma); + buf_num = cpdma_chan_get_rx_buf_num(cpsw->dma); for (i = 0; i < buf_num; i++) { struct sk_buff *skb; @@ -1320,8 +1318,8 @@ static int cpsw_ndo_open(struct net_device *ndev) priv->rx_packet_max, GFP_KERNEL); if (!skb) goto err_cleanup; - ret = cpdma_chan_submit(priv->rxch, skb, skb->data, - skb_tailroom(skb), 0); + ret = cpdma_chan_submit(cpsw->rxch, skb, skb->data, + skb_tailroom(skb), 0); if (ret < 0) { kfree_skb(skb); goto err_cleanup; @@ -1347,15 +1345,15 @@ static int cpsw_ndo_open(struct net_device *ndev) cpsw_set_coalesce(ndev, &coal); } - cpdma_ctlr_start(priv->dma); - cpsw_intr_enable(priv); + cpdma_ctlr_start(cpsw->dma); + cpsw_intr_enable(cpsw); if (priv->data.dual_emac) priv->slaves[priv->emac_port].open_stat = true; return 0; err_cleanup: - cpdma_ctlr_stop(priv->dma); + cpdma_ctlr_stop(cpsw->dma); for_each_slave(priv, cpsw_slave_stop, priv); pm_runtime_put_sync(cpsw->dev); netif_carrier_off(priv->ndev); @@ -1377,8 +1375,8 @@ static int cpsw_ndo_stop(struct net_device *ndev) napi_disable(&priv_sl0->napi_rx); napi_disable(&priv_sl0->napi_tx); cpts_unregister(priv->cpts); - cpsw_intr_disable(priv); - cpdma_ctlr_stop(priv->dma); + cpsw_intr_disable(cpsw); + cpdma_ctlr_stop(cpsw->dma); cpsw_ale_stop(priv->ale); } for_each_slave(priv, cpsw_slave_stop, priv); @@ -1393,6 +1391,7 @@ static netdev_tx_t cpsw_ndo_start_xmit(struct sk_buff *skb, { struct cpsw_priv *priv = netdev_priv(ndev); int ret; + struct cpsw_common *cpsw = priv->cpsw; netif_trans_update(ndev); @@ -1417,7 +1416,7 @@ static netdev_tx_t cpsw_ndo_start_xmit(struct sk_buff *skb, /* If there is no more tx desc left free then we need to * tell the kernel to stop sending us tx frames. */ - if (unlikely(!cpdma_check_free_tx_desc(priv->txch))) + if (unlikely(!cpdma_check_free_tx_desc(cpsw->txch))) netif_stop_queue(ndev); return NETDEV_TX_OK; @@ -1602,13 +1601,14 @@ static int cpsw_ndo_ioctl(struct net_device *dev, struct ifreq *req, int cmd) static void cpsw_ndo_tx_timeout(struct net_device *ndev) { struct cpsw_priv *priv = netdev_priv(ndev); + struct cpsw_common *cpsw = priv->cpsw; cpsw_err(priv, tx_err, "transmit timeout, restarting dma\n"); ndev->stats.tx_errors++; - cpsw_intr_disable(priv); - cpdma_chan_stop(priv->txch); - cpdma_chan_start(priv->txch); - cpsw_intr_enable(priv); + cpsw_intr_disable(cpsw); + cpdma_chan_stop(cpsw->txch); + cpdma_chan_start(cpsw->txch); + cpsw_intr_enable(cpsw); } static int cpsw_ndo_set_mac_address(struct net_device *ndev, void *p) @@ -1652,11 +1652,12 @@ static int cpsw_ndo_set_mac_address(struct net_device *ndev, void *p) static void cpsw_ndo_poll_controller(struct net_device *ndev) { struct cpsw_priv *priv = netdev_priv(ndev); + struct cpsw_common *cpsw = priv->cpsw; - cpsw_intr_disable(priv); + cpsw_intr_disable(priv->cpsw); cpsw_rx_interrupt(priv->irqs_table[0], priv); cpsw_tx_interrupt(priv->irqs_table[1], priv); - cpsw_intr_enable(priv); + cpsw_intr_enable(priv->cpsw); } #endif @@ -2204,9 +2205,6 @@ static int cpsw_probe_dual_emac(struct cpsw_priv *priv) priv_sl2->slaves = priv->slaves; priv_sl2->coal_intvl = 0; priv_sl2->bus_freq_mhz = priv->bus_freq_mhz; - priv_sl2->dma = priv->dma; - priv_sl2->txch = priv->txch; - priv_sl2->rxch = priv->rxch; priv_sl2->ale = priv->ale; priv_sl2->emac_port = 1; priv->slaves[1].ndev = ndev; @@ -2450,19 +2448,19 @@ static int cpsw_probe(struct platform_device *pdev) dma_params.has_ext_regs = true; dma_params.desc_hw_addr = dma_params.desc_mem_phys; - priv->dma = cpdma_ctlr_create(&dma_params); - if (!priv->dma) { + cpsw->dma = cpdma_ctlr_create(&dma_params); + if (!cpsw->dma) { dev_err(priv->dev, "error initializing dma\n"); ret = -ENOMEM; goto clean_runtime_disable_ret; } - priv->txch = cpdma_chan_create(priv->dma, tx_chan_num(0), + cpsw->txch = cpdma_chan_create(cpsw->dma, tx_chan_num(0), cpsw_tx_handler); - priv->rxch = cpdma_chan_create(priv->dma, rx_chan_num(0), + cpsw->rxch = cpdma_chan_create(cpsw->dma, rx_chan_num(0), cpsw_rx_handler); - if (WARN_ON(!priv->txch || !priv->rxch)) { + if (WARN_ON(!cpsw->txch || !cpsw->rxch)) { dev_err(priv->dev, "error initializing dma channels\n"); ret = -ENOMEM; goto clean_dma_ret; @@ -2565,7 +2563,7 @@ static int cpsw_probe(struct platform_device *pdev) clean_ale_ret: cpsw_ale_destroy(priv->ale); clean_dma_ret: - cpdma_ctlr_destroy(priv->dma); + cpdma_ctlr_destroy(cpsw->dma); clean_runtime_disable_ret: pm_runtime_disable(&pdev->dev); clean_ndev_ret: @@ -2577,6 +2575,7 @@ static int cpsw_remove(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); struct cpsw_priv *priv = netdev_priv(ndev); + struct cpsw_common *cpsw = priv->cpsw; int ret; ret = pm_runtime_get_sync(&pdev->dev); @@ -2590,7 +2589,7 @@ static int cpsw_remove(struct platform_device *pdev) unregister_netdev(ndev); cpsw_ale_destroy(priv->ale); - cpdma_ctlr_destroy(priv->dma); + cpdma_ctlr_destroy(cpsw->dma); of_platform_depopulate(&pdev->dev); pm_runtime_put_sync(&pdev->dev); pm_runtime_disable(&pdev->dev); From e38b5a3db84c75c418d8c08863e005bda077f382 Mon Sep 17 00:00:00 2001 From: Ivan Khoronzhuk Date: Wed, 10 Aug 2016 02:22:41 +0300 Subject: [PATCH 0108/1715] net; ethernet: ti: cpsw: move irq stuff under cpsw_common The irq data are common for net devs in dual_emac mode. So no need to hold these data in every priv struct, move them under cpsw_common. Also delete irq_num var, as after optimization it's not needed. Correct number of irqs to 2, as anyway, driver is using only 2, at least for now. Signed-off-by: Ivan Khoronzhuk Reviewed-by: Mugunthan V N Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 65 +++++++++++++++------------------- 1 file changed, 29 insertions(+), 36 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 6d99d1e614f5..b2482b668044 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -143,6 +143,7 @@ do { \ #define cpsw_slave_index(priv) \ ((priv->data.dual_emac) ? priv->emac_port : \ priv->data.active_slave) +#define IRQ_NUM 2 static int debug_level; module_param(debug_level, int, 0); @@ -371,6 +372,10 @@ struct cpsw_common { struct cpsw_host_regs __iomem *host_port_regs; struct cpdma_ctlr *dma; struct cpdma_chan *txch, *rxch; + bool quirk_irq; + bool rx_irq_disabled; + bool tx_irq_disabled; + u32 irqs_table[IRQ_NUM]; }; struct cpsw_priv { @@ -389,12 +394,6 @@ struct cpsw_priv { struct cpsw_ale *ale; bool rx_pause; bool tx_pause; - bool quirk_irq; - bool rx_irq_disabled; - bool tx_irq_disabled; - /* snapshot of IRQ numbers */ - u32 irqs_table[4]; - u32 num_irqs; struct cpts *cpts; u32 emac_port; struct cpsw_common *cpsw; @@ -756,9 +755,9 @@ static irqreturn_t cpsw_tx_interrupt(int irq, void *dev_id) writel(0, &cpsw->wr_regs->tx_en); cpdma_ctlr_eoi(cpsw->dma, CPDMA_EOI_TX); - if (priv->quirk_irq) { - disable_irq_nosync(priv->irqs_table[1]); - priv->tx_irq_disabled = true; + if (cpsw->quirk_irq) { + disable_irq_nosync(cpsw->irqs_table[1]); + cpsw->tx_irq_disabled = true; } napi_schedule(&priv->napi_tx); @@ -773,9 +772,9 @@ static irqreturn_t cpsw_rx_interrupt(int irq, void *dev_id) cpdma_ctlr_eoi(cpsw->dma, CPDMA_EOI_RX); writel(0, &cpsw->wr_regs->rx_en); - if (priv->quirk_irq) { - disable_irq_nosync(priv->irqs_table[0]); - priv->rx_irq_disabled = true; + if (cpsw->quirk_irq) { + disable_irq_nosync(cpsw->irqs_table[0]); + cpsw->rx_irq_disabled = true; } napi_schedule(&priv->napi_rx); @@ -792,9 +791,9 @@ static int cpsw_tx_poll(struct napi_struct *napi_tx, int budget) if (num_tx < budget) { napi_complete(napi_tx); writel(0xff, &cpsw->wr_regs->tx_en); - if (priv->quirk_irq && priv->tx_irq_disabled) { - priv->tx_irq_disabled = false; - enable_irq(priv->irqs_table[1]); + if (cpsw->quirk_irq && cpsw->tx_irq_disabled) { + cpsw->tx_irq_disabled = false; + enable_irq(cpsw->irqs_table[1]); } } @@ -811,9 +810,9 @@ static int cpsw_rx_poll(struct napi_struct *napi_rx, int budget) if (num_rx < budget) { napi_complete(napi_rx); writel(0xff, &cpsw->wr_regs->rx_en); - if (priv->quirk_irq && priv->rx_irq_disabled) { - priv->rx_irq_disabled = false; - enable_irq(priv->irqs_table[0]); + if (cpsw->quirk_irq && cpsw->rx_irq_disabled) { + cpsw->rx_irq_disabled = false; + enable_irq(cpsw->irqs_table[0]); } } @@ -1299,14 +1298,14 @@ static int cpsw_ndo_open(struct net_device *ndev) napi_enable(&priv_sl0->napi_rx); napi_enable(&priv_sl0->napi_tx); - if (priv_sl0->tx_irq_disabled) { - priv_sl0->tx_irq_disabled = false; - enable_irq(priv->irqs_table[1]); + if (cpsw->tx_irq_disabled) { + cpsw->tx_irq_disabled = false; + enable_irq(cpsw->irqs_table[1]); } - if (priv_sl0->rx_irq_disabled) { - priv_sl0->rx_irq_disabled = false; - enable_irq(priv->irqs_table[0]); + if (cpsw->rx_irq_disabled) { + cpsw->rx_irq_disabled = false; + enable_irq(cpsw->irqs_table[0]); } buf_num = cpdma_chan_get_rx_buf_num(cpsw->dma); @@ -1655,8 +1654,8 @@ static void cpsw_ndo_poll_controller(struct net_device *ndev) struct cpsw_common *cpsw = priv->cpsw; cpsw_intr_disable(priv->cpsw); - cpsw_rx_interrupt(priv->irqs_table[0], priv); - cpsw_tx_interrupt(priv->irqs_table[1], priv); + cpsw_rx_interrupt(cpsw->irqs_table[0], priv); + cpsw_tx_interrupt(cpsw->irqs_table[1], priv); cpsw_intr_enable(priv->cpsw); } #endif @@ -2173,7 +2172,7 @@ static int cpsw_probe_dual_emac(struct cpsw_priv *priv) struct cpsw_platform_data *data = &priv->data; struct net_device *ndev; struct cpsw_priv *priv_sl2; - int ret = 0, i; + int ret = 0; struct cpsw_common *cpsw = priv->cpsw; ndev = alloc_etherdev(sizeof(struct cpsw_priv)); @@ -2210,11 +2209,6 @@ static int cpsw_probe_dual_emac(struct cpsw_priv *priv) priv->slaves[1].ndev = ndev; priv_sl2->cpts = priv->cpts; priv_sl2->version = priv->version; - - for (i = 0; i < priv->num_irqs; i++) { - priv_sl2->irqs_table[i] = priv->irqs_table[i]; - priv_sl2->num_irqs = priv->num_irqs; - } ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; ndev->netdev_ops = &cpsw_netdev_ops; @@ -2489,7 +2483,7 @@ static int cpsw_probe(struct platform_device *pdev) if (of_id) { pdev->id_entry = of_id->data; if (pdev->id_entry->driver_data) - priv->quirk_irq = true; + cpsw->quirk_irq = true; } /* Grab RX and TX IRQs. Note that we also have RX_THRESHOLD and @@ -2507,7 +2501,7 @@ static int cpsw_probe(struct platform_device *pdev) goto clean_ale_ret; } - priv->irqs_table[0] = irq; + cpsw->irqs_table[0] = irq; ret = devm_request_irq(&pdev->dev, irq, cpsw_rx_interrupt, 0, dev_name(&pdev->dev), priv); if (ret < 0) { @@ -2522,14 +2516,13 @@ static int cpsw_probe(struct platform_device *pdev) goto clean_ale_ret; } - priv->irqs_table[1] = irq; + cpsw->irqs_table[1] = irq; ret = devm_request_irq(&pdev->dev, irq, cpsw_tx_interrupt, 0, dev_name(&pdev->dev), priv); if (ret < 0) { dev_err(priv->dev, "error attaching irq (%d)\n", ret); goto clean_ale_ret; } - priv->num_irqs = 2; ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; From 606f39939595a4d4540406bfc11f265b2036af6d Mon Sep 17 00:00:00 2001 From: Ivan Khoronzhuk Date: Wed, 10 Aug 2016 02:22:42 +0300 Subject: [PATCH 0109/1715] net: ethernet: ti: cpsw: move platform data and slaves info to cpsw_common These data are common for net devs in dual_emac mode. No need to hold it for every priv instance, so move them under cpsw_common. Signed-off-by: Ivan Khoronzhuk Reviewed-by: Mugunthan V N Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 265 +++++++++++++++++---------------- 1 file changed, 137 insertions(+), 128 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index b2482b668044..ab5488b0cbae 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -140,9 +140,9 @@ do { \ #define CPSW_CMINTMAX_INTVL (1000 / CPSW_CMINTMIN_CNT) #define CPSW_CMINTMIN_INTVL ((1000 / CPSW_CMINTMAX_CNT) + 1) -#define cpsw_slave_index(priv) \ - ((priv->data.dual_emac) ? priv->emac_port : \ - priv->data.active_slave) +#define cpsw_slave_index(cpsw, priv) \ + ((cpsw->data.dual_emac) ? priv->emac_port : \ + cpsw->data.active_slave) #define IRQ_NUM 2 static int debug_level; @@ -366,10 +366,12 @@ static inline void slave_write(struct cpsw_slave *slave, u32 val, u32 offset) struct cpsw_common { struct device *dev; + struct cpsw_platform_data data; struct cpsw_ss_regs __iomem *regs; struct cpsw_wr_regs __iomem *wr_regs; u8 __iomem *hw_stats; struct cpsw_host_regs __iomem *host_port_regs; + struct cpsw_slave *slaves; struct cpdma_ctlr *dma; struct cpdma_chan *txch, *rxch; bool quirk_irq; @@ -383,14 +385,12 @@ struct cpsw_priv { struct napi_struct napi_rx; struct napi_struct napi_tx; struct device *dev; - struct cpsw_platform_data data; u32 msg_enable; u32 version; u32 coal_intvl; u32 bus_freq_mhz; int rx_packet_max; u8 mac_addr[ETH_ALEN]; - struct cpsw_slave *slaves; struct cpsw_ale *ale; bool rx_pause; bool tx_pause; @@ -492,38 +492,39 @@ static const struct cpsw_stats cpsw_gstrings_stats[] = { #define for_each_slave(priv, func, arg...) \ do { \ struct cpsw_slave *slave; \ + struct cpsw_common *cpsw = (priv)->cpsw; \ int n; \ - if (priv->data.dual_emac) \ - (func)((priv)->slaves + priv->emac_port, ##arg);\ + if (cpsw->data.dual_emac) \ + (func)((cpsw)->slaves + priv->emac_port, ##arg);\ else \ - for (n = (priv)->data.slaves, \ - slave = (priv)->slaves; \ + for (n = cpsw->data.slaves, \ + slave = cpsw->slaves; \ n; n--) \ (func)(slave++, ##arg); \ } while (0) -#define cpsw_get_slave_priv(priv, __slave_no__) \ - (((__slave_no__ < priv->data.slaves) && \ - (priv->slaves[__slave_no__].ndev)) ? \ - netdev_priv(priv->slaves[__slave_no__].ndev) : NULL) \ +#define cpsw_get_slave_priv(cpsw, __slave_no__) \ + (((__slave_no__ < cpsw->data.slaves) && \ + (cpsw->slaves[__slave_no__].ndev)) ? \ + netdev_priv(cpsw->slaves[__slave_no__].ndev) : NULL) \ -#define cpsw_dual_emac_src_port_detect(status, priv, ndev, skb) \ +#define cpsw_dual_emac_src_port_detect(cpsw, status, priv, ndev, skb) \ do { \ - if (!priv->data.dual_emac) \ + if (!cpsw->data.dual_emac) \ break; \ if (CPDMA_RX_SOURCE_PORT(status) == 1) { \ - ndev = priv->slaves[0].ndev; \ + ndev = cpsw->slaves[0].ndev; \ priv = netdev_priv(ndev); \ skb->dev = ndev; \ } else if (CPDMA_RX_SOURCE_PORT(status) == 2) { \ - ndev = priv->slaves[1].ndev; \ + ndev = cpsw->slaves[1].ndev; \ priv = netdev_priv(ndev); \ skb->dev = ndev; \ } \ } while (0) -#define cpsw_add_mcast(priv, addr) \ +#define cpsw_add_mcast(cpsw, priv, addr) \ do { \ - if (priv->data.dual_emac) { \ - struct cpsw_slave *slave = priv->slaves + \ + if (cpsw->data.dual_emac) { \ + struct cpsw_slave *slave = cpsw->slaves + \ priv->emac_port; \ int slave_port = cpsw_get_slave_port( \ slave->slave_num); \ @@ -545,18 +546,19 @@ static inline int cpsw_get_slave_port(u32 slave_num) static void cpsw_set_promiscious(struct net_device *ndev, bool enable) { struct cpsw_priv *priv = netdev_priv(ndev); + struct cpsw_common *cpsw = priv->cpsw; struct cpsw_ale *ale = priv->ale; int i; - if (priv->data.dual_emac) { + if (cpsw->data.dual_emac) { bool flag = false; /* Enabling promiscuous mode for one interface will be * common for both the interface as the interface shares * the same hardware resource. */ - for (i = 0; i < priv->data.slaves; i++) - if (priv->slaves[i].ndev->flags & IFF_PROMISC) + for (i = 0; i < cpsw->data.slaves; i++) + if (cpsw->slaves[i].ndev->flags & IFF_PROMISC) flag = true; if (!enable && flag) { @@ -579,7 +581,7 @@ static void cpsw_set_promiscious(struct net_device *ndev, bool enable) unsigned long timeout = jiffies + HZ; /* Disable Learn for all ports (host is port 0 and slaves are port 1 and up */ - for (i = 0; i <= priv->data.slaves; i++) { + for (i = 0; i <= cpsw->data.slaves; i++) { cpsw_ale_control_set(ale, i, ALE_PORT_NOLEARN, 1); cpsw_ale_control_set(ale, i, @@ -606,7 +608,7 @@ static void cpsw_set_promiscious(struct net_device *ndev, bool enable) cpsw_ale_control_set(ale, 0, ALE_P0_UNI_FLOOD, 0); /* Enable Learn for all ports (host is port 0 and slaves are port 1 and up */ - for (i = 0; i <= priv->data.slaves; i++) { + for (i = 0; i <= cpsw->data.slaves; i++) { cpsw_ale_control_set(ale, i, ALE_PORT_NOLEARN, 0); cpsw_ale_control_set(ale, i, @@ -620,12 +622,13 @@ static void cpsw_set_promiscious(struct net_device *ndev, bool enable) static void cpsw_ndo_set_rx_mode(struct net_device *ndev) { struct cpsw_priv *priv = netdev_priv(ndev); + struct cpsw_common *cpsw = priv->cpsw; int vid; - if (priv->data.dual_emac) - vid = priv->slaves[priv->emac_port].port_vlan; + if (cpsw->data.dual_emac) + vid = cpsw->slaves[priv->emac_port].port_vlan; else - vid = priv->data.default_vlan; + vid = cpsw->data.default_vlan; if (ndev->flags & IFF_PROMISC) { /* Enable promiscuous mode */ @@ -648,7 +651,7 @@ static void cpsw_ndo_set_rx_mode(struct net_device *ndev) /* program multicast address list into ALE register */ netdev_for_each_mc_addr(ha, ndev) { - cpsw_add_mcast(priv, (u8 *)ha->addr); + cpsw_add_mcast(cpsw, priv, (u8 *)ha->addr); } } } @@ -697,16 +700,16 @@ static void cpsw_rx_handler(void *token, int len, int status) int ret = 0; struct cpsw_common *cpsw = priv->cpsw; - cpsw_dual_emac_src_port_detect(status, priv, ndev, skb); + cpsw_dual_emac_src_port_detect(cpsw, status, priv, ndev, skb); if (unlikely(status < 0) || unlikely(!netif_running(ndev))) { bool ndev_status = false; - struct cpsw_slave *slave = priv->slaves; + struct cpsw_slave *slave = cpsw->slaves; int n; - if (priv->data.dual_emac) { + if (cpsw->data.dual_emac) { /* In dual emac mode check for all interfaces */ - for (n = priv->data.slaves; n; n--, slave++) + for (n = cpsw->data.slaves; n; n--, slave++) if (netif_running(slave->ndev)) ndev_status = true; } @@ -848,6 +851,7 @@ static void _cpsw_adjust_link(struct cpsw_slave *slave, struct phy_device *phy = slave->phy; u32 mac_control = 0; u32 slave_port; + struct cpsw_common *cpsw = priv->cpsw; if (!phy) return; @@ -855,7 +859,7 @@ static void _cpsw_adjust_link(struct cpsw_slave *slave, slave_port = cpsw_get_slave_port(slave->slave_num); if (phy->link) { - mac_control = priv->data.mac_control; + mac_control = cpsw->data.mac_control; /* enable forwarding */ cpsw_ale_control_set(priv->ale, slave_port, @@ -973,11 +977,11 @@ update_return: writel(int_ctrl, &cpsw->wr_regs->int_control); cpsw_notice(priv, timer, "Set coalesce to %d usecs.\n", coal_intvl); - if (priv->data.dual_emac) { + if (cpsw->data.dual_emac) { int i; - for (i = 0; i < priv->data.slaves; i++) { - priv = netdev_priv(priv->slaves[i].ndev); + for (i = 0; i < cpsw->data.slaves; i++) { + priv = netdev_priv(cpsw->slaves[i].ndev); priv->coal_intvl = coal_intvl; } } else { @@ -1050,16 +1054,16 @@ static void cpsw_get_ethtool_stats(struct net_device *ndev, } } -static int cpsw_common_res_usage_state(struct cpsw_priv *priv) +static int cpsw_common_res_usage_state(struct cpsw_common *cpsw) { u32 i; u32 usage_count = 0; - if (!priv->data.dual_emac) + if (!cpsw->data.dual_emac) return 0; - for (i = 0; i < priv->data.slaves; i++) - if (priv->slaves[i].open_stat) + for (i = 0; i < cpsw->data.slaves; i++) + if (cpsw->slaves[i].open_stat) usage_count++; return usage_count; @@ -1071,7 +1075,7 @@ static inline int cpsw_tx_packet_submit(struct cpsw_priv *priv, struct cpsw_common *cpsw = priv->cpsw; return cpdma_chan_submit(cpsw->txch, skb, skb->data, skb->len, - priv->emac_port + priv->data.dual_emac); + priv->emac_port + cpsw->data.dual_emac); } static inline void cpsw_add_dual_emac_def_ale_entries( @@ -1129,7 +1133,7 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv) slave_port = cpsw_get_slave_port(slave->slave_num); - if (priv->data.dual_emac) + if (cpsw->data.dual_emac) cpsw_add_dual_emac_def_ale_entries(priv, slave, slave_port); else cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast, @@ -1167,19 +1171,19 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv) static inline void cpsw_add_default_vlan(struct cpsw_priv *priv) { - const int vlan = priv->data.default_vlan; + struct cpsw_common *cpsw = priv->cpsw; + const int vlan = cpsw->data.default_vlan; u32 reg; int i; int unreg_mcast_mask; - struct cpsw_common *cpsw = priv->cpsw; reg = (priv->version == CPSW_VERSION_1) ? CPSW1_PORT_VLAN : CPSW2_PORT_VLAN; writel(vlan, &cpsw->host_port_regs->port_vlan); - for (i = 0; i < priv->data.slaves; i++) - slave_write(priv->slaves + i, vlan, reg); + for (i = 0; i < cpsw->data.slaves; i++) + slave_write(cpsw->slaves + i, vlan, reg); if (priv->ndev->flags & IFF_ALLMULTI) unreg_mcast_mask = ALE_ALL_PORTS; @@ -1207,7 +1211,7 @@ static void cpsw_init_host_port(struct cpsw_priv *priv) control_reg = readl(&cpsw->regs->control); control_reg |= CPSW_VLAN_AWARE; writel(control_reg, &cpsw->regs->control); - fifo_mode = (priv->data.dual_emac) ? CPSW_FIFO_DUAL_MAC_MODE : + fifo_mode = (cpsw->data.dual_emac) ? CPSW_FIFO_DUAL_MAC_MODE : CPSW_FIFO_NORMAL_MODE; writel(fifo_mode, &cpsw->host_port_regs->tx_in_ctl); @@ -1219,7 +1223,7 @@ static void cpsw_init_host_port(struct cpsw_priv *priv) cpsw_ale_control_set(priv->ale, HOST_PORT_NUM, ALE_PORT_STATE, ALE_PORT_STATE_FORWARD); - if (!priv->data.dual_emac) { + if (!cpsw->data.dual_emac) { cpsw_ale_add_ucast(priv->ale, priv->mac_addr, HOST_PORT_NUM, 0, 0); cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast, @@ -1256,7 +1260,7 @@ static int cpsw_ndo_open(struct net_device *ndev) return ret; } - if (!cpsw_common_res_usage_state(priv)) + if (!cpsw_common_res_usage_state(cpsw)) cpsw_intr_disable(cpsw); netif_carrier_off(ndev); @@ -1267,19 +1271,19 @@ static int cpsw_ndo_open(struct net_device *ndev) CPSW_RTL_VERSION(reg)); /* initialize host and slave ports */ - if (!cpsw_common_res_usage_state(priv)) + if (!cpsw_common_res_usage_state(cpsw)) cpsw_init_host_port(priv); for_each_slave(priv, cpsw_slave_open, priv); /* Add default VLAN */ - if (!priv->data.dual_emac) + if (!cpsw->data.dual_emac) cpsw_add_default_vlan(priv); else - cpsw_ale_add_vlan(priv->ale, priv->data.default_vlan, + cpsw_ale_add_vlan(priv->ale, cpsw->data.default_vlan, ALE_ALL_PORTS, ALE_ALL_PORTS, 0, 0); - if (!cpsw_common_res_usage_state(priv)) { - struct cpsw_priv *priv_sl0 = cpsw_get_slave_priv(priv, 0); + if (!cpsw_common_res_usage_state(cpsw)) { + struct cpsw_priv *priv_sl0 = cpsw_get_slave_priv(cpsw, 0); int buf_num; /* setup tx dma to fixed prio and zero offset */ @@ -1330,8 +1334,8 @@ static int cpsw_ndo_open(struct net_device *ndev) cpsw_info(priv, ifup, "submitted %d rx descriptors\n", i); if (cpts_register(cpsw->dev, priv->cpts, - priv->data.cpts_clock_mult, - priv->data.cpts_clock_shift)) + cpsw->data.cpts_clock_mult, + cpsw->data.cpts_clock_shift)) dev_err(priv->dev, "error registering cpts device\n"); } @@ -1347,8 +1351,8 @@ static int cpsw_ndo_open(struct net_device *ndev) cpdma_ctlr_start(cpsw->dma); cpsw_intr_enable(cpsw); - if (priv->data.dual_emac) - priv->slaves[priv->emac_port].open_stat = true; + if (cpsw->data.dual_emac) + cpsw->slaves[priv->emac_port].open_stat = true; return 0; err_cleanup: @@ -1368,8 +1372,8 @@ static int cpsw_ndo_stop(struct net_device *ndev) netif_stop_queue(priv->ndev); netif_carrier_off(priv->ndev); - if (cpsw_common_res_usage_state(priv) <= 1) { - struct cpsw_priv *priv_sl0 = cpsw_get_slave_priv(priv, 0); + if (cpsw_common_res_usage_state(cpsw) <= 1) { + struct cpsw_priv *priv_sl0 = cpsw_get_slave_priv(cpsw, 0); napi_disable(&priv_sl0->napi_rx); napi_disable(&priv_sl0->napi_tx); @@ -1380,8 +1384,8 @@ static int cpsw_ndo_stop(struct net_device *ndev) } for_each_slave(priv, cpsw_slave_stop, priv); pm_runtime_put_sync(cpsw->dev); - if (priv->data.dual_emac) - priv->slaves[priv->emac_port].open_stat = false; + if (cpsw->data.dual_emac) + cpsw->slaves[priv->emac_port].open_stat = false; return 0; } @@ -1429,7 +1433,8 @@ fail: static void cpsw_hwtstamp_v1(struct cpsw_priv *priv) { - struct cpsw_slave *slave = &priv->slaves[priv->data.active_slave]; + struct cpsw_common *cpsw = priv->cpsw; + struct cpsw_slave *slave = &cpsw->slaves[cpsw->data.active_slave]; u32 ts_en, seq_id; if (!priv->cpts->tx_enable && !priv->cpts->rx_enable) { @@ -1456,10 +1461,10 @@ static void cpsw_hwtstamp_v2(struct cpsw_priv *priv) struct cpsw_common *cpsw = priv->cpsw; u32 ctrl, mtype; - if (priv->data.dual_emac) - slave = &priv->slaves[priv->emac_port]; + if (cpsw->data.dual_emac) + slave = &cpsw->slaves[priv->emac_port]; else - slave = &priv->slaves[priv->data.active_slave]; + slave = &cpsw->slaves[cpsw->data.active_slave]; ctrl = slave_read(slave, CPSW2_CONTROL); switch (priv->version) { @@ -1578,7 +1583,8 @@ static int cpsw_hwtstamp_get(struct net_device *dev, struct ifreq *ifr) static int cpsw_ndo_ioctl(struct net_device *dev, struct ifreq *req, int cmd) { struct cpsw_priv *priv = netdev_priv(dev); - int slave_no = cpsw_slave_index(priv); + struct cpsw_common *cpsw = priv->cpsw; + int slave_no = cpsw_slave_index(cpsw, priv); if (!netif_running(dev)) return -EINVAL; @@ -1592,9 +1598,9 @@ static int cpsw_ndo_ioctl(struct net_device *dev, struct ifreq *req, int cmd) #endif } - if (!priv->slaves[slave_no].phy) + if (!cpsw->slaves[slave_no].phy) return -EOPNOTSUPP; - return phy_mii_ioctl(priv->slaves[slave_no].phy, req, cmd); + return phy_mii_ioctl(cpsw->slaves[slave_no].phy, req, cmd); } static void cpsw_ndo_tx_timeout(struct net_device *ndev) @@ -1628,8 +1634,8 @@ static int cpsw_ndo_set_mac_address(struct net_device *ndev, void *p) return ret; } - if (priv->data.dual_emac) { - vid = priv->slaves[priv->emac_port].port_vlan; + if (cpsw->data.dual_emac) { + vid = cpsw->slaves[priv->emac_port].port_vlan; flags = ALE_VLAN; } @@ -1666,8 +1672,9 @@ static inline int cpsw_add_vlan_ale_entry(struct cpsw_priv *priv, int ret; int unreg_mcast_mask = 0; u32 port_mask; + struct cpsw_common *cpsw = priv->cpsw; - if (priv->data.dual_emac) { + if (cpsw->data.dual_emac) { port_mask = (1 << (priv->emac_port + 1)) | ALE_PORT_HOST; if (priv->ndev->flags & IFF_ALLMULTI) @@ -1712,7 +1719,7 @@ static int cpsw_ndo_vlan_rx_add_vid(struct net_device *ndev, struct cpsw_common *cpsw = priv->cpsw; int ret; - if (vid == priv->data.default_vlan) + if (vid == cpsw->data.default_vlan) return 0; ret = pm_runtime_get_sync(cpsw->dev); @@ -1721,15 +1728,15 @@ static int cpsw_ndo_vlan_rx_add_vid(struct net_device *ndev, return ret; } - if (priv->data.dual_emac) { + if (cpsw->data.dual_emac) { /* In dual EMAC, reserved VLAN id should not be used for * creating VLAN interfaces as this can break the dual * EMAC port separation */ int i; - for (i = 0; i < priv->data.slaves; i++) { - if (vid == priv->slaves[i].port_vlan) + for (i = 0; i < cpsw->data.slaves; i++) { + if (vid == cpsw->slaves[i].port_vlan) return -EINVAL; } } @@ -1748,7 +1755,7 @@ static int cpsw_ndo_vlan_rx_kill_vid(struct net_device *ndev, struct cpsw_common *cpsw = priv->cpsw; int ret; - if (vid == priv->data.default_vlan) + if (vid == cpsw->data.default_vlan) return 0; ret = pm_runtime_get_sync(cpsw->dev); @@ -1757,11 +1764,11 @@ static int cpsw_ndo_vlan_rx_kill_vid(struct net_device *ndev, return ret; } - if (priv->data.dual_emac) { + if (cpsw->data.dual_emac) { int i; - for (i = 0; i < priv->data.slaves; i++) { - if (vid == priv->slaves[i].port_vlan) + for (i = 0; i < cpsw->data.slaves; i++) { + if (vid == cpsw->slaves[i].port_vlan) return -EINVAL; } } @@ -1801,9 +1808,9 @@ static const struct net_device_ops cpsw_netdev_ops = { static int cpsw_get_regs_len(struct net_device *ndev) { - struct cpsw_priv *priv = netdev_priv(ndev); + struct cpsw_common *cpsw = ndev_to_cpsw(ndev); - return priv->data.ale_entries * ALE_ENTRY_WORDS * sizeof(u32); + return cpsw->data.ale_entries * ALE_ENTRY_WORDS * sizeof(u32); } static void cpsw_get_regs(struct net_device *ndev, @@ -1877,10 +1884,11 @@ static int cpsw_get_settings(struct net_device *ndev, struct ethtool_cmd *ecmd) { struct cpsw_priv *priv = netdev_priv(ndev); - int slave_no = cpsw_slave_index(priv); + struct cpsw_common *cpsw = priv->cpsw; + int slave_no = cpsw_slave_index(cpsw, priv); - if (priv->slaves[slave_no].phy) - return phy_ethtool_gset(priv->slaves[slave_no].phy, ecmd); + if (cpsw->slaves[slave_no].phy) + return phy_ethtool_gset(cpsw->slaves[slave_no].phy, ecmd); else return -EOPNOTSUPP; } @@ -1888,10 +1896,11 @@ static int cpsw_get_settings(struct net_device *ndev, static int cpsw_set_settings(struct net_device *ndev, struct ethtool_cmd *ecmd) { struct cpsw_priv *priv = netdev_priv(ndev); - int slave_no = cpsw_slave_index(priv); + struct cpsw_common *cpsw = priv->cpsw; + int slave_no = cpsw_slave_index(cpsw, priv); - if (priv->slaves[slave_no].phy) - return phy_ethtool_sset(priv->slaves[slave_no].phy, ecmd); + if (cpsw->slaves[slave_no].phy) + return phy_ethtool_sset(cpsw->slaves[slave_no].phy, ecmd); else return -EOPNOTSUPP; } @@ -1899,22 +1908,24 @@ static int cpsw_set_settings(struct net_device *ndev, struct ethtool_cmd *ecmd) static void cpsw_get_wol(struct net_device *ndev, struct ethtool_wolinfo *wol) { struct cpsw_priv *priv = netdev_priv(ndev); - int slave_no = cpsw_slave_index(priv); + struct cpsw_common *cpsw = priv->cpsw; + int slave_no = cpsw_slave_index(cpsw, priv); wol->supported = 0; wol->wolopts = 0; - if (priv->slaves[slave_no].phy) - phy_ethtool_get_wol(priv->slaves[slave_no].phy, wol); + if (cpsw->slaves[slave_no].phy) + phy_ethtool_get_wol(cpsw->slaves[slave_no].phy, wol); } static int cpsw_set_wol(struct net_device *ndev, struct ethtool_wolinfo *wol) { struct cpsw_priv *priv = netdev_priv(ndev); - int slave_no = cpsw_slave_index(priv); + struct cpsw_common *cpsw = priv->cpsw; + int slave_no = cpsw_slave_index(cpsw, priv); - if (priv->slaves[slave_no].phy) - return phy_ethtool_set_wol(priv->slaves[slave_no].phy, wol); + if (cpsw->slaves[slave_no].phy) + return phy_ethtool_set_wol(cpsw->slaves[slave_no].phy, wol); else return -EOPNOTSUPP; } @@ -1990,13 +2001,12 @@ static const struct ethtool_ops cpsw_ethtool_ops = { .complete = cpsw_ethtool_op_complete, }; -static void cpsw_slave_init(struct cpsw_slave *slave, struct cpsw_priv *priv, +static void cpsw_slave_init(struct cpsw_slave *slave, struct cpsw_common *cpsw, u32 slave_reg_ofs, u32 sliver_reg_ofs) { - struct cpsw_common *cpsw = priv->cpsw; void __iomem *regs = cpsw->regs; int slave_num = slave->slave_num; - struct cpsw_slave_data *data = priv->data.slave_data + slave_num; + struct cpsw_slave_data *data = cpsw->data.slave_data + slave_num; slave->data = data; slave->regs = regs + slave_reg_ofs; @@ -2169,11 +2179,11 @@ no_phy_slave: static int cpsw_probe_dual_emac(struct cpsw_priv *priv) { - struct cpsw_platform_data *data = &priv->data; + struct cpsw_common *cpsw = priv->cpsw; + struct cpsw_platform_data *data = &cpsw->data; struct net_device *ndev; struct cpsw_priv *priv_sl2; int ret = 0; - struct cpsw_common *cpsw = priv->cpsw; ndev = alloc_etherdev(sizeof(struct cpsw_priv)); if (!ndev) { @@ -2182,8 +2192,7 @@ static int cpsw_probe_dual_emac(struct cpsw_priv *priv) } priv_sl2 = netdev_priv(ndev); - priv_sl2->cpsw = priv->cpsw; - priv_sl2->data = *data; + priv_sl2->cpsw = cpsw; priv_sl2->ndev = ndev; priv_sl2->dev = &ndev->dev; priv_sl2->msg_enable = netif_msg_init(debug_level, CPSW_DEBUG); @@ -2201,12 +2210,11 @@ static int cpsw_probe_dual_emac(struct cpsw_priv *priv) } memcpy(ndev->dev_addr, priv_sl2->mac_addr, ETH_ALEN); - priv_sl2->slaves = priv->slaves; priv_sl2->coal_intvl = 0; priv_sl2->bus_freq_mhz = priv->bus_freq_mhz; priv_sl2->ale = priv->ale; priv_sl2->emac_port = 1; - priv->slaves[1].ndev = ndev; + cpsw->slaves[1].ndev = ndev; priv_sl2->cpts = priv->cpts; priv_sl2->version = priv->version; ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; @@ -2319,12 +2327,12 @@ static int cpsw_probe(struct platform_device *pdev) /* Select default pin state */ pinctrl_pm_select_default_state(&pdev->dev); - if (cpsw_probe_dt(&priv->data, pdev)) { + if (cpsw_probe_dt(&cpsw->data, pdev)) { dev_err(&pdev->dev, "cpsw: platform data missing\n"); ret = -ENODEV; goto clean_runtime_disable_ret; } - data = &priv->data; + data = &cpsw->data; if (is_valid_ether_addr(data->slave_data[0].mac_addr)) { memcpy(priv->mac_addr, data->slave_data[0].mac_addr, ETH_ALEN); @@ -2336,17 +2344,17 @@ static int cpsw_probe(struct platform_device *pdev) memcpy(ndev->dev_addr, priv->mac_addr, ETH_ALEN); - priv->slaves = devm_kzalloc(&pdev->dev, + cpsw->slaves = devm_kzalloc(&pdev->dev, sizeof(struct cpsw_slave) * data->slaves, GFP_KERNEL); - if (!priv->slaves) { + if (!cpsw->slaves) { ret = -ENOMEM; goto clean_runtime_disable_ret; } for (i = 0; i < data->slaves; i++) - priv->slaves[i].slave_num = i; + cpsw->slaves[i].slave_num = i; - priv->slaves[0].ndev = ndev; + cpsw->slaves[0].ndev = ndev; priv->emac_port = 0; clk = devm_clk_get(&pdev->dev, "fck"); @@ -2420,9 +2428,10 @@ static int cpsw_probe(struct platform_device *pdev) ret = -ENODEV; goto clean_runtime_disable_ret; } - for (i = 0; i < priv->data.slaves; i++) { - struct cpsw_slave *slave = &priv->slaves[i]; - cpsw_slave_init(slave, priv, slave_offset, sliver_offset); + for (i = 0; i < cpsw->data.slaves; i++) { + struct cpsw_slave *slave = &cpsw->slaves[i]; + + cpsw_slave_init(slave, cpsw, slave_offset, sliver_offset); slave_offset += slave_size; sliver_offset += SLIVER_SIZE; } @@ -2543,7 +2552,7 @@ static int cpsw_probe(struct platform_device *pdev) cpsw_notice(priv, probe, "initialized device (regs %pa, irq %d)\n", &ss_res->start, ndev->irq); - if (priv->data.dual_emac) { + if (cpsw->data.dual_emac) { ret = cpsw_probe_dual_emac(priv); if (ret) { cpsw_err(priv, probe, "error probe slave 2 emac interface\n"); @@ -2577,8 +2586,8 @@ static int cpsw_remove(struct platform_device *pdev) return ret; } - if (priv->data.dual_emac) - unregister_netdev(priv->slaves[1].ndev); + if (cpsw->data.dual_emac) + unregister_netdev(cpsw->slaves[1].ndev); unregister_netdev(ndev); cpsw_ale_destroy(priv->ale); @@ -2586,8 +2595,8 @@ static int cpsw_remove(struct platform_device *pdev) of_platform_depopulate(&pdev->dev); pm_runtime_put_sync(&pdev->dev); pm_runtime_disable(&pdev->dev); - if (priv->data.dual_emac) - free_netdev(priv->slaves[1].ndev); + if (cpsw->data.dual_emac) + free_netdev(cpsw->slaves[1].ndev); free_netdev(ndev); return 0; } @@ -2597,14 +2606,14 @@ static int cpsw_suspend(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct net_device *ndev = platform_get_drvdata(pdev); - struct cpsw_priv *priv = netdev_priv(ndev); + struct cpsw_common *cpsw = ndev_to_cpsw(ndev); - if (priv->data.dual_emac) { + if (cpsw->data.dual_emac) { int i; - for (i = 0; i < priv->data.slaves; i++) { - if (netif_running(priv->slaves[i].ndev)) - cpsw_ndo_stop(priv->slaves[i].ndev); + for (i = 0; i < cpsw->data.slaves; i++) { + if (netif_running(cpsw->slaves[i].ndev)) + cpsw_ndo_stop(cpsw->slaves[i].ndev); } } else { if (netif_running(ndev)) @@ -2621,17 +2630,17 @@ static int cpsw_resume(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct net_device *ndev = platform_get_drvdata(pdev); - struct cpsw_priv *priv = netdev_priv(ndev); + struct cpsw_common *cpsw = netdev_priv(ndev); /* Select default pin state */ pinctrl_pm_select_default_state(dev); - if (priv->data.dual_emac) { + if (cpsw->data.dual_emac) { int i; - for (i = 0; i < priv->data.slaves; i++) { - if (netif_running(priv->slaves[i].ndev)) - cpsw_ndo_open(priv->slaves[i].ndev); + for (i = 0; i < cpsw->data.slaves; i++) { + if (netif_running(cpsw->slaves[i].ndev)) + cpsw_ndo_open(cpsw->slaves[i].ndev); } } else { if (netif_running(ndev)) From dbc4ec522d2ee2b3fa24d329a7a63594aba3b069 Mon Sep 17 00:00:00 2001 From: Ivan Khoronzhuk Date: Wed, 10 Aug 2016 02:22:43 +0300 Subject: [PATCH 0110/1715] net: ethernet: ti: cpsw: move napi struct to cpsw_common The napi structs are common for both net devices in dual_emac mode, In order to not hold duplicate links to them, move to cpsw_common. Signed-off-by: Ivan Khoronzhuk Reviewed-by: Mugunthan V N Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 52 ++++++++++++++-------------------- 1 file changed, 22 insertions(+), 30 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index ab5488b0cbae..2c2e36a416c1 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -367,6 +367,8 @@ static inline void slave_write(struct cpsw_slave *slave, u32 val, u32 offset) struct cpsw_common { struct device *dev; struct cpsw_platform_data data; + struct napi_struct napi_rx; + struct napi_struct napi_tx; struct cpsw_ss_regs __iomem *regs; struct cpsw_wr_regs __iomem *wr_regs; u8 __iomem *hw_stats; @@ -382,8 +384,6 @@ struct cpsw_common { struct cpsw_priv { struct net_device *ndev; - struct napi_struct napi_rx; - struct napi_struct napi_tx; struct device *dev; u32 msg_enable; u32 version; @@ -488,7 +488,7 @@ static const struct cpsw_stats cpsw_gstrings_stats[] = { #define CPSW_STATS_LEN ARRAY_SIZE(cpsw_gstrings_stats) #define ndev_to_cpsw(ndev) (((struct cpsw_priv *)netdev_priv(ndev))->cpsw) -#define napi_to_priv(napi) container_of(napi, struct cpsw_priv, napi) +#define napi_to_cpsw(napi) container_of(napi, struct cpsw_common, napi) #define for_each_slave(priv, func, arg...) \ do { \ struct cpsw_slave *slave; \ @@ -752,8 +752,7 @@ requeue: static irqreturn_t cpsw_tx_interrupt(int irq, void *dev_id) { - struct cpsw_priv *priv = dev_id; - struct cpsw_common *cpsw = priv->cpsw; + struct cpsw_common *cpsw = dev_id; writel(0, &cpsw->wr_regs->tx_en); cpdma_ctlr_eoi(cpsw->dma, CPDMA_EOI_TX); @@ -763,14 +762,13 @@ static irqreturn_t cpsw_tx_interrupt(int irq, void *dev_id) cpsw->tx_irq_disabled = true; } - napi_schedule(&priv->napi_tx); + napi_schedule(&cpsw->napi_tx); return IRQ_HANDLED; } static irqreturn_t cpsw_rx_interrupt(int irq, void *dev_id) { - struct cpsw_priv *priv = dev_id; - struct cpsw_common *cpsw = priv->cpsw; + struct cpsw_common *cpsw = dev_id; cpdma_ctlr_eoi(cpsw->dma, CPDMA_EOI_RX); writel(0, &cpsw->wr_regs->rx_en); @@ -780,15 +778,14 @@ static irqreturn_t cpsw_rx_interrupt(int irq, void *dev_id) cpsw->rx_irq_disabled = true; } - napi_schedule(&priv->napi_rx); + napi_schedule(&cpsw->napi_rx); return IRQ_HANDLED; } static int cpsw_tx_poll(struct napi_struct *napi_tx, int budget) { - struct cpsw_priv *priv = napi_to_priv(napi_tx); + struct cpsw_common *cpsw = napi_to_cpsw(napi_tx); int num_tx; - struct cpsw_common *cpsw = priv->cpsw; num_tx = cpdma_chan_process(cpsw->txch, budget); if (num_tx < budget) { @@ -805,9 +802,8 @@ static int cpsw_tx_poll(struct napi_struct *napi_tx, int budget) static int cpsw_rx_poll(struct napi_struct *napi_rx, int budget) { - struct cpsw_priv *priv = napi_to_priv(napi_rx); + struct cpsw_common *cpsw = napi_to_cpsw(napi_rx); int num_rx; - struct cpsw_common *cpsw = priv->cpsw; num_rx = cpdma_chan_process(cpsw->rxch, budget); if (num_rx < budget) { @@ -1283,7 +1279,6 @@ static int cpsw_ndo_open(struct net_device *ndev) ALE_ALL_PORTS, ALE_ALL_PORTS, 0, 0); if (!cpsw_common_res_usage_state(cpsw)) { - struct cpsw_priv *priv_sl0 = cpsw_get_slave_priv(cpsw, 0); int buf_num; /* setup tx dma to fixed prio and zero offset */ @@ -1299,8 +1294,8 @@ static int cpsw_ndo_open(struct net_device *ndev) /* Enable internal fifo flow control */ writel(0x7, &cpsw->regs->flow_control); - napi_enable(&priv_sl0->napi_rx); - napi_enable(&priv_sl0->napi_tx); + napi_enable(&cpsw->napi_rx); + napi_enable(&cpsw->napi_tx); if (cpsw->tx_irq_disabled) { cpsw->tx_irq_disabled = false; @@ -1373,10 +1368,8 @@ static int cpsw_ndo_stop(struct net_device *ndev) netif_carrier_off(priv->ndev); if (cpsw_common_res_usage_state(cpsw) <= 1) { - struct cpsw_priv *priv_sl0 = cpsw_get_slave_priv(cpsw, 0); - - napi_disable(&priv_sl0->napi_rx); - napi_disable(&priv_sl0->napi_tx); + napi_disable(&cpsw->napi_rx); + napi_disable(&cpsw->napi_tx); cpts_unregister(priv->cpts); cpsw_intr_disable(cpsw); cpdma_ctlr_stop(cpsw->dma); @@ -1656,13 +1649,12 @@ static int cpsw_ndo_set_mac_address(struct net_device *ndev, void *p) #ifdef CONFIG_NET_POLL_CONTROLLER static void cpsw_ndo_poll_controller(struct net_device *ndev) { - struct cpsw_priv *priv = netdev_priv(ndev); - struct cpsw_common *cpsw = priv->cpsw; + struct cpsw_common *cpsw = ndev_to_cpsw(ndev); - cpsw_intr_disable(priv->cpsw); - cpsw_rx_interrupt(cpsw->irqs_table[0], priv); - cpsw_tx_interrupt(cpsw->irqs_table[1], priv); - cpsw_intr_enable(priv->cpsw); + cpsw_intr_disable(cpsw); + cpsw_rx_interrupt(cpsw->irqs_table[0], cpsw); + cpsw_tx_interrupt(cpsw->irqs_table[1], cpsw); + cpsw_intr_enable(cpsw); } #endif @@ -2512,7 +2504,7 @@ static int cpsw_probe(struct platform_device *pdev) cpsw->irqs_table[0] = irq; ret = devm_request_irq(&pdev->dev, irq, cpsw_rx_interrupt, - 0, dev_name(&pdev->dev), priv); + 0, dev_name(&pdev->dev), cpsw); if (ret < 0) { dev_err(priv->dev, "error attaching irq (%d)\n", ret); goto clean_ale_ret; @@ -2527,7 +2519,7 @@ static int cpsw_probe(struct platform_device *pdev) cpsw->irqs_table[1] = irq; ret = devm_request_irq(&pdev->dev, irq, cpsw_tx_interrupt, - 0, dev_name(&pdev->dev), priv); + 0, dev_name(&pdev->dev), cpsw); if (ret < 0) { dev_err(priv->dev, "error attaching irq (%d)\n", ret); goto clean_ale_ret; @@ -2537,8 +2529,8 @@ static int cpsw_probe(struct platform_device *pdev) ndev->netdev_ops = &cpsw_netdev_ops; ndev->ethtool_ops = &cpsw_ethtool_ops; - netif_napi_add(ndev, &priv->napi_rx, cpsw_rx_poll, CPSW_POLL_WEIGHT); - netif_tx_napi_add(ndev, &priv->napi_tx, cpsw_tx_poll, CPSW_POLL_WEIGHT); + netif_napi_add(ndev, &cpsw->napi_rx, cpsw_rx_poll, CPSW_POLL_WEIGHT); + netif_tx_napi_add(ndev, &cpsw->napi_tx, cpsw_tx_poll, CPSW_POLL_WEIGHT); /* register the network device */ SET_NETDEV_DEV(ndev, &pdev->dev); From 2a05a622d88a8e195adaab3ab83aaba317a2e5cf Mon Sep 17 00:00:00 2001 From: Ivan Khoronzhuk Date: Wed, 10 Aug 2016 02:22:44 +0300 Subject: [PATCH 0111/1715] net: ethernet: ti: cpsw: move ale, cpts and drivers params under cpsw_common The ale, cpts, version, rx_packet_max, bus_freq, interrupt pacing parameters are common per net device that uses the same h/w. So, move them to common driver structure. Signed-off-by: Ivan Khoronzhuk Reviewed-by: Mugunthan V N Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 235 +++++++++++++++------------------ 1 file changed, 106 insertions(+), 129 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 2c2e36a416c1..b4d3b410c32a 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -373,28 +373,28 @@ struct cpsw_common { struct cpsw_wr_regs __iomem *wr_regs; u8 __iomem *hw_stats; struct cpsw_host_regs __iomem *host_port_regs; + u32 version; + u32 coal_intvl; + u32 bus_freq_mhz; + int rx_packet_max; struct cpsw_slave *slaves; struct cpdma_ctlr *dma; struct cpdma_chan *txch, *rxch; + struct cpsw_ale *ale; bool quirk_irq; bool rx_irq_disabled; bool tx_irq_disabled; u32 irqs_table[IRQ_NUM]; + struct cpts *cpts; }; struct cpsw_priv { struct net_device *ndev; struct device *dev; u32 msg_enable; - u32 version; - u32 coal_intvl; - u32 bus_freq_mhz; - int rx_packet_max; u8 mac_addr[ETH_ALEN]; - struct cpsw_ale *ale; bool rx_pause; bool tx_pause; - struct cpts *cpts; u32 emac_port; struct cpsw_common *cpsw; }; @@ -502,22 +502,16 @@ static const struct cpsw_stats cpsw_gstrings_stats[] = { n; n--) \ (func)(slave++, ##arg); \ } while (0) -#define cpsw_get_slave_priv(cpsw, __slave_no__) \ - (((__slave_no__ < cpsw->data.slaves) && \ - (cpsw->slaves[__slave_no__].ndev)) ? \ - netdev_priv(cpsw->slaves[__slave_no__].ndev) : NULL) \ -#define cpsw_dual_emac_src_port_detect(cpsw, status, priv, ndev, skb) \ +#define cpsw_dual_emac_src_port_detect(cpsw, status, ndev, skb) \ do { \ if (!cpsw->data.dual_emac) \ break; \ if (CPDMA_RX_SOURCE_PORT(status) == 1) { \ ndev = cpsw->slaves[0].ndev; \ - priv = netdev_priv(ndev); \ skb->dev = ndev; \ } else if (CPDMA_RX_SOURCE_PORT(status) == 2) { \ ndev = cpsw->slaves[1].ndev; \ - priv = netdev_priv(ndev); \ skb->dev = ndev; \ } \ } while (0) @@ -528,11 +522,11 @@ static const struct cpsw_stats cpsw_gstrings_stats[] = { priv->emac_port; \ int slave_port = cpsw_get_slave_port( \ slave->slave_num); \ - cpsw_ale_add_mcast(priv->ale, addr, \ + cpsw_ale_add_mcast(cpsw->ale, addr, \ 1 << slave_port | ALE_PORT_HOST, \ ALE_VLAN, slave->port_vlan, 0); \ } else { \ - cpsw_ale_add_mcast(priv->ale, addr, \ + cpsw_ale_add_mcast(cpsw->ale, addr, \ ALE_ALL_PORTS, \ 0, 0, 0); \ } \ @@ -545,9 +539,8 @@ static inline int cpsw_get_slave_port(u32 slave_num) static void cpsw_set_promiscious(struct net_device *ndev, bool enable) { - struct cpsw_priv *priv = netdev_priv(ndev); - struct cpsw_common *cpsw = priv->cpsw; - struct cpsw_ale *ale = priv->ale; + struct cpsw_common *cpsw = ndev_to_cpsw(ndev); + struct cpsw_ale *ale = cpsw->ale; int i; if (cpsw->data.dual_emac) { @@ -633,7 +626,7 @@ static void cpsw_ndo_set_rx_mode(struct net_device *ndev) if (ndev->flags & IFF_PROMISC) { /* Enable promiscuous mode */ cpsw_set_promiscious(ndev, true); - cpsw_ale_set_allmulti(priv->ale, IFF_ALLMULTI); + cpsw_ale_set_allmulti(cpsw->ale, IFF_ALLMULTI); return; } else { /* Disable promiscuous mode */ @@ -641,10 +634,10 @@ static void cpsw_ndo_set_rx_mode(struct net_device *ndev) } /* Restore allmulti on vlans if necessary */ - cpsw_ale_set_allmulti(priv->ale, priv->ndev->flags & IFF_ALLMULTI); + cpsw_ale_set_allmulti(cpsw->ale, priv->ndev->flags & IFF_ALLMULTI); /* Clear all mcast from ALE */ - cpsw_ale_flush_multicast(priv->ale, ALE_ALL_PORTS, vid); + cpsw_ale_flush_multicast(cpsw->ale, ALE_ALL_PORTS, vid); if (!netdev_mc_empty(ndev)) { struct netdev_hw_addr *ha; @@ -678,14 +671,14 @@ static void cpsw_tx_handler(void *token, int len, int status) { struct sk_buff *skb = token; struct net_device *ndev = skb->dev; - struct cpsw_priv *priv = netdev_priv(ndev); + struct cpsw_common *cpsw = ndev_to_cpsw(ndev); /* Check whether the queue is stopped due to stalled tx dma, if the * queue is stopped then start the queue as we have free desc for tx */ if (unlikely(netif_queue_stopped(ndev))) netif_wake_queue(ndev); - cpts_tx_timestamp(priv->cpts, skb); + cpts_tx_timestamp(cpsw->cpts, skb); ndev->stats.tx_packets++; ndev->stats.tx_bytes += len; dev_kfree_skb_any(skb); @@ -696,11 +689,10 @@ static void cpsw_rx_handler(void *token, int len, int status) struct sk_buff *skb = token; struct sk_buff *new_skb; struct net_device *ndev = skb->dev; - struct cpsw_priv *priv = netdev_priv(ndev); int ret = 0; - struct cpsw_common *cpsw = priv->cpsw; + struct cpsw_common *cpsw = ndev_to_cpsw(ndev); - cpsw_dual_emac_src_port_detect(cpsw, status, priv, ndev, skb); + cpsw_dual_emac_src_port_detect(cpsw, status, ndev, skb); if (unlikely(status < 0) || unlikely(!netif_running(ndev))) { bool ndev_status = false; @@ -730,10 +722,10 @@ static void cpsw_rx_handler(void *token, int len, int status) return; } - new_skb = netdev_alloc_skb_ip_align(ndev, priv->rx_packet_max); + new_skb = netdev_alloc_skb_ip_align(ndev, cpsw->rx_packet_max); if (new_skb) { skb_put(skb, len); - cpts_rx_timestamp(priv->cpts, skb); + cpts_rx_timestamp(cpsw->cpts, skb); skb->protocol = eth_type_trans(skb, ndev); netif_receive_skb(skb); ndev->stats.rx_bytes += len; @@ -858,7 +850,7 @@ static void _cpsw_adjust_link(struct cpsw_slave *slave, mac_control = cpsw->data.mac_control; /* enable forwarding */ - cpsw_ale_control_set(priv->ale, slave_port, + cpsw_ale_control_set(cpsw->ale, slave_port, ALE_PORT_STATE, ALE_PORT_STATE_FORWARD); if (phy->speed == 1000) @@ -882,7 +874,7 @@ static void _cpsw_adjust_link(struct cpsw_slave *slave, } else { mac_control = 0; /* disable forwarding */ - cpsw_ale_control_set(priv->ale, slave_port, + cpsw_ale_control_set(cpsw->ale, slave_port, ALE_PORT_STATE, ALE_PORT_STATE_DISABLE); } @@ -914,9 +906,9 @@ static void cpsw_adjust_link(struct net_device *ndev) static int cpsw_get_coalesce(struct net_device *ndev, struct ethtool_coalesce *coal) { - struct cpsw_priv *priv = netdev_priv(ndev); + struct cpsw_common *cpsw = ndev_to_cpsw(ndev); - coal->rx_coalesce_usecs = priv->coal_intvl; + coal->rx_coalesce_usecs = cpsw->coal_intvl; return 0; } @@ -934,7 +926,7 @@ static int cpsw_set_coalesce(struct net_device *ndev, coal_intvl = coal->rx_coalesce_usecs; int_ctrl = readl(&cpsw->wr_regs->int_control); - prescale = priv->bus_freq_mhz * 4; + prescale = cpsw->bus_freq_mhz * 4; if (!coal->rx_coalesce_usecs) { int_ctrl &= ~(CPSW_INTPRESCALE_MASK | CPSW_INTPACEEN); @@ -973,16 +965,7 @@ update_return: writel(int_ctrl, &cpsw->wr_regs->int_control); cpsw_notice(priv, timer, "Set coalesce to %d usecs.\n", coal_intvl); - if (cpsw->data.dual_emac) { - int i; - - for (i = 0; i < cpsw->data.slaves; i++) { - priv = netdev_priv(cpsw->slaves[i].ndev); - priv->coal_intvl = coal_intvl; - } - } else { - priv->coal_intvl = coal_intvl; - } + cpsw->coal_intvl = coal_intvl; return 0; } @@ -1078,18 +1061,20 @@ static inline void cpsw_add_dual_emac_def_ale_entries( struct cpsw_priv *priv, struct cpsw_slave *slave, u32 slave_port) { + struct cpsw_common *cpsw = priv->cpsw; u32 port_mask = 1 << slave_port | ALE_PORT_HOST; - if (priv->version == CPSW_VERSION_1) + if (cpsw->version == CPSW_VERSION_1) slave_write(slave, slave->port_vlan, CPSW1_PORT_VLAN); else slave_write(slave, slave->port_vlan, CPSW2_PORT_VLAN); - cpsw_ale_add_vlan(priv->ale, slave->port_vlan, port_mask, + cpsw_ale_add_vlan(cpsw->ale, slave->port_vlan, port_mask, port_mask, port_mask, 0); - cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast, + cpsw_ale_add_mcast(cpsw->ale, priv->ndev->broadcast, port_mask, ALE_VLAN, slave->port_vlan, 0); - cpsw_ale_add_ucast(priv->ale, priv->mac_addr, - HOST_PORT_NUM, ALE_VLAN | ALE_SECURE, slave->port_vlan); + cpsw_ale_add_ucast(cpsw->ale, priv->mac_addr, + HOST_PORT_NUM, ALE_VLAN | + ALE_SECURE, slave->port_vlan); } static void soft_reset_slave(struct cpsw_slave *slave) @@ -1110,7 +1095,7 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv) /* setup priority mapping */ __raw_writel(RX_PRIORITY_MAPPING, &slave->sliver->rx_pri_map); - switch (priv->version) { + switch (cpsw->version) { case CPSW_VERSION_1: slave_write(slave, TX_PRIORITY_MAPPING, CPSW1_TX_PRI_MAP); break; @@ -1122,7 +1107,7 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv) } /* setup max packet size, and mac address */ - __raw_writel(priv->rx_packet_max, &slave->sliver->rx_maxlen); + __raw_writel(cpsw->rx_packet_max, &slave->sliver->rx_maxlen); cpsw_set_slave_mac(slave, priv); slave->mac_control = 0; /* no link yet */ @@ -1132,7 +1117,7 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv) if (cpsw->data.dual_emac) cpsw_add_dual_emac_def_ale_entries(priv, slave, slave_port); else - cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast, + cpsw_ale_add_mcast(cpsw->ale, priv->ndev->broadcast, 1 << slave_port, 0, 0, ALE_MCAST_FWD_2); if (slave->data->phy_node) { @@ -1173,7 +1158,7 @@ static inline void cpsw_add_default_vlan(struct cpsw_priv *priv) int i; int unreg_mcast_mask; - reg = (priv->version == CPSW_VERSION_1) ? CPSW1_PORT_VLAN : + reg = (cpsw->version == CPSW_VERSION_1) ? CPSW1_PORT_VLAN : CPSW2_PORT_VLAN; writel(vlan, &cpsw->host_port_regs->port_vlan); @@ -1186,7 +1171,7 @@ static inline void cpsw_add_default_vlan(struct cpsw_priv *priv) else unreg_mcast_mask = ALE_PORT_1 | ALE_PORT_2; - cpsw_ale_add_vlan(priv->ale, vlan, ALE_ALL_PORTS, + cpsw_ale_add_vlan(cpsw->ale, vlan, ALE_ALL_PORTS, ALE_ALL_PORTS, ALE_ALL_PORTS, unreg_mcast_mask); } @@ -1199,10 +1184,10 @@ static void cpsw_init_host_port(struct cpsw_priv *priv) /* soft reset the controller and initialize ale */ soft_reset("cpsw", &cpsw->regs->soft_reset); - cpsw_ale_start(priv->ale); + cpsw_ale_start(cpsw->ale); /* switch to vlan unaware mode */ - cpsw_ale_control_set(priv->ale, HOST_PORT_NUM, ALE_VLAN_AWARE, + cpsw_ale_control_set(cpsw->ale, HOST_PORT_NUM, ALE_VLAN_AWARE, CPSW_ALE_VLAN_AWARE); control_reg = readl(&cpsw->regs->control); control_reg |= CPSW_VLAN_AWARE; @@ -1216,18 +1201,18 @@ static void cpsw_init_host_port(struct cpsw_priv *priv) &cpsw->host_port_regs->cpdma_tx_pri_map); __raw_writel(0, &cpsw->host_port_regs->cpdma_rx_chan_map); - cpsw_ale_control_set(priv->ale, HOST_PORT_NUM, + cpsw_ale_control_set(cpsw->ale, HOST_PORT_NUM, ALE_PORT_STATE, ALE_PORT_STATE_FORWARD); if (!cpsw->data.dual_emac) { - cpsw_ale_add_ucast(priv->ale, priv->mac_addr, HOST_PORT_NUM, + cpsw_ale_add_ucast(cpsw->ale, priv->mac_addr, HOST_PORT_NUM, 0, 0); - cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast, + cpsw_ale_add_mcast(cpsw->ale, priv->ndev->broadcast, ALE_PORT_HOST, 0, 0, ALE_MCAST_FWD_2); } } -static void cpsw_slave_stop(struct cpsw_slave *slave, struct cpsw_priv *priv) +static void cpsw_slave_stop(struct cpsw_slave *slave, struct cpsw_common *cpsw) { u32 slave_port; @@ -1238,7 +1223,7 @@ static void cpsw_slave_stop(struct cpsw_slave *slave, struct cpsw_priv *priv) phy_stop(slave->phy); phy_disconnect(slave->phy); slave->phy = NULL; - cpsw_ale_control_set(priv->ale, slave_port, + cpsw_ale_control_set(cpsw->ale, slave_port, ALE_PORT_STATE, ALE_PORT_STATE_DISABLE); soft_reset_slave(slave); } @@ -1260,7 +1245,7 @@ static int cpsw_ndo_open(struct net_device *ndev) cpsw_intr_disable(cpsw); netif_carrier_off(ndev); - reg = priv->version; + reg = cpsw->version; dev_info(priv->dev, "initializing cpsw version %d.%d (%d)\n", CPSW_MAJOR_VERSION(reg), CPSW_MINOR_VERSION(reg), @@ -1275,7 +1260,7 @@ static int cpsw_ndo_open(struct net_device *ndev) if (!cpsw->data.dual_emac) cpsw_add_default_vlan(priv); else - cpsw_ale_add_vlan(priv->ale, cpsw->data.default_vlan, + cpsw_ale_add_vlan(cpsw->ale, cpsw->data.default_vlan, ALE_ALL_PORTS, ALE_ALL_PORTS, 0, 0); if (!cpsw_common_res_usage_state(cpsw)) { @@ -1313,7 +1298,7 @@ static int cpsw_ndo_open(struct net_device *ndev) ret = -ENOMEM; skb = __netdev_alloc_skb_ip_align(priv->ndev, - priv->rx_packet_max, GFP_KERNEL); + cpsw->rx_packet_max, GFP_KERNEL); if (!skb) goto err_cleanup; ret = cpdma_chan_submit(cpsw->rxch, skb, skb->data, @@ -1328,7 +1313,7 @@ static int cpsw_ndo_open(struct net_device *ndev) */ cpsw_info(priv, ifup, "submitted %d rx descriptors\n", i); - if (cpts_register(cpsw->dev, priv->cpts, + if (cpts_register(cpsw->dev, cpsw->cpts, cpsw->data.cpts_clock_mult, cpsw->data.cpts_clock_shift)) dev_err(priv->dev, "error registering cpts device\n"); @@ -1336,10 +1321,10 @@ static int cpsw_ndo_open(struct net_device *ndev) } /* Enable Interrupt pacing if configured */ - if (priv->coal_intvl != 0) { + if (cpsw->coal_intvl != 0) { struct ethtool_coalesce coal; - coal.rx_coalesce_usecs = priv->coal_intvl; + coal.rx_coalesce_usecs = cpsw->coal_intvl; cpsw_set_coalesce(ndev, &coal); } @@ -1352,7 +1337,7 @@ static int cpsw_ndo_open(struct net_device *ndev) err_cleanup: cpdma_ctlr_stop(cpsw->dma); - for_each_slave(priv, cpsw_slave_stop, priv); + for_each_slave(priv, cpsw_slave_stop, cpsw); pm_runtime_put_sync(cpsw->dev); netif_carrier_off(priv->ndev); return ret; @@ -1370,12 +1355,12 @@ static int cpsw_ndo_stop(struct net_device *ndev) if (cpsw_common_res_usage_state(cpsw) <= 1) { napi_disable(&cpsw->napi_rx); napi_disable(&cpsw->napi_tx); - cpts_unregister(priv->cpts); + cpts_unregister(cpsw->cpts); cpsw_intr_disable(cpsw); cpdma_ctlr_stop(cpsw->dma); - cpsw_ale_stop(priv->ale); + cpsw_ale_stop(cpsw->ale); } - for_each_slave(priv, cpsw_slave_stop, priv); + for_each_slave(priv, cpsw_slave_stop, cpsw); pm_runtime_put_sync(cpsw->dev); if (cpsw->data.dual_emac) cpsw->slaves[priv->emac_port].open_stat = false; @@ -1398,7 +1383,7 @@ static netdev_tx_t cpsw_ndo_start_xmit(struct sk_buff *skb, } if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP && - priv->cpts->tx_enable) + cpsw->cpts->tx_enable) skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; skb_tx_timestamp(skb); @@ -1424,13 +1409,12 @@ fail: #ifdef CONFIG_TI_CPTS -static void cpsw_hwtstamp_v1(struct cpsw_priv *priv) +static void cpsw_hwtstamp_v1(struct cpsw_common *cpsw) { - struct cpsw_common *cpsw = priv->cpsw; struct cpsw_slave *slave = &cpsw->slaves[cpsw->data.active_slave]; u32 ts_en, seq_id; - if (!priv->cpts->tx_enable && !priv->cpts->rx_enable) { + if (!cpsw->cpts->tx_enable && !cpsw->cpts->rx_enable) { slave_write(slave, 0, CPSW1_TS_CTL); return; } @@ -1438,10 +1422,10 @@ static void cpsw_hwtstamp_v1(struct cpsw_priv *priv) seq_id = (30 << CPSW_V1_SEQ_ID_OFS_SHIFT) | ETH_P_1588; ts_en = EVENT_MSG_BITS << CPSW_V1_MSG_TYPE_OFS; - if (priv->cpts->tx_enable) + if (cpsw->cpts->tx_enable) ts_en |= CPSW_V1_TS_TX_EN; - if (priv->cpts->rx_enable) + if (cpsw->cpts->rx_enable) ts_en |= CPSW_V1_TS_RX_EN; slave_write(slave, ts_en, CPSW1_TS_CTL); @@ -1460,24 +1444,24 @@ static void cpsw_hwtstamp_v2(struct cpsw_priv *priv) slave = &cpsw->slaves[cpsw->data.active_slave]; ctrl = slave_read(slave, CPSW2_CONTROL); - switch (priv->version) { + switch (cpsw->version) { case CPSW_VERSION_2: ctrl &= ~CTRL_V2_ALL_TS_MASK; - if (priv->cpts->tx_enable) + if (cpsw->cpts->tx_enable) ctrl |= CTRL_V2_TX_TS_BITS; - if (priv->cpts->rx_enable) + if (cpsw->cpts->rx_enable) ctrl |= CTRL_V2_RX_TS_BITS; break; case CPSW_VERSION_3: default: ctrl &= ~CTRL_V3_ALL_TS_MASK; - if (priv->cpts->tx_enable) + if (cpsw->cpts->tx_enable) ctrl |= CTRL_V3_TX_TS_BITS; - if (priv->cpts->rx_enable) + if (cpsw->cpts->rx_enable) ctrl |= CTRL_V3_RX_TS_BITS; break; } @@ -1492,12 +1476,13 @@ static void cpsw_hwtstamp_v2(struct cpsw_priv *priv) static int cpsw_hwtstamp_set(struct net_device *dev, struct ifreq *ifr) { struct cpsw_priv *priv = netdev_priv(dev); - struct cpts *cpts = priv->cpts; struct hwtstamp_config cfg; + struct cpsw_common *cpsw = priv->cpsw; + struct cpts *cpts = cpsw->cpts; - if (priv->version != CPSW_VERSION_1 && - priv->version != CPSW_VERSION_2 && - priv->version != CPSW_VERSION_3) + if (cpsw->version != CPSW_VERSION_1 && + cpsw->version != CPSW_VERSION_2 && + cpsw->version != CPSW_VERSION_3) return -EOPNOTSUPP; if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg))) @@ -1537,9 +1522,9 @@ static int cpsw_hwtstamp_set(struct net_device *dev, struct ifreq *ifr) cpts->tx_enable = cfg.tx_type == HWTSTAMP_TX_ON; - switch (priv->version) { + switch (cpsw->version) { case CPSW_VERSION_1: - cpsw_hwtstamp_v1(priv); + cpsw_hwtstamp_v1(cpsw); break; case CPSW_VERSION_2: case CPSW_VERSION_3: @@ -1554,13 +1539,13 @@ static int cpsw_hwtstamp_set(struct net_device *dev, struct ifreq *ifr) static int cpsw_hwtstamp_get(struct net_device *dev, struct ifreq *ifr) { - struct cpsw_priv *priv = netdev_priv(dev); - struct cpts *cpts = priv->cpts; + struct cpsw_common *cpsw = ndev_to_cpsw(dev); + struct cpts *cpts = cpsw->cpts; struct hwtstamp_config cfg; - if (priv->version != CPSW_VERSION_1 && - priv->version != CPSW_VERSION_2 && - priv->version != CPSW_VERSION_3) + if (cpsw->version != CPSW_VERSION_1 && + cpsw->version != CPSW_VERSION_2 && + cpsw->version != CPSW_VERSION_3) return -EOPNOTSUPP; cfg.flags = 0; @@ -1632,9 +1617,9 @@ static int cpsw_ndo_set_mac_address(struct net_device *ndev, void *p) flags = ALE_VLAN; } - cpsw_ale_del_ucast(priv->ale, priv->mac_addr, HOST_PORT_NUM, + cpsw_ale_del_ucast(cpsw->ale, priv->mac_addr, HOST_PORT_NUM, flags, vid); - cpsw_ale_add_ucast(priv->ale, addr->sa_data, HOST_PORT_NUM, + cpsw_ale_add_ucast(cpsw->ale, addr->sa_data, HOST_PORT_NUM, flags, vid); memcpy(priv->mac_addr, addr->sa_data, ETH_ALEN); @@ -1680,27 +1665,27 @@ static inline int cpsw_add_vlan_ale_entry(struct cpsw_priv *priv, unreg_mcast_mask = ALE_PORT_1 | ALE_PORT_2; } - ret = cpsw_ale_add_vlan(priv->ale, vid, port_mask, 0, port_mask, + ret = cpsw_ale_add_vlan(cpsw->ale, vid, port_mask, 0, port_mask, unreg_mcast_mask); if (ret != 0) return ret; - ret = cpsw_ale_add_ucast(priv->ale, priv->mac_addr, + ret = cpsw_ale_add_ucast(cpsw->ale, priv->mac_addr, HOST_PORT_NUM, ALE_VLAN, vid); if (ret != 0) goto clean_vid; - ret = cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast, + ret = cpsw_ale_add_mcast(cpsw->ale, priv->ndev->broadcast, port_mask, ALE_VLAN, vid, 0); if (ret != 0) goto clean_vlan_ucast; return 0; clean_vlan_ucast: - cpsw_ale_del_ucast(priv->ale, priv->mac_addr, + cpsw_ale_del_ucast(cpsw->ale, priv->mac_addr, HOST_PORT_NUM, ALE_VLAN, vid); clean_vid: - cpsw_ale_del_vlan(priv->ale, vid, 0); + cpsw_ale_del_vlan(cpsw->ale, vid, 0); return ret; } @@ -1766,16 +1751,16 @@ static int cpsw_ndo_vlan_rx_kill_vid(struct net_device *ndev, } dev_info(priv->dev, "removing vlanid %d from vlan filter\n", vid); - ret = cpsw_ale_del_vlan(priv->ale, vid, 0); + ret = cpsw_ale_del_vlan(cpsw->ale, vid, 0); if (ret != 0) return ret; - ret = cpsw_ale_del_ucast(priv->ale, priv->mac_addr, + ret = cpsw_ale_del_ucast(cpsw->ale, priv->mac_addr, HOST_PORT_NUM, ALE_VLAN, vid); if (ret != 0) return ret; - ret = cpsw_ale_del_mcast(priv->ale, priv->ndev->broadcast, + ret = cpsw_ale_del_mcast(cpsw->ale, priv->ndev->broadcast, 0, ALE_VLAN, vid); pm_runtime_put(cpsw->dev); return ret; @@ -1808,13 +1793,13 @@ static int cpsw_get_regs_len(struct net_device *ndev) static void cpsw_get_regs(struct net_device *ndev, struct ethtool_regs *regs, void *p) { - struct cpsw_priv *priv = netdev_priv(ndev); u32 *reg = p; + struct cpsw_common *cpsw = ndev_to_cpsw(ndev); /* update CPSW IP version */ - regs->version = priv->version; + regs->version = cpsw->version; - cpsw_ale_dump(priv->ale, reg); + cpsw_ale_dump(cpsw->ale, reg); } static void cpsw_get_drvinfo(struct net_device *ndev, @@ -1844,7 +1829,7 @@ static int cpsw_get_ts_info(struct net_device *ndev, struct ethtool_ts_info *info) { #ifdef CONFIG_TI_CPTS - struct cpsw_priv *priv = netdev_priv(ndev); + struct cpsw_common *cpsw = ndev_to_cpsw(ndev); info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE | @@ -1853,7 +1838,7 @@ static int cpsw_get_ts_info(struct net_device *ndev, SOF_TIMESTAMPING_RX_SOFTWARE | SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_RAW_HARDWARE; - info->phc_index = priv->cpts->phc_index; + info->phc_index = cpsw->cpts->phc_index; info->tx_types = (1 << HWTSTAMP_TX_OFF) | (1 << HWTSTAMP_TX_ON); @@ -2188,7 +2173,6 @@ static int cpsw_probe_dual_emac(struct cpsw_priv *priv) priv_sl2->ndev = ndev; priv_sl2->dev = &ndev->dev; priv_sl2->msg_enable = netif_msg_init(debug_level, CPSW_DEBUG); - priv_sl2->rx_packet_max = max(rx_packet_max, 128); if (is_valid_ether_addr(data->slave_data[1].mac_addr)) { memcpy(priv_sl2->mac_addr, data->slave_data[1].mac_addr, @@ -2202,13 +2186,8 @@ static int cpsw_probe_dual_emac(struct cpsw_priv *priv) } memcpy(ndev->dev_addr, priv_sl2->mac_addr, ETH_ALEN); - priv_sl2->coal_intvl = 0; - priv_sl2->bus_freq_mhz = priv->bus_freq_mhz; - priv_sl2->ale = priv->ale; priv_sl2->emac_port = 1; cpsw->slaves[1].ndev = ndev; - priv_sl2->cpts = priv->cpts; - priv_sl2->version = priv->version; ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; ndev->netdev_ops = &cpsw_netdev_ops; @@ -2296,9 +2275,9 @@ static int cpsw_probe(struct platform_device *pdev) priv->ndev = ndev; priv->dev = &ndev->dev; priv->msg_enable = netif_msg_init(debug_level, CPSW_DEBUG); - priv->rx_packet_max = max(rx_packet_max, 128); - priv->cpts = devm_kzalloc(&pdev->dev, sizeof(struct cpts), GFP_KERNEL); - if (!priv->cpts) { + cpsw->rx_packet_max = max(rx_packet_max, 128); + cpsw->cpts = devm_kzalloc(&pdev->dev, sizeof(struct cpts), GFP_KERNEL); + if (!cpsw->cpts) { dev_err(&pdev->dev, "error allocating cpts\n"); ret = -ENOMEM; goto clean_ndev_ret; @@ -2355,8 +2334,7 @@ static int cpsw_probe(struct platform_device *pdev) ret = -ENODEV; goto clean_runtime_disable_ret; } - priv->coal_intvl = 0; - priv->bus_freq_mhz = clk_get_rate(clk) / 1000000; + cpsw->bus_freq_mhz = clk_get_rate(clk) / 1000000; ss_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); ss_regs = devm_ioremap_resource(&pdev->dev, ss_res); @@ -2374,7 +2352,7 @@ static int cpsw_probe(struct platform_device *pdev) pm_runtime_put_noidle(&pdev->dev); goto clean_runtime_disable_ret; } - priv->version = readl(&cpsw->regs->id_ver); + cpsw->version = readl(&cpsw->regs->id_ver); pm_runtime_put_sync(&pdev->dev); res = platform_get_resource(pdev, IORESOURCE_MEM, 1); @@ -2387,10 +2365,10 @@ static int cpsw_probe(struct platform_device *pdev) memset(&dma_params, 0, sizeof(dma_params)); memset(&ale_params, 0, sizeof(ale_params)); - switch (priv->version) { + switch (cpsw->version) { case CPSW_VERSION_1: cpsw->host_port_regs = ss_regs + CPSW1_HOST_PORT_OFFSET; - priv->cpts->reg = ss_regs + CPSW1_CPTS_OFFSET; + cpsw->cpts->reg = ss_regs + CPSW1_CPTS_OFFSET; cpsw->hw_stats = ss_regs + CPSW1_HW_STATS; dma_params.dmaregs = ss_regs + CPSW1_CPDMA_OFFSET; dma_params.txhdp = ss_regs + CPSW1_STATERAM_OFFSET; @@ -2404,7 +2382,7 @@ static int cpsw_probe(struct platform_device *pdev) case CPSW_VERSION_3: case CPSW_VERSION_4: cpsw->host_port_regs = ss_regs + CPSW2_HOST_PORT_OFFSET; - priv->cpts->reg = ss_regs + CPSW2_CPTS_OFFSET; + cpsw->cpts->reg = ss_regs + CPSW2_CPTS_OFFSET; cpsw->hw_stats = ss_regs + CPSW2_HW_STATS; dma_params.dmaregs = ss_regs + CPSW2_CPDMA_OFFSET; dma_params.txhdp = ss_regs + CPSW2_STATERAM_OFFSET; @@ -2416,7 +2394,7 @@ static int cpsw_probe(struct platform_device *pdev) (u32 __force) ss_res->start + CPSW2_BD_OFFSET; break; default: - dev_err(priv->dev, "unknown version 0x%08x\n", priv->version); + dev_err(priv->dev, "unknown version 0x%08x\n", cpsw->version); ret = -ENODEV; goto clean_runtime_disable_ret; } @@ -2466,8 +2444,8 @@ static int cpsw_probe(struct platform_device *pdev) ale_params.ale_entries = data->ale_entries; ale_params.ale_ports = data->slaves; - priv->ale = cpsw_ale_create(&ale_params); - if (!priv->ale) { + cpsw->ale = cpsw_ale_create(&ale_params); + if (!cpsw->ale) { dev_err(priv->dev, "error initializing ale engine\n"); ret = -ENODEV; goto clean_dma_ret; @@ -2555,7 +2533,7 @@ static int cpsw_probe(struct platform_device *pdev) return 0; clean_ale_ret: - cpsw_ale_destroy(priv->ale); + cpsw_ale_destroy(cpsw->ale); clean_dma_ret: cpdma_ctlr_destroy(cpsw->dma); clean_runtime_disable_ret: @@ -2568,8 +2546,7 @@ clean_ndev_ret: static int cpsw_remove(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); - struct cpsw_priv *priv = netdev_priv(ndev); - struct cpsw_common *cpsw = priv->cpsw; + struct cpsw_common *cpsw = ndev_to_cpsw(ndev); int ret; ret = pm_runtime_get_sync(&pdev->dev); @@ -2582,7 +2559,7 @@ static int cpsw_remove(struct platform_device *pdev) unregister_netdev(cpsw->slaves[1].ndev); unregister_netdev(ndev); - cpsw_ale_destroy(priv->ale); + cpsw_ale_destroy(cpsw->ale); cpdma_ctlr_destroy(cpsw->dma); of_platform_depopulate(&pdev->dev); pm_runtime_put_sync(&pdev->dev); From 054c67d1c82afde13e475cdd8b7117a5e40bebb1 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Tue, 9 Aug 2016 03:51:23 -0400 Subject: [PATCH 0112/1715] qed*: Add support for ethtool link_ksettings callbacks. This patch adds the driver implementation for ethtool link_ksettings callbacks. qed driver now defines/uses the qed specific masks for representing link capability values. qede driver maps these values to to new link modes defined by the kernel implementation of link_ksettings. Please consider applying this to 'net-next' branch. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_main.c | 107 +++++++------- drivers/net/ethernet/qlogic/qed/qed_mcp.c | 3 + drivers/net/ethernet/qlogic/qed/qed_mcp.h | 7 +- .../net/ethernet/qlogic/qede/qede_ethtool.c | 132 ++++++++++++++---- include/linux/qed/qed_if.h | 15 ++ 5 files changed, 180 insertions(+), 84 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index c7dc34bfdd0a..d6e1dc5fac94 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -1025,20 +1025,23 @@ static int qed_set_link(struct qed_dev *cdev, struct qed_link_params *params) link_params->speed.autoneg = params->autoneg; if (params->override_flags & QED_LINK_OVERRIDE_SPEED_ADV_SPEEDS) { link_params->speed.advertised_speeds = 0; - if ((params->adv_speeds & SUPPORTED_1000baseT_Half) || - (params->adv_speeds & SUPPORTED_1000baseT_Full)) + if ((params->adv_speeds & QED_LM_1000baseT_Half_BIT) || + (params->adv_speeds & QED_LM_1000baseT_Full_BIT)) link_params->speed.advertised_speeds |= - NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G; - if (params->adv_speeds & SUPPORTED_10000baseKR_Full) + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G; + if (params->adv_speeds & QED_LM_10000baseKR_Full_BIT) link_params->speed.advertised_speeds |= - NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G; - if (params->adv_speeds & SUPPORTED_40000baseLR4_Full) + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G; + if (params->adv_speeds & QED_LM_25000baseKR_Full_BIT) link_params->speed.advertised_speeds |= - NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G; - if (params->adv_speeds & 0) + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G; + if (params->adv_speeds & QED_LM_40000baseLR4_Full_BIT) link_params->speed.advertised_speeds |= - NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G; - if (params->adv_speeds & 0) + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G; + if (params->adv_speeds & QED_LM_50000baseKR2_Full_BIT) + link_params->speed.advertised_speeds |= + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G; + if (params->adv_speeds & QED_LM_100000baseKR4_Full_BIT) link_params->speed.advertised_speeds |= NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_BB_100G; } @@ -1168,50 +1171,56 @@ static void qed_fill_link(struct qed_hwfn *hwfn, if_link->link_up = true; /* TODO - at the moment assume supported and advertised speed equal */ - if_link->supported_caps = SUPPORTED_FIBRE; + if_link->supported_caps = QED_LM_FIBRE_BIT; if (params.speed.autoneg) - if_link->supported_caps |= SUPPORTED_Autoneg; + if_link->supported_caps |= QED_LM_Autoneg_BIT; if (params.pause.autoneg || (params.pause.forced_rx && params.pause.forced_tx)) - if_link->supported_caps |= SUPPORTED_Asym_Pause; + if_link->supported_caps |= QED_LM_Asym_Pause_BIT; if (params.pause.autoneg || params.pause.forced_rx || params.pause.forced_tx) - if_link->supported_caps |= SUPPORTED_Pause; + if_link->supported_caps |= QED_LM_Pause_BIT; if_link->advertised_caps = if_link->supported_caps; if (params.speed.advertised_speeds & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G) - if_link->advertised_caps |= SUPPORTED_1000baseT_Half | - SUPPORTED_1000baseT_Full; + if_link->advertised_caps |= QED_LM_1000baseT_Half_BIT | + QED_LM_1000baseT_Full_BIT; if (params.speed.advertised_speeds & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G) - if_link->advertised_caps |= SUPPORTED_10000baseKR_Full; + if_link->advertised_caps |= QED_LM_10000baseKR_Full_BIT; if (params.speed.advertised_speeds & - NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G) - if_link->advertised_caps |= SUPPORTED_40000baseLR4_Full; + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G) + if_link->advertised_caps |= QED_LM_25000baseKR_Full_BIT; if (params.speed.advertised_speeds & - NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G) - if_link->advertised_caps |= 0; + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G) + if_link->advertised_caps |= QED_LM_40000baseLR4_Full_BIT; + if (params.speed.advertised_speeds & + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G) + if_link->advertised_caps |= QED_LM_50000baseKR2_Full_BIT; if (params.speed.advertised_speeds & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_BB_100G) - if_link->advertised_caps |= 0; + if_link->advertised_caps |= QED_LM_100000baseKR4_Full_BIT; if (link_caps.speed_capabilities & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G) - if_link->supported_caps |= SUPPORTED_1000baseT_Half | - SUPPORTED_1000baseT_Full; + if_link->supported_caps |= QED_LM_1000baseT_Half_BIT | + QED_LM_1000baseT_Full_BIT; if (link_caps.speed_capabilities & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G) - if_link->supported_caps |= SUPPORTED_10000baseKR_Full; + if_link->supported_caps |= QED_LM_10000baseKR_Full_BIT; if (link_caps.speed_capabilities & - NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G) - if_link->supported_caps |= SUPPORTED_40000baseLR4_Full; + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G) + if_link->supported_caps |= QED_LM_25000baseKR_Full_BIT; if (link_caps.speed_capabilities & - NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G) - if_link->supported_caps |= 0; + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G) + if_link->supported_caps |= QED_LM_40000baseLR4_Full_BIT; + if (link_caps.speed_capabilities & + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G) + if_link->supported_caps |= QED_LM_50000baseKR2_Full_BIT; if (link_caps.speed_capabilities & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_BB_100G) - if_link->supported_caps |= 0; + if_link->supported_caps |= QED_LM_100000baseKR4_Full_BIT; if (link.link_up) if_link->speed = link.speed; @@ -1231,33 +1240,29 @@ static void qed_fill_link(struct qed_hwfn *hwfn, if_link->pause_config |= QED_LINK_PAUSE_TX_ENABLE; /* Link partner capabilities */ - if (link.partner_adv_speed & - QED_LINK_PARTNER_SPEED_1G_HD) - if_link->lp_caps |= SUPPORTED_1000baseT_Half; - if (link.partner_adv_speed & - QED_LINK_PARTNER_SPEED_1G_FD) - if_link->lp_caps |= SUPPORTED_1000baseT_Full; - if (link.partner_adv_speed & - QED_LINK_PARTNER_SPEED_10G) - if_link->lp_caps |= SUPPORTED_10000baseKR_Full; - if (link.partner_adv_speed & - QED_LINK_PARTNER_SPEED_40G) - if_link->lp_caps |= SUPPORTED_40000baseLR4_Full; - if (link.partner_adv_speed & - QED_LINK_PARTNER_SPEED_50G) - if_link->lp_caps |= 0; - if (link.partner_adv_speed & - QED_LINK_PARTNER_SPEED_100G) - if_link->lp_caps |= 0; + if (link.partner_adv_speed & QED_LINK_PARTNER_SPEED_1G_HD) + if_link->lp_caps |= QED_LM_1000baseT_Half_BIT; + if (link.partner_adv_speed & QED_LINK_PARTNER_SPEED_1G_FD) + if_link->lp_caps |= QED_LM_1000baseT_Full_BIT; + if (link.partner_adv_speed & QED_LINK_PARTNER_SPEED_10G) + if_link->lp_caps |= QED_LM_10000baseKR_Full_BIT; + if (link.partner_adv_speed & QED_LINK_PARTNER_SPEED_25G) + if_link->lp_caps |= QED_LM_25000baseKR_Full_BIT; + if (link.partner_adv_speed & QED_LINK_PARTNER_SPEED_40G) + if_link->lp_caps |= QED_LM_40000baseLR4_Full_BIT; + if (link.partner_adv_speed & QED_LINK_PARTNER_SPEED_50G) + if_link->lp_caps |= QED_LM_50000baseKR2_Full_BIT; + if (link.partner_adv_speed & QED_LINK_PARTNER_SPEED_100G) + if_link->lp_caps |= QED_LM_100000baseKR4_Full_BIT; if (link.an_complete) - if_link->lp_caps |= SUPPORTED_Autoneg; + if_link->lp_caps |= QED_LM_Autoneg_BIT; if (link.partner_adv_pause) - if_link->lp_caps |= SUPPORTED_Pause; + if_link->lp_caps |= QED_LM_Pause_BIT; if (link.partner_adv_pause == QED_LINK_PARTNER_ASYMMETRIC_PAUSE || link.partner_adv_pause == QED_LINK_PARTNER_BOTH_PAUSE) - if_link->lp_caps |= SUPPORTED_Asym_Pause; + if_link->lp_caps |= QED_LM_Asym_Pause_BIT; } static void qed_get_current_link(struct qed_dev *cdev, diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index a240f26344a4..ce4b08a5fe99 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -634,6 +634,9 @@ static void qed_mcp_handle_link_change(struct qed_hwfn *p_hwfn, p_link->partner_adv_speed |= (status & LINK_STATUS_LINK_PARTNER_20G_CAPABLE) ? QED_LINK_PARTNER_SPEED_20G : 0; + p_link->partner_adv_speed |= + (status & LINK_STATUS_LINK_PARTNER_25G_CAPABLE) ? + QED_LINK_PARTNER_SPEED_25G : 0; p_link->partner_adv_speed |= (status & LINK_STATUS_LINK_PARTNER_40G_CAPABLE) ? QED_LINK_PARTNER_SPEED_40G : 0; diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h index 7f319aa1b229..013d1b92ed63 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h @@ -60,9 +60,10 @@ struct qed_mcp_link_state { #define QED_LINK_PARTNER_SPEED_1G_FD BIT(1) #define QED_LINK_PARTNER_SPEED_10G BIT(2) #define QED_LINK_PARTNER_SPEED_20G BIT(3) -#define QED_LINK_PARTNER_SPEED_40G BIT(4) -#define QED_LINK_PARTNER_SPEED_50G BIT(5) -#define QED_LINK_PARTNER_SPEED_100G BIT(6) +#define QED_LINK_PARTNER_SPEED_25G BIT(4) +#define QED_LINK_PARTNER_SPEED_40G BIT(5) +#define QED_LINK_PARTNER_SPEED_50G BIT(6) +#define QED_LINK_PARTNER_SPEED_100G BIT(7) u32 partner_adv_speed; bool partner_tx_flow_ctrl_en; diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index f8492cac9290..0e0acfb5c1ed 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -249,78 +249,150 @@ static u32 qede_get_priv_flags(struct net_device *dev) return (!!(edev->dev_info.common.num_hwfns > 1)) << QEDE_PRI_FLAG_CMT; } -static int qede_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) +struct qede_link_mode_mapping { + u32 qed_link_mode; + u32 ethtool_link_mode; +}; + +static const struct qede_link_mode_mapping qed_lm_map[] = { + {QED_LM_FIBRE_BIT, ETHTOOL_LINK_MODE_FIBRE_BIT}, + {QED_LM_Autoneg_BIT, ETHTOOL_LINK_MODE_Autoneg_BIT}, + {QED_LM_Asym_Pause_BIT, ETHTOOL_LINK_MODE_Asym_Pause_BIT}, + {QED_LM_Pause_BIT, ETHTOOL_LINK_MODE_Pause_BIT}, + {QED_LM_1000baseT_Half_BIT, ETHTOOL_LINK_MODE_1000baseT_Half_BIT}, + {QED_LM_1000baseT_Full_BIT, ETHTOOL_LINK_MODE_1000baseT_Full_BIT}, + {QED_LM_10000baseKR_Full_BIT, ETHTOOL_LINK_MODE_10000baseKR_Full_BIT}, + {QED_LM_25000baseKR_Full_BIT, ETHTOOL_LINK_MODE_25000baseKR_Full_BIT}, + {QED_LM_40000baseLR4_Full_BIT, ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT}, + {QED_LM_50000baseKR2_Full_BIT, ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT}, + {QED_LM_100000baseKR4_Full_BIT, + ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT}, +}; + +#define QEDE_DRV_TO_ETHTOOL_CAPS(caps, lk_ksettings, name) \ +{ \ + int i; \ + \ + for (i = 0; i < QED_LM_COUNT; i++) { \ + if ((caps) & (qed_lm_map[i].qed_link_mode)) \ + __set_bit(qed_lm_map[i].ethtool_link_mode,\ + lk_ksettings->link_modes.name); \ + } \ +} + +#define QEDE_ETHTOOL_TO_DRV_CAPS(caps, lk_ksettings, name) \ +{ \ + int i; \ + \ + for (i = 0; i < QED_LM_COUNT; i++) { \ + if (test_bit(qed_lm_map[i].ethtool_link_mode, \ + lk_ksettings->link_modes.name)) \ + caps |= qed_lm_map[i].qed_link_mode; \ + } \ +} + +static int qede_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) { + struct ethtool_link_settings *base = &cmd->base; struct qede_dev *edev = netdev_priv(dev); struct qed_link_output current_link; memset(¤t_link, 0, sizeof(current_link)); edev->ops->common->get_link(edev->cdev, ¤t_link); - cmd->supported = current_link.supported_caps; - cmd->advertising = current_link.advertised_caps; + ethtool_link_ksettings_zero_link_mode(cmd, supported); + QEDE_DRV_TO_ETHTOOL_CAPS(current_link.supported_caps, cmd, supported) + + ethtool_link_ksettings_zero_link_mode(cmd, advertising); + QEDE_DRV_TO_ETHTOOL_CAPS(current_link.advertised_caps, cmd, advertising) + + ethtool_link_ksettings_zero_link_mode(cmd, lp_advertising); + QEDE_DRV_TO_ETHTOOL_CAPS(current_link.lp_caps, cmd, lp_advertising) + if ((edev->state == QEDE_STATE_OPEN) && (current_link.link_up)) { - ethtool_cmd_speed_set(cmd, current_link.speed); - cmd->duplex = current_link.duplex; + base->speed = current_link.speed; + base->duplex = current_link.duplex; } else { - cmd->duplex = DUPLEX_UNKNOWN; - ethtool_cmd_speed_set(cmd, SPEED_UNKNOWN); + base->speed = SPEED_UNKNOWN; + base->duplex = DUPLEX_UNKNOWN; } - cmd->port = current_link.port; - cmd->autoneg = (current_link.autoneg) ? AUTONEG_ENABLE : - AUTONEG_DISABLE; - cmd->lp_advertising = current_link.lp_caps; + base->port = current_link.port; + base->autoneg = (current_link.autoneg) ? AUTONEG_ENABLE : + AUTONEG_DISABLE; return 0; } -static int qede_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) +static int qede_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) { + const struct ethtool_link_settings *base = &cmd->base; struct qede_dev *edev = netdev_priv(dev); struct qed_link_output current_link; struct qed_link_params params; - u32 speed; if (!edev->ops || !edev->ops->common->can_link_change(edev->cdev)) { - DP_INFO(edev, - "Link settings are not allowed to be changed\n"); + DP_INFO(edev, "Link settings are not allowed to be changed\n"); return -EOPNOTSUPP; } - memset(¤t_link, 0, sizeof(current_link)); memset(¶ms, 0, sizeof(params)); edev->ops->common->get_link(edev->cdev, ¤t_link); - speed = ethtool_cmd_speed(cmd); params.override_flags |= QED_LINK_OVERRIDE_SPEED_ADV_SPEEDS; params.override_flags |= QED_LINK_OVERRIDE_SPEED_AUTONEG; - if (cmd->autoneg == AUTONEG_ENABLE) { + if (base->autoneg == AUTONEG_ENABLE) { params.autoneg = true; params.forced_speed = 0; - params.adv_speeds = cmd->advertising; - } else { /* forced speed */ + QEDE_ETHTOOL_TO_DRV_CAPS(params.adv_speeds, cmd, advertising) + } else { /* forced speed */ params.override_flags |= QED_LINK_OVERRIDE_SPEED_FORCED_SPEED; params.autoneg = false; - params.forced_speed = speed; - switch (speed) { + params.forced_speed = base->speed; + switch (base->speed) { case SPEED_10000: if (!(current_link.supported_caps & - SUPPORTED_10000baseKR_Full)) { + QED_LM_10000baseKR_Full_BIT)) { DP_INFO(edev, "10G speed not supported\n"); return -EINVAL; } - params.adv_speeds = SUPPORTED_10000baseKR_Full; + params.adv_speeds = QED_LM_10000baseKR_Full_BIT; + break; + case SPEED_25000: + if (!(current_link.supported_caps & + QED_LM_25000baseKR_Full_BIT)) { + DP_INFO(edev, "25G speed not supported\n"); + return -EINVAL; + } + params.adv_speeds = QED_LM_25000baseKR_Full_BIT; break; case SPEED_40000: if (!(current_link.supported_caps & - SUPPORTED_40000baseLR4_Full)) { + QED_LM_40000baseLR4_Full_BIT)) { DP_INFO(edev, "40G speed not supported\n"); return -EINVAL; } - params.adv_speeds = SUPPORTED_40000baseLR4_Full; + params.adv_speeds = QED_LM_40000baseLR4_Full_BIT; + break; + case 0xdead: + if (!(current_link.supported_caps & + QED_LM_50000baseKR2_Full_BIT)) { + DP_INFO(edev, "50G speed not supported\n"); + return -EINVAL; + } + params.adv_speeds = QED_LM_50000baseKR2_Full_BIT; + break; + case 0xbeef: + if (!(current_link.supported_caps & + QED_LM_100000baseKR4_Full_BIT)) { + DP_INFO(edev, "100G speed not supported\n"); + return -EINVAL; + } + params.adv_speeds = QED_LM_100000baseKR4_Full_BIT; break; default: - DP_INFO(edev, "Unsupported speed %u\n", speed); + DP_INFO(edev, "Unsupported speed %u\n", base->speed); return -EINVAL; } } @@ -1228,8 +1300,8 @@ static int qede_get_tunable(struct net_device *dev, } static const struct ethtool_ops qede_ethtool_ops = { - .get_settings = qede_get_settings, - .set_settings = qede_set_settings, + .get_link_ksettings = qede_get_link_ksettings, + .set_link_ksettings = qede_set_link_ksettings, .get_drvinfo = qede_get_drvinfo, .get_msglevel = qede_get_msglevel, .set_msglevel = qede_set_msglevel, @@ -1260,7 +1332,7 @@ static const struct ethtool_ops qede_ethtool_ops = { }; static const struct ethtool_ops qede_vf_ethtool_ops = { - .get_settings = qede_get_settings, + .get_link_ksettings = qede_get_link_ksettings, .get_drvinfo = qede_get_drvinfo, .get_msglevel = qede_get_msglevel, .set_msglevel = qede_set_msglevel, diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index b1e3c57c7117..737fc4c8db49 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -268,6 +268,21 @@ enum qed_protocol { QED_PROTOCOL_ISCSI, }; +enum qed_link_mode_bits { + QED_LM_FIBRE_BIT = BIT(0), + QED_LM_Autoneg_BIT = BIT(1), + QED_LM_Asym_Pause_BIT = BIT(2), + QED_LM_Pause_BIT = BIT(3), + QED_LM_1000baseT_Half_BIT = BIT(4), + QED_LM_1000baseT_Full_BIT = BIT(5), + QED_LM_10000baseKR_Full_BIT = BIT(6), + QED_LM_25000baseKR_Full_BIT = BIT(7), + QED_LM_40000baseLR4_Full_BIT = BIT(8), + QED_LM_50000baseKR2_Full_BIT = BIT(9), + QED_LM_100000baseKR4_Full_BIT = BIT(10), + QED_LM_COUNT = 11 +}; + struct qed_link_params { bool link_up; From fff8019a08b60dce0c7f2858ebe44c5b84ed493b Mon Sep 17 00:00:00 2001 From: Harini Katakam Date: Tue, 9 Aug 2016 13:15:53 +0530 Subject: [PATCH 0113/1715] net: macb: Add 64 bit addressing support for GEM This patch adds support for 64 bit addressing and BDs. -> Enable 64 bit addressing in DMACFG register. -> Set DMA mask when design config register shows support for 64 bit addr. -> Add new BD words for higher address when 64 bit DMA support is present. -> Add and update TBQPH and RBQPH for MSB of BD pointers. -> Change extraction and updation of buffer addresses to use 64 bit address. -> In gem_rx extract address in one place insted of two and use a separate flag for RXUSED. Signed-off-by: Harini Katakam Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb.c | 62 ++++++++++++++++++++++++----- drivers/net/ethernet/cadence/macb.h | 10 +++++ 2 files changed, 61 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c index 89c0cfa9719f..6b797e301e3f 100644 --- a/drivers/net/ethernet/cadence/macb.c +++ b/drivers/net/ethernet/cadence/macb.c @@ -541,6 +541,14 @@ static void macb_tx_unmap(struct macb *bp, struct macb_tx_skb *tx_skb) } } +static inline void macb_set_addr(struct macb_dma_desc *desc, dma_addr_t addr) +{ + desc->addr = (u32)addr; +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT + desc->addrh = (u32)(addr >> 32); +#endif +} + static void macb_tx_error_task(struct work_struct *work) { struct macb_queue *queue = container_of(work, struct macb_queue, @@ -621,14 +629,17 @@ static void macb_tx_error_task(struct work_struct *work) /* Set end of TX queue */ desc = macb_tx_desc(queue, 0); - desc->addr = 0; + macb_set_addr(desc, 0); desc->ctrl = MACB_BIT(TX_USED); /* Make descriptor updates visible to hardware */ wmb(); /* Reinitialize the TX desc queue */ - queue_writel(queue, TBQP, queue->tx_ring_dma); + queue_writel(queue, TBQP, (u32)(queue->tx_ring_dma)); +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT + queue_writel(queue, TBQPH, (u32)(queue->tx_ring_dma >> 32)); +#endif /* Make TX ring reflect state of hardware */ queue->tx_head = 0; queue->tx_tail = 0; @@ -750,7 +761,7 @@ static void gem_rx_refill(struct macb *bp) if (entry == RX_RING_SIZE - 1) paddr |= MACB_BIT(RX_WRAP); - bp->rx_ring[entry].addr = paddr; + macb_set_addr(&(bp->rx_ring[entry]), paddr); bp->rx_ring[entry].ctrl = 0; /* properly align Ethernet header */ @@ -798,7 +809,9 @@ static int gem_rx(struct macb *bp, int budget) int count = 0; while (count < budget) { - u32 addr, ctrl; + u32 ctrl; + dma_addr_t addr; + bool rxused; entry = macb_rx_ring_wrap(bp->rx_tail); desc = &bp->rx_ring[entry]; @@ -806,10 +819,14 @@ static int gem_rx(struct macb *bp, int budget) /* Make hw descriptor updates visible to CPU */ rmb(); - addr = desc->addr; + rxused = (desc->addr & MACB_BIT(RX_USED)) ? true : false; + addr = MACB_BF(RX_WADDR, MACB_BFEXT(RX_WADDR, desc->addr)); +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT + addr |= ((u64)(desc->addrh) << 32); +#endif ctrl = desc->ctrl; - if (!(addr & MACB_BIT(RX_USED))) + if (!rxused) break; bp->rx_tail++; @@ -835,7 +852,6 @@ static int gem_rx(struct macb *bp, int budget) netdev_vdbg(bp->dev, "gem_rx %u (len %u)\n", entry, len); skb_put(skb, len); - addr = MACB_BF(RX_WADDR, MACB_BFEXT(RX_WADDR, addr)); dma_unmap_single(&bp->pdev->dev, addr, bp->rx_buffer_size, DMA_FROM_DEVICE); @@ -1299,7 +1315,7 @@ static unsigned int macb_tx_map(struct macb *bp, ctrl |= MACB_BIT(TX_WRAP); /* Set TX buffer descriptor */ - desc->addr = tx_skb->mapping; + macb_set_addr(desc, tx_skb->mapping); /* desc->addr must be visible to hardware before clearing * 'TX_USED' bit in desc->ctrl. */ @@ -1422,6 +1438,9 @@ static void gem_free_rx_buffers(struct macb *bp) desc = &bp->rx_ring[i]; addr = MACB_BF(RX_WADDR, MACB_BFEXT(RX_WADDR, desc->addr)); +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT + addr |= ((u64)(desc->addrh) << 32); +#endif dma_unmap_single(&bp->pdev->dev, addr, bp->rx_buffer_size, DMA_FROM_DEVICE); dev_kfree_skb_any(skb); @@ -1547,7 +1566,7 @@ static void gem_init_rings(struct macb *bp) for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) { for (i = 0; i < TX_RING_SIZE; i++) { - queue->tx_ring[i].addr = 0; + macb_set_addr(&(queue->tx_ring[i]), 0); queue->tx_ring[i].ctrl = MACB_BIT(TX_USED); } queue->tx_ring[TX_RING_SIZE - 1].ctrl |= MACB_BIT(TX_WRAP); @@ -1694,6 +1713,10 @@ static void macb_configure_dma(struct macb *bp) dmacfg |= GEM_BIT(TXCOEN); else dmacfg &= ~GEM_BIT(TXCOEN); + +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT + dmacfg |= GEM_BIT(ADDR64); +#endif netdev_dbg(bp->dev, "Cadence configure DMA with 0x%08x\n", dmacfg); gem_writel(bp, DMACFG, dmacfg); @@ -1739,9 +1762,15 @@ static void macb_init_hw(struct macb *bp) macb_configure_dma(bp); /* Initialize TX and RX buffers */ - macb_writel(bp, RBQP, bp->rx_ring_dma); + macb_writel(bp, RBQP, (u32)(bp->rx_ring_dma)); +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT + macb_writel(bp, RBQPH, (u32)(bp->rx_ring_dma >> 32)); +#endif for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) { - queue_writel(queue, TBQP, queue->tx_ring_dma); + queue_writel(queue, TBQP, (u32)(queue->tx_ring_dma)); +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT + queue_writel(queue, TBQPH, (u32)(queue->tx_ring_dma >> 32)); +#endif /* Enable interrupts */ queue_writel(queue, IER, @@ -2379,6 +2408,9 @@ static int macb_init(struct platform_device *pdev) queue->IDR = GEM_IDR(hw_q - 1); queue->IMR = GEM_IMR(hw_q - 1); queue->TBQP = GEM_TBQP(hw_q - 1); +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT + queue->TBQPH = GEM_TBQPH(hw_q -1); +#endif } else { /* queue0 uses legacy registers */ queue->ISR = MACB_ISR; @@ -2386,6 +2418,9 @@ static int macb_init(struct platform_device *pdev) queue->IDR = MACB_IDR; queue->IMR = MACB_IMR; queue->TBQP = MACB_TBQP; +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT + queue->TBQPH = MACB_TBQPH; +#endif } /* get irq: here we use the linux queue index, not the hardware @@ -2935,6 +2970,11 @@ static int macb_probe(struct platform_device *pdev) bp->wol |= MACB_WOL_HAS_MAGIC_PACKET; device_init_wakeup(&pdev->dev, bp->wol & MACB_WOL_HAS_MAGIC_PACKET); +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT + if (GEM_BFEXT(DBWDEF, gem_readl(bp, DCFG1)) > GEM_DBW32) + dma_set_mask(&pdev->dev, DMA_BIT_MASK(44)); +#endif + spin_lock_init(&bp->lock); /* setup capabilities */ diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index b6fcf10621b6..aa3aeecc3132 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -66,6 +66,8 @@ #define MACB_USRIO 0x00c0 #define MACB_WOL 0x00c4 #define MACB_MID 0x00fc +#define MACB_TBQPH 0x04C8 +#define MACB_RBQPH 0x04D4 /* GEM register offsets. */ #define GEM_NCFGR 0x0004 /* Network Config */ @@ -139,6 +141,7 @@ #define GEM_ISR(hw_q) (0x0400 + ((hw_q) << 2)) #define GEM_TBQP(hw_q) (0x0440 + ((hw_q) << 2)) +#define GEM_TBQPH(hw_q) (0x04C8) #define GEM_RBQP(hw_q) (0x0480 + ((hw_q) << 2)) #define GEM_IER(hw_q) (0x0600 + ((hw_q) << 2)) #define GEM_IDR(hw_q) (0x0620 + ((hw_q) << 2)) @@ -249,6 +252,8 @@ #define GEM_RXBS_SIZE 8 #define GEM_DDRP_OFFSET 24 /* disc_when_no_ahb */ #define GEM_DDRP_SIZE 1 +#define GEM_ADDR64_OFFSET 30 /* Address bus width - 64b or 32b */ +#define GEM_ADDR64_SIZE 1 /* Bitfields in NSR */ @@ -474,6 +479,10 @@ struct macb_dma_desc { u32 addr; u32 ctrl; +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT + u32 addrh; + u32 resvd; +#endif }; /* DMA descriptor bitfields */ @@ -777,6 +786,7 @@ struct macb_queue { unsigned int IDR; unsigned int IMR; unsigned int TBQP; + unsigned int TBQPH; unsigned int tx_head, tx_tail; struct macb_dma_desc *tx_ring; From 05b8ad25bc8b045721a93d04cf8dcf6f74b3cd6a Mon Sep 17 00:00:00 2001 From: Adam Barth Date: Wed, 10 Aug 2016 09:45:39 -0700 Subject: [PATCH 0114/1715] samples/bpf: fix bpf_perf_event_output prototype The commit 555c8a8623a3 ("bpf: avoid stack copy and use skb ctx for event output") started using 20 of initially reserved upper 32-bits of 'flags' argument in bpf_perf_event_output(). Adjust corresponding prototype in samples/bpf/bpf_helpers.h Signed-off-by: Adam Barth Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- samples/bpf/bpf_helpers.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h index 217c8d507f2e..cbc52df165b4 100644 --- a/samples/bpf/bpf_helpers.h +++ b/samples/bpf/bpf_helpers.h @@ -37,7 +37,9 @@ static int (*bpf_clone_redirect)(void *ctx, int ifindex, int flags) = (void *) BPF_FUNC_clone_redirect; static int (*bpf_redirect)(int ifindex, int flags) = (void *) BPF_FUNC_redirect; -static int (*bpf_perf_event_output)(void *ctx, void *map, int index, void *data, int size) = +static int (*bpf_perf_event_output)(void *ctx, void *map, + unsigned long long flags, void *data, + int size) = (void *) BPF_FUNC_perf_event_output; static int (*bpf_get_stackid)(void *ctx, void *map, int flags) = (void *) BPF_FUNC_get_stackid; From 9fd0375ad376d1f98f4af929fdbefa141c86b800 Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Wed, 10 Aug 2016 00:04:48 +0200 Subject: [PATCH 0115/1715] net: ethernet: renesas: sh_eth: use phydev from struct net_device The private structure contain a pointer to phydev, but the structure net_device already contain such pointer. So we can remove the pointer phy_dev in the private structure, and update the driver to use the one contained in struct net_device. Signed-off-by: Philippe Reynes Tested-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/sh_eth.c | 29 +++++++++++---------------- drivers/net/ethernet/renesas/sh_eth.h | 1 - 2 files changed, 12 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 799d58d86e6d..901ed361e5d1 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -1723,7 +1723,7 @@ out: static void sh_eth_adjust_link(struct net_device *ndev) { struct sh_eth_private *mdp = netdev_priv(ndev); - struct phy_device *phydev = mdp->phydev; + struct phy_device *phydev = ndev->phydev; int new_state = 0; if (phydev->link) { @@ -1800,22 +1800,19 @@ static int sh_eth_phy_init(struct net_device *ndev) phy_attached_info(phydev); - mdp->phydev = phydev; - return 0; } /* PHY control start function */ static int sh_eth_phy_start(struct net_device *ndev) { - struct sh_eth_private *mdp = netdev_priv(ndev); int ret; ret = sh_eth_phy_init(ndev); if (ret) return ret; - phy_start(mdp->phydev); + phy_start(ndev->phydev); return 0; } @@ -1827,11 +1824,11 @@ static int sh_eth_get_settings(struct net_device *ndev, unsigned long flags; int ret; - if (!mdp->phydev) + if (!ndev->phydev) return -ENODEV; spin_lock_irqsave(&mdp->lock, flags); - ret = phy_ethtool_gset(mdp->phydev, ecmd); + ret = phy_ethtool_gset(ndev->phydev, ecmd); spin_unlock_irqrestore(&mdp->lock, flags); return ret; @@ -1844,7 +1841,7 @@ static int sh_eth_set_settings(struct net_device *ndev, unsigned long flags; int ret; - if (!mdp->phydev) + if (!ndev->phydev) return -ENODEV; spin_lock_irqsave(&mdp->lock, flags); @@ -1852,7 +1849,7 @@ static int sh_eth_set_settings(struct net_device *ndev, /* disable tx and rx */ sh_eth_rcv_snd_disable(ndev); - ret = phy_ethtool_sset(mdp->phydev, ecmd); + ret = phy_ethtool_sset(ndev->phydev, ecmd); if (ret) goto error_exit; @@ -2067,11 +2064,11 @@ static int sh_eth_nway_reset(struct net_device *ndev) unsigned long flags; int ret; - if (!mdp->phydev) + if (!ndev->phydev) return -ENODEV; spin_lock_irqsave(&mdp->lock, flags); - ret = phy_start_aneg(mdp->phydev); + ret = phy_start_aneg(ndev->phydev); spin_unlock_irqrestore(&mdp->lock, flags); return ret; @@ -2408,10 +2405,9 @@ static int sh_eth_close(struct net_device *ndev) sh_eth_dev_exit(ndev); /* PHY Disconnect */ - if (mdp->phydev) { - phy_stop(mdp->phydev); - phy_disconnect(mdp->phydev); - mdp->phydev = NULL; + if (ndev->phydev) { + phy_stop(ndev->phydev); + phy_disconnect(ndev->phydev); } free_irq(ndev->irq, ndev); @@ -2429,8 +2425,7 @@ static int sh_eth_close(struct net_device *ndev) /* ioctl to device function */ static int sh_eth_do_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd) { - struct sh_eth_private *mdp = netdev_priv(ndev); - struct phy_device *phydev = mdp->phydev; + struct phy_device *phydev = ndev->phydev; if (!netif_running(ndev)) return -EINVAL; diff --git a/drivers/net/ethernet/renesas/sh_eth.h b/drivers/net/ethernet/renesas/sh_eth.h index c62380e34a1d..d050f37f3e0f 100644 --- a/drivers/net/ethernet/renesas/sh_eth.h +++ b/drivers/net/ethernet/renesas/sh_eth.h @@ -518,7 +518,6 @@ struct sh_eth_private { /* MII transceiver section. */ u32 phy_id; /* PHY ID */ struct mii_bus *mii_bus; /* MDIO bus control */ - struct phy_device *phydev; /* PHY device control */ int link; phy_interface_t phy_interface; int msg_enable; From f08aff444ae0004c9ae6df3241fc313a5024d375 Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Wed, 10 Aug 2016 00:04:49 +0200 Subject: [PATCH 0116/1715] net: ethernet: renesas: sh_eth: use new api ethtool_{get|set}_link_ksettings The ethtool api {get|set}_settings is deprecated. We move this driver to new api {get|set}_link_ksettings. Signed-off-by: Philippe Reynes Tested-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/sh_eth.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 901ed361e5d1..1f8240aec086 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -1817,8 +1817,8 @@ static int sh_eth_phy_start(struct net_device *ndev) return 0; } -static int sh_eth_get_settings(struct net_device *ndev, - struct ethtool_cmd *ecmd) +static int sh_eth_get_link_ksettings(struct net_device *ndev, + struct ethtool_link_ksettings *cmd) { struct sh_eth_private *mdp = netdev_priv(ndev); unsigned long flags; @@ -1828,14 +1828,14 @@ static int sh_eth_get_settings(struct net_device *ndev, return -ENODEV; spin_lock_irqsave(&mdp->lock, flags); - ret = phy_ethtool_gset(ndev->phydev, ecmd); + ret = phy_ethtool_ksettings_get(ndev->phydev, cmd); spin_unlock_irqrestore(&mdp->lock, flags); return ret; } -static int sh_eth_set_settings(struct net_device *ndev, - struct ethtool_cmd *ecmd) +static int sh_eth_set_link_ksettings(struct net_device *ndev, + const struct ethtool_link_ksettings *cmd) { struct sh_eth_private *mdp = netdev_priv(ndev); unsigned long flags; @@ -1849,11 +1849,11 @@ static int sh_eth_set_settings(struct net_device *ndev, /* disable tx and rx */ sh_eth_rcv_snd_disable(ndev); - ret = phy_ethtool_sset(ndev->phydev, ecmd); + ret = phy_ethtool_ksettings_set(ndev->phydev, cmd); if (ret) goto error_exit; - if (ecmd->duplex == DUPLEX_FULL) + if (cmd->base.duplex == DUPLEX_FULL) mdp->duplex = 1; else mdp->duplex = 0; @@ -2195,8 +2195,6 @@ static int sh_eth_set_ringparam(struct net_device *ndev, } static const struct ethtool_ops sh_eth_ethtool_ops = { - .get_settings = sh_eth_get_settings, - .set_settings = sh_eth_set_settings, .get_regs_len = sh_eth_get_regs_len, .get_regs = sh_eth_get_regs, .nway_reset = sh_eth_nway_reset, @@ -2208,6 +2206,8 @@ static const struct ethtool_ops sh_eth_ethtool_ops = { .get_sset_count = sh_eth_get_sset_count, .get_ringparam = sh_eth_get_ringparam, .set_ringparam = sh_eth_set_ringparam, + .get_link_ksettings = sh_eth_get_link_ksettings, + .set_link_ksettings = sh_eth_set_link_ksettings, }; /* network device open function */ From 3b17fbf87d5dadf123d328ab072334da285748c1 Mon Sep 17 00:00:00 2001 From: Maxim Altshul Date: Mon, 11 Jul 2016 17:15:24 +0300 Subject: [PATCH 0117/1715] mac80211: mesh: Add support for HW RC implementation Mesh HWMP module will be able to rely on the HW RC algorithm if it exists, for path metric calculations. This allows the metric calculation mechanism to calculate a correct metric, based on PER and last TX rate both via HW RC algorithm if it exists or via parameters collected by the SW. Signed-off-by: Maxim Altshul Signed-off-by: Johannes Berg --- net/mac80211/mesh_hwmp.c | 27 +++++++++++++++++++-------- net/mac80211/sta_info.c | 23 +++++++++++++++++++---- net/mac80211/sta_info.h | 2 ++ 3 files changed, 40 insertions(+), 12 deletions(-) diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 8f9c3bde835f..fa7d37cf0351 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -326,22 +326,33 @@ static u32 airtime_link_metric_get(struct ieee80211_local *local, u32 tx_time, estimated_retx; u64 result; - if (sta->mesh->fail_avg >= 100) - return MAX_METRIC; + /* Try to get rate based on HW/SW RC algorithm. + * Rate is returned in units of Kbps, correct this + * to comply with airtime calculation units + * Round up in case we get rate < 100Kbps + */ + rate = DIV_ROUND_UP(sta_get_expected_throughput(sta), 100); - sta_set_rate_info_tx(sta, &sta->tx_stats.last_rate, &rinfo); - rate = cfg80211_calculate_bitrate(&rinfo); - if (WARN_ON(!rate)) - return MAX_METRIC; + if (rate) { + err = 0; + } else { + if (sta->mesh->fail_avg >= 100) + return MAX_METRIC; - err = (sta->mesh->fail_avg << ARITH_SHIFT) / 100; + sta_set_rate_info_tx(sta, &sta->tx_stats.last_rate, &rinfo); + rate = cfg80211_calculate_bitrate(&rinfo); + if (WARN_ON(!rate)) + return MAX_METRIC; + + err = (sta->mesh->fail_avg << ARITH_SHIFT) / 100; + } /* bitrate is in units of 100 Kbps, while we need rate in units of * 1Mbps. This will be corrected on tx_time computation. */ tx_time = (device_constant + 10 * test_frame_len / rate); estimated_retx = ((1 << (2 * ARITH_SHIFT)) / (s_unit - err)); - result = (tx_time * estimated_retx) >> (2 * ARITH_SHIFT) ; + result = (tx_time * estimated_retx) >> (2 * ARITH_SHIFT); return (u32)result; } diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 76b737dcc36f..d1cba819e19a 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -2279,16 +2279,31 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) if (test_sta_flag(sta, WLAN_STA_TDLS_PEER)) sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_TDLS_PEER); + thr = sta_get_expected_throughput(sta); + + if (thr != 0) { + sinfo->filled |= BIT(NL80211_STA_INFO_EXPECTED_THROUGHPUT); + sinfo->expected_throughput = thr; + } +} + +u32 sta_get_expected_throughput(struct sta_info *sta) +{ + struct ieee80211_sub_if_data *sdata = sta->sdata; + struct ieee80211_local *local = sdata->local; + struct rate_control_ref *ref = NULL; + u32 thr = 0; + + if (test_sta_flag(sta, WLAN_STA_RATE_CONTROL)) + ref = local->rate_ctrl; + /* check if the driver has a SW RC implementation */ if (ref && ref->ops->get_expected_throughput) thr = ref->ops->get_expected_throughput(sta->rate_ctrl_priv); else thr = drv_get_expected_throughput(local, &sta->sta); - if (thr != 0) { - sinfo->filled |= BIT(NL80211_STA_INFO_EXPECTED_THROUGHPUT); - sinfo->expected_throughput = thr; - } + return thr; } unsigned long ieee80211_sta_last_active(struct sta_info *sta) diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 78b0ef32dddd..0556be3e3628 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -712,6 +712,8 @@ void sta_set_rate_info_tx(struct sta_info *sta, struct rate_info *rinfo); void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo); +u32 sta_get_expected_throughput(struct sta_info *sta); + void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, unsigned long exp_time); u8 sta_info_tx_streams(struct sta_info *sta); From eae4430ee7c5ea1152400cfc070f3746d41fa134 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 13 Jul 2016 11:00:02 +0200 Subject: [PATCH 0118/1715] mac80211: remove skb header offset mangling in ieee80211_build_hdr Since the code only touches the MAC headers, the offsets to the network/transport headers remain the same throughout this function. Remove pointless pieces of code that try to 'preserve' them. Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 28 ++-------------------------- 1 file changed, 2 insertions(+), 26 deletions(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 502396694f47..1d0746dfea57 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2334,7 +2334,6 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, struct mesh_path __maybe_unused *mppath = NULL, *mpath = NULL; const u8 *encaps_data; int encaps_len, skip_header_bytes; - int nh_pos, h_pos; bool wme_sta = false, authorized = false; bool tdls_peer; bool multicast; @@ -2640,13 +2639,7 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, encaps_len = 0; } - nh_pos = skb_network_header(skb) - skb->data; - h_pos = skb_transport_header(skb) - skb->data; - skb_pull(skb, skip_header_bytes); - nh_pos -= skip_header_bytes; - h_pos -= skip_header_bytes; - head_need = hdrlen + encaps_len + meshhdrlen - skb_headroom(skb); /* @@ -2672,18 +2665,12 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, } } - if (encaps_data) { + if (encaps_data) memcpy(skb_push(skb, encaps_len), encaps_data, encaps_len); - nh_pos += encaps_len; - h_pos += encaps_len; - } #ifdef CONFIG_MAC80211_MESH - if (meshhdrlen > 0) { + if (meshhdrlen > 0) memcpy(skb_push(skb, meshhdrlen), &mesh_hdr, meshhdrlen); - nh_pos += meshhdrlen; - h_pos += meshhdrlen; - } #endif if (ieee80211_is_data_qos(fc)) { @@ -2699,15 +2686,7 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, } else memcpy(skb_push(skb, hdrlen), &hdr, hdrlen); - nh_pos += hdrlen; - h_pos += hdrlen; - - /* Update skb pointers to various headers since this modified frame - * is going to go through Linux networking code that may potentially - * need things like pointer to IP header. */ skb_reset_mac_header(skb); - skb_set_network_header(skb, nh_pos); - skb_set_transport_header(skb, h_pos); info = IEEE80211_SKB_CB(skb); memset(info, 0, sizeof(*info)); @@ -4390,9 +4369,6 @@ void __ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata, int ac = ieee802_1d_to_ac[tid & 7]; skb_reset_mac_header(skb); - skb_reset_network_header(skb); - skb_reset_transport_header(skb); - skb_set_queue_mapping(skb, ac); skb->priority = tid; From 896ff0635a312022c91e2bef30c80abc27af62e8 Mon Sep 17 00:00:00 2001 From: Denis Kenzior Date: Wed, 3 Aug 2016 16:58:33 -0500 Subject: [PATCH 0119/1715] cfg80211: always notify userspace of new wireless netdevs This change alters the semantics of NL80211_CMD_NEW_INTERFACE events by always sending this event whenever a new net_device object associated with a wdev is registered. Prior to this change, this event was only sent as a result of NL80211_CMD_NEW_INTERFACE command sent from userspace. This allows userspace to reliably detect new wireless interfaces (e.g. due to hardware hot-plug events, etc). For wdevs created without an associated net_device object (e.g. NL80211_IFTYPE_P2P_DEVICE), the NL80211_CMD_NEW_INTERFACE event is still generated inside the relevant nl80211 command handler. Signed-off-by: Denis Kenzior Signed-off-by: Johannes Berg --- net/wireless/core.c | 2 ++ net/wireless/nl80211.c | 46 +++++++++++++++++++++++++++++------------- net/wireless/nl80211.h | 3 +++ 3 files changed, 37 insertions(+), 14 deletions(-) diff --git a/net/wireless/core.c b/net/wireless/core.c index 7645e97362c0..7758c0fe781a 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -1079,6 +1079,8 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, wdev->iftype == NL80211_IFTYPE_P2P_CLIENT || wdev->iftype == NL80211_IFTYPE_ADHOC) && !wdev->use_4addr) dev->priv_flags |= IFF_DONT_BRIDGE; + + nl80211_notify_iface(rdev, wdev, NL80211_CMD_NEW_INTERFACE); break; case NETDEV_GOING_DOWN: cfg80211_leave(rdev, wdev); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index f02653a08993..a8c062dbd51d 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2751,7 +2751,7 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct vif_params params; struct wireless_dev *wdev; - struct sk_buff *msg, *event; + struct sk_buff *msg; int err; enum nl80211_iftype type = NL80211_IFTYPE_UNSPECIFIED; u32 flags; @@ -2855,20 +2855,15 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) return -ENOBUFS; } - event = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (event) { - if (nl80211_send_iface(event, 0, 0, 0, - rdev, wdev, false) < 0) { - nlmsg_free(event); - goto out; - } + /* + * For wdevs which have no associated netdev object (e.g. of type + * NL80211_IFTYPE_P2P_DEVICE), emit the NEW_INTERFACE event here. + * For all other types, the event will be generated from the + * netdev notifier + */ + if (!wdev->netdev) + nl80211_notify_iface(rdev, wdev, NL80211_CMD_NEW_INTERFACE); - genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), - event, 0, NL80211_MCGRP_CONFIG, - GFP_KERNEL); - } - -out: return genlmsg_reply(msg, info); } @@ -11847,6 +11842,29 @@ void nl80211_notify_wiphy(struct cfg80211_registered_device *rdev, NL80211_MCGRP_CONFIG, GFP_KERNEL); } +void nl80211_notify_iface(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, + enum nl80211_commands cmd) +{ + struct sk_buff *msg; + + WARN_ON(cmd != NL80211_CMD_NEW_INTERFACE && + cmd != NL80211_CMD_DEL_INTERFACE); + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return; + + if (nl80211_send_iface(msg, 0, 0, 0, rdev, wdev, + cmd == NL80211_CMD_DEL_INTERFACE) < 0) { + nlmsg_free(msg); + return; + } + + genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, + NL80211_MCGRP_CONFIG, GFP_KERNEL); +} + static int nl80211_add_scan_req(struct sk_buff *msg, struct cfg80211_registered_device *rdev) { diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index a63f402b10b7..7e3821d7fcc5 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -7,6 +7,9 @@ int nl80211_init(void); void nl80211_exit(void); void nl80211_notify_wiphy(struct cfg80211_registered_device *rdev, enum nl80211_commands cmd); +void nl80211_notify_iface(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, + enum nl80211_commands cmd); void nl80211_send_scan_start(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev); struct sk_buff *nl80211_build_scan_msg(struct cfg80211_registered_device *rdev, From 7f8ed01ea5d4d9d4acc3bb046de1fc84ac83a5e2 Mon Sep 17 00:00:00 2001 From: Denis Kenzior Date: Wed, 3 Aug 2016 16:58:35 -0500 Subject: [PATCH 0120/1715] cfg80211: always notify userspace when wireless netdev is removed This change alters the semantics of NL80211_CMD_DEL_INTERFACE events by always sending this event whenever a net_device object associated with a wdev is destroyed. Prior to this change, this event was only emitted as a result of NL80211_CMD_DEL_INTERFACE command sent from userspace. This allows userspace to reliably detect when wireless interfaces have been removed, e.g. due to USB removal events, etc. For wireless device objects without an associated net_device (e.g. NL80211_IFTYPE_P2P_DEVICE), the NL80211_CMD_DEL_INTERFACE event is now generated inside cfg80211_unregister_wdev. Signed-off-by: Denis Kenzior Signed-off-by: Johannes Berg --- net/wireless/core.c | 4 ++++ net/wireless/nl80211.c | 18 +----------------- 2 files changed, 5 insertions(+), 17 deletions(-) diff --git a/net/wireless/core.c b/net/wireless/core.c index 7758c0fe781a..2029b49a1df3 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -906,6 +906,8 @@ void cfg80211_unregister_wdev(struct wireless_dev *wdev) if (WARN_ON(wdev->netdev)) return; + nl80211_notify_iface(rdev, wdev, NL80211_CMD_DEL_INTERFACE); + list_del_rcu(&wdev->list); rdev->devlist_generation++; @@ -1159,6 +1161,8 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, * remove and clean it up. */ if (!list_empty(&wdev->list)) { + nl80211_notify_iface(rdev, wdev, + NL80211_CMD_DEL_INTERFACE); sysfs_remove_link(&dev->dev.kobj, "phy80211"); list_del_rcu(&wdev->list); rdev->devlist_generation++; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index a8c062dbd51d..0560870fc69d 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2871,18 +2871,10 @@ static int nl80211_del_interface(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct wireless_dev *wdev = info->user_ptr[1]; - struct sk_buff *msg; - int status; if (!rdev->ops->del_virtual_intf) return -EOPNOTSUPP; - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (msg && nl80211_send_iface(msg, 0, 0, 0, rdev, wdev, true) < 0) { - nlmsg_free(msg); - msg = NULL; - } - /* * If we remove a wireless device without a netdev then clear * user_ptr[1] so that nl80211_post_doit won't dereference it @@ -2893,15 +2885,7 @@ static int nl80211_del_interface(struct sk_buff *skb, struct genl_info *info) if (!wdev->netdev) info->user_ptr[1] = NULL; - status = rdev_del_virtual_intf(rdev, wdev); - if (status >= 0 && msg) - genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), - msg, 0, NL80211_MCGRP_CONFIG, - GFP_KERNEL); - else - nlmsg_free(msg); - - return status; + return rdev_del_virtual_intf(rdev, wdev); } static int nl80211_set_noack_map(struct sk_buff *skb, struct genl_info *info) From 12d20fc9186a742d40e824f575df5aa62be31d69 Mon Sep 17 00:00:00 2001 From: Purushottam Kushwaha Date: Thu, 11 Aug 2016 15:14:02 +0530 Subject: [PATCH 0121/1715] cfg80211: identically validate beacon interval for AP/MESH/IBSS Beacon interval interface combinations validation was missing for MESH/IBSS join, add those. Johannes: also move the beacon interval check disallowing really tiny and really big intervals into the common function, which adds it for AP mode. Signed-off-by: Purushottam Kushwaha Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 16 +++++++++------- net/wireless/util.c | 2 +- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 0560870fc69d..d36c40a4d832 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -7752,12 +7752,13 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info) ibss.beacon_interval = 100; - if (info->attrs[NL80211_ATTR_BEACON_INTERVAL]) { + if (info->attrs[NL80211_ATTR_BEACON_INTERVAL]) ibss.beacon_interval = nla_get_u32(info->attrs[NL80211_ATTR_BEACON_INTERVAL]); - if (ibss.beacon_interval < 1 || ibss.beacon_interval > 10000) - return -EINVAL; - } + + err = cfg80211_validate_beacon_int(rdev, ibss.beacon_interval); + if (err) + return err; if (!rdev->ops->join_ibss) return -EOPNOTSUPP; @@ -9231,9 +9232,10 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_BEACON_INTERVAL]) { setup.beacon_interval = nla_get_u32(info->attrs[NL80211_ATTR_BEACON_INTERVAL]); - if (setup.beacon_interval < 10 || - setup.beacon_interval > 10000) - return -EINVAL; + + err = cfg80211_validate_beacon_int(rdev, setup.beacon_interval); + if (err) + return err; } if (info->attrs[NL80211_ATTR_DTIM_PERIOD]) { diff --git a/net/wireless/util.c b/net/wireless/util.c index b7d1592bd5b8..0675f513e7b9 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1559,7 +1559,7 @@ int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev; int res = 0; - if (!beacon_int) + if (beacon_int < 10 || beacon_int > 10000) return -EINVAL; list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { From 4fdbc67a25ce577b79b3af595e874e9ef921329f Mon Sep 17 00:00:00 2001 From: Maxim Altshul Date: Thu, 11 Aug 2016 13:38:16 +0300 Subject: [PATCH 0122/1715] mac80211: call get_expected_throughput only after adding station Depending on which method the driver implements, userspace could call this (indirectly, by getting station info) before the driver knows about the station, possibly causing it to misbehave. Therefore, add a check for sta->uploaded which indicates that the driver knows about the station. Signed-off-by: Maxim Altshul [reword commit message] Signed-off-by: Johannes Berg --- net/mac80211/driver-ops.h | 8 ++++---- net/mac80211/sta_info.c | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index ba5fc1f01e53..42a41ae405ba 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -1088,13 +1088,13 @@ static inline void drv_leave_ibss(struct ieee80211_local *local, } static inline u32 drv_get_expected_throughput(struct ieee80211_local *local, - struct ieee80211_sta *sta) + struct sta_info *sta) { u32 ret = 0; - trace_drv_get_expected_throughput(sta); - if (local->ops->get_expected_throughput) - ret = local->ops->get_expected_throughput(&local->hw, sta); + trace_drv_get_expected_throughput(&sta->sta); + if (local->ops->get_expected_throughput && sta->uploaded) + ret = local->ops->get_expected_throughput(&local->hw, &sta->sta); trace_drv_return_u32(local, ret); return ret; diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index d1cba819e19a..19f14c907d74 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -2301,7 +2301,7 @@ u32 sta_get_expected_throughput(struct sta_info *sta) if (ref && ref->ops->get_expected_throughput) thr = ref->ops->get_expected_throughput(sta->rate_ctrl_priv); else - thr = drv_get_expected_throughput(local, &sta->sta); + thr = drv_get_expected_throughput(local, sta); return thr; } From ff9a71afc948f609c4eaba47cd6788926e7cc105 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 11 Aug 2016 14:59:53 +0200 Subject: [PATCH 0123/1715] nl80211: explicitly check enum nl80211_mesh_power_mode Different gcc versions appear to be treating enum with different signedness, causing warnings with the out parameter one way or the other. Just use the correct type to avoid all that. Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index d36c40a4d832..499785778983 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -5353,6 +5353,18 @@ static int nl80211_check_s32(const struct nlattr *nla, s32 min, s32 max, s32 *ou return 0; } +static int nl80211_check_power_mode(const struct nlattr *nla, + enum nl80211_mesh_power_mode min, + enum nl80211_mesh_power_mode max, + enum nl80211_mesh_power_mode *out) +{ + u32 val = nla_get_u32(nla); + if (val < min || val > max) + return -EINVAL; + *out = val; + return 0; +} + static int nl80211_parse_mesh_config(struct genl_info *info, struct mesh_config *cfg, u32 *mask_out) @@ -5497,7 +5509,7 @@ do { \ NL80211_MESH_POWER_ACTIVE, NL80211_MESH_POWER_MAX, mask, NL80211_MESHCONF_POWER_MODE, - nl80211_check_u32); + nl80211_check_power_mode); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshAwakeWindowDuration, 0, 65535, mask, NL80211_MESHCONF_AWAKE_WINDOW, nl80211_check_u16); From e5e693ab49a95e1994979972eea224eefa81eba9 Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Sat, 23 Jul 2016 19:21:47 +0800 Subject: [PATCH 0124/1715] netfilter: conntrack: Only need first 4 bytes to get l4proto ports We only need first 4 bytes instead of 8 bytes to get the ports of tcp/udp/dccp/sctp/udplite in their pkt_to_tuple function. Signed-off-by: Gao Feng Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_proto_dccp.c | 3 ++- net/netfilter/nf_conntrack_proto_sctp.c | 4 ++-- net/netfilter/nf_conntrack_proto_tcp.c | 4 ++-- net/netfilter/nf_conntrack_proto_udp.c | 4 ++-- net/netfilter/nf_conntrack_proto_udplite.c | 3 ++- 5 files changed, 10 insertions(+), 8 deletions(-) diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 399a38fd685a..a45bee52dccc 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -402,7 +402,8 @@ static bool dccp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, { struct dccp_hdr _hdr, *dh; - dh = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); + /* Actually only need first 4 bytes to get ports. */ + dh = skb_header_pointer(skb, dataoff, 4, &_hdr); if (dh == NULL) return false; diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 1d7ab960a9e6..e769f0561621 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -161,8 +161,8 @@ static bool sctp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, const struct sctphdr *hp; struct sctphdr _hdr; - /* Actually only need first 8 bytes. */ - hp = skb_header_pointer(skb, dataoff, 8, &_hdr); + /* Actually only need first 4 bytes to get ports. */ + hp = skb_header_pointer(skb, dataoff, 4, &_hdr); if (hp == NULL) return false; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 70c8381641a7..4abe9e1f8909 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -282,8 +282,8 @@ static bool tcp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, const struct tcphdr *hp; struct tcphdr _hdr; - /* Actually only need first 8 bytes. */ - hp = skb_header_pointer(skb, dataoff, 8, &_hdr); + /* Actually only need first 4 bytes to get ports. */ + hp = skb_header_pointer(skb, dataoff, 4, &_hdr); if (hp == NULL) return false; diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 4fd040575ffe..8a057e1e1247 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -44,8 +44,8 @@ static bool udp_pkt_to_tuple(const struct sk_buff *skb, const struct udphdr *hp; struct udphdr _hdr; - /* Actually only need first 8 bytes. */ - hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); + /* Actually only need first 4 bytes to get ports. */ + hp = skb_header_pointer(skb, dataoff, 4, &_hdr); if (hp == NULL) return false; diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index 9d692f5adb94..029206e8dec4 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -54,7 +54,8 @@ static bool udplite_pkt_to_tuple(const struct sk_buff *skb, const struct udphdr *hp; struct udphdr _hdr; - hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); + /* Actually only need first 4 bytes to get ports. */ + hp = skb_header_pointer(skb, dataoff, 4, &_hdr); if (hp == NULL) return false; From ceee4091d622790a7a06e5b202670ef9fdfe8c79 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Mon, 25 Jul 2016 15:24:43 +0800 Subject: [PATCH 0125/1715] netfilter: physdev: add missed blank Signed-off-by: Hangbin Liu Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_physdev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index e5f18988aee0..bb33598e4530 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c @@ -107,8 +107,8 @@ static int physdev_mt_check(const struct xt_mtchk_param *par) info->invert & XT_PHYSDEV_OP_BRIDGED) && par->hook_mask & ((1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) | (1 << NF_INET_POST_ROUTING))) { - pr_info("using --physdev-out and --physdev-is-out are only" - "supported in the FORWARD and POSTROUTING chains with" + pr_info("using --physdev-out and --physdev-is-out are only " + "supported in the FORWARD and POSTROUTING chains with " "bridged traffic.\n"); if (par->hook_mask & (1 << NF_INET_LOCAL_OUT)) return -EINVAL; From 9f7c824a44c9f03a5ee9b6291b0685cbdb89ae58 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sat, 30 Jul 2016 19:44:12 +0800 Subject: [PATCH 0126/1715] netfilter: nf_dup4: remove redundant checksum recalculation IP header checksum will be recalculated at ip_local_out, so there's no need to calculated it here, remove it. Also update code comments to illustrate it, and delete the misleading comments about checksum recalculation. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nf_dup_ipv4.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/net/ipv4/netfilter/nf_dup_ipv4.c b/net/ipv4/netfilter/nf_dup_ipv4.c index ceb187308120..cf986e1c7bbd 100644 --- a/net/ipv4/netfilter/nf_dup_ipv4.c +++ b/net/ipv4/netfilter/nf_dup_ipv4.c @@ -74,21 +74,19 @@ void nf_dup_ipv4(struct net *net, struct sk_buff *skb, unsigned int hooknum, nf_conntrack_get(skb->nfct); #endif /* - * If we are in PREROUTING/INPUT, the checksum must be recalculated - * since the length could have changed as a result of defragmentation. - * - * We also decrease the TTL to mitigate potential loops between two - * hosts. + * If we are in PREROUTING/INPUT, decrease the TTL to mitigate potential + * loops between two hosts. * * Set %IP_DF so that the original source is notified of a potentially * decreased MTU on the clone route. IPv6 does this too. + * + * IP header checksum will be recalculated at ip_local_out. */ iph = ip_hdr(skb); iph->frag_off |= htons(IP_DF); if (hooknum == NF_INET_PRE_ROUTING || hooknum == NF_INET_LOCAL_IN) --iph->ttl; - ip_send_check(iph); if (nf_dup_ipv4_route(net, skb, gw, oif)) { __this_cpu_write(nf_skb_duplicated, true); From d0b35b93d45cedf1dc561aba4027441b3d28d290 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 3 Aug 2016 02:45:07 +0200 Subject: [PATCH 0127/1715] netfilter: use_nf_conn_expires helper in more places ... so we don't need to touch all of these places when we get rid of the timer in nf_conn. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c | 3 +-- net/netfilter/nf_conntrack_netlink.c | 5 +---- net/netfilter/nf_conntrack_standalone.c | 3 +-- net/netfilter/xt_conntrack.c | 4 +--- 4 files changed, 4 insertions(+), 11 deletions(-) diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index 63923710f325..67bfc69e00bc 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -163,8 +163,7 @@ static int ct_seq_show(struct seq_file *s, void *v) ret = -ENOSPC; seq_printf(s, "%-8s %u %ld ", l4proto->name, nf_ct_protonum(ct), - timer_pending(&ct->timeout) - ? (long)(ct->timeout.expires - jiffies)/HZ : 0); + nf_ct_expires(ct) / HZ); if (l4proto->print_conntrack) l4proto->print_conntrack(s, ct); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 050bb3420a6b..68800c10a320 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -149,10 +149,7 @@ nla_put_failure: static int ctnetlink_dump_timeout(struct sk_buff *skb, const struct nf_conn *ct) { - long timeout = ((long)ct->timeout.expires - (long)jiffies) / HZ; - - if (timeout < 0) - timeout = 0; + long timeout = nf_ct_expires(ct) / HZ; if (nla_put_be32(skb, CTA_TIMEOUT, htonl(timeout))) goto nla_put_failure; diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 958a1455ca7f..4e7becde4357 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -224,8 +224,7 @@ static int ct_seq_show(struct seq_file *s, void *v) seq_printf(s, "%-8s %u %-8s %u %ld ", l3proto->name, nf_ct_l3num(ct), l4proto->name, nf_ct_protonum(ct), - timer_pending(&ct->timeout) - ? (long)(ct->timeout.expires - jiffies)/HZ : 0); + nf_ct_expires(ct) / HZ); if (l4proto->print_conntrack) l4proto->print_conntrack(s, ct); diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index 188404b9b002..a3b8f697cfc5 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -233,10 +233,8 @@ conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par, return false; if (info->match_flags & XT_CONNTRACK_EXPIRES) { - unsigned long expires = 0; + unsigned long expires = nf_ct_expires(ct) / HZ; - if (timer_pending(&ct->timeout)) - expires = (ct->timeout.expires - jiffies) / HZ; if ((expires >= info->expires_min && expires <= info->expires_max) ^ !(info->invert_flags & XT_CONNTRACK_EXPIRES)) From a6c46d9bc9d8ca6e30e681aadd30b73c44434b7d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 3 Aug 2016 15:21:28 +0200 Subject: [PATCH 0128/1715] ipvs: use nf_ct_kill helper Once timer is removed from nf_conn struct we cannot open-code the removal sequence anymore. Signed-off-by: Florian Westphal Acked-by: Julian Anastasov Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_nfct.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c index f04fd8df210b..fc230d99aa3b 100644 --- a/net/netfilter/ipvs/ip_vs_nfct.c +++ b/net/netfilter/ipvs/ip_vs_nfct.c @@ -281,13 +281,10 @@ void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp) h = nf_conntrack_find_get(cp->ipvs->net, &nf_ct_zone_dflt, &tuple); if (h) { ct = nf_ct_tuplehash_to_ctrack(h); - /* Show what happens instead of calling nf_ct_kill() */ - if (del_timer(&ct->timeout)) { - IP_VS_DBG(7, "%s: ct=%p, deleted conntrack timer for tuple=" + if (nf_ct_kill(ct)) { + IP_VS_DBG(7, "%s: ct=%p, deleted conntrack for tuple=" FMT_TUPLE "\n", __func__, ct, ARG_TUPLE(&tuple)); - if (ct->timeout.function) - ct->timeout.function(ct->timeout.data); } else { IP_VS_DBG(7, "%s: ct=%p, no conntrack timer for tuple=" FMT_TUPLE "\n", From 0ed6389c483dc77cdbdd48de0ca7ce41723dd667 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 9 Aug 2016 16:11:46 +0200 Subject: [PATCH 0129/1715] netfilter: nf_tables: rename set implementations Use nft_set_* prefix for backend set implementations, thus we can use nft_hash for the new hash expression. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/Kconfig | 4 ++-- net/netfilter/Makefile | 4 ++-- net/netfilter/{nft_hash.c => nft_set_hash.c} | 0 net/netfilter/{nft_rbtree.c => nft_set_rbtree.c} | 0 4 files changed, 4 insertions(+), 4 deletions(-) rename net/netfilter/{nft_hash.c => nft_set_hash.c} (100%) rename net/netfilter/{nft_rbtree.c => nft_set_rbtree.c} (100%) diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 9266ceebd112..e5740e108a0b 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -481,13 +481,13 @@ config NFT_CT This option adds the "meta" expression that you can use to match connection tracking information such as the flow state. -config NFT_RBTREE +config NFT_SET_RBTREE tristate "Netfilter nf_tables rbtree set module" help This option adds the "rbtree" set type (Red Black tree) that is used to build interval-based sets. -config NFT_HASH +config NFT_SET_HASH tristate "Netfilter nf_tables hash set module" help This option adds the "hash" set type that is used to build one-way diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 69134541d65b..101fb859203c 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -86,8 +86,8 @@ obj-$(CONFIG_NFT_NAT) += nft_nat.o obj-$(CONFIG_NFT_QUEUE) += nft_queue.o obj-$(CONFIG_NFT_REJECT) += nft_reject.o obj-$(CONFIG_NFT_REJECT_INET) += nft_reject_inet.o -obj-$(CONFIG_NFT_RBTREE) += nft_rbtree.o -obj-$(CONFIG_NFT_HASH) += nft_hash.o +obj-$(CONFIG_NFT_SET_RBTREE) += nft_set_rbtree.o +obj-$(CONFIG_NFT_SET_HASH) += nft_set_hash.o obj-$(CONFIG_NFT_COUNTER) += nft_counter.o obj-$(CONFIG_NFT_LOG) += nft_log.o obj-$(CONFIG_NFT_MASQ) += nft_masq.o diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_set_hash.c similarity index 100% rename from net/netfilter/nft_hash.c rename to net/netfilter/nft_set_hash.c diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_set_rbtree.c similarity index 100% rename from net/netfilter/nft_rbtree.c rename to net/netfilter/nft_set_rbtree.c From a5eefc1df641f3c99fe54b309e7b79c18cec4a1e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 11 Aug 2016 15:17:52 +0200 Subject: [PATCH 0130/1715] xfrm: policy: use rcu versions for iteration and list add/del This is required once we allow lockless readers. Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index b5e665b3cfb0..93b8ff74001f 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -426,14 +426,14 @@ redo: h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr, pol->family, nhashmask, dbits, sbits); if (!entry0) { - hlist_del(&pol->bydst); - hlist_add_head(&pol->bydst, ndsttable+h); + hlist_del_rcu(&pol->bydst); + hlist_add_head_rcu(&pol->bydst, ndsttable + h); h0 = h; } else { if (h != h0) continue; - hlist_del(&pol->bydst); - hlist_add_behind(&pol->bydst, entry0); + hlist_del_rcu(&pol->bydst); + hlist_add_behind_rcu(&pol->bydst, entry0); } entry0 = &pol->bydst; } @@ -1106,7 +1106,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, read_lock_bh(&net->xfrm.xfrm_policy_lock); chain = policy_hash_direct(net, daddr, saddr, family, dir); ret = NULL; - hlist_for_each_entry(pol, chain, bydst) { + hlist_for_each_entry_rcu(pol, chain, bydst) { err = xfrm_policy_match(pol, fl, type, family, dir); if (err) { if (err == -ESRCH) @@ -1122,7 +1122,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, } } chain = &net->xfrm.policy_inexact[dir]; - hlist_for_each_entry(pol, chain, bydst) { + hlist_for_each_entry_rcu(pol, chain, bydst) { if ((pol->priority >= priority) && ret) break; @@ -1271,7 +1271,7 @@ static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, /* Socket policies are not hashed. */ if (!hlist_unhashed(&pol->bydst)) { - hlist_del(&pol->bydst); + hlist_del_rcu(&pol->bydst); hlist_del(&pol->byidx); } From e1e551bc56302b80ff930c966f9985095fb1b70d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 11 Aug 2016 15:17:53 +0200 Subject: [PATCH 0131/1715] xfrm: policy: prepare policy_bydst hash for rcu lookups Since commit 56f047305dd4b6b617 ("xfrm: add rcu grace period in xfrm_policy_destroy()") xfrm policy objects are already free'd via rcu. In order to make more places lockless (i.e. use rcu_read_lock instead of grabbing read-side of policy rwlock) we only need to: - use rcu_assign_pointer to store address of new hash table backend memory - add rcu barrier so that freeing of old memory is delayed (expansion and free happens from system workqueue, so synchronize_rcu is fine) - use rcu_dereference to fetch current address of the hash table. Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 93b8ff74001f..4a8d90a88c83 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -385,9 +385,11 @@ static struct hlist_head *policy_hash_bysel(struct net *net, __get_hash_thresh(net, family, dir, &dbits, &sbits); hash = __sel_hash(sel, family, hmask, dbits, sbits); - return (hash == hmask + 1 ? - &net->xfrm.policy_inexact[dir] : - net->xfrm.policy_bydst[dir].table + hash); + if (hash == hmask + 1) + return &net->xfrm.policy_inexact[dir]; + + return rcu_dereference_check(net->xfrm.policy_bydst[dir].table, + lockdep_is_held(&net->xfrm.xfrm_policy_lock)) + hash; } static struct hlist_head *policy_hash_direct(struct net *net, @@ -403,7 +405,8 @@ static struct hlist_head *policy_hash_direct(struct net *net, __get_hash_thresh(net, family, dir, &dbits, &sbits); hash = __addr_hash(daddr, saddr, family, hmask, dbits, sbits); - return net->xfrm.policy_bydst[dir].table + hash; + return rcu_dereference_check(net->xfrm.policy_bydst[dir].table, + lockdep_is_held(&net->xfrm.xfrm_policy_lock)) + hash; } static void xfrm_dst_hash_transfer(struct net *net, @@ -468,8 +471,8 @@ static void xfrm_bydst_resize(struct net *net, int dir) unsigned int hmask = net->xfrm.policy_bydst[dir].hmask; unsigned int nhashmask = xfrm_new_hash_mask(hmask); unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head); - struct hlist_head *odst = net->xfrm.policy_bydst[dir].table; struct hlist_head *ndst = xfrm_hash_alloc(nsize); + struct hlist_head *odst; int i; if (!ndst) @@ -477,14 +480,19 @@ static void xfrm_bydst_resize(struct net *net, int dir) write_lock_bh(&net->xfrm.xfrm_policy_lock); + odst = rcu_dereference_protected(net->xfrm.policy_bydst[dir].table, + lockdep_is_held(&net->xfrm.xfrm_policy_lock)); + for (i = hmask; i >= 0; i--) xfrm_dst_hash_transfer(net, odst + i, ndst, nhashmask, dir); - net->xfrm.policy_bydst[dir].table = ndst; + rcu_assign_pointer(net->xfrm.policy_bydst[dir].table, ndst); net->xfrm.policy_bydst[dir].hmask = nhashmask; write_unlock_bh(&net->xfrm.xfrm_policy_lock); + synchronize_rcu(); + xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head)); } From 30846090a746edfdb230deadd638cfa96f7b8c91 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 11 Aug 2016 15:17:54 +0200 Subject: [PATCH 0132/1715] xfrm: policy: add sequence count to sync with hash resize Once xfrm_policy_lookup_bytype doesn't grab xfrm_policy_lock anymore its possible for a hash resize to occur in parallel. Use sequence counter to block lookup in case a resize is in progress and to also re-lookup in case hash table was altered in the mean time (might cause use to not find the best-match). Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 4a8d90a88c83..576d90321068 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -49,6 +49,7 @@ static struct xfrm_policy_afinfo __rcu *xfrm_policy_afinfo[NPROTO] __read_mostly; static struct kmem_cache *xfrm_dst_cache __read_mostly; +static __read_mostly seqcount_t xfrm_policy_hash_generation; static void xfrm_init_pmtu(struct dst_entry *dst); static int stale_bundle(struct dst_entry *dst); @@ -479,6 +480,10 @@ static void xfrm_bydst_resize(struct net *net, int dir) return; write_lock_bh(&net->xfrm.xfrm_policy_lock); + write_seqcount_begin(&xfrm_policy_hash_generation); + + odst = rcu_dereference_protected(net->xfrm.policy_bydst[dir].table, + lockdep_is_held(&net->xfrm.xfrm_policy_lock)); odst = rcu_dereference_protected(net->xfrm.policy_bydst[dir].table, lockdep_is_held(&net->xfrm.xfrm_policy_lock)); @@ -489,6 +494,7 @@ static void xfrm_bydst_resize(struct net *net, int dir) rcu_assign_pointer(net->xfrm.policy_bydst[dir].table, ndst); net->xfrm.policy_bydst[dir].hmask = nhashmask; + write_seqcount_end(&xfrm_policy_hash_generation); write_unlock_bh(&net->xfrm.xfrm_policy_lock); synchronize_rcu(); @@ -1104,7 +1110,8 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, struct xfrm_policy *pol, *ret; const xfrm_address_t *daddr, *saddr; struct hlist_head *chain; - u32 priority = ~0U; + unsigned int sequence; + u32 priority; daddr = xfrm_flowi_daddr(fl, family); saddr = xfrm_flowi_saddr(fl, family); @@ -1112,7 +1119,13 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, return NULL; read_lock_bh(&net->xfrm.xfrm_policy_lock); - chain = policy_hash_direct(net, daddr, saddr, family, dir); + retry: + do { + sequence = read_seqcount_begin(&xfrm_policy_hash_generation); + chain = policy_hash_direct(net, daddr, saddr, family, dir); + } while (read_seqcount_retry(&xfrm_policy_hash_generation, sequence)); + + priority = ~0U; ret = NULL; hlist_for_each_entry_rcu(pol, chain, bydst) { err = xfrm_policy_match(pol, fl, type, family, dir); @@ -1148,6 +1161,9 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, } } + if (read_seqcount_retry(&xfrm_policy_hash_generation, sequence)) + goto retry; + xfrm_pol_hold(ret); fail: read_unlock_bh(&net->xfrm.xfrm_policy_lock); @@ -3090,6 +3106,7 @@ static struct pernet_operations __net_initdata xfrm_net_ops = { void __init xfrm_init(void) { register_pernet_subsys(&xfrm_net_ops); + seqcount_init(&xfrm_policy_hash_generation); xfrm_input_init(); } From e37cc8ade5afaf082f804c6d18eb23377146bec4 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 11 Aug 2016 15:17:55 +0200 Subject: [PATCH 0133/1715] xfrm: policy: use atomic_inc_not_zero in rcu section If we don't hold the policy lock anymore the refcnt might already be 0, i.e. policy struct is about to be free'd. Switch to atomic_inc_not_zero to avoid this. On removal policies are already unlinked from the tables (lists) before the last _put occurs so we are not supposed to find the same 'dead' entry on the next loop, so its safe to just repeat the lookup. Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 576d90321068..09f2e2b38246 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -60,6 +60,11 @@ static void __xfrm_policy_link(struct xfrm_policy *pol, int dir); static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, int dir); +static inline bool xfrm_pol_hold_rcu(struct xfrm_policy *policy) +{ + return atomic_inc_not_zero(&policy->refcnt); +} + static inline bool __xfrm4_selector_match(const struct xfrm_selector *sel, const struct flowi *fl) { @@ -1164,7 +1169,8 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, if (read_seqcount_retry(&xfrm_policy_hash_generation, sequence)) goto retry; - xfrm_pol_hold(ret); + if (ret && !xfrm_pol_hold_rcu(ret)) + goto retry; fail: read_unlock_bh(&net->xfrm.xfrm_policy_lock); From a7c44247f704e385c77579d65c6ee6d002832529 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 11 Aug 2016 15:17:56 +0200 Subject: [PATCH 0134/1715] xfrm: policy: make xfrm_policy_lookup_bytype lockless side effect: no longer disables BH (should be fine). Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- include/net/netns/xfrm.h | 2 +- net/xfrm/xfrm_policy.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 1ab51d188408..3ab828a97e68 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -11,7 +11,7 @@ struct ctl_table_header; struct xfrm_policy_hash { - struct hlist_head *table; + struct hlist_head __rcu *table; unsigned int hmask; u8 dbits4; u8 sbits4; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 09f2e2b38246..9302647f20a0 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1123,7 +1123,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, if (unlikely(!daddr || !saddr)) return NULL; - read_lock_bh(&net->xfrm.xfrm_policy_lock); + rcu_read_lock(); retry: do { sequence = read_seqcount_begin(&xfrm_policy_hash_generation); @@ -1172,7 +1172,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, if (ret && !xfrm_pol_hold_rcu(ret)) goto retry; fail: - read_unlock_bh(&net->xfrm.xfrm_policy_lock); + rcu_read_unlock(); return ret; } From ae33786f73a7ce5b15ce29e8f342e43606385cef Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 11 Aug 2016 15:17:57 +0200 Subject: [PATCH 0135/1715] xfrm: policy: only use rcu in xfrm_sk_policy_lookup Don't acquire the readlock anymore and rely on rcu alone. In case writer on other CPU changed policy at the wrong moment (after we obtained sk policy pointer but before we could obtain the reference) just repeat the lookup. Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 9302647f20a0..3d27b9a2fbac 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1249,10 +1249,9 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir, const struct flowi *fl) { struct xfrm_policy *pol; - struct net *net = sock_net(sk); rcu_read_lock(); - read_lock_bh(&net->xfrm.xfrm_policy_lock); + again: pol = rcu_dereference(sk->sk_policy[dir]); if (pol != NULL) { bool match = xfrm_selector_match(&pol->selector, fl, @@ -1267,8 +1266,8 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir, err = security_xfrm_policy_lookup(pol->security, fl->flowi_secid, policy_to_flow_dir(dir)); - if (!err) - xfrm_pol_hold(pol); + if (!err && !xfrm_pol_hold_rcu(pol)) + goto again; else if (err == -ESRCH) pol = NULL; else @@ -1277,7 +1276,6 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir, pol = NULL; } out: - read_unlock_bh(&net->xfrm.xfrm_policy_lock); rcu_read_unlock(); return pol; } From d5b8f86dc7200d16e48bb3a6aaac29c0cdf621c9 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 11 Aug 2016 15:17:58 +0200 Subject: [PATCH 0136/1715] xfrm: policy: don't acquire policy lock in xfrm_spd_getinfo It doesn't seem that important. We now get inconsistent view of the counters, but those are stale anyway right after we drop the lock. Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 3d27b9a2fbac..35b85a9a358c 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -560,7 +560,6 @@ static inline int xfrm_byidx_should_resize(struct net *net, int total) void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si) { - read_lock_bh(&net->xfrm.xfrm_policy_lock); si->incnt = net->xfrm.policy_count[XFRM_POLICY_IN]; si->outcnt = net->xfrm.policy_count[XFRM_POLICY_OUT]; si->fwdcnt = net->xfrm.policy_count[XFRM_POLICY_FWD]; @@ -569,7 +568,6 @@ void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si) si->fwdscnt = net->xfrm.policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX]; si->spdhcnt = net->xfrm.policy_idx_hmask; si->spdhmcnt = xfrm_policy_hashmax; - read_unlock_bh(&net->xfrm.xfrm_policy_lock); } EXPORT_SYMBOL(xfrm_spd_getinfo); From 9d0380df6217e8dd014118fa1c99dda9974f3613 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 11 Aug 2016 15:17:59 +0200 Subject: [PATCH 0137/1715] xfrm: policy: convert policy_lock to spinlock After earlier patches conversions all spots acquire the writer lock and we can now convert this to a normal spinlock. Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- include/net/netns/xfrm.h | 2 +- net/xfrm/xfrm_policy.c | 68 ++++++++++++++++++++-------------------- 2 files changed, 35 insertions(+), 35 deletions(-) diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 3ab828a97e68..177ed444d7b2 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -73,7 +73,7 @@ struct netns_xfrm { struct dst_ops xfrm6_dst_ops; #endif spinlock_t xfrm_state_lock; - rwlock_t xfrm_policy_lock; + spinlock_t xfrm_policy_lock; struct mutex xfrm_cfg_mutex; /* flow cache part */ diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 35b85a9a358c..dd01fd2e55fa 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -484,7 +484,7 @@ static void xfrm_bydst_resize(struct net *net, int dir) if (!ndst) return; - write_lock_bh(&net->xfrm.xfrm_policy_lock); + spin_lock_bh(&net->xfrm.xfrm_policy_lock); write_seqcount_begin(&xfrm_policy_hash_generation); odst = rcu_dereference_protected(net->xfrm.policy_bydst[dir].table, @@ -500,7 +500,7 @@ static void xfrm_bydst_resize(struct net *net, int dir) net->xfrm.policy_bydst[dir].hmask = nhashmask; write_seqcount_end(&xfrm_policy_hash_generation); - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); synchronize_rcu(); @@ -519,7 +519,7 @@ static void xfrm_byidx_resize(struct net *net, int total) if (!nidx) return; - write_lock_bh(&net->xfrm.xfrm_policy_lock); + spin_lock_bh(&net->xfrm.xfrm_policy_lock); for (i = hmask; i >= 0; i--) xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask); @@ -527,7 +527,7 @@ static void xfrm_byidx_resize(struct net *net, int total) net->xfrm.policy_byidx = nidx; net->xfrm.policy_idx_hmask = nhashmask; - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head)); } @@ -617,7 +617,7 @@ static void xfrm_hash_rebuild(struct work_struct *work) rbits6 = net->xfrm.policy_hthresh.rbits6; } while (read_seqretry(&net->xfrm.policy_hthresh.lock, seq)); - write_lock_bh(&net->xfrm.xfrm_policy_lock); + spin_lock_bh(&net->xfrm.xfrm_policy_lock); /* reset the bydst and inexact table in all directions */ for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { @@ -659,7 +659,7 @@ static void xfrm_hash_rebuild(struct work_struct *work) hlist_add_head(&policy->bydst, chain); } - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); mutex_unlock(&hash_resize_mutex); } @@ -770,7 +770,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) struct hlist_head *chain; struct hlist_node *newpos; - write_lock_bh(&net->xfrm.xfrm_policy_lock); + spin_lock_bh(&net->xfrm.xfrm_policy_lock); chain = policy_hash_bysel(net, &policy->selector, policy->family, dir); delpol = NULL; newpos = NULL; @@ -781,7 +781,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) xfrm_sec_ctx_match(pol->security, policy->security) && !WARN_ON(delpol)) { if (excl) { - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); return -EEXIST; } delpol = pol; @@ -817,7 +817,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) policy->curlft.use_time = 0; if (!mod_timer(&policy->timer, jiffies + HZ)) xfrm_pol_hold(policy); - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); if (delpol) xfrm_policy_kill(delpol); @@ -837,7 +837,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type, struct hlist_head *chain; *err = 0; - write_lock_bh(&net->xfrm.xfrm_policy_lock); + spin_lock_bh(&net->xfrm.xfrm_policy_lock); chain = policy_hash_bysel(net, sel, sel->family, dir); ret = NULL; hlist_for_each_entry(pol, chain, bydst) { @@ -850,7 +850,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type, *err = security_xfrm_policy_delete( pol->security); if (*err) { - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); return pol; } __xfrm_policy_unlink(pol, dir); @@ -859,7 +859,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type, break; } } - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); if (ret && delete) xfrm_policy_kill(ret); @@ -878,7 +878,7 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type, return NULL; *err = 0; - write_lock_bh(&net->xfrm.xfrm_policy_lock); + spin_lock_bh(&net->xfrm.xfrm_policy_lock); chain = net->xfrm.policy_byidx + idx_hash(net, id); ret = NULL; hlist_for_each_entry(pol, chain, byidx) { @@ -889,7 +889,7 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type, *err = security_xfrm_policy_delete( pol->security); if (*err) { - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); return pol; } __xfrm_policy_unlink(pol, dir); @@ -898,7 +898,7 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type, break; } } - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); if (ret && delete) xfrm_policy_kill(ret); @@ -956,7 +956,7 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid) { int dir, err = 0, cnt = 0; - write_lock_bh(&net->xfrm.xfrm_policy_lock); + spin_lock_bh(&net->xfrm.xfrm_policy_lock); err = xfrm_policy_flush_secctx_check(net, type, task_valid); if (err) @@ -972,14 +972,14 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid) if (pol->type != type) continue; __xfrm_policy_unlink(pol, dir); - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); cnt++; xfrm_audit_policy_delete(pol, 1, task_valid); xfrm_policy_kill(pol); - write_lock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); goto again1; } @@ -991,13 +991,13 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid) if (pol->type != type) continue; __xfrm_policy_unlink(pol, dir); - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); cnt++; xfrm_audit_policy_delete(pol, 1, task_valid); xfrm_policy_kill(pol); - write_lock_bh(&net->xfrm.xfrm_policy_lock); + spin_lock_bh(&net->xfrm.xfrm_policy_lock); goto again2; } } @@ -1006,7 +1006,7 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid) if (!cnt) err = -ESRCH; out: - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); return err; } EXPORT_SYMBOL(xfrm_policy_flush); @@ -1026,7 +1026,7 @@ int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk, if (list_empty(&walk->walk.all) && walk->seq != 0) return 0; - write_lock_bh(&net->xfrm.xfrm_policy_lock); + spin_lock_bh(&net->xfrm.xfrm_policy_lock); if (list_empty(&walk->walk.all)) x = list_first_entry(&net->xfrm.policy_all, struct xfrm_policy_walk_entry, all); else @@ -1054,7 +1054,7 @@ int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk, } list_del_init(&walk->walk.all); out: - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); return error; } EXPORT_SYMBOL(xfrm_policy_walk); @@ -1073,9 +1073,9 @@ void xfrm_policy_walk_done(struct xfrm_policy_walk *walk, struct net *net) if (list_empty(&walk->walk.all)) return; - write_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME where is net? */ + spin_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME where is net? */ list_del(&walk->walk.all); - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); } EXPORT_SYMBOL(xfrm_policy_walk_done); @@ -1321,9 +1321,9 @@ int xfrm_policy_delete(struct xfrm_policy *pol, int dir) { struct net *net = xp_net(pol); - write_lock_bh(&net->xfrm.xfrm_policy_lock); + spin_lock_bh(&net->xfrm.xfrm_policy_lock); pol = __xfrm_policy_unlink(pol, dir); - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); if (pol) { xfrm_policy_kill(pol); return 0; @@ -1342,7 +1342,7 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) return -EINVAL; #endif - write_lock_bh(&net->xfrm.xfrm_policy_lock); + spin_lock_bh(&net->xfrm.xfrm_policy_lock); old_pol = rcu_dereference_protected(sk->sk_policy[dir], lockdep_is_held(&net->xfrm.xfrm_policy_lock)); if (pol) { @@ -1360,7 +1360,7 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) */ xfrm_sk_policy_unlink(old_pol, dir); } - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); if (old_pol) { xfrm_policy_kill(old_pol); @@ -1390,9 +1390,9 @@ static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir) newp->type = old->type; memcpy(newp->xfrm_vec, old->xfrm_vec, newp->xfrm_nr*sizeof(struct xfrm_tmpl)); - write_lock_bh(&net->xfrm.xfrm_policy_lock); + spin_lock_bh(&net->xfrm.xfrm_policy_lock); xfrm_sk_policy_link(newp, dir); - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); xfrm_pol_put(newp); } return newp; @@ -3074,7 +3074,7 @@ static int __net_init xfrm_net_init(struct net *net) /* Initialize the per-net locks here */ spin_lock_init(&net->xfrm.xfrm_state_lock); - rwlock_init(&net->xfrm.xfrm_policy_lock); + spin_lock_init(&net->xfrm.xfrm_policy_lock); mutex_init(&net->xfrm.xfrm_cfg_mutex); return 0; @@ -3206,7 +3206,7 @@ static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector * struct hlist_head *chain; u32 priority = ~0U; - read_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME*/ + spin_lock_bh(&net->xfrm.xfrm_policy_lock); chain = policy_hash_direct(net, &sel->daddr, &sel->saddr, sel->family, dir); hlist_for_each_entry(pol, chain, bydst) { if (xfrm_migrate_selector_match(sel, &pol->selector) && @@ -3230,7 +3230,7 @@ static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector * xfrm_pol_hold(ret); - read_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); return ret; } From cb1b69b0b15b2897daeba8674c14c85a23a3347f Mon Sep 17 00:00:00 2001 From: Laura Garcia Liebana Date: Thu, 11 Aug 2016 18:02:07 +0200 Subject: [PATCH 0138/1715] netfilter: nf_tables: add hash expression This patch adds a new hash expression, this provides jhash support but this can be extended to support for other hash functions. The modulus and seed already comes embedded into this new expression. Use case example: ... meta mark set hash ip saddr mod 10 Signed-off-by: Laura Garcia Liebana Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 20 ++++ net/netfilter/Kconfig | 6 + net/netfilter/Makefile | 1 + net/netfilter/nft_hash.c | 136 +++++++++++++++++++++++ 4 files changed, 163 insertions(+) create mode 100644 net/netfilter/nft_hash.c diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 01751faccaf8..6ce0a6dd0889 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -723,6 +723,26 @@ enum nft_meta_keys { NFT_META_PRANDOM, }; +/** + * enum nft_hash_attributes - nf_tables hash expression netlink attributes + * + * @NFTA_HASH_SREG: source register (NLA_U32) + * @NFTA_HASH_DREG: destination register (NLA_U32) + * @NFTA_HASH_LEN: source data length (NLA_U32) + * @NFTA_HASH_MODULUS: modulus value (NLA_U32) + * @NFTA_HASH_SEED: seed value (NLA_U32) + */ +enum nft_hash_attributes { + NFTA_HASH_UNSPEC, + NFTA_HASH_SREG, + NFTA_HASH_DREG, + NFTA_HASH_LEN, + NFTA_HASH_MODULUS, + NFTA_HASH_SEED, + __NFTA_HASH_MAX, +}; +#define NFTA_HASH_MAX (__NFTA_HASH_MAX - 1) + /** * enum nft_meta_attributes - nf_tables meta expression netlink attributes * diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index e5740e108a0b..9cfaa00c79b2 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -563,6 +563,12 @@ config NFT_COMPAT x_tables match/target extensions over the nf_tables framework. +config NFT_HASH + tristate "Netfilter nf_tables hash module" + help + This option adds the "hash" expression that you can use to perform + a hash operation on registers. + if NF_TABLES_NETDEV config NF_DUP_NETDEV diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 101fb859203c..1106ccde215c 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -92,6 +92,7 @@ obj-$(CONFIG_NFT_COUNTER) += nft_counter.o obj-$(CONFIG_NFT_LOG) += nft_log.o obj-$(CONFIG_NFT_MASQ) += nft_masq.o obj-$(CONFIG_NFT_REDIR) += nft_redir.o +obj-$(CONFIG_NFT_HASH) += nft_hash.o # nf_tables netdev obj-$(CONFIG_NFT_DUP_NETDEV) += nft_dup_netdev.o diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c new file mode 100644 index 000000000000..b82ff29b3f5f --- /dev/null +++ b/net/netfilter/nft_hash.c @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2016 Laura Garcia + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_hash { + enum nft_registers sreg:8; + enum nft_registers dreg:8; + u8 len; + u32 modulus; + u32 seed; +}; + +static void nft_hash_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_hash *priv = nft_expr_priv(expr); + const void *data = ®s->data[priv->sreg]; + + regs->data[priv->dreg] = + reciprocal_scale(jhash(data, priv->len, priv->seed), + priv->modulus); +} + +const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = { + [NFTA_HASH_SREG] = { .type = NLA_U32 }, + [NFTA_HASH_DREG] = { .type = NLA_U32 }, + [NFTA_HASH_LEN] = { .type = NLA_U32 }, + [NFTA_HASH_MODULUS] = { .type = NLA_U32 }, + [NFTA_HASH_SEED] = { .type = NLA_U32 }, +}; + +static int nft_hash_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_hash *priv = nft_expr_priv(expr); + u32 len; + + if (!tb[NFTA_HASH_SREG] || + !tb[NFTA_HASH_DREG] || + !tb[NFTA_HASH_LEN] || + !tb[NFTA_HASH_SEED] || + !tb[NFTA_HASH_MODULUS]) + return -EINVAL; + + priv->sreg = nft_parse_register(tb[NFTA_HASH_SREG]); + priv->dreg = nft_parse_register(tb[NFTA_HASH_DREG]); + + len = ntohl(nla_get_be32(tb[NFTA_HASH_LEN])); + if (len == 0 || len > U8_MAX) + return -ERANGE; + + priv->len = len; + + priv->modulus = ntohl(nla_get_be32(tb[NFTA_HASH_MODULUS])); + if (priv->modulus <= 1) + return -ERANGE; + + priv->seed = ntohl(nla_get_be32(tb[NFTA_HASH_SEED])); + + return nft_validate_register_load(priv->sreg, len) && + nft_validate_register_store(ctx, priv->dreg, NULL, + NFT_DATA_VALUE, sizeof(u32)); +} + +static int nft_hash_dump(struct sk_buff *skb, + const struct nft_expr *expr) +{ + const struct nft_hash *priv = nft_expr_priv(expr); + + if (nft_dump_register(skb, NFTA_HASH_SREG, priv->sreg)) + goto nla_put_failure; + if (nft_dump_register(skb, NFTA_HASH_DREG, priv->dreg)) + goto nla_put_failure; + if (nft_dump_register(skb, NFTA_HASH_LEN, priv->len)) + goto nla_put_failure; + if (nft_dump_register(skb, NFTA_HASH_MODULUS, priv->modulus)) + goto nla_put_failure; + if (nft_dump_register(skb, NFTA_HASH_SEED, priv->seed)) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_type nft_hash_type; +static const struct nft_expr_ops nft_hash_ops = { + .type = &nft_hash_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_hash)), + .eval = nft_hash_eval, + .init = nft_hash_init, + .dump = nft_hash_dump, +}; + +static struct nft_expr_type nft_hash_type __read_mostly = { + .name = "hash", + .ops = &nft_hash_ops, + .policy = nft_hash_policy, + .maxattr = NFTA_HASH_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_hash_module_init(void) +{ + return nft_register_expr(&nft_hash_type); +} + +static void __exit nft_hash_module_exit(void) +{ + nft_unregister_expr(&nft_hash_type); +} + +module_init(nft_hash_module_init); +module_exit(nft_hash_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Laura Garcia "); +MODULE_ALIAS_NFT_EXPR("hash"); From 300d8b93d73533411a98362350fe7d1795f1a044 Mon Sep 17 00:00:00 2001 From: Appana Durga Kedareswara Rao Date: Wed, 10 Aug 2016 11:20:06 +0530 Subject: [PATCH 0139/1715] net: Add mask for Control register 10Mbps speed This patch adds mask for the Control register 10Mbps speed. Reviewed-by: Florian Fainelli Signed-off-by: Kedareswara rao Appana Signed-off-by: David S. Miller --- include/uapi/linux/mii.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/uapi/linux/mii.h b/include/uapi/linux/mii.h index 237fac4bc17b..15d8510cdae0 100644 --- a/include/uapi/linux/mii.h +++ b/include/uapi/linux/mii.h @@ -48,6 +48,7 @@ #define BMCR_SPEED100 0x2000 /* Select 100Mbps */ #define BMCR_LOOPBACK 0x4000 /* TXD loopback bits */ #define BMCR_RESET 0x8000 /* Reset to default state */ +#define BMCR_SPEED10 0x0000 /* Select 10Mbps */ /* Basic mode status register. */ #define BMSR_ERCAP 0x0001 /* Ext-reg capability */ From 71e11aff34510e4133965d63ae6d91a0cf4aa876 Mon Sep 17 00:00:00 2001 From: Appana Durga Kedareswara Rao Date: Wed, 10 Aug 2016 11:20:07 +0530 Subject: [PATCH 0140/1715] Documentation: DT: net: Add Xilinx gmiitorgmii converter device tree binding documentation Device-tree binding documentation for xilinx gmiitorgmii converter. Signed-off-by: Kedareswara rao Appana Acked-by: Rob Herring Signed-off-by: David S. Miller --- .../bindings/net/xilinx_gmii2rgmii.txt | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 Documentation/devicetree/bindings/net/xilinx_gmii2rgmii.txt diff --git a/Documentation/devicetree/bindings/net/xilinx_gmii2rgmii.txt b/Documentation/devicetree/bindings/net/xilinx_gmii2rgmii.txt new file mode 100644 index 000000000000..038dda48b8e6 --- /dev/null +++ b/Documentation/devicetree/bindings/net/xilinx_gmii2rgmii.txt @@ -0,0 +1,35 @@ +XILINX GMIITORGMII Converter Driver Device Tree Bindings +-------------------------------------------------------- + +The Gigabit Media Independent Interface (GMII) to Reduced Gigabit Media +Independent Interface (RGMII) core provides the RGMII between RGMII-compliant +Ethernet physical media devices (PHY) and the Gigabit Ethernet controller. +This core can be used in all three modes of operation(10/100/1000 Mb/s). +The Management Data Input/Output (MDIO) interface is used to configure the +Speed of operation. This core can switch dynamically between the three +Different speed modes by configuring the conveter register through mdio write. + +This converter sits between the ethernet MAC and the external phy. +MAC <==> GMII2RGMII <==> RGMII_PHY + +For more details about mdio please refer phy.txt file in the same directory. + +Required properties: +- compatible : Should be "xlnx,gmii-to-rgmii-1.0" +- reg : The ID number for the phy, usually a small integer +- phy-handle : Should point to the external phy device. + See ethernet.txt file in the same directory. + +Example: + mdio { + #address-cells = <1>; + #size-cells = <0>; + phy: ethernet-phy@0 { + ...... + }; + gmiitorgmii: gmiitorgmii@8 { + compatible = "xlnx,gmii-to-rgmii-1.0"; + reg = <8>; + phy-handle = <&phy>; + }; + }; From f411a6160bd4278508b5892949ba1a7db6f73489 Mon Sep 17 00:00:00 2001 From: Appana Durga Kedareswara Rao Date: Wed, 10 Aug 2016 11:20:08 +0530 Subject: [PATCH 0141/1715] net: phy: Add gmiitorgmii converter support This patch adds support for gmiitorgmii converter. The GMII to RGMII IP core provides the Reduced Gigabit Media Independent Interface (RGMII) between Ethernet physical media Devices and the Gigabit Ethernet controller. This core can Switch dynamically between the three different speed modes of Operation by configuring the converter register through mdio write. MDIO interface is used to set operating speed of Ethernet MAC. This converter sits between the MAC and the external phy MAC <==> GMII2RGMII <==> RGMII_PHY Signed-off-by: Kedareswara rao Appana Signed-off-by: David S. Miller --- drivers/net/phy/Kconfig | 7 ++ drivers/net/phy/Makefile | 1 + drivers/net/phy/xilinx_gmii2rgmii.c | 109 ++++++++++++++++++++++++++++ 3 files changed, 117 insertions(+) create mode 100644 drivers/net/phy/xilinx_gmii2rgmii.c diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index 1b534eaaca3d..d66133bf3eb5 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -312,6 +312,13 @@ config MICROSEMI_PHY ---help--- Currently supports the VSC8531 and VSC8541 PHYs +config XILINX_GMII2RGMII + tristate "Xilinx GMII2RGMII converter driver" + ---help--- + This driver support xilinx GMII to RGMII IP core it provides + the Reduced Gigabit Media Independent Interface(RGMII) between + Ethernet physical media devices and the Gigabit Ethernet controller. + endif # PHYLIB config MICREL_KS8995MA diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile index a713bd4da70a..73d65ce04454 100644 --- a/drivers/net/phy/Makefile +++ b/drivers/net/phy/Makefile @@ -50,3 +50,4 @@ obj-$(CONFIG_MDIO_BCM_IPROC) += mdio-bcm-iproc.o obj-$(CONFIG_INTEL_XWAY_PHY) += intel-xway.o obj-$(CONFIG_MDIO_HISI_FEMAC) += mdio-hisi-femac.o obj-$(CONFIG_MDIO_XGENE) += mdio-xgene.o +obj-$(CONFIG_XILINX_GMII2RGMII) += xilinx_gmii2rgmii.o diff --git a/drivers/net/phy/xilinx_gmii2rgmii.c b/drivers/net/phy/xilinx_gmii2rgmii.c new file mode 100644 index 000000000000..8e980ad476a4 --- /dev/null +++ b/drivers/net/phy/xilinx_gmii2rgmii.c @@ -0,0 +1,109 @@ +/* Xilinx GMII2RGMII Converter driver + * + * Copyright (C) 2016 Xilinx, Inc. + * + * Author: Kedareswara rao Appana + * + * Description: + * This driver is developed for Xilinx GMII2RGMII Converter + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ +#include +#include +#include +#include +#include +#include + +#define XILINX_GMII2RGMII_REG 0x10 +#define XILINX_GMII2RGMII_SPEED_MASK (BMCR_SPEED1000 | BMCR_SPEED100) + +struct gmii2rgmii { + struct phy_device *phy_dev; + struct phy_driver *phy_drv; + struct phy_driver conv_phy_drv; + int addr; +}; + +static int xgmiitorgmii_read_status(struct phy_device *phydev) +{ + struct gmii2rgmii *priv = phydev->priv; + u16 val = 0; + + priv->phy_drv->read_status(phydev); + + val = mdiobus_read(phydev->mdio.bus, priv->addr, XILINX_GMII2RGMII_REG); + val &= XILINX_GMII2RGMII_SPEED_MASK; + + if (phydev->speed == SPEED_1000) + val |= BMCR_SPEED1000; + else if (phydev->speed == SPEED_100) + val |= BMCR_SPEED100; + else + val |= BMCR_SPEED10; + + mdiobus_write(phydev->mdio.bus, priv->addr, XILINX_GMII2RGMII_REG, val); + + return 0; +} + +int xgmiitorgmii_probe(struct mdio_device *mdiodev) +{ + struct device *dev = &mdiodev->dev; + struct device_node *np = dev->of_node, *phy_node; + struct gmii2rgmii *priv; + + priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + phy_node = of_parse_phandle(np, "phy-handle", 0); + if (IS_ERR(phy_node)) { + dev_err(dev, "Couldn't parse phy-handle\n"); + return -ENODEV; + } + + priv->phy_dev = of_phy_find_device(phy_node); + if (!priv->phy_dev) { + dev_info(dev, "Couldn't find phydev\n"); + return -EPROBE_DEFER; + } + + priv->addr = mdiodev->addr; + priv->phy_drv = priv->phy_dev->drv; + memcpy(&priv->conv_phy_drv, priv->phy_dev->drv, + sizeof(struct phy_driver)); + priv->conv_phy_drv.read_status = xgmiitorgmii_read_status; + priv->phy_dev->priv = priv; + priv->phy_dev->drv = &priv->conv_phy_drv; + + return 0; +} + +static const struct of_device_id xgmiitorgmii_of_match[] = { + { .compatible = "xlnx,gmii-to-rgmii-1.0" }, + {}, +}; +MODULE_DEVICE_TABLE(of, xgmiitorgmii_of_match); + +static struct mdio_driver xgmiitorgmii_driver = { + .probe = xgmiitorgmii_probe, + .mdiodrv.driver = { + .name = "xgmiitorgmii", + .of_match_table = xgmiitorgmii_of_match, + }, +}; + +mdio_module_driver(xgmiitorgmii_driver); + +MODULE_DESCRIPTION("Xilinx GMII2RGMII converter driver"); +MODULE_LICENSE("GPL"); From 1738cd3ed342294360d6a74d4e58800004bff854 Mon Sep 17 00:00:00 2001 From: Netanel Belgazal Date: Wed, 10 Aug 2016 14:03:22 +0300 Subject: [PATCH 0142/1715] net: ena: Add a driver for Amazon Elastic Network Adapters (ENA) This is a driver for the ENA family of networking devices. Signed-off-by: Netanel Belgazal Signed-off-by: David S. Miller --- Documentation/networking/00-INDEX | 2 + Documentation/networking/ena.txt | 305 ++ MAINTAINERS | 9 + drivers/net/ethernet/Kconfig | 1 + drivers/net/ethernet/Makefile | 1 + drivers/net/ethernet/amazon/Kconfig | 27 + drivers/net/ethernet/amazon/Makefile | 5 + drivers/net/ethernet/amazon/ena/Makefile | 7 + .../net/ethernet/amazon/ena/ena_admin_defs.h | 973 +++++ drivers/net/ethernet/amazon/ena/ena_com.c | 2666 ++++++++++++++ drivers/net/ethernet/amazon/ena/ena_com.h | 1038 ++++++ .../net/ethernet/amazon/ena/ena_common_defs.h | 48 + drivers/net/ethernet/amazon/ena/ena_eth_com.c | 501 +++ drivers/net/ethernet/amazon/ena/ena_eth_com.h | 160 + .../net/ethernet/amazon/ena/ena_eth_io_defs.h | 416 +++ drivers/net/ethernet/amazon/ena/ena_ethtool.c | 895 +++++ drivers/net/ethernet/amazon/ena/ena_netdev.c | 3280 +++++++++++++++++ drivers/net/ethernet/amazon/ena/ena_netdev.h | 324 ++ .../net/ethernet/amazon/ena/ena_pci_id_tbl.h | 67 + .../net/ethernet/amazon/ena/ena_regs_defs.h | 133 + 20 files changed, 10858 insertions(+) create mode 100644 Documentation/networking/ena.txt create mode 100644 drivers/net/ethernet/amazon/Kconfig create mode 100644 drivers/net/ethernet/amazon/Makefile create mode 100644 drivers/net/ethernet/amazon/ena/Makefile create mode 100644 drivers/net/ethernet/amazon/ena/ena_admin_defs.h create mode 100644 drivers/net/ethernet/amazon/ena/ena_com.c create mode 100644 drivers/net/ethernet/amazon/ena/ena_com.h create mode 100644 drivers/net/ethernet/amazon/ena/ena_common_defs.h create mode 100644 drivers/net/ethernet/amazon/ena/ena_eth_com.c create mode 100644 drivers/net/ethernet/amazon/ena/ena_eth_com.h create mode 100644 drivers/net/ethernet/amazon/ena/ena_eth_io_defs.h create mode 100644 drivers/net/ethernet/amazon/ena/ena_ethtool.c create mode 100644 drivers/net/ethernet/amazon/ena/ena_netdev.c create mode 100644 drivers/net/ethernet/amazon/ena/ena_netdev.h create mode 100644 drivers/net/ethernet/amazon/ena/ena_pci_id_tbl.h create mode 100644 drivers/net/ethernet/amazon/ena/ena_regs_defs.h diff --git a/Documentation/networking/00-INDEX b/Documentation/networking/00-INDEX index 415154a487d0..a7697783ac4c 100644 --- a/Documentation/networking/00-INDEX +++ b/Documentation/networking/00-INDEX @@ -74,6 +74,8 @@ dns_resolver.txt - The DNS resolver module allows kernel servies to make DNS queries. driver.txt - Softnet driver issues. +ena.txt + - info on Amazon's Elastic Network Adapter (ENA) e100.txt - info on Intel's EtherExpress PRO/100 line of 10/100 boards e1000.txt diff --git a/Documentation/networking/ena.txt b/Documentation/networking/ena.txt new file mode 100644 index 000000000000..2b4b6f57e549 --- /dev/null +++ b/Documentation/networking/ena.txt @@ -0,0 +1,305 @@ +Linux kernel driver for Elastic Network Adapter (ENA) family: +============================================================= + +Overview: +========= +ENA is a networking interface designed to make good use of modern CPU +features and system architectures. + +The ENA device exposes a lightweight management interface with a +minimal set of memory mapped registers and extendable command set +through an Admin Queue. + +The driver supports a range of ENA devices, is link-speed independent +(i.e., the same driver is used for 10GbE, 25GbE, 40GbE, etc.), and has +a negotiated and extendable feature set. + +Some ENA devices support SR-IOV. This driver is used for both the +SR-IOV Physical Function (PF) and Virtual Function (VF) devices. + +ENA devices enable high speed and low overhead network traffic +processing by providing multiple Tx/Rx queue pairs (the maximum number +is advertised by the device via the Admin Queue), a dedicated MSI-X +interrupt vector per Tx/Rx queue pair, adaptive interrupt moderation, +and CPU cacheline optimized data placement. + +The ENA driver supports industry standard TCP/IP offload features such +as checksum offload and TCP transmit segmentation offload (TSO). +Receive-side scaling (RSS) is supported for multi-core scaling. + +The ENA driver and its corresponding devices implement health +monitoring mechanisms such as watchdog, enabling the device and driver +to recover in a manner transparent to the application, as well as +debug logs. + +Some of the ENA devices support a working mode called Low-latency +Queue (LLQ), which saves several more microseconds. + +Supported PCI vendor ID/device IDs: +=================================== +1d0f:0ec2 - ENA PF +1d0f:1ec2 - ENA PF with LLQ support +1d0f:ec20 - ENA VF +1d0f:ec21 - ENA VF with LLQ support + +ENA Source Code Directory Structure: +==================================== +ena_com.[ch] - Management communication layer. This layer is + responsible for the handling all the management + (admin) communication between the device and the + driver. +ena_eth_com.[ch] - Tx/Rx data path. +ena_admin_defs.h - Definition of ENA management interface. +ena_eth_io_defs.h - Definition of ENA data path interface. +ena_common_defs.h - Common definitions for ena_com layer. +ena_regs_defs.h - Definition of ENA PCI memory-mapped (MMIO) registers. +ena_netdev.[ch] - Main Linux kernel driver. +ena_syfsfs.[ch] - Sysfs files. +ena_ethtool.c - ethtool callbacks. +ena_pci_id_tbl.h - Supported device IDs. + +Management Interface: +===================== +ENA management interface is exposed by means of: +- PCIe Configuration Space +- Device Registers +- Admin Queue (AQ) and Admin Completion Queue (ACQ) +- Asynchronous Event Notification Queue (AENQ) + +ENA device MMIO Registers are accessed only during driver +initialization and are not involved in further normal device +operation. + +AQ is used for submitting management commands, and the +results/responses are reported asynchronously through ACQ. + +ENA introduces a very small set of management commands with room for +vendor-specific extensions. Most of the management operations are +framed in a generic Get/Set feature command. + +The following admin queue commands are supported: +- Create I/O submission queue +- Create I/O completion queue +- Destroy I/O submission queue +- Destroy I/O completion queue +- Get feature +- Set feature +- Configure AENQ +- Get statistics + +Refer to ena_admin_defs.h for the list of supported Get/Set Feature +properties. + +The Asynchronous Event Notification Queue (AENQ) is a uni-directional +queue used by the ENA device to send to the driver events that cannot +be reported using ACQ. AENQ events are subdivided into groups. Each +group may have multiple syndromes, as shown below + +The events are: + Group Syndrome + Link state change - X - + Fatal error - X - + Notification Suspend traffic + Notification Resume traffic + Keep-Alive - X - + +ACQ and AENQ share the same MSI-X vector. + +Keep-Alive is a special mechanism that allows monitoring of the +device's health. The driver maintains a watchdog (WD) handler which, +if fired, logs the current state and statistics then resets and +restarts the ENA device and driver. A Keep-Alive event is delivered by +the device every second. The driver re-arms the WD upon reception of a +Keep-Alive event. A missed Keep-Alive event causes the WD handler to +fire. + +Data Path Interface: +==================== +I/O operations are based on Tx and Rx Submission Queues (Tx SQ and Rx +SQ correspondingly). Each SQ has a completion queue (CQ) associated +with it. + +The SQs and CQs are implemented as descriptor rings in contiguous +physical memory. + +The ENA driver supports two Queue Operation modes for Tx SQs: +- Regular mode + * In this mode the Tx SQs reside in the host's memory. The ENA + device fetches the ENA Tx descriptors and packet data from host + memory. +- Low Latency Queue (LLQ) mode or "push-mode". + * In this mode the driver pushes the transmit descriptors and the + first 128 bytes of the packet directly to the ENA device memory + space. The rest of the packet payload is fetched by the + device. For this operation mode, the driver uses a dedicated PCI + device memory BAR, which is mapped with write-combine capability. + +The Rx SQs support only the regular mode. + +Note: Not all ENA devices support LLQ, and this feature is negotiated + with the device upon initialization. If the ENA device does not + support LLQ mode, the driver falls back to the regular mode. + +The driver supports multi-queue for both Tx and Rx. This has various +benefits: +- Reduced CPU/thread/process contention on a given Ethernet interface. +- Cache miss rate on completion is reduced, particularly for data + cache lines that hold the sk_buff structures. +- Increased process-level parallelism when handling received packets. +- Increased data cache hit rate, by steering kernel processing of + packets to the CPU, where the application thread consuming the + packet is running. +- In hardware interrupt re-direction. + +Interrupt Modes: +================ +The driver assigns a single MSI-X vector per queue pair (for both Tx +and Rx directions). The driver assigns an additional dedicated MSI-X vector +for management (for ACQ and AENQ). + +Management interrupt registration is performed when the Linux kernel +probes the adapter, and it is de-registered when the adapter is +removed. I/O queue interrupt registration is performed when the Linux +interface of the adapter is opened, and it is de-registered when the +interface is closed. + +The management interrupt is named: + ena-mgmnt@pci: +and for each queue pair, an interrupt is named: + -Tx-Rx- + +The ENA device operates in auto-mask and auto-clear interrupt +modes. That is, once MSI-X is delivered to the host, its Cause bit is +automatically cleared and the interrupt is masked. The interrupt is +unmasked by the driver after NAPI processing is complete. + +Interrupt Moderation: +===================== +ENA driver and device can operate in conventional or adaptive interrupt +moderation mode. + +In conventional mode the driver instructs device to postpone interrupt +posting according to static interrupt delay value. The interrupt delay +value can be configured through ethtool(8). The following ethtool +parameters are supported by the driver: tx-usecs, rx-usecs + +In adaptive interrupt moderation mode the interrupt delay value is +updated by the driver dynamically and adjusted every NAPI cycle +according to the traffic nature. + +By default ENA driver applies adaptive coalescing on Rx traffic and +conventional coalescing on Tx traffic. + +Adaptive coalescing can be switched on/off through ethtool(8) +adaptive_rx on|off parameter. + +The driver chooses interrupt delay value according to the number of +bytes and packets received between interrupt unmasking and interrupt +posting. The driver uses interrupt delay table that subdivides the +range of received bytes/packets into 5 levels and assigns interrupt +delay value to each level. + +The user can enable/disable adaptive moderation, modify the interrupt +delay table and restore its default values through sysfs. + +The rx_copybreak is initialized by default to ENA_DEFAULT_RX_COPYBREAK +and can be configured by the ETHTOOL_STUNABLE command of the +SIOCETHTOOL ioctl. + +SKB: +The driver-allocated SKB for frames received from Rx handling using +NAPI context. The allocation method depends on the size of the packet. +If the frame length is larger than rx_copybreak, napi_get_frags() +is used, otherwise netdev_alloc_skb_ip_align() is used, the buffer +content is copied (by CPU) to the SKB, and the buffer is recycled. + +Statistics: +=========== +The user can obtain ENA device and driver statistics using ethtool. +The driver can collect regular or extended statistics (including +per-queue stats) from the device. + +In addition the driver logs the stats to syslog upon device reset. + +MTU: +==== +The driver supports an arbitrarily large MTU with a maximum that is +negotiated with the device. The driver configures MTU using the +SetFeature command (ENA_ADMIN_MTU property). The user can change MTU +via ip(8) and similar legacy tools. + +Stateless Offloads: +=================== +The ENA driver supports: +- TSO over IPv4/IPv6 +- TSO with ECN +- IPv4 header checksum offload +- TCP/UDP over IPv4/IPv6 checksum offloads + +RSS: +==== +- The ENA device supports RSS that allows flexible Rx traffic + steering. +- Toeplitz and CRC32 hash functions are supported. +- Different combinations of L2/L3/L4 fields can be configured as + inputs for hash functions. +- The driver configures RSS settings using the AQ SetFeature command + (ENA_ADMIN_RSS_HASH_FUNCTION, ENA_ADMIN_RSS_HASH_INPUT and + ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG properties). +- If the NETIF_F_RXHASH flag is set, the 32-bit result of the hash + function delivered in the Rx CQ descriptor is set in the received + SKB. +- The user can provide a hash key, hash function, and configure the + indirection table through ethtool(8). + +DATA PATH: +========== +Tx: +--- +end_start_xmit() is called by the stack. This function does the following: +- Maps data buffers (skb->data and frags). +- Populates ena_buf for the push buffer (if the driver and device are + in push mode.) +- Prepares ENA bufs for the remaining frags. +- Allocates a new request ID from the empty req_id ring. The request + ID is the index of the packet in the Tx info. This is used for + out-of-order TX completions. +- Adds the packet to the proper place in the Tx ring. +- Calls ena_com_prepare_tx(), an ENA communication layer that converts + the ena_bufs to ENA descriptors (and adds meta ENA descriptors as + needed.) + * This function also copies the ENA descriptors and the push buffer + to the Device memory space (if in push mode.) +- Writes doorbell to the ENA device. +- When the ENA device finishes sending the packet, a completion + interrupt is raised. +- The interrupt handler schedules NAPI. +- The ena_clean_tx_irq() function is called. This function handles the + completion descriptors generated by the ENA, with a single + completion descriptor per completed packet. + * req_id is retrieved from the completion descriptor. The tx_info of + the packet is retrieved via the req_id. The data buffers are + unmapped and req_id is returned to the empty req_id ring. + * The function stops when the completion descriptors are completed or + the budget is reached. + +Rx: +--- +- When a packet is received from the ENA device. +- The interrupt handler schedules NAPI. +- The ena_clean_rx_irq() function is called. This function calls + ena_rx_pkt(), an ENA communication layer function, which returns the + number of descriptors used for a new unhandled packet, and zero if + no new packet is found. +- Then it calls the ena_clean_rx_irq() function. +- ena_eth_rx_skb() checks packet length: + * If the packet is small (len < rx_copybreak), the driver allocates + a SKB for the new packet, and copies the packet payload into the + SKB data buffer. + - In this way the original data buffer is not passed to the stack + and is reused for future Rx packets. + * Otherwise the function unmaps the Rx buffer, then allocates the + new SKB structure and hooks the Rx buffer to the SKB frags. +- The new SKB is updated with the necessary information (protocol, + checksum hw verify result, etc.), and then passed to the network + stack, using the NAPI interface function napi_gro_receive(). diff --git a/MAINTAINERS b/MAINTAINERS index 429fc61bee81..50ce33dc62f6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -636,6 +636,15 @@ F: drivers/tty/serial/altera_jtaguart.c F: include/linux/altera_uart.h F: include/linux/altera_jtaguart.h +AMAZON ETHERNET DRIVERS +M: Netanel Belgazal +R: Saeed Bishara +R: Zorik Machulsky +L: netdev@vger.kernel.org +S: Supported +F: Documentation/networking/ena.txt +F: drivers/net/ethernet/amazon/ + AMD CRYPTOGRAPHIC COPROCESSOR (CCP) DRIVER M: Tom Lendacky M: Gary Hook diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig index 2ffd63463299..8cc7467b6c1f 100644 --- a/drivers/net/ethernet/Kconfig +++ b/drivers/net/ethernet/Kconfig @@ -24,6 +24,7 @@ source "drivers/net/ethernet/agere/Kconfig" source "drivers/net/ethernet/allwinner/Kconfig" source "drivers/net/ethernet/alteon/Kconfig" source "drivers/net/ethernet/altera/Kconfig" +source "drivers/net/ethernet/amazon/Kconfig" source "drivers/net/ethernet/amd/Kconfig" source "drivers/net/ethernet/apm/Kconfig" source "drivers/net/ethernet/apple/Kconfig" diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile index 1d349e9aa9a6..a09423df83f2 100644 --- a/drivers/net/ethernet/Makefile +++ b/drivers/net/ethernet/Makefile @@ -10,6 +10,7 @@ obj-$(CONFIG_NET_VENDOR_AGERE) += agere/ obj-$(CONFIG_NET_VENDOR_ALLWINNER) += allwinner/ obj-$(CONFIG_NET_VENDOR_ALTEON) += alteon/ obj-$(CONFIG_ALTERA_TSE) += altera/ +obj-$(CONFIG_NET_VENDOR_AMAZON) += amazon/ obj-$(CONFIG_NET_VENDOR_AMD) += amd/ obj-$(CONFIG_NET_XGENE) += apm/ obj-$(CONFIG_NET_VENDOR_APPLE) += apple/ diff --git a/drivers/net/ethernet/amazon/Kconfig b/drivers/net/ethernet/amazon/Kconfig new file mode 100644 index 000000000000..99b30353541a --- /dev/null +++ b/drivers/net/ethernet/amazon/Kconfig @@ -0,0 +1,27 @@ +# +# Amazon network device configuration +# + +config NET_VENDOR_AMAZON + bool "Amazon Devices" + default y + ---help--- + If you have a network (Ethernet) device belonging to this class, say Y. + + Note that the answer to this question doesn't directly affect the + kernel: saying N will just cause the configurator to skip all + the questions about Amazon devices. If you say Y, you will be asked + for your specific device in the following questions. + +if NET_VENDOR_AMAZON + +config ENA_ETHERNET + tristate "Elastic Network Adapter (ENA) support" + depends on (PCI_MSI && X86) + ---help--- + This driver supports Elastic Network Adapter (ENA)" + + To compile this driver as a module, choose M here. + The module will be called ena. + +endif #NET_VENDOR_AMAZON diff --git a/drivers/net/ethernet/amazon/Makefile b/drivers/net/ethernet/amazon/Makefile new file mode 100644 index 000000000000..8e0b73f60d51 --- /dev/null +++ b/drivers/net/ethernet/amazon/Makefile @@ -0,0 +1,5 @@ +# +# Makefile for the Amazon network device drivers. +# + +obj-$(CONFIG_ENA_ETHERNET) += ena/ diff --git a/drivers/net/ethernet/amazon/ena/Makefile b/drivers/net/ethernet/amazon/ena/Makefile new file mode 100644 index 000000000000..eaeeae06c5d9 --- /dev/null +++ b/drivers/net/ethernet/amazon/ena/Makefile @@ -0,0 +1,7 @@ +# +# Makefile for the Elastic Network Adapter (ENA) device drivers. +# + +obj-$(CONFIG_ENA_ETHERNET) += ena.o + +ena-y := ena_netdev.o ena_com.o ena_eth_com.o ena_ethtool.o diff --git a/drivers/net/ethernet/amazon/ena/ena_admin_defs.h b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h new file mode 100644 index 000000000000..a46e749bf226 --- /dev/null +++ b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h @@ -0,0 +1,973 @@ +/* + * Copyright 2015 - 2016 Amazon.com, Inc. or its affiliates. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _ENA_ADMIN_H_ +#define _ENA_ADMIN_H_ + +enum ena_admin_aq_opcode { + ENA_ADMIN_CREATE_SQ = 1, + + ENA_ADMIN_DESTROY_SQ = 2, + + ENA_ADMIN_CREATE_CQ = 3, + + ENA_ADMIN_DESTROY_CQ = 4, + + ENA_ADMIN_GET_FEATURE = 8, + + ENA_ADMIN_SET_FEATURE = 9, + + ENA_ADMIN_GET_STATS = 11, +}; + +enum ena_admin_aq_completion_status { + ENA_ADMIN_SUCCESS = 0, + + ENA_ADMIN_RESOURCE_ALLOCATION_FAILURE = 1, + + ENA_ADMIN_BAD_OPCODE = 2, + + ENA_ADMIN_UNSUPPORTED_OPCODE = 3, + + ENA_ADMIN_MALFORMED_REQUEST = 4, + + /* Additional status is provided in ACQ entry extended_status */ + ENA_ADMIN_ILLEGAL_PARAMETER = 5, + + ENA_ADMIN_UNKNOWN_ERROR = 6, +}; + +enum ena_admin_aq_feature_id { + ENA_ADMIN_DEVICE_ATTRIBUTES = 1, + + ENA_ADMIN_MAX_QUEUES_NUM = 2, + + ENA_ADMIN_RSS_HASH_FUNCTION = 10, + + ENA_ADMIN_STATELESS_OFFLOAD_CONFIG = 11, + + ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG = 12, + + ENA_ADMIN_MTU = 14, + + ENA_ADMIN_RSS_HASH_INPUT = 18, + + ENA_ADMIN_INTERRUPT_MODERATION = 20, + + ENA_ADMIN_AENQ_CONFIG = 26, + + ENA_ADMIN_LINK_CONFIG = 27, + + ENA_ADMIN_HOST_ATTR_CONFIG = 28, + + ENA_ADMIN_FEATURES_OPCODE_NUM = 32, +}; + +enum ena_admin_placement_policy_type { + /* descriptors and headers are in host memory */ + ENA_ADMIN_PLACEMENT_POLICY_HOST = 1, + + /* descriptors and headers are in device memory (a.k.a Low Latency + * Queue) + */ + ENA_ADMIN_PLACEMENT_POLICY_DEV = 3, +}; + +enum ena_admin_link_types { + ENA_ADMIN_LINK_SPEED_1G = 0x1, + + ENA_ADMIN_LINK_SPEED_2_HALF_G = 0x2, + + ENA_ADMIN_LINK_SPEED_5G = 0x4, + + ENA_ADMIN_LINK_SPEED_10G = 0x8, + + ENA_ADMIN_LINK_SPEED_25G = 0x10, + + ENA_ADMIN_LINK_SPEED_40G = 0x20, + + ENA_ADMIN_LINK_SPEED_50G = 0x40, + + ENA_ADMIN_LINK_SPEED_100G = 0x80, + + ENA_ADMIN_LINK_SPEED_200G = 0x100, + + ENA_ADMIN_LINK_SPEED_400G = 0x200, +}; + +enum ena_admin_completion_policy_type { + /* completion queue entry for each sq descriptor */ + ENA_ADMIN_COMPLETION_POLICY_DESC = 0, + + /* completion queue entry upon request in sq descriptor */ + ENA_ADMIN_COMPLETION_POLICY_DESC_ON_DEMAND = 1, + + /* current queue head pointer is updated in OS memory upon sq + * descriptor request + */ + ENA_ADMIN_COMPLETION_POLICY_HEAD_ON_DEMAND = 2, + + /* current queue head pointer is updated in OS memory for each sq + * descriptor + */ + ENA_ADMIN_COMPLETION_POLICY_HEAD = 3, +}; + +/* basic stats return ena_admin_basic_stats while extanded stats return a + * buffer (string format) with additional statistics per queue and per + * device id + */ +enum ena_admin_get_stats_type { + ENA_ADMIN_GET_STATS_TYPE_BASIC = 0, + + ENA_ADMIN_GET_STATS_TYPE_EXTENDED = 1, +}; + +enum ena_admin_get_stats_scope { + ENA_ADMIN_SPECIFIC_QUEUE = 0, + + ENA_ADMIN_ETH_TRAFFIC = 1, +}; + +struct ena_admin_aq_common_desc { + /* 11:0 : command_id + * 15:12 : reserved12 + */ + u16 command_id; + + /* as appears in ena_admin_aq_opcode */ + u8 opcode; + + /* 0 : phase + * 1 : ctrl_data - control buffer address valid + * 2 : ctrl_data_indirect - control buffer address + * points to list of pages with addresses of control + * buffers + * 7:3 : reserved3 + */ + u8 flags; +}; + +/* used in ena_admin_aq_entry. Can point directly to control data, or to a + * page list chunk. Used also at the end of indirect mode page list chunks, + * for chaining. + */ +struct ena_admin_ctrl_buff_info { + u32 length; + + struct ena_common_mem_addr address; +}; + +struct ena_admin_sq { + u16 sq_idx; + + /* 4:0 : reserved + * 7:5 : sq_direction - 0x1 - Tx; 0x2 - Rx + */ + u8 sq_identity; + + u8 reserved1; +}; + +struct ena_admin_aq_entry { + struct ena_admin_aq_common_desc aq_common_descriptor; + + union { + u32 inline_data_w1[3]; + + struct ena_admin_ctrl_buff_info control_buffer; + } u; + + u32 inline_data_w4[12]; +}; + +struct ena_admin_acq_common_desc { + /* command identifier to associate it with the aq descriptor + * 11:0 : command_id + * 15:12 : reserved12 + */ + u16 command; + + u8 status; + + /* 0 : phase + * 7:1 : reserved1 + */ + u8 flags; + + u16 extended_status; + + /* serves as a hint what AQ entries can be revoked */ + u16 sq_head_indx; +}; + +struct ena_admin_acq_entry { + struct ena_admin_acq_common_desc acq_common_descriptor; + + u32 response_specific_data[14]; +}; + +struct ena_admin_aq_create_sq_cmd { + struct ena_admin_aq_common_desc aq_common_descriptor; + + /* 4:0 : reserved0_w1 + * 7:5 : sq_direction - 0x1 - Tx, 0x2 - Rx + */ + u8 sq_identity; + + u8 reserved8_w1; + + /* 3:0 : placement_policy - Describing where the SQ + * descriptor ring and the SQ packet headers reside: + * 0x1 - descriptors and headers are in OS memory, + * 0x3 - descriptors and headers in device memory + * (a.k.a Low Latency Queue) + * 6:4 : completion_policy - Describing what policy + * to use for generation completion entry (cqe) in + * the CQ associated with this SQ: 0x0 - cqe for each + * sq descriptor, 0x1 - cqe upon request in sq + * descriptor, 0x2 - current queue head pointer is + * updated in OS memory upon sq descriptor request + * 0x3 - current queue head pointer is updated in OS + * memory for each sq descriptor + * 7 : reserved15_w1 + */ + u8 sq_caps_2; + + /* 0 : is_physically_contiguous - Described if the + * queue ring memory is allocated in physical + * contiguous pages or split. + * 7:1 : reserved17_w1 + */ + u8 sq_caps_3; + + /* associated completion queue id. This CQ must be created prior to + * SQ creation + */ + u16 cq_idx; + + /* submission queue depth in entries */ + u16 sq_depth; + + /* SQ physical base address in OS memory. This field should not be + * used for Low Latency queues. Has to be page aligned. + */ + struct ena_common_mem_addr sq_ba; + + /* specifies queue head writeback location in OS memory. Valid if + * completion_policy is set to completion_policy_head_on_demand or + * completion_policy_head. Has to be cache aligned + */ + struct ena_common_mem_addr sq_head_writeback; + + u32 reserved0_w7; + + u32 reserved0_w8; +}; + +enum ena_admin_sq_direction { + ENA_ADMIN_SQ_DIRECTION_TX = 1, + + ENA_ADMIN_SQ_DIRECTION_RX = 2, +}; + +struct ena_admin_acq_create_sq_resp_desc { + struct ena_admin_acq_common_desc acq_common_desc; + + u16 sq_idx; + + u16 reserved; + + /* queue doorbell address as an offset to PCIe MMIO REG BAR */ + u32 sq_doorbell_offset; + + /* low latency queue ring base address as an offset to PCIe MMIO + * LLQ_MEM BAR + */ + u32 llq_descriptors_offset; + + /* low latency queue headers' memory as an offset to PCIe MMIO + * LLQ_MEM BAR + */ + u32 llq_headers_offset; +}; + +struct ena_admin_aq_destroy_sq_cmd { + struct ena_admin_aq_common_desc aq_common_descriptor; + + struct ena_admin_sq sq; +}; + +struct ena_admin_acq_destroy_sq_resp_desc { + struct ena_admin_acq_common_desc acq_common_desc; +}; + +struct ena_admin_aq_create_cq_cmd { + struct ena_admin_aq_common_desc aq_common_descriptor; + + /* 4:0 : reserved5 + * 5 : interrupt_mode_enabled - if set, cq operates + * in interrupt mode, otherwise - polling + * 7:6 : reserved6 + */ + u8 cq_caps_1; + + /* 4:0 : cq_entry_size_words - size of CQ entry in + * 32-bit words, valid values: 4, 8. + * 7:5 : reserved7 + */ + u8 cq_caps_2; + + /* completion queue depth in # of entries. must be power of 2 */ + u16 cq_depth; + + /* msix vector assigned to this cq */ + u32 msix_vector; + + /* cq physical base address in OS memory. CQ must be physically + * contiguous + */ + struct ena_common_mem_addr cq_ba; +}; + +struct ena_admin_acq_create_cq_resp_desc { + struct ena_admin_acq_common_desc acq_common_desc; + + u16 cq_idx; + + /* actual cq depth in number of entries */ + u16 cq_actual_depth; + + u32 numa_node_register_offset; + + u32 cq_head_db_register_offset; + + u32 cq_interrupt_unmask_register_offset; +}; + +struct ena_admin_aq_destroy_cq_cmd { + struct ena_admin_aq_common_desc aq_common_descriptor; + + u16 cq_idx; + + u16 reserved1; +}; + +struct ena_admin_acq_destroy_cq_resp_desc { + struct ena_admin_acq_common_desc acq_common_desc; +}; + +/* ENA AQ Get Statistics command. Extended statistics are placed in control + * buffer pointed by AQ entry + */ +struct ena_admin_aq_get_stats_cmd { + struct ena_admin_aq_common_desc aq_common_descriptor; + + union { + /* command specific inline data */ + u32 inline_data_w1[3]; + + struct ena_admin_ctrl_buff_info control_buffer; + } u; + + /* stats type as defined in enum ena_admin_get_stats_type */ + u8 type; + + /* stats scope defined in enum ena_admin_get_stats_scope */ + u8 scope; + + u16 reserved3; + + /* queue id. used when scope is specific_queue */ + u16 queue_idx; + + /* device id, value 0xFFFF means mine. only privileged device can get + * stats of other device + */ + u16 device_id; +}; + +/* Basic Statistics Command. */ +struct ena_admin_basic_stats { + u32 tx_bytes_low; + + u32 tx_bytes_high; + + u32 tx_pkts_low; + + u32 tx_pkts_high; + + u32 rx_bytes_low; + + u32 rx_bytes_high; + + u32 rx_pkts_low; + + u32 rx_pkts_high; + + u32 rx_drops_low; + + u32 rx_drops_high; +}; + +struct ena_admin_acq_get_stats_resp { + struct ena_admin_acq_common_desc acq_common_desc; + + struct ena_admin_basic_stats basic_stats; +}; + +struct ena_admin_get_set_feature_common_desc { + /* 1:0 : select - 0x1 - current value; 0x3 - default + * value + * 7:3 : reserved3 + */ + u8 flags; + + /* as appears in ena_admin_aq_feature_id */ + u8 feature_id; + + u16 reserved16; +}; + +struct ena_admin_device_attr_feature_desc { + u32 impl_id; + + u32 device_version; + + /* bitmap of ena_admin_aq_feature_id */ + u32 supported_features; + + u32 reserved3; + + /* Indicates how many bits are used physical address access. */ + u32 phys_addr_width; + + /* Indicates how many bits are used virtual address access. */ + u32 virt_addr_width; + + /* unicast MAC address (in Network byte order) */ + u8 mac_addr[6]; + + u8 reserved7[2]; + + u32 max_mtu; +}; + +struct ena_admin_queue_feature_desc { + /* including LLQs */ + u32 max_sq_num; + + u32 max_sq_depth; + + u32 max_cq_num; + + u32 max_cq_depth; + + u32 max_llq_num; + + u32 max_llq_depth; + + u32 max_header_size; + + /* Maximum Descriptors number, including meta descriptor, allowed for + * a single Tx packet + */ + u16 max_packet_tx_descs; + + /* Maximum Descriptors number allowed for a single Rx packet */ + u16 max_packet_rx_descs; +}; + +struct ena_admin_set_feature_mtu_desc { + /* exclude L2 */ + u32 mtu; +}; + +struct ena_admin_set_feature_host_attr_desc { + /* host OS info base address in OS memory. host info is 4KB of + * physically contiguous + */ + struct ena_common_mem_addr os_info_ba; + + /* host debug area base address in OS memory. debug area must be + * physically contiguous + */ + struct ena_common_mem_addr debug_ba; + + /* debug area size */ + u32 debug_area_size; +}; + +struct ena_admin_feature_intr_moder_desc { + /* interrupt delay granularity in usec */ + u16 intr_delay_resolution; + + u16 reserved; +}; + +struct ena_admin_get_feature_link_desc { + /* Link speed in Mb */ + u32 speed; + + /* bit field of enum ena_admin_link types */ + u32 supported; + + /* 0 : autoneg + * 1 : duplex - Full Duplex + * 31:2 : reserved2 + */ + u32 flags; +}; + +struct ena_admin_feature_aenq_desc { + /* bitmask for AENQ groups the device can report */ + u32 supported_groups; + + /* bitmask for AENQ groups to report */ + u32 enabled_groups; +}; + +struct ena_admin_feature_offload_desc { + /* 0 : TX_L3_csum_ipv4 + * 1 : TX_L4_ipv4_csum_part - The checksum field + * should be initialized with pseudo header checksum + * 2 : TX_L4_ipv4_csum_full + * 3 : TX_L4_ipv6_csum_part - The checksum field + * should be initialized with pseudo header checksum + * 4 : TX_L4_ipv6_csum_full + * 5 : tso_ipv4 + * 6 : tso_ipv6 + * 7 : tso_ecn + */ + u32 tx; + + /* Receive side supported stateless offload + * 0 : RX_L3_csum_ipv4 - IPv4 checksum + * 1 : RX_L4_ipv4_csum - TCP/UDP/IPv4 checksum + * 2 : RX_L4_ipv6_csum - TCP/UDP/IPv6 checksum + * 3 : RX_hash - Hash calculation + */ + u32 rx_supported; + + u32 rx_enabled; +}; + +enum ena_admin_hash_functions { + ENA_ADMIN_TOEPLITZ = 1, + + ENA_ADMIN_CRC32 = 2, +}; + +struct ena_admin_feature_rss_flow_hash_control { + u32 keys_num; + + u32 reserved; + + u32 key[10]; +}; + +struct ena_admin_feature_rss_flow_hash_function { + /* 7:0 : funcs - bitmask of ena_admin_hash_functions */ + u32 supported_func; + + /* 7:0 : selected_func - bitmask of + * ena_admin_hash_functions + */ + u32 selected_func; + + /* initial value */ + u32 init_val; +}; + +/* RSS flow hash protocols */ +enum ena_admin_flow_hash_proto { + ENA_ADMIN_RSS_TCP4 = 0, + + ENA_ADMIN_RSS_UDP4 = 1, + + ENA_ADMIN_RSS_TCP6 = 2, + + ENA_ADMIN_RSS_UDP6 = 3, + + ENA_ADMIN_RSS_IP4 = 4, + + ENA_ADMIN_RSS_IP6 = 5, + + ENA_ADMIN_RSS_IP4_FRAG = 6, + + ENA_ADMIN_RSS_NOT_IP = 7, + + ENA_ADMIN_RSS_PROTO_NUM = 16, +}; + +/* RSS flow hash fields */ +enum ena_admin_flow_hash_fields { + /* Ethernet Dest Addr */ + ENA_ADMIN_RSS_L2_DA = 0, + + /* Ethernet Src Addr */ + ENA_ADMIN_RSS_L2_SA = 1, + + /* ipv4/6 Dest Addr */ + ENA_ADMIN_RSS_L3_DA = 2, + + /* ipv4/6 Src Addr */ + ENA_ADMIN_RSS_L3_SA = 5, + + /* tcp/udp Dest Port */ + ENA_ADMIN_RSS_L4_DP = 6, + + /* tcp/udp Src Port */ + ENA_ADMIN_RSS_L4_SP = 7, +}; + +struct ena_admin_proto_input { + /* flow hash fields (bitwise according to ena_admin_flow_hash_fields) */ + u16 fields; + + u16 reserved2; +}; + +struct ena_admin_feature_rss_hash_control { + struct ena_admin_proto_input supported_fields[ENA_ADMIN_RSS_PROTO_NUM]; + + struct ena_admin_proto_input selected_fields[ENA_ADMIN_RSS_PROTO_NUM]; + + struct ena_admin_proto_input reserved2[ENA_ADMIN_RSS_PROTO_NUM]; + + struct ena_admin_proto_input reserved3[ENA_ADMIN_RSS_PROTO_NUM]; +}; + +struct ena_admin_feature_rss_flow_hash_input { + /* supported hash input sorting + * 1 : L3_sort - support swap L3 addresses if DA is + * smaller than SA + * 2 : L4_sort - support swap L4 ports if DP smaller + * SP + */ + u16 supported_input_sort; + + /* enabled hash input sorting + * 1 : enable_L3_sort - enable swap L3 addresses if + * DA smaller than SA + * 2 : enable_L4_sort - enable swap L4 ports if DP + * smaller than SP + */ + u16 enabled_input_sort; +}; + +enum ena_admin_os_type { + ENA_ADMIN_OS_LINUX = 1, + + ENA_ADMIN_OS_WIN = 2, + + ENA_ADMIN_OS_DPDK = 3, + + ENA_ADMIN_OS_FREEBSD = 4, + + ENA_ADMIN_OS_IPXE = 5, +}; + +struct ena_admin_host_info { + /* defined in enum ena_admin_os_type */ + u32 os_type; + + /* os distribution string format */ + u8 os_dist_str[128]; + + /* OS distribution numeric format */ + u32 os_dist; + + /* kernel version string format */ + u8 kernel_ver_str[32]; + + /* Kernel version numeric format */ + u32 kernel_ver; + + /* 7:0 : major + * 15:8 : minor + * 23:16 : sub_minor + */ + u32 driver_version; + + /* features bitmap */ + u32 supported_network_features[4]; +}; + +struct ena_admin_rss_ind_table_entry { + u16 cq_idx; + + u16 reserved; +}; + +struct ena_admin_feature_rss_ind_table { + /* min supported table size (2^min_size) */ + u16 min_size; + + /* max supported table size (2^max_size) */ + u16 max_size; + + /* table size (2^size) */ + u16 size; + + u16 reserved; + + /* index of the inline entry. 0xFFFFFFFF means invalid */ + u32 inline_index; + + /* used for updating single entry, ignored when setting the entire + * table through the control buffer. + */ + struct ena_admin_rss_ind_table_entry inline_entry; +}; + +struct ena_admin_get_feat_cmd { + struct ena_admin_aq_common_desc aq_common_descriptor; + + struct ena_admin_ctrl_buff_info control_buffer; + + struct ena_admin_get_set_feature_common_desc feat_common; + + u32 raw[11]; +}; + +struct ena_admin_get_feat_resp { + struct ena_admin_acq_common_desc acq_common_desc; + + union { + u32 raw[14]; + + struct ena_admin_device_attr_feature_desc dev_attr; + + struct ena_admin_queue_feature_desc max_queue; + + struct ena_admin_feature_aenq_desc aenq; + + struct ena_admin_get_feature_link_desc link; + + struct ena_admin_feature_offload_desc offload; + + struct ena_admin_feature_rss_flow_hash_function flow_hash_func; + + struct ena_admin_feature_rss_flow_hash_input flow_hash_input; + + struct ena_admin_feature_rss_ind_table ind_table; + + struct ena_admin_feature_intr_moder_desc intr_moderation; + } u; +}; + +struct ena_admin_set_feat_cmd { + struct ena_admin_aq_common_desc aq_common_descriptor; + + struct ena_admin_ctrl_buff_info control_buffer; + + struct ena_admin_get_set_feature_common_desc feat_common; + + union { + u32 raw[11]; + + /* mtu size */ + struct ena_admin_set_feature_mtu_desc mtu; + + /* host attributes */ + struct ena_admin_set_feature_host_attr_desc host_attr; + + /* AENQ configuration */ + struct ena_admin_feature_aenq_desc aenq; + + /* rss flow hash function */ + struct ena_admin_feature_rss_flow_hash_function flow_hash_func; + + /* rss flow hash input */ + struct ena_admin_feature_rss_flow_hash_input flow_hash_input; + + /* rss indirection table */ + struct ena_admin_feature_rss_ind_table ind_table; + } u; +}; + +struct ena_admin_set_feat_resp { + struct ena_admin_acq_common_desc acq_common_desc; + + union { + u32 raw[14]; + } u; +}; + +struct ena_admin_aenq_common_desc { + u16 group; + + u16 syndrom; + + /* 0 : phase */ + u8 flags; + + u8 reserved1[3]; + + u32 timestamp_low; + + u32 timestamp_high; +}; + +/* asynchronous event notification groups */ +enum ena_admin_aenq_group { + ENA_ADMIN_LINK_CHANGE = 0, + + ENA_ADMIN_FATAL_ERROR = 1, + + ENA_ADMIN_WARNING = 2, + + ENA_ADMIN_NOTIFICATION = 3, + + ENA_ADMIN_KEEP_ALIVE = 4, + + ENA_ADMIN_AENQ_GROUPS_NUM = 5, +}; + +enum ena_admin_aenq_notification_syndrom { + ENA_ADMIN_SUSPEND = 0, + + ENA_ADMIN_RESUME = 1, +}; + +struct ena_admin_aenq_entry { + struct ena_admin_aenq_common_desc aenq_common_desc; + + /* command specific inline data */ + u32 inline_data_w4[12]; +}; + +struct ena_admin_aenq_link_change_desc { + struct ena_admin_aenq_common_desc aenq_common_desc; + + /* 0 : link_status */ + u32 flags; +}; + +struct ena_admin_ena_mmio_req_read_less_resp { + u16 req_id; + + u16 reg_off; + + /* value is valid when poll is cleared */ + u32 reg_val; +}; + +/* aq_common_desc */ +#define ENA_ADMIN_AQ_COMMON_DESC_COMMAND_ID_MASK GENMASK(11, 0) +#define ENA_ADMIN_AQ_COMMON_DESC_PHASE_MASK BIT(0) +#define ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_SHIFT 1 +#define ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_MASK BIT(1) +#define ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_SHIFT 2 +#define ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK BIT(2) + +/* sq */ +#define ENA_ADMIN_SQ_SQ_DIRECTION_SHIFT 5 +#define ENA_ADMIN_SQ_SQ_DIRECTION_MASK GENMASK(7, 5) + +/* acq_common_desc */ +#define ENA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID_MASK GENMASK(11, 0) +#define ENA_ADMIN_ACQ_COMMON_DESC_PHASE_MASK BIT(0) + +/* aq_create_sq_cmd */ +#define ENA_ADMIN_AQ_CREATE_SQ_CMD_SQ_DIRECTION_SHIFT 5 +#define ENA_ADMIN_AQ_CREATE_SQ_CMD_SQ_DIRECTION_MASK GENMASK(7, 5) +#define ENA_ADMIN_AQ_CREATE_SQ_CMD_PLACEMENT_POLICY_MASK GENMASK(3, 0) +#define ENA_ADMIN_AQ_CREATE_SQ_CMD_COMPLETION_POLICY_SHIFT 4 +#define ENA_ADMIN_AQ_CREATE_SQ_CMD_COMPLETION_POLICY_MASK GENMASK(6, 4) +#define ENA_ADMIN_AQ_CREATE_SQ_CMD_IS_PHYSICALLY_CONTIGUOUS_MASK BIT(0) + +/* aq_create_cq_cmd */ +#define ENA_ADMIN_AQ_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED_SHIFT 5 +#define ENA_ADMIN_AQ_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED_MASK BIT(5) +#define ENA_ADMIN_AQ_CREATE_CQ_CMD_CQ_ENTRY_SIZE_WORDS_MASK GENMASK(4, 0) + +/* get_set_feature_common_desc */ +#define ENA_ADMIN_GET_SET_FEATURE_COMMON_DESC_SELECT_MASK GENMASK(1, 0) + +/* get_feature_link_desc */ +#define ENA_ADMIN_GET_FEATURE_LINK_DESC_AUTONEG_MASK BIT(0) +#define ENA_ADMIN_GET_FEATURE_LINK_DESC_DUPLEX_SHIFT 1 +#define ENA_ADMIN_GET_FEATURE_LINK_DESC_DUPLEX_MASK BIT(1) + +/* feature_offload_desc */ +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L3_CSUM_IPV4_MASK BIT(0) +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_SHIFT 1 +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK BIT(1) +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_SHIFT 2 +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_MASK BIT(2) +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_SHIFT 3 +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK BIT(3) +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_FULL_SHIFT 4 +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_FULL_MASK BIT(4) +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_SHIFT 5 +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK BIT(5) +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_SHIFT 6 +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_MASK BIT(6) +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_ECN_SHIFT 7 +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_ECN_MASK BIT(7) +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L3_CSUM_IPV4_MASK BIT(0) +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_SHIFT 1 +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK BIT(1) +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_SHIFT 2 +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_MASK BIT(2) +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_HASH_SHIFT 3 +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_HASH_MASK BIT(3) + +/* feature_rss_flow_hash_function */ +#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_FUNCTION_FUNCS_MASK GENMASK(7, 0) +#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_FUNCTION_SELECTED_FUNC_MASK GENMASK(7, 0) + +/* feature_rss_flow_hash_input */ +#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L3_SORT_SHIFT 1 +#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L3_SORT_MASK BIT(1) +#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L4_SORT_SHIFT 2 +#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L4_SORT_MASK BIT(2) +#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_ENABLE_L3_SORT_SHIFT 1 +#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_ENABLE_L3_SORT_MASK BIT(1) +#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_ENABLE_L4_SORT_SHIFT 2 +#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_ENABLE_L4_SORT_MASK BIT(2) + +/* host_info */ +#define ENA_ADMIN_HOST_INFO_MAJOR_MASK GENMASK(7, 0) +#define ENA_ADMIN_HOST_INFO_MINOR_SHIFT 8 +#define ENA_ADMIN_HOST_INFO_MINOR_MASK GENMASK(15, 8) +#define ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT 16 +#define ENA_ADMIN_HOST_INFO_SUB_MINOR_MASK GENMASK(23, 16) + +/* aenq_common_desc */ +#define ENA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK BIT(0) + +/* aenq_link_change_desc */ +#define ENA_ADMIN_AENQ_LINK_CHANGE_DESC_LINK_STATUS_MASK BIT(0) + +#endif /*_ENA_ADMIN_H_ */ diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c new file mode 100644 index 000000000000..3066d9c99984 --- /dev/null +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -0,0 +1,2666 @@ +/* + * Copyright 2015 Amazon.com, Inc. or its affiliates. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "ena_com.h" + +/*****************************************************************************/ +/*****************************************************************************/ + +/* Timeout in micro-sec */ +#define ADMIN_CMD_TIMEOUT_US (1000000) + +#define ENA_ASYNC_QUEUE_DEPTH 4 +#define ENA_ADMIN_QUEUE_DEPTH 32 + +#define MIN_ENA_VER (((ENA_COMMON_SPEC_VERSION_MAJOR) << \ + ENA_REGS_VERSION_MAJOR_VERSION_SHIFT) \ + | (ENA_COMMON_SPEC_VERSION_MINOR)) + +#define ENA_CTRL_MAJOR 0 +#define ENA_CTRL_MINOR 0 +#define ENA_CTRL_SUB_MINOR 1 + +#define MIN_ENA_CTRL_VER \ + (((ENA_CTRL_MAJOR) << \ + (ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT)) | \ + ((ENA_CTRL_MINOR) << \ + (ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT)) | \ + (ENA_CTRL_SUB_MINOR)) + +#define ENA_DMA_ADDR_TO_UINT32_LOW(x) ((u32)((u64)(x))) +#define ENA_DMA_ADDR_TO_UINT32_HIGH(x) ((u32)(((u64)(x)) >> 32)) + +#define ENA_MMIO_READ_TIMEOUT 0xFFFFFFFF + +/*****************************************************************************/ +/*****************************************************************************/ +/*****************************************************************************/ + +enum ena_cmd_status { + ENA_CMD_SUBMITTED, + ENA_CMD_COMPLETED, + /* Abort - canceled by the driver */ + ENA_CMD_ABORTED, +}; + +struct ena_comp_ctx { + struct completion wait_event; + struct ena_admin_acq_entry *user_cqe; + u32 comp_size; + enum ena_cmd_status status; + /* status from the device */ + u8 comp_status; + u8 cmd_opcode; + bool occupied; +}; + +struct ena_com_stats_ctx { + struct ena_admin_aq_get_stats_cmd get_cmd; + struct ena_admin_acq_get_stats_resp get_resp; +}; + +static inline int ena_com_mem_addr_set(struct ena_com_dev *ena_dev, + struct ena_common_mem_addr *ena_addr, + dma_addr_t addr) +{ + if ((addr & GENMASK_ULL(ena_dev->dma_addr_bits - 1, 0)) != addr) { + pr_err("dma address has more bits that the device supports\n"); + return -EINVAL; + } + + ena_addr->mem_addr_low = (u32)addr; + ena_addr->mem_addr_high = (u64)addr >> 32; + + return 0; +} + +static int ena_com_admin_init_sq(struct ena_com_admin_queue *queue) +{ + struct ena_com_admin_sq *sq = &queue->sq; + u16 size = ADMIN_SQ_SIZE(queue->q_depth); + + sq->entries = dma_zalloc_coherent(queue->q_dmadev, size, &sq->dma_addr, + GFP_KERNEL); + + if (!sq->entries) { + pr_err("memory allocation failed"); + return -ENOMEM; + } + + sq->head = 0; + sq->tail = 0; + sq->phase = 1; + + sq->db_addr = NULL; + + return 0; +} + +static int ena_com_admin_init_cq(struct ena_com_admin_queue *queue) +{ + struct ena_com_admin_cq *cq = &queue->cq; + u16 size = ADMIN_CQ_SIZE(queue->q_depth); + + cq->entries = dma_zalloc_coherent(queue->q_dmadev, size, &cq->dma_addr, + GFP_KERNEL); + + if (!cq->entries) { + pr_err("memory allocation failed"); + return -ENOMEM; + } + + cq->head = 0; + cq->phase = 1; + + return 0; +} + +static int ena_com_admin_init_aenq(struct ena_com_dev *dev, + struct ena_aenq_handlers *aenq_handlers) +{ + struct ena_com_aenq *aenq = &dev->aenq; + u32 addr_low, addr_high, aenq_caps; + u16 size; + + dev->aenq.q_depth = ENA_ASYNC_QUEUE_DEPTH; + size = ADMIN_AENQ_SIZE(ENA_ASYNC_QUEUE_DEPTH); + aenq->entries = dma_zalloc_coherent(dev->dmadev, size, &aenq->dma_addr, + GFP_KERNEL); + + if (!aenq->entries) { + pr_err("memory allocation failed"); + return -ENOMEM; + } + + aenq->head = aenq->q_depth; + aenq->phase = 1; + + addr_low = ENA_DMA_ADDR_TO_UINT32_LOW(aenq->dma_addr); + addr_high = ENA_DMA_ADDR_TO_UINT32_HIGH(aenq->dma_addr); + + writel(addr_low, dev->reg_bar + ENA_REGS_AENQ_BASE_LO_OFF); + writel(addr_high, dev->reg_bar + ENA_REGS_AENQ_BASE_HI_OFF); + + aenq_caps = 0; + aenq_caps |= dev->aenq.q_depth & ENA_REGS_AENQ_CAPS_AENQ_DEPTH_MASK; + aenq_caps |= (sizeof(struct ena_admin_aenq_entry) + << ENA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_SHIFT) & + ENA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_MASK; + writel(aenq_caps, dev->reg_bar + ENA_REGS_AENQ_CAPS_OFF); + + if (unlikely(!aenq_handlers)) { + pr_err("aenq handlers pointer is NULL\n"); + return -EINVAL; + } + + aenq->aenq_handlers = aenq_handlers; + + return 0; +} + +static inline void comp_ctxt_release(struct ena_com_admin_queue *queue, + struct ena_comp_ctx *comp_ctx) +{ + comp_ctx->occupied = false; + atomic_dec(&queue->outstanding_cmds); +} + +static struct ena_comp_ctx *get_comp_ctxt(struct ena_com_admin_queue *queue, + u16 command_id, bool capture) +{ + if (unlikely(command_id >= queue->q_depth)) { + pr_err("command id is larger than the queue size. cmd_id: %u queue size %d\n", + command_id, queue->q_depth); + return NULL; + } + + if (unlikely(queue->comp_ctx[command_id].occupied && capture)) { + pr_err("Completion context is occupied\n"); + return NULL; + } + + if (capture) { + atomic_inc(&queue->outstanding_cmds); + queue->comp_ctx[command_id].occupied = true; + } + + return &queue->comp_ctx[command_id]; +} + +static struct ena_comp_ctx *__ena_com_submit_admin_cmd(struct ena_com_admin_queue *admin_queue, + struct ena_admin_aq_entry *cmd, + size_t cmd_size_in_bytes, + struct ena_admin_acq_entry *comp, + size_t comp_size_in_bytes) +{ + struct ena_comp_ctx *comp_ctx; + u16 tail_masked, cmd_id; + u16 queue_size_mask; + u16 cnt; + + queue_size_mask = admin_queue->q_depth - 1; + + tail_masked = admin_queue->sq.tail & queue_size_mask; + + /* In case of queue FULL */ + cnt = admin_queue->sq.tail - admin_queue->sq.head; + if (cnt >= admin_queue->q_depth) { + pr_debug("admin queue is FULL (tail %d head %d depth: %d)\n", + admin_queue->sq.tail, admin_queue->sq.head, + admin_queue->q_depth); + admin_queue->stats.out_of_space++; + return ERR_PTR(-ENOSPC); + } + + cmd_id = admin_queue->curr_cmd_id; + + cmd->aq_common_descriptor.flags |= admin_queue->sq.phase & + ENA_ADMIN_AQ_COMMON_DESC_PHASE_MASK; + + cmd->aq_common_descriptor.command_id |= cmd_id & + ENA_ADMIN_AQ_COMMON_DESC_COMMAND_ID_MASK; + + comp_ctx = get_comp_ctxt(admin_queue, cmd_id, true); + if (unlikely(!comp_ctx)) + return ERR_PTR(-EINVAL); + + comp_ctx->status = ENA_CMD_SUBMITTED; + comp_ctx->comp_size = (u32)comp_size_in_bytes; + comp_ctx->user_cqe = comp; + comp_ctx->cmd_opcode = cmd->aq_common_descriptor.opcode; + + reinit_completion(&comp_ctx->wait_event); + + memcpy(&admin_queue->sq.entries[tail_masked], cmd, cmd_size_in_bytes); + + admin_queue->curr_cmd_id = (admin_queue->curr_cmd_id + 1) & + queue_size_mask; + + admin_queue->sq.tail++; + admin_queue->stats.submitted_cmd++; + + if (unlikely((admin_queue->sq.tail & queue_size_mask) == 0)) + admin_queue->sq.phase = !admin_queue->sq.phase; + + writel(admin_queue->sq.tail, admin_queue->sq.db_addr); + + return comp_ctx; +} + +static inline int ena_com_init_comp_ctxt(struct ena_com_admin_queue *queue) +{ + size_t size = queue->q_depth * sizeof(struct ena_comp_ctx); + struct ena_comp_ctx *comp_ctx; + u16 i; + + queue->comp_ctx = devm_kzalloc(queue->q_dmadev, size, GFP_KERNEL); + if (unlikely(!queue->comp_ctx)) { + pr_err("memory allocation failed"); + return -ENOMEM; + } + + for (i = 0; i < queue->q_depth; i++) { + comp_ctx = get_comp_ctxt(queue, i, false); + if (comp_ctx) + init_completion(&comp_ctx->wait_event); + } + + return 0; +} + +static struct ena_comp_ctx *ena_com_submit_admin_cmd(struct ena_com_admin_queue *admin_queue, + struct ena_admin_aq_entry *cmd, + size_t cmd_size_in_bytes, + struct ena_admin_acq_entry *comp, + size_t comp_size_in_bytes) +{ + unsigned long flags; + struct ena_comp_ctx *comp_ctx; + + spin_lock_irqsave(&admin_queue->q_lock, flags); + if (unlikely(!admin_queue->running_state)) { + spin_unlock_irqrestore(&admin_queue->q_lock, flags); + return ERR_PTR(-ENODEV); + } + comp_ctx = __ena_com_submit_admin_cmd(admin_queue, cmd, + cmd_size_in_bytes, + comp, + comp_size_in_bytes); + if (unlikely(IS_ERR(comp_ctx))) + admin_queue->running_state = false; + spin_unlock_irqrestore(&admin_queue->q_lock, flags); + + return comp_ctx; +} + +static int ena_com_init_io_sq(struct ena_com_dev *ena_dev, + struct ena_com_create_io_ctx *ctx, + struct ena_com_io_sq *io_sq) +{ + size_t size; + int dev_node = 0; + + memset(&io_sq->desc_addr, 0x0, sizeof(struct ena_com_io_desc_addr)); + + io_sq->desc_entry_size = + (io_sq->direction == ENA_COM_IO_QUEUE_DIRECTION_TX) ? + sizeof(struct ena_eth_io_tx_desc) : + sizeof(struct ena_eth_io_rx_desc); + + size = io_sq->desc_entry_size * io_sq->q_depth; + + if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) { + dev_node = dev_to_node(ena_dev->dmadev); + set_dev_node(ena_dev->dmadev, ctx->numa_node); + io_sq->desc_addr.virt_addr = + dma_zalloc_coherent(ena_dev->dmadev, size, + &io_sq->desc_addr.phys_addr, + GFP_KERNEL); + set_dev_node(ena_dev->dmadev, dev_node); + if (!io_sq->desc_addr.virt_addr) { + io_sq->desc_addr.virt_addr = + dma_zalloc_coherent(ena_dev->dmadev, size, + &io_sq->desc_addr.phys_addr, + GFP_KERNEL); + } + } else { + dev_node = dev_to_node(ena_dev->dmadev); + set_dev_node(ena_dev->dmadev, ctx->numa_node); + io_sq->desc_addr.virt_addr = + devm_kzalloc(ena_dev->dmadev, size, GFP_KERNEL); + set_dev_node(ena_dev->dmadev, dev_node); + if (!io_sq->desc_addr.virt_addr) { + io_sq->desc_addr.virt_addr = + devm_kzalloc(ena_dev->dmadev, size, GFP_KERNEL); + } + } + + if (!io_sq->desc_addr.virt_addr) { + pr_err("memory allocation failed"); + return -ENOMEM; + } + + io_sq->tail = 0; + io_sq->next_to_comp = 0; + io_sq->phase = 1; + + return 0; +} + +static int ena_com_init_io_cq(struct ena_com_dev *ena_dev, + struct ena_com_create_io_ctx *ctx, + struct ena_com_io_cq *io_cq) +{ + size_t size; + int prev_node = 0; + + memset(&io_cq->cdesc_addr, 0x0, sizeof(struct ena_com_io_desc_addr)); + + /* Use the basic completion descriptor for Rx */ + io_cq->cdesc_entry_size_in_bytes = + (io_cq->direction == ENA_COM_IO_QUEUE_DIRECTION_TX) ? + sizeof(struct ena_eth_io_tx_cdesc) : + sizeof(struct ena_eth_io_rx_cdesc_base); + + size = io_cq->cdesc_entry_size_in_bytes * io_cq->q_depth; + + prev_node = dev_to_node(ena_dev->dmadev); + set_dev_node(ena_dev->dmadev, ctx->numa_node); + io_cq->cdesc_addr.virt_addr = + dma_zalloc_coherent(ena_dev->dmadev, size, + &io_cq->cdesc_addr.phys_addr, GFP_KERNEL); + set_dev_node(ena_dev->dmadev, prev_node); + if (!io_cq->cdesc_addr.virt_addr) { + io_cq->cdesc_addr.virt_addr = + dma_zalloc_coherent(ena_dev->dmadev, size, + &io_cq->cdesc_addr.phys_addr, + GFP_KERNEL); + } + + if (!io_cq->cdesc_addr.virt_addr) { + pr_err("memory allocation failed"); + return -ENOMEM; + } + + io_cq->phase = 1; + io_cq->head = 0; + + return 0; +} + +static void ena_com_handle_single_admin_completion(struct ena_com_admin_queue *admin_queue, + struct ena_admin_acq_entry *cqe) +{ + struct ena_comp_ctx *comp_ctx; + u16 cmd_id; + + cmd_id = cqe->acq_common_descriptor.command & + ENA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID_MASK; + + comp_ctx = get_comp_ctxt(admin_queue, cmd_id, false); + if (unlikely(!comp_ctx)) { + pr_err("comp_ctx is NULL. Changing the admin queue running state\n"); + admin_queue->running_state = false; + return; + } + + comp_ctx->status = ENA_CMD_COMPLETED; + comp_ctx->comp_status = cqe->acq_common_descriptor.status; + + if (comp_ctx->user_cqe) + memcpy(comp_ctx->user_cqe, (void *)cqe, comp_ctx->comp_size); + + if (!admin_queue->polling) + complete(&comp_ctx->wait_event); +} + +static void ena_com_handle_admin_completion(struct ena_com_admin_queue *admin_queue) +{ + struct ena_admin_acq_entry *cqe = NULL; + u16 comp_num = 0; + u16 head_masked; + u8 phase; + + head_masked = admin_queue->cq.head & (admin_queue->q_depth - 1); + phase = admin_queue->cq.phase; + + cqe = &admin_queue->cq.entries[head_masked]; + + /* Go over all the completions */ + while ((cqe->acq_common_descriptor.flags & + ENA_ADMIN_ACQ_COMMON_DESC_PHASE_MASK) == phase) { + /* Do not read the rest of the completion entry before the + * phase bit was validated + */ + rmb(); + ena_com_handle_single_admin_completion(admin_queue, cqe); + + head_masked++; + comp_num++; + if (unlikely(head_masked == admin_queue->q_depth)) { + head_masked = 0; + phase = !phase; + } + + cqe = &admin_queue->cq.entries[head_masked]; + } + + admin_queue->cq.head += comp_num; + admin_queue->cq.phase = phase; + admin_queue->sq.head += comp_num; + admin_queue->stats.completed_cmd += comp_num; +} + +static int ena_com_comp_status_to_errno(u8 comp_status) +{ + if (unlikely(comp_status != 0)) + pr_err("admin command failed[%u]\n", comp_status); + + if (unlikely(comp_status > ENA_ADMIN_UNKNOWN_ERROR)) + return -EINVAL; + + switch (comp_status) { + case ENA_ADMIN_SUCCESS: + return 0; + case ENA_ADMIN_RESOURCE_ALLOCATION_FAILURE: + return -ENOMEM; + case ENA_ADMIN_UNSUPPORTED_OPCODE: + return -EPERM; + case ENA_ADMIN_BAD_OPCODE: + case ENA_ADMIN_MALFORMED_REQUEST: + case ENA_ADMIN_ILLEGAL_PARAMETER: + case ENA_ADMIN_UNKNOWN_ERROR: + return -EINVAL; + } + + return 0; +} + +static int ena_com_wait_and_process_admin_cq_polling(struct ena_comp_ctx *comp_ctx, + struct ena_com_admin_queue *admin_queue) +{ + unsigned long flags; + u32 start_time; + int ret; + + start_time = ((u32)jiffies_to_usecs(jiffies)); + + while (comp_ctx->status == ENA_CMD_SUBMITTED) { + if ((((u32)jiffies_to_usecs(jiffies)) - start_time) > + ADMIN_CMD_TIMEOUT_US) { + pr_err("Wait for completion (polling) timeout\n"); + /* ENA didn't have any completion */ + spin_lock_irqsave(&admin_queue->q_lock, flags); + admin_queue->stats.no_completion++; + admin_queue->running_state = false; + spin_unlock_irqrestore(&admin_queue->q_lock, flags); + + ret = -ETIME; + goto err; + } + + spin_lock_irqsave(&admin_queue->q_lock, flags); + ena_com_handle_admin_completion(admin_queue); + spin_unlock_irqrestore(&admin_queue->q_lock, flags); + + msleep(100); + } + + if (unlikely(comp_ctx->status == ENA_CMD_ABORTED)) { + pr_err("Command was aborted\n"); + spin_lock_irqsave(&admin_queue->q_lock, flags); + admin_queue->stats.aborted_cmd++; + spin_unlock_irqrestore(&admin_queue->q_lock, flags); + ret = -ENODEV; + goto err; + } + + WARN(comp_ctx->status != ENA_CMD_COMPLETED, "Invalid comp status %d\n", + comp_ctx->status); + + ret = ena_com_comp_status_to_errno(comp_ctx->comp_status); +err: + comp_ctxt_release(admin_queue, comp_ctx); + return ret; +} + +static int ena_com_wait_and_process_admin_cq_interrupts(struct ena_comp_ctx *comp_ctx, + struct ena_com_admin_queue *admin_queue) +{ + unsigned long flags; + int ret; + + wait_for_completion_timeout(&comp_ctx->wait_event, + usecs_to_jiffies(ADMIN_CMD_TIMEOUT_US)); + + /* In case the command wasn't completed find out the root cause. + * There might be 2 kinds of errors + * 1) No completion (timeout reached) + * 2) There is completion but the device didn't get any msi-x interrupt. + */ + if (unlikely(comp_ctx->status == ENA_CMD_SUBMITTED)) { + spin_lock_irqsave(&admin_queue->q_lock, flags); + ena_com_handle_admin_completion(admin_queue); + admin_queue->stats.no_completion++; + spin_unlock_irqrestore(&admin_queue->q_lock, flags); + + if (comp_ctx->status == ENA_CMD_COMPLETED) + pr_err("The ena device have completion but the driver didn't receive any MSI-X interrupt (cmd %d)\n", + comp_ctx->cmd_opcode); + else + pr_err("The ena device doesn't send any completion for the admin cmd %d status %d\n", + comp_ctx->cmd_opcode, comp_ctx->status); + + admin_queue->running_state = false; + ret = -ETIME; + goto err; + } + + ret = ena_com_comp_status_to_errno(comp_ctx->comp_status); +err: + comp_ctxt_release(admin_queue, comp_ctx); + return ret; +} + +/* This method read the hardware device register through posting writes + * and waiting for response + * On timeout the function will return ENA_MMIO_READ_TIMEOUT + */ +static u32 ena_com_reg_bar_read32(struct ena_com_dev *ena_dev, u16 offset) +{ + struct ena_com_mmio_read *mmio_read = &ena_dev->mmio_read; + volatile struct ena_admin_ena_mmio_req_read_less_resp *read_resp = + mmio_read->read_resp; + u32 mmio_read_reg, ret; + unsigned long flags; + int i; + + might_sleep(); + + /* If readless is disabled, perform regular read */ + if (!mmio_read->readless_supported) + return readl(ena_dev->reg_bar + offset); + + spin_lock_irqsave(&mmio_read->lock, flags); + mmio_read->seq_num++; + + read_resp->req_id = mmio_read->seq_num + 0xDEAD; + mmio_read_reg = (offset << ENA_REGS_MMIO_REG_READ_REG_OFF_SHIFT) & + ENA_REGS_MMIO_REG_READ_REG_OFF_MASK; + mmio_read_reg |= mmio_read->seq_num & + ENA_REGS_MMIO_REG_READ_REQ_ID_MASK; + + /* make sure read_resp->req_id get updated before the hw can write + * there + */ + wmb(); + + writel(mmio_read_reg, ena_dev->reg_bar + ENA_REGS_MMIO_REG_READ_OFF); + + for (i = 0; i < ENA_REG_READ_TIMEOUT; i++) { + if (read_resp->req_id == mmio_read->seq_num) + break; + + udelay(1); + } + + if (unlikely(i == ENA_REG_READ_TIMEOUT)) { + pr_err("reading reg failed for timeout. expected: req id[%hu] offset[%hu] actual: req id[%hu] offset[%hu]\n", + mmio_read->seq_num, offset, read_resp->req_id, + read_resp->reg_off); + ret = ENA_MMIO_READ_TIMEOUT; + goto err; + } + + if (read_resp->reg_off != offset) { + pr_err("Read failure: wrong offset provided"); + ret = ENA_MMIO_READ_TIMEOUT; + } else { + ret = read_resp->reg_val; + } +err: + spin_unlock_irqrestore(&mmio_read->lock, flags); + + return ret; +} + +/* There are two types to wait for completion. + * Polling mode - wait until the completion is available. + * Async mode - wait on wait queue until the completion is ready + * (or the timeout expired). + * It is expected that the IRQ called ena_com_handle_admin_completion + * to mark the completions. + */ +static int ena_com_wait_and_process_admin_cq(struct ena_comp_ctx *comp_ctx, + struct ena_com_admin_queue *admin_queue) +{ + if (admin_queue->polling) + return ena_com_wait_and_process_admin_cq_polling(comp_ctx, + admin_queue); + + return ena_com_wait_and_process_admin_cq_interrupts(comp_ctx, + admin_queue); +} + +static int ena_com_destroy_io_sq(struct ena_com_dev *ena_dev, + struct ena_com_io_sq *io_sq) +{ + struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue; + struct ena_admin_aq_destroy_sq_cmd destroy_cmd; + struct ena_admin_acq_destroy_sq_resp_desc destroy_resp; + u8 direction; + int ret; + + memset(&destroy_cmd, 0x0, sizeof(struct ena_admin_aq_destroy_sq_cmd)); + + if (io_sq->direction == ENA_COM_IO_QUEUE_DIRECTION_TX) + direction = ENA_ADMIN_SQ_DIRECTION_TX; + else + direction = ENA_ADMIN_SQ_DIRECTION_RX; + + destroy_cmd.sq.sq_identity |= (direction << + ENA_ADMIN_SQ_SQ_DIRECTION_SHIFT) & + ENA_ADMIN_SQ_SQ_DIRECTION_MASK; + + destroy_cmd.sq.sq_idx = io_sq->idx; + destroy_cmd.aq_common_descriptor.opcode = ENA_ADMIN_DESTROY_SQ; + + ret = ena_com_execute_admin_command(admin_queue, + (struct ena_admin_aq_entry *)&destroy_cmd, + sizeof(destroy_cmd), + (struct ena_admin_acq_entry *)&destroy_resp, + sizeof(destroy_resp)); + + if (unlikely(ret && (ret != -ENODEV))) + pr_err("failed to destroy io sq error: %d\n", ret); + + return ret; +} + +static void ena_com_io_queue_free(struct ena_com_dev *ena_dev, + struct ena_com_io_sq *io_sq, + struct ena_com_io_cq *io_cq) +{ + size_t size; + + if (io_cq->cdesc_addr.virt_addr) { + size = io_cq->cdesc_entry_size_in_bytes * io_cq->q_depth; + + dma_free_coherent(ena_dev->dmadev, size, + io_cq->cdesc_addr.virt_addr, + io_cq->cdesc_addr.phys_addr); + + io_cq->cdesc_addr.virt_addr = NULL; + } + + if (io_sq->desc_addr.virt_addr) { + size = io_sq->desc_entry_size * io_sq->q_depth; + + if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) + dma_free_coherent(ena_dev->dmadev, size, + io_sq->desc_addr.virt_addr, + io_sq->desc_addr.phys_addr); + else + devm_kfree(ena_dev->dmadev, io_sq->desc_addr.virt_addr); + + io_sq->desc_addr.virt_addr = NULL; + } +} + +static int wait_for_reset_state(struct ena_com_dev *ena_dev, u32 timeout, + u16 exp_state) +{ + u32 val, i; + + for (i = 0; i < timeout; i++) { + val = ena_com_reg_bar_read32(ena_dev, ENA_REGS_DEV_STS_OFF); + + if (unlikely(val == ENA_MMIO_READ_TIMEOUT)) { + pr_err("Reg read timeout occurred\n"); + return -ETIME; + } + + if ((val & ENA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK) == + exp_state) + return 0; + + /* The resolution of the timeout is 100ms */ + msleep(100); + } + + return -ETIME; +} + +static bool ena_com_check_supported_feature_id(struct ena_com_dev *ena_dev, + enum ena_admin_aq_feature_id feature_id) +{ + u32 feature_mask = 1 << feature_id; + + /* Device attributes is always supported */ + if ((feature_id != ENA_ADMIN_DEVICE_ATTRIBUTES) && + !(ena_dev->supported_features & feature_mask)) + return false; + + return true; +} + +static int ena_com_get_feature_ex(struct ena_com_dev *ena_dev, + struct ena_admin_get_feat_resp *get_resp, + enum ena_admin_aq_feature_id feature_id, + dma_addr_t control_buf_dma_addr, + u32 control_buff_size) +{ + struct ena_com_admin_queue *admin_queue; + struct ena_admin_get_feat_cmd get_cmd; + int ret; + + if (!ena_com_check_supported_feature_id(ena_dev, feature_id)) { + pr_info("Feature %d isn't supported\n", feature_id); + return -EPERM; + } + + memset(&get_cmd, 0x0, sizeof(get_cmd)); + admin_queue = &ena_dev->admin_queue; + + get_cmd.aq_common_descriptor.opcode = ENA_ADMIN_GET_FEATURE; + + if (control_buff_size) + get_cmd.aq_common_descriptor.flags = + ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK; + else + get_cmd.aq_common_descriptor.flags = 0; + + ret = ena_com_mem_addr_set(ena_dev, + &get_cmd.control_buffer.address, + control_buf_dma_addr); + if (unlikely(ret)) { + pr_err("memory address set failed\n"); + return ret; + } + + get_cmd.control_buffer.length = control_buff_size; + + get_cmd.feat_common.feature_id = feature_id; + + ret = ena_com_execute_admin_command(admin_queue, + (struct ena_admin_aq_entry *) + &get_cmd, + sizeof(get_cmd), + (struct ena_admin_acq_entry *) + get_resp, + sizeof(*get_resp)); + + if (unlikely(ret)) + pr_err("Failed to submit get_feature command %d error: %d\n", + feature_id, ret); + + return ret; +} + +static int ena_com_get_feature(struct ena_com_dev *ena_dev, + struct ena_admin_get_feat_resp *get_resp, + enum ena_admin_aq_feature_id feature_id) +{ + return ena_com_get_feature_ex(ena_dev, + get_resp, + feature_id, + 0, + 0); +} + +static int ena_com_hash_key_allocate(struct ena_com_dev *ena_dev) +{ + struct ena_rss *rss = &ena_dev->rss; + + rss->hash_key = + dma_zalloc_coherent(ena_dev->dmadev, sizeof(*rss->hash_key), + &rss->hash_key_dma_addr, GFP_KERNEL); + + if (unlikely(!rss->hash_key)) + return -ENOMEM; + + return 0; +} + +static void ena_com_hash_key_destroy(struct ena_com_dev *ena_dev) +{ + struct ena_rss *rss = &ena_dev->rss; + + if (rss->hash_key) + dma_free_coherent(ena_dev->dmadev, sizeof(*rss->hash_key), + rss->hash_key, rss->hash_key_dma_addr); + rss->hash_key = NULL; +} + +static int ena_com_hash_ctrl_init(struct ena_com_dev *ena_dev) +{ + struct ena_rss *rss = &ena_dev->rss; + + rss->hash_ctrl = + dma_zalloc_coherent(ena_dev->dmadev, sizeof(*rss->hash_ctrl), + &rss->hash_ctrl_dma_addr, GFP_KERNEL); + + if (unlikely(!rss->hash_ctrl)) + return -ENOMEM; + + return 0; +} + +static void ena_com_hash_ctrl_destroy(struct ena_com_dev *ena_dev) +{ + struct ena_rss *rss = &ena_dev->rss; + + if (rss->hash_ctrl) + dma_free_coherent(ena_dev->dmadev, sizeof(*rss->hash_ctrl), + rss->hash_ctrl, rss->hash_ctrl_dma_addr); + rss->hash_ctrl = NULL; +} + +static int ena_com_indirect_table_allocate(struct ena_com_dev *ena_dev, + u16 log_size) +{ + struct ena_rss *rss = &ena_dev->rss; + struct ena_admin_get_feat_resp get_resp; + size_t tbl_size; + int ret; + + ret = ena_com_get_feature(ena_dev, &get_resp, + ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG); + if (unlikely(ret)) + return ret; + + if ((get_resp.u.ind_table.min_size > log_size) || + (get_resp.u.ind_table.max_size < log_size)) { + pr_err("indirect table size doesn't fit. requested size: %d while min is:%d and max %d\n", + 1 << log_size, 1 << get_resp.u.ind_table.min_size, + 1 << get_resp.u.ind_table.max_size); + return -EINVAL; + } + + tbl_size = (1ULL << log_size) * + sizeof(struct ena_admin_rss_ind_table_entry); + + rss->rss_ind_tbl = + dma_zalloc_coherent(ena_dev->dmadev, tbl_size, + &rss->rss_ind_tbl_dma_addr, GFP_KERNEL); + if (unlikely(!rss->rss_ind_tbl)) + goto mem_err1; + + tbl_size = (1ULL << log_size) * sizeof(u16); + rss->host_rss_ind_tbl = + devm_kzalloc(ena_dev->dmadev, tbl_size, GFP_KERNEL); + if (unlikely(!rss->host_rss_ind_tbl)) + goto mem_err2; + + rss->tbl_log_size = log_size; + + return 0; + +mem_err2: + tbl_size = (1ULL << log_size) * + sizeof(struct ena_admin_rss_ind_table_entry); + + dma_free_coherent(ena_dev->dmadev, tbl_size, rss->rss_ind_tbl, + rss->rss_ind_tbl_dma_addr); + rss->rss_ind_tbl = NULL; +mem_err1: + rss->tbl_log_size = 0; + return -ENOMEM; +} + +static void ena_com_indirect_table_destroy(struct ena_com_dev *ena_dev) +{ + struct ena_rss *rss = &ena_dev->rss; + size_t tbl_size = (1ULL << rss->tbl_log_size) * + sizeof(struct ena_admin_rss_ind_table_entry); + + if (rss->rss_ind_tbl) + dma_free_coherent(ena_dev->dmadev, tbl_size, rss->rss_ind_tbl, + rss->rss_ind_tbl_dma_addr); + rss->rss_ind_tbl = NULL; + + if (rss->host_rss_ind_tbl) + devm_kfree(ena_dev->dmadev, rss->host_rss_ind_tbl); + rss->host_rss_ind_tbl = NULL; +} + +static int ena_com_create_io_sq(struct ena_com_dev *ena_dev, + struct ena_com_io_sq *io_sq, u16 cq_idx) +{ + struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue; + struct ena_admin_aq_create_sq_cmd create_cmd; + struct ena_admin_acq_create_sq_resp_desc cmd_completion; + u8 direction; + int ret; + + memset(&create_cmd, 0x0, sizeof(struct ena_admin_aq_create_sq_cmd)); + + create_cmd.aq_common_descriptor.opcode = ENA_ADMIN_CREATE_SQ; + + if (io_sq->direction == ENA_COM_IO_QUEUE_DIRECTION_TX) + direction = ENA_ADMIN_SQ_DIRECTION_TX; + else + direction = ENA_ADMIN_SQ_DIRECTION_RX; + + create_cmd.sq_identity |= (direction << + ENA_ADMIN_AQ_CREATE_SQ_CMD_SQ_DIRECTION_SHIFT) & + ENA_ADMIN_AQ_CREATE_SQ_CMD_SQ_DIRECTION_MASK; + + create_cmd.sq_caps_2 |= io_sq->mem_queue_type & + ENA_ADMIN_AQ_CREATE_SQ_CMD_PLACEMENT_POLICY_MASK; + + create_cmd.sq_caps_2 |= (ENA_ADMIN_COMPLETION_POLICY_DESC << + ENA_ADMIN_AQ_CREATE_SQ_CMD_COMPLETION_POLICY_SHIFT) & + ENA_ADMIN_AQ_CREATE_SQ_CMD_COMPLETION_POLICY_MASK; + + create_cmd.sq_caps_3 |= + ENA_ADMIN_AQ_CREATE_SQ_CMD_IS_PHYSICALLY_CONTIGUOUS_MASK; + + create_cmd.cq_idx = cq_idx; + create_cmd.sq_depth = io_sq->q_depth; + + if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) { + ret = ena_com_mem_addr_set(ena_dev, + &create_cmd.sq_ba, + io_sq->desc_addr.phys_addr); + if (unlikely(ret)) { + pr_err("memory address set failed\n"); + return ret; + } + } + + ret = ena_com_execute_admin_command(admin_queue, + (struct ena_admin_aq_entry *)&create_cmd, + sizeof(create_cmd), + (struct ena_admin_acq_entry *)&cmd_completion, + sizeof(cmd_completion)); + if (unlikely(ret)) { + pr_err("Failed to create IO SQ. error: %d\n", ret); + return ret; + } + + io_sq->idx = cmd_completion.sq_idx; + + io_sq->db_addr = (u32 __iomem *)((uintptr_t)ena_dev->reg_bar + + (uintptr_t)cmd_completion.sq_doorbell_offset); + + if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { + io_sq->header_addr = (u8 __iomem *)((uintptr_t)ena_dev->mem_bar + + cmd_completion.llq_headers_offset); + + io_sq->desc_addr.pbuf_dev_addr = + (u8 __iomem *)((uintptr_t)ena_dev->mem_bar + + cmd_completion.llq_descriptors_offset); + } + + pr_debug("created sq[%u], depth[%u]\n", io_sq->idx, io_sq->q_depth); + + return ret; +} + +static int ena_com_ind_tbl_convert_to_device(struct ena_com_dev *ena_dev) +{ + struct ena_rss *rss = &ena_dev->rss; + struct ena_com_io_sq *io_sq; + u16 qid; + int i; + + for (i = 0; i < 1 << rss->tbl_log_size; i++) { + qid = rss->host_rss_ind_tbl[i]; + if (qid >= ENA_TOTAL_NUM_QUEUES) + return -EINVAL; + + io_sq = &ena_dev->io_sq_queues[qid]; + + if (io_sq->direction != ENA_COM_IO_QUEUE_DIRECTION_RX) + return -EINVAL; + + rss->rss_ind_tbl[i].cq_idx = io_sq->idx; + } + + return 0; +} + +static int ena_com_ind_tbl_convert_from_device(struct ena_com_dev *ena_dev) +{ + u16 dev_idx_to_host_tbl[ENA_TOTAL_NUM_QUEUES] = { (u16)-1 }; + struct ena_rss *rss = &ena_dev->rss; + u8 idx; + u16 i; + + for (i = 0; i < ENA_TOTAL_NUM_QUEUES; i++) + dev_idx_to_host_tbl[ena_dev->io_sq_queues[i].idx] = i; + + for (i = 0; i < 1 << rss->tbl_log_size; i++) { + if (rss->rss_ind_tbl[i].cq_idx > ENA_TOTAL_NUM_QUEUES) + return -EINVAL; + idx = (u8)rss->rss_ind_tbl[i].cq_idx; + + if (dev_idx_to_host_tbl[idx] > ENA_TOTAL_NUM_QUEUES) + return -EINVAL; + + rss->host_rss_ind_tbl[i] = dev_idx_to_host_tbl[idx]; + } + + return 0; +} + +static int ena_com_init_interrupt_moderation_table(struct ena_com_dev *ena_dev) +{ + size_t size; + + size = sizeof(struct ena_intr_moder_entry) * ENA_INTR_MAX_NUM_OF_LEVELS; + + ena_dev->intr_moder_tbl = + devm_kzalloc(ena_dev->dmadev, size, GFP_KERNEL); + if (!ena_dev->intr_moder_tbl) + return -ENOMEM; + + ena_com_config_default_interrupt_moderation_table(ena_dev); + + return 0; +} + +static void ena_com_update_intr_delay_resolution(struct ena_com_dev *ena_dev, + u16 intr_delay_resolution) +{ + struct ena_intr_moder_entry *intr_moder_tbl = ena_dev->intr_moder_tbl; + unsigned int i; + + if (!intr_delay_resolution) { + pr_err("Illegal intr_delay_resolution provided. Going to use default 1 usec resolution\n"); + intr_delay_resolution = 1; + } + ena_dev->intr_delay_resolution = intr_delay_resolution; + + /* update Rx */ + for (i = 0; i < ENA_INTR_MAX_NUM_OF_LEVELS; i++) + intr_moder_tbl[i].intr_moder_interval /= intr_delay_resolution; + + /* update Tx */ + ena_dev->intr_moder_tx_interval /= intr_delay_resolution; +} + +/*****************************************************************************/ +/******************************* API ******************************/ +/*****************************************************************************/ + +int ena_com_execute_admin_command(struct ena_com_admin_queue *admin_queue, + struct ena_admin_aq_entry *cmd, + size_t cmd_size, + struct ena_admin_acq_entry *comp, + size_t comp_size) +{ + struct ena_comp_ctx *comp_ctx; + int ret; + + comp_ctx = ena_com_submit_admin_cmd(admin_queue, cmd, cmd_size, + comp, comp_size); + if (unlikely(IS_ERR(comp_ctx))) { + pr_err("Failed to submit command [%ld]\n", PTR_ERR(comp_ctx)); + return PTR_ERR(comp_ctx); + } + + ret = ena_com_wait_and_process_admin_cq(comp_ctx, admin_queue); + if (unlikely(ret)) { + if (admin_queue->running_state) + pr_err("Failed to process command. ret = %d\n", ret); + else + pr_debug("Failed to process command. ret = %d\n", ret); + } + return ret; +} + +int ena_com_create_io_cq(struct ena_com_dev *ena_dev, + struct ena_com_io_cq *io_cq) +{ + struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue; + struct ena_admin_aq_create_cq_cmd create_cmd; + struct ena_admin_acq_create_cq_resp_desc cmd_completion; + int ret; + + memset(&create_cmd, 0x0, sizeof(struct ena_admin_aq_create_cq_cmd)); + + create_cmd.aq_common_descriptor.opcode = ENA_ADMIN_CREATE_CQ; + + create_cmd.cq_caps_2 |= (io_cq->cdesc_entry_size_in_bytes / 4) & + ENA_ADMIN_AQ_CREATE_CQ_CMD_CQ_ENTRY_SIZE_WORDS_MASK; + create_cmd.cq_caps_1 |= + ENA_ADMIN_AQ_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED_MASK; + + create_cmd.msix_vector = io_cq->msix_vector; + create_cmd.cq_depth = io_cq->q_depth; + + ret = ena_com_mem_addr_set(ena_dev, + &create_cmd.cq_ba, + io_cq->cdesc_addr.phys_addr); + if (unlikely(ret)) { + pr_err("memory address set failed\n"); + return ret; + } + + ret = ena_com_execute_admin_command(admin_queue, + (struct ena_admin_aq_entry *)&create_cmd, + sizeof(create_cmd), + (struct ena_admin_acq_entry *)&cmd_completion, + sizeof(cmd_completion)); + if (unlikely(ret)) { + pr_err("Failed to create IO CQ. error: %d\n", ret); + return ret; + } + + io_cq->idx = cmd_completion.cq_idx; + + io_cq->unmask_reg = (u32 __iomem *)((uintptr_t)ena_dev->reg_bar + + cmd_completion.cq_interrupt_unmask_register_offset); + + if (cmd_completion.cq_head_db_register_offset) + io_cq->cq_head_db_reg = + (u32 __iomem *)((uintptr_t)ena_dev->reg_bar + + cmd_completion.cq_head_db_register_offset); + + if (cmd_completion.numa_node_register_offset) + io_cq->numa_node_cfg_reg = + (u32 __iomem *)((uintptr_t)ena_dev->reg_bar + + cmd_completion.numa_node_register_offset); + + pr_debug("created cq[%u], depth[%u]\n", io_cq->idx, io_cq->q_depth); + + return ret; +} + +int ena_com_get_io_handlers(struct ena_com_dev *ena_dev, u16 qid, + struct ena_com_io_sq **io_sq, + struct ena_com_io_cq **io_cq) +{ + if (qid >= ENA_TOTAL_NUM_QUEUES) { + pr_err("Invalid queue number %d but the max is %d\n", qid, + ENA_TOTAL_NUM_QUEUES); + return -EINVAL; + } + + *io_sq = &ena_dev->io_sq_queues[qid]; + *io_cq = &ena_dev->io_cq_queues[qid]; + + return 0; +} + +void ena_com_abort_admin_commands(struct ena_com_dev *ena_dev) +{ + struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue; + struct ena_comp_ctx *comp_ctx; + u16 i; + + if (!admin_queue->comp_ctx) + return; + + for (i = 0; i < admin_queue->q_depth; i++) { + comp_ctx = get_comp_ctxt(admin_queue, i, false); + if (unlikely(!comp_ctx)) + break; + + comp_ctx->status = ENA_CMD_ABORTED; + + complete(&comp_ctx->wait_event); + } +} + +void ena_com_wait_for_abort_completion(struct ena_com_dev *ena_dev) +{ + struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue; + unsigned long flags; + + spin_lock_irqsave(&admin_queue->q_lock, flags); + while (atomic_read(&admin_queue->outstanding_cmds) != 0) { + spin_unlock_irqrestore(&admin_queue->q_lock, flags); + msleep(20); + spin_lock_irqsave(&admin_queue->q_lock, flags); + } + spin_unlock_irqrestore(&admin_queue->q_lock, flags); +} + +int ena_com_destroy_io_cq(struct ena_com_dev *ena_dev, + struct ena_com_io_cq *io_cq) +{ + struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue; + struct ena_admin_aq_destroy_cq_cmd destroy_cmd; + struct ena_admin_acq_destroy_cq_resp_desc destroy_resp; + int ret; + + memset(&destroy_cmd, 0x0, sizeof(struct ena_admin_aq_destroy_sq_cmd)); + + destroy_cmd.cq_idx = io_cq->idx; + destroy_cmd.aq_common_descriptor.opcode = ENA_ADMIN_DESTROY_CQ; + + ret = ena_com_execute_admin_command(admin_queue, + (struct ena_admin_aq_entry *)&destroy_cmd, + sizeof(destroy_cmd), + (struct ena_admin_acq_entry *)&destroy_resp, + sizeof(destroy_resp)); + + if (unlikely(ret && (ret != -ENODEV))) + pr_err("Failed to destroy IO CQ. error: %d\n", ret); + + return ret; +} + +bool ena_com_get_admin_running_state(struct ena_com_dev *ena_dev) +{ + return ena_dev->admin_queue.running_state; +} + +void ena_com_set_admin_running_state(struct ena_com_dev *ena_dev, bool state) +{ + struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue; + unsigned long flags; + + spin_lock_irqsave(&admin_queue->q_lock, flags); + ena_dev->admin_queue.running_state = state; + spin_unlock_irqrestore(&admin_queue->q_lock, flags); +} + +void ena_com_admin_aenq_enable(struct ena_com_dev *ena_dev) +{ + u16 depth = ena_dev->aenq.q_depth; + + WARN(ena_dev->aenq.head != depth, "Invalid AENQ state\n"); + + /* Init head_db to mark that all entries in the queue + * are initially available + */ + writel(depth, ena_dev->reg_bar + ENA_REGS_AENQ_HEAD_DB_OFF); +} + +int ena_com_set_aenq_config(struct ena_com_dev *ena_dev, u32 groups_flag) +{ + struct ena_com_admin_queue *admin_queue; + struct ena_admin_set_feat_cmd cmd; + struct ena_admin_set_feat_resp resp; + struct ena_admin_get_feat_resp get_resp; + int ret; + + ret = ena_com_get_feature(ena_dev, &get_resp, ENA_ADMIN_AENQ_CONFIG); + if (ret) { + pr_info("Can't get aenq configuration\n"); + return ret; + } + + if ((get_resp.u.aenq.supported_groups & groups_flag) != groups_flag) { + pr_warn("Trying to set unsupported aenq events. supported flag: %x asked flag: %x\n", + get_resp.u.aenq.supported_groups, groups_flag); + return -EPERM; + } + + memset(&cmd, 0x0, sizeof(cmd)); + admin_queue = &ena_dev->admin_queue; + + cmd.aq_common_descriptor.opcode = ENA_ADMIN_SET_FEATURE; + cmd.aq_common_descriptor.flags = 0; + cmd.feat_common.feature_id = ENA_ADMIN_AENQ_CONFIG; + cmd.u.aenq.enabled_groups = groups_flag; + + ret = ena_com_execute_admin_command(admin_queue, + (struct ena_admin_aq_entry *)&cmd, + sizeof(cmd), + (struct ena_admin_acq_entry *)&resp, + sizeof(resp)); + + if (unlikely(ret)) + pr_err("Failed to config AENQ ret: %d\n", ret); + + return ret; +} + +int ena_com_get_dma_width(struct ena_com_dev *ena_dev) +{ + u32 caps = ena_com_reg_bar_read32(ena_dev, ENA_REGS_CAPS_OFF); + int width; + + if (unlikely(caps == ENA_MMIO_READ_TIMEOUT)) { + pr_err("Reg read timeout occurred\n"); + return -ETIME; + } + + width = (caps & ENA_REGS_CAPS_DMA_ADDR_WIDTH_MASK) >> + ENA_REGS_CAPS_DMA_ADDR_WIDTH_SHIFT; + + pr_debug("ENA dma width: %d\n", width); + + if ((width < 32) || width > ENA_MAX_PHYS_ADDR_SIZE_BITS) { + pr_err("DMA width illegal value: %d\n", width); + return -EINVAL; + } + + ena_dev->dma_addr_bits = width; + + return width; +} + +int ena_com_validate_version(struct ena_com_dev *ena_dev) +{ + u32 ver; + u32 ctrl_ver; + u32 ctrl_ver_masked; + + /* Make sure the ENA version and the controller version are at least + * as the driver expects + */ + ver = ena_com_reg_bar_read32(ena_dev, ENA_REGS_VERSION_OFF); + ctrl_ver = ena_com_reg_bar_read32(ena_dev, + ENA_REGS_CONTROLLER_VERSION_OFF); + + if (unlikely((ver == ENA_MMIO_READ_TIMEOUT) || + (ctrl_ver == ENA_MMIO_READ_TIMEOUT))) { + pr_err("Reg read timeout occurred\n"); + return -ETIME; + } + + pr_info("ena device version: %d.%d\n", + (ver & ENA_REGS_VERSION_MAJOR_VERSION_MASK) >> + ENA_REGS_VERSION_MAJOR_VERSION_SHIFT, + ver & ENA_REGS_VERSION_MINOR_VERSION_MASK); + + if (ver < MIN_ENA_VER) { + pr_err("ENA version is lower than the minimal version the driver supports\n"); + return -1; + } + + pr_info("ena controller version: %d.%d.%d implementation version %d\n", + (ctrl_ver & ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK) >> + ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT, + (ctrl_ver & ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK) >> + ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT, + (ctrl_ver & ENA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK), + (ctrl_ver & ENA_REGS_CONTROLLER_VERSION_IMPL_ID_MASK) >> + ENA_REGS_CONTROLLER_VERSION_IMPL_ID_SHIFT); + + ctrl_ver_masked = + (ctrl_ver & ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK) | + (ctrl_ver & ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK) | + (ctrl_ver & ENA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK); + + /* Validate the ctrl version without the implementation ID */ + if (ctrl_ver_masked < MIN_ENA_CTRL_VER) { + pr_err("ENA ctrl version is lower than the minimal ctrl version the driver supports\n"); + return -1; + } + + return 0; +} + +void ena_com_admin_destroy(struct ena_com_dev *ena_dev) +{ + struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue; + struct ena_com_admin_cq *cq = &admin_queue->cq; + struct ena_com_admin_sq *sq = &admin_queue->sq; + struct ena_com_aenq *aenq = &ena_dev->aenq; + u16 size; + + if (admin_queue->comp_ctx) + devm_kfree(ena_dev->dmadev, admin_queue->comp_ctx); + admin_queue->comp_ctx = NULL; + size = ADMIN_SQ_SIZE(admin_queue->q_depth); + if (sq->entries) + dma_free_coherent(ena_dev->dmadev, size, sq->entries, + sq->dma_addr); + sq->entries = NULL; + + size = ADMIN_CQ_SIZE(admin_queue->q_depth); + if (cq->entries) + dma_free_coherent(ena_dev->dmadev, size, cq->entries, + cq->dma_addr); + cq->entries = NULL; + + size = ADMIN_AENQ_SIZE(aenq->q_depth); + if (ena_dev->aenq.entries) + dma_free_coherent(ena_dev->dmadev, size, aenq->entries, + aenq->dma_addr); + aenq->entries = NULL; +} + +void ena_com_set_admin_polling_mode(struct ena_com_dev *ena_dev, bool polling) +{ + ena_dev->admin_queue.polling = polling; +} + +int ena_com_mmio_reg_read_request_init(struct ena_com_dev *ena_dev) +{ + struct ena_com_mmio_read *mmio_read = &ena_dev->mmio_read; + + spin_lock_init(&mmio_read->lock); + mmio_read->read_resp = + dma_zalloc_coherent(ena_dev->dmadev, + sizeof(*mmio_read->read_resp), + &mmio_read->read_resp_dma_addr, GFP_KERNEL); + if (unlikely(!mmio_read->read_resp)) + return -ENOMEM; + + ena_com_mmio_reg_read_request_write_dev_addr(ena_dev); + + mmio_read->read_resp->req_id = 0x0; + mmio_read->seq_num = 0x0; + mmio_read->readless_supported = true; + + return 0; +} + +void ena_com_set_mmio_read_mode(struct ena_com_dev *ena_dev, bool readless_supported) +{ + struct ena_com_mmio_read *mmio_read = &ena_dev->mmio_read; + + mmio_read->readless_supported = readless_supported; +} + +void ena_com_mmio_reg_read_request_destroy(struct ena_com_dev *ena_dev) +{ + struct ena_com_mmio_read *mmio_read = &ena_dev->mmio_read; + + writel(0x0, ena_dev->reg_bar + ENA_REGS_MMIO_RESP_LO_OFF); + writel(0x0, ena_dev->reg_bar + ENA_REGS_MMIO_RESP_HI_OFF); + + dma_free_coherent(ena_dev->dmadev, sizeof(*mmio_read->read_resp), + mmio_read->read_resp, mmio_read->read_resp_dma_addr); + + mmio_read->read_resp = NULL; +} + +void ena_com_mmio_reg_read_request_write_dev_addr(struct ena_com_dev *ena_dev) +{ + struct ena_com_mmio_read *mmio_read = &ena_dev->mmio_read; + u32 addr_low, addr_high; + + addr_low = ENA_DMA_ADDR_TO_UINT32_LOW(mmio_read->read_resp_dma_addr); + addr_high = ENA_DMA_ADDR_TO_UINT32_HIGH(mmio_read->read_resp_dma_addr); + + writel(addr_low, ena_dev->reg_bar + ENA_REGS_MMIO_RESP_LO_OFF); + writel(addr_high, ena_dev->reg_bar + ENA_REGS_MMIO_RESP_HI_OFF); +} + +int ena_com_admin_init(struct ena_com_dev *ena_dev, + struct ena_aenq_handlers *aenq_handlers, + bool init_spinlock) +{ + struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue; + u32 aq_caps, acq_caps, dev_sts, addr_low, addr_high; + int ret; + + dev_sts = ena_com_reg_bar_read32(ena_dev, ENA_REGS_DEV_STS_OFF); + + if (unlikely(dev_sts == ENA_MMIO_READ_TIMEOUT)) { + pr_err("Reg read timeout occurred\n"); + return -ETIME; + } + + if (!(dev_sts & ENA_REGS_DEV_STS_READY_MASK)) { + pr_err("Device isn't ready, abort com init\n"); + return -ENODEV; + } + + admin_queue->q_depth = ENA_ADMIN_QUEUE_DEPTH; + + admin_queue->q_dmadev = ena_dev->dmadev; + admin_queue->polling = false; + admin_queue->curr_cmd_id = 0; + + atomic_set(&admin_queue->outstanding_cmds, 0); + + if (init_spinlock) + spin_lock_init(&admin_queue->q_lock); + + ret = ena_com_init_comp_ctxt(admin_queue); + if (ret) + goto error; + + ret = ena_com_admin_init_sq(admin_queue); + if (ret) + goto error; + + ret = ena_com_admin_init_cq(admin_queue); + if (ret) + goto error; + + admin_queue->sq.db_addr = (u32 __iomem *)((uintptr_t)ena_dev->reg_bar + + ENA_REGS_AQ_DB_OFF); + + addr_low = ENA_DMA_ADDR_TO_UINT32_LOW(admin_queue->sq.dma_addr); + addr_high = ENA_DMA_ADDR_TO_UINT32_HIGH(admin_queue->sq.dma_addr); + + writel(addr_low, ena_dev->reg_bar + ENA_REGS_AQ_BASE_LO_OFF); + writel(addr_high, ena_dev->reg_bar + ENA_REGS_AQ_BASE_HI_OFF); + + addr_low = ENA_DMA_ADDR_TO_UINT32_LOW(admin_queue->cq.dma_addr); + addr_high = ENA_DMA_ADDR_TO_UINT32_HIGH(admin_queue->cq.dma_addr); + + writel(addr_low, ena_dev->reg_bar + ENA_REGS_ACQ_BASE_LO_OFF); + writel(addr_high, ena_dev->reg_bar + ENA_REGS_ACQ_BASE_HI_OFF); + + aq_caps = 0; + aq_caps |= admin_queue->q_depth & ENA_REGS_AQ_CAPS_AQ_DEPTH_MASK; + aq_caps |= (sizeof(struct ena_admin_aq_entry) << + ENA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_SHIFT) & + ENA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_MASK; + + acq_caps = 0; + acq_caps |= admin_queue->q_depth & ENA_REGS_ACQ_CAPS_ACQ_DEPTH_MASK; + acq_caps |= (sizeof(struct ena_admin_acq_entry) << + ENA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_SHIFT) & + ENA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_MASK; + + writel(aq_caps, ena_dev->reg_bar + ENA_REGS_AQ_CAPS_OFF); + writel(acq_caps, ena_dev->reg_bar + ENA_REGS_ACQ_CAPS_OFF); + ret = ena_com_admin_init_aenq(ena_dev, aenq_handlers); + if (ret) + goto error; + + admin_queue->running_state = true; + + return 0; +error: + ena_com_admin_destroy(ena_dev); + + return ret; +} + +int ena_com_create_io_queue(struct ena_com_dev *ena_dev, + struct ena_com_create_io_ctx *ctx) +{ + struct ena_com_io_sq *io_sq; + struct ena_com_io_cq *io_cq; + int ret; + + if (ctx->qid >= ENA_TOTAL_NUM_QUEUES) { + pr_err("Qid (%d) is bigger than max num of queues (%d)\n", + ctx->qid, ENA_TOTAL_NUM_QUEUES); + return -EINVAL; + } + + io_sq = &ena_dev->io_sq_queues[ctx->qid]; + io_cq = &ena_dev->io_cq_queues[ctx->qid]; + + memset(io_sq, 0x0, sizeof(struct ena_com_io_sq)); + memset(io_cq, 0x0, sizeof(struct ena_com_io_cq)); + + /* Init CQ */ + io_cq->q_depth = ctx->queue_size; + io_cq->direction = ctx->direction; + io_cq->qid = ctx->qid; + + io_cq->msix_vector = ctx->msix_vector; + + io_sq->q_depth = ctx->queue_size; + io_sq->direction = ctx->direction; + io_sq->qid = ctx->qid; + + io_sq->mem_queue_type = ctx->mem_queue_type; + + if (ctx->direction == ENA_COM_IO_QUEUE_DIRECTION_TX) + /* header length is limited to 8 bits */ + io_sq->tx_max_header_size = + min_t(u32, ena_dev->tx_max_header_size, SZ_256); + + ret = ena_com_init_io_sq(ena_dev, ctx, io_sq); + if (ret) + goto error; + ret = ena_com_init_io_cq(ena_dev, ctx, io_cq); + if (ret) + goto error; + + ret = ena_com_create_io_cq(ena_dev, io_cq); + if (ret) + goto error; + + ret = ena_com_create_io_sq(ena_dev, io_sq, io_cq->idx); + if (ret) + goto destroy_io_cq; + + return 0; + +destroy_io_cq: + ena_com_destroy_io_cq(ena_dev, io_cq); +error: + ena_com_io_queue_free(ena_dev, io_sq, io_cq); + return ret; +} + +void ena_com_destroy_io_queue(struct ena_com_dev *ena_dev, u16 qid) +{ + struct ena_com_io_sq *io_sq; + struct ena_com_io_cq *io_cq; + + if (qid >= ENA_TOTAL_NUM_QUEUES) { + pr_err("Qid (%d) is bigger than max num of queues (%d)\n", qid, + ENA_TOTAL_NUM_QUEUES); + return; + } + + io_sq = &ena_dev->io_sq_queues[qid]; + io_cq = &ena_dev->io_cq_queues[qid]; + + ena_com_destroy_io_sq(ena_dev, io_sq); + ena_com_destroy_io_cq(ena_dev, io_cq); + + ena_com_io_queue_free(ena_dev, io_sq, io_cq); +} + +int ena_com_get_link_params(struct ena_com_dev *ena_dev, + struct ena_admin_get_feat_resp *resp) +{ + return ena_com_get_feature(ena_dev, resp, ENA_ADMIN_LINK_CONFIG); +} + +int ena_com_get_dev_attr_feat(struct ena_com_dev *ena_dev, + struct ena_com_dev_get_features_ctx *get_feat_ctx) +{ + struct ena_admin_get_feat_resp get_resp; + int rc; + + rc = ena_com_get_feature(ena_dev, &get_resp, + ENA_ADMIN_DEVICE_ATTRIBUTES); + if (rc) + return rc; + + memcpy(&get_feat_ctx->dev_attr, &get_resp.u.dev_attr, + sizeof(get_resp.u.dev_attr)); + ena_dev->supported_features = get_resp.u.dev_attr.supported_features; + + rc = ena_com_get_feature(ena_dev, &get_resp, + ENA_ADMIN_MAX_QUEUES_NUM); + if (rc) + return rc; + + memcpy(&get_feat_ctx->max_queues, &get_resp.u.max_queue, + sizeof(get_resp.u.max_queue)); + ena_dev->tx_max_header_size = get_resp.u.max_queue.max_header_size; + + rc = ena_com_get_feature(ena_dev, &get_resp, + ENA_ADMIN_AENQ_CONFIG); + if (rc) + return rc; + + memcpy(&get_feat_ctx->aenq, &get_resp.u.aenq, + sizeof(get_resp.u.aenq)); + + rc = ena_com_get_feature(ena_dev, &get_resp, + ENA_ADMIN_STATELESS_OFFLOAD_CONFIG); + if (rc) + return rc; + + memcpy(&get_feat_ctx->offload, &get_resp.u.offload, + sizeof(get_resp.u.offload)); + + return 0; +} + +void ena_com_admin_q_comp_intr_handler(struct ena_com_dev *ena_dev) +{ + ena_com_handle_admin_completion(&ena_dev->admin_queue); +} + +/* ena_handle_specific_aenq_event: + * return the handler that is relevant to the specific event group + */ +static ena_aenq_handler ena_com_get_specific_aenq_cb(struct ena_com_dev *dev, + u16 group) +{ + struct ena_aenq_handlers *aenq_handlers = dev->aenq.aenq_handlers; + + if ((group < ENA_MAX_HANDLERS) && aenq_handlers->handlers[group]) + return aenq_handlers->handlers[group]; + + return aenq_handlers->unimplemented_handler; +} + +/* ena_aenq_intr_handler: + * handles the aenq incoming events. + * pop events from the queue and apply the specific handler + */ +void ena_com_aenq_intr_handler(struct ena_com_dev *dev, void *data) +{ + struct ena_admin_aenq_entry *aenq_e; + struct ena_admin_aenq_common_desc *aenq_common; + struct ena_com_aenq *aenq = &dev->aenq; + ena_aenq_handler handler_cb; + u16 masked_head, processed = 0; + u8 phase; + + masked_head = aenq->head & (aenq->q_depth - 1); + phase = aenq->phase; + aenq_e = &aenq->entries[masked_head]; /* Get first entry */ + aenq_common = &aenq_e->aenq_common_desc; + + /* Go over all the events */ + while ((aenq_common->flags & ENA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK) == + phase) { + pr_debug("AENQ! Group[%x] Syndrom[%x] timestamp: [%llus]\n", + aenq_common->group, aenq_common->syndrom, + (u64)aenq_common->timestamp_low + + ((u64)aenq_common->timestamp_high << 32)); + + /* Handle specific event*/ + handler_cb = ena_com_get_specific_aenq_cb(dev, + aenq_common->group); + handler_cb(data, aenq_e); /* call the actual event handler*/ + + /* Get next event entry */ + masked_head++; + processed++; + + if (unlikely(masked_head == aenq->q_depth)) { + masked_head = 0; + phase = !phase; + } + aenq_e = &aenq->entries[masked_head]; + aenq_common = &aenq_e->aenq_common_desc; + } + + aenq->head += processed; + aenq->phase = phase; + + /* Don't update aenq doorbell if there weren't any processed events */ + if (!processed) + return; + + /* write the aenq doorbell after all AENQ descriptors were read */ + mb(); + writel((u32)aenq->head, dev->reg_bar + ENA_REGS_AENQ_HEAD_DB_OFF); +} + +int ena_com_dev_reset(struct ena_com_dev *ena_dev) +{ + u32 stat, timeout, cap, reset_val; + int rc; + + stat = ena_com_reg_bar_read32(ena_dev, ENA_REGS_DEV_STS_OFF); + cap = ena_com_reg_bar_read32(ena_dev, ENA_REGS_CAPS_OFF); + + if (unlikely((stat == ENA_MMIO_READ_TIMEOUT) || + (cap == ENA_MMIO_READ_TIMEOUT))) { + pr_err("Reg read32 timeout occurred\n"); + return -ETIME; + } + + if ((stat & ENA_REGS_DEV_STS_READY_MASK) == 0) { + pr_err("Device isn't ready, can't reset device\n"); + return -EINVAL; + } + + timeout = (cap & ENA_REGS_CAPS_RESET_TIMEOUT_MASK) >> + ENA_REGS_CAPS_RESET_TIMEOUT_SHIFT; + if (timeout == 0) { + pr_err("Invalid timeout value\n"); + return -EINVAL; + } + + /* start reset */ + reset_val = ENA_REGS_DEV_CTL_DEV_RESET_MASK; + writel(reset_val, ena_dev->reg_bar + ENA_REGS_DEV_CTL_OFF); + + /* Write again the MMIO read request address */ + ena_com_mmio_reg_read_request_write_dev_addr(ena_dev); + + rc = wait_for_reset_state(ena_dev, timeout, + ENA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK); + if (rc != 0) { + pr_err("Reset indication didn't turn on\n"); + return rc; + } + + /* reset done */ + writel(0, ena_dev->reg_bar + ENA_REGS_DEV_CTL_OFF); + rc = wait_for_reset_state(ena_dev, timeout, 0); + if (rc != 0) { + pr_err("Reset indication didn't turn off\n"); + return rc; + } + + return 0; +} + +static int ena_get_dev_stats(struct ena_com_dev *ena_dev, + struct ena_com_stats_ctx *ctx, + enum ena_admin_get_stats_type type) +{ + struct ena_admin_aq_get_stats_cmd *get_cmd = &ctx->get_cmd; + struct ena_admin_acq_get_stats_resp *get_resp = &ctx->get_resp; + struct ena_com_admin_queue *admin_queue; + int ret; + + admin_queue = &ena_dev->admin_queue; + + get_cmd->aq_common_descriptor.opcode = ENA_ADMIN_GET_STATS; + get_cmd->aq_common_descriptor.flags = 0; + get_cmd->type = type; + + ret = ena_com_execute_admin_command(admin_queue, + (struct ena_admin_aq_entry *)get_cmd, + sizeof(*get_cmd), + (struct ena_admin_acq_entry *)get_resp, + sizeof(*get_resp)); + + if (unlikely(ret)) + pr_err("Failed to get stats. error: %d\n", ret); + + return ret; +} + +int ena_com_get_dev_basic_stats(struct ena_com_dev *ena_dev, + struct ena_admin_basic_stats *stats) +{ + struct ena_com_stats_ctx ctx; + int ret; + + memset(&ctx, 0x0, sizeof(ctx)); + ret = ena_get_dev_stats(ena_dev, &ctx, ENA_ADMIN_GET_STATS_TYPE_BASIC); + if (likely(ret == 0)) + memcpy(stats, &ctx.get_resp.basic_stats, + sizeof(ctx.get_resp.basic_stats)); + + return ret; +} + +int ena_com_set_dev_mtu(struct ena_com_dev *ena_dev, int mtu) +{ + struct ena_com_admin_queue *admin_queue; + struct ena_admin_set_feat_cmd cmd; + struct ena_admin_set_feat_resp resp; + int ret; + + if (!ena_com_check_supported_feature_id(ena_dev, ENA_ADMIN_MTU)) { + pr_info("Feature %d isn't supported\n", ENA_ADMIN_MTU); + return -EPERM; + } + + memset(&cmd, 0x0, sizeof(cmd)); + admin_queue = &ena_dev->admin_queue; + + cmd.aq_common_descriptor.opcode = ENA_ADMIN_SET_FEATURE; + cmd.aq_common_descriptor.flags = 0; + cmd.feat_common.feature_id = ENA_ADMIN_MTU; + cmd.u.mtu.mtu = mtu; + + ret = ena_com_execute_admin_command(admin_queue, + (struct ena_admin_aq_entry *)&cmd, + sizeof(cmd), + (struct ena_admin_acq_entry *)&resp, + sizeof(resp)); + + if (unlikely(ret)) + pr_err("Failed to set mtu %d. error: %d\n", mtu, ret); + + return ret; +} + +int ena_com_get_offload_settings(struct ena_com_dev *ena_dev, + struct ena_admin_feature_offload_desc *offload) +{ + int ret; + struct ena_admin_get_feat_resp resp; + + ret = ena_com_get_feature(ena_dev, &resp, + ENA_ADMIN_STATELESS_OFFLOAD_CONFIG); + if (unlikely(ret)) { + pr_err("Failed to get offload capabilities %d\n", ret); + return ret; + } + + memcpy(offload, &resp.u.offload, sizeof(resp.u.offload)); + + return 0; +} + +int ena_com_set_hash_function(struct ena_com_dev *ena_dev) +{ + struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue; + struct ena_rss *rss = &ena_dev->rss; + struct ena_admin_set_feat_cmd cmd; + struct ena_admin_set_feat_resp resp; + struct ena_admin_get_feat_resp get_resp; + int ret; + + if (!ena_com_check_supported_feature_id(ena_dev, + ENA_ADMIN_RSS_HASH_FUNCTION)) { + pr_info("Feature %d isn't supported\n", + ENA_ADMIN_RSS_HASH_FUNCTION); + return -EPERM; + } + + /* Validate hash function is supported */ + ret = ena_com_get_feature(ena_dev, &get_resp, + ENA_ADMIN_RSS_HASH_FUNCTION); + if (unlikely(ret)) + return ret; + + if (get_resp.u.flow_hash_func.supported_func & (1 << rss->hash_func)) { + pr_err("Func hash %d isn't supported by device, abort\n", + rss->hash_func); + return -EPERM; + } + + memset(&cmd, 0x0, sizeof(cmd)); + + cmd.aq_common_descriptor.opcode = ENA_ADMIN_SET_FEATURE; + cmd.aq_common_descriptor.flags = + ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK; + cmd.feat_common.feature_id = ENA_ADMIN_RSS_HASH_FUNCTION; + cmd.u.flow_hash_func.init_val = rss->hash_init_val; + cmd.u.flow_hash_func.selected_func = 1 << rss->hash_func; + + ret = ena_com_mem_addr_set(ena_dev, + &cmd.control_buffer.address, + rss->hash_key_dma_addr); + if (unlikely(ret)) { + pr_err("memory address set failed\n"); + return ret; + } + + cmd.control_buffer.length = sizeof(*rss->hash_key); + + ret = ena_com_execute_admin_command(admin_queue, + (struct ena_admin_aq_entry *)&cmd, + sizeof(cmd), + (struct ena_admin_acq_entry *)&resp, + sizeof(resp)); + if (unlikely(ret)) { + pr_err("Failed to set hash function %d. error: %d\n", + rss->hash_func, ret); + return -EINVAL; + } + + return 0; +} + +int ena_com_fill_hash_function(struct ena_com_dev *ena_dev, + enum ena_admin_hash_functions func, + const u8 *key, u16 key_len, u32 init_val) +{ + struct ena_rss *rss = &ena_dev->rss; + struct ena_admin_get_feat_resp get_resp; + struct ena_admin_feature_rss_flow_hash_control *hash_key = + rss->hash_key; + int rc; + + /* Make sure size is a mult of DWs */ + if (unlikely(key_len & 0x3)) + return -EINVAL; + + rc = ena_com_get_feature_ex(ena_dev, &get_resp, + ENA_ADMIN_RSS_HASH_FUNCTION, + rss->hash_key_dma_addr, + sizeof(*rss->hash_key)); + if (unlikely(rc)) + return rc; + + if (!((1 << func) & get_resp.u.flow_hash_func.supported_func)) { + pr_err("Flow hash function %d isn't supported\n", func); + return -EPERM; + } + + switch (func) { + case ENA_ADMIN_TOEPLITZ: + if (key_len > sizeof(hash_key->key)) { + pr_err("key len (%hu) is bigger than the max supported (%zu)\n", + key_len, sizeof(hash_key->key)); + return -EINVAL; + } + + memcpy(hash_key->key, key, key_len); + rss->hash_init_val = init_val; + hash_key->keys_num = key_len >> 2; + break; + case ENA_ADMIN_CRC32: + rss->hash_init_val = init_val; + break; + default: + pr_err("Invalid hash function (%d)\n", func); + return -EINVAL; + } + + rc = ena_com_set_hash_function(ena_dev); + + /* Restore the old function */ + if (unlikely(rc)) + ena_com_get_hash_function(ena_dev, NULL, NULL); + + return rc; +} + +int ena_com_get_hash_function(struct ena_com_dev *ena_dev, + enum ena_admin_hash_functions *func, + u8 *key) +{ + struct ena_rss *rss = &ena_dev->rss; + struct ena_admin_get_feat_resp get_resp; + struct ena_admin_feature_rss_flow_hash_control *hash_key = + rss->hash_key; + int rc; + + rc = ena_com_get_feature_ex(ena_dev, &get_resp, + ENA_ADMIN_RSS_HASH_FUNCTION, + rss->hash_key_dma_addr, + sizeof(*rss->hash_key)); + if (unlikely(rc)) + return rc; + + rss->hash_func = get_resp.u.flow_hash_func.selected_func; + if (func) + *func = rss->hash_func; + + if (key) + memcpy(key, hash_key->key, (size_t)(hash_key->keys_num) << 2); + + return 0; +} + +int ena_com_get_hash_ctrl(struct ena_com_dev *ena_dev, + enum ena_admin_flow_hash_proto proto, + u16 *fields) +{ + struct ena_rss *rss = &ena_dev->rss; + struct ena_admin_get_feat_resp get_resp; + int rc; + + rc = ena_com_get_feature_ex(ena_dev, &get_resp, + ENA_ADMIN_RSS_HASH_INPUT, + rss->hash_ctrl_dma_addr, + sizeof(*rss->hash_ctrl)); + if (unlikely(rc)) + return rc; + + if (fields) + *fields = rss->hash_ctrl->selected_fields[proto].fields; + + return 0; +} + +int ena_com_set_hash_ctrl(struct ena_com_dev *ena_dev) +{ + struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue; + struct ena_rss *rss = &ena_dev->rss; + struct ena_admin_feature_rss_hash_control *hash_ctrl = rss->hash_ctrl; + struct ena_admin_set_feat_cmd cmd; + struct ena_admin_set_feat_resp resp; + int ret; + + if (!ena_com_check_supported_feature_id(ena_dev, + ENA_ADMIN_RSS_HASH_INPUT)) { + pr_info("Feature %d isn't supported\n", ENA_ADMIN_RSS_HASH_INPUT); + return -EPERM; + } + + memset(&cmd, 0x0, sizeof(cmd)); + + cmd.aq_common_descriptor.opcode = ENA_ADMIN_SET_FEATURE; + cmd.aq_common_descriptor.flags = + ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK; + cmd.feat_common.feature_id = ENA_ADMIN_RSS_HASH_INPUT; + cmd.u.flow_hash_input.enabled_input_sort = + ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L3_SORT_MASK | + ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L4_SORT_MASK; + + ret = ena_com_mem_addr_set(ena_dev, + &cmd.control_buffer.address, + rss->hash_ctrl_dma_addr); + if (unlikely(ret)) { + pr_err("memory address set failed\n"); + return ret; + } + cmd.control_buffer.length = sizeof(*hash_ctrl); + + ret = ena_com_execute_admin_command(admin_queue, + (struct ena_admin_aq_entry *)&cmd, + sizeof(cmd), + (struct ena_admin_acq_entry *)&resp, + sizeof(resp)); + if (unlikely(ret)) + pr_err("Failed to set hash input. error: %d\n", ret); + + return ret; +} + +int ena_com_set_default_hash_ctrl(struct ena_com_dev *ena_dev) +{ + struct ena_rss *rss = &ena_dev->rss; + struct ena_admin_feature_rss_hash_control *hash_ctrl = + rss->hash_ctrl; + u16 available_fields = 0; + int rc, i; + + /* Get the supported hash input */ + rc = ena_com_get_hash_ctrl(ena_dev, 0, NULL); + if (unlikely(rc)) + return rc; + + hash_ctrl->selected_fields[ENA_ADMIN_RSS_TCP4].fields = + ENA_ADMIN_RSS_L3_SA | ENA_ADMIN_RSS_L3_DA | + ENA_ADMIN_RSS_L4_DP | ENA_ADMIN_RSS_L4_SP; + + hash_ctrl->selected_fields[ENA_ADMIN_RSS_UDP4].fields = + ENA_ADMIN_RSS_L3_SA | ENA_ADMIN_RSS_L3_DA | + ENA_ADMIN_RSS_L4_DP | ENA_ADMIN_RSS_L4_SP; + + hash_ctrl->selected_fields[ENA_ADMIN_RSS_TCP6].fields = + ENA_ADMIN_RSS_L3_SA | ENA_ADMIN_RSS_L3_DA | + ENA_ADMIN_RSS_L4_DP | ENA_ADMIN_RSS_L4_SP; + + hash_ctrl->selected_fields[ENA_ADMIN_RSS_UDP6].fields = + ENA_ADMIN_RSS_L3_SA | ENA_ADMIN_RSS_L3_DA | + ENA_ADMIN_RSS_L4_DP | ENA_ADMIN_RSS_L4_SP; + + hash_ctrl->selected_fields[ENA_ADMIN_RSS_IP4].fields = + ENA_ADMIN_RSS_L3_SA | ENA_ADMIN_RSS_L3_DA; + + hash_ctrl->selected_fields[ENA_ADMIN_RSS_IP6].fields = + ENA_ADMIN_RSS_L3_SA | ENA_ADMIN_RSS_L3_DA; + + hash_ctrl->selected_fields[ENA_ADMIN_RSS_IP4_FRAG].fields = + ENA_ADMIN_RSS_L3_SA | ENA_ADMIN_RSS_L3_DA; + + hash_ctrl->selected_fields[ENA_ADMIN_RSS_IP4_FRAG].fields = + ENA_ADMIN_RSS_L2_DA | ENA_ADMIN_RSS_L2_SA; + + for (i = 0; i < ENA_ADMIN_RSS_PROTO_NUM; i++) { + available_fields = hash_ctrl->selected_fields[i].fields & + hash_ctrl->supported_fields[i].fields; + if (available_fields != hash_ctrl->selected_fields[i].fields) { + pr_err("hash control doesn't support all the desire configuration. proto %x supported %x selected %x\n", + i, hash_ctrl->supported_fields[i].fields, + hash_ctrl->selected_fields[i].fields); + return -EPERM; + } + } + + rc = ena_com_set_hash_ctrl(ena_dev); + + /* In case of failure, restore the old hash ctrl */ + if (unlikely(rc)) + ena_com_get_hash_ctrl(ena_dev, 0, NULL); + + return rc; +} + +int ena_com_fill_hash_ctrl(struct ena_com_dev *ena_dev, + enum ena_admin_flow_hash_proto proto, + u16 hash_fields) +{ + struct ena_rss *rss = &ena_dev->rss; + struct ena_admin_feature_rss_hash_control *hash_ctrl = rss->hash_ctrl; + u16 supported_fields; + int rc; + + if (proto >= ENA_ADMIN_RSS_PROTO_NUM) { + pr_err("Invalid proto num (%u)\n", proto); + return -EINVAL; + } + + /* Get the ctrl table */ + rc = ena_com_get_hash_ctrl(ena_dev, proto, NULL); + if (unlikely(rc)) + return rc; + + /* Make sure all the fields are supported */ + supported_fields = hash_ctrl->supported_fields[proto].fields; + if ((hash_fields & supported_fields) != hash_fields) { + pr_err("proto %d doesn't support the required fields %x. supports only: %x\n", + proto, hash_fields, supported_fields); + } + + hash_ctrl->selected_fields[proto].fields = hash_fields; + + rc = ena_com_set_hash_ctrl(ena_dev); + + /* In case of failure, restore the old hash ctrl */ + if (unlikely(rc)) + ena_com_get_hash_ctrl(ena_dev, 0, NULL); + + return 0; +} + +int ena_com_indirect_table_fill_entry(struct ena_com_dev *ena_dev, + u16 entry_idx, u16 entry_value) +{ + struct ena_rss *rss = &ena_dev->rss; + + if (unlikely(entry_idx >= (1 << rss->tbl_log_size))) + return -EINVAL; + + if (unlikely((entry_value > ENA_TOTAL_NUM_QUEUES))) + return -EINVAL; + + rss->host_rss_ind_tbl[entry_idx] = entry_value; + + return 0; +} + +int ena_com_indirect_table_set(struct ena_com_dev *ena_dev) +{ + struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue; + struct ena_rss *rss = &ena_dev->rss; + struct ena_admin_set_feat_cmd cmd; + struct ena_admin_set_feat_resp resp; + int ret; + + if (!ena_com_check_supported_feature_id( + ena_dev, ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG)) { + pr_info("Feature %d isn't supported\n", + ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG); + return -EPERM; + } + + ret = ena_com_ind_tbl_convert_to_device(ena_dev); + if (ret) { + pr_err("Failed to convert host indirection table to device table\n"); + return ret; + } + + memset(&cmd, 0x0, sizeof(cmd)); + + cmd.aq_common_descriptor.opcode = ENA_ADMIN_SET_FEATURE; + cmd.aq_common_descriptor.flags = + ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK; + cmd.feat_common.feature_id = ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG; + cmd.u.ind_table.size = rss->tbl_log_size; + cmd.u.ind_table.inline_index = 0xFFFFFFFF; + + ret = ena_com_mem_addr_set(ena_dev, + &cmd.control_buffer.address, + rss->rss_ind_tbl_dma_addr); + if (unlikely(ret)) { + pr_err("memory address set failed\n"); + return ret; + } + + cmd.control_buffer.length = (1ULL << rss->tbl_log_size) * + sizeof(struct ena_admin_rss_ind_table_entry); + + ret = ena_com_execute_admin_command(admin_queue, + (struct ena_admin_aq_entry *)&cmd, + sizeof(cmd), + (struct ena_admin_acq_entry *)&resp, + sizeof(resp)); + + if (unlikely(ret)) + pr_err("Failed to set indirect table. error: %d\n", ret); + + return ret; +} + +int ena_com_indirect_table_get(struct ena_com_dev *ena_dev, u32 *ind_tbl) +{ + struct ena_rss *rss = &ena_dev->rss; + struct ena_admin_get_feat_resp get_resp; + u32 tbl_size; + int i, rc; + + tbl_size = (1ULL << rss->tbl_log_size) * + sizeof(struct ena_admin_rss_ind_table_entry); + + rc = ena_com_get_feature_ex(ena_dev, &get_resp, + ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG, + rss->rss_ind_tbl_dma_addr, + tbl_size); + if (unlikely(rc)) + return rc; + + if (!ind_tbl) + return 0; + + rc = ena_com_ind_tbl_convert_from_device(ena_dev); + if (unlikely(rc)) + return rc; + + for (i = 0; i < (1 << rss->tbl_log_size); i++) + ind_tbl[i] = rss->host_rss_ind_tbl[i]; + + return 0; +} + +int ena_com_rss_init(struct ena_com_dev *ena_dev, u16 indr_tbl_log_size) +{ + int rc; + + memset(&ena_dev->rss, 0x0, sizeof(ena_dev->rss)); + + rc = ena_com_indirect_table_allocate(ena_dev, indr_tbl_log_size); + if (unlikely(rc)) + goto err_indr_tbl; + + rc = ena_com_hash_key_allocate(ena_dev); + if (unlikely(rc)) + goto err_hash_key; + + rc = ena_com_hash_ctrl_init(ena_dev); + if (unlikely(rc)) + goto err_hash_ctrl; + + return 0; + +err_hash_ctrl: + ena_com_hash_key_destroy(ena_dev); +err_hash_key: + ena_com_indirect_table_destroy(ena_dev); +err_indr_tbl: + + return rc; +} + +void ena_com_rss_destroy(struct ena_com_dev *ena_dev) +{ + ena_com_indirect_table_destroy(ena_dev); + ena_com_hash_key_destroy(ena_dev); + ena_com_hash_ctrl_destroy(ena_dev); + + memset(&ena_dev->rss, 0x0, sizeof(ena_dev->rss)); +} + +int ena_com_allocate_host_info(struct ena_com_dev *ena_dev) +{ + struct ena_host_attribute *host_attr = &ena_dev->host_attr; + + host_attr->host_info = + dma_zalloc_coherent(ena_dev->dmadev, SZ_4K, + &host_attr->host_info_dma_addr, GFP_KERNEL); + if (unlikely(!host_attr->host_info)) + return -ENOMEM; + + return 0; +} + +int ena_com_allocate_debug_area(struct ena_com_dev *ena_dev, + u32 debug_area_size) +{ + struct ena_host_attribute *host_attr = &ena_dev->host_attr; + + host_attr->debug_area_virt_addr = + dma_zalloc_coherent(ena_dev->dmadev, debug_area_size, + &host_attr->debug_area_dma_addr, GFP_KERNEL); + if (unlikely(!host_attr->debug_area_virt_addr)) { + host_attr->debug_area_size = 0; + return -ENOMEM; + } + + host_attr->debug_area_size = debug_area_size; + + return 0; +} + +void ena_com_delete_host_info(struct ena_com_dev *ena_dev) +{ + struct ena_host_attribute *host_attr = &ena_dev->host_attr; + + if (host_attr->host_info) { + dma_free_coherent(ena_dev->dmadev, SZ_4K, host_attr->host_info, + host_attr->host_info_dma_addr); + host_attr->host_info = NULL; + } +} + +void ena_com_delete_debug_area(struct ena_com_dev *ena_dev) +{ + struct ena_host_attribute *host_attr = &ena_dev->host_attr; + + if (host_attr->debug_area_virt_addr) { + dma_free_coherent(ena_dev->dmadev, host_attr->debug_area_size, + host_attr->debug_area_virt_addr, + host_attr->debug_area_dma_addr); + host_attr->debug_area_virt_addr = NULL; + } +} + +int ena_com_set_host_attributes(struct ena_com_dev *ena_dev) +{ + struct ena_host_attribute *host_attr = &ena_dev->host_attr; + struct ena_com_admin_queue *admin_queue; + struct ena_admin_set_feat_cmd cmd; + struct ena_admin_set_feat_resp resp; + + int ret; + + if (!ena_com_check_supported_feature_id(ena_dev, + ENA_ADMIN_HOST_ATTR_CONFIG)) { + pr_warn("Set host attribute isn't supported\n"); + return -EPERM; + } + + memset(&cmd, 0x0, sizeof(cmd)); + admin_queue = &ena_dev->admin_queue; + + cmd.aq_common_descriptor.opcode = ENA_ADMIN_SET_FEATURE; + cmd.feat_common.feature_id = ENA_ADMIN_HOST_ATTR_CONFIG; + + ret = ena_com_mem_addr_set(ena_dev, + &cmd.u.host_attr.debug_ba, + host_attr->debug_area_dma_addr); + if (unlikely(ret)) { + pr_err("memory address set failed\n"); + return ret; + } + + ret = ena_com_mem_addr_set(ena_dev, + &cmd.u.host_attr.os_info_ba, + host_attr->host_info_dma_addr); + if (unlikely(ret)) { + pr_err("memory address set failed\n"); + return ret; + } + + cmd.u.host_attr.debug_area_size = host_attr->debug_area_size; + + ret = ena_com_execute_admin_command(admin_queue, + (struct ena_admin_aq_entry *)&cmd, + sizeof(cmd), + (struct ena_admin_acq_entry *)&resp, + sizeof(resp)); + + if (unlikely(ret)) + pr_err("Failed to set host attributes: %d\n", ret); + + return ret; +} + +/* Interrupt moderation */ +bool ena_com_interrupt_moderation_supported(struct ena_com_dev *ena_dev) +{ + return ena_com_check_supported_feature_id(ena_dev, + ENA_ADMIN_INTERRUPT_MODERATION); +} + +int ena_com_update_nonadaptive_moderation_interval_tx(struct ena_com_dev *ena_dev, + u32 tx_coalesce_usecs) +{ + if (!ena_dev->intr_delay_resolution) { + pr_err("Illegal interrupt delay granularity value\n"); + return -EFAULT; + } + + ena_dev->intr_moder_tx_interval = tx_coalesce_usecs / + ena_dev->intr_delay_resolution; + + return 0; +} + +int ena_com_update_nonadaptive_moderation_interval_rx(struct ena_com_dev *ena_dev, + u32 rx_coalesce_usecs) +{ + if (!ena_dev->intr_delay_resolution) { + pr_err("Illegal interrupt delay granularity value\n"); + return -EFAULT; + } + + /* We use LOWEST entry of moderation table for storing + * nonadaptive interrupt coalescing values + */ + ena_dev->intr_moder_tbl[ENA_INTR_MODER_LOWEST].intr_moder_interval = + rx_coalesce_usecs / ena_dev->intr_delay_resolution; + + return 0; +} + +void ena_com_destroy_interrupt_moderation(struct ena_com_dev *ena_dev) +{ + if (ena_dev->intr_moder_tbl) + devm_kfree(ena_dev->dmadev, ena_dev->intr_moder_tbl); + ena_dev->intr_moder_tbl = NULL; +} + +int ena_com_init_interrupt_moderation(struct ena_com_dev *ena_dev) +{ + struct ena_admin_get_feat_resp get_resp; + u16 delay_resolution; + int rc; + + rc = ena_com_get_feature(ena_dev, &get_resp, + ENA_ADMIN_INTERRUPT_MODERATION); + + if (rc) { + if (rc == -EPERM) { + pr_info("Feature %d isn't supported\n", + ENA_ADMIN_INTERRUPT_MODERATION); + rc = 0; + } else { + pr_err("Failed to get interrupt moderation admin cmd. rc: %d\n", + rc); + } + + /* no moderation supported, disable adaptive support */ + ena_com_disable_adaptive_moderation(ena_dev); + return rc; + } + + rc = ena_com_init_interrupt_moderation_table(ena_dev); + if (rc) + goto err; + + /* if moderation is supported by device we set adaptive moderation */ + delay_resolution = get_resp.u.intr_moderation.intr_delay_resolution; + ena_com_update_intr_delay_resolution(ena_dev, delay_resolution); + ena_com_enable_adaptive_moderation(ena_dev); + + return 0; +err: + ena_com_destroy_interrupt_moderation(ena_dev); + return rc; +} + +void ena_com_config_default_interrupt_moderation_table(struct ena_com_dev *ena_dev) +{ + struct ena_intr_moder_entry *intr_moder_tbl = ena_dev->intr_moder_tbl; + + if (!intr_moder_tbl) + return; + + intr_moder_tbl[ENA_INTR_MODER_LOWEST].intr_moder_interval = + ENA_INTR_LOWEST_USECS; + intr_moder_tbl[ENA_INTR_MODER_LOWEST].pkts_per_interval = + ENA_INTR_LOWEST_PKTS; + intr_moder_tbl[ENA_INTR_MODER_LOWEST].bytes_per_interval = + ENA_INTR_LOWEST_BYTES; + + intr_moder_tbl[ENA_INTR_MODER_LOW].intr_moder_interval = + ENA_INTR_LOW_USECS; + intr_moder_tbl[ENA_INTR_MODER_LOW].pkts_per_interval = + ENA_INTR_LOW_PKTS; + intr_moder_tbl[ENA_INTR_MODER_LOW].bytes_per_interval = + ENA_INTR_LOW_BYTES; + + intr_moder_tbl[ENA_INTR_MODER_MID].intr_moder_interval = + ENA_INTR_MID_USECS; + intr_moder_tbl[ENA_INTR_MODER_MID].pkts_per_interval = + ENA_INTR_MID_PKTS; + intr_moder_tbl[ENA_INTR_MODER_MID].bytes_per_interval = + ENA_INTR_MID_BYTES; + + intr_moder_tbl[ENA_INTR_MODER_HIGH].intr_moder_interval = + ENA_INTR_HIGH_USECS; + intr_moder_tbl[ENA_INTR_MODER_HIGH].pkts_per_interval = + ENA_INTR_HIGH_PKTS; + intr_moder_tbl[ENA_INTR_MODER_HIGH].bytes_per_interval = + ENA_INTR_HIGH_BYTES; + + intr_moder_tbl[ENA_INTR_MODER_HIGHEST].intr_moder_interval = + ENA_INTR_HIGHEST_USECS; + intr_moder_tbl[ENA_INTR_MODER_HIGHEST].pkts_per_interval = + ENA_INTR_HIGHEST_PKTS; + intr_moder_tbl[ENA_INTR_MODER_HIGHEST].bytes_per_interval = + ENA_INTR_HIGHEST_BYTES; +} + +unsigned int ena_com_get_nonadaptive_moderation_interval_tx(struct ena_com_dev *ena_dev) +{ + return ena_dev->intr_moder_tx_interval; +} + +unsigned int ena_com_get_nonadaptive_moderation_interval_rx(struct ena_com_dev *ena_dev) +{ + struct ena_intr_moder_entry *intr_moder_tbl = ena_dev->intr_moder_tbl; + + if (intr_moder_tbl) + return intr_moder_tbl[ENA_INTR_MODER_LOWEST].intr_moder_interval; + + return 0; +} + +void ena_com_init_intr_moderation_entry(struct ena_com_dev *ena_dev, + enum ena_intr_moder_level level, + struct ena_intr_moder_entry *entry) +{ + struct ena_intr_moder_entry *intr_moder_tbl = ena_dev->intr_moder_tbl; + + if (level >= ENA_INTR_MAX_NUM_OF_LEVELS) + return; + + intr_moder_tbl[level].intr_moder_interval = entry->intr_moder_interval; + if (ena_dev->intr_delay_resolution) + intr_moder_tbl[level].intr_moder_interval /= + ena_dev->intr_delay_resolution; + intr_moder_tbl[level].pkts_per_interval = entry->pkts_per_interval; + + /* use hardcoded value until ethtool supports bytecount parameter */ + if (entry->bytes_per_interval != ENA_INTR_BYTE_COUNT_NOT_SUPPORTED) + intr_moder_tbl[level].bytes_per_interval = entry->bytes_per_interval; +} + +void ena_com_get_intr_moderation_entry(struct ena_com_dev *ena_dev, + enum ena_intr_moder_level level, + struct ena_intr_moder_entry *entry) +{ + struct ena_intr_moder_entry *intr_moder_tbl = ena_dev->intr_moder_tbl; + + if (level >= ENA_INTR_MAX_NUM_OF_LEVELS) + return; + + entry->intr_moder_interval = intr_moder_tbl[level].intr_moder_interval; + if (ena_dev->intr_delay_resolution) + entry->intr_moder_interval *= ena_dev->intr_delay_resolution; + entry->pkts_per_interval = + intr_moder_tbl[level].pkts_per_interval; + entry->bytes_per_interval = intr_moder_tbl[level].bytes_per_interval; +} diff --git a/drivers/net/ethernet/amazon/ena/ena_com.h b/drivers/net/ethernet/amazon/ena/ena_com.h new file mode 100644 index 000000000000..509d7b8e15ab --- /dev/null +++ b/drivers/net/ethernet/amazon/ena/ena_com.h @@ -0,0 +1,1038 @@ +/* + * Copyright 2015 Amazon.com, Inc. or its affiliates. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ENA_COM +#define ENA_COM + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ena_common_defs.h" +#include "ena_admin_defs.h" +#include "ena_eth_io_defs.h" +#include "ena_regs_defs.h" + +#undef pr_fmt +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#define ENA_MAX_NUM_IO_QUEUES 128U +/* We need to queues for each IO (on for Tx and one for Rx) */ +#define ENA_TOTAL_NUM_QUEUES (2 * (ENA_MAX_NUM_IO_QUEUES)) + +#define ENA_MAX_HANDLERS 256 + +#define ENA_MAX_PHYS_ADDR_SIZE_BITS 48 + +/* Unit in usec */ +#define ENA_REG_READ_TIMEOUT 200000 + +#define ADMIN_SQ_SIZE(depth) ((depth) * sizeof(struct ena_admin_aq_entry)) +#define ADMIN_CQ_SIZE(depth) ((depth) * sizeof(struct ena_admin_acq_entry)) +#define ADMIN_AENQ_SIZE(depth) ((depth) * sizeof(struct ena_admin_aenq_entry)) + +/*****************************************************************************/ +/*****************************************************************************/ +/* ENA adaptive interrupt moderation settings */ + +#define ENA_INTR_LOWEST_USECS (0) +#define ENA_INTR_LOWEST_PKTS (3) +#define ENA_INTR_LOWEST_BYTES (2 * 1524) + +#define ENA_INTR_LOW_USECS (32) +#define ENA_INTR_LOW_PKTS (12) +#define ENA_INTR_LOW_BYTES (16 * 1024) + +#define ENA_INTR_MID_USECS (80) +#define ENA_INTR_MID_PKTS (48) +#define ENA_INTR_MID_BYTES (64 * 1024) + +#define ENA_INTR_HIGH_USECS (128) +#define ENA_INTR_HIGH_PKTS (96) +#define ENA_INTR_HIGH_BYTES (128 * 1024) + +#define ENA_INTR_HIGHEST_USECS (192) +#define ENA_INTR_HIGHEST_PKTS (128) +#define ENA_INTR_HIGHEST_BYTES (192 * 1024) + +#define ENA_INTR_INITIAL_TX_INTERVAL_USECS 196 +#define ENA_INTR_INITIAL_RX_INTERVAL_USECS 4 +#define ENA_INTR_DELAY_OLD_VALUE_WEIGHT 6 +#define ENA_INTR_DELAY_NEW_VALUE_WEIGHT 4 +#define ENA_INTR_MODER_LEVEL_STRIDE 2 +#define ENA_INTR_BYTE_COUNT_NOT_SUPPORTED 0xFFFFFF + +enum ena_intr_moder_level { + ENA_INTR_MODER_LOWEST = 0, + ENA_INTR_MODER_LOW, + ENA_INTR_MODER_MID, + ENA_INTR_MODER_HIGH, + ENA_INTR_MODER_HIGHEST, + ENA_INTR_MAX_NUM_OF_LEVELS, +}; + +struct ena_intr_moder_entry { + unsigned int intr_moder_interval; + unsigned int pkts_per_interval; + unsigned int bytes_per_interval; +}; + +enum queue_direction { + ENA_COM_IO_QUEUE_DIRECTION_TX, + ENA_COM_IO_QUEUE_DIRECTION_RX +}; + +struct ena_com_buf { + dma_addr_t paddr; /**< Buffer physical address */ + u16 len; /**< Buffer length in bytes */ +}; + +struct ena_com_rx_buf_info { + u16 len; + u16 req_id; +}; + +struct ena_com_io_desc_addr { + u8 __iomem *pbuf_dev_addr; /* LLQ address */ + u8 *virt_addr; + dma_addr_t phys_addr; +}; + +struct ena_com_tx_meta { + u16 mss; + u16 l3_hdr_len; + u16 l3_hdr_offset; + u16 l4_hdr_len; /* In words */ +}; + +struct ena_com_io_cq { + struct ena_com_io_desc_addr cdesc_addr; + + /* Interrupt unmask register */ + u32 __iomem *unmask_reg; + + /* The completion queue head doorbell register */ + u32 __iomem *cq_head_db_reg; + + /* numa configuration register (for TPH) */ + u32 __iomem *numa_node_cfg_reg; + + /* The value to write to the above register to unmask + * the interrupt of this queue + */ + u32 msix_vector; + + enum queue_direction direction; + + /* holds the number of cdesc of the current packet */ + u16 cur_rx_pkt_cdesc_count; + /* save the firt cdesc idx of the current packet */ + u16 cur_rx_pkt_cdesc_start_idx; + + u16 q_depth; + /* Caller qid */ + u16 qid; + + /* Device queue index */ + u16 idx; + u16 head; + u16 last_head_update; + u8 phase; + u8 cdesc_entry_size_in_bytes; + +} ____cacheline_aligned; + +struct ena_com_io_sq { + struct ena_com_io_desc_addr desc_addr; + + u32 __iomem *db_addr; + u8 __iomem *header_addr; + + enum queue_direction direction; + enum ena_admin_placement_policy_type mem_queue_type; + + u32 msix_vector; + struct ena_com_tx_meta cached_tx_meta; + + u16 q_depth; + u16 qid; + + u16 idx; + u16 tail; + u16 next_to_comp; + u32 tx_max_header_size; + u8 phase; + u8 desc_entry_size; + u8 dma_addr_bits; +} ____cacheline_aligned; + +struct ena_com_admin_cq { + struct ena_admin_acq_entry *entries; + dma_addr_t dma_addr; + + u16 head; + u8 phase; +}; + +struct ena_com_admin_sq { + struct ena_admin_aq_entry *entries; + dma_addr_t dma_addr; + + u32 __iomem *db_addr; + + u16 head; + u16 tail; + u8 phase; + +}; + +struct ena_com_stats_admin { + u32 aborted_cmd; + u32 submitted_cmd; + u32 completed_cmd; + u32 out_of_space; + u32 no_completion; +}; + +struct ena_com_admin_queue { + void *q_dmadev; + spinlock_t q_lock; /* spinlock for the admin queue */ + struct ena_comp_ctx *comp_ctx; + u16 q_depth; + struct ena_com_admin_cq cq; + struct ena_com_admin_sq sq; + + /* Indicate if the admin queue should poll for completion */ + bool polling; + + u16 curr_cmd_id; + + /* Indicate that the ena was initialized and can + * process new admin commands + */ + bool running_state; + + /* Count the number of outstanding admin commands */ + atomic_t outstanding_cmds; + + struct ena_com_stats_admin stats; +}; + +struct ena_aenq_handlers; + +struct ena_com_aenq { + u16 head; + u8 phase; + struct ena_admin_aenq_entry *entries; + dma_addr_t dma_addr; + u16 q_depth; + struct ena_aenq_handlers *aenq_handlers; +}; + +struct ena_com_mmio_read { + struct ena_admin_ena_mmio_req_read_less_resp *read_resp; + dma_addr_t read_resp_dma_addr; + u16 seq_num; + bool readless_supported; + /* spin lock to ensure a single outstanding read */ + spinlock_t lock; +}; + +struct ena_rss { + /* Indirect table */ + u16 *host_rss_ind_tbl; + struct ena_admin_rss_ind_table_entry *rss_ind_tbl; + dma_addr_t rss_ind_tbl_dma_addr; + u16 tbl_log_size; + + /* Hash key */ + enum ena_admin_hash_functions hash_func; + struct ena_admin_feature_rss_flow_hash_control *hash_key; + dma_addr_t hash_key_dma_addr; + u32 hash_init_val; + + /* Flow Control */ + struct ena_admin_feature_rss_hash_control *hash_ctrl; + dma_addr_t hash_ctrl_dma_addr; + +}; + +struct ena_host_attribute { + /* Debug area */ + u8 *debug_area_virt_addr; + dma_addr_t debug_area_dma_addr; + u32 debug_area_size; + + /* Host information */ + struct ena_admin_host_info *host_info; + dma_addr_t host_info_dma_addr; +}; + +/* Each ena_dev is a PCI function. */ +struct ena_com_dev { + struct ena_com_admin_queue admin_queue; + struct ena_com_aenq aenq; + struct ena_com_io_cq io_cq_queues[ENA_TOTAL_NUM_QUEUES]; + struct ena_com_io_sq io_sq_queues[ENA_TOTAL_NUM_QUEUES]; + u8 __iomem *reg_bar; + void __iomem *mem_bar; + void *dmadev; + + enum ena_admin_placement_policy_type tx_mem_queue_type; + u32 tx_max_header_size; + u16 stats_func; /* Selected function for extended statistic dump */ + u16 stats_queue; /* Selected queue for extended statistic dump */ + + struct ena_com_mmio_read mmio_read; + + struct ena_rss rss; + u32 supported_features; + u32 dma_addr_bits; + + struct ena_host_attribute host_attr; + bool adaptive_coalescing; + u16 intr_delay_resolution; + u32 intr_moder_tx_interval; + struct ena_intr_moder_entry *intr_moder_tbl; +}; + +struct ena_com_dev_get_features_ctx { + struct ena_admin_queue_feature_desc max_queues; + struct ena_admin_device_attr_feature_desc dev_attr; + struct ena_admin_feature_aenq_desc aenq; + struct ena_admin_feature_offload_desc offload; +}; + +struct ena_com_create_io_ctx { + enum ena_admin_placement_policy_type mem_queue_type; + enum queue_direction direction; + int numa_node; + u32 msix_vector; + u16 queue_size; + u16 qid; +}; + +typedef void (*ena_aenq_handler)(void *data, + struct ena_admin_aenq_entry *aenq_e); + +/* Holds aenq handlers. Indexed by AENQ event group */ +struct ena_aenq_handlers { + ena_aenq_handler handlers[ENA_MAX_HANDLERS]; + ena_aenq_handler unimplemented_handler; +}; + +/*****************************************************************************/ +/*****************************************************************************/ + +/* ena_com_mmio_reg_read_request_init - Init the mmio reg read mechanism + * @ena_dev: ENA communication layer struct + * + * Initialize the register read mechanism. + * + * @note: This method must be the first stage in the initialization sequence. + * + * @return - 0 on success, negative value on failure. + */ +int ena_com_mmio_reg_read_request_init(struct ena_com_dev *ena_dev); + +/* ena_com_set_mmio_read_mode - Enable/disable the mmio reg read mechanism + * @ena_dev: ENA communication layer struct + * @readless_supported: readless mode (enable/disable) + */ +void ena_com_set_mmio_read_mode(struct ena_com_dev *ena_dev, + bool readless_supported); + +/* ena_com_mmio_reg_read_request_write_dev_addr - Write the mmio reg read return + * value physical address. + * @ena_dev: ENA communication layer struct + */ +void ena_com_mmio_reg_read_request_write_dev_addr(struct ena_com_dev *ena_dev); + +/* ena_com_mmio_reg_read_request_destroy - Destroy the mmio reg read mechanism + * @ena_dev: ENA communication layer struct + */ +void ena_com_mmio_reg_read_request_destroy(struct ena_com_dev *ena_dev); + +/* ena_com_admin_init - Init the admin and the async queues + * @ena_dev: ENA communication layer struct + * @aenq_handlers: Those handlers to be called upon event. + * @init_spinlock: Indicate if this method should init the admin spinlock or + * the spinlock was init before (for example, in a case of FLR). + * + * Initialize the admin submission and completion queues. + * Initialize the asynchronous events notification queues. + * + * @return - 0 on success, negative value on failure. + */ +int ena_com_admin_init(struct ena_com_dev *ena_dev, + struct ena_aenq_handlers *aenq_handlers, + bool init_spinlock); + +/* ena_com_admin_destroy - Destroy the admin and the async events queues. + * @ena_dev: ENA communication layer struct + * + * @note: Before calling this method, the caller must validate that the device + * won't send any additional admin completions/aenq. + * To achieve that, a FLR is recommended. + */ +void ena_com_admin_destroy(struct ena_com_dev *ena_dev); + +/* ena_com_dev_reset - Perform device FLR to the device. + * @ena_dev: ENA communication layer struct + * + * @return - 0 on success, negative value on failure. + */ +int ena_com_dev_reset(struct ena_com_dev *ena_dev); + +/* ena_com_create_io_queue - Create io queue. + * @ena_dev: ENA communication layer struct + * @ctx - create context structure + * + * Create the submission and the completion queues. + * + * @return - 0 on success, negative value on failure. + */ +int ena_com_create_io_queue(struct ena_com_dev *ena_dev, + struct ena_com_create_io_ctx *ctx); + +/* ena_com_destroy_io_queue - Destroy IO queue with the queue id - qid. + * @ena_dev: ENA communication layer struct + * @qid - the caller virtual queue id. + */ +void ena_com_destroy_io_queue(struct ena_com_dev *ena_dev, u16 qid); + +/* ena_com_get_io_handlers - Return the io queue handlers + * @ena_dev: ENA communication layer struct + * @qid - the caller virtual queue id. + * @io_sq - IO submission queue handler + * @io_cq - IO completion queue handler. + * + * @return - 0 on success, negative value on failure. + */ +int ena_com_get_io_handlers(struct ena_com_dev *ena_dev, u16 qid, + struct ena_com_io_sq **io_sq, + struct ena_com_io_cq **io_cq); + +/* ena_com_admin_aenq_enable - ENAble asynchronous event notifications + * @ena_dev: ENA communication layer struct + * + * After this method, aenq event can be received via AENQ. + */ +void ena_com_admin_aenq_enable(struct ena_com_dev *ena_dev); + +/* ena_com_set_admin_running_state - Set the state of the admin queue + * @ena_dev: ENA communication layer struct + * + * Change the state of the admin queue (enable/disable) + */ +void ena_com_set_admin_running_state(struct ena_com_dev *ena_dev, bool state); + +/* ena_com_get_admin_running_state - Get the admin queue state + * @ena_dev: ENA communication layer struct + * + * Retrieve the state of the admin queue (enable/disable) + * + * @return - current polling mode (enable/disable) + */ +bool ena_com_get_admin_running_state(struct ena_com_dev *ena_dev); + +/* ena_com_set_admin_polling_mode - Set the admin completion queue polling mode + * @ena_dev: ENA communication layer struct + * @polling: ENAble/Disable polling mode + * + * Set the admin completion mode. + */ +void ena_com_set_admin_polling_mode(struct ena_com_dev *ena_dev, bool polling); + +/* ena_com_set_admin_polling_mode - Get the admin completion queue polling mode + * @ena_dev: ENA communication layer struct + * + * Get the admin completion mode. + * If polling mode is on, ena_com_execute_admin_command will perform a + * polling on the admin completion queue for the commands completion, + * otherwise it will wait on wait event. + * + * @return state + */ +bool ena_com_get_ena_admin_polling_mode(struct ena_com_dev *ena_dev); + +/* ena_com_admin_q_comp_intr_handler - admin queue interrupt handler + * @ena_dev: ENA communication layer struct + * + * This method go over the admin completion queue and wake up all the pending + * threads that wait on the commands wait event. + * + * @note: Should be called after MSI-X interrupt. + */ +void ena_com_admin_q_comp_intr_handler(struct ena_com_dev *ena_dev); + +/* ena_com_aenq_intr_handler - AENQ interrupt handler + * @ena_dev: ENA communication layer struct + * + * This method go over the async event notification queue and call the proper + * aenq handler. + */ +void ena_com_aenq_intr_handler(struct ena_com_dev *dev, void *data); + +/* ena_com_abort_admin_commands - Abort all the outstanding admin commands. + * @ena_dev: ENA communication layer struct + * + * This method aborts all the outstanding admin commands. + * The caller should then call ena_com_wait_for_abort_completion to make sure + * all the commands were completed. + */ +void ena_com_abort_admin_commands(struct ena_com_dev *ena_dev); + +/* ena_com_wait_for_abort_completion - Wait for admin commands abort. + * @ena_dev: ENA communication layer struct + * + * This method wait until all the outstanding admin commands will be completed. + */ +void ena_com_wait_for_abort_completion(struct ena_com_dev *ena_dev); + +/* ena_com_validate_version - Validate the device parameters + * @ena_dev: ENA communication layer struct + * + * This method validate the device parameters are the same as the saved + * parameters in ena_dev. + * This method is useful after device reset, to validate the device mac address + * and the device offloads are the same as before the reset. + * + * @return - 0 on success negative value otherwise. + */ +int ena_com_validate_version(struct ena_com_dev *ena_dev); + +/* ena_com_get_link_params - Retrieve physical link parameters. + * @ena_dev: ENA communication layer struct + * @resp: Link parameters + * + * Retrieve the physical link parameters, + * like speed, auto-negotiation and full duplex support. + * + * @return - 0 on Success negative value otherwise. + */ +int ena_com_get_link_params(struct ena_com_dev *ena_dev, + struct ena_admin_get_feat_resp *resp); + +/* ena_com_get_dma_width - Retrieve physical dma address width the device + * supports. + * @ena_dev: ENA communication layer struct + * + * Retrieve the maximum physical address bits the device can handle. + * + * @return: > 0 on Success and negative value otherwise. + */ +int ena_com_get_dma_width(struct ena_com_dev *ena_dev); + +/* ena_com_set_aenq_config - Set aenq groups configurations + * @ena_dev: ENA communication layer struct + * @groups flag: bit fields flags of enum ena_admin_aenq_group. + * + * Configure which aenq event group the driver would like to receive. + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_set_aenq_config(struct ena_com_dev *ena_dev, u32 groups_flag); + +/* ena_com_get_dev_attr_feat - Get device features + * @ena_dev: ENA communication layer struct + * @get_feat_ctx: returned context that contain the get features. + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_get_dev_attr_feat(struct ena_com_dev *ena_dev, + struct ena_com_dev_get_features_ctx *get_feat_ctx); + +/* ena_com_get_dev_basic_stats - Get device basic statistics + * @ena_dev: ENA communication layer struct + * @stats: stats return value + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_get_dev_basic_stats(struct ena_com_dev *ena_dev, + struct ena_admin_basic_stats *stats); + +/* ena_com_set_dev_mtu - Configure the device mtu. + * @ena_dev: ENA communication layer struct + * @mtu: mtu value + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_set_dev_mtu(struct ena_com_dev *ena_dev, int mtu); + +/* ena_com_get_offload_settings - Retrieve the device offloads capabilities + * @ena_dev: ENA communication layer struct + * @offlad: offload return value + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_get_offload_settings(struct ena_com_dev *ena_dev, + struct ena_admin_feature_offload_desc *offload); + +/* ena_com_rss_init - Init RSS + * @ena_dev: ENA communication layer struct + * @log_size: indirection log size + * + * Allocate RSS/RFS resources. + * The caller then can configure rss using ena_com_set_hash_function, + * ena_com_set_hash_ctrl and ena_com_indirect_table_set. + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_rss_init(struct ena_com_dev *ena_dev, u16 log_size); + +/* ena_com_rss_destroy - Destroy rss + * @ena_dev: ENA communication layer struct + * + * Free all the RSS/RFS resources. + */ +void ena_com_rss_destroy(struct ena_com_dev *ena_dev); + +/* ena_com_fill_hash_function - Fill RSS hash function + * @ena_dev: ENA communication layer struct + * @func: The hash function (Toeplitz or crc) + * @key: Hash key (for toeplitz hash) + * @key_len: key length (max length 10 DW) + * @init_val: initial value for the hash function + * + * Fill the ena_dev resources with the desire hash function, hash key, key_len + * and key initial value (if needed by the hash function). + * To flush the key into the device the caller should call + * ena_com_set_hash_function. + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_fill_hash_function(struct ena_com_dev *ena_dev, + enum ena_admin_hash_functions func, + const u8 *key, u16 key_len, u32 init_val); + +/* ena_com_set_hash_function - Flush the hash function and it dependencies to + * the device. + * @ena_dev: ENA communication layer struct + * + * Flush the hash function and it dependencies (key, key length and + * initial value) if needed. + * + * @note: Prior to this method the caller should call ena_com_fill_hash_function + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_set_hash_function(struct ena_com_dev *ena_dev); + +/* ena_com_get_hash_function - Retrieve the hash function and the hash key + * from the device. + * @ena_dev: ENA communication layer struct + * @func: hash function + * @key: hash key + * + * Retrieve the hash function and the hash key from the device. + * + * @note: If the caller called ena_com_fill_hash_function but didn't flash + * it to the device, the new configuration will be lost. + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_get_hash_function(struct ena_com_dev *ena_dev, + enum ena_admin_hash_functions *func, + u8 *key); + +/* ena_com_fill_hash_ctrl - Fill RSS hash control + * @ena_dev: ENA communication layer struct. + * @proto: The protocol to configure. + * @hash_fields: bit mask of ena_admin_flow_hash_fields + * + * Fill the ena_dev resources with the desire hash control (the ethernet + * fields that take part of the hash) for a specific protocol. + * To flush the hash control to the device, the caller should call + * ena_com_set_hash_ctrl. + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_fill_hash_ctrl(struct ena_com_dev *ena_dev, + enum ena_admin_flow_hash_proto proto, + u16 hash_fields); + +/* ena_com_set_hash_ctrl - Flush the hash control resources to the device. + * @ena_dev: ENA communication layer struct + * + * Flush the hash control (the ethernet fields that take part of the hash) + * + * @note: Prior to this method the caller should call ena_com_fill_hash_ctrl. + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_set_hash_ctrl(struct ena_com_dev *ena_dev); + +/* ena_com_get_hash_ctrl - Retrieve the hash control from the device. + * @ena_dev: ENA communication layer struct + * @proto: The protocol to retrieve. + * @fields: bit mask of ena_admin_flow_hash_fields. + * + * Retrieve the hash control from the device. + * + * @note, If the caller called ena_com_fill_hash_ctrl but didn't flash + * it to the device, the new configuration will be lost. + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_get_hash_ctrl(struct ena_com_dev *ena_dev, + enum ena_admin_flow_hash_proto proto, + u16 *fields); + +/* ena_com_set_default_hash_ctrl - Set the hash control to a default + * configuration. + * @ena_dev: ENA communication layer struct + * + * Fill the ena_dev resources with the default hash control configuration. + * To flush the hash control to the device, the caller should call + * ena_com_set_hash_ctrl. + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_set_default_hash_ctrl(struct ena_com_dev *ena_dev); + +/* ena_com_indirect_table_fill_entry - Fill a single entry in the RSS + * indirection table + * @ena_dev: ENA communication layer struct. + * @entry_idx - indirection table entry. + * @entry_value - redirection value + * + * Fill a single entry of the RSS indirection table in the ena_dev resources. + * To flush the indirection table to the device, the called should call + * ena_com_indirect_table_set. + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_indirect_table_fill_entry(struct ena_com_dev *ena_dev, + u16 entry_idx, u16 entry_value); + +/* ena_com_indirect_table_set - Flush the indirection table to the device. + * @ena_dev: ENA communication layer struct + * + * Flush the indirection hash control to the device. + * Prior to this method the caller should call ena_com_indirect_table_fill_entry + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_indirect_table_set(struct ena_com_dev *ena_dev); + +/* ena_com_indirect_table_get - Retrieve the indirection table from the device. + * @ena_dev: ENA communication layer struct + * @ind_tbl: indirection table + * + * Retrieve the RSS indirection table from the device. + * + * @note: If the caller called ena_com_indirect_table_fill_entry but didn't flash + * it to the device, the new configuration will be lost. + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_indirect_table_get(struct ena_com_dev *ena_dev, u32 *ind_tbl); + +/* ena_com_allocate_host_info - Allocate host info resources. + * @ena_dev: ENA communication layer struct + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_allocate_host_info(struct ena_com_dev *ena_dev); + +/* ena_com_allocate_debug_area - Allocate debug area. + * @ena_dev: ENA communication layer struct + * @debug_area_size - debug area size. + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_allocate_debug_area(struct ena_com_dev *ena_dev, + u32 debug_area_size); + +/* ena_com_delete_debug_area - Free the debug area resources. + * @ena_dev: ENA communication layer struct + * + * Free the allocate debug area. + */ +void ena_com_delete_debug_area(struct ena_com_dev *ena_dev); + +/* ena_com_delete_host_info - Free the host info resources. + * @ena_dev: ENA communication layer struct + * + * Free the allocate host info. + */ +void ena_com_delete_host_info(struct ena_com_dev *ena_dev); + +/* ena_com_set_host_attributes - Update the device with the host + * attributes (debug area and host info) base address. + * @ena_dev: ENA communication layer struct + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_set_host_attributes(struct ena_com_dev *ena_dev); + +/* ena_com_create_io_cq - Create io completion queue. + * @ena_dev: ENA communication layer struct + * @io_cq - io completion queue handler + + * Create IO completion queue. + * + * @return - 0 on success, negative value on failure. + */ +int ena_com_create_io_cq(struct ena_com_dev *ena_dev, + struct ena_com_io_cq *io_cq); + +/* ena_com_destroy_io_cq - Destroy io completion queue. + * @ena_dev: ENA communication layer struct + * @io_cq - io completion queue handler + + * Destroy IO completion queue. + * + * @return - 0 on success, negative value on failure. + */ +int ena_com_destroy_io_cq(struct ena_com_dev *ena_dev, + struct ena_com_io_cq *io_cq); + +/* ena_com_execute_admin_command - Execute admin command + * @admin_queue: admin queue. + * @cmd: the admin command to execute. + * @cmd_size: the command size. + * @cmd_completion: command completion return value. + * @cmd_comp_size: command completion size. + + * Submit an admin command and then wait until the device will return a + * completion. + * The completion will be copyed into cmd_comp. + * + * @return - 0 on success, negative value on failure. + */ +int ena_com_execute_admin_command(struct ena_com_admin_queue *admin_queue, + struct ena_admin_aq_entry *cmd, + size_t cmd_size, + struct ena_admin_acq_entry *cmd_comp, + size_t cmd_comp_size); + +/* ena_com_init_interrupt_moderation - Init interrupt moderation + * @ena_dev: ENA communication layer struct + * + * @return - 0 on success, negative value on failure. + */ +int ena_com_init_interrupt_moderation(struct ena_com_dev *ena_dev); + +/* ena_com_destroy_interrupt_moderation - Destroy interrupt moderation resources + * @ena_dev: ENA communication layer struct + */ +void ena_com_destroy_interrupt_moderation(struct ena_com_dev *ena_dev); + +/* ena_com_interrupt_moderation_supported - Return if interrupt moderation + * capability is supported by the device. + * + * @return - supported or not. + */ +bool ena_com_interrupt_moderation_supported(struct ena_com_dev *ena_dev); + +/* ena_com_config_default_interrupt_moderation_table - Restore the interrupt + * moderation table back to the default parameters. + * @ena_dev: ENA communication layer struct + */ +void ena_com_config_default_interrupt_moderation_table(struct ena_com_dev *ena_dev); + +/* ena_com_update_nonadaptive_moderation_interval_tx - Update the + * non-adaptive interval in Tx direction. + * @ena_dev: ENA communication layer struct + * @tx_coalesce_usecs: Interval in usec. + * + * @return - 0 on success, negative value on failure. + */ +int ena_com_update_nonadaptive_moderation_interval_tx(struct ena_com_dev *ena_dev, + u32 tx_coalesce_usecs); + +/* ena_com_update_nonadaptive_moderation_interval_rx - Update the + * non-adaptive interval in Rx direction. + * @ena_dev: ENA communication layer struct + * @rx_coalesce_usecs: Interval in usec. + * + * @return - 0 on success, negative value on failure. + */ +int ena_com_update_nonadaptive_moderation_interval_rx(struct ena_com_dev *ena_dev, + u32 rx_coalesce_usecs); + +/* ena_com_get_nonadaptive_moderation_interval_tx - Retrieve the + * non-adaptive interval in Tx direction. + * @ena_dev: ENA communication layer struct + * + * @return - interval in usec + */ +unsigned int ena_com_get_nonadaptive_moderation_interval_tx(struct ena_com_dev *ena_dev); + +/* ena_com_get_nonadaptive_moderation_interval_rx - Retrieve the + * non-adaptive interval in Rx direction. + * @ena_dev: ENA communication layer struct + * + * @return - interval in usec + */ +unsigned int ena_com_get_nonadaptive_moderation_interval_rx(struct ena_com_dev *ena_dev); + +/* ena_com_init_intr_moderation_entry - Update a single entry in the interrupt + * moderation table. + * @ena_dev: ENA communication layer struct + * @level: Interrupt moderation table level + * @entry: Entry value + * + * Update a single entry in the interrupt moderation table. + */ +void ena_com_init_intr_moderation_entry(struct ena_com_dev *ena_dev, + enum ena_intr_moder_level level, + struct ena_intr_moder_entry *entry); + +/* ena_com_get_intr_moderation_entry - Init ena_intr_moder_entry. + * @ena_dev: ENA communication layer struct + * @level: Interrupt moderation table level + * @entry: Entry to fill. + * + * Initialize the entry according to the adaptive interrupt moderation table. + */ +void ena_com_get_intr_moderation_entry(struct ena_com_dev *ena_dev, + enum ena_intr_moder_level level, + struct ena_intr_moder_entry *entry); + +static inline bool ena_com_get_adaptive_moderation_enabled(struct ena_com_dev *ena_dev) +{ + return ena_dev->adaptive_coalescing; +} + +static inline void ena_com_enable_adaptive_moderation(struct ena_com_dev *ena_dev) +{ + ena_dev->adaptive_coalescing = true; +} + +static inline void ena_com_disable_adaptive_moderation(struct ena_com_dev *ena_dev) +{ + ena_dev->adaptive_coalescing = false; +} + +/* ena_com_calculate_interrupt_delay - Calculate new interrupt delay + * @ena_dev: ENA communication layer struct + * @pkts: Number of packets since the last update + * @bytes: Number of bytes received since the last update. + * @smoothed_interval: Returned interval + * @moder_tbl_idx: Current table level as input update new level as return + * value. + */ +static inline void ena_com_calculate_interrupt_delay(struct ena_com_dev *ena_dev, + unsigned int pkts, + unsigned int bytes, + unsigned int *smoothed_interval, + unsigned int *moder_tbl_idx) +{ + enum ena_intr_moder_level curr_moder_idx, new_moder_idx; + struct ena_intr_moder_entry *curr_moder_entry; + struct ena_intr_moder_entry *pred_moder_entry; + struct ena_intr_moder_entry *new_moder_entry; + struct ena_intr_moder_entry *intr_moder_tbl = ena_dev->intr_moder_tbl; + unsigned int interval; + + /* We apply adaptive moderation on Rx path only. + * Tx uses static interrupt moderation. + */ + if (!pkts || !bytes) + /* Tx interrupt, or spurious interrupt, + * in both cases we just use same delay values + */ + return; + + curr_moder_idx = (enum ena_intr_moder_level)(*moder_tbl_idx); + if (unlikely(curr_moder_idx >= ENA_INTR_MAX_NUM_OF_LEVELS)) { + pr_err("Wrong moderation index %u\n", curr_moder_idx); + return; + } + + curr_moder_entry = &intr_moder_tbl[curr_moder_idx]; + new_moder_idx = curr_moder_idx; + + if (curr_moder_idx == ENA_INTR_MODER_LOWEST) { + if ((pkts > curr_moder_entry->pkts_per_interval) || + (bytes > curr_moder_entry->bytes_per_interval)) + new_moder_idx = + (enum ena_intr_moder_level)(curr_moder_idx + ENA_INTR_MODER_LEVEL_STRIDE); + } else { + pred_moder_entry = &intr_moder_tbl[curr_moder_idx - ENA_INTR_MODER_LEVEL_STRIDE]; + + if ((pkts <= pred_moder_entry->pkts_per_interval) || + (bytes <= pred_moder_entry->bytes_per_interval)) + new_moder_idx = + (enum ena_intr_moder_level)(curr_moder_idx - ENA_INTR_MODER_LEVEL_STRIDE); + else if ((pkts > curr_moder_entry->pkts_per_interval) || + (bytes > curr_moder_entry->bytes_per_interval)) { + if (curr_moder_idx != ENA_INTR_MODER_HIGHEST) + new_moder_idx = + (enum ena_intr_moder_level)(curr_moder_idx + ENA_INTR_MODER_LEVEL_STRIDE); + } + } + new_moder_entry = &intr_moder_tbl[new_moder_idx]; + + interval = new_moder_entry->intr_moder_interval; + *smoothed_interval = ( + (interval * ENA_INTR_DELAY_NEW_VALUE_WEIGHT + + ENA_INTR_DELAY_OLD_VALUE_WEIGHT * (*smoothed_interval)) + 5) / + 10; + + *moder_tbl_idx = new_moder_idx; +} + +/* ena_com_update_intr_reg - Prepare interrupt register + * @intr_reg: interrupt register to update. + * @rx_delay_interval: Rx interval in usecs + * @tx_delay_interval: Tx interval in usecs + * @unmask: unask enable/disable + * + * Prepare interrupt update register with the supplied parameters. + */ +static inline void ena_com_update_intr_reg(struct ena_eth_io_intr_reg *intr_reg, + u32 rx_delay_interval, + u32 tx_delay_interval, + bool unmask) +{ + intr_reg->intr_control = 0; + intr_reg->intr_control |= rx_delay_interval & + ENA_ETH_IO_INTR_REG_RX_INTR_DELAY_MASK; + + intr_reg->intr_control |= + (tx_delay_interval << ENA_ETH_IO_INTR_REG_TX_INTR_DELAY_SHIFT) + & ENA_ETH_IO_INTR_REG_TX_INTR_DELAY_MASK; + + if (unmask) + intr_reg->intr_control |= ENA_ETH_IO_INTR_REG_INTR_UNMASK_MASK; +} + +#endif /* !(ENA_COM) */ diff --git a/drivers/net/ethernet/amazon/ena/ena_common_defs.h b/drivers/net/ethernet/amazon/ena/ena_common_defs.h new file mode 100644 index 000000000000..bb8d73676eab --- /dev/null +++ b/drivers/net/ethernet/amazon/ena/ena_common_defs.h @@ -0,0 +1,48 @@ +/* + * Copyright 2015 - 2016 Amazon.com, Inc. or its affiliates. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _ENA_COMMON_H_ +#define _ENA_COMMON_H_ + +#define ENA_COMMON_SPEC_VERSION_MAJOR 0 /* */ +#define ENA_COMMON_SPEC_VERSION_MINOR 10 /* */ + +/* ENA operates with 48-bit memory addresses. ena_mem_addr_t */ +struct ena_common_mem_addr { + u32 mem_addr_low; + + u16 mem_addr_high; + + /* MBZ */ + u16 reserved16; +}; + +#endif /*_ENA_COMMON_H_ */ diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.c b/drivers/net/ethernet/amazon/ena/ena_eth_com.c new file mode 100644 index 000000000000..539c536464a5 --- /dev/null +++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.c @@ -0,0 +1,501 @@ +/* + * Copyright 2015 Amazon.com, Inc. or its affiliates. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "ena_eth_com.h" + +static inline struct ena_eth_io_rx_cdesc_base *ena_com_get_next_rx_cdesc( + struct ena_com_io_cq *io_cq) +{ + struct ena_eth_io_rx_cdesc_base *cdesc; + u16 expected_phase, head_masked; + u16 desc_phase; + + head_masked = io_cq->head & (io_cq->q_depth - 1); + expected_phase = io_cq->phase; + + cdesc = (struct ena_eth_io_rx_cdesc_base *)(io_cq->cdesc_addr.virt_addr + + (head_masked * io_cq->cdesc_entry_size_in_bytes)); + + desc_phase = (cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_PHASE_MASK) >> + ENA_ETH_IO_RX_CDESC_BASE_PHASE_SHIFT; + + if (desc_phase != expected_phase) + return NULL; + + return cdesc; +} + +static inline void ena_com_cq_inc_head(struct ena_com_io_cq *io_cq) +{ + io_cq->head++; + + /* Switch phase bit in case of wrap around */ + if (unlikely((io_cq->head & (io_cq->q_depth - 1)) == 0)) + io_cq->phase ^= 1; +} + +static inline void *get_sq_desc(struct ena_com_io_sq *io_sq) +{ + u16 tail_masked; + u32 offset; + + tail_masked = io_sq->tail & (io_sq->q_depth - 1); + + offset = tail_masked * io_sq->desc_entry_size; + + return (void *)((uintptr_t)io_sq->desc_addr.virt_addr + offset); +} + +static inline void ena_com_copy_curr_sq_desc_to_dev(struct ena_com_io_sq *io_sq) +{ + u16 tail_masked = io_sq->tail & (io_sq->q_depth - 1); + u32 offset = tail_masked * io_sq->desc_entry_size; + + /* In case this queue isn't a LLQ */ + if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) + return; + + memcpy_toio(io_sq->desc_addr.pbuf_dev_addr + offset, + io_sq->desc_addr.virt_addr + offset, + io_sq->desc_entry_size); +} + +static inline void ena_com_sq_update_tail(struct ena_com_io_sq *io_sq) +{ + io_sq->tail++; + + /* Switch phase bit in case of wrap around */ + if (unlikely((io_sq->tail & (io_sq->q_depth - 1)) == 0)) + io_sq->phase ^= 1; +} + +static inline int ena_com_write_header(struct ena_com_io_sq *io_sq, + u8 *head_src, u16 header_len) +{ + u16 tail_masked = io_sq->tail & (io_sq->q_depth - 1); + u8 __iomem *dev_head_addr = + io_sq->header_addr + (tail_masked * io_sq->tx_max_header_size); + + if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) + return 0; + + if (unlikely(!io_sq->header_addr)) { + pr_err("Push buffer header ptr is NULL\n"); + return -EINVAL; + } + + memcpy_toio(dev_head_addr, head_src, header_len); + + return 0; +} + +static inline struct ena_eth_io_rx_cdesc_base * + ena_com_rx_cdesc_idx_to_ptr(struct ena_com_io_cq *io_cq, u16 idx) +{ + idx &= (io_cq->q_depth - 1); + return (struct ena_eth_io_rx_cdesc_base *) + ((uintptr_t)io_cq->cdesc_addr.virt_addr + + idx * io_cq->cdesc_entry_size_in_bytes); +} + +static inline u16 ena_com_cdesc_rx_pkt_get(struct ena_com_io_cq *io_cq, + u16 *first_cdesc_idx) +{ + struct ena_eth_io_rx_cdesc_base *cdesc; + u16 count = 0, head_masked; + u32 last = 0; + + do { + cdesc = ena_com_get_next_rx_cdesc(io_cq); + if (!cdesc) + break; + + ena_com_cq_inc_head(io_cq); + count++; + last = (cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_LAST_MASK) >> + ENA_ETH_IO_RX_CDESC_BASE_LAST_SHIFT; + } while (!last); + + if (last) { + *first_cdesc_idx = io_cq->cur_rx_pkt_cdesc_start_idx; + count += io_cq->cur_rx_pkt_cdesc_count; + + head_masked = io_cq->head & (io_cq->q_depth - 1); + + io_cq->cur_rx_pkt_cdesc_count = 0; + io_cq->cur_rx_pkt_cdesc_start_idx = head_masked; + + pr_debug("ena q_id: %d packets were completed. first desc idx %u descs# %d\n", + io_cq->qid, *first_cdesc_idx, count); + } else { + io_cq->cur_rx_pkt_cdesc_count += count; + count = 0; + } + + return count; +} + +static inline bool ena_com_meta_desc_changed(struct ena_com_io_sq *io_sq, + struct ena_com_tx_ctx *ena_tx_ctx) +{ + int rc; + + if (ena_tx_ctx->meta_valid) { + rc = memcmp(&io_sq->cached_tx_meta, + &ena_tx_ctx->ena_meta, + sizeof(struct ena_com_tx_meta)); + + if (unlikely(rc != 0)) + return true; + } + + return false; +} + +static inline void ena_com_create_and_store_tx_meta_desc(struct ena_com_io_sq *io_sq, + struct ena_com_tx_ctx *ena_tx_ctx) +{ + struct ena_eth_io_tx_meta_desc *meta_desc = NULL; + struct ena_com_tx_meta *ena_meta = &ena_tx_ctx->ena_meta; + + meta_desc = get_sq_desc(io_sq); + memset(meta_desc, 0x0, sizeof(struct ena_eth_io_tx_meta_desc)); + + meta_desc->len_ctrl |= ENA_ETH_IO_TX_META_DESC_META_DESC_MASK; + + meta_desc->len_ctrl |= ENA_ETH_IO_TX_META_DESC_EXT_VALID_MASK; + + /* bits 0-9 of the mss */ + meta_desc->word2 |= (ena_meta->mss << + ENA_ETH_IO_TX_META_DESC_MSS_LO_SHIFT) & + ENA_ETH_IO_TX_META_DESC_MSS_LO_MASK; + /* bits 10-13 of the mss */ + meta_desc->len_ctrl |= ((ena_meta->mss >> 10) << + ENA_ETH_IO_TX_META_DESC_MSS_HI_SHIFT) & + ENA_ETH_IO_TX_META_DESC_MSS_HI_MASK; + + /* Extended meta desc */ + meta_desc->len_ctrl |= ENA_ETH_IO_TX_META_DESC_ETH_META_TYPE_MASK; + meta_desc->len_ctrl |= ENA_ETH_IO_TX_META_DESC_META_STORE_MASK; + meta_desc->len_ctrl |= (io_sq->phase << + ENA_ETH_IO_TX_META_DESC_PHASE_SHIFT) & + ENA_ETH_IO_TX_META_DESC_PHASE_MASK; + + meta_desc->len_ctrl |= ENA_ETH_IO_TX_META_DESC_FIRST_MASK; + meta_desc->word2 |= ena_meta->l3_hdr_len & + ENA_ETH_IO_TX_META_DESC_L3_HDR_LEN_MASK; + meta_desc->word2 |= (ena_meta->l3_hdr_offset << + ENA_ETH_IO_TX_META_DESC_L3_HDR_OFF_SHIFT) & + ENA_ETH_IO_TX_META_DESC_L3_HDR_OFF_MASK; + + meta_desc->word2 |= (ena_meta->l4_hdr_len << + ENA_ETH_IO_TX_META_DESC_L4_HDR_LEN_IN_WORDS_SHIFT) & + ENA_ETH_IO_TX_META_DESC_L4_HDR_LEN_IN_WORDS_MASK; + + meta_desc->len_ctrl |= ENA_ETH_IO_TX_META_DESC_META_STORE_MASK; + + /* Cached the meta desc */ + memcpy(&io_sq->cached_tx_meta, ena_meta, + sizeof(struct ena_com_tx_meta)); + + ena_com_copy_curr_sq_desc_to_dev(io_sq); + ena_com_sq_update_tail(io_sq); +} + +static inline void ena_com_rx_set_flags(struct ena_com_rx_ctx *ena_rx_ctx, + struct ena_eth_io_rx_cdesc_base *cdesc) +{ + ena_rx_ctx->l3_proto = cdesc->status & + ENA_ETH_IO_RX_CDESC_BASE_L3_PROTO_IDX_MASK; + ena_rx_ctx->l4_proto = + (cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L4_PROTO_IDX_MASK) >> + ENA_ETH_IO_RX_CDESC_BASE_L4_PROTO_IDX_SHIFT; + ena_rx_ctx->l3_csum_err = + (cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_MASK) >> + ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_SHIFT; + ena_rx_ctx->l4_csum_err = + (cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_MASK) >> + ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_SHIFT; + ena_rx_ctx->hash = cdesc->hash; + ena_rx_ctx->frag = + (cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_IPV4_FRAG_MASK) >> + ENA_ETH_IO_RX_CDESC_BASE_IPV4_FRAG_SHIFT; + + pr_debug("ena_rx_ctx->l3_proto %d ena_rx_ctx->l4_proto %d\nena_rx_ctx->l3_csum_err %d ena_rx_ctx->l4_csum_err %d\nhash frag %d frag: %d cdesc_status: %x\n", + ena_rx_ctx->l3_proto, ena_rx_ctx->l4_proto, + ena_rx_ctx->l3_csum_err, ena_rx_ctx->l4_csum_err, + ena_rx_ctx->hash, ena_rx_ctx->frag, cdesc->status); +} + +/*****************************************************************************/ +/***************************** API **********************************/ +/*****************************************************************************/ + +int ena_com_prepare_tx(struct ena_com_io_sq *io_sq, + struct ena_com_tx_ctx *ena_tx_ctx, + int *nb_hw_desc) +{ + struct ena_eth_io_tx_desc *desc = NULL; + struct ena_com_buf *ena_bufs = ena_tx_ctx->ena_bufs; + void *push_header = ena_tx_ctx->push_header; + u16 header_len = ena_tx_ctx->header_len; + u16 num_bufs = ena_tx_ctx->num_bufs; + int total_desc, i, rc; + bool have_meta; + u64 addr_hi; + + WARN(io_sq->direction != ENA_COM_IO_QUEUE_DIRECTION_TX, "wrong Q type"); + + /* num_bufs +1 for potential meta desc */ + if (ena_com_sq_empty_space(io_sq) < (num_bufs + 1)) { + pr_err("Not enough space in the tx queue\n"); + return -ENOMEM; + } + + if (unlikely(header_len > io_sq->tx_max_header_size)) { + pr_err("header size is too large %d max header: %d\n", + header_len, io_sq->tx_max_header_size); + return -EINVAL; + } + + /* start with pushing the header (if needed) */ + rc = ena_com_write_header(io_sq, push_header, header_len); + if (unlikely(rc)) + return rc; + + have_meta = ena_tx_ctx->meta_valid && ena_com_meta_desc_changed(io_sq, + ena_tx_ctx); + if (have_meta) + ena_com_create_and_store_tx_meta_desc(io_sq, ena_tx_ctx); + + /* If the caller doesn't want send packets */ + if (unlikely(!num_bufs && !header_len)) { + *nb_hw_desc = have_meta ? 0 : 1; + return 0; + } + + desc = get_sq_desc(io_sq); + memset(desc, 0x0, sizeof(struct ena_eth_io_tx_desc)); + + /* Set first desc when we don't have meta descriptor */ + if (!have_meta) + desc->len_ctrl |= ENA_ETH_IO_TX_DESC_FIRST_MASK; + + desc->buff_addr_hi_hdr_sz |= (header_len << + ENA_ETH_IO_TX_DESC_HEADER_LENGTH_SHIFT) & + ENA_ETH_IO_TX_DESC_HEADER_LENGTH_MASK; + desc->len_ctrl |= (io_sq->phase << ENA_ETH_IO_TX_DESC_PHASE_SHIFT) & + ENA_ETH_IO_TX_DESC_PHASE_MASK; + + desc->len_ctrl |= ENA_ETH_IO_TX_DESC_COMP_REQ_MASK; + + /* Bits 0-9 */ + desc->meta_ctrl |= (ena_tx_ctx->req_id << + ENA_ETH_IO_TX_DESC_REQ_ID_LO_SHIFT) & + ENA_ETH_IO_TX_DESC_REQ_ID_LO_MASK; + + desc->meta_ctrl |= (ena_tx_ctx->df << + ENA_ETH_IO_TX_DESC_DF_SHIFT) & + ENA_ETH_IO_TX_DESC_DF_MASK; + + /* Bits 10-15 */ + desc->len_ctrl |= ((ena_tx_ctx->req_id >> 10) << + ENA_ETH_IO_TX_DESC_REQ_ID_HI_SHIFT) & + ENA_ETH_IO_TX_DESC_REQ_ID_HI_MASK; + + if (ena_tx_ctx->meta_valid) { + desc->meta_ctrl |= (ena_tx_ctx->tso_enable << + ENA_ETH_IO_TX_DESC_TSO_EN_SHIFT) & + ENA_ETH_IO_TX_DESC_TSO_EN_MASK; + desc->meta_ctrl |= ena_tx_ctx->l3_proto & + ENA_ETH_IO_TX_DESC_L3_PROTO_IDX_MASK; + desc->meta_ctrl |= (ena_tx_ctx->l4_proto << + ENA_ETH_IO_TX_DESC_L4_PROTO_IDX_SHIFT) & + ENA_ETH_IO_TX_DESC_L4_PROTO_IDX_MASK; + desc->meta_ctrl |= (ena_tx_ctx->l3_csum_enable << + ENA_ETH_IO_TX_DESC_L3_CSUM_EN_SHIFT) & + ENA_ETH_IO_TX_DESC_L3_CSUM_EN_MASK; + desc->meta_ctrl |= (ena_tx_ctx->l4_csum_enable << + ENA_ETH_IO_TX_DESC_L4_CSUM_EN_SHIFT) & + ENA_ETH_IO_TX_DESC_L4_CSUM_EN_MASK; + desc->meta_ctrl |= (ena_tx_ctx->l4_csum_partial << + ENA_ETH_IO_TX_DESC_L4_CSUM_PARTIAL_SHIFT) & + ENA_ETH_IO_TX_DESC_L4_CSUM_PARTIAL_MASK; + } + + for (i = 0; i < num_bufs; i++) { + /* The first desc share the same desc as the header */ + if (likely(i != 0)) { + ena_com_copy_curr_sq_desc_to_dev(io_sq); + ena_com_sq_update_tail(io_sq); + + desc = get_sq_desc(io_sq); + memset(desc, 0x0, sizeof(struct ena_eth_io_tx_desc)); + + desc->len_ctrl |= (io_sq->phase << + ENA_ETH_IO_TX_DESC_PHASE_SHIFT) & + ENA_ETH_IO_TX_DESC_PHASE_MASK; + } + + desc->len_ctrl |= ena_bufs->len & + ENA_ETH_IO_TX_DESC_LENGTH_MASK; + + addr_hi = ((ena_bufs->paddr & + GENMASK_ULL(io_sq->dma_addr_bits - 1, 32)) >> 32); + + desc->buff_addr_lo = (u32)ena_bufs->paddr; + desc->buff_addr_hi_hdr_sz |= addr_hi & + ENA_ETH_IO_TX_DESC_ADDR_HI_MASK; + ena_bufs++; + } + + /* set the last desc indicator */ + desc->len_ctrl |= ENA_ETH_IO_TX_DESC_LAST_MASK; + + ena_com_copy_curr_sq_desc_to_dev(io_sq); + + ena_com_sq_update_tail(io_sq); + + total_desc = max_t(u16, num_bufs, 1); + total_desc += have_meta ? 1 : 0; + + *nb_hw_desc = total_desc; + return 0; +} + +int ena_com_rx_pkt(struct ena_com_io_cq *io_cq, + struct ena_com_io_sq *io_sq, + struct ena_com_rx_ctx *ena_rx_ctx) +{ + struct ena_com_rx_buf_info *ena_buf = &ena_rx_ctx->ena_bufs[0]; + struct ena_eth_io_rx_cdesc_base *cdesc = NULL; + u16 cdesc_idx = 0; + u16 nb_hw_desc; + u16 i; + + WARN(io_cq->direction != ENA_COM_IO_QUEUE_DIRECTION_RX, "wrong Q type"); + + nb_hw_desc = ena_com_cdesc_rx_pkt_get(io_cq, &cdesc_idx); + if (nb_hw_desc == 0) { + ena_rx_ctx->descs = nb_hw_desc; + return 0; + } + + pr_debug("fetch rx packet: queue %d completed desc: %d\n", io_cq->qid, + nb_hw_desc); + + if (unlikely(nb_hw_desc > ena_rx_ctx->max_bufs)) { + pr_err("Too many RX cdescs (%d) > MAX(%d)\n", nb_hw_desc, + ena_rx_ctx->max_bufs); + return -ENOSPC; + } + + for (i = 0; i < nb_hw_desc; i++) { + cdesc = ena_com_rx_cdesc_idx_to_ptr(io_cq, cdesc_idx + i); + + ena_buf->len = cdesc->length; + ena_buf->req_id = cdesc->req_id; + ena_buf++; + } + + /* Update SQ head ptr */ + io_sq->next_to_comp += nb_hw_desc; + + pr_debug("[%s][QID#%d] Updating SQ head to: %d\n", __func__, io_sq->qid, + io_sq->next_to_comp); + + /* Get rx flags from the last pkt */ + ena_com_rx_set_flags(ena_rx_ctx, cdesc); + + ena_rx_ctx->descs = nb_hw_desc; + return 0; +} + +int ena_com_add_single_rx_desc(struct ena_com_io_sq *io_sq, + struct ena_com_buf *ena_buf, + u16 req_id) +{ + struct ena_eth_io_rx_desc *desc; + + WARN(io_sq->direction != ENA_COM_IO_QUEUE_DIRECTION_RX, "wrong Q type"); + + if (unlikely(ena_com_sq_empty_space(io_sq) == 0)) + return -ENOSPC; + + desc = get_sq_desc(io_sq); + memset(desc, 0x0, sizeof(struct ena_eth_io_rx_desc)); + + desc->length = ena_buf->len; + + desc->ctrl |= ENA_ETH_IO_RX_DESC_FIRST_MASK; + desc->ctrl |= ENA_ETH_IO_RX_DESC_LAST_MASK; + desc->ctrl |= io_sq->phase & ENA_ETH_IO_RX_DESC_PHASE_MASK; + desc->ctrl |= ENA_ETH_IO_RX_DESC_COMP_REQ_MASK; + + desc->req_id = req_id; + + desc->buff_addr_lo = (u32)ena_buf->paddr; + desc->buff_addr_hi = + ((ena_buf->paddr & GENMASK_ULL(io_sq->dma_addr_bits - 1, 32)) >> 32); + + ena_com_sq_update_tail(io_sq); + + return 0; +} + +int ena_com_tx_comp_req_id_get(struct ena_com_io_cq *io_cq, u16 *req_id) +{ + u8 expected_phase, cdesc_phase; + struct ena_eth_io_tx_cdesc *cdesc; + u16 masked_head; + + masked_head = io_cq->head & (io_cq->q_depth - 1); + expected_phase = io_cq->phase; + + cdesc = (struct ena_eth_io_tx_cdesc *) + ((uintptr_t)io_cq->cdesc_addr.virt_addr + + (masked_head * io_cq->cdesc_entry_size_in_bytes)); + + /* When the current completion descriptor phase isn't the same as the + * expected, it mean that the device still didn't update + * this completion. + */ + cdesc_phase = cdesc->flags & ENA_ETH_IO_TX_CDESC_PHASE_MASK; + if (cdesc_phase != expected_phase) + return -EAGAIN; + + ena_com_cq_inc_head(io_cq); + + *req_id = cdesc->req_id; + + return 0; +} diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.h b/drivers/net/ethernet/amazon/ena/ena_eth_com.h new file mode 100644 index 000000000000..bb53c3a4f8e9 --- /dev/null +++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.h @@ -0,0 +1,160 @@ +/* + * Copyright 2015 Amazon.com, Inc. or its affiliates. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ENA_ETH_COM_H_ +#define ENA_ETH_COM_H_ + +#include "ena_com.h" + +/* head update threshold in units of (queue size / ENA_COMP_HEAD_THRESH) */ +#define ENA_COMP_HEAD_THRESH 4 + +struct ena_com_tx_ctx { + struct ena_com_tx_meta ena_meta; + struct ena_com_buf *ena_bufs; + /* For LLQ, header buffer - pushed to the device mem space */ + void *push_header; + + enum ena_eth_io_l3_proto_index l3_proto; + enum ena_eth_io_l4_proto_index l4_proto; + u16 num_bufs; + u16 req_id; + /* For regular queue, indicate the size of the header + * For LLQ, indicate the size of the pushed buffer + */ + u16 header_len; + + u8 meta_valid; + u8 tso_enable; + u8 l3_csum_enable; + u8 l4_csum_enable; + u8 l4_csum_partial; + u8 df; /* Don't fragment */ +}; + +struct ena_com_rx_ctx { + struct ena_com_rx_buf_info *ena_bufs; + enum ena_eth_io_l3_proto_index l3_proto; + enum ena_eth_io_l4_proto_index l4_proto; + bool l3_csum_err; + bool l4_csum_err; + /* fragmented packet */ + bool frag; + u32 hash; + u16 descs; + int max_bufs; +}; + +int ena_com_prepare_tx(struct ena_com_io_sq *io_sq, + struct ena_com_tx_ctx *ena_tx_ctx, + int *nb_hw_desc); + +int ena_com_rx_pkt(struct ena_com_io_cq *io_cq, + struct ena_com_io_sq *io_sq, + struct ena_com_rx_ctx *ena_rx_ctx); + +int ena_com_add_single_rx_desc(struct ena_com_io_sq *io_sq, + struct ena_com_buf *ena_buf, + u16 req_id); + +int ena_com_tx_comp_req_id_get(struct ena_com_io_cq *io_cq, u16 *req_id); + +static inline void ena_com_unmask_intr(struct ena_com_io_cq *io_cq, + struct ena_eth_io_intr_reg *intr_reg) +{ + writel(intr_reg->intr_control, io_cq->unmask_reg); +} + +static inline int ena_com_sq_empty_space(struct ena_com_io_sq *io_sq) +{ + u16 tail, next_to_comp, cnt; + + next_to_comp = io_sq->next_to_comp; + tail = io_sq->tail; + cnt = tail - next_to_comp; + + return io_sq->q_depth - 1 - cnt; +} + +static inline int ena_com_write_sq_doorbell(struct ena_com_io_sq *io_sq) +{ + u16 tail; + + tail = io_sq->tail; + + pr_debug("write submission queue doorbell for queue: %d tail: %d\n", + io_sq->qid, tail); + + writel(tail, io_sq->db_addr); + + return 0; +} + +static inline int ena_com_update_dev_comp_head(struct ena_com_io_cq *io_cq) +{ + u16 unreported_comp, head; + bool need_update; + + head = io_cq->head; + unreported_comp = head - io_cq->last_head_update; + need_update = unreported_comp > (io_cq->q_depth / ENA_COMP_HEAD_THRESH); + + if (io_cq->cq_head_db_reg && need_update) { + pr_debug("Write completion queue doorbell for queue %d: head: %d\n", + io_cq->qid, head); + writel(head, io_cq->cq_head_db_reg); + io_cq->last_head_update = head; + } + + return 0; +} + +static inline void ena_com_update_numa_node(struct ena_com_io_cq *io_cq, + u8 numa_node) +{ + struct ena_eth_io_numa_node_cfg_reg numa_cfg; + + if (!io_cq->numa_node_cfg_reg) + return; + + numa_cfg.numa_cfg = (numa_node & ENA_ETH_IO_NUMA_NODE_CFG_REG_NUMA_MASK) + | ENA_ETH_IO_NUMA_NODE_CFG_REG_ENABLED_MASK; + + writel(numa_cfg.numa_cfg, io_cq->numa_node_cfg_reg); +} + +static inline void ena_com_comp_ack(struct ena_com_io_sq *io_sq, u16 elem) +{ + io_sq->next_to_comp += elem; +} + +#endif /* ENA_ETH_COM_H_ */ diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_io_defs.h b/drivers/net/ethernet/amazon/ena/ena_eth_io_defs.h new file mode 100644 index 000000000000..f320c58793a5 --- /dev/null +++ b/drivers/net/ethernet/amazon/ena/ena_eth_io_defs.h @@ -0,0 +1,416 @@ +/* + * Copyright 2015 - 2016 Amazon.com, Inc. or its affiliates. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _ENA_ETH_IO_H_ +#define _ENA_ETH_IO_H_ + +enum ena_eth_io_l3_proto_index { + ENA_ETH_IO_L3_PROTO_UNKNOWN = 0, + + ENA_ETH_IO_L3_PROTO_IPV4 = 8, + + ENA_ETH_IO_L3_PROTO_IPV6 = 11, + + ENA_ETH_IO_L3_PROTO_FCOE = 21, + + ENA_ETH_IO_L3_PROTO_ROCE = 22, +}; + +enum ena_eth_io_l4_proto_index { + ENA_ETH_IO_L4_PROTO_UNKNOWN = 0, + + ENA_ETH_IO_L4_PROTO_TCP = 12, + + ENA_ETH_IO_L4_PROTO_UDP = 13, + + ENA_ETH_IO_L4_PROTO_ROUTEABLE_ROCE = 23, +}; + +struct ena_eth_io_tx_desc { + /* 15:0 : length - Buffer length in bytes, must + * include any packet trailers that the ENA supposed + * to update like End-to-End CRC, Authentication GMAC + * etc. This length must not include the + * 'Push_Buffer' length. This length must not include + * the 4-byte added in the end for 802.3 Ethernet FCS + * 21:16 : req_id_hi - Request ID[15:10] + * 22 : reserved22 - MBZ + * 23 : meta_desc - MBZ + * 24 : phase + * 25 : reserved1 - MBZ + * 26 : first - Indicates first descriptor in + * transaction + * 27 : last - Indicates last descriptor in + * transaction + * 28 : comp_req - Indicates whether completion + * should be posted, after packet is transmitted. + * Valid only for first descriptor + * 30:29 : reserved29 - MBZ + * 31 : reserved31 - MBZ + */ + u32 len_ctrl; + + /* 3:0 : l3_proto_idx - L3 protocol. This field + * required when l3_csum_en,l3_csum or tso_en are set. + * 4 : DF - IPv4 DF, must be 0 if packet is IPv4 and + * DF flags of the IPv4 header is 0. Otherwise must + * be set to 1 + * 6:5 : reserved5 + * 7 : tso_en - Enable TSO, For TCP only. + * 12:8 : l4_proto_idx - L4 protocol. This field need + * to be set when l4_csum_en or tso_en are set. + * 13 : l3_csum_en - enable IPv4 header checksum. + * 14 : l4_csum_en - enable TCP/UDP checksum. + * 15 : ethernet_fcs_dis - when set, the controller + * will not append the 802.3 Ethernet Frame Check + * Sequence to the packet + * 16 : reserved16 + * 17 : l4_csum_partial - L4 partial checksum. when + * set to 0, the ENA calculates the L4 checksum, + * where the Destination Address required for the + * TCP/UDP pseudo-header is taken from the actual + * packet L3 header. when set to 1, the ENA doesn't + * calculate the sum of the pseudo-header, instead, + * the checksum field of the L4 is used instead. When + * TSO enabled, the checksum of the pseudo-header + * must not include the tcp length field. L4 partial + * checksum should be used for IPv6 packet that + * contains Routing Headers. + * 20:18 : reserved18 - MBZ + * 21 : reserved21 - MBZ + * 31:22 : req_id_lo - Request ID[9:0] + */ + u32 meta_ctrl; + + u32 buff_addr_lo; + + /* address high and header size + * 15:0 : addr_hi - Buffer Pointer[47:32] + * 23:16 : reserved16_w2 + * 31:24 : header_length - Header length. For Low + * Latency Queues, this fields indicates the number + * of bytes written to the headers' memory. For + * normal queues, if packet is TCP or UDP, and longer + * than max_header_size, then this field should be + * set to the sum of L4 header offset and L4 header + * size(without options), otherwise, this field + * should be set to 0. For both modes, this field + * must not exceed the max_header_size. + * max_header_size value is reported by the Max + * Queues Feature descriptor + */ + u32 buff_addr_hi_hdr_sz; +}; + +struct ena_eth_io_tx_meta_desc { + /* 9:0 : req_id_lo - Request ID[9:0] + * 11:10 : reserved10 - MBZ + * 12 : reserved12 - MBZ + * 13 : reserved13 - MBZ + * 14 : ext_valid - if set, offset fields in Word2 + * are valid Also MSS High in Word 0 and bits [31:24] + * in Word 3 + * 15 : reserved15 + * 19:16 : mss_hi + * 20 : eth_meta_type - 0: Tx Metadata Descriptor, 1: + * Extended Metadata Descriptor + * 21 : meta_store - Store extended metadata in queue + * cache + * 22 : reserved22 - MBZ + * 23 : meta_desc - MBO + * 24 : phase + * 25 : reserved25 - MBZ + * 26 : first - Indicates first descriptor in + * transaction + * 27 : last - Indicates last descriptor in + * transaction + * 28 : comp_req - Indicates whether completion + * should be posted, after packet is transmitted. + * Valid only for first descriptor + * 30:29 : reserved29 - MBZ + * 31 : reserved31 - MBZ + */ + u32 len_ctrl; + + /* 5:0 : req_id_hi + * 31:6 : reserved6 - MBZ + */ + u32 word1; + + /* 7:0 : l3_hdr_len + * 15:8 : l3_hdr_off + * 21:16 : l4_hdr_len_in_words - counts the L4 header + * length in words. there is an explicit assumption + * that L4 header appears right after L3 header and + * L4 offset is based on l3_hdr_off+l3_hdr_len + * 31:22 : mss_lo + */ + u32 word2; + + u32 reserved; +}; + +struct ena_eth_io_tx_cdesc { + /* Request ID[15:0] */ + u16 req_id; + + u8 status; + + /* flags + * 0 : phase + * 7:1 : reserved1 + */ + u8 flags; + + u16 sub_qid; + + u16 sq_head_idx; +}; + +struct ena_eth_io_rx_desc { + /* In bytes. 0 means 64KB */ + u16 length; + + /* MBZ */ + u8 reserved2; + + /* 0 : phase + * 1 : reserved1 - MBZ + * 2 : first - Indicates first descriptor in + * transaction + * 3 : last - Indicates last descriptor in transaction + * 4 : comp_req + * 5 : reserved5 - MBO + * 7:6 : reserved6 - MBZ + */ + u8 ctrl; + + u16 req_id; + + /* MBZ */ + u16 reserved6; + + u32 buff_addr_lo; + + u16 buff_addr_hi; + + /* MBZ */ + u16 reserved16_w3; +}; + +/* 4-word format Note: all ethernet parsing information are valid only when + * last=1 + */ +struct ena_eth_io_rx_cdesc_base { + /* 4:0 : l3_proto_idx + * 6:5 : src_vlan_cnt + * 7 : reserved7 - MBZ + * 12:8 : l4_proto_idx + * 13 : l3_csum_err - when set, either the L3 + * checksum error detected, or, the controller didn't + * validate the checksum. This bit is valid only when + * l3_proto_idx indicates IPv4 packet + * 14 : l4_csum_err - when set, either the L4 + * checksum error detected, or, the controller didn't + * validate the checksum. This bit is valid only when + * l4_proto_idx indicates TCP/UDP packet, and, + * ipv4_frag is not set + * 15 : ipv4_frag - Indicates IPv4 fragmented packet + * 23:16 : reserved16 + * 24 : phase + * 25 : l3_csum2 - second checksum engine result + * 26 : first - Indicates first descriptor in + * transaction + * 27 : last - Indicates last descriptor in + * transaction + * 29:28 : reserved28 + * 30 : buffer - 0: Metadata descriptor. 1: Buffer + * Descriptor was used + * 31 : reserved31 + */ + u32 status; + + u16 length; + + u16 req_id; + + /* 32-bit hash result */ + u32 hash; + + u16 sub_qid; + + u16 reserved; +}; + +/* 8-word format */ +struct ena_eth_io_rx_cdesc_ext { + struct ena_eth_io_rx_cdesc_base base; + + u32 buff_addr_lo; + + u16 buff_addr_hi; + + u16 reserved16; + + u32 reserved_w6; + + u32 reserved_w7; +}; + +struct ena_eth_io_intr_reg { + /* 14:0 : rx_intr_delay + * 29:15 : tx_intr_delay + * 30 : intr_unmask + * 31 : reserved + */ + u32 intr_control; +}; + +struct ena_eth_io_numa_node_cfg_reg { + /* 7:0 : numa + * 30:8 : reserved + * 31 : enabled + */ + u32 numa_cfg; +}; + +/* tx_desc */ +#define ENA_ETH_IO_TX_DESC_LENGTH_MASK GENMASK(15, 0) +#define ENA_ETH_IO_TX_DESC_REQ_ID_HI_SHIFT 16 +#define ENA_ETH_IO_TX_DESC_REQ_ID_HI_MASK GENMASK(21, 16) +#define ENA_ETH_IO_TX_DESC_META_DESC_SHIFT 23 +#define ENA_ETH_IO_TX_DESC_META_DESC_MASK BIT(23) +#define ENA_ETH_IO_TX_DESC_PHASE_SHIFT 24 +#define ENA_ETH_IO_TX_DESC_PHASE_MASK BIT(24) +#define ENA_ETH_IO_TX_DESC_FIRST_SHIFT 26 +#define ENA_ETH_IO_TX_DESC_FIRST_MASK BIT(26) +#define ENA_ETH_IO_TX_DESC_LAST_SHIFT 27 +#define ENA_ETH_IO_TX_DESC_LAST_MASK BIT(27) +#define ENA_ETH_IO_TX_DESC_COMP_REQ_SHIFT 28 +#define ENA_ETH_IO_TX_DESC_COMP_REQ_MASK BIT(28) +#define ENA_ETH_IO_TX_DESC_L3_PROTO_IDX_MASK GENMASK(3, 0) +#define ENA_ETH_IO_TX_DESC_DF_SHIFT 4 +#define ENA_ETH_IO_TX_DESC_DF_MASK BIT(4) +#define ENA_ETH_IO_TX_DESC_TSO_EN_SHIFT 7 +#define ENA_ETH_IO_TX_DESC_TSO_EN_MASK BIT(7) +#define ENA_ETH_IO_TX_DESC_L4_PROTO_IDX_SHIFT 8 +#define ENA_ETH_IO_TX_DESC_L4_PROTO_IDX_MASK GENMASK(12, 8) +#define ENA_ETH_IO_TX_DESC_L3_CSUM_EN_SHIFT 13 +#define ENA_ETH_IO_TX_DESC_L3_CSUM_EN_MASK BIT(13) +#define ENA_ETH_IO_TX_DESC_L4_CSUM_EN_SHIFT 14 +#define ENA_ETH_IO_TX_DESC_L4_CSUM_EN_MASK BIT(14) +#define ENA_ETH_IO_TX_DESC_ETHERNET_FCS_DIS_SHIFT 15 +#define ENA_ETH_IO_TX_DESC_ETHERNET_FCS_DIS_MASK BIT(15) +#define ENA_ETH_IO_TX_DESC_L4_CSUM_PARTIAL_SHIFT 17 +#define ENA_ETH_IO_TX_DESC_L4_CSUM_PARTIAL_MASK BIT(17) +#define ENA_ETH_IO_TX_DESC_REQ_ID_LO_SHIFT 22 +#define ENA_ETH_IO_TX_DESC_REQ_ID_LO_MASK GENMASK(31, 22) +#define ENA_ETH_IO_TX_DESC_ADDR_HI_MASK GENMASK(15, 0) +#define ENA_ETH_IO_TX_DESC_HEADER_LENGTH_SHIFT 24 +#define ENA_ETH_IO_TX_DESC_HEADER_LENGTH_MASK GENMASK(31, 24) + +/* tx_meta_desc */ +#define ENA_ETH_IO_TX_META_DESC_REQ_ID_LO_MASK GENMASK(9, 0) +#define ENA_ETH_IO_TX_META_DESC_EXT_VALID_SHIFT 14 +#define ENA_ETH_IO_TX_META_DESC_EXT_VALID_MASK BIT(14) +#define ENA_ETH_IO_TX_META_DESC_MSS_HI_SHIFT 16 +#define ENA_ETH_IO_TX_META_DESC_MSS_HI_MASK GENMASK(19, 16) +#define ENA_ETH_IO_TX_META_DESC_ETH_META_TYPE_SHIFT 20 +#define ENA_ETH_IO_TX_META_DESC_ETH_META_TYPE_MASK BIT(20) +#define ENA_ETH_IO_TX_META_DESC_META_STORE_SHIFT 21 +#define ENA_ETH_IO_TX_META_DESC_META_STORE_MASK BIT(21) +#define ENA_ETH_IO_TX_META_DESC_META_DESC_SHIFT 23 +#define ENA_ETH_IO_TX_META_DESC_META_DESC_MASK BIT(23) +#define ENA_ETH_IO_TX_META_DESC_PHASE_SHIFT 24 +#define ENA_ETH_IO_TX_META_DESC_PHASE_MASK BIT(24) +#define ENA_ETH_IO_TX_META_DESC_FIRST_SHIFT 26 +#define ENA_ETH_IO_TX_META_DESC_FIRST_MASK BIT(26) +#define ENA_ETH_IO_TX_META_DESC_LAST_SHIFT 27 +#define ENA_ETH_IO_TX_META_DESC_LAST_MASK BIT(27) +#define ENA_ETH_IO_TX_META_DESC_COMP_REQ_SHIFT 28 +#define ENA_ETH_IO_TX_META_DESC_COMP_REQ_MASK BIT(28) +#define ENA_ETH_IO_TX_META_DESC_REQ_ID_HI_MASK GENMASK(5, 0) +#define ENA_ETH_IO_TX_META_DESC_L3_HDR_LEN_MASK GENMASK(7, 0) +#define ENA_ETH_IO_TX_META_DESC_L3_HDR_OFF_SHIFT 8 +#define ENA_ETH_IO_TX_META_DESC_L3_HDR_OFF_MASK GENMASK(15, 8) +#define ENA_ETH_IO_TX_META_DESC_L4_HDR_LEN_IN_WORDS_SHIFT 16 +#define ENA_ETH_IO_TX_META_DESC_L4_HDR_LEN_IN_WORDS_MASK GENMASK(21, 16) +#define ENA_ETH_IO_TX_META_DESC_MSS_LO_SHIFT 22 +#define ENA_ETH_IO_TX_META_DESC_MSS_LO_MASK GENMASK(31, 22) + +/* tx_cdesc */ +#define ENA_ETH_IO_TX_CDESC_PHASE_MASK BIT(0) + +/* rx_desc */ +#define ENA_ETH_IO_RX_DESC_PHASE_MASK BIT(0) +#define ENA_ETH_IO_RX_DESC_FIRST_SHIFT 2 +#define ENA_ETH_IO_RX_DESC_FIRST_MASK BIT(2) +#define ENA_ETH_IO_RX_DESC_LAST_SHIFT 3 +#define ENA_ETH_IO_RX_DESC_LAST_MASK BIT(3) +#define ENA_ETH_IO_RX_DESC_COMP_REQ_SHIFT 4 +#define ENA_ETH_IO_RX_DESC_COMP_REQ_MASK BIT(4) + +/* rx_cdesc_base */ +#define ENA_ETH_IO_RX_CDESC_BASE_L3_PROTO_IDX_MASK GENMASK(4, 0) +#define ENA_ETH_IO_RX_CDESC_BASE_SRC_VLAN_CNT_SHIFT 5 +#define ENA_ETH_IO_RX_CDESC_BASE_SRC_VLAN_CNT_MASK GENMASK(6, 5) +#define ENA_ETH_IO_RX_CDESC_BASE_L4_PROTO_IDX_SHIFT 8 +#define ENA_ETH_IO_RX_CDESC_BASE_L4_PROTO_IDX_MASK GENMASK(12, 8) +#define ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_SHIFT 13 +#define ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_MASK BIT(13) +#define ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_SHIFT 14 +#define ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_MASK BIT(14) +#define ENA_ETH_IO_RX_CDESC_BASE_IPV4_FRAG_SHIFT 15 +#define ENA_ETH_IO_RX_CDESC_BASE_IPV4_FRAG_MASK BIT(15) +#define ENA_ETH_IO_RX_CDESC_BASE_PHASE_SHIFT 24 +#define ENA_ETH_IO_RX_CDESC_BASE_PHASE_MASK BIT(24) +#define ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM2_SHIFT 25 +#define ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM2_MASK BIT(25) +#define ENA_ETH_IO_RX_CDESC_BASE_FIRST_SHIFT 26 +#define ENA_ETH_IO_RX_CDESC_BASE_FIRST_MASK BIT(26) +#define ENA_ETH_IO_RX_CDESC_BASE_LAST_SHIFT 27 +#define ENA_ETH_IO_RX_CDESC_BASE_LAST_MASK BIT(27) +#define ENA_ETH_IO_RX_CDESC_BASE_BUFFER_SHIFT 30 +#define ENA_ETH_IO_RX_CDESC_BASE_BUFFER_MASK BIT(30) + +/* intr_reg */ +#define ENA_ETH_IO_INTR_REG_RX_INTR_DELAY_MASK GENMASK(14, 0) +#define ENA_ETH_IO_INTR_REG_TX_INTR_DELAY_SHIFT 15 +#define ENA_ETH_IO_INTR_REG_TX_INTR_DELAY_MASK GENMASK(29, 15) +#define ENA_ETH_IO_INTR_REG_INTR_UNMASK_SHIFT 30 +#define ENA_ETH_IO_INTR_REG_INTR_UNMASK_MASK BIT(30) + +/* numa_node_cfg_reg */ +#define ENA_ETH_IO_NUMA_NODE_CFG_REG_NUMA_MASK GENMASK(7, 0) +#define ENA_ETH_IO_NUMA_NODE_CFG_REG_ENABLED_SHIFT 31 +#define ENA_ETH_IO_NUMA_NODE_CFG_REG_ENABLED_MASK BIT(31) + +#endif /*_ENA_ETH_IO_H_ */ diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c new file mode 100644 index 000000000000..67b2338f8fb3 --- /dev/null +++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c @@ -0,0 +1,895 @@ +/* + * Copyright 2015 Amazon.com, Inc. or its affiliates. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include + +#include "ena_netdev.h" + +struct ena_stats { + char name[ETH_GSTRING_LEN]; + int stat_offset; +}; + +#define ENA_STAT_ENA_COM_ENTRY(stat) { \ + .name = #stat, \ + .stat_offset = offsetof(struct ena_com_stats_admin, stat) \ +} + +#define ENA_STAT_ENTRY(stat, stat_type) { \ + .name = #stat, \ + .stat_offset = offsetof(struct ena_stats_##stat_type, stat) \ +} + +#define ENA_STAT_RX_ENTRY(stat) \ + ENA_STAT_ENTRY(stat, rx) + +#define ENA_STAT_TX_ENTRY(stat) \ + ENA_STAT_ENTRY(stat, tx) + +#define ENA_STAT_GLOBAL_ENTRY(stat) \ + ENA_STAT_ENTRY(stat, dev) + +static const struct ena_stats ena_stats_global_strings[] = { + ENA_STAT_GLOBAL_ENTRY(tx_timeout), + ENA_STAT_GLOBAL_ENTRY(io_suspend), + ENA_STAT_GLOBAL_ENTRY(io_resume), + ENA_STAT_GLOBAL_ENTRY(wd_expired), + ENA_STAT_GLOBAL_ENTRY(interface_up), + ENA_STAT_GLOBAL_ENTRY(interface_down), + ENA_STAT_GLOBAL_ENTRY(admin_q_pause), +}; + +static const struct ena_stats ena_stats_tx_strings[] = { + ENA_STAT_TX_ENTRY(cnt), + ENA_STAT_TX_ENTRY(bytes), + ENA_STAT_TX_ENTRY(queue_stop), + ENA_STAT_TX_ENTRY(queue_wakeup), + ENA_STAT_TX_ENTRY(dma_mapping_err), + ENA_STAT_TX_ENTRY(linearize), + ENA_STAT_TX_ENTRY(linearize_failed), + ENA_STAT_TX_ENTRY(napi_comp), + ENA_STAT_TX_ENTRY(tx_poll), + ENA_STAT_TX_ENTRY(doorbells), + ENA_STAT_TX_ENTRY(prepare_ctx_err), + ENA_STAT_TX_ENTRY(missing_tx_comp), + ENA_STAT_TX_ENTRY(bad_req_id), +}; + +static const struct ena_stats ena_stats_rx_strings[] = { + ENA_STAT_RX_ENTRY(cnt), + ENA_STAT_RX_ENTRY(bytes), + ENA_STAT_RX_ENTRY(refil_partial), + ENA_STAT_RX_ENTRY(bad_csum), + ENA_STAT_RX_ENTRY(page_alloc_fail), + ENA_STAT_RX_ENTRY(skb_alloc_fail), + ENA_STAT_RX_ENTRY(dma_mapping_err), + ENA_STAT_RX_ENTRY(bad_desc_num), + ENA_STAT_RX_ENTRY(rx_copybreak_pkt), +}; + +static const struct ena_stats ena_stats_ena_com_strings[] = { + ENA_STAT_ENA_COM_ENTRY(aborted_cmd), + ENA_STAT_ENA_COM_ENTRY(submitted_cmd), + ENA_STAT_ENA_COM_ENTRY(completed_cmd), + ENA_STAT_ENA_COM_ENTRY(out_of_space), + ENA_STAT_ENA_COM_ENTRY(no_completion), +}; + +#define ENA_STATS_ARRAY_GLOBAL ARRAY_SIZE(ena_stats_global_strings) +#define ENA_STATS_ARRAY_TX ARRAY_SIZE(ena_stats_tx_strings) +#define ENA_STATS_ARRAY_RX ARRAY_SIZE(ena_stats_rx_strings) +#define ENA_STATS_ARRAY_ENA_COM ARRAY_SIZE(ena_stats_ena_com_strings) + +static void ena_safe_update_stat(u64 *src, u64 *dst, + struct u64_stats_sync *syncp) +{ + unsigned int start; + + do { + start = u64_stats_fetch_begin_irq(syncp); + *(dst) = *src; + } while (u64_stats_fetch_retry_irq(syncp, start)); +} + +static void ena_queue_stats(struct ena_adapter *adapter, u64 **data) +{ + const struct ena_stats *ena_stats; + struct ena_ring *ring; + + u64 *ptr; + int i, j; + + for (i = 0; i < adapter->num_queues; i++) { + /* Tx stats */ + ring = &adapter->tx_ring[i]; + + for (j = 0; j < ENA_STATS_ARRAY_TX; j++) { + ena_stats = &ena_stats_tx_strings[j]; + + ptr = (u64 *)((uintptr_t)&ring->tx_stats + + (uintptr_t)ena_stats->stat_offset); + + ena_safe_update_stat(ptr, (*data)++, &ring->syncp); + } + + /* Rx stats */ + ring = &adapter->rx_ring[i]; + + for (j = 0; j < ENA_STATS_ARRAY_RX; j++) { + ena_stats = &ena_stats_rx_strings[j]; + + ptr = (u64 *)((uintptr_t)&ring->rx_stats + + (uintptr_t)ena_stats->stat_offset); + + ena_safe_update_stat(ptr, (*data)++, &ring->syncp); + } + } +} + +static void ena_dev_admin_queue_stats(struct ena_adapter *adapter, u64 **data) +{ + const struct ena_stats *ena_stats; + u32 *ptr; + int i; + + for (i = 0; i < ENA_STATS_ARRAY_ENA_COM; i++) { + ena_stats = &ena_stats_ena_com_strings[i]; + + ptr = (u32 *)((uintptr_t)&adapter->ena_dev->admin_queue.stats + + (uintptr_t)ena_stats->stat_offset); + + *(*data)++ = *ptr; + } +} + +static void ena_get_ethtool_stats(struct net_device *netdev, + struct ethtool_stats *stats, + u64 *data) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + const struct ena_stats *ena_stats; + u64 *ptr; + int i; + + for (i = 0; i < ENA_STATS_ARRAY_GLOBAL; i++) { + ena_stats = &ena_stats_global_strings[i]; + + ptr = (u64 *)((uintptr_t)&adapter->dev_stats + + (uintptr_t)ena_stats->stat_offset); + + ena_safe_update_stat(ptr, data++, &adapter->syncp); + } + + ena_queue_stats(adapter, &data); + ena_dev_admin_queue_stats(adapter, &data); +} + +int ena_get_sset_count(struct net_device *netdev, int sset) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + + if (sset != ETH_SS_STATS) + return -EOPNOTSUPP; + + return adapter->num_queues * (ENA_STATS_ARRAY_TX + ENA_STATS_ARRAY_RX) + + ENA_STATS_ARRAY_GLOBAL + ENA_STATS_ARRAY_ENA_COM; +} + +static void ena_queue_strings(struct ena_adapter *adapter, u8 **data) +{ + const struct ena_stats *ena_stats; + int i, j; + + for (i = 0; i < adapter->num_queues; i++) { + /* Tx stats */ + for (j = 0; j < ENA_STATS_ARRAY_TX; j++) { + ena_stats = &ena_stats_tx_strings[j]; + + snprintf(*data, ETH_GSTRING_LEN, + "queue_%u_tx_%s", i, ena_stats->name); + (*data) += ETH_GSTRING_LEN; + } + /* Rx stats */ + for (j = 0; j < ENA_STATS_ARRAY_RX; j++) { + ena_stats = &ena_stats_rx_strings[j]; + + snprintf(*data, ETH_GSTRING_LEN, + "queue_%u_rx_%s", i, ena_stats->name); + (*data) += ETH_GSTRING_LEN; + } + } +} + +static void ena_com_dev_strings(u8 **data) +{ + const struct ena_stats *ena_stats; + int i; + + for (i = 0; i < ENA_STATS_ARRAY_ENA_COM; i++) { + ena_stats = &ena_stats_ena_com_strings[i]; + + snprintf(*data, ETH_GSTRING_LEN, + "ena_admin_q_%s", ena_stats->name); + (*data) += ETH_GSTRING_LEN; + } +} + +static void ena_get_strings(struct net_device *netdev, u32 sset, u8 *data) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + const struct ena_stats *ena_stats; + int i; + + if (sset != ETH_SS_STATS) + return; + + for (i = 0; i < ENA_STATS_ARRAY_GLOBAL; i++) { + ena_stats = &ena_stats_global_strings[i]; + + memcpy(data, ena_stats->name, ETH_GSTRING_LEN); + data += ETH_GSTRING_LEN; + } + + ena_queue_strings(adapter, &data); + ena_com_dev_strings(&data); +} + +static int ena_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *link_ksettings) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + struct ena_com_dev *ena_dev = adapter->ena_dev; + struct ena_admin_get_feature_link_desc *link; + struct ena_admin_get_feat_resp feat_resp; + int rc; + + rc = ena_com_get_link_params(ena_dev, &feat_resp); + if (rc) + return rc; + + link = &feat_resp.u.link; + link_ksettings->base.speed = link->speed; + + if (link->flags & ENA_ADMIN_GET_FEATURE_LINK_DESC_AUTONEG_MASK) { + ethtool_link_ksettings_add_link_mode(link_ksettings, + supported, Autoneg); + ethtool_link_ksettings_add_link_mode(link_ksettings, + supported, Autoneg); + } + + link_ksettings->base.autoneg = + (link->flags & ENA_ADMIN_GET_FEATURE_LINK_DESC_AUTONEG_MASK) ? + AUTONEG_ENABLE : AUTONEG_DISABLE; + + link_ksettings->base.duplex = DUPLEX_FULL; + + return 0; +} + +static int ena_get_coalesce(struct net_device *net_dev, + struct ethtool_coalesce *coalesce) +{ + struct ena_adapter *adapter = netdev_priv(net_dev); + struct ena_com_dev *ena_dev = adapter->ena_dev; + struct ena_intr_moder_entry intr_moder_entry; + + if (!ena_com_interrupt_moderation_supported(ena_dev)) { + /* the devie doesn't support interrupt moderation */ + return -EOPNOTSUPP; + } + coalesce->tx_coalesce_usecs = + ena_com_get_nonadaptive_moderation_interval_tx(ena_dev) / + ena_dev->intr_delay_resolution; + if (!ena_com_get_adaptive_moderation_enabled(ena_dev)) { + coalesce->rx_coalesce_usecs = + ena_com_get_nonadaptive_moderation_interval_rx(ena_dev) + / ena_dev->intr_delay_resolution; + } else { + ena_com_get_intr_moderation_entry(adapter->ena_dev, ENA_INTR_MODER_LOWEST, &intr_moder_entry); + coalesce->rx_coalesce_usecs_low = intr_moder_entry.intr_moder_interval; + coalesce->rx_max_coalesced_frames_low = intr_moder_entry.pkts_per_interval; + + ena_com_get_intr_moderation_entry(adapter->ena_dev, ENA_INTR_MODER_MID, &intr_moder_entry); + coalesce->rx_coalesce_usecs = intr_moder_entry.intr_moder_interval; + coalesce->rx_max_coalesced_frames = intr_moder_entry.pkts_per_interval; + + ena_com_get_intr_moderation_entry(adapter->ena_dev, ENA_INTR_MODER_HIGHEST, &intr_moder_entry); + coalesce->rx_coalesce_usecs_high = intr_moder_entry.intr_moder_interval; + coalesce->rx_max_coalesced_frames_high = intr_moder_entry.pkts_per_interval; + } + coalesce->use_adaptive_rx_coalesce = + ena_com_get_adaptive_moderation_enabled(ena_dev); + + return 0; +} + +static void ena_update_tx_rings_intr_moderation(struct ena_adapter *adapter) +{ + unsigned int val; + int i; + + val = ena_com_get_nonadaptive_moderation_interval_tx(adapter->ena_dev); + + for (i = 0; i < adapter->num_queues; i++) + adapter->tx_ring[i].smoothed_interval = val; +} + +static int ena_set_coalesce(struct net_device *net_dev, + struct ethtool_coalesce *coalesce) +{ + struct ena_adapter *adapter = netdev_priv(net_dev); + struct ena_com_dev *ena_dev = adapter->ena_dev; + struct ena_intr_moder_entry intr_moder_entry; + int rc; + + if (!ena_com_interrupt_moderation_supported(ena_dev)) { + /* the devie doesn't support interrupt moderation */ + return -EOPNOTSUPP; + } + + if (coalesce->rx_coalesce_usecs_irq || + coalesce->rx_max_coalesced_frames_irq || + coalesce->tx_coalesce_usecs_irq || + coalesce->tx_max_coalesced_frames || + coalesce->tx_max_coalesced_frames_irq || + coalesce->stats_block_coalesce_usecs || + coalesce->use_adaptive_tx_coalesce || + coalesce->pkt_rate_low || + coalesce->tx_coalesce_usecs_low || + coalesce->tx_max_coalesced_frames_low || + coalesce->pkt_rate_high || + coalesce->tx_coalesce_usecs_high || + coalesce->tx_max_coalesced_frames_high || + coalesce->rate_sample_interval) + return -EINVAL; + + rc = ena_com_update_nonadaptive_moderation_interval_tx(ena_dev, + coalesce->tx_coalesce_usecs); + if (rc) + return rc; + + ena_update_tx_rings_intr_moderation(adapter); + + if (ena_com_get_adaptive_moderation_enabled(ena_dev)) { + if (!coalesce->use_adaptive_rx_coalesce) { + ena_com_disable_adaptive_moderation(ena_dev); + rc = ena_com_update_nonadaptive_moderation_interval_rx(ena_dev, + coalesce->rx_coalesce_usecs); + return rc; + } + } else { /* was in non-adaptive mode */ + if (coalesce->use_adaptive_rx_coalesce) { + ena_com_enable_adaptive_moderation(ena_dev); + } else { + rc = ena_com_update_nonadaptive_moderation_interval_rx(ena_dev, + coalesce->rx_coalesce_usecs); + return rc; + } + } + + intr_moder_entry.intr_moder_interval = coalesce->rx_coalesce_usecs_low; + intr_moder_entry.pkts_per_interval = coalesce->rx_max_coalesced_frames_low; + intr_moder_entry.bytes_per_interval = ENA_INTR_BYTE_COUNT_NOT_SUPPORTED; + ena_com_init_intr_moderation_entry(adapter->ena_dev, ENA_INTR_MODER_LOWEST, &intr_moder_entry); + + intr_moder_entry.intr_moder_interval = coalesce->rx_coalesce_usecs; + intr_moder_entry.pkts_per_interval = coalesce->rx_max_coalesced_frames; + intr_moder_entry.bytes_per_interval = ENA_INTR_BYTE_COUNT_NOT_SUPPORTED; + ena_com_init_intr_moderation_entry(adapter->ena_dev, ENA_INTR_MODER_MID, &intr_moder_entry); + + intr_moder_entry.intr_moder_interval = coalesce->rx_coalesce_usecs_high; + intr_moder_entry.pkts_per_interval = coalesce->rx_max_coalesced_frames_high; + intr_moder_entry.bytes_per_interval = ENA_INTR_BYTE_COUNT_NOT_SUPPORTED; + ena_com_init_intr_moderation_entry(adapter->ena_dev, ENA_INTR_MODER_HIGHEST, &intr_moder_entry); + + return 0; +} + +static u32 ena_get_msglevel(struct net_device *netdev) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + + return adapter->msg_enable; +} + +static void ena_set_msglevel(struct net_device *netdev, u32 value) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + + adapter->msg_enable = value; +} + +static void ena_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *info) +{ + struct ena_adapter *adapter = netdev_priv(dev); + + strlcpy(info->driver, DRV_MODULE_NAME, sizeof(info->driver)); + strlcpy(info->version, DRV_MODULE_VERSION, sizeof(info->version)); + strlcpy(info->bus_info, pci_name(adapter->pdev), + sizeof(info->bus_info)); +} + +static void ena_get_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + struct ena_ring *tx_ring = &adapter->tx_ring[0]; + struct ena_ring *rx_ring = &adapter->rx_ring[0]; + + ring->rx_max_pending = rx_ring->ring_size; + ring->tx_max_pending = tx_ring->ring_size; + ring->rx_pending = rx_ring->ring_size; + ring->tx_pending = tx_ring->ring_size; +} + +static u32 ena_flow_hash_to_flow_type(u16 hash_fields) +{ + u32 data = 0; + + if (hash_fields & ENA_ADMIN_RSS_L2_DA) + data |= RXH_L2DA; + + if (hash_fields & ENA_ADMIN_RSS_L3_DA) + data |= RXH_IP_DST; + + if (hash_fields & ENA_ADMIN_RSS_L3_SA) + data |= RXH_IP_SRC; + + if (hash_fields & ENA_ADMIN_RSS_L4_DP) + data |= RXH_L4_B_2_3; + + if (hash_fields & ENA_ADMIN_RSS_L4_SP) + data |= RXH_L4_B_0_1; + + return data; +} + +static u16 ena_flow_data_to_flow_hash(u32 hash_fields) +{ + u16 data = 0; + + if (hash_fields & RXH_L2DA) + data |= ENA_ADMIN_RSS_L2_DA; + + if (hash_fields & RXH_IP_DST) + data |= ENA_ADMIN_RSS_L3_DA; + + if (hash_fields & RXH_IP_SRC) + data |= ENA_ADMIN_RSS_L3_SA; + + if (hash_fields & RXH_L4_B_2_3) + data |= ENA_ADMIN_RSS_L4_DP; + + if (hash_fields & RXH_L4_B_0_1) + data |= ENA_ADMIN_RSS_L4_SP; + + return data; +} + +static int ena_get_rss_hash(struct ena_com_dev *ena_dev, + struct ethtool_rxnfc *cmd) +{ + enum ena_admin_flow_hash_proto proto; + u16 hash_fields; + int rc; + + cmd->data = 0; + + switch (cmd->flow_type) { + case TCP_V4_FLOW: + proto = ENA_ADMIN_RSS_TCP4; + break; + case UDP_V4_FLOW: + proto = ENA_ADMIN_RSS_UDP4; + break; + case TCP_V6_FLOW: + proto = ENA_ADMIN_RSS_TCP6; + break; + case UDP_V6_FLOW: + proto = ENA_ADMIN_RSS_UDP6; + break; + case IPV4_FLOW: + proto = ENA_ADMIN_RSS_IP4; + break; + case IPV6_FLOW: + proto = ENA_ADMIN_RSS_IP6; + break; + case ETHER_FLOW: + proto = ENA_ADMIN_RSS_NOT_IP; + break; + case AH_V4_FLOW: + case ESP_V4_FLOW: + case AH_V6_FLOW: + case ESP_V6_FLOW: + case SCTP_V4_FLOW: + case AH_ESP_V4_FLOW: + return -EOPNOTSUPP; + default: + return -EINVAL; + } + + rc = ena_com_get_hash_ctrl(ena_dev, proto, &hash_fields); + if (rc) { + /* If device don't have permission, return unsupported */ + if (rc == -EPERM) + rc = -EOPNOTSUPP; + return rc; + } + + cmd->data = ena_flow_hash_to_flow_type(hash_fields); + + return 0; +} + +static int ena_set_rss_hash(struct ena_com_dev *ena_dev, + struct ethtool_rxnfc *cmd) +{ + enum ena_admin_flow_hash_proto proto; + u16 hash_fields; + + switch (cmd->flow_type) { + case TCP_V4_FLOW: + proto = ENA_ADMIN_RSS_TCP4; + break; + case UDP_V4_FLOW: + proto = ENA_ADMIN_RSS_UDP4; + break; + case TCP_V6_FLOW: + proto = ENA_ADMIN_RSS_TCP6; + break; + case UDP_V6_FLOW: + proto = ENA_ADMIN_RSS_UDP6; + break; + case IPV4_FLOW: + proto = ENA_ADMIN_RSS_IP4; + break; + case IPV6_FLOW: + proto = ENA_ADMIN_RSS_IP6; + break; + case ETHER_FLOW: + proto = ENA_ADMIN_RSS_NOT_IP; + break; + case AH_V4_FLOW: + case ESP_V4_FLOW: + case AH_V6_FLOW: + case ESP_V6_FLOW: + case SCTP_V4_FLOW: + case AH_ESP_V4_FLOW: + return -EOPNOTSUPP; + default: + return -EINVAL; + } + + hash_fields = ena_flow_data_to_flow_hash(cmd->data); + + return ena_com_fill_hash_ctrl(ena_dev, proto, hash_fields); +} + +static int ena_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *info) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + int rc = 0; + + switch (info->cmd) { + case ETHTOOL_SRXFH: + rc = ena_set_rss_hash(adapter->ena_dev, info); + break; + case ETHTOOL_SRXCLSRLDEL: + case ETHTOOL_SRXCLSRLINS: + default: + netif_err(adapter, drv, netdev, + "Command parameter %d is not supported\n", info->cmd); + rc = -EOPNOTSUPP; + } + + return (rc == -EPERM) ? -EOPNOTSUPP : rc; +} + +static int ena_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *info, + u32 *rules) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + int rc = 0; + + switch (info->cmd) { + case ETHTOOL_GRXRINGS: + info->data = adapter->num_queues; + rc = 0; + break; + case ETHTOOL_GRXFH: + rc = ena_get_rss_hash(adapter->ena_dev, info); + break; + case ETHTOOL_GRXCLSRLCNT: + case ETHTOOL_GRXCLSRULE: + case ETHTOOL_GRXCLSRLALL: + default: + netif_err(adapter, drv, netdev, + "Command parameter %d is not supported\n", info->cmd); + rc = -EOPNOTSUPP; + } + + return (rc == -EPERM) ? -EOPNOTSUPP : rc; +} + +static u32 ena_get_rxfh_indir_size(struct net_device *netdev) +{ + return ENA_RX_RSS_TABLE_SIZE; +} + +static u32 ena_get_rxfh_key_size(struct net_device *netdev) +{ + return ENA_HASH_KEY_SIZE; +} + +static int ena_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, + u8 *hfunc) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + enum ena_admin_hash_functions ena_func; + u8 func; + int rc; + + rc = ena_com_indirect_table_get(adapter->ena_dev, indir); + if (rc) + return rc; + + rc = ena_com_get_hash_function(adapter->ena_dev, &ena_func, key); + if (rc) + return rc; + + switch (ena_func) { + case ENA_ADMIN_TOEPLITZ: + func = ETH_RSS_HASH_TOP; + break; + case ENA_ADMIN_CRC32: + func = ETH_RSS_HASH_XOR; + break; + default: + netif_err(adapter, drv, netdev, + "Command parameter is not supported\n"); + return -EOPNOTSUPP; + } + + if (hfunc) + *hfunc = func; + + return rc; +} + +static int ena_set_rxfh(struct net_device *netdev, const u32 *indir, + const u8 *key, const u8 hfunc) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + struct ena_com_dev *ena_dev = adapter->ena_dev; + enum ena_admin_hash_functions func; + int rc, i; + + if (indir) { + for (i = 0; i < ENA_RX_RSS_TABLE_SIZE; i++) { + rc = ena_com_indirect_table_fill_entry(ena_dev, + ENA_IO_RXQ_IDX(indir[i]), + i); + if (unlikely(rc)) { + netif_err(adapter, drv, netdev, + "Cannot fill indirect table (index is too large)\n"); + return rc; + } + } + + rc = ena_com_indirect_table_set(ena_dev); + if (rc) { + netif_err(adapter, drv, netdev, + "Cannot set indirect table\n"); + return rc == -EPERM ? -EOPNOTSUPP : rc; + } + } + + switch (hfunc) { + case ETH_RSS_HASH_TOP: + func = ENA_ADMIN_TOEPLITZ; + break; + case ETH_RSS_HASH_XOR: + func = ENA_ADMIN_CRC32; + break; + default: + netif_err(adapter, drv, netdev, "Unsupported hfunc %d\n", + hfunc); + return -EOPNOTSUPP; + } + + if (key) { + rc = ena_com_fill_hash_function(ena_dev, func, key, + ENA_HASH_KEY_SIZE, + 0xFFFFFFFF); + if (unlikely(rc)) { + netif_err(adapter, drv, netdev, "Cannot fill key\n"); + return rc == -EPERM ? -EOPNOTSUPP : rc; + } + } + + return 0; +} + +static void ena_get_channels(struct net_device *netdev, + struct ethtool_channels *channels) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + + channels->max_rx = ENA_MAX_NUM_IO_QUEUES; + channels->max_tx = ENA_MAX_NUM_IO_QUEUES; + channels->max_other = 0; + channels->max_combined = 0; + channels->rx_count = adapter->num_queues; + channels->tx_count = adapter->num_queues; + channels->other_count = 0; + channels->combined_count = 0; +} + +static int ena_get_tunable(struct net_device *netdev, + const struct ethtool_tunable *tuna, void *data) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + int ret = 0; + + switch (tuna->id) { + case ETHTOOL_RX_COPYBREAK: + *(u32 *)data = adapter->rx_copybreak; + break; + default: + ret = -EINVAL; + break; + } + + return ret; +} + +static int ena_set_tunable(struct net_device *netdev, + const struct ethtool_tunable *tuna, + const void *data) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + int ret = 0; + u32 len; + + switch (tuna->id) { + case ETHTOOL_RX_COPYBREAK: + len = *(u32 *)data; + if (len > adapter->netdev->mtu) { + ret = -EINVAL; + break; + } + adapter->rx_copybreak = len; + break; + default: + ret = -EINVAL; + break; + } + + return ret; +} + +static const struct ethtool_ops ena_ethtool_ops = { + .get_link_ksettings = ena_get_link_ksettings, + .get_drvinfo = ena_get_drvinfo, + .get_msglevel = ena_get_msglevel, + .set_msglevel = ena_set_msglevel, + .get_link = ethtool_op_get_link, + .get_coalesce = ena_get_coalesce, + .set_coalesce = ena_set_coalesce, + .get_ringparam = ena_get_ringparam, + .get_sset_count = ena_get_sset_count, + .get_strings = ena_get_strings, + .get_ethtool_stats = ena_get_ethtool_stats, + .get_rxnfc = ena_get_rxnfc, + .set_rxnfc = ena_set_rxnfc, + .get_rxfh_indir_size = ena_get_rxfh_indir_size, + .get_rxfh_key_size = ena_get_rxfh_key_size, + .get_rxfh = ena_get_rxfh, + .set_rxfh = ena_set_rxfh, + .get_channels = ena_get_channels, + .get_tunable = ena_get_tunable, + .set_tunable = ena_set_tunable, +}; + +void ena_set_ethtool_ops(struct net_device *netdev) +{ + netdev->ethtool_ops = &ena_ethtool_ops; +} + +static void ena_dump_stats_ex(struct ena_adapter *adapter, u8 *buf) +{ + struct net_device *netdev = adapter->netdev; + u8 *strings_buf; + u64 *data_buf; + int strings_num; + int i, rc; + + strings_num = ena_get_sset_count(netdev, ETH_SS_STATS); + if (strings_num <= 0) { + netif_err(adapter, drv, netdev, "Can't get stats num\n"); + return; + } + + strings_buf = devm_kzalloc(&adapter->pdev->dev, + strings_num * ETH_GSTRING_LEN, + GFP_ATOMIC); + if (!strings_buf) { + netif_err(adapter, drv, netdev, + "failed to alloc strings_buf\n"); + return; + } + + data_buf = devm_kzalloc(&adapter->pdev->dev, + strings_num * sizeof(u64), + GFP_ATOMIC); + if (!data_buf) { + netif_err(adapter, drv, netdev, + "failed to allocate data buf\n"); + devm_kfree(&adapter->pdev->dev, strings_buf); + return; + } + + ena_get_strings(netdev, ETH_SS_STATS, strings_buf); + ena_get_ethtool_stats(netdev, NULL, data_buf); + + /* If there is a buffer, dump stats, otherwise print them to dmesg */ + if (buf) + for (i = 0; i < strings_num; i++) { + rc = snprintf(buf, ETH_GSTRING_LEN + sizeof(u64), + "%s %llu\n", + strings_buf + i * ETH_GSTRING_LEN, + data_buf[i]); + buf += rc; + } + else + for (i = 0; i < strings_num; i++) + netif_err(adapter, drv, netdev, "%s: %llu\n", + strings_buf + i * ETH_GSTRING_LEN, + data_buf[i]); + + devm_kfree(&adapter->pdev->dev, strings_buf); + devm_kfree(&adapter->pdev->dev, data_buf); +} + +void ena_dump_stats_to_buf(struct ena_adapter *adapter, u8 *buf) +{ + if (!buf) + return; + + ena_dump_stats_ex(adapter, buf); +} + +void ena_dump_stats_to_dmesg(struct ena_adapter *adapter) +{ + ena_dump_stats_ex(adapter, NULL); +} diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c new file mode 100644 index 000000000000..94790df2d559 --- /dev/null +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -0,0 +1,3280 @@ +/* + * Copyright 2015 Amazon.com, Inc. or its affiliates. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#ifdef CONFIG_RFS_ACCEL +#include +#endif /* CONFIG_RFS_ACCEL */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ena_netdev.h" +#include "ena_pci_id_tbl.h" + +static char version[] = DEVICE_NAME " v" DRV_MODULE_VERSION "\n"; + +MODULE_AUTHOR("Amazon.com, Inc. or its affiliates"); +MODULE_DESCRIPTION(DEVICE_NAME); +MODULE_LICENSE("GPL"); +MODULE_VERSION(DRV_MODULE_VERSION); + +/* Time in jiffies before concluding the transmitter is hung. */ +#define TX_TIMEOUT (5 * HZ) + +#define ENA_NAPI_BUDGET 64 + +#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_IFUP | \ + NETIF_MSG_TX_DONE | NETIF_MSG_TX_ERR | NETIF_MSG_RX_ERR) +static int debug = -1; +module_param(debug, int, 0); +MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)"); + +static struct ena_aenq_handlers aenq_handlers; + +static struct workqueue_struct *ena_wq; + +MODULE_DEVICE_TABLE(pci, ena_pci_tbl); + +static int ena_rss_init_default(struct ena_adapter *adapter); + +static void ena_tx_timeout(struct net_device *dev) +{ + struct ena_adapter *adapter = netdev_priv(dev); + + u64_stats_update_begin(&adapter->syncp); + adapter->dev_stats.tx_timeout++; + u64_stats_update_end(&adapter->syncp); + + netif_err(adapter, tx_err, dev, "Transmit time out\n"); + + /* Change the state of the device to trigger reset */ + set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); +} + +static void update_rx_ring_mtu(struct ena_adapter *adapter, int mtu) +{ + int i; + + for (i = 0; i < adapter->num_queues; i++) + adapter->rx_ring[i].mtu = mtu; +} + +static int ena_change_mtu(struct net_device *dev, int new_mtu) +{ + struct ena_adapter *adapter = netdev_priv(dev); + int ret; + + if ((new_mtu > adapter->max_mtu) || (new_mtu < ENA_MIN_MTU)) { + netif_err(adapter, drv, dev, + "Invalid MTU setting. new_mtu: %d\n", new_mtu); + + return -EINVAL; + } + + ret = ena_com_set_dev_mtu(adapter->ena_dev, new_mtu); + if (!ret) { + netif_dbg(adapter, drv, dev, "set MTU to %d\n", new_mtu); + update_rx_ring_mtu(adapter, new_mtu); + dev->mtu = new_mtu; + } else { + netif_err(adapter, drv, dev, "Failed to set MTU to %d\n", + new_mtu); + } + + return ret; +} + +static int ena_init_rx_cpu_rmap(struct ena_adapter *adapter) +{ +#ifdef CONFIG_RFS_ACCEL + u32 i; + int rc; + + adapter->netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(adapter->num_queues); + if (!adapter->netdev->rx_cpu_rmap) + return -ENOMEM; + for (i = 0; i < adapter->num_queues; i++) { + int irq_idx = ENA_IO_IRQ_IDX(i); + + rc = irq_cpu_rmap_add(adapter->netdev->rx_cpu_rmap, + adapter->msix_entries[irq_idx].vector); + if (rc) { + free_irq_cpu_rmap(adapter->netdev->rx_cpu_rmap); + adapter->netdev->rx_cpu_rmap = NULL; + return rc; + } + } +#endif /* CONFIG_RFS_ACCEL */ + return 0; +} + +static void ena_init_io_rings_common(struct ena_adapter *adapter, + struct ena_ring *ring, u16 qid) +{ + ring->qid = qid; + ring->pdev = adapter->pdev; + ring->dev = &adapter->pdev->dev; + ring->netdev = adapter->netdev; + ring->napi = &adapter->ena_napi[qid].napi; + ring->adapter = adapter; + ring->ena_dev = adapter->ena_dev; + ring->per_napi_packets = 0; + ring->per_napi_bytes = 0; + ring->cpu = 0; + u64_stats_init(&ring->syncp); +} + +static void ena_init_io_rings(struct ena_adapter *adapter) +{ + struct ena_com_dev *ena_dev; + struct ena_ring *txr, *rxr; + int i; + + ena_dev = adapter->ena_dev; + + for (i = 0; i < adapter->num_queues; i++) { + txr = &adapter->tx_ring[i]; + rxr = &adapter->rx_ring[i]; + + /* TX/RX common ring state */ + ena_init_io_rings_common(adapter, txr, i); + ena_init_io_rings_common(adapter, rxr, i); + + /* TX specific ring state */ + txr->ring_size = adapter->tx_ring_size; + txr->tx_max_header_size = ena_dev->tx_max_header_size; + txr->tx_mem_queue_type = ena_dev->tx_mem_queue_type; + txr->sgl_size = adapter->max_tx_sgl_size; + txr->smoothed_interval = + ena_com_get_nonadaptive_moderation_interval_tx(ena_dev); + + /* RX specific ring state */ + rxr->ring_size = adapter->rx_ring_size; + rxr->rx_copybreak = adapter->rx_copybreak; + rxr->sgl_size = adapter->max_rx_sgl_size; + rxr->smoothed_interval = + ena_com_get_nonadaptive_moderation_interval_rx(ena_dev); + } +} + +/* ena_setup_tx_resources - allocate I/O Tx resources (Descriptors) + * @adapter: network interface device structure + * @qid: queue index + * + * Return 0 on success, negative on failure + */ +static int ena_setup_tx_resources(struct ena_adapter *adapter, int qid) +{ + struct ena_ring *tx_ring = &adapter->tx_ring[qid]; + struct ena_irq *ena_irq = &adapter->irq_tbl[ENA_IO_IRQ_IDX(qid)]; + int size, i, node; + + if (tx_ring->tx_buffer_info) { + netif_err(adapter, ifup, + adapter->netdev, "tx_buffer_info info is not NULL"); + return -EEXIST; + } + + size = sizeof(struct ena_tx_buffer) * tx_ring->ring_size; + node = cpu_to_node(ena_irq->cpu); + + tx_ring->tx_buffer_info = vzalloc_node(size, node); + if (!tx_ring->tx_buffer_info) { + tx_ring->tx_buffer_info = vzalloc(size); + if (!tx_ring->tx_buffer_info) + return -ENOMEM; + } + + size = sizeof(u16) * tx_ring->ring_size; + tx_ring->free_tx_ids = vzalloc_node(size, node); + if (!tx_ring->free_tx_ids) { + tx_ring->free_tx_ids = vzalloc(size); + if (!tx_ring->free_tx_ids) { + vfree(tx_ring->tx_buffer_info); + return -ENOMEM; + } + } + + /* Req id ring for TX out of order completions */ + for (i = 0; i < tx_ring->ring_size; i++) + tx_ring->free_tx_ids[i] = i; + + /* Reset tx statistics */ + memset(&tx_ring->tx_stats, 0x0, sizeof(tx_ring->tx_stats)); + + tx_ring->next_to_use = 0; + tx_ring->next_to_clean = 0; + tx_ring->cpu = ena_irq->cpu; + return 0; +} + +/* ena_free_tx_resources - Free I/O Tx Resources per Queue + * @adapter: network interface device structure + * @qid: queue index + * + * Free all transmit software resources + */ +static void ena_free_tx_resources(struct ena_adapter *adapter, int qid) +{ + struct ena_ring *tx_ring = &adapter->tx_ring[qid]; + + vfree(tx_ring->tx_buffer_info); + tx_ring->tx_buffer_info = NULL; + + vfree(tx_ring->free_tx_ids); + tx_ring->free_tx_ids = NULL; +} + +/* ena_setup_all_tx_resources - allocate I/O Tx queues resources for All queues + * @adapter: private structure + * + * Return 0 on success, negative on failure + */ +static int ena_setup_all_tx_resources(struct ena_adapter *adapter) +{ + int i, rc = 0; + + for (i = 0; i < adapter->num_queues; i++) { + rc = ena_setup_tx_resources(adapter, i); + if (rc) + goto err_setup_tx; + } + + return 0; + +err_setup_tx: + + netif_err(adapter, ifup, adapter->netdev, + "Tx queue %d: allocation failed\n", i); + + /* rewind the index freeing the rings as we go */ + while (i--) + ena_free_tx_resources(adapter, i); + return rc; +} + +/* ena_free_all_io_tx_resources - Free I/O Tx Resources for All Queues + * @adapter: board private structure + * + * Free all transmit software resources + */ +static void ena_free_all_io_tx_resources(struct ena_adapter *adapter) +{ + int i; + + for (i = 0; i < adapter->num_queues; i++) + ena_free_tx_resources(adapter, i); +} + +/* ena_setup_rx_resources - allocate I/O Rx resources (Descriptors) + * @adapter: network interface device structure + * @qid: queue index + * + * Returns 0 on success, negative on failure + */ +static int ena_setup_rx_resources(struct ena_adapter *adapter, + u32 qid) +{ + struct ena_ring *rx_ring = &adapter->rx_ring[qid]; + struct ena_irq *ena_irq = &adapter->irq_tbl[ENA_IO_IRQ_IDX(qid)]; + int size, node; + + if (rx_ring->rx_buffer_info) { + netif_err(adapter, ifup, adapter->netdev, + "rx_buffer_info is not NULL"); + return -EEXIST; + } + + /* alloc extra element so in rx path + * we can always prefetch rx_info + 1 + */ + size = sizeof(struct ena_rx_buffer) * (rx_ring->ring_size + 1); + node = cpu_to_node(ena_irq->cpu); + + rx_ring->rx_buffer_info = vzalloc_node(size, node); + if (!rx_ring->rx_buffer_info) { + rx_ring->rx_buffer_info = vzalloc(size); + if (!rx_ring->rx_buffer_info) + return -ENOMEM; + } + + /* Reset rx statistics */ + memset(&rx_ring->rx_stats, 0x0, sizeof(rx_ring->rx_stats)); + + rx_ring->next_to_clean = 0; + rx_ring->next_to_use = 0; + rx_ring->cpu = ena_irq->cpu; + + return 0; +} + +/* ena_free_rx_resources - Free I/O Rx Resources + * @adapter: network interface device structure + * @qid: queue index + * + * Free all receive software resources + */ +static void ena_free_rx_resources(struct ena_adapter *adapter, + u32 qid) +{ + struct ena_ring *rx_ring = &adapter->rx_ring[qid]; + + vfree(rx_ring->rx_buffer_info); + rx_ring->rx_buffer_info = NULL; +} + +/* ena_setup_all_rx_resources - allocate I/O Rx queues resources for all queues + * @adapter: board private structure + * + * Return 0 on success, negative on failure + */ +static int ena_setup_all_rx_resources(struct ena_adapter *adapter) +{ + int i, rc = 0; + + for (i = 0; i < adapter->num_queues; i++) { + rc = ena_setup_rx_resources(adapter, i); + if (rc) + goto err_setup_rx; + } + + return 0; + +err_setup_rx: + + netif_err(adapter, ifup, adapter->netdev, + "Rx queue %d: allocation failed\n", i); + + /* rewind the index freeing the rings as we go */ + while (i--) + ena_free_rx_resources(adapter, i); + return rc; +} + +/* ena_free_all_io_rx_resources - Free I/O Rx Resources for All Queues + * @adapter: board private structure + * + * Free all receive software resources + */ +static void ena_free_all_io_rx_resources(struct ena_adapter *adapter) +{ + int i; + + for (i = 0; i < adapter->num_queues; i++) + ena_free_rx_resources(adapter, i); +} + +static inline int ena_alloc_rx_page(struct ena_ring *rx_ring, + struct ena_rx_buffer *rx_info, gfp_t gfp) +{ + struct ena_com_buf *ena_buf; + struct page *page; + dma_addr_t dma; + + /* if previous allocated page is not used */ + if (unlikely(rx_info->page)) + return 0; + + page = alloc_page(gfp); + if (unlikely(!page)) { + u64_stats_update_begin(&rx_ring->syncp); + rx_ring->rx_stats.page_alloc_fail++; + u64_stats_update_end(&rx_ring->syncp); + return -ENOMEM; + } + + dma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, + DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(rx_ring->dev, dma))) { + u64_stats_update_begin(&rx_ring->syncp); + rx_ring->rx_stats.dma_mapping_err++; + u64_stats_update_end(&rx_ring->syncp); + + __free_page(page); + return -EIO; + } + netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, + "alloc page %p, rx_info %p\n", page, rx_info); + + rx_info->page = page; + rx_info->page_offset = 0; + ena_buf = &rx_info->ena_buf; + ena_buf->paddr = dma; + ena_buf->len = PAGE_SIZE; + + return 0; +} + +static void ena_free_rx_page(struct ena_ring *rx_ring, + struct ena_rx_buffer *rx_info) +{ + struct page *page = rx_info->page; + struct ena_com_buf *ena_buf = &rx_info->ena_buf; + + if (unlikely(!page)) { + netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev, + "Trying to free unallocated buffer\n"); + return; + } + + dma_unmap_page(rx_ring->dev, ena_buf->paddr, PAGE_SIZE, + DMA_FROM_DEVICE); + + __free_page(page); + rx_info->page = NULL; +} + +static int ena_refill_rx_bufs(struct ena_ring *rx_ring, u32 num) +{ + u16 next_to_use; + u32 i; + int rc; + + next_to_use = rx_ring->next_to_use; + + for (i = 0; i < num; i++) { + struct ena_rx_buffer *rx_info = + &rx_ring->rx_buffer_info[next_to_use]; + + rc = ena_alloc_rx_page(rx_ring, rx_info, + __GFP_COLD | GFP_ATOMIC | __GFP_COMP); + if (unlikely(rc < 0)) { + netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev, + "failed to alloc buffer for rx queue %d\n", + rx_ring->qid); + break; + } + rc = ena_com_add_single_rx_desc(rx_ring->ena_com_io_sq, + &rx_info->ena_buf, + next_to_use); + if (unlikely(rc)) { + netif_warn(rx_ring->adapter, rx_status, rx_ring->netdev, + "failed to add buffer for rx queue %d\n", + rx_ring->qid); + break; + } + next_to_use = ENA_RX_RING_IDX_NEXT(next_to_use, + rx_ring->ring_size); + } + + if (unlikely(i < num)) { + u64_stats_update_begin(&rx_ring->syncp); + rx_ring->rx_stats.refil_partial++; + u64_stats_update_end(&rx_ring->syncp); + netdev_warn(rx_ring->netdev, + "refilled rx qid %d with only %d buffers (from %d)\n", + rx_ring->qid, i, num); + } + + if (likely(i)) { + /* Add memory barrier to make sure the desc were written before + * issue a doorbell + */ + wmb(); + ena_com_write_sq_doorbell(rx_ring->ena_com_io_sq); + } + + rx_ring->next_to_use = next_to_use; + + return i; +} + +static void ena_free_rx_bufs(struct ena_adapter *adapter, + u32 qid) +{ + struct ena_ring *rx_ring = &adapter->rx_ring[qid]; + u32 i; + + for (i = 0; i < rx_ring->ring_size; i++) { + struct ena_rx_buffer *rx_info = &rx_ring->rx_buffer_info[i]; + + if (rx_info->page) + ena_free_rx_page(rx_ring, rx_info); + } +} + +/* ena_refill_all_rx_bufs - allocate all queues Rx buffers + * @adapter: board private structure + * + */ +static void ena_refill_all_rx_bufs(struct ena_adapter *adapter) +{ + struct ena_ring *rx_ring; + int i, rc, bufs_num; + + for (i = 0; i < adapter->num_queues; i++) { + rx_ring = &adapter->rx_ring[i]; + bufs_num = rx_ring->ring_size - 1; + rc = ena_refill_rx_bufs(rx_ring, bufs_num); + + if (unlikely(rc != bufs_num)) + netif_warn(rx_ring->adapter, rx_status, rx_ring->netdev, + "refilling Queue %d failed. allocated %d buffers from: %d\n", + i, rc, bufs_num); + } +} + +static void ena_free_all_rx_bufs(struct ena_adapter *adapter) +{ + int i; + + for (i = 0; i < adapter->num_queues; i++) + ena_free_rx_bufs(adapter, i); +} + +/* ena_free_tx_bufs - Free Tx Buffers per Queue + * @tx_ring: TX ring for which buffers be freed + */ +static void ena_free_tx_bufs(struct ena_ring *tx_ring) +{ + u32 i; + + for (i = 0; i < tx_ring->ring_size; i++) { + struct ena_tx_buffer *tx_info = &tx_ring->tx_buffer_info[i]; + struct ena_com_buf *ena_buf; + int nr_frags; + int j; + + if (!tx_info->skb) + continue; + + netdev_notice(tx_ring->netdev, + "free uncompleted tx skb qid %d idx 0x%x\n", + tx_ring->qid, i); + + ena_buf = tx_info->bufs; + dma_unmap_single(tx_ring->dev, + ena_buf->paddr, + ena_buf->len, + DMA_TO_DEVICE); + + /* unmap remaining mapped pages */ + nr_frags = tx_info->num_of_bufs - 1; + for (j = 0; j < nr_frags; j++) { + ena_buf++; + dma_unmap_page(tx_ring->dev, + ena_buf->paddr, + ena_buf->len, + DMA_TO_DEVICE); + } + + dev_kfree_skb_any(tx_info->skb); + } + netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev, + tx_ring->qid)); +} + +static void ena_free_all_tx_bufs(struct ena_adapter *adapter) +{ + struct ena_ring *tx_ring; + int i; + + for (i = 0; i < adapter->num_queues; i++) { + tx_ring = &adapter->tx_ring[i]; + ena_free_tx_bufs(tx_ring); + } +} + +static void ena_destroy_all_tx_queues(struct ena_adapter *adapter) +{ + u16 ena_qid; + int i; + + for (i = 0; i < adapter->num_queues; i++) { + ena_qid = ENA_IO_TXQ_IDX(i); + ena_com_destroy_io_queue(adapter->ena_dev, ena_qid); + } +} + +static void ena_destroy_all_rx_queues(struct ena_adapter *adapter) +{ + u16 ena_qid; + int i; + + for (i = 0; i < adapter->num_queues; i++) { + ena_qid = ENA_IO_RXQ_IDX(i); + ena_com_destroy_io_queue(adapter->ena_dev, ena_qid); + } +} + +static void ena_destroy_all_io_queues(struct ena_adapter *adapter) +{ + ena_destroy_all_tx_queues(adapter); + ena_destroy_all_rx_queues(adapter); +} + +static int validate_tx_req_id(struct ena_ring *tx_ring, u16 req_id) +{ + struct ena_tx_buffer *tx_info = NULL; + + if (likely(req_id < tx_ring->ring_size)) { + tx_info = &tx_ring->tx_buffer_info[req_id]; + if (likely(tx_info->skb)) + return 0; + } + + if (tx_info) + netif_err(tx_ring->adapter, tx_done, tx_ring->netdev, + "tx_info doesn't have valid skb\n"); + else + netif_err(tx_ring->adapter, tx_done, tx_ring->netdev, + "Invalid req_id: %hu\n", req_id); + + u64_stats_update_begin(&tx_ring->syncp); + tx_ring->tx_stats.bad_req_id++; + u64_stats_update_end(&tx_ring->syncp); + + /* Trigger device reset */ + set_bit(ENA_FLAG_TRIGGER_RESET, &tx_ring->adapter->flags); + return -EFAULT; +} + +static int ena_clean_tx_irq(struct ena_ring *tx_ring, u32 budget) +{ + struct netdev_queue *txq; + bool above_thresh; + u32 tx_bytes = 0; + u32 total_done = 0; + u16 next_to_clean; + u16 req_id; + int tx_pkts = 0; + int rc; + + next_to_clean = tx_ring->next_to_clean; + txq = netdev_get_tx_queue(tx_ring->netdev, tx_ring->qid); + + while (tx_pkts < budget) { + struct ena_tx_buffer *tx_info; + struct sk_buff *skb; + struct ena_com_buf *ena_buf; + int i, nr_frags; + + rc = ena_com_tx_comp_req_id_get(tx_ring->ena_com_io_cq, + &req_id); + if (rc) + break; + + rc = validate_tx_req_id(tx_ring, req_id); + if (rc) + break; + + tx_info = &tx_ring->tx_buffer_info[req_id]; + skb = tx_info->skb; + + /* prefetch skb_end_pointer() to speedup skb_shinfo(skb) */ + prefetch(&skb->end); + + tx_info->skb = NULL; + tx_info->last_jiffies = 0; + + if (likely(tx_info->num_of_bufs != 0)) { + ena_buf = tx_info->bufs; + + dma_unmap_single(tx_ring->dev, + dma_unmap_addr(ena_buf, paddr), + dma_unmap_len(ena_buf, len), + DMA_TO_DEVICE); + + /* unmap remaining mapped pages */ + nr_frags = tx_info->num_of_bufs - 1; + for (i = 0; i < nr_frags; i++) { + ena_buf++; + dma_unmap_page(tx_ring->dev, + dma_unmap_addr(ena_buf, paddr), + dma_unmap_len(ena_buf, len), + DMA_TO_DEVICE); + } + } + + netif_dbg(tx_ring->adapter, tx_done, tx_ring->netdev, + "tx_poll: q %d skb %p completed\n", tx_ring->qid, + skb); + + tx_bytes += skb->len; + dev_kfree_skb(skb); + tx_pkts++; + total_done += tx_info->tx_descs; + + tx_ring->free_tx_ids[next_to_clean] = req_id; + next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean, + tx_ring->ring_size); + } + + tx_ring->next_to_clean = next_to_clean; + ena_com_comp_ack(tx_ring->ena_com_io_sq, total_done); + ena_com_update_dev_comp_head(tx_ring->ena_com_io_cq); + + netdev_tx_completed_queue(txq, tx_pkts, tx_bytes); + + netif_dbg(tx_ring->adapter, tx_done, tx_ring->netdev, + "tx_poll: q %d done. total pkts: %d\n", + tx_ring->qid, tx_pkts); + + /* need to make the rings circular update visible to + * ena_start_xmit() before checking for netif_queue_stopped(). + */ + smp_mb(); + + above_thresh = ena_com_sq_empty_space(tx_ring->ena_com_io_sq) > + ENA_TX_WAKEUP_THRESH; + if (unlikely(netif_tx_queue_stopped(txq) && above_thresh)) { + __netif_tx_lock(txq, smp_processor_id()); + above_thresh = ena_com_sq_empty_space(tx_ring->ena_com_io_sq) > + ENA_TX_WAKEUP_THRESH; + if (netif_tx_queue_stopped(txq) && above_thresh) { + netif_tx_wake_queue(txq); + u64_stats_update_begin(&tx_ring->syncp); + tx_ring->tx_stats.queue_wakeup++; + u64_stats_update_end(&tx_ring->syncp); + } + __netif_tx_unlock(txq); + } + + tx_ring->per_napi_bytes += tx_bytes; + tx_ring->per_napi_packets += tx_pkts; + + return tx_pkts; +} + +static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring, + struct ena_com_rx_buf_info *ena_bufs, + u32 descs, + u16 *next_to_clean) +{ + struct sk_buff *skb; + struct ena_rx_buffer *rx_info = + &rx_ring->rx_buffer_info[*next_to_clean]; + u32 len; + u32 buf = 0; + void *va; + + len = ena_bufs[0].len; + if (unlikely(!rx_info->page)) { + netif_err(rx_ring->adapter, rx_err, rx_ring->netdev, + "Page is NULL\n"); + return NULL; + } + + netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, + "rx_info %p page %p\n", + rx_info, rx_info->page); + + /* save virt address of first buffer */ + va = page_address(rx_info->page) + rx_info->page_offset; + prefetch(va + NET_IP_ALIGN); + + if (len <= rx_ring->rx_copybreak) { + skb = netdev_alloc_skb_ip_align(rx_ring->netdev, + rx_ring->rx_copybreak); + if (unlikely(!skb)) { + u64_stats_update_begin(&rx_ring->syncp); + rx_ring->rx_stats.skb_alloc_fail++; + u64_stats_update_end(&rx_ring->syncp); + netif_err(rx_ring->adapter, rx_err, rx_ring->netdev, + "Failed to allocate skb\n"); + return NULL; + } + + netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, + "rx allocated small packet. len %d. data_len %d\n", + skb->len, skb->data_len); + + /* sync this buffer for CPU use */ + dma_sync_single_for_cpu(rx_ring->dev, + dma_unmap_addr(&rx_info->ena_buf, paddr), + len, + DMA_FROM_DEVICE); + skb_copy_to_linear_data(skb, va, len); + dma_sync_single_for_device(rx_ring->dev, + dma_unmap_addr(&rx_info->ena_buf, paddr), + len, + DMA_FROM_DEVICE); + + skb_put(skb, len); + skb->protocol = eth_type_trans(skb, rx_ring->netdev); + *next_to_clean = ENA_RX_RING_IDX_ADD(*next_to_clean, descs, + rx_ring->ring_size); + return skb; + } + + skb = napi_get_frags(rx_ring->napi); + if (unlikely(!skb)) { + netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, + "Failed allocating skb\n"); + u64_stats_update_begin(&rx_ring->syncp); + rx_ring->rx_stats.skb_alloc_fail++; + u64_stats_update_end(&rx_ring->syncp); + return NULL; + } + + do { + dma_unmap_page(rx_ring->dev, + dma_unmap_addr(&rx_info->ena_buf, paddr), + PAGE_SIZE, DMA_FROM_DEVICE); + + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_info->page, + rx_info->page_offset, len, PAGE_SIZE); + + netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, + "rx skb updated. len %d. data_len %d\n", + skb->len, skb->data_len); + + rx_info->page = NULL; + *next_to_clean = + ENA_RX_RING_IDX_NEXT(*next_to_clean, + rx_ring->ring_size); + if (likely(--descs == 0)) + break; + rx_info = &rx_ring->rx_buffer_info[*next_to_clean]; + len = ena_bufs[++buf].len; + } while (1); + + return skb; +} + +/* ena_rx_checksum - indicate in skb if hw indicated a good cksum + * @adapter: structure containing adapter specific data + * @ena_rx_ctx: received packet context/metadata + * @skb: skb currently being received and modified + */ +static inline void ena_rx_checksum(struct ena_ring *rx_ring, + struct ena_com_rx_ctx *ena_rx_ctx, + struct sk_buff *skb) +{ + /* Rx csum disabled */ + if (unlikely(!(rx_ring->netdev->features & NETIF_F_RXCSUM))) { + skb->ip_summed = CHECKSUM_NONE; + return; + } + + /* For fragmented packets the checksum isn't valid */ + if (ena_rx_ctx->frag) { + skb->ip_summed = CHECKSUM_NONE; + return; + } + + /* if IP and error */ + if (unlikely((ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV4) && + (ena_rx_ctx->l3_csum_err))) { + /* ipv4 checksum error */ + skb->ip_summed = CHECKSUM_NONE; + u64_stats_update_begin(&rx_ring->syncp); + rx_ring->rx_stats.bad_csum++; + u64_stats_update_end(&rx_ring->syncp); + netif_err(rx_ring->adapter, rx_err, rx_ring->netdev, + "RX IPv4 header checksum error\n"); + return; + } + + /* if TCP/UDP */ + if (likely((ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) || + (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP))) { + if (unlikely(ena_rx_ctx->l4_csum_err)) { + /* TCP/UDP checksum error */ + u64_stats_update_begin(&rx_ring->syncp); + rx_ring->rx_stats.bad_csum++; + u64_stats_update_end(&rx_ring->syncp); + netif_err(rx_ring->adapter, rx_err, rx_ring->netdev, + "RX L4 checksum error\n"); + skb->ip_summed = CHECKSUM_NONE; + return; + } + + skb->ip_summed = CHECKSUM_UNNECESSARY; + } +} + +static void ena_set_rx_hash(struct ena_ring *rx_ring, + struct ena_com_rx_ctx *ena_rx_ctx, + struct sk_buff *skb) +{ + enum pkt_hash_types hash_type; + + if (likely(rx_ring->netdev->features & NETIF_F_RXHASH)) { + if (likely((ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) || + (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP))) + + hash_type = PKT_HASH_TYPE_L4; + else + hash_type = PKT_HASH_TYPE_NONE; + + /* Override hash type if the packet is fragmented */ + if (ena_rx_ctx->frag) + hash_type = PKT_HASH_TYPE_NONE; + + skb_set_hash(skb, ena_rx_ctx->hash, hash_type); + } +} + +/* ena_clean_rx_irq - Cleanup RX irq + * @rx_ring: RX ring to clean + * @napi: napi handler + * @budget: how many packets driver is allowed to clean + * + * Returns the number of cleaned buffers. + */ +static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, + u32 budget) +{ + u16 next_to_clean = rx_ring->next_to_clean; + u32 res_budget, work_done; + + struct ena_com_rx_ctx ena_rx_ctx; + struct ena_adapter *adapter; + struct sk_buff *skb; + int refill_required; + int refill_threshold; + int rc = 0; + int total_len = 0; + int rx_copybreak_pkt = 0; + + netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, + "%s qid %d\n", __func__, rx_ring->qid); + res_budget = budget; + + do { + ena_rx_ctx.ena_bufs = rx_ring->ena_bufs; + ena_rx_ctx.max_bufs = rx_ring->sgl_size; + ena_rx_ctx.descs = 0; + rc = ena_com_rx_pkt(rx_ring->ena_com_io_cq, + rx_ring->ena_com_io_sq, + &ena_rx_ctx); + if (unlikely(rc)) + goto error; + + if (unlikely(ena_rx_ctx.descs == 0)) + break; + + netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, + "rx_poll: q %d got packet from ena. descs #: %d l3 proto %d l4 proto %d hash: %x\n", + rx_ring->qid, ena_rx_ctx.descs, ena_rx_ctx.l3_proto, + ena_rx_ctx.l4_proto, ena_rx_ctx.hash); + + /* allocate skb and fill it */ + skb = ena_rx_skb(rx_ring, rx_ring->ena_bufs, ena_rx_ctx.descs, + &next_to_clean); + + /* exit if we failed to retrieve a buffer */ + if (unlikely(!skb)) { + next_to_clean = ENA_RX_RING_IDX_ADD(next_to_clean, + ena_rx_ctx.descs, + rx_ring->ring_size); + break; + } + + ena_rx_checksum(rx_ring, &ena_rx_ctx, skb); + + ena_set_rx_hash(rx_ring, &ena_rx_ctx, skb); + + skb_record_rx_queue(skb, rx_ring->qid); + + if (rx_ring->ena_bufs[0].len <= rx_ring->rx_copybreak) { + total_len += rx_ring->ena_bufs[0].len; + rx_copybreak_pkt++; + napi_gro_receive(napi, skb); + } else { + total_len += skb->len; + napi_gro_frags(napi); + } + + res_budget--; + } while (likely(res_budget)); + + work_done = budget - res_budget; + rx_ring->per_napi_bytes += total_len; + rx_ring->per_napi_packets += work_done; + u64_stats_update_begin(&rx_ring->syncp); + rx_ring->rx_stats.bytes += total_len; + rx_ring->rx_stats.cnt += work_done; + rx_ring->rx_stats.rx_copybreak_pkt += rx_copybreak_pkt; + u64_stats_update_end(&rx_ring->syncp); + + rx_ring->next_to_clean = next_to_clean; + + refill_required = ena_com_sq_empty_space(rx_ring->ena_com_io_sq); + refill_threshold = rx_ring->ring_size / ENA_RX_REFILL_THRESH_DIVIDER; + + /* Optimization, try to batch new rx buffers */ + if (refill_required > refill_threshold) { + ena_com_update_dev_comp_head(rx_ring->ena_com_io_cq); + ena_refill_rx_bufs(rx_ring, refill_required); + } + + return work_done; + +error: + adapter = netdev_priv(rx_ring->netdev); + + u64_stats_update_begin(&rx_ring->syncp); + rx_ring->rx_stats.bad_desc_num++; + u64_stats_update_end(&rx_ring->syncp); + + /* Too many desc from the device. Trigger reset */ + set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); + + return 0; +} + +inline void ena_adjust_intr_moderation(struct ena_ring *rx_ring, + struct ena_ring *tx_ring) +{ + /* We apply adaptive moderation on Rx path only. + * Tx uses static interrupt moderation. + */ + ena_com_calculate_interrupt_delay(rx_ring->ena_dev, + rx_ring->per_napi_packets, + rx_ring->per_napi_bytes, + &rx_ring->smoothed_interval, + &rx_ring->moder_tbl_idx); + + /* Reset per napi packets/bytes */ + tx_ring->per_napi_packets = 0; + tx_ring->per_napi_bytes = 0; + rx_ring->per_napi_packets = 0; + rx_ring->per_napi_bytes = 0; +} + +static inline void ena_update_ring_numa_node(struct ena_ring *tx_ring, + struct ena_ring *rx_ring) +{ + int cpu = get_cpu(); + int numa_node; + + /* Check only one ring since the 2 rings are running on the same cpu */ + if (likely(tx_ring->cpu == cpu)) + goto out; + + numa_node = cpu_to_node(cpu); + put_cpu(); + + if (numa_node != NUMA_NO_NODE) { + ena_com_update_numa_node(tx_ring->ena_com_io_cq, numa_node); + ena_com_update_numa_node(rx_ring->ena_com_io_cq, numa_node); + } + + tx_ring->cpu = cpu; + rx_ring->cpu = cpu; + + return; +out: + put_cpu(); +} + +static int ena_io_poll(struct napi_struct *napi, int budget) +{ + struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi); + struct ena_ring *tx_ring, *rx_ring; + struct ena_eth_io_intr_reg intr_reg; + + u32 tx_work_done; + u32 rx_work_done; + int tx_budget; + int napi_comp_call = 0; + int ret; + + tx_ring = ena_napi->tx_ring; + rx_ring = ena_napi->rx_ring; + + tx_budget = tx_ring->ring_size / ENA_TX_POLL_BUDGET_DIVIDER; + + if (!test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags)) { + napi_complete_done(napi, 0); + return 0; + } + + tx_work_done = ena_clean_tx_irq(tx_ring, tx_budget); + rx_work_done = ena_clean_rx_irq(rx_ring, napi, budget); + + if ((budget > rx_work_done) && (tx_budget > tx_work_done)) { + napi_complete_done(napi, rx_work_done); + + napi_comp_call = 1; + /* Tx and Rx share the same interrupt vector */ + if (ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev)) + ena_adjust_intr_moderation(rx_ring, tx_ring); + + /* Update intr register: rx intr delay, tx intr delay and + * interrupt unmask + */ + ena_com_update_intr_reg(&intr_reg, + rx_ring->smoothed_interval, + tx_ring->smoothed_interval, + true); + + /* It is a shared MSI-X. Tx and Rx CQ have pointer to it. + * So we use one of them to reach the intr reg + */ + ena_com_unmask_intr(rx_ring->ena_com_io_cq, &intr_reg); + + ena_update_ring_numa_node(tx_ring, rx_ring); + + ret = rx_work_done; + } else { + ret = budget; + } + + u64_stats_update_begin(&tx_ring->syncp); + tx_ring->tx_stats.napi_comp += napi_comp_call; + tx_ring->tx_stats.tx_poll++; + u64_stats_update_end(&tx_ring->syncp); + + return ret; +} + +static irqreturn_t ena_intr_msix_mgmnt(int irq, void *data) +{ + struct ena_adapter *adapter = (struct ena_adapter *)data; + + ena_com_admin_q_comp_intr_handler(adapter->ena_dev); + + /* Don't call the aenq handler before probe is done */ + if (likely(test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags))) + ena_com_aenq_intr_handler(adapter->ena_dev, data); + + return IRQ_HANDLED; +} + +/* ena_intr_msix_io - MSI-X Interrupt Handler for Tx/Rx + * @irq: interrupt number + * @data: pointer to a network interface private napi device structure + */ +static irqreturn_t ena_intr_msix_io(int irq, void *data) +{ + struct ena_napi *ena_napi = data; + + napi_schedule(&ena_napi->napi); + + return IRQ_HANDLED; +} + +static int ena_enable_msix(struct ena_adapter *adapter, int num_queues) +{ + int i, msix_vecs, rc; + + if (test_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags)) { + netif_err(adapter, probe, adapter->netdev, + "Error, MSI-X is already enabled\n"); + return -EPERM; + } + + /* Reserved the max msix vectors we might need */ + msix_vecs = ENA_MAX_MSIX_VEC(num_queues); + + netif_dbg(adapter, probe, adapter->netdev, + "trying to enable MSI-X, vectors %d\n", msix_vecs); + + adapter->msix_entries = vzalloc(msix_vecs * sizeof(struct msix_entry)); + + if (!adapter->msix_entries) + return -ENOMEM; + + for (i = 0; i < msix_vecs; i++) + adapter->msix_entries[i].entry = i; + + rc = pci_enable_msix(adapter->pdev, adapter->msix_entries, msix_vecs); + if (rc != 0) { + netif_err(adapter, probe, adapter->netdev, + "Failed to enable MSI-X, vectors %d rc %d\n", + msix_vecs, rc); + return -ENOSPC; + } + + netif_dbg(adapter, probe, adapter->netdev, "enable MSI-X, vectors %d\n", + msix_vecs); + + if (msix_vecs >= 1) { + if (ena_init_rx_cpu_rmap(adapter)) + netif_warn(adapter, probe, adapter->netdev, + "Failed to map IRQs to CPUs\n"); + } + + adapter->msix_vecs = msix_vecs; + set_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags); + + return 0; +} + +static void ena_setup_mgmnt_intr(struct ena_adapter *adapter) +{ + u32 cpu; + + snprintf(adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].name, + ENA_IRQNAME_SIZE, "ena-mgmnt@pci:%s", + pci_name(adapter->pdev)); + adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].handler = + ena_intr_msix_mgmnt; + adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].data = adapter; + adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].vector = + adapter->msix_entries[ENA_MGMNT_IRQ_IDX].vector; + cpu = cpumask_first(cpu_online_mask); + adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].cpu = cpu; + cpumask_set_cpu(cpu, + &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].affinity_hint_mask); +} + +static void ena_setup_io_intr(struct ena_adapter *adapter) +{ + struct net_device *netdev; + int irq_idx, i, cpu; + + netdev = adapter->netdev; + + for (i = 0; i < adapter->num_queues; i++) { + irq_idx = ENA_IO_IRQ_IDX(i); + cpu = i % num_online_cpus(); + + snprintf(adapter->irq_tbl[irq_idx].name, ENA_IRQNAME_SIZE, + "%s-Tx-Rx-%d", netdev->name, i); + adapter->irq_tbl[irq_idx].handler = ena_intr_msix_io; + adapter->irq_tbl[irq_idx].data = &adapter->ena_napi[i]; + adapter->irq_tbl[irq_idx].vector = + adapter->msix_entries[irq_idx].vector; + adapter->irq_tbl[irq_idx].cpu = cpu; + + cpumask_set_cpu(cpu, + &adapter->irq_tbl[irq_idx].affinity_hint_mask); + } +} + +static int ena_request_mgmnt_irq(struct ena_adapter *adapter) +{ + unsigned long flags = 0; + struct ena_irq *irq; + int rc; + + irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX]; + rc = request_irq(irq->vector, irq->handler, flags, irq->name, + irq->data); + if (rc) { + netif_err(adapter, probe, adapter->netdev, + "failed to request admin irq\n"); + return rc; + } + + netif_dbg(adapter, probe, adapter->netdev, + "set affinity hint of mgmnt irq.to 0x%lx (irq vector: %d)\n", + irq->affinity_hint_mask.bits[0], irq->vector); + + irq_set_affinity_hint(irq->vector, &irq->affinity_hint_mask); + + return rc; +} + +static int ena_request_io_irq(struct ena_adapter *adapter) +{ + unsigned long flags = 0; + struct ena_irq *irq; + int rc = 0, i, k; + + if (!test_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags)) { + netif_err(adapter, ifup, adapter->netdev, + "Failed to request I/O IRQ: MSI-X is not enabled\n"); + return -EINVAL; + } + + for (i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) { + irq = &adapter->irq_tbl[i]; + rc = request_irq(irq->vector, irq->handler, flags, irq->name, + irq->data); + if (rc) { + netif_err(adapter, ifup, adapter->netdev, + "Failed to request I/O IRQ. index %d rc %d\n", + i, rc); + goto err; + } + + netif_dbg(adapter, ifup, adapter->netdev, + "set affinity hint of irq. index %d to 0x%lx (irq vector: %d)\n", + i, irq->affinity_hint_mask.bits[0], irq->vector); + + irq_set_affinity_hint(irq->vector, &irq->affinity_hint_mask); + } + + return rc; + +err: + for (k = ENA_IO_IRQ_FIRST_IDX; k < i; k++) { + irq = &adapter->irq_tbl[k]; + free_irq(irq->vector, irq->data); + } + + return rc; +} + +static void ena_free_mgmnt_irq(struct ena_adapter *adapter) +{ + struct ena_irq *irq; + + irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX]; + synchronize_irq(irq->vector); + irq_set_affinity_hint(irq->vector, NULL); + free_irq(irq->vector, irq->data); +} + +static void ena_free_io_irq(struct ena_adapter *adapter) +{ + struct ena_irq *irq; + int i; + +#ifdef CONFIG_RFS_ACCEL + if (adapter->msix_vecs >= 1) { + free_irq_cpu_rmap(adapter->netdev->rx_cpu_rmap); + adapter->netdev->rx_cpu_rmap = NULL; + } +#endif /* CONFIG_RFS_ACCEL */ + + for (i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) { + irq = &adapter->irq_tbl[i]; + irq_set_affinity_hint(irq->vector, NULL); + free_irq(irq->vector, irq->data); + } +} + +static void ena_disable_msix(struct ena_adapter *adapter) +{ + if (test_and_clear_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags)) + pci_disable_msix(adapter->pdev); + + if (adapter->msix_entries) + vfree(adapter->msix_entries); + adapter->msix_entries = NULL; +} + +static void ena_disable_io_intr_sync(struct ena_adapter *adapter) +{ + int i; + + if (!netif_running(adapter->netdev)) + return; + + for (i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) + synchronize_irq(adapter->irq_tbl[i].vector); +} + +static void ena_del_napi(struct ena_adapter *adapter) +{ + int i; + + for (i = 0; i < adapter->num_queues; i++) + netif_napi_del(&adapter->ena_napi[i].napi); +} + +static void ena_init_napi(struct ena_adapter *adapter) +{ + struct ena_napi *napi; + int i; + + for (i = 0; i < adapter->num_queues; i++) { + napi = &adapter->ena_napi[i]; + + netif_napi_add(adapter->netdev, + &adapter->ena_napi[i].napi, + ena_io_poll, + ENA_NAPI_BUDGET); + napi->rx_ring = &adapter->rx_ring[i]; + napi->tx_ring = &adapter->tx_ring[i]; + napi->qid = i; + } +} + +static void ena_napi_disable_all(struct ena_adapter *adapter) +{ + int i; + + for (i = 0; i < adapter->num_queues; i++) + napi_disable(&adapter->ena_napi[i].napi); +} + +static void ena_napi_enable_all(struct ena_adapter *adapter) +{ + int i; + + for (i = 0; i < adapter->num_queues; i++) + napi_enable(&adapter->ena_napi[i].napi); +} + +static void ena_restore_ethtool_params(struct ena_adapter *adapter) +{ + adapter->tx_usecs = 0; + adapter->rx_usecs = 0; + adapter->tx_frames = 1; + adapter->rx_frames = 1; +} + +/* Configure the Rx forwarding */ +static int ena_rss_configure(struct ena_adapter *adapter) +{ + struct ena_com_dev *ena_dev = adapter->ena_dev; + int rc; + + /* In case the RSS table wasn't initialized by probe */ + if (!ena_dev->rss.tbl_log_size) { + rc = ena_rss_init_default(adapter); + if (rc && (rc != -EPERM)) { + netif_err(adapter, ifup, adapter->netdev, + "Failed to init RSS rc: %d\n", rc); + return rc; + } + } + + /* Set indirect table */ + rc = ena_com_indirect_table_set(ena_dev); + if (unlikely(rc && rc != -EPERM)) + return rc; + + /* Configure hash function (if supported) */ + rc = ena_com_set_hash_function(ena_dev); + if (unlikely(rc && (rc != -EPERM))) + return rc; + + /* Configure hash inputs (if supported) */ + rc = ena_com_set_hash_ctrl(ena_dev); + if (unlikely(rc && (rc != -EPERM))) + return rc; + + return 0; +} + +static int ena_up_complete(struct ena_adapter *adapter) +{ + int rc, i; + + rc = ena_rss_configure(adapter); + if (rc) + return rc; + + ena_init_napi(adapter); + + ena_change_mtu(adapter->netdev, adapter->netdev->mtu); + + ena_refill_all_rx_bufs(adapter); + + /* enable transmits */ + netif_tx_start_all_queues(adapter->netdev); + + ena_restore_ethtool_params(adapter); + + ena_napi_enable_all(adapter); + + /* schedule napi in case we had pending packets + * from the last time we disable napi + */ + for (i = 0; i < adapter->num_queues; i++) + napi_schedule(&adapter->ena_napi[i].napi); + + return 0; +} + +static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid) +{ + struct ena_com_create_io_ctx ctx = { 0 }; + struct ena_com_dev *ena_dev; + struct ena_ring *tx_ring; + u32 msix_vector; + u16 ena_qid; + int rc; + + ena_dev = adapter->ena_dev; + + tx_ring = &adapter->tx_ring[qid]; + msix_vector = ENA_IO_IRQ_IDX(qid); + ena_qid = ENA_IO_TXQ_IDX(qid); + + ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_TX; + ctx.qid = ena_qid; + ctx.mem_queue_type = ena_dev->tx_mem_queue_type; + ctx.msix_vector = msix_vector; + ctx.queue_size = adapter->tx_ring_size; + ctx.numa_node = cpu_to_node(tx_ring->cpu); + + rc = ena_com_create_io_queue(ena_dev, &ctx); + if (rc) { + netif_err(adapter, ifup, adapter->netdev, + "Failed to create I/O TX queue num %d rc: %d\n", + qid, rc); + return rc; + } + + rc = ena_com_get_io_handlers(ena_dev, ena_qid, + &tx_ring->ena_com_io_sq, + &tx_ring->ena_com_io_cq); + if (rc) { + netif_err(adapter, ifup, adapter->netdev, + "Failed to get TX queue handlers. TX queue num %d rc: %d\n", + qid, rc); + ena_com_destroy_io_queue(ena_dev, ena_qid); + } + + ena_com_update_numa_node(tx_ring->ena_com_io_cq, ctx.numa_node); + return rc; +} + +static int ena_create_all_io_tx_queues(struct ena_adapter *adapter) +{ + struct ena_com_dev *ena_dev = adapter->ena_dev; + int rc, i; + + for (i = 0; i < adapter->num_queues; i++) { + rc = ena_create_io_tx_queue(adapter, i); + if (rc) + goto create_err; + } + + return 0; + +create_err: + while (i--) + ena_com_destroy_io_queue(ena_dev, ENA_IO_TXQ_IDX(i)); + + return rc; +} + +static int ena_create_io_rx_queue(struct ena_adapter *adapter, int qid) +{ + struct ena_com_dev *ena_dev; + struct ena_com_create_io_ctx ctx = { 0 }; + struct ena_ring *rx_ring; + u32 msix_vector; + u16 ena_qid; + int rc; + + ena_dev = adapter->ena_dev; + + rx_ring = &adapter->rx_ring[qid]; + msix_vector = ENA_IO_IRQ_IDX(qid); + ena_qid = ENA_IO_RXQ_IDX(qid); + + ctx.qid = ena_qid; + ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX; + ctx.mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; + ctx.msix_vector = msix_vector; + ctx.queue_size = adapter->rx_ring_size; + ctx.numa_node = cpu_to_node(rx_ring->cpu); + + rc = ena_com_create_io_queue(ena_dev, &ctx); + if (rc) { + netif_err(adapter, ifup, adapter->netdev, + "Failed to create I/O RX queue num %d rc: %d\n", + qid, rc); + return rc; + } + + rc = ena_com_get_io_handlers(ena_dev, ena_qid, + &rx_ring->ena_com_io_sq, + &rx_ring->ena_com_io_cq); + if (rc) { + netif_err(adapter, ifup, adapter->netdev, + "Failed to get RX queue handlers. RX queue num %d rc: %d\n", + qid, rc); + ena_com_destroy_io_queue(ena_dev, ena_qid); + } + + ena_com_update_numa_node(rx_ring->ena_com_io_cq, ctx.numa_node); + + return rc; +} + +static int ena_create_all_io_rx_queues(struct ena_adapter *adapter) +{ + struct ena_com_dev *ena_dev = adapter->ena_dev; + int rc, i; + + for (i = 0; i < adapter->num_queues; i++) { + rc = ena_create_io_rx_queue(adapter, i); + if (rc) + goto create_err; + } + + return 0; + +create_err: + while (i--) + ena_com_destroy_io_queue(ena_dev, ENA_IO_RXQ_IDX(i)); + + return rc; +} + +static int ena_up(struct ena_adapter *adapter) +{ + int rc; + + netdev_dbg(adapter->netdev, "%s\n", __func__); + + ena_setup_io_intr(adapter); + + rc = ena_request_io_irq(adapter); + if (rc) + goto err_req_irq; + + /* allocate transmit descriptors */ + rc = ena_setup_all_tx_resources(adapter); + if (rc) + goto err_setup_tx; + + /* allocate receive descriptors */ + rc = ena_setup_all_rx_resources(adapter); + if (rc) + goto err_setup_rx; + + /* Create TX queues */ + rc = ena_create_all_io_tx_queues(adapter); + if (rc) + goto err_create_tx_queues; + + /* Create RX queues */ + rc = ena_create_all_io_rx_queues(adapter); + if (rc) + goto err_create_rx_queues; + + rc = ena_up_complete(adapter); + if (rc) + goto err_up; + + if (test_bit(ENA_FLAG_LINK_UP, &adapter->flags)) + netif_carrier_on(adapter->netdev); + + u64_stats_update_begin(&adapter->syncp); + adapter->dev_stats.interface_up++; + u64_stats_update_end(&adapter->syncp); + + set_bit(ENA_FLAG_DEV_UP, &adapter->flags); + + return rc; + +err_up: + ena_destroy_all_rx_queues(adapter); +err_create_rx_queues: + ena_destroy_all_tx_queues(adapter); +err_create_tx_queues: + ena_free_all_io_rx_resources(adapter); +err_setup_rx: + ena_free_all_io_tx_resources(adapter); +err_setup_tx: + ena_free_io_irq(adapter); +err_req_irq: + + return rc; +} + +static void ena_down(struct ena_adapter *adapter) +{ + netif_info(adapter, ifdown, adapter->netdev, "%s\n", __func__); + + clear_bit(ENA_FLAG_DEV_UP, &adapter->flags); + + u64_stats_update_begin(&adapter->syncp); + adapter->dev_stats.interface_down++; + u64_stats_update_end(&adapter->syncp); + + /* After this point the napi handler won't enable the tx queue */ + ena_napi_disable_all(adapter); + netif_carrier_off(adapter->netdev); + netif_tx_disable(adapter->netdev); + + /* After destroy the queue there won't be any new interrupts */ + ena_destroy_all_io_queues(adapter); + + ena_disable_io_intr_sync(adapter); + ena_free_io_irq(adapter); + ena_del_napi(adapter); + + ena_free_all_tx_bufs(adapter); + ena_free_all_rx_bufs(adapter); + ena_free_all_io_tx_resources(adapter); + ena_free_all_io_rx_resources(adapter); +} + +/* ena_open - Called when a network interface is made active + * @netdev: network interface device structure + * + * Returns 0 on success, negative value on failure + * + * The open entry point is called when a network interface is made + * active by the system (IFF_UP). At this point all resources needed + * for transmit and receive operations are allocated, the interrupt + * handler is registered with the OS, the watchdog timer is started, + * and the stack is notified that the interface is ready. + */ +static int ena_open(struct net_device *netdev) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + int rc; + + /* Notify the stack of the actual queue counts. */ + rc = netif_set_real_num_tx_queues(netdev, adapter->num_queues); + if (rc) { + netif_err(adapter, ifup, netdev, "Can't set num tx queues\n"); + return rc; + } + + rc = netif_set_real_num_rx_queues(netdev, adapter->num_queues); + if (rc) { + netif_err(adapter, ifup, netdev, "Can't set num rx queues\n"); + return rc; + } + + rc = ena_up(adapter); + if (rc) + return rc; + + return rc; +} + +/* ena_close - Disables a network interface + * @netdev: network interface device structure + * + * Returns 0, this is not allowed to fail + * + * The close entry point is called when an interface is de-activated + * by the OS. The hardware is still under the drivers control, but + * needs to be disabled. A global MAC reset is issued to stop the + * hardware, and all transmit and receive resources are freed. + */ +static int ena_close(struct net_device *netdev) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + + netif_dbg(adapter, ifdown, netdev, "%s\n", __func__); + + if (test_bit(ENA_FLAG_DEV_UP, &adapter->flags)) + ena_down(adapter); + + return 0; +} + +static void ena_tx_csum(struct ena_com_tx_ctx *ena_tx_ctx, struct sk_buff *skb) +{ + u32 mss = skb_shinfo(skb)->gso_size; + struct ena_com_tx_meta *ena_meta = &ena_tx_ctx->ena_meta; + u8 l4_protocol = 0; + + if ((skb->ip_summed == CHECKSUM_PARTIAL) || mss) { + ena_tx_ctx->l4_csum_enable = 1; + if (mss) { + ena_tx_ctx->tso_enable = 1; + ena_meta->l4_hdr_len = tcp_hdr(skb)->doff; + ena_tx_ctx->l4_csum_partial = 0; + } else { + ena_tx_ctx->tso_enable = 0; + ena_meta->l4_hdr_len = 0; + ena_tx_ctx->l4_csum_partial = 1; + } + + switch (ip_hdr(skb)->version) { + case IPVERSION: + ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV4; + if (ip_hdr(skb)->frag_off & htons(IP_DF)) + ena_tx_ctx->df = 1; + if (mss) + ena_tx_ctx->l3_csum_enable = 1; + l4_protocol = ip_hdr(skb)->protocol; + break; + case 6: + ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV6; + l4_protocol = ipv6_hdr(skb)->nexthdr; + break; + default: + break; + } + + if (l4_protocol == IPPROTO_TCP) + ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_TCP; + else + ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UDP; + + ena_meta->mss = mss; + ena_meta->l3_hdr_len = skb_network_header_len(skb); + ena_meta->l3_hdr_offset = skb_network_offset(skb); + ena_tx_ctx->meta_valid = 1; + + } else { + ena_tx_ctx->meta_valid = 0; + } +} + +static int ena_check_and_linearize_skb(struct ena_ring *tx_ring, + struct sk_buff *skb) +{ + int num_frags, header_len, rc; + + num_frags = skb_shinfo(skb)->nr_frags; + header_len = skb_headlen(skb); + + if (num_frags < tx_ring->sgl_size) + return 0; + + if ((num_frags == tx_ring->sgl_size) && + (header_len < tx_ring->tx_max_header_size)) + return 0; + + u64_stats_update_begin(&tx_ring->syncp); + tx_ring->tx_stats.linearize++; + u64_stats_update_end(&tx_ring->syncp); + + rc = skb_linearize(skb); + if (unlikely(rc)) { + u64_stats_update_begin(&tx_ring->syncp); + tx_ring->tx_stats.linearize_failed++; + u64_stats_update_end(&tx_ring->syncp); + } + + return rc; +} + +/* Called with netif_tx_lock. */ +static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct ena_adapter *adapter = netdev_priv(dev); + struct ena_tx_buffer *tx_info; + struct ena_com_tx_ctx ena_tx_ctx; + struct ena_ring *tx_ring; + struct netdev_queue *txq; + struct ena_com_buf *ena_buf; + void *push_hdr; + u32 len, last_frag; + u16 next_to_use; + u16 req_id; + u16 push_len; + u16 header_len; + dma_addr_t dma; + int qid, rc, nb_hw_desc; + int i = -1; + + netif_dbg(adapter, tx_queued, dev, "%s skb %p\n", __func__, skb); + /* Determine which tx ring we will be placed on */ + qid = skb_get_queue_mapping(skb); + tx_ring = &adapter->tx_ring[qid]; + txq = netdev_get_tx_queue(dev, qid); + + rc = ena_check_and_linearize_skb(tx_ring, skb); + if (unlikely(rc)) + goto error_drop_packet; + + skb_tx_timestamp(skb); + len = skb_headlen(skb); + + next_to_use = tx_ring->next_to_use; + req_id = tx_ring->free_tx_ids[next_to_use]; + tx_info = &tx_ring->tx_buffer_info[req_id]; + tx_info->num_of_bufs = 0; + + WARN(tx_info->skb, "SKB isn't NULL req_id %d\n", req_id); + ena_buf = tx_info->bufs; + tx_info->skb = skb; + + if (tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { + /* prepared the push buffer */ + push_len = min_t(u32, len, tx_ring->tx_max_header_size); + header_len = push_len; + push_hdr = skb->data; + } else { + push_len = 0; + header_len = min_t(u32, len, tx_ring->tx_max_header_size); + push_hdr = NULL; + } + + netif_dbg(adapter, tx_queued, dev, + "skb: %p header_buf->vaddr: %p push_len: %d\n", skb, + push_hdr, push_len); + + if (len > push_len) { + dma = dma_map_single(tx_ring->dev, skb->data + push_len, + len - push_len, DMA_TO_DEVICE); + if (dma_mapping_error(tx_ring->dev, dma)) + goto error_report_dma_error; + + ena_buf->paddr = dma; + ena_buf->len = len - push_len; + + ena_buf++; + tx_info->num_of_bufs++; + } + + last_frag = skb_shinfo(skb)->nr_frags; + + for (i = 0; i < last_frag; i++) { + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + + len = skb_frag_size(frag); + dma = skb_frag_dma_map(tx_ring->dev, frag, 0, len, + DMA_TO_DEVICE); + if (dma_mapping_error(tx_ring->dev, dma)) + goto error_report_dma_error; + + ena_buf->paddr = dma; + ena_buf->len = len; + ena_buf++; + } + + tx_info->num_of_bufs += last_frag; + + memset(&ena_tx_ctx, 0x0, sizeof(struct ena_com_tx_ctx)); + ena_tx_ctx.ena_bufs = tx_info->bufs; + ena_tx_ctx.push_header = push_hdr; + ena_tx_ctx.num_bufs = tx_info->num_of_bufs; + ena_tx_ctx.req_id = req_id; + ena_tx_ctx.header_len = header_len; + + /* set flags and meta data */ + ena_tx_csum(&ena_tx_ctx, skb); + + /* prepare the packet's descriptors to dma engine */ + rc = ena_com_prepare_tx(tx_ring->ena_com_io_sq, &ena_tx_ctx, + &nb_hw_desc); + + if (unlikely(rc)) { + netif_err(adapter, tx_queued, dev, + "failed to prepare tx bufs\n"); + u64_stats_update_begin(&tx_ring->syncp); + tx_ring->tx_stats.queue_stop++; + tx_ring->tx_stats.prepare_ctx_err++; + u64_stats_update_end(&tx_ring->syncp); + netif_tx_stop_queue(txq); + goto error_unmap_dma; + } + + netdev_tx_sent_queue(txq, skb->len); + + u64_stats_update_begin(&tx_ring->syncp); + tx_ring->tx_stats.cnt++; + tx_ring->tx_stats.bytes += skb->len; + u64_stats_update_end(&tx_ring->syncp); + + tx_info->tx_descs = nb_hw_desc; + tx_info->last_jiffies = jiffies; + + tx_ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use, + tx_ring->ring_size); + + /* This WMB is aimed to: + * 1 - perform smp barrier before reading next_to_completion + * 2 - make sure the desc were written before trigger DB + */ + wmb(); + + /* stop the queue when no more space available, the packet can have up + * to sgl_size + 2. one for the meta descriptor and one for header + * (if the header is larger than tx_max_header_size). + */ + if (unlikely(ena_com_sq_empty_space(tx_ring->ena_com_io_sq) < + (tx_ring->sgl_size + 2))) { + netif_dbg(adapter, tx_queued, dev, "%s stop queue %d\n", + __func__, qid); + + netif_tx_stop_queue(txq); + u64_stats_update_begin(&tx_ring->syncp); + tx_ring->tx_stats.queue_stop++; + u64_stats_update_end(&tx_ring->syncp); + + /* There is a rare condition where this function decide to + * stop the queue but meanwhile clean_tx_irq updates + * next_to_completion and terminates. + * The queue will remain stopped forever. + * To solve this issue this function perform rmb, check + * the wakeup condition and wake up the queue if needed. + */ + smp_rmb(); + + if (ena_com_sq_empty_space(tx_ring->ena_com_io_sq) + > ENA_TX_WAKEUP_THRESH) { + netif_tx_wake_queue(txq); + u64_stats_update_begin(&tx_ring->syncp); + tx_ring->tx_stats.queue_wakeup++; + u64_stats_update_end(&tx_ring->syncp); + } + } + + if (netif_xmit_stopped(txq) || !skb->xmit_more) { + /* trigger the dma engine */ + ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq); + u64_stats_update_begin(&tx_ring->syncp); + tx_ring->tx_stats.doorbells++; + u64_stats_update_end(&tx_ring->syncp); + } + + return NETDEV_TX_OK; + +error_report_dma_error: + u64_stats_update_begin(&tx_ring->syncp); + tx_ring->tx_stats.dma_mapping_err++; + u64_stats_update_end(&tx_ring->syncp); + netdev_warn(adapter->netdev, "failed to map skb\n"); + + tx_info->skb = NULL; + +error_unmap_dma: + if (i >= 0) { + /* save value of frag that failed */ + last_frag = i; + + /* start back at beginning and unmap skb */ + tx_info->skb = NULL; + ena_buf = tx_info->bufs; + dma_unmap_single(tx_ring->dev, dma_unmap_addr(ena_buf, paddr), + dma_unmap_len(ena_buf, len), DMA_TO_DEVICE); + + /* unmap remaining mapped pages */ + for (i = 0; i < last_frag; i++) { + ena_buf++; + dma_unmap_page(tx_ring->dev, dma_unmap_addr(ena_buf, paddr), + dma_unmap_len(ena_buf, len), DMA_TO_DEVICE); + } + } + +error_drop_packet: + + dev_kfree_skb(skb); + return NETDEV_TX_OK; +} + +#ifdef CONFIG_NET_POLL_CONTROLLER +static void ena_netpoll(struct net_device *netdev) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + int i; + + for (i = 0; i < adapter->num_queues; i++) + napi_schedule(&adapter->ena_napi[i].napi); +} +#endif /* CONFIG_NET_POLL_CONTROLLER */ + +static u16 ena_select_queue(struct net_device *dev, struct sk_buff *skb, + void *accel_priv, select_queue_fallback_t fallback) +{ + u16 qid; + /* we suspect that this is good for in--kernel network services that + * want to loop incoming skb rx to tx in normal user generated traffic, + * most probably we will not get to this + */ + if (skb_rx_queue_recorded(skb)) + qid = skb_get_rx_queue(skb); + else + qid = fallback(dev, skb); + + return qid; +} + +static void ena_config_host_info(struct ena_com_dev *ena_dev) +{ + struct ena_admin_host_info *host_info; + int rc; + + /* Allocate only the host info */ + rc = ena_com_allocate_host_info(ena_dev); + if (rc) { + pr_err("Cannot allocate host info\n"); + return; + } + + host_info = ena_dev->host_attr.host_info; + + host_info->os_type = ENA_ADMIN_OS_LINUX; + host_info->kernel_ver = LINUX_VERSION_CODE; + strncpy(host_info->kernel_ver_str, utsname()->version, + sizeof(host_info->kernel_ver_str) - 1); + host_info->os_dist = 0; + strncpy(host_info->os_dist_str, utsname()->release, + sizeof(host_info->os_dist_str) - 1); + host_info->driver_version = + (DRV_MODULE_VER_MAJOR) | + (DRV_MODULE_VER_MINOR << ENA_ADMIN_HOST_INFO_MINOR_SHIFT) | + (DRV_MODULE_VER_SUBMINOR << ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT); + + rc = ena_com_set_host_attributes(ena_dev); + if (rc) { + if (rc == -EPERM) + pr_warn("Cannot set host attributes\n"); + else + pr_err("Cannot set host attributes\n"); + + goto err; + } + + return; + +err: + ena_com_delete_host_info(ena_dev); +} + +static void ena_config_debug_area(struct ena_adapter *adapter) +{ + u32 debug_area_size; + int rc, ss_count; + + ss_count = ena_get_sset_count(adapter->netdev, ETH_SS_STATS); + if (ss_count <= 0) { + netif_err(adapter, drv, adapter->netdev, + "SS count is negative\n"); + return; + } + + /* allocate 32 bytes for each string and 64bit for the value */ + debug_area_size = ss_count * ETH_GSTRING_LEN + sizeof(u64) * ss_count; + + rc = ena_com_allocate_debug_area(adapter->ena_dev, debug_area_size); + if (rc) { + pr_err("Cannot allocate debug area\n"); + return; + } + + rc = ena_com_set_host_attributes(adapter->ena_dev); + if (rc) { + if (rc == -EPERM) + netif_warn(adapter, drv, adapter->netdev, + "Cannot set host attributes\n"); + else + netif_err(adapter, drv, adapter->netdev, + "Cannot set host attributes\n"); + goto err; + } + + return; +err: + ena_com_delete_debug_area(adapter->ena_dev); +} + +static struct rtnl_link_stats64 *ena_get_stats64(struct net_device *netdev, + struct rtnl_link_stats64 *stats) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + struct ena_admin_basic_stats ena_stats; + int rc; + + if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags)) + return NULL; + + rc = ena_com_get_dev_basic_stats(adapter->ena_dev, &ena_stats); + if (rc) + return NULL; + + stats->tx_bytes = ((u64)ena_stats.tx_bytes_high << 32) | + ena_stats.tx_bytes_low; + stats->rx_bytes = ((u64)ena_stats.rx_bytes_high << 32) | + ena_stats.rx_bytes_low; + + stats->rx_packets = ((u64)ena_stats.rx_pkts_high << 32) | + ena_stats.rx_pkts_low; + stats->tx_packets = ((u64)ena_stats.tx_pkts_high << 32) | + ena_stats.tx_pkts_low; + + stats->rx_dropped = ((u64)ena_stats.rx_drops_high << 32) | + ena_stats.rx_drops_low; + + stats->multicast = 0; + stats->collisions = 0; + + stats->rx_length_errors = 0; + stats->rx_crc_errors = 0; + stats->rx_frame_errors = 0; + stats->rx_fifo_errors = 0; + stats->rx_missed_errors = 0; + stats->tx_window_errors = 0; + + stats->rx_errors = 0; + stats->tx_errors = 0; + + return stats; +} + +static const struct net_device_ops ena_netdev_ops = { + .ndo_open = ena_open, + .ndo_stop = ena_close, + .ndo_start_xmit = ena_start_xmit, + .ndo_select_queue = ena_select_queue, + .ndo_get_stats64 = ena_get_stats64, + .ndo_tx_timeout = ena_tx_timeout, + .ndo_change_mtu = ena_change_mtu, + .ndo_set_mac_address = NULL, + .ndo_validate_addr = eth_validate_addr, +#ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_poll_controller = ena_netpoll, +#endif /* CONFIG_NET_POLL_CONTROLLER */ +}; + +static void ena_device_io_suspend(struct work_struct *work) +{ + struct ena_adapter *adapter = + container_of(work, struct ena_adapter, suspend_io_task); + struct net_device *netdev = adapter->netdev; + + /* ena_napi_disable_all disables only the IO handling. + * We are still subject to AENQ keep alive watchdog. + */ + u64_stats_update_begin(&adapter->syncp); + adapter->dev_stats.io_suspend++; + u64_stats_update_begin(&adapter->syncp); + ena_napi_disable_all(adapter); + netif_tx_lock(netdev); + netif_device_detach(netdev); + netif_tx_unlock(netdev); +} + +static void ena_device_io_resume(struct work_struct *work) +{ + struct ena_adapter *adapter = + container_of(work, struct ena_adapter, resume_io_task); + struct net_device *netdev = adapter->netdev; + + u64_stats_update_begin(&adapter->syncp); + adapter->dev_stats.io_resume++; + u64_stats_update_end(&adapter->syncp); + + netif_device_attach(netdev); + ena_napi_enable_all(adapter); +} + +static int ena_device_validate_params(struct ena_adapter *adapter, + struct ena_com_dev_get_features_ctx *get_feat_ctx) +{ + struct net_device *netdev = adapter->netdev; + int rc; + + rc = ether_addr_equal(get_feat_ctx->dev_attr.mac_addr, + adapter->mac_addr); + if (!rc) { + netif_err(adapter, drv, netdev, + "Error, mac address are different\n"); + return -EINVAL; + } + + if ((get_feat_ctx->max_queues.max_cq_num < adapter->num_queues) || + (get_feat_ctx->max_queues.max_sq_num < adapter->num_queues)) { + netif_err(adapter, drv, netdev, + "Error, device doesn't support enough queues\n"); + return -EINVAL; + } + + if (get_feat_ctx->dev_attr.max_mtu < netdev->mtu) { + netif_err(adapter, drv, netdev, + "Error, device max mtu is smaller than netdev MTU\n"); + return -EINVAL; + } + + return 0; +} + +static int ena_device_init(struct ena_com_dev *ena_dev, struct pci_dev *pdev, + struct ena_com_dev_get_features_ctx *get_feat_ctx, + bool *wd_state) +{ + struct device *dev = &pdev->dev; + bool readless_supported; + u32 aenq_groups; + int dma_width; + int rc; + + rc = ena_com_mmio_reg_read_request_init(ena_dev); + if (rc) { + dev_err(dev, "failed to init mmio read less\n"); + return rc; + } + + /* The PCIe configuration space revision id indicate if mmio reg + * read is disabled + */ + readless_supported = !(pdev->revision & ENA_MMIO_DISABLE_REG_READ); + ena_com_set_mmio_read_mode(ena_dev, readless_supported); + + rc = ena_com_dev_reset(ena_dev); + if (rc) { + dev_err(dev, "Can not reset device\n"); + goto err_mmio_read_less; + } + + rc = ena_com_validate_version(ena_dev); + if (rc) { + dev_err(dev, "device version is too low\n"); + goto err_mmio_read_less; + } + + dma_width = ena_com_get_dma_width(ena_dev); + if (dma_width < 0) { + dev_err(dev, "Invalid dma width value %d", dma_width); + goto err_mmio_read_less; + } + + rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(dma_width)); + if (rc) { + dev_err(dev, "pci_set_dma_mask failed 0x%x\n", rc); + goto err_mmio_read_less; + } + + rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(dma_width)); + if (rc) { + dev_err(dev, "err_pci_set_consistent_dma_mask failed 0x%x\n", + rc); + goto err_mmio_read_less; + } + + /* ENA admin level init */ + rc = ena_com_admin_init(ena_dev, &aenq_handlers, true); + if (rc) { + dev_err(dev, + "Can not initialize ena admin queue with device\n"); + goto err_mmio_read_less; + } + + /* To enable the msix interrupts the driver needs to know the number + * of queues. So the driver uses polling mode to retrieve this + * information + */ + ena_com_set_admin_polling_mode(ena_dev, true); + + /* Get Device Attributes*/ + rc = ena_com_get_dev_attr_feat(ena_dev, get_feat_ctx); + if (rc) { + dev_err(dev, "Cannot get attribute for ena device rc=%d\n", rc); + goto err_admin_init; + } + + /* Try to turn all the available aenq groups */ + aenq_groups = BIT(ENA_ADMIN_LINK_CHANGE) | + BIT(ENA_ADMIN_FATAL_ERROR) | + BIT(ENA_ADMIN_WARNING) | + BIT(ENA_ADMIN_NOTIFICATION) | + BIT(ENA_ADMIN_KEEP_ALIVE); + + aenq_groups &= get_feat_ctx->aenq.supported_groups; + + rc = ena_com_set_aenq_config(ena_dev, aenq_groups); + if (rc) { + dev_err(dev, "Cannot configure aenq groups rc= %d\n", rc); + goto err_admin_init; + } + + *wd_state = !!(aenq_groups & BIT(ENA_ADMIN_KEEP_ALIVE)); + + ena_config_host_info(ena_dev); + + return 0; + +err_admin_init: + ena_com_admin_destroy(ena_dev); +err_mmio_read_less: + ena_com_mmio_reg_read_request_destroy(ena_dev); + + return rc; +} + +static int ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *adapter, + int io_vectors) +{ + struct ena_com_dev *ena_dev = adapter->ena_dev; + struct device *dev = &adapter->pdev->dev; + int rc; + + rc = ena_enable_msix(adapter, io_vectors); + if (rc) { + dev_err(dev, "Can not reserve msix vectors\n"); + return rc; + } + + ena_setup_mgmnt_intr(adapter); + + rc = ena_request_mgmnt_irq(adapter); + if (rc) { + dev_err(dev, "Can not setup management interrupts\n"); + goto err_disable_msix; + } + + ena_com_set_admin_polling_mode(ena_dev, false); + + ena_com_admin_aenq_enable(ena_dev); + + return 0; + +err_disable_msix: + ena_disable_msix(adapter); + + return rc; +} + +static void ena_fw_reset_device(struct work_struct *work) +{ + struct ena_com_dev_get_features_ctx get_feat_ctx; + struct ena_adapter *adapter = + container_of(work, struct ena_adapter, reset_task); + struct net_device *netdev = adapter->netdev; + struct ena_com_dev *ena_dev = adapter->ena_dev; + struct pci_dev *pdev = adapter->pdev; + bool dev_up, wd_state; + int rc; + + del_timer_sync(&adapter->timer_service); + + rtnl_lock(); + + dev_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags); + ena_com_set_admin_running_state(ena_dev, false); + + /* After calling ena_close the tx queues and the napi + * are disabled so no one can interfere or touch the + * data structures + */ + ena_close(netdev); + + rc = ena_com_dev_reset(ena_dev); + if (rc) { + dev_err(&pdev->dev, "Device reset failed\n"); + goto err; + } + + ena_free_mgmnt_irq(adapter); + + ena_disable_msix(adapter); + + ena_com_abort_admin_commands(ena_dev); + + ena_com_wait_for_abort_completion(ena_dev); + + ena_com_admin_destroy(ena_dev); + + ena_com_mmio_reg_read_request_destroy(ena_dev); + + /* Finish with the destroy part. Start the init part */ + + rc = ena_device_init(ena_dev, adapter->pdev, &get_feat_ctx, &wd_state); + if (rc) { + dev_err(&pdev->dev, "Can not initialize device\n"); + goto err; + } + adapter->wd_state = wd_state; + + rc = ena_device_validate_params(adapter, &get_feat_ctx); + if (rc) { + dev_err(&pdev->dev, "Validation of device parameters failed\n"); + goto err_device_destroy; + } + + rc = ena_enable_msix_and_set_admin_interrupts(adapter, + adapter->num_queues); + if (rc) { + dev_err(&pdev->dev, "Enable MSI-X failed\n"); + goto err_device_destroy; + } + /* If the interface was up before the reset bring it up */ + if (dev_up) { + rc = ena_up(adapter); + if (rc) { + dev_err(&pdev->dev, "Failed to create I/O queues\n"); + goto err_disable_msix; + } + } + + mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ)); + + rtnl_unlock(); + + dev_err(&pdev->dev, "Device reset completed successfully\n"); + + return; +err_disable_msix: + ena_free_mgmnt_irq(adapter); + ena_disable_msix(adapter); +err_device_destroy: + ena_com_admin_destroy(ena_dev); +err: + rtnl_unlock(); + + dev_err(&pdev->dev, + "Reset attempt failed. Can not reset the device\n"); +} + +static void check_for_missing_tx_completions(struct ena_adapter *adapter) +{ + struct ena_tx_buffer *tx_buf; + unsigned long last_jiffies; + struct ena_ring *tx_ring; + int i, j, budget; + u32 missed_tx; + + /* Make sure the driver doesn't turn the device in other process */ + smp_rmb(); + + if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags)) + return; + + budget = ENA_MONITORED_TX_QUEUES; + + for (i = adapter->last_monitored_tx_qid; i < adapter->num_queues; i++) { + tx_ring = &adapter->tx_ring[i]; + + for (j = 0; j < tx_ring->ring_size; j++) { + tx_buf = &tx_ring->tx_buffer_info[j]; + last_jiffies = tx_buf->last_jiffies; + if (unlikely(last_jiffies && time_is_before_jiffies(last_jiffies + TX_TIMEOUT))) { + netif_notice(adapter, tx_err, adapter->netdev, + "Found a Tx that wasn't completed on time, qid %d, index %d.\n", + tx_ring->qid, j); + + u64_stats_update_begin(&tx_ring->syncp); + missed_tx = tx_ring->tx_stats.missing_tx_comp++; + u64_stats_update_end(&tx_ring->syncp); + + /* Clear last jiffies so the lost buffer won't + * be counted twice. + */ + tx_buf->last_jiffies = 0; + + if (unlikely(missed_tx > MAX_NUM_OF_TIMEOUTED_PACKETS)) { + netif_err(adapter, tx_err, adapter->netdev, + "The number of lost tx completion is above the threshold (%d > %d). Reset the device\n", + missed_tx, MAX_NUM_OF_TIMEOUTED_PACKETS); + set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); + } + } + } + + budget--; + if (!budget) + break; + } + + adapter->last_monitored_tx_qid = i % adapter->num_queues; +} + +/* Check for keep alive expiration */ +static void check_for_missing_keep_alive(struct ena_adapter *adapter) +{ + unsigned long keep_alive_expired; + + if (!adapter->wd_state) + return; + + keep_alive_expired = round_jiffies(adapter->last_keep_alive_jiffies + + ENA_DEVICE_KALIVE_TIMEOUT); + if (unlikely(time_is_before_jiffies(keep_alive_expired))) { + netif_err(adapter, drv, adapter->netdev, + "Keep alive watchdog timeout.\n"); + u64_stats_update_begin(&adapter->syncp); + adapter->dev_stats.wd_expired++; + u64_stats_update_end(&adapter->syncp); + set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); + } +} + +static void check_for_admin_com_state(struct ena_adapter *adapter) +{ + if (unlikely(!ena_com_get_admin_running_state(adapter->ena_dev))) { + netif_err(adapter, drv, adapter->netdev, + "ENA admin queue is not in running state!\n"); + u64_stats_update_begin(&adapter->syncp); + adapter->dev_stats.admin_q_pause++; + u64_stats_update_end(&adapter->syncp); + set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); + } +} + +static void ena_update_host_info(struct ena_admin_host_info *host_info, + struct net_device *netdev) +{ + host_info->supported_network_features[0] = + netdev->features & GENMASK_ULL(31, 0); + host_info->supported_network_features[1] = + (netdev->features & GENMASK_ULL(63, 32)) >> 32; +} + +static void ena_timer_service(unsigned long data) +{ + struct ena_adapter *adapter = (struct ena_adapter *)data; + u8 *debug_area = adapter->ena_dev->host_attr.debug_area_virt_addr; + struct ena_admin_host_info *host_info = + adapter->ena_dev->host_attr.host_info; + + check_for_missing_keep_alive(adapter); + + check_for_admin_com_state(adapter); + + check_for_missing_tx_completions(adapter); + + if (debug_area) + ena_dump_stats_to_buf(adapter, debug_area); + + if (host_info) + ena_update_host_info(host_info, adapter->netdev); + + if (unlikely(test_and_clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) { + netif_err(adapter, drv, adapter->netdev, + "Trigger reset is on\n"); + ena_dump_stats_to_dmesg(adapter); + queue_work(ena_wq, &adapter->reset_task); + return; + } + + /* Reset the timer */ + mod_timer(&adapter->timer_service, jiffies + HZ); +} + +static int ena_calc_io_queue_num(struct pci_dev *pdev, + struct ena_com_dev *ena_dev, + struct ena_com_dev_get_features_ctx *get_feat_ctx) +{ + int io_sq_num, io_queue_num; + + /* In case of LLQ use the llq number in the get feature cmd */ + if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { + io_sq_num = get_feat_ctx->max_queues.max_llq_num; + + if (io_sq_num == 0) { + dev_err(&pdev->dev, + "Trying to use LLQ but llq_num is 0. Fall back into regular queues\n"); + + ena_dev->tx_mem_queue_type = + ENA_ADMIN_PLACEMENT_POLICY_HOST; + io_sq_num = get_feat_ctx->max_queues.max_sq_num; + } + } else { + io_sq_num = get_feat_ctx->max_queues.max_sq_num; + } + + io_queue_num = min_t(int, num_possible_cpus(), ENA_MAX_NUM_IO_QUEUES); + io_queue_num = min_t(int, io_queue_num, io_sq_num); + io_queue_num = min_t(int, io_queue_num, + get_feat_ctx->max_queues.max_cq_num); + /* 1 IRQ for for mgmnt and 1 IRQs for each IO direction */ + io_queue_num = min_t(int, io_queue_num, pci_msix_vec_count(pdev) - 1); + if (unlikely(!io_queue_num)) { + dev_err(&pdev->dev, "The device doesn't have io queues\n"); + return -EFAULT; + } + + return io_queue_num; +} + +static int ena_set_push_mode(struct pci_dev *pdev, struct ena_com_dev *ena_dev, + struct ena_com_dev_get_features_ctx *get_feat_ctx) +{ + bool has_mem_bar; + + has_mem_bar = pci_select_bars(pdev, IORESOURCE_MEM) & BIT(ENA_MEM_BAR); + + /* Enable push mode if device supports LLQ */ + if (has_mem_bar && (get_feat_ctx->max_queues.max_llq_num > 0)) + ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_DEV; + else + ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; + + return 0; +} + +static void ena_set_dev_offloads(struct ena_com_dev_get_features_ctx *feat, + struct net_device *netdev) +{ + netdev_features_t dev_features = 0; + + /* Set offload features */ + if (feat->offload.tx & + ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK) + dev_features |= NETIF_F_IP_CSUM; + + if (feat->offload.tx & + ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK) + dev_features |= NETIF_F_IPV6_CSUM; + + if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK) + dev_features |= NETIF_F_TSO; + + if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_MASK) + dev_features |= NETIF_F_TSO6; + + if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_ECN_MASK) + dev_features |= NETIF_F_TSO_ECN; + + if (feat->offload.rx_supported & + ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK) + dev_features |= NETIF_F_RXCSUM; + + if (feat->offload.rx_supported & + ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_MASK) + dev_features |= NETIF_F_RXCSUM; + + netdev->features = + dev_features | + NETIF_F_SG | + NETIF_F_NTUPLE | + NETIF_F_RXHASH | + NETIF_F_HIGHDMA; + + netdev->hw_features |= netdev->features; + netdev->vlan_features |= netdev->features; +} + +static void ena_set_conf_feat_params(struct ena_adapter *adapter, + struct ena_com_dev_get_features_ctx *feat) +{ + struct net_device *netdev = adapter->netdev; + + /* Copy mac address */ + if (!is_valid_ether_addr(feat->dev_attr.mac_addr)) { + eth_hw_addr_random(netdev); + ether_addr_copy(adapter->mac_addr, netdev->dev_addr); + } else { + ether_addr_copy(adapter->mac_addr, feat->dev_attr.mac_addr); + ether_addr_copy(netdev->dev_addr, adapter->mac_addr); + } + + /* Set offload features */ + ena_set_dev_offloads(feat, netdev); + + adapter->max_mtu = feat->dev_attr.max_mtu; +} + +static int ena_rss_init_default(struct ena_adapter *adapter) +{ + struct ena_com_dev *ena_dev = adapter->ena_dev; + struct device *dev = &adapter->pdev->dev; + int rc, i; + u32 val; + + rc = ena_com_rss_init(ena_dev, ENA_RX_RSS_TABLE_LOG_SIZE); + if (unlikely(rc)) { + dev_err(dev, "Cannot init indirect table\n"); + goto err_rss_init; + } + + for (i = 0; i < ENA_RX_RSS_TABLE_SIZE; i++) { + val = ethtool_rxfh_indir_default(i, adapter->num_queues); + rc = ena_com_indirect_table_fill_entry(ena_dev, i, + ENA_IO_RXQ_IDX(val)); + if (unlikely(rc && (rc != -EPERM))) { + dev_err(dev, "Cannot fill indirect table\n"); + goto err_fill_indir; + } + } + + rc = ena_com_fill_hash_function(ena_dev, ENA_ADMIN_CRC32, NULL, + ENA_HASH_KEY_SIZE, 0xFFFFFFFF); + if (unlikely(rc && (rc != -EPERM))) { + dev_err(dev, "Cannot fill hash function\n"); + goto err_fill_indir; + } + + rc = ena_com_set_default_hash_ctrl(ena_dev); + if (unlikely(rc && (rc != -EPERM))) { + dev_err(dev, "Cannot fill hash control\n"); + goto err_fill_indir; + } + + return 0; + +err_fill_indir: + ena_com_rss_destroy(ena_dev); +err_rss_init: + + return rc; +} + +static void ena_release_bars(struct ena_com_dev *ena_dev, struct pci_dev *pdev) +{ + int release_bars; + + release_bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK; + pci_release_selected_regions(pdev, release_bars); +} + +static int ena_calc_queue_size(struct pci_dev *pdev, + struct ena_com_dev *ena_dev, + u16 *max_tx_sgl_size, + u16 *max_rx_sgl_size, + struct ena_com_dev_get_features_ctx *get_feat_ctx) +{ + u32 queue_size = ENA_DEFAULT_RING_SIZE; + + queue_size = min_t(u32, queue_size, + get_feat_ctx->max_queues.max_cq_depth); + queue_size = min_t(u32, queue_size, + get_feat_ctx->max_queues.max_sq_depth); + + if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) + queue_size = min_t(u32, queue_size, + get_feat_ctx->max_queues.max_llq_depth); + + queue_size = rounddown_pow_of_two(queue_size); + + if (unlikely(!queue_size)) { + dev_err(&pdev->dev, "Invalid queue size\n"); + return -EFAULT; + } + + *max_tx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS, + get_feat_ctx->max_queues.max_packet_tx_descs); + *max_rx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS, + get_feat_ctx->max_queues.max_packet_rx_descs); + + return queue_size; +} + +/* ena_probe - Device Initialization Routine + * @pdev: PCI device information struct + * @ent: entry in ena_pci_tbl + * + * Returns 0 on success, negative on failure + * + * ena_probe initializes an adapter identified by a pci_dev structure. + * The OS initialization, configuring of the adapter private structure, + * and a hardware reset occur. + */ +static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + struct ena_com_dev_get_features_ctx get_feat_ctx; + static int version_printed; + struct net_device *netdev; + struct ena_adapter *adapter; + struct ena_com_dev *ena_dev = NULL; + static int adapters_found; + int io_queue_num, bars, rc; + int queue_size; + u16 tx_sgl_size = 0; + u16 rx_sgl_size = 0; + bool wd_state; + + dev_dbg(&pdev->dev, "%s\n", __func__); + + if (version_printed++ == 0) + dev_info(&pdev->dev, "%s", version); + + rc = pci_enable_device_mem(pdev); + if (rc) { + dev_err(&pdev->dev, "pci_enable_device_mem() failed!\n"); + return rc; + } + + pci_set_master(pdev); + + ena_dev = vzalloc(sizeof(*ena_dev)); + if (!ena_dev) { + rc = -ENOMEM; + goto err_disable_device; + } + + bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK; + rc = pci_request_selected_regions(pdev, bars, DRV_MODULE_NAME); + if (rc) { + dev_err(&pdev->dev, "pci_request_selected_regions failed %d\n", + rc); + goto err_free_ena_dev; + } + + ena_dev->reg_bar = ioremap(pci_resource_start(pdev, ENA_REG_BAR), + pci_resource_len(pdev, ENA_REG_BAR)); + if (!ena_dev->reg_bar) { + dev_err(&pdev->dev, "failed to remap regs bar\n"); + rc = -EFAULT; + goto err_free_region; + } + + ena_dev->dmadev = &pdev->dev; + + rc = ena_device_init(ena_dev, pdev, &get_feat_ctx, &wd_state); + if (rc) { + dev_err(&pdev->dev, "ena device init failed\n"); + if (rc == -ETIME) + rc = -EPROBE_DEFER; + goto err_free_region; + } + + rc = ena_set_push_mode(pdev, ena_dev, &get_feat_ctx); + if (rc) { + dev_err(&pdev->dev, "Invalid module param(push_mode)\n"); + goto err_device_destroy; + } + + if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { + ena_dev->mem_bar = ioremap_wc(pci_resource_start(pdev, ENA_MEM_BAR), + pci_resource_len(pdev, ENA_MEM_BAR)); + if (!ena_dev->mem_bar) { + rc = -EFAULT; + goto err_device_destroy; + } + } + + /* initial Tx interrupt delay, Assumes 1 usec granularity. + * Updated during device initialization with the real granularity + */ + ena_dev->intr_moder_tx_interval = ENA_INTR_INITIAL_TX_INTERVAL_USECS; + io_queue_num = ena_calc_io_queue_num(pdev, ena_dev, &get_feat_ctx); + queue_size = ena_calc_queue_size(pdev, ena_dev, &tx_sgl_size, + &rx_sgl_size, &get_feat_ctx); + if ((queue_size <= 0) || (io_queue_num <= 0)) { + rc = -EFAULT; + goto err_device_destroy; + } + + dev_info(&pdev->dev, "creating %d io queues. queue size: %d\n", + io_queue_num, queue_size); + + /* dev zeroed in init_etherdev */ + netdev = alloc_etherdev_mq(sizeof(struct ena_adapter), io_queue_num); + if (!netdev) { + dev_err(&pdev->dev, "alloc_etherdev_mq failed\n"); + rc = -ENOMEM; + goto err_device_destroy; + } + + SET_NETDEV_DEV(netdev, &pdev->dev); + + adapter = netdev_priv(netdev); + pci_set_drvdata(pdev, adapter); + + adapter->ena_dev = ena_dev; + adapter->netdev = netdev; + adapter->pdev = pdev; + + ena_set_conf_feat_params(adapter, &get_feat_ctx); + + adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE); + + adapter->tx_ring_size = queue_size; + adapter->rx_ring_size = queue_size; + + adapter->max_tx_sgl_size = tx_sgl_size; + adapter->max_rx_sgl_size = rx_sgl_size; + + adapter->num_queues = io_queue_num; + adapter->last_monitored_tx_qid = 0; + + adapter->rx_copybreak = ENA_DEFAULT_RX_COPYBREAK; + adapter->wd_state = wd_state; + + snprintf(adapter->name, ENA_NAME_MAX_LEN, "ena_%d", adapters_found); + + rc = ena_com_init_interrupt_moderation(adapter->ena_dev); + if (rc) { + dev_err(&pdev->dev, + "Failed to query interrupt moderation feature\n"); + goto err_netdev_destroy; + } + ena_init_io_rings(adapter); + + netdev->netdev_ops = &ena_netdev_ops; + netdev->watchdog_timeo = TX_TIMEOUT; + ena_set_ethtool_ops(netdev); + + netdev->priv_flags |= IFF_UNICAST_FLT; + + u64_stats_init(&adapter->syncp); + + rc = ena_enable_msix_and_set_admin_interrupts(adapter, io_queue_num); + if (rc) { + dev_err(&pdev->dev, + "Failed to enable and set the admin interrupts\n"); + goto err_worker_destroy; + } + rc = ena_rss_init_default(adapter); + if (rc && (rc != -EPERM)) { + dev_err(&pdev->dev, "Cannot init RSS rc: %d\n", rc); + goto err_free_msix; + } + + ena_config_debug_area(adapter); + + memcpy(adapter->netdev->perm_addr, adapter->mac_addr, netdev->addr_len); + + netif_carrier_off(netdev); + + rc = register_netdev(netdev); + if (rc) { + dev_err(&pdev->dev, "Cannot register net device\n"); + goto err_rss; + } + + INIT_WORK(&adapter->suspend_io_task, ena_device_io_suspend); + INIT_WORK(&adapter->resume_io_task, ena_device_io_resume); + INIT_WORK(&adapter->reset_task, ena_fw_reset_device); + + adapter->last_keep_alive_jiffies = jiffies; + + init_timer(&adapter->timer_service); + adapter->timer_service.expires = round_jiffies(jiffies + HZ); + adapter->timer_service.function = ena_timer_service; + adapter->timer_service.data = (unsigned long)adapter; + + add_timer(&adapter->timer_service); + + dev_info(&pdev->dev, "%s found at mem %lx, mac addr %pM Queues %d\n", + DEVICE_NAME, (long)pci_resource_start(pdev, 0), + netdev->dev_addr, io_queue_num); + + set_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags); + + adapters_found++; + + return 0; + +err_rss: + ena_com_delete_debug_area(ena_dev); + ena_com_rss_destroy(ena_dev); +err_free_msix: + ena_com_dev_reset(ena_dev); + ena_free_mgmnt_irq(adapter); + ena_disable_msix(adapter); +err_worker_destroy: + ena_com_destroy_interrupt_moderation(ena_dev); + del_timer(&adapter->timer_service); + cancel_work_sync(&adapter->suspend_io_task); + cancel_work_sync(&adapter->resume_io_task); +err_netdev_destroy: + free_netdev(netdev); +err_device_destroy: + ena_com_delete_host_info(ena_dev); + ena_com_admin_destroy(ena_dev); +err_free_region: + ena_release_bars(ena_dev, pdev); +err_free_ena_dev: + pci_set_drvdata(pdev, NULL); + vfree(ena_dev); +err_disable_device: + pci_disable_device(pdev); + return rc; +} + +/*****************************************************************************/ +static int ena_sriov_configure(struct pci_dev *dev, int numvfs) +{ + int rc; + + if (numvfs > 0) { + rc = pci_enable_sriov(dev, numvfs); + if (rc != 0) { + dev_err(&dev->dev, + "pci_enable_sriov failed to enable: %d vfs with the error: %d\n", + numvfs, rc); + return rc; + } + + return numvfs; + } + + if (numvfs == 0) { + pci_disable_sriov(dev); + return 0; + } + + return -EINVAL; +} + +/*****************************************************************************/ +/*****************************************************************************/ + +/* ena_remove - Device Removal Routine + * @pdev: PCI device information struct + * + * ena_remove is called by the PCI subsystem to alert the driver + * that it should release a PCI device. + */ +static void ena_remove(struct pci_dev *pdev) +{ + struct ena_adapter *adapter = pci_get_drvdata(pdev); + struct ena_com_dev *ena_dev; + struct net_device *netdev; + + if (!adapter) + /* This device didn't load properly and it's resources + * already released, nothing to do + */ + return; + + ena_dev = adapter->ena_dev; + netdev = adapter->netdev; + +#ifdef CONFIG_RFS_ACCEL + if ((adapter->msix_vecs >= 1) && (netdev->rx_cpu_rmap)) { + free_irq_cpu_rmap(netdev->rx_cpu_rmap); + netdev->rx_cpu_rmap = NULL; + } +#endif /* CONFIG_RFS_ACCEL */ + + unregister_netdev(netdev); + del_timer_sync(&adapter->timer_service); + + cancel_work_sync(&adapter->reset_task); + + cancel_work_sync(&adapter->suspend_io_task); + + cancel_work_sync(&adapter->resume_io_task); + + ena_com_dev_reset(ena_dev); + + ena_free_mgmnt_irq(adapter); + + ena_disable_msix(adapter); + + free_netdev(netdev); + + ena_com_mmio_reg_read_request_destroy(ena_dev); + + ena_com_abort_admin_commands(ena_dev); + + ena_com_wait_for_abort_completion(ena_dev); + + ena_com_admin_destroy(ena_dev); + + ena_com_rss_destroy(ena_dev); + + ena_com_delete_debug_area(ena_dev); + + ena_com_delete_host_info(ena_dev); + + ena_release_bars(ena_dev, pdev); + + pci_set_drvdata(pdev, NULL); + + pci_disable_device(pdev); + + ena_com_destroy_interrupt_moderation(ena_dev); + + vfree(ena_dev); +} + +static struct pci_driver ena_pci_driver = { + .name = DRV_MODULE_NAME, + .id_table = ena_pci_tbl, + .probe = ena_probe, + .remove = ena_remove, + .sriov_configure = ena_sriov_configure, +}; + +static int __init ena_init(void) +{ + pr_info("%s", version); + + ena_wq = create_singlethread_workqueue(DRV_MODULE_NAME); + if (!ena_wq) { + pr_err("Failed to create workqueue\n"); + return -ENOMEM; + } + + return pci_register_driver(&ena_pci_driver); +} + +static void __exit ena_cleanup(void) +{ + pci_unregister_driver(&ena_pci_driver); + + if (ena_wq) { + destroy_workqueue(ena_wq); + ena_wq = NULL; + } +} + +/****************************************************************************** + ******************************** AENQ Handlers ******************************* + *****************************************************************************/ +/* ena_update_on_link_change: + * Notify the network interface about the change in link status + */ +static void ena_update_on_link_change(void *adapter_data, + struct ena_admin_aenq_entry *aenq_e) +{ + struct ena_adapter *adapter = (struct ena_adapter *)adapter_data; + struct ena_admin_aenq_link_change_desc *aenq_desc = + (struct ena_admin_aenq_link_change_desc *)aenq_e; + int status = aenq_desc->flags & + ENA_ADMIN_AENQ_LINK_CHANGE_DESC_LINK_STATUS_MASK; + + if (status) { + netdev_dbg(adapter->netdev, "%s\n", __func__); + set_bit(ENA_FLAG_LINK_UP, &adapter->flags); + netif_carrier_on(adapter->netdev); + } else { + clear_bit(ENA_FLAG_LINK_UP, &adapter->flags); + netif_carrier_off(adapter->netdev); + } +} + +static void ena_keep_alive_wd(void *adapter_data, + struct ena_admin_aenq_entry *aenq_e) +{ + struct ena_adapter *adapter = (struct ena_adapter *)adapter_data; + + adapter->last_keep_alive_jiffies = jiffies; +} + +static void ena_notification(void *adapter_data, + struct ena_admin_aenq_entry *aenq_e) +{ + struct ena_adapter *adapter = (struct ena_adapter *)adapter_data; + + WARN(aenq_e->aenq_common_desc.group != ENA_ADMIN_NOTIFICATION, + "Invalid group(%x) expected %x\n", + aenq_e->aenq_common_desc.group, + ENA_ADMIN_NOTIFICATION); + + switch (aenq_e->aenq_common_desc.syndrom) { + case ENA_ADMIN_SUSPEND: + /* Suspend just the IO queues. + * We deliberately don't suspend admin so the timer and + * the keep_alive events should remain. + */ + queue_work(ena_wq, &adapter->suspend_io_task); + break; + case ENA_ADMIN_RESUME: + queue_work(ena_wq, &adapter->resume_io_task); + break; + default: + netif_err(adapter, drv, adapter->netdev, + "Invalid aenq notification link state %d\n", + aenq_e->aenq_common_desc.syndrom); + } +} + +/* This handler will called for unknown event group or unimplemented handlers*/ +static void unimplemented_aenq_handler(void *data, + struct ena_admin_aenq_entry *aenq_e) +{ + struct ena_adapter *adapter = (struct ena_adapter *)data; + + netif_err(adapter, drv, adapter->netdev, + "Unknown event was received or event with unimplemented handler\n"); +} + +static struct ena_aenq_handlers aenq_handlers = { + .handlers = { + [ENA_ADMIN_LINK_CHANGE] = ena_update_on_link_change, + [ENA_ADMIN_NOTIFICATION] = ena_notification, + [ENA_ADMIN_KEEP_ALIVE] = ena_keep_alive_wd, + }, + .unimplemented_handler = unimplemented_aenq_handler +}; + +module_init(ena_init); +module_exit(ena_cleanup); diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h new file mode 100644 index 000000000000..69d7e9ed5bc8 --- /dev/null +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h @@ -0,0 +1,324 @@ +/* + * Copyright 2015 Amazon.com, Inc. or its affiliates. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ENA_H +#define ENA_H + +#include +#include +#include +#include +#include +#include + +#include "ena_com.h" +#include "ena_eth_com.h" + +#define DRV_MODULE_VER_MAJOR 1 +#define DRV_MODULE_VER_MINOR 0 +#define DRV_MODULE_VER_SUBMINOR 2 + +#define DRV_MODULE_NAME "ena" +#ifndef DRV_MODULE_VERSION +#define DRV_MODULE_VERSION \ + __stringify(DRV_MODULE_VER_MAJOR) "." \ + __stringify(DRV_MODULE_VER_MINOR) "." \ + __stringify(DRV_MODULE_VER_SUBMINOR) +#endif + +#define DEVICE_NAME "Elastic Network Adapter (ENA)" + +/* 1 for AENQ + ADMIN */ +#define ENA_MAX_MSIX_VEC(io_queues) (1 + (io_queues)) + +#define ENA_REG_BAR 0 +#define ENA_MEM_BAR 2 +#define ENA_BAR_MASK (BIT(ENA_REG_BAR) | BIT(ENA_MEM_BAR)) + +#define ENA_DEFAULT_RING_SIZE (1024) + +#define ENA_TX_WAKEUP_THRESH (MAX_SKB_FRAGS + 2) +#define ENA_DEFAULT_RX_COPYBREAK (128 - NET_IP_ALIGN) + +/* limit the buffer size to 600 bytes to handle MTU changes from very + * small to very large, in which case the number of buffers per packet + * could exceed ENA_PKT_MAX_BUFS + */ +#define ENA_DEFAULT_MIN_RX_BUFF_ALLOC_SIZE 600 + +#define ENA_MIN_MTU 128 + +#define ENA_NAME_MAX_LEN 20 +#define ENA_IRQNAME_SIZE 40 + +#define ENA_PKT_MAX_BUFS 19 + +#define ENA_RX_RSS_TABLE_LOG_SIZE 7 +#define ENA_RX_RSS_TABLE_SIZE (1 << ENA_RX_RSS_TABLE_LOG_SIZE) + +#define ENA_HASH_KEY_SIZE 40 + +/* The number of tx packet completions that will be handled each NAPI poll + * cycle is ring_size / ENA_TX_POLL_BUDGET_DIVIDER. + */ +#define ENA_TX_POLL_BUDGET_DIVIDER 4 + +/* Refill Rx queue when number of available descriptors is below + * QUEUE_SIZE / ENA_RX_REFILL_THRESH_DIVIDER + */ +#define ENA_RX_REFILL_THRESH_DIVIDER 8 + +/* Number of queues to check for missing queues per timer service */ +#define ENA_MONITORED_TX_QUEUES 4 +/* Max timeout packets before device reset */ +#define MAX_NUM_OF_TIMEOUTED_PACKETS 32 + +#define ENA_TX_RING_IDX_NEXT(idx, ring_size) (((idx) + 1) & ((ring_size) - 1)) + +#define ENA_RX_RING_IDX_NEXT(idx, ring_size) (((idx) + 1) & ((ring_size) - 1)) +#define ENA_RX_RING_IDX_ADD(idx, n, ring_size) \ + (((idx) + (n)) & ((ring_size) - 1)) + +#define ENA_IO_TXQ_IDX(q) (2 * (q)) +#define ENA_IO_RXQ_IDX(q) (2 * (q) + 1) + +#define ENA_MGMNT_IRQ_IDX 0 +#define ENA_IO_IRQ_FIRST_IDX 1 +#define ENA_IO_IRQ_IDX(q) (ENA_IO_IRQ_FIRST_IDX + (q)) + +/* ENA device should send keep alive msg every 1 sec. + * We wait for 3 sec just to be on the safe side. + */ +#define ENA_DEVICE_KALIVE_TIMEOUT (3 * HZ) + +#define ENA_MMIO_DISABLE_REG_READ BIT(0) + +struct ena_irq { + irq_handler_t handler; + void *data; + int cpu; + u32 vector; + cpumask_t affinity_hint_mask; + char name[ENA_IRQNAME_SIZE]; +}; + +struct ena_napi { + struct napi_struct napi ____cacheline_aligned; + struct ena_ring *tx_ring; + struct ena_ring *rx_ring; + u32 qid; +}; + +struct ena_tx_buffer { + struct sk_buff *skb; + /* num of ena desc for this specific skb + * (includes data desc and metadata desc) + */ + u32 tx_descs; + /* num of buffers used by this skb */ + u32 num_of_bufs; + /* Save the last jiffies to detect missing tx packets */ + unsigned long last_jiffies; + struct ena_com_buf bufs[ENA_PKT_MAX_BUFS]; +} ____cacheline_aligned; + +struct ena_rx_buffer { + struct sk_buff *skb; + struct page *page; + u32 page_offset; + struct ena_com_buf ena_buf; +} ____cacheline_aligned; + +struct ena_stats_tx { + u64 cnt; + u64 bytes; + u64 queue_stop; + u64 prepare_ctx_err; + u64 queue_wakeup; + u64 dma_mapping_err; + u64 linearize; + u64 linearize_failed; + u64 napi_comp; + u64 tx_poll; + u64 doorbells; + u64 missing_tx_comp; + u64 bad_req_id; +}; + +struct ena_stats_rx { + u64 cnt; + u64 bytes; + u64 refil_partial; + u64 bad_csum; + u64 page_alloc_fail; + u64 skb_alloc_fail; + u64 dma_mapping_err; + u64 bad_desc_num; + u64 rx_copybreak_pkt; +}; + +struct ena_ring { + /* Holds the empty requests for TX out of order completions */ + u16 *free_tx_ids; + union { + struct ena_tx_buffer *tx_buffer_info; + struct ena_rx_buffer *rx_buffer_info; + }; + + /* cache ptr to avoid using the adapter */ + struct device *dev; + struct pci_dev *pdev; + struct napi_struct *napi; + struct net_device *netdev; + struct ena_com_dev *ena_dev; + struct ena_adapter *adapter; + struct ena_com_io_cq *ena_com_io_cq; + struct ena_com_io_sq *ena_com_io_sq; + + u16 next_to_use; + u16 next_to_clean; + u16 rx_copybreak; + u16 qid; + u16 mtu; + u16 sgl_size; + + /* The maximum header length the device can handle */ + u8 tx_max_header_size; + + /* cpu for TPH */ + int cpu; + /* number of tx/rx_buffer_info's entries */ + int ring_size; + + enum ena_admin_placement_policy_type tx_mem_queue_type; + + struct ena_com_rx_buf_info ena_bufs[ENA_PKT_MAX_BUFS]; + u32 smoothed_interval; + u32 per_napi_packets; + u32 per_napi_bytes; + enum ena_intr_moder_level moder_tbl_idx; + struct u64_stats_sync syncp; + union { + struct ena_stats_tx tx_stats; + struct ena_stats_rx rx_stats; + }; +} ____cacheline_aligned; + +struct ena_stats_dev { + u64 tx_timeout; + u64 io_suspend; + u64 io_resume; + u64 wd_expired; + u64 interface_up; + u64 interface_down; + u64 admin_q_pause; +}; + +enum ena_flags_t { + ENA_FLAG_DEVICE_RUNNING, + ENA_FLAG_DEV_UP, + ENA_FLAG_LINK_UP, + ENA_FLAG_MSIX_ENABLED, + ENA_FLAG_TRIGGER_RESET +}; + +/* adapter specific private data structure */ +struct ena_adapter { + struct ena_com_dev *ena_dev; + /* OS defined structs */ + struct net_device *netdev; + struct pci_dev *pdev; + + /* rx packets that shorter that this len will be copied to the skb + * header + */ + u32 rx_copybreak; + u32 max_mtu; + + int num_queues; + + struct msix_entry *msix_entries; + int msix_vecs; + + u32 tx_usecs, rx_usecs; /* interrupt moderation */ + u32 tx_frames, rx_frames; /* interrupt moderation */ + + u32 tx_ring_size; + u32 rx_ring_size; + + u32 msg_enable; + + u16 max_tx_sgl_size; + u16 max_rx_sgl_size; + + u8 mac_addr[ETH_ALEN]; + + char name[ENA_NAME_MAX_LEN]; + + unsigned long flags; + /* TX */ + struct ena_ring tx_ring[ENA_MAX_NUM_IO_QUEUES] + ____cacheline_aligned_in_smp; + + /* RX */ + struct ena_ring rx_ring[ENA_MAX_NUM_IO_QUEUES] + ____cacheline_aligned_in_smp; + + struct ena_napi ena_napi[ENA_MAX_NUM_IO_QUEUES]; + + struct ena_irq irq_tbl[ENA_MAX_MSIX_VEC(ENA_MAX_NUM_IO_QUEUES)]; + + /* timer service */ + struct work_struct reset_task; + struct work_struct suspend_io_task; + struct work_struct resume_io_task; + struct timer_list timer_service; + + bool wd_state; + unsigned long last_keep_alive_jiffies; + + struct u64_stats_sync syncp; + struct ena_stats_dev dev_stats; + + /* last queue index that was checked for uncompleted tx packets */ + u32 last_monitored_tx_qid; +}; + +void ena_set_ethtool_ops(struct net_device *netdev); + +void ena_dump_stats_to_dmesg(struct ena_adapter *adapter); + +void ena_dump_stats_to_buf(struct ena_adapter *adapter, u8 *buf); + +int ena_get_sset_count(struct net_device *netdev, int sset); + +#endif /* !(ENA_H) */ diff --git a/drivers/net/ethernet/amazon/ena/ena_pci_id_tbl.h b/drivers/net/ethernet/amazon/ena/ena_pci_id_tbl.h new file mode 100644 index 000000000000..f80d2a47fa94 --- /dev/null +++ b/drivers/net/ethernet/amazon/ena/ena_pci_id_tbl.h @@ -0,0 +1,67 @@ +/* + * Copyright 2015 Amazon.com, Inc. or its affiliates. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ENA_PCI_ID_TBL_H_ +#define ENA_PCI_ID_TBL_H_ + +#ifndef PCI_VENDOR_ID_AMAZON +#define PCI_VENDOR_ID_AMAZON 0x1d0f +#endif + +#ifndef PCI_DEV_ID_ENA_PF +#define PCI_DEV_ID_ENA_PF 0x0ec2 +#endif + +#ifndef PCI_DEV_ID_ENA_LLQ_PF +#define PCI_DEV_ID_ENA_LLQ_PF 0x1ec2 +#endif + +#ifndef PCI_DEV_ID_ENA_VF +#define PCI_DEV_ID_ENA_VF 0xec20 +#endif + +#ifndef PCI_DEV_ID_ENA_LLQ_VF +#define PCI_DEV_ID_ENA_LLQ_VF 0xec21 +#endif + +#define ENA_PCI_ID_TABLE_ENTRY(devid) \ + {PCI_DEVICE(PCI_VENDOR_ID_AMAZON, devid)}, + +static const struct pci_device_id ena_pci_tbl[] = { + ENA_PCI_ID_TABLE_ENTRY(PCI_DEV_ID_ENA_PF) + ENA_PCI_ID_TABLE_ENTRY(PCI_DEV_ID_ENA_LLQ_PF) + ENA_PCI_ID_TABLE_ENTRY(PCI_DEV_ID_ENA_VF) + ENA_PCI_ID_TABLE_ENTRY(PCI_DEV_ID_ENA_LLQ_VF) + { } +}; + +#endif /* ENA_PCI_ID_TBL_H_ */ diff --git a/drivers/net/ethernet/amazon/ena/ena_regs_defs.h b/drivers/net/ethernet/amazon/ena/ena_regs_defs.h new file mode 100644 index 000000000000..26097a2b6030 --- /dev/null +++ b/drivers/net/ethernet/amazon/ena/ena_regs_defs.h @@ -0,0 +1,133 @@ +/* + * Copyright 2015 - 2016 Amazon.com, Inc. or its affiliates. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _ENA_REGS_H_ +#define _ENA_REGS_H_ + +/* ena_registers offsets */ +#define ENA_REGS_VERSION_OFF 0x0 +#define ENA_REGS_CONTROLLER_VERSION_OFF 0x4 +#define ENA_REGS_CAPS_OFF 0x8 +#define ENA_REGS_CAPS_EXT_OFF 0xc +#define ENA_REGS_AQ_BASE_LO_OFF 0x10 +#define ENA_REGS_AQ_BASE_HI_OFF 0x14 +#define ENA_REGS_AQ_CAPS_OFF 0x18 +#define ENA_REGS_ACQ_BASE_LO_OFF 0x20 +#define ENA_REGS_ACQ_BASE_HI_OFF 0x24 +#define ENA_REGS_ACQ_CAPS_OFF 0x28 +#define ENA_REGS_AQ_DB_OFF 0x2c +#define ENA_REGS_ACQ_TAIL_OFF 0x30 +#define ENA_REGS_AENQ_CAPS_OFF 0x34 +#define ENA_REGS_AENQ_BASE_LO_OFF 0x38 +#define ENA_REGS_AENQ_BASE_HI_OFF 0x3c +#define ENA_REGS_AENQ_HEAD_DB_OFF 0x40 +#define ENA_REGS_AENQ_TAIL_OFF 0x44 +#define ENA_REGS_INTR_MASK_OFF 0x4c +#define ENA_REGS_DEV_CTL_OFF 0x54 +#define ENA_REGS_DEV_STS_OFF 0x58 +#define ENA_REGS_MMIO_REG_READ_OFF 0x5c +#define ENA_REGS_MMIO_RESP_LO_OFF 0x60 +#define ENA_REGS_MMIO_RESP_HI_OFF 0x64 +#define ENA_REGS_RSS_IND_ENTRY_UPDATE_OFF 0x68 + +/* version register */ +#define ENA_REGS_VERSION_MINOR_VERSION_MASK 0xff +#define ENA_REGS_VERSION_MAJOR_VERSION_SHIFT 8 +#define ENA_REGS_VERSION_MAJOR_VERSION_MASK 0xff00 + +/* controller_version register */ +#define ENA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK 0xff +#define ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT 8 +#define ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK 0xff00 +#define ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT 16 +#define ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK 0xff0000 +#define ENA_REGS_CONTROLLER_VERSION_IMPL_ID_SHIFT 24 +#define ENA_REGS_CONTROLLER_VERSION_IMPL_ID_MASK 0xff000000 + +/* caps register */ +#define ENA_REGS_CAPS_CONTIGUOUS_QUEUE_REQUIRED_MASK 0x1 +#define ENA_REGS_CAPS_RESET_TIMEOUT_SHIFT 1 +#define ENA_REGS_CAPS_RESET_TIMEOUT_MASK 0x3e +#define ENA_REGS_CAPS_DMA_ADDR_WIDTH_SHIFT 8 +#define ENA_REGS_CAPS_DMA_ADDR_WIDTH_MASK 0xff00 + +/* aq_caps register */ +#define ENA_REGS_AQ_CAPS_AQ_DEPTH_MASK 0xffff +#define ENA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_SHIFT 16 +#define ENA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_MASK 0xffff0000 + +/* acq_caps register */ +#define ENA_REGS_ACQ_CAPS_ACQ_DEPTH_MASK 0xffff +#define ENA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_SHIFT 16 +#define ENA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_MASK 0xffff0000 + +/* aenq_caps register */ +#define ENA_REGS_AENQ_CAPS_AENQ_DEPTH_MASK 0xffff +#define ENA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_SHIFT 16 +#define ENA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_MASK 0xffff0000 + +/* dev_ctl register */ +#define ENA_REGS_DEV_CTL_DEV_RESET_MASK 0x1 +#define ENA_REGS_DEV_CTL_AQ_RESTART_SHIFT 1 +#define ENA_REGS_DEV_CTL_AQ_RESTART_MASK 0x2 +#define ENA_REGS_DEV_CTL_QUIESCENT_SHIFT 2 +#define ENA_REGS_DEV_CTL_QUIESCENT_MASK 0x4 +#define ENA_REGS_DEV_CTL_IO_RESUME_SHIFT 3 +#define ENA_REGS_DEV_CTL_IO_RESUME_MASK 0x8 + +/* dev_sts register */ +#define ENA_REGS_DEV_STS_READY_MASK 0x1 +#define ENA_REGS_DEV_STS_AQ_RESTART_IN_PROGRESS_SHIFT 1 +#define ENA_REGS_DEV_STS_AQ_RESTART_IN_PROGRESS_MASK 0x2 +#define ENA_REGS_DEV_STS_AQ_RESTART_FINISHED_SHIFT 2 +#define ENA_REGS_DEV_STS_AQ_RESTART_FINISHED_MASK 0x4 +#define ENA_REGS_DEV_STS_RESET_IN_PROGRESS_SHIFT 3 +#define ENA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK 0x8 +#define ENA_REGS_DEV_STS_RESET_FINISHED_SHIFT 4 +#define ENA_REGS_DEV_STS_RESET_FINISHED_MASK 0x10 +#define ENA_REGS_DEV_STS_FATAL_ERROR_SHIFT 5 +#define ENA_REGS_DEV_STS_FATAL_ERROR_MASK 0x20 +#define ENA_REGS_DEV_STS_QUIESCENT_STATE_IN_PROGRESS_SHIFT 6 +#define ENA_REGS_DEV_STS_QUIESCENT_STATE_IN_PROGRESS_MASK 0x40 +#define ENA_REGS_DEV_STS_QUIESCENT_STATE_ACHIEVED_SHIFT 7 +#define ENA_REGS_DEV_STS_QUIESCENT_STATE_ACHIEVED_MASK 0x80 + +/* mmio_reg_read register */ +#define ENA_REGS_MMIO_REG_READ_REQ_ID_MASK 0xffff +#define ENA_REGS_MMIO_REG_READ_REG_OFF_SHIFT 16 +#define ENA_REGS_MMIO_REG_READ_REG_OFF_MASK 0xffff0000 + +/* rss_ind_entry_update register */ +#define ENA_REGS_RSS_IND_ENTRY_UPDATE_INDEX_MASK 0xffff +#define ENA_REGS_RSS_IND_ENTRY_UPDATE_CQ_IDX_SHIFT 16 +#define ENA_REGS_RSS_IND_ENTRY_UPDATE_CQ_IDX_MASK 0xffff0000 + +#endif /*_ENA_REGS_H_ */ From e3e9652a43207561eaec6085a8272fe11b283286 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Thu, 11 Aug 2016 17:51:00 +0800 Subject: [PATCH 0143/1715] net: ethernet: mediatek: enhance the locking using the lightweight ones Since these critical sections protected by page_lock are all entered from the user context or bottom half context, they can be replaced with the spin_lock() or spin_lock_bh instead of spin_lock_irqsave(). Signed-off-by: Sean Wang Acked-by: John Crispin Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index b57ae3afb994..3a4726e2a297 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -353,18 +353,17 @@ static int mtk_set_mac_address(struct net_device *dev, void *p) int ret = eth_mac_addr(dev, p); struct mtk_mac *mac = netdev_priv(dev); const char *macaddr = dev->dev_addr; - unsigned long flags; if (ret) return ret; - spin_lock_irqsave(&mac->hw->page_lock, flags); + spin_lock_bh(&mac->hw->page_lock); mtk_w32(mac->hw, (macaddr[0] << 8) | macaddr[1], MTK_GDMA_MAC_ADRH(mac->id)); mtk_w32(mac->hw, (macaddr[2] << 24) | (macaddr[3] << 16) | (macaddr[4] << 8) | macaddr[5], MTK_GDMA_MAC_ADRL(mac->id)); - spin_unlock_irqrestore(&mac->hw->page_lock, flags); + spin_unlock_bh(&mac->hw->page_lock); return 0; } @@ -748,7 +747,6 @@ static int mtk_start_xmit(struct sk_buff *skb, struct net_device *dev) struct mtk_eth *eth = mac->hw; struct mtk_tx_ring *ring = ð->tx_ring; struct net_device_stats *stats = &dev->stats; - unsigned long flags; bool gso = false; int tx_num; @@ -756,14 +754,14 @@ static int mtk_start_xmit(struct sk_buff *skb, struct net_device *dev) * however we have 2 queues running on the same ring so we need to lock * the ring access */ - spin_lock_irqsave(ð->page_lock, flags); + spin_lock(ð->page_lock); tx_num = mtk_cal_txd_req(skb); if (unlikely(atomic_read(&ring->free_count) <= tx_num)) { mtk_stop_queue(eth); netif_err(eth, tx_queued, dev, "Tx Ring full when queue awake!\n"); - spin_unlock_irqrestore(ð->page_lock, flags); + spin_unlock(ð->page_lock); return NETDEV_TX_BUSY; } @@ -788,12 +786,12 @@ static int mtk_start_xmit(struct sk_buff *skb, struct net_device *dev) if (unlikely(atomic_read(&ring->free_count) <= ring->thresh)) mtk_stop_queue(eth); - spin_unlock_irqrestore(ð->page_lock, flags); + spin_unlock(ð->page_lock); return NETDEV_TX_OK; drop: - spin_unlock_irqrestore(ð->page_lock, flags); + spin_unlock(ð->page_lock); stats->tx_dropped++; dev_kfree_skb(skb); return NETDEV_TX_OK; @@ -1347,16 +1345,15 @@ static int mtk_open(struct net_device *dev) static void mtk_stop_dma(struct mtk_eth *eth, u32 glo_cfg) { - unsigned long flags; u32 val; int i; /* stop the dma engine */ - spin_lock_irqsave(ð->page_lock, flags); + spin_lock_bh(ð->page_lock); val = mtk_r32(eth, glo_cfg); mtk_w32(eth, val & ~(MTK_TX_WB_DDONE | MTK_RX_DMA_EN | MTK_TX_DMA_EN), glo_cfg); - spin_unlock_irqrestore(ð->page_lock, flags); + spin_unlock_bh(ð->page_lock); /* wait for dma stop */ for (i = 0; i < 10; i++) { From d29e33d6d087ef66a70f94791504579366cf3d09 Mon Sep 17 00:00:00 2001 From: Bert Kenward Date: Thu, 11 Aug 2016 13:01:01 +0100 Subject: [PATCH 0144/1715] sfc: update MCDI protocol headers Signed-off-by: Bert Kenward Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/mcdi_pcol.h | 530 ++++++++++++++++++++++++++- 1 file changed, 515 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/sfc/mcdi_pcol.h b/drivers/net/ethernet/sfc/mcdi_pcol.h index c9a5b003caaf..ccceafc15896 100644 --- a/drivers/net/ethernet/sfc/mcdi_pcol.h +++ b/drivers/net/ethernet/sfc/mcdi_pcol.h @@ -2645,16 +2645,20 @@ #define MC_CMD_POLL_BIST_MEM_BUS_MC 0x0 /* enum: CSR IREG bus. */ #define MC_CMD_POLL_BIST_MEM_BUS_CSR 0x1 -/* enum: RX DPCPU bus. */ +/* enum: RX0 DPCPU bus. */ #define MC_CMD_POLL_BIST_MEM_BUS_DPCPU_RX 0x2 /* enum: TX0 DPCPU bus. */ #define MC_CMD_POLL_BIST_MEM_BUS_DPCPU_TX0 0x3 /* enum: TX1 DPCPU bus. */ #define MC_CMD_POLL_BIST_MEM_BUS_DPCPU_TX1 0x4 -/* enum: RX DICPU bus. */ +/* enum: RX0 DICPU bus. */ #define MC_CMD_POLL_BIST_MEM_BUS_DICPU_RX 0x5 /* enum: TX DICPU bus. */ #define MC_CMD_POLL_BIST_MEM_BUS_DICPU_TX 0x6 +/* enum: RX1 DPCPU bus. */ +#define MC_CMD_POLL_BIST_MEM_BUS_DPCPU_RX1 0x7 +/* enum: RX1 DICPU bus. */ +#define MC_CMD_POLL_BIST_MEM_BUS_DICPU_RX1 0x8 /* Pattern written to RAM / register */ #define MC_CMD_POLL_BIST_OUT_MEM_EXPECT_OFST 16 /* Actual value read from RAM / register */ @@ -3612,6 +3616,8 @@ #define MC_CMD_NVRAM_INFO_OUT_PROTECTED_WIDTH 1 #define MC_CMD_NVRAM_INFO_OUT_TLV_LBN 1 #define MC_CMD_NVRAM_INFO_OUT_TLV_WIDTH 1 +#define MC_CMD_NVRAM_INFO_OUT_CMAC_LBN 6 +#define MC_CMD_NVRAM_INFO_OUT_CMAC_WIDTH 1 #define MC_CMD_NVRAM_INFO_OUT_A_B_LBN 7 #define MC_CMD_NVRAM_INFO_OUT_A_B_WIDTH 1 #define MC_CMD_NVRAM_INFO_OUT_PHYSDEV_OFST 16 @@ -4389,6 +4395,8 @@ * the command will fail with MC_CMD_ERR_FILTERS_PRESENT. */ #define MC_CMD_WORKAROUND_BUG26807 0x6 +/* enum: Bug 61265 work around (broken EVQ TMR writes). */ +#define MC_CMD_WORKAROUND_BUG61265 0x7 /* 0 = disable the workaround indicated by TYPE; any non-zero value = enable * the workaround */ @@ -4413,7 +4421,6 @@ * (GET_PHY_CFG_OUT_MEDIA_TYPE); the valid 'page number' input values, and the * output data, are interpreted on a per-type basis. For SFP+: PAGE=0 or 1 * returns a 128-byte block read from module I2C address 0xA0 offset 0 or 0x80. - * Anything else: currently undefined. Locks required: None. Return code: 0. */ #define MC_CMD_GET_PHY_MEDIA_INFO 0x4b @@ -5479,6 +5486,8 @@ #define LICENSED_V3_FEATURES_TX_SNIFF_WIDTH 1 #define LICENSED_V3_FEATURES_PROXY_FILTER_OPS_LBN 8 #define LICENSED_V3_FEATURES_PROXY_FILTER_OPS_WIDTH 1 +#define LICENSED_V3_FEATURES_EVENT_CUT_THROUGH_LBN 9 +#define LICENSED_V3_FEATURES_EVENT_CUT_THROUGH_WIDTH 1 #define LICENSED_V3_FEATURES_MASK_LBN 0 #define LICENSED_V3_FEATURES_MASK_WIDTH 64 @@ -5634,6 +5643,109 @@ /* Only valid if INTRFLAG was true */ #define MC_CMD_INIT_EVQ_OUT_IRQ_OFST 0 +/* MC_CMD_INIT_EVQ_V2_IN msgrequest */ +#define MC_CMD_INIT_EVQ_V2_IN_LENMIN 44 +#define MC_CMD_INIT_EVQ_V2_IN_LENMAX 548 +#define MC_CMD_INIT_EVQ_V2_IN_LEN(num) (36+8*(num)) +/* Size, in entries */ +#define MC_CMD_INIT_EVQ_V2_IN_SIZE_OFST 0 +/* Desired instance. Must be set to a specific instance, which is a function + * local queue index. + */ +#define MC_CMD_INIT_EVQ_V2_IN_INSTANCE_OFST 4 +/* The initial timer value. The load value is ignored if the timer mode is DIS. + */ +#define MC_CMD_INIT_EVQ_V2_IN_TMR_LOAD_OFST 8 +/* The reload value is ignored in one-shot modes */ +#define MC_CMD_INIT_EVQ_V2_IN_TMR_RELOAD_OFST 12 +/* tbd */ +#define MC_CMD_INIT_EVQ_V2_IN_FLAGS_OFST 16 +#define MC_CMD_INIT_EVQ_V2_IN_FLAG_INTERRUPTING_LBN 0 +#define MC_CMD_INIT_EVQ_V2_IN_FLAG_INTERRUPTING_WIDTH 1 +#define MC_CMD_INIT_EVQ_V2_IN_FLAG_RPTR_DOS_LBN 1 +#define MC_CMD_INIT_EVQ_V2_IN_FLAG_RPTR_DOS_WIDTH 1 +#define MC_CMD_INIT_EVQ_V2_IN_FLAG_INT_ARMD_LBN 2 +#define MC_CMD_INIT_EVQ_V2_IN_FLAG_INT_ARMD_WIDTH 1 +#define MC_CMD_INIT_EVQ_V2_IN_FLAG_CUT_THRU_LBN 3 +#define MC_CMD_INIT_EVQ_V2_IN_FLAG_CUT_THRU_WIDTH 1 +#define MC_CMD_INIT_EVQ_V2_IN_FLAG_RX_MERGE_LBN 4 +#define MC_CMD_INIT_EVQ_V2_IN_FLAG_RX_MERGE_WIDTH 1 +#define MC_CMD_INIT_EVQ_V2_IN_FLAG_TX_MERGE_LBN 5 +#define MC_CMD_INIT_EVQ_V2_IN_FLAG_TX_MERGE_WIDTH 1 +#define MC_CMD_INIT_EVQ_V2_IN_FLAG_USE_TIMER_LBN 6 +#define MC_CMD_INIT_EVQ_V2_IN_FLAG_USE_TIMER_WIDTH 1 +#define MC_CMD_INIT_EVQ_V2_IN_FLAG_TYPE_LBN 7 +#define MC_CMD_INIT_EVQ_V2_IN_FLAG_TYPE_WIDTH 4 +/* enum: All initialisation flags specified by host. */ +#define MC_CMD_INIT_EVQ_V2_IN_FLAG_TYPE_MANUAL 0x0 +/* enum: MEDFORD only. Certain initialisation flags specified by host may be + * over-ridden by firmware based on licenses and firmware variant in order to + * provide the lowest latency achievable. See + * MC_CMD_INIT_EVQ_V2/MC_CMD_INIT_EVQ_V2_OUT/FLAGS for list of affected flags. + */ +#define MC_CMD_INIT_EVQ_V2_IN_FLAG_TYPE_LOW_LATENCY 0x1 +/* enum: MEDFORD only. Certain initialisation flags specified by host may be + * over-ridden by firmware based on licenses and firmware variant in order to + * provide the best throughput achievable. See + * MC_CMD_INIT_EVQ_V2/MC_CMD_INIT_EVQ_V2_OUT/FLAGS for list of affected flags. + */ +#define MC_CMD_INIT_EVQ_V2_IN_FLAG_TYPE_THROUGHPUT 0x2 +/* enum: MEDFORD only. Certain initialisation flags may be over-ridden by + * firmware based on licenses and firmware variant. See + * MC_CMD_INIT_EVQ_V2/MC_CMD_INIT_EVQ_V2_OUT/FLAGS for list of affected flags. + */ +#define MC_CMD_INIT_EVQ_V2_IN_FLAG_TYPE_AUTO 0x3 +#define MC_CMD_INIT_EVQ_V2_IN_TMR_MODE_OFST 20 +/* enum: Disabled */ +#define MC_CMD_INIT_EVQ_V2_IN_TMR_MODE_DIS 0x0 +/* enum: Immediate */ +#define MC_CMD_INIT_EVQ_V2_IN_TMR_IMMED_START 0x1 +/* enum: Triggered */ +#define MC_CMD_INIT_EVQ_V2_IN_TMR_TRIG_START 0x2 +/* enum: Hold-off */ +#define MC_CMD_INIT_EVQ_V2_IN_TMR_INT_HLDOFF 0x3 +/* Target EVQ for wakeups if in wakeup mode. */ +#define MC_CMD_INIT_EVQ_V2_IN_TARGET_EVQ_OFST 24 +/* Target interrupt if in interrupting mode (note union with target EVQ). Use + * MC_CMD_RESOURCE_INSTANCE_ANY unless a specific one required for test + * purposes. + */ +#define MC_CMD_INIT_EVQ_V2_IN_IRQ_NUM_OFST 24 +/* Event Counter Mode. */ +#define MC_CMD_INIT_EVQ_V2_IN_COUNT_MODE_OFST 28 +/* enum: Disabled */ +#define MC_CMD_INIT_EVQ_V2_IN_COUNT_MODE_DIS 0x0 +/* enum: Disabled */ +#define MC_CMD_INIT_EVQ_V2_IN_COUNT_MODE_RX 0x1 +/* enum: Disabled */ +#define MC_CMD_INIT_EVQ_V2_IN_COUNT_MODE_TX 0x2 +/* enum: Disabled */ +#define MC_CMD_INIT_EVQ_V2_IN_COUNT_MODE_RXTX 0x3 +/* Event queue packet count threshold. */ +#define MC_CMD_INIT_EVQ_V2_IN_COUNT_THRSHLD_OFST 32 +/* 64-bit address of 4k of 4k-aligned host memory buffer */ +#define MC_CMD_INIT_EVQ_V2_IN_DMA_ADDR_OFST 36 +#define MC_CMD_INIT_EVQ_V2_IN_DMA_ADDR_LEN 8 +#define MC_CMD_INIT_EVQ_V2_IN_DMA_ADDR_LO_OFST 36 +#define MC_CMD_INIT_EVQ_V2_IN_DMA_ADDR_HI_OFST 40 +#define MC_CMD_INIT_EVQ_V2_IN_DMA_ADDR_MINNUM 1 +#define MC_CMD_INIT_EVQ_V2_IN_DMA_ADDR_MAXNUM 64 + +/* MC_CMD_INIT_EVQ_V2_OUT msgresponse */ +#define MC_CMD_INIT_EVQ_V2_OUT_LEN 8 +/* Only valid if INTRFLAG was true */ +#define MC_CMD_INIT_EVQ_V2_OUT_IRQ_OFST 0 +/* Actual configuration applied on the card */ +#define MC_CMD_INIT_EVQ_V2_OUT_FLAGS_OFST 4 +#define MC_CMD_INIT_EVQ_V2_OUT_FLAG_CUT_THRU_LBN 0 +#define MC_CMD_INIT_EVQ_V2_OUT_FLAG_CUT_THRU_WIDTH 1 +#define MC_CMD_INIT_EVQ_V2_OUT_FLAG_RX_MERGE_LBN 1 +#define MC_CMD_INIT_EVQ_V2_OUT_FLAG_RX_MERGE_WIDTH 1 +#define MC_CMD_INIT_EVQ_V2_OUT_FLAG_TX_MERGE_LBN 2 +#define MC_CMD_INIT_EVQ_V2_OUT_FLAG_TX_MERGE_WIDTH 1 +#define MC_CMD_INIT_EVQ_V2_OUT_FLAG_RXQ_FORCE_EV_MERGING_LBN 3 +#define MC_CMD_INIT_EVQ_V2_OUT_FLAG_RXQ_FORCE_EV_MERGING_WIDTH 1 + /* QUEUE_CRC_MODE structuredef */ #define QUEUE_CRC_MODE_LEN 1 #define QUEUE_CRC_MODE_MODE_LBN 0 @@ -5697,8 +5809,8 @@ #define MC_CMD_INIT_RXQ_IN_FLAG_PREFIX_WIDTH 1 #define MC_CMD_INIT_RXQ_IN_FLAG_DISABLE_SCATTER_LBN 9 #define MC_CMD_INIT_RXQ_IN_FLAG_DISABLE_SCATTER_WIDTH 1 -#define MC_CMD_INIT_RXQ_IN_FLAG_FORCE_EV_MERGING_LBN 10 -#define MC_CMD_INIT_RXQ_IN_FLAG_FORCE_EV_MERGING_WIDTH 1 +#define MC_CMD_INIT_RXQ_IN_UNUSED_LBN 10 +#define MC_CMD_INIT_RXQ_IN_UNUSED_WIDTH 1 /* Owner ID to use if in buffer mode (zero if physical) */ #define MC_CMD_INIT_RXQ_IN_OWNER_ID_OFST 20 /* The port ID associated with the v-adaptor which should contain this DMAQ. */ @@ -7854,6 +7966,20 @@ #define MC_CMD_GET_CAPABILITIES_V2_OUT_EVENT_CUT_THROUGH_WIDTH 1 #define MC_CMD_GET_CAPABILITIES_V2_OUT_RX_CUT_THROUGH_LBN 4 #define MC_CMD_GET_CAPABILITIES_V2_OUT_RX_CUT_THROUGH_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V2_OUT_TX_VFIFO_ULL_MODE_LBN 5 +#define MC_CMD_GET_CAPABILITIES_V2_OUT_TX_VFIFO_ULL_MODE_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V2_OUT_MAC_STATS_40G_TX_SIZE_BINS_LBN 6 +#define MC_CMD_GET_CAPABILITIES_V2_OUT_MAC_STATS_40G_TX_SIZE_BINS_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V2_OUT_INIT_EVQ_V2_LBN 7 +#define MC_CMD_GET_CAPABILITIES_V2_OUT_INIT_EVQ_V2_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V2_OUT_TX_MAC_TIMESTAMPING_LBN 8 +#define MC_CMD_GET_CAPABILITIES_V2_OUT_TX_MAC_TIMESTAMPING_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V2_OUT_TX_TIMESTAMP_LBN 9 +#define MC_CMD_GET_CAPABILITIES_V2_OUT_TX_TIMESTAMP_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V2_OUT_RX_SNIFF_LBN 10 +#define MC_CMD_GET_CAPABILITIES_V2_OUT_RX_SNIFF_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V2_OUT_TX_SNIFF_LBN 11 +#define MC_CMD_GET_CAPABILITIES_V2_OUT_TX_SNIFF_WIDTH 1 /* Number of FATSOv2 contexts per datapath supported by this NIC. Not present * on older firmware (check the length). */ @@ -7910,6 +8036,288 @@ #define MC_CMD_GET_CAPABILITIES_V2_OUT_SIZE_PIO_BUFF_OFST 70 #define MC_CMD_GET_CAPABILITIES_V2_OUT_SIZE_PIO_BUFF_LEN 2 +/* MC_CMD_GET_CAPABILITIES_V3_OUT msgresponse */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_LEN 73 +/* First word of flags. */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_FLAGS1_OFST 0 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_VPORT_RECONFIGURE_LBN 3 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_VPORT_RECONFIGURE_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TX_STRIPING_LBN 4 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TX_STRIPING_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_VADAPTOR_QUERY_LBN 5 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_VADAPTOR_QUERY_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_EVB_PORT_VLAN_RESTRICT_LBN 6 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_EVB_PORT_VLAN_RESTRICT_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_DRV_ATTACH_PREBOOT_LBN 7 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_DRV_ATTACH_PREBOOT_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_FORCE_EVENT_MERGING_LBN 8 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_FORCE_EVENT_MERGING_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_SET_MAC_ENHANCED_LBN 9 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_SET_MAC_ENHANCED_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_UNKNOWN_UCAST_DST_FILTER_ALWAYS_MULTI_RECIPIENT_LBN 10 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_UNKNOWN_UCAST_DST_FILTER_ALWAYS_MULTI_RECIPIENT_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_VADAPTOR_PERMIT_SET_MAC_WHEN_FILTERS_INSTALLED_LBN 11 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_VADAPTOR_PERMIT_SET_MAC_WHEN_FILTERS_INSTALLED_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TX_MAC_SECURITY_FILTERING_LBN 12 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TX_MAC_SECURITY_FILTERING_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_ADDITIONAL_RSS_MODES_LBN 13 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_ADDITIONAL_RSS_MODES_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_QBB_LBN 14 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_QBB_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_PACKED_STREAM_VAR_BUFFERS_LBN 15 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_PACKED_STREAM_VAR_BUFFERS_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_RSS_LIMITED_LBN 16 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_RSS_LIMITED_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_PACKED_STREAM_LBN 17 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_PACKED_STREAM_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_INCLUDE_FCS_LBN 18 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_INCLUDE_FCS_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TX_VLAN_INSERTION_LBN 19 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TX_VLAN_INSERTION_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_VLAN_STRIPPING_LBN 20 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_VLAN_STRIPPING_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TX_TSO_LBN 21 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TX_TSO_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_PREFIX_LEN_0_LBN 22 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_PREFIX_LEN_0_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_PREFIX_LEN_14_LBN 23 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_PREFIX_LEN_14_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_TIMESTAMP_LBN 24 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_TIMESTAMP_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_BATCHING_LBN 25 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_BATCHING_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_MCAST_FILTER_CHAINING_LBN 26 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_MCAST_FILTER_CHAINING_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_PM_AND_RXDP_COUNTERS_LBN 27 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_PM_AND_RXDP_COUNTERS_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_DISABLE_SCATTER_LBN 28 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_DISABLE_SCATTER_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TX_MCAST_UDP_LOOPBACK_LBN 29 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TX_MCAST_UDP_LOOPBACK_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_EVB_LBN 30 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_EVB_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_VXLAN_NVGRE_LBN 31 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_VXLAN_NVGRE_WIDTH 1 +/* RxDPCPU firmware id. */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_DPCPU_FW_ID_OFST 4 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_DPCPU_FW_ID_LEN 2 +/* enum: Standard RXDP firmware */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP 0x0 +/* enum: Low latency RXDP firmware */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_LOW_LATENCY 0x1 +/* enum: Packed stream RXDP firmware */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_PACKED_STREAM 0x2 +/* enum: BIST RXDP firmware */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_BIST 0x10a +/* enum: RXDP Test firmware image 1 */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_TO_MC_CUT_THROUGH 0x101 +/* enum: RXDP Test firmware image 2 */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD 0x102 +/* enum: RXDP Test firmware image 3 */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD_FIRST 0x103 +/* enum: RXDP Test firmware image 4 */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_EVERY_EVENT_BATCHABLE 0x104 +/* enum: RXDP Test firmware image 5 */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_BACKPRESSURE 0x105 +/* enum: RXDP Test firmware image 6 */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_PACKET_EDITS 0x106 +/* enum: RXDP Test firmware image 7 */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_RX_HDR_SPLIT 0x107 +/* enum: RXDP Test firmware image 8 */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_DISABLE_DL 0x108 +/* enum: RXDP Test firmware image 9 */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_DOORBELL_DELAY 0x10b +/* TxDPCPU firmware id. */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TX_DPCPU_FW_ID_OFST 6 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TX_DPCPU_FW_ID_LEN 2 +/* enum: Standard TXDP firmware */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP 0x0 +/* enum: Low latency TXDP firmware */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_LOW_LATENCY 0x1 +/* enum: High packet rate TXDP firmware */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_HIGH_PACKET_RATE 0x3 +/* enum: BIST TXDP firmware */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_BIST 0x12d +/* enum: TXDP Test firmware image 1 */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_TEST_FW_TSO_EDIT 0x101 +/* enum: TXDP Test firmware image 2 */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_TEST_FW_PACKET_EDITS 0x102 +/* enum: TXDP CSR bus test firmware */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_TEST_FW_CSR 0x103 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_VERSION_OFST 8 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_VERSION_LEN 2 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_VERSION_REV_LBN 0 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_VERSION_REV_WIDTH 12 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_VERSION_TYPE_LBN 12 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_VERSION_TYPE_WIDTH 4 +/* enum: reserved value - do not use (may indicate alternative interpretation + * of REV field in future) + */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_RESERVED 0x0 +/* enum: Trivial RX PD firmware for early Huntington development (Huntington + * development only) + */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_FIRST_PKT 0x1 +/* enum: RX PD firmware with approximately Siena-compatible behaviour + * (Huntington development only) + */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_SIENA_COMPAT 0x2 +/* enum: Virtual switching (full feature) RX PD production firmware */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_VSWITCH 0x3 +/* enum: siena_compat variant RX PD firmware using PM rather than MAC + * (Huntington development only) + */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_SIENA_COMPAT_PM 0x4 +/* enum: Low latency RX PD production firmware */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_LOW_LATENCY 0x5 +/* enum: Packed stream RX PD production firmware */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_PACKED_STREAM 0x6 +/* enum: RX PD firmware handling layer 2 only for high packet rate performance + * tests (Medford development only) + */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_LAYER2_PERF 0x7 +/* enum: Rules engine RX PD production firmware */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_RULES_ENGINE 0x8 +/* enum: RX PD firmware for GUE parsing prototype (Medford development only) */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE 0xe +/* enum: RX PD firmware parsing but not filtering network overlay tunnel + * encapsulations (Medford development only) + */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_TESTFW_ENCAP_PARSING_ONLY 0xf +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_VERSION_OFST 10 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_VERSION_LEN 2 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_VERSION_REV_LBN 0 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_VERSION_REV_WIDTH 12 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_VERSION_TYPE_LBN 12 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_VERSION_TYPE_WIDTH 4 +/* enum: reserved value - do not use (may indicate alternative interpretation + * of REV field in future) + */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_RESERVED 0x0 +/* enum: Trivial TX PD firmware for early Huntington development (Huntington + * development only) + */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_FIRST_PKT 0x1 +/* enum: TX PD firmware with approximately Siena-compatible behaviour + * (Huntington development only) + */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_SIENA_COMPAT 0x2 +/* enum: Virtual switching (full feature) TX PD production firmware */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_VSWITCH 0x3 +/* enum: siena_compat variant TX PD firmware using PM rather than MAC + * (Huntington development only) + */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_SIENA_COMPAT_PM 0x4 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_LOW_LATENCY 0x5 /* enum */ +/* enum: TX PD firmware handling layer 2 only for high packet rate performance + * tests (Medford development only) + */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_LAYER2_PERF 0x7 +/* enum: Rules engine TX PD production firmware */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_RULES_ENGINE 0x8 +/* enum: RX PD firmware for GUE parsing prototype (Medford development only) */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE 0xe +/* Hardware capabilities of NIC */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_HW_CAPABILITIES_OFST 12 +/* Licensed capabilities */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_LICENSE_CAPABILITIES_OFST 16 +/* Second word of flags. Not present on older firmware (check the length). */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_FLAGS2_OFST 20 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TX_TSO_V2_LBN 0 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TX_TSO_V2_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TX_TSO_V2_ENCAP_LBN 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TX_TSO_V2_ENCAP_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_EVQ_TIMER_CTRL_LBN 2 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_EVQ_TIMER_CTRL_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_EVENT_CUT_THROUGH_LBN 3 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_EVENT_CUT_THROUGH_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_CUT_THROUGH_LBN 4 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_CUT_THROUGH_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TX_VFIFO_ULL_MODE_LBN 5 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TX_VFIFO_ULL_MODE_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_MAC_STATS_40G_TX_SIZE_BINS_LBN 6 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_MAC_STATS_40G_TX_SIZE_BINS_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_INIT_EVQ_V2_LBN 7 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_INIT_EVQ_V2_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TX_MAC_TIMESTAMPING_LBN 8 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TX_MAC_TIMESTAMPING_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TX_TIMESTAMP_LBN 9 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TX_TIMESTAMP_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_SNIFF_LBN 10 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_SNIFF_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TX_SNIFF_LBN 11 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TX_SNIFF_WIDTH 1 +/* Number of FATSOv2 contexts per datapath supported by this NIC. Not present + * on older firmware (check the length). + */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TX_TSO_V2_N_CONTEXTS_OFST 24 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TX_TSO_V2_N_CONTEXTS_LEN 2 +/* One byte per PF containing the number of the external port assigned to this + * PF, indexed by PF number. Special values indicate that a PF is either not + * present or not assigned. + */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_PFS_TO_PORTS_ASSIGNMENT_OFST 26 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_PFS_TO_PORTS_ASSIGNMENT_LEN 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_PFS_TO_PORTS_ASSIGNMENT_NUM 16 +/* enum: The caller is not permitted to access information on this PF. */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_ACCESS_NOT_PERMITTED 0xff +/* enum: PF does not exist. */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_PF_NOT_PRESENT 0xfe +/* enum: PF does exist but is not assigned to any external port. */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_PF_NOT_ASSIGNED 0xfd +/* enum: This value indicates that PF is assigned, but it cannot be expressed + * in this field. It is intended for a possible future situation where a more + * complex scheme of PFs to ports mapping is being used. The future driver + * should look for a new field supporting the new scheme. The current/old + * driver should treat this value as PF_NOT_ASSIGNED. + */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_INCOMPATIBLE_ASSIGNMENT 0xfc +/* One byte per PF containing the number of its VFs, indexed by PF number. A + * special value indicates that a PF is not present. + */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_NUM_VFS_PER_PF_OFST 42 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_NUM_VFS_PER_PF_LEN 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_NUM_VFS_PER_PF_NUM 16 +/* enum: The caller is not permitted to access information on this PF. */ +/* MC_CMD_GET_CAPABILITIES_V3_OUT_ACCESS_NOT_PERMITTED 0xff */ +/* enum: PF does not exist. */ +/* MC_CMD_GET_CAPABILITIES_V3_OUT_PF_NOT_PRESENT 0xfe */ +/* Number of VIs available for each external port */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_NUM_VIS_PER_PORT_OFST 58 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_NUM_VIS_PER_PORT_LEN 2 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_NUM_VIS_PER_PORT_NUM 4 +/* Size of RX descriptor cache expressed as binary logarithm The actual size + * equals (2 ^ RX_DESC_CACHE_SIZE) + */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_DESC_CACHE_SIZE_OFST 66 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_DESC_CACHE_SIZE_LEN 1 +/* Size of TX descriptor cache expressed as binary logarithm The actual size + * equals (2 ^ TX_DESC_CACHE_SIZE) + */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TX_DESC_CACHE_SIZE_OFST 67 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TX_DESC_CACHE_SIZE_LEN 1 +/* Total number of available PIO buffers */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_NUM_PIO_BUFFS_OFST 68 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_NUM_PIO_BUFFS_LEN 2 +/* Size of a single PIO buffer */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_SIZE_PIO_BUFF_OFST 70 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_SIZE_PIO_BUFF_LEN 2 +/* On chips later than Medford the amount of address space assigned to each VI + * is configurable. This is a global setting that the driver must query to + * discover the VI to address mapping. Cut-through PIO (CTPIO) in not available + * with 8k VI windows. + */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_OFST 72 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_LEN 1 +/* enum: Each VI occupies 8k as on Huntington and Medford. PIO is at offset 4k. + * CTPIO is not mapped. + */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_8K 0x0 +/* enum: Each VI occupies 16k. PIO is at offset 4k. CTPIO is at offset 12k. */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_16K 0x1 +/* enum: Each VI occupies 64k. PIO is at offset 4k. CTPIO is at offset 12k. */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_64K 0x2 + /***********************************/ /* MC_CMD_V2_EXTN @@ -9026,7 +9434,7 @@ */ #define MC_CMD_GET_RXDP_CONFIG 0xc2 -#define MC_CMD_0xc2_PRIVILEGE_CTG SRIOV_CTG_ADMIN +#define MC_CMD_0xc2_PRIVILEGE_CTG SRIOV_CTG_GENERAL /* MC_CMD_GET_RXDP_CONFIG_IN msgrequest */ #define MC_CMD_GET_RXDP_CONFIG_IN_LEN 0 @@ -10125,7 +10533,9 @@ * that this operation returns a zero-length response */ #define MC_CMD_LICENSING_V3_IN_OP_UPDATE_LICENSE 0x0 -/* enum: report counts of installed licenses */ +/* enum: report counts of installed licenses Returns EAGAIN if license + * processing (updating) has been started but not yet completed. + */ #define MC_CMD_LICENSING_V3_IN_OP_REPORT_LICENSE 0x1 /* MC_CMD_LICENSING_V3_OUT msgresponse */ @@ -10763,6 +11173,8 @@ #define MC_CMD_GET_WORKAROUNDS_OUT_BUG42008 0x20 /* enum: Bug 26807 features present in firmware (multicast filter chaining) */ #define MC_CMD_GET_WORKAROUNDS_OUT_BUG26807 0x40 +/* enum: Bug 61265 work around (broken EVQ TMR writes). */ +#define MC_CMD_GET_WORKAROUNDS_OUT_BUG61265 0x80 /***********************************/ @@ -11280,22 +11692,110 @@ #define MC_CMD_0x118_PRIVILEGE_CTG SRIOV_CTG_ADMIN /* MC_CMD_RX_BALANCING_IN msgrequest */ -#define MC_CMD_RX_BALANCING_IN_LEN 4 +#define MC_CMD_RX_BALANCING_IN_LEN 16 /* The RX port whose upconverter table will be modified */ #define MC_CMD_RX_BALANCING_IN_PORT_OFST 0 -#define MC_CMD_RX_BALANCING_IN_PORT_LEN 1 /* The VLAN priority associated to the table index and vFIFO */ -#define MC_CMD_RX_BALANCING_IN_PRIORITY_OFST 1 -#define MC_CMD_RX_BALANCING_IN_PRIORITY_LEN 1 +#define MC_CMD_RX_BALANCING_IN_PRIORITY_OFST 4 /* The resulting bit of SRC^DST for indexing the table */ -#define MC_CMD_RX_BALANCING_IN_SRC_DST_OFST 2 -#define MC_CMD_RX_BALANCING_IN_SRC_DST_LEN 1 +#define MC_CMD_RX_BALANCING_IN_SRC_DST_OFST 8 /* The RX engine to which the vFIFO in the table entry will point to */ -#define MC_CMD_RX_BALANCING_IN_ENG_OFST 3 -#define MC_CMD_RX_BALANCING_IN_ENG_LEN 1 +#define MC_CMD_RX_BALANCING_IN_ENG_OFST 12 /* MC_CMD_RX_BALANCING_OUT msgresponse */ #define MC_CMD_RX_BALANCING_OUT_LEN 0 +/***********************************/ +/* MC_CMD_SET_EVQ_TMR + * Update the timer load, timer reload and timer mode values for a given EVQ. + * The requested timer values (in TMR_LOAD_REQ_NS and TMR_RELOAD_REQ_NS) will + * be rounded up to the granularity supported by the hardware, then truncated + * to the range supported by the hardware. The resulting value after the + * rounding and truncation will be returned to the caller (in TMR_LOAD_ACT_NS + * and TMR_RELOAD_ACT_NS). + */ +#define MC_CMD_SET_EVQ_TMR 0x120 + +#define MC_CMD_0x120_PRIVILEGE_CTG SRIOV_CTG_GENERAL + +/* MC_CMD_SET_EVQ_TMR_IN msgrequest */ +#define MC_CMD_SET_EVQ_TMR_IN_LEN 16 +/* Function-relative queue instance */ +#define MC_CMD_SET_EVQ_TMR_IN_INSTANCE_OFST 0 +/* Requested value for timer load (in nanoseconds) */ +#define MC_CMD_SET_EVQ_TMR_IN_TMR_LOAD_REQ_NS_OFST 4 +/* Requested value for timer reload (in nanoseconds) */ +#define MC_CMD_SET_EVQ_TMR_IN_TMR_RELOAD_REQ_NS_OFST 8 +/* Timer mode. Meanings as per EVQ_TMR_REG.TC_TIMER_VAL */ +#define MC_CMD_SET_EVQ_TMR_IN_TMR_MODE_OFST 12 +#define MC_CMD_SET_EVQ_TMR_IN_TIMER_MODE_DIS 0x0 /* enum */ +#define MC_CMD_SET_EVQ_TMR_IN_TIMER_MODE_IMMED_START 0x1 /* enum */ +#define MC_CMD_SET_EVQ_TMR_IN_TIMER_MODE_TRIG_START 0x2 /* enum */ +#define MC_CMD_SET_EVQ_TMR_IN_TIMER_MODE_INT_HLDOFF 0x3 /* enum */ + +/* MC_CMD_SET_EVQ_TMR_OUT msgresponse */ +#define MC_CMD_SET_EVQ_TMR_OUT_LEN 8 +/* Actual value for timer load (in nanoseconds) */ +#define MC_CMD_SET_EVQ_TMR_OUT_TMR_LOAD_ACT_NS_OFST 0 +/* Actual value for timer reload (in nanoseconds) */ +#define MC_CMD_SET_EVQ_TMR_OUT_TMR_RELOAD_ACT_NS_OFST 4 + + +/***********************************/ +/* MC_CMD_GET_EVQ_TMR_PROPERTIES + * Query properties about the event queue timers. + */ +#define MC_CMD_GET_EVQ_TMR_PROPERTIES 0x122 + +#define MC_CMD_0x122_PRIVILEGE_CTG SRIOV_CTG_GENERAL + +/* MC_CMD_GET_EVQ_TMR_PROPERTIES_IN msgrequest */ +#define MC_CMD_GET_EVQ_TMR_PROPERTIES_IN_LEN 0 + +/* MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT msgresponse */ +#define MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT_LEN 36 +/* Reserved for future use. */ +#define MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT_FLAGS_OFST 0 +/* For timers updated via writes to EVQ_TMR_REG, this is the time interval (in + * nanoseconds) for each increment of the timer load/reload count. The + * requested duration of a timer is this value multiplied by the timer + * load/reload count. + */ +#define MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT_TMR_REG_NS_PER_COUNT_OFST 4 +/* For timers updated via writes to EVQ_TMR_REG, this is the maximum value + * allowed for timer load/reload counts. + */ +#define MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT_TMR_REG_MAX_COUNT_OFST 8 +/* For timers updated via writes to EVQ_TMR_REG, timer load/reload counts not a + * multiple of this step size will be rounded in an implementation defined + * manner. + */ +#define MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT_TMR_REG_STEP_OFST 12 +/* Maximum timer duration (in nanoseconds) for timers updated via MCDI. Only + * meaningful if MC_CMD_SET_EVQ_TMR is implemented. + */ +#define MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT_MCDI_TMR_MAX_NS_OFST 16 +/* Timer durations requested via MCDI that are not a multiple of this step size + * will be rounded up. Only meaningful if MC_CMD_SET_EVQ_TMR is implemented. + */ +#define MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT_MCDI_TMR_STEP_NS_OFST 20 +/* For timers updated using the bug35388 workaround, this is the time interval + * (in nanoseconds) for each increment of the timer load/reload count. The + * requested duration of a timer is this value multiplied by the timer + * load/reload count. This field is only meaningful if the bug35388 workaround + * is enabled. + */ +#define MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT_BUG35388_TMR_NS_PER_COUNT_OFST 24 +/* For timers updated using the bug35388 workaround, this is the maximum value + * allowed for timer load/reload counts. This field is only meaningful if the + * bug35388 workaround is enabled. + */ +#define MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT_BUG35388_TMR_MAX_COUNT_OFST 28 +/* For timers updated using the bug35388 workaround, timer load/reload counts + * not a multiple of this step size will be rounded in an implementation + * defined manner. This field is only meaningful if the bug35388 workaround is + * enabled. + */ +#define MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT_BUG35388_TMR_STEP_OFST 32 #endif /* MCDI_PCOL_H */ From 7014d7f672f7dc180d1386239c4f0c455e405ed9 Mon Sep 17 00:00:00 2001 From: Bert Kenward Date: Thu, 11 Aug 2016 13:01:21 +0100 Subject: [PATCH 0145/1715] sfc: allow asynchronous MCDI without completion function Signed-off-by: Bert Kenward Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/mcdi.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sfc/mcdi.c b/drivers/net/ethernet/sfc/mcdi.c index d28e7dd8fa3c..9fbc12a8f80c 100644 --- a/drivers/net/ethernet/sfc/mcdi.c +++ b/drivers/net/ethernet/sfc/mcdi.c @@ -548,7 +548,10 @@ static bool efx_mcdi_complete_async(struct efx_mcdi_iface *mcdi, bool timeout) efx_mcdi_display_error(efx, async->cmd, async->inlen, errbuf, err_len, rc); } - async->complete(efx, async->cookie, rc, outbuf, data_len); + + if (async->complete) + async->complete(efx, async->cookie, rc, outbuf, + min(async->outlen, data_len)); kfree(async); efx_mcdi_release(mcdi); From ca889a052c20cad171ff5161c030959e47e84666 Mon Sep 17 00:00:00 2001 From: Bert Kenward Date: Thu, 11 Aug 2016 13:01:35 +0100 Subject: [PATCH 0146/1715] sfc: retrieve second word of datapath capabilities Signed-off-by: Bert Kenward Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/ef10.c | 10 ++++++++-- drivers/net/ethernet/sfc/nic.h | 3 +++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index f658fee74f18..fd5d086f53a5 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -177,7 +177,7 @@ static int efx_ef10_get_vf_index(struct efx_nic *efx) static int efx_ef10_init_datapath_caps(struct efx_nic *efx) { - MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_CAPABILITIES_OUT_LEN); + MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_CAPABILITIES_V2_OUT_LEN); struct efx_ef10_nic_data *nic_data = efx->nic_data; size_t outlen; int rc; @@ -188,7 +188,7 @@ static int efx_ef10_init_datapath_caps(struct efx_nic *efx) outbuf, sizeof(outbuf), &outlen); if (rc) return rc; - if (outlen < sizeof(outbuf)) { + if (outlen < MC_CMD_GET_CAPABILITIES_OUT_LEN) { netif_err(efx, drv, efx->net_dev, "unable to read datapath firmware capabilities\n"); return -EIO; @@ -197,6 +197,12 @@ static int efx_ef10_init_datapath_caps(struct efx_nic *efx) nic_data->datapath_caps = MCDI_DWORD(outbuf, GET_CAPABILITIES_OUT_FLAGS1); + if (outlen >= MC_CMD_GET_CAPABILITIES_V2_OUT_LEN) + nic_data->datapath_caps2 = MCDI_DWORD(outbuf, + GET_CAPABILITIES_V2_OUT_FLAGS2); + else + nic_data->datapath_caps2 = 0; + /* record the DPCPU firmware IDs to determine VEB vswitching support. */ nic_data->rx_dpcpu_fw_id = diff --git a/drivers/net/ethernet/sfc/nic.h b/drivers/net/ethernet/sfc/nic.h index 96944c3c9d14..531c5a8a72c5 100644 --- a/drivers/net/ethernet/sfc/nic.h +++ b/drivers/net/ethernet/sfc/nic.h @@ -511,6 +511,8 @@ enum { * after MC reboot * @datapath_caps: Capabilities of datapath firmware (FLAGS1 field of * %MC_CMD_GET_CAPABILITIES response) + * @datapath_caps2: Further Capabilities of datapath firmware (FLAGS2 field of + * %MC_CMD_GET_CAPABILITIES response) * @rx_dpcpu_fw_id: Firmware ID of the RxDPCPU * @tx_dpcpu_fw_id: Firmware ID of the TxDPCPU * @vport_id: The function's vport ID, only relevant for PFs @@ -542,6 +544,7 @@ struct efx_ef10_nic_data { bool workaround_26807; bool must_check_datapath_caps; u32 datapath_caps; + u32 datapath_caps2; unsigned int rx_dpcpu_fw_id; unsigned int tx_dpcpu_fw_id; unsigned int vport_id; From a995560a279628bcc00a0524ab068c39a3a184fb Mon Sep 17 00:00:00 2001 From: Bert Kenward Date: Thu, 11 Aug 2016 13:01:54 +0100 Subject: [PATCH 0147/1715] sfc: use new performance based event queue init Rather than explicitly specifying flags we can now specify a desired performance target to the firmware, ie higher throughput or lower latency. For now we use the default "auto" configuration. Signed-off-by: Bert Kenward Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/ef10.c | 43 ++++++++++++++++++++++++--------- 1 file changed, 31 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index fd5d086f53a5..e57fa2e8e7c8 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -2541,13 +2541,12 @@ fail: static int efx_ef10_ev_init(struct efx_channel *channel) { MCDI_DECLARE_BUF(inbuf, - MC_CMD_INIT_EVQ_IN_LEN(EFX_MAX_EVQ_SIZE * 8 / - EFX_BUF_SIZE)); - MCDI_DECLARE_BUF(outbuf, MC_CMD_INIT_EVQ_OUT_LEN); + MC_CMD_INIT_EVQ_V2_IN_LEN(EFX_MAX_EVQ_SIZE * 8 / + EFX_BUF_SIZE)); + MCDI_DECLARE_BUF(outbuf, MC_CMD_INIT_EVQ_V2_OUT_LEN); size_t entries = channel->eventq.buf.len / EFX_BUF_SIZE; struct efx_nic *efx = channel->efx; struct efx_ef10_nic_data *nic_data; - bool supports_rx_merge; size_t inlen, outlen; unsigned int enabled, implemented; dma_addr_t dma_addr; @@ -2555,9 +2554,6 @@ static int efx_ef10_ev_init(struct efx_channel *channel) int i; nic_data = efx->nic_data; - supports_rx_merge = - !!(nic_data->datapath_caps & - 1 << MC_CMD_GET_CAPABILITIES_OUT_RX_BATCHING_LBN); /* Fill event queue with all ones (i.e. empty events) */ memset(channel->eventq.buf.addr, 0xff, channel->eventq.buf.len); @@ -2566,11 +2562,6 @@ static int efx_ef10_ev_init(struct efx_channel *channel) MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_INSTANCE, channel->channel); /* INIT_EVQ expects index in vector table, not absolute */ MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_IRQ_NUM, channel->channel); - MCDI_POPULATE_DWORD_4(inbuf, INIT_EVQ_IN_FLAGS, - INIT_EVQ_IN_FLAG_INTERRUPTING, 1, - INIT_EVQ_IN_FLAG_RX_MERGE, 1, - INIT_EVQ_IN_FLAG_TX_MERGE, 1, - INIT_EVQ_IN_FLAG_CUT_THRU, !supports_rx_merge); MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_TMR_MODE, MC_CMD_INIT_EVQ_IN_TMR_MODE_DIS); MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_TMR_LOAD, 0); @@ -2579,6 +2570,27 @@ static int efx_ef10_ev_init(struct efx_channel *channel) MC_CMD_INIT_EVQ_IN_COUNT_MODE_DIS); MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_COUNT_THRSHLD, 0); + if (nic_data->datapath_caps2 & + 1 << MC_CMD_GET_CAPABILITIES_V2_OUT_INIT_EVQ_V2_LBN) { + /* Use the new generic approach to specifying event queue + * configuration, requesting lower latency or higher throughput. + * The options that actually get used appear in the output. + */ + MCDI_POPULATE_DWORD_2(inbuf, INIT_EVQ_V2_IN_FLAGS, + INIT_EVQ_V2_IN_FLAG_INTERRUPTING, 1, + INIT_EVQ_V2_IN_FLAG_TYPE, + MC_CMD_INIT_EVQ_V2_IN_FLAG_TYPE_AUTO); + } else { + bool cut_thru = !(nic_data->datapath_caps & + 1 << MC_CMD_GET_CAPABILITIES_OUT_RX_BATCHING_LBN); + + MCDI_POPULATE_DWORD_4(inbuf, INIT_EVQ_IN_FLAGS, + INIT_EVQ_IN_FLAG_INTERRUPTING, 1, + INIT_EVQ_IN_FLAG_RX_MERGE, 1, + INIT_EVQ_IN_FLAG_TX_MERGE, 1, + INIT_EVQ_IN_FLAG_CUT_THRU, cut_thru); + } + dma_addr = channel->eventq.buf.dma_addr; for (i = 0; i < entries; ++i) { MCDI_SET_ARRAY_QWORD(inbuf, INIT_EVQ_IN_DMA_ADDR, i, dma_addr); @@ -2589,6 +2601,13 @@ static int efx_ef10_ev_init(struct efx_channel *channel) rc = efx_mcdi_rpc(efx, MC_CMD_INIT_EVQ, inbuf, inlen, outbuf, sizeof(outbuf), &outlen); + + if (outlen >= MC_CMD_INIT_EVQ_V2_OUT_LEN) + netif_dbg(efx, drv, efx->net_dev, + "Channel %d using event queue flags %08x\n", + channel->channel, + MCDI_DWORD(outbuf, INIT_EVQ_V2_OUT_FLAGS)); + /* IRQ return is ignored */ if (channel->channel || rc) return rc; From 539de7c5240a257c9028b3063873170a6867b159 Mon Sep 17 00:00:00 2001 From: Bert Kenward Date: Thu, 11 Aug 2016 13:02:09 +0100 Subject: [PATCH 0148/1715] sfc: set interrupt moderation via MCDI SFN8000-series NICs require a new method of setting interrupt moderation, via MCDI. This is indicated by a workaround flag. This new MCDI command takes an explicit time value rather than a number of ticks. It therefore makes sense to also store the moderation values in terms of time, since that is what the ethtool interface is interested in. Signed-off-by: Bert Kenward Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/ef10.c | 38 ++++++++--- drivers/net/ethernet/sfc/efx.c | 89 +++++++++++++------------- drivers/net/ethernet/sfc/efx.h | 2 + drivers/net/ethernet/sfc/falcon.c | 7 +- drivers/net/ethernet/sfc/net_driver.h | 10 +-- drivers/net/ethernet/sfc/nic.h | 2 + drivers/net/ethernet/sfc/ptp.c | 2 +- drivers/net/ethernet/sfc/siena.c | 11 +++- drivers/net/ethernet/sfc/workarounds.h | 4 ++ 9 files changed, 103 insertions(+), 62 deletions(-) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index e57fa2e8e7c8..5f6c4fa450c5 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -1749,27 +1749,43 @@ static size_t efx_ef10_update_stats_vf(struct efx_nic *efx, u64 *full_stats, static void efx_ef10_push_irq_moderation(struct efx_channel *channel) { struct efx_nic *efx = channel->efx; - unsigned int mode, value; + unsigned int mode, usecs; efx_dword_t timer_cmd; - if (channel->irq_moderation) { + if (channel->irq_moderation_us) { mode = 3; - value = channel->irq_moderation - 1; + usecs = channel->irq_moderation_us; } else { mode = 0; - value = 0; + usecs = 0; } - if (EFX_EF10_WORKAROUND_35388(efx)) { + if (EFX_EF10_WORKAROUND_61265(efx)) { + MCDI_DECLARE_BUF(inbuf, MC_CMD_SET_EVQ_TMR_IN_LEN); + unsigned int ns = usecs * 1000; + + MCDI_SET_DWORD(inbuf, SET_EVQ_TMR_IN_INSTANCE, + channel->channel); + MCDI_SET_DWORD(inbuf, SET_EVQ_TMR_IN_TMR_LOAD_REQ_NS, ns); + MCDI_SET_DWORD(inbuf, SET_EVQ_TMR_IN_TMR_RELOAD_REQ_NS, ns); + MCDI_SET_DWORD(inbuf, SET_EVQ_TMR_IN_TMR_MODE, mode); + + efx_mcdi_rpc_async(efx, MC_CMD_SET_EVQ_TMR, + inbuf, sizeof(inbuf), 0, NULL, 0); + } else if (EFX_EF10_WORKAROUND_35388(efx)) { + unsigned int ticks = efx_usecs_to_ticks(efx, usecs); + EFX_POPULATE_DWORD_3(timer_cmd, ERF_DD_EVQ_IND_TIMER_FLAGS, EFE_DD_EVQ_IND_TIMER_FLAGS, ERF_DD_EVQ_IND_TIMER_MODE, mode, - ERF_DD_EVQ_IND_TIMER_VAL, value); + ERF_DD_EVQ_IND_TIMER_VAL, ticks); efx_writed_page(efx, &timer_cmd, ER_DD_EVQ_INDIRECT, channel->channel); } else { + unsigned int ticks = efx_usecs_to_ticks(efx, usecs); + EFX_POPULATE_DWORD_2(timer_cmd, ERF_DZ_TC_TIMER_MODE, mode, - ERF_DZ_TC_TIMER_VAL, value); + ERF_DZ_TC_TIMER_VAL, ticks); efx_writed_page(efx, &timer_cmd, ER_DZ_EVQ_TMR, channel->channel); } @@ -2615,10 +2631,11 @@ static int efx_ef10_ev_init(struct efx_channel *channel) /* Successfully created event queue on channel 0 */ rc = efx_mcdi_get_workarounds(efx, &implemented, &enabled); if (rc == -ENOSYS) { - /* GET_WORKAROUNDS was implemented before the bug26807 - * workaround, thus the latter must be unavailable in this fw + /* GET_WORKAROUNDS was implemented before these workarounds, + * thus they must be unavailable in this firmware. */ nic_data->workaround_26807 = false; + nic_data->workaround_61265 = false; rc = 0; } else if (rc) { goto fail; @@ -2658,6 +2675,9 @@ static int efx_ef10_ev_init(struct efx_channel *channel) rc = 0; } } + + nic_data->workaround_61265 = + !!(implemented & MC_CMD_GET_WORKAROUNDS_OUT_BUG61265); } if (!rc) diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 14b821b1c880..b26e1f929dc4 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -281,6 +281,27 @@ static int efx_process_channel(struct efx_channel *channel, int budget) * NAPI guarantees serialisation of polls of the same device, which * provides the guarantee required by efx_process_channel(). */ +static void efx_update_irq_mod(struct efx_nic *efx, struct efx_channel *channel) +{ + int step = efx->irq_mod_step_us; + + if (channel->irq_mod_score < irq_adapt_low_thresh) { + if (channel->irq_moderation_us > step) { + channel->irq_moderation_us -= step; + efx->type->push_irq_moderation(channel); + } + } else if (channel->irq_mod_score > irq_adapt_high_thresh) { + if (channel->irq_moderation_us < + efx->irq_rx_moderation_us) { + channel->irq_moderation_us += step; + efx->type->push_irq_moderation(channel); + } + } + + channel->irq_count = 0; + channel->irq_mod_score = 0; +} + static int efx_poll(struct napi_struct *napi, int budget) { struct efx_channel *channel = @@ -301,22 +322,7 @@ static int efx_poll(struct napi_struct *napi, int budget) if (efx_channel_has_rx_queue(channel) && efx->irq_rx_adaptive && unlikely(++channel->irq_count == 1000)) { - if (unlikely(channel->irq_mod_score < - irq_adapt_low_thresh)) { - if (channel->irq_moderation > 1) { - channel->irq_moderation -= 1; - efx->type->push_irq_moderation(channel); - } - } else if (unlikely(channel->irq_mod_score > - irq_adapt_high_thresh)) { - if (channel->irq_moderation < - efx->irq_rx_moderation) { - channel->irq_moderation += 1; - efx->type->push_irq_moderation(channel); - } - } - channel->irq_count = 0; - channel->irq_mod_score = 0; + efx_update_irq_mod(efx, channel); } efx_filter_rfs_expire(channel); @@ -1703,6 +1709,7 @@ static int efx_probe_nic(struct efx_nic *efx) netif_set_real_num_rx_queues(efx->net_dev, efx->n_rx_channels); /* Initialise the interrupt moderation settings */ + efx->irq_mod_step_us = DIV_ROUND_UP(efx->timer_quantum_ns, 1000); efx_init_irq_moderation(efx, tx_irq_mod_usec, rx_irq_mod_usec, true, true); @@ -1949,14 +1956,21 @@ static void efx_remove_all(struct efx_nic *efx) * Interrupt moderation * **************************************************************************/ - -static unsigned int irq_mod_ticks(unsigned int usecs, unsigned int quantum_ns) +unsigned int efx_usecs_to_ticks(struct efx_nic *efx, unsigned int usecs) { if (usecs == 0) return 0; - if (usecs * 1000 < quantum_ns) + if (usecs * 1000 < efx->timer_quantum_ns) return 1; /* never round down to 0 */ - return usecs * 1000 / quantum_ns; + return usecs * 1000 / efx->timer_quantum_ns; +} + +unsigned int efx_ticks_to_usecs(struct efx_nic *efx, unsigned int ticks) +{ + /* We must round up when converting ticks to microseconds + * because we round down when converting the other way. + */ + return DIV_ROUND_UP(ticks * efx->timer_quantum_ns, 1000); } /* Set interrupt moderation parameters */ @@ -1968,18 +1982,12 @@ int efx_init_irq_moderation(struct efx_nic *efx, unsigned int tx_usecs, unsigned int irq_mod_max = DIV_ROUND_UP(efx->type->timer_period_max * efx->timer_quantum_ns, 1000); - unsigned int tx_ticks; - unsigned int rx_ticks; - EFX_ASSERT_RESET_SERIALISED(efx); if (tx_usecs > irq_mod_max || rx_usecs > irq_mod_max) return -EINVAL; - tx_ticks = irq_mod_ticks(tx_usecs, efx->timer_quantum_ns); - rx_ticks = irq_mod_ticks(rx_usecs, efx->timer_quantum_ns); - - if (tx_ticks != rx_ticks && efx->tx_channel_offset == 0 && + if (tx_usecs != rx_usecs && efx->tx_channel_offset == 0 && !rx_may_override_tx) { netif_err(efx, drv, efx->net_dev, "Channels are shared. " "RX and TX IRQ moderation must be equal\n"); @@ -1987,12 +1995,12 @@ int efx_init_irq_moderation(struct efx_nic *efx, unsigned int tx_usecs, } efx->irq_rx_adaptive = rx_adaptive; - efx->irq_rx_moderation = rx_ticks; + efx->irq_rx_moderation_us = rx_usecs; efx_for_each_channel(channel, efx) { if (efx_channel_has_rx_queue(channel)) - channel->irq_moderation = rx_ticks; + channel->irq_moderation_us = rx_usecs; else if (efx_channel_has_tx_queues(channel)) - channel->irq_moderation = tx_ticks; + channel->irq_moderation_us = tx_usecs; } return 0; @@ -2001,26 +2009,21 @@ int efx_init_irq_moderation(struct efx_nic *efx, unsigned int tx_usecs, void efx_get_irq_moderation(struct efx_nic *efx, unsigned int *tx_usecs, unsigned int *rx_usecs, bool *rx_adaptive) { - /* We must round up when converting ticks to microseconds - * because we round down when converting the other way. - */ - *rx_adaptive = efx->irq_rx_adaptive; - *rx_usecs = DIV_ROUND_UP(efx->irq_rx_moderation * - efx->timer_quantum_ns, - 1000); + *rx_usecs = efx->irq_rx_moderation_us; /* If channels are shared between RX and TX, so is IRQ * moderation. Otherwise, IRQ moderation is the same for all * TX channels and is not adaptive. */ - if (efx->tx_channel_offset == 0) + if (efx->tx_channel_offset == 0) { *tx_usecs = *rx_usecs; - else - *tx_usecs = DIV_ROUND_UP( - efx->channel[efx->tx_channel_offset]->irq_moderation * - efx->timer_quantum_ns, - 1000); + } else { + struct efx_channel *tx_channel; + + tx_channel = efx->channel[efx->tx_channel_offset]; + *tx_usecs = tx_channel->irq_moderation_us; + } } /************************************************************************** diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h index c3ae739e9c7a..342ae16e1f2d 100644 --- a/drivers/net/ethernet/sfc/efx.h +++ b/drivers/net/ethernet/sfc/efx.h @@ -204,6 +204,8 @@ int efx_try_recovery(struct efx_nic *efx); /* Global */ void efx_schedule_reset(struct efx_nic *efx, enum reset_type type); +unsigned int efx_usecs_to_ticks(struct efx_nic *efx, unsigned int usecs); +unsigned int efx_ticks_to_usecs(struct efx_nic *efx, unsigned int ticks); int efx_init_irq_moderation(struct efx_nic *efx, unsigned int tx_usecs, unsigned int rx_usecs, bool rx_adaptive, bool rx_may_override_tx); diff --git a/drivers/net/ethernet/sfc/falcon.c b/drivers/net/ethernet/sfc/falcon.c index d790cb8d9db3..d3d0bccaccaa 100644 --- a/drivers/net/ethernet/sfc/falcon.c +++ b/drivers/net/ethernet/sfc/falcon.c @@ -378,12 +378,15 @@ static void falcon_push_irq_moderation(struct efx_channel *channel) struct efx_nic *efx = channel->efx; /* Set timer register */ - if (channel->irq_moderation) { + if (channel->irq_moderation_us) { + unsigned int ticks; + + ticks = efx_usecs_to_ticks(efx, channel->irq_moderation_us); EFX_POPULATE_DWORD_2(timer_cmd, FRF_AB_TC_TIMER_MODE, FFE_BB_TIMER_MODE_INT_HLDOFF, FRF_AB_TC_TIMER_VAL, - channel->irq_moderation - 1); + ticks - 1); } else { EFX_POPULATE_DWORD_2(timer_cmd, FRF_AB_TC_TIMER_MODE, diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index 9ff062a36ea8..3e8c6fb0a287 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -392,7 +392,7 @@ enum efx_sync_events_state { * @eventq_init: Event queue initialised flag * @enabled: Channel enabled indicator * @irq: IRQ number (MSI and MSI-X only) - * @irq_moderation: IRQ moderation value (in hardware ticks) + * @irq_moderation_us: IRQ moderation value (in microseconds) * @napi_dev: Net device used with NAPI * @napi_str: NAPI control structure * @state: state for NAPI vs busy polling @@ -433,7 +433,7 @@ struct efx_channel { bool eventq_init; bool enabled; int irq; - unsigned int irq_moderation; + unsigned int irq_moderation_us; struct net_device *napi_dev; struct napi_struct napi_str; #ifdef CONFIG_NET_RX_BUSY_POLL @@ -811,7 +811,8 @@ struct vfdi_status; * @interrupt_mode: Interrupt mode * @timer_quantum_ns: Interrupt timer quantum, in nanoseconds * @irq_rx_adaptive: Adaptive IRQ moderation enabled for RX event queues - * @irq_rx_moderation: IRQ moderation time for RX event queues + * @irq_rx_mod_step_us: Step size for IRQ moderation for RX event queues + * @irq_rx_moderation_us: IRQ moderation time for RX event queues * @msg_enable: Log message enable flags * @state: Device state number (%STATE_*). Serialised by the rtnl_lock. * @reset_pending: Bitmask for pending resets @@ -941,7 +942,8 @@ struct efx_nic { enum efx_int_mode interrupt_mode; unsigned int timer_quantum_ns; bool irq_rx_adaptive; - unsigned int irq_rx_moderation; + unsigned int irq_mod_step_us; + unsigned int irq_rx_moderation_us; u32 msg_enable; enum nic_state state; diff --git a/drivers/net/ethernet/sfc/nic.h b/drivers/net/ethernet/sfc/nic.h index 531c5a8a72c5..d8b1694638cd 100644 --- a/drivers/net/ethernet/sfc/nic.h +++ b/drivers/net/ethernet/sfc/nic.h @@ -507,6 +507,7 @@ enum { * @stats: Hardware statistics * @workaround_35388: Flag: firmware supports workaround for bug 35388 * @workaround_26807: Flag: firmware supports workaround for bug 26807 + * @workaround_61265: Flag: firmware supports workaround for bug 61265 * @must_check_datapath_caps: Flag: @datapath_caps needs to be revalidated * after MC reboot * @datapath_caps: Capabilities of datapath firmware (FLAGS1 field of @@ -542,6 +543,7 @@ struct efx_ef10_nic_data { u64 stats[EF10_STAT_COUNT]; bool workaround_35388; bool workaround_26807; + bool workaround_61265; bool must_check_datapath_caps; u32 datapath_caps; u32 datapath_caps2; diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c index c771e0af4e06..dd204d9704c6 100644 --- a/drivers/net/ethernet/sfc/ptp.c +++ b/drivers/net/ethernet/sfc/ptp.c @@ -1306,7 +1306,7 @@ static int efx_ptp_probe_channel(struct efx_channel *channel) { struct efx_nic *efx = channel->efx; - channel->irq_moderation = 0; + channel->irq_moderation_us = 0; channel->rx_queue.core_index = 0; return efx_ptp_probe(efx, channel); diff --git a/drivers/net/ethernet/sfc/siena.c b/drivers/net/ethernet/sfc/siena.c index 2219b5424d2b..994e33f93caf 100644 --- a/drivers/net/ethernet/sfc/siena.c +++ b/drivers/net/ethernet/sfc/siena.c @@ -34,19 +34,24 @@ static void siena_init_wol(struct efx_nic *efx); static void siena_push_irq_moderation(struct efx_channel *channel) { + struct efx_nic *efx = channel->efx; efx_dword_t timer_cmd; - if (channel->irq_moderation) + if (channel->irq_moderation_us) { + unsigned int ticks; + + ticks = efx_usecs_to_ticks(efx, channel->irq_moderation_us); EFX_POPULATE_DWORD_2(timer_cmd, FRF_CZ_TC_TIMER_MODE, FFE_CZ_TIMER_MODE_INT_HLDOFF, FRF_CZ_TC_TIMER_VAL, - channel->irq_moderation - 1); - else + ticks - 1); + } else { EFX_POPULATE_DWORD_2(timer_cmd, FRF_CZ_TC_TIMER_MODE, FFE_CZ_TIMER_MODE_DIS, FRF_CZ_TC_TIMER_VAL, 0); + } efx_writed_page_locked(channel->efx, &timer_cmd, FR_BZ_TIMER_COMMAND_P0, channel->channel); } diff --git a/drivers/net/ethernet/sfc/workarounds.h b/drivers/net/ethernet/sfc/workarounds.h index 2310b75d4ec2..351cd14cb9f9 100644 --- a/drivers/net/ethernet/sfc/workarounds.h +++ b/drivers/net/ethernet/sfc/workarounds.h @@ -50,4 +50,8 @@ #define EFX_WORKAROUND_35388(efx) \ (efx_nic_rev(efx) == EFX_REV_HUNT_A0 && EFX_EF10_WORKAROUND_35388(efx)) +/* Moderation timer access must go through MCDI */ +#define EFX_EF10_WORKAROUND_61265(efx) \ + (((struct efx_ef10_nic_data *)efx->nic_data)->workaround_61265) + #endif /* EFX_WORKAROUNDS_H */ From d95e329a55baaea8c00d1b3e0ce22c974d447524 Mon Sep 17 00:00:00 2001 From: Bert Kenward Date: Thu, 11 Aug 2016 13:02:36 +0100 Subject: [PATCH 0149/1715] sfc: get timer configuration from adapter On SFN8000 series adapters the MC provides a method to get the timer quantum and the maximum timer setting. We revert to the old values if the new call is unavailable. Signed-off-by: Bert Kenward Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/ef10.c | 142 ++++++++++++++++++++------ drivers/net/ethernet/sfc/efx.c | 9 +- drivers/net/ethernet/sfc/falcon.c | 2 + drivers/net/ethernet/sfc/net_driver.h | 2 + drivers/net/ethernet/sfc/siena.c | 3 + 5 files changed, 125 insertions(+), 33 deletions(-) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 5f6c4fa450c5..b8c9f1884a15 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -233,6 +233,116 @@ static int efx_ef10_get_sysclk_freq(struct efx_nic *efx) return rc > 0 ? rc : -ERANGE; } +static int efx_ef10_get_timer_workarounds(struct efx_nic *efx) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + unsigned int implemented; + unsigned int enabled; + int rc; + + nic_data->workaround_35388 = false; + nic_data->workaround_61265 = false; + + rc = efx_mcdi_get_workarounds(efx, &implemented, &enabled); + + if (rc == -ENOSYS) { + /* Firmware without GET_WORKAROUNDS - not a problem. */ + rc = 0; + } else if (rc == 0) { + /* Bug61265 workaround is always enabled if implemented. */ + if (enabled & MC_CMD_GET_WORKAROUNDS_OUT_BUG61265) + nic_data->workaround_61265 = true; + + if (enabled & MC_CMD_GET_WORKAROUNDS_OUT_BUG35388) { + nic_data->workaround_35388 = true; + } else if (implemented & MC_CMD_GET_WORKAROUNDS_OUT_BUG35388) { + /* Workaround is implemented but not enabled. + * Try to enable it. + */ + rc = efx_mcdi_set_workaround(efx, + MC_CMD_WORKAROUND_BUG35388, + true, NULL); + if (rc == 0) + nic_data->workaround_35388 = true; + /* If we failed to set the workaround just carry on. */ + rc = 0; + } + } + + netif_dbg(efx, probe, efx->net_dev, + "workaround for bug 35388 is %sabled\n", + nic_data->workaround_35388 ? "en" : "dis"); + netif_dbg(efx, probe, efx->net_dev, + "workaround for bug 61265 is %sabled\n", + nic_data->workaround_61265 ? "en" : "dis"); + + return rc; +} + +static void efx_ef10_process_timer_config(struct efx_nic *efx, + const efx_dword_t *data) +{ + unsigned int max_count; + + if (EFX_EF10_WORKAROUND_61265(efx)) { + efx->timer_quantum_ns = MCDI_DWORD(data, + GET_EVQ_TMR_PROPERTIES_OUT_MCDI_TMR_STEP_NS); + efx->timer_max_ns = MCDI_DWORD(data, + GET_EVQ_TMR_PROPERTIES_OUT_MCDI_TMR_MAX_NS); + } else if (EFX_EF10_WORKAROUND_35388(efx)) { + efx->timer_quantum_ns = MCDI_DWORD(data, + GET_EVQ_TMR_PROPERTIES_OUT_BUG35388_TMR_NS_PER_COUNT); + max_count = MCDI_DWORD(data, + GET_EVQ_TMR_PROPERTIES_OUT_BUG35388_TMR_MAX_COUNT); + efx->timer_max_ns = max_count * efx->timer_quantum_ns; + } else { + efx->timer_quantum_ns = MCDI_DWORD(data, + GET_EVQ_TMR_PROPERTIES_OUT_TMR_REG_NS_PER_COUNT); + max_count = MCDI_DWORD(data, + GET_EVQ_TMR_PROPERTIES_OUT_TMR_REG_MAX_COUNT); + efx->timer_max_ns = max_count * efx->timer_quantum_ns; + } + + netif_dbg(efx, probe, efx->net_dev, + "got timer properties from MC: quantum %u ns; max %u ns\n", + efx->timer_quantum_ns, efx->timer_max_ns); +} + +static int efx_ef10_get_timer_config(struct efx_nic *efx) +{ + MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT_LEN); + int rc; + + rc = efx_ef10_get_timer_workarounds(efx); + if (rc) + return rc; + + rc = efx_mcdi_rpc_quiet(efx, MC_CMD_GET_EVQ_TMR_PROPERTIES, NULL, 0, + outbuf, sizeof(outbuf), NULL); + + if (rc == 0) { + efx_ef10_process_timer_config(efx, outbuf); + } else if (rc == -ENOSYS || rc == -EPERM) { + /* Not available - fall back to Huntington defaults. */ + unsigned int quantum; + + rc = efx_ef10_get_sysclk_freq(efx); + if (rc < 0) + return rc; + + quantum = 1536000 / rc; /* 1536 cycles */ + efx->timer_quantum_ns = quantum; + efx->timer_max_ns = efx->type->timer_period_max * quantum; + rc = 0; + } else { + efx_mcdi_display_error(efx, MC_CMD_GET_EVQ_TMR_PROPERTIES, + MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT_LEN, + NULL, 0, rc); + } + + return rc; +} + static int efx_ef10_get_mac_address_pf(struct efx_nic *efx, u8 *mac_address) { MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_MAC_ADDRESSES_OUT_LEN); @@ -533,33 +643,11 @@ static int efx_ef10_probe(struct efx_nic *efx) if (rc) goto fail5; - rc = efx_ef10_get_sysclk_freq(efx); + rc = efx_ef10_get_timer_config(efx); if (rc < 0) goto fail5; efx->timer_quantum_ns = 1536000 / rc; /* 1536 cycles */ - /* Check whether firmware supports bug 35388 workaround. - * First try to enable it, then if we get EPERM, just - * ask if it's already enabled - */ - rc = efx_mcdi_set_workaround(efx, MC_CMD_WORKAROUND_BUG35388, true, NULL); - if (rc == 0) { - nic_data->workaround_35388 = true; - } else if (rc == -EPERM) { - unsigned int enabled; - - rc = efx_mcdi_get_workarounds(efx, NULL, &enabled); - if (rc) - goto fail3; - nic_data->workaround_35388 = enabled & - MC_CMD_GET_WORKAROUNDS_OUT_BUG35388; - } else if (rc != -ENOSYS && rc != -ENOENT) { - goto fail5; - } - netif_dbg(efx, probe, efx->net_dev, - "workaround for bug 35388 is %sabled\n", - nic_data->workaround_35388 ? "en" : "dis"); - rc = efx_mcdi_mon_probe(efx); if (rc && rc != -EPERM) goto fail5; @@ -2631,11 +2719,10 @@ static int efx_ef10_ev_init(struct efx_channel *channel) /* Successfully created event queue on channel 0 */ rc = efx_mcdi_get_workarounds(efx, &implemented, &enabled); if (rc == -ENOSYS) { - /* GET_WORKAROUNDS was implemented before these workarounds, - * thus they must be unavailable in this firmware. + /* GET_WORKAROUNDS was implemented before this workaround, + * thus it must be unavailable in this firmware. */ nic_data->workaround_26807 = false; - nic_data->workaround_61265 = false; rc = 0; } else if (rc) { goto fail; @@ -2675,9 +2762,6 @@ static int efx_ef10_ev_init(struct efx_channel *channel) rc = 0; } } - - nic_data->workaround_61265 = - !!(implemented & MC_CMD_GET_WORKAROUNDS_OUT_BUG61265); } if (!rc) diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index b26e1f929dc4..f3826ae28bac 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -1979,12 +1979,13 @@ int efx_init_irq_moderation(struct efx_nic *efx, unsigned int tx_usecs, bool rx_may_override_tx) { struct efx_channel *channel; - unsigned int irq_mod_max = DIV_ROUND_UP(efx->type->timer_period_max * - efx->timer_quantum_ns, - 1000); + unsigned int timer_max_us; + EFX_ASSERT_RESET_SERIALISED(efx); - if (tx_usecs > irq_mod_max || rx_usecs > irq_mod_max) + timer_max_us = efx->timer_max_ns / 1000; + + if (tx_usecs > timer_max_us || rx_usecs > timer_max_us) return -EINVAL; if (tx_usecs != rx_usecs && efx->tx_channel_offset == 0 && diff --git a/drivers/net/ethernet/sfc/falcon.c b/drivers/net/ethernet/sfc/falcon.c index d3d0bccaccaa..1a7092602aec 100644 --- a/drivers/net/ethernet/sfc/falcon.c +++ b/drivers/net/ethernet/sfc/falcon.c @@ -2376,6 +2376,8 @@ static int falcon_probe_nic(struct efx_nic *efx) EFX_MAX_CHANNELS); efx->max_tx_channels = efx->max_channels; efx->timer_quantum_ns = 4968; /* 621 cycles */ + efx->timer_max_ns = efx->type->timer_period_max * + efx->timer_quantum_ns; /* Initialise I2C adapter */ board = falcon_board(efx); diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index 3e8c6fb0a287..13b7f52e6724 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -810,6 +810,7 @@ struct vfdi_status; * @membase: Memory BAR value * @interrupt_mode: Interrupt mode * @timer_quantum_ns: Interrupt timer quantum, in nanoseconds + * @timer_max_ns: Interrupt timer maximum value, in nanoseconds * @irq_rx_adaptive: Adaptive IRQ moderation enabled for RX event queues * @irq_rx_mod_step_us: Step size for IRQ moderation for RX event queues * @irq_rx_moderation_us: IRQ moderation time for RX event queues @@ -941,6 +942,7 @@ struct efx_nic { enum efx_int_mode interrupt_mode; unsigned int timer_quantum_ns; + unsigned int timer_max_ns; bool irq_rx_adaptive; unsigned int irq_mod_step_us; unsigned int irq_rx_moderation_us; diff --git a/drivers/net/ethernet/sfc/siena.c b/drivers/net/ethernet/sfc/siena.c index 994e33f93caf..04ed1b4c7cd9 100644 --- a/drivers/net/ethernet/sfc/siena.c +++ b/drivers/net/ethernet/sfc/siena.c @@ -227,6 +227,9 @@ static int siena_probe_nvconfig(struct efx_nic *efx) efx->timer_quantum_ns = (caps & (1 << MC_CMD_CAPABILITIES_TURBO_ACTIVE_LBN)) ? 3072 : 6144; /* 768 cycles */ + efx->timer_max_ns = efx->type->timer_period_max * + efx->timer_quantum_ns; + return rc; } From aed704b7a634954dc28fe5c4b49db478cf2d96b7 Mon Sep 17 00:00:00 2001 From: Sargun Dhillon Date: Fri, 12 Aug 2016 08:56:40 -0700 Subject: [PATCH 0150/1715] cgroup: Add task_under_cgroup_hierarchy cgroup inline function to headers This commit adds an inline function to cgroup.h to check whether a given task is under a given cgroup hierarchy. This is to avoid having to put ifdefs in .c files to gate access to cgroups. When cgroups are disabled this always returns true. Signed-off-by: Sargun Dhillon Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Tejun Heo Acked-by: Tejun Heo Signed-off-by: David S. Miller --- include/linux/cgroup.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 984f73b719a9..a4414a11eea7 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -497,6 +497,23 @@ static inline bool cgroup_is_descendant(struct cgroup *cgrp, return cgrp->ancestor_ids[ancestor->level] == ancestor->id; } +/** + * task_under_cgroup_hierarchy - test task's membership of cgroup ancestry + * @task: the task to be tested + * @ancestor: possible ancestor of @task's cgroup + * + * Tests whether @task's default cgroup hierarchy is a descendant of @ancestor. + * It follows all the same rules as cgroup_is_descendant, and only applies + * to the default hierarchy. + */ +static inline bool task_under_cgroup_hierarchy(struct task_struct *task, + struct cgroup *ancestor) +{ + struct css_set *cset = task_css_set(task); + + return cgroup_is_descendant(cset->dfl_cgrp, ancestor); +} + /* no synchronization, the result can only be used as a hint */ static inline bool cgroup_is_populated(struct cgroup *cgrp) { @@ -557,6 +574,7 @@ static inline void pr_cont_cgroup_path(struct cgroup *cgrp) #else /* !CONFIG_CGROUPS */ struct cgroup_subsys_state; +struct cgroup; static inline void css_put(struct cgroup_subsys_state *css) {} static inline int cgroup_attach_task_all(struct task_struct *from, @@ -574,6 +592,11 @@ static inline void cgroup_free(struct task_struct *p) {} static inline int cgroup_init_early(void) { return 0; } static inline int cgroup_init(void) { return 0; } +static inline bool task_under_cgroup_hierarchy(struct task_struct *task, + struct cgroup *ancestor) +{ + return true; +} #endif /* !CONFIG_CGROUPS */ /* From 60d20f9195b260bdf0ac10c275ae9f6016f9c069 Mon Sep 17 00:00:00 2001 From: Sargun Dhillon Date: Fri, 12 Aug 2016 08:56:52 -0700 Subject: [PATCH 0151/1715] bpf: Add bpf_current_task_under_cgroup helper This adds a bpf helper that's similar to the skb_in_cgroup helper to check whether the probe is currently executing in the context of a specific subset of the cgroupsv2 hierarchy. It does this based on membership test for a cgroup arraymap. It is invalid to call this in an interrupt, and it'll return an error. The helper is primarily to be used in debugging activities for containers, where you may have multiple programs running in a given top-level "container". Signed-off-by: Sargun Dhillon Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Tejun Heo Acked-by: Tejun Heo Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 11 +++++++++++ kernel/bpf/arraymap.c | 2 +- kernel/bpf/verifier.c | 4 +++- kernel/trace/bpf_trace.c | 30 ++++++++++++++++++++++++++++++ 4 files changed, 45 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index da218fec6056..bea0c4e2830a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -375,6 +375,17 @@ enum bpf_func_id { */ BPF_FUNC_probe_write_user, + /** + * bpf_current_task_under_cgroup(map, index) - Check cgroup2 membership of current task + * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type + * @index: index of the cgroup in the bpf_map + * Return: + * == 0 current failed the cgroup2 descendant test + * == 1 current succeeded the cgroup2 descendant test + * < 0 error + */ + BPF_FUNC_current_task_under_cgroup, + __BPF_FUNC_MAX_ID, }; diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 633a650d7aeb..a2ac051c342f 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -538,7 +538,7 @@ static int __init register_perf_event_array_map(void) } late_initcall(register_perf_event_array_map); -#ifdef CONFIG_SOCK_CGROUP_DATA +#ifdef CONFIG_CGROUPS static void *cgroup_fd_array_get_ptr(struct bpf_map *map, struct file *map_file /* not used */, int fd) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 7094c69ac199..d504722ebfa4 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1053,7 +1053,8 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id) goto error; break; case BPF_MAP_TYPE_CGROUP_ARRAY: - if (func_id != BPF_FUNC_skb_in_cgroup) + if (func_id != BPF_FUNC_skb_in_cgroup && + func_id != BPF_FUNC_current_task_under_cgroup) goto error; break; default: @@ -1075,6 +1076,7 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id) if (map->map_type != BPF_MAP_TYPE_STACK_TRACE) goto error; break; + case BPF_FUNC_current_task_under_cgroup: case BPF_FUNC_skb_in_cgroup: if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY) goto error; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index b20438fdb029..6b794d6669a7 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -376,6 +376,34 @@ static const struct bpf_func_proto bpf_get_current_task_proto = { .ret_type = RET_INTEGER, }; +static u64 bpf_current_task_under_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + struct bpf_map *map = (struct bpf_map *)(long)r1; + struct bpf_array *array = container_of(map, struct bpf_array, map); + struct cgroup *cgrp; + u32 idx = (u32)r2; + + if (unlikely(in_interrupt())) + return -EINVAL; + + if (unlikely(idx >= array->map.max_entries)) + return -E2BIG; + + cgrp = READ_ONCE(array->ptrs[idx]); + if (unlikely(!cgrp)) + return -EAGAIN; + + return task_under_cgroup_hierarchy(current, cgrp); +} + +static const struct bpf_func_proto bpf_current_task_under_cgroup_proto = { + .func = bpf_current_task_under_cgroup, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_ANYTHING, +}; + static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id) { switch (func_id) { @@ -407,6 +435,8 @@ static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id) return &bpf_perf_event_read_proto; case BPF_FUNC_probe_write_user: return bpf_get_probe_write_proto(); + case BPF_FUNC_current_task_under_cgroup: + return &bpf_current_task_under_cgroup_proto; default: return NULL; } From 9e6e60ecbd7323d4ac3f98dcdc1bd2c527a736ef Mon Sep 17 00:00:00 2001 From: Sargun Dhillon Date: Fri, 12 Aug 2016 08:57:04 -0700 Subject: [PATCH 0152/1715] samples/bpf: Add test_current_task_under_cgroup test This test has a BPF program which writes the last known pid to call the sync syscall within a given cgroup to a map. The user mode program creates its own mount namespace, and mounts the cgroupsv2 hierarchy in there, as on all current test systems (Ubuntu 16.04, Debian), the cgroupsv2 vfs is unmounted by default. Once it does this, it proceeds to test. The test checks for positive and negative condition. It ensures that when it's part of a given cgroup, its pid is captured in the map, and that when it leaves the cgroup, this doesn't happen. It populate a cgroups arraymap prior to execution in userspace. This means that the program must be run in the same cgroups namespace as the programs that are being traced. Signed-off-by: Sargun Dhillon Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Tejun Heo Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- samples/bpf/Makefile | 5 + samples/bpf/bpf_helpers.h | 2 + .../bpf/test_current_task_under_cgroup_kern.c | 43 ++++++ .../bpf/test_current_task_under_cgroup_user.c | 145 ++++++++++++++++++ 4 files changed, 195 insertions(+) create mode 100644 samples/bpf/test_current_task_under_cgroup_kern.c create mode 100644 samples/bpf/test_current_task_under_cgroup_user.c diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 90ebf7d35c07..eb582c6264c3 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -24,6 +24,7 @@ hostprogs-y += test_overhead hostprogs-y += test_cgrp2_array_pin hostprogs-y += xdp1 hostprogs-y += xdp2 +hostprogs-y += test_current_task_under_cgroup test_verifier-objs := test_verifier.o libbpf.o test_maps-objs := test_maps.o libbpf.o @@ -49,6 +50,8 @@ test_cgrp2_array_pin-objs := libbpf.o test_cgrp2_array_pin.o xdp1-objs := bpf_load.o libbpf.o xdp1_user.o # reuse xdp1 source intentionally xdp2-objs := bpf_load.o libbpf.o xdp1_user.o +test_current_task_under_cgroup-objs := bpf_load.o libbpf.o \ + test_current_task_under_cgroup_user.o # Tell kbuild to always build the programs always := $(hostprogs-y) @@ -74,6 +77,7 @@ always += parse_varlen.o parse_simple.o parse_ldabs.o always += test_cgrp2_tc_kern.o always += xdp1_kern.o always += xdp2_kern.o +always += test_current_task_under_cgroup_kern.o HOSTCFLAGS += -I$(objtree)/usr/include @@ -97,6 +101,7 @@ HOSTLOADLIBES_map_perf_test += -lelf -lrt HOSTLOADLIBES_test_overhead += -lelf -lrt HOSTLOADLIBES_xdp1 += -lelf HOSTLOADLIBES_xdp2 += -lelf +HOSTLOADLIBES_test_current_task_under_cgroup += -lelf # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline: # make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h index cbc52df165b4..5e4c41e256b8 100644 --- a/samples/bpf/bpf_helpers.h +++ b/samples/bpf/bpf_helpers.h @@ -45,6 +45,8 @@ static int (*bpf_get_stackid)(void *ctx, void *map, int flags) = (void *) BPF_FUNC_get_stackid; static int (*bpf_probe_write_user)(void *dst, void *src, int size) = (void *) BPF_FUNC_probe_write_user; +static int (*bpf_current_task_under_cgroup)(void *map, int index) = + (void *) BPF_FUNC_current_task_under_cgroup; /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions diff --git a/samples/bpf/test_current_task_under_cgroup_kern.c b/samples/bpf/test_current_task_under_cgroup_kern.c new file mode 100644 index 000000000000..86b28d7d6c99 --- /dev/null +++ b/samples/bpf/test_current_task_under_cgroup_kern.c @@ -0,0 +1,43 @@ +/* Copyright (c) 2016 Sargun Dhillon + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ + +#include +#include +#include +#include "bpf_helpers.h" +#include + +struct bpf_map_def SEC("maps") cgroup_map = { + .type = BPF_MAP_TYPE_CGROUP_ARRAY, + .key_size = sizeof(u32), + .value_size = sizeof(u32), + .max_entries = 1, +}; + +struct bpf_map_def SEC("maps") perf_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(u32), + .value_size = sizeof(u64), + .max_entries = 1, +}; + +/* Writes the last PID that called sync to a map at index 0 */ +SEC("kprobe/sys_sync") +int bpf_prog1(struct pt_regs *ctx) +{ + u64 pid = bpf_get_current_pid_tgid(); + int idx = 0; + + if (!bpf_current_task_under_cgroup(&cgroup_map, 0)) + return 0; + + bpf_map_update_elem(&perf_map, &idx, &pid, BPF_ANY); + return 0; +} + +char _license[] SEC("license") = "GPL"; +u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/samples/bpf/test_current_task_under_cgroup_user.c b/samples/bpf/test_current_task_under_cgroup_user.c new file mode 100644 index 000000000000..30b0bce884f9 --- /dev/null +++ b/samples/bpf/test_current_task_under_cgroup_user.c @@ -0,0 +1,145 @@ +/* Copyright (c) 2016 Sargun Dhillon + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ + +#define _GNU_SOURCE +#include +#include +#include +#include "libbpf.h" +#include "bpf_load.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define CGROUP_MOUNT_PATH "/mnt" +#define CGROUP_PATH "/mnt/my-cgroup" + +#define clean_errno() (errno == 0 ? "None" : strerror(errno)) +#define log_err(MSG, ...) fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \ + __FILE__, __LINE__, clean_errno(), ##__VA_ARGS__) + +static int join_cgroup(char *path) +{ + int fd, rc = 0; + pid_t pid = getpid(); + char cgroup_path[PATH_MAX + 1]; + + snprintf(cgroup_path, sizeof(cgroup_path), "%s/cgroup.procs", path); + + fd = open(cgroup_path, O_WRONLY); + if (fd < 0) { + log_err("Opening Cgroup"); + return 1; + } + + if (dprintf(fd, "%d\n", pid) < 0) { + log_err("Joining Cgroup"); + rc = 1; + } + close(fd); + return rc; +} + +int main(int argc, char **argv) +{ + char filename[256]; + int cg2, idx = 0; + pid_t remote_pid, local_pid = getpid(); + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + if (load_bpf_file(filename)) { + printf("%s", bpf_log_buf); + return 1; + } + + /* + * This is to avoid interfering with existing cgroups. Unfortunately, + * most people don't have cgroupv2 enabled at this point in time. + * It's easier to create our own mount namespace and manage it + * ourselves. + */ + if (unshare(CLONE_NEWNS)) { + log_err("unshare"); + return 1; + } + + if (mount("none", "/", NULL, MS_REC | MS_PRIVATE, NULL)) { + log_err("mount fakeroot"); + return 1; + } + + if (mount("none", CGROUP_MOUNT_PATH, "cgroup2", 0, NULL)) { + log_err("mount cgroup2"); + return 1; + } + + if (mkdir(CGROUP_PATH, 0777) && errno != EEXIST) { + log_err("mkdir cgroup"); + return 1; + } + + cg2 = open(CGROUP_PATH, O_RDONLY); + if (cg2 < 0) { + log_err("opening target cgroup"); + goto cleanup_cgroup_err; + } + + if (bpf_update_elem(map_fd[0], &idx, &cg2, BPF_ANY)) { + log_err("Adding target cgroup to map"); + goto cleanup_cgroup_err; + } + if (join_cgroup("/mnt/my-cgroup")) { + log_err("Leaving target cgroup"); + goto cleanup_cgroup_err; + } + + /* + * The installed helper program catched the sync call, and should + * write it to the map. + */ + + sync(); + bpf_lookup_elem(map_fd[1], &idx, &remote_pid); + + if (local_pid != remote_pid) { + fprintf(stderr, + "BPF Helper didn't write correct PID to map, but: %d\n", + remote_pid); + goto leave_cgroup_err; + } + + /* Verify the negative scenario; leave the cgroup */ + if (join_cgroup(CGROUP_MOUNT_PATH)) + goto leave_cgroup_err; + + remote_pid = 0; + bpf_update_elem(map_fd[1], &idx, &remote_pid, BPF_ANY); + + sync(); + bpf_lookup_elem(map_fd[1], &idx, &remote_pid); + + if (local_pid == remote_pid) { + fprintf(stderr, "BPF cgroup negative test did not work\n"); + goto cleanup_cgroup_err; + } + + rmdir(CGROUP_PATH); + return 0; + + /* Error condition, cleanup */ +leave_cgroup_err: + join_cgroup(CGROUP_MOUNT_PATH); +cleanup_cgroup_err: + rmdir(CGROUP_PATH); + return 1; +} From 03ff4979345110d30ecdeab2ae9cb2f451f158bf Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 13 Aug 2016 01:54:15 +0000 Subject: [PATCH 0153/1715] sit: make function ipip6_valid_ip_proto() static Fixes the following sparse warning: net/ipv6/sit.c:1129:6: warning: symbol 'ipip6_valid_ip_proto' was not declared. Should it be static? Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- net/ipv6/sit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 696edeeff8bc..b1cdf8009d29 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1126,7 +1126,7 @@ static int ipip6_tunnel_update_6rd(struct ip_tunnel *t, } #endif -bool ipip6_valid_ip_proto(u8 ipproto) +static bool ipip6_valid_ip_proto(u8 ipproto) { return ipproto == IPPROTO_IPV6 || ipproto == IPPROTO_IPIP || From 6841de8b0d03cc9a4e0e928453623c13ee754f77 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 11 Aug 2016 18:17:16 -0700 Subject: [PATCH 0154/1715] bpf: allow helpers access the packet directly The helper functions like bpf_map_lookup_elem(map, key) were only allowing 'key' to point to the initialized stack area. That is causing performance degradation when programs need to process millions of packets per second and need to copy contents of the packet into the stack just to pass the stack pointer into the lookup() function. Allow such helpers read from the packet directly. All helpers that expect ARG_PTR_TO_MAP_KEY, ARG_PTR_TO_MAP_VALUE, ARG_PTR_TO_STACK assume byte aligned pointer, so no alignment concerns, only need to check that helper will not be accessing beyond the packet range verified by the prior 'if (ptr < data_end)' condition. For now allow this feature for XDP programs only. Later it can be relaxed for the clsact programs as well. Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 61 ++++++++++++++++++++++++++++++------------- 1 file changed, 43 insertions(+), 18 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index d504722ebfa4..0cc46f1df358 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -930,14 +930,14 @@ static int check_func_arg(struct verifier_env *env, u32 regno, enum bpf_arg_type arg_type, struct bpf_call_arg_meta *meta) { - struct reg_state *reg = env->cur_state.regs + regno; - enum bpf_reg_type expected_type; + struct reg_state *regs = env->cur_state.regs, *reg = ®s[regno]; + enum bpf_reg_type expected_type, type = reg->type; int err = 0; if (arg_type == ARG_DONTCARE) return 0; - if (reg->type == NOT_INIT) { + if (type == NOT_INIT) { verbose("R%d !read_ok\n", regno); return -EACCES; } @@ -950,16 +950,29 @@ static int check_func_arg(struct verifier_env *env, u32 regno, return 0; } + if (type == PTR_TO_PACKET && !may_write_pkt_data(env->prog->type)) { + verbose("helper access to the packet is not allowed for clsact\n"); + return -EACCES; + } + if (arg_type == ARG_PTR_TO_MAP_KEY || arg_type == ARG_PTR_TO_MAP_VALUE) { expected_type = PTR_TO_STACK; + if (type != PTR_TO_PACKET && type != expected_type) + goto err_type; } else if (arg_type == ARG_CONST_STACK_SIZE || arg_type == ARG_CONST_STACK_SIZE_OR_ZERO) { expected_type = CONST_IMM; + if (type != expected_type) + goto err_type; } else if (arg_type == ARG_CONST_MAP_PTR) { expected_type = CONST_PTR_TO_MAP; + if (type != expected_type) + goto err_type; } else if (arg_type == ARG_PTR_TO_CTX) { expected_type = PTR_TO_CTX; + if (type != expected_type) + goto err_type; } else if (arg_type == ARG_PTR_TO_STACK || arg_type == ARG_PTR_TO_RAW_STACK) { expected_type = PTR_TO_STACK; @@ -967,20 +980,16 @@ static int check_func_arg(struct verifier_env *env, u32 regno, * passed in as argument, it's a CONST_IMM type. Final test * happens during stack boundary checking. */ - if (reg->type == CONST_IMM && reg->imm == 0) - expected_type = CONST_IMM; + if (type == CONST_IMM && reg->imm == 0) + /* final test in check_stack_boundary() */; + else if (type != PTR_TO_PACKET && type != expected_type) + goto err_type; meta->raw_mode = arg_type == ARG_PTR_TO_RAW_STACK; } else { verbose("unsupported arg_type %d\n", arg_type); return -EFAULT; } - if (reg->type != expected_type) { - verbose("R%d type=%s expected=%s\n", regno, - reg_type_str[reg->type], reg_type_str[expected_type]); - return -EACCES; - } - if (arg_type == ARG_CONST_MAP_PTR) { /* bpf_map_xxx(map_ptr) call: remember that map_ptr */ meta->map_ptr = reg->map_ptr; @@ -998,8 +1007,13 @@ static int check_func_arg(struct verifier_env *env, u32 regno, verbose("invalid map_ptr to access map->key\n"); return -EACCES; } - err = check_stack_boundary(env, regno, meta->map_ptr->key_size, - false, NULL); + if (type == PTR_TO_PACKET) + err = check_packet_access(env, regno, 0, + meta->map_ptr->key_size); + else + err = check_stack_boundary(env, regno, + meta->map_ptr->key_size, + false, NULL); } else if (arg_type == ARG_PTR_TO_MAP_VALUE) { /* bpf_map_xxx(..., map_ptr, ..., value) call: * check [value, value + map->value_size) validity @@ -1009,9 +1023,13 @@ static int check_func_arg(struct verifier_env *env, u32 regno, verbose("invalid map_ptr to access map->value\n"); return -EACCES; } - err = check_stack_boundary(env, regno, - meta->map_ptr->value_size, - false, NULL); + if (type == PTR_TO_PACKET) + err = check_packet_access(env, regno, 0, + meta->map_ptr->value_size); + else + err = check_stack_boundary(env, regno, + meta->map_ptr->value_size, + false, NULL); } else if (arg_type == ARG_CONST_STACK_SIZE || arg_type == ARG_CONST_STACK_SIZE_OR_ZERO) { bool zero_size_allowed = (arg_type == ARG_CONST_STACK_SIZE_OR_ZERO); @@ -1025,11 +1043,18 @@ static int check_func_arg(struct verifier_env *env, u32 regno, verbose("ARG_CONST_STACK_SIZE cannot be first argument\n"); return -EACCES; } - err = check_stack_boundary(env, regno - 1, reg->imm, - zero_size_allowed, meta); + if (regs[regno - 1].type == PTR_TO_PACKET) + err = check_packet_access(env, regno - 1, 0, reg->imm); + else + err = check_stack_boundary(env, regno - 1, reg->imm, + zero_size_allowed, meta); } return err; +err_type: + verbose("R%d type=%s expected=%s\n", regno, + reg_type_str[type], reg_type_str[expected_type]); + return -EACCES; } static int check_map_func_compatibility(struct bpf_map *map, int func_id) From 1633ac0a2e774a9af339b9290ef33cd97a918c54 Mon Sep 17 00:00:00 2001 From: Aaron Yue Date: Thu, 11 Aug 2016 18:17:17 -0700 Subject: [PATCH 0155/1715] samples/bpf: add verifier tests for the helper access to the packet test various corner cases of the helper function access to the packet via crafted XDP programs. Signed-off-by: Aaron Yue Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- samples/bpf/test_verifier.c | 114 ++++++++++++++++++++++++++++++++++-- 1 file changed, 110 insertions(+), 4 deletions(-) diff --git a/samples/bpf/test_verifier.c b/samples/bpf/test_verifier.c index fe2fcec98c1f..78c6f131d94f 100644 --- a/samples/bpf/test_verifier.c +++ b/samples/bpf/test_verifier.c @@ -1449,7 +1449,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { - "pkt: test1", + "direct packet access: test1", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct __sk_buff, data)), @@ -1466,7 +1466,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { - "pkt: test2", + "direct packet access: test2", .insns = { BPF_MOV64_IMM(BPF_REG_0, 1), BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, @@ -1499,7 +1499,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { - "pkt: test3", + "direct packet access: test3", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct __sk_buff, data)), @@ -1511,7 +1511,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, }, { - "pkt: test4", + "direct packet access: test4", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct __sk_buff, data)), @@ -1528,6 +1528,112 @@ static struct bpf_test tests[] = { .result = REJECT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, + { + "helper access to packet: test1, valid packet_ptr range", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 5), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_2), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_update_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup = {5}, + .result_unpriv = ACCEPT, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "helper access to packet: test2, unchecked packet_ptr", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup = {1}, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "helper access to packet: test3, variable add", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 10), + BPF_LDX_MEM(BPF_B, BPF_REG_5, BPF_REG_2, 0), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_5), + BPF_MOV64_REG(BPF_REG_5, BPF_REG_4), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_3, 4), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_4), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup = {11}, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "helper access to packet: test4, packet_ptr with bad range", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4), + BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup = {7}, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "helper access to packet: test5, packet_ptr with too short range", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup = {6}, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_XDP, + }, }; static int probe_filter_length(struct bpf_insn *fp) From 8937bd80fce64a25be23c7790459d93f7b1e9b79 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 11 Aug 2016 18:17:18 -0700 Subject: [PATCH 0156/1715] bpf: allow bpf_get_prandom_u32() to be used in tracing bpf_get_prandom_u32() was initially introduced for socket filters and later requested numberous times to be added to tracing bpf programs for the same reason as in socket filters: to be able to randomly select incoming events. Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- kernel/trace/bpf_trace.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 6b794d6669a7..ad35213b8405 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -437,6 +437,8 @@ static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id) return bpf_get_probe_write_proto(); case BPF_FUNC_current_task_under_cgroup: return &bpf_current_task_under_cgroup_proto; + case BPF_FUNC_get_prandom_u32: + return &bpf_get_prandom_u32_proto; default: return NULL; } From adf0516845bcd0e626323c858ece28ee58c74455 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 12 Aug 2016 13:47:06 +0200 Subject: [PATCH 0157/1715] netfilter: remove ip_conntrack* sysctl compat code This backward compatibility has been around for more than ten years, since Yasuyuki Kozakai introduced IPv6 in conntrack. These days, we have alternate /proc/net/nf_conntrack* entries, the ctnetlink interface and the conntrack utility got adopted by many people in the user community according to what I observed on the netfilter user mailing list. So let's get rid of this. Note that nf_conntrack_htable_size and unsigned int nf_conntrack_max do not need to be exported as symbol anymore. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l4proto.h | 8 - include/net/netns/conntrack.h | 8 - net/ipv4/netfilter/Kconfig | 11 - net/ipv4/netfilter/Makefile | 5 - .../netfilter/nf_conntrack_l3proto_ipv4.c | 70 --- .../nf_conntrack_l3proto_ipv4_compat.c | 491 ------------------ net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 39 +- net/netfilter/nf_conntrack_core.c | 3 - net/netfilter/nf_conntrack_proto.c | 81 +-- net/netfilter/nf_conntrack_proto_generic.c | 39 +- net/netfilter/nf_conntrack_proto_sctp.c | 85 +-- net/netfilter/nf_conntrack_proto_tcp.c | 127 +---- net/netfilter/nf_conntrack_proto_udp.c | 49 +- 13 files changed, 7 insertions(+), 1009 deletions(-) delete mode 100644 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 1a5fb36f165f..de629f1520df 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -134,14 +134,6 @@ void nf_ct_l4proto_pernet_unregister(struct net *net, int nf_ct_l4proto_register(struct nf_conntrack_l4proto *proto); void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *proto); -static inline void nf_ct_kfree_compat_sysctl_table(struct nf_proto_net *pn) -{ -#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) - kfree(pn->ctl_compat_table); - pn->ctl_compat_table = NULL; -#endif -} - /* Generic netlink helpers */ int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple); diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 38b1a80517f0..e469e85de3f9 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -15,10 +15,6 @@ struct nf_proto_net { #ifdef CONFIG_SYSCTL struct ctl_table_header *ctl_table_header; struct ctl_table *ctl_table; -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT - struct ctl_table_header *ctl_compat_header; - struct ctl_table *ctl_compat_table; -#endif #endif unsigned int users; }; @@ -58,10 +54,6 @@ struct nf_ip_net { struct nf_udp_net udp; struct nf_icmp_net icmp; struct nf_icmp_net icmpv6; -#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) - struct ctl_table_header *ctl_table_header; - struct ctl_table *ctl_table; -#endif }; struct ct_pcpu { diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index c187c60e3e0c..d613309e3e5d 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -25,17 +25,6 @@ config NF_CONNTRACK_IPV4 To compile it as a module, choose M here. If unsure, say N. -config NF_CONNTRACK_PROC_COMPAT - bool "proc/sysctl compatibility with old connection tracking" - depends on NF_CONNTRACK_PROCFS && NF_CONNTRACK_IPV4 - default y - help - This option enables /proc and sysctl compatibility with the old - layer 3 dependent connection tracking. This is needed to keep - old programs that have not been adapted to the new names working. - - If unsure, say Y. - if NF_TABLES config NF_TABLES_IPV4 diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 87b073da14c9..853328f8fd05 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -4,11 +4,6 @@ # objects for l3 independent conntrack nf_conntrack_ipv4-y := nf_conntrack_l3proto_ipv4.o nf_conntrack_proto_icmp.o -ifeq ($(CONFIG_NF_CONNTRACK_PROC_COMPAT),y) -ifeq ($(CONFIG_PROC_FS),y) -nf_conntrack_ipv4-objs += nf_conntrack_l3proto_ipv4_compat.o -endif -endif # connection tracking obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index ae1a71a97132..870aebda2932 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -202,47 +202,6 @@ static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = { }, }; -#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) -static int log_invalid_proto_min = 0; -static int log_invalid_proto_max = 255; - -static struct ctl_table ip_ct_sysctl_table[] = { - { - .procname = "ip_conntrack_max", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "ip_conntrack_count", - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = proc_dointvec, - }, - { - .procname = "ip_conntrack_buckets", - .maxlen = sizeof(unsigned int), - .mode = 0444, - .proc_handler = proc_dointvec, - }, - { - .procname = "ip_conntrack_checksum", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "ip_conntrack_log_invalid", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &log_invalid_proto_min, - .extra2 = &log_invalid_proto_max, - }, - { } -}; -#endif /* CONFIG_SYSCTL && CONFIG_NF_CONNTRACK_PROC_COMPAT */ - /* Fast function for those who don't want to parse /proc (and I don't blame them). */ /* Reversing the socket's dst/src point of view gives us the reply @@ -350,20 +309,6 @@ static struct nf_sockopt_ops so_getorigdst = { static int ipv4_init_net(struct net *net) { -#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) - struct nf_ip_net *in = &net->ct.nf_ct_proto; - in->ctl_table = kmemdup(ip_ct_sysctl_table, - sizeof(ip_ct_sysctl_table), - GFP_KERNEL); - if (!in->ctl_table) - return -ENOMEM; - - in->ctl_table[0].data = &nf_conntrack_max; - in->ctl_table[1].data = &net->ct.count; - in->ctl_table[2].data = &nf_conntrack_htable_size; - in->ctl_table[3].data = &net->ct.sysctl_checksum; - in->ctl_table[4].data = &net->ct.sysctl_log_invalid; -#endif return 0; } @@ -379,9 +324,6 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = { .nlattr_tuple_size = ipv4_nlattr_tuple_size, .nlattr_to_tuple = ipv4_nlattr_to_tuple, .nla_policy = ipv4_nla_policy, -#endif -#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) - .ctl_table_path = "net/ipv4/netfilter", #endif .init_net = ipv4_init_net, .me = THIS_MODULE, @@ -492,16 +434,7 @@ static int __init nf_conntrack_l3proto_ipv4_init(void) goto cleanup_icmpv4; } -#if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) - ret = nf_conntrack_ipv4_compat_init(); - if (ret < 0) - goto cleanup_proto; -#endif return ret; -#if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) - cleanup_proto: - nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv4); -#endif cleanup_icmpv4: nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmp); cleanup_udp4: @@ -520,9 +453,6 @@ static int __init nf_conntrack_l3proto_ipv4_init(void) static void __exit nf_conntrack_l3proto_ipv4_fini(void) { synchronize_net(); -#if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) - nf_conntrack_ipv4_compat_fini(); -#endif nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv4); nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmp); nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp4); diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c deleted file mode 100644 index 67bfc69e00bc..000000000000 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ /dev/null @@ -1,491 +0,0 @@ -/* ip_conntrack proc compat - based on ip_conntrack_standalone.c - * - * (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2006 Netfilter Core Team - * (C) 2006-2010 Patrick McHardy - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -struct ct_iter_state { - struct seq_net_private p; - struct hlist_nulls_head *hash; - unsigned int htable_size; - unsigned int bucket; -}; - -static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) -{ - struct ct_iter_state *st = seq->private; - struct hlist_nulls_node *n; - - for (st->bucket = 0; - st->bucket < st->htable_size; - st->bucket++) { - n = rcu_dereference( - hlist_nulls_first_rcu(&st->hash[st->bucket])); - if (!is_a_nulls(n)) - return n; - } - return NULL; -} - -static struct hlist_nulls_node *ct_get_next(struct seq_file *seq, - struct hlist_nulls_node *head) -{ - struct ct_iter_state *st = seq->private; - - head = rcu_dereference(hlist_nulls_next_rcu(head)); - while (is_a_nulls(head)) { - if (likely(get_nulls_value(head) == st->bucket)) { - if (++st->bucket >= st->htable_size) - return NULL; - } - head = rcu_dereference( - hlist_nulls_first_rcu(&st->hash[st->bucket])); - } - return head; -} - -static struct hlist_nulls_node *ct_get_idx(struct seq_file *seq, loff_t pos) -{ - struct hlist_nulls_node *head = ct_get_first(seq); - - if (head) - while (pos && (head = ct_get_next(seq, head))) - pos--; - return pos ? NULL : head; -} - -static void *ct_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(RCU) -{ - struct ct_iter_state *st = seq->private; - - rcu_read_lock(); - - nf_conntrack_get_ht(&st->hash, &st->htable_size); - return ct_get_idx(seq, *pos); -} - -static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos) -{ - (*pos)++; - return ct_get_next(s, v); -} - -static void ct_seq_stop(struct seq_file *s, void *v) - __releases(RCU) -{ - rcu_read_unlock(); -} - -#ifdef CONFIG_NF_CONNTRACK_SECMARK -static void ct_show_secctx(struct seq_file *s, const struct nf_conn *ct) -{ - int ret; - u32 len; - char *secctx; - - ret = security_secid_to_secctx(ct->secmark, &secctx, &len); - if (ret) - return; - - seq_printf(s, "secctx=%s ", secctx); - - security_release_secctx(secctx, len); -} -#else -static inline void ct_show_secctx(struct seq_file *s, const struct nf_conn *ct) -{ -} -#endif - -static bool ct_seq_should_skip(const struct nf_conn *ct, - const struct net *net, - const struct nf_conntrack_tuple_hash *hash) -{ - /* we only want to print DIR_ORIGINAL */ - if (NF_CT_DIRECTION(hash)) - return true; - - if (nf_ct_l3num(ct) != AF_INET) - return true; - - if (!net_eq(nf_ct_net(ct), net)) - return true; - - return false; -} - -static int ct_seq_show(struct seq_file *s, void *v) -{ - struct nf_conntrack_tuple_hash *hash = v; - struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash); - const struct nf_conntrack_l3proto *l3proto; - const struct nf_conntrack_l4proto *l4proto; - int ret = 0; - - NF_CT_ASSERT(ct); - if (ct_seq_should_skip(ct, seq_file_net(s), hash)) - return 0; - - if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use))) - return 0; - - /* check if we raced w. object reuse */ - if (!nf_ct_is_confirmed(ct) || - ct_seq_should_skip(ct, seq_file_net(s), hash)) - goto release; - - l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct)); - NF_CT_ASSERT(l3proto); - l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); - NF_CT_ASSERT(l4proto); - - ret = -ENOSPC; - seq_printf(s, "%-8s %u %ld ", - l4proto->name, nf_ct_protonum(ct), - nf_ct_expires(ct) / HZ); - - if (l4proto->print_conntrack) - l4proto->print_conntrack(s, ct); - - if (seq_has_overflowed(s)) - goto release; - - print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, - l3proto, l4proto); - - if (seq_has_overflowed(s)) - goto release; - - if (seq_print_acct(s, ct, IP_CT_DIR_ORIGINAL)) - goto release; - - if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status))) - seq_printf(s, "[UNREPLIED] "); - - print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, - l3proto, l4proto); - - if (seq_has_overflowed(s)) - goto release; - - if (seq_print_acct(s, ct, IP_CT_DIR_REPLY)) - goto release; - - if (test_bit(IPS_ASSURED_BIT, &ct->status)) - seq_printf(s, "[ASSURED] "); - -#ifdef CONFIG_NF_CONNTRACK_MARK - seq_printf(s, "mark=%u ", ct->mark); -#endif - - ct_show_secctx(s, ct); - - seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use)); - - if (seq_has_overflowed(s)) - goto release; - - ret = 0; -release: - nf_ct_put(ct); - return ret; -} - -static const struct seq_operations ct_seq_ops = { - .start = ct_seq_start, - .next = ct_seq_next, - .stop = ct_seq_stop, - .show = ct_seq_show -}; - -static int ct_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &ct_seq_ops, - sizeof(struct ct_iter_state)); -} - -static const struct file_operations ct_file_ops = { - .owner = THIS_MODULE, - .open = ct_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - -/* expects */ -struct ct_expect_iter_state { - struct seq_net_private p; - unsigned int bucket; -}; - -static struct hlist_node *ct_expect_get_first(struct seq_file *seq) -{ - struct ct_expect_iter_state *st = seq->private; - struct hlist_node *n; - - for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) { - n = rcu_dereference( - hlist_first_rcu(&nf_ct_expect_hash[st->bucket])); - if (n) - return n; - } - return NULL; -} - -static struct hlist_node *ct_expect_get_next(struct seq_file *seq, - struct hlist_node *head) -{ - struct ct_expect_iter_state *st = seq->private; - - head = rcu_dereference(hlist_next_rcu(head)); - while (head == NULL) { - if (++st->bucket >= nf_ct_expect_hsize) - return NULL; - head = rcu_dereference( - hlist_first_rcu(&nf_ct_expect_hash[st->bucket])); - } - return head; -} - -static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos) -{ - struct hlist_node *head = ct_expect_get_first(seq); - - if (head) - while (pos && (head = ct_expect_get_next(seq, head))) - pos--; - return pos ? NULL : head; -} - -static void *exp_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(RCU) -{ - rcu_read_lock(); - return ct_expect_get_idx(seq, *pos); -} - -static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - (*pos)++; - return ct_expect_get_next(seq, v); -} - -static void exp_seq_stop(struct seq_file *seq, void *v) - __releases(RCU) -{ - rcu_read_unlock(); -} - -static int exp_seq_show(struct seq_file *s, void *v) -{ - struct nf_conntrack_expect *exp; - const struct hlist_node *n = v; - - exp = hlist_entry(n, struct nf_conntrack_expect, hnode); - - if (!net_eq(nf_ct_net(exp->master), seq_file_net(s))) - return 0; - - if (exp->tuple.src.l3num != AF_INET) - return 0; - - if (exp->timeout.function) - seq_printf(s, "%ld ", timer_pending(&exp->timeout) - ? (long)(exp->timeout.expires - jiffies)/HZ : 0); - else - seq_printf(s, "- "); - - seq_printf(s, "proto=%u ", exp->tuple.dst.protonum); - - print_tuple(s, &exp->tuple, - __nf_ct_l3proto_find(exp->tuple.src.l3num), - __nf_ct_l4proto_find(exp->tuple.src.l3num, - exp->tuple.dst.protonum)); - seq_putc(s, '\n'); - - return 0; -} - -static const struct seq_operations exp_seq_ops = { - .start = exp_seq_start, - .next = exp_seq_next, - .stop = exp_seq_stop, - .show = exp_seq_show -}; - -static int exp_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &exp_seq_ops, - sizeof(struct ct_expect_iter_state)); -} - -static const struct file_operations ip_exp_file_ops = { - .owner = THIS_MODULE, - .open = exp_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - -static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos) -{ - struct net *net = seq_file_net(seq); - int cpu; - - if (*pos == 0) - return SEQ_START_TOKEN; - - for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) { - if (!cpu_possible(cpu)) - continue; - *pos = cpu+1; - return per_cpu_ptr(net->ct.stat, cpu); - } - - return NULL; -} - -static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - struct net *net = seq_file_net(seq); - int cpu; - - for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) { - if (!cpu_possible(cpu)) - continue; - *pos = cpu+1; - return per_cpu_ptr(net->ct.stat, cpu); - } - - return NULL; -} - -static void ct_cpu_seq_stop(struct seq_file *seq, void *v) -{ -} - -static int ct_cpu_seq_show(struct seq_file *seq, void *v) -{ - struct net *net = seq_file_net(seq); - unsigned int nr_conntracks = atomic_read(&net->ct.count); - const struct ip_conntrack_stat *st = v; - - if (v == SEQ_START_TOKEN) { - seq_printf(seq, "entries searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete search_restart\n"); - return 0; - } - - seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x " - "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n", - nr_conntracks, - st->searched, - st->found, - st->new, - st->invalid, - st->ignore, - st->delete, - st->delete_list, - st->insert, - st->insert_failed, - st->drop, - st->early_drop, - st->error, - - st->expect_new, - st->expect_create, - st->expect_delete, - st->search_restart - ); - return 0; -} - -static const struct seq_operations ct_cpu_seq_ops = { - .start = ct_cpu_seq_start, - .next = ct_cpu_seq_next, - .stop = ct_cpu_seq_stop, - .show = ct_cpu_seq_show, -}; - -static int ct_cpu_seq_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &ct_cpu_seq_ops, - sizeof(struct seq_net_private)); -} - -static const struct file_operations ct_cpu_seq_fops = { - .owner = THIS_MODULE, - .open = ct_cpu_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - -static int __net_init ip_conntrack_net_init(struct net *net) -{ - struct proc_dir_entry *proc, *proc_exp, *proc_stat; - - proc = proc_create("ip_conntrack", 0440, net->proc_net, &ct_file_ops); - if (!proc) - goto err1; - - proc_exp = proc_create("ip_conntrack_expect", 0440, net->proc_net, - &ip_exp_file_ops); - if (!proc_exp) - goto err2; - - proc_stat = proc_create("ip_conntrack", S_IRUGO, - net->proc_net_stat, &ct_cpu_seq_fops); - if (!proc_stat) - goto err3; - return 0; - -err3: - remove_proc_entry("ip_conntrack_expect", net->proc_net); -err2: - remove_proc_entry("ip_conntrack", net->proc_net); -err1: - return -ENOMEM; -} - -static void __net_exit ip_conntrack_net_exit(struct net *net) -{ - remove_proc_entry("ip_conntrack", net->proc_net_stat); - remove_proc_entry("ip_conntrack_expect", net->proc_net); - remove_proc_entry("ip_conntrack", net->proc_net); -} - -static struct pernet_operations ip_conntrack_net_ops = { - .init = ip_conntrack_net_init, - .exit = ip_conntrack_net_exit, -}; - -int __init nf_conntrack_ipv4_compat_init(void) -{ - return register_pernet_subsys(&ip_conntrack_net_ops); -} - -void __exit nf_conntrack_ipv4_compat_fini(void) -{ - unregister_pernet_subsys(&ip_conntrack_net_ops); -} diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index c567e1b5d799..4b5904bc2614 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -327,17 +327,6 @@ static struct ctl_table icmp_sysctl_table[] = { }, { } }; -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT -static struct ctl_table icmp_compat_sysctl_table[] = { - { - .procname = "ip_conntrack_icmp_timeout", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { } -}; -#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif /* CONFIG_SYSCTL */ static int icmp_kmemdup_sysctl_table(struct nf_proto_net *pn, @@ -355,40 +344,14 @@ static int icmp_kmemdup_sysctl_table(struct nf_proto_net *pn, return 0; } -static int icmp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn, - struct nf_icmp_net *in) -{ -#ifdef CONFIG_SYSCTL -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT - pn->ctl_compat_table = kmemdup(icmp_compat_sysctl_table, - sizeof(icmp_compat_sysctl_table), - GFP_KERNEL); - if (!pn->ctl_compat_table) - return -ENOMEM; - - pn->ctl_compat_table[0].data = &in->timeout; -#endif -#endif - return 0; -} - static int icmp_init_net(struct net *net, u_int16_t proto) { - int ret; struct nf_icmp_net *in = icmp_pernet(net); struct nf_proto_net *pn = &in->pn; in->timeout = nf_ct_icmp_timeout; - ret = icmp_kmemdup_compat_sysctl_table(pn, in); - if (ret < 0) - return ret; - - ret = icmp_kmemdup_sysctl_table(pn, in); - if (ret < 0) - nf_ct_kfree_compat_sysctl_table(pn); - - return ret; + return icmp_kmemdup_sysctl_table(pn, in); } static struct nf_proto_net *icmp_get_net_proto(struct net *net) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index dd2c43abf9e2..22558b7ff7cd 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -161,10 +161,7 @@ static void nf_conntrack_all_unlock(void) } unsigned int nf_conntrack_htable_size __read_mostly; -EXPORT_SYMBOL_GPL(nf_conntrack_htable_size); - unsigned int nf_conntrack_max __read_mostly; -EXPORT_SYMBOL_GPL(nf_conntrack_max); DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked); EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked); diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index b65d5864b6d9..8d2c7d8c666a 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -159,54 +159,6 @@ static int kill_l4proto(struct nf_conn *i, void *data) nf_ct_l3num(i) == l4proto->l3proto; } -static struct nf_ip_net *nf_ct_l3proto_net(struct net *net, - struct nf_conntrack_l3proto *l3proto) -{ - if (l3proto->l3proto == PF_INET) - return &net->ct.nf_ct_proto; - else - return NULL; -} - -static int nf_ct_l3proto_register_sysctl(struct net *net, - struct nf_conntrack_l3proto *l3proto) -{ - int err = 0; - struct nf_ip_net *in = nf_ct_l3proto_net(net, l3proto); - /* nf_conntrack_l3proto_ipv6 doesn't support sysctl */ - if (in == NULL) - return 0; - -#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) - if (in->ctl_table != NULL) { - err = nf_ct_register_sysctl(net, - &in->ctl_table_header, - l3proto->ctl_table_path, - in->ctl_table); - if (err < 0) { - kfree(in->ctl_table); - in->ctl_table = NULL; - } - } -#endif - return err; -} - -static void nf_ct_l3proto_unregister_sysctl(struct net *net, - struct nf_conntrack_l3proto *l3proto) -{ - struct nf_ip_net *in = nf_ct_l3proto_net(net, l3proto); - - if (in == NULL) - return; -#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) - if (in->ctl_table_header != NULL) - nf_ct_unregister_sysctl(&in->ctl_table_header, - &in->ctl_table, - 0); -#endif -} - int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto) { int ret = 0; @@ -241,7 +193,7 @@ EXPORT_SYMBOL_GPL(nf_ct_l3proto_register); int nf_ct_l3proto_pernet_register(struct net *net, struct nf_conntrack_l3proto *proto) { - int ret = 0; + int ret; if (proto->init_net) { ret = proto->init_net(net); @@ -249,7 +201,7 @@ int nf_ct_l3proto_pernet_register(struct net *net, return ret; } - return nf_ct_l3proto_register_sysctl(net, proto); + return 0; } EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_register); @@ -272,8 +224,6 @@ EXPORT_SYMBOL_GPL(nf_ct_l3proto_unregister); void nf_ct_l3proto_pernet_unregister(struct net *net, struct nf_conntrack_l3proto *proto) { - nf_ct_l3proto_unregister_sysctl(net, proto); - /* Remove all contrack entries for this protocol */ nf_ct_iterate_cleanup(net, kill_l3proto, proto, 0, 0); } @@ -312,26 +262,6 @@ int nf_ct_l4proto_register_sysctl(struct net *net, } } } -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT - if (l4proto->l3proto != AF_INET6 && pn->ctl_compat_table != NULL) { - if (err < 0) { - nf_ct_kfree_compat_sysctl_table(pn); - goto out; - } - err = nf_ct_register_sysctl(net, - &pn->ctl_compat_header, - "net/ipv4/netfilter", - pn->ctl_compat_table); - if (err == 0) - goto out; - - nf_ct_kfree_compat_sysctl_table(pn); - nf_ct_unregister_sysctl(&pn->ctl_table_header, - &pn->ctl_table, - pn->users); - } -out: -#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif /* CONFIG_SYSCTL */ return err; } @@ -346,13 +276,6 @@ void nf_ct_l4proto_unregister_sysctl(struct net *net, nf_ct_unregister_sysctl(&pn->ctl_table_header, &pn->ctl_table, pn->users); - -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT - if (l4proto->l3proto != AF_INET6 && pn->ctl_compat_header != NULL) - nf_ct_unregister_sysctl(&pn->ctl_compat_header, - &pn->ctl_compat_table, - 0); -#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif /* CONFIG_SYSCTL */ } diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index 86dc752e5349..d5868bad33a7 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -151,17 +151,6 @@ static struct ctl_table generic_sysctl_table[] = { }, { } }; -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT -static struct ctl_table generic_compat_sysctl_table[] = { - { - .procname = "ip_conntrack_generic_timeout", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { } -}; -#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif /* CONFIG_SYSCTL */ static int generic_kmemdup_sysctl_table(struct nf_proto_net *pn, @@ -179,40 +168,14 @@ static int generic_kmemdup_sysctl_table(struct nf_proto_net *pn, return 0; } -static int generic_kmemdup_compat_sysctl_table(struct nf_proto_net *pn, - struct nf_generic_net *gn) -{ -#ifdef CONFIG_SYSCTL -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT - pn->ctl_compat_table = kmemdup(generic_compat_sysctl_table, - sizeof(generic_compat_sysctl_table), - GFP_KERNEL); - if (!pn->ctl_compat_table) - return -ENOMEM; - - pn->ctl_compat_table[0].data = &gn->timeout; -#endif -#endif - return 0; -} - static int generic_init_net(struct net *net, u_int16_t proto) { - int ret; struct nf_generic_net *gn = generic_pernet(net); struct nf_proto_net *pn = &gn->pn; gn->timeout = nf_ct_generic_timeout; - ret = generic_kmemdup_compat_sysctl_table(pn, gn); - if (ret < 0) - return ret; - - ret = generic_kmemdup_sysctl_table(pn, gn); - if (ret < 0) - nf_ct_kfree_compat_sysctl_table(pn); - - return ret; + return generic_kmemdup_sysctl_table(pn, gn); } static struct nf_proto_net *generic_get_net_proto(struct net *net) diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index e769f0561621..982ea62606c7 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -705,54 +705,6 @@ static struct ctl_table sctp_sysctl_table[] = { }, { } }; - -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT -static struct ctl_table sctp_compat_sysctl_table[] = { - { - .procname = "ip_conntrack_sctp_timeout_closed", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_sctp_timeout_cookie_wait", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_sctp_timeout_cookie_echoed", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_sctp_timeout_established", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_sctp_timeout_shutdown_sent", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_sctp_timeout_shutdown_recd", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_sctp_timeout_shutdown_ack_sent", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { } -}; -#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif static int sctp_kmemdup_sysctl_table(struct nf_proto_net *pn, @@ -781,32 +733,8 @@ static int sctp_kmemdup_sysctl_table(struct nf_proto_net *pn, return 0; } -static int sctp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn, - struct sctp_net *sn) -{ -#ifdef CONFIG_SYSCTL -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT - pn->ctl_compat_table = kmemdup(sctp_compat_sysctl_table, - sizeof(sctp_compat_sysctl_table), - GFP_KERNEL); - if (!pn->ctl_compat_table) - return -ENOMEM; - - pn->ctl_compat_table[0].data = &sn->timeouts[SCTP_CONNTRACK_CLOSED]; - pn->ctl_compat_table[1].data = &sn->timeouts[SCTP_CONNTRACK_COOKIE_WAIT]; - pn->ctl_compat_table[2].data = &sn->timeouts[SCTP_CONNTRACK_COOKIE_ECHOED]; - pn->ctl_compat_table[3].data = &sn->timeouts[SCTP_CONNTRACK_ESTABLISHED]; - pn->ctl_compat_table[4].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_SENT]; - pn->ctl_compat_table[5].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_RECD]; - pn->ctl_compat_table[6].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_ACK_SENT]; -#endif -#endif - return 0; -} - static int sctp_init_net(struct net *net, u_int16_t proto) { - int ret; struct sctp_net *sn = sctp_pernet(net); struct nf_proto_net *pn = &sn->pn; @@ -817,18 +745,7 @@ static int sctp_init_net(struct net *net, u_int16_t proto) sn->timeouts[i] = sctp_timeouts[i]; } - if (proto == AF_INET) { - ret = sctp_kmemdup_compat_sysctl_table(pn, sn); - if (ret < 0) - return ret; - - ret = sctp_kmemdup_sysctl_table(pn, sn); - if (ret < 0) - nf_ct_kfree_compat_sysctl_table(pn); - } else - ret = sctp_kmemdup_sysctl_table(pn, sn); - - return ret; + return sctp_kmemdup_sysctl_table(pn, sn); } static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = { diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 4abe9e1f8909..69f687740c76 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1481,90 +1481,6 @@ static struct ctl_table tcp_sysctl_table[] = { }, { } }; - -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT -static struct ctl_table tcp_compat_sysctl_table[] = { - { - .procname = "ip_conntrack_tcp_timeout_syn_sent", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_tcp_timeout_syn_sent2", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_tcp_timeout_syn_recv", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_tcp_timeout_established", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_tcp_timeout_fin_wait", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_tcp_timeout_close_wait", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_tcp_timeout_last_ack", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_tcp_timeout_time_wait", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_tcp_timeout_close", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_tcp_timeout_max_retrans", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_tcp_loose", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "ip_conntrack_tcp_be_liberal", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "ip_conntrack_tcp_max_retrans", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { } -}; -#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif /* CONFIG_SYSCTL */ static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn, @@ -1597,38 +1513,8 @@ static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn, return 0; } -static int tcp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn, - struct nf_tcp_net *tn) -{ -#ifdef CONFIG_SYSCTL -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT - pn->ctl_compat_table = kmemdup(tcp_compat_sysctl_table, - sizeof(tcp_compat_sysctl_table), - GFP_KERNEL); - if (!pn->ctl_compat_table) - return -ENOMEM; - - pn->ctl_compat_table[0].data = &tn->timeouts[TCP_CONNTRACK_SYN_SENT]; - pn->ctl_compat_table[1].data = &tn->timeouts[TCP_CONNTRACK_SYN_SENT2]; - pn->ctl_compat_table[2].data = &tn->timeouts[TCP_CONNTRACK_SYN_RECV]; - pn->ctl_compat_table[3].data = &tn->timeouts[TCP_CONNTRACK_ESTABLISHED]; - pn->ctl_compat_table[4].data = &tn->timeouts[TCP_CONNTRACK_FIN_WAIT]; - pn->ctl_compat_table[5].data = &tn->timeouts[TCP_CONNTRACK_CLOSE_WAIT]; - pn->ctl_compat_table[6].data = &tn->timeouts[TCP_CONNTRACK_LAST_ACK]; - pn->ctl_compat_table[7].data = &tn->timeouts[TCP_CONNTRACK_TIME_WAIT]; - pn->ctl_compat_table[8].data = &tn->timeouts[TCP_CONNTRACK_CLOSE]; - pn->ctl_compat_table[9].data = &tn->timeouts[TCP_CONNTRACK_RETRANS]; - pn->ctl_compat_table[10].data = &tn->tcp_loose; - pn->ctl_compat_table[11].data = &tn->tcp_be_liberal; - pn->ctl_compat_table[12].data = &tn->tcp_max_retrans; -#endif -#endif - return 0; -} - static int tcp_init_net(struct net *net, u_int16_t proto) { - int ret; struct nf_tcp_net *tn = tcp_pernet(net); struct nf_proto_net *pn = &tn->pn; @@ -1643,18 +1529,7 @@ static int tcp_init_net(struct net *net, u_int16_t proto) tn->tcp_max_retrans = nf_ct_tcp_max_retrans; } - if (proto == AF_INET) { - ret = tcp_kmemdup_compat_sysctl_table(pn, tn); - if (ret < 0) - return ret; - - ret = tcp_kmemdup_sysctl_table(pn, tn); - if (ret < 0) - nf_ct_kfree_compat_sysctl_table(pn); - } else - ret = tcp_kmemdup_sysctl_table(pn, tn); - - return ret; + return tcp_kmemdup_sysctl_table(pn, tn); } static struct nf_proto_net *tcp_get_net_proto(struct net *net) diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 8a057e1e1247..20f35ed68030 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -218,23 +218,6 @@ static struct ctl_table udp_sysctl_table[] = { }, { } }; -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT -static struct ctl_table udp_compat_sysctl_table[] = { - { - .procname = "ip_conntrack_udp_timeout", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_udp_timeout_stream", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { } -}; -#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif /* CONFIG_SYSCTL */ static int udp_kmemdup_sysctl_table(struct nf_proto_net *pn, @@ -254,27 +237,8 @@ static int udp_kmemdup_sysctl_table(struct nf_proto_net *pn, return 0; } -static int udp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn, - struct nf_udp_net *un) -{ -#ifdef CONFIG_SYSCTL -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT - pn->ctl_compat_table = kmemdup(udp_compat_sysctl_table, - sizeof(udp_compat_sysctl_table), - GFP_KERNEL); - if (!pn->ctl_compat_table) - return -ENOMEM; - - pn->ctl_compat_table[0].data = &un->timeouts[UDP_CT_UNREPLIED]; - pn->ctl_compat_table[1].data = &un->timeouts[UDP_CT_REPLIED]; -#endif -#endif - return 0; -} - static int udp_init_net(struct net *net, u_int16_t proto) { - int ret; struct nf_udp_net *un = udp_pernet(net); struct nf_proto_net *pn = &un->pn; @@ -285,18 +249,7 @@ static int udp_init_net(struct net *net, u_int16_t proto) un->timeouts[i] = udp_timeouts[i]; } - if (proto == AF_INET) { - ret = udp_kmemdup_compat_sysctl_table(pn, un); - if (ret < 0) - return ret; - - ret = udp_kmemdup_sysctl_table(pn, un); - if (ret < 0) - nf_ct_kfree_compat_sysctl_table(pn); - } else - ret = udp_kmemdup_sysctl_table(pn, un); - - return ret; + return udp_kmemdup_sysctl_table(pn, un); } static struct nf_proto_net *udp_get_net_proto(struct net *net) From ee0f95440dc16dd0db9c2856bed1e7806d1839de Mon Sep 17 00:00:00 2001 From: Iyappan Subramanian Date: Fri, 12 Aug 2016 22:05:37 -0700 Subject: [PATCH 0158/1715] drivers: net: xgene: Fix compiler warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixed compiler warnings reported with -Wmaybe-uninitialized W=1, /drivers/net/ethernet/apm/xgene/xgene_enet_main.c: In function ‘xgene_enet_rx_frame’: ../drivers/net/ethernet/apm/xgene/xgene_enet_main.c:455:27: warning: variable ‘pdata’ set but not used [-Wunused-but-set-variable] struct xgene_enet_pdata *pdata; ^ ../drivers/net/ethernet/apm/xgene/xgene_enet_main.c: In function ‘xgene_enet_remove’: ../drivers/net/ethernet/apm/xgene/xgene_enet_main.c:1691:30: warning: variable ‘mac_ops’ set but not used [-Wunused-but-set-variable] const struct xgene_mac_ops *mac_ops; ^ Signed-off-by: Iyappan Subramanian Signed-off-by: David S. Miller --- drivers/net/ethernet/apm/xgene/xgene_enet_main.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index d1d6b5eeb613..ceeb3f41848d 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -452,7 +452,6 @@ static int xgene_enet_rx_frame(struct xgene_enet_desc_ring *rx_ring, struct xgene_enet_raw_desc *raw_desc) { struct net_device *ndev; - struct xgene_enet_pdata *pdata; struct device *dev; struct xgene_enet_desc_ring *buf_pool; u32 datalen, skb_index; @@ -461,7 +460,6 @@ static int xgene_enet_rx_frame(struct xgene_enet_desc_ring *rx_ring, int ret = 0; ndev = rx_ring->ndev; - pdata = netdev_priv(ndev); dev = ndev_to_dev(rx_ring->ndev); buf_pool = rx_ring->buf_pool; @@ -1688,11 +1686,9 @@ err: static int xgene_enet_remove(struct platform_device *pdev) { struct xgene_enet_pdata *pdata; - const struct xgene_mac_ops *mac_ops; struct net_device *ndev; pdata = platform_get_drvdata(pdev); - mac_ops = pdata->mac_ops; ndev = pdata->ndev; rtnl_lock(); From 6e434627bce2a4ccf6d7eea50c08b4fdbb70dd3e Mon Sep 17 00:00:00 2001 From: Iyappan Subramanian Date: Fri, 12 Aug 2016 22:05:38 -0700 Subject: [PATCH 0159/1715] drivers: net: xgene: fix: Add dma_unmap_single In addition to xgene_enet_delete_bufpool() freeing skbs, their associated dma memory should also be unmapped. Signed-off-by: Iyappan Subramanian Signed-off-by: David S. Miller --- drivers/net/ethernet/apm/xgene/xgene_enet_main.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index ceeb3f41848d..c1fd33d23306 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -72,7 +72,6 @@ static int xgene_enet_refill_bufpool(struct xgene_enet_desc_ring *buf_pool, skb = netdev_alloc_skb_ip_align(ndev, len); if (unlikely(!skb)) return -ENOMEM; - buf_pool->rx_skb[tail] = skb; dma_addr = dma_map_single(dev, skb->data, len, DMA_FROM_DEVICE); if (dma_mapping_error(dev, dma_addr)) { @@ -81,6 +80,8 @@ static int xgene_enet_refill_bufpool(struct xgene_enet_desc_ring *buf_pool, return -EINVAL; } + buf_pool->rx_skb[tail] = skb; + raw_desc->m1 = cpu_to_le64(SET_VAL(DATAADDR, dma_addr) | SET_VAL(BUFDATALEN, bufdatalen) | SET_BIT(COHERENT)); @@ -102,12 +103,21 @@ static u8 xgene_enet_hdr_len(const void *data) static void xgene_enet_delete_bufpool(struct xgene_enet_desc_ring *buf_pool) { + struct device *dev = ndev_to_dev(buf_pool->ndev); + struct xgene_enet_raw_desc16 *raw_desc; + dma_addr_t dma_addr; int i; /* Free up the buffers held by hardware */ for (i = 0; i < buf_pool->slots; i++) { - if (buf_pool->rx_skb[i]) + if (buf_pool->rx_skb[i]) { dev_kfree_skb_any(buf_pool->rx_skb[i]); + + raw_desc = &buf_pool->raw_desc16[i]; + dma_addr = GET_VAL(DATAADDR, le64_to_cpu(raw_desc->m1)); + dma_unmap_single(dev, dma_addr, XGENE_ENET_MAX_MTU, + DMA_FROM_DEVICE); + } } } From 15e32296e44738e6c8dff56c46fd18e9b4c9b6f6 Mon Sep 17 00:00:00 2001 From: Iyappan Subramanian Date: Fri, 12 Aug 2016 22:05:39 -0700 Subject: [PATCH 0160/1715] drivers: net: xgene: fix: Delete descriptor rings and buffer pools xgene_enet_init_hw() should delete any descriptor rings and buffer pools setup should le_ops->cle_init() return an error. Signed-off-by: Iyappan Subramanian Signed-off-by: David S. Miller --- drivers/net/ethernet/apm/xgene/xgene_enet_main.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index c1fd33d23306..6b36e0fb26c2 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -1462,10 +1462,8 @@ static int xgene_enet_init_hw(struct xgene_enet_pdata *pdata) buf_pool = pdata->rx_ring[i]->buf_pool; xgene_enet_init_bufpool(buf_pool); ret = xgene_enet_refill_bufpool(buf_pool, pdata->rx_buff_cnt); - if (ret) { - xgene_enet_delete_desc_rings(pdata); - return ret; - } + if (ret) + goto err; } dst_ring_num = xgene_enet_dst_ring_num(pdata->rx_ring[0]); @@ -1482,7 +1480,7 @@ static int xgene_enet_init_hw(struct xgene_enet_pdata *pdata) ret = pdata->cle_ops->cle_init(pdata); if (ret) { netdev_err(ndev, "Preclass Tree init error\n"); - return ret; + goto err; } } else { pdata->port_ops->cle_bypass(pdata, dst_ring_num, buf_pool->id); @@ -1492,6 +1490,10 @@ static int xgene_enet_init_hw(struct xgene_enet_pdata *pdata) pdata->mac_ops->init(pdata); return ret; + +err: + xgene_enet_delete_desc_rings(pdata); + return ret; } static void xgene_enet_setup_ops(struct xgene_enet_pdata *pdata) From cecd6e510c65b25c9a3fcc09480f561189a7c3b8 Mon Sep 17 00:00:00 2001 From: Iyappan Subramanian Date: Fri, 12 Aug 2016 22:05:40 -0700 Subject: [PATCH 0161/1715] drivers: net: xgene: Fix error deconstruction path Since register_netdev() call in xgene_enet_probe() was moved down to the end, it doesn't properly handle errors that may occur, by deconstructing everything that was setup before the error occurred. Signed-off-by: Iyappan Subramanian Signed-off-by: David S. Miller --- .../net/ethernet/apm/xgene/xgene_enet_main.c | 27 +++++++++++++------ 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index 6b36e0fb26c2..a727da8260c1 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -1641,8 +1641,8 @@ static int xgene_enet_probe(struct platform_device *pdev) } #endif if (!pdata->enet_id) { - free_netdev(ndev); - return -ENODEV; + ret = -ENODEV; + goto err; } ret = xgene_enet_get_resources(pdata); @@ -1665,7 +1665,7 @@ static int xgene_enet_probe(struct platform_device *pdev) ret = xgene_enet_init_hw(pdata); if (ret) - goto err_netdev; + goto err; link_state = pdata->mac_ops->link_state; if (pdata->phy_mode == PHY_INTERFACE_MODE_XGMII) { @@ -1675,21 +1675,32 @@ static int xgene_enet_probe(struct platform_device *pdev) ret = xgene_enet_mdio_config(pdata); else INIT_DELAYED_WORK(&pdata->link_work, link_state); + + if (ret) + goto err1; } - if (ret) - goto err; xgene_enet_napi_add(pdata); ret = register_netdev(ndev); if (ret) { netdev_err(ndev, "Failed to register netdev\n"); - goto err; + goto err2; } return 0; -err_netdev: - unregister_netdev(ndev); +err2: + /* + * If necessary, free_netdev() will call netif_napi_del() and undo + * the effects of xgene_enet_napi_add()'s calls to netif_napi_add(). + */ + + if (pdata->mdio_driver) + xgene_enet_phy_disconnect(pdata); + else if (pdata->phy_mode == PHY_INTERFACE_MODE_RGMII) + xgene_enet_mdio_remove(pdata); +err1: + xgene_enet_delete_desc_rings(pdata); err: free_netdev(ndev); return ret; From 29b4eafbae9cc5eec19377f4e5637a6e2dedd6bc Mon Sep 17 00:00:00 2001 From: Iyappan Subramanian Date: Fri, 12 Aug 2016 22:05:41 -0700 Subject: [PATCH 0162/1715] drivers: net: xgene: Fix RSS indirection table fields This patch fixes FPSel and NxtFPSel fields length to 5-bit value. Signed-off-by: Quan Nguyen Signed-off-by: Iyappan Subramanian Tested-by: Fushen Chen Signed-off-by: David S. Miller --- drivers/net/ethernet/apm/xgene/xgene_enet_cle.c | 17 ++++++++++++----- drivers/net/ethernet/apm/xgene/xgene_enet_cle.h | 10 +++++++--- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_cle.c b/drivers/net/ethernet/apm/xgene/xgene_enet_cle.c index 472c0fb3f4c4..23d72af83d82 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_cle.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_cle.c @@ -32,12 +32,19 @@ static void xgene_cle_sband_to_hw(u8 frag, enum xgene_cle_prot_version ver, SET_VAL(SB_HDRLEN, len); } -static void xgene_cle_idt_to_hw(u32 dstqid, u32 fpsel, +static void xgene_cle_idt_to_hw(struct xgene_enet_pdata *pdata, + u32 dstqid, u32 fpsel, u32 nfpsel, u32 *idt_reg) { - *idt_reg = SET_VAL(IDT_DSTQID, dstqid) | - SET_VAL(IDT_FPSEL, fpsel) | - SET_VAL(IDT_NFPSEL, nfpsel); + if (pdata->enet_id == XGENE_ENET1) { + *idt_reg = SET_VAL(IDT_DSTQID, dstqid) | + SET_VAL(IDT_FPSEL1, fpsel) | + SET_VAL(IDT_NFPSEL1, nfpsel); + } else { + *idt_reg = SET_VAL(IDT_DSTQID, dstqid) | + SET_VAL(IDT_FPSEL, fpsel) | + SET_VAL(IDT_NFPSEL, nfpsel); + } } static void xgene_cle_dbptr_to_hw(struct xgene_enet_pdata *pdata, @@ -344,7 +351,7 @@ static int xgene_cle_set_rss_idt(struct xgene_enet_pdata *pdata) nfpsel = 0; idt_reg = 0; - xgene_cle_idt_to_hw(dstqid, fpsel, nfpsel, &idt_reg); + xgene_cle_idt_to_hw(pdata, dstqid, fpsel, nfpsel, &idt_reg); ret = xgene_cle_dram_wr(&pdata->cle, &idt_reg, 1, i, RSS_IDT, CLE_CMD_WR); if (ret) diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_cle.h b/drivers/net/ethernet/apm/xgene/xgene_enet_cle.h index 33c5f6b25824..9ac9f8e145ec 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_cle.h +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_cle.h @@ -196,9 +196,13 @@ enum xgene_cle_ptree_dbptrs { #define IDT_DSTQID_POS 0 #define IDT_DSTQID_LEN 12 #define IDT_FPSEL_POS 12 -#define IDT_FPSEL_LEN 4 -#define IDT_NFPSEL_POS 16 -#define IDT_NFPSEL_LEN 4 +#define IDT_FPSEL_LEN 5 +#define IDT_NFPSEL_POS 17 +#define IDT_NFPSEL_LEN 5 +#define IDT_FPSEL1_POS 12 +#define IDT_FPSEL1_LEN 4 +#define IDT_NFPSEL1_POS 16 +#define IDT_NFPSEL1_LEN 4 struct xgene_cle_ptree_branch { bool valid; From 4f1c8d811251876ee0a1e82c7379305faf589974 Mon Sep 17 00:00:00 2001 From: Iyappan Subramanian Date: Fri, 12 Aug 2016 22:05:42 -0700 Subject: [PATCH 0163/1715] drivers: net: xgene: Change port init sequence This patch rearranges the port initialization sequence as recommended by hardware specification. This patch also removes, mac_init() call from xgene_enet_link_state(), as it was not required. Signed-off-by: Iyappan Subramanian Tested-by: Fushen Chen Signed-off-by: David S. Miller --- drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c index 9c6ad0dce00f..d53c05336c44 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c @@ -216,12 +216,12 @@ static void xgene_xgmac_init(struct xgene_enet_pdata *pdata) data |= CFG_RSIF_FPBUFF_TIMEOUT_EN; xgene_enet_wr_csr(pdata, XG_RSIF_CONFIG_REG_ADDR, data); - xgene_enet_wr_csr(pdata, XG_CFG_BYPASS_ADDR, RESUME_TX); - xgene_enet_wr_csr(pdata, XGENET_RX_DV_GATE_REG_0_ADDR, 0); xgene_enet_rd_csr(pdata, XG_ENET_SPARE_CFG_REG_ADDR, &data); data |= BIT(12); xgene_enet_wr_csr(pdata, XG_ENET_SPARE_CFG_REG_ADDR, data); xgene_enet_wr_csr(pdata, XG_ENET_SPARE_CFG_REG_1_ADDR, 0x82); + xgene_enet_wr_csr(pdata, XGENET_RX_DV_GATE_REG_0_ADDR, 0); + xgene_enet_wr_csr(pdata, XG_CFG_BYPASS_ADDR, RESUME_TX); } static void xgene_xgmac_rx_enable(struct xgene_enet_pdata *pdata) @@ -366,7 +366,6 @@ static void xgene_enet_link_state(struct work_struct *work) if (link_status) { if (!netif_carrier_ok(ndev)) { netif_carrier_on(ndev); - xgene_xgmac_init(pdata); xgene_xgmac_rx_enable(pdata); xgene_xgmac_tx_enable(pdata); netdev_info(ndev, "Link is Up - 10Gbps\n"); From 3eb7cb9dc94609c14c51029964ac4bfd59cc4d25 Mon Sep 17 00:00:00 2001 From: Iyappan Subramanian Date: Fri, 12 Aug 2016 22:05:43 -0700 Subject: [PATCH 0164/1715] drivers: net: xgene: XFI PCS reset when link is down This patch fixes the link recovery issue, by doing PCS reset when the link is down. Signed-off-by: Fushen Chen Signed-off-by: Iyappan Subramanian Signed-off-by: David S. Miller --- .../net/ethernet/apm/xgene/xgene_enet_hw.h | 6 +++ .../net/ethernet/apm/xgene/xgene_enet_main.c | 1 + .../net/ethernet/apm/xgene/xgene_enet_main.h | 1 + .../net/ethernet/apm/xgene/xgene_enet_xgmac.c | 42 +++++++++++++++++++ .../net/ethernet/apm/xgene/xgene_enet_xgmac.h | 4 ++ 5 files changed, 54 insertions(+) diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h index 179a44dceb29..8a8d05500894 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h @@ -124,6 +124,12 @@ enum xgene_enet_rm { #define MAC_READ_REG_OFFSET 0x0c #define MAC_COMMAND_DONE_REG_OFFSET 0x10 +#define PCS_ADDR_REG_OFFSET 0x00 +#define PCS_COMMAND_REG_OFFSET 0x04 +#define PCS_WRITE_REG_OFFSET 0x08 +#define PCS_READ_REG_OFFSET 0x0c +#define PCS_COMMAND_DONE_REG_OFFSET 0x10 + #define MII_MGMT_CONFIG_ADDR 0x20 #define MII_MGMT_COMMAND_ADDR 0x24 #define MII_MGMT_ADDRESS_ADDR 0x28 diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index a727da8260c1..01a9aaf49669 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -1433,6 +1433,7 @@ static int xgene_enet_get_resources(struct xgene_enet_pdata *pdata) } else { pdata->mcx_mac_addr = base_addr + BLOCK_AXG_MAC_OFFSET; pdata->mcx_mac_csr_addr = base_addr + BLOCK_AXG_MAC_CSR_OFFSET; + pdata->pcs_addr = base_addr + BLOCK_PCS_OFFSET; } pdata->rx_buff_cnt = NUM_PKT_BUF; diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h index 217546e5714a..53f4a165a56f 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h @@ -196,6 +196,7 @@ struct xgene_enet_pdata { void __iomem *mcx_mac_addr; void __iomem *mcx_mac_csr_addr; void __iomem *base_addr; + void __iomem *pcs_addr; void __iomem *ring_csr_addr; void __iomem *ring_cmd_addr; int phy_mode; diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c index d53c05336c44..4087dba6ea84 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c @@ -84,6 +84,21 @@ static void xgene_enet_wr_mac(struct xgene_enet_pdata *pdata, wr_addr); } +static void xgene_enet_wr_pcs(struct xgene_enet_pdata *pdata, + u32 wr_addr, u32 wr_data) +{ + void __iomem *addr, *wr, *cmd, *cmd_done; + + addr = pdata->pcs_addr + PCS_ADDR_REG_OFFSET; + wr = pdata->pcs_addr + PCS_WRITE_REG_OFFSET; + cmd = pdata->pcs_addr + PCS_COMMAND_REG_OFFSET; + cmd_done = pdata->pcs_addr + PCS_COMMAND_DONE_REG_OFFSET; + + if (!xgene_enet_wr_indirect(addr, wr, cmd, cmd_done, wr_addr, wr_data)) + netdev_err(pdata->ndev, "PCS write failed, addr: %04x\n", + wr_addr); +} + static void xgene_enet_rd_csr(struct xgene_enet_pdata *pdata, u32 offset, u32 *val) { @@ -122,6 +137,7 @@ static bool xgene_enet_rd_indirect(void __iomem *addr, void __iomem *rd, return true; } + static void xgene_enet_rd_mac(struct xgene_enet_pdata *pdata, u32 rd_addr, u32 *rd_data) { @@ -137,6 +153,21 @@ static void xgene_enet_rd_mac(struct xgene_enet_pdata *pdata, rd_addr); } +static void xgene_enet_rd_pcs(struct xgene_enet_pdata *pdata, + u32 rd_addr, u32 *rd_data) +{ + void __iomem *addr, *rd, *cmd, *cmd_done; + + addr = pdata->pcs_addr + PCS_ADDR_REG_OFFSET; + rd = pdata->pcs_addr + PCS_READ_REG_OFFSET; + cmd = pdata->pcs_addr + PCS_COMMAND_REG_OFFSET; + cmd_done = pdata->pcs_addr + PCS_COMMAND_DONE_REG_OFFSET; + + if (!xgene_enet_rd_indirect(addr, rd, cmd, cmd_done, rd_addr, rd_data)) + netdev_err(pdata->ndev, "PCS read failed, addr: %04x\n", + rd_addr); +} + static int xgene_enet_ecc_init(struct xgene_enet_pdata *pdata) { struct net_device *ndev = pdata->ndev; @@ -171,6 +202,15 @@ static void xgene_xgmac_reset(struct xgene_enet_pdata *pdata) xgene_enet_wr_mac(pdata, AXGMAC_CONFIG_0, 0); } +static void xgene_pcs_reset(struct xgene_enet_pdata *pdata) +{ + u32 data; + + xgene_enet_rd_pcs(pdata, PCS_CONTROL_1, &data); + xgene_enet_wr_pcs(pdata, PCS_CONTROL_1, data | PCS_CTRL_PCS_RST); + xgene_enet_wr_pcs(pdata, PCS_CONTROL_1, data & ~PCS_CTRL_PCS_RST); +} + static void xgene_xgmac_set_mac_addr(struct xgene_enet_pdata *pdata) { u32 addr0, addr1; @@ -379,6 +419,8 @@ static void xgene_enet_link_state(struct work_struct *work) netdev_info(ndev, "Link is Down\n"); } poll_interval = PHY_POLL_LINK_OFF; + + xgene_pcs_reset(pdata); } schedule_delayed_work(&pdata->link_work, poll_interval); diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.h b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.h index f1ea485f916b..360ccbd95566 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.h +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.h @@ -24,6 +24,7 @@ #define X2_BLOCK_ETH_MAC_CSR_OFFSET 0x3000 #define BLOCK_AXG_MAC_OFFSET 0x0800 #define BLOCK_AXG_MAC_CSR_OFFSET 0x2000 +#define BLOCK_PCS_OFFSET 0x3800 #define XGENET_CONFIG_REG_ADDR 0x20 #define XGENET_SRST_ADDR 0x00 @@ -72,6 +73,9 @@ #define XG_MCX_ICM_CONFIG0_REG_0_ADDR 0x00e0 #define XG_MCX_ICM_CONFIG2_REG_0_ADDR 0x00e8 +#define PCS_CONTROL_1 0x0000 +#define PCS_CTRL_PCS_RST BIT(15) + extern const struct xgene_mac_ops xgene_xgmac_ops; extern const struct xgene_port_ops xgene_xgport_ops; From 27ecf87c8a9d6152203ff5feb74535abd48e9ad3 Mon Sep 17 00:00:00 2001 From: Iyappan Subramanian Date: Fri, 12 Aug 2016 22:05:44 -0700 Subject: [PATCH 0165/1715] drivers: net: xgene: Poll link status via GPIO When 10GbE SFP+ module is not plugged in or cable is not connected, the link status register does not report the proper state due to floating signal. This patch checks the module present status via an GPIO to determine whether to ignore the link status register and report link down. Signed-off-by: Quan Nguyen Signed-off-by: Iyappan Subramanian Tested-by: Fushen Chen Signed-off-by: David S. Miller --- drivers/net/ethernet/apm/xgene/Kconfig | 1 + drivers/net/ethernet/apm/xgene/xgene_enet_main.c | 15 +++++++++++++++ drivers/net/ethernet/apm/xgene/xgene_enet_main.h | 1 + drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c | 6 ++++++ 4 files changed, 23 insertions(+) diff --git a/drivers/net/ethernet/apm/xgene/Kconfig b/drivers/net/ethernet/apm/xgene/Kconfig index 300e3b5c54e0..afccb033177b 100644 --- a/drivers/net/ethernet/apm/xgene/Kconfig +++ b/drivers/net/ethernet/apm/xgene/Kconfig @@ -4,6 +4,7 @@ config NET_XGENE depends on ARCH_XGENE || COMPILE_TEST select PHYLIB select MDIO_XGENE + select GPIOLIB help This is the Ethernet driver for the on-chip ethernet interface on the APM X-Gene SoC. diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index 01a9aaf49669..b8b9495e6da6 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -19,6 +19,7 @@ * along with this program. If not, see . */ +#include #include "xgene_enet_main.h" #include "xgene_enet_hw.h" #include "xgene_enet_sgmac.h" @@ -1320,6 +1321,18 @@ static int xgene_enet_check_phy_handle(struct xgene_enet_pdata *pdata) return 0; } +static void xgene_enet_gpiod_get(struct xgene_enet_pdata *pdata) +{ + struct device *dev = &pdata->pdev->dev; + + if (pdata->phy_mode != PHY_INTERFACE_MODE_XGMII) + return; + + pdata->sfp_rdy = gpiod_get(dev, "rxlos", GPIOD_IN); + if (IS_ERR(pdata->sfp_rdy)) + pdata->sfp_rdy = gpiod_get(dev, "sfp", GPIOD_IN); +} + static int xgene_enet_get_resources(struct xgene_enet_pdata *pdata) { struct platform_device *pdev; @@ -1409,6 +1422,8 @@ static int xgene_enet_get_resources(struct xgene_enet_pdata *pdata) if (ret) return ret; + xgene_enet_gpiod_get(pdata); + pdata->clk = devm_clk_get(&pdev->dev, NULL); if (IS_ERR(pdata->clk)) { /* Firmware may have set up the clock already. */ diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h index 53f4a165a56f..b339fc1e8841 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h @@ -217,6 +217,7 @@ struct xgene_enet_pdata { u8 tx_delay; u8 rx_delay; bool mdio_driver; + struct gpio_desc *sfp_rdy; }; struct xgene_indirect_ctl { diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c index 4087dba6ea84..d672e71b5a50 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c @@ -18,6 +18,8 @@ * along with this program. If not, see . */ +#include +#include #include "xgene_enet_main.h" #include "xgene_enet_hw.h" #include "xgene_enet_xgmac.h" @@ -399,10 +401,14 @@ static void xgene_enet_link_state(struct work_struct *work) { struct xgene_enet_pdata *pdata = container_of(to_delayed_work(work), struct xgene_enet_pdata, link_work); + struct gpio_desc *sfp_rdy = pdata->sfp_rdy; struct net_device *ndev = pdata->ndev; u32 link_status, poll_interval; link_status = xgene_enet_link_status(pdata); + if (link_status && !IS_ERR(sfp_rdy) && !gpiod_get_value(sfp_rdy)) + link_status = 0; + if (link_status) { if (!netif_carrier_ok(ndev)) { netif_carrier_on(ndev); From 72d256439fcfec43df4c498b1d9b6131c32a8994 Mon Sep 17 00:00:00 2001 From: Iyappan Subramanian Date: Fri, 12 Aug 2016 22:05:45 -0700 Subject: [PATCH 0166/1715] dtb: xgene: Add rxlos-gpios property Added rxlos GPIO mapping by adding rxlos-gpios property. Signed-off-by: Quan Nguyen Signed-off-by: Iyappan Subramanian Tested-by: Fushen Chen Signed-off-by: David S. Miller --- arch/arm64/boot/dts/apm/apm-mustang.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/apm/apm-mustang.dts b/arch/arm64/boot/dts/apm/apm-mustang.dts index b7fb5d9295c2..32a961c5e98a 100644 --- a/arch/arm64/boot/dts/apm/apm-mustang.dts +++ b/arch/arm64/boot/dts/apm/apm-mustang.dts @@ -74,6 +74,7 @@ &xgenet { status = "ok"; + rxlos-gpios = <&sbgpio 12 1>; }; &mmc0 { From c50fc2622cbce4341daf40a4e7ad3f1625c9e55c Mon Sep 17 00:00:00 2001 From: Iyappan Subramanian Date: Fri, 12 Aug 2016 22:05:46 -0700 Subject: [PATCH 0167/1715] Documentation: dtb: xgene: Add rxlos GPIO mapping Signed-off-by: Quan Nguyen Signed-off-by: Iyappan Subramanian Tested-by: Fushen Chen Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/apm-xgene-enet.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/devicetree/bindings/net/apm-xgene-enet.txt b/Documentation/devicetree/bindings/net/apm-xgene-enet.txt index e41b2d59ca7f..f591ab782dbc 100644 --- a/Documentation/devicetree/bindings/net/apm-xgene-enet.txt +++ b/Documentation/devicetree/bindings/net/apm-xgene-enet.txt @@ -47,6 +47,9 @@ Optional properties: Valid values are between 0 to 7, that maps to 273, 589, 899, 1222, 1480, 1806, 2147, 2464 ps Default value is 2, which corresponds to 899 ps +- rxlos-gpios: Input gpio from SFP+ module to indicate availability of + incoming signal. + Example: menetclk: menetclk { From 03377e381bf48d7aaa797577789551b04bce7006 Mon Sep 17 00:00:00 2001 From: Iyappan Subramanian Date: Fri, 12 Aug 2016 22:05:47 -0700 Subject: [PATCH 0168/1715] drivers: net: xgene: Fix backward compatibility This patch fixes the backward compatibility on handling phy_connect(), by iterating over the phy-handle, when new DT is used with older kernel. Signed-off-by: Iyappan Subramanian Signed-off-by: David S. Miller --- drivers/net/ethernet/apm/xgene/xgene_enet_hw.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c index 18bb9556dd00..321fb197621e 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c @@ -761,18 +761,18 @@ int xgene_enet_phy_connect(struct net_device *ndev) if (dev->of_node) { for (i = 0 ; i < 2; i++) { np = of_parse_phandle(dev->of_node, "phy-handle", i); - if (np) + + if (!np) + continue; + + phy_dev = of_phy_connect(ndev, np, + &xgene_enet_adjust_link, + 0, pdata->phy_mode); + of_node_put(np); + if (phy_dev) break; } - if (!np) { - netdev_dbg(ndev, "No phy-handle found in DT\n"); - return -ENODEV; - } - - phy_dev = of_phy_connect(ndev, np, &xgene_enet_adjust_link, - 0, pdata->phy_mode); - of_node_put(np); if (!phy_dev) { netdev_err(ndev, "Could not connect to PHY\n"); return -ENODEV; From 5ac6caab224921fd3331a7e4b19d34623c752d09 Mon Sep 17 00:00:00 2001 From: Iyappan Subramanian Date: Fri, 12 Aug 2016 22:05:48 -0700 Subject: [PATCH 0169/1715] dtb: xgene: Fix backward compatibility This patch fixes the backward compatibility when used with older kernel. Signed-off-by: Iyappan Subramanian Signed-off-by: David S. Miller --- arch/arm64/boot/dts/apm/apm-storm.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/apm/apm-storm.dtsi b/arch/arm64/boot/dts/apm/apm-storm.dtsi index f1c2c713f9b0..d5c3435324e8 100644 --- a/arch/arm64/boot/dts/apm/apm-storm.dtsi +++ b/arch/arm64/boot/dts/apm/apm-storm.dtsi @@ -923,7 +923,7 @@ /* mac address will be overwritten by the bootloader */ local-mac-address = [00 00 00 00 00 00]; phy-connection-type = "rgmii"; - phy-handle = <&menet0phy>,<&menetphy>; + phy-handle = <&menetphy>,<&menet0phy>; mdio { compatible = "apm,xgene-mdio"; #address-cells = <1>; From a5c3d4985eeaddee795e5a4cd71be35e5e149896 Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Fri, 12 Aug 2016 16:51:24 +0530 Subject: [PATCH 0170/1715] net: thunderx: Moved HW capability info from macros to structure Current driver has most of the HW maximums info like no of channels, traffic limiters, RSS indices e.t.c in the form of macros. These have been moved into a 'hw_info' structure so that support for VNIC on newer chips with different set of HW maximums can be added. Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nic.h | 41 ++------ .../net/ethernet/cavium/thunder/nic_main.c | 96 +++++++++++++++---- 2 files changed, 85 insertions(+), 52 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h index 83025bb4737c..66b499b1b774 100644 --- a/drivers/net/ethernet/cavium/thunder/nic.h +++ b/drivers/net/ethernet/cavium/thunder/nic.h @@ -20,6 +20,9 @@ #define PCI_DEVICE_ID_THUNDER_NIC_VF 0xA034 #define PCI_DEVICE_ID_THUNDER_BGX 0xA026 +/* Subsystem device IDs */ +#define PCI_SUBSYS_DEVID_88XX_NIC_PF 0xA11E + /* PCI BAR nos */ #define PCI_CFG_REG_BAR_NUM 0 #define PCI_MSIX_REG_BAR_NUM 4 @@ -41,40 +44,8 @@ /* Max pkinds */ #define NIC_MAX_PKIND 16 -/* Rx Channels */ -/* Receive channel configuration in TNS bypass mode - * Below is configuration in TNS bypass mode - * BGX0-LMAC0-CHAN0 - VNIC CHAN0 - * BGX0-LMAC1-CHAN0 - VNIC CHAN16 - * ... - * BGX1-LMAC0-CHAN0 - VNIC CHAN128 - * ... - * BGX1-LMAC3-CHAN0 - VNIC CHAN174 - */ -#define NIC_INTF_COUNT 2 /* Interfaces btw VNIC and TNS/BGX */ -#define NIC_CHANS_PER_INF 128 -#define NIC_MAX_CHANS (NIC_INTF_COUNT * NIC_CHANS_PER_INF) -#define NIC_CPI_COUNT 2048 /* No of channel parse indices */ - -/* TNS bypass mode: 1-1 mapping between VNIC and BGX:LMAC */ -#define NIC_MAX_BGX MAX_BGX_PER_CN88XX -#define NIC_CPI_PER_BGX (NIC_CPI_COUNT / NIC_MAX_BGX) -#define NIC_MAX_CPI_PER_LMAC 64 /* Max when CPI_ALG is IP diffserv */ -#define NIC_RSSI_PER_BGX (NIC_RSSI_COUNT / NIC_MAX_BGX) - -/* Tx scheduling */ -#define NIC_MAX_TL4 1024 -#define NIC_MAX_TL4_SHAPERS 256 /* 1 shaper for 4 TL4s */ -#define NIC_MAX_TL3 256 -#define NIC_MAX_TL3_SHAPERS 64 /* 1 shaper for 4 TL3s */ -#define NIC_MAX_TL2 64 -#define NIC_MAX_TL2_SHAPERS 2 /* 1 shaper for 32 TL2s */ -#define NIC_MAX_TL1 2 - -/* TNS bypass mode */ -#define NIC_TL2_PER_BGX 32 -#define NIC_TL4_PER_BGX (NIC_MAX_TL4 / NIC_MAX_BGX) -#define NIC_TL4_PER_LMAC (NIC_MAX_TL4 / NIC_CHANS_PER_INF) +/* Max when CPI_ALG is IP diffserv */ +#define NIC_MAX_CPI_PER_LMAC 64 /* NIC VF Interrupts */ #define NICVF_INTR_CQ 0 @@ -148,7 +119,6 @@ struct nicvf_cq_poll { struct napi_struct napi; }; -#define NIC_RSSI_COUNT 4096 /* Total no of RSS indices */ #define NIC_MAX_RSS_HASH_BITS 8 #define NIC_MAX_RSS_IDR_TBL_SIZE (1 << NIC_MAX_RSS_HASH_BITS) #define RSS_HASH_KEY_SIZE 5 /* 320 bit key */ @@ -273,6 +243,7 @@ struct nicvf { struct net_device *netdev; struct pci_dev *pdev; void __iomem *reg_base; +#define MAX_QUEUES_PER_QSET 8 struct queue_set *qs; struct nicvf_cq_poll *napi[8]; u8 vf_id; diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c index 16ed20357c5c..dc845a01fecb 100644 --- a/drivers/net/ethernet/cavium/thunder/nic_main.c +++ b/drivers/net/ethernet/cavium/thunder/nic_main.c @@ -20,8 +20,23 @@ #define DRV_NAME "thunder-nic" #define DRV_VERSION "1.0" +struct hw_info { + u8 bgx_cnt; + u8 chans_per_lmac; + u8 chans_per_bgx; /* Rx/Tx chans */ + u16 cpi_cnt; + u16 rssi_cnt; + u16 rss_ind_tbl_size; + u16 tl4_cnt; + u16 tl3_cnt; + u8 tl2_cnt; + u8 tl1_cnt; + bool tl1_per_bgx; /* TL1 per BGX or per LMAC */ +}; + struct nicpf { struct pci_dev *pdev; + struct hw_info *hw; u8 node; unsigned int flags; u8 num_vf_en; /* No of VF enabled */ @@ -44,7 +59,6 @@ struct nicpf { u32 speed[MAX_LMAC]; u16 cpi_base[MAX_NUM_VFS_SUPPORTED]; u16 rssi_base[MAX_NUM_VFS_SUPPORTED]; - u16 rss_ind_tbl_size; bool mbx_lock[MAX_NUM_VFS_SUPPORTED]; /* MSI-X */ @@ -275,7 +289,7 @@ static void nic_set_lmac_vf_mapping(struct nicpf *nic) nic->num_vf_en = 0; - for (bgx = 0; bgx < NIC_MAX_BGX; bgx++) { + for (bgx = 0; bgx < nic->hw->bgx_cnt; bgx++) { if (!(bgx_map & (1 << bgx))) continue; lmac_cnt = bgx_get_lmac_count(nic->node, bgx); @@ -298,6 +312,30 @@ static void nic_set_lmac_vf_mapping(struct nicpf *nic) } } +static void nic_get_hw_info(struct nicpf *nic) +{ + u16 sdevid; + struct hw_info *hw = nic->hw; + + pci_read_config_word(nic->pdev, PCI_SUBSYSTEM_ID, &sdevid); + + switch (sdevid) { + case PCI_SUBSYS_DEVID_88XX_NIC_PF: + hw->bgx_cnt = MAX_BGX_PER_CN88XX; + hw->chans_per_lmac = 16; + hw->chans_per_bgx = 128; + hw->cpi_cnt = 2048; + hw->rssi_cnt = 4096; + hw->rss_ind_tbl_size = NIC_MAX_RSS_IDR_TBL_SIZE; + hw->tl3_cnt = 256; + hw->tl2_cnt = 64; + hw->tl1_cnt = 2; + hw->tl1_per_bgx = true; + break; + } + hw->tl4_cnt = MAX_QUEUES_PER_QSET * pci_sriov_get_totalvfs(nic->pdev); +} + #define BGX0_BLOCK 8 #define BGX1_BLOCK 9 @@ -306,6 +344,9 @@ static void nic_init_hw(struct nicpf *nic) int i; u64 cqm_cfg; + /* Get HW capability info */ + nic_get_hw_info(nic); + /* Enable NIC HW block */ nic_reg_write(nic, NIC_PF_CFG, 0x3); @@ -351,6 +392,7 @@ static void nic_init_hw(struct nicpf *nic) /* Channel parse index configuration */ static void nic_config_cpi(struct nicpf *nic, struct cpi_cfg_msg *cfg) { + struct hw_info *hw = nic->hw; u32 vnic, bgx, lmac, chan; u32 padd, cpi_count = 0; u64 cpi_base, cpi, rssi_base, rssi; @@ -360,9 +402,11 @@ static void nic_config_cpi(struct nicpf *nic, struct cpi_cfg_msg *cfg) bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vnic]); lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vnic]); - chan = (lmac * MAX_BGX_CHANS_PER_LMAC) + (bgx * NIC_CHANS_PER_INF); - cpi_base = (lmac * NIC_MAX_CPI_PER_LMAC) + (bgx * NIC_CPI_PER_BGX); - rssi_base = (lmac * nic->rss_ind_tbl_size) + (bgx * NIC_RSSI_PER_BGX); + chan = (lmac * hw->chans_per_lmac) + (bgx * hw->chans_per_bgx); + cpi_base = (lmac * NIC_MAX_CPI_PER_LMAC) + + (bgx * (hw->cpi_cnt / hw->bgx_cnt)); + rssi_base = (lmac * hw->rss_ind_tbl_size) + + (bgx * (hw->rssi_cnt / hw->bgx_cnt)); /* Rx channel configuration */ nic_reg_write(nic, NIC_PF_CHAN_0_255_RX_BP_CFG | (chan << 3), @@ -434,7 +478,7 @@ static void nic_send_rss_size(struct nicpf *nic, int vf) msg = (u64 *)&mbx; mbx.rss_size.msg = NIC_MBOX_MSG_RSS_SIZE; - mbx.rss_size.ind_tbl_size = nic->rss_ind_tbl_size; + mbx.rss_size.ind_tbl_size = nic->hw->rss_ind_tbl_size; nic_send_msg_to_vf(nic, vf, &mbx); } @@ -494,6 +538,7 @@ static void nic_config_rss(struct nicpf *nic, struct rss_cfg_msg *cfg) static void nic_tx_channel_cfg(struct nicpf *nic, u8 vnic, struct sq_cfg_msg *sq) { + struct hw_info *hw = nic->hw; u32 bgx, lmac, chan; u32 tl2, tl3, tl4; u32 rr_quantum; @@ -512,21 +557,24 @@ static void nic_tx_channel_cfg(struct nicpf *nic, u8 vnic, /* 24 bytes for FCS, IPG and preamble */ rr_quantum = ((NIC_HW_MAX_FRS + 24) / 4); + /* For 88xx 0-511 TL4 transmits via BGX0 and + * 512-1023 TL4s transmit via BGX1. + */ + tl4 = bgx * (hw->tl4_cnt / hw->bgx_cnt); if (!sq->sqs_mode) { - tl4 = (lmac * NIC_TL4_PER_LMAC) + (bgx * NIC_TL4_PER_BGX); + tl4 += (lmac * MAX_QUEUES_PER_QSET); } else { for (svf = 0; svf < MAX_SQS_PER_VF; svf++) { if (nic->vf_sqs[pqs_vnic][svf] == vnic) break; } - tl4 = (MAX_LMAC_PER_BGX * NIC_TL4_PER_LMAC); - tl4 += (lmac * NIC_TL4_PER_LMAC * MAX_SQS_PER_VF); - tl4 += (svf * NIC_TL4_PER_LMAC); - tl4 += (bgx * NIC_TL4_PER_BGX); + tl4 += (MAX_LMAC_PER_BGX * MAX_QUEUES_PER_QSET); + tl4 += (lmac * MAX_QUEUES_PER_QSET * MAX_SQS_PER_VF); + tl4 += (svf * MAX_QUEUES_PER_QSET); } tl4 += sq_idx; - tl3 = tl4 / (NIC_MAX_TL4 / NIC_MAX_TL3); + tl3 = tl4 / (hw->tl4_cnt / hw->tl3_cnt); nic_reg_write(nic, NIC_PF_QSET_0_127_SQ_0_7_CFG2 | ((u64)vnic << NIC_QS_ID_SHIFT) | ((u32)sq_idx << NIC_Q_NUM_SHIFT), tl4); @@ -534,8 +582,13 @@ static void nic_tx_channel_cfg(struct nicpf *nic, u8 vnic, ((u64)vnic << 27) | ((u32)sq_idx << 24) | rr_quantum); nic_reg_write(nic, NIC_PF_TL3_0_255_CFG | (tl3 << 3), rr_quantum); - chan = (lmac * MAX_BGX_CHANS_PER_LMAC) + (bgx * NIC_CHANS_PER_INF); + + /* On 88xx 0-127 channels are for BGX0 and + * 127-255 channels for BGX1. + */ + chan = (lmac * hw->chans_per_lmac) + (bgx * hw->chans_per_bgx); nic_reg_write(nic, NIC_PF_TL3_0_255_CHAN | (tl3 << 3), chan); + /* Enable backpressure on the channel */ nic_reg_write(nic, NIC_PF_CHAN_0_255_TX_CFG | (chan << 3), 1); @@ -1008,6 +1061,12 @@ static int nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (!nic) return -ENOMEM; + nic->hw = devm_kzalloc(dev, sizeof(struct hw_info), GFP_KERNEL); + if (!nic->hw) { + devm_kfree(dev, nic); + return -ENOMEM; + } + pci_set_drvdata(pdev, nic); nic->pdev = pdev; @@ -1047,13 +1106,10 @@ static int nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) nic->node = nic_get_node_id(pdev); - nic_set_lmac_vf_mapping(nic); - /* Initialize hardware */ nic_init_hw(nic); - /* Set RSS TBL size for each VF */ - nic->rss_ind_tbl_size = NIC_MAX_RSS_IDR_TBL_SIZE; + nic_set_lmac_vf_mapping(nic); /* Register interrupts */ err = nic_register_interrupts(nic); @@ -1086,6 +1142,8 @@ err_unregister_interrupts: err_release_regions: pci_release_regions(pdev); err_disable_device: + devm_kfree(dev, nic->hw); + devm_kfree(dev, nic); pci_disable_device(pdev); pci_set_drvdata(pdev, NULL); return err; @@ -1106,6 +1164,10 @@ static void nic_remove(struct pci_dev *pdev) nic_unregister_interrupts(nic); pci_release_regions(pdev); + + devm_kfree(&pdev->dev, nic->hw); + devm_kfree(&pdev->dev, nic); + pci_disable_device(pdev); pci_set_drvdata(pdev, NULL); } From f7ff0ae844128e669704860d8c9d1ff510cee271 Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Fri, 12 Aug 2016 16:51:25 +0530 Subject: [PATCH 0171/1715] net: thunderx: Add VNIC's PCI devid on future chips This patch adds PCI device IDs of VNIC on newer chips and also registers VF driver with them. Device id remains same for all versions of chips but subsystem device id changes. Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nic.h | 10 +++++++++- drivers/net/ethernet/cavium/thunder/nicvf_main.c | 14 ++++++++++++-- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h index 66b499b1b774..6b0b2407ca3b 100644 --- a/drivers/net/ethernet/cavium/thunder/nic.h +++ b/drivers/net/ethernet/cavium/thunder/nic.h @@ -21,7 +21,15 @@ #define PCI_DEVICE_ID_THUNDER_BGX 0xA026 /* Subsystem device IDs */ -#define PCI_SUBSYS_DEVID_88XX_NIC_PF 0xA11E +#define PCI_SUBSYS_DEVID_88XX_NIC_PF 0xA11E +#define PCI_SUBSYS_DEVID_81XX_NIC_PF 0xA21E +#define PCI_SUBSYS_DEVID_83XX_NIC_PF 0xA31E + +#define PCI_SUBSYS_DEVID_88XX_PASS1_NIC_VF 0xA11E +#define PCI_SUBSYS_DEVID_88XX_NIC_VF 0xA134 +#define PCI_SUBSYS_DEVID_81XX_NIC_VF 0xA234 +#define PCI_SUBSYS_DEVID_83XX_NIC_VF 0xA334 + /* PCI BAR nos */ #define PCI_CFG_REG_BAR_NUM 0 diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index a19e73f11d73..0c106350d5a8 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -29,10 +29,20 @@ static const struct pci_device_id nicvf_id_table[] = { { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_NIC_VF, - PCI_VENDOR_ID_CAVIUM, 0xA134) }, + PCI_VENDOR_ID_CAVIUM, + PCI_SUBSYS_DEVID_88XX_NIC_VF) }, { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_PASS1_NIC_VF, - PCI_VENDOR_ID_CAVIUM, 0xA11E) }, + PCI_VENDOR_ID_CAVIUM, + PCI_SUBSYS_DEVID_88XX_PASS1_NIC_VF) }, + { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM, + PCI_DEVICE_ID_THUNDER_NIC_VF, + PCI_VENDOR_ID_CAVIUM, + PCI_SUBSYS_DEVID_81XX_NIC_VF) }, + { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM, + PCI_DEVICE_ID_THUNDER_NIC_VF, + PCI_VENDOR_ID_CAVIUM, + PCI_SUBSYS_DEVID_83XX_NIC_VF) }, { 0, } /* end of table */ }; From 0025d93ebb9b4f7ad7807d22fe65852f447309a1 Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Fri, 12 Aug 2016 16:51:26 +0530 Subject: [PATCH 0172/1715] net: thunderx: Add support for 81xx and 83xx chips This patch adds info on HW maximums of 81xx/83xx and also configures receive and transmit datapaths accordingly. Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- .../net/ethernet/cavium/thunder/nic_main.c | 87 +++++++++++++++---- drivers/net/ethernet/cavium/thunder/nic_reg.h | 1 + .../net/ethernet/cavium/thunder/thunder_bgx.h | 2 + 3 files changed, 73 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c index dc845a01fecb..4974923b9d44 100644 --- a/drivers/net/ethernet/cavium/thunder/nic_main.c +++ b/drivers/net/ethernet/cavium/thunder/nic_main.c @@ -24,6 +24,8 @@ struct hw_info { u8 bgx_cnt; u8 chans_per_lmac; u8 chans_per_bgx; /* Rx/Tx chans */ + u8 chans_per_rgx; + u8 chans_per_lbk; u16 cpi_cnt; u16 rssi_cnt; u16 rss_ind_tbl_size; @@ -332,6 +334,33 @@ static void nic_get_hw_info(struct nicpf *nic) hw->tl1_cnt = 2; hw->tl1_per_bgx = true; break; + case PCI_SUBSYS_DEVID_81XX_NIC_PF: + hw->bgx_cnt = MAX_BGX_PER_CN81XX; + hw->chans_per_lmac = 8; + hw->chans_per_bgx = 32; + hw->chans_per_rgx = 8; + hw->chans_per_lbk = 24; + hw->cpi_cnt = 512; + hw->rssi_cnt = 256; + hw->rss_ind_tbl_size = 32; /* Max RSSI / Max interfaces */ + hw->tl3_cnt = 64; + hw->tl2_cnt = 16; + hw->tl1_cnt = 10; + hw->tl1_per_bgx = false; + break; + case PCI_SUBSYS_DEVID_83XX_NIC_PF: + hw->bgx_cnt = MAX_BGX_PER_CN83XX; + hw->chans_per_lmac = 8; + hw->chans_per_bgx = 32; + hw->chans_per_lbk = 64; + hw->cpi_cnt = 2048; + hw->rssi_cnt = 1024; + hw->rss_ind_tbl_size = 64; /* Max RSSI / Max interfaces */ + hw->tl3_cnt = 256; + hw->tl2_cnt = 64; + hw->tl1_cnt = 18; + hw->tl1_per_bgx = false; + break; } hw->tl4_cnt = MAX_QUEUES_PER_QSET * pci_sriov_get_totalvfs(nic->pdev); } @@ -353,11 +382,15 @@ static void nic_init_hw(struct nicpf *nic) /* Enable backpressure */ nic_reg_write(nic, NIC_PF_BP_CFG, (1ULL << 6) | 0x03); - /* Disable TNS mode on both interfaces */ - nic_reg_write(nic, NIC_PF_INTF_0_1_SEND_CFG, - (NIC_TNS_BYPASS_MODE << 7) | BGX0_BLOCK); - nic_reg_write(nic, NIC_PF_INTF_0_1_SEND_CFG | (1 << 8), - (NIC_TNS_BYPASS_MODE << 7) | BGX1_BLOCK); + /* TNS and TNS bypass modes are present only on 88xx */ + if (nic->pdev->subsystem_device == PCI_SUBSYS_DEVID_88XX_NIC_PF) { + /* Disable TNS mode on both interfaces */ + nic_reg_write(nic, NIC_PF_INTF_0_1_SEND_CFG, + (NIC_TNS_BYPASS_MODE << 7) | BGX0_BLOCK); + nic_reg_write(nic, NIC_PF_INTF_0_1_SEND_CFG | (1 << 8), + (NIC_TNS_BYPASS_MODE << 7) | BGX1_BLOCK); + } + nic_reg_write(nic, NIC_PF_INTF_0_1_BP_CFG, (1ULL << 63) | BGX0_BLOCK); nic_reg_write(nic, NIC_PF_INTF_0_1_BP_CFG + (1 << 8), @@ -525,7 +558,7 @@ static void nic_config_rss(struct nicpf *nic, struct rss_cfg_msg *cfg) /* 4 level transmit side scheduler configutation * for TNS bypass mode * - * Sample configuration for SQ0 + * Sample configuration for SQ0 on 88xx * VNIC0-SQ0 -> TL4(0) -> TL3[0] -> TL2[0] -> TL1[0] -> BGX0 * VNIC1-SQ0 -> TL4(8) -> TL3[2] -> TL2[0] -> TL1[0] -> BGX0 * VNIC2-SQ0 -> TL4(16) -> TL3[4] -> TL2[1] -> TL1[0] -> BGX0 @@ -560,17 +593,21 @@ static void nic_tx_channel_cfg(struct nicpf *nic, u8 vnic, /* For 88xx 0-511 TL4 transmits via BGX0 and * 512-1023 TL4s transmit via BGX1. */ - tl4 = bgx * (hw->tl4_cnt / hw->bgx_cnt); - if (!sq->sqs_mode) { - tl4 += (lmac * MAX_QUEUES_PER_QSET); - } else { - for (svf = 0; svf < MAX_SQS_PER_VF; svf++) { - if (nic->vf_sqs[pqs_vnic][svf] == vnic) - break; + if (hw->tl1_per_bgx) { + tl4 = bgx * (hw->tl4_cnt / hw->bgx_cnt); + if (!sq->sqs_mode) { + tl4 += (lmac * MAX_QUEUES_PER_QSET); + } else { + for (svf = 0; svf < MAX_SQS_PER_VF; svf++) { + if (nic->vf_sqs[pqs_vnic][svf] == vnic) + break; + } + tl4 += (MAX_LMAC_PER_BGX * MAX_QUEUES_PER_QSET); + tl4 += (lmac * MAX_QUEUES_PER_QSET * MAX_SQS_PER_VF); + tl4 += (svf * MAX_QUEUES_PER_QSET); } - tl4 += (MAX_LMAC_PER_BGX * MAX_QUEUES_PER_QSET); - tl4 += (lmac * MAX_QUEUES_PER_QSET * MAX_SQS_PER_VF); - tl4 += (svf * MAX_QUEUES_PER_QSET); + } else { + tl4 = (vnic * MAX_QUEUES_PER_QSET); } tl4 += sq_idx; @@ -585,9 +622,15 @@ static void nic_tx_channel_cfg(struct nicpf *nic, u8 vnic, /* On 88xx 0-127 channels are for BGX0 and * 127-255 channels for BGX1. + * + * On 81xx/83xx TL3_CHAN reg should be configured with channel + * within LMAC i.e 0-7 and not the actual channel number like on 88xx */ chan = (lmac * hw->chans_per_lmac) + (bgx * hw->chans_per_bgx); - nic_reg_write(nic, NIC_PF_TL3_0_255_CHAN | (tl3 << 3), chan); + if (hw->tl1_per_bgx) + nic_reg_write(nic, NIC_PF_TL3_0_255_CHAN | (tl3 << 3), chan); + else + nic_reg_write(nic, NIC_PF_TL3_0_255_CHAN | (tl3 << 3), 0); /* Enable backpressure on the channel */ nic_reg_write(nic, NIC_PF_CHAN_0_255_TX_CFG | (chan << 3), 1); @@ -597,6 +640,16 @@ static void nic_tx_channel_cfg(struct nicpf *nic, u8 vnic, nic_reg_write(nic, NIC_PF_TL2_0_63_CFG | (tl2 << 3), rr_quantum); /* No priorities as of now */ nic_reg_write(nic, NIC_PF_TL2_0_63_PRI | (tl2 << 3), 0x00); + + /* Unlike 88xx where TL2s 0-31 transmits to TL1 '0' and rest to TL1 '1' + * on 81xx/83xx TL2 needs to be configured to transmit to one of the + * possible LMACs. + * + * This register doesn't exist on 88xx. + */ + if (!hw->tl1_per_bgx) + nic_reg_write(nic, NIC_PF_TL2_LMAC | (tl2 << 3), + lmac + (bgx * MAX_LMAC_PER_BGX)); } /* Send primary nicvf pointer to secondary QS's VF */ diff --git a/drivers/net/ethernet/cavium/thunder/nic_reg.h b/drivers/net/ethernet/cavium/thunder/nic_reg.h index afb10e326b4f..833cf3d313fd 100644 --- a/drivers/net/ethernet/cavium/thunder/nic_reg.h +++ b/drivers/net/ethernet/cavium/thunder/nic_reg.h @@ -103,6 +103,7 @@ #define NIC_PF_SW_SYNC_RX_DONE (0x490008) #define NIC_PF_TL2_0_63_CFG (0x500000) #define NIC_PF_TL2_0_63_PRI (0x520000) +#define NIC_PF_TL2_LMAC (0x540000) #define NIC_PF_TL2_0_63_SH_STATUS (0x580000) #define NIC_PF_TL3A_0_63_CFG (0x5F0000) #define NIC_PF_TL3_0_255_CFG (0x600000) diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h index 42010d2e5ddf..9f5e49a91783 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h @@ -11,6 +11,8 @@ #define MAX_BGX_THUNDER 8 /* Max 4 nodes, 2 per node */ #define MAX_BGX_PER_CN88XX 2 +#define MAX_BGX_PER_CN81XX 2 +#define MAX_BGX_PER_CN83XX 4 #define MAX_LMAC_PER_BGX 4 #define MAX_BGX_CHANS_PER_LMAC 16 #define MAX_DMAC_PER_LMAC 8 From 3a397ebe154b385e5e3d0c7fee478ccef58742fc Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Fri, 12 Aug 2016 16:51:27 +0530 Subject: [PATCH 0173/1715] net: thunderx: Set queue count based on number of CPUs 81xx has only 4 CPUs, so it doesn't make sense to initialize entire Qset i.e 8 queues by default. Made changes to queue initialization to init queues equal to number of CPUs or 8 queues whichever is lesser. Also this will be applicable to VMs with VNIC VF attached and having less VCPUs Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nic_main.c | 6 ++++++ drivers/net/ethernet/cavium/thunder/nicvf_main.c | 7 +++---- drivers/net/ethernet/cavium/thunder/nicvf_queues.c | 8 ++++---- drivers/net/ethernet/cavium/thunder/nicvf_queues.h | 5 +---- 4 files changed, 14 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c index 4974923b9d44..0d81117f019c 100644 --- a/drivers/net/ethernet/cavium/thunder/nic_main.c +++ b/drivers/net/ethernet/cavium/thunder/nic_main.c @@ -1009,6 +1009,12 @@ static int nic_num_sqs_en(struct nicpf *nic, int vf_en) int pos, sqs_per_vf = MAX_SQS_PER_VF_SINGLE_NODE; u16 total_vf; + /* Secondary Qsets are needed only if CPU count is + * morethan MAX_QUEUES_PER_QSET. + */ + if (num_online_cpus() <= MAX_QUEUES_PER_QSET) + return 0; + /* Check if its a multi-node environment */ if (nr_node_ids > 1) sqs_per_vf = MAX_SQS_PER_VF; diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index 0c106350d5a8..d9efe2fb7b81 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -1537,14 +1537,13 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_release_regions; } - qcount = MAX_CMP_QUEUES_PER_QS; + qcount = netif_get_num_default_rss_queues(); /* Restrict multiqset support only for host bound VFs */ if (pdev->is_virtfn) { /* Set max number of queues per VF */ - qcount = roundup(num_online_cpus(), MAX_CMP_QUEUES_PER_QS); - qcount = min(qcount, - (MAX_SQS_PER_VF + 1) * MAX_CMP_QUEUES_PER_QS); + qcount = min_t(int, num_online_cpus(), + (MAX_SQS_PER_VF + 1) * MAX_CMP_QUEUES_PER_QS); } netdev = alloc_etherdev_mqs(sizeof(struct nicvf), qcount, qcount); diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c index 0ff8e60deccb..e521a94dc79f 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -762,10 +762,10 @@ int nicvf_set_qset_resources(struct nicvf *nic) nic->qs = qs; /* Set count of each queue */ - qs->rbdr_cnt = RBDR_CNT; - qs->rq_cnt = RCV_QUEUE_CNT; - qs->sq_cnt = SND_QUEUE_CNT; - qs->cq_cnt = CMP_QUEUE_CNT; + qs->rbdr_cnt = DEFAULT_RBDR_CNT; + qs->rq_cnt = min_t(u8, MAX_RCV_QUEUES_PER_QS, num_online_cpus()); + qs->sq_cnt = min_t(u8, MAX_SND_QUEUES_PER_QS, num_online_cpus()); + qs->cq_cnt = max_t(u8, qs->rq_cnt, qs->sq_cnt); /* Set queue lengths */ qs->rbdr_len = RCV_BUF_COUNT; diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h index 6673e1133523..869f3386028b 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h @@ -57,10 +57,7 @@ #define CMP_QUEUE_SIZE6 6ULL /* 64K entries */ /* Default queue count per QS, its lengths and threshold values */ -#define RBDR_CNT 1 -#define RCV_QUEUE_CNT 8 -#define SND_QUEUE_CNT 8 -#define CMP_QUEUE_CNT 8 /* Max of RCV and SND qcount */ +#define DEFAULT_RBDR_CNT 1 #define SND_QSIZE SND_QUEUE_SIZE2 #define SND_QUEUE_LEN (1ULL << (SND_QSIZE + 10)) From 02a72bd8cded6aea8385d996b7d3d5403653f88a Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Fri, 12 Aug 2016 16:51:28 +0530 Subject: [PATCH 0174/1715] net: thunderx: Enable CQE_RX desc's extension fields Unlike 88xx, CQE_RX descriptor's tunnelling extension i.e CQE_RX2_S is always enabled on 81xx/83xx and HW does insert these fields into CQE_RX. As a result receive buffer addresses will now be present at 7th word of CQE_RX instead of 6th. Enable CQE_RX2_S on 88xx pass 2.x as well. Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nic.h | 9 ++++++++- drivers/net/ethernet/cavium/thunder/nic_main.c | 7 +++++++ drivers/net/ethernet/cavium/thunder/nic_reg.h | 1 + drivers/net/ethernet/cavium/thunder/nicvf_queues.c | 12 +++++++++++- 4 files changed, 27 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h index 6b0b2407ca3b..136db2a8f044 100644 --- a/drivers/net/ethernet/cavium/thunder/nic.h +++ b/drivers/net/ethernet/cavium/thunder/nic.h @@ -493,7 +493,14 @@ static inline int nic_get_node_id(struct pci_dev *pdev) static inline bool pass1_silicon(struct pci_dev *pdev) { - return pdev->revision < 8; + return (pdev->revision < 8) && + (pdev->subsystem_device == PCI_SUBSYS_DEVID_88XX_NIC_PF); +} + +static inline bool pass2_silicon(struct pci_dev *pdev) +{ + return (pdev->revision >= 8) && + (pdev->subsystem_device == PCI_SUBSYS_DEVID_88XX_NIC_PF); } int nicvf_set_real_num_queues(struct net_device *netdev, diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c index 0d81117f019c..3f52b364e12d 100644 --- a/drivers/net/ethernet/cavium/thunder/nic_main.c +++ b/drivers/net/ethernet/cavium/thunder/nic_main.c @@ -799,6 +799,13 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf) (mbx.rq.qs_num << NIC_QS_ID_SHIFT) | (mbx.rq.rq_num << NIC_Q_NUM_SHIFT); nic_reg_write(nic, reg_addr, mbx.rq.cfg); + /* Enable CQE_RX2_S extension in CQE_RX descriptor. + * This gets appended by default on 81xx/83xx chips, + * for consistency enabling the same on 88xx pass2 + * where this is introduced. + */ + if (pass2_silicon(nic->pdev)) + nic_reg_write(nic, NIC_PF_RX_CFG, 0x01); break; case NIC_MBOX_MSG_RQ_BP_CFG: reg_addr = NIC_PF_QSET_0_127_RQ_0_7_BP_CFG | diff --git a/drivers/net/ethernet/cavium/thunder/nic_reg.h b/drivers/net/ethernet/cavium/thunder/nic_reg.h index 833cf3d313fd..b4a79534a06b 100644 --- a/drivers/net/ethernet/cavium/thunder/nic_reg.h +++ b/drivers/net/ethernet/cavium/thunder/nic_reg.h @@ -36,6 +36,7 @@ #define NIC_PF_MAILBOX_ENA_W1C (0x0450) #define NIC_PF_MAILBOX_ENA_W1S (0x0470) #define NIC_PF_RX_ETYPE_0_7 (0x0500) +#define NIC_PF_RX_CFG (0x05D0) #define NIC_PF_PKIND_0_15_CFG (0x0600) #define NIC_PF_ECC0_FLIP0 (0x1000) #define NIC_PF_ECC1_FLIP0 (0x1008) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c index e521a94dc79f..ca223aa73825 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -1190,7 +1190,17 @@ struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic, struct cqe_rx_t *cqe_rx) u64 *rb_ptrs = NULL; rb_lens = (void *)cqe_rx + (3 * sizeof(u64)); - rb_ptrs = (void *)cqe_rx + (6 * sizeof(u64)); + /* Except 88xx pass1 on all other chips CQE_RX2_S is added to + * CQE_RX at word6, hence buffer pointers move by word + * + * Use existing 'hw_tso' flag which will be set for all chips + * except 88xx pass1 instead of a additional cache line + * access (or miss) by using pci dev's revision. + */ + if (!nic->hw_tso) + rb_ptrs = (void *)cqe_rx + (6 * sizeof(u64)); + else + rb_ptrs = (void *)cqe_rx + (7 * sizeof(u64)); netdev_dbg(nic->netdev, "%s rb_cnt %d rb0_ptr %llx rb0_sz %d\n", __func__, cqe_rx->rb_cnt, cqe_rx->rb0_ptr, cqe_rx->rb0_sz); From 52358aad36abb2bc9017d7319f2531b94429354d Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Fri, 12 Aug 2016 16:51:29 +0530 Subject: [PATCH 0175/1715] net: thunderx: Enable mailbox interrupts on 81xx/83xx 88xx has 128 VFs, 81xx has 8 VFs and 83xx will have 32VFs. Made changes to PF driver such that mailbox interrupt enable registers are configuired based on number of VFs HW supports. Also cleanedup mailbox irq handler registration code. Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- .../net/ethernet/cavium/thunder/nic_main.c | 88 +++++++++++-------- 1 file changed, 50 insertions(+), 38 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c index 3f52b364e12d..955c52261c59 100644 --- a/drivers/net/ethernet/cavium/thunder/nic_main.c +++ b/drivers/net/ethernet/cavium/thunder/nic_main.c @@ -66,8 +66,9 @@ struct nicpf { /* MSI-X */ bool msix_enabled; u8 num_vec; - struct msix_entry msix_entries[NIC_PF_MSIX_VECTORS]; + struct msix_entry *msix_entries; bool irq_allocated[NIC_PF_MSIX_VECTORS]; + char irq_name[NIC_PF_MSIX_VECTORS][20]; }; /* Supported devices */ @@ -105,9 +106,22 @@ static u64 nic_reg_read(struct nicpf *nic, u64 offset) /* PF -> VF mailbox communication APIs */ static void nic_enable_mbx_intr(struct nicpf *nic) { - /* Enable mailbox interrupt for all 128 VFs */ - nic_reg_write(nic, NIC_PF_MAILBOX_ENA_W1S, ~0ull); - nic_reg_write(nic, NIC_PF_MAILBOX_ENA_W1S + sizeof(u64), ~0ull); + int vf_cnt = pci_sriov_get_totalvfs(nic->pdev); + +#define INTR_MASK(vfs) ((vfs < 64) ? (BIT_ULL(vfs) - 1) : (~0ull)) + + /* Clear it, to avoid spurious interrupts (if any) */ + nic_reg_write(nic, NIC_PF_MAILBOX_INT, INTR_MASK(vf_cnt)); + + /* Enable mailbox interrupt for all VFs */ + nic_reg_write(nic, NIC_PF_MAILBOX_ENA_W1S, INTR_MASK(vf_cnt)); + /* One mailbox intr enable reg per 64 VFs */ + if (vf_cnt > 64) { + nic_reg_write(nic, NIC_PF_MAILBOX_INT + sizeof(u64), + INTR_MASK(vf_cnt - 64)); + nic_reg_write(nic, NIC_PF_MAILBOX_ENA_W1S + sizeof(u64), + INTR_MASK(vf_cnt - 64)); + } } static void nic_clear_mbx_intr(struct nicpf *nic, int vf, int mbx_reg) @@ -894,11 +908,18 @@ unlock: nic->mbx_lock[vf] = false; } -static void nic_mbx_intr_handler (struct nicpf *nic, int mbx) +static irqreturn_t nic_mbx_intr_handler(int irq, void *nic_irq) { + struct nicpf *nic = (struct nicpf *)nic_irq; + int mbx; u64 intr; u8 vf, vf_per_mbx_reg = 64; + if (irq == nic->msix_entries[NIC_PF_INTR_ID_MBOX0].vector) + mbx = 0; + else + mbx = 1; + intr = nic_reg_read(nic, NIC_PF_MAILBOX_INT + (mbx << 3)); dev_dbg(&nic->pdev->dev, "PF interrupt Mbox%d 0x%llx\n", mbx, intr); for (vf = 0; vf < vf_per_mbx_reg; vf++) { @@ -910,23 +931,6 @@ static void nic_mbx_intr_handler (struct nicpf *nic, int mbx) nic_clear_mbx_intr(nic, vf, mbx); } } -} - -static irqreturn_t nic_mbx0_intr_handler (int irq, void *nic_irq) -{ - struct nicpf *nic = (struct nicpf *)nic_irq; - - nic_mbx_intr_handler(nic, 0); - - return IRQ_HANDLED; -} - -static irqreturn_t nic_mbx1_intr_handler (int irq, void *nic_irq) -{ - struct nicpf *nic = (struct nicpf *)nic_irq; - - nic_mbx_intr_handler(nic, 1); - return IRQ_HANDLED; } @@ -934,7 +938,13 @@ static int nic_enable_msix(struct nicpf *nic) { int i, ret; - nic->num_vec = NIC_PF_MSIX_VECTORS; + nic->num_vec = pci_msix_vec_count(nic->pdev); + + nic->msix_entries = kmalloc_array(nic->num_vec, + sizeof(struct msix_entry), + GFP_KERNEL); + if (!nic->msix_entries) + return -ENOMEM; for (i = 0; i < nic->num_vec; i++) nic->msix_entries[i].entry = i; @@ -942,8 +952,9 @@ static int nic_enable_msix(struct nicpf *nic) ret = pci_enable_msix(nic->pdev, nic->msix_entries, nic->num_vec); if (ret) { dev_err(&nic->pdev->dev, - "Request for #%d msix vectors failed\n", - nic->num_vec); + "Request for #%d msix vectors failed, returned %d\n", + nic->num_vec, ret); + kfree(nic->msix_entries); return ret; } @@ -955,6 +966,7 @@ static void nic_disable_msix(struct nicpf *nic) { if (nic->msix_enabled) { pci_disable_msix(nic->pdev); + kfree(nic->msix_entries); nic->msix_enabled = 0; nic->num_vec = 0; } @@ -973,27 +985,26 @@ static void nic_free_all_interrupts(struct nicpf *nic) static int nic_register_interrupts(struct nicpf *nic) { - int ret; + int i, ret; /* Enable MSI-X */ ret = nic_enable_msix(nic); if (ret) return ret; - /* Register mailbox interrupt handlers */ - ret = request_irq(nic->msix_entries[NIC_PF_INTR_ID_MBOX0].vector, - nic_mbx0_intr_handler, 0, "NIC Mbox0", nic); - if (ret) - goto fail; + /* Register mailbox interrupt handler */ + for (i = NIC_PF_INTR_ID_MBOX0; i < nic->num_vec; i++) { + sprintf(nic->irq_name[i], + "NICPF Mbox%d", (i - NIC_PF_INTR_ID_MBOX0)); - nic->irq_allocated[NIC_PF_INTR_ID_MBOX0] = true; + ret = request_irq(nic->msix_entries[i].vector, + nic_mbx_intr_handler, 0, + nic->irq_name[i], nic); + if (ret) + goto fail; - ret = request_irq(nic->msix_entries[NIC_PF_INTR_ID_MBOX1].vector, - nic_mbx1_intr_handler, 0, "NIC Mbox1", nic); - if (ret) - goto fail; - - nic->irq_allocated[NIC_PF_INTR_ID_MBOX1] = true; + nic->irq_allocated[i] = true; + } /* Enable mailbox interrupt */ nic_enable_mbx_intr(nic); @@ -1002,6 +1013,7 @@ static int nic_register_interrupts(struct nicpf *nic) fail: dev_err(&nic->pdev->dev, "Request irq failed\n"); nic_free_all_interrupts(nic); + nic_disable_msix(nic); return ret; } From 0bcb7d510c857a7ecfe4196e7900786057255c5b Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Fri, 12 Aug 2016 16:51:30 +0530 Subject: [PATCH 0176/1715] net: thunderx: Support for different LMAC types within BGX On 88xx all LMACs in a BGX will be in same mode but on 81xx BGX can be split as two and there can be LMACs configured in different modes. These changes move lmac_type, lane2serdes fields into per lmac struct from BGX struct. Got rid of qlm_mode field which has become redundant with these changes. And now no of valid LMACs is read from CSRs configured by low level firmware and figuring out the same based on QLM mode is discarded Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- .../net/ethernet/cavium/thunder/thunder_bgx.c | 240 ++++++++---------- .../net/ethernet/cavium/thunder/thunder_bgx.h | 10 - 2 files changed, 106 insertions(+), 144 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c index 63a39ac97d53..44974270093d 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -28,6 +28,9 @@ struct lmac { struct bgx *bgx; int dmac; u8 mac[ETH_ALEN]; + u8 lmac_type; + u8 lane_to_sds; + bool use_training; bool link_up; int lmacid; /* ID within BGX */ int lmacid_bd; /* ID on board */ @@ -43,12 +46,8 @@ struct lmac { struct bgx { u8 bgx_id; - u8 qlm_mode; struct lmac lmac[MAX_LMAC_PER_BGX]; int lmac_count; - int lmac_type; - int lane_to_sds; - int use_training; void __iomem *reg_base; struct pci_dev *pdev; }; @@ -418,9 +417,10 @@ static int bgx_lmac_sgmii_init(struct bgx *bgx, int lmacid) return 0; } -static int bgx_lmac_xaui_init(struct bgx *bgx, int lmacid, int lmac_type) +static int bgx_lmac_xaui_init(struct bgx *bgx, struct lmac *lmac) { u64 cfg; + int lmacid = lmac->lmacid; /* Reset SPU */ bgx_reg_modify(bgx, lmacid, BGX_SPUX_CONTROL1, SPU_CTL_RESET); @@ -436,7 +436,7 @@ static int bgx_lmac_xaui_init(struct bgx *bgx, int lmacid, int lmac_type) bgx_reg_modify(bgx, lmacid, BGX_SPUX_CONTROL1, SPU_CTL_LOW_POWER); /* Set interleaved running disparity for RXAUI */ - if (bgx->lmac_type != BGX_MODE_RXAUI) + if (lmac->lmac_type != BGX_MODE_RXAUI) bgx_reg_modify(bgx, lmacid, BGX_SPUX_MISC_CONTROL, SPU_MISC_CTL_RX_DIS); else @@ -451,7 +451,7 @@ static int bgx_lmac_xaui_init(struct bgx *bgx, int lmacid, int lmac_type) cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_INT); bgx_reg_write(bgx, lmacid, BGX_SPUX_INT, cfg); - if (bgx->use_training) { + if (lmac->use_training) { bgx_reg_write(bgx, lmacid, BGX_SPUX_BR_PMD_LP_CUP, 0x00); bgx_reg_write(bgx, lmacid, BGX_SPUX_BR_PMD_LD_CUP, 0x00); bgx_reg_write(bgx, lmacid, BGX_SPUX_BR_PMD_LD_REP, 0x00); @@ -474,9 +474,9 @@ static int bgx_lmac_xaui_init(struct bgx *bgx, int lmacid, int lmac_type) bgx_reg_write(bgx, lmacid, BGX_SPUX_AN_CONTROL, cfg); cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_AN_ADV); - if (bgx->lmac_type == BGX_MODE_10G_KR) + if (lmac->lmac_type == BGX_MODE_10G_KR) cfg |= (1 << 23); - else if (bgx->lmac_type == BGX_MODE_40G_KR) + else if (lmac->lmac_type == BGX_MODE_40G_KR) cfg |= (1 << 24); else cfg &= ~((1 << 23) | (1 << 24)); @@ -511,11 +511,11 @@ static int bgx_xaui_check_link(struct lmac *lmac) { struct bgx *bgx = lmac->bgx; int lmacid = lmac->lmacid; - int lmac_type = bgx->lmac_type; + int lmac_type = lmac->lmac_type; u64 cfg; bgx_reg_modify(bgx, lmacid, BGX_SPUX_MISC_CONTROL, SPU_MISC_CTL_RX_DIS); - if (bgx->use_training) { + if (lmac->use_training) { cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_INT); if (!(cfg & (1ull << 13))) { cfg = (1ull << 13) | (1ull << 14); @@ -556,7 +556,7 @@ static int bgx_xaui_check_link(struct lmac *lmac) BGX_SPUX_STATUS2, SPU_STATUS2_RCVFLT); if (bgx_reg_read(bgx, lmacid, BGX_SPUX_STATUS2) & SPU_STATUS2_RCVFLT) { dev_err(&bgx->pdev->dev, "Receive fault, retry training\n"); - if (bgx->use_training) { + if (lmac->use_training) { cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_INT); if (!(cfg & (1ull << 13))) { cfg = (1ull << 13) | (1ull << 14); @@ -599,7 +599,7 @@ static int bgx_xaui_check_link(struct lmac *lmac) /* Rx local/remote fault seen. * Do lmac reinit to see if condition recovers */ - bgx_lmac_xaui_init(bgx, lmacid, bgx->lmac_type); + bgx_lmac_xaui_init(bgx, lmac); return -1; } @@ -623,7 +623,7 @@ static void bgx_poll_for_link(struct work_struct *work) if ((spu_link & SPU_STATUS1_RCV_LNK) && !(smu_link & SMU_RX_CTL_STATUS)) { lmac->link_up = 1; - if (lmac->bgx->lmac_type == BGX_MODE_XLAUI) + if (lmac->lmac_type == BGX_MODE_XLAUI) lmac->last_speed = 40000; else lmac->last_speed = 10000; @@ -657,13 +657,13 @@ static int bgx_lmac_enable(struct bgx *bgx, u8 lmacid) lmac = &bgx->lmac[lmacid]; lmac->bgx = bgx; - if (bgx->lmac_type == BGX_MODE_SGMII) { + if (lmac->lmac_type == BGX_MODE_SGMII) { lmac->is_sgmii = 1; if (bgx_lmac_sgmii_init(bgx, lmacid)) return -1; } else { lmac->is_sgmii = 0; - if (bgx_lmac_xaui_init(bgx, lmacid, bgx->lmac_type)) + if (bgx_lmac_xaui_init(bgx, lmac)) return -1; } @@ -685,10 +685,10 @@ static int bgx_lmac_enable(struct bgx *bgx, u8 lmacid) /* Restore default cfg, incase low level firmware changed it */ bgx_reg_write(bgx, lmacid, BGX_CMRX_RX_DMAC_CTL, 0x03); - if ((bgx->lmac_type != BGX_MODE_XFI) && - (bgx->lmac_type != BGX_MODE_XLAUI) && - (bgx->lmac_type != BGX_MODE_40G_KR) && - (bgx->lmac_type != BGX_MODE_10G_KR)) { + if ((lmac->lmac_type != BGX_MODE_XFI) && + (lmac->lmac_type != BGX_MODE_XLAUI) && + (lmac->lmac_type != BGX_MODE_40G_KR) && + (lmac->lmac_type != BGX_MODE_10G_KR)) { if (!lmac->phydev) return -ENODEV; @@ -753,76 +753,19 @@ static void bgx_lmac_disable(struct bgx *bgx, u8 lmacid) bgx_flush_dmac_addrs(bgx, lmacid); - if ((bgx->lmac_type != BGX_MODE_XFI) && - (bgx->lmac_type != BGX_MODE_XLAUI) && - (bgx->lmac_type != BGX_MODE_40G_KR) && - (bgx->lmac_type != BGX_MODE_10G_KR) && lmac->phydev) + if ((lmac->lmac_type != BGX_MODE_XFI) && + (lmac->lmac_type != BGX_MODE_XLAUI) && + (lmac->lmac_type != BGX_MODE_40G_KR) && + (lmac->lmac_type != BGX_MODE_10G_KR) && lmac->phydev) phy_disconnect(lmac->phydev); lmac->phydev = NULL; } -static void bgx_set_num_ports(struct bgx *bgx) -{ - u64 lmac_count; - - switch (bgx->qlm_mode) { - case QLM_MODE_SGMII: - bgx->lmac_count = 4; - bgx->lmac_type = BGX_MODE_SGMII; - bgx->lane_to_sds = 0; - break; - case QLM_MODE_XAUI_1X4: - bgx->lmac_count = 1; - bgx->lmac_type = BGX_MODE_XAUI; - bgx->lane_to_sds = 0xE4; - break; - case QLM_MODE_RXAUI_2X2: - bgx->lmac_count = 2; - bgx->lmac_type = BGX_MODE_RXAUI; - bgx->lane_to_sds = 0xE4; - break; - case QLM_MODE_XFI_4X1: - bgx->lmac_count = 4; - bgx->lmac_type = BGX_MODE_XFI; - bgx->lane_to_sds = 0; - break; - case QLM_MODE_XLAUI_1X4: - bgx->lmac_count = 1; - bgx->lmac_type = BGX_MODE_XLAUI; - bgx->lane_to_sds = 0xE4; - break; - case QLM_MODE_10G_KR_4X1: - bgx->lmac_count = 4; - bgx->lmac_type = BGX_MODE_10G_KR; - bgx->lane_to_sds = 0; - bgx->use_training = 1; - break; - case QLM_MODE_40G_KR4_1X4: - bgx->lmac_count = 1; - bgx->lmac_type = BGX_MODE_40G_KR; - bgx->lane_to_sds = 0xE4; - bgx->use_training = 1; - break; - default: - bgx->lmac_count = 0; - break; - } - - /* Check if low level firmware has programmed LMAC count - * based on board type, if yes consider that otherwise - * the default static values - */ - lmac_count = bgx_reg_read(bgx, 0, BGX_CMR_RX_LMACS) & 0x7; - if (lmac_count != 4) - bgx->lmac_count = lmac_count; -} - static void bgx_init_hw(struct bgx *bgx) { int i; - - bgx_set_num_ports(bgx); + struct lmac *lmac; bgx_reg_modify(bgx, 0, BGX_CMR_GLOBAL_CFG, CMR_GLOBAL_CFG_FCS_STRIP); if (bgx_reg_read(bgx, 0, BGX_CMR_BIST_STATUS)) @@ -830,17 +773,9 @@ static void bgx_init_hw(struct bgx *bgx) /* Set lmac type and lane2serdes mapping */ for (i = 0; i < bgx->lmac_count; i++) { - if (bgx->lmac_type == BGX_MODE_RXAUI) { - if (i) - bgx->lane_to_sds = 0x0e; - else - bgx->lane_to_sds = 0x04; - bgx_reg_write(bgx, i, BGX_CMRX_CFG, - (bgx->lmac_type << 8) | bgx->lane_to_sds); - continue; - } + lmac = &bgx->lmac[i]; bgx_reg_write(bgx, i, BGX_CMRX_CFG, - (bgx->lmac_type << 8) | (bgx->lane_to_sds + i)); + (lmac->lmac_type << 8) | lmac->lane_to_sds); bgx->lmac[i].lmacid_bd = lmac_count; lmac_count++; } @@ -863,56 +798,93 @@ static void bgx_init_hw(struct bgx *bgx) bgx_reg_write(bgx, 0, BGX_CMR_RX_STREERING + (i * 8), 0x00); } -static void bgx_get_qlm_mode(struct bgx *bgx) +static void bgx_print_qlm_mode(struct bgx *bgx, u8 lmacid) { struct device *dev = &bgx->pdev->dev; - int lmac_type; - int train_en; + struct lmac *lmac; + char str[20]; + + lmac = &bgx->lmac[lmacid]; + sprintf(str, "BGX%d QLM mode", bgx->bgx_id); + + switch (lmac->lmac_type) { + case BGX_MODE_SGMII: + dev_info(dev, "%s: SGMII\n", (char *)str); + break; + case BGX_MODE_XAUI: + dev_info(dev, "%s: XAUI\n", (char *)str); + break; + case BGX_MODE_RXAUI: + dev_info(dev, "%s: RXAUI\n", (char *)str); + break; + case BGX_MODE_XFI: + if (!lmac->use_training) + dev_info(dev, "%s: XFI\n", (char *)str); + else + dev_info(dev, "%s: 10G_KR\n", (char *)str); + break; + case BGX_MODE_XLAUI: + if (!lmac->use_training) + dev_info(dev, "%s: XLAUI\n", (char *)str); + else + dev_info(dev, "%s: 40G_KR4\n", (char *)str); + break; + default: + dev_info(dev, "%s: INVALID\n", (char *)str); + } +} + +static void lmac_set_lane2sds(struct lmac *lmac) +{ + switch (lmac->lmac_type) { + case BGX_MODE_SGMII: + case BGX_MODE_XFI: + lmac->lane_to_sds = lmac->lmacid; + break; + case BGX_MODE_XAUI: + case BGX_MODE_XLAUI: + lmac->lane_to_sds = 0xE4; + break; + case BGX_MODE_RXAUI: + lmac->lane_to_sds = (lmac->lmacid) ? 0xE : 0x4; + break; + default: + lmac->lane_to_sds = 0; + break; + } +} + +static void bgx_set_lmac_config(struct bgx *bgx, u8 idx) +{ + struct lmac *lmac; + u64 cmr_cfg; + + lmac = &bgx->lmac[idx]; + lmac->lmacid = idx; /* Read LMAC0 type to figure out QLM mode * This is configured by low level firmware */ - lmac_type = bgx_reg_read(bgx, 0, BGX_CMRX_CFG); - lmac_type = (lmac_type >> 8) & 0x07; + cmr_cfg = bgx_reg_read(bgx, 0, BGX_CMRX_CFG); + lmac->lmac_type = (cmr_cfg >> 8) & 0x07; + lmac->use_training = + bgx_reg_read(bgx, 0, BGX_SPUX_BR_PMD_CRTL) & + SPU_PMD_CRTL_TRAIN_EN; + lmac_set_lane2sds(lmac); +} - train_en = bgx_reg_read(bgx, 0, BGX_SPUX_BR_PMD_CRTL) & - SPU_PMD_CRTL_TRAIN_EN; +static void bgx_get_qlm_mode(struct bgx *bgx) +{ + u8 idx; - switch (lmac_type) { - case BGX_MODE_SGMII: - bgx->qlm_mode = QLM_MODE_SGMII; - dev_info(dev, "BGX%d QLM mode: SGMII\n", bgx->bgx_id); - break; - case BGX_MODE_XAUI: - bgx->qlm_mode = QLM_MODE_XAUI_1X4; - dev_info(dev, "BGX%d QLM mode: XAUI\n", bgx->bgx_id); - break; - case BGX_MODE_RXAUI: - bgx->qlm_mode = QLM_MODE_RXAUI_2X2; - dev_info(dev, "BGX%d QLM mode: RXAUI\n", bgx->bgx_id); - break; - case BGX_MODE_XFI: - if (!train_en) { - bgx->qlm_mode = QLM_MODE_XFI_4X1; - dev_info(dev, "BGX%d QLM mode: XFI\n", bgx->bgx_id); - } else { - bgx->qlm_mode = QLM_MODE_10G_KR_4X1; - dev_info(dev, "BGX%d QLM mode: 10G_KR\n", bgx->bgx_id); - } - break; - case BGX_MODE_XLAUI: - if (!train_en) { - bgx->qlm_mode = QLM_MODE_XLAUI_1X4; - dev_info(dev, "BGX%d QLM mode: XLAUI\n", bgx->bgx_id); - } else { - bgx->qlm_mode = QLM_MODE_40G_KR4_1X4; - dev_info(dev, "BGX%d QLM mode: 40G_KR4\n", bgx->bgx_id); - } - break; - default: - bgx->qlm_mode = QLM_MODE_SGMII; - dev_info(dev, "BGX%d QLM default mode: SGMII\n", bgx->bgx_id); - } + /* It is assumed that low level firmware sets this value */ + bgx->lmac_count = bgx_reg_read(bgx, 0, BGX_CMR_RX_LMACS) & 0x7; + if (bgx->lmac_count > MAX_LMAC_PER_BGX) + bgx->lmac_count = MAX_LMAC_PER_BGX; + + for (idx = 0; idx < bgx->lmac_count; idx++) + bgx_set_lmac_config(bgx, idx); + bgx_print_qlm_mode(bgx, 0); } #ifdef CONFIG_ACPI diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h index 9f5e49a91783..b7b91c857dba 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h @@ -217,14 +217,4 @@ enum LMAC_TYPE { BGX_MODE_40G_KR = 4,/* 4 lanes, 10.3125 Gbaud */ }; -enum qlm_mode { - QLM_MODE_SGMII, /* SGMII, each lane independent */ - QLM_MODE_XAUI_1X4, /* 1 XAUI or DXAUI, 4 lanes */ - QLM_MODE_RXAUI_2X2, /* 2 RXAUI, 2 lanes each */ - QLM_MODE_XFI_4X1, /* 4 XFI, 1 lane each */ - QLM_MODE_XLAUI_1X4, /* 1 XLAUI, 4 lanes each */ - QLM_MODE_10G_KR_4X1, /* 4 10GBASE-KR, 1 lane each */ - QLM_MODE_40G_KR4_1X4, /* 1 40GBASE-KR4, 4 lanes each */ -}; - #endif /* THUNDER_BGX_H */ From 57aaf63cb13ca342bfeba6772df3a4e05a35e4ab Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Fri, 12 Aug 2016 16:51:31 +0530 Subject: [PATCH 0177/1715] net: thunderx: Add 81xx support to BGX driver This patch adds support for BGX module on 81xx where a BGX can be split and have different LMACs configured in different modes. Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- .../net/ethernet/cavium/thunder/thunder_bgx.c | 112 ++++++++++++++++-- .../net/ethernet/cavium/thunder/thunder_bgx.h | 11 ++ 2 files changed, 113 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c index 44974270093d..9c3c273f4535 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -50,6 +50,7 @@ struct bgx { int lmac_count; void __iomem *reg_base; struct pci_dev *pdev; + bool is_81xx; }; static struct bgx *bgx_vnic[MAX_BGX_THUNDER]; @@ -803,9 +804,17 @@ static void bgx_print_qlm_mode(struct bgx *bgx, u8 lmacid) struct device *dev = &bgx->pdev->dev; struct lmac *lmac; char str[20]; + u8 dlm; + + if (lmacid > MAX_LMAC_PER_BGX) + return; lmac = &bgx->lmac[lmacid]; - sprintf(str, "BGX%d QLM mode", bgx->bgx_id); + dlm = (lmacid / 2) + (bgx->bgx_id * 2); + if (!bgx->is_81xx) + sprintf(str, "BGX%d QLM mode", bgx->bgx_id); + else + sprintf(str, "BGX%d DLM%d mode", bgx->bgx_id, dlm); switch (lmac->lmac_type) { case BGX_MODE_SGMII: @@ -857,26 +866,81 @@ static void lmac_set_lane2sds(struct lmac *lmac) static void bgx_set_lmac_config(struct bgx *bgx, u8 idx) { struct lmac *lmac; + struct lmac *olmac; u64 cmr_cfg; + u8 lmac_type; + u8 lane_to_sds; lmac = &bgx->lmac[idx]; - lmac->lmacid = idx; - /* Read LMAC0 type to figure out QLM mode - * This is configured by low level firmware + if (!bgx->is_81xx) { + /* Read LMAC0 type to figure out QLM mode + * This is configured by low level firmware + */ + cmr_cfg = bgx_reg_read(bgx, 0, BGX_CMRX_CFG); + lmac->lmac_type = (cmr_cfg >> 8) & 0x07; + lmac->use_training = + bgx_reg_read(bgx, 0, BGX_SPUX_BR_PMD_CRTL) & + SPU_PMD_CRTL_TRAIN_EN; + lmac_set_lane2sds(lmac); + return; + } + + /* On 81xx BGX can be split across 2 DLMs + * firmware programs lmac_type of LMAC0 and LMAC2 */ - cmr_cfg = bgx_reg_read(bgx, 0, BGX_CMRX_CFG); - lmac->lmac_type = (cmr_cfg >> 8) & 0x07; - lmac->use_training = - bgx_reg_read(bgx, 0, BGX_SPUX_BR_PMD_CRTL) & + if ((idx == 0) || (idx == 2)) { + cmr_cfg = bgx_reg_read(bgx, idx, BGX_CMRX_CFG); + lmac_type = (u8)((cmr_cfg >> 8) & 0x07); + lane_to_sds = (u8)(cmr_cfg & 0xFF); + /* Check if config is not reset value */ + if ((lmac_type == 0) && (lane_to_sds == 0xE4)) + lmac->lmac_type = BGX_MODE_INVALID; + else + lmac->lmac_type = lmac_type; + lmac->use_training = + bgx_reg_read(bgx, idx, BGX_SPUX_BR_PMD_CRTL) & + SPU_PMD_CRTL_TRAIN_EN; + lmac_set_lane2sds(lmac); + + /* Set LMAC type of other lmac on same DLM i.e LMAC 1/3 */ + olmac = &bgx->lmac[idx + 1]; + olmac->lmac_type = lmac->lmac_type; + olmac->use_training = + bgx_reg_read(bgx, idx + 1, BGX_SPUX_BR_PMD_CRTL) & SPU_PMD_CRTL_TRAIN_EN; - lmac_set_lane2sds(lmac); + lmac_set_lane2sds(olmac); + } +} + +static bool is_dlm0_in_bgx_mode(struct bgx *bgx) +{ + struct lmac *lmac; + + if (!bgx->is_81xx) + return true; + + lmac = &bgx->lmac[1]; + if (lmac->lmac_type == BGX_MODE_INVALID) + return false; + + return true; } static void bgx_get_qlm_mode(struct bgx *bgx) { + struct lmac *lmac; + struct lmac *lmac01; + struct lmac *lmac23; u8 idx; + /* Init all LMAC's type to invalid */ + for (idx = 0; idx < MAX_LMAC_PER_BGX; idx++) { + lmac = &bgx->lmac[idx]; + lmac->lmac_type = BGX_MODE_INVALID; + lmac->lmacid = idx; + } + /* It is assumed that low level firmware sets this value */ bgx->lmac_count = bgx_reg_read(bgx, 0, BGX_CMR_RX_LMACS) & 0x7; if (bgx->lmac_count > MAX_LMAC_PER_BGX) @@ -884,7 +948,28 @@ static void bgx_get_qlm_mode(struct bgx *bgx) for (idx = 0; idx < bgx->lmac_count; idx++) bgx_set_lmac_config(bgx, idx); - bgx_print_qlm_mode(bgx, 0); + + if (!bgx->is_81xx) { + bgx_print_qlm_mode(bgx, 0); + return; + } + + if (bgx->lmac_count) { + bgx_print_qlm_mode(bgx, 0); + bgx_print_qlm_mode(bgx, 2); + } + + /* If DLM0 is not in BGX mode then LMAC0/1 have + * to be configured with serdes lanes of DLM1 + */ + if (is_dlm0_in_bgx_mode(bgx) || (bgx->lmac_count > 2)) + return; + for (idx = 0; idx < bgx->lmac_count; idx++) { + lmac01 = &bgx->lmac[idx]; + lmac23 = &bgx->lmac[idx + 2]; + lmac01->lmac_type = lmac23->lmac_type; + lmac01->lane_to_sds = lmac23->lane_to_sds; + } } #ifdef CONFIG_ACPI @@ -1059,6 +1144,7 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) struct device *dev = &pdev->dev; struct bgx *bgx = NULL; u8 lmac; + u16 sdevid; bgx = devm_kzalloc(dev, sizeof(*bgx), GFP_KERNEL); if (!bgx) @@ -1080,6 +1166,10 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_disable_device; } + pci_read_config_word(pdev, PCI_SUBSYSTEM_ID, &sdevid); + if (sdevid == PCI_SUBSYS_DEVID_81XX_BGX) + bgx->is_81xx = true; + /* MAP configuration registers */ bgx->reg_base = pcim_iomap(pdev, PCI_CFG_REG_BAR_NUM, 0); if (!bgx->reg_base) { @@ -1105,6 +1195,8 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) { dev_err(dev, "BGX%d failed to enable lmac%d\n", bgx->bgx_id, lmac); + while (lmac) + bgx_lmac_disable(bgx, --lmac); goto err_enable; } } diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h index b7b91c857dba..38e9fb4ae8c4 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h @@ -9,6 +9,14 @@ #ifndef THUNDER_BGX_H #define THUNDER_BGX_H +/* PCI device ID */ +#define PCI_DEVICE_ID_THUNDER_BGX 0xA026 + +/* Subsystem device IDs */ +#define PCI_SUBSYS_DEVID_88XX_BGX 0xA126 +#define PCI_SUBSYS_DEVID_81XX_BGX 0xA226 +#define PCI_SUBSYS_DEVID_83XX_BGX 0xA326 + #define MAX_BGX_THUNDER 8 /* Max 4 nodes, 2 per node */ #define MAX_BGX_PER_CN88XX 2 #define MAX_BGX_PER_CN81XX 2 @@ -215,6 +223,9 @@ enum LMAC_TYPE { BGX_MODE_XLAUI = 4, /* 4 lanes, 10.3125 Gbaud */ BGX_MODE_10G_KR = 3,/* 1 lane, 10.3125 Gbaud */ BGX_MODE_40G_KR = 4,/* 4 lanes, 10.3125 Gbaud */ + BGX_MODE_RGMII = 5, + BGX_MODE_QSGMII = 6, + BGX_MODE_INVALID = 7, }; #endif /* THUNDER_BGX_H */ From 3f8057cfe8adc12bd272fe2c67e3c9ab9cff0a12 Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Fri, 12 Aug 2016 16:51:32 +0530 Subject: [PATCH 0178/1715] net: thunderx: Add QSGMII interface type support This patch adds support for QSGMII interface type to the BGX driver. This type of interface is supported by 81xx SOC. Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- .../net/ethernet/cavium/thunder/thunder_bgx.c | 65 +++++++++++++++---- .../net/ethernet/cavium/thunder/thunder_bgx.h | 1 + 2 files changed, 54 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c index 9c3c273f4535..0bf8d244185b 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -379,8 +379,9 @@ void bgx_lmac_internal_loopback(int node, int bgx_idx, } EXPORT_SYMBOL(bgx_lmac_internal_loopback); -static int bgx_lmac_sgmii_init(struct bgx *bgx, int lmacid) +static int bgx_lmac_sgmii_init(struct bgx *bgx, struct lmac *lmac) { + int lmacid = lmac->lmacid; u64 cfg; bgx_reg_modify(bgx, lmacid, BGX_GMP_GMI_TXX_THRESH, 0x30); @@ -409,6 +410,14 @@ static int bgx_lmac_sgmii_init(struct bgx *bgx, int lmacid) cfg |= (PCS_MRX_CTL_RST_AN | PCS_MRX_CTL_AN_EN); bgx_reg_write(bgx, lmacid, BGX_GMP_PCS_MRX_CTL, cfg); + if (lmac->lmac_type == BGX_MODE_QSGMII) { + /* Disable disparity check for QSGMII */ + cfg = bgx_reg_read(bgx, lmacid, BGX_GMP_PCS_MISCX_CTL); + cfg &= ~PCS_MISC_CTL_DISP_EN; + bgx_reg_write(bgx, lmacid, BGX_GMP_PCS_MISCX_CTL, cfg); + return 0; + } + if (bgx_poll_reg(bgx, lmacid, BGX_GMP_PCS_MRX_STATUS, PCS_MRX_STATUS_AN_CPT, false)) { dev_err(&bgx->pdev->dev, "BGX AN_CPT not completed\n"); @@ -650,6 +659,14 @@ static void bgx_poll_for_link(struct work_struct *work) queue_delayed_work(lmac->check_link, &lmac->dwork, HZ * 2); } +static int phy_interface_mode(u8 lmac_type) +{ + if (lmac_type == BGX_MODE_QSGMII) + return PHY_INTERFACE_MODE_QSGMII; + + return PHY_INTERFACE_MODE_SGMII; +} + static int bgx_lmac_enable(struct bgx *bgx, u8 lmacid) { struct lmac *lmac; @@ -658,9 +675,10 @@ static int bgx_lmac_enable(struct bgx *bgx, u8 lmacid) lmac = &bgx->lmac[lmacid]; lmac->bgx = bgx; - if (lmac->lmac_type == BGX_MODE_SGMII) { + if ((lmac->lmac_type == BGX_MODE_SGMII) || + (lmac->lmac_type == BGX_MODE_QSGMII)) { lmac->is_sgmii = 1; - if (bgx_lmac_sgmii_init(bgx, lmacid)) + if (bgx_lmac_sgmii_init(bgx, lmac)) return -1; } else { lmac->is_sgmii = 0; @@ -697,7 +715,7 @@ static int bgx_lmac_enable(struct bgx *bgx, u8 lmacid) if (phy_connect_direct(&lmac->netdev, lmac->phydev, bgx_lmac_handler, - PHY_INTERFACE_MODE_SGMII)) + phy_interface_mode(lmac->lmac_type))) return -ENODEV; phy_start_aneg(lmac->phydev); @@ -799,6 +817,11 @@ static void bgx_init_hw(struct bgx *bgx) bgx_reg_write(bgx, 0, BGX_CMR_RX_STREERING + (i * 8), 0x00); } +static u8 bgx_get_lane2sds_cfg(struct bgx *bgx, struct lmac *lmac) +{ + return (u8)(bgx_reg_read(bgx, lmac->lmacid, BGX_CMRX_CFG) & 0xFF); +} + static void bgx_print_qlm_mode(struct bgx *bgx, u8 lmacid) { struct device *dev = &bgx->pdev->dev; @@ -838,12 +861,22 @@ static void bgx_print_qlm_mode(struct bgx *bgx, u8 lmacid) else dev_info(dev, "%s: 40G_KR4\n", (char *)str); break; - default: - dev_info(dev, "%s: INVALID\n", (char *)str); + case BGX_MODE_QSGMII: + if ((lmacid == 0) && + (bgx_get_lane2sds_cfg(bgx, lmac) != lmacid)) + return; + if ((lmacid == 2) && + (bgx_get_lane2sds_cfg(bgx, lmac) == lmacid)) + return; + dev_info(dev, "%s: QSGMII\n", (char *)str); + break; + case BGX_MODE_INVALID: + /* Nothing to do */ + break; } } -static void lmac_set_lane2sds(struct lmac *lmac) +static void lmac_set_lane2sds(struct bgx *bgx, struct lmac *lmac) { switch (lmac->lmac_type) { case BGX_MODE_SGMII: @@ -857,6 +890,14 @@ static void lmac_set_lane2sds(struct lmac *lmac) case BGX_MODE_RXAUI: lmac->lane_to_sds = (lmac->lmacid) ? 0xE : 0x4; break; + case BGX_MODE_QSGMII: + /* There is no way to determine if DLM0/2 is QSGMII or + * DLM1/3 is configured to QSGMII as bootloader will + * configure all LMACs, so take whatever is configured + * by low level firmware. + */ + lmac->lane_to_sds = bgx_get_lane2sds_cfg(bgx, lmac); + break; default: lmac->lane_to_sds = 0; break; @@ -882,7 +923,7 @@ static void bgx_set_lmac_config(struct bgx *bgx, u8 idx) lmac->use_training = bgx_reg_read(bgx, 0, BGX_SPUX_BR_PMD_CRTL) & SPU_PMD_CRTL_TRAIN_EN; - lmac_set_lane2sds(lmac); + lmac_set_lane2sds(bgx, lmac); return; } @@ -901,7 +942,7 @@ static void bgx_set_lmac_config(struct bgx *bgx, u8 idx) lmac->use_training = bgx_reg_read(bgx, idx, BGX_SPUX_BR_PMD_CRTL) & SPU_PMD_CRTL_TRAIN_EN; - lmac_set_lane2sds(lmac); + lmac_set_lane2sds(bgx, lmac); /* Set LMAC type of other lmac on same DLM i.e LMAC 1/3 */ olmac = &bgx->lmac[idx + 1]; @@ -909,7 +950,7 @@ static void bgx_set_lmac_config(struct bgx *bgx, u8 idx) olmac->use_training = bgx_reg_read(bgx, idx + 1, BGX_SPUX_BR_PMD_CRTL) & SPU_PMD_CRTL_TRAIN_EN; - lmac_set_lane2sds(olmac); + lmac_set_lane2sds(bgx, olmac); } } @@ -920,7 +961,7 @@ static bool is_dlm0_in_bgx_mode(struct bgx *bgx) if (!bgx->is_81xx) return true; - lmac = &bgx->lmac[1]; + lmac = &bgx->lmac[0]; if (lmac->lmac_type == BGX_MODE_INVALID) return false; @@ -946,7 +987,7 @@ static void bgx_get_qlm_mode(struct bgx *bgx) if (bgx->lmac_count > MAX_LMAC_PER_BGX) bgx->lmac_count = MAX_LMAC_PER_BGX; - for (idx = 0; idx < bgx->lmac_count; idx++) + for (idx = 0; idx < MAX_LMAC_PER_BGX; idx++) bgx_set_lmac_config(bgx, idx); if (!bgx->is_81xx) { diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h index 38e9fb4ae8c4..0705863d2915 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h @@ -146,6 +146,7 @@ #define BGX_GMP_PCS_ANX_AN_RESULTS 0x30020 #define BGX_GMP_PCS_SGM_AN_ADV 0x30068 #define BGX_GMP_PCS_MISCX_CTL 0x30078 +#define PCS_MISC_CTL_DISP_EN BIT_ULL(13) #define PCS_MISC_CTL_GMX_ENO BIT_ULL(11) #define PCS_MISC_CTL_SAMP_PT_MASK 0x7Full #define BGX_GMP_GMI_PRTX_CFG 0x38020 From 6465859aba1e66a5351b047fbf40e9e9bcb6c669 Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Fri, 12 Aug 2016 16:51:33 +0530 Subject: [PATCH 0179/1715] net: thunderx: Add RGMII interface type support This patch adds RGX/RGMII interface type support to BGX driver. This type of interface is supported by 81xx SOC. CN81XX VNIC has 8 VFs and max possible LMAC interfaces are 9, hence RGMII interface will not work if all DLMs are in BGX mode and all 8 LMACs are enabled Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/Kconfig | 10 + drivers/net/ethernet/cavium/thunder/Makefile | 1 + .../net/ethernet/cavium/thunder/nic_main.c | 14 +- .../net/ethernet/cavium/thunder/thunder_bgx.c | 97 ++++--- .../net/ethernet/cavium/thunder/thunder_bgx.h | 6 +- .../net/ethernet/cavium/thunder/thunder_xcv.c | 237 ++++++++++++++++++ 6 files changed, 332 insertions(+), 33 deletions(-) create mode 100644 drivers/net/ethernet/cavium/thunder/thunder_xcv.c diff --git a/drivers/net/ethernet/cavium/Kconfig b/drivers/net/ethernet/cavium/Kconfig index 0ef232d3331e..e1b78b500309 100644 --- a/drivers/net/ethernet/cavium/Kconfig +++ b/drivers/net/ethernet/cavium/Kconfig @@ -36,10 +36,20 @@ config THUNDER_NIC_BGX depends on 64BIT select PHYLIB select MDIO_THUNDER + select THUNDER_NIC_RGX ---help--- This driver supports programming and controlling of MAC interface from NIC physical function driver. +config THUNDER_NIC_RGX + tristate "Thunder MAC interface driver (RGX)" + depends on 64BIT + select PHYLIB + select MDIO_THUNDER + ---help--- + This driver supports configuring XCV block of RGX interface + present on CN81XX chip. + config LIQUIDIO tristate "Cavium LiquidIO support" depends on 64BIT diff --git a/drivers/net/ethernet/cavium/thunder/Makefile b/drivers/net/ethernet/cavium/thunder/Makefile index 5c4615ccaa14..6b4d4add7353 100644 --- a/drivers/net/ethernet/cavium/thunder/Makefile +++ b/drivers/net/ethernet/cavium/thunder/Makefile @@ -2,6 +2,7 @@ # Makefile for Cavium's Thunder ethernet device # +obj-$(CONFIG_THUNDER_NIC_RGX) += thunder_xcv.o obj-$(CONFIG_THUNDER_NIC_BGX) += thunder_bgx.o obj-$(CONFIG_THUNDER_NIC_PF) += nicpf.o obj-$(CONFIG_THUNDER_NIC_VF) += nicvf.o diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c index 955c52261c59..91c575d418d0 100644 --- a/drivers/net/ethernet/cavium/thunder/nic_main.c +++ b/drivers/net/ethernet/cavium/thunder/nic_main.c @@ -325,6 +325,14 @@ static void nic_set_lmac_vf_mapping(struct nicpf *nic) nic_reg_write(nic, NIC_PF_LMAC_0_7_CREDIT + (lmac * 8), lmac_credit); + + /* On CN81XX there are only 8 VFs but max possible no of + * interfaces are 9. + */ + if (nic->num_vf_en >= pci_sriov_get_totalvfs(nic->pdev)) { + nic->num_vf_en = pci_sriov_get_totalvfs(nic->pdev); + break; + } } } @@ -450,10 +458,8 @@ static void nic_config_cpi(struct nicpf *nic, struct cpi_cfg_msg *cfg) lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vnic]); chan = (lmac * hw->chans_per_lmac) + (bgx * hw->chans_per_bgx); - cpi_base = (lmac * NIC_MAX_CPI_PER_LMAC) + - (bgx * (hw->cpi_cnt / hw->bgx_cnt)); - rssi_base = (lmac * hw->rss_ind_tbl_size) + - (bgx * (hw->rssi_cnt / hw->bgx_cnt)); + cpi_base = vnic * NIC_MAX_CPI_PER_LMAC; + rssi_base = vnic * hw->rss_ind_tbl_size; /* Rx channel configuration */ nic_reg_write(nic, NIC_PF_CHAN_0_255_RX_BP_CFG | (chan << 3), diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c index 0bf8d244185b..4ddc760afe03 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -48,9 +48,11 @@ struct bgx { u8 bgx_id; struct lmac lmac[MAX_LMAC_PER_BGX]; int lmac_count; + u8 max_lmac; void __iomem *reg_base; struct pci_dev *pdev; bool is_81xx; + bool is_rgx; }; static struct bgx *bgx_vnic[MAX_BGX_THUNDER]; @@ -61,6 +63,7 @@ static int bgx_xaui_check_link(struct lmac *lmac); /* Supported devices */ static const struct pci_device_id bgx_id_table[] = { { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_BGX) }, + { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_RGX) }, { 0, } /* end of table */ }; @@ -124,7 +127,7 @@ unsigned bgx_get_map(int node) int i; unsigned map = 0; - for (i = 0; i < MAX_BGX_PER_CN88XX; i++) { + for (i = 0; i < MAX_BGX_PER_CN81XX; i++) { if (bgx_vnic[(node * MAX_BGX_PER_CN88XX) + i]) map |= (1 << i); } @@ -189,10 +192,12 @@ EXPORT_SYMBOL(bgx_set_lmac_mac); void bgx_lmac_rx_tx_enable(int node, int bgx_idx, int lmacid, bool enable) { struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx]; + struct lmac *lmac; u64 cfg; if (!bgx) return; + lmac = &bgx->lmac[lmacid]; cfg = bgx_reg_read(bgx, lmacid, BGX_CMRX_CFG); if (enable) @@ -200,6 +205,9 @@ void bgx_lmac_rx_tx_enable(int node, int bgx_idx, int lmacid, bool enable) else cfg &= ~(CMR_PKT_RX_EN | CMR_PKT_TX_EN); bgx_reg_write(bgx, lmacid, BGX_CMRX_CFG, cfg); + + if (bgx->is_rgx) + xcv_setup_link(enable ? lmac->link_up : 0, lmac->last_speed); } EXPORT_SYMBOL(bgx_lmac_rx_tx_enable); @@ -266,9 +274,12 @@ static void bgx_sgmii_change_link_state(struct lmac *lmac) port_cfg = bgx_reg_read(bgx, lmac->lmacid, BGX_GMP_GMI_PRTX_CFG); - /* renable lmac */ + /* Re-enable lmac */ cmr_cfg |= CMR_EN; bgx_reg_write(bgx, lmac->lmacid, BGX_CMRX_CFG, cmr_cfg); + + if (bgx->is_rgx && (cmr_cfg & (CMR_PKT_RX_EN | CMR_PKT_TX_EN))) + xcv_setup_link(lmac->link_up, lmac->last_speed); } static void bgx_lmac_handler(struct net_device *netdev) @@ -418,10 +429,12 @@ static int bgx_lmac_sgmii_init(struct bgx *bgx, struct lmac *lmac) return 0; } - if (bgx_poll_reg(bgx, lmacid, BGX_GMP_PCS_MRX_STATUS, - PCS_MRX_STATUS_AN_CPT, false)) { - dev_err(&bgx->pdev->dev, "BGX AN_CPT not completed\n"); - return -1; + if (lmac->lmac_type == BGX_MODE_SGMII) { + if (bgx_poll_reg(bgx, lmacid, BGX_GMP_PCS_MRX_STATUS, + PCS_MRX_STATUS_AN_CPT, false)) { + dev_err(&bgx->pdev->dev, "BGX AN_CPT not completed\n"); + return -1; + } } return 0; @@ -663,6 +676,8 @@ static int phy_interface_mode(u8 lmac_type) { if (lmac_type == BGX_MODE_QSGMII) return PHY_INTERFACE_MODE_QSGMII; + if (lmac_type == BGX_MODE_RGMII) + return PHY_INTERFACE_MODE_RGMII; return PHY_INTERFACE_MODE_SGMII; } @@ -676,7 +691,8 @@ static int bgx_lmac_enable(struct bgx *bgx, u8 lmacid) lmac->bgx = bgx; if ((lmac->lmac_type == BGX_MODE_SGMII) || - (lmac->lmac_type == BGX_MODE_QSGMII)) { + (lmac->lmac_type == BGX_MODE_QSGMII) || + (lmac->lmac_type == BGX_MODE_RGMII)) { lmac->is_sgmii = 1; if (bgx_lmac_sgmii_init(bgx, lmac)) return -1; @@ -829,7 +845,7 @@ static void bgx_print_qlm_mode(struct bgx *bgx, u8 lmacid) char str[20]; u8 dlm; - if (lmacid > MAX_LMAC_PER_BGX) + if (lmacid > bgx->max_lmac) return; lmac = &bgx->lmac[lmacid]; @@ -870,6 +886,9 @@ static void bgx_print_qlm_mode(struct bgx *bgx, u8 lmacid) return; dev_info(dev, "%s: QSGMII\n", (char *)str); break; + case BGX_MODE_RGMII: + dev_info(dev, "%s: RGMII\n", (char *)str); + break; case BGX_MODE_INVALID: /* Nothing to do */ break; @@ -885,6 +904,7 @@ static void lmac_set_lane2sds(struct bgx *bgx, struct lmac *lmac) break; case BGX_MODE_XAUI: case BGX_MODE_XLAUI: + case BGX_MODE_RGMII: lmac->lane_to_sds = 0xE4; break; case BGX_MODE_RXAUI: @@ -904,6 +924,18 @@ static void lmac_set_lane2sds(struct bgx *bgx, struct lmac *lmac) } } +static void lmac_set_training(struct bgx *bgx, struct lmac *lmac, int lmacid) +{ + if ((lmac->lmac_type != BGX_MODE_10G_KR) && + (lmac->lmac_type != BGX_MODE_40G_KR)) { + lmac->use_training = 0; + return; + } + + lmac->use_training = bgx_reg_read(bgx, lmacid, BGX_SPUX_BR_PMD_CRTL) & + SPU_PMD_CRTL_TRAIN_EN; +} + static void bgx_set_lmac_config(struct bgx *bgx, u8 idx) { struct lmac *lmac; @@ -914,15 +946,15 @@ static void bgx_set_lmac_config(struct bgx *bgx, u8 idx) lmac = &bgx->lmac[idx]; - if (!bgx->is_81xx) { + if (!bgx->is_81xx || bgx->is_rgx) { /* Read LMAC0 type to figure out QLM mode * This is configured by low level firmware */ cmr_cfg = bgx_reg_read(bgx, 0, BGX_CMRX_CFG); lmac->lmac_type = (cmr_cfg >> 8) & 0x07; - lmac->use_training = - bgx_reg_read(bgx, 0, BGX_SPUX_BR_PMD_CRTL) & - SPU_PMD_CRTL_TRAIN_EN; + if (bgx->is_rgx) + lmac->lmac_type = BGX_MODE_RGMII; + lmac_set_training(bgx, lmac, 0); lmac_set_lane2sds(bgx, lmac); return; } @@ -939,17 +971,13 @@ static void bgx_set_lmac_config(struct bgx *bgx, u8 idx) lmac->lmac_type = BGX_MODE_INVALID; else lmac->lmac_type = lmac_type; - lmac->use_training = - bgx_reg_read(bgx, idx, BGX_SPUX_BR_PMD_CRTL) & - SPU_PMD_CRTL_TRAIN_EN; + lmac_set_training(bgx, lmac, lmac->lmacid); lmac_set_lane2sds(bgx, lmac); /* Set LMAC type of other lmac on same DLM i.e LMAC 1/3 */ olmac = &bgx->lmac[idx + 1]; olmac->lmac_type = lmac->lmac_type; - olmac->use_training = - bgx_reg_read(bgx, idx + 1, BGX_SPUX_BR_PMD_CRTL) & - SPU_PMD_CRTL_TRAIN_EN; + lmac_set_training(bgx, olmac, olmac->lmacid); lmac_set_lane2sds(bgx, olmac); } } @@ -976,21 +1004,22 @@ static void bgx_get_qlm_mode(struct bgx *bgx) u8 idx; /* Init all LMAC's type to invalid */ - for (idx = 0; idx < MAX_LMAC_PER_BGX; idx++) { + for (idx = 0; idx < bgx->max_lmac; idx++) { lmac = &bgx->lmac[idx]; - lmac->lmac_type = BGX_MODE_INVALID; lmac->lmacid = idx; + lmac->lmac_type = BGX_MODE_INVALID; + lmac->use_training = false; } /* It is assumed that low level firmware sets this value */ bgx->lmac_count = bgx_reg_read(bgx, 0, BGX_CMR_RX_LMACS) & 0x7; - if (bgx->lmac_count > MAX_LMAC_PER_BGX) - bgx->lmac_count = MAX_LMAC_PER_BGX; + if (bgx->lmac_count > bgx->max_lmac) + bgx->lmac_count = bgx->max_lmac; - for (idx = 0; idx < MAX_LMAC_PER_BGX; idx++) + for (idx = 0; idx < bgx->max_lmac; idx++) bgx_set_lmac_config(bgx, idx); - if (!bgx->is_81xx) { + if (!bgx->is_81xx || bgx->is_rgx) { bgx_print_qlm_mode(bgx, 0); return; } @@ -1140,7 +1169,7 @@ static int bgx_init_of_phy(struct bgx *bgx) } lmac++; - if (lmac == MAX_LMAC_PER_BGX) { + if (lmac == bgx->max_lmac) { of_node_put(node); break; } @@ -1218,10 +1247,22 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err = -ENOMEM; goto err_release_regions; } - bgx->bgx_id = (pci_resource_start(pdev, PCI_CFG_REG_BAR_NUM) >> 24) & 1; - bgx->bgx_id += nic_get_node_id(pdev) * MAX_BGX_PER_CN88XX; - bgx_vnic[bgx->bgx_id] = bgx; + pci_read_config_word(pdev, PCI_DEVICE_ID, &sdevid); + if (sdevid != PCI_DEVICE_ID_THUNDER_RGX) { + bgx->bgx_id = + (pci_resource_start(pdev, PCI_CFG_REG_BAR_NUM) >> 24) & 1; + bgx->bgx_id += nic_get_node_id(pdev) * MAX_BGX_PER_CN88XX; + bgx->max_lmac = MAX_LMAC_PER_BGX; + bgx_vnic[bgx->bgx_id] = bgx; + } else { + bgx->is_rgx = true; + bgx->max_lmac = 1; + bgx->bgx_id = MAX_BGX_PER_CN81XX - 1; + bgx_vnic[bgx->bgx_id] = bgx; + xcv_init_hw(); + } + bgx_get_qlm_mode(bgx); err = bgx_init_phy(bgx); diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h index 0705863d2915..6225ff4bdbcd 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h @@ -11,6 +11,7 @@ /* PCI device ID */ #define PCI_DEVICE_ID_THUNDER_BGX 0xA026 +#define PCI_DEVICE_ID_THUNDER_RGX 0xA054 /* Subsystem device IDs */ #define PCI_SUBSYS_DEVID_88XX_BGX 0xA126 @@ -19,7 +20,7 @@ #define MAX_BGX_THUNDER 8 /* Max 4 nodes, 2 per node */ #define MAX_BGX_PER_CN88XX 2 -#define MAX_BGX_PER_CN81XX 2 +#define MAX_BGX_PER_CN81XX 3 /* 2 BGXs + 1 RGX */ #define MAX_BGX_PER_CN83XX 4 #define MAX_LMAC_PER_BGX 4 #define MAX_BGX_CHANS_PER_LMAC 16 @@ -205,6 +206,9 @@ void bgx_set_lmac_mac(int node, int bgx_idx, int lmacid, const u8 *mac); void bgx_get_lmac_link_state(int node, int bgx_idx, int lmacid, void *status); void bgx_lmac_internal_loopback(int node, int bgx_idx, int lmac_idx, bool enable); +void xcv_init_hw(void); +void xcv_setup_link(bool link_up, int link_speed); + u64 bgx_get_rx_stats(int node, int bgx_idx, int lmac, int idx); u64 bgx_get_tx_stats(int node, int bgx_idx, int lmac, int idx); #define BGX_RX_STATS_COUNT 11 diff --git a/drivers/net/ethernet/cavium/thunder/thunder_xcv.c b/drivers/net/ethernet/cavium/thunder/thunder_xcv.c new file mode 100644 index 000000000000..9210d04e4b5d --- /dev/null +++ b/drivers/net/ethernet/cavium/thunder/thunder_xcv.c @@ -0,0 +1,237 @@ +/* + * Copyright (C) 2016 Cavium, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "nic.h" +#include "thunder_bgx.h" + +#define DRV_NAME "thunder-xcv" +#define DRV_VERSION "1.0" + +/* Register offsets */ +#define XCV_RESET 0x00 +#define PORT_EN BIT_ULL(63) +#define CLK_RESET BIT_ULL(15) +#define DLL_RESET BIT_ULL(11) +#define COMP_EN BIT_ULL(7) +#define TX_PKT_RESET BIT_ULL(3) +#define TX_DATA_RESET BIT_ULL(2) +#define RX_PKT_RESET BIT_ULL(1) +#define RX_DATA_RESET BIT_ULL(0) +#define XCV_DLL_CTL 0x10 +#define CLKRX_BYP BIT_ULL(23) +#define CLKTX_BYP BIT_ULL(15) +#define XCV_COMP_CTL 0x20 +#define DRV_BYP BIT_ULL(63) +#define XCV_CTL 0x30 +#define XCV_INT 0x40 +#define XCV_INT_W1S 0x48 +#define XCV_INT_ENA_W1C 0x50 +#define XCV_INT_ENA_W1S 0x58 +#define XCV_INBND_STATUS 0x80 +#define XCV_BATCH_CRD_RET 0x100 + +struct xcv { + void __iomem *reg_base; + struct pci_dev *pdev; +}; + +static struct xcv *xcv; + +/* Supported devices */ +static const struct pci_device_id xcv_id_table[] = { + { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, 0xA056) }, + { 0, } /* end of table */ +}; + +MODULE_AUTHOR("Cavium Inc"); +MODULE_DESCRIPTION("Cavium Thunder RGX/XCV Driver"); +MODULE_LICENSE("GPL v2"); +MODULE_VERSION(DRV_VERSION); +MODULE_DEVICE_TABLE(pci, xcv_id_table); + +void xcv_init_hw(void) +{ + u64 cfg; + + /* Take DLL out of reset */ + cfg = readq_relaxed(xcv->reg_base + XCV_RESET); + cfg &= ~DLL_RESET; + writeq_relaxed(cfg, xcv->reg_base + XCV_RESET); + + /* Take clock tree out of reset */ + cfg = readq_relaxed(xcv->reg_base + XCV_RESET); + cfg &= ~CLK_RESET; + writeq_relaxed(cfg, xcv->reg_base + XCV_RESET); + /* Wait for DLL to lock */ + msleep(1); + + /* Configure DLL - enable or bypass + * TX no bypass, RX bypass + */ + cfg = readq_relaxed(xcv->reg_base + XCV_DLL_CTL); + cfg &= ~0xFF03; + cfg |= CLKRX_BYP; + writeq_relaxed(cfg, xcv->reg_base + XCV_DLL_CTL); + + /* Enable compensation controller and force the + * write to be visible to HW by readig back. + */ + cfg = readq_relaxed(xcv->reg_base + XCV_RESET); + cfg |= COMP_EN; + writeq_relaxed(cfg, xcv->reg_base + XCV_RESET); + readq_relaxed(xcv->reg_base + XCV_RESET); + /* Wait for compensation state machine to lock */ + msleep(10); + + /* enable the XCV block */ + cfg = readq_relaxed(xcv->reg_base + XCV_RESET); + cfg |= PORT_EN; + writeq_relaxed(cfg, xcv->reg_base + XCV_RESET); + + cfg = readq_relaxed(xcv->reg_base + XCV_RESET); + cfg |= CLK_RESET; + writeq_relaxed(cfg, xcv->reg_base + XCV_RESET); +} +EXPORT_SYMBOL(xcv_init_hw); + +void xcv_setup_link(bool link_up, int link_speed) +{ + u64 cfg; + int speed = 2; + + if (!xcv) { + dev_err(&xcv->pdev->dev, + "XCV init not done, probe may have failed\n"); + return; + } + + if (link_speed == 100) + speed = 1; + else if (link_speed == 10) + speed = 0; + + if (link_up) { + /* set operating speed */ + cfg = readq_relaxed(xcv->reg_base + XCV_CTL); + cfg &= ~0x03; + cfg |= speed; + writeq_relaxed(cfg, xcv->reg_base + XCV_CTL); + + /* Reset datapaths */ + cfg = readq_relaxed(xcv->reg_base + XCV_RESET); + cfg |= TX_DATA_RESET | RX_DATA_RESET; + writeq_relaxed(cfg, xcv->reg_base + XCV_RESET); + + /* Enable the packet flow */ + cfg = readq_relaxed(xcv->reg_base + XCV_RESET); + cfg |= TX_PKT_RESET | RX_PKT_RESET; + writeq_relaxed(cfg, xcv->reg_base + XCV_RESET); + + /* Return credits to RGX */ + writeq_relaxed(0x01, xcv->reg_base + XCV_BATCH_CRD_RET); + } else { + /* Disable packet flow */ + cfg = readq_relaxed(xcv->reg_base + XCV_RESET); + cfg &= ~(TX_PKT_RESET | RX_PKT_RESET); + writeq_relaxed(cfg, xcv->reg_base + XCV_RESET); + readq_relaxed(xcv->reg_base + XCV_RESET); + } +} +EXPORT_SYMBOL(xcv_setup_link); + +static int xcv_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + int err; + struct device *dev = &pdev->dev; + + xcv = devm_kzalloc(dev, sizeof(struct xcv), GFP_KERNEL); + if (!xcv) + return -ENOMEM; + xcv->pdev = pdev; + + pci_set_drvdata(pdev, xcv); + + err = pci_enable_device(pdev); + if (err) { + dev_err(dev, "Failed to enable PCI device\n"); + goto err_kfree; + } + + err = pci_request_regions(pdev, DRV_NAME); + if (err) { + dev_err(dev, "PCI request regions failed 0x%x\n", err); + goto err_disable_device; + } + + /* MAP configuration registers */ + xcv->reg_base = pcim_iomap(pdev, PCI_CFG_REG_BAR_NUM, 0); + if (!xcv->reg_base) { + dev_err(dev, "XCV: Cannot map CSR memory space, aborting\n"); + err = -ENOMEM; + goto err_release_regions; + } + + return 0; + +err_release_regions: + pci_release_regions(pdev); +err_disable_device: + pci_disable_device(pdev); +err_kfree: + pci_set_drvdata(pdev, NULL); + devm_kfree(dev, xcv); + xcv = NULL; + return err; +} + +static void xcv_remove(struct pci_dev *pdev) +{ + struct device *dev = &pdev->dev; + + if (xcv) { + devm_kfree(dev, xcv); + xcv = NULL; + } + + pci_release_regions(pdev); + pci_disable_device(pdev); + pci_set_drvdata(pdev, NULL); +} + +static struct pci_driver xcv_driver = { + .name = DRV_NAME, + .id_table = xcv_id_table, + .probe = xcv_probe, + .remove = xcv_remove, +}; + +static int __init xcv_init_module(void) +{ + pr_info("%s, ver %s\n", DRV_NAME, DRV_VERSION); + + return pci_register_driver(&xcv_driver); +} + +static void __exit xcv_cleanup_module(void) +{ + pci_unregister_driver(&xcv_driver); +} + +module_init(xcv_init_module); +module_exit(xcv_cleanup_module); From 949b5331419828ff4c028dae556e5983b06b5d18 Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Fri, 12 Aug 2016 16:51:34 +0530 Subject: [PATCH 0180/1715] net: thunderx: Add support for 16 LMACs of 83xx 83xx will have 4 BGX blocks i.e 16 LMACs, to avoid changing the same with every platform, nicpf struct elements which track LMAC related info are now allocated runtime based on platform's max possible BGX count. Also fixed configuring min packet size for all LMAC's supported on a platform. Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- .../net/ethernet/cavium/thunder/nic_main.c | 75 +++++++++++++++---- .../net/ethernet/cavium/thunder/thunder_bgx.h | 2 - 2 files changed, 60 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c index 91c575d418d0..b982e5ff7bbe 100644 --- a/drivers/net/ethernet/cavium/thunder/nic_main.c +++ b/drivers/net/ethernet/cavium/thunder/nic_main.c @@ -53,12 +53,12 @@ struct nicpf { #define NIC_SET_VF_LMAC_MAP(bgx, lmac) (((bgx & 0xF) << 4) | (lmac & 0xF)) #define NIC_GET_BGX_FROM_VF_LMAC_MAP(map) ((map >> 4) & 0xF) #define NIC_GET_LMAC_FROM_VF_LMAC_MAP(map) (map & 0xF) - u8 vf_lmac_map[MAX_LMAC]; + u8 *vf_lmac_map; struct delayed_work dwork; struct workqueue_struct *check_link; - u8 link[MAX_LMAC]; - u8 duplex[MAX_LMAC]; - u32 speed[MAX_LMAC]; + u8 *link; + u8 *duplex; + u32 *speed; u16 cpi_base[MAX_NUM_VFS_SUPPORTED]; u16 rssi_base[MAX_NUM_VFS_SUPPORTED]; bool mbx_lock[MAX_NUM_VFS_SUPPORTED]; @@ -174,7 +174,7 @@ static void nic_mbx_send_ready(struct nicpf *nic, int vf) mbx.nic_cfg.tns_mode = NIC_TNS_BYPASS_MODE; - if (vf < MAX_LMAC) { + if (vf < nic->num_vf_en) { bgx_idx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]); lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]); @@ -185,7 +185,7 @@ static void nic_mbx_send_ready(struct nicpf *nic, int vf) mbx.nic_cfg.sqs_mode = (vf >= nic->num_vf_en) ? true : false; mbx.nic_cfg.node_id = nic->node; - mbx.nic_cfg.loopback_supported = vf < MAX_LMAC; + mbx.nic_cfg.loopback_supported = vf < nic->num_vf_en; nic_send_msg_to_vf(nic, vf, &mbx); } @@ -278,14 +278,22 @@ static int nic_update_hw_frs(struct nicpf *nic, int new_frs, int vf) /* Set minimum transmit packet size */ static void nic_set_tx_pkt_pad(struct nicpf *nic, int size) { - int lmac; + int lmac, max_lmac; + u16 sdevid; u64 lmac_cfg; /* Max value that can be set is 60 */ if (size > 60) size = 60; - for (lmac = 0; lmac < (MAX_BGX_PER_CN88XX * MAX_LMAC_PER_BGX); lmac++) { + pci_read_config_word(nic->pdev, PCI_SUBSYSTEM_ID, &sdevid); + /* 81xx's RGX has only one LMAC */ + if (sdevid == PCI_SUBSYS_DEVID_81XX_NIC_PF) + max_lmac = ((nic->hw->bgx_cnt - 1) * MAX_LMAC_PER_BGX) + 1; + else + max_lmac = nic->hw->bgx_cnt * MAX_LMAC_PER_BGX; + + for (lmac = 0; lmac < max_lmac; lmac++) { lmac_cfg = nic_reg_read(nic, NIC_PF_LMAC_0_7_CFG | (lmac << 3)); lmac_cfg &= ~(0xF << 2); lmac_cfg |= ((size / 4) << 2); @@ -336,8 +344,17 @@ static void nic_set_lmac_vf_mapping(struct nicpf *nic) } } -static void nic_get_hw_info(struct nicpf *nic) +static void nic_free_lmacmem(struct nicpf *nic) { + kfree(nic->vf_lmac_map); + kfree(nic->link); + kfree(nic->duplex); + kfree(nic->speed); +} + +static int nic_get_hw_info(struct nicpf *nic) +{ + u8 max_lmac; u16 sdevid; struct hw_info *hw = nic->hw; @@ -385,18 +402,40 @@ static void nic_get_hw_info(struct nicpf *nic) break; } hw->tl4_cnt = MAX_QUEUES_PER_QSET * pci_sriov_get_totalvfs(nic->pdev); + + /* Allocate memory for LMAC tracking elements */ + max_lmac = hw->bgx_cnt * MAX_LMAC_PER_BGX; + nic->vf_lmac_map = kmalloc_array(max_lmac, sizeof(u8), GFP_KERNEL); + if (!nic->vf_lmac_map) + goto error; + nic->link = kmalloc_array(max_lmac, sizeof(u8), GFP_KERNEL); + if (!nic->link) + goto error; + nic->duplex = kmalloc_array(max_lmac, sizeof(u8), GFP_KERNEL); + if (!nic->duplex) + goto error; + nic->speed = kmalloc_array(max_lmac, sizeof(u32), GFP_KERNEL); + if (!nic->speed) + goto error; + return 0; + +error: + nic_free_lmacmem(nic); + return -ENOMEM; } #define BGX0_BLOCK 8 #define BGX1_BLOCK 9 -static void nic_init_hw(struct nicpf *nic) +static int nic_init_hw(struct nicpf *nic) { - int i; + int i, err; u64 cqm_cfg; /* Get HW capability info */ - nic_get_hw_info(nic); + err = nic_get_hw_info(nic); + if (err) + return err; /* Enable NIC HW block */ nic_reg_write(nic, NIC_PF_CFG, 0x3); @@ -442,6 +481,8 @@ static void nic_init_hw(struct nicpf *nic) cqm_cfg = nic_reg_read(nic, NIC_PF_CQM_CFG); if (cqm_cfg < NICPF_CQM_MIN_DROP_LEVEL) nic_reg_write(nic, NIC_PF_CQM_CFG, NICPF_CQM_MIN_DROP_LEVEL); + + return 0; } /* Channel parse index configuration */ @@ -741,7 +782,7 @@ static int nic_config_loopback(struct nicpf *nic, struct set_loopback *lbk) { int bgx_idx, lmac_idx; - if (lbk->vf_id > MAX_LMAC) + if (lbk->vf_id >= nic->num_vf_en) return -1; bgx_idx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[lbk->vf_id]); @@ -795,7 +836,7 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf) switch (mbx.msg.msg) { case NIC_MBOX_MSG_READY: nic_mbx_send_ready(nic, vf); - if (vf < MAX_LMAC) { + if (vf < nic->num_vf_en) { nic->link[vf] = 0; nic->duplex[vf] = 0; nic->speed[vf] = 0; @@ -1191,7 +1232,9 @@ static int nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) nic->node = nic_get_node_id(pdev); /* Initialize hardware */ - nic_init_hw(nic); + err = nic_init_hw(nic); + if (err) + goto err_release_regions; nic_set_lmac_vf_mapping(nic); @@ -1226,6 +1269,7 @@ err_unregister_interrupts: err_release_regions: pci_release_regions(pdev); err_disable_device: + nic_free_lmacmem(nic); devm_kfree(dev, nic->hw); devm_kfree(dev, nic); pci_disable_device(pdev); @@ -1249,6 +1293,7 @@ static void nic_remove(struct pci_dev *pdev) nic_unregister_interrupts(nic); pci_release_regions(pdev); + nic_free_lmacmem(nic); devm_kfree(&pdev->dev, nic->hw); devm_kfree(&pdev->dev, nic); diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h index 6225ff4bdbcd..c7d7cc67bee0 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h @@ -29,8 +29,6 @@ #define MAX_DMAC_PER_LMAC_TNS_BYPASS_MODE 2 -#define MAX_LMAC (MAX_BGX_PER_CN88XX * MAX_LMAC_PER_BGX) - /* Registers */ #define BGX_CMRX_CFG 0x00 #define CMR_PKT_TX_EN BIT_ULL(13) From 09de39176b624b7556d190511aebd92da3420ec9 Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Fri, 12 Aug 2016 16:51:35 +0530 Subject: [PATCH 0181/1715] net: thunderx: Support for 83xx mixed QLM/DLM config 83xx has 4 BGX blocks and are enabled mixed QLM/DLM configs. BGX0/BGX1 are from QLM2/QLM3, BGX3 is DLM4 and BGX2 is split across DLM5 & DLM6. This patch adds support for BGX2's split config and also enables all 4 BGXs to be used. Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- .../net/ethernet/cavium/thunder/thunder_bgx.c | 44 ++++++++++--------- .../net/ethernet/cavium/thunder/thunder_bgx.h | 3 +- 2 files changed, 26 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c index 4ddc760afe03..ca58d7f52ecd 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -51,7 +51,7 @@ struct bgx { u8 max_lmac; void __iomem *reg_base; struct pci_dev *pdev; - bool is_81xx; + bool is_dlm; bool is_rgx; }; @@ -127,8 +127,8 @@ unsigned bgx_get_map(int node) int i; unsigned map = 0; - for (i = 0; i < MAX_BGX_PER_CN81XX; i++) { - if (bgx_vnic[(node * MAX_BGX_PER_CN88XX) + i]) + for (i = 0; i < MAX_BGX_PER_NODE; i++) { + if (bgx_vnic[(node * MAX_BGX_PER_NODE) + i]) map |= (1 << i); } @@ -141,7 +141,7 @@ int bgx_get_lmac_count(int node, int bgx_idx) { struct bgx *bgx; - bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx]; + bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx]; if (bgx) return bgx->lmac_count; @@ -156,7 +156,7 @@ void bgx_get_lmac_link_state(int node, int bgx_idx, int lmacid, void *status) struct bgx *bgx; struct lmac *lmac; - bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx]; + bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx]; if (!bgx) return; @@ -169,7 +169,7 @@ EXPORT_SYMBOL(bgx_get_lmac_link_state); const u8 *bgx_get_lmac_mac(int node, int bgx_idx, int lmacid) { - struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx]; + struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx]; if (bgx) return bgx->lmac[lmacid].mac; @@ -180,7 +180,7 @@ EXPORT_SYMBOL(bgx_get_lmac_mac); void bgx_set_lmac_mac(int node, int bgx_idx, int lmacid, const u8 *mac) { - struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx]; + struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx]; if (!bgx) return; @@ -191,7 +191,7 @@ EXPORT_SYMBOL(bgx_set_lmac_mac); void bgx_lmac_rx_tx_enable(int node, int bgx_idx, int lmacid, bool enable) { - struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx]; + struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx]; struct lmac *lmac; u64 cfg; @@ -325,7 +325,7 @@ u64 bgx_get_rx_stats(int node, int bgx_idx, int lmac, int idx) { struct bgx *bgx; - bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx]; + bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx]; if (!bgx) return 0; @@ -339,7 +339,7 @@ u64 bgx_get_tx_stats(int node, int bgx_idx, int lmac, int idx) { struct bgx *bgx; - bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx]; + bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx]; if (!bgx) return 0; @@ -367,7 +367,7 @@ void bgx_lmac_internal_loopback(int node, int bgx_idx, struct lmac *lmac; u64 cfg; - bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx]; + bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx]; if (!bgx) return; @@ -850,7 +850,7 @@ static void bgx_print_qlm_mode(struct bgx *bgx, u8 lmacid) lmac = &bgx->lmac[lmacid]; dlm = (lmacid / 2) + (bgx->bgx_id * 2); - if (!bgx->is_81xx) + if (!bgx->is_dlm) sprintf(str, "BGX%d QLM mode", bgx->bgx_id); else sprintf(str, "BGX%d DLM%d mode", bgx->bgx_id, dlm); @@ -946,7 +946,7 @@ static void bgx_set_lmac_config(struct bgx *bgx, u8 idx) lmac = &bgx->lmac[idx]; - if (!bgx->is_81xx || bgx->is_rgx) { + if (!bgx->is_dlm || bgx->is_rgx) { /* Read LMAC0 type to figure out QLM mode * This is configured by low level firmware */ @@ -986,7 +986,7 @@ static bool is_dlm0_in_bgx_mode(struct bgx *bgx) { struct lmac *lmac; - if (!bgx->is_81xx) + if (!bgx->is_dlm) return true; lmac = &bgx->lmac[0]; @@ -1019,7 +1019,7 @@ static void bgx_get_qlm_mode(struct bgx *bgx) for (idx = 0; idx < bgx->max_lmac; idx++) bgx_set_lmac_config(bgx, idx); - if (!bgx->is_81xx || bgx->is_rgx) { + if (!bgx->is_dlm || bgx->is_rgx) { bgx_print_qlm_mode(bgx, 0); return; } @@ -1236,10 +1236,6 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_disable_device; } - pci_read_config_word(pdev, PCI_SUBSYSTEM_ID, &sdevid); - if (sdevid == PCI_SUBSYS_DEVID_81XX_BGX) - bgx->is_81xx = true; - /* MAP configuration registers */ bgx->reg_base = pcim_iomap(pdev, PCI_CFG_REG_BAR_NUM, 0); if (!bgx->reg_base) { @@ -1252,7 +1248,7 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (sdevid != PCI_DEVICE_ID_THUNDER_RGX) { bgx->bgx_id = (pci_resource_start(pdev, PCI_CFG_REG_BAR_NUM) >> 24) & 1; - bgx->bgx_id += nic_get_node_id(pdev) * MAX_BGX_PER_CN88XX; + bgx->bgx_id += nic_get_node_id(pdev) * MAX_BGX_PER_NODE; bgx->max_lmac = MAX_LMAC_PER_BGX; bgx_vnic[bgx->bgx_id] = bgx; } else { @@ -1263,6 +1259,14 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) xcv_init_hw(); } + /* On 81xx all are DLMs and on 83xx there are 3 BGX QLMs and one + * BGX i.e BGX2 can be split across 2 DLMs. + */ + pci_read_config_word(pdev, PCI_SUBSYSTEM_ID, &sdevid); + if ((sdevid == PCI_SUBSYS_DEVID_81XX_BGX) || + ((sdevid == PCI_SUBSYS_DEVID_83XX_BGX) && (bgx->bgx_id == 2))) + bgx->is_dlm = true; + bgx_get_qlm_mode(bgx); err = bgx_init_phy(bgx); diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h index c7d7cc67bee0..d59c71e4a000 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h @@ -18,10 +18,11 @@ #define PCI_SUBSYS_DEVID_81XX_BGX 0xA226 #define PCI_SUBSYS_DEVID_83XX_BGX 0xA326 -#define MAX_BGX_THUNDER 8 /* Max 4 nodes, 2 per node */ +#define MAX_BGX_THUNDER 8 /* Max 2 nodes, 4 per node */ #define MAX_BGX_PER_CN88XX 2 #define MAX_BGX_PER_CN81XX 3 /* 2 BGXs + 1 RGX */ #define MAX_BGX_PER_CN83XX 4 +#define MAX_BGX_PER_NODE 4 #define MAX_LMAC_PER_BGX 4 #define MAX_BGX_CHANS_PER_LMAC 16 #define MAX_DMAC_PER_LMAC 8 From e412621394faacc24243abab84ce5b7a73407350 Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Fri, 12 Aug 2016 16:51:36 +0530 Subject: [PATCH 0182/1715] net: thunderx: Use netdev's name for naming VF's interrupts This patch changes the way VF's irqs are visible in /proc/interrupts. Instead of VF id, logical interface's netdev name is used for IRQ naming and also all secondary VF's interrupts in multiqset config use primary VF's netdev name. Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- .../net/ethernet/cavium/thunder/nicvf_main.c | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index d9efe2fb7b81..5335ff1bf69a 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -938,16 +938,19 @@ static int nicvf_register_interrupts(struct nicvf *nic) int vector; for_each_cq_irq(irq) - sprintf(nic->irq_name[irq], "NICVF%d CQ%d", - nic->vf_id, irq); + sprintf(nic->irq_name[irq], "%s-rxtx-%d", + nic->pnicvf->netdev->name, + nicvf_netdev_qidx(nic, irq)); for_each_sq_irq(irq) - sprintf(nic->irq_name[irq], "NICVF%d SQ%d", - nic->vf_id, irq - NICVF_INTR_ID_SQ); + sprintf(nic->irq_name[irq], "%s-sq-%d", + nic->pnicvf->netdev->name, + nicvf_netdev_qidx(nic, irq - NICVF_INTR_ID_SQ)); for_each_rbdr_irq(irq) - sprintf(nic->irq_name[irq], "NICVF%d RBDR%d", - nic->vf_id, irq - NICVF_INTR_ID_RBDR); + sprintf(nic->irq_name[irq], "%s-rbdr-%d", + nic->pnicvf->netdev->name, + nic->sqs_mode ? (nic->sqs_id + 1) : 0); /* Register CQ interrupts */ for (irq = 0; irq < nic->qs->cq_cnt; irq++) { @@ -971,8 +974,9 @@ static int nicvf_register_interrupts(struct nicvf *nic) } /* Register QS error interrupt */ - sprintf(nic->irq_name[NICVF_INTR_ID_QS_ERR], - "NICVF%d Qset error", nic->vf_id); + sprintf(nic->irq_name[NICVF_INTR_ID_QS_ERR], "%s-qset-err-%d", + nic->pnicvf->netdev->name, + nic->sqs_mode ? (nic->sqs_id + 1) : 0); irq = NICVF_INTR_ID_QS_ERR; ret = request_irq(nic->msix_entries[irq].vector, nicvf_qs_err_intr_handler, From a8671acca87267ad8b583b6e5cf9f6bcb09ff77a Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Fri, 12 Aug 2016 16:51:37 +0530 Subject: [PATCH 0183/1715] net: thunderx: Use skb_add_rx_frag() for split buffer Rx pkts Instead of a round about way of converting buffers to SKBs and combining them into a frag list, use standard skb_add_rx_frag() API to merge page fragments. This code is useful when incoming packets are of size more than RCV_FRAG_LEN which is currently set to 2048bytes. Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- .../ethernet/cavium/thunder/nicvf_queues.c | 24 +++++-------------- 1 file changed, 6 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c index ca223aa73825..ed2fe7276897 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -1184,8 +1184,8 @@ struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic, struct cqe_rx_t *cqe_rx) int frag; int payload_len = 0; struct sk_buff *skb = NULL; - struct sk_buff *skb_frag = NULL; - struct sk_buff *prev_frag = NULL; + struct page *page; + int offset; u16 *rb_lens = NULL; u64 *rb_ptrs = NULL; @@ -1218,22 +1218,10 @@ struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic, struct cqe_rx_t *cqe_rx) skb_put(skb, payload_len); } else { /* Add fragments */ - skb_frag = nicvf_rb_ptr_to_skb(nic, *rb_ptrs, - payload_len); - if (!skb_frag) { - dev_kfree_skb(skb); - return NULL; - } - - if (!skb_shinfo(skb)->frag_list) - skb_shinfo(skb)->frag_list = skb_frag; - else - prev_frag->next = skb_frag; - - prev_frag = skb_frag; - skb->len += payload_len; - skb->data_len += payload_len; - skb_frag->len = payload_len; + page = virt_to_page(phys_to_virt(*rb_ptrs)); + offset = phys_to_virt(*rb_ptrs) - page_address(page); + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, + offset, payload_len, RCV_FRAG_LEN); } /* Next buffer pointer */ rb_ptrs++; From ecae29cb15c1a9502bb24f44ff9b84075c0b1dcd Mon Sep 17 00:00:00 2001 From: Radoslaw Biernacki Date: Fri, 12 Aug 2016 16:51:38 +0530 Subject: [PATCH 0184/1715] net: thunderx: Improvement for MBX interface debug messages Adding debug messages in case of NACK for a mailbox message, also did small cleanups. Signed-off-by: Radoslaw Biernacki Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nic_main.c | 16 ++++++++++------ drivers/net/ethernet/cavium/thunder/nicvf_main.c | 8 ++++++-- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c index b982e5ff7bbe..c706b2e31598 100644 --- a/drivers/net/ethernet/cavium/thunder/nic_main.c +++ b/drivers/net/ethernet/cavium/thunder/nic_main.c @@ -831,7 +831,7 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf) mbx_addr += sizeof(u64); } - dev_dbg(&nic->pdev->dev, "%s: Mailbox msg %d from VF%d\n", + dev_dbg(&nic->pdev->dev, "%s: Mailbox msg 0x%02x from VF%d\n", __func__, mbx.msg.msg, vf); switch (mbx.msg.msg) { case NIC_MBOX_MSG_READY: @@ -841,8 +841,7 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf) nic->duplex[vf] = 0; nic->speed[vf] = 0; } - ret = 1; - break; + goto unlock; case NIC_MBOX_MSG_QS_CFG: reg_addr = NIC_PF_QSET_0_127_CFG | (mbx.qs.num << NIC_QS_ID_SHIFT); @@ -891,8 +890,10 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf) nic_tx_channel_cfg(nic, mbx.qs.num, &mbx.sq); break; case NIC_MBOX_MSG_SET_MAC: - if (vf >= nic->num_vf_en) + if (vf >= nic->num_vf_en) { + ret = -1; /* NACK */ break; + } lmac = mbx.mac.vf_id; bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[lmac]); lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[lmac]); @@ -947,10 +948,13 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf) break; } - if (!ret) + if (!ret) { nic_mbx_send_ack(nic, vf); - else if (mbx.msg.msg != NIC_MBOX_MSG_READY) + } else if (mbx.msg.msg != NIC_MBOX_MSG_READY) { + dev_err(&nic->pdev->dev, "NACK for MBOX 0x%02x from VF %d\n", + mbx.msg.msg, vf); nic_mbx_send_nack(nic, vf); + } unlock: nic->mbx_lock[vf] = false; } diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index 5335ff1bf69a..dd57361aa3fc 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -144,15 +144,19 @@ int nicvf_send_msg_to_pf(struct nicvf *nic, union nic_mbx *mbx) /* Wait for previous message to be acked, timeout 2sec */ while (!nic->pf_acked) { - if (nic->pf_nacked) + if (nic->pf_nacked) { + netdev_err(nic->netdev, + "PF NACK to mbox msg 0x%02x from VF%d\n", + (mbx->msg.msg & 0xFF), nic->vf_id); return -EINVAL; + } msleep(sleep); if (nic->pf_acked) break; timeout -= sleep; if (!timeout) { netdev_err(nic->netdev, - "PF didn't ack to mbox msg %d from VF%d\n", + "PF didn't ACK to mbox msg 0x%02x from VF%d\n", (mbx->msg.msg & 0xFF), nic->vf_id); return -EBUSY; } From 3458c40d608ab7d3b2b591eb9b1f1b84502b55d2 Mon Sep 17 00:00:00 2001 From: Jerin Jacob Date: Fri, 12 Aug 2016 16:51:39 +0530 Subject: [PATCH 0185/1715] net: thunderx: Reset RXQ HW stats when interface is brought down When SQ/TXQ is reclaimed i.e reset it's stats also automatically reset by HW. This is not the case with RQ. Also VF doesn't have write access to statistics counter registers. Hence a new Mbox msg is introduced which supports resetting RQ, SQ and full Qset stats. Currently only RQ stats are being reset using this mbox message. Signed-off-by: Jerin Jacob Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nic.h | 27 +++++++++++ .../net/ethernet/cavium/thunder/nic_main.c | 45 +++++++++++++++++++ .../ethernet/cavium/thunder/nicvf_queues.c | 15 +++++++ 3 files changed, 87 insertions(+) diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h index 136db2a8f044..dd63f961827a 100644 --- a/drivers/net/ethernet/cavium/thunder/nic.h +++ b/drivers/net/ethernet/cavium/thunder/nic.h @@ -347,6 +347,7 @@ struct nicvf { #define NIC_MBOX_MSG_PNICVF_PTR 0x14 /* Get primary qset nicvf ptr */ #define NIC_MBOX_MSG_SNICVF_PTR 0x15 /* Send sqet nicvf ptr to PVF */ #define NIC_MBOX_MSG_LOOPBACK 0x16 /* Set interface in loopback */ +#define NIC_MBOX_MSG_RESET_STAT_COUNTER 0x17 /* Reset statistics counters */ #define NIC_MBOX_MSG_CFG_DONE 0xF0 /* VF configuration done */ #define NIC_MBOX_MSG_SHUTDOWN 0xF1 /* VF is being shutdown */ @@ -463,6 +464,31 @@ struct set_loopback { bool enable; }; +/* Reset statistics counters */ +struct reset_stat_cfg { + u8 msg; + /* Bitmap to select NIC_PF_VNIC(vf_id)_RX_STAT(0..13) */ + u16 rx_stat_mask; + /* Bitmap to select NIC_PF_VNIC(vf_id)_TX_STAT(0..4) */ + u8 tx_stat_mask; + /* Bitmap to select NIC_PF_QS(0..127)_RQ(0..7)_STAT(0..1) + * bit14, bit15 NIC_PF_QS(vf_id)_RQ7_STAT(0..1) + * bit12, bit13 NIC_PF_QS(vf_id)_RQ6_STAT(0..1) + * .. + * bit2, bit3 NIC_PF_QS(vf_id)_RQ1_STAT(0..1) + * bit0, bit1 NIC_PF_QS(vf_id)_RQ0_STAT(0..1) + */ + u16 rq_stat_mask; + /* Bitmap to select NIC_PF_QS(0..127)_SQ(0..7)_STAT(0..1) + * bit14, bit15 NIC_PF_QS(vf_id)_SQ7_STAT(0..1) + * bit12, bit13 NIC_PF_QS(vf_id)_SQ6_STAT(0..1) + * .. + * bit2, bit3 NIC_PF_QS(vf_id)_SQ1_STAT(0..1) + * bit0, bit1 NIC_PF_QS(vf_id)_SQ0_STAT(0..1) + */ + u16 sq_stat_mask; +}; + /* 128 bit shared memory between PF and each VF */ union nic_mbx { struct { u8 msg; } msg; @@ -480,6 +506,7 @@ union nic_mbx { struct sqs_alloc sqs_alloc; struct nicvf_ptr nicvf; struct set_loopback lbk; + struct reset_stat_cfg reset_stat; }; #define NIC_NODE_ID_MASK 0x03 diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c index c706b2e31598..167291bba88a 100644 --- a/drivers/net/ethernet/cavium/thunder/nic_main.c +++ b/drivers/net/ethernet/cavium/thunder/nic_main.c @@ -793,6 +793,48 @@ static int nic_config_loopback(struct nicpf *nic, struct set_loopback *lbk) return 0; } +/* Reset statistics counters */ +static int nic_reset_stat_counters(struct nicpf *nic, + int vf, struct reset_stat_cfg *cfg) +{ + int i, stat, qnum; + u64 reg_addr; + + for (i = 0; i < RX_STATS_ENUM_LAST; i++) { + if (cfg->rx_stat_mask & BIT(i)) { + reg_addr = NIC_PF_VNIC_0_127_RX_STAT_0_13 | + (vf << NIC_QS_ID_SHIFT) | + (i << 3); + nic_reg_write(nic, reg_addr, 0); + } + } + + for (i = 0; i < TX_STATS_ENUM_LAST; i++) { + if (cfg->tx_stat_mask & BIT(i)) { + reg_addr = NIC_PF_VNIC_0_127_TX_STAT_0_4 | + (vf << NIC_QS_ID_SHIFT) | + (i << 3); + nic_reg_write(nic, reg_addr, 0); + } + } + + for (i = 0; i <= 15; i++) { + qnum = i >> 1; + stat = i & 1 ? 1 : 0; + reg_addr = (vf << NIC_QS_ID_SHIFT) | + (qnum << NIC_Q_NUM_SHIFT) | (stat << 3); + if (cfg->rq_stat_mask & BIT(i)) { + reg_addr |= NIC_PF_QSET_0_127_RQ_0_7_STAT_0_1; + nic_reg_write(nic, reg_addr, 0); + } + if (cfg->sq_stat_mask & BIT(i)) { + reg_addr |= NIC_PF_QSET_0_127_SQ_0_7_STAT_0_1; + nic_reg_write(nic, reg_addr, 0); + } + } + return 0; +} + static void nic_enable_vf(struct nicpf *nic, int vf, bool enable) { int bgx, lmac; @@ -942,6 +984,9 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf) case NIC_MBOX_MSG_LOOPBACK: ret = nic_config_loopback(nic, &mbx.lbk); break; + case NIC_MBOX_MSG_RESET_STAT_COUNTER: + ret = nic_reset_stat_counters(nic, vf, &mbx.reset_stat); + break; default: dev_err(&nic->pdev->dev, "Invalid msg from VF%d, msg 0x%x\n", vf, mbx.msg.msg); diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c index ed2fe7276897..7d90856c9783 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -479,6 +479,16 @@ void nicvf_config_vlan_stripping(struct nicvf *nic, netdev_features_t features) NIC_QSET_RQ_GEN_CFG, 0, rq_cfg); } +static void nicvf_reset_rcv_queue_stats(struct nicvf *nic) +{ + union nic_mbx mbx = {}; + + /* Reset all RXQ's stats */ + mbx.reset_stat.msg = NIC_MBOX_MSG_RESET_STAT_COUNTER; + mbx.reset_stat.rq_stat_mask = 0xFFFF; + nicvf_send_msg_to_pf(nic, &mbx); +} + /* Configures receive queue */ static void nicvf_rcv_queue_config(struct nicvf *nic, struct queue_set *qs, int qidx, bool enable) @@ -812,6 +822,11 @@ int nicvf_config_data_transfer(struct nicvf *nic, bool enable) nicvf_free_resources(nic); } + /* Reset RXQ's stats. + * SQ's stats will get reset automatically once SQ is reset. + */ + nicvf_reset_rcv_queue_stats(nic); + return 0; } From a3a8ce4ce78d9b7c31c7e7f9327409a575bd6280 Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Fri, 12 Aug 2016 16:51:40 +0530 Subject: [PATCH 0186/1715] net: thunderx: Don't set mac address for secondary Qset VFs Set MAC addresses only for primary VF's and don't for secondary VFs. Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nicvf_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index dd57361aa3fc..0b68f2b6de5d 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -1209,7 +1209,7 @@ int nicvf_open(struct net_device *netdev) } /* Check if we got MAC address from PF or else generate a radom MAC */ - if (is_zero_ether_addr(netdev->dev_addr)) { + if (!nic->sqs_mode && is_zero_ether_addr(netdev->dev_addr)) { eth_hw_addr_random(netdev); nicvf_hw_set_mac_addr(nic, netdev); } From c43548d26cb4b699910a3cb5308ce89e7acb30fe Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Fri, 12 Aug 2016 16:51:41 +0530 Subject: [PATCH 0187/1715] net: thunderx: Use napi_consume_skb for bulk free This patch enables bulk freeing on the Tx side. Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nicvf_main.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index 0b68f2b6de5d..eb48d33ec67b 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -521,7 +521,8 @@ static int nicvf_init_resources(struct nicvf *nic) static void nicvf_snd_pkt_handler(struct net_device *netdev, struct cmp_queue *cq, - struct cqe_send_t *cqe_tx, int cqe_type) + struct cqe_send_t *cqe_tx, + int cqe_type, int budget) { struct sk_buff *skb = NULL; struct nicvf *nic = netdev_priv(netdev); @@ -545,7 +546,7 @@ static void nicvf_snd_pkt_handler(struct net_device *netdev, if (skb) { nicvf_put_sq_desc(sq, hdr->subdesc_cnt + 1); prefetch(skb); - dev_consume_skb_any(skb); + napi_consume_skb(skb, budget); sq->skbuff[cqe_tx->sqe_ptr] = (u64)NULL; } else { /* In case of HW TSO, HW sends a CQE for each segment of a TSO @@ -700,7 +701,8 @@ loop: break; case CQE_TYPE_SEND: nicvf_snd_pkt_handler(netdev, cq, - (void *)cq_desc, CQE_TYPE_SEND); + (void *)cq_desc, CQE_TYPE_SEND, + budget); tx_done++; break; case CQE_TYPE_INVALID: From e22e86ea987d4ea341d30d9891c48e3ccac0c634 Mon Sep 17 00:00:00 2001 From: Zyta Szpak Date: Fri, 12 Aug 2016 16:51:42 +0530 Subject: [PATCH 0188/1715] net: thunderx: Configure tunnelling protocol parsing This patch enables parsing of inner layers for tunnelled packets. Signed-off-by: Zyta Szpak Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- .../net/ethernet/cavium/thunder/nic_main.c | 21 +++++++++++++++++++ drivers/net/ethernet/cavium/thunder/nic_reg.h | 13 ++++++++++++ 2 files changed, 34 insertions(+) diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c index 167291bba88a..25618d203931 100644 --- a/drivers/net/ethernet/cavium/thunder/nic_main.c +++ b/drivers/net/ethernet/cavium/thunder/nic_main.c @@ -835,6 +835,25 @@ static int nic_reset_stat_counters(struct nicpf *nic, return 0; } +static void nic_enable_tunnel_parsing(struct nicpf *nic, int vf) +{ + u64 prot_def = (IPV6_PROT << 32) | (IPV4_PROT << 16) | ET_PROT; + u64 vxlan_prot_def = (IPV6_PROT_DEF << 32) | + (IPV4_PROT_DEF) << 16 | ET_PROT_DEF; + + /* Configure tunnel parsing parameters */ + nic_reg_write(nic, NIC_PF_RX_GENEVE_DEF, + (1ULL << 63 | UDP_GENEVE_PORT_NUM)); + nic_reg_write(nic, NIC_PF_RX_GENEVE_PROT_DEF, + ((7ULL << 61) | prot_def)); + nic_reg_write(nic, NIC_PF_RX_NVGRE_PROT_DEF, + ((7ULL << 61) | prot_def)); + nic_reg_write(nic, NIC_PF_RX_VXLAN_DEF_0_1, + ((1ULL << 63) | UDP_VXLAN_PORT_NUM)); + nic_reg_write(nic, NIC_PF_RX_VXLAN_PROT_DEF, + ((0xfULL << 60) | vxlan_prot_def)); +} + static void nic_enable_vf(struct nicpf *nic, int vf, bool enable) { int bgx, lmac; @@ -908,6 +927,8 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf) */ if (pass2_silicon(nic->pdev)) nic_reg_write(nic, NIC_PF_RX_CFG, 0x01); + if (!pass1_silicon(nic->pdev)) + nic_enable_tunnel_parsing(nic, vf); break; case NIC_MBOX_MSG_RQ_BP_CFG: reg_addr = NIC_PF_QSET_0_127_RQ_0_7_BP_CFG | diff --git a/drivers/net/ethernet/cavium/thunder/nic_reg.h b/drivers/net/ethernet/cavium/thunder/nic_reg.h index b4a79534a06b..db9c632d0157 100644 --- a/drivers/net/ethernet/cavium/thunder/nic_reg.h +++ b/drivers/net/ethernet/cavium/thunder/nic_reg.h @@ -36,6 +36,19 @@ #define NIC_PF_MAILBOX_ENA_W1C (0x0450) #define NIC_PF_MAILBOX_ENA_W1S (0x0470) #define NIC_PF_RX_ETYPE_0_7 (0x0500) +#define NIC_PF_RX_GENEVE_DEF (0x0580) +#define UDP_GENEVE_PORT_NUM 0x17C1ULL +#define NIC_PF_RX_GENEVE_PROT_DEF (0x0588) +#define IPV6_PROT 0x86DDULL +#define IPV4_PROT 0x800ULL +#define ET_PROT 0x6558ULL +#define NIC_PF_RX_NVGRE_PROT_DEF (0x0598) +#define NIC_PF_RX_VXLAN_DEF_0_1 (0x05A0) +#define UDP_VXLAN_PORT_NUM 0x12B5 +#define NIC_PF_RX_VXLAN_PROT_DEF (0x05B0) +#define IPV6_PROT_DEF 0x2ULL +#define IPV4_PROT_DEF 0x1ULL +#define ET_PROT_DEF 0x3ULL #define NIC_PF_RX_CFG (0x05D0) #define NIC_PF_PKIND_0_15_CFG (0x0600) #define NIC_PF_ECC0_FLIP0 (0x1000) From 0052c92f8f84ef94210b91dd471a3dfa1f25625a Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Fri, 12 Aug 2016 16:51:43 +0530 Subject: [PATCH 0189/1715] net: thunderx: Use netdev_rss_key_fill() helper Use standard API to generate a random RSS hash key on every boot. Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nicvf_main.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index eb48d33ec67b..06c014edf762 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -366,13 +366,7 @@ static int nicvf_rss_init(struct nicvf *nic) rss->enable = true; - /* Using the HW reset value for now */ - rss->key[0] = 0xFEED0BADFEED0BADULL; - rss->key[1] = 0xFEED0BADFEED0BADULL; - rss->key[2] = 0xFEED0BADFEED0BADULL; - rss->key[3] = 0xFEED0BADFEED0BADULL; - rss->key[4] = 0xFEED0BADFEED0BADULL; - + netdev_rss_key_fill(rss->key, RSS_HASH_KEY_SIZE * sizeof(u64)); nicvf_set_rss_key(nic); rss->cfg = RSS_IP_HASH_ENA | RSS_TCP_HASH_ENA | RSS_UDP_HASH_ENA; From 93db2cf8caa1fa69cb833175cc5d30a7d178d53b Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Fri, 12 Aug 2016 16:51:44 +0530 Subject: [PATCH 0190/1715] net: thunderx: Don't set RX_PACKET_DIS while initializing Setting BGXX_SPUX_MISC_CONTROL::RX_PACKET_DIS is not needed as packet reception is anyway disabled by BGXX_CMRX_CONFIG::DATA_PKT_RX_EN. Also setting RX_PACKET_DIS causes a bogus remote fault condition which delays link detection. Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- .../net/ethernet/cavium/thunder/thunder_bgx.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c index ca58d7f52ecd..8bbaedbb7b94 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -459,12 +459,14 @@ static int bgx_lmac_xaui_init(struct bgx *bgx, struct lmac *lmac) bgx_reg_modify(bgx, lmacid, BGX_SPUX_CONTROL1, SPU_CTL_LOW_POWER); /* Set interleaved running disparity for RXAUI */ - if (lmac->lmac_type != BGX_MODE_RXAUI) - bgx_reg_modify(bgx, lmacid, - BGX_SPUX_MISC_CONTROL, SPU_MISC_CTL_RX_DIS); - else + if (lmac->lmac_type == BGX_MODE_RXAUI) bgx_reg_modify(bgx, lmacid, BGX_SPUX_MISC_CONTROL, - SPU_MISC_CTL_RX_DIS | SPU_MISC_CTL_INTLV_RDISP); + SPU_MISC_CTL_INTLV_RDISP); + + /* Clear receive packet disable */ + cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_MISC_CONTROL); + cfg &= ~SPU_MISC_CTL_RX_DIS; + bgx_reg_write(bgx, lmacid, BGX_SPUX_MISC_CONTROL, cfg); /* clear all interrupts */ cfg = bgx_reg_read(bgx, lmacid, BGX_SMUX_RX_INT); @@ -537,7 +539,6 @@ static int bgx_xaui_check_link(struct lmac *lmac) int lmac_type = lmac->lmac_type; u64 cfg; - bgx_reg_modify(bgx, lmacid, BGX_SPUX_MISC_CONTROL, SPU_MISC_CTL_RX_DIS); if (lmac->use_training) { cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_INT); if (!(cfg & (1ull << 13))) { @@ -607,11 +608,6 @@ static int bgx_xaui_check_link(struct lmac *lmac) return -1; } - /* Clear receive packet disable */ - cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_MISC_CONTROL); - cfg &= ~SPU_MISC_CTL_RX_DIS; - bgx_reg_write(bgx, lmacid, BGX_SPUX_MISC_CONTROL, cfg); - /* Check for MAC RX faults */ cfg = bgx_reg_read(bgx, lmacid, BGX_SMUX_RX_CTL); /* 0 - Link is okay, 1 - Local fault, 2 - Remote fault */ From 6176e89c5734e597f3e2279c8d5629a523cf6537 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Fri, 12 Aug 2016 16:34:49 +0200 Subject: [PATCH 0191/1715] net: fix up a few missing hashtable.h conflict resolutions There are a couple of leftover symbol conflicts caused by hashtable.h being included by netdevice.h; those were not caught as build failure (they're "only" a warning, but in fact real bugs). Fix those up. Fixes: e87a8f24c ("net: resolve symbol conflicts with generic hashtable.h") Reported-by: Daniel Borkmann Signed-off-by: Jiri Kosina Signed-off-by: David S. Miller --- drivers/net/ethernet/dec/tulip/de4x5.c | 4 ++-- drivers/net/ethernet/dec/tulip/de4x5.h | 4 ++-- drivers/net/ethernet/freescale/fec_main.c | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/dec/tulip/de4x5.c b/drivers/net/ethernet/dec/tulip/de4x5.c index f0e9e2ef62a0..6620fc861c47 100644 --- a/drivers/net/ethernet/dec/tulip/de4x5.c +++ b/drivers/net/ethernet/dec/tulip/de4x5.c @@ -1966,7 +1966,7 @@ SetMulticastFilter(struct net_device *dev) } else if (lp->setup_f == HASH_PERF) { /* Hash Filtering */ netdev_for_each_mc_addr(ha, dev) { crc = ether_crc_le(ETH_ALEN, ha->addr); - hashcode = crc & HASH_BITS; /* hashcode is 9 LSb of CRC */ + hashcode = crc & DE4X5_HASH_BITS; /* hashcode is 9 LSb of CRC */ byte = hashcode >> 3; /* bit[3-8] -> byte in filter */ bit = 1 << (hashcode & 0x07);/* bit[0-2] -> bit in byte */ @@ -5043,7 +5043,7 @@ build_setup_frame(struct net_device *dev, int mode) *(pa + i) = dev->dev_addr[i]; /* Host address */ if (i & 0x01) pa += 2; } - *(lp->setup_frame + (HASH_TABLE_LEN >> 3) - 3) = 0x80; + *(lp->setup_frame + (DE4X5_HASH_TABLE_LEN >> 3) - 3) = 0x80; } else { for (i=0; idev_addr[i]; diff --git a/drivers/net/ethernet/dec/tulip/de4x5.h b/drivers/net/ethernet/dec/tulip/de4x5.h index ec756eba397b..1bfdc9b117f6 100644 --- a/drivers/net/ethernet/dec/tulip/de4x5.h +++ b/drivers/net/ethernet/dec/tulip/de4x5.h @@ -860,8 +860,8 @@ #define PCI 0 #define EISA 1 -#define HASH_TABLE_LEN 512 /* Bits */ -#define HASH_BITS 0x01ff /* 9 LS bits */ +#define DE4X5_HASH_TABLE_LEN 512 /* Bits */ +#define DE4X5_HASH_BITS 0x01ff /* 9 LS bits */ #define SETUP_FRAME_LEN 192 /* Bytes */ #define IMPERF_PA_OFFSET 156 /* Bytes */ diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 01f7e811739b..fb5c63881340 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -2887,7 +2887,7 @@ fec_enet_close(struct net_device *ndev) * this kind of feature?). */ -#define HASH_BITS 6 /* #bits in hash */ +#define FEC_HASH_BITS 6 /* #bits in hash */ #define CRC32_POLY 0xEDB88320 static void set_multicast_list(struct net_device *ndev) @@ -2935,10 +2935,10 @@ static void set_multicast_list(struct net_device *ndev) } } - /* only upper 6 bits (HASH_BITS) are used + /* only upper 6 bits (FEC_HASH_BITS) are used * which point to specific bit in he hash registers */ - hash = (crc >> (32 - HASH_BITS)) & 0x3f; + hash = (crc >> (32 - FEC_HASH_BITS)) & 0x3f; if (hash > 31) { tmp = readl(fep->hwp + FEC_GRP_HASH_TABLE_HIGH); From 87ced2f9584efc476b955eb2343728fab20ff80b Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 11 Aug 2016 23:05:20 +0200 Subject: [PATCH 0192/1715] net: can: usb: ems_usb: don't print error when allocating urb fails kmalloc will print enough information in case of failure. Signed-off-by: Wolfram Sang Signed-off-by: David S. Miller --- drivers/net/can/usb/ems_usb.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c index 71f0e791355b..b3d02759c226 100644 --- a/drivers/net/can/usb/ems_usb.c +++ b/drivers/net/can/usb/ems_usb.c @@ -600,7 +600,6 @@ static int ems_usb_start(struct ems_usb *dev) /* create a URB, and a buffer for it */ urb = usb_alloc_urb(0, GFP_KERNEL); if (!urb) { - netdev_err(netdev, "No memory left for URBs\n"); err = -ENOMEM; break; } @@ -752,10 +751,8 @@ static netdev_tx_t ems_usb_start_xmit(struct sk_buff *skb, struct net_device *ne /* create a URB, and a buffer for it, and copy the data to the URB */ urb = usb_alloc_urb(0, GFP_ATOMIC); - if (!urb) { - netdev_err(netdev, "No memory left for URBs\n"); + if (!urb) goto nomem; - } buf = usb_alloc_coherent(dev->udev, size, GFP_ATOMIC, &urb->transfer_dma); if (!buf) { @@ -1007,10 +1004,8 @@ static int ems_usb_probe(struct usb_interface *intf, dev->tx_contexts[i].echo_index = MAX_TX_URBS; dev->intr_urb = usb_alloc_urb(0, GFP_KERNEL); - if (!dev->intr_urb) { - dev_err(&intf->dev, "Couldn't alloc intr URB\n"); + if (!dev->intr_urb) goto cleanup_candev; - } dev->intr_in_buffer = kzalloc(INTR_IN_BUFFER_SIZE, GFP_KERNEL); if (!dev->intr_in_buffer) From 912f85e104f4dfc46bf33e6b5eef1f72a39d0336 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 11 Aug 2016 23:05:21 +0200 Subject: [PATCH 0193/1715] net: can: usb: esd_usb2: don't print error when allocating urb fails kmalloc will print enough information in case of failure. Signed-off-by: Wolfram Sang Signed-off-by: David S. Miller --- drivers/net/can/usb/esd_usb2.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/can/usb/esd_usb2.c b/drivers/net/can/usb/esd_usb2.c index 784a9002fbb9..be928ce62d32 100644 --- a/drivers/net/can/usb/esd_usb2.c +++ b/drivers/net/can/usb/esd_usb2.c @@ -558,8 +558,6 @@ static int esd_usb2_setup_rx_urbs(struct esd_usb2 *dev) /* create a URB, and a buffer for it */ urb = usb_alloc_urb(0, GFP_KERNEL); if (!urb) { - dev_warn(dev->udev->dev.parent, - "No memory left for URBs\n"); err = -ENOMEM; break; } @@ -730,7 +728,6 @@ static netdev_tx_t esd_usb2_start_xmit(struct sk_buff *skb, /* create a URB, and a buffer for it, and copy the data to the URB */ urb = usb_alloc_urb(0, GFP_ATOMIC); if (!urb) { - netdev_err(netdev, "No memory left for URBs\n"); stats->tx_dropped++; dev_kfree_skb(skb); goto nourbmem; From 2a4a1e4013bddb60939328cc8bb99adf458d14af Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 11 Aug 2016 23:05:22 +0200 Subject: [PATCH 0194/1715] net: can: usb: gs_usb: don't print error when allocating urb fails kmalloc will print enough information in case of failure. Signed-off-by: Wolfram Sang Signed-off-by: David S. Miller --- drivers/net/can/usb/gs_usb.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c index 6f0cbc38782e..77e3cc06a30c 100644 --- a/drivers/net/can/usb/gs_usb.c +++ b/drivers/net/can/usb/gs_usb.c @@ -493,10 +493,8 @@ static netdev_tx_t gs_can_start_xmit(struct sk_buff *skb, /* create a URB, and a buffer for it */ urb = usb_alloc_urb(0, GFP_ATOMIC); - if (!urb) { - netdev_err(netdev, "No memory left for URB\n"); + if (!urb) goto nomem_urb; - } hf = usb_alloc_coherent(dev->udev, sizeof(*hf), GFP_ATOMIC, &urb->transfer_dma); @@ -600,11 +598,8 @@ static int gs_can_open(struct net_device *netdev) /* alloc rx urb */ urb = usb_alloc_urb(0, GFP_KERNEL); - if (!urb) { - netdev_err(netdev, - "No memory left for URB\n"); + if (!urb) return -ENOMEM; - } /* alloc rx buffer */ buf = usb_alloc_coherent(dev->udev, From bc1af47df2bc52b8cfad7e455fd1cb6fbd8cd6b3 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 11 Aug 2016 23:05:23 +0200 Subject: [PATCH 0195/1715] net: can: usb: kvaser_usb: don't print error when allocating urb fails kmalloc will print enough information in case of failure. Signed-off-by: Wolfram Sang Signed-off-by: David S. Miller --- drivers/net/can/usb/kvaser_usb.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c index 6f1f3b675ff5..d51e0c401b48 100644 --- a/drivers/net/can/usb/kvaser_usb.c +++ b/drivers/net/can/usb/kvaser_usb.c @@ -787,10 +787,8 @@ static int kvaser_usb_simple_msg_async(struct kvaser_usb_net_priv *priv, int err; urb = usb_alloc_urb(0, GFP_ATOMIC); - if (!urb) { - netdev_err(netdev, "No memory left for URBs\n"); + if (!urb) return -ENOMEM; - } buf = kmalloc(sizeof(struct kvaser_msg), GFP_ATOMIC); if (!buf) { @@ -1393,8 +1391,6 @@ static int kvaser_usb_setup_rx_urbs(struct kvaser_usb *dev) urb = usb_alloc_urb(0, GFP_KERNEL); if (!urb) { - dev_warn(dev->udev->dev.parent, - "No memory left for URBs\n"); err = -ENOMEM; break; } @@ -1670,7 +1666,6 @@ static netdev_tx_t kvaser_usb_start_xmit(struct sk_buff *skb, urb = usb_alloc_urb(0, GFP_ATOMIC); if (!urb) { - netdev_err(netdev, "No memory left for URBs\n"); stats->tx_dropped++; dev_kfree_skb(skb); return NETDEV_TX_OK; From 2479204b256392a170ba819411b14c3da0bee1b5 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 11 Aug 2016 23:05:24 +0200 Subject: [PATCH 0196/1715] net: can: usb: peak_usb: pcan_usb_core: don't print error when allocating urb fails kmalloc will print enough information in case of failure. Signed-off-by: Wolfram Sang Signed-off-by: David S. Miller --- drivers/net/can/usb/peak_usb/pcan_usb_core.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.c b/drivers/net/can/usb/peak_usb/pcan_usb_core.c index bfb91d8fa460..c06382cdfdfe 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_core.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.c @@ -399,7 +399,6 @@ static int peak_usb_start(struct peak_usb_device *dev) /* create a URB, and a buffer for it, to receive usb messages */ urb = usb_alloc_urb(0, GFP_KERNEL); if (!urb) { - netdev_err(netdev, "No memory left for URBs\n"); err = -ENOMEM; break; } @@ -454,7 +453,6 @@ static int peak_usb_start(struct peak_usb_device *dev) /* create a URB and a buffer for it, to transmit usb messages */ urb = usb_alloc_urb(0, GFP_KERNEL); if (!urb) { - netdev_err(netdev, "No memory left for URBs\n"); err = -ENOMEM; break; } @@ -651,10 +649,8 @@ static int peak_usb_restart(struct peak_usb_device *dev) /* first allocate a urb to handle the asynchronous steps */ urb = usb_alloc_urb(0, GFP_ATOMIC); - if (!urb) { - netdev_err(dev->netdev, "no memory left for urb\n"); + if (!urb) return -ENOMEM; - } /* also allocate enough space for the commands to send */ buf = kmalloc(PCAN_USB_MAX_CMD_LEN, GFP_ATOMIC); From 7fe7cfa43a99056ffa52aed4053693dedc3cbbbb Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 11 Aug 2016 23:05:25 +0200 Subject: [PATCH 0197/1715] net: can: usb: usb_8dev: don't print error when allocating urb fails kmalloc will print enough information in case of failure. Signed-off-by: Wolfram Sang Signed-off-by: David S. Miller --- drivers/net/can/usb/usb_8dev.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/can/usb/usb_8dev.c b/drivers/net/can/usb/usb_8dev.c index a731720f1d13..108a30e15097 100644 --- a/drivers/net/can/usb/usb_8dev.c +++ b/drivers/net/can/usb/usb_8dev.c @@ -623,10 +623,8 @@ static netdev_tx_t usb_8dev_start_xmit(struct sk_buff *skb, /* create a URB, and a buffer for it, and copy the data to the URB */ urb = usb_alloc_urb(0, GFP_ATOMIC); - if (!urb) { - netdev_err(netdev, "No memory left for URBs\n"); + if (!urb) goto nomem; - } buf = usb_alloc_coherent(priv->udev, size, GFP_ATOMIC, &urb->transfer_dma); @@ -748,7 +746,6 @@ static int usb_8dev_start(struct usb_8dev_priv *priv) /* create a URB, and a buffer for it */ urb = usb_alloc_urb(0, GFP_KERNEL); if (!urb) { - netdev_err(netdev, "No memory left for URBs\n"); err = -ENOMEM; break; } From 12800ea95a888f71983fd2deb69270fbd0702194 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 11 Aug 2016 23:05:26 +0200 Subject: [PATCH 0198/1715] net: usb: hso: don't print error when allocating urb fails kmalloc will print enough information in case of failure. Signed-off-by: Wolfram Sang Signed-off-by: David S. Miller --- drivers/net/usb/hso.c | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c index 4b4458616693..c5544d36c54f 100644 --- a/drivers/net/usb/hso.c +++ b/drivers/net/usb/hso.c @@ -2300,10 +2300,8 @@ static int hso_serial_common_create(struct hso_serial *serial, int num_urbs, serial->rx_data_length = rx_size; for (i = 0; i < serial->num_rx_urbs; i++) { serial->rx_urb[i] = usb_alloc_urb(0, GFP_KERNEL); - if (!serial->rx_urb[i]) { - dev_err(dev, "Could not allocate urb?\n"); + if (!serial->rx_urb[i]) goto exit; - } serial->rx_urb[i]->transfer_buffer = NULL; serial->rx_urb[i]->transfer_buffer_length = 0; serial->rx_data[i] = kzalloc(serial->rx_data_length, @@ -2314,10 +2312,8 @@ static int hso_serial_common_create(struct hso_serial *serial, int num_urbs, /* TX, allocate urb and initialize */ serial->tx_urb = usb_alloc_urb(0, GFP_KERNEL); - if (!serial->tx_urb) { - dev_err(dev, "Could not allocate urb?\n"); + if (!serial->tx_urb) goto exit; - } serial->tx_urb->transfer_buffer = NULL; serial->tx_urb->transfer_buffer_length = 0; /* prepare our TX buffer */ @@ -2555,20 +2551,16 @@ static struct hso_device *hso_create_net_device(struct usb_interface *interface, /* start allocating */ for (i = 0; i < MUX_BULK_RX_BUF_COUNT; i++) { hso_net->mux_bulk_rx_urb_pool[i] = usb_alloc_urb(0, GFP_KERNEL); - if (!hso_net->mux_bulk_rx_urb_pool[i]) { - dev_err(&interface->dev, "Could not allocate rx urb\n"); + if (!hso_net->mux_bulk_rx_urb_pool[i]) goto exit; - } hso_net->mux_bulk_rx_buf_pool[i] = kzalloc(MUX_BULK_RX_BUF_SIZE, GFP_KERNEL); if (!hso_net->mux_bulk_rx_buf_pool[i]) goto exit; } hso_net->mux_bulk_tx_urb = usb_alloc_urb(0, GFP_KERNEL); - if (!hso_net->mux_bulk_tx_urb) { - dev_err(&interface->dev, "Could not allocate tx urb\n"); + if (!hso_net->mux_bulk_tx_urb) goto exit; - } hso_net->mux_bulk_tx_buf = kzalloc(MUX_BULK_TX_BUF_SIZE, GFP_KERNEL); if (!hso_net->mux_bulk_tx_buf) goto exit; @@ -2787,10 +2779,8 @@ struct hso_shared_int *hso_create_shared_int(struct usb_interface *interface) } mux->shared_intr_urb = usb_alloc_urb(0, GFP_KERNEL); - if (!mux->shared_intr_urb) { - dev_err(&interface->dev, "Could not allocate intr urb?\n"); + if (!mux->shared_intr_urb) goto exit; - } mux->shared_intr_buf = kzalloc(le16_to_cpu(mux->intr_endp->wMaxPacketSize), GFP_KERNEL); From d7c4e84e34813d9d2cbb9ec72fb8cbcbb71340c0 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 11 Aug 2016 23:05:27 +0200 Subject: [PATCH 0199/1715] net: usb: lan78xx: don't print error when allocating urb fails kmalloc will print enough information in case of failure. Signed-off-by: Wolfram Sang Acked-by: Woojung Huh Signed-off-by: David S. Miller --- drivers/net/usb/lan78xx.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 6a9d474b08b2..432b8a3ae354 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -3002,10 +3002,8 @@ static void lan78xx_tx_bh(struct lan78xx_net *dev) gso_skb: urb = usb_alloc_urb(0, GFP_ATOMIC); - if (!urb) { - netif_dbg(dev, tx_err, dev->net, "no urb\n"); + if (!urb) goto drop; - } entry = (struct skb_data *)skb->cb; entry->urb = urb; From ef6cd1301e06e0a5de24938b92c5859d5021ea5d Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 11 Aug 2016 23:05:28 +0200 Subject: [PATCH 0200/1715] net: usb: usbnet: don't print error when allocating urb fails kmalloc will print enough information in case of failure. Signed-off-by: Wolfram Sang Signed-off-by: David S. Miller --- drivers/net/usb/usbnet.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 3bfb59209326..d5071e364d40 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -2062,11 +2062,8 @@ int usbnet_write_cmd_async(struct usbnet *dev, u8 cmd, u8 reqtype, cmd, reqtype, value, index, size); urb = usb_alloc_urb(0, GFP_ATOMIC); - if (!urb) { - netdev_err(dev->net, "Error allocating URB in" - " %s!\n", __func__); + if (!urb) goto fail; - } if (data) { buf = kmemdup(data, size, GFP_ATOMIC); From aa38b885e52149cd0726e09c857b65740824d755 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 11 Aug 2016 23:05:29 +0200 Subject: [PATCH 0201/1715] net: wimax: i2400m: usb-notif: don't print error when allocating urb fails kmalloc will print enough information in case of failure. Signed-off-by: Wolfram Sang Signed-off-by: David S. Miller --- drivers/net/wimax/i2400m/usb-notif.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/wimax/i2400m/usb-notif.c b/drivers/net/wimax/i2400m/usb-notif.c index fc1355d98bc6..5d429f816125 100644 --- a/drivers/net/wimax/i2400m/usb-notif.c +++ b/drivers/net/wimax/i2400m/usb-notif.c @@ -206,7 +206,6 @@ int i2400mu_notification_setup(struct i2400mu *i2400mu) i2400mu->notif_urb = usb_alloc_urb(0, GFP_KERNEL); if (!i2400mu->notif_urb) { ret = -ENOMEM; - dev_err(dev, "notification: cannot allocate URB\n"); goto error_alloc_urb; } epd = usb_get_epd(i2400mu->usb_iface, From 71c4c616eced093d90d37fec06b8e965a864eaa4 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 11 Aug 2016 23:05:30 +0200 Subject: [PATCH 0202/1715] net: wireless: ath: ar5523: ar5523: don't print error when allocating urb fails kmalloc will print enough information in case of failure. Signed-off-by: Wolfram Sang Signed-off-by: David S. Miller --- drivers/net/wireless/ath/ar5523/ar5523.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/ath/ar5523/ar5523.c b/drivers/net/wireless/ath/ar5523/ar5523.c index 8aded24bcdf4..7a60d2e652da 100644 --- a/drivers/net/wireless/ath/ar5523/ar5523.c +++ b/drivers/net/wireless/ath/ar5523/ar5523.c @@ -706,10 +706,8 @@ static int ar5523_alloc_rx_bufs(struct ar5523 *ar) data->ar = ar; data->urb = usb_alloc_urb(0, GFP_KERNEL); - if (!data->urb) { - ar5523_err(ar, "could not allocate rx data urb\n"); + if (!data->urb) goto err; - } list_add_tail(&data->list, &ar->rx_data_free); atomic_inc(&ar->rx_data_free_cnt); } @@ -824,7 +822,6 @@ static void ar5523_tx_work_locked(struct ar5523 *ar) urb = usb_alloc_urb(0, GFP_KERNEL); if (!urb) { - ar5523_err(ar, "Failed to allocate TX urb\n"); ieee80211_free_txskb(ar->hw, skb); continue; } @@ -949,10 +946,8 @@ static int ar5523_alloc_tx_cmd(struct ar5523 *ar) init_completion(&cmd->done); cmd->urb_tx = usb_alloc_urb(0, GFP_KERNEL); - if (!cmd->urb_tx) { - ar5523_err(ar, "could not allocate urb\n"); + if (!cmd->urb_tx) return -ENOMEM; - } cmd->buf_tx = usb_alloc_coherent(ar->dev, AR5523_MAX_TXCMDSZ, GFP_KERNEL, &cmd->urb_tx->transfer_dma); From 938f89e50a41c2d56710805fb019ad7618cef84b Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 11 Aug 2016 23:05:31 +0200 Subject: [PATCH 0203/1715] net: wireless: broadcom: brcm80211: brcmfmac: usb: don't print error when allocating urb fails kmalloc will print enough information in case of failure. Signed-off-by: Wolfram Sang Signed-off-by: David S. Miller --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c index 98b15a9a2779..fa26619a7945 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c @@ -1099,15 +1099,11 @@ struct brcmf_usbdev *brcmf_usb_attach(struct brcmf_usbdev_info *devinfo, devinfo->tx_freecount = ntxq; devinfo->ctl_urb = usb_alloc_urb(0, GFP_ATOMIC); - if (!devinfo->ctl_urb) { - brcmf_err("usb_alloc_urb (ctl) failed\n"); + if (!devinfo->ctl_urb) goto error; - } devinfo->bulk_urb = usb_alloc_urb(0, GFP_ATOMIC); - if (!devinfo->bulk_urb) { - brcmf_err("usb_alloc_urb (bulk) failed\n"); + if (!devinfo->bulk_urb) goto error; - } return &devinfo->bus_pub; From 3de6e8852f55b6607ae2f5638a2a755478cee98a Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 11 Aug 2016 23:13:05 +0200 Subject: [PATCH 0204/1715] net: wireless: intersil: orinoco: orinoco_usb: don't print error when allocating urb fails kmalloc will print enough information in case of failure. Signed-off-by: Wolfram Sang Signed-off-by: David S. Miller --- drivers/net/wireless/intersil/orinoco/orinoco_usb.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/wireless/intersil/orinoco/orinoco_usb.c b/drivers/net/wireless/intersil/orinoco/orinoco_usb.c index 56f109bc8394..bca6935a94db 100644 --- a/drivers/net/wireless/intersil/orinoco/orinoco_usb.c +++ b/drivers/net/wireless/intersil/orinoco/orinoco_usb.c @@ -1613,10 +1613,8 @@ static int ezusb_probe(struct usb_interface *interface, } upriv->read_urb = usb_alloc_urb(0, GFP_KERNEL); - if (!upriv->read_urb) { - err("No free urbs available"); + if (!upriv->read_urb) goto error; - } if (le16_to_cpu(ep->wMaxPacketSize) != 64) pr_warn("bulk in: wMaxPacketSize!= 64\n"); if (ep->bEndpointAddress != (2 | USB_DIR_IN)) From da8794ce8e776d82c7ef417eafb14c749d2c1f37 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 11 Aug 2016 23:13:06 +0200 Subject: [PATCH 0205/1715] net: wireless: marvell: libertas_tf: if_usb: don't print error when allocating urb fails kmalloc will print enough information in case of failure. Signed-off-by: Wolfram Sang Signed-off-by: David S. Miller --- drivers/net/wireless/marvell/libertas_tf/if_usb.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireless/marvell/libertas_tf/if_usb.c b/drivers/net/wireless/marvell/libertas_tf/if_usb.c index 799a2efe5793..e0ade40d9497 100644 --- a/drivers/net/wireless/marvell/libertas_tf/if_usb.c +++ b/drivers/net/wireless/marvell/libertas_tf/if_usb.c @@ -198,22 +198,16 @@ static int if_usb_probe(struct usb_interface *intf, } cardp->rx_urb = usb_alloc_urb(0, GFP_KERNEL); - if (!cardp->rx_urb) { - lbtf_deb_usbd(&udev->dev, "Rx URB allocation failed\n"); + if (!cardp->rx_urb) goto dealloc; - } cardp->tx_urb = usb_alloc_urb(0, GFP_KERNEL); - if (!cardp->tx_urb) { - lbtf_deb_usbd(&udev->dev, "Tx URB allocation failed\n"); + if (!cardp->tx_urb) goto dealloc; - } cardp->cmd_urb = usb_alloc_urb(0, GFP_KERNEL); - if (!cardp->cmd_urb) { - lbtf_deb_usbd(&udev->dev, "Cmd URB allocation failed\n"); + if (!cardp->cmd_urb) goto dealloc; - } cardp->ep_out_buf = kmalloc(MRVDRV_ETH_TX_PACKET_BUFFER_SIZE, GFP_KERNEL); From dbea99d6d95db68df004b2a0341418338e9b9130 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 11 Aug 2016 23:13:07 +0200 Subject: [PATCH 0206/1715] net: wireless: marvell: mwifiex: usb: don't print error when allocating urb fails kmalloc will print enough information in case of failure. Signed-off-by: Wolfram Sang Signed-off-by: David S. Miller --- drivers/net/wireless/marvell/mwifiex/usb.c | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/usb.c b/drivers/net/wireless/marvell/mwifiex/usb.c index 0857575c5c39..3bd04f52f369 100644 --- a/drivers/net/wireless/marvell/mwifiex/usb.c +++ b/drivers/net/wireless/marvell/mwifiex/usb.c @@ -657,11 +657,8 @@ static int mwifiex_usb_tx_init(struct mwifiex_adapter *adapter) card->tx_cmd.ep = card->tx_cmd_ep; card->tx_cmd.urb = usb_alloc_urb(0, GFP_KERNEL); - if (!card->tx_cmd.urb) { - mwifiex_dbg(adapter, ERROR, - "tx_cmd.urb allocation failed\n"); + if (!card->tx_cmd.urb) return -ENOMEM; - } for (i = 0; i < MWIFIEX_TX_DATA_PORT; i++) { port = &card->port[i]; @@ -677,11 +674,8 @@ static int mwifiex_usb_tx_init(struct mwifiex_adapter *adapter) port->tx_data_list[j].ep = port->tx_data_ep; port->tx_data_list[j].urb = usb_alloc_urb(0, GFP_KERNEL); - if (!port->tx_data_list[j].urb) { - mwifiex_dbg(adapter, ERROR, - "urb allocation failed\n"); + if (!port->tx_data_list[j].urb) return -ENOMEM; - } } } @@ -697,10 +691,8 @@ static int mwifiex_usb_rx_init(struct mwifiex_adapter *adapter) card->rx_cmd.ep = card->rx_cmd_ep; card->rx_cmd.urb = usb_alloc_urb(0, GFP_KERNEL); - if (!card->rx_cmd.urb) { - mwifiex_dbg(adapter, ERROR, "rx_cmd.urb allocation failed\n"); + if (!card->rx_cmd.urb) return -ENOMEM; - } card->rx_cmd.skb = dev_alloc_skb(MWIFIEX_RX_CMD_BUF_SIZE); if (!card->rx_cmd.skb) @@ -714,11 +706,8 @@ static int mwifiex_usb_rx_init(struct mwifiex_adapter *adapter) card->rx_data_list[i].ep = card->rx_data_ep; card->rx_data_list[i].urb = usb_alloc_urb(0, GFP_KERNEL); - if (!card->rx_data_list[i].urb) { - mwifiex_dbg(adapter, ERROR, - "rx_data_list[] urb allocation failed\n"); + if (!card->rx_data_list[i].urb) return -1; - } if (mwifiex_usb_submit_rx_urb(&card->rx_data_list[i], MWIFIEX_RX_DATA_BUF_SIZE)) return -1; From eb36333896f8e380f7357bdbc1a7dc28b33c6a96 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 11 Aug 2016 23:13:08 +0200 Subject: [PATCH 0207/1715] net: wireless: realtek: rtlwifi: usb: don't print error when allocating urb fails kmalloc will print enough information in case of failure. Signed-off-by: Wolfram Sang Acked-by: Larry Finger Signed-off-by: David S. Miller --- drivers/net/wireless/realtek/rtlwifi/usb.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/usb.c b/drivers/net/wireless/realtek/rtlwifi/usb.c index 41617b7b0822..32aa5c1d070a 100644 --- a/drivers/net/wireless/realtek/rtlwifi/usb.c +++ b/drivers/net/wireless/realtek/rtlwifi/usb.c @@ -739,11 +739,8 @@ static int _rtl_usb_receive(struct ieee80211_hw *hw) for (i = 0; i < rtlusb->rx_urb_num; i++) { err = -ENOMEM; urb = usb_alloc_urb(0, GFP_KERNEL); - if (!urb) { - RT_TRACE(rtlpriv, COMP_USB, DBG_EMERG, - "Failed to alloc URB!!\n"); + if (!urb) goto err_out; - } err = _rtl_prep_rx_urb(hw, rtlusb, urb, GFP_KERNEL); if (err < 0) { @@ -907,15 +904,12 @@ static void _rtl_tx_complete(struct urb *urb) static struct urb *_rtl_usb_tx_urb_setup(struct ieee80211_hw *hw, struct sk_buff *skb, u32 ep_num) { - struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_usb *rtlusb = rtl_usbdev(rtl_usbpriv(hw)); struct urb *_urb; WARN_ON(NULL == skb); _urb = usb_alloc_urb(0, GFP_ATOMIC); if (!_urb) { - RT_TRACE(rtlpriv, COMP_USB, DBG_EMERG, - "Can't allocate URB for bulk out!\n"); kfree_skb(skb); return NULL; } From 1e10f3fbfb1c351768576b756c60f83922b65e5e Mon Sep 17 00:00:00 2001 From: LABBE Corentin Date: Fri, 12 Aug 2016 14:58:02 +0200 Subject: [PATCH 0208/1715] net: bfin_mac: Fix a few spelling fixes This patch respell some word badly spelled. - Invidate instead of Invalidate - proble instead of probe Signed-off-by: LABBE Corentin Signed-off-by: David S. Miller --- drivers/net/ethernet/adi/bfin_mac.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/adi/bfin_mac.c b/drivers/net/ethernet/adi/bfin_mac.c index 38eaea18da23..00f9ee3fc3e5 100644 --- a/drivers/net/ethernet/adi/bfin_mac.c +++ b/drivers/net/ethernet/adi/bfin_mac.c @@ -192,8 +192,8 @@ static int desc_list_init(struct net_device *dev) goto init_error; skb_reserve(new_skb, NET_IP_ALIGN); - /* Invidate the data cache of skb->data range when it is write back - * cache. It will prevent overwritting the new data from DMA + /* Invalidate the data cache of skb->data range when it is write back + * cache. It will prevent overwriting the new data from DMA */ blackfin_dcache_invalidate_range((unsigned long)new_skb->head, (unsigned long)new_skb->end); @@ -1205,7 +1205,7 @@ static void bfin_mac_rx(struct bfin_mac_local *lp) } /* reserve 2 bytes for RXDWA padding */ skb_reserve(new_skb, NET_IP_ALIGN); - /* Invidate the data cache of skb->data range when it is write back + /* Invalidate the data cache of skb->data range when it is write back * cache. It will prevent overwritting the new data from DMA */ blackfin_dcache_invalidate_range((unsigned long)new_skb->head, @@ -1599,7 +1599,7 @@ static int bfin_mac_probe(struct platform_device *pdev) *(__le16 *) (&(ndev->dev_addr[4])) = cpu_to_le16((u16) bfin_read_EMAC_ADDRHI()); /* probe mac */ - /*todo: how to proble? which is revision_register */ + /*todo: how to probe? which is revision_register */ bfin_write_EMAC_ADDRLO(0x12345678); if (bfin_read_EMAC_ADDRLO() != 0x12345678) { dev_err(&pdev->dev, "Cannot detect Blackfin on-chip ethernet MAC controller!\n"); From d16d9d2ad778e8247617c10703dfd749c776f242 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 12 Aug 2016 21:29:24 +0100 Subject: [PATCH 0209/1715] net: phy: initialize rc to zero to avoid returning garbage value In the case where phydev->interrupts is not PHY_INTERRUPT_ENABLED function vsc85xx_ack_interrupt is returning an uninitialized garbage value. Fix this by initializing rc to zero. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/phy/mscc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/mscc.c b/drivers/net/phy/mscc.c index 2e1de53e3548..ad33390b382a 100644 --- a/drivers/net/phy/mscc.c +++ b/drivers/net/phy/mscc.c @@ -84,7 +84,7 @@ static int vsc85xx_config_init(struct phy_device *phydev) static int vsc85xx_ack_interrupt(struct phy_device *phydev) { - int rc; + int rc = 0; if (phydev->interrupts == PHY_INTERRUPT_ENABLED) rc = phy_read(phydev, MII_VSC85XX_INT_STATUS); From 04ed5ad5db6880d53dd1bb8c93e82228a462a4dd Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Sun, 17 Jul 2016 01:28:47 +0300 Subject: [PATCH 0210/1715] net/mlx5: Init/Teardown hca commands via mlx5 ifc Remove old representation of manually created Init/Teardown hca commands layout and use mlx5_ifc canonical structures and defines. Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/fw.c | 36 ++++++-------------- include/linux/mlx5/device.h | 24 ------------- 2 files changed, 10 insertions(+), 50 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index 77fc1aa26114..56bf520d1429 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -162,38 +162,22 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev) int mlx5_cmd_init_hca(struct mlx5_core_dev *dev) { - struct mlx5_cmd_init_hca_mbox_in in; - struct mlx5_cmd_init_hca_mbox_out out; + u32 out[MLX5_ST_SZ_DW(init_hca_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(init_hca_in)] = {0}; int err; - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_INIT_HCA); - err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); - if (err) - return err; - - if (out.hdr.status) - err = mlx5_cmd_status_to_err(&out.hdr); - - return err; + MLX5_SET(init_hca_in, in, opcode, MLX5_CMD_OP_INIT_HCA); + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + return err ? : mlx5_cmd_status_to_err_v2(out); } int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev) { - struct mlx5_cmd_teardown_hca_mbox_in in; - struct mlx5_cmd_teardown_hca_mbox_out out; + u32 out[MLX5_ST_SZ_DW(teardown_hca_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(teardown_hca_in)] = {0}; int err; - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_TEARDOWN_HCA); - err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); - if (err) - return err; - - if (out.hdr.status) - err = mlx5_cmd_status_to_err(&out.hdr); - - return err; + MLX5_SET(teardown_hca_in, in, opcode, MLX5_CMD_OP_TEARDOWN_HCA); + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + return err ? : mlx5_cmd_status_to_err_v2(out); } diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 0b6d15cddb2f..6c343c0b77d2 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -455,30 +455,6 @@ struct mlx5_odp_caps { char reserved2[0xe4]; }; -struct mlx5_cmd_init_hca_mbox_in { - struct mlx5_inbox_hdr hdr; - u8 rsvd0[2]; - __be16 profile; - u8 rsvd1[4]; -}; - -struct mlx5_cmd_init_hca_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd[8]; -}; - -struct mlx5_cmd_teardown_hca_mbox_in { - struct mlx5_inbox_hdr hdr; - u8 rsvd0[2]; - __be16 profile; - u8 rsvd1[4]; -}; - -struct mlx5_cmd_teardown_hca_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd[8]; -}; - struct mlx5_cmd_layout { u8 type; u8 rsvd0[3]; From 20ed51c643b6296789a48adc3bc2cc875a1612cf Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Sun, 17 Jul 2016 00:46:41 +0300 Subject: [PATCH 0211/1715] net/mlx5: Access register and MAD IFC commands via mlx5 ifc Remove old representation of manually created ACCESS_REG/MAD_IFC commands layout and use mlx5_ifc canonical structures and defines. Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/mad.c | 42 ++++++++-------- .../net/ethernet/mellanox/mlx5/core/port.c | 48 +++++++++---------- include/linux/mlx5/device.h | 29 ----------- 3 files changed, 43 insertions(+), 76 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mad.c b/drivers/net/ethernet/mellanox/mlx5/core/mad.c index 1368dac00da0..13e6afd52a9b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mad.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mad.c @@ -39,36 +39,34 @@ int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb, u16 opmod, u8 port) { - struct mlx5_mad_ifc_mbox_in *in = NULL; - struct mlx5_mad_ifc_mbox_out *out = NULL; - int err; + int outlen = MLX5_ST_SZ_BYTES(mad_ifc_out); + int inlen = MLX5_ST_SZ_BYTES(mad_ifc_in); + int err = -ENOMEM; + void *data; + void *resp; + u32 *out; + u32 *in; - in = kzalloc(sizeof(*in), GFP_KERNEL); - if (!in) - return -ENOMEM; - - out = kzalloc(sizeof(*out), GFP_KERNEL); - if (!out) { - err = -ENOMEM; + in = kzalloc(inlen, GFP_KERNEL); + out = kzalloc(outlen, GFP_KERNEL); + if (!in || !out) goto out; - } - in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MAD_IFC); - in->hdr.opmod = cpu_to_be16(opmod); - in->port = port; + MLX5_SET(mad_ifc_in, in, opcode, MLX5_CMD_OP_MAD_IFC); + MLX5_SET(mad_ifc_in, in, op_mod, opmod); + MLX5_SET(mad_ifc_in, in, port, port); - memcpy(in->data, inb, sizeof(in->data)); + data = MLX5_ADDR_OF(mad_ifc_in, in, mad); + memcpy(data, inb, MLX5_FLD_SZ_BYTES(mad_ifc_in, mad)); - err = mlx5_cmd_exec(dev, in, sizeof(*in), out, sizeof(*out)); + err = mlx5_cmd_exec(dev, in, inlen, out, outlen); + err = err ? : mlx5_cmd_status_to_err_v2(out); if (err) goto out; - if (out->hdr.status) { - err = mlx5_cmd_status_to_err(&out->hdr); - goto out; - } - - memcpy(outb, out->data, sizeof(out->data)); + resp = MLX5_ADDR_OF(mad_ifc_out, out, response_mad_packet); + memcpy(outb, resp, + MLX5_FLD_SZ_BYTES(mad_ifc_out, response_mad_packet)); out: kfree(out); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 752c08127138..e8324c2a8fc3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -38,45 +38,43 @@ int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in, int size_in, void *data_out, int size_out, - u16 reg_num, int arg, int write) + u16 reg_id, int arg, int write) { - struct mlx5_access_reg_mbox_in *in = NULL; - struct mlx5_access_reg_mbox_out *out = NULL; + int outlen = MLX5_ST_SZ_BYTES(access_register_out) + size_out; + int inlen = MLX5_ST_SZ_BYTES(access_register_in) + size_in; int err = -ENOMEM; + u32 *out = NULL; + u32 *in = NULL; + void *data; - in = mlx5_vzalloc(sizeof(*in) + size_in); - if (!in) - return -ENOMEM; + in = mlx5_vzalloc(inlen); + out = mlx5_vzalloc(outlen); + if (!in || !out) + goto out; - out = mlx5_vzalloc(sizeof(*out) + size_out); - if (!out) - goto ex1; + data = MLX5_ADDR_OF(access_register_in, in, register_data); + memcpy(data, data_in, size_in); - memcpy(in->data, data_in, size_in); - in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ACCESS_REG); - in->hdr.opmod = cpu_to_be16(!write); - in->arg = cpu_to_be32(arg); - in->register_id = cpu_to_be16(reg_num); - err = mlx5_cmd_exec(dev, in, sizeof(*in) + size_in, out, - sizeof(*out) + size_out); + MLX5_SET(access_register_in, in, opcode, MLX5_CMD_OP_ACCESS_REG); + MLX5_SET(access_register_in, in, op_mod, !write); + MLX5_SET(access_register_in, in, argument, arg); + MLX5_SET(access_register_in, in, register_id, reg_id); + + err = mlx5_cmd_exec(dev, in, inlen, out, outlen); + err = err ? : mlx5_cmd_status_to_err_v2(out); if (err) - goto ex2; + goto out; - if (out->hdr.status) - err = mlx5_cmd_status_to_err(&out->hdr); + data = MLX5_ADDR_OF(access_register_out, out, register_data); + memcpy(data_out, data, size_out); - if (!err) - memcpy(data_out, out->data, size_out); - -ex2: +out: kvfree(out); -ex1: kvfree(in); return err; } EXPORT_SYMBOL_GPL(mlx5_core_access_reg); - struct mlx5_reg_pcap { u8 rsvd0; u8 port_num; diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 6c343c0b77d2..9570c493b50f 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1165,35 +1165,6 @@ struct mlx5_dump_mkey_mbox_out { __be32 mkey; }; -struct mlx5_mad_ifc_mbox_in { - struct mlx5_inbox_hdr hdr; - __be16 remote_lid; - u8 rsvd0; - u8 port; - u8 rsvd1[4]; - u8 data[256]; -}; - -struct mlx5_mad_ifc_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd[8]; - u8 data[256]; -}; - -struct mlx5_access_reg_mbox_in { - struct mlx5_inbox_hdr hdr; - u8 rsvd0[2]; - __be16 register_id; - __be32 arg; - __be32 data[0]; -}; - -struct mlx5_access_reg_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd[8]; - __be32 data[0]; -}; - #define MLX5_ATTR_EXTENDED_PORT_INFO cpu_to_be16(0xff90) enum { From 732ef5ad8ff6e2fb9a801c0e1313d5a5d85ed98f Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Sun, 17 Jul 2016 01:44:57 +0300 Subject: [PATCH 0212/1715] net/mlx5: PD and UAR commands via mlx5 ifc Remove old representation of manually created PD/UAR commands layouts and use mlx5_ifc canonical structures and defines. Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/pd.c | 58 ++++------------ drivers/net/ethernet/mellanox/mlx5/core/uar.c | 66 ++++--------------- 2 files changed, 25 insertions(+), 99 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pd.c b/drivers/net/ethernet/mellanox/mlx5/core/pd.c index f2d3aee909e8..efe452c30a8d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pd.c @@ -36,66 +36,32 @@ #include #include "mlx5_core.h" -struct mlx5_alloc_pd_mbox_in { - struct mlx5_inbox_hdr hdr; - u8 rsvd[8]; -}; - -struct mlx5_alloc_pd_mbox_out { - struct mlx5_outbox_hdr hdr; - __be32 pdn; - u8 rsvd[4]; -}; - -struct mlx5_dealloc_pd_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 pdn; - u8 rsvd[4]; -}; - -struct mlx5_dealloc_pd_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd[8]; -}; - int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn) { - struct mlx5_alloc_pd_mbox_in in; - struct mlx5_alloc_pd_mbox_out out; + u32 out[MLX5_ST_SZ_DW(alloc_pd_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(alloc_pd_in)] = {0}; int err; - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ALLOC_PD); - err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); + MLX5_SET(alloc_pd_in, in, opcode, MLX5_CMD_OP_ALLOC_PD); + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + err = err ? : mlx5_cmd_status_to_err_v2(out); if (err) return err; - if (out.hdr.status) - return mlx5_cmd_status_to_err(&out.hdr); - - *pdn = be32_to_cpu(out.pdn) & 0xffffff; + *pdn = MLX5_GET(alloc_pd_out, out, pd); return err; } EXPORT_SYMBOL(mlx5_core_alloc_pd); int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn) { - struct mlx5_dealloc_pd_mbox_in in; - struct mlx5_dealloc_pd_mbox_out out; + u32 out[MLX5_ST_SZ_DW(dealloc_pd_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(dealloc_pd_in)] = {0}; int err; - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DEALLOC_PD); - in.pdn = cpu_to_be32(pdn); - err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); - if (err) - return err; - - if (out.hdr.status) - return mlx5_cmd_status_to_err(&out.hdr); - - return err; + MLX5_SET(dealloc_pd_in, in, opcode, MLX5_CMD_OP_DEALLOC_PD); + MLX5_SET(dealloc_pd_in, in, pd, pdn); + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + return err ? : mlx5_cmd_status_to_err_v2(out); } EXPORT_SYMBOL(mlx5_core_dealloc_pd); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/uar.c b/drivers/net/ethernet/mellanox/mlx5/core/uar.c index 5ff8af472bf5..d0a0e0bc77e4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/uar.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/uar.c @@ -42,73 +42,33 @@ enum { NUM_LOW_LAT_UUARS = 4, }; - -struct mlx5_alloc_uar_mbox_in { - struct mlx5_inbox_hdr hdr; - u8 rsvd[8]; -}; - -struct mlx5_alloc_uar_mbox_out { - struct mlx5_outbox_hdr hdr; - __be32 uarn; - u8 rsvd[4]; -}; - -struct mlx5_free_uar_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 uarn; - u8 rsvd[4]; -}; - -struct mlx5_free_uar_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd[8]; -}; - int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn) { - struct mlx5_alloc_uar_mbox_in in; - struct mlx5_alloc_uar_mbox_out out; + u32 out[MLX5_ST_SZ_DW(alloc_uar_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(alloc_uar_in)] = {0}; int err; - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ALLOC_UAR); - err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); + MLX5_SET(alloc_uar_in, in, opcode, MLX5_CMD_OP_ALLOC_UAR); + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + err = err ? : mlx5_cmd_status_to_err_v2(out); if (err) - goto ex; + return err; - if (out.hdr.status) { - err = mlx5_cmd_status_to_err(&out.hdr); - goto ex; - } - - *uarn = be32_to_cpu(out.uarn) & 0xffffff; - -ex: + *uarn = MLX5_GET(alloc_uar_out, out, uar); return err; } EXPORT_SYMBOL(mlx5_cmd_alloc_uar); int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn) { - struct mlx5_free_uar_mbox_in in; - struct mlx5_free_uar_mbox_out out; + u32 out[MLX5_ST_SZ_DW(dealloc_uar_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(dealloc_uar_in)] = {0}; int err; - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DEALLOC_UAR); - in.uarn = cpu_to_be32(uarn); - err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); - if (err) - goto ex; - - if (out.hdr.status) - err = mlx5_cmd_status_to_err(&out.hdr); - -ex: - return err; + MLX5_SET(dealloc_uar_in, in, opcode, MLX5_CMD_OP_DEALLOC_UAR); + MLX5_SET(dealloc_uar_in, in, uar, uarn); + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + return err ? : mlx5_cmd_status_to_err_v2(out); } EXPORT_SYMBOL(mlx5_cmd_free_uar); From 20bb566bda7b3e62b67dbb1bd363be40b5ae81c3 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Sun, 17 Jul 2016 02:01:45 +0300 Subject: [PATCH 0213/1715] net/mlx5: MCG commands via mlx5 ifc Remove old representation of manually created MCG commands layout and use mlx5_ifc canonical structures and defines. Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 4 +- drivers/net/ethernet/mellanox/mlx5/core/mcg.c | 70 +++++-------------- include/linux/mlx5/mlx5_ifc.h | 2 +- 3 files changed, 21 insertions(+), 55 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index d6e2a1cae19a..0d55e0f33f53 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -280,7 +280,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_DEALLOC_Q_COUNTER: case MLX5_CMD_OP_DEALLOC_PD: case MLX5_CMD_OP_DEALLOC_UAR: - case MLX5_CMD_OP_DETTACH_FROM_MCG: + case MLX5_CMD_OP_DETACH_FROM_MCG: case MLX5_CMD_OP_DEALLOC_XRCD: case MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN: case MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT: @@ -490,7 +490,7 @@ const char *mlx5_command_str(int command) MLX5_COMMAND_STR_CASE(CONFIG_INT_MODERATION); MLX5_COMMAND_STR_CASE(ACCESS_REG); MLX5_COMMAND_STR_CASE(ATTACH_TO_MCG); - MLX5_COMMAND_STR_CASE(DETTACH_FROM_MCG); + MLX5_COMMAND_STR_CASE(DETACH_FROM_MCG); MLX5_COMMAND_STR_CASE(GET_DROPPED_PACKET_LOG); MLX5_COMMAND_STR_CASE(MAD_IFC); MLX5_COMMAND_STR_CASE(QUERY_MAD_DEMUX); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mcg.c b/drivers/net/ethernet/mellanox/mlx5/core/mcg.c index d5a0c2d61a18..01a1abd88203 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mcg.c @@ -37,70 +37,36 @@ #include #include "mlx5_core.h" -struct mlx5_attach_mcg_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 qpn; - __be32 rsvd; - u8 gid[16]; -}; - -struct mlx5_attach_mcg_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvf[8]; -}; - -struct mlx5_detach_mcg_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 qpn; - __be32 rsvd; - u8 gid[16]; -}; - -struct mlx5_detach_mcg_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvf[8]; -}; - int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn) { - struct mlx5_attach_mcg_mbox_in in; - struct mlx5_attach_mcg_mbox_out out; + u32 out[MLX5_ST_SZ_DW(attach_to_mcg_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(attach_to_mcg_in)] = {0}; + void *gid; int err; - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ATTACH_TO_MCG); - memcpy(in.gid, mgid, sizeof(*mgid)); - in.qpn = cpu_to_be32(qpn); - err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); - if (err) - return err; + MLX5_SET(attach_to_mcg_in, in, opcode, MLX5_CMD_OP_ATTACH_TO_MCG); + MLX5_SET(attach_to_mcg_in, in, qpn, qpn); + gid = MLX5_ADDR_OF(attach_to_mcg_in, in, multicast_gid); + memcpy(gid, mgid, sizeof(*mgid)); - if (out.hdr.status) - err = mlx5_cmd_status_to_err(&out.hdr); - - return err; + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + return err ? : mlx5_cmd_status_to_err_v2(out); } EXPORT_SYMBOL(mlx5_core_attach_mcg); int mlx5_core_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn) { - struct mlx5_detach_mcg_mbox_in in; - struct mlx5_detach_mcg_mbox_out out; + u32 out[MLX5_ST_SZ_DW(detach_from_mcg_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(detach_from_mcg_in)] = {0}; + void *gid; int err; - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DETTACH_FROM_MCG); - memcpy(in.gid, mgid, sizeof(*mgid)); - in.qpn = cpu_to_be32(qpn); - err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); - if (err) - return err; + MLX5_SET(detach_from_mcg_in, in, opcode, MLX5_CMD_OP_DETACH_FROM_MCG); + MLX5_SET(detach_from_mcg_in, in, qpn, qpn); + gid = MLX5_ADDR_OF(detach_from_mcg_in, in, multicast_gid); + memcpy(gid, mgid, sizeof(*mgid)); - if (out.hdr.status) - err = mlx5_cmd_status_to_err(&out.hdr); - - return err; + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + return err ? : mlx5_cmd_status_to_err_v2(out); } EXPORT_SYMBOL(mlx5_core_detach_mcg); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 21bc4557b67a..3f70fc9c2fc9 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -152,7 +152,7 @@ enum { MLX5_CMD_OP_CONFIG_INT_MODERATION = 0x804, MLX5_CMD_OP_ACCESS_REG = 0x805, MLX5_CMD_OP_ATTACH_TO_MCG = 0x806, - MLX5_CMD_OP_DETTACH_FROM_MCG = 0x807, + MLX5_CMD_OP_DETACH_FROM_MCG = 0x807, MLX5_CMD_OP_GET_DROPPED_PACKET_LOG = 0x80a, MLX5_CMD_OP_MAD_IFC = 0x50d, MLX5_CMD_OP_QUERY_MAD_DEMUX = 0x80b, From a533ed5e179cd15512d40282617909d3482a771c Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Sun, 17 Jul 2016 13:27:25 +0300 Subject: [PATCH 0214/1715] net/mlx5: Pages management commands via mlx5 ifc Remove old representation of manually created Pages management commands layout, and use mlx5_ifc canonical structures and defines. Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- .../ethernet/mellanox/mlx5/core/pagealloc.c | 165 ++++++------------ 1 file changed, 58 insertions(+), 107 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index 32dea3524cee..7bfac21fbfee 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -44,12 +44,6 @@ enum { MLX5_PAGES_TAKE = 2 }; -enum { - MLX5_BOOT_PAGES = 1, - MLX5_INIT_PAGES = 2, - MLX5_POST_INIT_PAGES = 3 -}; - struct mlx5_pages_req { struct mlx5_core_dev *dev; u16 func_id; @@ -67,33 +61,6 @@ struct fw_page { unsigned free_count; }; -struct mlx5_query_pages_inbox { - struct mlx5_inbox_hdr hdr; - u8 rsvd[8]; -}; - -struct mlx5_query_pages_outbox { - struct mlx5_outbox_hdr hdr; - __be16 rsvd; - __be16 func_id; - __be32 num_pages; -}; - -struct mlx5_manage_pages_inbox { - struct mlx5_inbox_hdr hdr; - __be16 rsvd; - __be16 func_id; - __be32 num_entries; - __be64 pas[0]; -}; - -struct mlx5_manage_pages_outbox { - struct mlx5_outbox_hdr hdr; - __be32 num_entries; - u8 rsvd[4]; - __be64 pas[0]; -}; - enum { MAX_RECLAIM_TIME_MSECS = 5000, MAX_RECLAIM_VFS_PAGES_TIME_MSECS = 2 * 1000 * 60, @@ -167,24 +134,22 @@ static struct fw_page *find_fw_page(struct mlx5_core_dev *dev, u64 addr) static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id, s32 *npages, int boot) { - struct mlx5_query_pages_inbox in; - struct mlx5_query_pages_outbox out; + u32 out[MLX5_ST_SZ_DW(query_pages_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(query_pages_in)] = {0}; int err; - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_PAGES); - in.hdr.opmod = boot ? cpu_to_be16(MLX5_BOOT_PAGES) : cpu_to_be16(MLX5_INIT_PAGES); + MLX5_SET(query_pages_in, in, opcode, MLX5_CMD_OP_QUERY_PAGES); + MLX5_SET(query_pages_in, in, op_mod, boot ? + MLX5_QUERY_PAGES_IN_OP_MOD_BOOT_PAGES : + MLX5_QUERY_PAGES_IN_OP_MOD_INIT_PAGES); - err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + err = err ? : mlx5_cmd_status_to_err_v2(out); if (err) return err; - if (out.hdr.status) - return mlx5_cmd_status_to_err(&out.hdr); - - *npages = be32_to_cpu(out.num_pages); - *func_id = be16_to_cpu(out.func_id); + *npages = MLX5_GET(query_pages_out, out, num_pages); + *func_id = MLX5_GET(query_pages_out, out, function_id); return err; } @@ -280,46 +245,37 @@ out_alloc: static void page_notify_fail(struct mlx5_core_dev *dev, u16 func_id) { - struct mlx5_manage_pages_inbox *in; - struct mlx5_manage_pages_outbox out; + u32 out[MLX5_ST_SZ_DW(manage_pages_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(manage_pages_in)] = {0}; int err; - in = kzalloc(sizeof(*in), GFP_KERNEL); - if (!in) - return; - - memset(&out, 0, sizeof(out)); - in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES); - in->hdr.opmod = cpu_to_be16(MLX5_PAGES_CANT_GIVE); - in->func_id = cpu_to_be16(func_id); - err = mlx5_cmd_exec(dev, in, sizeof(*in), &out, sizeof(out)); - if (!err) - err = mlx5_cmd_status_to_err(&out.hdr); - + MLX5_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES); + MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_CANT_GIVE); + MLX5_SET(manage_pages_in, in, function_id, func_id); + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + err = err ? : mlx5_cmd_status_to_err_v2(out); if (err) - mlx5_core_warn(dev, "page notify failed\n"); - - kfree(in); + mlx5_core_warn(dev, "page notify failed func_id(%d) err(%d)\n", + func_id, err); } static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages, int notify_fail) { - struct mlx5_manage_pages_inbox *in; - struct mlx5_manage_pages_outbox out; - int inlen; + u32 out[MLX5_ST_SZ_DW(manage_pages_out)] = {0}; + int inlen = MLX5_ST_SZ_BYTES(manage_pages_in); u64 addr; int err; + u32 *in; int i; - inlen = sizeof(*in) + npages * sizeof(in->pas[0]); + inlen += npages * MLX5_FLD_SZ_BYTES(manage_pages_in, pas[0]); in = mlx5_vzalloc(inlen); if (!in) { err = -ENOMEM; mlx5_core_warn(dev, "vzalloc failed %d\n", inlen); goto out_free; } - memset(&out, 0, sizeof(out)); for (i = 0; i < npages; i++) { retry: @@ -332,27 +288,22 @@ retry: goto retry; } - in->pas[i] = cpu_to_be64(addr); + MLX5_SET64(manage_pages_in, in, pas[i], addr); } - in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES); - in->hdr.opmod = cpu_to_be16(MLX5_PAGES_GIVE); - in->func_id = cpu_to_be16(func_id); - in->num_entries = cpu_to_be32(npages); - err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out)); + MLX5_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES); + MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_GIVE); + MLX5_SET(manage_pages_in, in, function_id, func_id); + MLX5_SET(manage_pages_in, in, input_num_entries, npages); + + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); + err = err ? : mlx5_cmd_status_to_err_v2(out); if (err) { mlx5_core_warn(dev, "func_id 0x%x, npages %d, err %d\n", func_id, npages, err); goto out_4k; } - err = mlx5_cmd_status_to_err(&out.hdr); - if (err) { - mlx5_core_warn(dev, "func_id 0x%x, npages %d, status %d\n", - func_id, npages, out.hdr.status); - goto out_4k; - } - dev->priv.fw_pages += npages; if (func_id) dev->priv.vfs_pages += npages; @@ -364,7 +315,7 @@ retry: out_4k: for (i--; i >= 0; i--) - free_4k(dev, be64_to_cpu(in->pas[i])); + free_4k(dev, MLX5_GET64(manage_pages_in, in, pas[i])); out_free: kvfree(in); if (notify_fail) @@ -373,64 +324,65 @@ out_free: } static int reclaim_pages_cmd(struct mlx5_core_dev *dev, - struct mlx5_manage_pages_inbox *in, int in_size, - struct mlx5_manage_pages_outbox *out, int out_size) + u32 *in, int in_size, u32 *out, int out_size) { struct fw_page *fwp; struct rb_node *p; u32 npages; u32 i = 0; - if (dev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) - return mlx5_cmd_exec_check_status(dev, (u32 *)in, in_size, - (u32 *)out, out_size); + if (dev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) { + int err = mlx5_cmd_exec(dev, in, in_size, out, out_size); - npages = be32_to_cpu(in->num_entries); + return err ? : mlx5_cmd_status_to_err_v2(out); + } + + /* No hard feelings, we want our pages back! */ + npages = MLX5_GET(manage_pages_in, in, input_num_entries); p = rb_first(&dev->priv.page_root); while (p && i < npages) { fwp = rb_entry(p, struct fw_page, rb_node); - out->pas[i] = cpu_to_be64(fwp->addr); + MLX5_SET64(manage_pages_out, out, pas[i], fwp->addr); p = rb_next(p); i++; } - out->num_entries = cpu_to_be32(i); + MLX5_SET(manage_pages_out, out, output_num_entries, i); return 0; } static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, int *nclaimed) { - struct mlx5_manage_pages_inbox in; - struct mlx5_manage_pages_outbox *out; + int outlen = MLX5_ST_SZ_BYTES(manage_pages_out); + u32 in[MLX5_ST_SZ_DW(manage_pages_in)] = {0}; int num_claimed; - int outlen; - u64 addr; + u32 *out; int err; int i; if (nclaimed) *nclaimed = 0; - memset(&in, 0, sizeof(in)); - outlen = sizeof(*out) + npages * sizeof(out->pas[0]); + outlen += npages * MLX5_FLD_SZ_BYTES(manage_pages_out, pas[0]); out = mlx5_vzalloc(outlen); if (!out) return -ENOMEM; - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES); - in.hdr.opmod = cpu_to_be16(MLX5_PAGES_TAKE); - in.func_id = cpu_to_be16(func_id); - in.num_entries = cpu_to_be32(npages); + MLX5_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES); + MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_TAKE); + MLX5_SET(manage_pages_in, in, function_id, func_id); + MLX5_SET(manage_pages_in, in, input_num_entries, npages); + mlx5_core_dbg(dev, "npages %d, outlen %d\n", npages, outlen); - err = reclaim_pages_cmd(dev, &in, sizeof(in), out, outlen); + err = reclaim_pages_cmd(dev, in, sizeof(in), out, outlen); if (err) { mlx5_core_err(dev, "failed reclaiming pages: err %d\n", err); goto out_free; } - num_claimed = be32_to_cpu(out->num_entries); + num_claimed = MLX5_GET(manage_pages_out, out, output_num_entries); if (num_claimed > npages) { mlx5_core_warn(dev, "fw returned %d, driver asked %d => corruption\n", num_claimed, npages); @@ -438,10 +390,9 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, goto out_free; } - for (i = 0; i < num_claimed; i++) { - addr = be64_to_cpu(out->pas[i]); - free_4k(dev, addr); - } + for (i = 0; i < num_claimed; i++) + free_4k(dev, MLX5_GET64(manage_pages_out, out, pas[i])); + if (nclaimed) *nclaimed = num_claimed; @@ -518,8 +469,8 @@ static int optimal_reclaimed_pages(void) int ret; ret = (sizeof(lay->out) + MLX5_BLKS_FOR_RECLAIM_PAGES * sizeof(block->data) - - sizeof(struct mlx5_manage_pages_outbox)) / - FIELD_SIZEOF(struct mlx5_manage_pages_outbox, pas[0]); + MLX5_ST_SZ_BYTES(manage_pages_out)) / + MLX5_FLD_SZ_BYTES(manage_pages_out, pas[0]); return ret; } From 73b626c182dff06867ceba996a819e8372c9b2ce Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Sat, 16 Jul 2016 03:26:15 +0300 Subject: [PATCH 0215/1715] net/mlx5: EQ commands via mlx5 ifc Remove old representation of manually created EQ commands layout, and use mlx5_ifc canonical structures and defines. Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- .../net/ethernet/mellanox/mlx5/core/debugfs.c | 18 ++--- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 78 ++++++++----------- include/linux/mlx5/device.h | 74 ------------------ include/linux/mlx5/driver.h | 2 +- 4 files changed, 44 insertions(+), 128 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c index 5210d92e6bc7..58e5518ebb27 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c @@ -358,32 +358,32 @@ out: static u64 eq_read_field(struct mlx5_core_dev *dev, struct mlx5_eq *eq, int index) { - struct mlx5_query_eq_mbox_out *out; - struct mlx5_eq_context *ctx; + int outlen = MLX5_ST_SZ_BYTES(query_eq_out); u64 param = 0; + void *ctx; + u32 *out; int err; - out = kzalloc(sizeof(*out), GFP_KERNEL); + out = kzalloc(outlen, GFP_KERNEL); if (!out) return param; - ctx = &out->ctx; - - err = mlx5_core_eq_query(dev, eq, out, sizeof(*out)); + err = mlx5_core_eq_query(dev, eq, out, outlen); if (err) { mlx5_core_warn(dev, "failed to query eq\n"); goto out; } + ctx = MLX5_ADDR_OF(query_eq_out, out, eq_context_entry); switch (index) { case EQ_NUM_EQES: - param = 1 << ((be32_to_cpu(ctx->log_sz_usr_page) >> 24) & 0x1f); + param = 1 << MLX5_GET(eqc, ctx, log_eq_size); break; case EQ_INTR: - param = ctx->intr; + param = MLX5_GET(eqc, ctx, intr); break; case EQ_LOG_PG_SZ: - param = (ctx->log_page_size & 0x1f) + 12; + param = MLX5_GET(eqc, ctx, log_page_size) + 12; break; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 0e30602ef76d..71411976ef1c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -86,23 +86,16 @@ struct cre_des_eq { static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn) { - struct mlx5_destroy_eq_mbox_in in; - struct mlx5_destroy_eq_mbox_out out; + u32 out[MLX5_ST_SZ_DW(destroy_eq_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(destroy_eq_in)] = {0}; int err; - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_EQ); - in.eqn = eqn; - err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); - if (!err) - goto ex; + MLX5_SET(destroy_eq_in, in, opcode, MLX5_CMD_OP_DESTROY_EQ); + MLX5_SET(destroy_eq_in, in, eq_number, eqn); - if (out.hdr.status) - err = mlx5_cmd_status_to_err(&out.hdr); + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + return err ? : mlx5_cmd_status_to_err_v2(out); -ex: - return err; } static struct mlx5_eqe *get_eqe(struct mlx5_eq *eq, u32 entry) @@ -351,11 +344,13 @@ static void init_eq_buf(struct mlx5_eq *eq) int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, int nent, u64 mask, const char *name, struct mlx5_uar *uar) { + u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0}; struct mlx5_priv *priv = &dev->priv; - struct mlx5_create_eq_mbox_in *in; - struct mlx5_create_eq_mbox_out out; - int err; + __be64 *pas; + void *eqc; int inlen; + u32 *in; + int err; eq->nent = roundup_pow_of_two(nent + MLX5_NUM_SPARE_EQE); eq->cons_index = 0; @@ -365,35 +360,37 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, init_eq_buf(eq); - inlen = sizeof(*in) + sizeof(in->pas[0]) * eq->buf.npages; + inlen = MLX5_ST_SZ_BYTES(create_eq_in) + + MLX5_FLD_SZ_BYTES(create_eq_in, pas[0]) * eq->buf.npages; + in = mlx5_vzalloc(inlen); if (!in) { err = -ENOMEM; goto err_buf; } - memset(&out, 0, sizeof(out)); - mlx5_fill_page_array(&eq->buf, in->pas); + pas = (__be64 *)MLX5_ADDR_OF(create_eq_in, in, pas); + mlx5_fill_page_array(&eq->buf, pas); - in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_EQ); - in->ctx.log_sz_usr_page = cpu_to_be32(ilog2(eq->nent) << 24 | uar->index); - in->ctx.intr = vecidx; - in->ctx.log_page_size = eq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT; - in->events_mask = cpu_to_be64(mask); + MLX5_SET(create_eq_in, in, opcode, MLX5_CMD_OP_CREATE_EQ); + MLX5_SET64(create_eq_in, in, event_bitmask, mask); - err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out)); + eqc = MLX5_ADDR_OF(create_eq_in, in, eq_context_entry); + MLX5_SET(eqc, eqc, log_eq_size, ilog2(eq->nent)); + MLX5_SET(eqc, eqc, uar_page, uar->index); + MLX5_SET(eqc, eqc, intr, vecidx); + MLX5_SET(eqc, eqc, log_page_size, + eq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); + + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); + err = err ? : mlx5_cmd_status_to_err_v2(out); if (err) goto err_in; - if (out.hdr.status) { - err = mlx5_cmd_status_to_err(&out.hdr); - goto err_in; - } - snprintf(priv->irq_info[vecidx].name, MLX5_MAX_IRQ_NAME, "%s@pci:%s", name, pci_name(dev->pdev)); - eq->eqn = out.eq_number; + eq->eqn = MLX5_GET(create_eq_out, out, eq_number); eq->irqn = priv->msix_arr[vecidx].vector; eq->dev = dev; eq->doorbell = uar->map + MLX5_EQ_DOORBEL_OFFSET; @@ -547,22 +544,15 @@ int mlx5_stop_eqs(struct mlx5_core_dev *dev) } int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq, - struct mlx5_query_eq_mbox_out *out, int outlen) + u32 *out, int outlen) { - struct mlx5_query_eq_mbox_in in; + u32 in[MLX5_ST_SZ_DW(query_eq_in)] = {0}; int err; - memset(&in, 0, sizeof(in)); - memset(out, 0, outlen); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_EQ); - in.eqn = eq->eqn; - err = mlx5_cmd_exec(dev, &in, sizeof(in), out, outlen); - if (err) - return err; + MLX5_SET(query_eq_in, in, opcode, MLX5_CMD_OP_QUERY_EQ); + MLX5_SET(query_eq_in, in, eq_number, eq->eqn); - if (out->hdr.status) - err = mlx5_cmd_status_to_err(&out->hdr); - - return err; + err = mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); + return err ? : mlx5_cmd_status_to_err_v2(out); } EXPORT_SYMBOL_GPL(mlx5_core_eq_query); diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 9570c493b50f..c84e0ba5b261 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -995,80 +995,6 @@ struct mlx5_disable_hca_mbox_out { u8 rsvd[8]; }; -struct mlx5_eq_context { - u8 status; - u8 ec_oi; - u8 st; - u8 rsvd2[7]; - __be16 page_pffset; - __be32 log_sz_usr_page; - u8 rsvd3[7]; - u8 intr; - u8 log_page_size; - u8 rsvd4[15]; - __be32 consumer_counter; - __be32 produser_counter; - u8 rsvd5[16]; -}; - -struct mlx5_create_eq_mbox_in { - struct mlx5_inbox_hdr hdr; - u8 rsvd0[3]; - u8 input_eqn; - u8 rsvd1[4]; - struct mlx5_eq_context ctx; - u8 rsvd2[8]; - __be64 events_mask; - u8 rsvd3[176]; - __be64 pas[0]; -}; - -struct mlx5_create_eq_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd0[3]; - u8 eq_number; - u8 rsvd1[4]; -}; - -struct mlx5_destroy_eq_mbox_in { - struct mlx5_inbox_hdr hdr; - u8 rsvd0[3]; - u8 eqn; - u8 rsvd1[4]; -}; - -struct mlx5_destroy_eq_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd[8]; -}; - -struct mlx5_map_eq_mbox_in { - struct mlx5_inbox_hdr hdr; - __be64 mask; - u8 mu; - u8 rsvd0[2]; - u8 eqn; - u8 rsvd1[24]; -}; - -struct mlx5_map_eq_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd[8]; -}; - -struct mlx5_query_eq_mbox_in { - struct mlx5_inbox_hdr hdr; - u8 rsvd0[3]; - u8 eqn; - u8 rsvd1[4]; -}; - -struct mlx5_query_eq_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd[8]; - struct mlx5_eq_context ctx; -}; - enum { MLX5_MKEY_STATUS_FREE = 1 << 6, }; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index ccea6fb16482..eed4b612572d 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -865,7 +865,7 @@ int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in, int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq); void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq); int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq, - struct mlx5_query_eq_mbox_out *out, int outlen); + u32 *out, int outlen); int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev); void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev); int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev); From 278277866334e515141dde7c8ac143e15c0a767f Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Sat, 16 Jul 2016 02:33:22 +0300 Subject: [PATCH 0216/1715] {net,IB}/mlx5: CQ commands via mlx5 ifc Remove old representation of manually created CQ commands layout, and use mlx5_ifc canonical structures and defines. Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/cq.c | 108 +++++++++-------- drivers/net/ethernet/mellanox/mlx5/core/cq.c | 109 ++++++++---------- .../net/ethernet/mellanox/mlx5/core/debugfs.c | 18 +-- include/linux/mlx5/cq.h | 6 +- include/linux/mlx5/device.h | 76 ------------ 5 files changed, 119 insertions(+), 198 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 308a358e5b46..35a9f718e669 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -747,14 +747,16 @@ static int alloc_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf, static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, struct ib_ucontext *context, struct mlx5_ib_cq *cq, - int entries, struct mlx5_create_cq_mbox_in **cqb, + int entries, u32 **cqb, int *cqe_size, int *index, int *inlen) { struct mlx5_ib_create_cq ucmd; size_t ucmdlen; int page_shift; + __be64 *pas; int npages; int ncont; + void *cqc; int err; ucmdlen = @@ -792,14 +794,20 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, mlx5_ib_dbg(dev, "addr 0x%llx, size %u, npages %d, page_shift %d, ncont %d\n", ucmd.buf_addr, entries * ucmd.cqe_size, npages, page_shift, ncont); - *inlen = sizeof(**cqb) + sizeof(*(*cqb)->pas) * ncont; + *inlen = MLX5_ST_SZ_BYTES(create_cq_in) + + MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * ncont; *cqb = mlx5_vzalloc(*inlen); if (!*cqb) { err = -ENOMEM; goto err_db; } - mlx5_ib_populate_pas(dev, cq->buf.umem, page_shift, (*cqb)->pas, 0); - (*cqb)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT; + + pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas); + mlx5_ib_populate_pas(dev, cq->buf.umem, page_shift, pas, 0); + + cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context); + MLX5_SET(cqc, cqc, log_page_size, + page_shift - MLX5_ADAPTER_PAGE_SHIFT); *index = to_mucontext(context)->uuari.uars[0].index; @@ -834,9 +842,10 @@ static void init_cq_buf(struct mlx5_ib_cq *cq, struct mlx5_ib_cq_buf *buf) static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, int entries, int cqe_size, - struct mlx5_create_cq_mbox_in **cqb, - int *index, int *inlen) + u32 **cqb, int *index, int *inlen) { + __be64 *pas; + void *cqc; int err; err = mlx5_db_alloc(dev->mdev, &cq->db); @@ -853,15 +862,21 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, init_cq_buf(cq, &cq->buf); - *inlen = sizeof(**cqb) + sizeof(*(*cqb)->pas) * cq->buf.buf.npages; + *inlen = MLX5_ST_SZ_BYTES(create_cq_in) + + MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * cq->buf.buf.npages; *cqb = mlx5_vzalloc(*inlen); if (!*cqb) { err = -ENOMEM; goto err_buf; } - mlx5_fill_page_array(&cq->buf.buf, (*cqb)->pas); - (*cqb)->ctx.log_pg_sz = cq->buf.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT; + pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas); + mlx5_fill_page_array(&cq->buf.buf, pas); + + cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context); + MLX5_SET(cqc, cqc, log_page_size, + cq->buf.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); + *index = dev->mdev->priv.uuari.uars[0].index; return 0; @@ -895,11 +910,12 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, { int entries = attr->cqe; int vector = attr->comp_vector; - struct mlx5_create_cq_mbox_in *cqb = NULL; struct mlx5_ib_dev *dev = to_mdev(ibdev); struct mlx5_ib_cq *cq; int uninitialized_var(index); int uninitialized_var(inlen); + u32 *cqb = NULL; + void *cqc; int cqe_size; unsigned int irqn; int eqn; @@ -945,19 +961,20 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, INIT_WORK(&cq->notify_work, notify_soft_wc_handler); } - cq->cqe_size = cqe_size; - cqb->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5; - - if (cq->create_flags & IB_CQ_FLAGS_IGNORE_OVERRUN) - cqb->ctx.cqe_sz_flags |= (1 << 1); - - cqb->ctx.log_sz_usr_page = cpu_to_be32((ilog2(entries) << 24) | index); err = mlx5_vector2eqn(dev->mdev, vector, &eqn, &irqn); if (err) goto err_cqb; - cqb->ctx.c_eqn = cpu_to_be16(eqn); - cqb->ctx.db_record_addr = cpu_to_be64(cq->db.dma); + cq->cqe_size = cqe_size; + + cqc = MLX5_ADDR_OF(create_cq_in, cqb, cq_context); + MLX5_SET(cqc, cqc, cqe_sz, cqe_sz_to_mlx_sz(cqe_size)); + MLX5_SET(cqc, cqc, log_cq_size, ilog2(entries)); + MLX5_SET(cqc, cqc, uar_page, index); + MLX5_SET(cqc, cqc, c_eqn, eqn); + MLX5_SET64(cqc, cqc, dbr_addr, cq->db.dma); + if (cq->create_flags & IB_CQ_FLAGS_IGNORE_OVERRUN) + MLX5_SET(cqc, cqc, oi, 1); err = mlx5_core_create_cq(dev->mdev, &cq->mcq, cqb, inlen); if (err) @@ -1088,27 +1105,15 @@ void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq) int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) { - struct mlx5_modify_cq_mbox_in *in; struct mlx5_ib_dev *dev = to_mdev(cq->device); struct mlx5_ib_cq *mcq = to_mcq(cq); int err; - u32 fsel; if (!MLX5_CAP_GEN(dev->mdev, cq_moderation)) return -ENOSYS; - in = kzalloc(sizeof(*in), GFP_KERNEL); - if (!in) - return -ENOMEM; - - in->cqn = cpu_to_be32(mcq->mcq.cqn); - fsel = (MLX5_CQ_MODIFY_PERIOD | MLX5_CQ_MODIFY_COUNT); - in->ctx.cq_period = cpu_to_be16(cq_period); - in->ctx.cq_max_count = cpu_to_be16(cq_count); - in->field_select = cpu_to_be32(fsel); - err = mlx5_core_modify_cq(dev->mdev, &mcq->mcq, in, sizeof(*in)); - kfree(in); - + err = mlx5_core_modify_cq_moderation(dev->mdev, &mcq->mcq, + cq_period, cq_count); if (err) mlx5_ib_warn(dev, "modify cq 0x%x failed\n", mcq->mcq.cqn); @@ -1241,9 +1246,11 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(ibcq->device); struct mlx5_ib_cq *cq = to_mcq(ibcq); - struct mlx5_modify_cq_mbox_in *in; + void *cqc; + u32 *in; int err; int npas; + __be64 *pas; int page_shift; int inlen; int uninitialized_var(cqe_size); @@ -1285,28 +1292,37 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) if (err) goto ex; - inlen = sizeof(*in) + npas * sizeof(in->pas[0]); + inlen = MLX5_ST_SZ_BYTES(modify_cq_in) + + MLX5_FLD_SZ_BYTES(modify_cq_in, pas[0]) * npas; + in = mlx5_vzalloc(inlen); if (!in) { err = -ENOMEM; goto ex_resize; } + pas = (__be64 *)MLX5_ADDR_OF(modify_cq_in, in, pas); if (udata) mlx5_ib_populate_pas(dev, cq->resize_umem, page_shift, - in->pas, 0); + pas, 0); else - mlx5_fill_page_array(&cq->resize_buf->buf, in->pas); + mlx5_fill_page_array(&cq->resize_buf->buf, pas); - in->field_select = cpu_to_be32(MLX5_MODIFY_CQ_MASK_LOG_SIZE | - MLX5_MODIFY_CQ_MASK_PG_OFFSET | - MLX5_MODIFY_CQ_MASK_PG_SIZE); - in->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT; - in->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5; - in->ctx.page_offset = 0; - in->ctx.log_sz_usr_page = cpu_to_be32(ilog2(entries) << 24); - in->hdr.opmod = cpu_to_be16(MLX5_CQ_OPMOD_RESIZE); - in->cqn = cpu_to_be32(cq->mcq.cqn); + MLX5_SET(modify_cq_in, in, + modify_field_select_resize_field_select.resize_field_select.resize_field_select, + MLX5_MODIFY_CQ_MASK_LOG_SIZE | + MLX5_MODIFY_CQ_MASK_PG_OFFSET | + MLX5_MODIFY_CQ_MASK_PG_SIZE); + + cqc = MLX5_ADDR_OF(modify_cq_in, in, cq_context); + + MLX5_SET(cqc, cqc, log_page_size, + page_shift - MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET(cqc, cqc, cqe_sz, cqe_sz_to_mlx_sz(cqe_size)); + MLX5_SET(cqc, cqc, log_cq_size, ilog2(entries)); + + MLX5_SET(modify_cq_in, in, op_mod, MLX5_CQ_OPMOD_RESIZE); + MLX5_SET(modify_cq_in, in, cqn, cq->mcq.cqn); err = mlx5_core_modify_cq(dev->mdev, &cq->mcq, in, inlen); if (err) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c index 873a631ad155..cf02d8a27874 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c @@ -134,33 +134,30 @@ void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type) complete(&cq->free); } - int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, - struct mlx5_create_cq_mbox_in *in, int inlen) + u32 *in, int inlen) { - int err; struct mlx5_cq_table *table = &dev->priv.cq_table; - struct mlx5_create_cq_mbox_out out; - struct mlx5_destroy_cq_mbox_in din; - struct mlx5_destroy_cq_mbox_out dout; + u32 out[MLX5_ST_SZ_DW(create_cq_out)]; + u32 din[MLX5_ST_SZ_DW(destroy_cq_in)]; + u32 dout[MLX5_ST_SZ_DW(destroy_cq_out)]; int eqn = MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context), c_eqn); struct mlx5_eq *eq; + int err; eq = mlx5_eqn2eq(dev, eqn); if (IS_ERR(eq)) return PTR_ERR(eq); - in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_CQ); - memset(&out, 0, sizeof(out)); - err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out)); + memset(out, 0, sizeof(out)); + MLX5_SET(create_cq_in, in, opcode, MLX5_CMD_OP_CREATE_CQ); + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); + err = err ? : mlx5_cmd_status_to_err_v2(out); if (err) return err; - if (out.hdr.status) - return mlx5_cmd_status_to_err(&out.hdr); - - cq->cqn = be32_to_cpu(out.cqn) & 0xffffff; + cq->cqn = MLX5_GET(create_cq_out, out, cqn); cq->cons_index = 0; cq->arm_sn = 0; atomic_set(&cq->refcount, 1); @@ -186,19 +183,21 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, return 0; err_cmd: - memset(&din, 0, sizeof(din)); - memset(&dout, 0, sizeof(dout)); - din.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_CQ); - mlx5_cmd_exec(dev, &din, sizeof(din), &dout, sizeof(dout)); - return err; + memset(din, 0, sizeof(din)); + memset(dout, 0, sizeof(dout)); + MLX5_SET(destroy_cq_in, din, opcode, MLX5_CMD_OP_DESTROY_CQ); + MLX5_SET(destroy_cq_in, din, cqn, cq->cqn); + err = mlx5_cmd_exec(dev, din, sizeof(din), dout, sizeof(dout)); + return err ? : mlx5_cmd_status_to_err_v2(out); + } EXPORT_SYMBOL(mlx5_core_create_cq); int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq) { struct mlx5_cq_table *table = &dev->priv.cq_table; - struct mlx5_destroy_cq_mbox_in in; - struct mlx5_destroy_cq_mbox_out out; + u32 out[MLX5_ST_SZ_DW(destroy_cq_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(destroy_cq_in)] = {0}; struct mlx5_core_cq *tmp; int err; @@ -214,17 +213,13 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq) return -EINVAL; } - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_CQ); - in.cqn = cpu_to_be32(cq->cqn); - err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); + MLX5_SET(destroy_cq_in, in, opcode, MLX5_CMD_OP_DESTROY_CQ); + MLX5_SET(destroy_cq_in, in, cqn, cq->cqn); + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + err = err ? : mlx5_cmd_status_to_err_v2(out); if (err) return err; - if (out.hdr.status) - return mlx5_cmd_status_to_err(&out.hdr); - synchronize_irq(cq->irqn); mlx5_debug_cq_remove(dev, cq); @@ -237,44 +232,28 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq) EXPORT_SYMBOL(mlx5_core_destroy_cq); int mlx5_core_query_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, - struct mlx5_query_cq_mbox_out *out) + u32 *out, int outlen) { - struct mlx5_query_cq_mbox_in in; + u32 in[MLX5_ST_SZ_DW(query_cq_in)] = {0}; int err; - memset(&in, 0, sizeof(in)); - memset(out, 0, sizeof(*out)); + MLX5_SET(query_cq_in, in, opcode, MLX5_CMD_OP_QUERY_CQ); + MLX5_SET(query_cq_in, in, cqn, cq->cqn); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_CQ); - in.cqn = cpu_to_be32(cq->cqn); - err = mlx5_cmd_exec(dev, &in, sizeof(in), out, sizeof(*out)); - if (err) - return err; - - if (out->hdr.status) - return mlx5_cmd_status_to_err(&out->hdr); - - return err; + err = mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); + return err ? : mlx5_cmd_status_to_err_v2(out); } EXPORT_SYMBOL(mlx5_core_query_cq); - int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, - struct mlx5_modify_cq_mbox_in *in, int in_sz) + u32 *in, int inlen) { - struct mlx5_modify_cq_mbox_out out; + u32 out[MLX5_ST_SZ_DW(modify_cq_out)] = {0}; int err; - memset(&out, 0, sizeof(out)); - in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MODIFY_CQ); - err = mlx5_cmd_exec(dev, in, in_sz, &out, sizeof(out)); - if (err) - return err; - - if (out.hdr.status) - return mlx5_cmd_status_to_err(&out.hdr); - - return 0; + MLX5_SET(modify_cq_in, in, opcode, MLX5_CMD_OP_MODIFY_CQ); + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); + return err ? : mlx5_cmd_status_to_err_v2(out); } EXPORT_SYMBOL(mlx5_core_modify_cq); @@ -283,18 +262,20 @@ int mlx5_core_modify_cq_moderation(struct mlx5_core_dev *dev, u16 cq_period, u16 cq_max_count) { - struct mlx5_modify_cq_mbox_in in; + u32 in[MLX5_ST_SZ_DW(modify_cq_in)] = {0}; + void *cqc; - memset(&in, 0, sizeof(in)); + MLX5_SET(modify_cq_in, in, cqn, cq->cqn); + cqc = MLX5_ADDR_OF(modify_cq_in, in, cq_context); + MLX5_SET(cqc, cqc, cq_period, cq_period); + MLX5_SET(cqc, cqc, cq_max_count, cq_max_count); + MLX5_SET(modify_cq_in, in, + modify_field_select_resize_field_select.modify_field_select.modify_field_select, + MLX5_CQ_MODIFY_PERIOD | MLX5_CQ_MODIFY_COUNT); - in.cqn = cpu_to_be32(cq->cqn); - in.ctx.cq_period = cpu_to_be16(cq_period); - in.ctx.cq_max_count = cpu_to_be16(cq_max_count); - in.field_select = cpu_to_be32(MLX5_CQ_MODIFY_PERIOD | - MLX5_CQ_MODIFY_COUNT); - - return mlx5_core_modify_cq(dev, cq, &in, sizeof(in)); + return mlx5_core_modify_cq(dev, cq, in, sizeof(in)); } +EXPORT_SYMBOL(mlx5_core_modify_cq_moderation); int mlx5_init_cq_table(struct mlx5_core_dev *dev) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c index 58e5518ebb27..b7484e4128c8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c @@ -395,37 +395,37 @@ out: static u64 cq_read_field(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, int index) { - struct mlx5_query_cq_mbox_out *out; - struct mlx5_cq_context *ctx; + int outlen = MLX5_ST_SZ_BYTES(query_cq_out); u64 param = 0; + void *ctx; + u32 *out; int err; - out = kzalloc(sizeof(*out), GFP_KERNEL); + out = mlx5_vzalloc(outlen); if (!out) return param; - ctx = &out->ctx; - - err = mlx5_core_query_cq(dev, cq, out); + err = mlx5_core_query_cq(dev, cq, out, outlen); if (err) { mlx5_core_warn(dev, "failed to query cq\n"); goto out; } + ctx = MLX5_ADDR_OF(query_cq_out, out, cq_context); switch (index) { case CQ_PID: param = cq->pid; break; case CQ_NUM_CQES: - param = 1 << ((be32_to_cpu(ctx->log_sz_usr_page) >> 24) & 0x1f); + param = 1 << MLX5_GET(cqc, ctx, log_cq_size); break; case CQ_LOG_PG_SZ: - param = (ctx->log_pg_sz & 0x1f) + 12; + param = MLX5_GET(cqc, ctx, log_page_size); break; } out: - kfree(out); + kvfree(out); return param; } diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h index 2566f6d6444f..7c3c0d3aca37 100644 --- a/include/linux/mlx5/cq.h +++ b/include/linux/mlx5/cq.h @@ -170,12 +170,12 @@ static inline void mlx5_cq_arm(struct mlx5_core_cq *cq, u32 cmd, int mlx5_init_cq_table(struct mlx5_core_dev *dev); void mlx5_cleanup_cq_table(struct mlx5_core_dev *dev); int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, - struct mlx5_create_cq_mbox_in *in, int inlen); + u32 *in, int inlen); int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq); int mlx5_core_query_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, - struct mlx5_query_cq_mbox_out *out); + u32 *out, int outlen); int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, - struct mlx5_modify_cq_mbox_in *in, int in_sz); + u32 *in, int inlen); int mlx5_core_modify_cq_moderation(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, u16 cq_period, u16 cq_max_count); diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index c84e0ba5b261..5a1c1606bdbd 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -899,82 +899,6 @@ struct mlx5_arm_srq_mbox_out { u8 rsvd[8]; }; -struct mlx5_cq_context { - u8 status; - u8 cqe_sz_flags; - u8 st; - u8 rsvd3; - u8 rsvd4[6]; - __be16 page_offset; - __be32 log_sz_usr_page; - __be16 cq_period; - __be16 cq_max_count; - __be16 rsvd20; - __be16 c_eqn; - u8 log_pg_sz; - u8 rsvd25[7]; - __be32 last_notified_index; - __be32 solicit_producer_index; - __be32 consumer_counter; - __be32 producer_counter; - u8 rsvd48[8]; - __be64 db_record_addr; -}; - -struct mlx5_create_cq_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 input_cqn; - u8 rsvdx[4]; - struct mlx5_cq_context ctx; - u8 rsvd6[192]; - __be64 pas[0]; -}; - -struct mlx5_create_cq_mbox_out { - struct mlx5_outbox_hdr hdr; - __be32 cqn; - u8 rsvd0[4]; -}; - -struct mlx5_destroy_cq_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 cqn; - u8 rsvd0[4]; -}; - -struct mlx5_destroy_cq_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd0[8]; -}; - -struct mlx5_query_cq_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 cqn; - u8 rsvd0[4]; -}; - -struct mlx5_query_cq_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd0[8]; - struct mlx5_cq_context ctx; - u8 rsvd6[16]; - __be64 pas[0]; -}; - -struct mlx5_modify_cq_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 cqn; - __be32 field_select; - struct mlx5_cq_context ctx; - u8 rsvd[192]; - __be64 pas[0]; -}; - -struct mlx5_modify_cq_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd[8]; -}; - struct mlx5_enable_hca_mbox_in { struct mlx5_inbox_hdr hdr; u8 rsvd[8]; From ec22eb53106be1472ba6573dc900943f52f8fd1e Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Sat, 16 Jul 2016 06:28:36 +0300 Subject: [PATCH 0217/1715] {net,IB}/mlx5: MKey/PSV commands via mlx5 ifc Remove old representation of manually created MKey/PSV commands layout, and use mlx5_ifc canonical structures and defines. Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 2 +- drivers/infiniband/hw/mlx5/mr.c | 182 +++++++++-------- drivers/infiniband/hw/mlx5/qp.c | 8 +- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 4 +- .../ethernet/mellanox/mlx5/core/en_common.c | 23 ++- .../net/ethernet/mellanox/mlx5/core/en_main.c | 37 ++-- drivers/net/ethernet/mellanox/mlx5/core/mr.c | 185 ++++++++---------- include/linux/mlx5/device.h | 113 +---------- include/linux/mlx5/driver.h | 11 +- include/linux/mlx5/mlx5_ifc.h | 2 +- 10 files changed, 235 insertions(+), 332 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 372385d0f993..a59034aaa297 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -504,7 +504,7 @@ struct mlx5_ib_mr { int umred; int npages; struct mlx5_ib_dev *dev; - struct mlx5_create_mkey_mbox_out out; + u32 out[MLX5_ST_SZ_DW(create_mkey_out)]; struct mlx5_core_sig_ctx *sig; int live; void *descs_alloc; diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 4b021305c321..6f7e34753abc 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -135,20 +135,10 @@ static void reg_mr_callback(int status, void *context) return; } - if (mr->out.hdr.status) { - mlx5_ib_warn(dev, "failed - status %d, syndorme 0x%x\n", - mr->out.hdr.status, - be32_to_cpu(mr->out.hdr.syndrome)); - kfree(mr); - dev->fill_delay = 1; - mod_timer(&dev->delay_timer, jiffies + HZ); - return; - } - spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags); key = dev->mdev->priv.mkey_key++; spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags); - mr->mmkey.key = mlx5_idx_to_mkey(be32_to_cpu(mr->out.mkey) & 0xffffff) | key; + mr->mmkey.key = mlx5_idx_to_mkey(MLX5_GET(create_mkey_out, mr->out, mkey_index)) | key; cache->last_add = jiffies; @@ -170,16 +160,19 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num) { struct mlx5_mr_cache *cache = &dev->cache; struct mlx5_cache_ent *ent = &cache->ent[c]; - struct mlx5_create_mkey_mbox_in *in; + int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); struct mlx5_ib_mr *mr; int npages = 1 << ent->order; + void *mkc; + u32 *in; int err = 0; int i; - in = kzalloc(sizeof(*in), GFP_KERNEL); + in = kzalloc(inlen, GFP_KERNEL); if (!in) return -ENOMEM; + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); for (i = 0; i < num; i++) { if (ent->pending >= MAX_PENDING_REG_MR) { err = -EAGAIN; @@ -194,18 +187,22 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num) mr->order = ent->order; mr->umred = 1; mr->dev = dev; - in->seg.status = MLX5_MKEY_STATUS_FREE; - in->seg.xlt_oct_size = cpu_to_be32((npages + 1) / 2); - in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); - in->seg.flags = MLX5_ACCESS_MODE_MTT | MLX5_PERM_UMR_EN; - in->seg.log2_page_size = 12; + + MLX5_SET(mkc, mkc, free, 1); + MLX5_SET(mkc, mkc, umr_en, 1); + MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_MTT); + + MLX5_SET(mkc, mkc, qpn, 0xffffff); + MLX5_SET(mkc, mkc, translations_octword_size, (npages + 1) / 2); + MLX5_SET(mkc, mkc, log_page_size, 12); spin_lock_irq(&ent->lock); ent->pending++; spin_unlock_irq(&ent->lock); - err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, - sizeof(*in), reg_mr_callback, - mr, &mr->out); + err = mlx5_core_create_mkey_cb(dev->mdev, &mr->mmkey, + in, inlen, + mr->out, sizeof(mr->out), + reg_mr_callback, mr); if (err) { spin_lock_irq(&ent->lock); ent->pending--; @@ -670,30 +667,38 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev) struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc) { struct mlx5_ib_dev *dev = to_mdev(pd->device); + int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); struct mlx5_core_dev *mdev = dev->mdev; - struct mlx5_create_mkey_mbox_in *in; - struct mlx5_mkey_seg *seg; struct mlx5_ib_mr *mr; + void *mkc; + u32 *in; int err; mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) return ERR_PTR(-ENOMEM); - in = kzalloc(sizeof(*in), GFP_KERNEL); + in = kzalloc(inlen, GFP_KERNEL); if (!in) { err = -ENOMEM; goto err_free; } - seg = &in->seg; - seg->flags = convert_access(acc) | MLX5_ACCESS_MODE_PA; - seg->flags_pd = cpu_to_be32(to_mpd(pd)->pdn | MLX5_MKEY_LEN64); - seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); - seg->start_addr = 0; + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); - err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, sizeof(*in), NULL, NULL, - NULL); + MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_PA); + MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC)); + MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE)); + MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ)); + MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE)); + MLX5_SET(mkc, mkc, lr, 1); + + MLX5_SET(mkc, mkc, length64, 1); + MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + MLX5_SET64(mkc, mkc, start_addr, 0); + + err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen); if (err) goto err_in; @@ -1063,9 +1068,11 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, int page_shift, int access_flags) { struct mlx5_ib_dev *dev = to_mdev(pd->device); - struct mlx5_create_mkey_mbox_in *in; struct mlx5_ib_mr *mr; + __be64 *pas; + void *mkc; int inlen; + u32 *in; int err; bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg)); @@ -1073,31 +1080,41 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, if (!mr) return ERR_PTR(-ENOMEM); - inlen = sizeof(*in) + sizeof(*in->pas) * ((npages + 1) / 2) * 2; + inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + + sizeof(*pas) * ((npages + 1) / 2) * 2; in = mlx5_vzalloc(inlen); if (!in) { err = -ENOMEM; goto err_1; } - mlx5_ib_populate_pas(dev, umem, page_shift, in->pas, + pas = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); + mlx5_ib_populate_pas(dev, umem, page_shift, pas, pg_cap ? MLX5_IB_MTT_PRESENT : 0); - /* The MLX5_MKEY_INBOX_PG_ACCESS bit allows setting the access flags + /* The pg_access bit allows setting the access flags * in the page list submitted with the command. */ - in->flags = pg_cap ? cpu_to_be32(MLX5_MKEY_INBOX_PG_ACCESS) : 0; - in->seg.flags = convert_access(access_flags) | - MLX5_ACCESS_MODE_MTT; - in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn); - in->seg.start_addr = cpu_to_be64(virt_addr); - in->seg.len = cpu_to_be64(length); - in->seg.bsfs_octo_size = 0; - in->seg.xlt_oct_size = cpu_to_be32(get_octo_len(virt_addr, length, 1 << page_shift)); - in->seg.log2_page_size = page_shift; - in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); - in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length, - 1 << page_shift)); - err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen, NULL, - NULL, NULL); + MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap)); + + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_MTT); + MLX5_SET(mkc, mkc, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC)); + MLX5_SET(mkc, mkc, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE)); + MLX5_SET(mkc, mkc, rr, !!(access_flags & IB_ACCESS_REMOTE_READ)); + MLX5_SET(mkc, mkc, lw, !!(access_flags & IB_ACCESS_LOCAL_WRITE)); + MLX5_SET(mkc, mkc, lr, 1); + + MLX5_SET64(mkc, mkc, start_addr, virt_addr); + MLX5_SET64(mkc, mkc, len, length); + MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); + MLX5_SET(mkc, mkc, bsf_octword_size, 0); + MLX5_SET(mkc, mkc, translations_octword_size, + get_octo_len(virt_addr, length, 1 << page_shift)); + MLX5_SET(mkc, mkc, log_page_size, page_shift); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + MLX5_SET(create_mkey_in, in, translations_octword_actual_size, + get_octo_len(virt_addr, length, 1 << page_shift)); + + err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen); if (err) { mlx5_ib_warn(dev, "create mkey failed\n"); goto err_2; @@ -1523,30 +1540,32 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, u32 max_num_sg) { struct mlx5_ib_dev *dev = to_mdev(pd->device); - struct mlx5_create_mkey_mbox_in *in; - struct mlx5_ib_mr *mr; + int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); int ndescs = ALIGN(max_num_sg, 4); + struct mlx5_ib_mr *mr; + void *mkc; + u32 *in; int err; mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) return ERR_PTR(-ENOMEM); - in = kzalloc(sizeof(*in), GFP_KERNEL); + in = kzalloc(inlen, GFP_KERNEL); if (!in) { err = -ENOMEM; goto err_free; } - in->seg.status = MLX5_MKEY_STATUS_FREE; - in->seg.xlt_oct_size = cpu_to_be32(ndescs); - in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); - in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn); + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + MLX5_SET(mkc, mkc, free, 1); + MLX5_SET(mkc, mkc, translations_octword_size, ndescs); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); if (mr_type == IB_MR_TYPE_MEM_REG) { - mr->access_mode = MLX5_ACCESS_MODE_MTT; - in->seg.log2_page_size = PAGE_SHIFT; - + mr->access_mode = MLX5_MKC_ACCESS_MODE_MTT; + MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT); err = mlx5_alloc_priv_descs(pd->device, mr, ndescs, sizeof(u64)); if (err) @@ -1555,7 +1574,7 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, mr->desc_size = sizeof(u64); mr->max_descs = ndescs; } else if (mr_type == IB_MR_TYPE_SG_GAPS) { - mr->access_mode = MLX5_ACCESS_MODE_KLM; + mr->access_mode = MLX5_MKC_ACCESS_MODE_KLMS; err = mlx5_alloc_priv_descs(pd->device, mr, ndescs, sizeof(struct mlx5_klm)); @@ -1566,9 +1585,8 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, } else if (mr_type == IB_MR_TYPE_SIGNATURE) { u32 psv_index[2]; - in->seg.flags_pd = cpu_to_be32(be32_to_cpu(in->seg.flags_pd) | - MLX5_MKEY_BSF_EN); - in->seg.bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE); + MLX5_SET(mkc, mkc, bsf_en, 1); + MLX5_SET(mkc, mkc, bsf_octword_size, MLX5_MKEY_BSF_OCTO_SIZE); mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL); if (!mr->sig) { err = -ENOMEM; @@ -1581,7 +1599,7 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, if (err) goto err_free_sig; - mr->access_mode = MLX5_ACCESS_MODE_KLM; + mr->access_mode = MLX5_MKC_ACCESS_MODE_KLMS; mr->sig->psv_memory.psv_idx = psv_index[0]; mr->sig->psv_wire.psv_idx = psv_index[1]; @@ -1595,9 +1613,10 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, goto err_free_in; } - in->seg.flags = MLX5_PERM_UMR_EN | mr->access_mode; - err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, sizeof(*in), - NULL, NULL, NULL); + MLX5_SET(mkc, mkc, access_mode, mr->access_mode); + MLX5_SET(mkc, mkc, umr_en, 1); + + err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen); if (err) goto err_destroy_psv; @@ -1633,8 +1652,10 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(pd->device); - struct mlx5_create_mkey_mbox_in *in = NULL; + int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); struct mlx5_ib_mw *mw = NULL; + u32 *in = NULL; + void *mkc; int ndescs; int err; struct mlx5_ib_alloc_mw req = {}; @@ -1658,23 +1679,24 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, ndescs = req.num_klms ? roundup(req.num_klms, 4) : roundup(1, 4); mw = kzalloc(sizeof(*mw), GFP_KERNEL); - in = kzalloc(sizeof(*in), GFP_KERNEL); + in = kzalloc(inlen, GFP_KERNEL); if (!mw || !in) { err = -ENOMEM; goto free; } - in->seg.status = MLX5_MKEY_STATUS_FREE; - in->seg.xlt_oct_size = cpu_to_be32(ndescs); - in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn); - in->seg.flags = MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_KLM | - MLX5_PERM_LOCAL_READ; - if (type == IB_MW_TYPE_2) - in->seg.flags_pd |= cpu_to_be32(MLX5_MKEY_REMOTE_INVAL); - in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); - err = mlx5_core_create_mkey(dev->mdev, &mw->mmkey, in, sizeof(*in), - NULL, NULL, NULL); + MLX5_SET(mkc, mkc, free, 1); + MLX5_SET(mkc, mkc, translations_octword_size, ndescs); + MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); + MLX5_SET(mkc, mkc, umr_en, 1); + MLX5_SET(mkc, mkc, lr, 1); + MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_KLMS); + MLX5_SET(mkc, mkc, en_rinval, !!((type == IB_MW_TYPE_2))); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + + err = mlx5_core_create_mkey(dev->mdev, &mw->mmkey, in, inlen); if (err) goto free; @@ -1811,7 +1833,7 @@ int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, mr->desc_size * mr->max_descs, DMA_TO_DEVICE); - if (mr->access_mode == MLX5_ACCESS_MODE_KLM) + if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS) n = mlx5_ib_sg_to_klms(mr, sg, sg_nents, sg_offset); else n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 0dd7d93cac95..21ab0e26fa71 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -2968,7 +2968,7 @@ static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr, memset(umr, 0, sizeof(*umr)); - if (mr->access_mode == MLX5_ACCESS_MODE_KLM) + if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS) /* KLMs take twice the size of MTTs */ ndescs *= 2; @@ -3111,9 +3111,9 @@ static void set_reg_mkey_seg(struct mlx5_mkey_seg *seg, memset(seg, 0, sizeof(*seg)); - if (mr->access_mode == MLX5_ACCESS_MODE_MTT) + if (mr->access_mode == MLX5_MKC_ACCESS_MODE_MTT) seg->log2_page_size = ilog2(mr->ibmr.page_size); - else if (mr->access_mode == MLX5_ACCESS_MODE_KLM) + else if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS) /* KLMs take twice the size of MTTs */ ndescs *= 2; @@ -3454,7 +3454,7 @@ static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg, memset(seg, 0, sizeof(*seg)); seg->flags = get_umr_flags(wr->access_flags) | - MLX5_ACCESS_MODE_KLM; + MLX5_MKC_ACCESS_MODE_KLMS; seg->qpn_mkey7_0 = cpu_to_be32((sig_key & 0xff) | 0xffffff00); seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 | MLX5_MKEY_BSF_EN | pdn); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 0d55e0f33f53..88b05400fbc8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -1301,10 +1301,12 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec) callback = ent->callback; context = ent->context; err = ent->ret; - if (!err) + if (!err) { err = mlx5_copy_from_msg(ent->uout, ent->out, ent->uout_size); + err = err ? err : mlx5_cmd_status_to_err_v2(ent->uout); + } mlx5_free_cmd_msg(dev, ent->out); free_msg(dev, ent->in); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c index 673043ccd76c..3247c3c543a0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c @@ -60,24 +60,27 @@ void mlx5e_destroy_tir(struct mlx5_core_dev *mdev, static int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, struct mlx5_core_mkey *mkey) { - struct mlx5_create_mkey_mbox_in *in; + int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); + void *mkc; + u32 *in; int err; - in = mlx5_vzalloc(sizeof(*in)); + in = mlx5_vzalloc(inlen); if (!in) return -ENOMEM; - in->seg.flags = MLX5_PERM_LOCAL_WRITE | - MLX5_PERM_LOCAL_READ | - MLX5_ACCESS_MODE_PA; - in->seg.flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64); - in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_PA); + MLX5_SET(mkc, mkc, lw, 1); + MLX5_SET(mkc, mkc, lr, 1); - err = mlx5_core_create_mkey(mdev, mkey, in, sizeof(*in), NULL, NULL, - NULL); + MLX5_SET(mkc, mkc, pd, pdn); + MLX5_SET(mkc, mkc, length64, 1); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + + err = mlx5_core_create_mkey(mdev, mkey, in, inlen); kvfree(in); - return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 870bea37c57c..98fb0d8ef2d2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3228,35 +3228,34 @@ static void mlx5e_destroy_q_counter(struct mlx5e_priv *priv) static int mlx5e_create_umr_mkey(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5_create_mkey_mbox_in *in; - struct mlx5_mkey_seg *mkc; - int inlen = sizeof(*in); - u64 npages = - priv->profile->max_nch(mdev) * MLX5_CHANNEL_MAX_NUM_MTTS; + u64 npages = priv->profile->max_nch(mdev) * MLX5_CHANNEL_MAX_NUM_MTTS; + int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); + void *mkc; + u32 *in; int err; in = mlx5_vzalloc(inlen); if (!in) return -ENOMEM; - mkc = &in->seg; - mkc->status = MLX5_MKEY_STATUS_FREE; - mkc->flags = MLX5_PERM_UMR_EN | - MLX5_PERM_LOCAL_READ | - MLX5_PERM_LOCAL_WRITE | - MLX5_ACCESS_MODE_MTT; + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); - mkc->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); - mkc->flags_pd = cpu_to_be32(mdev->mlx5e_res.pdn); - mkc->len = cpu_to_be64(npages << PAGE_SHIFT); - mkc->xlt_oct_size = cpu_to_be32(mlx5e_get_mtt_octw(npages)); - mkc->log2_page_size = PAGE_SHIFT; + MLX5_SET(mkc, mkc, free, 1); + MLX5_SET(mkc, mkc, umr_en, 1); + MLX5_SET(mkc, mkc, lw, 1); + MLX5_SET(mkc, mkc, lr, 1); + MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_MTT); - err = mlx5_core_create_mkey(mdev, &priv->umr_mkey, in, inlen, NULL, - NULL, NULL); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.pdn); + MLX5_SET64(mkc, mkc, len, npages << PAGE_SHIFT); + MLX5_SET(mkc, mkc, translations_octword_size, + mlx5e_get_mtt_octw(npages)); + MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT); + + err = mlx5_core_create_mkey(mdev, &priv->umr_mkey, in, inlen); kvfree(in); - return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c index 77a7293921d5..0a7b743caafe 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -49,48 +49,44 @@ void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev) { } -int mlx5_core_create_mkey(struct mlx5_core_dev *dev, - struct mlx5_core_mkey *mkey, - struct mlx5_create_mkey_mbox_in *in, int inlen, - mlx5_cmd_cbk_t callback, void *context, - struct mlx5_create_mkey_mbox_out *out) +int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, + struct mlx5_core_mkey *mkey, + u32 *in, int inlen, + u32 *out, int outlen, + mlx5_cmd_cbk_t callback, void *context) { struct mlx5_mkey_table *table = &dev->priv.mkey_table; - struct mlx5_create_mkey_mbox_out lout; + u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {0}; + u32 mkey_index; + void *mkc; int err; u8 key; - memset(&lout, 0, sizeof(lout)); spin_lock_irq(&dev->priv.mkey_lock); key = dev->priv.mkey_key++; spin_unlock_irq(&dev->priv.mkey_lock); - in->seg.qpn_mkey7_0 |= cpu_to_be32(key); - in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_MKEY); - if (callback) { - err = mlx5_cmd_exec_cb(dev, in, inlen, out, sizeof(*out), - callback, context); + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + + MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY); + MLX5_SET(mkc, mkc, mkey_7_0, key); + + if (callback) + return mlx5_cmd_exec_cb(dev, in, inlen, out, outlen, + callback, context); + + err = mlx5_cmd_exec(dev, in, inlen, lout, sizeof(lout)); + err = err ? : mlx5_cmd_status_to_err_v2(lout); + if (err) return err; - } else { - err = mlx5_cmd_exec(dev, in, inlen, &lout, sizeof(lout)); - } - if (err) { - mlx5_core_dbg(dev, "cmd exec failed %d\n", err); - return err; - } - - if (lout.hdr.status) { - mlx5_core_dbg(dev, "status %d\n", lout.hdr.status); - return mlx5_cmd_status_to_err(&lout.hdr); - } - - mkey->iova = be64_to_cpu(in->seg.start_addr); - mkey->size = be64_to_cpu(in->seg.len); - mkey->key = mlx5_idx_to_mkey(be32_to_cpu(lout.mkey) & 0xffffff) | key; - mkey->pd = be32_to_cpu(in->seg.flags_pd) & 0xffffff; + mkey_index = MLX5_GET(create_mkey_out, lout, mkey_index); + mkey->iova = MLX5_GET64(mkc, mkc, start_addr); + mkey->size = MLX5_GET64(mkc, mkc, len); + mkey->key = mlx5_idx_to_mkey(mkey_index) | key; + mkey->pd = MLX5_GET(mkc, mkc, pd); mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n", - be32_to_cpu(lout.mkey), key, mkey->key); + mkey_index, key, mkey->key); /* connect to mkey tree */ write_lock_irq(&table->lock); @@ -104,21 +100,27 @@ int mlx5_core_create_mkey(struct mlx5_core_dev *dev, return err; } +EXPORT_SYMBOL(mlx5_core_create_mkey_cb); + +int mlx5_core_create_mkey(struct mlx5_core_dev *dev, + struct mlx5_core_mkey *mkey, + u32 *in, int inlen) +{ + return mlx5_core_create_mkey_cb(dev, mkey, in, inlen, + NULL, 0, NULL, NULL); +} EXPORT_SYMBOL(mlx5_core_create_mkey); int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey) { struct mlx5_mkey_table *table = &dev->priv.mkey_table; - struct mlx5_destroy_mkey_mbox_in in; - struct mlx5_destroy_mkey_mbox_out out; + u32 out[MLX5_ST_SZ_DW(destroy_mkey_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(destroy_mkey_in)] = {0}; struct mlx5_core_mkey *deleted_mkey; unsigned long flags; int err; - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); - write_lock_irqsave(&table->lock, flags); deleted_mkey = radix_tree_delete(&table->tree, mlx5_base_mkey(mkey->key)); write_unlock_irqrestore(&table->lock, flags); @@ -128,94 +130,81 @@ int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, return -ENOENT; } - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_MKEY); - in.mkey = cpu_to_be32(mlx5_mkey_to_idx(mkey->key)); - err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); - if (err) - return err; + MLX5_SET(destroy_mkey_in, in, opcode, MLX5_CMD_OP_DESTROY_MKEY); + MLX5_SET(destroy_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mkey->key)); - if (out.hdr.status) - return mlx5_cmd_status_to_err(&out.hdr); - - return err; + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + return err ? : mlx5_cmd_status_to_err_v2(out); } EXPORT_SYMBOL(mlx5_core_destroy_mkey); int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, - struct mlx5_query_mkey_mbox_out *out, int outlen) + u32 *out, int outlen) { - struct mlx5_query_mkey_mbox_in in; + u32 in[MLX5_ST_SZ_DW(query_mkey_in)] = {0}; int err; - memset(&in, 0, sizeof(in)); memset(out, 0, outlen); + MLX5_SET(query_mkey_in, in, opcode, MLX5_CMD_OP_QUERY_MKEY); + MLX5_SET(query_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mkey->key)); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_MKEY); - in.mkey = cpu_to_be32(mlx5_mkey_to_idx(mkey->key)); - err = mlx5_cmd_exec(dev, &in, sizeof(in), out, outlen); - if (err) - return err; - - if (out->hdr.status) - return mlx5_cmd_status_to_err(&out->hdr); - - return err; + err = mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); + return err ? : mlx5_cmd_status_to_err_v2(out); } EXPORT_SYMBOL(mlx5_core_query_mkey); int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *_mkey, u32 *mkey) { - struct mlx5_query_special_ctxs_mbox_in in; - struct mlx5_query_special_ctxs_mbox_out out; + u32 out[MLX5_ST_SZ_DW(query_special_contexts_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(query_special_contexts_in)] = {0}; int err; - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); - - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS); - err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); + MLX5_SET(query_special_contexts_in, in, opcode, + MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS); + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + err = err ? : mlx5_cmd_status_to_err_v2(out); if (err) return err; - if (out.hdr.status) - return mlx5_cmd_status_to_err(&out.hdr); - - *mkey = be32_to_cpu(out.dump_fill_mkey); - + *mkey = MLX5_GET(query_special_contexts_out, out, dump_fill_mkey); return err; } EXPORT_SYMBOL(mlx5_core_dump_fill_mkey); +static inline u32 mlx5_get_psv(u32 *out, int psv_index) +{ + switch (psv_index) { + case 1: return MLX5_GET(create_psv_out, out, psv1_index); + case 2: return MLX5_GET(create_psv_out, out, psv2_index); + case 3: return MLX5_GET(create_psv_out, out, psv3_index); + default: return MLX5_GET(create_psv_out, out, psv0_index); + } +} + int mlx5_core_create_psv(struct mlx5_core_dev *dev, u32 pdn, int npsvs, u32 *sig_index) { - struct mlx5_allocate_psv_in in; - struct mlx5_allocate_psv_out out; + u32 out[MLX5_ST_SZ_DW(create_psv_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(create_psv_in)] = {0}; int i, err; if (npsvs > MLX5_MAX_PSVS) return -EINVAL; - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); + MLX5_SET(create_psv_in, in, opcode, MLX5_CMD_OP_CREATE_PSV); + MLX5_SET(create_psv_in, in, pd, pdn); + MLX5_SET(create_psv_in, in, num_psv, npsvs); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_PSV); - in.npsv_pd = cpu_to_be32((npsvs << 28) | pdn); - err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + err = err ? : mlx5_cmd_status_to_err_v2(out); if (err) { - mlx5_core_err(dev, "cmd exec failed %d\n", err); + mlx5_core_err(dev, "create_psv cmd exec failed %d\n", err); return err; } - if (out.hdr.status) { - mlx5_core_err(dev, "create_psv bad status %d\n", - out.hdr.status); - return mlx5_cmd_status_to_err(&out.hdr); - } - for (i = 0; i < npsvs; i++) - sig_index[i] = be32_to_cpu(out.psv_idx[i]) & 0xffffff; + sig_index[i] = mlx5_get_psv(out, i); return err; } @@ -223,29 +212,13 @@ EXPORT_SYMBOL(mlx5_core_create_psv); int mlx5_core_destroy_psv(struct mlx5_core_dev *dev, int psv_num) { - struct mlx5_destroy_psv_in in; - struct mlx5_destroy_psv_out out; + u32 out[MLX5_ST_SZ_DW(destroy_psv_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(destroy_psv_in)] = {0}; int err; - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); - - in.psv_number = cpu_to_be32(psv_num); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_PSV); - err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); - if (err) { - mlx5_core_err(dev, "destroy_psv cmd exec failed %d\n", err); - goto out; - } - - if (out.hdr.status) { - mlx5_core_err(dev, "destroy_psv bad status %d\n", - out.hdr.status); - err = mlx5_cmd_status_to_err(&out.hdr); - goto out; - } - -out: - return err; + MLX5_SET(destroy_psv_in, in, opcode, MLX5_CMD_OP_DESTROY_PSV); + MLX5_SET(destroy_psv_in, in, psvn, psv_num); + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + return err ? : mlx5_cmd_status_to_err_v2(out); } EXPORT_SYMBOL(mlx5_core_destroy_psv); diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 5a1c1606bdbd..fb002db1e2f0 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -197,19 +197,6 @@ enum { MLX5_PCIE_CTRL_TPH_MASK = 3 << 4, }; -enum { - MLX5_ACCESS_MODE_PA = 0, - MLX5_ACCESS_MODE_MTT = 1, - MLX5_ACCESS_MODE_KLM = 2 -}; - -enum { - MLX5_MKEY_REMOTE_INVAL = 1 << 24, - MLX5_MKEY_FLAG_SYNC_UMR = 1 << 29, - MLX5_MKEY_BSF_EN = 1 << 30, - MLX5_MKEY_LEN64 = 1 << 31, -}; - enum { MLX5_EN_RD = (u64)1, MLX5_EN_WR = (u64)2 @@ -923,6 +910,13 @@ enum { MLX5_MKEY_STATUS_FREE = 1 << 6, }; +enum { + MLX5_MKEY_REMOTE_INVAL = 1 << 24, + MLX5_MKEY_FLAG_SYNC_UMR = 1 << 29, + MLX5_MKEY_BSF_EN = 1 << 30, + MLX5_MKEY_LEN64 = 1 << 31, +}; + struct mlx5_mkey_seg { /* This is a two bit field occupying bits 31-30. * bit 31 is always 0, @@ -945,105 +939,12 @@ struct mlx5_mkey_seg { u8 rsvd4[4]; }; -struct mlx5_query_special_ctxs_mbox_in { - struct mlx5_inbox_hdr hdr; - u8 rsvd[8]; -}; - -struct mlx5_query_special_ctxs_mbox_out { - struct mlx5_outbox_hdr hdr; - __be32 dump_fill_mkey; - __be32 reserved_lkey; -}; - -struct mlx5_create_mkey_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 input_mkey_index; - __be32 flags; - struct mlx5_mkey_seg seg; - u8 rsvd1[16]; - __be32 xlat_oct_act_size; - __be32 rsvd2; - u8 rsvd3[168]; - __be64 pas[0]; -}; - -struct mlx5_create_mkey_mbox_out { - struct mlx5_outbox_hdr hdr; - __be32 mkey; - u8 rsvd[4]; -}; - -struct mlx5_destroy_mkey_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 mkey; - u8 rsvd[4]; -}; - -struct mlx5_destroy_mkey_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd[8]; -}; - -struct mlx5_query_mkey_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 mkey; -}; - -struct mlx5_query_mkey_mbox_out { - struct mlx5_outbox_hdr hdr; - __be64 pas[0]; -}; - -struct mlx5_modify_mkey_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 mkey; - __be64 pas[0]; -}; - -struct mlx5_modify_mkey_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd[8]; -}; - -struct mlx5_dump_mkey_mbox_in { - struct mlx5_inbox_hdr hdr; -}; - -struct mlx5_dump_mkey_mbox_out { - struct mlx5_outbox_hdr hdr; - __be32 mkey; -}; - #define MLX5_ATTR_EXTENDED_PORT_INFO cpu_to_be16(0xff90) enum { MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO = 1 << 0 }; -struct mlx5_allocate_psv_in { - struct mlx5_inbox_hdr hdr; - __be32 npsv_pd; - __be32 rsvd_psv0; -}; - -struct mlx5_allocate_psv_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd[8]; - __be32 psv_idx[4]; -}; - -struct mlx5_destroy_psv_in { - struct mlx5_inbox_hdr hdr; - __be32 psv_number; - u8 rsvd[4]; -}; - -struct mlx5_destroy_psv_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd[8]; -}; - enum { VPORT_STATE_DOWN = 0x0, VPORT_STATE_UP = 0x1, diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index eed4b612572d..173817187abb 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -807,15 +807,18 @@ int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, u16 lwm, int is_srq); void mlx5_init_mkey_table(struct mlx5_core_dev *dev); void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev); +int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, + struct mlx5_core_mkey *mkey, + u32 *in, int inlen, + u32 *out, int outlen, + mlx5_cmd_cbk_t callback, void *context); int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, - struct mlx5_create_mkey_mbox_in *in, int inlen, - mlx5_cmd_cbk_t callback, void *context, - struct mlx5_create_mkey_mbox_out *out); + u32 *in, int inlen); int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey); int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, - struct mlx5_query_mkey_mbox_out *out, int outlen); + u32 *out, int outlen); int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *_mkey, u32 *mkey); int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 3f70fc9c2fc9..2a39a06dbad4 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -3489,7 +3489,7 @@ struct mlx5_ifc_query_special_contexts_out_bits { u8 syndrome[0x20]; - u8 reserved_at_40[0x20]; + u8 dump_fill_mkey[0x20]; u8 resd_lkey[0x20]; }; From f8c46cb39079b7415ada1affc4631ae761d8b621 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 10 Aug 2016 14:36:00 -0700 Subject: [PATCH 0218/1715] netns: do not call pernet ops for not yet set up init_net namespace When CONFIG_NET_NS is disabled, registering pernet operations causes init() to be called immediately with init_net as an argument. Unfortunately this leads to some pernet ops, such as proc_net_ns_init() to be called too early, when init_net namespace has not been fully initialized. This causes issues when we want to change pernet ops to use more data from the net namespace in question, for example reference user namespace that owns our network namespace. To fix this we could either play game of musical chairs and rearrange init order, or we could do the same as when CONFIG_NET_NS is enabled, and postpone calling pernet ops->init() until namespace is set up properly. Note that we can not simply undo commit ed160e839d2e ("[NET]: Cleanup pernet operation without CONFIG_NET_NS") and use the same implementations for __register_pernet_operations() and __unregister_pernet_operations(), because many pernet ops are marked as __net_initdata and will be discarded, which wreaks havoc on our ops lists. Here we rely on the fact that we only use lists until init_net is fully initialized, which happens much earlier than discarding __net_initdata sections. Signed-off-by: Dmitry Torokhov Signed-off-by: David S. Miller --- net/core/net_namespace.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 2c2eb1b629b1..1fe58167d39a 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -37,6 +37,8 @@ struct net init_net = { }; EXPORT_SYMBOL(init_net); +static bool init_net_initialized; + #define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */ static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS; @@ -750,6 +752,8 @@ static int __init net_ns_init(void) if (setup_net(&init_net, &init_user_ns)) panic("Could not setup the initial network namespace"); + init_net_initialized = true; + rtnl_lock(); list_add_tail_rcu(&init_net.list, &net_namespace_list); rtnl_unlock(); @@ -811,15 +815,24 @@ static void __unregister_pernet_operations(struct pernet_operations *ops) static int __register_pernet_operations(struct list_head *list, struct pernet_operations *ops) { + if (!init_net_initialized) { + list_add_tail(&ops->list, list); + return 0; + } + return ops_init(ops, &init_net); } static void __unregister_pernet_operations(struct pernet_operations *ops) { - LIST_HEAD(net_exit_list); - list_add(&init_net.exit_list, &net_exit_list); - ops_exit_list(ops, &net_exit_list); - ops_free_list(ops, &net_exit_list); + if (!init_net_initialized) { + list_del(&ops->list); + } else { + LIST_HEAD(net_exit_list); + list_add(&init_net.exit_list, &net_exit_list); + ops_exit_list(ops, &net_exit_list); + ops_free_list(ops, &net_exit_list); + } } #endif /* CONFIG_NET_NS */ From c110486f6cb240f36ec143cad6628d52c071f529 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 10 Aug 2016 14:36:01 -0700 Subject: [PATCH 0219/1715] proc: make proc entries inherit ownership from parent There are certain parameters that belong to net namespace and that are exported in /proc. They should be controllable by the container's owner, but are currently owned by global root and thus not available. Let's change proc code to inherit ownership of parent entry, and when create per-ns "net" proc entry set it up as owned by container's owner. Signed-off-by: Dmitry Torokhov Signed-off-by: David S. Miller --- fs/proc/generic.c | 2 ++ fs/proc/proc_net.c | 13 +++++++++++++ 2 files changed, 15 insertions(+) diff --git a/fs/proc/generic.c b/fs/proc/generic.c index c633476616e0..bca66d83a765 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -390,6 +390,8 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, atomic_set(&ent->count, 1); spin_lock_init(&ent->pde_unload_lock); INIT_LIST_HEAD(&ent->pde_openers); + proc_set_user(ent, (*parent)->uid, (*parent)->gid); + out: return ent; } diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index c8bbc68cdb05..7ae6b1da7cab 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -185,6 +186,8 @@ const struct file_operations proc_net_operations = { static __net_init int proc_net_ns_init(struct net *net) { struct proc_dir_entry *netd, *net_statd; + kuid_t uid; + kgid_t gid; int err; err = -ENOMEM; @@ -199,6 +202,16 @@ static __net_init int proc_net_ns_init(struct net *net) netd->parent = &proc_root; memcpy(netd->name, "net", 4); + uid = make_kuid(net->user_ns, 0); + if (!uid_valid(uid)) + uid = netd->uid; + + gid = make_kgid(net->user_ns, 0); + if (!gid_valid(gid)) + gid = netd->gid; + + proc_set_user(netd, uid, gid); + err = -EEXIST; net_statd = proc_net_mkdir(net, "stat", netd); if (!net_statd) From e79c6a4fc923eed2bdd3b716e0f01414847db90a Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 10 Aug 2016 14:36:02 -0700 Subject: [PATCH 0220/1715] net: make net namespace sysctls belong to container's owner If net namespace is attached to a user namespace let's make container's root owner of sysctls affecting said network namespace instead of global root. This also allows us to clean up net_ctl_permissions() because we do not need to fudge permissions anymore for the container's owner since it now owns the objects in question. Acked-by: "Eric W. Biederman" Signed-off-by: Dmitry Torokhov Signed-off-by: David S. Miller --- fs/proc/proc_sysctl.c | 5 +++++ include/linux/sysctl.h | 4 ++++ net/sysctl_net.c | 29 ++++++++++++++++++++--------- 3 files changed, 29 insertions(+), 9 deletions(-) diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index b59db94d2ff4..62d8c6975d34 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -430,6 +430,7 @@ static int sysctl_perm(struct ctl_table_header *head, struct ctl_table *table, i static struct inode *proc_sys_make_inode(struct super_block *sb, struct ctl_table_header *head, struct ctl_table *table) { + struct ctl_table_root *root = head->root; struct inode *inode; struct proc_inode *ei; @@ -457,6 +458,10 @@ static struct inode *proc_sys_make_inode(struct super_block *sb, if (is_empty_dir(head)) make_empty_dir_inode(inode); } + + if (root->set_ownership) + root->set_ownership(head, table, &inode->i_uid, &inode->i_gid); + out: return inode; } diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 697e160c78d0..d82cb6011e77 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -25,6 +25,7 @@ #include #include #include +#include #include /* For the /proc/sys support */ @@ -157,6 +158,9 @@ struct ctl_table_root { struct ctl_table_set default_set; struct ctl_table_set *(*lookup)(struct ctl_table_root *root, struct nsproxy *namespaces); + void (*set_ownership)(struct ctl_table_header *head, + struct ctl_table *table, + kuid_t *uid, kgid_t *gid); int (*permissions)(struct ctl_table_header *head, struct ctl_table *table); }; diff --git a/net/sysctl_net.c b/net/sysctl_net.c index 46a71c701e7c..5bc1a3d57401 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -42,26 +42,37 @@ static int net_ctl_permissions(struct ctl_table_header *head, struct ctl_table *table) { struct net *net = container_of(head->set, struct net, sysctls); - kuid_t root_uid = make_kuid(net->user_ns, 0); - kgid_t root_gid = make_kgid(net->user_ns, 0); /* Allow network administrator to have same access as root. */ - if (ns_capable_noaudit(net->user_ns, CAP_NET_ADMIN) || - uid_eq(root_uid, current_euid())) { + if (ns_capable(net->user_ns, CAP_NET_ADMIN)) { int mode = (table->mode >> 6) & 7; return (mode << 6) | (mode << 3) | mode; } - /* Allow netns root group to have the same access as the root group */ - if (in_egroup_p(root_gid)) { - int mode = (table->mode >> 3) & 7; - return (mode << 3) | mode; - } + return table->mode; } +static void net_ctl_set_ownership(struct ctl_table_header *head, + struct ctl_table *table, + kuid_t *uid, kgid_t *gid) +{ + struct net *net = container_of(head->set, struct net, sysctls); + kuid_t ns_root_uid; + kgid_t ns_root_gid; + + ns_root_uid = make_kuid(net->user_ns, 0); + if (uid_valid(ns_root_uid)) + *uid = ns_root_uid; + + ns_root_gid = make_kgid(net->user_ns, 0); + if (gid_valid(ns_root_gid)) + *gid = ns_root_gid; +} + static struct ctl_table_root net_sysctl_root = { .lookup = net_ctl_header_lookup, .permissions = net_ctl_permissions, + .set_ownership = net_ctl_set_ownership, }; static int __net_init sysctl_net_init(struct net *net) From 7829451c695e5b795fef95d72cd258e78d52f62d Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Thu, 11 Aug 2016 21:06:23 +0530 Subject: [PATCH 0221/1715] cxgb4: Add control net_device for configuring PCIe VF Issue: For instance, the current APIs assume a 1-to-1 mapping of Network Ports, Physical Functions and the SR-IOV Virtual Functions of those Physical Functions. This is not the case with our cards where any Virtual Function can be hooked up to any Port -- or any number of Ports the current Linux APIs also assume only 1 Network Interface/Port can be accessed per Virtual Function. Another issue is that these APIs assume that the Administrative Driver is attached to the Physical Function Associated with a Virtual Function. This is not the case with our card where all administration is performed by a Driver which is not attached to any of the Physical Functions which have SR-IOV PCI Capabilities. Another consequence of these assumptions is the inability to utilize all of the cards SR-IOV resources. For instance, our cards have SR-IOV Capabilities on Physical Functions 0..3 and the administrative Driver attaches to Physical Function 4. Each of the Physical Functions 0..3 can support up to 16 Virtual Functions. With the current Linux APIs, a 2-Port card would only be able to use the Virtual Functions on Physical Function 0..1 and not allow the Virtual Functions on Physical Functions 2..3 to be used since there are no Ports 2..3 on a 2-Port card. Fix: Since the control node is always the netdevice for all VF ACL commands. Created a dummy netdevice for each Physical Function from 0 to 3 through which one could control their VFs. The device won't be associated with any port, since it doesn't need to transmit/receive. Its purely used for VF management purpose only. The device will be registered only when VF for a particular PF is configured using PCI sysfs interface and unregistered while pci_disable_sriov() for the PF is called. Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- .../net/ethernet/chelsio/cxgb4/cxgb4_main.c | 121 +++++++++++++++--- 1 file changed, 103 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index c45de49dc963..b7e932ec6894 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -3139,6 +3139,24 @@ static const struct net_device_ops cxgb4_netdev_ops = { }; +static const struct net_device_ops cxgb4_mgmt_netdev_ops = { +}; + +static void get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) +{ + struct adapter *adapter = netdev2adap(dev); + + strlcpy(info->driver, cxgb4_driver_name, sizeof(info->driver)); + strlcpy(info->version, cxgb4_driver_version, + sizeof(info->version)); + strlcpy(info->bus_info, pci_name(adapter->pdev), + sizeof(info->bus_info)); +} + +static const struct ethtool_ops cxgb4_mgmt_ethtool_ops = { + .get_drvinfo = get_drvinfo, +}; + void t4_fatal_err(struct adapter *adap) { t4_set_reg_field(adap, SGE_CONTROL_A, GLOBALENABLE_F, 0); @@ -4836,19 +4854,12 @@ static int get_chip_type(struct pci_dev *pdev, u32 pl_rev) #ifdef CONFIG_PCI_IOV static int cxgb4_iov_configure(struct pci_dev *pdev, int num_vfs) { + struct adapter *adap = pci_get_drvdata(pdev); int err = 0; int current_vfs = pci_num_vf(pdev); u32 pcie_fw; - void __iomem *regs; - regs = pci_ioremap_bar(pdev, 0); - if (!regs) { - dev_err(&pdev->dev, "cannot map device registers\n"); - return -ENOMEM; - } - - pcie_fw = readl(regs + PCIE_FW_A); - iounmap(regs); + pcie_fw = readl(adap->regs + PCIE_FW_A); /* Check if cxgb4 is the MASTER and fw is initialized */ if (!(pcie_fw & PCIE_FW_INIT_F) || !(pcie_fw & PCIE_FW_MASTER_VLD_F) || @@ -4875,6 +4886,8 @@ static int cxgb4_iov_configure(struct pci_dev *pdev, int num_vfs) */ if (!num_vfs) { pci_disable_sriov(pdev); + if (adap->port[0]->reg_state == NETREG_REGISTERED) + unregister_netdev(adap->port[0]); return num_vfs; } @@ -4882,6 +4895,12 @@ static int cxgb4_iov_configure(struct pci_dev *pdev, int num_vfs) err = pci_enable_sriov(pdev, num_vfs); if (err) return err; + + if (adap->port[0]->reg_state == NETREG_UNINITIALIZED) { + err = register_netdev(adap->port[0]); + if (err < 0) + pr_info("Unable to register VF mgmt netdev\n"); + } } return num_vfs; } @@ -4893,9 +4912,14 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) struct port_info *pi; bool highdma = false; struct adapter *adapter = NULL; + struct net_device *netdev; +#ifdef CONFIG_PCI_IOV + char name[IFNAMSIZ]; +#endif void __iomem *regs; u32 whoami, pl_rev; enum chip_type chip; + static int adap_idx = 1; printk_once(KERN_INFO "%s - version %s\n", DRV_DESC, DRV_VERSION); @@ -4930,7 +4954,9 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) func = CHELSIO_CHIP_VERSION(chip) <= CHELSIO_T5 ? SOURCEPF_G(whoami) : T6_SOURCEPF_G(whoami); if (func != ent->driver_data) { +#ifndef CONFIG_PCI_IOV iounmap(regs); +#endif pci_disable_device(pdev); pci_save_state(pdev); /* to restore SR-IOV later */ goto sriov; @@ -4962,6 +4988,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) err = -ENOMEM; goto out_unmap_bar0; } + adap_idx++; adapter->workq = create_singlethread_workqueue("cxgb4"); if (!adapter->workq) { @@ -5048,8 +5075,6 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) T6_STATMODE_V(0))); for_each_port(adapter, i) { - struct net_device *netdev; - netdev = alloc_etherdev_mq(sizeof(struct port_info), MAX_ETH_QSETS); if (!netdev) { @@ -5217,6 +5242,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) attach_ulds(adapter); print_adapter_info(adapter); + return 0; sriov: #ifdef CONFIG_PCI_IOV @@ -5230,9 +5256,58 @@ sriov: "instantiated %u virtual functions\n", num_vf[func]); } -#endif + + adapter = kzalloc(sizeof(*adapter), GFP_KERNEL); + if (!adapter) { + err = -ENOMEM; + goto free_pci_region; + } + + snprintf(name, IFNAMSIZ, "mgmtpf%d%d", adap_idx, func); + netdev = alloc_netdev(0, name, NET_NAME_UNKNOWN, ether_setup); + if (!netdev) { + err = -ENOMEM; + goto free_adapter; + } + + adapter->pdev = pdev; + adapter->pdev_dev = &pdev->dev; + adapter->name = pci_name(pdev); + adapter->mbox = func; + adapter->pf = func; + adapter->regs = regs; + adapter->mbox_log = kzalloc(sizeof(*adapter->mbox_log) + + (sizeof(struct mbox_cmd) * + T4_OS_LOG_MBOX_CMDS), + GFP_KERNEL); + if (!adapter->mbox_log) { + err = -ENOMEM; + goto free_netdevice; + } + pi = netdev_priv(netdev); + pi->adapter = adapter; + SET_NETDEV_DEV(netdev, &pdev->dev); + pci_set_drvdata(pdev, adapter); + + adapter->port[0] = netdev; + netdev->netdev_ops = &cxgb4_mgmt_netdev_ops; + netdev->ethtool_ops = &cxgb4_mgmt_ethtool_ops; + return 0; + free_netdevice: + free_netdev(adapter->port[0]); + free_adapter: + kfree(adapter); + free_pci_region: + iounmap(regs); + pci_disable_sriov(pdev); + pci_release_regions(pdev); + return err; +#else + return 0; +#endif + out_free_dev: free_some_resources(adapter); out_unmap_bar: @@ -5258,12 +5333,12 @@ static void remove_one(struct pci_dev *pdev) { struct adapter *adapter = pci_get_drvdata(pdev); -#ifdef CONFIG_PCI_IOV - pci_disable_sriov(pdev); + if (!adapter) { + pci_release_regions(pdev); + return; + } -#endif - - if (adapter) { + if (adapter->pf == 4) { int i; /* Tear down per-adapter Work Queue first since it can contain @@ -5312,8 +5387,18 @@ static void remove_one(struct pci_dev *pdev) kfree(adapter->mbox_log); synchronize_rcu(); kfree(adapter); - } else + } +#ifdef CONFIG_PCI_IOV + else { + if (adapter->port[0]->reg_state == NETREG_REGISTERED) + unregister_netdev(adapter->port[0]); + free_netdev(adapter->port[0]); + iounmap(adapter->regs); + kfree(adapter); + pci_disable_sriov(pdev); pci_release_regions(pdev); + } +#endif } static struct pci_driver cxgb4_driver = { From 858aa65c5110b624bfdab6a891d53826b0dd45aa Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Thu, 11 Aug 2016 21:06:24 +0530 Subject: [PATCH 0222/1715] cxgb4/cxgb4vf: Add set VF mac address support Add ndo_set_vf_mac support which allows to set the MAC address for cxgb4vf interfaces from the host Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 3 + .../net/ethernet/chelsio/cxgb4/cxgb4_main.c | 24 ++++++- drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 41 ++++++++++++ .../ethernet/chelsio/cxgb4vf/cxgb4vf_main.c | 24 +++++++ .../ethernet/chelsio/cxgb4vf/t4vf_common.h | 3 + .../net/ethernet/chelsio/cxgb4vf/t4vf_hw.c | 63 +++++++++++++++++-- 6 files changed, 151 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 2e2aa9fec9bb..bcfa51226b46 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -1521,4 +1521,7 @@ void t4_idma_monitor_init(struct adapter *adapter, void t4_idma_monitor(struct adapter *adapter, struct sge_idma_monitor_state *idma, int hz, int ticks); +int t4_set_vf_mac_acl(struct adapter *adapter, unsigned int vf, + unsigned int naddr, u8 *addr); + #endif /* __CXGB4_H__ */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index b7e932ec6894..2bb804c93688 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -3078,6 +3078,26 @@ static int cxgb_change_mtu(struct net_device *dev, int new_mtu) return ret; } +#ifdef CONFIG_PCI_IOV +static int cxgb_set_vf_mac(struct net_device *dev, int vf, u8 *mac) +{ + struct port_info *pi = netdev_priv(dev); + struct adapter *adap = pi->adapter; + + /* verify MAC addr is valid */ + if (!is_valid_ether_addr(mac)) { + dev_err(pi->adapter->pdev_dev, + "Invalid Ethernet address %pM for VF %d\n", + mac, vf); + return -EINVAL; + } + + dev_info(pi->adapter->pdev_dev, + "Setting MAC %pM on VF %d\n", mac, vf); + return t4_set_vf_mac_acl(adap, vf + 1, 1, mac); +} +#endif + static int cxgb_set_mac_addr(struct net_device *dev, void *p) { int ret; @@ -3136,10 +3156,12 @@ static const struct net_device_ops cxgb4_netdev_ops = { #ifdef CONFIG_NET_RX_BUSY_POLL .ndo_busy_poll = cxgb_busy_poll, #endif - }; static const struct net_device_ops cxgb4_mgmt_netdev_ops = { +#ifdef CONFIG_PCI_IOV + .ndo_set_vf_mac = cxgb_set_vf_mac, +#endif }; static void get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index dc92c80a75f4..2a476cc4e073 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -8264,3 +8264,44 @@ void t4_idma_monitor(struct adapter *adapter, t4_sge_decode_idma_state(adapter, idma->idma_state[i]); } } + +/** + * t4_set_vf_mac - Set MAC address for the specified VF + * @adapter: The adapter + * @vf: one of the VFs instantiated by the specified PF + * @naddr: the number of MAC addresses + * @addr: the MAC address(es) to be set to the specified VF + */ +int t4_set_vf_mac_acl(struct adapter *adapter, unsigned int vf, + unsigned int naddr, u8 *addr) +{ + struct fw_acl_mac_cmd cmd; + + memset(&cmd, 0, sizeof(cmd)); + cmd.op_to_vfn = cpu_to_be32(FW_CMD_OP_V(FW_ACL_MAC_CMD) | + FW_CMD_REQUEST_F | + FW_CMD_WRITE_F | + FW_ACL_MAC_CMD_PFN_V(adapter->pf) | + FW_ACL_MAC_CMD_VFN_V(vf)); + + /* Note: Do not enable the ACL */ + cmd.en_to_len16 = cpu_to_be32((unsigned int)FW_LEN16(cmd)); + cmd.nmac = naddr; + + switch (adapter->pf) { + case 3: + memcpy(cmd.macaddr3, addr, sizeof(cmd.macaddr3)); + break; + case 2: + memcpy(cmd.macaddr2, addr, sizeof(cmd.macaddr2)); + break; + case 1: + memcpy(cmd.macaddr1, addr, sizeof(cmd.macaddr1)); + break; + case 0: + memcpy(cmd.macaddr0, addr, sizeof(cmd.macaddr0)); + break; + } + + return t4_wr_mbox(adapter, adapter->mbox, &cmd, sizeof(cmd), &cmd); +} diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c index e116bb8d1729..f2951bf68992 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c @@ -2777,6 +2777,7 @@ static int cxgb4vf_pci_probe(struct pci_dev *pdev, struct adapter *adapter; struct port_info *pi; struct net_device *netdev; + unsigned int pf; /* * Print our driver banner the first time we're called to initialize a @@ -2903,8 +2904,11 @@ static int cxgb4vf_pci_probe(struct pci_dev *pdev, * Allocate our "adapter ports" and stitch everything together. */ pmask = adapter->params.vfres.pmask; + pf = t4vf_get_pf_from_vf(adapter); for_each_port(adapter, pidx) { int port_id, viid; + u8 mac[ETH_ALEN]; + unsigned int naddr = 1; /* * We simplistically allocate our virtual interfaces @@ -2975,6 +2979,26 @@ static int cxgb4vf_pci_probe(struct pci_dev *pdev, pidx); goto err_free_dev; } + + err = t4vf_get_vf_mac_acl(adapter, pf, &naddr, mac); + if (err) { + dev_err(&pdev->dev, + "unable to determine MAC ACL address, " + "continuing anyway.. (status %d)\n", err); + } else if (naddr && adapter->params.vfres.nvi == 1) { + struct sockaddr addr; + + ether_addr_copy(addr.sa_data, mac); + err = cxgb4vf_set_mac_addr(netdev, &addr); + if (err) { + dev_err(&pdev->dev, + "unable to set MAC address %pM\n", + mac); + goto err_free_dev; + } + dev_info(&pdev->dev, + "Using assigned MAC ACL: %pM\n", mac); + } } /* See what interrupts we'll be using. If we've been configured to diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h index 8ee541431e8b..8067424ad4a8 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h +++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h @@ -347,6 +347,7 @@ int t4vf_bar2_sge_qregs(struct adapter *adapter, u64 *pbar2_qoffset, unsigned int *pbar2_qid); +unsigned int t4vf_get_pf_from_vf(struct adapter *); int t4vf_get_sge_params(struct adapter *); int t4vf_get_vpd_params(struct adapter *); int t4vf_get_dev_params(struct adapter *); @@ -381,5 +382,7 @@ int t4vf_eth_eq_free(struct adapter *, unsigned int); int t4vf_handle_fw_rpl(struct adapter *, const __be64 *); int t4vf_prep_adapter(struct adapter *); +int t4vf_get_vf_mac_acl(struct adapter *adapter, unsigned int pf, + unsigned int *naddr, u8 *addr); #endif /* __T4VF_COMMON_H__ */ diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c index 427bfa71388b..879f4c52b3d5 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c @@ -639,6 +639,15 @@ int t4vf_bar2_sge_qregs(struct adapter *adapter, return 0; } +unsigned int t4vf_get_pf_from_vf(struct adapter *adapter) +{ + u32 whoami; + + whoami = t4_read_reg(adapter, T4VF_PL_BASE_ADDR + PL_VF_WHOAMI_A); + return (CHELSIO_CHIP_VERSION(adapter->params.chip) <= CHELSIO_T5 ? + SOURCEPF_G(whoami) : T6_SOURCEPF_G(whoami)); +} + /** * t4vf_get_sge_params - retrieve adapter Scatter gather Engine parameters * @adapter: the adapter @@ -716,7 +725,6 @@ int t4vf_get_sge_params(struct adapter *adapter) * read. */ if (!is_t4(adapter->params.chip)) { - u32 whoami; unsigned int pf, s_hps, s_qpp; params[0] = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_REG) | @@ -740,11 +748,7 @@ int t4vf_get_sge_params(struct adapter *adapter) * register we just read. Do it once here so other code in * the driver can just use it. */ - whoami = t4_read_reg(adapter, - T4VF_PL_BASE_ADDR + PL_VF_WHOAMI_A); - pf = CHELSIO_CHIP_VERSION(adapter->params.chip) <= CHELSIO_T5 ? - SOURCEPF_G(whoami) : T6_SOURCEPF_G(whoami); - + pf = t4vf_get_pf_from_vf(adapter); s_hps = (HOSTPAGESIZEPF0_S + (HOSTPAGESIZEPF1_S - HOSTPAGESIZEPF0_S) * pf); sge_params->sge_vf_hps = @@ -1807,3 +1811,50 @@ int t4vf_prep_adapter(struct adapter *adapter) return 0; } + +/** + * t4vf_get_vf_mac_acl - Get the MAC address to be set to + * the VI of this VF. + * @adapter: The adapter + * @pf: The pf associated with vf + * @naddr: the number of ACL MAC addresses returned in addr + * @addr: Placeholder for MAC addresses + * + * Find the MAC address to be set to the VF's VI. The requested MAC address + * is from the host OS via callback in the PF driver. + */ +int t4vf_get_vf_mac_acl(struct adapter *adapter, unsigned int pf, + unsigned int *naddr, u8 *addr) +{ + struct fw_acl_mac_cmd cmd; + int ret; + + memset(&cmd, 0, sizeof(cmd)); + cmd.op_to_vfn = cpu_to_be32(FW_CMD_OP_V(FW_ACL_MAC_CMD) | + FW_CMD_REQUEST_F | + FW_CMD_READ_F); + cmd.en_to_len16 = cpu_to_be32((unsigned int)FW_LEN16(cmd)); + ret = t4vf_wr_mbox(adapter, &cmd, sizeof(cmd), &cmd); + if (ret) + return ret; + + if (cmd.nmac < *naddr) + *naddr = cmd.nmac; + + switch (pf) { + case 3: + memcpy(addr, cmd.macaddr3, sizeof(cmd.macaddr3)); + break; + case 2: + memcpy(addr, cmd.macaddr2, sizeof(cmd.macaddr2)); + break; + case 1: + memcpy(addr, cmd.macaddr1, sizeof(cmd.macaddr1)); + break; + case 0: + memcpy(addr, cmd.macaddr0, sizeof(cmd.macaddr0)); + break; + } + + return ret; +} From 02ee9b18dedf1d72afa0fbfb0d7c15f6b4904a07 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 11 Aug 2016 23:29:54 +0000 Subject: [PATCH 0223/1715] qed: Fix possible memory leak in qed_dcbnl_get_ieee_pfc() 'dcbx_info is malloced in qed_dcbnl_get_ieee_pfc() and should be freed before leaving from the error handling cases, otherwise it will cause memory leak. Fixes: a1d8d8a51e83 ("qed: Add dcbnl support.") Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_dcbx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c index d0dc28f93c0e..d2f202eae6b2 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c @@ -1855,6 +1855,7 @@ static int qed_dcbnl_get_ieee_pfc(struct qed_dev *cdev, if (!dcbx_info->operational.ieee) { DP_INFO(hwfn, "DCBX is not enabled/operational in IEEE mode\n"); + kfree(dcbx_info); return -EINVAL; } From b22ae0b4d9669495158a7fa0fd027bd0fcd8896e Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 12 Aug 2016 15:43:54 +0000 Subject: [PATCH 0224/1715] net: macb: add missing free_netdev() on error in macb_probe() Add the missing free_netdev() before return from function macb_probe() in the platform_get_irq() error handling case. Fixes: c69618b3e4f2 ("net/macb: fix probe sequence to setup clocks earlier") Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c index 6b797e301e3f..dbce938e7d0c 100644 --- a/drivers/net/ethernet/cadence/macb.c +++ b/drivers/net/ethernet/cadence/macb.c @@ -2985,7 +2985,7 @@ static int macb_probe(struct platform_device *pdev) dev->irq = platform_get_irq(pdev, 0); if (dev->irq < 0) { err = dev->irq; - goto err_disable_clocks; + goto err_out_free_netdev; } mac = of_get_mac_address(np); From cfad65c7fa9e77c27025eb5c2098cfbd4d445eab Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 12 Aug 2016 15:46:57 +0000 Subject: [PATCH 0225/1715] net: dsa: b53: remove .owner and .bus fields for driver Remove .owner and .bus fields since module_spi_driver() is used which set them automatically. Generated by: scripts/coccinelle/api/platform_no_drv_owner.cocci Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_spi.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/dsa/b53/b53_spi.c b/drivers/net/dsa/b53/b53_spi.c index 1c847a161bbd..f89f5308a99b 100644 --- a/drivers/net/dsa/b53/b53_spi.c +++ b/drivers/net/dsa/b53/b53_spi.c @@ -317,8 +317,6 @@ static int b53_spi_remove(struct spi_device *spi) static struct spi_driver b53_spi_driver = { .driver = { .name = "b53-switch", - .bus = &spi_bus_type, - .owner = THIS_MODULE, }, .probe = b53_spi_probe, .remove = b53_spi_remove, From 03459345bc00da70a35fa39bcfcf13d779097074 Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Sat, 13 Aug 2016 00:30:48 +0800 Subject: [PATCH 0226/1715] pptp: Refactor the struct and macros of PPTP codes 1. Use struct gre_base_hdr directly in pptp_gre_header instead of duplicated members; 2. Use existing macros like GRE_KEY, GRE_SEQ, and so on instead of duplicated macros defined by PPTP; 3. Add new macros like GRE_IS_ACK/SEQ and so on instead of PPTP_GRE_IS_A/S and so on; Signed-off-by: Gao Feng Reviewed-by: Philip Prindeville Signed-off-by: David S. Miller --- drivers/net/ppp/pptp.c | 28 +++++++++++++--------------- include/net/pptp.h | 19 +------------------ include/uapi/linux/if_tunnel.h | 12 ++++++++++-- 3 files changed, 24 insertions(+), 35 deletions(-) diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c index 3e68dbc0af7f..1951b1085cb8 100644 --- a/drivers/net/ppp/pptp.c +++ b/drivers/net/ppp/pptp.c @@ -206,16 +206,14 @@ static int pptp_xmit(struct ppp_channel *chan, struct sk_buff *skb) skb_push(skb, header_len); hdr = (struct pptp_gre_header *)(skb->data); - hdr->flags = PPTP_GRE_FLAG_K; - hdr->ver = PPTP_GRE_VER; - hdr->protocol = htons(PPTP_GRE_PROTO); - hdr->call_id = htons(opt->dst_addr.call_id); + hdr->gre_hd.flags = GRE_KEY | GRE_VERSION_1 | GRE_SEQ; + hdr->gre_hd.protocol = GRE_PROTO_PPP; + hdr->call_id = htons(opt->dst_addr.call_id); - hdr->flags |= PPTP_GRE_FLAG_S; - hdr->seq = htonl(++opt->seq_sent); + hdr->seq = htonl(++opt->seq_sent); if (opt->ack_sent != seq_recv) { /* send ack with this message */ - hdr->ver |= PPTP_GRE_FLAG_A; + hdr->gre_hd.flags |= GRE_ACK; hdr->ack = htonl(seq_recv); opt->ack_sent = seq_recv; } @@ -278,7 +276,7 @@ static int pptp_rcv_core(struct sock *sk, struct sk_buff *skb) headersize = sizeof(*header); /* test if acknowledgement present */ - if (PPTP_GRE_IS_A(header->ver)) { + if (GRE_IS_ACK(header->gre_hd.flags)) { __u32 ack; if (!pskb_may_pull(skb, headersize)) @@ -286,7 +284,7 @@ static int pptp_rcv_core(struct sock *sk, struct sk_buff *skb) header = (struct pptp_gre_header *)(skb->data); /* ack in different place if S = 0 */ - ack = PPTP_GRE_IS_S(header->flags) ? header->ack : header->seq; + ack = GRE_IS_SEQ(header->gre_hd.flags) ? header->ack : header->seq; ack = ntohl(ack); @@ -299,7 +297,7 @@ static int pptp_rcv_core(struct sock *sk, struct sk_buff *skb) headersize -= sizeof(header->ack); } /* test if payload present */ - if (!PPTP_GRE_IS_S(header->flags)) + if (!GRE_IS_SEQ(header->gre_hd.flags)) goto drop; payload_len = ntohs(header->payload_len); @@ -360,11 +358,11 @@ static int pptp_rcv(struct sk_buff *skb) header = (struct pptp_gre_header *)skb->data; - if (ntohs(header->protocol) != PPTP_GRE_PROTO || /* PPTP-GRE protocol for PPTP */ - PPTP_GRE_IS_C(header->flags) || /* flag C should be clear */ - PPTP_GRE_IS_R(header->flags) || /* flag R should be clear */ - !PPTP_GRE_IS_K(header->flags) || /* flag K should be set */ - (header->flags&0xF) != 0) /* routing and recursion ctrl = 0 */ + if (header->gre_hd.protocol != GRE_PROTO_PPP || /* PPTP-GRE protocol for PPTP */ + GRE_IS_CSUM(header->gre_hd.flags) || /* flag CSUM should be clear */ + GRE_IS_ROUTING(header->gre_hd.flags) || /* flag ROUTING should be clear */ + !GRE_IS_KEY(header->gre_hd.flags) || /* flag KEY should be set */ + (header->gre_hd.flags & GRE_FLAGS)) /* flag Recursion Ctrl should be clear */ /* if invalid, discard this packet */ goto drop; diff --git a/include/net/pptp.h b/include/net/pptp.h index 301d3e2ba1f9..92e9f1fe2628 100644 --- a/include/net/pptp.h +++ b/include/net/pptp.h @@ -10,26 +10,9 @@ ((((curseq) & 0xffffff00) == 0) &&\ (((lastseq) & 0xffffff00) == 0xffffff00)) -#define PPTP_GRE_PROTO 0x880B -#define PPTP_GRE_VER 0x1 - -#define PPTP_GRE_FLAG_C 0x80 -#define PPTP_GRE_FLAG_R 0x40 -#define PPTP_GRE_FLAG_K 0x20 -#define PPTP_GRE_FLAG_S 0x10 -#define PPTP_GRE_FLAG_A 0x80 - -#define PPTP_GRE_IS_C(f) ((f)&PPTP_GRE_FLAG_C) -#define PPTP_GRE_IS_R(f) ((f)&PPTP_GRE_FLAG_R) -#define PPTP_GRE_IS_K(f) ((f)&PPTP_GRE_FLAG_K) -#define PPTP_GRE_IS_S(f) ((f)&PPTP_GRE_FLAG_S) -#define PPTP_GRE_IS_A(f) ((f)&PPTP_GRE_FLAG_A) - #define PPTP_HEADER_OVERHEAD (2+sizeof(struct pptp_gre_header)) struct pptp_gre_header { - u8 flags; - u8 ver; - __be16 protocol; + struct gre_base_hdr gre_hd; __be16 payload_len; __be16 call_id; __be32 seq; diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h index 60dbb200de60..361b9f0f20d7 100644 --- a/include/uapi/linux/if_tunnel.h +++ b/include/uapi/linux/if_tunnel.h @@ -28,8 +28,16 @@ #define GRE_FLAGS __cpu_to_be16(0x0078) #define GRE_VERSION __cpu_to_be16(0x0007) -#define GRE_VERSION_1 __cpu_to_be16(0x0001) -#define GRE_PROTO_PPP __cpu_to_be16(0x880b) +#define GRE_IS_CSUM(f) ((f) & GRE_CSUM) +#define GRE_IS_ROUTING(f) ((f) & GRE_ROUTING) +#define GRE_IS_KEY(f) ((f) & GRE_KEY) +#define GRE_IS_SEQ(f) ((f) & GRE_SEQ) +#define GRE_IS_STRICT(f) ((f) & GRE_STRICT) +#define GRE_IS_REC(f) ((f) & GRE_REC) +#define GRE_IS_ACK(f) ((f) & GRE_ACK) + +#define GRE_VERSION_1 __cpu_to_be16(0x0001) +#define GRE_PROTO_PPP __cpu_to_be16(0x880b) #define GRE_PPTP_KEY_MASK __cpu_to_be32(0xffff) struct ip_tunnel_parm { From 54be3d985e7ec834dc2512d8a1345329d246ccd5 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Mon, 15 Aug 2016 08:34:56 +0200 Subject: [PATCH 0227/1715] fjes: Delete owner assignment The field "owner" is set by core. Thus delete an extra initialisation. Generated by: scripts/coccinelle/api/platform_no_drv_owner.cocci Signed-off-by: Markus Elfring Signed-off-by: David S. Miller --- drivers/net/fjes/fjes_main.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/fjes/fjes_main.c b/drivers/net/fjes/fjes_main.c index 9006877c53f2..e46b1ebbbff4 100644 --- a/drivers/net/fjes/fjes_main.c +++ b/drivers/net/fjes/fjes_main.c @@ -97,7 +97,6 @@ static struct acpi_driver fjes_acpi_driver = { static struct platform_driver fjes_driver = { .driver = { .name = DRV_NAME, - .owner = THIS_MODULE, }, .probe = fjes_probe, .remove = fjes_remove, From 1a635e488ecf6fcae00bffda61707b63bc1aacbe Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Mon, 15 Aug 2016 10:42:43 +0300 Subject: [PATCH 0228/1715] qed*: Semantic changes Make semantic-only adjustments to qed* drivers, such as: - Changes in code indentation. - Usage of BIT() macro. - re-naming of variables. - Re-ordering of variable declerations. - Removal of (== 0) and (!= 0) in conditions. Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_cxt.c | 47 ++-- drivers/net/ethernet/qlogic/qed/qed_dev.c | 96 ++++---- drivers/net/ethernet/qlogic/qed/qed_hw.c | 137 +++++------- .../net/ethernet/qlogic/qed/qed_init_ops.c | 79 +++---- drivers/net/ethernet/qlogic/qed/qed_int.c | 113 ++++------ drivers/net/ethernet/qlogic/qed/qed_l2.c | 210 ++++++++---------- drivers/net/ethernet/qlogic/qed/qed_main.c | 13 +- drivers/net/ethernet/qlogic/qed/qed_mcp.c | 67 ++---- .../net/ethernet/qlogic/qed/qed_sp_commands.c | 15 +- drivers/net/ethernet/qlogic/qed/qed_spq.c | 84 +++---- drivers/net/ethernet/qlogic/qed/qed_sriov.c | 56 +++-- .../net/ethernet/qlogic/qede/qede_ethtool.c | 6 +- drivers/net/ethernet/qlogic/qede/qede_main.c | 121 ++++------ 13 files changed, 404 insertions(+), 640 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c index 1c35f376143e..547692759d06 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c +++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c @@ -377,9 +377,8 @@ static void qed_cxt_set_proto_cid_count(struct qed_hwfn *p_hwfn, } } -u32 qed_cxt_get_proto_cid_count(struct qed_hwfn *p_hwfn, - enum protocol_type type, - u32 *vf_cid) +u32 qed_cxt_get_proto_cid_count(struct qed_hwfn *p_hwfn, + enum protocol_type type, u32 *vf_cid) { if (vf_cid) *vf_cid = p_hwfn->p_cxt_mngr->conn_cfg[type].cids_per_vf; @@ -405,10 +404,10 @@ u32 qed_cxt_get_proto_tid_count(struct qed_hwfn *p_hwfn, return cnt; } -static void -qed_cxt_set_proto_tid_count(struct qed_hwfn *p_hwfn, - enum protocol_type proto, - u8 seg, u8 seg_type, u32 count, bool has_fl) +static void qed_cxt_set_proto_tid_count(struct qed_hwfn *p_hwfn, + enum protocol_type proto, + u8 seg, + u8 seg_type, u32 count, bool has_fl) { struct qed_cxt_mngr *p_mngr = p_hwfn->p_cxt_mngr; struct qed_tid_seg *p_seg = &p_mngr->conn_cfg[proto].tid_seg[seg]; @@ -420,8 +419,7 @@ qed_cxt_set_proto_tid_count(struct qed_hwfn *p_hwfn, static void qed_ilt_cli_blk_fill(struct qed_ilt_client_cfg *p_cli, struct qed_ilt_cli_blk *p_blk, - u32 start_line, u32 total_size, - u32 elem_size) + u32 start_line, u32 total_size, u32 elem_size) { u32 ilt_size = ILT_PAGE_IN_BYTES(p_cli->p_size.val); @@ -448,8 +446,7 @@ static void qed_ilt_cli_adv_line(struct qed_hwfn *p_hwfn, p_cli->first.val = *p_line; p_cli->active = true; - *p_line += DIV_ROUND_UP(p_blk->total_size, - p_blk->real_size_in_page); + *p_line += DIV_ROUND_UP(p_blk->total_size, p_blk->real_size_in_page); p_cli->last.val = *p_line - 1; DP_VERBOSE(p_hwfn, QED_MSG_ILT, @@ -926,12 +923,9 @@ static int qed_ilt_blk_alloc(struct qed_hwfn *p_hwfn, void *p_virt; u32 size; - size = min_t(u32, sz_left, - p_blk->real_size_in_page); + size = min_t(u32, sz_left, p_blk->real_size_in_page); p_virt = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev, - size, - &p_phys, - GFP_KERNEL); + size, &p_phys, GFP_KERNEL); if (!p_virt) return -ENOMEM; memset(p_virt, 0, size); @@ -976,7 +970,7 @@ static int qed_ilt_shadow_alloc(struct qed_hwfn *p_hwfn) for (j = 0; j < ILT_CLI_PF_BLOCKS; j++) { p_blk = &clients[i].pf_blks[j]; rc = qed_ilt_blk_alloc(p_hwfn, p_blk, i, 0); - if (rc != 0) + if (rc) goto ilt_shadow_fail; } for (k = 0; k < p_mngr->vf_count; k++) { @@ -985,7 +979,7 @@ static int qed_ilt_shadow_alloc(struct qed_hwfn *p_hwfn) p_blk = &clients[i].vf_blks[j]; rc = qed_ilt_blk_alloc(p_hwfn, p_blk, i, lines); - if (rc != 0) + if (rc) goto ilt_shadow_fail; } } @@ -1672,7 +1666,7 @@ static void qed_tm_init_pf(struct qed_hwfn *p_hwfn) p_hwfn->rel_pf_id * NUM_TASK_PF_SEGMENTS + i); STORE_RT_REG_AGG(p_hwfn, rt_reg, cfg_word); - active_seg_mask |= (tm_iids.pf_tids[i] ? (1 << i) : 0); + active_seg_mask |= (tm_iids.pf_tids[i] ? BIT(i) : 0); tm_offset += tm_iids.pf_tids[i]; } @@ -1702,8 +1696,7 @@ void qed_cxt_hw_init_pf(struct qed_hwfn *p_hwfn) } int qed_cxt_acquire_cid(struct qed_hwfn *p_hwfn, - enum protocol_type type, - u32 *p_cid) + enum protocol_type type, u32 *p_cid) { struct qed_cxt_mngr *p_mngr = p_hwfn->p_cxt_mngr; u32 rel_cid; @@ -1717,8 +1710,7 @@ int qed_cxt_acquire_cid(struct qed_hwfn *p_hwfn, p_mngr->acquired[type].max_count); if (rel_cid >= p_mngr->acquired[type].max_count) { - DP_NOTICE(p_hwfn, "no CID available for protocol %d\n", - type); + DP_NOTICE(p_hwfn, "no CID available for protocol %d\n", type); return -EINVAL; } @@ -1730,8 +1722,7 @@ int qed_cxt_acquire_cid(struct qed_hwfn *p_hwfn, } static bool qed_cxt_test_cid_acquired(struct qed_hwfn *p_hwfn, - u32 cid, - enum protocol_type *p_type) + u32 cid, enum protocol_type *p_type) { struct qed_cxt_mngr *p_mngr = p_hwfn->p_cxt_mngr; struct qed_cid_acquired_map *p_map; @@ -1763,8 +1754,7 @@ static bool qed_cxt_test_cid_acquired(struct qed_hwfn *p_hwfn, return true; } -void qed_cxt_release_cid(struct qed_hwfn *p_hwfn, - u32 cid) +void qed_cxt_release_cid(struct qed_hwfn *p_hwfn, u32 cid) { struct qed_cxt_mngr *p_mngr = p_hwfn->p_cxt_mngr; enum protocol_type type; @@ -1781,8 +1771,7 @@ void qed_cxt_release_cid(struct qed_hwfn *p_hwfn, __clear_bit(rel_cid, p_mngr->acquired[type].cid_map); } -int qed_cxt_get_cid_info(struct qed_hwfn *p_hwfn, - struct qed_cxt_info *p_info) +int qed_cxt_get_cid_info(struct qed_hwfn *p_hwfn, struct qed_cxt_info *p_info) { struct qed_cxt_mngr *p_mngr = p_hwfn->p_cxt_mngr; u32 conn_cxt_size, hw_p_size, cxts_per_p, line; diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index b8d594a95a65..6d105c8d3bbd 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -43,8 +43,7 @@ enum BAR_ID { BAR_ID_1 /* Used for doorbells */ }; -static u32 qed_hw_bar_size(struct qed_hwfn *p_hwfn, - enum BAR_ID bar_id) +static u32 qed_hw_bar_size(struct qed_hwfn *p_hwfn, enum BAR_ID bar_id) { u32 bar_reg = (bar_id == BAR_ID_0 ? PGLUE_B_REG_PF_BAR0_SIZE : PGLUE_B_REG_PF_BAR1_SIZE); @@ -69,8 +68,7 @@ static u32 qed_hw_bar_size(struct qed_hwfn *p_hwfn, } } -void qed_init_dp(struct qed_dev *cdev, - u32 dp_module, u8 dp_level) +void qed_init_dp(struct qed_dev *cdev, u32 dp_module, u8 dp_level) { u32 i; @@ -604,9 +602,8 @@ int qed_final_cleanup(struct qed_hwfn *p_hwfn, /* Make sure notification is not set before initiating final cleanup */ if (REG_RD(p_hwfn, addr)) { - DP_NOTICE( - p_hwfn, - "Unexpected; Found final cleanup notification before initiating final cleanup\n"); + DP_NOTICE(p_hwfn, + "Unexpected; Found final cleanup notification before initiating final cleanup\n"); REG_WR(p_hwfn, addr, 0); } @@ -700,17 +697,14 @@ static void qed_init_cau_rt_data(struct qed_dev *cdev) continue; qed_init_cau_sb_entry(p_hwfn, &sb_entry, - p_block->function_id, - 0, 0); - STORE_RT_REG_AGG(p_hwfn, offset + sb_id * 2, - sb_entry); + p_block->function_id, 0, 0); + STORE_RT_REG_AGG(p_hwfn, offset + sb_id * 2, sb_entry); } } } static int qed_hw_init_common(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - int hw_mode) + struct qed_ptt *p_ptt, int hw_mode) { struct qed_qm_info *qm_info = &p_hwfn->qm_info; struct qed_qm_common_rt_init_params params; @@ -758,7 +752,7 @@ static int qed_hw_init_common(struct qed_hwfn *p_hwfn, qed_port_unpretend(p_hwfn, p_ptt); rc = qed_init_run(p_hwfn, p_ptt, PHASE_ENGINE, ANY_PHASE_ID, hw_mode); - if (rc != 0) + if (rc) return rc; qed_wr(p_hwfn, p_ptt, PSWRQ2_REG_L2P_VALIDATE_VFID, 0); @@ -787,13 +781,12 @@ static int qed_hw_init_common(struct qed_hwfn *p_hwfn, } static int qed_hw_init_port(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - int hw_mode) + struct qed_ptt *p_ptt, int hw_mode) { int rc = 0; rc = qed_init_run(p_hwfn, p_ptt, PHASE_PORT, p_hwfn->port_id, hw_mode); - if (rc != 0) + if (rc) return rc; if (hw_mode & (1 << MODE_MF_SI)) { @@ -847,7 +840,7 @@ static int qed_hw_init_pf(struct qed_hwfn *p_hwfn, qed_int_igu_init_rt(p_hwfn); /* Set VLAN in NIG if needed */ - if (hw_mode & (1 << MODE_MF_SD)) { + if (hw_mode & BIT(MODE_MF_SD)) { DP_VERBOSE(p_hwfn, NETIF_MSG_HW, "Configuring LLH_FUNC_TAG\n"); STORE_RT_REG(p_hwfn, NIG_REG_LLH_FUNC_TAG_EN_RT_OFFSET, 1); STORE_RT_REG(p_hwfn, NIG_REG_LLH_FUNC_TAG_VALUE_RT_OFFSET, @@ -855,7 +848,7 @@ static int qed_hw_init_pf(struct qed_hwfn *p_hwfn, } /* Enable classification by MAC if needed */ - if (hw_mode & (1 << MODE_MF_SI)) { + if (hw_mode & BIT(MODE_MF_SI)) { DP_VERBOSE(p_hwfn, NETIF_MSG_HW, "Configuring TAGMAC_CLS_TYPE\n"); STORE_RT_REG(p_hwfn, @@ -870,7 +863,7 @@ static int qed_hw_init_pf(struct qed_hwfn *p_hwfn, /* Cleanup chip from previous driver if such remains exist */ rc = qed_final_cleanup(p_hwfn, p_ptt, rel_pf_id, false); - if (rc != 0) + if (rc) return rc; /* PF Init sequence */ @@ -949,8 +942,7 @@ static void qed_reset_mb_shadow(struct qed_hwfn *p_hwfn, /* Read shadow of current MFW mailbox */ qed_mcp_read_mb(p_hwfn, p_main_ptt); memcpy(p_hwfn->mcp_info->mfw_mb_shadow, - p_hwfn->mcp_info->mfw_mb_cur, - p_hwfn->mcp_info->mfw_mb_length); + p_hwfn->mcp_info->mfw_mb_cur, p_hwfn->mcp_info->mfw_mb_length); } int qed_hw_init(struct qed_dev *cdev, @@ -970,7 +962,7 @@ int qed_hw_init(struct qed_dev *cdev, if (IS_PF(cdev)) { rc = qed_init_fw_data(cdev, bin_fw_data); - if (rc != 0) + if (rc) return rc; } @@ -987,8 +979,7 @@ int qed_hw_init(struct qed_dev *cdev, qed_calc_hw_mode(p_hwfn); - rc = qed_mcp_load_req(p_hwfn, p_hwfn->p_main_ptt, - &load_code); + rc = qed_mcp_load_req(p_hwfn, p_hwfn->p_main_ptt, &load_code); if (rc) { DP_NOTICE(p_hwfn, "Failed sending LOAD_REQ command\n"); return rc; @@ -1065,9 +1056,8 @@ int qed_hw_init(struct qed_dev *cdev, } #define QED_HW_STOP_RETRY_LIMIT (10) -static inline void qed_hw_timers_stop(struct qed_dev *cdev, - struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt) +static void qed_hw_timers_stop(struct qed_dev *cdev, + struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { int i; @@ -1078,8 +1068,7 @@ static inline void qed_hw_timers_stop(struct qed_dev *cdev, for (i = 0; i < QED_HW_STOP_RETRY_LIMIT; i++) { if ((!qed_rd(p_hwfn, p_ptt, TM_REG_PF_SCAN_ACTIVE_CONN)) && - (!qed_rd(p_hwfn, p_ptt, - TM_REG_PF_SCAN_ACTIVE_TASK))) + (!qed_rd(p_hwfn, p_ptt, TM_REG_PF_SCAN_ACTIVE_TASK))) break; /* Dependent on number of connection/tasks, possibly @@ -1184,8 +1173,7 @@ void qed_hw_stop_fastpath(struct qed_dev *cdev) } DP_VERBOSE(p_hwfn, - NETIF_MSG_IFDOWN, - "Shutting down the fastpath\n"); + NETIF_MSG_IFDOWN, "Shutting down the fastpath\n"); qed_wr(p_hwfn, p_ptt, NIG_REG_RX_LLH_BRB_GATE_DNTFWD_PERPF, 0x1); @@ -1213,14 +1201,13 @@ void qed_hw_start_fastpath(struct qed_hwfn *p_hwfn) NIG_REG_RX_LLH_BRB_GATE_DNTFWD_PERPF, 0x0); } -static int qed_reg_assert(struct qed_hwfn *hwfn, - struct qed_ptt *ptt, u32 reg, - bool expected) +static int qed_reg_assert(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u32 reg, bool expected) { - u32 assert_val = qed_rd(hwfn, ptt, reg); + u32 assert_val = qed_rd(p_hwfn, p_ptt, reg); if (assert_val != expected) { - DP_NOTICE(hwfn, "Value at address 0x%x != 0x%08x\n", + DP_NOTICE(p_hwfn, "Value at address 0x%x != 0x%08x\n", reg, expected); return -EINVAL; } @@ -1300,8 +1287,7 @@ static void qed_hw_hwfn_prepare(struct qed_hwfn *p_hwfn) /* Clean Previous errors if such exist */ qed_wr(p_hwfn, p_hwfn->p_main_ptt, - PGLUE_B_REG_WAS_ERROR_PF_31_0_CLR, - 1 << p_hwfn->abs_pf_id); + PGLUE_B_REG_WAS_ERROR_PF_31_0_CLR, 1 << p_hwfn->abs_pf_id); /* enable internal target-read */ qed_wr(p_hwfn, p_hwfn->p_main_ptt, @@ -1311,7 +1297,8 @@ static void qed_hw_hwfn_prepare(struct qed_hwfn *p_hwfn) static void get_function_id(struct qed_hwfn *p_hwfn) { /* ME Register */ - p_hwfn->hw_info.opaque_fid = (u16)REG_RD(p_hwfn, PXP_PF_ME_OPAQUE_ADDR); + p_hwfn->hw_info.opaque_fid = (u16) REG_RD(p_hwfn, + PXP_PF_ME_OPAQUE_ADDR); p_hwfn->hw_info.concrete_fid = REG_RD(p_hwfn, PXP_PF_ME_CONCRETE_ADDR); @@ -1411,8 +1398,7 @@ static int qed_hw_get_resc(struct qed_hwfn *p_hwfn) return 0; } -static int qed_hw_get_nvm_info(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt) +static int qed_hw_get_nvm_info(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { u32 nvm_cfg1_offset, mf_mode, addr, generic_cont0, core_cfg; u32 port_cfg_addr, link_temp, nvm_cfg_addr, device_capabilities; @@ -1466,8 +1452,7 @@ static int qed_hw_get_nvm_info(struct qed_hwfn *p_hwfn, p_hwfn->hw_info.port_mode = QED_PORT_MODE_DE_1X25G; break; default: - DP_NOTICE(p_hwfn, "Unknown port mode in 0x%08x\n", - core_cfg); + DP_NOTICE(p_hwfn, "Unknown port mode in 0x%08x\n", core_cfg); break; } @@ -1511,8 +1496,7 @@ static int qed_hw_get_nvm_info(struct qed_hwfn *p_hwfn, link->speed.forced_speed = 100000; break; default: - DP_NOTICE(p_hwfn, "Unknown Speed in 0x%08x\n", - link_temp); + DP_NOTICE(p_hwfn, "Unknown Speed in 0x%08x\n", link_temp); } link_temp &= NVM_CFG1_PORT_DRV_FLOW_CONTROL_MASK; @@ -1697,10 +1681,9 @@ static int qed_get_dev_info(struct qed_dev *cdev) u32 tmp; /* Read Vendor Id / Device Id */ - pci_read_config_word(cdev->pdev, PCI_VENDOR_ID, - &cdev->vendor_id); - pci_read_config_word(cdev->pdev, PCI_DEVICE_ID, - &cdev->device_id); + pci_read_config_word(cdev->pdev, PCI_VENDOR_ID, &cdev->vendor_id); + pci_read_config_word(cdev->pdev, PCI_DEVICE_ID, &cdev->device_id); + cdev->chip_num = (u16)qed_rd(p_hwfn, p_hwfn->p_main_ptt, MISCS_REG_CHIP_NUM); cdev->chip_rev = (u16)qed_rd(p_hwfn, p_hwfn->p_main_ptt, @@ -1776,7 +1759,7 @@ static int qed_hw_prepare_single(struct qed_hwfn *p_hwfn, /* First hwfn learns basic information, e.g., number of hwfns */ if (!p_hwfn->my_id) { rc = qed_get_dev_info(p_hwfn->cdev); - if (rc != 0) + if (rc) goto err1; } @@ -2177,8 +2160,7 @@ int qed_fw_l2_queue(struct qed_hwfn *p_hwfn, u16 src_id, u16 *dst_id) return 0; } -int qed_fw_vport(struct qed_hwfn *p_hwfn, - u8 src_id, u8 *dst_id) +int qed_fw_vport(struct qed_hwfn *p_hwfn, u8 src_id, u8 *dst_id) { if (src_id >= RESC_NUM(p_hwfn, QED_VPORT)) { u8 min, max; @@ -2197,8 +2179,7 @@ int qed_fw_vport(struct qed_hwfn *p_hwfn, return 0; } -int qed_fw_rss_eng(struct qed_hwfn *p_hwfn, - u8 src_id, u8 *dst_id) +int qed_fw_rss_eng(struct qed_hwfn *p_hwfn, u8 src_id, u8 *dst_id) { if (src_id >= RESC_NUM(p_hwfn, QED_RSS_ENG)) { u8 min, max; @@ -2380,8 +2361,7 @@ static void qed_disable_wfq_for_all_vports(struct qed_hwfn *p_hwfn, * 3. total_req_min_rate [all vports min rate sum] shouldn't exceed min_pf_rate. */ static int qed_init_wfq_param(struct qed_hwfn *p_hwfn, - u16 vport_id, u32 req_rate, - u32 min_pf_rate) + u16 vport_id, u32 req_rate, u32 min_pf_rate) { u32 total_req_min_rate = 0, total_left_rate = 0, left_rate_per_vp = 0; int non_requested_count = 0, req_count = 0, i, num_vports; @@ -2465,7 +2445,7 @@ static int __qed_configure_vport_wfq(struct qed_hwfn *p_hwfn, rc = qed_init_wfq_param(p_hwfn, vp_id, rate, p_link->min_pf_rate); - if (rc == 0) + if (!rc) qed_configure_wfq_for_all_vports(p_hwfn, p_ptt, p_link->min_pf_rate); else diff --git a/drivers/net/ethernet/qlogic/qed/qed_hw.c b/drivers/net/ethernet/qlogic/qed/qed_hw.c index e17885321faf..8ebdc79b3850 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hw.c +++ b/drivers/net/ethernet/qlogic/qed/qed_hw.c @@ -44,8 +44,7 @@ struct qed_ptt_pool { int qed_ptt_pool_alloc(struct qed_hwfn *p_hwfn) { - struct qed_ptt_pool *p_pool = kmalloc(sizeof(*p_pool), - GFP_KERNEL); + struct qed_ptt_pool *p_pool = kmalloc(sizeof(*p_pool), GFP_KERNEL); int i; if (!p_pool) @@ -113,16 +112,14 @@ struct qed_ptt *qed_ptt_acquire(struct qed_hwfn *p_hwfn) return NULL; } -void qed_ptt_release(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt) +void qed_ptt_release(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { spin_lock_bh(&p_hwfn->p_ptt_pool->lock); list_add(&p_ptt->list_entry, &p_hwfn->p_ptt_pool->free_list); spin_unlock_bh(&p_hwfn->p_ptt_pool->lock); } -u32 qed_ptt_get_hw_addr(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt) +u32 qed_ptt_get_hw_addr(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { /* The HW is using DWORDS and we need to translate it to Bytes */ return le32_to_cpu(p_ptt->pxp.offset) << 2; @@ -141,8 +138,7 @@ u32 qed_ptt_get_bar_addr(struct qed_ptt *p_ptt) } void qed_ptt_set_win(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - u32 new_hw_addr) + struct qed_ptt *p_ptt, u32 new_hw_addr) { u32 prev_hw_addr; @@ -166,8 +162,7 @@ void qed_ptt_set_win(struct qed_hwfn *p_hwfn, } static u32 qed_set_ptt(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - u32 hw_addr) + struct qed_ptt *p_ptt, u32 hw_addr) { u32 win_hw_addr = qed_ptt_get_hw_addr(p_hwfn, p_ptt); u32 offset; @@ -224,10 +219,7 @@ u32 qed_rd(struct qed_hwfn *p_hwfn, static void qed_memcpy_hw(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, - void *addr, - u32 hw_addr, - size_t n, - bool to_device) + void *addr, u32 hw_addr, size_t n, bool to_device) { u32 dw_count, *host_addr, hw_offset; size_t quota, done = 0; @@ -259,8 +251,7 @@ static void qed_memcpy_hw(struct qed_hwfn *p_hwfn, } void qed_memcpy_from(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - void *dest, u32 hw_addr, size_t n) + struct qed_ptt *p_ptt, void *dest, u32 hw_addr, size_t n) { DP_VERBOSE(p_hwfn, NETIF_MSG_HW, "hw_addr 0x%x, dest %p hw_addr 0x%x, size %lu\n", @@ -270,8 +261,7 @@ void qed_memcpy_from(struct qed_hwfn *p_hwfn, } void qed_memcpy_to(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - u32 hw_addr, void *src, size_t n) + struct qed_ptt *p_ptt, u32 hw_addr, void *src, size_t n) { DP_VERBOSE(p_hwfn, NETIF_MSG_HW, "hw_addr 0x%x, hw_addr 0x%x, src %p size %lu\n", @@ -280,9 +270,7 @@ void qed_memcpy_to(struct qed_hwfn *p_hwfn, qed_memcpy_hw(p_hwfn, p_ptt, src, hw_addr, n, true); } -void qed_fid_pretend(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - u16 fid) +void qed_fid_pretend(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u16 fid) { u16 control = 0; @@ -309,8 +297,7 @@ void qed_fid_pretend(struct qed_hwfn *p_hwfn, } void qed_port_pretend(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - u8 port_id) + struct qed_ptt *p_ptt, u8 port_id) { u16 control = 0; @@ -326,8 +313,7 @@ void qed_port_pretend(struct qed_hwfn *p_hwfn, *(u32 *)&p_ptt->pxp.pretend); } -void qed_port_unpretend(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt) +void qed_port_unpretend(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { u16 control = 0; @@ -429,28 +415,27 @@ u32 qed_dmae_idx_to_go_cmd(u8 idx) return DMAE_REG_GO_C0 + (idx << 2); } -static int -qed_dmae_post_command(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt) +static int qed_dmae_post_command(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt) { - struct dmae_cmd *command = p_hwfn->dmae_info.p_dmae_cmd; + struct dmae_cmd *p_command = p_hwfn->dmae_info.p_dmae_cmd; u8 idx_cmd = p_hwfn->dmae_info.channel, i; int qed_status = 0; /* verify address is not NULL */ - if ((((command->dst_addr_lo == 0) && (command->dst_addr_hi == 0)) || - ((command->src_addr_lo == 0) && (command->src_addr_hi == 0)))) { + if ((((!p_command->dst_addr_lo) && (!p_command->dst_addr_hi)) || + ((!p_command->src_addr_lo) && (!p_command->src_addr_hi)))) { DP_NOTICE(p_hwfn, "source or destination address 0 idx_cmd=%d\n" "opcode = [0x%08x,0x%04x] len=0x%x src=0x%x:%x dst=0x%x:%x\n", - idx_cmd, - le32_to_cpu(command->opcode), - le16_to_cpu(command->opcode_b), - le16_to_cpu(command->length_dw), - le32_to_cpu(command->src_addr_hi), - le32_to_cpu(command->src_addr_lo), - le32_to_cpu(command->dst_addr_hi), - le32_to_cpu(command->dst_addr_lo)); + idx_cmd, + le32_to_cpu(p_command->opcode), + le16_to_cpu(p_command->opcode_b), + le16_to_cpu(p_command->length_dw), + le32_to_cpu(p_command->src_addr_hi), + le32_to_cpu(p_command->src_addr_lo), + le32_to_cpu(p_command->dst_addr_hi), + le32_to_cpu(p_command->dst_addr_lo)); return -EINVAL; } @@ -459,13 +444,13 @@ qed_dmae_post_command(struct qed_hwfn *p_hwfn, NETIF_MSG_HW, "Posting DMAE command [idx %d]: opcode = [0x%08x,0x%04x] len=0x%x src=0x%x:%x dst=0x%x:%x\n", idx_cmd, - le32_to_cpu(command->opcode), - le16_to_cpu(command->opcode_b), - le16_to_cpu(command->length_dw), - le32_to_cpu(command->src_addr_hi), - le32_to_cpu(command->src_addr_lo), - le32_to_cpu(command->dst_addr_hi), - le32_to_cpu(command->dst_addr_lo)); + le32_to_cpu(p_command->opcode), + le16_to_cpu(p_command->opcode_b), + le16_to_cpu(p_command->length_dw), + le32_to_cpu(p_command->src_addr_hi), + le32_to_cpu(p_command->src_addr_lo), + le32_to_cpu(p_command->dst_addr_hi), + le32_to_cpu(p_command->dst_addr_lo)); /* Copy the command to DMAE - need to do it before every call * for source/dest address no reset. @@ -475,7 +460,7 @@ qed_dmae_post_command(struct qed_hwfn *p_hwfn, */ for (i = 0; i < DMAE_CMD_SIZE; i++) { u32 data = (i < DMAE_CMD_SIZE_TO_FILL) ? - *(((u32 *)command) + i) : 0; + *(((u32 *)p_command) + i) : 0; qed_wr(p_hwfn, p_ptt, DMAE_REG_CMD_MEM + @@ -483,9 +468,7 @@ qed_dmae_post_command(struct qed_hwfn *p_hwfn, (i * sizeof(u32)), data); } - qed_wr(p_hwfn, p_ptt, - qed_dmae_idx_to_go_cmd(idx_cmd), - DMAE_GO_VALUE); + qed_wr(p_hwfn, p_ptt, qed_dmae_idx_to_go_cmd(idx_cmd), DMAE_GO_VALUE); return qed_status; } @@ -498,9 +481,7 @@ int qed_dmae_info_alloc(struct qed_hwfn *p_hwfn) u32 **p_comp = &p_hwfn->dmae_info.p_completion_word; *p_comp = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev, - sizeof(u32), - p_addr, - GFP_KERNEL); + sizeof(u32), p_addr, GFP_KERNEL); if (!*p_comp) { DP_NOTICE(p_hwfn, "Failed to allocate `p_completion_word'\n"); goto err; @@ -543,8 +524,7 @@ void qed_dmae_info_free(struct qed_hwfn *p_hwfn) p_phys = p_hwfn->dmae_info.completion_word_phys_addr; dma_free_coherent(&p_hwfn->cdev->pdev->dev, sizeof(u32), - p_hwfn->dmae_info.p_completion_word, - p_phys); + p_hwfn->dmae_info.p_completion_word, p_phys); p_hwfn->dmae_info.p_completion_word = NULL; } @@ -552,8 +532,7 @@ void qed_dmae_info_free(struct qed_hwfn *p_hwfn) p_phys = p_hwfn->dmae_info.dmae_cmd_phys_addr; dma_free_coherent(&p_hwfn->cdev->pdev->dev, sizeof(struct dmae_cmd), - p_hwfn->dmae_info.p_dmae_cmd, - p_phys); + p_hwfn->dmae_info.p_dmae_cmd, p_phys); p_hwfn->dmae_info.p_dmae_cmd = NULL; } @@ -571,9 +550,7 @@ void qed_dmae_info_free(struct qed_hwfn *p_hwfn) static int qed_dmae_operation_wait(struct qed_hwfn *p_hwfn) { - u32 wait_cnt = 0; - u32 wait_cnt_limit = 10000; - + u32 wait_cnt_limit = 10000, wait_cnt = 0; int qed_status = 0; barrier(); @@ -606,7 +583,7 @@ static int qed_dmae_execute_sub_operation(struct qed_hwfn *p_hwfn, u64 dst_addr, u8 src_type, u8 dst_type, - u32 length) + u32 length_dw) { dma_addr_t phys = p_hwfn->dmae_info.intermediate_buffer_phys_addr; struct dmae_cmd *cmd = p_hwfn->dmae_info.p_dmae_cmd; @@ -624,7 +601,7 @@ static int qed_dmae_execute_sub_operation(struct qed_hwfn *p_hwfn, cmd->src_addr_lo = cpu_to_le32(lower_32_bits(phys)); memcpy(&p_hwfn->dmae_info.p_intermediate_buffer[0], (void *)(uintptr_t)src_addr, - length * sizeof(u32)); + length_dw * sizeof(u32)); break; default: return -EINVAL; @@ -645,7 +622,7 @@ static int qed_dmae_execute_sub_operation(struct qed_hwfn *p_hwfn, return -EINVAL; } - cmd->length_dw = cpu_to_le16((u16)length); + cmd->length_dw = cpu_to_le16((u16)length_dw); qed_dmae_post_command(p_hwfn, p_ptt); @@ -654,16 +631,14 @@ static int qed_dmae_execute_sub_operation(struct qed_hwfn *p_hwfn, if (qed_status) { DP_NOTICE(p_hwfn, "qed_dmae_host2grc: Wait Failed. source_addr 0x%llx, grc_addr 0x%llx, size_in_dwords 0x%x\n", - src_addr, - dst_addr, - length); + src_addr, dst_addr, length_dw); return qed_status; } if (dst_type == QED_DMAE_ADDRESS_HOST_VIRT) memcpy((void *)(uintptr_t)(dst_addr), &p_hwfn->dmae_info.p_intermediate_buffer[0], - length * sizeof(u32)); + length_dw * sizeof(u32)); return 0; } @@ -730,10 +705,7 @@ static int qed_dmae_execute_command(struct qed_hwfn *p_hwfn, if (qed_status) { DP_NOTICE(p_hwfn, "qed_dmae_execute_sub_operation Failed with error 0x%x. source_addr 0x%llx, destination addr 0x%llx, size_in_dwords 0x%x\n", - qed_status, - src_addr, - dst_addr, - length_cur); + qed_status, src_addr, dst_addr, length_cur); break; } } @@ -743,10 +715,7 @@ static int qed_dmae_execute_command(struct qed_hwfn *p_hwfn, int qed_dmae_host2grc(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, - u64 source_addr, - u32 grc_addr, - u32 size_in_dwords, - u32 flags) + u64 source_addr, u32 grc_addr, u32 size_in_dwords, u32 flags) { u32 grc_addr_in_dw = grc_addr / sizeof(u32); struct qed_dmae_params params; @@ -768,9 +737,10 @@ int qed_dmae_host2grc(struct qed_hwfn *p_hwfn, return rc; } -int -qed_dmae_grc2host(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u32 grc_addr, - dma_addr_t dest_addr, u32 size_in_dwords, u32 flags) +int qed_dmae_grc2host(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 grc_addr, + dma_addr_t dest_addr, u32 size_in_dwords, u32 flags) { u32 grc_addr_in_dw = grc_addr / sizeof(u32); struct qed_dmae_params params; @@ -791,12 +761,11 @@ qed_dmae_grc2host(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u32 grc_addr, return rc; } -int -qed_dmae_host2host(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - dma_addr_t source_addr, - dma_addr_t dest_addr, - u32 size_in_dwords, struct qed_dmae_params *p_params) +int qed_dmae_host2host(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + dma_addr_t source_addr, + dma_addr_t dest_addr, + u32 size_in_dwords, struct qed_dmae_params *p_params) { int rc; diff --git a/drivers/net/ethernet/qlogic/qed/qed_init_ops.c b/drivers/net/ethernet/qlogic/qed/qed_init_ops.c index 9866a20d2128..57044eeeef24 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_init_ops.c +++ b/drivers/net/ethernet/qlogic/qed/qed_init_ops.c @@ -59,17 +59,14 @@ void qed_init_clear_rt_data(struct qed_hwfn *p_hwfn) p_hwfn->rt_data.b_valid[i] = false; } -void qed_init_store_rt_reg(struct qed_hwfn *p_hwfn, - u32 rt_offset, - u32 val) +void qed_init_store_rt_reg(struct qed_hwfn *p_hwfn, u32 rt_offset, u32 val) { p_hwfn->rt_data.init_val[rt_offset] = val; p_hwfn->rt_data.b_valid[rt_offset] = true; } void qed_init_store_rt_agg(struct qed_hwfn *p_hwfn, - u32 rt_offset, u32 *p_val, - size_t size) + u32 rt_offset, u32 *p_val, size_t size) { size_t i; @@ -81,10 +78,7 @@ void qed_init_store_rt_agg(struct qed_hwfn *p_hwfn, static int qed_init_rt(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, - u32 addr, - u16 rt_offset, - u16 size, - bool b_must_dmae) + u32 addr, u16 rt_offset, u16 size, bool b_must_dmae) { u32 *p_init_val = &p_hwfn->rt_data.init_val[rt_offset]; bool *p_valid = &p_hwfn->rt_data.b_valid[rt_offset]; @@ -102,8 +96,7 @@ static int qed_init_rt(struct qed_hwfn *p_hwfn, * simply write the data instead of using dmae. */ if (!b_must_dmae) { - qed_wr(p_hwfn, p_ptt, addr + (i << 2), - p_init_val[i]); + qed_wr(p_hwfn, p_ptt, addr + (i << 2), p_init_val[i]); continue; } @@ -115,7 +108,7 @@ static int qed_init_rt(struct qed_hwfn *p_hwfn, rc = qed_dmae_host2grc(p_hwfn, p_ptt, (uintptr_t)(p_init_val + i), addr + (i << 2), segment, 0); - if (rc != 0) + if (rc) return rc; /* Jump over the entire segment, including invalid entry */ @@ -182,9 +175,7 @@ static int qed_init_array_dmae(struct qed_hwfn *p_hwfn, static int qed_init_fill_dmae(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, - u32 addr, - u32 fill, - u32 fill_count) + u32 addr, u32 fill, u32 fill_count) { static u32 zero_buffer[DMAE_MAX_RW_SIZE]; @@ -199,15 +190,12 @@ static int qed_init_fill_dmae(struct qed_hwfn *p_hwfn, return qed_dmae_host2grc(p_hwfn, p_ptt, (uintptr_t)(&zero_buffer[0]), - addr, fill_count, - QED_DMAE_FLAG_RW_REPL_SRC); + addr, fill_count, QED_DMAE_FLAG_RW_REPL_SRC); } static void qed_init_fill(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, - u32 addr, - u32 fill, - u32 fill_count) + u32 addr, u32 fill, u32 fill_count) { u32 i; @@ -218,12 +206,12 @@ static void qed_init_fill(struct qed_hwfn *p_hwfn, static int qed_init_cmd_array(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, struct init_write_op *cmd, - bool b_must_dmae, - bool b_can_dmae) + bool b_must_dmae, bool b_can_dmae) { + u32 dmae_array_offset = le32_to_cpu(cmd->args.array_offset); u32 data = le32_to_cpu(cmd->data); u32 addr = GET_FIELD(data, INIT_WRITE_OP_ADDRESS) << 2; - u32 dmae_array_offset = le32_to_cpu(cmd->args.array_offset); + u32 offset, output_len, input_len, max_size; struct qed_dev *cdev = p_hwfn->cdev; union init_array_hdr *hdr; @@ -233,8 +221,7 @@ static int qed_init_cmd_array(struct qed_hwfn *p_hwfn, array_data = cdev->fw_data->arr_data; - hdr = (union init_array_hdr *)(array_data + - dmae_array_offset); + hdr = (union init_array_hdr *)(array_data + dmae_array_offset); data = le32_to_cpu(hdr->raw.data); switch (GET_FIELD(data, INIT_ARRAY_RAW_HDR_TYPE)) { case INIT_ARR_ZIPPED: @@ -290,13 +277,12 @@ static int qed_init_cmd_array(struct qed_hwfn *p_hwfn, /* init_ops write command */ static int qed_init_cmd_wr(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, - struct init_write_op *cmd, - bool b_can_dmae) + struct init_write_op *p_cmd, bool b_can_dmae) { - u32 data = le32_to_cpu(cmd->data); - u32 addr = GET_FIELD(data, INIT_WRITE_OP_ADDRESS) << 2; + u32 data = le32_to_cpu(p_cmd->data); bool b_must_dmae = GET_FIELD(data, INIT_WRITE_OP_WIDE_BUS); - union init_write_args *arg = &cmd->args; + u32 addr = GET_FIELD(data, INIT_WRITE_OP_ADDRESS) << 2; + union init_write_args *arg = &p_cmd->args; int rc = 0; /* Sanitize */ @@ -322,7 +308,7 @@ static int qed_init_cmd_wr(struct qed_hwfn *p_hwfn, le32_to_cpu(arg->zeros_count)); break; case INIT_SRC_ARRAY: - rc = qed_init_cmd_array(p_hwfn, p_ptt, cmd, + rc = qed_init_cmd_array(p_hwfn, p_ptt, p_cmd, b_must_dmae, b_can_dmae); break; case INIT_SRC_RUNTIME: @@ -353,8 +339,7 @@ static inline bool comp_or(u32 val, u32 expected_val) /* init_ops read/poll commands */ static void qed_init_cmd_rd(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - struct init_read_op *cmd) + struct qed_ptt *p_ptt, struct init_read_op *cmd) { bool (*comp_check)(u32 val, u32 expected_val); u32 delay = QED_INIT_POLL_PERIOD_US, val; @@ -412,35 +397,33 @@ static void qed_init_cmd_cb(struct qed_hwfn *p_hwfn, } static u8 qed_init_cmd_mode_match(struct qed_hwfn *p_hwfn, - u16 *offset, - int modes) + u16 *p_offset, int modes) { struct qed_dev *cdev = p_hwfn->cdev; const u8 *modes_tree_buf; u8 arg1, arg2, tree_val; modes_tree_buf = cdev->fw_data->modes_tree_buf; - tree_val = modes_tree_buf[(*offset)++]; + tree_val = modes_tree_buf[(*p_offset)++]; switch (tree_val) { case INIT_MODE_OP_NOT: - return qed_init_cmd_mode_match(p_hwfn, offset, modes) ^ 1; + return qed_init_cmd_mode_match(p_hwfn, p_offset, modes) ^ 1; case INIT_MODE_OP_OR: - arg1 = qed_init_cmd_mode_match(p_hwfn, offset, modes); - arg2 = qed_init_cmd_mode_match(p_hwfn, offset, modes); + arg1 = qed_init_cmd_mode_match(p_hwfn, p_offset, modes); + arg2 = qed_init_cmd_mode_match(p_hwfn, p_offset, modes); return arg1 | arg2; case INIT_MODE_OP_AND: - arg1 = qed_init_cmd_mode_match(p_hwfn, offset, modes); - arg2 = qed_init_cmd_mode_match(p_hwfn, offset, modes); + arg1 = qed_init_cmd_mode_match(p_hwfn, p_offset, modes); + arg2 = qed_init_cmd_mode_match(p_hwfn, p_offset, modes); return arg1 & arg2; default: tree_val -= MAX_INIT_MODE_OPS; - return (modes & (1 << tree_val)) ? 1 : 0; + return (modes & BIT(tree_val)) ? 1 : 0; } } static u32 qed_init_cmd_mode(struct qed_hwfn *p_hwfn, - struct init_if_mode_op *p_cmd, - int modes) + struct init_if_mode_op *p_cmd, int modes) { u16 offset = le16_to_cpu(p_cmd->modes_buf_offset); @@ -453,8 +436,7 @@ static u32 qed_init_cmd_mode(struct qed_hwfn *p_hwfn, static u32 qed_init_cmd_phase(struct qed_hwfn *p_hwfn, struct init_if_phase_op *p_cmd, - u32 phase, - u32 phase_id) + u32 phase, u32 phase_id) { u32 data = le32_to_cpu(p_cmd->phase_data); u32 op_data = le32_to_cpu(p_cmd->op_data); @@ -468,10 +450,7 @@ static u32 qed_init_cmd_phase(struct qed_hwfn *p_hwfn, } int qed_init_run(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - int phase, - int phase_id, - int modes) + struct qed_ptt *p_ptt, int phase, int phase_id, int modes) { struct qed_dev *cdev = p_hwfn->cdev; u32 cmd_num, num_init_ops; diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c index 8fa50fa23c8d..2afa7a09e1a3 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_int.c +++ b/drivers/net/ethernet/qlogic/qed/qed_int.c @@ -1775,10 +1775,9 @@ struct qed_sb_attn_info { }; static inline u16 qed_attn_update_idx(struct qed_hwfn *p_hwfn, - struct qed_sb_attn_info *p_sb_desc) + struct qed_sb_attn_info *p_sb_desc) { - u16 rc = 0; - u16 index; + u16 rc = 0, index; /* Make certain HW write took affect */ mmiowb(); @@ -1802,15 +1801,13 @@ static inline u16 qed_attn_update_idx(struct qed_hwfn *p_hwfn, * @param asserted_bits newly asserted bits * @return int */ -static int qed_int_assertion(struct qed_hwfn *p_hwfn, - u16 asserted_bits) +static int qed_int_assertion(struct qed_hwfn *p_hwfn, u16 asserted_bits) { struct qed_sb_attn_info *sb_attn_sw = p_hwfn->p_sb_attn; u32 igu_mask; /* Mask the source of the attention in the IGU */ - igu_mask = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt, - IGU_REG_ATTENTION_ENABLE); + igu_mask = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt, IGU_REG_ATTENTION_ENABLE); DP_VERBOSE(p_hwfn, NETIF_MSG_INTR, "IGU mask: 0x%08x --> 0x%08x\n", igu_mask, igu_mask & ~(asserted_bits & ATTN_BITS_MASKABLE)); igu_mask &= ~(asserted_bits & ATTN_BITS_MASKABLE); @@ -2041,7 +2038,7 @@ static int qed_int_deassertion(struct qed_hwfn *p_hwfn, struct aeu_invert_reg_bit *p_bit = &p_aeu->bits[j]; if ((p_bit->flags & ATTENTION_PARITY) && - !!(parities & (1 << bit_idx))) + !!(parities & BIT(bit_idx))) qed_int_deassertion_parity(p_hwfn, p_bit, bit_idx); @@ -2114,8 +2111,7 @@ static int qed_int_deassertion(struct qed_hwfn *p_hwfn, ~((u32)deasserted_bits)); /* Unmask deasserted attentions in IGU */ - aeu_mask = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt, - IGU_REG_ATTENTION_ENABLE); + aeu_mask = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt, IGU_REG_ATTENTION_ENABLE); aeu_mask |= (deasserted_bits & ATTN_BITS_MASKABLE); qed_wr(p_hwfn, p_hwfn->p_dpc_ptt, IGU_REG_ATTENTION_ENABLE, aeu_mask); @@ -2160,8 +2156,7 @@ static int qed_int_attentions(struct qed_hwfn *p_hwfn) index, attn_bits, attn_acks, asserted_bits, deasserted_bits, p_sb_attn_sw->known_attn); } else if (asserted_bits == 0x100) { - DP_INFO(p_hwfn, - "MFW indication via attention\n"); + DP_INFO(p_hwfn, "MFW indication via attention\n"); } else { DP_VERBOSE(p_hwfn, NETIF_MSG_INTR, "MFW indication [deassertion]\n"); @@ -2173,18 +2168,14 @@ static int qed_int_attentions(struct qed_hwfn *p_hwfn) return rc; } - if (deasserted_bits) { + if (deasserted_bits) rc = qed_int_deassertion(p_hwfn, deasserted_bits); - if (rc) - return rc; - } return rc; } static void qed_sb_ack_attn(struct qed_hwfn *p_hwfn, - void __iomem *igu_addr, - u32 ack_cons) + void __iomem *igu_addr, u32 ack_cons) { struct igu_prod_cons_update igu_ack = { 0 }; @@ -2242,9 +2233,8 @@ void qed_int_sp_dpc(unsigned long hwfn_cookie) /* Gather Interrupts/Attentions information */ if (!sb_info->sb_virt) { - DP_ERR( - p_hwfn->cdev, - "Interrupt Status block is NULL - cannot check for new interrupts!\n"); + DP_ERR(p_hwfn->cdev, + "Interrupt Status block is NULL - cannot check for new interrupts!\n"); } else { u32 tmp_index = sb_info->sb_ack; @@ -2255,9 +2245,8 @@ void qed_int_sp_dpc(unsigned long hwfn_cookie) } if (!sb_attn || !sb_attn->sb_attn) { - DP_ERR( - p_hwfn->cdev, - "Attentions Status block is NULL - cannot check for new attentions!\n"); + DP_ERR(p_hwfn->cdev, + "Attentions Status block is NULL - cannot check for new attentions!\n"); } else { u16 tmp_index = sb_attn->index; @@ -2313,8 +2302,7 @@ static void qed_int_sb_attn_free(struct qed_hwfn *p_hwfn) if (p_sb->sb_attn) dma_free_coherent(&p_hwfn->cdev->pdev->dev, SB_ATTN_ALIGNED_SIZE(p_hwfn), - p_sb->sb_attn, - p_sb->sb_phys); + p_sb->sb_attn, p_sb->sb_phys); kfree(p_sb); } @@ -2337,8 +2325,7 @@ static void qed_int_sb_attn_setup(struct qed_hwfn *p_hwfn, static void qed_int_sb_attn_init(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, - void *sb_virt_addr, - dma_addr_t sb_phy_addr) + void *sb_virt_addr, dma_addr_t sb_phy_addr) { struct qed_sb_attn_info *sb_info = p_hwfn->p_sb_attn; int i, j, k; @@ -2378,8 +2365,8 @@ static int qed_int_sb_attn_alloc(struct qed_hwfn *p_hwfn, { struct qed_dev *cdev = p_hwfn->cdev; struct qed_sb_attn_info *p_sb; - void *p_virt; dma_addr_t p_phys = 0; + void *p_virt; /* SB struct */ p_sb = kmalloc(sizeof(*p_sb), GFP_KERNEL); @@ -2412,9 +2399,7 @@ static int qed_int_sb_attn_alloc(struct qed_hwfn *p_hwfn, void qed_init_cau_sb_entry(struct qed_hwfn *p_hwfn, struct cau_sb_entry *p_sb_entry, - u8 pf_id, - u16 vf_number, - u8 vf_valid) + u8 pf_id, u16 vf_number, u8 vf_valid) { struct qed_dev *cdev = p_hwfn->cdev; u32 cau_state; @@ -2468,9 +2453,7 @@ void qed_init_cau_sb_entry(struct qed_hwfn *p_hwfn, void qed_int_cau_conf_sb(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, dma_addr_t sb_phys, - u16 igu_sb_id, - u16 vf_number, - u8 vf_valid) + u16 igu_sb_id, u16 vf_number, u8 vf_valid) { struct cau_sb_entry sb_entry; @@ -2514,8 +2497,7 @@ void qed_int_cau_conf_sb(struct qed_hwfn *p_hwfn, timer_res = 2; timeset = (u8)(p_hwfn->cdev->rx_coalesce_usecs >> timer_res); qed_int_cau_conf_pi(p_hwfn, p_ptt, igu_sb_id, RX_PI, - QED_COAL_RX_STATE_MACHINE, - timeset); + QED_COAL_RX_STATE_MACHINE, timeset); if (p_hwfn->cdev->tx_coalesce_usecs <= 0x7F) timer_res = 0; @@ -2541,8 +2523,7 @@ void qed_int_cau_conf_pi(struct qed_hwfn *p_hwfn, u8 timeset) { struct cau_pi_entry pi_entry; - u32 sb_offset; - u32 pi_offset; + u32 sb_offset, pi_offset; if (IS_VF(p_hwfn->cdev)) return; @@ -2569,8 +2550,7 @@ void qed_int_cau_conf_pi(struct qed_hwfn *p_hwfn, } void qed_int_sb_setup(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - struct qed_sb_info *sb_info) + struct qed_ptt *p_ptt, struct qed_sb_info *sb_info) { /* zero status block and ack counter */ sb_info->sb_ack = 0; @@ -2590,8 +2570,7 @@ void qed_int_sb_setup(struct qed_hwfn *p_hwfn, * * @return u16 */ -static u16 qed_get_igu_sb_id(struct qed_hwfn *p_hwfn, - u16 sb_id) +static u16 qed_get_igu_sb_id(struct qed_hwfn *p_hwfn, u16 sb_id) { u16 igu_sb_id; @@ -2612,9 +2591,7 @@ static u16 qed_get_igu_sb_id(struct qed_hwfn *p_hwfn, int qed_int_sb_init(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, struct qed_sb_info *sb_info, - void *sb_virt_addr, - dma_addr_t sb_phy_addr, - u16 sb_id) + void *sb_virt_addr, dma_addr_t sb_phy_addr, u16 sb_id) { sb_info->sb_virt = sb_virt_addr; sb_info->sb_phys = sb_phy_addr; @@ -2650,8 +2627,7 @@ int qed_int_sb_init(struct qed_hwfn *p_hwfn, } int qed_int_sb_release(struct qed_hwfn *p_hwfn, - struct qed_sb_info *sb_info, - u16 sb_id) + struct qed_sb_info *sb_info, u16 sb_id) { if (sb_id == QED_SP_SB_ID) { DP_ERR(p_hwfn, "Do Not free sp sb using this function"); @@ -2685,8 +2661,7 @@ static void qed_int_sp_sb_free(struct qed_hwfn *p_hwfn) kfree(p_sb); } -static int qed_int_sp_sb_alloc(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt) +static int qed_int_sp_sb_alloc(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { struct qed_sb_sp_info *p_sb; dma_addr_t p_phys = 0; @@ -2721,9 +2696,7 @@ static int qed_int_sp_sb_alloc(struct qed_hwfn *p_hwfn, int qed_int_register_cb(struct qed_hwfn *p_hwfn, qed_int_comp_cb_t comp_cb, - void *cookie, - u8 *sb_idx, - __le16 **p_fw_cons) + void *cookie, u8 *sb_idx, __le16 **p_fw_cons) { struct qed_sb_sp_info *p_sp_sb = p_hwfn->p_sp_sb; int rc = -ENOMEM; @@ -2764,8 +2737,7 @@ u16 qed_int_get_sp_sb_id(struct qed_hwfn *p_hwfn) } void qed_int_igu_enable_int(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - enum qed_int_mode int_mode) + struct qed_ptt *p_ptt, enum qed_int_mode int_mode) { u32 igu_pf_conf = IGU_PF_CONF_FUNC_EN | IGU_PF_CONF_ATTN_BIT_EN; @@ -2809,7 +2781,7 @@ int qed_int_igu_enable(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, qed_wr(p_hwfn, p_ptt, MISC_REG_AEU_MASK_ATTN_IGU, 0xff); if ((int_mode != QED_INT_MODE_INTA) || IS_LEAD_HWFN(p_hwfn)) { rc = qed_slowpath_irq_req(p_hwfn); - if (rc != 0) { + if (rc) { DP_NOTICE(p_hwfn, "Slowpath IRQ request failed\n"); return -EINVAL; } @@ -2822,8 +2794,7 @@ int qed_int_igu_enable(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, return rc; } -void qed_int_igu_disable_int(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt) +void qed_int_igu_disable_int(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { p_hwfn->b_int_enabled = 0; @@ -2950,13 +2921,11 @@ void qed_int_igu_init_pure_rt(struct qed_hwfn *p_hwfn, p_hwfn->hw_info.opaque_fid, b_set); } -static u32 qed_int_igu_read_cam_block(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - u16 sb_id) +static u32 qed_int_igu_read_cam_block(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u16 sb_id) { u32 val = qed_rd(p_hwfn, p_ptt, - IGU_REG_MAPPING_MEMORY + - sizeof(u32) * sb_id); + IGU_REG_MAPPING_MEMORY + sizeof(u32) * sb_id); struct qed_igu_block *p_block; p_block = &p_hwfn->hw_info.p_igu_info->igu_map.igu_blocks[sb_id]; @@ -2983,8 +2952,7 @@ out: return val; } -int qed_int_igu_read_cam(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt) +int qed_int_igu_read_cam(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { struct qed_igu_info *p_igu_info; u32 val, min_vf = 0, max_vf = 0; @@ -3104,22 +3072,19 @@ int qed_int_igu_read_cam(struct qed_hwfn *p_hwfn, */ void qed_int_igu_init_rt(struct qed_hwfn *p_hwfn) { - u32 igu_pf_conf = 0; - - igu_pf_conf |= IGU_PF_CONF_FUNC_EN; + u32 igu_pf_conf = IGU_PF_CONF_FUNC_EN; STORE_RT_REG(p_hwfn, IGU_REG_PF_CONFIGURATION_RT_OFFSET, igu_pf_conf); } u64 qed_int_igu_read_sisr_reg(struct qed_hwfn *p_hwfn) { - u64 intr_status = 0; - u32 intr_status_lo = 0; - u32 intr_status_hi = 0; u32 lsb_igu_cmd_addr = IGU_REG_SISR_MDPC_WMASK_LSB_UPPER - IGU_CMD_INT_ACK_BASE; u32 msb_igu_cmd_addr = IGU_REG_SISR_MDPC_WMASK_MSB_UPPER - IGU_CMD_INT_ACK_BASE; + u32 intr_status_hi = 0, intr_status_lo = 0; + u64 intr_status = 0; intr_status_lo = REG_RD(p_hwfn, GTT_BAR0_MAP_REG_IGU_CMD + @@ -3153,8 +3118,7 @@ static void qed_int_sp_dpc_free(struct qed_hwfn *p_hwfn) kfree(p_hwfn->sp_dpc); } -int qed_int_alloc(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt) +int qed_int_alloc(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { int rc = 0; @@ -3183,8 +3147,7 @@ void qed_int_free(struct qed_hwfn *p_hwfn) qed_int_sp_dpc_free(p_hwfn); } -void qed_int_setup(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt) +void qed_int_setup(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { qed_int_sb_setup(p_hwfn, p_ptt, &p_hwfn->p_sp_sb->sb_info); qed_int_sb_attn_setup(p_hwfn, p_ptt); diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index 401e738543b5..b5d844568107 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -52,7 +52,7 @@ int qed_sp_eth_vport_start(struct qed_hwfn *p_hwfn, u16 rx_mode = 0; rc = qed_fw_vport(p_hwfn, p_params->vport_id, &abs_vport_id); - if (rc != 0) + if (rc) return rc; memset(&init_data, 0, sizeof(init_data)); @@ -80,8 +80,7 @@ int qed_sp_eth_vport_start(struct qed_hwfn *p_hwfn, p_ramrod->rx_mode.state = cpu_to_le16(rx_mode); /* TPA related fields */ - memset(&p_ramrod->tpa_param, 0, - sizeof(struct eth_vport_tpa_param)); + memset(&p_ramrod->tpa_param, 0, sizeof(struct eth_vport_tpa_param)); p_ramrod->tpa_param.max_buff_num = p_params->max_buffers_per_cqe; @@ -336,7 +335,7 @@ int qed_sp_vport_update(struct qed_hwfn *p_hwfn, } rc = qed_fw_vport(p_hwfn, p_params->vport_id, &abs_vport_id); - if (rc != 0) + if (rc) return rc; memset(&init_data, 0, sizeof(init_data)); @@ -411,7 +410,7 @@ int qed_sp_vport_stop(struct qed_hwfn *p_hwfn, u16 opaque_fid, u8 vport_id) return qed_vf_pf_vport_stop(p_hwfn); rc = qed_fw_vport(p_hwfn, vport_id, &abs_vport_id); - if (rc != 0) + if (rc) return rc; memset(&init_data, 0, sizeof(init_data)); @@ -476,7 +475,7 @@ static int qed_filter_accept_cmd(struct qed_dev *cdev, rc = qed_sp_vport_update(p_hwfn, &vport_update_params, comp_mode, p_comp_data); - if (rc != 0) { + if (rc) { DP_ERR(cdev, "Update rx_mode failed %d\n", rc); return rc; } @@ -511,7 +510,7 @@ static int qed_sp_release_queue_cid( int qed_sp_eth_rxq_start_ramrod(struct qed_hwfn *p_hwfn, u16 opaque_fid, u32 cid, - struct qed_queue_start_common_params *params, + struct qed_queue_start_common_params *p_params, u8 stats_id, u16 bd_max_bytes, dma_addr_t bd_chain_phys_addr, @@ -526,23 +525,23 @@ int qed_sp_eth_rxq_start_ramrod(struct qed_hwfn *p_hwfn, int rc = -EINVAL; /* Store information for the stop */ - p_rx_cid = &p_hwfn->p_rx_cids[params->queue_id]; - p_rx_cid->cid = cid; - p_rx_cid->opaque_fid = opaque_fid; - p_rx_cid->vport_id = params->vport_id; + p_rx_cid = &p_hwfn->p_rx_cids[p_params->queue_id]; + p_rx_cid->cid = cid; + p_rx_cid->opaque_fid = opaque_fid; + p_rx_cid->vport_id = p_params->vport_id; - rc = qed_fw_vport(p_hwfn, params->vport_id, &abs_vport_id); - if (rc != 0) + rc = qed_fw_vport(p_hwfn, p_params->vport_id, &abs_vport_id); + if (rc) return rc; - rc = qed_fw_l2_queue(p_hwfn, params->queue_id, &abs_rx_q_id); - if (rc != 0) + rc = qed_fw_l2_queue(p_hwfn, p_params->queue_id, &abs_rx_q_id); + if (rc) return rc; DP_VERBOSE(p_hwfn, QED_MSG_SP, "opaque_fid=0x%x, cid=0x%x, rx_qid=0x%x, vport_id=0x%x, sb_id=0x%x\n", - opaque_fid, cid, params->queue_id, params->vport_id, - params->sb); + opaque_fid, + cid, p_params->queue_id, p_params->vport_id, p_params->sb); /* Get SPQ entry */ memset(&init_data, 0, sizeof(init_data)); @@ -558,24 +557,25 @@ int qed_sp_eth_rxq_start_ramrod(struct qed_hwfn *p_hwfn, p_ramrod = &p_ent->ramrod.rx_queue_start; - p_ramrod->sb_id = cpu_to_le16(params->sb); - p_ramrod->sb_index = params->sb_idx; - p_ramrod->vport_id = abs_vport_id; - p_ramrod->stats_counter_id = stats_id; - p_ramrod->rx_queue_id = cpu_to_le16(abs_rx_q_id); - p_ramrod->complete_cqe_flg = 0; - p_ramrod->complete_event_flg = 1; + p_ramrod->sb_id = cpu_to_le16(p_params->sb); + p_ramrod->sb_index = p_params->sb_idx; + p_ramrod->vport_id = abs_vport_id; + p_ramrod->stats_counter_id = stats_id; + p_ramrod->rx_queue_id = cpu_to_le16(abs_rx_q_id); + p_ramrod->complete_cqe_flg = 0; + p_ramrod->complete_event_flg = 1; - p_ramrod->bd_max_bytes = cpu_to_le16(bd_max_bytes); + p_ramrod->bd_max_bytes = cpu_to_le16(bd_max_bytes); DMA_REGPAIR_LE(p_ramrod->bd_base, bd_chain_phys_addr); - p_ramrod->num_of_pbl_pages = cpu_to_le16(cqe_pbl_size); + p_ramrod->num_of_pbl_pages = cpu_to_le16(cqe_pbl_size); DMA_REGPAIR_LE(p_ramrod->cqe_pbl_addr, cqe_pbl_addr); - p_ramrod->vf_rx_prod_index = params->vf_qid; - if (params->vf_qid) + p_ramrod->vf_rx_prod_index = p_params->vf_qid; + if (p_params->vf_qid) DP_VERBOSE(p_hwfn, QED_MSG_SP, - "Queue is meant for VF rxq[%04x]\n", params->vf_qid); + "Queue is meant for VF rxq[%04x]\n", + p_params->vf_qid); return qed_spq_post(p_hwfn, p_ent, NULL); } @@ -583,7 +583,7 @@ int qed_sp_eth_rxq_start_ramrod(struct qed_hwfn *p_hwfn, static int qed_sp_eth_rx_queue_start(struct qed_hwfn *p_hwfn, u16 opaque_fid, - struct qed_queue_start_common_params *params, + struct qed_queue_start_common_params *p_params, u16 bd_max_bytes, dma_addr_t bd_chain_phys_addr, dma_addr_t cqe_pbl_addr, @@ -597,20 +597,20 @@ qed_sp_eth_rx_queue_start(struct qed_hwfn *p_hwfn, if (IS_VF(p_hwfn->cdev)) { return qed_vf_pf_rxq_start(p_hwfn, - params->queue_id, - params->sb, - params->sb_idx, + p_params->queue_id, + p_params->sb, + (u8)p_params->sb_idx, bd_max_bytes, bd_chain_phys_addr, cqe_pbl_addr, cqe_pbl_size, pp_prod); } - rc = qed_fw_l2_queue(p_hwfn, params->queue_id, &abs_l2_queue); - if (rc != 0) + rc = qed_fw_l2_queue(p_hwfn, p_params->queue_id, &abs_l2_queue); + if (rc) return rc; - rc = qed_fw_vport(p_hwfn, params->vport_id, &abs_stats_id); - if (rc != 0) + rc = qed_fw_vport(p_hwfn, p_params->vport_id, &abs_stats_id); + if (rc) return rc; *pp_prod = (u8 __iomem *)p_hwfn->regview + @@ -622,9 +622,8 @@ qed_sp_eth_rx_queue_start(struct qed_hwfn *p_hwfn, (u32 *)(&init_prod_val)); /* Allocate a CID for the queue */ - p_rx_cid = &p_hwfn->p_rx_cids[params->queue_id]; - rc = qed_cxt_acquire_cid(p_hwfn, PROTOCOLID_ETH, - &p_rx_cid->cid); + p_rx_cid = &p_hwfn->p_rx_cids[p_params->queue_id]; + rc = qed_cxt_acquire_cid(p_hwfn, PROTOCOLID_ETH, &p_rx_cid->cid); if (rc) { DP_NOTICE(p_hwfn, "Failed to acquire cid\n"); return rc; @@ -634,14 +633,14 @@ qed_sp_eth_rx_queue_start(struct qed_hwfn *p_hwfn, rc = qed_sp_eth_rxq_start_ramrod(p_hwfn, opaque_fid, p_rx_cid->cid, - params, + p_params, abs_stats_id, bd_max_bytes, bd_chain_phys_addr, cqe_pbl_addr, cqe_pbl_size); - if (rc != 0) + if (rc) qed_sp_release_queue_cid(p_hwfn, p_rx_cid); return rc; @@ -788,21 +787,20 @@ int qed_sp_eth_txq_start_ramrod(struct qed_hwfn *p_hwfn, if (rc) return rc; - p_ramrod = &p_ent->ramrod.tx_queue_start; - p_ramrod->vport_id = abs_vport_id; + p_ramrod = &p_ent->ramrod.tx_queue_start; + p_ramrod->vport_id = abs_vport_id; - p_ramrod->sb_id = cpu_to_le16(p_params->sb); - p_ramrod->sb_index = p_params->sb_idx; - p_ramrod->stats_counter_id = stats_id; + p_ramrod->sb_id = cpu_to_le16(p_params->sb); + p_ramrod->sb_index = p_params->sb_idx; + p_ramrod->stats_counter_id = stats_id; - p_ramrod->queue_zone_id = cpu_to_le16(abs_tx_q_id); - p_ramrod->pbl_size = cpu_to_le16(pbl_size); + p_ramrod->queue_zone_id = cpu_to_le16(abs_tx_q_id); + + p_ramrod->pbl_size = cpu_to_le16(pbl_size); DMA_REGPAIR_LE(p_ramrod->pbl_base_addr, pbl_addr); - pq_id = qed_get_qm_pq(p_hwfn, - PROTOCOLID_ETH, - p_pq_params); - p_ramrod->qm_pq_id = cpu_to_le16(pq_id); + pq_id = qed_get_qm_pq(p_hwfn, PROTOCOLID_ETH, p_pq_params); + p_ramrod->qm_pq_id = cpu_to_le16(pq_id); return qed_spq_post(p_hwfn, p_ent, NULL); } @@ -836,8 +834,7 @@ qed_sp_eth_tx_queue_start(struct qed_hwfn *p_hwfn, memset(&pq_params, 0, sizeof(pq_params)); /* Allocate a CID for the queue */ - rc = qed_cxt_acquire_cid(p_hwfn, PROTOCOLID_ETH, - &p_tx_cid->cid); + rc = qed_cxt_acquire_cid(p_hwfn, PROTOCOLID_ETH, &p_tx_cid->cid); if (rc) { DP_NOTICE(p_hwfn, "Failed to acquire cid\n"); return rc; @@ -896,8 +893,7 @@ int qed_sp_eth_tx_queue_stop(struct qed_hwfn *p_hwfn, u16 tx_queue_id) return qed_sp_release_queue_cid(p_hwfn, p_tx_cid); } -static enum eth_filter_action -qed_filter_action(enum qed_filter_opcode opcode) +static enum eth_filter_action qed_filter_action(enum qed_filter_opcode opcode) { enum eth_filter_action action = MAX_ETH_FILTER_ACTION; @@ -1033,19 +1029,19 @@ qed_filter_ucast_common(struct qed_hwfn *p_hwfn, p_first_filter->vni = cpu_to_le32(p_filter_cmd->vni); if (p_filter_cmd->opcode == QED_FILTER_MOVE) { - p_second_filter->type = p_first_filter->type; - p_second_filter->mac_msb = p_first_filter->mac_msb; - p_second_filter->mac_mid = p_first_filter->mac_mid; - p_second_filter->mac_lsb = p_first_filter->mac_lsb; - p_second_filter->vlan_id = p_first_filter->vlan_id; - p_second_filter->vni = p_first_filter->vni; + p_second_filter->type = p_first_filter->type; + p_second_filter->mac_msb = p_first_filter->mac_msb; + p_second_filter->mac_mid = p_first_filter->mac_mid; + p_second_filter->mac_lsb = p_first_filter->mac_lsb; + p_second_filter->vlan_id = p_first_filter->vlan_id; + p_second_filter->vni = p_first_filter->vni; p_first_filter->action = ETH_FILTER_ACTION_REMOVE; p_first_filter->vport_id = vport_to_remove_from; - p_second_filter->action = ETH_FILTER_ACTION_ADD; - p_second_filter->vport_id = vport_to_add_to; + p_second_filter->action = ETH_FILTER_ACTION_ADD; + p_second_filter->vport_id = vport_to_add_to; } else if (p_filter_cmd->opcode == QED_FILTER_REPLACE) { p_first_filter->vport_id = vport_to_add_to; memcpy(p_second_filter, p_first_filter, @@ -1086,7 +1082,7 @@ int qed_sp_eth_filter_ucast(struct qed_hwfn *p_hwfn, rc = qed_filter_ucast_common(p_hwfn, opaque_fid, p_filter_cmd, &p_ramrod, &p_ent, comp_mode, p_comp_data); - if (rc != 0) { + if (rc) { DP_ERR(p_hwfn, "Uni. filter command failed %d\n", rc); return rc; } @@ -1094,10 +1090,8 @@ int qed_sp_eth_filter_ucast(struct qed_hwfn *p_hwfn, p_header->assert_on_error = p_filter_cmd->assert_on_error; rc = qed_spq_post(p_hwfn, p_ent, NULL); - if (rc != 0) { - DP_ERR(p_hwfn, - "Unicast filter ADD command failed %d\n", - rc); + if (rc) { + DP_ERR(p_hwfn, "Unicast filter ADD command failed %d\n", rc); return rc; } @@ -1136,15 +1130,10 @@ int qed_sp_eth_filter_ucast(struct qed_hwfn *p_hwfn, * Return: ******************************************************************************/ static u32 qed_calc_crc32c(u8 *crc32_packet, - u32 crc32_length, - u32 crc32_seed, - u8 complement) + u32 crc32_length, u32 crc32_seed, u8 complement) { - u32 byte = 0; - u32 bit = 0; - u8 msb = 0; - u8 current_byte = 0; - u32 crc32_result = crc32_seed; + u32 byte = 0, bit = 0, crc32_result = crc32_seed; + u8 msb = 0, current_byte = 0; if ((!crc32_packet) || (crc32_length == 0) || @@ -1164,9 +1153,7 @@ static u32 qed_calc_crc32c(u8 *crc32_packet, return crc32_result; } -static inline u32 qed_crc32c_le(u32 seed, - u8 *mac, - u32 len) +static u32 qed_crc32c_le(u32 seed, u8 *mac, u32 len) { u32 packet_buf[2] = { 0 }; @@ -1244,11 +1231,11 @@ qed_sp_eth_filter_mcast(struct qed_hwfn *p_hwfn, /* Convert to correct endianity */ for (i = 0; i < ETH_MULTICAST_MAC_BINS_IN_REGS; i++) { + struct vport_update_ramrod_mcast *p_ramrod_bins; u32 *p_bins = (u32 *)bins; - struct vport_update_ramrod_mcast *approx_mcast; - approx_mcast = &p_ramrod->approx_mcast; - approx_mcast->bins[i] = cpu_to_le32(p_bins[i]); + p_ramrod_bins = &p_ramrod->approx_mcast; + p_ramrod_bins->bins[i] = cpu_to_le32(p_bins[i]); } } @@ -1286,8 +1273,7 @@ static int qed_filter_mcast_cmd(struct qed_dev *cdev, rc = qed_sp_eth_filter_mcast(p_hwfn, opaque_fid, p_filter_cmd, - comp_mode, - p_comp_data); + comp_mode, p_comp_data); } return rc; } @@ -1314,9 +1300,8 @@ static int qed_filter_ucast_cmd(struct qed_dev *cdev, rc = qed_sp_eth_filter_ucast(p_hwfn, opaque_fid, p_filter_cmd, - comp_mode, - p_comp_data); - if (rc != 0) + comp_mode, p_comp_data); + if (rc) break; } @@ -1590,8 +1575,7 @@ out: } } -void qed_get_vport_stats(struct qed_dev *cdev, - struct qed_eth_stats *stats) +void qed_get_vport_stats(struct qed_dev *cdev, struct qed_eth_stats *stats) { u32 i; @@ -1766,8 +1750,7 @@ static int qed_start_vport(struct qed_dev *cdev, return 0; } -static int qed_stop_vport(struct qed_dev *cdev, - u8 vport_id) +static int qed_stop_vport(struct qed_dev *cdev, u8 vport_id) { int rc, i; @@ -1775,8 +1758,7 @@ static int qed_stop_vport(struct qed_dev *cdev, struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; rc = qed_sp_vport_stop(p_hwfn, - p_hwfn->hw_info.opaque_fid, - vport_id); + p_hwfn->hw_info.opaque_fid, vport_id); if (rc) { DP_ERR(cdev, "Failed to stop VPORT\n"); @@ -1801,10 +1783,8 @@ static int qed_update_vport(struct qed_dev *cdev, /* Translate protocol params into sp params */ sp_params.vport_id = params->vport_id; - sp_params.update_vport_active_rx_flg = - params->update_vport_active_flg; - sp_params.update_vport_active_tx_flg = - params->update_vport_active_flg; + sp_params.update_vport_active_rx_flg = params->update_vport_active_flg; + sp_params.update_vport_active_tx_flg = params->update_vport_active_flg; sp_params.vport_active_rx_flg = params->vport_active_flg; sp_params.vport_active_tx_flg = params->vport_active_flg; sp_params.update_tx_switching_flg = params->update_tx_switching_flg; @@ -1817,8 +1797,7 @@ static int qed_update_vport(struct qed_dev *cdev, * We need to re-fix the rss values per engine for CMT. */ if (cdev->num_hwfns > 1 && params->update_rss_flg) { - struct qed_update_vport_rss_params *rss = - ¶ms->rss_params; + struct qed_update_vport_rss_params *rss = ¶ms->rss_params; int k, max = 0; /* Find largest entry, since it's possible RSS needs to @@ -1893,8 +1872,8 @@ static int qed_start_rxq(struct qed_dev *cdev, u16 cqe_pbl_size, void __iomem **pp_prod) { - int rc, hwfn_index; struct qed_hwfn *p_hwfn; + int rc, hwfn_index; hwfn_index = params->rss_id % cdev->num_hwfns; p_hwfn = &cdev->hwfns[hwfn_index]; @@ -1935,8 +1914,7 @@ static int qed_stop_rxq(struct qed_dev *cdev, rc = qed_sp_eth_rx_queue_stop(p_hwfn, params->rx_queue_id / cdev->num_hwfns, - params->eq_completion_only, - false); + params->eq_completion_only, false); if (rc) { DP_ERR(cdev, "Failed to stop RXQ#%d\n", params->rx_queue_id); return rc; @@ -2047,11 +2025,11 @@ static int qed_configure_filter_rx_mode(struct qed_dev *cdev, memset(&accept_flags, 0, sizeof(accept_flags)); - accept_flags.update_rx_mode_config = 1; - accept_flags.update_tx_mode_config = 1; - accept_flags.rx_accept_filter = QED_ACCEPT_UCAST_MATCHED | - QED_ACCEPT_MCAST_MATCHED | - QED_ACCEPT_BCAST; + accept_flags.update_rx_mode_config = 1; + accept_flags.update_tx_mode_config = 1; + accept_flags.rx_accept_filter = QED_ACCEPT_UCAST_MATCHED | + QED_ACCEPT_MCAST_MATCHED | + QED_ACCEPT_BCAST; accept_flags.tx_accept_filter = QED_ACCEPT_UCAST_MATCHED | QED_ACCEPT_MCAST_MATCHED | QED_ACCEPT_BCAST; @@ -2072,9 +2050,8 @@ static int qed_configure_filter_ucast(struct qed_dev *cdev, struct qed_filter_ucast ucast; if (!params->vlan_valid && !params->mac_valid) { - DP_NOTICE( - cdev, - "Tried configuring a unicast filter, but both MAC and VLAN are not set\n"); + DP_NOTICE(cdev, + "Tried configuring a unicast filter, but both MAC and VLAN are not set\n"); return -EINVAL; } @@ -2135,8 +2112,7 @@ static int qed_configure_filter_mcast(struct qed_dev *cdev, for (i = 0; i < mcast.num_mc_addrs; i++) ether_addr_copy(mcast.mac[i], params->mac[i]); - return qed_filter_mcast_cmd(cdev, &mcast, - QED_SPQ_MODE_CB, NULL); + return qed_filter_mcast_cmd(cdev, &mcast, QED_SPQ_MODE_CB, NULL); } static int qed_configure_filter(struct qed_dev *cdev, @@ -2153,15 +2129,13 @@ static int qed_configure_filter(struct qed_dev *cdev, accept_flags = params->filter.accept_flags; return qed_configure_filter_rx_mode(cdev, accept_flags); default: - DP_NOTICE(cdev, "Unknown filter type %d\n", - (int)params->type); + DP_NOTICE(cdev, "Unknown filter type %d\n", (int)params->type); return -EINVAL; } } static int qed_fp_cqe_completion(struct qed_dev *dev, - u8 rss_id, - struct eth_slow_path_rx_cqe *cqe) + u8 rss_id, struct eth_slow_path_rx_cqe *cqe) { return qed_eth_cqe_completion(&dev->hwfns[rss_id % dev->num_hwfns], cqe); diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index d6e1dc5fac94..f4e816af1783 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -106,8 +106,7 @@ static void qed_free_pci(struct qed_dev *cdev) /* Performs PCI initializations as well as initializing PCI-related parameters * in the device structrue. Returns 0 in case of success. */ -static int qed_init_pci(struct qed_dev *cdev, - struct pci_dev *pdev) +static int qed_init_pci(struct qed_dev *cdev, struct pci_dev *pdev) { u8 rev_id; int rc; @@ -263,8 +262,7 @@ static struct qed_dev *qed_alloc_cdev(struct pci_dev *pdev) } /* Sets the requested power state */ -static int qed_set_power_state(struct qed_dev *cdev, - pci_power_t state) +static int qed_set_power_state(struct qed_dev *cdev, pci_power_t state) { if (!cdev) return -ENODEV; @@ -366,8 +364,8 @@ static int qed_enable_msix(struct qed_dev *cdev, DP_NOTICE(cdev, "Trying to enable MSI-X with less vectors (%d out of %d)\n", cnt, int_params->in.num_vectors); - rc = pci_enable_msix_exact(cdev->pdev, - int_params->msix_table, cnt); + rc = pci_enable_msix_exact(cdev->pdev, int_params->msix_table, + cnt); if (!rc) rc = cnt; } @@ -974,8 +972,7 @@ static u32 qed_sb_init(struct qed_dev *cdev, } static u32 qed_sb_release(struct qed_dev *cdev, - struct qed_sb_info *sb_info, - u16 sb_id) + struct qed_sb_info *sb_info, u16 sb_id) { struct qed_hwfn *p_hwfn; int hwfn_index; diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index ce4b08a5fe99..4a82b99c5a45 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -54,8 +54,7 @@ bool qed_mcp_is_init(struct qed_hwfn *p_hwfn) return true; } -void qed_mcp_cmd_port_init(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt) +void qed_mcp_cmd_port_init(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { u32 addr = SECTION_OFFSIZE_ADDR(p_hwfn->mcp_info->public_base, PUBLIC_PORT); @@ -68,8 +67,7 @@ void qed_mcp_cmd_port_init(struct qed_hwfn *p_hwfn, p_hwfn->mcp_info->port_addr, MFW_PORT(p_hwfn)); } -void qed_mcp_read_mb(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt) +void qed_mcp_read_mb(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { u32 length = MFW_DRV_MSG_MAX_DWORDS(p_hwfn->mcp_info->mfw_mb_length); u32 tmp, i; @@ -99,8 +97,7 @@ int qed_mcp_free(struct qed_hwfn *p_hwfn) return 0; } -static int qed_load_mcp_offsets(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt) +static int qed_load_mcp_offsets(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { struct qed_mcp_info *p_info = p_hwfn->mcp_info; u32 drv_mb_offsize, mfw_mb_offsize; @@ -143,8 +140,7 @@ static int qed_load_mcp_offsets(struct qed_hwfn *p_hwfn, return 0; } -int qed_mcp_cmd_init(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt) +int qed_mcp_cmd_init(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { struct qed_mcp_info *p_info; u32 size; @@ -189,8 +185,7 @@ err: * access is achieved by setting a blocking flag, which will fail other * competing contexts to send their mailboxes. */ -static int qed_mcp_mb_lock(struct qed_hwfn *p_hwfn, - u32 cmd) +static int qed_mcp_mb_lock(struct qed_hwfn *p_hwfn, u32 cmd) { spin_lock_bh(&p_hwfn->mcp_info->lock); @@ -221,15 +216,13 @@ static int qed_mcp_mb_lock(struct qed_hwfn *p_hwfn, return 0; } -static void qed_mcp_mb_unlock(struct qed_hwfn *p_hwfn, - u32 cmd) +static void qed_mcp_mb_unlock(struct qed_hwfn *p_hwfn, u32 cmd) { if (cmd != DRV_MSG_CODE_LOAD_REQ && cmd != DRV_MSG_CODE_UNLOAD_REQ) spin_unlock_bh(&p_hwfn->mcp_info->lock); } -int qed_mcp_reset(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt) +int qed_mcp_reset(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { u32 seq = ++p_hwfn->mcp_info->drv_mb_seq; u8 delay = CHIP_MCP_RESP_ITER_US; @@ -399,8 +392,7 @@ int qed_mcp_cmd(struct qed_hwfn *p_hwfn, } int qed_mcp_load_req(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - u32 *p_load_code) + struct qed_ptt *p_ptt, u32 *p_load_code) { struct qed_dev *cdev = p_hwfn->cdev; struct qed_mcp_mb_params mb_params; @@ -527,8 +519,7 @@ static void qed_mcp_handle_transceiver_change(struct qed_hwfn *p_hwfn, "Received transceiver state update [0x%08x] from mfw [Addr 0x%x]\n", transceiver_state, (u32)(p_hwfn->mcp_info->port_addr + - offsetof(struct public_port, - transceiver_data))); + offsetof(struct public_port, transceiver_data))); transceiver_state = GET_FIELD(transceiver_state, ETH_TRANSCEIVER_STATE); @@ -540,8 +531,7 @@ static void qed_mcp_handle_transceiver_change(struct qed_hwfn *p_hwfn, } static void qed_mcp_handle_link_change(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - bool b_reset) + struct qed_ptt *p_ptt, bool b_reset) { struct qed_mcp_link_state *p_link; u8 max_bw, min_bw; @@ -557,8 +547,7 @@ static void qed_mcp_handle_link_change(struct qed_hwfn *p_hwfn, "Received link update [0x%08x] from mfw [Addr 0x%x]\n", status, (u32)(p_hwfn->mcp_info->port_addr + - offsetof(struct public_port, - link_status))); + offsetof(struct public_port, link_status))); } else { DP_VERBOSE(p_hwfn, NETIF_MSG_LINK, "Resetting link indications\n"); @@ -755,8 +744,7 @@ static void qed_read_pf_bandwidth(struct qed_hwfn *p_hwfn, static u32 qed_mcp_get_shmem_func(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, - struct public_func *p_data, - int pfid) + struct public_func *p_data, int pfid) { u32 addr = SECTION_OFFSIZE_ADDR(p_hwfn->mcp_info->public_base, PUBLIC_FUNC); @@ -766,8 +754,7 @@ static u32 qed_mcp_get_shmem_func(struct qed_hwfn *p_hwfn, memset(p_data, 0, sizeof(*p_data)); - size = min_t(u32, sizeof(*p_data), - QED_SECTION_SIZE(mfw_path_offsize)); + size = min_t(u32, sizeof(*p_data), QED_SECTION_SIZE(mfw_path_offsize)); for (i = 0; i < size / sizeof(u32); i++) ((u32 *)p_data)[i] = qed_rd(p_hwfn, p_ptt, func_addr + (i << 2)); @@ -802,15 +789,13 @@ int qed_hw_init_first_eth(struct qed_hwfn *p_hwfn, return -EINVAL; } -static void qed_mcp_update_bw(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt) +static void qed_mcp_update_bw(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { struct qed_mcp_function_info *p_info; struct public_func shmem_info; u32 resp = 0, param = 0; - qed_mcp_get_shmem_func(p_hwfn, p_ptt, &shmem_info, - MCP_PF_ID(p_hwfn)); + qed_mcp_get_shmem_func(p_hwfn, p_ptt, &shmem_info, MCP_PF_ID(p_hwfn)); qed_read_pf_bandwidth(p_hwfn, &shmem_info); @@ -943,8 +928,7 @@ int qed_mcp_get_mfw_ver(struct qed_hwfn *p_hwfn, return 0; } -int qed_mcp_get_media_type(struct qed_dev *cdev, - u32 *p_media_type) +int qed_mcp_get_media_type(struct qed_dev *cdev, u32 *p_media_type) { struct qed_hwfn *p_hwfn = &cdev->hwfns[0]; struct qed_ptt *p_ptt; @@ -1006,15 +990,13 @@ int qed_mcp_fill_shmem_func_info(struct qed_hwfn *p_hwfn, struct qed_mcp_function_info *info; struct public_func shmem_info; - qed_mcp_get_shmem_func(p_hwfn, p_ptt, &shmem_info, - MCP_PF_ID(p_hwfn)); + qed_mcp_get_shmem_func(p_hwfn, p_ptt, &shmem_info, MCP_PF_ID(p_hwfn)); info = &p_hwfn->mcp_info->func_info; info->pause_on_host = (shmem_info.config & FUNC_MF_CFG_PAUSE_ON_HOST_RING) ? 1 : 0; - if (qed_mcp_get_shmem_proto(p_hwfn, &shmem_info, - &info->protocol)) { + if (qed_mcp_get_shmem_proto(p_hwfn, &shmem_info, &info->protocol)) { DP_ERR(p_hwfn, "Unknown personality %08x\n", (u32)(shmem_info.config & FUNC_MF_CFG_PROTOCOL_MASK)); return -EINVAL; @@ -1075,15 +1057,13 @@ struct qed_mcp_link_capabilities return &p_hwfn->mcp_info->link_capabilities; } -int qed_mcp_drain(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt) +int qed_mcp_drain(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { u32 resp = 0, param = 0; int rc; rc = qed_mcp_cmd(p_hwfn, p_ptt, - DRV_MSG_CODE_NIG_DRAIN, 1000, - &resp, ¶m); + DRV_MSG_CODE_NIG_DRAIN, 1000, &resp, ¶m); /* Wait for the drain to complete before returning */ msleep(1020); @@ -1092,8 +1072,7 @@ int qed_mcp_drain(struct qed_hwfn *p_hwfn, } int qed_mcp_get_flash_size(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - u32 *p_flash_size) + struct qed_ptt *p_ptt, u32 *p_flash_size) { u32 flash_size; @@ -1171,8 +1150,8 @@ qed_mcp_send_drv_version(struct qed_hwfn *p_hwfn, return rc; } -int qed_mcp_set_led(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, - enum qed_led_mode mode) +int qed_mcp_set_led(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, enum qed_led_mode mode) { u32 resp = 0, param = 0, drv_mb_param; int rc; diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c index a52f3fc051f5..2888eb0628f8 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c @@ -25,9 +25,7 @@ int qed_sp_init_request(struct qed_hwfn *p_hwfn, struct qed_spq_entry **pp_ent, - u8 cmd, - u8 protocol, - struct qed_sp_init_data *p_data) + u8 cmd, u8 protocol, struct qed_sp_init_data *p_data) { u32 opaque_cid = p_data->opaque_fid << 16 | p_data->cid; struct qed_spq_entry *p_ent = NULL; @@ -38,7 +36,7 @@ int qed_sp_init_request(struct qed_hwfn *p_hwfn, rc = qed_spq_get_entry(p_hwfn, pp_ent); - if (rc != 0) + if (rc) return rc; p_ent = *pp_ent; @@ -321,8 +319,7 @@ int qed_sp_pf_start(struct qed_hwfn *p_hwfn, rc = qed_sp_init_request(p_hwfn, &p_ent, COMMON_RAMROD_PF_START, - PROTOCOLID_COMMON, - &init_data); + PROTOCOLID_COMMON, &init_data); if (rc) return rc; @@ -356,8 +353,7 @@ int qed_sp_pf_start(struct qed_hwfn *p_hwfn, DMA_REGPAIR_LE(p_ramrod->consolid_q_pbl_addr, p_hwfn->p_consq->chain.pbl.p_phys_table); - qed_tunn_set_pf_start_params(p_hwfn, p_tunn, - &p_ramrod->tunnel_config); + qed_tunn_set_pf_start_params(p_hwfn, p_tunn, &p_ramrod->tunnel_config); if (IS_MF_SI(p_hwfn)) p_ramrod->allow_npar_tx_switching = allow_npar_tx_switch; @@ -389,8 +385,7 @@ int qed_sp_pf_start(struct qed_hwfn *p_hwfn, DP_VERBOSE(p_hwfn, QED_MSG_SPQ, "Setting event_ring_sb [id %04x index %02x], outer_tag [%d]\n", - sb, sb_index, - p_ramrod->outer_tag); + sb, sb_index, p_ramrod->outer_tag); rc = qed_spq_post(p_hwfn, p_ent, NULL); diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c index d73456eab1d7..5e68a33b2097 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_spq.c +++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c @@ -41,8 +41,7 @@ ***************************************************************************/ static void qed_spq_blocking_cb(struct qed_hwfn *p_hwfn, void *cookie, - union event_ring_data *data, - u8 fw_return_code) + union event_ring_data *data, u8 fw_return_code) { struct qed_spq_comp_done *comp_done; @@ -109,9 +108,8 @@ static int qed_spq_block(struct qed_hwfn *p_hwfn, /*************************************************************************** * SPQ entries inner API ***************************************************************************/ -static int -qed_spq_fill_entry(struct qed_hwfn *p_hwfn, - struct qed_spq_entry *p_ent) +static int qed_spq_fill_entry(struct qed_hwfn *p_hwfn, + struct qed_spq_entry *p_ent) { p_ent->flags = 0; @@ -189,8 +187,7 @@ static void qed_spq_hw_initialize(struct qed_hwfn *p_hwfn, } static int qed_spq_hw_post(struct qed_hwfn *p_hwfn, - struct qed_spq *p_spq, - struct qed_spq_entry *p_ent) + struct qed_spq *p_spq, struct qed_spq_entry *p_ent) { struct qed_chain *p_chain = &p_hwfn->p_spq->chain; u16 echo = qed_chain_get_prod_idx(p_chain); @@ -255,8 +252,7 @@ qed_async_event_completion(struct qed_hwfn *p_hwfn, /*************************************************************************** * EQ API ***************************************************************************/ -void qed_eq_prod_update(struct qed_hwfn *p_hwfn, - u16 prod) +void qed_eq_prod_update(struct qed_hwfn *p_hwfn, u16 prod) { u32 addr = GTT_BAR0_MAP_REG_USDM_RAM + USTORM_EQE_CONS_OFFSET(p_hwfn->rel_pf_id); @@ -267,9 +263,7 @@ void qed_eq_prod_update(struct qed_hwfn *p_hwfn, mmiowb(); } -int qed_eq_completion(struct qed_hwfn *p_hwfn, - void *cookie) - +int qed_eq_completion(struct qed_hwfn *p_hwfn, void *cookie) { struct qed_eq *p_eq = cookie; struct qed_chain *p_chain = &p_eq->chain; @@ -323,8 +317,7 @@ int qed_eq_completion(struct qed_hwfn *p_hwfn, return rc; } -struct qed_eq *qed_eq_alloc(struct qed_hwfn *p_hwfn, - u16 num_elem) +struct qed_eq *qed_eq_alloc(struct qed_hwfn *p_hwfn, u16 num_elem) { struct qed_eq *p_eq; @@ -348,11 +341,8 @@ struct qed_eq *qed_eq_alloc(struct qed_hwfn *p_hwfn, } /* register EQ completion on the SP SB */ - qed_int_register_cb(p_hwfn, - qed_eq_completion, - p_eq, - &p_eq->eq_sb_index, - &p_eq->p_fw_cons); + qed_int_register_cb(p_hwfn, qed_eq_completion, + p_eq, &p_eq->eq_sb_index, &p_eq->p_fw_cons); return p_eq; @@ -361,14 +351,12 @@ eq_allocate_fail: return NULL; } -void qed_eq_setup(struct qed_hwfn *p_hwfn, - struct qed_eq *p_eq) +void qed_eq_setup(struct qed_hwfn *p_hwfn, struct qed_eq *p_eq) { qed_chain_reset(&p_eq->chain); } -void qed_eq_free(struct qed_hwfn *p_hwfn, - struct qed_eq *p_eq) +void qed_eq_free(struct qed_hwfn *p_hwfn, struct qed_eq *p_eq) { if (!p_eq) return; @@ -379,10 +367,9 @@ void qed_eq_free(struct qed_hwfn *p_hwfn, /*************************************************************************** * CQE API - manipulate EQ functionality ***************************************************************************/ -static int qed_cqe_completion( - struct qed_hwfn *p_hwfn, - struct eth_slow_path_rx_cqe *cqe, - enum protocol_type protocol) +static int qed_cqe_completion(struct qed_hwfn *p_hwfn, + struct eth_slow_path_rx_cqe *cqe, + enum protocol_type protocol) { if (IS_VF(p_hwfn->cdev)) return 0; @@ -463,8 +450,7 @@ int qed_spq_alloc(struct qed_hwfn *p_hwfn) u32 capacity; /* SPQ struct */ - p_spq = - kzalloc(sizeof(struct qed_spq), GFP_KERNEL); + p_spq = kzalloc(sizeof(struct qed_spq), GFP_KERNEL); if (!p_spq) { DP_NOTICE(p_hwfn, "Failed to allocate `struct qed_spq'\n"); return -ENOMEM; @@ -525,9 +511,7 @@ void qed_spq_free(struct qed_hwfn *p_hwfn) kfree(p_spq); } -int -qed_spq_get_entry(struct qed_hwfn *p_hwfn, - struct qed_spq_entry **pp_ent) +int qed_spq_get_entry(struct qed_hwfn *p_hwfn, struct qed_spq_entry **pp_ent) { struct qed_spq *p_spq = p_hwfn->p_spq; struct qed_spq_entry *p_ent = NULL; @@ -538,14 +522,15 @@ qed_spq_get_entry(struct qed_hwfn *p_hwfn, if (list_empty(&p_spq->free_pool)) { p_ent = kzalloc(sizeof(*p_ent), GFP_ATOMIC); if (!p_ent) { + DP_NOTICE(p_hwfn, + "Failed to allocate an SPQ entry for a pending ramrod\n"); rc = -ENOMEM; goto out_unlock; } p_ent->queue = &p_spq->unlimited_pending; } else { p_ent = list_first_entry(&p_spq->free_pool, - struct qed_spq_entry, - list); + struct qed_spq_entry, list); list_del(&p_ent->list); p_ent->queue = &p_spq->pending; } @@ -564,8 +549,7 @@ static void __qed_spq_return_entry(struct qed_hwfn *p_hwfn, list_add_tail(&p_ent->list, &p_hwfn->p_spq->free_pool); } -void qed_spq_return_entry(struct qed_hwfn *p_hwfn, - struct qed_spq_entry *p_ent) +void qed_spq_return_entry(struct qed_hwfn *p_hwfn, struct qed_spq_entry *p_ent) { spin_lock_bh(&p_hwfn->p_spq->lock); __qed_spq_return_entry(p_hwfn, p_ent); @@ -586,10 +570,9 @@ void qed_spq_return_entry(struct qed_hwfn *p_hwfn, * * @return int */ -static int -qed_spq_add_entry(struct qed_hwfn *p_hwfn, - struct qed_spq_entry *p_ent, - enum spq_priority priority) +static int qed_spq_add_entry(struct qed_hwfn *p_hwfn, + struct qed_spq_entry *p_ent, + enum spq_priority priority) { struct qed_spq *p_spq = p_hwfn->p_spq; @@ -604,8 +587,7 @@ qed_spq_add_entry(struct qed_hwfn *p_hwfn, struct qed_spq_entry *p_en2; p_en2 = list_first_entry(&p_spq->free_pool, - struct qed_spq_entry, - list); + struct qed_spq_entry, list); list_del(&p_en2->list); /* Copy the ring element physical pointer to the new @@ -655,8 +637,7 @@ u32 qed_spq_get_cid(struct qed_hwfn *p_hwfn) * Posting new Ramrods ***************************************************************************/ static int qed_spq_post_list(struct qed_hwfn *p_hwfn, - struct list_head *head, - u32 keep_reserve) + struct list_head *head, u32 keep_reserve) { struct qed_spq *p_spq = p_hwfn->p_spq; int rc; @@ -690,8 +671,7 @@ static int qed_spq_pend_post(struct qed_hwfn *p_hwfn) break; p_ent = list_first_entry(&p_spq->unlimited_pending, - struct qed_spq_entry, - list); + struct qed_spq_entry, list); if (!p_ent) return -EINVAL; @@ -705,8 +685,7 @@ static int qed_spq_pend_post(struct qed_hwfn *p_hwfn) } int qed_spq_post(struct qed_hwfn *p_hwfn, - struct qed_spq_entry *p_ent, - u8 *fw_return_code) + struct qed_spq_entry *p_ent, u8 *fw_return_code) { int rc = 0; struct qed_spq *p_spq = p_hwfn ? p_hwfn->p_spq : NULL; @@ -803,8 +782,7 @@ int qed_spq_completion(struct qed_hwfn *p_hwfn, return -EINVAL; spin_lock_bh(&p_spq->lock); - list_for_each_entry_safe(p_ent, tmp, &p_spq->completion_pending, - list) { + list_for_each_entry_safe(p_ent, tmp, &p_spq->completion_pending, list) { if (p_ent->elem.hdr.echo == echo) { u16 pos = le16_to_cpu(echo) % SPQ_RING_SIZE; @@ -901,14 +879,12 @@ consq_allocate_fail: return NULL; } -void qed_consq_setup(struct qed_hwfn *p_hwfn, - struct qed_consq *p_consq) +void qed_consq_setup(struct qed_hwfn *p_hwfn, struct qed_consq *p_consq) { qed_chain_reset(&p_consq->chain); } -void qed_consq_free(struct qed_hwfn *p_hwfn, - struct qed_consq *p_consq) +void qed_consq_free(struct qed_hwfn *p_hwfn, struct qed_consq *p_consq) { if (!p_consq) return; diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c index 15399da268d9..51e4c906833f 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c @@ -699,7 +699,7 @@ static void qed_iov_config_perm_table(struct qed_hwfn *p_hwfn, &qzone_id); reg_addr = PSWHST_REG_ZONE_PERMISSION_TABLE + qzone_id * 4; - val = enable ? (vf->abs_vf_id | (1 << 8)) : 0; + val = enable ? (vf->abs_vf_id | BIT(8)) : 0; qed_wr(p_hwfn, p_ptt, reg_addr, val); } } @@ -1090,13 +1090,13 @@ static u16 qed_iov_prep_vp_update_resp_tlvs(struct qed_hwfn *p_hwfn, /* Prepare response for all extended tlvs if they are found by PF */ for (i = 0; i < QED_IOV_VP_UPDATE_MAX; i++) { - if (!(tlvs_mask & (1 << i))) + if (!(tlvs_mask & BIT(i))) continue; resp = qed_add_tlv(p_hwfn, &p_mbx->offset, qed_iov_vport_to_tlv(p_hwfn, i), size); - if (tlvs_accepted & (1 << i)) + if (tlvs_accepted & BIT(i)) resp->hdr.status = status; else resp->hdr.status = PFVF_STATUS_NOT_SUPPORTED; @@ -1334,8 +1334,7 @@ static void qed_iov_vf_mbx_acquire(struct qed_hwfn *p_hwfn, pfdev_info->fw_minor = FW_MINOR_VERSION; pfdev_info->fw_rev = FW_REVISION_VERSION; pfdev_info->fw_eng = FW_ENGINEERING_VERSION; - pfdev_info->minor_fp_hsi = min_t(u8, - ETH_HSI_VER_MINOR, + pfdev_info->minor_fp_hsi = min_t(u8, ETH_HSI_VER_MINOR, req->vfdev_info.eth_fp_hsi_minor); pfdev_info->os_type = VFPF_ACQUIRE_OS_LINUX; qed_mcp_get_mfw_ver(p_hwfn, p_ptt, &pfdev_info->mfw_ver, NULL); @@ -1438,14 +1437,11 @@ static int qed_iov_reconfigure_unicast_vlan(struct qed_hwfn *p_hwfn, filter.type = QED_FILTER_VLAN; filter.vlan = p_vf->shadow_config.vlans[i].vid; - DP_VERBOSE(p_hwfn, - QED_MSG_IOV, + DP_VERBOSE(p_hwfn, QED_MSG_IOV, "Reconfiguring VLAN [0x%04x] for VF [%04x]\n", filter.vlan, p_vf->relative_vf_id); - rc = qed_sp_eth_filter_ucast(p_hwfn, - p_vf->opaque_fid, - &filter, - QED_SPQ_MODE_CB, NULL); + rc = qed_sp_eth_filter_ucast(p_hwfn, p_vf->opaque_fid, + &filter, QED_SPQ_MODE_CB, NULL); if (rc) { DP_NOTICE(p_hwfn, "Failed to configure VLAN [%04x] to VF [%04x]\n", @@ -1463,7 +1459,7 @@ qed_iov_reconfigure_unicast_shadow(struct qed_hwfn *p_hwfn, { int rc = 0; - if ((events & (1 << VLAN_ADDR_FORCED)) && + if ((events & BIT(VLAN_ADDR_FORCED)) && !(p_vf->configured_features & (1 << VLAN_ADDR_FORCED))) rc = qed_iov_reconfigure_unicast_vlan(p_hwfn, p_vf); @@ -1479,7 +1475,7 @@ static int qed_iov_configure_vport_forced(struct qed_hwfn *p_hwfn, if (!p_vf->vport_instance) return -EINVAL; - if (events & (1 << MAC_ADDR_FORCED)) { + if (events & BIT(MAC_ADDR_FORCED)) { /* Since there's no way [currently] of removing the MAC, * we can always assume this means we need to force it. */ @@ -1502,7 +1498,7 @@ static int qed_iov_configure_vport_forced(struct qed_hwfn *p_hwfn, p_vf->configured_features |= 1 << MAC_ADDR_FORCED; } - if (events & (1 << VLAN_ADDR_FORCED)) { + if (events & BIT(VLAN_ADDR_FORCED)) { struct qed_sp_vport_update_params vport_update; u8 removal; int i; @@ -1572,7 +1568,7 @@ static int qed_iov_configure_vport_forced(struct qed_hwfn *p_hwfn, if (filter.vlan) p_vf->configured_features |= 1 << VLAN_ADDR_FORCED; else - p_vf->configured_features &= ~(1 << VLAN_ADDR_FORCED); + p_vf->configured_features &= ~BIT(VLAN_ADDR_FORCED); } /* If forced features are terminated, we need to configure the shadow @@ -1619,8 +1615,7 @@ static void qed_iov_vf_mbx_start_vport(struct qed_hwfn *p_hwfn, qed_int_cau_conf_sb(p_hwfn, p_ptt, start->sb_addr[sb_id], - vf->igu_sbs[sb_id], - vf->abs_vf_id, 1); + vf->igu_sbs[sb_id], vf->abs_vf_id, 1); } qed_iov_enable_vf_traffic(p_hwfn, p_ptt, vf); @@ -1632,7 +1627,7 @@ static void qed_iov_vf_mbx_start_vport(struct qed_hwfn *p_hwfn, * vfs that would still be fine, since they passed '0' as padding]. */ p_bitmap = &vf_info->bulletin.p_virt->valid_bitmap; - if (!(*p_bitmap & (1 << VFPF_BULLETIN_UNTAGGED_DEFAULT_FORCED))) { + if (!(*p_bitmap & BIT(VFPF_BULLETIN_UNTAGGED_DEFAULT_FORCED))) { u8 vf_req = start->only_untagged; vf_info->bulletin.p_virt->default_only_untagged = vf_req; @@ -1652,7 +1647,7 @@ static void qed_iov_vf_mbx_start_vport(struct qed_hwfn *p_hwfn, params.mtu = vf->mtu; rc = qed_sp_eth_vport_start(p_hwfn, ¶ms); - if (rc != 0) { + if (rc) { DP_ERR(p_hwfn, "qed_iov_vf_mbx_start_vport returned error %d\n", rc); status = PFVF_STATUS_FAILURE; @@ -1679,7 +1674,7 @@ static void qed_iov_vf_mbx_stop_vport(struct qed_hwfn *p_hwfn, vf->spoof_chk = false; rc = qed_sp_vport_stop(p_hwfn, vf->opaque_fid, vf->vport_id); - if (rc != 0) { + if (rc) { DP_ERR(p_hwfn, "qed_iov_vf_mbx_stop_vport returned error %d\n", rc); status = PFVF_STATUS_FAILURE; @@ -2045,7 +2040,7 @@ qed_iov_vp_update_vlan_param(struct qed_hwfn *p_hwfn, p_vf->shadow_config.inner_vlan_removal = p_vlan_tlv->remove_vlan; /* Ignore the VF request if we're forcing a vlan */ - if (!(p_vf->configured_features & (1 << VLAN_ADDR_FORCED))) { + if (!(p_vf->configured_features & BIT(VLAN_ADDR_FORCED))) { p_data->update_inner_vlan_removal_flg = 1; p_data->inner_vlan_removal_flg = p_vlan_tlv->remove_vlan; } @@ -2340,7 +2335,7 @@ static int qed_iov_vf_update_vlan_shadow(struct qed_hwfn *p_hwfn, /* In forced mode, we're willing to remove entries - but we don't add * new ones. */ - if (p_vf->bulletin.p_virt->valid_bitmap & (1 << VLAN_ADDR_FORCED)) + if (p_vf->bulletin.p_virt->valid_bitmap & BIT(VLAN_ADDR_FORCED)) return 0; if (p_params->opcode == QED_FILTER_ADD || @@ -2374,7 +2369,7 @@ static int qed_iov_vf_update_mac_shadow(struct qed_hwfn *p_hwfn, int i; /* If we're in forced-mode, we don't allow any change */ - if (p_vf->bulletin.p_virt->valid_bitmap & (1 << MAC_ADDR_FORCED)) + if (p_vf->bulletin.p_virt->valid_bitmap & BIT(MAC_ADDR_FORCED)) return 0; /* First remove entries and then add new ones */ @@ -2509,7 +2504,7 @@ static void qed_iov_vf_mbx_ucast_filter(struct qed_hwfn *p_hwfn, } /* Determine if the unicast filtering is acceptible by PF */ - if ((p_bulletin->valid_bitmap & (1 << VLAN_ADDR_FORCED)) && + if ((p_bulletin->valid_bitmap & BIT(VLAN_ADDR_FORCED)) && (params.type == QED_FILTER_VLAN || params.type == QED_FILTER_MAC_VLAN)) { /* Once VLAN is forced or PVID is set, do not allow @@ -2521,7 +2516,7 @@ static void qed_iov_vf_mbx_ucast_filter(struct qed_hwfn *p_hwfn, goto out; } - if ((p_bulletin->valid_bitmap & (1 << MAC_ADDR_FORCED)) && + if ((p_bulletin->valid_bitmap & BIT(MAC_ADDR_FORCED)) && (params.type == QED_FILTER_MAC || params.type == QED_FILTER_MAC_VLAN)) { if (!ether_addr_equal(p_bulletin->mac, params.mac) || @@ -2749,7 +2744,7 @@ cleanup: /* Mark VF for ack and clean pending state */ if (p_vf->state == VF_RESET) p_vf->state = VF_STOPPED; - ack_vfs[vfid / 32] |= (1 << (vfid % 32)); + ack_vfs[vfid / 32] |= BIT((vfid % 32)); p_hwfn->pf_iov_info->pending_flr[rel_vf_id / 64] &= ~(1ULL << (rel_vf_id % 64)); p_hwfn->pf_iov_info->pending_events[rel_vf_id / 64] &= @@ -2805,7 +2800,7 @@ int qed_iov_mark_vf_flr(struct qed_hwfn *p_hwfn, u32 *p_disabled_vfs) continue; vfid = p_vf->abs_vf_id; - if ((1 << (vfid % 32)) & p_disabled_vfs[vfid / 32]) { + if (BIT((vfid % 32)) & p_disabled_vfs[vfid / 32]) { u64 *p_flr = p_hwfn->pf_iov_info->pending_flr; u16 rel_vf_id = p_vf->relative_vf_id; @@ -3064,8 +3059,7 @@ static void qed_iov_bulletin_set_forced_mac(struct qed_hwfn *p_hwfn, vf_info->bulletin.p_virt->valid_bitmap |= feature; /* Forced MAC will disable MAC_ADDR */ - vf_info->bulletin.p_virt->valid_bitmap &= - ~(1 << VFPF_BULLETIN_MAC_ADDR); + vf_info->bulletin.p_virt->valid_bitmap &= ~BIT(VFPF_BULLETIN_MAC_ADDR); qed_iov_configure_vport_forced(p_hwfn, vf_info, feature); } @@ -3163,7 +3157,7 @@ static u8 *qed_iov_bulletin_get_forced_mac(struct qed_hwfn *p_hwfn, if (!p_vf || !p_vf->bulletin.p_virt) return NULL; - if (!(p_vf->bulletin.p_virt->valid_bitmap & (1 << MAC_ADDR_FORCED))) + if (!(p_vf->bulletin.p_virt->valid_bitmap & BIT(MAC_ADDR_FORCED))) return NULL; return p_vf->bulletin.p_virt->mac; @@ -3177,7 +3171,7 @@ u16 qed_iov_bulletin_get_forced_vlan(struct qed_hwfn *p_hwfn, u16 rel_vf_id) if (!p_vf || !p_vf->bulletin.p_virt) return 0; - if (!(p_vf->bulletin.p_virt->valid_bitmap & (1 << VLAN_ADDR_FORCED))) + if (!(p_vf->bulletin.p_virt->valid_bitmap & BIT(VLAN_ADDR_FORCED))) return 0; return p_vf->bulletin.p_virt->pvid; diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index 0e0acfb5c1ed..427e043a033f 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -440,8 +440,7 @@ static u32 qede_get_msglevel(struct net_device *ndev) { struct qede_dev *edev = netdev_priv(ndev); - return ((u32)edev->dp_level << QED_LOG_LEVEL_SHIFT) | - edev->dp_module; + return ((u32)edev->dp_level << QED_LOG_LEVEL_SHIFT) | edev->dp_module; } static void qede_set_msglevel(struct net_device *ndev, u32 level) @@ -465,8 +464,7 @@ static int qede_nway_reset(struct net_device *dev) struct qed_link_params link_params; if (!edev->ops || !edev->ops->common->can_link_change(edev->cdev)) { - DP_INFO(edev, - "Link settings are not allowed to be changed\n"); + DP_INFO(edev, "Link settings are not allowed to be changed\n"); return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index e4bd02e46e57..f4230d4a4fbd 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -270,8 +270,7 @@ module_exit(qede_cleanup); /* Unmap the data and free skb */ static int qede_free_tx_pkt(struct qede_dev *edev, - struct qede_tx_queue *txq, - int *len) + struct qede_tx_queue *txq, int *len) { u16 idx = txq->sw_tx_cons & NUM_TX_BDS_MAX; struct sk_buff *skb = txq->sw_tx_ring[idx].skb; @@ -329,8 +328,7 @@ static int qede_free_tx_pkt(struct qede_dev *edev, static void qede_free_failed_tx_pkt(struct qede_dev *edev, struct qede_tx_queue *txq, struct eth_tx_1st_bd *first_bd, - int nbd, - bool data_split) + int nbd, bool data_split) { u16 idx = txq->sw_tx_prod & NUM_TX_BDS_MAX; struct sk_buff *skb = txq->sw_tx_ring[idx].skb; @@ -339,8 +337,7 @@ static void qede_free_failed_tx_pkt(struct qede_dev *edev, /* Return prod to its position before this skb was handled */ qed_chain_set_prod(&txq->tx_pbl, - le16_to_cpu(txq->tx_db.data.bd_prod), - first_bd); + le16_to_cpu(txq->tx_db.data.bd_prod), first_bd); first_bd = (struct eth_tx_1st_bd *)qed_chain_produce(&txq->tx_pbl); @@ -366,8 +363,7 @@ static void qede_free_failed_tx_pkt(struct qede_dev *edev, /* Return again prod to its position before this skb was handled */ qed_chain_set_prod(&txq->tx_pbl, - le16_to_cpu(txq->tx_db.data.bd_prod), - first_bd); + le16_to_cpu(txq->tx_db.data.bd_prod), first_bd); /* Free skb */ dev_kfree_skb_any(skb); @@ -376,8 +372,7 @@ static void qede_free_failed_tx_pkt(struct qede_dev *edev, } static u32 qede_xmit_type(struct qede_dev *edev, - struct sk_buff *skb, - int *ipv6_ext) + struct sk_buff *skb, int *ipv6_ext) { u32 rc = XMIT_L4_CSUM; __be16 l3_proto; @@ -434,15 +429,13 @@ static void qede_set_params_for_ipv6_ext(struct sk_buff *skb, } static int map_frag_to_bd(struct qede_dev *edev, - skb_frag_t *frag, - struct eth_tx_bd *bd) + skb_frag_t *frag, struct eth_tx_bd *bd) { dma_addr_t mapping; /* Map skb non-linear frag data for DMA */ mapping = skb_frag_dma_map(&edev->pdev->dev, frag, 0, - skb_frag_size(frag), - DMA_TO_DEVICE); + skb_frag_size(frag), DMA_TO_DEVICE); if (unlikely(dma_mapping_error(&edev->pdev->dev, mapping))) { DP_NOTICE(edev, "Unable to map frag - dropping packet\n"); return -ENOMEM; @@ -504,9 +497,8 @@ static inline void qede_update_tx_producer(struct qede_tx_queue *txq) } /* Main transmit function */ -static -netdev_tx_t qede_start_xmit(struct sk_buff *skb, - struct net_device *ndev) +static netdev_tx_t qede_start_xmit(struct sk_buff *skb, + struct net_device *ndev) { struct qede_dev *edev = netdev_priv(ndev); struct netdev_queue *netdev_txq; @@ -530,8 +522,7 @@ netdev_tx_t qede_start_xmit(struct sk_buff *skb, txq = QEDE_TX_QUEUE(edev, txq_index); netdev_txq = netdev_get_tx_queue(ndev, txq_index); - WARN_ON(qed_chain_get_elem_left(&txq->tx_pbl) < - (MAX_SKB_FRAGS + 1)); + WARN_ON(qed_chain_get_elem_left(&txq->tx_pbl) < (MAX_SKB_FRAGS + 1)); xmit_type = qede_xmit_type(edev, skb, &ipv6_ext); @@ -761,8 +752,7 @@ int qede_txq_has_work(struct qede_tx_queue *txq) return hw_bd_cons != qed_chain_get_cons_idx(&txq->tx_pbl); } -static int qede_tx_int(struct qede_dev *edev, - struct qede_tx_queue *txq) +static int qede_tx_int(struct qede_dev *edev, struct qede_tx_queue *txq) { struct netdev_queue *netdev_txq; u16 hw_bd_cons; @@ -960,8 +950,7 @@ static inline void qede_update_rx_prod(struct qede_dev *edev, static u32 qede_get_rxhash(struct qede_dev *edev, u8 bitfields, - __le32 rss_hash, - enum pkt_hash_types *rxhash_type) + __le32 rss_hash, enum pkt_hash_types *rxhash_type) { enum rss_hash_type htype; @@ -990,12 +979,10 @@ static void qede_set_skb_csum(struct sk_buff *skb, u8 csum_flag) static inline void qede_skb_receive(struct qede_dev *edev, struct qede_fastpath *fp, - struct sk_buff *skb, - u16 vlan_tag) + struct sk_buff *skb, u16 vlan_tag) { if (vlan_tag) - __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), - vlan_tag); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag); napi_gro_receive(&fp->napi, skb); } @@ -1018,8 +1005,7 @@ static void qede_set_gro_params(struct qede_dev *edev, static int qede_fill_frag_skb(struct qede_dev *edev, struct qede_rx_queue *rxq, - u8 tpa_agg_index, - u16 len_on_bd) + u8 tpa_agg_index, u16 len_on_bd) { struct sw_rx_data *current_bd = &rxq->sw_rx_ring[rxq->sw_rx_cons & NUM_RX_BDS_MAX]; @@ -1575,8 +1561,7 @@ alloc_skb: skb->protocol = eth_type_trans(skb, edev->ndev); rx_hash = qede_get_rxhash(edev, fp_cqe->bitfields, - fp_cqe->rss_hash, - &rxhash_type); + fp_cqe->rss_hash, &rxhash_type); skb_set_hash(skb, rx_hash, rxhash_type); @@ -1787,9 +1772,9 @@ void qede_fill_by_demand_stats(struct qede_dev *edev) edev->stats.tx_mac_ctrl_frames = stats.tx_mac_ctrl_frames; } -static struct rtnl_link_stats64 *qede_get_stats64( - struct net_device *dev, - struct rtnl_link_stats64 *stats) +static +struct rtnl_link_stats64 *qede_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *stats) { struct qede_dev *edev = netdev_priv(dev); @@ -2103,8 +2088,7 @@ static void qede_vlan_mark_nonconfigured(struct qede_dev *edev) } DP_VERBOSE(edev, NETIF_MSG_IFDOWN, - "marked vlan %d as non-configured\n", - vlan->vid); + "marked vlan %d as non-configured\n", vlan->vid); } edev->accept_any_vlan = false; @@ -2237,15 +2221,13 @@ static const struct net_device_ops qede_netdev_ops = { static struct qede_dev *qede_alloc_etherdev(struct qed_dev *cdev, struct pci_dev *pdev, struct qed_dev_eth_info *info, - u32 dp_module, - u8 dp_level) + u32 dp_module, u8 dp_level) { struct net_device *ndev; struct qede_dev *edev; ndev = alloc_etherdev_mqs(sizeof(*edev), - info->num_queues, - info->num_queues); + info->num_queues, info->num_queues); if (!ndev) { pr_err("etherdev allocation failed\n"); return NULL; @@ -2453,7 +2435,7 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level, bool is_vf, enum qede_probe_mode mode) { struct qed_probe_params probe_params; - struct qed_slowpath_params params; + struct qed_slowpath_params sp_params; struct qed_dev_eth_info dev_info; struct qede_dev *edev; struct qed_dev *cdev; @@ -2476,14 +2458,14 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level, qede_update_pf_params(cdev); /* Start the Slowpath-process */ - memset(¶ms, 0, sizeof(struct qed_slowpath_params)); - params.int_mode = QED_INT_MODE_MSIX; - params.drv_major = QEDE_MAJOR_VERSION; - params.drv_minor = QEDE_MINOR_VERSION; - params.drv_rev = QEDE_REVISION_VERSION; - params.drv_eng = QEDE_ENGINEERING_VERSION; - strlcpy(params.name, "qede LAN", QED_DRV_VER_STR_SIZE); - rc = qed_ops->common->slowpath_start(cdev, ¶ms); + memset(&sp_params, 0, sizeof(sp_params)); + sp_params.int_mode = QED_INT_MODE_MSIX; + sp_params.drv_major = QEDE_MAJOR_VERSION; + sp_params.drv_minor = QEDE_MINOR_VERSION; + sp_params.drv_rev = QEDE_REVISION_VERSION; + sp_params.drv_eng = QEDE_ENGINEERING_VERSION; + strlcpy(sp_params.name, "qede LAN", QED_DRV_VER_STR_SIZE); + rc = qed_ops->common->slowpath_start(cdev, &sp_params); if (rc) { pr_notice("Cannot start slowpath\n"); goto err1; @@ -2634,16 +2616,14 @@ static void qede_free_mem_sb(struct qede_dev *edev, /* This function allocates fast-path status block memory */ static int qede_alloc_mem_sb(struct qede_dev *edev, - struct qed_sb_info *sb_info, - u16 sb_id) + struct qed_sb_info *sb_info, u16 sb_id) { struct status_block *sb_virt; dma_addr_t sb_phys; int rc; sb_virt = dma_alloc_coherent(&edev->pdev->dev, - sizeof(*sb_virt), - &sb_phys, GFP_KERNEL); + sizeof(*sb_virt), &sb_phys, GFP_KERNEL); if (!sb_virt) { DP_ERR(edev, "Status block allocation failed\n"); return -ENOMEM; @@ -2675,16 +2655,15 @@ static void qede_free_rx_buffers(struct qede_dev *edev, data = rx_buf->data; dma_unmap_page(&edev->pdev->dev, - rx_buf->mapping, - PAGE_SIZE, DMA_FROM_DEVICE); + rx_buf->mapping, PAGE_SIZE, DMA_FROM_DEVICE); rx_buf->data = NULL; __free_page(data); } } -static void qede_free_sge_mem(struct qede_dev *edev, - struct qede_rx_queue *rxq) { +static void qede_free_sge_mem(struct qede_dev *edev, struct qede_rx_queue *rxq) +{ int i; if (edev->gro_disable) @@ -2703,8 +2682,7 @@ static void qede_free_sge_mem(struct qede_dev *edev, } } -static void qede_free_mem_rxq(struct qede_dev *edev, - struct qede_rx_queue *rxq) +static void qede_free_mem_rxq(struct qede_dev *edev, struct qede_rx_queue *rxq) { qede_free_sge_mem(edev, rxq); @@ -2763,8 +2741,7 @@ static int qede_alloc_rx_buffer(struct qede_dev *edev, return 0; } -static int qede_alloc_sge_mem(struct qede_dev *edev, - struct qede_rx_queue *rxq) +static int qede_alloc_sge_mem(struct qede_dev *edev, struct qede_rx_queue *rxq) { dma_addr_t mapping; int i; @@ -2811,15 +2788,14 @@ err: } /* This function allocates all memory needed per Rx queue */ -static int qede_alloc_mem_rxq(struct qede_dev *edev, - struct qede_rx_queue *rxq) +static int qede_alloc_mem_rxq(struct qede_dev *edev, struct qede_rx_queue *rxq) { int i, rc, size; rxq->num_rx_buffers = edev->q_num_rx_buffers; - rxq->rx_buf_size = NET_IP_ALIGN + ETH_OVERHEAD + - edev->ndev->mtu; + rxq->rx_buf_size = NET_IP_ALIGN + ETH_OVERHEAD + edev->ndev->mtu; + if (rxq->rx_buf_size > PAGE_SIZE) rxq->rx_buf_size = PAGE_SIZE; @@ -2873,8 +2849,7 @@ err: return rc; } -static void qede_free_mem_txq(struct qede_dev *edev, - struct qede_tx_queue *txq) +static void qede_free_mem_txq(struct qede_dev *edev, struct qede_tx_queue *txq) { /* Free the parallel SW ring */ kfree(txq->sw_tx_ring); @@ -2884,8 +2859,7 @@ static void qede_free_mem_txq(struct qede_dev *edev, } /* This function allocates all memory needed per Tx queue */ -static int qede_alloc_mem_txq(struct qede_dev *edev, - struct qede_tx_queue *txq) +static int qede_alloc_mem_txq(struct qede_dev *edev, struct qede_tx_queue *txq) { int size, rc; union eth_tx_bd_types *p_virt; @@ -2917,8 +2891,7 @@ err: } /* This function frees all memory of a single fp */ -static void qede_free_mem_fp(struct qede_dev *edev, - struct qede_fastpath *fp) +static void qede_free_mem_fp(struct qede_dev *edev, struct qede_fastpath *fp) { int tc; @@ -2933,8 +2906,7 @@ static void qede_free_mem_fp(struct qede_dev *edev, /* This function allocates all memory needed for a single fp (i.e. an entity * which contains status block, one rx queue and multiple per-TC tx queues. */ -static int qede_alloc_mem_fp(struct qede_dev *edev, - struct qede_fastpath *fp) +static int qede_alloc_mem_fp(struct qede_dev *edev, struct qede_fastpath *fp) { int rc, tc; @@ -3146,8 +3118,7 @@ static int qede_setup_irqs(struct qede_dev *edev) } static int qede_drain_txq(struct qede_dev *edev, - struct qede_tx_queue *txq, - bool allow_drain) + struct qede_tx_queue *txq, bool allow_drain) { int rc, cnt = 1000; From 83aeb9339f4859c587d0ad3d80d225b520db047e Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Mon, 15 Aug 2016 10:42:44 +0300 Subject: [PATCH 0229/1715] qed*: Trivial modifications Change qed* code in trivial manner; This isn't necessarily semantic-only, but the end result is the same, i.e., no change should occur from user perspective. Changes include: - Using temporary variables to better fit 80-character restrictions. - Removal of unused variables & code with no effect. [plus some additional minor modifications]. Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_dev.c | 11 +++---- .../net/ethernet/qlogic/qed/qed_init_ops.c | 14 ++++----- drivers/net/ethernet/qlogic/qed/qed_int.c | 11 ++----- drivers/net/ethernet/qlogic/qed/qed_l2.c | 31 +++++++++---------- drivers/net/ethernet/qlogic/qed/qed_mcp.c | 4 +-- drivers/net/ethernet/qlogic/qede/qede_main.c | 3 -- 6 files changed, 28 insertions(+), 46 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index 6d105c8d3bbd..1371ea07f876 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -540,8 +540,7 @@ int qed_resc_alloc(struct qed_dev *cdev) cdev->reset_stats = kzalloc(sizeof(*cdev->reset_stats), GFP_KERNEL); if (!cdev->reset_stats) { DP_NOTICE(cdev, "Failed to allocate reset statistics\n"); - rc = -ENOMEM; - goto alloc_err; + goto alloc_no_mem; } return 0; @@ -1463,11 +1462,11 @@ static int qed_hw_get_nvm_info(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) link_temp = qed_rd(p_hwfn, p_ptt, port_cfg_addr + offsetof(struct nvm_cfg1_port, speed_cap_mask)); - link->speed.advertised_speeds = - link_temp & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_MASK; + link_temp &= NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_MASK; + link->speed.advertised_speeds = link_temp; - p_hwfn->mcp_info->link_capabilities.speed_capabilities = - link->speed.advertised_speeds; + link_temp = link->speed.advertised_speeds; + p_hwfn->mcp_info->link_capabilities.speed_capabilities = link_temp; link_temp = qed_rd(p_hwfn, p_ptt, port_cfg_addr + diff --git a/drivers/net/ethernet/qlogic/qed/qed_init_ops.c b/drivers/net/ethernet/qlogic/qed/qed_init_ops.c index 57044eeeef24..b7a4b27ebdbd 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_init_ops.c +++ b/drivers/net/ethernet/qlogic/qed/qed_init_ops.c @@ -295,17 +295,15 @@ static int qed_init_cmd_wr(struct qed_hwfn *p_hwfn, switch (GET_FIELD(data, INIT_WRITE_OP_SOURCE)) { case INIT_SRC_INLINE: - qed_wr(p_hwfn, p_ptt, addr, - le32_to_cpu(arg->inline_val)); + data = le32_to_cpu(p_cmd->args.inline_val); + qed_wr(p_hwfn, p_ptt, addr, data); break; case INIT_SRC_ZEROS: - if (b_must_dmae || - (b_can_dmae && (le32_to_cpu(arg->zeros_count) >= 64))) - rc = qed_init_fill_dmae(p_hwfn, p_ptt, addr, 0, - le32_to_cpu(arg->zeros_count)); + data = le32_to_cpu(p_cmd->args.zeros_count); + if (b_must_dmae || (b_can_dmae && (data >= 64))) + rc = qed_init_fill_dmae(p_hwfn, p_ptt, addr, 0, data); else - qed_init_fill(p_hwfn, p_ptt, addr, 0, - le32_to_cpu(arg->zeros_count)); + qed_init_fill(p_hwfn, p_ptt, addr, 0, data); break; case INIT_SRC_ARRAY: rc = qed_init_cmd_array(p_hwfn, p_ptt, p_cmd, diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c index 2afa7a09e1a3..a701b7640e7c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_int.c +++ b/drivers/net/ethernet/qlogic/qed/qed_int.c @@ -2413,12 +2413,6 @@ void qed_init_cau_sb_entry(struct qed_hwfn *p_hwfn, SET_FIELD(p_sb_entry->params, CAU_SB_ENTRY_SB_TIMESET0, 0x7F); SET_FIELD(p_sb_entry->params, CAU_SB_ENTRY_SB_TIMESET1, 0x7F); - /* setting the time resultion to a fixed value ( = 1) */ - SET_FIELD(p_sb_entry->params, CAU_SB_ENTRY_TIMER_RES0, - QED_CAU_DEF_RX_TIMER_RES); - SET_FIELD(p_sb_entry->params, CAU_SB_ENTRY_TIMER_RES1, - QED_CAU_DEF_TX_TIMER_RES); - cau_state = CAU_HC_DISABLE_STATE; if (cdev->int_coalescing_mode == QED_COAL_MODE_ENABLE) { @@ -3133,10 +3127,9 @@ int qed_int_alloc(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) return rc; } rc = qed_int_sb_attn_alloc(p_hwfn, p_ptt); - if (rc) { + if (rc) DP_ERR(p_hwfn->cdev, "Failed to allocate sb attn mem\n"); - return rc; - } + return rc; } diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index b5d844568107..c823c46fd979 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -305,14 +305,14 @@ qed_sp_update_mcast_bin(struct qed_hwfn *p_hwfn, memset(&p_ramrod->approx_mcast.bins, 0, sizeof(p_ramrod->approx_mcast.bins)); - if (p_params->update_approx_mcast_flg) { - p_ramrod->common.update_approx_mcast_flg = 1; - for (i = 0; i < ETH_MULTICAST_MAC_BINS_IN_REGS; i++) { - u32 *p_bins = (u32 *)p_params->bins; - __le32 val = cpu_to_le32(p_bins[i]); + if (!p_params->update_approx_mcast_flg) + return; - p_ramrod->approx_mcast.bins[i] = val; - } + p_ramrod->common.update_approx_mcast_flg = 1; + for (i = 0; i < ETH_MULTICAST_MAC_BINS_IN_REGS; i++) { + u32 *p_bins = (u32 *)p_params->bins; + + p_ramrod->approx_mcast.bins[i] = cpu_to_le32(p_bins[i]); } } @@ -360,8 +360,8 @@ int qed_sp_vport_update(struct qed_hwfn *p_hwfn, p_cmn->tx_active_flg = p_params->vport_active_tx_flg; p_cmn->update_tx_active_flg = p_params->update_vport_active_tx_flg; p_cmn->accept_any_vlan = p_params->accept_any_vlan; - p_cmn->update_accept_any_vlan_flg = - p_params->update_accept_any_vlan_flg; + val = p_params->update_accept_any_vlan_flg; + p_cmn->update_accept_any_vlan_flg = val; p_cmn->inner_vlan_removal_en = p_params->inner_vlan_removal_flg; val = p_params->update_inner_vlan_removal_flg; @@ -1183,17 +1183,14 @@ qed_sp_eth_filter_mcast(struct qed_hwfn *p_hwfn, u8 abs_vport_id = 0; int rc, i; - if (p_filter_cmd->opcode == QED_FILTER_ADD) { + if (p_filter_cmd->opcode == QED_FILTER_ADD) rc = qed_fw_vport(p_hwfn, p_filter_cmd->vport_to_add_to, &abs_vport_id); - if (rc) - return rc; - } else { + else rc = qed_fw_vport(p_hwfn, p_filter_cmd->vport_to_remove_from, &abs_vport_id); - if (rc) - return rc; - } + if (rc) + return rc; /* Get SPQ entry */ memset(&init_data, 0, sizeof(init_data)); @@ -1840,8 +1837,8 @@ static int qed_update_vport(struct qed_dev *cdev, QED_RSS_IND_TABLE_SIZE * sizeof(u16)); memcpy(sp_rss_params.rss_key, params->rss_params.rss_key, QED_RSS_KEY_SIZE * sizeof(u32)); + sp_params.rss_params = &sp_rss_params; } - sp_params.rss_params = &sp_rss_params; for_each_hwfn(cdev, i) { struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index 4a82b99c5a45..0a676db02c8a 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -161,9 +161,7 @@ int qed_mcp_cmd_init(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) size = MFW_DRV_MSG_MAX_DWORDS(p_info->mfw_mb_length) * sizeof(u32); p_info->mfw_mb_cur = kzalloc(size, GFP_KERNEL); - p_info->mfw_mb_shadow = - kzalloc(sizeof(u32) * MFW_DRV_MSG_MAX_DWORDS( - p_info->mfw_mb_length), GFP_KERNEL); + p_info->mfw_mb_shadow = kzalloc(size, GFP_KERNEL); if (!p_info->mfw_mb_shadow || !p_info->mfw_mb_addr) goto err; diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index f4230d4a4fbd..3a99eaa9aa84 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -2704,9 +2704,6 @@ static int qede_alloc_rx_buffer(struct qede_dev *edev, struct eth_rx_bd *rx_bd; dma_addr_t mapping; struct page *data; - u16 rx_buf_size; - - rx_buf_size = rxq->rx_buf_size; data = alloc_pages(GFP_ATOMIC, 0); if (unlikely(!data)) { From 525ef5c07f187bf0918fdf3bbc76ad18ce1d1cf9 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Mon, 15 Aug 2016 10:42:45 +0300 Subject: [PATCH 0230/1715] qed*: Add and modify some prints This patch touches various prints in the driver - it reduces the verbosity of some prints [which were previously logged by default] while adding several new debug prints and modifying others. Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_dev.c | 10 +++++++--- drivers/net/ethernet/qlogic/qed/qed_int.c | 8 ++++++-- drivers/net/ethernet/qlogic/qed/qed_main.c | 21 ++++++++++++++------ drivers/net/ethernet/qlogic/qed/qed_mcp.c | 7 ++++--- drivers/net/ethernet/qlogic/qed/qed_spq.c | 11 ++++++++-- drivers/net/ethernet/qlogic/qede/qede_main.c | 20 +++++++++++-------- 6 files changed, 53 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index 1371ea07f876..8117ddff501b 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -1206,7 +1206,7 @@ static int qed_reg_assert(struct qed_hwfn *p_hwfn, u32 assert_val = qed_rd(p_hwfn, p_ptt, reg); if (assert_val != expected) { - DP_NOTICE(p_hwfn, "Value at address 0x%x != 0x%08x\n", + DP_NOTICE(p_hwfn, "Value at address 0x%08x != 0x%08x\n", reg, expected); return -EINVAL; } @@ -1306,6 +1306,10 @@ static void get_function_id(struct qed_hwfn *p_hwfn) PXP_CONCRETE_FID_PFID); p_hwfn->port_id = GET_FIELD(p_hwfn->hw_info.concrete_fid, PXP_CONCRETE_FID_PORT); + + DP_VERBOSE(p_hwfn, NETIF_MSG_PROBE, + "Read ME register: Concrete 0x%08x Opaque 0x%04x\n", + p_hwfn->hw_info.concrete_fid, p_hwfn->hw_info.opaque_fid); } static void qed_hw_set_feat(struct qed_hwfn *p_hwfn) @@ -1605,10 +1609,10 @@ static void qed_get_num_funcs(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) DP_VERBOSE(p_hwfn, NETIF_MSG_PROBE, - "PF [rel_id %d, abs_id %d] within the %d enabled functions on the engine\n", + "PF [rel_id %d, abs_id %d] occupies index %d within the %d enabled functions on the engine\n", p_hwfn->rel_pf_id, p_hwfn->abs_pf_id, - p_hwfn->num_funcs_on_engine); + p_hwfn->enabled_func_idx, p_hwfn->num_funcs_on_engine); } static int diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c index a701b7640e7c..61ec973a06c7 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_int.c +++ b/drivers/net/ethernet/qlogic/qed/qed_int.c @@ -2576,8 +2576,12 @@ static u16 qed_get_igu_sb_id(struct qed_hwfn *p_hwfn, u16 sb_id) else igu_sb_id = qed_vf_get_igu_sb_id(p_hwfn, sb_id); - DP_VERBOSE(p_hwfn, NETIF_MSG_INTR, "SB [%s] index is 0x%04x\n", - (sb_id == QED_SP_SB_ID) ? "DSB" : "non-DSB", igu_sb_id); + if (sb_id == QED_SP_SB_ID) + DP_VERBOSE(p_hwfn, NETIF_MSG_INTR, + "Slowpath SB index in IGU is 0x%04x\n", igu_sb_id); + else + DP_VERBOSE(p_hwfn, NETIF_MSG_INTR, + "SB [%04x] <--> IGU SB [%04x]\n", sb_id, igu_sb_id); return igu_sb_id; } diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index f4e816af1783..54976cc2747d 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -51,8 +51,6 @@ MODULE_FIRMWARE(QED_FW_FILE_NAME); static int __init qed_init(void) { - pr_notice("qed_init called\n"); - pr_info("%s", version); return 0; @@ -437,6 +435,11 @@ static int qed_set_int_mode(struct qed_dev *cdev, bool force_mode) } out: + if (!rc) + DP_INFO(cdev, "Using %s interrupts\n", + int_params->out.int_mode == QED_INT_MODE_INTA ? + "INTa" : int_params->out.int_mode == QED_INT_MODE_MSI ? + "MSI" : "MSIX"); cdev->int_coalescing_mode = QED_COAL_MODE_ENABLE; return rc; @@ -512,19 +515,18 @@ static irqreturn_t qed_single_int(int irq, void *dev_instance) int qed_slowpath_irq_req(struct qed_hwfn *hwfn) { struct qed_dev *cdev = hwfn->cdev; + u32 int_mode; int rc = 0; u8 id; - if (cdev->int_params.out.int_mode == QED_INT_MODE_MSIX) { + int_mode = cdev->int_params.out.int_mode; + if (int_mode == QED_INT_MODE_MSIX) { id = hwfn->my_id; snprintf(hwfn->name, NAME_SIZE, "sp-%d-%02x:%02x.%02x", id, cdev->pdev->bus->number, PCI_SLOT(cdev->pdev->devfn), hwfn->abs_pf_id); rc = request_irq(cdev->int_params.msix_table[id].vector, qed_msix_sp_int, 0, hwfn->name, hwfn->sp_dpc); - if (!rc) - DP_VERBOSE(hwfn, (NETIF_MSG_INTR | QED_MSG_SP), - "Requested slowpath MSI-X\n"); } else { unsigned long flags = 0; @@ -539,6 +541,13 @@ int qed_slowpath_irq_req(struct qed_hwfn *hwfn) flags, cdev->name, cdev); } + if (rc) + DP_NOTICE(cdev, "request_irq failed, rc = %d\n", rc); + else + DP_VERBOSE(hwfn, (NETIF_MSG_INTR | QED_MSG_SP), + "Requested slowpath %s\n", + (int_mode == QED_INT_MODE_MSIX) ? "MSI-X" : "IRQ"); + return rc; } diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index 0a676db02c8a..88b448bb2949 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -317,7 +317,8 @@ static int qed_do_mcp_cmd(struct qed_hwfn *p_hwfn, *o_mcp_param = DRV_MB_RD(p_hwfn, p_ptt, fw_mb_param); } else { /* FW BUG! */ - DP_ERR(p_hwfn, "MFW failed to respond!\n"); + DP_ERR(p_hwfn, "MFW failed to respond [cmd 0x%x param 0x%x]\n", + cmd, param); *o_mcp_resp = 0; rc = -EAGAIN; } @@ -333,7 +334,7 @@ static int qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, /* MCP not initialized */ if (!qed_mcp_is_init(p_hwfn)) { - DP_NOTICE(p_hwfn, "MFW is not initialized !\n"); + DP_NOTICE(p_hwfn, "MFW is not initialized!\n"); return -EBUSY; } @@ -935,7 +936,7 @@ int qed_mcp_get_media_type(struct qed_dev *cdev, u32 *p_media_type) return -EINVAL; if (!qed_mcp_is_init(p_hwfn)) { - DP_NOTICE(p_hwfn, "MFW is not initialized !\n"); + DP_NOTICE(p_hwfn, "MFW is not initialized!\n"); return -EBUSY; } diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c index 5e68a33b2097..0265a32c8681 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_spq.c +++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c @@ -824,15 +824,22 @@ int qed_spq_completion(struct qed_hwfn *p_hwfn, if (!found) { DP_NOTICE(p_hwfn, - "Failed to find an entry this EQE completes\n"); + "Failed to find an entry this EQE [echo %04x] completes\n", + le16_to_cpu(echo)); return -EEXIST; } - DP_VERBOSE(p_hwfn, QED_MSG_SPQ, "Complete: func %p cookie %p)\n", + DP_VERBOSE(p_hwfn, QED_MSG_SPQ, + "Complete EQE [echo %04x]: func %p cookie %p)\n", + le16_to_cpu(echo), p_ent->comp_cb.function, p_ent->comp_cb.cookie); if (found->comp_cb.function) found->comp_cb.function(p_hwfn, found->comp_cb.cookie, p_data, fw_return_code); + else + DP_VERBOSE(p_hwfn, + QED_MSG_SPQ, + "Got a completion without a callback function\n"); if ((found->comp_mode != QED_SPQ_MODE_EBLOCK) || (found->queue == &p_spq->unlimited_pending)) diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 3a99eaa9aa84..5ce8a3c9ed6f 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -222,7 +222,7 @@ int __init qede_init(void) { int ret; - pr_notice("qede_init: %s\n", version); + pr_info("qede_init: %s\n", version); qed_ops = qed_get_eth_ops(); if (!qed_ops) { @@ -253,7 +253,8 @@ int __init qede_init(void) static void __exit qede_cleanup(void) { - pr_notice("qede_cleanup called\n"); + if (debug & QED_LOG_INFO_MASK) + pr_info("qede_cleanup called\n"); unregister_netdevice_notifier(&qede_netdev_notifier); pci_unregister_driver(&qede_pci_driver); @@ -1453,7 +1454,7 @@ alloc_skb: skb = netdev_alloc_skb(edev->ndev, QEDE_RX_HDR_SIZE); if (unlikely(!skb)) { DP_NOTICE(edev, - "Build_skb failed, dropping incoming packet\n"); + "skb allocation failed, dropping incoming packet\n"); qede_recycle_rx_bd_ring(rxq, edev, fp_cqe->bd_num); rxq->rx_alloc_errors++; goto next_cqe; @@ -2130,7 +2131,7 @@ static void qede_udp_tunnel_add(struct net_device *dev, edev->vxlan_dst_port = t_port; - DP_VERBOSE(edev, QED_MSG_DEBUG, "Added vxlan port=%d", + DP_VERBOSE(edev, QED_MSG_DEBUG, "Added vxlan port=%d\n", t_port); set_bit(QEDE_SP_VXLAN_PORT_CONFIG, &edev->sp_flags); @@ -2141,7 +2142,7 @@ static void qede_udp_tunnel_add(struct net_device *dev, edev->geneve_dst_port = t_port; - DP_VERBOSE(edev, QED_MSG_DEBUG, "Added geneve port=%d", + DP_VERBOSE(edev, QED_MSG_DEBUG, "Added geneve port=%d\n", t_port); set_bit(QEDE_SP_GENEVE_PORT_CONFIG, &edev->sp_flags); break; @@ -2165,7 +2166,7 @@ static void qede_udp_tunnel_del(struct net_device *dev, edev->vxlan_dst_port = 0; - DP_VERBOSE(edev, QED_MSG_DEBUG, "Deleted vxlan port=%d", + DP_VERBOSE(edev, QED_MSG_DEBUG, "Deleted vxlan port=%d\n", t_port); set_bit(QEDE_SP_VXLAN_PORT_CONFIG, &edev->sp_flags); @@ -2176,7 +2177,7 @@ static void qede_udp_tunnel_del(struct net_device *dev, edev->geneve_dst_port = 0; - DP_VERBOSE(edev, QED_MSG_DEBUG, "Deleted geneve port=%d", + DP_VERBOSE(edev, QED_MSG_DEBUG, "Deleted geneve port=%d\n", t_port); set_bit(QEDE_SP_GENEVE_PORT_CONFIG, &edev->sp_flags); break; @@ -2243,6 +2244,9 @@ static struct qede_dev *qede_alloc_etherdev(struct qed_dev *cdev, edev->q_num_rx_buffers = NUM_RX_BDS_DEF; edev->q_num_tx_buffers = NUM_TX_BDS_DEF; + DP_INFO(edev, "Allocated netdev with %d tx queues and %d rx queues\n", + info->num_queues, info->num_queues); + SET_NETDEV_DEV(ndev, &pdev->dev); memset(&edev->stats, 0, sizeof(edev->stats)); @@ -2568,7 +2572,7 @@ static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode) qed_ops->common->slowpath_stop(cdev); qed_ops->common->remove(cdev); - pr_notice("Ending successfully qede_remove\n"); + dev_info(&pdev->dev, "Ending qede_remove successfully\n"); } static void qede_remove(struct pci_dev *pdev) From 2eb03e6c4e305b71bdd2d0ce4250b9c9099d9128 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Mon, 15 Aug 2016 14:51:54 +0300 Subject: [PATCH 0231/1715] switchdev: Put export declaration in the right place Move exporting of switchdev_port_same_parent_id to be right below it and not elsewhere. Signed-off-by: Or Gerlitz Reported-by: Ido Schimmel Signed-off-by: David S. Miller --- net/switchdev/switchdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index a5fc9dd24aa9..9e9012956993 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -1306,6 +1306,7 @@ bool switchdev_port_same_parent_id(struct net_device *a, return netdev_phys_item_id_same(&a_attr.u.ppid, &b_attr.u.ppid); } +EXPORT_SYMBOL_GPL(switchdev_port_same_parent_id); static u32 switchdev_port_fwd_mark_get(struct net_device *dev, struct net_device *group_dev) @@ -1323,7 +1324,6 @@ static u32 switchdev_port_fwd_mark_get(struct net_device *dev, return dev->ifindex; } -EXPORT_SYMBOL_GPL(switchdev_port_same_parent_id); static void switchdev_port_fwd_mark_reset(struct net_device *group_dev, u32 old_mark, u32 *reset_mark) From cfaace269d0ce5a4ae26bfe442f1c4df1a9558de Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 15 Aug 2016 13:55:17 +0100 Subject: [PATCH 0232/1715] net: hns: mdio->irq is an array, so no need to check if it is null The null check on mdio->irq is redundant since mdio->irq is an array of PHY_MAX_ADDR ints and hence can never be null. Remove the redundant check. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c index 5c8afe1a5ccb..a834774fdb02 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c @@ -684,8 +684,7 @@ hns_mac_register_phydev(struct mii_bus *mdio, struct hns_mac_cb *mac_cb, if (!phy || IS_ERR(phy)) return -EIO; - if (mdio->irq) - phy->irq = mdio->irq[addr]; + phy->irq = mdio->irq[addr]; /* All data is now stored in the phy struct; * register it From 64721094b9a966c8e02d2b03729e5093471f9d1c Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 15 Aug 2016 22:34:57 +0000 Subject: [PATCH 0233/1715] net: phy: Fix return value check in xgmiitorgmii_probe() In case of error, the function of_parse_phandle() returns NULL pointer not ERR_PTR(). The IS_ERR() test in the return value check should be replaced with NULL test. Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- drivers/net/phy/xilinx_gmii2rgmii.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/xilinx_gmii2rgmii.c b/drivers/net/phy/xilinx_gmii2rgmii.c index 8e980ad476a4..cad6e19724e2 100644 --- a/drivers/net/phy/xilinx_gmii2rgmii.c +++ b/drivers/net/phy/xilinx_gmii2rgmii.c @@ -67,7 +67,7 @@ int xgmiitorgmii_probe(struct mdio_device *mdiodev) return -ENOMEM; phy_node = of_parse_phandle(np, "phy-handle", 0); - if (IS_ERR(phy_node)) { + if (!phy_node) { dev_err(dev, "Couldn't parse phy-handle\n"); return -ENODEV; } From 557bc7d44d52d52374bc72e9cc3b0beb41026886 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 15 Aug 2016 22:50:34 +0000 Subject: [PATCH 0234/1715] net: ena: Remove unnecessary pci_set_drvdata() The driver core clears the driver data to NULL after device_release or on probe failure. Thus, it is not needed to manually clear the device driver data to NULL. Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 94790df2d559..499a22b165f1 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -3060,7 +3060,6 @@ err_device_destroy: err_free_region: ena_release_bars(ena_dev, pdev); err_free_ena_dev: - pci_set_drvdata(pdev, NULL); vfree(ena_dev); err_disable_device: pci_disable_device(pdev); @@ -3156,8 +3155,6 @@ static void ena_remove(struct pci_dev *pdev) ena_release_bars(ena_dev, pdev); - pci_set_drvdata(pdev, NULL); - pci_disable_device(pdev); ena_com_destroy_interrupt_moderation(ena_dev); From 6e22066fd02b675260b980b3e42b7d616a9839c5 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 15 Aug 2016 22:51:04 +0000 Subject: [PATCH 0235/1715] net: ena: Fix error return code in ena_device_init() Fix to return a negative error code from the invalid dma width error handling case instead of 0. Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 499a22b165f1..5c536b8ec99c 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -2329,6 +2329,7 @@ static int ena_device_init(struct ena_com_dev *ena_dev, struct pci_dev *pdev, dma_width = ena_com_get_dma_width(ena_dev); if (dma_width < 0) { dev_err(dev, "Invalid dma width value %d", dma_width); + rc = dma_width; goto err_mmio_read_less; } From 5288b6fff17369386e435b6780a00e3e3fd633de Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 15 Aug 2016 22:51:29 +0000 Subject: [PATCH 0236/1715] net: thunderx: Remove unnecessary pci_set_drvdata() The driver core clears the driver data to NULL after device_release or on probe failure. Thus, it is not needed to manually clear the device driver data to NULL. Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/thunder_xcv.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/thunder_xcv.c b/drivers/net/ethernet/cavium/thunder/thunder_xcv.c index 9210d04e4b5d..67befedef709 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_xcv.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_xcv.c @@ -194,7 +194,6 @@ err_release_regions: err_disable_device: pci_disable_device(pdev); err_kfree: - pci_set_drvdata(pdev, NULL); devm_kfree(dev, xcv); xcv = NULL; return err; @@ -211,7 +210,6 @@ static void xcv_remove(struct pci_dev *pdev) pci_release_regions(pdev); pci_disable_device(pdev); - pci_set_drvdata(pdev, NULL); } static struct pci_driver xcv_driver = { From 45d339309f491058f5a3f974a17aa893f5a35329 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 15 Aug 2016 22:51:48 +0000 Subject: [PATCH 0237/1715] net: mediatek: remove unnecessary platform_set_drvdata() The driver core clears the driver data to NULL after device_release or on probe failure. Thus, it is not needed to manually clear the device driver data to NULL. Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 3a4726e2a297..994232e03380 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1892,7 +1892,6 @@ static int mtk_remove(struct platform_device *pdev) netif_napi_del(ð->tx_napi); netif_napi_del(ð->rx_napi); mtk_cleanup(eth); - platform_set_drvdata(pdev, NULL); return 0; } From 2d79af6e35cd93614560b5b7e760daa306978813 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Mon, 15 Aug 2016 17:18:57 -0400 Subject: [PATCH 0238/1715] net: dsa: mv88e6xxx: rename _mv88e6xxx_wait Now that there is no locked version of the wait routine anymore, rename the _ prefixed version and make it use the new read API. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 63 +++++++++++++++++--------------- 1 file changed, 33 insertions(+), 30 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index d36aedde8cb9..a5a96bce44bf 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -216,6 +216,28 @@ static int mv88e6xxx_write(struct mv88e6xxx_chip *chip, return 0; } +static int mv88e6xxx_wait(struct mv88e6xxx_chip *chip, int addr, int reg, + u16 mask) +{ + unsigned long timeout = jiffies + HZ / 10; + + while (time_before(jiffies, timeout)) { + u16 val; + int err; + + err = mv88e6xxx_read(chip, addr, reg, &val); + if (err) + return err; + + if (!(val & mask)) + return 0; + + usleep_range(1000, 2000); + } + + return -ETIMEDOUT; +} + /* Indirect write to single pointer-data register with an Update bit */ static int mv88e6xxx_update(struct mv88e6xxx_chip *chip, int addr, int reg, u16 update) @@ -819,35 +841,16 @@ static void mv88e6xxx_get_regs(struct dsa_switch *ds, int port, mutex_unlock(&chip->reg_lock); } -static int _mv88e6xxx_wait(struct mv88e6xxx_chip *chip, int reg, int offset, - u16 mask) -{ - unsigned long timeout = jiffies + HZ / 10; - - while (time_before(jiffies, timeout)) { - int ret; - - ret = _mv88e6xxx_reg_read(chip, reg, offset); - if (ret < 0) - return ret; - if (!(ret & mask)) - return 0; - - usleep_range(1000, 2000); - } - return -ETIMEDOUT; -} - static int mv88e6xxx_mdio_wait(struct mv88e6xxx_chip *chip) { - return _mv88e6xxx_wait(chip, REG_GLOBAL2, GLOBAL2_SMI_OP, - GLOBAL2_SMI_OP_BUSY); + return mv88e6xxx_wait(chip, REG_GLOBAL2, GLOBAL2_SMI_OP, + GLOBAL2_SMI_OP_BUSY); } static int _mv88e6xxx_atu_wait(struct mv88e6xxx_chip *chip) { - return _mv88e6xxx_wait(chip, REG_GLOBAL, GLOBAL_ATU_OP, - GLOBAL_ATU_OP_BUSY); + return mv88e6xxx_wait(chip, REG_GLOBAL, GLOBAL_ATU_OP, + GLOBAL_ATU_OP_BUSY); } static int mv88e6xxx_mdio_read_indirect(struct mv88e6xxx_chip *chip, @@ -1227,8 +1230,8 @@ static int _mv88e6xxx_port_pvid_set(struct mv88e6xxx_chip *chip, static int _mv88e6xxx_vtu_wait(struct mv88e6xxx_chip *chip) { - return _mv88e6xxx_wait(chip, REG_GLOBAL, GLOBAL_VTU_OP, - GLOBAL_VTU_OP_BUSY); + return mv88e6xxx_wait(chip, REG_GLOBAL, GLOBAL_VTU_OP, + GLOBAL_VTU_OP_BUSY); } static int _mv88e6xxx_vtu_cmd(struct mv88e6xxx_chip *chip, u16 op) @@ -2949,8 +2952,8 @@ static int mv88e6xxx_g2_clear_irl(struct mv88e6xxx_chip *chip) break; /* Wait for the operation to complete */ - err = _mv88e6xxx_wait(chip, REG_GLOBAL2, GLOBAL2_IRL_CMD, - GLOBAL2_IRL_CMD_BUSY); + err = mv88e6xxx_wait(chip, REG_GLOBAL2, GLOBAL2_IRL_CMD, + GLOBAL2_IRL_CMD_BUSY); if (err) break; } @@ -3004,9 +3007,9 @@ static int mv88e6xxx_g2_clear_pot(struct mv88e6xxx_chip *chip) static int mv88e6xxx_g2_eeprom_wait(struct mv88e6xxx_chip *chip) { - return _mv88e6xxx_wait(chip, REG_GLOBAL2, GLOBAL2_EEPROM_CMD, - GLOBAL2_EEPROM_CMD_BUSY | - GLOBAL2_EEPROM_CMD_RUNNING); + return mv88e6xxx_wait(chip, REG_GLOBAL2, GLOBAL2_EEPROM_CMD, + GLOBAL2_EEPROM_CMD_BUSY | + GLOBAL2_EEPROM_CMD_RUNNING); } static int mv88e6xxx_g2_eeprom_cmd(struct mv88e6xxx_chip *chip, u16 cmd) From a0ffff2484dcd65e368a0c1b68a39b630167c2a5 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Mon, 15 Aug 2016 17:18:58 -0400 Subject: [PATCH 0239/1715] net: dsa: mv88e6xxx: describe Multi-chip registers Add flags to describe the presence of SMI Command and Data registers used to indirectly access internal SMI devices registers when the switch SMI address is not zero. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 2 +- drivers/net/dsa/mv88e6xxx/mv88e6xxx.h | 40 +++++++++++++++++---------- 2 files changed, 26 insertions(+), 16 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index a5a96bce44bf..44debf22419b 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -3883,7 +3883,7 @@ static int mv88e6xxx_smi_init(struct mv88e6xxx_chip *chip, if (sw_addr == 0) chip->smi_ops = &mv88e6xxx_smi_single_chip_ops; - else if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_MULTI_CHIP)) + else if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_MULTI_CHIP)) chip->smi_ops = &mv88e6xxx_smi_multi_chip_ops; else return -EINVAL; diff --git a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h index 48d6ea77f9bd..527a880118b4 100644 --- a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h +++ b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h @@ -388,6 +388,13 @@ enum mv88e6xxx_cap { */ MV88E6XXX_CAP_EEE, + /* Multi-chip Addressing Mode. + * Some chips respond to only 2 registers of its own SMI device address + * when it is non-zero, and use indirect access to internal registers. + */ + MV88E6XXX_CAP_SMI_CMD, /* (0x00) SMI Command */ + MV88E6XXX_CAP_SMI_DATA, /* (0x01) SMI Data */ + /* Switch Global 2 Registers. * The device contains a second set of global 16-bit registers. */ @@ -403,12 +410,6 @@ enum mv88e6xxx_cap { MV88E6XXX_CAP_G2_EEPROM_CMD, /* (0x14) EEPROM Command */ MV88E6XXX_CAP_G2_EEPROM_DATA, /* (0x15) EEPROM Data */ - /* Multi-chip Addressing Mode. - * Some chips require an indirect SMI access when their SMI device - * address is not zero. See SMI_CMD and SMI_DATA. - */ - MV88E6XXX_CAP_MULTI_CHIP, - /* PHY Polling Unit. * See GLOBAL_CONTROL_PPU_ENABLE and GLOBAL_STATUS_PPU_POLLING. */ @@ -441,6 +442,10 @@ enum mv88e6xxx_cap { /* Bitmask of capabilities */ #define MV88E6XXX_FLAG_EEE BIT(MV88E6XXX_CAP_EEE) + +#define MV88E6XXX_FLAG_SMI_CMD BIT(MV88E6XXX_CAP_SMI_CMD) +#define MV88E6XXX_FLAG_SMI_DATA BIT(MV88E6XXX_CAP_SMI_DATA) + #define MV88E6XXX_FLAG_GLOBAL2 BIT(MV88E6XXX_CAP_GLOBAL2) #define MV88E6XXX_FLAG_G2_MGMT_EN_2X BIT(MV88E6XXX_CAP_G2_MGMT_EN_2X) #define MV88E6XXX_FLAG_G2_MGMT_EN_0X BIT(MV88E6XXX_CAP_G2_MGMT_EN_0X) @@ -452,7 +457,7 @@ enum mv88e6xxx_cap { #define MV88E6XXX_FLAG_G2_POT BIT(MV88E6XXX_CAP_G2_POT) #define MV88E6XXX_FLAG_G2_EEPROM_CMD BIT(MV88E6XXX_CAP_G2_EEPROM_CMD) #define MV88E6XXX_FLAG_G2_EEPROM_DATA BIT(MV88E6XXX_CAP_G2_EEPROM_DATA) -#define MV88E6XXX_FLAG_MULTI_CHIP BIT(MV88E6XXX_CAP_MULTI_CHIP) + #define MV88E6XXX_FLAG_PPU BIT(MV88E6XXX_CAP_PPU) #define MV88E6XXX_FLAG_PPU_ACTIVE BIT(MV88E6XXX_CAP_PPU_ACTIVE) #define MV88E6XXX_FLAG_SMI_PHY BIT(MV88E6XXX_CAP_SMI_PHY) @@ -471,6 +476,11 @@ enum mv88e6xxx_cap { (MV88E6XXX_FLAG_G2_IRL_CMD | \ MV88E6XXX_FLAG_G2_IRL_DATA) +/* Multi-chip Addressing Mode */ +#define MV88E6XXX_FLAGS_MULTI_CHIP \ + (MV88E6XXX_FLAG_SMI_CMD | \ + MV88E6XXX_FLAG_SMI_DATA) + /* Cross-chip Port VLAN Table */ #define MV88E6XXX_FLAGS_PVT \ (MV88E6XXX_FLAG_G2_PVT_ADDR | \ @@ -479,20 +489,20 @@ enum mv88e6xxx_cap { #define MV88E6XXX_FLAGS_FAMILY_6095 \ (MV88E6XXX_FLAG_GLOBAL2 | \ MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ - MV88E6XXX_FLAG_MULTI_CHIP | \ MV88E6XXX_FLAG_PPU | \ - MV88E6XXX_FLAG_VTU) + MV88E6XXX_FLAG_VTU | \ + MV88E6XXX_FLAGS_MULTI_CHIP) #define MV88E6XXX_FLAGS_FAMILY_6097 \ (MV88E6XXX_FLAG_GLOBAL2 | \ MV88E6XXX_FLAG_G2_MGMT_EN_2X | \ MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ MV88E6XXX_FLAG_G2_POT | \ - MV88E6XXX_FLAG_MULTI_CHIP | \ MV88E6XXX_FLAG_PPU | \ MV88E6XXX_FLAG_STU | \ MV88E6XXX_FLAG_VTU | \ MV88E6XXX_FLAGS_IRL | \ + MV88E6XXX_FLAGS_MULTI_CHIP | \ MV88E6XXX_FLAGS_PVT) #define MV88E6XXX_FLAGS_FAMILY_6165 \ @@ -501,17 +511,17 @@ enum mv88e6xxx_cap { MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ MV88E6XXX_FLAG_G2_SWITCH_MAC | \ MV88E6XXX_FLAG_G2_POT | \ - MV88E6XXX_FLAG_MULTI_CHIP | \ MV88E6XXX_FLAG_STU | \ MV88E6XXX_FLAG_TEMP | \ MV88E6XXX_FLAG_VTU | \ MV88E6XXX_FLAGS_IRL | \ + MV88E6XXX_FLAGS_MULTI_CHIP | \ MV88E6XXX_FLAGS_PVT) #define MV88E6XXX_FLAGS_FAMILY_6185 \ (MV88E6XXX_FLAG_GLOBAL2 | \ MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ - MV88E6XXX_FLAG_MULTI_CHIP | \ + MV88E6XXX_FLAGS_MULTI_CHIP | \ MV88E6XXX_FLAG_PPU | \ MV88E6XXX_FLAG_VTU) @@ -522,7 +532,6 @@ enum mv88e6xxx_cap { MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ MV88E6XXX_FLAG_G2_SWITCH_MAC | \ MV88E6XXX_FLAG_G2_POT | \ - MV88E6XXX_FLAG_MULTI_CHIP | \ MV88E6XXX_FLAG_PPU_ACTIVE | \ MV88E6XXX_FLAG_SMI_PHY | \ MV88E6XXX_FLAG_TEMP | \ @@ -530,6 +539,7 @@ enum mv88e6xxx_cap { MV88E6XXX_FLAG_VTU | \ MV88E6XXX_FLAGS_EEPROM16 | \ MV88E6XXX_FLAGS_IRL | \ + MV88E6XXX_FLAGS_MULTI_CHIP | \ MV88E6XXX_FLAGS_PVT) #define MV88E6XXX_FLAGS_FAMILY_6351 \ @@ -538,13 +548,13 @@ enum mv88e6xxx_cap { MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ MV88E6XXX_FLAG_G2_SWITCH_MAC | \ MV88E6XXX_FLAG_G2_POT | \ - MV88E6XXX_FLAG_MULTI_CHIP | \ MV88E6XXX_FLAG_PPU_ACTIVE | \ MV88E6XXX_FLAG_SMI_PHY | \ MV88E6XXX_FLAG_STU | \ MV88E6XXX_FLAG_TEMP | \ MV88E6XXX_FLAG_VTU | \ MV88E6XXX_FLAGS_IRL | \ + MV88E6XXX_FLAGS_MULTI_CHIP | \ MV88E6XXX_FLAGS_PVT) #define MV88E6XXX_FLAGS_FAMILY_6352 \ @@ -554,7 +564,6 @@ enum mv88e6xxx_cap { MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ MV88E6XXX_FLAG_G2_SWITCH_MAC | \ MV88E6XXX_FLAG_G2_POT | \ - MV88E6XXX_FLAG_MULTI_CHIP | \ MV88E6XXX_FLAG_PPU_ACTIVE | \ MV88E6XXX_FLAG_SMI_PHY | \ MV88E6XXX_FLAG_STU | \ @@ -563,6 +572,7 @@ enum mv88e6xxx_cap { MV88E6XXX_FLAG_VTU | \ MV88E6XXX_FLAGS_EEPROM16 | \ MV88E6XXX_FLAGS_IRL | \ + MV88E6XXX_FLAGS_MULTI_CHIP | \ MV88E6XXX_FLAGS_PVT) struct mv88e6xxx_info { From 57c67cf57a3898d234f029ca674f82d5bd2c080a Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Mon, 15 Aug 2016 17:18:59 -0400 Subject: [PATCH 0240/1715] net: dsa: mv88e6xxx: rework Global2 SMI PHY access Describe the presence of the Global2 SMI PHY registers, used to indirectly access the internal SMI devices registers on some chips. Also temporarily forward declare mv88e6xxx_g2_smi_phy_{read,write} to use them in mv88e6xxx_mdio_{read,write}_indirect, before getting rid of the later. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 97 ++++++++++++++++++--------- drivers/net/dsa/mv88e6xxx/mv88e6xxx.h | 49 +++++++------- 2 files changed, 90 insertions(+), 56 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 44debf22419b..924a2af5ac6d 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -841,52 +841,34 @@ static void mv88e6xxx_get_regs(struct dsa_switch *ds, int port, mutex_unlock(&chip->reg_lock); } -static int mv88e6xxx_mdio_wait(struct mv88e6xxx_chip *chip) -{ - return mv88e6xxx_wait(chip, REG_GLOBAL2, GLOBAL2_SMI_OP, - GLOBAL2_SMI_OP_BUSY); -} - static int _mv88e6xxx_atu_wait(struct mv88e6xxx_chip *chip) { return mv88e6xxx_wait(chip, REG_GLOBAL, GLOBAL_ATU_OP, GLOBAL_ATU_OP_BUSY); } +static int mv88e6xxx_g2_smi_phy_read(struct mv88e6xxx_chip *chip, int addr, + int reg, u16 *val); +static int mv88e6xxx_g2_smi_phy_write(struct mv88e6xxx_chip *chip, int addr, + int reg, u16 val); + static int mv88e6xxx_mdio_read_indirect(struct mv88e6xxx_chip *chip, int addr, int regnum) { - int ret; + u16 val; + int err; - ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL2, GLOBAL2_SMI_OP, - GLOBAL2_SMI_OP_22_READ | (addr << 5) | - regnum); - if (ret < 0) - return ret; + err = mv88e6xxx_g2_smi_phy_read(chip, addr, regnum, &val); + if (err) + return err; - ret = mv88e6xxx_mdio_wait(chip); - if (ret < 0) - return ret; - - ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL2, GLOBAL2_SMI_DATA); - - return ret; + return val; } static int mv88e6xxx_mdio_write_indirect(struct mv88e6xxx_chip *chip, int addr, int regnum, u16 val) { - int ret; - - ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL2, GLOBAL2_SMI_DATA, val); - if (ret < 0) - return ret; - - ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL2, GLOBAL2_SMI_OP, - GLOBAL2_SMI_OP_22_WRITE | (addr << 5) | - regnum); - - return mv88e6xxx_mdio_wait(chip); + return mv88e6xxx_g2_smi_phy_write(chip, addr, regnum, val); } static int mv88e6xxx_get_eee(struct dsa_switch *ds, int port, @@ -3057,6 +3039,57 @@ static int mv88e6xxx_g2_eeprom_write16(struct mv88e6xxx_chip *chip, return mv88e6xxx_g2_eeprom_cmd(chip, cmd); } +static int mv88e6xxx_g2_smi_phy_wait(struct mv88e6xxx_chip *chip) +{ + return mv88e6xxx_wait(chip, REG_GLOBAL2, GLOBAL2_SMI_PHY_CMD, + GLOBAL2_SMI_PHY_CMD_BUSY); +} + +static int mv88e6xxx_g2_smi_phy_cmd(struct mv88e6xxx_chip *chip, u16 cmd) +{ + int err; + + err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_SMI_PHY_CMD, cmd); + if (err) + return err; + + return mv88e6xxx_g2_smi_phy_wait(chip); +} + +static int mv88e6xxx_g2_smi_phy_read(struct mv88e6xxx_chip *chip, int addr, + int reg, u16 *val) +{ + u16 cmd = GLOBAL2_SMI_PHY_CMD_OP_22_READ_DATA | (addr << 5) | reg; + int err; + + err = mv88e6xxx_g2_smi_phy_wait(chip); + if (err) + return err; + + err = mv88e6xxx_g2_smi_phy_cmd(chip, cmd); + if (err) + return err; + + return mv88e6xxx_read(chip, REG_GLOBAL2, GLOBAL2_SMI_PHY_DATA, val); +} + +static int mv88e6xxx_g2_smi_phy_write(struct mv88e6xxx_chip *chip, int addr, + int reg, u16 val) +{ + u16 cmd = GLOBAL2_SMI_PHY_CMD_OP_22_WRITE_DATA | (addr << 5) | reg; + int err; + + err = mv88e6xxx_g2_smi_phy_wait(chip); + if (err) + return err; + + err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_SMI_PHY_DATA, val); + if (err) + return err; + + return mv88e6xxx_g2_smi_phy_cmd(chip, cmd); +} + static int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip) { u16 reg; @@ -3236,7 +3269,7 @@ static int mv88e6xxx_mdio_read(struct mii_bus *bus, int port, int regnum) if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_PPU)) ret = mv88e6xxx_mdio_read_ppu(chip, addr, regnum); - else if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_SMI_PHY)) + else if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_SMI_PHY)) ret = mv88e6xxx_mdio_read_indirect(chip, addr, regnum); else ret = mv88e6xxx_mdio_read_direct(chip, addr, regnum); @@ -3259,7 +3292,7 @@ static int mv88e6xxx_mdio_write(struct mii_bus *bus, int port, int regnum, if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_PPU)) ret = mv88e6xxx_mdio_write_ppu(chip, addr, regnum, val); - else if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_SMI_PHY)) + else if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_SMI_PHY)) ret = mv88e6xxx_mdio_write_indirect(chip, addr, regnum, val); else ret = mv88e6xxx_mdio_write_direct(chip, addr, regnum, val); diff --git a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h index 527a880118b4..8be0f3681c66 100644 --- a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h +++ b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h @@ -329,17 +329,16 @@ #define GLOBAL2_EEPROM_DATA 0x15 #define GLOBAL2_PTP_AVB_OP 0x16 #define GLOBAL2_PTP_AVB_DATA 0x17 -#define GLOBAL2_SMI_OP 0x18 -#define GLOBAL2_SMI_OP_BUSY BIT(15) -#define GLOBAL2_SMI_OP_CLAUSE_22 BIT(12) -#define GLOBAL2_SMI_OP_22_WRITE ((1 << 10) | GLOBAL2_SMI_OP_BUSY | \ - GLOBAL2_SMI_OP_CLAUSE_22) -#define GLOBAL2_SMI_OP_22_READ ((2 << 10) | GLOBAL2_SMI_OP_BUSY | \ - GLOBAL2_SMI_OP_CLAUSE_22) -#define GLOBAL2_SMI_OP_45_WRITE_ADDR ((0 << 10) | GLOBAL2_SMI_OP_BUSY) -#define GLOBAL2_SMI_OP_45_WRITE_DATA ((1 << 10) | GLOBAL2_SMI_OP_BUSY) -#define GLOBAL2_SMI_OP_45_READ_DATA ((2 << 10) | GLOBAL2_SMI_OP_BUSY) -#define GLOBAL2_SMI_DATA 0x19 +#define GLOBAL2_SMI_PHY_CMD 0x18 +#define GLOBAL2_SMI_PHY_CMD_BUSY BIT(15) +#define GLOBAL2_SMI_PHY_CMD_MODE_22 BIT(12) +#define GLOBAL2_SMI_PHY_CMD_OP_22_WRITE_DATA ((0x1 << 10) | \ + GLOBAL2_SMI_PHY_CMD_MODE_22 | \ + GLOBAL2_SMI_PHY_CMD_BUSY) +#define GLOBAL2_SMI_PHY_CMD_OP_22_READ_DATA ((0x2 << 10) | \ + GLOBAL2_SMI_PHY_CMD_MODE_22 | \ + GLOBAL2_SMI_PHY_CMD_BUSY) +#define GLOBAL2_SMI_PHY_DATA 0x19 #define GLOBAL2_SCRATCH_MISC 0x1a #define GLOBAL2_SCRATCH_BUSY BIT(15) #define GLOBAL2_SCRATCH_REGISTER_SHIFT 8 @@ -409,6 +408,8 @@ enum mv88e6xxx_cap { MV88E6XXX_CAP_G2_POT, /* (0x0f) Priority Override Table */ MV88E6XXX_CAP_G2_EEPROM_CMD, /* (0x14) EEPROM Command */ MV88E6XXX_CAP_G2_EEPROM_DATA, /* (0x15) EEPROM Data */ + MV88E6XXX_CAP_G2_SMI_PHY_CMD, /* (0x18) SMI PHY Command */ + MV88E6XXX_CAP_G2_SMI_PHY_DATA, /* (0x19) SMI PHY Data */ /* PHY Polling Unit. * See GLOBAL_CONTROL_PPU_ENABLE and GLOBAL_STATUS_PPU_POLLING. @@ -416,12 +417,6 @@ enum mv88e6xxx_cap { MV88E6XXX_CAP_PPU, MV88E6XXX_CAP_PPU_ACTIVE, - /* SMI PHY Command and Data registers. - * This requires an indirect access to PHY registers through - * GLOBAL2_SMI_OP, otherwise direct access to PHY registers is done. - */ - MV88E6XXX_CAP_SMI_PHY, - /* Per VLAN Spanning Tree Unit (STU). * The Port State database, if present, is accessed through VTU * operations and dedicated SID registers. See GLOBAL_VTU_SID. @@ -457,10 +452,11 @@ enum mv88e6xxx_cap { #define MV88E6XXX_FLAG_G2_POT BIT(MV88E6XXX_CAP_G2_POT) #define MV88E6XXX_FLAG_G2_EEPROM_CMD BIT(MV88E6XXX_CAP_G2_EEPROM_CMD) #define MV88E6XXX_FLAG_G2_EEPROM_DATA BIT(MV88E6XXX_CAP_G2_EEPROM_DATA) +#define MV88E6XXX_FLAG_G2_SMI_PHY_CMD BIT(MV88E6XXX_CAP_G2_SMI_PHY_CMD) +#define MV88E6XXX_FLAG_G2_SMI_PHY_DATA BIT(MV88E6XXX_CAP_G2_SMI_PHY_DATA) #define MV88E6XXX_FLAG_PPU BIT(MV88E6XXX_CAP_PPU) #define MV88E6XXX_FLAG_PPU_ACTIVE BIT(MV88E6XXX_CAP_PPU_ACTIVE) -#define MV88E6XXX_FLAG_SMI_PHY BIT(MV88E6XXX_CAP_SMI_PHY) #define MV88E6XXX_FLAG_STU BIT(MV88E6XXX_CAP_STU) #define MV88E6XXX_FLAG_TEMP BIT(MV88E6XXX_CAP_TEMP) #define MV88E6XXX_FLAG_TEMP_LIMIT BIT(MV88E6XXX_CAP_TEMP_LIMIT) @@ -486,6 +482,11 @@ enum mv88e6xxx_cap { (MV88E6XXX_FLAG_G2_PVT_ADDR | \ MV88E6XXX_FLAG_G2_PVT_DATA) +/* Indirect PHY access via Global2 SMI PHY registers */ +#define MV88E6XXX_FLAGS_SMI_PHY \ + (MV88E6XXX_FLAG_G2_SMI_PHY_CMD |\ + MV88E6XXX_FLAG_G2_SMI_PHY_DATA) + #define MV88E6XXX_FLAGS_FAMILY_6095 \ (MV88E6XXX_FLAG_GLOBAL2 | \ MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ @@ -533,14 +534,14 @@ enum mv88e6xxx_cap { MV88E6XXX_FLAG_G2_SWITCH_MAC | \ MV88E6XXX_FLAG_G2_POT | \ MV88E6XXX_FLAG_PPU_ACTIVE | \ - MV88E6XXX_FLAG_SMI_PHY | \ MV88E6XXX_FLAG_TEMP | \ MV88E6XXX_FLAG_TEMP_LIMIT | \ MV88E6XXX_FLAG_VTU | \ MV88E6XXX_FLAGS_EEPROM16 | \ MV88E6XXX_FLAGS_IRL | \ MV88E6XXX_FLAGS_MULTI_CHIP | \ - MV88E6XXX_FLAGS_PVT) + MV88E6XXX_FLAGS_PVT | \ + MV88E6XXX_FLAGS_SMI_PHY) #define MV88E6XXX_FLAGS_FAMILY_6351 \ (MV88E6XXX_FLAG_GLOBAL2 | \ @@ -549,13 +550,13 @@ enum mv88e6xxx_cap { MV88E6XXX_FLAG_G2_SWITCH_MAC | \ MV88E6XXX_FLAG_G2_POT | \ MV88E6XXX_FLAG_PPU_ACTIVE | \ - MV88E6XXX_FLAG_SMI_PHY | \ MV88E6XXX_FLAG_STU | \ MV88E6XXX_FLAG_TEMP | \ MV88E6XXX_FLAG_VTU | \ MV88E6XXX_FLAGS_IRL | \ MV88E6XXX_FLAGS_MULTI_CHIP | \ - MV88E6XXX_FLAGS_PVT) + MV88E6XXX_FLAGS_PVT | \ + MV88E6XXX_FLAGS_SMI_PHY) #define MV88E6XXX_FLAGS_FAMILY_6352 \ (MV88E6XXX_FLAG_EEE | \ @@ -565,7 +566,6 @@ enum mv88e6xxx_cap { MV88E6XXX_FLAG_G2_SWITCH_MAC | \ MV88E6XXX_FLAG_G2_POT | \ MV88E6XXX_FLAG_PPU_ACTIVE | \ - MV88E6XXX_FLAG_SMI_PHY | \ MV88E6XXX_FLAG_STU | \ MV88E6XXX_FLAG_TEMP | \ MV88E6XXX_FLAG_TEMP_LIMIT | \ @@ -573,7 +573,8 @@ enum mv88e6xxx_cap { MV88E6XXX_FLAGS_EEPROM16 | \ MV88E6XXX_FLAGS_IRL | \ MV88E6XXX_FLAGS_MULTI_CHIP | \ - MV88E6XXX_FLAGS_PVT) + MV88E6XXX_FLAGS_PVT | \ + MV88E6XXX_FLAGS_SMI_PHY) struct mv88e6xxx_info { enum mv88e6xxx_family family; From e57e5e7769408b3899c2ef81422ab202dee7139d Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Mon, 15 Aug 2016 17:19:00 -0400 Subject: [PATCH 0241/1715] net: dsa: mv88e6xxx: abstract PHY ops Old chips use a direct access to the PHY devices registers. Next chips have a PHY Polling Unit (PPU) which needs to be disabled before accessing PHY registers. Newer chips have an indirect access to the PHY devices so that disabling the PPU is not necessary. Introduce a new phy_ops structure in the chip to describe the required PHY access routines. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 131 ++++++++++++++++---------- drivers/net/dsa/mv88e6xxx/mv88e6xxx.h | 1 + 2 files changed, 81 insertions(+), 51 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 924a2af5ac6d..3b0bc88a3feb 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -216,6 +216,28 @@ static int mv88e6xxx_write(struct mv88e6xxx_chip *chip, return 0; } +static int mv88e6xxx_phy_read(struct mv88e6xxx_chip *chip, int phy, + int reg, u16 *val) +{ + int addr = phy; /* PHY devices addresses start at 0x0 */ + + if (!chip->phy_ops) + return -EOPNOTSUPP; + + return chip->phy_ops->read(chip, addr, reg, val); +} + +static int mv88e6xxx_phy_write(struct mv88e6xxx_chip *chip, int phy, + int reg, u16 val) +{ + int addr = phy; /* PHY devices addresses start at 0x0 */ + + if (!chip->phy_ops) + return -EOPNOTSUPP; + + return chip->phy_ops->write(chip, addr, reg, val); +} + static int mv88e6xxx_wait(struct mv88e6xxx_chip *chip, int addr, int reg, u16 mask) { @@ -422,34 +444,39 @@ static void mv88e6xxx_ppu_state_init(struct mv88e6xxx_chip *chip) chip->ppu_timer.function = mv88e6xxx_ppu_reenable_timer; } -static int mv88e6xxx_mdio_read_ppu(struct mv88e6xxx_chip *chip, int addr, - int regnum) +static int mv88e6xxx_phy_ppu_read(struct mv88e6xxx_chip *chip, int addr, + int reg, u16 *val) { - int ret; + int err; - ret = mv88e6xxx_ppu_access_get(chip); - if (ret >= 0) { - ret = _mv88e6xxx_reg_read(chip, addr, regnum); + err = mv88e6xxx_ppu_access_get(chip); + if (!err) { + err = mv88e6xxx_read(chip, addr, reg, val); mv88e6xxx_ppu_access_put(chip); } - return ret; + return err; } -static int mv88e6xxx_mdio_write_ppu(struct mv88e6xxx_chip *chip, int addr, - int regnum, u16 val) +static int mv88e6xxx_phy_ppu_write(struct mv88e6xxx_chip *chip, int addr, + int reg, u16 val) { - int ret; + int err; - ret = mv88e6xxx_ppu_access_get(chip); - if (ret >= 0) { - ret = _mv88e6xxx_reg_write(chip, addr, regnum, val); + err = mv88e6xxx_ppu_access_get(chip); + if (!err) { + err = mv88e6xxx_write(chip, addr, reg, val); mv88e6xxx_ppu_access_put(chip); } - return ret; + return err; } +static const struct mv88e6xxx_ops mv88e6xxx_phy_ppu_ops = { + .read = mv88e6xxx_phy_ppu_read, + .write = mv88e6xxx_phy_ppu_write, +}; + static bool mv88e6xxx_6065_family(struct mv88e6xxx_chip *chip) { return chip->info->family == MV88E6XXX_FAMILY_6065; @@ -3090,6 +3117,11 @@ static int mv88e6xxx_g2_smi_phy_write(struct mv88e6xxx_chip *chip, int addr, return mv88e6xxx_g2_smi_phy_cmd(chip, cmd); } +static const struct mv88e6xxx_ops mv88e6xxx_g2_smi_phy_ops = { + .read = mv88e6xxx_g2_smi_phy_read, + .write = mv88e6xxx_g2_smi_phy_write, +}; + static int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip) { u16 reg; @@ -3249,56 +3281,35 @@ static int mv88e6xxx_mdio_page_write(struct dsa_switch *ds, int port, int page, return ret; } -static int mv88e6xxx_port_to_mdio_addr(struct mv88e6xxx_chip *chip, int port) -{ - if (port >= 0 && port < chip->info->num_ports) - return port; - return -EINVAL; -} - -static int mv88e6xxx_mdio_read(struct mii_bus *bus, int port, int regnum) +static int mv88e6xxx_mdio_read(struct mii_bus *bus, int phy, int reg) { struct mv88e6xxx_chip *chip = bus->priv; - int addr = mv88e6xxx_port_to_mdio_addr(chip, port); - int ret; + u16 val; + int err; - if (addr < 0) + if (phy >= chip->info->num_ports) return 0xffff; mutex_lock(&chip->reg_lock); - - if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_PPU)) - ret = mv88e6xxx_mdio_read_ppu(chip, addr, regnum); - else if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_SMI_PHY)) - ret = mv88e6xxx_mdio_read_indirect(chip, addr, regnum); - else - ret = mv88e6xxx_mdio_read_direct(chip, addr, regnum); - + err = mv88e6xxx_phy_read(chip, phy, reg, &val); mutex_unlock(&chip->reg_lock); - return ret; + + return err ? err : val; } -static int mv88e6xxx_mdio_write(struct mii_bus *bus, int port, int regnum, - u16 val) +static int mv88e6xxx_mdio_write(struct mii_bus *bus, int phy, int reg, u16 val) { struct mv88e6xxx_chip *chip = bus->priv; - int addr = mv88e6xxx_port_to_mdio_addr(chip, port); - int ret; + int err; - if (addr < 0) + if (phy >= chip->info->num_ports) return 0xffff; mutex_lock(&chip->reg_lock); - - if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_PPU)) - ret = mv88e6xxx_mdio_write_ppu(chip, addr, regnum, val); - else if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_SMI_PHY)) - ret = mv88e6xxx_mdio_write_indirect(chip, addr, regnum, val); - else - ret = mv88e6xxx_mdio_write_direct(chip, addr, regnum, val); - + err = mv88e6xxx_phy_write(chip, phy, reg, val); mutex_unlock(&chip->reg_lock); - return ret; + + return err; } static int mv88e6xxx_mdio_register(struct mv88e6xxx_chip *chip, @@ -3308,9 +3319,6 @@ static int mv88e6xxx_mdio_register(struct mv88e6xxx_chip *chip, struct mii_bus *bus; int err; - if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_PPU)) - mv88e6xxx_ppu_state_init(chip); - if (np) chip->mdio_np = of_get_child_by_name(np, "mdio"); @@ -3907,6 +3915,23 @@ static struct mv88e6xxx_chip *mv88e6xxx_alloc_chip(struct device *dev) return chip; } +static const struct mv88e6xxx_ops mv88e6xxx_phy_ops = { + .read = mv88e6xxx_read, + .write = mv88e6xxx_write, +}; + +static void mv88e6xxx_phy_init(struct mv88e6xxx_chip *chip) +{ + if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_SMI_PHY)) { + chip->phy_ops = &mv88e6xxx_g2_smi_phy_ops; + } else if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_PPU)) { + chip->phy_ops = &mv88e6xxx_phy_ppu_ops; + mv88e6xxx_ppu_state_init(chip); + } else { + chip->phy_ops = &mv88e6xxx_phy_ops; + } +} + static int mv88e6xxx_smi_init(struct mv88e6xxx_chip *chip, struct mii_bus *bus, int sw_addr) { @@ -3954,6 +3979,8 @@ static const char *mv88e6xxx_drv_probe(struct device *dsa_dev, if (err) goto free; + mv88e6xxx_phy_init(chip); + err = mv88e6xxx_mdio_register(chip, NULL); if (err) goto free; @@ -4055,6 +4082,8 @@ static int mv88e6xxx_probe(struct mdio_device *mdiodev) if (err) return err; + mv88e6xxx_phy_init(chip); + chip->reset = devm_gpiod_get_optional(dev, "reset", GPIOD_ASIS); if (IS_ERR(chip->reset)) return PTR_ERR(chip->reset); diff --git a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h index 8be0f3681c66..1dd96e712342 100644 --- a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h +++ b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h @@ -634,6 +634,7 @@ struct mv88e6xxx_chip { /* Handles automatic disabling and re-enabling of the PHY * polling unit. */ + const struct mv88e6xxx_ops *phy_ops; struct mutex ppu_mutex; int ppu_disabled; struct work_struct ppu_work; From 09cb7dfd3f144d18168e27b35361e9024763db15 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Mon, 15 Aug 2016 17:19:01 -0400 Subject: [PATCH 0242/1715] net: dsa: mv88e6xxx: describe PHY page and SerDes Add mv88e6xxx_phy_page_{read,write} routines and use them to access the SerDes PHY device registers. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 95 +++++++++++++++++++++++---- drivers/net/dsa/mv88e6xxx/mv88e6xxx.h | 27 +++++++- 2 files changed, 105 insertions(+), 17 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 3b0bc88a3feb..faa07513c9ff 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -238,6 +238,74 @@ static int mv88e6xxx_phy_write(struct mv88e6xxx_chip *chip, int phy, return chip->phy_ops->write(chip, addr, reg, val); } +static int mv88e6xxx_phy_page_get(struct mv88e6xxx_chip *chip, int phy, u8 page) +{ + if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_PHY_PAGE)) + return -EOPNOTSUPP; + + return mv88e6xxx_phy_write(chip, phy, PHY_PAGE, page); +} + +static void mv88e6xxx_phy_page_put(struct mv88e6xxx_chip *chip, int phy) +{ + int err; + + /* Restore PHY page Copper 0x0 for access via the registered MDIO bus */ + err = mv88e6xxx_phy_write(chip, phy, PHY_PAGE, PHY_PAGE_COPPER); + if (unlikely(err)) { + dev_err(chip->dev, "failed to restore PHY %d page Copper (%d)\n", + phy, err); + } +} + +static int mv88e6xxx_phy_page_read(struct mv88e6xxx_chip *chip, int phy, + u8 page, int reg, u16 *val) +{ + int err; + + /* There is no paging for registers 22 */ + if (reg == PHY_PAGE) + return -EINVAL; + + err = mv88e6xxx_phy_page_get(chip, phy, page); + if (!err) { + err = mv88e6xxx_phy_read(chip, phy, reg, val); + mv88e6xxx_phy_page_put(chip, phy); + } + + return err; +} + +static int mv88e6xxx_phy_page_write(struct mv88e6xxx_chip *chip, int phy, + u8 page, int reg, u16 val) +{ + int err; + + /* There is no paging for registers 22 */ + if (reg == PHY_PAGE) + return -EINVAL; + + err = mv88e6xxx_phy_page_get(chip, phy, page); + if (!err) { + err = mv88e6xxx_phy_write(chip, phy, PHY_PAGE, page); + mv88e6xxx_phy_page_put(chip, phy); + } + + return err; +} + +static int mv88e6xxx_serdes_read(struct mv88e6xxx_chip *chip, int reg, u16 *val) +{ + return mv88e6xxx_phy_page_read(chip, ADDR_SERDES, SERDES_PAGE_FIBER, + reg, val); +} + +static int mv88e6xxx_serdes_write(struct mv88e6xxx_chip *chip, int reg, u16 val) +{ + return mv88e6xxx_phy_page_write(chip, ADDR_SERDES, SERDES_PAGE_FIBER, + reg, val); +} + static int mv88e6xxx_wait(struct mv88e6xxx_chip *chip, int addr, int reg, u16 mask) { @@ -2408,23 +2476,22 @@ static int mv88e6xxx_switch_reset(struct mv88e6xxx_chip *chip) return ret; } -static int mv88e6xxx_power_on_serdes(struct mv88e6xxx_chip *chip) +static int mv88e6xxx_serdes_power_on(struct mv88e6xxx_chip *chip) { - int ret; + u16 val; + int err; - ret = _mv88e6xxx_mdio_page_read(chip, REG_FIBER_SERDES, - PAGE_FIBER_SERDES, MII_BMCR); - if (ret < 0) - return ret; + /* Clear Power Down bit */ + err = mv88e6xxx_serdes_read(chip, MII_BMCR, &val); + if (err) + return err; - if (ret & BMCR_PDOWN) { - ret &= ~BMCR_PDOWN; - ret = _mv88e6xxx_mdio_page_write(chip, REG_FIBER_SERDES, - PAGE_FIBER_SERDES, MII_BMCR, - ret); + if (val & BMCR_PDOWN) { + val &= ~BMCR_PDOWN; + err = mv88e6xxx_serdes_write(chip, MII_BMCR, val); } - return ret; + return err; } static int mv88e6xxx_port_read(struct mv88e6xxx_chip *chip, int port, @@ -2547,7 +2614,7 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) /* If this port is connected to a SerDes, make sure the SerDes is not * powered down. */ - if (mv88e6xxx_6352_family(chip)) { + if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_SERDES)) { ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_STATUS); if (ret < 0) return ret; @@ -2555,7 +2622,7 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) if ((ret == PORT_STATUS_CMODE_100BASE_X) || (ret == PORT_STATUS_CMODE_1000BASE_X) || (ret == PORT_STATUS_CMODE_SGMII)) { - ret = mv88e6xxx_power_on_serdes(chip); + ret = mv88e6xxx_serdes_power_on(chip); if (ret < 0) return ret; } diff --git a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h index 1dd96e712342..1f9bab5891b1 100644 --- a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h +++ b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h @@ -30,9 +30,12 @@ #define SMI_CMD_OP_45_READ_DATA_INC ((3 << 10) | SMI_CMD_BUSY) #define SMI_DATA 0x01 -/* Fiber/SERDES Registers are located at SMI address F, page 1 */ -#define REG_FIBER_SERDES 0x0f -#define PAGE_FIBER_SERDES 0x01 +/* PHY Registers */ +#define PHY_PAGE 0x16 +#define PHY_PAGE_COPPER 0x00 + +#define ADDR_SERDES 0x0f +#define SERDES_PAGE_FIBER 0x01 #define REG_PORT(p) (0x10 + (p)) #define PORT_STATUS 0x00 @@ -394,6 +397,14 @@ enum mv88e6xxx_cap { MV88E6XXX_CAP_SMI_CMD, /* (0x00) SMI Command */ MV88E6XXX_CAP_SMI_DATA, /* (0x01) SMI Data */ + /* PHY Registers. + */ + MV88E6XXX_CAP_PHY_PAGE, /* (0x16) Page Register */ + + /* Fiber/SERDES Registers (SMI address F). + */ + MV88E6XXX_CAP_SERDES, + /* Switch Global 2 Registers. * The device contains a second set of global 16-bit registers. */ @@ -441,6 +452,10 @@ enum mv88e6xxx_cap { #define MV88E6XXX_FLAG_SMI_CMD BIT(MV88E6XXX_CAP_SMI_CMD) #define MV88E6XXX_FLAG_SMI_DATA BIT(MV88E6XXX_CAP_SMI_DATA) +#define MV88E6XXX_FLAG_PHY_PAGE BIT(MV88E6XXX_CAP_PHY_PAGE) + +#define MV88E6XXX_FLAG_SERDES BIT(MV88E6XXX_CAP_SERDES) + #define MV88E6XXX_FLAG_GLOBAL2 BIT(MV88E6XXX_CAP_GLOBAL2) #define MV88E6XXX_FLAG_G2_MGMT_EN_2X BIT(MV88E6XXX_CAP_G2_MGMT_EN_2X) #define MV88E6XXX_FLAG_G2_MGMT_EN_0X BIT(MV88E6XXX_CAP_G2_MGMT_EN_0X) @@ -482,6 +497,11 @@ enum mv88e6xxx_cap { (MV88E6XXX_FLAG_G2_PVT_ADDR | \ MV88E6XXX_FLAG_G2_PVT_DATA) +/* Fiber/SERDES Registers at SMI address F, page 1 */ +#define MV88E6XXX_FLAGS_SERDES \ + (MV88E6XXX_FLAG_PHY_PAGE | \ + MV88E6XXX_FLAG_SERDES) + /* Indirect PHY access via Global2 SMI PHY registers */ #define MV88E6XXX_FLAGS_SMI_PHY \ (MV88E6XXX_FLAG_G2_SMI_PHY_CMD |\ @@ -574,6 +594,7 @@ enum mv88e6xxx_cap { MV88E6XXX_FLAGS_IRL | \ MV88E6XXX_FLAGS_MULTI_CHIP | \ MV88E6XXX_FLAGS_PVT | \ + MV88E6XXX_FLAGS_SERDES | \ MV88E6XXX_FLAGS_SMI_PHY) struct mv88e6xxx_info { From 9c93829c014f1a18a4aac2bf15b79ff4325a0912 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Mon, 15 Aug 2016 17:19:02 -0400 Subject: [PATCH 0243/1715] net: dsa: mv88e6xxx: use the new PHY API This commit replaces every MDIO direct or indirect access with the new generic mv88e6xxx_phy_* routines. This allows us to get rid of the mv88e6xxx_mdio_{read,write}_{,in}direct and {_,}mv88e6xxx_mdio_page_{read,write} functions. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 185 +++++++++---------------------- 1 file changed, 50 insertions(+), 135 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index faa07513c9ff..a230fcba5b64 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -372,22 +372,6 @@ static int _mv88e6xxx_reg_write(struct mv88e6xxx_chip *chip, int addr, return mv88e6xxx_write(chip, addr, reg, val); } -static int mv88e6xxx_mdio_read_direct(struct mv88e6xxx_chip *chip, - int addr, int regnum) -{ - if (addr >= 0) - return _mv88e6xxx_reg_read(chip, addr, regnum); - return 0xffff; -} - -static int mv88e6xxx_mdio_write_direct(struct mv88e6xxx_chip *chip, - int addr, int regnum, u16 val) -{ - if (addr >= 0) - return _mv88e6xxx_reg_write(chip, addr, regnum, val); - return 0; -} - static int mv88e6xxx_ppu_disable(struct mv88e6xxx_chip *chip) { int ret; @@ -942,87 +926,63 @@ static int _mv88e6xxx_atu_wait(struct mv88e6xxx_chip *chip) GLOBAL_ATU_OP_BUSY); } -static int mv88e6xxx_g2_smi_phy_read(struct mv88e6xxx_chip *chip, int addr, - int reg, u16 *val); -static int mv88e6xxx_g2_smi_phy_write(struct mv88e6xxx_chip *chip, int addr, - int reg, u16 val); - -static int mv88e6xxx_mdio_read_indirect(struct mv88e6xxx_chip *chip, - int addr, int regnum) -{ - u16 val; - int err; - - err = mv88e6xxx_g2_smi_phy_read(chip, addr, regnum, &val); - if (err) - return err; - - return val; -} - -static int mv88e6xxx_mdio_write_indirect(struct mv88e6xxx_chip *chip, - int addr, int regnum, u16 val) -{ - return mv88e6xxx_g2_smi_phy_write(chip, addr, regnum, val); -} - static int mv88e6xxx_get_eee(struct dsa_switch *ds, int port, struct ethtool_eee *e) { struct mv88e6xxx_chip *chip = ds_to_priv(ds); - int reg; + u16 reg; + int err; if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_EEE)) return -EOPNOTSUPP; mutex_lock(&chip->reg_lock); - reg = mv88e6xxx_mdio_read_indirect(chip, port, 16); - if (reg < 0) + err = mv88e6xxx_phy_read(chip, port, 16, ®); + if (err) goto out; e->eee_enabled = !!(reg & 0x0200); e->tx_lpi_enabled = !!(reg & 0x0100); - reg = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_STATUS); - if (reg < 0) + err = mv88e6xxx_read(chip, REG_PORT(port), PORT_STATUS, ®); + if (err) goto out; e->eee_active = !!(reg & PORT_STATUS_EEE); - reg = 0; - out: mutex_unlock(&chip->reg_lock); - return reg; + + return err; } static int mv88e6xxx_set_eee(struct dsa_switch *ds, int port, struct phy_device *phydev, struct ethtool_eee *e) { struct mv88e6xxx_chip *chip = ds_to_priv(ds); - int reg; - int ret; + u16 reg; + int err; if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_EEE)) return -EOPNOTSUPP; mutex_lock(&chip->reg_lock); - ret = mv88e6xxx_mdio_read_indirect(chip, port, 16); - if (ret < 0) + err = mv88e6xxx_phy_read(chip, port, 16, ®); + if (err) goto out; - reg = ret & ~0x0300; + reg &= ~0x0300; if (e->eee_enabled) reg |= 0x0200; if (e->tx_lpi_enabled) reg |= 0x0100; - ret = mv88e6xxx_mdio_write_indirect(chip, port, 16, reg); + err = mv88e6xxx_phy_write(chip, port, 16, reg); out: mutex_unlock(&chip->reg_lock); - return ret; + return err; } static int _mv88e6xxx_atu_cmd(struct mv88e6xxx_chip *chip, u16 fid, u16 cmd) @@ -2382,38 +2342,6 @@ static void mv88e6xxx_port_bridge_leave(struct dsa_switch *ds, int port) mutex_unlock(&chip->reg_lock); } -static int _mv88e6xxx_mdio_page_write(struct mv88e6xxx_chip *chip, - int port, int page, int reg, int val) -{ - int ret; - - ret = mv88e6xxx_mdio_write_indirect(chip, port, 0x16, page); - if (ret < 0) - goto restore_page_0; - - ret = mv88e6xxx_mdio_write_indirect(chip, port, reg, val); -restore_page_0: - mv88e6xxx_mdio_write_indirect(chip, port, 0x16, 0x0); - - return ret; -} - -static int _mv88e6xxx_mdio_page_read(struct mv88e6xxx_chip *chip, - int port, int page, int reg) -{ - int ret; - - ret = mv88e6xxx_mdio_write_indirect(chip, port, 0x16, page); - if (ret < 0) - goto restore_page_0; - - ret = mv88e6xxx_mdio_read_indirect(chip, port, reg); -restore_page_0: - mv88e6xxx_mdio_write_indirect(chip, port, 0x16, 0x0); - - return ret; -} - static int mv88e6xxx_switch_reset(struct mv88e6xxx_chip *chip) { bool ppu_active = mv88e6xxx_has(chip, MV88E6XXX_FLAG_PPU_ACTIVE); @@ -3322,32 +3250,6 @@ static int mv88e6xxx_set_addr(struct dsa_switch *ds, u8 *addr) return err; } -static int mv88e6xxx_mdio_page_read(struct dsa_switch *ds, int port, int page, - int reg) -{ - struct mv88e6xxx_chip *chip = ds_to_priv(ds); - int ret; - - mutex_lock(&chip->reg_lock); - ret = _mv88e6xxx_mdio_page_read(chip, port, page, reg); - mutex_unlock(&chip->reg_lock); - - return ret; -} - -static int mv88e6xxx_mdio_page_write(struct dsa_switch *ds, int port, int page, - int reg, int val) -{ - struct mv88e6xxx_chip *chip = ds_to_priv(ds); - int ret; - - mutex_lock(&chip->reg_lock); - ret = _mv88e6xxx_mdio_page_write(chip, port, page, reg, val); - mutex_unlock(&chip->reg_lock); - - return ret; -} - static int mv88e6xxx_mdio_read(struct mii_bus *bus, int phy, int reg) { struct mv88e6xxx_chip *chip = bus->priv; @@ -3441,44 +3343,42 @@ static void mv88e6xxx_mdio_unregister(struct mv88e6xxx_chip *chip) static int mv88e61xx_get_temp(struct dsa_switch *ds, int *temp) { struct mv88e6xxx_chip *chip = ds_to_priv(ds); + u16 val; int ret; - int val; *temp = 0; mutex_lock(&chip->reg_lock); - ret = mv88e6xxx_mdio_write_direct(chip, 0x0, 0x16, 0x6); + ret = mv88e6xxx_phy_write(chip, 0x0, 0x16, 0x6); if (ret < 0) goto error; /* Enable temperature sensor */ - ret = mv88e6xxx_mdio_read_direct(chip, 0x0, 0x1a); + ret = mv88e6xxx_phy_read(chip, 0x0, 0x1a, &val); if (ret < 0) goto error; - ret = mv88e6xxx_mdio_write_direct(chip, 0x0, 0x1a, ret | (1 << 5)); + ret = mv88e6xxx_phy_write(chip, 0x0, 0x1a, val | (1 << 5)); if (ret < 0) goto error; /* Wait for temperature to stabilize */ usleep_range(10000, 12000); - val = mv88e6xxx_mdio_read_direct(chip, 0x0, 0x1a); - if (val < 0) { - ret = val; + ret = mv88e6xxx_phy_read(chip, 0x0, 0x1a, &val); + if (ret < 0) goto error; - } /* Disable temperature sensor */ - ret = mv88e6xxx_mdio_write_direct(chip, 0x0, 0x1a, ret & ~(1 << 5)); + ret = mv88e6xxx_phy_write(chip, 0x0, 0x1a, val & ~(1 << 5)); if (ret < 0) goto error; *temp = ((val & 0x1f) - 5) * 5; error: - mv88e6xxx_mdio_write_direct(chip, 0x0, 0x16, 0x0); + mv88e6xxx_phy_write(chip, 0x0, 0x16, 0x0); mutex_unlock(&chip->reg_lock); return ret; } @@ -3487,15 +3387,18 @@ static int mv88e63xx_get_temp(struct dsa_switch *ds, int *temp) { struct mv88e6xxx_chip *chip = ds_to_priv(ds); int phy = mv88e6xxx_6320_family(chip) ? 3 : 0; + u16 val; int ret; *temp = 0; - ret = mv88e6xxx_mdio_page_read(ds, phy, 6, 27); + mutex_lock(&chip->reg_lock); + ret = mv88e6xxx_phy_page_read(chip, phy, 6, 27, &val); + mutex_unlock(&chip->reg_lock); if (ret < 0) return ret; - *temp = (ret & 0xff) - 25; + *temp = (val & 0xff) - 25; return 0; } @@ -3517,6 +3420,7 @@ static int mv88e6xxx_get_temp_limit(struct dsa_switch *ds, int *temp) { struct mv88e6xxx_chip *chip = ds_to_priv(ds); int phy = mv88e6xxx_6320_family(chip) ? 3 : 0; + u16 val; int ret; if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_TEMP_LIMIT)) @@ -3524,11 +3428,13 @@ static int mv88e6xxx_get_temp_limit(struct dsa_switch *ds, int *temp) *temp = 0; - ret = mv88e6xxx_mdio_page_read(ds, phy, 6, 26); + mutex_lock(&chip->reg_lock); + ret = mv88e6xxx_phy_page_read(chip, phy, 6, 26, &val); + mutex_unlock(&chip->reg_lock); if (ret < 0) return ret; - *temp = (((ret >> 8) & 0x1f) * 5) - 25; + *temp = (((val >> 8) & 0x1f) * 5) - 25; return 0; } @@ -3537,23 +3443,30 @@ static int mv88e6xxx_set_temp_limit(struct dsa_switch *ds, int temp) { struct mv88e6xxx_chip *chip = ds_to_priv(ds); int phy = mv88e6xxx_6320_family(chip) ? 3 : 0; - int ret; + u16 val; + int err; if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_TEMP_LIMIT)) return -EOPNOTSUPP; - ret = mv88e6xxx_mdio_page_read(ds, phy, 6, 26); - if (ret < 0) - return ret; + mutex_lock(&chip->reg_lock); + err = mv88e6xxx_phy_page_read(chip, phy, 6, 26, &val); + if (err) + goto unlock; temp = clamp_val(DIV_ROUND_CLOSEST(temp, 5) + 5, 0, 0x1f); - return mv88e6xxx_mdio_page_write(ds, phy, 6, 26, - (ret & 0xe0ff) | (temp << 8)); + err = mv88e6xxx_phy_page_write(chip, phy, 6, 26, + (val & 0xe0ff) | (temp << 8)); +unlock: + mutex_unlock(&chip->reg_lock); + + return err; } static int mv88e6xxx_get_temp_alarm(struct dsa_switch *ds, bool *alarm) { struct mv88e6xxx_chip *chip = ds_to_priv(ds); int phy = mv88e6xxx_6320_family(chip) ? 3 : 0; + u16 val; int ret; if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_TEMP_LIMIT)) @@ -3561,11 +3474,13 @@ static int mv88e6xxx_get_temp_alarm(struct dsa_switch *ds, bool *alarm) *alarm = false; - ret = mv88e6xxx_mdio_page_read(ds, phy, 6, 26); + mutex_lock(&chip->reg_lock); + ret = mv88e6xxx_phy_page_read(chip, phy, 6, 26, &val); + mutex_unlock(&chip->reg_lock); if (ret < 0) return ret; - *alarm = !!(ret & 0x40); + *alarm = !!(val & 0x40); return 0; } From 09a7d9eca1a6cf5eb4f9abfdf8914db9dbd96f08 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 19 Jul 2016 01:17:59 +0300 Subject: [PATCH 0244/1715] {net,IB}/mlx5: QP/XRCD commands via mlx5 ifc Remove old representation of manually created QP/XRCD commands layout amd use mlx5_ifc canonical structures and defines. Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/qp.c | 156 ++++++++-------- .../net/ethernet/mellanox/mlx5/core/debugfs.c | 14 +- drivers/net/ethernet/mellanox/mlx5/core/qp.c | 167 +++++++----------- include/linux/mlx5/mlx5_ifc.h | 5 +- include/linux/mlx5/qp.h | 108 +---------- 5 files changed, 166 insertions(+), 284 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 21ab0e26fa71..d22492ff863e 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -726,7 +726,7 @@ err_umem: static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, struct mlx5_ib_qp *qp, struct ib_udata *udata, struct ib_qp_init_attr *attr, - struct mlx5_create_qp_mbox_in **in, + u32 **in, struct mlx5_ib_create_qp_resp *resp, int *inlen, struct mlx5_ib_qp_base *base) { @@ -739,6 +739,8 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, u32 offset = 0; int uuarn; int ncont = 0; + __be64 *pas; + void *qpc; int err; err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)); @@ -795,20 +797,24 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, ubuffer->umem = NULL; } - *inlen = sizeof(**in) + sizeof(*(*in)->pas) * ncont; + *inlen = MLX5_ST_SZ_BYTES(create_qp_in) + + MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) * ncont; *in = mlx5_vzalloc(*inlen); if (!*in) { err = -ENOMEM; goto err_umem; } - if (ubuffer->umem) - mlx5_ib_populate_pas(dev, ubuffer->umem, page_shift, - (*in)->pas, 0); - (*in)->ctx.log_pg_sz_remote_qpn = - cpu_to_be32((page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24); - (*in)->ctx.params2 = cpu_to_be32(offset << 6); - (*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index); + pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas); + if (ubuffer->umem) + mlx5_ib_populate_pas(dev, ubuffer->umem, page_shift, pas, 0); + + qpc = MLX5_ADDR_OF(create_qp_in, *in, qpc); + + MLX5_SET(qpc, qpc, log_page_size, page_shift - MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET(qpc, qpc, page_offset, offset); + + MLX5_SET(qpc, qpc, uar_page, uar_index); resp->uuar_index = uuarn; qp->uuarn = uuarn; @@ -857,12 +863,13 @@ static void destroy_qp_user(struct ib_pd *pd, struct mlx5_ib_qp *qp, static int create_kernel_qp(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *init_attr, struct mlx5_ib_qp *qp, - struct mlx5_create_qp_mbox_in **in, int *inlen, + u32 **in, int *inlen, struct mlx5_ib_qp_base *base) { enum mlx5_ib_latency_class lc = MLX5_IB_LATENCY_CLASS_LOW; struct mlx5_uuar_info *uuari; int uar_index; + void *qpc; int uuarn; int err; @@ -902,25 +909,29 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, } qp->sq.qend = mlx5_get_send_wqe(qp, qp->sq.wqe_cnt); - *inlen = sizeof(**in) + sizeof(*(*in)->pas) * qp->buf.npages; + *inlen = MLX5_ST_SZ_BYTES(create_qp_in) + + MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) * qp->buf.npages; *in = mlx5_vzalloc(*inlen); if (!*in) { err = -ENOMEM; goto err_buf; } - (*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index); - (*in)->ctx.log_pg_sz_remote_qpn = - cpu_to_be32((qp->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24); + + qpc = MLX5_ADDR_OF(create_qp_in, *in, qpc); + MLX5_SET(qpc, qpc, uar_page, uar_index); + MLX5_SET(qpc, qpc, log_page_size, qp->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); + /* Set "fast registration enabled" for all kernel QPs */ - (*in)->ctx.params1 |= cpu_to_be32(1 << 11); - (*in)->ctx.sq_crq_size |= cpu_to_be16(1 << 4); + MLX5_SET(qpc, qpc, fre, 1); + MLX5_SET(qpc, qpc, rlky, 1); if (init_attr->create_flags & mlx5_ib_create_qp_sqpn_qp1()) { - (*in)->ctx.deth_sqpn = cpu_to_be32(1); + MLX5_SET(qpc, qpc, deth_sqpn, 1); qp->flags |= MLX5_IB_QP_SQPN_QP1; } - mlx5_fill_page_array(&qp->buf, (*in)->pas); + mlx5_fill_page_array(&qp->buf, + (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas)); err = mlx5_db_alloc(dev->mdev, &qp->db); if (err) { @@ -974,15 +985,15 @@ static void destroy_qp_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) free_uuar(&dev->mdev->priv.uuari, qp->bf->uuarn); } -static __be32 get_rx_type(struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr) +static u32 get_rx_type(struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr) { if (attr->srq || (attr->qp_type == IB_QPT_XRC_TGT) || (attr->qp_type == IB_QPT_XRC_INI)) - return cpu_to_be32(MLX5_SRQ_RQ); + return MLX5_SRQ_RQ; else if (!qp->has_rq) - return cpu_to_be32(MLX5_ZERO_LEN_RQ); + return MLX5_ZERO_LEN_RQ; else - return cpu_to_be32(MLX5_NON_ZERO_RQ); + return MLX5_NON_ZERO_RQ; } static int is_connected(enum ib_qp_type qp_type) @@ -1191,7 +1202,7 @@ static void destroy_raw_packet_qp_tir(struct mlx5_ib_dev *dev, } static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, - struct mlx5_create_qp_mbox_in *in, + u32 *in, struct ib_pd *pd) { struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp; @@ -1461,18 +1472,18 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, struct ib_udata *udata, struct mlx5_ib_qp *qp) { struct mlx5_ib_resources *devr = &dev->devr; + int inlen = MLX5_ST_SZ_BYTES(create_qp_in); struct mlx5_core_dev *mdev = dev->mdev; - struct mlx5_ib_qp_base *base; struct mlx5_ib_create_qp_resp resp; - struct mlx5_create_qp_mbox_in *in; - struct mlx5_ib_create_qp ucmd; struct mlx5_ib_cq *send_cq; struct mlx5_ib_cq *recv_cq; unsigned long flags; - int inlen = sizeof(*in); - int err; u32 uidx = MLX5_IB_DEFAULT_UIDX; + struct mlx5_ib_create_qp ucmd; + struct mlx5_ib_qp_base *base; void *qpc; + u32 *in; + int err; base = init_attr->qp_type == IB_QPT_RAW_PACKET ? &qp->raw_packet_qp.rq.base : @@ -1600,7 +1611,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, if (err) return err; } else { - in = mlx5_vzalloc(sizeof(*in)); + in = mlx5_vzalloc(inlen); if (!in) return -ENOMEM; @@ -1610,26 +1621,29 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, if (is_sqp(init_attr->qp_type)) qp->port = init_attr->port_num; - in->ctx.flags = cpu_to_be32(to_mlx5_st(init_attr->qp_type) << 16 | - MLX5_QP_PM_MIGRATED << 11); + qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); + + MLX5_SET(qpc, qpc, st, to_mlx5_st(init_attr->qp_type)); + MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); if (init_attr->qp_type != MLX5_IB_QPT_REG_UMR) - in->ctx.flags_pd = cpu_to_be32(to_mpd(pd ? pd : devr->p0)->pdn); + MLX5_SET(qpc, qpc, pd, to_mpd(pd ? pd : devr->p0)->pdn); else - in->ctx.flags_pd = cpu_to_be32(MLX5_QP_LAT_SENSITIVE); + MLX5_SET(qpc, qpc, latency_sensitive, 1); + if (qp->wq_sig) - in->ctx.flags_pd |= cpu_to_be32(MLX5_QP_ENABLE_SIG); + MLX5_SET(qpc, qpc, wq_signature, 1); if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK) - in->ctx.flags_pd |= cpu_to_be32(MLX5_QP_BLOCK_MCAST); + MLX5_SET(qpc, qpc, block_lb_mc, 1); if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL) - in->ctx.params2 |= cpu_to_be32(MLX5_QP_BIT_CC_MASTER); + MLX5_SET(qpc, qpc, cd_master, 1); if (qp->flags & MLX5_IB_QP_MANAGED_SEND) - in->ctx.params2 |= cpu_to_be32(MLX5_QP_BIT_CC_SLAVE_SEND); + MLX5_SET(qpc, qpc, cd_slave_send, 1); if (qp->flags & MLX5_IB_QP_MANAGED_RECV) - in->ctx.params2 |= cpu_to_be32(MLX5_QP_BIT_CC_SLAVE_RECV); + MLX5_SET(qpc, qpc, cd_slave_receive, 1); if (qp->scat_cqe && is_connected(init_attr->qp_type)) { int rcqe_sz; @@ -1639,71 +1653,68 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, scqe_sz = mlx5_ib_get_cqe_size(dev, init_attr->send_cq); if (rcqe_sz == 128) - in->ctx.cs_res = MLX5_RES_SCAT_DATA64_CQE; + MLX5_SET(qpc, qpc, cs_res, MLX5_RES_SCAT_DATA64_CQE); else - in->ctx.cs_res = MLX5_RES_SCAT_DATA32_CQE; + MLX5_SET(qpc, qpc, cs_res, MLX5_RES_SCAT_DATA32_CQE); if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) { if (scqe_sz == 128) - in->ctx.cs_req = MLX5_REQ_SCAT_DATA64_CQE; + MLX5_SET(qpc, qpc, cs_req, MLX5_REQ_SCAT_DATA64_CQE); else - in->ctx.cs_req = MLX5_REQ_SCAT_DATA32_CQE; + MLX5_SET(qpc, qpc, cs_req, MLX5_REQ_SCAT_DATA32_CQE); } } if (qp->rq.wqe_cnt) { - in->ctx.rq_size_stride = (qp->rq.wqe_shift - 4); - in->ctx.rq_size_stride |= ilog2(qp->rq.wqe_cnt) << 3; + MLX5_SET(qpc, qpc, log_rq_stride, qp->rq.wqe_shift - 4); + MLX5_SET(qpc, qpc, log_rq_size, ilog2(qp->rq.wqe_cnt)); } - in->ctx.rq_type_srqn = get_rx_type(qp, init_attr); + MLX5_SET(qpc, qpc, rq_type, get_rx_type(qp, init_attr)); if (qp->sq.wqe_cnt) - in->ctx.sq_crq_size |= cpu_to_be16(ilog2(qp->sq.wqe_cnt) << 11); + MLX5_SET(qpc, qpc, log_sq_size, ilog2(qp->sq.wqe_cnt)); else - in->ctx.sq_crq_size |= cpu_to_be16(0x8000); + MLX5_SET(qpc, qpc, no_sq, 1); /* Set default resources */ switch (init_attr->qp_type) { case IB_QPT_XRC_TGT: - in->ctx.cqn_recv = cpu_to_be32(to_mcq(devr->c0)->mcq.cqn); - in->ctx.cqn_send = cpu_to_be32(to_mcq(devr->c0)->mcq.cqn); - in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(devr->s0)->msrq.srqn); - in->ctx.xrcd = cpu_to_be32(to_mxrcd(init_attr->xrcd)->xrcdn); + MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(devr->c0)->mcq.cqn); + MLX5_SET(qpc, qpc, cqn_snd, to_mcq(devr->c0)->mcq.cqn); + MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(devr->s0)->msrq.srqn); + MLX5_SET(qpc, qpc, xrcd, to_mxrcd(init_attr->xrcd)->xrcdn); break; case IB_QPT_XRC_INI: - in->ctx.cqn_recv = cpu_to_be32(to_mcq(devr->c0)->mcq.cqn); - in->ctx.xrcd = cpu_to_be32(to_mxrcd(devr->x1)->xrcdn); - in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(devr->s0)->msrq.srqn); + MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(devr->c0)->mcq.cqn); + MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x1)->xrcdn); + MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(devr->s0)->msrq.srqn); break; default: if (init_attr->srq) { - in->ctx.xrcd = cpu_to_be32(to_mxrcd(devr->x0)->xrcdn); - in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(init_attr->srq)->msrq.srqn); + MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x0)->xrcdn); + MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(init_attr->srq)->msrq.srqn); } else { - in->ctx.xrcd = cpu_to_be32(to_mxrcd(devr->x1)->xrcdn); - in->ctx.rq_type_srqn |= - cpu_to_be32(to_msrq(devr->s1)->msrq.srqn); + MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x1)->xrcdn); + MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(devr->s1)->msrq.srqn); } } if (init_attr->send_cq) - in->ctx.cqn_send = cpu_to_be32(to_mcq(init_attr->send_cq)->mcq.cqn); + MLX5_SET(qpc, qpc, cqn_snd, to_mcq(init_attr->send_cq)->mcq.cqn); if (init_attr->recv_cq) - in->ctx.cqn_recv = cpu_to_be32(to_mcq(init_attr->recv_cq)->mcq.cqn); + MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(init_attr->recv_cq)->mcq.cqn); - in->ctx.db_rec_addr = cpu_to_be64(qp->db.dma); + MLX5_SET64(qpc, qpc, dbr_addr, qp->db.dma); - if (MLX5_CAP_GEN(mdev, cqe_version) == MLX5_CQE_VERSION_V1) { - qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); - /* 0xffffff means we ask to work with cqe version 0 */ + /* 0xffffff means we ask to work with cqe version 0 */ + if (MLX5_CAP_GEN(mdev, cqe_version) == MLX5_CQE_VERSION_V1) MLX5_SET(qpc, qpc, user_index, uidx); - } + /* we use IB_QP_CREATE_IPOIB_UD_LSO to indicates ipoib qp */ if (init_attr->qp_type == IB_QPT_UD && (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)) { - qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); MLX5_SET(qpc, qpc, ulp_stateless_offload_mode, 1); qp->flags |= MLX5_IB_QP_LSO; } @@ -4320,21 +4331,24 @@ static int query_raw_packet_qp_state(struct mlx5_ib_dev *dev, static int query_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, struct ib_qp_attr *qp_attr) { - struct mlx5_query_qp_mbox_out *outb; + int outlen = MLX5_ST_SZ_BYTES(query_qp_out); struct mlx5_qp_context *context; int mlx5_state; + u32 *outb; int err = 0; - outb = kzalloc(sizeof(*outb), GFP_KERNEL); + outb = kzalloc(outlen, GFP_KERNEL); if (!outb) return -ENOMEM; - context = &outb->ctx; err = mlx5_core_qp_query(dev->mdev, &qp->trans_qp.base.mqp, outb, - sizeof(*outb)); + outlen); if (err) goto out; + /* FIXME: use MLX5_GET rather than mlx5_qp_context manual struct */ + context = (struct mlx5_qp_context *)MLX5_ADDR_OF(query_qp_out, outb, qpc); + mlx5_state = be32_to_cpu(context->flags) >> 28; qp->state = to_ib_qp_state(mlx5_state); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c index b7484e4128c8..e94a9532e218 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c @@ -277,24 +277,28 @@ void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev) static u64 qp_read_field(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp, int index, int *is_str) { - struct mlx5_query_qp_mbox_out *out; + int outlen = MLX5_ST_SZ_BYTES(query_qp_out); struct mlx5_qp_context *ctx; u64 param = 0; + u32 *out; int err; int no_sq; - out = kzalloc(sizeof(*out), GFP_KERNEL); + out = kzalloc(outlen, GFP_KERNEL); if (!out) return param; - err = mlx5_core_qp_query(dev, qp, out, sizeof(*out)); + err = mlx5_core_qp_query(dev, qp, out, outlen); if (err) { - mlx5_core_warn(dev, "failed to query qp\n"); + mlx5_core_warn(dev, "failed to query qp err=%d\n", err); goto out; } *is_str = 0; - ctx = &out->ctx; + + /* FIXME: use MLX5_GET rather than mlx5_qp_context manual struct */ + ctx = (struct mlx5_qp_context *)MLX5_ADDR_OF(query_qp_out, out, qpc); + switch (index) { case QP_PID: param = qp->pid; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c index b82d65802d96..36d240c9d15f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c @@ -271,30 +271,21 @@ static void destroy_qprqsq_common(struct mlx5_core_dev *dev, int mlx5_core_create_qp(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp, - struct mlx5_create_qp_mbox_in *in, - int inlen) + u32 *in, int inlen) { - struct mlx5_create_qp_mbox_out out; - struct mlx5_destroy_qp_mbox_in din; - struct mlx5_destroy_qp_mbox_out dout; + u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {0}; + u32 dout[MLX5_ST_SZ_DW(destroy_qp_out)]; + u32 din[MLX5_ST_SZ_DW(destroy_qp_in)]; int err; - memset(&out, 0, sizeof(out)); - in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_QP); + MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP); - err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out)); - if (err) { - mlx5_core_warn(dev, "ret %d\n", err); + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); + err = err ? : mlx5_cmd_status_to_err_v2(out); + if (err) return err; - } - if (out.hdr.status) { - mlx5_core_warn(dev, "current num of QPs 0x%x\n", - atomic_read(&dev->num_qps)); - return mlx5_cmd_status_to_err(&out.hdr); - } - - qp->qpn = be32_to_cpu(out.qpn) & 0xffffff; + qp->qpn = MLX5_GET(create_qp_out, out, qpn); mlx5_core_dbg(dev, "qpn = 0x%x\n", qp->qpn); err = create_qprqsq_common(dev, qp, MLX5_RES_QP); @@ -311,12 +302,12 @@ int mlx5_core_create_qp(struct mlx5_core_dev *dev, return 0; err_cmd: - memset(&din, 0, sizeof(din)); - memset(&dout, 0, sizeof(dout)); - din.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_QP); - din.qpn = cpu_to_be32(qp->qpn); - mlx5_cmd_exec(dev, &din, sizeof(din), &out, sizeof(dout)); - + memset(din, 0, sizeof(din)); + memset(dout, 0, sizeof(dout)); + MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP); + MLX5_SET(destroy_qp_in, in, qpn, qp->qpn); + mlx5_cmd_exec(dev, din, sizeof(din), dout, sizeof(dout)); + mlx5_cmd_status_to_err_v2(dout); return err; } EXPORT_SYMBOL_GPL(mlx5_core_create_qp); @@ -324,25 +315,21 @@ EXPORT_SYMBOL_GPL(mlx5_core_create_qp); int mlx5_core_destroy_qp(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp) { - struct mlx5_destroy_qp_mbox_in in; - struct mlx5_destroy_qp_mbox_out out; + u32 out[MLX5_ST_SZ_DW(destroy_qp_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {0}; int err; mlx5_debug_qp_remove(dev, qp); destroy_qprqsq_common(dev, qp); - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_QP); - in.qpn = cpu_to_be32(qp->qpn); - err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); + MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP); + MLX5_SET(destroy_qp_in, in, qpn, qp->qpn); + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + err = err ? : mlx5_cmd_status_to_err_v2(out); if (err) return err; - if (out.hdr.status) - return mlx5_cmd_status_to_err(&out.hdr); - atomic_dec(&dev->num_qps); return 0; } @@ -382,66 +369,44 @@ void mlx5_cleanup_qp_table(struct mlx5_core_dev *dev) } int mlx5_core_qp_query(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp, - struct mlx5_query_qp_mbox_out *out, int outlen) + u32 *out, int outlen) { - struct mlx5_query_qp_mbox_in in; + u32 in[MLX5_ST_SZ_DW(query_qp_in)] = {0}; int err; - memset(&in, 0, sizeof(in)); - memset(out, 0, outlen); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_QP); - in.qpn = cpu_to_be32(qp->qpn); - err = mlx5_cmd_exec(dev, &in, sizeof(in), out, outlen); - if (err) - return err; + MLX5_SET(query_qp_in, in, opcode, MLX5_CMD_OP_QUERY_QP); + MLX5_SET(query_qp_in, in, qpn, qp->qpn); - if (out->hdr.status) - return mlx5_cmd_status_to_err(&out->hdr); - - return err; + err = mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); + return err ? : mlx5_cmd_status_to_err_v2(out); } EXPORT_SYMBOL_GPL(mlx5_core_qp_query); int mlx5_core_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn) { - struct mlx5_alloc_xrcd_mbox_in in; - struct mlx5_alloc_xrcd_mbox_out out; + u32 out[MLX5_ST_SZ_DW(alloc_xrcd_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(alloc_xrcd_in)] = {0}; int err; - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ALLOC_XRCD); - err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); - if (err) - return err; - - if (out.hdr.status) - err = mlx5_cmd_status_to_err(&out.hdr); - else - *xrcdn = be32_to_cpu(out.xrcdn) & 0xffffff; - + MLX5_SET(alloc_xrcd_in, in, opcode, MLX5_CMD_OP_ALLOC_XRCD); + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + err = err ? : mlx5_cmd_status_to_err_v2(out); + if (!err) + *xrcdn = MLX5_GET(alloc_xrcd_out, out, xrcd); return err; } EXPORT_SYMBOL_GPL(mlx5_core_xrcd_alloc); int mlx5_core_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn) { - struct mlx5_dealloc_xrcd_mbox_in in; - struct mlx5_dealloc_xrcd_mbox_out out; + u32 out[MLX5_ST_SZ_DW(dealloc_xrcd_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(dealloc_xrcd_in)] = {0}; int err; - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DEALLOC_XRCD); - in.xrcdn = cpu_to_be32(xrcdn); - err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); - if (err) - return err; - - if (out.hdr.status) - err = mlx5_cmd_status_to_err(&out.hdr); - - return err; + MLX5_SET(dealloc_xrcd_in, in, opcode, MLX5_CMD_OP_DEALLOC_XRCD); + MLX5_SET(dealloc_xrcd_in, in, xrcd, xrcdn); + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + return err ? : mlx5_cmd_status_to_err_v2(out); } EXPORT_SYMBOL_GPL(mlx5_core_xrcd_dealloc); @@ -449,28 +414,26 @@ EXPORT_SYMBOL_GPL(mlx5_core_xrcd_dealloc); int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 qpn, u8 flags, int error) { - struct mlx5_page_fault_resume_mbox_in in; - struct mlx5_page_fault_resume_mbox_out out; + u32 out[MLX5_ST_SZ_DW(page_fault_resume_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(page_fault_resume_in)] = {0}; int err; - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_PAGE_FAULT_RESUME); - in.hdr.opmod = 0; - flags &= (MLX5_PAGE_FAULT_RESUME_REQUESTOR | - MLX5_PAGE_FAULT_RESUME_WRITE | - MLX5_PAGE_FAULT_RESUME_RDMA); - flags |= (error ? MLX5_PAGE_FAULT_RESUME_ERROR : 0); - in.flags_qpn = cpu_to_be32((qpn & MLX5_QPN_MASK) | - (flags << MLX5_QPN_BITS)); - err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); - if (err) - return err; + MLX5_SET(page_fault_resume_in, in, opcode, + MLX5_CMD_OP_PAGE_FAULT_RESUME); - if (out.hdr.status) - err = mlx5_cmd_status_to_err(&out.hdr); + MLX5_SET(page_fault_resume_in, in, qpn, qpn); - return err; + if (flags & MLX5_PAGE_FAULT_RESUME_REQUESTOR) + MLX5_SET(page_fault_resume_in, in, req_res, 1); + if (flags & MLX5_PAGE_FAULT_RESUME_WRITE) + MLX5_SET(page_fault_resume_in, in, read_write, 1); + if (flags & MLX5_PAGE_FAULT_RESUME_RDMA) + MLX5_SET(page_fault_resume_in, in, rdma, 1); + if (error) + MLX5_SET(page_fault_resume_in, in, error, 1); + + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + return err ? : mlx5_cmd_status_to_err_v2(out); } EXPORT_SYMBOL_GPL(mlx5_core_page_fault_resume); #endif @@ -541,13 +504,10 @@ EXPORT_SYMBOL(mlx5_core_destroy_sq_tracked); int mlx5_core_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id) { - u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)]; - u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)]; + u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {0}; int err; - memset(in, 0, sizeof(in)); - memset(out, 0, sizeof(out)); - MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER); err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); if (!err) @@ -559,11 +519,8 @@ EXPORT_SYMBOL_GPL(mlx5_core_alloc_q_counter); int mlx5_core_dealloc_q_counter(struct mlx5_core_dev *dev, u16 counter_id) { - u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)]; - u32 out[MLX5_ST_SZ_DW(dealloc_q_counter_out)]; - - memset(in, 0, sizeof(in)); - memset(out, 0, sizeof(out)); + u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(dealloc_q_counter_out)] = {0}; MLX5_SET(dealloc_q_counter_in, in, opcode, MLX5_CMD_OP_DEALLOC_Q_COUNTER); @@ -576,9 +533,7 @@ EXPORT_SYMBOL_GPL(mlx5_core_dealloc_q_counter); int mlx5_core_query_q_counter(struct mlx5_core_dev *dev, u16 counter_id, int reset, void *out, int out_size) { - u32 in[MLX5_ST_SZ_DW(query_q_counter_in)]; - - memset(in, 0, sizeof(in)); + u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {0}; MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER); MLX5_SET(query_q_counter_in, in, clear, reset); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 2a39a06dbad4..cb94ac5b8420 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1966,7 +1966,10 @@ struct mlx5_ifc_qpc_bits { u8 reserved_at_3e0[0x8]; u8 cqn_snd[0x18]; - u8 reserved_at_400[0x40]; + u8 reserved_at_400[0x8]; + u8 deth_sqpn[0x18]; + + u8 reserved_at_420[0x20]; u8 reserved_at_440[0x8]; u8 last_acked_psn[0x18]; diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 7879bf411891..16e1efecaf66 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -123,12 +123,13 @@ enum { }; enum { - MLX5_NON_ZERO_RQ = 0 << 24, - MLX5_SRQ_RQ = 1 << 24, - MLX5_CRQ_RQ = 2 << 24, - MLX5_ZERO_LEN_RQ = 3 << 24 + MLX5_NON_ZERO_RQ = 0x0, + MLX5_SRQ_RQ = 0x1, + MLX5_CRQ_RQ = 0x2, + MLX5_ZERO_LEN_RQ = 0x3 }; +/* TODO REM */ enum { /* params1 */ MLX5_QP_BIT_SRE = 1 << 15, @@ -177,12 +178,6 @@ enum { MLX5_FENCE_MODE_SMALL_AND_FENCE = 4 << 5, }; -enum { - MLX5_QP_LAT_SENSITIVE = 1 << 28, - MLX5_QP_BLOCK_MCAST = 1 << 30, - MLX5_QP_ENABLE_SIG = 1 << 31, -}; - enum { MLX5_RCV_DBR = 0, MLX5_SND_DBR = 1, @@ -525,34 +520,6 @@ struct mlx5_qp_context { u8 rsvd1[24]; }; -struct mlx5_create_qp_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 input_qpn; - u8 rsvd0[4]; - __be32 opt_param_mask; - u8 rsvd1[4]; - struct mlx5_qp_context ctx; - u8 rsvd3[16]; - __be64 pas[0]; -}; - -struct mlx5_create_qp_mbox_out { - struct mlx5_outbox_hdr hdr; - __be32 qpn; - u8 rsvd0[4]; -}; - -struct mlx5_destroy_qp_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 qpn; - u8 rsvd0[4]; -}; - -struct mlx5_destroy_qp_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd0[8]; -}; - struct mlx5_modify_qp_mbox_in { struct mlx5_inbox_hdr hdr; __be32 qpn; @@ -568,56 +535,6 @@ struct mlx5_modify_qp_mbox_out { u8 rsvd0[8]; }; -struct mlx5_query_qp_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 qpn; - u8 rsvd[4]; -}; - -struct mlx5_query_qp_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd1[8]; - __be32 optparam; - u8 rsvd0[4]; - struct mlx5_qp_context ctx; - u8 rsvd2[16]; - __be64 pas[0]; -}; - -struct mlx5_conf_sqp_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 qpn; - u8 rsvd[3]; - u8 type; -}; - -struct mlx5_conf_sqp_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd[8]; -}; - -struct mlx5_alloc_xrcd_mbox_in { - struct mlx5_inbox_hdr hdr; - u8 rsvd[8]; -}; - -struct mlx5_alloc_xrcd_mbox_out { - struct mlx5_outbox_hdr hdr; - __be32 xrcdn; - u8 rsvd[4]; -}; - -struct mlx5_dealloc_xrcd_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 xrcdn; - u8 rsvd[4]; -}; - -struct mlx5_dealloc_xrcd_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd[8]; -}; - static inline struct mlx5_core_qp *__mlx5_qp_lookup(struct mlx5_core_dev *dev, u32 qpn) { return radix_tree_lookup(&dev->priv.qp_table.tree, qpn); @@ -628,20 +545,9 @@ static inline struct mlx5_core_mkey *__mlx5_mr_lookup(struct mlx5_core_dev *dev, return radix_tree_lookup(&dev->priv.mkey_table.tree, key); } -struct mlx5_page_fault_resume_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 flags_qpn; - u8 reserved[4]; -}; - -struct mlx5_page_fault_resume_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd[8]; -}; - int mlx5_core_create_qp(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp, - struct mlx5_create_qp_mbox_in *in, + u32 *in, int inlen); int mlx5_core_qp_modify(struct mlx5_core_dev *dev, u16 operation, struct mlx5_modify_qp_mbox_in *in, int sqd_event, @@ -649,7 +555,7 @@ int mlx5_core_qp_modify(struct mlx5_core_dev *dev, u16 operation, int mlx5_core_destroy_qp(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp); int mlx5_core_qp_query(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp, - struct mlx5_query_qp_mbox_out *out, int outlen); + u32 *out, int outlen); int mlx5_core_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn); int mlx5_core_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn); From 1a412fb1caa2c1b77719ccb5ed8b0c3c2bc65da7 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 19 Jul 2016 18:03:21 +0300 Subject: [PATCH 0245/1715] {net,IB}/mlx5: Modify QP commands via mlx5 ifc Prior to this patch we assumed that modify QP commands have the same layout. In ConnectX-4 for each QP transition there is a specific command and their layout can vary. e.g: 2err/2rst commands don't have QP context in their layout and before this patch we posted the QP context in those commands. Fortunately the FW only checks the suffix of the commands and executes them, while ignoring all invalid data sent after the valid command layout. This patch removes mlx5_modify_qp_mbox_in and changes mlx5_core_qp_modify to receive the required transition and QP context with opt_param_mask if needed. This way the caller is not required to provide the command inbox layout and it will be generated automatically. mlx5_core_qp_modify will generate the command inbox/outbox layouts according to the requested transition and will fill the requested parameters. Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/qp.c | 22 +--- drivers/net/ethernet/mellanox/mlx5/core/qp.c | 124 +++++++++++++++++-- include/linux/mlx5/qp.h | 20 +-- 3 files changed, 124 insertions(+), 42 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index d22492ff863e..626173736749 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1871,7 +1871,6 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) { struct mlx5_ib_cq *send_cq, *recv_cq; struct mlx5_ib_qp_base *base = &qp->trans_qp.base; - struct mlx5_modify_qp_mbox_in *in; unsigned long flags; int err; @@ -1884,16 +1883,12 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) &qp->raw_packet_qp.rq.base : &qp->trans_qp.base; - in = kzalloc(sizeof(*in), GFP_KERNEL); - if (!in) - return; - if (qp->state != IB_QPS_RESET) { if (qp->ibqp.qp_type != IB_QPT_RAW_PACKET) { mlx5_ib_qp_disable_pagefaults(qp); err = mlx5_core_qp_modify(dev->mdev, - MLX5_CMD_OP_2RST_QP, in, 0, - &base->mqp); + MLX5_CMD_OP_2RST_QP, 0, + NULL, &base->mqp); } else { err = modify_raw_packet_qp(dev, qp, MLX5_CMD_OP_2RST_QP); @@ -1935,8 +1930,6 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) base->mqp.qpn); } - kfree(in); - if (qp->create_type == MLX5_QP_KERNEL) destroy_qp_kernel(dev, qp); else if (qp->create_type == MLX5_QP_USER) @@ -2522,7 +2515,6 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, struct mlx5_ib_qp_base *base = &qp->trans_qp.base; struct mlx5_ib_cq *send_cq, *recv_cq; struct mlx5_qp_context *context; - struct mlx5_modify_qp_mbox_in *in; struct mlx5_ib_pd *pd; enum mlx5_qp_state mlx5_cur, mlx5_new; enum mlx5_qp_optpar optpar; @@ -2531,11 +2523,10 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, int err; u16 op; - in = kzalloc(sizeof(*in), GFP_KERNEL); - if (!in) + context = kzalloc(sizeof(*context), GFP_KERNEL); + if (!context) return -ENOMEM; - context = &in->ctx; err = to_mlx5_st(ibqp->qp_type); if (err < 0) { mlx5_ib_dbg(dev, "unsupported qp type %d\n", ibqp->qp_type); @@ -2700,12 +2691,11 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, op = optab[mlx5_cur][mlx5_new]; optpar = ib_mask_to_mlx5_opt(attr_mask); optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st]; - in->optparam = cpu_to_be32(optpar); if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) err = modify_raw_packet_qp(dev, qp, op); else - err = mlx5_core_qp_modify(dev->mdev, op, in, sqd_event, + err = mlx5_core_qp_modify(dev->mdev, op, optpar, context, &base->mqp); if (err) goto out; @@ -2746,7 +2736,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, } out: - kfree(in); + kfree(context); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c index 36d240c9d15f..50875a4e9d58 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c @@ -335,21 +335,127 @@ int mlx5_core_destroy_qp(struct mlx5_core_dev *dev, } EXPORT_SYMBOL_GPL(mlx5_core_destroy_qp); -int mlx5_core_qp_modify(struct mlx5_core_dev *dev, u16 operation, - struct mlx5_modify_qp_mbox_in *in, int sqd_event, +struct mbox_info { + u32 *in; + u32 *out; + int inlen; + int outlen; +}; + +static int mbox_alloc(struct mbox_info *mbox, int inlen, int outlen) +{ + mbox->inlen = inlen; + mbox->outlen = outlen; + mbox->in = kzalloc(mbox->inlen, GFP_KERNEL); + mbox->out = kzalloc(mbox->outlen, GFP_KERNEL); + if (!mbox->in || !mbox->out) { + kfree(mbox->in); + kfree(mbox->out); + return -ENOMEM; + } + + return 0; +} + +static void mbox_free(struct mbox_info *mbox) +{ + kfree(mbox->in); + kfree(mbox->out); +} + +static int modify_qp_mbox_alloc(struct mlx5_core_dev *dev, u16 opcode, int qpn, + u32 opt_param_mask, void *qpc, + struct mbox_info *mbox) +{ + mbox->out = NULL; + mbox->in = NULL; + +#define MBOX_ALLOC(mbox, typ) \ + mbox_alloc(mbox, MLX5_ST_SZ_BYTES(typ##_in), MLX5_ST_SZ_BYTES(typ##_out)) + +#define MOD_QP_IN_SET(typ, in, _opcode, _qpn) \ + MLX5_SET(typ##_in, in, opcode, _opcode); \ + MLX5_SET(typ##_in, in, qpn, _qpn) + +#define MOD_QP_IN_SET_QPC(typ, in, _opcode, _qpn, _opt_p, _qpc) \ + MOD_QP_IN_SET(typ, in, _opcode, _qpn); \ + MLX5_SET(typ##_in, in, opt_param_mask, _opt_p); \ + memcpy(MLX5_ADDR_OF(typ##_in, in, qpc), _qpc, MLX5_ST_SZ_BYTES(qpc)) + + switch (opcode) { + /* 2RST & 2ERR */ + case MLX5_CMD_OP_2RST_QP: + if (MBOX_ALLOC(mbox, qp_2rst)) + return -ENOMEM; + MOD_QP_IN_SET(qp_2rst, mbox->in, opcode, qpn); + break; + case MLX5_CMD_OP_2ERR_QP: + if (MBOX_ALLOC(mbox, qp_2err)) + return -ENOMEM; + MOD_QP_IN_SET(qp_2err, mbox->in, opcode, qpn); + break; + + /* MODIFY with QPC */ + case MLX5_CMD_OP_RST2INIT_QP: + if (MBOX_ALLOC(mbox, rst2init_qp)) + return -ENOMEM; + MOD_QP_IN_SET_QPC(rst2init_qp, mbox->in, opcode, qpn, + opt_param_mask, qpc); + break; + case MLX5_CMD_OP_INIT2RTR_QP: + if (MBOX_ALLOC(mbox, init2rtr_qp)) + return -ENOMEM; + MOD_QP_IN_SET_QPC(init2rtr_qp, mbox->in, opcode, qpn, + opt_param_mask, qpc); + break; + case MLX5_CMD_OP_RTR2RTS_QP: + if (MBOX_ALLOC(mbox, rtr2rts_qp)) + return -ENOMEM; + MOD_QP_IN_SET_QPC(rtr2rts_qp, mbox->in, opcode, qpn, + opt_param_mask, qpc); + break; + case MLX5_CMD_OP_RTS2RTS_QP: + if (MBOX_ALLOC(mbox, rts2rts_qp)) + return -ENOMEM; + MOD_QP_IN_SET_QPC(rts2rts_qp, mbox->in, opcode, qpn, + opt_param_mask, qpc); + break; + case MLX5_CMD_OP_SQERR2RTS_QP: + if (MBOX_ALLOC(mbox, sqerr2rts_qp)) + return -ENOMEM; + MOD_QP_IN_SET_QPC(sqerr2rts_qp, mbox->in, opcode, qpn, + opt_param_mask, qpc); + break; + case MLX5_CMD_OP_INIT2INIT_QP: + if (MBOX_ALLOC(mbox, init2init_qp)) + return -ENOMEM; + MOD_QP_IN_SET_QPC(init2init_qp, mbox->in, opcode, qpn, + opt_param_mask, qpc); + break; + default: + mlx5_core_err(dev, "Unknown transition for modify QP: OP(0x%x) QPN(0x%x)\n", + opcode, qpn); + return -EINVAL; + } + return 0; +} + +int mlx5_core_qp_modify(struct mlx5_core_dev *dev, u16 opcode, + u32 opt_param_mask, void *qpc, struct mlx5_core_qp *qp) { - struct mlx5_modify_qp_mbox_out out; - int err = 0; + struct mbox_info mbox; + int err; - memset(&out, 0, sizeof(out)); - in->hdr.opcode = cpu_to_be16(operation); - in->qpn = cpu_to_be32(qp->qpn); - err = mlx5_cmd_exec(dev, in, sizeof(*in), &out, sizeof(out)); + err = modify_qp_mbox_alloc(dev, opcode, qp->qpn, + opt_param_mask, qpc, &mbox); if (err) return err; - return mlx5_cmd_status_to_err(&out.hdr); + err = mlx5_cmd_exec(dev, mbox.in, mbox.inlen, mbox.out, mbox.outlen); + err = err ? : mlx5_cmd_status_to_err_v2(mbox.out); + mbox_free(&mbox); + return err; } EXPORT_SYMBOL_GPL(mlx5_core_qp_modify); diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 16e1efecaf66..0aacb2a7480d 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -479,6 +479,7 @@ struct mlx5_qp_path { u8 rmac[6]; }; +/* FIXME: use mlx5_ifc.h qpc */ struct mlx5_qp_context { __be32 flags; __be32 flags_pd; @@ -520,21 +521,6 @@ struct mlx5_qp_context { u8 rsvd1[24]; }; -struct mlx5_modify_qp_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 qpn; - u8 rsvd0[4]; - __be32 optparam; - u8 rsvd1[4]; - struct mlx5_qp_context ctx; - u8 rsvd2[16]; -}; - -struct mlx5_modify_qp_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd0[8]; -}; - static inline struct mlx5_core_qp *__mlx5_qp_lookup(struct mlx5_core_dev *dev, u32 qpn) { return radix_tree_lookup(&dev->priv.qp_table.tree, qpn); @@ -549,8 +535,8 @@ int mlx5_core_create_qp(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp, u32 *in, int inlen); -int mlx5_core_qp_modify(struct mlx5_core_dev *dev, u16 operation, - struct mlx5_modify_qp_mbox_in *in, int sqd_event, +int mlx5_core_qp_modify(struct mlx5_core_dev *dev, u16 opcode, + u32 opt_param_mask, void *qpc, struct mlx5_core_qp *qp); int mlx5_core_destroy_qp(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp); From c4f287c4a6ac489c18afc4acc4353141a8c53070 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 19 Jul 2016 20:17:12 +0300 Subject: [PATCH 0246/1715] net/mlx5: Unify and improve command interface Now as all commands use mlx5 ifc interface, instead of doing two calls for executing a command we embed command status checking into mlx5_cmd_exec to simplify the interface. Also we do here some cleanup for redundant software structures (inbox/outbox) and functions and improved command failure output. Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/main.c | 10 +- drivers/infiniband/hw/mlx5/qp.c | 5 +- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 251 +++++++++--------- drivers/net/ethernet/mellanox/mlx5/core/cq.c | 16 +- .../ethernet/mellanox/mlx5/core/en_ethtool.c | 3 +- .../net/ethernet/mellanox/mlx5/core/en_main.c | 10 +- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 12 +- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 74 ++---- .../net/ethernet/mellanox/mlx5/core/fs_cmd.c | 130 +++------ drivers/net/ethernet/mellanox/mlx5/core/fw.c | 15 +- drivers/net/ethernet/mellanox/mlx5/core/mad.c | 1 - .../net/ethernet/mellanox/mlx5/core/main.c | 80 ++---- drivers/net/ethernet/mellanox/mlx5/core/mcg.c | 10 +- .../ethernet/mellanox/mlx5/core/mlx5_core.h | 13 - drivers/net/ethernet/mellanox/mlx5/core/mr.c | 28 +- .../ethernet/mellanox/mlx5/core/pagealloc.c | 11 +- drivers/net/ethernet/mellanox/mlx5/core/pd.c | 11 +- .../net/ethernet/mellanox/mlx5/core/port.c | 99 ++----- drivers/net/ethernet/mellanox/mlx5/core/qp.c | 26 +- drivers/net/ethernet/mellanox/mlx5/core/rl.c | 11 +- drivers/net/ethernet/mellanox/mlx5/core/srq.c | 49 ++-- .../ethernet/mellanox/mlx5/core/transobj.c | 183 +++++-------- drivers/net/ethernet/mellanox/mlx5/core/uar.c | 11 +- .../net/ethernet/mellanox/mlx5/core/vport.c | 74 ++---- .../net/ethernet/mellanox/mlx5/core/vxlan.c | 29 +- include/linux/mlx5/device.h | 115 -------- include/linux/mlx5/driver.h | 7 +- 27 files changed, 386 insertions(+), 898 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index a84bb766fc62..6fb77d791045 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -233,23 +233,19 @@ static int set_roce_addr(struct ib_device *device, u8 port_num, const union ib_gid *gid, const struct ib_gid_attr *attr) { - struct mlx5_ib_dev *dev = to_mdev(device); - u32 in[MLX5_ST_SZ_DW(set_roce_address_in)]; - u32 out[MLX5_ST_SZ_DW(set_roce_address_out)]; + struct mlx5_ib_dev *dev = to_mdev(device); + u32 in[MLX5_ST_SZ_DW(set_roce_address_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(set_roce_address_out)] = {0}; void *in_addr = MLX5_ADDR_OF(set_roce_address_in, in, roce_address); enum rdma_link_layer ll = mlx5_ib_port_link_layer(device, port_num); if (ll != IB_LINK_LAYER_ETHERNET) return -EINVAL; - memset(in, 0, sizeof(in)); - ib_gid_to_mlx5_roce_addr(gid, attr, in_addr); MLX5_SET(set_roce_address_in, in, roce_address_index, index); MLX5_SET(set_roce_address_in, in, opcode, MLX5_CMD_OP_SET_ROCE_ADDRESS); - - memset(out, 0, sizeof(out)); return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); } diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 626173736749..f3c943f6458e 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1007,13 +1007,10 @@ static int is_connected(enum ib_qp_type qp_type) static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev, struct mlx5_ib_sq *sq, u32 tdn) { - u32 in[MLX5_ST_SZ_DW(create_tis_in)]; + u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {0}; void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); - memset(in, 0, sizeof(in)); - MLX5_SET(tisc, tisc, transport_domain, tdn); - return mlx5_core_create_tis(dev->mdev, in, sizeof(in), &sq->tisn); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 88b05400fbc8..23b95daa9533 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -554,11 +554,124 @@ const char *mlx5_command_str(int command) } } +static const char *cmd_status_str(u8 status) +{ + switch (status) { + case MLX5_CMD_STAT_OK: + return "OK"; + case MLX5_CMD_STAT_INT_ERR: + return "internal error"; + case MLX5_CMD_STAT_BAD_OP_ERR: + return "bad operation"; + case MLX5_CMD_STAT_BAD_PARAM_ERR: + return "bad parameter"; + case MLX5_CMD_STAT_BAD_SYS_STATE_ERR: + return "bad system state"; + case MLX5_CMD_STAT_BAD_RES_ERR: + return "bad resource"; + case MLX5_CMD_STAT_RES_BUSY: + return "resource busy"; + case MLX5_CMD_STAT_LIM_ERR: + return "limits exceeded"; + case MLX5_CMD_STAT_BAD_RES_STATE_ERR: + return "bad resource state"; + case MLX5_CMD_STAT_IX_ERR: + return "bad index"; + case MLX5_CMD_STAT_NO_RES_ERR: + return "no resources"; + case MLX5_CMD_STAT_BAD_INP_LEN_ERR: + return "bad input length"; + case MLX5_CMD_STAT_BAD_OUTP_LEN_ERR: + return "bad output length"; + case MLX5_CMD_STAT_BAD_QP_STATE_ERR: + return "bad QP state"; + case MLX5_CMD_STAT_BAD_PKT_ERR: + return "bad packet (discarded)"; + case MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR: + return "bad size too many outstanding CQEs"; + default: + return "unknown status"; + } +} + +static int cmd_status_to_err(u8 status) +{ + switch (status) { + case MLX5_CMD_STAT_OK: return 0; + case MLX5_CMD_STAT_INT_ERR: return -EIO; + case MLX5_CMD_STAT_BAD_OP_ERR: return -EINVAL; + case MLX5_CMD_STAT_BAD_PARAM_ERR: return -EINVAL; + case MLX5_CMD_STAT_BAD_SYS_STATE_ERR: return -EIO; + case MLX5_CMD_STAT_BAD_RES_ERR: return -EINVAL; + case MLX5_CMD_STAT_RES_BUSY: return -EBUSY; + case MLX5_CMD_STAT_LIM_ERR: return -ENOMEM; + case MLX5_CMD_STAT_BAD_RES_STATE_ERR: return -EINVAL; + case MLX5_CMD_STAT_IX_ERR: return -EINVAL; + case MLX5_CMD_STAT_NO_RES_ERR: return -EAGAIN; + case MLX5_CMD_STAT_BAD_INP_LEN_ERR: return -EIO; + case MLX5_CMD_STAT_BAD_OUTP_LEN_ERR: return -EIO; + case MLX5_CMD_STAT_BAD_QP_STATE_ERR: return -EINVAL; + case MLX5_CMD_STAT_BAD_PKT_ERR: return -EINVAL; + case MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR: return -EINVAL; + default: return -EIO; + } +} + +struct mlx5_ifc_mbox_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_mbox_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x40]; +}; + +void mlx5_cmd_mbox_status(void *out, u8 *status, u32 *syndrome) +{ + *status = MLX5_GET(mbox_out, out, status); + *syndrome = MLX5_GET(mbox_out, out, syndrome); +} + +static int mlx5_cmd_check(struct mlx5_core_dev *dev, void *in, void *out) +{ + u32 syndrome; + u8 status; + u16 opcode; + u16 op_mod; + + mlx5_cmd_mbox_status(out, &status, &syndrome); + if (!status) + return 0; + + opcode = MLX5_GET(mbox_in, in, opcode); + op_mod = MLX5_GET(mbox_in, in, op_mod); + + mlx5_core_err(dev, + "%s(0x%x) op_mod(0x%x) failed, status %s(0x%x), syndrome (0x%x)\n", + mlx5_command_str(opcode), + opcode, op_mod, + cmd_status_str(status), + status, + syndrome); + + return cmd_status_to_err(status); +} + static void dump_command(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent, int input) { - u16 op = be16_to_cpu(((struct mlx5_inbox_hdr *)(ent->lay->in))->opcode); struct mlx5_cmd_msg *msg = input ? ent->in : ent->out; + u16 op = MLX5_GET(mbox_in, ent->lay->in, opcode); struct mlx5_cmd_mailbox *next = msg->next; int data_only; u32 offset = 0; @@ -608,9 +721,7 @@ static void dump_command(struct mlx5_core_dev *dev, static u16 msg_to_opcode(struct mlx5_cmd_msg *in) { - struct mlx5_inbox_hdr *hdr = (struct mlx5_inbox_hdr *)(in->first.data); - - return be16_to_cpu(hdr->opcode); + return MLX5_GET(mbox_in, in->first.data, opcode); } static void cb_timeout_handler(struct work_struct *work) @@ -749,16 +860,6 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent) return err; } -static __be32 *get_synd_ptr(struct mlx5_outbox_hdr *out) -{ - return &out->syndrome; -} - -static u8 *get_status_ptr(struct mlx5_outbox_hdr *out) -{ - return &out->status; -} - /* Notes: * 1. Callback functions may not sleep * 2. page queue commands do not support asynchrous completion @@ -804,7 +905,7 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in, goto out_free; ds = ent->ts2 - ent->ts1; - op = be16_to_cpu(((struct mlx5_inbox_hdr *)in->first.data)->opcode); + op = MLX5_GET(mbox_in, in->first.data, opcode); if (op < ARRAY_SIZE(cmd->stats)) { stats = &cmd->stats[op]; spin_lock_irq(&stats->lock); @@ -1305,7 +1406,10 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec) err = mlx5_copy_from_msg(ent->uout, ent->out, ent->uout_size); - err = err ? err : mlx5_cmd_status_to_err_v2(ent->uout); + + err = err ? err : mlx5_cmd_check(dev, + ent->in->first.data, + ent->uout); } mlx5_free_cmd_msg(dev, ent->out); @@ -1359,14 +1463,9 @@ static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size, return msg; } -static u16 opcode_from_in(struct mlx5_inbox_hdr *in) +static int is_manage_pages(void *in) { - return be16_to_cpu(in->opcode); -} - -static int is_manage_pages(struct mlx5_inbox_hdr *in) -{ - return be16_to_cpu(in->opcode) == MLX5_CMD_OP_MANAGE_PAGES; + return MLX5_GET(mbox_in, in, opcode) == MLX5_CMD_OP_MANAGE_PAGES; } static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, @@ -1382,9 +1481,11 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, if (pci_channel_offline(dev->pdev) || dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { - err = mlx5_internal_err_ret_value(dev, opcode_from_in(in), &drv_synd, &status); - *get_synd_ptr(out) = cpu_to_be32(drv_synd); - *get_status_ptr(out) = status; + u16 opcode = MLX5_GET(mbox_in, in, opcode); + + err = mlx5_internal_err_ret_value(dev, opcode, &drv_synd, &status); + MLX5_SET(mbox_out, out, status, status); + MLX5_SET(mbox_out, out, syndrome, drv_synd); return err; } @@ -1436,7 +1537,10 @@ out_in: int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size) { - return cmd_exec(dev, in, in_size, out, out_size, NULL, NULL); + int err; + + err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL); + return err ? : mlx5_cmd_check(dev, in, out); } EXPORT_SYMBOL(mlx5_cmd_exec); @@ -1673,96 +1777,3 @@ void mlx5_cmd_cleanup(struct mlx5_core_dev *dev) pci_pool_destroy(cmd->pool); } EXPORT_SYMBOL(mlx5_cmd_cleanup); - -static const char *cmd_status_str(u8 status) -{ - switch (status) { - case MLX5_CMD_STAT_OK: - return "OK"; - case MLX5_CMD_STAT_INT_ERR: - return "internal error"; - case MLX5_CMD_STAT_BAD_OP_ERR: - return "bad operation"; - case MLX5_CMD_STAT_BAD_PARAM_ERR: - return "bad parameter"; - case MLX5_CMD_STAT_BAD_SYS_STATE_ERR: - return "bad system state"; - case MLX5_CMD_STAT_BAD_RES_ERR: - return "bad resource"; - case MLX5_CMD_STAT_RES_BUSY: - return "resource busy"; - case MLX5_CMD_STAT_LIM_ERR: - return "limits exceeded"; - case MLX5_CMD_STAT_BAD_RES_STATE_ERR: - return "bad resource state"; - case MLX5_CMD_STAT_IX_ERR: - return "bad index"; - case MLX5_CMD_STAT_NO_RES_ERR: - return "no resources"; - case MLX5_CMD_STAT_BAD_INP_LEN_ERR: - return "bad input length"; - case MLX5_CMD_STAT_BAD_OUTP_LEN_ERR: - return "bad output length"; - case MLX5_CMD_STAT_BAD_QP_STATE_ERR: - return "bad QP state"; - case MLX5_CMD_STAT_BAD_PKT_ERR: - return "bad packet (discarded)"; - case MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR: - return "bad size too many outstanding CQEs"; - default: - return "unknown status"; - } -} - -static int cmd_status_to_err(u8 status) -{ - switch (status) { - case MLX5_CMD_STAT_OK: return 0; - case MLX5_CMD_STAT_INT_ERR: return -EIO; - case MLX5_CMD_STAT_BAD_OP_ERR: return -EINVAL; - case MLX5_CMD_STAT_BAD_PARAM_ERR: return -EINVAL; - case MLX5_CMD_STAT_BAD_SYS_STATE_ERR: return -EIO; - case MLX5_CMD_STAT_BAD_RES_ERR: return -EINVAL; - case MLX5_CMD_STAT_RES_BUSY: return -EBUSY; - case MLX5_CMD_STAT_LIM_ERR: return -ENOMEM; - case MLX5_CMD_STAT_BAD_RES_STATE_ERR: return -EINVAL; - case MLX5_CMD_STAT_IX_ERR: return -EINVAL; - case MLX5_CMD_STAT_NO_RES_ERR: return -EAGAIN; - case MLX5_CMD_STAT_BAD_INP_LEN_ERR: return -EIO; - case MLX5_CMD_STAT_BAD_OUTP_LEN_ERR: return -EIO; - case MLX5_CMD_STAT_BAD_QP_STATE_ERR: return -EINVAL; - case MLX5_CMD_STAT_BAD_PKT_ERR: return -EINVAL; - case MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR: return -EINVAL; - default: return -EIO; - } -} - -/* this will be available till all the commands use set/get macros */ -int mlx5_cmd_status_to_err(struct mlx5_outbox_hdr *hdr) -{ - if (!hdr->status) - return 0; - - pr_warn("command failed, status %s(0x%x), syndrome 0x%x\n", - cmd_status_str(hdr->status), hdr->status, - be32_to_cpu(hdr->syndrome)); - - return cmd_status_to_err(hdr->status); -} - -int mlx5_cmd_status_to_err_v2(void *ptr) -{ - u32 syndrome; - u8 status; - - status = be32_to_cpu(*(__be32 *)ptr) >> 24; - if (!status) - return 0; - - syndrome = be32_to_cpu(*(__be32 *)(ptr + 4)); - - pr_warn("command failed, status %s(0x%x), syndrome 0x%x\n", - cmd_status_str(status), status, syndrome); - - return cmd_status_to_err(status); -} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c index cf02d8a27874..32d4af9b594d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c @@ -153,7 +153,6 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, memset(out, 0, sizeof(out)); MLX5_SET(create_cq_in, in, opcode, MLX5_CMD_OP_CREATE_CQ); err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); - err = err ? : mlx5_cmd_status_to_err_v2(out); if (err) return err; @@ -187,9 +186,8 @@ err_cmd: memset(dout, 0, sizeof(dout)); MLX5_SET(destroy_cq_in, din, opcode, MLX5_CMD_OP_DESTROY_CQ); MLX5_SET(destroy_cq_in, din, cqn, cq->cqn); - err = mlx5_cmd_exec(dev, din, sizeof(din), dout, sizeof(dout)); - return err ? : mlx5_cmd_status_to_err_v2(out); - + mlx5_cmd_exec(dev, din, sizeof(din), dout, sizeof(dout)); + return err; } EXPORT_SYMBOL(mlx5_core_create_cq); @@ -216,7 +214,6 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq) MLX5_SET(destroy_cq_in, in, opcode, MLX5_CMD_OP_DESTROY_CQ); MLX5_SET(destroy_cq_in, in, cqn, cq->cqn); err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); - err = err ? : mlx5_cmd_status_to_err_v2(out); if (err) return err; @@ -235,13 +232,10 @@ int mlx5_core_query_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, u32 *out, int outlen) { u32 in[MLX5_ST_SZ_DW(query_cq_in)] = {0}; - int err; MLX5_SET(query_cq_in, in, opcode, MLX5_CMD_OP_QUERY_CQ); MLX5_SET(query_cq_in, in, cqn, cq->cqn); - - err = mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); - return err ? : mlx5_cmd_status_to_err_v2(out); + return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); } EXPORT_SYMBOL(mlx5_core_query_cq); @@ -249,11 +243,9 @@ int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, u32 *in, int inlen) { u32 out[MLX5_ST_SZ_DW(modify_cq_out)] = {0}; - int err; MLX5_SET(modify_cq_in, in, opcode, MLX5_CMD_OP_MODIFY_CQ); - err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); - return err ? : mlx5_cmd_status_to_err_v2(out); + return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); } EXPORT_SYMBOL(mlx5_core_modify_cq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 4a3757e60441..9561fca494bf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -726,7 +726,7 @@ static int mlx5e_get_link_ksettings(struct net_device *netdev, { struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; - u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {0}; u32 eth_proto_cap; u32 eth_proto_admin; u32 eth_proto_lp; @@ -736,7 +736,6 @@ static int mlx5e_get_link_ksettings(struct net_device *netdev, int err; err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1); - if (err) { netdev_err(netdev, "%s: query port ptys failed: %d\n", __func__, err); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 98fb0d8ef2d2..10fa12aa063f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -180,18 +180,15 @@ static void mlx5e_update_vport_counters(struct mlx5e_priv *priv) { int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out); u32 *out = (u32 *)priv->stats.vport.query_vport_out; - u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)]; + u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)] = {0}; struct mlx5_core_dev *mdev = priv->mdev; - memset(in, 0, sizeof(in)); - MLX5_SET(query_vport_counter_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_COUNTER); MLX5_SET(query_vport_counter_in, in, op_mod, 0); MLX5_SET(query_vport_counter_in, in, other_vport, 0); memset(out, 0, outlen); - mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen); } @@ -2022,14 +2019,11 @@ static void mlx5e_close_drop_rq(struct mlx5e_priv *priv) static int mlx5e_create_tis(struct mlx5e_priv *priv, int tc) { struct mlx5_core_dev *mdev = priv->mdev; - u32 in[MLX5_ST_SZ_DW(create_tis_in)]; + u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {0}; void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); - memset(in, 0, sizeof(in)); - MLX5_SET(tisc, tisc, prio, tc << 1); MLX5_SET(tisc, tisc, transport_domain, mdev->mlx5e_res.td.tdn); - return mlx5_core_create_tis(mdev, in, sizeof(in), &priv->tisn[tc]); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 71411976ef1c..aaca09002ca6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -88,14 +88,10 @@ static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn) { u32 out[MLX5_ST_SZ_DW(destroy_eq_out)] = {0}; u32 in[MLX5_ST_SZ_DW(destroy_eq_in)] = {0}; - int err; MLX5_SET(destroy_eq_in, in, opcode, MLX5_CMD_OP_DESTROY_EQ); MLX5_SET(destroy_eq_in, in, eq_number, eqn); - - err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); - return err ? : mlx5_cmd_status_to_err_v2(out); - + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } static struct mlx5_eqe *get_eqe(struct mlx5_eq *eq, u32 entry) @@ -383,7 +379,6 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, eq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); - err = err ? : mlx5_cmd_status_to_err_v2(out); if (err) goto err_in; @@ -547,12 +542,9 @@ int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u32 *out, int outlen) { u32 in[MLX5_ST_SZ_DW(query_eq_in)] = {0}; - int err; MLX5_SET(query_eq_in, in, opcode, MLX5_CMD_OP_QUERY_EQ); MLX5_SET(query_eq_in, in, eq_number, eq->eqn); - - err = mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); - return err ? : mlx5_cmd_status_to_err_v2(out); + return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); } EXPORT_SYMBOL_GPL(mlx5_core_eq_query); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index f6d667797ee1..0a364bf2cd71 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -87,13 +87,9 @@ void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports); static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport, u32 events_mask) { - int in[MLX5_ST_SZ_DW(modify_nic_vport_context_in)]; - int out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)]; + int in[MLX5_ST_SZ_DW(modify_nic_vport_context_in)] = {0}; + int out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)] = {0}; void *nic_vport_ctx; - int err; - - memset(out, 0, sizeof(out)); - memset(in, 0, sizeof(in)); MLX5_SET(modify_nic_vport_context_in, in, opcode, MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); @@ -116,45 +112,31 @@ static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport, MLX5_SET(nic_vport_context, nic_vport_ctx, event_on_promisc_change, 1); - err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); - if (err) - goto ex; - err = mlx5_cmd_status_to_err_v2(out); - if (err) - goto ex; - return 0; -ex: - return err; + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } /* E-Switch vport context HW commands */ static int query_esw_vport_context_cmd(struct mlx5_core_dev *mdev, u32 vport, u32 *out, int outlen) { - u32 in[MLX5_ST_SZ_DW(query_esw_vport_context_in)]; - - memset(in, 0, sizeof(in)); + u32 in[MLX5_ST_SZ_DW(query_esw_vport_context_in)] = {0}; MLX5_SET(query_nic_vport_context_in, in, opcode, MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT); - MLX5_SET(query_esw_vport_context_in, in, vport_number, vport); if (vport) MLX5_SET(query_esw_vport_context_in, in, other_vport, 1); - - return mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, outlen); + return mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen); } static int query_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport, u16 *vlan, u8 *qos) { - u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)]; + u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {0}; int err; bool cvlan_strip; bool cvlan_insert; - memset(out, 0, sizeof(out)); - *vlan = 0; *qos = 0; @@ -188,27 +170,20 @@ out: static int modify_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport, void *in, int inlen) { - u32 out[MLX5_ST_SZ_DW(modify_esw_vport_context_out)]; - - memset(out, 0, sizeof(out)); - - MLX5_SET(modify_esw_vport_context_in, in, vport_number, vport); - if (vport) - MLX5_SET(modify_esw_vport_context_in, in, other_vport, 1); + u32 out[MLX5_ST_SZ_DW(modify_esw_vport_context_out)] = {0}; MLX5_SET(modify_esw_vport_context_in, in, opcode, MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT); - - return mlx5_cmd_exec_check_status(dev, in, inlen, - out, sizeof(out)); + MLX5_SET(modify_esw_vport_context_in, in, vport_number, vport); + if (vport) + MLX5_SET(modify_esw_vport_context_in, in, other_vport, 1); + return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); } static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport, u16 vlan, u8 qos, bool set) { - u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)]; - - memset(in, 0, sizeof(in)); + u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {0}; if (!MLX5_CAP_ESW(dev, vport_cvlan_strip) || !MLX5_CAP_ESW(dev, vport_cvlan_insert_if_not_exist)) @@ -216,7 +191,6 @@ static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport, esw_debug(dev, "Set Vport[%d] VLAN %d qos %d set=%d\n", vport, vlan, qos, set); - if (set) { MLX5_SET(modify_esw_vport_context_in, in, esw_vport_context.vport_cvlan_strip, 1); @@ -241,13 +215,10 @@ static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport, static int set_l2_table_entry_cmd(struct mlx5_core_dev *dev, u32 index, u8 *mac, u8 vlan_valid, u16 vlan) { - u32 in[MLX5_ST_SZ_DW(set_l2_table_entry_in)]; - u32 out[MLX5_ST_SZ_DW(set_l2_table_entry_out)]; + u32 in[MLX5_ST_SZ_DW(set_l2_table_entry_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(set_l2_table_entry_out)] = {0}; u8 *in_mac_addr; - memset(in, 0, sizeof(in)); - memset(out, 0, sizeof(out)); - MLX5_SET(set_l2_table_entry_in, in, opcode, MLX5_CMD_OP_SET_L2_TABLE_ENTRY); MLX5_SET(set_l2_table_entry_in, in, table_index, index); @@ -257,23 +228,18 @@ static int set_l2_table_entry_cmd(struct mlx5_core_dev *dev, u32 index, in_mac_addr = MLX5_ADDR_OF(set_l2_table_entry_in, in, mac_address); ether_addr_copy(&in_mac_addr[2], mac); - return mlx5_cmd_exec_check_status(dev, in, sizeof(in), - out, sizeof(out)); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } static int del_l2_table_entry_cmd(struct mlx5_core_dev *dev, u32 index) { - u32 in[MLX5_ST_SZ_DW(delete_l2_table_entry_in)]; - u32 out[MLX5_ST_SZ_DW(delete_l2_table_entry_out)]; - - memset(in, 0, sizeof(in)); - memset(out, 0, sizeof(out)); + u32 in[MLX5_ST_SZ_DW(delete_l2_table_entry_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(delete_l2_table_entry_out)] = {0}; MLX5_SET(delete_l2_table_entry_in, in, opcode, MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY); MLX5_SET(delete_l2_table_entry_in, in, table_index, index); - return mlx5_cmd_exec_check_status(dev, in, sizeof(in), - out, sizeof(out)); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } static int alloc_l2_table_index(struct mlx5_l2_table *l2_table, u32 *ix) @@ -1903,7 +1869,7 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, struct ifla_vf_stats *vf_stats) { int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out); - u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)]; + u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)] = {0}; int err = 0; u32 *out; @@ -1916,8 +1882,6 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, if (!out) return -ENOMEM; - memset(in, 0, sizeof(in)); - MLX5_SET(query_vport_counter_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_COUNTER); MLX5_SET(query_vport_counter_in, in, op_mod, 0); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 9134010e2921..e64499ebf2b5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -41,10 +41,8 @@ int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft) { - u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)]; - u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)]; - - memset(in, 0, sizeof(in)); + u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {0}; MLX5_SET(set_flow_table_root_in, in, opcode, MLX5_CMD_OP_SET_FLOW_TABLE_ROOT); @@ -55,9 +53,7 @@ int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev, MLX5_SET(set_flow_table_root_in, in, other_vport, 1); } - memset(out, 0, sizeof(out)); - return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, - sizeof(out)); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev, @@ -66,12 +62,10 @@ int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev, unsigned int log_size, struct mlx5_flow_table *next_ft, unsigned int *table_id) { - u32 out[MLX5_ST_SZ_DW(create_flow_table_out)]; - u32 in[MLX5_ST_SZ_DW(create_flow_table_in)]; + u32 out[MLX5_ST_SZ_DW(create_flow_table_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(create_flow_table_in)] = {0}; int err; - memset(in, 0, sizeof(in)); - MLX5_SET(create_flow_table_in, in, opcode, MLX5_CMD_OP_CREATE_FLOW_TABLE); @@ -87,10 +81,7 @@ int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev, MLX5_SET(create_flow_table_in, in, other_vport, 1); } - memset(out, 0, sizeof(out)); - err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, - sizeof(out)); - + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); if (!err) *table_id = MLX5_GET(create_flow_table_out, out, table_id); @@ -100,11 +91,8 @@ int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev, int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft) { - u32 in[MLX5_ST_SZ_DW(destroy_flow_table_in)]; - u32 out[MLX5_ST_SZ_DW(destroy_flow_table_out)]; - - memset(in, 0, sizeof(in)); - memset(out, 0, sizeof(out)); + u32 in[MLX5_ST_SZ_DW(destroy_flow_table_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(destroy_flow_table_out)] = {0}; MLX5_SET(destroy_flow_table_in, in, opcode, MLX5_CMD_OP_DESTROY_FLOW_TABLE); @@ -115,19 +103,15 @@ int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev, MLX5_SET(destroy_flow_table_in, in, other_vport, 1); } - return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, - sizeof(out)); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } int mlx5_cmd_modify_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft, struct mlx5_flow_table *next_ft) { - u32 in[MLX5_ST_SZ_DW(modify_flow_table_in)]; - u32 out[MLX5_ST_SZ_DW(modify_flow_table_out)]; - - memset(in, 0, sizeof(in)); - memset(out, 0, sizeof(out)); + u32 in[MLX5_ST_SZ_DW(modify_flow_table_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(modify_flow_table_out)] = {0}; MLX5_SET(modify_flow_table_in, in, opcode, MLX5_CMD_OP_MODIFY_FLOW_TABLE); @@ -146,8 +130,7 @@ int mlx5_cmd_modify_flow_table(struct mlx5_core_dev *dev, MLX5_SET(modify_flow_table_in, in, table_miss_mode, 0); } - return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, - sizeof(out)); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } int mlx5_cmd_create_flow_group(struct mlx5_core_dev *dev, @@ -155,12 +138,10 @@ int mlx5_cmd_create_flow_group(struct mlx5_core_dev *dev, u32 *in, unsigned int *group_id) { + u32 out[MLX5_ST_SZ_DW(create_flow_group_out)] = {0}; int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); - u32 out[MLX5_ST_SZ_DW(create_flow_group_out)]; int err; - memset(out, 0, sizeof(out)); - MLX5_SET(create_flow_group_in, in, opcode, MLX5_CMD_OP_CREATE_FLOW_GROUP); MLX5_SET(create_flow_group_in, in, table_type, ft->type); @@ -170,13 +151,10 @@ int mlx5_cmd_create_flow_group(struct mlx5_core_dev *dev, MLX5_SET(create_flow_group_in, in, other_vport, 1); } - err = mlx5_cmd_exec_check_status(dev, in, - inlen, out, - sizeof(out)); + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); if (!err) *group_id = MLX5_GET(create_flow_group_out, out, group_id); - return err; } @@ -184,11 +162,8 @@ int mlx5_cmd_destroy_flow_group(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft, unsigned int group_id) { - u32 out[MLX5_ST_SZ_DW(destroy_flow_group_out)]; - u32 in[MLX5_ST_SZ_DW(destroy_flow_group_in)]; - - memset(in, 0, sizeof(in)); - memset(out, 0, sizeof(out)); + u32 out[MLX5_ST_SZ_DW(destroy_flow_group_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(destroy_flow_group_in)] = {0}; MLX5_SET(destroy_flow_group_in, in, opcode, MLX5_CMD_OP_DESTROY_FLOW_GROUP); @@ -200,8 +175,7 @@ int mlx5_cmd_destroy_flow_group(struct mlx5_core_dev *dev, MLX5_SET(destroy_flow_group_in, in, other_vport, 1); } - return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, - sizeof(out)); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, @@ -212,7 +186,7 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, { unsigned int inlen = MLX5_ST_SZ_BYTES(set_fte_in) + fte->dests_size * MLX5_ST_SZ_BYTES(dest_format_struct); - u32 out[MLX5_ST_SZ_DW(set_fte_out)]; + u32 out[MLX5_ST_SZ_DW(set_fte_out)] = {0}; struct mlx5_flow_rule *dst; void *in_flow_context; void *in_match_value; @@ -290,11 +264,8 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, list_size); } - memset(out, 0, sizeof(out)); - err = mlx5_cmd_exec_check_status(dev, in, inlen, out, - sizeof(out)); + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); kvfree(in); - return err; } @@ -303,7 +274,7 @@ int mlx5_cmd_create_fte(struct mlx5_core_dev *dev, unsigned group_id, struct fs_fte *fte) { - return mlx5_cmd_set_fte(dev, 0, 0, ft, group_id, fte); + return mlx5_cmd_set_fte(dev, 0, 0, ft, group_id, fte); } int mlx5_cmd_update_fte(struct mlx5_core_dev *dev, @@ -327,12 +298,8 @@ int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft, unsigned int index) { - u32 out[MLX5_ST_SZ_DW(delete_fte_out)]; - u32 in[MLX5_ST_SZ_DW(delete_fte_in)]; - int err; - - memset(in, 0, sizeof(in)); - memset(out, 0, sizeof(out)); + u32 out[MLX5_ST_SZ_DW(delete_fte_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(delete_fte_in)] = {0}; MLX5_SET(delete_fte_in, in, opcode, MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY); MLX5_SET(delete_fte_in, in, table_type, ft->type); @@ -343,74 +310,55 @@ int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev, MLX5_SET(delete_fte_in, in, other_vport, 1); } - err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); - - return err; + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u16 *id) { - u32 in[MLX5_ST_SZ_DW(alloc_flow_counter_in)]; - u32 out[MLX5_ST_SZ_DW(alloc_flow_counter_out)]; + u32 in[MLX5_ST_SZ_DW(alloc_flow_counter_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(alloc_flow_counter_out)] = {0}; int err; - memset(in, 0, sizeof(in)); - memset(out, 0, sizeof(out)); - MLX5_SET(alloc_flow_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_FLOW_COUNTER); - err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, - sizeof(out)); - if (err) - return err; - - *id = MLX5_GET(alloc_flow_counter_out, out, flow_counter_id); - - return 0; + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (!err) + *id = MLX5_GET(alloc_flow_counter_out, out, flow_counter_id); + return err; } int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u16 id) { - u32 in[MLX5_ST_SZ_DW(dealloc_flow_counter_in)]; - u32 out[MLX5_ST_SZ_DW(dealloc_flow_counter_out)]; - - memset(in, 0, sizeof(in)); - memset(out, 0, sizeof(out)); + u32 in[MLX5_ST_SZ_DW(dealloc_flow_counter_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(dealloc_flow_counter_out)] = {0}; MLX5_SET(dealloc_flow_counter_in, in, opcode, MLX5_CMD_OP_DEALLOC_FLOW_COUNTER); MLX5_SET(dealloc_flow_counter_in, in, flow_counter_id, id); - - return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, - sizeof(out)); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u16 id, u64 *packets, u64 *bytes) { u32 out[MLX5_ST_SZ_BYTES(query_flow_counter_out) + - MLX5_ST_SZ_BYTES(traffic_counter)]; - u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)]; + MLX5_ST_SZ_BYTES(traffic_counter)] = {0}; + u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)] = {0}; void *stats; int err = 0; - memset(in, 0, sizeof(in)); - memset(out, 0, sizeof(out)); - MLX5_SET(query_flow_counter_in, in, opcode, MLX5_CMD_OP_QUERY_FLOW_COUNTER); MLX5_SET(query_flow_counter_in, in, op_mod, 0); MLX5_SET(query_flow_counter_in, in, flow_counter_id, id); - - err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); if (err) return err; stats = MLX5_ADDR_OF(query_flow_counter_out, out, flow_statistics); *packets = MLX5_GET64(traffic_counter, stats, packets); *bytes = MLX5_GET64(traffic_counter, stats, octets); - return 0; } @@ -448,18 +396,14 @@ void mlx5_cmd_fc_bulk_free(struct mlx5_cmd_fc_bulk *b) int mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, struct mlx5_cmd_fc_bulk *b) { - u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)]; - - memset(in, 0, sizeof(in)); + u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)] = {0}; MLX5_SET(query_flow_counter_in, in, opcode, MLX5_CMD_OP_QUERY_FLOW_COUNTER); MLX5_SET(query_flow_counter_in, in, op_mod, 0); MLX5_SET(query_flow_counter_in, in, flow_counter_id, b->id); MLX5_SET(query_flow_counter_in, in, num_of_counters, b->num); - - return mlx5_cmd_exec_check_status(dev, in, sizeof(in), - b->out, b->outlen); + return mlx5_cmd_exec(dev, in, sizeof(in), b->out, b->outlen); } void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index 56bf520d1429..5718aada6605 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -38,13 +38,10 @@ static int mlx5_cmd_query_adapter(struct mlx5_core_dev *dev, u32 *out, int outlen) { - u32 in[MLX5_ST_SZ_DW(query_adapter_in)]; - - memset(in, 0, sizeof(in)); + u32 in[MLX5_ST_SZ_DW(query_adapter_in)] = {0}; MLX5_SET(query_adapter_in, in, opcode, MLX5_CMD_OP_QUERY_ADAPTER); - - return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, outlen); + return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); } int mlx5_query_board_id(struct mlx5_core_dev *dev) @@ -164,20 +161,16 @@ int mlx5_cmd_init_hca(struct mlx5_core_dev *dev) { u32 out[MLX5_ST_SZ_DW(init_hca_out)] = {0}; u32 in[MLX5_ST_SZ_DW(init_hca_in)] = {0}; - int err; MLX5_SET(init_hca_in, in, opcode, MLX5_CMD_OP_INIT_HCA); - err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); - return err ? : mlx5_cmd_status_to_err_v2(out); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev) { u32 out[MLX5_ST_SZ_DW(teardown_hca_out)] = {0}; u32 in[MLX5_ST_SZ_DW(teardown_hca_in)] = {0}; - int err; MLX5_SET(teardown_hca_in, in, opcode, MLX5_CMD_OP_TEARDOWN_HCA); - err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); - return err ? : mlx5_cmd_status_to_err_v2(out); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mad.c b/drivers/net/ethernet/mellanox/mlx5/core/mad.c index 13e6afd52a9b..3a3b0005fd2b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mad.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mad.c @@ -60,7 +60,6 @@ int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb, memcpy(data, inb, MLX5_FLD_SZ_BYTES(mad_ifc_in, mad)); err = mlx5_cmd_exec(dev, in, inlen, out, outlen); - err = err ? : mlx5_cmd_status_to_err_v2(out); if (err) goto out; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 4f491d43e77d..7fd662236061 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -363,10 +363,6 @@ static int mlx5_core_get_caps_mode(struct mlx5_core_dev *dev, MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP); MLX5_SET(query_hca_cap_in, in, op_mod, opmod); err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz); - if (err) - goto query_ex; - - err = mlx5_cmd_status_to_err_v2(out); if (err) { mlx5_core_warn(dev, "QUERY_HCA_CAP : type(%x) opmode(%x) Failed(%d)\n", @@ -409,20 +405,11 @@ int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type) static int set_caps(struct mlx5_core_dev *dev, void *in, int in_sz, int opmod) { - u32 out[MLX5_ST_SZ_DW(set_hca_cap_out)]; - int err; - - memset(out, 0, sizeof(out)); + u32 out[MLX5_ST_SZ_DW(set_hca_cap_out)] = {0}; MLX5_SET(set_hca_cap_in, in, opcode, MLX5_CMD_OP_SET_HCA_CAP); MLX5_SET(set_hca_cap_in, in, op_mod, opmod << 1); - err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out)); - if (err) - return err; - - err = mlx5_cmd_status_to_err_v2(out); - - return err; + return mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out)); } static int handle_hca_cap_atomic(struct mlx5_core_dev *dev) @@ -528,37 +515,22 @@ static int set_hca_ctrl(struct mlx5_core_dev *dev) int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id) { - u32 out[MLX5_ST_SZ_DW(enable_hca_out)]; - u32 in[MLX5_ST_SZ_DW(enable_hca_in)]; - int err; + u32 out[MLX5_ST_SZ_DW(enable_hca_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(enable_hca_in)] = {0}; - memset(in, 0, sizeof(in)); MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA); MLX5_SET(enable_hca_in, in, function_id, func_id); - memset(out, 0, sizeof(out)); - - err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); - if (err) - return err; - - return mlx5_cmd_status_to_err_v2(out); + return mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); } int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id) { - u32 out[MLX5_ST_SZ_DW(disable_hca_out)]; - u32 in[MLX5_ST_SZ_DW(disable_hca_in)]; - int err; + u32 out[MLX5_ST_SZ_DW(disable_hca_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(disable_hca_in)] = {0}; - memset(in, 0, sizeof(in)); MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA); MLX5_SET(disable_hca_in, in, function_id, func_id); - memset(out, 0, sizeof(out)); - err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); - if (err) - return err; - - return mlx5_cmd_status_to_err_v2(out); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } cycle_t mlx5_read_internal_timer(struct mlx5_core_dev *dev) @@ -758,44 +730,40 @@ clean: static int mlx5_core_set_issi(struct mlx5_core_dev *dev) { - u32 query_in[MLX5_ST_SZ_DW(query_issi_in)]; - u32 query_out[MLX5_ST_SZ_DW(query_issi_out)]; - u32 set_in[MLX5_ST_SZ_DW(set_issi_in)]; - u32 set_out[MLX5_ST_SZ_DW(set_issi_out)]; - int err; + u32 query_in[MLX5_ST_SZ_DW(query_issi_in)] = {0}; + u32 query_out[MLX5_ST_SZ_DW(query_issi_out)] = {0}; u32 sup_issi; - - memset(query_in, 0, sizeof(query_in)); - memset(query_out, 0, sizeof(query_out)); + int err; MLX5_SET(query_issi_in, query_in, opcode, MLX5_CMD_OP_QUERY_ISSI); - - err = mlx5_cmd_exec_check_status(dev, query_in, sizeof(query_in), - query_out, sizeof(query_out)); + err = mlx5_cmd_exec(dev, query_in, sizeof(query_in), + query_out, sizeof(query_out)); if (err) { - if (((struct mlx5_outbox_hdr *)query_out)->status == - MLX5_CMD_STAT_BAD_OP_ERR) { + u32 syndrome; + u8 status; + + mlx5_cmd_mbox_status(query_out, &status, &syndrome); + if (status == MLX5_CMD_STAT_BAD_OP_ERR) { pr_debug("Only ISSI 0 is supported\n"); return 0; } - pr_err("failed to query ISSI\n"); + pr_err("failed to query ISSI err(%d)\n", err); return err; } sup_issi = MLX5_GET(query_issi_out, query_out, supported_issi_dw0); if (sup_issi & (1 << 1)) { - memset(set_in, 0, sizeof(set_in)); - memset(set_out, 0, sizeof(set_out)); + u32 set_in[MLX5_ST_SZ_DW(set_issi_in)] = {0}; + u32 set_out[MLX5_ST_SZ_DW(set_issi_out)] = {0}; MLX5_SET(set_issi_in, set_in, opcode, MLX5_CMD_OP_SET_ISSI); MLX5_SET(set_issi_in, set_in, current_issi, 1); - - err = mlx5_cmd_exec_check_status(dev, set_in, sizeof(set_in), - set_out, sizeof(set_out)); + err = mlx5_cmd_exec(dev, set_in, sizeof(set_in), + set_out, sizeof(set_out)); if (err) { - pr_err("failed to set ISSI=1\n"); + pr_err("failed to set ISSI=1 err(%d)\n", err); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mcg.c b/drivers/net/ethernet/mellanox/mlx5/core/mcg.c index 01a1abd88203..ba2b09cc192f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mcg.c @@ -42,15 +42,12 @@ int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn) u32 out[MLX5_ST_SZ_DW(attach_to_mcg_out)] = {0}; u32 in[MLX5_ST_SZ_DW(attach_to_mcg_in)] = {0}; void *gid; - int err; MLX5_SET(attach_to_mcg_in, in, opcode, MLX5_CMD_OP_ATTACH_TO_MCG); MLX5_SET(attach_to_mcg_in, in, qpn, qpn); gid = MLX5_ADDR_OF(attach_to_mcg_in, in, multicast_gid); memcpy(gid, mgid, sizeof(*mgid)); - - err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); - return err ? : mlx5_cmd_status_to_err_v2(out); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } EXPORT_SYMBOL(mlx5_core_attach_mcg); @@ -59,14 +56,11 @@ int mlx5_core_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn) u32 out[MLX5_ST_SZ_DW(detach_from_mcg_out)] = {0}; u32 in[MLX5_ST_SZ_DW(detach_from_mcg_in)] = {0}; void *gid; - int err; MLX5_SET(detach_from_mcg_in, in, opcode, MLX5_CMD_OP_DETACH_FROM_MCG); MLX5_SET(detach_from_mcg_in, in, qpn, qpn); gid = MLX5_ADDR_OF(detach_from_mcg_in, in, multicast_gid); memcpy(gid, mgid, sizeof(*mgid)); - - err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); - return err ? : mlx5_cmd_status_to_err_v2(out); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } EXPORT_SYMBOL(mlx5_core_detach_mcg); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 2f86ec6fcf25..14242f17a309 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -75,19 +75,6 @@ enum { MLX5_CMD_TIME, /* print command execution time */ }; -static inline int mlx5_cmd_exec_check_status(struct mlx5_core_dev *dev, u32 *in, - int in_size, u32 *out, - int out_size) -{ - int err; - - err = mlx5_cmd_exec(dev, in, in_size, out, out_size); - if (err) - return err; - - return mlx5_cmd_status_to_err((struct mlx5_outbox_hdr *)out); -} - int mlx5_query_hca_caps(struct mlx5_core_dev *dev); int mlx5_query_board_id(struct mlx5_core_dev *dev); int mlx5_cmd_init_hca(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c index 0a7b743caafe..b9736f505bdf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -75,7 +75,6 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, callback, context); err = mlx5_cmd_exec(dev, in, inlen, lout, sizeof(lout)); - err = err ? : mlx5_cmd_status_to_err_v2(lout); if (err) return err; @@ -119,7 +118,6 @@ int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, u32 in[MLX5_ST_SZ_DW(destroy_mkey_in)] = {0}; struct mlx5_core_mkey *deleted_mkey; unsigned long flags; - int err; write_lock_irqsave(&table->lock, flags); deleted_mkey = radix_tree_delete(&table->tree, mlx5_base_mkey(mkey->key)); @@ -132,9 +130,7 @@ int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, MLX5_SET(destroy_mkey_in, in, opcode, MLX5_CMD_OP_DESTROY_MKEY); MLX5_SET(destroy_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mkey->key)); - - err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); - return err ? : mlx5_cmd_status_to_err_v2(out); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } EXPORT_SYMBOL(mlx5_core_destroy_mkey); @@ -142,14 +138,11 @@ int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, u32 *out, int outlen) { u32 in[MLX5_ST_SZ_DW(query_mkey_in)] = {0}; - int err; memset(out, 0, outlen); MLX5_SET(query_mkey_in, in, opcode, MLX5_CMD_OP_QUERY_MKEY); MLX5_SET(query_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mkey->key)); - - err = mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); - return err ? : mlx5_cmd_status_to_err_v2(out); + return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); } EXPORT_SYMBOL(mlx5_core_query_mkey); @@ -163,11 +156,9 @@ int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *_ MLX5_SET(query_special_contexts_in, in, opcode, MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS); err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); - err = err ? : mlx5_cmd_status_to_err_v2(out); - if (err) - return err; - - *mkey = MLX5_GET(query_special_contexts_out, out, dump_fill_mkey); + if (!err) + *mkey = MLX5_GET(query_special_contexts_out, out, + dump_fill_mkey); return err; } EXPORT_SYMBOL(mlx5_core_dump_fill_mkey); @@ -197,11 +188,8 @@ int mlx5_core_create_psv(struct mlx5_core_dev *dev, u32 pdn, MLX5_SET(create_psv_in, in, num_psv, npsvs); err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); - err = err ? : mlx5_cmd_status_to_err_v2(out); - if (err) { - mlx5_core_err(dev, "create_psv cmd exec failed %d\n", err); + if (err) return err; - } for (i = 0; i < npsvs; i++) sig_index[i] = mlx5_get_psv(out, i); @@ -214,11 +202,9 @@ int mlx5_core_destroy_psv(struct mlx5_core_dev *dev, int psv_num) { u32 out[MLX5_ST_SZ_DW(destroy_psv_out)] = {0}; u32 in[MLX5_ST_SZ_DW(destroy_psv_in)] = {0}; - int err; MLX5_SET(destroy_psv_in, in, opcode, MLX5_CMD_OP_DESTROY_PSV); MLX5_SET(destroy_psv_in, in, psvn, psv_num); - err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); - return err ? : mlx5_cmd_status_to_err_v2(out); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } EXPORT_SYMBOL(mlx5_core_destroy_psv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index 7bfac21fbfee..673a7c96479a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -144,7 +144,6 @@ static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id, MLX5_QUERY_PAGES_IN_OP_MOD_INIT_PAGES); err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); - err = err ? : mlx5_cmd_status_to_err_v2(out); if (err) return err; @@ -252,8 +251,8 @@ static void page_notify_fail(struct mlx5_core_dev *dev, u16 func_id) MLX5_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES); MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_CANT_GIVE); MLX5_SET(manage_pages_in, in, function_id, func_id); + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); - err = err ? : mlx5_cmd_status_to_err_v2(out); if (err) mlx5_core_warn(dev, "page notify failed func_id(%d) err(%d)\n", func_id, err); @@ -297,7 +296,6 @@ retry: MLX5_SET(manage_pages_in, in, input_num_entries, npages); err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); - err = err ? : mlx5_cmd_status_to_err_v2(out); if (err) { mlx5_core_warn(dev, "func_id 0x%x, npages %d, err %d\n", func_id, npages, err); @@ -331,11 +329,8 @@ static int reclaim_pages_cmd(struct mlx5_core_dev *dev, u32 npages; u32 i = 0; - if (dev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) { - int err = mlx5_cmd_exec(dev, in, in_size, out, out_size); - - return err ? : mlx5_cmd_status_to_err_v2(out); - } + if (dev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) + return mlx5_cmd_exec(dev, in, in_size, out, out_size); /* No hard feelings, we want our pages back! */ npages = MLX5_GET(manage_pages_in, in, input_num_entries); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pd.c b/drivers/net/ethernet/mellanox/mlx5/core/pd.c index efe452c30a8d..bd830d8d6c5f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pd.c @@ -44,11 +44,8 @@ int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn) MLX5_SET(alloc_pd_in, in, opcode, MLX5_CMD_OP_ALLOC_PD); err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); - err = err ? : mlx5_cmd_status_to_err_v2(out); - if (err) - return err; - - *pdn = MLX5_GET(alloc_pd_out, out, pd); + if (!err) + *pdn = MLX5_GET(alloc_pd_out, out, pd); return err; } EXPORT_SYMBOL(mlx5_core_alloc_pd); @@ -57,11 +54,9 @@ int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn) { u32 out[MLX5_ST_SZ_DW(dealloc_pd_out)] = {0}; u32 in[MLX5_ST_SZ_DW(dealloc_pd_in)] = {0}; - int err; MLX5_SET(dealloc_pd_in, in, opcode, MLX5_CMD_OP_DEALLOC_PD); MLX5_SET(dealloc_pd_in, in, pd, pdn); - err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); - return err ? : mlx5_cmd_status_to_err_v2(out); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } EXPORT_SYMBOL(mlx5_core_dealloc_pd); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index e8324c2a8fc3..2ee28ad8f918 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -61,7 +61,6 @@ int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in, MLX5_SET(access_register_in, in, register_id, reg_id); err = mlx5_cmd_exec(dev, in, inlen, out, outlen); - err = err ? : mlx5_cmd_status_to_err_v2(out); if (err) goto out; @@ -102,12 +101,10 @@ EXPORT_SYMBOL_GPL(mlx5_set_port_caps); int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys, int ptys_size, int proto_mask, u8 local_port) { - u32 in[MLX5_ST_SZ_DW(ptys_reg)]; + u32 in[MLX5_ST_SZ_DW(ptys_reg)] = {0}; - memset(in, 0, sizeof(in)); MLX5_SET(ptys_reg, in, local_port, local_port); MLX5_SET(ptys_reg, in, proto_mask, proto_mask); - return mlx5_core_access_reg(dev, in, sizeof(in), ptys, ptys_size, MLX5_REG_PTYS, 0, 0); } @@ -115,13 +112,11 @@ EXPORT_SYMBOL_GPL(mlx5_query_port_ptys); int mlx5_set_port_beacon(struct mlx5_core_dev *dev, u16 beacon_duration) { + u32 in[MLX5_ST_SZ_DW(mlcr_reg)] = {0}; u32 out[MLX5_ST_SZ_DW(mlcr_reg)]; - u32 in[MLX5_ST_SZ_DW(mlcr_reg)]; - memset(in, 0, sizeof(in)); MLX5_SET(mlcr_reg, in, local_port, 1); MLX5_SET(mlcr_reg, in, beacon_duration, beacon_duration); - return mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_MLCR, 0, 1); } @@ -244,15 +239,12 @@ EXPORT_SYMBOL_GPL(mlx5_toggle_port_link); int mlx5_set_port_admin_status(struct mlx5_core_dev *dev, enum mlx5_port_status status) { - u32 in[MLX5_ST_SZ_DW(paos_reg)]; + u32 in[MLX5_ST_SZ_DW(paos_reg)] = {0}; u32 out[MLX5_ST_SZ_DW(paos_reg)]; - memset(in, 0, sizeof(in)); - MLX5_SET(paos_reg, in, local_port, 1); MLX5_SET(paos_reg, in, admin_status, status); MLX5_SET(paos_reg, in, ase, 1); - return mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_PAOS, 0, 1); } @@ -261,19 +253,15 @@ EXPORT_SYMBOL_GPL(mlx5_set_port_admin_status); int mlx5_query_port_admin_status(struct mlx5_core_dev *dev, enum mlx5_port_status *status) { - u32 in[MLX5_ST_SZ_DW(paos_reg)]; + u32 in[MLX5_ST_SZ_DW(paos_reg)] = {0}; u32 out[MLX5_ST_SZ_DW(paos_reg)]; int err; - memset(in, 0, sizeof(in)); - MLX5_SET(paos_reg, in, local_port, 1); - err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_PAOS, 0, 0); if (err) return err; - *status = MLX5_GET(paos_reg, out, admin_status); return 0; } @@ -282,13 +270,10 @@ EXPORT_SYMBOL_GPL(mlx5_query_port_admin_status); static void mlx5_query_port_mtu(struct mlx5_core_dev *dev, u16 *admin_mtu, u16 *max_mtu, u16 *oper_mtu, u8 port) { - u32 in[MLX5_ST_SZ_DW(pmtu_reg)]; + u32 in[MLX5_ST_SZ_DW(pmtu_reg)] = {0}; u32 out[MLX5_ST_SZ_DW(pmtu_reg)]; - memset(in, 0, sizeof(in)); - MLX5_SET(pmtu_reg, in, local_port, port); - mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_PMTU, 0, 0); @@ -302,14 +287,11 @@ static void mlx5_query_port_mtu(struct mlx5_core_dev *dev, u16 *admin_mtu, int mlx5_set_port_mtu(struct mlx5_core_dev *dev, u16 mtu, u8 port) { - u32 in[MLX5_ST_SZ_DW(pmtu_reg)]; + u32 in[MLX5_ST_SZ_DW(pmtu_reg)] = {0}; u32 out[MLX5_ST_SZ_DW(pmtu_reg)]; - memset(in, 0, sizeof(in)); - MLX5_SET(pmtu_reg, in, admin_mtu, mtu); MLX5_SET(pmtu_reg, in, local_port, port); - return mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_PMTU, 0, 1); } @@ -331,15 +313,12 @@ EXPORT_SYMBOL_GPL(mlx5_query_port_oper_mtu); static int mlx5_query_module_num(struct mlx5_core_dev *dev, int *module_num) { + u32 in[MLX5_ST_SZ_DW(pmlp_reg)] = {0}; u32 out[MLX5_ST_SZ_DW(pmlp_reg)]; - u32 in[MLX5_ST_SZ_DW(pmlp_reg)]; int module_mapping; int err; - memset(in, 0, sizeof(in)); - MLX5_SET(pmlp_reg, in, local_port, 1); - err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_PMLP, 0, 0); if (err) @@ -408,11 +387,9 @@ EXPORT_SYMBOL_GPL(mlx5_query_module_eeprom); static int mlx5_query_port_pvlc(struct mlx5_core_dev *dev, u32 *pvlc, int pvlc_size, u8 local_port) { - u32 in[MLX5_ST_SZ_DW(pvlc_reg)]; + u32 in[MLX5_ST_SZ_DW(pvlc_reg)] = {0}; - memset(in, 0, sizeof(in)); MLX5_SET(pvlc_reg, in, local_port, local_port); - return mlx5_core_access_reg(dev, in, sizeof(in), pvlc, pvlc_size, MLX5_REG_PVLC, 0, 0); } @@ -458,10 +435,9 @@ EXPORT_SYMBOL_GPL(mlx5_core_query_ib_ppcnt); int mlx5_set_port_pause(struct mlx5_core_dev *dev, u32 rx_pause, u32 tx_pause) { - u32 in[MLX5_ST_SZ_DW(pfcc_reg)]; + u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0}; u32 out[MLX5_ST_SZ_DW(pfcc_reg)]; - memset(in, 0, sizeof(in)); MLX5_SET(pfcc_reg, in, local_port, 1); MLX5_SET(pfcc_reg, in, pptx, tx_pause); MLX5_SET(pfcc_reg, in, pprx, rx_pause); @@ -474,13 +450,11 @@ EXPORT_SYMBOL_GPL(mlx5_set_port_pause); int mlx5_query_port_pause(struct mlx5_core_dev *dev, u32 *rx_pause, u32 *tx_pause) { - u32 in[MLX5_ST_SZ_DW(pfcc_reg)]; + u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0}; u32 out[MLX5_ST_SZ_DW(pfcc_reg)]; int err; - memset(in, 0, sizeof(in)); MLX5_SET(pfcc_reg, in, local_port, 1); - err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_PFCC, 0, 0); if (err) @@ -498,10 +472,9 @@ EXPORT_SYMBOL_GPL(mlx5_query_port_pause); int mlx5_set_port_pfc(struct mlx5_core_dev *dev, u8 pfc_en_tx, u8 pfc_en_rx) { - u32 in[MLX5_ST_SZ_DW(pfcc_reg)]; + u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0}; u32 out[MLX5_ST_SZ_DW(pfcc_reg)]; - memset(in, 0, sizeof(in)); MLX5_SET(pfcc_reg, in, local_port, 1); MLX5_SET(pfcc_reg, in, pfctx, pfc_en_tx); MLX5_SET(pfcc_reg, in, pfcrx, pfc_en_rx); @@ -515,13 +488,11 @@ EXPORT_SYMBOL_GPL(mlx5_set_port_pfc); int mlx5_query_port_pfc(struct mlx5_core_dev *dev, u8 *pfc_en_tx, u8 *pfc_en_rx) { - u32 in[MLX5_ST_SZ_DW(pfcc_reg)]; + u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0}; u32 out[MLX5_ST_SZ_DW(pfcc_reg)]; int err; - memset(in, 0, sizeof(in)); MLX5_SET(pfcc_reg, in, local_port, 1); - err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_PFCC, 0, 0); if (err) @@ -565,12 +536,11 @@ int mlx5_max_tc(struct mlx5_core_dev *mdev) int mlx5_set_port_prio_tc(struct mlx5_core_dev *mdev, u8 *prio_tc) { - u32 in[MLX5_ST_SZ_DW(qtct_reg)]; + u32 in[MLX5_ST_SZ_DW(qtct_reg)] = {0}; u32 out[MLX5_ST_SZ_DW(qtct_reg)]; int err; int i; - memset(in, 0, sizeof(in)); for (i = 0; i < 8; i++) { if (prio_tc[i] > mlx5_max_tc(mdev)) return -EINVAL; @@ -615,11 +585,9 @@ static int mlx5_query_port_qetcr_reg(struct mlx5_core_dev *mdev, u32 *out, int mlx5_set_port_tc_group(struct mlx5_core_dev *mdev, u8 *tc_group) { - u32 in[MLX5_ST_SZ_DW(qetc_reg)]; + u32 in[MLX5_ST_SZ_DW(qetc_reg)] = {0}; int i; - memset(in, 0, sizeof(in)); - for (i = 0; i <= mlx5_max_tc(mdev); i++) { MLX5_SET(qetc_reg, in, tc_configuration[i].g, 1); MLX5_SET(qetc_reg, in, tc_configuration[i].group, tc_group[i]); @@ -631,11 +599,9 @@ EXPORT_SYMBOL_GPL(mlx5_set_port_tc_group); int mlx5_set_port_tc_bw_alloc(struct mlx5_core_dev *mdev, u8 *tc_bw) { - u32 in[MLX5_ST_SZ_DW(qetc_reg)]; + u32 in[MLX5_ST_SZ_DW(qetc_reg)] = {0}; int i; - memset(in, 0, sizeof(in)); - for (i = 0; i <= mlx5_max_tc(mdev); i++) { MLX5_SET(qetc_reg, in, tc_configuration[i].b, 1); MLX5_SET(qetc_reg, in, tc_configuration[i].bw_allocation, tc_bw[i]); @@ -649,12 +615,10 @@ int mlx5_modify_port_ets_rate_limit(struct mlx5_core_dev *mdev, u8 *max_bw_value, u8 *max_bw_units) { - u32 in[MLX5_ST_SZ_DW(qetc_reg)]; + u32 in[MLX5_ST_SZ_DW(qetc_reg)] = {0}; void *ets_tcn_conf; int i; - memset(in, 0, sizeof(in)); - MLX5_SET(qetc_reg, in, port_number, 1); for (i = 0; i <= mlx5_max_tc(mdev); i++) { @@ -699,35 +663,24 @@ EXPORT_SYMBOL_GPL(mlx5_query_port_ets_rate_limit); int mlx5_set_port_wol(struct mlx5_core_dev *mdev, u8 wol_mode) { - u32 in[MLX5_ST_SZ_DW(set_wol_rol_in)]; - u32 out[MLX5_ST_SZ_DW(set_wol_rol_out)]; - - memset(in, 0, sizeof(in)); - memset(out, 0, sizeof(out)); + u32 in[MLX5_ST_SZ_DW(set_wol_rol_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(set_wol_rol_out)] = {0}; MLX5_SET(set_wol_rol_in, in, opcode, MLX5_CMD_OP_SET_WOL_ROL); MLX5_SET(set_wol_rol_in, in, wol_mode_valid, 1); MLX5_SET(set_wol_rol_in, in, wol_mode, wol_mode); - - return mlx5_cmd_exec_check_status(mdev, in, sizeof(in), - out, sizeof(out)); + return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); } EXPORT_SYMBOL_GPL(mlx5_set_port_wol); int mlx5_query_port_wol(struct mlx5_core_dev *mdev, u8 *wol_mode) { - u32 in[MLX5_ST_SZ_DW(query_wol_rol_in)]; - u32 out[MLX5_ST_SZ_DW(query_wol_rol_out)]; + u32 in[MLX5_ST_SZ_DW(query_wol_rol_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(query_wol_rol_out)] = {0}; int err; - memset(in, 0, sizeof(in)); - memset(out, 0, sizeof(out)); - MLX5_SET(query_wol_rol_in, in, opcode, MLX5_CMD_OP_QUERY_WOL_ROL); - - err = mlx5_cmd_exec_check_status(mdev, in, sizeof(in), - out, sizeof(out)); - + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); if (!err) *wol_mode = MLX5_GET(query_wol_rol_out, out, wol_mode); @@ -738,11 +691,9 @@ EXPORT_SYMBOL_GPL(mlx5_query_port_wol); static int mlx5_query_ports_check(struct mlx5_core_dev *mdev, u32 *out, int outlen) { - u32 in[MLX5_ST_SZ_DW(pcmr_reg)]; + u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {0}; - memset(in, 0, sizeof(in)); MLX5_SET(pcmr_reg, in, local_port, 1); - return mlx5_core_access_reg(mdev, in, sizeof(in), out, outlen, MLX5_REG_PCMR, 0, 0); } @@ -757,12 +708,10 @@ static int mlx5_set_ports_check(struct mlx5_core_dev *mdev, u32 *in, int inlen) int mlx5_set_port_fcs(struct mlx5_core_dev *mdev, u8 enable) { - u32 in[MLX5_ST_SZ_DW(pcmr_reg)]; + u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {0}; - memset(in, 0, sizeof(in)); MLX5_SET(pcmr_reg, in, local_port, 1); MLX5_SET(pcmr_reg, in, fcs_chk, enable); - return mlx5_set_ports_check(mdev, in, sizeof(in)); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c index 50875a4e9d58..d0a4005fe63a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c @@ -281,7 +281,6 @@ int mlx5_core_create_qp(struct mlx5_core_dev *dev, MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP); err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); - err = err ? : mlx5_cmd_status_to_err_v2(out); if (err) return err; @@ -307,7 +306,6 @@ err_cmd: MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP); MLX5_SET(destroy_qp_in, in, qpn, qp->qpn); mlx5_cmd_exec(dev, din, sizeof(din), dout, sizeof(dout)); - mlx5_cmd_status_to_err_v2(dout); return err; } EXPORT_SYMBOL_GPL(mlx5_core_create_qp); @@ -326,7 +324,6 @@ int mlx5_core_destroy_qp(struct mlx5_core_dev *dev, MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP); MLX5_SET(destroy_qp_in, in, qpn, qp->qpn); err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); - err = err ? : mlx5_cmd_status_to_err_v2(out); if (err) return err; @@ -453,7 +450,6 @@ int mlx5_core_qp_modify(struct mlx5_core_dev *dev, u16 opcode, return err; err = mlx5_cmd_exec(dev, mbox.in, mbox.inlen, mbox.out, mbox.outlen); - err = err ? : mlx5_cmd_status_to_err_v2(mbox.out); mbox_free(&mbox); return err; } @@ -478,13 +474,10 @@ int mlx5_core_qp_query(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp, u32 *out, int outlen) { u32 in[MLX5_ST_SZ_DW(query_qp_in)] = {0}; - int err; MLX5_SET(query_qp_in, in, opcode, MLX5_CMD_OP_QUERY_QP); MLX5_SET(query_qp_in, in, qpn, qp->qpn); - - err = mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); - return err ? : mlx5_cmd_status_to_err_v2(out); + return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); } EXPORT_SYMBOL_GPL(mlx5_core_qp_query); @@ -496,7 +489,6 @@ int mlx5_core_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn) MLX5_SET(alloc_xrcd_in, in, opcode, MLX5_CMD_OP_ALLOC_XRCD); err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); - err = err ? : mlx5_cmd_status_to_err_v2(out); if (!err) *xrcdn = MLX5_GET(alloc_xrcd_out, out, xrcd); return err; @@ -507,12 +499,10 @@ int mlx5_core_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn) { u32 out[MLX5_ST_SZ_DW(dealloc_xrcd_out)] = {0}; u32 in[MLX5_ST_SZ_DW(dealloc_xrcd_in)] = {0}; - int err; MLX5_SET(dealloc_xrcd_in, in, opcode, MLX5_CMD_OP_DEALLOC_XRCD); MLX5_SET(dealloc_xrcd_in, in, xrcd, xrcdn); - err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); - return err ? : mlx5_cmd_status_to_err_v2(out); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } EXPORT_SYMBOL_GPL(mlx5_core_xrcd_dealloc); @@ -522,11 +512,9 @@ int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 qpn, { u32 out[MLX5_ST_SZ_DW(page_fault_resume_out)] = {0}; u32 in[MLX5_ST_SZ_DW(page_fault_resume_in)] = {0}; - int err; MLX5_SET(page_fault_resume_in, in, opcode, MLX5_CMD_OP_PAGE_FAULT_RESUME); - MLX5_SET(page_fault_resume_in, in, qpn, qpn); if (flags & MLX5_PAGE_FAULT_RESUME_REQUESTOR) @@ -538,8 +526,7 @@ int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 qpn, if (error) MLX5_SET(page_fault_resume_in, in, error, 1); - err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); - return err ? : mlx5_cmd_status_to_err_v2(out); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } EXPORT_SYMBOL_GPL(mlx5_core_page_fault_resume); #endif @@ -615,7 +602,7 @@ int mlx5_core_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id) int err; MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER); - err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); if (!err) *counter_id = MLX5_GET(alloc_q_counter_out, out, counter_set_id); @@ -631,8 +618,7 @@ int mlx5_core_dealloc_q_counter(struct mlx5_core_dev *dev, u16 counter_id) MLX5_SET(dealloc_q_counter_in, in, opcode, MLX5_CMD_OP_DEALLOC_Q_COUNTER); MLX5_SET(dealloc_q_counter_in, in, counter_set_id, counter_id); - return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, - sizeof(out)); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } EXPORT_SYMBOL_GPL(mlx5_core_dealloc_q_counter); @@ -644,7 +630,7 @@ int mlx5_core_query_q_counter(struct mlx5_core_dev *dev, u16 counter_id, MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER); MLX5_SET(query_q_counter_in, in, clear, reset); MLX5_SET(query_q_counter_in, in, counter_set_id, counter_id); - return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, out_size); + return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size); } EXPORT_SYMBOL_GPL(mlx5_core_query_q_counter); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rl.c b/drivers/net/ethernet/mellanox/mlx5/core/rl.c index c07c28bd3d55..104902a93a0b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/rl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/rl.c @@ -63,19 +63,14 @@ static struct mlx5_rl_entry *find_rl_entry(struct mlx5_rl_table *table, static int mlx5_set_rate_limit_cmd(struct mlx5_core_dev *dev, u32 rate, u16 index) { - u32 in[MLX5_ST_SZ_DW(set_rate_limit_in)]; - u32 out[MLX5_ST_SZ_DW(set_rate_limit_out)]; - - memset(in, 0, sizeof(in)); - memset(out, 0, sizeof(out)); + u32 in[MLX5_ST_SZ_DW(set_rate_limit_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(set_rate_limit_out)] = {0}; MLX5_SET(set_rate_limit_in, in, opcode, MLX5_CMD_OP_SET_RATE_LIMIT); MLX5_SET(set_rate_limit_in, in, rate_limit_index, index); MLX5_SET(set_rate_limit_in, in, rate_limit, rate); - - return mlx5_cmd_exec_check_status(dev, in, sizeof(in), - out, sizeof(out)); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } bool mlx5_rl_is_in_range(struct mlx5_core_dev *dev, u32 rate) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/srq.c b/drivers/net/ethernet/mellanox/mlx5/core/srq.c index c07f4d01b70e..3099630015d7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/srq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/srq.c @@ -175,8 +175,8 @@ static int create_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, MLX5_SET(create_srq_in, create_in, opcode, MLX5_CMD_OP_CREATE_SRQ); - err = mlx5_cmd_exec_check_status(dev, create_in, inlen, create_out, - sizeof(create_out)); + err = mlx5_cmd_exec(dev, create_in, inlen, create_out, + sizeof(create_out)); kvfree(create_in); if (!err) srq->srqn = MLX5_GET(create_srq_out, create_out, srqn); @@ -194,8 +194,8 @@ static int destroy_srq_cmd(struct mlx5_core_dev *dev, MLX5_CMD_OP_DESTROY_SRQ); MLX5_SET(destroy_srq_in, srq_in, srqn, srq->srqn); - return mlx5_cmd_exec_check_status(dev, srq_in, sizeof(srq_in), - srq_out, sizeof(srq_out)); + return mlx5_cmd_exec(dev, srq_in, sizeof(srq_in), + srq_out, sizeof(srq_out)); } static int arm_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, @@ -209,8 +209,8 @@ static int arm_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, MLX5_SET(arm_xrc_srq_in, srq_in, xrc_srqn, srq->srqn); MLX5_SET(arm_xrc_srq_in, srq_in, lwm, lwm); - return mlx5_cmd_exec_check_status(dev, srq_in, sizeof(srq_in), - srq_out, sizeof(srq_out)); + return mlx5_cmd_exec(dev, srq_in, sizeof(srq_in), + srq_out, sizeof(srq_out)); } static int query_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, @@ -228,9 +228,8 @@ static int query_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, MLX5_SET(query_srq_in, srq_in, opcode, MLX5_CMD_OP_QUERY_SRQ); MLX5_SET(query_srq_in, srq_in, srqn, srq->srqn); - err = mlx5_cmd_exec_check_status(dev, srq_in, sizeof(srq_in), - srq_out, - MLX5_ST_SZ_BYTES(query_srq_out)); + err = mlx5_cmd_exec(dev, srq_in, sizeof(srq_in), + srq_out, MLX5_ST_SZ_BYTES(query_srq_out)); if (err) goto out; @@ -272,8 +271,8 @@ static int create_xrc_srq_cmd(struct mlx5_core_dev *dev, MLX5_CMD_OP_CREATE_XRC_SRQ); memset(create_out, 0, sizeof(create_out)); - err = mlx5_cmd_exec_check_status(dev, create_in, inlen, create_out, - sizeof(create_out)); + err = mlx5_cmd_exec(dev, create_in, inlen, create_out, + sizeof(create_out)); if (err) goto out; @@ -286,36 +285,30 @@ out: static int destroy_xrc_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq) { - u32 xrcsrq_in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)]; - u32 xrcsrq_out[MLX5_ST_SZ_DW(destroy_xrc_srq_out)]; - - memset(xrcsrq_in, 0, sizeof(xrcsrq_in)); - memset(xrcsrq_out, 0, sizeof(xrcsrq_out)); + u32 xrcsrq_in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)] = {0}; + u32 xrcsrq_out[MLX5_ST_SZ_DW(destroy_xrc_srq_out)] = {0}; MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, opcode, MLX5_CMD_OP_DESTROY_XRC_SRQ); MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); - return mlx5_cmd_exec_check_status(dev, xrcsrq_in, sizeof(xrcsrq_in), - xrcsrq_out, sizeof(xrcsrq_out)); + return mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in), + xrcsrq_out, sizeof(xrcsrq_out)); } static int arm_xrc_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, u16 lwm) { - u32 xrcsrq_in[MLX5_ST_SZ_DW(arm_xrc_srq_in)]; - u32 xrcsrq_out[MLX5_ST_SZ_DW(arm_xrc_srq_out)]; - - memset(xrcsrq_in, 0, sizeof(xrcsrq_in)); - memset(xrcsrq_out, 0, sizeof(xrcsrq_out)); + u32 xrcsrq_in[MLX5_ST_SZ_DW(arm_xrc_srq_in)] = {0}; + u32 xrcsrq_out[MLX5_ST_SZ_DW(arm_xrc_srq_out)] = {0}; MLX5_SET(arm_xrc_srq_in, xrcsrq_in, opcode, MLX5_CMD_OP_ARM_XRC_SRQ); MLX5_SET(arm_xrc_srq_in, xrcsrq_in, op_mod, MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ); MLX5_SET(arm_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); MLX5_SET(arm_xrc_srq_in, xrcsrq_in, lwm, lwm); - return mlx5_cmd_exec_check_status(dev, xrcsrq_in, sizeof(xrcsrq_in), - xrcsrq_out, sizeof(xrcsrq_out)); + return mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in), + xrcsrq_out, sizeof(xrcsrq_out)); } static int query_xrc_srq_cmd(struct mlx5_core_dev *dev, @@ -335,9 +328,9 @@ static int query_xrc_srq_cmd(struct mlx5_core_dev *dev, MLX5_SET(query_xrc_srq_in, xrcsrq_in, opcode, MLX5_CMD_OP_QUERY_XRC_SRQ); MLX5_SET(query_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); - err = mlx5_cmd_exec_check_status(dev, xrcsrq_in, sizeof(xrcsrq_in), - xrcsrq_out, - MLX5_ST_SZ_BYTES(query_xrc_srq_out)); + + err = mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in), xrcsrq_out, + MLX5_ST_SZ_BYTES(query_xrc_srq_out)); if (err) goto out; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c index 28274a6fbafe..a00ff49eec18 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c @@ -36,17 +36,14 @@ int mlx5_core_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn) { - u32 in[MLX5_ST_SZ_DW(alloc_transport_domain_in)]; - u32 out[MLX5_ST_SZ_DW(alloc_transport_domain_out)]; + u32 in[MLX5_ST_SZ_DW(alloc_transport_domain_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(alloc_transport_domain_out)] = {0}; int err; - memset(in, 0, sizeof(in)); - memset(out, 0, sizeof(out)); - MLX5_SET(alloc_transport_domain_in, in, opcode, MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN); - err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); if (!err) *tdn = MLX5_GET(alloc_transport_domain_out, out, transport_domain); @@ -57,29 +54,23 @@ EXPORT_SYMBOL(mlx5_core_alloc_transport_domain); void mlx5_core_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn) { - u32 in[MLX5_ST_SZ_DW(dealloc_transport_domain_in)]; - u32 out[MLX5_ST_SZ_DW(dealloc_transport_domain_out)]; - - memset(in, 0, sizeof(in)); - memset(out, 0, sizeof(out)); + u32 in[MLX5_ST_SZ_DW(dealloc_transport_domain_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(dealloc_transport_domain_out)] = {0}; MLX5_SET(dealloc_transport_domain_in, in, opcode, MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN); MLX5_SET(dealloc_transport_domain_in, in, transport_domain, tdn); - - mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); + mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } EXPORT_SYMBOL(mlx5_core_dealloc_transport_domain); int mlx5_core_create_rq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *rqn) { - u32 out[MLX5_ST_SZ_DW(create_rq_out)]; + u32 out[MLX5_ST_SZ_DW(create_rq_out)] = {0}; int err; MLX5_SET(create_rq_in, in, opcode, MLX5_CMD_OP_CREATE_RQ); - - memset(out, 0, sizeof(out)); - err = mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); if (!err) *rqn = MLX5_GET(create_rq_out, out, rqn); @@ -95,21 +86,18 @@ int mlx5_core_modify_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *in, int inlen) MLX5_SET(modify_rq_in, in, opcode, MLX5_CMD_OP_MODIFY_RQ); memset(out, 0, sizeof(out)); - return mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); + return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); } EXPORT_SYMBOL(mlx5_core_modify_rq); void mlx5_core_destroy_rq(struct mlx5_core_dev *dev, u32 rqn) { - u32 in[MLX5_ST_SZ_DW(destroy_rq_in)]; - u32 out[MLX5_ST_SZ_DW(destroy_rq_out)]; - - memset(in, 0, sizeof(in)); + u32 in[MLX5_ST_SZ_DW(destroy_rq_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(destroy_rq_out)] = {0}; MLX5_SET(destroy_rq_in, in, opcode, MLX5_CMD_OP_DESTROY_RQ); MLX5_SET(destroy_rq_in, in, rqn, rqn); - - mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); + mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } EXPORT_SYMBOL(mlx5_core_destroy_rq); @@ -121,19 +109,17 @@ int mlx5_core_query_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *out) MLX5_SET(query_rq_in, in, opcode, MLX5_CMD_OP_QUERY_RQ); MLX5_SET(query_rq_in, in, rqn, rqn); - return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, outlen); + return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); } EXPORT_SYMBOL(mlx5_core_query_rq); int mlx5_core_create_sq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *sqn) { - u32 out[MLX5_ST_SZ_DW(create_sq_out)]; + u32 out[MLX5_ST_SZ_DW(create_sq_out)] = {0}; int err; MLX5_SET(create_sq_in, in, opcode, MLX5_CMD_OP_CREATE_SQ); - - memset(out, 0, sizeof(out)); - err = mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); if (!err) *sqn = MLX5_GET(create_sq_out, out, sqn); @@ -142,27 +128,22 @@ int mlx5_core_create_sq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *sqn) int mlx5_core_modify_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *in, int inlen) { - u32 out[MLX5_ST_SZ_DW(modify_sq_out)]; + u32 out[MLX5_ST_SZ_DW(modify_sq_out)] = {0}; MLX5_SET(modify_sq_in, in, sqn, sqn); MLX5_SET(modify_sq_in, in, opcode, MLX5_CMD_OP_MODIFY_SQ); - - memset(out, 0, sizeof(out)); - return mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); + return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); } EXPORT_SYMBOL(mlx5_core_modify_sq); void mlx5_core_destroy_sq(struct mlx5_core_dev *dev, u32 sqn) { - u32 in[MLX5_ST_SZ_DW(destroy_sq_in)]; - u32 out[MLX5_ST_SZ_DW(destroy_sq_out)]; - - memset(in, 0, sizeof(in)); + u32 in[MLX5_ST_SZ_DW(destroy_sq_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(destroy_sq_out)] = {0}; MLX5_SET(destroy_sq_in, in, opcode, MLX5_CMD_OP_DESTROY_SQ); MLX5_SET(destroy_sq_in, in, sqn, sqn); - - mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); + mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } int mlx5_core_query_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *out) @@ -172,21 +153,20 @@ int mlx5_core_query_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *out) MLX5_SET(query_sq_in, in, opcode, MLX5_CMD_OP_QUERY_SQ); MLX5_SET(query_sq_in, in, sqn, sqn); - - return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, outlen); + return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); } EXPORT_SYMBOL(mlx5_core_query_sq); int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *tirn) { - u32 out[MLX5_ST_SZ_DW(create_tir_out)]; + u32 out[MLX5_ST_SZ_DW(create_tir_out)] = {0}; int err; MLX5_SET(create_tir_in, in, opcode, MLX5_CMD_OP_CREATE_TIR); memset(out, 0, sizeof(out)); - err = mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); if (!err) *tirn = MLX5_GET(create_tir_out, out, tirn); @@ -197,39 +177,32 @@ EXPORT_SYMBOL(mlx5_core_create_tir); int mlx5_core_modify_tir(struct mlx5_core_dev *dev, u32 tirn, u32 *in, int inlen) { - u32 out[MLX5_ST_SZ_DW(modify_tir_out)]; + u32 out[MLX5_ST_SZ_DW(modify_tir_out)] = {0}; MLX5_SET(modify_tir_in, in, tirn, tirn); MLX5_SET(modify_tir_in, in, opcode, MLX5_CMD_OP_MODIFY_TIR); - - memset(out, 0, sizeof(out)); - return mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); + return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); } void mlx5_core_destroy_tir(struct mlx5_core_dev *dev, u32 tirn) { - u32 in[MLX5_ST_SZ_DW(destroy_tir_in)]; - u32 out[MLX5_ST_SZ_DW(destroy_tir_out)]; - - memset(in, 0, sizeof(in)); + u32 in[MLX5_ST_SZ_DW(destroy_tir_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(destroy_tir_out)] = {0}; MLX5_SET(destroy_tir_in, in, opcode, MLX5_CMD_OP_DESTROY_TIR); MLX5_SET(destroy_tir_in, in, tirn, tirn); - - mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); + mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } EXPORT_SYMBOL(mlx5_core_destroy_tir); int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *tisn) { - u32 out[MLX5_ST_SZ_DW(create_tis_out)]; + u32 out[MLX5_ST_SZ_DW(create_tis_out)] = {0}; int err; MLX5_SET(create_tis_in, in, opcode, MLX5_CMD_OP_CREATE_TIS); - - memset(out, 0, sizeof(out)); - err = mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); if (!err) *tisn = MLX5_GET(create_tis_out, out, tisn); @@ -245,34 +218,29 @@ int mlx5_core_modify_tis(struct mlx5_core_dev *dev, u32 tisn, u32 *in, MLX5_SET(modify_tis_in, in, tisn, tisn); MLX5_SET(modify_tis_in, in, opcode, MLX5_CMD_OP_MODIFY_TIS); - return mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); + return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); } EXPORT_SYMBOL(mlx5_core_modify_tis); void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn) { - u32 in[MLX5_ST_SZ_DW(destroy_tis_in)]; - u32 out[MLX5_ST_SZ_DW(destroy_tis_out)]; - - memset(in, 0, sizeof(in)); + u32 in[MLX5_ST_SZ_DW(destroy_tis_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(destroy_tis_out)] = {0}; MLX5_SET(destroy_tis_in, in, opcode, MLX5_CMD_OP_DESTROY_TIS); MLX5_SET(destroy_tis_in, in, tisn, tisn); - - mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); + mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } EXPORT_SYMBOL(mlx5_core_destroy_tis); int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *rmpn) { - u32 out[MLX5_ST_SZ_DW(create_rmp_out)]; + u32 out[MLX5_ST_SZ_DW(create_rmp_out)] = {0}; int err; MLX5_SET(create_rmp_in, in, opcode, MLX5_CMD_OP_CREATE_RMP); - - memset(out, 0, sizeof(out)); - err = mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); if (!err) *rmpn = MLX5_GET(create_rmp_out, out, rmpn); @@ -281,38 +249,31 @@ int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen, int mlx5_core_modify_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen) { - u32 out[MLX5_ST_SZ_DW(modify_rmp_out)]; + u32 out[MLX5_ST_SZ_DW(modify_rmp_out)] = {0}; MLX5_SET(modify_rmp_in, in, opcode, MLX5_CMD_OP_MODIFY_RMP); - - memset(out, 0, sizeof(out)); - return mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); + return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); } int mlx5_core_destroy_rmp(struct mlx5_core_dev *dev, u32 rmpn) { - u32 in[MLX5_ST_SZ_DW(destroy_rmp_in)]; - u32 out[MLX5_ST_SZ_DW(destroy_rmp_out)]; - - memset(in, 0, sizeof(in)); + u32 in[MLX5_ST_SZ_DW(destroy_rmp_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(destroy_rmp_out)] = {0}; MLX5_SET(destroy_rmp_in, in, opcode, MLX5_CMD_OP_DESTROY_RMP); MLX5_SET(destroy_rmp_in, in, rmpn, rmpn); - - return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } int mlx5_core_query_rmp(struct mlx5_core_dev *dev, u32 rmpn, u32 *out) { - u32 in[MLX5_ST_SZ_DW(query_rmp_in)]; + u32 in[MLX5_ST_SZ_DW(query_rmp_in)] = {0}; int outlen = MLX5_ST_SZ_BYTES(query_rmp_out); - memset(in, 0, sizeof(in)); MLX5_SET(query_rmp_in, in, opcode, MLX5_CMD_OP_QUERY_RMP); MLX5_SET(query_rmp_in, in, rmpn, rmpn); - - return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, outlen); + return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); } int mlx5_core_arm_rmp(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm) @@ -347,13 +308,11 @@ int mlx5_core_arm_rmp(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm) int mlx5_core_create_xsrq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *xsrqn) { - u32 out[MLX5_ST_SZ_DW(create_xrc_srq_out)]; + u32 out[MLX5_ST_SZ_DW(create_xrc_srq_out)] = {0}; int err; MLX5_SET(create_xrc_srq_in, in, opcode, MLX5_CMD_OP_CREATE_XRC_SRQ); - - memset(out, 0, sizeof(out)); - err = mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); if (!err) *xsrqn = MLX5_GET(create_xrc_srq_out, out, xrc_srqn); @@ -362,33 +321,25 @@ int mlx5_core_create_xsrq(struct mlx5_core_dev *dev, u32 *in, int inlen, int mlx5_core_destroy_xsrq(struct mlx5_core_dev *dev, u32 xsrqn) { - u32 in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)]; - u32 out[MLX5_ST_SZ_DW(destroy_xrc_srq_out)]; - - memset(in, 0, sizeof(in)); - memset(out, 0, sizeof(out)); + u32 in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(destroy_xrc_srq_out)] = {0}; MLX5_SET(destroy_xrc_srq_in, in, opcode, MLX5_CMD_OP_DESTROY_XRC_SRQ); MLX5_SET(destroy_xrc_srq_in, in, xrc_srqn, xsrqn); - - return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, - sizeof(out)); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } int mlx5_core_query_xsrq(struct mlx5_core_dev *dev, u32 xsrqn, u32 *out) { - u32 in[MLX5_ST_SZ_DW(query_xrc_srq_in)]; + u32 in[MLX5_ST_SZ_DW(query_xrc_srq_in)] = {0}; void *srqc; void *xrc_srqc; int err; - memset(in, 0, sizeof(in)); MLX5_SET(query_xrc_srq_in, in, opcode, MLX5_CMD_OP_QUERY_XRC_SRQ); MLX5_SET(query_xrc_srq_in, in, xrc_srqn, xsrqn); - - err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), - out, - MLX5_ST_SZ_BYTES(query_xrc_srq_out)); + err = mlx5_cmd_exec(dev, in, sizeof(in), out, + MLX5_ST_SZ_BYTES(query_xrc_srq_out)); if (!err) { xrc_srqc = MLX5_ADDR_OF(query_xrc_srq_out, out, xrc_srq_context_entry); @@ -401,32 +352,25 @@ int mlx5_core_query_xsrq(struct mlx5_core_dev *dev, u32 xsrqn, u32 *out) int mlx5_core_arm_xsrq(struct mlx5_core_dev *dev, u32 xsrqn, u16 lwm) { - u32 in[MLX5_ST_SZ_DW(arm_xrc_srq_in)]; - u32 out[MLX5_ST_SZ_DW(arm_xrc_srq_out)]; - - memset(in, 0, sizeof(in)); - memset(out, 0, sizeof(out)); + u32 in[MLX5_ST_SZ_DW(arm_xrc_srq_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(arm_xrc_srq_out)] = {0}; MLX5_SET(arm_xrc_srq_in, in, opcode, MLX5_CMD_OP_ARM_XRC_SRQ); MLX5_SET(arm_xrc_srq_in, in, xrc_srqn, xsrqn); MLX5_SET(arm_xrc_srq_in, in, lwm, lwm); MLX5_SET(arm_xrc_srq_in, in, op_mod, MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ); - - return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, - sizeof(out)); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } int mlx5_core_create_rqt(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *rqtn) { - u32 out[MLX5_ST_SZ_DW(create_rqt_out)]; + u32 out[MLX5_ST_SZ_DW(create_rqt_out)] = {0}; int err; MLX5_SET(create_rqt_in, in, opcode, MLX5_CMD_OP_CREATE_RQT); - - memset(out, 0, sizeof(out)); - err = mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); if (!err) *rqtn = MLX5_GET(create_rqt_out, out, rqtn); @@ -437,25 +381,20 @@ EXPORT_SYMBOL(mlx5_core_create_rqt); int mlx5_core_modify_rqt(struct mlx5_core_dev *dev, u32 rqtn, u32 *in, int inlen) { - u32 out[MLX5_ST_SZ_DW(modify_rqt_out)]; + u32 out[MLX5_ST_SZ_DW(modify_rqt_out)] = {0}; MLX5_SET(modify_rqt_in, in, rqtn, rqtn); MLX5_SET(modify_rqt_in, in, opcode, MLX5_CMD_OP_MODIFY_RQT); - - memset(out, 0, sizeof(out)); - return mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); + return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); } void mlx5_core_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn) { - u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)]; - u32 out[MLX5_ST_SZ_DW(destroy_rqt_out)]; - - memset(in, 0, sizeof(in)); + u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(destroy_rqt_out)] = {0}; MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT); MLX5_SET(destroy_rqt_in, in, rqtn, rqtn); - - mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); + mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } EXPORT_SYMBOL(mlx5_core_destroy_rqt); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/uar.c b/drivers/net/ethernet/mellanox/mlx5/core/uar.c index d0a0e0bc77e4..ab0b896621a0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/uar.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/uar.c @@ -50,11 +50,8 @@ int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn) MLX5_SET(alloc_uar_in, in, opcode, MLX5_CMD_OP_ALLOC_UAR); err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); - err = err ? : mlx5_cmd_status_to_err_v2(out); - if (err) - return err; - - *uarn = MLX5_GET(alloc_uar_out, out, uar); + if (!err) + *uarn = MLX5_GET(alloc_uar_out, out, uar); return err; } EXPORT_SYMBOL(mlx5_cmd_alloc_uar); @@ -63,12 +60,10 @@ int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn) { u32 out[MLX5_ST_SZ_DW(dealloc_uar_out)] = {0}; u32 in[MLX5_ST_SZ_DW(dealloc_uar_in)] = {0}; - int err; MLX5_SET(dealloc_uar_in, in, opcode, MLX5_CMD_OP_DEALLOC_UAR); MLX5_SET(dealloc_uar_in, in, uar, uarn); - err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); - return err ? : mlx5_cmd_status_to_err_v2(out); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } EXPORT_SYMBOL(mlx5_cmd_free_uar); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index 21365d06982b..3593bf78caf4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -39,10 +39,7 @@ static int _mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport, u32 *out, int outlen) { - int err; - u32 in[MLX5_ST_SZ_DW(query_vport_state_in)]; - - memset(in, 0, sizeof(in)); + u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {0}; MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE); @@ -51,11 +48,7 @@ static int _mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, if (vport) MLX5_SET(query_vport_state_in, in, other_vport, 1); - err = mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, outlen); - if (err) - mlx5_core_warn(mdev, "MLX5_CMD_OP_QUERY_VPORT_STATE failed\n"); - - return err; + return mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen); } u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport) @@ -81,58 +74,43 @@ EXPORT_SYMBOL_GPL(mlx5_query_vport_admin_state); int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport, u8 state) { - u32 in[MLX5_ST_SZ_DW(modify_vport_state_in)]; - u32 out[MLX5_ST_SZ_DW(modify_vport_state_out)]; - int err; - - memset(in, 0, sizeof(in)); + u32 in[MLX5_ST_SZ_DW(modify_vport_state_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(modify_vport_state_out)] = {0}; MLX5_SET(modify_vport_state_in, in, opcode, MLX5_CMD_OP_MODIFY_VPORT_STATE); MLX5_SET(modify_vport_state_in, in, op_mod, opmod); MLX5_SET(modify_vport_state_in, in, vport_number, vport); - if (vport) MLX5_SET(modify_vport_state_in, in, other_vport, 1); - MLX5_SET(modify_vport_state_in, in, admin_state, state); - err = mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, - sizeof(out)); - if (err) - mlx5_core_warn(mdev, "MLX5_CMD_OP_MODIFY_VPORT_STATE failed\n"); - - return err; + return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); } EXPORT_SYMBOL_GPL(mlx5_modify_vport_admin_state); static int mlx5_query_nic_vport_context(struct mlx5_core_dev *mdev, u16 vport, u32 *out, int outlen) { - u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)]; - - memset(in, 0, sizeof(in)); + u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)] = {0}; MLX5_SET(query_nic_vport_context_in, in, opcode, MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT); - MLX5_SET(query_nic_vport_context_in, in, vport_number, vport); if (vport) MLX5_SET(query_nic_vport_context_in, in, other_vport, 1); - return mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, outlen); + return mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen); } static int mlx5_modify_nic_vport_context(struct mlx5_core_dev *mdev, void *in, int inlen) { - u32 out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)]; + u32 out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)] = {0}; MLX5_SET(modify_nic_vport_context_in, in, opcode, MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); - - memset(out, 0, sizeof(out)); - return mlx5_cmd_exec_check_status(mdev, in, inlen, out, sizeof(out)); + return mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out)); } void mlx5_query_nic_vport_min_inline(struct mlx5_core_dev *mdev, @@ -254,7 +232,7 @@ int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev, u8 addr_list[][ETH_ALEN], int *list_size) { - u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)]; + u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)] = {0}; void *nic_vport_ctx; int max_list_size; int req_list_size; @@ -278,7 +256,6 @@ int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev, out_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) + req_list_size * MLX5_ST_SZ_BYTES(mac_address_layout); - memset(in, 0, sizeof(in)); out = kzalloc(out_sz, GFP_KERNEL); if (!out) return -ENOMEM; @@ -291,7 +268,7 @@ int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev, if (vport) MLX5_SET(query_nic_vport_context_in, in, other_vport, 1); - err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, out_sz); + err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz); if (err) goto out; @@ -361,7 +338,7 @@ int mlx5_modify_nic_vport_mac_list(struct mlx5_core_dev *dev, ether_addr_copy(curr_mac, addr_list[i]); } - err = mlx5_cmd_exec_check_status(dev, in, in_sz, out, sizeof(out)); + err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out)); kfree(in); return err; } @@ -406,7 +383,7 @@ int mlx5_query_nic_vport_vlans(struct mlx5_core_dev *dev, if (vport) MLX5_SET(query_nic_vport_context_in, in, other_vport, 1); - err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, out_sz); + err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz); if (err) goto out; @@ -473,7 +450,7 @@ int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev, MLX5_SET(vlan_layout, vlan_addr, vlan, vlans[i]); } - err = mlx5_cmd_exec_check_status(dev, in, in_sz, out, sizeof(out)); + err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out)); kfree(in); return err; } @@ -631,10 +608,6 @@ int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport, if (err) goto out; - err = mlx5_cmd_status_to_err_v2(out); - if (err) - goto out; - tmp = out + MLX5_ST_SZ_BYTES(query_hca_vport_gid_out); gid->global.subnet_prefix = tmp->global.subnet_prefix; gid->global.interface_id = tmp->global.interface_id; @@ -700,10 +673,6 @@ int mlx5_query_hca_vport_pkey(struct mlx5_core_dev *dev, u8 other_vport, if (err) goto out; - err = mlx5_cmd_status_to_err_v2(out); - if (err) - goto out; - pkarr = MLX5_ADDR_OF(query_hca_vport_pkey_out, out, pkey); for (i = 0; i < nout; i++, pkey++, pkarr += MLX5_ST_SZ_BYTES(pkey)) *pkey = MLX5_GET_PR(pkey, pkarr, pkey); @@ -721,7 +690,7 @@ int mlx5_query_hca_vport_context(struct mlx5_core_dev *dev, struct mlx5_hca_vport_context *rep) { int out_sz = MLX5_ST_SZ_BYTES(query_hca_vport_context_out); - int in[MLX5_ST_SZ_DW(query_hca_vport_context_in)]; + int in[MLX5_ST_SZ_DW(query_hca_vport_context_in)] = {0}; int is_group_manager; void *out; void *ctx; @@ -729,7 +698,6 @@ int mlx5_query_hca_vport_context(struct mlx5_core_dev *dev, is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager); - memset(in, 0, sizeof(in)); out = kzalloc(out_sz, GFP_KERNEL); if (!out) return -ENOMEM; @@ -750,9 +718,6 @@ int mlx5_query_hca_vport_context(struct mlx5_core_dev *dev, MLX5_SET(query_hca_vport_context_in, in, port_num, port_num); err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz); - if (err) - goto ex; - err = mlx5_cmd_status_to_err_v2(out); if (err) goto ex; @@ -969,10 +934,6 @@ int mlx5_core_query_vport_counter(struct mlx5_core_dev *dev, u8 other_vport, MLX5_SET(query_vport_counter_in, in, port_num, port_num); err = mlx5_cmd_exec(dev, in, in_sz, out, out_sz); - if (err) - goto free; - err = mlx5_cmd_status_to_err_v2(out); - free: kvfree(in); return err; @@ -1035,11 +996,6 @@ int mlx5_core_modify_hca_vport_context(struct mlx5_core_dev *dev, MLX5_SET(hca_vport_context, ctx, qkey_violation_counter, req->qkey_violation_counter); MLX5_SET(hca_vport_context, ctx, pkey_violation_counter, req->pkey_violation_counter); err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out)); - if (err) - goto ex; - - err = mlx5_cmd_status_to_err_v2(out); - ex: kfree(in); return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c index e25a73ed2981..07a9ba6cfc70 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c @@ -46,41 +46,24 @@ void mlx5e_vxlan_init(struct mlx5e_priv *priv) static int mlx5e_vxlan_core_add_port_cmd(struct mlx5_core_dev *mdev, u16 port) { - struct mlx5_outbox_hdr *hdr; - int err; - - u32 in[MLX5_ST_SZ_DW(add_vxlan_udp_dport_in)]; - u32 out[MLX5_ST_SZ_DW(add_vxlan_udp_dport_out)]; - - memset(in, 0, sizeof(in)); - memset(out, 0, sizeof(out)); + u32 in[MLX5_ST_SZ_DW(add_vxlan_udp_dport_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(add_vxlan_udp_dport_out)] = {0}; MLX5_SET(add_vxlan_udp_dport_in, in, opcode, MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT); MLX5_SET(add_vxlan_udp_dport_in, in, vxlan_udp_port, port); - - err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); - if (err) - return err; - - hdr = (struct mlx5_outbox_hdr *)out; - return hdr->status ? -ENOMEM : 0; + return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); } static int mlx5e_vxlan_core_del_port_cmd(struct mlx5_core_dev *mdev, u16 port) { - u32 in[MLX5_ST_SZ_DW(delete_vxlan_udp_dport_in)]; - u32 out[MLX5_ST_SZ_DW(delete_vxlan_udp_dport_out)]; - - memset(in, 0, sizeof(in)); - memset(out, 0, sizeof(out)); + u32 in[MLX5_ST_SZ_DW(delete_vxlan_udp_dport_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(delete_vxlan_udp_dport_out)] = {0}; MLX5_SET(delete_vxlan_udp_dport_in, in, opcode, MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT); MLX5_SET(delete_vxlan_udp_dport_in, in, vxlan_udp_port, port); - - return mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, - sizeof(out)); + return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); } struct mlx5e_vxlan *mlx5e_vxlan_lookup_port(struct mlx5e_priv *priv, u16 port) diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index fb002db1e2f0..2575070c836e 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -398,33 +398,6 @@ enum { MLX5_MAX_SGE_RD = (512 - 16 - 16) / 16 }; -struct mlx5_inbox_hdr { - __be16 opcode; - u8 rsvd[4]; - __be16 opmod; -}; - -struct mlx5_outbox_hdr { - u8 status; - u8 rsvd[3]; - __be32 syndrome; -}; - -struct mlx5_cmd_query_adapter_mbox_in { - struct mlx5_inbox_hdr hdr; - u8 rsvd[8]; -}; - -struct mlx5_cmd_query_adapter_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd0[24]; - u8 intapin; - u8 rsvd1[13]; - __be16 vsd_vendor_id; - u8 vsd[208]; - u8 vsd_psid[16]; -}; - enum mlx5_odp_transport_cap_bits { MLX5_ODP_SUPPORT_SEND = 1 << 31, MLX5_ODP_SUPPORT_RECV = 1 << 30, @@ -457,7 +430,6 @@ struct mlx5_cmd_layout { u8 status_own; }; - struct health_buffer { __be32 assert_var[5]; __be32 rsvd0[3]; @@ -819,93 +791,6 @@ struct mlx5_cqe128 { struct mlx5_cqe64 cqe64; }; -struct mlx5_srq_ctx { - u8 state_log_sz; - u8 rsvd0[3]; - __be32 flags_xrcd; - __be32 pgoff_cqn; - u8 rsvd1[4]; - u8 log_pg_sz; - u8 rsvd2[7]; - __be32 pd; - __be16 lwm; - __be16 wqe_cnt; - u8 rsvd3[8]; - __be64 db_record; -}; - -struct mlx5_create_srq_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 input_srqn; - u8 rsvd0[4]; - struct mlx5_srq_ctx ctx; - u8 rsvd1[208]; - __be64 pas[0]; -}; - -struct mlx5_create_srq_mbox_out { - struct mlx5_outbox_hdr hdr; - __be32 srqn; - u8 rsvd[4]; -}; - -struct mlx5_destroy_srq_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 srqn; - u8 rsvd[4]; -}; - -struct mlx5_destroy_srq_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd[8]; -}; - -struct mlx5_query_srq_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 srqn; - u8 rsvd0[4]; -}; - -struct mlx5_query_srq_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd0[8]; - struct mlx5_srq_ctx ctx; - u8 rsvd1[32]; - __be64 pas[0]; -}; - -struct mlx5_arm_srq_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 srqn; - __be16 rsvd; - __be16 lwm; -}; - -struct mlx5_arm_srq_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd[8]; -}; - -struct mlx5_enable_hca_mbox_in { - struct mlx5_inbox_hdr hdr; - u8 rsvd[8]; -}; - -struct mlx5_enable_hca_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd[8]; -}; - -struct mlx5_disable_hca_mbox_in { - struct mlx5_inbox_hdr hdr; - u8 rsvd[8]; -}; - -struct mlx5_disable_hca_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd[8]; -}; - enum { MLX5_MKEY_STATUS_FREE = 1 << 6, }; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 173817187abb..ebe57abf3324 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -771,14 +771,15 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev); void mlx5_cmd_cleanup(struct mlx5_core_dev *dev); void mlx5_cmd_use_events(struct mlx5_core_dev *dev); void mlx5_cmd_use_polling(struct mlx5_core_dev *dev); -int mlx5_cmd_status_to_err(struct mlx5_outbox_hdr *hdr); -int mlx5_cmd_status_to_err_v2(void *ptr); -int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type); + int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size); int mlx5_cmd_exec_cb(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size, mlx5_cmd_cbk_t callback, void *context); +void mlx5_cmd_mbox_status(void *out, u8 *status, u32 *syndrome); + +int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type); int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn); int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn); int mlx5_alloc_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari); From 2974ab6e8bd8ad06d30ed471b91f8ff7aa6debd8 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 28 Jul 2016 16:43:17 +0300 Subject: [PATCH 0247/1715] net/mlx5: Improve driver log messages Remove duplicate pci dev name printing in mlx5_core_err. Use mlx5_core_{warn,info,err} where possible to have the pci info in the driver log messages. Signed-off-by: Saeed Mahameed Signed-off-by: Parvi Kaustubhi Signed-off-by: Leon Romanovsky --- .../net/ethernet/mellanox/mlx5/core/en_rep.c | 4 +-- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 26 +++++++++++-------- .../net/ethernet/mellanox/mlx5/core/fs_core.c | 18 +++++++------ .../net/ethernet/mellanox/mlx5/core/main.c | 11 ++++---- .../ethernet/mellanox/mlx5/core/mlx5_core.h | 4 +-- .../net/ethernet/mellanox/mlx5/core/sriov.c | 7 ++--- 6 files changed, 39 insertions(+), 31 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 1c7d8b8314bf..681c12c14005 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -416,8 +416,8 @@ int mlx5e_vport_rep_load(struct mlx5_eswitch *esw, { rep->priv_data = mlx5e_create_netdev(esw->dev, &mlx5e_rep_profile, rep); if (!rep->priv_data) { - pr_warn("Failed to create representor for vport %d\n", - rep->vport); + mlx5_core_warn(esw->dev, "Failed to create representor for vport %d\n", + rep->vport); return -EINVAL; } return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 0a364bf2cd71..7c4935993b99 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -306,7 +306,7 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule, spec = mlx5_vzalloc(sizeof(*spec)); if (!spec) { - pr_warn("FDB: Failed to alloc match parameters\n"); + esw_warn(esw->dev, "FDB: Failed to alloc match parameters\n"); return NULL; } dmac_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, @@ -340,8 +340,8 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, 0, &dest); if (IS_ERR(flow_rule)) { - pr_warn( - "FDB: Failed to add flow rule: dmac_v(%pM) dmac_c(%pM) -> vport(%d), err(%ld)\n", + esw_warn(esw->dev, + "FDB: Failed to add flow rule: dmac_v(%pM) dmac_c(%pM) -> vport(%d), err(%ld)\n", dmac_v, dmac_c, vport, PTR_ERR(flow_rule)); flow_rule = NULL; } @@ -1318,8 +1318,9 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, 0, NULL); if (IS_ERR(vport->ingress.allow_rule)) { err = PTR_ERR(vport->ingress.allow_rule); - pr_warn("vport[%d] configure ingress allow rule, err(%d)\n", - vport->vport, err); + esw_warn(esw->dev, + "vport[%d] configure ingress allow rule, err(%d)\n", + vport->vport, err); vport->ingress.allow_rule = NULL; goto out; } @@ -1331,8 +1332,9 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, 0, NULL); if (IS_ERR(vport->ingress.drop_rule)) { err = PTR_ERR(vport->ingress.drop_rule); - pr_warn("vport[%d] configure ingress drop rule, err(%d)\n", - vport->vport, err); + esw_warn(esw->dev, + "vport[%d] configure ingress drop rule, err(%d)\n", + vport->vport, err); vport->ingress.drop_rule = NULL; goto out; } @@ -1384,8 +1386,9 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, 0, NULL); if (IS_ERR(vport->egress.allowed_vlan)) { err = PTR_ERR(vport->egress.allowed_vlan); - pr_warn("vport[%d] configure egress allowed vlan rule failed, err(%d)\n", - vport->vport, err); + esw_warn(esw->dev, + "vport[%d] configure egress allowed vlan rule failed, err(%d)\n", + vport->vport, err); vport->egress.allowed_vlan = NULL; goto out; } @@ -1398,8 +1401,9 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, 0, NULL); if (IS_ERR(vport->egress.drop_rule)) { err = PTR_ERR(vport->egress.drop_rule); - pr_warn("vport[%d] configure egress drop rule failed, err(%d)\n", - vport->vport, err); + esw_warn(esw->dev, + "vport[%d] configure egress drop rule failed, err(%d)\n", + vport->vport, err); vport->egress.drop_rule = NULL; } out: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 75bb8c864557..243efc5a8bd1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -345,7 +345,7 @@ static void del_flow_table(struct fs_node *node) err = mlx5_cmd_destroy_flow_table(dev, ft); if (err) - pr_warn("flow steering can't destroy ft\n"); + mlx5_core_warn(dev, "flow steering can't destroy ft\n"); fs_get_obj(prio, ft->node.parent); prio->num_ft--; } @@ -364,7 +364,7 @@ static void del_rule(struct fs_node *node) match_value = mlx5_vzalloc(match_len); if (!match_value) { - pr_warn("failed to allocate inbox\n"); + mlx5_core_warn(dev, "failed to allocate inbox\n"); return; } @@ -387,8 +387,9 @@ static void del_rule(struct fs_node *node) modify_mask, fte); if (err) - pr_warn("%s can't del rule fg id=%d fte_index=%d\n", - __func__, fg->id, fte->index); + mlx5_core_warn(dev, + "%s can't del rule fg id=%d fte_index=%d\n", + __func__, fg->id, fte->index); } kvfree(match_value); } @@ -409,8 +410,9 @@ static void del_fte(struct fs_node *node) err = mlx5_cmd_delete_fte(dev, ft, fte->index); if (err) - pr_warn("flow steering can't delete fte in index %d of flow group id %d\n", - fte->index, fg->id); + mlx5_core_warn(dev, + "flow steering can't delete fte in index %d of flow group id %d\n", + fte->index, fg->id); fte->status = 0; fg->num_ftes--; @@ -427,8 +429,8 @@ static void del_flow_group(struct fs_node *node) dev = get_dev(&ft->node); if (mlx5_cmd_destroy_flow_group(dev, ft, fg->id)) - pr_warn("flow steering can't destroy fg %d of ft %d\n", - fg->id, ft->id); + mlx5_core_warn(dev, "flow steering can't destroy fg %d of ft %d\n", + fg->id, ft->id); } static struct fs_fte *alloc_fte(u8 action, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 7fd662236061..d6446a3623b2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -324,7 +324,7 @@ enum { MLX5_DEV_CAP_FLAG_DCT, }; -static u16 to_fw_pkey_sz(u32 size) +static u16 to_fw_pkey_sz(struct mlx5_core_dev *dev, u32 size) { switch (size) { case 128: @@ -340,7 +340,7 @@ static u16 to_fw_pkey_sz(u32 size) case 4096: return 5; default: - pr_warn("invalid pkey table size %d\n", size); + mlx5_core_warn(dev, "invalid pkey table size %d\n", size); return 0; } } @@ -477,7 +477,7 @@ static int handle_hca_cap(struct mlx5_core_dev *dev) 128); /* we limit the size of the pkey table to 128 entries for now */ MLX5_SET(cmd_hca_cap, set_hca_cap, pkey_table_size, - to_fw_pkey_sz(128)); + to_fw_pkey_sz(dev, 128)); if (prof->mask & MLX5_PROF_MASK_QP_SIZE) MLX5_SET(cmd_hca_cap, set_hca_cap, log_max_qp, @@ -1312,8 +1312,9 @@ static int init_one(struct pci_dev *pdev, pci_set_drvdata(pdev, dev); if (prof_sel < 0 || prof_sel >= ARRAY_SIZE(profile)) { - pr_warn("selected profile out of range, selecting default (%d)\n", - MLX5_DEFAULT_PROF); + mlx5_core_warn(dev, + "selected profile out of range, selecting default (%d)\n", + MLX5_DEFAULT_PROF); prof_sel = MLX5_DEFAULT_PROF; } dev->profile = &profile[prof_sel]; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 14242f17a309..ef0eeedcceb6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -58,8 +58,8 @@ do { \ } while (0) #define mlx5_core_err(__dev, format, ...) \ - dev_err(&(__dev)->pdev->dev, "%s:%s:%d:(pid %d): " format, \ - (__dev)->priv.name, __func__, __LINE__, current->pid, \ + dev_err(&(__dev)->pdev->dev, "%s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, \ ##__VA_ARGS__) #define mlx5_core_warn(__dev, format, ...) \ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index b380a6bc1f85..0680dfb4bb1e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -155,13 +155,13 @@ int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs) if (!pci_vfs_assigned(pdev)) pci_disable_sriov(pdev); else - pr_info("unloading PF driver while leaving orphan VFs\n"); + mlx5_core_info(dev, "unloading PF driver while leaving orphan VFs\n"); return 0; } err = mlx5_core_sriov_enable(pdev, num_vfs); if (err) { - dev_warn(&pdev->dev, "mlx5_core_sriov_enable failed %d\n", err); + mlx5_core_warn(dev, "mlx5_core_sriov_enable failed %d\n", err); return err; } @@ -180,7 +180,8 @@ static int sync_required(struct pci_dev *pdev) int cur_vfs = pci_num_vf(pdev); if (cur_vfs != sriov->num_vfs) { - pr_info("current VFs %d, registered %d - sync needed\n", cur_vfs, sriov->num_vfs); + mlx5_core_warn(dev, "current VFs %d, registered %d - sync needed\n", + cur_vfs, sriov->num_vfs); return 1; } From 9def7121bed3be8a9d126c900ca7067647bc4789 Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Wed, 3 Aug 2016 17:27:30 +0300 Subject: [PATCH 0248/1715] net/mlx5: Enable setting minimum inline header mode for VFs Implement the low-level part of the PF side in setting minimum inline header mode for VFs. Signed-off-by: Hadar Hen Zion Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- .../net/ethernet/mellanox/mlx5/core/vport.c | 20 +++++++++++++++++++ include/linux/mlx5/mlx5_ifc.h | 2 +- include/linux/mlx5/vport.h | 2 ++ 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index 3593bf78caf4..525f17af108e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -125,6 +125,26 @@ void mlx5_query_nic_vport_min_inline(struct mlx5_core_dev *mdev, } EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_min_inline); +int mlx5_modify_nic_vport_min_inline(struct mlx5_core_dev *mdev, + u16 vport, u8 min_inline) +{ + u32 in[MLX5_ST_SZ_DW(modify_nic_vport_context_in)] = {0}; + int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); + void *nic_vport_ctx; + + MLX5_SET(modify_nic_vport_context_in, in, + field_select.min_inline, 1); + MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport); + MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1); + + nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, + in, nic_vport_context); + MLX5_SET(nic_vport_context, nic_vport_ctx, + min_wqe_inline_mode, min_inline); + + return mlx5_modify_nic_vport_context(mdev, in, inlen); +} + int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, u16 vport, u8 *addr) { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index cb94ac5b8420..7a8ef0af94e7 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -4724,7 +4724,7 @@ struct mlx5_ifc_modify_nic_vport_field_select_bits { u8 reserved_at_0[0x16]; u8 node_guid[0x1]; u8 port_guid[0x1]; - u8 reserved_at_18[0x1]; + u8 min_inline[0x1]; u8 mtu[0x1]; u8 change_event[0x1]; u8 promisc[0x1]; diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index e087b7d047ac..451b0bde9083 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -45,6 +45,8 @@ int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, u16 vport, u8 *addr); void mlx5_query_nic_vport_min_inline(struct mlx5_core_dev *mdev, u8 *min_inline); +int mlx5_modify_nic_vport_min_inline(struct mlx5_core_dev *mdev, + u16 vport, u8 min_inline); int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *dev, u16 vport, u8 *addr); int mlx5_query_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 *mtu); From 7adbde2035c2e5baf2f6a90eba11813db4813a67 Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Wed, 3 Aug 2016 15:08:33 +0300 Subject: [PATCH 0249/1715] net/mlx5: Update mlx5_ifc.h for vxlan encap/decap Add the required definitions related to vxlan encap/decap. Signed-off-by: Hadar Hen Zion Signed-off-by: Ilya Lesokhin Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 105 ++++++++++++++++++++++++++++++++-- 1 file changed, 101 insertions(+), 4 deletions(-) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 7a8ef0af94e7..3766110e13ea 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -212,6 +212,8 @@ enum { MLX5_CMD_OP_DEALLOC_FLOW_COUNTER = 0x93a, MLX5_CMD_OP_QUERY_FLOW_COUNTER = 0x93b, MLX5_CMD_OP_MODIFY_FLOW_TABLE = 0x93c, + MLX5_CMD_OP_ALLOC_ENCAP_HEADER = 0x93d, + MLX5_CMD_OP_DEALLOC_ENCAP_HEADER = 0x93e, MLX5_CMD_OP_MAX }; @@ -281,7 +283,9 @@ struct mlx5_ifc_flow_table_prop_layout_bits { u8 modify_root[0x1]; u8 identified_miss_table_mode[0x1]; u8 flow_table_modify[0x1]; - u8 reserved_at_7[0x19]; + u8 encap[0x1]; + u8 decap[0x1]; + u8 reserved_at_9[0x17]; u8 reserved_at_20[0x2]; u8 log_max_ft_size[0x6]; @@ -512,7 +516,15 @@ struct mlx5_ifc_e_switch_cap_bits { u8 nic_vport_node_guid_modify[0x1]; u8 nic_vport_port_guid_modify[0x1]; - u8 reserved_at_20[0x7e0]; + u8 vxlan_encap_decap[0x1]; + u8 nvgre_encap_decap[0x1]; + u8 reserved_at_22[0x9]; + u8 log_max_encap_headers[0x5]; + u8 reserved_2b[0x6]; + u8 max_encap_header_size[0xa]; + + u8 reserved_40[0x7c0]; + }; struct mlx5_ifc_qos_cap_bits { @@ -2067,6 +2079,8 @@ enum { MLX5_FLOW_CONTEXT_ACTION_DROP = 0x2, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST = 0x4, MLX5_FLOW_CONTEXT_ACTION_COUNT = 0x8, + MLX5_FLOW_CONTEXT_ACTION_ENCAP = 0x10, + MLX5_FLOW_CONTEXT_ACTION_DECAP = 0x20, }; struct mlx5_ifc_flow_context_bits { @@ -2086,7 +2100,9 @@ struct mlx5_ifc_flow_context_bits { u8 reserved_at_a0[0x8]; u8 flow_counter_list_size[0x18]; - u8 reserved_at_c0[0x140]; + u8 encap_id[0x20]; + + u8 reserved_at_e0[0x120]; struct mlx5_ifc_fte_match_param_bits match_value; @@ -4216,6 +4232,85 @@ struct mlx5_ifc_query_eq_in_bits { u8 reserved_at_60[0x20]; }; +struct mlx5_ifc_encap_header_in_bits { + u8 reserved_at_0[0x5]; + u8 header_type[0x3]; + u8 reserved_at_8[0xe]; + u8 encap_header_size[0xa]; + + u8 reserved_at_20[0x10]; + u8 encap_header[2][0x8]; + + u8 more_encap_header[0][0x8]; +}; + +struct mlx5_ifc_query_encap_header_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0xa0]; + + struct mlx5_ifc_encap_header_in_bits encap_header[0]; +}; + +struct mlx5_ifc_query_encap_header_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 encap_id[0x20]; + + u8 reserved_at_60[0xa0]; +}; + +struct mlx5_ifc_alloc_encap_header_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 encap_id[0x20]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_alloc_encap_header_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0xa0]; + + struct mlx5_ifc_encap_header_in_bits encap_header; +}; + +struct mlx5_ifc_dealloc_encap_header_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_dealloc_encap_header_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_20[0x10]; + u8 op_mod[0x10]; + + u8 encap_id[0x20]; + + u8 reserved_60[0x20]; +}; + struct mlx5_ifc_query_dct_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -6102,7 +6197,9 @@ struct mlx5_ifc_create_flow_table_in_bits { u8 reserved_at_a0[0x20]; - u8 reserved_at_c0[0x4]; + u8 encap_en[0x1]; + u8 decap_en[0x1]; + u8 reserved_at_c2[0x2]; u8 table_miss_mode[0x4]; u8 level[0x8]; u8 reserved_at_d0[0x8]; From 575ddf5888eaf8f271cb3df7b0806cb2db2c333a Mon Sep 17 00:00:00 2001 From: Ilya Lesokhin Date: Tue, 23 Feb 2016 13:25:22 +0200 Subject: [PATCH 0250/1715] net/mlx5: Introduce alloc_encap and dealloc_encap commands Implement low-level commands to support vxlan encapsulation. Signed-off-by: Ilya Lesokhin Signed-off-by: Hadar Hen Zion Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 4 ++ .../net/ethernet/mellanox/mlx5/core/fs_cmd.c | 48 +++++++++++++++++++ .../net/ethernet/mellanox/mlx5/core/fs_cmd.h | 7 +++ 3 files changed, 59 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 23b95daa9533..00bec609aae2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -301,6 +301,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_MODIFY_FLOW_TABLE: case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: case MLX5_CMD_OP_SET_FLOW_TABLE_ROOT: + case MLX5_CMD_OP_DEALLOC_ENCAP_HEADER: return MLX5_CMD_STAT_OK; case MLX5_CMD_OP_QUERY_HCA_CAP: @@ -402,6 +403,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY: case MLX5_CMD_OP_ALLOC_FLOW_COUNTER: case MLX5_CMD_OP_QUERY_FLOW_COUNTER: + case MLX5_CMD_OP_ALLOC_ENCAP_HEADER: *status = MLX5_DRIVER_STATUS_ABORTED; *synd = MLX5_DRIVER_SYND; return -EIO; @@ -550,6 +552,8 @@ const char *mlx5_command_str(int command) MLX5_COMMAND_STR_CASE(DEALLOC_FLOW_COUNTER); MLX5_COMMAND_STR_CASE(QUERY_FLOW_COUNTER); MLX5_COMMAND_STR_CASE(MODIFY_FLOW_TABLE); + MLX5_COMMAND_STR_CASE(ALLOC_ENCAP_HEADER); + MLX5_COMMAND_STR_CASE(DEALLOC_ENCAP_HEADER); default: return "unknown command opcode"; } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index e64499ebf2b5..7aaefa9aaf1c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -424,3 +424,51 @@ void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev, *packets = MLX5_GET64(traffic_counter, stats, packets); *bytes = MLX5_GET64(traffic_counter, stats, octets); } + +#define MAX_ENCAP_SIZE (128) + +int mlx5_cmd_alloc_encap(struct mlx5_core_dev *dev, + int header_type, + size_t size, + void *encap_header, + u32 *encap_id) +{ + u32 out[MLX5_ST_SZ_DW(alloc_encap_header_out)]; + u32 in[MLX5_ST_SZ_DW(alloc_encap_header_in) + + (MAX_ENCAP_SIZE / sizeof(u32))]; + void *encap_header_in = MLX5_ADDR_OF(alloc_encap_header_in, in, + encap_header); + void *header = MLX5_ADDR_OF(encap_header_in, encap_header_in, + encap_header); + int inlen = header - (void *)in + size; + int err; + + if (size > MAX_ENCAP_SIZE) + return -EINVAL; + + memset(in, 0, inlen); + MLX5_SET(alloc_encap_header_in, in, opcode, + MLX5_CMD_OP_ALLOC_ENCAP_HEADER); + MLX5_SET(encap_header_in, encap_header_in, encap_header_size, size); + MLX5_SET(encap_header_in, encap_header_in, header_type, header_type); + memcpy(header, encap_header, size); + + memset(out, 0, sizeof(out)); + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); + + *encap_id = MLX5_GET(alloc_encap_header_out, out, encap_id); + return err; +} + +void mlx5_cmd_dealloc_encap(struct mlx5_core_dev *dev, u32 encap_id) +{ + u32 in[MLX5_ST_SZ_DW(dealloc_encap_header_in)]; + u32 out[MLX5_ST_SZ_DW(dealloc_encap_header_out)]; + + memset(in, 0, sizeof(in)); + MLX5_SET(dealloc_encap_header_in, in, opcode, + MLX5_CMD_OP_DEALLOC_ENCAP_HEADER); + MLX5_SET(dealloc_encap_header_in, in, encap_id, encap_id); + + mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h index 158844cef82b..ac52fdfb5096 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h @@ -88,4 +88,11 @@ void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev, struct mlx5_cmd_fc_bulk *b, u16 id, u64 *packets, u64 *bytes); +int mlx5_cmd_alloc_encap(struct mlx5_core_dev *dev, + int header_type, + size_t size, + void *encap_header, + u32 *encap_id); +void mlx5_cmd_dealloc_encap(struct mlx5_core_dev *dev, u32 encap_id); + #endif From 92e47ba8839bacc185db89f3b11cd8036193e6a9 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sat, 13 Aug 2016 22:35:36 +0800 Subject: [PATCH 0251/1715] netfilter: conntrack: simplify the code by using nf_conntrack_get_ht Since commit 64b87639c9cb ("netfilter: conntrack: fix race between nf_conntrack proc read and hash resize") introduce the nf_conntrack_get_ht, so there's no need to check nf_conntrack_generation again and again to get the hash table and hash size. And convert nf_conntrack_get_ht to inline function here. Suggested-by: Pablo Neira Ayuso Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 20 ++++++++++ include/net/netfilter/nf_conntrack_core.h | 3 -- net/netfilter/nf_conntrack_core.c | 46 +++++------------------ 3 files changed, 30 insertions(+), 39 deletions(-) diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 445b019c2078..2a127480d4cc 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -303,9 +303,29 @@ struct kernel_param; int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp); int nf_conntrack_hash_resize(unsigned int hashsize); + +extern struct hlist_nulls_head *nf_conntrack_hash; extern unsigned int nf_conntrack_htable_size; +extern seqcount_t nf_conntrack_generation; extern unsigned int nf_conntrack_max; +/* must be called with rcu read lock held */ +static inline void +nf_conntrack_get_ht(struct hlist_nulls_head **hash, unsigned int *hsize) +{ + struct hlist_nulls_head *hptr; + unsigned int sequence, hsz; + + do { + sequence = read_seqcount_begin(&nf_conntrack_generation); + hsz = nf_conntrack_htable_size; + hptr = nf_conntrack_hash; + } while (read_seqcount_retry(&nf_conntrack_generation, sequence)); + + *hash = hptr; + *hsize = hsz; +} + struct nf_conn *nf_ct_tmpl_alloc(struct net *net, const struct nf_conntrack_zone *zone, gfp_t flags); diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 79d7ac5c9740..62e17d1319ff 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -51,8 +51,6 @@ bool nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, const struct nf_conntrack_l3proto *l3proto, const struct nf_conntrack_l4proto *l4proto); -void nf_conntrack_get_ht(struct hlist_nulls_head **hash, unsigned int *hsize); - /* Find a connection corresponding to a tuple. */ struct nf_conntrack_tuple_hash * nf_conntrack_find_get(struct net *net, @@ -83,7 +81,6 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple, #define CONNTRACK_LOCKS 1024 -extern struct hlist_nulls_head *nf_conntrack_hash; extern spinlock_t nf_conntrack_locks[CONNTRACK_LOCKS]; void nf_conntrack_lock(spinlock_t *lock); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 22558b7ff7cd..aeba28c5512b 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -74,7 +74,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_hash); static __read_mostly struct kmem_cache *nf_conntrack_cachep; static __read_mostly spinlock_t nf_conntrack_locks_all_lock; -static __read_mostly seqcount_t nf_conntrack_generation; static __read_mostly DEFINE_SPINLOCK(nf_conntrack_locks_all_lock); static __read_mostly bool nf_conntrack_locks_all; @@ -162,6 +161,7 @@ static void nf_conntrack_all_unlock(void) unsigned int nf_conntrack_htable_size __read_mostly; unsigned int nf_conntrack_max __read_mostly; +seqcount_t nf_conntrack_generation __read_mostly; DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked); EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked); @@ -478,23 +478,6 @@ nf_ct_key_equal(struct nf_conntrack_tuple_hash *h, net_eq(net, nf_ct_net(ct)); } -/* must be called with rcu read lock held */ -void nf_conntrack_get_ht(struct hlist_nulls_head **hash, unsigned int *hsize) -{ - struct hlist_nulls_head *hptr; - unsigned int sequence, hsz; - - do { - sequence = read_seqcount_begin(&nf_conntrack_generation); - hsz = nf_conntrack_htable_size; - hptr = nf_conntrack_hash; - } while (read_seqcount_retry(&nf_conntrack_generation, sequence)); - - *hash = hptr; - *hsize = hsz; -} -EXPORT_SYMBOL_GPL(nf_conntrack_get_ht); - /* * Warning : * - Caller must take a reference on returned object @@ -507,14 +490,11 @@ ____nf_conntrack_find(struct net *net, const struct nf_conntrack_zone *zone, struct nf_conntrack_tuple_hash *h; struct hlist_nulls_head *ct_hash; struct hlist_nulls_node *n; - unsigned int bucket, sequence; + unsigned int bucket, hsize; begin: - do { - sequence = read_seqcount_begin(&nf_conntrack_generation); - bucket = scale_hash(hash); - ct_hash = nf_conntrack_hash; - } while (read_seqcount_retry(&nf_conntrack_generation, sequence)); + nf_conntrack_get_ht(&ct_hash, &hsize); + bucket = reciprocal_scale(hash, hsize); hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[bucket], hnnode) { if (nf_ct_key_equal(h, tuple, zone, net)) { @@ -820,18 +800,15 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, const struct nf_conntrack_zone *zone; struct nf_conntrack_tuple_hash *h; struct hlist_nulls_head *ct_hash; - unsigned int hash, sequence; + unsigned int hash, hsize; struct hlist_nulls_node *n; struct nf_conn *ct; zone = nf_ct_zone(ignored_conntrack); rcu_read_lock(); - do { - sequence = read_seqcount_begin(&nf_conntrack_generation); - hash = hash_conntrack(net, tuple); - ct_hash = nf_conntrack_hash; - } while (read_seqcount_retry(&nf_conntrack_generation, sequence)); + nf_conntrack_get_ht(&ct_hash, &hsize); + hash = __hash_conntrack(net, tuple, hsize); hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[hash], hnnode) { ct = nf_ct_tuplehash_to_ctrack(h); @@ -897,14 +874,11 @@ static noinline int early_drop(struct net *net, unsigned int _hash) for (i = 0; i < NF_CT_EVICTION_RANGE; i++) { struct hlist_nulls_head *ct_hash; - unsigned hash, sequence, drops; + unsigned int hash, hsize, drops; rcu_read_lock(); - do { - sequence = read_seqcount_begin(&nf_conntrack_generation); - hash = scale_hash(_hash++); - ct_hash = nf_conntrack_hash; - } while (read_seqcount_retry(&nf_conntrack_generation, sequence)); + nf_conntrack_get_ht(&ct_hash, &hsize); + hash = reciprocal_scale(_hash++, hsize); drops = early_drop_list(net, &ct_hash[hash]); rcu_read_unlock(); From d2d371ae5dd6af9a6a3d7f50b753627c42868409 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Tue, 16 Aug 2016 16:45:38 +0200 Subject: [PATCH 0252/1715] net: ipconfig: Fix more use after free While commit 9c706a49d660 ("net: ipconfig: fix use after free") avoids the use after free, the resulting code still ends up calling both the ic_setup_if() and ic_setup_routes() after calling ic_close_devs(), and access to the device is still required. Move the call to ic_close_devs() to the very end of the function. Signed-off-by: Thierry Reding Signed-off-by: David S. Miller --- net/ipv4/ipconfig.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 66c2fe602810..ba9cbeafbb2e 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -1530,12 +1530,14 @@ static int __init ip_auto_config(void) * Close all network devices except the device we've * autoconfigured and set up routes. */ - ic_close_devs(); if (ic_setup_if() < 0 || ic_setup_routes() < 0) - return -1; + err = -1; + else + err = 0; + ic_close_devs(); - return 0; + return err; } late_initcall(ip_auto_config); From 43a0c6751a322847cb6fa0ab8cbf77a1d08bfc0a Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 15 Aug 2016 14:51:01 -0700 Subject: [PATCH 0253/1715] strparser: Stream parser for messages This patch introduces a utility for parsing application layer protocol messages in a TCP stream. This is a generalization of the mechanism implemented of Kernel Connection Multiplexor. The API includes a context structure, a set of callbacks, utility functions, and a data ready function. A stream parser instance is defined by a strparse structure that is bound to a TCP socket. The function to initialize the structure is: int strp_init(struct strparser *strp, struct sock *csk, struct strp_callbacks *cb); csk is the TCP socket being bound to and cb are the parser callbacks. The upper layer calls strp_tcp_data_ready when data is ready on the lower socket for strparser to process. This should be called from a data_ready callback that is set on the socket: void strp_tcp_data_ready(struct strparser *strp); A parser is bound to a TCP socket by setting data_ready function to strp_tcp_data_ready so that all receive indications on the socket go through the parser. This is assumes that sk_user_data is set to the strparser structure. There are four callbacks. - parse_msg is called to parse the message (returns length or error). - rcv_msg is called when a complete message has been received - read_sock_done is called when data_ready function exits - abort_parser is called to abort the parser The input to parse_msg is an skbuff which contains next message under construction. The backend processing of parse_msg will parse the application layer protocol headers to determine the length of the message in the stream. The possible return values are: >0 : indicates length of successfully parsed message 0 : indicates more data must be received to parse the message -ESTRPIPE : current message should not be processed by the kernel, return control of the socket to userspace which can proceed to read the messages itself other < 0 : Error is parsing, give control back to userspace assuming that synchronzation is lost and the stream is unrecoverable (application expected to close TCP socket) In the case of error return (< 0) strparse will stop the parser and report and error to userspace. The application must deal with the error. To handle the error the strparser is unbound from the TCP socket. If the error indicates that the stream TCP socket is at recoverable point (ESTRPIPE) then the application can read the TCP socket to process the stream. Once the application has dealt with the exceptions in the stream, it may again bind the socket to a strparser to continue data operations. Note that ENODATA may be returned to the application. In this case parse_msg returned -ESTRPIPE, however strparser was unable to maintain synchronization of the stream (i.e. some of the message in question was already read by the parser). strp_pause and strp_unpause are used to provide flow control. For instance, if rcv_msg is called but the upper layer can't immediately consume the message it can hold the message and pause strparser. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/strparser.h | 145 +++++++++++ net/Kconfig | 1 + net/Makefile | 1 + net/strparser/Kconfig | 4 + net/strparser/Makefile | 1 + net/strparser/strparser.c | 492 ++++++++++++++++++++++++++++++++++++++ 6 files changed, 644 insertions(+) create mode 100644 include/net/strparser.h create mode 100644 net/strparser/Kconfig create mode 100644 net/strparser/Makefile create mode 100644 net/strparser/strparser.c diff --git a/include/net/strparser.h b/include/net/strparser.h new file mode 100644 index 000000000000..fdb3d6746cc4 --- /dev/null +++ b/include/net/strparser.h @@ -0,0 +1,145 @@ +/* + * Stream Parser + * + * Copyright (c) 2016 Tom Herbert + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + */ + +#ifndef __NET_STRPARSER_H_ +#define __NET_STRPARSER_H_ + +#include +#include + +#define STRP_STATS_ADD(stat, count) ((stat) += (count)) +#define STRP_STATS_INCR(stat) ((stat)++) + +struct strp_stats { + unsigned long long rx_msgs; + unsigned long long rx_bytes; + unsigned int rx_mem_fail; + unsigned int rx_need_more_hdr; + unsigned int rx_msg_too_big; + unsigned int rx_msg_timeouts; + unsigned int rx_bad_hdr_len; +}; + +struct strp_aggr_stats { + unsigned long long rx_msgs; + unsigned long long rx_bytes; + unsigned int rx_mem_fail; + unsigned int rx_need_more_hdr; + unsigned int rx_msg_too_big; + unsigned int rx_msg_timeouts; + unsigned int rx_bad_hdr_len; + unsigned int rx_aborts; + unsigned int rx_interrupted; + unsigned int rx_unrecov_intr; +}; + +struct strparser; + +/* Callbacks are called with lock held for the attached socket */ +struct strp_callbacks { + int (*parse_msg)(struct strparser *strp, struct sk_buff *skb); + void (*rcv_msg)(struct strparser *strp, struct sk_buff *skb); + int (*read_sock_done)(struct strparser *strp, int err); + void (*abort_parser)(struct strparser *strp, int err); +}; + +struct strp_rx_msg { + int full_len; + int offset; +}; + +static inline struct strp_rx_msg *strp_rx_msg(struct sk_buff *skb) +{ + return (struct strp_rx_msg *)((void *)skb->cb + + offsetof(struct qdisc_skb_cb, data)); +} + +/* Structure for an attached lower socket */ +struct strparser { + struct sock *sk; + + u32 rx_stopped : 1; + u32 rx_paused : 1; + u32 rx_aborted : 1; + u32 rx_interrupted : 1; + u32 rx_unrecov_intr : 1; + + struct sk_buff **rx_skb_nextp; + struct timer_list rx_msg_timer; + struct sk_buff *rx_skb_head; + unsigned int rx_need_bytes; + struct delayed_work rx_delayed_work; + struct work_struct rx_work; + struct strp_stats stats; + struct strp_callbacks cb; +}; + +/* Must be called with lock held for attached socket */ +static inline void strp_pause(struct strparser *strp) +{ + strp->rx_paused = 1; +} + +/* May be called without holding lock for attached socket */ +static inline void strp_unpause(struct strparser *strp) +{ + strp->rx_paused = 0; +} + +static inline void save_strp_stats(struct strparser *strp, + struct strp_aggr_stats *agg_stats) +{ + /* Save psock statistics in the mux when psock is being unattached. */ + +#define SAVE_PSOCK_STATS(_stat) (agg_stats->_stat += \ + strp->stats._stat) + SAVE_PSOCK_STATS(rx_msgs); + SAVE_PSOCK_STATS(rx_bytes); + SAVE_PSOCK_STATS(rx_mem_fail); + SAVE_PSOCK_STATS(rx_need_more_hdr); + SAVE_PSOCK_STATS(rx_msg_too_big); + SAVE_PSOCK_STATS(rx_msg_timeouts); + SAVE_PSOCK_STATS(rx_bad_hdr_len); +#undef SAVE_PSOCK_STATS + + if (strp->rx_aborted) + agg_stats->rx_aborts++; + if (strp->rx_interrupted) + agg_stats->rx_interrupted++; + if (strp->rx_unrecov_intr) + agg_stats->rx_unrecov_intr++; +} + +static inline void aggregate_strp_stats(struct strp_aggr_stats *stats, + struct strp_aggr_stats *agg_stats) +{ +#define SAVE_PSOCK_STATS(_stat) (agg_stats->_stat += stats->_stat) + SAVE_PSOCK_STATS(rx_msgs); + SAVE_PSOCK_STATS(rx_bytes); + SAVE_PSOCK_STATS(rx_mem_fail); + SAVE_PSOCK_STATS(rx_need_more_hdr); + SAVE_PSOCK_STATS(rx_msg_too_big); + SAVE_PSOCK_STATS(rx_msg_timeouts); + SAVE_PSOCK_STATS(rx_bad_hdr_len); + SAVE_PSOCK_STATS(rx_aborts); + SAVE_PSOCK_STATS(rx_interrupted); + SAVE_PSOCK_STATS(rx_unrecov_intr); +#undef SAVE_PSOCK_STATS + +} + +void strp_done(struct strparser *strp); +void strp_stop(struct strparser *strp); +void strp_check_rcv(struct strparser *strp); +int strp_init(struct strparser *strp, struct sock *csk, + struct strp_callbacks *cb); +void strp_tcp_data_ready(struct strparser *strp); + +#endif /* __NET_STRPARSER_H_ */ diff --git a/net/Kconfig b/net/Kconfig index c2cdbce629bd..7b6cd340b72b 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -369,6 +369,7 @@ source "net/irda/Kconfig" source "net/bluetooth/Kconfig" source "net/rxrpc/Kconfig" source "net/kcm/Kconfig" +source "net/strparser/Kconfig" config FIB_RULES bool diff --git a/net/Makefile b/net/Makefile index 9bd20bb86cc6..4cafaa2b4667 100644 --- a/net/Makefile +++ b/net/Makefile @@ -35,6 +35,7 @@ obj-$(CONFIG_BT) += bluetooth/ obj-$(CONFIG_SUNRPC) += sunrpc/ obj-$(CONFIG_AF_RXRPC) += rxrpc/ obj-$(CONFIG_AF_KCM) += kcm/ +obj-$(CONFIG_STREAM_PARSER) += strparser/ obj-$(CONFIG_ATM) += atm/ obj-$(CONFIG_L2TP) += l2tp/ obj-$(CONFIG_DECNET) += decnet/ diff --git a/net/strparser/Kconfig b/net/strparser/Kconfig new file mode 100644 index 000000000000..6cff3f6d0c3a --- /dev/null +++ b/net/strparser/Kconfig @@ -0,0 +1,4 @@ + +config STREAM_PARSER + tristate + default n diff --git a/net/strparser/Makefile b/net/strparser/Makefile new file mode 100644 index 000000000000..858a126ebaa0 --- /dev/null +++ b/net/strparser/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_STREAM_PARSER) += strparser.o diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c new file mode 100644 index 000000000000..fd688c0a7744 --- /dev/null +++ b/net/strparser/strparser.c @@ -0,0 +1,492 @@ +/* + * Stream Parser + * + * Copyright (c) 2016 Tom Herbert + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static struct workqueue_struct *strp_wq; + +struct _strp_rx_msg { + /* Internal cb structure. struct strp_rx_msg must be first for passing + * to upper layer. + */ + struct strp_rx_msg strp; + int accum_len; + int early_eaten; +}; + +static inline struct _strp_rx_msg *_strp_rx_msg(struct sk_buff *skb) +{ + return (struct _strp_rx_msg *)((void *)skb->cb + + offsetof(struct qdisc_skb_cb, data)); +} + +/* Lower lock held */ +static void strp_abort_rx_strp(struct strparser *strp, int err) +{ + struct sock *csk = strp->sk; + + /* Unrecoverable error in receive */ + + del_timer(&strp->rx_msg_timer); + + if (strp->rx_stopped) + return; + + strp->rx_stopped = 1; + + /* Report an error on the lower socket */ + csk->sk_err = err; + csk->sk_error_report(csk); +} + +static void strp_start_rx_timer(struct strparser *strp) +{ + if (strp->sk->sk_rcvtimeo) + mod_timer(&strp->rx_msg_timer, strp->sk->sk_rcvtimeo); +} + +/* Lower lock held */ +static void strp_parser_err(struct strparser *strp, int err, + read_descriptor_t *desc) +{ + desc->error = err; + kfree_skb(strp->rx_skb_head); + strp->rx_skb_head = NULL; + strp->cb.abort_parser(strp, err); +} + +/* Lower socket lock held */ +static int strp_tcp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, + unsigned int orig_offset, size_t orig_len) +{ + struct strparser *strp = (struct strparser *)desc->arg.data; + struct _strp_rx_msg *rxm; + struct sk_buff *head, *skb; + size_t eaten = 0, cand_len; + ssize_t extra; + int err; + bool cloned_orig = false; + + if (strp->rx_paused) + return 0; + + head = strp->rx_skb_head; + if (head) { + /* Message already in progress */ + + rxm = _strp_rx_msg(head); + if (unlikely(rxm->early_eaten)) { + /* Already some number of bytes on the receive sock + * data saved in rx_skb_head, just indicate they + * are consumed. + */ + eaten = orig_len <= rxm->early_eaten ? + orig_len : rxm->early_eaten; + rxm->early_eaten -= eaten; + + return eaten; + } + + if (unlikely(orig_offset)) { + /* Getting data with a non-zero offset when a message is + * in progress is not expected. If it does happen, we + * need to clone and pull since we can't deal with + * offsets in the skbs for a message expect in the head. + */ + orig_skb = skb_clone(orig_skb, GFP_ATOMIC); + if (!orig_skb) { + STRP_STATS_INCR(strp->stats.rx_mem_fail); + desc->error = -ENOMEM; + return 0; + } + if (!pskb_pull(orig_skb, orig_offset)) { + STRP_STATS_INCR(strp->stats.rx_mem_fail); + kfree_skb(orig_skb); + desc->error = -ENOMEM; + return 0; + } + cloned_orig = true; + orig_offset = 0; + } + + if (!strp->rx_skb_nextp) { + /* We are going to append to the frags_list of head. + * Need to unshare the frag_list. + */ + err = skb_unclone(head, GFP_ATOMIC); + if (err) { + STRP_STATS_INCR(strp->stats.rx_mem_fail); + desc->error = err; + return 0; + } + + if (unlikely(skb_shinfo(head)->frag_list)) { + /* We can't append to an sk_buff that already + * has a frag_list. We create a new head, point + * the frag_list of that to the old head, and + * then are able to use the old head->next for + * appending to the message. + */ + if (WARN_ON(head->next)) { + desc->error = -EINVAL; + return 0; + } + + skb = alloc_skb(0, GFP_ATOMIC); + if (!skb) { + STRP_STATS_INCR(strp->stats.rx_mem_fail); + desc->error = -ENOMEM; + return 0; + } + skb->len = head->len; + skb->data_len = head->len; + skb->truesize = head->truesize; + *_strp_rx_msg(skb) = *_strp_rx_msg(head); + strp->rx_skb_nextp = &head->next; + skb_shinfo(skb)->frag_list = head; + strp->rx_skb_head = skb; + head = skb; + } else { + strp->rx_skb_nextp = + &skb_shinfo(head)->frag_list; + } + } + } + + while (eaten < orig_len) { + /* Always clone since we will consume something */ + skb = skb_clone(orig_skb, GFP_ATOMIC); + if (!skb) { + STRP_STATS_INCR(strp->stats.rx_mem_fail); + desc->error = -ENOMEM; + break; + } + + cand_len = orig_len - eaten; + + head = strp->rx_skb_head; + if (!head) { + head = skb; + strp->rx_skb_head = head; + /* Will set rx_skb_nextp on next packet if needed */ + strp->rx_skb_nextp = NULL; + rxm = _strp_rx_msg(head); + memset(rxm, 0, sizeof(*rxm)); + rxm->strp.offset = orig_offset + eaten; + } else { + /* Unclone since we may be appending to an skb that we + * already share a frag_list with. + */ + err = skb_unclone(skb, GFP_ATOMIC); + if (err) { + STRP_STATS_INCR(strp->stats.rx_mem_fail); + desc->error = err; + break; + } + + rxm = _strp_rx_msg(head); + *strp->rx_skb_nextp = skb; + strp->rx_skb_nextp = &skb->next; + head->data_len += skb->len; + head->len += skb->len; + head->truesize += skb->truesize; + } + + if (!rxm->strp.full_len) { + ssize_t len; + + len = (*strp->cb.parse_msg)(strp, head); + + if (!len) { + /* Need more header to determine length */ + if (!rxm->accum_len) { + /* Start RX timer for new message */ + strp_start_rx_timer(strp); + } + rxm->accum_len += cand_len; + eaten += cand_len; + STRP_STATS_INCR(strp->stats.rx_need_more_hdr); + WARN_ON(eaten != orig_len); + break; + } else if (len < 0) { + if (len == -ESTRPIPE && rxm->accum_len) { + len = -ENODATA; + strp->rx_unrecov_intr = 1; + } else { + strp->rx_interrupted = 1; + } + strp_parser_err(strp, err, desc); + break; + } else if (len > strp->sk->sk_rcvbuf) { + /* Message length exceeds maximum allowed */ + STRP_STATS_INCR(strp->stats.rx_msg_too_big); + strp_parser_err(strp, -EMSGSIZE, desc); + break; + } else if (len <= (ssize_t)head->len - + skb->len - rxm->strp.offset) { + /* Length must be into new skb (and also + * greater than zero) + */ + STRP_STATS_INCR(strp->stats.rx_bad_hdr_len); + strp_parser_err(strp, -EPROTO, desc); + break; + } + + rxm->strp.full_len = len; + } + + extra = (ssize_t)(rxm->accum_len + cand_len) - + rxm->strp.full_len; + + if (extra < 0) { + /* Message not complete yet. */ + if (rxm->strp.full_len - rxm->accum_len > + tcp_inq(strp->sk)) { + /* Don't have the whole messages in the socket + * buffer. Set strp->rx_need_bytes to wait for + * the rest of the message. Also, set "early + * eaten" since we've already buffered the skb + * but don't consume yet per tcp_read_sock. + */ + + if (!rxm->accum_len) { + /* Start RX timer for new message */ + strp_start_rx_timer(strp); + } + + strp->rx_need_bytes = rxm->strp.full_len - + rxm->accum_len; + rxm->accum_len += cand_len; + rxm->early_eaten = cand_len; + STRP_STATS_ADD(strp->stats.rx_bytes, cand_len); + desc->count = 0; /* Stop reading socket */ + break; + } + rxm->accum_len += cand_len; + eaten += cand_len; + WARN_ON(eaten != orig_len); + break; + } + + /* Positive extra indicates ore bytes than needed for the + * message + */ + + WARN_ON(extra > cand_len); + + eaten += (cand_len - extra); + + /* Hurray, we have a new message! */ + del_timer(&strp->rx_msg_timer); + strp->rx_skb_head = NULL; + STRP_STATS_INCR(strp->stats.rx_msgs); + + /* Give skb to upper layer */ + strp->cb.rcv_msg(strp, head); + + if (unlikely(strp->rx_paused)) { + /* Upper layer paused strp */ + break; + } + } + + if (cloned_orig) + kfree_skb(orig_skb); + + STRP_STATS_ADD(strp->stats.rx_bytes, eaten); + + return eaten; +} + +static int default_read_sock_done(struct strparser *strp, int err) +{ + return err; +} + +/* Called with lock held on lower socket */ +static int strp_tcp_read_sock(struct strparser *strp) +{ + read_descriptor_t desc; + + desc.arg.data = strp; + desc.error = 0; + desc.count = 1; /* give more than one skb per call */ + + /* sk should be locked here, so okay to do tcp_read_sock */ + tcp_read_sock(strp->sk, &desc, strp_tcp_recv); + + desc.error = strp->cb.read_sock_done(strp, desc.error); + + return desc.error; +} + +/* Lower sock lock held */ +void strp_tcp_data_ready(struct strparser *strp) +{ + struct sock *csk = strp->sk; + + if (unlikely(strp->rx_stopped)) + return; + + /* This check is needed to synchronize with do_strp_rx_work. + * do_strp_rx_work acquires a process lock (lock_sock) whereas + * the lock held here is bh_lock_sock. The two locks can be + * held by different threads at the same time, but bh_lock_sock + * allows a thread in BH context to safely check if the process + * lock is held. In this case, if the lock is held, queue work. + */ + if (sock_owned_by_user(csk)) { + queue_work(strp_wq, &strp->rx_work); + return; + } + + if (strp->rx_paused) + return; + + if (strp->rx_need_bytes) { + if (tcp_inq(csk) >= strp->rx_need_bytes) + strp->rx_need_bytes = 0; + else + return; + } + + if (strp_tcp_read_sock(strp) == -ENOMEM) + queue_work(strp_wq, &strp->rx_work); +} +EXPORT_SYMBOL_GPL(strp_tcp_data_ready); + +static void do_strp_rx_work(struct strparser *strp) +{ + read_descriptor_t rd_desc; + struct sock *csk = strp->sk; + + /* We need the read lock to synchronize with strp_tcp_data_ready. We + * need the socket lock for calling tcp_read_sock. + */ + lock_sock(csk); + + if (unlikely(csk->sk_user_data != strp)) + goto out; + + if (unlikely(strp->rx_stopped)) + goto out; + + if (strp->rx_paused) + goto out; + + rd_desc.arg.data = strp; + + if (strp_tcp_read_sock(strp) == -ENOMEM) + queue_work(strp_wq, &strp->rx_work); + +out: + release_sock(csk); +} + +static void strp_rx_work(struct work_struct *w) +{ + do_strp_rx_work(container_of(w, struct strparser, rx_work)); +} + +static void strp_rx_msg_timeout(unsigned long arg) +{ + struct strparser *strp = (struct strparser *)arg; + + /* Message assembly timed out */ + STRP_STATS_INCR(strp->stats.rx_msg_timeouts); + lock_sock(strp->sk); + strp->cb.abort_parser(strp, ETIMEDOUT); + release_sock(strp->sk); +} + +int strp_init(struct strparser *strp, struct sock *csk, + struct strp_callbacks *cb) +{ + if (!cb || !cb->rcv_msg || !cb->parse_msg) + return -EINVAL; + + memset(strp, 0, sizeof(*strp)); + + strp->sk = csk; + + setup_timer(&strp->rx_msg_timer, strp_rx_msg_timeout, + (unsigned long)strp); + + INIT_WORK(&strp->rx_work, strp_rx_work); + + strp->cb.rcv_msg = cb->rcv_msg; + strp->cb.parse_msg = cb->parse_msg; + strp->cb.read_sock_done = cb->read_sock_done ? : default_read_sock_done; + strp->cb.abort_parser = cb->abort_parser ? : strp_abort_rx_strp; + + return 0; +} +EXPORT_SYMBOL_GPL(strp_init); + +/* strp must already be stopped so that strp_tcp_recv will no longer be called. + * Note that strp_done is not called with the lower socket held. + */ +void strp_done(struct strparser *strp) +{ + WARN_ON(!strp->rx_stopped); + + del_timer_sync(&strp->rx_msg_timer); + cancel_work_sync(&strp->rx_work); + + if (strp->rx_skb_head) { + kfree_skb(strp->rx_skb_head); + strp->rx_skb_head = NULL; + } +} +EXPORT_SYMBOL_GPL(strp_done); + +void strp_stop(struct strparser *strp) +{ + strp->rx_stopped = 1; +} +EXPORT_SYMBOL_GPL(strp_stop); + +void strp_check_rcv(struct strparser *strp) +{ + queue_work(strp_wq, &strp->rx_work); +} +EXPORT_SYMBOL_GPL(strp_check_rcv); + +static int __init strp_mod_init(void) +{ + strp_wq = create_singlethread_workqueue("kstrp"); + + return 0; +} + +static void __exit strp_mod_exit(void) +{ +} +module_init(strp_mod_init); +module_exit(strp_mod_exit); +MODULE_LICENSE("GPL"); From 9b73896a81dc68a638a011877b7344b252f92276 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 15 Aug 2016 14:51:02 -0700 Subject: [PATCH 0254/1715] kcm: Use stream parser Adapt KCM to use the stream parser. This mostly involves removing the RX handling and setting up the strparser using the interface. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/kcm.h | 37 +--- net/ipv6/ila/ila_common.c | 1 - net/kcm/Kconfig | 1 + net/kcm/kcmproc.c | 44 ++-- net/kcm/kcmsock.c | 456 +++++++------------------------------- 5 files changed, 116 insertions(+), 423 deletions(-) diff --git a/include/net/kcm.h b/include/net/kcm.h index 2840b5825dcc..2a8965819db0 100644 --- a/include/net/kcm.h +++ b/include/net/kcm.h @@ -13,6 +13,7 @@ #include #include +#include #include extern unsigned int kcm_net_id; @@ -21,16 +22,8 @@ extern unsigned int kcm_net_id; #define KCM_STATS_INCR(stat) ((stat)++) struct kcm_psock_stats { - unsigned long long rx_msgs; - unsigned long long rx_bytes; unsigned long long tx_msgs; unsigned long long tx_bytes; - unsigned int rx_aborts; - unsigned int rx_mem_fail; - unsigned int rx_need_more_hdr; - unsigned int rx_msg_too_big; - unsigned int rx_msg_timeouts; - unsigned int rx_bad_hdr_len; unsigned long long reserved; unsigned long long unreserved; unsigned int tx_aborts; @@ -64,13 +57,6 @@ struct kcm_tx_msg { struct sk_buff *last_skb; }; -struct kcm_rx_msg { - int full_len; - int accum_len; - int offset; - int early_eaten; -}; - /* Socket structure for KCM client sockets */ struct kcm_sock { struct sock sk; @@ -87,6 +73,7 @@ struct kcm_sock { struct work_struct tx_work; struct list_head wait_psock_list; struct sk_buff *seq_skb; + u32 tx_stopped : 1; /* Don't use bit fields here, these are set under different locks */ bool tx_wait; @@ -104,11 +91,11 @@ struct bpf_prog; /* Structure for an attached lower socket */ struct kcm_psock { struct sock *sk; + struct strparser strp; struct kcm_mux *mux; int index; u32 tx_stopped : 1; - u32 rx_stopped : 1; u32 done : 1; u32 unattaching : 1; @@ -121,18 +108,12 @@ struct kcm_psock { struct kcm_psock_stats stats; /* Receive */ - struct sk_buff *rx_skb_head; - struct sk_buff **rx_skb_nextp; - struct sk_buff *ready_rx_msg; struct list_head psock_ready_list; - struct work_struct rx_work; - struct delayed_work rx_delayed_work; struct bpf_prog *bpf_prog; struct kcm_sock *rx_kcm; unsigned long long saved_rx_bytes; unsigned long long saved_rx_msgs; - struct timer_list rx_msg_timer; - unsigned int rx_need_bytes; + struct sk_buff *ready_rx_msg; /* Transmit */ struct kcm_sock *tx_kcm; @@ -146,6 +127,7 @@ struct kcm_net { struct mutex mutex; struct kcm_psock_stats aggregate_psock_stats; struct kcm_mux_stats aggregate_mux_stats; + struct strp_aggr_stats aggregate_strp_stats; struct list_head mux_list; int count; }; @@ -163,6 +145,7 @@ struct kcm_mux { struct kcm_mux_stats stats; struct kcm_psock_stats aggregate_psock_stats; + struct strp_aggr_stats aggregate_strp_stats; /* Receive */ spinlock_t rx_lock ____cacheline_aligned_in_smp; @@ -190,14 +173,6 @@ static inline void aggregate_psock_stats(struct kcm_psock_stats *stats, /* Save psock statistics in the mux when psock is being unattached. */ #define SAVE_PSOCK_STATS(_stat) (agg_stats->_stat += stats->_stat) - SAVE_PSOCK_STATS(rx_msgs); - SAVE_PSOCK_STATS(rx_bytes); - SAVE_PSOCK_STATS(rx_aborts); - SAVE_PSOCK_STATS(rx_mem_fail); - SAVE_PSOCK_STATS(rx_need_more_hdr); - SAVE_PSOCK_STATS(rx_msg_too_big); - SAVE_PSOCK_STATS(rx_msg_timeouts); - SAVE_PSOCK_STATS(rx_bad_hdr_len); SAVE_PSOCK_STATS(tx_msgs); SAVE_PSOCK_STATS(tx_bytes); SAVE_PSOCK_STATS(reserved); diff --git a/net/ipv6/ila/ila_common.c b/net/ipv6/ila/ila_common.c index ec9efbcdad35..aba0998ddbfb 100644 --- a/net/ipv6/ila/ila_common.c +++ b/net/ipv6/ila/ila_common.c @@ -172,6 +172,5 @@ static void __exit ila_fini(void) module_init(ila_init); module_exit(ila_fini); -MODULE_ALIAS_RTNL_LWT(ILA); MODULE_AUTHOR("Tom Herbert "); MODULE_LICENSE("GPL"); diff --git a/net/kcm/Kconfig b/net/kcm/Kconfig index 5db94d940ecc..87fca36e6c47 100644 --- a/net/kcm/Kconfig +++ b/net/kcm/Kconfig @@ -3,6 +3,7 @@ config AF_KCM tristate "KCM sockets" depends on INET select BPF_SYSCALL + select STREAM_PARSER ---help--- KCM (Kernel Connection Multiplexor) sockets provide a method for multiplexing messages of a message based application diff --git a/net/kcm/kcmproc.c b/net/kcm/kcmproc.c index 16c2e03bd388..47e445364f4f 100644 --- a/net/kcm/kcmproc.c +++ b/net/kcm/kcmproc.c @@ -155,8 +155,8 @@ static void kcm_format_psock(struct kcm_psock *psock, struct seq_file *seq, seq_printf(seq, " psock-%-5u %-10llu %-16llu %-10llu %-16llu %-8d %-8d %-8d %-8d ", psock->index, - psock->stats.rx_msgs, - psock->stats.rx_bytes, + psock->strp.stats.rx_msgs, + psock->strp.stats.rx_bytes, psock->stats.tx_msgs, psock->stats.tx_bytes, psock->sk->sk_receive_queue.qlen, @@ -170,9 +170,12 @@ static void kcm_format_psock(struct kcm_psock *psock, struct seq_file *seq, if (psock->tx_stopped) seq_puts(seq, "TxStop "); - if (psock->rx_stopped) + if (psock->strp.rx_stopped) seq_puts(seq, "RxStop "); + if (psock->strp.rx_paused) + seq_puts(seq, "RxPause "); + if (psock->tx_kcm) seq_printf(seq, "Rsvd-%d ", psock->tx_kcm->index); @@ -275,6 +278,7 @@ static int kcm_stats_seq_show(struct seq_file *seq, void *v) { struct kcm_psock_stats psock_stats; struct kcm_mux_stats mux_stats; + struct strp_aggr_stats strp_stats; struct kcm_mux *mux; struct kcm_psock *psock; struct net *net = seq->private; @@ -282,20 +286,28 @@ static int kcm_stats_seq_show(struct seq_file *seq, void *v) memset(&mux_stats, 0, sizeof(mux_stats)); memset(&psock_stats, 0, sizeof(psock_stats)); + memset(&strp_stats, 0, sizeof(strp_stats)); mutex_lock(&knet->mutex); aggregate_mux_stats(&knet->aggregate_mux_stats, &mux_stats); aggregate_psock_stats(&knet->aggregate_psock_stats, &psock_stats); + aggregate_strp_stats(&knet->aggregate_strp_stats, + &strp_stats); list_for_each_entry_rcu(mux, &knet->mux_list, kcm_mux_list) { spin_lock_bh(&mux->lock); aggregate_mux_stats(&mux->stats, &mux_stats); aggregate_psock_stats(&mux->aggregate_psock_stats, &psock_stats); - list_for_each_entry(psock, &mux->psocks, psock_list) + aggregate_strp_stats(&mux->aggregate_strp_stats, + &strp_stats); + list_for_each_entry(psock, &mux->psocks, psock_list) { aggregate_psock_stats(&psock->stats, &psock_stats); + save_strp_stats(&psock->strp, &strp_stats); + } + spin_unlock_bh(&mux->lock); } @@ -328,7 +340,7 @@ static int kcm_stats_seq_show(struct seq_file *seq, void *v) mux_stats.rx_ready_drops); seq_printf(seq, - "%-8s %-10s %-16s %-10s %-16s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s\n", + "%-8s %-10s %-16s %-10s %-16s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s\n", "Psock", "RX-Msgs", "RX-Bytes", @@ -337,6 +349,8 @@ static int kcm_stats_seq_show(struct seq_file *seq, void *v) "Reserved", "Unreserved", "RX-Aborts", + "RX-Intr", + "RX-Unrecov", "RX-MemFail", "RX-NeedMor", "RX-BadLen", @@ -345,20 +359,22 @@ static int kcm_stats_seq_show(struct seq_file *seq, void *v) "TX-Aborts"); seq_printf(seq, - "%-8s %-10llu %-16llu %-10llu %-16llu %-10llu %-10llu %-10u %-10u %-10u %-10u %-10u %-10u %-10u\n", + "%-8s %-10llu %-16llu %-10llu %-16llu %-10llu %-10llu %-10u %-10u %-10u %-10u %-10u %-10u %-10u %-10u %-10u\n", "", - psock_stats.rx_msgs, - psock_stats.rx_bytes, + strp_stats.rx_msgs, + strp_stats.rx_bytes, psock_stats.tx_msgs, psock_stats.tx_bytes, psock_stats.reserved, psock_stats.unreserved, - psock_stats.rx_aborts, - psock_stats.rx_mem_fail, - psock_stats.rx_need_more_hdr, - psock_stats.rx_bad_hdr_len, - psock_stats.rx_msg_too_big, - psock_stats.rx_msg_timeouts, + strp_stats.rx_aborts, + strp_stats.rx_interrupted, + strp_stats.rx_unrecov_intr, + strp_stats.rx_mem_fail, + strp_stats.rx_need_more_hdr, + strp_stats.rx_bad_hdr_len, + strp_stats.rx_msg_too_big, + strp_stats.rx_msg_timeouts, psock_stats.tx_aborts); return 0; diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index cb39e05b166c..eedbe404af35 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1,3 +1,13 @@ +/* + * Kernel Connection Multiplexor + * + * Copyright (c) 2016 Tom Herbert + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + */ + #include #include #include @@ -35,38 +45,12 @@ static inline struct kcm_tx_msg *kcm_tx_msg(struct sk_buff *skb) return (struct kcm_tx_msg *)skb->cb; } -static inline struct kcm_rx_msg *kcm_rx_msg(struct sk_buff *skb) -{ - return (struct kcm_rx_msg *)((void *)skb->cb + - offsetof(struct qdisc_skb_cb, data)); -} - static void report_csk_error(struct sock *csk, int err) { csk->sk_err = EPIPE; csk->sk_error_report(csk); } -/* Callback lock held */ -static void kcm_abort_rx_psock(struct kcm_psock *psock, int err, - struct sk_buff *skb) -{ - struct sock *csk = psock->sk; - - /* Unrecoverable error in receive */ - - del_timer(&psock->rx_msg_timer); - - if (psock->rx_stopped) - return; - - psock->rx_stopped = 1; - KCM_STATS_INCR(psock->stats.rx_aborts); - - /* Report an error on the lower socket */ - report_csk_error(csk, err); -} - static void kcm_abort_tx_psock(struct kcm_psock *psock, int err, bool wakeup_kcm) { @@ -109,12 +93,13 @@ static void kcm_abort_tx_psock(struct kcm_psock *psock, int err, static void kcm_update_rx_mux_stats(struct kcm_mux *mux, struct kcm_psock *psock) { - KCM_STATS_ADD(mux->stats.rx_bytes, - psock->stats.rx_bytes - psock->saved_rx_bytes); + STRP_STATS_ADD(mux->stats.rx_bytes, + psock->strp.stats.rx_bytes - + psock->saved_rx_bytes); mux->stats.rx_msgs += - psock->stats.rx_msgs - psock->saved_rx_msgs; - psock->saved_rx_msgs = psock->stats.rx_msgs; - psock->saved_rx_bytes = psock->stats.rx_bytes; + psock->strp.stats.rx_msgs - psock->saved_rx_msgs; + psock->saved_rx_msgs = psock->strp.stats.rx_msgs; + psock->saved_rx_bytes = psock->strp.stats.rx_bytes; } static void kcm_update_tx_mux_stats(struct kcm_mux *mux, @@ -167,11 +152,11 @@ static void kcm_rcv_ready(struct kcm_sock *kcm) */ list_del(&psock->psock_ready_list); psock->ready_rx_msg = NULL; - /* Commit clearing of ready_rx_msg for queuing work */ smp_mb(); - queue_work(kcm_wq, &psock->rx_work); + strp_unpause(&psock->strp); + strp_check_rcv(&psock->strp); } /* Buffer limit is okay now, add to ready list */ @@ -285,6 +270,7 @@ static struct kcm_sock *reserve_rx_kcm(struct kcm_psock *psock, if (list_empty(&mux->kcm_rx_waiters)) { psock->ready_rx_msg = head; + strp_pause(&psock->strp); list_add_tail(&psock->psock_ready_list, &mux->psocks_ready); spin_unlock_bh(&mux->rx_lock); @@ -353,276 +339,6 @@ static void unreserve_rx_kcm(struct kcm_psock *psock, spin_unlock_bh(&mux->rx_lock); } -static void kcm_start_rx_timer(struct kcm_psock *psock) -{ - if (psock->sk->sk_rcvtimeo) - mod_timer(&psock->rx_msg_timer, psock->sk->sk_rcvtimeo); -} - -/* Macro to invoke filter function. */ -#define KCM_RUN_FILTER(prog, ctx) \ - (*prog->bpf_func)(ctx, prog->insnsi) - -/* Lower socket lock held */ -static int kcm_tcp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, - unsigned int orig_offset, size_t orig_len) -{ - struct kcm_psock *psock = (struct kcm_psock *)desc->arg.data; - struct kcm_rx_msg *rxm; - struct kcm_sock *kcm; - struct sk_buff *head, *skb; - size_t eaten = 0, cand_len; - ssize_t extra; - int err; - bool cloned_orig = false; - - if (psock->ready_rx_msg) - return 0; - - head = psock->rx_skb_head; - if (head) { - /* Message already in progress */ - - rxm = kcm_rx_msg(head); - if (unlikely(rxm->early_eaten)) { - /* Already some number of bytes on the receive sock - * data saved in rx_skb_head, just indicate they - * are consumed. - */ - eaten = orig_len <= rxm->early_eaten ? - orig_len : rxm->early_eaten; - rxm->early_eaten -= eaten; - - return eaten; - } - - if (unlikely(orig_offset)) { - /* Getting data with a non-zero offset when a message is - * in progress is not expected. If it does happen, we - * need to clone and pull since we can't deal with - * offsets in the skbs for a message expect in the head. - */ - orig_skb = skb_clone(orig_skb, GFP_ATOMIC); - if (!orig_skb) { - KCM_STATS_INCR(psock->stats.rx_mem_fail); - desc->error = -ENOMEM; - return 0; - } - if (!pskb_pull(orig_skb, orig_offset)) { - KCM_STATS_INCR(psock->stats.rx_mem_fail); - kfree_skb(orig_skb); - desc->error = -ENOMEM; - return 0; - } - cloned_orig = true; - orig_offset = 0; - } - - if (!psock->rx_skb_nextp) { - /* We are going to append to the frags_list of head. - * Need to unshare the frag_list. - */ - err = skb_unclone(head, GFP_ATOMIC); - if (err) { - KCM_STATS_INCR(psock->stats.rx_mem_fail); - desc->error = err; - return 0; - } - - if (unlikely(skb_shinfo(head)->frag_list)) { - /* We can't append to an sk_buff that already - * has a frag_list. We create a new head, point - * the frag_list of that to the old head, and - * then are able to use the old head->next for - * appending to the message. - */ - if (WARN_ON(head->next)) { - desc->error = -EINVAL; - return 0; - } - - skb = alloc_skb(0, GFP_ATOMIC); - if (!skb) { - KCM_STATS_INCR(psock->stats.rx_mem_fail); - desc->error = -ENOMEM; - return 0; - } - skb->len = head->len; - skb->data_len = head->len; - skb->truesize = head->truesize; - *kcm_rx_msg(skb) = *kcm_rx_msg(head); - psock->rx_skb_nextp = &head->next; - skb_shinfo(skb)->frag_list = head; - psock->rx_skb_head = skb; - head = skb; - } else { - psock->rx_skb_nextp = - &skb_shinfo(head)->frag_list; - } - } - } - - while (eaten < orig_len) { - /* Always clone since we will consume something */ - skb = skb_clone(orig_skb, GFP_ATOMIC); - if (!skb) { - KCM_STATS_INCR(psock->stats.rx_mem_fail); - desc->error = -ENOMEM; - break; - } - - cand_len = orig_len - eaten; - - head = psock->rx_skb_head; - if (!head) { - head = skb; - psock->rx_skb_head = head; - /* Will set rx_skb_nextp on next packet if needed */ - psock->rx_skb_nextp = NULL; - rxm = kcm_rx_msg(head); - memset(rxm, 0, sizeof(*rxm)); - rxm->offset = orig_offset + eaten; - } else { - /* Unclone since we may be appending to an skb that we - * already share a frag_list with. - */ - err = skb_unclone(skb, GFP_ATOMIC); - if (err) { - KCM_STATS_INCR(psock->stats.rx_mem_fail); - desc->error = err; - break; - } - - rxm = kcm_rx_msg(head); - *psock->rx_skb_nextp = skb; - psock->rx_skb_nextp = &skb->next; - head->data_len += skb->len; - head->len += skb->len; - head->truesize += skb->truesize; - } - - if (!rxm->full_len) { - ssize_t len; - - len = KCM_RUN_FILTER(psock->bpf_prog, head); - - if (!len) { - /* Need more header to determine length */ - if (!rxm->accum_len) { - /* Start RX timer for new message */ - kcm_start_rx_timer(psock); - } - rxm->accum_len += cand_len; - eaten += cand_len; - KCM_STATS_INCR(psock->stats.rx_need_more_hdr); - WARN_ON(eaten != orig_len); - break; - } else if (len > psock->sk->sk_rcvbuf) { - /* Message length exceeds maximum allowed */ - KCM_STATS_INCR(psock->stats.rx_msg_too_big); - desc->error = -EMSGSIZE; - psock->rx_skb_head = NULL; - kcm_abort_rx_psock(psock, EMSGSIZE, head); - break; - } else if (len <= (ssize_t)head->len - - skb->len - rxm->offset) { - /* Length must be into new skb (and also - * greater than zero) - */ - KCM_STATS_INCR(psock->stats.rx_bad_hdr_len); - desc->error = -EPROTO; - psock->rx_skb_head = NULL; - kcm_abort_rx_psock(psock, EPROTO, head); - break; - } - - rxm->full_len = len; - } - - extra = (ssize_t)(rxm->accum_len + cand_len) - rxm->full_len; - - if (extra < 0) { - /* Message not complete yet. */ - if (rxm->full_len - rxm->accum_len > - tcp_inq(psock->sk)) { - /* Don't have the whole messages in the socket - * buffer. Set psock->rx_need_bytes to wait for - * the rest of the message. Also, set "early - * eaten" since we've already buffered the skb - * but don't consume yet per tcp_read_sock. - */ - - if (!rxm->accum_len) { - /* Start RX timer for new message */ - kcm_start_rx_timer(psock); - } - - psock->rx_need_bytes = rxm->full_len - - rxm->accum_len; - rxm->accum_len += cand_len; - rxm->early_eaten = cand_len; - KCM_STATS_ADD(psock->stats.rx_bytes, cand_len); - desc->count = 0; /* Stop reading socket */ - break; - } - rxm->accum_len += cand_len; - eaten += cand_len; - WARN_ON(eaten != orig_len); - break; - } - - /* Positive extra indicates ore bytes than needed for the - * message - */ - - WARN_ON(extra > cand_len); - - eaten += (cand_len - extra); - - /* Hurray, we have a new message! */ - del_timer(&psock->rx_msg_timer); - psock->rx_skb_head = NULL; - KCM_STATS_INCR(psock->stats.rx_msgs); - -try_queue: - kcm = reserve_rx_kcm(psock, head); - if (!kcm) { - /* Unable to reserve a KCM, message is held in psock. */ - break; - } - - if (kcm_queue_rcv_skb(&kcm->sk, head)) { - /* Should mean socket buffer full */ - unreserve_rx_kcm(psock, false); - goto try_queue; - } - } - - if (cloned_orig) - kfree_skb(orig_skb); - - KCM_STATS_ADD(psock->stats.rx_bytes, eaten); - - return eaten; -} - -/* Called with lock held on lower socket */ -static int psock_tcp_read_sock(struct kcm_psock *psock) -{ - read_descriptor_t desc; - - desc.arg.data = psock; - desc.error = 0; - desc.count = 1; /* give more than one skb per call */ - - /* sk should be locked here, so okay to do tcp_read_sock */ - tcp_read_sock(psock->sk, &desc, kcm_tcp_recv); - - unreserve_rx_kcm(psock, true); - - return desc.error; -} - /* Lower sock lock held */ static void psock_tcp_data_ready(struct sock *sk) { @@ -631,65 +347,49 @@ static void psock_tcp_data_ready(struct sock *sk) read_lock_bh(&sk->sk_callback_lock); psock = (struct kcm_psock *)sk->sk_user_data; - if (unlikely(!psock || psock->rx_stopped)) - goto out; + if (likely(psock)) + strp_tcp_data_ready(&psock->strp); - if (psock->ready_rx_msg) - goto out; - - if (psock->rx_need_bytes) { - if (tcp_inq(sk) >= psock->rx_need_bytes) - psock->rx_need_bytes = 0; - else - goto out; - } - - if (psock_tcp_read_sock(psock) == -ENOMEM) - queue_delayed_work(kcm_wq, &psock->rx_delayed_work, 0); - -out: read_unlock_bh(&sk->sk_callback_lock); } -static void do_psock_rx_work(struct kcm_psock *psock) +/* Called with lower sock held */ +static void kcm_rcv_strparser(struct strparser *strp, struct sk_buff *skb) { - read_descriptor_t rd_desc; - struct sock *csk = psock->sk; + struct kcm_psock *psock = container_of(strp, struct kcm_psock, strp); + struct kcm_sock *kcm; - /* We need the read lock to synchronize with psock_tcp_data_ready. We - * need the socket lock for calling tcp_read_sock. - */ - lock_sock(csk); - read_lock_bh(&csk->sk_callback_lock); +try_queue: + kcm = reserve_rx_kcm(psock, skb); + if (!kcm) { + /* Unable to reserve a KCM, message is held in psock and strp + * is paused. + */ + return; + } - if (unlikely(csk->sk_user_data != psock)) - goto out; - - if (unlikely(psock->rx_stopped)) - goto out; - - if (psock->ready_rx_msg) - goto out; - - rd_desc.arg.data = psock; - - if (psock_tcp_read_sock(psock) == -ENOMEM) - queue_delayed_work(kcm_wq, &psock->rx_delayed_work, 0); - -out: - read_unlock_bh(&csk->sk_callback_lock); - release_sock(csk); + if (kcm_queue_rcv_skb(&kcm->sk, skb)) { + /* Should mean socket buffer full */ + unreserve_rx_kcm(psock, false); + goto try_queue; + } } -static void psock_rx_work(struct work_struct *w) +static int kcm_parse_func_strparser(struct strparser *strp, struct sk_buff *skb) { - do_psock_rx_work(container_of(w, struct kcm_psock, rx_work)); + struct kcm_psock *psock = container_of(strp, struct kcm_psock, strp); + struct bpf_prog *prog = psock->bpf_prog; + + return (*prog->bpf_func)(skb, prog->insnsi); } -static void psock_rx_delayed_work(struct work_struct *w) +static int kcm_read_sock_done(struct strparser *strp, int err) { - do_psock_rx_work(container_of(w, struct kcm_psock, - rx_delayed_work.work)); + struct kcm_psock *psock = container_of(strp, struct kcm_psock, strp); + + unreserve_rx_kcm(psock, true); + + return err; } static void psock_tcp_state_change(struct sock *sk) @@ -713,14 +413,13 @@ static void psock_tcp_write_space(struct sock *sk) psock = (struct kcm_psock *)sk->sk_user_data; if (unlikely(!psock)) goto out; - mux = psock->mux; spin_lock_bh(&mux->lock); /* Check if the socket is reserved so someone is waiting for sending. */ kcm = psock->tx_kcm; - if (kcm) + if (kcm && !unlikely(kcm->tx_stopped)) queue_work(kcm_wq, &kcm->tx_work); spin_unlock_bh(&mux->lock); @@ -1411,7 +1110,7 @@ static int kcm_recvmsg(struct socket *sock, struct msghdr *msg, struct kcm_sock *kcm = kcm_sk(sk); int err = 0; long timeo; - struct kcm_rx_msg *rxm; + struct strp_rx_msg *rxm; int copied = 0; struct sk_buff *skb; @@ -1425,7 +1124,7 @@ static int kcm_recvmsg(struct socket *sock, struct msghdr *msg, /* Okay, have a message on the receive queue */ - rxm = kcm_rx_msg(skb); + rxm = strp_rx_msg(skb); if (len > rxm->full_len) len = rxm->full_len; @@ -1481,7 +1180,7 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos, struct sock *sk = sock->sk; struct kcm_sock *kcm = kcm_sk(sk); long timeo; - struct kcm_rx_msg *rxm; + struct strp_rx_msg *rxm; int err = 0; ssize_t copied; struct sk_buff *skb; @@ -1498,7 +1197,7 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos, /* Okay, have a message on the receive queue */ - rxm = kcm_rx_msg(skb); + rxm = strp_rx_msg(skb); if (len > rxm->full_len) len = rxm->full_len; @@ -1674,15 +1373,6 @@ static void init_kcm_sock(struct kcm_sock *kcm, struct kcm_mux *mux) spin_unlock_bh(&mux->rx_lock); } -static void kcm_rx_msg_timeout(unsigned long arg) -{ - struct kcm_psock *psock = (struct kcm_psock *)arg; - - /* Message assembly timed out */ - KCM_STATS_INCR(psock->stats.rx_msg_timeouts); - kcm_abort_rx_psock(psock, ETIMEDOUT, NULL); -} - static int kcm_attach(struct socket *sock, struct socket *csock, struct bpf_prog *prog) { @@ -1692,6 +1382,7 @@ static int kcm_attach(struct socket *sock, struct socket *csock, struct kcm_psock *psock = NULL, *tpsock; struct list_head *head; int index = 0; + struct strp_callbacks cb; if (csock->ops->family != PF_INET && csock->ops->family != PF_INET6) @@ -1713,11 +1404,12 @@ static int kcm_attach(struct socket *sock, struct socket *csock, psock->sk = csk; psock->bpf_prog = prog; - setup_timer(&psock->rx_msg_timer, kcm_rx_msg_timeout, - (unsigned long)psock); + cb.rcv_msg = kcm_rcv_strparser; + cb.abort_parser = NULL; + cb.parse_msg = kcm_parse_func_strparser; + cb.read_sock_done = kcm_read_sock_done; - INIT_WORK(&psock->rx_work, psock_rx_work); - INIT_DELAYED_WORK(&psock->rx_delayed_work, psock_rx_delayed_work); + strp_init(&psock->strp, csk, &cb); sock_hold(csk); @@ -1750,7 +1442,7 @@ static int kcm_attach(struct socket *sock, struct socket *csock, spin_unlock_bh(&mux->lock); /* Schedule RX work in case there are already bytes queued */ - queue_work(kcm_wq, &psock->rx_work); + strp_check_rcv(&psock->strp); return 0; } @@ -1785,6 +1477,7 @@ out: return err; } +/* Lower socket lock held */ static void kcm_unattach(struct kcm_psock *psock) { struct sock *csk = psock->sk; @@ -1798,7 +1491,7 @@ static void kcm_unattach(struct kcm_psock *psock) csk->sk_data_ready = psock->save_data_ready; csk->sk_write_space = psock->save_write_space; csk->sk_state_change = psock->save_state_change; - psock->rx_stopped = 1; + strp_stop(&psock->strp); if (WARN_ON(psock->rx_kcm)) { write_unlock_bh(&csk->sk_callback_lock); @@ -1821,18 +1514,14 @@ static void kcm_unattach(struct kcm_psock *psock) write_unlock_bh(&csk->sk_callback_lock); - del_timer_sync(&psock->rx_msg_timer); - cancel_work_sync(&psock->rx_work); - cancel_delayed_work_sync(&psock->rx_delayed_work); + strp_done(&psock->strp); bpf_prog_put(psock->bpf_prog); - kfree_skb(psock->rx_skb_head); - psock->rx_skb_head = NULL; - spin_lock_bh(&mux->lock); aggregate_psock_stats(&psock->stats, &mux->aggregate_psock_stats); + save_strp_stats(&psock->strp, &mux->aggregate_strp_stats); KCM_STATS_INCR(mux->stats.psock_unattach); @@ -1915,6 +1604,7 @@ static int kcm_unattach_ioctl(struct socket *sock, struct kcm_unattach *info) spin_unlock_bh(&mux->lock); + /* Lower socket lock should already be held */ kcm_unattach(psock); err = 0; @@ -2059,8 +1749,11 @@ static void release_mux(struct kcm_mux *mux) /* Release psocks */ list_for_each_entry_safe(psock, tmp_psock, &mux->psocks, psock_list) { - if (!WARN_ON(psock->unattaching)) + if (!WARN_ON(psock->unattaching)) { + lock_sock(psock->strp.sk); kcm_unattach(psock); + release_sock(psock->strp.sk); + } } if (WARN_ON(mux->psocks_cnt)) @@ -2072,6 +1765,8 @@ static void release_mux(struct kcm_mux *mux) aggregate_mux_stats(&mux->stats, &knet->aggregate_mux_stats); aggregate_psock_stats(&mux->aggregate_psock_stats, &knet->aggregate_psock_stats); + aggregate_strp_stats(&mux->aggregate_strp_stats, + &knet->aggregate_strp_stats); list_del_rcu(&mux->kcm_mux_list); knet->count--; mutex_unlock(&knet->mutex); @@ -2151,6 +1846,13 @@ static int kcm_release(struct socket *sock) * it will just return. */ __skb_queue_purge(&sk->sk_write_queue); + + /* Set tx_stopped. This is checked when psock is bound to a kcm and we + * get a writespace callback. This prevents further work being queued + * from the callback (unbinding the psock occurs after canceling work. + */ + kcm->tx_stopped = 1; + release_sock(sk); spin_lock_bh(&mux->lock); From adcce4d5dd46d9356c1c9a6515efc430e331fa69 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 15 Aug 2016 14:51:03 -0700 Subject: [PATCH 0255/1715] strparser: Documentation Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- Documentation/networking/strparser.txt | 136 +++++++++++++++++++++++++ 1 file changed, 136 insertions(+) create mode 100644 Documentation/networking/strparser.txt diff --git a/Documentation/networking/strparser.txt b/Documentation/networking/strparser.txt new file mode 100644 index 000000000000..a0bf573dfa61 --- /dev/null +++ b/Documentation/networking/strparser.txt @@ -0,0 +1,136 @@ +Stream Parser +------------- + +The stream parser (strparser) is a utility that parses messages of an +application layer protocol running over a TCP connection. The stream +parser works in conjunction with an upper layer in the kernel to provide +kernel support for application layer messages. For instance, Kernel +Connection Multiplexor (KCM) uses the Stream Parser to parse messages +using a BPF program. + +Interface +--------- + +The API includes a context structure, a set of callbacks, utility +functions, and a data_ready function. The callbacks include +a parse_msg function that is called to perform parsing (e.g. +BPF parsing in case of KCM), and a rcv_msg function that is called +when a full message has been completed. + +A stream parser can be instantiated for a TCP connection. This is done +by: + +strp_init(struct strparser *strp, struct sock *csk, + struct strp_callbacks *cb) + +strp is a struct of type strparser that is allocated by the upper layer. +csk is the TCP socket associated with the stream parser. Callbacks are +called by the stream parser. + +Callbacks +--------- + +There are four callbacks: + +int (*parse_msg)(struct strparser *strp, struct sk_buff *skb); + + parse_msg is called to determine the length of the next message + in the stream. The upper layer must implement this function. It + should parse the sk_buff as containing the headers for the + next application layer messages in the stream. + + The skb->cb in the input skb is a struct strp_rx_msg. Only + the offset field is relevant in parse_msg and gives the offset + where the message starts in the skb. + + The return values of this function are: + + >0 : indicates length of successfully parsed message + 0 : indicates more data must be received to parse the message + -ESTRPIPE : current message should not be processed by the + kernel, return control of the socket to userspace which + can proceed to read the messages itself + other < 0 : Error is parsing, give control back to userspace + assuming that synchronization is lost and the stream + is unrecoverable (application expected to close TCP socket) + + In the case that an error is returned (return value is less than + zero) the stream parser will set the error on TCP socket and wake + it up. If parse_msg returned -ESTRPIPE and the stream parser had + previously read some bytes for the current message, then the error + set on the attached socket is ENODATA since the stream is + unrecoverable in that case. + +void (*rcv_msg)(struct strparser *strp, struct sk_buff *skb); + + rcv_msg is called when a full message has been received and + is queued. The callee must consume the sk_buff; it can + call strp_pause to prevent any further messages from being + received in rcv_msg (see strp_pause below). This callback + must be set. + + The skb->cb in the input skb is a struct strp_rx_msg. This + struct contains two fields: offset and full_len. Offset is + where the message starts in the skb, and full_len is the + the length of the message. skb->len - offset may be greater + then full_len since strparser does not trim the skb. + +int (*read_sock_done)(struct strparser *strp, int err); + + read_sock_done is called when the stream parser is done reading + the TCP socket. The stream parser may read multiple messages + in a loop and this function allows cleanup to occur when existing + the loop. If the callback is not set (NULL in strp_init) a + default function is used. + +void (*abort_parser)(struct strparser *strp, int err); + + This function is called when stream parser encounters an error + in parsing. The default function stops the stream parser for the + TCP socket and sets the error in the socket. The default function + can be changed by setting the callback to non-NULL in strp_init. + +Functions +--------- + +The upper layer calls strp_tcp_data_ready when data is ready on the lower +socket for strparser to process. This should be called from a data_ready +callback that is set on the socket. + +strp_stop is called to completely stop stream parser operations. This +is called internally when the stream parser encounters an error, and +it is called from the upper layer when unattaching a TCP socket. + +strp_done is called to unattach the stream parser from the TCP socket. +This must be called after the stream processor has be stopped. + +strp_check_rcv is called to check for new messages on the socket. This +is normally called at initialization of the a stream parser instance +of after strp_unpause. + +Statistics +---------- + +Various counters are kept for each stream parser for a TCP socket. +These are in the strp_stats structure. strp_aggr_stats is a convenience +structure for accumulating statistics for multiple stream parser +instances. save_strp_stats and aggregate_strp_stats are helper functions +to save and aggregate statistics. + +Message assembly limits +----------------------- + +The stream parser provide mechanisms to limit the resources consumed by +message assembly. + +A timer is set when assembly starts for a new message. The message +timeout is taken from rcvtime for the associated TCP socket. If the +timer fires before assembly completes the stream parser is aborted +and the ETIMEDOUT error is set on the TCP socket. + +Message length is limited to the receive buffer size of the associated +TCP socket. If the length returned by parse_msg is greater than +the socket buffer size then the stream parser is aborted with +EMSGSIZE error set on the TCP socket. Note that this makes the +maximum size of receive skbuffs for a socket with a stream parser +to be 2*sk_rcvbuf of the TCP socket. From 83b502a12e82d0ae97907d415496fbafe044f0ce Mon Sep 17 00:00:00 2001 From: Alex Vesker Date: Thu, 4 Aug 2016 17:32:02 +0300 Subject: [PATCH 0256/1715] net/mlx5: Modify RQ bitmask from mlx5 ifc Use mlx5 ifc MODIFY_BITMASK_VSD in mlx5e_modify_rq_vsd and expose counter set capability bit in hca caps structure. Signed-off-by: Alex Vesker Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 3 ++- include/linux/mlx5/driver.h | 4 ---- include/linux/mlx5/mlx5_ifc.h | 9 ++++++++- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 10fa12aa063f..9e36c1538904 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -489,7 +489,8 @@ static int mlx5e_modify_rq_vsd(struct mlx5e_rq *rq, bool vsd) rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx); MLX5_SET(modify_rq_in, in, rq_state, MLX5_RQC_STATE_RDY); - MLX5_SET64(modify_rq_in, in, modify_bitmask, MLX5_RQ_BITMASK_VSD); + MLX5_SET64(modify_rq_in, in, modify_bitmask, + MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_VSD); MLX5_SET(rqc, rqc, vsd, vsd); MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RDY); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index ebe57abf3324..0ea78b5edbb2 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -48,10 +48,6 @@ #include #include -enum { - MLX5_RQ_BITMASK_VSD = 1 << 1, -}; - enum { MLX5_BOARD_ID_LEN = 64, MLX5_MAX_NAME_LEN = 16, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 3766110e13ea..e1f8e3491867 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -779,7 +779,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 out_of_seq_cnt[0x1]; u8 vport_counters[0x1]; u8 retransmission_q_counters[0x1]; - u8 reserved_at_183[0x3]; + u8 reserved_at_183[0x1]; + u8 modify_rq_counter_set_id[0x1]; + u8 reserved_at_185[0x1]; u8 max_qp_cnt[0xa]; u8 pkey_table_size[0x10]; @@ -4750,6 +4752,11 @@ struct mlx5_ifc_modify_rq_out_bits { u8 reserved_at_40[0x40]; }; +enum { + MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_VSD = 1ULL << 1, + MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_MODIFY_RQ_COUNTER_SET_ID = 1ULL << 3, +}; + struct mlx5_ifc_modify_rq_in_bits { u8 opcode[0x10]; u8 reserved_at_10[0x10]; From 2e353b3468ecb1d12a44aaf35888f7de47d5c047 Mon Sep 17 00:00:00 2001 From: Artemy Kovalyov Date: Sun, 3 Jul 2016 14:57:33 +0300 Subject: [PATCH 0257/1715] net/mlx5: Update struct mlx5_ifc_xrqc_bits Update struct mlx5_ifc_xrqc_bits according to last specification Signed-off-by: Artemy Kovalyov Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index e1f8e3491867..5f150c849a8f 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -2829,7 +2829,7 @@ struct mlx5_ifc_xrqc_bits { struct mlx5_ifc_tag_matching_topology_context_bits tag_matching_topology_context; - u8 reserved_at_180[0x180]; + u8 reserved_at_180[0x200]; struct mlx5_ifc_wq_bits wq; }; From 8cca30a7f914fe363fa9700715619ca5c8cb38cc Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Sun, 26 Jun 2016 12:43:24 +0300 Subject: [PATCH 0258/1715] net/mlx5: Expose mlx5e_link_mode The mlx5e_link_mode enumeration will also be used in mlx5_ib for RoCE. This patch moves the enumeration to the mlx5 driver port header file. Signed-off-by: Noa Osherovich Signed-off-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 34 -------------------- include/linux/mlx5/port.h | 33 +++++++++++++++++++ 2 files changed, 33 insertions(+), 34 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 1b495efa7490..61902b147339 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -651,40 +651,6 @@ struct mlx5e_priv { void *ppriv; }; -enum mlx5e_link_mode { - MLX5E_1000BASE_CX_SGMII = 0, - MLX5E_1000BASE_KX = 1, - MLX5E_10GBASE_CX4 = 2, - MLX5E_10GBASE_KX4 = 3, - MLX5E_10GBASE_KR = 4, - MLX5E_20GBASE_KR2 = 5, - MLX5E_40GBASE_CR4 = 6, - MLX5E_40GBASE_KR4 = 7, - MLX5E_56GBASE_R4 = 8, - MLX5E_10GBASE_CR = 12, - MLX5E_10GBASE_SR = 13, - MLX5E_10GBASE_ER = 14, - MLX5E_40GBASE_SR4 = 15, - MLX5E_40GBASE_LR4 = 16, - MLX5E_50GBASE_SR2 = 18, - MLX5E_100GBASE_CR4 = 20, - MLX5E_100GBASE_SR4 = 21, - MLX5E_100GBASE_KR4 = 22, - MLX5E_100GBASE_LR4 = 23, - MLX5E_100BASE_TX = 24, - MLX5E_1000BASE_T = 25, - MLX5E_10GBASE_T = 26, - MLX5E_25GBASE_CR = 27, - MLX5E_25GBASE_KR = 28, - MLX5E_25GBASE_SR = 29, - MLX5E_50GBASE_CR2 = 30, - MLX5E_50GBASE_KR2 = 31, - MLX5E_LINK_MODES_NUMBER, -}; - -#define MLX5E_PROT_MASK(link_mode) (1 << link_mode) - - void mlx5e_build_ptys2ethtool_map(void); void mlx5e_send_nop(struct mlx5e_sq *sq, bool notify_hw); diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index e3012cc64b8a..6f876a4770f6 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -61,6 +61,39 @@ enum mlx5_an_status { #define MLX5_I2C_ADDR_HIGH 0x51 #define MLX5_EEPROM_PAGE_LENGTH 256 +enum mlx5e_link_mode { + MLX5E_1000BASE_CX_SGMII = 0, + MLX5E_1000BASE_KX = 1, + MLX5E_10GBASE_CX4 = 2, + MLX5E_10GBASE_KX4 = 3, + MLX5E_10GBASE_KR = 4, + MLX5E_20GBASE_KR2 = 5, + MLX5E_40GBASE_CR4 = 6, + MLX5E_40GBASE_KR4 = 7, + MLX5E_56GBASE_R4 = 8, + MLX5E_10GBASE_CR = 12, + MLX5E_10GBASE_SR = 13, + MLX5E_10GBASE_ER = 14, + MLX5E_40GBASE_SR4 = 15, + MLX5E_40GBASE_LR4 = 16, + MLX5E_50GBASE_SR2 = 18, + MLX5E_100GBASE_CR4 = 20, + MLX5E_100GBASE_SR4 = 21, + MLX5E_100GBASE_KR4 = 22, + MLX5E_100GBASE_LR4 = 23, + MLX5E_100BASE_TX = 24, + MLX5E_1000BASE_T = 25, + MLX5E_10GBASE_T = 26, + MLX5E_25GBASE_CR = 27, + MLX5E_25GBASE_KR = 28, + MLX5E_25GBASE_SR = 29, + MLX5E_50GBASE_CR2 = 30, + MLX5E_50GBASE_KR2 = 31, + MLX5E_LINK_MODES_NUMBER, +}; + +#define MLX5E_PROT_MASK(link_mode) (1 << link_mode) + int mlx5_set_port_caps(struct mlx5_core_dev *dev, u8 port_num, u32 caps); int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys, int ptys_size, int proto_mask, u8 local_port); From d5beb7f2aff4a60237fd97a98d49a78c9045b8f2 Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Thu, 2 Jun 2016 10:47:53 +0300 Subject: [PATCH 0259/1715] net/mlx5: Separate query_port_proto_oper for IB and EN Replaced mlx5_query_port_proto_oper with separate functions per link type. The functions should take different arguments so no point in trying to unite them. Signed-off-by: Noa Osherovich Signed-off-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/main.c | 3 +- .../net/ethernet/mellanox/mlx5/core/port.c | 32 +++++++++++++------ include/linux/mlx5/port.h | 7 ++-- 3 files changed, 28 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 6fb77d791045..f02a975320bd 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -748,8 +748,7 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port, &props->active_width); if (err) goto out; - err = mlx5_query_port_proto_oper(mdev, &props->active_speed, MLX5_PTYS_IB, - port); + err = mlx5_query_port_ib_proto_oper(mdev, &props->active_speed, port); if (err) goto out; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 2ee28ad8f918..34e7184e23c9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -175,25 +175,39 @@ int mlx5_query_port_link_width_oper(struct mlx5_core_dev *dev, } EXPORT_SYMBOL_GPL(mlx5_query_port_link_width_oper); -int mlx5_query_port_proto_oper(struct mlx5_core_dev *dev, - u8 *proto_oper, int proto_mask, - u8 local_port) +int mlx5_query_port_eth_proto_oper(struct mlx5_core_dev *dev, + u32 *proto_oper, u8 local_port) { u32 out[MLX5_ST_SZ_DW(ptys_reg)]; int err; - err = mlx5_query_port_ptys(dev, out, sizeof(out), proto_mask, local_port); + err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_EN, + local_port); if (err) return err; - if (proto_mask == MLX5_PTYS_EN) - *proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper); - else - *proto_oper = MLX5_GET(ptys_reg, out, ib_proto_oper); + *proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper); return 0; } -EXPORT_SYMBOL_GPL(mlx5_query_port_proto_oper); +EXPORT_SYMBOL(mlx5_query_port_eth_proto_oper); + +int mlx5_query_port_ib_proto_oper(struct mlx5_core_dev *dev, + u8 *proto_oper, u8 local_port) +{ + u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + int err; + + err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_IB, + local_port); + if (err) + return err; + + *proto_oper = MLX5_GET(ptys_reg, out, ib_proto_oper); + + return 0; +} +EXPORT_SYMBOL(mlx5_query_port_ib_proto_oper); int mlx5_set_port_ptys(struct mlx5_core_dev *dev, bool an_disable, u32 proto_admin, int proto_mask) diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index 6f876a4770f6..b3065acd20b4 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -103,9 +103,10 @@ int mlx5_query_port_proto_admin(struct mlx5_core_dev *dev, u32 *proto_admin, int proto_mask); int mlx5_query_port_link_width_oper(struct mlx5_core_dev *dev, u8 *link_width_oper, u8 local_port); -int mlx5_query_port_proto_oper(struct mlx5_core_dev *dev, - u8 *proto_oper, int proto_mask, - u8 local_port); +int mlx5_query_port_ib_proto_oper(struct mlx5_core_dev *dev, + u8 *proto_oper, u8 local_port); +int mlx5_query_port_eth_proto_oper(struct mlx5_core_dev *dev, + u32 *proto_oper, u8 local_port); int mlx5_set_port_ptys(struct mlx5_core_dev *dev, bool an_disable, u32 proto_admin, int proto_mask); void mlx5_toggle_port_link(struct mlx5_core_dev *dev); From 84df61ebc69bdc466180e02d654e9b0284781288 Mon Sep 17 00:00:00 2001 From: Aviv Heller Date: Tue, 10 May 2016 13:47:50 +0300 Subject: [PATCH 0260/1715] net/mlx5: Add HW interfaces used by LAG Exposed LAG commands enum and layouts: - CREATE_LAG HW enters LAG mode: RoCE traffic from port two is received on PF0 core dev. Allows to set tx_affinity (tx port) for QPs and TISes. Allows to port remap QPs and TISes, overriding their tx_affinity behavior. - MODIFY_LAG Remap QPs and TISes to another port. - QUERY_LAG Query whether LAG mode is active. - DESTROY_LAG HW exits LAG mode, returning to non-LAG behavior. - CREATE_VPORT_LAG Merge Ethernet flow steering, such that traffic received on port two jumps to PF0 root flow table. Available only in LAG mode. - DESTROY_VPORT_LAG Ethernet flow steering returns to non-LAG behavior. Caps added: - lag_master Driver is in charge of managing the LAG. This is currently the only option. - num_lag_ports LAG is supported only if this field's value is 2. Other fields: - QP/TIS tx port affinity During LAG, this field controls on which port a QP or TIS resides. - TIS strict tx affinity When this field is set, the TIS will not be subject to port remap by CREATE_LAG/MODIFY_LAG. - LAG demux flow table Flow table used for redirecting non user-space traffic back to PF1 root flow table, if the packet was received on port two. Signed-off-by: Aviv Heller Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 12 ++ include/linux/mlx5/mlx5_ifc.h | 166 +++++++++++++++++- 2 files changed, 171 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 00bec609aae2..6388bc0d9b97 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -285,6 +285,8 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN: case MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT: case MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY: + case MLX5_CMD_OP_DESTROY_LAG: + case MLX5_CMD_OP_DESTROY_VPORT_LAG: case MLX5_CMD_OP_DESTROY_TIR: case MLX5_CMD_OP_DESTROY_SQ: case MLX5_CMD_OP_DESTROY_RQ: @@ -376,6 +378,10 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT: case MLX5_CMD_OP_SET_L2_TABLE_ENTRY: case MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY: + case MLX5_CMD_OP_CREATE_LAG: + case MLX5_CMD_OP_MODIFY_LAG: + case MLX5_CMD_OP_QUERY_LAG: + case MLX5_CMD_OP_CREATE_VPORT_LAG: case MLX5_CMD_OP_CREATE_TIR: case MLX5_CMD_OP_MODIFY_TIR: case MLX5_CMD_OP_QUERY_TIR: @@ -514,6 +520,12 @@ const char *mlx5_command_str(int command) MLX5_COMMAND_STR_CASE(DELETE_L2_TABLE_ENTRY); MLX5_COMMAND_STR_CASE(SET_WOL_ROL); MLX5_COMMAND_STR_CASE(QUERY_WOL_ROL); + MLX5_COMMAND_STR_CASE(CREATE_LAG); + MLX5_COMMAND_STR_CASE(MODIFY_LAG); + MLX5_COMMAND_STR_CASE(QUERY_LAG); + MLX5_COMMAND_STR_CASE(DESTROY_LAG); + MLX5_COMMAND_STR_CASE(CREATE_VPORT_LAG); + MLX5_COMMAND_STR_CASE(DESTROY_VPORT_LAG); MLX5_COMMAND_STR_CASE(CREATE_TIR); MLX5_COMMAND_STR_CASE(MODIFY_TIR); MLX5_COMMAND_STR_CASE(DESTROY_TIR); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 5f150c849a8f..043d5256b754 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -174,6 +174,12 @@ enum { MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY = 0x82b, MLX5_CMD_OP_SET_WOL_ROL = 0x830, MLX5_CMD_OP_QUERY_WOL_ROL = 0x831, + MLX5_CMD_OP_CREATE_LAG = 0x840, + MLX5_CMD_OP_MODIFY_LAG = 0x841, + MLX5_CMD_OP_QUERY_LAG = 0x842, + MLX5_CMD_OP_DESTROY_LAG = 0x843, + MLX5_CMD_OP_CREATE_VPORT_LAG = 0x844, + MLX5_CMD_OP_DESTROY_VPORT_LAG = 0x845, MLX5_CMD_OP_CREATE_TIR = 0x900, MLX5_CMD_OP_MODIFY_TIR = 0x901, MLX5_CMD_OP_DESTROY_TIR = 0x902, @@ -884,7 +890,10 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 pad_tx_eth_packet[0x1]; u8 reserved_at_263[0x8]; u8 log_bf_reg_size[0x5]; - u8 reserved_at_270[0x10]; + + u8 reserved_at_270[0xb]; + u8 lag_master[0x1]; + u8 num_lag_ports[0x4]; u8 reserved_at_280[0x10]; u8 max_wqe_sz_sq[0x10]; @@ -1918,7 +1927,7 @@ enum { struct mlx5_ifc_qpc_bits { u8 state[0x4]; - u8 reserved_at_4[0x4]; + u8 lag_tx_port_affinity[0x4]; u8 st[0x8]; u8 reserved_at_10[0x3]; u8 pm_state[0x2]; @@ -2167,7 +2176,11 @@ struct mlx5_ifc_traffic_counter_bits { }; struct mlx5_ifc_tisc_bits { - u8 reserved_at_0[0xc]; + u8 strict_lag_tx_port_affinity[0x1]; + u8 reserved_at_1[0x3]; + u8 lag_tx_port_affinity[0x04]; + + u8 reserved_at_8[0x4]; u8 prio[0x4]; u8 reserved_at_10[0x10]; @@ -4617,7 +4630,9 @@ struct mlx5_ifc_modify_tis_out_bits { struct mlx5_ifc_modify_tis_bitmask_bits { u8 reserved_at_0[0x20]; - u8 reserved_at_20[0x1f]; + u8 reserved_at_20[0x1d]; + u8 lag_tx_port_affinity[0x1]; + u8 strict_lag_tx_port_affinity[0x1]; u8 prio[0x1]; }; @@ -6215,7 +6230,10 @@ struct mlx5_ifc_create_flow_table_in_bits { u8 reserved_at_e0[0x8]; u8 table_miss_id[0x18]; - u8 reserved_at_100[0x100]; + u8 reserved_at_100[0x8]; + u8 lag_master_next_table_id[0x18]; + + u8 reserved_at_120[0x80]; }; struct mlx5_ifc_create_flow_group_out_bits { @@ -7669,7 +7687,8 @@ struct mlx5_ifc_set_flow_table_root_in_bits { }; enum { - MLX5_MODIFY_FLOW_TABLE_MISS_TABLE_ID = 0x1, + MLX5_MODIFY_FLOW_TABLE_MISS_TABLE_ID = (1UL << 0), + MLX5_MODIFY_FLOW_TABLE_LAG_NEXT_TABLE_ID = (1UL << 15), }; struct mlx5_ifc_modify_flow_table_out_bits { @@ -7708,7 +7727,10 @@ struct mlx5_ifc_modify_flow_table_in_bits { u8 reserved_at_e0[0x8]; u8 table_miss_id[0x18]; - u8 reserved_at_100[0x100]; + u8 reserved_at_100[0x8]; + u8 lag_master_next_table_id[0x18]; + + u8 reserved_at_120[0x80]; }; struct mlx5_ifc_ets_tcn_config_reg_bits { @@ -7816,4 +7838,134 @@ struct mlx5_ifc_dcbx_param_bits { u8 error[0x8]; u8 reserved_at_a0[0x160]; }; + +struct mlx5_ifc_lagc_bits { + u8 reserved_at_0[0x1d]; + u8 lag_state[0x3]; + + u8 reserved_at_20[0x14]; + u8 tx_remap_affinity_2[0x4]; + u8 reserved_at_38[0x4]; + u8 tx_remap_affinity_1[0x4]; +}; + +struct mlx5_ifc_create_lag_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_create_lag_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + struct mlx5_ifc_lagc_bits ctx; +}; + +struct mlx5_ifc_modify_lag_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_modify_lag_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x20]; + u8 field_select[0x20]; + + struct mlx5_ifc_lagc_bits ctx; +}; + +struct mlx5_ifc_query_lag_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; + + struct mlx5_ifc_lagc_bits ctx; +}; + +struct mlx5_ifc_query_lag_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_destroy_lag_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_destroy_lag_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_create_vport_lag_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_create_vport_lag_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_destroy_vport_lag_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_destroy_vport_lag_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x40]; +}; + #endif /* MLX5_IFC_H */ From 7907f23adc186700efbe56c032527e47485c86ab Mon Sep 17 00:00:00 2001 From: Aviv Heller Date: Sun, 17 Apr 2016 16:57:32 +0300 Subject: [PATCH 0261/1715] net/mlx5: Implement RoCE LAG feature Available on dual port cards only, this feature keeps track, using netdev LAG events, of the bonding and link status of each port's PF netdev. When both of the card's PF netdevs are enslaved to the same bond/team master, and only them, LAG state is active. During LAG, only one IB device is present for both ports. In addition to the above, this commit includes FW commands used for managing the LAG, new facilities for adding and removing a single device by interface, and port remap functionality according to bond events. Please note that this feature is currently used only for mimicking Ethernet bonding for RoCE - netdevs functionality is not altered, and their bonding continues to be managed solely by bond/team driver. Signed-off-by: Aviv Heller Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- .../net/ethernet/mellanox/mlx5/core/Makefile | 2 +- .../net/ethernet/mellanox/mlx5/core/en_main.c | 3 + drivers/net/ethernet/mellanox/mlx5/core/lag.c | 530 ++++++++++++++++++ .../net/ethernet/mellanox/mlx5/core/main.c | 51 +- .../ethernet/mellanox/mlx5/core/mlx5_core.h | 12 + include/linux/mlx5/driver.h | 4 + 6 files changed, 588 insertions(+), 14 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lag.c diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 05cc1effc13c..dad326ccd4dd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -3,7 +3,7 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \ mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \ - fs_counters.o rl.o + fs_counters.o rl.o lag.o mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o eswitch_offloads.o \ en_main.o en_common.o en_fs.o en_ethtool.o en_tx.o \ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 9e36c1538904..219804fa7c74 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3369,6 +3369,8 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) struct mlx5_eswitch *esw = mdev->priv.eswitch; struct mlx5_eswitch_rep rep; + mlx5_lag_add(mdev, netdev); + if (mlx5e_vxlan_allowed(mdev)) { rtnl_lock(); udp_tunnel_get_rx_info(netdev); @@ -3391,6 +3393,7 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv) { queue_work(priv->wq, &priv->set_rx_mode_work); mlx5e_disable_async_events(priv); + mlx5_lag_remove(priv->mdev); } static const struct mlx5e_profile mlx5e_nic_profile = { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c new file mode 100644 index 000000000000..3bf0a7ffe0e0 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -0,0 +1,530 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include "mlx5_core.h" + +enum { + MLX5_LAG_FLAG_BONDED = 1 << 0, +}; + +struct lag_func { + struct mlx5_core_dev *dev; + struct net_device *netdev; +}; + +/* Used for collection of netdev event info. */ +struct lag_tracker { + enum netdev_lag_tx_type tx_type; + struct netdev_lag_lower_state_info netdev_state[MLX5_MAX_PORTS]; + bool is_bonded; +}; + +/* LAG data of a ConnectX card. + * It serves both its phys functions. + */ +struct mlx5_lag { + u8 flags; + u8 v2p_map[MLX5_MAX_PORTS]; + struct lag_func pf[MLX5_MAX_PORTS]; + struct lag_tracker tracker; + struct delayed_work bond_work; + struct notifier_block nb; +}; + +/* General purpose, use for short periods of time. + * Beware of lock dependencies (preferably, no locks should be acquired + * under it). + */ +static DEFINE_MUTEX(lag_mutex); + +static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1, + u8 remap_port2) +{ + u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(create_lag_out)] = {0}; + void *lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx); + + MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG); + + MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1); + MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 remap_port1, + u8 remap_port2) +{ + u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(modify_lag_out)] = {0}; + void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx); + + MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG); + MLX5_SET(modify_lag_in, in, field_select, 0x1); + + MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1); + MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +static int mlx5_cmd_destroy_lag(struct mlx5_core_dev *dev) +{ + u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(destroy_lag_out)] = {0}; + + MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +static struct mlx5_lag *mlx5_lag_dev_get(struct mlx5_core_dev *dev) +{ + return dev->priv.lag; +} + +static int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev, + struct net_device *ndev) +{ + int i; + + for (i = 0; i < MLX5_MAX_PORTS; i++) + if (ldev->pf[i].netdev == ndev) + return i; + + return -1; +} + +static bool mlx5_lag_is_bonded(struct mlx5_lag *ldev) +{ + return !!(ldev->flags & MLX5_LAG_FLAG_BONDED); +} + +static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker, + u8 *port1, u8 *port2) +{ + if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) { + if (tracker->netdev_state[0].tx_enabled) { + *port1 = 1; + *port2 = 1; + } else { + *port1 = 2; + *port2 = 2; + } + } else { + *port1 = 1; + *port2 = 2; + if (!tracker->netdev_state[0].link_up) + *port1 = 2; + else if (!tracker->netdev_state[1].link_up) + *port2 = 1; + } +} + +static void mlx5_activate_lag(struct mlx5_lag *ldev, + struct lag_tracker *tracker) +{ + struct mlx5_core_dev *dev0 = ldev->pf[0].dev; + int err; + + ldev->flags |= MLX5_LAG_FLAG_BONDED; + + mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[0], + &ldev->v2p_map[1]); + + err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[0], ldev->v2p_map[1]); + if (err) + mlx5_core_err(dev0, + "Failed to create LAG (%d)\n", + err); +} + +static void mlx5_deactivate_lag(struct mlx5_lag *ldev) +{ + struct mlx5_core_dev *dev0 = ldev->pf[0].dev; + int err; + + ldev->flags &= ~MLX5_LAG_FLAG_BONDED; + + err = mlx5_cmd_destroy_lag(dev0); + if (err) + mlx5_core_err(dev0, + "Failed to destroy LAG (%d)\n", + err); +} + +static void mlx5_do_bond(struct mlx5_lag *ldev) +{ + struct mlx5_core_dev *dev0 = ldev->pf[0].dev; + struct mlx5_core_dev *dev1 = ldev->pf[1].dev; + struct lag_tracker tracker; + u8 v2p_port1, v2p_port2; + int i, err; + + if (!dev0 || !dev1) + return; + + mutex_lock(&lag_mutex); + tracker = ldev->tracker; + mutex_unlock(&lag_mutex); + + if (tracker.is_bonded && !mlx5_lag_is_bonded(ldev)) { + for (i = 0; i < MLX5_MAX_PORTS; i++) + mlx5_remove_dev_by_protocol(ldev->pf[i].dev, + MLX5_INTERFACE_PROTOCOL_IB); + + mlx5_activate_lag(ldev, &tracker); + + mlx5_add_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB); + mlx5_nic_vport_enable_roce(dev1); + } else if (tracker.is_bonded && mlx5_lag_is_bonded(ldev)) { + mlx5_infer_tx_affinity_mapping(&tracker, &v2p_port1, + &v2p_port2); + + if ((v2p_port1 != ldev->v2p_map[0]) || + (v2p_port2 != ldev->v2p_map[1])) { + ldev->v2p_map[0] = v2p_port1; + ldev->v2p_map[1] = v2p_port2; + + err = mlx5_cmd_modify_lag(dev0, v2p_port1, v2p_port2); + if (err) + mlx5_core_err(dev0, + "Failed to modify LAG (%d)\n", + err); + } + } else if (!tracker.is_bonded && mlx5_lag_is_bonded(ldev)) { + mlx5_remove_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB); + mlx5_nic_vport_disable_roce(dev1); + + mlx5_deactivate_lag(ldev); + + for (i = 0; i < MLX5_MAX_PORTS; i++) + if (ldev->pf[i].dev) + mlx5_add_dev_by_protocol(ldev->pf[i].dev, + MLX5_INTERFACE_PROTOCOL_IB); + } +} + +static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay) +{ + schedule_delayed_work(&ldev->bond_work, delay); +} + +static void mlx5_do_bond_work(struct work_struct *work) +{ + struct delayed_work *delayed_work = to_delayed_work(work); + struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag, + bond_work); + int status; + + status = mutex_trylock(&mlx5_intf_mutex); + if (!status) { + /* 1 sec delay. */ + mlx5_queue_bond_work(ldev, HZ); + return; + } + + mlx5_do_bond(ldev); + mutex_unlock(&mlx5_intf_mutex); +} + +static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev, + struct lag_tracker *tracker, + struct net_device *ndev, + struct netdev_notifier_changeupper_info *info) +{ + struct net_device *upper = info->upper_dev, *ndev_tmp; + struct netdev_lag_upper_info *lag_upper_info; + bool is_bonded; + int bond_status = 0; + int num_slaves = 0; + int idx; + + if (!netif_is_lag_master(upper)) + return 0; + + lag_upper_info = info->upper_info; + + /* The event may still be of interest if the slave does not belong to + * us, but is enslaved to a master which has one or more of our netdevs + * as slaves (e.g., if a new slave is added to a master that bonds two + * of our netdevs, we should unbond). + */ + rcu_read_lock(); + for_each_netdev_in_bond_rcu(upper, ndev_tmp) { + idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp); + if (idx > -1) + bond_status |= (1 << idx); + + num_slaves++; + } + rcu_read_unlock(); + + /* None of this lagdev's netdevs are slaves of this master. */ + if (!(bond_status & 0x3)) + return 0; + + if (lag_upper_info) + tracker->tx_type = lag_upper_info->tx_type; + + /* Determine bonding status: + * A device is considered bonded if both its physical ports are slaves + * of the same lag master, and only them. + * Lag mode must be activebackup or hash. + */ + is_bonded = (num_slaves == MLX5_MAX_PORTS) && + (bond_status == 0x3) && + ((tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) || + (tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH)); + + if (tracker->is_bonded != is_bonded) { + tracker->is_bonded = is_bonded; + return 1; + } + + return 0; +} + +static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev, + struct lag_tracker *tracker, + struct net_device *ndev, + struct netdev_notifier_changelowerstate_info *info) +{ + struct netdev_lag_lower_state_info *lag_lower_info; + int idx; + + if (!netif_is_lag_port(ndev)) + return 0; + + idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev); + if (idx == -1) + return 0; + + /* This information is used to determine virtual to physical + * port mapping. + */ + lag_lower_info = info->lower_state_info; + if (!lag_lower_info) + return 0; + + tracker->netdev_state[idx] = *lag_lower_info; + + return 1; +} + +static int mlx5_lag_netdev_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct net_device *ndev = netdev_notifier_info_to_dev(ptr); + struct lag_tracker tracker; + struct mlx5_lag *ldev; + int changed = 0; + + if (!net_eq(dev_net(ndev), &init_net)) + return NOTIFY_DONE; + + if ((event != NETDEV_CHANGEUPPER) && (event != NETDEV_CHANGELOWERSTATE)) + return NOTIFY_DONE; + + ldev = container_of(this, struct mlx5_lag, nb); + tracker = ldev->tracker; + + switch (event) { + case NETDEV_CHANGEUPPER: + changed = mlx5_handle_changeupper_event(ldev, &tracker, ndev, + ptr); + break; + case NETDEV_CHANGELOWERSTATE: + changed = mlx5_handle_changelowerstate_event(ldev, &tracker, + ndev, ptr); + break; + } + + mutex_lock(&lag_mutex); + ldev->tracker = tracker; + mutex_unlock(&lag_mutex); + + if (changed) + mlx5_queue_bond_work(ldev, 0); + + return NOTIFY_DONE; +} + +static struct mlx5_lag *mlx5_lag_dev_alloc(void) +{ + struct mlx5_lag *ldev; + + ldev = kzalloc(sizeof(*ldev), GFP_KERNEL); + if (!ldev) + return NULL; + + INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work); + + return ldev; +} + +static void mlx5_lag_dev_free(struct mlx5_lag *ldev) +{ + kfree(ldev); +} + +static void mlx5_lag_dev_add_pf(struct mlx5_lag *ldev, + struct mlx5_core_dev *dev, + struct net_device *netdev) +{ + unsigned int fn = PCI_FUNC(dev->pdev->devfn); + + if (fn >= MLX5_MAX_PORTS) + return; + + mutex_lock(&lag_mutex); + ldev->pf[fn].dev = dev; + ldev->pf[fn].netdev = netdev; + ldev->tracker.netdev_state[fn].link_up = 0; + ldev->tracker.netdev_state[fn].tx_enabled = 0; + + dev->priv.lag = ldev; + mutex_unlock(&lag_mutex); +} + +static void mlx5_lag_dev_remove_pf(struct mlx5_lag *ldev, + struct mlx5_core_dev *dev) +{ + int i; + + for (i = 0; i < MLX5_MAX_PORTS; i++) + if (ldev->pf[i].dev == dev) + break; + + if (i == MLX5_MAX_PORTS) + return; + + mutex_lock(&lag_mutex); + memset(&ldev->pf[i], 0, sizeof(*ldev->pf)); + + dev->priv.lag = NULL; + mutex_unlock(&lag_mutex); +} + +static u16 mlx5_gen_pci_id(struct mlx5_core_dev *dev) +{ + return (u16)((dev->pdev->bus->number << 8) | + PCI_SLOT(dev->pdev->devfn)); +} + +/* Must be called with intf_mutex held */ +void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev) +{ + struct mlx5_lag *ldev = NULL; + struct mlx5_core_dev *tmp_dev; + struct mlx5_priv *priv; + u16 pci_id; + + if (!MLX5_CAP_GEN(dev, vport_group_manager) || + !MLX5_CAP_GEN(dev, lag_master) || + (MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_MAX_PORTS)) + return; + + pci_id = mlx5_gen_pci_id(dev); + + mlx5_core_for_each_priv(priv) { + tmp_dev = container_of(priv, struct mlx5_core_dev, priv); + if ((dev != tmp_dev) && + (mlx5_gen_pci_id(tmp_dev) == pci_id)) { + ldev = tmp_dev->priv.lag; + break; + } + } + + if (!ldev) { + ldev = mlx5_lag_dev_alloc(); + if (!ldev) { + mlx5_core_err(dev, "Failed to alloc lag dev\n"); + return; + } + } + + mlx5_lag_dev_add_pf(ldev, dev, netdev); + + if (!ldev->nb.notifier_call) { + ldev->nb.notifier_call = mlx5_lag_netdev_event; + if (register_netdevice_notifier(&ldev->nb)) { + ldev->nb.notifier_call = NULL; + mlx5_core_err(dev, "Failed to register LAG netdev notifier\n"); + } + } +} + +/* Must be called with intf_mutex held */ +void mlx5_lag_remove(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev; + int i; + + ldev = mlx5_lag_dev_get(dev); + if (!ldev) + return; + + if (mlx5_lag_is_bonded(ldev)) + mlx5_deactivate_lag(ldev); + + mlx5_lag_dev_remove_pf(ldev, dev); + + for (i = 0; i < MLX5_MAX_PORTS; i++) + if (ldev->pf[i].dev) + break; + + if (i == MLX5_MAX_PORTS) { + if (ldev->nb.notifier_call) + unregister_netdevice_notifier(&ldev->nb); + cancel_delayed_work_sync(&ldev->bond_work); + mlx5_lag_dev_free(ldev); + } +} + +bool mlx5_lag_is_active(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev; + bool res; + + mutex_lock(&lag_mutex); + ldev = mlx5_lag_dev_get(dev); + res = ldev && mlx5_lag_is_bonded(ldev); + mutex_unlock(&lag_mutex); + + return res; +} +EXPORT_SYMBOL(mlx5_lag_is_active); + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index d6446a3623b2..db7bcfcef3da 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -73,8 +73,9 @@ module_param_named(prof_sel, prof_sel, int, 0444); MODULE_PARM_DESC(prof_sel, "profile selector. Valid range 0 - 2"); static LIST_HEAD(intf_list); -static LIST_HEAD(dev_list); -static DEFINE_MUTEX(intf_mutex); + +LIST_HEAD(mlx5_dev_list); +DEFINE_MUTEX(mlx5_intf_mutex); struct mlx5_device_context { struct list_head list; @@ -820,11 +821,11 @@ static int mlx5_register_device(struct mlx5_core_dev *dev) struct mlx5_priv *priv = &dev->priv; struct mlx5_interface *intf; - mutex_lock(&intf_mutex); - list_add_tail(&priv->dev_list, &dev_list); + mutex_lock(&mlx5_intf_mutex); + list_add_tail(&priv->dev_list, &mlx5_dev_list); list_for_each_entry(intf, &intf_list, list) mlx5_add_device(intf, priv); - mutex_unlock(&intf_mutex); + mutex_unlock(&mlx5_intf_mutex); return 0; } @@ -834,11 +835,11 @@ static void mlx5_unregister_device(struct mlx5_core_dev *dev) struct mlx5_priv *priv = &dev->priv; struct mlx5_interface *intf; - mutex_lock(&intf_mutex); + mutex_lock(&mlx5_intf_mutex); list_for_each_entry(intf, &intf_list, list) mlx5_remove_device(intf, priv); list_del(&priv->dev_list); - mutex_unlock(&intf_mutex); + mutex_unlock(&mlx5_intf_mutex); } int mlx5_register_interface(struct mlx5_interface *intf) @@ -848,11 +849,11 @@ int mlx5_register_interface(struct mlx5_interface *intf) if (!intf->add || !intf->remove) return -EINVAL; - mutex_lock(&intf_mutex); + mutex_lock(&mlx5_intf_mutex); list_add_tail(&intf->list, &intf_list); - list_for_each_entry(priv, &dev_list, dev_list) + list_for_each_entry(priv, &mlx5_dev_list, dev_list) mlx5_add_device(intf, priv); - mutex_unlock(&intf_mutex); + mutex_unlock(&mlx5_intf_mutex); return 0; } @@ -862,11 +863,11 @@ void mlx5_unregister_interface(struct mlx5_interface *intf) { struct mlx5_priv *priv; - mutex_lock(&intf_mutex); - list_for_each_entry(priv, &dev_list, dev_list) + mutex_lock(&mlx5_intf_mutex); + list_for_each_entry(priv, &mlx5_dev_list, dev_list) mlx5_remove_device(intf, priv); list_del(&intf->list); - mutex_unlock(&intf_mutex); + mutex_unlock(&mlx5_intf_mutex); } EXPORT_SYMBOL(mlx5_unregister_interface); @@ -892,6 +893,30 @@ void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol) } EXPORT_SYMBOL(mlx5_get_protocol_dev); +/* Must be called with intf_mutex held */ +void mlx5_add_dev_by_protocol(struct mlx5_core_dev *dev, int protocol) +{ + struct mlx5_interface *intf; + + list_for_each_entry(intf, &intf_list, list) + if (intf->protocol == protocol) { + mlx5_add_device(intf, &dev->priv); + break; + } +} + +/* Must be called with intf_mutex held */ +void mlx5_remove_dev_by_protocol(struct mlx5_core_dev *dev, int protocol) +{ + struct mlx5_interface *intf; + + list_for_each_entry(intf, &intf_list, list) + if (intf->protocol == protocol) { + mlx5_remove_device(intf, &dev->priv); + break; + } +} + static int mlx5_pci_init(struct mlx5_core_dev *dev, struct mlx5_priv *priv) { struct pci_dev *pdev = dev->pdev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index ef0eeedcceb6..173a6b2e358a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -46,6 +46,9 @@ extern int mlx5_core_debug_mask; +extern struct list_head mlx5_dev_list; +extern struct mutex mlx5_intf_mutex; + #define mlx5_core_dbg(__dev, format, ...) \ dev_dbg(&(__dev)->pdev->dev, "%s:%s:%d:(pid %d): " format, \ (__dev)->priv.name, __func__, __LINE__, current->pid, \ @@ -70,6 +73,9 @@ do { \ #define mlx5_core_info(__dev, format, ...) \ dev_info(&(__dev)->pdev->dev, format, ##__VA_ARGS__) +#define mlx5_core_for_each_priv(__priv) \ + list_for_each_entry(__priv, &mlx5_dev_list, dev_list) + enum { MLX5_CMD_DATA, /* print command payload only */ MLX5_CMD_TIME, /* print command execution time */ @@ -92,6 +98,12 @@ u32 mlx5_get_msix_vec(struct mlx5_core_dev *dev, int vecidx); struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn); void mlx5_cq_tasklet_cb(unsigned long data); +void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev); +void mlx5_lag_remove(struct mlx5_core_dev *dev); + +void mlx5_add_dev_by_protocol(struct mlx5_core_dev *dev, int protocol); +void mlx5_remove_dev_by_protocol(struct mlx5_core_dev *dev, int protocol); + void mlx5e_init(void); void mlx5e_cleanup(void); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 0ea78b5edbb2..ed983b8c3213 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -477,6 +477,7 @@ struct mlx5_fc_stats { }; struct mlx5_eswitch; +struct mlx5_lag; struct mlx5_rl_entry { u32 rate; @@ -550,6 +551,7 @@ struct mlx5_priv { struct mlx5_flow_steering *steering; struct mlx5_eswitch *eswitch; struct mlx5_core_sriov sriov; + struct mlx5_lag *lag; unsigned long pci_dev_data; struct mlx5_fc_stats fc_stats; struct mlx5_rl_table rl_table; @@ -942,6 +944,8 @@ int mlx5_register_interface(struct mlx5_interface *intf); void mlx5_unregister_interface(struct mlx5_interface *intf); int mlx5_core_query_vendor_id(struct mlx5_core_dev *mdev, u32 *vendor_id); +bool mlx5_lag_is_active(struct mlx5_core_dev *dev); + struct mlx5_profile { u64 mask; u8 log_max_qp; From 6a32047a441b870dd2570fe0831dada5e9ce40f6 Mon Sep 17 00:00:00 2001 From: Aviv Heller Date: Mon, 9 May 2016 11:06:44 +0000 Subject: [PATCH 0262/1715] net/mlx5: Get RoCE netdev Used by IB driver for determining the IB bond device's netdev, when LAG is active. Returns PF0's netdev if mode is not active-backup, or the PF netdev of the active slave when mode is active-backup. Signed-off-by: Aviv Heller Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/lag.c | 27 +++++++++++++++++++ include/linux/mlx5/driver.h | 1 + 2 files changed, 28 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c index 3bf0a7ffe0e0..952305054e16 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -528,3 +528,30 @@ bool mlx5_lag_is_active(struct mlx5_core_dev *dev) } EXPORT_SYMBOL(mlx5_lag_is_active); +struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev) +{ + struct net_device *ndev = NULL; + struct mlx5_lag *ldev; + + mutex_lock(&lag_mutex); + ldev = mlx5_lag_dev_get(dev); + + if (!(ldev && mlx5_lag_is_bonded(ldev))) + goto unlock; + + if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) { + ndev = ldev->tracker.netdev_state[0].tx_enabled ? + ldev->pf[0].netdev : ldev->pf[1].netdev; + } else { + ndev = ldev->pf[0].netdev; + } + if (ndev) + dev_hold(ndev); + +unlock: + mutex_unlock(&lag_mutex); + + return ndev; +} +EXPORT_SYMBOL(mlx5_lag_get_roce_netdev); + diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index ed983b8c3213..c568dd927330 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -945,6 +945,7 @@ void mlx5_unregister_interface(struct mlx5_interface *intf); int mlx5_core_query_vendor_id(struct mlx5_core_dev *mdev, u32 *vendor_id); bool mlx5_lag_is_active(struct mlx5_core_dev *dev); +struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev); struct mlx5_profile { u64 mask; From db60b80273c5e0e61c2ad59d2903c9988b9d184c Mon Sep 17 00:00:00 2001 From: Aviv Heller Date: Mon, 30 May 2016 18:31:13 +0300 Subject: [PATCH 0263/1715] net/mlx5e: Avoid port remapping of mlx5e netdev TISes TISes belonging to the mlx5e NIC should not be subject to port remap. Signed-off-by: Aviv Heller Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 4 ++++ drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h | 12 ++++++++++++ 2 files changed, 16 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 219804fa7c74..fa404142320d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2025,6 +2025,10 @@ static int mlx5e_create_tis(struct mlx5e_priv *priv, int tc) MLX5_SET(tisc, tisc, prio, tc << 1); MLX5_SET(tisc, tisc, transport_domain, mdev->mlx5e_res.td.tdn); + + if (mlx5_lag_is_lacp_owner(mdev)) + MLX5_SET(tisc, tisc, strict_lag_tx_port_affinity, 1); + return mlx5_core_create_tis(mdev, in, sizeof(in), &priv->tisn[tc]); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 173a6b2e358a..e716996a8b92 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -107,4 +107,16 @@ void mlx5_remove_dev_by_protocol(struct mlx5_core_dev *dev, int protocol); void mlx5e_init(void); void mlx5e_cleanup(void); +static inline int mlx5_lag_is_lacp_owner(struct mlx5_core_dev *dev) +{ + /* LACP owner conditions: + * 1) Function is physical. + * 2) LAG is supported by FW. + * 3) LAG is managed by driver (currently the only option). + */ + return MLX5_CAP_GEN(dev, vport_group_manager) && + (MLX5_CAP_GEN(dev, num_lag_ports) > 1) && + MLX5_CAP_GEN(dev, lag_master); +} + #endif /* __MLX5_CORE_H__ */ From edb31b1686751f605eb02a6dcf5ef29c5d485a8e Mon Sep 17 00:00:00 2001 From: Aviv Heller Date: Sun, 17 Apr 2016 19:32:13 +0300 Subject: [PATCH 0264/1715] net/mlx5: LAG and SRIOV cannot be used together Until support will be added for RoCE LAG SRIOV. Signed-off-by: Aviv Heller Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/lag.c | 6 ++++++ drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h | 1 + drivers/net/ethernet/mellanox/mlx5/core/sriov.c | 12 ++++++++++++ 3 files changed, 19 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c index 952305054e16..3cb570ac2b80 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -201,6 +201,12 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) mutex_unlock(&lag_mutex); if (tracker.is_bonded && !mlx5_lag_is_bonded(ldev)) { + if (mlx5_sriov_is_enabled(dev0) || + mlx5_sriov_is_enabled(dev1)) { + mlx5_core_warn(dev0, "LAG is not supported with SRIOV"); + return; + } + for (i = 0; i < MLX5_MAX_PORTS; i++) mlx5_remove_dev_by_protocol(ldev->pf[i].dev, MLX5_INTERFACE_PROTOCOL_IB); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index e716996a8b92..8557435280c3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -90,6 +90,7 @@ void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, void mlx5_enter_error_state(struct mlx5_core_dev *dev); void mlx5_disable_device(struct mlx5_core_dev *dev); int mlx5_core_sriov_configure(struct pci_dev *dev, int num_vfs); +bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev); int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id); int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id); int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index 0680dfb4bb1e..78e789245183 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -37,6 +37,13 @@ #include "eswitch.h" #endif +bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev) +{ + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + + return !!sriov->num_vfs; +} + static void enable_vfs(struct mlx5_core_dev *dev, int num_vfs) { struct mlx5_core_sriov *sriov = &dev->priv.sriov; @@ -144,6 +151,11 @@ int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs) if (!mlx5_core_is_pf(dev)) return -EPERM; + if (num_vfs && mlx5_lag_is_active(dev)) { + mlx5_core_warn(dev, "can't turn sriov on while LAG is active"); + return -EINVAL; + } + mlx5_core_cleanup_vfs(dev); if (!num_vfs) { From aaff1bea16bb7f259a263c3ae4633d092e2da799 Mon Sep 17 00:00:00 2001 From: Aviv Heller Date: Mon, 9 May 2016 09:57:05 +0000 Subject: [PATCH 0265/1715] net/mlx5: LAG demux flow table support Add interfaces to allow the creation and destruction of a LAG demux flow table. It is a special flow table used during LAG for redirecting non user-mode packets from PF0 to PF1 root ft, if a packet was received on phys port two. Signed-off-by: Aviv Heller Reviewed-by: Maor Gottlieb Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- .../net/ethernet/mellanox/mlx5/core/fs_cmd.c | 56 ++++++++++++++----- .../net/ethernet/mellanox/mlx5/core/fs_cmd.h | 1 + .../net/ethernet/mellanox/mlx5/core/fs_core.c | 31 +++++++--- .../net/ethernet/mellanox/mlx5/core/fs_core.h | 6 ++ include/linux/mlx5/fs.h | 3 + 5 files changed, 75 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 7aaefa9aaf1c..7a0415e6d339 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -58,6 +58,7 @@ int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev, int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev, u16 vport, + enum fs_flow_table_op_mod op_mod, enum fs_flow_table_type type, unsigned int level, unsigned int log_size, struct mlx5_flow_table *next_ft, unsigned int *table_id) @@ -69,10 +70,6 @@ int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev, MLX5_SET(create_flow_table_in, in, opcode, MLX5_CMD_OP_CREATE_FLOW_TABLE); - if (next_ft) { - MLX5_SET(create_flow_table_in, in, table_miss_mode, 1); - MLX5_SET(create_flow_table_in, in, table_miss_id, next_ft->id); - } MLX5_SET(create_flow_table_in, in, table_type, type); MLX5_SET(create_flow_table_in, in, level, level); MLX5_SET(create_flow_table_in, in, log_size, log_size); @@ -81,6 +78,22 @@ int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev, MLX5_SET(create_flow_table_in, in, other_vport, 1); } + switch (op_mod) { + case FS_FT_OP_MOD_NORMAL: + if (next_ft) { + MLX5_SET(create_flow_table_in, in, table_miss_mode, 1); + MLX5_SET(create_flow_table_in, in, table_miss_id, next_ft->id); + } + break; + + case FS_FT_OP_MOD_LAG_DEMUX: + MLX5_SET(create_flow_table_in, in, op_mod, 0x1); + if (next_ft) + MLX5_SET(create_flow_table_in, in, lag_master_next_table_id, + next_ft->id); + break; + } + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); if (!err) *table_id = MLX5_GET(create_flow_table_out, out, @@ -117,17 +130,32 @@ int mlx5_cmd_modify_flow_table(struct mlx5_core_dev *dev, MLX5_CMD_OP_MODIFY_FLOW_TABLE); MLX5_SET(modify_flow_table_in, in, table_type, ft->type); MLX5_SET(modify_flow_table_in, in, table_id, ft->id); - if (ft->vport) { - MLX5_SET(modify_flow_table_in, in, vport_number, ft->vport); - MLX5_SET(modify_flow_table_in, in, other_vport, 1); - } - MLX5_SET(modify_flow_table_in, in, modify_field_select, - MLX5_MODIFY_FLOW_TABLE_MISS_TABLE_ID); - if (next_ft) { - MLX5_SET(modify_flow_table_in, in, table_miss_mode, 1); - MLX5_SET(modify_flow_table_in, in, table_miss_id, next_ft->id); + + if (ft->op_mod == FS_FT_OP_MOD_LAG_DEMUX) { + MLX5_SET(modify_flow_table_in, in, modify_field_select, + MLX5_MODIFY_FLOW_TABLE_LAG_NEXT_TABLE_ID); + if (next_ft) { + MLX5_SET(modify_flow_table_in, in, + lag_master_next_table_id, next_ft->id); + } else { + MLX5_SET(modify_flow_table_in, in, + lag_master_next_table_id, 0); + } } else { - MLX5_SET(modify_flow_table_in, in, table_miss_mode, 0); + if (ft->vport) { + MLX5_SET(modify_flow_table_in, in, vport_number, + ft->vport); + MLX5_SET(modify_flow_table_in, in, other_vport, 1); + } + MLX5_SET(modify_flow_table_in, in, modify_field_select, + MLX5_MODIFY_FLOW_TABLE_MISS_TABLE_ID); + if (next_ft) { + MLX5_SET(modify_flow_table_in, in, table_miss_mode, 1); + MLX5_SET(modify_flow_table_in, in, table_miss_id, + next_ft->id); + } else { + MLX5_SET(modify_flow_table_in, in, table_miss_mode, 0); + } } return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h index ac52fdfb5096..c5bc4686c832 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h @@ -35,6 +35,7 @@ int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev, u16 vport, + enum fs_flow_table_op_mod op_mod, enum fs_flow_table_type type, unsigned int level, unsigned int log_size, struct mlx5_flow_table *next_ft, unsigned int *table_id); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 243efc5a8bd1..eabd73482c86 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -477,7 +477,8 @@ static struct mlx5_flow_group *alloc_flow_group(u32 *create_fg_in) } static struct mlx5_flow_table *alloc_flow_table(int level, u16 vport, int max_fte, - enum fs_flow_table_type table_type) + enum fs_flow_table_type table_type, + enum fs_flow_table_op_mod op_mod) { struct mlx5_flow_table *ft; @@ -487,6 +488,7 @@ static struct mlx5_flow_table *alloc_flow_table(int level, u16 vport, int max_ft ft->level = level; ft->node.type = FS_TYPE_FLOW_TABLE; + ft->op_mod = op_mod; ft->type = table_type; ft->vport = vport; ft->max_fte = max_fte; @@ -724,6 +726,7 @@ static void list_add_flow_table(struct mlx5_flow_table *ft, } static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespace *ns, + enum fs_flow_table_op_mod op_mod, u16 vport, int prio, int max_fte, u32 level) { @@ -756,18 +759,19 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa level += fs_prio->start_level; ft = alloc_flow_table(level, vport, - roundup_pow_of_two(max_fte), - root->table_type); + max_fte ? roundup_pow_of_two(max_fte) : 0, + root->table_type, + op_mod); if (!ft) { err = -ENOMEM; goto unlock_root; } tree_init_node(&ft->node, 1, del_flow_table); - log_table_sz = ilog2(ft->max_fte); + log_table_sz = ft->max_fte ? ilog2(ft->max_fte) : 0; next_ft = find_next_chained_ft(fs_prio); - err = mlx5_cmd_create_flow_table(root->dev, ft->vport, ft->type, ft->level, - log_table_sz, next_ft, &ft->id); + err = mlx5_cmd_create_flow_table(root->dev, ft->vport, ft->op_mod, ft->type, + ft->level, log_table_sz, next_ft, &ft->id); if (err) goto free_ft; @@ -794,16 +798,27 @@ struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns, int prio, int max_fte, u32 level) { - return __mlx5_create_flow_table(ns, 0, prio, max_fte, level); + return __mlx5_create_flow_table(ns, FS_FT_OP_MOD_NORMAL, 0, prio, + max_fte, level); } struct mlx5_flow_table *mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns, int prio, int max_fte, u32 level, u16 vport) { - return __mlx5_create_flow_table(ns, vport, prio, max_fte, level); + return __mlx5_create_flow_table(ns, FS_FT_OP_MOD_NORMAL, vport, prio, + max_fte, level); } +struct mlx5_flow_table *mlx5_create_lag_demux_flow_table( + struct mlx5_flow_namespace *ns, + int prio, u32 level) +{ + return __mlx5_create_flow_table(ns, FS_FT_OP_MOD_LAG_DEMUX, 0, prio, 0, + level); +} +EXPORT_SYMBOL(mlx5_create_lag_demux_flow_table); + struct mlx5_flow_table *mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns, int prio, int num_flow_table_entries, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 9cffb6aeb4e9..23e46e35413f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -51,6 +51,11 @@ enum fs_flow_table_type { FS_FT_FDB = 0X4, }; +enum fs_flow_table_op_mod { + FS_FT_OP_MOD_NORMAL, + FS_FT_OP_MOD_LAG_DEMUX, +}; + enum fs_fte_status { FS_FTE_STATUS_EXISTING = 1UL << 0, }; @@ -93,6 +98,7 @@ struct mlx5_flow_table { unsigned int max_fte; unsigned int level; enum fs_flow_table_type type; + enum fs_flow_table_op_mod op_mod; struct { bool active; unsigned int required_groups; diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index e036d6030867..7edfe0b8f1ec 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -106,6 +106,9 @@ mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns, int prio, int num_flow_table_entries, u32 level, u16 vport); +struct mlx5_flow_table *mlx5_create_lag_demux_flow_table( + struct mlx5_flow_namespace *ns, + int prio, u32 level); int mlx5_destroy_flow_table(struct mlx5_flow_table *ft); /* inbox should be set with the following values: From 3e75d4ebaae7aac5ba82fc7a6e0e6fb56dac1916 Mon Sep 17 00:00:00 2001 From: Aviv Heller Date: Mon, 9 May 2016 10:02:29 +0000 Subject: [PATCH 0266/1715] net/mlx5: Add LAG flow steering namespace This namespace is used for LAG demux flowtable. The idea is to position the LAG demux ft between bypass and kernel flowtables, allowing raw-eth traffic from both ports to be received by the PF0 IB device. Signed-off-by: Aviv Heller Reviewed-by: Maor Gottlieb Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 11 ++++++++++- include/linux/mlx5/fs.h | 1 + 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index eabd73482c86..ac414b7f366e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -96,6 +96,10 @@ #define OFFLOADS_NUM_PRIOS 1 #define OFFLOADS_MIN_LEVEL (ANCHOR_MIN_LEVEL + 1) +#define LAG_PRIO_NUM_LEVELS 1 +#define LAG_NUM_PRIOS 1 +#define LAG_MIN_LEVEL (OFFLOADS_MIN_LEVEL + 1) + struct node_caps { size_t arr_sz; long *caps; @@ -111,12 +115,16 @@ static struct init_tree_node { int num_levels; } root_fs = { .type = FS_TYPE_NAMESPACE, - .ar_size = 6, + .ar_size = 7, .children = (struct init_tree_node[]) { ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0, FS_CHAINING_CAPS, ADD_NS(ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS, BY_PASS_PRIO_NUM_LEVELS))), + ADD_PRIO(0, LAG_MIN_LEVEL, 0, + FS_CHAINING_CAPS, + ADD_NS(ADD_MULTIPLE_PRIO(LAG_NUM_PRIOS, + LAG_PRIO_NUM_LEVELS))), ADD_PRIO(0, OFFLOADS_MIN_LEVEL, 0, {}, ADD_NS(ADD_MULTIPLE_PRIO(OFFLOADS_NUM_PRIOS, OFFLOADS_MAX_FT))), ADD_PRIO(0, ETHTOOL_MIN_LEVEL, 0, @@ -1396,6 +1404,7 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev, switch (type) { case MLX5_FLOW_NAMESPACE_BYPASS: + case MLX5_FLOW_NAMESPACE_LAG: case MLX5_FLOW_NAMESPACE_OFFLOADS: case MLX5_FLOW_NAMESPACE_ETHTOOL: case MLX5_FLOW_NAMESPACE_KERNEL: diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 7edfe0b8f1ec..8803212fc3aa 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -54,6 +54,7 @@ static inline void build_leftovers_ft_param(int *priority, enum mlx5_flow_namespace_type { MLX5_FLOW_NAMESPACE_BYPASS, + MLX5_FLOW_NAMESPACE_LAG, MLX5_FLOW_NAMESPACE_OFFLOADS, MLX5_FLOW_NAMESPACE_ETHTOOL, MLX5_FLOW_NAMESPACE_KERNEL, From 3bc34f3bcb087764796d9a6eaa476e270114eb8f Mon Sep 17 00:00:00 2001 From: Aviv Heller Date: Mon, 9 May 2016 10:38:42 +0000 Subject: [PATCH 0267/1715] net/mlx5: Vport LAG creation support Add interfaces for issuing CREATE_VPORT_LAG and DESTROY_VPORT_LAG commands. Used for receiving PF1's eth traffic on PF0's root ft. Signed-off-by: Aviv Heller Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/lag.c | 22 +++++++++++++++++++ include/linux/mlx5/driver.h | 2 ++ 2 files changed, 24 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c index 3cb570ac2b80..5fe320c5a60e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -110,6 +110,28 @@ static int mlx5_cmd_destroy_lag(struct mlx5_core_dev *dev) return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } +int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev) +{ + u32 in[MLX5_ST_SZ_DW(create_vport_lag_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(create_vport_lag_out)] = {0}; + + MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} +EXPORT_SYMBOL(mlx5_cmd_create_vport_lag); + +int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev) +{ + u32 in[MLX5_ST_SZ_DW(destroy_vport_lag_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(destroy_vport_lag_out)] = {0}; + + MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} +EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag); + static struct mlx5_lag *mlx5_lag_dev_get(struct mlx5_core_dev *dev) { return dev->priv.lag; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index c568dd927330..5cb9fa7aec61 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -944,6 +944,8 @@ int mlx5_register_interface(struct mlx5_interface *intf); void mlx5_unregister_interface(struct mlx5_interface *intf); int mlx5_core_query_vendor_id(struct mlx5_core_dev *mdev, u32 *vendor_id); +int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev); +int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev); bool mlx5_lag_is_active(struct mlx5_core_dev *dev); struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev); From 917b41aab7b3afb2221ac0895f302ee32431fa6e Mon Sep 17 00:00:00 2001 From: Aviv Heller Date: Mon, 30 May 2016 18:32:32 +0300 Subject: [PATCH 0268/1715] net/mlx5: Configure IB devices according to LAG state When mlx5_ib is loaded, we would like each card's IB devices to be added according to its LAG state (one IB device, instead of two, is to be added if LAG is active). Signed-off-by: Aviv Heller Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/lag.c | 17 +++++++++++++++++ drivers/net/ethernet/mellanox/mlx5/core/main.c | 3 +++ .../net/ethernet/mellanox/mlx5/core/mlx5_core.h | 2 ++ 3 files changed, 22 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c index 5fe320c5a60e..92c3e0dbcbdc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -583,3 +583,20 @@ unlock: } EXPORT_SYMBOL(mlx5_lag_get_roce_netdev); +bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv) +{ + struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, + priv); + struct mlx5_lag *ldev; + + if (intf->protocol != MLX5_INTERFACE_PROTOCOL_IB) + return true; + + ldev = mlx5_lag_dev_get(dev); + if (!ldev || !mlx5_lag_is_bonded(ldev) || ldev->pf[0].dev == dev) + return true; + + /* If bonded, we do not add an IB device for PF1. */ + return false; +} + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index db7bcfcef3da..28e3ce690e3f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -783,6 +783,9 @@ static void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv) struct mlx5_device_context *dev_ctx; struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv); + if (!mlx5_lag_intf_add(intf, priv)) + return; + dev_ctx = kmalloc(sizeof(*dev_ctx), GFP_KERNEL); if (!dev_ctx) return; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 8557435280c3..714b71bed2be 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -105,6 +105,8 @@ void mlx5_lag_remove(struct mlx5_core_dev *dev); void mlx5_add_dev_by_protocol(struct mlx5_core_dev *dev, int protocol); void mlx5_remove_dev_by_protocol(struct mlx5_core_dev *dev, int protocol); +bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv); + void mlx5e_init(void); void mlx5e_cleanup(void); From cea824d416522ce63d83b45fc0dc53c0f5b68cee Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Tue, 31 May 2016 14:09:09 +0300 Subject: [PATCH 0269/1715] net/mlx5: Introduce sniffer steering hardware capabilities Define needed hardware capabilities for sniffer RX and TX flow tables. Add the following capabilities: 1. Sniffer RX flow table capabilities. 2. Sniffer TX flow table capabilities. 3. If same TIR can be used by multiple flow tables of different types. Signed-off-by: Maor Gottlieb Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- include/linux/mlx5/device.h | 12 ++++++++++++ include/linux/mlx5/mlx5_ifc.h | 4 +++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 2575070c836e..77c141797152 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -964,6 +964,18 @@ enum mlx5_cap_type { #define MLX5_CAP_FLOWTABLE_NIC_RX_MAX(mdev, cap) \ MLX5_CAP_FLOWTABLE_MAX(mdev, flow_table_properties_nic_receive.cap) +#define MLX5_CAP_FLOWTABLE_SNIFFER_RX(mdev, cap) \ + MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive_sniffer.cap) + +#define MLX5_CAP_FLOWTABLE_SNIFFER_RX_MAX(mdev, cap) \ + MLX5_CAP_FLOWTABLE_MAX(mdev, flow_table_properties_nic_receive_sniffer.cap) + +#define MLX5_CAP_FLOWTABLE_SNIFFER_TX(mdev, cap) \ + MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_transmit_sniffer.cap) + +#define MLX5_CAP_FLOWTABLE_SNIFFER_TX_MAX(mdev, cap) \ + MLX5_CAP_FLOWTABLE_MAX(mdev, flow_table_properties_nic_transmit_sniffer.cap) + #define MLX5_CAP_ESW_FLOWTABLE(mdev, cap) \ MLX5_GET(flow_table_eswitch_cap, \ mdev->hca_caps_cur[MLX5_CAP_ESWITCH_FLOW_TABLE], cap) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 043d5256b754..73a720f74a69 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -483,7 +483,9 @@ struct mlx5_ifc_ads_bits { struct mlx5_ifc_flow_table_nic_cap_bits { u8 nic_rx_multi_path_tirs[0x1]; - u8 reserved_at_1[0x1ff]; + u8 nic_rx_multi_path_tirs_fts[0x1]; + u8 allow_sniffer_and_nic_rx_shared_tir[0x1]; + u8 reserved_at_3[0x1fd]; struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_receive; From 87d22483ce68e609818d61e3a65361f5634c6cd6 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Mon, 6 Jun 2016 18:09:35 +0300 Subject: [PATCH 0270/1715] net/mlx5: Add sniffer namespaces Add sniffer TX and RX namespaces to receive ingoing and outgoing traffic. Each outgoing/incoming packet is duplicated and steered to the sniffer TX/RX namespace in addition to the regular flow. Signed-off-by: Maor Gottlieb Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- .../net/ethernet/mellanox/mlx5/core/fs_core.c | 58 +++++++++++++++++++ .../net/ethernet/mellanox/mlx5/core/fs_core.h | 4 ++ include/linux/mlx5/fs.h | 2 + 3 files changed, 64 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index ac414b7f366e..f5571a5a57c5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1427,6 +1427,16 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev, return &steering->esw_ingress_root_ns->ns; else return NULL; + case MLX5_FLOW_NAMESPACE_SNIFFER_RX: + if (steering->sniffer_rx_root_ns) + return &steering->sniffer_rx_root_ns->ns; + else + return NULL; + case MLX5_FLOW_NAMESPACE_SNIFFER_TX: + if (steering->sniffer_tx_root_ns) + return &steering->sniffer_tx_root_ns->ns; + else + return NULL; default: return NULL; } @@ -1726,10 +1736,46 @@ void mlx5_cleanup_fs(struct mlx5_core_dev *dev) cleanup_root_ns(steering->esw_egress_root_ns); cleanup_root_ns(steering->esw_ingress_root_ns); cleanup_root_ns(steering->fdb_root_ns); + cleanup_root_ns(steering->sniffer_rx_root_ns); + cleanup_root_ns(steering->sniffer_tx_root_ns); mlx5_cleanup_fc_stats(dev); kfree(steering); } +static int init_sniffer_tx_root_ns(struct mlx5_flow_steering *steering) +{ + struct fs_prio *prio; + + steering->sniffer_tx_root_ns = create_root_ns(steering, FS_FT_SNIFFER_TX); + if (!steering->sniffer_tx_root_ns) + return -ENOMEM; + + /* Create single prio */ + prio = fs_create_prio(&steering->sniffer_tx_root_ns->ns, 0, 1); + if (IS_ERR(prio)) { + cleanup_root_ns(steering->sniffer_tx_root_ns); + return PTR_ERR(prio); + } + return 0; +} + +static int init_sniffer_rx_root_ns(struct mlx5_flow_steering *steering) +{ + struct fs_prio *prio; + + steering->sniffer_rx_root_ns = create_root_ns(steering, FS_FT_SNIFFER_RX); + if (!steering->sniffer_rx_root_ns) + return -ENOMEM; + + /* Create single prio */ + prio = fs_create_prio(&steering->sniffer_rx_root_ns->ns, 0, 1); + if (IS_ERR(prio)) { + cleanup_root_ns(steering->sniffer_rx_root_ns); + return PTR_ERR(prio); + } + return 0; +} + static int init_fdb_root_ns(struct mlx5_flow_steering *steering) { struct fs_prio *prio; @@ -1826,6 +1872,18 @@ int mlx5_init_fs(struct mlx5_core_dev *dev) } } + if (MLX5_CAP_FLOWTABLE_SNIFFER_RX(dev, ft_support)) { + err = init_sniffer_rx_root_ns(steering); + if (err) + goto err; + } + + if (MLX5_CAP_FLOWTABLE_SNIFFER_TX(dev, ft_support)) { + err = init_sniffer_tx_root_ns(steering); + if (err) + goto err; + } + return 0; err: mlx5_cleanup_fs(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 23e46e35413f..71ff03bceabb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -49,6 +49,8 @@ enum fs_flow_table_type { FS_FT_ESW_EGRESS_ACL = 0x2, FS_FT_ESW_INGRESS_ACL = 0x3, FS_FT_FDB = 0X4, + FS_FT_SNIFFER_RX = 0X5, + FS_FT_SNIFFER_TX = 0X6, }; enum fs_flow_table_op_mod { @@ -66,6 +68,8 @@ struct mlx5_flow_steering { struct mlx5_flow_root_namespace *fdb_root_ns; struct mlx5_flow_root_namespace *esw_egress_root_ns; struct mlx5_flow_root_namespace *esw_ingress_root_ns; + struct mlx5_flow_root_namespace *sniffer_tx_root_ns; + struct mlx5_flow_root_namespace *sniffer_rx_root_ns; }; struct fs_node { diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 8803212fc3aa..93ebc5e21334 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -63,6 +63,8 @@ enum mlx5_flow_namespace_type { MLX5_FLOW_NAMESPACE_FDB, MLX5_FLOW_NAMESPACE_ESW_EGRESS, MLX5_FLOW_NAMESPACE_ESW_INGRESS, + MLX5_FLOW_NAMESPACE_SNIFFER_RX, + MLX5_FLOW_NAMESPACE_SNIFFER_TX, }; struct mlx5_flow_table; From 2567c4eae1f31492b0f547409e035b9b0501326f Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 18 Aug 2016 11:15:12 +0200 Subject: [PATCH 0271/1715] netfilter: nf_conntrack: restore nf_conntrack_htable_size as exported symbol This is required to iterate over the hash table in cttimeout, ctnetlink and nf_conntrack_ipv4. >> ERROR: "nf_conntrack_htable_size" [net/netfilter/nfnetlink_cttimeout.ko] undefined! ERROR: "nf_conntrack_htable_size" [net/netfilter/nf_conntrack_netlink.ko] undefined! ERROR: "nf_conntrack_htable_size" [net/ipv4/netfilter/nf_conntrack_ipv4.ko] undefined! Fixes: adf0516845bcd0 ("netfilter: remove ip_conntrack* sysctl compat code") Reported-by: kbuild test robot Reported-by: Stephen Rothwell Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index aeba28c5512b..7d90a5d15113 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -160,6 +160,8 @@ static void nf_conntrack_all_unlock(void) } unsigned int nf_conntrack_htable_size __read_mostly; +EXPORT_SYMBOL_GPL(nf_conntrack_htable_size); + unsigned int nf_conntrack_max __read_mostly; seqcount_t nf_conntrack_generation __read_mostly; From eb27163b2e0a2606a044dfdf662ff1c26a63515c Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 26 Jul 2016 14:58:30 +0000 Subject: [PATCH 0272/1715] i40e: Use list_move instead of list_del/list_add Using list_move() instead of list_del() + list_add(). Signed-off-by: Wei Yongjun Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_client.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.c b/drivers/net/ethernet/intel/i40e/i40e_client.c index e1370c556a3c..e6b9263f5c0d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_client.c +++ b/drivers/net/ethernet/intel/i40e/i40e_client.c @@ -662,8 +662,7 @@ static int i40e_client_release(struct i40e_client *client) client->name, pf->hw.pf_id); } /* delete the client instance from the list */ - list_del(&cdev->list); - list_add(&cdev->list, &cdevs_tmp); + list_move(&cdev->list, &cdevs_tmp); atomic_dec(&client->ref_cnt); dev_info(&pf->pdev->dev, "Deleted client instance of Client %s\n", client->name); From 3a0f52928a0a06a7c4c735cd325ca646a1e40fae Mon Sep 17 00:00:00 2001 From: Anjali Singhai Jain Date: Wed, 27 Jul 2016 12:02:30 -0700 Subject: [PATCH 0273/1715] i40e: Fix a bug where a client close can be called before an open is complete The client->open call in this path was not protected with the client instance mutex, and hence the client->close can get initiated before the open completes. Change-Id: I0ed60c38868dd3f44966b6ed49a063d0e5b7edf5 Signed-off-by: Anjali Singhai Jain Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_client.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.c b/drivers/net/ethernet/intel/i40e/i40e_client.c index e6b9263f5c0d..8726269188f3 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_client.c +++ b/drivers/net/ethernet/intel/i40e/i40e_client.c @@ -541,6 +541,7 @@ void i40e_client_subtask(struct i40e_pf *pf) client->name, pf->hw.pf_id, pf->hw.bus.device, pf->hw.bus.func); + mutex_lock(&i40e_client_instance_mutex); /* Send an Open request to the client */ atomic_inc(&cdev->ref_cnt); if (client->ops && client->ops->open) @@ -554,6 +555,7 @@ void i40e_client_subtask(struct i40e_pf *pf) atomic_dec(&client->ref_cnt); continue; } + mutex_unlock(&i40e_client_instance_mutex); } mutex_unlock(&i40e_client_mutex); } From dc5b4e9fad36849ecc85dcaac8de2682751769fa Mon Sep 17 00:00:00 2001 From: Catherine Sullivan Date: Wed, 27 Jul 2016 12:02:31 -0700 Subject: [PATCH 0274/1715] i40e/i40evf: Reset VLAN filter count when resetting When we do a reset, all the VLAN filters get added again. Therefore we also want to reset the VLAN count to 0 or we quickly run out of filters. Change-ID: I459f26851e22204dc8b8999928ad87cde8170119 Signed-off-by: Catherine Sullivan Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 6fcbf764f32b..0fa050a0cc8e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -992,6 +992,7 @@ complete_reset: set_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states); clear_bit(I40E_VF_STAT_DISABLED, &vf->vf_states); i40e_notify_client_of_vf_reset(pf, abs_vf_id); + vf->num_vlan = 0; } /* tell the VF the reset is done */ wr32(hw, I40E_VFGEN_RSTAT1(vf->vf_id), I40E_VFR_VFACTIVE); From 42bce04ef392ccf1aac0086b1318d375287c8952 Mon Sep 17 00:00:00 2001 From: Catherine Sullivan Date: Wed, 27 Jul 2016 12:02:32 -0700 Subject: [PATCH 0275/1715] i40e: reset RX csum error stat with other pf stats When we are resetting the pf stats we should also reset the RX csum error stat. Change-ID: I7af5ee0ec81a10f6deee1a7b8c2082ea068ef620 Signed-off-by: Catherine Sullivan Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index c6ac7a61812f..9f9d24f4a4ed 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -527,6 +527,7 @@ void i40e_pf_reset_stats(struct i40e_pf *pf) pf->veb[i]->stat_offsets_loaded = false; } } + pf->hw_csum_rx_error = 0; } /** From 62fe2a865e6ddc92f574bafacf8376613d8c08f2 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 27 Jul 2016 12:02:33 -0700 Subject: [PATCH 0276/1715] i40evf: add missing rtnl_lock() around i40evf_set_interrupt_capability The function calls netif_set_real_num_(tx|rx)_queues, both of which should be done only under rntl lock. Unfortunately the i40evf_init_task did not hold the rtnl_lock as necessary. This patch adds the locking needed. Change-ID: Ib72a21c3ce22b71a226b16f9bbe0f5f8cc3e849b Signed-off-by: Jacob Keller Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40evf/i40evf_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index 600fb9c4a7f0..d1d4a653686a 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -1420,7 +1420,9 @@ int i40evf_init_interrupt_scheme(struct i40evf_adapter *adapter) { int err; + rtnl_lock(); err = i40evf_set_interrupt_capability(adapter); + rtnl_unlock(); if (err) { dev_err(&adapter->pdev->dev, "Unable to setup interrupt capabilities\n"); From 52a08caa0cd5b9b37020c96f2b59ce1160def8fa Mon Sep 17 00:00:00 2001 From: Dave Ertman Date: Wed, 27 Jul 2016 12:02:34 -0700 Subject: [PATCH 0277/1715] i40e: Fix static analysis tool warning This patch fixes a problem where a static analysis tool generates a warning for "INVARIANT_CONDITION: Expression 'enabled_tc' used in the condition always yields the same result." Without this patch, the driver will not pass the static analysis tool checks without generating warnings. This patch fixes the problem by eliminating the irrelevant check and redundant assignment for the value of enabled_tc. Change-ID: Ia7d44cb050f507df7de333e96369d322e08bf408 Signed-off-by: Dave Ertman Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 9f9d24f4a4ed..8c68ac1886d8 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -4617,7 +4617,7 @@ static u8 i40e_dcb_get_enabled_tc(struct i40e_dcbx_config *dcbcfg) static u8 i40e_pf_get_num_tc(struct i40e_pf *pf) { struct i40e_hw *hw = &pf->hw; - u8 i, enabled_tc; + u8 i, enabled_tc = 1; u8 num_tc = 0; struct i40e_dcbx_config *dcbcfg = &hw->local_dcbx_config; @@ -4635,8 +4635,6 @@ static u8 i40e_pf_get_num_tc(struct i40e_pf *pf) else return 1; /* Only TC0 */ - /* At least have TC0 */ - enabled_tc = (enabled_tc ? enabled_tc : 0x1); for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { if (enabled_tc & BIT(i)) num_tc++; From 59826d9becf76d949d09173e2c41e4049f4ca763 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 27 Jul 2016 12:02:35 -0700 Subject: [PATCH 0278/1715] i40e: don't allow reduction of channels below active FD rules If a driver is unable to maintain all current user supplied settings from ethtool (or other sources), it is not ok for a user request to succeed and silently trample over previous configuration. To that end, if you change the number of channels, it must not be allowed to reduce the number of channels (queues) below the current flow director filter rules targets. In this case, return -EINVAL when a request to reduce the number of channels would do so. In addition log a warning to the kernel buffer explaining why we failed, and report the rules which prevent us from lowering the number of channels. Change-ID: If41464d63d7aab11cedf09e4f3aa1a69e21ffd88 Signed-off-by: Jacob Keller Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- .../net/ethernet/intel/i40e/i40e_ethtool.c | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index c912e041d102..a49552f3bb87 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -2744,11 +2744,15 @@ static void i40e_get_channels(struct net_device *dev, static int i40e_set_channels(struct net_device *dev, struct ethtool_channels *ch) { + const u8 drop = I40E_FILTER_PROGRAM_DESC_DEST_DROP_PACKET; struct i40e_netdev_priv *np = netdev_priv(dev); unsigned int count = ch->combined_count; struct i40e_vsi *vsi = np->vsi; struct i40e_pf *pf = vsi->back; + struct i40e_fdir_filter *rule; + struct hlist_node *node2; int new_count; + int err = 0; /* We do not support setting channels for any other VSI at present */ if (vsi->type != I40E_VSI_MAIN) @@ -2766,6 +2770,26 @@ static int i40e_set_channels(struct net_device *dev, if (count > i40e_max_channels(vsi)) return -EINVAL; + /* verify that the number of channels does not invalidate any current + * flow director rules + */ + hlist_for_each_entry_safe(rule, node2, + &pf->fdir_filter_list, fdir_node) { + if (rule->dest_ctl != drop && count <= rule->q_index) { + dev_warn(&pf->pdev->dev, + "Existing user defined filter %d assigns flow to queue %d\n", + rule->fd_id, rule->q_index); + err = -EINVAL; + } + } + + if (err) { + dev_err(&pf->pdev->dev, + "Existing filter rules must be deleted to reduce combined channel count to %d\n", + count); + return err; + } + /* update feature limits from largest to smallest supported values */ /* TODO: Flow director limit, DCB etc */ From 70df973b5eb48b19ba29105ecfecb3e50efe3c6d Mon Sep 17 00:00:00 2001 From: Avinash Dayanand Date: Wed, 27 Jul 2016 12:02:36 -0700 Subject: [PATCH 0279/1715] i40e: Force register writes to mitigate sync issues with iwarp VF driver This patch is a fix for the bug i.e. unable to create iwarp device in VF. This is a sync issue and the iwarp device open is called even before the PCI register writes are done. Forcing the PCI register writes to happen just before it exits the function. Change-ID: I60c6a2c709da89e845f2764cc50ce8b7373c8c44 Signed-off-by: Avinash Dayanand Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_client.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.c b/drivers/net/ethernet/intel/i40e/i40e_client.c index 8726269188f3..90b435cd77ec 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_client.c +++ b/drivers/net/ethernet/intel/i40e/i40e_client.c @@ -793,7 +793,8 @@ static int i40e_client_setup_qvlist(struct i40e_info *ldev, wr32(hw, I40E_PFINT_AEQCTL, reg); } } - + /* Mitigate sync problems with iwarp VF driver */ + i40e_flush(hw); return 0; err: kfree(ldev->qvlist_info); From 6a23449a2356d7d41addb0c89f512dfef455beb2 Mon Sep 17 00:00:00 2001 From: Avinash Dayanand Date: Wed, 27 Jul 2016 12:02:37 -0700 Subject: [PATCH 0280/1715] i40e: Don't notify client of VF reset during VF creation VF goes through reset path during VF creation which happens to also have notification of VF reset to client. Adding conditional check to avoid wrongly notifying VF reset during VF creation. Also changing the call order of VF enable, calling it after VF creation rather than before. Change-ID: I96eabd99deae746a2f0fc465194c886f196178ce Signed-off-by: Avinash Dayanand Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 0fa050a0cc8e..5ea659c7bab7 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -991,7 +991,9 @@ complete_reset: i40e_enable_vf_mappings(vf); set_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states); clear_bit(I40E_VF_STAT_DISABLED, &vf->vf_states); - i40e_notify_client_of_vf_reset(pf, abs_vf_id); + /* Do not notify the client during VF init */ + if (vf->pf->num_alloc_vfs) + i40e_notify_client_of_vf_reset(pf, abs_vf_id); vf->num_vlan = 0; } /* tell the VF the reset is done */ @@ -1090,7 +1092,6 @@ int i40e_alloc_vfs(struct i40e_pf *pf, u16 num_alloc_vfs) goto err_iov; } } - i40e_notify_client_of_vf_enable(pf, num_alloc_vfs); /* allocate memory */ vfs = kcalloc(num_alloc_vfs, sizeof(struct i40e_vf), GFP_KERNEL); if (!vfs) { @@ -1114,6 +1115,8 @@ int i40e_alloc_vfs(struct i40e_pf *pf, u16 num_alloc_vfs) } pf->num_alloc_vfs = num_alloc_vfs; + i40e_notify_client_of_vf_enable(pf, num_alloc_vfs); + err_alloc: if (ret) i40e_free_vfs(pf); From d8ec98646483a9e1c0c9f511236ce8b933deed17 Mon Sep 17 00:00:00 2001 From: Alan Brady Date: Wed, 27 Jul 2016 12:02:38 -0700 Subject: [PATCH 0281/1715] i40e: fix lookup table when RSS disabled/enabled This patch fixes the bug which causes RSS to continue to work after being disabled. After disabling RSS, traffic would continue to be assigned to different queues instead of falling back to a single queue. Without this patch, attempting to disable RSS would not work as expected. This patch fixes the bug by clearing the lookup table used by RSS such that all traffic is assigned to a single queue. This patch also addresses the issue of reinstating the lookup table should RSS then be re-enabled. Change-ID: Ib20c7c6a7e9f1f772bb787370f8a8c664796b141 Signed-off-by: Alan Brady Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 28 +++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 8c68ac1886d8..704cd7da52c7 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -8690,6 +8690,28 @@ bool i40e_set_ntuple(struct i40e_pf *pf, netdev_features_t features) return need_reset; } +/** + * i40e_clear_rss_lut - clear the rx hash lookup table + * @vsi: the VSI being configured + **/ +static void i40e_clear_rss_lut(struct i40e_vsi *vsi) +{ + struct i40e_pf *pf = vsi->back; + struct i40e_hw *hw = &pf->hw; + u16 vf_id = vsi->vf_id; + u8 i; + + if (vsi->type == I40E_VSI_MAIN) { + for (i = 0; i <= I40E_PFQF_HLUT_MAX_INDEX; i++) + wr32(hw, I40E_PFQF_HLUT(i), 0); + } else if (vsi->type == I40E_VSI_SRIOV) { + for (i = 0; i <= I40E_VFQF_HLUT_MAX_INDEX; i++) + i40e_write_rx_ctl(hw, I40E_VFQF_HLUT1(i, vf_id), 0); + } else { + dev_err(&pf->pdev->dev, "Cannot set RSS LUT - invalid VSI type\n"); + } +} + /** * i40e_set_features - set the netdev feature flags * @netdev: ptr to the netdev being adjusted @@ -8703,6 +8725,12 @@ static int i40e_set_features(struct net_device *netdev, struct i40e_pf *pf = vsi->back; bool need_reset; + if (features & NETIF_F_RXHASH && !(netdev->features & NETIF_F_RXHASH)) + i40e_pf_config_rss(pf); + else if (!(features & NETIF_F_RXHASH) && + netdev->features & NETIF_F_RXHASH) + i40e_clear_rss_lut(vsi); + if (features & NETIF_F_HW_VLAN_CTAG_RX) i40e_vlan_stripping_enable(vsi); else From b7d2cd951f7147f979d9e82773246416dcef2e3e Mon Sep 17 00:00:00 2001 From: Mitch Williams Date: Wed, 27 Jul 2016 12:02:39 -0700 Subject: [PATCH 0282/1715] i40e: fix memory leak When we allocate memory, we must free it. It's simple courtesy. Change-ID: Id007294096fb53344f1a8b9a0f78eddf9853c5d6 Signed-off-by: Mitch Williams Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 5ea659c7bab7..68e3482b3b2b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -2318,6 +2318,7 @@ err: /* send the response back to the VF */ aq_ret = i40e_vc_send_msg_to_vf(vf, I40E_VIRTCHNL_OP_GET_RSS_HENA_CAPS, aq_ret, (u8 *)vrh, len); + kfree(vrh); return aq_ret; } From eb0dd6e4a3b3df5a733476e417ed3230d0adfc4e Mon Sep 17 00:00:00 2001 From: Carolyn Wyborny Date: Wed, 27 Jul 2016 12:02:40 -0700 Subject: [PATCH 0283/1715] i40e: Allow RSS Hash set with less than four parameters This patch implements a feature change which allows using ethtool to set RSS hash opts using less than four parameters if desired. Change-ID: I0fbb91255d81e997c456697c21ac39cc9754821b Signed-off-by: Carolyn Wyborny Signed-off-by: Kiran Patil Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e.h | 3 - .../net/ethernet/intel/i40e/i40e_ethtool.c | 204 +++++++++++------- 2 files changed, 128 insertions(+), 79 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 2a882916b4f6..8dc98c273663 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -586,9 +586,6 @@ struct i40e_vsi { /* VSI specific handlers */ irqreturn_t (*irq_handler)(int irq, void *data); - - /* current rxnfc data */ - struct ethtool_rxnfc rxnfc; /* current rss hash opts */ } ____cacheline_internodealigned_in_smp; struct i40e_netdev_priv { diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index a49552f3bb87..5bd384807476 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -2141,41 +2141,72 @@ static int i40e_set_per_queue_coalesce(struct net_device *netdev, u32 queue, **/ static int i40e_get_rss_hash_opts(struct i40e_pf *pf, struct ethtool_rxnfc *cmd) { + struct i40e_hw *hw = &pf->hw; + u8 flow_pctype = 0; + u64 i_set = 0; + cmd->data = 0; - if (pf->vsi[pf->lan_vsi]->rxnfc.data != 0) { - cmd->data = pf->vsi[pf->lan_vsi]->rxnfc.data; - cmd->flow_type = pf->vsi[pf->lan_vsi]->rxnfc.flow_type; - return 0; - } - /* Report default options for RSS on i40e */ switch (cmd->flow_type) { case TCP_V4_FLOW: + flow_pctype = I40E_FILTER_PCTYPE_NONF_IPV4_TCP; + break; case UDP_V4_FLOW: - cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; - /* fall through to add IP fields */ + flow_pctype = I40E_FILTER_PCTYPE_NONF_IPV4_UDP; + break; + case TCP_V6_FLOW: + flow_pctype = I40E_FILTER_PCTYPE_NONF_IPV6_TCP; + break; + case UDP_V6_FLOW: + flow_pctype = I40E_FILTER_PCTYPE_NONF_IPV6_UDP; + break; case SCTP_V4_FLOW: case AH_ESP_V4_FLOW: case AH_V4_FLOW: case ESP_V4_FLOW: case IPV4_FLOW: - cmd->data |= RXH_IP_SRC | RXH_IP_DST; - break; - case TCP_V6_FLOW: - case UDP_V6_FLOW: - cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; - /* fall through to add IP fields */ case SCTP_V6_FLOW: case AH_ESP_V6_FLOW: case AH_V6_FLOW: case ESP_V6_FLOW: case IPV6_FLOW: + /* Default is src/dest for IP, no matter the L4 hashing */ cmd->data |= RXH_IP_SRC | RXH_IP_DST; break; default: return -EINVAL; } + /* Read flow based hash input set register */ + if (flow_pctype) { + i_set = (u64)i40e_read_rx_ctl(hw, I40E_GLQF_HASH_INSET(0, + flow_pctype)) | + ((u64)i40e_read_rx_ctl(hw, I40E_GLQF_HASH_INSET(1, + flow_pctype)) << 32); + } + + /* Process bits of hash input set */ + if (i_set) { + if (i_set & I40E_L4_SRC_MASK) + cmd->data |= RXH_L4_B_0_1; + if (i_set & I40E_L4_DST_MASK) + cmd->data |= RXH_L4_B_2_3; + + if (cmd->flow_type == TCP_V4_FLOW || + cmd->flow_type == UDP_V4_FLOW) { + if (i_set & I40E_L3_SRC_MASK) + cmd->data |= RXH_IP_SRC; + if (i_set & I40E_L3_DST_MASK) + cmd->data |= RXH_IP_DST; + } else if (cmd->flow_type == TCP_V6_FLOW || + cmd->flow_type == UDP_V6_FLOW) { + if (i_set & I40E_L3_V6_SRC_MASK) + cmd->data |= RXH_IP_SRC; + if (i_set & I40E_L3_V6_DST_MASK) + cmd->data |= RXH_IP_DST; + } + } + return 0; } @@ -2317,6 +2348,51 @@ static int i40e_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd, return ret; } +/** + * i40e_get_rss_hash_bits - Read RSS Hash bits from register + * @nfc: pointer to user request + * @i_setc bits currently set + * + * Returns value of bits to be set per user request + **/ +static u64 i40e_get_rss_hash_bits(struct ethtool_rxnfc *nfc, u64 i_setc) +{ + u64 i_set = i_setc; + u64 src_l3 = 0, dst_l3 = 0; + + if (nfc->data & RXH_L4_B_0_1) + i_set |= I40E_L4_SRC_MASK; + else + i_set &= ~I40E_L4_SRC_MASK; + if (nfc->data & RXH_L4_B_2_3) + i_set |= I40E_L4_DST_MASK; + else + i_set &= ~I40E_L4_DST_MASK; + + if (nfc->flow_type == TCP_V6_FLOW || nfc->flow_type == UDP_V6_FLOW) { + src_l3 = I40E_L3_V6_SRC_MASK; + dst_l3 = I40E_L3_V6_DST_MASK; + } else if (nfc->flow_type == TCP_V4_FLOW || + nfc->flow_type == UDP_V4_FLOW) { + src_l3 = I40E_L3_SRC_MASK; + dst_l3 = I40E_L3_DST_MASK; + } else { + /* Any other flow type are not supported here */ + return i_set; + } + + if (nfc->data & RXH_IP_SRC) + i_set |= src_l3; + else + i_set &= ~src_l3; + if (nfc->data & RXH_IP_DST) + i_set |= dst_l3; + else + i_set &= ~dst_l3; + + return i_set; +} + /** * i40e_set_rss_hash_opt - Enable/Disable flow types for RSS hash * @pf: pointer to the physical function struct @@ -2329,6 +2405,8 @@ static int i40e_set_rss_hash_opt(struct i40e_pf *pf, struct ethtool_rxnfc *nfc) struct i40e_hw *hw = &pf->hw; u64 hena = (u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(0)) | ((u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(1)) << 32); + u8 flow_pctype = 0; + u64 i_set, i_setc; /* RSS does not support anything other than hashing * to queues on src and dst IPs and ports @@ -2337,75 +2415,39 @@ static int i40e_set_rss_hash_opt(struct i40e_pf *pf, struct ethtool_rxnfc *nfc) RXH_L4_B_0_1 | RXH_L4_B_2_3)) return -EINVAL; - /* We need at least the IP SRC and DEST fields for hashing */ - if (!(nfc->data & RXH_IP_SRC) || - !(nfc->data & RXH_IP_DST)) - return -EINVAL; - switch (nfc->flow_type) { case TCP_V4_FLOW: - switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { - case 0: - return -EINVAL; - case (RXH_L4_B_0_1 | RXH_L4_B_2_3): - if (pf->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE) - hena |= - BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK); - - hena |= BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_TCP); - break; - default: - return -EINVAL; - } + flow_pctype = I40E_FILTER_PCTYPE_NONF_IPV4_TCP; + if (pf->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE) + hena |= + BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK); break; case TCP_V6_FLOW: - switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { - case 0: - return -EINVAL; - case (RXH_L4_B_0_1 | RXH_L4_B_2_3): - if (pf->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE) - hena |= - BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK); - - hena |= BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_TCP); - break; - default: - return -EINVAL; - } + flow_pctype = I40E_FILTER_PCTYPE_NONF_IPV6_TCP; + if (pf->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE) + hena |= + BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK); + if (pf->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE) + hena |= + BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK); break; case UDP_V4_FLOW: - switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { - case 0: - return -EINVAL; - case (RXH_L4_B_0_1 | RXH_L4_B_2_3): - if (pf->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE) - hena |= - BIT_ULL(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP) | - BIT_ULL(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP); + flow_pctype = I40E_FILTER_PCTYPE_NONF_IPV4_UDP; + if (pf->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE) + hena |= + BIT_ULL(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP) | + BIT_ULL(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP); - hena |= (BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_UDP) | - BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV4)); - break; - default: - return -EINVAL; - } + hena |= BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV4); break; case UDP_V6_FLOW: - switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { - case 0: - return -EINVAL; - case (RXH_L4_B_0_1 | RXH_L4_B_2_3): - if (pf->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE) - hena |= - BIT_ULL(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP) | - BIT_ULL(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP); + flow_pctype = I40E_FILTER_PCTYPE_NONF_IPV6_UDP; + if (pf->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE) + hena |= + BIT_ULL(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP) | + BIT_ULL(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP); - hena |= (BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_UDP) | - BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV6)); - break; - default: - return -EINVAL; - } + hena |= BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV6); break; case AH_ESP_V4_FLOW: case AH_V4_FLOW: @@ -2437,13 +2479,23 @@ static int i40e_set_rss_hash_opt(struct i40e_pf *pf, struct ethtool_rxnfc *nfc) return -EINVAL; } + if (flow_pctype) { + i_setc = (u64)i40e_read_rx_ctl(hw, I40E_GLQF_HASH_INSET(0, + flow_pctype)) | + ((u64)i40e_read_rx_ctl(hw, I40E_GLQF_HASH_INSET(1, + flow_pctype)) << 32); + i_set = i40e_get_rss_hash_bits(nfc, i_setc); + i40e_write_rx_ctl(hw, I40E_GLQF_HASH_INSET(0, flow_pctype), + (u32)i_set); + i40e_write_rx_ctl(hw, I40E_GLQF_HASH_INSET(1, flow_pctype), + (u32)(i_set >> 32)); + hena |= BIT_ULL(flow_pctype); + } + i40e_write_rx_ctl(hw, I40E_PFQF_HENA(0), (u32)hena); i40e_write_rx_ctl(hw, I40E_PFQF_HENA(1), (u32)(hena >> 32)); i40e_flush(hw); - /* Save setting for future output/update */ - pf->vsi[pf->lan_vsi]->rxnfc = *nfc; - return 0; } From 93e6fa2c34bef36fc64be44e501c6ba729671d8d Mon Sep 17 00:00:00 2001 From: Bimmy Pujari Date: Wed, 27 Jul 2016 12:02:41 -0700 Subject: [PATCH 0284/1715] i40e/i40evf-Bump version from 1.6.11 to 1.6.12 Signed-off-by: Bimmy Pujari Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 2 +- drivers/net/ethernet/intel/i40evf/i40evf_main.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 704cd7da52c7..798c4e2dc621 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -41,7 +41,7 @@ static const char i40e_driver_string[] = #define DRV_VERSION_MAJOR 1 #define DRV_VERSION_MINOR 6 -#define DRV_VERSION_BUILD 11 +#define DRV_VERSION_BUILD 12 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \ __stringify(DRV_VERSION_MINOR) "." \ __stringify(DRV_VERSION_BUILD) DRV_KERN diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index d1d4a653686a..ba046d1e25e4 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -38,7 +38,7 @@ static const char i40evf_driver_string[] = #define DRV_VERSION_MAJOR 1 #define DRV_VERSION_MINOR 6 -#define DRV_VERSION_BUILD 11 +#define DRV_VERSION_BUILD 12 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \ __stringify(DRV_VERSION_MINOR) "." \ __stringify(DRV_VERSION_BUILD) \ From be0cb0a66a1d426f7cd9b170c008725044ae147a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 3 Aug 2016 22:01:07 +0300 Subject: [PATCH 0285/1715] i40e: remove a stray unlock We shifted the locking around a bit but forgot to delete this unlock so now it can unlock twice. Fixes: cd3be169a5ff ('i40e: Move the mutex lock in i40e_client_unregister') Signed-off-by: Dan Carpenter Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_client.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.c b/drivers/net/ethernet/intel/i40e/i40e_client.c index 90b435cd77ec..677dae54918d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_client.c +++ b/drivers/net/ethernet/intel/i40e/i40e_client.c @@ -992,7 +992,6 @@ int i40e_unregister_client(struct i40e_client *client) if (!i40e_client_is_registered(client)) { pr_info("i40e: Client %s has not been registered\n", client->name); - mutex_unlock(&i40e_client_mutex); ret = -ENODEV; goto out; } From d40062f3c48612f075f99c58206ab8023eca1029 Mon Sep 17 00:00:00 2001 From: Sridhar Samudrala Date: Thu, 4 Aug 2016 18:45:47 +0200 Subject: [PATCH 0286/1715] i40e: Expose 'trust' flag to userspace via ndo_get_vf_config. This enables ip -d l to indicate if trust is on or off for VFs. Signed-off-by: Sridhar Samudrala Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 68e3482b3b2b..da3423561b3a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -3000,6 +3000,7 @@ int i40e_ndo_get_vf_config(struct net_device *netdev, else ivi->linkstate = IFLA_VF_LINK_STATE_DISABLE; ivi->spoofchk = vf->spoofchk; + ivi->trusted = vf->trusted; ret = 0; error_param: From aead88bd0e990540c03df8108671c93b35354736 Mon Sep 17 00:00:00 2001 From: "shubhrajyoti.datta@xilinx.com" Date: Tue, 16 Aug 2016 10:14:50 +0530 Subject: [PATCH 0287/1715] net: ethernet: macb: Add support for rx_clk Some of the platforms like zynqmp ultrascale+ has a separate clock gate for the rx clock. Add an optional rx_clk so that the clock can be enabled. Signed-off-by: Shubhrajyoti Datta Acked-by: Nicolas Ferre Acked-by: Rob Herring Signed-off-by: David S. Miller --- .../devicetree/bindings/net/macb.txt | 1 + drivers/net/ethernet/cadence/macb.c | 31 ++++++++++++++++--- drivers/net/ethernet/cadence/macb.h | 4 ++- 3 files changed, 30 insertions(+), 6 deletions(-) diff --git a/Documentation/devicetree/bindings/net/macb.txt b/Documentation/devicetree/bindings/net/macb.txt index b5a42df4c928..1506e948610c 100644 --- a/Documentation/devicetree/bindings/net/macb.txt +++ b/Documentation/devicetree/bindings/net/macb.txt @@ -21,6 +21,7 @@ Required properties: - clock-names: Tuple listing input clock names. Required elements: 'pclk', 'hclk' Optional elements: 'tx_clk' + Optional elements: 'rx_clk' applies to cdns,zynqmp-gem - clocks: Phandles to input clocks. Optional properties for PHY child node: diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c index dbce938e7d0c..32568392b9f9 100644 --- a/drivers/net/ethernet/cadence/macb.c +++ b/drivers/net/ethernet/cadence/macb.c @@ -2332,7 +2332,8 @@ static void macb_probe_queues(void __iomem *mem, } static int macb_clk_init(struct platform_device *pdev, struct clk **pclk, - struct clk **hclk, struct clk **tx_clk) + struct clk **hclk, struct clk **tx_clk, + struct clk **rx_clk) { int err; @@ -2354,6 +2355,10 @@ static int macb_clk_init(struct platform_device *pdev, struct clk **pclk, if (IS_ERR(*tx_clk)) *tx_clk = NULL; + *rx_clk = devm_clk_get(&pdev->dev, "rx_clk"); + if (IS_ERR(*rx_clk)) + *rx_clk = NULL; + err = clk_prepare_enable(*pclk); if (err) { dev_err(&pdev->dev, "failed to enable pclk (%u)\n", err); @@ -2372,8 +2377,17 @@ static int macb_clk_init(struct platform_device *pdev, struct clk **pclk, goto err_disable_hclk; } + err = clk_prepare_enable(*rx_clk); + if (err) { + dev_err(&pdev->dev, "failed to enable rx_clk (%u)\n", err); + goto err_disable_txclk; + } + return 0; +err_disable_txclk: + clk_disable_unprepare(*tx_clk); + err_disable_hclk: clk_disable_unprepare(*hclk); @@ -2763,12 +2777,14 @@ static const struct net_device_ops at91ether_netdev_ops = { }; static int at91ether_clk_init(struct platform_device *pdev, struct clk **pclk, - struct clk **hclk, struct clk **tx_clk) + struct clk **hclk, struct clk **tx_clk, + struct clk **rx_clk) { int err; *hclk = NULL; *tx_clk = NULL; + *rx_clk = NULL; *pclk = devm_clk_get(&pdev->dev, "ether_clk"); if (IS_ERR(*pclk)) @@ -2892,13 +2908,13 @@ MODULE_DEVICE_TABLE(of, macb_dt_ids); static int macb_probe(struct platform_device *pdev) { int (*clk_init)(struct platform_device *, struct clk **, - struct clk **, struct clk **) + struct clk **, struct clk **, struct clk **) = macb_clk_init; int (*init)(struct platform_device *) = macb_init; struct device_node *np = pdev->dev.of_node; struct device_node *phy_node; const struct macb_config *macb_config = NULL; - struct clk *pclk, *hclk = NULL, *tx_clk = NULL; + struct clk *pclk, *hclk = NULL, *tx_clk = NULL, *rx_clk = NULL; unsigned int queue_mask, num_queues; struct macb_platform_data *pdata; bool native_io; @@ -2926,7 +2942,7 @@ static int macb_probe(struct platform_device *pdev) } } - err = clk_init(pdev, &pclk, &hclk, &tx_clk); + err = clk_init(pdev, &pclk, &hclk, &tx_clk, &rx_clk); if (err) return err; @@ -2962,6 +2978,7 @@ static int macb_probe(struct platform_device *pdev) bp->pclk = pclk; bp->hclk = hclk; bp->tx_clk = tx_clk; + bp->rx_clk = rx_clk; if (macb_config) bp->jumbo_max_len = macb_config->jumbo_max_len; @@ -3060,6 +3077,7 @@ err_disable_clocks: clk_disable_unprepare(tx_clk); clk_disable_unprepare(hclk); clk_disable_unprepare(pclk); + clk_disable_unprepare(rx_clk); return err; } @@ -3086,6 +3104,7 @@ static int macb_remove(struct platform_device *pdev) clk_disable_unprepare(bp->tx_clk); clk_disable_unprepare(bp->hclk); clk_disable_unprepare(bp->pclk); + clk_disable_unprepare(bp->rx_clk); free_netdev(dev); } @@ -3109,6 +3128,7 @@ static int __maybe_unused macb_suspend(struct device *dev) clk_disable_unprepare(bp->tx_clk); clk_disable_unprepare(bp->hclk); clk_disable_unprepare(bp->pclk); + clk_disable_unprepare(bp->rx_clk); } return 0; @@ -3128,6 +3148,7 @@ static int __maybe_unused macb_resume(struct device *dev) clk_prepare_enable(bp->pclk); clk_prepare_enable(bp->hclk); clk_prepare_enable(bp->tx_clk); + clk_prepare_enable(bp->rx_clk); } netif_device_attach(netdev); diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index aa3aeecc3132..8bed4b52fef5 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -772,7 +772,8 @@ struct macb_config { u32 caps; unsigned int dma_burst_length; int (*clk_init)(struct platform_device *pdev, struct clk **pclk, - struct clk **hclk, struct clk **tx_clk); + struct clk **hclk, struct clk **tx_clk, + struct clk **rx_clk); int (*init)(struct platform_device *pdev); int jumbo_max_len; }; @@ -819,6 +820,7 @@ struct macb { struct clk *pclk; struct clk *hclk; struct clk *tx_clk; + struct clk *rx_clk; struct net_device *dev; struct napi_struct napi; struct net_device_stats stats; From e202d4c6350506e036bbd63a1fca1400960c8ab6 Mon Sep 17 00:00:00 2001 From: Appana Durga Kedareswara Rao Date: Tue, 16 Aug 2016 11:58:29 +0530 Subject: [PATCH 0288/1715] net: phy: Update copyright info For implementing this driver most of the inputs is provided by Andrew Lunn. Updating the driver with Andrew Copy right. Signed-off-by: Kedareswara rao Appana Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/xilinx_gmii2rgmii.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/phy/xilinx_gmii2rgmii.c b/drivers/net/phy/xilinx_gmii2rgmii.c index cad6e19724e2..e7a20ec2d8f7 100644 --- a/drivers/net/phy/xilinx_gmii2rgmii.c +++ b/drivers/net/phy/xilinx_gmii2rgmii.c @@ -1,7 +1,9 @@ /* Xilinx GMII2RGMII Converter driver * * Copyright (C) 2016 Xilinx, Inc. + * Copyright (C) 2016 Andrew Lunn * + * Author: Andrew Lunn * Author: Kedareswara rao Appana * * Description: From 0d135e4f263af7da3c2fbe5f99b377cf483b6adf Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 16 Aug 2016 12:08:42 +0100 Subject: [PATCH 0289/1715] net: atm: remove redundant null pointer check on dev->name dev->name is a char array of IFNAMSIZ elements, hence can never be null, so the null pointer check is redundant. Remove it. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- net/atm/mpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/atm/mpc.c b/net/atm/mpc.c index 0e982222d425..3b3b1a292ec8 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -1007,7 +1007,7 @@ static int mpoa_event_listener(struct notifier_block *mpoa_notifier, if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; - if (dev->name == NULL || strncmp(dev->name, "lec", 3)) + if (strncmp(dev->name, "lec", 3)) return NOTIFY_DONE; /* we are only interested in lec:s */ switch (event) { From 1a5a366f083af014fb2eedca1993c6ce6496224c Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Tue, 16 Aug 2016 10:51:01 -0400 Subject: [PATCH 0290/1715] qede: Add support for capturing additional stats in ethtool-stats display. The patch adds driver support for capturing stats ttl0_discard and packet_too_big_discard in "ethtool -S" display. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qede/qede.h | 2 ++ drivers/net/ethernet/qlogic/qede/qede_ethtool.c | 2 ++ drivers/net/ethernet/qlogic/qede/qede_main.c | 2 ++ 3 files changed, 6 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h index 02b06d4e40ae..9060c2ad9d62 100644 --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@ -36,6 +36,8 @@ struct qede_stats { u64 no_buff_discards; + u64 packet_too_big_discard; + u64 ttl0_discard; u64 rx_ucast_bytes; u64 rx_mcast_bytes; u64 rx_bcast_bytes; diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index 427e043a033f..c035e3c2349d 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -107,6 +107,8 @@ static const struct { QEDE_PF_STAT(mftag_filter_discards), QEDE_PF_STAT(mac_filter_discards), QEDE_STAT(tx_err_drop_pkts), + QEDE_STAT(ttl0_discard), + QEDE_STAT(packet_too_big_discard), QEDE_STAT(coalesced_pkts), QEDE_STAT(coalesced_events), diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 5ce8a3c9ed6f..467d0eb043df 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -1694,6 +1694,8 @@ void qede_fill_by_demand_stats(struct qede_dev *edev) edev->ops->get_vport_stats(edev->cdev, &stats); edev->stats.no_buff_discards = stats.no_buff_discards; + edev->stats.packet_too_big_discard = stats.packet_too_big_discard; + edev->stats.ttl0_discard = stats.ttl0_discard; edev->stats.rx_ucast_bytes = stats.rx_ucast_bytes; edev->stats.rx_mcast_bytes = stats.rx_mcast_bytes; edev->stats.rx_bcast_bytes = stats.rx_bcast_bytes; From 68db9ec2df073f0d15351e0cb7de81d0a322f456 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Tue, 16 Aug 2016 10:51:02 -0400 Subject: [PATCH 0291/1715] qede: Add support for per-queue stats. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qede/qede.h | 3 + .../net/ethernet/qlogic/qede/qede_ethtool.c | 63 +++++++++++++++---- drivers/net/ethernet/qlogic/qede/qede_main.c | 4 ++ 3 files changed, 57 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h index 9060c2ad9d62..81a63cf0a998 100644 --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@ -237,6 +237,7 @@ struct qede_rx_queue { u16 num_rx_buffers; u16 rxq_id; + u64 rcv_pkts; u64 rx_hw_errors; u64 rx_alloc_errors; u64 rx_ip_frags; @@ -265,6 +266,8 @@ struct qede_tx_queue { union db_prod tx_db; u16 num_tx_buffers; + u64 xmit_pkts; + u64 stopped_cnt; }; #define BD_UNMAP_ADDR(bd) HILO_U64(le32_to_cpu((bd)->addr.hi), \ diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index c035e3c2349d..6e17ee10e748 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -35,6 +35,7 @@ static const struct { u64 offset; char string[ETH_GSTRING_LEN]; } qede_rqstats_arr[] = { + QEDE_RQSTAT(rcv_pkts), QEDE_RQSTAT(rx_hw_errors), QEDE_RQSTAT(rx_alloc_errors), QEDE_RQSTAT(rx_ip_frags), @@ -44,6 +45,24 @@ static const struct { #define QEDE_RQSTATS_DATA(dev, sindex, rqindex) \ (*((u64 *)(((char *)(dev->fp_array[(rqindex)].rxq)) +\ qede_rqstats_arr[(sindex)].offset))) +#define QEDE_TQSTAT_OFFSET(stat_name) \ + (offsetof(struct qede_tx_queue, stat_name)) +#define QEDE_TQSTAT_STRING(stat_name) (#stat_name) +#define QEDE_TQSTAT(stat_name) \ + {QEDE_TQSTAT_OFFSET(stat_name), QEDE_TQSTAT_STRING(stat_name)} +#define QEDE_NUM_TQSTATS ARRAY_SIZE(qede_tqstats_arr) +static const struct { + u64 offset; + char string[ETH_GSTRING_LEN]; +} qede_tqstats_arr[] = { + QEDE_TQSTAT(xmit_pkts), + QEDE_TQSTAT(stopped_cnt), +}; + +#define QEDE_TQSTATS_DATA(dev, sindex, tssid, tcid) \ + (*((u64 *)(((u64)(&dev->fp_array[tssid].txqs[tcid])) +\ + qede_tqstats_arr[(sindex)].offset))) + static const struct { u64 offset; char string[ETH_GSTRING_LEN]; @@ -153,17 +172,29 @@ static void qede_get_strings_stats(struct qede_dev *edev, u8 *buf) { int i, j, k; + for (i = 0, k = 0; i < edev->num_rss; i++) { + int tc; + + for (j = 0; j < QEDE_NUM_RQSTATS; j++) + sprintf(buf + (k + j) * ETH_GSTRING_LEN, + "%d: %s", i, qede_rqstats_arr[j].string); + k += QEDE_NUM_RQSTATS; + for (tc = 0; tc < edev->num_tc; tc++) { + for (j = 0; j < QEDE_NUM_TQSTATS; j++) + sprintf(buf + (k + j) * ETH_GSTRING_LEN, + "%d.%d: %s", i, tc, + qede_tqstats_arr[j].string); + k += QEDE_NUM_TQSTATS; + } + } + for (i = 0, j = 0; i < QEDE_NUM_STATS; i++) { if (IS_VF(edev) && qede_stats_arr[i].pf_only) continue; - strcpy(buf + j * ETH_GSTRING_LEN, + strcpy(buf + (k + j) * ETH_GSTRING_LEN, qede_stats_arr[i].string); j++; } - - for (k = 0; k < QEDE_NUM_RQSTATS; k++, j++) - strcpy(buf + j * ETH_GSTRING_LEN, - qede_rqstats_arr[k].string); } static void qede_get_strings(struct net_device *dev, u32 stringset, u8 *buf) @@ -199,19 +230,24 @@ static void qede_get_ethtool_stats(struct net_device *dev, mutex_lock(&edev->qede_lock); + for (qid = 0; qid < edev->num_rss; qid++) { + int tc; + + for (sidx = 0; sidx < QEDE_NUM_RQSTATS; sidx++) + buf[cnt++] = QEDE_RQSTATS_DATA(edev, sidx, qid); + for (tc = 0; tc < edev->num_tc; tc++) { + for (sidx = 0; sidx < QEDE_NUM_TQSTATS; sidx++) + buf[cnt++] = QEDE_TQSTATS_DATA(edev, sidx, qid, + tc); + } + } + for (sidx = 0; sidx < QEDE_NUM_STATS; sidx++) { if (IS_VF(edev) && qede_stats_arr[sidx].pf_only) continue; buf[cnt++] = QEDE_STATS_DATA(edev, sidx); } - for (sidx = 0; sidx < QEDE_NUM_RQSTATS; sidx++) { - buf[cnt] = 0; - for (qid = 0; qid < edev->num_rss; qid++) - buf[cnt] += QEDE_RQSTATS_DATA(edev, sidx, qid); - cnt++; - } - mutex_unlock(&edev->qede_lock); } @@ -229,7 +265,8 @@ static int qede_get_sset_count(struct net_device *dev, int stringset) if (qede_stats_arr[i].pf_only) num_stats--; } - return num_stats + QEDE_NUM_RQSTATS; + return num_stats + edev->num_rss * + (QEDE_NUM_RQSTATS + QEDE_NUM_TQSTATS * edev->num_tc); case ETH_SS_PRIV_FLAGS: return QEDE_PRI_FLAG_LEN; case ETH_SS_TEST: diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 467d0eb043df..a05459f96962 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -720,6 +720,7 @@ static netdev_tx_t qede_start_xmit(struct sk_buff *skb, if (unlikely(qed_chain_get_elem_left(&txq->tx_pbl) < (MAX_SKB_FRAGS + 1))) { netif_tx_stop_queue(netdev_txq); + txq->stopped_cnt++; DP_VERBOSE(edev, NETIF_MSG_TX_QUEUED, "Stop queue was called\n"); /* paired memory barrier is in qede_tx_int(), we have to keep @@ -779,6 +780,7 @@ static int qede_tx_int(struct qede_dev *edev, struct qede_tx_queue *txq) bytes_compl += len; pkts_compl++; txq->sw_tx_cons++; + txq->xmit_pkts++; } netdev_tx_completed_queue(netdev_txq, pkts_compl, bytes_compl); @@ -1587,6 +1589,8 @@ next_cqe: /* don't consume bd rx buffer */ /* Update producers */ qede_update_rx_prod(edev, rxq); + rxq->rcv_pkts += rx_pkt; + return rx_pkt; } From 6c75424612a724d842f45d9d48cb648a73f184ac Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Tue, 16 Aug 2016 10:51:03 -0400 Subject: [PATCH 0292/1715] qed: Add support for NCSI statistics. The patch adds driver support for sending the NCSI statistics to the MFW. This is an asynchronous request from MFW. Upon receiving this, driver populates the required data and send it to MFW. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed.h | 6 ++- drivers/net/ethernet/qlogic/qed/qed_hsi.h | 14 +++++-- drivers/net/ethernet/qlogic/qed/qed_l2.h | 2 + drivers/net/ethernet/qlogic/qed/qed_main.c | 21 ++++++++++ drivers/net/ethernet/qlogic/qed/qed_mcp.c | 48 ++++++++++++++++++++++ drivers/net/ethernet/qlogic/qed/qed_mcp.h | 41 ++++++++++++++++++ 6 files changed, 127 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index 35e53771533f..d0beb5d10a02 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -42,6 +42,8 @@ enum qed_coalescing_mode { struct qed_eth_cb_ops; struct qed_dev_info; +union qed_mcp_protocol_stats; +enum qed_mcp_protocol_type; /* helpers */ static inline u32 qed_db_addr(u32 cid, u32 DEMS) @@ -597,7 +599,9 @@ void qed_link_update(struct qed_hwfn *hwfn); u32 qed_unzip_data(struct qed_hwfn *p_hwfn, u32 input_len, u8 *input_buf, u32 max_size, u8 *unzip_buf); - +void qed_get_protocol_stats(struct qed_dev *cdev, + enum qed_mcp_protocol_type type, + union qed_mcp_protocol_stats *stats); int qed_slowpath_irq_req(struct qed_hwfn *hwfn); #endif /* _QED_H */ diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h index 6f9d3b831a2a..acf5da39ecb0 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h +++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h @@ -7239,6 +7239,12 @@ struct public_drv_mb { #define DRV_MSG_CODE_MCP_RESET 0x00090000 #define DRV_MSG_CODE_SET_VERSION 0x000f0000 +#define DRV_MSG_CODE_GET_STATS 0x00130000 +#define DRV_MSG_CODE_STATS_TYPE_LAN 1 +#define DRV_MSG_CODE_STATS_TYPE_FCOE 2 +#define DRV_MSG_CODE_STATS_TYPE_ISCSI 3 +#define DRV_MSG_CODE_STATS_TYPE_RDMA 4 + #define DRV_MSG_CODE_BIST_TEST 0x001e0000 #define DRV_MSG_CODE_SET_LED_MODE 0x00200000 @@ -7315,10 +7321,10 @@ enum MFW_DRV_MSG_TYPE { MFW_DRV_MSG_RESERVED4, MFW_DRV_MSG_BW_UPDATE, MFW_DRV_MSG_BW_UPDATE5, - MFW_DRV_MSG_BW_UPDATE6, - MFW_DRV_MSG_BW_UPDATE7, - MFW_DRV_MSG_BW_UPDATE8, - MFW_DRV_MSG_BW_UPDATE9, + MFW_DRV_MSG_GET_LAN_STATS, + MFW_DRV_MSG_GET_FCOE_STATS, + MFW_DRV_MSG_GET_ISCSI_STATS, + MFW_DRV_MSG_GET_RDMA_STATS, MFW_DRV_MSG_BW_UPDATE10, MFW_DRV_MSG_TRANSCEIVER_STATE_CHANGE, MFW_DRV_MSG_BW_UPDATE11, diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.h b/drivers/net/ethernet/qlogic/qed/qed_l2.h index 002114543451..ff3a198d08d7 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.h +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.h @@ -213,6 +213,8 @@ qed_sp_eth_rx_queues_update(struct qed_hwfn *p_hwfn, enum spq_mode comp_mode, struct qed_spq_comp_cb *p_comp_data); +void qed_get_vport_stats(struct qed_dev *cdev, struct qed_eth_stats *stats); + int qed_sp_eth_vport_start(struct qed_hwfn *p_hwfn, struct qed_sp_vport_start_params *p_params); diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 54976cc2747d..32f71ee57191 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -1402,3 +1402,24 @@ const struct qed_common_ops qed_common_ops_pass = { .set_coalesce = &qed_set_coalesce, .set_led = &qed_set_led, }; + +void qed_get_protocol_stats(struct qed_dev *cdev, + enum qed_mcp_protocol_type type, + union qed_mcp_protocol_stats *stats) +{ + struct qed_eth_stats eth_stats; + + memset(stats, 0, sizeof(*stats)); + + switch (type) { + case QED_MCP_LAN_STATS: + qed_get_vport_stats(cdev, ð_stats); + stats->lan_stats.ucast_rx_pkts = eth_stats.rx_ucast_pkts; + stats->lan_stats.ucast_tx_pkts = eth_stats.tx_ucast_pkts; + stats->lan_stats.fcs_err = -1; + break; + default: + DP_ERR(cdev, "Invalid protocol type = %d\n", type); + return; + } +} diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index 88b448bb2949..b07bc810af92 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -713,6 +713,48 @@ int qed_mcp_set_link(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, bool b_up) return 0; } +static void qed_mcp_send_protocol_stats(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + enum MFW_DRV_MSG_TYPE type) +{ + enum qed_mcp_protocol_type stats_type; + union qed_mcp_protocol_stats stats; + struct qed_mcp_mb_params mb_params; + union drv_union_data union_data; + u32 hsi_param; + + switch (type) { + case MFW_DRV_MSG_GET_LAN_STATS: + stats_type = QED_MCP_LAN_STATS; + hsi_param = DRV_MSG_CODE_STATS_TYPE_LAN; + break; + case MFW_DRV_MSG_GET_FCOE_STATS: + stats_type = QED_MCP_FCOE_STATS; + hsi_param = DRV_MSG_CODE_STATS_TYPE_FCOE; + break; + case MFW_DRV_MSG_GET_ISCSI_STATS: + stats_type = QED_MCP_ISCSI_STATS; + hsi_param = DRV_MSG_CODE_STATS_TYPE_ISCSI; + break; + case MFW_DRV_MSG_GET_RDMA_STATS: + stats_type = QED_MCP_RDMA_STATS; + hsi_param = DRV_MSG_CODE_STATS_TYPE_RDMA; + break; + default: + DP_NOTICE(p_hwfn, "Invalid protocol type %d\n", type); + return; + } + + qed_get_protocol_stats(p_hwfn->cdev, stats_type, &stats); + + memset(&mb_params, 0, sizeof(mb_params)); + mb_params.cmd = DRV_MSG_CODE_GET_STATS; + mb_params.param = hsi_param; + memcpy(&union_data, &stats, sizeof(stats)); + mb_params.p_data_src = &union_data; + qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params); +} + static void qed_read_pf_bandwidth(struct qed_hwfn *p_hwfn, struct public_func *p_shmem_info) { @@ -854,6 +896,12 @@ int qed_mcp_handle_events(struct qed_hwfn *p_hwfn, case MFW_DRV_MSG_TRANSCEIVER_STATE_CHANGE: qed_mcp_handle_transceiver_change(p_hwfn, p_ptt); break; + case MFW_DRV_MSG_GET_LAN_STATS: + case MFW_DRV_MSG_GET_FCOE_STATS: + case MFW_DRV_MSG_GET_ISCSI_STATS: + case MFW_DRV_MSG_GET_RDMA_STATS: + qed_mcp_send_protocol_stats(p_hwfn, p_ptt, i); + break; case MFW_DRV_MSG_BW_UPDATE: qed_mcp_update_bw(p_hwfn, p_ptt); break; diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h index 013d1b92ed63..3324ef06f358 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h @@ -106,6 +106,47 @@ struct qed_mcp_drv_version { u8 name[MCP_DRV_VER_STR_SIZE - 4]; }; +struct qed_mcp_lan_stats { + u64 ucast_rx_pkts; + u64 ucast_tx_pkts; + u32 fcs_err; +}; + +struct qed_mcp_fcoe_stats { + u64 rx_pkts; + u64 tx_pkts; + u32 fcs_err; + u32 login_failure; +}; + +struct qed_mcp_iscsi_stats { + u64 rx_pdus; + u64 tx_pdus; + u64 rx_bytes; + u64 tx_bytes; +}; + +struct qed_mcp_rdma_stats { + u64 rx_pkts; + u64 tx_pkts; + u64 rx_bytes; + u64 tx_byts; +}; + +enum qed_mcp_protocol_type { + QED_MCP_LAN_STATS, + QED_MCP_FCOE_STATS, + QED_MCP_ISCSI_STATS, + QED_MCP_RDMA_STATS +}; + +union qed_mcp_protocol_stats { + struct qed_mcp_lan_stats lan_stats; + struct qed_mcp_fcoe_stats fcoe_stats; + struct qed_mcp_iscsi_stats iscsi_stats; + struct qed_mcp_rdma_stats rdma_stats; +}; + /** * @brief - returns the link params of the hw function * From 0d051bf93c0640483788db56dfc118d307f8893b Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Tue, 16 Aug 2016 11:53:50 -0400 Subject: [PATCH 0293/1715] tipc: make bearer packet filtering generic In commit 5b7066c3dd24 ("tipc: stricter filtering of packets in bearer layer") we introduced a method of filtering out messages while a bearer is being reset, to avoid that links may be re-created and come back in working state while we are still in the process of shutting them down. This solution works well, but is limited to only work with L2 media, which is insufficient with the increasing use of UDP as carrier media. We now replace this solution with a more generic one, by introducing a new flag "up" in the generic struct tipc_bearer. This field will be set and reset at the same locations as with the previous solution, while the packet filtering is moved to the generic code for the sending side. On the receiving side, the filtering is still done in media specific code, but now including the UDP bearer. Acked-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/bearer.c | 78 +++++++++++++++++++++++--------------------- net/tipc/bearer.h | 1 + net/tipc/udp_media.c | 2 +- 3 files changed, 42 insertions(+), 39 deletions(-) diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 65b1bbf133bd..6fc4e3cca49a 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -56,6 +56,13 @@ static struct tipc_media * const media_info_array[] = { NULL }; +static struct tipc_bearer *bearer_get(struct net *net, int bearer_id) +{ + struct tipc_net *tn = tipc_net(net); + + return rcu_dereference_rtnl(tn->bearer_list[bearer_id]); +} + static void bearer_disable(struct net *net, struct tipc_bearer *b); /** @@ -323,6 +330,7 @@ restart: b->domain = disc_domain; b->net_plane = bearer_id + 'A'; b->priority = priority; + test_and_set_bit_lock(0, &b->up); res = tipc_disc_create(net, b, &b->bcast_addr, &skb); if (res) { @@ -360,15 +368,24 @@ static int tipc_reset_bearer(struct net *net, struct tipc_bearer *b) */ void tipc_bearer_reset_all(struct net *net) { - struct tipc_net *tn = tipc_net(net); struct tipc_bearer *b; int i; for (i = 0; i < MAX_BEARERS; i++) { - b = rcu_dereference_rtnl(tn->bearer_list[i]); + b = bearer_get(net, i); + if (b) + clear_bit_unlock(0, &b->up); + } + for (i = 0; i < MAX_BEARERS; i++) { + b = bearer_get(net, i); if (b) tipc_reset_bearer(net, b); } + for (i = 0; i < MAX_BEARERS; i++) { + b = bearer_get(net, i); + if (b) + test_and_set_bit_lock(0, &b->up); + } } /** @@ -382,8 +399,9 @@ static void bearer_disable(struct net *net, struct tipc_bearer *b) int bearer_id = b->identity; pr_info("Disabling bearer <%s>\n", b->name); - b->media->disable_media(b); + clear_bit_unlock(0, &b->up); tipc_node_delete_links(net, bearer_id); + b->media->disable_media(b); RCU_INIT_POINTER(b->media_ptr, NULL); if (b->link_req) tipc_disc_delete(b->link_req); @@ -440,22 +458,16 @@ int tipc_l2_send_msg(struct net *net, struct sk_buff *skb, { struct net_device *dev; int delta; - void *tipc_ptr; dev = (struct net_device *)rcu_dereference_rtnl(b->media_ptr); if (!dev) return 0; - /* Send RESET message even if bearer is detached from device */ - tipc_ptr = rcu_dereference_rtnl(dev->tipc_ptr); - if (unlikely(!tipc_ptr && !msg_is_reset(buf_msg(skb)))) - goto drop; - - delta = dev->hard_header_len - skb_headroom(skb); - if ((delta > 0) && - pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) - goto drop; - + delta = SKB_DATA_ALIGN(dev->hard_header_len - skb_headroom(skb)); + if ((delta > 0) && pskb_expand_head(skb, delta, 0, GFP_ATOMIC)) { + kfree_skb(skb); + return 0; + } skb_reset_network_header(skb); skb->dev = dev; skb->protocol = htons(ETH_P_TIPC); @@ -463,9 +475,6 @@ int tipc_l2_send_msg(struct net *net, struct sk_buff *skb, dev->dev_addr, skb->len); dev_queue_xmit(skb); return 0; -drop: - kfree_skb(skb); - return 0; } int tipc_bearer_mtu(struct net *net, u32 bearer_id) @@ -487,12 +496,12 @@ void tipc_bearer_xmit_skb(struct net *net, u32 bearer_id, struct sk_buff *skb, struct tipc_media_addr *dest) { - struct tipc_net *tn = tipc_net(net); + struct tipc_msg *hdr = buf_msg(skb); struct tipc_bearer *b; rcu_read_lock(); - b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); - if (likely(b)) + b = bearer_get(net, bearer_id); + if (likely(b && (test_bit(0, &b->up) || msg_is_reset(hdr)))) b->media->send_msg(net, skb, b, dest); else kfree_skb(skb); @@ -505,7 +514,6 @@ void tipc_bearer_xmit(struct net *net, u32 bearer_id, struct sk_buff_head *xmitq, struct tipc_media_addr *dst) { - struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_bearer *b; struct sk_buff *skb, *tmp; @@ -513,12 +521,15 @@ void tipc_bearer_xmit(struct net *net, u32 bearer_id, return; rcu_read_lock(); - b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); + b = bearer_get(net, bearer_id); if (unlikely(!b)) __skb_queue_purge(xmitq); skb_queue_walk_safe(xmitq, skb, tmp) { __skb_dequeue(xmitq); - b->media->send_msg(net, skb, b, dst); + if (likely(test_bit(0, &b->up) || msg_is_reset(buf_msg(skb)))) + b->media->send_msg(net, skb, b, dst); + else + kfree(skb); } rcu_read_unlock(); } @@ -535,8 +546,8 @@ void tipc_bearer_bc_xmit(struct net *net, u32 bearer_id, struct tipc_msg *hdr; rcu_read_lock(); - b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); - if (unlikely(!b)) + b = bearer_get(net, bearer_id); + if (unlikely(!b || !test_bit(0, &b->up))) __skb_queue_purge(xmitq); skb_queue_walk_safe(xmitq, skb, tmp) { hdr = buf_msg(skb); @@ -566,7 +577,8 @@ static int tipc_l2_rcv_msg(struct sk_buff *skb, struct net_device *dev, rcu_read_lock(); b = rcu_dereference_rtnl(dev->tipc_ptr); - if (likely(b && (skb->pkt_type <= PACKET_BROADCAST))) { + if (likely(b && test_bit(0, &b->up) && + (skb->pkt_type <= PACKET_BROADCAST))) { skb->next = NULL; tipc_rcv(dev_net(dev), skb, b); rcu_read_unlock(); @@ -591,18 +603,9 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); - struct tipc_net *tn = tipc_net(net); struct tipc_bearer *b; - int i; b = rtnl_dereference(dev->tipc_ptr); - if (!b) { - for (i = 0; i < MAX_BEARERS; b = NULL, i++) { - b = rtnl_dereference(tn->bearer_list[i]); - if (b && (b->media_ptr == dev)) - break; - } - } if (!b) return NOTIFY_DONE; @@ -613,11 +616,10 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, if (netif_carrier_ok(dev)) break; case NETDEV_UP: - rcu_assign_pointer(dev->tipc_ptr, b); + test_and_set_bit_lock(0, &b->up); break; case NETDEV_GOING_DOWN: - RCU_INIT_POINTER(dev->tipc_ptr, NULL); - synchronize_net(); + clear_bit_unlock(0, &b->up); tipc_reset_bearer(net, b); break; case NETDEV_CHANGEMTU: diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 43757f1f9cb3..83a9abbfe32c 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -150,6 +150,7 @@ struct tipc_bearer { u32 identity; struct tipc_link_req *link_req; char net_plane; + unsigned long up; }; struct tipc_bearer_names { diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index b016c011970b..33bdf5449a5e 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -224,7 +224,7 @@ static int tipc_udp_recv(struct sock *sk, struct sk_buff *skb) rcu_read_lock(); b = rcu_dereference_rtnl(ub->bearer); - if (b) { + if (b && test_bit(0, &b->up)) { tipc_rcv(sock_net(sk), skb, b); rcu_read_unlock(); return 0; From 5a0950c27236dc6f6a3e9d13259c1a2e89fd1cf7 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Tue, 16 Aug 2016 11:53:51 -0400 Subject: [PATCH 0294/1715] tipc: ensure that link congestion and wakeup use same criteria When a link is attempted woken up after congestion, it uses a different, more generous criteria than when it was originally declared congested. This has the effect that the link, and the sending process, sometimes will be woken up unnecessarily, just to immediately return to congestion when it turns out there is not not enough space in its send queue to host the pending message. This is a waste of CPU cycles. We now change the function link_prepare_wakeup() to use exactly the same criteria as tipc_link_xmit(). However, since we are now excluding the window limit from the wakeup calculation, and the current backlog limit for the lowest level is too small to house even a single maximum-size message, we have to expand this limit. We do this by evaluating an alternative, minimum value during the setting of the importance limits. Acked-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/link.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/net/tipc/link.c b/net/tipc/link.c index 877d94f34814..2c6e1b9e024b 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -807,7 +807,7 @@ void link_prepare_wakeup(struct tipc_link *l) skb_queue_walk_safe(&l->wakeupq, skb, tmp) { imp = TIPC_SKB_CB(skb)->chain_imp; - lim = l->window + l->backlog[imp].limit; + lim = l->backlog[imp].limit; pnd[imp] += TIPC_SKB_CB(skb)->chain_sz; if ((pnd[imp] + l->backlog[imp].len) >= lim) break; @@ -873,9 +873,11 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, struct sk_buff *skb, *_skb, *bskb; /* Match msg importance against this and all higher backlog limits: */ - for (i = imp; i <= TIPC_SYSTEM_IMPORTANCE; i++) { - if (unlikely(l->backlog[i].len >= l->backlog[i].limit)) - return link_schedule_user(l, list); + if (!skb_queue_empty(backlogq)) { + for (i = imp; i <= TIPC_SYSTEM_IMPORTANCE; i++) { + if (unlikely(l->backlog[i].len >= l->backlog[i].limit)) + return link_schedule_user(l, list); + } } if (unlikely(msg_size(hdr) > mtu)) { skb_queue_purge(list); @@ -1692,10 +1694,10 @@ void tipc_link_set_queue_limits(struct tipc_link *l, u32 win) int max_bulk = TIPC_MAX_PUBLICATIONS / (l->mtu / ITEM_SIZE); l->window = win; - l->backlog[TIPC_LOW_IMPORTANCE].limit = win / 2; - l->backlog[TIPC_MEDIUM_IMPORTANCE].limit = win; - l->backlog[TIPC_HIGH_IMPORTANCE].limit = win / 2 * 3; - l->backlog[TIPC_CRITICAL_IMPORTANCE].limit = win * 2; + l->backlog[TIPC_LOW_IMPORTANCE].limit = max_t(u16, 50, win); + l->backlog[TIPC_MEDIUM_IMPORTANCE].limit = max_t(u16, 100, win * 2); + l->backlog[TIPC_HIGH_IMPORTANCE].limit = max_t(u16, 150, win * 3); + l->backlog[TIPC_CRITICAL_IMPORTANCE].limit = max_t(u16, 200, win * 4); l->backlog[TIPC_SYSTEM_IMPORTANCE].limit = max_bulk; } From 69012ae425d76ecacc573ecdbd2c360bd8c2b842 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Tue, 16 Aug 2016 23:52:58 +0200 Subject: [PATCH 0295/1715] net: sched: fix handling of singleton qdiscs with qdisc_hash qdisc_match_from_root() is now iterating over per-netdevice qdisc hashtable instead of going through a linked-list of qdiscs (independently on the actual underlying netdev), which was the case before the switch to hashtable for qdiscs. For singleton qdiscs, there is no underlying netdev associated though, and therefore dumping a singleton qdisc will panic, as qdisc_dev(root) will always be NULL. BUG: unable to handle kernel NULL pointer dereference at 0000000000000410 IP: [] qdisc_match_from_root+0x2c/0x70 PGD 1aceba067 PUD 1aceb7067 PMD 0 Oops: 0000 [#1] PREEMPT SMP [ ... ] task: ffff8801ec996e00 task.stack: ffff8801ec934000 RIP: 0010:[] [] qdisc_match_from_root+0x2c/0x70 RSP: 0018:ffff8801ec937ab0 EFLAGS: 00010203 RAX: 0000000000000408 RBX: ffff88025e612000 RCX: ffffffffffffffd8 RDX: 0000000000000000 RSI: 00000000ffff0000 RDI: ffffffff81cf8100 RBP: ffff8801ec937ab0 R08: 000000000001c160 R09: ffff8802668032c0 R10: ffffffff81cf8100 R11: 0000000000000030 R12: 00000000ffff0000 R13: ffff88025e612000 R14: ffffffff81cf3140 R15: 0000000000000000 FS: 00007f24b9af6740(0000) GS:ffff88026f280000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000410 CR3: 00000001aceec000 CR4: 00000000001406e0 Stack: ffff8801ec937ad0 ffffffff81681210 ffff88025dd51a00 00000000fffffff1 ffff8801ec937b88 ffffffff81681e4e ffffffff81c42bc0 ffff880262431500 ffffffff81cf3140 ffff88025dd51a10 ffff88025dd51a24 00000000ec937b38 Call Trace: [] qdisc_lookup+0x40/0x50 [] tc_modify_qdisc+0x21e/0x550 [] rtnetlink_rcv_msg+0x95/0x220 [] ? __kmalloc_track_caller+0x172/0x230 [] ? rtnl_newlink+0x870/0x870 [] netlink_rcv_skb+0xa7/0xc0 [] rtnetlink_rcv+0x28/0x30 [] netlink_unicast+0x15b/0x210 [] netlink_sendmsg+0x319/0x390 [] sock_sendmsg+0x38/0x50 [] ___sys_sendmsg+0x256/0x260 [] ? __pagevec_lru_add_fn+0x135/0x280 [] ? pagevec_lru_move_fn+0xd0/0xf0 [] ? trace_event_raw_event_mm_lru_insertion+0x180/0x180 [] ? __lru_cache_add+0x75/0xb0 [] ? _raw_spin_unlock+0x16/0x40 [] ? handle_mm_fault+0x39f/0x1160 [] __sys_sendmsg+0x45/0x80 [] SyS_sendmsg+0x12/0x20 [] do_syscall_64+0x57/0xb0 Fix this by special-casing singleton qdiscs (those that don't have underlying netdevice) and introduce immediate handling of those rather than trying to go over an underlying netdevice. We're in the same situation in tc_dump_qdisc_root() and tc_dump_tclass_root(). Ultimately, this will have to be slightly reworked so that we are actually able to show singleton qdiscs (noop) in the dump properly; but we're not currently doing that anyway, so no regression there, and better do this in a gradual manner. Fixes: 59cc1f61f ("net: sched: convert qdisc linked list to hashtable") Reported-by: Daniel Borkmann Tested-by: Daniel Borkmann Reported-by: David Ahern Tested-by: David Ahern Signed-off-by: Jiri Kosina Signed-off-by: David S. Miller --- net/sched/sch_api.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 25aada7b095c..ff515d01951e 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -260,6 +260,9 @@ static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle) { struct Qdisc *q; + if (!qdisc_dev(root)) + return (root->handle == handle ? root : NULL); + if (!(root->flags & TCQ_F_BUILTIN) && root->handle == handle) return root; @@ -1451,6 +1454,10 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, goto done; q_idx++; } + + if (!qdisc_dev(root)) + goto out; + hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) { if (q_idx < s_q_idx) { q_idx++; @@ -1775,6 +1782,9 @@ static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb, if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0) return -1; + if (!qdisc_dev(root)) + return 0; + hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) { if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0) return -1; From ea3274695353127d12155d45be1f2d62ab19c897 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Tue, 16 Aug 2016 23:53:46 +0200 Subject: [PATCH 0296/1715] net: sched: avoid duplicates in qdisc dump tc_dump_qdisc() performs dumping of the per-device qdiscs in two phases; first, the "standard" dev->qdisc is being dumped. Second, if there is/are ingress queue(s), they are being dumped as well. After conversion of netdevice's qdisc linked-list into hashtable, these two sets are not in two disjunctive sets/lists any more, but are both "reachable" directly from netdevice's hashtable. As a consequence, the "full-depth" dump of the ingress qdiscs results in immediately hitting the netdevice hashtable again, and duplicating the dump that has already been performed for dev->qdisc. What in fact needs to be dumped in case of ingress queue is "just" the top-level ingress qdisc, as everything else has been dumped already. Fix this by extending tc_dump_qdisc_root() in a way that it can be instructed whether it should (while performing the "full" per-netdev qdisc dump) perform the whole recursion, or just dump "additional" top-level (ingress) qdiscs without performing any kind of recursion. This fixes duplicate dumps such as qdisc mq 0: root qdisc pfifo_fast 0: parent :4 bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1 qdisc pfifo_fast 0: parent :3 bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1 qdisc pfifo_fast 0: parent :2 bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1 qdisc pfifo_fast 0: parent :1 bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1 qdisc clsact ffff: parent ffff:fff1 qdisc pfifo_fast 0: parent :4 bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1 qdisc pfifo_fast 0: parent :3 bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1 qdisc pfifo_fast 0: parent :2 bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1 qdisc pfifo_fast 0: parent :1 bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1 Fixes: 59cc1f61f ("net: sched: convert qdisc linked list to hashtable") Reported-by: Daniel Borkmann Tested-by: Daniel Borkmann Signed-off-by: Jiri Kosina Signed-off-by: David S. Miller --- net/sched/sch_api.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index ff515d01951e..d677b3484d81 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1435,7 +1435,7 @@ err_out: static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, struct netlink_callback *cb, - int *q_idx_p, int s_q_idx) + int *q_idx_p, int s_q_idx, bool recur) { int ret = 0, q_idx = *q_idx_p; struct Qdisc *q; @@ -1455,7 +1455,13 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, q_idx++; } - if (!qdisc_dev(root)) + /* If dumping singletons, there is no qdisc_dev(root) and the singleton + * itself has already been dumped. + * + * If we've already dumped the top-level (ingress) qdisc above and the global + * qdisc hashtable, we don't want to hit it again + */ + if (!qdisc_dev(root) || !recur) goto out; hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) { @@ -1499,13 +1505,13 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) s_q_idx = 0; q_idx = 0; - if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx) < 0) + if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx, true) < 0) goto done; dev_queue = dev_ingress_queue(dev); if (dev_queue && tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, - &q_idx, s_q_idx) < 0) + &q_idx, s_q_idx, false) < 0) goto done; cont: From 0e71def252815d732f86d11d87d63f7186d9d3be Mon Sep 17 00:00:00 2001 From: Gangfeng Huang Date: Wed, 6 Jul 2016 13:22:54 +0800 Subject: [PATCH 0297/1715] igb: add support of RX network flow classification This patch is meant to allow for RX network flow classification to insert and remove Rx filter by ethtool. Ethtool interface has it's own rules manager Show all filters: $ ethtool -n eth0 4 RX rings available Total 2 rules Signed-off-by: Ruhao Gao Signed-off-by: Gangfeng Huang Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/igb.h | 32 +++ drivers/net/ethernet/intel/igb/igb_ethtool.c | 193 +++++++++++++++++++ drivers/net/ethernet/intel/igb/igb_main.c | 45 +++++ 3 files changed, 270 insertions(+) diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h index 5387b3a96489..37f82ca91141 100644 --- a/drivers/net/ethernet/intel/igb/igb.h +++ b/drivers/net/ethernet/intel/igb/igb.h @@ -355,6 +355,27 @@ struct hwmon_buff { #define IGB_N_SDP 4 #define IGB_RETA_SIZE 128 +enum igb_filter_match_flags { + IGB_FILTER_FLAG_NONE = 0x0, +}; + +#define IGB_MAX_RXNFC_FILTERS 16 + +/* RX network flow classification data structure */ +struct igb_nfc_input { + /* Byte layout in order, all values with MSB first: + * match_flags - 1 byte + */ + u8 match_flags; +}; + +struct igb_nfc_filter { + struct hlist_node nfc_node; + struct igb_nfc_input filter; + u16 sw_idx; + u16 action; +}; + /* board specific private data structure */ struct igb_adapter { unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; @@ -473,6 +494,12 @@ struct igb_adapter { int copper_tries; struct e1000_info ei; u16 eee_advert; + + /* RX network flow classification support */ + struct hlist_head nfc_filter_list; + unsigned int nfc_filter_count; + /* lock for RX network flow classification filter */ + spinlock_t nfc_lock; }; /* flags controlling PTP/1588 function */ @@ -599,4 +626,9 @@ static inline struct netdev_queue *txring_txq(const struct igb_ring *tx_ring) return netdev_get_tx_queue(tx_ring->netdev, tx_ring->queue_index); } +int igb_add_filter(struct igb_adapter *adapter, + struct igb_nfc_filter *input); +int igb_erase_filter(struct igb_adapter *adapter, + struct igb_nfc_filter *input); + #endif /* _IGB_H_ */ diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index 64e91c575a39..2599826bbec7 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -2431,6 +2431,48 @@ static int igb_get_ts_info(struct net_device *dev, } } +static int igb_get_ethtool_nfc_entry(struct igb_adapter *adapter, + struct ethtool_rxnfc *cmd) +{ + struct ethtool_rx_flow_spec *fsp = &cmd->fs; + struct igb_nfc_filter *rule = NULL; + + /* report total rule count */ + cmd->data = IGB_MAX_RXNFC_FILTERS; + + hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node) { + if (fsp->location <= rule->sw_idx) + break; + } + + if (!rule || fsp->location != rule->sw_idx) + return -EINVAL; + + return -EINVAL; +} + +static int igb_get_ethtool_nfc_all(struct igb_adapter *adapter, + struct ethtool_rxnfc *cmd, + u32 *rule_locs) +{ + struct igb_nfc_filter *rule; + int cnt = 0; + + /* report total rule count */ + cmd->data = IGB_MAX_RXNFC_FILTERS; + + hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node) { + if (cnt == cmd->rule_cnt) + return -EMSGSIZE; + rule_locs[cnt] = rule->sw_idx; + cnt++; + } + + cmd->rule_cnt = cnt; + + return 0; +} + static int igb_get_rss_hash_opts(struct igb_adapter *adapter, struct ethtool_rxnfc *cmd) { @@ -2484,6 +2526,16 @@ static int igb_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd, cmd->data = adapter->num_rx_queues; ret = 0; break; + case ETHTOOL_GRXCLSRLCNT: + cmd->rule_cnt = adapter->nfc_filter_count; + ret = 0; + break; + case ETHTOOL_GRXCLSRULE: + ret = igb_get_ethtool_nfc_entry(adapter, cmd); + break; + case ETHTOOL_GRXCLSRLALL: + ret = igb_get_ethtool_nfc_all(adapter, cmd, rule_locs); + break; case ETHTOOL_GRXFH: ret = igb_get_rss_hash_opts(adapter, cmd); break; @@ -2598,6 +2650,142 @@ static int igb_set_rss_hash_opt(struct igb_adapter *adapter, return 0; } +int igb_add_filter(struct igb_adapter *adapter, struct igb_nfc_filter *input) +{ + return -EINVAL; +} + +int igb_erase_filter(struct igb_adapter *adapter, struct igb_nfc_filter *input) +{ + return 0; +} + +static int igb_update_ethtool_nfc_entry(struct igb_adapter *adapter, + struct igb_nfc_filter *input, + u16 sw_idx) +{ + struct igb_nfc_filter *rule, *parent; + int err = -EINVAL; + + parent = NULL; + rule = NULL; + + hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node) { + /* hash found, or no matching entry */ + if (rule->sw_idx >= sw_idx) + break; + parent = rule; + } + + /* if there is an old rule occupying our place remove it */ + if (rule && (rule->sw_idx == sw_idx)) { + if (!input) + err = igb_erase_filter(adapter, rule); + + hlist_del(&rule->nfc_node); + kfree(rule); + adapter->nfc_filter_count--; + } + + /* If no input this was a delete, err should be 0 if a rule was + * successfully found and removed from the list else -EINVAL + */ + if (!input) + return err; + + /* initialize node */ + INIT_HLIST_NODE(&input->nfc_node); + + /* add filter to the list */ + if (parent) + hlist_add_behind(&parent->nfc_node, &input->nfc_node); + else + hlist_add_head(&input->nfc_node, &adapter->nfc_filter_list); + + /* update counts */ + adapter->nfc_filter_count++; + + return 0; +} + +static int igb_add_ethtool_nfc_entry(struct igb_adapter *adapter, + struct ethtool_rxnfc *cmd) +{ + struct net_device *netdev = adapter->netdev; + struct ethtool_rx_flow_spec *fsp = + (struct ethtool_rx_flow_spec *)&cmd->fs; + struct igb_nfc_filter *input, *rule; + int err = 0; + + if (!(netdev->hw_features & NETIF_F_NTUPLE)) + return -ENOTSUPP; + + /* Don't allow programming if the action is a queue greater than + * the number of online Rx queues. + */ + if ((fsp->ring_cookie == RX_CLS_FLOW_DISC) || + (fsp->ring_cookie >= adapter->num_rx_queues)) { + dev_err(&adapter->pdev->dev, "ethtool -N: The specified action is invalid\n"); + return -EINVAL; + } + + /* Don't allow indexes to exist outside of available space */ + if (fsp->location >= IGB_MAX_RXNFC_FILTERS) { + dev_err(&adapter->pdev->dev, "Location out of range\n"); + return -EINVAL; + } + + if ((fsp->flow_type & ~FLOW_EXT) != ETHER_FLOW) + return -EINVAL; + + input = kzalloc(sizeof(*input), GFP_KERNEL); + if (!input) + return -ENOMEM; + + input->action = fsp->ring_cookie; + input->sw_idx = fsp->location; + + spin_lock(&adapter->nfc_lock); + + hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node) { + if (!memcmp(&input->filter, &rule->filter, + sizeof(input->filter))) { + err = -EEXIST; + dev_err(&adapter->pdev->dev, + "ethtool: this filter is already set\n"); + goto err_out_w_lock; + } + } + + err = igb_add_filter(adapter, input); + if (err) + goto err_out_w_lock; + + igb_update_ethtool_nfc_entry(adapter, input, input->sw_idx); + + spin_unlock(&adapter->nfc_lock); + return 0; + +err_out_w_lock: + spin_unlock(&adapter->nfc_lock); + kfree(input); + return err; +} + +static int igb_del_ethtool_nfc_entry(struct igb_adapter *adapter, + struct ethtool_rxnfc *cmd) +{ + struct ethtool_rx_flow_spec *fsp = + (struct ethtool_rx_flow_spec *)&cmd->fs; + int err; + + spin_lock(&adapter->nfc_lock); + err = igb_update_ethtool_nfc_entry(adapter, NULL, fsp->location); + spin_unlock(&adapter->nfc_lock); + + return err; +} + static int igb_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) { struct igb_adapter *adapter = netdev_priv(dev); @@ -2607,6 +2795,11 @@ static int igb_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) case ETHTOOL_SRXFH: ret = igb_set_rss_hash_opt(adapter, cmd); break; + case ETHTOOL_SRXCLSRLINS: + ret = igb_add_ethtool_nfc_entry(adapter, cmd); + break; + case ETHTOOL_SRXCLSRLDEL: + ret = igb_del_ethtool_nfc_entry(adapter, cmd); default: break; } diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 942a89fb0090..af75eac5fa16 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -176,6 +176,8 @@ static int igb_ndo_set_vf_spoofchk(struct net_device *netdev, int vf, static int igb_ndo_get_vf_config(struct net_device *netdev, int vf, struct ifla_vf_info *ivi); static void igb_check_vf_rate_limit(struct igb_adapter *); +static void igb_nfc_filter_exit(struct igb_adapter *adapter); +static void igb_nfc_filter_restore(struct igb_adapter *adapter); #ifdef CONFIG_PCI_IOV static int igb_vf_configure(struct igb_adapter *adapter, int vf); @@ -1611,6 +1613,7 @@ static void igb_configure(struct igb_adapter *adapter) igb_setup_mrqc(adapter); igb_setup_rctl(adapter); + igb_nfc_filter_restore(adapter); igb_configure_tx(adapter); igb_configure_rx(adapter); @@ -2059,6 +2062,21 @@ static int igb_set_features(struct net_device *netdev, if (!(changed & (NETIF_F_RXALL | NETIF_F_NTUPLE))) return 0; + if (!(features & NETIF_F_NTUPLE)) { + struct hlist_node *node2; + struct igb_nfc_filter *rule; + + spin_lock(&adapter->nfc_lock); + hlist_for_each_entry_safe(rule, node2, + &adapter->nfc_filter_list, nfc_node) { + igb_erase_filter(adapter, rule); + hlist_del(&rule->nfc_node); + kfree(rule); + } + spin_unlock(&adapter->nfc_lock); + adapter->nfc_filter_count = 0; + } + netdev->features = features; if (netif_running(netdev)) @@ -3053,6 +3071,7 @@ static int igb_sw_init(struct igb_adapter *adapter) VLAN_HLEN; adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN; + spin_lock_init(&adapter->nfc_lock); spin_lock_init(&adapter->stats64_lock); #ifdef CONFIG_PCI_IOV switch (hw->mac.type) { @@ -3240,6 +3259,8 @@ static int __igb_close(struct net_device *netdev, bool suspending) igb_down(adapter); igb_free_irq(adapter); + igb_nfc_filter_exit(adapter); + igb_free_all_tx_resources(adapter); igb_free_all_rx_resources(adapter); @@ -8306,4 +8327,28 @@ int igb_reinit_queues(struct igb_adapter *adapter) return err; } + +static void igb_nfc_filter_exit(struct igb_adapter *adapter) +{ + struct igb_nfc_filter *rule; + + spin_lock(&adapter->nfc_lock); + + hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node) + igb_erase_filter(adapter, rule); + + spin_unlock(&adapter->nfc_lock); +} + +static void igb_nfc_filter_restore(struct igb_adapter *adapter) +{ + struct igb_nfc_filter *rule; + + spin_lock(&adapter->nfc_lock); + + hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node) + igb_add_filter(adapter, rule); + + spin_unlock(&adapter->nfc_lock); +} /* igb_main.c */ From 64c75d41ace516b7e4f0f187f91282aa43a51b38 Mon Sep 17 00:00:00 2001 From: Gangfeng Huang Date: Wed, 6 Jul 2016 13:22:55 +0800 Subject: [PATCH 0298/1715] igb: support RX flow classification by ethertype This patch is meant to allow for RX network flow classification to insert and remove ethertype filter by ethtool Example: Add an ethertype filter: $ ethtool -N eth0 flow-type ether proto 0x88F8 action 2 Show all filters: $ ethtool -n eth0 4 RX rings available Total 1 rules Filter: 15 Flow Type: Raw Ethernet Src MAC addr: 00:00:00:00:00:00 mask: FF:FF:FF:FF:FF:FF Dest MAC addr: 00:00:00:00:00:00 mask: FF:FF:FF:FF:FF:FF Ethertype: 0x88F8 mask: 0x0 Action: Direct to queue 2 Delete the filter by location: $ ethtool -N delete 15 Signed-off-by: Ruhao Gao Signed-off-by: Gangfeng Huang Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/e1000_82575.h | 5 ++ drivers/net/ethernet/intel/igb/igb.h | 21 +++++- drivers/net/ethernet/intel/igb/igb_ethtool.c | 77 +++++++++++++++++++- drivers/net/ethernet/intel/igb/igb_ptp.c | 4 +- 4 files changed, 101 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.h b/drivers/net/ethernet/intel/igb/e1000_82575.h index 199ff98209cf..acf06051e111 100644 --- a/drivers/net/ethernet/intel/igb/e1000_82575.h +++ b/drivers/net/ethernet/intel/igb/e1000_82575.h @@ -188,6 +188,11 @@ struct e1000_adv_tx_context_desc { /* ETQF register bit definitions */ #define E1000_ETQF_FILTER_ENABLE BIT(26) #define E1000_ETQF_1588 BIT(30) +#define E1000_ETQF_IMM_INT BIT(29) +#define E1000_ETQF_QUEUE_ENABLE BIT(31) +#define E1000_ETQF_QUEUE_SHIFT 16 +#define E1000_ETQF_QUEUE_MASK 0x00070000 +#define E1000_ETQF_ETYPE_MASK 0x0000FFFF /* FTQF register bit definitions */ #define E1000_FTQF_VF_BP 0x00008000 diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h index 37f82ca91141..5c5075819bac 100644 --- a/drivers/net/ethernet/intel/igb/igb.h +++ b/drivers/net/ethernet/intel/igb/igb.h @@ -350,13 +350,24 @@ struct hwmon_buff { }; #endif +/* The number of L2 ether-type filter registers, Index 3 is reserved + * for PTP 1588 timestamp + */ +#define MAX_ETYPE_FILTER (4 - 1) +/* ETQF filter list: one static filter per filter consumer. This is + * to avoid filter collisions later. Add new filters here!! + * + * Current filters: Filter 3 + */ +#define IGB_ETQF_FILTER_1588 3 + #define IGB_N_EXTTS 2 #define IGB_N_PEROUT 2 #define IGB_N_SDP 4 #define IGB_RETA_SIZE 128 enum igb_filter_match_flags { - IGB_FILTER_FLAG_NONE = 0x0, + IGB_FILTER_FLAG_ETHER_TYPE = 0x1, }; #define IGB_MAX_RXNFC_FILTERS 16 @@ -364,14 +375,17 @@ enum igb_filter_match_flags { /* RX network flow classification data structure */ struct igb_nfc_input { /* Byte layout in order, all values with MSB first: - * match_flags - 1 byte - */ + * match_flags - 1 byte + * etype - 2 bytes + */ u8 match_flags; + __be16 etype; }; struct igb_nfc_filter { struct hlist_node nfc_node; struct igb_nfc_input filter; + u16 etype_reg_index; u16 sw_idx; u16 action; }; @@ -500,6 +514,7 @@ struct igb_adapter { unsigned int nfc_filter_count; /* lock for RX network flow classification filter */ spinlock_t nfc_lock; + bool etype_bitmap[MAX_ETYPE_FILTER]; }; /* flags controlling PTP/1588 function */ diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index 2599826bbec7..00e33879a76c 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -2431,6 +2431,7 @@ static int igb_get_ts_info(struct net_device *dev, } } +#define ETHER_TYPE_FULL_MASK ((__force __be16)~0) static int igb_get_ethtool_nfc_entry(struct igb_adapter *adapter, struct ethtool_rxnfc *cmd) { @@ -2448,6 +2449,13 @@ static int igb_get_ethtool_nfc_entry(struct igb_adapter *adapter, if (!rule || fsp->location != rule->sw_idx) return -EINVAL; + if (rule->filter.match_flags & IGB_FILTER_FLAG_ETHER_TYPE) { + fsp->flow_type = ETHER_FLOW; + fsp->ring_cookie = rule->action; + fsp->h_u.ether_spec.h_proto = rule->filter.etype; + fsp->m_u.ether_spec.h_proto = ETHER_TYPE_FULL_MASK; + return 0; + } return -EINVAL; } @@ -2650,13 +2658,75 @@ static int igb_set_rss_hash_opt(struct igb_adapter *adapter, return 0; } +static int igb_rxnfc_write_etype_filter(struct igb_adapter *adapter, + struct igb_nfc_filter *input) +{ + struct e1000_hw *hw = &adapter->hw; + u8 i; + u32 etqf; + u16 etype; + + /* find an empty etype filter register */ + for (i = 0; i < MAX_ETYPE_FILTER; ++i) { + if (!adapter->etype_bitmap[i]) + break; + } + if (i == MAX_ETYPE_FILTER) { + dev_err(&adapter->pdev->dev, "ethtool -N: etype filters are all used.\n"); + return -EINVAL; + } + + adapter->etype_bitmap[i] = true; + + etqf = rd32(E1000_ETQF(i)); + etype = ntohs(input->filter.etype & ETHER_TYPE_FULL_MASK); + + etqf |= E1000_ETQF_FILTER_ENABLE; + etqf &= ~E1000_ETQF_ETYPE_MASK; + etqf |= (etype & E1000_ETQF_ETYPE_MASK); + + etqf &= ~E1000_ETQF_QUEUE_MASK; + etqf |= ((input->action << E1000_ETQF_QUEUE_SHIFT) + & E1000_ETQF_QUEUE_MASK); + etqf |= E1000_ETQF_QUEUE_ENABLE; + + wr32(E1000_ETQF(i), etqf); + + input->etype_reg_index = i; + + return 0; +} + int igb_add_filter(struct igb_adapter *adapter, struct igb_nfc_filter *input) { - return -EINVAL; + int err = -EINVAL; + + if (input->filter.match_flags & IGB_FILTER_FLAG_ETHER_TYPE) + err = igb_rxnfc_write_etype_filter(adapter, input); + + return err; +} + +static void igb_clear_etype_filter_regs(struct igb_adapter *adapter, + u16 reg_index) +{ + struct e1000_hw *hw = &adapter->hw; + u32 etqf = rd32(E1000_ETQF(reg_index)); + + etqf &= ~E1000_ETQF_QUEUE_ENABLE; + etqf &= ~E1000_ETQF_QUEUE_MASK; + etqf &= ~E1000_ETQF_FILTER_ENABLE; + + wr32(E1000_ETQF(reg_index), etqf); + + adapter->etype_bitmap[reg_index] = false; } int igb_erase_filter(struct igb_adapter *adapter, struct igb_nfc_filter *input) { + if (input->filter.match_flags & IGB_FILTER_FLAG_ETHER_TYPE) + igb_clear_etype_filter_regs(adapter, + input->etype_reg_index); return 0; } @@ -2738,10 +2808,15 @@ static int igb_add_ethtool_nfc_entry(struct igb_adapter *adapter, if ((fsp->flow_type & ~FLOW_EXT) != ETHER_FLOW) return -EINVAL; + if (fsp->m_u.ether_spec.h_proto != ETHER_TYPE_FULL_MASK) + return -EINVAL; + input = kzalloc(sizeof(*input), GFP_KERNEL); if (!input) return -ENOMEM; + input->filter.etype = fsp->h_u.ether_spec.h_proto; + input->filter.match_flags = IGB_FILTER_FLAG_ETHER_TYPE; input->action = fsp->ring_cookie; input->sw_idx = fsp->location; diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c index 336c103ae374..66dfa2085cc7 100644 --- a/drivers/net/ethernet/intel/igb/igb_ptp.c +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c @@ -998,12 +998,12 @@ static int igb_ptp_set_timestamp_mode(struct igb_adapter *adapter, /* define ethertype filter for timestamped packets */ if (is_l2) - wr32(E1000_ETQF(3), + wr32(E1000_ETQF(IGB_ETQF_FILTER_1588), (E1000_ETQF_FILTER_ENABLE | /* enable filter */ E1000_ETQF_1588 | /* enable timestamping */ ETH_P_1588)); /* 1588 eth protocol type */ else - wr32(E1000_ETQF(3), 0); + wr32(E1000_ETQF(IGB_ETQF_FILTER_1588), 0); /* L4 Queue Filter[3]: filter by destination port and protocol */ if (is_l4) { From 7a277a963bf394d80f8998a7143a208e0891f6e7 Mon Sep 17 00:00:00 2001 From: Gangfeng Huang Date: Wed, 6 Jul 2016 13:22:56 +0800 Subject: [PATCH 0299/1715] igb: support RX flow classification by VLAN priority This patch is meant to allow for RX network flow classification to insert and remove VLAN priority filter by ethtool Example: Add an VLAN priority filter: $ ethtool -N eth0 flow-type ether vlan 0x6000 vlan-mask 0x1FFF action 2 loc 1 Show all filters: $ ethtool -n eth0 4 RX rings available Total 1 rules Filter: 1 Flow Type: Raw Ethernet Src MAC addr: 00:00:00:00:00:00 mask: FF:FF:FF:FF:FF:FF Dest MAC addr: 00:00:00:00:00:00 mask: FF:FF:FF:FF:FF:FF Ethertype: 0x0 mask: 0xFFFF VLAN EtherType: 0x0 mask: 0xffff VLAN: 0x6000 mask: 0x1fff User-defined: 0x0 mask: 0xffffffffffffffff Action: Direct to queue 2 Delete the filter by location: $ ethtool -N delete 1 Signed-off-by: Ruhao Gao Signed-off-by: Gangfeng Huang Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- .../net/ethernet/intel/igb/e1000_defines.h | 4 + drivers/net/ethernet/intel/igb/e1000_regs.h | 1 + drivers/net/ethernet/intel/igb/igb.h | 3 + drivers/net/ethernet/intel/igb/igb_ethtool.c | 91 +++++++++++++++++-- 4 files changed, 92 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/e1000_defines.h b/drivers/net/ethernet/intel/igb/e1000_defines.h index 2997c443c5dc..2688180a7acd 100644 --- a/drivers/net/ethernet/intel/igb/e1000_defines.h +++ b/drivers/net/ethernet/intel/igb/e1000_defines.h @@ -1024,4 +1024,8 @@ #define E1000_RTTBCNRC_RF_INT_MASK \ (E1000_RTTBCNRC_RF_DEC_MASK << E1000_RTTBCNRC_RF_INT_SHIFT) +#define E1000_VLAPQF_QUEUE_SEL(_n, q_idx) (q_idx << ((_n) * 4)) +#define E1000_VLAPQF_P_VALID(_n) (0x1 << (3 + (_n) * 4)) +#define E1000_VLAPQF_QUEUE_MASK 0x03 + #endif diff --git a/drivers/net/ethernet/intel/igb/e1000_regs.h b/drivers/net/ethernet/intel/igb/e1000_regs.h index 21d9d02885cb..d84afdd83e53 100644 --- a/drivers/net/ethernet/intel/igb/e1000_regs.h +++ b/drivers/net/ethernet/intel/igb/e1000_regs.h @@ -309,6 +309,7 @@ (0x054E0 + ((_i - 16) * 8))) #define E1000_RAH(_i) (((_i) <= 15) ? (0x05404 + ((_i) * 8)) : \ (0x054E4 + ((_i - 16) * 8))) +#define E1000_VLAPQF 0x055B0 /* VLAN Priority Queue Filter VLAPQF */ #define E1000_IP4AT_REG(_i) (0x05840 + ((_i) * 8)) #define E1000_IP6AT_REG(_i) (0x05880 + ((_i) * 4)) #define E1000_WUPM_REG(_i) (0x05A00 + ((_i) * 4)) diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h index 5c5075819bac..03fbe4b7663b 100644 --- a/drivers/net/ethernet/intel/igb/igb.h +++ b/drivers/net/ethernet/intel/igb/igb.h @@ -368,6 +368,7 @@ struct hwmon_buff { enum igb_filter_match_flags { IGB_FILTER_FLAG_ETHER_TYPE = 0x1, + IGB_FILTER_FLAG_VLAN_TCI = 0x2, }; #define IGB_MAX_RXNFC_FILTERS 16 @@ -377,9 +378,11 @@ struct igb_nfc_input { /* Byte layout in order, all values with MSB first: * match_flags - 1 byte * etype - 2 bytes + * vlan_tci - 2 bytes */ u8 match_flags; __be16 etype; + __be16 vlan_tci; }; struct igb_nfc_filter { diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index 00e33879a76c..ef5408b8e083 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -2449,11 +2449,18 @@ static int igb_get_ethtool_nfc_entry(struct igb_adapter *adapter, if (!rule || fsp->location != rule->sw_idx) return -EINVAL; - if (rule->filter.match_flags & IGB_FILTER_FLAG_ETHER_TYPE) { + if (rule->filter.match_flags) { fsp->flow_type = ETHER_FLOW; fsp->ring_cookie = rule->action; - fsp->h_u.ether_spec.h_proto = rule->filter.etype; - fsp->m_u.ether_spec.h_proto = ETHER_TYPE_FULL_MASK; + if (rule->filter.match_flags & IGB_FILTER_FLAG_ETHER_TYPE) { + fsp->h_u.ether_spec.h_proto = rule->filter.etype; + fsp->m_u.ether_spec.h_proto = ETHER_TYPE_FULL_MASK; + } + if (rule->filter.match_flags & IGB_FILTER_FLAG_VLAN_TCI) { + fsp->flow_type |= FLOW_EXT; + fsp->h_ext.vlan_tci = rule->filter.vlan_tci; + fsp->m_ext.vlan_tci = htons(VLAN_PRIO_MASK); + } return 0; } return -EINVAL; @@ -2697,12 +2704,46 @@ static int igb_rxnfc_write_etype_filter(struct igb_adapter *adapter, return 0; } +int igb_rxnfc_write_vlan_prio_filter(struct igb_adapter *adapter, + struct igb_nfc_filter *input) +{ + struct e1000_hw *hw = &adapter->hw; + u8 vlan_priority; + u16 queue_index; + u32 vlapqf; + + vlapqf = rd32(E1000_VLAPQF); + vlan_priority = (ntohs(input->filter.vlan_tci) & VLAN_PRIO_MASK) + >> VLAN_PRIO_SHIFT; + queue_index = (vlapqf >> (vlan_priority * 4)) & E1000_VLAPQF_QUEUE_MASK; + + /* check whether this vlan prio is already set */ + if ((vlapqf & E1000_VLAPQF_P_VALID(vlan_priority)) && + (queue_index != input->action)) { + dev_err(&adapter->pdev->dev, "ethtool rxnfc set vlan prio filter failed.\n"); + return -EEXIST; + } + + vlapqf |= E1000_VLAPQF_P_VALID(vlan_priority); + vlapqf |= E1000_VLAPQF_QUEUE_SEL(vlan_priority, input->action); + + wr32(E1000_VLAPQF, vlapqf); + + return 0; +} + int igb_add_filter(struct igb_adapter *adapter, struct igb_nfc_filter *input) { int err = -EINVAL; - if (input->filter.match_flags & IGB_FILTER_FLAG_ETHER_TYPE) + if (input->filter.match_flags & IGB_FILTER_FLAG_ETHER_TYPE) { err = igb_rxnfc_write_etype_filter(adapter, input); + if (err) + return err; + } + + if (input->filter.match_flags & IGB_FILTER_FLAG_VLAN_TCI) + err = igb_rxnfc_write_vlan_prio_filter(adapter, input); return err; } @@ -2722,11 +2763,33 @@ static void igb_clear_etype_filter_regs(struct igb_adapter *adapter, adapter->etype_bitmap[reg_index] = false; } +static void igb_clear_vlan_prio_filter(struct igb_adapter *adapter, + u16 vlan_tci) +{ + struct e1000_hw *hw = &adapter->hw; + u8 vlan_priority; + u32 vlapqf; + + vlan_priority = (vlan_tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; + + vlapqf = rd32(E1000_VLAPQF); + vlapqf &= ~E1000_VLAPQF_P_VALID(vlan_priority); + vlapqf &= ~E1000_VLAPQF_QUEUE_SEL(vlan_priority, + E1000_VLAPQF_QUEUE_MASK); + + wr32(E1000_VLAPQF, vlapqf); +} + int igb_erase_filter(struct igb_adapter *adapter, struct igb_nfc_filter *input) { if (input->filter.match_flags & IGB_FILTER_FLAG_ETHER_TYPE) igb_clear_etype_filter_regs(adapter, input->etype_reg_index); + + if (input->filter.match_flags & IGB_FILTER_FLAG_VLAN_TCI) + igb_clear_vlan_prio_filter(adapter, + ntohs(input->filter.vlan_tci)); + return 0; } @@ -2808,15 +2871,28 @@ static int igb_add_ethtool_nfc_entry(struct igb_adapter *adapter, if ((fsp->flow_type & ~FLOW_EXT) != ETHER_FLOW) return -EINVAL; - if (fsp->m_u.ether_spec.h_proto != ETHER_TYPE_FULL_MASK) + if (fsp->m_u.ether_spec.h_proto != ETHER_TYPE_FULL_MASK && + fsp->m_ext.vlan_tci != htons(VLAN_PRIO_MASK)) return -EINVAL; input = kzalloc(sizeof(*input), GFP_KERNEL); if (!input) return -ENOMEM; - input->filter.etype = fsp->h_u.ether_spec.h_proto; - input->filter.match_flags = IGB_FILTER_FLAG_ETHER_TYPE; + if (fsp->m_u.ether_spec.h_proto == ETHER_TYPE_FULL_MASK) { + input->filter.etype = fsp->h_u.ether_spec.h_proto; + input->filter.match_flags = IGB_FILTER_FLAG_ETHER_TYPE; + } + + if ((fsp->flow_type & FLOW_EXT) && fsp->m_ext.vlan_tci) { + if (fsp->m_ext.vlan_tci != htons(VLAN_PRIO_MASK)) { + err = -EINVAL; + goto err_out; + } + input->filter.vlan_tci = fsp->h_ext.vlan_tci; + input->filter.match_flags |= IGB_FILTER_FLAG_VLAN_TCI; + } + input->action = fsp->ring_cookie; input->sw_idx = fsp->location; @@ -2843,6 +2919,7 @@ static int igb_add_ethtool_nfc_entry(struct igb_adapter *adapter, err_out_w_lock: spin_unlock(&adapter->nfc_lock); +err_out: kfree(input); return err; } From 54be81328c07324a88cd5dd86b2e7422db3e768e Mon Sep 17 00:00:00 2001 From: Gangfeng Huang Date: Wed, 6 Jul 2016 13:22:57 +0800 Subject: [PATCH 0300/1715] igb: fix error code in igb_add_ethtool_nfc_entry() Use error "rmgr: Cannot insert RX class rule: Operation not supported" is more meaningful than "rmgr: Cannot insert RX class rule: Unknown error 524" Signed-off-by: Gangfeng Huang Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/igb_ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index ef5408b8e083..0c33eca7c832 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -2851,7 +2851,7 @@ static int igb_add_ethtool_nfc_entry(struct igb_adapter *adapter, int err = 0; if (!(netdev->hw_features & NETIF_F_NTUPLE)) - return -ENOTSUPP; + return -EOPNOTSUPP; /* Don't allow programming if the action is a queue greater than * the number of online Rx queues. From a55defd897cb2b4d96fedd81e2d5513d83ff339e Mon Sep 17 00:00:00 2001 From: Emil Tantilov Date: Fri, 29 Jul 2016 10:30:06 -0700 Subject: [PATCH 0301/1715] ixgbe: only check Tx queue enablement when debugging Following a write the TXDCTL.ENABLE bit is set only when the Tx queue is actually enabled, which may not happen during the configure phase even if we waited for it. Make this check debug only since this is causing confusion with users who notice the warning in dmesg. Signed-off-by: Emil Tantilov Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index b4f03748adc0..bbbc5b836923 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -3224,7 +3224,7 @@ void ixgbe_configure_tx_ring(struct ixgbe_adapter *adapter, txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(reg_idx)); } while (--wait_loop && !(txdctl & IXGBE_TXDCTL_ENABLE)); if (!wait_loop) - e_err(drv, "Could not enable Tx Queue %d\n", reg_idx); + hw_dbg(hw, "Could not enable Tx Queue %d\n", reg_idx); } static void ixgbe_setup_mtqc(struct ixgbe_adapter *adapter) From 4ad6af0237fd3a2ab8e8ef8a43f7fe4bb3787718 Mon Sep 17 00:00:00 2001 From: Emil Tantilov Date: Fri, 29 Jul 2016 10:30:11 -0700 Subject: [PATCH 0302/1715] ixgbevf: change hw_dbg to use netdev_dbg Instead of the home brewed macro make use of netdev_dbg same as the ixgbe driver. Signed-off-by: Emil Tantilov Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbevf/ixgbevf.h | 11 ++++------- drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 1 + 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h index be52f597688b..5639fbe294d0 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h @@ -502,12 +502,9 @@ extern void ixgbevf_write_eitr(struct ixgbevf_q_vector *q_vector); void ixgbe_napi_add_all(struct ixgbevf_adapter *adapter); void ixgbe_napi_del_all(struct ixgbevf_adapter *adapter); -#ifdef DEBUG -char *ixgbevf_get_hw_dev_name(struct ixgbe_hw *hw); -#define hw_dbg(hw, format, arg...) \ - printk(KERN_DEBUG "%s: " format, ixgbevf_get_hw_dev_name(hw), ##arg) -#else -#define hw_dbg(hw, format, arg...) do {} while (0) -#endif +#define ixgbevf_hw_to_netdev(hw) \ + (((struct ixgbevf_adapter *)(hw)->back)->netdev) +#define hw_dbg(hw, format, arg...) \ + netdev_dbg(ixgbevf_hw_to_netdev(hw), format, ## arg) #endif /* _IXGBEVF_H_ */ diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index d9d6616f02a4..8d4f535b9e92 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -2993,6 +2993,7 @@ static void ixgbevf_free_all_tx_resources(struct ixgbevf_adapter *adapter) **/ int ixgbevf_setup_tx_resources(struct ixgbevf_ring *tx_ring) { + struct ixgbevf_adapter *adapter = netdev_priv(tx_ring->netdev); int size; size = sizeof(struct ixgbevf_tx_buffer) * tx_ring->count; From ee95053f78ee6883a6aeb75e346346adc0f4aded Mon Sep 17 00:00:00 2001 From: Emil Tantilov Date: Fri, 29 Jul 2016 10:30:16 -0700 Subject: [PATCH 0303/1715] ixgbevf: only check Tx queue enablement when debugging Following a write the VFTXDCTL.ENABLE bit is set only when the Tx queue is actually enabled, which may not happen during the configure phase even if we waited for it. Make this check debug only since this is causing confusion with users who notice the warning in dmesg. Signed-off-by: Emil Tantilov Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 8d4f535b9e92..4044608083cd 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -1612,7 +1612,7 @@ static void ixgbevf_configure_tx_ring(struct ixgbevf_adapter *adapter, txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(reg_idx)); } while (--wait_loop && !(txdctl & IXGBE_TXDCTL_ENABLE)); if (!wait_loop) - pr_err("Could not enable Tx Queue %d\n", reg_idx); + hw_dbg(hw, "Could not enable Tx Queue %d\n", reg_idx); } /** From 57ca2a4fed520ee85a8fe809ff1947ec7c25aec9 Mon Sep 17 00:00:00 2001 From: Emil Tantilov Date: Fri, 29 Jul 2016 14:46:31 -0700 Subject: [PATCH 0304/1715] ixgbe: use atomic bitwise operations when handling reset requests Use atomic bitwise operations when setting and checking reset requests. This should help with possible races in the service task. Signed-off-by: Emil Tantilov Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe.h | 2 +- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 16 +++++++--------- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index 9475ff9055aa..5628e2dae427 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -653,7 +653,6 @@ struct ixgbe_adapter { #define IXGBE_FLAG2_TEMP_SENSOR_EVENT BIT(3) #define IXGBE_FLAG2_SEARCH_FOR_SFP BIT(4) #define IXGBE_FLAG2_SFP_NEEDS_RESET BIT(5) -#define IXGBE_FLAG2_RESET_REQUESTED BIT(6) #define IXGBE_FLAG2_FDIR_REQUIRES_REINIT BIT(7) #define IXGBE_FLAG2_RSS_FIELD_IPV4_UDP BIT(8) #define IXGBE_FLAG2_RSS_FIELD_IPV6_UDP BIT(9) @@ -840,6 +839,7 @@ enum ixgbe_state_t { __IXGBE_IN_SFP_INIT, __IXGBE_PTP_RUNNING, __IXGBE_PTP_TX_IN_PROGRESS, + __IXGBE_RESET_REQUESTED, }; struct ixgbe_cb { diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index bbbc5b836923..6bf131faa65e 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -1103,7 +1103,7 @@ static void ixgbe_tx_timeout_reset(struct ixgbe_adapter *adapter) /* Do the reset outside of interrupt context */ if (!test_bit(__IXGBE_DOWN, &adapter->state)) { - adapter->flags2 |= IXGBE_FLAG2_RESET_REQUESTED; + set_bit(__IXGBE_RESET_REQUESTED, &adapter->state); e_warn(drv, "initiating reset due to tx timeout\n"); ixgbe_service_event_schedule(adapter); } @@ -2777,7 +2777,7 @@ static irqreturn_t ixgbe_msix_other(int irq, void *data) } if (eicr & IXGBE_EICR_ECC) { e_info(link, "Received ECC Err, initiating reset\n"); - adapter->flags2 |= IXGBE_FLAG2_RESET_REQUESTED; + set_bit(__IXGBE_RESET_REQUESTED, &adapter->state); ixgbe_service_event_schedule(adapter); IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC); } @@ -3007,7 +3007,7 @@ static irqreturn_t ixgbe_intr(int irq, void *data) case ixgbe_mac_x550em_a: if (eicr & IXGBE_EICR_ECC) { e_info(link, "Received ECC Err, initiating reset\n"); - adapter->flags2 |= IXGBE_FLAG2_RESET_REQUESTED; + set_bit(__IXGBE_RESET_REQUESTED, &adapter->state); ixgbe_service_event_schedule(adapter); IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC); } @@ -5500,8 +5500,8 @@ void ixgbe_down(struct ixgbe_adapter *adapter) ixgbe_napi_disable_all(adapter); - adapter->flags2 &= ~(IXGBE_FLAG2_FDIR_REQUIRES_REINIT | - IXGBE_FLAG2_RESET_REQUESTED); + clear_bit(__IXGBE_RESET_REQUESTED, &adapter->state); + adapter->flags2 &= ~IXGBE_FLAG2_FDIR_REQUIRES_REINIT; adapter->flags &= ~IXGBE_FLAG_NEED_LINK_UPDATE; del_timer_sync(&adapter->service_timer); @@ -6921,7 +6921,7 @@ static void ixgbe_watchdog_flush_tx(struct ixgbe_adapter *adapter) * (Do the reset outside of interrupt context). */ e_warn(drv, "initiating reset to clear Tx work after link loss\n"); - adapter->flags2 |= IXGBE_FLAG2_RESET_REQUESTED; + set_bit(__IXGBE_RESET_REQUESTED, &adapter->state); } } } @@ -7187,11 +7187,9 @@ static void ixgbe_phy_interrupt_subtask(struct ixgbe_adapter *adapter) static void ixgbe_reset_subtask(struct ixgbe_adapter *adapter) { - if (!(adapter->flags2 & IXGBE_FLAG2_RESET_REQUESTED)) + if (!test_and_clear_bit(__IXGBE_RESET_REQUESTED, &adapter->state)) return; - adapter->flags2 &= ~IXGBE_FLAG2_RESET_REQUESTED; - /* If we're already down, removing or resetting, just bail */ if (test_bit(__IXGBE_DOWN, &adapter->state) || test_bit(__IXGBE_REMOVING, &adapter->state) || From d194fd265e78ca1b2a4607918778446de44818b2 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Fri, 19 Aug 2016 08:34:57 +0300 Subject: [PATCH 0305/1715] qed*: Fix pause setting When moving into using ethtool's link_ksetting, qed started supplying its own bitmask of speed/capabilities, but qede is still checking for the SUPPORTED value to determine whether it supports pause. Fixes: 054c67d1c82a ("qed*: Add support for ethtool link_ksettings callbacks") Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qede/qede_ethtool.c | 2 +- include/linux/qed/qed_if.h | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index 6e17ee10e748..51782bf4987f 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -672,7 +672,7 @@ static int qede_set_pauseparam(struct net_device *dev, memset(¶ms, 0, sizeof(params)); params.override_flags |= QED_LINK_OVERRIDE_PAUSE_CONFIG; if (epause->autoneg) { - if (!(current_link.supported_caps & SUPPORTED_Autoneg)) { + if (!(current_link.supported_caps & QED_LM_Autoneg_BIT)) { DP_INFO(edev, "autoneg not supported\n"); return -EINVAL; } diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 3ed7d20e3811..d8dc5c2243d5 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -318,9 +318,11 @@ struct qed_link_params { struct qed_link_output { bool link_up; - u32 supported_caps; /* In SUPPORTED defs */ - u32 advertised_caps; /* In ADVERTISED defs */ - u32 lp_caps; /* In ADVERTISED defs */ + /* In QED_LM_* defs */ + u32 supported_caps; + u32 advertised_caps; + u32 lp_caps; + u32 speed; /* In Mb/s */ u8 duplex; /* In DUPLEX defs */ u8 port; /* In PORT defs */ From 16d5946a7c966643a78d68ff54ae5c2ddd2e5b63 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Fri, 19 Aug 2016 08:34:58 +0300 Subject: [PATCH 0306/1715] qede: Fix forcing high speeds While '0xdead' and '0xbeef' are "great" values, we should use the correct SPEED_* values instead. Fixes: 054c67d1c82a ("qed*: Add support for ethtool link_ksettings callbacks") Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qede/qede_ethtool.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index 51782bf4987f..f6b8899cda7f 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -414,7 +414,7 @@ static int qede_set_link_ksettings(struct net_device *dev, } params.adv_speeds = QED_LM_40000baseLR4_Full_BIT; break; - case 0xdead: + case SPEED_50000: if (!(current_link.supported_caps & QED_LM_50000baseKR2_Full_BIT)) { DP_INFO(edev, "50G speed not supported\n"); @@ -422,7 +422,7 @@ static int qede_set_link_ksettings(struct net_device *dev, } params.adv_speeds = QED_LM_50000baseKR2_Full_BIT; break; - case 0xbeef: + case SPEED_100000: if (!(current_link.supported_caps & QED_LM_100000baseKR4_Full_BIT)) { DP_INFO(edev, "100G speed not supported\n"); From d5709f7ab77679d407a7687fc5ad7cc7442cc651 Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Wed, 17 Aug 2016 13:36:10 +0300 Subject: [PATCH 0307/1715] flow_dissector: For stripped vlan, get vlan info from skb->vlan_tci Early in the datapath skb_vlan_untag function is called, stripped the vlan from the skb and set skb->vlan_tci and skb->vlan_proto fields. The current dissection doesn't handle stripped vlan packets correctly. In some flows, vlan doesn't exist in skb->data anymore when applying flow dissection on the skb, fix that. In case vlan info wasn't stripped before applying flow_dissector (RPS flow for example), or in case of skb with multiple vlans (e.g. 802.1ad), get the vlan info from skb->data. The flow_dissector correctly skips any number of vlans and stores only the first level vlan. Fixes: 0744dd00c1b1 ('net: introduce skb_flow_dissect()') Signed-off-by: Hadar Hen Zion Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/flow_dissector.c | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 91028ae2fb01..362d693c003f 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -119,12 +119,14 @@ bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_dissector_key_ports *key_ports; struct flow_dissector_key_tags *key_tags; struct flow_dissector_key_keyid *key_keyid; + bool skip_vlan = false; u8 ip_proto = 0; bool ret = false; if (!data) { data = skb->data; - proto = skb->protocol; + proto = skb_vlan_tag_present(skb) ? + skb->vlan_proto : skb->protocol; nhoff = skb_network_offset(skb); hlen = skb_headlen(skb); } @@ -243,23 +245,39 @@ ipv6: case htons(ETH_P_8021AD): case htons(ETH_P_8021Q): { const struct vlan_hdr *vlan; - struct vlan_hdr _vlan; - vlan = __skb_header_pointer(skb, nhoff, sizeof(_vlan), data, hlen, &_vlan); - if (!vlan) - goto out_bad; + if (skb_vlan_tag_present(skb)) + proto = skb->protocol; + if (!skb_vlan_tag_present(skb) || + proto == cpu_to_be16(ETH_P_8021Q) || + proto == cpu_to_be16(ETH_P_8021AD)) { + struct vlan_hdr _vlan; + + vlan = __skb_header_pointer(skb, nhoff, sizeof(_vlan), + data, hlen, &_vlan); + if (!vlan) + goto out_bad; + proto = vlan->h_vlan_encapsulated_proto; + nhoff += sizeof(*vlan); + if (skip_vlan) + goto again; + } + + skip_vlan = true; if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_VLANID)) { key_tags = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_VLANID, target_container); - key_tags->vlan_id = skb_vlan_tag_get_id(skb); + if (skb_vlan_tag_present(skb)) + key_tags->vlan_id = skb_vlan_tag_get_id(skb); + else + key_tags->vlan_id = ntohs(vlan->h_vlan_TCI) & + VLAN_VID_MASK; } - proto = vlan->h_vlan_encapsulated_proto; - nhoff += sizeof(*vlan); goto again; } case htons(ETH_P_PPP_SES): { From f6a66927692e30bdc1792e7a1fc2107d4dfcf42d Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Wed, 17 Aug 2016 13:36:11 +0300 Subject: [PATCH 0308/1715] flow_dissector: Get vlan priority in addition to vlan id Add vlan priority check to the flow dissector by adding new flow dissector struct, flow_dissector_key_vlan which includes vlan tag fields. vlan_id and flow_label fields were under the same struct (flow_dissector_key_tags). It was a convenient setting since struct flow_dissector_key_tags is used by struct flow_keys and by setting vlan_id and flow_label under the same struct, we get precisely 24 or 48 bytes in flow_keys from flow_dissector_key_basic. Now, when adding vlan priority support, the code will be cleaner if flow_label and vlan tag won't be under the same struct anymore. Signed-off-by: Hadar Hen Zion Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 1 + include/net/flow_dissector.h | 12 +++++++++--- net/core/flow_dissector.c | 25 ++++++++++++++++--------- 3 files changed, 26 insertions(+), 12 deletions(-) diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index a5f6ce6b578c..49d4aef1f789 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -81,6 +81,7 @@ static inline bool is_vlan_dev(const struct net_device *dev) #define skb_vlan_tag_present(__skb) ((__skb)->vlan_tci & VLAN_TAG_PRESENT) #define skb_vlan_tag_get(__skb) ((__skb)->vlan_tci & ~VLAN_TAG_PRESENT) #define skb_vlan_tag_get_id(__skb) ((__skb)->vlan_tci & VLAN_VID_MASK) +#define skb_vlan_tag_get_prio(__skb) ((__skb)->vlan_tci & VLAN_PRIO_MASK) /** * struct vlan_pcpu_stats - VLAN percpu rx/tx stats diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index d3d60dccd19f..f266b512c3bd 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -32,8 +32,13 @@ struct flow_dissector_key_basic { }; struct flow_dissector_key_tags { - u32 vlan_id:12, - flow_label:20; + u32 flow_label; +}; + +struct flow_dissector_key_vlan { + u16 vlan_id:12, + vlan_priority:3; + u16 padding; }; struct flow_dissector_key_keyid { @@ -119,7 +124,7 @@ enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_PORTS, /* struct flow_dissector_key_ports */ FLOW_DISSECTOR_KEY_ETH_ADDRS, /* struct flow_dissector_key_eth_addrs */ FLOW_DISSECTOR_KEY_TIPC_ADDRS, /* struct flow_dissector_key_tipc_addrs */ - FLOW_DISSECTOR_KEY_VLANID, /* struct flow_dissector_key_flow_tags */ + FLOW_DISSECTOR_KEY_VLAN, /* struct flow_dissector_key_flow_vlan */ FLOW_DISSECTOR_KEY_FLOW_LABEL, /* struct flow_dissector_key_flow_tags */ FLOW_DISSECTOR_KEY_GRE_KEYID, /* struct flow_dissector_key_keyid */ FLOW_DISSECTOR_KEY_MPLS_ENTROPY, /* struct flow_dissector_key_keyid */ @@ -148,6 +153,7 @@ struct flow_keys { #define FLOW_KEYS_HASH_START_FIELD basic struct flow_dissector_key_basic basic; struct flow_dissector_key_tags tags; + struct flow_dissector_key_vlan vlan; struct flow_dissector_key_keyid keyid; struct flow_dissector_key_ports ports; struct flow_dissector_key_addrs addrs; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 362d693c003f..a2879c0f6c4c 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -118,6 +118,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_dissector_key_addrs *key_addrs; struct flow_dissector_key_ports *key_ports; struct flow_dissector_key_tags *key_tags; + struct flow_dissector_key_vlan *key_vlan; struct flow_dissector_key_keyid *key_keyid; bool skip_vlan = false; u8 ip_proto = 0; @@ -266,16 +267,22 @@ ipv6: skip_vlan = true; if (dissector_uses_key(flow_dissector, - FLOW_DISSECTOR_KEY_VLANID)) { - key_tags = skb_flow_dissector_target(flow_dissector, - FLOW_DISSECTOR_KEY_VLANID, + FLOW_DISSECTOR_KEY_VLAN)) { + key_vlan = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_VLAN, target_container); - if (skb_vlan_tag_present(skb)) - key_tags->vlan_id = skb_vlan_tag_get_id(skb); - else - key_tags->vlan_id = ntohs(vlan->h_vlan_TCI) & + if (skb_vlan_tag_present(skb)) { + key_vlan->vlan_id = skb_vlan_tag_get_id(skb); + key_vlan->vlan_priority = + (skb_vlan_tag_get_prio(skb) >> VLAN_PRIO_SHIFT); + } else { + key_vlan->vlan_id = ntohs(vlan->h_vlan_TCI) & VLAN_VID_MASK; + key_vlan->vlan_priority = + (ntohs(vlan->h_vlan_TCI) & + VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; + } } goto again; @@ -935,8 +942,8 @@ static const struct flow_dissector_key flow_keys_dissector_keys[] = { .offset = offsetof(struct flow_keys, ports), }, { - .key_id = FLOW_DISSECTOR_KEY_VLANID, - .offset = offsetof(struct flow_keys, tags), + .key_id = FLOW_DISSECTOR_KEY_VLAN, + .offset = offsetof(struct flow_keys, vlan), }, { .key_id = FLOW_DISSECTOR_KEY_FLOW_LABEL, From 339ba878cfb01b68de3d281ba33fd5e4c9f76546 Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Wed, 17 Aug 2016 13:36:12 +0300 Subject: [PATCH 0309/1715] net_sched: flower: Avoid dissection of unmasked keys The current flower implementation checks the mask range and set all the keys included in that range as "used_keys", even if a specific key in the range has a zero mask. This behavior can cause a false positive return value of dissector_uses_key function and unnecessary dissection in __skb_flow_dissect. This patch checks explicitly the mask of each key and "used_keys" will be set accordingly. Fixes: 77b9900ef53a ('tc: introduce Flower classifier') Signed-off-by: Hadar Hen Zion Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/cls_flower.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 5060801a2f6d..0080fc073019 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -404,12 +404,10 @@ static int fl_init_hashtable(struct cls_fl_head *head, #define FL_KEY_MEMBER_OFFSET(member) offsetof(struct fl_flow_key, member) #define FL_KEY_MEMBER_SIZE(member) (sizeof(((struct fl_flow_key *) 0)->member)) -#define FL_KEY_MEMBER_END_OFFSET(member) \ - (FL_KEY_MEMBER_OFFSET(member) + FL_KEY_MEMBER_SIZE(member)) -#define FL_KEY_IN_RANGE(mask, member) \ - (FL_KEY_MEMBER_OFFSET(member) <= (mask)->range.end && \ - FL_KEY_MEMBER_END_OFFSET(member) >= (mask)->range.start) +#define FL_KEY_IS_MASKED(mask, member) \ + memchr_inv(((char *)mask) + FL_KEY_MEMBER_OFFSET(member), \ + 0, FL_KEY_MEMBER_SIZE(member)) \ #define FL_KEY_SET(keys, cnt, id, member) \ do { \ @@ -418,9 +416,9 @@ static int fl_init_hashtable(struct cls_fl_head *head, cnt++; \ } while(0); -#define FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt, id, member) \ +#define FL_KEY_SET_IF_MASKED(mask, keys, cnt, id, member) \ do { \ - if (FL_KEY_IN_RANGE(mask, member)) \ + if (FL_KEY_IS_MASKED(mask, member)) \ FL_KEY_SET(keys, cnt, id, member); \ } while(0); @@ -432,14 +430,14 @@ static void fl_init_dissector(struct cls_fl_head *head, FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_CONTROL, control); FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_BASIC, basic); - FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt, - FLOW_DISSECTOR_KEY_ETH_ADDRS, eth); - FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt, - FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4); - FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt, - FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6); - FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt, - FLOW_DISSECTOR_KEY_PORTS, tp); + FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, + FLOW_DISSECTOR_KEY_ETH_ADDRS, eth); + FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, + FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4); + FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, + FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6); + FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, + FLOW_DISSECTOR_KEY_PORTS, tp); skb_flow_dissector_init(&head->dissector, keys, cnt); } From 9399ae9a6cb28ebac78216f715ace3b42f1c2132 Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Wed, 17 Aug 2016 13:36:13 +0300 Subject: [PATCH 0310/1715] net_sched: flower: Add vlan support Enhance flower to support 802.1Q vlan protocol classification. Currently, the supported fields are vlan_id and vlan_priority. Example: # add a flower filter with vlan id and priority classification tc filter add dev ens4f0 protocol 802.1Q parent ffff: \ flower \ indev ens4f0 \ vlan_ethtype ipv4 \ vlan_id 100 \ vlan_prio 3 \ action vlan pop Signed-off-by: Hadar Hen Zion Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/pkt_cls.h | 3 ++ net/sched/cls_flower.c | 70 ++++++++++++++++++++++++++++++++++-- 2 files changed, 70 insertions(+), 3 deletions(-) diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index d1c1ccaba787..51b5b247fb5a 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -428,6 +428,9 @@ enum { TCA_FLOWER_KEY_UDP_DST, /* be16 */ TCA_FLOWER_FLAGS, + TCA_FLOWER_KEY_VLAN_ID, + TCA_FLOWER_KEY_VLAN_PRIO, + TCA_FLOWER_KEY_VLAN_ETH_TYPE, __TCA_FLOWER_MAX, }; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 0080fc073019..1e11e57e6947 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -28,6 +28,7 @@ struct fl_flow_key { struct flow_dissector_key_control control; struct flow_dissector_key_basic basic; struct flow_dissector_key_eth_addrs eth; + struct flow_dissector_key_vlan vlan; struct flow_dissector_key_addrs ipaddrs; union { struct flow_dissector_key_ipv4_addrs ipv4; @@ -293,6 +294,10 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { [TCA_FLOWER_KEY_TCP_DST] = { .type = NLA_U16 }, [TCA_FLOWER_KEY_UDP_SRC] = { .type = NLA_U16 }, [TCA_FLOWER_KEY_UDP_DST] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_VLAN_ID] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_VLAN_PRIO] = { .type = NLA_U8 }, + [TCA_FLOWER_KEY_VLAN_ETH_TYPE] = { .type = NLA_U16 }, + }; static void fl_set_key_val(struct nlattr **tb, @@ -308,9 +313,29 @@ static void fl_set_key_val(struct nlattr **tb, memcpy(mask, nla_data(tb[mask_type]), len); } +static void fl_set_key_vlan(struct nlattr **tb, + struct flow_dissector_key_vlan *key_val, + struct flow_dissector_key_vlan *key_mask) +{ +#define VLAN_PRIORITY_MASK 0x7 + + if (tb[TCA_FLOWER_KEY_VLAN_ID]) { + key_val->vlan_id = + nla_get_u16(tb[TCA_FLOWER_KEY_VLAN_ID]) & VLAN_VID_MASK; + key_mask->vlan_id = VLAN_VID_MASK; + } + if (tb[TCA_FLOWER_KEY_VLAN_PRIO]) { + key_val->vlan_priority = + nla_get_u8(tb[TCA_FLOWER_KEY_VLAN_PRIO]) & + VLAN_PRIORITY_MASK; + key_mask->vlan_priority = VLAN_PRIORITY_MASK; + } +} + static int fl_set_key(struct net *net, struct nlattr **tb, struct fl_flow_key *key, struct fl_flow_key *mask) { + __be16 ethertype; #ifdef CONFIG_NET_CLS_IND if (tb[TCA_FLOWER_INDEV]) { int err = tcf_change_indev(net, tb[TCA_FLOWER_INDEV]); @@ -328,9 +353,19 @@ static int fl_set_key(struct net *net, struct nlattr **tb, mask->eth.src, TCA_FLOWER_KEY_ETH_SRC_MASK, sizeof(key->eth.src)); - fl_set_key_val(tb, &key->basic.n_proto, TCA_FLOWER_KEY_ETH_TYPE, - &mask->basic.n_proto, TCA_FLOWER_UNSPEC, - sizeof(key->basic.n_proto)); + if (tb[TCA_FLOWER_KEY_ETH_TYPE]) + ethertype = nla_get_be16(tb[TCA_FLOWER_KEY_ETH_TYPE]); + + if (ethertype == htons(ETH_P_8021Q)) { + fl_set_key_vlan(tb, &key->vlan, &mask->vlan); + fl_set_key_val(tb, &key->basic.n_proto, + TCA_FLOWER_KEY_VLAN_ETH_TYPE, + &mask->basic.n_proto, TCA_FLOWER_UNSPEC, + sizeof(key->basic.n_proto)); + } else { + key->basic.n_proto = ethertype; + mask->basic.n_proto = cpu_to_be16(~0); + } if (key->basic.n_proto == htons(ETH_P_IP) || key->basic.n_proto == htons(ETH_P_IPV6)) { @@ -438,6 +473,8 @@ static void fl_init_dissector(struct cls_fl_head *head, FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6); FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, FLOW_DISSECTOR_KEY_PORTS, tp); + FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, + FLOW_DISSECTOR_KEY_VLAN, vlan); skb_flow_dissector_init(&head->dissector, keys, cnt); } @@ -666,6 +703,29 @@ static int fl_dump_key_val(struct sk_buff *skb, return 0; } +static int fl_dump_key_vlan(struct sk_buff *skb, + struct flow_dissector_key_vlan *vlan_key, + struct flow_dissector_key_vlan *vlan_mask) +{ + int err; + + if (!memchr_inv(vlan_mask, 0, sizeof(*vlan_mask))) + return 0; + if (vlan_mask->vlan_id) { + err = nla_put_u16(skb, TCA_FLOWER_KEY_VLAN_ID, + vlan_key->vlan_id); + if (err) + return err; + } + if (vlan_mask->vlan_priority) { + err = nla_put_u8(skb, TCA_FLOWER_KEY_VLAN_PRIO, + vlan_key->vlan_priority); + if (err) + return err; + } + return 0; +} + static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, struct sk_buff *skb, struct tcmsg *t) { @@ -710,6 +770,10 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, &mask->basic.n_proto, TCA_FLOWER_UNSPEC, sizeof(key->basic.n_proto))) goto nla_put_failure; + + if (fl_dump_key_vlan(skb, &key->vlan, &mask->vlan)) + goto nla_put_failure; + if ((key->basic.n_proto == htons(ETH_P_IP) || key->basic.n_proto == htons(ETH_P_IPV6)) && fl_dump_key_val(skb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO, From 956af37102b515512331a03c35c958b2a1d8dd87 Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Wed, 17 Aug 2016 13:36:14 +0300 Subject: [PATCH 0311/1715] net_sched: act_vlan: Add priority option The current vlan push action supports only vid and protocol options. Add priority option. Example script that adds vlan push action with vid and priority: tc filter add dev veth0 protocol ip parent ffff: \ flower \ indev veth0 \ action vlan push id 100 priority 5 Signed-off-by: Hadar Hen Zion Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/tc_act/tc_vlan.h | 1 + include/uapi/linux/tc_act/tc_vlan.h | 1 + net/sched/act_vlan.c | 13 +++++++++++-- 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/include/net/tc_act/tc_vlan.h b/include/net/tc_act/tc_vlan.h index e29f52e8bdf1..6b835889ea30 100644 --- a/include/net/tc_act/tc_vlan.h +++ b/include/net/tc_act/tc_vlan.h @@ -20,6 +20,7 @@ struct tcf_vlan { int tcfv_action; u16 tcfv_push_vid; __be16 tcfv_push_proto; + u8 tcfv_push_prio; }; #define to_vlan(a) ((struct tcf_vlan *)a) diff --git a/include/uapi/linux/tc_act/tc_vlan.h b/include/uapi/linux/tc_act/tc_vlan.h index 31151ff6264f..be72b6e3843b 100644 --- a/include/uapi/linux/tc_act/tc_vlan.h +++ b/include/uapi/linux/tc_act/tc_vlan.h @@ -29,6 +29,7 @@ enum { TCA_VLAN_PUSH_VLAN_ID, TCA_VLAN_PUSH_VLAN_PROTOCOL, TCA_VLAN_PAD, + TCA_VLAN_PUSH_VLAN_PRIORITY, __TCA_VLAN_MAX, }; #define TCA_VLAN_MAX (__TCA_VLAN_MAX - 1) diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index 691409de3e1a..59a8d3150ae2 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -43,7 +43,8 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a, goto drop; break; case TCA_VLAN_ACT_PUSH: - err = skb_vlan_push(skb, v->tcfv_push_proto, v->tcfv_push_vid); + err = skb_vlan_push(skb, v->tcfv_push_proto, v->tcfv_push_vid | + (v->tcfv_push_prio << VLAN_PRIO_SHIFT)); if (err) goto drop; break; @@ -65,6 +66,7 @@ static const struct nla_policy vlan_policy[TCA_VLAN_MAX + 1] = { [TCA_VLAN_PARMS] = { .len = sizeof(struct tc_vlan) }, [TCA_VLAN_PUSH_VLAN_ID] = { .type = NLA_U16 }, [TCA_VLAN_PUSH_VLAN_PROTOCOL] = { .type = NLA_U16 }, + [TCA_VLAN_PUSH_VLAN_PRIORITY] = { .type = NLA_U8 }, }; static int tcf_vlan_init(struct net *net, struct nlattr *nla, @@ -78,6 +80,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, int action; __be16 push_vid = 0; __be16 push_proto = 0; + u8 push_prio = 0; bool exists = false; int ret = 0, err; @@ -123,6 +126,9 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, } else { push_proto = htons(ETH_P_8021Q); } + + if (tb[TCA_VLAN_PUSH_VLAN_PRIORITY]) + push_prio = nla_get_u8(tb[TCA_VLAN_PUSH_VLAN_PRIORITY]); break; default: if (exists) @@ -150,6 +156,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, v->tcfv_action = action; v->tcfv_push_vid = push_vid; + v->tcfv_push_prio = push_prio; v->tcfv_push_proto = push_proto; v->tcf_action = parm->action; @@ -181,7 +188,9 @@ static int tcf_vlan_dump(struct sk_buff *skb, struct tc_action *a, if (v->tcfv_action == TCA_VLAN_ACT_PUSH && (nla_put_u16(skb, TCA_VLAN_PUSH_VLAN_ID, v->tcfv_push_vid) || nla_put_be16(skb, TCA_VLAN_PUSH_VLAN_PROTOCOL, - v->tcfv_push_proto))) + v->tcfv_push_proto) || + (nla_put_u8(skb, TCA_VLAN_PUSH_VLAN_PRIORITY, + v->tcfv_push_prio)))) goto nla_put_failure; tcf_tm_dump(&t, &v->tcf_tm); From d5ff8c41b5f7289aee44df817023328295bc8463 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 17 Aug 2016 12:53:09 +0200 Subject: [PATCH 0312/1715] net: bridge: consolidate bridge and port linkxstats calls In the bridge driver we usually have the same function working for both port and bridge. In order to follow that logic and also avoid code duplication, consolidate the bridge_ and brport_ linkxstats calls into one since they share most of their code. As a side effect this allows us to dump the vlan stats also via the slave call which is in preparation for the upcoming per-port vlan stats and vlan flag dumping. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_netlink.c | 114 ++++++++++++++-------------------------- 1 file changed, 39 insertions(+), 75 deletions(-) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index f2a29e467e78..493ab9b3d51a 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -1245,14 +1245,30 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev) return 0; } -static size_t bridge_get_linkxstats_size(const struct net_device *dev) +static size_t br_get_linkxstats_size(const struct net_device *dev, int attr) { - struct net_bridge *br = netdev_priv(dev); + struct net_bridge_port *p = NULL; struct net_bridge_vlan_group *vg; struct net_bridge_vlan *v; + struct net_bridge *br; int numvls = 0; - vg = br_vlan_group(br); + switch (attr) { + case IFLA_STATS_LINK_XSTATS: + br = netdev_priv(dev); + vg = br_vlan_group(br); + break; + case IFLA_STATS_LINK_XSTATS_SLAVE: + p = br_port_get_rtnl(dev); + if (!p) + return 0; + br = p->br; + vg = nbp_vlan_group(p); + break; + default: + return 0; + } + if (vg) { /* we need to count all, even placeholder entries */ list_for_each_entry(v, &vg->vlan_list, vlist) @@ -1264,44 +1280,38 @@ static size_t bridge_get_linkxstats_size(const struct net_device *dev) nla_total_size(0); } -static size_t brport_get_linkxstats_size(const struct net_device *dev) +static int br_fill_linkxstats(struct sk_buff *skb, + const struct net_device *dev, + int *prividx, int attr) { - return nla_total_size(sizeof(struct br_mcast_stats)) + - nla_total_size(0); -} - -static size_t br_get_linkxstats_size(const struct net_device *dev, int attr) -{ - size_t retsize = 0; + struct nlattr *nla __maybe_unused; + struct net_bridge_port *p = NULL; + struct net_bridge_vlan_group *vg; + struct net_bridge_vlan *v; + struct net_bridge *br; + struct nlattr *nest; + int vl_idx = 0; switch (attr) { case IFLA_STATS_LINK_XSTATS: - retsize = bridge_get_linkxstats_size(dev); + br = netdev_priv(dev); + vg = br_vlan_group(br); break; case IFLA_STATS_LINK_XSTATS_SLAVE: - retsize = brport_get_linkxstats_size(dev); + p = br_port_get_rtnl(dev); + if (!p) + return 0; + br = p->br; + vg = nbp_vlan_group(p); break; + default: + return -EINVAL; } - return retsize; -} - -static int bridge_fill_linkxstats(struct sk_buff *skb, - const struct net_device *dev, - int *prividx) -{ - struct net_bridge *br = netdev_priv(dev); - struct nlattr *nla __maybe_unused; - struct net_bridge_vlan_group *vg; - struct net_bridge_vlan *v; - struct nlattr *nest; - int vl_idx = 0; - nest = nla_nest_start(skb, LINK_XSTATS_TYPE_BRIDGE); if (!nest) return -EMSGSIZE; - vg = br_vlan_group(br); if (vg) { list_for_each_entry(v, &vg->vlan_list, vlist) { struct bridge_vlan_xstats vxi; @@ -1329,7 +1339,7 @@ static int bridge_fill_linkxstats(struct sk_buff *skb, BRIDGE_XSTATS_PAD); if (!nla) goto nla_put_failure; - br_multicast_get_stats(br, NULL, nla_data(nla)); + br_multicast_get_stats(br, p, nla_data(nla)); } #endif nla_nest_end(skb, nest); @@ -1344,52 +1354,6 @@ nla_put_failure: return -EMSGSIZE; } -static int brport_fill_linkxstats(struct sk_buff *skb, - const struct net_device *dev, - int *prividx) -{ - struct net_bridge_port *p = br_port_get_rtnl(dev); - struct nlattr *nla __maybe_unused; - struct nlattr *nest; - - if (!p) - return 0; - - nest = nla_nest_start(skb, LINK_XSTATS_TYPE_BRIDGE); - if (!nest) - return -EMSGSIZE; -#ifdef CONFIG_BRIDGE_IGMP_SNOOPING - nla = nla_reserve_64bit(skb, BRIDGE_XSTATS_MCAST, - sizeof(struct br_mcast_stats), - BRIDGE_XSTATS_PAD); - if (!nla) { - nla_nest_end(skb, nest); - return -EMSGSIZE; - } - br_multicast_get_stats(p->br, p, nla_data(nla)); -#endif - nla_nest_end(skb, nest); - - return 0; -} - -static int br_fill_linkxstats(struct sk_buff *skb, const struct net_device *dev, - int *prividx, int attr) -{ - int ret = -EINVAL; - - switch (attr) { - case IFLA_STATS_LINK_XSTATS: - ret = bridge_fill_linkxstats(skb, dev, prividx); - break; - case IFLA_STATS_LINK_XSTATS_SLAVE: - ret = brport_fill_linkxstats(skb, dev, prividx); - break; - } - - return ret; -} - static struct rtnl_af_ops br_af_ops __read_mostly = { .family = AF_BRIDGE, .get_link_af_size = br_get_link_af_size_filtered, From 61ba1a2da9693b88bf5f2bb8e7a99a29cd139122 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 17 Aug 2016 12:53:10 +0200 Subject: [PATCH 0313/1715] net: bridge: export vlan flags with the stats Use one of the vlan xstats padding fields to export the vlan flags. This is needed in order to be able to distinguish between master (bridge) and port vlan entries in user-space when dumping the bridge vlan stats. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 2 +- net/bridge/br_netlink.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index c186f64fffca..ab92bca6d448 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -140,7 +140,7 @@ struct bridge_vlan_xstats { __u64 tx_bytes; __u64 tx_packets; __u16 vid; - __u16 pad1; + __u16 flags; __u32 pad2; }; diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 493ab9b3d51a..872d4c0deb59 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -1321,6 +1321,7 @@ static int br_fill_linkxstats(struct sk_buff *skb, continue; memset(&vxi, 0, sizeof(vxi)); vxi.vid = v->vid; + vxi.flags = v->flags; br_vlan_get_stats(v, &stats); vxi.rx_bytes = stats.rx_bytes; vxi.rx_packets = stats.rx_packets; From dca0aaf8472f15836a68bbed8a0ef9ea39e783aa Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 17 Aug 2016 07:48:36 -0700 Subject: [PATCH 0314/1715] tcp: defer sacked assignment While chasing tcp_xmit_retransmit_queue() kasan issue, I found that we could avoid reading sacked field of skb that we wont send, possibly removing one cache line miss. Very minor change in slow path, but why not ? ;) Signed-off-by: Eric Dumazet Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index bdaef7fd6e47..8b45794eb6b2 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2776,7 +2776,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk) max_segs = tcp_tso_autosize(sk, tcp_current_mss(sk)); tcp_for_write_queue_from(skb, sk) { - __u8 sacked = TCP_SKB_CB(skb)->sacked; + __u8 sacked; int segs; if (skb == tcp_send_head(sk)) @@ -2788,6 +2788,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk) segs = tp->snd_cwnd - tcp_packets_in_flight(tp); if (segs <= 0) return; + sacked = TCP_SKB_CB(skb)->sacked; /* In case tcp_shift_skb_data() have aggregated large skbs, * we need to make sure not sending too bigs TSO packets */ From c7b256f998f6d00d4548090aa16fb34e005828c1 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Wed, 17 Aug 2016 16:00:10 +0100 Subject: [PATCH 0315/1715] sfc: avoid division by zero The division is already being done properly in efx_ef10_get_timer_config which returns zero-on-success, unlike the old efx_ef10_get_sysclk_freq. Fixes: d95e329a55ba ("sfc: get timer configuration from adapter") Signed-off-by: Edward Cree Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/ef10.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index b8c9f1884a15..2a70b1a39d80 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -646,7 +646,6 @@ static int efx_ef10_probe(struct efx_nic *efx) rc = efx_ef10_get_timer_config(efx); if (rc < 0) goto fail5; - efx->timer_quantum_ns = 1536000 / rc; /* 1536 cycles */ rc = efx_mcdi_mon_probe(efx); if (rc && rc != -EPERM) From c0c64c152389ad73306b9b0796357210ec6d32ee Mon Sep 17 00:00:00 2001 From: Paul Durrant Date: Wed, 17 Aug 2016 16:13:29 +0100 Subject: [PATCH 0316/1715] xen-netback: create a debugfs node for hash information It is useful to be able to see the hash configuration when running tests. This patch adds a debugfs node for that purpose. Signed-off-by: Paul Durrant Cc: Wei Liu Acked-by: Wei Liu Signed-off-by: David S. Miller --- drivers/net/xen-netback/common.h | 4 ++ drivers/net/xen-netback/hash.c | 68 ++++++++++++++++++++++++++++++++ drivers/net/xen-netback/xenbus.c | 37 ++++++++++++++++- 3 files changed, 107 insertions(+), 2 deletions(-) diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index 84d6cbdd11b2..3a562683603c 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -412,4 +412,8 @@ u32 xenvif_set_hash_mapping(struct xenvif *vif, u32 gref, u32 len, void xenvif_set_skb_hash(struct xenvif *vif, struct sk_buff *skb); +#ifdef CONFIG_DEBUG_FS +void xenvif_dump_hash_info(struct xenvif *vif, struct seq_file *m); +#endif + #endif /* __XEN_NETBACK__COMMON_H__ */ diff --git a/drivers/net/xen-netback/hash.c b/drivers/net/xen-netback/hash.c index fb87cb39a56b..282b16d8093a 100644 --- a/drivers/net/xen-netback/hash.c +++ b/drivers/net/xen-netback/hash.c @@ -369,6 +369,74 @@ u32 xenvif_set_hash_mapping(struct xenvif *vif, u32 gref, u32 len, return XEN_NETIF_CTRL_STATUS_SUCCESS; } +#ifdef CONFIG_DEBUG_FS +void xenvif_dump_hash_info(struct xenvif *vif, struct seq_file *m) +{ + unsigned int i; + + switch (vif->hash.alg) { + case XEN_NETIF_CTRL_HASH_ALGORITHM_TOEPLITZ: + seq_puts(m, "Hash Algorithm: TOEPLITZ\n"); + break; + + case XEN_NETIF_CTRL_HASH_ALGORITHM_NONE: + seq_puts(m, "Hash Algorithm: NONE\n"); + /* FALLTHRU */ + default: + return; + } + + if (vif->hash.flags) { + seq_puts(m, "\nHash Flags:\n"); + + if (vif->hash.flags & XEN_NETIF_CTRL_HASH_TYPE_IPV4) + seq_puts(m, "- IPv4\n"); + if (vif->hash.flags & XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP) + seq_puts(m, "- IPv4 + TCP\n"); + if (vif->hash.flags & XEN_NETIF_CTRL_HASH_TYPE_IPV6) + seq_puts(m, "- IPv6\n"); + if (vif->hash.flags & XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP) + seq_puts(m, "- IPv6 + TCP\n"); + } + + seq_puts(m, "\nHash Key:\n"); + + for (i = 0; i < XEN_NETBK_MAX_HASH_KEY_SIZE; ) { + unsigned int j, n; + + n = 8; + if (i + n >= XEN_NETBK_MAX_HASH_KEY_SIZE) + n = XEN_NETBK_MAX_HASH_KEY_SIZE - i; + + seq_printf(m, "[%2u - %2u]: ", i, i + n - 1); + + for (j = 0; j < n; j++, i++) + seq_printf(m, "%02x ", vif->hash.key[i]); + + seq_puts(m, "\n"); + } + + if (vif->hash.size != 0) { + seq_puts(m, "\nHash Mapping:\n"); + + for (i = 0; i < vif->hash.size; ) { + unsigned int j, n; + + n = 8; + if (i + n >= vif->hash.size) + n = vif->hash.size - i; + + seq_printf(m, "[%4u - %4u]: ", i, i + n - 1); + + for (j = 0; j < n; j++, i++) + seq_printf(m, "%4u ", vif->hash.mapping[i]); + + seq_puts(m, "\n"); + } + } +} +#endif /* CONFIG_DEBUG_FS */ + void xenvif_init_hash(struct xenvif *vif) { if (xenvif_hash_cache_size == 0) diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index 6a31f2610c23..bacf6e0c12b9 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -165,7 +165,7 @@ xenvif_write_io_ring(struct file *filp, const char __user *buf, size_t count, return count; } -static int xenvif_dump_open(struct inode *inode, struct file *filp) +static int xenvif_io_ring_open(struct inode *inode, struct file *filp) { int ret; void *queue = NULL; @@ -179,13 +179,35 @@ static int xenvif_dump_open(struct inode *inode, struct file *filp) static const struct file_operations xenvif_dbg_io_ring_ops_fops = { .owner = THIS_MODULE, - .open = xenvif_dump_open, + .open = xenvif_io_ring_open, .read = seq_read, .llseek = seq_lseek, .release = single_release, .write = xenvif_write_io_ring, }; +static int xenvif_read_ctrl(struct seq_file *m, void *v) +{ + struct xenvif *vif = m->private; + + xenvif_dump_hash_info(vif, m); + + return 0; +} + +static int xenvif_ctrl_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, xenvif_read_ctrl, inode->i_private); +} + +static const struct file_operations xenvif_dbg_ctrl_ops_fops = { + .owner = THIS_MODULE, + .open = xenvif_ctrl_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + static void xenvif_debugfs_addif(struct xenvif *vif) { struct dentry *pfile; @@ -210,6 +232,17 @@ static void xenvif_debugfs_addif(struct xenvif *vif) pr_warn("Creation of io_ring file returned %ld!\n", PTR_ERR(pfile)); } + + if (vif->ctrl_task) { + pfile = debugfs_create_file("ctrl", + S_IRUSR, + vif->xenvif_dbg_root, + vif, + &xenvif_dbg_ctrl_ops_fops); + if (IS_ERR_OR_NULL(pfile)) + pr_warn("Creation of ctrl file returned %ld!\n", + PTR_ERR(pfile)); + } } else netdev_warn(vif->dev, "Creation of vif debugfs dir returned %ld!\n", From 6b2a314f726ad6d54852446ddf599b91f806f0b5 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 17 Aug 2016 18:31:35 +0100 Subject: [PATCH 0317/1715] net: ethernet: nuvoton: fix spelling mistake: "aligment" -> "alignment" trivial fix to spelling mistake in dev_err message Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/nuvoton/w90p910_ether.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/nuvoton/w90p910_ether.c b/drivers/net/ethernet/nuvoton/w90p910_ether.c index 87b7b814778b..712d8bcb7d8c 100644 --- a/drivers/net/ethernet/nuvoton/w90p910_ether.c +++ b/drivers/net/ethernet/nuvoton/w90p910_ether.c @@ -751,7 +751,7 @@ static void netdev_rx(struct net_device *dev) dev_err(&pdev->dev, "rx crc err\n"); ether->stats.rx_crc_errors++; } else if (status & RXDS_ALIE) { - dev_err(&pdev->dev, "rx aligment err\n"); + dev_err(&pdev->dev, "rx alignment err\n"); ether->stats.rx_frame_errors++; } else if (status & RXDS_PTLE) { dev_err(&pdev->dev, "rx longer err\n"); From 1cb94db3d1bfe0075bde78fb2989f17e0a8a3936 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Wed, 17 Aug 2016 23:00:30 +0200 Subject: [PATCH 0318/1715] net: bgmac: support Ethernet core on BCM53573 SoCs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BCM53573 is a new series of Broadcom's SoCs. It's based on ARM and can be found in two packages (versions): BCM53573 and BCM47189. It shares some code with the Northstar family, but also requires some new quirks. First of all there can be up to 2 Ethernet cores on this SoC. If that is the case, they are connected to two different switch ports allowing some more complex/optimized setups. It seems the second unit doesn't come fully configured and requires some IRQ quirk. Other than that only the first core is connected to the PHY. For the second one we have to register fixed PHY (similarly to the Northstar), otherwise generic PHY driver would get some invalid info. This has been successfully tested on Tenda AC9 (BCM47189B0). Signed-off-by: Rafał Miłecki Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bgmac-bcma.c | 19 +++++++++++++++- drivers/net/ethernet/broadcom/bgmac.c | 25 ++++++++++++++++++++++ drivers/net/ethernet/broadcom/bgmac.h | 19 ++++++++++++++++ include/linux/bcma/bcma.h | 3 +++ include/linux/bcma/bcma_regs.h | 1 + 5 files changed, 66 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bgmac-bcma.c b/drivers/net/ethernet/broadcom/bgmac-bcma.c index 9a9745c4047c..3bc0a04df107 100644 --- a/drivers/net/ethernet/broadcom/bgmac-bcma.c +++ b/drivers/net/ethernet/broadcom/bgmac-bcma.c @@ -92,6 +92,7 @@ MODULE_DEVICE_TABLE(bcma, bgmac_bcma_tbl); /* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/chipattach */ static int bgmac_probe(struct bcma_device *core) { + struct bcma_chipinfo *ci = &core->bus->chipinfo; struct ssb_sprom *sprom = &core->bus->sprom; struct mii_bus *mii_bus; struct bgmac *bgmac; @@ -157,7 +158,8 @@ static int bgmac_probe(struct bcma_device *core) dev_info(bgmac->dev, "Found PHY addr: %d%s\n", bgmac->phyaddr, bgmac->phyaddr == BGMAC_PHY_NOREGS ? " (NOREGS)" : ""); - if (!bgmac_is_bcm4707_family(core)) { + if (!bgmac_is_bcm4707_family(core) && + !(ci->id == BCMA_CHIP_ID_BCM53573 && core->core_unit == 1)) { mii_bus = bcma_mdio_mii_register(core, bgmac->phyaddr); if (!IS_ERR(mii_bus)) { err = PTR_ERR(mii_bus); @@ -230,6 +232,21 @@ static int bgmac_probe(struct bcma_device *core) bgmac->feature_flags |= BGMAC_FEAT_NO_RESET; bgmac->feature_flags |= BGMAC_FEAT_FORCE_SPEED_2500; break; + case BCMA_CHIP_ID_BCM53573: + bgmac->feature_flags |= BGMAC_FEAT_CLKCTLST; + bgmac->feature_flags |= BGMAC_FEAT_SET_RXQ_CLK; + if (ci->pkg == BCMA_PKG_ID_BCM47189) + bgmac->feature_flags |= BGMAC_FEAT_IOST_ATTACHED; + if (core->core_unit == 0) { + bgmac->feature_flags |= BGMAC_FEAT_CC4_IF_SW_TYPE; + if (ci->pkg == BCMA_PKG_ID_BCM47189) + bgmac->feature_flags |= + BGMAC_FEAT_CC4_IF_SW_TYPE_RGMII; + } else if (core->core_unit == 1) { + bgmac->feature_flags |= BGMAC_FEAT_IRQ_ID_OOB_6; + bgmac->feature_flags |= BGMAC_FEAT_CC7_IF_TYPE_RGMII; + } + break; default: bgmac->feature_flags |= BGMAC_FEAT_CLKCTLST; bgmac->feature_flags |= BGMAC_FEAT_SET_RXQ_CLK; diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index c4751ece76f6..63ef72355833 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -940,6 +940,27 @@ static void bgmac_chip_reset(struct bgmac *bgmac) bgmac_cco_ctl_maskset(bgmac, 1, ~(BGMAC_CHIPCTL_1_IF_TYPE_MASK | BGMAC_CHIPCTL_1_SW_TYPE_MASK), sw_type); + } else if (bgmac->feature_flags & BGMAC_FEAT_CC4_IF_SW_TYPE) { + u32 sw_type = BGMAC_CHIPCTL_4_IF_TYPE_MII | + BGMAC_CHIPCTL_4_SW_TYPE_EPHY; + u8 et_swtype = 0; + char buf[4]; + + if (bcm47xx_nvram_getenv("et_swtype", buf, sizeof(buf)) > 0) { + if (kstrtou8(buf, 0, &et_swtype)) + dev_err(bgmac->dev, "Failed to parse et_swtype (%s)\n", + buf); + sw_type = (et_swtype & 0x0f) << 12; + } else if (bgmac->feature_flags & BGMAC_FEAT_CC4_IF_SW_TYPE_RGMII) { + sw_type = BGMAC_CHIPCTL_4_IF_TYPE_RGMII | + BGMAC_CHIPCTL_4_SW_TYPE_RGMII; + } + bgmac_cco_ctl_maskset(bgmac, 4, ~(BGMAC_CHIPCTL_4_IF_TYPE_MASK | + BGMAC_CHIPCTL_4_SW_TYPE_MASK), + sw_type); + } else if (bgmac->feature_flags & BGMAC_FEAT_CC7_IF_TYPE_RGMII) { + bgmac_cco_ctl_maskset(bgmac, 7, ~BGMAC_CHIPCTL_7_IF_TYPE_MASK, + BGMAC_CHIPCTL_7_IF_TYPE_RGMII); } if (iost & BGMAC_BCMA_IOST_ATTACHED && !bgmac->has_robosw) @@ -1467,6 +1488,10 @@ int bgmac_enet_probe(struct bgmac *info) */ bgmac_clk_enable(bgmac, 0); + /* This seems to be fixing IRQ by assigning OOB #6 to the core */ + if (bgmac->feature_flags & BGMAC_FEAT_IRQ_ID_OOB_6) + bgmac_idm_write(bgmac, BCMA_OOB_SEL_OUT_A30, 0x86); + bgmac_chip_reset(bgmac); err = bgmac_dma_alloc(bgmac); diff --git a/drivers/net/ethernet/broadcom/bgmac.h b/drivers/net/ethernet/broadcom/bgmac.h index 24a250267b88..80836b4c9f38 100644 --- a/drivers/net/ethernet/broadcom/bgmac.h +++ b/drivers/net/ethernet/broadcom/bgmac.h @@ -369,6 +369,21 @@ #define BGMAC_CHIPCTL_1_SW_TYPE_RGMII 0x000000C0 #define BGMAC_CHIPCTL_1_RXC_DLL_BYPASS 0x00010000 +#define BGMAC_CHIPCTL_4_IF_TYPE_MASK 0x00003000 +#define BGMAC_CHIPCTL_4_IF_TYPE_RMII 0x00000000 +#define BGMAC_CHIPCTL_4_IF_TYPE_MII 0x00001000 +#define BGMAC_CHIPCTL_4_IF_TYPE_RGMII 0x00002000 +#define BGMAC_CHIPCTL_4_SW_TYPE_MASK 0x0000C000 +#define BGMAC_CHIPCTL_4_SW_TYPE_EPHY 0x00000000 +#define BGMAC_CHIPCTL_4_SW_TYPE_EPHYMII 0x00004000 +#define BGMAC_CHIPCTL_4_SW_TYPE_EPHYRMII 0x00008000 +#define BGMAC_CHIPCTL_4_SW_TYPE_RGMII 0x0000C000 + +#define BGMAC_CHIPCTL_7_IF_TYPE_MASK 0x000000C0 +#define BGMAC_CHIPCTL_7_IF_TYPE_RMII 0x00000000 +#define BGMAC_CHIPCTL_7_IF_TYPE_MII 0x00000040 +#define BGMAC_CHIPCTL_7_IF_TYPE_RGMII 0x00000080 + #define BGMAC_WEIGHT 64 #define ETHER_MAX_LEN 1518 @@ -390,6 +405,10 @@ #define BGMAC_FEAT_NO_CLR_MIB BIT(13) #define BGMAC_FEAT_FORCE_SPEED_2500 BIT(14) #define BGMAC_FEAT_CMDCFG_SR_REV4 BIT(15) +#define BGMAC_FEAT_IRQ_ID_OOB_6 BIT(16) +#define BGMAC_FEAT_CC4_IF_SW_TYPE BIT(17) +#define BGMAC_FEAT_CC4_IF_SW_TYPE_RGMII BIT(18) +#define BGMAC_FEAT_CC7_IF_TYPE_RGMII BIT(19) struct bgmac_slot_info { union { diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h index 3db25df396cb..8eeedb2db924 100644 --- a/include/linux/bcma/bcma.h +++ b/include/linux/bcma/bcma.h @@ -205,6 +205,9 @@ struct bcma_host_ops { #define BCMA_PKG_ID_BCM4709 0 #define BCMA_CHIP_ID_BCM47094 53030 #define BCMA_CHIP_ID_BCM53018 53018 +#define BCMA_CHIP_ID_BCM53573 53573 +#define BCMA_PKG_ID_BCM53573 0 +#define BCMA_PKG_ID_BCM47189 1 /* Board types (on PCI usually equals to the subsystem dev id) */ /* BCM4313 */ diff --git a/include/linux/bcma/bcma_regs.h b/include/linux/bcma/bcma_regs.h index ebd5c1fcdea4..c607fce6aadd 100644 --- a/include/linux/bcma/bcma_regs.h +++ b/include/linux/bcma/bcma_regs.h @@ -23,6 +23,7 @@ #define BCMA_CLKCTLST_4328A0_HAVEALP 0x00020000 /* 4328a0 has reversed bits */ /* Agent registers (common for every core) */ +#define BCMA_OOB_SEL_OUT_A30 0x0100 #define BCMA_IOCTL 0x0408 /* IO control */ #define BCMA_IOCTL_CLK 0x0001 #define BCMA_IOCTL_FGC 0x0002 From e2d8f646c79f26e094bfaf9b21be614d1e148a67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Wed, 17 Aug 2016 23:11:52 +0200 Subject: [PATCH 0319/1715] net: bgmac: make it clear when setting interface type to RMII MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It doesn't really change anything as BGMAC_CHIPCTL_1_IF_TYPE_RMII is equal to 0. It make code a bit clener, so far when reading it one could think we forgot to set a proper mode. It also keeps this mode code in sync with other ones. Signed-off-by: Rafał Miłecki Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bgmac.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index 63ef72355833..6ea0e5ff1e44 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -932,7 +932,8 @@ static void bgmac_chip_reset(struct bgmac *bgmac) et_swtype <<= 4; sw_type = et_swtype; } else if (bgmac->feature_flags & BGMAC_FEAT_SW_TYPE_EPHYRMII) { - sw_type = BGMAC_CHIPCTL_1_SW_TYPE_EPHYRMII; + sw_type = BGMAC_CHIPCTL_1_IF_TYPE_RMII | + BGMAC_CHIPCTL_1_SW_TYPE_EPHYRMII; } else if (bgmac->feature_flags & BGMAC_FEAT_SW_TYPE_RGMII) { sw_type = BGMAC_CHIPCTL_1_IF_TYPE_RGMII | BGMAC_CHIPCTL_1_SW_TYPE_RGMII; From 36a6503feddadbbad415fb3891e80f94c10a9b21 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 17 Aug 2016 14:17:09 -0700 Subject: [PATCH 0320/1715] tcp: refine tcp_prune_ofo_queue() to not drop all packets Over the years, TCP BDP has increased a lot, and is typically in the order of ~10 Mbytes with help of clever Congestion Control modules. In presence of packet losses, TCP stores incoming packets into an out of order queue, and number of skbs sitting there waiting for the missing packets to be received can match the BDP (~10 Mbytes) In some cases, TCP needs to make room for incoming skbs, and current strategy can simply remove all skbs in the out of order queue as a last resort, incurring a huge penalty, both for receiver and sender. Unfortunately these 'last resort events' are quite frequent, forcing sender to send all packets again, stalling the flow and wasting a lot of resources. This patch cleans only a part of the out of order queue in order to meet the memory constraints. Signed-off-by: Eric Dumazet Cc: Neal Cardwell Cc: Yuchung Cheng Cc: Soheil Hassas Yeganeh Cc: C. Stephen Gun Cc: Van Jacobson Acked-by: Soheil Hassas Yeganeh Acked-by: Yuchung Cheng Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 47 ++++++++++++++++++++++++++------------------ 1 file changed, 28 insertions(+), 19 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3ebf45b38bc3..8cd02c0b056c 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4392,12 +4392,9 @@ static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb, if (tcp_prune_queue(sk) < 0) return -1; - if (!sk_rmem_schedule(sk, skb, size)) { + while (!sk_rmem_schedule(sk, skb, size)) { if (!tcp_prune_ofo_queue(sk)) return -1; - - if (!sk_rmem_schedule(sk, skb, size)) - return -1; } } return 0; @@ -4874,29 +4871,41 @@ static void tcp_collapse_ofo_queue(struct sock *sk) } /* - * Purge the out-of-order queue. - * Return true if queue was pruned. + * Clean the out-of-order queue to make room. + * We drop high sequences packets to : + * 1) Let a chance for holes to be filled. + * 2) not add too big latencies if thousands of packets sit there. + * (But if application shrinks SO_RCVBUF, we could still end up + * freeing whole queue here) + * + * Return true if queue has shrunk. */ static bool tcp_prune_ofo_queue(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - bool res = false; + struct sk_buff *skb; - if (!skb_queue_empty(&tp->out_of_order_queue)) { - NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED); - __skb_queue_purge(&tp->out_of_order_queue); + if (skb_queue_empty(&tp->out_of_order_queue)) + return false; - /* Reset SACK state. A conforming SACK implementation will - * do the same at a timeout based retransmit. When a connection - * is in a sad state like this, we care only about integrity - * of the connection not performance. - */ - if (tp->rx_opt.sack_ok) - tcp_sack_reset(&tp->rx_opt); + NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED); + + while ((skb = __skb_dequeue_tail(&tp->out_of_order_queue)) != NULL) { + tcp_drop(sk, skb); sk_mem_reclaim(sk); - res = true; + if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf && + !tcp_under_memory_pressure(sk)) + break; } - return res; + + /* Reset SACK state. A conforming SACK implementation will + * do the same at a timeout based retransmit. When a connection + * is in a sad state like this, we care only about integrity + * of the connection not performance. + */ + if (tp->rx_opt.sack_ok) + tcp_sack_reset(&tp->rx_opt); + return true; } /* Reduce allocated memory if we can, trying to get From b34040227be7da760cc72ef3c807e0985e7f0f16 Mon Sep 17 00:00:00 2001 From: Richard Alpe Date: Thu, 18 Aug 2016 10:33:52 +0200 Subject: [PATCH 0321/1715] tipc: add peer removal functionality Add TIPC_NL_PEER_REMOVE netlink command. This command can remove an offline peer node from the internal data structures. This will be supported by the tipc user space tool in iproute2. Signed-off-by: Richard Alpe Reviewed-by: Jon Maloy Acked-by: Ying Xue Signed-off-by: David S. Miller --- include/uapi/linux/tipc_netlink.h | 1 + net/tipc/net.h | 2 + net/tipc/netlink.c | 5 +++ net/tipc/node.c | 63 +++++++++++++++++++++++++++++++ net/tipc/node.h | 1 + 5 files changed, 72 insertions(+) diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h index 5f3f6d09fb79..bcb65ef725f6 100644 --- a/include/uapi/linux/tipc_netlink.h +++ b/include/uapi/linux/tipc_netlink.h @@ -59,6 +59,7 @@ enum { TIPC_NL_MON_SET, TIPC_NL_MON_GET, TIPC_NL_MON_PEER_GET, + TIPC_NL_PEER_REMOVE, __TIPC_NL_CMD_MAX, TIPC_NL_CMD_MAX = __TIPC_NL_CMD_MAX - 1 diff --git a/net/tipc/net.h b/net/tipc/net.h index 77a7a118911d..c7c254902873 100644 --- a/net/tipc/net.h +++ b/net/tipc/net.h @@ -39,6 +39,8 @@ #include +extern const struct nla_policy tipc_nl_net_policy[]; + int tipc_net_start(struct net *net, u32 addr); void tipc_net_stop(struct net *net); diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index a84daec0afe9..2718de667828 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -238,6 +238,11 @@ static const struct genl_ops tipc_genl_v2_ops[] = { .dumpit = tipc_nl_node_dump_monitor_peer, .policy = tipc_nl_policy, }, + { + .cmd = TIPC_NL_PEER_REMOVE, + .doit = tipc_nl_peer_rm, + .policy = tipc_nl_policy, + } }; int tipc_nlmsg_parse(const struct nlmsghdr *nlh, struct nlattr ***attr) diff --git a/net/tipc/node.c b/net/tipc/node.c index 21974191e425..7e8b75fd1a02 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1553,6 +1553,69 @@ discard: kfree_skb(skb); } +int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct nlattr *attrs[TIPC_NLA_NET_MAX + 1]; + struct tipc_node *peer; + u32 addr; + int err; + int i; + + /* We identify the peer by its net */ + if (!info->attrs[TIPC_NLA_NET]) + return -EINVAL; + + err = nla_parse_nested(attrs, TIPC_NLA_NET_MAX, + info->attrs[TIPC_NLA_NET], + tipc_nl_net_policy); + if (err) + return err; + + if (!attrs[TIPC_NLA_NET_ADDR]) + return -EINVAL; + + addr = nla_get_u32(attrs[TIPC_NLA_NET_ADDR]); + + if (in_own_node(net, addr)) + return -ENOTSUPP; + + spin_lock_bh(&tn->node_list_lock); + peer = tipc_node_find(net, addr); + if (!peer) { + spin_unlock_bh(&tn->node_list_lock); + return -ENXIO; + } + + tipc_node_write_lock(peer); + if (peer->state != SELF_DOWN_PEER_DOWN && + peer->state != SELF_DOWN_PEER_LEAVING) { + tipc_node_write_unlock(peer); + err = -EBUSY; + goto err_out; + } + + for (i = 0; i < MAX_BEARERS; i++) { + struct tipc_link_entry *le = &peer->links[i]; + + if (le->link) { + kfree(le->link); + le->link = NULL; + peer->link_cnt--; + } + } + tipc_node_write_unlock(peer); + tipc_node_delete(peer); + + err = 0; +err_out: + tipc_node_put(peer); + spin_unlock_bh(&tn->node_list_lock); + + return err; +} + int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb) { int err; diff --git a/net/tipc/node.h b/net/tipc/node.h index d69fdfcc0ec9..4578b34c7dca 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -77,6 +77,7 @@ int tipc_nl_node_dump_link(struct sk_buff *skb, struct netlink_callback *cb); int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info); int tipc_nl_node_get_link(struct sk_buff *skb, struct genl_info *info); int tipc_nl_node_set_link(struct sk_buff *skb, struct genl_info *info); +int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info); int tipc_nl_node_set_monitor(struct sk_buff *skb, struct genl_info *info); int tipc_nl_node_get_monitor(struct sk_buff *skb, struct genl_info *info); From 45c7fffaf72e77baab9fd8ff1655540a1d5bac9f Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 18 Aug 2016 01:00:38 +0200 Subject: [PATCH 0322/1715] bpf: use skb_pkt_type_ok helper in bpf_skb_change_type Since we have a skb_pkt_type_ok() helper for checking the type before mangling, make use of it instead of open coding. Follow-up to commit 8b10cab64c13 ("net: simplify and make pkt_type_ok() available for other users") that came in after d2485c4242a8 ("bpf: add bpf_skb_change_type helper"). Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- net/core/filter.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/filter.c b/net/core/filter.c index cb06aceb512a..58b5e6dd25fe 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1976,8 +1976,8 @@ static u64 bpf_skb_change_type(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) u32 pkt_type = r2; /* We only allow a restricted subset to be changed for now. */ - if (unlikely(skb->pkt_type > PACKET_OTHERHOST || - pkt_type > PACKET_OTHERHOST)) + if (unlikely(!skb_pkt_type_ok(skb->pkt_type) || + !skb_pkt_type_ok(pkt_type))) return -EINVAL; skb->pkt_type = pkt_type; From 5293efe62df81908f2e90c9820c7edcc8e61f5e9 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 18 Aug 2016 01:00:39 +0200 Subject: [PATCH 0323/1715] bpf: add bpf_skb_change_tail helper This work adds a bpf_skb_change_tail() helper for tc BPF programs. The basic idea is to expand or shrink the skb in a controlled manner. The eBPF program can then rewrite the rest via helpers like bpf_skb_store_bytes(), bpf_lX_csum_replace() and others rather than passing a raw buffer for writing here. bpf_skb_change_tail() is really a slow path helper and intended for replies with f.e. ICMP control messages. Concept is similar to other helpers like bpf_skb_change_proto() helper to keep the helper without protocol specifics and let the BPF program mangle the remaining parts. A flags field has been added and is reserved for now should we extend the helper in future. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/skbuff.h | 43 ++++++++++++++++- include/uapi/linux/bpf.h | 11 +++++ net/core/filter.c | 100 +++++++++++++++++++++++++++++++++++++-- 3 files changed, 150 insertions(+), 4 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 0f665cb26b50..7047448e8129 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2295,7 +2295,7 @@ static inline int pskb_network_may_pull(struct sk_buff *skb, unsigned int len) int ___pskb_trim(struct sk_buff *skb, unsigned int len); -static inline void __skb_trim(struct sk_buff *skb, unsigned int len) +static inline void __skb_set_length(struct sk_buff *skb, unsigned int len) { if (unlikely(skb_is_nonlinear(skb))) { WARN_ON(1); @@ -2305,6 +2305,11 @@ static inline void __skb_trim(struct sk_buff *skb, unsigned int len) skb_set_tail_pointer(skb, len); } +static inline void __skb_trim(struct sk_buff *skb, unsigned int len) +{ + __skb_set_length(skb, len); +} + void skb_trim(struct sk_buff *skb, unsigned int len); static inline int __pskb_trim(struct sk_buff *skb, unsigned int len) @@ -2335,6 +2340,20 @@ static inline void pskb_trim_unique(struct sk_buff *skb, unsigned int len) BUG_ON(err); } +static inline int __skb_grow(struct sk_buff *skb, unsigned int len) +{ + unsigned int diff = len - skb->len; + + if (skb_tailroom(skb) < diff) { + int ret = pskb_expand_head(skb, 0, diff - skb_tailroom(skb), + GFP_ATOMIC); + if (ret) + return ret; + } + __skb_set_length(skb, len); + return 0; +} + /** * skb_orphan - orphan a buffer * @skb: buffer to orphan @@ -2938,6 +2957,21 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) return __pskb_trim(skb, len); } +static inline int __skb_trim_rcsum(struct sk_buff *skb, unsigned int len) +{ + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->ip_summed = CHECKSUM_NONE; + __skb_trim(skb, len); + return 0; +} + +static inline int __skb_grow_rcsum(struct sk_buff *skb, unsigned int len) +{ + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->ip_summed = CHECKSUM_NONE; + return __skb_grow(skb, len); +} + #define skb_queue_walk(queue, skb) \ for (skb = (queue)->next; \ skb != (struct sk_buff *)(queue); \ @@ -3726,6 +3760,13 @@ static inline bool skb_is_gso_v6(const struct sk_buff *skb) return skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6; } +static inline void skb_gso_reset(struct sk_buff *skb) +{ + skb_shinfo(skb)->gso_size = 0; + skb_shinfo(skb)->gso_segs = 0; + skb_shinfo(skb)->gso_type = 0; +} + void __skb_warn_lro_forwarding(const struct sk_buff *skb); static inline bool skb_warn_if_lro(const struct sk_buff *skb) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 866d53c33298..e4c5a1baa993 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -386,6 +386,17 @@ enum bpf_func_id { */ BPF_FUNC_current_task_under_cgroup, + /** + * bpf_skb_change_tail(skb, len, flags) + * The helper will resize the skb to the given new size, + * to be used f.e. with control messages. + * @skb: pointer to skb + * @len: new skb length + * @flags: reserved + * Return: 0 on success or negative error + */ + BPF_FUNC_skb_change_tail, + __BPF_FUNC_MAX_ID, }; diff --git a/net/core/filter.c b/net/core/filter.c index 58b5e6dd25fe..abf546d96b6b 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1350,14 +1350,18 @@ struct bpf_scratchpad { static DEFINE_PER_CPU(struct bpf_scratchpad, bpf_sp); +static inline int __bpf_try_make_writable(struct sk_buff *skb, + unsigned int write_len) +{ + return skb_ensure_writable(skb, write_len); +} + static inline int bpf_try_make_writable(struct sk_buff *skb, unsigned int write_len) { - int err; + int err = __bpf_try_make_writable(skb, write_len); - err = skb_ensure_writable(skb, write_len); bpf_compute_data_end(skb); - return err; } @@ -1992,6 +1996,92 @@ static const struct bpf_func_proto bpf_skb_change_type_proto = { .arg2_type = ARG_ANYTHING, }; +static u32 __bpf_skb_min_len(const struct sk_buff *skb) +{ + u32 min_len = skb_network_offset(skb); + + if (skb_transport_header_was_set(skb)) + min_len = skb_transport_offset(skb); + if (skb->ip_summed == CHECKSUM_PARTIAL) + min_len = skb_checksum_start_offset(skb) + + skb->csum_offset + sizeof(__sum16); + return min_len; +} + +static u32 __bpf_skb_max_len(const struct sk_buff *skb) +{ + return skb->dev ? skb->dev->mtu + skb->dev->hard_header_len : + 65536; +} + +static int bpf_skb_grow_rcsum(struct sk_buff *skb, unsigned int new_len) +{ + unsigned int old_len = skb->len; + int ret; + + ret = __skb_grow_rcsum(skb, new_len); + if (!ret) + memset(skb->data + old_len, 0, new_len - old_len); + return ret; +} + +static int bpf_skb_trim_rcsum(struct sk_buff *skb, unsigned int new_len) +{ + return __skb_trim_rcsum(skb, new_len); +} + +static u64 bpf_skb_change_tail(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5) +{ + struct sk_buff *skb = (struct sk_buff *)(long) r1; + u32 max_len = __bpf_skb_max_len(skb); + u32 min_len = __bpf_skb_min_len(skb); + u32 new_len = (u32) r2; + int ret; + + if (unlikely(flags || new_len > max_len || new_len < min_len)) + return -EINVAL; + if (skb->encapsulation) + return -ENOTSUPP; + + /* The basic idea of this helper is that it's performing the + * needed work to either grow or trim an skb, and eBPF program + * rewrites the rest via helpers like bpf_skb_store_bytes(), + * bpf_lX_csum_replace() and others rather than passing a raw + * buffer here. This one is a slow path helper and intended + * for replies with control messages. + * + * Like in bpf_skb_change_proto(), we want to keep this rather + * minimal and without protocol specifics so that we are able + * to separate concerns as in bpf_skb_store_bytes() should only + * be the one responsible for writing buffers. + * + * It's really expected to be a slow path operation here for + * control message replies, so we're implicitly linearizing, + * uncloning and drop offloads from the skb by this. + */ + ret = __bpf_try_make_writable(skb, skb->len); + if (!ret) { + if (new_len > skb->len) + ret = bpf_skb_grow_rcsum(skb, new_len); + else if (new_len < skb->len) + ret = bpf_skb_trim_rcsum(skb, new_len); + if (!ret && skb_is_gso(skb)) + skb_gso_reset(skb); + } + + bpf_compute_data_end(skb); + return ret; +} + +static const struct bpf_func_proto bpf_skb_change_tail_proto = { + .func = bpf_skb_change_tail, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, +}; + bool bpf_helper_changes_skb_data(void *func) { if (func == bpf_skb_vlan_push) @@ -2002,6 +2092,8 @@ bool bpf_helper_changes_skb_data(void *func) return true; if (func == bpf_skb_change_proto) return true; + if (func == bpf_skb_change_tail) + return true; if (func == bpf_l3_csum_replace) return true; if (func == bpf_l4_csum_replace) @@ -2368,6 +2460,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) return &bpf_skb_change_proto_proto; case BPF_FUNC_skb_change_type: return &bpf_skb_change_type_proto; + case BPF_FUNC_skb_change_tail: + return &bpf_skb_change_tail_proto; case BPF_FUNC_skb_get_tunnel_key: return &bpf_skb_get_tunnel_key_proto; case BPF_FUNC_skb_set_tunnel_key: From 4de16969523c15fb53cf8945dfc6b495d01d1512 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 18 Aug 2016 01:00:40 +0200 Subject: [PATCH 0324/1715] bpf: enable event output helper also for xdp types Follow-up to 555c8a8623a3 ("bpf: avoid stack copy and use skb ctx for event output") for also adding the event output helper for XDP typed programs. The event output helper has been very useful in particular for debugging or event notification purposes, since it's much faster and flexible than regular trace printk due to programmatically being able to attach meta data. Same flags structure applies as with tc BPF programs. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- net/core/filter.c | 42 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/net/core/filter.c b/net/core/filter.c index abf546d96b6b..3b60dfd2ce92 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2408,6 +2408,41 @@ static const struct bpf_func_proto bpf_skb_under_cgroup_proto = { }; #endif +static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff, + unsigned long off, unsigned long len) +{ + memcpy(dst_buff, src_buff + off, len); + return 0; +} + +static u64 bpf_xdp_event_output(u64 r1, u64 r2, u64 flags, u64 r4, + u64 meta_size) +{ + struct xdp_buff *xdp = (struct xdp_buff *)(long) r1; + struct bpf_map *map = (struct bpf_map *)(long) r2; + u64 xdp_size = (flags & BPF_F_CTXLEN_MASK) >> 32; + void *meta = (void *)(long) r4; + + if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK))) + return -EINVAL; + if (unlikely(xdp_size > (unsigned long)(xdp->data_end - xdp->data))) + return -EFAULT; + + return bpf_event_output(map, flags, meta, meta_size, xdp, xdp_size, + bpf_xdp_copy); +} + +static const struct bpf_func_proto bpf_xdp_event_output_proto = { + .func = bpf_xdp_event_output, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_CONST_MAP_PTR, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_STACK, + .arg5_type = ARG_CONST_STACK_SIZE, +}; + static const struct bpf_func_proto * sk_filter_func_proto(enum bpf_func_id func_id) { @@ -2492,7 +2527,12 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) static const struct bpf_func_proto * xdp_func_proto(enum bpf_func_id func_id) { - return sk_filter_func_proto(func_id); + switch (func_id) { + case BPF_FUNC_perf_event_output: + return &bpf_xdp_event_output_proto; + default: + return sk_filter_func_proto(func_id); + } } static bool __is_valid_access(int off, int size, enum bpf_access_type type) From 54fd9c2dff144ed287ab3b8189dcdcd4d298d0cc Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 18 Aug 2016 01:00:41 +0200 Subject: [PATCH 0325/1715] bpf: get rid of cgroup helper related ifdefs As recently discussed during the task_under_cgroup_hierarchy() addition, we should get rid of the ifdefs surrounding the bpf_skb_under_cgroup() helper. If related functionality is not built-in, the helper cannot be used anyway, which is also in line with what we do for all other helpers. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/net/sock.h | 10 ++++++++++ net/core/filter.c | 6 +----- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index ff5be7e8ddea..2aab9b63bf16 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1114,6 +1114,16 @@ static inline bool sk_stream_is_writeable(const struct sock *sk) sk_stream_memory_free(sk); } +static inline int sk_under_cgroup_hierarchy(struct sock *sk, + struct cgroup *ancestor) +{ +#ifdef CONFIG_SOCK_CGROUP_DATA + return cgroup_is_descendant(sock_cgroup_ptr(&sk->sk_cgrp_data), + ancestor); +#else + return -ENOTSUPP; +#endif +} static inline bool sk_has_memory_pressure(const struct sock *sk) { diff --git a/net/core/filter.c b/net/core/filter.c index 3b60dfd2ce92..a83766be1ad2 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2374,7 +2374,6 @@ bpf_get_skb_set_tunnel_proto(enum bpf_func_id which) } } -#ifdef CONFIG_SOCK_CGROUP_DATA static u64 bpf_skb_under_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) { struct sk_buff *skb = (struct sk_buff *)(long)r1; @@ -2395,7 +2394,7 @@ static u64 bpf_skb_under_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) if (unlikely(!cgrp)) return -EAGAIN; - return cgroup_is_descendant(sock_cgroup_ptr(&sk->sk_cgrp_data), cgrp); + return sk_under_cgroup_hierarchy(sk, cgrp); } static const struct bpf_func_proto bpf_skb_under_cgroup_proto = { @@ -2406,7 +2405,6 @@ static const struct bpf_func_proto bpf_skb_under_cgroup_proto = { .arg2_type = ARG_CONST_MAP_PTR, .arg3_type = ARG_ANYTHING, }; -#endif static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff, unsigned long off, unsigned long len) @@ -2515,10 +2513,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) return &bpf_skb_event_output_proto; case BPF_FUNC_get_smp_processor_id: return &bpf_get_smp_processor_id_proto; -#ifdef CONFIG_SOCK_CGROUP_DATA case BPF_FUNC_skb_under_cgroup: return &bpf_skb_under_cgroup_proto; -#endif default: return sk_filter_func_proto(func_id); } From b65b24d42f0a3da68085433f1102c0ac6aa3cebe Mon Sep 17 00:00:00 2001 From: LABBE Corentin Date: Wed, 17 Aug 2016 15:56:45 +0200 Subject: [PATCH 0326/1715] atm: fore200e: Do not drop const qualifier The data member of structure firmware is const and this constness is dropped by some cast. This patch add some const for keeping the const information. Signed-off-by: LABBE Corentin Signed-off-by: David S. Miller --- drivers/atm/fore200e.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c index 75dde903b238..81aaa505862c 100644 --- a/drivers/atm/fore200e.c +++ b/drivers/atm/fore200e.c @@ -2489,7 +2489,7 @@ static int fore200e_load_and_start_fw(struct fore200e *fore200e) { const struct firmware *firmware; struct device *device; - struct fw_header *fw_header; + const struct fw_header *fw_header; const __le32 *fw_data; u32 fw_size; u32 __iomem *load_addr; @@ -2511,9 +2511,9 @@ static int fore200e_load_and_start_fw(struct fore200e *fore200e) return err; } - fw_data = (__le32 *) firmware->data; + fw_data = (const __le32 *)firmware->data; fw_size = firmware->size / sizeof(u32); - fw_header = (struct fw_header *) firmware->data; + fw_header = (const struct fw_header *)firmware->data; load_addr = fore200e->virt_base + le32_to_cpu(fw_header->load_offset); DPRINTK(2, "device %s firmware being loaded at 0x%p (%d words)\n", From 94cdb8bb993a2303d6172b28cbd2b48245b9b8a3 Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Wed, 17 Aug 2016 12:33:03 +0530 Subject: [PATCH 0327/1715] cxgb4: Add support for dynamic allocation of resources for ULD Add a new commmon infrastructure to allocate reosurces dynamically to Upper layer driver's(ULD) when they register with cxgb4 driver and free them during unregistering. All the queues and the interrupts for them will be allocated during ULD probe only and freed during remove. Signed-off-by: Atul Gupta Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/Makefile | 2 +- drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 59 +- .../net/ethernet/chelsio/cxgb4/cxgb4_main.c | 153 +++-- .../net/ethernet/chelsio/cxgb4/cxgb4_uld.c | 555 ++++++++++++++++++ .../net/ethernet/chelsio/cxgb4/cxgb4_uld.h | 34 +- drivers/net/ethernet/chelsio/cxgb4/sge.c | 4 +- drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h | 2 +- 7 files changed, 759 insertions(+), 50 deletions(-) create mode 100644 drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c diff --git a/drivers/net/ethernet/chelsio/cxgb4/Makefile b/drivers/net/ethernet/chelsio/cxgb4/Makefile index ace0ab98d0f1..fac2157faf69 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/Makefile +++ b/drivers/net/ethernet/chelsio/cxgb4/Makefile @@ -4,7 +4,7 @@ obj-$(CONFIG_CHELSIO_T4) += cxgb4.o -cxgb4-objs := cxgb4_main.o l2t.o t4_hw.o sge.o clip_tbl.o cxgb4_ethtool.o +cxgb4-objs := cxgb4_main.o l2t.o t4_hw.o sge.o clip_tbl.o cxgb4_ethtool.o cxgb4_uld.o cxgb4-$(CONFIG_CHELSIO_T4_DCB) += cxgb4_dcb.o cxgb4-$(CONFIG_CHELSIO_T4_FCOE) += cxgb4_fcoe.o cxgb4-$(CONFIG_DEBUG_FS) += cxgb4_debugfs.o diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index bcfa51226b46..6b0528913687 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -53,6 +53,8 @@ #include "cxgb4_uld.h" #define CH_WARN(adap, fmt, ...) dev_warn(adap->pdev_dev, fmt, ## __VA_ARGS__) +extern struct list_head adapter_list; +extern struct mutex uld_mutex; enum { MAX_NPORTS = 4, /* max # of ports */ @@ -338,6 +340,7 @@ struct adapter_params { enum chip_type chip; /* chip code */ struct arch_specific_params arch; /* chip specific params */ unsigned char offload; + unsigned char crypto; /* HW capability for crypto */ unsigned char bypass; @@ -403,7 +406,6 @@ struct fw_info { struct fw_hdr fw_hdr; }; - struct trace_params { u32 data[TRACE_LEN / 4]; u32 mask[TRACE_LEN / 4]; @@ -510,6 +512,10 @@ enum { /* adapter flags */ FW_OFLD_CONN = (1 << 9), }; +enum { + ULP_CRYPTO_LOOKASIDE = 1 << 0, +}; + struct rx_sw_desc; struct sge_fl { /* SGE free-buffer queue state */ @@ -680,6 +686,16 @@ struct sge_ctrl_txq { /* state for an SGE control Tx queue */ u8 full; /* the Tx ring is full */ } ____cacheline_aligned_in_smp; +struct sge_uld_rxq_info { + char name[IFNAMSIZ]; /* name of ULD driver */ + struct sge_ofld_rxq *uldrxq; /* Rxq's for ULD */ + u16 *msix_tbl; /* msix_tbl for uld */ + u16 *rspq_id; /* response queue id's of rxq */ + u16 nrxq; /* # of ingress uld queues */ + u16 nciq; /* # of completion queues */ + u8 uld; /* uld type */ +}; + struct sge { struct sge_eth_txq ethtxq[MAX_ETH_QSETS]; struct sge_ofld_txq ofldtxq[MAX_OFLD_QSETS]; @@ -691,6 +707,7 @@ struct sge { struct sge_ofld_rxq rdmarxq[MAX_RDMA_QUEUES]; struct sge_ofld_rxq rdmaciq[MAX_RDMA_CIQS]; struct sge_rspq fw_evtq ____cacheline_aligned_in_smp; + struct sge_uld_rxq_info **uld_rxq_info; struct sge_rspq intrq ____cacheline_aligned_in_smp; spinlock_t intrq_lock; @@ -702,6 +719,7 @@ struct sge { u16 niscsitq; /* # of available iSCST Rx queues */ u16 rdmaqs; /* # of available RDMA Rx queues */ u16 rdmaciqs; /* # of available RDMA concentrator IQs */ + u16 nqs_per_uld; /* # of Rx queues per ULD */ u16 iscsi_rxq[MAX_OFLD_QSETS]; u16 iscsit_rxq[MAX_ISCSIT_QUEUES]; u16 rdma_rxq[MAX_RDMA_QUEUES]; @@ -757,6 +775,17 @@ struct hash_mac_addr { u8 addr[ETH_ALEN]; }; +struct uld_msix_bmap { + unsigned long *msix_bmap; + unsigned int mapsize; + spinlock_t lock; /* lock for acquiring bitmap */ +}; + +struct uld_msix_info { + unsigned short vec; + char desc[IFNAMSIZ + 10]; +}; + struct adapter { void __iomem *regs; void __iomem *bar2; @@ -779,6 +808,9 @@ struct adapter { unsigned short vec; char desc[IFNAMSIZ + 10]; } msix_info[MAX_INGQ + 1]; + struct uld_msix_info *msix_info_ulds; /* msix info for uld's */ + struct uld_msix_bmap msix_bmap_ulds; /* msix bitmap for all uld */ + unsigned int msi_idx; struct doorbell_stats db_stats; struct sge sge; @@ -793,7 +825,9 @@ struct adapter { unsigned int clipt_start; unsigned int clipt_end; struct clip_tbl *clipt; + struct cxgb4_pci_uld_info *uld; void *uld_handle[CXGB4_ULD_MAX]; + unsigned int num_uld; struct list_head list_node; struct list_head rcu_node; struct list_head mac_hlist; /* list of MAC addresses in MPS Hash */ @@ -952,6 +986,11 @@ static inline int is_offload(const struct adapter *adap) return adap->params.offload; } +static inline int is_pci_uld(const struct adapter *adap) +{ + return adap->params.crypto; +} + static inline u32 t4_read_reg(struct adapter *adap, u32 reg_addr) { return readl(adap->regs + reg_addr); @@ -1185,8 +1224,6 @@ int t4_sge_init(struct adapter *adap); void t4_sge_start(struct adapter *adap); void t4_sge_stop(struct adapter *adap); int cxgb_busy_poll(struct napi_struct *napi); -int cxgb4_set_rspq_intr_params(struct sge_rspq *q, unsigned int us, - unsigned int cnt); void cxgb4_set_ethtool_ops(struct net_device *netdev); int cxgb4_write_rss(const struct port_info *pi, const u16 *queues); extern int dbfifo_int_thresh; @@ -1289,6 +1326,18 @@ static inline int hash_mac_addr(const u8 *addr) return a & 0x3f; } +int cxgb4_set_rspq_intr_params(struct sge_rspq *q, unsigned int us, + unsigned int cnt); +static inline void init_rspq(struct adapter *adap, struct sge_rspq *q, + unsigned int us, unsigned int cnt, + unsigned int size, unsigned int iqe_size) +{ + q->adap = adap; + cxgb4_set_rspq_intr_params(q, us, cnt); + q->iqe_len = iqe_size; + q->size = size; +} + void t4_write_indirect(struct adapter *adap, unsigned int addr_reg, unsigned int data_reg, const u32 *vals, unsigned int nregs, unsigned int start_idx); @@ -1523,5 +1572,7 @@ void t4_idma_monitor(struct adapter *adapter, int hz, int ticks); int t4_set_vf_mac_acl(struct adapter *adapter, unsigned int vf, unsigned int naddr, u8 *addr); - +void uld_mem_free(struct adapter *adap); +int uld_mem_alloc(struct adapter *adap); +void free_rspq_fl(struct adapter *adap, struct sge_rspq *rq, struct sge_fl *fl); #endif /* __CXGB4_H__ */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 2bb804c93688..85e30f19e97a 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -223,8 +223,8 @@ MODULE_PARM_DESC(select_queue, static struct dentry *cxgb4_debugfs_root; -static LIST_HEAD(adapter_list); -static DEFINE_MUTEX(uld_mutex); +LIST_HEAD(adapter_list); +DEFINE_MUTEX(uld_mutex); /* Adapter list to be accessed from atomic context */ static LIST_HEAD(adap_rcu_list); static DEFINE_SPINLOCK(adap_rcu_lock); @@ -1066,20 +1066,20 @@ static int alloc_ofld_rxqs(struct adapter *adap, struct sge_ofld_rxq *q, */ static int setup_sge_queues(struct adapter *adap) { - int err, msi_idx, i, j; + int err, i, j; struct sge *s = &adap->sge; bitmap_zero(s->starving_fl, s->egr_sz); bitmap_zero(s->txq_maperr, s->egr_sz); if (adap->flags & USING_MSIX) - msi_idx = 1; /* vector 0 is for non-queue interrupts */ + adap->msi_idx = 1; /* vector 0 is for non-queue interrupts */ else { err = t4_sge_alloc_rxq(adap, &s->intrq, false, adap->port[0], 0, NULL, NULL, NULL, -1); if (err) return err; - msi_idx = -((int)s->intrq.abs_id + 1); + adap->msi_idx = -((int)s->intrq.abs_id + 1); } /* NOTE: If you add/delete any Ingress/Egress Queue allocations in here, @@ -1096,7 +1096,7 @@ static int setup_sge_queues(struct adapter *adap) * new/deleted queues. */ err = t4_sge_alloc_rxq(adap, &s->fw_evtq, true, adap->port[0], - msi_idx, NULL, fwevtq_handler, NULL, -1); + adap->msi_idx, NULL, fwevtq_handler, NULL, -1); if (err) { freeout: t4_free_sge_resources(adap); return err; @@ -1109,10 +1109,10 @@ freeout: t4_free_sge_resources(adap); struct sge_eth_txq *t = &s->ethtxq[pi->first_qset]; for (j = 0; j < pi->nqsets; j++, q++) { - if (msi_idx > 0) - msi_idx++; + if (adap->msi_idx > 0) + adap->msi_idx++; err = t4_sge_alloc_rxq(adap, &q->rspq, false, dev, - msi_idx, &q->fl, + adap->msi_idx, &q->fl, t4_ethrx_handler, NULL, t4_get_mps_bg_map(adap, @@ -1141,11 +1141,11 @@ freeout: t4_free_sge_resources(adap); } #define ALLOC_OFLD_RXQS(firstq, nq, per_chan, ids, lro) do { \ - err = alloc_ofld_rxqs(adap, firstq, nq, per_chan, msi_idx, ids, lro); \ + err = alloc_ofld_rxqs(adap, firstq, nq, per_chan, adap->msi_idx, ids, lro); \ if (err) \ goto freeout; \ - if (msi_idx > 0) \ - msi_idx += nq; \ + if (adap->msi_idx > 0) \ + adap->msi_idx += nq; \ } while (0) ALLOC_OFLD_RXQS(s->iscsirxq, s->iscsiqsets, j, s->iscsi_rxq, false); @@ -2565,6 +2565,12 @@ static void detach_ulds(struct adapter *adap) CXGB4_STATE_DETACH); adap->uld_handle[i] = NULL; } + for (i = 0; i < CXGB4_PCI_ULD_MAX; i++) + if (adap->uld && adap->uld[i].handle) { + adap->uld[i].state_change(adap->uld[i].handle, + CXGB4_STATE_DETACH); + adap->uld[i].handle = NULL; + } if (netevent_registered && list_empty(&adapter_list)) { unregister_netevent_notifier(&cxgb4_netevent_nb); netevent_registered = false; @@ -2584,6 +2590,10 @@ static void notify_ulds(struct adapter *adap, enum cxgb4_state new_state) for (i = 0; i < CXGB4_ULD_MAX; i++) if (adap->uld_handle[i]) ulds[i].state_change(adap->uld_handle[i], new_state); + for (i = 0; i < CXGB4_PCI_ULD_MAX; i++) + if (adap->uld && adap->uld[i].handle) + adap->uld[i].state_change(adap->uld[i].handle, + new_state); mutex_unlock(&uld_mutex); } @@ -4170,6 +4180,11 @@ static int adap_init0(struct adapter *adap) adap->vres.iscsi.start = val[0]; adap->vres.iscsi.size = val[1] - val[0] + 1; } + if (caps_cmd.cryptocaps) { + /* Should query params here...TODO */ + adap->params.crypto |= ULP_CRYPTO_LOOKASIDE; + adap->num_uld += 1; + } #undef FW_PARAM_PFVF #undef FW_PARAM_DEV @@ -4351,16 +4366,6 @@ static inline bool is_x_10g_port(const struct link_config *lc) (lc->supported & FW_PORT_CAP_SPEED_40G) != 0; } -static inline void init_rspq(struct adapter *adap, struct sge_rspq *q, - unsigned int us, unsigned int cnt, - unsigned int size, unsigned int iqe_size) -{ - q->adap = adap; - cxgb4_set_rspq_intr_params(q, us, cnt); - q->iqe_len = iqe_size; - q->size = size; -} - /* * Perform default configuration of DMA queues depending on the number and type * of ports we found and the number of available CPUs. Most settings can be @@ -4375,6 +4380,15 @@ static void cfg_queues(struct adapter *adap) #endif int ciq_size; + /* Reduce memory usage in kdump environment, disable all offload. + */ + if (is_kdump_kernel()) { + adap->params.offload = 0; + adap->params.crypto = 0; + } else if (adap->num_uld && uld_mem_alloc(adap)) { + adap->params.crypto = 0; + } + for_each_port(adap, i) n10g += is_x_10g_port(&adap2pinfo(adap, i)->link_cfg); #ifdef CONFIG_CHELSIO_T4_DCB @@ -4405,11 +4419,6 @@ static void cfg_queues(struct adapter *adap) if (q10g > netif_get_num_default_rss_queues()) q10g = netif_get_num_default_rss_queues(); - /* Reduce memory usage in kdump environment, disable all offload. - */ - if (is_kdump_kernel()) - adap->params.offload = 0; - for_each_port(adap, i) { struct port_info *pi = adap2pinfo(adap, i); @@ -4538,23 +4547,58 @@ static void reduce_ethqs(struct adapter *adap, int n) } } +static int get_msix_info(struct adapter *adap) +{ + struct uld_msix_info *msix_info; + int max_ingq = (MAX_OFLD_QSETS * adap->num_uld); + + msix_info = kcalloc(max_ingq, sizeof(*msix_info), GFP_KERNEL); + if (!msix_info) + return -ENOMEM; + + adap->msix_bmap_ulds.msix_bmap = kcalloc(BITS_TO_LONGS(max_ingq), + sizeof(long), GFP_KERNEL); + if (!adap->msix_bmap_ulds.msix_bmap) { + kfree(msix_info); + return -ENOMEM; + } + spin_lock_init(&adap->msix_bmap_ulds.lock); + adap->msix_info_ulds = msix_info; + return 0; +} + +static void free_msix_info(struct adapter *adap) +{ + if (!adap->num_uld) + return; + + kfree(adap->msix_info_ulds); + kfree(adap->msix_bmap_ulds.msix_bmap); +} + /* 2 MSI-X vectors needed for the FW queue and non-data interrupts */ #define EXTRA_VECS 2 static int enable_msix(struct adapter *adap) { - int ofld_need = 0; - int i, want, need, allocated; + int ofld_need = 0, uld_need = 0; + int i, j, want, need, allocated; struct sge *s = &adap->sge; unsigned int nchan = adap->params.nports; struct msix_entry *entries; + int max_ingq = MAX_INGQ; - entries = kmalloc(sizeof(*entries) * (MAX_INGQ + 1), + max_ingq += (MAX_OFLD_QSETS * adap->num_uld); + entries = kmalloc(sizeof(*entries) * (max_ingq + 1), GFP_KERNEL); if (!entries) return -ENOMEM; - for (i = 0; i < MAX_INGQ + 1; ++i) + /* map for msix */ + if (is_pci_uld(adap) && get_msix_info(adap)) + adap->params.crypto = 0; + + for (i = 0; i < max_ingq + 1; ++i) entries[i].entry = i; want = s->max_ethqsets + EXTRA_VECS; @@ -4567,13 +4611,17 @@ static int enable_msix(struct adapter *adap) else ofld_need = 4 * nchan; } + if (is_pci_uld(adap)) { + want += netif_get_num_default_rss_queues() * nchan; + uld_need = nchan; + } #ifdef CONFIG_CHELSIO_T4_DCB /* For Data Center Bridging we need 8 Ethernet TX Priority Queues for * each port. */ - need = 8 * adap->params.nports + EXTRA_VECS + ofld_need; + need = 8 * adap->params.nports + EXTRA_VECS + ofld_need + uld_need; #else - need = adap->params.nports + EXTRA_VECS + ofld_need; + need = adap->params.nports + EXTRA_VECS + ofld_need + uld_need; #endif allocated = pci_enable_msix_range(adap->pdev, entries, need, want); if (allocated < 0) { @@ -4587,12 +4635,20 @@ static int enable_msix(struct adapter *adap) * Every group gets its minimum requirement and NIC gets top * priority for leftovers. */ - i = allocated - EXTRA_VECS - ofld_need; + i = allocated - EXTRA_VECS - ofld_need - uld_need; if (i < s->max_ethqsets) { s->max_ethqsets = i; if (i < s->ethqsets) reduce_ethqs(adap, i); } + if (is_pci_uld(adap)) { + if (allocated < want) + s->nqs_per_uld = nchan; + else + s->nqs_per_uld = netif_get_num_default_rss_queues() * + nchan; + } + if (is_offload(adap)) { if (allocated < want) { s->rdmaqs = nchan; @@ -4604,16 +4660,24 @@ static int enable_msix(struct adapter *adap) /* leftovers go to OFLD */ i = allocated - EXTRA_VECS - s->max_ethqsets - - s->rdmaqs - s->rdmaciqs - s->niscsitq; + s->rdmaqs - s->rdmaciqs - s->niscsitq; + if (is_pci_uld(adap)) + i -= s->nqs_per_uld * adap->num_uld; s->iscsiqsets = (i / nchan) * nchan; /* round down */ } - for (i = 0; i < allocated; ++i) + + for (i = 0; i < (allocated - (s->nqs_per_uld * adap->num_uld)); ++i) adap->msix_info[i].vec = entries[i].vector; + if (is_pci_uld(adap)) { + for (j = 0 ; i < allocated; ++i, j++) + adap->msix_info_ulds[j].vec = entries[i].vector; + adap->msix_bmap_ulds.mapsize = j; + } dev_info(adap->pdev_dev, "%d MSI-X vectors allocated, " - "nic %d iscsi %d rdma cpl %d rdma ciq %d\n", + "nic %d iscsi %d rdma cpl %d rdma ciq %d uld %d\n", allocated, s->max_ethqsets, s->iscsiqsets, s->rdmaqs, - s->rdmaciqs); + s->rdmaciqs, s->nqs_per_uld); kfree(entries); return 0; @@ -5215,8 +5279,11 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* See what interrupts we'll be using */ if (msi > 1 && enable_msix(adapter) == 0) adapter->flags |= USING_MSIX; - else if (msi > 0 && pci_enable_msi(pdev) == 0) + else if (msi > 0 && pci_enable_msi(pdev) == 0) { adapter->flags |= USING_MSI; + if (msi > 1) + free_msix_info(adapter); + } /* check for PCI Express bandwidth capabiltites */ cxgb4_check_pcie_caps(adapter); @@ -5332,6 +5399,10 @@ sriov: out_free_dev: free_some_resources(adapter); + if (adapter->flags & USING_MSIX) + free_msix_info(adapter); + if (adapter->num_uld) + uld_mem_free(adapter); out_unmap_bar: if (!is_t4(adapter->params.chip)) iounmap(adapter->bar2); @@ -5393,6 +5464,10 @@ static void remove_one(struct pci_dev *pdev) if (adapter->flags & FULL_INIT_DONE) cxgb_down(adapter); + if (adapter->flags & USING_MSIX) + free_msix_info(adapter); + if (adapter->num_uld) + uld_mem_free(adapter); free_some_resources(adapter); #if IS_ENABLED(CONFIG_IPV6) t4_cleanup_clip_tbl(adapter); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c new file mode 100644 index 000000000000..aac6e444abf2 --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c @@ -0,0 +1,555 @@ +/* + * cxgb4_uld.c:Chelsio Upper Layer Driver Interface for T4/T5/T6 SGE management + * + * Copyright (c) 2016 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Written by: Atul Gupta (atul.gupta@chelsio.com) + * Written by: Hariprasad Shenai (hariprasad@chelsio.com) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cxgb4.h" +#include "cxgb4_uld.h" +#include "t4_regs.h" +#include "t4fw_api.h" +#include "t4_msg.h" + +#define for_each_uldrxq(m, i) for (i = 0; i < ((m)->nrxq + (m)->nciq); i++) + +static int get_msix_idx_from_bmap(struct adapter *adap) +{ + struct uld_msix_bmap *bmap = &adap->msix_bmap_ulds; + unsigned long flags; + unsigned int msix_idx; + + spin_lock_irqsave(&bmap->lock, flags); + msix_idx = find_first_zero_bit(bmap->msix_bmap, bmap->mapsize); + if (msix_idx < bmap->mapsize) { + __set_bit(msix_idx, bmap->msix_bmap); + } else { + spin_unlock_irqrestore(&bmap->lock, flags); + return -ENOSPC; + } + + spin_unlock_irqrestore(&bmap->lock, flags); + return msix_idx; +} + +static void free_msix_idx_in_bmap(struct adapter *adap, unsigned int msix_idx) +{ + struct uld_msix_bmap *bmap = &adap->msix_bmap_ulds; + unsigned long flags; + + spin_lock_irqsave(&bmap->lock, flags); + __clear_bit(msix_idx, bmap->msix_bmap); + spin_unlock_irqrestore(&bmap->lock, flags); +} + +static int uldrx_handler(struct sge_rspq *q, const __be64 *rsp, + const struct pkt_gl *gl) +{ + struct adapter *adap = q->adap; + struct sge_ofld_rxq *rxq = container_of(q, struct sge_ofld_rxq, rspq); + int ret; + + /* FW can send CPLs encapsulated in a CPL_FW4_MSG */ + if (((const struct rss_header *)rsp)->opcode == CPL_FW4_MSG && + ((const struct cpl_fw4_msg *)(rsp + 1))->type == FW_TYPE_RSSCPL) + rsp += 2; + + if (q->flush_handler) + ret = adap->uld[q->uld].lro_rx_handler(adap->uld[q->uld].handle, + rsp, gl, &q->lro_mgr, + &q->napi); + else + ret = adap->uld[q->uld].rx_handler(adap->uld[q->uld].handle, + rsp, gl); + + if (ret) { + rxq->stats.nomem++; + return -1; + } + + if (!gl) + rxq->stats.imm++; + else if (gl == CXGB4_MSG_AN) + rxq->stats.an++; + else + rxq->stats.pkts++; + return 0; +} + +static int alloc_uld_rxqs(struct adapter *adap, + struct sge_uld_rxq_info *rxq_info, + unsigned int nq, unsigned int offset, bool lro) +{ + struct sge *s = &adap->sge; + struct sge_ofld_rxq *q = rxq_info->uldrxq + offset; + unsigned short *ids = rxq_info->rspq_id + offset; + unsigned int per_chan = nq / adap->params.nports; + unsigned int msi_idx, bmap_idx; + int i, err; + + if (adap->flags & USING_MSIX) + msi_idx = 1; + else + msi_idx = -((int)s->intrq.abs_id + 1); + + for (i = 0; i < nq; i++, q++) { + if (msi_idx >= 0) { + bmap_idx = get_msix_idx_from_bmap(adap); + adap->msi_idx++; + } + err = t4_sge_alloc_rxq(adap, &q->rspq, false, + adap->port[i / per_chan], + adap->msi_idx, + q->fl.size ? &q->fl : NULL, + uldrx_handler, + NULL, + 0); + if (err) + goto freeout; + if (msi_idx >= 0) + rxq_info->msix_tbl[i + offset] = bmap_idx; + memset(&q->stats, 0, sizeof(q->stats)); + if (ids) + ids[i] = q->rspq.abs_id; + } + return 0; +freeout: + q = rxq_info->uldrxq + offset; + for ( ; i; i--, q++) { + if (q->rspq.desc) + free_rspq_fl(adap, &q->rspq, + q->fl.size ? &q->fl : NULL); + adap->msi_idx--; + } + + /* We need to free rxq also in case of ciq allocation failure */ + if (offset) { + q = rxq_info->uldrxq + offset; + for ( ; i; i--, q++) { + if (q->rspq.desc) + free_rspq_fl(adap, &q->rspq, + q->fl.size ? &q->fl : NULL); + adap->msi_idx--; + } + } + return err; +} + +int setup_sge_queues_uld(struct adapter *adap, unsigned int uld_type, bool lro) +{ + struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type]; + + if (adap->flags & USING_MSIX) { + rxq_info->msix_tbl = kzalloc(rxq_info->nrxq + rxq_info->nciq, + GFP_KERNEL); + if (!rxq_info->msix_tbl) + return -ENOMEM; + } + + return !(!alloc_uld_rxqs(adap, rxq_info, rxq_info->nrxq, 0, lro) && + !alloc_uld_rxqs(adap, rxq_info, rxq_info->nciq, + rxq_info->nrxq, lro)); +} + +static void t4_free_uld_rxqs(struct adapter *adap, int n, + struct sge_ofld_rxq *q) +{ + for ( ; n; n--, q++) { + if (q->rspq.desc) + free_rspq_fl(adap, &q->rspq, + q->fl.size ? &q->fl : NULL); + adap->msi_idx--; + } +} + +void free_sge_queues_uld(struct adapter *adap, unsigned int uld_type) +{ + struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type]; + + if (rxq_info->nciq) + t4_free_uld_rxqs(adap, rxq_info->nciq, + rxq_info->uldrxq + rxq_info->nrxq); + t4_free_uld_rxqs(adap, rxq_info->nrxq, rxq_info->uldrxq); + if (adap->flags & USING_MSIX) + kfree(rxq_info->msix_tbl); +} + +int cfg_queues_uld(struct adapter *adap, unsigned int uld_type, + const struct cxgb4_pci_uld_info *uld_info) +{ + struct sge *s = &adap->sge; + struct sge_uld_rxq_info *rxq_info; + int i, nrxq; + + rxq_info = kzalloc(sizeof(*rxq_info), GFP_KERNEL); + if (!rxq_info) + return -ENOMEM; + + if (uld_info->nrxq > s->nqs_per_uld) + rxq_info->nrxq = s->nqs_per_uld; + else + rxq_info->nrxq = uld_info->nrxq; + if (!uld_info->nciq) + rxq_info->nciq = 0; + else if (uld_info->nciq && uld_info->nciq > s->nqs_per_uld) + rxq_info->nciq = s->nqs_per_uld; + else + rxq_info->nciq = uld_info->nciq; + + nrxq = rxq_info->nrxq + rxq_info->nciq; /* total rxq's */ + rxq_info->uldrxq = kcalloc(nrxq, sizeof(struct sge_ofld_rxq), + GFP_KERNEL); + if (!rxq_info->uldrxq) { + kfree(rxq_info); + return -ENOMEM; + } + + rxq_info->rspq_id = kcalloc(nrxq, sizeof(unsigned short), GFP_KERNEL); + if (!rxq_info->uldrxq) { + kfree(rxq_info->uldrxq); + kfree(rxq_info); + return -ENOMEM; + } + + for (i = 0; i < rxq_info->nrxq; i++) { + struct sge_ofld_rxq *r = &rxq_info->uldrxq[i]; + + init_rspq(adap, &r->rspq, 5, 1, uld_info->rxq_size, 64); + r->rspq.uld = uld_type; + r->fl.size = 72; + } + + for (i = rxq_info->nrxq; i < nrxq; i++) { + struct sge_ofld_rxq *r = &rxq_info->uldrxq[i]; + + init_rspq(adap, &r->rspq, 5, 1, uld_info->ciq_size, 64); + r->rspq.uld = uld_type; + r->fl.size = 72; + } + + memcpy(rxq_info->name, uld_info->name, IFNAMSIZ); + adap->sge.uld_rxq_info[uld_type] = rxq_info; + + return 0; +} + +void free_queues_uld(struct adapter *adap, unsigned int uld_type) +{ + struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type]; + + kfree(rxq_info->rspq_id); + kfree(rxq_info->uldrxq); + kfree(rxq_info); +} + +int request_msix_queue_irqs_uld(struct adapter *adap, unsigned int uld_type) +{ + struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type]; + int idx, bmap_idx, err = 0; + + for_each_uldrxq(rxq_info, idx) { + bmap_idx = rxq_info->msix_tbl[idx]; + err = request_irq(adap->msix_info_ulds[bmap_idx].vec, + t4_sge_intr_msix, 0, + adap->msix_info_ulds[bmap_idx].desc, + &rxq_info->uldrxq[idx].rspq); + if (err) + goto unwind; + } + return 0; +unwind: + while (--idx >= 0) { + bmap_idx = rxq_info->msix_tbl[idx]; + free_msix_idx_in_bmap(adap, bmap_idx); + free_irq(adap->msix_info_ulds[bmap_idx].vec, + &rxq_info->uldrxq[idx].rspq); + } + return err; +} + +void free_msix_queue_irqs_uld(struct adapter *adap, unsigned int uld_type) +{ + struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type]; + int idx; + + for_each_uldrxq(rxq_info, idx) { + unsigned int bmap_idx = rxq_info->msix_tbl[idx]; + + free_msix_idx_in_bmap(adap, bmap_idx); + free_irq(adap->msix_info_ulds[bmap_idx].vec, + &rxq_info->uldrxq[idx].rspq); + } +} + +void name_msix_vecs_uld(struct adapter *adap, unsigned int uld_type) +{ + struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type]; + int n = sizeof(adap->msix_info_ulds[0].desc); + int idx; + + for_each_uldrxq(rxq_info, idx) { + unsigned int bmap_idx = rxq_info->msix_tbl[idx]; + + snprintf(adap->msix_info_ulds[bmap_idx].desc, n, "%s-%s%d", + adap->port[0]->name, rxq_info->name, idx); + } +} + +static void enable_rx(struct adapter *adap, struct sge_rspq *q) +{ + if (!q) + return; + + if (q->handler) { + cxgb_busy_poll_init_lock(q); + napi_enable(&q->napi); + } + /* 0-increment GTS to start the timer and enable interrupts */ + t4_write_reg(adap, MYPF_REG(SGE_PF_GTS_A), + SEINTARM_V(q->intr_params) | + INGRESSQID_V(q->cntxt_id)); +} + +static void quiesce_rx(struct adapter *adap, struct sge_rspq *q) +{ + if (q && q->handler) { + napi_disable(&q->napi); + local_bh_disable(); + while (!cxgb_poll_lock_napi(q)) + mdelay(1); + local_bh_enable(); + } +} + +void enable_rx_uld(struct adapter *adap, unsigned int uld_type) +{ + struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type]; + int idx; + + for_each_uldrxq(rxq_info, idx) + enable_rx(adap, &rxq_info->uldrxq[idx].rspq); +} + +void quiesce_rx_uld(struct adapter *adap, unsigned int uld_type) +{ + struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type]; + int idx; + + for_each_uldrxq(rxq_info, idx) + quiesce_rx(adap, &rxq_info->uldrxq[idx].rspq); +} + +static void uld_queue_init(struct adapter *adap, unsigned int uld_type, + struct cxgb4_lld_info *lli) +{ + struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type]; + + lli->rxq_ids = rxq_info->rspq_id; + lli->nrxq = rxq_info->nrxq; + lli->ciq_ids = rxq_info->rspq_id + rxq_info->nrxq; + lli->nciq = rxq_info->nciq; +} + +int uld_mem_alloc(struct adapter *adap) +{ + struct sge *s = &adap->sge; + + adap->uld = kcalloc(adap->num_uld, sizeof(*adap->uld), GFP_KERNEL); + if (!adap->uld) + return -ENOMEM; + + s->uld_rxq_info = kzalloc(adap->num_uld * + sizeof(struct sge_uld_rxq_info *), + GFP_KERNEL); + if (!s->uld_rxq_info) + goto err_uld; + + return 0; +err_uld: + kfree(adap->uld); + return -ENOMEM; +} + +void uld_mem_free(struct adapter *adap) +{ + struct sge *s = &adap->sge; + + kfree(s->uld_rxq_info); + kfree(adap->uld); +} + +static void uld_init(struct adapter *adap, struct cxgb4_lld_info *lld) +{ + int i; + + lld->pdev = adap->pdev; + lld->pf = adap->pf; + lld->l2t = adap->l2t; + lld->tids = &adap->tids; + lld->ports = adap->port; + lld->vr = &adap->vres; + lld->mtus = adap->params.mtus; + lld->ntxq = adap->sge.iscsiqsets; + lld->nchan = adap->params.nports; + lld->nports = adap->params.nports; + lld->wr_cred = adap->params.ofldq_wr_cred; + lld->adapter_type = adap->params.chip; + lld->cclk_ps = 1000000000 / adap->params.vpd.cclk; + lld->udb_density = 1 << adap->params.sge.eq_qpp; + lld->ucq_density = 1 << adap->params.sge.iq_qpp; + lld->filt_mode = adap->params.tp.vlan_pri_map; + /* MODQ_REQ_MAP sets queues 0-3 to chan 0-3 */ + for (i = 0; i < NCHAN; i++) + lld->tx_modq[i] = i; + lld->gts_reg = adap->regs + MYPF_REG(SGE_PF_GTS_A); + lld->db_reg = adap->regs + MYPF_REG(SGE_PF_KDOORBELL_A); + lld->fw_vers = adap->params.fw_vers; + lld->dbfifo_int_thresh = dbfifo_int_thresh; + lld->sge_ingpadboundary = adap->sge.fl_align; + lld->sge_egrstatuspagesize = adap->sge.stat_len; + lld->sge_pktshift = adap->sge.pktshift; + lld->enable_fw_ofld_conn = adap->flags & FW_OFLD_CONN; + lld->max_ordird_qp = adap->params.max_ordird_qp; + lld->max_ird_adapter = adap->params.max_ird_adapter; + lld->ulptx_memwrite_dsgl = adap->params.ulptx_memwrite_dsgl; + lld->nodeid = dev_to_node(adap->pdev_dev); +} + +static void uld_attach(struct adapter *adap, unsigned int uld) +{ + void *handle; + struct cxgb4_lld_info lli; + + uld_init(adap, &lli); + uld_queue_init(adap, uld, &lli); + + handle = adap->uld[uld].add(&lli); + if (IS_ERR(handle)) { + dev_warn(adap->pdev_dev, + "could not attach to the %s driver, error %ld\n", + adap->uld[uld].name, PTR_ERR(handle)); + return; + } + + adap->uld[uld].handle = handle; + + if (adap->flags & FULL_INIT_DONE) + adap->uld[uld].state_change(handle, CXGB4_STATE_UP); +} + +int cxgb4_register_pci_uld(enum cxgb4_pci_uld type, + struct cxgb4_pci_uld_info *p) +{ + int ret = 0; + struct adapter *adap; + + if (type >= CXGB4_PCI_ULD_MAX) + return -EINVAL; + + mutex_lock(&uld_mutex); + list_for_each_entry(adap, &adapter_list, list_node) { + if (!is_pci_uld(adap)) + continue; + ret = cfg_queues_uld(adap, type, p); + if (ret) + goto out; + ret = setup_sge_queues_uld(adap, type, p->lro); + if (ret) + goto free_queues; + if (adap->flags & USING_MSIX) { + name_msix_vecs_uld(adap, type); + ret = request_msix_queue_irqs_uld(adap, type); + if (ret) + goto free_rxq; + } + if (adap->flags & FULL_INIT_DONE) + enable_rx_uld(adap, type); + if (adap->uld[type].add) { + ret = -EBUSY; + goto free_irq; + } + adap->uld[type] = *p; + uld_attach(adap, type); + } + mutex_unlock(&uld_mutex); + return 0; + +free_irq: + if (adap->flags & USING_MSIX) + free_msix_queue_irqs_uld(adap, type); +free_rxq: + free_sge_queues_uld(adap, type); +free_queues: + free_queues_uld(adap, type); +out: + mutex_unlock(&uld_mutex); + return ret; +} +EXPORT_SYMBOL(cxgb4_register_pci_uld); + +int cxgb4_unregister_pci_uld(enum cxgb4_pci_uld type) +{ + struct adapter *adap; + + if (type >= CXGB4_PCI_ULD_MAX) + return -EINVAL; + + mutex_lock(&uld_mutex); + list_for_each_entry(adap, &adapter_list, list_node) { + if (!is_pci_uld(adap)) + continue; + adap->uld[type].handle = NULL; + adap->uld[type].add = NULL; + if (adap->flags & FULL_INIT_DONE) + quiesce_rx_uld(adap, type); + if (adap->flags & USING_MSIX) + free_msix_queue_irqs_uld(adap, type); + free_sge_queues_uld(adap, type); + free_queues_uld(adap, type); + } + mutex_unlock(&uld_mutex); + + return 0; +} +EXPORT_SYMBOL(cxgb4_unregister_pci_uld); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h index f3c58aaa932d..ab4037222f8d 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h @@ -32,8 +32,8 @@ * SOFTWARE. */ -#ifndef __CXGB4_OFLD_H -#define __CXGB4_OFLD_H +#ifndef __CXGB4_ULD_H +#define __CXGB4_ULD_H #include #include @@ -296,8 +296,36 @@ struct cxgb4_uld_info { void (*lro_flush)(struct t4_lro_mgr *); }; +enum cxgb4_pci_uld { + CXGB4_PCI_ULD1, + CXGB4_PCI_ULD_MAX +}; + +struct cxgb4_pci_uld_info { + const char *name; + bool lro; + void *handle; + unsigned int nrxq; + unsigned int nciq; + unsigned int rxq_size; + unsigned int ciq_size; + void *(*add)(const struct cxgb4_lld_info *p); + int (*rx_handler)(void *handle, const __be64 *rsp, + const struct pkt_gl *gl); + int (*state_change)(void *handle, enum cxgb4_state new_state); + int (*control)(void *handle, enum cxgb4_control control, ...); + int (*lro_rx_handler)(void *handle, const __be64 *rsp, + const struct pkt_gl *gl, + struct t4_lro_mgr *lro_mgr, + struct napi_struct *napi); + void (*lro_flush)(struct t4_lro_mgr *); +}; + int cxgb4_register_uld(enum cxgb4_uld type, const struct cxgb4_uld_info *p); int cxgb4_unregister_uld(enum cxgb4_uld type); +int cxgb4_register_pci_uld(enum cxgb4_pci_uld type, + struct cxgb4_pci_uld_info *p); +int cxgb4_unregister_pci_uld(enum cxgb4_pci_uld type); int cxgb4_ofld_send(struct net_device *dev, struct sk_buff *skb); unsigned int cxgb4_dbfifo_count(const struct net_device *dev, int lpfifo); unsigned int cxgb4_port_chan(const struct net_device *dev); @@ -330,4 +358,4 @@ int cxgb4_bar2_sge_qregs(struct net_device *dev, u64 *pbar2_qoffset, unsigned int *pbar2_qid); -#endif /* !__CXGB4_OFLD_H */ +#endif /* !__CXGB4_ULD_H */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index ad3552df0545..9a607dbc6ca8 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -2928,8 +2928,8 @@ static void free_txq(struct adapter *adap, struct sge_txq *q) q->desc = NULL; } -static void free_rspq_fl(struct adapter *adap, struct sge_rspq *rq, - struct sge_fl *fl) +void free_rspq_fl(struct adapter *adap, struct sge_rspq *rq, + struct sge_fl *fl) { struct sge *s = &adap->sge; unsigned int fl_id = fl ? fl->cntxt_id : 0xffff; diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h index a89b30720e38..6ea54c126407 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h @@ -1060,7 +1060,7 @@ struct fw_caps_config_cmd { __be16 niccaps; __be16 ofldcaps; __be16 rdmacaps; - __be16 r4; + __be16 cryptocaps; __be16 iscsicaps; __be16 fcoecaps; __be32 cfcsum; From d6657781b5a9d2e2c72da1af0d185899b2d7e2f1 Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Wed, 17 Aug 2016 12:33:04 +0530 Subject: [PATCH 0328/1715] cxgb4: Register changes and fw defines for crypto Signed-off-by: Atul Gupta Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/t4_msg.h | 437 ++++++++++++++++++ drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h | 124 +++++ 2 files changed, 561 insertions(+) diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h index e0ebe1378cb2..fba3b2ad382d 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h @@ -61,6 +61,7 @@ enum { CPL_ABORT_REQ_RSS = 0x2B, CPL_ABORT_RPL_RSS = 0x2D, + CPL_RX_PHYS_ADDR = 0x30, CPL_CLOSE_CON_RPL = 0x32, CPL_ISCSI_HDR = 0x33, CPL_RDMA_CQE = 0x35, @@ -83,6 +84,10 @@ enum { CPL_PASS_OPEN_REQ6 = 0x81, CPL_ACT_OPEN_REQ6 = 0x83, + CPL_TX_TLS_PDU = 0x88, + CPL_TX_SEC_PDU = 0x8A, + CPL_TX_TLS_ACK = 0x8B, + CPL_RDMA_TERMINATE = 0xA2, CPL_RDMA_WRITE = 0xA4, CPL_SGE_EGR_UPDATE = 0xA5, @@ -94,6 +99,8 @@ enum { CPL_FW4_PLD = 0xC1, CPL_FW4_ACK = 0xC3, + CPL_RX_PHYS_DSGL = 0xD0, + CPL_FW6_MSG = 0xE0, CPL_FW6_PLD = 0xE1, CPL_TX_PKT_LSO = 0xED, @@ -1362,6 +1369,15 @@ struct ulptx_idata { __be32 len; }; +struct ulp_txpkt { + __be32 cmd_dest; + __be32 len; +}; + +#define ULPTX_CMD_S 24 +#define ULPTX_CMD_M 0xFF +#define ULPTX_CMD_V(x) ((x) << ULPTX_CMD_S) + #define ULPTX_NSGE_S 0 #define ULPTX_NSGE_V(x) ((x) << ULPTX_NSGE_S) @@ -1369,6 +1385,22 @@ struct ulptx_idata { #define ULPTX_MORE_V(x) ((x) << ULPTX_MORE_S) #define ULPTX_MORE_F ULPTX_MORE_V(1U) +#define ULP_TXPKT_DEST_S 16 +#define ULP_TXPKT_DEST_M 0x3 +#define ULP_TXPKT_DEST_V(x) ((x) << ULP_TXPKT_DEST_S) + +#define ULP_TXPKT_FID_S 4 +#define ULP_TXPKT_FID_M 0x7ff +#define ULP_TXPKT_FID_V(x) ((x) << ULP_TXPKT_FID_S) + +#define ULP_TXPKT_RO_S 3 +#define ULP_TXPKT_RO_V(x) ((x) << ULP_TXPKT_RO_S) +#define ULP_TXPKT_RO_F ULP_TXPKT_RO_V(1U) + +#define ULP_TX_SC_MORE_S 23 +#define ULP_TX_SC_MORE_V(x) ((x) << ULP_TX_SC_MORE_S) +#define ULP_TX_SC_MORE_F ULP_TX_SC_MORE_V(1U) + struct ulp_mem_io { WR_HDR; __be32 cmd; @@ -1406,4 +1438,409 @@ struct ulp_mem_io { #define ULP_MEMIO_DATA_LEN_S 0 #define ULP_MEMIO_DATA_LEN_V(x) ((x) << ULP_MEMIO_DATA_LEN_S) +#define ULPTX_NSGE_S 0 +#define ULPTX_NSGE_M 0xFFFF +#define ULPTX_NSGE_V(x) ((x) << ULPTX_NSGE_S) +#define ULPTX_NSGE_G(x) (((x) >> ULPTX_NSGE_S) & ULPTX_NSGE_M) + +struct ulptx_sc_memrd { + __be32 cmd_to_len; + __be32 addr; +}; + +#define ULP_TXPKT_DATAMODIFY_S 23 +#define ULP_TXPKT_DATAMODIFY_M 0x1 +#define ULP_TXPKT_DATAMODIFY_V(x) ((x) << ULP_TXPKT_DATAMODIFY_S) +#define ULP_TXPKT_DATAMODIFY_G(x) \ + (((x) >> ULP_TXPKT_DATAMODIFY_S) & ULP_TXPKT_DATAMODIFY__M) +#define ULP_TXPKT_DATAMODIFY_F ULP_TXPKT_DATAMODIFY_V(1U) + +#define ULP_TXPKT_CHANNELID_S 22 +#define ULP_TXPKT_CHANNELID_M 0x1 +#define ULP_TXPKT_CHANNELID_V(x) ((x) << ULP_TXPKT_CHANNELID_S) +#define ULP_TXPKT_CHANNELID_G(x) \ + (((x) >> ULP_TXPKT_CHANNELID_S) & ULP_TXPKT_CHANNELID_M) +#define ULP_TXPKT_CHANNELID_F ULP_TXPKT_CHANNELID_V(1U) + +#define SCMD_SEQ_NO_CTRL_S 29 +#define SCMD_SEQ_NO_CTRL_M 0x3 +#define SCMD_SEQ_NO_CTRL_V(x) ((x) << SCMD_SEQ_NO_CTRL_S) +#define SCMD_SEQ_NO_CTRL_G(x) \ + (((x) >> SCMD_SEQ_NO_CTRL_S) & SCMD_SEQ_NO_CTRL_M) + +/* StsFieldPrsnt- Status field at the end of the TLS PDU */ +#define SCMD_STATUS_PRESENT_S 28 +#define SCMD_STATUS_PRESENT_M 0x1 +#define SCMD_STATUS_PRESENT_V(x) ((x) << SCMD_STATUS_PRESENT_S) +#define SCMD_STATUS_PRESENT_G(x) \ + (((x) >> SCMD_STATUS_PRESENT_S) & SCMD_STATUS_PRESENT_M) +#define SCMD_STATUS_PRESENT_F SCMD_STATUS_PRESENT_V(1U) + +/* ProtoVersion - Protocol Version 0: 1.2, 1:1.1, 2:DTLS, 3:Generic, + * 3-15: Reserved. + */ +#define SCMD_PROTO_VERSION_S 24 +#define SCMD_PROTO_VERSION_M 0xf +#define SCMD_PROTO_VERSION_V(x) ((x) << SCMD_PROTO_VERSION_S) +#define SCMD_PROTO_VERSION_G(x) \ + (((x) >> SCMD_PROTO_VERSION_S) & SCMD_PROTO_VERSION_M) + +/* EncDecCtrl - Encryption/Decryption Control. 0: Encrypt, 1: Decrypt */ +#define SCMD_ENC_DEC_CTRL_S 23 +#define SCMD_ENC_DEC_CTRL_M 0x1 +#define SCMD_ENC_DEC_CTRL_V(x) ((x) << SCMD_ENC_DEC_CTRL_S) +#define SCMD_ENC_DEC_CTRL_G(x) \ + (((x) >> SCMD_ENC_DEC_CTRL_S) & SCMD_ENC_DEC_CTRL_M) +#define SCMD_ENC_DEC_CTRL_F SCMD_ENC_DEC_CTRL_V(1U) + +/* CipherAuthSeqCtrl - Cipher Authentication Sequence Control. */ +#define SCMD_CIPH_AUTH_SEQ_CTRL_S 22 +#define SCMD_CIPH_AUTH_SEQ_CTRL_M 0x1 +#define SCMD_CIPH_AUTH_SEQ_CTRL_V(x) \ + ((x) << SCMD_CIPH_AUTH_SEQ_CTRL_S) +#define SCMD_CIPH_AUTH_SEQ_CTRL_G(x) \ + (((x) >> SCMD_CIPH_AUTH_SEQ_CTRL_S) & SCMD_CIPH_AUTH_SEQ_CTRL_M) +#define SCMD_CIPH_AUTH_SEQ_CTRL_F SCMD_CIPH_AUTH_SEQ_CTRL_V(1U) + +/* CiphMode - Cipher Mode. 0: NOP, 1:AES-CBC, 2:AES-GCM, 3:AES-CTR, + * 4:Generic-AES, 5-15: Reserved. + */ +#define SCMD_CIPH_MODE_S 18 +#define SCMD_CIPH_MODE_M 0xf +#define SCMD_CIPH_MODE_V(x) ((x) << SCMD_CIPH_MODE_S) +#define SCMD_CIPH_MODE_G(x) \ + (((x) >> SCMD_CIPH_MODE_S) & SCMD_CIPH_MODE_M) + +/* AuthMode - Auth Mode. 0: NOP, 1:SHA1, 2:SHA2-224, 3:SHA2-256 + * 4-15: Reserved + */ +#define SCMD_AUTH_MODE_S 14 +#define SCMD_AUTH_MODE_M 0xf +#define SCMD_AUTH_MODE_V(x) ((x) << SCMD_AUTH_MODE_S) +#define SCMD_AUTH_MODE_G(x) \ + (((x) >> SCMD_AUTH_MODE_S) & SCMD_AUTH_MODE_M) + +/* HmacCtrl - HMAC Control. 0:NOP, 1:No truncation, 2:Support HMAC Truncation + * per RFC 4366, 3:IPSec 96 bits, 4-7:Reserved + */ +#define SCMD_HMAC_CTRL_S 11 +#define SCMD_HMAC_CTRL_M 0x7 +#define SCMD_HMAC_CTRL_V(x) ((x) << SCMD_HMAC_CTRL_S) +#define SCMD_HMAC_CTRL_G(x) \ + (((x) >> SCMD_HMAC_CTRL_S) & SCMD_HMAC_CTRL_M) + +/* IvSize - IV size in units of 2 bytes */ +#define SCMD_IV_SIZE_S 7 +#define SCMD_IV_SIZE_M 0xf +#define SCMD_IV_SIZE_V(x) ((x) << SCMD_IV_SIZE_S) +#define SCMD_IV_SIZE_G(x) \ + (((x) >> SCMD_IV_SIZE_S) & SCMD_IV_SIZE_M) + +/* NumIVs - Number of IVs */ +#define SCMD_NUM_IVS_S 0 +#define SCMD_NUM_IVS_M 0x7f +#define SCMD_NUM_IVS_V(x) ((x) << SCMD_NUM_IVS_S) +#define SCMD_NUM_IVS_G(x) \ + (((x) >> SCMD_NUM_IVS_S) & SCMD_NUM_IVS_M) + +/* EnbDbgId - If this is enabled upper 20 (63:44) bits if SeqNumber + * (below) are used as Cid (connection id for debug status), these + * bits are padded to zero for forming the 64 bit + * sequence number for TLS + */ +#define SCMD_ENB_DBGID_S 31 +#define SCMD_ENB_DBGID_M 0x1 +#define SCMD_ENB_DBGID_V(x) ((x) << SCMD_ENB_DBGID_S) +#define SCMD_ENB_DBGID_G(x) \ + (((x) >> SCMD_ENB_DBGID_S) & SCMD_ENB_DBGID_M) + +/* IV generation in SW. */ +#define SCMD_IV_GEN_CTRL_S 30 +#define SCMD_IV_GEN_CTRL_M 0x1 +#define SCMD_IV_GEN_CTRL_V(x) ((x) << SCMD_IV_GEN_CTRL_S) +#define SCMD_IV_GEN_CTRL_G(x) \ + (((x) >> SCMD_IV_GEN_CTRL_S) & SCMD_IV_GEN_CTRL_M) +#define SCMD_IV_GEN_CTRL_F SCMD_IV_GEN_CTRL_V(1U) + +/* More frags */ +#define SCMD_MORE_FRAGS_S 20 +#define SCMD_MORE_FRAGS_M 0x1 +#define SCMD_MORE_FRAGS_V(x) ((x) << SCMD_MORE_FRAGS_S) +#define SCMD_MORE_FRAGS_G(x) (((x) >> SCMD_MORE_FRAGS_S) & SCMD_MORE_FRAGS_M) + +/*last frag */ +#define SCMD_LAST_FRAG_S 19 +#define SCMD_LAST_FRAG_M 0x1 +#define SCMD_LAST_FRAG_V(x) ((x) << SCMD_LAST_FRAG_S) +#define SCMD_LAST_FRAG_G(x) (((x) >> SCMD_LAST_FRAG_S) & SCMD_LAST_FRAG_M) + +/* TlsCompPdu */ +#define SCMD_TLS_COMPPDU_S 18 +#define SCMD_TLS_COMPPDU_M 0x1 +#define SCMD_TLS_COMPPDU_V(x) ((x) << SCMD_TLS_COMPPDU_S) +#define SCMD_TLS_COMPPDU_G(x) (((x) >> SCMD_TLS_COMPPDU_S) & SCMD_TLS_COMPPDU_M) + +/* KeyCntxtInline - Key context inline after the scmd OR PayloadOnly*/ +#define SCMD_KEY_CTX_INLINE_S 17 +#define SCMD_KEY_CTX_INLINE_M 0x1 +#define SCMD_KEY_CTX_INLINE_V(x) ((x) << SCMD_KEY_CTX_INLINE_S) +#define SCMD_KEY_CTX_INLINE_G(x) \ + (((x) >> SCMD_KEY_CTX_INLINE_S) & SCMD_KEY_CTX_INLINE_M) +#define SCMD_KEY_CTX_INLINE_F SCMD_KEY_CTX_INLINE_V(1U) + +/* TLSFragEnable - 0: Host created TLS PDUs, 1: TLS Framgmentation in ASIC */ +#define SCMD_TLS_FRAG_ENABLE_S 16 +#define SCMD_TLS_FRAG_ENABLE_M 0x1 +#define SCMD_TLS_FRAG_ENABLE_V(x) ((x) << SCMD_TLS_FRAG_ENABLE_S) +#define SCMD_TLS_FRAG_ENABLE_G(x) \ + (((x) >> SCMD_TLS_FRAG_ENABLE_S) & SCMD_TLS_FRAG_ENABLE_M) +#define SCMD_TLS_FRAG_ENABLE_F SCMD_TLS_FRAG_ENABLE_V(1U) + +/* MacOnly - Only send the MAC and discard PDU. This is valid for hash only + * modes, in this case TLS_TX will drop the PDU and only + * send back the MAC bytes. + */ +#define SCMD_MAC_ONLY_S 15 +#define SCMD_MAC_ONLY_M 0x1 +#define SCMD_MAC_ONLY_V(x) ((x) << SCMD_MAC_ONLY_S) +#define SCMD_MAC_ONLY_G(x) \ + (((x) >> SCMD_MAC_ONLY_S) & SCMD_MAC_ONLY_M) +#define SCMD_MAC_ONLY_F SCMD_MAC_ONLY_V(1U) + +/* AadIVDrop - Drop the AAD and IV fields. Useful in protocols + * which have complex AAD and IV formations Eg:AES-CCM + */ +#define SCMD_AADIVDROP_S 14 +#define SCMD_AADIVDROP_M 0x1 +#define SCMD_AADIVDROP_V(x) ((x) << SCMD_AADIVDROP_S) +#define SCMD_AADIVDROP_G(x) \ + (((x) >> SCMD_AADIVDROP_S) & SCMD_AADIVDROP_M) +#define SCMD_AADIVDROP_F SCMD_AADIVDROP_V(1U) + +/* HdrLength - Length of all headers excluding TLS header + * present before start of crypto PDU/payload. + */ +#define SCMD_HDR_LEN_S 0 +#define SCMD_HDR_LEN_M 0x3fff +#define SCMD_HDR_LEN_V(x) ((x) << SCMD_HDR_LEN_S) +#define SCMD_HDR_LEN_G(x) \ + (((x) >> SCMD_HDR_LEN_S) & SCMD_HDR_LEN_M) + +struct cpl_tx_sec_pdu { + __be32 op_ivinsrtofst; + __be32 pldlen; + __be32 aadstart_cipherstop_hi; + __be32 cipherstop_lo_authinsert; + __be32 seqno_numivs; + __be32 ivgen_hdrlen; + __be64 scmd1; +}; + +#define CPL_TX_SEC_PDU_OPCODE_S 24 +#define CPL_TX_SEC_PDU_OPCODE_M 0xff +#define CPL_TX_SEC_PDU_OPCODE_V(x) ((x) << CPL_TX_SEC_PDU_OPCODE_S) +#define CPL_TX_SEC_PDU_OPCODE_G(x) \ + (((x) >> CPL_TX_SEC_PDU_OPCODE_S) & CPL_TX_SEC_PDU_OPCODE_M) + +/* RX Channel Id */ +#define CPL_TX_SEC_PDU_RXCHID_S 22 +#define CPL_TX_SEC_PDU_RXCHID_M 0x1 +#define CPL_TX_SEC_PDU_RXCHID_V(x) ((x) << CPL_TX_SEC_PDU_RXCHID_S) +#define CPL_TX_SEC_PDU_RXCHID_G(x) \ + (((x) >> CPL_TX_SEC_PDU_RXCHID_S) & CPL_TX_SEC_PDU_RXCHID_M) +#define CPL_TX_SEC_PDU_RXCHID_F CPL_TX_SEC_PDU_RXCHID_V(1U) + +/* Ack Follows */ +#define CPL_TX_SEC_PDU_ACKFOLLOWS_S 21 +#define CPL_TX_SEC_PDU_ACKFOLLOWS_M 0x1 +#define CPL_TX_SEC_PDU_ACKFOLLOWS_V(x) ((x) << CPL_TX_SEC_PDU_ACKFOLLOWS_S) +#define CPL_TX_SEC_PDU_ACKFOLLOWS_G(x) \ + (((x) >> CPL_TX_SEC_PDU_ACKFOLLOWS_S) & CPL_TX_SEC_PDU_ACKFOLLOWS_M) +#define CPL_TX_SEC_PDU_ACKFOLLOWS_F CPL_TX_SEC_PDU_ACKFOLLOWS_V(1U) + +/* Loopback bit in cpl_tx_sec_pdu */ +#define CPL_TX_SEC_PDU_ULPTXLPBK_S 20 +#define CPL_TX_SEC_PDU_ULPTXLPBK_M 0x1 +#define CPL_TX_SEC_PDU_ULPTXLPBK_V(x) ((x) << CPL_TX_SEC_PDU_ULPTXLPBK_S) +#define CPL_TX_SEC_PDU_ULPTXLPBK_G(x) \ + (((x) >> CPL_TX_SEC_PDU_ULPTXLPBK_S) & CPL_TX_SEC_PDU_ULPTXLPBK_M) +#define CPL_TX_SEC_PDU_ULPTXLPBK_F CPL_TX_SEC_PDU_ULPTXLPBK_V(1U) + +/* Length of cpl header encapsulated */ +#define CPL_TX_SEC_PDU_CPLLEN_S 16 +#define CPL_TX_SEC_PDU_CPLLEN_M 0xf +#define CPL_TX_SEC_PDU_CPLLEN_V(x) ((x) << CPL_TX_SEC_PDU_CPLLEN_S) +#define CPL_TX_SEC_PDU_CPLLEN_G(x) \ + (((x) >> CPL_TX_SEC_PDU_CPLLEN_S) & CPL_TX_SEC_PDU_CPLLEN_M) + +/* PlaceHolder */ +#define CPL_TX_SEC_PDU_PLACEHOLDER_S 10 +#define CPL_TX_SEC_PDU_PLACEHOLDER_M 0x1 +#define CPL_TX_SEC_PDU_PLACEHOLDER_V(x) ((x) << CPL_TX_SEC_PDU_PLACEHOLDER_S) +#define CPL_TX_SEC_PDU_PLACEHOLDER_G(x) \ + (((x) >> CPL_TX_SEC_PDU_PLACEHOLDER_S) & \ + CPL_TX_SEC_PDU_PLACEHOLDER_M) + +/* IvInsrtOffset: Insertion location for IV */ +#define CPL_TX_SEC_PDU_IVINSRTOFST_S 0 +#define CPL_TX_SEC_PDU_IVINSRTOFST_M 0x3ff +#define CPL_TX_SEC_PDU_IVINSRTOFST_V(x) ((x) << CPL_TX_SEC_PDU_IVINSRTOFST_S) +#define CPL_TX_SEC_PDU_IVINSRTOFST_G(x) \ + (((x) >> CPL_TX_SEC_PDU_IVINSRTOFST_S) & \ + CPL_TX_SEC_PDU_IVINSRTOFST_M) + +/* AadStartOffset: Offset in bytes for AAD start from + * the first byte following the pkt headers (0-255 bytes) + */ +#define CPL_TX_SEC_PDU_AADSTART_S 24 +#define CPL_TX_SEC_PDU_AADSTART_M 0xff +#define CPL_TX_SEC_PDU_AADSTART_V(x) ((x) << CPL_TX_SEC_PDU_AADSTART_S) +#define CPL_TX_SEC_PDU_AADSTART_G(x) \ + (((x) >> CPL_TX_SEC_PDU_AADSTART_S) & \ + CPL_TX_SEC_PDU_AADSTART_M) + +/* AadStopOffset: offset in bytes for AAD stop/end from the first byte following + * the pkt headers (0-511 bytes) + */ +#define CPL_TX_SEC_PDU_AADSTOP_S 15 +#define CPL_TX_SEC_PDU_AADSTOP_M 0x1ff +#define CPL_TX_SEC_PDU_AADSTOP_V(x) ((x) << CPL_TX_SEC_PDU_AADSTOP_S) +#define CPL_TX_SEC_PDU_AADSTOP_G(x) \ + (((x) >> CPL_TX_SEC_PDU_AADSTOP_S) & CPL_TX_SEC_PDU_AADSTOP_M) + +/* CipherStartOffset: offset in bytes for encryption/decryption start from the + * first byte following the pkt headers (0-1023 bytes) + */ +#define CPL_TX_SEC_PDU_CIPHERSTART_S 5 +#define CPL_TX_SEC_PDU_CIPHERSTART_M 0x3ff +#define CPL_TX_SEC_PDU_CIPHERSTART_V(x) ((x) << CPL_TX_SEC_PDU_CIPHERSTART_S) +#define CPL_TX_SEC_PDU_CIPHERSTART_G(x) \ + (((x) >> CPL_TX_SEC_PDU_CIPHERSTART_S) & \ + CPL_TX_SEC_PDU_CIPHERSTART_M) + +/* CipherStopOffset: offset in bytes for encryption/decryption end + * from end of the payload of this command (0-511 bytes) + */ +#define CPL_TX_SEC_PDU_CIPHERSTOP_HI_S 0 +#define CPL_TX_SEC_PDU_CIPHERSTOP_HI_M 0x1f +#define CPL_TX_SEC_PDU_CIPHERSTOP_HI_V(x) \ + ((x) << CPL_TX_SEC_PDU_CIPHERSTOP_HI_S) +#define CPL_TX_SEC_PDU_CIPHERSTOP_HI_G(x) \ + (((x) >> CPL_TX_SEC_PDU_CIPHERSTOP_HI_S) & \ + CPL_TX_SEC_PDU_CIPHERSTOP_HI_M) + +#define CPL_TX_SEC_PDU_CIPHERSTOP_LO_S 28 +#define CPL_TX_SEC_PDU_CIPHERSTOP_LO_M 0xf +#define CPL_TX_SEC_PDU_CIPHERSTOP_LO_V(x) \ + ((x) << CPL_TX_SEC_PDU_CIPHERSTOP_LO_S) +#define CPL_TX_SEC_PDU_CIPHERSTOP_LO_G(x) \ + (((x) >> CPL_TX_SEC_PDU_CIPHERSTOP_LO_S) & \ + CPL_TX_SEC_PDU_CIPHERSTOP_LO_M) + +/* AuthStartOffset: offset in bytes for authentication start from + * the first byte following the pkt headers (0-1023) + */ +#define CPL_TX_SEC_PDU_AUTHSTART_S 18 +#define CPL_TX_SEC_PDU_AUTHSTART_M 0x3ff +#define CPL_TX_SEC_PDU_AUTHSTART_V(x) ((x) << CPL_TX_SEC_PDU_AUTHSTART_S) +#define CPL_TX_SEC_PDU_AUTHSTART_G(x) \ + (((x) >> CPL_TX_SEC_PDU_AUTHSTART_S) & \ + CPL_TX_SEC_PDU_AUTHSTART_M) + +/* AuthStopOffset: offset in bytes for authentication + * end from end of the payload of this command (0-511 Bytes) + */ +#define CPL_TX_SEC_PDU_AUTHSTOP_S 9 +#define CPL_TX_SEC_PDU_AUTHSTOP_M 0x1ff +#define CPL_TX_SEC_PDU_AUTHSTOP_V(x) ((x) << CPL_TX_SEC_PDU_AUTHSTOP_S) +#define CPL_TX_SEC_PDU_AUTHSTOP_G(x) \ + (((x) >> CPL_TX_SEC_PDU_AUTHSTOP_S) & \ + CPL_TX_SEC_PDU_AUTHSTOP_M) + +/* AuthInsrtOffset: offset in bytes for authentication insertion + * from end of the payload of this command (0-511 bytes) + */ +#define CPL_TX_SEC_PDU_AUTHINSERT_S 0 +#define CPL_TX_SEC_PDU_AUTHINSERT_M 0x1ff +#define CPL_TX_SEC_PDU_AUTHINSERT_V(x) ((x) << CPL_TX_SEC_PDU_AUTHINSERT_S) +#define CPL_TX_SEC_PDU_AUTHINSERT_G(x) \ + (((x) >> CPL_TX_SEC_PDU_AUTHINSERT_S) & \ + CPL_TX_SEC_PDU_AUTHINSERT_M) + +struct cpl_rx_phys_dsgl { + __be32 op_to_tid; + __be32 pcirlxorder_to_noofsgentr; + struct rss_header rss_hdr_int; +}; + +#define CPL_RX_PHYS_DSGL_OPCODE_S 24 +#define CPL_RX_PHYS_DSGL_OPCODE_M 0xff +#define CPL_RX_PHYS_DSGL_OPCODE_V(x) ((x) << CPL_RX_PHYS_DSGL_OPCODE_S) +#define CPL_RX_PHYS_DSGL_OPCODE_G(x) \ + (((x) >> CPL_RX_PHYS_DSGL_OPCODE_S) & CPL_RX_PHYS_DSGL_OPCODE_M) + +#define CPL_RX_PHYS_DSGL_ISRDMA_S 23 +#define CPL_RX_PHYS_DSGL_ISRDMA_M 0x1 +#define CPL_RX_PHYS_DSGL_ISRDMA_V(x) ((x) << CPL_RX_PHYS_DSGL_ISRDMA_S) +#define CPL_RX_PHYS_DSGL_ISRDMA_G(x) \ + (((x) >> CPL_RX_PHYS_DSGL_ISRDMA_S) & CPL_RX_PHYS_DSGL_ISRDMA_M) +#define CPL_RX_PHYS_DSGL_ISRDMA_F CPL_RX_PHYS_DSGL_ISRDMA_V(1U) + +#define CPL_RX_PHYS_DSGL_RSVD1_S 20 +#define CPL_RX_PHYS_DSGL_RSVD1_M 0x7 +#define CPL_RX_PHYS_DSGL_RSVD1_V(x) ((x) << CPL_RX_PHYS_DSGL_RSVD1_S) +#define CPL_RX_PHYS_DSGL_RSVD1_G(x) \ + (((x) >> CPL_RX_PHYS_DSGL_RSVD1_S) & \ + CPL_RX_PHYS_DSGL_RSVD1_M) + +#define CPL_RX_PHYS_DSGL_PCIRLXORDER_S 31 +#define CPL_RX_PHYS_DSGL_PCIRLXORDER_M 0x1 +#define CPL_RX_PHYS_DSGL_PCIRLXORDER_V(x) \ + ((x) << CPL_RX_PHYS_DSGL_PCIRLXORDER_S) +#define CPL_RX_PHYS_DSGL_PCIRLXORDER_G(x) \ + (((x) >> CPL_RX_PHYS_DSGL_PCIRLXORDER_S) & \ + CPL_RX_PHYS_DSGL_PCIRLXORDER_M) +#define CPL_RX_PHYS_DSGL_PCIRLXORDER_F CPL_RX_PHYS_DSGL_PCIRLXORDER_V(1U) + +#define CPL_RX_PHYS_DSGL_PCINOSNOOP_S 30 +#define CPL_RX_PHYS_DSGL_PCINOSNOOP_M 0x1 +#define CPL_RX_PHYS_DSGL_PCINOSNOOP_V(x) \ + ((x) << CPL_RX_PHYS_DSGL_PCINOSNOOP_S) +#define CPL_RX_PHYS_DSGL_PCINOSNOOP_G(x) \ + (((x) >> CPL_RX_PHYS_DSGL_PCINOSNOOP_S) & \ + CPL_RX_PHYS_DSGL_PCINOSNOOP_M) + +#define CPL_RX_PHYS_DSGL_PCINOSNOOP_F CPL_RX_PHYS_DSGL_PCINOSNOOP_V(1U) + +#define CPL_RX_PHYS_DSGL_PCITPHNTENB_S 29 +#define CPL_RX_PHYS_DSGL_PCITPHNTENB_M 0x1 +#define CPL_RX_PHYS_DSGL_PCITPHNTENB_V(x) \ + ((x) << CPL_RX_PHYS_DSGL_PCITPHNTENB_S) +#define CPL_RX_PHYS_DSGL_PCITPHNTENB_G(x) \ + (((x) >> CPL_RX_PHYS_DSGL_PCITPHNTENB_S) & \ + CPL_RX_PHYS_DSGL_PCITPHNTENB_M) +#define CPL_RX_PHYS_DSGL_PCITPHNTENB_F CPL_RX_PHYS_DSGL_PCITPHNTENB_V(1U) + +#define CPL_RX_PHYS_DSGL_PCITPHNT_S 27 +#define CPL_RX_PHYS_DSGL_PCITPHNT_M 0x3 +#define CPL_RX_PHYS_DSGL_PCITPHNT_V(x) ((x) << CPL_RX_PHYS_DSGL_PCITPHNT_S) +#define CPL_RX_PHYS_DSGL_PCITPHNT_G(x) \ + (((x) >> CPL_RX_PHYS_DSGL_PCITPHNT_S) & \ + CPL_RX_PHYS_DSGL_PCITPHNT_M) + +#define CPL_RX_PHYS_DSGL_DCAID_S 16 +#define CPL_RX_PHYS_DSGL_DCAID_M 0x7ff +#define CPL_RX_PHYS_DSGL_DCAID_V(x) ((x) << CPL_RX_PHYS_DSGL_DCAID_S) +#define CPL_RX_PHYS_DSGL_DCAID_G(x) \ + (((x) >> CPL_RX_PHYS_DSGL_DCAID_S) & \ + CPL_RX_PHYS_DSGL_DCAID_M) + +#define CPL_RX_PHYS_DSGL_NOOFSGENTR_S 0 +#define CPL_RX_PHYS_DSGL_NOOFSGENTR_M 0xffff +#define CPL_RX_PHYS_DSGL_NOOFSGENTR_V(x) \ + ((x) << CPL_RX_PHYS_DSGL_NOOFSGENTR_S) +#define CPL_RX_PHYS_DSGL_NOOFSGENTR_G(x) \ + (((x) >> CPL_RX_PHYS_DSGL_NOOFSGENTR_S) & \ + CPL_RX_PHYS_DSGL_NOOFSGENTR_M) + #endif /* __T4_MSG_H */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h index 6ea54c126407..d8f4adb9c7a6 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h @@ -102,6 +102,7 @@ enum fw_wr_opcodes { FW_RI_FR_NSMR_WR = 0x19, FW_RI_INV_LSTAG_WR = 0x1a, FW_ISCSI_TX_DATA_WR = 0x45, + FW_CRYPTO_LOOKASIDE_WR = 0X6d, FW_LASTC2E_WR = 0x70 }; @@ -3249,4 +3250,127 @@ struct fw_devlog_cmd { #define PCIE_FW_PF_DEVLOG_MEMTYPE_G(x) \ (((x) >> PCIE_FW_PF_DEVLOG_MEMTYPE_S) & PCIE_FW_PF_DEVLOG_MEMTYPE_M) +#define MAX_IMM_OFLD_TX_DATA_WR_LEN (0xff + sizeof(struct fw_ofld_tx_data_wr)) + +struct fw_crypto_lookaside_wr { + __be32 op_to_cctx_size; + __be32 len16_pkd; + __be32 session_id; + __be32 rx_chid_to_rx_q_id; + __be32 key_addr; + __be32 pld_size_hash_size; + __be64 cookie; +}; + +#define FW_CRYPTO_LOOKASIDE_WR_OPCODE_S 24 +#define FW_CRYPTO_LOOKASIDE_WR_OPCODE_M 0xff +#define FW_CRYPTO_LOOKASIDE_WR_OPCODE_V(x) \ + ((x) << FW_CRYPTO_LOOKASIDE_WR_OPCODE_S) +#define FW_CRYPTO_LOOKASIDE_WR_OPCODE_G(x) \ + (((x) >> FW_CRYPTO_LOOKASIDE_WR_OPCODE_S) & \ + FW_CRYPTO_LOOKASIDE_WR_OPCODE_M) + +#define FW_CRYPTO_LOOKASIDE_WR_COMPL_S 23 +#define FW_CRYPTO_LOOKASIDE_WR_COMPL_M 0x1 +#define FW_CRYPTO_LOOKASIDE_WR_COMPL_V(x) \ + ((x) << FW_CRYPTO_LOOKASIDE_WR_COMPL_S) +#define FW_CRYPTO_LOOKASIDE_WR_COMPL_G(x) \ + (((x) >> FW_CRYPTO_LOOKASIDE_WR_COMPL_S) & \ + FW_CRYPTO_LOOKASIDE_WR_COMPL_M) +#define FW_CRYPTO_LOOKASIDE_WR_COMPL_F FW_CRYPTO_LOOKASIDE_WR_COMPL_V(1U) + +#define FW_CRYPTO_LOOKASIDE_WR_IMM_LEN_S 15 +#define FW_CRYPTO_LOOKASIDE_WR_IMM_LEN_M 0xff +#define FW_CRYPTO_LOOKASIDE_WR_IMM_LEN_V(x) \ + ((x) << FW_CRYPTO_LOOKASIDE_WR_IMM_LEN_S) +#define FW_CRYPTO_LOOKASIDE_WR_IMM_LEN_G(x) \ + (((x) >> FW_CRYPTO_LOOKASIDE_WR_IMM_LEN_S) & \ + FW_CRYPTO_LOOKASIDE_WR_IMM_LEN_M) + +#define FW_CRYPTO_LOOKASIDE_WR_CCTX_LOC_S 5 +#define FW_CRYPTO_LOOKASIDE_WR_CCTX_LOC_M 0x3 +#define FW_CRYPTO_LOOKASIDE_WR_CCTX_LOC_V(x) \ + ((x) << FW_CRYPTO_LOOKASIDE_WR_CCTX_LOC_S) +#define FW_CRYPTO_LOOKASIDE_WR_CCTX_LOC_G(x) \ + (((x) >> FW_CRYPTO_LOOKASIDE_WR_CCTX_LOC_S) & \ + FW_CRYPTO_LOOKASIDE_WR_CCTX_LOC_M) + +#define FW_CRYPTO_LOOKASIDE_WR_CCTX_SIZE_S 0 +#define FW_CRYPTO_LOOKASIDE_WR_CCTX_SIZE_M 0x1f +#define FW_CRYPTO_LOOKASIDE_WR_CCTX_SIZE_V(x) \ + ((x) << FW_CRYPTO_LOOKASIDE_WR_CCTX_SIZE_S) +#define FW_CRYPTO_LOOKASIDE_WR_CCTX_SIZE_G(x) \ + (((x) >> FW_CRYPTO_LOOKASIDE_WR_CCTX_SIZE_S) & \ + FW_CRYPTO_LOOKASIDE_WR_CCTX_SIZE_M) + +#define FW_CRYPTO_LOOKASIDE_WR_LEN16_S 0 +#define FW_CRYPTO_LOOKASIDE_WR_LEN16_M 0xff +#define FW_CRYPTO_LOOKASIDE_WR_LEN16_V(x) \ + ((x) << FW_CRYPTO_LOOKASIDE_WR_LEN16_S) +#define FW_CRYPTO_LOOKASIDE_WR_LEN16_G(x) \ + (((x) >> FW_CRYPTO_LOOKASIDE_WR_LEN16_S) & \ + FW_CRYPTO_LOOKASIDE_WR_LEN16_M) + +#define FW_CRYPTO_LOOKASIDE_WR_RX_CHID_S 29 +#define FW_CRYPTO_LOOKASIDE_WR_RX_CHID_M 0x3 +#define FW_CRYPTO_LOOKASIDE_WR_RX_CHID_V(x) \ + ((x) << FW_CRYPTO_LOOKASIDE_WR_RX_CHID_S) +#define FW_CRYPTO_LOOKASIDE_WR_RX_CHID_G(x) \ + (((x) >> FW_CRYPTO_LOOKASIDE_WR_RX_CHID_S) & \ + FW_CRYPTO_LOOKASIDE_WR_RX_CHID_M) + +#define FW_CRYPTO_LOOKASIDE_WR_LCB_S 27 +#define FW_CRYPTO_LOOKASIDE_WR_LCB_M 0x3 +#define FW_CRYPTO_LOOKASIDE_WR_LCB_V(x) \ + ((x) << FW_CRYPTO_LOOKASIDE_WR_LCB_S) +#define FW_CRYPTO_LOOKASIDE_WR_LCB_G(x) \ + (((x) >> FW_CRYPTO_LOOKASIDE_WR_LCB_S) & FW_CRYPTO_LOOKASIDE_WR_LCB_M) + +#define FW_CRYPTO_LOOKASIDE_WR_PHASH_S 25 +#define FW_CRYPTO_LOOKASIDE_WR_PHASH_M 0x3 +#define FW_CRYPTO_LOOKASIDE_WR_PHASH_V(x) \ + ((x) << FW_CRYPTO_LOOKASIDE_WR_PHASH_S) +#define FW_CRYPTO_LOOKASIDE_WR_PHASH_G(x) \ + (((x) >> FW_CRYPTO_LOOKASIDE_WR_PHASH_S) & \ + FW_CRYPTO_LOOKASIDE_WR_PHASH_M) + +#define FW_CRYPTO_LOOKASIDE_WR_IV_S 23 +#define FW_CRYPTO_LOOKASIDE_WR_IV_M 0x3 +#define FW_CRYPTO_LOOKASIDE_WR_IV_V(x) \ + ((x) << FW_CRYPTO_LOOKASIDE_WR_IV_S) +#define FW_CRYPTO_LOOKASIDE_WR_IV_G(x) \ + (((x) >> FW_CRYPTO_LOOKASIDE_WR_IV_S) & FW_CRYPTO_LOOKASIDE_WR_IV_M) + +#define FW_CRYPTO_LOOKASIDE_WR_TX_CH_S 10 +#define FW_CRYPTO_LOOKASIDE_WR_TX_CH_M 0x3 +#define FW_CRYPTO_LOOKASIDE_WR_TX_CH_V(x) \ + ((x) << FW_CRYPTO_LOOKASIDE_WR_TX_CH_S) +#define FW_CRYPTO_LOOKASIDE_WR_TX_CH_G(x) \ + (((x) >> FW_CRYPTO_LOOKASIDE_WR_TX_CH_S) & \ + FW_CRYPTO_LOOKASIDE_WR_TX_CH_M) + +#define FW_CRYPTO_LOOKASIDE_WR_RX_Q_ID_S 0 +#define FW_CRYPTO_LOOKASIDE_WR_RX_Q_ID_M 0x3ff +#define FW_CRYPTO_LOOKASIDE_WR_RX_Q_ID_V(x) \ + ((x) << FW_CRYPTO_LOOKASIDE_WR_RX_Q_ID_S) +#define FW_CRYPTO_LOOKASIDE_WR_RX_Q_ID_G(x) \ + (((x) >> FW_CRYPTO_LOOKASIDE_WR_RX_Q_ID_S) & \ + FW_CRYPTO_LOOKASIDE_WR_RX_Q_ID_M) + +#define FW_CRYPTO_LOOKASIDE_WR_PLD_SIZE_S 24 +#define FW_CRYPTO_LOOKASIDE_WR_PLD_SIZE_M 0xff +#define FW_CRYPTO_LOOKASIDE_WR_PLD_SIZE_V(x) \ + ((x) << FW_CRYPTO_LOOKASIDE_WR_PLD_SIZE_S) +#define FW_CRYPTO_LOOKASIDE_WR_PLD_SIZE_G(x) \ + (((x) >> FW_CRYPTO_LOOKASIDE_WR_PLD_SIZE_S) & \ + FW_CRYPTO_LOOKASIDE_WR_PLD_SIZE_M) + +#define FW_CRYPTO_LOOKASIDE_WR_HASH_SIZE_S 17 +#define FW_CRYPTO_LOOKASIDE_WR_HASH_SIZE_M 0x7f +#define FW_CRYPTO_LOOKASIDE_WR_HASH_SIZE_V(x) \ + ((x) << FW_CRYPTO_LOOKASIDE_WR_HASH_SIZE_S) +#define FW_CRYPTO_LOOKASIDE_WR_HASH_SIZE_G(x) \ + (((x) >> FW_CRYPTO_LOOKASIDE_WR_HASH_SIZE_S) & \ + FW_CRYPTO_LOOKASIDE_WR_HASH_SIZE_M) + #endif /* _T4FW_INTERFACE_H_ */ From 324429d74127d0cc8a1a42d20035f8f986149ec4 Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Wed, 17 Aug 2016 12:33:05 +0530 Subject: [PATCH 0329/1715] chcr: Support for Chelsio's Crypto Hardware The Chelsio's Crypto Hardware can perform the following operations: SHA1, SHA224, SHA256, SHA384 and SHA512, HMAC(SHA1), HMAC(SHA224), HMAC(SHA256), HMAC(SHA384), HAMC(SHA512), AES-128-CBC, AES-192-CBC, AES-256-CBC, AES-128-XTS, AES-256-XTS This patch implements the driver for above mentioned features. This driver is an Upper Layer Driver which is attached to Chelsio's LLD (cxgb4) and uses the queue allocated by the LLD for sending the crypto requests to the Hardware and receiving the responses from it. The crypto operations can be performed by Chelsio's hardware from the userspace applications and/or from within the kernel space using the kernel's crypto API. The above mentioned crypto features have been tested using kernel's tests mentioned in testmgr.h. They also have been tested from user space using libkcapi and Openssl. Signed-off-by: Atul Gupta Signed-off-by: Hariprasad Shenai Acked-by: Herbert Xu Signed-off-by: David S. Miller --- drivers/crypto/chelsio/chcr_algo.c | 1525 ++++++++++++++++++++++++++ drivers/crypto/chelsio/chcr_algo.h | 471 ++++++++ drivers/crypto/chelsio/chcr_core.c | 240 ++++ drivers/crypto/chelsio/chcr_core.h | 80 ++ drivers/crypto/chelsio/chcr_crypto.h | 203 ++++ 5 files changed, 2519 insertions(+) create mode 100644 drivers/crypto/chelsio/chcr_algo.c create mode 100644 drivers/crypto/chelsio/chcr_algo.h create mode 100644 drivers/crypto/chelsio/chcr_core.c create mode 100644 drivers/crypto/chelsio/chcr_core.h create mode 100644 drivers/crypto/chelsio/chcr_crypto.h diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c new file mode 100644 index 000000000000..ad8e353cf897 --- /dev/null +++ b/drivers/crypto/chelsio/chcr_algo.c @@ -0,0 +1,1525 @@ +/* + * This file is part of the Chelsio T6 Crypto driver for Linux. + * + * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Written and Maintained by: + * Manoj Malviya (manojmalviya@chelsio.com) + * Atul Gupta (atul.gupta@chelsio.com) + * Jitendra Lulla (jlulla@chelsio.com) + * Yeshaswi M R Gowda (yeshaswi@chelsio.com) + * Harsh Jain (harsh@chelsio.com) + */ + +#define pr_fmt(fmt) "chcr:" fmt + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "t4fw_api.h" +#include "t4_msg.h" +#include "chcr_core.h" +#include "chcr_algo.h" +#include "chcr_crypto.h" + +static inline struct ablk_ctx *ABLK_CTX(struct chcr_context *ctx) +{ + return ctx->crypto_ctx->ablkctx; +} + +static inline struct hmac_ctx *HMAC_CTX(struct chcr_context *ctx) +{ + return ctx->crypto_ctx->hmacctx; +} + +static inline struct uld_ctx *ULD_CTX(struct chcr_context *ctx) +{ + return ctx->dev->u_ctx; +} + +static inline int is_ofld_imm(const struct sk_buff *skb) +{ + return (skb->len <= CRYPTO_MAX_IMM_TX_PKT_LEN); +} + +/* + * sgl_len - calculates the size of an SGL of the given capacity + * @n: the number of SGL entries + * Calculates the number of flits needed for a scatter/gather list that + * can hold the given number of entries. + */ +static inline unsigned int sgl_len(unsigned int n) +{ + n--; + return (3 * n) / 2 + (n & 1) + 2; +} + +/* + * chcr_handle_resp - Unmap the DMA buffers associated with the request + * @req: crypto request + */ +int chcr_handle_resp(struct crypto_async_request *req, unsigned char *input, + int error_status) +{ + struct crypto_tfm *tfm = req->tfm; + struct chcr_context *ctx = crypto_tfm_ctx(tfm); + struct uld_ctx *u_ctx = ULD_CTX(ctx); + struct chcr_req_ctx ctx_req; + struct cpl_fw6_pld *fw6_pld; + unsigned int digestsize, updated_digestsize; + + switch (tfm->__crt_alg->cra_flags & CRYPTO_ALG_TYPE_MASK) { + case CRYPTO_ALG_TYPE_BLKCIPHER: + ctx_req.req.ablk_req = (struct ablkcipher_request *)req; + ctx_req.ctx.ablk_ctx = + ablkcipher_request_ctx(ctx_req.req.ablk_req); + if (!error_status) { + fw6_pld = (struct cpl_fw6_pld *)input; + memcpy(ctx_req.req.ablk_req->info, &fw6_pld->data[2], + AES_BLOCK_SIZE); + } + dma_unmap_sg(&u_ctx->lldi.pdev->dev, ctx_req.req.ablk_req->dst, + ABLK_CTX(ctx)->dst_nents, DMA_FROM_DEVICE); + if (ctx_req.ctx.ablk_ctx->skb) { + kfree_skb(ctx_req.ctx.ablk_ctx->skb); + ctx_req.ctx.ablk_ctx->skb = NULL; + } + break; + + case CRYPTO_ALG_TYPE_AHASH: + ctx_req.req.ahash_req = (struct ahash_request *)req; + ctx_req.ctx.ahash_ctx = + ahash_request_ctx(ctx_req.req.ahash_req); + digestsize = + crypto_ahash_digestsize(crypto_ahash_reqtfm( + ctx_req.req.ahash_req)); + updated_digestsize = digestsize; + if (digestsize == SHA224_DIGEST_SIZE) + updated_digestsize = SHA256_DIGEST_SIZE; + else if (digestsize == SHA384_DIGEST_SIZE) + updated_digestsize = SHA512_DIGEST_SIZE; + if (ctx_req.ctx.ahash_ctx->skb) + ctx_req.ctx.ahash_ctx->skb = NULL; + if (ctx_req.ctx.ahash_ctx->result == 1) { + ctx_req.ctx.ahash_ctx->result = 0; + memcpy(ctx_req.req.ahash_req->result, input + + sizeof(struct cpl_fw6_pld), + digestsize); + } else { + memcpy(ctx_req.ctx.ahash_ctx->partial_hash, input + + sizeof(struct cpl_fw6_pld), + updated_digestsize); + } + kfree(ctx_req.ctx.ahash_ctx->dummy_payload_ptr); + ctx_req.ctx.ahash_ctx->dummy_payload_ptr = NULL; + break; + } + return 0; +} + +/* + * calc_tx_flits_ofld - calculate # of flits for an offload packet + * @skb: the packet + * Returns the number of flits needed for the given offload packet. + * These packets are already fully constructed and no additional headers + * will be added. + */ +static inline unsigned int calc_tx_flits_ofld(const struct sk_buff *skb) +{ + unsigned int flits, cnt; + + if (is_ofld_imm(skb)) + return DIV_ROUND_UP(skb->len, 8); + + flits = skb_transport_offset(skb) / 8; /* headers */ + cnt = skb_shinfo(skb)->nr_frags; + if (skb_tail_pointer(skb) != skb_transport_header(skb)) + cnt++; + return flits + sgl_len(cnt); +} + +static struct shash_desc *chcr_alloc_shash(unsigned int ds) +{ + struct crypto_shash *base_hash = NULL; + struct shash_desc *desc; + + switch (ds) { + case SHA1_DIGEST_SIZE: + base_hash = crypto_alloc_shash("sha1-generic", 0, 0); + break; + case SHA224_DIGEST_SIZE: + base_hash = crypto_alloc_shash("sha224-generic", 0, 0); + break; + case SHA256_DIGEST_SIZE: + base_hash = crypto_alloc_shash("sha256-generic", 0, 0); + break; + case SHA384_DIGEST_SIZE: + base_hash = crypto_alloc_shash("sha384-generic", 0, 0); + break; + case SHA512_DIGEST_SIZE: + base_hash = crypto_alloc_shash("sha512-generic", 0, 0); + break; + } + if (IS_ERR(base_hash)) { + pr_err("Can not allocate sha-generic algo.\n"); + return (void *)base_hash; + } + + desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(base_hash), + GFP_KERNEL); + if (!desc) + return ERR_PTR(-ENOMEM); + desc->tfm = base_hash; + desc->flags = crypto_shash_get_flags(base_hash); + return desc; +} + +static int chcr_compute_partial_hash(struct shash_desc *desc, + char *iopad, char *result_hash, + int digest_size) +{ + struct sha1_state sha1_st; + struct sha256_state sha256_st; + struct sha512_state sha512_st; + int error; + + if (digest_size == SHA1_DIGEST_SIZE) { + error = crypto_shash_init(desc) ?: + crypto_shash_update(desc, iopad, SHA1_BLOCK_SIZE) ?: + crypto_shash_export(desc, (void *)&sha1_st); + memcpy(result_hash, sha1_st.state, SHA1_DIGEST_SIZE); + } else if (digest_size == SHA224_DIGEST_SIZE) { + error = crypto_shash_init(desc) ?: + crypto_shash_update(desc, iopad, SHA256_BLOCK_SIZE) ?: + crypto_shash_export(desc, (void *)&sha256_st); + memcpy(result_hash, sha256_st.state, SHA256_DIGEST_SIZE); + + } else if (digest_size == SHA256_DIGEST_SIZE) { + error = crypto_shash_init(desc) ?: + crypto_shash_update(desc, iopad, SHA256_BLOCK_SIZE) ?: + crypto_shash_export(desc, (void *)&sha256_st); + memcpy(result_hash, sha256_st.state, SHA256_DIGEST_SIZE); + + } else if (digest_size == SHA384_DIGEST_SIZE) { + error = crypto_shash_init(desc) ?: + crypto_shash_update(desc, iopad, SHA512_BLOCK_SIZE) ?: + crypto_shash_export(desc, (void *)&sha512_st); + memcpy(result_hash, sha512_st.state, SHA512_DIGEST_SIZE); + + } else if (digest_size == SHA512_DIGEST_SIZE) { + error = crypto_shash_init(desc) ?: + crypto_shash_update(desc, iopad, SHA512_BLOCK_SIZE) ?: + crypto_shash_export(desc, (void *)&sha512_st); + memcpy(result_hash, sha512_st.state, SHA512_DIGEST_SIZE); + } else { + error = -EINVAL; + pr_err("Unknown digest size %d\n", digest_size); + } + return error; +} + +static void chcr_change_order(char *buf, int ds) +{ + int i; + + if (ds == SHA512_DIGEST_SIZE) { + for (i = 0; i < (ds / sizeof(u64)); i++) + *((__be64 *)buf + i) = + cpu_to_be64(*((u64 *)buf + i)); + } else { + for (i = 0; i < (ds / sizeof(u32)); i++) + *((__be32 *)buf + i) = + cpu_to_be32(*((u32 *)buf + i)); + } +} + +static inline int is_hmac(struct crypto_tfm *tfm) +{ + struct crypto_alg *alg = tfm->__crt_alg; + struct chcr_alg_template *chcr_crypto_alg = + container_of(__crypto_ahash_alg(alg), struct chcr_alg_template, + alg.hash); + if ((chcr_crypto_alg->type & CRYPTO_ALG_SUB_TYPE_MASK) == + CRYPTO_ALG_SUB_TYPE_HASH_HMAC) + return 1; + return 0; +} + +static inline unsigned int ch_nents(struct scatterlist *sg, + unsigned int *total_size) +{ + unsigned int nents; + + for (nents = 0, *total_size = 0; sg; sg = sg_next(sg)) { + nents++; + *total_size += sg->length; + } + return nents; +} + +static void write_phys_cpl(struct cpl_rx_phys_dsgl *phys_cpl, + struct scatterlist *sg, + struct phys_sge_parm *sg_param) +{ + struct phys_sge_pairs *to; + unsigned int out_buf_size = sg_param->obsize; + unsigned int nents = sg_param->nents, i, j, tot_len = 0; + + phys_cpl->op_to_tid = htonl(CPL_RX_PHYS_DSGL_OPCODE_V(CPL_RX_PHYS_DSGL) + | CPL_RX_PHYS_DSGL_ISRDMA_V(0)); + phys_cpl->pcirlxorder_to_noofsgentr = + htonl(CPL_RX_PHYS_DSGL_PCIRLXORDER_V(0) | + CPL_RX_PHYS_DSGL_PCINOSNOOP_V(0) | + CPL_RX_PHYS_DSGL_PCITPHNTENB_V(0) | + CPL_RX_PHYS_DSGL_PCITPHNT_V(0) | + CPL_RX_PHYS_DSGL_DCAID_V(0) | + CPL_RX_PHYS_DSGL_NOOFSGENTR_V(nents)); + phys_cpl->rss_hdr_int.opcode = CPL_RX_PHYS_ADDR; + phys_cpl->rss_hdr_int.qid = htons(sg_param->qid); + phys_cpl->rss_hdr_int.hash_val = 0; + to = (struct phys_sge_pairs *)((unsigned char *)phys_cpl + + sizeof(struct cpl_rx_phys_dsgl)); + + for (i = 0; nents; to++) { + for (j = i; (nents && (j < (8 + i))); j++, nents--) { + to->len[j] = htons(sg->length); + to->addr[j] = cpu_to_be64(sg_dma_address(sg)); + if (out_buf_size) { + if (tot_len + sg_dma_len(sg) >= out_buf_size) { + to->len[j] = htons(out_buf_size - + tot_len); + return; + } + tot_len += sg_dma_len(sg); + } + sg = sg_next(sg); + } + } +} + +static inline unsigned +int map_writesg_phys_cpl(struct device *dev, struct cpl_rx_phys_dsgl *phys_cpl, + struct scatterlist *sg, struct phys_sge_parm *sg_param) +{ + if (!sg || !sg_param->nents) + return 0; + + sg_param->nents = dma_map_sg(dev, sg, sg_param->nents, DMA_FROM_DEVICE); + if (sg_param->nents == 0) { + pr_err("CHCR : DMA mapping failed\n"); + return -EINVAL; + } + write_phys_cpl(phys_cpl, sg, sg_param); + return 0; +} + +static inline int get_cryptoalg_subtype(struct crypto_tfm *tfm) +{ + struct crypto_alg *alg = tfm->__crt_alg; + struct chcr_alg_template *chcr_crypto_alg = + container_of(alg, struct chcr_alg_template, alg.crypto); + + return chcr_crypto_alg->type & CRYPTO_ALG_SUB_TYPE_MASK; +} + +static inline void +write_sg_data_page_desc(struct sk_buff *skb, unsigned int *frags, + struct scatterlist *sg, unsigned int count) +{ + struct page *spage; + unsigned int page_len; + + skb->len += count; + skb->data_len += count; + skb->truesize += count; + while (count > 0) { + if (sg && (!(sg->length))) + break; + spage = sg_page(sg); + get_page(spage); + page_len = min(sg->length, count); + skb_fill_page_desc(skb, *frags, spage, sg->offset, page_len); + (*frags)++; + count -= page_len; + sg = sg_next(sg); + } +} + +static int generate_copy_rrkey(struct ablk_ctx *ablkctx, + struct _key_ctx *key_ctx) +{ + if (ablkctx->ciph_mode == CHCR_SCMD_CIPHER_MODE_AES_CBC) { + get_aes_decrypt_key(key_ctx->key, ablkctx->key, + ablkctx->enckey_len << 3); + memset(key_ctx->key + ablkctx->enckey_len, 0, + CHCR_AES_MAX_KEY_LEN - ablkctx->enckey_len); + } else { + memcpy(key_ctx->key, + ablkctx->key + (ablkctx->enckey_len >> 1), + ablkctx->enckey_len >> 1); + get_aes_decrypt_key(key_ctx->key + (ablkctx->enckey_len >> 1), + ablkctx->key, ablkctx->enckey_len << 2); + } + return 0; +} + +static inline void create_wreq(struct chcr_context *ctx, + struct fw_crypto_lookaside_wr *wreq, + void *req, struct sk_buff *skb, + int kctx_len, int hash_sz, + unsigned int phys_dsgl) +{ + struct uld_ctx *u_ctx = ULD_CTX(ctx); + struct ulp_txpkt *ulptx = (struct ulp_txpkt *)(wreq + 1); + struct ulptx_idata *sc_imm = (struct ulptx_idata *)(ulptx + 1); + int iv_loc = IV_DSGL; + int qid = u_ctx->lldi.rxq_ids[ctx->tx_channel_id]; + unsigned int immdatalen = 0, nr_frags = 0; + + if (is_ofld_imm(skb)) { + immdatalen = skb->data_len; + iv_loc = IV_IMMEDIATE; + } else { + nr_frags = skb_shinfo(skb)->nr_frags; + } + + wreq->op_to_cctx_size = FILL_WR_OP_CCTX_SIZE(immdatalen, + (kctx_len >> 4)); + wreq->pld_size_hash_size = + htonl(FW_CRYPTO_LOOKASIDE_WR_PLD_SIZE_V(sgl_lengths[nr_frags]) | + FW_CRYPTO_LOOKASIDE_WR_HASH_SIZE_V(hash_sz)); + wreq->len16_pkd = htonl(FW_CRYPTO_LOOKASIDE_WR_LEN16_V(DIV_ROUND_UP( + (calc_tx_flits_ofld(skb) * 8), 16))); + wreq->cookie = cpu_to_be64((uintptr_t)req); + wreq->rx_chid_to_rx_q_id = + FILL_WR_RX_Q_ID(ctx->dev->tx_channel_id, qid, + (hash_sz) ? IV_NOP : iv_loc); + + ulptx->cmd_dest = FILL_ULPTX_CMD_DEST(ctx->dev->tx_channel_id); + ulptx->len = htonl((DIV_ROUND_UP((calc_tx_flits_ofld(skb) * 8), + 16) - ((sizeof(*wreq)) >> 4))); + + sc_imm->cmd_more = FILL_CMD_MORE(immdatalen); + sc_imm->len = cpu_to_be32(sizeof(struct cpl_tx_sec_pdu) + kctx_len + + ((hash_sz) ? DUMMY_BYTES : + (sizeof(struct cpl_rx_phys_dsgl) + + phys_dsgl)) + immdatalen); +} + +/** + * create_cipher_wr - form the WR for cipher operations + * @req: cipher req. + * @ctx: crypto driver context of the request. + * @qid: ingress qid where response of this WR should be received. + * @op_type: encryption or decryption + */ +static struct sk_buff +*create_cipher_wr(struct crypto_async_request *req_base, + struct chcr_context *ctx, unsigned short qid, + unsigned short op_type) +{ + struct ablkcipher_request *req = (struct ablkcipher_request *)req_base; + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); + struct uld_ctx *u_ctx = ULD_CTX(ctx); + struct ablk_ctx *ablkctx = ABLK_CTX(ctx); + struct sk_buff *skb = NULL; + struct _key_ctx *key_ctx; + struct fw_crypto_lookaside_wr *wreq; + struct cpl_tx_sec_pdu *sec_cpl; + struct cpl_rx_phys_dsgl *phys_cpl; + struct chcr_blkcipher_req_ctx *req_ctx = ablkcipher_request_ctx(req); + struct phys_sge_parm sg_param; + unsigned int frags = 0, transhdr_len, phys_dsgl, dst_bufsize = 0; + unsigned int ivsize = crypto_ablkcipher_ivsize(tfm), kctx_len; + + if (!req->info) + return ERR_PTR(-EINVAL); + ablkctx->dst_nents = ch_nents(req->dst, &dst_bufsize); + ablkctx->enc = op_type; + + if ((ablkctx->enckey_len == 0) || (ivsize > AES_BLOCK_SIZE) || + (req->nbytes <= 0) || (req->nbytes % AES_BLOCK_SIZE)) + return ERR_PTR(-EINVAL); + + phys_dsgl = get_space_for_phys_dsgl(ablkctx->dst_nents); + + kctx_len = sizeof(*key_ctx) + + (DIV_ROUND_UP(ablkctx->enckey_len, 16) * 16); + transhdr_len = CIPHER_TRANSHDR_SIZE(kctx_len, phys_dsgl); + skb = alloc_skb((transhdr_len + sizeof(struct sge_opaque_hdr)), + GFP_ATOMIC); + if (!skb) + return ERR_PTR(-ENOMEM); + skb_reserve(skb, sizeof(struct sge_opaque_hdr)); + wreq = (struct fw_crypto_lookaside_wr *)__skb_put(skb, transhdr_len); + + sec_cpl = (struct cpl_tx_sec_pdu *)((u8 *)wreq + SEC_CPL_OFFSET); + sec_cpl->op_ivinsrtofst = + FILL_SEC_CPL_OP_IVINSR(ctx->dev->tx_channel_id, 2, 1, 1); + + sec_cpl->pldlen = htonl(ivsize + req->nbytes); + sec_cpl->aadstart_cipherstop_hi = FILL_SEC_CPL_CIPHERSTOP_HI(0, 0, + ivsize + 1, 0); + + sec_cpl->cipherstop_lo_authinsert = FILL_SEC_CPL_AUTHINSERT(0, 0, + 0, 0); + sec_cpl->seqno_numivs = FILL_SEC_CPL_SCMD0_SEQNO(op_type, 0, + ablkctx->ciph_mode, + 0, 0, ivsize >> 1, 1); + sec_cpl->ivgen_hdrlen = FILL_SEC_CPL_IVGEN_HDRLEN(0, 0, 0, + 0, 1, phys_dsgl); + + key_ctx = (struct _key_ctx *)((u8 *)sec_cpl + sizeof(*sec_cpl)); + key_ctx->ctx_hdr = ablkctx->key_ctx_hdr; + if (op_type == CHCR_DECRYPT_OP) { + if (generate_copy_rrkey(ablkctx, key_ctx)) + goto map_fail1; + } else { + if (ablkctx->ciph_mode == CHCR_SCMD_CIPHER_MODE_AES_CBC) { + memcpy(key_ctx->key, ablkctx->key, ablkctx->enckey_len); + } else { + memcpy(key_ctx->key, ablkctx->key + + (ablkctx->enckey_len >> 1), + ablkctx->enckey_len >> 1); + memcpy(key_ctx->key + + (ablkctx->enckey_len >> 1), + ablkctx->key, + ablkctx->enckey_len >> 1); + } + } + phys_cpl = (struct cpl_rx_phys_dsgl *)((u8 *)key_ctx + kctx_len); + + memcpy(ablkctx->iv, req->info, ivsize); + sg_init_table(&ablkctx->iv_sg, 1); + sg_set_buf(&ablkctx->iv_sg, ablkctx->iv, ivsize); + sg_param.nents = ablkctx->dst_nents; + sg_param.obsize = dst_bufsize; + sg_param.qid = qid; + sg_param.align = 1; + if (map_writesg_phys_cpl(&u_ctx->lldi.pdev->dev, phys_cpl, req->dst, + &sg_param)) + goto map_fail1; + + skb_set_transport_header(skb, transhdr_len); + write_sg_data_page_desc(skb, &frags, &ablkctx->iv_sg, ivsize); + write_sg_data_page_desc(skb, &frags, req->src, req->nbytes); + create_wreq(ctx, wreq, req, skb, kctx_len, 0, phys_dsgl); + req_ctx->skb = skb; + skb_get(skb); + return skb; +map_fail1: + kfree_skb(skb); + return ERR_PTR(-ENOMEM); +} + +static int chcr_aes_cbc_setkey(struct crypto_ablkcipher *tfm, const u8 *key, + unsigned int keylen) +{ + struct chcr_context *ctx = crypto_ablkcipher_ctx(tfm); + struct ablk_ctx *ablkctx = ABLK_CTX(ctx); + struct ablkcipher_alg *alg = crypto_ablkcipher_alg(tfm); + unsigned int ck_size, context_size; + u16 alignment = 0; + + if ((keylen < alg->min_keysize) || (keylen > alg->max_keysize)) + goto badkey_err; + + memcpy(ablkctx->key, key, keylen); + ablkctx->enckey_len = keylen; + if (keylen == AES_KEYSIZE_128) { + ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_128; + } else if (keylen == AES_KEYSIZE_192) { + alignment = 8; + ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_192; + } else if (keylen == AES_KEYSIZE_256) { + ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_256; + } else { + goto badkey_err; + } + + context_size = (KEY_CONTEXT_HDR_SALT_AND_PAD + + keylen + alignment) >> 4; + + ablkctx->key_ctx_hdr = FILL_KEY_CTX_HDR(ck_size, CHCR_KEYCTX_NO_KEY, + 0, 0, context_size); + ablkctx->ciph_mode = CHCR_SCMD_CIPHER_MODE_AES_CBC; + return 0; +badkey_err: + crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + ablkctx->enckey_len = 0; + return -EINVAL; +} + +int cxgb4_is_crypto_q_full(struct net_device *dev, unsigned int idx) +{ + int ret = 0; + struct sge_ofld_txq *q; + struct adapter *adap = netdev2adap(dev); + + local_bh_disable(); + q = &adap->sge.ofldtxq[idx]; + spin_lock(&q->sendq.lock); + if (q->full) + ret = -1; + spin_unlock(&q->sendq.lock); + local_bh_enable(); + return ret; +} + +static int chcr_aes_encrypt(struct ablkcipher_request *req) +{ + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); + struct chcr_context *ctx = crypto_ablkcipher_ctx(tfm); + struct crypto_async_request *req_base = &req->base; + struct uld_ctx *u_ctx = ULD_CTX(ctx); + struct sk_buff *skb; + + if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0], + ctx->tx_channel_id))) { + if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) + return -EBUSY; + } + + skb = create_cipher_wr(req_base, ctx, + u_ctx->lldi.rxq_ids[ctx->tx_channel_id], + CHCR_ENCRYPT_OP); + if (IS_ERR(skb)) { + pr_err("chcr : %s : Failed to form WR. No memory\n", __func__); + return PTR_ERR(skb); + } + skb->dev = u_ctx->lldi.ports[0]; + set_wr_txq(skb, CPL_PRIORITY_DATA, ctx->tx_channel_id); + chcr_send_wr(skb); + return -EINPROGRESS; +} + +static int chcr_aes_decrypt(struct ablkcipher_request *req) +{ + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); + struct chcr_context *ctx = crypto_ablkcipher_ctx(tfm); + struct crypto_async_request *req_base = &req->base; + struct uld_ctx *u_ctx = ULD_CTX(ctx); + struct sk_buff *skb; + + if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0], + ctx->tx_channel_id))) { + if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) + return -EBUSY; + } + + skb = create_cipher_wr(req_base, ctx, u_ctx->lldi.rxq_ids[0], + CHCR_DECRYPT_OP); + if (IS_ERR(skb)) { + pr_err("chcr : %s : Failed to form WR. No memory\n", __func__); + return PTR_ERR(skb); + } + skb->dev = u_ctx->lldi.ports[0]; + set_wr_txq(skb, CPL_PRIORITY_DATA, ctx->tx_channel_id); + chcr_send_wr(skb); + return -EINPROGRESS; +} + +static int chcr_device_init(struct chcr_context *ctx) +{ + struct uld_ctx *u_ctx; + unsigned int id; + int err = 0, rxq_perchan, rxq_idx; + + id = smp_processor_id(); + if (!ctx->dev) { + err = assign_chcr_device(&ctx->dev); + if (err) { + pr_err("chcr device assignment fails\n"); + goto out; + } + u_ctx = ULD_CTX(ctx); + rxq_perchan = u_ctx->lldi.nrxq / u_ctx->lldi.nchan; + ctx->dev->tx_channel_id = 0; + rxq_idx = ctx->dev->tx_channel_id * rxq_perchan; + rxq_idx += id % rxq_perchan; + spin_lock(&ctx->dev->lock_chcr_dev); + ctx->tx_channel_id = rxq_idx; + spin_unlock(&ctx->dev->lock_chcr_dev); + } +out: + return err; +} + +static int chcr_cra_init(struct crypto_tfm *tfm) +{ + tfm->crt_ablkcipher.reqsize = sizeof(struct chcr_blkcipher_req_ctx); + return chcr_device_init(crypto_tfm_ctx(tfm)); +} + +static int get_alg_config(struct algo_param *params, + unsigned int auth_size) +{ + switch (auth_size) { + case SHA1_DIGEST_SIZE: + params->mk_size = CHCR_KEYCTX_MAC_KEY_SIZE_160; + params->auth_mode = CHCR_SCMD_AUTH_MODE_SHA1; + params->result_size = SHA1_DIGEST_SIZE; + break; + case SHA224_DIGEST_SIZE: + params->mk_size = CHCR_KEYCTX_MAC_KEY_SIZE_256; + params->auth_mode = CHCR_SCMD_AUTH_MODE_SHA224; + params->result_size = SHA256_DIGEST_SIZE; + break; + case SHA256_DIGEST_SIZE: + params->mk_size = CHCR_KEYCTX_MAC_KEY_SIZE_256; + params->auth_mode = CHCR_SCMD_AUTH_MODE_SHA256; + params->result_size = SHA256_DIGEST_SIZE; + break; + case SHA384_DIGEST_SIZE: + params->mk_size = CHCR_KEYCTX_MAC_KEY_SIZE_512; + params->auth_mode = CHCR_SCMD_AUTH_MODE_SHA512_384; + params->result_size = SHA512_DIGEST_SIZE; + break; + case SHA512_DIGEST_SIZE: + params->mk_size = CHCR_KEYCTX_MAC_KEY_SIZE_512; + params->auth_mode = CHCR_SCMD_AUTH_MODE_SHA512_512; + params->result_size = SHA512_DIGEST_SIZE; + break; + default: + pr_err("chcr : ERROR, unsupported digest size\n"); + return -EINVAL; + } + return 0; +} + +static inline int +write_buffer_data_page_desc(struct chcr_ahash_req_ctx *req_ctx, + struct sk_buff *skb, unsigned int *frags, char *bfr, + u8 bfr_len) +{ + void *page_ptr = NULL; + + skb->len += bfr_len; + skb->data_len += bfr_len; + skb->truesize += bfr_len; + page_ptr = kmalloc(CHCR_HASH_MAX_BLOCK_SIZE_128, GFP_ATOMIC | GFP_DMA); + if (!page_ptr) + return -ENOMEM; + get_page(virt_to_page(page_ptr)); + req_ctx->dummy_payload_ptr = page_ptr; + memcpy(page_ptr, bfr, bfr_len); + skb_fill_page_desc(skb, *frags, virt_to_page(page_ptr), + offset_in_page(page_ptr), bfr_len); + (*frags)++; + return 0; +} + +/** + * create_final_hash_wr - Create hash work request + * @req - Cipher req base + */ +static struct sk_buff *create_final_hash_wr(struct ahash_request *req, + struct hash_wr_param *param) +{ + struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct chcr_context *ctx = crypto_tfm_ctx(crypto_ahash_tfm(tfm)); + struct hmac_ctx *hmacctx = HMAC_CTX(ctx); + struct sk_buff *skb = NULL; + struct _key_ctx *key_ctx; + struct fw_crypto_lookaside_wr *wreq; + struct cpl_tx_sec_pdu *sec_cpl; + unsigned int frags = 0, transhdr_len, iopad_alignment = 0; + unsigned int digestsize = crypto_ahash_digestsize(tfm); + unsigned int kctx_len = sizeof(*key_ctx); + u8 hash_size_in_response = 0; + + iopad_alignment = KEYCTX_ALIGN_PAD(digestsize); + kctx_len += param->alg_prm.result_size + iopad_alignment; + if (param->opad_needed) + kctx_len += param->alg_prm.result_size + iopad_alignment; + + if (req_ctx->result) + hash_size_in_response = digestsize; + else + hash_size_in_response = param->alg_prm.result_size; + transhdr_len = HASH_TRANSHDR_SIZE(kctx_len); + skb = alloc_skb((transhdr_len + sizeof(struct sge_opaque_hdr)), + GFP_ATOMIC); + if (!skb) + return skb; + + skb_reserve(skb, sizeof(struct sge_opaque_hdr)); + wreq = (struct fw_crypto_lookaside_wr *)__skb_put(skb, transhdr_len); + memset(wreq, 0, transhdr_len); + + sec_cpl = (struct cpl_tx_sec_pdu *)((u8 *)wreq + SEC_CPL_OFFSET); + sec_cpl->op_ivinsrtofst = + FILL_SEC_CPL_OP_IVINSR(ctx->dev->tx_channel_id, 2, 0, 0); + sec_cpl->pldlen = htonl(param->bfr_len + param->sg_len); + + sec_cpl->aadstart_cipherstop_hi = + FILL_SEC_CPL_CIPHERSTOP_HI(0, 0, 0, 0); + sec_cpl->cipherstop_lo_authinsert = + FILL_SEC_CPL_AUTHINSERT(0, 1, 0, 0); + sec_cpl->seqno_numivs = + FILL_SEC_CPL_SCMD0_SEQNO(0, 0, 0, param->alg_prm.auth_mode, + param->opad_needed, 0, 0); + + sec_cpl->ivgen_hdrlen = + FILL_SEC_CPL_IVGEN_HDRLEN(param->last, param->more, 0, 1, 0, 0); + + key_ctx = (struct _key_ctx *)((u8 *)sec_cpl + sizeof(*sec_cpl)); + memcpy(key_ctx->key, req_ctx->partial_hash, param->alg_prm.result_size); + + if (param->opad_needed) + memcpy(key_ctx->key + ((param->alg_prm.result_size <= 32) ? 32 : + CHCR_HASH_MAX_DIGEST_SIZE), + hmacctx->opad, param->alg_prm.result_size); + + key_ctx->ctx_hdr = FILL_KEY_CTX_HDR(CHCR_KEYCTX_NO_KEY, + param->alg_prm.mk_size, 0, + param->opad_needed, + (kctx_len >> 4)); + sec_cpl->scmd1 = cpu_to_be64((u64)param->scmd1); + + skb_set_transport_header(skb, transhdr_len); + if (param->bfr_len != 0) + write_buffer_data_page_desc(req_ctx, skb, &frags, req_ctx->bfr, + param->bfr_len); + if (param->sg_len != 0) + write_sg_data_page_desc(skb, &frags, req->src, param->sg_len); + + create_wreq(ctx, wreq, req, skb, kctx_len, hash_size_in_response, + 0); + req_ctx->skb = skb; + skb_get(skb); + return skb; +} + +static int chcr_ahash_update(struct ahash_request *req) +{ + struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(req); + struct crypto_ahash *rtfm = crypto_ahash_reqtfm(req); + struct chcr_context *ctx = crypto_tfm_ctx(crypto_ahash_tfm(rtfm)); + struct uld_ctx *u_ctx = NULL; + struct sk_buff *skb; + u8 remainder = 0, bs; + unsigned int nbytes = req->nbytes; + struct hash_wr_param params; + + bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(rtfm)); + + u_ctx = ULD_CTX(ctx); + if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0], + ctx->tx_channel_id))) { + if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) + return -EBUSY; + } + + if (nbytes + req_ctx->bfr_len >= bs) { + remainder = (nbytes + req_ctx->bfr_len) % bs; + nbytes = nbytes + req_ctx->bfr_len - remainder; + } else { + sg_pcopy_to_buffer(req->src, sg_nents(req->src), req_ctx->bfr + + req_ctx->bfr_len, nbytes, 0); + req_ctx->bfr_len += nbytes; + return 0; + } + + params.opad_needed = 0; + params.more = 1; + params.last = 0; + params.sg_len = nbytes - req_ctx->bfr_len; + params.bfr_len = req_ctx->bfr_len; + params.scmd1 = 0; + get_alg_config(¶ms.alg_prm, crypto_ahash_digestsize(rtfm)); + req_ctx->result = 0; + req_ctx->data_len += params.sg_len + params.bfr_len; + skb = create_final_hash_wr(req, ¶ms); + if (!skb) + return -ENOMEM; + + req_ctx->bfr_len = remainder; + if (remainder) + sg_pcopy_to_buffer(req->src, sg_nents(req->src), + req_ctx->bfr, remainder, req->nbytes - + remainder); + skb->dev = u_ctx->lldi.ports[0]; + set_wr_txq(skb, CPL_PRIORITY_DATA, ctx->tx_channel_id); + chcr_send_wr(skb); + + return -EINPROGRESS; +} + +static void create_last_hash_block(char *bfr_ptr, unsigned int bs, u64 scmd1) +{ + memset(bfr_ptr, 0, bs); + *bfr_ptr = 0x80; + if (bs == 64) + *(__be64 *)(bfr_ptr + 56) = cpu_to_be64(scmd1 << 3); + else + *(__be64 *)(bfr_ptr + 120) = cpu_to_be64(scmd1 << 3); +} + +static int chcr_ahash_final(struct ahash_request *req) +{ + struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(req); + struct crypto_ahash *rtfm = crypto_ahash_reqtfm(req); + struct chcr_context *ctx = crypto_tfm_ctx(crypto_ahash_tfm(rtfm)); + struct hash_wr_param params; + struct sk_buff *skb; + struct uld_ctx *u_ctx = NULL; + u8 bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(rtfm)); + + u_ctx = ULD_CTX(ctx); + if (is_hmac(crypto_ahash_tfm(rtfm))) + params.opad_needed = 1; + else + params.opad_needed = 0; + params.sg_len = 0; + get_alg_config(¶ms.alg_prm, crypto_ahash_digestsize(rtfm)); + req_ctx->result = 1; + params.bfr_len = req_ctx->bfr_len; + req_ctx->data_len += params.bfr_len + params.sg_len; + if (req_ctx->bfr && (req_ctx->bfr_len == 0)) { + create_last_hash_block(req_ctx->bfr, bs, req_ctx->data_len); + params.last = 0; + params.more = 1; + params.scmd1 = 0; + params.bfr_len = bs; + + } else { + params.scmd1 = req_ctx->data_len; + params.last = 1; + params.more = 0; + } + skb = create_final_hash_wr(req, ¶ms); + skb->dev = u_ctx->lldi.ports[0]; + set_wr_txq(skb, CPL_PRIORITY_DATA, ctx->tx_channel_id); + chcr_send_wr(skb); + return -EINPROGRESS; +} + +static int chcr_ahash_finup(struct ahash_request *req) +{ + struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(req); + struct crypto_ahash *rtfm = crypto_ahash_reqtfm(req); + struct chcr_context *ctx = crypto_tfm_ctx(crypto_ahash_tfm(rtfm)); + struct uld_ctx *u_ctx = NULL; + struct sk_buff *skb; + struct hash_wr_param params; + u8 bs; + + bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(rtfm)); + u_ctx = ULD_CTX(ctx); + + if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0], + ctx->tx_channel_id))) { + if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) + return -EBUSY; + } + + if (is_hmac(crypto_ahash_tfm(rtfm))) + params.opad_needed = 1; + else + params.opad_needed = 0; + + params.sg_len = req->nbytes; + params.bfr_len = req_ctx->bfr_len; + get_alg_config(¶ms.alg_prm, crypto_ahash_digestsize(rtfm)); + req_ctx->data_len += params.bfr_len + params.sg_len; + req_ctx->result = 1; + if (req_ctx->bfr && (req_ctx->bfr_len + req->nbytes) == 0) { + create_last_hash_block(req_ctx->bfr, bs, req_ctx->data_len); + params.last = 0; + params.more = 1; + params.scmd1 = 0; + params.bfr_len = bs; + } else { + params.scmd1 = req_ctx->data_len; + params.last = 1; + params.more = 0; + } + + skb = create_final_hash_wr(req, ¶ms); + if (!skb) + return -ENOMEM; + skb->dev = u_ctx->lldi.ports[0]; + set_wr_txq(skb, CPL_PRIORITY_DATA, ctx->tx_channel_id); + chcr_send_wr(skb); + + return -EINPROGRESS; +} + +static int chcr_ahash_digest(struct ahash_request *req) +{ + struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(req); + struct crypto_ahash *rtfm = crypto_ahash_reqtfm(req); + struct chcr_context *ctx = crypto_tfm_ctx(crypto_ahash_tfm(rtfm)); + struct uld_ctx *u_ctx = NULL; + struct sk_buff *skb; + struct hash_wr_param params; + u8 bs; + + rtfm->init(req); + bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(rtfm)); + + u_ctx = ULD_CTX(ctx); + if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0], + ctx->tx_channel_id))) { + if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) + return -EBUSY; + } + + if (is_hmac(crypto_ahash_tfm(rtfm))) + params.opad_needed = 1; + else + params.opad_needed = 0; + + params.last = 0; + params.more = 0; + params.sg_len = req->nbytes; + params.bfr_len = 0; + params.scmd1 = 0; + get_alg_config(¶ms.alg_prm, crypto_ahash_digestsize(rtfm)); + req_ctx->result = 1; + req_ctx->data_len += params.bfr_len + params.sg_len; + + if (req_ctx->bfr && req->nbytes == 0) { + create_last_hash_block(req_ctx->bfr, bs, 0); + params.more = 1; + params.bfr_len = bs; + } + + skb = create_final_hash_wr(req, ¶ms); + if (!skb) + return -ENOMEM; + + skb->dev = u_ctx->lldi.ports[0]; + set_wr_txq(skb, CPL_PRIORITY_DATA, ctx->tx_channel_id); + chcr_send_wr(skb); + return -EINPROGRESS; +} + +static int chcr_ahash_export(struct ahash_request *areq, void *out) +{ + struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(areq); + struct chcr_ahash_req_ctx *state = out; + + state->bfr_len = req_ctx->bfr_len; + state->data_len = req_ctx->data_len; + memcpy(state->bfr, req_ctx->bfr, CHCR_HASH_MAX_BLOCK_SIZE_128); + memcpy(state->partial_hash, req_ctx->partial_hash, + CHCR_HASH_MAX_DIGEST_SIZE); + return 0; +} + +static int chcr_ahash_import(struct ahash_request *areq, const void *in) +{ + struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(areq); + struct chcr_ahash_req_ctx *state = (struct chcr_ahash_req_ctx *)in; + + req_ctx->bfr_len = state->bfr_len; + req_ctx->data_len = state->data_len; + req_ctx->dummy_payload_ptr = NULL; + memcpy(req_ctx->bfr, state->bfr, CHCR_HASH_MAX_BLOCK_SIZE_128); + memcpy(req_ctx->partial_hash, state->partial_hash, + CHCR_HASH_MAX_DIGEST_SIZE); + return 0; +} + +static int chcr_ahash_setkey(struct crypto_ahash *tfm, const u8 *key, + unsigned int keylen) +{ + struct chcr_context *ctx = crypto_tfm_ctx(crypto_ahash_tfm(tfm)); + struct hmac_ctx *hmacctx = HMAC_CTX(ctx); + unsigned int digestsize = crypto_ahash_digestsize(tfm); + unsigned int bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm)); + unsigned int i, err = 0, updated_digestsize; + + /* + * use the key to calculate the ipad and opad. ipad will sent with the + * first request's data. opad will be sent with the final hash result + * ipad in hmacctx->ipad and opad in hmacctx->opad location + */ + if (!hmacctx->desc) + return -EINVAL; + if (keylen > bs) { + err = crypto_shash_digest(hmacctx->desc, key, keylen, + hmacctx->ipad); + if (err) + goto out; + keylen = digestsize; + } else { + memcpy(hmacctx->ipad, key, keylen); + } + memset(hmacctx->ipad + keylen, 0, bs - keylen); + memcpy(hmacctx->opad, hmacctx->ipad, bs); + + for (i = 0; i < bs / sizeof(int); i++) { + *((unsigned int *)(&hmacctx->ipad) + i) ^= IPAD_DATA; + *((unsigned int *)(&hmacctx->opad) + i) ^= OPAD_DATA; + } + + updated_digestsize = digestsize; + if (digestsize == SHA224_DIGEST_SIZE) + updated_digestsize = SHA256_DIGEST_SIZE; + else if (digestsize == SHA384_DIGEST_SIZE) + updated_digestsize = SHA512_DIGEST_SIZE; + err = chcr_compute_partial_hash(hmacctx->desc, hmacctx->ipad, + hmacctx->ipad, digestsize); + if (err) + goto out; + chcr_change_order(hmacctx->ipad, updated_digestsize); + + err = chcr_compute_partial_hash(hmacctx->desc, hmacctx->opad, + hmacctx->opad, digestsize); + if (err) + goto out; + chcr_change_order(hmacctx->opad, updated_digestsize); +out: + return err; +} + +static int chcr_aes_xts_setkey(struct crypto_ablkcipher *tfm, const u8 *key, + unsigned int key_len) +{ + struct chcr_context *ctx = crypto_ablkcipher_ctx(tfm); + struct ablk_ctx *ablkctx = ABLK_CTX(ctx); + int status = 0; + unsigned short context_size = 0; + + if ((key_len == (AES_KEYSIZE_128 << 1)) || + (key_len == (AES_KEYSIZE_256 << 1))) { + memcpy(ablkctx->key, key, key_len); + ablkctx->enckey_len = key_len; + context_size = (KEY_CONTEXT_HDR_SALT_AND_PAD + key_len) >> 4; + ablkctx->key_ctx_hdr = + FILL_KEY_CTX_HDR((key_len == AES_KEYSIZE_256) ? + CHCR_KEYCTX_CIPHER_KEY_SIZE_128 : + CHCR_KEYCTX_CIPHER_KEY_SIZE_256, + CHCR_KEYCTX_NO_KEY, 1, + 0, context_size); + ablkctx->ciph_mode = CHCR_SCMD_CIPHER_MODE_AES_XTS; + } else { + crypto_tfm_set_flags((struct crypto_tfm *)tfm, + CRYPTO_TFM_RES_BAD_KEY_LEN); + ablkctx->enckey_len = 0; + status = -EINVAL; + } + return status; +} + +static int chcr_sha_init(struct ahash_request *areq) +{ + struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(areq); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); + int digestsize = crypto_ahash_digestsize(tfm); + + req_ctx->data_len = 0; + req_ctx->dummy_payload_ptr = NULL; + req_ctx->bfr_len = 0; + req_ctx->skb = NULL; + req_ctx->result = 0; + copy_hash_init_values(req_ctx->partial_hash, digestsize); + return 0; +} + +static int chcr_sha_cra_init(struct crypto_tfm *tfm) +{ + crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), + sizeof(struct chcr_ahash_req_ctx)); + return chcr_device_init(crypto_tfm_ctx(tfm)); +} + +static int chcr_hmac_init(struct ahash_request *areq) +{ + struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(areq); + struct crypto_ahash *rtfm = crypto_ahash_reqtfm(areq); + struct chcr_context *ctx = crypto_tfm_ctx(crypto_ahash_tfm(rtfm)); + struct hmac_ctx *hmacctx = HMAC_CTX(ctx); + unsigned int digestsize = crypto_ahash_digestsize(rtfm); + unsigned int bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(rtfm)); + + chcr_sha_init(areq); + req_ctx->data_len = bs; + if (is_hmac(crypto_ahash_tfm(rtfm))) { + if (digestsize == SHA224_DIGEST_SIZE) + memcpy(req_ctx->partial_hash, hmacctx->ipad, + SHA256_DIGEST_SIZE); + else if (digestsize == SHA384_DIGEST_SIZE) + memcpy(req_ctx->partial_hash, hmacctx->ipad, + SHA512_DIGEST_SIZE); + else + memcpy(req_ctx->partial_hash, hmacctx->ipad, + digestsize); + } + return 0; +} + +static int chcr_hmac_cra_init(struct crypto_tfm *tfm) +{ + struct chcr_context *ctx = crypto_tfm_ctx(tfm); + struct hmac_ctx *hmacctx = HMAC_CTX(ctx); + unsigned int digestsize = + crypto_ahash_digestsize(__crypto_ahash_cast(tfm)); + + crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), + sizeof(struct chcr_ahash_req_ctx)); + hmacctx->desc = chcr_alloc_shash(digestsize); + if (IS_ERR(hmacctx->desc)) + return PTR_ERR(hmacctx->desc); + return chcr_device_init(crypto_tfm_ctx(tfm)); +} + +static void chcr_free_shash(struct shash_desc *desc) +{ + crypto_free_shash(desc->tfm); + kfree(desc); +} + +static void chcr_hmac_cra_exit(struct crypto_tfm *tfm) +{ + struct chcr_context *ctx = crypto_tfm_ctx(tfm); + struct hmac_ctx *hmacctx = HMAC_CTX(ctx); + + if (hmacctx->desc) { + chcr_free_shash(hmacctx->desc); + hmacctx->desc = NULL; + } +} + +static struct chcr_alg_template driver_algs[] = { + /* AES-CBC */ + { + .type = CRYPTO_ALG_TYPE_ABLKCIPHER, + .is_registered = 0, + .alg.crypto = { + .cra_name = "cbc(aes)", + .cra_driver_name = "cbc(aes-chcr)", + .cra_priority = CHCR_CRA_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | + CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct chcr_context) + + sizeof(struct ablk_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = chcr_cra_init, + .cra_exit = NULL, + .cra_u.ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = chcr_aes_cbc_setkey, + .encrypt = chcr_aes_encrypt, + .decrypt = chcr_aes_decrypt, + } + } + }, + { + .type = CRYPTO_ALG_TYPE_ABLKCIPHER, + .is_registered = 0, + .alg.crypto = { + .cra_name = "xts(aes)", + .cra_driver_name = "xts(aes-chcr)", + .cra_priority = CHCR_CRA_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | + CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct chcr_context) + + sizeof(struct ablk_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = chcr_cra_init, + .cra_exit = NULL, + .cra_u = { + .ablkcipher = { + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = chcr_aes_xts_setkey, + .encrypt = chcr_aes_encrypt, + .decrypt = chcr_aes_decrypt, + } + } + } + }, + /* SHA */ + { + .type = CRYPTO_ALG_TYPE_AHASH, + .is_registered = 0, + .alg.hash = { + .halg.digestsize = SHA1_DIGEST_SIZE, + .halg.base = { + .cra_name = "sha1", + .cra_driver_name = "sha1-chcr", + .cra_blocksize = SHA1_BLOCK_SIZE, + } + } + }, + { + .type = CRYPTO_ALG_TYPE_AHASH, + .is_registered = 0, + .alg.hash = { + .halg.digestsize = SHA256_DIGEST_SIZE, + .halg.base = { + .cra_name = "sha256", + .cra_driver_name = "sha256-chcr", + .cra_blocksize = SHA256_BLOCK_SIZE, + } + } + }, + { + .type = CRYPTO_ALG_TYPE_AHASH, + .is_registered = 0, + .alg.hash = { + .halg.digestsize = SHA224_DIGEST_SIZE, + .halg.base = { + .cra_name = "sha224", + .cra_driver_name = "sha224-chcr", + .cra_blocksize = SHA224_BLOCK_SIZE, + } + } + }, + { + .type = CRYPTO_ALG_TYPE_AHASH, + .is_registered = 0, + .alg.hash = { + .halg.digestsize = SHA384_DIGEST_SIZE, + .halg.base = { + .cra_name = "sha384", + .cra_driver_name = "sha384-chcr", + .cra_blocksize = SHA384_BLOCK_SIZE, + } + } + }, + { + .type = CRYPTO_ALG_TYPE_AHASH, + .is_registered = 0, + .alg.hash = { + .halg.digestsize = SHA512_DIGEST_SIZE, + .halg.base = { + .cra_name = "sha512", + .cra_driver_name = "sha512-chcr", + .cra_blocksize = SHA512_BLOCK_SIZE, + } + } + }, + /* HMAC */ + { + .type = CRYPTO_ALG_TYPE_HMAC, + .is_registered = 0, + .alg.hash = { + .halg.digestsize = SHA1_DIGEST_SIZE, + .halg.base = { + .cra_name = "hmac(sha1)", + .cra_driver_name = "hmac(sha1-chcr)", + .cra_blocksize = SHA1_BLOCK_SIZE, + } + } + }, + { + .type = CRYPTO_ALG_TYPE_HMAC, + .is_registered = 0, + .alg.hash = { + .halg.digestsize = SHA224_DIGEST_SIZE, + .halg.base = { + .cra_name = "hmac(sha224)", + .cra_driver_name = "hmac(sha224-chcr)", + .cra_blocksize = SHA224_BLOCK_SIZE, + } + } + }, + { + .type = CRYPTO_ALG_TYPE_HMAC, + .is_registered = 0, + .alg.hash = { + .halg.digestsize = SHA256_DIGEST_SIZE, + .halg.base = { + .cra_name = "hmac(sha256)", + .cra_driver_name = "hmac(sha256-chcr)", + .cra_blocksize = SHA256_BLOCK_SIZE, + } + } + }, + { + .type = CRYPTO_ALG_TYPE_HMAC, + .is_registered = 0, + .alg.hash = { + .halg.digestsize = SHA384_DIGEST_SIZE, + .halg.base = { + .cra_name = "hmac(sha384)", + .cra_driver_name = "hmac(sha384-chcr)", + .cra_blocksize = SHA384_BLOCK_SIZE, + } + } + }, + { + .type = CRYPTO_ALG_TYPE_HMAC, + .is_registered = 0, + .alg.hash = { + .halg.digestsize = SHA512_DIGEST_SIZE, + .halg.base = { + .cra_name = "hmac(sha512)", + .cra_driver_name = "hmac(sha512-chcr)", + .cra_blocksize = SHA512_BLOCK_SIZE, + } + } + }, +}; + +/* + * chcr_unregister_alg - Deregister crypto algorithms with + * kernel framework. + */ +static int chcr_unregister_alg(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(driver_algs); i++) { + switch (driver_algs[i].type & CRYPTO_ALG_TYPE_MASK) { + case CRYPTO_ALG_TYPE_ABLKCIPHER: + if (driver_algs[i].is_registered) + crypto_unregister_alg( + &driver_algs[i].alg.crypto); + break; + case CRYPTO_ALG_TYPE_AHASH: + if (driver_algs[i].is_registered) + crypto_unregister_ahash( + &driver_algs[i].alg.hash); + break; + } + driver_algs[i].is_registered = 0; + } + return 0; +} + +#define SZ_AHASH_CTX sizeof(struct chcr_context) +#define SZ_AHASH_H_CTX (sizeof(struct chcr_context) + sizeof(struct hmac_ctx)) +#define SZ_AHASH_REQ_CTX sizeof(struct chcr_ahash_req_ctx) +#define AHASH_CRA_FLAGS (CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC) + +/* + * chcr_register_alg - Register crypto algorithms with kernel framework. + */ +static int chcr_register_alg(void) +{ + struct crypto_alg ai; + struct ahash_alg *a_hash; + int err = 0, i; + char *name = NULL; + + for (i = 0; i < ARRAY_SIZE(driver_algs); i++) { + if (driver_algs[i].is_registered) + continue; + switch (driver_algs[i].type & CRYPTO_ALG_TYPE_MASK) { + case CRYPTO_ALG_TYPE_ABLKCIPHER: + err = crypto_register_alg(&driver_algs[i].alg.crypto); + name = driver_algs[i].alg.crypto.cra_driver_name; + break; + case CRYPTO_ALG_TYPE_AHASH: + a_hash = &driver_algs[i].alg.hash; + a_hash->update = chcr_ahash_update; + a_hash->final = chcr_ahash_final; + a_hash->finup = chcr_ahash_finup; + a_hash->digest = chcr_ahash_digest; + a_hash->export = chcr_ahash_export; + a_hash->import = chcr_ahash_import; + a_hash->halg.statesize = SZ_AHASH_REQ_CTX; + a_hash->halg.base.cra_priority = CHCR_CRA_PRIORITY; + a_hash->halg.base.cra_module = THIS_MODULE; + a_hash->halg.base.cra_flags = AHASH_CRA_FLAGS; + a_hash->halg.base.cra_alignmask = 0; + a_hash->halg.base.cra_exit = NULL; + a_hash->halg.base.cra_type = &crypto_ahash_type; + + if (driver_algs[i].type == CRYPTO_ALG_TYPE_HMAC) { + a_hash->halg.base.cra_init = chcr_hmac_cra_init; + a_hash->halg.base.cra_exit = chcr_hmac_cra_exit; + a_hash->init = chcr_hmac_init; + a_hash->setkey = chcr_ahash_setkey; + a_hash->halg.base.cra_ctxsize = SZ_AHASH_H_CTX; + } else { + a_hash->init = chcr_sha_init; + a_hash->halg.base.cra_ctxsize = SZ_AHASH_CTX; + a_hash->halg.base.cra_init = chcr_sha_cra_init; + } + err = crypto_register_ahash(&driver_algs[i].alg.hash); + ai = driver_algs[i].alg.hash.halg.base; + name = ai.cra_driver_name; + break; + } + if (err) { + pr_err("chcr : %s : Algorithm registration failed\n", + name); + goto register_err; + } else { + driver_algs[i].is_registered = 1; + } + } + return 0; + +register_err: + chcr_unregister_alg(); + return err; +} + +/* + * start_crypto - Register the crypto algorithms. + * This should called once when the first device comesup. After this + * kernel will start calling driver APIs for crypto operations. + */ +int start_crypto(void) +{ + return chcr_register_alg(); +} + +/* + * stop_crypto - Deregister all the crypto algorithms with kernel. + * This should be called once when the last device goes down. After this + * kernel will not call the driver API for crypto operations. + */ +int stop_crypto(void) +{ + chcr_unregister_alg(); + return 0; +} diff --git a/drivers/crypto/chelsio/chcr_algo.h b/drivers/crypto/chelsio/chcr_algo.h new file mode 100644 index 000000000000..ec64fbcdeb49 --- /dev/null +++ b/drivers/crypto/chelsio/chcr_algo.h @@ -0,0 +1,471 @@ +/* + * This file is part of the Chelsio T6 Crypto driver for Linux. + * + * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef __CHCR_ALGO_H__ +#define __CHCR_ALGO_H__ + +/* Crypto key context */ +#define KEY_CONTEXT_CTX_LEN_S 24 +#define KEY_CONTEXT_CTX_LEN_M 0xff +#define KEY_CONTEXT_CTX_LEN_V(x) ((x) << KEY_CONTEXT_CTX_LEN_S) +#define KEY_CONTEXT_CTX_LEN_G(x) \ + (((x) >> KEY_CONTEXT_CTX_LEN_S) & KEY_CONTEXT_CTX_LEN_M) + +#define KEY_CONTEXT_DUAL_CK_S 12 +#define KEY_CONTEXT_DUAL_CK_M 0x1 +#define KEY_CONTEXT_DUAL_CK_V(x) ((x) << KEY_CONTEXT_DUAL_CK_S) +#define KEY_CONTEXT_DUAL_CK_G(x) \ +(((x) >> KEY_CONTEXT_DUAL_CK_S) & KEY_CONTEXT_DUAL_CK_M) +#define KEY_CONTEXT_DUAL_CK_F KEY_CONTEXT_DUAL_CK_V(1U) + +#define KEY_CONTEXT_SALT_PRESENT_S 10 +#define KEY_CONTEXT_SALT_PRESENT_M 0x1 +#define KEY_CONTEXT_SALT_PRESENT_V(x) ((x) << KEY_CONTEXT_SALT_PRESENT_S) +#define KEY_CONTEXT_SALT_PRESENT_G(x) \ + (((x) >> KEY_CONTEXT_SALT_PRESENT_S) & \ + KEY_CONTEXT_SALT_PRESENT_M) +#define KEY_CONTEXT_SALT_PRESENT_F KEY_CONTEXT_SALT_PRESENT_V(1U) + +#define KEY_CONTEXT_VALID_S 0 +#define KEY_CONTEXT_VALID_M 0x1 +#define KEY_CONTEXT_VALID_V(x) ((x) << KEY_CONTEXT_VALID_S) +#define KEY_CONTEXT_VALID_G(x) \ + (((x) >> KEY_CONTEXT_VALID_S) & \ + KEY_CONTEXT_VALID_M) +#define KEY_CONTEXT_VALID_F KEY_CONTEXT_VALID_V(1U) + +#define KEY_CONTEXT_CK_SIZE_S 6 +#define KEY_CONTEXT_CK_SIZE_M 0xf +#define KEY_CONTEXT_CK_SIZE_V(x) ((x) << KEY_CONTEXT_CK_SIZE_S) +#define KEY_CONTEXT_CK_SIZE_G(x) \ + (((x) >> KEY_CONTEXT_CK_SIZE_S) & KEY_CONTEXT_CK_SIZE_M) + +#define KEY_CONTEXT_MK_SIZE_S 2 +#define KEY_CONTEXT_MK_SIZE_M 0xf +#define KEY_CONTEXT_MK_SIZE_V(x) ((x) << KEY_CONTEXT_MK_SIZE_S) +#define KEY_CONTEXT_MK_SIZE_G(x) \ + (((x) >> KEY_CONTEXT_MK_SIZE_S) & KEY_CONTEXT_MK_SIZE_M) + +#define KEY_CONTEXT_OPAD_PRESENT_S 11 +#define KEY_CONTEXT_OPAD_PRESENT_M 0x1 +#define KEY_CONTEXT_OPAD_PRESENT_V(x) ((x) << KEY_CONTEXT_OPAD_PRESENT_S) +#define KEY_CONTEXT_OPAD_PRESENT_G(x) \ + (((x) >> KEY_CONTEXT_OPAD_PRESENT_S) & \ + KEY_CONTEXT_OPAD_PRESENT_M) +#define KEY_CONTEXT_OPAD_PRESENT_F KEY_CONTEXT_OPAD_PRESENT_V(1U) + +#define CHCR_HASH_MAX_DIGEST_SIZE 64 +#define CHCR_MAX_SHA_DIGEST_SIZE 64 + +#define IPSEC_TRUNCATED_ICV_SIZE 12 +#define TLS_TRUNCATED_HMAC_SIZE 10 +#define CBCMAC_DIGEST_SIZE 16 +#define MAX_HASH_NAME 20 + +#define SHA1_INIT_STATE_5X4B 5 +#define SHA256_INIT_STATE_8X4B 8 +#define SHA512_INIT_STATE_8X8B 8 +#define SHA1_INIT_STATE SHA1_INIT_STATE_5X4B +#define SHA224_INIT_STATE SHA256_INIT_STATE_8X4B +#define SHA256_INIT_STATE SHA256_INIT_STATE_8X4B +#define SHA384_INIT_STATE SHA512_INIT_STATE_8X8B +#define SHA512_INIT_STATE SHA512_INIT_STATE_8X8B + +#define DUMMY_BYTES 16 + +#define IPAD_DATA 0x36363636 +#define OPAD_DATA 0x5c5c5c5c + +#define TRANSHDR_SIZE(alignedkctx_len)\ + (sizeof(struct ulptx_idata) +\ + sizeof(struct ulp_txpkt) +\ + sizeof(struct fw_crypto_lookaside_wr) +\ + sizeof(struct cpl_tx_sec_pdu) +\ + (alignedkctx_len)) +#define CIPHER_TRANSHDR_SIZE(alignedkctx_len, sge_pairs) \ + (TRANSHDR_SIZE(alignedkctx_len) + sge_pairs +\ + sizeof(struct cpl_rx_phys_dsgl)) +#define HASH_TRANSHDR_SIZE(alignedkctx_len)\ + (TRANSHDR_SIZE(alignedkctx_len) + DUMMY_BYTES) + +#define SEC_CPL_OFFSET (sizeof(struct fw_crypto_lookaside_wr) + \ + sizeof(struct ulp_txpkt) + \ + sizeof(struct ulptx_idata)) + +#define FILL_SEC_CPL_OP_IVINSR(id, len, hldr, ofst) \ + htonl( \ + CPL_TX_SEC_PDU_OPCODE_V(CPL_TX_SEC_PDU) | \ + CPL_TX_SEC_PDU_RXCHID_V((id)) | \ + CPL_TX_SEC_PDU_ACKFOLLOWS_V(0) | \ + CPL_TX_SEC_PDU_ULPTXLPBK_V(1) | \ + CPL_TX_SEC_PDU_CPLLEN_V((len)) | \ + CPL_TX_SEC_PDU_PLACEHOLDER_V((hldr)) | \ + CPL_TX_SEC_PDU_IVINSRTOFST_V((ofst))) + +#define FILL_SEC_CPL_CIPHERSTOP_HI(a_start, a_stop, c_start, c_stop_hi) \ + htonl( \ + CPL_TX_SEC_PDU_AADSTART_V((a_start)) | \ + CPL_TX_SEC_PDU_AADSTOP_V((a_stop)) | \ + CPL_TX_SEC_PDU_CIPHERSTART_V((c_start)) | \ + CPL_TX_SEC_PDU_CIPHERSTOP_HI_V((c_stop_hi))) + +#define FILL_SEC_CPL_AUTHINSERT(c_stop_lo, a_start, a_stop, a_inst) \ + htonl( \ + CPL_TX_SEC_PDU_CIPHERSTOP_LO_V((c_stop_lo)) | \ + CPL_TX_SEC_PDU_AUTHSTART_V((a_start)) | \ + CPL_TX_SEC_PDU_AUTHSTOP_V((a_stop)) | \ + CPL_TX_SEC_PDU_AUTHINSERT_V((a_inst))) + +#define FILL_SEC_CPL_SCMD0_SEQNO(ctrl, seq, cmode, amode, opad, size, nivs) \ + htonl( \ + SCMD_SEQ_NO_CTRL_V(0) | \ + SCMD_STATUS_PRESENT_V(0) | \ + SCMD_PROTO_VERSION_V(CHCR_SCMD_PROTO_VERSION_GENERIC) | \ + SCMD_ENC_DEC_CTRL_V((ctrl)) | \ + SCMD_CIPH_AUTH_SEQ_CTRL_V((seq)) | \ + SCMD_CIPH_MODE_V((cmode)) | \ + SCMD_AUTH_MODE_V((amode)) | \ + SCMD_HMAC_CTRL_V((opad)) | \ + SCMD_IV_SIZE_V((size)) | \ + SCMD_NUM_IVS_V((nivs))) + +#define FILL_SEC_CPL_IVGEN_HDRLEN(last, more, ctx_in, mac, ivdrop, len) htonl( \ + SCMD_ENB_DBGID_V(0) | \ + SCMD_IV_GEN_CTRL_V(0) | \ + SCMD_LAST_FRAG_V((last)) | \ + SCMD_MORE_FRAGS_V((more)) | \ + SCMD_TLS_COMPPDU_V(0) | \ + SCMD_KEY_CTX_INLINE_V((ctx_in)) | \ + SCMD_TLS_FRAG_ENABLE_V(0) | \ + SCMD_MAC_ONLY_V((mac)) | \ + SCMD_AADIVDROP_V((ivdrop)) | \ + SCMD_HDR_LEN_V((len))) + +#define FILL_KEY_CTX_HDR(ck_size, mk_size, d_ck, opad, ctx_len) \ + htonl(KEY_CONTEXT_VALID_V(1) | \ + KEY_CONTEXT_CK_SIZE_V((ck_size)) | \ + KEY_CONTEXT_MK_SIZE_V(mk_size) | \ + KEY_CONTEXT_DUAL_CK_V((d_ck)) | \ + KEY_CONTEXT_OPAD_PRESENT_V((opad)) | \ + KEY_CONTEXT_SALT_PRESENT_V(1) | \ + KEY_CONTEXT_CTX_LEN_V((ctx_len))) + +#define FILL_WR_OP_CCTX_SIZE(len, ctx_len) \ + htonl( \ + FW_CRYPTO_LOOKASIDE_WR_OPCODE_V( \ + FW_CRYPTO_LOOKASIDE_WR) | \ + FW_CRYPTO_LOOKASIDE_WR_COMPL_V(0) | \ + FW_CRYPTO_LOOKASIDE_WR_IMM_LEN_V((len)) | \ + FW_CRYPTO_LOOKASIDE_WR_CCTX_LOC_V(1) | \ + FW_CRYPTO_LOOKASIDE_WR_CCTX_SIZE_V((ctx_len))) + +#define FILL_WR_RX_Q_ID(cid, qid, wr_iv) \ + htonl( \ + FW_CRYPTO_LOOKASIDE_WR_RX_CHID_V((cid)) | \ + FW_CRYPTO_LOOKASIDE_WR_RX_Q_ID_V((qid)) | \ + FW_CRYPTO_LOOKASIDE_WR_LCB_V(0) | \ + FW_CRYPTO_LOOKASIDE_WR_IV_V((wr_iv))) + +#define FILL_ULPTX_CMD_DEST(cid) \ + htonl(ULPTX_CMD_V(ULP_TX_PKT) | \ + ULP_TXPKT_DEST_V(0) | \ + ULP_TXPKT_DATAMODIFY_V(0) | \ + ULP_TXPKT_CHANNELID_V((cid)) | \ + ULP_TXPKT_RO_V(1) | \ + ULP_TXPKT_FID_V(0)) + +#define KEYCTX_ALIGN_PAD(bs) ({unsigned int _bs = (bs);\ + _bs == SHA1_DIGEST_SIZE ? 12 : 0; }) + +#define FILL_PLD_SIZE_HASH_SIZE(payload_sgl_len, sgl_lengths, total_frags) \ + htonl(FW_CRYPTO_LOOKASIDE_WR_PLD_SIZE_V(payload_sgl_len ? \ + sgl_lengths[total_frags] : 0) |\ + FW_CRYPTO_LOOKASIDE_WR_HASH_SIZE_V(0)) + +#define FILL_LEN_PKD(calc_tx_flits_ofld, skb) \ + htonl(FW_CRYPTO_LOOKASIDE_WR_LEN16_V(DIV_ROUND_UP((\ + calc_tx_flits_ofld(skb) * 8), 16))) + +#define FILL_CMD_MORE(immdatalen) htonl(ULPTX_CMD_V(ULP_TX_SC_IMM) |\ + ULP_TX_SC_MORE_V((immdatalen) ? 0 : 1)) + +#define MAX_NK 8 +#define CRYPTO_MAX_IMM_TX_PKT_LEN 256 + +struct algo_param { + unsigned int auth_mode; + unsigned int mk_size; + unsigned int result_size; +}; + +struct hash_wr_param { + unsigned int opad_needed; + unsigned int more; + unsigned int last; + struct algo_param alg_prm; + unsigned int sg_len; + unsigned int bfr_len; + u64 scmd1; +}; + +enum { + AES_KEYLENGTH_128BIT = 128, + AES_KEYLENGTH_192BIT = 192, + AES_KEYLENGTH_256BIT = 256 +}; + +enum { + KEYLENGTH_3BYTES = 3, + KEYLENGTH_4BYTES = 4, + KEYLENGTH_6BYTES = 6, + KEYLENGTH_8BYTES = 8 +}; + +enum { + NUMBER_OF_ROUNDS_10 = 10, + NUMBER_OF_ROUNDS_12 = 12, + NUMBER_OF_ROUNDS_14 = 14, +}; + +/* + * CCM defines values of 4, 6, 8, 10, 12, 14, and 16 octets, + * where they indicate the size of the integrity check value (ICV) + */ +enum { + AES_CCM_ICV_4 = 4, + AES_CCM_ICV_6 = 6, + AES_CCM_ICV_8 = 8, + AES_CCM_ICV_10 = 10, + AES_CCM_ICV_12 = 12, + AES_CCM_ICV_14 = 14, + AES_CCM_ICV_16 = 16 +}; + +struct hash_op_params { + unsigned char mk_size; + unsigned char pad_align; + unsigned char auth_mode; + char hash_name[MAX_HASH_NAME]; + unsigned short block_size; + unsigned short word_size; + unsigned short ipad_size; +}; + +struct phys_sge_pairs { + __be16 len[8]; + __be64 addr[8]; +}; + +struct phys_sge_parm { + unsigned int nents; + unsigned int obsize; + unsigned short qid; + unsigned char align; +}; + +struct crypto_result { + struct completion completion; + int err; +}; + +static const u32 sha1_init[SHA1_DIGEST_SIZE / 4] = { + SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4, +}; + +static const u32 sha224_init[SHA256_DIGEST_SIZE / 4] = { + SHA224_H0, SHA224_H1, SHA224_H2, SHA224_H3, + SHA224_H4, SHA224_H5, SHA224_H6, SHA224_H7, +}; + +static const u32 sha256_init[SHA256_DIGEST_SIZE / 4] = { + SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3, + SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7, +}; + +static const u64 sha384_init[SHA512_DIGEST_SIZE / 8] = { + SHA384_H0, SHA384_H1, SHA384_H2, SHA384_H3, + SHA384_H4, SHA384_H5, SHA384_H6, SHA384_H7, +}; + +static const u64 sha512_init[SHA512_DIGEST_SIZE / 8] = { + SHA512_H0, SHA512_H1, SHA512_H2, SHA512_H3, + SHA512_H4, SHA512_H5, SHA512_H6, SHA512_H7, +}; + +static inline void copy_hash_init_values(char *key, int digestsize) +{ + u8 i; + __be32 *dkey = (__be32 *)key; + u64 *ldkey = (u64 *)key; + __be64 *sha384 = (__be64 *)sha384_init; + __be64 *sha512 = (__be64 *)sha512_init; + + switch (digestsize) { + case SHA1_DIGEST_SIZE: + for (i = 0; i < SHA1_INIT_STATE; i++) + dkey[i] = cpu_to_be32(sha1_init[i]); + break; + case SHA224_DIGEST_SIZE: + for (i = 0; i < SHA224_INIT_STATE; i++) + dkey[i] = cpu_to_be32(sha224_init[i]); + break; + case SHA256_DIGEST_SIZE: + for (i = 0; i < SHA256_INIT_STATE; i++) + dkey[i] = cpu_to_be32(sha256_init[i]); + break; + case SHA384_DIGEST_SIZE: + for (i = 0; i < SHA384_INIT_STATE; i++) + ldkey[i] = be64_to_cpu(sha384[i]); + break; + case SHA512_DIGEST_SIZE: + for (i = 0; i < SHA512_INIT_STATE; i++) + ldkey[i] = be64_to_cpu(sha512[i]); + break; + } +} + +static const u8 sgl_lengths[20] = { + 0, 1, 2, 3, 4, 4, 5, 6, 7, 7, 8, 9, 10, 10, 11, 12, 13, 13, 14, 15 +}; + +/* Number of len fields(8) * size of one addr field */ +#define PHYSDSGL_MAX_LEN_SIZE 16 + +static inline u16 get_space_for_phys_dsgl(unsigned int sgl_entr) +{ + /* len field size + addr field size */ + return ((sgl_entr >> 3) + ((sgl_entr % 8) ? + 1 : 0)) * PHYSDSGL_MAX_LEN_SIZE + + (sgl_entr << 3) + ((sgl_entr % 2 ? 1 : 0) << 3); +} + +/* The AES s-transform matrix (s-box). */ +static const u8 aes_sbox[256] = { + 99, 124, 119, 123, 242, 107, 111, 197, 48, 1, 103, 43, 254, 215, + 171, 118, 202, 130, 201, 125, 250, 89, 71, 240, 173, 212, 162, 175, + 156, 164, 114, 192, 183, 253, 147, 38, 54, 63, 247, 204, 52, 165, + 229, 241, 113, 216, 49, 21, 4, 199, 35, 195, 24, 150, 5, 154, 7, + 18, 128, 226, 235, 39, 178, 117, 9, 131, 44, 26, 27, 110, 90, + 160, 82, 59, 214, 179, 41, 227, 47, 132, 83, 209, 0, 237, 32, + 252, 177, 91, 106, 203, 190, 57, 74, 76, 88, 207, 208, 239, 170, + 251, 67, 77, 51, 133, 69, 249, 2, 127, 80, 60, 159, 168, 81, + 163, 64, 143, 146, 157, 56, 245, 188, 182, 218, 33, 16, 255, 243, + 210, 205, 12, 19, 236, 95, 151, 68, 23, 196, 167, 126, 61, 100, + 93, 25, 115, 96, 129, 79, 220, 34, 42, 144, 136, 70, 238, 184, + 20, 222, 94, 11, 219, 224, 50, 58, 10, 73, 6, 36, 92, 194, + 211, 172, 98, 145, 149, 228, 121, 231, 200, 55, 109, 141, 213, 78, + 169, 108, 86, 244, 234, 101, 122, 174, 8, 186, 120, 37, 46, 28, 166, + 180, 198, 232, 221, 116, 31, 75, 189, 139, 138, 112, 62, 181, 102, + 72, 3, 246, 14, 97, 53, 87, 185, 134, 193, 29, 158, 225, 248, + 152, 17, 105, 217, 142, 148, 155, 30, 135, 233, 206, 85, 40, 223, + 140, 161, 137, 13, 191, 230, 66, 104, 65, 153, 45, 15, 176, 84, + 187, 22 +}; + +static u32 aes_ks_subword(const u32 w) +{ + u8 bytes[4]; + + *(u32 *)(&bytes[0]) = w; + bytes[0] = aes_sbox[bytes[0]]; + bytes[1] = aes_sbox[bytes[1]]; + bytes[2] = aes_sbox[bytes[2]]; + bytes[3] = aes_sbox[bytes[3]]; + return *(u32 *)(&bytes[0]); +} + +static u32 round_constant[11] = { + 0x01000000, 0x02000000, 0x04000000, 0x08000000, + 0x10000000, 0x20000000, 0x40000000, 0x80000000, + 0x1B000000, 0x36000000, 0x6C000000 +}; + +/* dec_key - OUTPUT - Reverse round key + * key - INPUT - key + * keylength - INPUT - length of the key in number of bits + */ +static inline void get_aes_decrypt_key(unsigned char *dec_key, + const unsigned char *key, + unsigned int keylength) +{ + u32 temp; + u32 w_ring[MAX_NK]; + int i, j, k = 0; + u8 nr, nk; + + switch (keylength) { + case AES_KEYLENGTH_128BIT: + nk = KEYLENGTH_4BYTES; + nr = NUMBER_OF_ROUNDS_10; + break; + + case AES_KEYLENGTH_192BIT: + nk = KEYLENGTH_6BYTES; + nr = NUMBER_OF_ROUNDS_12; + break; + case AES_KEYLENGTH_256BIT: + nk = KEYLENGTH_8BYTES; + nr = NUMBER_OF_ROUNDS_14; + break; + default: + return; + } + for (i = 0; i < nk; i++ ) + w_ring[i] = be32_to_cpu(*(u32 *)&key[4 * i]); + + i = 0; + temp = w_ring[nk - 1]; + while(i + nk < (nr + 1) * 4) { + if(!(i % nk)) { + /* RotWord(temp) */ + temp = (temp << 8) | (temp >> 24); + temp = aes_ks_subword(temp); + temp ^= round_constant[i / nk]; + } + else if (nk == 8 && (i % 4 == 0)) + temp = aes_ks_subword(temp); + w_ring[i % nk] ^= temp; + temp = w_ring[i % nk]; + i++; + } + for (k = 0, j = i % nk; k < nk; k++) { + *((u32 *)dec_key + k) = htonl(w_ring[j]); + j--; + if(j < 0) + j += nk; + } +} + +#endif /* __CHCR_ALGO_H__ */ diff --git a/drivers/crypto/chelsio/chcr_core.c b/drivers/crypto/chelsio/chcr_core.c new file mode 100644 index 000000000000..2f6156b672ce --- /dev/null +++ b/drivers/crypto/chelsio/chcr_core.c @@ -0,0 +1,240 @@ +/** + * This file is part of the Chelsio T4/T5/T6 Ethernet driver for Linux. + * + * Copyright (C) 2011-2016 Chelsio Communications. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation. + * + * Written and Maintained by: + * Manoj Malviya (manojmalviya@chelsio.com) + * Atul Gupta (atul.gupta@chelsio.com) + * Jitendra Lulla (jlulla@chelsio.com) + * Yeshaswi M R Gowda (yeshaswi@chelsio.com) + * Harsh Jain (harsh@chelsio.com) + */ + +#include +#include +#include + +#include +#include + +#include "t4_msg.h" +#include "chcr_core.h" +#include "cxgb4_uld.h" + +static LIST_HEAD(uld_ctx_list); +static DEFINE_MUTEX(dev_mutex); +static atomic_t dev_count; + +typedef int (*chcr_handler_func)(struct chcr_dev *dev, unsigned char *input); +static int cpl_fw6_pld_handler(struct chcr_dev *dev, unsigned char *input); +static void *chcr_uld_add(const struct cxgb4_lld_info *lld); +static int chcr_uld_state_change(void *handle, enum cxgb4_state state); + +static chcr_handler_func work_handlers[NUM_CPL_CMDS] = { + [CPL_FW6_PLD] = cpl_fw6_pld_handler, +}; + +static struct cxgb4_pci_uld_info chcr_uld_info = { + .name = DRV_MODULE_NAME, + .nrxq = 4, + .rxq_size = 1024, + .nciq = 0, + .ciq_size = 0, + .add = chcr_uld_add, + .state_change = chcr_uld_state_change, + .rx_handler = chcr_uld_rx_handler, +}; + +int assign_chcr_device(struct chcr_dev **dev) +{ + struct uld_ctx *u_ctx; + + /* + * Which device to use if multiple devices are available TODO + * May be select the device based on round robin. One session + * must go to the same device to maintain the ordering. + */ + mutex_lock(&dev_mutex); /* TODO ? */ + u_ctx = list_first_entry(&uld_ctx_list, struct uld_ctx, entry); + if (!u_ctx) { + mutex_unlock(&dev_mutex); + return -ENXIO; + } + + *dev = u_ctx->dev; + mutex_unlock(&dev_mutex); + return 0; +} + +static int chcr_dev_add(struct uld_ctx *u_ctx) +{ + struct chcr_dev *dev; + + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + return -ENXIO; + + spin_lock_init(&dev->lock_chcr_dev); + u_ctx->dev = dev; + dev->u_ctx = u_ctx; + atomic_inc(&dev_count); + return 0; +} + +static int chcr_dev_remove(struct uld_ctx *u_ctx) +{ + kfree(u_ctx->dev); + u_ctx->dev = NULL; + atomic_dec(&dev_count); + return 0; +} + +static int cpl_fw6_pld_handler(struct chcr_dev *dev, + unsigned char *input) +{ + struct crypto_async_request *req; + struct cpl_fw6_pld *fw6_pld; + u32 ack_err_status = 0; + int error_status = 0; + + fw6_pld = (struct cpl_fw6_pld *)input; + req = (struct crypto_async_request *)(uintptr_t)be64_to_cpu( + fw6_pld->data[1]); + + ack_err_status = + ntohl(*(__be32 *)((unsigned char *)&fw6_pld->data[0] + 4)); + if (ack_err_status) { + if (CHK_MAC_ERR_BIT(ack_err_status) || + CHK_PAD_ERR_BIT(ack_err_status)) + error_status = -EINVAL; + } + /* call completion callback with failure status */ + if (req) { + if (!chcr_handle_resp(req, input, error_status)) + req->complete(req, error_status); + else + return -EINVAL; + } else { + pr_err("Incorrect request address from the firmware\n"); + return -EFAULT; + } + return 0; +} + +int chcr_send_wr(struct sk_buff *skb) +{ + return cxgb4_ofld_send(skb->dev, skb); +} + +static void *chcr_uld_add(const struct cxgb4_lld_info *lld) +{ + struct uld_ctx *u_ctx; + + /* Create the device and add it in the device list */ + u_ctx = kzalloc(sizeof(*u_ctx), GFP_KERNEL); + if (!u_ctx) { + u_ctx = ERR_PTR(-ENOMEM); + goto out; + } + u_ctx->lldi = *lld; + mutex_lock(&dev_mutex); + list_add_tail(&u_ctx->entry, &uld_ctx_list); + mutex_unlock(&dev_mutex); +out: + return u_ctx; +} + +int chcr_uld_rx_handler(void *handle, const __be64 *rsp, + const struct pkt_gl *pgl) +{ + struct uld_ctx *u_ctx = (struct uld_ctx *)handle; + struct chcr_dev *dev = u_ctx->dev; + const struct cpl_act_establish *rpl = (struct cpl_act_establish + *)rsp; + + if (rpl->ot.opcode != CPL_FW6_PLD) { + pr_err("Unsupported opcode\n"); + return 0; + } + + if (!pgl) + work_handlers[rpl->ot.opcode](dev, (unsigned char *)&rsp[1]); + else + work_handlers[rpl->ot.opcode](dev, pgl->va); + return 0; +} + +static int chcr_uld_state_change(void *handle, enum cxgb4_state state) +{ + struct uld_ctx *u_ctx = handle; + int ret = 0; + + switch (state) { + case CXGB4_STATE_UP: + if (!u_ctx->dev) { + ret = chcr_dev_add(u_ctx); + if (ret != 0) + return ret; + } + if (atomic_read(&dev_count) == 1) + ret = start_crypto(); + break; + + case CXGB4_STATE_DETACH: + if (u_ctx->dev) { + mutex_lock(&dev_mutex); + chcr_dev_remove(u_ctx); + mutex_unlock(&dev_mutex); + } + if (!atomic_read(&dev_count)) + stop_crypto(); + break; + + case CXGB4_STATE_START_RECOVERY: + case CXGB4_STATE_DOWN: + default: + break; + } + return ret; +} + +static int __init chcr_crypto_init(void) +{ + if (cxgb4_register_pci_uld(CXGB4_PCI_ULD1, &chcr_uld_info)) { + pr_err("ULD register fail: No chcr crypto support in cxgb4"); + return -1; + } + + return 0; +} + +static void __exit chcr_crypto_exit(void) +{ + struct uld_ctx *u_ctx, *tmp; + + if (atomic_read(&dev_count)) + stop_crypto(); + + /* Remove all devices from list */ + mutex_lock(&dev_mutex); + list_for_each_entry_safe(u_ctx, tmp, &uld_ctx_list, entry) { + if (u_ctx->dev) + chcr_dev_remove(u_ctx); + kfree(u_ctx); + } + mutex_unlock(&dev_mutex); + cxgb4_unregister_pci_uld(CXGB4_PCI_ULD1); +} + +module_init(chcr_crypto_init); +module_exit(chcr_crypto_exit); + +MODULE_DESCRIPTION("Crypto Co-processor for Chelsio Terminator cards."); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Chelsio Communications"); +MODULE_VERSION(DRV_VERSION); diff --git a/drivers/crypto/chelsio/chcr_core.h b/drivers/crypto/chelsio/chcr_core.h new file mode 100644 index 000000000000..2a5c671a4232 --- /dev/null +++ b/drivers/crypto/chelsio/chcr_core.h @@ -0,0 +1,80 @@ +/* + * This file is part of the Chelsio T6 Crypto driver for Linux. + * + * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef __CHCR_CORE_H__ +#define __CHCR_CORE_H__ + +#include +#include "t4_hw.h" +#include "cxgb4.h" +#include "cxgb4_uld.h" + +#define DRV_MODULE_NAME "chcr" +#define DRV_VERSION "1.0.0.0" + +#define MAX_PENDING_REQ_TO_HW 20 +#define CHCR_TEST_RESPONSE_TIMEOUT 1000 + +#define PAD_ERROR_BIT 1 +#define CHK_PAD_ERR_BIT(x) (((x) >> PAD_ERROR_BIT) & 1) + +#define MAC_ERROR_BIT 0 +#define CHK_MAC_ERR_BIT(x) (((x) >> MAC_ERROR_BIT) & 1) + +struct uld_ctx; + +struct chcr_dev { + /* Request submited to h/w and waiting for response. */ + spinlock_t lock_chcr_dev; + struct crypto_queue pending_queue; + struct uld_ctx *u_ctx; + unsigned char tx_channel_id; +}; + +struct uld_ctx { + struct list_head entry; + struct cxgb4_lld_info lldi; + struct chcr_dev *dev; +}; + +int assign_chcr_device(struct chcr_dev **dev); +int chcr_send_wr(struct sk_buff *skb); +int start_crypto(void); +int stop_crypto(void); +int chcr_uld_rx_handler(void *handle, const __be64 *rsp, + const struct pkt_gl *pgl); +int chcr_handle_resp(struct crypto_async_request *req, unsigned char *input, + int err); +#endif /* __CHCR_CORE_H__ */ diff --git a/drivers/crypto/chelsio/chcr_crypto.h b/drivers/crypto/chelsio/chcr_crypto.h new file mode 100644 index 000000000000..d7d75605da8b --- /dev/null +++ b/drivers/crypto/chelsio/chcr_crypto.h @@ -0,0 +1,203 @@ +/* + * This file is part of the Chelsio T6 Crypto driver for Linux. + * + * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef __CHCR_CRYPTO_H__ +#define __CHCR_CRYPTO_H__ + +/* Define following if h/w is not dropping the AAD and IV data before + * giving the processed data + */ + +#define CHCR_CRA_PRIORITY 300 + +#define CHCR_AES_MAX_KEY_LEN (2 * (AES_MAX_KEY_SIZE)) /* consider xts */ +#define CHCR_MAX_CRYPTO_IV_LEN 16 /* AES IV len */ + +#define CHCR_MAX_AUTHENC_AES_KEY_LEN 32 /* max aes key length*/ +#define CHCR_MAX_AUTHENC_SHA_KEY_LEN 128 /* max sha key length*/ + +#define CHCR_GIVENCRYPT_OP 2 +/* CPL/SCMD parameters */ + +#define CHCR_ENCRYPT_OP 0 +#define CHCR_DECRYPT_OP 1 + +#define CHCR_SCMD_SEQ_NO_CTRL_32BIT 1 +#define CHCR_SCMD_SEQ_NO_CTRL_48BIT 2 +#define CHCR_SCMD_SEQ_NO_CTRL_64BIT 3 + +#define CHCR_SCMD_PROTO_VERSION_GENERIC 4 + +#define CHCR_SCMD_AUTH_CTRL_AUTH_CIPHER 0 +#define CHCR_SCMD_AUTH_CTRL_CIPHER_AUTH 1 + +#define CHCR_SCMD_CIPHER_MODE_NOP 0 +#define CHCR_SCMD_CIPHER_MODE_AES_CBC 1 +#define CHCR_SCMD_CIPHER_MODE_GENERIC_AES 4 +#define CHCR_SCMD_CIPHER_MODE_AES_XTS 6 + +#define CHCR_SCMD_AUTH_MODE_NOP 0 +#define CHCR_SCMD_AUTH_MODE_SHA1 1 +#define CHCR_SCMD_AUTH_MODE_SHA224 2 +#define CHCR_SCMD_AUTH_MODE_SHA256 3 +#define CHCR_SCMD_AUTH_MODE_SHA512_224 5 +#define CHCR_SCMD_AUTH_MODE_SHA512_256 6 +#define CHCR_SCMD_AUTH_MODE_SHA512_384 7 +#define CHCR_SCMD_AUTH_MODE_SHA512_512 8 + +#define CHCR_SCMD_HMAC_CTRL_NOP 0 +#define CHCR_SCMD_HMAC_CTRL_NO_TRUNC 1 + +#define CHCR_SCMD_IVGEN_CTRL_HW 0 +#define CHCR_SCMD_IVGEN_CTRL_SW 1 +/* This are not really mac key size. They are intermediate values + * of sha engine and its size + */ +#define CHCR_KEYCTX_MAC_KEY_SIZE_128 0 +#define CHCR_KEYCTX_MAC_KEY_SIZE_160 1 +#define CHCR_KEYCTX_MAC_KEY_SIZE_192 2 +#define CHCR_KEYCTX_MAC_KEY_SIZE_256 3 +#define CHCR_KEYCTX_MAC_KEY_SIZE_512 4 +#define CHCR_KEYCTX_CIPHER_KEY_SIZE_128 0 +#define CHCR_KEYCTX_CIPHER_KEY_SIZE_192 1 +#define CHCR_KEYCTX_CIPHER_KEY_SIZE_256 2 +#define CHCR_KEYCTX_NO_KEY 15 + +#define CHCR_CPL_FW4_PLD_IV_OFFSET (5 * 64) /* bytes. flt #5 and #6 */ +#define CHCR_CPL_FW4_PLD_HASH_RESULT_OFFSET (7 * 64) /* bytes. flt #7 */ +#define CHCR_CPL_FW4_PLD_DATA_SIZE (4 * 64) /* bytes. flt #4 to #7 */ + +#define KEY_CONTEXT_HDR_SALT_AND_PAD 16 +#define flits_to_bytes(x) (x * 8) + +#define IV_NOP 0 +#define IV_IMMEDIATE 1 +#define IV_DSGL 2 + +#define CRYPTO_ALG_SUB_TYPE_MASK 0x0f000000 +#define CRYPTO_ALG_SUB_TYPE_HASH_HMAC 0x01000000 +#define CRYPTO_ALG_TYPE_HMAC (CRYPTO_ALG_TYPE_AHASH |\ + CRYPTO_ALG_SUB_TYPE_HASH_HMAC) + +#define MAX_SALT 4 +#define MAX_SCRATCH_PAD_SIZE 32 + +#define CHCR_HASH_MAX_BLOCK_SIZE_64 64 +#define CHCR_HASH_MAX_BLOCK_SIZE_128 128 + +/* Aligned to 128 bit boundary */ +struct _key_ctx { + __be32 ctx_hdr; + u8 salt[MAX_SALT]; + __be64 reserverd; + unsigned char key[0]; +}; + +struct ablk_ctx { + u8 enc; + unsigned int processed_len; + __be32 key_ctx_hdr; + unsigned int enckey_len; + unsigned int dst_nents; + struct scatterlist iv_sg; + u8 key[CHCR_AES_MAX_KEY_LEN]; + u8 iv[CHCR_MAX_CRYPTO_IV_LEN]; + unsigned char ciph_mode; +}; + +struct hmac_ctx { + struct shash_desc *desc; + u8 ipad[CHCR_HASH_MAX_BLOCK_SIZE_128]; + u8 opad[CHCR_HASH_MAX_BLOCK_SIZE_128]; +}; + +struct __crypto_ctx { + struct hmac_ctx hmacctx[0]; + struct ablk_ctx ablkctx[0]; +}; + +struct chcr_context { + struct chcr_dev *dev; + unsigned char tx_channel_id; + struct __crypto_ctx crypto_ctx[0]; +}; + +struct chcr_ahash_req_ctx { + u32 result; + char bfr[CHCR_HASH_MAX_BLOCK_SIZE_128]; + u8 bfr_len; + /* DMA the partial hash in it */ + u8 partial_hash[CHCR_HASH_MAX_DIGEST_SIZE]; + u64 data_len; /* Data len till time */ + void *dummy_payload_ptr; + /* SKB which is being sent to the hardware for processing */ + struct sk_buff *skb; +}; + +struct chcr_blkcipher_req_ctx { + struct sk_buff *skb; +}; + +struct chcr_alg_template { + u32 type; + u32 is_registered; + union { + struct crypto_alg crypto; + struct ahash_alg hash; + } alg; +}; + +struct chcr_req_ctx { + union { + struct ahash_request *ahash_req; + struct ablkcipher_request *ablk_req; + } req; + union { + struct chcr_ahash_req_ctx *ahash_ctx; + struct chcr_blkcipher_req_ctx *ablk_ctx; + } ctx; +}; + +struct sge_opaque_hdr { + void *dev; + dma_addr_t addr[MAX_SKB_FRAGS + 1]; +}; + +typedef struct sk_buff *(*create_wr_t)(struct crypto_async_request *req, + struct chcr_context *ctx, + unsigned short qid, + unsigned short op_type); + +#endif /* __CHCR_CRYPTO_H__ */ From 02038fd6645a08df1d3b37c12a065940b15ed4fe Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Wed, 17 Aug 2016 12:33:06 +0530 Subject: [PATCH 0330/1715] crypto: Added Chelsio Menu to the Kconfig file Adds the config entry for the Chelsio Crypto Driver, Makefile changes for the same. Signed-off-by: Atul Gupta Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- drivers/crypto/Kconfig | 2 ++ drivers/crypto/Makefile | 1 + drivers/crypto/chelsio/Kconfig | 19 +++++++++++++++++++ drivers/crypto/chelsio/Makefile | 4 ++++ 4 files changed, 26 insertions(+) create mode 100644 drivers/crypto/chelsio/Kconfig create mode 100644 drivers/crypto/chelsio/Makefile diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index 1af94e2d1a25..9b035b7d7f4f 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -550,4 +550,6 @@ config CRYPTO_DEV_ROCKCHIP This driver interfaces with the hardware crypto accelerator. Supporting cbc/ecb chainmode, and aes/des/des3_ede cipher mode. +source "drivers/crypto/chelsio/Kconfig" + endif # CRYPTO_HW diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile index 3c6432dd09d9..ad7250fa1348 100644 --- a/drivers/crypto/Makefile +++ b/drivers/crypto/Makefile @@ -31,3 +31,4 @@ obj-$(CONFIG_CRYPTO_DEV_QCE) += qce/ obj-$(CONFIG_CRYPTO_DEV_VMX) += vmx/ obj-$(CONFIG_CRYPTO_DEV_SUN4I_SS) += sunxi-ss/ obj-$(CONFIG_CRYPTO_DEV_ROCKCHIP) += rockchip/ +obj-$(CONFIG_CRYPTO_DEV_CHELSIO) += chelsio/ diff --git a/drivers/crypto/chelsio/Kconfig b/drivers/crypto/chelsio/Kconfig new file mode 100644 index 000000000000..4ce67fb9a880 --- /dev/null +++ b/drivers/crypto/chelsio/Kconfig @@ -0,0 +1,19 @@ +config CRYPTO_DEV_CHELSIO + tristate "Chelsio Crypto Co-processor Driver" + depends on CHELSIO_T4 + select CRYPTO_SHA1 + select CRYPTO_SHA256 + select CRYPTO_SHA512 + ---help--- + The Chelsio Crypto Co-processor driver for T6 adapters. + + For general information about Chelsio and our products, visit + our website at . + + For customer support, please visit our customer support page at + . + + Please send feedback to . + + To compile this driver as a module, choose M here: the module + will be called chcr. diff --git a/drivers/crypto/chelsio/Makefile b/drivers/crypto/chelsio/Makefile new file mode 100644 index 000000000000..bebdf06687ad --- /dev/null +++ b/drivers/crypto/chelsio/Makefile @@ -0,0 +1,4 @@ +ccflags-y := -Idrivers/net/ethernet/chelsio/cxgb4 + +obj-$(CONFIG_CRYPTO_DEV_CHELSIO) += chcr.o +chcr-objs := chcr_core.o chcr_algo.o From 4c51e7db87b753ce16c35ed0111eb273fe1a5135 Mon Sep 17 00:00:00 2001 From: Bob Copeland Date: Thu, 18 Aug 2016 16:52:08 +0300 Subject: [PATCH 0331/1715] ath9k: fix misleading indent Fixes smatch warning: ath9k_vif_iter_set_beacon() warn if statement not indented Signed-off-by: Bob Copeland Reviewed-by: Julian Calaby Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath9k/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index a394622c9022..eb00724d560a 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -919,7 +919,7 @@ static void ath9k_vif_iter_set_beacon(struct ath9k_vif_iter_data *iter_data, } else { if (iter_data->primary_beacon_vif->type != NL80211_IFTYPE_AP && vif->type == NL80211_IFTYPE_AP) - iter_data->primary_beacon_vif = vif; + iter_data->primary_beacon_vif = vif; } iter_data->beacons = true; From a5d268277ad566cbcf53a2e6f05fd6db81a61c41 Mon Sep 17 00:00:00 2001 From: Eduardo Abinader Date: Thu, 18 Aug 2016 16:52:09 +0300 Subject: [PATCH 0332/1715] ath9k: consider return code on just to comply with current ath9k_hw_nvram_read to return value, hence behaving reacting accordingly. Signed-off-by: Eduardo Abinader Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath9k/ar9003_eeprom.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c b/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c index 5bd2cbaf582d..08607d7fdb56 100644 --- a/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c +++ b/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c @@ -3252,7 +3252,8 @@ static int ar9300_eeprom_restore_flash(struct ath_hw *ah, u8 *mptr, int i; for (i = 0; i < mdata_size / 2; i++, data++) - ath9k_hw_nvram_read(ah, i, data); + if (!ath9k_hw_nvram_read(ah, i, data)) + return -EIO; return 0; } @@ -3282,7 +3283,8 @@ static int ar9300_eeprom_restore_internal(struct ath_hw *ah, if (ath9k_hw_use_flash(ah)) { u8 txrx; - ar9300_eeprom_restore_flash(ah, mptr, mdata_size); + if (ar9300_eeprom_restore_flash(ah, mptr, mdata_size)) + return -EIO; /* check if eeprom contains valid data */ eep = (struct ar9300_eeprom *) mptr; From 0163b03199006a6ba0d2c991c311e8ac93fcb208 Mon Sep 17 00:00:00 2001 From: Maya Erez Date: Thu, 18 Aug 2016 16:52:10 +0300 Subject: [PATCH 0333/1715] wil6210: align to latest auto generated wmi.h Align to latest version of the auto generated wmi file describing the interface with FW. Signed-off-by: Maya Erez Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/wil6210/wmi.h | 640 ++++++++++++++++++++++--- 1 file changed, 561 insertions(+), 79 deletions(-) diff --git a/drivers/net/wireless/ath/wil6210/wmi.h b/drivers/net/wireless/ath/wil6210/wmi.h index 685fe0ddea26..349510cdb153 100644 --- a/drivers/net/wireless/ath/wil6210/wmi.h +++ b/drivers/net/wireless/ath/wil6210/wmi.h @@ -120,6 +120,8 @@ enum wmi_command_id { WMI_BF_SM_MGMT_CMDID = 0x838, WMI_BF_RXSS_MGMT_CMDID = 0x839, WMI_BF_TRIG_CMDID = 0x83A, + WMI_LINK_MAINTAIN_CFG_WRITE_CMDID = 0x842, + WMI_LINK_MAINTAIN_CFG_READ_CMDID = 0x843, WMI_SET_SECTORS_CMDID = 0x849, WMI_MAINTAIN_PAUSE_CMDID = 0x850, WMI_MAINTAIN_RESUME_CMDID = 0x851, @@ -134,10 +136,15 @@ enum wmi_command_id { WMI_BF_CTRL_CMDID = 0x862, WMI_NOTIFY_REQ_CMDID = 0x863, WMI_GET_STATUS_CMDID = 0x864, + WMI_GET_RF_STATUS_CMDID = 0x866, + WMI_GET_BASEBAND_TYPE_CMDID = 0x867, WMI_UNIT_TEST_CMDID = 0x900, WMI_HICCUP_CMDID = 0x901, WMI_FLASH_READ_CMDID = 0x902, WMI_FLASH_WRITE_CMDID = 0x903, + /* Power management */ + WMI_TRAFFIC_DEFERRAL_CMDID = 0x904, + WMI_TRAFFIC_RESUME_CMDID = 0x905, /* P2P */ WMI_P2P_CFG_CMDID = 0x910, WMI_PORT_ALLOCATE_CMDID = 0x911, @@ -150,6 +157,19 @@ enum wmi_command_id { WMI_PCP_START_CMDID = 0x918, WMI_PCP_STOP_CMDID = 0x919, WMI_GET_PCP_FACTOR_CMDID = 0x91B, + /* Power Save Configuration Commands */ + WMI_PS_DEV_PROFILE_CFG_CMDID = 0x91C, + /* Not supported yet */ + WMI_PS_DEV_CFG_CMDID = 0x91D, + /* Not supported yet */ + WMI_PS_DEV_CFG_READ_CMDID = 0x91E, + /* Per MAC Power Save Configuration commands + * Not supported yet + */ + WMI_PS_MID_CFG_CMDID = 0x91F, + /* Not supported yet */ + WMI_PS_MID_CFG_READ_CMDID = 0x920, + WMI_RS_CFG_CMDID = 0x921, WMI_SET_MAC_ADDRESS_CMDID = 0xF003, WMI_ABORT_SCAN_CMDID = 0xF007, WMI_SET_PROMISCUOUS_MODE_CMDID = 0xF041, @@ -291,9 +311,8 @@ enum wmi_scan_type { /* WMI_START_SCAN_CMDID */ struct wmi_start_scan_cmd { u8 direct_scan_mac_addr[WMI_MAC_LEN]; - /* DMG Beacon frame is transmitted during active scanning */ + /* run scan with discovery beacon. Relevant for ACTIVE scan only. */ u8 discovery_mode; - /* reserved */ u8 reserved; /* Max duration in the home channel(ms) */ __le32 dwell_time; @@ -453,6 +472,12 @@ struct wmi_port_delete_cmd { u8 reserved[3]; } __packed; +/* WMI_TRAFFIC_DEFERRAL_CMDID */ +struct wmi_traffic_deferral_cmd { + /* Bit vector: bit[0] - wake on Unicast, bit[1] - wake on Broadcast */ + u8 wakeup_trigger; +} __packed; + /* WMI_P2P_CFG_CMDID */ enum wmi_discovery_mode { WMI_DISCOVERY_MODE_NON_OFFLOAD = 0x00, @@ -822,81 +847,99 @@ struct wmi_pmc_cmd { * List of Events (target to host) */ enum wmi_event_id { - WMI_READY_EVENTID = 0x1001, - WMI_CONNECT_EVENTID = 0x1002, - WMI_DISCONNECT_EVENTID = 0x1003, - WMI_SCAN_COMPLETE_EVENTID = 0x100A, - WMI_REPORT_STATISTICS_EVENTID = 0x100B, - WMI_RD_MEM_RSP_EVENTID = 0x1800, - WMI_FW_READY_EVENTID = 0x1801, - WMI_EXIT_FAST_MEM_ACC_MODE_EVENTID = 0x200, - WMI_ECHO_RSP_EVENTID = 0x1803, - WMI_FS_TUNE_DONE_EVENTID = 0x180A, - WMI_CORR_MEASURE_EVENTID = 0x180B, - WMI_READ_RSSI_EVENTID = 0x180C, - WMI_TEMP_SENSE_DONE_EVENTID = 0x180E, - WMI_DC_CALIB_DONE_EVENTID = 0x180F, - WMI_IQ_TX_CALIB_DONE_EVENTID = 0x1811, - WMI_IQ_RX_CALIB_DONE_EVENTID = 0x1812, - WMI_SET_WORK_MODE_DONE_EVENTID = 0x1815, - WMI_LO_LEAKAGE_CALIB_DONE_EVENTID = 0x1816, - WMI_MARLON_R_READ_DONE_EVENTID = 0x1818, - WMI_MARLON_R_WRITE_DONE_EVENTID = 0x1819, - WMI_MARLON_R_TXRX_SEL_DONE_EVENTID = 0x181A, - WMI_SILENT_RSSI_CALIB_DONE_EVENTID = 0x181D, - WMI_RF_RX_TEST_DONE_EVENTID = 0x181E, - WMI_CFG_RX_CHAIN_DONE_EVENTID = 0x1820, - WMI_VRING_CFG_DONE_EVENTID = 0x1821, - WMI_BA_STATUS_EVENTID = 0x1823, - WMI_RCP_ADDBA_REQ_EVENTID = 0x1824, - WMI_RCP_ADDBA_RESP_SENT_EVENTID = 0x1825, - WMI_DELBA_EVENTID = 0x1826, - WMI_GET_SSID_EVENTID = 0x1828, - WMI_GET_PCP_CHANNEL_EVENTID = 0x182A, - WMI_SW_TX_COMPLETE_EVENTID = 0x182B, - WMI_READ_MAC_RXQ_EVENTID = 0x1830, - WMI_READ_MAC_TXQ_EVENTID = 0x1831, - WMI_WRITE_MAC_RXQ_EVENTID = 0x1832, - WMI_WRITE_MAC_TXQ_EVENTID = 0x1833, - WMI_WRITE_MAC_XQ_FIELD_EVENTID = 0x1834, - WMI_BEAMFORMING_MGMT_DONE_EVENTID = 0x1836, - WMI_BF_TXSS_MGMT_DONE_EVENTID = 0x1837, - WMI_BF_RXSS_MGMT_DONE_EVENTID = 0x1839, - WMI_RS_MGMT_DONE_EVENTID = 0x1852, - WMI_RF_MGMT_STATUS_EVENTID = 0x1853, - WMI_THERMAL_THROTTLING_STATUS_EVENTID = 0x1855, - WMI_BF_SM_MGMT_DONE_EVENTID = 0x1838, - WMI_RX_MGMT_PACKET_EVENTID = 0x1840, - WMI_TX_MGMT_PACKET_EVENTID = 0x1841, - WMI_OTP_READ_RESULT_EVENTID = 0x1856, - WMI_LED_CFG_DONE_EVENTID = 0x1858, + WMI_READY_EVENTID = 0x1001, + WMI_CONNECT_EVENTID = 0x1002, + WMI_DISCONNECT_EVENTID = 0x1003, + WMI_SCAN_COMPLETE_EVENTID = 0x100A, + WMI_REPORT_STATISTICS_EVENTID = 0x100B, + WMI_RD_MEM_RSP_EVENTID = 0x1800, + WMI_FW_READY_EVENTID = 0x1801, + WMI_EXIT_FAST_MEM_ACC_MODE_EVENTID = 0x200, + WMI_ECHO_RSP_EVENTID = 0x1803, + WMI_FS_TUNE_DONE_EVENTID = 0x180A, + WMI_CORR_MEASURE_EVENTID = 0x180B, + WMI_READ_RSSI_EVENTID = 0x180C, + WMI_TEMP_SENSE_DONE_EVENTID = 0x180E, + WMI_DC_CALIB_DONE_EVENTID = 0x180F, + WMI_IQ_TX_CALIB_DONE_EVENTID = 0x1811, + WMI_IQ_RX_CALIB_DONE_EVENTID = 0x1812, + WMI_SET_WORK_MODE_DONE_EVENTID = 0x1815, + WMI_LO_LEAKAGE_CALIB_DONE_EVENTID = 0x1816, + WMI_MARLON_R_READ_DONE_EVENTID = 0x1818, + WMI_MARLON_R_WRITE_DONE_EVENTID = 0x1819, + WMI_MARLON_R_TXRX_SEL_DONE_EVENTID = 0x181A, + WMI_SILENT_RSSI_CALIB_DONE_EVENTID = 0x181D, + WMI_RF_RX_TEST_DONE_EVENTID = 0x181E, + WMI_CFG_RX_CHAIN_DONE_EVENTID = 0x1820, + WMI_VRING_CFG_DONE_EVENTID = 0x1821, + WMI_BA_STATUS_EVENTID = 0x1823, + WMI_RCP_ADDBA_REQ_EVENTID = 0x1824, + WMI_RCP_ADDBA_RESP_SENT_EVENTID = 0x1825, + WMI_DELBA_EVENTID = 0x1826, + WMI_GET_SSID_EVENTID = 0x1828, + WMI_GET_PCP_CHANNEL_EVENTID = 0x182A, + WMI_SW_TX_COMPLETE_EVENTID = 0x182B, + WMI_READ_MAC_RXQ_EVENTID = 0x1830, + WMI_READ_MAC_TXQ_EVENTID = 0x1831, + WMI_WRITE_MAC_RXQ_EVENTID = 0x1832, + WMI_WRITE_MAC_TXQ_EVENTID = 0x1833, + WMI_WRITE_MAC_XQ_FIELD_EVENTID = 0x1834, + WMI_BEAMFORMING_MGMT_DONE_EVENTID = 0x1836, + WMI_BF_TXSS_MGMT_DONE_EVENTID = 0x1837, + WMI_BF_RXSS_MGMT_DONE_EVENTID = 0x1839, + WMI_RS_MGMT_DONE_EVENTID = 0x1852, + WMI_RF_MGMT_STATUS_EVENTID = 0x1853, + WMI_THERMAL_THROTTLING_STATUS_EVENTID = 0x1855, + WMI_BF_SM_MGMT_DONE_EVENTID = 0x1838, + WMI_RX_MGMT_PACKET_EVENTID = 0x1840, + WMI_TX_MGMT_PACKET_EVENTID = 0x1841, + WMI_LINK_MAINTAIN_CFG_WRITE_DONE_EVENTID = 0x1842, + WMI_LINK_MAINTAIN_CFG_READ_DONE_EVENTID = 0x1843, + WMI_OTP_READ_RESULT_EVENTID = 0x1856, + WMI_LED_CFG_DONE_EVENTID = 0x1858, /* Performance monitoring events */ - WMI_DATA_PORT_OPEN_EVENTID = 0x1860, - WMI_WBE_LINK_DOWN_EVENTID = 0x1861, - WMI_BF_CTRL_DONE_EVENTID = 0x1862, - WMI_NOTIFY_REQ_DONE_EVENTID = 0x1863, - WMI_GET_STATUS_DONE_EVENTID = 0x1864, - WMI_VRING_EN_EVENTID = 0x1865, - WMI_UNIT_TEST_EVENTID = 0x1900, - WMI_FLASH_READ_DONE_EVENTID = 0x1902, - WMI_FLASH_WRITE_DONE_EVENTID = 0x1903, + WMI_DATA_PORT_OPEN_EVENTID = 0x1860, + WMI_WBE_LINK_DOWN_EVENTID = 0x1861, + WMI_BF_CTRL_DONE_EVENTID = 0x1862, + WMI_NOTIFY_REQ_DONE_EVENTID = 0x1863, + WMI_GET_STATUS_DONE_EVENTID = 0x1864, + WMI_VRING_EN_EVENTID = 0x1865, + WMI_GET_RF_STATUS_EVENTID = 0x1866, + WMI_GET_BASEBAND_TYPE_EVENTID = 0x1867, + WMI_UNIT_TEST_EVENTID = 0x1900, + WMI_FLASH_READ_DONE_EVENTID = 0x1902, + WMI_FLASH_WRITE_DONE_EVENTID = 0x1903, + /* Power management */ + WMI_TRAFFIC_DEFERRAL_EVENTID = 0x1904, + WMI_TRAFFIC_RESUME_EVENTID = 0x1905, /* P2P */ - WMI_P2P_CFG_DONE_EVENTID = 0x1910, - WMI_PORT_ALLOCATED_EVENTID = 0x1911, - WMI_PORT_DELETED_EVENTID = 0x1912, - WMI_LISTEN_STARTED_EVENTID = 0x1914, - WMI_SEARCH_STARTED_EVENTID = 0x1915, - WMI_DISCOVERY_STARTED_EVENTID = 0x1916, - WMI_DISCOVERY_STOPPED_EVENTID = 0x1917, - WMI_PCP_STARTED_EVENTID = 0x1918, - WMI_PCP_STOPPED_EVENTID = 0x1919, - WMI_PCP_FACTOR_EVENTID = 0x191A, - WMI_SET_CHANNEL_EVENTID = 0x9000, - WMI_ASSOC_REQ_EVENTID = 0x9001, - WMI_EAPOL_RX_EVENTID = 0x9002, - WMI_MAC_ADDR_RESP_EVENTID = 0x9003, - WMI_FW_VER_EVENTID = 0x9004, - WMI_ACS_PASSIVE_SCAN_COMPLETE_EVENTID = 0x9005, + WMI_P2P_CFG_DONE_EVENTID = 0x1910, + WMI_PORT_ALLOCATED_EVENTID = 0x1911, + WMI_PORT_DELETED_EVENTID = 0x1912, + WMI_LISTEN_STARTED_EVENTID = 0x1914, + WMI_SEARCH_STARTED_EVENTID = 0x1915, + WMI_DISCOVERY_STARTED_EVENTID = 0x1916, + WMI_DISCOVERY_STOPPED_EVENTID = 0x1917, + WMI_PCP_STARTED_EVENTID = 0x1918, + WMI_PCP_STOPPED_EVENTID = 0x1919, + WMI_PCP_FACTOR_EVENTID = 0x191A, + /* Power Save Configuration Events */ + WMI_PS_DEV_PROFILE_CFG_EVENTID = 0x191C, + /* Not supported yet */ + WMI_PS_DEV_CFG_EVENTID = 0x191D, + /* Not supported yet */ + WMI_PS_DEV_CFG_READ_EVENTID = 0x191E, + /* Not supported yet */ + WMI_PS_MID_CFG_EVENTID = 0x191F, + /* Not supported yet */ + WMI_PS_MID_CFG_READ_EVENTID = 0x1920, + WMI_RS_CFG_DONE_EVENTID = 0x1921, + WMI_SET_CHANNEL_EVENTID = 0x9000, + WMI_ASSOC_REQ_EVENTID = 0x9001, + WMI_EAPOL_RX_EVENTID = 0x9002, + WMI_MAC_ADDR_RESP_EVENTID = 0x9003, + WMI_FW_VER_EVENTID = 0x9004, + WMI_ACS_PASSIVE_SCAN_COMPLETE_EVENTID = 0x9005, }; /* Events data structures */ @@ -943,10 +986,78 @@ struct wmi_get_status_done_event { /* WMI_FW_VER_EVENTID */ struct wmi_fw_ver_event { - u8 major; - u8 minor; - __le16 subminor; - __le16 build; + /* FW image version */ + __le32 fw_major; + __le32 fw_minor; + __le32 fw_subminor; + __le32 fw_build; + /* FW image build time stamp */ + __le32 hour; + __le32 minute; + __le32 second; + __le32 day; + __le32 month; + __le32 year; + /* Boot Loader image version */ + __le32 bl_major; + __le32 bl_minor; + __le32 bl_subminor; + __le32 bl_build; +} __packed; + +/* WMI_GET_RF_STATUS_EVENTID */ +enum rf_type { + RF_UNKNOWN = 0x00, + RF_MARLON = 0x01, + RF_SPARROW = 0x02, +}; + +/* WMI_GET_RF_STATUS_EVENTID */ +enum board_file_rf_type { + BF_RF_MARLON = 0x00, + BF_RF_SPARROW = 0x01, +}; + +/* WMI_GET_RF_STATUS_EVENTID */ +enum rf_status { + RF_OK = 0x00, + RF_NO_COMM = 0x01, + RF_WRONG_BOARD_FILE = 0x02, +}; + +/* WMI_GET_RF_STATUS_EVENTID */ +struct wmi_get_rf_status_event { + /* enum rf_type */ + __le32 rf_type; + /* attached RFs bit vector */ + __le32 attached_rf_vector; + /* enabled RFs bit vector */ + __le32 enabled_rf_vector; + /* enum rf_status, refers to enabled RFs */ + u8 rf_status[32]; + /* enum board file RF type */ + __le32 board_file_rf_type; + /* board file platform type */ + __le32 board_file_platform_type; + /* board file version */ + __le32 board_file_version; + __le32 reserved[2]; +} __packed; + +/* WMI_GET_BASEBAND_TYPE_EVENTID */ +enum baseband_type { + BASEBAND_UNKNOWN = 0x00, + BASEBAND_SPARROW_M_A0 = 0x03, + BASEBAND_SPARROW_M_A1 = 0x04, + BASEBAND_SPARROW_M_B0 = 0x05, + BASEBAND_SPARROW_M_C0 = 0x06, + BASEBAND_SPARROW_M_D0 = 0x07, +}; + +/* WMI_GET_BASEBAND_TYPE_EVENTID */ +struct wmi_get_baseband_type_event { + /* enum baseband_type */ + __le32 baseband_type; } __packed; /* WMI_MAC_ADDR_RESP_EVENTID */ @@ -1410,4 +1521,375 @@ struct wmi_led_cfg_done_event { __le32 status; } __packed; +#define WMI_NUM_MCS (13) + +/* Rate search parameters configuration per connection */ +struct wmi_rs_cfg { + /* The maximal allowed PER for each MCS + * MCS will be considered as failed if PER during RS is higher + */ + u8 per_threshold[WMI_NUM_MCS]; + /* Number of MPDUs for each MCS + * this is the minimal statistic required to make an educated + * decision + */ + u8 min_frame_cnt[WMI_NUM_MCS]; + /* stop threshold [0-100] */ + u8 stop_th; + /* MCS1 stop threshold [0-100] */ + u8 mcs1_fail_th; + u8 max_back_failure_th; + /* Debug feature for disabling internal RS trigger (which is + * currently triggered by BF Done) + */ + u8 dbg_disable_internal_trigger; + __le32 back_failure_mask; + __le32 mcs_en_vec; +} __packed; + +/* WMI_RS_CFG_CMDID */ +struct wmi_rs_cfg_cmd { + /* connection id */ + u8 cid; + /* enable or disable rate search */ + u8 rs_enable; + /* rate search configuration */ + struct wmi_rs_cfg rs_cfg; +} __packed; + +/* WMI_RS_CFG_DONE_EVENTID */ +struct wmi_rs_cfg_done_event { + u8 cid; + /* enum wmi_fw_status */ + u8 status; + u8 reserved[2]; +} __packed; + +/* broadcast connection ID */ +#define WMI_LINK_MAINTAIN_CFG_CID_BROADCAST (0xFFFFFFFF) + +/* Types wmi_link_maintain_cfg presets for WMI_LINK_MAINTAIN_CFG_WRITE_CMD */ +enum wmi_link_maintain_cfg_type { + /* AP/PCP default normal (non-FST) configuration settings */ + WMI_LINK_MAINTAIN_CFG_TYPE_DEFAULT_NORMAL_AP = 0x00, + /* AP/PCP default FST configuration settings */ + WMI_LINK_MAINTAIN_CFG_TYPE_DEFAULT_FST_AP = 0x01, + /* STA default normal (non-FST) configuration settings */ + WMI_LINK_MAINTAIN_CFG_TYPE_DEFAULT_NORMAL_STA = 0x02, + /* STA default FST configuration settings */ + WMI_LINK_MAINTAIN_CFG_TYPE_DEFAULT_FST_STA = 0x03, + /* custom configuration settings */ + WMI_LINK_MAINTAIN_CFG_TYPE_CUSTOM = 0x04, + /* number of defined configuration types */ + WMI_LINK_MAINTAIN_CFG_TYPES_NUM = 0x05, +}; + +/* Response status codes for WMI_LINK_MAINTAIN_CFG_WRITE/READ commands */ +enum wmi_link_maintain_cfg_response_status { + /* WMI_LINK_MAINTAIN_CFG_WRITE/READ command successfully accomplished + */ + WMI_LINK_MAINTAIN_CFG_RESPONSE_STATUS_OK = 0x00, + /* ERROR due to bad argument in WMI_LINK_MAINTAIN_CFG_WRITE/READ + * command request + */ + WMI_LINK_MAINTAIN_CFG_RESPONSE_STATUS_BAD_ARGUMENT = 0x01, +}; + +/* Link Loss and Keep Alive configuration */ +struct wmi_link_maintain_cfg { + /* link_loss_enable_detectors_vec */ + __le32 link_loss_enable_detectors_vec; + /* detectors check period usec */ + __le32 check_link_loss_period_usec; + /* max allowed tx ageing */ + __le32 tx_ageing_threshold_usec; + /* keep alive period for high SNR */ + __le32 keep_alive_period_usec_high_snr; + /* keep alive period for low SNR */ + __le32 keep_alive_period_usec_low_snr; + /* lower snr limit for keep alive period update */ + __le32 keep_alive_snr_threshold_low_db; + /* upper snr limit for keep alive period update */ + __le32 keep_alive_snr_threshold_high_db; + /* num of successive bad bcons causing link-loss */ + __le32 bad_beacons_num_threshold; + /* SNR limit for bad_beacons_detector */ + __le32 bad_beacons_snr_threshold_db; +} __packed; + +/* WMI_LINK_MAINTAIN_CFG_WRITE_CMDID */ +struct wmi_link_maintain_cfg_write_cmd { + /* enum wmi_link_maintain_cfg_type_e - type of requested default + * configuration to be applied + */ + __le32 cfg_type; + /* requested connection ID or WMI_LINK_MAINTAIN_CFG_CID_BROADCAST */ + __le32 cid; + /* custom configuration settings to be applied (relevant only if + * cfg_type==WMI_LINK_MAINTAIN_CFG_TYPE_CUSTOM) + */ + struct wmi_link_maintain_cfg lm_cfg; +} __packed; + +/* WMI_LINK_MAINTAIN_CFG_READ_CMDID */ +struct wmi_link_maintain_cfg_read_cmd { + /* connection ID which configuration settings are requested */ + __le32 cid; +} __packed; + +/* WMI_LINK_MAINTAIN_CFG_WRITE_DONE_EVENTID */ +struct wmi_link_maintain_cfg_write_done_event { + /* requested connection ID */ + __le32 cid; + /* wmi_link_maintain_cfg_response_status_e - write status */ + __le32 status; +} __packed; + +/* \WMI_LINK_MAINTAIN_CFG_READ_DONE_EVENT */ +struct wmi_link_maintain_cfg_read_done_event { + /* requested connection ID */ + __le32 cid; + /* wmi_link_maintain_cfg_response_status_e - read status */ + __le32 status; + /* Retrieved configuration settings */ + struct wmi_link_maintain_cfg lm_cfg; +} __packed; + +enum wmi_traffic_deferral_status { + WMI_TRAFFIC_DEFERRAL_APPROVED = 0x0, + WMI_TRAFFIC_DEFERRAL_REJECTED = 0x1, +}; + +/* WMI_TRAFFIC_DEFERRAL_EVENTID */ +struct wmi_traffic_deferral_event { + /* enum wmi_traffic_deferral_status_e */ + u8 status; +} __packed; + +enum wmi_traffic_resume_status { + WMI_TRAFFIC_RESUME_SUCCESS = 0x0, + WMI_TRAFFIC_RESUME_FAILED = 0x1, +}; + +/* WMI_TRAFFIC_RESUME_EVENTID */ +struct wmi_traffic_resume_event { + /* enum wmi_traffic_resume_status_e */ + u8 status; +} __packed; + +/* Power Save command completion status codes */ +enum wmi_ps_cfg_cmd_status { + WMI_PS_CFG_CMD_STATUS_SUCCESS = 0x00, + WMI_PS_CFG_CMD_STATUS_BAD_PARAM = 0x01, + /* other error */ + WMI_PS_CFG_CMD_STATUS_ERROR = 0x02, +}; + +/* Device Power Save Profiles */ +enum wmi_ps_profile_type { + WMI_PS_PROFILE_TYPE_DEFAULT = 0x00, + WMI_PS_PROFILE_TYPE_PS_DISABLED = 0x01, + WMI_PS_PROFILE_TYPE_MAX_PS = 0x02, + WMI_PS_PROFILE_TYPE_LOW_LATENCY_PS = 0x03, +}; + +/* WMI_PS_DEV_PROFILE_CFG_CMDID + * + * Power save profile to be used by the device + * + * Returned event: + * - WMI_PS_DEV_PROFILE_CFG_EVENTID + */ +struct wmi_ps_dev_profile_cfg_cmd { + /* wmi_ps_profile_type_e */ + u8 ps_profile; + u8 reserved[3]; +} __packed; + +/* WMI_PS_DEV_PROFILE_CFG_EVENTID */ +struct wmi_ps_dev_profile_cfg_event { + /* wmi_ps_cfg_cmd_status_e */ + __le32 status; +} __packed; + +enum wmi_ps_level { + WMI_PS_LEVEL_DEEP_SLEEP = 0x00, + WMI_PS_LEVEL_SHALLOW_SLEEP = 0x01, + /* awake = all PS mechanisms are disabled */ + WMI_PS_LEVEL_AWAKE = 0x02, +}; + +enum wmi_ps_deep_sleep_clk_level { + /* 33k */ + WMI_PS_DEEP_SLEEP_CLK_LEVEL_RTC = 0x00, + /* 10k */ + WMI_PS_DEEP_SLEEP_CLK_LEVEL_OSC = 0x01, + /* @RTC Low latency */ + WMI_PS_DEEP_SLEEP_CLK_LEVEL_RTC_LT = 0x02, + WMI_PS_DEEP_SLEEP_CLK_LEVEL_XTAL = 0x03, + WMI_PS_DEEP_SLEEP_CLK_LEVEL_SYSCLK = 0x04, + /* Not Applicable */ + WMI_PS_DEEP_SLEEP_CLK_LEVEL_N_A = 0xFF, +}; + +/* Response by the FW to a D3 entry request */ +enum wmi_ps_d3_resp_policy { + WMI_PS_D3_RESP_POLICY_DEFAULT = 0x00, + /* debug -D3 req is always denied */ + WMI_PS_D3_RESP_POLICY_DENIED = 0x01, + /* debug -D3 req is always approved */ + WMI_PS_D3_RESP_POLICY_APPROVED = 0x02, +}; + +/* Device common power save configurations */ +struct wmi_ps_dev_cfg { + /* lowest level of PS allowed while unassociated, enum wmi_ps_level_e + */ + u8 ps_unassoc_min_level; + /* lowest deep sleep clock level while nonassoc, enum + * wmi_ps_deep_sleep_clk_level_e + */ + u8 ps_unassoc_deep_sleep_min_level; + /* lowest level of PS allowed while associated, enum wmi_ps_level_e */ + u8 ps_assoc_min_level; + /* lowest deep sleep clock level while assoc, enum + * wmi_ps_deep_sleep_clk_level_e + */ + u8 ps_assoc_deep_sleep_min_level; + /* enum wmi_ps_deep_sleep_clk_level_e */ + u8 ps_assoc_low_latency_ds_min_level; + /* enum wmi_ps_d3_resp_policy_e */ + u8 ps_D3_response_policy; + /* BOOL */ + u8 ps_D3_pm_pme_enabled; + /* BOOL */ + u8 ps_halp_enable; + u8 ps_deep_sleep_enter_thresh_msec; + /* BOOL */ + u8 ps_voltage_scaling_en; +} __packed; + +/* WMI_PS_DEV_CFG_CMDID + * + * Configure common Power Save parameters of the device and all MIDs. + * + * Returned event: + * - WMI_PS_DEV_CFG_EVENTID + */ +struct wmi_ps_dev_cfg_cmd { + /* Device Power Save configuration to be applied */ + struct wmi_ps_dev_cfg ps_dev_cfg; + /* alignment to 32b */ + u8 reserved[2]; +} __packed; + +/* WMI_PS_DEV_CFG_EVENTID */ +struct wmi_ps_dev_cfg_event { + /* wmi_ps_cfg_cmd_status_e */ + __le32 status; +} __packed; + +/* WMI_PS_DEV_CFG_READ_CMDID + * + * request to retrieve device Power Save configuration + * (WMI_PS_DEV_CFG_CMD params) + * + * Returned event: + * - WMI_PS_DEV_CFG_READ_EVENTID + */ +struct wmi_ps_dev_cfg_read_cmd { + __le32 reserved; +} __packed; + +/* WMI_PS_DEV_CFG_READ_EVENTID */ +struct wmi_ps_dev_cfg_read_event { + /* wmi_ps_cfg_cmd_status_e */ + __le32 status; + /* Retrieved device Power Save configuration (WMI_PS_DEV_CFG_CMD + * params) + */ + struct wmi_ps_dev_cfg dev_ps_cfg; + /* alignment to 32b */ + u8 reserved[2]; +} __packed; + +/* Per Mac Power Save configurations */ +struct wmi_ps_mid_cfg { + /* Low power RX in BTI is enabled, BOOL */ + u8 beacon_lprx_enable; + /* Sync to sector ID enabled, BOOL */ + u8 beacon_sync_to_sectorId_enable; + /* Low power RX in DTI is enabled, BOOL */ + u8 frame_exchange_lprx_enable; + /* Sleep Cycle while in scheduled PS, 1-31 */ + u8 scheduled_sleep_cycle_pow2; + /* Stay Awake for k BIs every (sleep_cycle - k) BIs, 1-31 */ + u8 scheduled_num_of_awake_bis; + u8 am_to_traffic_load_thresh_mbp; + u8 traffic_to_am_load_thresh_mbps; + u8 traffic_to_am_num_of_no_traffic_bis; + /* BOOL */ + u8 continuous_traffic_psm; + __le16 no_traffic_to_min_usec; + __le16 no_traffic_to_max_usec; + __le16 snoozing_sleep_interval_milisec; + u8 max_no_data_awake_events; + /* Trigger WEB after k failed beacons */ + u8 num_of_failed_beacons_rx_to_trigger_web; + /* Trigger BF after k failed beacons */ + u8 num_of_failed_beacons_rx_to_trigger_bf; + /* Trigger SOB after k successful beacons */ + u8 num_of_successful_beacons_rx_to_trigger_sob; +} __packed; + +/* WMI_PS_MID_CFG_CMDID + * + * Configure Power Save parameters of a specific MID. + * These parameters are relevant for the specific BSS this MID belongs to. + * + * Returned event: + * - WMI_PS_MID_CFG_EVENTID + */ +struct wmi_ps_mid_cfg_cmd { + /* MAC ID */ + u8 mid; + /* mid PS configuration to be applied */ + struct wmi_ps_mid_cfg ps_mid_cfg; +} __packed; + +/* WMI_PS_MID_CFG_EVENTID */ +struct wmi_ps_mid_cfg_event { + /* MAC ID */ + u8 mid; + /* alignment to 32b */ + u8 reserved[3]; + /* wmi_ps_cfg_cmd_status_e */ + __le32 status; +} __packed; + +/* WMI_PS_MID_CFG_READ_CMDID + * + * request to retrieve Power Save configuration of mid + * (WMI_PS_MID_CFG_CMD params) + * + * Returned event: + * - WMI_PS_MID_CFG_READ_EVENTID + */ +struct wmi_ps_mid_cfg_read_cmd { + /* MAC ID */ + u8 mid; + /* alignment to 32b */ + u8 reserved[3]; +} __packed; + +/* WMI_PS_MID_CFG_READ_EVENTID */ +struct wmi_ps_mid_cfg_read_event { + /* MAC ID */ + u8 mid; + /* Retrieved MID Power Save configuration(WMI_PS_MID_CFG_CMD params) */ + struct wmi_ps_mid_cfg mid_ps_cfg; + /* wmi_ps_cfg_cmd_status_e */ + __le32 status; +} __packed; + #endif /* __WILOCITY_WMI_H__ */ From f1b7764f8626b5ee7a42b6648427e71111c5cbb5 Mon Sep 17 00:00:00 2001 From: Maya Erez Date: Thu, 18 Aug 2016 16:52:12 +0300 Subject: [PATCH 0334/1715] wil6210: fix HALP handling in case of HALP vote time-out In case HALP vote times out, we need to mask the HALP IRQ, as done in case the interrupt is received, as this interrupt should be set until completion of the low latency operation. Signed-off-by: Maya Erez Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/wil6210/interrupt.c | 11 +++++++++-- drivers/net/wireless/ath/wil6210/main.c | 7 +++++-- drivers/net/wireless/ath/wil6210/wil6210.h | 1 + 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/ath/wil6210/interrupt.c b/drivers/net/wireless/ath/wil6210/interrupt.c index 011e7412dcc0..f10c47dcbde5 100644 --- a/drivers/net/wireless/ath/wil6210/interrupt.c +++ b/drivers/net/wireless/ath/wil6210/interrupt.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2015 Qualcomm Atheros, Inc. + * Copyright (c) 2012-2016 Qualcomm Atheros, Inc. * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -101,7 +101,7 @@ static void wil6210_mask_irq_misc(struct wil6210_priv *wil, bool mask_halp) mask_halp ? WIL6210_IRQ_DISABLE : WIL6210_IRQ_DISABLE_NO_HALP); } -static void wil6210_mask_halp(struct wil6210_priv *wil) +void wil6210_mask_halp(struct wil6210_priv *wil) { wil_dbg_irq(wil, "%s()\n", __func__); @@ -503,6 +503,13 @@ static int wil6210_debug_irq_mask(struct wil6210_priv *wil, u32 pseudo_cause) offsetof(struct RGF_ICR, ICR)); u32 imv_misc = wil_r(wil, RGF_DMA_EP_MISC_ICR + offsetof(struct RGF_ICR, IMV)); + + /* HALP interrupt can be unmasked when misc interrupts are + * masked + */ + if (icr_misc & BIT_DMA_EP_MISC_ICR_HALP) + return 0; + wil_err(wil, "IRQ when it should be masked: pseudo 0x%08x\n" "Rx icm:icr:imv 0x%08x 0x%08x 0x%08x\n" "Tx icm:icr:imv 0x%08x 0x%08x 0x%08x\n" diff --git a/drivers/net/wireless/ath/wil6210/main.c b/drivers/net/wireless/ath/wil6210/main.c index 4bc92e54984a..4240e81bcc82 100644 --- a/drivers/net/wireless/ath/wil6210/main.c +++ b/drivers/net/wireless/ath/wil6210/main.c @@ -1124,13 +1124,16 @@ void wil_halp_vote(struct wil6210_priv *wil) if (++wil->halp.ref_cnt == 1) { wil6210_set_halp(wil); rc = wait_for_completion_timeout(&wil->halp.comp, to_jiffies); - if (!rc) + if (!rc) { wil_err(wil, "%s: HALP vote timed out\n", __func__); - else + /* Mask HALP as done in case the interrupt is raised */ + wil6210_mask_halp(wil); + } else { wil_dbg_misc(wil, "%s: HALP vote completed after %d ms\n", __func__, jiffies_to_msecs(to_jiffies - rc)); + } } wil_dbg_misc(wil, "%s: end, HALP ref_cnt (%d)\n", __func__, diff --git a/drivers/net/wireless/ath/wil6210/wil6210.h b/drivers/net/wireless/ath/wil6210/wil6210.h index ecab4af90602..9742446eadc4 100644 --- a/drivers/net/wireless/ath/wil6210/wil6210.h +++ b/drivers/net/wireless/ath/wil6210/wil6210.h @@ -828,6 +828,7 @@ void wil_unmask_irq(struct wil6210_priv *wil); void wil_configure_interrupt_moderation(struct wil6210_priv *wil); void wil_disable_irq(struct wil6210_priv *wil); void wil_enable_irq(struct wil6210_priv *wil); +void wil6210_mask_halp(struct wil6210_priv *wil); /* P2P */ bool wil_p2p_is_social_scan(struct cfg80211_scan_request *request); From 74b6ac586d9cb7f45c894841e4204b3648ae865c Mon Sep 17 00:00:00 2001 From: Maya Erez Date: Thu, 18 Aug 2016 16:52:12 +0300 Subject: [PATCH 0335/1715] wil6210: support rx key setting for all TIDs According to the spec the PN should be calculated per TID. In the current implementation, the PN and key_set were set only for TID 0, therefore only traffic for TID 0 was supported. In order to support all TIDs, the key_set and PN should be set for all the TIDs. Signed-off-by: Maya Erez Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/wil6210/cfg80211.c | 116 ++++++++++++++------ 1 file changed, 81 insertions(+), 35 deletions(-) diff --git a/drivers/net/wireless/ath/wil6210/cfg80211.c b/drivers/net/wireless/ath/wil6210/cfg80211.c index f0e1175fb76a..110098e9603b 100644 --- a/drivers/net/wireless/ath/wil6210/cfg80211.c +++ b/drivers/net/wireless/ath/wil6210/cfg80211.c @@ -760,14 +760,11 @@ static enum wmi_key_usage wil_detect_key_usage(struct wil6210_priv *wil, return rc; } -static struct wil_tid_crypto_rx_single * -wil_find_crypto_ctx(struct wil6210_priv *wil, u8 key_index, - enum wmi_key_usage key_usage, const u8 *mac_addr) +static struct wil_sta_info * +wil_find_sta_by_key_usage(struct wil6210_priv *wil, + enum wmi_key_usage key_usage, const u8 *mac_addr) { int cid = -EINVAL; - int tid = 0; - struct wil_sta_info *s; - struct wil_tid_crypto_rx *c; if (key_usage == WMI_KEY_USE_TX_GROUP) return NULL; /* not needed */ @@ -778,18 +775,72 @@ wil_find_crypto_ctx(struct wil6210_priv *wil, u8 key_index, else if (key_usage == WMI_KEY_USE_RX_GROUP) cid = wil_find_cid_by_idx(wil, 0); if (cid < 0) { - wil_err(wil, "No CID for %pM %s[%d]\n", mac_addr, - key_usage_str[key_usage], key_index); + wil_err(wil, "No CID for %pM %s\n", mac_addr, + key_usage_str[key_usage]); return ERR_PTR(cid); } - s = &wil->sta[cid]; - if (key_usage == WMI_KEY_USE_PAIRWISE) - c = &s->tid_crypto_rx[tid]; - else - c = &s->group_crypto_rx; + return &wil->sta[cid]; +} - return &c->key_id[key_index]; +static void wil_set_crypto_rx(u8 key_index, enum wmi_key_usage key_usage, + struct wil_sta_info *cs, + struct key_params *params) +{ + struct wil_tid_crypto_rx_single *cc; + int tid; + + if (!cs) + return; + + switch (key_usage) { + case WMI_KEY_USE_PAIRWISE: + for (tid = 0; tid < WIL_STA_TID_NUM; tid++) { + cc = &cs->tid_crypto_rx[tid].key_id[key_index]; + if (params->seq) + memcpy(cc->pn, params->seq, + IEEE80211_GCMP_PN_LEN); + else + memset(cc->pn, 0, IEEE80211_GCMP_PN_LEN); + cc->key_set = true; + } + break; + case WMI_KEY_USE_RX_GROUP: + cc = &cs->group_crypto_rx.key_id[key_index]; + if (params->seq) + memcpy(cc->pn, params->seq, IEEE80211_GCMP_PN_LEN); + else + memset(cc->pn, 0, IEEE80211_GCMP_PN_LEN); + cc->key_set = true; + break; + default: + break; + } +} + +static void wil_del_rx_key(u8 key_index, enum wmi_key_usage key_usage, + struct wil_sta_info *cs) +{ + struct wil_tid_crypto_rx_single *cc; + int tid; + + if (!cs) + return; + + switch (key_usage) { + case WMI_KEY_USE_PAIRWISE: + for (tid = 0; tid < WIL_STA_TID_NUM; tid++) { + cc = &cs->tid_crypto_rx[tid].key_id[key_index]; + cc->key_set = false; + } + break; + case WMI_KEY_USE_RX_GROUP: + cc = &cs->group_crypto_rx.key_id[key_index]; + cc->key_set = false; + break; + default: + break; + } } static int wil_cfg80211_add_key(struct wiphy *wiphy, @@ -801,24 +852,26 @@ static int wil_cfg80211_add_key(struct wiphy *wiphy, int rc; struct wil6210_priv *wil = wiphy_to_wil(wiphy); enum wmi_key_usage key_usage = wil_detect_key_usage(wil, pairwise); - struct wil_tid_crypto_rx_single *cc = wil_find_crypto_ctx(wil, - key_index, - key_usage, - mac_addr); + struct wil_sta_info *cs = wil_find_sta_by_key_usage(wil, key_usage, + mac_addr); + + if (!params) { + wil_err(wil, "NULL params\n"); + return -EINVAL; + } wil_dbg_misc(wil, "%s(%pM %s[%d] PN %*phN)\n", __func__, mac_addr, key_usage_str[key_usage], key_index, params->seq_len, params->seq); - if (IS_ERR(cc)) { + if (IS_ERR(cs)) { wil_err(wil, "Not connected, %s(%pM %s[%d] PN %*phN)\n", __func__, mac_addr, key_usage_str[key_usage], key_index, params->seq_len, params->seq); return -EINVAL; } - if (cc) - cc->key_set = false; + wil_del_rx_key(key_index, key_usage, cs); if (params->seq && params->seq_len != IEEE80211_GCMP_PN_LEN) { wil_err(wil, @@ -831,13 +884,8 @@ static int wil_cfg80211_add_key(struct wiphy *wiphy, rc = wmi_add_cipher_key(wil, key_index, mac_addr, params->key_len, params->key, key_usage); - if ((rc == 0) && cc) { - if (params->seq) - memcpy(cc->pn, params->seq, IEEE80211_GCMP_PN_LEN); - else - memset(cc->pn, 0, IEEE80211_GCMP_PN_LEN); - cc->key_set = true; - } + if (!rc) + wil_set_crypto_rx(key_index, key_usage, cs, params); return rc; } @@ -849,20 +897,18 @@ static int wil_cfg80211_del_key(struct wiphy *wiphy, { struct wil6210_priv *wil = wiphy_to_wil(wiphy); enum wmi_key_usage key_usage = wil_detect_key_usage(wil, pairwise); - struct wil_tid_crypto_rx_single *cc = wil_find_crypto_ctx(wil, - key_index, - key_usage, - mac_addr); + struct wil_sta_info *cs = wil_find_sta_by_key_usage(wil, key_usage, + mac_addr); wil_dbg_misc(wil, "%s(%pM %s[%d])\n", __func__, mac_addr, key_usage_str[key_usage], key_index); - if (IS_ERR(cc)) + if (IS_ERR(cs)) wil_info(wil, "Not connected, %s(%pM %s[%d])\n", __func__, mac_addr, key_usage_str[key_usage], key_index); - if (!IS_ERR_OR_NULL(cc)) - cc->key_set = false; + if (!IS_ERR_OR_NULL(cs)) + wil_del_rx_key(key_index, key_usage, cs); return wmi_del_cipher_key(wil, key_index, mac_addr, key_usage); } From ef86f249fa4980fc78fe1546e45d8cab6be424b6 Mon Sep 17 00:00:00 2001 From: Lior David Date: Thu, 18 Aug 2016 16:52:13 +0300 Subject: [PATCH 0336/1715] wil6210: change HALP logging category to IRQ Change the logging category of HALP functions from MISC to IRQ, since the HALP mechanism is closely related to interrupts. Both HALP and IRQ create a heavy load of logging messages when enabled, so their logging is typically disabled during normal debug scenarios. Having them in the same logging category will make it easier to disable logging for both in one go. Signed-off-by: Lior David Signed-off-by: Maya Erez Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/wil6210/interrupt.c | 4 +-- drivers/net/wireless/ath/wil6210/main.c | 26 ++++++++++---------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/net/wireless/ath/wil6210/interrupt.c b/drivers/net/wireless/ath/wil6210/interrupt.c index f10c47dcbde5..64046e0bd0a2 100644 --- a/drivers/net/wireless/ath/wil6210/interrupt.c +++ b/drivers/net/wireless/ath/wil6210/interrupt.c @@ -599,7 +599,7 @@ void wil6210_clear_irq(struct wil6210_priv *wil) void wil6210_set_halp(struct wil6210_priv *wil) { - wil_dbg_misc(wil, "%s()\n", __func__); + wil_dbg_irq(wil, "%s()\n", __func__); wil_w(wil, RGF_DMA_EP_MISC_ICR + offsetof(struct RGF_ICR, ICS), BIT_DMA_EP_MISC_ICR_HALP); @@ -607,7 +607,7 @@ void wil6210_set_halp(struct wil6210_priv *wil) void wil6210_clear_halp(struct wil6210_priv *wil) { - wil_dbg_misc(wil, "%s()\n", __func__); + wil_dbg_irq(wil, "%s()\n", __func__); wil_w(wil, RGF_DMA_EP_MISC_ICR + offsetof(struct RGF_ICR, ICR), BIT_DMA_EP_MISC_ICR_HALP); diff --git a/drivers/net/wireless/ath/wil6210/main.c b/drivers/net/wireless/ath/wil6210/main.c index 4240e81bcc82..1205d76331e5 100644 --- a/drivers/net/wireless/ath/wil6210/main.c +++ b/drivers/net/wireless/ath/wil6210/main.c @@ -1118,8 +1118,8 @@ void wil_halp_vote(struct wil6210_priv *wil) mutex_lock(&wil->halp.lock); - wil_dbg_misc(wil, "%s: start, HALP ref_cnt (%d)\n", __func__, - wil->halp.ref_cnt); + wil_dbg_irq(wil, "%s: start, HALP ref_cnt (%d)\n", __func__, + wil->halp.ref_cnt); if (++wil->halp.ref_cnt == 1) { wil6210_set_halp(wil); @@ -1129,15 +1129,15 @@ void wil_halp_vote(struct wil6210_priv *wil) /* Mask HALP as done in case the interrupt is raised */ wil6210_mask_halp(wil); } else { - wil_dbg_misc(wil, - "%s: HALP vote completed after %d ms\n", - __func__, - jiffies_to_msecs(to_jiffies - rc)); + wil_dbg_irq(wil, + "%s: HALP vote completed after %d ms\n", + __func__, + jiffies_to_msecs(to_jiffies - rc)); } } - wil_dbg_misc(wil, "%s: end, HALP ref_cnt (%d)\n", __func__, - wil->halp.ref_cnt); + wil_dbg_irq(wil, "%s: end, HALP ref_cnt (%d)\n", __func__, + wil->halp.ref_cnt); mutex_unlock(&wil->halp.lock); } @@ -1148,16 +1148,16 @@ void wil_halp_unvote(struct wil6210_priv *wil) mutex_lock(&wil->halp.lock); - wil_dbg_misc(wil, "%s: start, HALP ref_cnt (%d)\n", __func__, - wil->halp.ref_cnt); + wil_dbg_irq(wil, "%s: start, HALP ref_cnt (%d)\n", __func__, + wil->halp.ref_cnt); if (--wil->halp.ref_cnt == 0) { wil6210_clear_halp(wil); - wil_dbg_misc(wil, "%s: HALP unvote\n", __func__); + wil_dbg_irq(wil, "%s: HALP unvote\n", __func__); } - wil_dbg_misc(wil, "%s: end, HALP ref_cnt (%d)\n", __func__, - wil->halp.ref_cnt); + wil_dbg_irq(wil, "%s: end, HALP ref_cnt (%d)\n", __func__, + wil->halp.ref_cnt); mutex_unlock(&wil->halp.lock); } From d35c2b6f8ffa75d430fd0fbbc5062f738c44f6e4 Mon Sep 17 00:00:00 2001 From: Maya Erez Date: Thu, 18 Aug 2016 16:52:14 +0300 Subject: [PATCH 0337/1715] wil6210: fix stop p2p device handling fix stop p2p device handling to identify between search and listen and update the upper layers with the appropriate notification. The stop of p2p radio operations also needs to be performed in __wil_down. Signed-off-by: Maya Erez Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/wil6210/cfg80211.c | 25 ++++------- drivers/net/wireless/ath/wil6210/main.c | 2 +- drivers/net/wireless/ath/wil6210/p2p.c | 46 +++++++++++++++++++++ drivers/net/wireless/ath/wil6210/pcie_bus.c | 5 ++- drivers/net/wireless/ath/wil6210/wil6210.h | 1 + 5 files changed, 61 insertions(+), 18 deletions(-) diff --git a/drivers/net/wireless/ath/wil6210/cfg80211.c b/drivers/net/wireless/ath/wil6210/cfg80211.c index 110098e9603b..310a38561736 100644 --- a/drivers/net/wireless/ath/wil6210/cfg80211.c +++ b/drivers/net/wireless/ath/wil6210/cfg80211.c @@ -1409,23 +1409,16 @@ static void wil_cfg80211_stop_p2p_device(struct wiphy *wiphy, struct wireless_dev *wdev) { struct wil6210_priv *wil = wiphy_to_wil(wiphy); - u8 started; + struct wil_p2p_info *p2p = &wil->p2p; + + if (!p2p->p2p_dev_started) + return; wil_dbg_misc(wil, "%s: entered\n", __func__); mutex_lock(&wil->mutex); - started = wil_p2p_stop_discovery(wil); - if (started && wil->scan_request) { - struct cfg80211_scan_info info = { - .aborted = true, - }; - - cfg80211_scan_done(wil->scan_request, &info); - wil->scan_request = NULL; - wil->radio_wdev = wil->wdev; - } + wil_p2p_stop_radio_operations(wil); + p2p->p2p_dev_started = 0; mutex_unlock(&wil->mutex); - - wil->p2p.p2p_dev_started = 0; } static struct cfg80211_ops wil_cfg80211_ops = { @@ -1544,11 +1537,11 @@ void wil_p2p_wdev_free(struct wil6210_priv *wil) mutex_lock(&wil->p2p_wdev_mutex); p2p_wdev = wil->p2p_wdev; + wil->p2p_wdev = NULL; + wil->radio_wdev = wil_to_wdev(wil); + mutex_unlock(&wil->p2p_wdev_mutex); if (p2p_wdev) { - wil->p2p_wdev = NULL; - wil->radio_wdev = wil_to_wdev(wil); cfg80211_unregister_wdev(p2p_wdev); kfree(p2p_wdev); } - mutex_unlock(&wil->p2p_wdev_mutex); } diff --git a/drivers/net/wireless/ath/wil6210/main.c b/drivers/net/wireless/ath/wil6210/main.c index 1205d76331e5..bbc54ee9ad30 100644 --- a/drivers/net/wireless/ath/wil6210/main.c +++ b/drivers/net/wireless/ath/wil6210/main.c @@ -1050,7 +1050,7 @@ int __wil_down(struct wil6210_priv *wil) } wil_enable_irq(wil); - (void)wil_p2p_stop_discovery(wil); + wil_p2p_stop_radio_operations(wil); if (wil->scan_request) { struct cfg80211_scan_info info = { diff --git a/drivers/net/wireless/ath/wil6210/p2p.c b/drivers/net/wireless/ath/wil6210/p2p.c index e0f8aa0ebfac..4087785d3090 100644 --- a/drivers/net/wireless/ath/wil6210/p2p.c +++ b/drivers/net/wireless/ath/wil6210/p2p.c @@ -263,3 +263,49 @@ void wil_p2p_search_expired(struct work_struct *work) mutex_unlock(&wil->p2p_wdev_mutex); } } + +void wil_p2p_stop_radio_operations(struct wil6210_priv *wil) +{ + struct wil_p2p_info *p2p = &wil->p2p; + struct cfg80211_scan_info info = { + .aborted = true, + }; + + lockdep_assert_held(&wil->mutex); + + mutex_lock(&wil->p2p_wdev_mutex); + + if (wil->radio_wdev != wil->p2p_wdev) + goto out; + + if (!p2p->discovery_started) { + /* Regular scan on the p2p device */ + if (wil->scan_request && + wil->scan_request->wdev == wil->p2p_wdev) { + cfg80211_scan_done(wil->scan_request, &info); + wil->scan_request = NULL; + } + goto out; + } + + /* Search or listen on p2p device */ + mutex_unlock(&wil->p2p_wdev_mutex); + wil_p2p_stop_discovery(wil); + mutex_lock(&wil->p2p_wdev_mutex); + + if (wil->scan_request) { + /* search */ + cfg80211_scan_done(wil->scan_request, &info); + wil->scan_request = NULL; + } else { + /* listen */ + cfg80211_remain_on_channel_expired(wil->radio_wdev, + p2p->cookie, + &p2p->listen_chan, + GFP_KERNEL); + } + +out: + wil->radio_wdev = wil->wdev; + mutex_unlock(&wil->p2p_wdev_mutex); +} diff --git a/drivers/net/wireless/ath/wil6210/pcie_bus.c b/drivers/net/wireless/ath/wil6210/pcie_bus.c index 7b5c4222bc33..5b7a9d2f0c29 100644 --- a/drivers/net/wireless/ath/wil6210/pcie_bus.c +++ b/drivers/net/wireless/ath/wil6210/pcie_bus.c @@ -20,6 +20,7 @@ #include #include #include "wil6210.h" +#include static bool use_msi = true; module_param(use_msi, bool, S_IRUGO); @@ -293,6 +294,9 @@ static void wil_pcie_remove(struct pci_dev *pdev) #endif /* CONFIG_PM */ wil6210_debugfs_remove(wil); + rtnl_lock(); + wil_p2p_wdev_free(wil); + rtnl_unlock(); wil_if_remove(wil); wil_if_pcie_disable(wil); pci_iounmap(pdev, csr); @@ -300,7 +304,6 @@ static void wil_pcie_remove(struct pci_dev *pdev) pci_disable_device(pdev); if (wil->platform_ops.uninit) wil->platform_ops.uninit(wil->platform_handle); - wil_p2p_wdev_free(wil); wil_if_free(wil); } diff --git a/drivers/net/wireless/ath/wil6210/wil6210.h b/drivers/net/wireless/ath/wil6210/wil6210.h index 9742446eadc4..608769120957 100644 --- a/drivers/net/wireless/ath/wil6210/wil6210.h +++ b/drivers/net/wireless/ath/wil6210/wil6210.h @@ -841,6 +841,7 @@ u8 wil_p2p_stop_discovery(struct wil6210_priv *wil); int wil_p2p_cancel_listen(struct wil6210_priv *wil, u64 cookie); void wil_p2p_listen_expired(struct work_struct *work); void wil_p2p_search_expired(struct work_struct *work); +void wil_p2p_stop_radio_operations(struct wil6210_priv *wil); /* WMI for P2P */ int wmi_p2p_cfg(struct wil6210_priv *wil, int channel, int bi); From b0c0e688e523eba14abf21ac246b7dd88f5574fa Mon Sep 17 00:00:00 2001 From: Lazar Alexei Date: Thu, 18 Aug 2016 16:52:14 +0300 Subject: [PATCH 0338/1715] wil6210: Fix driver down flow Stations disconnection is executed as part of wil_reset so no need to do it in wil_down. Removal of the disconnect operation will also preserve the lock of wil->mutex during the whole reset flow and prevent handling of connect event while resetting. Set wil_status_resetting in earlier stage in the flow to prevent double resetting call in case communication with FW fails while bringing the interface down. Signed-off-by: Lazar Alexei Signed-off-by: Maya Erez Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/wil6210/main.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/drivers/net/wireless/ath/wil6210/main.c b/drivers/net/wireless/ath/wil6210/main.c index bbc54ee9ad30..dd0ee7fe39bd 100644 --- a/drivers/net/wireless/ath/wil6210/main.c +++ b/drivers/net/wireless/ath/wil6210/main.c @@ -1035,10 +1035,10 @@ int wil_up(struct wil6210_priv *wil) int __wil_down(struct wil6210_priv *wil) { - int rc; - WARN_ON(!mutex_is_locked(&wil->mutex)); + set_bit(wil_status_resetting, wil->status); + if (wil->platform_ops.bus_request) wil->platform_ops.bus_request(wil->platform_handle, 0); @@ -1064,18 +1064,6 @@ int __wil_down(struct wil6210_priv *wil) wil->scan_request = NULL; } - if (test_bit(wil_status_fwconnected, wil->status) || - test_bit(wil_status_fwconnecting, wil->status)) { - - mutex_unlock(&wil->mutex); - rc = wmi_call(wil, WMI_DISCONNECT_CMDID, NULL, 0, - WMI_DISCONNECT_EVENTID, NULL, 0, - WIL6210_DISCONNECT_TO_MS); - mutex_lock(&wil->mutex); - if (rc) - wil_err(wil, "timeout waiting for disconnect\n"); - } - wil_reset(wil, false); return 0; From dc90506f145875b9d88160802cc5fe06a7c79dda Mon Sep 17 00:00:00 2001 From: Maya Erez Date: Thu, 18 Aug 2016 16:52:15 +0300 Subject: [PATCH 0339/1715] wil6210: prevent usage of incorrect TX hwtail txdata->enabled is used in order to determine if the TX vring is valid. As the data transmit is handled in a different context, in case txdata->enabled is set before vring->hwtail is updated, an old or corrupted vring->hwtail can be used. Protect setting of txdata->enabled and vring->hwtail to prevent a case where TX vring start handling TX packets before setting vring->hwtail. Signed-off-by: Maya Erez Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/wil6210/txrx.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/wil6210/txrx.c b/drivers/net/wireless/ath/wil6210/txrx.c index f2f6a404d3d1..4c38520d4dd2 100644 --- a/drivers/net/wireless/ath/wil6210/txrx.c +++ b/drivers/net/wireless/ath/wil6210/txrx.c @@ -873,9 +873,12 @@ int wil_vring_init_tx(struct wil6210_priv *wil, int id, int size, rc = -EINVAL; goto out_free; } - vring->hwtail = le32_to_cpu(reply.cmd.tx_vring_tail_ptr); + spin_lock_bh(&txdata->lock); + vring->hwtail = le32_to_cpu(reply.cmd.tx_vring_tail_ptr); txdata->enabled = 1; + spin_unlock_bh(&txdata->lock); + if (txdata->dot1x_open && (agg_wsize >= 0)) wil_addba_tx_request(wil, id, agg_wsize); @@ -950,9 +953,11 @@ int wil_vring_init_bcast(struct wil6210_priv *wil, int id, int size) rc = -EINVAL; goto out_free; } - vring->hwtail = le32_to_cpu(reply.cmd.tx_vring_tail_ptr); + spin_lock_bh(&txdata->lock); + vring->hwtail = le32_to_cpu(reply.cmd.tx_vring_tail_ptr); txdata->enabled = 1; + spin_unlock_bh(&txdata->lock); return 0; out_free: From 2690c4c0e83b1aa5aa8b8b258ae422b067720224 Mon Sep 17 00:00:00 2001 From: Lior David Date: Thu, 18 Aug 2016 16:52:16 +0300 Subject: [PATCH 0340/1715] wil6210: fix wiphy registration sequence Currently wiphy structure is initialized and registered in wil_if_alloc, before some information is available such as MAC address and capabilities. As a result there is a small chance user space will get incorrect information from calls such as NL80211_CMD_GET_WIPHY. Fix this by seperating the registration and moving it to wil_if_add which is executed later, after all relevant information is known. Signed-off-by: Lior David Signed-off-by: Maya Erez Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/wil6210/cfg80211.c | 7 ----- drivers/net/wireless/ath/wil6210/main.c | 3 ++ drivers/net/wireless/ath/wil6210/netdev.c | 32 +++++++++++++++------ 3 files changed, 26 insertions(+), 16 deletions(-) diff --git a/drivers/net/wireless/ath/wil6210/cfg80211.c b/drivers/net/wireless/ath/wil6210/cfg80211.c index 310a38561736..ffacc7648727 100644 --- a/drivers/net/wireless/ath/wil6210/cfg80211.c +++ b/drivers/net/wireless/ath/wil6210/cfg80211.c @@ -1503,14 +1503,8 @@ struct wireless_dev *wil_cfg80211_init(struct device *dev) set_wiphy_dev(wdev->wiphy, dev); wil_wiphy_init(wdev->wiphy); - rc = wiphy_register(wdev->wiphy); - if (rc < 0) - goto out_failed_reg; - return wdev; -out_failed_reg: - wiphy_free(wdev->wiphy); out: kfree(wdev); @@ -1526,7 +1520,6 @@ void wil_wdev_free(struct wil6210_priv *wil) if (!wdev) return; - wiphy_unregister(wdev->wiphy); wiphy_free(wdev->wiphy); kfree(wdev); } diff --git a/drivers/net/wireless/ath/wil6210/main.c b/drivers/net/wireless/ath/wil6210/main.c index dd0ee7fe39bd..d0b180cc3c60 100644 --- a/drivers/net/wireless/ath/wil6210/main.c +++ b/drivers/net/wireless/ath/wil6210/main.c @@ -232,6 +232,9 @@ static void _wil6210_disconnect(struct wil6210_priv *wil, const u8 *bssid, struct net_device *ndev = wil_to_ndev(wil); struct wireless_dev *wdev = wil->wdev; + if (unlikely(!ndev)) + return; + might_sleep(); wil_info(wil, "%s(bssid=%pM, reason=%d, ev%s)\n", __func__, bssid, reason_code, from_event ? "+" : "-"); diff --git a/drivers/net/wireless/ath/wil6210/netdev.c b/drivers/net/wireless/ath/wil6210/netdev.c index 098409753d5b..4bc9bb0a6cb4 100644 --- a/drivers/net/wireless/ath/wil6210/netdev.c +++ b/drivers/net/wireless/ath/wil6210/netdev.c @@ -179,13 +179,6 @@ void *wil_if_alloc(struct device *dev) SET_NETDEV_DEV(ndev, wiphy_dev(wdev->wiphy)); wdev->netdev = ndev; - netif_napi_add(ndev, &wil->napi_rx, wil6210_netdev_poll_rx, - WIL6210_NAPI_BUDGET); - netif_tx_napi_add(ndev, &wil->napi_tx, wil6210_netdev_poll_tx, - WIL6210_NAPI_BUDGET); - - netif_tx_stop_all_queues(ndev); - return wil; out_priv: @@ -216,25 +209,46 @@ void wil_if_free(struct wil6210_priv *wil) int wil_if_add(struct wil6210_priv *wil) { + struct wireless_dev *wdev = wil_to_wdev(wil); + struct wiphy *wiphy = wdev->wiphy; struct net_device *ndev = wil_to_ndev(wil); int rc; - wil_dbg_misc(wil, "%s()\n", __func__); + wil_dbg_misc(wil, "entered"); + + rc = wiphy_register(wiphy); + if (rc < 0) { + wil_err(wil, "failed to register wiphy, err %d\n", rc); + return rc; + } + + netif_napi_add(ndev, &wil->napi_rx, wil6210_netdev_poll_rx, + WIL6210_NAPI_BUDGET); + netif_tx_napi_add(ndev, &wil->napi_tx, wil6210_netdev_poll_tx, + WIL6210_NAPI_BUDGET); + + netif_tx_stop_all_queues(ndev); rc = register_netdev(ndev); if (rc < 0) { dev_err(&ndev->dev, "Failed to register netdev: %d\n", rc); - return rc; + goto out_wiphy; } return 0; + +out_wiphy: + wiphy_unregister(wdev->wiphy); + return rc; } void wil_if_remove(struct wil6210_priv *wil) { struct net_device *ndev = wil_to_ndev(wil); + struct wireless_dev *wdev = wil_to_wdev(wil); wil_dbg_misc(wil, "%s()\n", __func__); unregister_netdev(ndev); + wiphy_unregister(wdev->wiphy); } From 246779dd090bd1b74d2652b3a6ca7759f593b27a Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 18 Aug 2016 16:50:56 +0800 Subject: [PATCH 0341/1715] rhashtable: Remove GFP flag from rhashtable_walk_init The commit 8f6fd83c6c5ec66a4a70c728535ddcdfef4f3697 ("rhashtable: accept GFP flags in rhashtable_walk_init") added a GFP flag argument to rhashtable_walk_init because some users wish to use the walker in an unsleepable context. In fact we don't need to allocate memory in rhashtable_walk_init at all. The walker is always paired with an iterator so we could just stash ourselves there. This patch does that by introducing a new enter function to replace the existing init function. This way we don't have to churn all the existing users again. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 14 +++++++++--- lib/rhashtable.c | 46 +++++++++++++++----------------------- 2 files changed, 29 insertions(+), 31 deletions(-) diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 3eef0802a0cd..8b72ee710f95 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -173,7 +173,7 @@ struct rhashtable_walker { struct rhashtable_iter { struct rhashtable *ht; struct rhash_head *p; - struct rhashtable_walker *walker; + struct rhashtable_walker walker; unsigned int slot; unsigned int skip; }; @@ -346,8 +346,8 @@ struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht, struct bucket_table *old_tbl); int rhashtable_insert_rehash(struct rhashtable *ht, struct bucket_table *tbl); -int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter, - gfp_t gfp); +void rhashtable_walk_enter(struct rhashtable *ht, + struct rhashtable_iter *iter); void rhashtable_walk_exit(struct rhashtable_iter *iter); int rhashtable_walk_start(struct rhashtable_iter *iter) __acquires(RCU); void *rhashtable_walk_next(struct rhashtable_iter *iter); @@ -906,4 +906,12 @@ static inline int rhashtable_replace_fast( return err; } +/* Obsolete function, do not use in new code. */ +static inline int rhashtable_walk_init(struct rhashtable *ht, + struct rhashtable_iter *iter, gfp_t gfp) +{ + rhashtable_walk_enter(ht, iter); + return 0; +} + #endif /* _LINUX_RHASHTABLE_H */ diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 5ba520b544d7..97e3cf08142c 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -489,10 +489,9 @@ exit: EXPORT_SYMBOL_GPL(rhashtable_insert_slow); /** - * rhashtable_walk_init - Initialise an iterator + * rhashtable_walk_enter - Initialise an iterator * @ht: Table to walk over * @iter: Hash table Iterator - * @gfp: GFP flags for allocations * * This function prepares a hash table walk. * @@ -507,30 +506,22 @@ EXPORT_SYMBOL_GPL(rhashtable_insert_slow); * This function may sleep so you must not call it from interrupt * context or with spin locks held. * - * You must call rhashtable_walk_exit if this function returns - * successfully. + * You must call rhashtable_walk_exit after this function returns. */ -int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter, - gfp_t gfp) +void rhashtable_walk_enter(struct rhashtable *ht, struct rhashtable_iter *iter) { iter->ht = ht; iter->p = NULL; iter->slot = 0; iter->skip = 0; - iter->walker = kmalloc(sizeof(*iter->walker), gfp); - if (!iter->walker) - return -ENOMEM; - spin_lock(&ht->lock); - iter->walker->tbl = + iter->walker.tbl = rcu_dereference_protected(ht->tbl, lockdep_is_held(&ht->lock)); - list_add(&iter->walker->list, &iter->walker->tbl->walkers); + list_add(&iter->walker.list, &iter->walker.tbl->walkers); spin_unlock(&ht->lock); - - return 0; } -EXPORT_SYMBOL_GPL(rhashtable_walk_init); +EXPORT_SYMBOL_GPL(rhashtable_walk_enter); /** * rhashtable_walk_exit - Free an iterator @@ -541,10 +532,9 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_init); void rhashtable_walk_exit(struct rhashtable_iter *iter) { spin_lock(&iter->ht->lock); - if (iter->walker->tbl) - list_del(&iter->walker->list); + if (iter->walker.tbl) + list_del(&iter->walker.list); spin_unlock(&iter->ht->lock); - kfree(iter->walker); } EXPORT_SYMBOL_GPL(rhashtable_walk_exit); @@ -570,12 +560,12 @@ int rhashtable_walk_start(struct rhashtable_iter *iter) rcu_read_lock(); spin_lock(&ht->lock); - if (iter->walker->tbl) - list_del(&iter->walker->list); + if (iter->walker.tbl) + list_del(&iter->walker.list); spin_unlock(&ht->lock); - if (!iter->walker->tbl) { - iter->walker->tbl = rht_dereference_rcu(ht->tbl, ht); + if (!iter->walker.tbl) { + iter->walker.tbl = rht_dereference_rcu(ht->tbl, ht); return -EAGAIN; } @@ -597,7 +587,7 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_start); */ void *rhashtable_walk_next(struct rhashtable_iter *iter) { - struct bucket_table *tbl = iter->walker->tbl; + struct bucket_table *tbl = iter->walker.tbl; struct rhashtable *ht = iter->ht; struct rhash_head *p = iter->p; @@ -630,8 +620,8 @@ next: /* Ensure we see any new tables. */ smp_rmb(); - iter->walker->tbl = rht_dereference_rcu(tbl->future_tbl, ht); - if (iter->walker->tbl) { + iter->walker.tbl = rht_dereference_rcu(tbl->future_tbl, ht); + if (iter->walker.tbl) { iter->slot = 0; iter->skip = 0; return ERR_PTR(-EAGAIN); @@ -651,7 +641,7 @@ void rhashtable_walk_stop(struct rhashtable_iter *iter) __releases(RCU) { struct rhashtable *ht; - struct bucket_table *tbl = iter->walker->tbl; + struct bucket_table *tbl = iter->walker.tbl; if (!tbl) goto out; @@ -660,9 +650,9 @@ void rhashtable_walk_stop(struct rhashtable_iter *iter) spin_lock(&ht->lock); if (tbl->rehash < tbl->size) - list_add(&iter->walker->list, &tbl->walkers); + list_add(&iter->walker.list, &tbl->walkers); else - iter->walker->tbl = NULL; + iter->walker.tbl = NULL; spin_unlock(&ht->lock); iter->p = NULL; From 39ec406d122be703ee51cb2754fe18b1f7037fa0 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 18 Aug 2016 16:50:57 +0800 Subject: [PATCH 0342/1715] MAINTAINERS: Add extra rhashtable maintainer As I'm working actively on rhashtable it helps if people CCed me when they work on in. Signed-off-by: Herbert Xu Acked-by: Thomas Graf Signed-off-by: David S. Miller --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index e902b635cbfc..99f95279e45b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9936,6 +9936,7 @@ F: net/rfkill/ RHASHTABLE M: Thomas Graf +M: Herbert Xu L: netdev@vger.kernel.org S: Maintained F: lib/rhashtable.c From ad202074320cd75b31b8cdb58cca0d4ef6aaea8a Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 19 Aug 2016 16:21:37 +0800 Subject: [PATCH 0343/1715] netlink: Use rhashtable walk interface in diag dump This patch converts the diag dumping code to use the rhashtable walk code instead of going through rhashtable by hand. The lock nl_table_lock is now only taken while we process the multicast list as it's not needed for the rhashtable walk. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/netlink/diag.c | 103 ++++++++++++++++++++++++++++++++------------- 1 file changed, 73 insertions(+), 30 deletions(-) diff --git a/net/netlink/diag.c b/net/netlink/diag.c index 8dd836a8dd60..3e3e2534478a 100644 --- a/net/netlink/diag.c +++ b/net/netlink/diag.c @@ -63,43 +63,75 @@ out_nlmsg_trim: static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, int protocol, int s_num) { + struct rhashtable_iter *hti = (void *)cb->args[2]; struct netlink_table *tbl = &nl_table[protocol]; - struct rhashtable *ht = &tbl->hash; - const struct bucket_table *htbl = rht_dereference_rcu(ht->tbl, ht); struct net *net = sock_net(skb->sk); struct netlink_diag_req *req; struct netlink_sock *nlsk; struct sock *sk; - int ret = 0, num = 0, i; + int num = 2; + int ret = 0; req = nlmsg_data(cb->nlh); - for (i = 0; i < htbl->size; i++) { - struct rhash_head *pos; + if (s_num > 1) + goto mc_list; - rht_for_each_entry_rcu(nlsk, pos, htbl, i, node) { - sk = (struct sock *)nlsk; + num--; - if (!net_eq(sock_net(sk), net)) - continue; - if (num < s_num) { - num++; + if (!hti) { + hti = kmalloc(sizeof(*hti), GFP_KERNEL); + if (!hti) + return -ENOMEM; + + cb->args[2] = (long)hti; + } + + if (!s_num) + rhashtable_walk_enter(&tbl->hash, hti); + + ret = rhashtable_walk_start(hti); + if (ret == -EAGAIN) + ret = 0; + if (ret) + goto stop; + + while ((nlsk = rhashtable_walk_next(hti))) { + if (IS_ERR(nlsk)) { + ret = PTR_ERR(nlsk); + if (ret == -EAGAIN) { + ret = 0; continue; } + break; + } - if (sk_diag_fill(sk, skb, req, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - NLM_F_MULTI, - sock_i_ino(sk)) < 0) { - ret = 1; - goto done; - } + sk = (struct sock *)nlsk; - num++; + if (!net_eq(sock_net(sk), net)) + continue; + + if (sk_diag_fill(sk, skb, req, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI, + sock_i_ino(sk)) < 0) { + ret = 1; + break; } } +stop: + rhashtable_walk_stop(hti); + if (ret) + goto done; + + rhashtable_walk_exit(hti); + cb->args[2] = 0; + num++; + +mc_list: + read_lock(&nl_table_lock); sk_for_each_bound(sk, &tbl->mc_list) { if (sk_hashed(sk)) continue; @@ -116,13 +148,14 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, NLM_F_MULTI, sock_i_ino(sk)) < 0) { ret = 1; - goto done; + break; } num++; } + read_unlock(&nl_table_lock); + done: cb->args[0] = num; - cb->args[1] = protocol; return ret; } @@ -131,20 +164,20 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) { struct netlink_diag_req *req; int s_num = cb->args[0]; + int err = 0; req = nlmsg_data(cb->nlh); - rcu_read_lock(); - read_lock(&nl_table_lock); - if (req->sdiag_protocol == NDIAG_PROTO_ALL) { int i; for (i = cb->args[1]; i < MAX_LINKS; i++) { - if (__netlink_diag_dump(skb, cb, i, s_num)) + err = __netlink_diag_dump(skb, cb, i, s_num); + if (err) break; s_num = 0; } + cb->args[1] = i; } else { if (req->sdiag_protocol >= MAX_LINKS) { read_unlock(&nl_table_lock); @@ -152,13 +185,22 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) return -ENOENT; } - __netlink_diag_dump(skb, cb, req->sdiag_protocol, s_num); + err = __netlink_diag_dump(skb, cb, req->sdiag_protocol, s_num); } - read_unlock(&nl_table_lock); - rcu_read_unlock(); + return err < 0 ? err : skb->len; +} - return skb->len; +static int netlink_diag_dump_done(struct netlink_callback *cb) +{ + struct rhashtable_iter *hti = (void *)cb->args[2]; + + if (cb->args[0] == 1) + rhashtable_walk_exit(hti); + + kfree(hti); + + return 0; } static int netlink_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) @@ -172,6 +214,7 @@ static int netlink_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) if (h->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { .dump = netlink_diag_dump, + .done = netlink_diag_dump_done, }; return netlink_dump_start(net->diag_nlsk, skb, h, &c); } else From e018068812e54c407da599513bf8ad2d99fd0eaf Mon Sep 17 00:00:00 2001 From: oulijun Date: Thu, 18 Aug 2016 20:32:52 +0800 Subject: [PATCH 0344/1715] net: hns: Add reset function support for RoCE driver It added reset function for RoCE driver. RoCE is a feature of hns. In hip06 SoC, in RoCE reset process, it's needed to configure dsaf channel reset, port and sl map info. Reset function of RoCE is located in dsaf module, we only call it in RoCE driver when needed. This patch is used to fix the conflict, please refer to this link: https://www.spinics.net/lists/linux-rdma/msg39114.html Signed-off-by: Wei Hu Signed-off-by: Nenglong Zhao Signed-off-by: Lijun Ou Signed-off-by: Sheng Li Reviewed-by: Yisen Zhuang Signed-off-by: David S. Miller --- .../ethernet/hisilicon/hns/hns_dsaf_main.c | 84 +++++++++++++++++++ .../ethernet/hisilicon/hns/hns_dsaf_main.h | 30 +++++++ .../ethernet/hisilicon/hns/hns_dsaf_misc.c | 36 ++++++++ .../net/ethernet/hisilicon/hns/hns_dsaf_reg.h | 12 +++ 4 files changed, 162 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c index afb5daa3721d..05bd19f9ebc5 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -2781,6 +2782,89 @@ static struct platform_driver g_dsaf_driver = { module_platform_driver(g_dsaf_driver); +/** + * hns_dsaf_roce_reset - reset dsaf and roce + * @dsaf_fwnode: Pointer to framework node for the dasf + * @enable: false - request reset , true - drop reset + * retuen 0 - success , negative -fail + */ +int hns_dsaf_roce_reset(struct fwnode_handle *dsaf_fwnode, bool enable) +{ + struct dsaf_device *dsaf_dev; + struct platform_device *pdev; + u32 mp; + u32 sl; + u32 credit; + int i; + const u32 port_map[DSAF_ROCE_CREDIT_CHN][DSAF_ROCE_CHAN_MODE_NUM] = { + {DSAF_ROCE_PORT_0, DSAF_ROCE_PORT_0, DSAF_ROCE_PORT_0}, + {DSAF_ROCE_PORT_1, DSAF_ROCE_PORT_0, DSAF_ROCE_PORT_0}, + {DSAF_ROCE_PORT_2, DSAF_ROCE_PORT_1, DSAF_ROCE_PORT_0}, + {DSAF_ROCE_PORT_3, DSAF_ROCE_PORT_1, DSAF_ROCE_PORT_0}, + {DSAF_ROCE_PORT_4, DSAF_ROCE_PORT_2, DSAF_ROCE_PORT_1}, + {DSAF_ROCE_PORT_4, DSAF_ROCE_PORT_2, DSAF_ROCE_PORT_1}, + {DSAF_ROCE_PORT_5, DSAF_ROCE_PORT_3, DSAF_ROCE_PORT_1}, + {DSAF_ROCE_PORT_5, DSAF_ROCE_PORT_3, DSAF_ROCE_PORT_1}, + }; + const u32 sl_map[DSAF_ROCE_CREDIT_CHN][DSAF_ROCE_CHAN_MODE_NUM] = { + {DSAF_ROCE_SL_0, DSAF_ROCE_SL_0, DSAF_ROCE_SL_0}, + {DSAF_ROCE_SL_0, DSAF_ROCE_SL_1, DSAF_ROCE_SL_1}, + {DSAF_ROCE_SL_0, DSAF_ROCE_SL_0, DSAF_ROCE_SL_2}, + {DSAF_ROCE_SL_0, DSAF_ROCE_SL_1, DSAF_ROCE_SL_3}, + {DSAF_ROCE_SL_0, DSAF_ROCE_SL_0, DSAF_ROCE_SL_0}, + {DSAF_ROCE_SL_1, DSAF_ROCE_SL_1, DSAF_ROCE_SL_1}, + {DSAF_ROCE_SL_0, DSAF_ROCE_SL_0, DSAF_ROCE_SL_2}, + {DSAF_ROCE_SL_1, DSAF_ROCE_SL_1, DSAF_ROCE_SL_3}, + }; + + if (!is_of_node(dsaf_fwnode)) { + pr_err("hisi_dsaf: Only support DT node!\n"); + return -EINVAL; + } + pdev = of_find_device_by_node(to_of_node(dsaf_fwnode)); + dsaf_dev = dev_get_drvdata(&pdev->dev); + if (AE_IS_VER1(dsaf_dev->dsaf_ver)) { + dev_err(dsaf_dev->dev, "%s v1 chip doesn't support RoCE!\n", + dsaf_dev->ae_dev.name); + return -ENODEV; + } + + if (!enable) { + /* Reset rocee-channels in dsaf and rocee */ + hns_dsaf_srst_chns(dsaf_dev, DSAF_CHNS_MASK, false); + hns_dsaf_roce_srst(dsaf_dev, false); + } else { + /* Configure dsaf tx roce correspond to port map and sl map */ + mp = dsaf_read_dev(dsaf_dev, DSAF_ROCE_PORT_MAP_REG); + for (i = 0; i < DSAF_ROCE_CREDIT_CHN; i++) + dsaf_set_field(mp, 7 << i * 3, i * 3, + port_map[i][DSAF_ROCE_6PORT_MODE]); + dsaf_set_field(mp, 3 << i * 3, i * 3, 0); + dsaf_write_dev(dsaf_dev, DSAF_ROCE_PORT_MAP_REG, mp); + + sl = dsaf_read_dev(dsaf_dev, DSAF_ROCE_SL_MAP_REG); + for (i = 0; i < DSAF_ROCE_CREDIT_CHN; i++) + dsaf_set_field(sl, 3 << i * 2, i * 2, + sl_map[i][DSAF_ROCE_6PORT_MODE]); + dsaf_write_dev(dsaf_dev, DSAF_ROCE_SL_MAP_REG, sl); + + /* De-reset rocee-channels in dsaf and rocee */ + hns_dsaf_srst_chns(dsaf_dev, DSAF_CHNS_MASK, true); + msleep(SRST_TIME_INTERVAL); + hns_dsaf_roce_srst(dsaf_dev, true); + + /* Eanble dsaf channel rocee credit */ + credit = dsaf_read_dev(dsaf_dev, DSAF_SBM_ROCEE_CFG_REG_REG); + dsaf_set_bit(credit, DSAF_SBM_ROCEE_CFG_CRD_EN_B, 0); + dsaf_write_dev(dsaf_dev, DSAF_SBM_ROCEE_CFG_REG_REG, credit); + + dsaf_set_bit(credit, DSAF_SBM_ROCEE_CFG_CRD_EN_B, 1); + dsaf_write_dev(dsaf_dev, DSAF_SBM_ROCEE_CFG_REG_REG, credit); + } + return 0; +} +EXPORT_SYMBOL(hns_dsaf_roce_reset); + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Huawei Tech. Co., Ltd."); MODULE_DESCRIPTION("HNS DSAF driver"); diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h index 1daf018d9071..f3681d566ae6 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h @@ -43,6 +43,32 @@ struct hns_mac_cb; #define DSAF_PRIO_NR 8 #define DSAF_REG_PER_ZONE 3 +#define DSAF_ROCE_CREDIT_CHN 8 +#define DSAF_ROCE_CHAN_MODE 3 + +enum dsaf_roce_port_mode { + DSAF_ROCE_6PORT_MODE, + DSAF_ROCE_4PORT_MODE, + DSAF_ROCE_2PORT_MODE, + DSAF_ROCE_CHAN_MODE_NUM, +}; + +enum dsaf_roce_port_num { + DSAF_ROCE_PORT_0, + DSAF_ROCE_PORT_1, + DSAF_ROCE_PORT_2, + DSAF_ROCE_PORT_3, + DSAF_ROCE_PORT_4, + DSAF_ROCE_PORT_5, +}; + +enum dsaf_roce_qos_sl { + DSAF_ROCE_SL_0, + DSAF_ROCE_SL_1, + DSAF_ROCE_SL_2, + DSAF_ROCE_SL_3, +}; + #define DSAF_STATS_READ(p, offset) (*((u64 *)((u8 *)(p) + (offset)))) #define HNS_DSAF_IS_DEBUG(dev) (dev->dsaf_mode == DSAF_MODE_DISABLE_SP) @@ -419,6 +445,10 @@ int hns_dsaf_get_mac_entry_by_index( void hns_dsaf_fix_mac_mode(struct hns_mac_cb *mac_cb); +void hns_dsaf_srst_chns(struct dsaf_device *dsaf_dev, u32 msk, bool enable); + +void hns_dsaf_roce_srst(struct dsaf_device *dsaf_dev, bool enable); + int hns_dsaf_ae_init(struct dsaf_device *dsaf_dev); void hns_dsaf_ae_uninit(struct dsaf_device *dsaf_dev); diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c index 611b67b6f450..36b9f791cf2f 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c @@ -231,6 +231,42 @@ static void hns_dsaf_xge_core_srst_by_port(struct dsaf_device *dsaf_dev, dsaf_write_sub(dsaf_dev, reg_addr, reg_val); } +/** + * hns_dsaf_srst_chns - reset dsaf channels + * @dsaf_dev: dsaf device struct pointer + * @msk: xbar channels mask value: + * bit0-5 for xge0-5 + * bit6-11 for ppe0-5 + * bit12-17 for roce0-5 + * bit18-19 for com/dfx + * @enable: false - request reset , true - drop reset + */ +void hns_dsaf_srst_chns(struct dsaf_device *dsaf_dev, u32 msk, bool enable) +{ + u32 reg_addr; + + if (!enable) + reg_addr = DSAF_SUB_SC_DSAF_RESET_REQ_REG; + else + reg_addr = DSAF_SUB_SC_DSAF_RESET_DREQ_REG; + + dsaf_write_sub(dsaf_dev, reg_addr, msk); +} + +void hns_dsaf_roce_srst(struct dsaf_device *dsaf_dev, bool enable) +{ + if (!enable) { + dsaf_write_sub(dsaf_dev, DSAF_SUB_SC_ROCEE_RESET_REQ_REG, 1); + } else { + dsaf_write_sub(dsaf_dev, + DSAF_SUB_SC_ROCEE_CLK_DIS_REG, 1); + dsaf_write_sub(dsaf_dev, + DSAF_SUB_SC_ROCEE_RESET_DREQ_REG, 1); + msleep(20); + dsaf_write_sub(dsaf_dev, DSAF_SUB_SC_ROCEE_CLK_EN_REG, 1); + } +} + static void hns_dsaf_xge_core_srst_by_port_acpi(struct dsaf_device *dsaf_dev, u32 port, bool dereset) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h index 235f74444b1d..13c16ab7be48 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h @@ -77,6 +77,12 @@ #define DSAF_SUB_SC_PPE_RESET_DREQ_REG 0xA4C #define DSAF_SUB_SC_RCB_PPE_COM_RESET_REQ_REG 0xA88 #define DSAF_SUB_SC_RCB_PPE_COM_RESET_DREQ_REG 0xA8C +#define DSAF_SUB_SC_DSAF_RESET_REQ_REG 0xAA8 +#define DSAF_SUB_SC_ROCEE_RESET_REQ_REG 0xA50 +#define DSAF_SUB_SC_DSAF_RESET_DREQ_REG 0xAAC +#define DSAF_SUB_SC_ROCEE_CLK_DIS_REG 0x32C +#define DSAF_SUB_SC_ROCEE_RESET_DREQ_REG 0xA54 +#define DSAF_SUB_SC_ROCEE_CLK_EN_REG 0x328 #define DSAF_SUB_SC_LIGHT_MODULE_DETECT_EN_REG 0x2060 #define DSAF_SUB_SC_TCAM_MBIST_EN_REG 0x2300 #define DSAF_SUB_SC_DSAF_CLK_ST_REG 0x5300 @@ -133,6 +139,8 @@ #define DSAF_ROCEE_INT_STS_0_REG 0x200 #define DSAFV2_SERDES_LBK_0_REG 0x220 #define DSAF_PAUSE_CFG_REG 0x240 +#define DSAF_ROCE_PORT_MAP_REG 0x2A0 +#define DSAF_ROCE_SL_MAP_REG 0x2A4 #define DSAF_PPE_QID_CFG_0_REG 0x300 #define DSAF_SW_PORT_TYPE_0_REG 0x320 #define DSAF_STP_PORT_TYPE_0_REG 0x340 @@ -178,6 +186,7 @@ #define DSAF_SBM_BP_CFG_2_XGE_REG_0_REG 0x200C #define DSAF_SBM_BP_CFG_2_PPE_REG_0_REG 0x230C #define DSAF_SBM_BP_CFG_2_ROCEE_REG_0_REG 0x260C +#define DSAF_SBM_ROCEE_CFG_REG_REG 0x2380 #define DSAFV2_SBM_BP_CFG_2_ROCEE_REG_0_REG 0x238C #define DSAF_SBM_FREE_CNT_0_0_REG 0x2010 #define DSAF_SBM_FREE_CNT_1_0_REG 0x2014 @@ -796,6 +805,9 @@ #define DSAFV2_SBM_CFG4_RESET_BUF_NUM_NO_PFC_S 9 #define DSAFV2_SBM_CFG4_RESET_BUF_NUM_NO_PFC_M (((1ULL << 9) - 1) << 9) +#define DSAF_CHNS_MASK 0x3f000 +#define DSAF_SBM_ROCEE_CFG_CRD_EN_B 2 +#define SRST_TIME_INTERVAL 20 #define DSAFV2_SBM_CFG2_ROCEE_SET_BUF_NUM_S 0 #define DSAFV2_SBM_CFG2_ROCEE_SET_BUF_NUM_M (((1ULL << 8) - 1) << 0) #define DSAFV2_SBM_CFG2_ROCEE_RESET_BUF_NUM_S 8 From dfad09a6da60d6426b1193029089ef008891f007 Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Thu, 18 Aug 2016 11:37:51 -0500 Subject: [PATCH 0345/1715] ibmvnic: Handle backing device failover and reinitialization An upcoming feature of IBM VNIC protocol is the ability to configure redundant backing devices for a VNIC client. In case of a failure on the current backing device, the driver will receive a signal from the hypervisor indicating that a failover will occur. The driver will then wait for a message from the backing device before establishing a new connection. Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 34 ++++++++++++++++++++++++++++-- drivers/net/ethernet/ibm/ibmvnic.h | 2 ++ 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 88f3c85fb04a..b942108c85c1 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -203,7 +203,8 @@ static void free_long_term_buff(struct ibmvnic_adapter *adapter, struct device *dev = &adapter->vdev->dev; dma_free_coherent(dev, ltb->size, ltb->buff, ltb->addr); - send_request_unmap(adapter, ltb->map_id); + if (!adapter->failover) + send_request_unmap(adapter, ltb->map_id); } static int alloc_rx_pool(struct ibmvnic_adapter *adapter, @@ -522,7 +523,8 @@ static int ibmvnic_close(struct net_device *netdev) for (i = 0; i < adapter->req_rx_queues; i++) napi_disable(&adapter->napi[i]); - netif_tx_stop_all_queues(netdev); + if (!adapter->failover) + netif_tx_stop_all_queues(netdev); if (adapter->bounce_buffer) { if (!dma_mapping_error(dev, adapter->bounce_buffer_dma)) { @@ -3280,6 +3282,10 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq, rc = ibmvnic_send_crq_init(adapter); if (rc) dev_err(dev, "Error sending init rc=%ld\n", rc); + } else if (gen_crq->cmd == IBMVNIC_DEVICE_FAILOVER) { + dev_info(dev, "Backing device failover detected\n"); + netif_carrier_off(netdev); + adapter->failover = true; } else { /* The adapter lost the connection */ dev_err(dev, "Virtual Adapter failed (rc=%d)\n", @@ -3615,8 +3621,18 @@ static void handle_crq_init_rsp(struct work_struct *work) struct device *dev = &adapter->vdev->dev; struct net_device *netdev = adapter->netdev; unsigned long timeout = msecs_to_jiffies(30000); + bool restart = false; int rc; + if (adapter->failover) { + release_sub_crqs(adapter); + if (netif_running(netdev)) { + netif_tx_disable(netdev); + ibmvnic_close(netdev); + restart = true; + } + } + send_version_xchg(adapter); reinit_completion(&adapter->init_done); if (!wait_for_completion_timeout(&adapter->init_done, timeout)) { @@ -3645,6 +3661,17 @@ static void handle_crq_init_rsp(struct work_struct *work) netdev->real_num_tx_queues = adapter->req_tx_queues; + if (adapter->failover) { + adapter->failover = false; + if (restart) { + rc = ibmvnic_open(netdev); + if (rc) + goto restart_failed; + } + netif_carrier_on(netdev); + return; + } + rc = register_netdev(netdev); if (rc) { dev_err(dev, @@ -3655,6 +3682,8 @@ static void handle_crq_init_rsp(struct work_struct *work) return; +restart_failed: + dev_err(dev, "Failed to restart ibmvnic, rc=%d\n", rc); register_failed: release_sub_crqs(adapter); task_failed: @@ -3692,6 +3721,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) dev_set_drvdata(&dev->dev, netdev); adapter->vdev = dev; adapter->netdev = netdev; + adapter->failover = false; ether_addr_copy(adapter->mac_addr, mac_addr_p); ether_addr_copy(netdev->dev_addr, adapter->mac_addr); diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index e82898fd518e..bfc84c7d0e11 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -830,6 +830,7 @@ enum ibmvfc_crq_format { IBMVNIC_CRQ_INIT = 0x01, IBMVNIC_CRQ_INIT_COMPLETE = 0x02, IBMVNIC_PARTITION_MIGRATED = 0x06, + IBMVNIC_DEVICE_FAILOVER = 0x08, }; struct ibmvnic_crq_queue { @@ -1047,4 +1048,5 @@ struct ibmvnic_adapter { u8 map_id; struct work_struct vnic_crq_init; + bool failover; }; From b6c6b645d29158acb41d6fb2031b289d4f0fc936 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 18 Aug 2016 09:49:55 -0700 Subject: [PATCH 0346/1715] tcp: md5: remove tcp_md5_hash_header() After commit 19689e38eca5 ("tcp: md5: use kmalloc() backed scratch areas") this function is no longer used. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 032a96d78c99..f1a9a0a8a1f3 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3092,23 +3092,6 @@ struct tcp_md5sig_pool *tcp_get_md5sig_pool(void) } EXPORT_SYMBOL(tcp_get_md5sig_pool); -int tcp_md5_hash_header(struct tcp_md5sig_pool *hp, - const struct tcphdr *th) -{ - struct scatterlist sg; - struct tcphdr hdr; - - /* We are not allowed to change tcphdr, make a local copy */ - memcpy(&hdr, th, sizeof(hdr)); - hdr.check = 0; - - /* options aren't included in the hash */ - sg_init_one(&sg, &hdr, sizeof(hdr)); - ahash_request_set_crypt(hp->md5_req, &sg, NULL, sizeof(hdr)); - return crypto_ahash_update(hp->md5_req); -} -EXPORT_SYMBOL(tcp_md5_hash_header); - int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, const struct sk_buff *skb, unsigned int header_len) { From 217375a0c677d2e885cb05f767af34467399b424 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 18 Aug 2016 09:59:12 -0700 Subject: [PATCH 0347/1715] udp: include addrconf.h Include ipv4_rcv_saddr_equal() definition to avoid this sparse error : net/ipv4/udp.c:362:5: warning: symbol 'ipv4_rcv_saddr_equal' was not declared. Should it be static? Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/udp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index e61f7cd65d08..8f5f7f6026f7 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -114,6 +114,7 @@ #include #include "udp_impl.h" #include +#include struct udp_table udp_table __read_mostly; EXPORT_SYMBOL(udp_table); From d985d15151c0e9d3add8050da77d8f507f6cc7f5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 18 Aug 2016 10:19:34 -0700 Subject: [PATCH 0348/1715] net: ipv4: fix sparse error in fib_good_nh() Fixes following sparse errors : net/ipv4/fib_semantics.c:1579:61: warning: incorrect type in argument 2 (different base types) net/ipv4/fib_semantics.c:1579:61: expected unsigned int [unsigned] [usertype] key net/ipv4/fib_semantics.c:1579:61: got restricted __be32 const [usertype] nh_gw Fixes: a6db4494d218c ("net: ipv4: Consider failed nexthops in multipath routes") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/fib_semantics.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 539fa264e67d..8066ccc48a17 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1576,7 +1576,8 @@ static bool fib_good_nh(const struct fib_nh *nh) rcu_read_lock_bh(); - n = __ipv4_neigh_lookup_noref(nh->nh_dev, nh->nh_gw); + n = __ipv4_neigh_lookup_noref(nh->nh_dev, + (__force u32)nh->nh_gw); if (n) state = n->nud_state; From d75b4a22b255471906e794fe988b18de158cedec Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Thu, 18 Aug 2016 23:56:05 +0200 Subject: [PATCH 0349/1715] net: phy: Sort Makefile and Kconfig Sort the files to reduce merge conflicts and to make it easier to find drivers by name. Also separate the MDIO bus drivers from the PHY drivers, again to help find what you need. Signed-off-by: Andrew Lunn Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/Kconfig | 459 ++++++++++++++++++++------------------- drivers/net/phy/Makefile | 78 +++---- 2 files changed, 270 insertions(+), 267 deletions(-) diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index d66133bf3eb5..786e8b0b2d06 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -15,159 +15,24 @@ if PHYLIB config SWPHY bool -comment "MII PHY device drivers" +comment "MDIO bus device drivers" -config AQUANTIA_PHY - tristate "Drivers for the Aquantia PHYs" - ---help--- - Currently supports the Aquantia AQ1202, AQ2104, AQR105, AQR405 - -config AT803X_PHY - tristate "Drivers for Atheros AT803X PHYs" - ---help--- - Currently supports the AT8030 and AT8035 model - -config AMD_PHY - tristate "Drivers for the AMD PHYs" - ---help--- - Currently supports the am79c874 - -config MARVELL_PHY - tristate "Drivers for Marvell PHYs" - ---help--- - Currently has a driver for the 88E1011S - -config DAVICOM_PHY - tristate "Drivers for Davicom PHYs" - ---help--- - Currently supports dm9161e and dm9131 - -config QSEMI_PHY - tristate "Drivers for Quality Semiconductor PHYs" - ---help--- - Currently supports the qs6612 - -config LXT_PHY - tristate "Drivers for the Intel LXT PHYs" - ---help--- - Currently supports the lxt970, lxt971 - -config CICADA_PHY - tristate "Drivers for the Cicada PHYs" - ---help--- - Currently supports the cis8204 - -config VITESSE_PHY - tristate "Drivers for the Vitesse PHYs" - ---help--- - Currently supports the vsc8244 - -config TERANETICS_PHY - tristate "Drivers for the Teranetics PHYs" - ---help--- - Currently supports the Teranetics TN2020 - -config SMSC_PHY - tristate "Drivers for SMSC PHYs" - ---help--- - Currently supports the LAN83C185, LAN8187 and LAN8700 PHYs - -config BCM_NET_PHYLIB - tristate - -config BROADCOM_PHY - tristate "Drivers for Broadcom PHYs" - select BCM_NET_PHYLIB - ---help--- - Currently supports the BCM5411, BCM5421, BCM5461, BCM54616S, BCM5464, - BCM5481 and BCM5482 PHYs. - -config BCM_CYGNUS_PHY - tristate "Drivers for Broadcom Cygnus SoC internal PHY" - depends on ARCH_BCM_CYGNUS || COMPILE_TEST - depends on MDIO_BCM_IPROC - select BCM_NET_PHYLIB - ---help--- - This PHY driver is for the 1G internal PHYs of the Broadcom - Cygnus Family SoC. - - Currently supports internal PHY's used in the BCM11300, - BCM11320, BCM11350, BCM11360, BCM58300, BCM58302, - BCM58303 & BCM58305 Broadcom Cygnus SoCs. - -config BCM63XX_PHY - tristate "Drivers for Broadcom 63xx SOCs internal PHY" - depends on BCM63XX - select BCM_NET_PHYLIB - ---help--- - Currently supports the 6348 and 6358 PHYs. - -config BCM7XXX_PHY - tristate "Drivers for Broadcom 7xxx SOCs internal PHYs" - select BCM_NET_PHYLIB - ---help--- - Currently supports the BCM7366, BCM7439, BCM7445, and - 40nm and 65nm generation of BCM7xxx Set Top Box SoCs. - -config BCM87XX_PHY - tristate "Driver for Broadcom BCM8706 and BCM8727 PHYs" +config MDIO_BCM_IPROC + tristate "Broadcom iProc MDIO bus controller" + depends on ARCH_BCM_IPROC || COMPILE_TEST + depends on HAS_IOMEM && OF_MDIO help - Currently supports the BCM8706 and BCM8727 10G Ethernet PHYs. + This module provides a driver for the MDIO busses found in the + Broadcom iProc SoC's. -config ICPLUS_PHY - tristate "Drivers for ICPlus PHYs" - ---help--- - Currently supports the IP175C and IP1001 PHYs. - -config REALTEK_PHY - tristate "Drivers for Realtek PHYs" - ---help--- - Supports the Realtek 821x PHY. - -config NATIONAL_PHY - tristate "Drivers for National Semiconductor PHYs" - ---help--- - Currently supports the DP83865 PHY. - -config STE10XP - tristate "Driver for STMicroelectronics STe10Xp PHYs" - ---help--- - This is the driver for the STe100p and STe101p PHYs. - -config LSI_ET1011C_PHY - tristate "Driver for LSI ET1011C PHY" - ---help--- - Supports the LSI ET1011C PHY. - -config MICREL_PHY - tristate "Driver for Micrel PHYs" - ---help--- - Supports the KSZ9021, VSC8201, KS8001 PHYs. - -config DP83848_PHY - tristate "Driver for Texas Instruments DP83848 PHY" - ---help--- - Supports the DP83848 PHY. - -config DP83867_PHY - tristate "Drivers for Texas Instruments DP83867 Gigabit PHY" - ---help--- - Currently supports the DP83867 PHY. - -config MICROCHIP_PHY - tristate "Drivers for Microchip PHYs" +config MDIO_BCM_UNIMAC + tristate "Broadcom UniMAC MDIO bus controller" + depends on HAS_IOMEM help - Supports the LAN88XX PHYs. - -config FIXED_PHY - tristate "Driver for MDIO Bus/PHY emulation with fixed speed/link PHYs" - depends on PHYLIB - select SWPHY - ---help--- - Adds the platform "fixed" MDIO Bus to cover the boards that use - PHYs that are not connected to the real MDIO bus. - - Currently tested with mpc866ads and mpc8349e-mitx. + This module provides a driver for the Broadcom UniMAC MDIO busses. + This hardware can be found in the Broadcom GENET Ethernet MAC + controllers as well as some Broadcom Ethernet switches such as the + Starfighter 2 switches. config MDIO_BITBANG tristate "Support for bitbanged MDIO buses" @@ -178,54 +43,6 @@ config MDIO_BITBANG If in doubt, say N. -config MDIO_GPIO - tristate "Support for GPIO lib-based bitbanged MDIO buses" - depends on MDIO_BITBANG && GPIOLIB - ---help--- - Supports GPIO lib-based MDIO busses. - - To compile this driver as a module, choose M here: the module - will be called mdio-gpio. - -config MDIO_CAVIUM - tristate - -config MDIO_OCTEON - tristate "Support for MDIO buses on Octeon and some ThunderX SOCs" - depends on 64BIT - depends on HAS_IOMEM - select MDIO_CAVIUM - help - This module provides a driver for the Octeon and ThunderX MDIO - buses. It is required by the Octeon and ThunderX ethernet device - drivers on some systems. - -config MDIO_THUNDER - tristate "Support for MDIO buses on ThunderX SOCs" - depends on 64BIT - depends on PCI - select MDIO_CAVIUM - help - This driver supports the MDIO interfaces found on Cavium - ThunderX SoCs when the MDIO bus device appears as a PCI - device. - - -config MDIO_SUN4I - tristate "Allwinner sun4i MDIO interface support" - depends on ARCH_SUNXI - help - This driver supports the MDIO interface found in the network - interface units of the Allwinner SoC that have an EMAC (A10, - A12, A10s, etc.) - -config MDIO_MOXART - tristate "MOXA ART MDIO interface support" - depends on ARCH_MOXART - help - This driver supports the MDIO interface found in the network - interface units of the MOXA ART SoC - config MDIO_BUS_MUX tristate depends on OF_MDIO @@ -235,6 +52,17 @@ config MDIO_BUS_MUX to a parent bus. Switching between child busses is done by device specific drivers. +config MDIO_BUS_MUX_BCM_IPROC + tristate "Support for iProc based MDIO bus multiplexers" + depends on OF && OF_MDIO && (ARCH_BCM_IPROC || COMPILE_TEST) + select MDIO_BUS_MUX + default ARCH_BCM_IPROC + help + This module provides a driver for MDIO bus multiplexers found in + iProc based Broadcom SoCs. This multiplexer connects one of several + child MDIO bus to a parent bus. Buses could be internal as well as + external and selection logic lies inside the same multiplexer. + config MDIO_BUS_MUX_GPIO tristate "Support for GPIO controlled MDIO bus multiplexers" depends on OF_GPIO && OF_MDIO @@ -258,33 +86,159 @@ config MDIO_BUS_MUX_MMIOREG Currently, only 8-bit registers are supported. -config MDIO_BUS_MUX_BCM_IPROC - tristate "Support for iProc based MDIO bus multiplexers" - depends on OF && OF_MDIO && (ARCH_BCM_IPROC || COMPILE_TEST) - select MDIO_BUS_MUX - default ARCH_BCM_IPROC - help - This module provides a driver for MDIO bus multiplexers found in - iProc based Broadcom SoCs. This multiplexer connects one of several - child MDIO bus to a parent bus. Buses could be internal as well as - external and selection logic lies inside the same multiplexer. +config MDIO_CAVIUM + tristate -config MDIO_BCM_UNIMAC - tristate "Broadcom UniMAC MDIO bus controller" - depends on HAS_IOMEM - help - This module provides a driver for the Broadcom UniMAC MDIO busses. - This hardware can be found in the Broadcom GENET Ethernet MAC - controllers as well as some Broadcom Ethernet switches such as the - Starfighter 2 switches. +config MDIO_GPIO + tristate "Support for GPIO lib-based bitbanged MDIO buses" + depends on MDIO_BITBANG && GPIOLIB + ---help--- + Supports GPIO lib-based MDIO busses. -config MDIO_BCM_IPROC - tristate "Broadcom iProc MDIO bus controller" - depends on ARCH_BCM_IPROC || COMPILE_TEST + To compile this driver as a module, choose M here: the module + will be called mdio-gpio. + +config MDIO_HISI_FEMAC + tristate "Hisilicon FEMAC MDIO bus controller" depends on HAS_IOMEM && OF_MDIO help This module provides a driver for the MDIO busses found in the - Broadcom iProc SoC's. + Hisilicon SoC that have an Fast Ethernet MAC. + +config MDIO_MOXART + tristate "MOXA ART MDIO interface support" + depends on ARCH_MOXART + help + This driver supports the MDIO interface found in the network + interface units of the MOXA ART SoC + +config MDIO_OCTEON + tristate "Support for MDIO buses on Octeon and some ThunderX SOCs" + depends on 64BIT + depends on HAS_IOMEM + select MDIO_CAVIUM + help + This module provides a driver for the Octeon and ThunderX MDIO + buses. It is required by the Octeon and ThunderX ethernet device + drivers on some systems. + +config MDIO_SUN4I + tristate "Allwinner sun4i MDIO interface support" + depends on ARCH_SUNXI + help + This driver supports the MDIO interface found in the network + interface units of the Allwinner SoC that have an EMAC (A10, + A12, A10s, etc.) + +config MDIO_THUNDER + tristate "Support for MDIO buses on ThunderX SOCs" + depends on 64BIT + depends on PCI + select MDIO_CAVIUM + help + This driver supports the MDIO interfaces found on Cavium + ThunderX SoCs when the MDIO bus device appears as a PCI + device. + +config MDIO_XGENE + tristate "APM X-Gene SoC MDIO bus controller" + help + This module provides a driver for the MDIO busses found in the + APM X-Gene SoC's. + +comment "MII PHY device drivers" + +config AMD_PHY + tristate "Drivers for the AMD PHYs" + ---help--- + Currently supports the am79c874 + +config AQUANTIA_PHY + tristate "Drivers for the Aquantia PHYs" + ---help--- + Currently supports the Aquantia AQ1202, AQ2104, AQR105, AQR405 + +config AT803X_PHY + tristate "Drivers for Atheros AT803X PHYs" + ---help--- + Currently supports the AT8030 and AT8035 model + +config BCM63XX_PHY + tristate "Drivers for Broadcom 63xx SOCs internal PHY" + depends on BCM63XX + select BCM_NET_PHYLIB + ---help--- + Currently supports the 6348 and 6358 PHYs. + +config BCM7XXX_PHY + tristate "Drivers for Broadcom 7xxx SOCs internal PHYs" + select BCM_NET_PHYLIB + ---help--- + Currently supports the BCM7366, BCM7439, BCM7445, and + 40nm and 65nm generation of BCM7xxx Set Top Box SoCs. + +config BCM87XX_PHY + tristate "Driver for Broadcom BCM8706 and BCM8727 PHYs" + help + Currently supports the BCM8706 and BCM8727 10G Ethernet PHYs. + +config BCM_CYGNUS_PHY + tristate "Drivers for Broadcom Cygnus SoC internal PHY" + depends on ARCH_BCM_CYGNUS || COMPILE_TEST + depends on MDIO_BCM_IPROC + select BCM_NET_PHYLIB + ---help--- + This PHY driver is for the 1G internal PHYs of the Broadcom + Cygnus Family SoC. + + Currently supports internal PHY's used in the BCM11300, + BCM11320, BCM11350, BCM11360, BCM58300, BCM58302, + BCM58303 & BCM58305 Broadcom Cygnus SoCs. + +config BCM_NET_PHYLIB + tristate + +config BROADCOM_PHY + tristate "Drivers for Broadcom PHYs" + select BCM_NET_PHYLIB + ---help--- + Currently supports the BCM5411, BCM5421, BCM5461, BCM54616S, BCM5464, + BCM5481 and BCM5482 PHYs. + +config CICADA_PHY + tristate "Drivers for the Cicada PHYs" + ---help--- + Currently supports the cis8204 + +config DAVICOM_PHY + tristate "Drivers for Davicom PHYs" + ---help--- + Currently supports dm9161e and dm9131 + +config DP83848_PHY + tristate "Driver for Texas Instruments DP83848 PHY" + ---help--- + Supports the DP83848 PHY. + +config DP83867_PHY + tristate "Drivers for Texas Instruments DP83867 Gigabit PHY" + ---help--- + Currently supports the DP83867 PHY. + +config FIXED_PHY + tristate "Driver for MDIO Bus/PHY emulation with fixed speed/link PHYs" + depends on PHYLIB + select SWPHY + ---help--- + Adds the platform "fixed" MDIO Bus to cover the boards that use + PHYs that are not connected to the real MDIO bus. + + Currently tested with mpc866ads and mpc8349e-mitx. + +config ICPLUS_PHY + tristate "Drivers for ICPlus PHYs" + ---help--- + Currently supports the IP175C and IP1001 PHYs. config INTEL_XWAY_PHY tristate "Driver for Intel XWAY PHYs" @@ -294,24 +248,71 @@ config INTEL_XWAY_PHY PEF 7061, PEF 7071 and PEF 7072 or integrated into the Intel SoCs xRX200, xRX300, xRX330, xRX350 and xRX550. -config MDIO_HISI_FEMAC - tristate "Hisilicon FEMAC MDIO bus controller" - depends on HAS_IOMEM && OF_MDIO - help - This module provides a driver for the MDIO busses found in the - Hisilicon SoC that have an Fast Ethernet MAC. +config LSI_ET1011C_PHY + tristate "Driver for LSI ET1011C PHY" + ---help--- + Supports the LSI ET1011C PHY. -config MDIO_XGENE - tristate "APM X-Gene SoC MDIO bus controller" +config LXT_PHY + tristate "Drivers for the Intel LXT PHYs" + ---help--- + Currently supports the lxt970, lxt971 + +config MARVELL_PHY + tristate "Drivers for Marvell PHYs" + ---help--- + Currently has a driver for the 88E1011S + +config MICREL_PHY + tristate "Driver for Micrel PHYs" + ---help--- + Supports the KSZ9021, VSC8201, KS8001 PHYs. + +config MICROCHIP_PHY + tristate "Drivers for Microchip PHYs" help - This module provides a driver for the MDIO busses found in the - APM X-Gene SoC's. + Supports the LAN88XX PHYs. config MICROSEMI_PHY tristate "Drivers for the Microsemi PHYs" ---help--- Currently supports the VSC8531 and VSC8541 PHYs +config NATIONAL_PHY + tristate "Drivers for National Semiconductor PHYs" + ---help--- + Currently supports the DP83865 PHY. + +config QSEMI_PHY + tristate "Drivers for Quality Semiconductor PHYs" + ---help--- + Currently supports the qs6612 + +config REALTEK_PHY + tristate "Drivers for Realtek PHYs" + ---help--- + Supports the Realtek 821x PHY. + +config SMSC_PHY + tristate "Drivers for SMSC PHYs" + ---help--- + Currently supports the LAN83C185, LAN8187 and LAN8700 PHYs + +config STE10XP + tristate "Driver for STMicroelectronics STe10Xp PHYs" + ---help--- + This is the driver for the STe100p and STe101p PHYs. + +config TERANETICS_PHY + tristate "Drivers for the Teranetics PHYs" + ---help--- + Currently supports the Teranetics TN2020 + +config VITESSE_PHY + tristate "Drivers for the Vitesse PHYs" + ---help--- + Currently supports the vsc8244 + config XILINX_GMII2RGMII tristate "Xilinx GMII2RGMII converter driver" ---help--- diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile index 73d65ce04454..e58667d111e7 100644 --- a/drivers/net/phy/Makefile +++ b/drivers/net/phy/Makefile @@ -1,53 +1,55 @@ -# Makefile for Linux PHY drivers +# Makefile for Linux PHY drivers and MDIO bus drivers libphy-y := phy.o phy_device.o mdio_bus.o mdio_device.o libphy-$(CONFIG_SWPHY) += swphy.o obj-$(CONFIG_PHYLIB) += libphy.o + +obj-$(CONFIG_MDIO_BCM_IPROC) += mdio-bcm-iproc.o +obj-$(CONFIG_MDIO_BCM_UNIMAC) += mdio-bcm-unimac.o +obj-$(CONFIG_MDIO_BITBANG) += mdio-bitbang.o +obj-$(CONFIG_MDIO_BUS_MUX) += mdio-mux.o +obj-$(CONFIG_MDIO_BUS_MUX_BCM_IPROC) += mdio-mux-bcm-iproc.o +obj-$(CONFIG_MDIO_BUS_MUX_GPIO) += mdio-mux-gpio.o +obj-$(CONFIG_MDIO_BUS_MUX_MMIOREG) += mdio-mux-mmioreg.o +obj-$(CONFIG_MDIO_CAVIUM) += mdio-cavium.o +obj-$(CONFIG_MDIO_GPIO) += mdio-gpio.o +obj-$(CONFIG_MDIO_HISI_FEMAC) += mdio-hisi-femac.o +obj-$(CONFIG_MDIO_MOXART) += mdio-moxart.o +obj-$(CONFIG_MDIO_OCTEON) += mdio-octeon.o +obj-$(CONFIG_MDIO_SUN4I) += mdio-sun4i.o +obj-$(CONFIG_MDIO_THUNDER) += mdio-thunder.o +obj-$(CONFIG_MDIO_XGENE) += mdio-xgene.o + +obj-$(CONFIG_AMD_PHY) += amd.o obj-$(CONFIG_AQUANTIA_PHY) += aquantia.o -obj-$(CONFIG_MARVELL_PHY) += marvell.o -obj-$(CONFIG_DAVICOM_PHY) += davicom.o -obj-$(CONFIG_CICADA_PHY) += cicada.o -obj-$(CONFIG_LXT_PHY) += lxt.o -obj-$(CONFIG_QSEMI_PHY) += qsemi.o -obj-$(CONFIG_SMSC_PHY) += smsc.o -obj-$(CONFIG_MICROSEMI_PHY) += mscc.o -obj-$(CONFIG_TERANETICS_PHY) += teranetics.o -obj-$(CONFIG_VITESSE_PHY) += vitesse.o -obj-$(CONFIG_BCM_NET_PHYLIB) += bcm-phy-lib.o -obj-$(CONFIG_BROADCOM_PHY) += broadcom.o +obj-$(CONFIG_AT803X_PHY) += at803x.o obj-$(CONFIG_BCM63XX_PHY) += bcm63xx.o obj-$(CONFIG_BCM7XXX_PHY) += bcm7xxx.o obj-$(CONFIG_BCM87XX_PHY) += bcm87xx.o obj-$(CONFIG_BCM_CYGNUS_PHY) += bcm-cygnus.o -obj-$(CONFIG_ICPLUS_PHY) += icplus.o -obj-$(CONFIG_REALTEK_PHY) += realtek.o -obj-$(CONFIG_LSI_ET1011C_PHY) += et1011c.o -obj-$(CONFIG_FIXED_PHY) += fixed_phy.o -obj-$(CONFIG_MDIO_BITBANG) += mdio-bitbang.o -obj-$(CONFIG_MDIO_GPIO) += mdio-gpio.o -obj-$(CONFIG_NATIONAL_PHY) += national.o +obj-$(CONFIG_BCM_NET_PHYLIB) += bcm-phy-lib.o +obj-$(CONFIG_BROADCOM_PHY) += broadcom.o +obj-$(CONFIG_CICADA_PHY) += cicada.o +obj-$(CONFIG_DAVICOM_PHY) += davicom.o obj-$(CONFIG_DP83640_PHY) += dp83640.o obj-$(CONFIG_DP83848_PHY) += dp83848.o obj-$(CONFIG_DP83867_PHY) += dp83867.o -obj-$(CONFIG_STE10XP) += ste10Xp.o -obj-$(CONFIG_MICREL_PHY) += micrel.o -obj-$(CONFIG_MDIO_OCTEON) += mdio-octeon.o -obj-$(CONFIG_MDIO_THUNDER) += mdio-thunder.o -obj-$(CONFIG_MDIO_CAVIUM) += mdio-cavium.o -obj-$(CONFIG_MICREL_KS8995MA) += spi_ks8995.o -obj-$(CONFIG_AT803X_PHY) += at803x.o -obj-$(CONFIG_AMD_PHY) += amd.o -obj-$(CONFIG_MDIO_BUS_MUX) += mdio-mux.o -obj-$(CONFIG_MDIO_BUS_MUX_GPIO) += mdio-mux-gpio.o -obj-$(CONFIG_MDIO_BUS_MUX_MMIOREG) += mdio-mux-mmioreg.o -obj-$(CONFIG_MDIO_BUS_MUX_BCM_IPROC) += mdio-mux-bcm-iproc.o -obj-$(CONFIG_MDIO_SUN4I) += mdio-sun4i.o -obj-$(CONFIG_MDIO_MOXART) += mdio-moxart.o -obj-$(CONFIG_MDIO_BCM_UNIMAC) += mdio-bcm-unimac.o -obj-$(CONFIG_MICROCHIP_PHY) += microchip.o -obj-$(CONFIG_MDIO_BCM_IPROC) += mdio-bcm-iproc.o +obj-$(CONFIG_FIXED_PHY) += fixed_phy.o +obj-$(CONFIG_ICPLUS_PHY) += icplus.o obj-$(CONFIG_INTEL_XWAY_PHY) += intel-xway.o -obj-$(CONFIG_MDIO_HISI_FEMAC) += mdio-hisi-femac.o -obj-$(CONFIG_MDIO_XGENE) += mdio-xgene.o +obj-$(CONFIG_LSI_ET1011C_PHY) += et1011c.o +obj-$(CONFIG_LXT_PHY) += lxt.o +obj-$(CONFIG_MARVELL_PHY) += marvell.o +obj-$(CONFIG_MICREL_KS8995MA) += spi_ks8995.o +obj-$(CONFIG_MICREL_PHY) += micrel.o +obj-$(CONFIG_MICROCHIP_PHY) += microchip.o +obj-$(CONFIG_MICROSEMI_PHY) += mscc.o +obj-$(CONFIG_NATIONAL_PHY) += national.o +obj-$(CONFIG_QSEMI_PHY) += qsemi.o +obj-$(CONFIG_REALTEK_PHY) += realtek.o +obj-$(CONFIG_SMSC_PHY) += smsc.o +obj-$(CONFIG_STE10XP) += ste10Xp.o +obj-$(CONFIG_TERANETICS_PHY) += teranetics.o +obj-$(CONFIG_VITESSE_PHY) += vitesse.o obj-$(CONFIG_XILINX_GMII2RGMII) += xilinx_gmii2rgmii.o From 97c84389ac0f3148ae340d2dda17d00c0131599f Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Thu, 18 Aug 2016 23:56:06 +0200 Subject: [PATCH 0350/1715] net: phy: Kconfig: Remove redundant "Support for" Remove the redundant "Support for" and "Drivers for" from the Kconfig short description. This makes the manufacture much more prominent in the list and makes the shortcut keys useful. Signed-off-by: Andrew Lunn Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/Kconfig | 70 ++++++++++++++++++++--------------------- 1 file changed, 35 insertions(+), 35 deletions(-) diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index 786e8b0b2d06..1c3e07c3d0b8 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -35,7 +35,7 @@ config MDIO_BCM_UNIMAC Starfighter 2 switches. config MDIO_BITBANG - tristate "Support for bitbanged MDIO buses" + tristate "Bitbanged MDIO buses" help This module implements the MDIO bus protocol in software, for use by low level drivers that export the ability to @@ -53,7 +53,7 @@ config MDIO_BUS_MUX device specific drivers. config MDIO_BUS_MUX_BCM_IPROC - tristate "Support for iProc based MDIO bus multiplexers" + tristate "Broadcom iProc based MDIO bus multiplexers" depends on OF && OF_MDIO && (ARCH_BCM_IPROC || COMPILE_TEST) select MDIO_BUS_MUX default ARCH_BCM_IPROC @@ -64,7 +64,7 @@ config MDIO_BUS_MUX_BCM_IPROC external and selection logic lies inside the same multiplexer. config MDIO_BUS_MUX_GPIO - tristate "Support for GPIO controlled MDIO bus multiplexers" + tristate "GPIO controlled MDIO bus multiplexers" depends on OF_GPIO && OF_MDIO select MDIO_BUS_MUX help @@ -74,7 +74,7 @@ config MDIO_BUS_MUX_GPIO selection is under the control of GPIO lines. config MDIO_BUS_MUX_MMIOREG - tristate "Support for MMIO device-controlled MDIO bus multiplexers" + tristate "MMIO device-controlled MDIO bus multiplexers" depends on OF_MDIO && HAS_IOMEM select MDIO_BUS_MUX help @@ -90,7 +90,7 @@ config MDIO_CAVIUM tristate config MDIO_GPIO - tristate "Support for GPIO lib-based bitbanged MDIO buses" + tristate "GPIO lib-based bitbanged MDIO buses" depends on MDIO_BITBANG && GPIOLIB ---help--- Supports GPIO lib-based MDIO busses. @@ -113,7 +113,7 @@ config MDIO_MOXART interface units of the MOXA ART SoC config MDIO_OCTEON - tristate "Support for MDIO buses on Octeon and some ThunderX SOCs" + tristate "Octeon and some ThunderX SOCs MDIO buses" depends on 64BIT depends on HAS_IOMEM select MDIO_CAVIUM @@ -131,7 +131,7 @@ config MDIO_SUN4I A12, A10s, etc.) config MDIO_THUNDER - tristate "Support for MDIO buses on ThunderX SOCs" + tristate "ThunderX SOCs MDIO buses" depends on 64BIT depends on PCI select MDIO_CAVIUM @@ -149,41 +149,41 @@ config MDIO_XGENE comment "MII PHY device drivers" config AMD_PHY - tristate "Drivers for the AMD PHYs" + tristate "AMD PHYs" ---help--- Currently supports the am79c874 config AQUANTIA_PHY - tristate "Drivers for the Aquantia PHYs" + tristate "Aquantia PHYs" ---help--- Currently supports the Aquantia AQ1202, AQ2104, AQR105, AQR405 config AT803X_PHY - tristate "Drivers for Atheros AT803X PHYs" + tristate "AT803X PHYs" ---help--- Currently supports the AT8030 and AT8035 model config BCM63XX_PHY - tristate "Drivers for Broadcom 63xx SOCs internal PHY" + tristate "Broadcom 63xx SOCs internal PHY" depends on BCM63XX select BCM_NET_PHYLIB ---help--- Currently supports the 6348 and 6358 PHYs. config BCM7XXX_PHY - tristate "Drivers for Broadcom 7xxx SOCs internal PHYs" + tristate "Broadcom 7xxx SOCs internal PHYs" select BCM_NET_PHYLIB ---help--- Currently supports the BCM7366, BCM7439, BCM7445, and 40nm and 65nm generation of BCM7xxx Set Top Box SoCs. config BCM87XX_PHY - tristate "Driver for Broadcom BCM8706 and BCM8727 PHYs" + tristate "Broadcom BCM8706 and BCM8727 PHYs" help Currently supports the BCM8706 and BCM8727 10G Ethernet PHYs. config BCM_CYGNUS_PHY - tristate "Drivers for Broadcom Cygnus SoC internal PHY" + tristate "Broadcom Cygnus SoC internal PHY" depends on ARCH_BCM_CYGNUS || COMPILE_TEST depends on MDIO_BCM_IPROC select BCM_NET_PHYLIB @@ -199,34 +199,34 @@ config BCM_NET_PHYLIB tristate config BROADCOM_PHY - tristate "Drivers for Broadcom PHYs" + tristate "Broadcom PHYs" select BCM_NET_PHYLIB ---help--- Currently supports the BCM5411, BCM5421, BCM5461, BCM54616S, BCM5464, BCM5481 and BCM5482 PHYs. config CICADA_PHY - tristate "Drivers for the Cicada PHYs" + tristate "Cicada PHYs" ---help--- Currently supports the cis8204 config DAVICOM_PHY - tristate "Drivers for Davicom PHYs" + tristate "Davicom PHYs" ---help--- Currently supports dm9161e and dm9131 config DP83848_PHY - tristate "Driver for Texas Instruments DP83848 PHY" + tristate "Texas Instruments DP83848 PHY" ---help--- Supports the DP83848 PHY. config DP83867_PHY - tristate "Drivers for Texas Instruments DP83867 Gigabit PHY" + tristate "Texas Instruments DP83867 Gigabit PHY" ---help--- Currently supports the DP83867 PHY. config FIXED_PHY - tristate "Driver for MDIO Bus/PHY emulation with fixed speed/link PHYs" + tristate "MDIO Bus/PHY emulation with fixed speed/link PHYs" depends on PHYLIB select SWPHY ---help--- @@ -236,12 +236,12 @@ config FIXED_PHY Currently tested with mpc866ads and mpc8349e-mitx. config ICPLUS_PHY - tristate "Drivers for ICPlus PHYs" + tristate "ICPlus PHYs" ---help--- Currently supports the IP175C and IP1001 PHYs. config INTEL_XWAY_PHY - tristate "Driver for Intel XWAY PHYs" + tristate "Intel XWAY PHYs" ---help--- Supports the Intel XWAY (former Lantiq) 11G and 22E PHYs. These PHYs are marked as standalone chips under the names @@ -249,67 +249,67 @@ config INTEL_XWAY_PHY SoCs xRX200, xRX300, xRX330, xRX350 and xRX550. config LSI_ET1011C_PHY - tristate "Driver for LSI ET1011C PHY" + tristate "LSI ET1011C PHY" ---help--- Supports the LSI ET1011C PHY. config LXT_PHY - tristate "Drivers for the Intel LXT PHYs" + tristate "Intel LXT PHYs" ---help--- Currently supports the lxt970, lxt971 config MARVELL_PHY - tristate "Drivers for Marvell PHYs" + tristate "Marvell PHYs" ---help--- Currently has a driver for the 88E1011S config MICREL_PHY - tristate "Driver for Micrel PHYs" + tristate "Micrel PHYs" ---help--- Supports the KSZ9021, VSC8201, KS8001 PHYs. config MICROCHIP_PHY - tristate "Drivers for Microchip PHYs" + tristate "Microchip PHYs" help Supports the LAN88XX PHYs. config MICROSEMI_PHY - tristate "Drivers for the Microsemi PHYs" + tristate "Microsemi PHYs" ---help--- Currently supports the VSC8531 and VSC8541 PHYs config NATIONAL_PHY - tristate "Drivers for National Semiconductor PHYs" + tristate "National Semiconductor PHYs" ---help--- Currently supports the DP83865 PHY. config QSEMI_PHY - tristate "Drivers for Quality Semiconductor PHYs" + tristate "Quality Semiconductor PHYs" ---help--- Currently supports the qs6612 config REALTEK_PHY - tristate "Drivers for Realtek PHYs" + tristate "Realtek PHYs" ---help--- Supports the Realtek 821x PHY. config SMSC_PHY - tristate "Drivers for SMSC PHYs" + tristate "SMSC PHYs" ---help--- Currently supports the LAN83C185, LAN8187 and LAN8700 PHYs config STE10XP - tristate "Driver for STMicroelectronics STe10Xp PHYs" + tristate "STMicroelectronics STe10Xp PHYs" ---help--- This is the driver for the STe100p and STe101p PHYs. config TERANETICS_PHY - tristate "Drivers for the Teranetics PHYs" + tristate "Teranetics PHYs" ---help--- Currently supports the Teranetics TN2020 config VITESSE_PHY - tristate "Drivers for the Vitesse PHYs" + tristate "Vitesse PHYs" ---help--- Currently supports the vsc8244 From 6441e6695acfce66499c186c591f319a3b125de7 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Fri, 19 Aug 2016 00:01:55 +0200 Subject: [PATCH 0351/1715] dsa: mv88e6xxx: Timeout based on iterations, not time The mv88e6xxx driver times out operations on the switch based on looping until an elapsed wall clock time is reached. However, if usleep_range() sleeps much longer than expected, it could timeout with an error without actually checking to see if the devices has completed the operation. So replace the elapsed time with a fixed upper bound on the number of loops. Testing on various switches has shown that switches takes either 0 or 1 iteration, so a maximum of 16 iterations is a safe limit. Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index a230fcba5b64..ac8e9af4879f 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -309,9 +309,9 @@ static int mv88e6xxx_serdes_write(struct mv88e6xxx_chip *chip, int reg, u16 val) static int mv88e6xxx_wait(struct mv88e6xxx_chip *chip, int addr, int reg, u16 mask) { - unsigned long timeout = jiffies + HZ / 10; + int i; - while (time_before(jiffies, timeout)) { + for (i = 0; i < 16; i++) { u16 val; int err; @@ -375,7 +375,7 @@ static int _mv88e6xxx_reg_write(struct mv88e6xxx_chip *chip, int addr, static int mv88e6xxx_ppu_disable(struct mv88e6xxx_chip *chip) { int ret; - unsigned long timeout; + int i; ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, GLOBAL_CONTROL); if (ret < 0) @@ -386,8 +386,7 @@ static int mv88e6xxx_ppu_disable(struct mv88e6xxx_chip *chip) if (ret) return ret; - timeout = jiffies + 1 * HZ; - while (time_before(jiffies, timeout)) { + for (i = 0; i < 16; i++) { ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, GLOBAL_STATUS); if (ret < 0) return ret; @@ -403,8 +402,7 @@ static int mv88e6xxx_ppu_disable(struct mv88e6xxx_chip *chip) static int mv88e6xxx_ppu_enable(struct mv88e6xxx_chip *chip) { - int ret, err; - unsigned long timeout; + int ret, err, i; ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, GLOBAL_CONTROL); if (ret < 0) @@ -415,8 +413,7 @@ static int mv88e6xxx_ppu_enable(struct mv88e6xxx_chip *chip) if (err) return err; - timeout = jiffies + 1 * HZ; - while (time_before(jiffies, timeout)) { + for (i = 0; i < 16; i++) { ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, GLOBAL_STATUS); if (ret < 0) return ret; From 0f02b4f75245c9f11f38ed983a2f5e21fd780e80 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Fri, 19 Aug 2016 00:01:56 +0200 Subject: [PATCH 0352/1715] dsa: mv88e6xxx: Use mv88e6xx_wait in mv88e6xxx_update() Now that mv88e6xx_wait() iterated on a counter than a fixed time interval, it implements the same mechanism as mv88e6xxx_update() uses. So use it in mv88e6xx_wait(). Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index ac8e9af4879f..8c846bce4edf 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -333,20 +333,12 @@ static int mv88e6xxx_update(struct mv88e6xxx_chip *chip, int addr, int reg, u16 update) { u16 val; - int i, err; + int err; /* Wait until the previous operation is completed */ - for (i = 0; i < 16; ++i) { - err = mv88e6xxx_read(chip, addr, reg, &val); - if (err) - return err; - - if (!(val & BIT(15))) - break; - } - - if (i == 16) - return -ETIMEDOUT; + err = mv88e6xxx_wait(chip, addr, reg, BIT(15)); + if (err) + return err; /* Set the Update bit to trigger a write operation */ val = BIT(15) | update; From 30853553c0e056403e1d6b2915a903401647839a Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Fri, 19 Aug 2016 00:01:57 +0200 Subject: [PATCH 0353/1715] dsa: mv88e6xxx: Make mv88e6xxx_wait() timeout verbose When mv88e6xxx_wait() returns a timeout, something bad has happened. Make sure it is noticed by logging an error. Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 8c846bce4edf..014b52bd72f1 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -325,6 +325,7 @@ static int mv88e6xxx_wait(struct mv88e6xxx_chip *chip, int addr, int reg, usleep_range(1000, 2000); } + dev_err(chip->dev, "Timeout while waiting for switch\n"); return -ETIMEDOUT; } From ea825e70d0e0798eda3a57b05c90f21f5a369128 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 18 Aug 2016 15:30:12 -0700 Subject: [PATCH 0354/1715] net: dsa: Export suspend/resume functions In preparation for allowing switch drivers to implement system-wide suspend/resume functions, export dsa_switch_suspend and dsa_switch_resume() such that these are callable from the appropriate driver specific suspend/resume functions. Reviewed-by: Andrew Lunn Tested-by: Vivien Didelot Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/net/dsa.h | 14 ++++++++++++++ net/dsa/dsa.c | 6 ++++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/include/net/dsa.h b/include/net/dsa.h index 2217a3f817f8..d00c392bc9f8 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -386,4 +386,18 @@ static inline bool dsa_uses_tagged_protocol(struct dsa_switch_tree *dst) void dsa_unregister_switch(struct dsa_switch *ds); int dsa_register_switch(struct dsa_switch *ds, struct device_node *np); +#ifdef CONFIG_PM_SLEEP +int dsa_switch_suspend(struct dsa_switch *ds); +int dsa_switch_resume(struct dsa_switch *ds); +#else +static inline int dsa_switch_suspend(struct dsa_switch *ds) +{ + return 0; +} +static inline int dsa_switch_resume(struct dsa_switch *ds) +{ + return 0; +} +#endif /* CONFIG_PM_SLEEP */ + #endif diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 7e68bc6bc853..9f5b47200365 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -543,7 +543,7 @@ static void dsa_switch_destroy(struct dsa_switch *ds) } #ifdef CONFIG_PM_SLEEP -static int dsa_switch_suspend(struct dsa_switch *ds) +int dsa_switch_suspend(struct dsa_switch *ds) { int i, ret = 0; @@ -562,8 +562,9 @@ static int dsa_switch_suspend(struct dsa_switch *ds) return ret; } +EXPORT_SYMBOL_GPL(dsa_switch_suspend); -static int dsa_switch_resume(struct dsa_switch *ds) +int dsa_switch_resume(struct dsa_switch *ds) { int i, ret = 0; @@ -585,6 +586,7 @@ static int dsa_switch_resume(struct dsa_switch *ds) return 0; } +EXPORT_SYMBOL_GPL(dsa_switch_resume); #endif /* platform driver init and cleanup *****************************************/ From a85fad1436b0fd0fe9d7f1d23387e83e7cd21c0a Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 18 Aug 2016 15:30:13 -0700 Subject: [PATCH 0355/1715] Documentation: dt: bindings: Update Broadcom 7445 switch document Document the updated binding which conforms to the new DSA binding in net/dsa/dsa.txt. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- .../bindings/net/brcm,bcm7445-switch-v4.0.txt | 45 ++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/brcm,bcm7445-switch-v4.0.txt b/Documentation/devicetree/bindings/net/brcm,bcm7445-switch-v4.0.txt index 30d487597ecb..fb40891ee606 100644 --- a/Documentation/devicetree/bindings/net/brcm,bcm7445-switch-v4.0.txt +++ b/Documentation/devicetree/bindings/net/brcm,bcm7445-switch-v4.0.txt @@ -6,9 +6,13 @@ Required properties: - reg: addresses and length of the register sets for the device, must be 6 pairs of register addresses and lengths - interrupts: interrupts for the devices, must be two interrupts +- #address-cells: must be 1, see dsa/dsa.txt +- #size-cells: must be 0, see dsa/dsa.txt + +Deprecated binding required properties: + - dsa,mii-bus: phandle to the MDIO bus controller, see dsa/dsa.txt - dsa,ethernet: phandle to the CPU network interface controller, see dsa/dsa.txt -- #size-cells: must be 0 - #address-cells: must be 2, see dsa/dsa.txt Subnodes: @@ -39,6 +43,45 @@ Optional properties: Example: +switch_top@f0b00000 { + compatible = "simple-bus"; + #size-cells = <1>; + #address-cells = <1>; + ranges = <0 0xf0b00000 0x40804>; + + ethernet_switch@0 { + compatible = "brcm,bcm7445-switch-v4.0"; + #size-cells = <0>; + #address-cells = <1>; + reg = <0x0 0x40000 + 0x40000 0x110 + 0x40340 0x30 + 0x40380 0x30 + 0x40400 0x34 + 0x40600 0x208>; + reg-names = "core", "reg", intrl2_0", "intrl2_1", + "fcb, "acb"; + interrupts = <0 0x18 0 + 0 0x19 0>; + brcm,num-gphy = <1>; + brcm,num-rgmii-ports = <2>; + brcm,fcb-pause-override; + brcm,acb-packets-inflight; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + label = "gphy"; + reg = <0>; + }; + }; + }; +}; + +Example using the old DSA DeviceTree binding: + switch_top@f0b00000 { compatible = "simple-bus"; #size-cells = <1>; From d9338023fb8e60aa09014034a993f8ddc86c65fa Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 18 Aug 2016 15:30:14 -0700 Subject: [PATCH 0356/1715] net: dsa: bcm_sf2: Make it a real platform device driver The Broadcom Starfighter 2 switch driver should be a proper platform driver, now that the DSA code has been updated to allow that, register a switch device, feed it with the proper configuration data coming from Device Tree and register our switch device with DSA. The bulk of the changes consist in moving what bcm_sf2_sw_setup() did into the platform driver probe function. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/bcm_sf2.c | 178 +++++++++++++++++++++++++------------- net/dsa/dsa.c | 1 - 2 files changed, 118 insertions(+), 61 deletions(-) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index b2b838724a9b..fe1cc92f72a8 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -1571,23 +1571,84 @@ static int bcm_sf2_sw_vlan_dump(struct dsa_switch *ds, int port, static int bcm_sf2_sw_setup(struct dsa_switch *ds) { - const char *reg_names[BCM_SF2_REGS_NUM] = BCM_SF2_REGS_NAME; struct bcm_sf2_priv *priv = ds_to_priv(ds); - struct device_node *dn; - void __iomem **base; unsigned int port; + + /* Enable all valid ports and disable those unused */ + for (port = 0; port < priv->hw_params.num_ports; port++) { + /* IMP port receives special treatment */ + if ((1 << port) & ds->enabled_port_mask) + bcm_sf2_port_setup(ds, port, NULL); + else if (dsa_is_cpu_port(ds, port)) + bcm_sf2_imp_setup(ds, port); + else + bcm_sf2_port_disable(ds, port, NULL); + } + + bcm_sf2_sw_configure_vlan(ds); + + return 0; +} + +static struct dsa_switch_driver bcm_sf2_switch_driver = { + .tag_protocol = DSA_TAG_PROTO_BRCM, + .probe = bcm_sf2_sw_drv_probe, + .setup = bcm_sf2_sw_setup, + .set_addr = bcm_sf2_sw_set_addr, + .get_phy_flags = bcm_sf2_sw_get_phy_flags, + .get_strings = bcm_sf2_sw_get_strings, + .get_ethtool_stats = bcm_sf2_sw_get_ethtool_stats, + .get_sset_count = bcm_sf2_sw_get_sset_count, + .adjust_link = bcm_sf2_sw_adjust_link, + .fixed_link_update = bcm_sf2_sw_fixed_link_update, + .suspend = bcm_sf2_sw_suspend, + .resume = bcm_sf2_sw_resume, + .get_wol = bcm_sf2_sw_get_wol, + .set_wol = bcm_sf2_sw_set_wol, + .port_enable = bcm_sf2_port_setup, + .port_disable = bcm_sf2_port_disable, + .get_eee = bcm_sf2_sw_get_eee, + .set_eee = bcm_sf2_sw_set_eee, + .port_bridge_join = bcm_sf2_sw_br_join, + .port_bridge_leave = bcm_sf2_sw_br_leave, + .port_stp_state_set = bcm_sf2_sw_br_set_stp_state, + .port_fdb_prepare = bcm_sf2_sw_fdb_prepare, + .port_fdb_add = bcm_sf2_sw_fdb_add, + .port_fdb_del = bcm_sf2_sw_fdb_del, + .port_fdb_dump = bcm_sf2_sw_fdb_dump, + .port_vlan_filtering = bcm_sf2_sw_vlan_filtering, + .port_vlan_prepare = bcm_sf2_sw_vlan_prepare, + .port_vlan_add = bcm_sf2_sw_vlan_add, + .port_vlan_del = bcm_sf2_sw_vlan_del, + .port_vlan_dump = bcm_sf2_sw_vlan_dump, +}; + +static int bcm_sf2_sw_probe(struct platform_device *pdev) +{ + const char *reg_names[BCM_SF2_REGS_NUM] = BCM_SF2_REGS_NAME; + struct device_node *dn = pdev->dev.of_node; + struct bcm_sf2_priv *priv; + struct dsa_switch *ds; + void __iomem **base; unsigned int i; u32 reg, rev; int ret; + ds = devm_kzalloc(&pdev->dev, sizeof(*ds) + sizeof(*priv), GFP_KERNEL); + if (!ds) + return -ENOMEM; + + priv = (struct bcm_sf2_priv *)(ds + 1); + ds->priv = priv; + ds->dev = &pdev->dev; + ds->drv = &bcm_sf2_switch_driver; + + dev_set_drvdata(&pdev->dev, ds); + spin_lock_init(&priv->indir_lock); mutex_init(&priv->stats_mutex); - /* All the interesting properties are at the parent device_node - * level - */ - dn = ds->cd->of_node->parent; - bcm_sf2_identify_ports(priv, ds->cd->of_node); + bcm_sf2_identify_ports(priv, dn->child); priv->irq0 = irq_of_parse_and_map(dn, 0); priv->irq1 = irq_of_parse_and_map(dn, 1); @@ -1649,19 +1710,6 @@ static int bcm_sf2_sw_setup(struct dsa_switch *ds) &priv->hw_params.num_gphy)) priv->hw_params.num_gphy = 1; - /* Enable all valid ports and disable those unused */ - for (port = 0; port < priv->hw_params.num_ports; port++) { - /* IMP port receives special treatment */ - if ((1 << port) & ds->enabled_port_mask) - bcm_sf2_port_setup(ds, port, NULL); - else if (dsa_is_cpu_port(ds, port)) - bcm_sf2_imp_setup(ds, port); - else - bcm_sf2_port_disable(ds, port, NULL); - } - - bcm_sf2_sw_configure_vlan(ds); - rev = reg_readl(priv, REG_SWITCH_REVISION); priv->hw_params.top_rev = (rev >> SWITCH_TOP_REV_SHIFT) & SWITCH_TOP_REV_MASK; @@ -1670,6 +1718,10 @@ static int bcm_sf2_sw_setup(struct dsa_switch *ds) rev = reg_readl(priv, REG_PHY_REVISION); priv->hw_params.gphy_rev = rev & PHY_REVISION_MASK; + ret = dsa_register_switch(ds, dn); + if (ret) + goto out_free_irq1; + pr_info("Starfighter 2 top: %x.%02x, core: %x.%02x base: 0x%p, IRQs: %d, %d\n", priv->hw_params.top_rev >> 8, priv->hw_params.top_rev & 0xff, priv->hw_params.core_rev >> 8, priv->hw_params.core_rev & 0xff, @@ -1677,6 +1729,8 @@ static int bcm_sf2_sw_setup(struct dsa_switch *ds) return 0; +out_free_irq1: + free_irq(priv->irq1, priv); out_free_irq0: free_irq(priv->irq0, priv); out_mdio: @@ -1691,52 +1745,56 @@ out_unmap: return ret; } -static struct dsa_switch_driver bcm_sf2_switch_driver = { - .tag_protocol = DSA_TAG_PROTO_BRCM, - .probe = bcm_sf2_sw_drv_probe, - .setup = bcm_sf2_sw_setup, - .set_addr = bcm_sf2_sw_set_addr, - .get_phy_flags = bcm_sf2_sw_get_phy_flags, - .get_strings = bcm_sf2_sw_get_strings, - .get_ethtool_stats = bcm_sf2_sw_get_ethtool_stats, - .get_sset_count = bcm_sf2_sw_get_sset_count, - .adjust_link = bcm_sf2_sw_adjust_link, - .fixed_link_update = bcm_sf2_sw_fixed_link_update, - .suspend = bcm_sf2_sw_suspend, - .resume = bcm_sf2_sw_resume, - .get_wol = bcm_sf2_sw_get_wol, - .set_wol = bcm_sf2_sw_set_wol, - .port_enable = bcm_sf2_port_setup, - .port_disable = bcm_sf2_port_disable, - .get_eee = bcm_sf2_sw_get_eee, - .set_eee = bcm_sf2_sw_set_eee, - .port_bridge_join = bcm_sf2_sw_br_join, - .port_bridge_leave = bcm_sf2_sw_br_leave, - .port_stp_state_set = bcm_sf2_sw_br_set_stp_state, - .port_fdb_prepare = bcm_sf2_sw_fdb_prepare, - .port_fdb_add = bcm_sf2_sw_fdb_add, - .port_fdb_del = bcm_sf2_sw_fdb_del, - .port_fdb_dump = bcm_sf2_sw_fdb_dump, - .port_vlan_filtering = bcm_sf2_sw_vlan_filtering, - .port_vlan_prepare = bcm_sf2_sw_vlan_prepare, - .port_vlan_add = bcm_sf2_sw_vlan_add, - .port_vlan_del = bcm_sf2_sw_vlan_del, - .port_vlan_dump = bcm_sf2_sw_vlan_dump, -}; - -static int __init bcm_sf2_init(void) +static int bcm_sf2_sw_remove(struct platform_device *pdev) { - register_switch_driver(&bcm_sf2_switch_driver); + struct dsa_switch *ds = platform_get_drvdata(pdev); + struct bcm_sf2_priv *priv = ds_to_priv(ds); + + /* Disable all ports and interrupts */ + priv->wol_ports_mask = 0; + bcm_sf2_sw_suspend(ds); + dsa_unregister_switch(ds); + bcm_sf2_mdio_unregister(priv); return 0; } -module_init(bcm_sf2_init); -static void __exit bcm_sf2_exit(void) +#ifdef CONFIG_PM_SLEEP +static int bcm_sf2_suspend(struct device *dev) { - unregister_switch_driver(&bcm_sf2_switch_driver); + struct platform_device *pdev = to_platform_device(dev); + struct dsa_switch *ds = platform_get_drvdata(pdev); + + return dsa_switch_suspend(ds); } -module_exit(bcm_sf2_exit); + +static int bcm_sf2_resume(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct dsa_switch *ds = platform_get_drvdata(pdev); + + return dsa_switch_resume(ds); +} +#endif /* CONFIG_PM_SLEEP */ + +static SIMPLE_DEV_PM_OPS(bcm_sf2_pm_ops, + bcm_sf2_suspend, bcm_sf2_resume); + +static const struct of_device_id bcm_sf2_of_match[] = { + { .compatible = "brcm,bcm7445-switch-v4.0" }, + { /* sentinel */ }, +}; + +static struct platform_driver bcm_sf2_driver = { + .probe = bcm_sf2_sw_probe, + .remove = bcm_sf2_sw_remove, + .driver = { + .name = "brcm-sf2", + .of_match_table = bcm_sf2_of_match, + .pm = &bcm_sf2_pm_ops, + }, +}; +module_platform_driver(bcm_sf2_driver); MODULE_AUTHOR("Broadcom Corporation"); MODULE_DESCRIPTION("Driver for Broadcom Starfighter 2 ethernet switch chip"); diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 9f5b47200365..8bda74e595a5 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -1088,7 +1088,6 @@ static int dsa_resume(struct device *d) static SIMPLE_DEV_PM_OPS(dsa_pm_ops, dsa_suspend, dsa_resume); static const struct of_device_id dsa_of_match_table[] = { - { .compatible = "brcm,bcm7445-switch-v4.0" }, { .compatible = "marvell,dsa", }, {} }; From 4bd11675cf0d39e912b55e4d8fa91c85a82d946a Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 18 Aug 2016 15:30:15 -0700 Subject: [PATCH 0357/1715] net: dsa: bcm_sf2: Use device managed helpers Now that we have converted the drivers into a proper platform device driver, we can use the device managed helper functions to simplify the error paths a bit wrt. register resources and IRQs. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/bcm_sf2.c | 36 +++++++++++++----------------------- 1 file changed, 13 insertions(+), 23 deletions(-) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index fe1cc92f72a8..9f8e007b01d6 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -1630,6 +1630,7 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev) struct bcm_sf2_priv *priv; struct dsa_switch *ds; void __iomem **base; + struct resource *r; unsigned int i; u32 reg, rev; int ret; @@ -1655,11 +1656,11 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev) base = &priv->core; for (i = 0; i < BCM_SF2_REGS_NUM; i++) { - *base = of_iomap(dn, i); - if (*base == NULL) { + r = platform_get_resource(pdev, IORESOURCE_MEM, i); + *base = devm_ioremap_resource(&pdev->dev, r); + if (IS_ERR(*base)) { pr_err("unable to find register: %s\n", reg_names[i]); - ret = -ENOMEM; - goto out_unmap; + return PTR_ERR(*base); } base++; } @@ -1667,30 +1668,30 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev) ret = bcm_sf2_sw_rst(priv); if (ret) { pr_err("unable to software reset switch: %d\n", ret); - goto out_unmap; + return ret; } ret = bcm_sf2_mdio_register(ds); if (ret) { pr_err("failed to register MDIO bus\n"); - goto out_unmap; + return ret; } /* Disable all interrupts and request them */ bcm_sf2_intr_disable(priv); - ret = request_irq(priv->irq0, bcm_sf2_switch_0_isr, 0, - "switch_0", priv); + ret = devm_request_irq(&pdev->dev, priv->irq0, bcm_sf2_switch_0_isr, 0, + "switch_0", priv); if (ret < 0) { pr_err("failed to request switch_0 IRQ\n"); goto out_mdio; } - ret = request_irq(priv->irq1, bcm_sf2_switch_1_isr, 0, - "switch_1", priv); + ret = devm_request_irq(&pdev->dev, priv->irq1, bcm_sf2_switch_1_isr, 0, + "switch_1", priv); if (ret < 0) { pr_err("failed to request switch_1 IRQ\n"); - goto out_free_irq0; + goto out_mdio; } /* Reset the MIB counters */ @@ -1720,7 +1721,7 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev) ret = dsa_register_switch(ds, dn); if (ret) - goto out_free_irq1; + goto out_mdio; pr_info("Starfighter 2 top: %x.%02x, core: %x.%02x base: 0x%p, IRQs: %d, %d\n", priv->hw_params.top_rev >> 8, priv->hw_params.top_rev & 0xff, @@ -1729,19 +1730,8 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev) return 0; -out_free_irq1: - free_irq(priv->irq1, priv); -out_free_irq0: - free_irq(priv->irq0, priv); out_mdio: bcm_sf2_mdio_unregister(priv); -out_unmap: - base = &priv->core; - for (i = 0; i < BCM_SF2_REGS_NUM; i++) { - if (*base) - iounmap(*base); - base++; - } return ret; } From 731410b76e86f72ae18a3a4018cdc30258b10beb Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 18 Aug 2016 15:30:16 -0700 Subject: [PATCH 0358/1715] net: dsa: bcm_sf2: Remove probing through old DSA binding Remove our dsa_switch_driver::drv_probe callback to prevent probing through the old DSA binding, not that this could happen anymore now that we have moved the matching compatible string from net/dsa/dsa.c to drivers/net/dsa/bcm_sf2.c, so this is essentially dead code. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/bcm_sf2.c | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 9f8e007b01d6..8e6fe13dbec3 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -136,20 +136,6 @@ static int bcm_sf2_sw_get_sset_count(struct dsa_switch *ds) return BCM_SF2_STATS_SIZE; } -static const char *bcm_sf2_sw_drv_probe(struct device *dsa_dev, - struct device *host_dev, int sw_addr, - void **_priv) -{ - struct bcm_sf2_priv *priv; - - priv = devm_kzalloc(dsa_dev, sizeof(*priv), GFP_KERNEL); - if (!priv) - return NULL; - *_priv = priv; - - return "Broadcom Starfighter 2"; -} - static void bcm_sf2_imp_vlan_setup(struct dsa_switch *ds, int cpu_port) { struct bcm_sf2_priv *priv = ds_to_priv(ds); @@ -1592,7 +1578,6 @@ static int bcm_sf2_sw_setup(struct dsa_switch *ds) static struct dsa_switch_driver bcm_sf2_switch_driver = { .tag_protocol = DSA_TAG_PROTO_BRCM, - .probe = bcm_sf2_sw_drv_probe, .setup = bcm_sf2_sw_setup, .set_addr = bcm_sf2_sw_set_addr, .get_phy_flags = bcm_sf2_sw_get_phy_flags, From 05fafbfb3d77f43ae18341ddc61eb5c477896778 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Fri, 19 Aug 2016 09:33:31 +0300 Subject: [PATCH 0359/1715] qed: utilize FW 8.10.10.0 This new firmware for the qed* adpaters fixes several issues: - Better blocking of malicious VFs. - After FLR, Tx-switching [internal routing] of packets might be incorrect. - Deletion of unicast MAC filters would sometime have side-effect of corrupting the MAC filters configred for a device. It also contains fixes for future qed* drivers that *hopefully* would be sent for review in the near future. In addition, it would allow driver some new functionality, including: - Allowing PF/VF driver compaitibility with old drivers [running pre-8.10.5.0 firmware]. - Better debug facilities. This would also bump the qed* driver versions to 8.10.9.20. Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed.h | 2 +- drivers/net/ethernet/qlogic/qed/qed_dev.c | 48 +- drivers/net/ethernet/qlogic/qed/qed_hsi.h | 1420 +++++++++++------ .../net/ethernet/qlogic/qed/qed_init_ops.c | 2 +- drivers/net/ethernet/qlogic/qed/qed_mcp.c | 28 - drivers/net/ethernet/qlogic/qed/qed_mcp.h | 2 - .../net/ethernet/qlogic/qed/qed_reg_addr.h | 6 + drivers/net/ethernet/qlogic/qed/qed_sriov.c | 13 +- drivers/net/ethernet/qlogic/qede/qede.h | 2 +- include/linux/qed/common_hsi.h | 359 ++++- include/linux/qed/eth_common.h | 155 +- include/linux/qed/iscsi_common.h | 28 +- include/linux/qed/qed_chain.h | 13 - include/linux/qed/tcp_common.h | 16 +- 14 files changed, 1372 insertions(+), 722 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index d0beb5d10a02..9ba7a7ee3357 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -26,7 +26,7 @@ #include "qed_hsi.h" extern const struct qed_common_ops qed_common_ops_pass; -#define DRV_MODULE_VERSION "8.7.1.20" +#define DRV_MODULE_VERSION "8.10.9.20" #define MAX_HWFNS_PER_DEVICE (4) #define NAME_SIZE 16 diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index 8117ddff501b..5ae27f2d2fa5 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -772,6 +772,9 @@ static int qed_hw_init_common(struct qed_hwfn *p_hwfn, concrete_fid = qed_vfid_to_concrete(p_hwfn, vf_id); qed_fid_pretend(p_hwfn, p_ptt, (u16) concrete_fid); qed_wr(p_hwfn, p_ptt, CCFC_REG_STRONG_ENABLE_VF, 0x1); + qed_wr(p_hwfn, p_ptt, CCFC_REG_WEAK_ENABLE_VF, 0x0); + qed_wr(p_hwfn, p_ptt, TCFC_REG_STRONG_ENABLE_VF, 0x1); + qed_wr(p_hwfn, p_ptt, TCFC_REG_WEAK_ENABLE_VF, 0x0); } /* pretend to original PF */ qed_fid_pretend(p_hwfn, p_ptt, p_hwfn->rel_pf_id); @@ -782,34 +785,8 @@ static int qed_hw_init_common(struct qed_hwfn *p_hwfn, static int qed_hw_init_port(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, int hw_mode) { - int rc = 0; - - rc = qed_init_run(p_hwfn, p_ptt, PHASE_PORT, p_hwfn->port_id, hw_mode); - if (rc) - return rc; - - if (hw_mode & (1 << MODE_MF_SI)) { - u8 pf_id = 0; - - if (!qed_hw_init_first_eth(p_hwfn, p_ptt, &pf_id)) { - DP_VERBOSE(p_hwfn, NETIF_MSG_IFUP, - "PF[%08x] is first eth on engine\n", pf_id); - - /* We should have configured BIT for ppfid, i.e., the - * relative function number in the port. But there's a - * bug in LLH in BB where the ppfid is actually engine - * based, so we need to take this into account. - */ - qed_wr(p_hwfn, p_ptt, - NIG_REG_LLH_TAGMAC_DEF_PF_VECTOR, 1 << pf_id); - } - - /* Take the protocol-based hit vector if there is a hit, - * otherwise take the other vector. - */ - qed_wr(p_hwfn, p_ptt, NIG_REG_LLH_CLS_TYPE_DUALMODE, 0x2); - } - return rc; + return qed_init_run(p_hwfn, p_ptt, PHASE_PORT, + p_hwfn->port_id, hw_mode); } static int qed_hw_init_pf(struct qed_hwfn *p_hwfn, @@ -878,21 +855,6 @@ static int qed_hw_init_pf(struct qed_hwfn *p_hwfn, /* Pure runtime initializations - directly to the HW */ qed_int_igu_init_pure_rt(p_hwfn, p_ptt, true, true); - if (hw_mode & (1 << MODE_MF_SI)) { - u8 pf_id = 0; - u32 val = 0; - - if (!qed_hw_init_first_eth(p_hwfn, p_ptt, &pf_id)) { - if (p_hwfn->rel_pf_id == pf_id) { - DP_VERBOSE(p_hwfn, NETIF_MSG_IFUP, - "PF[%d] is first ETH on engine\n", - pf_id); - val = 1; - } - qed_wr(p_hwfn, p_ptt, PRS_REG_MSG_INFO, val); - } - } - if (b_hw_start) { /* enable interrupts */ qed_int_igu_enable(p_hwfn, p_ptt, int_mode); diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h index acf5da39ecb0..a67b3554aabd 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h +++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h @@ -536,6 +536,244 @@ struct core_conn_context { struct regpair ustorm_st_padding[2]; }; +enum core_error_handle { + LL2_DROP_PACKET, + LL2_DO_NOTHING, + LL2_ASSERT, + MAX_CORE_ERROR_HANDLE +}; + +enum core_event_opcode { + CORE_EVENT_TX_QUEUE_START, + CORE_EVENT_TX_QUEUE_STOP, + CORE_EVENT_RX_QUEUE_START, + CORE_EVENT_RX_QUEUE_STOP, + MAX_CORE_EVENT_OPCODE +}; + +enum core_l4_pseudo_checksum_mode { + CORE_L4_PSEUDO_CSUM_CORRECT_LENGTH, + CORE_L4_PSEUDO_CSUM_ZERO_LENGTH, + MAX_CORE_L4_PSEUDO_CHECKSUM_MODE +}; + +struct core_ll2_port_stats { + struct regpair gsi_invalid_hdr; + struct regpair gsi_invalid_pkt_length; + struct regpair gsi_unsupported_pkt_typ; + struct regpair gsi_crcchksm_error; +}; + +struct core_ll2_pstorm_per_queue_stat { + struct regpair sent_ucast_bytes; + struct regpair sent_mcast_bytes; + struct regpair sent_bcast_bytes; + struct regpair sent_ucast_pkts; + struct regpair sent_mcast_pkts; + struct regpair sent_bcast_pkts; +}; + +struct core_ll2_rx_prod { + __le16 bd_prod; + __le16 cqe_prod; + __le32 reserved; +}; + +struct core_ll2_tstorm_per_queue_stat { + struct regpair packet_too_big_discard; + struct regpair no_buff_discard; +}; + +struct core_ll2_ustorm_per_queue_stat { + struct regpair rcv_ucast_bytes; + struct regpair rcv_mcast_bytes; + struct regpair rcv_bcast_bytes; + struct regpair rcv_ucast_pkts; + struct regpair rcv_mcast_pkts; + struct regpair rcv_bcast_pkts; +}; + +enum core_ramrod_cmd_id { + CORE_RAMROD_UNUSED, + CORE_RAMROD_RX_QUEUE_START, + CORE_RAMROD_TX_QUEUE_START, + CORE_RAMROD_RX_QUEUE_STOP, + CORE_RAMROD_TX_QUEUE_STOP, + MAX_CORE_RAMROD_CMD_ID +}; + +enum core_roce_flavor_type { + CORE_ROCE, + CORE_RROCE, + MAX_CORE_ROCE_FLAVOR_TYPE +}; + +struct core_rx_action_on_error { + u8 error_type; +#define CORE_RX_ACTION_ON_ERROR_PACKET_TOO_BIG_MASK 0x3 +#define CORE_RX_ACTION_ON_ERROR_PACKET_TOO_BIG_SHIFT 0 +#define CORE_RX_ACTION_ON_ERROR_NO_BUFF_MASK 0x3 +#define CORE_RX_ACTION_ON_ERROR_NO_BUFF_SHIFT 2 +#define CORE_RX_ACTION_ON_ERROR_RESERVED_MASK 0xF +#define CORE_RX_ACTION_ON_ERROR_RESERVED_SHIFT 4 +}; + +struct core_rx_bd { + struct regpair addr; + __le16 reserved[4]; +}; + +struct core_rx_bd_with_buff_len { + struct regpair addr; + __le16 buff_length; + __le16 reserved[3]; +}; + +union core_rx_bd_union { + struct core_rx_bd rx_bd; + struct core_rx_bd_with_buff_len rx_bd_with_len; +}; + +struct core_rx_cqe_opaque_data { + __le32 data[2]; +}; + +enum core_rx_cqe_type { + CORE_RX_CQE_ILLIGAL_TYPE, + CORE_RX_CQE_TYPE_REGULAR, + CORE_RX_CQE_TYPE_GSI_OFFLOAD, + CORE_RX_CQE_TYPE_SLOW_PATH, + MAX_CORE_RX_CQE_TYPE +}; + +struct core_rx_fast_path_cqe { + u8 type; + u8 placement_offset; + struct parsing_and_err_flags parse_flags; + __le16 packet_length; + __le16 vlan; + struct core_rx_cqe_opaque_data opaque_data; + __le32 reserved[4]; +}; + +struct core_rx_gsi_offload_cqe { + u8 type; + u8 data_length_error; + struct parsing_and_err_flags parse_flags; + __le16 data_length; + __le16 vlan; + __le32 src_mac_addrhi; + __le16 src_mac_addrlo; + u8 reserved1[2]; + __le32 gid_dst[4]; +}; + +struct core_rx_slow_path_cqe { + u8 type; + u8 ramrod_cmd_id; + __le16 echo; + __le32 reserved1[7]; +}; + +union core_rx_cqe_union { + struct core_rx_fast_path_cqe rx_cqe_fp; + struct core_rx_gsi_offload_cqe rx_cqe_gsi; + struct core_rx_slow_path_cqe rx_cqe_sp; +}; + +struct core_rx_start_ramrod_data { + struct regpair bd_base; + struct regpair cqe_pbl_addr; + __le16 mtu; + __le16 sb_id; + u8 sb_index; + u8 complete_cqe_flg; + u8 complete_event_flg; + u8 drop_ttl0_flg; + __le16 num_of_pbl_pages; + u8 inner_vlan_removal_en; + u8 queue_id; + u8 main_func_queue; + u8 mf_si_bcast_accept_all; + u8 mf_si_mcast_accept_all; + struct core_rx_action_on_error action_on_error; + u8 gsi_offload_flag; + u8 reserved[7]; +}; + +struct core_rx_stop_ramrod_data { + u8 complete_cqe_flg; + u8 complete_event_flg; + u8 queue_id; + u8 reserved1; + __le16 reserved2[2]; +}; + +struct core_tx_bd_flags { + u8 as_bitfield; +#define CORE_TX_BD_FLAGS_FORCE_VLAN_MODE_MASK 0x1 +#define CORE_TX_BD_FLAGS_FORCE_VLAN_MODE_SHIFT 0 +#define CORE_TX_BD_FLAGS_VLAN_INSERTION_MASK 0x1 +#define CORE_TX_BD_FLAGS_VLAN_INSERTION_SHIFT 1 +#define CORE_TX_BD_FLAGS_START_BD_MASK 0x1 +#define CORE_TX_BD_FLAGS_START_BD_SHIFT 2 +#define CORE_TX_BD_FLAGS_IP_CSUM_MASK 0x1 +#define CORE_TX_BD_FLAGS_IP_CSUM_SHIFT 3 +#define CORE_TX_BD_FLAGS_L4_CSUM_MASK 0x1 +#define CORE_TX_BD_FLAGS_L4_CSUM_SHIFT 4 +#define CORE_TX_BD_FLAGS_IPV6_EXT_MASK 0x1 +#define CORE_TX_BD_FLAGS_IPV6_EXT_SHIFT 5 +#define CORE_TX_BD_FLAGS_L4_PROTOCOL_MASK 0x1 +#define CORE_TX_BD_FLAGS_L4_PROTOCOL_SHIFT 6 +#define CORE_TX_BD_FLAGS_L4_PSEUDO_CSUM_MODE_MASK 0x1 +#define CORE_TX_BD_FLAGS_L4_PSEUDO_CSUM_MODE_SHIFT 7 +}; + +struct core_tx_bd { + struct regpair addr; + __le16 nbytes; + __le16 nw_vlan_or_lb_echo; + u8 bitfield0; +#define CORE_TX_BD_NBDS_MASK 0xF +#define CORE_TX_BD_NBDS_SHIFT 0 +#define CORE_TX_BD_ROCE_FLAV_MASK 0x1 +#define CORE_TX_BD_ROCE_FLAV_SHIFT 4 +#define CORE_TX_BD_RESERVED0_MASK 0x7 +#define CORE_TX_BD_RESERVED0_SHIFT 5 + struct core_tx_bd_flags bd_flags; + __le16 bitfield1; +#define CORE_TX_BD_L4_HDR_OFFSET_W_MASK 0x3FFF +#define CORE_TX_BD_L4_HDR_OFFSET_W_SHIFT 0 +#define CORE_TX_BD_TX_DST_MASK 0x1 +#define CORE_TX_BD_TX_DST_SHIFT 14 +#define CORE_TX_BD_RESERVED1_MASK 0x1 +#define CORE_TX_BD_RESERVED1_SHIFT 15 +}; + +enum core_tx_dest { + CORE_TX_DEST_NW, + CORE_TX_DEST_LB, + MAX_CORE_TX_DEST +}; + +struct core_tx_start_ramrod_data { + struct regpair pbl_base_addr; + __le16 mtu; + __le16 sb_id; + u8 sb_index; + u8 stats_en; + u8 stats_id; + u8 conn_type; + __le16 pbl_size; + __le16 qm_pq_id; + u8 gsi_offload_flag; + u8 resrved[3]; +}; + +struct core_tx_stop_ramrod_data { + __le32 reserved0[2]; +}; + struct eth_mstorm_per_pf_stat { struct regpair gre_discard_pkts; struct regpair vxlan_discard_pkts; @@ -636,9 +874,33 @@ struct hsi_fp_ver_struct { }; /* Mstorm non-triggering VF zone */ +enum malicious_vf_error_id { + MALICIOUS_VF_NO_ERROR, + VF_PF_CHANNEL_NOT_READY, + VF_ZONE_MSG_NOT_VALID, + VF_ZONE_FUNC_NOT_ENABLED, + ETH_PACKET_TOO_SMALL, + ETH_ILLEGAL_VLAN_MODE, + ETH_MTU_VIOLATION, + ETH_ILLEGAL_INBAND_TAGS, + ETH_VLAN_INSERT_AND_INBAND_VLAN, + ETH_ILLEGAL_NBDS, + ETH_FIRST_BD_WO_SOP, + ETH_INSUFFICIENT_BDS, + ETH_ILLEGAL_LSO_HDR_NBDS, + ETH_ILLEGAL_LSO_MSS, + ETH_ZERO_SIZE_BD, + ETH_ILLEGAL_LSO_HDR_LEN, + ETH_INSUFFICIENT_PAYLOAD, + ETH_EDPM_OUT_OF_SYNC, + ETH_TUNN_IPV6_EXT_NBD_ERR, + ETH_CONTROL_PACKET_VIOLATION, + MAX_MALICIOUS_VF_ERROR_ID +}; + struct mstorm_non_trigger_vf_zone { struct eth_mstorm_per_queue_stat eth_queue_stat; - struct eth_rx_prod_data eth_rx_queue_producers[ETH_MAX_NUM_RX_QUEUES_PER_VF]; + struct eth_rx_prod_data eth_rx_queue_producers[ETH_MAX_NUM_RX_QUEUES_PER_VF_QUAD]; }; /* Mstorm VF zone */ @@ -705,13 +967,17 @@ struct pf_start_ramrod_data { struct protocol_dcb_data { u8 dcb_enable_flag; + u8 reserved_a; u8 dcb_priority; u8 dcb_tc; - u8 reserved; + u8 reserved_b; + u8 reserved0; }; struct pf_update_tunnel_config { u8 update_rx_pf_clss; + u8 update_rx_def_ucast_clss; + u8 update_rx_def_non_ucast_clss; u8 update_tx_pf_clss; u8 set_vxlan_udp_port_flg; u8 set_geneve_udp_port_flg; @@ -727,7 +993,7 @@ struct pf_update_tunnel_config { u8 tunnel_clss_ipgre; __le16 vxlan_udp_port; __le16 geneve_udp_port; - __le16 reserved[3]; + __le16 reserved[2]; }; struct pf_update_ramrod_data { @@ -736,16 +1002,17 @@ struct pf_update_ramrod_data { u8 update_fcoe_dcb_data_flag; u8 update_iscsi_dcb_data_flag; u8 update_roce_dcb_data_flag; + u8 update_rroce_dcb_data_flag; u8 update_iwarp_dcb_data_flag; u8 update_mf_vlan_flag; - u8 reserved; struct protocol_dcb_data eth_dcb_data; struct protocol_dcb_data fcoe_dcb_data; struct protocol_dcb_data iscsi_dcb_data; struct protocol_dcb_data roce_dcb_data; + struct protocol_dcb_data rroce_dcb_data; struct protocol_dcb_data iwarp_dcb_data; __le16 mf_vlan; - __le16 reserved2; + __le16 reserved; struct pf_update_tunnel_config tunnel_config; }; @@ -766,10 +1033,14 @@ enum protocol_version_array_key { MAX_PROTOCOL_VERSION_ARRAY_KEY }; -/* Pstorm non-triggering VF zone */ +struct rdma_sent_stats { + struct regpair sent_bytes; + struct regpair sent_pkts; +}; + struct pstorm_non_trigger_vf_zone { struct eth_pstorm_per_queue_stat eth_queue_stat; - struct regpair reserved[2]; + struct rdma_sent_stats rdma_stats; }; /* Pstorm VF zone */ @@ -786,7 +1057,11 @@ struct ramrod_header { __le16 echo; }; -/* Slowpath Element (SPQE) */ +struct rdma_rcv_stats { + struct regpair rcv_bytes; + struct regpair rcv_pkts; +}; + struct slow_path_element { struct ramrod_header hdr; struct regpair data_ptr; @@ -794,7 +1069,7 @@ struct slow_path_element { /* Tstorm non-triggering VF zone */ struct tstorm_non_trigger_vf_zone { - struct regpair reserved[2]; + struct rdma_rcv_stats rdma_stats; }; struct tstorm_per_port_stat { @@ -802,9 +1077,14 @@ struct tstorm_per_port_stat { struct regpair mac_error_discard; struct regpair mftag_filter_discard; struct regpair eth_mac_filter_discard; - struct regpair reserved[5]; + struct regpair ll2_mac_filter_discard; + struct regpair ll2_conn_disabled_discard; + struct regpair iscsi_irregular_pkt; + struct regpair reserved; + struct regpair roce_irregular_pkt; struct regpair eth_irregular_pkt; - struct regpair reserved1[2]; + struct regpair reserved1; + struct regpair preroce_irregular_pkt; struct regpair eth_gre_tunn_filter_discard; struct regpair eth_vxlan_tunn_filter_discard; struct regpair eth_geneve_tunn_filter_discard; @@ -870,7 +1150,13 @@ struct vf_stop_ramrod_data { __le32 reserved2; }; -/* Attentions status block */ +enum vf_zone_size_mode { + VF_ZONE_SIZE_MODE_DEFAULT, + VF_ZONE_SIZE_MODE_DOUBLE, + VF_ZONE_SIZE_MODE_QUAD, + MAX_VF_ZONE_SIZE_MODE +}; + struct atten_status_block { __le32 atten_bits; __le32 atten_ack; @@ -1579,6 +1865,7 @@ enum dbg_status { DBG_STATUS_REG_FIFO_BAD_DATA, DBG_STATUS_PROTECTION_OVERRIDE_BAD_DATA, DBG_STATUS_DBG_ARRAY_NOT_SET, + DBG_STATUS_MULTI_BLOCKS_WITH_FILTER, MAX_DBG_STATUS }; @@ -1589,7 +1876,41 @@ enum dbg_status { /* Number of VLAN priorities */ #define NUM_OF_VLAN_PRIORITIES 8 -/* QM per-port init parameters */ +struct init_brb_ram_req { + __le32 guranteed_per_tc; + __le32 headroom_per_tc; + __le32 min_pkt_size; + __le32 max_ports_per_engine; + u8 num_active_tcs[MAX_NUM_PORTS]; +}; + +struct init_ets_tc_req { + u8 use_sp; + u8 use_wfq; + __le16 weight; +}; + +struct init_ets_req { + __le32 mtu; + struct init_ets_tc_req tc_req[NUM_OF_TCS]; +}; + +struct init_nig_lb_rl_req { + __le16 lb_mac_rate; + __le16 lb_rate; + __le32 mtu; + __le16 tc_rate[NUM_OF_PHYS_TCS]; +}; + +struct init_nig_pri_tc_map_entry { + u8 tc_id; + u8 valid; +}; + +struct init_nig_pri_tc_map_req { + struct init_nig_pri_tc_map_entry pri[NUM_OF_VLAN_PRIORITIES]; +}; + struct init_qm_port_params { u8 active; u8 active_phys_tcs; @@ -1619,7 +1940,7 @@ struct init_qm_vport_params { /* Width of GRC address in bits (addresses are specified in dwords) */ #define GRC_ADDR_BITS 23 -#define MAX_GRC_ADDR ((1 << GRC_ADDR_BITS) - 1) +#define MAX_GRC_ADDR (BIT(GRC_ADDR_BITS) - 1) /* indicates an init that should be applied to any phase ID */ #define ANY_PHASE_ID 0xffff @@ -1674,11 +1995,11 @@ struct bin_buffer_hdr { /* binary init buffer types */ enum bin_init_buffer_type { - BIN_BUF_FW_VER_INFO, + BIN_BUF_INIT_FW_VER_INFO, BIN_BUF_INIT_CMD, BIN_BUF_INIT_VAL, BIN_BUF_INIT_MODE_TREE, - BIN_BUF_IRO, + BIN_BUF_INIT_IRO, MAX_BIN_INIT_BUFFER_TYPE }; @@ -1918,44 +2239,34 @@ enum dbg_status qed_dbg_print_attn(struct qed_hwfn *p_hwfn, #define MAX_NAME_LEN 16 /* Win 2 */ -#define GTT_BAR0_MAP_REG_IGU_CMD \ - 0x00f000UL +#define GTT_BAR0_MAP_REG_IGU_CMD 0x00f000UL /* Win 3 */ -#define GTT_BAR0_MAP_REG_TSDM_RAM \ - 0x010000UL +#define GTT_BAR0_MAP_REG_TSDM_RAM 0x010000UL /* Win 4 */ -#define GTT_BAR0_MAP_REG_MSDM_RAM \ - 0x011000UL +#define GTT_BAR0_MAP_REG_MSDM_RAM 0x011000UL /* Win 5 */ -#define GTT_BAR0_MAP_REG_MSDM_RAM_1024 \ - 0x012000UL +#define GTT_BAR0_MAP_REG_MSDM_RAM_1024 0x012000UL /* Win 6 */ -#define GTT_BAR0_MAP_REG_USDM_RAM \ - 0x013000UL +#define GTT_BAR0_MAP_REG_USDM_RAM 0x013000UL /* Win 7 */ -#define GTT_BAR0_MAP_REG_USDM_RAM_1024 \ - 0x014000UL +#define GTT_BAR0_MAP_REG_USDM_RAM_1024 0x014000UL /* Win 8 */ -#define GTT_BAR0_MAP_REG_USDM_RAM_2048 \ - 0x015000UL +#define GTT_BAR0_MAP_REG_USDM_RAM_2048 0x015000UL /* Win 9 */ -#define GTT_BAR0_MAP_REG_XSDM_RAM \ - 0x016000UL +#define GTT_BAR0_MAP_REG_XSDM_RAM 0x016000UL /* Win 10 */ -#define GTT_BAR0_MAP_REG_YSDM_RAM \ - 0x017000UL +#define GTT_BAR0_MAP_REG_YSDM_RAM 0x017000UL /* Win 11 */ -#define GTT_BAR0_MAP_REG_PSDM_RAM \ - 0x018000UL +#define GTT_BAR0_MAP_REG_PSDM_RAM 0x018000UL /** * @brief qed_qm_pf_mem_size - prepare QM ILT sizes @@ -2003,7 +2314,7 @@ struct qed_qm_pf_rt_init_params { u16 num_vf_pqs; u8 start_vport; u8 num_vports; - u8 pf_wfq; + u16 pf_wfq; u32 pf_rl; struct init_qm_pq_params *pq_params; struct init_qm_vport_params *vport_params; @@ -2138,6 +2449,9 @@ void qed_set_geneve_enable(struct qed_hwfn *p_hwfn, #define TSTORM_PORT_STAT_OFFSET(port_id) \ (IRO[1].base + ((port_id) * IRO[1].m1)) #define TSTORM_PORT_STAT_SIZE (IRO[1].size) +#define TSTORM_LL2_PORT_STAT_OFFSET(port_id) \ + (IRO[2].base + ((port_id) * IRO[2].m1)) +#define TSTORM_LL2_PORT_STAT_SIZE (IRO[2].size) #define USTORM_VF_PF_CHANNEL_READY_OFFSET(vf_id) \ (IRO[3].base + ((vf_id) * IRO[3].m1)) #define USTORM_VF_PF_CHANNEL_READY_SIZE (IRO[3].size) @@ -2153,42 +2467,90 @@ void qed_set_geneve_enable(struct qed_hwfn *p_hwfn, #define USTORM_COMMON_QUEUE_CONS_OFFSET(queue_zone_id) \ (IRO[7].base + ((queue_zone_id) * IRO[7].m1)) #define USTORM_COMMON_QUEUE_CONS_SIZE (IRO[7].size) +#define TSTORM_LL2_RX_PRODS_OFFSET(core_rx_queue_id) \ + (IRO[14].base + ((core_rx_queue_id) * IRO[14].m1)) +#define TSTORM_LL2_RX_PRODS_SIZE (IRO[14].size) +#define CORE_LL2_TSTORM_PER_QUEUE_STAT_OFFSET(core_rx_queue_id) \ + (IRO[15].base + ((core_rx_queue_id) * IRO[15].m1)) +#define CORE_LL2_TSTORM_PER_QUEUE_STAT_SIZE (IRO[15].size) +#define CORE_LL2_USTORM_PER_QUEUE_STAT_OFFSET(core_rx_queue_id) \ + (IRO[16].base + ((core_rx_queue_id) * IRO[16].m1)) +#define CORE_LL2_USTORM_PER_QUEUE_STAT_SIZE (IRO[16].size) +#define CORE_LL2_PSTORM_PER_QUEUE_STAT_OFFSET(core_tx_stats_id) \ + (IRO[17].base + ((core_tx_stats_id) * IRO[17].m1)) +#define CORE_LL2_PSTORM_PER_QUEUE_STAT_SIZE (IRO[17]. size) #define MSTORM_QUEUE_STAT_OFFSET(stat_counter_id) \ (IRO[18].base + ((stat_counter_id) * IRO[18].m1)) #define MSTORM_QUEUE_STAT_SIZE (IRO[18].size) #define MSTORM_ETH_PF_PRODS_OFFSET(queue_id) \ (IRO[19].base + ((queue_id) * IRO[19].m1)) #define MSTORM_ETH_PF_PRODS_SIZE (IRO[19].size) -#define MSTORM_TPA_TIMEOUT_US_OFFSET (IRO[20].base) -#define MSTORM_TPA_TIMEOUT_US_SIZE (IRO[20].size) +#define MSTORM_ETH_VF_PRODS_OFFSET(vf_id, vf_queue_id) \ + (IRO[20].base + ((vf_id) * IRO[20].m1) + ((vf_queue_id) * IRO[20].m2)) +#define MSTORM_ETH_VF_PRODS_SIZE (IRO[20].size) +#define MSTORM_TPA_TIMEOUT_US_OFFSET (IRO[21].base) +#define MSTORM_TPA_TIMEOUT_US_SIZE (IRO[21].size) #define MSTORM_ETH_PF_STAT_OFFSET(pf_id) \ - (IRO[21].base + ((pf_id) * IRO[21].m1)) + (IRO[22].base + ((pf_id) * IRO[22].m1)) #define MSTORM_ETH_PF_STAT_SIZE (IRO[21].size) #define USTORM_QUEUE_STAT_OFFSET(stat_counter_id) \ - (IRO[22].base + ((stat_counter_id) * IRO[22].m1)) -#define USTORM_QUEUE_STAT_SIZE (IRO[22].size) + (IRO[23].base + ((stat_counter_id) * IRO[23].m1)) +#define USTORM_QUEUE_STAT_SIZE (IRO[23].size) #define USTORM_ETH_PF_STAT_OFFSET(pf_id) \ - (IRO[23].base + ((pf_id) * IRO[23].m1)) -#define USTORM_ETH_PF_STAT_SIZE (IRO[23].size) + (IRO[24].base + ((pf_id) * IRO[24].m1)) +#define USTORM_ETH_PF_STAT_SIZE (IRO[24].size) #define PSTORM_QUEUE_STAT_OFFSET(stat_counter_id) \ - (IRO[24].base + ((stat_counter_id) * IRO[24].m1)) -#define PSTORM_QUEUE_STAT_SIZE (IRO[24].size) + (IRO[25].base + ((stat_counter_id) * IRO[25].m1)) +#define PSTORM_QUEUE_STAT_SIZE (IRO[25].size) #define PSTORM_ETH_PF_STAT_OFFSET(pf_id) \ - (IRO[25].base + ((pf_id) * IRO[25].m1)) -#define PSTORM_ETH_PF_STAT_SIZE (IRO[25].size) + (IRO[26].base + ((pf_id) * IRO[26].m1)) +#define PSTORM_ETH_PF_STAT_SIZE (IRO[26].size) #define PSTORM_CTL_FRAME_ETHTYPE_OFFSET(ethtype) \ - (IRO[26].base + ((ethtype) * IRO[26].m1)) -#define PSTORM_CTL_FRAME_ETHTYPE_SIZE (IRO[26].size) -#define TSTORM_ETH_PRS_INPUT_OFFSET (IRO[27].base) -#define TSTORM_ETH_PRS_INPUT_SIZE (IRO[27].size) + (IRO[27].base + ((ethtype) * IRO[27].m1)) +#define PSTORM_CTL_FRAME_ETHTYPE_SIZE (IRO[27].size) +#define TSTORM_ETH_PRS_INPUT_OFFSET (IRO[28].base) +#define TSTORM_ETH_PRS_INPUT_SIZE (IRO[28].size) #define ETH_RX_RATE_LIMIT_OFFSET(pf_id) \ - (IRO[28].base + ((pf_id) * IRO[28].m1)) -#define ETH_RX_RATE_LIMIT_SIZE (IRO[28].size) + (IRO[29].base + ((pf_id) * IRO[29].m1)) +#define ETH_RX_RATE_LIMIT_SIZE (IRO[29].size) #define XSTORM_ETH_QUEUE_ZONE_OFFSET(queue_id) \ - (IRO[29].base + ((queue_id) * IRO[29].m1)) -#define XSTORM_ETH_QUEUE_ZONE_SIZE (IRO[29].size) + (IRO[30].base + ((queue_id) * IRO[30].m1)) +#define XSTORM_ETH_QUEUE_ZONE_SIZE (IRO[30].size) +#define TSTORM_SCSI_CMDQ_CONS_OFFSET(cmdq_queue_id) \ + (IRO[34].base + ((cmdq_queue_id) * IRO[34].m1)) +#define TSTORM_SCSI_CMDQ_CONS_SIZE (IRO[34].size) +#define TSTORM_SCSI_BDQ_EXT_PROD_OFFSET(func_id, bdq_id) \ + (IRO[35].base + ((func_id) * IRO[35].m1) + ((bdq_id) * IRO[35].m2)) +#define TSTORM_SCSI_BDQ_EXT_PROD_SIZE (IRO[35].size) +#define MSTORM_SCSI_BDQ_EXT_PROD_OFFSET(func_id, bdq_id) \ + (IRO[36].base + ((func_id) * IRO[36].m1) + ((bdq_id) * IRO[36].m2)) +#define MSTORM_SCSI_BDQ_EXT_PROD_SIZE (IRO[36].size) +#define TSTORM_ISCSI_RX_STATS_OFFSET(pf_id) \ + (IRO[37].base + ((pf_id) * IRO[37].m1)) +#define TSTORM_ISCSI_RX_STATS_SIZE (IRO[37].size) +#define MSTORM_ISCSI_RX_STATS_OFFSET(pf_id) \ + (IRO[38].base + ((pf_id) * IRO[38].m1)) +#define MSTORM_ISCSI_RX_STATS_SIZE (IRO[38].size) +#define USTORM_ISCSI_RX_STATS_OFFSET(pf_id) \ + (IRO[39].base + ((pf_id) * IRO[39].m1)) +#define USTORM_ISCSI_RX_STATS_SIZE (IRO[39].size) +#define XSTORM_ISCSI_TX_STATS_OFFSET(pf_id) \ + (IRO[40].base + ((pf_id) * IRO[40].m1)) +#define XSTORM_ISCSI_TX_STATS_SIZE (IRO[40].size) +#define YSTORM_ISCSI_TX_STATS_OFFSET(pf_id) \ + (IRO[41].base + ((pf_id) * IRO[41].m1)) +#define YSTORM_ISCSI_TX_STATS_SIZE (IRO[41].size) +#define PSTORM_ISCSI_TX_STATS_OFFSET(pf_id) \ + (IRO[42].base + ((pf_id) * IRO[42].m1)) +#define PSTORM_ISCSI_TX_STATS_SIZE (IRO[42].size) +#define PSTORM_RDMA_QUEUE_STAT_OFFSET(rdma_stat_counter_id) \ + (IRO[45].base + ((rdma_stat_counter_id) * IRO[45].m1)) +#define PSTORM_RDMA_QUEUE_STAT_SIZE (IRO[45].size) +#define TSTORM_RDMA_QUEUE_STAT_OFFSET(rdma_stat_counter_id) \ + (IRO[46].base + ((rdma_stat_counter_id) * IRO[46].m1)) +#define TSTORM_RDMA_QUEUE_STAT_SIZE (IRO[46].size) -static const struct iro iro_arr[46] = { +static const struct iro iro_arr[47] = { {0x0, 0x0, 0x0, 0x0, 0x8}, {0x4cb0, 0x78, 0x0, 0x0, 0x78}, {0x6318, 0x20, 0x0, 0x0, 0x20}, @@ -2201,20 +2563,21 @@ static const struct iro iro_arr[46] = { {0x3df0, 0x0, 0x0, 0x0, 0x78}, {0x29b0, 0x0, 0x0, 0x0, 0x78}, {0x4c38, 0x0, 0x0, 0x0, 0x78}, - {0x4a48, 0x0, 0x0, 0x0, 0x78}, + {0x4990, 0x0, 0x0, 0x0, 0x78}, {0x7e48, 0x0, 0x0, 0x0, 0x78}, {0xa28, 0x8, 0x0, 0x0, 0x8}, {0x60f8, 0x10, 0x0, 0x0, 0x10}, {0xb820, 0x30, 0x0, 0x0, 0x30}, {0x95b8, 0x30, 0x0, 0x0, 0x30}, - {0x4c18, 0x80, 0x0, 0x0, 0x40}, + {0x4b60, 0x80, 0x0, 0x0, 0x40}, {0x1f8, 0x4, 0x0, 0x0, 0x4}, - {0xc9a8, 0x0, 0x0, 0x0, 0x4}, - {0x4c58, 0x80, 0x0, 0x0, 0x20}, + {0x53a0, 0x80, 0x4, 0x0, 0x4}, + {0xc8f0, 0x0, 0x0, 0x0, 0x4}, + {0x4ba0, 0x80, 0x0, 0x0, 0x20}, {0x8050, 0x40, 0x0, 0x0, 0x30}, {0xe770, 0x60, 0x0, 0x0, 0x60}, {0x2b48, 0x80, 0x0, 0x0, 0x38}, - {0xdf88, 0x78, 0x0, 0x0, 0x78}, + {0xf188, 0x78, 0x0, 0x0, 0x78}, {0x1f8, 0x4, 0x0, 0x0, 0x4}, {0xacf0, 0x0, 0x0, 0x0, 0xf0}, {0xade0, 0x8, 0x0, 0x0, 0x8}, @@ -2226,455 +2589,457 @@ static const struct iro iro_arr[46] = { {0x200, 0x10, 0x8, 0x0, 0x8}, {0xb78, 0x10, 0x8, 0x0, 0x2}, {0xd888, 0x38, 0x0, 0x0, 0x24}, - {0x12120, 0x10, 0x0, 0x0, 0x8}, - {0x11b20, 0x38, 0x0, 0x0, 0x18}, + {0x12c38, 0x10, 0x0, 0x0, 0x8}, + {0x11aa0, 0x38, 0x0, 0x0, 0x18}, {0xa8c0, 0x30, 0x0, 0x0, 0x10}, {0x86f8, 0x28, 0x0, 0x0, 0x18}, - {0xeff8, 0x10, 0x0, 0x0, 0x10}, + {0x101f8, 0x10, 0x0, 0x0, 0x10}, {0xdd08, 0x48, 0x0, 0x0, 0x38}, - {0xf460, 0x20, 0x0, 0x0, 0x20}, + {0x10660, 0x20, 0x0, 0x0, 0x20}, {0x2b80, 0x80, 0x0, 0x0, 0x10}, {0x5000, 0x10, 0x0, 0x0, 0x10}, }; /* Runtime array offsets */ -#define DORQ_REG_PF_MAX_ICID_0_RT_OFFSET 0 -#define DORQ_REG_PF_MAX_ICID_1_RT_OFFSET 1 -#define DORQ_REG_PF_MAX_ICID_2_RT_OFFSET 2 -#define DORQ_REG_PF_MAX_ICID_3_RT_OFFSET 3 -#define DORQ_REG_PF_MAX_ICID_4_RT_OFFSET 4 -#define DORQ_REG_PF_MAX_ICID_5_RT_OFFSET 5 -#define DORQ_REG_PF_MAX_ICID_6_RT_OFFSET 6 -#define DORQ_REG_PF_MAX_ICID_7_RT_OFFSET 7 -#define DORQ_REG_VF_MAX_ICID_0_RT_OFFSET 8 -#define DORQ_REG_VF_MAX_ICID_1_RT_OFFSET 9 -#define DORQ_REG_VF_MAX_ICID_2_RT_OFFSET 10 -#define DORQ_REG_VF_MAX_ICID_3_RT_OFFSET 11 -#define DORQ_REG_VF_MAX_ICID_4_RT_OFFSET 12 -#define DORQ_REG_VF_MAX_ICID_5_RT_OFFSET 13 -#define DORQ_REG_VF_MAX_ICID_6_RT_OFFSET 14 -#define DORQ_REG_VF_MAX_ICID_7_RT_OFFSET 15 -#define DORQ_REG_PF_WAKE_ALL_RT_OFFSET 16 -#define DORQ_REG_TAG1_ETHERTYPE_RT_OFFSET 17 -#define IGU_REG_PF_CONFIGURATION_RT_OFFSET 18 -#define IGU_REG_VF_CONFIGURATION_RT_OFFSET 19 -#define IGU_REG_ATTN_MSG_ADDR_L_RT_OFFSET 20 -#define IGU_REG_ATTN_MSG_ADDR_H_RT_OFFSET 21 -#define IGU_REG_LEADING_EDGE_LATCH_RT_OFFSET 22 -#define IGU_REG_TRAILING_EDGE_LATCH_RT_OFFSET 23 -#define CAU_REG_CQE_AGG_UNIT_SIZE_RT_OFFSET 24 -#define CAU_REG_SB_VAR_MEMORY_RT_OFFSET 761 -#define CAU_REG_SB_VAR_MEMORY_RT_SIZE 736 -#define CAU_REG_SB_VAR_MEMORY_RT_OFFSET 761 -#define CAU_REG_SB_VAR_MEMORY_RT_SIZE 736 -#define CAU_REG_SB_ADDR_MEMORY_RT_OFFSET 1497 -#define CAU_REG_SB_ADDR_MEMORY_RT_SIZE 736 -#define CAU_REG_PI_MEMORY_RT_OFFSET 2233 -#define CAU_REG_PI_MEMORY_RT_SIZE 4416 -#define PRS_REG_SEARCH_RESP_INITIATOR_TYPE_RT_OFFSET 6649 -#define PRS_REG_TASK_ID_MAX_INITIATOR_PF_RT_OFFSET 6650 -#define PRS_REG_TASK_ID_MAX_INITIATOR_VF_RT_OFFSET 6651 -#define PRS_REG_TASK_ID_MAX_TARGET_PF_RT_OFFSET 6652 -#define PRS_REG_TASK_ID_MAX_TARGET_VF_RT_OFFSET 6653 -#define PRS_REG_SEARCH_TCP_RT_OFFSET 6654 -#define PRS_REG_SEARCH_FCOE_RT_OFFSET 6655 -#define PRS_REG_SEARCH_ROCE_RT_OFFSET 6656 -#define PRS_REG_ROCE_DEST_QP_MAX_VF_RT_OFFSET 6657 -#define PRS_REG_ROCE_DEST_QP_MAX_PF_RT_OFFSET 6658 -#define PRS_REG_SEARCH_OPENFLOW_RT_OFFSET 6659 -#define PRS_REG_SEARCH_NON_IP_AS_OPENFLOW_RT_OFFSET 6660 -#define PRS_REG_OPENFLOW_SUPPORT_ONLY_KNOWN_OVER_IP_RT_OFFSET 6661 -#define PRS_REG_OPENFLOW_SEARCH_KEY_MASK_RT_OFFSET 6662 -#define PRS_REG_TAG_ETHERTYPE_0_RT_OFFSET 6663 -#define PRS_REG_LIGHT_L2_ETHERTYPE_EN_RT_OFFSET 6664 -#define SRC_REG_FIRSTFREE_RT_OFFSET 6665 -#define SRC_REG_FIRSTFREE_RT_SIZE 2 -#define SRC_REG_LASTFREE_RT_OFFSET 6667 -#define SRC_REG_LASTFREE_RT_SIZE 2 -#define SRC_REG_COUNTFREE_RT_OFFSET 6669 -#define SRC_REG_NUMBER_HASH_BITS_RT_OFFSET 6670 -#define PSWRQ2_REG_CDUT_P_SIZE_RT_OFFSET 6671 -#define PSWRQ2_REG_CDUC_P_SIZE_RT_OFFSET 6672 -#define PSWRQ2_REG_TM_P_SIZE_RT_OFFSET 6673 -#define PSWRQ2_REG_QM_P_SIZE_RT_OFFSET 6674 -#define PSWRQ2_REG_SRC_P_SIZE_RT_OFFSET 6675 -#define PSWRQ2_REG_TSDM_P_SIZE_RT_OFFSET 6676 -#define PSWRQ2_REG_TM_FIRST_ILT_RT_OFFSET 6677 -#define PSWRQ2_REG_TM_LAST_ILT_RT_OFFSET 6678 -#define PSWRQ2_REG_QM_FIRST_ILT_RT_OFFSET 6679 -#define PSWRQ2_REG_QM_LAST_ILT_RT_OFFSET 6680 -#define PSWRQ2_REG_SRC_FIRST_ILT_RT_OFFSET 6681 -#define PSWRQ2_REG_SRC_LAST_ILT_RT_OFFSET 6682 -#define PSWRQ2_REG_CDUC_FIRST_ILT_RT_OFFSET 6683 -#define PSWRQ2_REG_CDUC_LAST_ILT_RT_OFFSET 6684 -#define PSWRQ2_REG_CDUT_FIRST_ILT_RT_OFFSET 6685 -#define PSWRQ2_REG_CDUT_LAST_ILT_RT_OFFSET 6686 -#define PSWRQ2_REG_TSDM_FIRST_ILT_RT_OFFSET 6687 -#define PSWRQ2_REG_TSDM_LAST_ILT_RT_OFFSET 6688 -#define PSWRQ2_REG_TM_NUMBER_OF_PF_BLOCKS_RT_OFFSET 6689 -#define PSWRQ2_REG_CDUT_NUMBER_OF_PF_BLOCKS_RT_OFFSET 6690 -#define PSWRQ2_REG_CDUC_NUMBER_OF_PF_BLOCKS_RT_OFFSET 6691 -#define PSWRQ2_REG_TM_VF_BLOCKS_RT_OFFSET 6692 -#define PSWRQ2_REG_CDUT_VF_BLOCKS_RT_OFFSET 6693 -#define PSWRQ2_REG_CDUC_VF_BLOCKS_RT_OFFSET 6694 -#define PSWRQ2_REG_TM_BLOCKS_FACTOR_RT_OFFSET 6695 -#define PSWRQ2_REG_CDUT_BLOCKS_FACTOR_RT_OFFSET 6696 -#define PSWRQ2_REG_CDUC_BLOCKS_FACTOR_RT_OFFSET 6697 -#define PSWRQ2_REG_VF_BASE_RT_OFFSET 6698 -#define PSWRQ2_REG_VF_LAST_ILT_RT_OFFSET 6699 -#define PSWRQ2_REG_WR_MBS0_RT_OFFSET 6700 -#define PSWRQ2_REG_RD_MBS0_RT_OFFSET 6701 -#define PSWRQ2_REG_DRAM_ALIGN_WR_RT_OFFSET 6702 -#define PSWRQ2_REG_DRAM_ALIGN_RD_RT_OFFSET 6703 -#define PSWRQ2_REG_ILT_MEMORY_RT_OFFSET 6704 -#define PSWRQ2_REG_ILT_MEMORY_RT_SIZE 22000 -#define PGLUE_REG_B_VF_BASE_RT_OFFSET 28704 -#define PGLUE_REG_B_CACHE_LINE_SIZE_RT_OFFSET 28705 -#define PGLUE_REG_B_PF_BAR0_SIZE_RT_OFFSET 28706 -#define PGLUE_REG_B_PF_BAR1_SIZE_RT_OFFSET 28707 -#define PGLUE_REG_B_VF_BAR1_SIZE_RT_OFFSET 28708 -#define TM_REG_VF_ENABLE_CONN_RT_OFFSET 28709 -#define TM_REG_PF_ENABLE_CONN_RT_OFFSET 28710 -#define TM_REG_PF_ENABLE_TASK_RT_OFFSET 28711 -#define TM_REG_GROUP_SIZE_RESOLUTION_CONN_RT_OFFSET 28712 -#define TM_REG_GROUP_SIZE_RESOLUTION_TASK_RT_OFFSET 28713 -#define TM_REG_CONFIG_CONN_MEM_RT_OFFSET 28714 -#define TM_REG_CONFIG_CONN_MEM_RT_SIZE 416 -#define TM_REG_CONFIG_TASK_MEM_RT_OFFSET 29130 -#define TM_REG_CONFIG_TASK_MEM_RT_SIZE 512 -#define QM_REG_MAXPQSIZE_0_RT_OFFSET 29642 -#define QM_REG_MAXPQSIZE_1_RT_OFFSET 29643 -#define QM_REG_MAXPQSIZE_2_RT_OFFSET 29644 -#define QM_REG_MAXPQSIZETXSEL_0_RT_OFFSET 29645 -#define QM_REG_MAXPQSIZETXSEL_1_RT_OFFSET 29646 -#define QM_REG_MAXPQSIZETXSEL_2_RT_OFFSET 29647 -#define QM_REG_MAXPQSIZETXSEL_3_RT_OFFSET 29648 -#define QM_REG_MAXPQSIZETXSEL_4_RT_OFFSET 29649 -#define QM_REG_MAXPQSIZETXSEL_5_RT_OFFSET 29650 -#define QM_REG_MAXPQSIZETXSEL_6_RT_OFFSET 29651 -#define QM_REG_MAXPQSIZETXSEL_7_RT_OFFSET 29652 -#define QM_REG_MAXPQSIZETXSEL_8_RT_OFFSET 29653 -#define QM_REG_MAXPQSIZETXSEL_9_RT_OFFSET 29654 -#define QM_REG_MAXPQSIZETXSEL_10_RT_OFFSET 29655 -#define QM_REG_MAXPQSIZETXSEL_11_RT_OFFSET 29656 -#define QM_REG_MAXPQSIZETXSEL_12_RT_OFFSET 29657 -#define QM_REG_MAXPQSIZETXSEL_13_RT_OFFSET 29658 -#define QM_REG_MAXPQSIZETXSEL_14_RT_OFFSET 29659 -#define QM_REG_MAXPQSIZETXSEL_15_RT_OFFSET 29660 -#define QM_REG_MAXPQSIZETXSEL_16_RT_OFFSET 29661 -#define QM_REG_MAXPQSIZETXSEL_17_RT_OFFSET 29662 -#define QM_REG_MAXPQSIZETXSEL_18_RT_OFFSET 29663 -#define QM_REG_MAXPQSIZETXSEL_19_RT_OFFSET 29664 -#define QM_REG_MAXPQSIZETXSEL_20_RT_OFFSET 29665 -#define QM_REG_MAXPQSIZETXSEL_21_RT_OFFSET 29666 -#define QM_REG_MAXPQSIZETXSEL_22_RT_OFFSET 29667 -#define QM_REG_MAXPQSIZETXSEL_23_RT_OFFSET 29668 -#define QM_REG_MAXPQSIZETXSEL_24_RT_OFFSET 29669 -#define QM_REG_MAXPQSIZETXSEL_25_RT_OFFSET 29670 -#define QM_REG_MAXPQSIZETXSEL_26_RT_OFFSET 29671 -#define QM_REG_MAXPQSIZETXSEL_27_RT_OFFSET 29672 -#define QM_REG_MAXPQSIZETXSEL_28_RT_OFFSET 29673 -#define QM_REG_MAXPQSIZETXSEL_29_RT_OFFSET 29674 -#define QM_REG_MAXPQSIZETXSEL_30_RT_OFFSET 29675 -#define QM_REG_MAXPQSIZETXSEL_31_RT_OFFSET 29676 -#define QM_REG_MAXPQSIZETXSEL_32_RT_OFFSET 29677 -#define QM_REG_MAXPQSIZETXSEL_33_RT_OFFSET 29678 -#define QM_REG_MAXPQSIZETXSEL_34_RT_OFFSET 29679 -#define QM_REG_MAXPQSIZETXSEL_35_RT_OFFSET 29680 -#define QM_REG_MAXPQSIZETXSEL_36_RT_OFFSET 29681 -#define QM_REG_MAXPQSIZETXSEL_37_RT_OFFSET 29682 -#define QM_REG_MAXPQSIZETXSEL_38_RT_OFFSET 29683 -#define QM_REG_MAXPQSIZETXSEL_39_RT_OFFSET 29684 -#define QM_REG_MAXPQSIZETXSEL_40_RT_OFFSET 29685 -#define QM_REG_MAXPQSIZETXSEL_41_RT_OFFSET 29686 -#define QM_REG_MAXPQSIZETXSEL_42_RT_OFFSET 29687 -#define QM_REG_MAXPQSIZETXSEL_43_RT_OFFSET 29688 -#define QM_REG_MAXPQSIZETXSEL_44_RT_OFFSET 29689 -#define QM_REG_MAXPQSIZETXSEL_45_RT_OFFSET 29690 -#define QM_REG_MAXPQSIZETXSEL_46_RT_OFFSET 29691 -#define QM_REG_MAXPQSIZETXSEL_47_RT_OFFSET 29692 -#define QM_REG_MAXPQSIZETXSEL_48_RT_OFFSET 29693 -#define QM_REG_MAXPQSIZETXSEL_49_RT_OFFSET 29694 -#define QM_REG_MAXPQSIZETXSEL_50_RT_OFFSET 29695 -#define QM_REG_MAXPQSIZETXSEL_51_RT_OFFSET 29696 -#define QM_REG_MAXPQSIZETXSEL_52_RT_OFFSET 29697 -#define QM_REG_MAXPQSIZETXSEL_53_RT_OFFSET 29698 -#define QM_REG_MAXPQSIZETXSEL_54_RT_OFFSET 29699 -#define QM_REG_MAXPQSIZETXSEL_55_RT_OFFSET 29700 -#define QM_REG_MAXPQSIZETXSEL_56_RT_OFFSET 29701 -#define QM_REG_MAXPQSIZETXSEL_57_RT_OFFSET 29702 -#define QM_REG_MAXPQSIZETXSEL_58_RT_OFFSET 29703 -#define QM_REG_MAXPQSIZETXSEL_59_RT_OFFSET 29704 -#define QM_REG_MAXPQSIZETXSEL_60_RT_OFFSET 29705 -#define QM_REG_MAXPQSIZETXSEL_61_RT_OFFSET 29706 -#define QM_REG_MAXPQSIZETXSEL_62_RT_OFFSET 29707 -#define QM_REG_MAXPQSIZETXSEL_63_RT_OFFSET 29708 -#define QM_REG_BASEADDROTHERPQ_RT_OFFSET 29709 -#define QM_REG_BASEADDROTHERPQ_RT_SIZE 128 -#define QM_REG_VOQCRDLINE_RT_OFFSET 29837 -#define QM_REG_VOQCRDLINE_RT_SIZE 20 -#define QM_REG_VOQINITCRDLINE_RT_OFFSET 29857 -#define QM_REG_VOQINITCRDLINE_RT_SIZE 20 -#define QM_REG_AFULLQMBYPTHRPFWFQ_RT_OFFSET 29877 -#define QM_REG_AFULLQMBYPTHRVPWFQ_RT_OFFSET 29878 -#define QM_REG_AFULLQMBYPTHRPFRL_RT_OFFSET 29879 -#define QM_REG_AFULLQMBYPTHRGLBLRL_RT_OFFSET 29880 -#define QM_REG_AFULLOPRTNSTCCRDMASK_RT_OFFSET 29881 -#define QM_REG_WRROTHERPQGRP_0_RT_OFFSET 29882 -#define QM_REG_WRROTHERPQGRP_1_RT_OFFSET 29883 -#define QM_REG_WRROTHERPQGRP_2_RT_OFFSET 29884 -#define QM_REG_WRROTHERPQGRP_3_RT_OFFSET 29885 -#define QM_REG_WRROTHERPQGRP_4_RT_OFFSET 29886 -#define QM_REG_WRROTHERPQGRP_5_RT_OFFSET 29887 -#define QM_REG_WRROTHERPQGRP_6_RT_OFFSET 29888 -#define QM_REG_WRROTHERPQGRP_7_RT_OFFSET 29889 -#define QM_REG_WRROTHERPQGRP_8_RT_OFFSET 29890 -#define QM_REG_WRROTHERPQGRP_9_RT_OFFSET 29891 -#define QM_REG_WRROTHERPQGRP_10_RT_OFFSET 29892 -#define QM_REG_WRROTHERPQGRP_11_RT_OFFSET 29893 -#define QM_REG_WRROTHERPQGRP_12_RT_OFFSET 29894 -#define QM_REG_WRROTHERPQGRP_13_RT_OFFSET 29895 -#define QM_REG_WRROTHERPQGRP_14_RT_OFFSET 29896 -#define QM_REG_WRROTHERPQGRP_15_RT_OFFSET 29897 -#define QM_REG_WRROTHERGRPWEIGHT_0_RT_OFFSET 29898 -#define QM_REG_WRROTHERGRPWEIGHT_1_RT_OFFSET 29899 -#define QM_REG_WRROTHERGRPWEIGHT_2_RT_OFFSET 29900 -#define QM_REG_WRROTHERGRPWEIGHT_3_RT_OFFSET 29901 -#define QM_REG_WRRTXGRPWEIGHT_0_RT_OFFSET 29902 -#define QM_REG_WRRTXGRPWEIGHT_1_RT_OFFSET 29903 -#define QM_REG_PQTX2PF_0_RT_OFFSET 29904 -#define QM_REG_PQTX2PF_1_RT_OFFSET 29905 -#define QM_REG_PQTX2PF_2_RT_OFFSET 29906 -#define QM_REG_PQTX2PF_3_RT_OFFSET 29907 -#define QM_REG_PQTX2PF_4_RT_OFFSET 29908 -#define QM_REG_PQTX2PF_5_RT_OFFSET 29909 -#define QM_REG_PQTX2PF_6_RT_OFFSET 29910 -#define QM_REG_PQTX2PF_7_RT_OFFSET 29911 -#define QM_REG_PQTX2PF_8_RT_OFFSET 29912 -#define QM_REG_PQTX2PF_9_RT_OFFSET 29913 -#define QM_REG_PQTX2PF_10_RT_OFFSET 29914 -#define QM_REG_PQTX2PF_11_RT_OFFSET 29915 -#define QM_REG_PQTX2PF_12_RT_OFFSET 29916 -#define QM_REG_PQTX2PF_13_RT_OFFSET 29917 -#define QM_REG_PQTX2PF_14_RT_OFFSET 29918 -#define QM_REG_PQTX2PF_15_RT_OFFSET 29919 -#define QM_REG_PQTX2PF_16_RT_OFFSET 29920 -#define QM_REG_PQTX2PF_17_RT_OFFSET 29921 -#define QM_REG_PQTX2PF_18_RT_OFFSET 29922 -#define QM_REG_PQTX2PF_19_RT_OFFSET 29923 -#define QM_REG_PQTX2PF_20_RT_OFFSET 29924 -#define QM_REG_PQTX2PF_21_RT_OFFSET 29925 -#define QM_REG_PQTX2PF_22_RT_OFFSET 29926 -#define QM_REG_PQTX2PF_23_RT_OFFSET 29927 -#define QM_REG_PQTX2PF_24_RT_OFFSET 29928 -#define QM_REG_PQTX2PF_25_RT_OFFSET 29929 -#define QM_REG_PQTX2PF_26_RT_OFFSET 29930 -#define QM_REG_PQTX2PF_27_RT_OFFSET 29931 -#define QM_REG_PQTX2PF_28_RT_OFFSET 29932 -#define QM_REG_PQTX2PF_29_RT_OFFSET 29933 -#define QM_REG_PQTX2PF_30_RT_OFFSET 29934 -#define QM_REG_PQTX2PF_31_RT_OFFSET 29935 -#define QM_REG_PQTX2PF_32_RT_OFFSET 29936 -#define QM_REG_PQTX2PF_33_RT_OFFSET 29937 -#define QM_REG_PQTX2PF_34_RT_OFFSET 29938 -#define QM_REG_PQTX2PF_35_RT_OFFSET 29939 -#define QM_REG_PQTX2PF_36_RT_OFFSET 29940 -#define QM_REG_PQTX2PF_37_RT_OFFSET 29941 -#define QM_REG_PQTX2PF_38_RT_OFFSET 29942 -#define QM_REG_PQTX2PF_39_RT_OFFSET 29943 -#define QM_REG_PQTX2PF_40_RT_OFFSET 29944 -#define QM_REG_PQTX2PF_41_RT_OFFSET 29945 -#define QM_REG_PQTX2PF_42_RT_OFFSET 29946 -#define QM_REG_PQTX2PF_43_RT_OFFSET 29947 -#define QM_REG_PQTX2PF_44_RT_OFFSET 29948 -#define QM_REG_PQTX2PF_45_RT_OFFSET 29949 -#define QM_REG_PQTX2PF_46_RT_OFFSET 29950 -#define QM_REG_PQTX2PF_47_RT_OFFSET 29951 -#define QM_REG_PQTX2PF_48_RT_OFFSET 29952 -#define QM_REG_PQTX2PF_49_RT_OFFSET 29953 -#define QM_REG_PQTX2PF_50_RT_OFFSET 29954 -#define QM_REG_PQTX2PF_51_RT_OFFSET 29955 -#define QM_REG_PQTX2PF_52_RT_OFFSET 29956 -#define QM_REG_PQTX2PF_53_RT_OFFSET 29957 -#define QM_REG_PQTX2PF_54_RT_OFFSET 29958 -#define QM_REG_PQTX2PF_55_RT_OFFSET 29959 -#define QM_REG_PQTX2PF_56_RT_OFFSET 29960 -#define QM_REG_PQTX2PF_57_RT_OFFSET 29961 -#define QM_REG_PQTX2PF_58_RT_OFFSET 29962 -#define QM_REG_PQTX2PF_59_RT_OFFSET 29963 -#define QM_REG_PQTX2PF_60_RT_OFFSET 29964 -#define QM_REG_PQTX2PF_61_RT_OFFSET 29965 -#define QM_REG_PQTX2PF_62_RT_OFFSET 29966 -#define QM_REG_PQTX2PF_63_RT_OFFSET 29967 -#define QM_REG_PQOTHER2PF_0_RT_OFFSET 29968 -#define QM_REG_PQOTHER2PF_1_RT_OFFSET 29969 -#define QM_REG_PQOTHER2PF_2_RT_OFFSET 29970 -#define QM_REG_PQOTHER2PF_3_RT_OFFSET 29971 -#define QM_REG_PQOTHER2PF_4_RT_OFFSET 29972 -#define QM_REG_PQOTHER2PF_5_RT_OFFSET 29973 -#define QM_REG_PQOTHER2PF_6_RT_OFFSET 29974 -#define QM_REG_PQOTHER2PF_7_RT_OFFSET 29975 -#define QM_REG_PQOTHER2PF_8_RT_OFFSET 29976 -#define QM_REG_PQOTHER2PF_9_RT_OFFSET 29977 -#define QM_REG_PQOTHER2PF_10_RT_OFFSET 29978 -#define QM_REG_PQOTHER2PF_11_RT_OFFSET 29979 -#define QM_REG_PQOTHER2PF_12_RT_OFFSET 29980 -#define QM_REG_PQOTHER2PF_13_RT_OFFSET 29981 -#define QM_REG_PQOTHER2PF_14_RT_OFFSET 29982 -#define QM_REG_PQOTHER2PF_15_RT_OFFSET 29983 -#define QM_REG_RLGLBLPERIOD_0_RT_OFFSET 29984 -#define QM_REG_RLGLBLPERIOD_1_RT_OFFSET 29985 -#define QM_REG_RLGLBLPERIODTIMER_0_RT_OFFSET 29986 -#define QM_REG_RLGLBLPERIODTIMER_1_RT_OFFSET 29987 -#define QM_REG_RLGLBLPERIODSEL_0_RT_OFFSET 29988 -#define QM_REG_RLGLBLPERIODSEL_1_RT_OFFSET 29989 -#define QM_REG_RLGLBLPERIODSEL_2_RT_OFFSET 29990 -#define QM_REG_RLGLBLPERIODSEL_3_RT_OFFSET 29991 -#define QM_REG_RLGLBLPERIODSEL_4_RT_OFFSET 29992 -#define QM_REG_RLGLBLPERIODSEL_5_RT_OFFSET 29993 -#define QM_REG_RLGLBLPERIODSEL_6_RT_OFFSET 29994 -#define QM_REG_RLGLBLPERIODSEL_7_RT_OFFSET 29995 -#define QM_REG_RLGLBLINCVAL_RT_OFFSET 29996 -#define QM_REG_RLGLBLINCVAL_RT_SIZE 256 -#define QM_REG_RLGLBLUPPERBOUND_RT_OFFSET 30252 -#define QM_REG_RLGLBLUPPERBOUND_RT_SIZE 256 -#define QM_REG_RLGLBLCRD_RT_OFFSET 30508 -#define QM_REG_RLGLBLCRD_RT_SIZE 256 -#define QM_REG_RLGLBLENABLE_RT_OFFSET 30764 -#define QM_REG_RLPFPERIOD_RT_OFFSET 30765 -#define QM_REG_RLPFPERIODTIMER_RT_OFFSET 30766 -#define QM_REG_RLPFINCVAL_RT_OFFSET 30767 -#define QM_REG_RLPFINCVAL_RT_SIZE 16 -#define QM_REG_RLPFUPPERBOUND_RT_OFFSET 30783 -#define QM_REG_RLPFUPPERBOUND_RT_SIZE 16 -#define QM_REG_RLPFCRD_RT_OFFSET 30799 -#define QM_REG_RLPFCRD_RT_SIZE 16 -#define QM_REG_RLPFENABLE_RT_OFFSET 30815 -#define QM_REG_RLPFVOQENABLE_RT_OFFSET 30816 -#define QM_REG_WFQPFWEIGHT_RT_OFFSET 30817 -#define QM_REG_WFQPFWEIGHT_RT_SIZE 16 -#define QM_REG_WFQPFUPPERBOUND_RT_OFFSET 30833 -#define QM_REG_WFQPFUPPERBOUND_RT_SIZE 16 -#define QM_REG_WFQPFCRD_RT_OFFSET 30849 -#define QM_REG_WFQPFCRD_RT_SIZE 160 -#define QM_REG_WFQPFENABLE_RT_OFFSET 31009 -#define QM_REG_WFQVPENABLE_RT_OFFSET 31010 -#define QM_REG_BASEADDRTXPQ_RT_OFFSET 31011 -#define QM_REG_BASEADDRTXPQ_RT_SIZE 512 -#define QM_REG_TXPQMAP_RT_OFFSET 31523 -#define QM_REG_TXPQMAP_RT_SIZE 512 -#define QM_REG_WFQVPWEIGHT_RT_OFFSET 32035 -#define QM_REG_WFQVPWEIGHT_RT_SIZE 512 -#define QM_REG_WFQVPCRD_RT_OFFSET 32547 -#define QM_REG_WFQVPCRD_RT_SIZE 512 -#define QM_REG_WFQVPMAP_RT_OFFSET 33059 -#define QM_REG_WFQVPMAP_RT_SIZE 512 -#define QM_REG_WFQPFCRD_MSB_RT_OFFSET 33571 -#define QM_REG_WFQPFCRD_MSB_RT_SIZE 160 -#define NIG_REG_TAG_ETHERTYPE_0_RT_OFFSET 33731 -#define NIG_REG_OUTER_TAG_VALUE_LIST0_RT_OFFSET 33732 -#define NIG_REG_OUTER_TAG_VALUE_LIST1_RT_OFFSET 33733 -#define NIG_REG_OUTER_TAG_VALUE_LIST2_RT_OFFSET 33734 -#define NIG_REG_OUTER_TAG_VALUE_LIST3_RT_OFFSET 33735 -#define NIG_REG_OUTER_TAG_VALUE_MASK_RT_OFFSET 33736 -#define NIG_REG_LLH_FUNC_TAGMAC_CLS_TYPE_RT_OFFSET 33737 -#define NIG_REG_LLH_FUNC_TAG_EN_RT_OFFSET 33738 -#define NIG_REG_LLH_FUNC_TAG_EN_RT_SIZE 4 -#define NIG_REG_LLH_FUNC_TAG_HDR_SEL_RT_OFFSET 33742 -#define NIG_REG_LLH_FUNC_TAG_HDR_SEL_RT_SIZE 4 -#define NIG_REG_LLH_FUNC_TAG_VALUE_RT_OFFSET 33746 -#define NIG_REG_LLH_FUNC_TAG_VALUE_RT_SIZE 4 -#define NIG_REG_LLH_FUNC_NO_TAG_RT_OFFSET 33750 -#define NIG_REG_LLH_FUNC_FILTER_VALUE_RT_OFFSET 33751 -#define NIG_REG_LLH_FUNC_FILTER_VALUE_RT_SIZE 32 -#define NIG_REG_LLH_FUNC_FILTER_EN_RT_OFFSET 33783 -#define NIG_REG_LLH_FUNC_FILTER_EN_RT_SIZE 16 -#define NIG_REG_LLH_FUNC_FILTER_MODE_RT_OFFSET 33799 -#define NIG_REG_LLH_FUNC_FILTER_MODE_RT_SIZE 16 -#define NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE_RT_OFFSET 33815 -#define NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE_RT_SIZE 16 -#define NIG_REG_LLH_FUNC_FILTER_HDR_SEL_RT_OFFSET 33831 -#define NIG_REG_LLH_FUNC_FILTER_HDR_SEL_RT_SIZE 16 -#define NIG_REG_TX_EDPM_CTRL_RT_OFFSET 33847 -#define NIG_REG_ROCE_DUPLICATE_TO_HOST_RT_OFFSET 33848 -#define CDU_REG_CID_ADDR_PARAMS_RT_OFFSET 33849 -#define CDU_REG_SEGMENT0_PARAMS_RT_OFFSET 33850 -#define CDU_REG_SEGMENT1_PARAMS_RT_OFFSET 33851 -#define CDU_REG_PF_SEG0_TYPE_OFFSET_RT_OFFSET 33852 -#define CDU_REG_PF_SEG1_TYPE_OFFSET_RT_OFFSET 33853 -#define CDU_REG_PF_SEG2_TYPE_OFFSET_RT_OFFSET 33854 -#define CDU_REG_PF_SEG3_TYPE_OFFSET_RT_OFFSET 33855 -#define CDU_REG_PF_FL_SEG0_TYPE_OFFSET_RT_OFFSET 33856 -#define CDU_REG_PF_FL_SEG1_TYPE_OFFSET_RT_OFFSET 33857 -#define CDU_REG_PF_FL_SEG2_TYPE_OFFSET_RT_OFFSET 33858 -#define CDU_REG_PF_FL_SEG3_TYPE_OFFSET_RT_OFFSET 33859 -#define CDU_REG_VF_SEG_TYPE_OFFSET_RT_OFFSET 33860 -#define CDU_REG_VF_FL_SEG_TYPE_OFFSET_RT_OFFSET 33861 -#define PBF_REG_TAG_ETHERTYPE_0_RT_OFFSET 33862 -#define PBF_REG_BTB_SHARED_AREA_SIZE_RT_OFFSET 33863 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ0_RT_OFFSET 33864 -#define PBF_REG_BTB_GUARANTEED_VOQ0_RT_OFFSET 33865 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ0_RT_OFFSET 33866 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ1_RT_OFFSET 33867 -#define PBF_REG_BTB_GUARANTEED_VOQ1_RT_OFFSET 33868 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ1_RT_OFFSET 33869 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ2_RT_OFFSET 33870 -#define PBF_REG_BTB_GUARANTEED_VOQ2_RT_OFFSET 33871 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ2_RT_OFFSET 33872 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ3_RT_OFFSET 33873 -#define PBF_REG_BTB_GUARANTEED_VOQ3_RT_OFFSET 33874 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ3_RT_OFFSET 33875 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ4_RT_OFFSET 33876 -#define PBF_REG_BTB_GUARANTEED_VOQ4_RT_OFFSET 33877 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ4_RT_OFFSET 33878 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ5_RT_OFFSET 33879 -#define PBF_REG_BTB_GUARANTEED_VOQ5_RT_OFFSET 33880 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ5_RT_OFFSET 33881 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ6_RT_OFFSET 33882 -#define PBF_REG_BTB_GUARANTEED_VOQ6_RT_OFFSET 33883 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ6_RT_OFFSET 33884 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ7_RT_OFFSET 33885 -#define PBF_REG_BTB_GUARANTEED_VOQ7_RT_OFFSET 33886 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ7_RT_OFFSET 33887 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ8_RT_OFFSET 33888 -#define PBF_REG_BTB_GUARANTEED_VOQ8_RT_OFFSET 33889 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ8_RT_OFFSET 33890 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ9_RT_OFFSET 33891 -#define PBF_REG_BTB_GUARANTEED_VOQ9_RT_OFFSET 33892 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ9_RT_OFFSET 33893 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ10_RT_OFFSET 33894 -#define PBF_REG_BTB_GUARANTEED_VOQ10_RT_OFFSET 33895 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ10_RT_OFFSET 33896 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ11_RT_OFFSET 33897 -#define PBF_REG_BTB_GUARANTEED_VOQ11_RT_OFFSET 33898 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ11_RT_OFFSET 33899 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ12_RT_OFFSET 33900 -#define PBF_REG_BTB_GUARANTEED_VOQ12_RT_OFFSET 33901 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ12_RT_OFFSET 33902 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ13_RT_OFFSET 33903 -#define PBF_REG_BTB_GUARANTEED_VOQ13_RT_OFFSET 33904 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ13_RT_OFFSET 33905 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ14_RT_OFFSET 33906 -#define PBF_REG_BTB_GUARANTEED_VOQ14_RT_OFFSET 33907 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ14_RT_OFFSET 33908 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ15_RT_OFFSET 33909 -#define PBF_REG_BTB_GUARANTEED_VOQ15_RT_OFFSET 33910 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ15_RT_OFFSET 33911 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ16_RT_OFFSET 33912 -#define PBF_REG_BTB_GUARANTEED_VOQ16_RT_OFFSET 33913 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ16_RT_OFFSET 33914 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ17_RT_OFFSET 33915 -#define PBF_REG_BTB_GUARANTEED_VOQ17_RT_OFFSET 33916 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ17_RT_OFFSET 33917 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ18_RT_OFFSET 33918 -#define PBF_REG_BTB_GUARANTEED_VOQ18_RT_OFFSET 33919 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ18_RT_OFFSET 33920 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ19_RT_OFFSET 33921 -#define PBF_REG_BTB_GUARANTEED_VOQ19_RT_OFFSET 33922 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ19_RT_OFFSET 33923 -#define XCM_REG_CON_PHY_Q3_RT_OFFSET 33924 +#define DORQ_REG_PF_MAX_ICID_0_RT_OFFSET 0 +#define DORQ_REG_PF_MAX_ICID_1_RT_OFFSET 1 +#define DORQ_REG_PF_MAX_ICID_2_RT_OFFSET 2 +#define DORQ_REG_PF_MAX_ICID_3_RT_OFFSET 3 +#define DORQ_REG_PF_MAX_ICID_4_RT_OFFSET 4 +#define DORQ_REG_PF_MAX_ICID_5_RT_OFFSET 5 +#define DORQ_REG_PF_MAX_ICID_6_RT_OFFSET 6 +#define DORQ_REG_PF_MAX_ICID_7_RT_OFFSET 7 +#define DORQ_REG_VF_MAX_ICID_0_RT_OFFSET 8 +#define DORQ_REG_VF_MAX_ICID_1_RT_OFFSET 9 +#define DORQ_REG_VF_MAX_ICID_2_RT_OFFSET 10 +#define DORQ_REG_VF_MAX_ICID_3_RT_OFFSET 11 +#define DORQ_REG_VF_MAX_ICID_4_RT_OFFSET 12 +#define DORQ_REG_VF_MAX_ICID_5_RT_OFFSET 13 +#define DORQ_REG_VF_MAX_ICID_6_RT_OFFSET 14 +#define DORQ_REG_VF_MAX_ICID_7_RT_OFFSET 15 +#define DORQ_REG_PF_WAKE_ALL_RT_OFFSET 16 +#define DORQ_REG_TAG1_ETHERTYPE_RT_OFFSET 17 +#define IGU_REG_PF_CONFIGURATION_RT_OFFSET 18 +#define IGU_REG_VF_CONFIGURATION_RT_OFFSET 19 +#define IGU_REG_ATTN_MSG_ADDR_L_RT_OFFSET 20 +#define IGU_REG_ATTN_MSG_ADDR_H_RT_OFFSET 21 +#define IGU_REG_LEADING_EDGE_LATCH_RT_OFFSET 22 +#define IGU_REG_TRAILING_EDGE_LATCH_RT_OFFSET 23 +#define CAU_REG_CQE_AGG_UNIT_SIZE_RT_OFFSET 24 +#define CAU_REG_SB_VAR_MEMORY_RT_OFFSET 761 +#define CAU_REG_SB_VAR_MEMORY_RT_SIZE 736 +#define CAU_REG_SB_VAR_MEMORY_RT_OFFSET 761 +#define CAU_REG_SB_VAR_MEMORY_RT_SIZE 736 +#define CAU_REG_SB_ADDR_MEMORY_RT_OFFSET 1497 +#define CAU_REG_SB_ADDR_MEMORY_RT_SIZE 736 +#define CAU_REG_PI_MEMORY_RT_OFFSET 2233 +#define CAU_REG_PI_MEMORY_RT_SIZE 4416 +#define PRS_REG_SEARCH_RESP_INITIATOR_TYPE_RT_OFFSET 6649 +#define PRS_REG_TASK_ID_MAX_INITIATOR_PF_RT_OFFSET 6650 +#define PRS_REG_TASK_ID_MAX_INITIATOR_VF_RT_OFFSET 6651 +#define PRS_REG_TASK_ID_MAX_TARGET_PF_RT_OFFSET 6652 +#define PRS_REG_TASK_ID_MAX_TARGET_VF_RT_OFFSET 6653 +#define PRS_REG_SEARCH_TCP_RT_OFFSET 6654 +#define PRS_REG_SEARCH_FCOE_RT_OFFSET 6655 +#define PRS_REG_SEARCH_ROCE_RT_OFFSET 6656 +#define PRS_REG_ROCE_DEST_QP_MAX_VF_RT_OFFSET 6657 +#define PRS_REG_ROCE_DEST_QP_MAX_PF_RT_OFFSET 6658 +#define PRS_REG_SEARCH_OPENFLOW_RT_OFFSET 6659 +#define PRS_REG_SEARCH_NON_IP_AS_OPENFLOW_RT_OFFSET 6660 +#define PRS_REG_OPENFLOW_SUPPORT_ONLY_KNOWN_OVER_IP_RT_OFFSET 6661 +#define PRS_REG_OPENFLOW_SEARCH_KEY_MASK_RT_OFFSET 6662 +#define PRS_REG_TAG_ETHERTYPE_0_RT_OFFSET 6663 +#define PRS_REG_LIGHT_L2_ETHERTYPE_EN_RT_OFFSET 6664 +#define SRC_REG_FIRSTFREE_RT_OFFSET 6665 +#define SRC_REG_FIRSTFREE_RT_SIZE 2 +#define SRC_REG_LASTFREE_RT_OFFSET 6667 +#define SRC_REG_LASTFREE_RT_SIZE 2 +#define SRC_REG_COUNTFREE_RT_OFFSET 6669 +#define SRC_REG_NUMBER_HASH_BITS_RT_OFFSET 6670 +#define PSWRQ2_REG_CDUT_P_SIZE_RT_OFFSET 6671 +#define PSWRQ2_REG_CDUC_P_SIZE_RT_OFFSET 6672 +#define PSWRQ2_REG_TM_P_SIZE_RT_OFFSET 6673 +#define PSWRQ2_REG_QM_P_SIZE_RT_OFFSET 6674 +#define PSWRQ2_REG_SRC_P_SIZE_RT_OFFSET 6675 +#define PSWRQ2_REG_TSDM_P_SIZE_RT_OFFSET 6676 +#define PSWRQ2_REG_TM_FIRST_ILT_RT_OFFSET 6677 +#define PSWRQ2_REG_TM_LAST_ILT_RT_OFFSET 6678 +#define PSWRQ2_REG_QM_FIRST_ILT_RT_OFFSET 6679 +#define PSWRQ2_REG_QM_LAST_ILT_RT_OFFSET 6680 +#define PSWRQ2_REG_SRC_FIRST_ILT_RT_OFFSET 6681 +#define PSWRQ2_REG_SRC_LAST_ILT_RT_OFFSET 6682 +#define PSWRQ2_REG_CDUC_FIRST_ILT_RT_OFFSET 6683 +#define PSWRQ2_REG_CDUC_LAST_ILT_RT_OFFSET 6684 +#define PSWRQ2_REG_CDUT_FIRST_ILT_RT_OFFSET 6685 +#define PSWRQ2_REG_CDUT_LAST_ILT_RT_OFFSET 6686 +#define PSWRQ2_REG_TSDM_FIRST_ILT_RT_OFFSET 6687 +#define PSWRQ2_REG_TSDM_LAST_ILT_RT_OFFSET 6688 +#define PSWRQ2_REG_TM_NUMBER_OF_PF_BLOCKS_RT_OFFSET 6689 +#define PSWRQ2_REG_CDUT_NUMBER_OF_PF_BLOCKS_RT_OFFSET 6690 +#define PSWRQ2_REG_CDUC_NUMBER_OF_PF_BLOCKS_RT_OFFSET 6691 +#define PSWRQ2_REG_TM_VF_BLOCKS_RT_OFFSET 6692 +#define PSWRQ2_REG_CDUT_VF_BLOCKS_RT_OFFSET 6693 +#define PSWRQ2_REG_CDUC_VF_BLOCKS_RT_OFFSET 6694 +#define PSWRQ2_REG_TM_BLOCKS_FACTOR_RT_OFFSET 6695 +#define PSWRQ2_REG_CDUT_BLOCKS_FACTOR_RT_OFFSET 6696 +#define PSWRQ2_REG_CDUC_BLOCKS_FACTOR_RT_OFFSET 6697 +#define PSWRQ2_REG_VF_BASE_RT_OFFSET 6698 +#define PSWRQ2_REG_VF_LAST_ILT_RT_OFFSET 6699 +#define PSWRQ2_REG_WR_MBS0_RT_OFFSET 6700 +#define PSWRQ2_REG_RD_MBS0_RT_OFFSET 6701 +#define PSWRQ2_REG_DRAM_ALIGN_WR_RT_OFFSET 6702 +#define PSWRQ2_REG_DRAM_ALIGN_RD_RT_OFFSET 6703 +#define PSWRQ2_REG_ILT_MEMORY_RT_OFFSET 6704 +#define PSWRQ2_REG_ILT_MEMORY_RT_SIZE 22000 +#define PGLUE_REG_B_VF_BASE_RT_OFFSET 28704 +#define PGLUE_REG_B_MSDM_OFFSET_MASK_B_RT_OFFSET 28705 +#define PGLUE_REG_B_MSDM_VF_SHIFT_B_RT_OFFSET 28706 +#define PGLUE_REG_B_CACHE_LINE_SIZE_RT_OFFSET 28707 +#define PGLUE_REG_B_PF_BAR0_SIZE_RT_OFFSET 28708 +#define PGLUE_REG_B_PF_BAR1_SIZE_RT_OFFSET 28709 +#define PGLUE_REG_B_VF_BAR1_SIZE_RT_OFFSET 28710 +#define TM_REG_VF_ENABLE_CONN_RT_OFFSET 28711 +#define TM_REG_PF_ENABLE_CONN_RT_OFFSET 28712 +#define TM_REG_PF_ENABLE_TASK_RT_OFFSET 28713 +#define TM_REG_GROUP_SIZE_RESOLUTION_CONN_RT_OFFSET 28714 +#define TM_REG_GROUP_SIZE_RESOLUTION_TASK_RT_OFFSET 28715 +#define TM_REG_CONFIG_CONN_MEM_RT_OFFSET 28716 +#define TM_REG_CONFIG_CONN_MEM_RT_SIZE 416 +#define TM_REG_CONFIG_TASK_MEM_RT_OFFSET 29132 +#define TM_REG_CONFIG_TASK_MEM_RT_SIZE 512 +#define QM_REG_MAXPQSIZE_0_RT_OFFSET 29644 +#define QM_REG_MAXPQSIZE_1_RT_OFFSET 29645 +#define QM_REG_MAXPQSIZE_2_RT_OFFSET 29646 +#define QM_REG_MAXPQSIZETXSEL_0_RT_OFFSET 29647 +#define QM_REG_MAXPQSIZETXSEL_1_RT_OFFSET 29648 +#define QM_REG_MAXPQSIZETXSEL_2_RT_OFFSET 29649 +#define QM_REG_MAXPQSIZETXSEL_3_RT_OFFSET 29650 +#define QM_REG_MAXPQSIZETXSEL_4_RT_OFFSET 29651 +#define QM_REG_MAXPQSIZETXSEL_5_RT_OFFSET 29652 +#define QM_REG_MAXPQSIZETXSEL_6_RT_OFFSET 29653 +#define QM_REG_MAXPQSIZETXSEL_7_RT_OFFSET 29654 +#define QM_REG_MAXPQSIZETXSEL_8_RT_OFFSET 29655 +#define QM_REG_MAXPQSIZETXSEL_9_RT_OFFSET 29656 +#define QM_REG_MAXPQSIZETXSEL_10_RT_OFFSET 29657 +#define QM_REG_MAXPQSIZETXSEL_11_RT_OFFSET 29658 +#define QM_REG_MAXPQSIZETXSEL_12_RT_OFFSET 29659 +#define QM_REG_MAXPQSIZETXSEL_13_RT_OFFSET 29660 +#define QM_REG_MAXPQSIZETXSEL_14_RT_OFFSET 29661 +#define QM_REG_MAXPQSIZETXSEL_15_RT_OFFSET 29662 +#define QM_REG_MAXPQSIZETXSEL_16_RT_OFFSET 29663 +#define QM_REG_MAXPQSIZETXSEL_17_RT_OFFSET 29664 +#define QM_REG_MAXPQSIZETXSEL_18_RT_OFFSET 29665 +#define QM_REG_MAXPQSIZETXSEL_19_RT_OFFSET 29666 +#define QM_REG_MAXPQSIZETXSEL_20_RT_OFFSET 29667 +#define QM_REG_MAXPQSIZETXSEL_21_RT_OFFSET 29668 +#define QM_REG_MAXPQSIZETXSEL_22_RT_OFFSET 29669 +#define QM_REG_MAXPQSIZETXSEL_23_RT_OFFSET 29670 +#define QM_REG_MAXPQSIZETXSEL_24_RT_OFFSET 29671 +#define QM_REG_MAXPQSIZETXSEL_25_RT_OFFSET 29672 +#define QM_REG_MAXPQSIZETXSEL_26_RT_OFFSET 29673 +#define QM_REG_MAXPQSIZETXSEL_27_RT_OFFSET 29674 +#define QM_REG_MAXPQSIZETXSEL_28_RT_OFFSET 29675 +#define QM_REG_MAXPQSIZETXSEL_29_RT_OFFSET 29676 +#define QM_REG_MAXPQSIZETXSEL_30_RT_OFFSET 29677 +#define QM_REG_MAXPQSIZETXSEL_31_RT_OFFSET 29678 +#define QM_REG_MAXPQSIZETXSEL_32_RT_OFFSET 29679 +#define QM_REG_MAXPQSIZETXSEL_33_RT_OFFSET 29680 +#define QM_REG_MAXPQSIZETXSEL_34_RT_OFFSET 29681 +#define QM_REG_MAXPQSIZETXSEL_35_RT_OFFSET 29682 +#define QM_REG_MAXPQSIZETXSEL_36_RT_OFFSET 29683 +#define QM_REG_MAXPQSIZETXSEL_37_RT_OFFSET 29684 +#define QM_REG_MAXPQSIZETXSEL_38_RT_OFFSET 29685 +#define QM_REG_MAXPQSIZETXSEL_39_RT_OFFSET 29686 +#define QM_REG_MAXPQSIZETXSEL_40_RT_OFFSET 29687 +#define QM_REG_MAXPQSIZETXSEL_41_RT_OFFSET 29688 +#define QM_REG_MAXPQSIZETXSEL_42_RT_OFFSET 29689 +#define QM_REG_MAXPQSIZETXSEL_43_RT_OFFSET 29690 +#define QM_REG_MAXPQSIZETXSEL_44_RT_OFFSET 29691 +#define QM_REG_MAXPQSIZETXSEL_45_RT_OFFSET 29692 +#define QM_REG_MAXPQSIZETXSEL_46_RT_OFFSET 29693 +#define QM_REG_MAXPQSIZETXSEL_47_RT_OFFSET 29694 +#define QM_REG_MAXPQSIZETXSEL_48_RT_OFFSET 29695 +#define QM_REG_MAXPQSIZETXSEL_49_RT_OFFSET 29696 +#define QM_REG_MAXPQSIZETXSEL_50_RT_OFFSET 29697 +#define QM_REG_MAXPQSIZETXSEL_51_RT_OFFSET 29698 +#define QM_REG_MAXPQSIZETXSEL_52_RT_OFFSET 29699 +#define QM_REG_MAXPQSIZETXSEL_53_RT_OFFSET 29700 +#define QM_REG_MAXPQSIZETXSEL_54_RT_OFFSET 29701 +#define QM_REG_MAXPQSIZETXSEL_55_RT_OFFSET 29702 +#define QM_REG_MAXPQSIZETXSEL_56_RT_OFFSET 29703 +#define QM_REG_MAXPQSIZETXSEL_57_RT_OFFSET 29704 +#define QM_REG_MAXPQSIZETXSEL_58_RT_OFFSET 29705 +#define QM_REG_MAXPQSIZETXSEL_59_RT_OFFSET 29706 +#define QM_REG_MAXPQSIZETXSEL_60_RT_OFFSET 29707 +#define QM_REG_MAXPQSIZETXSEL_61_RT_OFFSET 29708 +#define QM_REG_MAXPQSIZETXSEL_62_RT_OFFSET 29709 +#define QM_REG_MAXPQSIZETXSEL_63_RT_OFFSET 29710 +#define QM_REG_BASEADDROTHERPQ_RT_OFFSET 29711 +#define QM_REG_BASEADDROTHERPQ_RT_SIZE 128 +#define QM_REG_VOQCRDLINE_RT_OFFSET 29839 +#define QM_REG_VOQCRDLINE_RT_SIZE 20 +#define QM_REG_VOQINITCRDLINE_RT_OFFSET 29859 +#define QM_REG_VOQINITCRDLINE_RT_SIZE 20 +#define QM_REG_AFULLQMBYPTHRPFWFQ_RT_OFFSET 29879 +#define QM_REG_AFULLQMBYPTHRVPWFQ_RT_OFFSET 29880 +#define QM_REG_AFULLQMBYPTHRPFRL_RT_OFFSET 29881 +#define QM_REG_AFULLQMBYPTHRGLBLRL_RT_OFFSET 29882 +#define QM_REG_AFULLOPRTNSTCCRDMASK_RT_OFFSET 29883 +#define QM_REG_WRROTHERPQGRP_0_RT_OFFSET 29884 +#define QM_REG_WRROTHERPQGRP_1_RT_OFFSET 29885 +#define QM_REG_WRROTHERPQGRP_2_RT_OFFSET 29886 +#define QM_REG_WRROTHERPQGRP_3_RT_OFFSET 29887 +#define QM_REG_WRROTHERPQGRP_4_RT_OFFSET 29888 +#define QM_REG_WRROTHERPQGRP_5_RT_OFFSET 29889 +#define QM_REG_WRROTHERPQGRP_6_RT_OFFSET 29890 +#define QM_REG_WRROTHERPQGRP_7_RT_OFFSET 29891 +#define QM_REG_WRROTHERPQGRP_8_RT_OFFSET 29892 +#define QM_REG_WRROTHERPQGRP_9_RT_OFFSET 29893 +#define QM_REG_WRROTHERPQGRP_10_RT_OFFSET 29894 +#define QM_REG_WRROTHERPQGRP_11_RT_OFFSET 29895 +#define QM_REG_WRROTHERPQGRP_12_RT_OFFSET 29896 +#define QM_REG_WRROTHERPQGRP_13_RT_OFFSET 29897 +#define QM_REG_WRROTHERPQGRP_14_RT_OFFSET 29898 +#define QM_REG_WRROTHERPQGRP_15_RT_OFFSET 29899 +#define QM_REG_WRROTHERGRPWEIGHT_0_RT_OFFSET 29900 +#define QM_REG_WRROTHERGRPWEIGHT_1_RT_OFFSET 29901 +#define QM_REG_WRROTHERGRPWEIGHT_2_RT_OFFSET 29902 +#define QM_REG_WRROTHERGRPWEIGHT_3_RT_OFFSET 29903 +#define QM_REG_WRRTXGRPWEIGHT_0_RT_OFFSET 29904 +#define QM_REG_WRRTXGRPWEIGHT_1_RT_OFFSET 29905 +#define QM_REG_PQTX2PF_0_RT_OFFSET 29906 +#define QM_REG_PQTX2PF_1_RT_OFFSET 29907 +#define QM_REG_PQTX2PF_2_RT_OFFSET 29908 +#define QM_REG_PQTX2PF_3_RT_OFFSET 29909 +#define QM_REG_PQTX2PF_4_RT_OFFSET 29910 +#define QM_REG_PQTX2PF_5_RT_OFFSET 29911 +#define QM_REG_PQTX2PF_6_RT_OFFSET 29912 +#define QM_REG_PQTX2PF_7_RT_OFFSET 29913 +#define QM_REG_PQTX2PF_8_RT_OFFSET 29914 +#define QM_REG_PQTX2PF_9_RT_OFFSET 29915 +#define QM_REG_PQTX2PF_10_RT_OFFSET 29916 +#define QM_REG_PQTX2PF_11_RT_OFFSET 29917 +#define QM_REG_PQTX2PF_12_RT_OFFSET 29918 +#define QM_REG_PQTX2PF_13_RT_OFFSET 29919 +#define QM_REG_PQTX2PF_14_RT_OFFSET 29920 +#define QM_REG_PQTX2PF_15_RT_OFFSET 29921 +#define QM_REG_PQTX2PF_16_RT_OFFSET 29922 +#define QM_REG_PQTX2PF_17_RT_OFFSET 29923 +#define QM_REG_PQTX2PF_18_RT_OFFSET 29924 +#define QM_REG_PQTX2PF_19_RT_OFFSET 29925 +#define QM_REG_PQTX2PF_20_RT_OFFSET 29926 +#define QM_REG_PQTX2PF_21_RT_OFFSET 29927 +#define QM_REG_PQTX2PF_22_RT_OFFSET 29928 +#define QM_REG_PQTX2PF_23_RT_OFFSET 29929 +#define QM_REG_PQTX2PF_24_RT_OFFSET 29930 +#define QM_REG_PQTX2PF_25_RT_OFFSET 29931 +#define QM_REG_PQTX2PF_26_RT_OFFSET 29932 +#define QM_REG_PQTX2PF_27_RT_OFFSET 29933 +#define QM_REG_PQTX2PF_28_RT_OFFSET 29934 +#define QM_REG_PQTX2PF_29_RT_OFFSET 29935 +#define QM_REG_PQTX2PF_30_RT_OFFSET 29936 +#define QM_REG_PQTX2PF_31_RT_OFFSET 29937 +#define QM_REG_PQTX2PF_32_RT_OFFSET 29938 +#define QM_REG_PQTX2PF_33_RT_OFFSET 29939 +#define QM_REG_PQTX2PF_34_RT_OFFSET 29940 +#define QM_REG_PQTX2PF_35_RT_OFFSET 29941 +#define QM_REG_PQTX2PF_36_RT_OFFSET 29942 +#define QM_REG_PQTX2PF_37_RT_OFFSET 29943 +#define QM_REG_PQTX2PF_38_RT_OFFSET 29944 +#define QM_REG_PQTX2PF_39_RT_OFFSET 29945 +#define QM_REG_PQTX2PF_40_RT_OFFSET 29946 +#define QM_REG_PQTX2PF_41_RT_OFFSET 29947 +#define QM_REG_PQTX2PF_42_RT_OFFSET 29948 +#define QM_REG_PQTX2PF_43_RT_OFFSET 29949 +#define QM_REG_PQTX2PF_44_RT_OFFSET 29950 +#define QM_REG_PQTX2PF_45_RT_OFFSET 29951 +#define QM_REG_PQTX2PF_46_RT_OFFSET 29952 +#define QM_REG_PQTX2PF_47_RT_OFFSET 29953 +#define QM_REG_PQTX2PF_48_RT_OFFSET 29954 +#define QM_REG_PQTX2PF_49_RT_OFFSET 29955 +#define QM_REG_PQTX2PF_50_RT_OFFSET 29956 +#define QM_REG_PQTX2PF_51_RT_OFFSET 29957 +#define QM_REG_PQTX2PF_52_RT_OFFSET 29958 +#define QM_REG_PQTX2PF_53_RT_OFFSET 29959 +#define QM_REG_PQTX2PF_54_RT_OFFSET 29960 +#define QM_REG_PQTX2PF_55_RT_OFFSET 29961 +#define QM_REG_PQTX2PF_56_RT_OFFSET 29962 +#define QM_REG_PQTX2PF_57_RT_OFFSET 29963 +#define QM_REG_PQTX2PF_58_RT_OFFSET 29964 +#define QM_REG_PQTX2PF_59_RT_OFFSET 29965 +#define QM_REG_PQTX2PF_60_RT_OFFSET 29966 +#define QM_REG_PQTX2PF_61_RT_OFFSET 29967 +#define QM_REG_PQTX2PF_62_RT_OFFSET 29968 +#define QM_REG_PQTX2PF_63_RT_OFFSET 29969 +#define QM_REG_PQOTHER2PF_0_RT_OFFSET 29970 +#define QM_REG_PQOTHER2PF_1_RT_OFFSET 29971 +#define QM_REG_PQOTHER2PF_2_RT_OFFSET 29972 +#define QM_REG_PQOTHER2PF_3_RT_OFFSET 29973 +#define QM_REG_PQOTHER2PF_4_RT_OFFSET 29974 +#define QM_REG_PQOTHER2PF_5_RT_OFFSET 29975 +#define QM_REG_PQOTHER2PF_6_RT_OFFSET 29976 +#define QM_REG_PQOTHER2PF_7_RT_OFFSET 29977 +#define QM_REG_PQOTHER2PF_8_RT_OFFSET 29978 +#define QM_REG_PQOTHER2PF_9_RT_OFFSET 29979 +#define QM_REG_PQOTHER2PF_10_RT_OFFSET 29980 +#define QM_REG_PQOTHER2PF_11_RT_OFFSET 29981 +#define QM_REG_PQOTHER2PF_12_RT_OFFSET 29982 +#define QM_REG_PQOTHER2PF_13_RT_OFFSET 29983 +#define QM_REG_PQOTHER2PF_14_RT_OFFSET 29984 +#define QM_REG_PQOTHER2PF_15_RT_OFFSET 29985 +#define QM_REG_RLGLBLPERIOD_0_RT_OFFSET 29986 +#define QM_REG_RLGLBLPERIOD_1_RT_OFFSET 29987 +#define QM_REG_RLGLBLPERIODTIMER_0_RT_OFFSET 29988 +#define QM_REG_RLGLBLPERIODTIMER_1_RT_OFFSET 29989 +#define QM_REG_RLGLBLPERIODSEL_0_RT_OFFSET 29990 +#define QM_REG_RLGLBLPERIODSEL_1_RT_OFFSET 29991 +#define QM_REG_RLGLBLPERIODSEL_2_RT_OFFSET 29992 +#define QM_REG_RLGLBLPERIODSEL_3_RT_OFFSET 29993 +#define QM_REG_RLGLBLPERIODSEL_4_RT_OFFSET 29994 +#define QM_REG_RLGLBLPERIODSEL_5_RT_OFFSET 29995 +#define QM_REG_RLGLBLPERIODSEL_6_RT_OFFSET 29996 +#define QM_REG_RLGLBLPERIODSEL_7_RT_OFFSET 29997 +#define QM_REG_RLGLBLINCVAL_RT_OFFSET 29998 +#define QM_REG_RLGLBLINCVAL_RT_SIZE 256 +#define QM_REG_RLGLBLUPPERBOUND_RT_OFFSET 30254 +#define QM_REG_RLGLBLUPPERBOUND_RT_SIZE 256 +#define QM_REG_RLGLBLCRD_RT_OFFSET 30510 +#define QM_REG_RLGLBLCRD_RT_SIZE 256 +#define QM_REG_RLGLBLENABLE_RT_OFFSET 30766 +#define QM_REG_RLPFPERIOD_RT_OFFSET 30767 +#define QM_REG_RLPFPERIODTIMER_RT_OFFSET 30768 +#define QM_REG_RLPFINCVAL_RT_OFFSET 30769 +#define QM_REG_RLPFINCVAL_RT_SIZE 16 +#define QM_REG_RLPFUPPERBOUND_RT_OFFSET 30785 +#define QM_REG_RLPFUPPERBOUND_RT_SIZE 16 +#define QM_REG_RLPFCRD_RT_OFFSET 30801 +#define QM_REG_RLPFCRD_RT_SIZE 16 +#define QM_REG_RLPFENABLE_RT_OFFSET 30817 +#define QM_REG_RLPFVOQENABLE_RT_OFFSET 30818 +#define QM_REG_WFQPFWEIGHT_RT_OFFSET 30819 +#define QM_REG_WFQPFWEIGHT_RT_SIZE 16 +#define QM_REG_WFQPFUPPERBOUND_RT_OFFSET 30835 +#define QM_REG_WFQPFUPPERBOUND_RT_SIZE 16 +#define QM_REG_WFQPFCRD_RT_OFFSET 30851 +#define QM_REG_WFQPFCRD_RT_SIZE 160 +#define QM_REG_WFQPFENABLE_RT_OFFSET 31011 +#define QM_REG_WFQVPENABLE_RT_OFFSET 31012 +#define QM_REG_BASEADDRTXPQ_RT_OFFSET 31013 +#define QM_REG_BASEADDRTXPQ_RT_SIZE 512 +#define QM_REG_TXPQMAP_RT_OFFSET 31525 +#define QM_REG_TXPQMAP_RT_SIZE 512 +#define QM_REG_WFQVPWEIGHT_RT_OFFSET 32037 +#define QM_REG_WFQVPWEIGHT_RT_SIZE 512 +#define QM_REG_WFQVPCRD_RT_OFFSET 32549 +#define QM_REG_WFQVPCRD_RT_SIZE 512 +#define QM_REG_WFQVPMAP_RT_OFFSET 33061 +#define QM_REG_WFQVPMAP_RT_SIZE 512 +#define QM_REG_WFQPFCRD_MSB_RT_OFFSET 33573 +#define QM_REG_WFQPFCRD_MSB_RT_SIZE 160 +#define NIG_REG_TAG_ETHERTYPE_0_RT_OFFSET 33733 +#define NIG_REG_OUTER_TAG_VALUE_LIST0_RT_OFFSET 33734 +#define NIG_REG_OUTER_TAG_VALUE_LIST1_RT_OFFSET 33735 +#define NIG_REG_OUTER_TAG_VALUE_LIST2_RT_OFFSET 33736 +#define NIG_REG_OUTER_TAG_VALUE_LIST3_RT_OFFSET 33737 +#define NIG_REG_OUTER_TAG_VALUE_MASK_RT_OFFSET 33738 +#define NIG_REG_LLH_FUNC_TAGMAC_CLS_TYPE_RT_OFFSET 33739 +#define NIG_REG_LLH_FUNC_TAG_EN_RT_OFFSET 33740 +#define NIG_REG_LLH_FUNC_TAG_EN_RT_SIZE 4 +#define NIG_REG_LLH_FUNC_TAG_HDR_SEL_RT_OFFSET 33744 +#define NIG_REG_LLH_FUNC_TAG_HDR_SEL_RT_SIZE 4 +#define NIG_REG_LLH_FUNC_TAG_VALUE_RT_OFFSET 33748 +#define NIG_REG_LLH_FUNC_TAG_VALUE_RT_SIZE 4 +#define NIG_REG_LLH_FUNC_NO_TAG_RT_OFFSET 33752 +#define NIG_REG_LLH_FUNC_FILTER_VALUE_RT_OFFSET 33753 +#define NIG_REG_LLH_FUNC_FILTER_VALUE_RT_SIZE 32 +#define NIG_REG_LLH_FUNC_FILTER_EN_RT_OFFSET 33785 +#define NIG_REG_LLH_FUNC_FILTER_EN_RT_SIZE 16 +#define NIG_REG_LLH_FUNC_FILTER_MODE_RT_OFFSET 33801 +#define NIG_REG_LLH_FUNC_FILTER_MODE_RT_SIZE 16 +#define NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE_RT_OFFSET 33817 +#define NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE_RT_SIZE 16 +#define NIG_REG_LLH_FUNC_FILTER_HDR_SEL_RT_OFFSET 33833 +#define NIG_REG_LLH_FUNC_FILTER_HDR_SEL_RT_SIZE 16 +#define NIG_REG_TX_EDPM_CTRL_RT_OFFSET 33849 +#define NIG_REG_ROCE_DUPLICATE_TO_HOST_RT_OFFSET 33850 +#define CDU_REG_CID_ADDR_PARAMS_RT_OFFSET 33851 +#define CDU_REG_SEGMENT0_PARAMS_RT_OFFSET 33852 +#define CDU_REG_SEGMENT1_PARAMS_RT_OFFSET 33853 +#define CDU_REG_PF_SEG0_TYPE_OFFSET_RT_OFFSET 33854 +#define CDU_REG_PF_SEG1_TYPE_OFFSET_RT_OFFSET 33855 +#define CDU_REG_PF_SEG2_TYPE_OFFSET_RT_OFFSET 33856 +#define CDU_REG_PF_SEG3_TYPE_OFFSET_RT_OFFSET 33857 +#define CDU_REG_PF_FL_SEG0_TYPE_OFFSET_RT_OFFSET 33858 +#define CDU_REG_PF_FL_SEG1_TYPE_OFFSET_RT_OFFSET 33859 +#define CDU_REG_PF_FL_SEG2_TYPE_OFFSET_RT_OFFSET 33860 +#define CDU_REG_PF_FL_SEG3_TYPE_OFFSET_RT_OFFSET 33861 +#define CDU_REG_VF_SEG_TYPE_OFFSET_RT_OFFSET 33862 +#define CDU_REG_VF_FL_SEG_TYPE_OFFSET_RT_OFFSET 33863 +#define PBF_REG_TAG_ETHERTYPE_0_RT_OFFSET 33864 +#define PBF_REG_BTB_SHARED_AREA_SIZE_RT_OFFSET 33865 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ0_RT_OFFSET 33866 +#define PBF_REG_BTB_GUARANTEED_VOQ0_RT_OFFSET 33867 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ0_RT_OFFSET 33868 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ1_RT_OFFSET 33869 +#define PBF_REG_BTB_GUARANTEED_VOQ1_RT_OFFSET 33870 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ1_RT_OFFSET 33871 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ2_RT_OFFSET 33872 +#define PBF_REG_BTB_GUARANTEED_VOQ2_RT_OFFSET 33873 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ2_RT_OFFSET 33874 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ3_RT_OFFSET 33875 +#define PBF_REG_BTB_GUARANTEED_VOQ3_RT_OFFSET 33876 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ3_RT_OFFSET 33877 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ4_RT_OFFSET 33878 +#define PBF_REG_BTB_GUARANTEED_VOQ4_RT_OFFSET 33879 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ4_RT_OFFSET 33880 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ5_RT_OFFSET 33881 +#define PBF_REG_BTB_GUARANTEED_VOQ5_RT_OFFSET 33882 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ5_RT_OFFSET 33883 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ6_RT_OFFSET 33884 +#define PBF_REG_BTB_GUARANTEED_VOQ6_RT_OFFSET 33885 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ6_RT_OFFSET 33886 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ7_RT_OFFSET 33887 +#define PBF_REG_BTB_GUARANTEED_VOQ7_RT_OFFSET 33888 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ7_RT_OFFSET 33889 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ8_RT_OFFSET 33890 +#define PBF_REG_BTB_GUARANTEED_VOQ8_RT_OFFSET 33891 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ8_RT_OFFSET 33892 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ9_RT_OFFSET 33893 +#define PBF_REG_BTB_GUARANTEED_VOQ9_RT_OFFSET 33894 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ9_RT_OFFSET 33895 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ10_RT_OFFSET 33896 +#define PBF_REG_BTB_GUARANTEED_VOQ10_RT_OFFSET 33897 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ10_RT_OFFSET 33898 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ11_RT_OFFSET 33899 +#define PBF_REG_BTB_GUARANTEED_VOQ11_RT_OFFSET 33900 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ11_RT_OFFSET 33901 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ12_RT_OFFSET 33902 +#define PBF_REG_BTB_GUARANTEED_VOQ12_RT_OFFSET 33903 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ12_RT_OFFSET 33904 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ13_RT_OFFSET 33905 +#define PBF_REG_BTB_GUARANTEED_VOQ13_RT_OFFSET 33906 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ13_RT_OFFSET 33907 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ14_RT_OFFSET 33908 +#define PBF_REG_BTB_GUARANTEED_VOQ14_RT_OFFSET 33909 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ14_RT_OFFSET 33910 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ15_RT_OFFSET 33911 +#define PBF_REG_BTB_GUARANTEED_VOQ15_RT_OFFSET 33912 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ15_RT_OFFSET 33913 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ16_RT_OFFSET 33914 +#define PBF_REG_BTB_GUARANTEED_VOQ16_RT_OFFSET 33915 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ16_RT_OFFSET 33916 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ17_RT_OFFSET 33917 +#define PBF_REG_BTB_GUARANTEED_VOQ17_RT_OFFSET 33918 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ17_RT_OFFSET 33919 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ18_RT_OFFSET 33920 +#define PBF_REG_BTB_GUARANTEED_VOQ18_RT_OFFSET 33921 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ18_RT_OFFSET 33922 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ19_RT_OFFSET 33923 +#define PBF_REG_BTB_GUARANTEED_VOQ19_RT_OFFSET 33924 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ19_RT_OFFSET 33925 +#define XCM_REG_CON_PHY_Q3_RT_OFFSET 33926 -#define RUNTIME_ARRAY_SIZE 33925 +#define RUNTIME_ARRAY_SIZE 33927 /* The eth storm context for the Tstorm */ struct tstorm_eth_conn_st_ctx { @@ -3201,7 +3566,31 @@ struct eth_conn_context { struct mstorm_eth_conn_st_ctx mstorm_st_context; }; -/* opcodes for the event ring */ +enum eth_error_code { + ETH_OK = 0x00, + ETH_FILTERS_MAC_ADD_FAIL_FULL, + ETH_FILTERS_MAC_ADD_FAIL_FULL_MTT2, + ETH_FILTERS_MAC_ADD_FAIL_DUP_MTT2, + ETH_FILTERS_MAC_ADD_FAIL_DUP_STT2, + ETH_FILTERS_MAC_DEL_FAIL_NOF, + ETH_FILTERS_MAC_DEL_FAIL_NOF_MTT2, + ETH_FILTERS_MAC_DEL_FAIL_NOF_STT2, + ETH_FILTERS_MAC_ADD_FAIL_ZERO_MAC, + ETH_FILTERS_VLAN_ADD_FAIL_FULL, + ETH_FILTERS_VLAN_ADD_FAIL_DUP, + ETH_FILTERS_VLAN_DEL_FAIL_NOF, + ETH_FILTERS_VLAN_DEL_FAIL_NOF_TT1, + ETH_FILTERS_PAIR_ADD_FAIL_DUP, + ETH_FILTERS_PAIR_ADD_FAIL_FULL, + ETH_FILTERS_PAIR_ADD_FAIL_FULL_MAC, + ETH_FILTERS_PAIR_DEL_FAIL_NOF, + ETH_FILTERS_PAIR_DEL_FAIL_NOF_TT1, + ETH_FILTERS_PAIR_ADD_FAIL_ZERO_MAC, + ETH_FILTERS_VNI_ADD_FAIL_FULL, + ETH_FILTERS_VNI_ADD_FAIL_DUP, + MAX_ETH_ERROR_CODE +}; + enum eth_event_opcode { ETH_EVENT_UNUSED, ETH_EVENT_VPORT_START, @@ -3269,7 +3658,13 @@ enum eth_filter_type { MAX_ETH_FILTER_TYPE }; -/* Ethernet Ramrod Command IDs */ +enum eth_ipv4_frag_type { + ETH_IPV4_NOT_FRAG, + ETH_IPV4_FIRST_FRAG, + ETH_IPV4_NON_FIRST_FRAG, + MAX_ETH_IPV4_FRAG_TYPE +}; + enum eth_ramrod_cmd_id { ETH_RAMROD_UNUSED, ETH_RAMROD_VPORT_START, @@ -3451,8 +3846,8 @@ struct rx_queue_start_ramrod_data { u8 toggle_val; u8 vf_rx_prod_index; - - u8 reserved[6]; + u8 vf_rx_prod_use_zone_a; + u8 reserved[5]; __le16 reserved1; struct regpair cqe_pbl_addr; struct regpair bd_base; @@ -3526,10 +3921,11 @@ struct tx_queue_start_ramrod_data { __le16 pxp_st_index; __le16 comp_agg_size; __le16 queue_zone_id; - __le16 test_dup_count; + __le16 reserved2; __le16 pbl_size; __le16 tx_queue_id; - + __le16 same_as_last_id; + __le16 reserved[3]; struct regpair pbl_base_addr; struct regpair bd_cons_address; }; @@ -4926,8 +5322,8 @@ struct roce_create_qp_resp_ramrod_data { #define ROCE_CREATE_QP_RESP_RAMROD_DATA_SRQ_FLG_SHIFT 5 #define ROCE_CREATE_QP_RESP_RAMROD_DATA_E2E_FLOW_CONTROL_EN_MASK 0x1 #define ROCE_CREATE_QP_RESP_RAMROD_DATA_E2E_FLOW_CONTROL_EN_SHIFT 6 -#define ROCE_CREATE_QP_RESP_RAMROD_DATA_RESERVED0_MASK 0x1 -#define ROCE_CREATE_QP_RESP_RAMROD_DATA_RESERVED0_SHIFT 7 +#define ROCE_CREATE_QP_RESP_RAMROD_DATA_RESERVED_KEY_EN_MASK 0x1 +#define ROCE_CREATE_QP_RESP_RAMROD_DATA_RESERVED_KEY_EN_SHIFT 7 #define ROCE_CREATE_QP_RESP_RAMROD_DATA_PRI_MASK 0x7 #define ROCE_CREATE_QP_RESP_RAMROD_DATA_PRI_SHIFT 8 #define ROCE_CREATE_QP_RESP_RAMROD_DATA_MIN_RNR_NAK_TIMER_MASK 0x1F @@ -4988,6 +5384,10 @@ enum roce_event_opcode { MAX_ROCE_EVENT_OPCODE }; +struct roce_init_func_ramrod_data { + struct rdma_init_func_ramrod_data rdma; +}; + struct roce_modify_qp_req_ramrod_data { __le16 flags; #define ROCE_MODIFY_QP_REQ_RAMROD_DATA_MOVE_TO_ERR_FLG_MASK 0x1 diff --git a/drivers/net/ethernet/qlogic/qed/qed_init_ops.c b/drivers/net/ethernet/qlogic/qed/qed_init_ops.c index b7a4b27ebdbd..8ce8564061d5 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_init_ops.c +++ b/drivers/net/ethernet/qlogic/qed/qed_init_ops.c @@ -534,7 +534,7 @@ int qed_init_fw_data(struct qed_dev *cdev, const u8 *data) /* First Dword contains metadata and should be skipped */ buf_hdr = (struct bin_buffer_hdr *)(data + sizeof(u32)); - offset = buf_hdr[BIN_BUF_FW_VER_INFO].offset; + offset = buf_hdr[BIN_BUF_INIT_FW_VER_INFO].offset; fw->fw_ver_info = (struct fw_ver_info *)(data + offset); offset = buf_hdr[BIN_BUF_INIT_CMD].offset; diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index b07bc810af92..4c212667b482 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -802,34 +802,6 @@ static u32 qed_mcp_get_shmem_func(struct qed_hwfn *p_hwfn, return size; } -int qed_hw_init_first_eth(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, u8 *p_pf) -{ - struct public_func shmem_info; - int i; - - /* Find first Ethernet interface in port */ - for (i = 0; i < NUM_OF_ENG_PFS(p_hwfn->cdev); - i += p_hwfn->cdev->num_ports_in_engines) { - qed_mcp_get_shmem_func(p_hwfn, p_ptt, &shmem_info, - MCP_PF_ID_BY_REL(p_hwfn, i)); - - if (shmem_info.config & FUNC_MF_CFG_FUNC_HIDE) - continue; - - if ((shmem_info.config & FUNC_MF_CFG_PROTOCOL_MASK) == - FUNC_MF_CFG_PROTOCOL_ETHERNET) { - *p_pf = (u8)i; - return 0; - } - } - - DP_NOTICE(p_hwfn, - "Failed to find on port an ethernet interface in MF_SI mode\n"); - - return -EINVAL; -} - static void qed_mcp_update_bw(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { struct qed_mcp_function_info *p_info; diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h index 3324ef06f358..c6372fa574b7 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h @@ -500,6 +500,4 @@ int __qed_configure_pf_min_bandwidth(struct qed_hwfn *p_hwfn, struct qed_mcp_link_state *p_link, u8 min_bw); -int qed_hw_init_first_eth(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, u8 *p_pf); #endif diff --git a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h index f6b86ca1ff79..b49d47f3de71 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h +++ b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h @@ -116,8 +116,14 @@ 0x1009c4UL #define QM_REG_PF_EN \ 0x2f2ea4UL +#define TCFC_REG_WEAK_ENABLE_VF \ + 0x2d0704UL #define TCFC_REG_STRONG_ENABLE_PF \ 0x2d0708UL +#define TCFC_REG_STRONG_ENABLE_VF \ + 0x2d070cUL +#define CCFC_REG_WEAK_ENABLE_VF \ + 0x2e0704UL #define CCFC_REG_STRONG_ENABLE_PF \ 0x2e0708UL #define PGLUE_B_REG_PGL_ADDR_88_F0 \ diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c index 51e4c906833f..1579f339a5ef 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c @@ -1280,6 +1280,13 @@ static void qed_iov_vf_mbx_acquire(struct qed_hwfn *p_hwfn, memset(resp, 0, sizeof(*resp)); + /* Write the PF version so that VF would know which version + * is supported - might be later overriden. This guarantees that + * VF could recognize legacy PF based on lack of versions in reply. + */ + pfdev_info->major_fp_hsi = ETH_HSI_VER_MAJOR; + pfdev_info->minor_fp_hsi = ETH_HSI_VER_MINOR; + /* Validate FW compatibility */ if (req->vfdev_info.eth_fp_hsi_major != ETH_HSI_VER_MAJOR) { DP_INFO(p_hwfn, @@ -1289,12 +1296,6 @@ static void qed_iov_vf_mbx_acquire(struct qed_hwfn *p_hwfn, req->vfdev_info.eth_fp_hsi_minor, ETH_HSI_VER_MAJOR, ETH_HSI_VER_MINOR); - /* Write the PF version so that VF would know which version - * is supported. - */ - pfdev_info->major_fp_hsi = ETH_HSI_VER_MAJOR; - pfdev_info->minor_fp_hsi = ETH_HSI_VER_MINOR; - goto out; } diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h index 81a63cf0a998..32325ca5951f 100644 --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@ -25,7 +25,7 @@ #define QEDE_MAJOR_VERSION 8 #define QEDE_MINOR_VERSION 10 -#define QEDE_REVISION_VERSION 1 +#define QEDE_REVISION_VERSION 9 #define QEDE_ENGINEERING_VERSION 20 #define DRV_MODULE_VERSION __stringify(QEDE_MAJOR_VERSION) "." \ __stringify(QEDE_MINOR_VERSION) "." \ diff --git a/include/linux/qed/common_hsi.h b/include/linux/qed/common_hsi.h index 40c0ada01806..d306e0b55581 100644 --- a/include/linux/qed/common_hsi.h +++ b/include/linux/qed/common_hsi.h @@ -5,28 +5,83 @@ * (GPL) Version 2, available from the file COPYING in the main directory of * this source tree. */ +#ifndef _COMMON_HSI_H +#define _COMMON_HSI_H +#include +#include +#include +#include + +/* dma_addr_t manip */ +#define DMA_LO(x) ((u32)(((dma_addr_t)(x)) & 0xffffffff)) +#define DMA_HI(x) ((u32)(((dma_addr_t)(x)) >> 32)) + +#define DMA_LO_LE(x) cpu_to_le32(DMA_LO(x)) +#define DMA_HI_LE(x) cpu_to_le32(DMA_HI(x)) + +/* It's assumed that whoever includes this has previously included an hsi + * file defining the regpair. + */ +#define DMA_REGPAIR_LE(x, val) (x).hi = DMA_HI_LE((val)); \ + (x).lo = DMA_LO_LE((val)) + +#define HILO_GEN(hi, lo, type) ((((type)(hi)) << 32) + (lo)) +#define HILO_DMA(hi, lo) HILO_GEN(hi, lo, dma_addr_t) +#define HILO_64(hi, lo) HILO_GEN(hi, lo, u64) +#define HILO_DMA_REGPAIR(regpair) (HILO_DMA(regpair.hi, regpair.lo)) +#define HILO_64_REGPAIR(regpair) (HILO_64(regpair.hi, regpair.lo)) #ifndef __COMMON_HSI__ #define __COMMON_HSI__ -#define CORE_SPQE_PAGE_SIZE_BYTES 4096 #define X_FINAL_CLEANUP_AGG_INT 1 + +#define EVENT_RING_PAGE_SIZE_BYTES 4096 + #define NUM_OF_GLOBAL_QUEUES 128 +#define COMMON_QUEUE_ENTRY_MAX_BYTE_SIZE 64 + +#define ISCSI_CDU_TASK_SEG_TYPE 0 +#define RDMA_CDU_TASK_SEG_TYPE 1 + +#define FW_ASSERT_GENERAL_ATTN_IDX 32 + +#define MAX_PINNED_CCFC 32 /* Queue Zone sizes in bytes */ #define TSTORM_QZONE_SIZE 8 -#define MSTORM_QZONE_SIZE 0 +#define MSTORM_QZONE_SIZE 16 #define USTORM_QZONE_SIZE 8 #define XSTORM_QZONE_SIZE 8 #define YSTORM_QZONE_SIZE 0 #define PSTORM_QZONE_SIZE 0 -#define ETH_MAX_NUM_RX_QUEUES_PER_VF 16 +#define MSTORM_VF_ZONE_DEFAULT_SIZE_LOG 7 +#define ETH_MAX_NUM_RX_QUEUES_PER_VF_DEFAULT 16 +#define ETH_MAX_NUM_RX_QUEUES_PER_VF_DOUBLE 48 +#define ETH_MAX_NUM_RX_QUEUES_PER_VF_QUAD 112 + +/********************************/ +/* CORE (LIGHT L2) FW CONSTANTS */ +/********************************/ + +#define CORE_LL2_MAX_RAMROD_PER_CON 8 +#define CORE_LL2_TX_BD_PAGE_SIZE_BYTES 4096 +#define CORE_LL2_RX_BD_PAGE_SIZE_BYTES 4096 +#define CORE_LL2_RX_CQE_PAGE_SIZE_BYTES 4096 +#define CORE_LL2_RX_NUM_NEXT_PAGE_BDS 1 + +#define CORE_LL2_TX_MAX_BDS_PER_PACKET 12 + +#define CORE_SPQE_PAGE_SIZE_BYTES 4096 + +#define MAX_NUM_LL2_RX_QUEUES 32 +#define MAX_NUM_LL2_TX_STATS_COUNTERS 32 #define FW_MAJOR_VERSION 8 #define FW_MINOR_VERSION 10 -#define FW_REVISION_VERSION 5 +#define FW_REVISION_VERSION 10 #define FW_ENGINEERING_VERSION 0 /***********************/ @@ -83,6 +138,17 @@ #define NUM_OF_LCIDS (320) #define NUM_OF_LTIDS (320) +/* Clock values */ +#define MASTER_CLK_FREQ_E4 (375e6) +#define STORM_CLK_FREQ_E4 (1000e6) +#define CLK25M_CLK_FREQ_E4 (25e6) + +/* Global PXP windows (GTT) */ +#define NUM_OF_GTT 19 +#define GTT_DWORD_SIZE_BITS 10 +#define GTT_BYTE_SIZE_BITS (GTT_DWORD_SIZE_BITS + 2) +#define GTT_DWORD_SIZE BIT(GTT_DWORD_SIZE_BITS) + /*****************/ /* CDU CONSTANTS */ /*****************/ @@ -90,6 +156,8 @@ #define CDU_SEG_TYPE_OFFSET_REG_TYPE_SHIFT (17) #define CDU_SEG_TYPE_OFFSET_REG_OFFSET_MASK (0x1ffff) +#define CDU_VF_FL_SEG_TYPE_OFFSET_REG_TYPE_SHIFT (12) +#define CDU_VF_FL_SEG_TYPE_OFFSET_REG_OFFSET_MASK (0xfff) /*****************/ /* DQ CONSTANTS */ /*****************/ @@ -115,6 +183,11 @@ #define DQ_XCM_ETH_TX_BD_CONS_CMD DQ_XCM_AGG_VAL_SEL_WORD3 #define DQ_XCM_ETH_TX_BD_PROD_CMD DQ_XCM_AGG_VAL_SEL_WORD4 #define DQ_XCM_ETH_GO_TO_BD_CONS_CMD DQ_XCM_AGG_VAL_SEL_WORD5 +#define DQ_XCM_ISCSI_SQ_CONS_CMD DQ_XCM_AGG_VAL_SEL_WORD3 +#define DQ_XCM_ISCSI_SQ_PROD_CMD DQ_XCM_AGG_VAL_SEL_WORD4 +#define DQ_XCM_ISCSI_MORE_TO_SEND_SEQ_CMD DQ_XCM_AGG_VAL_SEL_REG3 +#define DQ_XCM_ISCSI_EXP_STAT_SN_CMD DQ_XCM_AGG_VAL_SEL_REG6 +#define DQ_XCM_ROCE_SQ_PROD_CMD DQ_XCM_AGG_VAL_SEL_WORD4 /* UCM agg val selection (HW) */ #define DQ_UCM_AGG_VAL_SEL_WORD0 0 @@ -159,13 +232,16 @@ #define DQ_XCM_AGG_FLG_SHIFT_CF23 7 /* XCM agg counter flag selection */ -#define DQ_XCM_CORE_DQ_CF_CMD (1 << DQ_XCM_AGG_FLG_SHIFT_CF18) -#define DQ_XCM_CORE_TERMINATE_CMD (1 << DQ_XCM_AGG_FLG_SHIFT_CF19) -#define DQ_XCM_CORE_SLOW_PATH_CMD (1 << DQ_XCM_AGG_FLG_SHIFT_CF22) -#define DQ_XCM_ETH_DQ_CF_CMD (1 << DQ_XCM_AGG_FLG_SHIFT_CF18) -#define DQ_XCM_ETH_TERMINATE_CMD (1 << DQ_XCM_AGG_FLG_SHIFT_CF19) -#define DQ_XCM_ETH_SLOW_PATH_CMD (1 << DQ_XCM_AGG_FLG_SHIFT_CF22) -#define DQ_XCM_ETH_TPH_EN_CMD (1 << DQ_XCM_AGG_FLG_SHIFT_CF23) +#define DQ_XCM_CORE_DQ_CF_CMD BIT(DQ_XCM_AGG_FLG_SHIFT_CF18) +#define DQ_XCM_CORE_TERMINATE_CMD BIT(DQ_XCM_AGG_FLG_SHIFT_CF19) +#define DQ_XCM_CORE_SLOW_PATH_CMD BIT(DQ_XCM_AGG_FLG_SHIFT_CF22) +#define DQ_XCM_ETH_DQ_CF_CMD BIT(DQ_XCM_AGG_FLG_SHIFT_CF18) +#define DQ_XCM_ETH_TERMINATE_CMD BIT(DQ_XCM_AGG_FLG_SHIFT_CF19) +#define DQ_XCM_ETH_SLOW_PATH_CMD BIT(DQ_XCM_AGG_FLG_SHIFT_CF22) +#define DQ_XCM_ETH_TPH_EN_CMD BIT(DQ_XCM_AGG_FLG_SHIFT_CF23) +#define DQ_XCM_ISCSI_DQ_FLUSH_CMD BIT(DQ_XCM_AGG_FLG_SHIFT_CF19) +#define DQ_XCM_ISCSI_SLOW_PATH_CMD BIT(DQ_XCM_AGG_FLG_SHIFT_CF22) +#define DQ_XCM_ISCSI_PROC_ONLY_CLEANUP_CMD BIT(DQ_XCM_AGG_FLG_SHIFT_CF23) /* UCM agg counter flag selection (HW) */ #define DQ_UCM_AGG_FLG_SHIFT_CF0 0 @@ -178,9 +254,45 @@ #define DQ_UCM_AGG_FLG_SHIFT_RULE1EN 7 /* UCM agg counter flag selection (FW) */ -#define DQ_UCM_ETH_PMD_TX_ARM_CMD (1 << DQ_UCM_AGG_FLG_SHIFT_CF4) -#define DQ_UCM_ETH_PMD_RX_ARM_CMD (1 << DQ_UCM_AGG_FLG_SHIFT_CF5) +#define DQ_UCM_ETH_PMD_TX_ARM_CMD BIT(DQ_UCM_AGG_FLG_SHIFT_CF4) +#define DQ_UCM_ETH_PMD_RX_ARM_CMD BIT(DQ_UCM_AGG_FLG_SHIFT_CF5) +#define DQ_UCM_ROCE_CQ_ARM_SE_CF_CMD BIT(DQ_UCM_AGG_FLG_SHIFT_CF4) +#define DQ_UCM_ROCE_CQ_ARM_CF_CMD BIT(DQ_UCM_AGG_FLG_SHIFT_CF5) +/* TCM agg counter flag selection (HW) */ +#define DQ_TCM_AGG_FLG_SHIFT_CF0 0 +#define DQ_TCM_AGG_FLG_SHIFT_CF1 1 +#define DQ_TCM_AGG_FLG_SHIFT_CF2 2 +#define DQ_TCM_AGG_FLG_SHIFT_CF3 3 +#define DQ_TCM_AGG_FLG_SHIFT_CF4 4 +#define DQ_TCM_AGG_FLG_SHIFT_CF5 5 +#define DQ_TCM_AGG_FLG_SHIFT_CF6 6 +#define DQ_TCM_AGG_FLG_SHIFT_CF7 7 +/* TCM agg counter flag selection (FW) */ +#define DQ_TCM_ISCSI_FLUSH_Q0_CMD BIT(DQ_TCM_AGG_FLG_SHIFT_CF1) +#define DQ_TCM_ISCSI_TIMER_STOP_ALL_CMD BIT(DQ_TCM_AGG_FLG_SHIFT_CF3) + +/* PWM address mapping */ +#define DQ_PWM_OFFSET_DPM_BASE 0x0 +#define DQ_PWM_OFFSET_DPM_END 0x27 +#define DQ_PWM_OFFSET_XCM16_BASE 0x40 +#define DQ_PWM_OFFSET_XCM32_BASE 0x44 +#define DQ_PWM_OFFSET_UCM16_BASE 0x48 +#define DQ_PWM_OFFSET_UCM32_BASE 0x4C +#define DQ_PWM_OFFSET_UCM16_4 0x50 +#define DQ_PWM_OFFSET_TCM16_BASE 0x58 +#define DQ_PWM_OFFSET_TCM32_BASE 0x5C +#define DQ_PWM_OFFSET_XCM_FLAGS 0x68 +#define DQ_PWM_OFFSET_UCM_FLAGS 0x69 +#define DQ_PWM_OFFSET_TCM_FLAGS 0x6B + +#define DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD (DQ_PWM_OFFSET_XCM16_BASE + 2) +#define DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT (DQ_PWM_OFFSET_UCM32_BASE) +#define DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_16BIT (DQ_PWM_OFFSET_UCM16_4) +#define DQ_PWM_OFFSET_UCM_RDMA_INT_TIMEOUT (DQ_PWM_OFFSET_UCM16_BASE + 2) +#define DQ_PWM_OFFSET_UCM_RDMA_ARM_FLAGS (DQ_PWM_OFFSET_UCM_FLAGS) +#define DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD (DQ_PWM_OFFSET_TCM16_BASE + 1) +#define DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD (DQ_PWM_OFFSET_TCM16_BASE + 3) #define DQ_REGION_SHIFT (12) /* DPM */ @@ -214,15 +326,17 @@ */ #define CM_TX_PQ_BASE 0x200 +/* number of global Vport/QCN rate limiters */ +#define MAX_QM_GLOBAL_RLS 256 /* QM registers data */ #define QM_LINE_CRD_REG_WIDTH 16 -#define QM_LINE_CRD_REG_SIGN_BIT (1 << (QM_LINE_CRD_REG_WIDTH - 1)) +#define QM_LINE_CRD_REG_SIGN_BIT BIT((QM_LINE_CRD_REG_WIDTH - 1)) #define QM_BYTE_CRD_REG_WIDTH 24 -#define QM_BYTE_CRD_REG_SIGN_BIT (1 << (QM_BYTE_CRD_REG_WIDTH - 1)) +#define QM_BYTE_CRD_REG_SIGN_BIT BIT((QM_BYTE_CRD_REG_WIDTH - 1)) #define QM_WFQ_CRD_REG_WIDTH 32 -#define QM_WFQ_CRD_REG_SIGN_BIT (1 << (QM_WFQ_CRD_REG_WIDTH - 1)) +#define QM_WFQ_CRD_REG_SIGN_BIT BIT((QM_WFQ_CRD_REG_WIDTH - 1)) #define QM_RL_CRD_REG_WIDTH 32 -#define QM_RL_CRD_REG_SIGN_BIT (1 << (QM_RL_CRD_REG_WIDTH - 1)) +#define QM_RL_CRD_REG_SIGN_BIT BIT((QM_RL_CRD_REG_WIDTH - 1)) /*****************/ /* CAU CONSTANTS */ @@ -287,6 +401,17 @@ /* PXP CONSTANTS */ /*****************/ +/* Bars for Blocks */ +#define PXP_BAR_GRC 0 +#define PXP_BAR_TSDM 0 +#define PXP_BAR_USDM 0 +#define PXP_BAR_XSDM 0 +#define PXP_BAR_MSDM 0 +#define PXP_BAR_YSDM 0 +#define PXP_BAR_PSDM 0 +#define PXP_BAR_IGU 0 +#define PXP_BAR_DQ 1 + /* PTT and GTT */ #define PXP_NUM_PF_WINDOWS 12 #define PXP_PER_PF_ENTRY_SIZE 8 @@ -334,6 +459,52 @@ (PXP_EXTERNAL_BAR_GLOBAL_WINDOW_START + \ PXP_EXTERNAL_BAR_GLOBAL_WINDOW_LENGTH - 1) +/* PF BAR */ +#define PXP_BAR0_START_GRC 0x0000 +#define PXP_BAR0_GRC_LENGTH 0x1C00000 +#define PXP_BAR0_END_GRC (PXP_BAR0_START_GRC + \ + PXP_BAR0_GRC_LENGTH - 1) + +#define PXP_BAR0_START_IGU 0x1C00000 +#define PXP_BAR0_IGU_LENGTH 0x10000 +#define PXP_BAR0_END_IGU (PXP_BAR0_START_IGU + \ + PXP_BAR0_IGU_LENGTH - 1) + +#define PXP_BAR0_START_TSDM 0x1C80000 +#define PXP_BAR0_SDM_LENGTH 0x40000 +#define PXP_BAR0_SDM_RESERVED_LENGTH 0x40000 +#define PXP_BAR0_END_TSDM (PXP_BAR0_START_TSDM + \ + PXP_BAR0_SDM_LENGTH - 1) + +#define PXP_BAR0_START_MSDM 0x1D00000 +#define PXP_BAR0_END_MSDM (PXP_BAR0_START_MSDM + \ + PXP_BAR0_SDM_LENGTH - 1) + +#define PXP_BAR0_START_USDM 0x1D80000 +#define PXP_BAR0_END_USDM (PXP_BAR0_START_USDM + \ + PXP_BAR0_SDM_LENGTH - 1) + +#define PXP_BAR0_START_XSDM 0x1E00000 +#define PXP_BAR0_END_XSDM (PXP_BAR0_START_XSDM + \ + PXP_BAR0_SDM_LENGTH - 1) + +#define PXP_BAR0_START_YSDM 0x1E80000 +#define PXP_BAR0_END_YSDM (PXP_BAR0_START_YSDM + \ + PXP_BAR0_SDM_LENGTH - 1) + +#define PXP_BAR0_START_PSDM 0x1F00000 +#define PXP_BAR0_END_PSDM (PXP_BAR0_START_PSDM + \ + PXP_BAR0_SDM_LENGTH - 1) + +#define PXP_BAR0_FIRST_INVALID_ADDRESS (PXP_BAR0_END_PSDM + 1) + +/* VF BAR */ +#define PXP_VF_BAR0 0 + +#define PXP_VF_BAR0_START_GRC 0x3E00 +#define PXP_VF_BAR0_GRC_LENGTH 0x200 +#define PXP_VF_BAR0_END_GRC (PXP_VF_BAR0_START_GRC + \ + PXP_VF_BAR0_GRC_LENGTH - 1) #define PXP_VF_BAR0_START_IGU 0 #define PXP_VF_BAR0_IGU_LENGTH 0x3000 @@ -399,6 +570,20 @@ #define PXP_NUM_ILT_RECORDS_BB 7600 #define PXP_NUM_ILT_RECORDS_K2 11000 #define MAX_NUM_ILT_RECORDS MAX(PXP_NUM_ILT_RECORDS_BB, PXP_NUM_ILT_RECORDS_K2) +#define PXP_QUEUES_ZONE_MAX_NUM 320 +/*****************/ +/* PRM CONSTANTS */ +/*****************/ +#define PRM_DMA_PAD_BYTES_NUM 2 +/******************/ +/* SDMs CONSTANTS */ +/******************/ +#define SDM_OP_GEN_TRIG_NONE 0 +#define SDM_OP_GEN_TRIG_WAKE_THREAD 1 +#define SDM_OP_GEN_TRIG_AGG_INT 2 +#define SDM_OP_GEN_TRIG_LOADER 4 +#define SDM_OP_GEN_TRIG_INDICATE_ERROR 6 +#define SDM_OP_GEN_TRIG_RELEASE_THREAD 7 #define SDM_COMP_TYPE_NONE 0 #define SDM_COMP_TYPE_WAKE_THREAD 1 @@ -424,6 +609,8 @@ /* PRS CONSTANTS */ /*****************/ +#define PRS_GFT_CAM_LINES_NO_MATCH 31 + /* Async data KCQ CQE */ struct async_data { __le32 cid; @@ -440,20 +627,6 @@ struct coalescing_timeset { #define COALESCING_TIMESET_VALID_SHIFT 7 }; -struct common_prs_pf_msg_info { - __le32 value; -#define COMMON_PRS_PF_MSG_INFO_NPAR_DEFAULT_PF_MASK 0x1 -#define COMMON_PRS_PF_MSG_INFO_NPAR_DEFAULT_PF_SHIFT 0 -#define COMMON_PRS_PF_MSG_INFO_FW_DEBUG_1_MASK 0x1 -#define COMMON_PRS_PF_MSG_INFO_FW_DEBUG_1_SHIFT 1 -#define COMMON_PRS_PF_MSG_INFO_FW_DEBUG_2_MASK 0x1 -#define COMMON_PRS_PF_MSG_INFO_FW_DEBUG_2_SHIFT 2 -#define COMMON_PRS_PF_MSG_INFO_FW_DEBUG_3_MASK 0x1 -#define COMMON_PRS_PF_MSG_INFO_FW_DEBUG_3_SHIFT 3 -#define COMMON_PRS_PF_MSG_INFO_RESERVED_MASK 0xFFFFFFF -#define COMMON_PRS_PF_MSG_INFO_RESERVED_SHIFT 4 -}; - struct common_queue_zone { __le16 ring_drv_data_consumer; __le16 reserved; @@ -473,6 +646,19 @@ struct vf_pf_channel_eqe_data { struct regpair msg_addr; }; +struct iscsi_eqe_data { + __le32 cid; + __le16 conn_id; + u8 error_code; + u8 error_pdu_opcode_reserved; +#define ISCSI_EQE_DATA_ERROR_PDU_OPCODE_MASK 0x3F +#define ISCSI_EQE_DATA_ERROR_PDU_OPCODE_SHIFT 0 +#define ISCSI_EQE_DATA_ERROR_PDU_OPCODE_VALID_MASK 0x1 +#define ISCSI_EQE_DATA_ERROR_PDU_OPCODE_VALID_SHIFT 6 +#define ISCSI_EQE_DATA_RESERVED0_MASK 0x1 +#define ISCSI_EQE_DATA_RESERVED0_SHIFT 7 +}; + struct malicious_vf_eqe_data { u8 vf_id; u8 err_id; @@ -488,6 +674,7 @@ struct initial_cleanup_eqe_data { union event_ring_data { u8 bytes[8]; struct vf_pf_channel_eqe_data vf_pf_channel; + struct iscsi_eqe_data iscsi_info; struct malicious_vf_eqe_data malicious_vf; struct initial_cleanup_eqe_data vf_init_cleanup; }; @@ -616,6 +803,52 @@ enum db_dest { MAX_DB_DEST }; +/* Enum of doorbell DPM types */ +enum db_dpm_type { + DPM_LEGACY, + DPM_ROCE, + DPM_L2_INLINE, + DPM_L2_BD, + MAX_DB_DPM_TYPE +}; + +/* Structure for doorbell data, in L2 DPM mode, for 1st db in a DPM burst */ +struct db_l2_dpm_data { + __le16 icid; + __le16 bd_prod; + __le32 params; +#define DB_L2_DPM_DATA_SIZE_MASK 0x3F +#define DB_L2_DPM_DATA_SIZE_SHIFT 0 +#define DB_L2_DPM_DATA_DPM_TYPE_MASK 0x3 +#define DB_L2_DPM_DATA_DPM_TYPE_SHIFT 6 +#define DB_L2_DPM_DATA_NUM_BDS_MASK 0xFF +#define DB_L2_DPM_DATA_NUM_BDS_SHIFT 8 +#define DB_L2_DPM_DATA_PKT_SIZE_MASK 0x7FF +#define DB_L2_DPM_DATA_PKT_SIZE_SHIFT 16 +#define DB_L2_DPM_DATA_RESERVED0_MASK 0x1 +#define DB_L2_DPM_DATA_RESERVED0_SHIFT 27 +#define DB_L2_DPM_DATA_SGE_NUM_MASK 0x7 +#define DB_L2_DPM_DATA_SGE_NUM_SHIFT 28 +#define DB_L2_DPM_DATA_RESERVED1_MASK 0x1 +#define DB_L2_DPM_DATA_RESERVED1_SHIFT 31 +}; + +/* Structure for SGE in a DPM doorbell of type DPM_L2_BD */ +struct db_l2_dpm_sge { + struct regpair addr; + __le16 nbytes; + __le16 bitfields; +#define DB_L2_DPM_SGE_TPH_ST_INDEX_MASK 0x1FF +#define DB_L2_DPM_SGE_TPH_ST_INDEX_SHIFT 0 +#define DB_L2_DPM_SGE_RESERVED0_MASK 0x3 +#define DB_L2_DPM_SGE_RESERVED0_SHIFT 9 +#define DB_L2_DPM_SGE_ST_VALID_MASK 0x1 +#define DB_L2_DPM_SGE_ST_VALID_SHIFT 11 +#define DB_L2_DPM_SGE_RESERVED1_MASK 0xF +#define DB_L2_DPM_SGE_RESERVED1_SHIFT 12 + __le32 reserved2; +}; + /* Structure for doorbell address, in legacy mode */ struct db_legacy_addr { __le32 addr; @@ -627,6 +860,49 @@ struct db_legacy_addr { #define DB_LEGACY_ADDR_ICID_SHIFT 5 }; +/* Structure for doorbell address, in PWM mode */ +struct db_pwm_addr { + __le32 addr; +#define DB_PWM_ADDR_RESERVED0_MASK 0x7 +#define DB_PWM_ADDR_RESERVED0_SHIFT 0 +#define DB_PWM_ADDR_OFFSET_MASK 0x7F +#define DB_PWM_ADDR_OFFSET_SHIFT 3 +#define DB_PWM_ADDR_WID_MASK 0x3 +#define DB_PWM_ADDR_WID_SHIFT 10 +#define DB_PWM_ADDR_DPI_MASK 0xFFFF +#define DB_PWM_ADDR_DPI_SHIFT 12 +#define DB_PWM_ADDR_RESERVED1_MASK 0xF +#define DB_PWM_ADDR_RESERVED1_SHIFT 28 +}; + +/* Parameters to RoCE firmware, passed in EDPM doorbell */ +struct db_roce_dpm_params { + __le32 params; +#define DB_ROCE_DPM_PARAMS_SIZE_MASK 0x3F +#define DB_ROCE_DPM_PARAMS_SIZE_SHIFT 0 +#define DB_ROCE_DPM_PARAMS_DPM_TYPE_MASK 0x3 +#define DB_ROCE_DPM_PARAMS_DPM_TYPE_SHIFT 6 +#define DB_ROCE_DPM_PARAMS_OPCODE_MASK 0xFF +#define DB_ROCE_DPM_PARAMS_OPCODE_SHIFT 8 +#define DB_ROCE_DPM_PARAMS_WQE_SIZE_MASK 0x7FF +#define DB_ROCE_DPM_PARAMS_WQE_SIZE_SHIFT 16 +#define DB_ROCE_DPM_PARAMS_RESERVED0_MASK 0x1 +#define DB_ROCE_DPM_PARAMS_RESERVED0_SHIFT 27 +#define DB_ROCE_DPM_PARAMS_COMPLETION_FLG_MASK 0x1 +#define DB_ROCE_DPM_PARAMS_COMPLETION_FLG_SHIFT 28 +#define DB_ROCE_DPM_PARAMS_S_FLG_MASK 0x1 +#define DB_ROCE_DPM_PARAMS_S_FLG_SHIFT 29 +#define DB_ROCE_DPM_PARAMS_RESERVED1_MASK 0x3 +#define DB_ROCE_DPM_PARAMS_RESERVED1_SHIFT 30 +}; + +/* Structure for doorbell data, in ROCE DPM mode, for 1st db in a DPM burst */ +struct db_roce_dpm_data { + __le16 icid; + __le16 prod_val; + struct db_roce_dpm_params params; +}; + /* Igu interrupt command */ enum igu_int_cmd { IGU_INT_ENABLE = 0, @@ -764,6 +1040,19 @@ struct pxp_ptt_entry { struct pxp_pretend_cmd pretend; }; +/* VF Zone A Permission Register. */ +struct pxp_vf_zone_a_permission { + __le32 control; +#define PXP_VF_ZONE_A_PERMISSION_VFID_MASK 0xFF +#define PXP_VF_ZONE_A_PERMISSION_VFID_SHIFT 0 +#define PXP_VF_ZONE_A_PERMISSION_VALID_MASK 0x1 +#define PXP_VF_ZONE_A_PERMISSION_VALID_SHIFT 8 +#define PXP_VF_ZONE_A_PERMISSION_RESERVED0_MASK 0x7F +#define PXP_VF_ZONE_A_PERMISSION_RESERVED0_SHIFT 9 +#define PXP_VF_ZONE_A_PERMISSION_RESERVED1_MASK 0xFFFF +#define PXP_VF_ZONE_A_PERMISSION_RESERVED1_SHIFT 16 +}; + /* RSS hash type */ struct rdif_task_context { __le32 initial_ref_tag; @@ -831,6 +1120,7 @@ struct rdif_task_context { __le32 reserved2; }; +/* RSS hash type */ enum rss_hash_type { RSS_HASH_TYPE_DEFAULT = 0, RSS_HASH_TYPE_IPV4 = 1, @@ -942,7 +1232,7 @@ struct tdif_task_context { }; struct timers_context { - __le32 logical_client0; + __le32 logical_client_0; #define TIMERS_CONTEXT_EXPIRATIONTIMELC0_MASK 0xFFFFFFF #define TIMERS_CONTEXT_EXPIRATIONTIMELC0_SHIFT 0 #define TIMERS_CONTEXT_VALIDLC0_MASK 0x1 @@ -951,7 +1241,7 @@ struct timers_context { #define TIMERS_CONTEXT_ACTIVELC0_SHIFT 29 #define TIMERS_CONTEXT_RESERVED0_MASK 0x3 #define TIMERS_CONTEXT_RESERVED0_SHIFT 30 - __le32 logical_client1; + __le32 logical_client_1; #define TIMERS_CONTEXT_EXPIRATIONTIMELC1_MASK 0xFFFFFFF #define TIMERS_CONTEXT_EXPIRATIONTIMELC1_SHIFT 0 #define TIMERS_CONTEXT_VALIDLC1_MASK 0x1 @@ -960,7 +1250,7 @@ struct timers_context { #define TIMERS_CONTEXT_ACTIVELC1_SHIFT 29 #define TIMERS_CONTEXT_RESERVED1_MASK 0x3 #define TIMERS_CONTEXT_RESERVED1_SHIFT 30 - __le32 logical_client2; + __le32 logical_client_2; #define TIMERS_CONTEXT_EXPIRATIONTIMELC2_MASK 0xFFFFFFF #define TIMERS_CONTEXT_EXPIRATIONTIMELC2_SHIFT 0 #define TIMERS_CONTEXT_VALIDLC2_MASK 0x1 @@ -978,3 +1268,4 @@ struct timers_context { #define TIMERS_CONTEXT_RESERVED3_SHIFT 29 }; #endif /* __COMMON_HSI__ */ +#endif diff --git a/include/linux/qed/eth_common.h b/include/linux/qed/eth_common.h index b5ebc697d05f..1aa0727c4136 100644 --- a/include/linux/qed/eth_common.h +++ b/include/linux/qed/eth_common.h @@ -13,9 +13,12 @@ /* ETH FW CONSTANTS */ /********************/ #define ETH_HSI_VER_MAJOR 3 -#define ETH_HSI_VER_MINOR 0 -#define ETH_CACHE_LINE_SIZE 64 +#define ETH_HSI_VER_MINOR 10 +#define ETH_HSI_VER_NO_PKT_LEN_TUNN 5 + +#define ETH_CACHE_LINE_SIZE 64 +#define ETH_RX_CQE_GAP 32 #define ETH_MAX_RAMROD_PER_CON 8 #define ETH_TX_BD_PAGE_SIZE_BYTES 4096 #define ETH_RX_BD_PAGE_SIZE_BYTES 4096 @@ -24,15 +27,25 @@ #define ETH_TX_MIN_BDS_PER_NON_LSO_PKT 1 #define ETH_TX_MAX_BDS_PER_NON_LSO_PACKET 18 +#define ETH_TX_MAX_BDS_PER_LSO_PACKET 255 #define ETH_TX_MAX_LSO_HDR_NBD 4 #define ETH_TX_MIN_BDS_PER_LSO_PKT 3 #define ETH_TX_MIN_BDS_PER_TUNN_IPV6_WITH_EXT_PKT 3 #define ETH_TX_MIN_BDS_PER_IPV6_WITH_EXT_PKT 2 #define ETH_TX_MIN_BDS_PER_PKT_W_LOOPBACK_MODE 2 -#define ETH_TX_MAX_NON_LSO_PKT_LEN (9700 - (4 + 12 + 8)) +#define ETH_TX_MAX_NON_LSO_PKT_LEN (9700 - (4 + 4 + 12 + 8)) #define ETH_TX_MAX_LSO_HDR_BYTES 510 +#define ETH_TX_LSO_WINDOW_BDS_NUM (18 - 1) +#define ETH_TX_LSO_WINDOW_MIN_LEN 9700 +#define ETH_TX_MAX_LSO_PAYLOAD_LEN 0xFE000 +#define ETH_TX_NUM_SAME_AS_LAST_ENTRIES 320 +#define ETH_TX_INACTIVE_SAME_AS_LAST 0xFFFF #define ETH_NUM_STATISTIC_COUNTERS MAX_NUM_VPORTS +#define ETH_NUM_STATISTIC_COUNTERS_DOUBLE_VF_ZONE \ + (ETH_NUM_STATISTIC_COUNTERS - MAX_NUM_VFS / 2) +#define ETH_NUM_STATISTIC_COUNTERS_QUAD_VF_ZONE \ + (ETH_NUM_STATISTIC_COUNTERS - 3 * MAX_NUM_VFS / 4) /* Maximum number of buffers, used for RX packet placement */ #define ETH_RX_MAX_BUFF_PER_PKT 5 @@ -59,6 +72,8 @@ #define ETH_TPA_CQE_CONT_LEN_LIST_SIZE 6 #define ETH_TPA_CQE_END_LEN_LIST_SIZE 4 +/* Control frame check constants */ +#define ETH_CTL_FRAME_ETH_TYPE_NUM 4 struct eth_tx_1st_bd_flags { u8 bitfields; @@ -82,10 +97,10 @@ struct eth_tx_1st_bd_flags { /* The parsing information data fo rthe first tx bd of a given packet. */ struct eth_tx_data_1st_bd { - __le16 vlan; - u8 nbds; - struct eth_tx_1st_bd_flags bd_flags; - __le16 bitfields; + __le16 vlan; + u8 nbds; + struct eth_tx_1st_bd_flags bd_flags; + __le16 bitfields; #define ETH_TX_DATA_1ST_BD_TUNN_FLAG_MASK 0x1 #define ETH_TX_DATA_1ST_BD_TUNN_FLAG_SHIFT 0 #define ETH_TX_DATA_1ST_BD_RESERVED0_MASK 0x1 @@ -96,7 +111,7 @@ struct eth_tx_data_1st_bd { /* The parsing information data for the second tx bd of a given packet. */ struct eth_tx_data_2nd_bd { - __le16 tunn_ip_size; + __le16 tunn_ip_size; __le16 bitfields1; #define ETH_TX_DATA_2ND_BD_TUNN_INNER_L2_HDR_SIZE_W_MASK 0xF #define ETH_TX_DATA_2ND_BD_TUNN_INNER_L2_HDR_SIZE_W_SHIFT 0 @@ -125,9 +140,14 @@ struct eth_tx_data_2nd_bd { #define ETH_TX_DATA_2ND_BD_RESERVED0_SHIFT 13 }; +/* Firmware data for L2-EDPM packet. */ +struct eth_edpm_fw_data { + struct eth_tx_data_1st_bd data_1st_bd; + struct eth_tx_data_2nd_bd data_2nd_bd; + __le32 reserved; +}; + struct eth_fast_path_cqe_fw_debug { - u8 reserved0; - u8 reserved1; __le16 reserved2; }; @@ -148,6 +168,17 @@ struct eth_tunnel_parsing_flags { #define ETH_TUNNEL_PARSING_FLAGS_IPV4_OPTIONS_SHIFT 7 }; +/* PMD flow control bits */ +struct eth_pmd_flow_flags { + u8 flags; +#define ETH_PMD_FLOW_FLAGS_VALID_MASK 0x1 +#define ETH_PMD_FLOW_FLAGS_VALID_SHIFT 0 +#define ETH_PMD_FLOW_FLAGS_TOGGLE_MASK 0x1 +#define ETH_PMD_FLOW_FLAGS_TOGGLE_SHIFT 1 +#define ETH_PMD_FLOW_FLAGS_RESERVED_MASK 0x3F +#define ETH_PMD_FLOW_FLAGS_RESERVED_SHIFT 2 +}; + /* Regular ETH Rx FP CQE. */ struct eth_fast_path_rx_reg_cqe { u8 type; @@ -166,64 +197,63 @@ struct eth_fast_path_rx_reg_cqe { u8 placement_offset; struct eth_tunnel_parsing_flags tunnel_pars_flags; u8 bd_num; - u8 reserved[7]; + u8 reserved[9]; struct eth_fast_path_cqe_fw_debug fw_debug; u8 reserved1[3]; - u8 flags; -#define ETH_FAST_PATH_RX_REG_CQE_VALID_MASK 0x1 -#define ETH_FAST_PATH_RX_REG_CQE_VALID_SHIFT 0 -#define ETH_FAST_PATH_RX_REG_CQE_VALID_TOGGLE_MASK 0x1 -#define ETH_FAST_PATH_RX_REG_CQE_VALID_TOGGLE_SHIFT 1 -#define ETH_FAST_PATH_RX_REG_CQE_RESERVED2_MASK 0x3F -#define ETH_FAST_PATH_RX_REG_CQE_RESERVED2_SHIFT 2 + struct eth_pmd_flow_flags pmd_flags; }; /* TPA-continue ETH Rx FP CQE. */ struct eth_fast_path_rx_tpa_cont_cqe { - u8 type; - u8 tpa_agg_index; - __le16 len_list[ETH_TPA_CQE_CONT_LEN_LIST_SIZE]; - u8 reserved[5]; - u8 reserved1; - __le16 reserved2[ETH_TPA_CQE_CONT_LEN_LIST_SIZE]; + u8 type; + u8 tpa_agg_index; + __le16 len_list[ETH_TPA_CQE_CONT_LEN_LIST_SIZE]; + u8 reserved; + u8 reserved1; + __le16 reserved2[ETH_TPA_CQE_CONT_LEN_LIST_SIZE]; + u8 reserved3[3]; + struct eth_pmd_flow_flags pmd_flags; }; /* TPA-end ETH Rx FP CQE. */ struct eth_fast_path_rx_tpa_end_cqe { - u8 type; - u8 tpa_agg_index; - __le16 total_packet_len; - u8 num_of_bds; - u8 end_reason; - __le16 num_of_coalesced_segs; - __le32 ts_delta; - __le16 len_list[ETH_TPA_CQE_END_LEN_LIST_SIZE]; - u8 reserved1[3]; - u8 reserved2; - __le16 reserved3[ETH_TPA_CQE_END_LEN_LIST_SIZE]; + u8 type; + u8 tpa_agg_index; + __le16 total_packet_len; + u8 num_of_bds; + u8 end_reason; + __le16 num_of_coalesced_segs; + __le32 ts_delta; + __le16 len_list[ETH_TPA_CQE_END_LEN_LIST_SIZE]; + __le16 reserved3[ETH_TPA_CQE_END_LEN_LIST_SIZE]; + __le16 reserved1; + u8 reserved2; + struct eth_pmd_flow_flags pmd_flags; }; /* TPA-start ETH Rx FP CQE. */ struct eth_fast_path_rx_tpa_start_cqe { - u8 type; - u8 bitfields; + u8 type; + u8 bitfields; #define ETH_FAST_PATH_RX_TPA_START_CQE_RSS_HASH_TYPE_MASK 0x7 #define ETH_FAST_PATH_RX_TPA_START_CQE_RSS_HASH_TYPE_SHIFT 0 #define ETH_FAST_PATH_RX_TPA_START_CQE_TC_MASK 0xF #define ETH_FAST_PATH_RX_TPA_START_CQE_TC_SHIFT 3 #define ETH_FAST_PATH_RX_TPA_START_CQE_RESERVED0_MASK 0x1 #define ETH_FAST_PATH_RX_TPA_START_CQE_RESERVED0_SHIFT 7 - __le16 seg_len; + __le16 seg_len; struct parsing_and_err_flags pars_flags; - __le16 vlan_tag; - __le32 rss_hash; - __le16 len_on_first_bd; - u8 placement_offset; + __le16 vlan_tag; + __le32 rss_hash; + __le16 len_on_first_bd; + u8 placement_offset; struct eth_tunnel_parsing_flags tunnel_pars_flags; - u8 tpa_agg_index; - u8 header_len; - __le16 ext_bd_len_list[ETH_TPA_CQE_START_LEN_LIST_SIZE]; + u8 tpa_agg_index; + u8 header_len; + __le16 ext_bd_len_list[ETH_TPA_CQE_START_LEN_LIST_SIZE]; struct eth_fast_path_cqe_fw_debug fw_debug; + u8 reserved; + struct eth_pmd_flow_flags pmd_flags; }; /* The L4 pseudo checksum mode for Ethernet */ @@ -245,15 +275,7 @@ struct eth_slow_path_rx_cqe { u8 reserved[25]; __le16 echo; u8 reserved1; - u8 flags; -/* for PMD mode - valid indication */ -#define ETH_SLOW_PATH_RX_CQE_VALID_MASK 0x1 -#define ETH_SLOW_PATH_RX_CQE_VALID_SHIFT 0 -/* for PMD mode - valid toggle indication */ -#define ETH_SLOW_PATH_RX_CQE_VALID_TOGGLE_MASK 0x1 -#define ETH_SLOW_PATH_RX_CQE_VALID_TOGGLE_SHIFT 1 -#define ETH_SLOW_PATH_RX_CQE_RESERVED2_MASK 0x3F -#define ETH_SLOW_PATH_RX_CQE_RESERVED2_SHIFT 2 + struct eth_pmd_flow_flags pmd_flags; }; /* union for all ETH Rx CQE types */ @@ -276,6 +298,11 @@ enum eth_rx_cqe_type { MAX_ETH_RX_CQE_TYPE }; +struct eth_rx_pmd_cqe { + union eth_rx_cqe cqe; + u8 reserved[ETH_RX_CQE_GAP]; +}; + enum eth_rx_tunn_type { ETH_RX_NO_TUNN, ETH_RX_TUNN_GENEVE, @@ -313,8 +340,8 @@ struct eth_tx_2nd_bd { /* The parsing information data for the third tx bd of a given packet. */ struct eth_tx_data_3rd_bd { - __le16 lso_mss; - __le16 bitfields; + __le16 lso_mss; + __le16 bitfields; #define ETH_TX_DATA_3RD_BD_TCP_HDR_LEN_DW_MASK 0xF #define ETH_TX_DATA_3RD_BD_TCP_HDR_LEN_DW_SHIFT 0 #define ETH_TX_DATA_3RD_BD_HDR_NBD_MASK 0xF @@ -323,8 +350,8 @@ struct eth_tx_data_3rd_bd { #define ETH_TX_DATA_3RD_BD_START_BD_SHIFT 8 #define ETH_TX_DATA_3RD_BD_RESERVED0_MASK 0x7F #define ETH_TX_DATA_3RD_BD_RESERVED0_SHIFT 9 - u8 tunn_l4_hdr_start_offset_w; - u8 tunn_hdr_size_w; + u8 tunn_l4_hdr_start_offset_w; + u8 tunn_hdr_size_w; }; /* The third tx bd of a given packet */ @@ -355,10 +382,10 @@ struct eth_tx_bd { }; union eth_tx_bd_types { - struct eth_tx_1st_bd first_bd; - struct eth_tx_2nd_bd second_bd; - struct eth_tx_3rd_bd third_bd; - struct eth_tx_bd reg_bd; + struct eth_tx_1st_bd first_bd; + struct eth_tx_2nd_bd second_bd; + struct eth_tx_3rd_bd third_bd; + struct eth_tx_bd reg_bd; }; /* Mstorm Queue Zone */ @@ -389,8 +416,8 @@ struct eth_db_data { #define ETH_DB_DATA_RESERVED_SHIFT 5 #define ETH_DB_DATA_AGG_VAL_SEL_MASK 0x3 #define ETH_DB_DATA_AGG_VAL_SEL_SHIFT 6 - u8 agg_flags; - __le16 bd_prod; + u8 agg_flags; + __le16 bd_prod; }; #endif /* __ETH_COMMON__ */ diff --git a/include/linux/qed/iscsi_common.h b/include/linux/qed/iscsi_common.h index b3c0feb15ae9..8f64b1223c2f 100644 --- a/include/linux/qed/iscsi_common.h +++ b/include/linux/qed/iscsi_common.h @@ -311,7 +311,7 @@ struct iscsi_login_req_hdr { #define ISCSI_LOGIN_REQ_HDR_DATA_SEG_LEN_SHIFT 0 #define ISCSI_LOGIN_REQ_HDR_TOTAL_AHS_LEN_MASK 0xFF #define ISCSI_LOGIN_REQ_HDR_TOTAL_AHS_LEN_SHIFT 24 - __le32 isid_TABC; + __le32 isid_tabc; __le16 tsih; __le16 isid_d; __le32 itt; @@ -464,7 +464,7 @@ struct iscsi_login_response_hdr { #define ISCSI_LOGIN_RESPONSE_HDR_DATA_SEG_LEN_SHIFT 0 #define ISCSI_LOGIN_RESPONSE_HDR_TOTAL_AHS_LEN_MASK 0xFF #define ISCSI_LOGIN_RESPONSE_HDR_TOTAL_AHS_LEN_SHIFT 24 - __le32 isid_TABC; + __le32 isid_tabc; __le16 tsih; __le16 isid_d; __le32 itt; @@ -688,8 +688,7 @@ union iscsi_cqe { enum iscsi_cqes_type { ISCSI_CQE_TYPE_SOLICITED = 1, ISCSI_CQE_TYPE_UNSOLICITED, - ISCSI_CQE_TYPE_SOLICITED_WITH_SENSE - , + ISCSI_CQE_TYPE_SOLICITED_WITH_SENSE, ISCSI_CQE_TYPE_TASK_CLEANUP, ISCSI_CQE_TYPE_DUMMY, MAX_ISCSI_CQES_TYPE @@ -769,9 +768,9 @@ enum iscsi_eqe_opcode { ISCSI_EVENT_TYPE_UPDATE_CONN, ISCSI_EVENT_TYPE_CLEAR_SQ, ISCSI_EVENT_TYPE_TERMINATE_CONN, + ISCSI_EVENT_TYPE_MAC_UPDATE_CONN, ISCSI_EVENT_TYPE_ASYN_CONNECT_COMPLETE, ISCSI_EVENT_TYPE_ASYN_TERMINATE_DONE, - RESERVED8, RESERVED9, ISCSI_EVENT_TYPE_START_OF_ERROR_TYPES = 10, ISCSI_EVENT_TYPE_ASYN_ABORT_RCVD, @@ -867,6 +866,7 @@ enum iscsi_ramrod_cmd_id { ISCSI_RAMROD_CMD_ID_UPDATE_CONN = 4, ISCSI_RAMROD_CMD_ID_TERMINATION_CONN = 5, ISCSI_RAMROD_CMD_ID_CLEAR_SQ = 6, + ISCSI_RAMROD_CMD_ID_MAC_UPDATE = 7, MAX_ISCSI_RAMROD_CMD_ID }; @@ -883,6 +883,16 @@ union iscsi_seq_num { __le16 r2t_sn; }; +struct iscsi_spe_conn_mac_update { + struct iscsi_slow_path_hdr hdr; + __le16 conn_id; + __le32 fw_cid; + __le16 remote_mac_addr_lo; + __le16 remote_mac_addr_mid; + __le16 remote_mac_addr_hi; + u8 reserved0[2]; +}; + struct iscsi_spe_conn_offload { struct iscsi_slow_path_hdr hdr; __le16 conn_id; @@ -1302,14 +1312,6 @@ struct mstorm_iscsi_stats_drv { struct regpair iscsi_rx_dropped_pdus_task_not_valid; }; -struct ooo_opaque { - __le32 cid; - u8 drop_isle; - u8 drop_size; - u8 ooo_opcode; - u8 ooo_isle; -}; - struct pstorm_iscsi_stats_drv { struct regpair iscsi_tx_bytes_cnt; struct regpair iscsi_tx_packet_cnt; diff --git a/include/linux/qed/qed_chain.h b/include/linux/qed/qed_chain.h index 7e441bdeabdc..72d88cf3ca25 100644 --- a/include/linux/qed/qed_chain.h +++ b/include/linux/qed/qed_chain.h @@ -16,19 +16,6 @@ #include #include -/* dma_addr_t manip */ -#define DMA_LO_LE(x) cpu_to_le32(lower_32_bits(x)) -#define DMA_HI_LE(x) cpu_to_le32(upper_32_bits(x)) -#define DMA_REGPAIR_LE(x, val) do { \ - (x).hi = DMA_HI_LE((val)); \ - (x).lo = DMA_LO_LE((val)); \ - } while (0) - -#define HILO_GEN(hi, lo, type) ((((type)(hi)) << 32) + (lo)) -#define HILO_64(hi, lo) HILO_GEN((le32_to_cpu(hi)), (le32_to_cpu(lo)), u64) -#define HILO_64_REGPAIR(regpair) (HILO_64(regpair.hi, regpair.lo)) -#define HILO_DMA_REGPAIR(regpair) ((dma_addr_t)HILO_64_REGPAIR(regpair)) - enum qed_chain_mode { /* Each Page contains a next pointer at its end */ QED_CHAIN_MODE_NEXT_PTR, diff --git a/include/linux/qed/tcp_common.h b/include/linux/qed/tcp_common.h index accba0e6b704..dc3889d1bbe6 100644 --- a/include/linux/qed/tcp_common.h +++ b/include/linux/qed/tcp_common.h @@ -11,6 +11,14 @@ #define TCP_INVALID_TIMEOUT_VAL -1 +struct ooo_opaque { + __le32 cid; + u8 drop_isle; + u8 drop_size; + u8 ooo_opcode; + u8 ooo_isle; +}; + enum tcp_connect_mode { TCP_CONNECT_ACTIVE, TCP_CONNECT_PASSIVE, @@ -18,14 +26,10 @@ enum tcp_connect_mode { }; struct tcp_init_params { - __le32 max_cwnd; - __le16 dup_ack_threshold; + __le32 two_msl_timer; __le16 tx_sws_timer; - __le16 min_rto; - __le16 min_rto_rt; - __le16 max_rto; u8 maxfinrt; - u8 reserved[1]; + u8 reserved[9]; }; enum tcp_ip_version { From dc833def42e7f2425f69d83a53bee054e80caea5 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Fri, 19 Aug 2016 18:08:57 +0200 Subject: [PATCH 0360/1715] net/irda: remove pointless assignment/check We've already set sk to sock->sk and dereferenced it, so if it's NULL we would have crashed already. Moreover, if it was NULL we would have crashed anyway when jumping to 'out' and trying to unlock the sock. Furthermore, if we had assigned a different value to 'sk' we would have been calling lock_sock() and release_sock() on different sockets. My conclusion is that these two lines are complete nonsense and only serve to confuse the reader. Signed-off-by: Vegard Nossum Signed-off-by: David S. Miller --- net/irda/af_irda.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index 8d2f7c9b491d..db639690c205 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -845,9 +845,6 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags) if (sock->state != SS_UNCONNECTED) goto out; - if ((sk = sock->sk) == NULL) - goto out; - err = -EOPNOTSUPP; if ((sk->sk_type != SOCK_STREAM) && (sk->sk_type != SOCK_SEQPACKET) && (sk->sk_type != SOCK_DGRAM)) From 0582b964ece795fa2810eace82bf9b2233403d65 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Tue, 19 Jul 2016 16:23:29 -0700 Subject: [PATCH 0361/1715] i40e: move i40e_vsi_config_rss below i40e_get_rss_aq Move this function below the two functions related to configuring RSS via the admin queue. This helps co-locate the two functions, and made it easier to spot a bug in the first i40e_config_rss_aq function as compared to the i40e_get_rss_aq function. Signed-off-by: Jacob Keller Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 54 ++++++++++----------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 798c4e2dc621..1692a896c3c0 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -8027,33 +8027,6 @@ config_rss_aq_out: return ret; } -/** - * i40e_vsi_config_rss - Prepare for VSI(VMDq) RSS if used - * @vsi: VSI structure - **/ -static int i40e_vsi_config_rss(struct i40e_vsi *vsi) -{ - u8 seed[I40E_HKEY_ARRAY_SIZE]; - struct i40e_pf *pf = vsi->back; - u8 *lut; - int ret; - - if (!(pf->flags & I40E_FLAG_RSS_AQ_CAPABLE)) - return 0; - - lut = kzalloc(vsi->rss_table_size, GFP_KERNEL); - if (!lut) - return -ENOMEM; - - i40e_fill_rss_lut(pf, lut, vsi->rss_table_size, vsi->rss_size); - netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE); - vsi->rss_size = min_t(int, pf->alloc_rss_size, vsi->num_queue_pairs); - ret = i40e_config_rss_aq(vsi, seed, lut, vsi->rss_table_size); - kfree(lut); - - return ret; -} - /** * i40e_get_rss_aq - Get RSS keys and lut by using AQ commands * @vsi: Pointer to vsi structure @@ -8100,6 +8073,33 @@ static int i40e_get_rss_aq(struct i40e_vsi *vsi, const u8 *seed, return ret; } +/** + * i40e_vsi_config_rss - Prepare for VSI(VMDq) RSS if used + * @vsi: VSI structure + **/ +static int i40e_vsi_config_rss(struct i40e_vsi *vsi) +{ + u8 seed[I40E_HKEY_ARRAY_SIZE]; + struct i40e_pf *pf = vsi->back; + u8 *lut; + int ret; + + if (!(pf->flags & I40E_FLAG_RSS_AQ_CAPABLE)) + return 0; + + lut = kzalloc(vsi->rss_table_size, GFP_KERNEL); + if (!lut) + return -ENOMEM; + + i40e_fill_rss_lut(pf, lut, vsi->rss_table_size, vsi->rss_size); + netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE); + vsi->rss_size = min_t(int, pf->alloc_rss_size, vsi->num_queue_pairs); + ret = i40e_config_rss_aq(vsi, seed, lut, vsi->rss_table_size); + kfree(lut); + + return ret; +} + /** * i40e_config_rss_reg - Configure RSS keys and lut by writing registers * @vsi: Pointer to vsi structure From 776b2e15f8af7900409456292efd45af6de97172 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Tue, 19 Jul 2016 16:23:30 -0700 Subject: [PATCH 0362/1715] i40e: fix broken i40e_config_rss_aq function X722 hardware requires using the admin queue to configure RSS. This function was previously re-written in commit e69ff813af35 ("i40e: rework the functions to configure RSS with similar parameters"). However, the previous refactor did not work correctly for a few reasons (a) it does not check whether seed is NULL before using it, resulting in a NULL pointer dereference [ 402.954721] BUG: unable to handle kernel NULL pointer dereference at (null) [ 402.955568] IP: [] i40e_config_rss_aq.constprop.65+0x2f/0x1c0 [i40e] [ 402.956402] PGD ad610067 PUD accc0067 PMD 0 [ 402.957235] Oops: 0000 [#1] SMP [ 402.958064] Modules linked in: ip6t_rpfilter ip6t_REJECT nf_reject_ipv6 xt_conntrack ip_set nfnetlink ebtable_filter ebtable_ broute bridge stp llc ebtable_nat ebtables ip6table_mangle ip6table_raw ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv 6 ip6table_security ip6table_filter ip6_tables iptable_mangle iptable_raw iptable_nat nf_conntrack_ipv4_ nf_defrag_ipv4_ nf_nat_ip v4_ nf_nat nf_conntrack iptable_security intel_rapl i86_kg_temp_thermal coretemp kvm_intel kvm irqbypass crct10dif_clMl crc32_ pclMl ghash_clMlni_intel iTCO_wdt iTCO_vendor_support shpchp sb_edac dcdbas pcspkr joydev ipmi_devintf wmi edac_core ipmi_ssif acpi_ad acpi_ower_meter ipmi_si ipmi_msghandler mei_me nfsd lpc_ich mei ioatdma tpm_tis auth_rpcgss tpm nfs_acl lockd grace s unrpc ifs nngag200 i2c_algo_bit drm_kms_helper ttm drm iigbe bnx2x i40e dca mdio ptp pps_core libcrc32c fjes crc32c_intel [ 402.965563] CPU: 22 PID: 2461 Conm: ethtool Not tainted 4.6.0-rc7_1.2-ABNidQ+ #20 [ 402.966719] Hardware name: Dell Inc. PowerEdge R720/0C4Y3R, BIOS 2.5.2 01/28/2015 [ 402.967862] task: ffff880219b51dc0 ti: ffff8800b3408000 task.ti: ffff8800b3408000 [ 402.969046] RIP: 0010:[] [] i40e_config_rss_aq.constprop.65+0x2f/0x1c0 [i40e] [ 402.970339] RSP: 0018:ffff8800b340ba90 EFLAGS: 00010246 [ 402.971616] RAX: 0000000000000000 RBX: ffff88042ec14000 RCX: 0000000000000200 [ 402.972961] RDX: ffff880428eb9200 RSI: 0000000000000000 RDI: ffff88042ec14000 [ 402.974312] RBP: ffff8800b340baf8 R08: ffff880237ada8f0 R09: ffff880428eb9200 [ 402.975709] R10: ffff880428eb9200 R11: 0000000000000000 R12: ffff88042ec2e000 [ 402.977104] R13: ffff88042ec2e000 R14: ffff88042ec14000 R15: ffff88022ea00800 [ 402.978541] FS: 00007f84fd054700(0000) GS:ffff880237ac0000(0000) knlGS:0000000000000000 [ 402.980003] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 402.981508] CR2: 0000000000000000 CR3: 000000003289e000 CR4: 00000000000406e0 [ 402.983028] Stack: [ 402.984578] 0000000002000200 0000000000000000 ffff88023ffeda68 ffff88023ffef000 [ 402.986187] 0000000000000268 ffff8800b340bbf8 ffff88023ffedd80 0000000088ce4f1d [ 402.987844] ffff88042ec14000 ffff88022ea00800 ffff88042ec2e000 ffff88042ec14000 [ 402.989509] Call Trace: [ 402.991200] [] i40e_config_rss+0x11f/0x1c0 [i40e] [ 402.992924] [] i40e_set_rifh+0ic0/0x130 [i40e] [ 402.994684] [] ethtool_set_rifh+0x1f7/0x300 [ 402.996446] [] ? cred_has_capability+0io6b/0x100 [ 402.998203] [] ? selinux_capable+0x12/0x20 [ 402.999968] [] ? security_capable+0x4b/0x70 [ 403.001707] [] dev_ethtool+0x1423/0x2290 [ 403.003461] [] dev_ioctl+0x191/0io630 [ 403.005186] [] ? lru_cache_add+0x3a/0i80 [ 403.006942] [] ? _raw_spin_unlock+0ie/0x20 [ 403.008691] [] sock_do_ioctl+0x45/0i50 [ 403.010421] [] sock_ioctl+0x209/0x2d0 [ 403.012173] [] do_vfs_ioctl+0u4/0io6c0 [ 403.013911] [] SyS_ioctl+0x79/0x90 [ 403.015710] [] entry_SYSCALL_64_fastpath+0x1a/0u4 [ 403.017500] Code: 90 55 48 89 e5 41 57 41 56 41 55 41 54 53 48 89 fb 48 83 ec 40 4c 8b a7 e0 05 00 00 65 48 8b 04 25 28 00 00 00 48 89 45 d0 31 c0 <48> 8b 06 41 0f b7 bc 24 f2 0f 00 00 48 89 45 9c 48 8b 46 08 48 [ 403.021454] RIP [] i40e_config_rss_aq.constprop.65+0x2f/0x1c0 [i40e] [ 403.023395] RSP [ 403.025271] CR2: 0000000000000000 [ 403.027169] ---[ end trace 64561b528cf61cf0 ]--- (b) it does not even bother to use the passed in *lut parameter which defines the requested lookup table. Instead it uses its own round robin table. Fix these issues by re-writing it to be similar to i40e_config_rss_reg and i40e_get_rss_aq. Fixes: e69ff813af35 ("i40e: rework the functions to configure RSS with similar parameters", 2015-10-21) Signed-off-by: Jacob Keller Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 57 +++++++++------------ 1 file changed, 23 insertions(+), 34 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 1692a896c3c0..2ed1e8b7e9be 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -7985,45 +7985,34 @@ static int i40e_setup_misc_vector(struct i40e_pf *pf) static int i40e_config_rss_aq(struct i40e_vsi *vsi, const u8 *seed, u8 *lut, u16 lut_size) { - struct i40e_aqc_get_set_rss_key_data rss_key; struct i40e_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; - bool pf_lut = false; - u8 *rss_lut; - int ret, i; + int ret = 0; - memcpy(&rss_key, seed, sizeof(rss_key)); - - rss_lut = kzalloc(pf->rss_table_size, GFP_KERNEL); - if (!rss_lut) - return -ENOMEM; - - /* Populate the LUT with max no. of queues in round robin fashion */ - for (i = 0; i < vsi->rss_table_size; i++) - rss_lut[i] = i % vsi->rss_size; - - ret = i40e_aq_set_rss_key(hw, vsi->id, &rss_key); - if (ret) { - dev_info(&pf->pdev->dev, - "Cannot set RSS key, err %s aq_err %s\n", - i40e_stat_str(&pf->hw, ret), - i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); - goto config_rss_aq_out; + if (seed) { + struct i40e_aqc_get_set_rss_key_data *seed_dw = + (struct i40e_aqc_get_set_rss_key_data *)seed; + ret = i40e_aq_set_rss_key(hw, vsi->id, seed_dw); + if (ret) { + dev_info(&pf->pdev->dev, + "Cannot set RSS key, err %s aq_err %s\n", + i40e_stat_str(hw, ret), + i40e_aq_str(hw, hw->aq.asq_last_status)); + return ret; + } } + if (lut) { + bool pf_lut = vsi->type == I40E_VSI_MAIN ? true : false; - if (vsi->type == I40E_VSI_MAIN) - pf_lut = true; - - ret = i40e_aq_set_rss_lut(hw, vsi->id, pf_lut, rss_lut, - vsi->rss_table_size); - if (ret) - dev_info(&pf->pdev->dev, - "Cannot set RSS lut, err %s aq_err %s\n", - i40e_stat_str(&pf->hw, ret), - i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); - -config_rss_aq_out: - kfree(rss_lut); + ret = i40e_aq_set_rss_lut(hw, vsi->id, pf_lut, lut, lut_size); + if (ret) { + dev_info(&pf->pdev->dev, + "Cannot set RSS lut, err %s aq_err %s\n", + i40e_stat_str(hw, ret), + i40e_aq_str(hw, hw->aq.asq_last_status)); + return ret; + } + } return ret; } From 552b996256241198a08005a91a206b402ac234f6 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Tue, 19 Jul 2016 16:23:31 -0700 Subject: [PATCH 0363/1715] i40e: use configured RSS key and lookup table in i40e_vsi_config_rss A previous refactor added support to store user configuration for VSIs, so that extra VSIs such as for VMDq can use this information when configuring. Unfortunately the i40e_vsi_config_rss function was missed in this refactor, and the values were being ignored. Fix this by checking for the fields and using those instead of always using the default values. Signed-off-by: Jacob Keller Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 2ed1e8b7e9be..4ec9565be605 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -8076,13 +8076,26 @@ static int i40e_vsi_config_rss(struct i40e_vsi *vsi) if (!(pf->flags & I40E_FLAG_RSS_AQ_CAPABLE)) return 0; + if (!vsi->rss_size) + vsi->rss_size = min_t(int, pf->alloc_rss_size, + vsi->num_queue_pairs); + if (!vsi->rss_size) + return -EINVAL; + lut = kzalloc(vsi->rss_table_size, GFP_KERNEL); if (!lut) return -ENOMEM; - - i40e_fill_rss_lut(pf, lut, vsi->rss_table_size, vsi->rss_size); - netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE); - vsi->rss_size = min_t(int, pf->alloc_rss_size, vsi->num_queue_pairs); + /* Use the user configured hash keys and lookup table if there is one, + * otherwise use default + */ + if (vsi->rss_lut_user) + memcpy(lut, vsi->rss_lut_user, vsi->rss_table_size); + else + i40e_fill_rss_lut(pf, lut, vsi->rss_table_size, vsi->rss_size); + if (vsi->rss_hkey_user) + memcpy(seed, vsi->rss_hkey_user, I40E_HKEY_ARRAY_SIZE); + else + netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE); ret = i40e_config_rss_aq(vsi, seed, lut, vsi->rss_table_size); kfree(lut); From 6992a6c9c435c10253a229ce61852f5305fae646 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 4 Aug 2016 11:37:01 -0700 Subject: [PATCH 0364/1715] i40e: use alloc_workqueue instead of create_singlethread_workqueue Replace calls to create_singlethread_workqueue instead with alloc_workqueue as is style with other Intel drivers. This provides more control over workqueue creation, and allows explicit setting of the desired mode of operation. It also makes it more obvious that driver name constant is passed to a format "%s". Change-ID: I6192b44caf5140336cd54c5b350d51c73b541fdb Signed-off-by: Jacob Keller Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 3 ++- drivers/net/ethernet/intel/i40evf/i40evf_main.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 4ec9565be605..f355c04ee4f7 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -11605,7 +11605,8 @@ static int __init i40e_init_module(void) * it can't be any worse than using the system workqueue which * was already single threaded */ - i40e_wq = create_singlethread_workqueue(i40e_driver_name); + i40e_wq = alloc_workqueue("%s", WQ_UNBOUND | WQ_MEM_RECLAIM, 1, + i40e_driver_name); if (!i40e_wq) { pr_err("%s: Failed to create workqueue\n", i40e_driver_name); return -ENOMEM; diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index ba046d1e25e4..24f88ecdcddc 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -2833,7 +2833,8 @@ static int __init i40evf_init_module(void) pr_info("%s\n", i40evf_copyright); - i40evf_wq = create_singlethread_workqueue(i40evf_driver_name); + i40evf_wq = alloc_workqueue("%s", WQ_UNBOUND | WQ_MEM_RECLAIM, 1, + i40evf_driver_name); if (!i40evf_wq) { pr_err("%s: Failed to create workqueue\n", i40evf_driver_name); return -ENOMEM; From fe458e50711b8a3dc773a6aa1f80b477991c8bbe Mon Sep 17 00:00:00 2001 From: Mitch Williams Date: Thu, 4 Aug 2016 11:37:02 -0700 Subject: [PATCH 0365/1715] i40evf: report link speed The PF driver tells us the link speed, so do something with that information. Add link speed to log messages, and report speed through ethtool. Change-Id: I279dc9540cc5203376406050a3e8d67e128d5882 Signed-off-by: Mitch Williams Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40evf/i40evf.h | 1 + .../ethernet/intel/i40evf/i40evf_ethtool.c | 26 ++++++++- .../ethernet/intel/i40evf/i40evf_virtchnl.c | 55 ++++++++++++++++--- 3 files changed, 70 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h index 76ed97db28e2..6fa00f36bdc4 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf.h +++ b/drivers/net/ethernet/intel/i40evf/i40evf.h @@ -258,6 +258,7 @@ struct i40evf_adapter { struct work_struct watchdog_task; bool netdev_registered; bool link_up; + enum i40e_aq_link_speed link_speed; enum i40e_virtchnl_ops current_op; #define CLIENT_ENABLED(_a) ((_a)->vf_res ? \ (_a)->vf_res->vf_offload_flags & \ diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c index c9c202f6c521..e17a15456266 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c @@ -74,13 +74,33 @@ static const struct i40evf_stats i40evf_gstrings_stats[] = { static int i40evf_get_settings(struct net_device *netdev, struct ethtool_cmd *ecmd) { - /* In the future the VF will be able to query the PF for - * some information - for now use a dummy value - */ + struct i40evf_adapter *adapter = netdev_priv(netdev); + ecmd->supported = 0; ecmd->autoneg = AUTONEG_DISABLE; ecmd->transceiver = XCVR_DUMMY1; ecmd->port = PORT_NONE; + /* Set speed and duplex */ + switch (adapter->link_speed) { + case I40E_LINK_SPEED_40GB: + ethtool_cmd_speed_set(ecmd, SPEED_40000); + break; + case I40E_LINK_SPEED_20GB: + ethtool_cmd_speed_set(ecmd, SPEED_20000); + break; + case I40E_LINK_SPEED_10GB: + ethtool_cmd_speed_set(ecmd, SPEED_10000); + break; + case I40E_LINK_SPEED_1GB: + ethtool_cmd_speed_set(ecmd, SPEED_1000); + break; + case I40E_LINK_SPEED_100MB: + ethtool_cmd_speed_set(ecmd, SPEED_100); + break; + default: + break; + } + ecmd->duplex = DUPLEX_FULL; return 0; } diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c index d76c221d4c8a..cc6cb30c1667 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c @@ -816,6 +816,45 @@ void i40evf_set_rss_lut(struct i40evf_adapter *adapter) kfree(vrl); } +/** + * i40evf_print_link_message - print link up or down + * @adapter: adapter structure + * + * Log a message telling the world of our wonderous link status + */ +static void i40evf_print_link_message(struct i40evf_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + char *speed = "Unknown "; + + if (!adapter->link_up) { + netdev_info(netdev, "NIC Link is Down\n"); + return; + } + + switch (adapter->link_speed) { + case I40E_LINK_SPEED_40GB: + speed = "40 G"; + break; + case I40E_LINK_SPEED_20GB: + speed = "20 G"; + break; + case I40E_LINK_SPEED_10GB: + speed = "10 G"; + break; + case I40E_LINK_SPEED_1GB: + speed = "1000 M"; + break; + case I40E_LINK_SPEED_100MB: + speed = "100 M"; + break; + default: + break; + } + + netdev_info(netdev, "NIC Link is Up %sbps Full Duplex\n", speed); +} + /** * i40evf_request_reset * @adapter: adapter structure @@ -853,15 +892,13 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter, (struct i40e_virtchnl_pf_event *)msg; switch (vpe->event) { case I40E_VIRTCHNL_EVENT_LINK_CHANGE: - adapter->link_up = - vpe->event_data.link_event.link_status; - if (adapter->link_up && !netif_carrier_ok(netdev)) { - dev_info(&adapter->pdev->dev, "NIC Link is Up\n"); - netif_carrier_on(netdev); - netif_tx_wake_all_queues(netdev); - } else if (!adapter->link_up) { - dev_info(&adapter->pdev->dev, "NIC Link is Down\n"); - netif_carrier_off(netdev); + adapter->link_speed = + vpe->event_data.link_event.link_speed; + if (adapter->link_up != + vpe->event_data.link_event.link_status) { + adapter->link_up = + vpe->event_data.link_event.link_status; + i40evf_print_link_message(adapter); netif_tx_stop_all_queues(netdev); } break; From ffeac83685facb6f0829b898d567056eae8097f8 Mon Sep 17 00:00:00 2001 From: Carolyn Wyborny Date: Thu, 4 Aug 2016 11:37:03 -0700 Subject: [PATCH 0366/1715] i40e: refactor tail_bump check This patch refactors tail bump check. Change-ID: Ide0e19171d67d90cb2b06b8dcd4fa791ae120160 Signed-off-by: Carolyn Wyborny Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 6 ++---- drivers/net/ethernet/intel/i40evf/i40e_txrx.c | 6 ++---- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index df7ecc9578c9..f8d66236fcbf 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -2840,10 +2840,9 @@ static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, I40E_TXD_QW1_CMD_SHIFT); /* notify HW of packet */ - if (!tail_bump) + if (!tail_bump) { prefetchw(tx_desc + 1); - - if (tail_bump) { + } else { /* Force memory writes to complete before letting h/w * know there are new descriptors to fetch. (Only * applicable for weak-ordered memory model archs, @@ -2852,7 +2851,6 @@ static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, wmb(); writel(i, tx_ring->tail); } - return; dma_error: diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index a579193b2c21..0130458264e5 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -2068,10 +2068,9 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, I40E_TXD_QW1_CMD_SHIFT); /* notify HW of packet */ - if (!tail_bump) + if (!tail_bump) { prefetchw(tx_desc + 1); - - if (tail_bump) { + } else { /* Force memory writes to complete before letting h/w * know there are new descriptors to fetch. (Only * applicable for weak-ordered memory model archs, @@ -2080,7 +2079,6 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, wmb(); writel(i, tx_ring->tail); } - return; dma_error: From b7eeef495f62412bc8b0a9f8c94710653e4cdd28 Mon Sep 17 00:00:00 2001 From: Carolyn Wyborny Date: Thu, 4 Aug 2016 11:37:04 -0700 Subject: [PATCH 0367/1715] i40e: Fix byte ordering in ARP NS code for X722 This patch fixes byte ordering problems found when enabling this feature support. Without this patch, the feature will not work correctly. This patch fixes the definitions to have the correct byte order. Change-ID: Ic7489fbcbe2195df7be62ff5e359201b827cefe6 Signed-off-by: Carolyn Wyborny Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- .../net/ethernet/intel/i40e/i40e_adminq_cmd.h | 38 ++++++++++--------- .../ethernet/intel/i40evf/i40e_adminq_cmd.h | 38 ++++++++++--------- 2 files changed, 42 insertions(+), 34 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h index 11cf1a5ebccf..5c4bf760c730 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h @@ -450,13 +450,15 @@ I40E_CHECK_CMD_LENGTH(i40e_aqc_cppm_configuration); /* Set ARP Proxy command / response (indirect 0x0104) */ struct i40e_aqc_arp_proxy_data { __le16 command_flags; -#define I40E_AQ_ARP_INIT_IPV4 0x0008 -#define I40E_AQ_ARP_UNSUP_CTL 0x0010 -#define I40E_AQ_ARP_ENA 0x0020 -#define I40E_AQ_ARP_ADD_IPV4 0x0040 -#define I40E_AQ_ARP_DEL_IPV4 0x0080 +#define I40E_AQ_ARP_INIT_IPV4 0x0800 +#define I40E_AQ_ARP_UNSUP_CTL 0x1000 +#define I40E_AQ_ARP_ENA 0x2000 +#define I40E_AQ_ARP_ADD_IPV4 0x4000 +#define I40E_AQ_ARP_DEL_IPV4 0x8000 __le16 table_id; - __le32 pfpm_proxyfc; + __le32 enabled_offloads; +#define I40E_AQ_ARP_DIRECTED_OFFLOAD_ENABLE 0x00000020 +#define I40E_AQ_ARP_OFFLOAD_ENABLE 0x00000800 __le32 ip_addr; u8 mac_addr[6]; u8 reserved[2]; @@ -471,17 +473,19 @@ struct i40e_aqc_ns_proxy_data { __le16 table_idx_ipv6_0; __le16 table_idx_ipv6_1; __le16 control; -#define I40E_AQ_NS_PROXY_ADD_0 0x0100 -#define I40E_AQ_NS_PROXY_DEL_0 0x0200 -#define I40E_AQ_NS_PROXY_ADD_1 0x0400 -#define I40E_AQ_NS_PROXY_DEL_1 0x0800 -#define I40E_AQ_NS_PROXY_ADD_IPV6_0 0x1000 -#define I40E_AQ_NS_PROXY_DEL_IPV6_0 0x2000 -#define I40E_AQ_NS_PROXY_ADD_IPV6_1 0x4000 -#define I40E_AQ_NS_PROXY_DEL_IPV6_1 0x8000 -#define I40E_AQ_NS_PROXY_COMMAND_SEQ 0x0001 -#define I40E_AQ_NS_PROXY_INIT_IPV6_TBL 0x0002 -#define I40E_AQ_NS_PROXY_INIT_MAC_TBL 0x0004 +#define I40E_AQ_NS_PROXY_ADD_0 0x0001 +#define I40E_AQ_NS_PROXY_DEL_0 0x0002 +#define I40E_AQ_NS_PROXY_ADD_1 0x0004 +#define I40E_AQ_NS_PROXY_DEL_1 0x0008 +#define I40E_AQ_NS_PROXY_ADD_IPV6_0 0x0010 +#define I40E_AQ_NS_PROXY_DEL_IPV6_0 0x0020 +#define I40E_AQ_NS_PROXY_ADD_IPV6_1 0x0040 +#define I40E_AQ_NS_PROXY_DEL_IPV6_1 0x0080 +#define I40E_AQ_NS_PROXY_COMMAND_SEQ 0x0100 +#define I40E_AQ_NS_PROXY_INIT_IPV6_TBL 0x0200 +#define I40E_AQ_NS_PROXY_INIT_MAC_TBL 0x0400 +#define I40E_AQ_NS_PROXY_OFFLOAD_ENABLE 0x0800 +#define I40E_AQ_NS_PROXY_DIRECTED_OFFLOAD_ENABLE 0x1000 u8 mac_addr_0[6]; u8 mac_addr_1[6]; u8 local_mac_addr[6]; diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h index 3114dcfa1724..004565090335 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h @@ -447,13 +447,15 @@ I40E_CHECK_CMD_LENGTH(i40e_aqc_cppm_configuration); /* Set ARP Proxy command / response (indirect 0x0104) */ struct i40e_aqc_arp_proxy_data { __le16 command_flags; -#define I40E_AQ_ARP_INIT_IPV4 0x0008 -#define I40E_AQ_ARP_UNSUP_CTL 0x0010 -#define I40E_AQ_ARP_ENA 0x0020 -#define I40E_AQ_ARP_ADD_IPV4 0x0040 -#define I40E_AQ_ARP_DEL_IPV4 0x0080 +#define I40E_AQ_ARP_INIT_IPV4 0x0800 +#define I40E_AQ_ARP_UNSUP_CTL 0x1000 +#define I40E_AQ_ARP_ENA 0x2000 +#define I40E_AQ_ARP_ADD_IPV4 0x4000 +#define I40E_AQ_ARP_DEL_IPV4 0x8000 __le16 table_id; - __le32 pfpm_proxyfc; + __le32 enabled_offloads; +#define I40E_AQ_ARP_DIRECTED_OFFLOAD_ENABLE 0x00000020 +#define I40E_AQ_ARP_OFFLOAD_ENABLE 0x00000800 __le32 ip_addr; u8 mac_addr[6]; u8 reserved[2]; @@ -468,17 +470,19 @@ struct i40e_aqc_ns_proxy_data { __le16 table_idx_ipv6_0; __le16 table_idx_ipv6_1; __le16 control; -#define I40E_AQ_NS_PROXY_ADD_0 0x0100 -#define I40E_AQ_NS_PROXY_DEL_0 0x0200 -#define I40E_AQ_NS_PROXY_ADD_1 0x0400 -#define I40E_AQ_NS_PROXY_DEL_1 0x0800 -#define I40E_AQ_NS_PROXY_ADD_IPV6_0 0x1000 -#define I40E_AQ_NS_PROXY_DEL_IPV6_0 0x2000 -#define I40E_AQ_NS_PROXY_ADD_IPV6_1 0x4000 -#define I40E_AQ_NS_PROXY_DEL_IPV6_1 0x8000 -#define I40E_AQ_NS_PROXY_COMMAND_SEQ 0x0001 -#define I40E_AQ_NS_PROXY_INIT_IPV6_TBL 0x0002 -#define I40E_AQ_NS_PROXY_INIT_MAC_TBL 0x0004 +#define I40E_AQ_NS_PROXY_ADD_0 0x0001 +#define I40E_AQ_NS_PROXY_DEL_0 0x0002 +#define I40E_AQ_NS_PROXY_ADD_1 0x0004 +#define I40E_AQ_NS_PROXY_DEL_1 0x0008 +#define I40E_AQ_NS_PROXY_ADD_IPV6_0 0x0010 +#define I40E_AQ_NS_PROXY_DEL_IPV6_0 0x0020 +#define I40E_AQ_NS_PROXY_ADD_IPV6_1 0x0040 +#define I40E_AQ_NS_PROXY_DEL_IPV6_1 0x0080 +#define I40E_AQ_NS_PROXY_COMMAND_SEQ 0x0100 +#define I40E_AQ_NS_PROXY_INIT_IPV6_TBL 0x0200 +#define I40E_AQ_NS_PROXY_INIT_MAC_TBL 0x0400 +#define I40E_AQ_NS_PROXY_OFFLOAD_ENABLE 0x0800 +#define I40E_AQ_NS_PROXY_DIRECTED_OFFLOAD_ENABLE 0x1000 u8 mac_addr_0[6]; u8 mac_addr_1[6]; u8 local_mac_addr[6]; From 7d94906bee43384be85b767d41718463f3630869 Mon Sep 17 00:00:00 2001 From: Carolyn Wyborny Date: Thu, 4 Aug 2016 11:37:05 -0700 Subject: [PATCH 0368/1715] i40e: Add support for HMC resource and profile for X722 This patch adds support for HMC resource and profile cmds for X722 firmware. Change-ID: Icc332101f38ab15d1bfa167823100eb4f6822f7e Signed-off-by: Carolyn Wyborny Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- .../net/ethernet/intel/i40e/i40e_adminq_cmd.h | 21 +++++++++++++++++++ .../ethernet/intel/i40evf/i40e_adminq_cmd.h | 21 +++++++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h index 5c4bf760c730..67e396b2b347 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h @@ -204,6 +204,9 @@ enum i40e_admin_queue_opc { i40e_aqc_opc_suspend_port_tx = 0x041B, i40e_aqc_opc_resume_port_tx = 0x041C, i40e_aqc_opc_configure_partition_bw = 0x041D, + /* hmc */ + i40e_aqc_opc_query_hmc_resource_profile = 0x0500, + i40e_aqc_opc_set_hmc_resource_profile = 0x0501, /* phy commands*/ i40e_aqc_opc_get_phy_abilities = 0x0600, @@ -1586,6 +1589,24 @@ struct i40e_aqc_configure_partition_bw_data { I40E_CHECK_STRUCT_LEN(0x22, i40e_aqc_configure_partition_bw_data); +/* Get and set the active HMC resource profile and status. + * (direct 0x0500) and (direct 0x0501) + */ +struct i40e_aq_get_set_hmc_resource_profile { + u8 pm_profile; + u8 pe_vf_enabled; + u8 reserved[14]; +}; + +I40E_CHECK_CMD_LENGTH(i40e_aq_get_set_hmc_resource_profile); + +enum i40e_aq_hmc_profile { + /* I40E_HMC_PROFILE_NO_CHANGE = 0, reserved */ + I40E_HMC_PROFILE_DEFAULT = 1, + I40E_HMC_PROFILE_FAVOR_VF = 2, + I40E_HMC_PROFILE_EQUAL = 3, +}; + /* Get PHY Abilities (indirect 0x0600) uses the generic indirect struct */ /* set in param0 for get phy abilities to report qualified modules */ diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h index 004565090335..40b0eafd0c71 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h @@ -204,6 +204,9 @@ enum i40e_admin_queue_opc { i40e_aqc_opc_suspend_port_tx = 0x041B, i40e_aqc_opc_resume_port_tx = 0x041C, i40e_aqc_opc_configure_partition_bw = 0x041D, + /* hmc */ + i40e_aqc_opc_query_hmc_resource_profile = 0x0500, + i40e_aqc_opc_set_hmc_resource_profile = 0x0501, /* phy commands*/ i40e_aqc_opc_get_phy_abilities = 0x0600, @@ -1583,6 +1586,24 @@ struct i40e_aqc_configure_partition_bw_data { I40E_CHECK_STRUCT_LEN(0x22, i40e_aqc_configure_partition_bw_data); +/* Get and set the active HMC resource profile and status. + * (direct 0x0500) and (direct 0x0501) + */ +struct i40e_aq_get_set_hmc_resource_profile { + u8 pm_profile; + u8 pe_vf_enabled; + u8 reserved[14]; +}; + +I40E_CHECK_CMD_LENGTH(i40e_aq_get_set_hmc_resource_profile); + +enum i40e_aq_hmc_profile { + /* I40E_HMC_PROFILE_NO_CHANGE = 0, reserved */ + I40E_HMC_PROFILE_DEFAULT = 1, + I40E_HMC_PROFILE_FAVOR_VF = 2, + I40E_HMC_PROFILE_EQUAL = 3, +}; + /* Get PHY Abilities (indirect 0x0600) uses the generic indirect struct */ /* set in param0 for get phy abilities to report qualified modules */ From fbcfac34167328f9a063e6abd03298afabfe228b Mon Sep 17 00:00:00 2001 From: Heinrich Schuchardt Date: Thu, 11 Aug 2016 01:07:22 +0200 Subject: [PATCH 0369/1715] i40e: use matching format identifiers i is defined as int but output as %u several times. Adjust the format identifiers. Signed-off-by: Heinrich Schuchardt Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- .../net/ethernet/intel/i40e/i40e_ethtool.c | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 5bd384807476..1835186b62c9 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -1560,13 +1560,13 @@ static void i40e_get_strings(struct net_device *netdev, u32 stringset, } #endif for (i = 0; i < vsi->num_queue_pairs; i++) { - snprintf(p, ETH_GSTRING_LEN, "tx-%u.tx_packets", i); + snprintf(p, ETH_GSTRING_LEN, "tx-%d.tx_packets", i); p += ETH_GSTRING_LEN; - snprintf(p, ETH_GSTRING_LEN, "tx-%u.tx_bytes", i); + snprintf(p, ETH_GSTRING_LEN, "tx-%d.tx_bytes", i); p += ETH_GSTRING_LEN; - snprintf(p, ETH_GSTRING_LEN, "rx-%u.rx_packets", i); + snprintf(p, ETH_GSTRING_LEN, "rx-%d.rx_packets", i); p += ETH_GSTRING_LEN; - snprintf(p, ETH_GSTRING_LEN, "rx-%u.rx_bytes", i); + snprintf(p, ETH_GSTRING_LEN, "rx-%d.rx_bytes", i); p += ETH_GSTRING_LEN; } if (vsi != pf->vsi[pf->lan_vsi] || pf->hw.partition_id != 1) @@ -1581,16 +1581,16 @@ static void i40e_get_strings(struct net_device *netdev, u32 stringset, } for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { snprintf(p, ETH_GSTRING_LEN, - "veb.tc_%u_tx_packets", i); + "veb.tc_%d_tx_packets", i); p += ETH_GSTRING_LEN; snprintf(p, ETH_GSTRING_LEN, - "veb.tc_%u_tx_bytes", i); + "veb.tc_%d_tx_bytes", i); p += ETH_GSTRING_LEN; snprintf(p, ETH_GSTRING_LEN, - "veb.tc_%u_rx_packets", i); + "veb.tc_%d_rx_packets", i); p += ETH_GSTRING_LEN; snprintf(p, ETH_GSTRING_LEN, - "veb.tc_%u_rx_bytes", i); + "veb.tc_%d_rx_bytes", i); p += ETH_GSTRING_LEN; } } @@ -1601,23 +1601,23 @@ static void i40e_get_strings(struct net_device *netdev, u32 stringset, } for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) { snprintf(p, ETH_GSTRING_LEN, - "port.tx_priority_%u_xon", i); + "port.tx_priority_%d_xon", i); p += ETH_GSTRING_LEN; snprintf(p, ETH_GSTRING_LEN, - "port.tx_priority_%u_xoff", i); + "port.tx_priority_%d_xoff", i); p += ETH_GSTRING_LEN; } for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) { snprintf(p, ETH_GSTRING_LEN, - "port.rx_priority_%u_xon", i); + "port.rx_priority_%d_xon", i); p += ETH_GSTRING_LEN; snprintf(p, ETH_GSTRING_LEN, - "port.rx_priority_%u_xoff", i); + "port.rx_priority_%d_xoff", i); p += ETH_GSTRING_LEN; } for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) { snprintf(p, ETH_GSTRING_LEN, - "port.rx_priority_%u_xon_2_xoff", i); + "port.rx_priority_%d_xon_2_xoff", i); p += ETH_GSTRING_LEN; } /* BUG_ON(p - data != I40E_STATS_LEN * ETH_GSTRING_LEN); */ From 91cdca4faa3e4085b37cc084b15955bb20ca2e31 Mon Sep 17 00:00:00 2001 From: Catherine Sullivan Date: Mon, 15 Aug 2016 14:17:18 -0700 Subject: [PATCH 0370/1715] i40e: Check client is open before calling client ops We were having a race between the completion of the client open and calls to the client ops so don't call a client op unless we are sure the client is open. Testing Hints: Load IWARP driver and make sure it works as expected. Change-Id: I741f4f2aa4fcbfdad3e40dabbbb1b005856c396b Signed-off-by: Catherine Sullivan Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_client.c | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.c b/drivers/net/ethernet/intel/i40e/i40e_client.c index 677dae54918d..1035f885d57b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_client.c +++ b/drivers/net/ethernet/intel/i40e/i40e_client.c @@ -148,6 +148,11 @@ i40e_notify_client_of_vf_msg(struct i40e_vsi *vsi, u32 vf_id, u8 *msg, u16 len) "Cannot locate client instance virtual channel receive routine\n"); continue; } + if (!test_bit(__I40E_CLIENT_INSTANCE_OPENED, + &cdev->state)) { + dev_dbg(&vsi->back->pdev->dev, "Client is not open, abort virtchnl_receive\n"); + continue; + } cdev->client->ops->virtchnl_receive(&cdev->lan_info, cdev->client, vf_id, msg, len); @@ -181,6 +186,11 @@ void i40e_notify_client_of_l2_param_changes(struct i40e_vsi *vsi) "Cannot locate client instance l2_param_change routine\n"); continue; } + if (!test_bit(__I40E_CLIENT_INSTANCE_OPENED, + &cdev->state)) { + dev_dbg(&vsi->back->pdev->dev, "Client is not open, abort l2 param change\n"); + continue; + } cdev->lan_info.params = params; cdev->client->ops->l2_param_change(&cdev->lan_info, cdev->client, @@ -298,6 +308,11 @@ void i40e_notify_client_of_vf_reset(struct i40e_pf *pf, u32 vf_id) "Cannot locate client instance VF reset routine\n"); continue; } + if (!test_bit(__I40E_CLIENT_INSTANCE_OPENED, + &cdev->state)) { + dev_dbg(&pf->pdev->dev, "Client is not open, abort vf-reset\n"); + continue; + } cdev->client->ops->vf_reset(&cdev->lan_info, cdev->client, vf_id); } @@ -328,6 +343,11 @@ void i40e_notify_client_of_vf_enable(struct i40e_pf *pf, u32 num_vfs) "Cannot locate client instance VF enable routine\n"); continue; } + if (!test_bit(__I40E_CLIENT_INSTANCE_OPENED, + &cdev->state)) { + dev_dbg(&pf->pdev->dev, "Client is not open, abort vf-enable\n"); + continue; + } cdev->client->ops->vf_enable(&cdev->lan_info, cdev->client, num_vfs); } @@ -362,6 +382,11 @@ int i40e_vf_client_capable(struct i40e_pf *pf, u32 vf_id, "Cannot locate client instance VF capability routine\n"); continue; } + if (!test_bit(__I40E_CLIENT_INSTANCE_OPENED, + &cdev->state)) { + dev_dbg(&pf->pdev->dev, "Client is not open, abort vf-capable\n"); + continue; + } capable = cdev->client->ops->vf_capable(&cdev->lan_info, cdev->client, vf_id); From 682d11d7001e61710b282df51a51250a57514772 Mon Sep 17 00:00:00 2001 From: Harshitha Ramamurthy Date: Mon, 15 Aug 2016 14:17:19 -0700 Subject: [PATCH 0371/1715] i40e: Initialize pointer in client_release function The function i40e_client_release has a print statement that uses an adapter pointer which is not initialized if a previous if statement is not true. Hence, intialize it in the right place. Change-ID: I1afdaa2c46771ac42be56edcc41bb56b455b06c8 Signed-off-by: Harshitha Ramamurthy Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_client.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.c b/drivers/net/ethernet/intel/i40e/i40e_client.c index 1035f885d57b..09a37cfcffd6 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_client.c +++ b/drivers/net/ethernet/intel/i40e/i40e_client.c @@ -662,7 +662,7 @@ int i40e_lan_del_device(struct i40e_pf *pf) static int i40e_client_release(struct i40e_client *client) { struct i40e_client_instance *cdev, *tmp; - struct i40e_pf *pf = NULL; + struct i40e_pf *pf; int ret = 0; LIST_HEAD(cdevs_tmp); @@ -672,12 +672,12 @@ static int i40e_client_release(struct i40e_client *client) if (strncmp(cdev->client->name, client->name, I40E_CLIENT_STR_LENGTH)) continue; + pf = (struct i40e_pf *)cdev->lan_info.pf; if (test_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state)) { if (atomic_read(&cdev->ref_cnt) > 0) { ret = I40E_ERR_NOT_READY; goto out; } - pf = (struct i40e_pf *)cdev->lan_info.pf; if (client->ops && client->ops->close) client->ops->close(&cdev->lan_info, client, false); From 35f5034f8e4bb4bce0c95a2aea2c719afbf8201f Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 17 Aug 2016 03:37:31 -0700 Subject: [PATCH 0372/1715] i40e: Remove XSTRINGIFY macro definitions and uses Use __stringify instead. Signed-off-by: Joe Perches Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e.h | 4 ---- drivers/net/ethernet/intel/i40e/i40e_client.h | 6 +++--- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 8dc98c273663..747ef2d504f3 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -120,10 +120,6 @@ #define I40E_CURRENT_NVM_VERSION_HI 0x2 #define I40E_CURRENT_NVM_VERSION_LO 0x40 -/* magic for getting defines into strings */ -#define STRINGIFY(foo) #foo -#define XSTRINGIFY(bar) STRINGIFY(bar) - #define I40E_RX_DESC(R, i) \ (&(((union i40e_32byte_rx_desc *)((R)->desc))[i])) #define I40E_TX_DESC(R, i) \ diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.h b/drivers/net/ethernet/intel/i40e/i40e_client.h index a4601d97fb24..38a6c36a6a0e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_client.h +++ b/drivers/net/ethernet/intel/i40e/i40e_client.h @@ -36,9 +36,9 @@ #define I40E_CLIENT_VERSION_MINOR 01 #define I40E_CLIENT_VERSION_BUILD 00 #define I40E_CLIENT_VERSION_STR \ - XSTRINGIFY(I40E_CLIENT_VERSION_MAJOR) "." \ - XSTRINGIFY(I40E_CLIENT_VERSION_MINOR) "." \ - XSTRINGIFY(I40E_CLIENT_VERSION_BUILD) + __stringify(I40E_CLIENT_VERSION_MAJOR) "." \ + __stringify(I40E_CLIENT_VERSION_MINOR) "." \ + __stringify(I40E_CLIENT_VERSION_BUILD) struct i40e_client_version { u8 major; From 0d8ab54a4119880bb9cb4680da1f5e9068d0d333 Mon Sep 17 00:00:00 2001 From: Avinash Dayanand Date: Wed, 17 Aug 2016 16:04:06 -0700 Subject: [PATCH 0373/1715] i40e: Correcting mutex usage in client code Correcting the mutex usage, in client_subtask(), mutex_unlock has to be called just before client_del_instance() since this function opens and later closes the same mutex again. Similarly in client_is_registered removing the mutex since it closes the mutex twice. This is a patch suggested by RDMA team. Change-ID: Icce519c266e4221b8a2a72a15ba5bf01750e5852 Signed-off-by: Avinash Dayanand Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_client.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.c b/drivers/net/ethernet/intel/i40e/i40e_client.c index 09a37cfcffd6..5404b32c9adf 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_client.c +++ b/drivers/net/ethernet/intel/i40e/i40e_client.c @@ -576,6 +576,7 @@ void i40e_client_subtask(struct i40e_pf *pf) set_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state); } else { /* remove client instance */ + mutex_unlock(&i40e_client_instance_mutex); i40e_client_del_instance(pf, client); atomic_dec(&client->ref_cnt); continue; From c57c99597376c2a223b1ff472f614211ade44e76 Mon Sep 17 00:00:00 2001 From: Jeff Kirsher Date: Fri, 19 Aug 2016 21:47:41 -0700 Subject: [PATCH 0374/1715] i40e/i40evf: Fix indentation Several defines and code comments were indented with spaces instead of tabs, correct the issue to make indentation consistent. Change-ID: I0dc6bbb990ec4a9e856acc9ec526d876181f092c Signed-off-by: Jeff Kirsher Tested-by: Andrew Bowers --- drivers/net/ethernet/intel/i40e/i40e.h | 129 +++++++++++---------- drivers/net/ethernet/intel/i40evf/i40evf.h | 57 ++++----- 2 files changed, 95 insertions(+), 91 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 747ef2d504f3..19103a6a7dcc 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -65,72 +65,72 @@ #include "i40e_dcb.h" /* Useful i40e defaults */ -#define I40E_MAX_VEB 16 +#define I40E_MAX_VEB 16 -#define I40E_MAX_NUM_DESCRIPTORS 4096 -#define I40E_MAX_CSR_SPACE (4 * 1024 * 1024 - 64 * 1024) -#define I40E_DEFAULT_NUM_DESCRIPTORS 512 -#define I40E_REQ_DESCRIPTOR_MULTIPLE 32 -#define I40E_MIN_NUM_DESCRIPTORS 64 -#define I40E_MIN_MSIX 2 -#define I40E_DEFAULT_NUM_VMDQ_VSI 8 /* max 256 VSIs */ -#define I40E_MIN_VSI_ALLOC 51 /* LAN, ATR, FCOE, 32 VF, 16 VMDQ */ +#define I40E_MAX_NUM_DESCRIPTORS 4096 +#define I40E_MAX_CSR_SPACE (4 * 1024 * 1024 - 64 * 1024) +#define I40E_DEFAULT_NUM_DESCRIPTORS 512 +#define I40E_REQ_DESCRIPTOR_MULTIPLE 32 +#define I40E_MIN_NUM_DESCRIPTORS 64 +#define I40E_MIN_MSIX 2 +#define I40E_DEFAULT_NUM_VMDQ_VSI 8 /* max 256 VSIs */ +#define I40E_MIN_VSI_ALLOC 51 /* LAN, ATR, FCOE, 32 VF, 16 VMDQ */ /* max 16 qps */ #define i40e_default_queues_per_vmdq(pf) \ (((pf)->flags & I40E_FLAG_RSS_AQ_CAPABLE) ? 4 : 1) -#define I40E_DEFAULT_QUEUES_PER_VF 4 -#define I40E_DEFAULT_QUEUES_PER_TC 1 /* should be a power of 2 */ +#define I40E_DEFAULT_QUEUES_PER_VF 4 +#define I40E_DEFAULT_QUEUES_PER_TC 1 /* should be a power of 2 */ #define i40e_pf_get_max_q_per_tc(pf) \ (((pf)->flags & I40E_FLAG_128_QP_RSS_CAPABLE) ? 128 : 64) -#define I40E_FDIR_RING 0 -#define I40E_FDIR_RING_COUNT 32 +#define I40E_FDIR_RING 0 +#define I40E_FDIR_RING_COUNT 32 #ifdef I40E_FCOE -#define I40E_DEFAULT_FCOE 8 /* default number of QPs for FCoE */ -#define I40E_MINIMUM_FCOE 1 /* minimum number of QPs for FCoE */ +#define I40E_DEFAULT_FCOE 8 /* default number of QPs for FCoE */ +#define I40E_MINIMUM_FCOE 1 /* minimum number of QPs for FCoE */ #endif /* I40E_FCOE */ -#define I40E_MAX_AQ_BUF_SIZE 4096 -#define I40E_AQ_LEN 256 -#define I40E_AQ_WORK_LIMIT 66 /* max number of VFs + a little */ -#define I40E_MAX_USER_PRIORITY 8 -#define I40E_DEFAULT_MSG_ENABLE 4 -#define I40E_QUEUE_WAIT_RETRY_LIMIT 10 -#define I40E_INT_NAME_STR_LEN (IFNAMSIZ + 16) +#define I40E_MAX_AQ_BUF_SIZE 4096 +#define I40E_AQ_LEN 256 +#define I40E_AQ_WORK_LIMIT 66 /* max number of VFs + a little */ +#define I40E_MAX_USER_PRIORITY 8 +#define I40E_DEFAULT_MSG_ENABLE 4 +#define I40E_QUEUE_WAIT_RETRY_LIMIT 10 +#define I40E_INT_NAME_STR_LEN (IFNAMSIZ + 16) /* Ethtool Private Flags */ -#define I40E_PRIV_FLAGS_MFP_FLAG BIT(0) -#define I40E_PRIV_FLAGS_LINKPOLL_FLAG BIT(1) +#define I40E_PRIV_FLAGS_MFP_FLAG BIT(0) +#define I40E_PRIV_FLAGS_LINKPOLL_FLAG BIT(1) #define I40E_PRIV_FLAGS_FD_ATR BIT(2) #define I40E_PRIV_FLAGS_VEB_STATS BIT(3) #define I40E_PRIV_FLAGS_HW_ATR_EVICT BIT(4) #define I40E_PRIV_FLAGS_TRUE_PROMISC_SUPPORT BIT(5) -#define I40E_NVM_VERSION_LO_SHIFT 0 -#define I40E_NVM_VERSION_LO_MASK (0xff << I40E_NVM_VERSION_LO_SHIFT) -#define I40E_NVM_VERSION_HI_SHIFT 12 -#define I40E_NVM_VERSION_HI_MASK (0xf << I40E_NVM_VERSION_HI_SHIFT) -#define I40E_OEM_VER_BUILD_MASK 0xffff -#define I40E_OEM_VER_PATCH_MASK 0xff -#define I40E_OEM_VER_BUILD_SHIFT 8 -#define I40E_OEM_VER_SHIFT 24 +#define I40E_NVM_VERSION_LO_SHIFT 0 +#define I40E_NVM_VERSION_LO_MASK (0xff << I40E_NVM_VERSION_LO_SHIFT) +#define I40E_NVM_VERSION_HI_SHIFT 12 +#define I40E_NVM_VERSION_HI_MASK (0xf << I40E_NVM_VERSION_HI_SHIFT) +#define I40E_OEM_VER_BUILD_MASK 0xffff +#define I40E_OEM_VER_PATCH_MASK 0xff +#define I40E_OEM_VER_BUILD_SHIFT 8 +#define I40E_OEM_VER_SHIFT 24 #define I40E_PHY_DEBUG_ALL \ (I40E_AQ_PHY_DEBUG_DISABLE_LINK_FW | \ I40E_AQ_PHY_DEBUG_DISABLE_ALL_LINK_FW) /* The values in here are decimal coded as hex as is the case in the NVM map*/ -#define I40E_CURRENT_NVM_VERSION_HI 0x2 -#define I40E_CURRENT_NVM_VERSION_LO 0x40 +#define I40E_CURRENT_NVM_VERSION_HI 0x2 +#define I40E_CURRENT_NVM_VERSION_LO 0x40 -#define I40E_RX_DESC(R, i) \ +#define I40E_RX_DESC(R, i) \ (&(((union i40e_32byte_rx_desc *)((R)->desc))[i])) -#define I40E_TX_DESC(R, i) \ +#define I40E_TX_DESC(R, i) \ (&(((struct i40e_tx_desc *)((R)->desc))[i])) -#define I40E_TX_CTXTDESC(R, i) \ +#define I40E_TX_CTXTDESC(R, i) \ (&(((struct i40e_tx_context_desc *)((R)->desc))[i])) -#define I40E_TX_FDIRDESC(R, i) \ +#define I40E_TX_FDIRDESC(R, i) \ (&(((struct i40e_filter_program_desc *)((R)->desc))[i])) /* default to trying for four seconds */ -#define I40E_TRY_LINK_TIMEOUT (4 * HZ) +#define I40E_TRY_LINK_TIMEOUT (4 * HZ) /** * i40e_is_mac_710 - Return true if MAC is X710/XL710 @@ -195,9 +195,9 @@ struct i40e_lump_tracking { #define I40E_FDIR_BUFFER_HEAD_ROOM 32 #define I40E_FDIR_BUFFER_HEAD_ROOM_FOR_ATR (I40E_FDIR_BUFFER_HEAD_ROOM * 4) -#define I40E_HKEY_ARRAY_SIZE ((I40E_PFQF_HKEY_MAX_INDEX + 1) * 4) -#define I40E_HLUT_ARRAY_SIZE ((I40E_PFQF_HLUT_MAX_INDEX + 1) * 4) -#define I40E_VF_HLUT_ARRAY_SIZE ((I40E_VFQF_HLUT1_MAX_INDEX + 1) * 4) +#define I40E_HKEY_ARRAY_SIZE ((I40E_PFQF_HKEY_MAX_INDEX + 1) * 4) +#define I40E_HLUT_ARRAY_SIZE ((I40E_PFQF_HLUT_MAX_INDEX + 1) * 4) +#define I40E_VF_HLUT_ARRAY_SIZE ((I40E_VFQF_HLUT1_MAX_INDEX + 1) * 4) enum i40e_fd_stat_idx { I40E_FD_STAT_ATR, @@ -383,8 +383,8 @@ struct i40e_pf { struct mutex switch_mutex; u16 lan_vsi; /* our default LAN VSI */ u16 lan_veb; /* initial relay, if exists */ -#define I40E_NO_VEB 0xffff -#define I40E_NO_VSI 0xffff +#define I40E_NO_VEB 0xffff +#define I40E_NO_VSI 0xffff u16 next_vsi; /* Next unallocated VSI - 0-based! */ struct i40e_vsi **vsi; struct i40e_veb *veb[I40E_MAX_VEB]; @@ -419,8 +419,8 @@ struct i40e_pf { */ u16 dcbx_cap; - u32 fcoe_hmc_filt_num; - u32 fcoe_hmc_cntx_num; + u32 fcoe_hmc_filt_num; + u32 fcoe_hmc_cntx_num; struct i40e_filter_control_settings filter_settings; struct ptp_clock *ptp_clock; @@ -466,10 +466,10 @@ struct i40e_mac_filter { struct i40e_veb { struct i40e_pf *pf; u16 idx; - u16 veb_idx; /* index of VEB parent */ + u16 veb_idx; /* index of VEB parent */ u16 seid; u16 uplink_seid; - u16 stats_idx; /* index of VEB parent */ + u16 stats_idx; /* index of VEB parent */ u8 enabled_tc; u16 bridge_mode; /* Bridge Mode (VEB/VEPA) */ u16 flags; @@ -530,12 +530,13 @@ struct i40e_vsi { u32 promisc_threshold; u16 work_limit; - u16 int_rate_limit; /* value in usecs */ + u16 int_rate_limit; /* value in usecs */ + + u16 rss_table_size; /* HW RSS table size */ + u16 rss_size; /* Allocated RSS queues */ + u8 *rss_hkey_user; /* User configured hash keys */ + u8 *rss_lut_user; /* User configured lookup table entries */ - u16 rss_table_size; /* HW RSS table size */ - u16 rss_size; /* Allocated RSS queues */ - u8 *rss_hkey_user; /* User configured hash keys */ - u8 *rss_lut_user; /* User configured lookup table entries */ u16 max_frame; u16 rx_buf_len; @@ -546,14 +547,14 @@ struct i40e_vsi { int base_vector; bool irqs_ready; - u16 seid; /* HW index of this VSI (absolute index) */ - u16 id; /* VSI number */ + u16 seid; /* HW index of this VSI (absolute index) */ + u16 id; /* VSI number */ u16 uplink_seid; - u16 base_queue; /* vsi's first queue in hw array */ - u16 alloc_queue_pairs; /* Allocated Tx/Rx queues */ - u16 req_queue_pairs; /* User requested queue pairs */ - u16 num_queue_pairs; /* Used tx and rx pairs */ + u16 base_queue; /* vsi's first queue in hw array */ + u16 alloc_queue_pairs; /* Allocated Tx/Rx queues */ + u16 req_queue_pairs; /* User requested queue pairs */ + u16 num_queue_pairs; /* Used tx and rx pairs */ u16 num_desc; enum i40e_vsi_type type; /* VSI type, e.g., LAN, FCoE, etc */ s16 vf_id; /* Virtual function ID for SRIOV VSIs */ @@ -572,11 +573,11 @@ struct i40e_vsi { /* TC BW limit max quanta within VSI */ u8 bw_ets_max_quanta[I40E_MAX_TRAFFIC_CLASS]; - struct i40e_pf *back; /* Backreference to associated PF */ - u16 idx; /* index in pf->vsi[] */ - u16 veb_idx; /* index of VEB parent */ - struct kobject *kobj; /* sysfs object */ - bool current_isup; /* Sync 'link up' logging */ + struct i40e_pf *back; /* Backreference to associated PF */ + u16 idx; /* index in pf->vsi[] */ + u16 veb_idx; /* index of VEB parent */ + struct kobject *kobj; /* sysfs object */ + bool current_isup; /* Sync 'link up' logging */ void *priv; /* client driver data reference. */ diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h index 6fa00f36bdc4..dc00aaf94687 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf.h +++ b/drivers/net/ethernet/intel/i40evf/i40evf.h @@ -71,20 +71,20 @@ struct i40e_vsi { /* How many Rx Buffers do we bundle into one write to the hardware ? */ #define I40EVF_RX_BUFFER_WRITE 16 /* Must be power of 2 */ -#define I40EVF_DEFAULT_TXD 512 -#define I40EVF_DEFAULT_RXD 512 -#define I40EVF_MAX_TXD 4096 -#define I40EVF_MIN_TXD 64 -#define I40EVF_MAX_RXD 4096 -#define I40EVF_MIN_RXD 64 -#define I40EVF_REQ_DESCRIPTOR_MULTIPLE 32 +#define I40EVF_DEFAULT_TXD 512 +#define I40EVF_DEFAULT_RXD 512 +#define I40EVF_MAX_TXD 4096 +#define I40EVF_MIN_TXD 64 +#define I40EVF_MAX_RXD 4096 +#define I40EVF_MIN_RXD 64 +#define I40EVF_REQ_DESCRIPTOR_MULTIPLE 32 /* Supported Rx Buffer Sizes */ -#define I40EVF_RXBUFFER_2048 2048 -#define I40EVF_MAX_RXBUFFER 16384 /* largest size for single descriptor */ -#define I40EVF_MAX_AQ_BUF_SIZE 4096 -#define I40EVF_AQ_LEN 32 -#define I40EVF_AQ_MAX_ERR 20 /* times to try before resetting AQ */ +#define I40EVF_RXBUFFER_2048 2048 +#define I40EVF_MAX_RXBUFFER 16384 /* largest size for single descriptor */ +#define I40EVF_MAX_AQ_BUF_SIZE 4096 +#define I40EVF_AQ_LEN 32 +#define I40EVF_AQ_MAX_ERR 20 /* times to try before resetting AQ */ #define MAXIMUM_ETHERNET_VLAN_SIZE (VLAN_ETH_FRAME_LEN + ETH_FCS_LEN) @@ -111,7 +111,7 @@ struct i40e_q_vector { u8 num_ringpairs; /* total number of ring pairs in vector */ #define ITR_COUNTDOWN_START 100 u8 itr_countdown; /* when 0 or 1 update ITR */ - int v_idx; /* vector index in list */ + int v_idx; /* vector index in list */ char name[IFNAMSIZ + 9]; bool arm_wb_state; cpumask_var_t affinity_mask; @@ -129,11 +129,11 @@ struct i40e_q_vector { ((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \ (R)->next_to_clean - (R)->next_to_use - 1) -#define I40EVF_RX_DESC_ADV(R, i) \ +#define I40EVF_RX_DESC_ADV(R, i) \ (&(((union i40e_adv_rx_desc *)((R).desc))[i])) -#define I40EVF_TX_DESC_ADV(R, i) \ +#define I40EVF_TX_DESC_ADV(R, i) \ (&(((union i40e_adv_tx_desc *)((R).desc))[i])) -#define I40EVF_TX_CTXTDESC_ADV(R, i) \ +#define I40EVF_TX_CTXTDESC_ADV(R, i) \ (&(((struct i40e_adv_tx_context_desc *)((R).desc))[i])) #define OTHER_VECTOR 1 @@ -204,22 +204,25 @@ struct i40evf_adapter { struct msix_entry *msix_entries; u32 flags; -#define I40EVF_FLAG_RX_CSUM_ENABLED BIT(0) -#define I40EVF_FLAG_IMIR_ENABLED BIT(5) -#define I40EVF_FLAG_MQ_CAPABLE BIT(6) -#define I40EVF_FLAG_NEED_LINK_UPDATE BIT(7) -#define I40EVF_FLAG_PF_COMMS_FAILED BIT(8) -#define I40EVF_FLAG_RESET_PENDING BIT(9) -#define I40EVF_FLAG_RESET_NEEDED BIT(10) +#define I40EVF_FLAG_RX_CSUM_ENABLED BIT(0) +#define I40EVF_FLAG_IN_NETPOLL BIT(4) +#define I40EVF_FLAG_IMIR_ENABLED BIT(5) +#define I40EVF_FLAG_MQ_CAPABLE BIT(6) +#define I40EVF_FLAG_NEED_LINK_UPDATE BIT(7) +#define I40EVF_FLAG_PF_COMMS_FAILED BIT(8) +#define I40EVF_FLAG_RESET_PENDING BIT(9) +#define I40EVF_FLAG_RESET_NEEDED BIT(10) #define I40EVF_FLAG_WB_ON_ITR_CAPABLE BIT(11) #define I40EVF_FLAG_OUTER_UDP_CSUM_CAPABLE BIT(12) #define I40EVF_FLAG_ADDR_SET_BY_PF BIT(13) +#define I40EVF_FLAG_SERVICE_CLIENT_REQUESTED BIT(14) #define I40EVF_FLAG_PROMISC_ON BIT(15) #define I40EVF_FLAG_ALLMULTI_ON BIT(16) /* duplicates for common code */ -#define I40E_FLAG_FDIR_ATR_ENABLED 0 -#define I40E_FLAG_DCB_ENABLED 0 -#define I40E_FLAG_RX_CSUM_ENABLED I40EVF_FLAG_RX_CSUM_ENABLED +#define I40E_FLAG_FDIR_ATR_ENABLED 0 +#define I40E_FLAG_DCB_ENABLED 0 +#define I40E_FLAG_IN_NETPOLL I40EVF_FLAG_IN_NETPOLL +#define I40E_FLAG_RX_CSUM_ENABLED I40EVF_FLAG_RX_CSUM_ENABLED #define I40E_FLAG_WB_ON_ITR_CAPABLE I40EVF_FLAG_WB_ON_ITR_CAPABLE #define I40E_FLAG_OUTER_UDP_CSUM_CAPABLE I40EVF_FLAG_OUTER_UDP_CSUM_CAPABLE /* flags for admin queue service task */ @@ -233,7 +236,7 @@ struct i40evf_adapter { #define I40EVF_FLAG_AQ_CONFIGURE_QUEUES BIT(6) #define I40EVF_FLAG_AQ_MAP_VECTORS BIT(7) #define I40EVF_FLAG_AQ_HANDLE_RESET BIT(8) -#define I40EVF_FLAG_AQ_CONFIGURE_RSS BIT(9) /* direct AQ config */ +#define I40EVF_FLAG_AQ_CONFIGURE_RSS BIT(9) /* direct AQ config */ #define I40EVF_FLAG_AQ_GET_CONFIG BIT(10) /* Newer style, RSS done by the PF so we can ignore hardware vagaries. */ #define I40EVF_FLAG_AQ_GET_HENA BIT(11) From bf3a178c8a064d38f2570e6b8e4de2db07cd7083 Mon Sep 17 00:00:00 2001 From: Avinash Dayanand Date: Wed, 17 Aug 2016 16:04:08 -0700 Subject: [PATCH 0375/1715] i40evf: Open RDMA Client after reset RDMA client is closed during the PF reset and needs to be opened again. Setting the flag so that RDMA client is opened in watchdog() function. Change-ID: I507b1e4cbd05528cdff68fd360ef3dcac8901263 Signed-off-by: Avinash Dayanand Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40evf/i40evf_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index 24f88ecdcddc..f751f7bc0d81 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -1804,6 +1804,8 @@ continue_reset: } adapter->aq_required |= I40EVF_FLAG_AQ_ADD_MAC_FILTER; adapter->aq_required |= I40EVF_FLAG_AQ_ADD_VLAN_FILTER; + /* Open RDMA Client again */ + adapter->aq_required |= I40EVF_FLAG_SERVICE_CLIENT_REQUESTED; clear_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section); i40evf_misc_irq_enable(adapter); From c0b558e5a393b77d2fe53335b5e07ca0e77178f8 Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Fri, 19 Aug 2016 14:47:09 -0700 Subject: [PATCH 0376/1715] hv_netvsc: Implement batching of receive completions The existing code uses busy retry when unable to send out receive completions due to full ring buffer. It also gives up retrying after limit is reached, and causes receive buffer slots not being recycled. This patch implements batching of receive completions. It also prevents dropping receive completions due to full ring buffer. Signed-off-by: Haiyang Zhang Reviewed-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 17 +++ drivers/net/hyperv/netvsc.c | 168 ++++++++++++++++++++++++------ drivers/net/hyperv/rndis_filter.c | 6 +- 3 files changed, 159 insertions(+), 32 deletions(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index fa7b1e42508b..ce45d6835e88 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -634,6 +634,20 @@ struct multi_send_data { u32 count; /* counter of batched packets */ }; +struct recv_comp_data { + u64 tid; /* transaction id */ + u32 status; +}; + +/* Netvsc Receive Slots Max */ +#define NETVSC_RECVSLOT_MAX (NETVSC_RECEIVE_BUFFER_SIZE / ETH_DATA_LEN + 1) + +struct multi_recv_comp { + void *buf; /* queued receive completions */ + u32 first; /* first data entry */ + u32 next; /* next entry for writing */ +}; + struct netvsc_stats { u64 packets; u64 bytes; @@ -736,6 +750,9 @@ struct netvsc_device { u32 max_pkt; /* max number of pkt in one send, e.g. 8 */ u32 pkt_align; /* alignment bytes, e.g. 8 */ + struct multi_recv_comp mrc[VRSS_CHANNEL_MAX]; + atomic_t num_outstanding_recvs; + atomic_t open_cnt; }; diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 8078bc209cac..b15edfc37a59 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -74,6 +74,9 @@ static struct netvsc_device *alloc_net_device(void) return NULL; } + net_device->mrc[0].buf = vzalloc(NETVSC_RECVSLOT_MAX * + sizeof(struct recv_comp_data)); + init_waitqueue_head(&net_device->wait_drain); net_device->destroy = false; atomic_set(&net_device->open_cnt, 0); @@ -85,6 +88,11 @@ static struct netvsc_device *alloc_net_device(void) static void free_netvsc_device(struct netvsc_device *nvdev) { + int i; + + for (i = 0; i < VRSS_CHANNEL_MAX; i++) + vfree(nvdev->mrc[i].buf); + kfree(nvdev->cb_buffer); kfree(nvdev); } @@ -107,7 +115,8 @@ static struct netvsc_device *get_inbound_net_device(struct hv_device *device) goto get_in_err; if (net_device->destroy && - atomic_read(&net_device->num_outstanding_sends) == 0) + atomic_read(&net_device->num_outstanding_sends) == 0 && + atomic_read(&net_device->num_outstanding_recvs) == 0) net_device = NULL; get_in_err: @@ -972,49 +981,121 @@ send_now: return ret; } -static void netvsc_send_recv_completion(struct hv_device *device, - struct vmbus_channel *channel, - struct netvsc_device *net_device, - u64 transaction_id, u32 status) +static int netvsc_send_recv_completion(struct vmbus_channel *channel, + u64 transaction_id, u32 status) { struct nvsp_message recvcompMessage; - int retries = 0; int ret; - struct net_device *ndev = hv_get_drvdata(device); recvcompMessage.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE; recvcompMessage.msg.v1_msg.send_rndis_pkt_complete.status = status; -retry_send_cmplt: /* Send the completion */ ret = vmbus_sendpacket(channel, &recvcompMessage, - sizeof(struct nvsp_message), transaction_id, - VM_PKT_COMP, 0); - if (ret == 0) { - /* success */ - /* no-op */ - } else if (ret == -EAGAIN) { - /* no more room...wait a bit and attempt to retry 3 times */ - retries++; - netdev_err(ndev, "unable to send receive completion pkt" - " (tid %llx)...retrying %d\n", transaction_id, retries); + sizeof(struct nvsp_message_header) + sizeof(u32), + transaction_id, VM_PKT_COMP, 0); - if (retries < 4) { - udelay(100); - goto retry_send_cmplt; - } else { - netdev_err(ndev, "unable to send receive " - "completion pkt (tid %llx)...give up retrying\n", - transaction_id); - } - } else { - netdev_err(ndev, "unable to send receive " - "completion pkt - %llx\n", transaction_id); + return ret; +} + +static inline void count_recv_comp_slot(struct netvsc_device *nvdev, u16 q_idx, + u32 *filled, u32 *avail) +{ + u32 first = nvdev->mrc[q_idx].first; + u32 next = nvdev->mrc[q_idx].next; + + *filled = (first > next) ? NETVSC_RECVSLOT_MAX - first + next : + next - first; + + *avail = NETVSC_RECVSLOT_MAX - *filled - 1; +} + +/* Read the first filled slot, no change to index */ +static inline struct recv_comp_data *read_recv_comp_slot(struct netvsc_device + *nvdev, u16 q_idx) +{ + u32 filled, avail; + + if (!nvdev->mrc[q_idx].buf) + return NULL; + + count_recv_comp_slot(nvdev, q_idx, &filled, &avail); + if (!filled) + return NULL; + + return nvdev->mrc[q_idx].buf + nvdev->mrc[q_idx].first * + sizeof(struct recv_comp_data); +} + +/* Put the first filled slot back to available pool */ +static inline void put_recv_comp_slot(struct netvsc_device *nvdev, u16 q_idx) +{ + int num_recv; + + nvdev->mrc[q_idx].first = (nvdev->mrc[q_idx].first + 1) % + NETVSC_RECVSLOT_MAX; + + num_recv = atomic_dec_return(&nvdev->num_outstanding_recvs); + + if (nvdev->destroy && num_recv == 0) + wake_up(&nvdev->wait_drain); +} + +/* Check and send pending recv completions */ +static void netvsc_chk_recv_comp(struct netvsc_device *nvdev, + struct vmbus_channel *channel, u16 q_idx) +{ + struct recv_comp_data *rcd; + int ret; + + while (true) { + rcd = read_recv_comp_slot(nvdev, q_idx); + if (!rcd) + break; + + ret = netvsc_send_recv_completion(channel, rcd->tid, + rcd->status); + if (ret) + break; + + put_recv_comp_slot(nvdev, q_idx); } } +#define NETVSC_RCD_WATERMARK 80 + +/* Get next available slot */ +static inline struct recv_comp_data *get_recv_comp_slot( + struct netvsc_device *nvdev, struct vmbus_channel *channel, u16 q_idx) +{ + u32 filled, avail, next; + struct recv_comp_data *rcd; + + if (!nvdev->recv_section) + return NULL; + + if (!nvdev->mrc[q_idx].buf) + return NULL; + + if (atomic_read(&nvdev->num_outstanding_recvs) > + nvdev->recv_section->num_sub_allocs * NETVSC_RCD_WATERMARK / 100) + netvsc_chk_recv_comp(nvdev, channel, q_idx); + + count_recv_comp_slot(nvdev, q_idx, &filled, &avail); + if (!avail) + return NULL; + + next = nvdev->mrc[q_idx].next; + rcd = nvdev->mrc[q_idx].buf + next * sizeof(struct recv_comp_data); + nvdev->mrc[q_idx].next = (next + 1) % NETVSC_RECVSLOT_MAX; + + atomic_inc(&nvdev->num_outstanding_recvs); + + return rcd; +} + static void netvsc_receive(struct netvsc_device *net_device, struct vmbus_channel *channel, struct hv_device *device, @@ -1029,6 +1110,9 @@ static void netvsc_receive(struct netvsc_device *net_device, int count = 0; struct net_device *ndev = hv_get_drvdata(device); void *data; + int ret; + struct recv_comp_data *rcd; + u16 q_idx = channel->offermsg.offer.sub_channel_index; /* * All inbound packets other than send completion should be xfer page @@ -1076,8 +1160,26 @@ static void netvsc_receive(struct netvsc_device *net_device, } - netvsc_send_recv_completion(device, channel, net_device, - vmxferpage_packet->d.trans_id, status); + if (!net_device->mrc[q_idx].buf) { + ret = netvsc_send_recv_completion(channel, + vmxferpage_packet->d.trans_id, + status); + if (ret) + netdev_err(ndev, "Recv_comp q:%hd, tid:%llx, err:%d\n", + q_idx, vmxferpage_packet->d.trans_id, ret); + return; + } + + rcd = get_recv_comp_slot(net_device, channel, q_idx); + + if (!rcd) { + netdev_err(ndev, "Recv_comp full buf q:%hd, tid:%llx\n", + q_idx, vmxferpage_packet->d.trans_id); + return; + } + + rcd->tid = vmxferpage_packet->d.trans_id; + rcd->status = status; } @@ -1166,6 +1268,7 @@ void netvsc_channel_cb(void *context) { int ret; struct vmbus_channel *channel = (struct vmbus_channel *)context; + u16 q_idx = channel->offermsg.offer.sub_channel_index; struct hv_device *device; struct netvsc_device *net_device; u32 bytes_recvd; @@ -1245,6 +1348,9 @@ void netvsc_channel_cb(void *context) if (bufferlen > NETVSC_PACKET_SIZE) kfree(buffer); + + netvsc_chk_recv_comp(net_device, channel, q_idx); + return; } diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index dd3b3352a950..3ecb2d05cf3f 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -897,7 +897,8 @@ cleanup: /* Wait for all send completions */ wait_event(nvdev->wait_drain, - atomic_read(&nvdev->num_outstanding_sends) == 0); + atomic_read(&nvdev->num_outstanding_sends) == 0 && + atomic_read(&nvdev->num_outstanding_recvs) == 0); if (request) put_rndis_request(dev, request); @@ -953,6 +954,9 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc) set_per_channel_state(new_sc, nvscdev->sub_cb_buf + (chn_index - 1) * NETVSC_PACKET_SIZE); + nvscdev->mrc[chn_index].buf = vzalloc(NETVSC_RECVSLOT_MAX * + sizeof(struct recv_comp_data)); + ret = vmbus_open(new_sc, nvscdev->ring_size * PAGE_SIZE, nvscdev->ring_size * PAGE_SIZE, NULL, 0, netvsc_channel_cb, new_sc); From 6afb1e28b8596443d72f82e085b4b2e8f410eae0 Mon Sep 17 00:00:00 2001 From: William Tu Date: Fri, 19 Aug 2016 11:55:44 -0700 Subject: [PATCH 0377/1715] samples/bpf: Add tunnel set/get tests. The patch creates sample code exercising bpf_skb_{set,get}_tunnel_key, and bpf_skb_{set,get}_tunnel_opt for GRE, VXLAN, and GENEVE. A native tunnel device is created in a namespace to interact with a lwtunnel device out of the namespace, with metadata enabled. The bpf_skb_set_* program is attached to tc egress and bpf_skb_get_* is attached to egress qdisc. A ping between two tunnels is used to verify correctness and the result of bpf_skb_get_* printed by bpf_trace_printk. Signed-off-by: William Tu Signed-off-by: David S. Miller --- samples/bpf/Makefile | 1 + samples/bpf/bpf_helpers.h | 8 ++ samples/bpf/tcbpf2_kern.c | 191 +++++++++++++++++++++++++++++++++ samples/bpf/test_tunnel_bpf.sh | 127 ++++++++++++++++++++++ 4 files changed, 327 insertions(+) create mode 100644 samples/bpf/tcbpf2_kern.c create mode 100755 samples/bpf/test_tunnel_bpf.sh diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index eb582c6264c3..db3cb061bfcd 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -67,6 +67,7 @@ always += tracex6_kern.o always += test_probe_write_user_kern.o always += trace_output_kern.o always += tcbpf1_kern.o +always += tcbpf2_kern.o always += lathist_kern.o always += offwaketime_kern.o always += spintest_kern.o diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h index 6f1672a7254e..bbdf62a1e45e 100644 --- a/samples/bpf/bpf_helpers.h +++ b/samples/bpf/bpf_helpers.h @@ -47,6 +47,14 @@ static int (*bpf_probe_write_user)(void *dst, void *src, int size) = (void *) BPF_FUNC_probe_write_user; static int (*bpf_current_task_under_cgroup)(void *map, int index) = (void *) BPF_FUNC_current_task_under_cgroup; +static int (*bpf_skb_get_tunnel_key)(void *ctx, void *key, int size, int flags) = + (void *) BPF_FUNC_skb_get_tunnel_key; +static int (*bpf_skb_set_tunnel_key)(void *ctx, void *key, int size, int flags) = + (void *) BPF_FUNC_skb_set_tunnel_key; +static int (*bpf_skb_get_tunnel_opt)(void *ctx, void *md, int size) = + (void *) BPF_FUNC_skb_get_tunnel_opt; +static int (*bpf_skb_set_tunnel_opt)(void *ctx, void *md, int size) = + (void *) BPF_FUNC_skb_set_tunnel_opt; /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions diff --git a/samples/bpf/tcbpf2_kern.c b/samples/bpf/tcbpf2_kern.c new file mode 100644 index 000000000000..7a15289da6cc --- /dev/null +++ b/samples/bpf/tcbpf2_kern.c @@ -0,0 +1,191 @@ +/* Copyright (c) 2016 VMware + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include "bpf_helpers.h" + +#define ERROR(ret) do {\ + char fmt[] = "ERROR line:%d ret:%d\n";\ + bpf_trace_printk(fmt, sizeof(fmt), __LINE__, ret); \ + } while(0) + +struct geneve_opt { + __be16 opt_class; + u8 type; + u8 length:5; + u8 r3:1; + u8 r2:1; + u8 r1:1; + u8 opt_data[8]; /* hard-coded to 8 byte */ +}; + +struct vxlan_metadata { + u32 gbp; +}; + +SEC("gre_set_tunnel") +int _gre_set_tunnel(struct __sk_buff *skb) +{ + int ret; + struct bpf_tunnel_key key; + + __builtin_memset(&key, 0x0, sizeof(key)); + key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */ + key.tunnel_id = 2; + key.tunnel_tos = 0; + key.tunnel_ttl = 64; + + ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_ZERO_CSUM_TX); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + return TC_ACT_OK; +} + +SEC("gre_get_tunnel") +int _gre_get_tunnel(struct __sk_buff *skb) +{ + int ret; + struct bpf_tunnel_key key; + char fmt[] = "key %d remote ip 0x%x\n"; + + ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + bpf_trace_printk(fmt, sizeof(fmt), key.tunnel_id, key.remote_ipv4); + return TC_ACT_OK; +} + +SEC("vxlan_set_tunnel") +int _vxlan_set_tunnel(struct __sk_buff *skb) +{ + int ret; + struct bpf_tunnel_key key; + struct vxlan_metadata md; + + __builtin_memset(&key, 0x0, sizeof(key)); + key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */ + key.tunnel_id = 2; + key.tunnel_tos = 0; + key.tunnel_ttl = 64; + + ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_ZERO_CSUM_TX); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + md.gbp = 0x800FF; /* Set VXLAN Group Policy extension */ + ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md)); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + return TC_ACT_OK; +} + +SEC("vxlan_get_tunnel") +int _vxlan_get_tunnel(struct __sk_buff *skb) +{ + int ret; + struct bpf_tunnel_key key; + struct vxlan_metadata md; + char fmt[] = "key %d remote ip 0x%x vxlan gbp 0x%x\n"; + + ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + ret = bpf_skb_get_tunnel_opt(skb, &md, sizeof(md)); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + bpf_trace_printk(fmt, sizeof(fmt), + key.tunnel_id, key.remote_ipv4, md.gbp); + + return TC_ACT_OK; +} + +SEC("geneve_set_tunnel") +int _geneve_set_tunnel(struct __sk_buff *skb) +{ + int ret, ret2; + struct bpf_tunnel_key key; + struct geneve_opt gopt; + + __builtin_memset(&key, 0x0, sizeof(key)); + key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */ + key.tunnel_id = 2; + key.tunnel_tos = 0; + key.tunnel_ttl = 64; + + __builtin_memset(&gopt, 0x0, sizeof(gopt)); + gopt.opt_class = 0x102; /* Open Virtual Networking (OVN) */ + gopt.type = 0x08; + gopt.r1 = 1; + gopt.r2 = 0; + gopt.r3 = 1; + gopt.length = 2; /* 4-byte multiple */ + *(int *) &gopt.opt_data = 0xdeadbeef; + + ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_ZERO_CSUM_TX); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + ret = bpf_skb_set_tunnel_opt(skb, &gopt, sizeof(gopt)); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + return TC_ACT_OK; +} + +SEC("geneve_get_tunnel") +int _geneve_get_tunnel(struct __sk_buff *skb) +{ + int ret; + struct bpf_tunnel_key key; + struct geneve_opt gopt; + char fmt[] = "key %d remote ip 0x%x geneve class 0x%x\n"; + + ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + ret = bpf_skb_get_tunnel_opt(skb, &gopt, sizeof(gopt)); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + bpf_trace_printk(fmt, sizeof(fmt), + key.tunnel_id, key.remote_ipv4, gopt.opt_class); + return TC_ACT_OK; +} + +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/test_tunnel_bpf.sh b/samples/bpf/test_tunnel_bpf.sh new file mode 100755 index 000000000000..4956589a83ae --- /dev/null +++ b/samples/bpf/test_tunnel_bpf.sh @@ -0,0 +1,127 @@ +#!/bin/bash +# In Namespace 0 (at_ns0) using native tunnel +# Overlay IP: 10.1.1.100 +# local 192.16.1.100 remote 192.16.1.200 +# veth0 IP: 172.16.1.100, tunnel dev 00 + +# Out of Namespace using BPF set/get on lwtunnel +# Overlay IP: 10.1.1.200 +# local 172.16.1.200 remote 172.16.1.100 +# veth1 IP: 172.16.1.200, tunnel dev 11 + +set -e + +function config_device { + ip netns add at_ns0 + ip link add veth0 type veth peer name veth1 + ip link set veth0 netns at_ns0 + ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0 + ip netns exec at_ns0 ip link set dev veth0 up + ip link set dev veth1 up + ip addr add dev veth1 172.16.1.200/24 +} + +function add_gre_tunnel { + # in namespace + ip netns exec at_ns0 \ + ip link add dev $DEV_NS type $TYPE key 2 local 172.16.1.100 remote 172.16.1.200 + ip netns exec at_ns0 ip link set dev $DEV_NS up + ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 + + # out of namespace + ip link add dev $DEV type $TYPE key 2 external + ip link set dev $DEV up + ip addr add dev $DEV 10.1.1.200/24 +} + +function add_vxlan_tunnel { + # Set static ARP entry here because iptables set-mark works + # on L3 packet, as a result not applying to ARP packets, + # causing errors at get_tunnel_{key/opt}. + + # in namespace + ip netns exec at_ns0 \ + ip link add dev $DEV_NS type $TYPE id 2 dstport 4789 gbp remote 172.16.1.200 + ip netns exec at_ns0 ip link set dev $DEV_NS address 52:54:00:d9:01:00 up + ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 + ip netns exec at_ns0 arp -s 10.1.1.200 52:54:00:d9:02:00 + ip netns exec at_ns0 iptables -A OUTPUT -j MARK --set-mark 0x800FF + + # out of namespace + ip link add dev $DEV type $TYPE external gbp dstport 4789 + ip link set dev $DEV address 52:54:00:d9:02:00 up + ip addr add dev $DEV 10.1.1.200/24 + arp -s 10.1.1.100 52:54:00:d9:01:00 +} + +function add_geneve_tunnel { + # in namespace + ip netns exec at_ns0 \ + ip link add dev $DEV_NS type $TYPE id 2 dstport 6081 remote 172.16.1.200 + ip netns exec at_ns0 ip link set dev $DEV_NS up + ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 + + # out of namespace + ip link add dev $DEV type $TYPE dstport 6081 external + ip link set dev $DEV up + ip addr add dev $DEV 10.1.1.200/24 +} + +function attach_bpf { + DEV=$1 + SET_TUNNEL=$2 + GET_TUNNEL=$3 + tc qdisc add dev $DEV clsact + tc filter add dev $DEV egress bpf da obj tcbpf2_kern.o sec $SET_TUNNEL + tc filter add dev $DEV ingress bpf da obj tcbpf2_kern.o sec $GET_TUNNEL +} + +function test_gre { + TYPE=gretap + DEV_NS=gretap00 + DEV=gretap11 + config_device + add_gre_tunnel + attach_bpf $DEV gre_set_tunnel gre_get_tunnel + ping -c 1 10.1.1.100 + ip netns exec at_ns0 ping -c 1 10.1.1.200 +} + +function test_vxlan { + TYPE=vxlan + DEV_NS=vxlan00 + DEV=vxlan11 + config_device + add_vxlan_tunnel + attach_bpf $DEV vxlan_set_tunnel vxlan_get_tunnel + ping -c 1 10.1.1.100 + ip netns exec at_ns0 ping -c 1 10.1.1.200 +} + +function test_geneve { + TYPE=geneve + DEV_NS=geneve00 + DEV=geneve11 + config_device + add_geneve_tunnel + attach_bpf $DEV geneve_set_tunnel geneve_get_tunnel + ping -c 1 10.1.1.100 + ip netns exec at_ns0 ping -c 1 10.1.1.200 +} + +function cleanup { + ip netns delete at_ns0 + ip link del veth1 + ip link del $DEV +} + +echo "Testing GRE tunnel..." +test_gre +cleanup +echo "Testing VXLAN tunnel..." +test_vxlan +cleanup +echo "Testing GENEVE tunnel..." +test_geneve +cleanup +echo "Success" From 28e8190d2fb3918b5d46acafeb1968cd26e30dad Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Sat, 20 Aug 2016 08:54:15 +0200 Subject: [PATCH 0378/1715] tun: Use memdup_user() rather than duplicating its implementation Reuse existing functionality from memdup_user() instead of keeping duplicate source code. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Reviewed-by: Shmulik Ladkani Signed-off-by: David S. Miller --- drivers/net/tun.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 9c8b5bc2b9d8..a1aeccbce6d4 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -731,14 +731,9 @@ static int update_filter(struct tap_filter *filter, void __user *arg) } alen = ETH_ALEN * uf.count; - addr = kmalloc(alen, GFP_KERNEL); - if (!addr) - return -ENOMEM; - - if (copy_from_user(addr, arg + sizeof(uf), alen)) { - err = -EFAULT; - goto done; - } + addr = memdup_user(arg + sizeof(uf), alen); + if (IS_ERR(addr)) + return PTR_ERR(addr); /* The filter is updated without holding any locks. Which is * perfectly safe. We disable it first and in the worst From 3b8d2a693dfa9038e8013506355b5e0c2e2edd9c Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Sat, 20 Aug 2016 09:00:34 +0200 Subject: [PATCH 0379/1715] tun: Rename a jump label in update_filter() Adjust a jump target according to the Linux coding style convention. Signed-off-by: Markus Elfring Signed-off-by: David S. Miller --- drivers/net/tun.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index a1aeccbce6d4..e249428f2d83 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -753,7 +753,7 @@ static int update_filter(struct tap_filter *filter, void __user *arg) for (; n < uf.count; n++) { if (!is_multicast_ether_addr(addr[n].u)) { err = 0; /* no filter */ - goto done; + goto free_addr; } addr_hash_set(filter->mask, addr[n].u); } @@ -769,8 +769,7 @@ static int update_filter(struct tap_filter *filter, void __user *arg) /* Return the number of exact filters */ err = nexact; - -done: +free_addr: kfree(addr); return err; } From 27b23f96f3cf6ffd680e28bf569d8ddd71842590 Mon Sep 17 00:00:00 2001 From: Veola Nazareth Date: Sat, 20 Aug 2016 19:35:37 -0700 Subject: [PATCH 0380/1715] ixgbe: report correct media type for KR, KX and KX4 interfaces ethtool reports backplane type interfaces as 1000/10000baseT link modes. This has been corrected to report the media as KR, KX or KX4 based on the backplane interface present. Signed-off-by: Veola Nazareth Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index 0d7209eb5abf..9547191e26c9 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -193,7 +193,9 @@ static int ixgbe_get_settings(struct net_device *netdev, if (supported_link & IXGBE_LINK_SPEED_10GB_FULL) ecmd->supported |= ixgbe_get_supported_10gtypes(hw); if (supported_link & IXGBE_LINK_SPEED_1GB_FULL) - ecmd->supported |= SUPPORTED_1000baseT_Full; + ecmd->supported |= (ixgbe_isbackplane(hw->phy.media_type)) ? + SUPPORTED_1000baseKX_Full : + SUPPORTED_1000baseT_Full; if (supported_link & IXGBE_LINK_SPEED_100_FULL) ecmd->supported |= ixgbe_isbackplane(hw->phy.media_type) ? SUPPORTED_1000baseKX_Full : From c64269710ef28eb0dea03098853e425851214496 Mon Sep 17 00:00:00 2001 From: Emil Tantilov Date: Wed, 27 Jul 2016 10:55:08 -0700 Subject: [PATCH 0381/1715] ixgbevf: fix incorrect MAC address on load The PF driver was only receiving the first 4 bytes of the MAC due to an incorrect size parameter for ixgbevf_write_msg_read_ack() in ixgbevf_set_rar_vf(). Correct the size by calculating it on a fly for all instances where we call ixgbevf_write_msg_read_ack() Signed-off-by: Emil Tantilov Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbevf/vf.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.c b/drivers/net/ethernet/intel/ixgbevf/vf.c index a52f70ec42b6..d46ba1dabcb7 100644 --- a/drivers/net/ethernet/intel/ixgbevf/vf.c +++ b/drivers/net/ethernet/intel/ixgbevf/vf.c @@ -284,7 +284,8 @@ static s32 ixgbevf_set_uc_addr_vf(struct ixgbe_hw *hw, u32 index, u8 *addr) if (addr) ether_addr_copy(msg_addr, addr); - ret_val = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, 3); + ret_val = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, + sizeof(msgbuf) / sizeof(u32)); if (!ret_val) { msgbuf[0] &= ~IXGBE_VT_MSGTYPE_CTS; @@ -441,7 +442,8 @@ static s32 ixgbevf_set_rar_vf(struct ixgbe_hw *hw, u32 index, u8 *addr, msgbuf[0] = IXGBE_VF_SET_MAC_ADDR; ether_addr_copy(msg_addr, addr); - ret_val = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, 2); + ret_val = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, + sizeof(msgbuf) / sizeof(u32)); msgbuf[0] &= ~IXGBE_VT_MSGTYPE_CTS; @@ -551,7 +553,8 @@ static s32 ixgbevf_update_xcast_mode(struct ixgbe_hw *hw, int xcast_mode) msgbuf[0] = IXGBE_VF_UPDATE_XCAST_MODE; msgbuf[1] = xcast_mode; - err = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, 2); + err = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, + sizeof(msgbuf) / sizeof(u32)); if (err) return err; @@ -588,7 +591,8 @@ static s32 ixgbevf_set_vfta_vf(struct ixgbe_hw *hw, u32 vlan, u32 vind, /* Setting the 8 bit field MSG INFO to TRUE indicates "add" */ msgbuf[0] |= vlan_on << IXGBE_VT_MSGINFO_SHIFT; - err = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, 2); + err = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, + sizeof(msgbuf) / sizeof(u32)); if (err) goto mbx_err; @@ -791,7 +795,8 @@ static s32 ixgbevf_set_rlpml_vf(struct ixgbe_hw *hw, u16 max_size) msgbuf[0] = IXGBE_VF_SET_LPE; msgbuf[1] = max_size; - ret_val = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, 2); + ret_val = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, + sizeof(msgbuf) / sizeof(u32)); if (ret_val) return ret_val; if ((msgbuf[0] & IXGBE_VF_SET_LPE) && @@ -837,7 +842,8 @@ static int ixgbevf_negotiate_api_version_vf(struct ixgbe_hw *hw, int api) msg[1] = api; msg[2] = 0; - err = ixgbevf_write_msg_read_ack(hw, msg, msg, 3); + err = ixgbevf_write_msg_read_ack(hw, msg, msg, + sizeof(msg) / sizeof(u32)); if (!err) { msg[0] &= ~IXGBE_VT_MSGTYPE_CTS; @@ -887,7 +893,8 @@ int ixgbevf_get_queues(struct ixgbe_hw *hw, unsigned int *num_tcs, msg[0] = IXGBE_VF_GET_QUEUE; msg[1] = msg[2] = msg[3] = msg[4] = 0; - err = ixgbevf_write_msg_read_ack(hw, msg, msg, 5); + err = ixgbevf_write_msg_read_ack(hw, msg, msg, + sizeof(msg) / sizeof(u32)); if (!err) { msg[0] &= ~IXGBE_VT_MSGTYPE_CTS; From a21d0822ff693655b4bf412405ecd649636f3d3b Mon Sep 17 00:00:00 2001 From: Emil Tantilov Date: Wed, 10 Aug 2016 11:19:23 -0700 Subject: [PATCH 0382/1715] ixgbe: add support for geneve Rx offload Add geneve Rx offload support for x550em_a. The implementation follows the vxlan code with the lower 16 bits of the VXLANCTRL register holding the UDP port for VXLAN and the upper for Geneve. Disabled NFS filters in the RFCTL register which allows us to simplify the check for VXLAN and Geneve packets in ixgbe_rx_checksum(). Removed vxlan from the name of the callback functions and replaced it with udp_tunnel which is more in line with the new API. Signed-off-by: Emil Tantilov Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe.h | 4 +- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 174 +++++++++++++----- drivers/net/ethernet/intel/ixgbe/ixgbe_type.h | 7 + 3 files changed, 134 insertions(+), 51 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index 5628e2dae427..33c025055011 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -645,6 +645,7 @@ struct ixgbe_adapter { #define IXGBE_FLAG_RX_HWTSTAMP_ENABLED BIT(25) #define IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER BIT(26) #define IXGBE_FLAG_DCB_CAPABLE BIT(27) +#define IXGBE_FLAG_GENEVE_OFFLOAD_CAPABLE BIT(28) u32 flags2; #define IXGBE_FLAG2_RSC_CAPABLE BIT(0) @@ -658,7 +659,7 @@ struct ixgbe_adapter { #define IXGBE_FLAG2_RSS_FIELD_IPV6_UDP BIT(9) #define IXGBE_FLAG2_PTP_PPS_ENABLED BIT(10) #define IXGBE_FLAG2_PHY_INTERRUPT BIT(11) -#define IXGBE_FLAG2_VXLAN_REREG_NEEDED BIT(12) +#define IXGBE_FLAG2_UDP_TUN_REREG_NEEDED BIT(12) #define IXGBE_FLAG2_VLAN_PROMISC BIT(13) /* Tx fast path data */ @@ -672,6 +673,7 @@ struct ixgbe_adapter { /* Port number used to identify VXLAN traffic */ __be16 vxlan_port; + __be16 geneve_port; /* TX */ struct ixgbe_ring *tx_ring[MAX_TX_QUEUES] ____cacheline_aligned_in_smp; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 6bf131faa65e..5b8819c56d53 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -1495,7 +1495,6 @@ static inline void ixgbe_rx_checksum(struct ixgbe_ring *ring, struct sk_buff *skb) { __le16 pkt_info = rx_desc->wb.lower.lo_dword.hs_rss.pkt_info; - __le16 hdr_info = rx_desc->wb.lower.lo_dword.hs_rss.hdr_info; bool encap_pkt = false; skb_checksum_none_assert(skb); @@ -1504,8 +1503,8 @@ static inline void ixgbe_rx_checksum(struct ixgbe_ring *ring, if (!(ring->netdev->features & NETIF_F_RXCSUM)) return; - if ((pkt_info & cpu_to_le16(IXGBE_RXDADV_PKTTYPE_VXLAN)) && - (hdr_info & cpu_to_le16(IXGBE_RXDADV_PKTTYPE_TUNNEL >> 16))) { + /* check for VXLAN and Geneve packets */ + if (pkt_info & cpu_to_le16(IXGBE_RXDADV_PKTTYPE_VXLAN)) { encap_pkt = true; skb->encapsulation = 1; } @@ -3922,6 +3921,9 @@ static void ixgbe_configure_rx(struct ixgbe_adapter *adapter) rfctl &= ~IXGBE_RFCTL_RSC_DIS; if (!(adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED)) rfctl |= IXGBE_RFCTL_RSC_DIS; + + /* disable NFS filtering */ + rfctl |= (IXGBE_RFCTL_NFSW_DIS | IXGBE_RFCTL_NFSR_DIS); IXGBE_WRITE_REG(hw, IXGBE_RFCTL, rfctl); /* Program registers for the distribution of queues */ @@ -4586,18 +4588,23 @@ static void ixgbe_napi_disable_all(struct ixgbe_adapter *adapter) } } -static void ixgbe_clear_vxlan_port(struct ixgbe_adapter *adapter) +static void ixgbe_clear_udp_tunnel_port(struct ixgbe_adapter *adapter, u32 mask) { - switch (adapter->hw.mac.type) { - case ixgbe_mac_X550: - case ixgbe_mac_X550EM_x: - case ixgbe_mac_x550em_a: - IXGBE_WRITE_REG(&adapter->hw, IXGBE_VXLANCTRL, 0); + struct ixgbe_hw *hw = &adapter->hw; + u32 vxlanctrl; + + if (!(adapter->flags & (IXGBE_FLAG_VXLAN_OFFLOAD_CAPABLE | + IXGBE_FLAG_GENEVE_OFFLOAD_CAPABLE))) + return; + + vxlanctrl = IXGBE_READ_REG(hw, IXGBE_VXLANCTRL) && ~mask; + IXGBE_WRITE_REG(hw, IXGBE_VXLANCTRL, vxlanctrl); + + if (mask & IXGBE_VXLANCTRL_VXLAN_UDPPORT_MASK) adapter->vxlan_port = 0; - break; - default: - break; - } + + if (mask & IXGBE_VXLANCTRL_GENEVE_UDPPORT_MASK) + adapter->geneve_port = 0; } #ifdef CONFIG_IXGBE_DCB @@ -5711,8 +5718,10 @@ static int ixgbe_sw_init(struct ixgbe_adapter *adapter) if (fwsm & IXGBE_FWSM_TS_ENABLED) adapter->flags2 |= IXGBE_FLAG2_TEMP_SENSOR_CAPABLE; break; - case ixgbe_mac_X550EM_x: case ixgbe_mac_x550em_a: + adapter->flags |= IXGBE_FLAG_GENEVE_OFFLOAD_CAPABLE; + /* fall through */ + case ixgbe_mac_X550EM_x: #ifdef CONFIG_IXGBE_DCB adapter->flags &= ~IXGBE_FLAG_DCB_CAPABLE; #endif @@ -6144,7 +6153,7 @@ int ixgbe_open(struct net_device *netdev) ixgbe_up_complete(adapter); - ixgbe_clear_vxlan_port(adapter); + ixgbe_clear_udp_tunnel_port(adapter, IXGBE_VXLANCTRL_ALL_UDPPORT_MASK); udp_tunnel_get_rx_info(netdev); return 0; @@ -7223,9 +7232,9 @@ static void ixgbe_service_task(struct work_struct *work) ixgbe_service_event_complete(adapter); return; } - if (adapter->flags2 & IXGBE_FLAG2_VXLAN_REREG_NEEDED) { + if (adapter->flags2 & IXGBE_FLAG2_UDP_TUN_REREG_NEEDED) { rtnl_lock(); - adapter->flags2 &= ~IXGBE_FLAG2_VXLAN_REREG_NEEDED; + adapter->flags2 &= ~IXGBE_FLAG2_UDP_TUN_REREG_NEEDED; udp_tunnel_get_rx_info(adapter->netdev); rtnl_unlock(); } @@ -7665,6 +7674,10 @@ static void ixgbe_atr(struct ixgbe_ring *ring, if (adapter->vxlan_port && udp_hdr(skb)->dest == adapter->vxlan_port) hdr.network = skb_inner_network_header(skb); + + if (adapter->geneve_port && + udp_hdr(skb)->dest == adapter->geneve_port) + hdr.network = skb_inner_network_header(skb); } /* Currently only IPv4/IPv6 with TCP is supported */ @@ -8800,10 +8813,23 @@ static int ixgbe_set_features(struct net_device *netdev, netdev->features = features; if ((adapter->flags & IXGBE_FLAG_VXLAN_OFFLOAD_CAPABLE)) { - if (features & NETIF_F_RXCSUM) - adapter->flags2 |= IXGBE_FLAG2_VXLAN_REREG_NEEDED; - else - ixgbe_clear_vxlan_port(adapter); + if (features & NETIF_F_RXCSUM) { + adapter->flags2 |= IXGBE_FLAG2_UDP_TUN_REREG_NEEDED; + } else { + u32 port_mask = IXGBE_VXLANCTRL_VXLAN_UDPPORT_MASK; + + ixgbe_clear_udp_tunnel_port(adapter, port_mask); + } + } + + if ((adapter->flags & IXGBE_FLAG_GENEVE_OFFLOAD_CAPABLE)) { + if (features & NETIF_F_RXCSUM) { + adapter->flags2 |= IXGBE_FLAG2_UDP_TUN_REREG_NEEDED; + } else { + u32 port_mask = IXGBE_VXLANCTRL_GENEVE_UDPPORT_MASK; + + ixgbe_clear_udp_tunnel_port(adapter, port_mask); + } } if (need_reset) @@ -8816,67 +8842,115 @@ static int ixgbe_set_features(struct net_device *netdev, } /** - * ixgbe_add_vxlan_port - Get notifications about VXLAN ports that come up + * ixgbe_add_udp_tunnel_port - Get notifications about adding UDP tunnel ports * @dev: The port's netdev * @ti: Tunnel endpoint information **/ -static void ixgbe_add_vxlan_port(struct net_device *dev, - struct udp_tunnel_info *ti) +static void ixgbe_add_udp_tunnel_port(struct net_device *dev, + struct udp_tunnel_info *ti) { struct ixgbe_adapter *adapter = netdev_priv(dev); struct ixgbe_hw *hw = &adapter->hw; __be16 port = ti->port; - - if (ti->type != UDP_TUNNEL_TYPE_VXLAN) - return; + u32 port_shift = 0; + u32 reg; if (ti->sa_family != AF_INET) return; - if (!(adapter->flags & IXGBE_FLAG_VXLAN_OFFLOAD_CAPABLE)) - return; + switch (ti->type) { + case UDP_TUNNEL_TYPE_VXLAN: + if (!(adapter->flags & IXGBE_FLAG_VXLAN_OFFLOAD_CAPABLE)) + return; - if (adapter->vxlan_port == port) - return; + if (adapter->vxlan_port == port) + return; - if (adapter->vxlan_port) { - netdev_info(dev, - "Hit Max num of VXLAN ports, not adding port %d\n", - ntohs(port)); + if (adapter->vxlan_port) { + netdev_info(dev, + "VXLAN port %d set, not adding port %d\n", + ntohs(adapter->vxlan_port), + ntohs(port)); + return; + } + + adapter->vxlan_port = port; + break; + case UDP_TUNNEL_TYPE_GENEVE: + if (!(adapter->flags & IXGBE_FLAG_GENEVE_OFFLOAD_CAPABLE)) + return; + + if (adapter->geneve_port == port) + return; + + if (adapter->geneve_port) { + netdev_info(dev, + "GENEVE port %d set, not adding port %d\n", + ntohs(adapter->geneve_port), + ntohs(port)); + return; + } + + port_shift = IXGBE_VXLANCTRL_GENEVE_UDPPORT_SHIFT; + adapter->geneve_port = port; + break; + default: return; } - adapter->vxlan_port = port; - IXGBE_WRITE_REG(hw, IXGBE_VXLANCTRL, ntohs(port)); + reg = IXGBE_READ_REG(hw, IXGBE_VXLANCTRL) | ntohs(port) << port_shift; + IXGBE_WRITE_REG(hw, IXGBE_VXLANCTRL, reg); } /** - * ixgbe_del_vxlan_port - Get notifications about VXLAN ports that go away + * ixgbe_del_udp_tunnel_port - Get notifications about removing UDP tunnel ports * @dev: The port's netdev * @ti: Tunnel endpoint information **/ -static void ixgbe_del_vxlan_port(struct net_device *dev, - struct udp_tunnel_info *ti) +static void ixgbe_del_udp_tunnel_port(struct net_device *dev, + struct udp_tunnel_info *ti) { struct ixgbe_adapter *adapter = netdev_priv(dev); + u32 port_mask; - if (ti->type != UDP_TUNNEL_TYPE_VXLAN) + if (ti->type != UDP_TUNNEL_TYPE_VXLAN && + ti->type != UDP_TUNNEL_TYPE_GENEVE) return; if (ti->sa_family != AF_INET) return; - if (!(adapter->flags & IXGBE_FLAG_VXLAN_OFFLOAD_CAPABLE)) - return; + switch (ti->type) { + case UDP_TUNNEL_TYPE_VXLAN: + if (!(adapter->flags & IXGBE_FLAG_VXLAN_OFFLOAD_CAPABLE)) + return; - if (adapter->vxlan_port != ti->port) { - netdev_info(dev, "Port %d was not found, not deleting\n", - ntohs(ti->port)); + if (adapter->vxlan_port != ti->port) { + netdev_info(dev, "VXLAN port %d not found\n", + ntohs(ti->port)); + return; + } + + port_mask = IXGBE_VXLANCTRL_VXLAN_UDPPORT_MASK; + break; + case UDP_TUNNEL_TYPE_GENEVE: + if (!(adapter->flags & IXGBE_FLAG_GENEVE_OFFLOAD_CAPABLE)) + return; + + if (adapter->geneve_port != ti->port) { + netdev_info(dev, "GENEVE port %d not found\n", + ntohs(ti->port)); + return; + } + + port_mask = IXGBE_VXLANCTRL_GENEVE_UDPPORT_MASK; + break; + default: return; } - ixgbe_clear_vxlan_port(adapter); - adapter->flags2 |= IXGBE_FLAG2_VXLAN_REREG_NEEDED; + ixgbe_clear_udp_tunnel_port(adapter, port_mask); + adapter->flags2 |= IXGBE_FLAG2_UDP_TUN_REREG_NEEDED; } static int ixgbe_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], @@ -9190,8 +9264,8 @@ static const struct net_device_ops ixgbe_netdev_ops = { .ndo_bridge_getlink = ixgbe_ndo_bridge_getlink, .ndo_dfwd_add_station = ixgbe_fwd_add, .ndo_dfwd_del_station = ixgbe_fwd_del, - .ndo_udp_tunnel_add = ixgbe_add_vxlan_port, - .ndo_udp_tunnel_del = ixgbe_del_vxlan_port, + .ndo_udp_tunnel_add = ixgbe_add_udp_tunnel_port, + .ndo_udp_tunnel_del = ixgbe_del_udp_tunnel_port, .ndo_features_check = ixgbe_features_check, }; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h index 1248a9936f7a..5ff91b8d1ca7 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h @@ -487,6 +487,13 @@ struct ixgbe_thermal_sensor_data { #define IXGBE_FHFT_EXT(_n) (0x09800 + ((_n) * 0x100)) /* Ext Flexible Host * Filter Table */ +/* masks for accessing VXLAN and GENEVE UDP ports */ +#define IXGBE_VXLANCTRL_VXLAN_UDPPORT_MASK 0x0000ffff /* VXLAN port */ +#define IXGBE_VXLANCTRL_GENEVE_UDPPORT_MASK 0xffff0000 /* GENEVE port */ +#define IXGBE_VXLANCTRL_ALL_UDPPORT_MASK 0xffffffff /* GENEVE/VXLAN */ + +#define IXGBE_VXLANCTRL_GENEVE_UDPPORT_SHIFT 16 + #define IXGBE_FLEXIBLE_FILTER_COUNT_MAX 4 #define IXGBE_EXT_FLEXIBLE_FILTER_COUNT_MAX 2 From a0ad55a36e33905c7a79627f1f3448db1105210f Mon Sep 17 00:00:00 2001 From: Don Skidmore Date: Wed, 17 Aug 2016 14:11:57 -0400 Subject: [PATCH 0383/1715] ixgbe: Fix led interface for X557 devices The X557 devices use a different interface to the LED for the port. This patch reflect that change. Signed-off-by: Don Skidmore Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_type.h | 3 ++ drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c | 52 ++++++++++++++++++- 2 files changed, 53 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h index 5ff91b8d1ca7..a59b65f7e40a 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h @@ -1830,6 +1830,9 @@ enum { #define IXGBE_LED_IVRT(_i) IXGBE_LED_OFFSET(IXGBE_LED_IVRT_BASE, _i) #define IXGBE_LED_BLINK(_i) IXGBE_LED_OFFSET(IXGBE_LED_BLINK_BASE, _i) #define IXGBE_LED_MODE_MASK(_i) IXGBE_LED_OFFSET(IXGBE_LED_MODE_MASK_BASE, _i) +#define IXGBE_X557_LED_MANUAL_SET_MASK BIT(8) +#define IXGBE_X557_MAX_LED_INDEX 3 +#define IXGBE_X557_LED_PROVISIONING 0xC430 /* LED modes */ #define IXGBE_LED_LINK_UP 0x0 diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index 4716ca499e67..45ad6b10abe3 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -2114,6 +2114,50 @@ static s32 ixgbe_reset_phy_t_X550em(struct ixgbe_hw *hw) return ixgbe_enable_lasi_ext_t_x550em(hw); } +/** + * ixgbe_led_on_t_x550em - Turns on the software controllable LEDs. + * @hw: pointer to hardware structure + * @led_idx: led number to turn on + **/ +s32 ixgbe_led_on_t_x550em(struct ixgbe_hw *hw, u32 led_idx) +{ + u16 phy_data; + + if (led_idx >= IXGBE_X557_MAX_LED_INDEX) + return IXGBE_ERR_PARAM; + + /* To turn on the LED, set mode to ON. */ + hw->phy.ops.read_reg(hw, IXGBE_X557_LED_PROVISIONING + led_idx, + IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, &phy_data); + phy_data |= IXGBE_X557_LED_MANUAL_SET_MASK; + hw->phy.ops.write_reg(hw, IXGBE_X557_LED_PROVISIONING + led_idx, + IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, phy_data); + + return 0; +} + +/** + * ixgbe_led_off_t_x550em - Turns off the software controllable LEDs. + * @hw: pointer to hardware structure + * @led_idx: led number to turn off + **/ +s32 ixgbe_led_off_t_x550em(struct ixgbe_hw *hw, u32 led_idx) +{ + u16 phy_data; + + if (led_idx >= IXGBE_X557_MAX_LED_INDEX) + return IXGBE_ERR_PARAM; + + /* To turn on the LED, set mode to ON. */ + hw->phy.ops.read_reg(hw, IXGBE_X557_LED_PROVISIONING + led_idx, + IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, &phy_data); + phy_data &= ~IXGBE_X557_LED_MANUAL_SET_MASK; + hw->phy.ops.write_reg(hw, IXGBE_X557_LED_PROVISIONING + led_idx, + IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, phy_data); + + return 0; +} + /** ixgbe_get_lcd_x550em - Determine lowest common denominator * @hw: pointer to hardware structure * @lcd_speed: pointer to lowest common link speed @@ -2853,8 +2897,6 @@ static s32 ixgbe_write_phy_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr, .write_analog_reg8 = NULL, \ .set_rxpba = &ixgbe_set_rxpba_generic, \ .check_link = &ixgbe_check_mac_link_generic, \ - .led_on = &ixgbe_led_on_generic, \ - .led_off = &ixgbe_led_off_generic, \ .blink_led_start = &ixgbe_blink_led_start_X540, \ .blink_led_stop = &ixgbe_blink_led_stop_X540, \ .set_rar = &ixgbe_set_rar_generic, \ @@ -2886,6 +2928,8 @@ static s32 ixgbe_write_phy_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr, static const struct ixgbe_mac_operations mac_ops_X550 = { X550_COMMON_MAC + .led_on = ixgbe_led_on_generic, + .led_off = ixgbe_led_off_generic, .reset_hw = &ixgbe_reset_hw_X540, .get_media_type = &ixgbe_get_media_type_X540, .get_san_mac_addr = &ixgbe_get_san_mac_addr_generic, @@ -2904,6 +2948,8 @@ static const struct ixgbe_mac_operations mac_ops_X550 = { static const struct ixgbe_mac_operations mac_ops_X550EM_x = { X550_COMMON_MAC + .led_on = ixgbe_led_on_t_x550em, + .led_off = ixgbe_led_off_t_x550em, .reset_hw = &ixgbe_reset_hw_X550em, .get_media_type = &ixgbe_get_media_type_X550em, .get_san_mac_addr = NULL, @@ -2922,6 +2968,8 @@ static const struct ixgbe_mac_operations mac_ops_X550EM_x = { static struct ixgbe_mac_operations mac_ops_x550em_a = { X550_COMMON_MAC + .led_on = ixgbe_led_on_t_x550em, + .led_off = ixgbe_led_off_t_x550em, .reset_hw = ixgbe_reset_hw_X550em, .get_media_type = ixgbe_get_media_type_X550em, .get_san_mac_addr = NULL, From a83c27e79068cbaa2ce08d696b2150ebd49e8ffd Mon Sep 17 00:00:00 2001 From: Don Skidmore Date: Wed, 17 Aug 2016 17:34:07 -0400 Subject: [PATCH 0384/1715] ixgbe: add device to MDIO speed setting This shouldn't matter as nothing should be attached still to be consisted control MDIO speed for these devices as well. Signed-off-by: Don Skidmore Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index 45ad6b10abe3..a8030e026a76 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -2558,6 +2558,8 @@ static void ixgbe_set_mdio_speed(struct ixgbe_hw *hw) switch (hw->device_id) { case IXGBE_DEV_ID_X550EM_X_10G_T: + case IXGBE_DEV_ID_X550EM_A_SGMII: + case IXGBE_DEV_ID_X550EM_A_SGMII_L: case IXGBE_DEV_ID_X550EM_A_SFP: /* Config MDIO clock speed before the first MDIO PHY access */ hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0); From 92ed84300718de43fd7a92ebbd3dc1189c6dd091 Mon Sep 17 00:00:00 2001 From: Don Skidmore Date: Wed, 17 Aug 2016 20:34:40 -0400 Subject: [PATCH 0385/1715] ixgbe: Add support for new X557 device This patch adds support for the new copper device X557. Signed-off-by: Don Skidmore Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_common.c | 1 + drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 1 + drivers/net/ethernet/intel/ixgbe/ixgbe_type.h | 1 + drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c | 8 ++++++++ 4 files changed, 11 insertions(+) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c index b4217f30e89c..3095c24e80a9 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c @@ -99,6 +99,7 @@ bool ixgbe_device_supports_autoneg_fc(struct ixgbe_hw *hw) case IXGBE_DEV_ID_X550T: case IXGBE_DEV_ID_X550T1: case IXGBE_DEV_ID_X550EM_X_10G_T: + case IXGBE_DEV_ID_X550EM_A_10G_T: supported = true; break; default: diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 5b8819c56d53..d76bc1a313ea 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -137,6 +137,7 @@ static const struct pci_device_id ixgbe_pci_tbl[] = { {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_A_SFP_N), board_x550em_a }, {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_A_SGMII), board_x550em_a }, {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_A_SGMII_L), board_x550em_a }, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_A_10G_T), board_x550em_a}, {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_A_SFP), board_x550em_a }, /* required last entry */ {0, } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h index a59b65f7e40a..31d82e3abac8 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h @@ -90,6 +90,7 @@ #define IXGBE_DEV_ID_X550EM_A_SFP_N 0x15C4 #define IXGBE_DEV_ID_X550EM_A_SGMII 0x15C6 #define IXGBE_DEV_ID_X550EM_A_SGMII_L 0x15C7 +#define IXGBE_DEV_ID_X550EM_A_10G_T 0x15C8 #define IXGBE_DEV_ID_X550EM_A_SFP 0x15CE /* VF Device IDs */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index a8030e026a76..fb1b819d8311 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -295,6 +295,12 @@ static s32 ixgbe_identify_phy_x550em(struct ixgbe_hw *hw) case IXGBE_DEV_ID_X550EM_A_KR_L: hw->phy.type = ixgbe_phy_x550em_kr; break; + case IXGBE_DEV_ID_X550EM_A_10G_T: + if (hw->bus.lan_id) + hw->phy.phy_semaphore_mask = IXGBE_GSSR_PHY1_SM; + else + hw->phy.phy_semaphore_mask = IXGBE_GSSR_PHY0_SM; + /* Fallthrough */ case IXGBE_DEV_ID_X550EM_X_1G_T: case IXGBE_DEV_ID_X550EM_X_10G_T: return ixgbe_identify_phy_generic(hw); @@ -2500,6 +2506,7 @@ static enum ixgbe_media_type ixgbe_get_media_type_X550em(struct ixgbe_hw *hw) break; case IXGBE_DEV_ID_X550EM_X_1G_T: case IXGBE_DEV_ID_X550EM_X_10G_T: + case IXGBE_DEV_ID_X550EM_A_10G_T: media_type = ixgbe_media_type_copper; break; default: @@ -2560,6 +2567,7 @@ static void ixgbe_set_mdio_speed(struct ixgbe_hw *hw) case IXGBE_DEV_ID_X550EM_X_10G_T: case IXGBE_DEV_ID_X550EM_A_SGMII: case IXGBE_DEV_ID_X550EM_A_SGMII_L: + case IXGBE_DEV_ID_X550EM_A_10G_T: case IXGBE_DEV_ID_X550EM_A_SFP: /* Config MDIO clock speed before the first MDIO PHY access */ hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0); From 0f635171e3adad749c2f374aeac97594aba24b52 Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Sat, 20 Aug 2016 00:52:18 +0200 Subject: [PATCH 0386/1715] net: ethernet: renesas: ravb: use phydev from struct net_device The private structure contain a pointer to phydev, but the structure net_device already contain such pointer. So we can remove the pointer phy_dev in the private structure, and update the driver to use the one contained in struct net_device. Signed-off-by: Philippe Reynes Acked-by: Sergei Shtylyov Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/ravb.h | 1 - drivers/net/ethernet/renesas/ravb_main.c | 29 ++++++++++-------------- 2 files changed, 12 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb.h b/drivers/net/ethernet/renesas/ravb.h index 4e5d5e953e15..f1109661a533 100644 --- a/drivers/net/ethernet/renesas/ravb.h +++ b/drivers/net/ethernet/renesas/ravb.h @@ -1011,7 +1011,6 @@ struct ravb_private { struct work_struct work; /* MII transceiver section. */ struct mii_bus *mii_bus; /* MDIO bus control */ - struct phy_device *phydev; /* PHY device control */ int link; phy_interface_t phy_interface; int msg_enable; diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index d4809adeb048..d21c9a404ce6 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -942,7 +942,7 @@ out: static void ravb_adjust_link(struct net_device *ndev) { struct ravb_private *priv = netdev_priv(ndev); - struct phy_device *phydev = priv->phydev; + struct phy_device *phydev = ndev->phydev; bool new_state = false; if (phydev->link) { @@ -1032,22 +1032,19 @@ static int ravb_phy_init(struct net_device *ndev) phy_attached_info(phydev); - priv->phydev = phydev; - return 0; } /* PHY control start function */ static int ravb_phy_start(struct net_device *ndev) { - struct ravb_private *priv = netdev_priv(ndev); int error; error = ravb_phy_init(ndev); if (error) return error; - phy_start(priv->phydev); + phy_start(ndev->phydev); return 0; } @@ -1058,9 +1055,9 @@ static int ravb_get_settings(struct net_device *ndev, struct ethtool_cmd *ecmd) int error = -ENODEV; unsigned long flags; - if (priv->phydev) { + if (ndev->phydev) { spin_lock_irqsave(&priv->lock, flags); - error = phy_ethtool_gset(priv->phydev, ecmd); + error = phy_ethtool_gset(ndev->phydev, ecmd); spin_unlock_irqrestore(&priv->lock, flags); } @@ -1073,7 +1070,7 @@ static int ravb_set_settings(struct net_device *ndev, struct ethtool_cmd *ecmd) unsigned long flags; int error; - if (!priv->phydev) + if (!ndev->phydev) return -ENODEV; spin_lock_irqsave(&priv->lock, flags); @@ -1081,7 +1078,7 @@ static int ravb_set_settings(struct net_device *ndev, struct ethtool_cmd *ecmd) /* Disable TX and RX */ ravb_rcv_snd_disable(ndev); - error = phy_ethtool_sset(priv->phydev, ecmd); + error = phy_ethtool_sset(ndev->phydev, ecmd); if (error) goto error_exit; @@ -1110,9 +1107,9 @@ static int ravb_nway_reset(struct net_device *ndev) int error = -ENODEV; unsigned long flags; - if (priv->phydev) { + if (ndev->phydev) { spin_lock_irqsave(&priv->lock, flags); - error = phy_start_aneg(priv->phydev); + error = phy_start_aneg(ndev->phydev); spin_unlock_irqrestore(&priv->lock, flags); } @@ -1661,10 +1658,9 @@ static int ravb_close(struct net_device *ndev) } /* PHY disconnect */ - if (priv->phydev) { - phy_stop(priv->phydev); - phy_disconnect(priv->phydev); - priv->phydev = NULL; + if (ndev->phydev) { + phy_stop(ndev->phydev); + phy_disconnect(ndev->phydev); } if (priv->chip_id != RCAR_GEN2) { @@ -1753,8 +1749,7 @@ static int ravb_hwtstamp_set(struct net_device *ndev, struct ifreq *req) /* ioctl to device function */ static int ravb_do_ioctl(struct net_device *ndev, struct ifreq *req, int cmd) { - struct ravb_private *priv = netdev_priv(ndev); - struct phy_device *phydev = priv->phydev; + struct phy_device *phydev = ndev->phydev; if (!netif_running(ndev)) return -EINVAL; From 04462f2aed3960c7c1589117ffe72362a9065490 Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Sat, 20 Aug 2016 00:52:19 +0200 Subject: [PATCH 0387/1715] net: ethernet: renesas: ravb: use new api ethtool_{get|set}_link_ksettings The ethtool api {get|set}_settings is deprecated. We move this driver to new api {get|set}_link_ksettings. Signed-off-by: Philippe Reynes Acked-by: Sergei Shtylyov Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/ravb_main.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index d21c9a404ce6..cad23ba06904 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1049,7 +1049,8 @@ static int ravb_phy_start(struct net_device *ndev) return 0; } -static int ravb_get_settings(struct net_device *ndev, struct ethtool_cmd *ecmd) +static int ravb_get_link_ksettings(struct net_device *ndev, + struct ethtool_link_ksettings *cmd) { struct ravb_private *priv = netdev_priv(ndev); int error = -ENODEV; @@ -1057,14 +1058,15 @@ static int ravb_get_settings(struct net_device *ndev, struct ethtool_cmd *ecmd) if (ndev->phydev) { spin_lock_irqsave(&priv->lock, flags); - error = phy_ethtool_gset(ndev->phydev, ecmd); + error = phy_ethtool_ksettings_get(ndev->phydev, cmd); spin_unlock_irqrestore(&priv->lock, flags); } return error; } -static int ravb_set_settings(struct net_device *ndev, struct ethtool_cmd *ecmd) +static int ravb_set_link_ksettings(struct net_device *ndev, + const struct ethtool_link_ksettings *cmd) { struct ravb_private *priv = netdev_priv(ndev); unsigned long flags; @@ -1078,11 +1080,11 @@ static int ravb_set_settings(struct net_device *ndev, struct ethtool_cmd *ecmd) /* Disable TX and RX */ ravb_rcv_snd_disable(ndev); - error = phy_ethtool_sset(ndev->phydev, ecmd); + error = phy_ethtool_ksettings_set(ndev->phydev, cmd); if (error) goto error_exit; - if (ecmd->duplex == DUPLEX_FULL) + if (cmd->base.duplex == DUPLEX_FULL) priv->duplex = 1; else priv->duplex = 0; @@ -1306,8 +1308,6 @@ static int ravb_get_ts_info(struct net_device *ndev, } static const struct ethtool_ops ravb_ethtool_ops = { - .get_settings = ravb_get_settings, - .set_settings = ravb_set_settings, .nway_reset = ravb_nway_reset, .get_msglevel = ravb_get_msglevel, .set_msglevel = ravb_set_msglevel, @@ -1318,6 +1318,8 @@ static const struct ethtool_ops ravb_ethtool_ops = { .get_ringparam = ravb_get_ringparam, .set_ringparam = ravb_set_ringparam, .get_ts_info = ravb_get_ts_info, + .get_link_ksettings = ravb_get_link_ksettings, + .set_link_ksettings = ravb_set_link_ksettings, }; static inline int ravb_hook_irq(unsigned int irq, irq_handler_t handler, From 5ab1fe72d5490978104fc493615ea29dd7238766 Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Sat, 20 Aug 2016 23:52:27 +0800 Subject: [PATCH 0388/1715] l2tp: Refactor the codes with existing macros instead of literal number Use PPP_ALLSTATIONS, PPP_UI, and SEND_SHUTDOWN instead of 0xff, 0x03, and 2 separately. Signed-off-by: Gao Feng Signed-off-by: David S. Miller --- net/l2tp/l2tp_ppp.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index d9560aa2dba3..65e2fd657bea 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -177,7 +177,7 @@ static int pppol2tp_recv_payload_hook(struct sk_buff *skb) if (!pskb_may_pull(skb, 2)) return 1; - if ((skb->data[0] == 0xff) && (skb->data[1] == 0x03)) + if ((skb->data[0] == PPP_ALLSTATIONS) && (skb->data[1] == PPP_UI)) skb_pull(skb, 2); return 0; @@ -282,7 +282,7 @@ static void pppol2tp_session_sock_put(struct l2tp_session *session) static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) { - static const unsigned char ppph[2] = { 0xff, 0x03 }; + static const unsigned char ppph[2] = {PPP_ALLSTATIONS, PPP_UI}; struct sock *sk = sock->sk; struct sk_buff *skb; int error; @@ -369,7 +369,7 @@ error: */ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) { - static const u8 ppph[2] = { 0xff, 0x03 }; + static const u8 ppph[2] = {PPP_ALLSTATIONS, PPP_UI}; struct sock *sk = (struct sock *) chan->private; struct sock *sk_tun; struct l2tp_session *session; @@ -440,7 +440,7 @@ static void pppol2tp_session_close(struct l2tp_session *session) BUG_ON(session->magic != L2TP_SESSION_MAGIC); if (sock) { - inet_shutdown(sock, 2); + inet_shutdown(sock, SEND_SHUTDOWN); /* Don't let the session go away before our socket does */ l2tp_session_inc_refcount(session); } From 8c14846df3998cfc2ffe3e7e5624da2e76b1eccb Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 20 Aug 2016 15:32:41 +0000 Subject: [PATCH 0389/1715] cxgb4: Simplify the return expression Simplify the return expression. Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 85e30f19e97a..0099a0cd53ea 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -2932,7 +2932,6 @@ EXPORT_SYMBOL(cxgb4_create_server_filter); int cxgb4_remove_server_filter(const struct net_device *dev, unsigned int stid, unsigned int queue, bool ipv6) { - int ret; struct filter_entry *f; struct adapter *adap; @@ -2946,11 +2945,7 @@ int cxgb4_remove_server_filter(const struct net_device *dev, unsigned int stid, /* Unlock the filter */ f->locked = 0; - ret = delete_filter(adap, stid); - if (ret) - return ret; - - return 0; + return delete_filter(adap, stid); } EXPORT_SYMBOL(cxgb4_remove_server_filter); From c1346a7e70b5be7f01cc1f64a7e3aefb80d48ad7 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 21 Aug 2016 15:50:11 -0700 Subject: [PATCH 0390/1715] Revert "l2tp: Refactor the codes with existing macros instead of literal number" This reverts commit 5ab1fe72d5490978104fc493615ea29dd7238766. This change still has problems. Signed-off-by: David S. Miller --- net/l2tp/l2tp_ppp.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 65e2fd657bea..d9560aa2dba3 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -177,7 +177,7 @@ static int pppol2tp_recv_payload_hook(struct sk_buff *skb) if (!pskb_may_pull(skb, 2)) return 1; - if ((skb->data[0] == PPP_ALLSTATIONS) && (skb->data[1] == PPP_UI)) + if ((skb->data[0] == 0xff) && (skb->data[1] == 0x03)) skb_pull(skb, 2); return 0; @@ -282,7 +282,7 @@ static void pppol2tp_session_sock_put(struct l2tp_session *session) static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) { - static const unsigned char ppph[2] = {PPP_ALLSTATIONS, PPP_UI}; + static const unsigned char ppph[2] = { 0xff, 0x03 }; struct sock *sk = sock->sk; struct sk_buff *skb; int error; @@ -369,7 +369,7 @@ error: */ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) { - static const u8 ppph[2] = {PPP_ALLSTATIONS, PPP_UI}; + static const u8 ppph[2] = { 0xff, 0x03 }; struct sock *sk = (struct sock *) chan->private; struct sock *sk_tun; struct l2tp_session *session; @@ -440,7 +440,7 @@ static void pppol2tp_session_close(struct l2tp_session *session) BUG_ON(session->magic != L2TP_SESSION_MAGIC); if (sock) { - inet_shutdown(sock, SEND_SHUTDOWN); + inet_shutdown(sock, 2); /* Don't let the session go away before our socket does */ l2tp_session_inc_refcount(session); } From 3d2f30a1df907e3ef4175121f0d21456630a72aa Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 18 Aug 2016 01:46:06 +0200 Subject: [PATCH 0391/1715] netfilter: nf_tables: add quota expression This patch adds the quota expression. This new stateful expression integrate easily into the dynset expression to build 'hashquota' flow tables. Arguably, we could use instead "counter bytes > 1000" instead, but this approach has several problems: 1) We only support for one single stateful expression in dynamic set definitions, and the expression above is a composite of two expressions: get counter + comparison. 2) We would need to restore the packed counter representation (that we used to have) based on seqlock to synchronize this, since per-cpu is not suitable for this. So instead of bloating the counter expression back with the seqlock representation and extending the existing set infrastructure to make it more complex for the composite described above, let's follow the more simple approach of adding a quota expression that we can plug into our existing infrastructure. Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 19 ++++ net/netfilter/Kconfig | 6 ++ net/netfilter/Makefile | 1 + net/netfilter/nft_quota.c | 121 +++++++++++++++++++++++ 4 files changed, 147 insertions(+) create mode 100644 net/netfilter/nft_quota.c diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 6ce0a6dd0889..784fbf15ab3d 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -900,6 +900,25 @@ enum nft_queue_attributes { #define NFT_QUEUE_FLAG_CPU_FANOUT 0x02 /* use current CPU (no hashing) */ #define NFT_QUEUE_FLAG_MASK 0x03 +enum nft_quota_flags { + NFT_QUOTA_F_INV = (1 << 0), +}; + +/** + * enum nft_quota_attributes - nf_tables quota expression netlink attributes + * + * @NFTA_QUOTA_BYTES: quota in bytes (NLA_U16) + * @NFTA_QUOTA_FLAGS: flags (NLA_U32) + */ +enum nft_quota_attributes { + NFTA_QUOTA_UNSPEC, + NFTA_QUOTA_BYTES, + NFTA_QUOTA_FLAGS, + NFTA_QUOTA_PAD, + __NFTA_QUOTA_MAX +}; +#define NFTA_QUOTA_MAX (__NFTA_QUOTA_MAX - 1) + /** * enum nft_reject_types - nf_tables reject expression reject types * diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 9cfaa00c79b2..29a8078deafa 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -542,6 +542,12 @@ config NFT_QUEUE This is required if you intend to use the userspace queueing infrastructure (also known as NFQUEUE) from nftables. +config NFT_QUOTA + tristate "Netfilter nf_tables quota module" + help + This option adds the "quota" expression that you can use to match + enforce bytes quotas. + config NFT_REJECT default m if NETFILTER_ADVANCED=n tristate "Netfilter nf_tables reject support" diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 1106ccde215c..0fc42df19b8c 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -84,6 +84,7 @@ obj-$(CONFIG_NFT_CT) += nft_ct.o obj-$(CONFIG_NFT_LIMIT) += nft_limit.o obj-$(CONFIG_NFT_NAT) += nft_nat.o obj-$(CONFIG_NFT_QUEUE) += nft_queue.o +obj-$(CONFIG_NFT_QUOTA) += nft_quota.o obj-$(CONFIG_NFT_REJECT) += nft_reject.o obj-$(CONFIG_NFT_REJECT_INET) += nft_reject_inet.o obj-$(CONFIG_NFT_SET_RBTREE) += nft_set_rbtree.o diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c new file mode 100644 index 000000000000..6eafbf987ed9 --- /dev/null +++ b/net/netfilter/nft_quota.c @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2016 Pablo Neira Ayuso + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_quota { + u64 quota; + bool invert; + atomic64_t remain; +}; + +static inline long nft_quota(struct nft_quota *priv, + const struct nft_pktinfo *pkt) +{ + return atomic64_sub_return(pkt->skb->len, &priv->remain); +} + +static void nft_quota_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_quota *priv = nft_expr_priv(expr); + + if (nft_quota(priv, pkt) < 0 && !priv->invert) + regs->verdict.code = NFT_BREAK; +} + +static const struct nla_policy nft_quota_policy[NFTA_QUOTA_MAX + 1] = { + [NFTA_QUOTA_BYTES] = { .type = NLA_U64 }, + [NFTA_QUOTA_FLAGS] = { .type = NLA_U32 }, +}; + +static int nft_quota_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_quota *priv = nft_expr_priv(expr); + u32 flags = 0; + u64 quota; + + if (!tb[NFTA_QUOTA_BYTES]) + return -EINVAL; + + quota = be64_to_cpu(nla_get_be64(tb[NFTA_QUOTA_BYTES])); + if (quota > S64_MAX) + return -EOVERFLOW; + + if (tb[NFTA_QUOTA_FLAGS]) { + flags = ntohl(nla_get_be32(tb[NFTA_QUOTA_FLAGS])); + if (flags & ~NFT_QUOTA_F_INV) + return -EINVAL; + } + + priv->quota = quota; + priv->invert = (flags & NFT_QUOTA_F_INV) ? true : false; + atomic64_set(&priv->remain, quota); + + return 0; +} + +static int nft_quota_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_quota *priv = nft_expr_priv(expr); + u32 flags = priv->invert ? NFT_QUOTA_F_INV : 0; + + if (nla_put_be64(skb, NFTA_QUOTA_BYTES, cpu_to_be64(priv->quota), + NFTA_QUOTA_PAD) || + nla_put_be32(skb, NFTA_QUOTA_FLAGS, htonl(flags))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_type nft_quota_type; +static const struct nft_expr_ops nft_quota_ops = { + .type = &nft_quota_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_quota)), + .eval = nft_quota_eval, + .init = nft_quota_init, + .dump = nft_quota_dump, +}; + +static struct nft_expr_type nft_quota_type __read_mostly = { + .name = "quota", + .ops = &nft_quota_ops, + .policy = nft_quota_policy, + .maxattr = NFTA_QUOTA_MAX, + .flags = NFT_EXPR_STATEFUL, + .owner = THIS_MODULE, +}; + +static int __init nft_quota_module_init(void) +{ + return nft_register_expr(&nft_quota_type); +} + +static void __exit nft_quota_module_exit(void) +{ + nft_unregister_expr(&nft_quota_type); +} + +module_init(nft_quota_module_init); +module_exit(nft_quota_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Pablo Neira Ayuso "); +MODULE_ALIAS_NFT_EXPR("quota"); From 91dbc6be0a62d3bcea98287734d593610aed507d Mon Sep 17 00:00:00 2001 From: Laura Garcia Liebana Date: Thu, 18 Aug 2016 12:13:13 +0200 Subject: [PATCH 0392/1715] netfilter: nf_tables: add number generator expression This patch adds the numgen expression that allows us to generated incremental and random numbers, this generator is bound to a upper limit that is specified by userspace. This expression is useful to distribute packets in a round-robin fashion as well as randomly. Signed-off-by: Laura Garcia Liebana Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 24 +++ net/netfilter/Kconfig | 6 + net/netfilter/Makefile | 1 + net/netfilter/nft_numgen.c | 192 +++++++++++++++++++++++ 4 files changed, 223 insertions(+) create mode 100644 net/netfilter/nft_numgen.c diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 784fbf15ab3d..8c9d6ff70ec0 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -1121,4 +1121,28 @@ enum nft_trace_types { __NFT_TRACETYPE_MAX }; #define NFT_TRACETYPE_MAX (__NFT_TRACETYPE_MAX - 1) + +/** + * enum nft_ng_attributes - nf_tables number generator expression netlink attributes + * + * @NFTA_NG_DREG: destination register (NLA_U32) + * @NFTA_NG_UNTIL: source value to increment the counter until reset (NLA_U32) + * @NFTA_NG_TYPE: operation type (NLA_U32) + */ +enum nft_ng_attributes { + NFTA_NG_UNSPEC, + NFTA_NG_DREG, + NFTA_NG_UNTIL, + NFTA_NG_TYPE, + __NFTA_NG_MAX +}; +#define NFTA_NG_MAX (__NFTA_NG_MAX - 1) + +enum nft_ng_types { + NFT_NG_INCREMENTAL, + NFT_NG_RANDOM, + __NFT_NG_MAX +}; +#define NFT_NG_MAX (__NFT_NG_MAX - 1) + #endif /* _LINUX_NF_TABLES_H */ diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 29a8078deafa..e8d56d9a4df2 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -474,6 +474,12 @@ config NFT_META This option adds the "meta" expression that you can use to match and to set packet metainformation such as the packet mark. +config NFT_NUMGEN + tristate "Netfilter nf_tables number generator module" + help + This option adds the number generator expression used to perform + incremental counting and random numbers bound to a upper limit. + config NFT_CT depends on NF_CONNTRACK tristate "Netfilter nf_tables conntrack module" diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 0fc42df19b8c..0c8581100ac6 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -80,6 +80,7 @@ obj-$(CONFIG_NF_TABLES_NETDEV) += nf_tables_netdev.o obj-$(CONFIG_NFT_COMPAT) += nft_compat.o obj-$(CONFIG_NFT_EXTHDR) += nft_exthdr.o obj-$(CONFIG_NFT_META) += nft_meta.o +obj-$(CONFIG_NFT_NUMGEN) += nft_numgen.o obj-$(CONFIG_NFT_CT) += nft_ct.o obj-$(CONFIG_NFT_LIMIT) += nft_limit.o obj-$(CONFIG_NFT_NAT) += nft_nat.o diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c new file mode 100644 index 000000000000..176e26d5bbd0 --- /dev/null +++ b/net/netfilter/nft_numgen.c @@ -0,0 +1,192 @@ +/* + * Copyright (c) 2016 Laura Garcia + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static DEFINE_PER_CPU(struct rnd_state, nft_numgen_prandom_state); + +struct nft_ng_inc { + enum nft_registers dreg:8; + u32 until; + atomic_t counter; +}; + +static void nft_ng_inc_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_ng_inc *priv = nft_expr_priv(expr); + u32 nval, oval; + + do { + oval = atomic_read(&priv->counter); + nval = (oval + 1 < priv->until) ? oval + 1 : 0; + } while (atomic_cmpxchg(&priv->counter, oval, nval) != oval); + + memcpy(®s->data[priv->dreg], &priv->counter, sizeof(u32)); +} + +static const struct nla_policy nft_ng_policy[NFTA_NG_MAX + 1] = { + [NFTA_NG_DREG] = { .type = NLA_U32 }, + [NFTA_NG_UNTIL] = { .type = NLA_U32 }, + [NFTA_NG_TYPE] = { .type = NLA_U32 }, +}; + +static int nft_ng_inc_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_ng_inc *priv = nft_expr_priv(expr); + + priv->until = ntohl(nla_get_be32(tb[NFTA_NG_UNTIL])); + if (priv->until == 0) + return -ERANGE; + + priv->dreg = nft_parse_register(tb[NFTA_NG_DREG]); + atomic_set(&priv->counter, 0); + + return nft_validate_register_store(ctx, priv->dreg, NULL, + NFT_DATA_VALUE, sizeof(u32)); +} + +static int nft_ng_dump(struct sk_buff *skb, enum nft_registers dreg, + u32 until, enum nft_ng_types type) +{ + if (nft_dump_register(skb, NFTA_NG_DREG, dreg)) + goto nla_put_failure; + if (nft_dump_register(skb, NFTA_NG_UNTIL, until)) + goto nla_put_failure; + if (nft_dump_register(skb, NFTA_NG_TYPE, type)) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -1; +} + +static int nft_ng_inc_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_ng_inc *priv = nft_expr_priv(expr); + + return nft_ng_dump(skb, priv->dreg, priv->until, NFT_NG_INCREMENTAL); +} + +struct nft_ng_random { + enum nft_registers dreg:8; + u32 until; +}; + +static void nft_ng_random_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_ng_random *priv = nft_expr_priv(expr); + struct rnd_state *state = this_cpu_ptr(&nft_numgen_prandom_state); + + regs->data[priv->dreg] = reciprocal_scale(prandom_u32_state(state), + priv->until); +} + +static int nft_ng_random_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_ng_random *priv = nft_expr_priv(expr); + + priv->until = ntohl(nla_get_be32(tb[NFTA_NG_UNTIL])); + if (priv->until == 0) + return -ERANGE; + + prandom_init_once(&nft_numgen_prandom_state); + + priv->dreg = nft_parse_register(tb[NFTA_NG_DREG]); + + return nft_validate_register_store(ctx, priv->dreg, NULL, + NFT_DATA_VALUE, sizeof(u32)); +} + +static int nft_ng_random_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_ng_random *priv = nft_expr_priv(expr); + + return nft_ng_dump(skb, priv->dreg, priv->until, NFT_NG_RANDOM); +} + +static struct nft_expr_type nft_ng_type; +static const struct nft_expr_ops nft_ng_inc_ops = { + .type = &nft_ng_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_ng_inc)), + .eval = nft_ng_inc_eval, + .init = nft_ng_inc_init, + .dump = nft_ng_inc_dump, +}; + +static const struct nft_expr_ops nft_ng_random_ops = { + .type = &nft_ng_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_ng_random)), + .eval = nft_ng_random_eval, + .init = nft_ng_random_init, + .dump = nft_ng_random_dump, +}; + +static const struct nft_expr_ops * +nft_ng_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) +{ + u32 type; + + if (!tb[NFTA_NG_DREG] || + !tb[NFTA_NG_UNTIL] || + !tb[NFTA_NG_TYPE]) + return ERR_PTR(-EINVAL); + + type = ntohl(nla_get_be32(tb[NFTA_NG_TYPE])); + + switch (type) { + case NFT_NG_INCREMENTAL: + return &nft_ng_inc_ops; + case NFT_NG_RANDOM: + return &nft_ng_random_ops; + } + + return ERR_PTR(-EINVAL); +} + +static struct nft_expr_type nft_ng_type __read_mostly = { + .name = "numgen", + .select_ops = &nft_ng_select_ops, + .policy = nft_ng_policy, + .maxattr = NFTA_NG_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_ng_module_init(void) +{ + return nft_register_expr(&nft_ng_type); +} + +static void __exit nft_ng_module_exit(void) +{ + nft_unregister_expr(&nft_ng_type); +} + +module_init(nft_ng_module_init); +module_exit(nft_ng_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Laura Garcia "); +MODULE_ALIAS_NFT_EXPR("numgen"); From 8d6c0eaa9ee9ea7ef0402c7cf47b288f0de0cd91 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 18 Aug 2016 16:47:57 +0100 Subject: [PATCH 0393/1715] netfilter: fix spelling mistake: "delimitter" -> "delimiter" trivial fix to spelling mistake in pr_debug message Signed-off-by: Colin Ian King Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_ftp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index 43147005bea3..b6934b5edf7a 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -237,7 +237,7 @@ static int try_eprt(const char *data, size_t dlen, struct nf_conntrack_man *cmd, } delim = data[0]; if (isdigit(delim) || delim < 33 || delim > 126 || data[2] != delim) { - pr_debug("try_eprt: invalid delimitter.\n"); + pr_debug("try_eprt: invalid delimiter.\n"); return 0; } From a5e57336451d1ad75c24b9f7e60825f12f117ee0 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sun, 21 Aug 2016 15:21:10 +0000 Subject: [PATCH 0394/1715] netfilter: nft_hash: fix non static symbol warning Fixes the following sparse warning: net/netfilter/nft_hash.c:40:25: warning: symbol 'nft_hash_policy' was not declared. Should it be static? Signed-off-by: Wei Yongjun Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_hash.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index b82ff29b3f5f..e090aeef3a78 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -37,7 +37,7 @@ static void nft_hash_eval(const struct nft_expr *expr, priv->modulus); } -const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = { +static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = { [NFTA_HASH_SREG] = { .type = NLA_U32 }, [NFTA_HASH_DREG] = { .type = NLA_U32 }, [NFTA_HASH_LEN] = { .type = NLA_U32 }, From b9a24bb76bf611a5268ceffe04219e6ad264559b Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Fri, 19 Aug 2016 12:36:54 -0700 Subject: [PATCH 0395/1715] net_sched: properly handle failure case of tcf_exts_init() After commit 22dc13c837c3 ("net_sched: convert tcf_exts from list to pointer array") we do dynamic allocation in tcf_exts_init(), therefore we need to handle the ENOMEM case properly. Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Acked-by: Jamal Hadi Salim Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 6 ++- net/sched/cls_basic.c | 12 ++++-- net/sched/cls_bpf.c | 29 ++++++++----- net/sched/cls_cgroup.c | 13 ++++-- net/sched/cls_flow.c | 26 +++++++----- net/sched/cls_flower.c | 11 +++-- net/sched/cls_fw.c | 18 +++++++-- net/sched/cls_route.c | 14 +++++-- net/sched/cls_rsvp.h | 17 ++++++-- net/sched/cls_tcindex.c | 90 +++++++++++++++++++++++++++++------------ net/sched/cls_u32.c | 21 +++++++--- 11 files changed, 182 insertions(+), 75 deletions(-) diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index c99508d426cc..a459be5fe1c2 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -69,17 +69,19 @@ struct tcf_exts { int police; }; -static inline void tcf_exts_init(struct tcf_exts *exts, int action, int police) +static inline int tcf_exts_init(struct tcf_exts *exts, int action, int police) { #ifdef CONFIG_NET_CLS_ACT exts->type = 0; exts->nr_actions = 0; exts->actions = kcalloc(TCA_ACT_MAX_PRIO, sizeof(struct tc_action *), GFP_KERNEL); - WARN_ON(!exts->actions); /* TODO: propagate the error to callers */ + if (!exts->actions) + return -ENOMEM; #endif exts->action = action; exts->police = police; + return 0; } /** diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index 0b8c3ace671f..eb219b78cd49 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -138,10 +138,12 @@ static int basic_set_parms(struct net *net, struct tcf_proto *tp, struct tcf_exts e; struct tcf_ematch_tree t; - tcf_exts_init(&e, TCA_BASIC_ACT, TCA_BASIC_POLICE); - err = tcf_exts_validate(net, tp, tb, est, &e, ovr); + err = tcf_exts_init(&e, TCA_BASIC_ACT, TCA_BASIC_POLICE); if (err < 0) return err; + err = tcf_exts_validate(net, tp, tb, est, &e, ovr); + if (err < 0) + goto errout; err = tcf_em_tree_validate(tp, tb[TCA_BASIC_EMATCHES], &t); if (err < 0) @@ -189,7 +191,10 @@ static int basic_change(struct net *net, struct sk_buff *in_skb, if (!fnew) return -ENOBUFS; - tcf_exts_init(&fnew->exts, TCA_BASIC_ACT, TCA_BASIC_POLICE); + err = tcf_exts_init(&fnew->exts, TCA_BASIC_ACT, TCA_BASIC_POLICE); + if (err < 0) + goto errout; + err = -EINVAL; if (handle) { fnew->handle = handle; @@ -226,6 +231,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb, return 0; errout: + tcf_exts_destroy(&fnew->exts); kfree(fnew); return err; } diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index c3002c2c68bb..4742f415ee5b 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -311,17 +311,19 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf)) return -EINVAL; - tcf_exts_init(&exts, TCA_BPF_ACT, TCA_BPF_POLICE); - ret = tcf_exts_validate(net, tp, tb, est, &exts, ovr); + ret = tcf_exts_init(&exts, TCA_BPF_ACT, TCA_BPF_POLICE); if (ret < 0) return ret; + ret = tcf_exts_validate(net, tp, tb, est, &exts, ovr); + if (ret < 0) + goto errout; if (tb[TCA_BPF_FLAGS]) { u32 bpf_flags = nla_get_u32(tb[TCA_BPF_FLAGS]); if (bpf_flags & ~TCA_BPF_FLAG_ACT_DIRECT) { - tcf_exts_destroy(&exts); - return -EINVAL; + ret = -EINVAL; + goto errout; } have_exts = bpf_flags & TCA_BPF_FLAG_ACT_DIRECT; @@ -331,10 +333,8 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog) : cls_bpf_prog_from_efd(tb, prog, tp); - if (ret < 0) { - tcf_exts_destroy(&exts); - return ret; - } + if (ret < 0) + goto errout; if (tb[TCA_BPF_CLASSID]) { prog->res.classid = nla_get_u32(tb[TCA_BPF_CLASSID]); @@ -343,6 +343,10 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, tcf_exts_change(tp, &prog->exts, &exts); return 0; + +errout: + tcf_exts_destroy(&exts); + return ret; } static u32 cls_bpf_grab_new_handle(struct tcf_proto *tp, @@ -388,7 +392,9 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, if (!prog) return -ENOBUFS; - tcf_exts_init(&prog->exts, TCA_BPF_ACT, TCA_BPF_POLICE); + ret = tcf_exts_init(&prog->exts, TCA_BPF_ACT, TCA_BPF_POLICE); + if (ret < 0) + goto errout; if (oldprog) { if (handle && oldprog->handle != handle) { @@ -420,9 +426,10 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, *arg = (unsigned long) prog; return 0; -errout: - kfree(prog); +errout: + tcf_exts_destroy(&prog->exts); + kfree(prog); return ret; } diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 4c85bd3a750c..85233c470035 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -93,7 +93,9 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, if (!new) return -ENOBUFS; - tcf_exts_init(&new->exts, TCA_CGROUP_ACT, TCA_CGROUP_POLICE); + err = tcf_exts_init(&new->exts, TCA_CGROUP_ACT, TCA_CGROUP_POLICE); + if (err < 0) + goto errout; new->handle = handle; new->tp = tp; err = nla_parse_nested(tb, TCA_CGROUP_MAX, tca[TCA_OPTIONS], @@ -101,10 +103,14 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, if (err < 0) goto errout; - tcf_exts_init(&e, TCA_CGROUP_ACT, TCA_CGROUP_POLICE); - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr); + err = tcf_exts_init(&e, TCA_CGROUP_ACT, TCA_CGROUP_POLICE); if (err < 0) goto errout; + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr); + if (err < 0) { + tcf_exts_destroy(&e); + goto errout; + } err = tcf_em_tree_validate(tp, tb[TCA_CGROUP_EMATCHES], &t); if (err < 0) { @@ -120,6 +126,7 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, call_rcu(&head->rcu, cls_cgroup_destroy_rcu); return 0; errout: + tcf_exts_destroy(&new->exts); kfree(new); return err; } diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index fbfec6a18839..2c1ae549edbf 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -418,10 +418,12 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, return -EOPNOTSUPP; } - tcf_exts_init(&e, TCA_FLOW_ACT, TCA_FLOW_POLICE); + err = tcf_exts_init(&e, TCA_FLOW_ACT, TCA_FLOW_POLICE); + if (err < 0) + goto err1; err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr); if (err < 0) - return err; + goto err1; err = tcf_em_tree_validate(tp, tb[TCA_FLOW_EMATCHES], &t); if (err < 0) @@ -432,13 +434,15 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, if (!fnew) goto err2; - tcf_exts_init(&fnew->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE); + err = tcf_exts_init(&fnew->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE); + if (err < 0) + goto err3; fold = (struct flow_filter *)*arg; if (fold) { err = -EINVAL; if (fold->handle != handle && handle) - goto err2; + goto err3; /* Copy fold into fnew */ fnew->tp = fold->tp; @@ -458,31 +462,31 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, if (tb[TCA_FLOW_MODE]) mode = nla_get_u32(tb[TCA_FLOW_MODE]); if (mode != FLOW_MODE_HASH && nkeys > 1) - goto err2; + goto err3; if (mode == FLOW_MODE_HASH) perturb_period = fold->perturb_period; if (tb[TCA_FLOW_PERTURB]) { if (mode != FLOW_MODE_HASH) - goto err2; + goto err3; perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ; } } else { err = -EINVAL; if (!handle) - goto err2; + goto err3; if (!tb[TCA_FLOW_KEYS]) - goto err2; + goto err3; mode = FLOW_MODE_MAP; if (tb[TCA_FLOW_MODE]) mode = nla_get_u32(tb[TCA_FLOW_MODE]); if (mode != FLOW_MODE_HASH && nkeys > 1) - goto err2; + goto err3; if (tb[TCA_FLOW_PERTURB]) { if (mode != FLOW_MODE_HASH) - goto err2; + goto err3; perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ; } @@ -542,6 +546,8 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, call_rcu(&fold->rcu, flow_destroy_filter); return 0; +err3: + tcf_exts_destroy(&fnew->exts); err2: tcf_em_tree_destroy(&t); kfree(fnew); diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 1e11e57e6947..532ab6751343 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -513,10 +513,12 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp, struct tcf_exts e; int err; - tcf_exts_init(&e, TCA_FLOWER_ACT, 0); - err = tcf_exts_validate(net, tp, tb, est, &e, ovr); + err = tcf_exts_init(&e, TCA_FLOWER_ACT, 0); if (err < 0) return err; + err = tcf_exts_validate(net, tp, tb, est, &e, ovr); + if (err < 0) + goto errout; if (tb[TCA_FLOWER_CLASSID]) { f->res.classid = nla_get_u32(tb[TCA_FLOWER_CLASSID]); @@ -585,7 +587,9 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, if (!fnew) return -ENOBUFS; - tcf_exts_init(&fnew->exts, TCA_FLOWER_ACT, 0); + err = tcf_exts_init(&fnew->exts, TCA_FLOWER_ACT, 0); + if (err < 0) + goto errout; if (!handle) { handle = fl_grab_new_handle(tp, head); @@ -649,6 +653,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, return 0; errout: + tcf_exts_destroy(&fnew->exts); kfree(fnew); return err; } diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index f23a3b68bba6..cc0bda945800 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -195,10 +195,12 @@ fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f, u32 mask; int err; - tcf_exts_init(&e, TCA_FW_ACT, TCA_FW_POLICE); - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr); + err = tcf_exts_init(&e, TCA_FW_ACT, TCA_FW_POLICE); if (err < 0) return err; + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr); + if (err < 0) + goto errout; if (tb[TCA_FW_CLASSID]) { f->res.classid = nla_get_u32(tb[TCA_FW_CLASSID]); @@ -270,10 +272,15 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, #endif /* CONFIG_NET_CLS_IND */ fnew->tp = f->tp; - tcf_exts_init(&fnew->exts, TCA_FW_ACT, TCA_FW_POLICE); + err = tcf_exts_init(&fnew->exts, TCA_FW_ACT, TCA_FW_POLICE); + if (err < 0) { + kfree(fnew); + return err; + } err = fw_change_attrs(net, tp, fnew, tb, tca, base, ovr); if (err < 0) { + tcf_exts_destroy(&fnew->exts); kfree(fnew); return err; } @@ -313,7 +320,9 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, if (f == NULL) return -ENOBUFS; - tcf_exts_init(&f->exts, TCA_FW_ACT, TCA_FW_POLICE); + err = tcf_exts_init(&f->exts, TCA_FW_ACT, TCA_FW_POLICE); + if (err < 0) + goto errout; f->id = handle; f->tp = tp; @@ -328,6 +337,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, return 0; errout: + tcf_exts_destroy(&f->exts); kfree(f); return err; } diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 08a3b0a6f5ab..c91e65d81a48 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -383,17 +383,19 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp, struct nlattr **tb, struct nlattr *est, int new, bool ovr) { - int err; u32 id = 0, to = 0, nhandle = 0x8000; struct route4_filter *fp; unsigned int h1; struct route4_bucket *b; struct tcf_exts e; + int err; - tcf_exts_init(&e, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE); - err = tcf_exts_validate(net, tp, tb, est, &e, ovr); + err = tcf_exts_init(&e, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE); if (err < 0) return err; + err = tcf_exts_validate(net, tp, tb, est, &e, ovr); + if (err < 0) + goto errout; err = -EINVAL; if (tb[TCA_ROUTE4_TO]) { @@ -503,7 +505,10 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, if (!f) goto errout; - tcf_exts_init(&f->exts, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE); + err = tcf_exts_init(&f->exts, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE); + if (err < 0) + goto errout; + if (fold) { f->id = fold->id; f->iif = fold->iif; @@ -557,6 +562,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, return 0; errout: + tcf_exts_destroy(&f->exts); kfree(f); return err; } diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index f9c9fc075fe6..4f05a19fb073 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -487,10 +487,12 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb, if (err < 0) return err; - tcf_exts_init(&e, TCA_RSVP_ACT, TCA_RSVP_POLICE); - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr); + err = tcf_exts_init(&e, TCA_RSVP_ACT, TCA_RSVP_POLICE); if (err < 0) return err; + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr); + if (err < 0) + goto errout2; f = (struct rsvp_filter *)*arg; if (f) { @@ -506,7 +508,11 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb, goto errout2; } - tcf_exts_init(&n->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE); + err = tcf_exts_init(&n->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE); + if (err < 0) { + kfree(n); + goto errout2; + } if (tb[TCA_RSVP_CLASSID]) { n->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]); @@ -530,7 +536,9 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb, if (f == NULL) goto errout2; - tcf_exts_init(&f->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE); + err = tcf_exts_init(&f->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE); + if (err < 0) + goto errout; h2 = 16; if (tb[TCA_RSVP_SRC]) { memcpy(f->src, nla_data(tb[TCA_RSVP_SRC]), sizeof(f->src)); @@ -627,6 +635,7 @@ insert: goto insert; errout: + tcf_exts_destroy(&f->exts); kfree(f); errout2: tcf_exts_destroy(&e); diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 944c8ff45055..d9500709831f 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -219,10 +219,10 @@ static const struct nla_policy tcindex_policy[TCA_TCINDEX_MAX + 1] = { [TCA_TCINDEX_CLASSID] = { .type = NLA_U32 }, }; -static void tcindex_filter_result_init(struct tcindex_filter_result *r) +static int tcindex_filter_result_init(struct tcindex_filter_result *r) { memset(r, 0, sizeof(*r)); - tcf_exts_init(&r->exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); + return tcf_exts_init(&r->exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); } static void __tcindex_partial_destroy(struct rcu_head *head) @@ -233,23 +233,57 @@ static void __tcindex_partial_destroy(struct rcu_head *head) kfree(p); } +static void tcindex_free_perfect_hash(struct tcindex_data *cp) +{ + int i; + + for (i = 0; i < cp->hash; i++) + tcf_exts_destroy(&cp->perfect[i].exts); + kfree(cp->perfect); +} + +static int tcindex_alloc_perfect_hash(struct tcindex_data *cp) +{ + int i, err = 0; + + cp->perfect = kcalloc(cp->hash, sizeof(struct tcindex_filter_result), + GFP_KERNEL); + if (!cp->perfect) + return -ENOMEM; + + for (i = 0; i < cp->hash; i++) { + err = tcf_exts_init(&cp->perfect[i].exts, + TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); + if (err < 0) + goto errout; + } + + return 0; + +errout: + tcindex_free_perfect_hash(cp); + return err; +} + static int tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, u32 handle, struct tcindex_data *p, struct tcindex_filter_result *r, struct nlattr **tb, struct nlattr *est, bool ovr) { - int err, balloc = 0; struct tcindex_filter_result new_filter_result, *old_r = r; struct tcindex_filter_result cr; - struct tcindex_data *cp, *oldp; + struct tcindex_data *cp = NULL, *oldp; struct tcindex_filter *f = NULL; /* make gcc behave */ + int err, balloc = 0; struct tcf_exts e; - tcf_exts_init(&e, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); - err = tcf_exts_validate(net, tp, tb, est, &e, ovr); + err = tcf_exts_init(&e, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); if (err < 0) return err; + err = tcf_exts_validate(net, tp, tb, est, &e, ovr); + if (err < 0) + goto errout; err = -ENOMEM; /* tcindex_data attributes must look atomic to classifier/lookup so @@ -270,19 +304,20 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, if (p->perfect) { int i; - cp->perfect = kmemdup(p->perfect, - sizeof(*r) * cp->hash, GFP_KERNEL); - if (!cp->perfect) + if (tcindex_alloc_perfect_hash(cp) < 0) goto errout; for (i = 0; i < cp->hash; i++) - tcf_exts_init(&cp->perfect[i].exts, - TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); + cp->perfect[i].res = p->perfect[i].res; balloc = 1; } cp->h = p->h; - tcindex_filter_result_init(&new_filter_result); - tcindex_filter_result_init(&cr); + err = tcindex_filter_result_init(&new_filter_result); + if (err < 0) + goto errout1; + err = tcindex_filter_result_init(&cr); + if (err < 0) + goto errout1; if (old_r) cr.res = r->res; @@ -338,15 +373,8 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, err = -ENOMEM; if (!cp->perfect && !cp->h) { if (valid_perfect_hash(cp)) { - int i; - - cp->perfect = kcalloc(cp->hash, sizeof(*r), GFP_KERNEL); - if (!cp->perfect) + if (tcindex_alloc_perfect_hash(cp) < 0) goto errout_alloc; - for (i = 0; i < cp->hash; i++) - tcf_exts_init(&cp->perfect[i].exts, - TCA_TCINDEX_ACT, - TCA_TCINDEX_POLICE); balloc = 1; } else { struct tcindex_filter __rcu **hash; @@ -373,8 +401,12 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, if (!f) goto errout_alloc; f->key = handle; - tcindex_filter_result_init(&f->result); f->next = NULL; + err = tcindex_filter_result_init(&f->result); + if (err < 0) { + kfree(f); + goto errout_alloc; + } } if (tb[TCA_TCINDEX_CLASSID]) { @@ -387,8 +419,13 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, else tcf_exts_change(tp, &cr.exts, &e); - if (old_r && old_r != r) - tcindex_filter_result_init(old_r); + if (old_r && old_r != r) { + err = tcindex_filter_result_init(old_r); + if (err < 0) { + kfree(f); + goto errout_alloc; + } + } oldp = p; r->res = cr.res; @@ -415,9 +452,12 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, errout_alloc: if (balloc == 1) - kfree(cp->perfect); + tcindex_free_perfect_hash(cp); else if (balloc == 2) kfree(cp->h); +errout1: + tcf_exts_destroy(&cr.exts); + tcf_exts_destroy(&new_filter_result.exts); errout: kfree(cp); tcf_exts_destroy(&e); diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index ffe593efe930..a29263a9d8c1 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -709,13 +709,15 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp, struct tc_u_knode *n, struct nlattr **tb, struct nlattr *est, bool ovr) { - int err; struct tcf_exts e; + int err; - tcf_exts_init(&e, TCA_U32_ACT, TCA_U32_POLICE); - err = tcf_exts_validate(net, tp, tb, est, &e, ovr); + err = tcf_exts_init(&e, TCA_U32_ACT, TCA_U32_POLICE); if (err < 0) return err; + err = tcf_exts_validate(net, tp, tb, est, &e, ovr); + if (err < 0) + goto errout; err = -EINVAL; if (tb[TCA_U32_LINK]) { @@ -833,7 +835,10 @@ static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp, new->tp = tp; memcpy(&new->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key)); - tcf_exts_init(&new->exts, TCA_U32_ACT, TCA_U32_POLICE); + if (tcf_exts_init(&new->exts, TCA_U32_ACT, TCA_U32_POLICE)) { + kfree(new); + return NULL; + } return new; } @@ -985,9 +990,12 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, n->handle = handle; n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0; n->flags = flags; - tcf_exts_init(&n->exts, TCA_U32_ACT, TCA_U32_POLICE); n->tp = tp; + err = tcf_exts_init(&n->exts, TCA_U32_ACT, TCA_U32_POLICE); + if (err < 0) + goto errout; + #ifdef CONFIG_CLS_U32_MARK n->pcpu_success = alloc_percpu(u32); if (!n->pcpu_success) { @@ -1028,9 +1036,10 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, errhw: #ifdef CONFIG_CLS_U32_MARK free_percpu(n->pcpu_success); -errout: #endif +errout: + tcf_exts_destroy(&n->exts); #ifdef CONFIG_CLS_U32_PERF free_percpu(n->pf); #endif From 6f0b826da4a33e83501d6c1dc977afa0f5f7166f Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Sat, 20 Aug 2016 07:50:09 +0200 Subject: [PATCH 0396/1715] mlx5/core: Use memdup_user() rather than duplicating its implementation * Reuse existing functionality from memdup_user() instead of keeping duplicate source code. This issue was detected by using the Coccinelle software. * Return directly if this copy operation failed. Signed-off-by: Markus Elfring Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index d6e2a1cae19a..bf682252130b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -1015,7 +1015,6 @@ static ssize_t data_write(struct file *filp, const char __user *buf, struct mlx5_core_dev *dev = filp->private_data; struct mlx5_cmd_debug *dbg = &dev->cmd.dbg; void *ptr; - int err; if (*pos != 0) return -EINVAL; @@ -1023,25 +1022,15 @@ static ssize_t data_write(struct file *filp, const char __user *buf, kfree(dbg->in_msg); dbg->in_msg = NULL; dbg->inlen = 0; - - ptr = kzalloc(count, GFP_KERNEL); - if (!ptr) - return -ENOMEM; - - if (copy_from_user(ptr, buf, count)) { - err = -EFAULT; - goto out; - } + ptr = memdup_user(buf, count); + if (IS_ERR(ptr)) + return PTR_ERR(ptr); dbg->in_msg = ptr; dbg->inlen = count; *pos = count; return count; - -out: - kfree(ptr); - return err; } static ssize_t data_read(struct file *filp, char __user *buf, size_t count, From 4d55d014c53d0d011d4cec85fa0c248a97e8eb06 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sun, 21 Aug 2016 22:46:15 +0000 Subject: [PATCH 0397/1715] net: phy: Add missing of_node_put() in xgmiitorgmii_probe() This node pointer is returned by of_parse_phandle() with refcount incremented in this function. of_node_put() on it before exitting this function. This is detected by Coccinelle semantic patch. Signed-off-by: Wei Yongjun Reviewed-by: Kedareswara rao Appana Signed-off-by: David S. Miller --- drivers/net/phy/xilinx_gmii2rgmii.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/phy/xilinx_gmii2rgmii.c b/drivers/net/phy/xilinx_gmii2rgmii.c index e7a20ec2d8f7..f38be7094362 100644 --- a/drivers/net/phy/xilinx_gmii2rgmii.c +++ b/drivers/net/phy/xilinx_gmii2rgmii.c @@ -75,6 +75,7 @@ int xgmiitorgmii_probe(struct mdio_device *mdiodev) } priv->phy_dev = of_phy_find_device(phy_node); + of_node_put(phy_node); if (!priv->phy_dev) { dev_info(dev, "Couldn't find phydev\n"); return -EPROBE_DEFER; From a044df83e194c28697f6aae61851c3d6e34b8456 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Mon, 22 Aug 2016 13:25:09 +0300 Subject: [PATCH 0398/1715] qed: Add support for legacy VFs The 8.10.x FW added support for forward compatability as well as 'future' backward compatibility, but only to those VFs that were using HSI which was 8.10.x based or newer. The latest firmware now supports backward compatibility for the older VFs based on 8.7.x and 8.8.x firmware as well. Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_l2.c | 15 +-- drivers/net/ethernet/qlogic/qed/qed_l2.h | 3 +- drivers/net/ethernet/qlogic/qed/qed_sriov.c | 109 +++++++++++++++++--- drivers/net/ethernet/qlogic/qed/qed_vf.h | 2 +- 4 files changed, 104 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index c823c46fd979..c04162dabfea 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -514,7 +514,8 @@ int qed_sp_eth_rxq_start_ramrod(struct qed_hwfn *p_hwfn, u8 stats_id, u16 bd_max_bytes, dma_addr_t bd_chain_phys_addr, - dma_addr_t cqe_pbl_addr, u16 cqe_pbl_size) + dma_addr_t cqe_pbl_addr, + u16 cqe_pbl_size, bool b_use_zone_a_prod) { struct rx_queue_start_ramrod_data *p_ramrod = NULL; struct qed_spq_entry *p_ent = NULL; @@ -571,11 +572,14 @@ int qed_sp_eth_rxq_start_ramrod(struct qed_hwfn *p_hwfn, p_ramrod->num_of_pbl_pages = cpu_to_le16(cqe_pbl_size); DMA_REGPAIR_LE(p_ramrod->cqe_pbl_addr, cqe_pbl_addr); - p_ramrod->vf_rx_prod_index = p_params->vf_qid; - if (p_params->vf_qid) + if (p_params->vf_qid || b_use_zone_a_prod) { + p_ramrod->vf_rx_prod_index = p_params->vf_qid; DP_VERBOSE(p_hwfn, QED_MSG_SP, - "Queue is meant for VF rxq[%04x]\n", + "Queue%s is meant for VF rxq[%02x]\n", + b_use_zone_a_prod ? " [legacy]" : "", p_params->vf_qid); + p_ramrod->vf_rx_prod_use_zone_a = b_use_zone_a_prod; + } return qed_spq_post(p_hwfn, p_ent, NULL); } @@ -637,8 +641,7 @@ qed_sp_eth_rx_queue_start(struct qed_hwfn *p_hwfn, abs_stats_id, bd_max_bytes, bd_chain_phys_addr, - cqe_pbl_addr, - cqe_pbl_size); + cqe_pbl_addr, cqe_pbl_size, false); if (rc) qed_sp_release_queue_cid(p_hwfn, p_rx_cid); diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.h b/drivers/net/ethernet/qlogic/qed/qed_l2.h index ff3a198d08d7..ea93519bcb7d 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.h +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.h @@ -225,7 +225,8 @@ int qed_sp_eth_rxq_start_ramrod(struct qed_hwfn *p_hwfn, u8 stats_id, u16 bd_max_bytes, dma_addr_t bd_chain_phys_addr, - dma_addr_t cqe_pbl_addr, u16 cqe_pbl_size); + dma_addr_t cqe_pbl_addr, + u16 cqe_pbl_size, bool b_use_zone_a_prod); int qed_sp_eth_txq_start_ramrod(struct qed_hwfn *p_hwfn, u16 opaque_fid, diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c index 1579f339a5ef..f1fae770e041 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c @@ -60,7 +60,8 @@ static int qed_sp_vf_start(struct qed_hwfn *p_hwfn, struct qed_vf_info *p_vf) } fp_minor = p_vf->acquire.vfdev_info.eth_fp_hsi_minor; - if (fp_minor > ETH_HSI_VER_MINOR) { + if (fp_minor > ETH_HSI_VER_MINOR && + fp_minor != ETH_HSI_VER_NO_PKT_LEN_TUNN) { DP_VERBOSE(p_hwfn, QED_MSG_IOV, "VF [%d] - Requested fp hsi %02x.%02x which is slightly newer than PF's %02x.%02x; Configuring PFs version\n", @@ -1241,6 +1242,16 @@ static u8 qed_iov_vf_mbx_acquire_resc(struct qed_hwfn *p_hwfn, p_req->num_vlan_filters, p_resp->num_vlan_filters, p_req->num_mc_filters, p_resp->num_mc_filters); + + /* Some legacy OSes are incapable of correctly handling this + * failure. + */ + if ((p_vf->acquire.vfdev_info.eth_fp_hsi_minor == + ETH_HSI_VER_NO_PKT_LEN_TUNN) && + (p_vf->acquire.vfdev_info.os_type == + VFPF_ACQUIRE_OS_WINDOWS)) + return PFVF_STATUS_SUCCESS; + return PFVF_STATUS_NO_RESOURCE; } @@ -1287,16 +1298,35 @@ static void qed_iov_vf_mbx_acquire(struct qed_hwfn *p_hwfn, pfdev_info->major_fp_hsi = ETH_HSI_VER_MAJOR; pfdev_info->minor_fp_hsi = ETH_HSI_VER_MINOR; + if (vf->state != VF_FREE && vf->state != VF_STOPPED) { + DP_VERBOSE(p_hwfn, + QED_MSG_IOV, + "VF[%d] sent ACQUIRE but is already in state %d - fail request\n", + vf->abs_vf_id, vf->state); + goto out; + } + /* Validate FW compatibility */ if (req->vfdev_info.eth_fp_hsi_major != ETH_HSI_VER_MAJOR) { - DP_INFO(p_hwfn, - "VF[%d] needs fastpath HSI %02x.%02x, which is incompatible with loaded FW's faspath HSI %02x.%02x\n", - vf->abs_vf_id, - req->vfdev_info.eth_fp_hsi_major, - req->vfdev_info.eth_fp_hsi_minor, - ETH_HSI_VER_MAJOR, ETH_HSI_VER_MINOR); + if (req->vfdev_info.capabilities & + VFPF_ACQUIRE_CAP_PRE_FP_HSI) { + struct vf_pf_vfdev_info *p_vfdev = &req->vfdev_info; - goto out; + DP_VERBOSE(p_hwfn, QED_MSG_IOV, + "VF[%d] is pre-fastpath HSI\n", + vf->abs_vf_id); + p_vfdev->eth_fp_hsi_major = ETH_HSI_VER_MAJOR; + p_vfdev->eth_fp_hsi_minor = ETH_HSI_VER_NO_PKT_LEN_TUNN; + } else { + DP_INFO(p_hwfn, + "VF[%d] needs fastpath HSI %02x.%02x, which is incompatible with loaded FW's faspath HSI %02x.%02x\n", + vf->abs_vf_id, + req->vfdev_info.eth_fp_hsi_major, + req->vfdev_info.eth_fp_hsi_minor, + ETH_HSI_VER_MAJOR, ETH_HSI_VER_MINOR); + + goto out; + } } /* On 100g PFs, prevent old VFs from loading */ @@ -1335,6 +1365,10 @@ static void qed_iov_vf_mbx_acquire(struct qed_hwfn *p_hwfn, pfdev_info->fw_minor = FW_MINOR_VERSION; pfdev_info->fw_rev = FW_REVISION_VERSION; pfdev_info->fw_eng = FW_ENGINEERING_VERSION; + + /* Incorrect when legacy, but doesn't matter as legacy isn't reading + * this field. + */ pfdev_info->minor_fp_hsi = min_t(u8, ETH_HSI_VER_MINOR, req->vfdev_info.eth_fp_hsi_minor); pfdev_info->os_type = VFPF_ACQUIRE_OS_LINUX; @@ -1691,21 +1725,32 @@ static void qed_iov_vf_mbx_stop_vport(struct qed_hwfn *p_hwfn, static void qed_iov_vf_mbx_start_rxq_resp(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, - struct qed_vf_info *vf, u8 status) + struct qed_vf_info *vf, + u8 status, bool b_legacy) { struct qed_iov_vf_mbx *mbx = &vf->vf_mbx; struct pfvf_start_queue_resp_tlv *p_tlv; struct vfpf_start_rxq_tlv *req; + u16 length; mbx->offset = (u8 *)mbx->reply_virt; + /* Taking a bigger struct instead of adding a TLV to list was a + * mistake, but one which we're now stuck with, as some older + * clients assume the size of the previous response. + */ + if (!b_legacy) + length = sizeof(*p_tlv); + else + length = sizeof(struct pfvf_def_resp_tlv); + p_tlv = qed_add_tlv(p_hwfn, &mbx->offset, CHANNEL_TLV_START_RXQ, - sizeof(*p_tlv)); + length); qed_add_tlv(p_hwfn, &mbx->offset, CHANNEL_TLV_LIST_END, sizeof(struct channel_list_end_tlv)); /* Update the TLV with the response */ - if (status == PFVF_STATUS_SUCCESS) { + if ((status == PFVF_STATUS_SUCCESS) && !b_legacy) { req = &mbx->req_virt->start_rxq; p_tlv->offset = PXP_VF_BAR0_START_MSDM_ZONE_B + offsetof(struct mstorm_vf_zone, @@ -1713,7 +1758,7 @@ static void qed_iov_vf_mbx_start_rxq_resp(struct qed_hwfn *p_hwfn, sizeof(struct eth_rx_prod_data) * req->rx_qid; } - qed_iov_send_response(p_hwfn, p_ptt, vf, sizeof(*p_tlv), status); + qed_iov_send_response(p_hwfn, p_ptt, vf, length, status); } static void qed_iov_vf_mbx_start_rxq(struct qed_hwfn *p_hwfn, @@ -1724,6 +1769,7 @@ static void qed_iov_vf_mbx_start_rxq(struct qed_hwfn *p_hwfn, struct qed_iov_vf_mbx *mbx = &vf->vf_mbx; u8 status = PFVF_STATUS_NO_RESOURCE; struct vfpf_start_rxq_tlv *req; + bool b_legacy_vf = false; int rc; memset(¶ms, 0, sizeof(params)); @@ -1739,13 +1785,27 @@ static void qed_iov_vf_mbx_start_rxq(struct qed_hwfn *p_hwfn, params.sb = req->hw_sb; params.sb_idx = req->sb_index; + /* Legacy VFs have their Producers in a different location, which they + * calculate on their own and clean the producer prior to this. + */ + if (vf->acquire.vfdev_info.eth_fp_hsi_minor == + ETH_HSI_VER_NO_PKT_LEN_TUNN) { + b_legacy_vf = true; + } else { + REG_WR(p_hwfn, + GTT_BAR0_MAP_REG_MSDM_RAM + + MSTORM_ETH_VF_PRODS_OFFSET(vf->abs_vf_id, req->rx_qid), + 0); + } + rc = qed_sp_eth_rxq_start_ramrod(p_hwfn, vf->opaque_fid, vf->vf_queues[req->rx_qid].fw_cid, ¶ms, vf->abs_vf_id + 0x10, req->bd_max_bytes, req->rxq_addr, - req->cqe_pbl_addr, req->cqe_pbl_size); + req->cqe_pbl_addr, req->cqe_pbl_size, + b_legacy_vf); if (rc) { status = PFVF_STATUS_FAILURE; @@ -1756,7 +1816,7 @@ static void qed_iov_vf_mbx_start_rxq(struct qed_hwfn *p_hwfn, } out: - qed_iov_vf_mbx_start_rxq_resp(p_hwfn, p_ptt, vf, status); + qed_iov_vf_mbx_start_rxq_resp(p_hwfn, p_ptt, vf, status, b_legacy_vf); } static void qed_iov_vf_mbx_start_txq_resp(struct qed_hwfn *p_hwfn, @@ -1765,23 +1825,38 @@ static void qed_iov_vf_mbx_start_txq_resp(struct qed_hwfn *p_hwfn, { struct qed_iov_vf_mbx *mbx = &p_vf->vf_mbx; struct pfvf_start_queue_resp_tlv *p_tlv; + bool b_legacy = false; + u16 length; mbx->offset = (u8 *)mbx->reply_virt; + /* Taking a bigger struct instead of adding a TLV to list was a + * mistake, but one which we're now stuck with, as some older + * clients assume the size of the previous response. + */ + if (p_vf->acquire.vfdev_info.eth_fp_hsi_minor == + ETH_HSI_VER_NO_PKT_LEN_TUNN) + b_legacy = true; + + if (!b_legacy) + length = sizeof(*p_tlv); + else + length = sizeof(struct pfvf_def_resp_tlv); + p_tlv = qed_add_tlv(p_hwfn, &mbx->offset, CHANNEL_TLV_START_TXQ, - sizeof(*p_tlv)); + length); qed_add_tlv(p_hwfn, &mbx->offset, CHANNEL_TLV_LIST_END, sizeof(struct channel_list_end_tlv)); /* Update the TLV with the response */ - if (status == PFVF_STATUS_SUCCESS) { + if ((status == PFVF_STATUS_SUCCESS) && !b_legacy) { u16 qid = mbx->req_virt->start_txq.tx_qid; p_tlv->offset = qed_db_addr(p_vf->vf_queues[qid].fw_cid, DQ_DEMS_LEGACY); } - qed_iov_send_response(p_hwfn, p_ptt, p_vf, sizeof(*p_tlv), status); + qed_iov_send_response(p_hwfn, p_ptt, p_vf, length, status); } static void qed_iov_vf_mbx_start_txq(struct qed_hwfn *p_hwfn, diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.h b/drivers/net/ethernet/qlogic/qed/qed_vf.h index b23ce58e932f..60a599b579bd 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_vf.h +++ b/drivers/net/ethernet/qlogic/qed/qed_vf.h @@ -86,7 +86,7 @@ struct vfpf_acquire_tlv { struct vfpf_first_tlv first_tlv; struct vf_pf_vfdev_info { -#define VFPF_ACQUIRE_CAP_OBSOLETE (1 << 0) +#define VFPF_ACQUIRE_CAP_PRE_FP_HSI (1 << 0) /* VF pre-FP hsi version */ #define VFPF_ACQUIRE_CAP_100G (1 << 1) /* VF can support 100g */ u64 capabilities; u8 fw_major; From 11a85d759ea5064c986c47112607681c09cdcdd9 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Mon, 22 Aug 2016 13:25:10 +0300 Subject: [PATCH 0399/1715] qed: Prevent VFs from pause flooding Firmware would silently drop any control frame sent by VF to prevent a malicious VF from generating pause flood in the network. Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_l2.c | 3 +++ drivers/net/ethernet/qlogic/qed/qed_l2.h | 2 ++ drivers/net/ethernet/qlogic/qed/qed_sriov.c | 1 + 3 files changed, 6 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index c04162dabfea..bf433016551a 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -101,6 +101,9 @@ int qed_sp_eth_vport_start(struct qed_hwfn *p_hwfn, p_ramrod->tx_switching_en = p_params->tx_switching; + p_ramrod->ctl_frame_mac_check_en = !!p_params->check_mac; + p_ramrod->ctl_frame_ethtype_check_en = !!p_params->check_ethtype; + /* Software Function ID in hwfn (PFs are 0 - 15, VFs are 16 - 135) */ p_ramrod->sw_fid = qed_concrete_to_sw_fid(p_hwfn->cdev, p_params->concrete_fid); diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.h b/drivers/net/ethernet/qlogic/qed/qed_l2.h index ea93519bcb7d..e495d62fcc03 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.h +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.h @@ -102,6 +102,8 @@ struct qed_sp_vport_start_params { u16 opaque_fid; u8 vport_id; u16 mtu; + bool check_mac; + bool check_ethtype; }; int qed_sp_eth_vport_start(struct qed_hwfn *p_hwfn, diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c index f1fae770e041..cb68674640f9 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c @@ -1680,6 +1680,7 @@ static void qed_iov_vf_mbx_start_vport(struct qed_hwfn *p_hwfn, params.vport_id = vf->vport_id; params.max_buffers_per_cqe = start->max_buffers_per_cqe; params.mtu = vf->mtu; + params.check_mac = true; rc = qed_sp_eth_vport_start(p_hwfn, ¶ms); if (rc) { From d8c2c7e3404e5bcaeae4af78d6935e5b8fcc97ee Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Mon, 22 Aug 2016 13:25:11 +0300 Subject: [PATCH 0400/1715] qed*: Add support for VFs over legacy PFs Modern VFs can't run on old non-compatible as the fastpath HSI is slightly changed - but as the HSI is actually very close [basically, a single bit whose meaning flipped] this can be supported with small modifications. The major differences would be in: - Recognizing that VF is running on top of a legacy PF. - Returning some slowpath configurations that are no longer needed on top of modern PFs, but would be required when working over the legacy ones. Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_l2.c | 2 + drivers/net/ethernet/qlogic/qed/qed_vf.c | 107 +++++++++++++++---- drivers/net/ethernet/qlogic/qed/qed_vf.h | 5 + drivers/net/ethernet/qlogic/qede/qede.h | 2 + drivers/net/ethernet/qlogic/qede/qede_main.c | 10 ++ include/linux/qed/qed_eth_if.h | 3 + 6 files changed, 109 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index bf433016551a..4409ea3f7d40 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -1685,6 +1685,8 @@ static int qed_fill_eth_dev_info(struct qed_dev *cdev, qed_vf_get_num_vlan_filters(&cdev->hwfns[0], &info->num_vlan_filters); qed_vf_get_port_mac(&cdev->hwfns[0], info->port_mac); + + info->is_legacy = !!cdev->hwfns[0].vf_iov_info->b_pre_fp_hsi; } qed_fill_dev_info(cdev, &info->common); diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.c b/drivers/net/ethernet/qlogic/qed/qed_vf.c index 9b780b31b15c..f9f68da8b277 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_vf.c +++ b/drivers/net/ethernet/qlogic/qed/qed_vf.c @@ -191,6 +191,9 @@ static int qed_vf_pf_acquire(struct qed_hwfn *p_hwfn) DP_VERBOSE(p_hwfn, QED_MSG_IOV, "attempting to acquire resources\n"); + /* Clear response buffer, as this might be a re-send */ + memset(p_iov->pf2vf_reply, 0, sizeof(union pfvf_tlvs)); + /* send acquire request */ rc = qed_send_msg2pf(p_hwfn, &resp->hdr.status, sizeof(*resp)); if (rc) @@ -205,9 +208,12 @@ static int qed_vf_pf_acquire(struct qed_hwfn *p_hwfn) /* PF agrees to allocate our resources */ if (!(resp->pfdev_info.capabilities & PFVF_ACQUIRE_CAP_POST_FW_OVERRIDE)) { - DP_INFO(p_hwfn, - "PF is using old incompatible driver; Either downgrade driver or request provider to update hypervisor version\n"); - return -EINVAL; + /* It's possible legacy PF mistakenly accepted; + * but we don't care - simply mark it as + * legacy and continue. + */ + req->vfdev_info.capabilities |= + VFPF_ACQUIRE_CAP_PRE_FP_HSI; } DP_VERBOSE(p_hwfn, QED_MSG_IOV, "resources acquired\n"); resources_acquired = true; @@ -215,27 +221,55 @@ static int qed_vf_pf_acquire(struct qed_hwfn *p_hwfn) attempts < VF_ACQUIRE_THRESH) { qed_vf_pf_acquire_reduce_resc(p_hwfn, p_resc, &resp->resc); + } else if (resp->hdr.status == PFVF_STATUS_NOT_SUPPORTED) { + if (pfdev_info->major_fp_hsi && + (pfdev_info->major_fp_hsi != ETH_HSI_VER_MAJOR)) { + DP_NOTICE(p_hwfn, + "PF uses an incompatible fastpath HSI %02x.%02x [VF requires %02x.%02x]. Please change to a VF driver using %02x.xx.\n", + pfdev_info->major_fp_hsi, + pfdev_info->minor_fp_hsi, + ETH_HSI_VER_MAJOR, + ETH_HSI_VER_MINOR, + pfdev_info->major_fp_hsi); + rc = -EINVAL; + goto exit; + } - /* Clear response buffer */ - memset(p_iov->pf2vf_reply, 0, sizeof(union pfvf_tlvs)); - } else if ((resp->hdr.status == PFVF_STATUS_NOT_SUPPORTED) && - pfdev_info->major_fp_hsi && - (pfdev_info->major_fp_hsi != ETH_HSI_VER_MAJOR)) { - DP_NOTICE(p_hwfn, - "PF uses an incompatible fastpath HSI %02x.%02x [VF requires %02x.%02x]. Please change to a VF driver using %02x.xx.\n", - pfdev_info->major_fp_hsi, - pfdev_info->minor_fp_hsi, - ETH_HSI_VER_MAJOR, - ETH_HSI_VER_MINOR, pfdev_info->major_fp_hsi); - return -EINVAL; + if (!pfdev_info->major_fp_hsi) { + if (req->vfdev_info.capabilities & + VFPF_ACQUIRE_CAP_PRE_FP_HSI) { + DP_NOTICE(p_hwfn, + "PF uses very old drivers. Please change to a VF driver using no later than 8.8.x.x.\n"); + rc = -EINVAL; + goto exit; + } else { + DP_INFO(p_hwfn, + "PF is old - try re-acquire to see if it supports FW-version override\n"); + req->vfdev_info.capabilities |= + VFPF_ACQUIRE_CAP_PRE_FP_HSI; + continue; + } + } + + /* If PF/VF are using same Major, PF must have had + * it's reasons. Simply fail. + */ + DP_NOTICE(p_hwfn, "PF rejected acquisition by VF\n"); + rc = -EINVAL; + goto exit; } else { DP_ERR(p_hwfn, "PF returned error %d to VF acquisition request\n", resp->hdr.status); - return -EAGAIN; + rc = -EAGAIN; + goto exit; } } + /* Mark the PF as legacy, if needed */ + if (req->vfdev_info.capabilities & VFPF_ACQUIRE_CAP_PRE_FP_HSI) + p_iov->b_pre_fp_hsi = true; + /* Update bulletin board size with response from PF */ p_iov->bulletin.size = resp->bulletin_size; @@ -253,14 +287,16 @@ static int qed_vf_pf_acquire(struct qed_hwfn *p_hwfn) } } - if (ETH_HSI_VER_MINOR && + if (!p_iov->b_pre_fp_hsi && + ETH_HSI_VER_MINOR && (resp->pfdev_info.minor_fp_hsi < ETH_HSI_VER_MINOR)) { DP_INFO(p_hwfn, "PF is using older fastpath HSI; %02x.%02x is configured\n", ETH_HSI_VER_MAJOR, resp->pfdev_info.minor_fp_hsi); } - return 0; +exit: + return rc; } int qed_vf_hw_prepare(struct qed_hwfn *p_hwfn) @@ -347,6 +383,9 @@ free_p_iov: return -ENOMEM; } +#define TSTORM_QZONE_START PXP_VF_BAR0_START_SDM_ZONE_A +#define MSTORM_QZONE_START(dev) (TSTORM_QZONE_START + \ + (TSTORM_QZONE_SIZE * NUM_OF_L2_QUEUES(dev))) int qed_vf_pf_rxq_start(struct qed_hwfn *p_hwfn, u8 rx_qid, @@ -374,6 +413,21 @@ int qed_vf_pf_rxq_start(struct qed_hwfn *p_hwfn, req->bd_max_bytes = bd_max_bytes; req->stat_id = -1; + /* If PF is legacy, we'll need to calculate producers ourselves + * as well as clean them. + */ + if (pp_prod && p_iov->b_pre_fp_hsi) { + u8 hw_qid = p_iov->acquire_resp.resc.hw_qid[rx_qid]; + u32 init_prod_val = 0; + + *pp_prod = (u8 __iomem *)p_hwfn->regview + + MSTORM_QZONE_START(p_hwfn->cdev) + + hw_qid * MSTORM_QZONE_SIZE; + + /* Init the rcq, rx bd and rx sge (if valid) producers to 0 */ + __internal_ram_wr(p_hwfn, *pp_prod, sizeof(u32), + (u32 *)(&init_prod_val)); + } /* add list termination tlv */ qed_add_tlv(p_hwfn, &p_iov->offset, CHANNEL_TLV_LIST_END, sizeof(struct channel_list_end_tlv)); @@ -387,7 +441,7 @@ int qed_vf_pf_rxq_start(struct qed_hwfn *p_hwfn, return -EINVAL; /* Learn the address of the producer from the response */ - if (pp_prod) { + if (pp_prod && !p_iov->b_pre_fp_hsi) { u32 init_prod_val = 0; *pp_prod = (u8 __iomem *)p_hwfn->regview + resp->offset; @@ -470,7 +524,20 @@ int qed_vf_pf_txq_start(struct qed_hwfn *p_hwfn, } if (pp_doorbell) { - *pp_doorbell = (u8 __iomem *)p_hwfn->doorbells + resp->offset; + /* Modern PFs provide the actual offsets, while legacy + * provided only the queue id. + */ + if (!p_iov->b_pre_fp_hsi) { + *pp_doorbell = (u8 __iomem *)p_hwfn->doorbells + + resp->offset; + } else { + u8 cid = p_iov->acquire_resp.resc.cid[tx_queue_id]; + u32 db_addr; + + db_addr = qed_db_addr(cid, DQ_DEMS_LEGACY); + *pp_doorbell = (u8 __iomem *)p_hwfn->doorbells + + db_addr; + } DP_VERBOSE(p_hwfn, QED_MSG_IOV, "Txq[0x%02x]: doorbell at %p [offset 0x%08x]\n", diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.h b/drivers/net/ethernet/qlogic/qed/qed_vf.h index 60a599b579bd..35db7a28aa13 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_vf.h +++ b/drivers/net/ethernet/qlogic/qed/qed_vf.h @@ -551,6 +551,11 @@ struct qed_vf_iov { /* we set aside a copy of the acquire response */ struct pfvf_acquire_resp_tlv acquire_resp; + + /* In case PF originates prior to the fp-hsi version comparison, + * this has to be propagated as it affects the fastpath. + */ + bool b_pre_fp_hsi; }; #ifdef CONFIG_QED_SRIOV diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h index 32325ca5951f..700b509f7143 100644 --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@ -268,6 +268,8 @@ struct qede_tx_queue { u16 num_tx_buffers; u64 xmit_pkts; u64 stopped_cnt; + + bool is_legacy; }; #define BD_UNMAP_ADDR(bd) HILO_U64(le32_to_cpu((bd)->addr.hi), \ diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index a05459f96962..ac126e6067ae 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -598,6 +598,14 @@ static netdev_tx_t qede_start_xmit(struct sk_buff *skb, 1 << ETH_TX_DATA_1ST_BD_TUNN_FLAG_SHIFT; } + /* Legacy FW had flipped behavior in regard to this bit - + * I.e., needed to set to prevent FW from touching encapsulated + * packets when it didn't need to. + */ + if (unlikely(txq->is_legacy)) + first_bd->data.bitfields ^= + 1 << ETH_TX_DATA_1ST_BD_TUNN_FLAG_SHIFT; + /* If the packet is IPv6 with extension header, indicate that * to FW and pass few params, since the device cracker doesn't * support parsing IPv6 with extension header/s. @@ -2991,6 +2999,8 @@ static void qede_init_fp(struct qede_dev *edev) for (tc = 0; tc < edev->num_tc; tc++) { txq_index = tc * QEDE_RSS_CNT(edev) + rss_id; fp->txqs[tc].index = txq_index; + if (edev->dev_info.is_legacy) + fp->txqs[tc].is_legacy = true; } snprintf(fp->name, sizeof(fp->name), "%s-fp-%d", diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h index 4475a9d8ae15..33c24ebc9b7f 100644 --- a/include/linux/qed/qed_eth_if.h +++ b/include/linux/qed/qed_eth_if.h @@ -23,6 +23,9 @@ struct qed_dev_eth_info { u8 port_mac[ETH_ALEN]; u8 num_vlan_filters; + + /* Legacy VF - this affects the datapath, so qede has to know */ + bool is_legacy; }; struct qed_update_vport_rss_params { From b0bccb69eba3629949eaa28017be56c8b1319b45 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Mon, 22 Aug 2016 13:25:12 +0300 Subject: [PATCH 0401/1715] qed: Change locking scheme for VF channel Each VF employees a lock that's supposed to serialize its usage of the HW channel for communication with its PF, but the critical section is ill-defined: - VFs currently release the lock whenever the PF response arrives, prior to actually processing the reply buffer [which was also supposed to have been protected by same lock]. - The lock would be released on first response, ignoring the possibilty the sw flow isn't over [as might be the case of the acquisition flow]. As a result, the flow would run unprotected and would cause a double mutex release [as the additional message completion would release it while its actually already free]. Change the flow to have a dedicated function to be called at end of each flow and release the lock. Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_vf.c | 124 ++++++++++++++++------- 1 file changed, 90 insertions(+), 34 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.c b/drivers/net/ethernet/qlogic/qed/qed_vf.c index f9f68da8b277..3c9071de5472 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_vf.c +++ b/drivers/net/ethernet/qlogic/qed/qed_vf.c @@ -46,6 +46,17 @@ static void *qed_vf_pf_prep(struct qed_hwfn *p_hwfn, u16 type, u16 length) return p_tlv; } +static void qed_vf_pf_req_end(struct qed_hwfn *p_hwfn, int req_status) +{ + union pfvf_tlvs *resp = p_hwfn->vf_iov_info->pf2vf_reply; + + DP_VERBOSE(p_hwfn, QED_MSG_IOV, + "VF request status = 0x%x, PF reply status = 0x%x\n", + req_status, resp->default_resp.hdr.status); + + mutex_unlock(&(p_hwfn->vf_iov_info->mutex)); +} + static int qed_send_msg2pf(struct qed_hwfn *p_hwfn, u8 *done, u32 resp_size) { union vfpf_tlvs *p_req = p_hwfn->vf_iov_info->vf2pf_request; @@ -103,16 +114,12 @@ static int qed_send_msg2pf(struct qed_hwfn *p_hwfn, u8 *done, u32 resp_size) "VF <-- PF Timeout [Type %d]\n", p_req->first_tlv.tl.type); rc = -EBUSY; - goto exit; } else { DP_VERBOSE(p_hwfn, QED_MSG_IOV, "PF response: %d [Type %d]\n", *done, p_req->first_tlv.tl.type); } -exit: - mutex_unlock(&(p_hwfn->vf_iov_info->mutex)); - return rc; } @@ -296,6 +303,8 @@ static int qed_vf_pf_acquire(struct qed_hwfn *p_hwfn) } exit: + qed_vf_pf_req_end(p_hwfn, rc); + return rc; } @@ -435,10 +444,12 @@ int qed_vf_pf_rxq_start(struct qed_hwfn *p_hwfn, resp = &p_iov->pf2vf_reply->queue_start; rc = qed_send_msg2pf(p_hwfn, &resp->hdr.status, sizeof(*resp)); if (rc) - return rc; + goto exit; - if (resp->hdr.status != PFVF_STATUS_SUCCESS) - return -EINVAL; + if (resp->hdr.status != PFVF_STATUS_SUCCESS) { + rc = -EINVAL; + goto exit; + } /* Learn the address of the producer from the response */ if (pp_prod && !p_iov->b_pre_fp_hsi) { @@ -453,6 +464,8 @@ int qed_vf_pf_rxq_start(struct qed_hwfn *p_hwfn, __internal_ram_wr(p_hwfn, *pp_prod, sizeof(u32), (u32 *)&init_prod_val); } +exit: + qed_vf_pf_req_end(p_hwfn, rc); return rc; } @@ -478,10 +491,15 @@ int qed_vf_pf_rxq_stop(struct qed_hwfn *p_hwfn, u16 rx_qid, bool cqe_completion) resp = &p_iov->pf2vf_reply->default_resp; rc = qed_send_msg2pf(p_hwfn, &resp->hdr.status, sizeof(*resp)); if (rc) - return rc; + goto exit; - if (resp->hdr.status != PFVF_STATUS_SUCCESS) - return -EINVAL; + if (resp->hdr.status != PFVF_STATUS_SUCCESS) { + rc = -EINVAL; + goto exit; + } + +exit: + qed_vf_pf_req_end(p_hwfn, rc); return rc; } @@ -544,6 +562,7 @@ int qed_vf_pf_txq_start(struct qed_hwfn *p_hwfn, tx_queue_id, *pp_doorbell, resp->offset); } exit: + qed_vf_pf_req_end(p_hwfn, rc); return rc; } @@ -568,10 +587,15 @@ int qed_vf_pf_txq_stop(struct qed_hwfn *p_hwfn, u16 tx_qid) resp = &p_iov->pf2vf_reply->default_resp; rc = qed_send_msg2pf(p_hwfn, &resp->hdr.status, sizeof(*resp)); if (rc) - return rc; + goto exit; - if (resp->hdr.status != PFVF_STATUS_SUCCESS) - return -EINVAL; + if (resp->hdr.status != PFVF_STATUS_SUCCESS) { + rc = -EINVAL; + goto exit; + } + +exit: + qed_vf_pf_req_end(p_hwfn, rc); return rc; } @@ -610,10 +634,15 @@ int qed_vf_pf_vport_start(struct qed_hwfn *p_hwfn, resp = &p_iov->pf2vf_reply->default_resp; rc = qed_send_msg2pf(p_hwfn, &resp->hdr.status, sizeof(*resp)); if (rc) - return rc; + goto exit; - if (resp->hdr.status != PFVF_STATUS_SUCCESS) - return -EINVAL; + if (resp->hdr.status != PFVF_STATUS_SUCCESS) { + rc = -EINVAL; + goto exit; + } + +exit: + qed_vf_pf_req_end(p_hwfn, rc); return rc; } @@ -634,10 +663,15 @@ int qed_vf_pf_vport_stop(struct qed_hwfn *p_hwfn) rc = qed_send_msg2pf(p_hwfn, &resp->hdr.status, sizeof(*resp)); if (rc) - return rc; + goto exit; - if (resp->hdr.status != PFVF_STATUS_SUCCESS) - return -EINVAL; + if (resp->hdr.status != PFVF_STATUS_SUCCESS) { + rc = -EINVAL; + goto exit; + } + +exit: + qed_vf_pf_req_end(p_hwfn, rc); return rc; } @@ -837,13 +871,18 @@ int qed_vf_pf_vport_update(struct qed_hwfn *p_hwfn, rc = qed_send_msg2pf(p_hwfn, &resp->hdr.status, resp_size); if (rc) - return rc; + goto exit; - if (resp->hdr.status != PFVF_STATUS_SUCCESS) - return -EINVAL; + if (resp->hdr.status != PFVF_STATUS_SUCCESS) { + rc = -EINVAL; + goto exit; + } qed_vf_handle_vp_update_tlvs_resp(p_hwfn, p_params); +exit: + qed_vf_pf_req_end(p_hwfn, rc); + return rc; } @@ -864,14 +903,19 @@ int qed_vf_pf_reset(struct qed_hwfn *p_hwfn) resp = &p_iov->pf2vf_reply->default_resp; rc = qed_send_msg2pf(p_hwfn, &resp->hdr.status, sizeof(*resp)); if (rc) - return rc; + goto exit; - if (resp->hdr.status != PFVF_STATUS_SUCCESS) - return -EAGAIN; + if (resp->hdr.status != PFVF_STATUS_SUCCESS) { + rc = -EAGAIN; + goto exit; + } p_hwfn->b_int_enabled = 0; - return 0; +exit: + qed_vf_pf_req_end(p_hwfn, rc); + + return rc; } int qed_vf_pf_release(struct qed_hwfn *p_hwfn) @@ -895,6 +939,8 @@ int qed_vf_pf_release(struct qed_hwfn *p_hwfn) if (!rc && resp->hdr.status != PFVF_STATUS_SUCCESS) rc = -EAGAIN; + qed_vf_pf_req_end(p_hwfn, rc); + p_hwfn->b_int_enabled = 0; if (p_iov->vf2pf_request) @@ -963,12 +1009,17 @@ int qed_vf_pf_filter_ucast(struct qed_hwfn *p_hwfn, resp = &p_iov->pf2vf_reply->default_resp; rc = qed_send_msg2pf(p_hwfn, &resp->hdr.status, sizeof(*resp)); if (rc) - return rc; + goto exit; - if (resp->hdr.status != PFVF_STATUS_SUCCESS) - return -EAGAIN; + if (resp->hdr.status != PFVF_STATUS_SUCCESS) { + rc = -EAGAIN; + goto exit; + } - return 0; +exit: + qed_vf_pf_req_end(p_hwfn, rc); + + return rc; } int qed_vf_pf_int_cleanup(struct qed_hwfn *p_hwfn) @@ -987,12 +1038,17 @@ int qed_vf_pf_int_cleanup(struct qed_hwfn *p_hwfn) rc = qed_send_msg2pf(p_hwfn, &resp->hdr.status, sizeof(*resp)); if (rc) - return rc; + goto exit; - if (resp->hdr.status != PFVF_STATUS_SUCCESS) - return -EINVAL; + if (resp->hdr.status != PFVF_STATUS_SUCCESS) { + rc = -EINVAL; + goto exit; + } - return 0; +exit: + qed_vf_pf_req_end(p_hwfn, rc); + + return rc; } u16 qed_vf_get_igu_sb_id(struct qed_hwfn *p_hwfn, u16 sb_id) From b72a32dacdfa29b21da8c720ab9ceee40399b2ac Mon Sep 17 00:00:00 2001 From: Rahul Lakkireddy Date: Mon, 22 Aug 2016 16:29:06 +0530 Subject: [PATCH 0402/1715] cxgb4: add support for tx traffic scheduling classes Add support to create tx traffic scheduling classes with specified scheduling parameters. Return an existing class if a match is found with same scheduling parameters. Signed-off-by: Rahul Lakkireddy Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/Makefile | 2 +- drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 28 ++- .../net/ethernet/chelsio/cxgb4/cxgb4_main.c | 20 +- drivers/net/ethernet/chelsio/cxgb4/sched.c | 235 ++++++++++++++++++ drivers/net/ethernet/chelsio/cxgb4/sched.h | 89 +++++++ drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 31 ++- drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h | 38 ++- 7 files changed, 438 insertions(+), 5 deletions(-) create mode 100644 drivers/net/ethernet/chelsio/cxgb4/sched.c create mode 100644 drivers/net/ethernet/chelsio/cxgb4/sched.h diff --git a/drivers/net/ethernet/chelsio/cxgb4/Makefile b/drivers/net/ethernet/chelsio/cxgb4/Makefile index fac2157faf69..246129650967 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/Makefile +++ b/drivers/net/ethernet/chelsio/cxgb4/Makefile @@ -4,7 +4,7 @@ obj-$(CONFIG_CHELSIO_T4) += cxgb4.o -cxgb4-objs := cxgb4_main.o l2t.o t4_hw.o sge.o clip_tbl.o cxgb4_ethtool.o cxgb4_uld.o +cxgb4-objs := cxgb4_main.o l2t.o t4_hw.o sge.o clip_tbl.o cxgb4_ethtool.o cxgb4_uld.o sched.o cxgb4-$(CONFIG_CHELSIO_T4_DCB) += cxgb4_dcb.o cxgb4-$(CONFIG_CHELSIO_T4_FCOE) += cxgb4_fcoe.o cxgb4-$(CONFIG_DEBUG_FS) += cxgb4_debugfs.o diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 6b0528913687..17a6dd0ee166 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -1,7 +1,7 @@ /* * This file is part of the Chelsio T4 Ethernet driver for Linux. * - * Copyright (c) 2003-2014 Chelsio Communications, Inc. All rights reserved. + * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -347,6 +347,7 @@ struct adapter_params { unsigned int ofldq_wr_cred; bool ulptx_memwrite_dsgl; /* use of T5 DSGL allowed */ + unsigned int nsched_cls; /* number of traffic classes */ unsigned int max_ordird_qp; /* Max read depth per RDMA QP */ unsigned int max_ird_adapter; /* Max read depth per adapter */ }; @@ -495,6 +496,7 @@ struct port_info { #endif /* CONFIG_CHELSIO_T4_FCOE */ bool rxtstamp; /* Enable TS */ struct hwtstamp_config tstamp_config; + struct sched_table *sched_tbl; }; struct dentry; @@ -858,6 +860,27 @@ struct adapter { spinlock_t win0_lock ____cacheline_aligned_in_smp; }; +/* Support for "sched-class" command to allow a TX Scheduling Class to be + * programmed with various parameters. + */ +struct ch_sched_params { + s8 type; /* packet or flow */ + union { + struct { + s8 level; /* scheduler hierarchy level */ + s8 mode; /* per-class or per-flow */ + s8 rateunit; /* bit or packet rate */ + s8 ratemode; /* %port relative or kbps absolute */ + s8 channel; /* scheduler channel [0..N] */ + s8 class; /* scheduler class [0..N] */ + s32 minrate; /* minimum rate */ + s32 maxrate; /* maximum rate */ + s16 weight; /* percent weight */ + s16 pktsize; /* average packet size */ + } params; + } u; +}; + /* Defined bit width of user definable filter tuples */ #define ETHTYPE_BITWIDTH 16 @@ -1563,6 +1586,9 @@ void t4_get_trace_filter(struct adapter *adapter, struct trace_params *tp, int filter_index, int *enabled); int t4_fwaddrspace_write(struct adapter *adap, unsigned int mbox, u32 addr, u32 val); +int t4_sched_params(struct adapter *adapter, int type, int level, int mode, + int rateunit, int ratemode, int channel, int class, + int minrate, int maxrate, int weight, int pktsize); void t4_sge_decode_idma_state(struct adapter *adapter, int state); void t4_free_mem(void *addr); void t4_idma_monitor_init(struct adapter *adapter, diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 0099a0cd53ea..2e341bf60ed0 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -1,7 +1,7 @@ /* * This file is part of the Chelsio T4 Ethernet driver for Linux. * - * Copyright (c) 2003-2014 Chelsio Communications, Inc. All rights reserved. + * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -76,6 +76,7 @@ #include "cxgb4_debugfs.h" #include "clip_tbl.h" #include "l2t.h" +#include "sched.h" char cxgb4_driver_name[] = KBUILD_MODNAME; @@ -4024,6 +4025,12 @@ static int adap_init0(struct adapter *adap) adap->clipt_start = val[0]; adap->clipt_end = val[1]; + /* We don't yet have a PARAMs calls to retrieve the number of Traffic + * Classes supported by the hardware/firmware so we hard code it here + * for now. + */ + adap->params.nsched_cls = is_t4(adap->params.chip) ? 15 : 16; + /* query params related to active filter region */ params[0] = FW_PARAM_PFVF(ACTIVE_FILTER_START); params[1] = FW_PARAM_PFVF(ACTIVE_FILTER_END); @@ -4882,6 +4889,7 @@ static void free_some_resources(struct adapter *adapter) unsigned int i; t4_free_mem(adapter->l2t); + t4_cleanup_sched(adapter); t4_free_mem(adapter->tids.tid_tab); kfree(adapter->sge.egr_map); kfree(adapter->sge.ingr_map); @@ -5249,6 +5257,16 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) } } #endif + + for_each_port(adapter, i) { + pi = adap2pinfo(adapter, i); + pi->sched_tbl = t4_init_sched(adapter->params.nsched_cls); + if (!pi->sched_tbl) + dev_warn(&pdev->dev, + "could not activate scheduling on port %d\n", + i); + } + if (is_offload(adapter) && tid_init(&adapter->tids) < 0) { dev_warn(&pdev->dev, "could not allocate TID table, " "continuing\n"); diff --git a/drivers/net/ethernet/chelsio/cxgb4/sched.c b/drivers/net/ethernet/chelsio/cxgb4/sched.c new file mode 100644 index 000000000000..6158daf685c2 --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/sched.c @@ -0,0 +1,235 @@ +/* + * This file is part of the Chelsio T4 Ethernet driver for Linux. + * + * Copyright (c) 2016 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include + +#include "cxgb4.h" +#include "sched.h" + +/* Spinlock must be held by caller */ +static int t4_sched_class_fw_cmd(struct port_info *pi, + struct ch_sched_params *p, + enum sched_fw_ops op) +{ + struct adapter *adap = pi->adapter; + struct sched_table *s = pi->sched_tbl; + struct sched_class *e; + int err = 0; + + e = &s->tab[p->u.params.class]; + switch (op) { + case SCHED_FW_OP_ADD: + err = t4_sched_params(adap, p->type, + p->u.params.level, p->u.params.mode, + p->u.params.rateunit, + p->u.params.ratemode, + p->u.params.channel, e->idx, + p->u.params.minrate, p->u.params.maxrate, + p->u.params.weight, p->u.params.pktsize); + break; + default: + err = -ENOTSUPP; + break; + } + + return err; +} + +/* If @p is NULL, fetch any available unused class */ +static struct sched_class *t4_sched_class_lookup(struct port_info *pi, + const struct ch_sched_params *p) +{ + struct sched_table *s = pi->sched_tbl; + struct sched_class *e, *end; + struct sched_class *found = NULL; + + if (!p) { + /* Get any available unused class */ + end = &s->tab[s->sched_size]; + for (e = &s->tab[0]; e != end; ++e) { + if (e->state == SCHED_STATE_UNUSED) { + found = e; + break; + } + } + } else { + /* Look for a class with matching scheduling parameters */ + struct ch_sched_params info; + struct ch_sched_params tp; + + memset(&info, 0, sizeof(info)); + memset(&tp, 0, sizeof(tp)); + + memcpy(&tp, p, sizeof(tp)); + /* Don't try to match class parameter */ + tp.u.params.class = SCHED_CLS_NONE; + + end = &s->tab[s->sched_size]; + for (e = &s->tab[0]; e != end; ++e) { + if (e->state == SCHED_STATE_UNUSED) + continue; + + memset(&info, 0, sizeof(info)); + memcpy(&info, &e->info, sizeof(info)); + /* Don't try to match class parameter */ + info.u.params.class = SCHED_CLS_NONE; + + if ((info.type == tp.type) && + (!memcmp(&info.u.params, &tp.u.params, + sizeof(info.u.params)))) { + found = e; + break; + } + } + } + + return found; +} + +static struct sched_class *t4_sched_class_alloc(struct port_info *pi, + struct ch_sched_params *p) +{ + struct sched_table *s = pi->sched_tbl; + struct sched_class *e; + u8 class_id; + int err; + + if (!p) + return NULL; + + class_id = p->u.params.class; + + /* Only accept search for existing class with matching params + * or allocation of new class with specified params + */ + if (class_id != SCHED_CLS_NONE) + return NULL; + + write_lock(&s->rw_lock); + /* See if there's an exisiting class with same + * requested sched params + */ + e = t4_sched_class_lookup(pi, p); + if (!e) { + struct ch_sched_params np; + + /* Fetch any available unused class */ + e = t4_sched_class_lookup(pi, NULL); + if (!e) + goto out; + + memset(&np, 0, sizeof(np)); + memcpy(&np, p, sizeof(np)); + np.u.params.class = e->idx; + + spin_lock(&e->lock); + /* New class */ + err = t4_sched_class_fw_cmd(pi, &np, SCHED_FW_OP_ADD); + if (err) { + spin_unlock(&e->lock); + e = NULL; + goto out; + } + memcpy(&e->info, &np, sizeof(e->info)); + atomic_set(&e->refcnt, 0); + e->state = SCHED_STATE_ACTIVE; + spin_unlock(&e->lock); + } + +out: + write_unlock(&s->rw_lock); + return e; +} + +/** + * cxgb4_sched_class_alloc - allocate a scheduling class + * @dev: net_device pointer + * @p: new scheduling class to create. + * + * Returns pointer to the scheduling class created. If @p is NULL, then + * it allocates and returns any available unused scheduling class. If a + * scheduling class with matching @p is found, then the matching class is + * returned. + */ +struct sched_class *cxgb4_sched_class_alloc(struct net_device *dev, + struct ch_sched_params *p) +{ + struct port_info *pi = netdev2pinfo(dev); + u8 class_id; + + if (!can_sched(dev)) + return NULL; + + class_id = p->u.params.class; + if (!valid_class_id(dev, class_id)) + return NULL; + + return t4_sched_class_alloc(pi, p); +} + +struct sched_table *t4_init_sched(unsigned int sched_size) +{ + struct sched_table *s; + unsigned int i; + + s = t4_alloc_mem(sizeof(*s) + sched_size * sizeof(struct sched_class)); + if (!s) + return NULL; + + s->sched_size = sched_size; + rwlock_init(&s->rw_lock); + + for (i = 0; i < s->sched_size; i++) { + memset(&s->tab[i], 0, sizeof(struct sched_class)); + s->tab[i].idx = i; + s->tab[i].state = SCHED_STATE_UNUSED; + spin_lock_init(&s->tab[i].lock); + atomic_set(&s->tab[i].refcnt, 0); + } + return s; +} + +void t4_cleanup_sched(struct adapter *adap) +{ + struct sched_table *s; + unsigned int i; + + for_each_port(adap, i) { + struct port_info *pi = netdev2pinfo(adap->port[i]); + + s = pi->sched_tbl; + t4_free_mem(s); + } +} diff --git a/drivers/net/ethernet/chelsio/cxgb4/sched.h b/drivers/net/ethernet/chelsio/cxgb4/sched.h new file mode 100644 index 000000000000..60b09e965d34 --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/sched.h @@ -0,0 +1,89 @@ +/* + * This file is part of the Chelsio T4 Ethernet driver for Linux. + * + * Copyright (c) 2016 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __CXGB4_SCHED_H +#define __CXGB4_SCHED_H + +#include +#include + +#define SCHED_CLS_NONE 0xff + +enum { + SCHED_STATE_ACTIVE, + SCHED_STATE_UNUSED, +}; + +enum sched_fw_ops { + SCHED_FW_OP_ADD, +}; + +struct sched_class { + u8 state; + u8 idx; + struct ch_sched_params info; + spinlock_t lock; /* Per class lock */ + atomic_t refcnt; +}; + +struct sched_table { /* per port scheduling table */ + u8 sched_size; + rwlock_t rw_lock; /* Table lock */ + struct sched_class tab[0]; +}; + +static inline bool can_sched(struct net_device *dev) +{ + struct port_info *pi = netdev2pinfo(dev); + + return !pi->sched_tbl ? false : true; +} + +static inline bool valid_class_id(struct net_device *dev, u8 class_id) +{ + struct port_info *pi = netdev2pinfo(dev); + + if ((class_id > pi->sched_tbl->sched_size - 1) && + (class_id != SCHED_CLS_NONE)) + return false; + + return true; +} + +struct sched_class *cxgb4_sched_class_alloc(struct net_device *dev, + struct ch_sched_params *p); + +struct sched_table *t4_init_sched(unsigned int size); +void t4_cleanup_sched(struct adapter *adap); +#endif /* __CXGB4_SCHED_H */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index 2a476cc4e073..de451ee2ba75 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -1,7 +1,7 @@ /* * This file is part of the Chelsio T4 Ethernet driver for Linux. * - * Copyright (c) 2003-2014 Chelsio Communications, Inc. All rights reserved. + * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -8305,3 +8305,32 @@ int t4_set_vf_mac_acl(struct adapter *adapter, unsigned int vf, return t4_wr_mbox(adapter, adapter->mbox, &cmd, sizeof(cmd), &cmd); } + +int t4_sched_params(struct adapter *adapter, int type, int level, int mode, + int rateunit, int ratemode, int channel, int class, + int minrate, int maxrate, int weight, int pktsize) +{ + struct fw_sched_cmd cmd; + + memset(&cmd, 0, sizeof(cmd)); + cmd.op_to_write = cpu_to_be32(FW_CMD_OP_V(FW_SCHED_CMD) | + FW_CMD_REQUEST_F | + FW_CMD_WRITE_F); + cmd.retval_len16 = cpu_to_be32(FW_LEN16(cmd)); + + cmd.u.params.sc = FW_SCHED_SC_PARAMS; + cmd.u.params.type = type; + cmd.u.params.level = level; + cmd.u.params.mode = mode; + cmd.u.params.ch = channel; + cmd.u.params.cl = class; + cmd.u.params.unit = rateunit; + cmd.u.params.rate = ratemode; + cmd.u.params.min = cpu_to_be32(minrate); + cmd.u.params.max = cpu_to_be32(maxrate); + cmd.u.params.weight = cpu_to_be16(weight); + cmd.u.params.pktsize = cpu_to_be16(pktsize); + + return t4_wr_mbox_meat(adapter, adapter->mbox, &cmd, sizeof(cmd), + NULL, 1); +} diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h index d8f4adb9c7a6..ffe4bf4b96da 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h @@ -1,7 +1,7 @@ /* * This file is part of the Chelsio T4 Ethernet driver for Linux. * - * Copyright (c) 2009-2014 Chelsio Communications, Inc. All rights reserved. + * Copyright (c) 2009-2016 Chelsio Communications, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -681,6 +681,7 @@ enum fw_cmd_opcodes { FW_RSS_IND_TBL_CMD = 0x20, FW_RSS_GLB_CONFIG_CMD = 0x22, FW_RSS_VI_CONFIG_CMD = 0x23, + FW_SCHED_CMD = 0x24, FW_DEVLOG_CMD = 0x25, FW_CLIP_CMD = 0x28, FW_LASTC2E_CMD = 0x40, @@ -2962,6 +2963,41 @@ struct fw_rss_vi_config_cmd { #define FW_RSS_VI_CONFIG_CMD_UDPEN_V(x) ((x) << FW_RSS_VI_CONFIG_CMD_UDPEN_S) #define FW_RSS_VI_CONFIG_CMD_UDPEN_F FW_RSS_VI_CONFIG_CMD_UDPEN_V(1U) +enum fw_sched_sc { + FW_SCHED_SC_PARAMS = 1, +}; + +struct fw_sched_cmd { + __be32 op_to_write; + __be32 retval_len16; + union fw_sched { + struct fw_sched_config { + __u8 sc; + __u8 type; + __u8 minmaxen; + __u8 r3[5]; + __u8 nclasses[4]; + __be32 r4; + } config; + struct fw_sched_params { + __u8 sc; + __u8 type; + __u8 level; + __u8 mode; + __u8 unit; + __u8 rate; + __u8 ch; + __u8 cl; + __be32 min; + __be32 max; + __be16 weight; + __be16 pktsize; + __be16 burstsize; + __be16 r4; + } params; + } u; +}; + struct fw_clip_cmd { __be32 op_to_write; __be32 alloc_to_len16; From 6cede1f17f51333ecf9cd4c9fca5565842f1bf55 Mon Sep 17 00:00:00 2001 From: Rahul Lakkireddy Date: Mon, 22 Aug 2016 16:29:07 +0530 Subject: [PATCH 0403/1715] cxgb4: add support for per queue tx scheduling Add support to bind/unbind specified tx queues to/from scheduling classes. If a queue is already bound to a scheduling class, it is unbound first and then bound to a new specified class. Signed-off-by: Rahul Lakkireddy Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 8 + drivers/net/ethernet/chelsio/cxgb4/sched.c | 321 +++++++++++++++++++++ drivers/net/ethernet/chelsio/cxgb4/sched.h | 18 ++ 3 files changed, 347 insertions(+) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 17a6dd0ee166..eb30612c5372 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -881,6 +881,14 @@ struct ch_sched_params { } u; }; +/* Support for "sched_queue" command to allow one or more NIC TX Queues + * to be bound to a TX Scheduling Class. + */ +struct ch_sched_queue { + s8 queue; /* queue index */ + s8 class; /* class index */ +}; + /* Defined bit width of user definable filter tuples */ #define ETHTYPE_BITWIDTH 16 diff --git a/drivers/net/ethernet/chelsio/cxgb4/sched.c b/drivers/net/ethernet/chelsio/cxgb4/sched.c index 6158daf685c2..539de764bbd3 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sched.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sched.c @@ -67,6 +67,312 @@ static int t4_sched_class_fw_cmd(struct port_info *pi, return err; } +/* Spinlock must be held by caller */ +static int t4_sched_bind_unbind_op(struct port_info *pi, void *arg, + enum sched_bind_type type, bool bind) +{ + struct adapter *adap = pi->adapter; + u32 fw_mnem, fw_class, fw_param; + unsigned int pf = adap->pf; + unsigned int vf = 0; + int err = 0; + + switch (type) { + case SCHED_QUEUE: { + struct sched_queue_entry *qe; + + qe = (struct sched_queue_entry *)arg; + + /* Create a template for the FW_PARAMS_CMD mnemonic and + * value (TX Scheduling Class in this case). + */ + fw_mnem = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) | + FW_PARAMS_PARAM_X_V( + FW_PARAMS_PARAM_DMAQ_EQ_SCHEDCLASS_ETH)); + fw_class = bind ? qe->param.class : FW_SCHED_CLS_NONE; + fw_param = (fw_mnem | FW_PARAMS_PARAM_YZ_V(qe->cntxt_id)); + + pf = adap->pf; + vf = 0; + break; + } + default: + err = -ENOTSUPP; + goto out; + } + + err = t4_set_params(adap, adap->mbox, pf, vf, 1, &fw_param, &fw_class); + +out: + return err; +} + +static struct sched_class *t4_sched_queue_lookup(struct port_info *pi, + const unsigned int qid, + int *index) +{ + struct sched_table *s = pi->sched_tbl; + struct sched_class *e, *end; + struct sched_class *found = NULL; + int i; + + /* Look for a class with matching bound queue parameters */ + end = &s->tab[s->sched_size]; + for (e = &s->tab[0]; e != end; ++e) { + struct sched_queue_entry *qe; + + i = 0; + if (e->state == SCHED_STATE_UNUSED) + continue; + + list_for_each_entry(qe, &e->queue_list, list) { + if (qe->cntxt_id == qid) { + found = e; + if (index) + *index = i; + break; + } + i++; + } + + if (found) + break; + } + + return found; +} + +static int t4_sched_queue_unbind(struct port_info *pi, struct ch_sched_queue *p) +{ + struct adapter *adap = pi->adapter; + struct sched_class *e; + struct sched_queue_entry *qe = NULL; + struct sge_eth_txq *txq; + unsigned int qid; + int index = -1; + int err = 0; + + if (p->queue < 0 || p->queue >= pi->nqsets) + return -ERANGE; + + txq = &adap->sge.ethtxq[pi->first_qset + p->queue]; + qid = txq->q.cntxt_id; + + /* Find the existing class that the queue is bound to */ + e = t4_sched_queue_lookup(pi, qid, &index); + if (e && index >= 0) { + int i = 0; + + spin_lock(&e->lock); + list_for_each_entry(qe, &e->queue_list, list) { + if (i == index) + break; + i++; + } + err = t4_sched_bind_unbind_op(pi, (void *)qe, SCHED_QUEUE, + false); + if (err) { + spin_unlock(&e->lock); + goto out; + } + + list_del(&qe->list); + t4_free_mem(qe); + if (atomic_dec_and_test(&e->refcnt)) { + e->state = SCHED_STATE_UNUSED; + memset(&e->info, 0, sizeof(e->info)); + } + spin_unlock(&e->lock); + } +out: + return err; +} + +static int t4_sched_queue_bind(struct port_info *pi, struct ch_sched_queue *p) +{ + struct adapter *adap = pi->adapter; + struct sched_table *s = pi->sched_tbl; + struct sched_class *e; + struct sched_queue_entry *qe = NULL; + struct sge_eth_txq *txq; + unsigned int qid; + int err = 0; + + if (p->queue < 0 || p->queue >= pi->nqsets) + return -ERANGE; + + qe = t4_alloc_mem(sizeof(struct sched_queue_entry)); + if (!qe) + return -ENOMEM; + + txq = &adap->sge.ethtxq[pi->first_qset + p->queue]; + qid = txq->q.cntxt_id; + + /* Unbind queue from any existing class */ + err = t4_sched_queue_unbind(pi, p); + if (err) + goto out; + + /* Bind queue to specified class */ + memset(qe, 0, sizeof(*qe)); + qe->cntxt_id = qid; + memcpy(&qe->param, p, sizeof(qe->param)); + + e = &s->tab[qe->param.class]; + spin_lock(&e->lock); + err = t4_sched_bind_unbind_op(pi, (void *)qe, SCHED_QUEUE, true); + if (err) { + t4_free_mem(qe); + spin_unlock(&e->lock); + goto out; + } + + list_add_tail(&qe->list, &e->queue_list); + atomic_inc(&e->refcnt); + spin_unlock(&e->lock); +out: + return err; +} + +static void t4_sched_class_unbind_all(struct port_info *pi, + struct sched_class *e, + enum sched_bind_type type) +{ + if (!e) + return; + + switch (type) { + case SCHED_QUEUE: { + struct sched_queue_entry *qe; + + list_for_each_entry(qe, &e->queue_list, list) + t4_sched_queue_unbind(pi, &qe->param); + break; + } + default: + break; + } +} + +static int t4_sched_class_bind_unbind_op(struct port_info *pi, void *arg, + enum sched_bind_type type, bool bind) +{ + int err = 0; + + if (!arg) + return -EINVAL; + + switch (type) { + case SCHED_QUEUE: { + struct ch_sched_queue *qe = (struct ch_sched_queue *)arg; + + if (bind) + err = t4_sched_queue_bind(pi, qe); + else + err = t4_sched_queue_unbind(pi, qe); + break; + } + default: + err = -ENOTSUPP; + break; + } + + return err; +} + +/** + * cxgb4_sched_class_bind - Bind an entity to a scheduling class + * @dev: net_device pointer + * @arg: Entity opaque data + * @type: Entity type (Queue) + * + * Binds an entity (queue) to a scheduling class. If the entity + * is bound to another class, it will be unbound from the other class + * and bound to the class specified in @arg. + */ +int cxgb4_sched_class_bind(struct net_device *dev, void *arg, + enum sched_bind_type type) +{ + struct port_info *pi = netdev2pinfo(dev); + struct sched_table *s; + int err = 0; + u8 class_id; + + if (!can_sched(dev)) + return -ENOTSUPP; + + if (!arg) + return -EINVAL; + + switch (type) { + case SCHED_QUEUE: { + struct ch_sched_queue *qe = (struct ch_sched_queue *)arg; + + class_id = qe->class; + break; + } + default: + return -ENOTSUPP; + } + + if (!valid_class_id(dev, class_id)) + return -EINVAL; + + if (class_id == SCHED_CLS_NONE) + return -ENOTSUPP; + + s = pi->sched_tbl; + write_lock(&s->rw_lock); + err = t4_sched_class_bind_unbind_op(pi, arg, type, true); + write_unlock(&s->rw_lock); + + return err; +} + +/** + * cxgb4_sched_class_unbind - Unbind an entity from a scheduling class + * @dev: net_device pointer + * @arg: Entity opaque data + * @type: Entity type (Queue) + * + * Unbinds an entity (queue) from a scheduling class. + */ +int cxgb4_sched_class_unbind(struct net_device *dev, void *arg, + enum sched_bind_type type) +{ + struct port_info *pi = netdev2pinfo(dev); + struct sched_table *s; + int err = 0; + u8 class_id; + + if (!can_sched(dev)) + return -ENOTSUPP; + + if (!arg) + return -EINVAL; + + switch (type) { + case SCHED_QUEUE: { + struct ch_sched_queue *qe = (struct ch_sched_queue *)arg; + + class_id = qe->class; + break; + } + default: + return -ENOTSUPP; + } + + if (!valid_class_id(dev, class_id)) + return -EINVAL; + + s = pi->sched_tbl; + write_lock(&s->rw_lock); + err = t4_sched_class_bind_unbind_op(pi, arg, type, false); + write_unlock(&s->rw_lock); + + return err; +} + /* If @p is NULL, fetch any available unused class */ static struct sched_class *t4_sched_class_lookup(struct port_info *pi, const struct ch_sched_params *p) @@ -199,6 +505,11 @@ struct sched_class *cxgb4_sched_class_alloc(struct net_device *dev, return t4_sched_class_alloc(pi, p); } +static void t4_sched_class_free(struct port_info *pi, struct sched_class *e) +{ + t4_sched_class_unbind_all(pi, e, SCHED_QUEUE); +} + struct sched_table *t4_init_sched(unsigned int sched_size) { struct sched_table *s; @@ -215,6 +526,7 @@ struct sched_table *t4_init_sched(unsigned int sched_size) memset(&s->tab[i], 0, sizeof(struct sched_class)); s->tab[i].idx = i; s->tab[i].state = SCHED_STATE_UNUSED; + INIT_LIST_HEAD(&s->tab[i].queue_list); spin_lock_init(&s->tab[i].lock); atomic_set(&s->tab[i].refcnt, 0); } @@ -230,6 +542,15 @@ void t4_cleanup_sched(struct adapter *adap) struct port_info *pi = netdev2pinfo(adap->port[i]); s = pi->sched_tbl; + for (i = 0; i < s->sched_size; i++) { + struct sched_class *e; + + write_lock(&s->rw_lock); + e = &s->tab[i]; + if (e->state == SCHED_STATE_ACTIVE) + t4_sched_class_free(pi, e); + write_unlock(&s->rw_lock); + } t4_free_mem(s); } } diff --git a/drivers/net/ethernet/chelsio/cxgb4/sched.h b/drivers/net/ethernet/chelsio/cxgb4/sched.h index 60b09e965d34..ac415eb24ac7 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sched.h +++ b/drivers/net/ethernet/chelsio/cxgb4/sched.h @@ -40,6 +40,8 @@ #define SCHED_CLS_NONE 0xff +#define FW_SCHED_CLS_NONE 0xffffffff + enum { SCHED_STATE_ACTIVE, SCHED_STATE_UNUSED, @@ -49,10 +51,21 @@ enum sched_fw_ops { SCHED_FW_OP_ADD, }; +enum sched_bind_type { + SCHED_QUEUE, +}; + +struct sched_queue_entry { + struct list_head list; + unsigned int cntxt_id; + struct ch_sched_queue param; +}; + struct sched_class { u8 state; u8 idx; struct ch_sched_params info; + struct list_head queue_list; spinlock_t lock; /* Per class lock */ atomic_t refcnt; }; @@ -81,6 +94,11 @@ static inline bool valid_class_id(struct net_device *dev, u8 class_id) return true; } +int cxgb4_sched_class_bind(struct net_device *dev, void *arg, + enum sched_bind_type type); +int cxgb4_sched_class_unbind(struct net_device *dev, void *arg, + enum sched_bind_type type); + struct sched_class *cxgb4_sched_class_alloc(struct net_device *dev, struct ch_sched_params *p); From 10a2604ea2eac8af2bf7fa5eb11e4fcb6bc336d5 Mon Sep 17 00:00:00 2001 From: Rahul Lakkireddy Date: Mon, 22 Aug 2016 16:29:08 +0530 Subject: [PATCH 0404/1715] cxgb4: add support for tx max rate limiting Implement set_tx_maxrate NDO to perform per queue tx rate limiting. Signed-off-by: Rahul Lakkireddy Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 20 +++++ .../net/ethernet/chelsio/cxgb4/cxgb4_main.c | 82 +++++++++++++++++++ drivers/net/ethernet/chelsio/cxgb4/sched.h | 3 + 3 files changed, 105 insertions(+) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index eb30612c5372..f988c600c68d 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -881,6 +881,26 @@ struct ch_sched_params { } u; }; +enum { + SCHED_CLASS_TYPE_PACKET = 0, /* class type */ +}; + +enum { + SCHED_CLASS_LEVEL_CL_RL = 0, /* class rate limiter */ +}; + +enum { + SCHED_CLASS_MODE_CLASS = 0, /* per-class scheduling */ +}; + +enum { + SCHED_CLASS_RATEUNIT_BITS = 0, /* bit rate scheduling */ +}; + +enum { + SCHED_CLASS_RATEMODE_ABS = 1, /* Kb/s */ +}; + /* Support for "sched_queue" command to allow one or more NIC TX Queues * to be bound to a TX Scheduling Class. */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 2e341bf60ed0..be5c942ad8e0 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -3140,6 +3140,87 @@ static void cxgb_netpoll(struct net_device *dev) } #endif +static int cxgb_set_tx_maxrate(struct net_device *dev, int index, u32 rate) +{ + struct port_info *pi = netdev_priv(dev); + struct adapter *adap = pi->adapter; + struct sched_class *e; + struct ch_sched_params p; + struct ch_sched_queue qe; + u32 req_rate; + int err = 0; + + if (!can_sched(dev)) + return -ENOTSUPP; + + if (index < 0 || index > pi->nqsets - 1) + return -EINVAL; + + if (!(adap->flags & FULL_INIT_DONE)) { + dev_err(adap->pdev_dev, + "Failed to rate limit on queue %d. Link Down?\n", + index); + return -EINVAL; + } + + /* Convert from Mbps to Kbps */ + req_rate = rate << 10; + + /* Max rate is 10 Gbps */ + if (req_rate >= SCHED_MAX_RATE_KBPS) { + dev_err(adap->pdev_dev, + "Invalid rate %u Mbps, Max rate is %u Gbps\n", + rate, SCHED_MAX_RATE_KBPS); + return -ERANGE; + } + + /* First unbind the queue from any existing class */ + memset(&qe, 0, sizeof(qe)); + qe.queue = index; + qe.class = SCHED_CLS_NONE; + + err = cxgb4_sched_class_unbind(dev, (void *)(&qe), SCHED_QUEUE); + if (err) { + dev_err(adap->pdev_dev, + "Unbinding Queue %d on port %d fail. Err: %d\n", + index, pi->port_id, err); + return err; + } + + /* Queue already unbound */ + if (!req_rate) + return 0; + + /* Fetch any available unused or matching scheduling class */ + memset(&p, 0, sizeof(p)); + p.type = SCHED_CLASS_TYPE_PACKET; + p.u.params.level = SCHED_CLASS_LEVEL_CL_RL; + p.u.params.mode = SCHED_CLASS_MODE_CLASS; + p.u.params.rateunit = SCHED_CLASS_RATEUNIT_BITS; + p.u.params.ratemode = SCHED_CLASS_RATEMODE_ABS; + p.u.params.channel = pi->tx_chan; + p.u.params.class = SCHED_CLS_NONE; + p.u.params.minrate = 0; + p.u.params.maxrate = req_rate; + p.u.params.weight = 0; + p.u.params.pktsize = dev->mtu; + + e = cxgb4_sched_class_alloc(dev, &p); + if (!e) + return -ENOMEM; + + /* Bind the queue to a scheduling class */ + memset(&qe, 0, sizeof(qe)); + qe.queue = index; + qe.class = e->idx; + + err = cxgb4_sched_class_bind(dev, (void *)(&qe), SCHED_QUEUE); + if (err) + dev_err(adap->pdev_dev, + "Queue rate limiting failed. Err: %d\n", err); + return err; +} + static const struct net_device_ops cxgb4_netdev_ops = { .ndo_open = cxgb_open, .ndo_stop = cxgb_close, @@ -3162,6 +3243,7 @@ static const struct net_device_ops cxgb4_netdev_ops = { #ifdef CONFIG_NET_RX_BUSY_POLL .ndo_busy_poll = cxgb_busy_poll, #endif + .ndo_set_tx_maxrate = cxgb_set_tx_maxrate, }; static const struct net_device_ops cxgb4_mgmt_netdev_ops = { diff --git a/drivers/net/ethernet/chelsio/cxgb4/sched.h b/drivers/net/ethernet/chelsio/cxgb4/sched.h index ac415eb24ac7..77b2b3fd9021 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sched.h +++ b/drivers/net/ethernet/chelsio/cxgb4/sched.h @@ -42,6 +42,9 @@ #define FW_SCHED_CLS_NONE 0xffffffff +/* Max rate that can be set to a scheduling class is 10 Gbps */ +#define SCHED_MAX_RATE_KBPS 10000000U + enum { SCHED_STATE_ACTIVE, SCHED_STATE_UNUSED, From 1ae292a2457cd692828da2be87cb967260993ad0 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 22 Aug 2016 15:01:03 +0200 Subject: [PATCH 0405/1715] net: ipconfig: Fix NULL pointer dereference on RARP/BOOTP/DHCP timeout If no RARP, BOOTP, or DHCP response is received, ic_dev is never set, causing a NULL pointer dereference in ic_close_devs(): Sending DHCP requests ...... timed out! Unable to handle kernel NULL pointer dereference at virtual address 00000004 To fix this, add a check to avoid dereferencing ic_dev if it is still NULL. Signed-off-by: Geert Uytterhoeven Fixes: 2647cffb2bc6fbed ("net: ipconfig: Support using "delayed" DHCP replies") Signed-off-by: David S. Miller --- net/ipv4/ipconfig.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index ba9cbeafbb2e..071a785c65eb 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -306,7 +306,7 @@ static void __init ic_close_devs(void) while ((d = next)) { next = d->next; dev = d->dev; - if (dev != ic_dev->dev && !netdev_uses_dsa(dev)) { + if ((!ic_dev || dev != ic_dev->dev) && !netdev_uses_dsa(dev)) { pr_debug("IP-Config: Downing %s\n", dev->name); dev_change_flags(dev, d->flags); } From 7b314362a2344feaafbdf6aa8f3d57077728e37a Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Mon, 22 Aug 2016 16:01:01 +0200 Subject: [PATCH 0406/1715] net: dsa: Allow the DSA driver to indicate the tag protocol DSA drivers may drive different families of switches which need different tag protocol. Rather than hard code the tag protocol in the driver structure, have a callback for the DSA core to call. Signed-off-by: Andrew Lunn Reviewed-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_common.c | 7 ++++++- drivers/net/dsa/bcm_sf2.c | 7 ++++++- drivers/net/dsa/mv88e6060.c | 7 ++++++- drivers/net/dsa/mv88e6xxx/chip.c | 7 ++++++- include/net/dsa.h | 5 +++-- net/dsa/dsa.c | 5 ++++- net/dsa/dsa2.c | 4 +++- 7 files changed, 34 insertions(+), 8 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 38ee10de7884..65ecb51f99e5 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1373,8 +1373,13 @@ static void b53_br_set_stp_state(struct dsa_switch *ds, int port, b53_write8(dev, B53_CTRL_PAGE, B53_PORT_CTRL(port), reg); } +static enum dsa_tag_protocol b53_get_tag_protocol(struct dsa_switch *ds) +{ + return DSA_TAG_PROTO_NONE; +} + static struct dsa_switch_driver b53_switch_ops = { - .tag_protocol = DSA_TAG_PROTO_NONE, + .get_tag_protocol = b53_get_tag_protocol, .setup = b53_setup, .set_addr = b53_set_addr, .get_strings = b53_get_strings, diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 8e6fe13dbec3..b47a74b37a42 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -136,6 +136,11 @@ static int bcm_sf2_sw_get_sset_count(struct dsa_switch *ds) return BCM_SF2_STATS_SIZE; } +static enum dsa_tag_protocol bcm_sf2_sw_get_tag_protocol(struct dsa_switch *ds) +{ + return DSA_TAG_PROTO_BRCM; +} + static void bcm_sf2_imp_vlan_setup(struct dsa_switch *ds, int cpu_port) { struct bcm_sf2_priv *priv = ds_to_priv(ds); @@ -1577,8 +1582,8 @@ static int bcm_sf2_sw_setup(struct dsa_switch *ds) } static struct dsa_switch_driver bcm_sf2_switch_driver = { - .tag_protocol = DSA_TAG_PROTO_BRCM, .setup = bcm_sf2_sw_setup, + .get_tag_protocol = bcm_sf2_sw_get_tag_protocol, .set_addr = bcm_sf2_sw_set_addr, .get_phy_flags = bcm_sf2_sw_get_phy_flags, .get_strings = bcm_sf2_sw_get_strings, diff --git a/drivers/net/dsa/mv88e6060.c b/drivers/net/dsa/mv88e6060.c index e36b40886bd8..1fdfbf3a50bc 100644 --- a/drivers/net/dsa/mv88e6060.c +++ b/drivers/net/dsa/mv88e6060.c @@ -69,6 +69,11 @@ static const char *mv88e6060_get_name(struct mii_bus *bus, int sw_addr) return NULL; } +static enum dsa_tag_protocol mv88e6060_get_tag_protocol(struct dsa_switch *ds) +{ + return DSA_TAG_PROTO_TRAILER; +} + static const char *mv88e6060_drv_probe(struct device *dsa_dev, struct device *host_dev, int sw_addr, void **_priv) @@ -248,7 +253,7 @@ mv88e6060_phy_write(struct dsa_switch *ds, int port, int regnum, u16 val) } static struct dsa_switch_driver mv88e6060_switch_driver = { - .tag_protocol = DSA_TAG_PROTO_TRAILER, + .get_tag_protocol = mv88e6060_get_tag_protocol, .probe = mv88e6060_drv_probe, .setup = mv88e6060_setup, .set_addr = mv88e6060_set_addr, diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 014b52bd72f1..63cad6c00bc7 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -3924,6 +3924,11 @@ static int mv88e6xxx_smi_init(struct mv88e6xxx_chip *chip, return 0; } +static enum dsa_tag_protocol mv88e6xxx_get_tag_protocol(struct dsa_switch *ds) +{ + return DSA_TAG_PROTO_EDSA; +} + static const char *mv88e6xxx_drv_probe(struct device *dsa_dev, struct device *host_dev, int sw_addr, void **priv) @@ -3967,8 +3972,8 @@ free: } static struct dsa_switch_driver mv88e6xxx_switch_driver = { - .tag_protocol = DSA_TAG_PROTO_EDSA, .probe = mv88e6xxx_drv_probe, + .get_tag_protocol = mv88e6xxx_get_tag_protocol, .setup = mv88e6xxx_setup, .set_addr = mv88e6xxx_set_addr, .adjust_link = mv88e6xxx_adjust_link, diff --git a/include/net/dsa.h b/include/net/dsa.h index d00c392bc9f8..8ca2684c5358 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -239,14 +239,15 @@ struct switchdev_obj_port_vlan; struct dsa_switch_driver { struct list_head list; - enum dsa_tag_protocol tag_protocol; - /* * Probing and setup. */ const char *(*probe)(struct device *dsa_dev, struct device *host_dev, int sw_addr, void **priv); + + enum dsa_tag_protocol (*get_tag_protocol)(struct dsa_switch *ds); + int (*setup)(struct dsa_switch *ds); int (*set_addr)(struct dsa_switch *ds, u8 *addr); u32 (*get_phy_flags)(struct dsa_switch *ds, int port); diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 8bda74e595a5..8d3a28d4e99d 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -354,7 +354,10 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) * switch. */ if (dst->cpu_switch == index) { - dst->tag_ops = dsa_resolve_tag_protocol(drv->tag_protocol); + enum dsa_tag_protocol tag_protocol; + + tag_protocol = drv->get_tag_protocol(ds); + dst->tag_ops = dsa_resolve_tag_protocol(tag_protocol); if (IS_ERR(dst->tag_ops)) { ret = PTR_ERR(dst->tag_ops); goto out; diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index f30bad9678f0..2e343221464c 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -443,6 +443,7 @@ static int dsa_cpu_parse(struct device_node *port, u32 index, struct dsa_switch_tree *dst, struct dsa_switch *ds) { + enum dsa_tag_protocol tag_protocol; struct net_device *ethernet_dev; struct device_node *ethernet; @@ -465,7 +466,8 @@ static int dsa_cpu_parse(struct device_node *port, u32 index, dst->cpu_port = index; } - dst->tag_ops = dsa_resolve_tag_protocol(ds->drv->tag_protocol); + tag_protocol = ds->drv->get_tag_protocol(ds); + dst->tag_ops = dsa_resolve_tag_protocol(tag_protocol); if (IS_ERR(dst->tag_ops)) { dev_warn(ds->dev, "No tagger for this switch\n"); return PTR_ERR(dst->tag_ops); From 2bbb33be037361882527c9c762cb9fc928ab0ff7 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Mon, 22 Aug 2016 16:01:02 +0200 Subject: [PATCH 0407/1715] net: dsa: mv88e6xxx: Fix support for DSA tagging for older switches. Older chips only support DSA tagging on the CPU port. New devices support both DSA and EDSA. The driver needs to tell the core the tag protocol to use, and configure the switch for what is available. Signed-off-by: Andrew Lunn Reviewed-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/Kconfig | 1 + drivers/net/dsa/mv88e6xxx/chip.c | 41 +++++++++++---------------- drivers/net/dsa/mv88e6xxx/mv88e6xxx.h | 16 +++++++++-- 3 files changed, 31 insertions(+), 27 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/Kconfig b/drivers/net/dsa/mv88e6xxx/Kconfig index 490bc06f993e..ac77737bbd87 100644 --- a/drivers/net/dsa/mv88e6xxx/Kconfig +++ b/drivers/net/dsa/mv88e6xxx/Kconfig @@ -2,6 +2,7 @@ config NET_DSA_MV88E6XXX tristate "Marvell 88E6xxx Ethernet switch fabric support" depends on NET_DSA select NET_DSA_TAG_EDSA + select NET_DSA_TAG_DSA help This driver adds support for most of the Marvell 88E6xxx models of Ethernet switch chips, except 88E6060. diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 63cad6c00bc7..b315769aa5be 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -2483,28 +2483,13 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) PORT_CONTROL_USE_TAG | PORT_CONTROL_USE_IP | PORT_CONTROL_STATE_FORWARDING; if (dsa_is_cpu_port(ds, port)) { - if (mv88e6xxx_6095_family(chip) || mv88e6xxx_6185_family(chip)) - reg |= PORT_CONTROL_DSA_TAG; - if (mv88e6xxx_6352_family(chip) || - mv88e6xxx_6351_family(chip) || - mv88e6xxx_6165_family(chip) || - mv88e6xxx_6097_family(chip) || - mv88e6xxx_6320_family(chip)) { + if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_EDSA)) reg |= PORT_CONTROL_FRAME_ETHER_TYPE_DSA | PORT_CONTROL_FORWARD_UNKNOWN | PORT_CONTROL_FORWARD_UNKNOWN_MC; - } - - if (mv88e6xxx_6352_family(chip) || - mv88e6xxx_6351_family(chip) || - mv88e6xxx_6165_family(chip) || - mv88e6xxx_6097_family(chip) || - mv88e6xxx_6095_family(chip) || - mv88e6xxx_6065_family(chip) || - mv88e6xxx_6185_family(chip) || - mv88e6xxx_6320_family(chip)) { - reg |= PORT_CONTROL_EGRESS_ADD_TAG; - } + else + reg |= PORT_CONTROL_DSA_TAG; + reg |= PORT_CONTROL_EGRESS_ADD_TAG; } if (dsa_is_dsa_port(ds, port)) { if (mv88e6xxx_6095_family(chip) || @@ -2632,10 +2617,13 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) /* Port Ethertype: use the Ethertype DSA Ethertype * value. */ - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), - PORT_ETH_TYPE, ETH_P_EDSA); - if (ret) - return ret; + if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_EDSA)) { + ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), + PORT_ETH_TYPE, ETH_P_EDSA); + if (ret) + return ret; + } + /* Tag Remap: use an identity 802.1p prio -> switch * prio mapping. */ @@ -3926,7 +3914,12 @@ static int mv88e6xxx_smi_init(struct mv88e6xxx_chip *chip, static enum dsa_tag_protocol mv88e6xxx_get_tag_protocol(struct dsa_switch *ds) { - return DSA_TAG_PROTO_EDSA; + struct mv88e6xxx_chip *chip = ds_to_priv(ds); + + if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_EDSA)) + return DSA_TAG_PROTO_EDSA; + + return DSA_TAG_PROTO_DSA; } static const char *mv88e6xxx_drv_probe(struct device *dsa_dev, diff --git a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h index 1f9bab5891b1..e157d4f69864 100644 --- a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h +++ b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h @@ -386,6 +386,12 @@ enum mv88e6xxx_family { }; enum mv88e6xxx_cap { + /* Two different tag protocols can be used by the driver. All + * switches support DSA, but only later generations support + * EDSA. + */ + MV88E6XXX_CAP_EDSA, + /* Energy Efficient Ethernet. */ MV88E6XXX_CAP_EEE, @@ -447,6 +453,7 @@ enum mv88e6xxx_cap { }; /* Bitmask of capabilities */ +#define MV88E6XXX_FLAG_EDSA BIT(MV88E6XXX_CAP_EDSA) #define MV88E6XXX_FLAG_EEE BIT(MV88E6XXX_CAP_EEE) #define MV88E6XXX_FLAG_SMI_CMD BIT(MV88E6XXX_CAP_SMI_CMD) @@ -547,7 +554,8 @@ enum mv88e6xxx_cap { MV88E6XXX_FLAG_VTU) #define MV88E6XXX_FLAGS_FAMILY_6320 \ - (MV88E6XXX_FLAG_EEE | \ + (MV88E6XXX_FLAG_EDSA | \ + MV88E6XXX_FLAG_EEE | \ MV88E6XXX_FLAG_GLOBAL2 | \ MV88E6XXX_FLAG_G2_MGMT_EN_2X | \ MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ @@ -564,7 +572,8 @@ enum mv88e6xxx_cap { MV88E6XXX_FLAGS_SMI_PHY) #define MV88E6XXX_FLAGS_FAMILY_6351 \ - (MV88E6XXX_FLAG_GLOBAL2 | \ + (MV88E6XXX_FLAG_EDSA | \ + MV88E6XXX_FLAG_GLOBAL2 | \ MV88E6XXX_FLAG_G2_MGMT_EN_2X | \ MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ MV88E6XXX_FLAG_G2_SWITCH_MAC | \ @@ -579,7 +588,8 @@ enum mv88e6xxx_cap { MV88E6XXX_FLAGS_SMI_PHY) #define MV88E6XXX_FLAGS_FAMILY_6352 \ - (MV88E6XXX_FLAG_EEE | \ + (MV88E6XXX_FLAG_EDSA | \ + MV88E6XXX_FLAG_EEE | \ MV88E6XXX_FLAG_GLOBAL2 | \ MV88E6XXX_FLAG_G2_MGMT_EN_2X | \ MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ From 930188ceca5fb2f755d4b900a92a6c295f43a434 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Mon, 22 Aug 2016 16:01:03 +0200 Subject: [PATCH 0408/1715] dsa: mv88e6xxx: Delete ppu timer when removing module The PPU method of accessing PHYs makes use of a timer. Make sure this timer is deleted before unloading the driver. Reported-by: Jamie Lentin Signed-off-by: Andrew Lunn Reviewed-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index b315769aa5be..1d5f9576e62a 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -486,6 +486,11 @@ static void mv88e6xxx_ppu_state_init(struct mv88e6xxx_chip *chip) chip->ppu_timer.function = mv88e6xxx_ppu_reenable_timer; } +static void mv88e6xxx_ppu_state_destroy(struct mv88e6xxx_chip *chip) +{ + del_timer_sync(&chip->ppu_timer); +} + static int mv88e6xxx_phy_ppu_read(struct mv88e6xxx_chip *chip, int addr, int reg, u16 *val) { @@ -3892,6 +3897,13 @@ static void mv88e6xxx_phy_init(struct mv88e6xxx_chip *chip) } } +static void mv88e6xxx_phy_destroy(struct mv88e6xxx_chip *chip) +{ + if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_PPU)) { + mv88e6xxx_ppu_state_destroy(chip); + } +} + static int mv88e6xxx_smi_init(struct mv88e6xxx_chip *chip, struct mii_bus *bus, int sw_addr) { @@ -4080,6 +4092,7 @@ static void mv88e6xxx_remove(struct mdio_device *mdiodev) struct dsa_switch *ds = dev_get_drvdata(&mdiodev->dev); struct mv88e6xxx_chip *chip = ds_to_priv(ds); + mv88e6xxx_phy_destroy(chip); mv88e6xxx_unregister_switch(chip); mv88e6xxx_mdio_unregister(chip); } From f027e0cc8255a8af8197835f2f44910dc63e2fa5 Mon Sep 17 00:00:00 2001 From: Jamie Lentin Date: Mon, 22 Aug 2016 16:01:04 +0200 Subject: [PATCH 0409/1715] net: mv88e6xxx: Enable PORT_CONTROL_FORWARD_UNKNOWN for DSA-tagged CPU ports Without it, a mv88e6131 switch will not forward incoming unicast packets to the CPU port. Signed-off-by: Jamie Lentin Reviewed-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 1d5f9576e62a..82d45165803c 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -2490,11 +2490,11 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) if (dsa_is_cpu_port(ds, port)) { if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_EDSA)) reg |= PORT_CONTROL_FRAME_ETHER_TYPE_DSA | - PORT_CONTROL_FORWARD_UNKNOWN | PORT_CONTROL_FORWARD_UNKNOWN_MC; else reg |= PORT_CONTROL_DSA_TAG; - reg |= PORT_CONTROL_EGRESS_ADD_TAG; + reg |= PORT_CONTROL_EGRESS_ADD_TAG | + PORT_CONTROL_FORWARD_UNKNOWN; } if (dsa_is_dsa_port(ds, port)) { if (mv88e6xxx_6095_family(chip) || From f1ff8666ed87b0013e45ce2d335085407bb38a60 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Tue, 23 Aug 2016 07:19:50 +0300 Subject: [PATCH 0410/1715] qed: Fix address macros Last FW submission reverted various macros into an older form, where they generate compilation warnings on some architectures. Bring back the newer macros instead. Fixes: 05fafbfb3d77 ("qed: utilize FW 8.10.10.0") Reported-by: kbuild test robot Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- include/linux/qed/common_hsi.h | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/include/linux/qed/common_hsi.h b/include/linux/qed/common_hsi.h index d306e0b55581..70b30e4d3cc4 100644 --- a/include/linux/qed/common_hsi.h +++ b/include/linux/qed/common_hsi.h @@ -13,23 +13,17 @@ #include /* dma_addr_t manip */ -#define DMA_LO(x) ((u32)(((dma_addr_t)(x)) & 0xffffffff)) -#define DMA_HI(x) ((u32)(((dma_addr_t)(x)) >> 32)) - -#define DMA_LO_LE(x) cpu_to_le32(DMA_LO(x)) -#define DMA_HI_LE(x) cpu_to_le32(DMA_HI(x)) - -/* It's assumed that whoever includes this has previously included an hsi - * file defining the regpair. - */ -#define DMA_REGPAIR_LE(x, val) (x).hi = DMA_HI_LE((val)); \ - (x).lo = DMA_LO_LE((val)) +#define DMA_LO_LE(x) cpu_to_le32(lower_32_bits(x)) +#define DMA_HI_LE(x) cpu_to_le32(upper_32_bits(x)) +#define DMA_REGPAIR_LE(x, val) do { \ + (x).hi = DMA_HI_LE((val)); \ + (x).lo = DMA_LO_LE((val)); \ + } while (0) #define HILO_GEN(hi, lo, type) ((((type)(hi)) << 32) + (lo)) -#define HILO_DMA(hi, lo) HILO_GEN(hi, lo, dma_addr_t) -#define HILO_64(hi, lo) HILO_GEN(hi, lo, u64) -#define HILO_DMA_REGPAIR(regpair) (HILO_DMA(regpair.hi, regpair.lo)) +#define HILO_64(hi, lo) HILO_GEN((le32_to_cpu(hi)), (le32_to_cpu(lo)), u64) #define HILO_64_REGPAIR(regpair) (HILO_64(regpair.hi, regpair.lo)) +#define HILO_DMA_REGPAIR(regpair) ((dma_addr_t)HILO_64_REGPAIR(regpair)) #ifndef __COMMON_HSI__ #define __COMMON_HSI__ From a01512dbe3ec1e7dc58b00161d61ead359f5ac08 Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Mon, 22 Aug 2016 12:27:04 -0700 Subject: [PATCH 0411/1715] net: strparser: fix strparser sk_user_data check sk_user_data mismatch between what kcm expects (psock) and what strparser expects (strparser). Queued rx_work, for example calling strp_check_rcv after socket buffer changes, will never complete. sk_user_data is unused in strparser, so just remove the check. Signed-off-by: Dave Watson Acked-by: Tom Herbert Signed-off-by: David S. Miller --- net/strparser/strparser.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c index fd688c0a7744..68334b56db1e 100644 --- a/net/strparser/strparser.c +++ b/net/strparser/strparser.c @@ -390,9 +390,6 @@ static void do_strp_rx_work(struct strparser *strp) */ lock_sock(csk); - if (unlikely(csk->sk_user_data != strp)) - goto out; - if (unlikely(strp->rx_stopped)) goto out; From 3802dce178d244c02c6b11fdcbbd202ceac37f0a Mon Sep 17 00:00:00 2001 From: Ivan Khoronzhuk Date: Mon, 22 Aug 2016 21:18:24 +0300 Subject: [PATCH 0412/1715] net: ethernet: ti: davinci_cpdma: split descs num between all channels Tx channels share same pool of descriptors. Thus one channel can block another if pool is emptied by one. But, the shaper should decide which channel is allowed to send packets. To avoid such impact of one channel on another, let every channel to have its own piece of pool. Signed-off-by: Ivan Khoronzhuk Reviewed-by: Mugunthan V N Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 62 +++++++++++++++---------- drivers/net/ethernet/ti/davinci_cpdma.c | 46 +++++++++++++++++- drivers/net/ethernet/ti/davinci_cpdma.h | 2 +- 3 files changed, 83 insertions(+), 27 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 421ebdaf1208..4065f9662bad 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -1213,6 +1213,40 @@ static void cpsw_init_host_port(struct cpsw_priv *priv) } } +static int cpsw_fill_rx_channels(struct cpsw_priv *priv) +{ + struct cpsw_common *cpsw = priv->cpsw; + struct sk_buff *skb; + int ch_buf_num; + int i, ret; + + ch_buf_num = cpdma_chan_get_rx_buf_num(cpsw->rxch); + for (i = 0; i < ch_buf_num; i++) { + skb = __netdev_alloc_skb_ip_align(priv->ndev, + cpsw->rx_packet_max, + GFP_KERNEL); + if (!skb) { + cpsw_err(priv, ifup, "cannot allocate skb\n"); + return -ENOMEM; + } + + ret = cpdma_chan_submit(cpsw->rxch, skb, skb->data, + skb_tailroom(skb), 0); + if (ret < 0) { + cpsw_err(priv, ifup, + "cannot submit skb to rx channel, error %d\n", + ret); + kfree_skb(skb); + return ret; + } + kmemleak_not_leak(skb); + } + + cpsw_info(priv, ifup, "submitted %d rx descriptors\n", ch_buf_num); + + return ch_buf_num; +} + static void cpsw_slave_stop(struct cpsw_slave *slave, struct cpsw_common *cpsw) { u32 slave_port; @@ -1233,7 +1267,7 @@ static int cpsw_ndo_open(struct net_device *ndev) { struct cpsw_priv *priv = netdev_priv(ndev); struct cpsw_common *cpsw = priv->cpsw; - int i, ret; + int ret; u32 reg; ret = pm_runtime_get_sync(cpsw->dev); @@ -1265,8 +1299,6 @@ static int cpsw_ndo_open(struct net_device *ndev) ALE_ALL_PORTS, ALE_ALL_PORTS, 0, 0); if (!cpsw_common_res_usage_state(cpsw)) { - int buf_num; - /* setup tx dma to fixed prio and zero offset */ cpdma_control_set(cpsw->dma, CPDMA_TX_PRIO_FIXED, 1); cpdma_control_set(cpsw->dma, CPDMA_RX_BUFFER_OFFSET, 0); @@ -1293,27 +1325,9 @@ static int cpsw_ndo_open(struct net_device *ndev) enable_irq(cpsw->irqs_table[0]); } - buf_num = cpdma_chan_get_rx_buf_num(cpsw->dma); - for (i = 0; i < buf_num; i++) { - struct sk_buff *skb; - - ret = -ENOMEM; - skb = __netdev_alloc_skb_ip_align(priv->ndev, - cpsw->rx_packet_max, GFP_KERNEL); - if (!skb) - goto err_cleanup; - ret = cpdma_chan_submit(cpsw->rxch, skb, skb->data, - skb_tailroom(skb), 0); - if (ret < 0) { - kfree_skb(skb); - goto err_cleanup; - } - kmemleak_not_leak(skb); - } - /* continue even if we didn't manage to submit all - * receive descs - */ - cpsw_info(priv, ifup, "submitted %d rx descriptors\n", i); + ret = cpsw_fill_rx_channels(priv); + if (ret < 0) + goto err_cleanup; if (cpts_register(cpsw->dev, cpsw->cpts, cpsw->data.cpts_clock_mult, diff --git a/drivers/net/ethernet/ti/davinci_cpdma.c b/drivers/net/ethernet/ti/davinci_cpdma.c index cf72b3390d85..167fd659319d 100644 --- a/drivers/net/ethernet/ti/davinci_cpdma.c +++ b/drivers/net/ethernet/ti/davinci_cpdma.c @@ -104,6 +104,7 @@ struct cpdma_ctlr { struct cpdma_desc_pool *pool; spinlock_t lock; struct cpdma_chan *channels[2 * CPDMA_MAX_CHANNELS]; + int chan_num; }; struct cpdma_chan { @@ -256,6 +257,7 @@ struct cpdma_ctlr *cpdma_ctlr_create(struct cpdma_params *params) ctlr->state = CPDMA_STATE_IDLE; ctlr->params = *params; ctlr->dev = params->dev; + ctlr->chan_num = 0; spin_lock_init(&ctlr->lock); ctlr->pool = cpdma_desc_pool_create(ctlr->dev, @@ -399,6 +401,31 @@ void cpdma_ctlr_eoi(struct cpdma_ctlr *ctlr, u32 value) } EXPORT_SYMBOL_GPL(cpdma_ctlr_eoi); +/** + * cpdma_chan_split_pool - Splits ctrl pool between all channels. + * Has to be called under ctlr lock + */ +static void cpdma_chan_split_pool(struct cpdma_ctlr *ctlr) +{ + struct cpdma_desc_pool *pool = ctlr->pool; + struct cpdma_chan *chan; + int ch_desc_num; + int i; + + if (!ctlr->chan_num) + return; + + /* calculate average size of pool slice */ + ch_desc_num = pool->num_desc / ctlr->chan_num; + + /* split ctlr pool */ + for (i = 0; i < ARRAY_SIZE(ctlr->channels); i++) { + chan = ctlr->channels[i]; + if (chan) + chan->desc_num = ch_desc_num; + } +} + struct cpdma_chan *cpdma_chan_create(struct cpdma_ctlr *ctlr, int chan_num, cpdma_handler_fn handler) { @@ -447,14 +474,25 @@ struct cpdma_chan *cpdma_chan_create(struct cpdma_ctlr *ctlr, int chan_num, spin_lock_init(&chan->lock); ctlr->channels[chan_num] = chan; + ctlr->chan_num++; + + cpdma_chan_split_pool(ctlr); + spin_unlock_irqrestore(&ctlr->lock, flags); return chan; } EXPORT_SYMBOL_GPL(cpdma_chan_create); -int cpdma_chan_get_rx_buf_num(struct cpdma_ctlr *ctlr) +int cpdma_chan_get_rx_buf_num(struct cpdma_chan *chan) { - return ctlr->pool->num_desc / 2; + unsigned long flags; + int desc_num; + + spin_lock_irqsave(&chan->lock, flags); + desc_num = chan->desc_num; + spin_unlock_irqrestore(&chan->lock, flags); + + return desc_num; } EXPORT_SYMBOL_GPL(cpdma_chan_get_rx_buf_num); @@ -471,6 +509,10 @@ int cpdma_chan_destroy(struct cpdma_chan *chan) if (chan->state != CPDMA_STATE_IDLE) cpdma_chan_stop(chan); ctlr->channels[chan->chan_num] = NULL; + ctlr->chan_num--; + + cpdma_chan_split_pool(ctlr); + spin_unlock_irqrestore(&ctlr->lock, flags); return 0; } diff --git a/drivers/net/ethernet/ti/davinci_cpdma.h b/drivers/net/ethernet/ti/davinci_cpdma.h index 4b46cd6e9a3f..9119b43f4f7d 100644 --- a/drivers/net/ethernet/ti/davinci_cpdma.h +++ b/drivers/net/ethernet/ti/davinci_cpdma.h @@ -80,7 +80,7 @@ int cpdma_ctlr_stop(struct cpdma_ctlr *ctlr); struct cpdma_chan *cpdma_chan_create(struct cpdma_ctlr *ctlr, int chan_num, cpdma_handler_fn handler); -int cpdma_chan_get_rx_buf_num(struct cpdma_ctlr *ctlr); +int cpdma_chan_get_rx_buf_num(struct cpdma_chan *chan); int cpdma_chan_destroy(struct cpdma_chan *chan); int cpdma_chan_start(struct cpdma_chan *chan); int cpdma_chan_stop(struct cpdma_chan *chan); From 080d5c5ac8e4cfab4f3b1239667cf422925efcc3 Mon Sep 17 00:00:00 2001 From: Ivan Khoronzhuk Date: Mon, 22 Aug 2016 21:18:25 +0300 Subject: [PATCH 0413/1715] net: ethernet: ti: davinci_cpdma: fix locking while ctrl_stop The interrupts shouldn't be disabled while receiving skb, but while ctrl_stop, the channels are stopped and all remaining packets are handled with netif_receive_skb(), it can cause WARN_ONCE when ctrl is stopping while not all packets were handled with NAPIs: lock_irq_save cpdma_ctlr_stop cpdma_chan_top __cpdma_chan_free cpsw_rx_handler netif_receive_skb So, split locking while ctrl stop thus interrupts are still enabled while skbs handling. It can cause WARN_ONCE in rare cases when ctrl is stopping while not all packets were handled with NAPIs. Reviewed-by: Mugunthan V N Signed-off-by: Ivan Khoronzhuk Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/davinci_cpdma.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/ti/davinci_cpdma.c b/drivers/net/ethernet/ti/davinci_cpdma.c index 167fd659319d..ffb32af94f4c 100644 --- a/drivers/net/ethernet/ti/davinci_cpdma.c +++ b/drivers/net/ethernet/ti/davinci_cpdma.c @@ -334,12 +334,14 @@ int cpdma_ctlr_stop(struct cpdma_ctlr *ctlr) } ctlr->state = CPDMA_STATE_TEARDOWN; + spin_unlock_irqrestore(&ctlr->lock, flags); for (i = 0; i < ARRAY_SIZE(ctlr->channels); i++) { if (ctlr->channels[i]) cpdma_chan_stop(ctlr->channels[i]); } + spin_lock_irqsave(&ctlr->lock, flags); dma_reg_write(ctlr, CPDMA_RXINTMASKCLEAR, 0xffffffff); dma_reg_write(ctlr, CPDMA_TXINTMASKCLEAR, 0xffffffff); From e05107e6b74700762e2feda0abd2e74984c24227 Mon Sep 17 00:00:00 2001 From: Ivan Khoronzhuk Date: Mon, 22 Aug 2016 21:18:26 +0300 Subject: [PATCH 0414/1715] net: ethernet: ti: cpsw: add multi queue support The cpsw h/w supports up to 8 tx and 8 rx channels. This patch adds multi-queue support to the driver only, shaper configuration will be added with separate patch series. Default shaper mode, as before, priority mode, but with corrected priority order, 0 - is highest priority, 7 - lowest. The poll function handles all unprocessed channels, till all of them are free, beginning from hi priority channel. In dual_emac mode the channels are shared between two network devices, as it's with single-queue default mode. The statistic for every channel can be read with: $ ethtool -S ethX Signed-off-by: Ivan Khoronzhuk Reviewed-by: Mugunthan V N Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 297 ++++++++++++++++-------- drivers/net/ethernet/ti/davinci_cpdma.c | 12 + drivers/net/ethernet/ti/davinci_cpdma.h | 2 + 3 files changed, 208 insertions(+), 103 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 4065f9662bad..28cc716f22c9 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -124,7 +124,7 @@ do { \ #define RX_PRIORITY_MAPPING 0x76543210 #define TX_PRIORITY_MAPPING 0x33221100 -#define CPDMA_TX_PRIORITY_MAP 0x76543210 +#define CPDMA_TX_PRIORITY_MAP 0x01234567 #define CPSW_VLAN_AWARE BIT(1) #define CPSW_ALE_VLAN_AWARE 1 @@ -144,6 +144,7 @@ do { \ ((cpsw->data.dual_emac) ? priv->emac_port : \ cpsw->data.active_slave) #define IRQ_NUM 2 +#define CPSW_MAX_QUEUES 8 static int debug_level; module_param(debug_level, int, 0); @@ -379,13 +380,15 @@ struct cpsw_common { int rx_packet_max; struct cpsw_slave *slaves; struct cpdma_ctlr *dma; - struct cpdma_chan *txch, *rxch; + struct cpdma_chan *txch[CPSW_MAX_QUEUES]; + struct cpdma_chan *rxch[CPSW_MAX_QUEUES]; struct cpsw_ale *ale; bool quirk_irq; bool rx_irq_disabled; bool tx_irq_disabled; u32 irqs_table[IRQ_NUM]; struct cpts *cpts; + int rx_ch_num, tx_ch_num; }; struct cpsw_priv { @@ -457,35 +460,26 @@ static const struct cpsw_stats cpsw_gstrings_stats[] = { { "Rx Start of Frame Overruns", CPSW_STAT(rxsofoverruns) }, { "Rx Middle of Frame Overruns", CPSW_STAT(rxmofoverruns) }, { "Rx DMA Overruns", CPSW_STAT(rxdmaoverruns) }, - { "Rx DMA chan: head_enqueue", CPDMA_RX_STAT(head_enqueue) }, - { "Rx DMA chan: tail_enqueue", CPDMA_RX_STAT(tail_enqueue) }, - { "Rx DMA chan: pad_enqueue", CPDMA_RX_STAT(pad_enqueue) }, - { "Rx DMA chan: misqueued", CPDMA_RX_STAT(misqueued) }, - { "Rx DMA chan: desc_alloc_fail", CPDMA_RX_STAT(desc_alloc_fail) }, - { "Rx DMA chan: pad_alloc_fail", CPDMA_RX_STAT(pad_alloc_fail) }, - { "Rx DMA chan: runt_receive_buf", CPDMA_RX_STAT(runt_receive_buff) }, - { "Rx DMA chan: runt_transmit_buf", CPDMA_RX_STAT(runt_transmit_buff) }, - { "Rx DMA chan: empty_dequeue", CPDMA_RX_STAT(empty_dequeue) }, - { "Rx DMA chan: busy_dequeue", CPDMA_RX_STAT(busy_dequeue) }, - { "Rx DMA chan: good_dequeue", CPDMA_RX_STAT(good_dequeue) }, - { "Rx DMA chan: requeue", CPDMA_RX_STAT(requeue) }, - { "Rx DMA chan: teardown_dequeue", CPDMA_RX_STAT(teardown_dequeue) }, - { "Tx DMA chan: head_enqueue", CPDMA_TX_STAT(head_enqueue) }, - { "Tx DMA chan: tail_enqueue", CPDMA_TX_STAT(tail_enqueue) }, - { "Tx DMA chan: pad_enqueue", CPDMA_TX_STAT(pad_enqueue) }, - { "Tx DMA chan: misqueued", CPDMA_TX_STAT(misqueued) }, - { "Tx DMA chan: desc_alloc_fail", CPDMA_TX_STAT(desc_alloc_fail) }, - { "Tx DMA chan: pad_alloc_fail", CPDMA_TX_STAT(pad_alloc_fail) }, - { "Tx DMA chan: runt_receive_buf", CPDMA_TX_STAT(runt_receive_buff) }, - { "Tx DMA chan: runt_transmit_buf", CPDMA_TX_STAT(runt_transmit_buff) }, - { "Tx DMA chan: empty_dequeue", CPDMA_TX_STAT(empty_dequeue) }, - { "Tx DMA chan: busy_dequeue", CPDMA_TX_STAT(busy_dequeue) }, - { "Tx DMA chan: good_dequeue", CPDMA_TX_STAT(good_dequeue) }, - { "Tx DMA chan: requeue", CPDMA_TX_STAT(requeue) }, - { "Tx DMA chan: teardown_dequeue", CPDMA_TX_STAT(teardown_dequeue) }, }; -#define CPSW_STATS_LEN ARRAY_SIZE(cpsw_gstrings_stats) +static const struct cpsw_stats cpsw_gstrings_ch_stats[] = { + { "head_enqueue", CPDMA_RX_STAT(head_enqueue) }, + { "tail_enqueue", CPDMA_RX_STAT(tail_enqueue) }, + { "pad_enqueue", CPDMA_RX_STAT(pad_enqueue) }, + { "misqueued", CPDMA_RX_STAT(misqueued) }, + { "desc_alloc_fail", CPDMA_RX_STAT(desc_alloc_fail) }, + { "pad_alloc_fail", CPDMA_RX_STAT(pad_alloc_fail) }, + { "runt_receive_buf", CPDMA_RX_STAT(runt_receive_buff) }, + { "runt_transmit_buf", CPDMA_RX_STAT(runt_transmit_buff) }, + { "empty_dequeue", CPDMA_RX_STAT(empty_dequeue) }, + { "busy_dequeue", CPDMA_RX_STAT(busy_dequeue) }, + { "good_dequeue", CPDMA_RX_STAT(good_dequeue) }, + { "requeue", CPDMA_RX_STAT(requeue) }, + { "teardown_dequeue", CPDMA_RX_STAT(teardown_dequeue) }, +}; + +#define CPSW_STATS_COMMON_LEN ARRAY_SIZE(cpsw_gstrings_stats) +#define CPSW_STATS_CH_LEN ARRAY_SIZE(cpsw_gstrings_ch_stats) #define ndev_to_cpsw(ndev) (((struct cpsw_priv *)netdev_priv(ndev))->cpsw) #define napi_to_cpsw(napi) container_of(napi, struct cpsw_common, napi) @@ -669,6 +663,7 @@ static void cpsw_intr_disable(struct cpsw_common *cpsw) static void cpsw_tx_handler(void *token, int len, int status) { + struct netdev_queue *txq; struct sk_buff *skb = token; struct net_device *ndev = skb->dev; struct cpsw_common *cpsw = ndev_to_cpsw(ndev); @@ -676,8 +671,10 @@ static void cpsw_tx_handler(void *token, int len, int status) /* Check whether the queue is stopped due to stalled tx dma, if the * queue is stopped then start the queue as we have free desc for tx */ - if (unlikely(netif_queue_stopped(ndev))) - netif_wake_queue(ndev); + txq = netdev_get_tx_queue(ndev, skb_get_queue_mapping(skb)); + if (unlikely(netif_tx_queue_stopped(txq))) + netif_tx_wake_queue(txq); + cpts_tx_timestamp(cpsw->cpts, skb); ndev->stats.tx_packets++; ndev->stats.tx_bytes += len; @@ -686,6 +683,7 @@ static void cpsw_tx_handler(void *token, int len, int status) static void cpsw_rx_handler(void *token, int len, int status) { + struct cpdma_chan *ch; struct sk_buff *skb = token; struct sk_buff *new_skb; struct net_device *ndev = skb->dev; @@ -724,6 +722,7 @@ static void cpsw_rx_handler(void *token, int len, int status) new_skb = netdev_alloc_skb_ip_align(ndev, cpsw->rx_packet_max); if (new_skb) { + skb_copy_queue_mapping(new_skb, skb); skb_put(skb, len); cpts_rx_timestamp(cpsw->cpts, skb); skb->protocol = eth_type_trans(skb, ndev); @@ -737,7 +736,8 @@ static void cpsw_rx_handler(void *token, int len, int status) } requeue: - ret = cpdma_chan_submit(cpsw->rxch, new_skb, new_skb->data, + ch = cpsw->rxch[skb_get_queue_mapping(new_skb)]; + ret = cpdma_chan_submit(ch, new_skb, new_skb->data, skb_tailroom(new_skb), 0); if (WARN_ON(ret < 0)) dev_kfree_skb_any(new_skb); @@ -777,10 +777,27 @@ static irqreturn_t cpsw_rx_interrupt(int irq, void *dev_id) static int cpsw_tx_poll(struct napi_struct *napi_tx, int budget) { + u32 ch_map; + int num_tx, ch; struct cpsw_common *cpsw = napi_to_cpsw(napi_tx); - int num_tx; - num_tx = cpdma_chan_process(cpsw->txch, budget); + /* process every unprocessed channel */ + ch_map = cpdma_ctrl_txchs_state(cpsw->dma); + for (ch = 0, num_tx = 0; num_tx < budget; ch_map >>= 1, ch++) { + if (!ch_map) { + ch_map = cpdma_ctrl_txchs_state(cpsw->dma); + if (!ch_map) + break; + + ch = 0; + } + + if (!(ch_map & 0x01)) + continue; + + num_tx += cpdma_chan_process(cpsw->txch[ch], budget - num_tx); + } + if (num_tx < budget) { napi_complete(napi_tx); writel(0xff, &cpsw->wr_regs->tx_en); @@ -795,10 +812,27 @@ static int cpsw_tx_poll(struct napi_struct *napi_tx, int budget) static int cpsw_rx_poll(struct napi_struct *napi_rx, int budget) { + u32 ch_map; + int num_rx, ch; struct cpsw_common *cpsw = napi_to_cpsw(napi_rx); - int num_rx; - num_rx = cpdma_chan_process(cpsw->rxch, budget); + /* process every unprocessed channel */ + ch_map = cpdma_ctrl_rxchs_state(cpsw->dma); + for (ch = 0, num_rx = 0; num_rx < budget; ch_map >>= 1, ch++) { + if (!ch_map) { + ch_map = cpdma_ctrl_rxchs_state(cpsw->dma); + if (!ch_map) + break; + + ch = 0; + } + + if (!(ch_map & 0x01)) + continue; + + num_rx += cpdma_chan_process(cpsw->rxch[ch], budget - num_rx); + } + if (num_rx < budget) { napi_complete(napi_rx); writel(0xff, &cpsw->wr_regs->rx_en); @@ -897,10 +931,10 @@ static void cpsw_adjust_link(struct net_device *ndev) if (link) { netif_carrier_on(ndev); if (netif_running(ndev)) - netif_wake_queue(ndev); + netif_tx_wake_all_queues(ndev); } else { netif_carrier_off(ndev); - netif_stop_queue(ndev); + netif_tx_stop_all_queues(ndev); } } @@ -973,26 +1007,51 @@ update_return: static int cpsw_get_sset_count(struct net_device *ndev, int sset) { + struct cpsw_common *cpsw = ndev_to_cpsw(ndev); + switch (sset) { case ETH_SS_STATS: - return CPSW_STATS_LEN; + return (CPSW_STATS_COMMON_LEN + + (cpsw->rx_ch_num + cpsw->tx_ch_num) * + CPSW_STATS_CH_LEN); default: return -EOPNOTSUPP; } } +static void cpsw_add_ch_strings(u8 **p, int ch_num, int rx_dir) +{ + int ch_stats_len; + int line; + int i; + + ch_stats_len = CPSW_STATS_CH_LEN * ch_num; + for (i = 0; i < ch_stats_len; i++) { + line = i % CPSW_STATS_CH_LEN; + snprintf(*p, ETH_GSTRING_LEN, + "%s DMA chan %d: %s", rx_dir ? "Rx" : "Tx", + i / CPSW_STATS_CH_LEN, + cpsw_gstrings_ch_stats[line].stat_string); + *p += ETH_GSTRING_LEN; + } +} + static void cpsw_get_strings(struct net_device *ndev, u32 stringset, u8 *data) { + struct cpsw_common *cpsw = ndev_to_cpsw(ndev); u8 *p = data; int i; switch (stringset) { case ETH_SS_STATS: - for (i = 0; i < CPSW_STATS_LEN; i++) { + for (i = 0; i < CPSW_STATS_COMMON_LEN; i++) { memcpy(p, cpsw_gstrings_stats[i].stat_string, ETH_GSTRING_LEN); p += ETH_GSTRING_LEN; } + + cpsw_add_ch_strings(&p, cpsw->rx_ch_num, 1); + cpsw_add_ch_strings(&p, cpsw->tx_ch_num, 0); break; } } @@ -1000,36 +1059,31 @@ static void cpsw_get_strings(struct net_device *ndev, u32 stringset, u8 *data) static void cpsw_get_ethtool_stats(struct net_device *ndev, struct ethtool_stats *stats, u64 *data) { - struct cpdma_chan_stats rx_stats; - struct cpdma_chan_stats tx_stats; - u32 val; u8 *p; - int i; struct cpsw_common *cpsw = ndev_to_cpsw(ndev); + struct cpdma_chan_stats ch_stats; + int i, l, ch; /* Collect Davinci CPDMA stats for Rx and Tx Channel */ - cpdma_chan_get_stats(cpsw->rxch, &rx_stats); - cpdma_chan_get_stats(cpsw->txch, &tx_stats); + for (l = 0; l < CPSW_STATS_COMMON_LEN; l++) + data[l] = readl(cpsw->hw_stats + + cpsw_gstrings_stats[l].stat_offset); - for (i = 0; i < CPSW_STATS_LEN; i++) { - switch (cpsw_gstrings_stats[i].type) { - case CPSW_STATS: - val = readl(cpsw->hw_stats + - cpsw_gstrings_stats[i].stat_offset); - data[i] = val; - break; + for (ch = 0; ch < cpsw->rx_ch_num; ch++) { + cpdma_chan_get_stats(cpsw->rxch[ch], &ch_stats); + for (i = 0; i < CPSW_STATS_CH_LEN; i++, l++) { + p = (u8 *)&ch_stats + + cpsw_gstrings_ch_stats[i].stat_offset; + data[l] = *(u32 *)p; + } + } - case CPDMA_RX_STATS: - p = (u8 *)&rx_stats + - cpsw_gstrings_stats[i].stat_offset; - data[i] = *(u32 *)p; - break; - - case CPDMA_TX_STATS: - p = (u8 *)&tx_stats + - cpsw_gstrings_stats[i].stat_offset; - data[i] = *(u32 *)p; - break; + for (ch = 0; ch < cpsw->tx_ch_num; ch++) { + cpdma_chan_get_stats(cpsw->txch[ch], &ch_stats); + for (i = 0; i < CPSW_STATS_CH_LEN; i++, l++) { + p = (u8 *)&ch_stats + + cpsw_gstrings_ch_stats[i].stat_offset; + data[l] = *(u32 *)p; } } } @@ -1050,11 +1104,12 @@ static int cpsw_common_res_usage_state(struct cpsw_common *cpsw) } static inline int cpsw_tx_packet_submit(struct cpsw_priv *priv, - struct sk_buff *skb) + struct sk_buff *skb, + struct cpdma_chan *txch) { struct cpsw_common *cpsw = priv->cpsw; - return cpdma_chan_submit(cpsw->txch, skb, skb->data, skb->len, + return cpdma_chan_submit(txch, skb, skb->data, skb->len, priv->emac_port + cpsw->data.dual_emac); } @@ -1218,33 +1273,37 @@ static int cpsw_fill_rx_channels(struct cpsw_priv *priv) struct cpsw_common *cpsw = priv->cpsw; struct sk_buff *skb; int ch_buf_num; - int i, ret; + int ch, i, ret; - ch_buf_num = cpdma_chan_get_rx_buf_num(cpsw->rxch); - for (i = 0; i < ch_buf_num; i++) { - skb = __netdev_alloc_skb_ip_align(priv->ndev, - cpsw->rx_packet_max, - GFP_KERNEL); - if (!skb) { - cpsw_err(priv, ifup, "cannot allocate skb\n"); - return -ENOMEM; + for (ch = 0; ch < cpsw->rx_ch_num; ch++) { + ch_buf_num = cpdma_chan_get_rx_buf_num(cpsw->rxch[ch]); + for (i = 0; i < ch_buf_num; i++) { + skb = __netdev_alloc_skb_ip_align(priv->ndev, + cpsw->rx_packet_max, + GFP_KERNEL); + if (!skb) { + cpsw_err(priv, ifup, "cannot allocate skb\n"); + return -ENOMEM; + } + + skb_set_queue_mapping(skb, ch); + ret = cpdma_chan_submit(cpsw->rxch[ch], skb, skb->data, + skb_tailroom(skb), 0); + if (ret < 0) { + cpsw_err(priv, ifup, + "cannot submit skb to channel %d rx, error %d\n", + ch, ret); + kfree_skb(skb); + return ret; + } + kmemleak_not_leak(skb); } - ret = cpdma_chan_submit(cpsw->rxch, skb, skb->data, - skb_tailroom(skb), 0); - if (ret < 0) { - cpsw_err(priv, ifup, - "cannot submit skb to rx channel, error %d\n", - ret); - kfree_skb(skb); - return ret; - } - kmemleak_not_leak(skb); + cpsw_info(priv, ifup, "ch %d rx, submitted %d descriptors\n", + ch, ch_buf_num); } - cpsw_info(priv, ifup, "submitted %d rx descriptors\n", ch_buf_num); - - return ch_buf_num; + return 0; } static void cpsw_slave_stop(struct cpsw_slave *slave, struct cpsw_common *cpsw) @@ -1280,6 +1339,19 @@ static int cpsw_ndo_open(struct net_device *ndev) cpsw_intr_disable(cpsw); netif_carrier_off(ndev); + /* Notify the stack of the actual queue counts. */ + ret = netif_set_real_num_tx_queues(ndev, cpsw->tx_ch_num); + if (ret) { + dev_err(priv->dev, "cannot set real number of tx queues\n"); + goto err_cleanup; + } + + ret = netif_set_real_num_rx_queues(ndev, cpsw->rx_ch_num); + if (ret) { + dev_err(priv->dev, "cannot set real number of rx queues\n"); + goto err_cleanup; + } + reg = cpsw->version; dev_info(priv->dev, "initializing cpsw version %d.%d (%d)\n", @@ -1349,6 +1421,9 @@ static int cpsw_ndo_open(struct net_device *ndev) if (cpsw->data.dual_emac) cpsw->slaves[priv->emac_port].open_stat = true; + + netif_tx_start_all_queues(ndev); + return 0; err_cleanup: @@ -1365,7 +1440,7 @@ static int cpsw_ndo_stop(struct net_device *ndev) struct cpsw_common *cpsw = priv->cpsw; cpsw_info(priv, ifdown, "shutting down cpsw device\n"); - netif_stop_queue(priv->ndev); + netif_tx_stop_all_queues(priv->ndev); netif_carrier_off(priv->ndev); if (cpsw_common_res_usage_state(cpsw) <= 1) { @@ -1387,8 +1462,10 @@ static netdev_tx_t cpsw_ndo_start_xmit(struct sk_buff *skb, struct net_device *ndev) { struct cpsw_priv *priv = netdev_priv(ndev); - int ret; struct cpsw_common *cpsw = priv->cpsw; + struct netdev_queue *txq; + struct cpdma_chan *txch; + int ret, q_idx; netif_trans_update(ndev); @@ -1404,7 +1481,12 @@ static netdev_tx_t cpsw_ndo_start_xmit(struct sk_buff *skb, skb_tx_timestamp(skb); - ret = cpsw_tx_packet_submit(priv, skb); + q_idx = skb_get_queue_mapping(skb); + if (q_idx >= cpsw->tx_ch_num) + q_idx = q_idx % cpsw->tx_ch_num; + + txch = cpsw->txch[q_idx]; + ret = cpsw_tx_packet_submit(priv, skb, txch); if (unlikely(ret != 0)) { cpsw_err(priv, tx_err, "desc submit failed\n"); goto fail; @@ -1413,13 +1495,16 @@ static netdev_tx_t cpsw_ndo_start_xmit(struct sk_buff *skb, /* If there is no more tx desc left free then we need to * tell the kernel to stop sending us tx frames. */ - if (unlikely(!cpdma_check_free_tx_desc(cpsw->txch))) - netif_stop_queue(ndev); + if (unlikely(!cpdma_check_free_tx_desc(txch))) { + txq = netdev_get_tx_queue(ndev, q_idx); + netif_tx_stop_queue(txq); + } return NETDEV_TX_OK; fail: ndev->stats.tx_dropped++; - netif_stop_queue(ndev); + txq = netdev_get_tx_queue(ndev, skb_get_queue_mapping(skb)); + netif_tx_stop_queue(txq); return NETDEV_TX_BUSY; } @@ -1601,12 +1686,16 @@ static void cpsw_ndo_tx_timeout(struct net_device *ndev) { struct cpsw_priv *priv = netdev_priv(ndev); struct cpsw_common *cpsw = priv->cpsw; + int ch; cpsw_err(priv, tx_err, "transmit timeout, restarting dma\n"); ndev->stats.tx_errors++; cpsw_intr_disable(cpsw); - cpdma_chan_stop(cpsw->txch); - cpdma_chan_start(cpsw->txch); + for (ch = 0; ch < cpsw->tx_ch_num; ch++) { + cpdma_chan_stop(cpsw->txch[ch]); + cpdma_chan_start(cpsw->txch[ch]); + } + cpsw_intr_enable(cpsw); } @@ -2178,7 +2267,7 @@ static int cpsw_probe_dual_emac(struct cpsw_priv *priv) struct cpsw_priv *priv_sl2; int ret = 0; - ndev = alloc_etherdev(sizeof(struct cpsw_priv)); + ndev = alloc_etherdev_mq(sizeof(struct cpsw_priv), CPSW_MAX_QUEUES); if (!ndev) { dev_err(cpsw->dev, "cpsw: error allocating net_device\n"); return -ENOMEM; @@ -2279,7 +2368,7 @@ static int cpsw_probe(struct platform_device *pdev) cpsw = devm_kzalloc(&pdev->dev, sizeof(struct cpsw_common), GFP_KERNEL); cpsw->dev = &pdev->dev; - ndev = alloc_etherdev(sizeof(struct cpsw_priv)); + ndev = alloc_etherdev_mq(sizeof(struct cpsw_priv), CPSW_MAX_QUEUES); if (!ndev) { dev_err(&pdev->dev, "error allocating net_device\n"); return -ENOMEM; @@ -2320,6 +2409,8 @@ static int cpsw_probe(struct platform_device *pdev) goto clean_runtime_disable_ret; } data = &cpsw->data; + cpsw->rx_ch_num = 1; + cpsw->tx_ch_num = 1; if (is_valid_ether_addr(data->slave_data[0].mac_addr)) { memcpy(priv->mac_addr, data->slave_data[0].mac_addr, ETH_ALEN); @@ -2444,12 +2535,12 @@ static int cpsw_probe(struct platform_device *pdev) goto clean_runtime_disable_ret; } - cpsw->txch = cpdma_chan_create(cpsw->dma, tx_chan_num(0), - cpsw_tx_handler); - cpsw->rxch = cpdma_chan_create(cpsw->dma, rx_chan_num(0), - cpsw_rx_handler); + cpsw->txch[0] = cpdma_chan_create(cpsw->dma, tx_chan_num(0), + cpsw_tx_handler); + cpsw->rxch[0] = cpdma_chan_create(cpsw->dma, rx_chan_num(0), + cpsw_rx_handler); - if (WARN_ON(!cpsw->txch || !cpsw->rxch)) { + if (WARN_ON(!cpsw->rxch[0] || !cpsw->txch[0])) { dev_err(priv->dev, "error initializing dma channels\n"); ret = -ENOMEM; goto clean_dma_ret; diff --git a/drivers/net/ethernet/ti/davinci_cpdma.c b/drivers/net/ethernet/ti/davinci_cpdma.c index ffb32af94f4c..4b578b17ec2d 100644 --- a/drivers/net/ethernet/ti/davinci_cpdma.c +++ b/drivers/net/ethernet/ti/davinci_cpdma.c @@ -403,6 +403,18 @@ void cpdma_ctlr_eoi(struct cpdma_ctlr *ctlr, u32 value) } EXPORT_SYMBOL_GPL(cpdma_ctlr_eoi); +u32 cpdma_ctrl_rxchs_state(struct cpdma_ctlr *ctlr) +{ + return dma_reg_read(ctlr, CPDMA_RXINTSTATMASKED); +} +EXPORT_SYMBOL_GPL(cpdma_ctrl_rxchs_state); + +u32 cpdma_ctrl_txchs_state(struct cpdma_ctlr *ctlr) +{ + return dma_reg_read(ctlr, CPDMA_TXINTSTATMASKED); +} +EXPORT_SYMBOL_GPL(cpdma_ctrl_txchs_state); + /** * cpdma_chan_split_pool - Splits ctrl pool between all channels. * Has to be called under ctlr lock diff --git a/drivers/net/ethernet/ti/davinci_cpdma.h b/drivers/net/ethernet/ti/davinci_cpdma.h index 9119b43f4f7d..070f1d0ea848 100644 --- a/drivers/net/ethernet/ti/davinci_cpdma.h +++ b/drivers/net/ethernet/ti/davinci_cpdma.h @@ -94,6 +94,8 @@ int cpdma_chan_process(struct cpdma_chan *chan, int quota); int cpdma_ctlr_int_ctrl(struct cpdma_ctlr *ctlr, bool enable); void cpdma_ctlr_eoi(struct cpdma_ctlr *ctlr, u32 value); int cpdma_chan_int_ctrl(struct cpdma_chan *chan, bool enable); +u32 cpdma_ctrl_rxchs_state(struct cpdma_ctlr *ctlr); +u32 cpdma_ctrl_txchs_state(struct cpdma_ctlr *ctlr); bool cpdma_check_free_tx_desc(struct cpdma_chan *chan); enum cpdma_control { From 925d65e6d8a4c84c54fbad060f32385b57e210ed Mon Sep 17 00:00:00 2001 From: Ivan Khoronzhuk Date: Mon, 22 Aug 2016 21:18:27 +0300 Subject: [PATCH 0415/1715] net: ethernet: ti: davinci_cpdma: move cpdma channel struct macroses to internals Keep the driver internals in C file. Currently it's not required for drivers to know rx or tx a channel is, except create function. So correct "channel create" function, and use all channel struct macroses only for internal use. Reviewed-by: Mugunthan V N Signed-off-by: Ivan Khoronzhuk Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 7 ++----- drivers/net/ethernet/ti/davinci_cpdma.c | 13 +++++++++++-- drivers/net/ethernet/ti/davinci_cpdma.h | 9 +-------- drivers/net/ethernet/ti/davinci_emac.c | 8 ++++---- 4 files changed, 18 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 28cc716f22c9..1fbb50f25603 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -2535,11 +2535,8 @@ static int cpsw_probe(struct platform_device *pdev) goto clean_runtime_disable_ret; } - cpsw->txch[0] = cpdma_chan_create(cpsw->dma, tx_chan_num(0), - cpsw_tx_handler); - cpsw->rxch[0] = cpdma_chan_create(cpsw->dma, rx_chan_num(0), - cpsw_rx_handler); - + cpsw->txch[0] = cpdma_chan_create(cpsw->dma, 0, cpsw_tx_handler, 0); + cpsw->rxch[0] = cpdma_chan_create(cpsw->dma, 0, cpsw_rx_handler, 1); if (WARN_ON(!cpsw->rxch[0] || !cpsw->txch[0])) { dev_err(priv->dev, "error initializing dma channels\n"); ret = -ENOMEM; diff --git a/drivers/net/ethernet/ti/davinci_cpdma.c b/drivers/net/ethernet/ti/davinci_cpdma.c index 4b578b17ec2d..c3f35f11a8fd 100644 --- a/drivers/net/ethernet/ti/davinci_cpdma.c +++ b/drivers/net/ethernet/ti/davinci_cpdma.c @@ -124,6 +124,13 @@ struct cpdma_chan { int int_set, int_clear, td; }; +#define tx_chan_num(chan) (chan) +#define rx_chan_num(chan) ((chan) + CPDMA_MAX_CHANNELS) +#define is_rx_chan(chan) ((chan)->chan_num >= CPDMA_MAX_CHANNELS) +#define is_tx_chan(chan) (!is_rx_chan(chan)) +#define __chan_linear(chan_num) ((chan_num) & (CPDMA_MAX_CHANNELS - 1)) +#define chan_linear(chan) __chan_linear((chan)->chan_num) + /* The following make access to common cpdma_ctlr params more readable */ #define dmaregs params.dmaregs #define num_chan params.num_chan @@ -441,12 +448,14 @@ static void cpdma_chan_split_pool(struct cpdma_ctlr *ctlr) } struct cpdma_chan *cpdma_chan_create(struct cpdma_ctlr *ctlr, int chan_num, - cpdma_handler_fn handler) + cpdma_handler_fn handler, int rx_type) { + int offset = chan_num * 4; struct cpdma_chan *chan; - int offset = (chan_num % CPDMA_MAX_CHANNELS) * 4; unsigned long flags; + chan_num = rx_type ? rx_chan_num(chan_num) : tx_chan_num(chan_num); + if (__chan_linear(chan_num) >= ctlr->num_chan) return NULL; diff --git a/drivers/net/ethernet/ti/davinci_cpdma.h b/drivers/net/ethernet/ti/davinci_cpdma.h index 070f1d0ea848..a07b22b12bc1 100644 --- a/drivers/net/ethernet/ti/davinci_cpdma.h +++ b/drivers/net/ethernet/ti/davinci_cpdma.h @@ -17,13 +17,6 @@ #define CPDMA_MAX_CHANNELS BITS_PER_LONG -#define tx_chan_num(chan) (chan) -#define rx_chan_num(chan) ((chan) + CPDMA_MAX_CHANNELS) -#define is_rx_chan(chan) ((chan)->chan_num >= CPDMA_MAX_CHANNELS) -#define is_tx_chan(chan) (!is_rx_chan(chan)) -#define __chan_linear(chan_num) ((chan_num) & (CPDMA_MAX_CHANNELS - 1)) -#define chan_linear(chan) __chan_linear((chan)->chan_num) - #define CPDMA_RX_SOURCE_PORT(__status__) ((__status__ >> 16) & 0x7) #define CPDMA_EOI_RX_THRESH 0x0 @@ -79,7 +72,7 @@ int cpdma_ctlr_start(struct cpdma_ctlr *ctlr); int cpdma_ctlr_stop(struct cpdma_ctlr *ctlr); struct cpdma_chan *cpdma_chan_create(struct cpdma_ctlr *ctlr, int chan_num, - cpdma_handler_fn handler); + cpdma_handler_fn handler, int rx_type); int cpdma_chan_get_rx_buf_num(struct cpdma_chan *chan); int cpdma_chan_destroy(struct cpdma_chan *chan); int cpdma_chan_start(struct cpdma_chan *chan); diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c index 2d6fc9a0fb21..2fd94a5bc1f3 100644 --- a/drivers/net/ethernet/ti/davinci_emac.c +++ b/drivers/net/ethernet/ti/davinci_emac.c @@ -1870,10 +1870,10 @@ static int davinci_emac_probe(struct platform_device *pdev) goto no_pdata; } - priv->txchan = cpdma_chan_create(priv->dma, tx_chan_num(EMAC_DEF_TX_CH), - emac_tx_handler); - priv->rxchan = cpdma_chan_create(priv->dma, rx_chan_num(EMAC_DEF_RX_CH), - emac_rx_handler); + priv->txchan = cpdma_chan_create(priv->dma, EMAC_DEF_TX_CH, + emac_tx_handler, 0); + priv->rxchan = cpdma_chan_create(priv->dma, EMAC_DEF_RX_CH, + emac_rx_handler, 1); if (WARN_ON(!priv->txchan || !priv->rxchan)) { rc = -ENOMEM; goto no_cpdma_chan; From ce52c744574bbe31e5c30788f69d19f20d328225 Mon Sep 17 00:00:00 2001 From: Ivan Khoronzhuk Date: Mon, 22 Aug 2016 21:18:28 +0300 Subject: [PATCH 0416/1715] net: ethernet: ti: cpsw: add ethtool channels support These ops allow to control number of channels driver is allowed to work with at cpdma level. The maximum number of channels is 8 for rx and 8 for tx. In dual_emac mode the h/w channels are shared between two interfaces and changing number on one interface changes number of channels on another. How many channels are supported and enabled: $ ethtool -l ethX Change number of channels (up to 8) $ ethtool -L ethX rx 6 tx 6 Per-channel statistic: $ ethtool -S ethX Signed-off-by: Ivan Khoronzhuk Reviewed-by: Mugunthan V N Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 180 +++++++++++++++++++++++++++++++++ 1 file changed, 180 insertions(+) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 1fbb50f25603..4273e7f9b4aa 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -736,6 +736,11 @@ static void cpsw_rx_handler(void *token, int len, int status) } requeue: + if (netif_dormant(ndev)) { + dev_kfree_skb_any(new_skb); + return; + } + ch = cpsw->rxch[skb_get_queue_mapping(new_skb)]; ret = cpdma_chan_submit(ch, new_skb, new_skb->data, skb_tailroom(new_skb), 0); @@ -2060,6 +2065,179 @@ static void cpsw_ethtool_op_complete(struct net_device *ndev) cpsw_err(priv, drv, "ethtool complete failed %d\n", ret); } +static void cpsw_get_channels(struct net_device *ndev, + struct ethtool_channels *ch) +{ + struct cpsw_common *cpsw = ndev_to_cpsw(ndev); + + ch->max_combined = 0; + ch->max_rx = CPSW_MAX_QUEUES; + ch->max_tx = CPSW_MAX_QUEUES; + ch->max_other = 0; + ch->other_count = 0; + ch->rx_count = cpsw->rx_ch_num; + ch->tx_count = cpsw->tx_ch_num; + ch->combined_count = 0; +} + +static int cpsw_check_ch_settings(struct cpsw_common *cpsw, + struct ethtool_channels *ch) +{ + if (ch->combined_count) + return -EINVAL; + + /* verify we have at least one channel in each direction */ + if (!ch->rx_count || !ch->tx_count) + return -EINVAL; + + if (ch->rx_count > cpsw->data.channels || + ch->tx_count > cpsw->data.channels) + return -EINVAL; + + return 0; +} + +static int cpsw_update_channels_res(struct cpsw_priv *priv, int ch_num, int rx) +{ + int (*poll)(struct napi_struct *, int); + struct cpsw_common *cpsw = priv->cpsw; + void (*handler)(void *, int, int); + struct cpdma_chan **chan; + int ret, *ch; + + if (rx) { + ch = &cpsw->rx_ch_num; + chan = cpsw->rxch; + handler = cpsw_rx_handler; + poll = cpsw_rx_poll; + } else { + ch = &cpsw->tx_ch_num; + chan = cpsw->txch; + handler = cpsw_tx_handler; + poll = cpsw_tx_poll; + } + + while (*ch < ch_num) { + chan[*ch] = cpdma_chan_create(cpsw->dma, *ch, handler, rx); + + if (IS_ERR(chan[*ch])) + return PTR_ERR(chan[*ch]); + + if (!chan[*ch]) + return -EINVAL; + + cpsw_info(priv, ifup, "created new %d %s channel\n", *ch, + (rx ? "rx" : "tx")); + (*ch)++; + } + + while (*ch > ch_num) { + (*ch)--; + + ret = cpdma_chan_destroy(chan[*ch]); + if (ret) + return ret; + + cpsw_info(priv, ifup, "destroyed %d %s channel\n", *ch, + (rx ? "rx" : "tx")); + } + + return 0; +} + +static int cpsw_update_channels(struct cpsw_priv *priv, + struct ethtool_channels *ch) +{ + int ret; + + ret = cpsw_update_channels_res(priv, ch->rx_count, 1); + if (ret) + return ret; + + ret = cpsw_update_channels_res(priv, ch->tx_count, 0); + if (ret) + return ret; + + return 0; +} + +static int cpsw_set_channels(struct net_device *ndev, + struct ethtool_channels *chs) +{ + struct cpsw_priv *priv = netdev_priv(ndev); + struct cpsw_common *cpsw = priv->cpsw; + struct cpsw_slave *slave; + int i, ret; + + ret = cpsw_check_ch_settings(cpsw, chs); + if (ret < 0) + return ret; + + /* Disable NAPI scheduling */ + cpsw_intr_disable(cpsw); + + /* Stop all transmit queues for every network device. + * Disable re-using rx descriptors with dormant_on. + */ + for (i = cpsw->data.slaves, slave = cpsw->slaves; i; i--, slave++) { + if (!(slave->ndev && netif_running(slave->ndev))) + continue; + + netif_tx_stop_all_queues(slave->ndev); + netif_dormant_on(slave->ndev); + } + + /* Handle rest of tx packets and stop cpdma channels */ + cpdma_ctlr_stop(cpsw->dma); + ret = cpsw_update_channels(priv, chs); + if (ret) + goto err; + + for (i = cpsw->data.slaves, slave = cpsw->slaves; i; i--, slave++) { + if (!(slave->ndev && netif_running(slave->ndev))) + continue; + + /* Inform stack about new count of queues */ + ret = netif_set_real_num_tx_queues(slave->ndev, + cpsw->tx_ch_num); + if (ret) { + dev_err(priv->dev, "cannot set real number of tx queues\n"); + goto err; + } + + ret = netif_set_real_num_rx_queues(slave->ndev, + cpsw->rx_ch_num); + if (ret) { + dev_err(priv->dev, "cannot set real number of rx queues\n"); + goto err; + } + + /* Enable rx packets handling */ + netif_dormant_off(slave->ndev); + } + + if (cpsw_common_res_usage_state(cpsw)) { + if (cpsw_fill_rx_channels(priv)) + goto err; + + /* After this receive is started */ + cpdma_ctlr_start(cpsw->dma); + cpsw_intr_enable(cpsw); + } + + /* Resume transmit for every affected interface */ + for (i = cpsw->data.slaves, slave = cpsw->slaves; i; i--, slave++) { + if (!(slave->ndev && netif_running(slave->ndev))) + continue; + netif_tx_start_all_queues(slave->ndev); + } + return 0; +err: + dev_err(priv->dev, "cannot update channels number, closing device\n"); + dev_close(ndev); + return ret; +} + static const struct ethtool_ops cpsw_ethtool_ops = { .get_drvinfo = cpsw_get_drvinfo, .get_msglevel = cpsw_get_msglevel, @@ -2081,6 +2259,8 @@ static const struct ethtool_ops cpsw_ethtool_ops = { .get_regs = cpsw_get_regs, .begin = cpsw_ethtool_op_begin, .complete = cpsw_ethtool_op_complete, + .get_channels = cpsw_get_channels, + .set_channels = cpsw_set_channels, }; static void cpsw_slave_init(struct cpsw_slave *slave, struct cpsw_common *cpsw, From b43f95695124baba9f7c48e247fc8fd212a984d9 Mon Sep 17 00:00:00 2001 From: Pablo Neira Date: Mon, 22 Aug 2016 11:37:36 +0200 Subject: [PATCH 0417/1715] netfilter: nf_tables: typo in trace attribute definition Should be attributes, instead of attibutes, for consistency with other definitions. Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 8c9d6ff70ec0..8a63f22b365d 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -1090,7 +1090,7 @@ enum nft_gen_attributes { * @NFTA_TRACE_NFPROTO: nf protocol processed (NLA_U32) * @NFTA_TRACE_POLICY: policy that decided fate of packet (NLA_U32) */ -enum nft_trace_attibutes { +enum nft_trace_attributes { NFTA_TRACE_UNSPEC, NFTA_TRACE_TABLE, NFTA_TRACE_CHAIN, From 26164e77cafbde075397f1df45245be03d8f19b6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 23 Aug 2016 15:27:23 +0100 Subject: [PATCH 0418/1715] rxrpc: Remove RXRPC_CALL_PROC_BUSY Remove RXRPC_CALL_PROC_BUSY as work queue items are now 100% non-reentrant. Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 1 - net/rxrpc/call_event.c | 6 ------ 2 files changed, 7 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index ff83fb1ddd47..3a2f4c214811 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -341,7 +341,6 @@ enum rxrpc_call_flag { RXRPC_CALL_RCVD_LAST, /* all packets received */ RXRPC_CALL_RUN_RTIMER, /* Tx resend timer started */ RXRPC_CALL_TX_SOFT_ACK, /* sent some soft ACKs */ - RXRPC_CALL_PROC_BUSY, /* the processor is busy */ RXRPC_CALL_INIT_ACCEPT, /* acceptance was initiated */ RXRPC_CALL_HAS_USERID, /* has a user ID attached */ RXRPC_CALL_EXPECT_OOS, /* expect out of sequence packets */ diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index e60cf65c2232..eaa8035dcb71 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -832,11 +832,6 @@ void rxrpc_process_call(struct work_struct *work) call->debug_id, rxrpc_call_states[call->state], call->events, (jiffies - call->creation_jif) / (HZ / 10)); - if (test_and_set_bit(RXRPC_CALL_PROC_BUSY, &call->flags)) { - _debug("XXXXXXXXXXXXX RUNNING ON MULTIPLE CPUS XXXXXXXXXXXXX"); - return; - } - if (!call->conn) goto skip_msg_init; @@ -1281,7 +1276,6 @@ maybe_reschedule: } error: - clear_bit(RXRPC_CALL_PROC_BUSY, &call->flags); kfree(acks); /* because we don't want two CPUs both processing the work item for one From dabe5a790655c79f47d75749874ce7b4d5016de9 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 23 Aug 2016 15:27:24 +0100 Subject: [PATCH 0419/1715] rxrpc: Tidy up the rxrpc_call struct a bit Do a little tidying of the rxrpc_call struct: (1) in_clientflag is no longer compared against the value that's in the packet, so keeping it in this form isn't necessary. Use a flag in flags instead and provide a pair of wrapper functions. (2) We don't read the epoch value, so that can go. (3) Move what remains of the data that were used for hashing up in the struct to be with the channel number. (4) Get rid of the local pointer. We can get at this via the socket struct and we only use this in the procfs viewer. Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 23 +++++++++++++++-------- net/rxrpc/call_object.c | 7 +------ net/rxrpc/conn_client.c | 1 - net/rxrpc/output.c | 4 ++-- net/rxrpc/proc.c | 6 +++--- 5 files changed, 21 insertions(+), 20 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 3a2f4c214811..0e6bc8227d54 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -344,6 +344,7 @@ enum rxrpc_call_flag { RXRPC_CALL_INIT_ACCEPT, /* acceptance was initiated */ RXRPC_CALL_HAS_USERID, /* has a user ID attached */ RXRPC_CALL_EXPECT_OOS, /* expect out of sequence packets */ + RXRPC_CALL_IS_SERVICE, /* Call is service call */ }; /* @@ -431,8 +432,11 @@ struct rxrpc_call { int error_report; /* Network error (ICMP/local transport) */ int error; /* Local error incurred */ enum rxrpc_call_state state : 8; /* current state of call */ - int debug_id; /* debug ID for printks */ u8 channel; /* connection channel occupied by this call */ + u16 service_id; /* service ID */ + u32 call_id; /* call ID on connection */ + u32 cid; /* connection ID plus channel index */ + int debug_id; /* debug ID for printks */ /* transmission-phase ACK management */ u8 acks_head; /* offset into window of first entry */ @@ -460,13 +464,6 @@ struct rxrpc_call { /* received packet records, 1 bit per record */ #define RXRPC_ACKR_WINDOW_ASZ DIV_ROUND_UP(RXRPC_MAXACKS, BITS_PER_LONG) unsigned long ackr_window[RXRPC_ACKR_WINDOW_ASZ + 1]; - - u8 in_clientflag; /* Copy of conn->in_clientflag */ - struct rxrpc_local *local; /* Local endpoint. */ - u32 call_id; /* call ID on connection */ - u32 cid; /* connection ID plus channel index */ - u32 epoch; /* epoch of this connection */ - u16 service_id; /* service ID */ }; /* @@ -527,6 +524,16 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *); void __rxrpc_put_call(struct rxrpc_call *); void __exit rxrpc_destroy_all_calls(void); +static inline bool rxrpc_is_service_call(const struct rxrpc_call *call) +{ + return test_bit(RXRPC_CALL_IS_SERVICE, &call->flags); +} + +static inline bool rxrpc_is_client_call(const struct rxrpc_call *call) +{ + return !rxrpc_is_service_call(call); +} + /* * conn_client.c */ diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index ae057e0740f3..5007e7ac889f 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -167,10 +167,7 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx, sock_hold(&rx->sk); call->socket = rx; call->rx_data_post = 1; - - call->local = rx->local; call->service_id = srx->srx_service; - call->in_clientflag = 0; _leave(" = %p", call); return call; @@ -323,6 +320,7 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx, candidate->channel = chan; candidate->rx_data_post = 0; candidate->state = RXRPC_CALL_SERVER_ACCEPTING; + candidate->flags |= (1 << RXRPC_CALL_IS_SERVICE); if (conn->security_ix > 0) candidate->state = RXRPC_CALL_SERVER_SECURING; @@ -397,10 +395,7 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx, list_add_tail(&call->link, &rxrpc_calls); write_unlock_bh(&rxrpc_call_lock); - call->local = conn->params.local; - call->epoch = conn->proto.epoch; call->service_id = conn->params.service_id; - call->in_clientflag = RXRPC_CLIENT_INITIATED; _net("CALL incoming %d on CONN %d", call->debug_id, call->conn->debug_id); diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 9e91f27b0d0f..d8dd8e6bb172 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -281,7 +281,6 @@ found_channel: _debug("found chan"); call->conn = conn; call->channel = chan; - call->epoch = conn->proto.epoch; call->cid = conn->proto.cid | chan; call->call_id = ++conn->channels[chan].call_counter; conn->channels[chan].call_id = call->call_id; diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index f4bda06b7d2d..9e626f1e2668 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -218,11 +218,11 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) ret = 0; } else if (cmd != RXRPC_CMD_SEND_DATA) { ret = -EINVAL; - } else if (!call->in_clientflag && + } else if (rxrpc_is_client_call(call) && call->state != RXRPC_CALL_CLIENT_SEND_REQUEST) { /* request phase complete for this client call */ ret = -EPROTO; - } else if (call->in_clientflag && + } else if (rxrpc_is_service_call(call) && call->state != RXRPC_CALL_SERVER_ACK_REQUEST && call->state != RXRPC_CALL_SERVER_SEND_REPLY) { /* Reply phase not begun or not complete for service call. */ diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c index ced5f07444e5..f92de18b5893 100644 --- a/net/rxrpc/proc.c +++ b/net/rxrpc/proc.c @@ -61,8 +61,8 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v) call = list_entry(v, struct rxrpc_call, link); sprintf(lbuff, "%pI4:%u", - &call->local->srx.transport.sin.sin_addr, - ntohs(call->local->srx.transport.sin.sin_port)); + &call->socket->local->srx.transport.sin.sin_addr, + ntohs(call->socket->local->srx.transport.sin.sin_port)); conn = call->conn; if (conn) @@ -80,7 +80,7 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v) call->service_id, call->cid, call->call_id, - call->in_clientflag ? "Svc" : "Clt", + rxrpc_is_service_call(call) ? "Svc" : "Clt", atomic_read(&call->usage), rxrpc_call_states[call->state], call->remote_abort ?: call->local_abort, From f36b5e444cf772f52782d47e99c68fef20ac5195 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 23 Aug 2016 15:27:24 +0100 Subject: [PATCH 0420/1715] rxrpc: When clearing a socket, clear the call sets in the right order When clearing a socket, we should clear the securing-in-progress list first, then the accept queue and last the main call tree because that's the order in which a call progresses. Not that a call should move from the accept queue to the main tree whilst we're shutting down a socket, but it a call could possibly move from sequreq to acceptq whilst we're clearing up. Signed-off-by: David Howells --- net/rxrpc/call_object.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 5007e7ac889f..008188103fd6 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -564,12 +564,6 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx) read_lock_bh(&rx->call_lock); - /* mark all the calls as no longer wanting incoming packets */ - for (p = rb_first(&rx->calls); p; p = rb_next(p)) { - call = rb_entry(p, struct rxrpc_call, sock_node); - rxrpc_mark_call_released(call); - } - /* kill the not-yet-accepted incoming calls */ list_for_each_entry(call, &rx->secureq, accept_link) { rxrpc_mark_call_released(call); @@ -579,6 +573,12 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx) rxrpc_mark_call_released(call); } + /* mark all the calls as no longer wanting incoming packets */ + for (p = rb_first(&rx->calls); p; p = rb_next(p)) { + call = rb_entry(p, struct rxrpc_call, sock_node); + rxrpc_mark_call_released(call); + } + read_unlock_bh(&rx->call_lock); _leave(""); } From 01a90a459850ed1f1573f06f00f7b9d466339df0 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 23 Aug 2016 15:27:24 +0100 Subject: [PATCH 0421/1715] rxrpc: Drop channel number field from rxrpc_call struct Drop the channel number (channel) field from the rxrpc_call struct to reduce the size of the call struct. The field is redundant: if the call is attached to a connection, the channel can be obtained from there by AND'ing with RXRPC_CHANNELMASK. Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 1 - net/rxrpc/call_object.c | 7 +++---- net/rxrpc/conn_client.c | 1 - net/rxrpc/conn_object.c | 5 +++-- net/rxrpc/rxkad.c | 4 ++-- 5 files changed, 8 insertions(+), 10 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 0e6bc8227d54..648060a5df35 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -432,7 +432,6 @@ struct rxrpc_call { int error_report; /* Network error (ICMP/local transport) */ int error; /* Local error incurred */ enum rxrpc_call_state state : 8; /* current state of call */ - u8 channel; /* connection channel occupied by this call */ u16 service_id; /* service ID */ u32 call_id; /* call ID on connection */ u32 cid; /* connection ID plus channel index */ diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 008188103fd6..4af01805bfc7 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -317,7 +317,6 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx, candidate->conn = conn; candidate->cid = sp->hdr.cid; candidate->call_id = sp->hdr.callNumber; - candidate->channel = chan; candidate->rx_data_post = 0; candidate->state = RXRPC_CALL_SERVER_ACCEPTING; candidate->flags |= (1 << RXRPC_CALL_IS_SERVICE); @@ -330,7 +329,7 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx, call = rcu_dereference_protected(conn->channels[chan].call, lockdep_is_held(&conn->channel_lock)); - _debug("channel[%u] is %p", candidate->channel, call); + _debug("channel[%u] is %p", candidate->cid & RXRPC_CHANNELMASK, call); if (call && call->call_id == sp->hdr.callNumber) { /* already set; must've been a duplicate packet */ _debug("extant call [%d]", call->state); @@ -677,8 +676,8 @@ static void rxrpc_destroy_call(struct work_struct *work) struct rxrpc_call *call = container_of(work, struct rxrpc_call, destroyer); - _enter("%p{%d,%d,%p}", - call, atomic_read(&call->usage), call->channel, call->conn); + _enter("%p{%d,%x,%p}", + call, atomic_read(&call->usage), call->cid, call->conn); ASSERTCMP(call->state, ==, RXRPC_CALL_DEAD); diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index d8dd8e6bb172..fc32cc67c2de 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -280,7 +280,6 @@ attached: found_channel: _debug("found chan"); call->conn = conn; - call->channel = chan; call->cid = conn->proto.cid | chan; call->call_id = ++conn->channels[chan].call_counter; conn->channels[chan].call_id = call->call_id; diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 896d84493a05..6a5a17efc538 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -156,9 +156,10 @@ not_found: void __rxrpc_disconnect_call(struct rxrpc_call *call) { struct rxrpc_connection *conn = call->conn; - struct rxrpc_channel *chan = &conn->channels[call->channel]; + struct rxrpc_channel *chan = + &conn->channels[call->cid & RXRPC_CHANNELMASK]; - _enter("%d,%d", conn->debug_id, call->channel); + _enter("%d,%x", conn->debug_id, call->cid); if (rcu_access_pointer(chan->call) == call) { /* Save the result of the call so that we can repeat it if necessary diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index 63afa9e9cc08..89f475febfd7 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -275,7 +275,7 @@ static int rxkad_secure_packet(struct rxrpc_call *call, memcpy(&iv, call->conn->csum_iv.x, sizeof(iv)); /* calculate the security checksum */ - x = call->channel << (32 - RXRPC_CIDSHIFT); + x = (call->cid & RXRPC_CHANNELMASK) << (32 - RXRPC_CIDSHIFT); x |= sp->hdr.seq & 0x3fffffff; call->crypto_buf[0] = htonl(sp->hdr.callNumber); call->crypto_buf[1] = htonl(x); @@ -507,7 +507,7 @@ static int rxkad_verify_packet(struct rxrpc_call *call, memcpy(&iv, call->conn->csum_iv.x, sizeof(iv)); /* validate the security checksum */ - x = call->channel << (32 - RXRPC_CIDSHIFT); + x = (call->cid & RXRPC_CHANNELMASK) << (32 - RXRPC_CIDSHIFT); x |= sp->hdr.seq & 0x3fffffff; call->crypto_buf[0] = htonl(call->call_id); call->crypto_buf[1] = htonl(x); From df844fd46b98c2efde8f4ac2d50d59bc90c4c679 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 23 Aug 2016 15:27:24 +0100 Subject: [PATCH 0422/1715] rxrpc: Use a tracepoint for skb accounting debugging Use a tracepoint to log various skb accounting points to help in debugging refcounting errors. Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 56 ++++++++++++++++++++++++++++++++ net/rxrpc/af_rxrpc.c | 1 + net/rxrpc/ar-internal.h | 45 ++++---------------------- net/rxrpc/call_accept.c | 1 + net/rxrpc/call_event.c | 3 ++ net/rxrpc/conn_event.c | 2 ++ net/rxrpc/local_event.c | 1 + net/rxrpc/output.c | 1 + net/rxrpc/recvmsg.c | 1 + net/rxrpc/skbuff.c | 62 ++++++++++++++++++++++++++++++++++++ 10 files changed, 135 insertions(+), 38 deletions(-) create mode 100644 include/trace/events/rxrpc.h diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h new file mode 100644 index 000000000000..15283ee3e41a --- /dev/null +++ b/include/trace/events/rxrpc.h @@ -0,0 +1,56 @@ +/* AF_RXRPC tracepoints + * + * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM rxrpc + +#if !defined(_TRACE_RXRPC_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_RXRPC_H + +#include + +TRACE_EVENT(rxrpc_skb, + TP_PROTO(struct sk_buff *skb, int op, int usage, int mod_count, + const void *where), + + TP_ARGS(skb, op, usage, mod_count, where), + + TP_STRUCT__entry( + __field(struct sk_buff *, skb ) + __field(int, op ) + __field(int, usage ) + __field(int, mod_count ) + __field(const void *, where ) + ), + + TP_fast_assign( + __entry->skb = skb; + __entry->op = op; + __entry->usage = usage; + __entry->mod_count = mod_count; + __entry->where = where; + ), + + TP_printk("s=%p %s u=%d m=%d p=%pSR", + __entry->skb, + (__entry->op == 0 ? "NEW" : + __entry->op == 1 ? "SEE" : + __entry->op == 2 ? "GET" : + __entry->op == 3 ? "FRE" : + "PUR"), + __entry->usage, + __entry->mod_count, + __entry->where) + ); + +#endif /* _TRACE_RXRPC_H */ + +/* This part must be outside protection */ +#include diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 88effadd4b16..c7cf356b42b8 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -22,6 +22,7 @@ #include #include #include +#define CREATE_TRACE_POINTS #include "ar-internal.h" MODULE_DESCRIPTION("RxRPC network protocol"); diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 648060a5df35..8cb517fbbd23 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -479,6 +479,8 @@ static inline void rxrpc_abort_call(struct rxrpc_call *call, u32 abort_code) write_unlock_bh(&call->state_lock); } +#include + /* * af_rxrpc.c */ @@ -752,6 +754,11 @@ int rxrpc_init_server_conn_security(struct rxrpc_connection *); * skbuff.c */ void rxrpc_packet_destructor(struct sk_buff *); +void rxrpc_new_skb(struct sk_buff *); +void rxrpc_see_skb(struct sk_buff *); +void rxrpc_get_skb(struct sk_buff *); +void rxrpc_free_skb(struct sk_buff *); +void rxrpc_purge_queue(struct sk_buff_head *); /* * sysctl.c @@ -899,44 +906,6 @@ do { \ #endif /* __KDEBUGALL */ -/* - * socket buffer accounting / leak finding - */ -static inline void __rxrpc_new_skb(struct sk_buff *skb, const char *fn) -{ - //_net("new skb %p %s [%d]", skb, fn, atomic_read(&rxrpc_n_skbs)); - //atomic_inc(&rxrpc_n_skbs); -} - -#define rxrpc_new_skb(skb) __rxrpc_new_skb((skb), __func__) - -static inline void __rxrpc_kill_skb(struct sk_buff *skb, const char *fn) -{ - //_net("kill skb %p %s [%d]", skb, fn, atomic_read(&rxrpc_n_skbs)); - //atomic_dec(&rxrpc_n_skbs); -} - -#define rxrpc_kill_skb(skb) __rxrpc_kill_skb((skb), __func__) - -static inline void __rxrpc_free_skb(struct sk_buff *skb, const char *fn) -{ - if (skb) { - CHECK_SLAB_OKAY(&skb->users); - //_net("free skb %p %s [%d]", - // skb, fn, atomic_read(&rxrpc_n_skbs)); - //atomic_dec(&rxrpc_n_skbs); - kfree_skb(skb); - } -} - -#define rxrpc_free_skb(skb) __rxrpc_free_skb((skb), __func__) - -static inline void rxrpc_purge_queue(struct sk_buff_head *list) -{ - struct sk_buff *skb; - while ((skb = skb_dequeue((list))) != NULL) - rxrpc_free_skb(skb); -} #define rxrpc_get_call(CALL) \ do { \ diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 9bae21e66d65..669ac79d3b44 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -203,6 +203,7 @@ void rxrpc_accept_incoming_calls(struct rxrpc_local *local) _net("incoming call skb %p", skb); + rxrpc_see_skb(skb); sp = rxrpc_skb(skb); /* Set up a response packet header in case we need it */ diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index eaa8035dcb71..3d1267cea9ea 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -407,6 +407,7 @@ static int rxrpc_drain_rx_oos_queue(struct rxrpc_call *call) skb = skb_dequeue(&call->rx_oos_queue); if (skb) { + rxrpc_see_skb(skb); sp = rxrpc_skb(skb); _debug("drain OOS packet %d [%d]", @@ -427,6 +428,7 @@ static int rxrpc_drain_rx_oos_queue(struct rxrpc_call *call) /* find out what the next packet is */ skb = skb_peek(&call->rx_oos_queue); + rxrpc_see_skb(skb); if (skb) call->rx_first_oos = rxrpc_skb(skb)->hdr.seq; else @@ -576,6 +578,7 @@ process_further: if (!skb) return -EAGAIN; + rxrpc_see_skb(skb); _net("deferred skb %p", skb); sp = rxrpc_skb(skb); diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index cee0f35bc1cf..c631d926f4db 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -277,6 +277,7 @@ void rxrpc_process_connection(struct work_struct *work) /* go through the conn-level event packets, releasing the ref on this * connection that each one has when we've finished with it */ while ((skb = skb_dequeue(&conn->rx_queue))) { + rxrpc_see_skb(skb); ret = rxrpc_process_event(conn, skb, &abort_code); switch (ret) { case -EPROTO: @@ -365,6 +366,7 @@ void rxrpc_reject_packets(struct rxrpc_local *local) whdr.type = RXRPC_PACKET_TYPE_ABORT; while ((skb = skb_dequeue(&local->reject_queue))) { + rxrpc_see_skb(skb); sp = rxrpc_skb(skb); switch (sa.sa.sa_family) { case AF_INET: diff --git a/net/rxrpc/local_event.c b/net/rxrpc/local_event.c index 31a3f86ef2f6..bcc6593b4cdb 100644 --- a/net/rxrpc/local_event.c +++ b/net/rxrpc/local_event.c @@ -93,6 +93,7 @@ void rxrpc_process_local_events(struct rxrpc_local *local) if (skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + rxrpc_see_skb(skb); _debug("{%d},{%u}", local->debug_id, sp->hdr.type); switch (sp->hdr.type) { diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 9e626f1e2668..e3a08d542fb7 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -548,6 +548,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, skb = call->tx_pending; call->tx_pending = NULL; + rxrpc_see_skb(skb); copied = 0; do { diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 9ed66d533002..b964c2d49a88 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -111,6 +111,7 @@ int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, } peek_next_packet: + rxrpc_see_skb(skb); sp = rxrpc_skb(skb); call = sp->call; ASSERT(call != NULL); diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c index 06c51d4b622d..d28058a97bc1 100644 --- a/net/rxrpc/skbuff.c +++ b/net/rxrpc/skbuff.c @@ -163,3 +163,65 @@ void rxrpc_kernel_free_skb(struct sk_buff *skb) rxrpc_free_skb(skb); } EXPORT_SYMBOL(rxrpc_kernel_free_skb); + +/* + * Note the existence of a new-to-us socket buffer (allocated or dequeued). + */ +void rxrpc_new_skb(struct sk_buff *skb) +{ + const void *here = __builtin_return_address(0); + int n = atomic_inc_return(&rxrpc_n_skbs); + trace_rxrpc_skb(skb, 0, atomic_read(&skb->users), n, here); +} + +/* + * Note the re-emergence of a socket buffer from a queue or buffer. + */ +void rxrpc_see_skb(struct sk_buff *skb) +{ + const void *here = __builtin_return_address(0); + if (skb) { + int n = atomic_read(&rxrpc_n_skbs); + trace_rxrpc_skb(skb, 1, atomic_read(&skb->users), n, here); + } +} + +/* + * Note the addition of a ref on a socket buffer. + */ +void rxrpc_get_skb(struct sk_buff *skb) +{ + const void *here = __builtin_return_address(0); + int n = atomic_inc_return(&rxrpc_n_skbs); + trace_rxrpc_skb(skb, 2, atomic_read(&skb->users), n, here); + skb_get(skb); +} + +/* + * Note the destruction of a socket buffer. + */ +void rxrpc_free_skb(struct sk_buff *skb) +{ + const void *here = __builtin_return_address(0); + if (skb) { + int n; + CHECK_SLAB_OKAY(&skb->users); + n = atomic_dec_return(&rxrpc_n_skbs); + trace_rxrpc_skb(skb, 3, atomic_read(&skb->users), n, here); + kfree_skb(skb); + } +} + +/* + * Clear a queue of socket buffers. + */ +void rxrpc_purge_queue(struct sk_buff_head *list) +{ + const void *here = __builtin_return_address(0); + struct sk_buff *skb; + while ((skb = skb_dequeue((list))) != NULL) { + int n = atomic_dec_return(&rxrpc_n_skbs); + trace_rxrpc_skb(skb, 4, atomic_read(&skb->users), n, here); + kfree_skb(skb); + } +} From f51b4480021c470d1f5e8066ccc7c10513bd4e37 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 23 Aug 2016 15:27:24 +0100 Subject: [PATCH 0423/1715] rxrpc: Set connection expiry on idle, not put Set the connection expiry time when a connection becomes idle rather than doing this in rxrpc_put_connection(). This makes the put path more efficient (it is likely to be called occasionally whilst a connection has outstanding calls because active workqueue items needs to be given a ref). The time is also preset in the connection allocator in case the connection never gets used. Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 11 +++++++++-- net/rxrpc/conn_object.c | 42 +++++++++++++++++------------------------ 2 files changed, 26 insertions(+), 27 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 8cb517fbbd23..66c917077880 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -313,7 +313,7 @@ struct rxrpc_connection { struct rxrpc_crypt csum_iv; /* packet checksum base */ unsigned long flags; unsigned long events; - unsigned long put_time; /* Time at which last put */ + unsigned long idle_timestamp; /* Time at which last became idle */ spinlock_t state_lock; /* state-change lock */ atomic_t usage; enum rxrpc_conn_proto_state state : 8; /* current state of connection */ @@ -565,7 +565,7 @@ struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *, struct sk_buff *); void __rxrpc_disconnect_call(struct rxrpc_call *); void rxrpc_disconnect_call(struct rxrpc_call *); -void rxrpc_put_connection(struct rxrpc_connection *); +void __rxrpc_put_connection(struct rxrpc_connection *); void __exit rxrpc_destroy_all_connections(void); static inline bool rxrpc_conn_is_client(const struct rxrpc_connection *conn) @@ -589,6 +589,13 @@ struct rxrpc_connection *rxrpc_get_connection_maybe(struct rxrpc_connection *con return atomic_inc_not_zero(&conn->usage) ? conn : NULL; } +static inline void rxrpc_put_connection(struct rxrpc_connection *conn) +{ + if (conn && atomic_dec_return(&conn->usage) == 1) + __rxrpc_put_connection(conn); +} + + static inline bool rxrpc_queue_conn(struct rxrpc_connection *conn) { if (!rxrpc_get_connection_maybe(conn)) diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 6a5a17efc538..743f0bb4aaa8 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -56,6 +56,7 @@ struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp) atomic_set(&conn->avail_chans, RXRPC_MAXCALLS); conn->size_align = 4; conn->header_size = sizeof(struct rxrpc_wire_header); + conn->idle_timestamp = jiffies; } _leave(" = %p{%d}", conn, conn ? conn->debug_id : 0); @@ -191,29 +192,16 @@ void rxrpc_disconnect_call(struct rxrpc_call *call) spin_unlock(&conn->channel_lock); call->conn = NULL; + conn->idle_timestamp = jiffies; rxrpc_put_connection(conn); } /* * release a virtual connection */ -void rxrpc_put_connection(struct rxrpc_connection *conn) +void __rxrpc_put_connection(struct rxrpc_connection *conn) { - if (!conn) - return; - - _enter("%p{u=%d,d=%d}", - conn, atomic_read(&conn->usage), conn->debug_id); - - ASSERTCMP(atomic_read(&conn->usage), >, 1); - - conn->put_time = ktime_get_seconds(); - if (atomic_dec_return(&conn->usage) == 1) { - _debug("zombie"); - rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0); - } - - _leave(""); + rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0); } /* @@ -248,14 +236,14 @@ static void rxrpc_destroy_connection(struct rcu_head *rcu) static void rxrpc_connection_reaper(struct work_struct *work) { struct rxrpc_connection *conn, *_p; - unsigned long reap_older_than, earliest, put_time, now; + unsigned long reap_older_than, earliest, idle_timestamp, now; LIST_HEAD(graveyard); _enter(""); - now = ktime_get_seconds(); - reap_older_than = now - rxrpc_connection_expiry; + now = jiffies; + reap_older_than = now - rxrpc_connection_expiry * HZ; earliest = ULONG_MAX; write_lock(&rxrpc_connection_lock); @@ -264,10 +252,14 @@ static void rxrpc_connection_reaper(struct work_struct *work) if (likely(atomic_read(&conn->usage) > 1)) continue; - put_time = READ_ONCE(conn->put_time); - if (time_after(put_time, reap_older_than)) { - if (time_before(put_time, earliest)) - earliest = put_time; + idle_timestamp = READ_ONCE(conn->idle_timestamp); + _debug("reap CONN %d { u=%d,t=%ld }", + conn->debug_id, atomic_read(&conn->usage), + (long)reap_older_than - (long)idle_timestamp); + + if (time_after(idle_timestamp, reap_older_than)) { + if (time_before(idle_timestamp, earliest)) + earliest = idle_timestamp; continue; } @@ -288,9 +280,9 @@ static void rxrpc_connection_reaper(struct work_struct *work) if (earliest != ULONG_MAX) { _debug("reschedule reaper %ld", (long) earliest - now); - ASSERTCMP(earliest, >, now); + ASSERT(time_after(earliest, now)); rxrpc_queue_delayed_work(&rxrpc_connection_reap, - (earliest - now) * HZ); + earliest - now); } while (!list_empty(&graveyard)) { From 563ea7d5d4eaf0ff63ddcaf8ed849eb88bb5738d Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 23 Aug 2016 15:27:25 +0100 Subject: [PATCH 0424/1715] rxrpc: Calculate serial skew on packet reception Calculate the serial number skew in the data_ready handler when a packet has been received and a connection looked up. The skew is cached in the sk_buff's priority field. The connection highest received serial number is updated at this time also. This can be done without locks or atomic instructions because, at this point, the code is serialised by the socket. This generates more accurate skew data because if the packet is offloaded to a work queue before this is determined, more packets may come in, bumping the highest serial number and thereby increasing the apparent skew. This also removes some unnecessary atomic ops. Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 7 ++++--- net/rxrpc/call_event.c | 18 ++++++++++-------- net/rxrpc/input.c | 37 +++++++++++++++++++++++-------------- net/rxrpc/proc.c | 2 +- net/rxrpc/skbuff.c | 10 +++++----- 5 files changed, 43 insertions(+), 31 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 66c917077880..c779b50135f6 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -322,7 +322,7 @@ struct rxrpc_connection { int error; /* local error incurred */ int debug_id; /* debug ID for printks */ atomic_t serial; /* packet serial number counter */ - atomic_t hi_serial; /* highest serial number received */ + unsigned int hi_serial; /* highest serial number received */ atomic_t avail_chans; /* number of channels available */ u8 size_align; /* data size alignment (for security) */ u8 header_size; /* rxrpc + security header size */ @@ -457,6 +457,7 @@ struct rxrpc_call { rxrpc_seq_t ackr_win_top; /* top of ACK window (rx_data_eaten is bottom) */ rxrpc_seq_t ackr_prev_seq; /* previous sequence number received */ u8 ackr_reason; /* reason to ACK */ + u16 ackr_skew; /* skew on packet being ACK'd */ rxrpc_serial_t ackr_serial; /* serial of packet being ACK'd */ atomic_t ackr_not_idle; /* number of packets in Rx queue */ @@ -499,8 +500,8 @@ int rxrpc_reject_call(struct rxrpc_sock *); /* * call_event.c */ -void __rxrpc_propose_ACK(struct rxrpc_call *, u8, u32, bool); -void rxrpc_propose_ACK(struct rxrpc_call *, u8, u32, bool); +void __rxrpc_propose_ACK(struct rxrpc_call *, u8, u16, u32, bool); +void rxrpc_propose_ACK(struct rxrpc_call *, u8, u16, u32, bool); void rxrpc_process_call(struct work_struct *); /* diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 3d1267cea9ea..3d1961d82325 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -25,7 +25,7 @@ * propose an ACK be sent */ void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, - u32 serial, bool immediate) + u16 skew, u32 serial, bool immediate) { unsigned long expiry; s8 prior = rxrpc_ack_priority[ack_reason]; @@ -44,8 +44,10 @@ void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, /* update DELAY, IDLE, REQUESTED and PING_RESPONSE ACK serial * numbers */ if (prior == rxrpc_ack_priority[call->ackr_reason]) { - if (prior <= 4) + if (prior <= 4) { + call->ackr_skew = skew; call->ackr_serial = serial; + } if (immediate) goto cancel_timer; return; @@ -103,13 +105,13 @@ cancel_timer: * propose an ACK be sent, locking the call structure */ void rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, - u32 serial, bool immediate) + u16 skew, u32 serial, bool immediate) { s8 prior = rxrpc_ack_priority[ack_reason]; if (prior > rxrpc_ack_priority[call->ackr_reason]) { spin_lock_bh(&call->lock); - __rxrpc_propose_ACK(call, ack_reason, serial, immediate); + __rxrpc_propose_ACK(call, ack_reason, skew, serial, immediate); spin_unlock_bh(&call->lock); } } @@ -628,7 +630,7 @@ process_further: if (ack.reason == RXRPC_ACK_PING) { _proto("Rx ACK %%%u PING Request", latest); rxrpc_propose_ACK(call, RXRPC_ACK_PING_RESPONSE, - sp->hdr.serial, true); + skb->priority, sp->hdr.serial, true); } /* discard any out-of-order or duplicate ACKs */ @@ -1153,8 +1155,7 @@ skip_msg_init: goto maybe_reschedule; send_ACK_with_skew: - ack.maxSkew = htons(atomic_read(&call->conn->hi_serial) - - ntohl(ack.serial)); + ack.maxSkew = htons(call->ackr_skew); send_ACK: mtu = call->conn->params.peer->if_mtu; mtu -= call->conn->params.peer->hdrsize; @@ -1244,7 +1245,8 @@ send_message_2: case RXRPC_CALL_SERVER_ACK_REQUEST: _debug("start ACK timer"); rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, - call->ackr_serial, false); + call->ackr_skew, call->ackr_serial, + false); default: break; } diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 70bb77818dea..34f7431bf494 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -125,6 +125,7 @@ static int rxrpc_fast_process_data(struct rxrpc_call *call, bool terminal; int ret, ackbit, ack; u32 serial; + u16 skew; u8 flags; _enter("{%u,%u},,{%u}", call->rx_data_post, call->rx_first_oos, seq); @@ -133,6 +134,7 @@ static int rxrpc_fast_process_data(struct rxrpc_call *call, ASSERTCMP(sp->call, ==, NULL); flags = sp->hdr.flags; serial = sp->hdr.serial; + skew = skb->priority; spin_lock(&call->lock); @@ -231,7 +233,7 @@ static int rxrpc_fast_process_data(struct rxrpc_call *call, spin_unlock(&call->lock); atomic_inc(&call->ackr_not_idle); - rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, serial, false); + rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, skew, serial, false); _leave(" = 0 [posted]"); return 0; @@ -244,7 +246,7 @@ out: discard_and_ack: _debug("discard and ACK packet %p", skb); - __rxrpc_propose_ACK(call, ack, serial, true); + __rxrpc_propose_ACK(call, ack, skew, serial, true); discard: spin_unlock(&call->lock); rxrpc_free_skb(skb); @@ -252,7 +254,7 @@ discard: return 0; enqueue_and_ack: - __rxrpc_propose_ACK(call, ack, serial, true); + __rxrpc_propose_ACK(call, ack, skew, serial, true); enqueue_packet: _net("defer skb %p", skb); spin_unlock(&call->lock); @@ -304,7 +306,7 @@ void rxrpc_fast_process_packet(struct rxrpc_call *call, struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); __be32 wtmp; - u32 hi_serial, abort_code; + u32 abort_code; _enter("%p,%p", call, skb); @@ -321,18 +323,12 @@ void rxrpc_fast_process_packet(struct rxrpc_call *call, struct sk_buff *skb) } #endif - /* track the latest serial number on this connection for ACK packet - * information */ - hi_serial = atomic_read(&call->conn->hi_serial); - while (sp->hdr.serial > hi_serial) - hi_serial = atomic_cmpxchg(&call->conn->hi_serial, hi_serial, - sp->hdr.serial); - /* request ACK generation for any ACK or DATA packet that requests * it */ if (sp->hdr.flags & RXRPC_REQUEST_ACK) { _proto("ACK Requested on %%%u", sp->hdr.serial); - rxrpc_propose_ACK(call, RXRPC_ACK_REQUESTED, sp->hdr.serial, false); + rxrpc_propose_ACK(call, RXRPC_ACK_REQUESTED, + skb->priority, sp->hdr.serial, false); } switch (sp->hdr.type) { @@ -637,7 +633,7 @@ void rxrpc_data_ready(struct sock *sk) struct rxrpc_skb_priv *sp; struct rxrpc_local *local = sk->sk_user_data; struct sk_buff *skb; - int ret; + int ret, skew; _enter("%p", sk); @@ -700,8 +696,21 @@ void rxrpc_data_ready(struct sock *sk) rcu_read_lock(); conn = rxrpc_find_connection_rcu(local, skb); - if (!conn) + if (!conn) { + skb->priority = 0; goto cant_route_call; + } + + /* Note the serial number skew here */ + skew = (int)sp->hdr.serial - (int)conn->hi_serial; + if (skew >= 0) { + if (skew > 0) + conn->hi_serial = sp->hdr.serial; + skb->priority = 0; + } else { + skew = -skew; + skb->priority = min(skew, 65535); + } if (sp->hdr.callNumber == 0) { /* Connection-level packet */ diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c index f92de18b5893..31b7f36a39cb 100644 --- a/net/rxrpc/proc.c +++ b/net/rxrpc/proc.c @@ -165,7 +165,7 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v) rxrpc_conn_states[conn->state], key_serial(conn->params.key), atomic_read(&conn->serial), - atomic_read(&conn->hi_serial)); + conn->hi_serial); return 0; } diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c index d28058a97bc1..fbd8c74d9505 100644 --- a/net/rxrpc/skbuff.c +++ b/net/rxrpc/skbuff.c @@ -53,9 +53,9 @@ static void rxrpc_request_final_ACK(struct rxrpc_call *call) /* * drop the bottom ACK off of the call ACK window and advance the window */ -static void rxrpc_hard_ACK_data(struct rxrpc_call *call, - struct rxrpc_skb_priv *sp) +static void rxrpc_hard_ACK_data(struct rxrpc_call *call, struct sk_buff *skb) { + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); int loop; u32 seq; @@ -91,8 +91,8 @@ static void rxrpc_hard_ACK_data(struct rxrpc_call *call, * its Tx bufferage. */ _debug("send Rx idle ACK"); - __rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, sp->hdr.serial, - false); + __rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, + skb->priority, sp->hdr.serial, false); } spin_unlock_bh(&call->lock); @@ -125,7 +125,7 @@ void rxrpc_kernel_data_consumed(struct rxrpc_call *call, struct sk_buff *skb) ASSERTCMP(sp->hdr.seq, >, call->rx_data_eaten); call->rx_data_recv = sp->hdr.seq; - rxrpc_hard_ACK_data(call, sp); + rxrpc_hard_ACK_data(call, skb); } EXPORT_SYMBOL(rxrpc_kernel_data_consumed); From 18bfeba50dfd0c8ee420396f2570f16a0bdbd7de Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 23 Aug 2016 15:27:25 +0100 Subject: [PATCH 0425/1715] rxrpc: Perform terminal call ACK/ABORT retransmission from conn processor Perform terminal call ACK/ABORT retransmission in the connection processor rather than in the call processor. With this change, once last_call is set, no more incoming packets will be routed to the corresponding call or any earlier calls on that channel (call IDs must only increase on a channel on a connection). Further, if a packet's callNumber is before the last_call ID or a packet is aimed at successfully completed service call then that packet is discarded and ignored. Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 7 ++- net/rxrpc/conn_event.c | 113 ++++++++++++++++++++++++++++++++++++++++ net/rxrpc/conn_object.c | 10 +++- net/rxrpc/input.c | 31 ++++++++++- 4 files changed, 157 insertions(+), 4 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index c779b50135f6..7296039c537a 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -295,7 +295,12 @@ struct rxrpc_connection { u32 call_id; /* ID of current call */ u32 call_counter; /* Call ID counter */ u32 last_call; /* ID of last call */ - u32 last_result; /* Result of last call (0/abort) */ + u8 last_type; /* Type of last packet */ + u16 last_service_id; + union { + u32 last_seq; + u32 last_abort; + }; } channels[RXRPC_MAXCALLS]; wait_queue_head_t channel_wq; /* queue to wait for channel to become available */ diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index c631d926f4db..c1c6b7f305d1 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -24,6 +24,113 @@ #include #include "ar-internal.h" +/* + * Retransmit terminal ACK or ABORT of the previous call. + */ +static void rxrpc_conn_retransmit(struct rxrpc_connection *conn, + struct sk_buff *skb) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + struct rxrpc_channel *chan; + struct msghdr msg; + struct kvec iov; + struct { + struct rxrpc_wire_header whdr; + union { + struct { + __be32 code; + } abort; + struct { + struct rxrpc_ackpacket ack; + struct rxrpc_ackinfo info; + }; + }; + } __attribute__((packed)) pkt; + size_t len; + u32 serial, mtu, call_id; + + _enter("%d", conn->debug_id); + + chan = &conn->channels[sp->hdr.cid & RXRPC_CHANNELMASK]; + + /* If the last call got moved on whilst we were waiting to run, just + * ignore this packet. + */ + call_id = READ_ONCE(chan->last_call); + /* Sync with __rxrpc_disconnect_call() */ + smp_rmb(); + if (call_id != sp->hdr.callNumber) + return; + + msg.msg_name = &conn->params.peer->srx.transport; + msg.msg_namelen = conn->params.peer->srx.transport_len; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = 0; + + pkt.whdr.epoch = htonl(sp->hdr.epoch); + pkt.whdr.cid = htonl(sp->hdr.cid); + pkt.whdr.callNumber = htonl(sp->hdr.callNumber); + pkt.whdr.seq = 0; + pkt.whdr.type = chan->last_type; + pkt.whdr.flags = conn->out_clientflag; + pkt.whdr.userStatus = 0; + pkt.whdr.securityIndex = conn->security_ix; + pkt.whdr._rsvd = 0; + pkt.whdr.serviceId = htons(chan->last_service_id); + + len = sizeof(pkt.whdr); + switch (chan->last_type) { + case RXRPC_PACKET_TYPE_ABORT: + pkt.abort.code = htonl(chan->last_abort); + len += sizeof(pkt.abort); + break; + + case RXRPC_PACKET_TYPE_ACK: + mtu = conn->params.peer->if_mtu; + mtu -= conn->params.peer->hdrsize; + pkt.ack.bufferSpace = 0; + pkt.ack.maxSkew = htons(skb->priority); + pkt.ack.firstPacket = htonl(chan->last_seq); + pkt.ack.previousPacket = htonl(chan->last_seq - 1); + pkt.ack.serial = htonl(sp->hdr.serial); + pkt.ack.reason = RXRPC_ACK_DUPLICATE; + pkt.ack.nAcks = 0; + pkt.info.rxMTU = htonl(rxrpc_rx_mtu); + pkt.info.maxMTU = htonl(mtu); + pkt.info.rwind = htonl(rxrpc_rx_window_size); + pkt.info.jumbo_max = htonl(rxrpc_rx_jumbo_max); + len += sizeof(pkt.ack) + sizeof(pkt.info); + break; + } + + /* Resync with __rxrpc_disconnect_call() and check that the last call + * didn't get advanced whilst we were filling out the packets. + */ + smp_rmb(); + if (READ_ONCE(chan->last_call) != call_id) + return; + + iov.iov_base = &pkt; + iov.iov_len = len; + + serial = atomic_inc_return(&conn->serial); + pkt.whdr.serial = htonl(serial); + + switch (chan->last_type) { + case RXRPC_PACKET_TYPE_ABORT: + _proto("Tx ABORT %%%u { %d } [re]", serial, conn->local_abort); + break; + case RXRPC_PACKET_TYPE_ACK: + _proto("Tx ACK %%%u [re]", serial); + break; + } + + kernel_sendmsg(conn->params.local->socket, &msg, &iov, 1, len); + _leave(""); + return; +} + /* * pass a connection-level abort onto all calls on that connection */ @@ -166,6 +273,12 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, _enter("{%d},{%u,%%%u},", conn->debug_id, sp->hdr.type, sp->hdr.serial); switch (sp->hdr.type) { + case RXRPC_PACKET_TYPE_DATA: + case RXRPC_PACKET_TYPE_ACK: + rxrpc_conn_retransmit(conn, skb); + rxrpc_free_skb(skb); + return 0; + case RXRPC_PACKET_TYPE_ABORT: if (skb_copy_bits(skb, 0, &wtmp, sizeof(wtmp)) < 0) return -EPROTO; diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 743f0bb4aaa8..b4af37ebb112 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -166,7 +166,15 @@ void __rxrpc_disconnect_call(struct rxrpc_call *call) /* Save the result of the call so that we can repeat it if necessary * through the channel, whilst disposing of the actual call record. */ - chan->last_result = call->local_abort; + chan->last_service_id = call->service_id; + if (call->local_abort) { + chan->last_abort = call->local_abort; + chan->last_type = RXRPC_PACKET_TYPE_ABORT; + } else { + chan->last_seq = call->rx_data_eaten; + chan->last_type = RXRPC_PACKET_TYPE_ACK; + } + /* Sync with rxrpc_conn_retransmit(). */ smp_wmb(); chan->last_call = chan->call_id; chan->call_id = chan->call_counter; diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 34f7431bf494..66cdeb56f44f 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -566,7 +566,8 @@ done: /* * post connection-level events to the connection - * - this includes challenges, responses and some aborts + * - this includes challenges, responses, some aborts and call terminal packet + * retransmission. */ static void rxrpc_post_packet_to_conn(struct rxrpc_connection *conn, struct sk_buff *skb) @@ -716,18 +717,44 @@ void rxrpc_data_ready(struct sock *sk) /* Connection-level packet */ _debug("CONN %p {%d}", conn, conn->debug_id); rxrpc_post_packet_to_conn(conn, skb); + goto out_unlock; } else { /* Call-bound packets are routed by connection channel. */ unsigned int channel = sp->hdr.cid & RXRPC_CHANNELMASK; struct rxrpc_channel *chan = &conn->channels[channel]; - struct rxrpc_call *call = rcu_dereference(chan->call); + struct rxrpc_call *call; + /* Ignore really old calls */ + if (sp->hdr.callNumber < chan->last_call) + goto discard_unlock; + + if (sp->hdr.callNumber == chan->last_call) { + /* For the previous service call, if completed + * successfully, we discard all further packets. + */ + if (rxrpc_conn_is_service(call->conn) && + (chan->last_type == RXRPC_PACKET_TYPE_ACK || + sp->hdr.type == RXRPC_PACKET_TYPE_ABORT)) + goto discard_unlock; + + /* But otherwise we need to retransmit the final packet + * from data cached in the connection record. + */ + rxrpc_post_packet_to_conn(conn, skb); + goto out_unlock; + } + + call = rcu_dereference(chan->call); if (!call || atomic_read(&call->usage) == 0) goto cant_route_call; rxrpc_post_packet_to_call(call, skb); + goto out_unlock; } +discard_unlock: + rxrpc_free_skb(skb); +out_unlock: rcu_read_unlock(); out: return; From 508f8ccdab0ef530dbc0ef8bb526ee11acc409ed Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 2 Aug 2016 00:20:01 +0200 Subject: [PATCH 0426/1715] netfilter: nf_tables: introduce nft_chain_parse_hook() Introduce a new function to wrap the code that parses the chain hook configuration so we can reuse this code to validate chain updates. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 152 ++++++++++++++++++++-------------- 1 file changed, 89 insertions(+), 63 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 7e1c876c7608..463fcada6074 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1196,6 +1196,83 @@ static void nf_tables_chain_destroy(struct nft_chain *chain) } } +struct nft_chain_hook { + u32 num; + u32 priority; + const struct nf_chain_type *type; + struct net_device *dev; +}; + +static int nft_chain_parse_hook(struct net *net, + const struct nlattr * const nla[], + struct nft_af_info *afi, + struct nft_chain_hook *hook, bool create) +{ + struct nlattr *ha[NFTA_HOOK_MAX + 1]; + const struct nf_chain_type *type; + struct net_device *dev; + int err; + + err = nla_parse_nested(ha, NFTA_HOOK_MAX, nla[NFTA_CHAIN_HOOK], + nft_hook_policy); + if (err < 0) + return err; + + if (ha[NFTA_HOOK_HOOKNUM] == NULL || + ha[NFTA_HOOK_PRIORITY] == NULL) + return -EINVAL; + + hook->num = ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM])); + if (hook->num >= afi->nhooks) + return -EINVAL; + + hook->priority = ntohl(nla_get_be32(ha[NFTA_HOOK_PRIORITY])); + + type = chain_type[afi->family][NFT_CHAIN_T_DEFAULT]; + if (nla[NFTA_CHAIN_TYPE]) { + type = nf_tables_chain_type_lookup(afi, nla[NFTA_CHAIN_TYPE], + create); + if (IS_ERR(type)) + return PTR_ERR(type); + } + if (!(type->hook_mask & (1 << hook->num))) + return -EOPNOTSUPP; + if (!try_module_get(type->owner)) + return -ENOENT; + + hook->type = type; + + hook->dev = NULL; + if (afi->flags & NFT_AF_NEEDS_DEV) { + char ifname[IFNAMSIZ]; + + if (!ha[NFTA_HOOK_DEV]) { + module_put(type->owner); + return -EOPNOTSUPP; + } + + nla_strlcpy(ifname, ha[NFTA_HOOK_DEV], IFNAMSIZ); + dev = dev_get_by_name(net, ifname); + if (!dev) { + module_put(type->owner); + return -ENOENT; + } + hook->dev = dev; + } else if (ha[NFTA_HOOK_DEV]) { + module_put(type->owner); + return -EOPNOTSUPP; + } + + return 0; +} + +static void nft_chain_release_hook(struct nft_chain_hook *hook) +{ + module_put(hook->type->owner); + if (hook->dev != NULL) + dev_put(hook->dev); +} + static int nf_tables_newchain(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) @@ -1206,10 +1283,8 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, struct nft_table *table; struct nft_chain *chain; struct nft_base_chain *basechain = NULL; - struct nlattr *ha[NFTA_HOOK_MAX + 1]; u8 genmask = nft_genmask_next(net); int family = nfmsg->nfgen_family; - struct net_device *dev = NULL; u8 policy = NF_ACCEPT; u64 handle = 0; unsigned int i; @@ -1320,102 +1395,53 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, return -EOVERFLOW; if (nla[NFTA_CHAIN_HOOK]) { - const struct nf_chain_type *type; + struct nft_chain_hook hook; struct nf_hook_ops *ops; nf_hookfn *hookfn; - u32 hooknum, priority; - type = chain_type[family][NFT_CHAIN_T_DEFAULT]; - if (nla[NFTA_CHAIN_TYPE]) { - type = nf_tables_chain_type_lookup(afi, - nla[NFTA_CHAIN_TYPE], - create); - if (IS_ERR(type)) - return PTR_ERR(type); - } - - err = nla_parse_nested(ha, NFTA_HOOK_MAX, nla[NFTA_CHAIN_HOOK], - nft_hook_policy); + err = nft_chain_parse_hook(net, nla, afi, &hook, create); if (err < 0) return err; - if (ha[NFTA_HOOK_HOOKNUM] == NULL || - ha[NFTA_HOOK_PRIORITY] == NULL) - return -EINVAL; - - hooknum = ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM])); - if (hooknum >= afi->nhooks) - return -EINVAL; - priority = ntohl(nla_get_be32(ha[NFTA_HOOK_PRIORITY])); - - if (!(type->hook_mask & (1 << hooknum))) - return -EOPNOTSUPP; - if (!try_module_get(type->owner)) - return -ENOENT; - hookfn = type->hooks[hooknum]; - - if (afi->flags & NFT_AF_NEEDS_DEV) { - char ifname[IFNAMSIZ]; - - if (!ha[NFTA_HOOK_DEV]) { - module_put(type->owner); - return -EOPNOTSUPP; - } - - nla_strlcpy(ifname, ha[NFTA_HOOK_DEV], IFNAMSIZ); - dev = dev_get_by_name(net, ifname); - if (!dev) { - module_put(type->owner); - return -ENOENT; - } - } else if (ha[NFTA_HOOK_DEV]) { - module_put(type->owner); - return -EOPNOTSUPP; - } basechain = kzalloc(sizeof(*basechain), GFP_KERNEL); if (basechain == NULL) { - module_put(type->owner); - if (dev != NULL) - dev_put(dev); + nft_chain_release_hook(&hook); return -ENOMEM; } - if (dev != NULL) - strncpy(basechain->dev_name, dev->name, IFNAMSIZ); + if (hook.dev != NULL) + strncpy(basechain->dev_name, hook.dev->name, IFNAMSIZ); if (nla[NFTA_CHAIN_COUNTERS]) { stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]); if (IS_ERR(stats)) { - module_put(type->owner); + nft_chain_release_hook(&hook); kfree(basechain); - if (dev != NULL) - dev_put(dev); return PTR_ERR(stats); } basechain->stats = stats; } else { stats = netdev_alloc_pcpu_stats(struct nft_stats); if (stats == NULL) { - module_put(type->owner); + nft_chain_release_hook(&hook); kfree(basechain); - if (dev != NULL) - dev_put(dev); return -ENOMEM; } rcu_assign_pointer(basechain->stats, stats); } - basechain->type = type; + hookfn = hook.type->hooks[hook.num]; + basechain->type = hook.type; chain = &basechain->chain; for (i = 0; i < afi->nops; i++) { ops = &basechain->ops[i]; ops->pf = family; - ops->hooknum = hooknum; - ops->priority = priority; + ops->hooknum = hook.num; + ops->priority = hook.priority; ops->priv = chain; ops->hook = afi->hooks[ops->hooknum]; - ops->dev = dev; + ops->dev = hook.dev; if (hookfn) ops->hook = hookfn; if (afi->hook_ops_init) From 6133740d6e80d969ff7d41098a9db1091d0f9c94 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 2 Aug 2016 00:30:38 +0200 Subject: [PATCH 0427/1715] netfilter: nf_tables: reject hook configuration updates on existing chains Currently, if you add a base chain whose name clashes with an existing non-base chain, nf_tables doesn't complain about this. Similarly, if you update the chain type, the hook number and priority. With this patch, nf_tables bails out in case any of this unsupported operations occur by returning EBUSY. # nft add table x # nft add chain x y # nft add chain x y { type nat hook input priority 0\; } :1:1-49: Error: Could not process rule: Device or resource busy add chain x y { type nat hook input priority 0; } ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 463fcada6074..221d27f09623 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1348,6 +1348,37 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, if (nlh->nlmsg_flags & NLM_F_REPLACE) return -EOPNOTSUPP; + if (nla[NFTA_CHAIN_HOOK]) { + struct nft_base_chain *basechain; + struct nft_chain_hook hook; + struct nf_hook_ops *ops; + + if (!(chain->flags & NFT_BASE_CHAIN)) + return -EBUSY; + + err = nft_chain_parse_hook(net, nla, afi, &hook, + create); + if (err < 0) + return err; + + basechain = nft_base_chain(chain); + if (basechain->type != hook.type) { + nft_chain_release_hook(&hook); + return -EBUSY; + } + + for (i = 0; i < afi->nops; i++) { + ops = &basechain->ops[i]; + if (ops->hooknum != hook.num || + ops->priority != hook.priority || + ops->dev != hook.dev) { + nft_chain_release_hook(&hook); + return -EBUSY; + } + } + nft_chain_release_hook(&hook); + } + if (nla[NFTA_CHAIN_HANDLE] && name) { struct nft_chain *chain2; From 8737caafd16790c654f1fb8564abcf6e1f3ffe19 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 23 Aug 2016 12:17:44 -0700 Subject: [PATCH 0428/1715] hv_netvsc: fix rtnl locking in callback The function get_netvsc_net_device had conditional locking. This was unnecessary, incorrect, but harmless. It was unnecessary since the code is only called from netlink netdev event callback where RTNL is always acquired before the callbacks are run. It was incorrect because of use of trylock and then continuing. Fix by replacing with proper assertion. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc_drv.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index eb2c122ec10a..6924d01a794e 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -1167,9 +1167,8 @@ static void netvsc_free_netdev(struct net_device *netdev) static struct net_device *get_netvsc_net_device(char *mac) { struct net_device *dev, *found = NULL; - int rtnl_locked; - rtnl_locked = rtnl_trylock(); + ASSERT_RTNL(); for_each_netdev(&init_net, dev) { if (memcmp(dev->dev_addr, mac, ETH_ALEN) == 0) { @@ -1179,8 +1178,6 @@ static struct net_device *get_netvsc_net_device(char *mac) break; } } - if (rtnl_locked) - rtnl_unlock(); return found; } From 9477386687354f2aa8f4843170b7093c6dd1eb37 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 23 Aug 2016 12:17:45 -0700 Subject: [PATCH 0429/1715] hv_netvsc: make RSS hash key static Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 2 -- drivers/net/hyperv/rndis_filter.c | 3 ++- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index ce45d6835e88..b58e5592a5d5 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -84,8 +84,6 @@ struct ndis_recv_scale_cap { /* NDIS_RECEIVE_SCALE_CAPABILITIES */ #define NDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2 40 #define ITAB_NUM 128 -#define HASH_KEYLEN NDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2 -extern u8 netvsc_hash_key[]; struct ndis_recv_scale_param { /* NDIS_RECEIVE_SCALE_PARAMETERS */ struct ndis_obj_header hdr; diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 3ecb2d05cf3f..a100380ba6b9 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -663,13 +663,14 @@ cleanup: return ret; } -u8 netvsc_hash_key[HASH_KEYLEN] = { +static const u8 netvsc_hash_key[] = { 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa }; +#define HASH_KEYLEN ARRAY_SIZE(netvsc_hash_key) static int rndis_filter_set_rss_param(struct rndis_device *rdev, int num_queue) { From e53a9c2a5a8cd0a3a8b3c0f9b7a3ad9bc6a28867 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 23 Aug 2016 12:17:46 -0700 Subject: [PATCH 0430/1715] hv_netvsc: use kcalloc Better to use kcalloc rather than kzalloc and multiply for an array. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index b15edfc37a59..f64c9781b6cf 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -421,8 +421,8 @@ static int netvsc_init_buf(struct hv_device *device) net_device->map_words = DIV_ROUND_UP(net_device->send_section_cnt, BITS_PER_LONG); - net_device->send_section_map = - kzalloc(net_device->map_words * sizeof(ulong), GFP_KERNEL); + net_device->send_section_map = kcalloc(net_device->map_words, + sizeof(ulong), GFP_KERNEL); if (net_device->send_section_map == NULL) { ret = -ENOMEM; goto cleanup; From 796cc88c32c1bd1f833d596448ac785a8736e57c Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 23 Aug 2016 12:17:47 -0700 Subject: [PATCH 0431/1715] hv_netvsc: style cleanups Fix most of the complaints about the style of the code. Things like extra blank lines and return statements. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc.c | 18 ++---------------- drivers/net/hyperv/netvsc_drv.c | 7 ------- drivers/net/hyperv/rndis_filter.c | 8 +------- 3 files changed, 3 insertions(+), 30 deletions(-) diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index f64c9781b6cf..fe83de33895d 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -59,7 +59,6 @@ void netvsc_switch_datapath(struct net_device *ndev, bool vf) VM_PKT_DATA_INBAND, 0); } - static struct netvsc_device *alloc_net_device(void) { struct netvsc_device *net_device; @@ -123,7 +122,6 @@ get_in_err: return net_device; } - static int netvsc_destroy_buf(struct hv_device *device) { struct nvsp_message *revoke_packet; @@ -285,7 +283,6 @@ static int netvsc_init_buf(struct hv_device *device) goto cleanup; } - /* Notify the NetVsp of the gpadl handle */ init_packet = &net_device->channel_init_pkt; @@ -412,7 +409,7 @@ static int netvsc_init_buf(struct hv_device *device) /* Section count is simply the size divided by the section size. */ net_device->send_section_cnt = - net_device->send_buf_size/net_device->send_section_size; + net_device->send_buf_size / net_device->send_section_size; dev_info(&device->device, "Send section size: %d, Section count:%d\n", net_device->send_section_size, net_device->send_section_cnt); @@ -437,7 +434,6 @@ exit: return ret; } - /* Negotiate NVSP protocol version */ static int negotiate_nvsp_ver(struct hv_device *device, struct netvsc_device *net_device, @@ -593,7 +589,6 @@ int netvsc_device_remove(struct hv_device *device) return 0; } - #define RING_AVAIL_PERCENT_HIWATER 20 #define RING_AVAIL_PERCENT_LOWATER 10 @@ -676,13 +671,11 @@ static void netvsc_send_completion(struct netvsc_device *net_device, !net_device_ctx->start_remove && (hv_ringbuf_avail_percent(&channel->outbound) > RING_AVAIL_PERCENT_HIWATER || queue_sends < 1)) - netif_tx_wake_queue(netdev_get_tx_queue( - ndev, q_idx)); + netif_tx_wake_queue(netdev_get_tx_queue(ndev, q_idx)); } else { netdev_err(ndev, "Unknown send completion packet type- " "%d received!!\n", nvsp_packet->hdr.msg_type); } - } static u32 netvsc_get_next_send_section(struct netvsc_device *net_device) @@ -1157,7 +1150,6 @@ static void netvsc_receive(struct netvsc_device *net_device, /* Pass it to the upper layer */ status = rndis_filter_receive(device, netvsc_packet, &data, channel); - } if (!net_device->mrc[q_idx].buf) { @@ -1182,7 +1174,6 @@ static void netvsc_receive(struct netvsc_device *net_device, rcd->status = status; } - static void netvsc_send_table(struct hv_device *hdev, struct nvsp_message *nvmsg) { @@ -1263,7 +1254,6 @@ static void netvsc_process_raw_pkt(struct hv_device *device, } } - void netvsc_channel_cb(void *context) { int ret; @@ -1320,8 +1310,6 @@ void netvsc_channel_cb(void *context) ndev, request_id, desc); - - } else { /* * We are done for this pass. @@ -1350,8 +1338,6 @@ void netvsc_channel_cb(void *context) kfree(buffer); netvsc_chk_recv_comp(net_device, channel, q_idx); - - return; } /* diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 6924d01a794e..3bbf07352433 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -40,7 +40,6 @@ #include "hyperv_net.h" - #define RING_SIZE_MIN 64 #define LINKCHANGE_INT (2 * HZ) #define NETVSC_HW_FEATURES (NETIF_F_RXCSUM | \ @@ -411,7 +410,6 @@ check_size: FIELD_SIZEOF(struct sk_buff, cb)); packet = (struct hv_netvsc_packet *)skb->cb; - packet->q_idx = skb_get_queue_mapping(skb); packet->total_data_buflen = skb->len; @@ -617,7 +615,6 @@ void netvsc_linkstatus_callback(struct hv_device *device_obj, schedule_delayed_work(&ndev_ctx->dwork, 0); } - static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net, struct hv_netvsc_packet *packet, struct ndis_tcp_ip_checksum_info *csum_info, @@ -1271,7 +1268,6 @@ static int netvsc_vf_up(struct net_device *vf_netdev) return NOTIFY_OK; } - static int netvsc_vf_down(struct net_device *vf_netdev) { struct net_device *ndev; @@ -1305,7 +1301,6 @@ static int netvsc_vf_down(struct net_device *vf_netdev) return NOTIFY_OK; } - static int netvsc_unregister_vf(struct net_device *vf_netdev) { struct net_device *ndev; @@ -1433,7 +1428,6 @@ static int netvsc_remove(struct hv_device *dev) return 0; } - ndev_ctx = netdev_priv(net); net_device = ndev_ctx->nvdev; @@ -1480,7 +1474,6 @@ static struct hv_driver netvsc_drv = { .remove = netvsc_remove, }; - /* * On Hyper-V, every VF interface is matched with a corresponding * synthetic interface. The synthetic interface is presented first diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index a100380ba6b9..9195d5da8485 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -721,7 +721,6 @@ static int rndis_filter_set_rss_param(struct rndis_device *rdev, int num_queue) for (i = 0; i < HASH_KEYLEN; i++) keyp[i] = netvsc_hash_key[i]; - ret = rndis_filter_send_request(rdev, request); if (ret != 0) goto cleanup; @@ -739,7 +738,6 @@ cleanup: return ret; } - static int rndis_filter_query_device_link_status(struct rndis_device *dev) { u32 size = sizeof(u32); @@ -815,7 +813,6 @@ cleanup: return ret; } - static int rndis_filter_init_device(struct rndis_device *dev) { struct rndis_request *request; @@ -903,7 +900,6 @@ cleanup: if (request) put_rndis_request(dev, request); - return; } static int rndis_filter_open_device(struct rndis_device *dev) @@ -973,7 +969,7 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc) } int rndis_filter_device_add(struct hv_device *dev, - void *additional_info) + void *additional_info) { int ret; struct net_device *net = hv_get_drvdata(dev); @@ -1055,7 +1051,6 @@ int rndis_filter_device_add(struct hv_device *dev, offloads.udp_ip_v6_csum = NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED; offloads.lso_v2_ipv4 = NDIS_OFFLOAD_PARAMETERS_LSOV2_ENABLED; - ret = rndis_filter_set_offload_params(net, &offloads); if (ret) goto err_dev_remv; @@ -1181,7 +1176,6 @@ void rndis_filter_device_remove(struct hv_device *dev) netvsc_device_remove(dev); } - int rndis_filter_open(struct netvsc_device *nvdev) { if (!nvdev) From 30d1de08c87ddde6f73936c3350e7e153988fe02 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 23 Aug 2016 12:17:48 -0700 Subject: [PATCH 0432/1715] hv_netvsc: make inline functions static Several new functions were introduced into hyperv.h but only used in one file. Move them and let compiler decide on inline. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc.c | 85 ++++++++++++++++++++++++++++++++++++- include/linux/hyperv.h | 84 ------------------------------------ 2 files changed, 84 insertions(+), 85 deletions(-) diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index fe83de33895d..4ef2af63419f 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -33,6 +33,89 @@ #include "hyperv_net.h" +/* + * An API to support in-place processing of incoming VMBUS packets. + */ +#define VMBUS_PKT_TRAILER 8 + +static struct vmpacket_descriptor * +get_next_pkt_raw(struct vmbus_channel *channel) +{ + struct hv_ring_buffer_info *ring_info = &channel->inbound; + u32 read_loc = ring_info->priv_read_index; + void *ring_buffer = hv_get_ring_buffer(ring_info); + struct vmpacket_descriptor *cur_desc; + u32 packetlen; + u32 dsize = ring_info->ring_datasize; + u32 delta = read_loc - ring_info->ring_buffer->read_index; + u32 bytes_avail_toread = (hv_get_bytes_to_read(ring_info) - delta); + + if (bytes_avail_toread < sizeof(struct vmpacket_descriptor)) + return NULL; + + if ((read_loc + sizeof(*cur_desc)) > dsize) + return NULL; + + cur_desc = ring_buffer + read_loc; + packetlen = cur_desc->len8 << 3; + + /* + * If the packet under consideration is wrapping around, + * return failure. + */ + if ((read_loc + packetlen + VMBUS_PKT_TRAILER) > (dsize - 1)) + return NULL; + + return cur_desc; +} + +/* + * A helper function to step through packets "in-place" + * This API is to be called after each successful call + * get_next_pkt_raw(). + */ +static void put_pkt_raw(struct vmbus_channel *channel, + struct vmpacket_descriptor *desc) +{ + struct hv_ring_buffer_info *ring_info = &channel->inbound; + u32 read_loc = ring_info->priv_read_index; + u32 packetlen = desc->len8 << 3; + u32 dsize = ring_info->ring_datasize; + + BUG_ON((read_loc + packetlen + VMBUS_PKT_TRAILER) > dsize); + + /* + * Include the packet trailer. + */ + ring_info->priv_read_index += packetlen + VMBUS_PKT_TRAILER; +} + +/* + * This call commits the read index and potentially signals the host. + * Here is the pattern for using the "in-place" consumption APIs: + * + * while (get_next_pkt_raw() { + * process the packet "in-place"; + * put_pkt_raw(); + * } + * if (packets processed in place) + * commit_rd_index(); + */ +static void commit_rd_index(struct vmbus_channel *channel) +{ + struct hv_ring_buffer_info *ring_info = &channel->inbound; + /* + * Make sure all reads are done before we update the read index since + * the writer may start writing to the read area once the read index + * is updated. + */ + virt_rmb(); + ring_info->ring_buffer->read_index = ring_info->priv_read_index; + + if (hv_need_to_signal_on_read(ring_info)) + vmbus_set_event(channel); +} + /* * Switch the data path from the synthetic interface to the VF * interface. @@ -749,7 +832,7 @@ static u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device, return msg_size; } -static inline int netvsc_send_pkt( +static int netvsc_send_pkt( struct hv_device *device, struct hv_netvsc_packet *packet, struct netvsc_device *net_device, diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index b10954a66939..a6bc974def8f 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1422,88 +1422,4 @@ static inline bool hv_need_to_signal_on_read(struct hv_ring_buffer_info *rbi) return false; } -/* - * An API to support in-place processing of incoming VMBUS packets. - */ -#define VMBUS_PKT_TRAILER 8 - -static inline struct vmpacket_descriptor * -get_next_pkt_raw(struct vmbus_channel *channel) -{ - struct hv_ring_buffer_info *ring_info = &channel->inbound; - u32 read_loc = ring_info->priv_read_index; - void *ring_buffer = hv_get_ring_buffer(ring_info); - struct vmpacket_descriptor *cur_desc; - u32 packetlen; - u32 dsize = ring_info->ring_datasize; - u32 delta = read_loc - ring_info->ring_buffer->read_index; - u32 bytes_avail_toread = (hv_get_bytes_to_read(ring_info) - delta); - - if (bytes_avail_toread < sizeof(struct vmpacket_descriptor)) - return NULL; - - if ((read_loc + sizeof(*cur_desc)) > dsize) - return NULL; - - cur_desc = ring_buffer + read_loc; - packetlen = cur_desc->len8 << 3; - - /* - * If the packet under consideration is wrapping around, - * return failure. - */ - if ((read_loc + packetlen + VMBUS_PKT_TRAILER) > (dsize - 1)) - return NULL; - - return cur_desc; -} - -/* - * A helper function to step through packets "in-place" - * This API is to be called after each successful call - * get_next_pkt_raw(). - */ -static inline void put_pkt_raw(struct vmbus_channel *channel, - struct vmpacket_descriptor *desc) -{ - struct hv_ring_buffer_info *ring_info = &channel->inbound; - u32 read_loc = ring_info->priv_read_index; - u32 packetlen = desc->len8 << 3; - u32 dsize = ring_info->ring_datasize; - - if ((read_loc + packetlen + VMBUS_PKT_TRAILER) > dsize) - BUG(); - /* - * Include the packet trailer. - */ - ring_info->priv_read_index += packetlen + VMBUS_PKT_TRAILER; -} - -/* - * This call commits the read index and potentially signals the host. - * Here is the pattern for using the "in-place" consumption APIs: - * - * while (get_next_pkt_raw() { - * process the packet "in-place"; - * put_pkt_raw(); - * } - * if (packets processed in place) - * commit_rd_index(); - */ -static inline void commit_rd_index(struct vmbus_channel *channel) -{ - struct hv_ring_buffer_info *ring_info = &channel->inbound; - /* - * Make sure all reads are done before we update the read index since - * the writer may start writing to the read area once the read index - * is updated. - */ - virt_rmb(); - ring_info->ring_buffer->read_index = ring_info->priv_read_index; - - if (hv_need_to_signal_on_read(ring_info)) - vmbus_set_event(channel); -} - - #endif /* _HYPERV_H */ From e5a78fad4f1eb49064e4c35d0a4263424b12124c Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 23 Aug 2016 12:17:49 -0700 Subject: [PATCH 0433/1715] hv_netvsc: use ARRAY_SIZE() for NDIS versions Don't hard code size of array of NDIS versions. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 4ef2af63419f..110bbb8a0baa 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -577,9 +577,10 @@ static int netvsc_connect_vsp(struct hv_device *device) struct netvsc_device *net_device; struct nvsp_message *init_packet; int ndis_version; - u32 ver_list[] = { NVSP_PROTOCOL_VERSION_1, NVSP_PROTOCOL_VERSION_2, + const u32 ver_list[] = { + NVSP_PROTOCOL_VERSION_1, NVSP_PROTOCOL_VERSION_2, NVSP_PROTOCOL_VERSION_4, NVSP_PROTOCOL_VERSION_5 }; - int i, num_ver = 4; /* number of different NVSP versions */ + int i; net_device = get_outbound_net_device(device); if (!net_device) @@ -588,7 +589,7 @@ static int netvsc_connect_vsp(struct hv_device *device) init_packet = &net_device->channel_init_pkt; /* Negotiate the latest NVSP protocol supported */ - for (i = num_ver - 1; i >= 0; i--) + for (i = ARRAY_SIZE(ver_list) - 1; i >= 0; i--) if (negotiate_nvsp_ver(device, net_device, init_packet, ver_list[i]) == 0) { net_device->nvsp_version = ver_list[i]; From e08f3ea586d4145e36c77f0dd1602374b5d7e928 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 23 Aug 2016 12:17:50 -0700 Subject: [PATCH 0434/1715] hv_netvsc: make device_remove void Always returns 0 and no callers check. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 2 +- drivers/net/hyperv/netvsc.c | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index b58e5592a5d5..8031deeb0a40 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -173,7 +173,7 @@ struct rndis_device { struct rndis_message; struct netvsc_device; int netvsc_device_add(struct hv_device *device, void *additional_info); -int netvsc_device_remove(struct hv_device *device); +void netvsc_device_remove(struct hv_device *device); int netvsc_send(struct hv_device *device, struct hv_netvsc_packet *packet, struct rndis_message *rndis_msg, diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 110bbb8a0baa..2ece27ad8429 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -648,7 +648,7 @@ static void netvsc_disconnect_vsp(struct hv_device *device) /* * netvsc_device_remove - Callback when the root bus device is removed */ -int netvsc_device_remove(struct hv_device *device) +void netvsc_device_remove(struct hv_device *device) { struct net_device *ndev = hv_get_drvdata(device); struct net_device_context *net_device_ctx = netdev_priv(ndev); @@ -670,7 +670,6 @@ int netvsc_device_remove(struct hv_device *device) /* Release all resources */ vfree(net_device->sub_cb_buf); free_netvsc_device(net_device); - return 0; } #define RING_AVAIL_PERCENT_HIWATER 20 From fd612602d6a7919982779fda914bd521e5778593 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 23 Aug 2016 12:17:51 -0700 Subject: [PATCH 0435/1715] hv_netvsc: init completion during alloc Move initialization to allocate where other fields are initialized. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 2ece27ad8429..27eb507d64f9 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -164,6 +164,7 @@ static struct netvsc_device *alloc_net_device(void) atomic_set(&net_device->open_cnt, 0); net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT; net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT; + init_completion(&net_device->channel_init_wait); return net_device; } @@ -1442,9 +1443,6 @@ int netvsc_device_add(struct hv_device *device, void *additional_info) net_device->ring_size = ring_size; - /* Initialize the NetVSC channel extension */ - init_completion(&net_device->channel_init_wait); - set_per_channel_state(device->channel, net_device->cb_buffer); /* Open the channel */ From 0ab05141f97de2ac954c267c27b256ebd241e762 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 23 Aug 2016 12:17:52 -0700 Subject: [PATCH 0436/1715] hv_netvsc: rearrange start_xmit Rearrange the transmit routine to eliminate goto's and unnecessary boolean variables. Use standard functions to test for vlan tag. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc_drv.c | 106 +++++++++++++++----------------- 1 file changed, 51 insertions(+), 55 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 3bbf07352433..7ed9f13067b5 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -357,11 +357,8 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) struct rndis_message *rndis_msg; struct rndis_packet *rndis_pkt; u32 rndis_msg_size; - bool isvlan; - bool linear = false; struct rndis_per_packet_info *ppi; struct ndis_tcp_ip_checksum_info *csum_info; - struct ndis_tcp_lso_info *lso_info; int hdr_offset; u32 net_trans_info; u32 hash; @@ -376,22 +373,23 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) * more pages we try linearizing it. */ -check_size: skb_length = skb->len; num_data_pgs = netvsc_get_slots(skb) + 2; - if (num_data_pgs > MAX_PAGE_BUFFER_COUNT && linear) { - net_alert_ratelimited("packet too big: %u pages (%u bytes)\n", - num_data_pgs, skb->len); - ret = -EFAULT; - goto drop; - } else if (num_data_pgs > MAX_PAGE_BUFFER_COUNT) { + + if (unlikely(num_data_pgs > MAX_PAGE_BUFFER_COUNT)) { if (skb_linearize(skb)) { net_alert_ratelimited("failed to linearize skb\n"); ret = -ENOMEM; goto drop; } - linear = true; - goto check_size; + + num_data_pgs = netvsc_get_slots(skb) + 2; + if (num_data_pgs > MAX_PAGE_BUFFER_COUNT) { + net_alert_ratelimited("packet too big: %u pages (%u bytes)\n", + num_data_pgs, skb->len); + ret = -EFAULT; + goto drop; + } } /* @@ -418,8 +416,6 @@ check_size: memset(rndis_msg, 0, RNDIS_AND_PPI_SIZE); - isvlan = skb->vlan_tci & VLAN_TAG_PRESENT; - /* Add the rndis header */ rndis_msg->ndis_msg_type = RNDIS_MSG_PACKET; rndis_msg->msg_len = packet->total_data_buflen; @@ -438,7 +434,7 @@ check_size: *(u32 *)((void *)ppi + ppi->ppi_offset) = hash; } - if (isvlan) { + if (skb_vlan_tag_present(skb)) { struct ndis_pkt_8021q_info *vlan; rndis_msg_size += NDIS_VLAN_PPI_SIZE; @@ -459,8 +455,37 @@ check_size: * Setup the sendside checksum offload only if this is not a * GSO packet. */ - if (skb_is_gso(skb)) - goto do_lso; + if (skb_is_gso(skb)) { + struct ndis_tcp_lso_info *lso_info; + + rndis_msg_size += NDIS_LSO_PPI_SIZE; + ppi = init_ppi_data(rndis_msg, NDIS_LSO_PPI_SIZE, + TCP_LARGESEND_PKTINFO); + + lso_info = (struct ndis_tcp_lso_info *)((void *)ppi + + ppi->ppi_offset); + + lso_info->lso_v2_transmit.type = NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE; + if (net_trans_info & (INFO_IPV4 << 16)) { + lso_info->lso_v2_transmit.ip_version = + NDIS_TCP_LARGE_SEND_OFFLOAD_IPV4; + ip_hdr(skb)->tot_len = 0; + ip_hdr(skb)->check = 0; + tcp_hdr(skb)->check = + ~csum_tcpudp_magic(ip_hdr(skb)->saddr, + ip_hdr(skb)->daddr, 0, IPPROTO_TCP, 0); + } else { + lso_info->lso_v2_transmit.ip_version = + NDIS_TCP_LARGE_SEND_OFFLOAD_IPV6; + ipv6_hdr(skb)->payload_len = 0; + tcp_hdr(skb)->check = + ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, + &ipv6_hdr(skb)->daddr, 0, IPPROTO_TCP, 0); + } + lso_info->lso_v2_transmit.tcp_header_offset = hdr_offset; + lso_info->lso_v2_transmit.mss = skb_shinfo(skb)->gso_size; + goto do_send; + } if ((skb->ip_summed == CHECKSUM_NONE) || (skb->ip_summed == CHECKSUM_UNNECESSARY)) @@ -507,35 +532,6 @@ check_size: csum_info->transmit.udp_checksum = 0; } - goto do_send; - -do_lso: - rndis_msg_size += NDIS_LSO_PPI_SIZE; - ppi = init_ppi_data(rndis_msg, NDIS_LSO_PPI_SIZE, - TCP_LARGESEND_PKTINFO); - - lso_info = (struct ndis_tcp_lso_info *)((void *)ppi + - ppi->ppi_offset); - - lso_info->lso_v2_transmit.type = NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE; - if (net_trans_info & (INFO_IPV4 << 16)) { - lso_info->lso_v2_transmit.ip_version = - NDIS_TCP_LARGE_SEND_OFFLOAD_IPV4; - ip_hdr(skb)->tot_len = 0; - ip_hdr(skb)->check = 0; - tcp_hdr(skb)->check = - ~csum_tcpudp_magic(ip_hdr(skb)->saddr, - ip_hdr(skb)->daddr, 0, IPPROTO_TCP, 0); - } else { - lso_info->lso_v2_transmit.ip_version = - NDIS_TCP_LARGE_SEND_OFFLOAD_IPV6; - ipv6_hdr(skb)->payload_len = 0; - tcp_hdr(skb)->check = - ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, - &ipv6_hdr(skb)->daddr, 0, IPPROTO_TCP, 0); - } - lso_info->lso_v2_transmit.tcp_header_offset = hdr_offset; - lso_info->lso_v2_transmit.mss = skb_shinfo(skb)->gso_size; do_send: /* Start filling in the page buffers with the rndis hdr */ @@ -548,21 +544,21 @@ do_send: skb_tx_timestamp(skb); ret = netvsc_send(net_device_ctx->device_ctx, packet, rndis_msg, &pb, skb); - -drop: - if (ret == 0) { + if (likely(ret == 0)) { u64_stats_update_begin(&tx_stats->syncp); tx_stats->packets++; tx_stats->bytes += skb_length; u64_stats_update_end(&tx_stats->syncp); - } else { - if (ret != -EAGAIN) { - dev_kfree_skb_any(skb); - net->stats.tx_dropped++; - } + return NETDEV_TX_OK; } + if (ret == -EAGAIN) + return NETDEV_TX_BUSY; - return (ret == -EAGAIN) ? NETDEV_TX_BUSY : NETDEV_TX_OK; +drop: + dev_kfree_skb_any(skb); + net->stats.tx_dropped++; + + return NETDEV_TX_OK; } /* From bc304dd3b4444b84082c483534a8dcac7a22cb9d Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 23 Aug 2016 12:17:53 -0700 Subject: [PATCH 0437/1715] hv_netvsc: refactor completion function Break the different cases, code is cleaner if broken up Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc.c | 103 ++++++++++++++++++++---------------- 1 file changed, 56 insertions(+), 47 deletions(-) diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 27eb507d64f9..830aae2d9510 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -696,69 +696,78 @@ static inline void netvsc_free_send_slot(struct netvsc_device *net_device, sync_change_bit(index, net_device->send_section_map); } +static void netvsc_send_tx_complete(struct netvsc_device *net_device, + struct vmbus_channel *incoming_channel, + struct hv_device *device, + struct vmpacket_descriptor *packet) +{ + struct sk_buff *skb = (struct sk_buff *)(unsigned long)packet->trans_id; + struct net_device *ndev = hv_get_drvdata(device); + struct net_device_context *net_device_ctx = netdev_priv(ndev); + struct vmbus_channel *channel = device->channel; + int num_outstanding_sends; + u16 q_idx = 0; + int queue_sends; + + /* Notify the layer above us */ + if (likely(skb)) { + struct hv_netvsc_packet *nvsc_packet + = (struct hv_netvsc_packet *)skb->cb; + u32 send_index = nvsc_packet->send_buf_index; + + if (send_index != NETVSC_INVALID_INDEX) + netvsc_free_send_slot(net_device, send_index); + q_idx = nvsc_packet->q_idx; + channel = incoming_channel; + + dev_kfree_skb_any(skb); + } + + num_outstanding_sends = + atomic_dec_return(&net_device->num_outstanding_sends); + queue_sends = atomic_dec_return(&net_device->queue_sends[q_idx]); + + if (net_device->destroy && num_outstanding_sends == 0) + wake_up(&net_device->wait_drain); + + if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev, q_idx)) && + !net_device_ctx->start_remove && + (hv_ringbuf_avail_percent(&channel->outbound) > RING_AVAIL_PERCENT_HIWATER || + queue_sends < 1)) + netif_tx_wake_queue(netdev_get_tx_queue(ndev, q_idx)); +} + static void netvsc_send_completion(struct netvsc_device *net_device, struct vmbus_channel *incoming_channel, struct hv_device *device, struct vmpacket_descriptor *packet) { struct nvsp_message *nvsp_packet; - struct hv_netvsc_packet *nvsc_packet; struct net_device *ndev = hv_get_drvdata(device); - struct net_device_context *net_device_ctx = netdev_priv(ndev); - u32 send_index; - struct sk_buff *skb; nvsp_packet = (struct nvsp_message *)((unsigned long)packet + - (packet->offset8 << 3)); + (packet->offset8 << 3)); - if ((nvsp_packet->hdr.msg_type == NVSP_MSG_TYPE_INIT_COMPLETE) || - (nvsp_packet->hdr.msg_type == - NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE) || - (nvsp_packet->hdr.msg_type == - NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE) || - (nvsp_packet->hdr.msg_type == - NVSP_MSG5_TYPE_SUBCHANNEL)) { + switch (nvsp_packet->hdr.msg_type) { + case NVSP_MSG_TYPE_INIT_COMPLETE: + case NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE: + case NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE: + case NVSP_MSG5_TYPE_SUBCHANNEL: /* Copy the response back */ memcpy(&net_device->channel_init_pkt, nvsp_packet, sizeof(struct nvsp_message)); complete(&net_device->channel_init_wait); - } else if (nvsp_packet->hdr.msg_type == - NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE) { - int num_outstanding_sends; - u16 q_idx = 0; - struct vmbus_channel *channel = device->channel; - int queue_sends; + break; - /* Get the send context */ - skb = (struct sk_buff *)(unsigned long)packet->trans_id; + case NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE: + netvsc_send_tx_complete(net_device, incoming_channel, + device, packet); + break; - /* Notify the layer above us */ - if (skb) { - nvsc_packet = (struct hv_netvsc_packet *) skb->cb; - send_index = nvsc_packet->send_buf_index; - if (send_index != NETVSC_INVALID_INDEX) - netvsc_free_send_slot(net_device, send_index); - q_idx = nvsc_packet->q_idx; - channel = incoming_channel; - dev_kfree_skb_any(skb); - } - - num_outstanding_sends = - atomic_dec_return(&net_device->num_outstanding_sends); - queue_sends = atomic_dec_return(&net_device-> - queue_sends[q_idx]); - - if (net_device->destroy && num_outstanding_sends == 0) - wake_up(&net_device->wait_drain); - - if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev, q_idx)) && - !net_device_ctx->start_remove && - (hv_ringbuf_avail_percent(&channel->outbound) > - RING_AVAIL_PERCENT_HIWATER || queue_sends < 1)) - netif_tx_wake_queue(netdev_get_tx_queue(ndev, q_idx)); - } else { - netdev_err(ndev, "Unknown send completion packet type- " - "%d received!!\n", nvsp_packet->hdr.msg_type); + default: + netdev_err(ndev, + "Unknown send completion type %d received!!\n", + nvsp_packet->hdr.msg_type); } } From 7a2a0a84fda33062200decf5e201b182591bf0ec Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 23 Aug 2016 12:17:54 -0700 Subject: [PATCH 0438/1715] hv_netvsc: make netvsc_destroy_buf void No caller checks the return value. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 830aae2d9510..471c476e7253 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -206,12 +206,12 @@ get_in_err: return net_device; } -static int netvsc_destroy_buf(struct hv_device *device) +static void netvsc_destroy_buf(struct hv_device *device) { struct nvsp_message *revoke_packet; - int ret = 0; struct net_device *ndev = hv_get_drvdata(device); struct netvsc_device *net_device = net_device_to_netvsc_device(ndev); + int ret; /* * If we got a section count, it means we received a @@ -241,7 +241,7 @@ static int netvsc_destroy_buf(struct hv_device *device) if (ret != 0) { netdev_err(ndev, "unable to send " "revoke receive buffer to netvsp\n"); - return ret; + return; } } @@ -256,7 +256,7 @@ static int netvsc_destroy_buf(struct hv_device *device) if (ret != 0) { netdev_err(ndev, "unable to teardown receive buffer's gpadl\n"); - return ret; + return; } net_device->recv_buf_gpadl_handle = 0; } @@ -300,7 +300,7 @@ static int netvsc_destroy_buf(struct hv_device *device) if (ret != 0) { netdev_err(ndev, "unable to send " "revoke send buffer to netvsp\n"); - return ret; + return; } } /* Teardown the gpadl on the vsp end */ @@ -314,7 +314,7 @@ static int netvsc_destroy_buf(struct hv_device *device) if (ret != 0) { netdev_err(ndev, "unable to teardown send buffer's gpadl\n"); - return ret; + return; } net_device->send_buf_gpadl_handle = 0; } @@ -324,8 +324,6 @@ static int netvsc_destroy_buf(struct hv_device *device) net_device->send_buf = NULL; } kfree(net_device->send_section_map); - - return ret; } static int netvsc_init_buf(struct hv_device *device) From 6c4c137e5035e0e17fa40c223fa0a3167e0f65fa Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 23 Aug 2016 12:17:55 -0700 Subject: [PATCH 0439/1715] hv_netvsc: make variable local The variable m_ret is only used in one basic block. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 471c476e7253..2a9ccc4d9e3c 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -956,7 +956,7 @@ int netvsc_send(struct hv_device *device, struct sk_buff *skb) { struct netvsc_device *net_device; - int ret = 0, m_ret = 0; + int ret = 0; struct vmbus_channel *out_channel; u16 q_idx = packet->q_idx; u32 pktlen = packet->total_data_buflen, msd_len = 0; @@ -1045,8 +1045,8 @@ int netvsc_send(struct hv_device *device, } if (msd_send) { - m_ret = netvsc_send_pkt(device, msd_send, net_device, - NULL, msd_skb); + int m_ret = netvsc_send_pkt(device, msd_send, net_device, + NULL, msd_skb); if (m_ret != 0) { netvsc_free_send_slot(net_device, From e3f74b841d482e962b9f5a907eeb25eeeb09aa60 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 23 Aug 2016 12:17:56 -0700 Subject: [PATCH 0440/1715] hv_netvsc: report vmbus name in ethtool Make netvsc on vmbus behave more like PCI. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc_drv.c | 4 ++++ include/linux/hyperv.h | 7 +++++++ 2 files changed, 11 insertions(+) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 7ed9f13067b5..b874ab1317c6 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -734,8 +734,12 @@ vf_injection_done: static void netvsc_get_drvinfo(struct net_device *net, struct ethtool_drvinfo *info) { + struct net_device_context *net_device_ctx = netdev_priv(net); + struct hv_device *dev = net_device_ctx->device_ctx; + strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver)); strlcpy(info->fw_version, "N/A", sizeof(info->fw_version)); + strlcpy(info->bus_info, vmbus_dev_name(dev), sizeof(info->bus_info)); } static void netvsc_get_channels(struct net_device *net, diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index a6bc974def8f..b01c8c3dd531 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1114,6 +1114,13 @@ int __must_check __vmbus_driver_register(struct hv_driver *hv_driver, const char *mod_name); void vmbus_driver_unregister(struct hv_driver *hv_driver); +static inline const char *vmbus_dev_name(const struct hv_device *device_obj) +{ + const struct kobject *kobj = &device_obj->device.kobj; + + return kobj->name; +} + void vmbus_hvsock_device_unregister(struct vmbus_channel *channel); int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj, From 4323b47cf8edfe95bd58e20965667e71121c866e Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 23 Aug 2016 12:17:57 -0700 Subject: [PATCH 0441/1715] hv_netvsc: add ethtool statistics for tx packet issues Printing console messages is not helpful when system is out of memory; and can be disastrous with netconsole. Instead keep statistics of these anomalous conditions. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 9 ++++ drivers/net/hyperv/netvsc_drv.c | 86 +++++++++++++++++++++++++++------ 2 files changed, 79 insertions(+), 16 deletions(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 8031deeb0a40..284b97b6b258 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -652,6 +652,14 @@ struct netvsc_stats { struct u64_stats_sync syncp; }; +struct netvsc_ethtool_stats { + unsigned long tx_scattered; + unsigned long tx_no_memory; + unsigned long tx_no_space; + unsigned long tx_too_big; + unsigned long tx_busy; +}; + struct netvsc_reconfig { struct list_head list; u32 event; @@ -681,6 +689,7 @@ struct net_device_context { /* Ethtool settings */ u8 duplex; u32 speed; + struct netvsc_ethtool_stats eth_stats; /* the device is going away */ bool start_remove; diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index b874ab1317c6..2360e704e271 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -365,7 +365,6 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) u32 skb_length; struct hv_page_buffer page_buf[MAX_PAGE_BUFFER_COUNT]; struct hv_page_buffer *pb = page_buf; - struct netvsc_stats *tx_stats = this_cpu_ptr(net_device_ctx->tx_stats); /* We will atmost need two pages to describe the rndis * header. We can only transmit MAX_PAGE_BUFFER_COUNT number @@ -377,17 +376,14 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) num_data_pgs = netvsc_get_slots(skb) + 2; if (unlikely(num_data_pgs > MAX_PAGE_BUFFER_COUNT)) { - if (skb_linearize(skb)) { - net_alert_ratelimited("failed to linearize skb\n"); - ret = -ENOMEM; - goto drop; - } + ++net_device_ctx->eth_stats.tx_scattered; + + if (skb_linearize(skb)) + goto no_memory; num_data_pgs = netvsc_get_slots(skb) + 2; if (num_data_pgs > MAX_PAGE_BUFFER_COUNT) { - net_alert_ratelimited("packet too big: %u pages (%u bytes)\n", - num_data_pgs, skb->len); - ret = -EFAULT; + ++net_device_ctx->eth_stats.tx_too_big; goto drop; } } @@ -398,11 +394,9 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) * structure. */ ret = skb_cow_head(skb, RNDIS_AND_PPI_SIZE); - if (ret) { - netdev_err(net, "unable to alloc hv_netvsc_packet\n"); - ret = -ENOMEM; - goto drop; - } + if (ret) + goto no_memory; + /* Use the skb control buffer for building up the packet */ BUILD_BUG_ON(sizeof(struct hv_netvsc_packet) > FIELD_SIZEOF(struct sk_buff, cb)); @@ -518,7 +512,7 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) ret = skb_cow_head(skb, 0); if (ret) - goto drop; + goto no_memory; uh = udp_hdr(skb); udp_len = ntohs(uh->len); @@ -545,20 +539,32 @@ do_send: ret = netvsc_send(net_device_ctx->device_ctx, packet, rndis_msg, &pb, skb); if (likely(ret == 0)) { + struct netvsc_stats *tx_stats = this_cpu_ptr(net_device_ctx->tx_stats); + u64_stats_update_begin(&tx_stats->syncp); tx_stats->packets++; tx_stats->bytes += skb_length; u64_stats_update_end(&tx_stats->syncp); return NETDEV_TX_OK; } - if (ret == -EAGAIN) + + if (ret == -EAGAIN) { + ++net_device_ctx->eth_stats.tx_busy; return NETDEV_TX_BUSY; + } + + if (ret == -ENOSPC) + ++net_device_ctx->eth_stats.tx_no_space; drop: dev_kfree_skb_any(skb); net->stats.tx_dropped++; return NETDEV_TX_OK; + +no_memory: + ++net_device_ctx->eth_stats.tx_no_memory; + goto drop; } /* @@ -1015,6 +1021,51 @@ static int netvsc_set_mac_addr(struct net_device *ndev, void *p) return err; } +static const struct { + char name[ETH_GSTRING_LEN]; + u16 offset; +} netvsc_stats[] = { + { "tx_scattered", offsetof(struct netvsc_ethtool_stats, tx_scattered) }, + { "tx_no_memory", offsetof(struct netvsc_ethtool_stats, tx_no_memory) }, + { "tx_no_space", offsetof(struct netvsc_ethtool_stats, tx_no_space) }, + { "tx_too_big", offsetof(struct netvsc_ethtool_stats, tx_too_big) }, + { "tx_busy", offsetof(struct netvsc_ethtool_stats, tx_busy) }, +}; + +static int netvsc_get_sset_count(struct net_device *dev, int string_set) +{ + switch (string_set) { + case ETH_SS_STATS: + return ARRAY_SIZE(netvsc_stats); + default: + return -EINVAL; + } +} + +static void netvsc_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, u64 *data) +{ + struct net_device_context *ndc = netdev_priv(dev); + const void *nds = &ndc->eth_stats; + int i; + + for (i = 0; i < ARRAY_SIZE(netvsc_stats); i++) + data[i] = *(unsigned long *)(nds + netvsc_stats[i].offset); +} + +static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data) +{ + int i; + + switch (stringset) { + case ETH_SS_STATS: + for (i = 0; i < ARRAY_SIZE(netvsc_stats); i++) + memcpy(data + i * ETH_GSTRING_LEN, + netvsc_stats[i].name, ETH_GSTRING_LEN); + break; + } +} + #ifdef CONFIG_NET_POLL_CONTROLLER static void netvsc_poll_controller(struct net_device *net) { @@ -1027,6 +1078,9 @@ static void netvsc_poll_controller(struct net_device *net) static const struct ethtool_ops ethtool_ops = { .get_drvinfo = netvsc_get_drvinfo, .get_link = ethtool_op_get_link, + .get_ethtool_stats = netvsc_get_ethtool_stats, + .get_sset_count = netvsc_get_sset_count, + .get_strings = netvsc_get_strings, .get_channels = netvsc_get_channels, .set_channels = netvsc_set_channels, .get_ts_info = ethtool_op_get_ts_info, From cff6a334e63420e95ec40dc7fcdc0b8258615760 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Tue, 23 Aug 2016 11:55:30 -0700 Subject: [PATCH 0442/1715] strparser: Queue work when being unpaused When the upper layer unpauses a stream parser connection we need to queue rx_work to make sure no events are missed. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/strparser.h | 5 +---- net/strparser/strparser.c | 11 +++++++++++ 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/include/net/strparser.h b/include/net/strparser.h index fdb3d6746cc4..91fa0b958426 100644 --- a/include/net/strparser.h +++ b/include/net/strparser.h @@ -88,10 +88,7 @@ static inline void strp_pause(struct strparser *strp) } /* May be called without holding lock for attached socket */ -static inline void strp_unpause(struct strparser *strp) -{ - strp->rx_paused = 0; -} +void strp_unpause(struct strparser *strp); static inline void save_strp_stats(struct strparser *strp, struct strp_aggr_stats *agg_stats) diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c index 68334b56db1e..4ecfc10cbe6d 100644 --- a/net/strparser/strparser.c +++ b/net/strparser/strparser.c @@ -445,6 +445,17 @@ int strp_init(struct strparser *strp, struct sock *csk, } EXPORT_SYMBOL_GPL(strp_init); +void strp_unpause(struct strparser *strp) +{ + strp->rx_paused = 0; + + /* Sync setting rx_paused with RX work */ + smp_mb(); + + queue_work(strp_wq, &strp->rx_work); +} +EXPORT_SYMBOL_GPL(strp_unpause); + /* strp must already be stopped so that strp_tcp_recv will no longer be called. * Note that strp_done is not called with the lower socket held. */ From 1616b38f201945f5fc88aa09b525e3625777aa7c Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Tue, 23 Aug 2016 11:55:31 -0700 Subject: [PATCH 0443/1715] kcm: Fix locking issue Lock the lower socket in kcm_unattach. Release during call to strp_done since that function cancels the RX timers and work queue with sync. Also added some status information in psock reporting. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- net/kcm/kcmproc.c | 20 +++++++++++++++----- net/kcm/kcmsock.c | 13 ++++++++----- 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/net/kcm/kcmproc.c b/net/kcm/kcmproc.c index 47e445364f4f..bf75c9231cca 100644 --- a/net/kcm/kcmproc.c +++ b/net/kcm/kcmproc.c @@ -173,14 +173,24 @@ static void kcm_format_psock(struct kcm_psock *psock, struct seq_file *seq, if (psock->strp.rx_stopped) seq_puts(seq, "RxStop "); - if (psock->strp.rx_paused) - seq_puts(seq, "RxPause "); - if (psock->tx_kcm) seq_printf(seq, "Rsvd-%d ", psock->tx_kcm->index); - if (psock->ready_rx_msg) - seq_puts(seq, "RdyRx "); + if (!psock->strp.rx_paused && !psock->ready_rx_msg) { + if (psock->sk->sk_receive_queue.qlen) { + if (psock->strp.rx_need_bytes) + seq_printf(seq, "RxWait=%u ", + psock->strp.rx_need_bytes); + else + seq_printf(seq, "RxWait "); + } + } else { + if (psock->strp.rx_paused) + seq_puts(seq, "RxPause "); + + if (psock->ready_rx_msg) + seq_puts(seq, "RdyRx "); + } seq_puts(seq, "\n"); } diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index eedbe404af35..eb731cacc325 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1477,12 +1477,13 @@ out: return err; } -/* Lower socket lock held */ static void kcm_unattach(struct kcm_psock *psock) { struct sock *csk = psock->sk; struct kcm_mux *mux = psock->mux; + lock_sock(csk); + /* Stop getting callbacks from TCP socket. After this there should * be no way to reserve a kcm for this psock. */ @@ -1514,7 +1515,10 @@ static void kcm_unattach(struct kcm_psock *psock) write_unlock_bh(&csk->sk_callback_lock); + /* Call strp_done without sock lock */ + release_sock(csk); strp_done(&psock->strp); + lock_sock(csk); bpf_prog_put(psock->bpf_prog); @@ -1564,6 +1568,8 @@ no_reserved: fput(csk->sk_socket->file); kmem_cache_free(kcm_psockp, psock); } + + release_sock(csk); } static int kcm_unattach_ioctl(struct socket *sock, struct kcm_unattach *info) @@ -1749,11 +1755,8 @@ static void release_mux(struct kcm_mux *mux) /* Release psocks */ list_for_each_entry_safe(psock, tmp_psock, &mux->psocks, psock_list) { - if (!WARN_ON(psock->unattaching)) { - lock_sock(psock->strp.sk); + if (!WARN_ON(psock->unattaching)) kcm_unattach(psock); - release_sock(psock->strp.sk); - } } if (WARN_ON(mux->psocks_cnt)) From 54c151d9ed1321e6e623c80ffe42cd2eb1571744 Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Mon, 22 Aug 2016 22:50:02 +0800 Subject: [PATCH 0444/1715] l2tp: Refactor the codes with existing macros instead of literal number Use PPP_ALLSTATIONS, PPP_UI, and SEND_SHUTDOWN instead of 0xff, 0x03, and 2 separately. Signed-off-by: Gao Feng Acked-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_ppp.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index d9560aa2dba3..254ee662b214 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -177,7 +177,7 @@ static int pppol2tp_recv_payload_hook(struct sk_buff *skb) if (!pskb_may_pull(skb, 2)) return 1; - if ((skb->data[0] == 0xff) && (skb->data[1] == 0x03)) + if ((skb->data[0] == PPP_ALLSTATIONS) && (skb->data[1] == PPP_UI)) skb_pull(skb, 2); return 0; @@ -282,7 +282,6 @@ static void pppol2tp_session_sock_put(struct l2tp_session *session) static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) { - static const unsigned char ppph[2] = { 0xff, 0x03 }; struct sock *sk = sock->sk; struct sk_buff *skb; int error; @@ -312,7 +311,7 @@ static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m, error = -ENOMEM; skb = sock_wmalloc(sk, NET_SKB_PAD + sizeof(struct iphdr) + uhlen + session->hdr_len + - sizeof(ppph) + total_len, + 2 + total_len, /* 2 bytes for PPP_ALLSTATIONS & PPP_UI */ 0, GFP_KERNEL); if (!skb) goto error_put_sess_tun; @@ -325,8 +324,8 @@ static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m, skb_reserve(skb, uhlen); /* Add PPP header */ - skb->data[0] = ppph[0]; - skb->data[1] = ppph[1]; + skb->data[0] = PPP_ALLSTATIONS; + skb->data[1] = PPP_UI; skb_put(skb, 2); /* Copy user data into skb */ @@ -369,7 +368,6 @@ error: */ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) { - static const u8 ppph[2] = { 0xff, 0x03 }; struct sock *sk = (struct sock *) chan->private; struct sock *sk_tun; struct l2tp_session *session; @@ -398,14 +396,14 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) sizeof(struct iphdr) + /* IP header */ uhlen + /* UDP header (if L2TP_ENCAPTYPE_UDP) */ session->hdr_len + /* L2TP header */ - sizeof(ppph); /* PPP header */ + 2; /* 2 bytes for PPP_ALLSTATIONS & PPP_UI */ if (skb_cow_head(skb, headroom)) goto abort_put_sess_tun; /* Setup PPP header */ - __skb_push(skb, sizeof(ppph)); - skb->data[0] = ppph[0]; - skb->data[1] = ppph[1]; + __skb_push(skb, 2); + skb->data[0] = PPP_ALLSTATIONS; + skb->data[1] = PPP_UI; local_bh_disable(); l2tp_xmit_skb(session, skb, session->hdr_len); @@ -440,7 +438,7 @@ static void pppol2tp_session_close(struct l2tp_session *session) BUG_ON(session->magic != L2TP_SESSION_MAGIC); if (sock) { - inet_shutdown(sock, 2); + inet_shutdown(sock, SEND_SHUTDOWN); /* Don't let the session go away before our socket does */ l2tp_session_inc_refcount(session); } From c41419b091f371d846bc5292c9138a78b7e378fe Mon Sep 17 00:00:00 2001 From: Nicholas Mc Guire Date: Mon, 22 Aug 2016 17:52:00 +0200 Subject: [PATCH 0445/1715] liquidio: declare liquidio_set_rxcsum_command static liquidio_set_rxcsum_command is a local function only, no need to expose it outside of lio_main.c so declare it static and make sparse happy. Signed-off-by: Nicholas Mc Guire Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/liquidio/lio_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index 20d6942edf40..f659a95ffc94 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -3190,8 +3190,8 @@ static int liquidio_vlan_rx_kill_vid(struct net_device *netdev, * OCTNET_CMD_RXCSUM_DISABLE * @returns SUCCESS or FAILURE */ -int liquidio_set_rxcsum_command(struct net_device *netdev, int command, - u8 rx_cmd) +static int liquidio_set_rxcsum_command(struct net_device *netdev, int command, + u8 rx_cmd) { struct lio *lio = GET_LIO(netdev); struct octeon_device *oct = lio->oct_dev; From 1345b1ac57a1b85d73912bd13c2bad5f9f26df91 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 22 Aug 2016 23:01:29 +0000 Subject: [PATCH 0446/1715] xen-netback: using kfree_rcu() to simplify the code The callback function of call_rcu() just calls a kfree(), so we can use kfree_rcu() instead of call_rcu() + callback function. Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- drivers/net/xen-netback/hash.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/drivers/net/xen-netback/hash.c b/drivers/net/xen-netback/hash.c index 282b16d8093a..e8c5dddc54ba 100644 --- a/drivers/net/xen-netback/hash.c +++ b/drivers/net/xen-netback/hash.c @@ -32,15 +32,6 @@ #include #include -static void xenvif_del_hash(struct rcu_head *rcu) -{ - struct xenvif_hash_cache_entry *entry; - - entry = container_of(rcu, struct xenvif_hash_cache_entry, rcu); - - kfree(entry); -} - static void xenvif_add_hash(struct xenvif *vif, const u8 *tag, unsigned int len, u32 val) { @@ -76,7 +67,7 @@ static void xenvif_add_hash(struct xenvif *vif, const u8 *tag, if (++vif->hash.cache.count > xenvif_hash_cache_size) { list_del_rcu(&oldest->link); vif->hash.cache.count--; - call_rcu(&oldest->rcu, xenvif_del_hash); + kfree_rcu(oldest, rcu); } } @@ -114,7 +105,7 @@ static void xenvif_flush_hash(struct xenvif *vif) list_for_each_entry_rcu(entry, &vif->hash.cache.list, link) { list_del_rcu(&entry->link); vif->hash.cache.count--; - call_rcu(&entry->rcu, xenvif_del_hash); + kfree_rcu(entry, rcu); } spin_unlock_irqrestore(&vif->hash.cache.lock, flags); From cebc5cbab48f5c58512e26aa1965284354227258 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Mon, 22 Aug 2016 17:17:54 -0700 Subject: [PATCH 0447/1715] net-tcp: retire TFO_SERVER_WO_SOCKOPT2 config TFO_SERVER_WO_SOCKOPT2 was intended for debugging purposes during Fast Open development. Remove this config option and also update/clean-up the documentation of the Fast Open sysctl. Reported-by: Piotr Jurkiewicz Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 45 +++++++++++++------------- include/net/tcp.h | 3 +- net/ipv4/af_inet.c | 21 +++++------- 3 files changed, 32 insertions(+), 37 deletions(-) diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 9ae929395b24..3db8c67d2c8d 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -575,32 +575,33 @@ tcp_syncookies - BOOLEAN unconditionally generation of syncookies. tcp_fastopen - INTEGER - Enable TCP Fast Open feature (draft-ietf-tcpm-fastopen) to send data - in the opening SYN packet. To use this feature, the client application - must use sendmsg() or sendto() with MSG_FASTOPEN flag rather than - connect() to perform a TCP handshake automatically. + Enable TCP Fast Open (RFC7413) to send and accept data in the opening + SYN packet. + + The client support is enabled by flag 0x1 (on by default). The client + then must use sendmsg() or sendto() with the MSG_FASTOPEN flag, + rather than connect() to send data in SYN. + + The server support is enabled by flag 0x2 (off by default). Then + either enable for all listeners with another flag (0x400) or + enable individual listeners via TCP_FASTOPEN socket option with + the option value being the length of the syn-data backlog. The values (bitmap) are - 1: Enables sending data in the opening SYN on the client w/ MSG_FASTOPEN. - 2: Enables TCP Fast Open on the server side, i.e., allowing data in - a SYN packet to be accepted and passed to the application before - 3-way hand shake finishes. - 4: Send data in the opening SYN regardless of cookie availability and - without a cookie option. - 0x100: Accept SYN data w/o validating the cookie. - 0x200: Accept data-in-SYN w/o any cookie option present. - 0x400/0x800: Enable Fast Open on all listeners regardless of the - TCP_FASTOPEN socket option. The two different flags designate two - different ways of setting max_qlen without the TCP_FASTOPEN socket - option. + 0x1: (client) enables sending data in the opening SYN on the client. + 0x2: (server) enables the server support, i.e., allowing data in + a SYN packet to be accepted and passed to the + application before 3-way handshake finishes. + 0x4: (client) send data in the opening SYN regardless of cookie + availability and without a cookie option. + 0x200: (server) accept data-in-SYN w/o any cookie option present. + 0x400: (server) enable all listeners to support Fast Open by + default without explicit TCP_FASTOPEN socket option. - Default: 1 + Default: 0x1 - Note that the client & server side Fast Open flags (1 and 2 - respectively) must be also enabled before the rest of flags can take - effect. - - See include/net/tcp.h and the code for more details. + Note that that additional client or server features are only + effective if the basic support (0x1 and 0x2) are enabled respectively. tcp_syn_retries - INTEGER Number of times initial SYNs for an active TCP connection attempt diff --git a/include/net/tcp.h b/include/net/tcp.h index c00e7d51bb18..25d64f6de69e 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -227,10 +227,9 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); #define TFO_SERVER_COOKIE_NOT_REQD 0x200 /* Force enable TFO on all listeners, i.e., not requiring the - * TCP_FASTOPEN socket option. SOCKOPT1/2 determine how to set max_qlen. + * TCP_FASTOPEN socket option. */ #define TFO_SERVER_WO_SOCKOPT1 0x400 -#define TFO_SERVER_WO_SOCKOPT2 0x800 extern struct inet_timewait_death_row tcp_death_row; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 55513e654d79..989a362814a9 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -211,24 +211,19 @@ int inet_listen(struct socket *sock, int backlog) * we can only allow the backlog to be adjusted. */ if (old_state != TCP_LISTEN) { - /* Check special setups for testing purpose to enable TFO w/o - * requiring TCP_FASTOPEN sockopt. + /* Enable TFO w/o requiring TCP_FASTOPEN socket option. * Note that only TCP sockets (SOCK_STREAM) will reach here. - * Also fastopenq may already been allocated because this - * socket was in TCP_LISTEN state previously but was - * shutdown() (rather than close()). + * Also fastopen backlog may already been set via the option + * because the socket was in TCP_LISTEN state previously but + * was shutdown() rather than close(). */ - if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) != 0 && + if ((sysctl_tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) && + (sysctl_tcp_fastopen & TFO_SERVER_ENABLE) && !inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) { - if ((sysctl_tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) != 0) - fastopen_queue_tune(sk, backlog); - else if ((sysctl_tcp_fastopen & - TFO_SERVER_WO_SOCKOPT2) != 0) - fastopen_queue_tune(sk, - ((uint)sysctl_tcp_fastopen) >> 16); - + fastopen_queue_tune(sk, backlog); tcp_fastopen_init_key_once(true); } + err = inet_csk_listen_start(sk, backlog); if (err) goto out; From e7b48a32dc5db48ca14e6472b920102c7e53f5aa Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Tue, 23 Aug 2016 11:35:32 +0530 Subject: [PATCH 0448/1715] cxgb4: Fix issue while re-registering VF mgmt netdev When we disable SRIOV, we used to unregister the netdev but wasn't freed. But next time when the same netdev is registered, since the state was in 'NETREG_UNREGISTERED', we used to hit BUG_ON in register_netdevice, where it expects the state to be 'NETREG_UNINITIALIZED'. Alloc netdev and register them while configuring SRIOV, and free them when SRIOV is disabled. Also added a new function to setup ethernet properties instead of using ether_setup. Set carrier off by default, since we don't have to do any transmit on the interface. Fixes: 7829451c695e ("cxgb4: Add control net_device for configuring PCIe VF") Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 1 + .../net/ethernet/chelsio/cxgb4/cxgb4_main.c | 97 +++++++++++++------ 2 files changed, 67 insertions(+), 31 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index f988c600c68d..3f7b33aa5ec5 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -798,6 +798,7 @@ struct adapter { unsigned int mbox; unsigned int pf; unsigned int flags; + unsigned int adap_idx; enum chip_type chip; int msg_enable; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index be5c942ad8e0..44019bdd526d 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -3085,6 +3085,15 @@ static int cxgb_change_mtu(struct net_device *dev, int new_mtu) } #ifdef CONFIG_PCI_IOV +static int dummy_open(struct net_device *dev) +{ + /* Turn carrier off since we don't have to transmit anything on this + * interface. + */ + netif_carrier_off(dev); + return 0; +} + static int cxgb_set_vf_mac(struct net_device *dev, int vf, u8 *mac) { struct port_info *pi = netdev_priv(dev); @@ -3246,11 +3255,12 @@ static const struct net_device_ops cxgb4_netdev_ops = { .ndo_set_tx_maxrate = cxgb_set_tx_maxrate, }; -static const struct net_device_ops cxgb4_mgmt_netdev_ops = { #ifdef CONFIG_PCI_IOV +static const struct net_device_ops cxgb4_mgmt_netdev_ops = { + .ndo_open = dummy_open, .ndo_set_vf_mac = cxgb_set_vf_mac, -#endif }; +#endif static void get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { @@ -5023,6 +5033,51 @@ static int get_chip_type(struct pci_dev *pdev, u32 pl_rev) } #ifdef CONFIG_PCI_IOV +static void dummy_setup(struct net_device *dev) +{ + dev->type = ARPHRD_NONE; + dev->mtu = 0; + dev->hard_header_len = 0; + dev->addr_len = 0; + dev->tx_queue_len = 0; + dev->flags |= IFF_NOARP; + dev->priv_flags |= IFF_NO_QUEUE; + + /* Initialize the device structure. */ + dev->netdev_ops = &cxgb4_mgmt_netdev_ops; + dev->ethtool_ops = &cxgb4_mgmt_ethtool_ops; + dev->destructor = free_netdev; +} + +static int config_mgmt_dev(struct pci_dev *pdev) +{ + struct adapter *adap = pci_get_drvdata(pdev); + struct net_device *netdev; + struct port_info *pi; + char name[IFNAMSIZ]; + int err; + + snprintf(name, IFNAMSIZ, "mgmtpf%d%d", adap->adap_idx, adap->pf); + netdev = alloc_netdev(0, name, NET_NAME_UNKNOWN, dummy_setup); + if (!netdev) + return -ENOMEM; + + pi = netdev_priv(netdev); + pi->adapter = adap; + SET_NETDEV_DEV(netdev, &pdev->dev); + + adap->port[0] = netdev; + + err = register_netdev(adap->port[0]); + if (err) { + pr_info("Unable to register VF mgmt netdev %s\n", name); + free_netdev(adap->port[0]); + adap->port[0] = NULL; + return err; + } + return 0; +} + static int cxgb4_iov_configure(struct pci_dev *pdev, int num_vfs) { struct adapter *adap = pci_get_drvdata(pdev); @@ -5057,8 +5112,10 @@ static int cxgb4_iov_configure(struct pci_dev *pdev, int num_vfs) */ if (!num_vfs) { pci_disable_sriov(pdev); - if (adap->port[0]->reg_state == NETREG_REGISTERED) + if (adap->port[0]) { unregister_netdev(adap->port[0]); + adap->port[0] = NULL; + } return num_vfs; } @@ -5067,11 +5124,9 @@ static int cxgb4_iov_configure(struct pci_dev *pdev, int num_vfs) if (err) return err; - if (adap->port[0]->reg_state == NETREG_UNINITIALIZED) { - err = register_netdev(adap->port[0]); - if (err < 0) - pr_info("Unable to register VF mgmt netdev\n"); - } + err = config_mgmt_dev(pdev); + if (err) + return err; } return num_vfs; } @@ -5084,9 +5139,6 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) bool highdma = false; struct adapter *adapter = NULL; struct net_device *netdev; -#ifdef CONFIG_PCI_IOV - char name[IFNAMSIZ]; -#endif void __iomem *regs; u32 whoami, pl_rev; enum chip_type chip; @@ -5447,40 +5499,24 @@ sriov: goto free_pci_region; } - snprintf(name, IFNAMSIZ, "mgmtpf%d%d", adap_idx, func); - netdev = alloc_netdev(0, name, NET_NAME_UNKNOWN, ether_setup); - if (!netdev) { - err = -ENOMEM; - goto free_adapter; - } - adapter->pdev = pdev; adapter->pdev_dev = &pdev->dev; adapter->name = pci_name(pdev); adapter->mbox = func; adapter->pf = func; adapter->regs = regs; + adapter->adap_idx = adap_idx; adapter->mbox_log = kzalloc(sizeof(*adapter->mbox_log) + (sizeof(struct mbox_cmd) * T4_OS_LOG_MBOX_CMDS), GFP_KERNEL); if (!adapter->mbox_log) { err = -ENOMEM; - goto free_netdevice; + goto free_adapter; } - pi = netdev_priv(netdev); - pi->adapter = adapter; - SET_NETDEV_DEV(netdev, &pdev->dev); pci_set_drvdata(pdev, adapter); - - adapter->port[0] = netdev; - netdev->netdev_ops = &cxgb4_mgmt_netdev_ops; - netdev->ethtool_ops = &cxgb4_mgmt_ethtool_ops; - return 0; - free_netdevice: - free_netdev(adapter->port[0]); free_adapter: kfree(adapter); free_pci_region: @@ -5582,9 +5618,8 @@ static void remove_one(struct pci_dev *pdev) } #ifdef CONFIG_PCI_IOV else { - if (adapter->port[0]->reg_state == NETREG_REGISTERED) + if (adapter->port[0]) unregister_netdev(adapter->port[0]); - free_netdev(adapter->port[0]); iounmap(adapter->regs); kfree(adapter); pci_disable_sriov(pdev); From f8edcd127b5fa2a82bec22b204b434dc363011b2 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Tue, 23 Aug 2016 13:14:31 +0200 Subject: [PATCH 0449/1715] net: rtnetlink: Don't export empty RTAX_FEATURES Since the features bit field has bits for internal only use as well, it may happen that the kernel exports RTAX_FEATURES attribute with zero value which is pointless. Fix this by making sure the attribute is added only if the exported value is non-zero. Signed-off-by: Phil Sutter Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 189cc78c77eb..318fc5231b2b 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -704,6 +704,8 @@ int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics) } else if (i == RTAX_FEATURES - 1) { u32 user_features = metrics[i] & RTAX_FEATURE_MASK; + if (!user_features) + continue; BUILD_BUG_ON(RTAX_FEATURE_MASK & DST_FEATURE_MASK); if (nla_put_u32(skb, i + 1, user_features)) goto nla_put_failure; From 9a4d7e86acf3be8c0c911a552f903a10d0eea814 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Tue, 23 Aug 2016 10:56:55 -0400 Subject: [PATCH 0450/1715] qede: Add support for Tx/Rx-only queues. Add provision for configuring the fastpath queues with Tx (or Rx) only functionality. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qede/qede.h | 30 +- .../net/ethernet/qlogic/qede/qede_ethtool.c | 105 ++++-- drivers/net/ethernet/qlogic/qede/qede_main.c | 325 +++++++++++------- 3 files changed, 300 insertions(+), 160 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h index 700b509f7143..e01adce4a966 100644 --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@ -126,16 +126,22 @@ struct qede_dev { (edev)->dev_info.num_tc) struct qede_fastpath *fp_array; - u16 req_rss; - u16 num_rss; + u8 req_num_tx; + u8 fp_num_tx; + u8 req_num_rx; + u8 fp_num_rx; + u16 req_queues; + u16 num_queues; u8 num_tc; -#define QEDE_RSS_CNT(edev) ((edev)->num_rss) -#define QEDE_TSS_CNT(edev) ((edev)->num_rss * \ - (edev)->num_tc) -#define QEDE_TSS_IDX(edev, txqidx) ((txqidx) % (edev)->num_rss) -#define QEDE_TC_IDX(edev, txqidx) ((txqidx) / (edev)->num_rss) +#define QEDE_QUEUE_CNT(edev) ((edev)->num_queues) +#define QEDE_RSS_COUNT(edev) ((edev)->num_queues - (edev)->fp_num_tx) +#define QEDE_TSS_COUNT(edev) (((edev)->num_queues - (edev)->fp_num_rx) * \ + (edev)->num_tc) +#define QEDE_TX_IDX(edev, txqidx) ((edev)->fp_num_rx + (txqidx) % \ + QEDE_TSS_COUNT(edev)) +#define QEDE_TC_IDX(edev, txqidx) ((txqidx) / QEDE_TSS_COUNT(edev)) #define QEDE_TX_QUEUE(edev, txqidx) \ - (&(edev)->fp_array[QEDE_TSS_IDX((edev), (txqidx))].txqs[QEDE_TC_IDX( \ + (&(edev)->fp_array[QEDE_TX_IDX((edev), (txqidx))].txqs[QEDE_TC_IDX(\ (edev), (txqidx))]) struct qed_int_info int_info; @@ -284,7 +290,11 @@ struct qede_tx_queue { struct qede_fastpath { struct qede_dev *edev; - u8 rss_id; +#define QEDE_FASTPATH_TX BIT(0) +#define QEDE_FASTPATH_RX BIT(1) +#define QEDE_FASTPATH_COMBINED (QEDE_FASTPATH_TX | QEDE_FASTPATH_RX) + u8 type; + u8 id; struct napi_struct napi; struct qed_sb_info *sb_info; struct qede_rx_queue *rxq; @@ -344,6 +354,6 @@ void qede_recycle_rx_bd_ring(struct qede_rx_queue *rxq, struct qede_dev *edev, #define QEDE_MIN_PKT_LEN 64 #define QEDE_RX_HDR_SIZE 256 -#define for_each_rss(i) for (i = 0; i < edev->num_rss; i++) +#define for_each_queue(i) for (i = 0; i < edev->num_queues; i++) #endif /* _QEDE_H_ */ diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index f6b8899cda7f..4d45945bc34c 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -172,7 +172,7 @@ static void qede_get_strings_stats(struct qede_dev *edev, u8 *buf) { int i, j, k; - for (i = 0, k = 0; i < edev->num_rss; i++) { + for (i = 0, k = 0; i < QEDE_QUEUE_CNT(edev); i++) { int tc; for (j = 0; j < QEDE_NUM_RQSTATS; j++) @@ -230,15 +230,21 @@ static void qede_get_ethtool_stats(struct net_device *dev, mutex_lock(&edev->qede_lock); - for (qid = 0; qid < edev->num_rss; qid++) { + for (qid = 0; qid < QEDE_QUEUE_CNT(edev); qid++) { int tc; - for (sidx = 0; sidx < QEDE_NUM_RQSTATS; sidx++) - buf[cnt++] = QEDE_RQSTATS_DATA(edev, sidx, qid); - for (tc = 0; tc < edev->num_tc; tc++) { - for (sidx = 0; sidx < QEDE_NUM_TQSTATS; sidx++) - buf[cnt++] = QEDE_TQSTATS_DATA(edev, sidx, qid, - tc); + if (edev->fp_array[qid].type & QEDE_FASTPATH_RX) { + for (sidx = 0; sidx < QEDE_NUM_RQSTATS; sidx++) + buf[cnt++] = QEDE_RQSTATS_DATA(edev, sidx, qid); + } + + if (edev->fp_array[qid].type & QEDE_FASTPATH_TX) { + for (tc = 0; tc < edev->num_tc; tc++) { + for (sidx = 0; sidx < QEDE_NUM_TQSTATS; sidx++) + buf[cnt++] = QEDE_TQSTATS_DATA(edev, + sidx, + qid, tc); + } } } @@ -265,8 +271,8 @@ static int qede_get_sset_count(struct net_device *dev, int stringset) if (qede_stats_arr[i].pf_only) num_stats--; } - return num_stats + edev->num_rss * - (QEDE_NUM_RQSTATS + QEDE_NUM_TQSTATS * edev->num_tc); + return num_stats + QEDE_RSS_COUNT(edev) * QEDE_NUM_RQSTATS + + QEDE_TSS_COUNT(edev) * QEDE_NUM_TQSTATS * edev->num_tc; case ETH_SS_PRIV_FLAGS: return QEDE_PRI_FLAG_LEN; case ETH_SS_TEST: @@ -576,7 +582,7 @@ static int qede_set_coalesce(struct net_device *dev, rxc = (u16)coal->rx_coalesce_usecs; txc = (u16)coal->tx_coalesce_usecs; - for_each_rss(i) { + for_each_queue(i) { sb_id = edev->fp_array[i].sb_info->igu_sb_id; rc = edev->ops->common->set_coalesce(edev->cdev, rxc, txc, (u8)i, sb_id); @@ -728,45 +734,70 @@ static void qede_get_channels(struct net_device *dev, struct qede_dev *edev = netdev_priv(dev); channels->max_combined = QEDE_MAX_RSS_CNT(edev); - channels->combined_count = QEDE_RSS_CNT(edev); + channels->combined_count = QEDE_QUEUE_CNT(edev) - edev->fp_num_tx - + edev->fp_num_rx; + channels->tx_count = edev->fp_num_tx; + channels->rx_count = edev->fp_num_rx; } static int qede_set_channels(struct net_device *dev, struct ethtool_channels *channels) { struct qede_dev *edev = netdev_priv(dev); + u32 count; DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN), "set-channels command parameters: rx = %d, tx = %d, other = %d, combined = %d\n", channels->rx_count, channels->tx_count, channels->other_count, channels->combined_count); - /* We don't support separate rx / tx, nor `other' channels. */ - if (channels->rx_count || channels->tx_count || - channels->other_count || (channels->combined_count == 0) || - (channels->combined_count > QEDE_MAX_RSS_CNT(edev))) { + count = channels->rx_count + channels->tx_count + + channels->combined_count; + + /* We don't support `other' channels */ + if (channels->other_count) { DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN), "command parameters not supported\n"); return -EINVAL; } + if (!(channels->combined_count || (channels->rx_count && + channels->tx_count))) { + DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN), + "need to request at least one transmit and one receive channel\n"); + return -EINVAL; + } + + if (count > QEDE_MAX_RSS_CNT(edev)) { + DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN), + "requested channels = %d max supported channels = %d\n", + count, QEDE_MAX_RSS_CNT(edev)); + return -EINVAL; + } + /* Check if there was a change in the active parameters */ - if (channels->combined_count == QEDE_RSS_CNT(edev)) { + if ((count == QEDE_QUEUE_CNT(edev)) && + (channels->tx_count == edev->fp_num_tx) && + (channels->rx_count == edev->fp_num_rx)) { DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN), "No change in active parameters\n"); return 0; } /* We need the number of queues to be divisible between the hwfns */ - if (channels->combined_count % edev->dev_info.common.num_hwfns) { + if ((count % edev->dev_info.common.num_hwfns) || + (channels->tx_count % edev->dev_info.common.num_hwfns) || + (channels->rx_count % edev->dev_info.common.num_hwfns)) { DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN), - "Number of channels must be divisable by %04x\n", + "Number of channels must be divisible by %04x\n", edev->dev_info.common.num_hwfns); return -EINVAL; } /* Set number of queues and reload if necessary */ - edev->req_rss = channels->combined_count; + edev->req_queues = count; + edev->req_num_tx = channels->tx_count; + edev->req_num_rx = channels->rx_count; if (netif_running(dev)) qede_reload(edev, NULL, NULL); @@ -836,7 +867,7 @@ static int qede_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, switch (info->cmd) { case ETHTOOL_GRXRINGS: - info->data = edev->num_rss; + info->data = QEDE_RSS_COUNT(edev); return 0; case ETHTOOL_GRXFH: return qede_get_rss_flags(edev, info); @@ -1039,7 +1070,7 @@ static void qede_netif_start(struct qede_dev *edev) if (!netif_running(edev->ndev)) return; - for_each_rss(i) { + for_each_queue(i) { /* Update and reenable interrupts */ qed_sb_ack(edev->fp_array[i].sb_info, IGU_INT_ENABLE, 1); napi_enable(&edev->fp_array[i].napi); @@ -1051,7 +1082,7 @@ static void qede_netif_stop(struct qede_dev *edev) { int i; - for_each_rss(i) { + for_each_queue(i) { napi_disable(&edev->fp_array[i].napi); /* Disable interrupts */ qed_sb_ack(edev->fp_array[i].sb_info, IGU_INT_DISABLE, 0); @@ -1061,11 +1092,23 @@ static void qede_netif_stop(struct qede_dev *edev) static int qede_selftest_transmit_traffic(struct qede_dev *edev, struct sk_buff *skb) { - struct qede_tx_queue *txq = &edev->fp_array[0].txqs[0]; + struct qede_tx_queue *txq = NULL; struct eth_tx_1st_bd *first_bd; dma_addr_t mapping; int i, idx, val; + for_each_queue(i) { + if (edev->fp_array[i].type & QEDE_FASTPATH_TX) { + txq = edev->fp_array[i].txqs; + break; + } + } + + if (!txq) { + DP_NOTICE(edev, "Tx path is not available\n"); + return -1; + } + /* Fill the entry in the SW ring and the BDs in the FW ring */ idx = txq->sw_tx_prod & NUM_TX_BDS_MAX; txq->sw_tx_ring[idx].skb = skb; @@ -1129,14 +1172,26 @@ static int qede_selftest_transmit_traffic(struct qede_dev *edev, static int qede_selftest_receive_traffic(struct qede_dev *edev) { - struct qede_rx_queue *rxq = edev->fp_array[0].rxq; u16 hw_comp_cons, sw_comp_cons, sw_rx_index, len; struct eth_fast_path_rx_reg_cqe *fp_cqe; + struct qede_rx_queue *rxq = NULL; struct sw_rx_data *sw_rx_data; union eth_rx_cqe *cqe; u8 *data_ptr; int i; + for_each_queue(i) { + if (edev->fp_array[i].type & QEDE_FASTPATH_RX) { + rxq = edev->fp_array[i].rxq; + break; + } + } + + if (!rxq) { + DP_NOTICE(edev, "Rx path is not available\n"); + return -1; + } + /* The packet is expected to receive on rx-queue 0 even though RSS is * enabled. This is because the queue 0 is configured as the default * queue and that the loopback traffic is not IP. diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index ac126e6067ae..4056219591c9 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -519,7 +519,7 @@ static netdev_tx_t qede_start_xmit(struct sk_buff *skb, /* Get tx-queue context and netdev index */ txq_index = skb_get_queue_mapping(skb); - WARN_ON(txq_index >= QEDE_TSS_CNT(edev)); + WARN_ON(txq_index >= QEDE_TSS_COUNT(edev)); txq = QEDE_TX_QUEUE(edev, txq_index); netdev_txq = netdev_get_tx_queue(ndev, txq_index); @@ -1203,7 +1203,7 @@ static void qede_gro_receive(struct qede_dev *edev, #endif send_skb: - skb_record_rx_queue(skb, fp->rss_id); + skb_record_rx_queue(skb, fp->rxq->rxq_id); qede_skb_receive(edev, fp, skb, vlan_tag); } @@ -1407,7 +1407,7 @@ static int qede_rx_int(struct qede_fastpath *fp, int budget) if (unlikely(cqe_type == ETH_RX_CQE_TYPE_SLOW_PATH)) { edev->ops->eth_cqe_completion( - edev->cdev, fp->rss_id, + edev->cdev, fp->id, (struct eth_slow_path_rx_cqe *)cqe); goto next_cqe; } @@ -1578,7 +1578,7 @@ alloc_skb: qede_set_skb_csum(skb, csum_flag); - skb_record_rx_queue(skb, fp->rss_id); + skb_record_rx_queue(skb, fp->rxq->rxq_id); qede_skb_receive(edev, fp, skb, le16_to_cpu(fp_cqe->vlan_tag)); next_rx_only: @@ -1611,10 +1611,12 @@ static int qede_poll(struct napi_struct *napi, int budget) u8 tc; for (tc = 0; tc < edev->num_tc; tc++) - if (qede_txq_has_work(&fp->txqs[tc])) + if (likely(fp->type & QEDE_FASTPATH_TX) && + qede_txq_has_work(&fp->txqs[tc])) qede_tx_int(edev, &fp->txqs[tc]); - rx_work_done = qede_has_rx_work(fp->rxq) ? + rx_work_done = (likely(fp->type & QEDE_FASTPATH_RX) && + qede_has_rx_work(fp->rxq)) ? qede_rx_int(fp, budget) : 0; if (rx_work_done < budget) { qed_sb_update_sb_idx(fp->sb_info); @@ -1634,8 +1636,10 @@ static int qede_poll(struct napi_struct *napi, int budget) rmb(); /* Fall out from the NAPI loop if needed */ - if (!(qede_has_rx_work(fp->rxq) || - qede_has_tx_work(fp))) { + if (!((likely(fp->type & QEDE_FASTPATH_RX) && + qede_has_rx_work(fp->rxq)) || + (likely(fp->type & QEDE_FASTPATH_TX) && + qede_has_tx_work(fp)))) { napi_complete(napi); /* Update and reenable interrupts */ @@ -2349,7 +2353,7 @@ static void qede_free_fp_array(struct qede_dev *edev) struct qede_fastpath *fp; int i; - for_each_rss(i) { + for_each_queue(i) { fp = &edev->fp_array[i]; kfree(fp->sb_info); @@ -2358,22 +2362,33 @@ static void qede_free_fp_array(struct qede_dev *edev) } kfree(edev->fp_array); } - edev->num_rss = 0; + + edev->num_queues = 0; + edev->fp_num_tx = 0; + edev->fp_num_rx = 0; } static int qede_alloc_fp_array(struct qede_dev *edev) { + u8 fp_combined, fp_rx = edev->fp_num_rx; struct qede_fastpath *fp; int i; - edev->fp_array = kcalloc(QEDE_RSS_CNT(edev), + edev->fp_array = kcalloc(QEDE_QUEUE_CNT(edev), sizeof(*edev->fp_array), GFP_KERNEL); if (!edev->fp_array) { DP_NOTICE(edev, "fp array allocation failed\n"); goto err; } - for_each_rss(i) { + fp_combined = QEDE_QUEUE_CNT(edev) - fp_rx - edev->fp_num_tx; + + /* Allocate the FP elements for Rx queues followed by combined and then + * the Tx. This ordering should be maintained so that the respective + * queues (Rx or Tx) will be together in the fastpath array and the + * associated ids will be sequential. + */ + for_each_queue(i) { fp = &edev->fp_array[i]; fp->sb_info = kcalloc(1, sizeof(*fp->sb_info), GFP_KERNEL); @@ -2382,16 +2397,33 @@ static int qede_alloc_fp_array(struct qede_dev *edev) goto err; } - fp->rxq = kcalloc(1, sizeof(*fp->rxq), GFP_KERNEL); - if (!fp->rxq) { - DP_NOTICE(edev, "RXQ struct allocation failed\n"); - goto err; + if (fp_rx) { + fp->type = QEDE_FASTPATH_RX; + fp_rx--; + } else if (fp_combined) { + fp->type = QEDE_FASTPATH_COMBINED; + fp_combined--; + } else { + fp->type = QEDE_FASTPATH_TX; } - fp->txqs = kcalloc(edev->num_tc, sizeof(*fp->txqs), GFP_KERNEL); - if (!fp->txqs) { - DP_NOTICE(edev, "TXQ array allocation failed\n"); - goto err; + if (fp->type & QEDE_FASTPATH_TX) { + fp->txqs = kcalloc(edev->num_tc, sizeof(*fp->txqs), + GFP_KERNEL); + if (!fp->txqs) { + DP_NOTICE(edev, + "TXQ array allocation failed\n"); + goto err; + } + } + + if (fp->type & QEDE_FASTPATH_RX) { + fp->rxq = kcalloc(1, sizeof(*fp->rxq), GFP_KERNEL); + if (!fp->rxq) { + DP_NOTICE(edev, + "RXQ struct allocation failed\n"); + goto err; + } } } @@ -2605,8 +2637,8 @@ static int qede_set_num_queues(struct qede_dev *edev) u16 rss_num; /* Setup queues according to possible resources*/ - if (edev->req_rss) - rss_num = edev->req_rss; + if (edev->req_queues) + rss_num = edev->req_queues; else rss_num = netif_get_num_default_rss_queues() * edev->dev_info.common.num_hwfns; @@ -2616,11 +2648,15 @@ static int qede_set_num_queues(struct qede_dev *edev) rc = edev->ops->common->set_fp_int(edev->cdev, rss_num); if (rc > 0) { /* Managed to request interrupts for our queues */ - edev->num_rss = rc; + edev->num_queues = rc; DP_INFO(edev, "Managed %d [of %d] RSS queues\n", - QEDE_RSS_CNT(edev), rss_num); + QEDE_QUEUE_CNT(edev), rss_num); rc = 0; } + + edev->fp_num_tx = edev->req_num_tx; + edev->fp_num_rx = edev->req_num_rx; + return rc; } @@ -2912,33 +2948,39 @@ static void qede_free_mem_fp(struct qede_dev *edev, struct qede_fastpath *fp) qede_free_mem_sb(edev, fp->sb_info); - qede_free_mem_rxq(edev, fp->rxq); + if (fp->type & QEDE_FASTPATH_RX) + qede_free_mem_rxq(edev, fp->rxq); - for (tc = 0; tc < edev->num_tc; tc++) - qede_free_mem_txq(edev, &fp->txqs[tc]); + if (fp->type & QEDE_FASTPATH_TX) + for (tc = 0; tc < edev->num_tc; tc++) + qede_free_mem_txq(edev, &fp->txqs[tc]); } /* This function allocates all memory needed for a single fp (i.e. an entity - * which contains status block, one rx queue and multiple per-TC tx queues. + * which contains status block, one rx queue and/or multiple per-TC tx queues. */ static int qede_alloc_mem_fp(struct qede_dev *edev, struct qede_fastpath *fp) { int rc, tc; - rc = qede_alloc_mem_sb(edev, fp->sb_info, fp->rss_id); + rc = qede_alloc_mem_sb(edev, fp->sb_info, fp->id); if (rc) goto err; - rc = qede_alloc_mem_rxq(edev, fp->rxq); - if (rc) - goto err; - - for (tc = 0; tc < edev->num_tc; tc++) { - rc = qede_alloc_mem_txq(edev, &fp->txqs[tc]); + if (fp->type & QEDE_FASTPATH_RX) { + rc = qede_alloc_mem_rxq(edev, fp->rxq); if (rc) goto err; } + if (fp->type & QEDE_FASTPATH_TX) { + for (tc = 0; tc < edev->num_tc; tc++) { + rc = qede_alloc_mem_txq(edev, &fp->txqs[tc]); + if (rc) + goto err; + } + } + return 0; err: return rc; @@ -2948,7 +2990,7 @@ static void qede_free_mem_load(struct qede_dev *edev) { int i; - for_each_rss(i) { + for_each_queue(i) { struct qede_fastpath *fp = &edev->fp_array[i]; qede_free_mem_fp(edev, fp); @@ -2958,16 +3000,16 @@ static void qede_free_mem_load(struct qede_dev *edev) /* This function allocates all qede memory at NIC load. */ static int qede_alloc_mem_load(struct qede_dev *edev) { - int rc = 0, rss_id; + int rc = 0, queue_id; - for (rss_id = 0; rss_id < QEDE_RSS_CNT(edev); rss_id++) { - struct qede_fastpath *fp = &edev->fp_array[rss_id]; + for (queue_id = 0; queue_id < QEDE_QUEUE_CNT(edev); queue_id++) { + struct qede_fastpath *fp = &edev->fp_array[queue_id]; rc = qede_alloc_mem_fp(edev, fp); if (rc) { DP_ERR(edev, "Failed to allocate memory for fastpath - rss id = %d\n", - rss_id); + queue_id); qede_free_mem_load(edev); return rc; } @@ -2979,32 +3021,38 @@ static int qede_alloc_mem_load(struct qede_dev *edev) /* This function inits fp content and resets the SB, RXQ and TXQ structures */ static void qede_init_fp(struct qede_dev *edev) { - int rss_id, txq_index, tc; + int queue_id, rxq_index = 0, txq_index = 0, tc; struct qede_fastpath *fp; - for_each_rss(rss_id) { - fp = &edev->fp_array[rss_id]; + for_each_queue(queue_id) { + fp = &edev->fp_array[queue_id]; fp->edev = edev; - fp->rss_id = rss_id; + fp->id = queue_id; memset((void *)&fp->napi, 0, sizeof(fp->napi)); memset((void *)fp->sb_info, 0, sizeof(*fp->sb_info)); - memset((void *)fp->rxq, 0, sizeof(*fp->rxq)); - fp->rxq->rxq_id = rss_id; + if (fp->type & QEDE_FASTPATH_RX) { + memset((void *)fp->rxq, 0, sizeof(*fp->rxq)); + fp->rxq->rxq_id = rxq_index++; + } - memset((void *)fp->txqs, 0, (edev->num_tc * sizeof(*fp->txqs))); - for (tc = 0; tc < edev->num_tc; tc++) { - txq_index = tc * QEDE_RSS_CNT(edev) + rss_id; - fp->txqs[tc].index = txq_index; - if (edev->dev_info.is_legacy) - fp->txqs[tc].is_legacy = true; + if (fp->type & QEDE_FASTPATH_TX) { + memset((void *)fp->txqs, 0, + (edev->num_tc * sizeof(*fp->txqs))); + for (tc = 0; tc < edev->num_tc; tc++) { + fp->txqs[tc].index = txq_index + + tc * QEDE_TSS_COUNT(edev); + if (edev->dev_info.is_legacy) + fp->txqs[tc].is_legacy = true; + } + txq_index++; } snprintf(fp->name, sizeof(fp->name), "%s-fp-%d", - edev->ndev->name, rss_id); + edev->ndev->name, queue_id); } edev->gro_disable = !(edev->ndev->features & NETIF_F_GRO); @@ -3014,12 +3062,13 @@ static int qede_set_real_num_queues(struct qede_dev *edev) { int rc = 0; - rc = netif_set_real_num_tx_queues(edev->ndev, QEDE_TSS_CNT(edev)); + rc = netif_set_real_num_tx_queues(edev->ndev, QEDE_TSS_COUNT(edev)); if (rc) { DP_NOTICE(edev, "Failed to set real number of Tx queues\n"); return rc; } - rc = netif_set_real_num_rx_queues(edev->ndev, QEDE_RSS_CNT(edev)); + + rc = netif_set_real_num_rx_queues(edev->ndev, QEDE_RSS_COUNT(edev)); if (rc) { DP_NOTICE(edev, "Failed to set real number of Rx queues\n"); return rc; @@ -3032,7 +3081,7 @@ static void qede_napi_disable_remove(struct qede_dev *edev) { int i; - for_each_rss(i) { + for_each_queue(i) { napi_disable(&edev->fp_array[i].napi); netif_napi_del(&edev->fp_array[i].napi); @@ -3044,7 +3093,7 @@ static void qede_napi_add_enable(struct qede_dev *edev) int i; /* Add NAPI objects */ - for_each_rss(i) { + for_each_queue(i) { netif_napi_add(edev->ndev, &edev->fp_array[i].napi, qede_poll, NAPI_POLL_WEIGHT); napi_enable(&edev->fp_array[i].napi); @@ -3073,14 +3122,14 @@ static int qede_req_msix_irqs(struct qede_dev *edev) int i, rc; /* Sanitize number of interrupts == number of prepared RSS queues */ - if (QEDE_RSS_CNT(edev) > edev->int_info.msix_cnt) { + if (QEDE_QUEUE_CNT(edev) > edev->int_info.msix_cnt) { DP_ERR(edev, "Interrupt mismatch: %d RSS queues > %d MSI-x vectors\n", - QEDE_RSS_CNT(edev), edev->int_info.msix_cnt); + QEDE_QUEUE_CNT(edev), edev->int_info.msix_cnt); return -EINVAL; } - for (i = 0; i < QEDE_RSS_CNT(edev); i++) { + for (i = 0; i < QEDE_QUEUE_CNT(edev); i++) { rc = request_irq(edev->int_info.msix[i].vector, qede_msix_fp_int, 0, edev->fp_array[i].name, &edev->fp_array[i]); @@ -3125,11 +3174,11 @@ static int qede_setup_irqs(struct qede_dev *edev) /* qed should learn receive the RSS ids and callbacks */ ops = edev->ops->common; - for (i = 0; i < QEDE_RSS_CNT(edev); i++) + for (i = 0; i < QEDE_QUEUE_CNT(edev); i++) ops->simd_handler_config(edev->cdev, &edev->fp_array[i], i, qede_simd_fp_handler); - edev->int_info.used_cnt = QEDE_RSS_CNT(edev); + edev->int_info.used_cnt = QEDE_QUEUE_CNT(edev); } return 0; } @@ -3187,45 +3236,53 @@ static int qede_stop_queues(struct qede_dev *edev) } /* Flush Tx queues. If needed, request drain from MCP */ - for_each_rss(i) { + for_each_queue(i) { struct qede_fastpath *fp = &edev->fp_array[i]; - for (tc = 0; tc < edev->num_tc; tc++) { - struct qede_tx_queue *txq = &fp->txqs[tc]; + if (fp->type & QEDE_FASTPATH_TX) { + for (tc = 0; tc < edev->num_tc; tc++) { + struct qede_tx_queue *txq = &fp->txqs[tc]; - rc = qede_drain_txq(edev, txq, true); - if (rc) - return rc; + rc = qede_drain_txq(edev, txq, true); + if (rc) + return rc; + } } } - /* Stop all Queues in reverse order*/ - for (i = QEDE_RSS_CNT(edev) - 1; i >= 0; i--) { + /* Stop all Queues in reverse order */ + for (i = QEDE_QUEUE_CNT(edev) - 1; i >= 0; i--) { struct qed_stop_rxq_params rx_params; - /* Stop the Tx Queue(s)*/ - for (tc = 0; tc < edev->num_tc; tc++) { - struct qed_stop_txq_params tx_params; + /* Stop the Tx Queue(s) */ + if (edev->fp_array[i].type & QEDE_FASTPATH_TX) { + for (tc = 0; tc < edev->num_tc; tc++) { + struct qed_stop_txq_params tx_params; + u8 val; - tx_params.rss_id = i; - tx_params.tx_queue_id = tc * QEDE_RSS_CNT(edev) + i; - rc = edev->ops->q_tx_stop(cdev, &tx_params); - if (rc) { - DP_ERR(edev, "Failed to stop TXQ #%d\n", - tx_params.tx_queue_id); - return rc; + tx_params.rss_id = i; + val = edev->fp_array[i].txqs[tc].index; + tx_params.tx_queue_id = val; + rc = edev->ops->q_tx_stop(cdev, &tx_params); + if (rc) { + DP_ERR(edev, "Failed to stop TXQ #%d\n", + tx_params.tx_queue_id); + return rc; + } } } - /* Stop the Rx Queue*/ - memset(&rx_params, 0, sizeof(rx_params)); - rx_params.rss_id = i; - rx_params.rx_queue_id = i; + /* Stop the Rx Queue */ + if (edev->fp_array[i].type & QEDE_FASTPATH_RX) { + memset(&rx_params, 0, sizeof(rx_params)); + rx_params.rss_id = i; + rx_params.rx_queue_id = edev->fp_array[i].rxq->rxq_id; - rc = edev->ops->q_rx_stop(cdev, &rx_params); - if (rc) { - DP_ERR(edev, "Failed to stop RXQ #%d\n", i); - return rc; + rc = edev->ops->q_rx_stop(cdev, &rx_params); + if (rc) { + DP_ERR(edev, "Failed to stop RXQ #%d\n", i); + return rc; + } } } @@ -3248,7 +3305,7 @@ static int qede_start_queues(struct qede_dev *edev, bool clear_stats) struct qed_start_vport_params start = {0}; bool reset_rss_indir = false; - if (!edev->num_rss) { + if (!edev->num_queues) { DP_ERR(edev, "Cannot update V-VPORT as active as there are no Rx queues\n"); return -EINVAL; @@ -3272,50 +3329,66 @@ static int qede_start_queues(struct qede_dev *edev, bool clear_stats) "Start vport ramrod passed, vport_id = %d, MTU = %d, vlan_removal_en = %d\n", start.vport_id, edev->ndev->mtu + 0xe, vlan_removal_en); - for_each_rss(i) { + for_each_queue(i) { struct qede_fastpath *fp = &edev->fp_array[i]; - dma_addr_t phys_table = fp->rxq->rx_comp_ring.pbl.p_phys_table; + dma_addr_t p_phys_table; + u32 page_cnt; - memset(&q_params, 0, sizeof(q_params)); - q_params.rss_id = i; - q_params.queue_id = i; - q_params.vport_id = 0; - q_params.sb = fp->sb_info->igu_sb_id; - q_params.sb_idx = RX_PI; - - rc = edev->ops->q_rx_start(cdev, &q_params, - fp->rxq->rx_buf_size, - fp->rxq->rx_bd_ring.p_phys_addr, - phys_table, - fp->rxq->rx_comp_ring.page_cnt, - &fp->rxq->hw_rxq_prod_addr); - if (rc) { - DP_ERR(edev, "Start RXQ #%d failed %d\n", i, rc); - return rc; - } - - fp->rxq->hw_cons_ptr = &fp->sb_info->sb_virt->pi_array[RX_PI]; - - qede_update_rx_prod(edev, fp->rxq); - - for (tc = 0; tc < edev->num_tc; tc++) { - struct qede_tx_queue *txq = &fp->txqs[tc]; - int txq_index = tc * QEDE_RSS_CNT(edev) + i; + if (fp->type & QEDE_FASTPATH_RX) { + struct qede_rx_queue *rxq = fp->rxq; + __le16 *val; memset(&q_params, 0, sizeof(q_params)); q_params.rss_id = i; - q_params.queue_id = txq_index; + q_params.queue_id = rxq->rxq_id; + q_params.vport_id = 0; + q_params.sb = fp->sb_info->igu_sb_id; + q_params.sb_idx = RX_PI; + + p_phys_table = + qed_chain_get_pbl_phys(&rxq->rx_comp_ring); + page_cnt = qed_chain_get_page_cnt(&rxq->rx_comp_ring); + + rc = edev->ops->q_rx_start(cdev, &q_params, + rxq->rx_buf_size, + rxq->rx_bd_ring.p_phys_addr, + p_phys_table, + page_cnt, + &rxq->hw_rxq_prod_addr); + if (rc) { + DP_ERR(edev, "Start RXQ #%d failed %d\n", i, + rc); + return rc; + } + + val = &fp->sb_info->sb_virt->pi_array[RX_PI]; + rxq->hw_cons_ptr = val; + + qede_update_rx_prod(edev, rxq); + } + + if (!(fp->type & QEDE_FASTPATH_TX)) + continue; + + for (tc = 0; tc < edev->num_tc; tc++) { + struct qede_tx_queue *txq = &fp->txqs[tc]; + + p_phys_table = qed_chain_get_pbl_phys(&txq->tx_pbl); + page_cnt = qed_chain_get_page_cnt(&txq->tx_pbl); + + memset(&q_params, 0, sizeof(q_params)); + q_params.rss_id = i; + q_params.queue_id = txq->index; q_params.vport_id = 0; q_params.sb = fp->sb_info->igu_sb_id; q_params.sb_idx = TX_PI(tc); rc = edev->ops->q_tx_start(cdev, &q_params, - txq->tx_pbl.pbl.p_phys_table, - txq->tx_pbl.page_cnt, + p_phys_table, page_cnt, &txq->doorbell_addr); if (rc) { DP_ERR(edev, "Start TXQ #%d failed %d\n", - txq_index, rc); + txq->index, rc); return rc; } @@ -3346,13 +3419,13 @@ static int qede_start_queues(struct qede_dev *edev, bool clear_stats) } /* Fill struct with RSS params */ - if (QEDE_RSS_CNT(edev) > 1) { + if (QEDE_RSS_COUNT(edev) > 1) { vport_update_params.update_rss_flg = 1; /* Need to validate current RSS config uses valid entries */ for (i = 0; i < QED_RSS_IND_TABLE_SIZE; i++) { if (edev->rss_params.rss_ind_table[i] >= - edev->num_rss) { + QEDE_RSS_COUNT(edev)) { reset_rss_indir = true; break; } @@ -3365,7 +3438,7 @@ static int qede_start_queues(struct qede_dev *edev, bool clear_stats) for (i = 0; i < QED_RSS_IND_TABLE_SIZE; i++) { u16 indir_val; - val = QEDE_RSS_CNT(edev); + val = QEDE_RSS_COUNT(edev); indir_val = ethtool_rxfh_indir_default(i, val); edev->rss_params.rss_ind_table[i] = indir_val; } @@ -3494,7 +3567,7 @@ static int qede_load(struct qede_dev *edev, enum qede_load_mode mode) if (rc) goto err1; DP_INFO(edev, "Allocated %d RSS queues on %d TC/s\n", - QEDE_RSS_CNT(edev), edev->num_tc); + QEDE_QUEUE_CNT(edev), edev->num_tc); rc = qede_set_real_num_queues(edev); if (rc) @@ -3547,7 +3620,9 @@ err2: err1: edev->ops->common->set_fp_int(edev->cdev, 0); qede_free_fp_array(edev); - edev->num_rss = 0; + edev->num_queues = 0; + edev->fp_num_tx = 0; + edev->fp_num_rx = 0; err0: return rc; } From 2698f85e88244e1bce5c019e166e5c33aff2b6e5 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 23 Aug 2016 15:06:05 +0000 Subject: [PATCH 0451/1715] net: phy: xgmiitorgmii: Fix non static symbol warning Fixes the following sparse warning: drivers/net/phy/xilinx_gmii2rgmii.c:61:5: warning: symbol 'xgmiitorgmii_probe' was not declared. Should it be static? Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- drivers/net/phy/xilinx_gmii2rgmii.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/xilinx_gmii2rgmii.c b/drivers/net/phy/xilinx_gmii2rgmii.c index f38be7094362..d15dd3938ba8 100644 --- a/drivers/net/phy/xilinx_gmii2rgmii.c +++ b/drivers/net/phy/xilinx_gmii2rgmii.c @@ -58,7 +58,7 @@ static int xgmiitorgmii_read_status(struct phy_device *phydev) return 0; } -int xgmiitorgmii_probe(struct mdio_device *mdiodev) +static int xgmiitorgmii_probe(struct mdio_device *mdiodev) { struct device *dev = &mdiodev->dev; struct device_node *np = dev->of_node, *phy_node; From a26c76798e602fda301a997ec1ded3815b2e0e77 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 23 Aug 2016 15:09:49 +0000 Subject: [PATCH 0452/1715] cxgb4: Remove unused including Remove including that don't need it. Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c index aac6e444abf2..5d402bace6c1 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c @@ -36,7 +36,6 @@ */ #include -#include #include #include #include From b3dc93501e34b6e35245ba06fd0a7bfc7df88f0d Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 23 Aug 2016 15:11:03 +0000 Subject: [PATCH 0453/1715] net: hns: remove redundant dev_err call in hns_dsaf_get_cfg() There is a error message within devm_ioremap_resource already, so remove the dev_err call to avoid redundant error message. Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- .../net/ethernet/hisilicon/hns/hns_dsaf_main.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c index 05bd19f9ebc5..eb448dff7564 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c @@ -116,10 +116,8 @@ int hns_dsaf_get_cfg(struct dsaf_device *dsaf_dev) dsaf_dev->sc_base = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(dsaf_dev->sc_base)) { - dev_err(dsaf_dev->dev, "subctrl can not map!\n"); + if (IS_ERR(dsaf_dev->sc_base)) return PTR_ERR(dsaf_dev->sc_base); - } res = platform_get_resource(pdev, IORESOURCE_MEM, res_idx++); @@ -130,10 +128,8 @@ int hns_dsaf_get_cfg(struct dsaf_device *dsaf_dev) dsaf_dev->sds_base = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(dsaf_dev->sds_base)) { - dev_err(dsaf_dev->dev, "serdes-ctrl can not map!\n"); + if (IS_ERR(dsaf_dev->sds_base)) return PTR_ERR(dsaf_dev->sds_base); - } } else { dsaf_dev->sub_ctrl = syscon; } @@ -148,10 +144,8 @@ int hns_dsaf_get_cfg(struct dsaf_device *dsaf_dev) } } dsaf_dev->ppe_base = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(dsaf_dev->ppe_base)) { - dev_err(dsaf_dev->dev, "ppe-base resource can not map!\n"); + if (IS_ERR(dsaf_dev->ppe_base)) return PTR_ERR(dsaf_dev->ppe_base); - } dsaf_dev->ppe_paddr = res->start; if (!HNS_DSAF_IS_DEBUG(dsaf_dev)) { @@ -167,10 +161,8 @@ int hns_dsaf_get_cfg(struct dsaf_device *dsaf_dev) } } dsaf_dev->io_base = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(dsaf_dev->io_base)) { - dev_err(dsaf_dev->dev, "dsaf-base resource can not map!\n"); + if (IS_ERR(dsaf_dev->io_base)) return PTR_ERR(dsaf_dev->io_base); - } } ret = device_property_read_u32(dsaf_dev->dev, "desc-num", &desc_num); From 184b49c89f39f5c5ad262a6456248284e10984c6 Mon Sep 17 00:00:00 2001 From: Rami Rosen Date: Tue, 23 Aug 2016 20:20:17 +0300 Subject: [PATCH 0454/1715] net: ena: change the return type of ena_set_push_mode() to be void. This patch changes the return type of ena_set_push_mode() to be void, as it always returns 0. Signed-off-by: Rami Rosen Signed-off-by: David S. Miller --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 5c536b8ec99c..bfeaec5bd7b9 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -2681,8 +2681,8 @@ static int ena_calc_io_queue_num(struct pci_dev *pdev, return io_queue_num; } -static int ena_set_push_mode(struct pci_dev *pdev, struct ena_com_dev *ena_dev, - struct ena_com_dev_get_features_ctx *get_feat_ctx) +static void ena_set_push_mode(struct pci_dev *pdev, struct ena_com_dev *ena_dev, + struct ena_com_dev_get_features_ctx *get_feat_ctx) { bool has_mem_bar; @@ -2693,8 +2693,6 @@ static int ena_set_push_mode(struct pci_dev *pdev, struct ena_com_dev *ena_dev, ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_DEV; else ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; - - return 0; } static void ena_set_dev_offloads(struct ena_com_dev_get_features_ctx *feat, @@ -2913,11 +2911,7 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_free_region; } - rc = ena_set_push_mode(pdev, ena_dev, &get_feat_ctx); - if (rc) { - dev_err(&pdev->dev, "Invalid module param(push_mode)\n"); - goto err_device_destroy; - } + ena_set_push_mode(pdev, ena_dev, &get_feat_ctx); if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { ena_dev->mem_bar = ioremap_wc(pci_resource_start(pdev, ENA_MEM_BAR), From 5128b18522e143e634712ceef6a007333b8c7439 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 23 Aug 2016 23:01:02 +0000 Subject: [PATCH 0455/1715] tipc: use kfree_skb() instead of kfree() Use kfree_skb() instead of kfree() to free sk_buff. Fixes: 0d051bf93c06 ("tipc: make bearer packet filtering generic") Signed-off-by: Wei Yongjun Acked-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/bearer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 6fc4e3cca49a..28056fa8f77a 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -529,7 +529,7 @@ void tipc_bearer_xmit(struct net *net, u32 bearer_id, if (likely(test_bit(0, &b->up) || msg_is_reset(buf_msg(skb)))) b->media->send_msg(net, skb, b, dst); else - kfree(skb); + kfree_skb(skb); } rcu_read_unlock(); } From 5d77dca82839ef016a93ad7acd7058b14d967752 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 23 Aug 2016 21:06:33 -0700 Subject: [PATCH 0456/1715] net: diag: support SOCK_DESTROY for UDP sockets This implements SOCK_DESTROY for UDP sockets similar to what was done for TCP with commit c1e64e298b8ca ("net: diag: Support destroying TCP sockets.") A process with a UDP socket targeted for destroy is awakened and recvmsg fails with ECONNABORTED. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/udp.h | 1 + net/ipv4/udp.c | 15 +++++++++ net/ipv4/udp_diag.c | 79 +++++++++++++++++++++++++++++++++++++++++++++ net/ipv6/udp.c | 1 + 4 files changed, 96 insertions(+) diff --git a/include/net/udp.h b/include/net/udp.h index 8894d7144189..ea53a87d880f 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -251,6 +251,7 @@ int udp_get_port(struct sock *sk, unsigned short snum, int (*saddr_cmp)(const struct sock *, const struct sock *)); void udp_err(struct sk_buff *, u32); +int udp_abort(struct sock *sk, int err); int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len); int udp_push_pending_frames(struct sock *sk); void udp_flush_pending_frames(struct sock *sk); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 8f5f7f6026f7..e9ffc27b23d0 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2193,6 +2193,20 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) } EXPORT_SYMBOL(udp_poll); +int udp_abort(struct sock *sk, int err) +{ + lock_sock(sk); + + sk->sk_err = err; + sk->sk_error_report(sk); + udp_disconnect(sk, 0); + + release_sock(sk); + + return 0; +} +EXPORT_SYMBOL_GPL(udp_abort); + struct proto udp_prot = { .name = "UDP", .owner = THIS_MODULE, @@ -2224,6 +2238,7 @@ struct proto udp_prot = { .compat_getsockopt = compat_udp_getsockopt, #endif .clear_sk = sk_prot_clear_portaddr_nulls, + .diag_destroy = udp_abort, }; EXPORT_SYMBOL(udp_prot); diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index 3d5ccf4b1412..8a9f6e535caa 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -165,12 +165,88 @@ static void udp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, r->idiag_wqueue = sk_wmem_alloc_get(sk); } +#ifdef CONFIG_INET_DIAG_DESTROY +static int __udp_diag_destroy(struct sk_buff *in_skb, + const struct inet_diag_req_v2 *req, + struct udp_table *tbl) +{ + struct net *net = sock_net(in_skb->sk); + struct sock *sk; + int err; + + rcu_read_lock(); + + if (req->sdiag_family == AF_INET) + sk = __udp4_lib_lookup(net, + req->id.idiag_dst[0], req->id.idiag_dport, + req->id.idiag_src[0], req->id.idiag_sport, + req->id.idiag_if, tbl, NULL); +#if IS_ENABLED(CONFIG_IPV6) + else if (req->sdiag_family == AF_INET6) { + if (ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_dst) && + ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_src)) + sk = __udp4_lib_lookup(net, + req->id.idiag_dst[0], req->id.idiag_dport, + req->id.idiag_src[0], req->id.idiag_sport, + req->id.idiag_if, tbl, NULL); + + else + sk = __udp6_lib_lookup(net, + (struct in6_addr *)req->id.idiag_dst, + req->id.idiag_dport, + (struct in6_addr *)req->id.idiag_src, + req->id.idiag_sport, + req->id.idiag_if, tbl, NULL); + } +#endif + else { + rcu_read_unlock(); + return -EINVAL; + } + + if (sk && !atomic_inc_not_zero(&sk->sk_refcnt)) + sk = NULL; + + rcu_read_unlock(); + + if (!sk) + return -ENOENT; + + if (sock_diag_check_cookie(sk, req->id.idiag_cookie)) { + sock_put(sk); + return -ENOENT; + } + + err = sock_diag_destroy(sk, ECONNABORTED); + + sock_put(sk); + + return err; +} + +static int udp_diag_destroy(struct sk_buff *in_skb, + const struct inet_diag_req_v2 *req) +{ + return __udp_diag_destroy(in_skb, req, &udp_table); +} + +static int udplite_diag_destroy(struct sk_buff *in_skb, + const struct inet_diag_req_v2 *req) +{ + return __udp_diag_destroy(in_skb, req, &udplite_table); +} + +#endif + static const struct inet_diag_handler udp_diag_handler = { .dump = udp_diag_dump, .dump_one = udp_diag_dump_one, .idiag_get_info = udp_diag_get_info, .idiag_type = IPPROTO_UDP, .idiag_info_size = 0, +#ifdef CONFIG_INET_DIAG_DESTROY + .destroy = udp_diag_destroy, +#endif }; static void udplite_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, @@ -192,6 +268,9 @@ static const struct inet_diag_handler udplite_diag_handler = { .idiag_get_info = udp_diag_get_info, .idiag_type = IPPROTO_UDPLITE, .idiag_info_size = 0, +#ifdef CONFIG_INET_DIAG_DESTROY + .destroy = udplite_diag_destroy, +#endif }; static int __init udp_diag_init(void) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 81e2f98b958d..16512cf06e73 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1467,6 +1467,7 @@ struct proto udpv6_prot = { .compat_getsockopt = compat_udpv6_getsockopt, #endif .clear_sk = udp_v6_clear_sk, + .diag_destroy = udp_abort, }; static struct inet_protosw udpv6_protosw = { From 6a6ad2a4e57bc907a6977eef6cad49348ad2744b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 23 Aug 2016 11:39:26 -0700 Subject: [PATCH 0457/1715] ipv6: udp: remove udp_v6_clear_sk() Now RCU lookups of ipv6 udp sockets no longer dereference pinet6 field, we can get rid of udp_v6_clear_sk() helper. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/udp.c | 12 ------------ net/ipv6/udp_impl.h | 2 -- net/ipv6/udplite.c | 2 +- 3 files changed, 1 insertion(+), 15 deletions(-) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 16512cf06e73..9efe740ff6dd 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1424,17 +1424,6 @@ void udp6_proc_exit(struct net *net) } #endif /* CONFIG_PROC_FS */ -void udp_v6_clear_sk(struct sock *sk, int size) -{ - struct inet_sock *inet = inet_sk(sk); - - /* we do not want to clear pinet6 field, because of RCU lookups */ - sk_prot_clear_portaddr_nulls(sk, offsetof(struct inet_sock, pinet6)); - - size -= offsetof(struct inet_sock, pinet6) + sizeof(inet->pinet6); - memset(&inet->pinet6 + 1, 0, size); -} - /* ------------------------------------------------------------------------ */ struct proto udpv6_prot = { @@ -1466,7 +1455,6 @@ struct proto udpv6_prot = { .compat_setsockopt = compat_udpv6_setsockopt, .compat_getsockopt = compat_udpv6_getsockopt, #endif - .clear_sk = udp_v6_clear_sk, .diag_destroy = udp_abort, }; diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h index 0682c031ccdc..f6eb1ab34f4b 100644 --- a/net/ipv6/udp_impl.h +++ b/net/ipv6/udp_impl.h @@ -29,8 +29,6 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); void udpv6_destroy_sock(struct sock *sk); -void udp_v6_clear_sk(struct sock *sk, int size); - #ifdef CONFIG_PROC_FS int udp6_seq_show(struct seq_file *seq, void *v); #endif diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index 9cf097e206e9..118057a5b759 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -56,7 +56,7 @@ struct proto udplitev6_prot = { .compat_setsockopt = compat_udpv6_setsockopt, .compat_getsockopt = compat_udpv6_getsockopt, #endif - .clear_sk = udp_v6_clear_sk, + .clear_sk = sk_prot_clear_portaddr_nulls, }; static struct inet_protosw udplite6_protosw = { From 4cac8204661a6d1a842e47911933f1e90b392c84 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 23 Aug 2016 11:39:27 -0700 Subject: [PATCH 0458/1715] udp: get rid of sk_prot_clear_portaddr_nulls() Since we no longer use SLAB_DESTROY_BY_RCU for UDP, we do not need sk_prot_clear_portaddr_nulls() helper. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 2 -- net/core/sock.c | 18 ------------------ net/ipv4/udp.c | 1 - net/ipv4/udplite.c | 1 - net/ipv6/udplite.c | 1 - 5 files changed, 23 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 2aab9b63bf16..1bc57609f8e1 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1242,8 +1242,6 @@ static inline int __sk_prot_rehash(struct sock *sk) return sk->sk_prot->hash(sk); } -void sk_prot_clear_portaddr_nulls(struct sock *sk, int size); - /* About 10 seconds */ #define SOCK_DESTROY_TIME (10*HZ) diff --git a/net/core/sock.c b/net/core/sock.c index 25dab8b60223..2b09c2967e21 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1315,24 +1315,6 @@ static void sock_copy(struct sock *nsk, const struct sock *osk) #endif } -void sk_prot_clear_portaddr_nulls(struct sock *sk, int size) -{ - unsigned long nulls1, nulls2; - - nulls1 = offsetof(struct sock, __sk_common.skc_node.next); - nulls2 = offsetof(struct sock, __sk_common.skc_portaddr_node.next); - if (nulls1 > nulls2) - swap(nulls1, nulls2); - - if (nulls1 != 0) - memset((char *)sk, 0, nulls1); - memset((char *)sk + nulls1 + sizeof(void *), 0, - nulls2 - nulls1 - sizeof(void *)); - memset((char *)sk + nulls2 + sizeof(void *), 0, - size - nulls2 - sizeof(void *)); -} -EXPORT_SYMBOL(sk_prot_clear_portaddr_nulls); - static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority, int family) { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index e9ffc27b23d0..f0ebb0bd1e11 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2237,7 +2237,6 @@ struct proto udp_prot = { .compat_setsockopt = compat_udp_setsockopt, .compat_getsockopt = compat_udp_getsockopt, #endif - .clear_sk = sk_prot_clear_portaddr_nulls, .diag_destroy = udp_abort, }; EXPORT_SYMBOL(udp_prot); diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index 3b3efbda48e1..67fc9d96e67d 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -61,7 +61,6 @@ struct proto udplite_prot = { .compat_setsockopt = compat_udp_setsockopt, .compat_getsockopt = compat_udp_getsockopt, #endif - .clear_sk = sk_prot_clear_portaddr_nulls, }; EXPORT_SYMBOL(udplite_prot); diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index 118057a5b759..5cf0099b86f7 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -56,7 +56,6 @@ struct proto udplitev6_prot = { .compat_setsockopt = compat_udpv6_setsockopt, .compat_getsockopt = compat_udpv6_getsockopt, #endif - .clear_sk = sk_prot_clear_portaddr_nulls, }; static struct inet_protosw udplite6_protosw = { From 391bb6be6578829540bc466fc85da44a68148b84 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 23 Aug 2016 11:39:28 -0700 Subject: [PATCH 0459/1715] ipv6: tcp: get rid of tcp_v6_clear_sk() Now RCU lookups of IPv6 TCP sockets no longer dereference pinet6, we do not need tcp_v6_clear_sk() anymore. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 33df8b8575cc..e0f46439e391 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1862,17 +1862,6 @@ void tcp6_proc_exit(struct net *net) } #endif -static void tcp_v6_clear_sk(struct sock *sk, int size) -{ - struct inet_sock *inet = inet_sk(sk); - - /* we do not want to clear pinet6 field, because of RCU lookups */ - sk_prot_clear_nulls(sk, offsetof(struct inet_sock, pinet6)); - - size -= offsetof(struct inet_sock, pinet6) + sizeof(inet->pinet6); - memset(&inet->pinet6 + 1, 0, size); -} - struct proto tcpv6_prot = { .name = "TCPv6", .owner = THIS_MODULE, @@ -1914,7 +1903,6 @@ struct proto tcpv6_prot = { .compat_setsockopt = compat_tcp_setsockopt, .compat_getsockopt = compat_tcp_getsockopt, #endif - .clear_sk = tcp_v6_clear_sk, .diag_destroy = tcp_abort, }; From ba2489b0e0113f68a25fe7a563842c2b591829d7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 23 Aug 2016 11:39:29 -0700 Subject: [PATCH 0460/1715] net: remove clear_sk() method We no longer use this handler, we can delete it. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 1 - net/core/sock.c | 8 ++------ 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 1bc57609f8e1..c797c57f4d9f 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1020,7 +1020,6 @@ struct proto { void (*unhash)(struct sock *sk); void (*rehash)(struct sock *sk); int (*get_port)(struct sock *sk, unsigned short snum); - void (*clear_sk)(struct sock *sk, int size); /* Keeping track of sockets in use */ #ifdef CONFIG_PROC_FS diff --git a/net/core/sock.c b/net/core/sock.c index 2b09c2967e21..51a730485649 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1326,12 +1326,8 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority, sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO); if (!sk) return sk; - if (priority & __GFP_ZERO) { - if (prot->clear_sk) - prot->clear_sk(sk, prot->obj_size); - else - sk_prot_clear_nulls(sk, prot->obj_size); - } + if (priority & __GFP_ZERO) + sk_prot_clear_nulls(sk, prot->obj_size); } else sk = kmalloc(prot->obj_size, priority); From 4141b36ab16d7a66b4cf712f2d21eba61c5927e5 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 24 Aug 2016 13:08:40 +0200 Subject: [PATCH 0461/1715] xfrm: Fix xfrm_policy_lock imbalance An earlier patch accidentally replaced a write_lock_bh with a spin_unlock_bh. Fix this by using spin_lock_bh instead. Fixes: 9d0380df6217 ("xfrm: policy: convert policy_lock to spinlock") Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index dd01fd2e55fa..f7ce6265961a 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -979,7 +979,7 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid) xfrm_policy_kill(pol); - spin_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_lock_bh(&net->xfrm.xfrm_policy_lock); goto again1; } From 35db57bbc4b7ab810bba6e6d6954a0faf5a842cf Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 23 Aug 2016 16:00:12 +0200 Subject: [PATCH 0462/1715] xfrm: state: remove per-netns gc task After commit 5b8ef3415a21f173 ("xfrm: Remove ancient sleeping when the SA is in acquire state") gc does not need any per-netns data anymore. As far as gc is concerned all state structs are the same, so we can use a global work struct for it. Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- include/net/netns/xfrm.h | 2 -- net/xfrm/xfrm_state.c | 18 +++++++++--------- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 177ed444d7b2..27bb9633c69d 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -44,8 +44,6 @@ struct netns_xfrm { unsigned int state_hmask; unsigned int state_num; struct work_struct state_hash_work; - struct hlist_head state_gc_list; - struct work_struct state_gc_work; struct list_head policy_all; struct hlist_head *policy_byidx; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 1a15b658a79e..ba8bf518ba14 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -31,6 +31,8 @@ #define xfrm_state_deref_prot(table, net) \ rcu_dereference_protected((table), lockdep_is_held(&(net)->xfrm.xfrm_state_lock)) +static void xfrm_state_gc_task(struct work_struct *work); + /* Each xfrm_state may be linked to two tables: 1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl) @@ -41,6 +43,9 @@ static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; static __read_mostly seqcount_t xfrm_state_hash_generation = SEQCNT_ZERO(xfrm_state_hash_generation); +static DECLARE_WORK(xfrm_state_gc_work, xfrm_state_gc_task); +static HLIST_HEAD(xfrm_state_gc_list); + static inline bool xfrm_state_hold_rcu(struct xfrm_state __rcu *x) { return atomic_inc_not_zero(&x->refcnt); @@ -368,13 +373,12 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x) static void xfrm_state_gc_task(struct work_struct *work) { - struct net *net = container_of(work, struct net, xfrm.state_gc_work); struct xfrm_state *x; struct hlist_node *tmp; struct hlist_head gc_list; spin_lock_bh(&xfrm_state_gc_lock); - hlist_move_list(&net->xfrm.state_gc_list, &gc_list); + hlist_move_list(&xfrm_state_gc_list, &gc_list); spin_unlock_bh(&xfrm_state_gc_lock); synchronize_rcu(); @@ -515,14 +519,12 @@ EXPORT_SYMBOL(xfrm_state_alloc); void __xfrm_state_destroy(struct xfrm_state *x) { - struct net *net = xs_net(x); - WARN_ON(x->km.state != XFRM_STATE_DEAD); spin_lock_bh(&xfrm_state_gc_lock); - hlist_add_head(&x->gclist, &net->xfrm.state_gc_list); + hlist_add_head(&x->gclist, &xfrm_state_gc_list); spin_unlock_bh(&xfrm_state_gc_lock); - schedule_work(&net->xfrm.state_gc_work); + schedule_work(&xfrm_state_gc_work); } EXPORT_SYMBOL(__xfrm_state_destroy); @@ -2134,8 +2136,6 @@ int __net_init xfrm_state_init(struct net *net) net->xfrm.state_num = 0; INIT_WORK(&net->xfrm.state_hash_work, xfrm_hash_resize); - INIT_HLIST_HEAD(&net->xfrm.state_gc_list); - INIT_WORK(&net->xfrm.state_gc_work, xfrm_state_gc_task); spin_lock_init(&net->xfrm.xfrm_state_lock); return 0; @@ -2153,7 +2153,7 @@ void xfrm_state_fini(struct net *net) flush_work(&net->xfrm.state_hash_work); xfrm_state_flush(net, IPSEC_PROTO_ANY, false); - flush_work(&net->xfrm.state_gc_work); + flush_work(&xfrm_state_gc_work); WARN_ON(!list_empty(&net->xfrm.state_all)); From 2266ffdef5737fdfa96005204fc5606dbd559956 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 24 Aug 2016 13:06:14 +0100 Subject: [PATCH 0463/1715] rxrpc: Fix conn-based retransmit If a duplicate packet comes in for a call that has just completed on a connection's channel then there will be an oops in the data_ready handler because it tries to examine the connection struct via a call struct (which we don't have - the pointer is unset). Since the connection struct pointer is available to us, go direct instead. Also, the ACK packet to be retransmitted needs three octets of padding between the soft ack list and the ackinfo. Fixes: 18bfeba50dfd0c8ee420396f2570f16a0bdbd7de ("rxrpc: Perform terminal call ACK/ABORT retransmission from conn processor") Signed-off-by: David Howells --- net/rxrpc/conn_event.c | 1 + net/rxrpc/input.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index c1c6b7f305d1..6296374df840 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -42,6 +42,7 @@ static void rxrpc_conn_retransmit(struct rxrpc_connection *conn, } abort; struct { struct rxrpc_ackpacket ack; + u8 padding[3]; struct rxrpc_ackinfo info; }; }; diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 66cdeb56f44f..5e683dd21ab9 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -732,7 +732,7 @@ void rxrpc_data_ready(struct sock *sk) /* For the previous service call, if completed * successfully, we discard all further packets. */ - if (rxrpc_conn_is_service(call->conn) && + if (rxrpc_conn_is_service(conn) && (chan->last_type == RXRPC_PACKET_TYPE_ACK || sp->hdr.type == RXRPC_PACKET_TYPE_ABORT)) goto discard_unlock; From df5d8bf70f64a2ee34234553eb215418dbc4c8f3 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 24 Aug 2016 14:31:43 +0100 Subject: [PATCH 0464/1715] rxrpc: Make /proc/net/rxrpc_calls safer Make /proc/net/rxrpc_calls safer by stashing a copy of the peer pointer in the rxrpc_call struct and checking in the show routine that the peer pointer, the socket pointer and the local pointer obtained from the socket pointer aren't NULL before we use them. Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 4 +++- net/rxrpc/call_object.c | 3 +++ net/rxrpc/conn_client.c | 1 + net/rxrpc/proc.c | 27 +++++++++++++++++++-------- 4 files changed, 26 insertions(+), 9 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 7296039c537a..5292bf0bce52 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -407,6 +407,7 @@ enum rxrpc_call_state { struct rxrpc_call { struct rcu_head rcu; struct rxrpc_connection *conn; /* connection carrying call */ + struct rxrpc_peer *peer; /* Peer record for remote address */ struct rxrpc_sock *socket; /* socket responsible */ struct timer_list lifetimer; /* lifetime remaining on call */ struct timer_list deadspan; /* reap timer for re-ACK'ing, etc */ @@ -717,9 +718,10 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *, struct sockaddr_rxrpc *, gfp_t); struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *, gfp_t); -static inline void rxrpc_get_peer(struct rxrpc_peer *peer) +static inline struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *peer) { atomic_inc(&peer->usage); + return peer; } static inline diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 4af01805bfc7..f23432591a0f 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -315,6 +315,7 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx, chan = sp->hdr.cid & RXRPC_CHANNELMASK; candidate->socket = rx; candidate->conn = conn; + candidate->peer = conn->params.peer; candidate->cid = sp->hdr.cid; candidate->call_id = sp->hdr.callNumber; candidate->rx_data_post = 0; @@ -384,6 +385,7 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx, rcu_assign_pointer(conn->channels[chan].call, call); sock_hold(&rx->sk); rxrpc_get_connection(conn); + rxrpc_get_peer(call->peer); spin_unlock(&conn->channel_lock); spin_lock(&conn->params.peer->lock); @@ -610,6 +612,7 @@ static void rxrpc_rcu_destroy_call(struct rcu_head *rcu) struct rxrpc_call *call = container_of(rcu, struct rxrpc_call, rcu); rxrpc_purge_queue(&call->rx_queue); + rxrpc_put_peer(call->peer); kmem_cache_free(rxrpc_call_jar, call); } diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index fc32cc67c2de..2d43c99e5360 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -280,6 +280,7 @@ attached: found_channel: _debug("found chan"); call->conn = conn; + call->peer = rxrpc_get_peer(conn->params.peer); call->cid = conn->proto.cid | chan; call->call_id = ++conn->channels[chan].call_counter; conn->channels[chan].call_id = call->call_id; diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c index 31b7f36a39cb..53872631a66d 100644 --- a/net/rxrpc/proc.c +++ b/net/rxrpc/proc.c @@ -46,7 +46,9 @@ static void rxrpc_call_seq_stop(struct seq_file *seq, void *v) static int rxrpc_call_seq_show(struct seq_file *seq, void *v) { - struct rxrpc_connection *conn; + struct rxrpc_local *local; + struct rxrpc_sock *rx; + struct rxrpc_peer *peer; struct rxrpc_call *call; char lbuff[4 + 4 + 4 + 4 + 5 + 1], rbuff[4 + 4 + 4 + 4 + 5 + 1]; @@ -60,15 +62,24 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v) call = list_entry(v, struct rxrpc_call, link); - sprintf(lbuff, "%pI4:%u", - &call->socket->local->srx.transport.sin.sin_addr, - ntohs(call->socket->local->srx.transport.sin.sin_port)); + rx = READ_ONCE(call->socket); + if (rx) { + local = READ_ONCE(rx->local); + if (local) + sprintf(lbuff, "%pI4:%u", + &local->srx.transport.sin.sin_addr, + ntohs(local->srx.transport.sin.sin_port)); + else + strcpy(lbuff, "no_local"); + } else { + strcpy(lbuff, "no_socket"); + } - conn = call->conn; - if (conn) + peer = call->peer; + if (peer) sprintf(rbuff, "%pI4:%u", - &conn->params.peer->srx.transport.sin.sin_addr, - ntohs(conn->params.peer->srx.transport.sin.sin_port)); + &peer->srx.transport.sin.sin_addr, + ntohs(peer->srx.transport.sin.sin_port)); else strcpy(rbuff, "no_connection"); From 4d028b2c82991e2f9ae89ad90aeaaeb713495043 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 24 Aug 2016 07:30:52 +0100 Subject: [PATCH 0465/1715] rxrpc: Dup the main conn list for the proc interface The main connection list is used for two independent purposes: primarily it is used to find connections to reap and secondarily it is used to list connections in procfs. Split the procfs list out from the reap list. This allows us to stop using the reap list for client connections when they acquire a separate management strategy from service collections. The client connections will not be on a management single list, and sometimes won't be on a management list at all. This doesn't leave them floating, however, as they will also be on an rb-tree rooted on the socket so that the socket can find them to dispatch calls. Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 2 ++ net/rxrpc/conn_client.c | 1 + net/rxrpc/conn_object.c | 3 +++ net/rxrpc/conn_service.c | 1 + net/rxrpc/proc.c | 8 ++++---- 5 files changed, 11 insertions(+), 4 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 5292bf0bce52..2efbfba87cbe 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -310,6 +310,7 @@ struct rxrpc_connection { struct rb_node client_node; /* Node in local->client_conns */ struct rb_node service_node; /* Node in peer->service_conns */ }; + struct list_head proc_link; /* link in procfs list */ struct list_head link; /* link in master connection list */ struct sk_buff_head rx_queue; /* received conn-level packets */ const struct rxrpc_security *security; /* applied security module */ @@ -564,6 +565,7 @@ void rxrpc_reject_packets(struct rxrpc_local *); */ extern unsigned int rxrpc_connection_expiry; extern struct list_head rxrpc_connections; +extern struct list_head rxrpc_connection_proc_list; extern rwlock_t rxrpc_connection_lock; int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *, struct sk_buff *); diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 2d43c99e5360..6e1099ed1dbd 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -149,6 +149,7 @@ rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp) write_lock(&rxrpc_connection_lock); list_add_tail(&conn->link, &rxrpc_connections); + list_add_tail(&conn->proc_link, &rxrpc_connection_proc_list); write_unlock(&rxrpc_connection_lock); /* We steal the caller's peer ref. */ diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index b4af37ebb112..afc2d83d5995 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -27,6 +27,7 @@ unsigned int rxrpc_connection_expiry = 10 * 60; static void rxrpc_connection_reaper(struct work_struct *work); LIST_HEAD(rxrpc_connections); +LIST_HEAD(rxrpc_connection_proc_list); DEFINE_RWLOCK(rxrpc_connection_lock); static DECLARE_DELAYED_WORK(rxrpc_connection_reap, rxrpc_connection_reaper); @@ -44,6 +45,7 @@ struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp) spin_lock_init(&conn->channel_lock); init_waitqueue_head(&conn->channel_wq); INIT_WORK(&conn->processor, &rxrpc_process_connection); + INIT_LIST_HEAD(&conn->proc_link); INIT_LIST_HEAD(&conn->link); skb_queue_head_init(&conn->rx_queue); conn->security = &rxrpc_no_security; @@ -283,6 +285,7 @@ static void rxrpc_connection_reaper(struct work_struct *work) rxrpc_unpublish_service_conn(conn); list_move_tail(&conn->link, &graveyard); + list_del_init(&conn->proc_link); } write_unlock(&rxrpc_connection_lock); diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c index fd9027ccba8f..6ad6ae926cc3 100644 --- a/net/rxrpc/conn_service.c +++ b/net/rxrpc/conn_service.c @@ -187,6 +187,7 @@ struct rxrpc_connection *rxrpc_incoming_connection(struct rxrpc_local *local, write_lock(&rxrpc_connection_lock); list_add_tail(&conn->link, &rxrpc_connections); + list_add_tail(&conn->proc_link, &rxrpc_connection_proc_list); write_unlock(&rxrpc_connection_lock); /* Make the connection a target for incoming packets. */ diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c index 53872631a66d..060fb4892c39 100644 --- a/net/rxrpc/proc.c +++ b/net/rxrpc/proc.c @@ -126,13 +126,13 @@ const struct file_operations rxrpc_call_seq_fops = { static void *rxrpc_connection_seq_start(struct seq_file *seq, loff_t *_pos) { read_lock(&rxrpc_connection_lock); - return seq_list_start_head(&rxrpc_connections, *_pos); + return seq_list_start_head(&rxrpc_connection_proc_list, *_pos); } static void *rxrpc_connection_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - return seq_list_next(v, &rxrpc_connections, pos); + return seq_list_next(v, &rxrpc_connection_proc_list, pos); } static void rxrpc_connection_seq_stop(struct seq_file *seq, void *v) @@ -145,7 +145,7 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v) struct rxrpc_connection *conn; char lbuff[4 + 4 + 4 + 4 + 5 + 1], rbuff[4 + 4 + 4 + 4 + 5 + 1]; - if (v == &rxrpc_connections) { + if (v == &rxrpc_connection_proc_list) { seq_puts(seq, "Proto Local Remote " " SvID ConnID End Use State Key " @@ -154,7 +154,7 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v) return 0; } - conn = list_entry(v, struct rxrpc_connection, link); + conn = list_entry(v, struct rxrpc_connection, proc_link); sprintf(lbuff, "%pI4:%u", &conn->params.local->srx.transport.sin.sin_addr, From 45025bceef17ed5d5ed3006b63c85cf289f79dc8 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 24 Aug 2016 07:30:52 +0100 Subject: [PATCH 0466/1715] rxrpc: Improve management and caching of client connection objects Improve the management and caching of client rxrpc connection objects. From this point, client connections will be managed separately from service connections because AF_RXRPC controls the creation and re-use of client connections but doesn't have that luxury with service connections. Further, there will be limits on the numbers of client connections that may be live on a machine. No direct restriction will be placed on the number of client calls, excepting that each client connection can support a maximum of four concurrent calls. Note that, for a number of reasons, we don't want to simply discard a client connection as soon as the last call is apparently finished: (1) Security is negotiated per-connection and the context is then shared between all calls on that connection. The context can be negotiated again if the connection lapses, but that involves holding up calls whilst at least two packets are exchanged and various crypto bits are performed - so we'd ideally like to cache it for a little while at least. (2) If a packet goes astray, we will need to retransmit a final ACK or ABORT packet. To make this work, we need to keep around the connection details for a little while. (3) The locally held structures represent some amount of setup time, to be weighed against their occupation of memory when idle. To this end, the client connection cache is managed by a state machine on each connection. There are five states: (1) INACTIVE - The connection is not held in any list and may not have been exposed to the world. If it has been previously exposed, it was discarded from the idle list after expiring. (2) WAITING - The connection is waiting for the number of client conns to drop below the maximum capacity. Calls may be in progress upon it from when it was active and got culled. The connection is on the rxrpc_waiting_client_conns list which is kept in to-be-granted order. Culled conns with waiters go to the back of the queue just like new conns. (3) ACTIVE - The connection has at least one call in progress upon it, it may freely grant available channels to new calls and calls may be waiting on it for channels to become available. The connection is on the rxrpc_active_client_conns list which is kept in activation order for culling purposes. (4) CULLED - The connection got summarily culled to try and free up capacity. Calls currently in progress on the connection are allowed to continue, but new calls will have to wait. There can be no waiters in this state - the conn would have to go to the WAITING state instead. (5) IDLE - The connection has no calls in progress upon it and must have been exposed to the world (ie. the EXPOSED flag must be set). When it expires, the EXPOSED flag is cleared and the connection transitions to the INACTIVE state. The connection is on the rxrpc_idle_client_conns list which is kept in order of how soon they'll expire. A connection in the ACTIVE or CULLED state must have at least one active call upon it; if in the WAITING state it may have active calls upon it; other states may not have active calls. As long as a connection remains active and doesn't get culled, it may continue to process calls - even if there are connections on the wait queue. This simplifies things a bit and reduces the amount of checking we need do. There are a couple flags of relevance to the cache: (1) EXPOSED - The connection ID got exposed to the world. If this flag is set, an extra ref is added to the connection preventing it from being reaped when it has no calls outstanding. This flag is cleared and the ref dropped when a conn is discarded from the idle list. (2) DONT_REUSE - The connection should be discarded as soon as possible and should not be reused. This commit also provides a number of new settings: (*) /proc/net/rxrpc/max_client_conns The maximum number of live client connections. Above this number, new connections get added to the wait list and must wait for an active conn to be culled. Culled connections can be reused, but they will go to the back of the wait list and have to wait. (*) /proc/net/rxrpc/reap_client_conns If the number of desired connections exceeds the maximum above, the active connection list will be culled until there are only this many left in it. (*) /proc/net/rxrpc/idle_conn_expiry The normal expiry time for a client connection, provided there are fewer than reap_client_conns of them around. (*) /proc/net/rxrpc/idle_conn_fast_expiry The expedited expiry time, used when there are more than reap_client_conns of them around. Note that I combined the Tx wait queue with the channel grant wait queue to save space as only one of these should be in use at once. Note also that, for the moment, the service connection cache still uses the old connection management code. Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 56 ++- net/rxrpc/call_event.c | 4 +- net/rxrpc/call_object.c | 5 +- net/rxrpc/conn_client.c | 938 +++++++++++++++++++++++++++++++++------ net/rxrpc/conn_object.c | 71 ++- net/rxrpc/conn_service.c | 5 + net/rxrpc/output.c | 6 +- net/rxrpc/sysctl.c | 33 +- 8 files changed, 947 insertions(+), 171 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 2efbfba87cbe..c761124961cc 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -255,6 +255,9 @@ enum rxrpc_conn_flag { RXRPC_CONN_HAS_IDR, /* Has a client conn ID assigned */ RXRPC_CONN_IN_SERVICE_CONNS, /* Conn is in peer->service_conns */ RXRPC_CONN_IN_CLIENT_CONNS, /* Conn is in local->client_conns */ + RXRPC_CONN_EXPOSED, /* Conn has extra ref for exposure */ + RXRPC_CONN_DONT_REUSE, /* Don't reuse this connection */ + RXRPC_CONN_COUNTED, /* Counted by rxrpc_nr_client_conns */ }; /* @@ -264,6 +267,17 @@ enum rxrpc_conn_event { RXRPC_CONN_EV_CHALLENGE, /* Send challenge packet */ }; +/* + * The connection cache state. + */ +enum rxrpc_conn_cache_state { + RXRPC_CONN_CLIENT_INACTIVE, /* Conn is not yet listed */ + RXRPC_CONN_CLIENT_WAITING, /* Conn is on wait list, waiting for capacity */ + RXRPC_CONN_CLIENT_ACTIVE, /* Conn is on active list, doing calls */ + RXRPC_CONN_CLIENT_CULLED, /* Conn is culled and delisted, doing calls */ + RXRPC_CONN_CLIENT_IDLE, /* Conn is on idle list, doing mostly nothing */ +}; + /* * The connection protocol state. */ @@ -276,6 +290,7 @@ enum rxrpc_conn_proto_state { RXRPC_CONN_REMOTELY_ABORTED, /* Conn aborted by peer */ RXRPC_CONN_LOCALLY_ABORTED, /* Conn aborted locally */ RXRPC_CONN_NETWORK_ERROR, /* Conn terminated by network error */ + RXRPC_CONN_LOCAL_ERROR, /* Conn terminated by local error */ RXRPC_CONN__NR_STATES }; @@ -288,8 +303,14 @@ struct rxrpc_connection { struct rxrpc_conn_proto proto; struct rxrpc_conn_parameters params; - spinlock_t channel_lock; + atomic_t usage; + struct rcu_head rcu; + struct list_head cache_link; + spinlock_t channel_lock; + unsigned char active_chans; /* Mask of active channels */ +#define RXRPC_ACTIVE_CHANS_MASK ((1 << RXRPC_MAXCALLS) - 1) + struct list_head waiting_calls; /* Calls waiting for channels */ struct rxrpc_channel { struct rxrpc_call __rcu *call; /* Active call */ u32 call_id; /* ID of current call */ @@ -302,9 +323,7 @@ struct rxrpc_connection { u32 last_abort; }; } channels[RXRPC_MAXCALLS]; - wait_queue_head_t channel_wq; /* queue to wait for channel to become available */ - struct rcu_head rcu; struct work_struct processor; /* connection event processor */ union { struct rb_node client_node; /* Node in local->client_conns */ @@ -321,7 +340,7 @@ struct rxrpc_connection { unsigned long events; unsigned long idle_timestamp; /* Time at which last became idle */ spinlock_t state_lock; /* state-change lock */ - atomic_t usage; + enum rxrpc_conn_cache_state cache_state : 8; enum rxrpc_conn_proto_state state : 8; /* current state of connection */ u32 local_abort; /* local abort code */ u32 remote_abort; /* remote abort code */ @@ -329,7 +348,6 @@ struct rxrpc_connection { int debug_id; /* debug ID for printks */ atomic_t serial; /* packet serial number counter */ unsigned int hi_serial; /* highest serial number received */ - atomic_t avail_chans; /* number of channels available */ u8 size_align; /* data size alignment (for security) */ u8 header_size; /* rxrpc + security header size */ u8 security_size; /* security header size */ @@ -351,6 +369,7 @@ enum rxrpc_call_flag { RXRPC_CALL_HAS_USERID, /* has a user ID attached */ RXRPC_CALL_EXPECT_OOS, /* expect out of sequence packets */ RXRPC_CALL_IS_SERVICE, /* Call is service call */ + RXRPC_CALL_EXPOSED, /* The call was exposed to the world */ }; /* @@ -417,13 +436,14 @@ struct rxrpc_call { struct work_struct destroyer; /* call destroyer */ struct work_struct processor; /* packet processor and ACK generator */ struct list_head link; /* link in master call list */ + struct list_head chan_wait_link; /* Link in conn->waiting_calls */ struct hlist_node error_link; /* link in error distribution list */ struct list_head accept_link; /* calls awaiting acceptance */ struct rb_node sock_node; /* node in socket call tree */ struct sk_buff_head rx_queue; /* received packets */ struct sk_buff_head rx_oos_queue; /* packets received out of sequence */ struct sk_buff *tx_pending; /* Tx socket buffer being filled */ - wait_queue_head_t tx_waitq; /* wait for Tx window space to become available */ + wait_queue_head_t waitq; /* Wait queue for channel or Tx */ __be32 crypto_buf[2]; /* Temporary packet crypto buffer */ unsigned long user_call_ID; /* user-defined call ID */ unsigned long creation_jif; /* time of call creation */ @@ -546,12 +566,19 @@ static inline bool rxrpc_is_client_call(const struct rxrpc_call *call) /* * conn_client.c */ +extern unsigned int rxrpc_max_client_connections; +extern unsigned int rxrpc_reap_client_connections; +extern unsigned int rxrpc_conn_idle_client_expiry; +extern unsigned int rxrpc_conn_idle_client_fast_expiry; extern struct idr rxrpc_client_conn_ids; void rxrpc_destroy_client_conn_ids(void); int rxrpc_connect_call(struct rxrpc_call *, struct rxrpc_conn_parameters *, struct sockaddr_rxrpc *, gfp_t); -void rxrpc_unpublish_client_conn(struct rxrpc_connection *); +void rxrpc_expose_client_call(struct rxrpc_call *); +void rxrpc_disconnect_client_call(struct rxrpc_call *); +void rxrpc_put_client_conn(struct rxrpc_connection *); +void __exit rxrpc_destroy_all_client_connections(void); /* * conn_event.c @@ -572,8 +599,9 @@ int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *, struct sk_buff *); struct rxrpc_connection *rxrpc_alloc_connection(gfp_t); struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *, struct sk_buff *); -void __rxrpc_disconnect_call(struct rxrpc_call *); +void __rxrpc_disconnect_call(struct rxrpc_connection *, struct rxrpc_call *); void rxrpc_disconnect_call(struct rxrpc_call *); +void rxrpc_kill_connection(struct rxrpc_connection *); void __rxrpc_put_connection(struct rxrpc_connection *); void __exit rxrpc_destroy_all_connections(void); @@ -600,8 +628,16 @@ struct rxrpc_connection *rxrpc_get_connection_maybe(struct rxrpc_connection *con static inline void rxrpc_put_connection(struct rxrpc_connection *conn) { - if (conn && atomic_dec_return(&conn->usage) == 1) - __rxrpc_put_connection(conn); + if (!conn) + return; + + if (rxrpc_conn_is_client(conn)) { + if (atomic_dec_and_test(&conn->usage)) + rxrpc_put_client_conn(conn); + } else { + if (atomic_dec_return(&conn->usage) == 1) + __rxrpc_put_connection(conn); + } } diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 3d1961d82325..5292bcfd8816 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -193,6 +193,8 @@ static void rxrpc_resend(struct rxrpc_call *call) stop = true; sp->resend_at = jiffies + 3; } else { + if (rxrpc_is_client_call(call)) + rxrpc_expose_client_call(call); sp->resend_at = jiffies + rxrpc_resend_timeout; } @@ -378,7 +380,7 @@ static void rxrpc_rotate_tx_window(struct rxrpc_call *call, u32 hard) call->acks_hard++; } - wake_up(&call->tx_waitq); + wake_up(&call->waitq); } /* diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index f23432591a0f..e7cbcc4a87cf 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -127,10 +127,11 @@ static struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp) INIT_WORK(&call->destroyer, &rxrpc_destroy_call); INIT_WORK(&call->processor, &rxrpc_process_call); INIT_LIST_HEAD(&call->link); + INIT_LIST_HEAD(&call->chan_wait_link); INIT_LIST_HEAD(&call->accept_link); skb_queue_head_init(&call->rx_queue); skb_queue_head_init(&call->rx_oos_queue); - init_waitqueue_head(&call->tx_waitq); + init_waitqueue_head(&call->waitq); spin_lock_init(&call->lock); rwlock_init(&call->state_lock); atomic_set(&call->usage, 1); @@ -358,7 +359,7 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx, call->debug_id, rxrpc_call_states[call->state]); if (call->state >= RXRPC_CALL_COMPLETE) { - __rxrpc_disconnect_call(call); + __rxrpc_disconnect_call(conn, call); } else { spin_unlock(&conn->channel_lock); kmem_cache_free(rxrpc_call_jar, candidate); diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 6e1099ed1dbd..349402b08e5a 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -7,6 +7,68 @@ * modify it under the terms of the GNU General Public Licence * as published by the Free Software Foundation; either version * 2 of the Licence, or (at your option) any later version. + * + * + * Client connections need to be cached for a little while after they've made a + * call so as to handle retransmitted DATA packets in case the server didn't + * receive the final ACK or terminating ABORT we sent it. + * + * Client connections can be in one of a number of cache states: + * + * (1) INACTIVE - The connection is not held in any list and may not have been + * exposed to the world. If it has been previously exposed, it was + * discarded from the idle list after expiring. + * + * (2) WAITING - The connection is waiting for the number of client conns to + * drop below the maximum capacity. Calls may be in progress upon it from + * when it was active and got culled. + * + * The connection is on the rxrpc_waiting_client_conns list which is kept + * in to-be-granted order. Culled conns with waiters go to the back of + * the queue just like new conns. + * + * (3) ACTIVE - The connection has at least one call in progress upon it, it + * may freely grant available channels to new calls and calls may be + * waiting on it for channels to become available. + * + * The connection is on the rxrpc_active_client_conns list which is kept + * in activation order for culling purposes. + * + * rxrpc_nr_active_client_conns is held incremented also. + * + * (4) CULLED - The connection got summarily culled to try and free up + * capacity. Calls currently in progress on the connection are allowed to + * continue, but new calls will have to wait. There can be no waiters in + * this state - the conn would have to go to the WAITING state instead. + * + * (5) IDLE - The connection has no calls in progress upon it and must have + * been exposed to the world (ie. the EXPOSED flag must be set). When it + * expires, the EXPOSED flag is cleared and the connection transitions to + * the INACTIVE state. + * + * The connection is on the rxrpc_idle_client_conns list which is kept in + * order of how soon they'll expire. + * + * There are flags of relevance to the cache: + * + * (1) EXPOSED - The connection ID got exposed to the world. If this flag is + * set, an extra ref is added to the connection preventing it from being + * reaped when it has no calls outstanding. This flag is cleared and the + * ref dropped when a conn is discarded from the idle list. + * + * This allows us to move terminal call state retransmission to the + * connection and to discard the call immediately we think it is done + * with. It also give us a chance to reuse the connection. + * + * (2) DONT_REUSE - The connection should be discarded as soon as possible and + * should not be reused. This is set when an exclusive connection is used + * or a call ID counter overflows. + * + * The caching state may only be changed if the cache lock is held. + * + * There are two idle client connection expiry durations. If the total number + * of connections is below the reap threshold, we use the normal duration; if + * it's above, we use the fast duration. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -16,22 +78,37 @@ #include #include "ar-internal.h" +__read_mostly unsigned int rxrpc_max_client_connections = 1000; +__read_mostly unsigned int rxrpc_reap_client_connections = 900; +__read_mostly unsigned int rxrpc_conn_idle_client_expiry = 2 * 60 * HZ; +__read_mostly unsigned int rxrpc_conn_idle_client_fast_expiry = 2 * HZ; + +static unsigned int rxrpc_nr_client_conns; +static unsigned int rxrpc_nr_active_client_conns; +static __read_mostly bool rxrpc_kill_all_client_conns; + +static DEFINE_SPINLOCK(rxrpc_client_conn_cache_lock); +static DEFINE_SPINLOCK(rxrpc_client_conn_discard_mutex); +static LIST_HEAD(rxrpc_waiting_client_conns); +static LIST_HEAD(rxrpc_active_client_conns); +static LIST_HEAD(rxrpc_idle_client_conns); + /* * We use machine-unique IDs for our client connections. */ DEFINE_IDR(rxrpc_client_conn_ids); static DEFINE_SPINLOCK(rxrpc_conn_id_lock); +static void rxrpc_cull_active_client_conns(void); +static void rxrpc_discard_expired_client_conns(struct work_struct *); + +static DECLARE_DELAYED_WORK(rxrpc_client_conn_reap, + rxrpc_discard_expired_client_conns); + /* * Get a connection ID and epoch for a client connection from the global pool. * The connection struct pointer is then recorded in the idr radix tree. The * epoch is changed if this wraps. - * - * TODO: The IDR tree gets very expensive on memory if the connection IDs are - * widely scattered throughout the number space, so we shall need to retire - * connections that have, say, an ID more than four times the maximum number of - * client conns away from the current allocation point to try and keep the IDs - * concentrated. We will also need to retire connections from an old epoch. */ static int rxrpc_get_client_connection_id(struct rxrpc_connection *conn, gfp_t gfp) @@ -114,8 +191,7 @@ void rxrpc_destroy_client_conn_ids(void) } /* - * Allocate a client connection. The caller must take care to clear any - * padding bytes in *cp. + * Allocate a client connection. */ static struct rxrpc_connection * rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp) @@ -131,6 +207,10 @@ rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp) return ERR_PTR(-ENOMEM); } + atomic_set(&conn->usage, 1); + if (conn->params.exclusive) + __set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags); + conn->params = *cp; conn->out_clientflag = RXRPC_CLIENT_INITIATED; conn->state = RXRPC_CONN_CLIENT; @@ -148,7 +228,6 @@ rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp) goto error_2; write_lock(&rxrpc_connection_lock); - list_add_tail(&conn->link, &rxrpc_connections); list_add_tail(&conn->proc_link, &rxrpc_connection_proc_list); write_unlock(&rxrpc_connection_lock); @@ -171,32 +250,68 @@ error_0: } /* - * find a connection for a call - * - called in process context with IRQs enabled + * Determine if a connection may be reused. */ -int rxrpc_connect_call(struct rxrpc_call *call, - struct rxrpc_conn_parameters *cp, - struct sockaddr_rxrpc *srx, - gfp_t gfp) +static bool rxrpc_may_reuse_conn(struct rxrpc_connection *conn) +{ + int id_cursor, id, distance, limit; + + if (test_bit(RXRPC_CONN_DONT_REUSE, &conn->flags)) + goto dont_reuse; + + if (conn->proto.epoch != rxrpc_epoch) + goto mark_dont_reuse; + + /* The IDR tree gets very expensive on memory if the connection IDs are + * widely scattered throughout the number space, so we shall want to + * kill off connections that, say, have an ID more than about four + * times the maximum number of client conns away from the current + * allocation point to try and keep the IDs concentrated. + */ + id_cursor = READ_ONCE(rxrpc_client_conn_ids.cur); + id = conn->proto.cid >> RXRPC_CIDSHIFT; + distance = id - id_cursor; + if (distance < 0) + distance = -distance; + limit = round_up(rxrpc_max_client_connections, IDR_SIZE) * 4; + if (distance > limit) + goto mark_dont_reuse; + + return true; + +mark_dont_reuse: + set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags); +dont_reuse: + return false; +} + +/* + * Create or find a client connection to use for a call. + * + * If we return with a connection, the call will be on its waiting list. It's + * left to the caller to assign a channel and wake up the call. + */ +static int rxrpc_get_client_conn(struct rxrpc_call *call, + struct rxrpc_conn_parameters *cp, + struct sockaddr_rxrpc *srx, + gfp_t gfp) { struct rxrpc_connection *conn, *candidate = NULL; struct rxrpc_local *local = cp->local; struct rb_node *p, **pp, *parent; long diff; - int chan; - - DECLARE_WAITQUEUE(myself, current); + int ret = -ENOMEM; _enter("{%d,%lx},", call->debug_id, call->user_call_ID); cp->peer = rxrpc_lookup_peer(cp->local, srx, gfp); if (!cp->peer) - return -ENOMEM; + goto error; + /* If the connection is not meant to be exclusive, search the available + * connections to see if the connection we want to use already exists. + */ if (!cp->exclusive) { - /* Search for a existing client connection unless this is going - * to be a connection that's used exclusively for a single call. - */ _debug("search 1"); spin_lock(&local->client_conns_lock); p = local->client_conns.rb_node; @@ -207,39 +322,55 @@ int rxrpc_connect_call(struct rxrpc_call *call, diff = (cmp(peer) ?: cmp(key) ?: cmp(security_level)); - if (diff < 0) +#undef cmp + if (diff < 0) { p = p->rb_left; - else if (diff > 0) + } else if (diff > 0) { p = p->rb_right; - else - goto found_extant_conn; + } else { + if (rxrpc_may_reuse_conn(conn) && + rxrpc_get_connection_maybe(conn)) + goto found_extant_conn; + /* The connection needs replacing. It's better + * to effect that when we have something to + * replace it with so that we don't have to + * rebalance the tree twice. + */ + break; + } } spin_unlock(&local->client_conns_lock); } - /* We didn't find a connection or we want an exclusive one. */ - _debug("get new conn"); + /* There wasn't a connection yet or we need an exclusive connection. + * We need to create a candidate and then potentially redo the search + * in case we're racing with another thread also trying to connect on a + * shareable connection. + */ + _debug("new conn"); candidate = rxrpc_alloc_client_connection(cp, gfp); - if (!candidate) { - _leave(" = -ENOMEM"); - return -ENOMEM; + if (IS_ERR(candidate)) { + ret = PTR_ERR(candidate); + goto error_peer; } + /* Add the call to the new connection's waiting list in case we're + * going to have to wait for the connection to come live. It's our + * connection, so we want first dibs on the channel slots. We would + * normally have to take channel_lock but we do this before anyone else + * can see the connection. + */ + list_add_tail(&call->chan_wait_link, &candidate->waiting_calls); + if (cp->exclusive) { - /* Assign the call on an exclusive connection to channel 0 and - * don't add the connection to the endpoint's shareable conn - * lookup tree. - */ - _debug("exclusive chan 0"); - conn = candidate; - atomic_set(&conn->avail_chans, RXRPC_MAXCALLS - 1); - spin_lock(&conn->channel_lock); - chan = 0; - goto found_channel; + call->conn = candidate; + _leave(" = 0 [exclusive %d]", candidate->debug_id); + return 0; } - /* We need to redo the search before attempting to add a new connection - * lest we race with someone else adding a conflicting instance. + /* Publish the new connection for userspace to find. We need to redo + * the search before doing this lest we race with someone else adding a + * conflicting instance. */ _debug("search 2"); spin_lock(&local->client_conns_lock); @@ -250,123 +381,672 @@ int rxrpc_connect_call(struct rxrpc_call *call, parent = *pp; conn = rb_entry(parent, struct rxrpc_connection, client_node); +#define cmp(X) ((long)conn->params.X - (long)candidate->params.X) diff = (cmp(peer) ?: cmp(key) ?: cmp(security_level)); - if (diff < 0) +#undef cmp + if (diff < 0) { pp = &(*pp)->rb_left; - else if (diff > 0) + } else if (diff > 0) { pp = &(*pp)->rb_right; - else - goto found_extant_conn; + } else { + if (rxrpc_may_reuse_conn(conn) && + rxrpc_get_connection_maybe(conn)) + goto found_extant_conn; + /* The old connection is from an outdated epoch. */ + _debug("replace conn"); + clear_bit(RXRPC_CONN_IN_CLIENT_CONNS, &conn->flags); + rb_replace_node(&conn->client_node, + &candidate->client_node, + &local->client_conns); + goto candidate_published; + } } - /* The second search also failed; simply add the new connection with - * the new call in channel 0. Note that we need to take the channel - * lock before dropping the client conn lock. - */ _debug("new conn"); - set_bit(RXRPC_CONN_IN_CLIENT_CONNS, &candidate->flags); rb_link_node(&candidate->client_node, parent, pp); rb_insert_color(&candidate->client_node, &local->client_conns); -attached: - conn = candidate; - candidate = NULL; - atomic_set(&conn->avail_chans, RXRPC_MAXCALLS - 1); - spin_lock(&conn->channel_lock); +candidate_published: + set_bit(RXRPC_CONN_IN_CLIENT_CONNS, &candidate->flags); + call->conn = candidate; spin_unlock(&local->client_conns_lock); - chan = 0; - -found_channel: - _debug("found chan"); - call->conn = conn; - call->peer = rxrpc_get_peer(conn->params.peer); - call->cid = conn->proto.cid | chan; - call->call_id = ++conn->channels[chan].call_counter; - conn->channels[chan].call_id = call->call_id; - rcu_assign_pointer(conn->channels[chan].call, call); - - _net("CONNECT call %d on conn %d", call->debug_id, conn->debug_id); - - spin_unlock(&conn->channel_lock); - rxrpc_put_peer(cp->peer); - cp->peer = NULL; - _leave(" = %p {u=%d}", conn, atomic_read(&conn->usage)); + _leave(" = 0 [new %d]", candidate->debug_id); return 0; - /* We found a potentially suitable connection already in existence. If - * we can reuse it (ie. its usage count hasn't been reduced to 0 by the - * reaper), discard any candidate we may have allocated, and try to get - * a channel on this one, otherwise we have to replace it. + /* We come here if we found a suitable connection already in existence. + * Discard any candidate we may have allocated, and try to get a + * channel on this one. */ found_extant_conn: _debug("found conn"); - if (!rxrpc_get_connection_maybe(conn)) { - set_bit(RXRPC_CONN_IN_CLIENT_CONNS, &candidate->flags); - rb_replace_node(&conn->client_node, - &candidate->client_node, - &local->client_conns); - clear_bit(RXRPC_CONN_IN_CLIENT_CONNS, &conn->flags); - goto attached; - } - spin_unlock(&local->client_conns_lock); rxrpc_put_connection(candidate); + candidate = NULL; - if (!atomic_add_unless(&conn->avail_chans, -1, 0)) { - if (!gfpflags_allow_blocking(gfp)) { - rxrpc_put_connection(conn); - _leave(" = -EAGAIN"); - return -EAGAIN; - } - - add_wait_queue(&conn->channel_wq, &myself); - for (;;) { - set_current_state(TASK_INTERRUPTIBLE); - if (atomic_add_unless(&conn->avail_chans, -1, 0)) - break; - if (signal_pending(current)) - goto interrupted; - schedule(); - } - remove_wait_queue(&conn->channel_wq, &myself); - __set_current_state(TASK_RUNNING); - } - - /* The connection allegedly now has a free channel and we can now - * attach the call to it. - */ spin_lock(&conn->channel_lock); + call->conn = conn; + list_add(&call->chan_wait_link, &conn->waiting_calls); + spin_unlock(&conn->channel_lock); + _leave(" = 0 [extant %d]", conn->debug_id); + return 0; - for (chan = 0; chan < RXRPC_MAXCALLS; chan++) - if (!conn->channels[chan].call) - goto found_channel; - BUG(); - -interrupted: - remove_wait_queue(&conn->channel_wq, &myself); - __set_current_state(TASK_RUNNING); - rxrpc_put_connection(conn); +error_peer: rxrpc_put_peer(cp->peer); cp->peer = NULL; - _leave(" = -ERESTARTSYS"); - return -ERESTARTSYS; +error: + _leave(" = %d", ret); + return ret; } /* - * Remove a client connection from the local endpoint's tree, thereby removing - * it as a target for reuse for new client calls. + * Activate a connection. */ -void rxrpc_unpublish_client_conn(struct rxrpc_connection *conn) +static void rxrpc_activate_conn(struct rxrpc_connection *conn) { - struct rxrpc_local *local = conn->params.local; + conn->cache_state = RXRPC_CONN_CLIENT_ACTIVE; + rxrpc_nr_active_client_conns++; + list_move_tail(&conn->cache_link, &rxrpc_active_client_conns); +} - spin_lock(&local->client_conns_lock); - if (test_and_clear_bit(RXRPC_CONN_IN_CLIENT_CONNS, &conn->flags)) - rb_erase(&conn->client_node, &local->client_conns); - spin_unlock(&local->client_conns_lock); +/* + * Attempt to animate a connection for a new call. + * + * If it's not exclusive, the connection is in the endpoint tree, and we're in + * the conn's list of those waiting to grab a channel. There is, however, a + * limit on the number of live connections allowed at any one time, so we may + * have to wait for capacity to become available. + * + * Note that a connection on the waiting queue might *also* have active + * channels if it has been culled to make space and then re-requested by a new + * call. + */ +static void rxrpc_animate_client_conn(struct rxrpc_connection *conn) +{ + unsigned int nr_conns; + + _enter("%d,%d", conn->debug_id, conn->cache_state); + + if (conn->cache_state == RXRPC_CONN_CLIENT_ACTIVE) + goto out; + + spin_lock(&rxrpc_client_conn_cache_lock); + + nr_conns = rxrpc_nr_client_conns; + if (!test_and_set_bit(RXRPC_CONN_COUNTED, &conn->flags)) + rxrpc_nr_client_conns = nr_conns + 1; + + switch (conn->cache_state) { + case RXRPC_CONN_CLIENT_ACTIVE: + case RXRPC_CONN_CLIENT_WAITING: + break; + + case RXRPC_CONN_CLIENT_INACTIVE: + case RXRPC_CONN_CLIENT_CULLED: + case RXRPC_CONN_CLIENT_IDLE: + if (nr_conns >= rxrpc_max_client_connections) + goto wait_for_capacity; + goto activate_conn; + + default: + BUG(); + } + +out_unlock: + spin_unlock(&rxrpc_client_conn_cache_lock); +out: + _leave(" [%d]", conn->cache_state); + return; + +activate_conn: + _debug("activate"); + rxrpc_activate_conn(conn); + goto out_unlock; + +wait_for_capacity: + _debug("wait"); + conn->cache_state = RXRPC_CONN_CLIENT_WAITING; + list_move_tail(&conn->cache_link, &rxrpc_waiting_client_conns); + goto out_unlock; +} + +/* + * Deactivate a channel. + */ +static void rxrpc_deactivate_one_channel(struct rxrpc_connection *conn, + unsigned int channel) +{ + struct rxrpc_channel *chan = &conn->channels[channel]; + + rcu_assign_pointer(chan->call, NULL); + conn->active_chans &= ~(1 << channel); +} + +/* + * Assign a channel to the call at the front of the queue and wake the call up. + * We don't increment the callNumber counter until this number has been exposed + * to the world. + */ +static void rxrpc_activate_one_channel(struct rxrpc_connection *conn, + unsigned int channel) +{ + struct rxrpc_channel *chan = &conn->channels[channel]; + struct rxrpc_call *call = list_entry(conn->waiting_calls.next, + struct rxrpc_call, chan_wait_link); + u32 call_id = chan->call_counter + 1; + + list_del_init(&call->chan_wait_link); + conn->active_chans |= 1 << channel; + call->peer = rxrpc_get_peer(conn->params.peer); + call->cid = conn->proto.cid | channel; + call->call_id = call_id; + + _net("CONNECT call %08x:%08x as call %d on conn %d", + call->cid, call->call_id, call->debug_id, conn->debug_id); + + /* Paired with the read barrier in rxrpc_wait_for_channel(). This + * orders cid and epoch in the connection wrt to call_id without the + * need to take the channel_lock. + * + * We provisionally assign a callNumber at this point, but we don't + * confirm it until the call is about to be exposed. + * + * TODO: Pair with a barrier in the data_ready handler when that looks + * at the call ID through a connection channel. + */ + smp_wmb(); + chan->call_id = call_id; + rcu_assign_pointer(chan->call, call); + wake_up(&call->waitq); +} + +/* + * Assign channels and callNumbers to waiting calls. + */ +static void rxrpc_activate_channels(struct rxrpc_connection *conn) +{ + unsigned char mask; + + _enter("%d", conn->debug_id); + + if (conn->cache_state != RXRPC_CONN_CLIENT_ACTIVE || + conn->active_chans == RXRPC_ACTIVE_CHANS_MASK) + return; + + spin_lock(&conn->channel_lock); + + while (!list_empty(&conn->waiting_calls) && + (mask = ~conn->active_chans, + mask &= RXRPC_ACTIVE_CHANS_MASK, + mask != 0)) + rxrpc_activate_one_channel(conn, __ffs(mask)); + + spin_unlock(&conn->channel_lock); + _leave(""); +} + +/* + * Wait for a callNumber and a channel to be granted to a call. + */ +static int rxrpc_wait_for_channel(struct rxrpc_call *call, gfp_t gfp) +{ + int ret = 0; + + _enter("%d", call->debug_id); + + if (!call->call_id) { + DECLARE_WAITQUEUE(myself, current); + + if (!gfpflags_allow_blocking(gfp)) { + ret = -EAGAIN; + goto out; + } + + add_wait_queue_exclusive(&call->waitq, &myself); + for (;;) { + set_current_state(TASK_INTERRUPTIBLE); + if (call->call_id) + break; + if (signal_pending(current)) { + ret = -ERESTARTSYS; + break; + } + schedule(); + } + remove_wait_queue(&call->waitq, &myself); + __set_current_state(TASK_RUNNING); + } + + /* Paired with the write barrier in rxrpc_activate_one_channel(). */ + smp_rmb(); + +out: + _leave(" = %d", ret); + return ret; +} + +/* + * find a connection for a call + * - called in process context with IRQs enabled + */ +int rxrpc_connect_call(struct rxrpc_call *call, + struct rxrpc_conn_parameters *cp, + struct sockaddr_rxrpc *srx, + gfp_t gfp) +{ + int ret; + + _enter("{%d,%lx},", call->debug_id, call->user_call_ID); + + rxrpc_discard_expired_client_conns(NULL); + rxrpc_cull_active_client_conns(); + + ret = rxrpc_get_client_conn(call, cp, srx, gfp); + if (ret < 0) + return ret; + + rxrpc_animate_client_conn(call->conn); + rxrpc_activate_channels(call->conn); + + ret = rxrpc_wait_for_channel(call, gfp); + if (ret < 0) + rxrpc_disconnect_client_call(call); + + _leave(" = %d", ret); + return ret; +} + +/* + * Note that a connection is about to be exposed to the world. Once it is + * exposed, we maintain an extra ref on it that stops it from being summarily + * discarded before it's (a) had a chance to deal with retransmission and (b) + * had a chance at re-use (the per-connection security negotiation is + * expensive). + */ +static void rxrpc_expose_client_conn(struct rxrpc_connection *conn) +{ + if (!test_and_set_bit(RXRPC_CONN_EXPOSED, &conn->flags)) + rxrpc_get_connection(conn); +} + +/* + * Note that a call, and thus a connection, is about to be exposed to the + * world. + */ +void rxrpc_expose_client_call(struct rxrpc_call *call) +{ + struct rxrpc_connection *conn = call->conn; + struct rxrpc_channel *chan = + &conn->channels[call->cid & RXRPC_CHANNELMASK]; + + if (!test_and_set_bit(RXRPC_CALL_EXPOSED, &call->flags)) { + /* Mark the call ID as being used. If the callNumber counter + * exceeds ~2 billion, we kill the connection after its + * outstanding calls have finished so that the counter doesn't + * wrap. + */ + chan->call_counter++; + if (chan->call_counter >= INT_MAX) + set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags); + rxrpc_expose_client_conn(conn); + } +} + +/* + * Disconnect a client call. + */ +void rxrpc_disconnect_client_call(struct rxrpc_call *call) +{ + unsigned int channel = call->cid & RXRPC_CHANNELMASK; + struct rxrpc_connection *conn = call->conn; + struct rxrpc_channel *chan = &conn->channels[channel]; + + call->conn = NULL; + + spin_lock(&conn->channel_lock); + + /* Calls that have never actually been assigned a channel can simply be + * discarded. If the conn didn't get used either, it will follow + * immediately unless someone else grabs it in the meantime. + */ + if (!list_empty(&call->chan_wait_link)) { + _debug("call is waiting"); + ASSERTCMP(call->call_id, ==, 0); + ASSERT(!test_bit(RXRPC_CALL_EXPOSED, &call->flags)); + list_del_init(&call->chan_wait_link); + + /* We must deactivate or idle the connection if it's now + * waiting for nothing. + */ + spin_lock(&rxrpc_client_conn_cache_lock); + if (conn->cache_state == RXRPC_CONN_CLIENT_WAITING && + list_empty(&conn->waiting_calls) && + !conn->active_chans) + goto idle_connection; + goto out; + } + + ASSERTCMP(rcu_access_pointer(chan->call), ==, call); + ASSERTCMP(atomic_read(&conn->usage), >=, 2); + + /* If a client call was exposed to the world, we save the result for + * retransmission. + * + * We use a barrier here so that the call number and abort code can be + * read without needing to take a lock. + * + * TODO: Make the incoming packet handler check this and handle + * terminal retransmission without requiring access to the call. + */ + if (test_bit(RXRPC_CALL_EXPOSED, &call->flags)) { + _debug("exposed %u,%u", call->call_id, call->local_abort); + __rxrpc_disconnect_call(conn, call); + } + + /* See if we can pass the channel directly to another call. */ + if (conn->cache_state == RXRPC_CONN_CLIENT_ACTIVE && + !list_empty(&conn->waiting_calls)) { + _debug("pass chan"); + rxrpc_activate_one_channel(conn, channel); + goto out_2; + } + + /* Things are more complex and we need the cache lock. We might be + * able to simply idle the conn or it might now be lurking on the wait + * list. It might even get moved back to the active list whilst we're + * waiting for the lock. + */ + spin_lock(&rxrpc_client_conn_cache_lock); + + switch (conn->cache_state) { + case RXRPC_CONN_CLIENT_ACTIVE: + if (list_empty(&conn->waiting_calls)) { + rxrpc_deactivate_one_channel(conn, channel); + if (!conn->active_chans) { + rxrpc_nr_active_client_conns--; + goto idle_connection; + } + goto out; + } + + _debug("pass chan 2"); + rxrpc_activate_one_channel(conn, channel); + goto out; + + case RXRPC_CONN_CLIENT_CULLED: + rxrpc_deactivate_one_channel(conn, channel); + ASSERT(list_empty(&conn->waiting_calls)); + if (!conn->active_chans) + goto idle_connection; + goto out; + + case RXRPC_CONN_CLIENT_WAITING: + rxrpc_deactivate_one_channel(conn, channel); + goto out; + + default: + BUG(); + } + +out: + spin_unlock(&rxrpc_client_conn_cache_lock); +out_2: + spin_unlock(&conn->channel_lock); + rxrpc_put_connection(conn); + _leave(""); + return; + +idle_connection: + /* As no channels remain active, the connection gets deactivated + * immediately or moved to the idle list for a short while. + */ + if (test_bit(RXRPC_CONN_EXPOSED, &conn->flags)) { + _debug("make idle"); + conn->idle_timestamp = jiffies; + conn->cache_state = RXRPC_CONN_CLIENT_IDLE; + list_move_tail(&conn->cache_link, &rxrpc_idle_client_conns); + if (rxrpc_idle_client_conns.next == &conn->cache_link && + !rxrpc_kill_all_client_conns) + queue_delayed_work(rxrpc_workqueue, + &rxrpc_client_conn_reap, + rxrpc_conn_idle_client_expiry); + } else { + _debug("make inactive"); + conn->cache_state = RXRPC_CONN_CLIENT_INACTIVE; + list_del_init(&conn->cache_link); + } + goto out; +} + +/* + * Clean up a dead client connection. + */ +static struct rxrpc_connection * +rxrpc_put_one_client_conn(struct rxrpc_connection *conn) +{ + struct rxrpc_connection *next; + struct rxrpc_local *local = conn->params.local; + unsigned int nr_conns; + + if (test_bit(RXRPC_CONN_IN_CLIENT_CONNS, &conn->flags)) { + spin_lock(&local->client_conns_lock); + if (test_and_clear_bit(RXRPC_CONN_IN_CLIENT_CONNS, + &conn->flags)) + rb_erase(&conn->client_node, &local->client_conns); + spin_unlock(&local->client_conns_lock); + } rxrpc_put_client_connection_id(conn); + + ASSERTCMP(conn->cache_state, ==, RXRPC_CONN_CLIENT_INACTIVE); + + if (!test_bit(RXRPC_CONN_COUNTED, &conn->flags)) + return NULL; + + spin_lock(&rxrpc_client_conn_cache_lock); + nr_conns = --rxrpc_nr_client_conns; + + next = NULL; + if (nr_conns < rxrpc_max_client_connections && + !list_empty(&rxrpc_waiting_client_conns)) { + next = list_entry(rxrpc_waiting_client_conns.next, + struct rxrpc_connection, cache_link); + rxrpc_get_connection(next); + rxrpc_activate_conn(next); + } + + spin_unlock(&rxrpc_client_conn_cache_lock); + rxrpc_kill_connection(conn); + + if (next) + rxrpc_activate_channels(next); + + /* We need to get rid of the temporary ref we took upon next, but we + * can't call rxrpc_put_connection() recursively. + */ + return next; +} + +/* + * Clean up a dead client connections. + */ +void rxrpc_put_client_conn(struct rxrpc_connection *conn) +{ + struct rxrpc_connection *next; + + do { + _enter("%p{u=%d,d=%d}", + conn, atomic_read(&conn->usage), conn->debug_id); + + next = rxrpc_put_one_client_conn(conn); + + if (!next) + break; + conn = next; + } while (atomic_dec_and_test(&conn->usage)); + + _leave(""); +} + +/* + * Kill the longest-active client connections to make room for new ones. + */ +static void rxrpc_cull_active_client_conns(void) +{ + struct rxrpc_connection *conn; + unsigned int nr_conns = rxrpc_nr_client_conns; + unsigned int nr_active, limit; + + _enter(""); + + ASSERTCMP(nr_conns, >=, 0); + if (nr_conns < rxrpc_max_client_connections) { + _leave(" [ok]"); + return; + } + limit = rxrpc_reap_client_connections; + + spin_lock(&rxrpc_client_conn_cache_lock); + nr_active = rxrpc_nr_active_client_conns; + + while (nr_active > limit) { + ASSERT(!list_empty(&rxrpc_active_client_conns)); + conn = list_entry(rxrpc_active_client_conns.next, + struct rxrpc_connection, cache_link); + ASSERTCMP(conn->cache_state, ==, RXRPC_CONN_CLIENT_ACTIVE); + + if (list_empty(&conn->waiting_calls)) { + conn->cache_state = RXRPC_CONN_CLIENT_CULLED; + list_del_init(&conn->cache_link); + } else { + conn->cache_state = RXRPC_CONN_CLIENT_WAITING; + list_move_tail(&conn->cache_link, + &rxrpc_waiting_client_conns); + } + + nr_active--; + } + + rxrpc_nr_active_client_conns = nr_active; + spin_unlock(&rxrpc_client_conn_cache_lock); + ASSERTCMP(nr_active, >=, 0); + _leave(" [culled]"); +} + +/* + * Discard expired client connections from the idle list. Each conn in the + * idle list has been exposed and holds an extra ref because of that. + * + * This may be called from conn setup or from a work item so cannot be + * considered non-reentrant. + */ +static void rxrpc_discard_expired_client_conns(struct work_struct *work) +{ + struct rxrpc_connection *conn; + unsigned long expiry, conn_expires_at, now; + unsigned int nr_conns; + bool did_discard = false; + + _enter("%c", work ? 'w' : 'n'); + + if (list_empty(&rxrpc_idle_client_conns)) { + _leave(" [empty]"); + return; + } + + /* Don't double up on the discarding */ + if (!spin_trylock(&rxrpc_client_conn_discard_mutex)) { + _leave(" [already]"); + return; + } + + /* We keep an estimate of what the number of conns ought to be after + * we've discarded some so that we don't overdo the discarding. + */ + nr_conns = rxrpc_nr_client_conns; + +next: + spin_lock(&rxrpc_client_conn_cache_lock); + + if (list_empty(&rxrpc_idle_client_conns)) + goto out; + + conn = list_entry(rxrpc_idle_client_conns.next, + struct rxrpc_connection, cache_link); + ASSERT(test_bit(RXRPC_CONN_EXPOSED, &conn->flags)); + + if (!rxrpc_kill_all_client_conns) { + /* If the number of connections is over the reap limit, we + * expedite discard by reducing the expiry timeout. We must, + * however, have at least a short grace period to be able to do + * final-ACK or ABORT retransmission. + */ + expiry = rxrpc_conn_idle_client_expiry; + if (nr_conns > rxrpc_reap_client_connections) + expiry = rxrpc_conn_idle_client_fast_expiry; + + conn_expires_at = conn->idle_timestamp + expiry; + + now = READ_ONCE(jiffies); + if (time_after(conn_expires_at, now)) + goto not_yet_expired; + } + + _debug("discard conn %d", conn->debug_id); + if (!test_and_clear_bit(RXRPC_CONN_EXPOSED, &conn->flags)) + BUG(); + conn->cache_state = RXRPC_CONN_CLIENT_INACTIVE; + list_del_init(&conn->cache_link); + + spin_unlock(&rxrpc_client_conn_cache_lock); + + /* When we cleared the EXPOSED flag, we took on responsibility for the + * reference that that had on the usage count. We deal with that here. + * If someone re-sets the flag and re-gets the ref, that's fine. + */ + rxrpc_put_connection(conn); + did_discard = true; + nr_conns--; + goto next; + +not_yet_expired: + /* The connection at the front of the queue hasn't yet expired, so + * schedule the work item for that point if we discarded something. + * + * We don't worry if the work item is already scheduled - it can look + * after rescheduling itself at a later time. We could cancel it, but + * then things get messier. + */ + _debug("not yet"); + if (!rxrpc_kill_all_client_conns) + queue_delayed_work(rxrpc_workqueue, + &rxrpc_client_conn_reap, + conn_expires_at - now); + +out: + spin_unlock(&rxrpc_client_conn_cache_lock); + spin_unlock(&rxrpc_client_conn_discard_mutex); + _leave(""); +} + +/* + * Preemptively destroy all the client connection records rather than waiting + * for them to time out + */ +void __exit rxrpc_destroy_all_client_connections(void) +{ + _enter(""); + + spin_lock(&rxrpc_client_conn_cache_lock); + rxrpc_kill_all_client_conns = true; + spin_unlock(&rxrpc_client_conn_cache_lock); + + cancel_delayed_work(&rxrpc_client_conn_reap); + + if (!queue_delayed_work(rxrpc_workqueue, &rxrpc_client_conn_reap, 0)) + _debug("destroy: queue failed"); + + _leave(""); } diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index afc2d83d5995..5b45b6c367e7 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -1,6 +1,6 @@ -/* RxRPC virtual connection handler +/* RxRPC virtual connection handler, common bits. * - * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2007, 2016 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or @@ -15,8 +15,6 @@ #include #include #include -#include -#include #include "ar-internal.h" /* @@ -31,6 +29,8 @@ LIST_HEAD(rxrpc_connection_proc_list); DEFINE_RWLOCK(rxrpc_connection_lock); static DECLARE_DELAYED_WORK(rxrpc_connection_reap, rxrpc_connection_reaper); +static void rxrpc_destroy_connection(struct rcu_head *); + /* * allocate a new connection */ @@ -42,20 +42,16 @@ struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp) conn = kzalloc(sizeof(struct rxrpc_connection), gfp); if (conn) { + INIT_LIST_HEAD(&conn->cache_link); spin_lock_init(&conn->channel_lock); - init_waitqueue_head(&conn->channel_wq); + INIT_LIST_HEAD(&conn->waiting_calls); INIT_WORK(&conn->processor, &rxrpc_process_connection); INIT_LIST_HEAD(&conn->proc_link); INIT_LIST_HEAD(&conn->link); skb_queue_head_init(&conn->rx_queue); conn->security = &rxrpc_no_security; spin_lock_init(&conn->state_lock); - /* We maintain an extra ref on the connection whilst it is - * on the rxrpc_connections list. - */ - atomic_set(&conn->usage, 2); conn->debug_id = atomic_inc_return(&rxrpc_debug_id); - atomic_set(&conn->avail_chans, RXRPC_MAXCALLS); conn->size_align = 4; conn->header_size = sizeof(struct rxrpc_wire_header); conn->idle_timestamp = jiffies; @@ -156,9 +152,9 @@ not_found: * terminates. The caller must hold the channel_lock and must release the * call's ref on the connection. */ -void __rxrpc_disconnect_call(struct rxrpc_call *call) +void __rxrpc_disconnect_call(struct rxrpc_connection *conn, + struct rxrpc_call *call) { - struct rxrpc_connection *conn = call->conn; struct rxrpc_channel *chan = &conn->channels[call->cid & RXRPC_CHANNELMASK]; @@ -182,8 +178,6 @@ void __rxrpc_disconnect_call(struct rxrpc_call *call) chan->call_id = chan->call_counter; rcu_assign_pointer(chan->call, NULL); - atomic_inc(&conn->avail_chans); - wake_up(&conn->channel_wq); } _leave(""); @@ -197,8 +191,11 @@ void rxrpc_disconnect_call(struct rxrpc_call *call) { struct rxrpc_connection *conn = call->conn; + if (rxrpc_is_client_call(call)) + return rxrpc_disconnect_client_call(call); + spin_lock(&conn->channel_lock); - __rxrpc_disconnect_call(call); + __rxrpc_disconnect_call(conn, call); spin_unlock(&conn->channel_lock); call->conn = NULL; @@ -206,6 +203,34 @@ void rxrpc_disconnect_call(struct rxrpc_call *call) rxrpc_put_connection(conn); } +/* + * Kill off a connection. + */ +void rxrpc_kill_connection(struct rxrpc_connection *conn) +{ + ASSERT(!rcu_access_pointer(conn->channels[0].call) && + !rcu_access_pointer(conn->channels[1].call) && + !rcu_access_pointer(conn->channels[2].call) && + !rcu_access_pointer(conn->channels[3].call)); + ASSERT(list_empty(&conn->cache_link)); + + write_lock(&rxrpc_connection_lock); + list_del_init(&conn->proc_link); + write_unlock(&rxrpc_connection_lock); + + /* Drain the Rx queue. Note that even though we've unpublished, an + * incoming packet could still be being added to our Rx queue, so we + * will need to drain it again in the RCU cleanup handler. + */ + rxrpc_purge_queue(&conn->rx_queue); + + /* Leave final destruction to RCU. The connection processor work item + * must carry a ref on the connection to prevent us getting here whilst + * it is queued or running. + */ + call_rcu(&conn->rcu, rxrpc_destroy_connection); +} + /* * release a virtual connection */ @@ -241,7 +266,7 @@ static void rxrpc_destroy_connection(struct rcu_head *rcu) } /* - * reap dead connections + * reap dead service connections */ static void rxrpc_connection_reaper(struct work_struct *work) { @@ -280,12 +305,11 @@ static void rxrpc_connection_reaper(struct work_struct *work) continue; if (rxrpc_conn_is_client(conn)) - rxrpc_unpublish_client_conn(conn); + BUG(); else rxrpc_unpublish_service_conn(conn); list_move_tail(&conn->link, &graveyard); - list_del_init(&conn->proc_link); } write_unlock(&rxrpc_connection_lock); @@ -302,16 +326,15 @@ static void rxrpc_connection_reaper(struct work_struct *work) list_del_init(&conn->link); ASSERTCMP(atomic_read(&conn->usage), ==, 0); - skb_queue_purge(&conn->rx_queue); - call_rcu(&conn->rcu, rxrpc_destroy_connection); + rxrpc_kill_connection(conn); } _leave(""); } /* - * preemptively destroy all the connection records rather than waiting for them - * to time out + * preemptively destroy all the service connection records rather than + * waiting for them to time out */ void __exit rxrpc_destroy_all_connections(void) { @@ -320,6 +343,8 @@ void __exit rxrpc_destroy_all_connections(void) _enter(""); + rxrpc_destroy_all_client_connections(); + rxrpc_connection_expiry = 0; cancel_delayed_work(&rxrpc_connection_reap); rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0); @@ -334,6 +359,8 @@ void __exit rxrpc_destroy_all_connections(void) write_unlock(&rxrpc_connection_lock); BUG_ON(leak); + ASSERT(list_empty(&rxrpc_connection_proc_list)); + /* Make sure the local and peer records pinned by any dying connections * are released. */ diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c index 6ad6ae926cc3..316a92107fee 100644 --- a/net/rxrpc/conn_service.c +++ b/net/rxrpc/conn_service.c @@ -185,6 +185,11 @@ struct rxrpc_connection *rxrpc_incoming_connection(struct rxrpc_local *local, rxrpc_get_local(local); + /* We maintain an extra ref on the connection whilst it is on + * the rxrpc_connections list. + */ + atomic_set(&conn->usage, 2); + write_lock(&rxrpc_connection_lock); list_add_tail(&conn->link, &rxrpc_connections); list_add_tail(&conn->proc_link, &rxrpc_connection_proc_list); diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index e3a08d542fb7..8a9917cba6fe 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -390,7 +390,7 @@ static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx, call->acks_winsz), *timeo); - add_wait_queue(&call->tx_waitq, &myself); + add_wait_queue(&call->waitq, &myself); for (;;) { set_current_state(TASK_INTERRUPTIBLE); @@ -408,7 +408,7 @@ static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx, lock_sock(&rx->sk); } - remove_wait_queue(&call->tx_waitq, &myself); + remove_wait_queue(&call->waitq, &myself); set_current_state(TASK_RUNNING); _leave(" = %d", ret); return ret; @@ -482,6 +482,8 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, if (try_to_del_timer_sync(&call->ack_timer) >= 0) { /* the packet may be freed by rxrpc_process_call() before this * returns */ + if (rxrpc_is_client_call(call)) + rxrpc_expose_client_call(call); ret = rxrpc_send_data_packet(call->conn, skb); _net("sent skb %p", skb); } else { diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c index 03ad08774d4e..dc380af8a81e 100644 --- a/net/rxrpc/sysctl.c +++ b/net/rxrpc/sysctl.c @@ -62,6 +62,22 @@ static struct ctl_table rxrpc_sysctl_table[] = { .proc_handler = proc_dointvec_ms_jiffies, .extra1 = (void *)&one, }, + { + .procname = "idle_conn_expiry", + .data = &rxrpc_conn_idle_client_expiry, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_ms_jiffies, + .extra1 = (void *)&one, + }, + { + .procname = "idle_conn_fast_expiry", + .data = &rxrpc_conn_idle_client_fast_expiry, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_ms_jiffies, + .extra1 = (void *)&one, + }, /* Values measured in seconds but used in jiffies */ { @@ -81,17 +97,24 @@ static struct ctl_table rxrpc_sysctl_table[] = { .extra1 = (void *)&one, }, - /* Values measured in seconds */ + /* Non-time values */ { - .procname = "connection_expiry", - .data = &rxrpc_connection_expiry, + .procname = "max_client_conns", + .data = &rxrpc_max_client_connections, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = (void *)&rxrpc_reap_client_connections, + }, + { + .procname = "reap_client_conns", + .data = &rxrpc_reap_client_connections, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = (void *)&one, + .extra2 = (void *)&rxrpc_max_client_connections, }, - - /* Non-time values */ { .procname = "max_backlog", .data = &rxrpc_max_backlog, From 1803e0fb7ef9534154e6a1abf77bc2ca352f72e1 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 24 Aug 2016 12:00:23 +0200 Subject: [PATCH 0467/1715] mlxsw: spectrum: Limit number of FDB records per learning session Up until now a learning session ended whenever the number of queried records was zero. This turned out to be problematic in situations where a large number of MACs (48K) had to be processed by the switch driver, as RTNL mutex is held during the learning session. Instead, limit the number of FDB records that can be processed in a session to 64. This means that every time the device is queried for learning notifications (currently, every 100ms), up to 64 records will be processed by the switch driver. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 7 ++++++ .../mellanox/mlxsw/spectrum_switchdev.c | 22 +++++++++---------- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 1721098eef13..b83d0a7a0b49 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -591,6 +591,12 @@ static const struct mlxsw_reg_info mlxsw_reg_sfn = { */ MLXSW_ITEM32(reg, sfn, swid, 0x00, 24, 8); +/* reg_sfn_end + * Forces the current session to end. + * Access: OP + */ +MLXSW_ITEM32(reg, sfn, end, 0x04, 20, 1); + /* reg_sfn_num_rec * Request: Number of learned notifications and aged-out notification * records requested. @@ -605,6 +611,7 @@ static inline void mlxsw_reg_sfn_pack(char *payload) { MLXSW_REG_ZERO(sfn, payload); mlxsw_reg_sfn_swid_set(payload, 0); + mlxsw_reg_sfn_end_set(payload, 1); mlxsw_reg_sfn_num_rec_set(payload, MLXSW_REG_SFN_REC_MAX_COUNT); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index d1b59cdfacc1..02de24080e79 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -1496,20 +1496,18 @@ static void mlxsw_sp_fdb_notify_work(struct work_struct *work) mlxsw_sp = container_of(work, struct mlxsw_sp, fdb_notify.dw.work); rtnl_lock(); - do { - mlxsw_reg_sfn_pack(sfn_pl); - err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(sfn), sfn_pl); - if (err) { - dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to get FDB notifications\n"); - break; - } - num_rec = mlxsw_reg_sfn_num_rec_get(sfn_pl); - for (i = 0; i < num_rec; i++) - mlxsw_sp_fdb_notify_rec_process(mlxsw_sp, sfn_pl, i); + mlxsw_reg_sfn_pack(sfn_pl); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(sfn), sfn_pl); + if (err) { + dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to get FDB notifications\n"); + goto out; + } + num_rec = mlxsw_reg_sfn_num_rec_get(sfn_pl); + for (i = 0; i < num_rec; i++) + mlxsw_sp_fdb_notify_rec_process(mlxsw_sp, sfn_pl, i); - } while (num_rec); +out: rtnl_unlock(); - kfree(sfn_pl); mlxsw_sp_fdb_notify_work_schedule(mlxsw_sp); } From f7a8f6cec38a549ffcc559a9eb14cab539100d97 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 24 Aug 2016 12:00:24 +0200 Subject: [PATCH 0468/1715] mlxsw: spectrum: Make VLAN deletion function symmetric Commit 05978481e77e ("mlxsw: spectrum: Create PVID vPort before registering netdevice") removed __mlxsw_sp_port_vlans_del() from the init sequence of the driver, which forced it to be non-symmetric with regards to __mlxsw_sp_port_vlans_add(). Make both functions symmetric as the constraint no longer exists. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_switchdev.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 02de24080e79..2f3831058bfe 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -1008,14 +1008,6 @@ static int __mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port, if (!mlxsw_sp_port->bridged) return -EINVAL; - err = __mlxsw_sp_port_vlans_set(mlxsw_sp_port, vid_begin, vid_end, - false, false); - if (err) { - netdev_err(dev, "Unable to del VIDs %d-%d\n", vid_begin, - vid_end); - return err; - } - pvid = mlxsw_sp_port->pvid; if (pvid >= vid_begin && pvid <= vid_end) { err = mlxsw_sp_port_pvid_set(mlxsw_sp_port, 0); @@ -1025,6 +1017,14 @@ static int __mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port, } } + err = __mlxsw_sp_port_vlans_set(mlxsw_sp_port, vid_begin, vid_end, + false, false); + if (err) { + netdev_err(dev, "Unable to del VIDs %d-%d\n", vid_begin, + vid_end); + return err; + } + mlxsw_sp_port_fid_leave(mlxsw_sp_port, vid_begin, vid_end); /* Changing activity bits only if HW operation succeded */ From 640be7b7177dbe026cf7ae387b21f6917f0fc44f Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 24 Aug 2016 12:00:25 +0200 Subject: [PATCH 0469/1715] mlxsw: spectrum: Don't abort on first error when removing VLANs When removing VLANs from the VLAN-aware bridge we shouldn't abort on the first error, as we'll otherwise have resources that will never be freed. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../mellanox/mlxsw/spectrum_switchdev.c | 20 ++++--------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 2f3831058bfe..e5cf8a5da5d0 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -1001,29 +1001,17 @@ static int mlxsw_sp_port_obj_add(struct net_device *dev, static int __mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid_begin, u16 vid_end) { - struct net_device *dev = mlxsw_sp_port->dev; u16 vid, pvid; - int err; if (!mlxsw_sp_port->bridged) return -EINVAL; pvid = mlxsw_sp_port->pvid; - if (pvid >= vid_begin && pvid <= vid_end) { - err = mlxsw_sp_port_pvid_set(mlxsw_sp_port, 0); - if (err) { - netdev_err(dev, "Unable to del PVID %d\n", pvid); - return err; - } - } + if (pvid >= vid_begin && pvid <= vid_end) + mlxsw_sp_port_pvid_set(mlxsw_sp_port, 0); - err = __mlxsw_sp_port_vlans_set(mlxsw_sp_port, vid_begin, vid_end, - false, false); - if (err) { - netdev_err(dev, "Unable to del VIDs %d-%d\n", vid_begin, - vid_end); - return err; - } + __mlxsw_sp_port_vlans_set(mlxsw_sp_port, vid_begin, vid_end, false, + false); mlxsw_sp_port_fid_leave(mlxsw_sp_port, vid_begin, vid_end); From 584d73df06253f206e31ff897a475283167bf0f0 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 24 Aug 2016 12:00:26 +0200 Subject: [PATCH 0470/1715] mlxsw: spectrum: Configure learning for VLAN-aware bridge port We are going to prevent the device from generating learning notifications for a port that was configured with learning disabled. Since learning configuration is done per {Port, VID} we need to apply the port's learning configuration for any VID that is added to the bridge port's VLAN filter list. When a VID is added to the VLAN filter list of a VLAN-aware bridge port, configure the {Port, VID} learning status according to the port's configuration. When the VID is removed, disable learning for the {Port, VID}. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum.c | 16 ++++++--- .../net/ethernet/mellanox/mlxsw/spectrum.h | 3 ++ .../mellanox/mlxsw/spectrum_switchdev.c | 35 +++++++++++++++++++ 3 files changed, 50 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 1f8168906811..3820317d9a42 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -555,8 +555,9 @@ int mlxsw_sp_port_vid_to_fid_set(struct mlxsw_sp_port *mlxsw_sp_port, return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(svfa), svfa_pl); } -static int mlxsw_sp_port_vid_learning_set(struct mlxsw_sp_port *mlxsw_sp_port, - u16 vid, bool learn_enable) +int __mlxsw_sp_port_vid_learning_set(struct mlxsw_sp_port *mlxsw_sp_port, + u16 vid_begin, u16 vid_end, + bool learn_enable) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; char *spvmlr_pl; @@ -565,13 +566,20 @@ static int mlxsw_sp_port_vid_learning_set(struct mlxsw_sp_port *mlxsw_sp_port, spvmlr_pl = kmalloc(MLXSW_REG_SPVMLR_LEN, GFP_KERNEL); if (!spvmlr_pl) return -ENOMEM; - mlxsw_reg_spvmlr_pack(spvmlr_pl, mlxsw_sp_port->local_port, vid, vid, - learn_enable); + mlxsw_reg_spvmlr_pack(spvmlr_pl, mlxsw_sp_port->local_port, vid_begin, + vid_end, learn_enable); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spvmlr), spvmlr_pl); kfree(spvmlr_pl); return err; } +static int mlxsw_sp_port_vid_learning_set(struct mlxsw_sp_port *mlxsw_sp_port, + u16 vid, bool learn_enable) +{ + return __mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, vid, + learn_enable); +} + static int mlxsw_sp_port_system_port_mapping_set(struct mlxsw_sp_port *mlxsw_sp_port) { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index ab3feb81bd43..01537d3a1c48 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -558,6 +558,9 @@ int __mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port, int mtu, int mlxsw_sp_port_ets_maxrate_set(struct mlxsw_sp_port *mlxsw_sp_port, enum mlxsw_reg_qeec_hr hr, u8 index, u8 next_index, u32 maxrate); +int __mlxsw_sp_port_vid_learning_set(struct mlxsw_sp_port *mlxsw_sp_port, + u16 vid_begin, u16 vid_end, + bool learn_enable); #ifdef CONFIG_MLXSW_SPECTRUM_DCB diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index e5cf8a5da5d0..d1b2b11a3936 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -635,6 +635,27 @@ static int __mlxsw_sp_port_vlans_set(struct mlxsw_sp_port *mlxsw_sp_port, return 0; } +static int mlxsw_sp_port_vid_learning_set(struct mlxsw_sp_port *mlxsw_sp_port, + u16 vid_begin, u16 vid_end, + bool learn_enable) +{ + u16 vid, vid_e; + int err; + + for (vid = vid_begin; vid <= vid_end; + vid += MLXSW_REG_SPVMLR_REC_MAX_COUNT) { + vid_e = min((u16) (vid + MLXSW_REG_SPVMLR_REC_MAX_COUNT - 1), + vid_end); + + err = __mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, + vid_e, learn_enable); + if (err) + return err; + } + + return 0; +} + static int __mlxsw_sp_port_vlans_add(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid_begin, u16 vid_end, bool flag_untagged, bool flag_pvid) @@ -675,6 +696,14 @@ static int __mlxsw_sp_port_vlans_add(struct mlxsw_sp_port *mlxsw_sp_port, } } + err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid_begin, vid_end, + mlxsw_sp_port->learning); + if (err) { + netdev_err(dev, "Failed to set learning for VIDs %d-%d\n", + vid_begin, vid_end); + goto err_port_vid_learning_set; + } + /* Changing activity bits only if HW operation succeded */ for (vid = vid_begin; vid <= vid_end; vid++) { set_bit(vid, mlxsw_sp_port->active_vlans); @@ -697,6 +726,9 @@ static int __mlxsw_sp_port_vlans_add(struct mlxsw_sp_port *mlxsw_sp_port, err_port_stp_state_set: for (vid = vid_begin; vid <= vid_end; vid++) clear_bit(vid, mlxsw_sp_port->active_vlans); + mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid_begin, vid_end, + false); +err_port_vid_learning_set: if (old_pvid != mlxsw_sp_port->pvid) mlxsw_sp_port_pvid_set(mlxsw_sp_port, old_pvid); err_port_pvid_set: @@ -1006,6 +1038,9 @@ static int __mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port, if (!mlxsw_sp_port->bridged) return -EINVAL; + mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid_begin, vid_end, + false); + pvid = mlxsw_sp_port->pvid; if (pvid >= vid_begin && pvid <= vid_end) mlxsw_sp_port_pvid_set(mlxsw_sp_port, 0); From 89b548f0cfad8322cefe777746e4c12f0ab9ba60 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 24 Aug 2016 12:00:27 +0200 Subject: [PATCH 0471/1715] mlxsw: spectrum: Offload learning to the switch ASIC Up until now we simply stored the learning configuration of a bridge port in the driver and decided whether to learn a new FDB record based on this value. However, this is sub-optimal in cases where learning is disabled on the bridge port, as the device repeatedly generates learning notifications for the same record. Instead, offload the learning configuration to the device, thereby preventing it from generating notifications when learning is disabled. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../mellanox/mlxsw/spectrum_switchdev.c | 47 +++++++++++++++++-- 1 file changed, 44 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index d1b2b11a3936..e0a72323817f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -261,12 +261,40 @@ int mlxsw_sp_vport_flood_set(struct mlxsw_sp_port *mlxsw_sp_vport, u16 fid, false); } +static int mlxsw_sp_port_learning_set(struct mlxsw_sp_port *mlxsw_sp_port, + bool set) +{ + u16 vid; + int err; + + if (mlxsw_sp_port_is_vport(mlxsw_sp_port)) { + vid = mlxsw_sp_vport_vid_get(mlxsw_sp_port); + + return __mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, vid, + set); + } + + for_each_set_bit(vid, mlxsw_sp_port->active_vlans, VLAN_N_VID) { + err = __mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, vid, + set); + if (err) + goto err_port_vid_learning_set; + } + + return 0; + +err_port_vid_learning_set: + for_each_set_bit(vid, mlxsw_sp_port->active_vlans, VLAN_N_VID) + __mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, vid, !set); + return err; +} + static int mlxsw_sp_port_attr_br_flags_set(struct mlxsw_sp_port *mlxsw_sp_port, struct switchdev_trans *trans, unsigned long brport_flags) { + unsigned long learning = mlxsw_sp_port->learning ? BR_LEARNING : 0; unsigned long uc_flood = mlxsw_sp_port->uc_flood ? BR_FLOOD : 0; - bool set; int err; if (!mlxsw_sp_port->bridged) @@ -276,17 +304,30 @@ static int mlxsw_sp_port_attr_br_flags_set(struct mlxsw_sp_port *mlxsw_sp_port, return 0; if ((uc_flood ^ brport_flags) & BR_FLOOD) { - set = mlxsw_sp_port->uc_flood ? false : true; - err = mlxsw_sp_port_uc_flood_set(mlxsw_sp_port, set); + err = mlxsw_sp_port_uc_flood_set(mlxsw_sp_port, + !mlxsw_sp_port->uc_flood); if (err) return err; } + if ((learning ^ brport_flags) & BR_LEARNING) { + err = mlxsw_sp_port_learning_set(mlxsw_sp_port, + !mlxsw_sp_port->learning); + if (err) + goto err_port_learning_set; + } + mlxsw_sp_port->uc_flood = brport_flags & BR_FLOOD ? 1 : 0; mlxsw_sp_port->learning = brport_flags & BR_LEARNING ? 1 : 0; mlxsw_sp_port->learning_sync = brport_flags & BR_LEARNING_SYNC ? 1 : 0; return 0; + +err_port_learning_set: + if ((uc_flood ^ brport_flags) & BR_FLOOD) + mlxsw_sp_port_uc_flood_set(mlxsw_sp_port, + mlxsw_sp_port->uc_flood); + return err; } static int mlxsw_sp_ageing_set(struct mlxsw_sp *mlxsw_sp, u32 ageing_time) From 81f77bc00689ea602e8f19ef5b6dc7b1e238187b Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 24 Aug 2016 12:00:28 +0200 Subject: [PATCH 0472/1715] mlxsw: spectrum: Remove unnecessary check in FDB processing We now offload the learning configuration to the device and don't rely on the driver to decide whether to learn the FDB record, so remove the check. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index e0a72323817f..0c3fbbc6b537 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -1430,8 +1430,6 @@ static void mlxsw_sp_fdb_notify_mac_process(struct mlxsw_sp *mlxsw_sp, vid = fid; } - adding = adding && mlxsw_sp_port->learning; - do_fdb_op: err = mlxsw_sp_port_fdb_uc_op(mlxsw_sp, local_port, mac, fid, adding, true); @@ -1493,8 +1491,6 @@ static void mlxsw_sp_fdb_notify_mac_lag_process(struct mlxsw_sp *mlxsw_sp, vid = fid; } - adding = adding && mlxsw_sp_port->learning; - do_fdb_op: err = mlxsw_sp_port_fdb_uc_lag_op(mlxsw_sp, lag_id, mac, fid, lag_vid, adding, true); From 0f7a4d8a9d1c6ecd0a93f7e5feedf5cdffaafd5e Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 24 Aug 2016 12:00:29 +0200 Subject: [PATCH 0473/1715] mlxsw: spectrum: Don't set learning when creating vPorts Before commit 99724c18fc66 ("mlxsw: spectrum: Introduce support for router interfaces") we used to assign vFIDs to the created vPorts. Since these vPorts were used for slow path traffic we had to disable learning for them, as it doesn't make sense to have it enabled. This is no longer the case and now vPorts are either used for router interfaces (for which learning is disabled by the firmware) or bridge ports (for which learning is explicitly enabled by the driver). Therefore, we can remove the learning configuration upon vPort creation. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 3820317d9a42..2713a64eae75 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -981,10 +981,6 @@ static int mlxsw_sp_port_add_vid(struct net_device *dev, goto err_port_vp_mode_trans; } - err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, false); - if (err) - goto err_port_vid_learning_set; - err = mlxsw_sp_port_vlan_set(mlxsw_sp_vport, vid, vid, true, untagged); if (err) goto err_port_add_vid; @@ -992,8 +988,6 @@ static int mlxsw_sp_port_add_vid(struct net_device *dev, return 0; err_port_add_vid: - mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, true); -err_port_vid_learning_set: if (list_is_singular(&mlxsw_sp_port->vports_list)) mlxsw_sp_port_vlan_mode_trans(mlxsw_sp_port); err_port_vp_mode_trans: @@ -1020,8 +1014,6 @@ static int mlxsw_sp_port_kill_vid(struct net_device *dev, mlxsw_sp_port_vlan_set(mlxsw_sp_vport, vid, vid, false, false); - mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, true); - /* Drop FID reference. If this was the last reference the * resources will be freed. */ From c7b7b483ccc9d64ae577a04d490aa9a975afe891 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Wed, 24 Aug 2016 13:27:19 +0300 Subject: [PATCH 0474/1715] bnx2x: Don't flush multicast MACs When ndo_set_rx_mode() is called for bnx2x, as part of process of configuring the new MAC address filters [both unicast & multicast] driver begins by flushing the existing configuration and then iterating over the network device's list of addresses and configures those instead. This has the side-effect of creating a short gap where traffic wouldn't be properly classified, as no filters are configured in HW. While for unicasts this is rather insignificant [as unicast MACs don't frequently change while interface is actually running], for multicast traffic it does pose an issue as there are multicast-based networks where new multicast groups would constantly be removed and added. This patch tries to remedy this [at least for the newer adapters] - Instead of flushing & reconfiguring all existing multicast filters, the driver would instead create the approximate hash match that would result from the required filters. It would then compare it against the currently configured approximate hash match, and only add and remove the delta between those. Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- .../net/ethernet/broadcom/bnx2x/bnx2x_main.c | 47 ++++- .../net/ethernet/broadcom/bnx2x/bnx2x_sp.c | 191 +++++++++++++++++- .../net/ethernet/broadcom/bnx2x/bnx2x_sp.h | 12 +- .../net/ethernet/broadcom/bnx2x/bnx2x_sriov.c | 24 +-- .../net/ethernet/broadcom/bnx2x/bnx2x_sriov.h | 1 - 5 files changed, 241 insertions(+), 34 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 97e892511666..de2d32690394 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -12560,8 +12560,10 @@ static int bnx2x_init_mcast_macs_list(struct bnx2x *bp, kcalloc(mc_count, sizeof(*mc_mac), GFP_ATOMIC); struct netdev_hw_addr *ha; - if (!mc_mac) + if (!mc_mac) { + BNX2X_ERR("Failed to allocate mc MAC list\n"); return -ENOMEM; + } INIT_LIST_HEAD(&p->mcast_list); @@ -12632,7 +12634,7 @@ static int bnx2x_set_uc_list(struct bnx2x *bp) BNX2X_UC_LIST_MAC, &ramrod_flags); } -static int bnx2x_set_mc_list(struct bnx2x *bp) +static int bnx2x_set_mc_list_e1x(struct bnx2x *bp) { struct net_device *dev = bp->dev; struct bnx2x_mcast_ramrod_params rparam = {NULL}; @@ -12650,11 +12652,8 @@ static int bnx2x_set_mc_list(struct bnx2x *bp) /* then, configure a new MACs list */ if (netdev_mc_count(dev)) { rc = bnx2x_init_mcast_macs_list(bp, &rparam); - if (rc) { - BNX2X_ERR("Failed to create multicast MACs list: %d\n", - rc); + if (rc) return rc; - } /* Now add the new MACs */ rc = bnx2x_config_mcast(bp, &rparam, @@ -12669,6 +12668,42 @@ static int bnx2x_set_mc_list(struct bnx2x *bp) return rc; } +static int bnx2x_set_mc_list(struct bnx2x *bp) +{ + struct bnx2x_mcast_ramrod_params rparam = {NULL}; + struct net_device *dev = bp->dev; + int rc = 0; + + /* On older adapters, we need to flush and re-add filters */ + if (CHIP_IS_E1x(bp)) + return bnx2x_set_mc_list_e1x(bp); + + rparam.mcast_obj = &bp->mcast_obj; + + if (netdev_mc_count(dev)) { + rc = bnx2x_init_mcast_macs_list(bp, &rparam); + if (rc) + return rc; + + /* Override the curently configured set of mc filters */ + rc = bnx2x_config_mcast(bp, &rparam, + BNX2X_MCAST_CMD_SET); + if (rc < 0) + BNX2X_ERR("Failed to set a new multicast configuration: %d\n", + rc); + + bnx2x_free_mcast_macs_list(&rparam); + } else { + /* If no mc addresses are required, flush the configuration */ + rc = bnx2x_config_mcast(bp, &rparam, BNX2X_MCAST_CMD_DEL); + if (rc) + BNX2X_ERR("Failed to clear multicast configuration %d\n", + rc); + } + + return rc; +} + /* If bp->state is OPEN, should be called with netif_addr_lock_bh() */ static void bnx2x_set_rx_mode(struct net_device *dev) { diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c index ff702a707a91..d468380c2a23 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c @@ -2600,6 +2600,12 @@ struct bnx2x_mcast_mac_elem { u8 pad[2]; /* For a natural alignment of the following buffer */ }; +struct bnx2x_mcast_bin_elem { + struct list_head link; + int bin; + int type; /* BNX2X_MCAST_CMD_SET_{ADD, DEL} */ +}; + struct bnx2x_pending_mcast_cmd { struct list_head link; int type; /* BNX2X_MCAST_CMD_X */ @@ -2609,6 +2615,11 @@ struct bnx2x_pending_mcast_cmd { int next_bin; /* Needed for RESTORE flow with aprox match */ } data; + bool set_convert; /* in case type == BNX2X_MCAST_CMD_SET, this is set + * when macs_head had been converted to a list of + * bnx2x_mcast_bin_elem. + */ + bool done; /* set to true, when the command has been handled, * practically used in 57712 handling only, where one pending * command may be handled in a few operations. As long as for @@ -2636,15 +2647,30 @@ static int bnx2x_mcast_enqueue_cmd(struct bnx2x *bp, struct bnx2x_pending_mcast_cmd *new_cmd; struct bnx2x_mcast_mac_elem *cur_mac = NULL; struct bnx2x_mcast_list_elem *pos; - int macs_list_len = ((cmd == BNX2X_MCAST_CMD_ADD) ? - p->mcast_list_len : 0); + int macs_list_len = 0, macs_list_len_size; + + /* When adding MACs we'll need to store their values */ + if (cmd == BNX2X_MCAST_CMD_ADD || cmd == BNX2X_MCAST_CMD_SET) + macs_list_len = p->mcast_list_len; /* If the command is empty ("handle pending commands only"), break */ if (!p->mcast_list_len) return 0; - total_sz = sizeof(*new_cmd) + - macs_list_len * sizeof(struct bnx2x_mcast_mac_elem); + /* For a set command, we need to allocate sufficient memory for all + * the bins, since we can't analyze at this point how much memory would + * be required. + */ + macs_list_len_size = macs_list_len * + sizeof(struct bnx2x_mcast_mac_elem); + if (cmd == BNX2X_MCAST_CMD_SET) { + int bin_size = BNX2X_MCAST_BINS_NUM * + sizeof(struct bnx2x_mcast_bin_elem); + + if (bin_size > macs_list_len_size) + macs_list_len_size = bin_size; + } + total_sz = sizeof(*new_cmd) + macs_list_len_size; /* Add mcast is called under spin_lock, thus calling with GFP_ATOMIC */ new_cmd = kzalloc(total_sz, GFP_ATOMIC); @@ -2662,6 +2688,7 @@ static int bnx2x_mcast_enqueue_cmd(struct bnx2x *bp, switch (cmd) { case BNX2X_MCAST_CMD_ADD: + case BNX2X_MCAST_CMD_SET: cur_mac = (struct bnx2x_mcast_mac_elem *) ((u8 *)new_cmd + sizeof(*new_cmd)); @@ -2771,7 +2798,8 @@ static void bnx2x_mcast_set_one_rule_e2(struct bnx2x *bp, u8 rx_tx_add_flag = bnx2x_mcast_get_rx_tx_flag(o); int bin; - if ((cmd == BNX2X_MCAST_CMD_ADD) || (cmd == BNX2X_MCAST_CMD_RESTORE)) + if ((cmd == BNX2X_MCAST_CMD_ADD) || (cmd == BNX2X_MCAST_CMD_RESTORE) || + (cmd == BNX2X_MCAST_CMD_SET_ADD)) rx_tx_add_flag |= ETH_MULTICAST_RULES_CMD_IS_ADD; data->rules[idx].cmd_general_data |= rx_tx_add_flag; @@ -2797,6 +2825,16 @@ static void bnx2x_mcast_set_one_rule_e2(struct bnx2x *bp, bin = cfg_data->bin; break; + case BNX2X_MCAST_CMD_SET_ADD: + bin = cfg_data->bin; + BIT_VEC64_SET_BIT(o->registry.aprox_match.vec, bin); + break; + + case BNX2X_MCAST_CMD_SET_DEL: + bin = cfg_data->bin; + BIT_VEC64_CLEAR_BIT(o->registry.aprox_match.vec, bin); + break; + default: BNX2X_ERR("Unknown command: %d\n", cmd); return; @@ -2932,6 +2970,102 @@ static inline void bnx2x_mcast_hdl_pending_restore_e2(struct bnx2x *bp, cmd_pos->data.next_bin++; } +static void +bnx2x_mcast_hdl_pending_set_e2_convert(struct bnx2x *bp, + struct bnx2x_mcast_obj *o, + struct bnx2x_pending_mcast_cmd *cmd_pos) +{ + u64 cur[BNX2X_MCAST_VEC_SZ], req[BNX2X_MCAST_VEC_SZ]; + struct bnx2x_mcast_mac_elem *pmac_pos, *pmac_pos_n; + struct bnx2x_mcast_bin_elem *p_item; + int i, cnt = 0, mac_cnt = 0; + + memset(req, 0, sizeof(u64) * BNX2X_MCAST_VEC_SZ); + memcpy(cur, o->registry.aprox_match.vec, + sizeof(u64) * BNX2X_MCAST_VEC_SZ); + + /* Fill `current' with the required set of bins to configure */ + list_for_each_entry_safe(pmac_pos, pmac_pos_n, &cmd_pos->data.macs_head, + link) { + int bin = bnx2x_mcast_bin_from_mac(pmac_pos->mac); + + DP(BNX2X_MSG_SP, "Set contains %pM mcast MAC\n", + pmac_pos->mac); + + BIT_VEC64_SET_BIT(req, bin); + list_del(&pmac_pos->link); + mac_cnt++; + } + + /* We no longer have use for the MACs; Need to re-use memory for + * a list that will be used to configure bins. + */ + cmd_pos->set_convert = true; + p_item = (struct bnx2x_mcast_bin_elem *)(cmd_pos + 1); + INIT_LIST_HEAD(&cmd_pos->data.macs_head); + + for (i = 0; i < BNX2X_MCAST_BINS_NUM; i++) { + bool b_current = !!BIT_VEC64_TEST_BIT(cur, i); + bool b_required = !!BIT_VEC64_TEST_BIT(req, i); + + if (b_current == b_required) + continue; + + p_item->bin = i; + p_item->type = b_required ? BNX2X_MCAST_CMD_SET_ADD + : BNX2X_MCAST_CMD_SET_DEL; + list_add_tail(&p_item->link , &cmd_pos->data.macs_head); + p_item++; + cnt++; + } + + /* We now definitely know how many commands are hiding here. + * Also need to correct the disruption we've added to guarantee this + * would be enqueued. + */ + o->total_pending_num -= (o->max_cmd_len + mac_cnt); + o->total_pending_num += cnt; + + DP(BNX2X_MSG_SP, "o->total_pending_num=%d\n", o->total_pending_num); +} + +static void +bnx2x_mcast_hdl_pending_set_e2(struct bnx2x *bp, + struct bnx2x_mcast_obj *o, + struct bnx2x_pending_mcast_cmd *cmd_pos, + int *cnt) +{ + union bnx2x_mcast_config_data cfg_data = {NULL}; + struct bnx2x_mcast_bin_elem *p_item, *p_item_n; + + /* This is actually a 2-part scheme - it starts by converting the MACs + * into a list of bins to be added/removed, and correcting the numbers + * on the object. this is now allowed, as we're now sure that all + * previous configured requests have already applied. + * The second part is actually adding rules for the newly introduced + * entries [like all the rest of the hdl_pending functions]. + */ + if (!cmd_pos->set_convert) + bnx2x_mcast_hdl_pending_set_e2_convert(bp, o, cmd_pos); + + list_for_each_entry_safe(p_item, p_item_n, &cmd_pos->data.macs_head, + link) { + cfg_data.bin = (u8)p_item->bin; + o->set_one_rule(bp, o, *cnt, &cfg_data, p_item->type); + (*cnt)++; + + list_del(&p_item->link); + + /* Break if we reached the maximum number of rules. */ + if (*cnt >= o->max_cmd_len) + break; + } + + /* if no more MACs to configure - we are done */ + if (list_empty(&cmd_pos->data.macs_head)) + cmd_pos->done = true; +} + static inline int bnx2x_mcast_handle_pending_cmds_e2(struct bnx2x *bp, struct bnx2x_mcast_ramrod_params *p) { @@ -2955,6 +3089,10 @@ static inline int bnx2x_mcast_handle_pending_cmds_e2(struct bnx2x *bp, &cnt); break; + case BNX2X_MCAST_CMD_SET: + bnx2x_mcast_hdl_pending_set_e2(bp, o, cmd_pos, &cnt); + break; + default: BNX2X_ERR("Unknown command: %d\n", cmd_pos->type); return -EINVAL; @@ -3095,6 +3233,19 @@ static int bnx2x_mcast_validate_e2(struct bnx2x *bp, o->set_registry_size(o, reg_sz + p->mcast_list_len); break; + case BNX2X_MCAST_CMD_SET: + /* We can only learn how many commands would actually be used + * when this is being configured. So for now, simply guarantee + * the command will be enqueued [to refrain from adding logic + * that handles this and THEN learns it needs several ramrods]. + * Just like for ADD/Cont, the mcast_list_len might be an over + * estimation; or even more so, since we don't take into + * account the possibility of removal of existing bins. + */ + o->set_registry_size(o, reg_sz + p->mcast_list_len); + o->total_pending_num += o->max_cmd_len; + break; + default: BNX2X_ERR("Unknown command: %d\n", cmd); return -EINVAL; @@ -3108,12 +3259,16 @@ static int bnx2x_mcast_validate_e2(struct bnx2x *bp, static void bnx2x_mcast_revert_e2(struct bnx2x *bp, struct bnx2x_mcast_ramrod_params *p, - int old_num_bins) + int old_num_bins, + enum bnx2x_mcast_cmd cmd) { struct bnx2x_mcast_obj *o = p->mcast_obj; o->set_registry_size(o, old_num_bins); o->total_pending_num -= p->mcast_list_len; + + if (cmd == BNX2X_MCAST_CMD_SET) + o->total_pending_num -= o->max_cmd_len; } /** @@ -3223,9 +3378,11 @@ static int bnx2x_mcast_setup_e2(struct bnx2x *bp, bnx2x_mcast_refresh_registry_e2(bp, o); /* If CLEAR_ONLY was requested - don't send a ramrod and clear - * RAMROD_PENDING status immediately. + * RAMROD_PENDING status immediately. due to the SET option, it's also + * possible that after evaluating the differences there's no need for + * a ramrod. In that case, we can skip it as well. */ - if (test_bit(RAMROD_DRV_CLR_ONLY, &p->ramrod_flags)) { + if (test_bit(RAMROD_DRV_CLR_ONLY, &p->ramrod_flags) || !cnt) { raw->clear_pending(raw); return 0; } else { @@ -3253,6 +3410,11 @@ static int bnx2x_mcast_validate_e1h(struct bnx2x *bp, struct bnx2x_mcast_ramrod_params *p, enum bnx2x_mcast_cmd cmd) { + if (cmd == BNX2X_MCAST_CMD_SET) { + BNX2X_ERR("Can't use `set' command on e1h!\n"); + return -EINVAL; + } + /* Mark, that there is a work to do */ if ((cmd == BNX2X_MCAST_CMD_DEL) || (cmd == BNX2X_MCAST_CMD_RESTORE)) p->mcast_list_len = 1; @@ -3262,7 +3424,8 @@ static int bnx2x_mcast_validate_e1h(struct bnx2x *bp, static void bnx2x_mcast_revert_e1h(struct bnx2x *bp, struct bnx2x_mcast_ramrod_params *p, - int old_num_bins) + int old_num_bins, + enum bnx2x_mcast_cmd cmd) { /* Do nothing */ } @@ -3372,6 +3535,11 @@ static int bnx2x_mcast_validate_e1(struct bnx2x *bp, struct bnx2x_mcast_obj *o = p->mcast_obj; int reg_sz = o->get_registry_size(o); + if (cmd == BNX2X_MCAST_CMD_SET) { + BNX2X_ERR("Can't use `set' command on e1!\n"); + return -EINVAL; + } + switch (cmd) { /* DEL command deletes all currently configured MACs */ case BNX2X_MCAST_CMD_DEL: @@ -3422,7 +3590,8 @@ static int bnx2x_mcast_validate_e1(struct bnx2x *bp, static void bnx2x_mcast_revert_e1(struct bnx2x *bp, struct bnx2x_mcast_ramrod_params *p, - int old_num_macs) + int old_num_macs, + enum bnx2x_mcast_cmd cmd) { struct bnx2x_mcast_obj *o = p->mcast_obj; @@ -3816,7 +3985,7 @@ error_exit2: r->clear_pending(r); error_exit1: - o->revert(bp, p, old_reg_size); + o->revert(bp, p, old_reg_size, cmd); return rc; } diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h index 4048fc594cce..0bf2fd470819 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h @@ -536,6 +536,15 @@ enum bnx2x_mcast_cmd { BNX2X_MCAST_CMD_CONT, BNX2X_MCAST_CMD_DEL, BNX2X_MCAST_CMD_RESTORE, + + /* Following this, multicast configuration should equal to approx + * the set of MACs provided [i.e., remove all else]. + * The two sub-commands are used internally to decide whether a given + * bin is to be added or removed + */ + BNX2X_MCAST_CMD_SET, + BNX2X_MCAST_CMD_SET_ADD, + BNX2X_MCAST_CMD_SET_DEL, }; struct bnx2x_mcast_obj { @@ -635,7 +644,8 @@ struct bnx2x_mcast_obj { */ void (*revert)(struct bnx2x *bp, struct bnx2x_mcast_ramrod_params *p, - int old_num_bins); + int old_num_bins, + enum bnx2x_mcast_cmd cmd); int (*get_registry_size)(struct bnx2x_mcast_obj *o); void (*set_registry_size)(struct bnx2x_mcast_obj *o, int n); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c index 632daff117d3..6c586b045d1d 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c @@ -573,17 +573,6 @@ int bnx2x_vf_mcast(struct bnx2x *bp, struct bnx2x_virtf *vf, } } - /* clear existing mcasts */ - mcast.mcast_list_len = vf->mcast_list_len; - vf->mcast_list_len = mc_num; - rc = bnx2x_config_mcast(bp, &mcast, BNX2X_MCAST_CMD_DEL); - if (rc) { - BNX2X_ERR("Failed to remove multicasts\n"); - kfree(mc); - return rc; - } - - /* update mcast list on the ramrod params */ if (mc_num) { INIT_LIST_HEAD(&mcast.mcast_list); for (i = 0; i < mc_num; i++) { @@ -594,12 +583,18 @@ int bnx2x_vf_mcast(struct bnx2x *bp, struct bnx2x_virtf *vf, /* add new mcasts */ mcast.mcast_list_len = mc_num; - rc = bnx2x_config_mcast(bp, &mcast, BNX2X_MCAST_CMD_ADD); + rc = bnx2x_config_mcast(bp, &mcast, BNX2X_MCAST_CMD_SET); if (rc) - BNX2X_ERR("Faled to add multicasts\n"); - kfree(mc); + BNX2X_ERR("Faled to set multicasts\n"); + } else { + /* clear existing mcasts */ + rc = bnx2x_config_mcast(bp, &mcast, BNX2X_MCAST_CMD_DEL); + if (rc) + BNX2X_ERR("Failed to remove multicasts\n"); } + kfree(mc); + return rc; } @@ -1583,7 +1578,6 @@ int bnx2x_iov_nic_init(struct bnx2x *bp) * It needs to be initialized here so that it can be safely * handled by a subsequent FLR flow. */ - vf->mcast_list_len = 0; bnx2x_init_mcast_obj(bp, &vf->mcast_obj, 0xFF, 0xFF, 0xFF, 0xFF, bnx2x_vf_sp(bp, vf, mcast_rdata), diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h index 670a581ffabc..7a6d406f4c11 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h @@ -195,7 +195,6 @@ struct bnx2x_virtf { int leading_rss; /* MCAST object */ - int mcast_list_len; struct bnx2x_mcast_obj mcast_obj; /* RSS configuration object */ From 9d490b4ee4d7d495a4f4908ea998d2a7355e0807 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Tue, 23 Aug 2016 12:38:56 -0400 Subject: [PATCH 0475/1715] net: dsa: rename switch operations structure Now that the dsa_switch_driver structure contains only function pointers as it is supposed to, rename it to the more appropriate dsa_switch_ops, uniformly to any other operations structure in the kernel. No functional changes here, basically just the result of something like: s/dsa_switch_driver *drv/dsa_switch_ops *ops/g However keep the {un,}register_switch_driver functions and their dsa_switch_drivers list as is, since they represent the -- likely to be deprecated soon -- legacy DSA registration framework. In the meantime, also fix the following checks from checkpatch.pl to make it happy with this patch: CHECK: Comparison to NULL could be written "!ops" #403: FILE: net/dsa/dsa.c:470: + if (ops == NULL) { CHECK: Comparison to NULL could be written "ds->ops->get_strings" #773: FILE: net/dsa/slave.c:697: + if (ds->ops->get_strings != NULL) CHECK: Comparison to NULL could be written "ds->ops->get_ethtool_stats" #824: FILE: net/dsa/slave.c:785: + if (ds->ops->get_ethtool_stats != NULL) CHECK: Comparison to NULL could be written "ds->ops->get_sset_count" #835: FILE: net/dsa/slave.c:798: + if (ds->ops->get_sset_count != NULL) total: 0 errors, 0 warnings, 4 checks, 784 lines checked Signed-off-by: Vivien Didelot Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- Documentation/networking/dsa/dsa.txt | 10 +- drivers/net/dsa/b53/b53_common.c | 4 +- drivers/net/dsa/bcm_sf2.c | 4 +- drivers/net/dsa/mv88e6060.c | 6 +- drivers/net/dsa/mv88e6xxx/chip.c | 8 +- include/net/dsa.h | 10 +- net/dsa/dsa.c | 70 ++++++------- net/dsa/dsa2.c | 16 +-- net/dsa/slave.c | 146 +++++++++++++-------------- 9 files changed, 137 insertions(+), 137 deletions(-) diff --git a/Documentation/networking/dsa/dsa.txt b/Documentation/networking/dsa/dsa.txt index 9d05ed7f7da5..44ed453ccf66 100644 --- a/Documentation/networking/dsa/dsa.txt +++ b/Documentation/networking/dsa/dsa.txt @@ -227,9 +227,9 @@ to address individual switches in the tree. dsa_switch: structure describing a switch device in the tree, referencing a dsa_switch_tree as a backpointer, slave network devices, master network device, -and a reference to the backing dsa_switch_driver +and a reference to the backing dsa_switch_ops -dsa_switch_driver: structure referencing function pointers, see below for a full +dsa_switch_ops: structure referencing function pointers, see below for a full description. Design limitations @@ -357,10 +357,10 @@ regular HWMON devices in /sys/class/hwmon/. Driver development ================== -DSA switch drivers need to implement a dsa_switch_driver structure which will +DSA switch drivers need to implement a dsa_switch_ops structure which will contain the various members described below. -register_switch_driver() registers this dsa_switch_driver in its internal list +register_switch_driver() registers this dsa_switch_ops in its internal list of drivers to probe for. unregister_switch_driver() does the exact opposite. Unless requested differently by setting the priv_size member accordingly, DSA @@ -379,7 +379,7 @@ Switch configuration buses, return a non-NULL string - setup: setup function for the switch, this function is responsible for setting - up the dsa_switch_driver private structure with all it needs: register maps, + up the dsa_switch_ops private structure with all it needs: register maps, interrupts, mutexes, locks etc.. This function is also expected to properly configure the switch to separate all network interfaces from each other, that is, they should be isolated by the switch hardware itself, typically by creating diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 65ecb51f99e5..6fb77cca78bb 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1378,7 +1378,7 @@ static enum dsa_tag_protocol b53_get_tag_protocol(struct dsa_switch *ds) return DSA_TAG_PROTO_NONE; } -static struct dsa_switch_driver b53_switch_ops = { +static struct dsa_switch_ops b53_switch_ops = { .get_tag_protocol = b53_get_tag_protocol, .setup = b53_setup, .set_addr = b53_set_addr, @@ -1618,7 +1618,7 @@ static int b53_switch_init(struct b53_device *dev) dev->vta_regs[1] = chip->vta_regs[1]; dev->vta_regs[2] = chip->vta_regs[2]; dev->jumbo_pm_reg = chip->jumbo_pm_reg; - ds->drv = &b53_switch_ops; + ds->ops = &b53_switch_ops; dev->cpu_port = chip->cpu_port; dev->num_vlans = chip->vlans; dev->num_arl_entries = chip->arl_entries; diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index b47a74b37a42..220e5d1948ca 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -1581,7 +1581,7 @@ static int bcm_sf2_sw_setup(struct dsa_switch *ds) return 0; } -static struct dsa_switch_driver bcm_sf2_switch_driver = { +static struct dsa_switch_ops bcm_sf2_switch_ops = { .setup = bcm_sf2_sw_setup, .get_tag_protocol = bcm_sf2_sw_get_tag_protocol, .set_addr = bcm_sf2_sw_set_addr, @@ -1632,7 +1632,7 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev) priv = (struct bcm_sf2_priv *)(ds + 1); ds->priv = priv; ds->dev = &pdev->dev; - ds->drv = &bcm_sf2_switch_driver; + ds->ops = &bcm_sf2_switch_ops; dev_set_drvdata(&pdev->dev, ds); diff --git a/drivers/net/dsa/mv88e6060.c b/drivers/net/dsa/mv88e6060.c index 1fdfbf3a50bc..7ff9d373a9ee 100644 --- a/drivers/net/dsa/mv88e6060.c +++ b/drivers/net/dsa/mv88e6060.c @@ -252,7 +252,7 @@ mv88e6060_phy_write(struct dsa_switch *ds, int port, int regnum, u16 val) return reg_write(ds, addr, regnum, val); } -static struct dsa_switch_driver mv88e6060_switch_driver = { +static struct dsa_switch_ops mv88e6060_switch_ops = { .get_tag_protocol = mv88e6060_get_tag_protocol, .probe = mv88e6060_drv_probe, .setup = mv88e6060_setup, @@ -263,14 +263,14 @@ static struct dsa_switch_driver mv88e6060_switch_driver = { static int __init mv88e6060_init(void) { - register_switch_driver(&mv88e6060_switch_driver); + register_switch_driver(&mv88e6060_switch_ops); return 0; } module_init(mv88e6060_init); static void __exit mv88e6060_cleanup(void) { - unregister_switch_driver(&mv88e6060_switch_driver); + unregister_switch_driver(&mv88e6060_switch_ops); } module_exit(mv88e6060_cleanup); diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 82d45165803c..750d01d775e0 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -3976,7 +3976,7 @@ free: return NULL; } -static struct dsa_switch_driver mv88e6xxx_switch_driver = { +static struct dsa_switch_ops mv88e6xxx_switch_ops = { .probe = mv88e6xxx_drv_probe, .get_tag_protocol = mv88e6xxx_get_tag_protocol, .setup = mv88e6xxx_setup, @@ -4025,7 +4025,7 @@ static int mv88e6xxx_register_switch(struct mv88e6xxx_chip *chip, ds->dev = dev; ds->priv = chip; - ds->drv = &mv88e6xxx_switch_driver; + ds->ops = &mv88e6xxx_switch_ops; dev_set_drvdata(dev, ds); @@ -4118,7 +4118,7 @@ static struct mdio_driver mv88e6xxx_driver = { static int __init mv88e6xxx_init(void) { - register_switch_driver(&mv88e6xxx_switch_driver); + register_switch_driver(&mv88e6xxx_switch_ops); return mdio_driver_register(&mv88e6xxx_driver); } module_init(mv88e6xxx_init); @@ -4126,7 +4126,7 @@ module_init(mv88e6xxx_init); static void __exit mv88e6xxx_cleanup(void) { mdio_driver_unregister(&mv88e6xxx_driver); - unregister_switch_driver(&mv88e6xxx_switch_driver); + unregister_switch_driver(&mv88e6xxx_switch_ops); } module_exit(mv88e6xxx_cleanup); diff --git a/include/net/dsa.h b/include/net/dsa.h index 8ca2684c5358..2ebeba44a461 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -165,9 +165,9 @@ struct dsa_switch { struct dsa_chip_data *cd; /* - * The used switch driver. + * The switch operations. */ - struct dsa_switch_driver *drv; + struct dsa_switch_ops *ops; /* * An array of which element [a] indicates which port on this @@ -236,7 +236,7 @@ struct switchdev_obj; struct switchdev_obj_port_fdb; struct switchdev_obj_port_vlan; -struct dsa_switch_driver { +struct dsa_switch_ops { struct list_head list; /* @@ -371,8 +371,8 @@ struct dsa_switch_driver { int (*cb)(struct switchdev_obj *obj)); }; -void register_switch_driver(struct dsa_switch_driver *type); -void unregister_switch_driver(struct dsa_switch_driver *type); +void register_switch_driver(struct dsa_switch_ops *type); +void unregister_switch_driver(struct dsa_switch_ops *type); struct mii_bus *dsa_host_dev_to_mii_bus(struct device *dev); static inline void *ds_to_priv(struct dsa_switch *ds) diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 8d3a28d4e99d..d8d267e9a872 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -61,27 +61,27 @@ const struct dsa_device_ops *dsa_device_ops[DSA_TAG_LAST] = { static DEFINE_MUTEX(dsa_switch_drivers_mutex); static LIST_HEAD(dsa_switch_drivers); -void register_switch_driver(struct dsa_switch_driver *drv) +void register_switch_driver(struct dsa_switch_ops *ops) { mutex_lock(&dsa_switch_drivers_mutex); - list_add_tail(&drv->list, &dsa_switch_drivers); + list_add_tail(&ops->list, &dsa_switch_drivers); mutex_unlock(&dsa_switch_drivers_mutex); } EXPORT_SYMBOL_GPL(register_switch_driver); -void unregister_switch_driver(struct dsa_switch_driver *drv) +void unregister_switch_driver(struct dsa_switch_ops *ops) { mutex_lock(&dsa_switch_drivers_mutex); - list_del_init(&drv->list); + list_del_init(&ops->list); mutex_unlock(&dsa_switch_drivers_mutex); } EXPORT_SYMBOL_GPL(unregister_switch_driver); -static struct dsa_switch_driver * +static struct dsa_switch_ops * dsa_switch_probe(struct device *parent, struct device *host_dev, int sw_addr, const char **_name, void **priv) { - struct dsa_switch_driver *ret; + struct dsa_switch_ops *ret; struct list_head *list; const char *name; @@ -90,13 +90,13 @@ dsa_switch_probe(struct device *parent, struct device *host_dev, int sw_addr, mutex_lock(&dsa_switch_drivers_mutex); list_for_each(list, &dsa_switch_drivers) { - struct dsa_switch_driver *drv; + struct dsa_switch_ops *ops; - drv = list_entry(list, struct dsa_switch_driver, list); + ops = list_entry(list, struct dsa_switch_ops, list); - name = drv->probe(parent, host_dev, sw_addr, priv); + name = ops->probe(parent, host_dev, sw_addr, priv); if (name != NULL) { - ret = drv; + ret = ops; break; } } @@ -117,7 +117,7 @@ static ssize_t temp1_input_show(struct device *dev, struct dsa_switch *ds = dev_get_drvdata(dev); int temp, ret; - ret = ds->drv->get_temp(ds, &temp); + ret = ds->ops->get_temp(ds, &temp); if (ret < 0) return ret; @@ -131,7 +131,7 @@ static ssize_t temp1_max_show(struct device *dev, struct dsa_switch *ds = dev_get_drvdata(dev); int temp, ret; - ret = ds->drv->get_temp_limit(ds, &temp); + ret = ds->ops->get_temp_limit(ds, &temp); if (ret < 0) return ret; @@ -149,7 +149,7 @@ static ssize_t temp1_max_store(struct device *dev, if (ret < 0) return ret; - ret = ds->drv->set_temp_limit(ds, DIV_ROUND_CLOSEST(temp, 1000)); + ret = ds->ops->set_temp_limit(ds, DIV_ROUND_CLOSEST(temp, 1000)); if (ret < 0) return ret; @@ -164,7 +164,7 @@ static ssize_t temp1_max_alarm_show(struct device *dev, bool alarm; int ret; - ret = ds->drv->get_temp_alarm(ds, &alarm); + ret = ds->ops->get_temp_alarm(ds, &alarm); if (ret < 0) return ret; @@ -184,15 +184,15 @@ static umode_t dsa_hwmon_attrs_visible(struct kobject *kobj, { struct device *dev = container_of(kobj, struct device, kobj); struct dsa_switch *ds = dev_get_drvdata(dev); - struct dsa_switch_driver *drv = ds->drv; + struct dsa_switch_ops *ops = ds->ops; umode_t mode = attr->mode; if (index == 1) { - if (!drv->get_temp_limit) + if (!ops->get_temp_limit) mode = 0; - else if (!drv->set_temp_limit) + else if (!ops->set_temp_limit) mode &= ~S_IWUSR; - } else if (index == 2 && !drv->get_temp_alarm) { + } else if (index == 2 && !ops->get_temp_alarm) { mode = 0; } return mode; @@ -228,8 +228,8 @@ int dsa_cpu_dsa_setup(struct dsa_switch *ds, struct device *dev, genphy_config_init(phydev); genphy_read_status(phydev); - if (ds->drv->adjust_link) - ds->drv->adjust_link(ds, port, phydev); + if (ds->ops->adjust_link) + ds->ops->adjust_link(ds, port, phydev); } return 0; @@ -303,7 +303,7 @@ void dsa_cpu_port_ethtool_restore(struct dsa_switch *ds) static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) { - struct dsa_switch_driver *drv = ds->drv; + struct dsa_switch_ops *ops = ds->ops; struct dsa_switch_tree *dst = ds->dst; struct dsa_chip_data *cd = ds->cd; bool valid_name_found = false; @@ -356,7 +356,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) if (dst->cpu_switch == index) { enum dsa_tag_protocol tag_protocol; - tag_protocol = drv->get_tag_protocol(ds); + tag_protocol = ops->get_tag_protocol(ds); dst->tag_ops = dsa_resolve_tag_protocol(tag_protocol); if (IS_ERR(dst->tag_ops)) { ret = PTR_ERR(dst->tag_ops); @@ -371,15 +371,15 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) /* * Do basic register setup. */ - ret = drv->setup(ds); + ret = ops->setup(ds); if (ret < 0) goto out; - ret = drv->set_addr(ds, dst->master_netdev->dev_addr); + ret = ops->set_addr(ds, dst->master_netdev->dev_addr); if (ret < 0) goto out; - if (!ds->slave_mii_bus && drv->phy_read) { + if (!ds->slave_mii_bus && ops->phy_read) { ds->slave_mii_bus = devm_mdiobus_alloc(parent); if (!ds->slave_mii_bus) { ret = -ENOMEM; @@ -426,7 +426,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) * register with hardware monitoring subsystem. * Treat registration error as non-fatal and ignore it. */ - if (drv->get_temp) { + if (ops->get_temp) { const char *netname = netdev_name(dst->master_netdev); char hname[IFNAMSIZ + 1]; int i, j; @@ -457,7 +457,7 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index, struct device *parent, struct device *host_dev) { struct dsa_chip_data *cd = dst->pd->chip + index; - struct dsa_switch_driver *drv; + struct dsa_switch_ops *ops; struct dsa_switch *ds; int ret; const char *name; @@ -466,8 +466,8 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index, /* * Probe for switch model. */ - drv = dsa_switch_probe(parent, host_dev, cd->sw_addr, &name, &priv); - if (drv == NULL) { + ops = dsa_switch_probe(parent, host_dev, cd->sw_addr, &name, &priv); + if (!ops) { netdev_err(dst->master_netdev, "[%d]: could not detect attached switch\n", index); return ERR_PTR(-EINVAL); @@ -486,7 +486,7 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index, ds->dst = dst; ds->index = index; ds->cd = cd; - ds->drv = drv; + ds->ops = ops; ds->priv = priv; ds->dev = parent; @@ -541,7 +541,7 @@ static void dsa_switch_destroy(struct dsa_switch *ds) ds->dsa_port_mask |= ~(1 << port); } - if (ds->slave_mii_bus && ds->drv->phy_read) + if (ds->slave_mii_bus && ds->ops->phy_read) mdiobus_unregister(ds->slave_mii_bus); } @@ -560,8 +560,8 @@ int dsa_switch_suspend(struct dsa_switch *ds) return ret; } - if (ds->drv->suspend) - ret = ds->drv->suspend(ds); + if (ds->ops->suspend) + ret = ds->ops->suspend(ds); return ret; } @@ -571,8 +571,8 @@ int dsa_switch_resume(struct dsa_switch *ds) { int i, ret = 0; - if (ds->drv->resume) - ret = ds->drv->resume(ds); + if (ds->ops->resume) + ret = ds->ops->resume(ds); if (ret) return ret; diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 2e343221464c..8278385dcd21 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -294,25 +294,25 @@ static int dsa_ds_apply(struct dsa_switch_tree *dst, struct dsa_switch *ds) int err; /* Initialize ds->phys_mii_mask before registering the slave MDIO bus - * driver and before drv->setup() has run, since the switch drivers and + * driver and before ops->setup() has run, since the switch drivers and * the slave MDIO bus driver rely on these values for probing PHY * devices or not */ ds->phys_mii_mask = ds->enabled_port_mask; - err = ds->drv->setup(ds); + err = ds->ops->setup(ds); if (err < 0) return err; - err = ds->drv->set_addr(ds, dst->master_netdev->dev_addr); + err = ds->ops->set_addr(ds, dst->master_netdev->dev_addr); if (err < 0) return err; - err = ds->drv->set_addr(ds, dst->master_netdev->dev_addr); + err = ds->ops->set_addr(ds, dst->master_netdev->dev_addr); if (err < 0) return err; - if (!ds->slave_mii_bus && ds->drv->phy_read) { + if (!ds->slave_mii_bus && ds->ops->phy_read) { ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev); if (!ds->slave_mii_bus) return -ENOMEM; @@ -374,7 +374,7 @@ static void dsa_ds_unapply(struct dsa_switch_tree *dst, struct dsa_switch *ds) dsa_user_port_unapply(port, index, ds); } - if (ds->slave_mii_bus && ds->drv->phy_read) + if (ds->slave_mii_bus && ds->ops->phy_read) mdiobus_unregister(ds->slave_mii_bus); } @@ -466,7 +466,7 @@ static int dsa_cpu_parse(struct device_node *port, u32 index, dst->cpu_port = index; } - tag_protocol = ds->drv->get_tag_protocol(ds); + tag_protocol = ds->ops->get_tag_protocol(ds); dst->tag_ops = dsa_resolve_tag_protocol(tag_protocol); if (IS_ERR(dst->tag_ops)) { dev_warn(ds->dev, "No tagger for this switch\n"); @@ -543,7 +543,7 @@ static int dsa_parse_ports_dn(struct device_node *ports, struct dsa_switch *ds) ds->ports[reg].dn = port; - /* Initialize enabled_port_mask now for drv->setup() + /* Initialize enabled_port_mask now for ops->setup() * to have access to a correct value, just like what * net/dsa/dsa.c::dsa_switch_setup_one does. */ diff --git a/net/dsa/slave.c b/net/dsa/slave.c index fc9196745225..9f6c2a20f6ff 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -28,7 +28,7 @@ static int dsa_slave_phy_read(struct mii_bus *bus, int addr, int reg) struct dsa_switch *ds = bus->priv; if (ds->phys_mii_mask & (1 << addr)) - return ds->drv->phy_read(ds, addr, reg); + return ds->ops->phy_read(ds, addr, reg); return 0xffff; } @@ -38,7 +38,7 @@ static int dsa_slave_phy_write(struct mii_bus *bus, int addr, int reg, u16 val) struct dsa_switch *ds = bus->priv; if (ds->phys_mii_mask & (1 << addr)) - return ds->drv->phy_write(ds, addr, reg, val); + return ds->ops->phy_write(ds, addr, reg, val); return 0; } @@ -98,14 +98,14 @@ static int dsa_slave_open(struct net_device *dev) goto clear_allmulti; } - if (ds->drv->port_enable) { - err = ds->drv->port_enable(ds, p->port, p->phy); + if (ds->ops->port_enable) { + err = ds->ops->port_enable(ds, p->port, p->phy); if (err) goto clear_promisc; } - if (ds->drv->port_stp_state_set) - ds->drv->port_stp_state_set(ds, p->port, stp_state); + if (ds->ops->port_stp_state_set) + ds->ops->port_stp_state_set(ds, p->port, stp_state); if (p->phy) phy_start(p->phy); @@ -144,11 +144,11 @@ static int dsa_slave_close(struct net_device *dev) if (!ether_addr_equal(dev->dev_addr, master->dev_addr)) dev_uc_del(master, dev->dev_addr); - if (ds->drv->port_disable) - ds->drv->port_disable(ds, p->port, p->phy); + if (ds->ops->port_disable) + ds->ops->port_disable(ds, p->port, p->phy); - if (ds->drv->port_stp_state_set) - ds->drv->port_stp_state_set(ds, p->port, BR_STATE_DISABLED); + if (ds->ops->port_stp_state_set) + ds->ops->port_stp_state_set(ds, p->port, BR_STATE_DISABLED); return 0; } @@ -209,13 +209,13 @@ static int dsa_slave_port_vlan_add(struct net_device *dev, struct dsa_switch *ds = p->parent; if (switchdev_trans_ph_prepare(trans)) { - if (!ds->drv->port_vlan_prepare || !ds->drv->port_vlan_add) + if (!ds->ops->port_vlan_prepare || !ds->ops->port_vlan_add) return -EOPNOTSUPP; - return ds->drv->port_vlan_prepare(ds, p->port, vlan, trans); + return ds->ops->port_vlan_prepare(ds, p->port, vlan, trans); } - ds->drv->port_vlan_add(ds, p->port, vlan, trans); + ds->ops->port_vlan_add(ds, p->port, vlan, trans); return 0; } @@ -226,10 +226,10 @@ static int dsa_slave_port_vlan_del(struct net_device *dev, struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; - if (!ds->drv->port_vlan_del) + if (!ds->ops->port_vlan_del) return -EOPNOTSUPP; - return ds->drv->port_vlan_del(ds, p->port, vlan); + return ds->ops->port_vlan_del(ds, p->port, vlan); } static int dsa_slave_port_vlan_dump(struct net_device *dev, @@ -239,8 +239,8 @@ static int dsa_slave_port_vlan_dump(struct net_device *dev, struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; - if (ds->drv->port_vlan_dump) - return ds->drv->port_vlan_dump(ds, p->port, vlan, cb); + if (ds->ops->port_vlan_dump) + return ds->ops->port_vlan_dump(ds, p->port, vlan, cb); return -EOPNOTSUPP; } @@ -253,13 +253,13 @@ static int dsa_slave_port_fdb_add(struct net_device *dev, struct dsa_switch *ds = p->parent; if (switchdev_trans_ph_prepare(trans)) { - if (!ds->drv->port_fdb_prepare || !ds->drv->port_fdb_add) + if (!ds->ops->port_fdb_prepare || !ds->ops->port_fdb_add) return -EOPNOTSUPP; - return ds->drv->port_fdb_prepare(ds, p->port, fdb, trans); + return ds->ops->port_fdb_prepare(ds, p->port, fdb, trans); } - ds->drv->port_fdb_add(ds, p->port, fdb, trans); + ds->ops->port_fdb_add(ds, p->port, fdb, trans); return 0; } @@ -271,8 +271,8 @@ static int dsa_slave_port_fdb_del(struct net_device *dev, struct dsa_switch *ds = p->parent; int ret = -EOPNOTSUPP; - if (ds->drv->port_fdb_del) - ret = ds->drv->port_fdb_del(ds, p->port, fdb); + if (ds->ops->port_fdb_del) + ret = ds->ops->port_fdb_del(ds, p->port, fdb); return ret; } @@ -284,8 +284,8 @@ static int dsa_slave_port_fdb_dump(struct net_device *dev, struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; - if (ds->drv->port_fdb_dump) - return ds->drv->port_fdb_dump(ds, p->port, fdb, cb); + if (ds->ops->port_fdb_dump) + return ds->ops->port_fdb_dump(ds, p->port, fdb, cb); return -EOPNOTSUPP; } @@ -308,9 +308,9 @@ static int dsa_slave_stp_state_set(struct net_device *dev, struct dsa_switch *ds = p->parent; if (switchdev_trans_ph_prepare(trans)) - return ds->drv->port_stp_state_set ? 0 : -EOPNOTSUPP; + return ds->ops->port_stp_state_set ? 0 : -EOPNOTSUPP; - ds->drv->port_stp_state_set(ds, p->port, attr->u.stp_state); + ds->ops->port_stp_state_set(ds, p->port, attr->u.stp_state); return 0; } @@ -326,8 +326,8 @@ static int dsa_slave_vlan_filtering(struct net_device *dev, if (switchdev_trans_ph_prepare(trans)) return 0; - if (ds->drv->port_vlan_filtering) - return ds->drv->port_vlan_filtering(ds, p->port, + if (ds->ops->port_vlan_filtering) + return ds->ops->port_vlan_filtering(ds, p->port, attr->u.vlan_filtering); return 0; @@ -365,8 +365,8 @@ static int dsa_slave_ageing_time(struct net_device *dev, ds->ports[p->port].ageing_time = ageing_time; ageing_time = dsa_fastest_ageing_time(ds, ageing_time); - if (ds->drv->set_ageing_time) - return ds->drv->set_ageing_time(ds, ageing_time); + if (ds->ops->set_ageing_time) + return ds->ops->set_ageing_time(ds, ageing_time); return 0; } @@ -481,8 +481,8 @@ static int dsa_slave_bridge_port_join(struct net_device *dev, p->bridge_dev = br; - if (ds->drv->port_bridge_join) - ret = ds->drv->port_bridge_join(ds, p->port, br); + if (ds->ops->port_bridge_join) + ret = ds->ops->port_bridge_join(ds, p->port, br); return ret == -EOPNOTSUPP ? 0 : ret; } @@ -493,16 +493,16 @@ static void dsa_slave_bridge_port_leave(struct net_device *dev) struct dsa_switch *ds = p->parent; - if (ds->drv->port_bridge_leave) - ds->drv->port_bridge_leave(ds, p->port); + if (ds->ops->port_bridge_leave) + ds->ops->port_bridge_leave(ds, p->port); p->bridge_dev = NULL; /* Port left the bridge, put in BR_STATE_DISABLED by the bridge layer, * so allow it to be in BR_STATE_FORWARDING to be kept functional */ - if (ds->drv->port_stp_state_set) - ds->drv->port_stp_state_set(ds, p->port, BR_STATE_FORWARDING); + if (ds->ops->port_stp_state_set) + ds->ops->port_stp_state_set(ds, p->port, BR_STATE_FORWARDING); } static int dsa_slave_port_attr_get(struct net_device *dev, @@ -605,8 +605,8 @@ static int dsa_slave_get_regs_len(struct net_device *dev) struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; - if (ds->drv->get_regs_len) - return ds->drv->get_regs_len(ds, p->port); + if (ds->ops->get_regs_len) + return ds->ops->get_regs_len(ds, p->port); return -EOPNOTSUPP; } @@ -617,8 +617,8 @@ dsa_slave_get_regs(struct net_device *dev, struct ethtool_regs *regs, void *_p) struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; - if (ds->drv->get_regs) - ds->drv->get_regs(ds, p->port, regs, _p); + if (ds->ops->get_regs) + ds->ops->get_regs(ds, p->port, regs, _p); } static int dsa_slave_nway_reset(struct net_device *dev) @@ -651,8 +651,8 @@ static int dsa_slave_get_eeprom_len(struct net_device *dev) if (ds->cd && ds->cd->eeprom_len) return ds->cd->eeprom_len; - if (ds->drv->get_eeprom_len) - return ds->drv->get_eeprom_len(ds); + if (ds->ops->get_eeprom_len) + return ds->ops->get_eeprom_len(ds); return 0; } @@ -663,8 +663,8 @@ static int dsa_slave_get_eeprom(struct net_device *dev, struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; - if (ds->drv->get_eeprom) - return ds->drv->get_eeprom(ds, eeprom, data); + if (ds->ops->get_eeprom) + return ds->ops->get_eeprom(ds, eeprom, data); return -EOPNOTSUPP; } @@ -675,8 +675,8 @@ static int dsa_slave_set_eeprom(struct net_device *dev, struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; - if (ds->drv->set_eeprom) - return ds->drv->set_eeprom(ds, eeprom, data); + if (ds->ops->set_eeprom) + return ds->ops->set_eeprom(ds, eeprom, data); return -EOPNOTSUPP; } @@ -694,8 +694,8 @@ static void dsa_slave_get_strings(struct net_device *dev, strncpy(data + len, "tx_bytes", len); strncpy(data + 2 * len, "rx_packets", len); strncpy(data + 3 * len, "rx_bytes", len); - if (ds->drv->get_strings != NULL) - ds->drv->get_strings(ds, p->port, data + 4 * len); + if (ds->ops->get_strings) + ds->ops->get_strings(ds, p->port, data + 4 * len); } } @@ -714,8 +714,8 @@ static void dsa_cpu_port_get_ethtool_stats(struct net_device *dev, dst->master_ethtool_ops.get_ethtool_stats(dev, stats, data); } - if (ds->drv->get_ethtool_stats) - ds->drv->get_ethtool_stats(ds, cpu_port, data + count); + if (ds->ops->get_ethtool_stats) + ds->ops->get_ethtool_stats(ds, cpu_port, data + count); } static int dsa_cpu_port_get_sset_count(struct net_device *dev, int sset) @@ -727,8 +727,8 @@ static int dsa_cpu_port_get_sset_count(struct net_device *dev, int sset) if (dst->master_ethtool_ops.get_sset_count) count += dst->master_ethtool_ops.get_sset_count(dev, sset); - if (sset == ETH_SS_STATS && ds->drv->get_sset_count) - count += ds->drv->get_sset_count(ds); + if (sset == ETH_SS_STATS && ds->ops->get_sset_count) + count += ds->ops->get_sset_count(ds); return count; } @@ -755,14 +755,14 @@ static void dsa_cpu_port_get_strings(struct net_device *dev, dst->master_ethtool_ops.get_strings(dev, stringset, data); } - if (stringset == ETH_SS_STATS && ds->drv->get_strings) { + if (stringset == ETH_SS_STATS && ds->ops->get_strings) { ndata = data + mcount * len; /* This function copies ETH_GSTRINGS_LEN bytes, we will mangle * the output after to prepend our CPU port prefix we * constructed earlier */ - ds->drv->get_strings(ds, cpu_port, ndata); - count = ds->drv->get_sset_count(ds); + ds->ops->get_strings(ds, cpu_port, ndata); + count = ds->ops->get_sset_count(ds); for (i = 0; i < count; i++) { memmove(ndata + (i * len + sizeof(pfx)), ndata + i * len, len - sizeof(pfx)); @@ -782,8 +782,8 @@ static void dsa_slave_get_ethtool_stats(struct net_device *dev, data[1] = dev->stats.tx_bytes; data[2] = dev->stats.rx_packets; data[3] = dev->stats.rx_bytes; - if (ds->drv->get_ethtool_stats != NULL) - ds->drv->get_ethtool_stats(ds, p->port, data + 4); + if (ds->ops->get_ethtool_stats) + ds->ops->get_ethtool_stats(ds, p->port, data + 4); } static int dsa_slave_get_sset_count(struct net_device *dev, int sset) @@ -795,8 +795,8 @@ static int dsa_slave_get_sset_count(struct net_device *dev, int sset) int count; count = 4; - if (ds->drv->get_sset_count != NULL) - count += ds->drv->get_sset_count(ds); + if (ds->ops->get_sset_count) + count += ds->ops->get_sset_count(ds); return count; } @@ -809,8 +809,8 @@ static void dsa_slave_get_wol(struct net_device *dev, struct ethtool_wolinfo *w) struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; - if (ds->drv->get_wol) - ds->drv->get_wol(ds, p->port, w); + if (ds->ops->get_wol) + ds->ops->get_wol(ds, p->port, w); } static int dsa_slave_set_wol(struct net_device *dev, struct ethtool_wolinfo *w) @@ -819,8 +819,8 @@ static int dsa_slave_set_wol(struct net_device *dev, struct ethtool_wolinfo *w) struct dsa_switch *ds = p->parent; int ret = -EOPNOTSUPP; - if (ds->drv->set_wol) - ret = ds->drv->set_wol(ds, p->port, w); + if (ds->ops->set_wol) + ret = ds->ops->set_wol(ds, p->port, w); return ret; } @@ -831,10 +831,10 @@ static int dsa_slave_set_eee(struct net_device *dev, struct ethtool_eee *e) struct dsa_switch *ds = p->parent; int ret; - if (!ds->drv->set_eee) + if (!ds->ops->set_eee) return -EOPNOTSUPP; - ret = ds->drv->set_eee(ds, p->port, p->phy, e); + ret = ds->ops->set_eee(ds, p->port, p->phy, e); if (ret) return ret; @@ -850,10 +850,10 @@ static int dsa_slave_get_eee(struct net_device *dev, struct ethtool_eee *e) struct dsa_switch *ds = p->parent; int ret; - if (!ds->drv->get_eee) + if (!ds->ops->get_eee) return -EOPNOTSUPP; - ret = ds->drv->get_eee(ds, p->port, e); + ret = ds->ops->get_eee(ds, p->port, e); if (ret) return ret; @@ -988,8 +988,8 @@ static void dsa_slave_adjust_link(struct net_device *dev) p->old_pause = p->phy->pause; } - if (ds->drv->adjust_link && status_changed) - ds->drv->adjust_link(ds, p->port, p->phy); + if (ds->ops->adjust_link && status_changed) + ds->ops->adjust_link(ds, p->port, p->phy); if (status_changed) phy_print_status(p->phy); @@ -1004,8 +1004,8 @@ static int dsa_slave_fixed_link_update(struct net_device *dev, if (dev) { p = netdev_priv(dev); ds = p->parent; - if (ds->drv->fixed_link_update) - ds->drv->fixed_link_update(ds, p->port, status); + if (ds->ops->fixed_link_update) + ds->ops->fixed_link_update(ds, p->port, status); } return 0; @@ -1062,8 +1062,8 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p, phy_dn = port_dn; } - if (ds->drv->get_phy_flags) - phy_flags = ds->drv->get_phy_flags(ds, p->port); + if (ds->ops->get_phy_flags) + phy_flags = ds->ops->get_phy_flags(ds, p->port); if (phy_dn) { int phy_id = of_mdio_parse_addr(&slave_dev->dev, phy_dn); From 627cc4add53c0470bfd118002669205d222d3a54 Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Wed, 24 Aug 2016 15:46:25 +0900 Subject: [PATCH 0476/1715] net: diag: slightly refactor the inet_diag_bc_audit error checks. This simplifies the code a bit and also allows inet_diag_bc_audit to send to userspace an error that isn't EINVAL. Signed-off-by: Lorenzo Colitti Acked-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/inet_diag.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 38c2c47fe0e8..a89b68c6a934 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -706,10 +706,16 @@ static bool valid_port_comparison(const struct inet_diag_bc_op *op, return true; } -static int inet_diag_bc_audit(const void *bytecode, int bytecode_len) +static int inet_diag_bc_audit(const struct nlattr *attr) { - const void *bc = bytecode; - int len = bytecode_len; + const void *bytecode, *bc; + int bytecode_len, len; + + if (!attr || nla_len(attr) < sizeof(struct inet_diag_bc_op)) + return -EINVAL; + + bytecode = bc = nla_data(attr); + len = bytecode_len = nla_len(attr); while (len > 0) { int min_len = sizeof(struct inet_diag_bc_op); @@ -1020,13 +1026,13 @@ static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh) if (nlh->nlmsg_flags & NLM_F_DUMP) { if (nlmsg_attrlen(nlh, hdrlen)) { struct nlattr *attr; + int err; attr = nlmsg_find_attr(nlh, hdrlen, INET_DIAG_REQ_BYTECODE); - if (!attr || - nla_len(attr) < sizeof(struct inet_diag_bc_op) || - inet_diag_bc_audit(nla_data(attr), nla_len(attr))) - return -EINVAL; + err = inet_diag_bc_audit(attr); + if (err) + return err; } { struct netlink_dump_control c = { @@ -1051,13 +1057,13 @@ static int inet_diag_handler_cmd(struct sk_buff *skb, struct nlmsghdr *h) h->nlmsg_flags & NLM_F_DUMP) { if (nlmsg_attrlen(h, hdrlen)) { struct nlattr *attr; + int err; attr = nlmsg_find_attr(h, hdrlen, INET_DIAG_REQ_BYTECODE); - if (!attr || - nla_len(attr) < sizeof(struct inet_diag_bc_op) || - inet_diag_bc_audit(nla_data(attr), nla_len(attr))) - return -EINVAL; + err = inet_diag_bc_audit(attr); + if (err) + return err; } { struct netlink_dump_control c = { From a52e95abf772b43c9226e9a72d3c1353903ba96f Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Wed, 24 Aug 2016 15:46:26 +0900 Subject: [PATCH 0477/1715] net: diag: allow socket bytecode filters to match socket marks This allows a privileged process to filter by socket mark when dumping sockets via INET_DIAG_BY_FAMILY. This is useful on systems that use mark-based routing such as Android. The ability to filter socket marks requires CAP_NET_ADMIN, which is consistent with other privileged operations allowed by the SOCK_DIAG interface such as the ability to destroy sockets and the ability to inspect BPF filters attached to packet sockets. Tested: https://android-review.googlesource.com/261350 Signed-off-by: Lorenzo Colitti Acked-by: David Ahern Signed-off-by: David S. Miller --- include/uapi/linux/inet_diag.h | 6 ++++++ net/ipv4/inet_diag.c | 36 +++++++++++++++++++++++++++++++--- 2 files changed, 39 insertions(+), 3 deletions(-) diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h index abbd1dc5d683..5581206a08ae 100644 --- a/include/uapi/linux/inet_diag.h +++ b/include/uapi/linux/inet_diag.h @@ -73,6 +73,7 @@ enum { INET_DIAG_BC_S_COND, INET_DIAG_BC_D_COND, INET_DIAG_BC_DEV_COND, /* u32 ifindex */ + INET_DIAG_BC_MARK_COND, }; struct inet_diag_hostcond { @@ -82,6 +83,11 @@ struct inet_diag_hostcond { __be32 addr[0]; }; +struct inet_diag_markcond { + __u32 mark; + __u32 mask; +}; + /* Base info structure. It contains socket identity (addrs/ports/cookie) * and, alas, the information shown by netstat. */ struct inet_diag_msg { diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index a89b68c6a934..abfbe492ebfe 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -45,6 +45,7 @@ struct inet_diag_entry { u16 family; u16 userlocks; u32 ifindex; + u32 mark; }; static DEFINE_MUTEX(inet_diag_table_mutex); @@ -580,6 +581,14 @@ static int inet_diag_bc_run(const struct nlattr *_bc, yes = 0; break; } + case INET_DIAG_BC_MARK_COND: { + struct inet_diag_markcond *cond; + + cond = (struct inet_diag_markcond *)(op + 1); + if ((entry->mark & cond->mask) != cond->mark) + yes = 0; + break; + } } if (yes) { @@ -624,6 +633,12 @@ int inet_diag_bc_sk(const struct nlattr *bc, struct sock *sk) entry.dport = ntohs(inet->inet_dport); entry.ifindex = sk->sk_bound_dev_if; entry.userlocks = sk_fullsock(sk) ? sk->sk_userlocks : 0; + if (sk_fullsock(sk)) + entry.mark = sk->sk_mark; + else if (sk->sk_state == TCP_NEW_SYN_RECV) + entry.mark = inet_rsk(inet_reqsk(sk))->ir_mark; + else + entry.mark = 0; return inet_diag_bc_run(bc, &entry); } @@ -706,8 +721,17 @@ static bool valid_port_comparison(const struct inet_diag_bc_op *op, return true; } -static int inet_diag_bc_audit(const struct nlattr *attr) +static bool valid_markcond(const struct inet_diag_bc_op *op, int len, + int *min_len) { + *min_len += sizeof(struct inet_diag_markcond); + return len >= *min_len; +} + +static int inet_diag_bc_audit(const struct nlattr *attr, + const struct sk_buff *skb) +{ + bool net_admin = netlink_net_capable(skb, CAP_NET_ADMIN); const void *bytecode, *bc; int bytecode_len, len; @@ -738,6 +762,12 @@ static int inet_diag_bc_audit(const struct nlattr *attr) if (!valid_port_comparison(bc, len, &min_len)) return -EINVAL; break; + case INET_DIAG_BC_MARK_COND: + if (!net_admin) + return -EPERM; + if (!valid_markcond(bc, len, &min_len)) + return -EINVAL; + break; case INET_DIAG_BC_AUTO: case INET_DIAG_BC_JMP: case INET_DIAG_BC_NOP: @@ -1030,7 +1060,7 @@ static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh) attr = nlmsg_find_attr(nlh, hdrlen, INET_DIAG_REQ_BYTECODE); - err = inet_diag_bc_audit(attr); + err = inet_diag_bc_audit(attr, skb); if (err) return err; } @@ -1061,7 +1091,7 @@ static int inet_diag_handler_cmd(struct sk_buff *skb, struct nlmsghdr *h) attr = nlmsg_find_attr(h, hdrlen, INET_DIAG_REQ_BYTECODE); - err = inet_diag_bc_audit(attr); + err = inet_diag_bc_audit(attr, skb); if (err) return err; } From 7a95e94ce0c255327eb8acb07241a95f3250b54b Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 24 Aug 2016 13:50:03 +0000 Subject: [PATCH 0478/1715] ibmvnic: convert to use simple_open() Remove an open coded simple_open() function and replace file operations references to the function with simple_open() instead. Generated by: scripts/coccinelle/api/simple_open.cocci Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index b942108c85c1..e8625309bfb2 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -2779,12 +2779,6 @@ static void handle_control_ras_rsp(union ibmvnic_crq *crq, } } -static int ibmvnic_fw_comp_open(struct inode *inode, struct file *file) -{ - file->private_data = inode->i_private; - return 0; -} - static ssize_t trace_read(struct file *file, char __user *user_buf, size_t len, loff_t *ppos) { @@ -2836,7 +2830,7 @@ static ssize_t trace_read(struct file *file, char __user *user_buf, size_t len, static const struct file_operations trace_ops = { .owner = THIS_MODULE, - .open = ibmvnic_fw_comp_open, + .open = simple_open, .read = trace_read, }; @@ -2886,7 +2880,7 @@ static ssize_t paused_write(struct file *file, const char __user *user_buf, static const struct file_operations paused_ops = { .owner = THIS_MODULE, - .open = ibmvnic_fw_comp_open, + .open = simple_open, .read = paused_read, .write = paused_write, }; @@ -2934,7 +2928,7 @@ static ssize_t tracing_write(struct file *file, const char __user *user_buf, static const struct file_operations tracing_ops = { .owner = THIS_MODULE, - .open = ibmvnic_fw_comp_open, + .open = simple_open, .read = tracing_read, .write = tracing_write, }; @@ -2987,7 +2981,7 @@ static ssize_t error_level_write(struct file *file, const char __user *user_buf, static const struct file_operations error_level_ops = { .owner = THIS_MODULE, - .open = ibmvnic_fw_comp_open, + .open = simple_open, .read = error_level_read, .write = error_level_write, }; @@ -3038,7 +3032,7 @@ static ssize_t trace_level_write(struct file *file, const char __user *user_buf, static const struct file_operations trace_level_ops = { .owner = THIS_MODULE, - .open = ibmvnic_fw_comp_open, + .open = simple_open, .read = trace_level_read, .write = trace_level_write, }; @@ -3091,7 +3085,7 @@ static ssize_t trace_buff_size_write(struct file *file, static const struct file_operations trace_size_ops = { .owner = THIS_MODULE, - .open = ibmvnic_fw_comp_open, + .open = simple_open, .read = trace_buff_size_read, .write = trace_buff_size_write, }; From 0e87203af470d5c169cbe89df4e98ed256107038 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 24 Aug 2016 13:47:58 +0000 Subject: [PATCH 0479/1715] ibmvnic: fix error return code in ibmvnic_probe() Fix to return error code -ENOMEM from the dma_map_single error handling case instead of 0, as done elsewhere in this function. Fixes: 032c5e82847a ("Driver for IBM System i/p VNIC protocol") Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index e8625309bfb2..62454d7a062a 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -3745,6 +3745,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) if (dma_mapping_error(&dev->dev, adapter->stats_token)) { if (!firmware_has_feature(FW_FEATURE_CMO)) dev_err(&dev->dev, "Couldn't map stats buffer\n"); + rc = -ENOMEM; goto free_crq; } From bb40aca7cf153e3e2941140d3850a4b6c4205ccb Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 24 Aug 2016 15:07:26 +0000 Subject: [PATCH 0480/1715] vmxnet3: fix non static symbol warning Fixes the following sparse warning: drivers/net/vmxnet3/vmxnet3_drv.c:1645:1: warning: symbol 'vmxnet3_rq_destroy_all_rxdataring' was not declared. Should it be static? Signed-off-by: Wei Yongjun Signed-off-by: Shrikrishna Khare Signed-off-by: David S. Miller --- drivers/net/vmxnet3/vmxnet3_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index c68fe495d3f9..fc68dd4aec8f 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -1639,7 +1639,7 @@ static void vmxnet3_rq_destroy(struct vmxnet3_rx_queue *rq, } } -void +static void vmxnet3_rq_destroy_all_rxdataring(struct vmxnet3_adapter *adapter) { int i; From e65c332de8a0c9f570847ec490b60effffcd5320 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 24 Aug 2016 08:50:24 -0700 Subject: [PATCH 0481/1715] tcp: md5: increment sk_drops on syn_recv state TCP MD5 mismatches do increment sk_drops counter in all states but SYN_RECV. This is very unlikely to happen in the real world, but worth adding to help diagnostics. We increase the parent (listener) sk_drops. Signed-off-by: Eric Dumazet Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 1 + net/ipv6/tcp_ipv6.c | 1 + 2 files changed, 2 insertions(+) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 32b048e524d6..436d978c6c39 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1602,6 +1602,7 @@ process: sk = req->rsk_listener; if (unlikely(tcp_v4_inbound_md5_hash(sk, skb))) { + sk_drops_add(sk, skb); reqsk_put(req); goto discard_it; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index e0f46439e391..ac0ed7bda406 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1409,6 +1409,7 @@ process: sk = req->rsk_listener; tcp_v6_fill_cb(skb, hdr, th); if (tcp_v6_inbound_md5_hash(sk, skb)) { + sk_drops_add(sk, skb); reqsk_put(req); goto discard_it; } From 72145a68e4ee116533df49af4b87aca0aacc179c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 24 Aug 2016 09:01:23 -0700 Subject: [PATCH 0482/1715] tcp: md5: add LINUX_MIB_TCPMD5FAILURE counter Adds SNMP counter for drops caused by MD5 mismatches. The current syslog might help, but a counter is more precise and helps monitoring. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/uapi/linux/snmp.h | 1 + net/ipv4/proc.c | 1 + net/ipv4/tcp_ipv4.c | 1 + net/ipv6/tcp_ipv6.c | 1 + 4 files changed, 4 insertions(+) diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index 25a9ad8bcef1..e7a31f830690 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -235,6 +235,7 @@ enum LINUX_MIB_TCPSPURIOUSRTOS, /* TCPSpuriousRTOs */ LINUX_MIB_TCPMD5NOTFOUND, /* TCPMD5NotFound */ LINUX_MIB_TCPMD5UNEXPECTED, /* TCPMD5Unexpected */ + LINUX_MIB_TCPMD5FAILURE, /* TCPMD5Failure */ LINUX_MIB_SACKSHIFTED, LINUX_MIB_SACKMERGED, LINUX_MIB_SACKSHIFTFALLBACK, diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 9f665b63a927..1ed015e4bc79 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -257,6 +257,7 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPSpuriousRTOs", LINUX_MIB_TCPSPURIOUSRTOS), SNMP_MIB_ITEM("TCPMD5NotFound", LINUX_MIB_TCPMD5NOTFOUND), SNMP_MIB_ITEM("TCPMD5Unexpected", LINUX_MIB_TCPMD5UNEXPECTED), + SNMP_MIB_ITEM("TCPMD5Failure", LINUX_MIB_TCPMD5FAILURE), SNMP_MIB_ITEM("TCPSackShifted", LINUX_MIB_SACKSHIFTED), SNMP_MIB_ITEM("TCPSackMerged", LINUX_MIB_SACKMERGED), SNMP_MIB_ITEM("TCPSackShiftFallback", LINUX_MIB_SACKSHIFTFALLBACK), diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 436d978c6c39..ad41e8ecf796 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1169,6 +1169,7 @@ static bool tcp_v4_inbound_md5_hash(const struct sock *sk, NULL, skb); if (genhash || memcmp(hash_location, newhash, 16) != 0) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE); net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n", &iph->saddr, ntohs(th->source), &iph->daddr, ntohs(th->dest), diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index ac0ed7bda406..e4f55683af31 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -671,6 +671,7 @@ static bool tcp_v6_inbound_md5_hash(const struct sock *sk, NULL, skb); if (genhash || memcmp(hash_location, newhash, 16) != 0) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE); net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n", genhash ? "failed" : "mismatch", &ip6h->saddr, ntohs(th->source), From eb60a8ddf3c38959cc73821bec5335bed85e0200 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 24 Aug 2016 10:23:34 -0700 Subject: [PATCH 0483/1715] net: minor optimization in qdisc_qstats_cpu_drop() per_cpu_inc() is faster (at least on x86) than per_cpu_ptr(xxx)++; Signed-off-by: Eric Dumazet Acked-by: John Fastabend Signed-off-by: David S. Miller --- include/net/sch_generic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 0d501779cc68..52a2015667b4 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -592,7 +592,7 @@ static inline void qdisc_qstats_drop(struct Qdisc *sch) static inline void qdisc_qstats_cpu_drop(struct Qdisc *sch) { - qstats_drop_inc(this_cpu_ptr(sch->cpu_qstats)); + this_cpu_inc(sch->cpu_qstats->drops); } static inline void qdisc_qstats_overlimit(struct Qdisc *sch) From 9a0a5c4cb1af98b625dcefd72e987ca4929db71d Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 24 Aug 2016 14:21:41 -0700 Subject: [PATCH 0484/1715] net: systemport: Fix ordering in intrl2_*_mask_clear macro Since we keep shadow copies of which interrupt sources are enabled through the intrl2_*_mask_{set,clear} macros, make sure that the ordering in which we do these two operations: update the copy, then unmask the register is correct. This is not currently a problem because we actually do not use them, but we will in a subsequent patch optimizing register accesses, so better be safe here. Fixes: 80105befdb4b ("net: systemport: add Broadcom SYSTEMPORT Ethernet MAC driver") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bcmsysport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index b2d30863caeb..2059911014db 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -58,8 +58,8 @@ BCM_SYSPORT_IO_MACRO(topctrl, SYS_PORT_TOPCTRL_OFFSET); static inline void intrl2_##which##_mask_clear(struct bcm_sysport_priv *priv, \ u32 mask) \ { \ - intrl2_##which##_writel(priv, mask, INTRL2_CPU_MASK_CLEAR); \ priv->irq##which##_mask &= ~(mask); \ + intrl2_##which##_writel(priv, mask, INTRL2_CPU_MASK_CLEAR); \ } \ static inline void intrl2_##which##_mask_set(struct bcm_sysport_priv *priv, \ u32 mask) \ From 5ca8cc5bf11faed257c762018aea9106d529232f Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 24 Aug 2016 12:31:31 +0200 Subject: [PATCH 0485/1715] rhashtable: add rhashtable_lookup_get_insert_key() This patch modifies __rhashtable_insert_fast() so it returns the existing object that clashes with the one that you want to insert. In case the object is successfully inserted, NULL is returned. Otherwise, you get an error via ERR_PTR(). This patch adapts the existing callers of __rhashtable_insert_fast() so they handle this new logic, and it adds a new rhashtable_lookup_get_insert_key() interface to fetch this existing object. nf_tables needs this change to improve handling of EEXIST cases via honoring the NLM_F_EXCL flag and by checking if the data part of the mapping matches what we have. Cc: Herbert Xu Cc: Thomas Graf Signed-off-by: Pablo Neira Ayuso Acked-by: Herbert Xu --- include/linux/rhashtable.h | 70 +++++++++++++++++++++++++++++++------- lib/rhashtable.c | 10 ++++-- 2 files changed, 64 insertions(+), 16 deletions(-) diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 3eef0802a0cd..26b7a059c65e 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -343,7 +343,8 @@ int rhashtable_init(struct rhashtable *ht, struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht, const void *key, struct rhash_head *obj, - struct bucket_table *old_tbl); + struct bucket_table *old_tbl, + void **data); int rhashtable_insert_rehash(struct rhashtable *ht, struct bucket_table *tbl); int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter, @@ -563,8 +564,11 @@ restart: return NULL; } -/* Internal function, please use rhashtable_insert_fast() instead */ -static inline int __rhashtable_insert_fast( +/* Internal function, please use rhashtable_insert_fast() instead. This + * function returns the existing element already in hashes in there is a clash, + * otherwise it returns an error via ERR_PTR(). + */ +static inline void *__rhashtable_insert_fast( struct rhashtable *ht, const void *key, struct rhash_head *obj, const struct rhashtable_params params) { @@ -577,6 +581,7 @@ static inline int __rhashtable_insert_fast( spinlock_t *lock; unsigned int elasticity; unsigned int hash; + void *data = NULL; int err; restart: @@ -601,11 +606,14 @@ restart: new_tbl = rht_dereference_rcu(tbl->future_tbl, ht); if (unlikely(new_tbl)) { - tbl = rhashtable_insert_slow(ht, key, obj, new_tbl); + tbl = rhashtable_insert_slow(ht, key, obj, new_tbl, &data); if (!IS_ERR_OR_NULL(tbl)) goto slow_path; err = PTR_ERR(tbl); + if (err == -EEXIST) + err = 0; + goto out; } @@ -619,25 +627,25 @@ slow_path: err = rhashtable_insert_rehash(ht, tbl); rcu_read_unlock(); if (err) - return err; + return ERR_PTR(err); goto restart; } - err = -EEXIST; + err = 0; elasticity = ht->elasticity; rht_for_each(head, tbl, hash) { if (key && unlikely(!(params.obj_cmpfn ? params.obj_cmpfn(&arg, rht_obj(ht, head)) : - rhashtable_compare(&arg, rht_obj(ht, head))))) + rhashtable_compare(&arg, rht_obj(ht, head))))) { + data = rht_obj(ht, head); goto out; + } if (!--elasticity) goto slow_path; } - err = 0; - head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash); RCU_INIT_POINTER(obj->next, head); @@ -652,7 +660,7 @@ out: spin_unlock_bh(lock); rcu_read_unlock(); - return err; + return err ? ERR_PTR(err) : data; } /** @@ -675,7 +683,13 @@ static inline int rhashtable_insert_fast( struct rhashtable *ht, struct rhash_head *obj, const struct rhashtable_params params) { - return __rhashtable_insert_fast(ht, NULL, obj, params); + void *ret; + + ret = __rhashtable_insert_fast(ht, NULL, obj, params); + if (IS_ERR(ret)) + return PTR_ERR(ret); + + return ret == NULL ? 0 : -EEXIST; } /** @@ -704,11 +718,15 @@ static inline int rhashtable_lookup_insert_fast( const struct rhashtable_params params) { const char *key = rht_obj(ht, obj); + void *ret; BUG_ON(ht->p.obj_hashfn); - return __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj, - params); + ret = __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj, params); + if (IS_ERR(ret)) + return PTR_ERR(ret); + + return ret == NULL ? 0 : -EEXIST; } /** @@ -736,6 +754,32 @@ static inline int rhashtable_lookup_insert_fast( static inline int rhashtable_lookup_insert_key( struct rhashtable *ht, const void *key, struct rhash_head *obj, const struct rhashtable_params params) +{ + void *ret; + + BUG_ON(!ht->p.obj_hashfn || !key); + + ret = __rhashtable_insert_fast(ht, key, obj, params); + if (IS_ERR(ret)) + return PTR_ERR(ret); + + return ret == NULL ? 0 : -EEXIST; +} + +/** + * rhashtable_lookup_get_insert_key - lookup and insert object into hash table + * @ht: hash table + * @obj: pointer to hash head inside object + * @params: hash table parameters + * @data: pointer to element data already in hashes + * + * Just like rhashtable_lookup_insert_key(), but this function returns the + * object if it exists, NULL if it does not and the insertion was successful, + * and an ERR_PTR otherwise. + */ +static inline void *rhashtable_lookup_get_insert_key( + struct rhashtable *ht, const void *key, struct rhash_head *obj, + const struct rhashtable_params params) { BUG_ON(!ht->p.obj_hashfn || !key); diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 5d845ffd7982..7a940d92f17e 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -438,7 +438,8 @@ EXPORT_SYMBOL_GPL(rhashtable_insert_rehash); struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht, const void *key, struct rhash_head *obj, - struct bucket_table *tbl) + struct bucket_table *tbl, + void **data) { struct rhash_head *head; unsigned int hash; @@ -449,8 +450,11 @@ struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht, spin_lock_nested(rht_bucket_lock(tbl, hash), SINGLE_DEPTH_NESTING); err = -EEXIST; - if (key && rhashtable_lookup_fast(ht, key, ht->p)) - goto exit; + if (key) { + *data = rhashtable_lookup_fast(ht, key, ht->p); + if (*data) + goto exit; + } err = -E2BIG; if (unlikely(rht_grow_above_max(ht, tbl))) From c016c7e45ddfa5085b35b644e659ec014969740d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 24 Aug 2016 12:41:54 +0200 Subject: [PATCH 0486/1715] netfilter: nf_tables: honor NLM_F_EXCL flag in set element insertion If the NLM_F_EXCL flag is set, then new elements that clash with an existing one return EEXIST. In case you try to add an element whose data area differs from what we have, then this returns EBUSY. If no flag is specified at all, then this returns success to userspace. This patch also update the set insert operation so we can fetch the existing element that clashes with the one you want to add, we need this to make sure the element data doesn't differ. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 3 ++- net/netfilter/nf_tables_api.c | 20 +++++++++++++++----- net/netfilter/nft_set_hash.c | 15 ++++++++++++--- net/netfilter/nft_set_rbtree.c | 12 ++++++++---- 4 files changed, 37 insertions(+), 13 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index f2f13399ce44..8972468bc94b 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -251,7 +251,8 @@ struct nft_set_ops { int (*insert)(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem); + const struct nft_set_elem *elem, + struct nft_set_ext **ext); void (*activate)(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 221d27f09623..bd9715e5ff26 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3483,12 +3483,12 @@ static int nft_setelem_parse_flags(const struct nft_set *set, } static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, - const struct nlattr *attr) + const struct nlattr *attr, u32 nlmsg_flags) { struct nlattr *nla[NFTA_SET_ELEM_MAX + 1]; struct nft_data_desc d1, d2; struct nft_set_ext_tmpl tmpl; - struct nft_set_ext *ext; + struct nft_set_ext *ext, *ext2; struct nft_set_elem elem; struct nft_set_binding *binding; struct nft_userdata *udata; @@ -3615,9 +3615,19 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, goto err4; ext->genmask = nft_genmask_cur(ctx->net) | NFT_SET_ELEM_BUSY_MASK; - err = set->ops->insert(ctx->net, set, &elem); - if (err < 0) + err = set->ops->insert(ctx->net, set, &elem, &ext2); + if (err) { + if (err == -EEXIST) { + if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) && + nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) && + memcmp(nft_set_ext_data(ext), + nft_set_ext_data(ext2), set->dlen) != 0) + err = -EBUSY; + else if (!(nlmsg_flags & NLM_F_EXCL)) + err = 0; + } goto err5; + } nft_trans_elem(trans) = elem; list_add_tail(&trans->list, &ctx->net->nft.commit_list); @@ -3673,7 +3683,7 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk, !atomic_add_unless(&set->nelems, 1, set->size + set->ndeact)) return -ENFILE; - err = nft_add_set_elem(&ctx, set, attr); + err = nft_add_set_elem(&ctx, set, attr, nlh->nlmsg_flags); if (err < 0) { atomic_dec(&set->nelems); break; diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 564fa7929ed5..3794cb2fc788 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -126,7 +126,8 @@ err1: } static int nft_hash_insert(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem) + const struct nft_set_elem *elem, + struct nft_set_ext **ext) { struct nft_hash *priv = nft_set_priv(set); struct nft_hash_elem *he = elem->priv; @@ -135,9 +136,17 @@ static int nft_hash_insert(const struct net *net, const struct nft_set *set, .set = set, .key = elem->key.val.data, }; + struct nft_hash_elem *prev; - return rhashtable_lookup_insert_key(&priv->ht, &arg, &he->node, - nft_hash_params); + prev = rhashtable_lookup_get_insert_key(&priv->ht, &arg, &he->node, + nft_hash_params); + if (IS_ERR(prev)) + return PTR_ERR(prev); + if (prev) { + *ext = &prev->ext; + return -EEXIST; + } + return 0; } static void nft_hash_activate(const struct net *net, const struct nft_set *set, diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 6473936d05c6..038682d48261 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -94,7 +94,8 @@ out: } static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, - struct nft_rbtree_elem *new) + struct nft_rbtree_elem *new, + struct nft_set_ext **ext) { struct nft_rbtree *priv = nft_set_priv(set); u8 genmask = nft_genmask_next(net); @@ -122,8 +123,10 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, else if (!nft_rbtree_interval_end(rbe) && nft_rbtree_interval_end(new)) p = &parent->rb_right; - else + else { + *ext = &rbe->ext; return -EEXIST; + } } } } @@ -133,13 +136,14 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, } static int nft_rbtree_insert(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem) + const struct nft_set_elem *elem, + struct nft_set_ext **ext) { struct nft_rbtree_elem *rbe = elem->priv; int err; spin_lock_bh(&nft_rbtree_lock); - err = __nft_rbtree_insert(net, set, rbe); + err = __nft_rbtree_insert(net, set, rbe, ext); spin_unlock_bh(&nft_rbtree_lock); return err; From 7073b16f3dff83df1b0794262386abb869e8d180 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 26 Aug 2016 13:42:17 +0200 Subject: [PATCH 0487/1715] netfilter: nf_tables: Use nla_put_be32() to dump immediate parameters nft_dump_register() should only be used with registers, not with immediates. Fixes: cb1b69b0b15b ("netfilter: nf_tables: add hash expression") Fixes: 91dbc6be0a62("netfilter: nf_tables: add number generator expression") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_hash.c | 6 +++--- net/netfilter/nft_numgen.c | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index e090aeef3a78..764251d31e46 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -88,11 +88,11 @@ static int nft_hash_dump(struct sk_buff *skb, goto nla_put_failure; if (nft_dump_register(skb, NFTA_HASH_DREG, priv->dreg)) goto nla_put_failure; - if (nft_dump_register(skb, NFTA_HASH_LEN, priv->len)) + if (nla_put_be32(skb, NFTA_HASH_LEN, htonl(priv->len))) goto nla_put_failure; - if (nft_dump_register(skb, NFTA_HASH_MODULUS, priv->modulus)) + if (nla_put_be32(skb, NFTA_HASH_MODULUS, htonl(priv->modulus))) goto nla_put_failure; - if (nft_dump_register(skb, NFTA_HASH_SEED, priv->seed)) + if (nla_put_be32(skb, NFTA_HASH_SEED, htonl(priv->seed))) goto nla_put_failure; return 0; diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c index 176e26d5bbd0..294745ecb0fc 100644 --- a/net/netfilter/nft_numgen.c +++ b/net/netfilter/nft_numgen.c @@ -68,9 +68,9 @@ static int nft_ng_dump(struct sk_buff *skb, enum nft_registers dreg, { if (nft_dump_register(skb, NFTA_NG_DREG, dreg)) goto nla_put_failure; - if (nft_dump_register(skb, NFTA_NG_UNTIL, until)) + if (nla_put_be32(skb, NFTA_NG_UNTIL, htonl(until))) goto nla_put_failure; - if (nft_dump_register(skb, NFTA_NG_TYPE, type)) + if (nla_put_be32(skb, NFTA_NG_TYPE, htonl(type))) goto nla_put_failure; return 0; From c80fafbbb59ef9924962f83aac85531039395b18 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 25 Aug 2016 13:21:49 +0800 Subject: [PATCH 0488/1715] veth: sctp: add NETIF_F_SCTP_CRC to device features Commit b17c706987fa ("loopback: sctp: add NETIF_F_SCTP_CSUM to device features") added NETIF_F_SCTP_CRC to device features for lo device to improve the performance of sctp over lo. This patch is to add NETIF_F_SCTP_CRC to device features for veth to improve the performance of sctp over veth. Before this patch: ip netns exec cs_client netperf -H 10.167.12.2 -t SCTP_STREAM -- -m 10K Recv Send Send Socket Socket Message Elapsed Size Size Size Time Throughput bytes bytes bytes secs. 10^6bits/sec 212992 212992 10240 10.00 1117.16 After this patch: ip netns exec cs_client netperf -H 10.167.12.2 -t SCTP_STREAM -- -m 10K Recv Send Send Socket Socket Message Elapsed Size Size Size Time Throughput bytes bytes bytes secs. 10^6bits/sec 212992 212992 10240 10.20 1415.22 Tested-by: Li Shuang Signed-off-by: Xin Long Signed-off-by: David S. Miller --- drivers/net/veth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/veth.c b/drivers/net/veth.c index f37a6e61d4ad..4bda502254fb 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -313,7 +313,7 @@ static const struct net_device_ops veth_netdev_ops = { }; #define VETH_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HW_CSUM | \ - NETIF_F_RXCSUM | NETIF_F_HIGHDMA | \ + NETIF_F_RXCSUM | NETIF_F_SCTP_CRC | NETIF_F_HIGHDMA | \ NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL | \ NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | \ NETIF_F_HW_VLAN_STAG_TX | NETIF_F_HW_VLAN_STAG_RX ) From 72f4af4e4706a07727765bc6b7bdace4b1543543 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Thu, 25 Aug 2016 14:27:51 +0200 Subject: [PATCH 0489/1715] net: bridge: export also pvid flag in the xstats flags When I added support to export the vlan entry flags via xstats I forgot to add support for the pvid since it is manually matched, so check if the entry matches the vlan_group's pvid and set the flag appropriately. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_netlink.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 872d4c0deb59..190a5bc00f4a 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -1313,6 +1313,9 @@ static int br_fill_linkxstats(struct sk_buff *skb, return -EMSGSIZE; if (vg) { + u16 pvid; + + pvid = br_get_pvid(vg); list_for_each_entry(v, &vg->vlan_list, vlist) { struct bridge_vlan_xstats vxi; struct br_vlan_stats stats; @@ -1322,6 +1325,8 @@ static int br_fill_linkxstats(struct sk_buff *skb, memset(&vxi, 0, sizeof(vxi)); vxi.vid = v->vid; vxi.flags = v->flags; + if (v->vid == pvid) + vxi.flags |= BRIDGE_VLAN_INFO_PVID; br_vlan_get_stats(v, &stats); vxi.rx_bytes = stats.rx_bytes; vxi.rx_packets = stats.rx_packets; From 145dd5f9c88f6ee645662df0be003e8f04bdae93 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 25 Aug 2016 15:58:44 +0200 Subject: [PATCH 0490/1715] net: flush the softnet backlog in process context Currently in process_backlog(), the process_queue dequeuing is performed with local IRQ disabled, to protect against flush_backlog(), which runs in hard IRQ context. This patch moves the flush operation to a work queue and runs the callback with bottom half disabled to protect the process_queue against dequeuing. Since process_queue is now always manipulated in bottom half context, the irq disable/enable pair around the dequeue operation are removed. To keep the flush time as low as possible, the flush works are scheduled on all online cpu simultaneously, using the high priority work-queue and statically allocated, per cpu, work structs. Overall this change increases the time required to destroy a device to improve slightly the packets reinjection performances. Acked-by: Hannes Frederic Sowa Signed-off-by: Paolo Abeni Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 76 ++++++++++++++++++++++++++++++++++---------------- 1 file changed, 52 insertions(+), 24 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index a75df861fb5e..7feae74ca928 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4292,15 +4292,25 @@ int netif_receive_skb(struct sk_buff *skb) } EXPORT_SYMBOL(netif_receive_skb); -/* Network device is going away, flush any packets still pending - * Called with irqs disabled. - */ -static void flush_backlog(void *arg) -{ - struct net_device *dev = arg; - struct softnet_data *sd = this_cpu_ptr(&softnet_data); - struct sk_buff *skb, *tmp; +struct flush_work { + struct net_device *dev; + struct work_struct work; +}; +DEFINE_PER_CPU(struct flush_work, flush_works); + +/* Network device is going away, flush any packets still pending */ +static void flush_backlog(struct work_struct *work) +{ + struct flush_work *flush = container_of(work, typeof(*flush), work); + struct net_device *dev = flush->dev; + struct sk_buff *skb, *tmp; + struct softnet_data *sd; + + local_bh_disable(); + sd = this_cpu_ptr(&softnet_data); + + local_irq_disable(); rps_lock(sd); skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) { if (skb->dev == dev) { @@ -4310,6 +4320,7 @@ static void flush_backlog(void *arg) } } rps_unlock(sd); + local_irq_enable(); skb_queue_walk_safe(&sd->process_queue, skb, tmp) { if (skb->dev == dev) { @@ -4318,6 +4329,27 @@ static void flush_backlog(void *arg) input_queue_head_incr(sd); } } + local_bh_enable(); +} + +static void flush_all_backlogs(struct net_device *dev) +{ + unsigned int cpu; + + get_online_cpus(); + + for_each_online_cpu(cpu) { + struct flush_work *flush = per_cpu_ptr(&flush_works, cpu); + + INIT_WORK(&flush->work, flush_backlog); + flush->dev = dev; + queue_work_on(cpu, system_highpri_wq, &flush->work); + } + + for_each_online_cpu(cpu) + flush_work(&per_cpu_ptr(&flush_works, cpu)->work); + + put_online_cpus(); } static int napi_gro_complete(struct sk_buff *skb) @@ -4805,8 +4837,9 @@ static bool sd_has_rps_ipi_waiting(struct softnet_data *sd) static int process_backlog(struct napi_struct *napi, int quota) { - int work = 0; struct softnet_data *sd = container_of(napi, struct softnet_data, backlog); + bool again = true; + int work = 0; /* Check if we have pending ipi, its better to send them now, * not waiting net_rx_action() end. @@ -4817,23 +4850,20 @@ static int process_backlog(struct napi_struct *napi, int quota) } napi->weight = weight_p; - local_irq_disable(); - while (1) { + while (again) { struct sk_buff *skb; while ((skb = __skb_dequeue(&sd->process_queue))) { rcu_read_lock(); - local_irq_enable(); __netif_receive_skb(skb); rcu_read_unlock(); - local_irq_disable(); input_queue_head_incr(sd); - if (++work >= quota) { - local_irq_enable(); + if (++work >= quota) return work; - } + } + local_irq_disable(); rps_lock(sd); if (skb_queue_empty(&sd->input_pkt_queue)) { /* @@ -4845,16 +4875,14 @@ static int process_backlog(struct napi_struct *napi, int quota) * and we dont need an smp_mb() memory barrier. */ napi->state = 0; - rps_unlock(sd); - - break; + again = false; + } else { + skb_queue_splice_tail_init(&sd->input_pkt_queue, + &sd->process_queue); } - - skb_queue_splice_tail_init(&sd->input_pkt_queue, - &sd->process_queue); rps_unlock(sd); + local_irq_enable(); } - local_irq_enable(); return work; } @@ -6707,7 +6735,7 @@ static void rollback_registered_many(struct list_head *head) unlist_netdevice(dev); dev->reg_state = NETREG_UNREGISTERING; - on_each_cpu(flush_backlog, dev, 1); + flush_all_backlogs(dev); } synchronize_net(); From 2a313cdf1e6e4cc8cc3f16f976e1abfbdd0626fa Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Thu, 25 Aug 2016 16:46:44 +0200 Subject: [PATCH 0491/1715] devlink: remove unused priv_size Remove unused and useless priv_size member from struct devlink_ops. Cc: Jiri Pirko Signed-off-by: Ivan Vecera Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/devlink.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/net/devlink.h b/include/net/devlink.h index c99ffe8cef3c..211bd3c37028 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -50,7 +50,6 @@ struct devlink_sb_pool_info { }; struct devlink_ops { - size_t priv_size; int (*port_type_set)(struct devlink_port *devlink_port, enum devlink_port_type port_type); int (*port_split)(struct devlink *devlink, unsigned int port_index, From 5c326ab49e5ee014ba5314c076fe9b93fd8b0406 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 25 Aug 2016 18:42:36 +0200 Subject: [PATCH 0492/1715] switchdev: Support parent ID comparison for stacked devices switchdev_port_same_parent_id() currently expects port netdevs, but we need it to support stacked devices in the next patch, so drop the NO_RECURSE flag. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/switchdev/switchdev.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 9e9012956993..2c683f24d557 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -1292,12 +1292,10 @@ bool switchdev_port_same_parent_id(struct net_device *a, struct switchdev_attr a_attr = { .orig_dev = a, .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, - .flags = SWITCHDEV_F_NO_RECURSE, }; struct switchdev_attr b_attr = { .orig_dev = b, .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, - .flags = SWITCHDEV_F_NO_RECURSE, }; if (switchdev_port_attr_get(a, &a_attr) || From 6bc506b4fb065eac3d89ca1ce37082e174493d9e Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 25 Aug 2016 18:42:37 +0200 Subject: [PATCH 0493/1715] bridge: switchdev: Add forward mark support for stacked devices switchdev_port_fwd_mark_set() is used to set the 'offload_fwd_mark' of port netdevs so that packets being flooded by the device won't be flooded twice. It works by assigning a unique identifier (the ifindex of the first bridge port) to bridge ports sharing the same parent ID. This prevents packets from being flooded twice by the same switch, but will flood packets through bridge ports belonging to a different switch. This method is problematic when stacked devices are taken into account, such as VLANs. In such cases, a physical port netdev can have upper devices being members in two different bridges, thus requiring two different 'offload_fwd_mark's to be configured on the port netdev, which is impossible. The main problem is that packet and netdev marking is performed at the physical netdev level, whereas flooding occurs between bridge ports, which are not necessarily port netdevs. Instead, packet and netdev marking should really be done in the bridge driver with the switch driver only telling it which packets it already forwarded. The bridge driver will mark such packets using the mark assigned to the ingress bridge port and will prevent the packet from being forwarded through any bridge port sharing the same mark (i.e. having the same parent ID). Remove the current switchdev 'offload_fwd_mark' implementation and instead implement the proposed method. In addition, make rocker - the sole user of the mark - use the proposed method. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- Documentation/networking/switchdev.txt | 13 +--- drivers/net/ethernet/rocker/rocker_main.c | 2 +- drivers/net/ethernet/rocker/rocker_ofdpa.c | 4 - include/linux/netdevice.h | 5 -- include/linux/skbuff.h | 13 ++-- include/net/switchdev.h | 6 -- net/bridge/Makefile | 2 + net/bridge/br_forward.c | 3 +- net/bridge/br_if.c | 10 ++- net/bridge/br_input.c | 2 + net/bridge/br_private.h | 37 ++++++++++ net/bridge/br_switchdev.c | 57 +++++++++++++++ net/core/dev.c | 10 --- net/switchdev/switchdev.c | 85 ---------------------- 14 files changed, 117 insertions(+), 132 deletions(-) create mode 100644 net/bridge/br_switchdev.c diff --git a/Documentation/networking/switchdev.txt b/Documentation/networking/switchdev.txt index 31c39115834d..44235e83799b 100644 --- a/Documentation/networking/switchdev.txt +++ b/Documentation/networking/switchdev.txt @@ -283,15 +283,10 @@ be sent to the port netdev for processing by the bridge driver. The bridge should not reflood the packet to the same ports the device flooded, otherwise there will be duplicate packets on the wire. -To avoid duplicate packets, the device/driver should mark a packet as already -forwarded using skb->offload_fwd_mark. The same mark is set on the device -ports in the domain using dev->offload_fwd_mark. If the skb->offload_fwd_mark -is non-zero and matches the forwarding egress port's dev->skb_mark, the kernel -will drop the skb right before transmit on the egress port, with the -understanding that the device already forwarded the packet on same egress port. -The driver can use switchdev_port_fwd_mark_set() to set a globally unique mark -for port's dev->offload_fwd_mark, based on the port's parent ID (switch ID) and -a group ifindex. +To avoid duplicate packets, the switch driver should mark a packet as already +forwarded by setting the skb->offload_fwd_mark bit. The bridge driver will mark +the skb using the ingress bridge port's mark and prevent it from being forwarded +through any bridge port with the same mark. It is possible for the switch device to not handle flooding and push the packets up to the bridge driver for flooding. This is not ideal as the number diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c index f0b09b05ed3f..1f0c08602eba 100644 --- a/drivers/net/ethernet/rocker/rocker_main.c +++ b/drivers/net/ethernet/rocker/rocker_main.c @@ -2412,7 +2412,7 @@ static int rocker_port_rx_proc(const struct rocker *rocker, skb->protocol = eth_type_trans(skb, rocker_port->dev); if (rx_flags & ROCKER_RX_FLAGS_FWD_OFFLOAD) - skb->offload_fwd_mark = rocker_port->dev->offload_fwd_mark; + skb->offload_fwd_mark = 1; rocker_port->dev->stats.rx_packets++; rocker_port->dev->stats.rx_bytes += skb->len; diff --git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c index 1ca796316173..fcad907baecf 100644 --- a/drivers/net/ethernet/rocker/rocker_ofdpa.c +++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c @@ -2558,7 +2558,6 @@ static int ofdpa_port_init(struct rocker_port *rocker_port) struct ofdpa_port *ofdpa_port = rocker_port->wpriv; int err; - switchdev_port_fwd_mark_set(ofdpa_port->dev, NULL, false); rocker_port_set_learning(rocker_port, !!(ofdpa_port->brport_flags & BR_LEARNING)); @@ -2817,7 +2816,6 @@ static int ofdpa_port_bridge_join(struct ofdpa_port *ofdpa_port, ofdpa_port_internal_vlan_id_get(ofdpa_port, bridge->ifindex); ofdpa_port->bridge_dev = bridge; - switchdev_port_fwd_mark_set(ofdpa_port->dev, bridge, true); return ofdpa_port_vlan_add(ofdpa_port, NULL, OFDPA_UNTAGGED_VID, 0); } @@ -2836,8 +2834,6 @@ static int ofdpa_port_bridge_leave(struct ofdpa_port *ofdpa_port) ofdpa_port_internal_vlan_id_get(ofdpa_port, ofdpa_port->dev->ifindex); - switchdev_port_fwd_mark_set(ofdpa_port->dev, ofdpa_port->bridge_dev, - false); ofdpa_port->bridge_dev = NULL; err = ofdpa_port_vlan_add(ofdpa_port, NULL, OFDPA_UNTAGGED_VID, 0); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 794bb0733799..d122be9345c7 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1562,8 +1562,6 @@ enum netdev_priv_flags { * * @xps_maps: XXX: need comments on this one * - * @offload_fwd_mark: Offload device fwding mark - * * @watchdog_timeo: Represents the timeout that is used by * the watchdog (see dev_watchdog()) * @watchdog_timer: List of timers @@ -1814,9 +1812,6 @@ struct net_device { #ifdef CONFIG_NET_CLS_ACT struct tcf_proto __rcu *egress_cl_list; #endif -#ifdef CONFIG_NET_SWITCHDEV - u32 offload_fwd_mark; -#endif /* These may be needed for future network-power-down code. */ struct timer_list watchdog_timer; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 7047448e8129..cfb7219be665 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -612,7 +612,6 @@ static inline bool skb_mstamp_after(const struct skb_mstamp *t1, * @no_fcs: Request NIC to treat last 4 bytes as Ethernet FCS * @napi_id: id of the NAPI struct this skb came from * @secmark: security marking - * @offload_fwd_mark: fwding offload mark * @mark: Generic packet mark * @vlan_proto: vlan encapsulation protocol * @vlan_tci: vlan tag control information @@ -730,7 +729,10 @@ struct sk_buff { __u8 ipvs_property:1; __u8 inner_protocol_type:1; __u8 remcsum_offload:1; - /* 3 or 5 bit hole */ +#ifdef CONFIG_NET_SWITCHDEV + __u8 offload_fwd_mark:1; +#endif + /* 2, 4 or 5 bit hole */ #ifdef CONFIG_NET_SCHED __u16 tc_index; /* traffic control index */ @@ -757,14 +759,9 @@ struct sk_buff { unsigned int sender_cpu; }; #endif - union { #ifdef CONFIG_NETWORK_SECMARK - __u32 secmark; + __u32 secmark; #endif -#ifdef CONFIG_NET_SWITCHDEV - __u32 offload_fwd_mark; -#endif - }; union { __u32 mark; diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 62f6a967a1b7..82f5e0462021 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -347,12 +347,6 @@ static inline int switchdev_port_fdb_dump(struct sk_buff *skb, return idx; } -static inline void switchdev_port_fwd_mark_set(struct net_device *dev, - struct net_device *group_dev, - bool joining) -{ -} - static inline bool switchdev_port_same_parent_id(struct net_device *a, struct net_device *b) { diff --git a/net/bridge/Makefile b/net/bridge/Makefile index a1cda5d4718d..0aefc011b668 100644 --- a/net/bridge/Makefile +++ b/net/bridge/Makefile @@ -20,4 +20,6 @@ bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o br_mdb.o bridge-$(CONFIG_BRIDGE_VLAN_FILTERING) += br_vlan.o +bridge-$(CONFIG_NET_SWITCHDEV) += br_switchdev.o + obj-$(CONFIG_NETFILTER) += netfilter/ diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 63a83d8d7da3..32a02de39cd2 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -29,7 +29,8 @@ static inline int should_deliver(const struct net_bridge_port *p, vg = nbp_vlan_group_rcu(p); return ((p->flags & BR_HAIRPIN_MODE) || skb->dev != p->dev) && - br_allowed_egress(vg, skb) && p->state == BR_STATE_FORWARDING; + br_allowed_egress(vg, skb) && p->state == BR_STATE_FORWARDING && + nbp_switchdev_allowed_egress(p, skb); } int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index f2fede05d32c..1da3221845f1 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -545,6 +545,10 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) if (err) goto err5; + err = nbp_switchdev_mark_set(p); + if (err) + goto err6; + dev_disable_lro(dev); list_add_rcu(&p->list, &br->port_list); @@ -566,7 +570,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) err = nbp_vlan_init(p); if (err) { netdev_err(dev, "failed to initialize vlan filtering on this port\n"); - goto err6; + goto err7; } spin_lock_bh(&br->lock); @@ -589,12 +593,12 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) return 0; -err6: +err7: list_del_rcu(&p->list); br_fdb_delete_by_port(br, p, 0, 1); nbp_update_port_count(br); +err6: netdev_upper_dev_unlink(dev, br->dev); - err5: dev->priv_flags &= ~IFF_BRIDGE_PORT; netdev_rx_handler_unregister(dev); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 8e486203d133..3132cfc80e9d 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -145,6 +145,8 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb if (!br_allowed_ingress(p->br, nbp_vlan_group_rcu(p), skb, &vid)) goto out; + nbp_switchdev_frame_mark(p, skb); + /* insert into forwarding database after filtering to avoid spoofing */ br = p->br; if (p->flags & BR_LEARNING) diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index aac2a6e6b008..2379b2b865c9 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -251,6 +251,9 @@ struct net_bridge_port #ifdef CONFIG_BRIDGE_VLAN_FILTERING struct net_bridge_vlan_group __rcu *vlgrp; #endif +#ifdef CONFIG_NET_SWITCHDEV + int offload_fwd_mark; +#endif }; #define br_auto_port(p) ((p)->flags & BR_AUTO_MASK) @@ -359,6 +362,11 @@ struct net_bridge struct timer_list gc_timer; struct kobject *ifobj; u32 auto_cnt; + +#ifdef CONFIG_NET_SWITCHDEV + int offload_fwd_mark; +#endif + #ifdef CONFIG_BRIDGE_VLAN_FILTERING struct net_bridge_vlan_group __rcu *vlgrp; u8 vlan_enabled; @@ -381,6 +389,10 @@ struct br_input_skb_cb { #ifdef CONFIG_BRIDGE_VLAN_FILTERING bool vlan_filtered; #endif + +#ifdef CONFIG_NET_SWITCHDEV + int offload_fwd_mark; +#endif }; #define BR_INPUT_SKB_CB(__skb) ((struct br_input_skb_cb *)(__skb)->cb) @@ -1034,4 +1046,29 @@ static inline int br_sysfs_addbr(struct net_device *dev) { return 0; } static inline void br_sysfs_delbr(struct net_device *dev) { return; } #endif /* CONFIG_SYSFS */ +/* br_switchdev.c */ +#ifdef CONFIG_NET_SWITCHDEV +int nbp_switchdev_mark_set(struct net_bridge_port *p); +void nbp_switchdev_frame_mark(const struct net_bridge_port *p, + struct sk_buff *skb); +bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p, + const struct sk_buff *skb); +#else +static inline int nbp_switchdev_mark_set(struct net_bridge_port *p) +{ + return 0; +} + +static inline void nbp_switchdev_frame_mark(const struct net_bridge_port *p, + struct sk_buff *skb) +{ +} + +static inline bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p, + const struct sk_buff *skb) +{ + return true; +} +#endif /* CONFIG_NET_SWITCHDEV */ + #endif diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c new file mode 100644 index 000000000000..f4097b900de1 --- /dev/null +++ b/net/bridge/br_switchdev.c @@ -0,0 +1,57 @@ +#include +#include +#include +#include +#include +#include + +#include "br_private.h" + +static int br_switchdev_mark_get(struct net_bridge *br, struct net_device *dev) +{ + struct net_bridge_port *p; + + /* dev is yet to be added to the port list. */ + list_for_each_entry(p, &br->port_list, list) { + if (switchdev_port_same_parent_id(dev, p->dev)) + return p->offload_fwd_mark; + } + + return ++br->offload_fwd_mark; +} + +int nbp_switchdev_mark_set(struct net_bridge_port *p) +{ + struct switchdev_attr attr = { + .orig_dev = p->dev, + .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, + }; + int err; + + ASSERT_RTNL(); + + err = switchdev_port_attr_get(p->dev, &attr); + if (err) { + if (err == -EOPNOTSUPP) + return 0; + return err; + } + + p->offload_fwd_mark = br_switchdev_mark_get(p->br, p->dev); + + return 0; +} + +void nbp_switchdev_frame_mark(const struct net_bridge_port *p, + struct sk_buff *skb) +{ + if (skb->offload_fwd_mark && !WARN_ON_ONCE(!p->offload_fwd_mark)) + BR_INPUT_SKB_CB(skb)->offload_fwd_mark = p->offload_fwd_mark; +} + +bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p, + const struct sk_buff *skb) +{ + return !skb->offload_fwd_mark || + BR_INPUT_SKB_CB(skb)->offload_fwd_mark != p->offload_fwd_mark; +} diff --git a/net/core/dev.c b/net/core/dev.c index 7feae74ca928..1d5c6dda1988 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3355,16 +3355,6 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) else skb_dst_force(skb); -#ifdef CONFIG_NET_SWITCHDEV - /* Don't forward if offload device already forwarded */ - if (skb->offload_fwd_mark && - skb->offload_fwd_mark == dev->offload_fwd_mark) { - consume_skb(skb); - rc = NET_XMIT_SUCCESS; - goto out; - } -#endif - txq = netdev_pick_tx(dev, skb, accel_priv); q = rcu_dereference_bh(txq->qdisc); diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 2c683f24d557..1031a0327fff 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -1305,88 +1305,3 @@ bool switchdev_port_same_parent_id(struct net_device *a, return netdev_phys_item_id_same(&a_attr.u.ppid, &b_attr.u.ppid); } EXPORT_SYMBOL_GPL(switchdev_port_same_parent_id); - -static u32 switchdev_port_fwd_mark_get(struct net_device *dev, - struct net_device *group_dev) -{ - struct net_device *lower_dev; - struct list_head *iter; - - netdev_for_each_lower_dev(group_dev, lower_dev, iter) { - if (lower_dev == dev) - continue; - if (switchdev_port_same_parent_id(dev, lower_dev)) - return lower_dev->offload_fwd_mark; - return switchdev_port_fwd_mark_get(dev, lower_dev); - } - - return dev->ifindex; -} - -static void switchdev_port_fwd_mark_reset(struct net_device *group_dev, - u32 old_mark, u32 *reset_mark) -{ - struct net_device *lower_dev; - struct list_head *iter; - - netdev_for_each_lower_dev(group_dev, lower_dev, iter) { - if (lower_dev->offload_fwd_mark == old_mark) { - if (!*reset_mark) - *reset_mark = lower_dev->ifindex; - lower_dev->offload_fwd_mark = *reset_mark; - } - switchdev_port_fwd_mark_reset(lower_dev, old_mark, reset_mark); - } -} - -/** - * switchdev_port_fwd_mark_set - Set port offload forwarding mark - * - * @dev: port device - * @group_dev: containing device - * @joining: true if dev is joining group; false if leaving group - * - * An ungrouped port's offload mark is just its ifindex. A grouped - * port's (member of a bridge, for example) offload mark is the ifindex - * of one of the ports in the group with the same parent (switch) ID. - * Ports on the same device in the same group will have the same mark. - * - * Example: - * - * br0 ifindex=9 - * sw1p1 ifindex=2 mark=2 - * sw1p2 ifindex=3 mark=2 - * sw2p1 ifindex=4 mark=5 - * sw2p2 ifindex=5 mark=5 - * - * If sw2p2 leaves the bridge, we'll have: - * - * br0 ifindex=9 - * sw1p1 ifindex=2 mark=2 - * sw1p2 ifindex=3 mark=2 - * sw2p1 ifindex=4 mark=4 - * sw2p2 ifindex=5 mark=5 - */ -void switchdev_port_fwd_mark_set(struct net_device *dev, - struct net_device *group_dev, - bool joining) -{ - u32 mark = dev->ifindex; - u32 reset_mark = 0; - - if (group_dev) { - ASSERT_RTNL(); - if (joining) - mark = switchdev_port_fwd_mark_get(dev, group_dev); - else if (dev->offload_fwd_mark == mark) - /* Ohoh, this port was the mark reference port, - * but it's leaving the group, so reset the - * mark for the remaining ports in the group. - */ - switchdev_port_fwd_mark_reset(group_dev, mark, - &reset_mark); - } - - dev->offload_fwd_mark = mark; -} -EXPORT_SYMBOL_GPL(switchdev_port_fwd_mark_set); From 93393b339dea263b42eecd6ff680f77a08614cc5 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 25 Aug 2016 18:42:38 +0200 Subject: [PATCH 0494/1715] mlxsw: spectrum: Simplify traps definition Instead of copying & pasting the same struct initialization for every Rx listener, just use a macro. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum.c | 146 ++++-------------- 1 file changed, 31 insertions(+), 115 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 2713a64eae75..cc83cd221c56 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -2570,123 +2570,39 @@ static void mlxsw_sp_rx_listener_func(struct sk_buff *skb, u8 local_port, netif_receive_skb(skb); } +#define MLXSW_SP_RXL(_func, _trap_id) \ + { \ + .func = _func, \ + .local_port = MLXSW_PORT_DONT_CARE, \ + .trap_id = MLXSW_TRAP_ID_##_trap_id, \ + } + static const struct mlxsw_rx_listener mlxsw_sp_rx_listener[] = { - { - .func = mlxsw_sp_rx_listener_func, - .local_port = MLXSW_PORT_DONT_CARE, - .trap_id = MLXSW_TRAP_ID_FDB_MC, - }, + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, FDB_MC), /* Traps for specific L2 packet types, not trapped as FDB MC */ - { - .func = mlxsw_sp_rx_listener_func, - .local_port = MLXSW_PORT_DONT_CARE, - .trap_id = MLXSW_TRAP_ID_STP, - }, - { - .func = mlxsw_sp_rx_listener_func, - .local_port = MLXSW_PORT_DONT_CARE, - .trap_id = MLXSW_TRAP_ID_LACP, - }, - { - .func = mlxsw_sp_rx_listener_func, - .local_port = MLXSW_PORT_DONT_CARE, - .trap_id = MLXSW_TRAP_ID_EAPOL, - }, - { - .func = mlxsw_sp_rx_listener_func, - .local_port = MLXSW_PORT_DONT_CARE, - .trap_id = MLXSW_TRAP_ID_LLDP, - }, - { - .func = mlxsw_sp_rx_listener_func, - .local_port = MLXSW_PORT_DONT_CARE, - .trap_id = MLXSW_TRAP_ID_MMRP, - }, - { - .func = mlxsw_sp_rx_listener_func, - .local_port = MLXSW_PORT_DONT_CARE, - .trap_id = MLXSW_TRAP_ID_MVRP, - }, - { - .func = mlxsw_sp_rx_listener_func, - .local_port = MLXSW_PORT_DONT_CARE, - .trap_id = MLXSW_TRAP_ID_RPVST, - }, - { - .func = mlxsw_sp_rx_listener_func, - .local_port = MLXSW_PORT_DONT_CARE, - .trap_id = MLXSW_TRAP_ID_DHCP, - }, - { - .func = mlxsw_sp_rx_listener_func, - .local_port = MLXSW_PORT_DONT_CARE, - .trap_id = MLXSW_TRAP_ID_IGMP_QUERY, - }, - { - .func = mlxsw_sp_rx_listener_func, - .local_port = MLXSW_PORT_DONT_CARE, - .trap_id = MLXSW_TRAP_ID_IGMP_V1_REPORT, - }, - { - .func = mlxsw_sp_rx_listener_func, - .local_port = MLXSW_PORT_DONT_CARE, - .trap_id = MLXSW_TRAP_ID_IGMP_V2_REPORT, - }, - { - .func = mlxsw_sp_rx_listener_func, - .local_port = MLXSW_PORT_DONT_CARE, - .trap_id = MLXSW_TRAP_ID_IGMP_V2_LEAVE, - }, - { - .func = mlxsw_sp_rx_listener_func, - .local_port = MLXSW_PORT_DONT_CARE, - .trap_id = MLXSW_TRAP_ID_IGMP_V3_REPORT, - }, - { - .func = mlxsw_sp_rx_listener_func, - .local_port = MLXSW_PORT_DONT_CARE, - .trap_id = MLXSW_TRAP_ID_ARPBC, - }, - { - .func = mlxsw_sp_rx_listener_func, - .local_port = MLXSW_PORT_DONT_CARE, - .trap_id = MLXSW_TRAP_ID_ARPUC, - }, - { - .func = mlxsw_sp_rx_listener_func, - .local_port = MLXSW_PORT_DONT_CARE, - .trap_id = MLXSW_TRAP_ID_MTUERROR, - }, - { - .func = mlxsw_sp_rx_listener_func, - .local_port = MLXSW_PORT_DONT_CARE, - .trap_id = MLXSW_TRAP_ID_TTLERROR, - }, - { - .func = mlxsw_sp_rx_listener_func, - .local_port = MLXSW_PORT_DONT_CARE, - .trap_id = MLXSW_TRAP_ID_LBERROR, - }, - { - .func = mlxsw_sp_rx_listener_func, - .local_port = MLXSW_PORT_DONT_CARE, - .trap_id = MLXSW_TRAP_ID_OSPF, - }, - { - .func = mlxsw_sp_rx_listener_func, - .local_port = MLXSW_PORT_DONT_CARE, - .trap_id = MLXSW_TRAP_ID_IP2ME, - }, - { - .func = mlxsw_sp_rx_listener_func, - .local_port = MLXSW_PORT_DONT_CARE, - .trap_id = MLXSW_TRAP_ID_RTR_INGRESS0, - }, - { - .func = mlxsw_sp_rx_listener_func, - .local_port = MLXSW_PORT_DONT_CARE, - .trap_id = MLXSW_TRAP_ID_HOST_MISS_IPV4, - }, + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, STP), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, LACP), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, EAPOL), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, LLDP), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, MMRP), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, MVRP), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, RPVST), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, DHCP), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, IGMP_QUERY), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, IGMP_V1_REPORT), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, IGMP_V2_REPORT), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, IGMP_V2_LEAVE), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, IGMP_V3_REPORT), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, ARPBC), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, ARPUC), + /* L3 traps */ + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, MTUERROR), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, TTLERROR), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, LBERROR), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, OSPF), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, IP2ME), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, RTR_INGRESS0), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, HOST_MISS_IPV4), }; static int mlxsw_sp_traps_init(struct mlxsw_sp *mlxsw_sp) From 63a811417d97015feb049ba411300990d6835fab Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 25 Aug 2016 18:42:39 +0200 Subject: [PATCH 0495/1715] mlxsw: spectrum: Allow different traps to have different actions Up until now we only trapped packets to CPU, but we are going to allow some packets to be mirrored (trap & forward) to CPU. Extend the Rx listener with 'action' member. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/core.h | 1 + .../net/ethernet/mellanox/mlxsw/spectrum.c | 59 ++++++++++--------- 2 files changed, 31 insertions(+), 29 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index d3476ead9982..d2e32979319c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -87,6 +87,7 @@ struct mlxsw_rx_listener { void (*func)(struct sk_buff *skb, u8 local_port, void *priv); u8 local_port; u16 trap_id; + enum mlxsw_reg_hpkt_action action; }; struct mlxsw_event_listener { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index cc83cd221c56..ea4086ac3e9a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -2570,39 +2570,40 @@ static void mlxsw_sp_rx_listener_func(struct sk_buff *skb, u8 local_port, netif_receive_skb(skb); } -#define MLXSW_SP_RXL(_func, _trap_id) \ - { \ - .func = _func, \ - .local_port = MLXSW_PORT_DONT_CARE, \ - .trap_id = MLXSW_TRAP_ID_##_trap_id, \ +#define MLXSW_SP_RXL(_func, _trap_id, _action) \ + { \ + .func = _func, \ + .local_port = MLXSW_PORT_DONT_CARE, \ + .trap_id = MLXSW_TRAP_ID_##_trap_id, \ + .action = MLXSW_REG_HPKT_ACTION_##_action, \ } static const struct mlxsw_rx_listener mlxsw_sp_rx_listener[] = { - MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, FDB_MC), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, FDB_MC, TRAP_TO_CPU), /* Traps for specific L2 packet types, not trapped as FDB MC */ - MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, STP), - MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, LACP), - MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, EAPOL), - MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, LLDP), - MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, MMRP), - MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, MVRP), - MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, RPVST), - MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, DHCP), - MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, IGMP_QUERY), - MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, IGMP_V1_REPORT), - MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, IGMP_V2_REPORT), - MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, IGMP_V2_LEAVE), - MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, IGMP_V3_REPORT), - MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, ARPBC), - MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, ARPUC), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, STP, TRAP_TO_CPU), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, LACP, TRAP_TO_CPU), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, EAPOL, TRAP_TO_CPU), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, LLDP, TRAP_TO_CPU), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, MMRP, TRAP_TO_CPU), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, MVRP, TRAP_TO_CPU), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, RPVST, TRAP_TO_CPU), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, DHCP, TRAP_TO_CPU), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, IGMP_QUERY, TRAP_TO_CPU), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, IGMP_V1_REPORT, TRAP_TO_CPU), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, IGMP_V2_REPORT, TRAP_TO_CPU), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, IGMP_V2_LEAVE, TRAP_TO_CPU), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, IGMP_V3_REPORT, TRAP_TO_CPU), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, ARPBC, TRAP_TO_CPU), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, ARPUC, TRAP_TO_CPU), /* L3 traps */ - MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, MTUERROR), - MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, TTLERROR), - MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, LBERROR), - MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, OSPF), - MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, IP2ME), - MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, RTR_INGRESS0), - MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, HOST_MISS_IPV4), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, MTUERROR, TRAP_TO_CPU), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, TTLERROR, TRAP_TO_CPU), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, LBERROR, TRAP_TO_CPU), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, OSPF, TRAP_TO_CPU), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, IP2ME, TRAP_TO_CPU), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, RTR_INGRESS0, TRAP_TO_CPU), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, HOST_MISS_IPV4, TRAP_TO_CPU), }; static int mlxsw_sp_traps_init(struct mlxsw_sp *mlxsw_sp) @@ -2629,7 +2630,7 @@ static int mlxsw_sp_traps_init(struct mlxsw_sp *mlxsw_sp) if (err) goto err_rx_listener_register; - mlxsw_reg_hpkt_pack(hpkt_pl, MLXSW_REG_HPKT_ACTION_TRAP_TO_CPU, + mlxsw_reg_hpkt_pack(hpkt_pl, mlxsw_sp_rx_listener[i].action, mlxsw_sp_rx_listener[i].trap_id); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(hpkt), hpkt_pl); if (err) From 1c6c6d221e2b2efb7084b215ef7f59f2bf211b14 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 25 Aug 2016 18:42:40 +0200 Subject: [PATCH 0496/1715] mlxsw: spectrum: Mirror certain packets to CPU Instead of trapping certain packets to the CPU and then relying on it to flood them we can instead make the device mirror them. The following packet types are mirrored: * DHCP: Broadcast packets that should be flooded by the device, but also trapped in case CPU is running the DHCP server. * IGMP query: Multicast packets that need to be forwarded to other bridge ports, but also trapped so that receiving netdev will be marked as a router port by the bridge driver. * ARP request: Broadcast packets that should be forwarded to other bridge ports, but also trapped in case requested IP is of the local machine. * ARP response: Unicast packets that should be forwarded by the bridge but also trapped in case response is directed at us. Set the trap action of such packets to mirror and mark them using 'offload_fwd_mark' to prevent the bridge driver from forwarding them itself. Note that OSPF packets are also marked despite their action being trap. The reason for this is that the device traps such packets in the pipeline after they were already flooded. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index ea4086ac3e9a..8c9c124724a6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -2570,6 +2570,13 @@ static void mlxsw_sp_rx_listener_func(struct sk_buff *skb, u8 local_port, netif_receive_skb(skb); } +static void mlxsw_sp_rx_listener_mark_func(struct sk_buff *skb, u8 local_port, + void *priv) +{ + skb->offload_fwd_mark = 1; + return mlxsw_sp_rx_listener_func(skb, local_port, priv); +} + #define MLXSW_SP_RXL(_func, _trap_id, _action) \ { \ .func = _func, \ @@ -2588,19 +2595,19 @@ static const struct mlxsw_rx_listener mlxsw_sp_rx_listener[] = { MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, MMRP, TRAP_TO_CPU), MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, MVRP, TRAP_TO_CPU), MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, RPVST, TRAP_TO_CPU), - MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, DHCP, TRAP_TO_CPU), - MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, IGMP_QUERY, TRAP_TO_CPU), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_mark_func, DHCP, MIRROR_TO_CPU), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_mark_func, IGMP_QUERY, MIRROR_TO_CPU), MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, IGMP_V1_REPORT, TRAP_TO_CPU), MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, IGMP_V2_REPORT, TRAP_TO_CPU), MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, IGMP_V2_LEAVE, TRAP_TO_CPU), MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, IGMP_V3_REPORT, TRAP_TO_CPU), - MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, ARPBC, TRAP_TO_CPU), - MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, ARPUC, TRAP_TO_CPU), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_mark_func, ARPBC, MIRROR_TO_CPU), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_mark_func, ARPUC, MIRROR_TO_CPU), /* L3 traps */ MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, MTUERROR, TRAP_TO_CPU), MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, TTLERROR, TRAP_TO_CPU), MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, LBERROR, TRAP_TO_CPU), - MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, OSPF, TRAP_TO_CPU), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_mark_func, OSPF, TRAP_TO_CPU), MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, IP2ME, TRAP_TO_CPU), MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, RTR_INGRESS0, TRAP_TO_CPU), MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, HOST_MISS_IPV4, TRAP_TO_CPU), From 485ebd618e9e34769c4e046404ddf5f26487d807 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 26 Aug 2016 12:18:29 -0700 Subject: [PATCH 0497/1715] net: dsa: b53: Initialize ds->ops in b53_switch_alloc In order to allow drivers to override specific dsa_switch_driver callbacks, initialize ds->ops to b53_switch_ops earlier, which avoids having to expose this structure to glue drivers. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_common.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 6fb77cca78bb..9fc726592071 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1602,7 +1602,6 @@ static const struct b53_chip_data b53_switch_chips[] = { static int b53_switch_init(struct b53_device *dev) { - struct dsa_switch *ds = dev->ds; unsigned int i; int ret; @@ -1618,7 +1617,6 @@ static int b53_switch_init(struct b53_device *dev) dev->vta_regs[1] = chip->vta_regs[1]; dev->vta_regs[2] = chip->vta_regs[2]; dev->jumbo_pm_reg = chip->jumbo_pm_reg; - ds->ops = &b53_switch_ops; dev->cpu_port = chip->cpu_port; dev->num_vlans = chip->vlans; dev->num_arl_entries = chip->arl_entries; @@ -1706,6 +1704,7 @@ struct b53_device *b53_switch_alloc(struct device *base, dev->ds = ds; dev->priv = priv; dev->ops = ops; + ds->ops = &b53_switch_ops; mutex_init(&dev->reg_mutex); mutex_init(&dev->stats_mutex); From 130401d998a49da96d5ffc45a4f82a68426e588b Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 26 Aug 2016 12:18:30 -0700 Subject: [PATCH 0498/1715] net: dsa: b53: Prepare to support 7445 switch Allocate a device entry for the Broadcom BCM7445 integrated switch currently backed by bcm_sf2.c. Since this is the latest generation, it has 4 ARL entries, 4K VLANs and uses Port 8 for the CPU/IMP port. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_common.c | 12 ++++++++++++ drivers/net/dsa/b53/b53_priv.h | 1 + 2 files changed, 13 insertions(+) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 9fc726592071..d014c918ebfd 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1598,6 +1598,18 @@ static const struct b53_chip_data b53_switch_chips[] = { .jumbo_pm_reg = B53_JUMBO_PORT_MASK, .jumbo_size_reg = B53_JUMBO_MAX_SIZE, }, + { + .chip_id = BCM7445_DEVICE_ID, + .dev_name = "BCM7445", + .vlans = 4096, + .enabled_ports = 0x1ff, + .arl_entries = 4, + .cpu_port = B53_CPU_PORT, + .vta_regs = B53_VTA_REGS, + .duplex_reg = B53_DUPLEX_STAT_GE, + .jumbo_pm_reg = B53_JUMBO_PORT_MASK, + .jumbo_size_reg = B53_JUMBO_MAX_SIZE, + }, }; static int b53_switch_init(struct b53_device *dev) diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h index d268493a5fec..cf2ff2cbc8ab 100644 --- a/drivers/net/dsa/b53/b53_priv.h +++ b/drivers/net/dsa/b53/b53_priv.h @@ -60,6 +60,7 @@ enum { BCM53018_DEVICE_ID = 0x53018, BCM53019_DEVICE_ID = 0x53019, BCM58XX_DEVICE_ID = 0x5800, + BCM7445_DEVICE_ID = 0x7445, }; #define B53_N_PORTS 9 From bde5d132c3fc50515e59119794945b0bda5a32bd Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 26 Aug 2016 12:18:31 -0700 Subject: [PATCH 0499/1715] net: dsa: b53: Define SF2 MIB layout The 58xx and 7445 chips use the Starfighter2 code, define its MIB layout and introduce a helper function: is58xx() which checks for both of these IDs for now. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_common.c | 63 ++++++++++++++++++++++++++++++++ drivers/net/dsa/b53/b53_priv.h | 6 +++ 2 files changed, 69 insertions(+) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index d014c918ebfd..ca13ee134562 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -167,6 +167,65 @@ static const struct b53_mib_desc b53_mibs[] = { #define B53_MIBS_SIZE ARRAY_SIZE(b53_mibs) +static const struct b53_mib_desc b53_mibs_58xx[] = { + { 8, 0x00, "TxOctets" }, + { 4, 0x08, "TxDropPkts" }, + { 4, 0x0c, "TxQPKTQ0" }, + { 4, 0x10, "TxBroadcastPkts" }, + { 4, 0x14, "TxMulticastPkts" }, + { 4, 0x18, "TxUnicastPKts" }, + { 4, 0x1c, "TxCollisions" }, + { 4, 0x20, "TxSingleCollision" }, + { 4, 0x24, "TxMultipleCollision" }, + { 4, 0x28, "TxDeferredCollision" }, + { 4, 0x2c, "TxLateCollision" }, + { 4, 0x30, "TxExcessiveCollision" }, + { 4, 0x34, "TxFrameInDisc" }, + { 4, 0x38, "TxPausePkts" }, + { 4, 0x3c, "TxQPKTQ1" }, + { 4, 0x40, "TxQPKTQ2" }, + { 4, 0x44, "TxQPKTQ3" }, + { 4, 0x48, "TxQPKTQ4" }, + { 4, 0x4c, "TxQPKTQ5" }, + { 8, 0x50, "RxOctets" }, + { 4, 0x58, "RxUndersizePkts" }, + { 4, 0x5c, "RxPausePkts" }, + { 4, 0x60, "RxPkts64Octets" }, + { 4, 0x64, "RxPkts65to127Octets" }, + { 4, 0x68, "RxPkts128to255Octets" }, + { 4, 0x6c, "RxPkts256to511Octets" }, + { 4, 0x70, "RxPkts512to1023Octets" }, + { 4, 0x74, "RxPkts1024toMaxPktsOctets" }, + { 4, 0x78, "RxOversizePkts" }, + { 4, 0x7c, "RxJabbers" }, + { 4, 0x80, "RxAlignmentErrors" }, + { 4, 0x84, "RxFCSErrors" }, + { 8, 0x88, "RxGoodOctets" }, + { 4, 0x90, "RxDropPkts" }, + { 4, 0x94, "RxUnicastPkts" }, + { 4, 0x98, "RxMulticastPkts" }, + { 4, 0x9c, "RxBroadcastPkts" }, + { 4, 0xa0, "RxSAChanges" }, + { 4, 0xa4, "RxFragments" }, + { 4, 0xa8, "RxJumboPkt" }, + { 4, 0xac, "RxSymblErr" }, + { 4, 0xb0, "InRangeErrCount" }, + { 4, 0xb4, "OutRangeErrCount" }, + { 4, 0xb8, "EEELpiEvent" }, + { 4, 0xbc, "EEELpiDuration" }, + { 4, 0xc0, "RxDiscard" }, + { 4, 0xc8, "TxQPKTQ6" }, + { 4, 0xcc, "TxQPKTQ7" }, + { 4, 0xd0, "TxPkts64Octets" }, + { 4, 0xd4, "TxPkts65to127Octets" }, + { 4, 0xd8, "TxPkts128to255Octets" }, + { 4, 0xdc, "TxPkts256to511Ocets" }, + { 4, 0xe0, "TxPkts512to1023Ocets" }, + { 4, 0xe4, "TxPkts1024toMaxPktOcets" }, +}; + +#define B53_MIBS_58XX_SIZE ARRAY_SIZE(b53_mibs_58xx) + static int b53_do_vlan_op(struct b53_device *dev, u8 op) { unsigned int i; @@ -635,6 +694,8 @@ static const struct b53_mib_desc *b53_get_mib(struct b53_device *dev) return b53_mibs_65; else if (is63xx(dev)) return b53_mibs_63xx; + else if (is58xx(dev)) + return b53_mibs_58xx; else return b53_mibs; } @@ -645,6 +706,8 @@ static unsigned int b53_get_mib_size(struct b53_device *dev) return B53_MIBS_65_SIZE; else if (is63xx(dev)) return B53_MIBS_63XX_SIZE; + else if (is58xx(dev)) + return B53_MIBS_58XX_SIZE; else return B53_MIBS_SIZE; } diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h index cf2ff2cbc8ab..76672dae412d 100644 --- a/drivers/net/dsa/b53/b53_priv.h +++ b/drivers/net/dsa/b53/b53_priv.h @@ -175,6 +175,12 @@ static inline int is5301x(struct b53_device *dev) dev->chip_id == BCM53019_DEVICE_ID; } +static inline int is58xx(struct b53_device *dev) +{ + return dev->chip_id == BCM58XX_DEVICE_ID || + dev->chip_id == BCM7445_DEVICE_ID; +} + #define B53_CPU_PORT_25 5 #define B53_CPU_PORT 8 From 48aea33a77ab8ec76245336ea08eeb3dda34f98a Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 26 Aug 2016 12:18:32 -0700 Subject: [PATCH 0500/1715] net: dsa: b53: Add JOIN_ALL_VLAN support In order to migrate the bcm_sf2 driver over to the b53 driver for most VLAN/FDB/bridge operations, we need to add support for the "join all VLANs" register and behavior which allows us to make a given port join all VLANs and avoid setting specific VLAN entries when it is leaving the bridge. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_common.c | 30 ++++++++++++++++++++++++++---- drivers/net/dsa/b53/b53_regs.h | 3 +++ 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index ca13ee134562..1299104a87d4 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1315,9 +1315,21 @@ static int b53_br_join(struct dsa_switch *ds, int port, struct net_device *bridge) { struct b53_device *dev = ds_to_priv(ds); + s8 cpu_port = ds->dst->cpu_port; u16 pvlan, reg; unsigned int i; + /* Make this port leave the all VLANs join since we will have proper + * VLAN entries from now on + */ + if (is58xx(dev)) { + b53_read16(dev, B53_VLAN_PAGE, B53_JOIN_ALL_VLAN_EN, ®); + reg &= ~BIT(port); + if ((reg & BIT(cpu_port)) == BIT(cpu_port)) + reg &= ~BIT(cpu_port); + b53_write16(dev, B53_VLAN_PAGE, B53_JOIN_ALL_VLAN_EN, reg); + } + dev->ports[port].bridge_dev = bridge; b53_read16(dev, B53_PVLAN_PAGE, B53_PVLAN_PORT_MASK(port), &pvlan); @@ -1350,6 +1362,7 @@ static void b53_br_leave(struct dsa_switch *ds, int port) struct b53_device *dev = ds_to_priv(ds); struct net_device *bridge = dev->ports[port].bridge_dev; struct b53_vlan *vl = &dev->vlans[0]; + s8 cpu_port = ds->dst->cpu_port; unsigned int i; u16 pvlan, reg, pvid; @@ -1379,10 +1392,19 @@ static void b53_br_leave(struct dsa_switch *ds, int port) else pvid = 0; - b53_get_vlan_entry(dev, pvid, vl); - vl->members |= BIT(port) | BIT(dev->cpu_port); - vl->untag |= BIT(port) | BIT(dev->cpu_port); - b53_set_vlan_entry(dev, pvid, vl); + /* Make this port join all VLANs without VLAN entries */ + if (is58xx(dev)) { + b53_read16(dev, B53_VLAN_PAGE, B53_JOIN_ALL_VLAN_EN, ®); + reg |= BIT(port); + if (!(reg & BIT(cpu_port))) + reg |= BIT(cpu_port); + b53_write16(dev, B53_VLAN_PAGE, B53_JOIN_ALL_VLAN_EN, reg); + } else { + b53_get_vlan_entry(dev, pvid, vl); + vl->members |= BIT(port) | BIT(dev->cpu_port); + vl->untag |= BIT(port) | BIT(dev->cpu_port); + b53_set_vlan_entry(dev, pvid, vl); + } } static void b53_br_set_stp_state(struct dsa_switch *ds, int port, diff --git a/drivers/net/dsa/b53/b53_regs.h b/drivers/net/dsa/b53/b53_regs.h index a0b453ea34c9..dac0af4e2cd0 100644 --- a/drivers/net/dsa/b53/b53_regs.h +++ b/drivers/net/dsa/b53/b53_regs.h @@ -309,6 +309,9 @@ /* Port VLAN mask (16 bit) IMP port is always 8, also on 5325 & co */ #define B53_PVLAN_PORT_MASK(i) ((i) * 2) +/* Join all VLANs register (16 bit) */ +#define B53_JOIN_ALL_VLAN_EN 0x50 + /************************************************************************* * 802.1Q Page Registers *************************************************************************/ From f458995b9ad831866ab9ffaa76ed0320315a91d9 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 26 Aug 2016 12:18:33 -0700 Subject: [PATCH 0501/1715] net: dsa: bcm_sf2: Utilize core B53 driver when possible The Broadcom Starfighter2 is almost entirely register compatible with B53, yet for historical reasons came up first in the tree and is now being updated to utilize b53_common.c to the fullest extent possible. A few things need to be adjusted to allow that: - the switch "core" registers currently operate on a 32-bit address, whereas b53 passes a page + reg pair to offset from, so we need to convert that, thankfully there is a generic formula to do that - the link managemenent is not self contained with the B53/CORE register set, but instead is in the SWITCH_REG block which is part of the integration glue logic, so we keep that entirely custom here because this really is part of the existing bcm_sf2 implementation - there are additional power management constraints on the port's memories that make us keep the port_enable/disable callbacks custom for now, also, we support tagging whereas b53_common does not support that yet All the VLAN and bridge code is entirely identical though so, avoid duplicating it. Other things will be migrated in the future like EEE and possibly Wake-on-LAN. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/Kconfig | 1 + drivers/net/dsa/bcm_sf2.c | 230 ++++++++++++++++++++++++++++++-------- drivers/net/dsa/bcm_sf2.h | 11 ++ 3 files changed, 195 insertions(+), 47 deletions(-) diff --git a/drivers/net/dsa/Kconfig b/drivers/net/dsa/Kconfig index 8f4544394f44..de6d04429a70 100644 --- a/drivers/net/dsa/Kconfig +++ b/drivers/net/dsa/Kconfig @@ -16,6 +16,7 @@ config NET_DSA_BCM_SF2 select FIXED_PHY select BCM7XXX_PHY select MDIO_BCM_UNIMAC + select B53 ---help--- This enables support for the Broadcom Starfighter 2 Ethernet switch chips. diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 220e5d1948ca..e99d0a21bd8b 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -29,9 +29,12 @@ #include #include #include +#include #include "bcm_sf2.h" #include "bcm_sf2_regs.h" +#include "b53/b53_priv.h" +#include "b53/b53_regs.h" /* String, offset, and register size in bytes if different from 4 bytes */ static const struct bcm_sf2_hw_stats bcm_sf2_mib[] = { @@ -106,7 +109,7 @@ static void bcm_sf2_sw_get_strings(struct dsa_switch *ds, static void bcm_sf2_sw_get_ethtool_stats(struct dsa_switch *ds, int port, uint64_t *data) { - struct bcm_sf2_priv *priv = ds_to_priv(ds); + struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); const struct bcm_sf2_hw_stats *s; unsigned int i; u64 val = 0; @@ -143,7 +146,7 @@ static enum dsa_tag_protocol bcm_sf2_sw_get_tag_protocol(struct dsa_switch *ds) static void bcm_sf2_imp_vlan_setup(struct dsa_switch *ds, int cpu_port) { - struct bcm_sf2_priv *priv = ds_to_priv(ds); + struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); unsigned int i; u32 reg; @@ -163,7 +166,7 @@ static void bcm_sf2_imp_vlan_setup(struct dsa_switch *ds, int cpu_port) static void bcm_sf2_imp_setup(struct dsa_switch *ds, int port) { - struct bcm_sf2_priv *priv = ds_to_priv(ds); + struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); u32 reg, val; /* Enable the port memories */ @@ -228,7 +231,7 @@ static void bcm_sf2_imp_setup(struct dsa_switch *ds, int port) static void bcm_sf2_eee_enable_set(struct dsa_switch *ds, int port, bool enable) { - struct bcm_sf2_priv *priv = ds_to_priv(ds); + struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); u32 reg; reg = core_readl(priv, CORE_EEE_EN_CTRL); @@ -241,7 +244,7 @@ static void bcm_sf2_eee_enable_set(struct dsa_switch *ds, int port, bool enable) static void bcm_sf2_gphy_enable_set(struct dsa_switch *ds, bool enable) { - struct bcm_sf2_priv *priv = ds_to_priv(ds); + struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); u32 reg; reg = reg_readl(priv, REG_SPHY_CNTRL); @@ -315,7 +318,7 @@ static inline void bcm_sf2_port_intr_disable(struct bcm_sf2_priv *priv, static int bcm_sf2_port_setup(struct dsa_switch *ds, int port, struct phy_device *phy) { - struct bcm_sf2_priv *priv = ds_to_priv(ds); + struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); s8 cpu_port = ds->dst[ds->index].cpu_port; u32 reg; @@ -371,7 +374,7 @@ static int bcm_sf2_port_setup(struct dsa_switch *ds, int port, static void bcm_sf2_port_disable(struct dsa_switch *ds, int port, struct phy_device *phy) { - struct bcm_sf2_priv *priv = ds_to_priv(ds); + struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); u32 off, reg; if (priv->wol_ports_mask & (1 << port)) @@ -403,7 +406,7 @@ static void bcm_sf2_port_disable(struct dsa_switch *ds, int port, static int bcm_sf2_eee_init(struct dsa_switch *ds, int port, struct phy_device *phy) { - struct bcm_sf2_priv *priv = ds_to_priv(ds); + struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); struct ethtool_eee *p = &priv->port_sts[port].eee; int ret; @@ -421,7 +424,7 @@ static int bcm_sf2_eee_init(struct dsa_switch *ds, int port, static int bcm_sf2_sw_get_eee(struct dsa_switch *ds, int port, struct ethtool_eee *e) { - struct bcm_sf2_priv *priv = ds_to_priv(ds); + struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); struct ethtool_eee *p = &priv->port_sts[port].eee; u32 reg; @@ -436,7 +439,7 @@ static int bcm_sf2_sw_set_eee(struct dsa_switch *ds, int port, struct phy_device *phydev, struct ethtool_eee *e) { - struct bcm_sf2_priv *priv = ds_to_priv(ds); + struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); struct ethtool_eee *p = &priv->port_sts[port].eee; p->eee_enabled = e->eee_enabled; @@ -482,7 +485,7 @@ static int bcm_sf2_fast_age_op(struct bcm_sf2_priv *priv) */ static int bcm_sf2_sw_fast_age_port(struct dsa_switch *ds, int port) { - struct bcm_sf2_priv *priv = ds_to_priv(ds); + struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); core_writel(priv, port, CORE_FAST_AGE_PORT); @@ -555,7 +558,7 @@ static int bcm_sf2_get_vlan_entry(struct bcm_sf2_priv *priv, u16 vid, static int bcm_sf2_sw_br_join(struct dsa_switch *ds, int port, struct net_device *bridge) { - struct bcm_sf2_priv *priv = ds_to_priv(ds); + struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); s8 cpu_port = ds->dst->cpu_port; unsigned int i; u32 reg, p_ctl; @@ -598,7 +601,7 @@ static int bcm_sf2_sw_br_join(struct dsa_switch *ds, int port, static void bcm_sf2_sw_br_leave(struct dsa_switch *ds, int port) { - struct bcm_sf2_priv *priv = ds_to_priv(ds); + struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); struct net_device *bridge = priv->port_sts[port].bridge_dev; s8 cpu_port = ds->dst->cpu_port; unsigned int i; @@ -636,7 +639,7 @@ static void bcm_sf2_sw_br_leave(struct dsa_switch *ds, int port) static void bcm_sf2_sw_br_set_stp_state(struct dsa_switch *ds, int port, u8 state) { - struct bcm_sf2_priv *priv = ds_to_priv(ds); + struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); u8 hw_state, cur_hw_state; u32 reg; @@ -816,7 +819,7 @@ static void bcm_sf2_sw_fdb_add(struct dsa_switch *ds, int port, const struct switchdev_obj_port_fdb *fdb, struct switchdev_trans *trans) { - struct bcm_sf2_priv *priv = ds_to_priv(ds); + struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); if (bcm_sf2_arl_op(priv, 0, port, fdb->addr, fdb->vid, true)) pr_err("%s: failed to add MAC address\n", __func__); @@ -825,7 +828,7 @@ static void bcm_sf2_sw_fdb_add(struct dsa_switch *ds, int port, static int bcm_sf2_sw_fdb_del(struct dsa_switch *ds, int port, const struct switchdev_obj_port_fdb *fdb) { - struct bcm_sf2_priv *priv = ds_to_priv(ds); + struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); return bcm_sf2_arl_op(priv, 0, port, fdb->addr, fdb->vid, false); } @@ -882,7 +885,7 @@ static int bcm_sf2_sw_fdb_dump(struct dsa_switch *ds, int port, struct switchdev_obj_port_fdb *fdb, int (*cb)(struct switchdev_obj *obj)) { - struct bcm_sf2_priv *priv = ds_to_priv(ds); + struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); struct net_device *dev = ds->ports[port].netdev; struct bcm_sf2_arl_entry results[2]; unsigned int count = 0; @@ -1073,7 +1076,7 @@ static void bcm_sf2_identify_ports(struct bcm_sf2_priv *priv, static int bcm_sf2_mdio_register(struct dsa_switch *ds) { - struct bcm_sf2_priv *priv = ds_to_priv(ds); + struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); struct device_node *dn; static int index; int err; @@ -1144,7 +1147,7 @@ static int bcm_sf2_sw_set_addr(struct dsa_switch *ds, u8 *addr) static u32 bcm_sf2_sw_get_phy_flags(struct dsa_switch *ds, int port) { - struct bcm_sf2_priv *priv = ds_to_priv(ds); + struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); /* The BCM7xxx PHY driver expects to find the integrated PHY revision * in bits 15:8 and the patch level in bits 7:0 which is exactly what @@ -1157,7 +1160,7 @@ static u32 bcm_sf2_sw_get_phy_flags(struct dsa_switch *ds, int port) static void bcm_sf2_sw_adjust_link(struct dsa_switch *ds, int port, struct phy_device *phydev) { - struct bcm_sf2_priv *priv = ds_to_priv(ds); + struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); u32 id_mode_dis = 0, port_mode; const char *str = NULL; u32 reg; @@ -1237,7 +1240,7 @@ force_link: static void bcm_sf2_sw_fixed_link_update(struct dsa_switch *ds, int port, struct fixed_phy_status *status) { - struct bcm_sf2_priv *priv = ds_to_priv(ds); + struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); u32 duplex, pause; u32 reg; @@ -1289,7 +1292,7 @@ static void bcm_sf2_sw_fixed_link_update(struct dsa_switch *ds, int port, static int bcm_sf2_sw_suspend(struct dsa_switch *ds) { - struct bcm_sf2_priv *priv = ds_to_priv(ds); + struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); unsigned int port; bcm_sf2_intr_disable(priv); @@ -1309,7 +1312,7 @@ static int bcm_sf2_sw_suspend(struct dsa_switch *ds) static int bcm_sf2_sw_resume(struct dsa_switch *ds) { - struct bcm_sf2_priv *priv = ds_to_priv(ds); + struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); unsigned int port; int ret; @@ -1336,7 +1339,7 @@ static void bcm_sf2_sw_get_wol(struct dsa_switch *ds, int port, struct ethtool_wolinfo *wol) { struct net_device *p = ds->dst[ds->index].master_netdev; - struct bcm_sf2_priv *priv = ds_to_priv(ds); + struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); struct ethtool_wolinfo pwol; /* Get the parent device WoL settings */ @@ -1359,7 +1362,7 @@ static int bcm_sf2_sw_set_wol(struct dsa_switch *ds, int port, struct ethtool_wolinfo *wol) { struct net_device *p = ds->dst[ds->index].master_netdev; - struct bcm_sf2_priv *priv = ds_to_priv(ds); + struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); s8 cpu_port = ds->dst[ds->index].cpu_port; struct ethtool_wolinfo pwol; @@ -1420,7 +1423,7 @@ static void bcm_sf2_enable_vlan(struct bcm_sf2_priv *priv, bool enable) static void bcm_sf2_sw_configure_vlan(struct dsa_switch *ds) { - struct bcm_sf2_priv *priv = ds_to_priv(ds); + struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); unsigned int port; /* Clear all VLANs */ @@ -1444,7 +1447,7 @@ static int bcm_sf2_sw_vlan_prepare(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan, struct switchdev_trans *trans) { - struct bcm_sf2_priv *priv = ds_to_priv(ds); + struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); bcm_sf2_enable_vlan(priv, true); @@ -1455,7 +1458,7 @@ static void bcm_sf2_sw_vlan_add(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan, struct switchdev_trans *trans) { - struct bcm_sf2_priv *priv = ds_to_priv(ds); + struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID; s8 cpu_port = ds->dst->cpu_port; @@ -1488,7 +1491,7 @@ static void bcm_sf2_sw_vlan_add(struct dsa_switch *ds, int port, static int bcm_sf2_sw_vlan_del(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan) { - struct bcm_sf2_priv *priv = ds_to_priv(ds); + struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; s8 cpu_port = ds->dst->cpu_port; struct bcm_sf2_vlan *vl; @@ -1530,7 +1533,7 @@ static int bcm_sf2_sw_vlan_dump(struct dsa_switch *ds, int port, struct switchdev_obj_port_vlan *vlan, int (*cb)(struct switchdev_obj *obj)) { - struct bcm_sf2_priv *priv = ds_to_priv(ds); + struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); struct bcm_sf2_port_status *p = &priv->port_sts[port]; struct bcm_sf2_vlan *vl; u16 vid, pvid; @@ -1562,7 +1565,7 @@ static int bcm_sf2_sw_vlan_dump(struct dsa_switch *ds, int port, static int bcm_sf2_sw_setup(struct dsa_switch *ds) { - struct bcm_sf2_priv *priv = ds_to_priv(ds); + struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); unsigned int port; /* Enable all valid ports and disable those unused */ @@ -1613,11 +1616,112 @@ static struct dsa_switch_ops bcm_sf2_switch_ops = { .port_vlan_dump = bcm_sf2_sw_vlan_dump, }; +/* The SWITCH_CORE register space is managed by b53 but operates on a page + + * register basis so we need to translate that into an address that the + * bus-glue understands. + */ +#define SF2_PAGE_REG_MKADDR(page, reg) ((page) << 10 | (reg) << 2) + +static int bcm_sf2_core_read8(struct b53_device *dev, u8 page, u8 reg, + u8 *val) +{ + struct bcm_sf2_priv *priv = dev->priv; + + *val = core_readl(priv, SF2_PAGE_REG_MKADDR(page, reg)); + + return 0; +} + +static int bcm_sf2_core_read16(struct b53_device *dev, u8 page, u8 reg, + u16 *val) +{ + struct bcm_sf2_priv *priv = dev->priv; + + *val = core_readl(priv, SF2_PAGE_REG_MKADDR(page, reg)); + + return 0; +} + +static int bcm_sf2_core_read32(struct b53_device *dev, u8 page, u8 reg, + u32 *val) +{ + struct bcm_sf2_priv *priv = dev->priv; + + *val = core_readl(priv, SF2_PAGE_REG_MKADDR(page, reg)); + + return 0; +} + +static int bcm_sf2_core_read64(struct b53_device *dev, u8 page, u8 reg, + u64 *val) +{ + struct bcm_sf2_priv *priv = dev->priv; + + *val = core_readq(priv, SF2_PAGE_REG_MKADDR(page, reg)); + + return 0; +} + +static int bcm_sf2_core_write8(struct b53_device *dev, u8 page, u8 reg, + u8 value) +{ + struct bcm_sf2_priv *priv = dev->priv; + + core_writel(priv, value, SF2_PAGE_REG_MKADDR(page, reg)); + + return 0; +} + +static int bcm_sf2_core_write16(struct b53_device *dev, u8 page, u8 reg, + u16 value) +{ + struct bcm_sf2_priv *priv = dev->priv; + + core_writel(priv, value, SF2_PAGE_REG_MKADDR(page, reg)); + + return 0; +} + +static int bcm_sf2_core_write32(struct b53_device *dev, u8 page, u8 reg, + u32 value) +{ + struct bcm_sf2_priv *priv = dev->priv; + + core_writel(priv, value, SF2_PAGE_REG_MKADDR(page, reg)); + + return 0; +} + +static int bcm_sf2_core_write64(struct b53_device *dev, u8 page, u8 reg, + u64 value) +{ + struct bcm_sf2_priv *priv = dev->priv; + + core_writeq(priv, value, SF2_PAGE_REG_MKADDR(page, reg)); + + return 0; +} + +struct b53_io_ops bcm_sf2_io_ops = { + .read8 = bcm_sf2_core_read8, + .read16 = bcm_sf2_core_read16, + .read32 = bcm_sf2_core_read32, + .read48 = bcm_sf2_core_read64, + .read64 = bcm_sf2_core_read64, + .write8 = bcm_sf2_core_write8, + .write16 = bcm_sf2_core_write16, + .write32 = bcm_sf2_core_write32, + .write48 = bcm_sf2_core_write64, + .write64 = bcm_sf2_core_write64, +}; + static int bcm_sf2_sw_probe(struct platform_device *pdev) { const char *reg_names[BCM_SF2_REGS_NUM] = BCM_SF2_REGS_NAME; struct device_node *dn = pdev->dev.of_node; + struct b53_platform_data *pdata; struct bcm_sf2_priv *priv; + struct b53_device *dev; struct dsa_switch *ds; void __iomem **base; struct resource *r; @@ -1625,16 +1729,49 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev) u32 reg, rev; int ret; - ds = devm_kzalloc(&pdev->dev, sizeof(*ds) + sizeof(*priv), GFP_KERNEL); - if (!ds) + priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); + if (!priv) return -ENOMEM; - priv = (struct bcm_sf2_priv *)(ds + 1); - ds->priv = priv; - ds->dev = &pdev->dev; - ds->ops = &bcm_sf2_switch_ops; + dev = b53_switch_alloc(&pdev->dev, &bcm_sf2_io_ops, priv); + if (!dev) + return -ENOMEM; - dev_set_drvdata(&pdev->dev, ds); + pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL); + if (!pdata) + return -ENOMEM; + + /* Auto-detection using standard registers will not work, so + * provide an indication of what kind of device we are for + * b53_common to work with + */ + pdata->chip_id = BCM7445_DEVICE_ID; + dev->pdata = pdata; + + priv->dev = dev; + ds = dev->ds; + + /* Override the parts that are non-standard wrt. normal b53 devices */ + ds->ops->get_tag_protocol = bcm_sf2_sw_get_tag_protocol; + ds->ops->setup = bcm_sf2_sw_setup; + ds->ops->get_phy_flags = bcm_sf2_sw_get_phy_flags; + ds->ops->adjust_link = bcm_sf2_sw_adjust_link; + ds->ops->fixed_link_update = bcm_sf2_sw_fixed_link_update; + ds->ops->suspend = bcm_sf2_sw_suspend; + ds->ops->resume = bcm_sf2_sw_resume; + ds->ops->get_wol = bcm_sf2_sw_get_wol; + ds->ops->set_wol = bcm_sf2_sw_set_wol; + ds->ops->port_enable = bcm_sf2_port_setup; + ds->ops->port_disable = bcm_sf2_port_disable; + ds->ops->get_eee = bcm_sf2_sw_get_eee; + ds->ops->set_eee = bcm_sf2_sw_set_eee; + + /* Avoid having DSA free our slave MDIO bus (checking for + * ds->slave_mii_bus and ds->ops->phy_read being non-NULL) + */ + ds->ops->phy_read = NULL; + + dev_set_drvdata(&pdev->dev, priv); spin_lock_init(&priv->indir_lock); mutex_init(&priv->stats_mutex); @@ -1709,7 +1846,7 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev) rev = reg_readl(priv, REG_PHY_REVISION); priv->hw_params.gphy_rev = rev & PHY_REVISION_MASK; - ret = dsa_register_switch(ds, dn); + ret = b53_switch_register(dev); if (ret) goto out_mdio; @@ -1727,13 +1864,12 @@ out_mdio: static int bcm_sf2_sw_remove(struct platform_device *pdev) { - struct dsa_switch *ds = platform_get_drvdata(pdev); - struct bcm_sf2_priv *priv = ds_to_priv(ds); + struct bcm_sf2_priv *priv = platform_get_drvdata(pdev); /* Disable all ports and interrupts */ priv->wol_ports_mask = 0; - bcm_sf2_sw_suspend(ds); - dsa_unregister_switch(ds); + bcm_sf2_sw_suspend(priv->dev->ds); + dsa_unregister_switch(priv->dev->ds); bcm_sf2_mdio_unregister(priv); return 0; @@ -1743,17 +1879,17 @@ static int bcm_sf2_sw_remove(struct platform_device *pdev) static int bcm_sf2_suspend(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); - struct dsa_switch *ds = platform_get_drvdata(pdev); + struct bcm_sf2_priv *priv = platform_get_drvdata(pdev); - return dsa_switch_suspend(ds); + return dsa_switch_suspend(priv->dev->ds); } static int bcm_sf2_resume(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); - struct dsa_switch *ds = platform_get_drvdata(pdev); + struct bcm_sf2_priv *priv = platform_get_drvdata(pdev); - return dsa_switch_resume(ds); + return dsa_switch_resume(priv->dev->ds); } #endif /* CONFIG_PM_SLEEP */ diff --git a/drivers/net/dsa/bcm_sf2.h b/drivers/net/dsa/bcm_sf2.h index 463bed8cbe4c..7e95fb7d5d2e 100644 --- a/drivers/net/dsa/bcm_sf2.h +++ b/drivers/net/dsa/bcm_sf2.h @@ -26,6 +26,7 @@ #include #include "bcm_sf2_regs.h" +#include "b53/b53_priv.h" struct bcm_sf2_hw_params { u16 top_rev; @@ -134,6 +135,9 @@ struct bcm_sf2_priv { u32 irq1_stat; u32 irq1_mask; + /* Backing b53_device */ + struct b53_device *dev; + /* Mutex protecting access to the MIB counters */ struct mutex stats_mutex; @@ -160,6 +164,13 @@ struct bcm_sf2_priv { struct bcm_sf2_vlan vlans[VLAN_N_VID]; }; +static inline struct bcm_sf2_priv *bcm_sf2_to_priv(struct dsa_switch *ds) +{ + struct b53_device *dev = ds_to_priv(ds); + + return dev->priv; +} + struct bcm_sf2_hw_stats { const char *string; u16 reg; From de0b9d3bad0d410c4d8a58431ba3405aaf8f10b2 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 26 Aug 2016 12:18:34 -0700 Subject: [PATCH 0502/1715] net: dsa: bcm_sf2: Remove duplicate code Now that we are using b53_common for most VLAN, FDB and bridge operations, delete all the redundant code that we had in bcm_sf2.c to keep only the integration specific logic that we have to deal with: power management, link management and the external interfaces (RGMII, MDIO). Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/bcm_sf2.c | 772 +-------------------------------- drivers/net/dsa/bcm_sf2.h | 73 +--- drivers/net/dsa/bcm_sf2_regs.h | 122 ------ 3 files changed, 17 insertions(+), 950 deletions(-) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index e99d0a21bd8b..ddd477354e00 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -36,109 +36,6 @@ #include "b53/b53_priv.h" #include "b53/b53_regs.h" -/* String, offset, and register size in bytes if different from 4 bytes */ -static const struct bcm_sf2_hw_stats bcm_sf2_mib[] = { - { "TxOctets", 0x000, 8 }, - { "TxDropPkts", 0x020 }, - { "TxQPKTQ0", 0x030 }, - { "TxBroadcastPkts", 0x040 }, - { "TxMulticastPkts", 0x050 }, - { "TxUnicastPKts", 0x060 }, - { "TxCollisions", 0x070 }, - { "TxSingleCollision", 0x080 }, - { "TxMultipleCollision", 0x090 }, - { "TxDeferredCollision", 0x0a0 }, - { "TxLateCollision", 0x0b0 }, - { "TxExcessiveCollision", 0x0c0 }, - { "TxFrameInDisc", 0x0d0 }, - { "TxPausePkts", 0x0e0 }, - { "TxQPKTQ1", 0x0f0 }, - { "TxQPKTQ2", 0x100 }, - { "TxQPKTQ3", 0x110 }, - { "TxQPKTQ4", 0x120 }, - { "TxQPKTQ5", 0x130 }, - { "RxOctets", 0x140, 8 }, - { "RxUndersizePkts", 0x160 }, - { "RxPausePkts", 0x170 }, - { "RxPkts64Octets", 0x180 }, - { "RxPkts65to127Octets", 0x190 }, - { "RxPkts128to255Octets", 0x1a0 }, - { "RxPkts256to511Octets", 0x1b0 }, - { "RxPkts512to1023Octets", 0x1c0 }, - { "RxPkts1024toMaxPktsOctets", 0x1d0 }, - { "RxOversizePkts", 0x1e0 }, - { "RxJabbers", 0x1f0 }, - { "RxAlignmentErrors", 0x200 }, - { "RxFCSErrors", 0x210 }, - { "RxGoodOctets", 0x220, 8 }, - { "RxDropPkts", 0x240 }, - { "RxUnicastPkts", 0x250 }, - { "RxMulticastPkts", 0x260 }, - { "RxBroadcastPkts", 0x270 }, - { "RxSAChanges", 0x280 }, - { "RxFragments", 0x290 }, - { "RxJumboPkt", 0x2a0 }, - { "RxSymblErr", 0x2b0 }, - { "InRangeErrCount", 0x2c0 }, - { "OutRangeErrCount", 0x2d0 }, - { "EEELpiEvent", 0x2e0 }, - { "EEELpiDuration", 0x2f0 }, - { "RxDiscard", 0x300, 8 }, - { "TxQPKTQ6", 0x320 }, - { "TxQPKTQ7", 0x330 }, - { "TxPkts64Octets", 0x340 }, - { "TxPkts65to127Octets", 0x350 }, - { "TxPkts128to255Octets", 0x360 }, - { "TxPkts256to511Ocets", 0x370 }, - { "TxPkts512to1023Ocets", 0x380 }, - { "TxPkts1024toMaxPktOcets", 0x390 }, -}; - -#define BCM_SF2_STATS_SIZE ARRAY_SIZE(bcm_sf2_mib) - -static void bcm_sf2_sw_get_strings(struct dsa_switch *ds, - int port, uint8_t *data) -{ - unsigned int i; - - for (i = 0; i < BCM_SF2_STATS_SIZE; i++) - memcpy(data + i * ETH_GSTRING_LEN, - bcm_sf2_mib[i].string, ETH_GSTRING_LEN); -} - -static void bcm_sf2_sw_get_ethtool_stats(struct dsa_switch *ds, - int port, uint64_t *data) -{ - struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); - const struct bcm_sf2_hw_stats *s; - unsigned int i; - u64 val = 0; - u32 offset; - - mutex_lock(&priv->stats_mutex); - - /* Now fetch the per-port counters */ - for (i = 0; i < BCM_SF2_STATS_SIZE; i++) { - s = &bcm_sf2_mib[i]; - - /* Do a latched 64-bit read if needed */ - offset = s->reg + CORE_P_MIB_OFFSET(port); - if (s->sizeof_stat == 8) - val = core_readq(priv, offset); - else - val = core_readl(priv, offset); - - data[i] = (u64)val; - } - - mutex_unlock(&priv->stats_mutex); -} - -static int bcm_sf2_sw_get_sset_count(struct dsa_switch *ds) -{ - return BCM_SF2_STATS_SIZE; -} - static enum dsa_tag_protocol bcm_sf2_sw_get_tag_protocol(struct dsa_switch *ds) { return DSA_TAG_PROTO_BRCM; @@ -455,469 +352,6 @@ static int bcm_sf2_sw_set_eee(struct dsa_switch *ds, int port, return 0; } -static int bcm_sf2_fast_age_op(struct bcm_sf2_priv *priv) -{ - unsigned int timeout = 1000; - u32 reg; - - reg = core_readl(priv, CORE_FAST_AGE_CTRL); - reg |= EN_AGE_PORT | EN_AGE_VLAN | EN_AGE_DYNAMIC | FAST_AGE_STR_DONE; - core_writel(priv, reg, CORE_FAST_AGE_CTRL); - - do { - reg = core_readl(priv, CORE_FAST_AGE_CTRL); - if (!(reg & FAST_AGE_STR_DONE)) - break; - - cpu_relax(); - } while (timeout--); - - if (!timeout) - return -ETIMEDOUT; - - core_writel(priv, 0, CORE_FAST_AGE_CTRL); - - return 0; -} - -/* Fast-ageing of ARL entries for a given port, equivalent to an ARL - * flush for that port. - */ -static int bcm_sf2_sw_fast_age_port(struct dsa_switch *ds, int port) -{ - struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); - - core_writel(priv, port, CORE_FAST_AGE_PORT); - - return bcm_sf2_fast_age_op(priv); -} - -static int bcm_sf2_sw_fast_age_vlan(struct bcm_sf2_priv *priv, u16 vid) -{ - core_writel(priv, vid, CORE_FAST_AGE_VID); - - return bcm_sf2_fast_age_op(priv); -} - -static int bcm_sf2_vlan_op_wait(struct bcm_sf2_priv *priv) -{ - unsigned int timeout = 10; - u32 reg; - - do { - reg = core_readl(priv, CORE_ARLA_VTBL_RWCTRL); - if (!(reg & ARLA_VTBL_STDN)) - return 0; - - usleep_range(1000, 2000); - } while (timeout--); - - return -ETIMEDOUT; -} - -static int bcm_sf2_vlan_op(struct bcm_sf2_priv *priv, u8 op) -{ - core_writel(priv, ARLA_VTBL_STDN | op, CORE_ARLA_VTBL_RWCTRL); - - return bcm_sf2_vlan_op_wait(priv); -} - -static void bcm_sf2_set_vlan_entry(struct bcm_sf2_priv *priv, u16 vid, - struct bcm_sf2_vlan *vlan) -{ - int ret; - - core_writel(priv, vid & VTBL_ADDR_INDEX_MASK, CORE_ARLA_VTBL_ADDR); - core_writel(priv, vlan->untag << UNTAG_MAP_SHIFT | vlan->members, - CORE_ARLA_VTBL_ENTRY); - - ret = bcm_sf2_vlan_op(priv, ARLA_VTBL_CMD_WRITE); - if (ret) - pr_err("failed to write VLAN entry\n"); -} - -static int bcm_sf2_get_vlan_entry(struct bcm_sf2_priv *priv, u16 vid, - struct bcm_sf2_vlan *vlan) -{ - u32 entry; - int ret; - - core_writel(priv, vid & VTBL_ADDR_INDEX_MASK, CORE_ARLA_VTBL_ADDR); - - ret = bcm_sf2_vlan_op(priv, ARLA_VTBL_CMD_READ); - if (ret) - return ret; - - entry = core_readl(priv, CORE_ARLA_VTBL_ENTRY); - vlan->members = entry & FWD_MAP_MASK; - vlan->untag = (entry >> UNTAG_MAP_SHIFT) & UNTAG_MAP_MASK; - - return 0; -} - -static int bcm_sf2_sw_br_join(struct dsa_switch *ds, int port, - struct net_device *bridge) -{ - struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); - s8 cpu_port = ds->dst->cpu_port; - unsigned int i; - u32 reg, p_ctl; - - /* Make this port leave the all VLANs join since we will have proper - * VLAN entries from now on - */ - reg = core_readl(priv, CORE_JOIN_ALL_VLAN_EN); - reg &= ~BIT(port); - if ((reg & BIT(cpu_port)) == BIT(cpu_port)) - reg &= ~BIT(cpu_port); - core_writel(priv, reg, CORE_JOIN_ALL_VLAN_EN); - - priv->port_sts[port].bridge_dev = bridge; - p_ctl = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(port)); - - for (i = 0; i < priv->hw_params.num_ports; i++) { - if (priv->port_sts[i].bridge_dev != bridge) - continue; - - /* Add this local port to the remote port VLAN control - * membership and update the remote port bitmask - */ - reg = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(i)); - reg |= 1 << port; - core_writel(priv, reg, CORE_PORT_VLAN_CTL_PORT(i)); - priv->port_sts[i].vlan_ctl_mask = reg; - - p_ctl |= 1 << i; - } - - /* Configure the local port VLAN control membership to include - * remote ports and update the local port bitmask - */ - core_writel(priv, p_ctl, CORE_PORT_VLAN_CTL_PORT(port)); - priv->port_sts[port].vlan_ctl_mask = p_ctl; - - return 0; -} - -static void bcm_sf2_sw_br_leave(struct dsa_switch *ds, int port) -{ - struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); - struct net_device *bridge = priv->port_sts[port].bridge_dev; - s8 cpu_port = ds->dst->cpu_port; - unsigned int i; - u32 reg, p_ctl; - - p_ctl = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(port)); - - for (i = 0; i < priv->hw_params.num_ports; i++) { - /* Don't touch the remaining ports */ - if (priv->port_sts[i].bridge_dev != bridge) - continue; - - reg = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(i)); - reg &= ~(1 << port); - core_writel(priv, reg, CORE_PORT_VLAN_CTL_PORT(i)); - priv->port_sts[port].vlan_ctl_mask = reg; - - /* Prevent self removal to preserve isolation */ - if (port != i) - p_ctl &= ~(1 << i); - } - - core_writel(priv, p_ctl, CORE_PORT_VLAN_CTL_PORT(port)); - priv->port_sts[port].vlan_ctl_mask = p_ctl; - priv->port_sts[port].bridge_dev = NULL; - - /* Make this port join all VLANs without VLAN entries */ - reg = core_readl(priv, CORE_JOIN_ALL_VLAN_EN); - reg |= BIT(port); - if (!(reg & BIT(cpu_port))) - reg |= BIT(cpu_port); - core_writel(priv, reg, CORE_JOIN_ALL_VLAN_EN); -} - -static void bcm_sf2_sw_br_set_stp_state(struct dsa_switch *ds, int port, - u8 state) -{ - struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); - u8 hw_state, cur_hw_state; - u32 reg; - - reg = core_readl(priv, CORE_G_PCTL_PORT(port)); - cur_hw_state = reg & (G_MISTP_STATE_MASK << G_MISTP_STATE_SHIFT); - - switch (state) { - case BR_STATE_DISABLED: - hw_state = G_MISTP_DIS_STATE; - break; - case BR_STATE_LISTENING: - hw_state = G_MISTP_LISTEN_STATE; - break; - case BR_STATE_LEARNING: - hw_state = G_MISTP_LEARN_STATE; - break; - case BR_STATE_FORWARDING: - hw_state = G_MISTP_FWD_STATE; - break; - case BR_STATE_BLOCKING: - hw_state = G_MISTP_BLOCK_STATE; - break; - default: - pr_err("%s: invalid STP state: %d\n", __func__, state); - return; - } - - /* Fast-age ARL entries if we are moving a port from Learning or - * Forwarding (cur_hw_state) state to Disabled, Blocking or Listening - * state (hw_state) - */ - if (cur_hw_state != hw_state) { - if (cur_hw_state >= G_MISTP_LEARN_STATE && - hw_state <= G_MISTP_LISTEN_STATE) { - if (bcm_sf2_sw_fast_age_port(ds, port)) { - pr_err("%s: fast-ageing failed\n", __func__); - return; - } - } - } - - reg = core_readl(priv, CORE_G_PCTL_PORT(port)); - reg &= ~(G_MISTP_STATE_MASK << G_MISTP_STATE_SHIFT); - reg |= hw_state; - core_writel(priv, reg, CORE_G_PCTL_PORT(port)); -} - -/* Address Resolution Logic routines */ -static int bcm_sf2_arl_op_wait(struct bcm_sf2_priv *priv) -{ - unsigned int timeout = 10; - u32 reg; - - do { - reg = core_readl(priv, CORE_ARLA_RWCTL); - if (!(reg & ARL_STRTDN)) - return 0; - - usleep_range(1000, 2000); - } while (timeout--); - - return -ETIMEDOUT; -} - -static int bcm_sf2_arl_rw_op(struct bcm_sf2_priv *priv, unsigned int op) -{ - u32 cmd; - - if (op > ARL_RW) - return -EINVAL; - - cmd = core_readl(priv, CORE_ARLA_RWCTL); - cmd &= ~IVL_SVL_SELECT; - cmd |= ARL_STRTDN; - if (op) - cmd |= ARL_RW; - else - cmd &= ~ARL_RW; - core_writel(priv, cmd, CORE_ARLA_RWCTL); - - return bcm_sf2_arl_op_wait(priv); -} - -static int bcm_sf2_arl_read(struct bcm_sf2_priv *priv, u64 mac, - u16 vid, struct bcm_sf2_arl_entry *ent, u8 *idx, - bool is_valid) -{ - unsigned int i; - int ret; - - ret = bcm_sf2_arl_op_wait(priv); - if (ret) - return ret; - - /* Read the 4 bins */ - for (i = 0; i < 4; i++) { - u64 mac_vid; - u32 fwd_entry; - - mac_vid = core_readq(priv, CORE_ARLA_MACVID_ENTRY(i)); - fwd_entry = core_readl(priv, CORE_ARLA_FWD_ENTRY(i)); - bcm_sf2_arl_to_entry(ent, mac_vid, fwd_entry); - - if (ent->is_valid && is_valid) { - *idx = i; - return 0; - } - - /* This is the MAC we just deleted */ - if (!is_valid && (mac_vid & mac)) - return 0; - } - - return -ENOENT; -} - -static int bcm_sf2_arl_op(struct bcm_sf2_priv *priv, int op, int port, - const unsigned char *addr, u16 vid, bool is_valid) -{ - struct bcm_sf2_arl_entry ent; - u32 fwd_entry; - u64 mac, mac_vid = 0; - u8 idx = 0; - int ret; - - /* Convert the array into a 64-bit MAC */ - mac = bcm_sf2_mac_to_u64(addr); - - /* Perform a read for the given MAC and VID */ - core_writeq(priv, mac, CORE_ARLA_MAC); - core_writel(priv, vid, CORE_ARLA_VID); - - /* Issue a read operation for this MAC */ - ret = bcm_sf2_arl_rw_op(priv, 1); - if (ret) - return ret; - - ret = bcm_sf2_arl_read(priv, mac, vid, &ent, &idx, is_valid); - /* If this is a read, just finish now */ - if (op) - return ret; - - /* We could not find a matching MAC, so reset to a new entry */ - if (ret) { - fwd_entry = 0; - idx = 0; - } - - memset(&ent, 0, sizeof(ent)); - ent.port = port; - ent.is_valid = is_valid; - ent.vid = vid; - ent.is_static = true; - memcpy(ent.mac, addr, ETH_ALEN); - bcm_sf2_arl_from_entry(&mac_vid, &fwd_entry, &ent); - - core_writeq(priv, mac_vid, CORE_ARLA_MACVID_ENTRY(idx)); - core_writel(priv, fwd_entry, CORE_ARLA_FWD_ENTRY(idx)); - - ret = bcm_sf2_arl_rw_op(priv, 0); - if (ret) - return ret; - - /* Re-read the entry to check */ - return bcm_sf2_arl_read(priv, mac, vid, &ent, &idx, is_valid); -} - -static int bcm_sf2_sw_fdb_prepare(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_fdb *fdb, - struct switchdev_trans *trans) -{ - /* We do not need to do anything specific here yet */ - return 0; -} - -static void bcm_sf2_sw_fdb_add(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_fdb *fdb, - struct switchdev_trans *trans) -{ - struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); - - if (bcm_sf2_arl_op(priv, 0, port, fdb->addr, fdb->vid, true)) - pr_err("%s: failed to add MAC address\n", __func__); -} - -static int bcm_sf2_sw_fdb_del(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_fdb *fdb) -{ - struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); - - return bcm_sf2_arl_op(priv, 0, port, fdb->addr, fdb->vid, false); -} - -static int bcm_sf2_arl_search_wait(struct bcm_sf2_priv *priv) -{ - unsigned timeout = 1000; - u32 reg; - - do { - reg = core_readl(priv, CORE_ARLA_SRCH_CTL); - if (!(reg & ARLA_SRCH_STDN)) - return 0; - - if (reg & ARLA_SRCH_VLID) - return 0; - - usleep_range(1000, 2000); - } while (timeout--); - - return -ETIMEDOUT; -} - -static void bcm_sf2_arl_search_rd(struct bcm_sf2_priv *priv, u8 idx, - struct bcm_sf2_arl_entry *ent) -{ - u64 mac_vid; - u32 fwd_entry; - - mac_vid = core_readq(priv, CORE_ARLA_SRCH_RSLT_MACVID(idx)); - fwd_entry = core_readl(priv, CORE_ARLA_SRCH_RSLT(idx)); - bcm_sf2_arl_to_entry(ent, mac_vid, fwd_entry); -} - -static int bcm_sf2_sw_fdb_copy(struct net_device *dev, int port, - const struct bcm_sf2_arl_entry *ent, - struct switchdev_obj_port_fdb *fdb, - int (*cb)(struct switchdev_obj *obj)) -{ - if (!ent->is_valid) - return 0; - - if (port != ent->port) - return 0; - - ether_addr_copy(fdb->addr, ent->mac); - fdb->vid = ent->vid; - fdb->ndm_state = ent->is_static ? NUD_NOARP : NUD_REACHABLE; - - return cb(&fdb->obj); -} - -static int bcm_sf2_sw_fdb_dump(struct dsa_switch *ds, int port, - struct switchdev_obj_port_fdb *fdb, - int (*cb)(struct switchdev_obj *obj)) -{ - struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); - struct net_device *dev = ds->ports[port].netdev; - struct bcm_sf2_arl_entry results[2]; - unsigned int count = 0; - int ret; - - /* Start search operation */ - core_writel(priv, ARLA_SRCH_STDN, CORE_ARLA_SRCH_CTL); - - do { - ret = bcm_sf2_arl_search_wait(priv); - if (ret) - return ret; - - /* Read both entries, then return their values back */ - bcm_sf2_arl_search_rd(priv, 0, &results[0]); - ret = bcm_sf2_sw_fdb_copy(dev, port, &results[0], fdb, cb); - if (ret) - return ret; - - bcm_sf2_arl_search_rd(priv, 1, &results[1]); - ret = bcm_sf2_sw_fdb_copy(dev, port, &results[1], fdb, cb); - if (ret) - return ret; - - if (!results[0].is_valid && !results[1].is_valid) - break; - - } while (count++ < CORE_ARLA_NUM_ENTRIES); - - return 0; -} - static int bcm_sf2_sw_indir_rw(struct bcm_sf2_priv *priv, int op, int addr, int regnum, u16 val) { @@ -1140,11 +574,6 @@ static void bcm_sf2_mdio_unregister(struct bcm_sf2_priv *priv) of_node_put(priv->master_mii_dn); } -static int bcm_sf2_sw_set_addr(struct dsa_switch *ds, u8 *addr) -{ - return 0; -} - static u32 bcm_sf2_sw_get_phy_flags(struct dsa_switch *ds, int port) { struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); @@ -1387,38 +816,27 @@ static int bcm_sf2_sw_set_wol(struct dsa_switch *ds, int port, return p->ethtool_ops->set_wol(p, wol); } -static void bcm_sf2_enable_vlan(struct bcm_sf2_priv *priv, bool enable) +static int bcm_sf2_vlan_op_wait(struct bcm_sf2_priv *priv) { - u32 mgmt, vc0, vc1, vc4, vc5; + unsigned int timeout = 10; + u32 reg; - mgmt = core_readl(priv, CORE_SWMODE); - vc0 = core_readl(priv, CORE_VLAN_CTRL0); - vc1 = core_readl(priv, CORE_VLAN_CTRL1); - vc4 = core_readl(priv, CORE_VLAN_CTRL4); - vc5 = core_readl(priv, CORE_VLAN_CTRL5); + do { + reg = core_readl(priv, CORE_ARLA_VTBL_RWCTRL); + if (!(reg & ARLA_VTBL_STDN)) + return 0; - mgmt &= ~SW_FWDG_MODE; + usleep_range(1000, 2000); + } while (timeout--); - if (enable) { - vc0 |= VLAN_EN | VLAN_LEARN_MODE_IVL; - vc1 |= EN_RSV_MCAST_UNTAG | EN_RSV_MCAST_FWDMAP; - vc4 &= ~(INGR_VID_CHK_MASK << INGR_VID_CHK_SHIFT); - vc4 |= INGR_VID_CHK_DROP; - vc5 |= DROP_VTABLE_MISS | EN_VID_FFF_FWD; - } else { - vc0 &= ~(VLAN_EN | VLAN_LEARN_MODE_IVL); - vc1 &= ~(EN_RSV_MCAST_UNTAG | EN_RSV_MCAST_FWDMAP); - vc4 &= ~(INGR_VID_CHK_MASK << INGR_VID_CHK_SHIFT); - vc5 &= ~(DROP_VTABLE_MISS | EN_VID_FFF_FWD); - vc4 |= INGR_VID_CHK_VID_VIOL_IMP; - } + return -ETIMEDOUT; +} - core_writel(priv, vc0, CORE_VLAN_CTRL0); - core_writel(priv, vc1, CORE_VLAN_CTRL1); - core_writel(priv, 0, CORE_VLAN_CTRL3); - core_writel(priv, vc4, CORE_VLAN_CTRL4); - core_writel(priv, vc5, CORE_VLAN_CTRL5); - core_writel(priv, mgmt, CORE_SWMODE); +static int bcm_sf2_vlan_op(struct bcm_sf2_priv *priv, u8 op) +{ + core_writel(priv, ARLA_VTBL_STDN | op, CORE_ARLA_VTBL_RWCTRL); + + return bcm_sf2_vlan_op_wait(priv); } static void bcm_sf2_sw_configure_vlan(struct dsa_switch *ds) @@ -1437,132 +855,6 @@ static void bcm_sf2_sw_configure_vlan(struct dsa_switch *ds) } } -static int bcm_sf2_sw_vlan_filtering(struct dsa_switch *ds, int port, - bool vlan_filtering) -{ - return 0; -} - -static int bcm_sf2_sw_vlan_prepare(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan, - struct switchdev_trans *trans) -{ - struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); - - bcm_sf2_enable_vlan(priv, true); - - return 0; -} - -static void bcm_sf2_sw_vlan_add(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan, - struct switchdev_trans *trans) -{ - struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); - bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; - bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID; - s8 cpu_port = ds->dst->cpu_port; - struct bcm_sf2_vlan *vl; - u16 vid; - - for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) { - vl = &priv->vlans[vid]; - - bcm_sf2_get_vlan_entry(priv, vid, vl); - - vl->members |= BIT(port) | BIT(cpu_port); - if (untagged) - vl->untag |= BIT(port) | BIT(cpu_port); - else - vl->untag &= ~(BIT(port) | BIT(cpu_port)); - - bcm_sf2_set_vlan_entry(priv, vid, vl); - bcm_sf2_sw_fast_age_vlan(priv, vid); - } - - if (pvid) { - core_writel(priv, vlan->vid_end, CORE_DEFAULT_1Q_TAG_P(port)); - core_writel(priv, vlan->vid_end, - CORE_DEFAULT_1Q_TAG_P(cpu_port)); - bcm_sf2_sw_fast_age_vlan(priv, vid); - } -} - -static int bcm_sf2_sw_vlan_del(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan) -{ - struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); - bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; - s8 cpu_port = ds->dst->cpu_port; - struct bcm_sf2_vlan *vl; - u16 vid, pvid; - int ret; - - pvid = core_readl(priv, CORE_DEFAULT_1Q_TAG_P(port)); - - for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) { - vl = &priv->vlans[vid]; - - ret = bcm_sf2_get_vlan_entry(priv, vid, vl); - if (ret) - return ret; - - vl->members &= ~BIT(port); - if ((vl->members & BIT(cpu_port)) == BIT(cpu_port)) - vl->members = 0; - if (pvid == vid) - pvid = 0; - if (untagged) { - vl->untag &= ~BIT(port); - if ((vl->untag & BIT(port)) == BIT(cpu_port)) - vl->untag = 0; - } - - bcm_sf2_set_vlan_entry(priv, vid, vl); - bcm_sf2_sw_fast_age_vlan(priv, vid); - } - - core_writel(priv, pvid, CORE_DEFAULT_1Q_TAG_P(port)); - core_writel(priv, pvid, CORE_DEFAULT_1Q_TAG_P(cpu_port)); - bcm_sf2_sw_fast_age_vlan(priv, vid); - - return 0; -} - -static int bcm_sf2_sw_vlan_dump(struct dsa_switch *ds, int port, - struct switchdev_obj_port_vlan *vlan, - int (*cb)(struct switchdev_obj *obj)) -{ - struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); - struct bcm_sf2_port_status *p = &priv->port_sts[port]; - struct bcm_sf2_vlan *vl; - u16 vid, pvid; - int err = 0; - - pvid = core_readl(priv, CORE_DEFAULT_1Q_TAG_P(port)); - - for (vid = 0; vid < VLAN_N_VID; vid++) { - vl = &priv->vlans[vid]; - - if (!(vl->members & BIT(port))) - continue; - - vlan->vid_begin = vlan->vid_end = vid; - vlan->flags = 0; - - if (vl->untag & BIT(port)) - vlan->flags |= BRIDGE_VLAN_INFO_UNTAGGED; - if (p->pvid == vid) - vlan->flags |= BRIDGE_VLAN_INFO_PVID; - - err = cb(&vlan->obj); - if (err) - break; - } - - return err; -} - static int bcm_sf2_sw_setup(struct dsa_switch *ds) { struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); @@ -1584,38 +876,6 @@ static int bcm_sf2_sw_setup(struct dsa_switch *ds) return 0; } -static struct dsa_switch_ops bcm_sf2_switch_ops = { - .setup = bcm_sf2_sw_setup, - .get_tag_protocol = bcm_sf2_sw_get_tag_protocol, - .set_addr = bcm_sf2_sw_set_addr, - .get_phy_flags = bcm_sf2_sw_get_phy_flags, - .get_strings = bcm_sf2_sw_get_strings, - .get_ethtool_stats = bcm_sf2_sw_get_ethtool_stats, - .get_sset_count = bcm_sf2_sw_get_sset_count, - .adjust_link = bcm_sf2_sw_adjust_link, - .fixed_link_update = bcm_sf2_sw_fixed_link_update, - .suspend = bcm_sf2_sw_suspend, - .resume = bcm_sf2_sw_resume, - .get_wol = bcm_sf2_sw_get_wol, - .set_wol = bcm_sf2_sw_set_wol, - .port_enable = bcm_sf2_port_setup, - .port_disable = bcm_sf2_port_disable, - .get_eee = bcm_sf2_sw_get_eee, - .set_eee = bcm_sf2_sw_set_eee, - .port_bridge_join = bcm_sf2_sw_br_join, - .port_bridge_leave = bcm_sf2_sw_br_leave, - .port_stp_state_set = bcm_sf2_sw_br_set_stp_state, - .port_fdb_prepare = bcm_sf2_sw_fdb_prepare, - .port_fdb_add = bcm_sf2_sw_fdb_add, - .port_fdb_del = bcm_sf2_sw_fdb_del, - .port_fdb_dump = bcm_sf2_sw_fdb_dump, - .port_vlan_filtering = bcm_sf2_sw_vlan_filtering, - .port_vlan_prepare = bcm_sf2_sw_vlan_prepare, - .port_vlan_add = bcm_sf2_sw_vlan_add, - .port_vlan_del = bcm_sf2_sw_vlan_del, - .port_vlan_dump = bcm_sf2_sw_vlan_dump, -}; - /* The SWITCH_CORE register space is managed by b53 but operates on a page + * register basis so we need to translate that into an address that the * bus-glue understands. diff --git a/drivers/net/dsa/bcm_sf2.h b/drivers/net/dsa/bcm_sf2.h index 7e95fb7d5d2e..c4c12d9d6956 100644 --- a/drivers/net/dsa/bcm_sf2.h +++ b/drivers/net/dsa/bcm_sf2.h @@ -51,71 +51,9 @@ struct bcm_sf2_port_status { struct ethtool_eee eee; - u32 vlan_ctl_mask; - u16 pvid; - - struct net_device *bridge_dev; + u16 vlan_ctl_mask; }; -struct bcm_sf2_arl_entry { - u8 port; - u8 mac[ETH_ALEN]; - u16 vid; - u8 is_valid:1; - u8 is_age:1; - u8 is_static:1; -}; - -struct bcm_sf2_vlan { - u16 members; - u16 untag; -}; - -static inline void bcm_sf2_mac_from_u64(u64 src, u8 *dst) -{ - unsigned int i; - - for (i = 0; i < ETH_ALEN; i++) - dst[ETH_ALEN - 1 - i] = (src >> (8 * i)) & 0xff; -} - -static inline u64 bcm_sf2_mac_to_u64(const u8 *src) -{ - unsigned int i; - u64 dst = 0; - - for (i = 0; i < ETH_ALEN; i++) - dst |= (u64)src[ETH_ALEN - 1 - i] << (8 * i); - - return dst; -} - -static inline void bcm_sf2_arl_to_entry(struct bcm_sf2_arl_entry *ent, - u64 mac_vid, u32 fwd_entry) -{ - memset(ent, 0, sizeof(*ent)); - ent->port = fwd_entry & PORTID_MASK; - ent->is_valid = !!(fwd_entry & ARL_VALID); - ent->is_age = !!(fwd_entry & ARL_AGE); - ent->is_static = !!(fwd_entry & ARL_STATIC); - bcm_sf2_mac_from_u64(mac_vid, ent->mac); - ent->vid = mac_vid >> VID_SHIFT; -} - -static inline void bcm_sf2_arl_from_entry(u64 *mac_vid, u32 *fwd_entry, - const struct bcm_sf2_arl_entry *ent) -{ - *mac_vid = bcm_sf2_mac_to_u64(ent->mac); - *mac_vid |= (u64)(ent->vid & VID_MASK) << VID_SHIFT; - *fwd_entry = ent->port & PORTID_MASK; - if (ent->is_valid) - *fwd_entry |= ARL_VALID; - if (ent->is_static) - *fwd_entry |= ARL_STATIC; - if (ent->is_age) - *fwd_entry |= ARL_AGE; -} - struct bcm_sf2_priv { /* Base registers, keep those in order with BCM_SF2_REGS_NAME */ void __iomem *core; @@ -159,9 +97,6 @@ struct bcm_sf2_priv { struct device_node *master_mii_dn; struct mii_bus *slave_mii_bus; struct mii_bus *master_mii_bus; - - /* Cache of programmed VLANs */ - struct bcm_sf2_vlan vlans[VLAN_N_VID]; }; static inline struct bcm_sf2_priv *bcm_sf2_to_priv(struct dsa_switch *ds) @@ -171,12 +106,6 @@ static inline struct bcm_sf2_priv *bcm_sf2_to_priv(struct dsa_switch *ds) return dev->priv; } -struct bcm_sf2_hw_stats { - const char *string; - u16 reg; - u8 sizeof_stat; -}; - #define SF2_IO_MACRO(name) \ static inline u32 name##_readl(struct bcm_sf2_priv *priv, u32 off) \ { \ diff --git a/drivers/net/dsa/bcm_sf2_regs.h b/drivers/net/dsa/bcm_sf2_regs.h index 9f2a9cb42074..838fe373cd6f 100644 --- a/drivers/net/dsa/bcm_sf2_regs.h +++ b/drivers/net/dsa/bcm_sf2_regs.h @@ -115,14 +115,6 @@ #define RX_BCST_EN (1 << 2) #define RX_MCST_EN (1 << 3) #define RX_UCST_EN (1 << 4) -#define G_MISTP_STATE_SHIFT 5 -#define G_MISTP_NO_STP (0 << G_MISTP_STATE_SHIFT) -#define G_MISTP_DIS_STATE (1 << G_MISTP_STATE_SHIFT) -#define G_MISTP_BLOCK_STATE (2 << G_MISTP_STATE_SHIFT) -#define G_MISTP_LISTEN_STATE (3 << G_MISTP_STATE_SHIFT) -#define G_MISTP_LEARN_STATE (4 << G_MISTP_STATE_SHIFT) -#define G_MISTP_FWD_STATE (5 << G_MISTP_STATE_SHIFT) -#define G_MISTP_STATE_MASK 0x7 #define CORE_SWMODE 0x0002c #define SW_FWDG_MODE (1 << 0) @@ -205,75 +197,11 @@ #define BRCM_HDR_EN_P5 (1 << 1) #define BRCM_HDR_EN_P7 (1 << 2) -#define CORE_BRCM_HDR_CTRL2 0x0828 - -#define CORE_HL_PRTC_CTRL 0x0940 -#define ARP_EN (1 << 0) -#define RARP_EN (1 << 1) -#define DHCP_EN (1 << 2) -#define ICMPV4_EN (1 << 3) -#define ICMPV6_EN (1 << 4) -#define ICMPV6_FWD_MODE (1 << 5) -#define IGMP_DIP_EN (1 << 8) -#define IGMP_RPTLVE_EN (1 << 9) -#define IGMP_RTPLVE_FWD_MODE (1 << 10) -#define IGMP_QRY_EN (1 << 11) -#define IGMP_QRY_FWD_MODE (1 << 12) -#define IGMP_UKN_EN (1 << 13) -#define IGMP_UKN_FWD_MODE (1 << 14) -#define MLD_RPTDONE_EN (1 << 15) -#define MLD_RPTDONE_FWD_MODE (1 << 16) -#define MLD_QRY_EN (1 << 17) -#define MLD_QRY_FWD_MODE (1 << 18) - #define CORE_RST_MIB_CNT_EN 0x0950 #define CORE_BRCM_HDR_RX_DIS 0x0980 #define CORE_BRCM_HDR_TX_DIS 0x0988 -#define CORE_ARLA_NUM_ENTRIES 1024 - -#define CORE_ARLA_RWCTL 0x1400 -#define ARL_RW (1 << 0) -#define IVL_SVL_SELECT (1 << 6) -#define ARL_STRTDN (1 << 7) - -#define CORE_ARLA_MAC 0x1408 -#define CORE_ARLA_VID 0x1420 -#define ARLA_VIDTAB_INDX_MASK 0x1fff - -#define CORE_ARLA_MACVID0 0x1440 -#define MAC_MASK 0xffffffffff -#define VID_SHIFT 48 -#define VID_MASK 0xfff - -#define CORE_ARLA_FWD_ENTRY0 0x1460 -#define PORTID_MASK 0x1ff -#define ARL_CON_SHIFT 9 -#define ARL_CON_MASK 0x3 -#define ARL_PRI_SHIFT 11 -#define ARL_PRI_MASK 0x7 -#define ARL_AGE (1 << 14) -#define ARL_STATIC (1 << 15) -#define ARL_VALID (1 << 16) - -#define CORE_ARLA_MACVID_ENTRY(x) (CORE_ARLA_MACVID0 + ((x) * 0x40)) -#define CORE_ARLA_FWD_ENTRY(x) (CORE_ARLA_FWD_ENTRY0 + ((x) * 0x40)) - -#define CORE_ARLA_SRCH_CTL 0x1540 -#define ARLA_SRCH_VLID (1 << 0) -#define IVL_SVL_SELECT (1 << 6) -#define ARLA_SRCH_STDN (1 << 7) - -#define CORE_ARLA_SRCH_ADR 0x1544 -#define ARLA_SRCH_ADR_VALID (1 << 15) - -#define CORE_ARLA_SRCH_RSLT_0_MACVID 0x1580 -#define CORE_ARLA_SRCH_RSLT_0 0x15a0 - -#define CORE_ARLA_SRCH_RSLT_MACVID(x) (CORE_ARLA_SRCH_RSLT_0_MACVID + ((x) * 0x40)) -#define CORE_ARLA_SRCH_RSLT(x) (CORE_ARLA_SRCH_RSLT_0 + ((x) * 0x40)) - #define CORE_ARLA_VTBL_RWCTRL 0x1600 #define ARLA_VTBL_CMD_WRITE 0 #define ARLA_VTBL_CMD_READ 1 @@ -297,59 +225,9 @@ #define P_TXQ_PSM_VDD(x) (P_TXQ_PSM_VDD_MASK << \ ((x) * P_TXQ_PSM_VDD_SHIFT)) -#define CORE_P0_MIB_OFFSET 0x8000 -#define P_MIB_SIZE 0x400 -#define CORE_P_MIB_OFFSET(x) (CORE_P0_MIB_OFFSET + (x) * P_MIB_SIZE) - #define CORE_PORT_VLAN_CTL_PORT(x) (0xc400 + ((x) * 0x8)) #define PORT_VLAN_CTRL_MASK 0x1ff -#define CORE_VLAN_CTRL0 0xd000 -#define CHANGE_1P_VID_INNER (1 << 0) -#define CHANGE_1P_VID_OUTER (1 << 1) -#define CHANGE_1Q_VID (1 << 3) -#define VLAN_LEARN_MODE_SVL (0 << 5) -#define VLAN_LEARN_MODE_IVL (3 << 5) -#define VLAN_EN (1 << 7) - -#define CORE_VLAN_CTRL1 0xd004 -#define EN_RSV_MCAST_FWDMAP (1 << 2) -#define EN_RSV_MCAST_UNTAG (1 << 3) -#define EN_IPMC_BYPASS_FWDMAP (1 << 5) -#define EN_IPMC_BYPASS_UNTAG (1 << 6) - -#define CORE_VLAN_CTRL2 0xd008 -#define EN_MIIM_BYPASS_V_FWDMAP (1 << 2) -#define EN_GMRP_GVRP_V_FWDMAP (1 << 5) -#define EN_GMRP_GVRP_UNTAG_MAP (1 << 6) - -#define CORE_VLAN_CTRL3 0xd00c -#define EN_DROP_NON1Q_MASK 0x1ff - -#define CORE_VLAN_CTRL4 0xd014 -#define RESV_MCAST_FLOOD (1 << 1) -#define EN_DOUBLE_TAG_MASK 0x3 -#define EN_DOUBLE_TAG_SHIFT 2 -#define EN_MGE_REV_GMRP (1 << 4) -#define EN_MGE_REV_GVRP (1 << 5) -#define INGR_VID_CHK_SHIFT 6 -#define INGR_VID_CHK_MASK 0x3 -#define INGR_VID_CHK_FWD (0 << INGR_VID_CHK_SHIFT) -#define INGR_VID_CHK_DROP (1 << INGR_VID_CHK_SHIFT) -#define INGR_VID_CHK_NO_CHK (2 << INGR_VID_CHK_SHIFT) -#define INGR_VID_CHK_VID_VIOL_IMP (3 << INGR_VID_CHK_SHIFT) - -#define CORE_VLAN_CTRL5 0xd018 -#define EN_CPU_RX_BYP_INNER_CRCCHCK (1 << 0) -#define EN_VID_FFF_FWD (1 << 2) -#define DROP_VTABLE_MISS (1 << 3) -#define EGRESS_DIR_FRM_BYP_TRUNK_EN (1 << 4) -#define PRESV_NON1Q (1 << 6) - -#define CORE_VLAN_CTRL6 0xd01c -#define STRICT_SFD_DETECT (1 << 0) -#define DIS_ARL_BUST_LMIT (1 << 4) - #define CORE_DEFAULT_1Q_TAG_P(x) (0xd040 + ((x) * 8)) #define CFI_SHIFT 12 #define PRI_SHIFT 13 From bacfd110e059565206e62490357232326cab5049 Mon Sep 17 00:00:00 2001 From: Nelson Chang Date: Fri, 26 Aug 2016 01:09:42 +0800 Subject: [PATCH 0503/1715] net: ethernet: mediatek: modify to use the PDMA instead of the QDMA for Ethernet RX Because the PDMA has richer features than the QDMA for Ethernet RX (such as multiple RX rings, HW LRO, etc.), the patch modifies to use the PDMA to handle Ethernet RX. Acked-by: John Crispin Signed-off-by: Nelson Chang Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 76 ++++++++++++--------- drivers/net/ethernet/mediatek/mtk_eth_soc.h | 31 ++++++++- 2 files changed, 74 insertions(+), 33 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 1801fd83acc0..cbeb79340407 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -342,25 +342,27 @@ static void mtk_mdio_cleanup(struct mtk_eth *eth) mdiobus_free(eth->mii_bus); } -static inline void mtk_irq_disable(struct mtk_eth *eth, u32 mask) +static inline void mtk_irq_disable(struct mtk_eth *eth, + unsigned reg, u32 mask) { unsigned long flags; u32 val; spin_lock_irqsave(ð->irq_lock, flags); - val = mtk_r32(eth, MTK_QDMA_INT_MASK); - mtk_w32(eth, val & ~mask, MTK_QDMA_INT_MASK); + val = mtk_r32(eth, reg); + mtk_w32(eth, val & ~mask, reg); spin_unlock_irqrestore(ð->irq_lock, flags); } -static inline void mtk_irq_enable(struct mtk_eth *eth, u32 mask) +static inline void mtk_irq_enable(struct mtk_eth *eth, + unsigned reg, u32 mask) { unsigned long flags; u32 val; spin_lock_irqsave(ð->irq_lock, flags); - val = mtk_r32(eth, MTK_QDMA_INT_MASK); - mtk_w32(eth, val | mask, MTK_QDMA_INT_MASK); + val = mtk_r32(eth, reg); + mtk_w32(eth, val | mask, reg); spin_unlock_irqrestore(ð->irq_lock, flags); } @@ -897,12 +899,12 @@ release_desc: * we continue */ wmb(); - mtk_w32(eth, ring->calc_idx, MTK_QRX_CRX_IDX0); + mtk_w32(eth, ring->calc_idx, MTK_PRX_CRX_IDX0); done++; } if (done < budget) - mtk_w32(eth, MTK_RX_DONE_INT, MTK_QMTK_INT_STATUS); + mtk_w32(eth, MTK_RX_DONE_INT, MTK_PDMA_INT_STATUS); return done; } @@ -1012,7 +1014,7 @@ static int mtk_napi_tx(struct napi_struct *napi, int budget) return budget; napi_complete(napi); - mtk_irq_enable(eth, MTK_TX_DONE_INT); + mtk_irq_enable(eth, MTK_QDMA_INT_MASK, MTK_TX_DONE_INT); return tx_done; } @@ -1024,12 +1026,12 @@ static int mtk_napi_rx(struct napi_struct *napi, int budget) int rx_done = 0; mtk_handle_status_irq(eth); - mtk_w32(eth, MTK_RX_DONE_INT, MTK_QMTK_INT_STATUS); + mtk_w32(eth, MTK_RX_DONE_INT, MTK_PDMA_INT_STATUS); rx_done = mtk_poll_rx(napi, budget, eth); if (unlikely(netif_msg_intr(eth))) { - status = mtk_r32(eth, MTK_QMTK_INT_STATUS); - mask = mtk_r32(eth, MTK_QDMA_INT_MASK); + status = mtk_r32(eth, MTK_PDMA_INT_STATUS); + mask = mtk_r32(eth, MTK_PDMA_INT_MASK); dev_info(eth->dev, "done rx %d, intr 0x%08x/0x%x\n", rx_done, status, mask); @@ -1038,12 +1040,12 @@ static int mtk_napi_rx(struct napi_struct *napi, int budget) if (rx_done == budget) return budget; - status = mtk_r32(eth, MTK_QMTK_INT_STATUS); + status = mtk_r32(eth, MTK_PDMA_INT_STATUS); if (status & MTK_RX_DONE_INT) return budget; napi_complete(napi); - mtk_irq_enable(eth, MTK_RX_DONE_INT); + mtk_irq_enable(eth, MTK_PDMA_INT_MASK, MTK_RX_DONE_INT); return rx_done; } @@ -1092,6 +1094,7 @@ static int mtk_tx_alloc(struct mtk_eth *eth) mtk_w32(eth, ring->phys + ((MTK_DMA_SIZE - 1) * sz), MTK_QTX_DRX_PTR); + mtk_w32(eth, (QDMA_RES_THRES << 8) | QDMA_RES_THRES, MTK_QTX_CFG(0)); return 0; @@ -1162,11 +1165,10 @@ static int mtk_rx_alloc(struct mtk_eth *eth) */ wmb(); - mtk_w32(eth, eth->rx_ring.phys, MTK_QRX_BASE_PTR0); - mtk_w32(eth, MTK_DMA_SIZE, MTK_QRX_MAX_CNT0); - mtk_w32(eth, eth->rx_ring.calc_idx, MTK_QRX_CRX_IDX0); - mtk_w32(eth, MTK_PST_DRX_IDX0, MTK_QDMA_RST_IDX); - mtk_w32(eth, (QDMA_RES_THRES << 8) | QDMA_RES_THRES, MTK_QTX_CFG(0)); + mtk_w32(eth, eth->rx_ring.phys, MTK_PRX_BASE_PTR0); + mtk_w32(eth, MTK_DMA_SIZE, MTK_PRX_MAX_CNT0); + mtk_w32(eth, eth->rx_ring.calc_idx, MTK_PRX_CRX_IDX0); + mtk_w32(eth, MTK_PST_DRX_IDX0, MTK_PDMA_RST_IDX); return 0; } @@ -1285,7 +1287,7 @@ static irqreturn_t mtk_handle_irq_rx(int irq, void *_eth) if (likely(napi_schedule_prep(ð->rx_napi))) { __napi_schedule(ð->rx_napi); - mtk_irq_disable(eth, MTK_RX_DONE_INT); + mtk_irq_disable(eth, MTK_PDMA_INT_MASK, MTK_RX_DONE_INT); } return IRQ_HANDLED; @@ -1297,7 +1299,7 @@ static irqreturn_t mtk_handle_irq_tx(int irq, void *_eth) if (likely(napi_schedule_prep(ð->tx_napi))) { __napi_schedule(ð->tx_napi); - mtk_irq_disable(eth, MTK_TX_DONE_INT); + mtk_irq_disable(eth, MTK_QDMA_INT_MASK, MTK_TX_DONE_INT); } return IRQ_HANDLED; @@ -1308,11 +1310,12 @@ static void mtk_poll_controller(struct net_device *dev) { struct mtk_mac *mac = netdev_priv(dev); struct mtk_eth *eth = mac->hw; - u32 int_mask = MTK_TX_DONE_INT | MTK_RX_DONE_INT; - mtk_irq_disable(eth, int_mask); + mtk_irq_disable(eth, MTK_QDMA_INT_MASK, MTK_TX_DONE_INT); + mtk_irq_disable(eth, MTK_PDMA_INT_MASK, MTK_RX_DONE_INT); mtk_handle_irq_rx(eth->irq[2], dev); - mtk_irq_enable(eth, int_mask); + mtk_irq_enable(eth, MTK_QDMA_INT_MASK, MTK_TX_DONE_INT); + mtk_irq_enable(eth, MTK_PDMA_INT_MASK, MTK_RX_DONE_INT); } #endif @@ -1327,11 +1330,15 @@ static int mtk_start_dma(struct mtk_eth *eth) } mtk_w32(eth, - MTK_TX_WB_DDONE | MTK_RX_DMA_EN | MTK_TX_DMA_EN | - MTK_RX_2B_OFFSET | MTK_DMA_SIZE_16DWORDS | - MTK_RX_BT_32DWORDS | MTK_NDP_CO_PRO, + MTK_TX_WB_DDONE | MTK_TX_DMA_EN | + MTK_DMA_SIZE_16DWORDS | MTK_NDP_CO_PRO, MTK_QDMA_GLO_CFG); + mtk_w32(eth, + MTK_RX_DMA_EN | MTK_RX_2B_OFFSET | + MTK_RX_BT_32DWORDS | MTK_MULTI_EN, + MTK_PDMA_GLO_CFG); + return 0; } @@ -1349,7 +1356,8 @@ static int mtk_open(struct net_device *dev) napi_enable(ð->tx_napi); napi_enable(ð->rx_napi); - mtk_irq_enable(eth, MTK_TX_DONE_INT | MTK_RX_DONE_INT); + mtk_irq_enable(eth, MTK_QDMA_INT_MASK, MTK_TX_DONE_INT); + mtk_irq_enable(eth, MTK_PDMA_INT_MASK, MTK_RX_DONE_INT); } atomic_inc(ð->dma_refcnt); @@ -1394,7 +1402,8 @@ static int mtk_stop(struct net_device *dev) if (!atomic_dec_and_test(ð->dma_refcnt)) return 0; - mtk_irq_disable(eth, MTK_TX_DONE_INT | MTK_RX_DONE_INT); + mtk_irq_disable(eth, MTK_QDMA_INT_MASK, MTK_TX_DONE_INT); + mtk_irq_disable(eth, MTK_PDMA_INT_MASK, MTK_RX_DONE_INT); napi_disable(ð->tx_napi); napi_disable(ð->rx_napi); @@ -1448,7 +1457,9 @@ static int __init mtk_hw_init(struct mtk_eth *eth) /* disable delay and normal interrupt */ mtk_w32(eth, 0, MTK_QDMA_DELAY_INT); - mtk_irq_disable(eth, ~0); + mtk_w32(eth, 0, MTK_PDMA_DELAY_INT); + mtk_irq_disable(eth, MTK_QDMA_INT_MASK, ~0); + mtk_irq_disable(eth, MTK_PDMA_INT_MASK, ~0); mtk_w32(eth, RST_GL_PSE, MTK_RST_GL); mtk_w32(eth, 0, MTK_RST_GL); @@ -1504,7 +1515,8 @@ static void mtk_uninit(struct net_device *dev) phy_disconnect(mac->phy_dev); mtk_mdio_cleanup(eth); - mtk_irq_disable(eth, ~0); + mtk_irq_disable(eth, MTK_QDMA_INT_MASK, ~0); + mtk_irq_disable(eth, MTK_PDMA_INT_MASK, ~0); free_irq(eth->irq[1], dev); free_irq(eth->irq[2], dev); } @@ -1683,7 +1695,7 @@ static void mtk_get_ethtool_stats(struct net_device *dev, } do { - data_src = (u64*)hwstats; + data_src = (u64 *)hwstats; data_dst = data; start = u64_stats_fetch_begin_irq(&hwstats->syncp); diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index f82e3acb947b..7c1f3f2e11d4 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -68,6 +68,32 @@ /* Unicast Filter MAC Address Register - High */ #define MTK_GDMA_MAC_ADRH(x) (0x50C + (x * 0x1000)) +/* PDMA RX Base Pointer Register */ +#define MTK_PRX_BASE_PTR0 0x900 + +/* PDMA RX Maximum Count Register */ +#define MTK_PRX_MAX_CNT0 0x904 + +/* PDMA RX CPU Pointer Register */ +#define MTK_PRX_CRX_IDX0 0x908 + +/* PDMA Global Configuration Register */ +#define MTK_PDMA_GLO_CFG 0xa04 +#define MTK_MULTI_EN BIT(10) + +/* PDMA Reset Index Register */ +#define MTK_PDMA_RST_IDX 0xa08 +#define MTK_PST_DRX_IDX0 BIT(16) + +/* PDMA Delay Interrupt Register */ +#define MTK_PDMA_DELAY_INT 0xa0c + +/* PDMA Interrupt Status Register */ +#define MTK_PDMA_INT_STATUS 0xa20 + +/* PDMA Interrupt Mask Register */ +#define MTK_PDMA_INT_MASK 0xa28 + /* PDMA Interrupt grouping registers */ #define MTK_PDMA_INT_GRP1 0xa50 #define MTK_PDMA_INT_GRP2 0xa54 @@ -119,13 +145,16 @@ /* QDMA Interrupt Status Register */ #define MTK_QMTK_INT_STATUS 0x1A18 +#define MTK_RX_DONE_INT3 BIT(19) +#define MTK_RX_DONE_INT2 BIT(18) #define MTK_RX_DONE_INT1 BIT(17) #define MTK_RX_DONE_INT0 BIT(16) #define MTK_TX_DONE_INT3 BIT(3) #define MTK_TX_DONE_INT2 BIT(2) #define MTK_TX_DONE_INT1 BIT(1) #define MTK_TX_DONE_INT0 BIT(0) -#define MTK_RX_DONE_INT (MTK_RX_DONE_INT0 | MTK_RX_DONE_INT1) +#define MTK_RX_DONE_INT (MTK_RX_DONE_INT0 | MTK_RX_DONE_INT1 | \ + MTK_RX_DONE_INT2 | MTK_RX_DONE_INT3) #define MTK_TX_DONE_INT (MTK_TX_DONE_INT0 | MTK_TX_DONE_INT1 | \ MTK_TX_DONE_INT2 | MTK_TX_DONE_INT3) From 9c08435e0d033ff00e3bd02a38494d63bc8296a0 Mon Sep 17 00:00:00 2001 From: Nelson Chang Date: Fri, 26 Aug 2016 01:09:43 +0800 Subject: [PATCH 0504/1715] net: ethernet: mediatek: modify GDM to send packets to the PDMA for RX Because we change to use the PDMA as the Ethernet RX DMA engine, the patch modifies to set GDM to send packets to PDMA for RX. Acked-by: John Crispin Signed-off-by: Nelson Chang Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index cbeb79340407..0fd9fc8d2a79 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1473,9 +1473,8 @@ static int __init mtk_hw_init(struct mtk_eth *eth) for (i = 0; i < 2; i++) { u32 val = mtk_r32(eth, MTK_GDMA_FWD_CFG(i)); - /* setup the forward port to send frame to QDMA */ + /* setup the forward port to send frame to PDMA */ val &= ~0xffff; - val |= 0x5555; /* Enable RX checksum */ val |= MTK_GDMA_ICS_EN | MTK_GDMA_TCS_EN | MTK_GDMA_UCS_EN; From ab725983a96a10630723fe4a0106b37876222993 Mon Sep 17 00:00:00 2001 From: Tobias Regnery Date: Thu, 25 Aug 2016 20:09:53 +0200 Subject: [PATCH 0505/1715] alx: add tso support Add tso/tso6 support to the alx driver. Based on information from the downstream driver at github.com/qca/alx Signed-off-by: Tobias Regnery Signed-off-by: David S. Miller --- drivers/net/ethernet/atheros/alx/main.c | 75 +++++++++++++++++++++++-- 1 file changed, 71 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c index 6453148d066a..c4d0026a684a 100644 --- a/drivers/net/ethernet/atheros/alx/main.c +++ b/drivers/net/ethernet/atheros/alx/main.c @@ -993,6 +993,18 @@ static void alx_reset(struct work_struct *work) rtnl_unlock(); } +static int alx_tpd_req(struct sk_buff *skb) +{ + int num; + + num = skb_shinfo(skb)->nr_frags + 1; + /* we need one extra descriptor for LSOv2 */ + if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) + num++; + + return num; +} + static int alx_tx_csum(struct sk_buff *skb, struct alx_txd *first) { u8 cso, css; @@ -1012,6 +1024,45 @@ static int alx_tx_csum(struct sk_buff *skb, struct alx_txd *first) return 0; } +static int alx_tso(struct sk_buff *skb, struct alx_txd *first) +{ + int err; + + if (skb->ip_summed != CHECKSUM_PARTIAL) + return 0; + + if (!skb_is_gso(skb)) + return 0; + + err = skb_cow_head(skb, 0); + if (err < 0) + return err; + + if (skb->protocol == htons(ETH_P_IP)) { + struct iphdr *iph = ip_hdr(skb); + + iph->check = 0; + tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, + 0, IPPROTO_TCP, 0); + first->word1 |= 1 << TPD_IPV4_SHIFT; + } else if (skb_is_gso_v6(skb)) { + ipv6_hdr(skb)->payload_len = 0; + tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, + &ipv6_hdr(skb)->daddr, + 0, IPPROTO_TCP, 0); + /* LSOv2: the first TPD only provides the packet length */ + first->adrl.l.pkt_len = skb->len; + first->word1 |= 1 << TPD_LSO_V2_SHIFT; + } + + first->word1 |= 1 << TPD_LSO_EN_SHIFT; + first->word1 |= (skb_transport_offset(skb) & + TPD_L4HDROFFSET_MASK) << TPD_L4HDROFFSET_SHIFT; + first->word1 |= (skb_shinfo(skb)->gso_size & + TPD_MSS_MASK) << TPD_MSS_SHIFT; + return 1; +} + static int alx_map_tx_skb(struct alx_priv *alx, struct sk_buff *skb) { struct alx_tx_queue *txq = &alx->txq; @@ -1022,6 +1073,16 @@ static int alx_map_tx_skb(struct alx_priv *alx, struct sk_buff *skb) first_tpd = &txq->tpd[txq->write_idx]; tpd = first_tpd; + if (tpd->word1 & (1 << TPD_LSO_V2_SHIFT)) { + if (++txq->write_idx == alx->tx_ringsz) + txq->write_idx = 0; + + tpd = &txq->tpd[txq->write_idx]; + tpd->len = first_tpd->len; + tpd->vlan_tag = first_tpd->vlan_tag; + tpd->word1 = first_tpd->word1; + } + maplen = skb_headlen(skb); dma = dma_map_single(&alx->hw.pdev->dev, skb->data, maplen, DMA_TO_DEVICE); @@ -1082,9 +1143,9 @@ static netdev_tx_t alx_start_xmit(struct sk_buff *skb, struct alx_priv *alx = netdev_priv(netdev); struct alx_tx_queue *txq = &alx->txq; struct alx_txd *first; - int tpdreq = skb_shinfo(skb)->nr_frags + 1; + int tso; - if (alx_tpd_avail(alx) < tpdreq) { + if (alx_tpd_avail(alx) < alx_tpd_req(skb)) { netif_stop_queue(alx->dev); goto drop; } @@ -1092,7 +1153,10 @@ static netdev_tx_t alx_start_xmit(struct sk_buff *skb, first = &txq->tpd[txq->write_idx]; memset(first, 0, sizeof(*first)); - if (alx_tx_csum(skb, first)) + tso = alx_tso(skb, first); + if (tso < 0) + goto drop; + else if (!tso && alx_tx_csum(skb, first)) goto drop; if (alx_map_tx_skb(alx, skb) < 0) @@ -1351,7 +1415,10 @@ static int alx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } } - netdev->hw_features = NETIF_F_SG | NETIF_F_HW_CSUM; + netdev->hw_features = NETIF_F_SG | + NETIF_F_HW_CSUM | + NETIF_F_TSO | + NETIF_F_TSO6; if (alx_get_perm_macaddr(hw, hw->perm_addr)) { dev_warn(&pdev->dev, From f01d5988639bc7f8c525d16d5d84562755d3c0ca Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 25 Aug 2016 15:23:41 -0700 Subject: [PATCH 0506/1715] net: dsa: bcm_sf2: Utilize mask clear/set helpers in bcm_sf2_intr_disable And while at it, remove the unecessary writing of zeroes to the CPU_MASK_CLEAR register since it has no functional use. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/bcm_sf2.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index ddd477354e00..51f1fc0dddc5 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -464,12 +464,10 @@ static int bcm_sf2_sw_rst(struct bcm_sf2_priv *priv) static void bcm_sf2_intr_disable(struct bcm_sf2_priv *priv) { - intrl2_0_writel(priv, 0xffffffff, INTRL2_CPU_MASK_SET); + intrl2_0_mask_set(priv, 0xffffffff); intrl2_0_writel(priv, 0xffffffff, INTRL2_CPU_CLEAR); - intrl2_0_writel(priv, 0, INTRL2_CPU_MASK_CLEAR); - intrl2_1_writel(priv, 0xffffffff, INTRL2_CPU_MASK_SET); + intrl2_1_mask_set(priv, 0xffffffff); intrl2_1_writel(priv, 0xffffffff, INTRL2_CPU_CLEAR); - intrl2_1_writel(priv, 0, INTRL2_CPU_MASK_CLEAR); } static void bcm_sf2_identify_ports(struct bcm_sf2_priv *priv, From ba5aa84a2d2243d56328e51497a6106dcdf67eef Mon Sep 17 00:00:00 2001 From: Richard Alpe Date: Fri, 26 Aug 2016 10:52:50 +0200 Subject: [PATCH 0507/1715] tipc: split UDP nl address parsing Split the UDP netlink parse function so that it only parses one netlink attribute at the time. This makes the parse function more generic and allow future UDP API functions to use it for parsing. Signed-off-by: Richard Alpe Reviewed-by: Jon Maloy Acked-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/udp_media.c | 104 +++++++++++++++++++++---------------------- 1 file changed, 51 insertions(+), 53 deletions(-) diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index 33bdf5449a5e..adb3c2117a1c 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -258,68 +258,47 @@ static int enable_mcast(struct udp_bearer *ub, struct udp_media_addr *remote) } /** - * parse_options - build local/remote addresses from configuration - * @attrs: netlink config data - * @ub: UDP bearer instance - * @local: local bearer IP address/port - * @remote: peer or multicast IP/port + * tipc_parse_udp_addr - build udp media address from netlink data + * @nlattr: netlink attribute containing sockaddr storage aligned address + * @addr: tipc media address to fill with address, port and protocol type + * @scope_id: IPv6 scope id pointer, not NULL indicates it's required */ -static int parse_options(struct nlattr *attrs[], struct udp_bearer *ub, - struct udp_media_addr *local, - struct udp_media_addr *remote) + +static int tipc_parse_udp_addr(struct nlattr *nla, struct udp_media_addr *addr, + u32 *scope_id) { - struct nlattr *opts[TIPC_NLA_UDP_MAX + 1]; - struct sockaddr_storage sa_local, sa_remote; + struct sockaddr_storage sa; - if (!attrs[TIPC_NLA_BEARER_UDP_OPTS]) - goto err; - if (nla_parse_nested(opts, TIPC_NLA_UDP_MAX, - attrs[TIPC_NLA_BEARER_UDP_OPTS], - tipc_nl_udp_policy)) - goto err; - if (opts[TIPC_NLA_UDP_LOCAL] && opts[TIPC_NLA_UDP_REMOTE]) { - nla_memcpy(&sa_local, opts[TIPC_NLA_UDP_LOCAL], - sizeof(sa_local)); - nla_memcpy(&sa_remote, opts[TIPC_NLA_UDP_REMOTE], - sizeof(sa_remote)); - } else { -err: - pr_err("Invalid UDP bearer configuration"); - return -EINVAL; - } - if ((sa_local.ss_family & sa_remote.ss_family) == AF_INET) { - struct sockaddr_in *ip4; + nla_memcpy(&sa, nla, sizeof(sa)); + if (sa.ss_family == AF_INET) { + struct sockaddr_in *ip4 = (struct sockaddr_in *)&sa; - ip4 = (struct sockaddr_in *)&sa_local; - local->proto = htons(ETH_P_IP); - local->port = ip4->sin_port; - local->ipv4.s_addr = ip4->sin_addr.s_addr; - - ip4 = (struct sockaddr_in *)&sa_remote; - remote->proto = htons(ETH_P_IP); - remote->port = ip4->sin_port; - remote->ipv4.s_addr = ip4->sin_addr.s_addr; + addr->proto = htons(ETH_P_IP); + addr->port = ip4->sin_port; + addr->ipv4.s_addr = ip4->sin_addr.s_addr; return 0; #if IS_ENABLED(CONFIG_IPV6) - } else if ((sa_local.ss_family & sa_remote.ss_family) == AF_INET6) { - int atype; - struct sockaddr_in6 *ip6; + } else if (sa.ss_family == AF_INET6) { + struct sockaddr_in6 *ip6 = (struct sockaddr_in6 *)&sa; - ip6 = (struct sockaddr_in6 *)&sa_local; - atype = ipv6_addr_type(&ip6->sin6_addr); - if (__ipv6_addr_needs_scope_id(atype) && !ip6->sin6_scope_id) - return -EINVAL; + addr->proto = htons(ETH_P_IPV6); + addr->port = ip6->sin6_port; + memcpy(&addr->ipv6, &ip6->sin6_addr, sizeof(struct in6_addr)); - local->proto = htons(ETH_P_IPV6); - local->port = ip6->sin6_port; - memcpy(&local->ipv6, &ip6->sin6_addr, sizeof(struct in6_addr)); - ub->ifindex = ip6->sin6_scope_id; + /* Scope ID is only interesting for local addresses */ + if (scope_id) { + int atype; + + atype = ipv6_addr_type(&ip6->sin6_addr); + if (__ipv6_addr_needs_scope_id(atype) && + !ip6->sin6_scope_id) { + return -EINVAL; + } + + *scope_id = ip6->sin6_scope_id ? : 0; + } - ip6 = (struct sockaddr_in6 *)&sa_remote; - remote->proto = htons(ETH_P_IPV6); - remote->port = ip6->sin6_port; - memcpy(&remote->ipv6, &ip6->sin6_addr, sizeof(struct in6_addr)); return 0; #endif } @@ -344,14 +323,33 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b, struct udp_media_addr local = {0}; struct udp_port_cfg udp_conf = {0}; struct udp_tunnel_sock_cfg tuncfg = {NULL}; + struct nlattr *opts[TIPC_NLA_UDP_MAX + 1]; ub = kzalloc(sizeof(*ub), GFP_ATOMIC); if (!ub) return -ENOMEM; + if (!attrs[TIPC_NLA_BEARER_UDP_OPTS]) + goto err; + + if (nla_parse_nested(opts, TIPC_NLA_UDP_MAX, + attrs[TIPC_NLA_BEARER_UDP_OPTS], + tipc_nl_udp_policy)) + goto err; + + if (!opts[TIPC_NLA_UDP_LOCAL] || !opts[TIPC_NLA_UDP_REMOTE]) { + pr_err("Invalid UDP bearer configuration"); + return -EINVAL; + } + + err = tipc_parse_udp_addr(opts[TIPC_NLA_UDP_LOCAL], &local, + &ub->ifindex); + if (err) + goto err; + remote = (struct udp_media_addr *)&b->bcast_addr.value; memset(remote, 0, sizeof(struct udp_media_addr)); - err = parse_options(attrs, ub, &local, remote); + err = tipc_parse_udp_addr(opts[TIPC_NLA_UDP_REMOTE], remote, NULL); if (err) goto err; From ce984da36e11c1e6db6002ea4f8a4b54e46f45c0 Mon Sep 17 00:00:00 2001 From: Richard Alpe Date: Fri, 26 Aug 2016 10:52:51 +0200 Subject: [PATCH 0508/1715] tipc: split UDP send function Split the UDP send function into two. One callback that prepares the skb and one transmit function that sends the skb. This will come in handy in later patches, when we introduce UDP replicast. Signed-off-by: Richard Alpe Reviewed-by: Jon Maloy Acked-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/udp_media.c | 50 ++++++++++++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 18 deletions(-) diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index adb3c2117a1c..7033b4a1a655 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -140,28 +140,13 @@ static int tipc_udp_addr2msg(char *msg, struct tipc_media_addr *a) } /* tipc_send_msg - enqueue a send request */ -static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb, - struct tipc_bearer *b, - struct tipc_media_addr *dest) +static int tipc_udp_xmit(struct net *net, struct sk_buff *skb, + struct udp_bearer *ub, struct udp_media_addr *src, + struct udp_media_addr *dst) { int ttl, err = 0; - struct udp_bearer *ub; - struct udp_media_addr *dst = (struct udp_media_addr *)&dest->value; - struct udp_media_addr *src = (struct udp_media_addr *)&b->addr.value; struct rtable *rt; - if (skb_headroom(skb) < UDP_MIN_HEADROOM) { - err = pskb_expand_head(skb, UDP_MIN_HEADROOM, 0, GFP_ATOMIC); - if (err) - goto tx_error; - } - - skb_set_inner_protocol(skb, htons(ETH_P_TIPC)); - ub = rcu_dereference_rtnl(b->media_ptr); - if (!ub) { - err = -ENODEV; - goto tx_error; - } if (dst->proto == htons(ETH_P_IP)) { struct flowi4 fl = { .daddr = dst->ipv4.s_addr, @@ -207,6 +192,35 @@ tx_error: return err; } +static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb, + struct tipc_bearer *b, + struct tipc_media_addr *addr) +{ + struct udp_media_addr *src = (struct udp_media_addr *)&b->addr.value; + struct udp_media_addr *dst = (struct udp_media_addr *)&addr->value; + struct udp_bearer *ub; + int err = 0; + + if (skb_headroom(skb) < UDP_MIN_HEADROOM) { + err = pskb_expand_head(skb, UDP_MIN_HEADROOM, 0, GFP_ATOMIC); + if (err) + goto tx_error; + } + + skb_set_inner_protocol(skb, htons(ETH_P_TIPC)); + ub = rcu_dereference_rtnl(b->media_ptr); + if (!ub) { + err = -ENODEV; + goto tx_error; + } + + return tipc_udp_xmit(net, skb, ub, src, dst); + +tx_error: + kfree_skb(skb); + return err; +} + /* tipc_udp_recv - read data from bearer socket */ static int tipc_udp_recv(struct sock *sk, struct sk_buff *skb) { From 1ca73e3fa12531bbbc854329cd02a887f986a02a Mon Sep 17 00:00:00 2001 From: Richard Alpe Date: Fri, 26 Aug 2016 10:52:52 +0200 Subject: [PATCH 0509/1715] tipc: refactor multicast ip check Add a function to check if a tipc UDP media address is a multicast address or not. This is a purely cosmetic change. Signed-off-by: Richard Alpe Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/udp_media.c | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index 7033b4a1a655..b8ec1a18241e 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -84,6 +84,17 @@ struct udp_bearer { struct work_struct work; }; +static int tipc_udp_is_mcast_addr(struct udp_media_addr *addr) +{ + if (ntohs(addr->proto) == ETH_P_IP) + return ipv4_is_multicast(addr->ipv4.s_addr); +#if IS_ENABLED(CONFIG_IPV6) + else + return ipv6_addr_is_multicast(&addr->ipv6); +#endif + return 0; +} + /* udp_media_addr_set - convert a ip/udp address to a TIPC media address */ static void tipc_udp_media_addr_set(struct tipc_media_addr *addr, struct udp_media_addr *ua) @@ -91,15 +102,9 @@ static void tipc_udp_media_addr_set(struct tipc_media_addr *addr, memset(addr, 0, sizeof(struct tipc_media_addr)); addr->media_id = TIPC_MEDIA_TYPE_UDP; memcpy(addr->value, ua, sizeof(struct udp_media_addr)); - if (ntohs(ua->proto) == ETH_P_IP) { - if (ipv4_is_multicast(ua->ipv4.s_addr)) - addr->broadcast = 1; - } else if (ntohs(ua->proto) == ETH_P_IPV6) { - if (ipv6_addr_type(&ua->ipv6) & IPV6_ADDR_MULTICAST) - addr->broadcast = 1; - } else { - pr_err("Invalid UDP media address\n"); - } + + if (tipc_udp_is_mcast_addr(ua)) + addr->broadcast = 1; } /* tipc_udp_addr2str - convert ip/udp address to string */ @@ -255,15 +260,11 @@ static int enable_mcast(struct udp_bearer *ub, struct udp_media_addr *remote) struct sock *sk = ub->ubsock->sk; if (ntohs(remote->proto) == ETH_P_IP) { - if (!ipv4_is_multicast(remote->ipv4.s_addr)) - return 0; mreqn.imr_multiaddr = remote->ipv4; mreqn.imr_ifindex = ub->ifindex; err = ip_mc_join_group(sk, &mreqn); #if IS_ENABLED(CONFIG_IPV6) } else { - if (!ipv6_addr_is_multicast(&remote->ipv6)) - return 0; err = ipv6_stub->ipv6_sock_mc_join(sk, ub->ifindex, &remote->ipv6); #endif @@ -408,8 +409,11 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b, tuncfg.encap_destroy = NULL; setup_udp_tunnel_sock(net, ub->ubsock, &tuncfg); - if (enable_mcast(ub, remote)) - goto err; + if (tipc_udp_is_mcast_addr(remote)) { + if (enable_mcast(ub, remote)) + goto err; + } + return 0; err: kfree(ub); From ef20cd4dd1633987bcf46ac34ace2c8af212361f Mon Sep 17 00:00:00 2001 From: Richard Alpe Date: Fri, 26 Aug 2016 10:52:53 +0200 Subject: [PATCH 0510/1715] tipc: introduce UDP replicast This patch introduces UDP replicast. A concept where we emulate multicast by sending multiple unicast messages to configured peers. The purpose of replicast is mainly to be able to use TIPC in cloud environments where IP multicast is disabled. Using replicas to unicast multicast messages is costly as we have to copy each skb and send the copies individually. Signed-off-by: Richard Alpe Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- include/uapi/linux/tipc_netlink.h | 1 + net/tipc/bearer.c | 44 +++++++++++ net/tipc/bearer.h | 1 + net/tipc/netlink.c | 5 ++ net/tipc/udp_media.c | 118 +++++++++++++++++++++++++++--- net/tipc/udp_media.h | 44 +++++++++++ 6 files changed, 201 insertions(+), 12 deletions(-) create mode 100644 net/tipc/udp_media.h diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h index bcb65ef725f6..b15664c36219 100644 --- a/include/uapi/linux/tipc_netlink.h +++ b/include/uapi/linux/tipc_netlink.h @@ -60,6 +60,7 @@ enum { TIPC_NL_MON_GET, TIPC_NL_MON_PEER_GET, TIPC_NL_PEER_REMOVE, + TIPC_NL_BEARER_ADD, __TIPC_NL_CMD_MAX, TIPC_NL_CMD_MAX = __TIPC_NL_CMD_MAX - 1 diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 28056fa8f77a..d7b442dd6669 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -42,6 +42,7 @@ #include "monitor.h" #include "bcast.h" #include "netlink.h" +#include "udp_media.h" #define MAX_ADDR_STR 60 @@ -897,6 +898,49 @@ int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) return 0; } +int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info) +{ + int err; + char *name; + struct tipc_bearer *b; + struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; + struct net *net = sock_net(skb->sk); + + if (!info->attrs[TIPC_NLA_BEARER]) + return -EINVAL; + + err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX, + info->attrs[TIPC_NLA_BEARER], + tipc_nl_bearer_policy); + if (err) + return err; + + if (!attrs[TIPC_NLA_BEARER_NAME]) + return -EINVAL; + name = nla_data(attrs[TIPC_NLA_BEARER_NAME]); + + rtnl_lock(); + b = tipc_bearer_find(net, name); + if (!b) { + rtnl_unlock(); + return -EINVAL; + } + +#ifdef CONFIG_TIPC_MEDIA_UDP + if (attrs[TIPC_NLA_BEARER_UDP_OPTS]) { + err = tipc_udp_nl_bearer_add(b, + attrs[TIPC_NLA_BEARER_UDP_OPTS]); + if (err) { + rtnl_unlock(); + return err; + } + } +#endif + rtnl_unlock(); + + return 0; +} + int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) { int err; diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 83a9abbfe32c..78892e2f53e3 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -181,6 +181,7 @@ int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info); int tipc_nl_bearer_dump(struct sk_buff *skb, struct netlink_callback *cb); int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info); int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info); +int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info); int tipc_nl_media_dump(struct sk_buff *skb, struct netlink_callback *cb); int tipc_nl_media_get(struct sk_buff *skb, struct genl_info *info); diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 2718de667828..3122f21ca979 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -160,6 +160,11 @@ static const struct genl_ops tipc_genl_v2_ops[] = { .dumpit = tipc_nl_bearer_dump, .policy = tipc_nl_policy, }, + { + .cmd = TIPC_NL_BEARER_ADD, + .doit = tipc_nl_bearer_add, + .policy = tipc_nl_policy, + }, { .cmd = TIPC_NL_BEARER_SET, .doit = tipc_nl_bearer_set, diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index b8ec1a18241e..6b938cc15daf 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -49,6 +49,7 @@ #include "core.h" #include "bearer.h" #include "netlink.h" +#include "msg.h" /* IANA assigned UDP port */ #define UDP_PORT_DEFAULT 6118 @@ -70,6 +71,13 @@ struct udp_media_addr { }; }; +/* struct udp_replicast - container for UDP remote addresses */ +struct udp_replicast { + struct udp_media_addr addr; + struct rcu_head rcu; + struct list_head list; +}; + /** * struct udp_bearer - ip/udp bearer data structure * @bearer: associated generic tipc bearer @@ -82,6 +90,7 @@ struct udp_bearer { struct socket *ubsock; u32 ifindex; struct work_struct work; + struct udp_replicast rcast; }; static int tipc_udp_is_mcast_addr(struct udp_media_addr *addr) @@ -203,29 +212,75 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb, { struct udp_media_addr *src = (struct udp_media_addr *)&b->addr.value; struct udp_media_addr *dst = (struct udp_media_addr *)&addr->value; + struct udp_replicast *rcast; struct udp_bearer *ub; int err = 0; if (skb_headroom(skb) < UDP_MIN_HEADROOM) { err = pskb_expand_head(skb, UDP_MIN_HEADROOM, 0, GFP_ATOMIC); if (err) - goto tx_error; + goto out; } skb_set_inner_protocol(skb, htons(ETH_P_TIPC)); ub = rcu_dereference_rtnl(b->media_ptr); if (!ub) { err = -ENODEV; - goto tx_error; + goto out; } - return tipc_udp_xmit(net, skb, ub, src, dst); + if (!addr->broadcast || list_empty(&ub->rcast.list)) + return tipc_udp_xmit(net, skb, ub, src, dst); -tx_error: + /* Replicast, send an skb to each configured IP address */ + list_for_each_entry_rcu(rcast, &ub->rcast.list, list) { + struct sk_buff *_skb; + + _skb = pskb_copy(skb, GFP_ATOMIC); + if (!_skb) { + err = -ENOMEM; + goto out; + } + + err = tipc_udp_xmit(net, _skb, ub, src, &rcast->addr); + if (err) { + kfree_skb(_skb); + goto out; + } + } + err = 0; +out: kfree_skb(skb); return err; } +static int tipc_udp_rcast_add(struct tipc_bearer *b, + struct udp_media_addr *addr) +{ + struct udp_replicast *rcast; + struct udp_bearer *ub; + + ub = rcu_dereference_rtnl(b->media_ptr); + if (!ub) + return -ENODEV; + + rcast = kmalloc(sizeof(*rcast), GFP_ATOMIC); + if (!rcast) + return -ENOMEM; + + memcpy(&rcast->addr, addr, sizeof(struct udp_media_addr)); + + if (ntohs(addr->proto) == ETH_P_IP) + pr_info("New replicast peer: %pI4\n", &rcast->addr.ipv4); +#if IS_ENABLED(CONFIG_IPV6) + else if (ntohs(addr->proto) == ETH_P_IPV6) + pr_info("New replicast peer: %pI6\n", &rcast->addr.ipv6); +#endif + + list_add_rcu(&rcast->list, &ub->rcast.list); + return 0; +} + /* tipc_udp_recv - read data from bearer socket */ static int tipc_udp_recv(struct sock *sk, struct sk_buff *skb) { @@ -320,6 +375,32 @@ static int tipc_parse_udp_addr(struct nlattr *nla, struct udp_media_addr *addr, return -EADDRNOTAVAIL; } +int tipc_udp_nl_bearer_add(struct tipc_bearer *b, struct nlattr *attr) +{ + int err; + struct udp_media_addr addr = {0}; + struct nlattr *opts[TIPC_NLA_UDP_MAX + 1]; + struct udp_media_addr *dst; + + if (nla_parse_nested(opts, TIPC_NLA_UDP_MAX, attr, tipc_nl_udp_policy)) + return -EINVAL; + + if (!opts[TIPC_NLA_UDP_REMOTE]) + return -EINVAL; + + err = tipc_parse_udp_addr(opts[TIPC_NLA_UDP_REMOTE], &addr, NULL); + if (err) + return err; + + dst = (struct udp_media_addr *)&b->bcast_addr.value; + if (tipc_udp_is_mcast_addr(dst)) { + pr_err("Can't add remote ip to TIPC UDP multicast bearer\n"); + return -EINVAL; + } + + return tipc_udp_rcast_add(b, &addr); +} + /** * tipc_udp_enable - callback to create a new udp bearer instance * @net: network namespace @@ -334,7 +415,7 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b, { int err = -EINVAL; struct udp_bearer *ub; - struct udp_media_addr *remote; + struct udp_media_addr remote = {0}; struct udp_media_addr local = {0}; struct udp_port_cfg udp_conf = {0}; struct udp_tunnel_sock_cfg tuncfg = {NULL}; @@ -344,6 +425,8 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b, if (!ub) return -ENOMEM; + INIT_LIST_HEAD(&ub->rcast.list); + if (!attrs[TIPC_NLA_BEARER_UDP_OPTS]) goto err; @@ -362,9 +445,7 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b, if (err) goto err; - remote = (struct udp_media_addr *)&b->bcast_addr.value; - memset(remote, 0, sizeof(struct udp_media_addr)); - err = tipc_parse_udp_addr(opts[TIPC_NLA_UDP_REMOTE], remote, NULL); + err = tipc_parse_udp_addr(opts[TIPC_NLA_UDP_REMOTE], &remote, NULL); if (err) goto err; @@ -409,10 +490,17 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b, tuncfg.encap_destroy = NULL; setup_udp_tunnel_sock(net, ub->ubsock, &tuncfg); - if (tipc_udp_is_mcast_addr(remote)) { - if (enable_mcast(ub, remote)) - goto err; - } + /** + * The bcast media address port is used for all peers and the ip + * is used if it's a multicast address. + */ + memcpy(&b->bcast_addr.value, &remote, sizeof(remote)); + if (tipc_udp_is_mcast_addr(&remote)) + err = enable_mcast(ub, &remote); + else + err = tipc_udp_rcast_add(b, &remote); + if (err) + goto err; return 0; err: @@ -424,6 +512,12 @@ err: static void cleanup_bearer(struct work_struct *work) { struct udp_bearer *ub = container_of(work, struct udp_bearer, work); + struct udp_replicast *rcast, *tmp; + + list_for_each_entry_safe(rcast, tmp, &ub->rcast.list, list) { + list_del_rcu(&rcast->list); + kfree_rcu(rcast, rcu); + } if (ub->ubsock) udp_tunnel_sock_release(ub->ubsock); diff --git a/net/tipc/udp_media.h b/net/tipc/udp_media.h new file mode 100644 index 000000000000..4dcb54880aa6 --- /dev/null +++ b/net/tipc/udp_media.h @@ -0,0 +1,44 @@ +/* + * net/tipc/udp_media.h: Include file for UDP bearer media + * + * Copyright (c) 1996-2006, 2013-2016, Ericsson AB + * Copyright (c) 2005, 2010-2011, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifdef CONFIG_TIPC_MEDIA_UDP +#ifndef _TIPC_UDP_MEDIA_H +#define _TIPC_UDP_MEDIA_H + +int tipc_udp_nl_bearer_add(struct tipc_bearer *b, struct nlattr *attr); + +#endif +#endif From c9b64d492b1fbc732e3a26b284060c949b737bce Mon Sep 17 00:00:00 2001 From: Richard Alpe Date: Fri, 26 Aug 2016 10:52:54 +0200 Subject: [PATCH 0511/1715] tipc: add replicast peer discovery Automatically learn UDP remote IP addresses of communicating peers by looking at the source IP address of incoming TIPC link configuration messages (neighbor discovery). This makes configuration slightly easier and removes the problematic scenario where a node receives directly addressed neighbor discovery messages sent using replicast which the node cannot "reply" to using mutlicast, leaving the link FSM in a limbo state. Signed-off-by: Richard Alpe Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/udp_media.c | 83 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 80 insertions(+), 3 deletions(-) diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index 6b938cc15daf..6ece3c9ccf82 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -254,6 +254,26 @@ out: return err; } +static bool tipc_udp_is_known_peer(struct tipc_bearer *b, + struct udp_media_addr *addr) +{ + struct udp_replicast *rcast, *tmp; + struct udp_bearer *ub; + + ub = rcu_dereference_rtnl(b->media_ptr); + if (!ub) { + pr_err_ratelimited("UDP bearer instance not found\n"); + return false; + } + + list_for_each_entry_safe(rcast, tmp, &ub->rcast.list, list) { + if (!memcmp(&rcast->addr, addr, sizeof(struct udp_media_addr))) + return true; + } + + return false; +} + static int tipc_udp_rcast_add(struct tipc_bearer *b, struct udp_media_addr *addr) { @@ -281,29 +301,83 @@ static int tipc_udp_rcast_add(struct tipc_bearer *b, return 0; } +static int tipc_udp_rcast_disc(struct tipc_bearer *b, struct sk_buff *skb) +{ + struct udp_media_addr src = {0}; + struct udp_media_addr *dst; + + dst = (struct udp_media_addr *)&b->bcast_addr.value; + if (tipc_udp_is_mcast_addr(dst)) + return 0; + + src.port = udp_hdr(skb)->source; + + if (ip_hdr(skb)->version == 4) { + struct iphdr *iphdr = ip_hdr(skb); + + src.proto = htons(ETH_P_IP); + src.ipv4.s_addr = iphdr->saddr; + if (ipv4_is_multicast(iphdr->daddr)) + return 0; +#if IS_ENABLED(CONFIG_IPV6) + } else if (ip_hdr(skb)->version == 6) { + struct ipv6hdr *iphdr = ipv6_hdr(skb); + + src.proto = htons(ETH_P_IPV6); + src.ipv6 = iphdr->saddr; + if (ipv6_addr_is_multicast(&iphdr->daddr)) + return 0; +#endif + } else { + return 0; + } + + if (likely(tipc_udp_is_known_peer(b, &src))) + return 0; + + return tipc_udp_rcast_add(b, &src); +} + /* tipc_udp_recv - read data from bearer socket */ static int tipc_udp_recv(struct sock *sk, struct sk_buff *skb) { struct udp_bearer *ub; struct tipc_bearer *b; + struct tipc_msg *hdr; + int err; ub = rcu_dereference_sk_user_data(sk); if (!ub) { pr_err_ratelimited("Failed to get UDP bearer reference"); - kfree_skb(skb); - return 0; + goto out; } - skb_pull(skb, sizeof(struct udphdr)); + hdr = buf_msg(skb); + rcu_read_lock(); b = rcu_dereference_rtnl(ub->bearer); + if (!b) + goto rcu_out; if (b && test_bit(0, &b->up)) { tipc_rcv(sock_net(sk), skb, b); rcu_read_unlock(); return 0; } + + if (unlikely(msg_user(hdr) == LINK_CONFIG)) { + err = tipc_udp_rcast_disc(b, skb); + if (err) + goto rcu_out; + } + + tipc_rcv(sock_net(sk), skb, b); rcu_read_unlock(); + return 0; + +rcu_out: + rcu_read_unlock(); +out: kfree_skb(skb); return 0; } @@ -398,6 +472,9 @@ int tipc_udp_nl_bearer_add(struct tipc_bearer *b, struct nlattr *attr) return -EINVAL; } + if (tipc_udp_is_known_peer(b, &addr)) + return 0; + return tipc_udp_rcast_add(b, &addr); } From fdb3accc2c15fabc2b687b2819da9167027c61b6 Mon Sep 17 00:00:00 2001 From: Richard Alpe Date: Fri, 26 Aug 2016 10:52:55 +0200 Subject: [PATCH 0512/1715] tipc: add the ability to get UDP options via netlink Add UDP bearer options to netlink bearer get message. This is used by the tipc user space tool to display UDP options. The UDP bearer information is passed using either a sockaddr_in or sockaddr_in6 structs. This means the user space receiver should intermediately store the retrieved data in a large enough struct (sockaddr_strage) before casting to the proper IP version type. Signed-off-by: Richard Alpe Reviewed-by: Jon Maloy Acked-by: Ying Xue Signed-off-by: David S. Miller --- include/uapi/linux/tipc_netlink.h | 2 + net/tipc/bearer.c | 8 ++++ net/tipc/udp_media.c | 61 +++++++++++++++++++++++++++++++ net/tipc/udp_media.h | 1 + 4 files changed, 72 insertions(+) diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h index b15664c36219..f9edd20fe9ba 100644 --- a/include/uapi/linux/tipc_netlink.h +++ b/include/uapi/linux/tipc_netlink.h @@ -61,6 +61,7 @@ enum { TIPC_NL_MON_PEER_GET, TIPC_NL_PEER_REMOVE, TIPC_NL_BEARER_ADD, + TIPC_NL_UDP_GET_REMOTEIP, __TIPC_NL_CMD_MAX, TIPC_NL_CMD_MAX = __TIPC_NL_CMD_MAX - 1 @@ -100,6 +101,7 @@ enum { TIPC_NLA_UDP_UNSPEC, TIPC_NLA_UDP_LOCAL, /* sockaddr_storage */ TIPC_NLA_UDP_REMOTE, /* sockaddr_storage */ + TIPC_NLA_UDP_MULTI_REMOTEIP, /* flag */ __TIPC_NLA_UDP_MAX, TIPC_NLA_UDP_MAX = __TIPC_NLA_UDP_MAX - 1 diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index d7b442dd6669..975dbeb60ab0 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -712,6 +712,14 @@ static int __tipc_nl_add_bearer(struct tipc_nl_msg *msg, goto prop_msg_full; nla_nest_end(msg->skb, prop); + +#ifdef CONFIG_TIPC_MEDIA_UDP + if (bearer->media->type_id == TIPC_MEDIA_TYPE_UDP) { + if (tipc_udp_nl_add_bearer_data(msg, bearer)) + goto attr_msg_full; + } +#endif + nla_nest_end(msg->skb, attrs); genlmsg_end(msg->skb, hdr); diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index 6ece3c9ccf82..a6cdd9895653 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -401,6 +401,67 @@ static int enable_mcast(struct udp_bearer *ub, struct udp_media_addr *remote) return err; } +static int __tipc_nl_add_udp_addr(struct sk_buff *skb, + struct udp_media_addr *addr, int nla_t) +{ + if (ntohs(addr->proto) == ETH_P_IP) { + struct sockaddr_in ip4; + + ip4.sin_family = AF_INET; + ip4.sin_port = addr->port; + ip4.sin_addr.s_addr = addr->ipv4.s_addr; + if (nla_put(skb, nla_t, sizeof(ip4), &ip4)) + return -EMSGSIZE; + +#if IS_ENABLED(CONFIG_IPV6) + } else if (ntohs(addr->proto) == ETH_P_IPV6) { + struct sockaddr_in6 ip6; + + ip6.sin6_family = AF_INET6; + ip6.sin6_port = addr->port; + memcpy(&ip6.sin6_addr, &addr->ipv6, sizeof(struct in6_addr)); + if (nla_put(skb, nla_t, sizeof(ip6), &ip6)) + return -EMSGSIZE; +#endif + } + + return 0; +} + +int tipc_udp_nl_add_bearer_data(struct tipc_nl_msg *msg, struct tipc_bearer *b) +{ + struct udp_media_addr *src = (struct udp_media_addr *)&b->addr.value; + struct udp_media_addr *dst; + struct udp_bearer *ub; + struct nlattr *nest; + + ub = rcu_dereference_rtnl(b->media_ptr); + if (!ub) + return -ENODEV; + + nest = nla_nest_start(msg->skb, TIPC_NLA_BEARER_UDP_OPTS); + if (!nest) + goto msg_full; + + if (__tipc_nl_add_udp_addr(msg->skb, src, TIPC_NLA_UDP_LOCAL)) + goto msg_full; + + dst = (struct udp_media_addr *)&b->bcast_addr.value; + if (__tipc_nl_add_udp_addr(msg->skb, dst, TIPC_NLA_UDP_REMOTE)) + goto msg_full; + + if (!list_empty(&ub->rcast.list)) { + if (nla_put_flag(msg->skb, TIPC_NLA_UDP_MULTI_REMOTEIP)) + goto msg_full; + } + + nla_nest_end(msg->skb, nest); + return 0; +msg_full: + nla_nest_cancel(msg->skb, nest); + return -EMSGSIZE; +} + /** * tipc_parse_udp_addr - build udp media address from netlink data * @nlattr: netlink attribute containing sockaddr storage aligned address diff --git a/net/tipc/udp_media.h b/net/tipc/udp_media.h index 4dcb54880aa6..c06326a134db 100644 --- a/net/tipc/udp_media.h +++ b/net/tipc/udp_media.h @@ -39,6 +39,7 @@ #define _TIPC_UDP_MEDIA_H int tipc_udp_nl_bearer_add(struct tipc_bearer *b, struct nlattr *attr); +int tipc_udp_nl_add_bearer_data(struct tipc_nl_msg *msg, struct tipc_bearer *b); #endif #endif From 832629ca5c313e122b22b8e73a6d80f111b1a1ae Mon Sep 17 00:00:00 2001 From: Richard Alpe Date: Fri, 26 Aug 2016 10:52:56 +0200 Subject: [PATCH 0513/1715] tipc: add UDP remoteip dump to netlink API When using replicast a UDP bearer can have an arbitrary amount of remote ip addresses associated with it. This means we cannot simply add all remote ip addresses to an existing bearer data message as it might fill the message, leaving us with a truncated message that we can't safely resume. To handle this we introduce the new netlink command TIPC_NL_UDP_GET_REMOTEIP. This command is intended to be called when the bearer data message has the TIPC_NLA_UDP_MULTI_REMOTEIP flag set, indicating there are more than one remote ip (replicast). Signed-off-by: Richard Alpe Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/netlink.c | 10 ++++- net/tipc/udp_media.c | 90 ++++++++++++++++++++++++++++++++++++++++++++ net/tipc/udp_media.h | 1 + 3 files changed, 100 insertions(+), 1 deletion(-) diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 3122f21ca979..3200059d14b2 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -41,6 +41,7 @@ #include "link.h" #include "node.h" #include "net.h" +#include "udp_media.h" #include static const struct nla_policy tipc_nl_policy[TIPC_NLA_MAX + 1] = { @@ -247,7 +248,14 @@ static const struct genl_ops tipc_genl_v2_ops[] = { .cmd = TIPC_NL_PEER_REMOVE, .doit = tipc_nl_peer_rm, .policy = tipc_nl_policy, - } + }, +#ifdef CONFIG_TIPC_MEDIA_UDP + { + .cmd = TIPC_NL_UDP_GET_REMOTEIP, + .dumpit = tipc_udp_nl_dump_remoteip, + .policy = tipc_nl_policy, + }, +#endif }; int tipc_nlmsg_parse(const struct nlmsghdr *nlh, struct nlattr ***attr) diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index a6cdd9895653..245e9a2eac41 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -428,6 +428,96 @@ static int __tipc_nl_add_udp_addr(struct sk_buff *skb, return 0; } +int tipc_udp_nl_dump_remoteip(struct sk_buff *skb, struct netlink_callback *cb) +{ + u32 bid = cb->args[0]; + u32 skip_cnt = cb->args[1]; + u32 portid = NETLINK_CB(cb->skb).portid; + struct udp_replicast *rcast, *tmp; + struct tipc_bearer *b; + struct udp_bearer *ub; + void *hdr; + int err; + int i; + + if (!bid && !skip_cnt) { + struct net *net = sock_net(skb->sk); + struct nlattr *battrs[TIPC_NLA_BEARER_MAX + 1]; + struct nlattr **attrs; + char *bname; + + err = tipc_nlmsg_parse(cb->nlh, &attrs); + if (err) + return err; + + if (!attrs[TIPC_NLA_BEARER]) + return -EINVAL; + + err = nla_parse_nested(battrs, TIPC_NLA_BEARER_MAX, + attrs[TIPC_NLA_BEARER], + tipc_nl_bearer_policy); + if (err) + return err; + + if (!battrs[TIPC_NLA_BEARER_NAME]) + return -EINVAL; + + bname = nla_data(battrs[TIPC_NLA_BEARER_NAME]); + + rtnl_lock(); + b = tipc_bearer_find(net, bname); + if (!b) { + rtnl_unlock(); + return -EINVAL; + } + bid = b->identity; + } else { + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); + + rtnl_lock(); + b = rtnl_dereference(tn->bearer_list[bid]); + if (!b) { + rtnl_unlock(); + return -EINVAL; + } + } + + ub = rcu_dereference_rtnl(b->media_ptr); + if (!ub) { + rtnl_unlock(); + return -EINVAL; + } + + i = 0; + list_for_each_entry_safe(rcast, tmp, &ub->rcast.list, list) { + if (i < skip_cnt) + goto count; + + hdr = genlmsg_put(skb, portid, cb->nlh->nlmsg_seq, + &tipc_genl_family, NLM_F_MULTI, + TIPC_NL_BEARER_GET); + if (!hdr) + goto done; + + err = __tipc_nl_add_udp_addr(skb, &rcast->addr, + TIPC_NLA_UDP_REMOTE); + if (err) { + genlmsg_cancel(skb, hdr); + goto done; + } + genlmsg_end(skb, hdr); +count: + i++; + } +done: + rtnl_unlock(); + cb->args[0] = bid; + cb->args[1] = i; + + return skb->len; +} + int tipc_udp_nl_add_bearer_data(struct tipc_nl_msg *msg, struct tipc_bearer *b) { struct udp_media_addr *src = (struct udp_media_addr *)&b->addr.value; diff --git a/net/tipc/udp_media.h b/net/tipc/udp_media.h index c06326a134db..281bbae87726 100644 --- a/net/tipc/udp_media.h +++ b/net/tipc/udp_media.h @@ -40,6 +40,7 @@ int tipc_udp_nl_bearer_add(struct tipc_bearer *b, struct nlattr *attr); int tipc_udp_nl_add_bearer_data(struct tipc_nl_msg *msg, struct tipc_bearer *b); +int tipc_udp_nl_dump_remoteip(struct sk_buff *skb, struct netlink_callback *cb); #endif #endif From 69b365c360243e18ec7c2577c14ff057c091cd77 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Fri, 26 Aug 2016 15:12:41 +0100 Subject: [PATCH 0514/1715] sfc: include size-binned TX stats on sfn8542q TX size bins were not supported on the 7000's 40G MAC, but the 8000 series does support them and the MCPU advertises that via a new capability bit. Signed-off-by: Edward Cree Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/ef10.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 2a70b1a39d80..d039f29cd8d7 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -1533,9 +1533,10 @@ static const struct efx_hw_stat_desc efx_ef10_stat_desc[EF10_STAT_COUNT] = { (1ULL << GENERIC_STAT_rx_nodesc_trunc) | \ (1ULL << GENERIC_STAT_rx_noskb_drops)) -/* These statistics are only provided by the 10G MAC. For a 10G/40G - * switchable port we do not expose these because they might not - * include all the packets they should. +/* On 7000 series NICs, these statistics are only provided by the 10G MAC. + * For a 10G/40G switchable port we do not expose these because they might + * not include all the packets they should. + * On 8000 series NICs these statistics are always provided. */ #define HUNT_10G_ONLY_STAT_MASK ((1ULL << EF10_STAT_port_tx_control) | \ (1ULL << EF10_STAT_port_tx_lt64) | \ @@ -1581,10 +1582,15 @@ static u64 efx_ef10_raw_stat_mask(struct efx_nic *efx) 1 << MC_CMD_DRV_ATTACH_EXT_OUT_FLAG_LINKCTRL)) return 0; - if (port_caps & (1 << MC_CMD_PHY_CAP_40000FDX_LBN)) + if (port_caps & (1 << MC_CMD_PHY_CAP_40000FDX_LBN)) { raw_mask |= HUNT_40G_EXTRA_STAT_MASK; - else + /* 8000 series have everything even at 40G */ + if (nic_data->datapath_caps2 & + (1 << MC_CMD_GET_CAPABILITIES_V2_OUT_MAC_STATS_40G_TX_SIZE_BINS_LBN)) + raw_mask |= HUNT_10G_ONLY_STAT_MASK; + } else { raw_mask |= HUNT_10G_ONLY_STAT_MASK; + } if (nic_data->datapath_caps & (1 << MC_CMD_GET_CAPABILITIES_OUT_PM_AND_RXDP_COUNTERS_LBN)) From df0562c386c778c802b981fc4015a636aa5db866 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Fri, 26 Aug 2016 15:12:57 +0100 Subject: [PATCH 0515/1715] sfc: remove duplicate assignment nic_data was already initialised to the right thing, no need to assign it again. Signed-off-by: Edward Cree Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/ef10.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index d039f29cd8d7..c4cd6b08acd1 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -1715,7 +1715,6 @@ static int efx_ef10_try_update_nic_stats_pf(struct efx_nic *efx) efx_ef10_get_stat_mask(efx, mask); dma_stats = efx->stats_buffer.addr; - nic_data = efx->nic_data; generation_end = dma_stats[MC_CMD_MAC_GENERATION_END]; if (generation_end == EFX_MC_STATS_GENERATION_INVALID) From 942e298eba850a866a673fa159774313650ce5cc Mon Sep 17 00:00:00 2001 From: Jon Cooper Date: Fri, 26 Aug 2016 15:13:30 +0100 Subject: [PATCH 0516/1715] sfc: work around TRIGGER_INTERRUPT command not working on SFC9140 MC_CMD_TRIGGER_INTERRUPT does not work on the SFC9140, as used in the sfn7x42q and sfn7x24f. Check for this using the MCDI workaround mechanism. The command is only used during self test. If it's not supported, skip the interrupt test. Signed-off-by: Edward Cree Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/ef10.c | 8 ++++++-- drivers/net/ethernet/sfc/farch.c | 3 ++- drivers/net/ethernet/sfc/net_driver.h | 2 +- drivers/net/ethernet/sfc/nic.c | 4 ++-- drivers/net/ethernet/sfc/nic.h | 4 ++-- drivers/net/ethernet/sfc/selftest.c | 10 +++++++++- drivers/net/ethernet/sfc/selftest.h | 2 +- 7 files changed, 23 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index c4cd6b08acd1..6fe1d0ba78b2 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -2048,14 +2048,18 @@ static irqreturn_t efx_ef10_legacy_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -static void efx_ef10_irq_test_generate(struct efx_nic *efx) +static int efx_ef10_irq_test_generate(struct efx_nic *efx) { MCDI_DECLARE_BUF(inbuf, MC_CMD_TRIGGER_INTERRUPT_IN_LEN); + if (efx_mcdi_set_workaround(efx, MC_CMD_WORKAROUND_BUG41750, true, + NULL) == 0) + return -ENOTSUPP; + BUILD_BUG_ON(MC_CMD_TRIGGER_INTERRUPT_OUT_LEN != 0); MCDI_SET_DWORD(inbuf, TRIGGER_INTERRUPT_IN_INTR_LEVEL, efx->irq_level); - (void) efx_mcdi_rpc(efx, MC_CMD_TRIGGER_INTERRUPT, + return efx_mcdi_rpc(efx, MC_CMD_TRIGGER_INTERRUPT, inbuf, sizeof(inbuf), NULL, 0, NULL); } diff --git a/drivers/net/ethernet/sfc/farch.c b/drivers/net/ethernet/sfc/farch.c index 4c83739d158f..4762ec444cb8 100644 --- a/drivers/net/ethernet/sfc/farch.c +++ b/drivers/net/ethernet/sfc/farch.c @@ -1477,9 +1477,10 @@ void efx_farch_irq_disable_master(struct efx_nic *efx) * Interrupt must already have been enabled, otherwise nasty things * may happen. */ -void efx_farch_irq_test_generate(struct efx_nic *efx) +int efx_farch_irq_test_generate(struct efx_nic *efx) { efx_farch_interrupts(efx, true, true); + return 0; } /* Process a fatal interrupt diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index 13b7f52e6724..0a2504b5dad5 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -1275,7 +1275,7 @@ struct efx_nic_type { int (*mcdi_poll_reboot)(struct efx_nic *efx); void (*mcdi_reboot_detected)(struct efx_nic *efx); void (*irq_enable_master)(struct efx_nic *efx); - void (*irq_test_generate)(struct efx_nic *efx); + int (*irq_test_generate)(struct efx_nic *efx); void (*irq_disable_non_ev)(struct efx_nic *efx); irqreturn_t (*irq_handle_msi)(int irq, void *dev_id); irqreturn_t (*irq_handle_legacy)(int irq, void *dev_id); diff --git a/drivers/net/ethernet/sfc/nic.c b/drivers/net/ethernet/sfc/nic.c index 89b83e59e1dc..aa1945a858d5 100644 --- a/drivers/net/ethernet/sfc/nic.c +++ b/drivers/net/ethernet/sfc/nic.c @@ -66,11 +66,11 @@ void efx_nic_event_test_start(struct efx_channel *channel) channel->efx->type->ev_test_generate(channel); } -void efx_nic_irq_test_start(struct efx_nic *efx) +int efx_nic_irq_test_start(struct efx_nic *efx) { efx->last_irq_cpu = -1; smp_wmb(); - efx->type->irq_test_generate(efx); + return efx->type->irq_test_generate(efx); } /* Hook interrupt handler(s) diff --git a/drivers/net/ethernet/sfc/nic.h b/drivers/net/ethernet/sfc/nic.h index d8b1694638cd..73bee7ea332a 100644 --- a/drivers/net/ethernet/sfc/nic.h +++ b/drivers/net/ethernet/sfc/nic.h @@ -746,12 +746,12 @@ static inline void efx_update_diff_stat(u64 *stat, u64 diff) /* Interrupts */ int efx_nic_init_interrupt(struct efx_nic *efx); -void efx_nic_irq_test_start(struct efx_nic *efx); +int efx_nic_irq_test_start(struct efx_nic *efx); void efx_nic_fini_interrupt(struct efx_nic *efx); /* Falcon/Siena interrupts */ void efx_farch_irq_enable_master(struct efx_nic *efx); -void efx_farch_irq_test_generate(struct efx_nic *efx); +int efx_farch_irq_test_generate(struct efx_nic *efx); void efx_farch_irq_disable_master(struct efx_nic *efx); irqreturn_t efx_farch_msi_interrupt(int irq, void *dev_id); irqreturn_t efx_farch_legacy_interrupt(int irq, void *dev_id); diff --git a/drivers/net/ethernet/sfc/selftest.c b/drivers/net/ethernet/sfc/selftest.c index 9d78830da609..cd38b44ae23a 100644 --- a/drivers/net/ethernet/sfc/selftest.c +++ b/drivers/net/ethernet/sfc/selftest.c @@ -135,11 +135,19 @@ static int efx_test_interrupts(struct efx_nic *efx, { unsigned long timeout, wait; int cpu; + int rc; netif_dbg(efx, drv, efx->net_dev, "testing interrupts\n"); tests->interrupt = -1; - efx_nic_irq_test_start(efx); + rc = efx_nic_irq_test_start(efx); + if (rc == -ENOTSUPP) { + netif_dbg(efx, drv, efx->net_dev, + "direct interrupt testing not supported\n"); + tests->interrupt = 0; + return 0; + } + timeout = jiffies + IRQ_TIMEOUT; wait = 1; diff --git a/drivers/net/ethernet/sfc/selftest.h b/drivers/net/ethernet/sfc/selftest.h index 009dbe88f3be..32a427253a03 100644 --- a/drivers/net/ethernet/sfc/selftest.h +++ b/drivers/net/ethernet/sfc/selftest.h @@ -28,7 +28,7 @@ struct efx_loopback_self_tests { /* Efx self test results * For fields which are not counters, 1 indicates success and -1 - * indicates failure. + * indicates failure; 0 indicates test could not be run. */ struct efx_self_tests { /* online tests */ From 73b86bb7033edfae2a25321116427d8cf386bc51 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 26 Aug 2016 14:21:08 +0000 Subject: [PATCH 0517/1715] chcr: Fix non static symbol warning Fixes the following sparse warning: drivers/crypto/chelsio/chcr_algo.c:593:5: warning: symbol 'cxgb4_is_crypto_q_full' was not declared. Should it be static? Signed-off-by: Wei Yongjun Acked-by: Herbert Xu Signed-off-by: David S. Miller --- drivers/crypto/chelsio/chcr_algo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c index ad8e353cf897..e4ddb921d7b3 100644 --- a/drivers/crypto/chelsio/chcr_algo.c +++ b/drivers/crypto/chelsio/chcr_algo.c @@ -590,7 +590,7 @@ badkey_err: return -EINVAL; } -int cxgb4_is_crypto_q_full(struct net_device *dev, unsigned int idx) +static int cxgb4_is_crypto_q_full(struct net_device *dev, unsigned int idx) { int ret = 0; struct sge_ofld_txq *q; From 2a7a3c564466e9d58217200b36c026a52d99aaff Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Fri, 26 Aug 2016 22:21:47 +0800 Subject: [PATCH 0518/1715] vxlan: remove the useless header file protocol.h This header file is not used in vxlan.c file. Signed-off-by: Zhu Yanjun Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index c0dda6fc0921..3f7e0d2dd21a 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -27,7 +27,6 @@ #include #include #include -#include #if IS_ENABLED(CONFIG_IPV6) #include From e19ac1578fd8eb3f7c93f2be2657deb2ccefffd7 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 26 Aug 2016 14:35:43 +0000 Subject: [PATCH 0519/1715] net: ethernet: ti: cpsw: fix error return code in cpsw_set_channels() Fix to return a negative error code from the cpsw_fill_rx_channels() error handling case instead of 0, as done elsewhere in this function. Fixes: ce52c744574b ("net: ethernet: ti: cpsw: add ethtool channels support") Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 4273e7f9b4aa..c6cff3d2ff05 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -2217,7 +2217,8 @@ static int cpsw_set_channels(struct net_device *ndev, } if (cpsw_common_res_usage_state(cpsw)) { - if (cpsw_fill_rx_channels(priv)) + ret = cpsw_fill_rx_channels(priv); + if (ret) goto err; /* After this receive is started */ From 0294b625ad5a6d1fb50632d67cf384862d8a4a46 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Sun, 28 Aug 2016 14:43:17 -0700 Subject: [PATCH 0520/1715] net: Add read_sock proto_op Add new function in proto_ops structure. This includes moving the typedef got sk_read_actor into net.h and removing the definition from tcp.h. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/net.h | 6 ++++++ include/net/tcp.h | 2 -- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/include/linux/net.h b/include/linux/net.h index b9f0ff4d489c..cd0c8bd0a1de 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -25,6 +25,7 @@ #include #include #include +#include #include @@ -128,6 +129,9 @@ struct page; struct sockaddr; struct msghdr; struct module; +struct sk_buff; +typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *, + unsigned int, size_t); struct proto_ops { int family; @@ -186,6 +190,8 @@ struct proto_ops { struct pipe_inode_info *pipe, size_t len, unsigned int flags); int (*set_peek_off)(struct sock *sk, int val); int (*peek_len)(struct socket *sock); + int (*read_sock)(struct sock *sk, read_descriptor_t *desc, + sk_read_actor_t recv_actor); }; #define DECLARE_SOCKADDR(type, dst, src) \ diff --git a/include/net/tcp.h b/include/net/tcp.h index 25d64f6de69e..d56666ad9249 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -603,8 +603,6 @@ static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize) void tcp_get_info(struct sock *, struct tcp_info *); /* Read 'sendfile()'-style from a TCP socket */ -typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *, - unsigned int, size_t); int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, sk_read_actor_t recv_actor); From 3203558589a597e0a10a66b258fbc5a4a6659ed0 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Sun, 28 Aug 2016 14:43:18 -0700 Subject: [PATCH 0521/1715] tcp: Set read_sock and peek_len proto_ops In inet_stream_ops we set read_sock to tcp_read_sock and peek_len to tcp_peek_len (which is just a stub function that calls tcp_inq). Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/tcp.h | 2 ++ net/ipv4/af_inet.c | 2 ++ net/ipv4/tcp.c | 6 ++++++ net/ipv6/af_inet6.c | 2 ++ 4 files changed, 12 insertions(+) diff --git a/include/net/tcp.h b/include/net/tcp.h index d56666ad9249..a5af6be3a572 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1848,6 +1848,8 @@ static inline int tcp_inq(struct sock *sk) return answ; } +int tcp_peek_len(struct socket *sock); + static inline void tcp_segs_in(struct tcp_sock *tp, const struct sk_buff *skb) { u16 segs_in; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 989a362814a9..e94b47be0019 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -916,6 +916,8 @@ const struct proto_ops inet_stream_ops = { .mmap = sock_no_mmap, .sendpage = inet_sendpage, .splice_read = tcp_splice_read, + .read_sock = tcp_read_sock, + .peek_len = tcp_peek_len, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_sock_common_setsockopt, .compat_getsockopt = compat_sock_common_getsockopt, diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f1a9a0a8a1f3..60a438864f32 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1570,6 +1570,12 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, } EXPORT_SYMBOL(tcp_read_sock); +int tcp_peek_len(struct socket *sock) +{ + return tcp_inq(sock->sk); +} +EXPORT_SYMBOL(tcp_peek_len); + /* * This routine copies from a sock struct into the user buffer. * diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index b454055ba625..46ad699937fd 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -545,6 +545,8 @@ const struct proto_ops inet6_stream_ops = { .mmap = sock_no_mmap, .sendpage = inet_sendpage, .splice_read = tcp_splice_read, + .read_sock = tcp_read_sock, + .peek_len = tcp_peek_len, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_sock_common_setsockopt, .compat_getsockopt = compat_sock_common_getsockopt, From 96a59083478d1ea66684c59c073424a9d4e6ac6d Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Sun, 28 Aug 2016 14:43:19 -0700 Subject: [PATCH 0522/1715] kcm: Remove TCP specific references from kcm and strparser kcm and strparser need to work with any type of stream socket not just TCP. Eliminate references to TCP and call generic proto_ops functions of read_sock and peek_len. Also in strp_init check if the socket support the proto_ops read_sock and peek_len. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/strparser.h | 2 +- net/kcm/kcmsock.c | 30 +++++++++++------------- net/strparser/strparser.c | 48 +++++++++++++++++++++++---------------- 3 files changed, 43 insertions(+), 37 deletions(-) diff --git a/include/net/strparser.h b/include/net/strparser.h index 91fa0b958426..0c28ad97c52f 100644 --- a/include/net/strparser.h +++ b/include/net/strparser.h @@ -137,6 +137,6 @@ void strp_stop(struct strparser *strp); void strp_check_rcv(struct strparser *strp); int strp_init(struct strparser *strp, struct sock *csk, struct strp_callbacks *cb); -void strp_tcp_data_ready(struct strparser *strp); +void strp_data_ready(struct strparser *strp); #endif /* __NET_STRPARSER_H_ */ diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index eb731cacc325..2632ac748371 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -26,7 +26,6 @@ #include #include #include -#include #include unsigned int kcm_net_id; @@ -340,7 +339,7 @@ static void unreserve_rx_kcm(struct kcm_psock *psock, } /* Lower sock lock held */ -static void psock_tcp_data_ready(struct sock *sk) +static void psock_data_ready(struct sock *sk) { struct kcm_psock *psock; @@ -348,7 +347,7 @@ static void psock_tcp_data_ready(struct sock *sk) psock = (struct kcm_psock *)sk->sk_user_data; if (likely(psock)) - strp_tcp_data_ready(&psock->strp); + strp_data_ready(&psock->strp); read_unlock_bh(&sk->sk_callback_lock); } @@ -392,7 +391,7 @@ static int kcm_read_sock_done(struct strparser *strp, int err) return err; } -static void psock_tcp_state_change(struct sock *sk) +static void psock_state_change(struct sock *sk) { /* TCP only does a POLLIN for a half close. Do a POLLHUP here * since application will normally not poll with POLLIN @@ -402,7 +401,7 @@ static void psock_tcp_state_change(struct sock *sk) report_csk_error(sk, EPIPE); } -static void psock_tcp_write_space(struct sock *sk) +static void psock_write_space(struct sock *sk) { struct kcm_psock *psock; struct kcm_mux *mux; @@ -1383,19 +1382,12 @@ static int kcm_attach(struct socket *sock, struct socket *csock, struct list_head *head; int index = 0; struct strp_callbacks cb; - - if (csock->ops->family != PF_INET && - csock->ops->family != PF_INET6) - return -EINVAL; + int err; csk = csock->sk; if (!csk) return -EINVAL; - /* Only support TCP for now */ - if (csk->sk_protocol != IPPROTO_TCP) - return -EINVAL; - psock = kmem_cache_zalloc(kcm_psockp, GFP_KERNEL); if (!psock) return -ENOMEM; @@ -1409,7 +1401,11 @@ static int kcm_attach(struct socket *sock, struct socket *csock, cb.parse_msg = kcm_parse_func_strparser; cb.read_sock_done = kcm_read_sock_done; - strp_init(&psock->strp, csk, &cb); + err = strp_init(&psock->strp, csk, &cb); + if (err) { + kmem_cache_free(kcm_psockp, psock); + return err; + } sock_hold(csk); @@ -1418,9 +1414,9 @@ static int kcm_attach(struct socket *sock, struct socket *csock, psock->save_write_space = csk->sk_write_space; psock->save_state_change = csk->sk_state_change; csk->sk_user_data = psock; - csk->sk_data_ready = psock_tcp_data_ready; - csk->sk_write_space = psock_tcp_write_space; - csk->sk_state_change = psock_tcp_state_change; + csk->sk_data_ready = psock_data_ready; + csk->sk_write_space = psock_write_space; + csk->sk_state_change = psock_state_change; write_unlock_bh(&csk->sk_callback_lock); /* Finished initialization, now add the psock to the MUX. */ diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c index 4ecfc10cbe6d..5c7549b5b92c 100644 --- a/net/strparser/strparser.c +++ b/net/strparser/strparser.c @@ -26,7 +26,6 @@ #include #include #include -#include static struct workqueue_struct *strp_wq; @@ -80,9 +79,16 @@ static void strp_parser_err(struct strparser *strp, int err, strp->cb.abort_parser(strp, err); } +static inline int strp_peek_len(struct strparser *strp) +{ + struct socket *sock = strp->sk->sk_socket; + + return sock->ops->peek_len(sock); +} + /* Lower socket lock held */ -static int strp_tcp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, - unsigned int orig_offset, size_t orig_len) +static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, + unsigned int orig_offset, size_t orig_len) { struct strparser *strp = (struct strparser *)desc->arg.data; struct _strp_rx_msg *rxm; @@ -266,12 +272,12 @@ static int strp_tcp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, if (extra < 0) { /* Message not complete yet. */ if (rxm->strp.full_len - rxm->accum_len > - tcp_inq(strp->sk)) { + strp_peek_len(strp)) { /* Don't have the whole messages in the socket * buffer. Set strp->rx_need_bytes to wait for * the rest of the message. Also, set "early * eaten" since we've already buffered the skb - * but don't consume yet per tcp_read_sock. + * but don't consume yet per strp_read_sock. */ if (!rxm->accum_len) { @@ -329,16 +335,17 @@ static int default_read_sock_done(struct strparser *strp, int err) } /* Called with lock held on lower socket */ -static int strp_tcp_read_sock(struct strparser *strp) +static int strp_read_sock(struct strparser *strp) { + struct socket *sock = strp->sk->sk_socket; read_descriptor_t desc; desc.arg.data = strp; desc.error = 0; desc.count = 1; /* give more than one skb per call */ - /* sk should be locked here, so okay to do tcp_read_sock */ - tcp_read_sock(strp->sk, &desc, strp_tcp_recv); + /* sk should be locked here, so okay to do read_sock */ + sock->ops->read_sock(strp->sk, &desc, strp_recv); desc.error = strp->cb.read_sock_done(strp, desc.error); @@ -346,10 +353,8 @@ static int strp_tcp_read_sock(struct strparser *strp) } /* Lower sock lock held */ -void strp_tcp_data_ready(struct strparser *strp) +void strp_data_ready(struct strparser *strp) { - struct sock *csk = strp->sk; - if (unlikely(strp->rx_stopped)) return; @@ -360,7 +365,7 @@ void strp_tcp_data_ready(struct strparser *strp) * allows a thread in BH context to safely check if the process * lock is held. In this case, if the lock is held, queue work. */ - if (sock_owned_by_user(csk)) { + if (sock_owned_by_user(strp->sk)) { queue_work(strp_wq, &strp->rx_work); return; } @@ -369,24 +374,24 @@ void strp_tcp_data_ready(struct strparser *strp) return; if (strp->rx_need_bytes) { - if (tcp_inq(csk) >= strp->rx_need_bytes) + if (strp_peek_len(strp) >= strp->rx_need_bytes) strp->rx_need_bytes = 0; else return; } - if (strp_tcp_read_sock(strp) == -ENOMEM) + if (strp_read_sock(strp) == -ENOMEM) queue_work(strp_wq, &strp->rx_work); } -EXPORT_SYMBOL_GPL(strp_tcp_data_ready); +EXPORT_SYMBOL_GPL(strp_data_ready); static void do_strp_rx_work(struct strparser *strp) { read_descriptor_t rd_desc; struct sock *csk = strp->sk; - /* We need the read lock to synchronize with strp_tcp_data_ready. We - * need the socket lock for calling tcp_read_sock. + /* We need the read lock to synchronize with strp_data_ready. We + * need the socket lock for calling strp_read_sock. */ lock_sock(csk); @@ -398,7 +403,7 @@ static void do_strp_rx_work(struct strparser *strp) rd_desc.arg.data = strp; - if (strp_tcp_read_sock(strp) == -ENOMEM) + if (strp_read_sock(strp) == -ENOMEM) queue_work(strp_wq, &strp->rx_work); out: @@ -424,9 +429,14 @@ static void strp_rx_msg_timeout(unsigned long arg) int strp_init(struct strparser *strp, struct sock *csk, struct strp_callbacks *cb) { + struct socket *sock = csk->sk_socket; + if (!cb || !cb->rcv_msg || !cb->parse_msg) return -EINVAL; + if (!sock->ops->read_sock || !sock->ops->peek_len) + return -EAFNOSUPPORT; + memset(strp, 0, sizeof(*strp)); strp->sk = csk; @@ -456,7 +466,7 @@ void strp_unpause(struct strparser *strp) } EXPORT_SYMBOL_GPL(strp_unpause); -/* strp must already be stopped so that strp_tcp_recv will no longer be called. +/* strp must already be stopped so that strp_recv will no longer be called. * Note that strp_done is not called with the lower socket held. */ void strp_done(struct strparser *strp) From 24a24d07d688a4625ed7e07d0a9f29127f5c7708 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 28 Aug 2016 11:40:41 +0100 Subject: [PATCH 0523/1715] wan/fsl_ucc_hdlc: fix spelling mistake "prameter" -> "parameter" Trivial fix to spelling mistake in dev_err message. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/wan/fsl_ucc_hdlc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c index 6f044450b702..5fbf83d5aa57 100644 --- a/drivers/net/wan/fsl_ucc_hdlc.c +++ b/drivers/net/wan/fsl_ucc_hdlc.c @@ -162,7 +162,7 @@ static int uhdlc_init(struct ucc_hdlc_private *priv) ALIGNMENT_OF_UCC_HDLC_PRAM); if (priv->ucc_pram_offset < 0) { - dev_err(priv->dev, "Can not allocate MURAM for hdlc prameter.\n"); + dev_err(priv->dev, "Can not allocate MURAM for hdlc parameter.\n"); ret = -ENOMEM; goto free_tx_bd; } From b9780a810b47153ee134f84a9ce6bdfef99fe1bd Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 28 Aug 2016 12:03:27 +0100 Subject: [PATCH 0524/1715] net: ucc_geth: fix spelling mistake "propperty" -> "property" Trivial fix to spelling mistake in dev_warn message. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/ucc_geth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index 5bf1ade28315..186ef8f16c80 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -3756,7 +3756,7 @@ static int ucc_geth_probe(struct platform_device* ofdev) return -EINVAL; } if ((*prop < QE_CLK_NONE) || (*prop > QE_CLK24)) { - pr_err("invalid rx-clock propperty\n"); + pr_err("invalid rx-clock property\n"); return -EINVAL; } ug_info->uf_info.rx_clock = *prop; From 1a8ff8f52faebe28d145324884357fcdf33322c0 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 28 Aug 2016 12:07:02 +0100 Subject: [PATCH 0525/1715] cxgb4/cxgb4vf: fix spelling mistake "provissioned" -> "provisioned" Trivial fix to spelling mistake in dev_warn message. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c index f2951bf68992..100b2cc064a3 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c @@ -2378,7 +2378,7 @@ static void size_nports_qsets(struct adapter *adapter) */ pmask_nports = hweight32(adapter->params.vfres.pmask); if (pmask_nports < adapter->params.nports) { - dev_warn(adapter->pdev_dev, "only using %d of %d provissioned" + dev_warn(adapter->pdev_dev, "only using %d of %d provisioned" " virtual interfaces; limited by Port Access Rights" " mask %#x\n", pmask_nports, adapter->params.nports, adapter->params.vfres.pmask); From c9c3321257e1b95be9b375f811fb250162af8d39 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 27 Aug 2016 07:37:54 -0700 Subject: [PATCH 0526/1715] tcp: add tcp_add_backlog() When TCP operates in lossy environments (between 1 and 10 % packet losses), many SACK blocks can be exchanged, and I noticed we could drop them on busy senders, if these SACK blocks have to be queued into the socket backlog. While the main cause is the poor performance of RACK/SACK processing, we can try to avoid these drops of valuable information that can lead to spurious timeouts and retransmits. Cause of the drops is the skb->truesize overestimation caused by : - drivers allocating ~2048 (or more) bytes as a fragment to hold an Ethernet frame. - various pskb_may_pull() calls bringing the headers into skb->head might have pulled all the frame content, but skb->truesize could not be lowered, as the stack has no idea of each fragment truesize. The backlog drops are also more visible on bidirectional flows, since their sk_rmem_alloc can be quite big. Let's add some room for the backlog, as only the socket owner can selectively take action to lower memory needs, like collapsing receive queues or partial ofo pruning. Signed-off-by: Eric Dumazet Cc: Yuchung Cheng Cc: Neal Cardwell Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- include/net/tcp.h | 1 + net/ipv4/tcp_ipv4.c | 33 +++++++++++++++++++++++++++++---- net/ipv6/tcp_ipv6.c | 5 +---- 3 files changed, 31 insertions(+), 8 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index a5af6be3a572..dd99679e2e51 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1161,6 +1161,7 @@ static inline void tcp_prequeue_init(struct tcp_sock *tp) } bool tcp_prequeue(struct sock *sk, struct sk_buff *skb); +bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb); #undef STATE_TRACE diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index ad41e8ecf796..53e80cd004b6 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1532,6 +1532,34 @@ bool tcp_prequeue(struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(tcp_prequeue); +bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb) +{ + u32 limit = sk->sk_rcvbuf + sk->sk_sndbuf; + + /* Only socket owner can try to collapse/prune rx queues + * to reduce memory overhead, so add a little headroom here. + * Few sockets backlog are possibly concurrently non empty. + */ + limit += 64*1024; + + /* In case all data was pulled from skb frags (in __pskb_pull_tail()), + * we can fix skb->truesize to its real value to avoid future drops. + * This is valid because skb is not yet charged to the socket. + * It has been noticed pure SACK packets were sometimes dropped + * (if cooked by drivers without copybreak feature). + */ + if (!skb->data_len) + skb->truesize = SKB_TRUESIZE(skb_end_offset(skb)); + + if (unlikely(sk_add_backlog(sk, skb, limit))) { + bh_unlock_sock(sk); + __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPBACKLOGDROP); + return true; + } + return false; +} +EXPORT_SYMBOL(tcp_add_backlog); + /* * From tcp_input.c */ @@ -1662,10 +1690,7 @@ process: if (!sock_owned_by_user(sk)) { if (!tcp_prequeue(sk, skb)) ret = tcp_v4_do_rcv(sk, skb); - } else if (unlikely(sk_add_backlog(sk, skb, - sk->sk_rcvbuf + sk->sk_sndbuf))) { - bh_unlock_sock(sk); - __NET_INC_STATS(net, LINUX_MIB_TCPBACKLOGDROP); + } else if (tcp_add_backlog(sk, skb)) { goto discard_and_relse; } bh_unlock_sock(sk); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index e4f55683af31..5bf460bd299f 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1467,10 +1467,7 @@ process: if (!sock_owned_by_user(sk)) { if (!tcp_prequeue(sk, skb)) ret = tcp_v6_do_rcv(sk, skb); - } else if (unlikely(sk_add_backlog(sk, skb, - sk->sk_rcvbuf + sk->sk_sndbuf))) { - bh_unlock_sock(sk); - __NET_INC_STATS(net, LINUX_MIB_TCPBACKLOGDROP); + } else if (tcp_add_backlog(sk, skb)) { goto discard_and_relse; } bh_unlock_sock(sk); From a039b638592c8b967797d96f50bebfaa64964c3f Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 26 Aug 2016 09:21:23 +0100 Subject: [PATCH 0527/1715] amd-xgbe: Reset running devices after resume from hibernate After resume from hibernate on arm64, any amd-xgbe devices that were running when we hibernated are reported as down, even when it is not. Re-plugging the cables does not cause the interface to come back, the link must be marked as down then up via 'ip set link' using the serial console. This happens because the device has been power-cycled and possibly re-initialised by firmware, whereas the driver's memory structures have been restored from the hibernate image and the two do not agree. Schedule a restart of the device after powerup in case the world changed while we were asleep. Signed-off-by: James Morse Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/xgbe/xgbe-main.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-main.c b/drivers/net/ethernet/amd/xgbe/xgbe-main.c index 3eee3201b58f..9de078819aa6 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-main.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-main.c @@ -861,9 +861,15 @@ static int xgbe_resume(struct device *dev) pdata->lpm_ctrl &= ~MDIO_CTRL1_LPOWER; XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, pdata->lpm_ctrl); - if (netif_running(netdev)) + if (netif_running(netdev)) { ret = xgbe_powerup(netdev, XGMAC_DRIVER_CONTEXT); + /* Schedule a restart in case the link or phy state changed + * while we were powered down. + */ + schedule_work(&pdata->restart_work); + } + DBGPR("<--xgbe_resume\n"); return ret; From 5711a98221443aec54c4c81ee98c6ae46acccb65 Mon Sep 17 00:00:00 2001 From: Vidya Sagar Ravipati Date: Fri, 26 Aug 2016 01:25:50 -0700 Subject: [PATCH 0528/1715] net: ethtool: add support for 1000BaseX and missing 10G link modes This patch enhances ethtool link mode bitmap to include missing interface modes for 1G/10G speeds Changes: 1000baseX is the mode introduced to cover all 1G Fiber cases. All modes under 1000BaseX i.e. 1000BASE-SX, 1000BASE-LX, 1000BASE-LX10 and 1000BASE-BX10 are not explicitly defined at this moment. 10G CR,SR,LR and ER link modes are included for 10G speed.. Issue: ethtool on 1G/10G SFP port reports Base-T as this port supports 1000baseX,10G CR, SR and LR modes. root@tor-02$ ethtool swp1 Settings for swp1: Supported ports: [ FIBRE ] Supported link modes: 1000baseT/Full 10000baseT/Full Supported pause frame use: Symmetric Receive-only Supports auto-negotiation: Yes Advertised link modes: 1000baseT/Full Advertised pause frame use: No Advertised auto-negotiation: No Speed: 10000Mb/s Duplex: Full Port: FIBRE PHYAD: 0 Transceiver: external Auto-negotiation: off Current message level: 0x00000000 (0) Link detected: yes After fix: root@tor-02$ ethtool swp1 Settings for swp1: Supported ports: [ FIBRE ] Supported link modes: 1000baseX/Full 10000baseCR/Full 10000baseSR/Full 10000baseLR/Full 10000baseER/Full Supported pause frame use: Symmetric Receive-only Supports auto-negotiation: Yes Advertised link modes: 1000baseT/Full Advertised pause frame use: No Advertised auto-negotiation: No Speed: 10000Mb/s Duplex: Full Port: FIBRE PHYAD: 0 Transceiver: external Auto-negotiation: off Current message level: 0x00000000 (0) Link detected: yes Signed-off-by: Vidya Sagar Ravipati Signed-off-by: David S. Miller --- include/uapi/linux/ethtool.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index b8f38e84d93a..099a4200732c 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -1362,7 +1362,14 @@ enum ethtool_link_mode_bit_indices { ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT = 37, ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT = 38, ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT = 39, - ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT = 40, + ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT = 40, + ETHTOOL_LINK_MODE_1000baseX_Full_BIT = 41, + ETHTOOL_LINK_MODE_10000baseCR_Full_BIT = 42, + ETHTOOL_LINK_MODE_10000baseSR_Full_BIT = 43, + ETHTOOL_LINK_MODE_10000baseLR_Full_BIT = 44, + ETHTOOL_LINK_MODE_10000baseLRM_Full_BIT = 45, + ETHTOOL_LINK_MODE_10000baseER_Full_BIT = 46, + /* Last allowed bit for __ETHTOOL_LINK_MODE_LEGACY_MASK is bit * 31. Please do NOT define any SUPPORTED_* or ADVERTISED_* @@ -1371,7 +1378,7 @@ enum ethtool_link_mode_bit_indices { */ __ETHTOOL_LINK_MODE_LAST - = ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT, + = ETHTOOL_LINK_MODE_10000baseER_Full_BIT, }; #define __ETHTOOL_LINK_MODE_LEGACY_MASK(base_name) \ From f9dc70744dc74bc9e128d579f2bc85eb7c0ad8ce Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 26 Aug 2016 17:25:46 +0200 Subject: [PATCH 0529/1715] net/xgene: fix error handling during reset The newly added reset logic uses helper functions for the MMIO that may fail. However, when the read operation fails, we end up writing back uninitialized data to the register, as gcc warns: drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c: In function 'xgene_enet_link_state': drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c:213:2: error: 'data' may be used uninitialized in this function [-Werror=maybe-uninitialized] drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c:209:6: note: 'data' was declared here u32 data; We already print a warning to the console log if that happens, the best alternative that I can see is skip the rest of the reset sequence if the register value cannot be read: Most likely the write would fail as well, and if it succeeded, worse things could happen. Signed-off-by: Arnd Bergmann Fixes: 3eb7cb9dc946 ("drivers: net: xgene: XFI PCS reset when link is down") Cc: Fushen Chen Signed-off-by: David S. Miller --- drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c index d672e71b5a50..279ee27004f7 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c @@ -155,19 +155,23 @@ static void xgene_enet_rd_mac(struct xgene_enet_pdata *pdata, rd_addr); } -static void xgene_enet_rd_pcs(struct xgene_enet_pdata *pdata, +static bool xgene_enet_rd_pcs(struct xgene_enet_pdata *pdata, u32 rd_addr, u32 *rd_data) { void __iomem *addr, *rd, *cmd, *cmd_done; + bool success; addr = pdata->pcs_addr + PCS_ADDR_REG_OFFSET; rd = pdata->pcs_addr + PCS_READ_REG_OFFSET; cmd = pdata->pcs_addr + PCS_COMMAND_REG_OFFSET; cmd_done = pdata->pcs_addr + PCS_COMMAND_DONE_REG_OFFSET; - if (!xgene_enet_rd_indirect(addr, rd, cmd, cmd_done, rd_addr, rd_data)) + success = xgene_enet_rd_indirect(addr, rd, cmd, cmd_done, rd_addr, rd_data); + if (!success) netdev_err(pdata->ndev, "PCS read failed, addr: %04x\n", rd_addr); + + return success; } static int xgene_enet_ecc_init(struct xgene_enet_pdata *pdata) @@ -208,7 +212,9 @@ static void xgene_pcs_reset(struct xgene_enet_pdata *pdata) { u32 data; - xgene_enet_rd_pcs(pdata, PCS_CONTROL_1, &data); + if (!xgene_enet_rd_pcs(pdata, PCS_CONTROL_1, &data)) + return; + xgene_enet_wr_pcs(pdata, PCS_CONTROL_1, data | PCS_CTRL_PCS_RST); xgene_enet_wr_pcs(pdata, PCS_CONTROL_1, data & ~PCS_CTRL_PCS_RST); } From 0b498a52778368ff501557d68c7b50878ab1701e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 26 Aug 2016 17:25:45 +0200 Subject: [PATCH 0530/1715] net_sched: fix use of uninitialized ethertype variable in cls_flower The addition of VLAN support caused a possible use of uninitialized data if we encounter a zero TCA_FLOWER_KEY_ETH_TYPE key, as pointed out by "gcc -Wmaybe-uninitialized": net/sched/cls_flower.c: In function 'fl_change': net/sched/cls_flower.c:366:22: error: 'ethertype' may be used uninitialized in this function [-Werror=maybe-uninitialized] This changes the code to only set the ethertype field if it was nonzero, as before the patch. Signed-off-by: Arnd Bergmann Fixes: 9399ae9a6cb2 ("net_sched: flower: Add vlan support") Cc: Hadar Hen Zion Cc: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/cls_flower.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 532ab6751343..cf9ad5b50889 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -353,18 +353,19 @@ static int fl_set_key(struct net *net, struct nlattr **tb, mask->eth.src, TCA_FLOWER_KEY_ETH_SRC_MASK, sizeof(key->eth.src)); - if (tb[TCA_FLOWER_KEY_ETH_TYPE]) + if (tb[TCA_FLOWER_KEY_ETH_TYPE]) { ethertype = nla_get_be16(tb[TCA_FLOWER_KEY_ETH_TYPE]); - if (ethertype == htons(ETH_P_8021Q)) { - fl_set_key_vlan(tb, &key->vlan, &mask->vlan); - fl_set_key_val(tb, &key->basic.n_proto, - TCA_FLOWER_KEY_VLAN_ETH_TYPE, - &mask->basic.n_proto, TCA_FLOWER_UNSPEC, - sizeof(key->basic.n_proto)); - } else { - key->basic.n_proto = ethertype; - mask->basic.n_proto = cpu_to_be16(~0); + if (ethertype == htons(ETH_P_8021Q)) { + fl_set_key_vlan(tb, &key->vlan, &mask->vlan); + fl_set_key_val(tb, &key->basic.n_proto, + TCA_FLOWER_KEY_VLAN_ETH_TYPE, + &mask->basic.n_proto, TCA_FLOWER_UNSPEC, + sizeof(key->basic.n_proto)); + } else { + key->basic.n_proto = ethertype; + mask->basic.n_proto = cpu_to_be16(~0); + } } if (key->basic.n_proto == htons(ETH_P_IP) || From e59a393d089d08a4622de07f941dd3629fcaec6a Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 9 Jun 2016 12:02:03 -0700 Subject: [PATCH 0531/1715] fm10k: fix PCI device enable_cnt leak in .io_slot_reset A previous patch removed the pci_disable_device() call in .io_error_detected. This call corresponded to a pci_enable_device_mem() call within .io_slot_reset handler. Change the call here to a pci_reenable_device() so that it does not increment and leak the enable_cnt reference count for the device. Without this change, VF devices may fail during an unbind/bind, and we'll never zero the reference counter for the pci_dev structure. Signed-off-by: Jacob Keller Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k_pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c index 774a5654bf42..377a3fbb779f 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c @@ -2275,7 +2275,7 @@ static pci_ers_result_t fm10k_io_slot_reset(struct pci_dev *pdev) { pci_ers_result_t result; - if (pci_enable_device_mem(pdev)) { + if (pci_reenable_device(pdev)) { dev_err(&pdev->dev, "Cannot re-enable PCI device after reset.\n"); result = PCI_ERS_RESULT_DISCONNECT; From 5b9e4432db038eefcafe2be468e3adaf2edbbe91 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 9 Jun 2016 14:56:05 -0700 Subject: [PATCH 0532/1715] fm10k: use software values when checking for Tx hangs in hot path A previous patch added support to check for hardware Tx pending in the fm10k_down routine. This support was intended to ensure that we accurately check what the hardware state is. However, checking for Tx hangs in this manor during the hotpath results in a large performance hit. Avoid this by making the hotpath check use the SW counters instead. Fixes: a0f53cf49cb0 ("fm10k: use actual hardware registers when checking for pending Tx", 2016-06-08) Signed-off-by: Jacob Keller Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k.h | 2 +- drivers/net/ethernet/intel/fm10k/fm10k_main.c | 19 +++++++++++++++---- drivers/net/ethernet/intel/fm10k/fm10k_pci.c | 2 +- 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k.h b/drivers/net/ethernet/intel/fm10k/fm10k.h index c4cf08dcf5af..75429f2bbc97 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k.h +++ b/drivers/net/ethernet/intel/fm10k/fm10k.h @@ -458,7 +458,7 @@ __be16 fm10k_tx_encap_offload(struct sk_buff *skb); netdev_tx_t fm10k_xmit_frame_ring(struct sk_buff *skb, struct fm10k_ring *tx_ring); void fm10k_tx_timeout_reset(struct fm10k_intfc *interface); -u64 fm10k_get_tx_pending(struct fm10k_ring *ring); +u64 fm10k_get_tx_pending(struct fm10k_ring *ring, bool in_sw); bool fm10k_check_tx_hang(struct fm10k_ring *tx_ring); void fm10k_alloc_rx_buffers(struct fm10k_ring *rx_ring, u16 cleaned_count); diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c index e9767b6366a8..0d6e69d1586a 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c @@ -1128,13 +1128,24 @@ static u64 fm10k_get_tx_completed(struct fm10k_ring *ring) return ring->stats.packets; } -u64 fm10k_get_tx_pending(struct fm10k_ring *ring) +/** + * fm10k_get_tx_pending - how many Tx descriptors not processed + * @ring: the ring structure + * @in_sw: is tx_pending being checked in SW or in HW? + */ +u64 fm10k_get_tx_pending(struct fm10k_ring *ring, bool in_sw) { struct fm10k_intfc *interface = ring->q_vector->interface; struct fm10k_hw *hw = &interface->hw; + u32 head, tail; - u32 head = fm10k_read_reg(hw, FM10K_TDH(ring->reg_idx)); - u32 tail = fm10k_read_reg(hw, FM10K_TDT(ring->reg_idx)); + if (likely(in_sw)) { + head = ring->next_to_clean; + tail = ring->next_to_use; + } else { + head = fm10k_read_reg(hw, FM10K_TDH(ring->reg_idx)); + tail = fm10k_read_reg(hw, FM10K_TDT(ring->reg_idx)); + } return ((head <= tail) ? tail : tail + ring->count) - head; } @@ -1143,7 +1154,7 @@ bool fm10k_check_tx_hang(struct fm10k_ring *tx_ring) { u32 tx_done = fm10k_get_tx_completed(tx_ring); u32 tx_done_old = tx_ring->tx_stats.tx_done_old; - u32 tx_pending = fm10k_get_tx_pending(tx_ring); + u32 tx_pending = fm10k_get_tx_pending(tx_ring, true); clear_check_for_tx_hang(tx_ring); diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c index 377a3fbb779f..20c2a0cf83da 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c @@ -1699,7 +1699,7 @@ void fm10k_down(struct fm10k_intfc *interface) /* start checking at the last ring to have pending Tx */ for (; i < interface->num_tx_queues; i++) - if (fm10k_get_tx_pending(interface->tx_ring[i])) + if (fm10k_get_tx_pending(interface->tx_ring[i], false)) break; /* if all the queues are drained, we can break now */ From 4aa0bd54d4234d7bdc7cce331e7097c9393aca6e Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 9 Jun 2016 15:42:36 -0700 Subject: [PATCH 0533/1715] fm10k: use variadic form of alloc_workqueue Signed-off-by: Jacob Keller Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c index 0d6e69d1586a..316f95b2b600 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c @@ -56,7 +56,7 @@ static int __init fm10k_init_module(void) pr_info("%s\n", fm10k_copyright); /* create driver workqueue */ - fm10k_workqueue = alloc_workqueue("fm10k", WQ_MEM_RECLAIM, 0); + fm10k_workqueue = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0, fm10k_driver_name); fm10k_dbg_init(); From 88cdcfec9a46af47291207d86493e919093df3d1 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Fri, 17 Jun 2016 14:36:45 -0700 Subject: [PATCH 0534/1715] fm10k: remove fm10k_get_reta_size from namespace The function is only used in fm10k_ethtool.c, so make it static. Signed-off-by: Jacob Keller Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k.h | 1 - drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k.h b/drivers/net/ethernet/intel/fm10k/fm10k.h index 75429f2bbc97..e9850697be04 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k.h +++ b/drivers/net/ethernet/intel/fm10k/fm10k.h @@ -496,7 +496,6 @@ int fm10k_close(struct net_device *netdev); /* Ethtool */ void fm10k_set_ethtool_ops(struct net_device *dev); -u32 fm10k_get_reta_size(struct net_device *netdev); void fm10k_write_reta(struct fm10k_intfc *interface, const u32 *indir); /* IOV */ diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c b/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c index c04cbe9c9f7c..adb7cb4311ba 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c @@ -966,7 +966,7 @@ static int fm10k_set_priv_flags(struct net_device *netdev, u32 priv_flags) return 0; } -u32 fm10k_get_reta_size(struct net_device __always_unused *netdev) +static u32 fm10k_get_reta_size(struct net_device __always_unused *netdev) { return FM10K_RETA_SIZE * FM10K_RETA_ENTRIES_PER_REG; } From ce4dad2ce231aa5258ddfc98f8a80d958643c014 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Fri, 17 Jun 2016 16:21:11 -0700 Subject: [PATCH 0535/1715] fm10k: prefer READ_ONCE instead of ACCESS_ONCE While technically not needed, as all our uses of ACCESS_ONCE are scalar types, we already use READ_ONCE in a few places, and for code readability we can swap all the uses of the older ACCESS_ONCE into READ_ONCE. Signed-off-by: Jacob Keller Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k_common.h | 4 ++-- drivers/net/ethernet/intel/fm10k/fm10k_iov.c | 4 ++-- drivers/net/ethernet/intel/fm10k/fm10k_main.c | 2 +- drivers/net/ethernet/intel/fm10k/fm10k_netdev.c | 6 +++--- drivers/net/ethernet/intel/fm10k/fm10k_pci.c | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_common.h b/drivers/net/ethernet/intel/fm10k/fm10k_common.h index 50f71e997448..d51f9c7a47ff 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_common.h +++ b/drivers/net/ethernet/intel/fm10k/fm10k_common.h @@ -34,7 +34,7 @@ u32 fm10k_read_reg(struct fm10k_hw *hw, int reg); /* write operations, indexed using DWORDS */ #define fm10k_write_reg(hw, reg, val) \ do { \ - u32 __iomem *hw_addr = ACCESS_ONCE((hw)->hw_addr); \ + u32 __iomem *hw_addr = READ_ONCE((hw)->hw_addr); \ if (!FM10K_REMOVED(hw_addr)) \ writel((val), &hw_addr[(reg)]); \ } while (0) @@ -42,7 +42,7 @@ do { \ /* Switch register write operations, index using DWORDS */ #define fm10k_write_sw_reg(hw, reg, val) \ do { \ - u32 __iomem *sw_addr = ACCESS_ONCE((hw)->sw_addr); \ + u32 __iomem *sw_addr = READ_ONCE((hw)->sw_addr); \ if (!FM10K_REMOVED(sw_addr)) \ writel((val), &sw_addr[(reg)]); \ } while (0) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c index 47f0743ec03b..d9dec81f6b6d 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c @@ -51,7 +51,7 @@ s32 fm10k_iov_event(struct fm10k_intfc *interface) int i; /* if there is no iov_data then there is no mailbox to process */ - if (!ACCESS_ONCE(interface->iov_data)) + if (!READ_ONCE(interface->iov_data)) return 0; rcu_read_lock(); @@ -99,7 +99,7 @@ s32 fm10k_iov_mbx(struct fm10k_intfc *interface) int i; /* if there is no iov_data then there is no mailbox to process */ - if (!ACCESS_ONCE(interface->iov_data)) + if (!READ_ONCE(interface->iov_data)) return 0; rcu_read_lock(); diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c index 316f95b2b600..4b65364fb322 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c @@ -1408,7 +1408,7 @@ static void fm10k_update_itr(struct fm10k_ring_container *ring_container) * that the calculation will never get below a 1. The bit shift * accounts for changes in the ITR due to PCIe link speed. */ - itr_round = ACCESS_ONCE(ring_container->itr_scale) + 8; + itr_round = READ_ONCE(ring_container->itr_scale) + 8; avg_wire_size += BIT(itr_round) - 1; avg_wire_size >>= itr_round; diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c index 20a5bbe3f536..855833e85aac 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c @@ -1098,7 +1098,7 @@ static struct rtnl_link_stats64 *fm10k_get_stats64(struct net_device *netdev, rcu_read_lock(); for (i = 0; i < interface->num_rx_queues; i++) { - ring = ACCESS_ONCE(interface->rx_ring[i]); + ring = READ_ONCE(interface->rx_ring[i]); if (!ring) continue; @@ -1114,7 +1114,7 @@ static struct rtnl_link_stats64 *fm10k_get_stats64(struct net_device *netdev, } for (i = 0; i < interface->num_tx_queues; i++) { - ring = ACCESS_ONCE(interface->tx_ring[i]); + ring = READ_ONCE(interface->tx_ring[i]); if (!ring) continue; @@ -1299,7 +1299,7 @@ static void *fm10k_dfwd_add_station(struct net_device *dev, static void fm10k_dfwd_del_station(struct net_device *dev, void *priv) { struct fm10k_intfc *interface = netdev_priv(dev); - struct fm10k_l2_accel *l2_accel = ACCESS_ONCE(interface->l2_accel); + struct fm10k_l2_accel *l2_accel = READ_ONCE(interface->l2_accel); struct fm10k_dglort_cfg dglort = { 0 }; struct fm10k_hw *hw = &interface->hw; struct net_device *sdev = priv; diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c index 20c2a0cf83da..11b29552f664 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c @@ -62,7 +62,7 @@ u16 fm10k_read_pci_cfg_word(struct fm10k_hw *hw, u32 reg) u32 fm10k_read_reg(struct fm10k_hw *hw, int reg) { - u32 __iomem *hw_addr = ACCESS_ONCE(hw->hw_addr); + u32 __iomem *hw_addr = READ_ONCE(hw->hw_addr); u32 value = 0; if (FM10K_REMOVED(hw_addr)) From e5fbfb78641ff0c5139ae665289ed9f91524265e Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 20 Jun 2016 10:39:32 -0700 Subject: [PATCH 0536/1715] fm10k: NAPI polling routine must return actual work done When fm10k_poll fully cleans rings it returns 0. This is incorrect as it messes up the budget accounting in the core NAPI code. Fix this by returning actual work done, capped at budget - 1 since the core doesn't expect a return of the full budget when the driver modifies the NAPI status. Cc: Paolo Abeni Cc: Venkatesh Srinivas Signed-off-by: Jacob Keller Acked-by: Paolo Abeni Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c index 4b65364fb322..32d44290b04f 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c @@ -1484,7 +1484,7 @@ static int fm10k_poll(struct napi_struct *napi, int budget) /* re-enable the q_vector */ fm10k_qv_enable(q_vector); - return 0; + return min(work_done, budget - 1); } /** From 76ef0fc5a7519690a6a87a248f5709463f584292 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 23 Jun 2016 13:31:00 -0700 Subject: [PATCH 0537/1715] fm10k: print error code when pci_enable_device_mem fails during probe Signed-off-by: Jacob Keller Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k_pci.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c index 11b29552f664..e5f37b788196 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c @@ -1951,8 +1951,11 @@ static int fm10k_probe(struct pci_dev *pdev, const struct pci_device_id *ent) int err; err = pci_enable_device_mem(pdev); - if (err) + if (err) { + dev_err(&pdev->dev, + "PCI enable device failed: %d\n", err); return err; + } err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48)); if (err) From 18095937cb1c66b8ff944de02cf04d1497008352 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 23 Jun 2016 13:31:01 -0700 Subject: [PATCH 0538/1715] fm10k: don't continue probe if PCI device not in normal IO state In the event of an uncorrectable AER error occurring when the driver has not loaded, the recovery routines are not done. This is done because future loads of the driver may not be aware of the IO state and may not be able to recover at all. In this case, when we next load the driver it fails due to what appears to be a surprise remove event. Instead, add a check to ensure that the device is in the normal IO state before continuing to probe. This allows us to give a more descriptive message of what is wrong. Without this change, the driver will attempt to probe up to our first call of .reset_hw() which will be unable to read registers and act as if a surprise remove event occurred. Signed-off-by: Jacob Keller Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k_pci.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c index e5f37b788196..860fe04f4c72 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c @@ -1950,6 +1950,12 @@ static int fm10k_probe(struct pci_dev *pdev, const struct pci_device_id *ent) struct fm10k_intfc *interface; int err; + if (pdev->error_state != pci_channel_io_normal) { + dev_err(&pdev->dev, + "PCI device still in an error state. Unable to load...\n"); + return -EIO; + } + err = pci_enable_device_mem(pdev); if (err) { dev_err(&pdev->dev, From 5f45c83024c49f71e50cf8156b165f11d2ab35a9 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 23 Jun 2016 13:54:01 -0700 Subject: [PATCH 0539/1715] fm10k: don't try to stop queues if we've lost hw_addr In the event of a surprise remove, we expect the driver to go down, which includes calling .stop_hw(). However, this function will return an error because the queues won't appear to cleanly disable. Prevent this and avoid the unnecessary checks by just returning when FM10K_REMOVED(hw->hw_addr) is true. Signed-off-by: Jacob Keller Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k_common.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_common.c b/drivers/net/ethernet/intel/fm10k/fm10k_common.c index d6baaea8bc7c..dd95ac4f4c64 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_common.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_common.c @@ -207,6 +207,9 @@ s32 fm10k_disable_queues_generic(struct fm10k_hw *hw, u16 q_cnt) /* clear tx_ready to prevent any false hits for reset */ hw->mac.tx_ready = false; + if (FM10K_REMOVED(hw->hw_addr)) + return 0; + /* clear the enable bit for all rings */ for (i = 0; i < q_cnt; i++) { reg = fm10k_read_reg(hw, FM10K_TXDCTL(i)); From f92e0e489225cae41290f8b42bf04128b2451789 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Fri, 26 Aug 2016 00:14:34 -0700 Subject: [PATCH 0540/1715] fm10k: rework vxlan_port offload before adding geneve support In preparation for adding Geneve Rx offload support, refactor the current VXLAN offload flow to be a bit more generic so that it will be easier to add the new Geneve code. The fm10k hardware supports one VXLAN and one Geneve tunnel, so we will eventually treat the VXLAN and Geneve tunnels identically. To this end, factor out the code that handles the current list so that we can use the generic flow for both tunnels in the next patch. Signed-off-by: Jacob Keller Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k.h | 6 +- drivers/net/ethernet/intel/fm10k/fm10k_main.c | 4 +- .../net/ethernet/intel/fm10k/fm10k_netdev.c | 159 ++++++++++-------- 3 files changed, 92 insertions(+), 77 deletions(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k.h b/drivers/net/ethernet/intel/fm10k/fm10k.h index e9850697be04..209268a14712 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k.h +++ b/drivers/net/ethernet/intel/fm10k/fm10k.h @@ -240,9 +240,7 @@ struct fm10k_iov_data { struct fm10k_vf_info vf_info[0]; }; -#define fm10k_vxlan_port_for_each(vp, intfc) \ - list_for_each_entry(vp, &(intfc)->vxlan_port, list) -struct fm10k_vxlan_port { +struct fm10k_udp_port { struct list_head list; sa_family_t sa_family; __be16 port; @@ -335,7 +333,7 @@ struct fm10k_intfc { u32 reta[FM10K_RETA_SIZE]; u32 rssrk[FM10K_RSSRK_SIZE]; - /* VXLAN port tracking information */ + /* UDP encapsulation port tracking information */ struct list_head vxlan_port; #ifdef CONFIG_DEBUG_FS diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c index 32d44290b04f..0d39103124bd 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c @@ -651,11 +651,11 @@ static int fm10k_clean_rx_irq(struct fm10k_q_vector *q_vector, static struct ethhdr *fm10k_port_is_vxlan(struct sk_buff *skb) { struct fm10k_intfc *interface = netdev_priv(skb->dev); - struct fm10k_vxlan_port *vxlan_port; + struct fm10k_udp_port *vxlan_port; /* we can only offload a vxlan if we recognize it as such */ vxlan_port = list_first_entry_or_null(&interface->vxlan_port, - struct fm10k_vxlan_port, list); + struct fm10k_udp_port, list); if (!vxlan_port) return NULL; diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c index 855833e85aac..07c5a685f0e9 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c @@ -384,129 +384,147 @@ static void fm10k_request_glort_range(struct fm10k_intfc *interface) } /** - * fm10k_del_vxlan_port_all + * fm10k_free_udp_port_info * @interface: board private structure * * This function frees the entire vxlan_port list **/ -static void fm10k_del_vxlan_port_all(struct fm10k_intfc *interface) +static void fm10k_free_udp_port_info(struct fm10k_intfc *interface) { - struct fm10k_vxlan_port *vxlan_port; + struct fm10k_udp_port *port; - /* flush all entries from list */ - vxlan_port = list_first_entry_or_null(&interface->vxlan_port, - struct fm10k_vxlan_port, list); - while (vxlan_port) { - list_del(&vxlan_port->list); - kfree(vxlan_port); - vxlan_port = list_first_entry_or_null(&interface->vxlan_port, - struct fm10k_vxlan_port, - list); + /* flush all entries from vxlan list */ + port = list_first_entry_or_null(&interface->vxlan_port, + struct fm10k_udp_port, list); + while (port) { + list_del(&port->list); + kfree(port); + port = list_first_entry_or_null(&interface->vxlan_port, + struct fm10k_udp_port, + list); } } /** - * fm10k_restore_vxlan_port + * fm10k_restore_udp_port_info * @interface: board private structure * - * This function restores the value in the tunnel_cfg register after reset + * This function restores the value in the tunnel_cfg register(s) after reset **/ -static void fm10k_restore_vxlan_port(struct fm10k_intfc *interface) +static void fm10k_restore_udp_port_info(struct fm10k_intfc *interface) { struct fm10k_hw *hw = &interface->hw; - struct fm10k_vxlan_port *vxlan_port; + struct fm10k_udp_port *port; /* only the PF supports configuring tunnels */ if (hw->mac.type != fm10k_mac_pf) return; - vxlan_port = list_first_entry_or_null(&interface->vxlan_port, - struct fm10k_vxlan_port, list); + port = list_first_entry_or_null(&interface->vxlan_port, + struct fm10k_udp_port, list); /* restore tunnel configuration register */ fm10k_write_reg(hw, FM10K_TUNNEL_CFG, - (vxlan_port ? ntohs(vxlan_port->port) : 0) | + (port ? ntohs(port->port) : 0) | (ETH_P_TEB << FM10K_TUNNEL_CFG_NVGRE_SHIFT)); } +static struct fm10k_udp_port * +fm10k_remove_tunnel_port(struct list_head *ports, + struct udp_tunnel_info *ti) +{ + struct fm10k_udp_port *port; + + list_for_each_entry(port, ports, list) { + if ((port->port == ti->port) && + (port->sa_family == ti->sa_family)) { + list_del(&port->list); + return port; + } + } + + return NULL; +} + +static void fm10k_insert_tunnel_port(struct list_head *ports, + struct udp_tunnel_info *ti) +{ + struct fm10k_udp_port *port; + + /* remove existing port entry from the list so that the newest items + * are always at the tail of the list. + */ + port = fm10k_remove_tunnel_port(ports, ti); + if (!port) { + port = kmalloc(sizeof(*port), GFP_ATOMIC); + if (!port) + return; + port->port = ti->port; + port->sa_family = ti->sa_family; + } + + list_add_tail(&port->list, ports); +} + /** - * fm10k_add_vxlan_port + * fm10k_udp_tunnel_add * @netdev: network interface device structure * @ti: Tunnel endpoint information * - * This function is called when a new VXLAN interface has added a new port - * number to the range that is currently in use for VXLAN. The new port - * number is always added to the tail so that the port number list should - * match the order in which the ports were allocated. The head of the list - * is always used as the VXLAN port number for offloads. + * This function is called when a new UDP tunnel port has been added. + * Currently we only support VXLAN and only one port will actually be + * offloaded due to hardware restrictions. **/ -static void fm10k_add_vxlan_port(struct net_device *dev, +static void fm10k_udp_tunnel_add(struct net_device *dev, struct udp_tunnel_info *ti) { struct fm10k_intfc *interface = netdev_priv(dev); - struct fm10k_vxlan_port *vxlan_port; - if (ti->type != UDP_TUNNEL_TYPE_VXLAN) - return; /* only the PF supports configuring tunnels */ if (interface->hw.mac.type != fm10k_mac_pf) return; - /* existing ports are pulled out so our new entry is always last */ - fm10k_vxlan_port_for_each(vxlan_port, interface) { - if ((vxlan_port->port == ti->port) && - (vxlan_port->sa_family == ti->sa_family)) { - list_del(&vxlan_port->list); - goto insert_tail; - } + switch (ti->type) { + case UDP_TUNNEL_TYPE_VXLAN: + fm10k_insert_tunnel_port(&interface->vxlan_port, ti); + break; + default: + return; } - /* allocate memory to track ports */ - vxlan_port = kmalloc(sizeof(*vxlan_port), GFP_ATOMIC); - if (!vxlan_port) - return; - vxlan_port->port = ti->port; - vxlan_port->sa_family = ti->sa_family; - -insert_tail: - /* add new port value to list */ - list_add_tail(&vxlan_port->list, &interface->vxlan_port); - - fm10k_restore_vxlan_port(interface); + fm10k_restore_udp_port_info(interface); } /** - * fm10k_del_vxlan_port + * fm10k_udp_tunnel_del * @netdev: network interface device structure * @ti: Tunnel endpoint information * - * This function is called when a new VXLAN interface has freed a port - * number from the range that is currently in use for VXLAN. The freed - * port is removed from the list and the new head is used to determine - * the port number for offloads. + * This function is called when a new UDP tunnel port is deleted. The freed + * port will be removed from the list, then we reprogram the offloaded port + * based on the head of the list. **/ -static void fm10k_del_vxlan_port(struct net_device *dev, +static void fm10k_udp_tunnel_del(struct net_device *dev, struct udp_tunnel_info *ti) { struct fm10k_intfc *interface = netdev_priv(dev); - struct fm10k_vxlan_port *vxlan_port; + struct fm10k_udp_port *port = NULL; - if (ti->type != UDP_TUNNEL_TYPE_VXLAN) - return; if (interface->hw.mac.type != fm10k_mac_pf) return; - /* find the port in the list and free it */ - fm10k_vxlan_port_for_each(vxlan_port, interface) { - if ((vxlan_port->port == ti->port) && - (vxlan_port->sa_family == ti->sa_family)) { - list_del(&vxlan_port->list); - kfree(vxlan_port); - break; - } + switch (ti->type) { + case UDP_TUNNEL_TYPE_VXLAN: + port = fm10k_remove_tunnel_port(&interface->vxlan_port, ti); + break; + default: + return; } - fm10k_restore_vxlan_port(interface); + /* if we did remove a port we need to free its memory */ + kfree(port); + + fm10k_restore_udp_port_info(interface); } /** @@ -555,7 +573,6 @@ int fm10k_open(struct net_device *netdev) if (err) goto err_set_queues; - /* update VXLAN port configuration */ udp_tunnel_get_rx_info(netdev); fm10k_up(interface); @@ -591,7 +608,7 @@ int fm10k_close(struct net_device *netdev) fm10k_qv_free_irq(interface); - fm10k_del_vxlan_port_all(interface); + fm10k_free_udp_port_info(interface); fm10k_free_all_tx_resources(interface); fm10k_free_all_rx_resources(interface); @@ -1055,7 +1072,7 @@ void fm10k_restore_rx_state(struct fm10k_intfc *interface) interface->xcast_mode = xcast_mode; /* Restore tunnel configuration */ - fm10k_restore_vxlan_port(interface); + fm10k_restore_udp_port_info(interface); } void fm10k_reset_rx_state(struct fm10k_intfc *interface) @@ -1375,8 +1392,8 @@ static const struct net_device_ops fm10k_netdev_ops = { .ndo_set_vf_vlan = fm10k_ndo_set_vf_vlan, .ndo_set_vf_rate = fm10k_ndo_set_vf_bw, .ndo_get_vf_config = fm10k_ndo_get_vf_config, - .ndo_udp_tunnel_add = fm10k_add_vxlan_port, - .ndo_udp_tunnel_del = fm10k_del_vxlan_port, + .ndo_udp_tunnel_add = fm10k_udp_tunnel_add, + .ndo_udp_tunnel_del = fm10k_udp_tunnel_del, .ndo_dfwd_add_station = fm10k_dfwd_add_station, .ndo_dfwd_del_station = fm10k_dfwd_del_station, #ifdef CONFIG_NET_POLL_CONTROLLER From 1ad782928f16e7f1b5269ce4358caffe566f44db Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 23 Jun 2016 13:54:03 -0700 Subject: [PATCH 0541/1715] fm10k: add support for Rx offloads on one Geneve tunnel Similar to how we handle VXLAN offload, enable support for a single Geneve tunnel. Signed-off-by: Jacob Keller Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k.h | 1 + .../net/ethernet/intel/fm10k/fm10k_netdev.c | 30 +++++++++++++++++-- drivers/net/ethernet/intel/fm10k/fm10k_pci.c | 3 +- drivers/net/ethernet/intel/fm10k/fm10k_type.h | 1 + 4 files changed, 31 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k.h b/drivers/net/ethernet/intel/fm10k/fm10k.h index 209268a14712..67ff01aeb11a 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k.h +++ b/drivers/net/ethernet/intel/fm10k/fm10k.h @@ -335,6 +335,7 @@ struct fm10k_intfc { /* UDP encapsulation port tracking information */ struct list_head vxlan_port; + struct list_head geneve_port; #ifdef CONFIG_DEBUG_FS struct dentry *dbg_intfc; diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c index 07c5a685f0e9..05629381be6b 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c @@ -387,7 +387,7 @@ static void fm10k_request_glort_range(struct fm10k_intfc *interface) * fm10k_free_udp_port_info * @interface: board private structure * - * This function frees the entire vxlan_port list + * This function frees both geneve_port and vxlan_port structures **/ static void fm10k_free_udp_port_info(struct fm10k_intfc *interface) { @@ -403,6 +403,17 @@ static void fm10k_free_udp_port_info(struct fm10k_intfc *interface) struct fm10k_udp_port, list); } + + /* flush all entries from geneve list */ + port = list_first_entry_or_null(&interface->geneve_port, + struct fm10k_udp_port, list); + while (port) { + list_del(&port->list); + kfree(port); + port = list_first_entry_or_null(&interface->vxlan_port, + struct fm10k_udp_port, + list); + } } /** @@ -427,6 +438,13 @@ static void fm10k_restore_udp_port_info(struct fm10k_intfc *interface) fm10k_write_reg(hw, FM10K_TUNNEL_CFG, (port ? ntohs(port->port) : 0) | (ETH_P_TEB << FM10K_TUNNEL_CFG_NVGRE_SHIFT)); + + port = list_first_entry_or_null(&interface->geneve_port, + struct fm10k_udp_port, list); + + /* restore Geneve tunnel configuration register */ + fm10k_write_reg(hw, FM10K_TUNNEL_CFG_GENEVE, + (port ? ntohs(port->port) : 0)); } static struct fm10k_udp_port * @@ -472,8 +490,8 @@ static void fm10k_insert_tunnel_port(struct list_head *ports, * @ti: Tunnel endpoint information * * This function is called when a new UDP tunnel port has been added. - * Currently we only support VXLAN and only one port will actually be - * offloaded due to hardware restrictions. + * Due to hardware restrictions, only one port per type can be offloaded at + * once. **/ static void fm10k_udp_tunnel_add(struct net_device *dev, struct udp_tunnel_info *ti) @@ -488,6 +506,9 @@ static void fm10k_udp_tunnel_add(struct net_device *dev, case UDP_TUNNEL_TYPE_VXLAN: fm10k_insert_tunnel_port(&interface->vxlan_port, ti); break; + case UDP_TUNNEL_TYPE_GENEVE: + fm10k_insert_tunnel_port(&interface->geneve_port, ti); + break; default: return; } @@ -517,6 +538,9 @@ static void fm10k_udp_tunnel_del(struct net_device *dev, case UDP_TUNNEL_TYPE_VXLAN: port = fm10k_remove_tunnel_port(&interface->vxlan_port, ti); break; + case UDP_TUNNEL_TYPE_GENEVE: + port = fm10k_remove_tunnel_port(&interface->geneve_port, ti); + break; default: return; } diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c index 860fe04f4c72..0a20ca168535 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c @@ -1835,8 +1835,9 @@ static int fm10k_sw_init(struct fm10k_intfc *interface, interface->tx_itr = FM10K_TX_ITR_DEFAULT; interface->rx_itr = FM10K_ITR_ADAPTIVE | FM10K_RX_ITR_DEFAULT; - /* initialize vxlan_port list */ + /* initialize udp port lists */ INIT_LIST_HEAD(&interface->vxlan_port); + INIT_LIST_HEAD(&interface->geneve_port); netdev_rss_key_fill(rss_key, sizeof(rss_key)); memcpy(interface->rssrk, rss_key, sizeof(rss_key)); diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_type.h b/drivers/net/ethernet/intel/fm10k/fm10k_type.h index f4e75c498287..6bb16c13d9d6 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_type.h +++ b/drivers/net/ethernet/intel/fm10k/fm10k_type.h @@ -154,6 +154,7 @@ struct fm10k_hw; #define FM10K_DGLORTDEC_INNERRSS_ENABLE 0x08000000 #define FM10K_TUNNEL_CFG 0x0040 #define FM10K_TUNNEL_CFG_NVGRE_SHIFT 16 +#define FM10K_TUNNEL_CFG_GENEVE 0x0041 #define FM10K_SWPRI_MAP(_n) ((_n) + 0x0050) #define FM10K_SWPRI_MAX 16 #define FM10K_RSSRK(_n, _m) (((_n) * 0x10) + (_m) + 0x0800) From 9717c7721302d217fb73f59eb3ab6314ffff54aa Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Fri, 22 Jul 2016 16:00:37 -0700 Subject: [PATCH 0542/1715] fm10k: remove unnecessary extra parenthesis around ((~value)) Signed-off-by: Jacob Keller Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k_pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c index 0a20ca168535..d7f1ecad9378 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c @@ -133,7 +133,7 @@ static void fm10k_detach_subtask(struct fm10k_intfc *interface) /* check the real address space to see if we've recovered */ hw_addr = READ_ONCE(interface->uc_addr); value = readl(hw_addr); - if ((~value)) { + if (~value) { interface->hw.hw_addr = interface->uc_addr; netif_device_attach(netdev); interface->flags |= FM10K_FLAG_RESET_REQUESTED; From c689eff124cb231d91777a447fa05b30939da7b1 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 3 Aug 2016 15:05:27 -0700 Subject: [PATCH 0543/1715] fm10k: don't clear the RXQCTL register when enabling or disabling queues Ensure that other bits in the RXQCTL register do not get cleared. This ensures that bits related to queue ownership are maintained. Signed-off-by: Jacob Keller Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k_pci.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c index d7f1ecad9378..b1a2f8437d59 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c @@ -734,15 +734,15 @@ static void fm10k_configure_rx_ring(struct fm10k_intfc *interface, u64 rdba = ring->dma; struct fm10k_hw *hw = &interface->hw; u32 size = ring->count * sizeof(union fm10k_rx_desc); - u32 rxqctl = FM10K_RXQCTL_ENABLE | FM10K_RXQCTL_PF; - u32 rxdctl = FM10K_RXDCTL_WRITE_BACK_MIN_DELAY; + u32 rxqctl, rxdctl = FM10K_RXDCTL_WRITE_BACK_MIN_DELAY; u32 srrctl = FM10K_SRRCTL_BUFFER_CHAINING_EN; u32 rxint = FM10K_INT_MAP_DISABLE; u8 rx_pause = interface->rx_pause; u8 reg_idx = ring->reg_idx; /* disable queue to avoid issues while updating state */ - fm10k_write_reg(hw, FM10K_RXQCTL(reg_idx), 0); + rxqctl = fm10k_read_reg(hw, FM10K_RXQCTL(reg_idx)); + rxqctl &= ~FM10K_RXQCTL_ENABLE; fm10k_write_flush(hw); /* possible poll here to verify ring resources have been cleaned */ @@ -797,6 +797,8 @@ static void fm10k_configure_rx_ring(struct fm10k_intfc *interface, fm10k_write_reg(hw, FM10K_RXINT(reg_idx), rxint); /* enable queue */ + rxqctl = fm10k_read_reg(hw, FM10K_RXQCTL(reg_idx)); + rxqctl |= FM10K_RXQCTL_ENABLE; fm10k_write_reg(hw, FM10K_RXQCTL(reg_idx), rxqctl); /* place buffers on ring for receive data */ From 325782a173d1858bb67a827905e264cd128241d0 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 8 Aug 2016 17:08:19 -0700 Subject: [PATCH 0544/1715] fm10k: don't re-map queues when a mailbox message suffices When the PF assigns a new MAC address to a VF it uses the base address registers to store the MAC address. This allows a VF which loads after this setup the ability to get the initial address without having to wait for a mailbox message. Unfortunately to do this, the PF must take queue ownership away from the VF, which can cause fault errors when there is already an active VF driver. This queue ownership assignment causes race condition between the PF and the VF such that potentially a VF can cause FUM fault errors due to normal PF/VF driver behavior. It is not safe to simply allow the PF to write the base address registers without taking queue ownership back as the PF must also disable the queues, and this would impact active VF use. The current code is safe because the queue ownership will prevent the VF from actually writing but does trigger the FUM fault. We can do better by simply avoiding the register write process when a mailbox message suffices. If the message can be sent over the mailbox, then we will not perform the queue ownership assignment and we won't update the base address to be the same as the MAC address. We do still have to write the TXQCTL registers in order to update the VID of the queue. This is necessary because the TXQCTL register is read-only from the VF, and thus the VF cannot do this for itself. This register does not need to wait for the Tx queue to be disabled and is safe for the PF to write during normal VF operation, so we move this write to the top of the function above the mailbox message. Without this, the TXQCTL register would be misconfigured and cause the VF to Tx hang. Signed-off-by: Jacob Keller Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k_pf.c | 46 +++++++++++++-------- 1 file changed, 29 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c index 682299dd0ce4..23fb319fd2a0 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c @@ -867,10 +867,6 @@ static s32 fm10k_iov_assign_default_mac_vlan_pf(struct fm10k_hw *hw, vf_q_idx = fm10k_vf_queue_index(hw, vf_idx); qmap_idx = qmap_stride * vf_idx; - /* MAP Tx queue back to 0 temporarily, and disable it */ - fm10k_write_reg(hw, FM10K_TQMAP(qmap_idx), 0); - fm10k_write_reg(hw, FM10K_TXDCTL(vf_q_idx), 0); - /* Determine correct default VLAN ID. The FM10K_VLAN_OVERRIDE bit is * used here to indicate to the VF that it will not have privilege to * write VLAN_TABLE. All policy is enforced on the PF but this allows @@ -886,9 +882,35 @@ static s32 fm10k_iov_assign_default_mac_vlan_pf(struct fm10k_hw *hw, fm10k_tlv_attr_put_mac_vlan(msg, FM10K_MAC_VLAN_MSG_DEFAULT_MAC, vf_info->mac, vf_vid); - /* load onto outgoing mailbox, ignore any errors on enqueue */ - if (vf_info->mbx.ops.enqueue_tx) - vf_info->mbx.ops.enqueue_tx(hw, &vf_info->mbx, msg); + /* Configure Queue control register with new VLAN ID. The TXQCTL + * register is RO from the VF, so the PF must do this even in the + * case of notifying the VF of a new VID via the mailbox. + */ + txqctl = ((u32)vf_vid << FM10K_TXQCTL_VID_SHIFT) & + FM10K_TXQCTL_VID_MASK; + txqctl |= (vf_idx << FM10K_TXQCTL_TC_SHIFT) | + FM10K_TXQCTL_VF | vf_idx; + + for (i = 0; i < queues_per_pool; i++) + fm10k_write_reg(hw, FM10K_TXQCTL(vf_q_idx + i), txqctl); + + /* try loading a message onto outgoing mailbox first */ + if (vf_info->mbx.ops.enqueue_tx) { + err = vf_info->mbx.ops.enqueue_tx(hw, &vf_info->mbx, msg); + if (err != FM10K_MBX_ERR_NO_MBX) + return err; + err = 0; + } + + /* If we aren't connected to a mailbox, this is most likely because + * the VF driver is not running. It should thus be safe to re-map + * queues and use the registers to pass the MAC address so that the VF + * driver gets correct information during its initialization. + */ + + /* MAP Tx queue back to 0 temporarily, and disable it */ + fm10k_write_reg(hw, FM10K_TQMAP(qmap_idx), 0); + fm10k_write_reg(hw, FM10K_TXDCTL(vf_q_idx), 0); /* verify ring has disabled before modifying base address registers */ txdctl = fm10k_read_reg(hw, FM10K_TXDCTL(vf_q_idx)); @@ -927,16 +949,6 @@ static s32 fm10k_iov_assign_default_mac_vlan_pf(struct fm10k_hw *hw, FM10K_TDLEN_ITR_SCALE_SHIFT); err_out: - /* configure Queue control register */ - txqctl = ((u32)vf_vid << FM10K_TXQCTL_VID_SHIFT) & - FM10K_TXQCTL_VID_MASK; - txqctl |= (vf_idx << FM10K_TXQCTL_TC_SHIFT) | - FM10K_TXQCTL_VF | vf_idx; - - /* assign VLAN ID */ - for (i = 0; i < queues_per_pool; i++) - fm10k_write_reg(hw, FM10K_TXQCTL(vf_q_idx + i), txqctl); - /* restore the queue back to VF ownership */ fm10k_write_reg(hw, FM10K_TQMAP(qmap_idx), vf_q_idx); return err; From f1ae02b186d9b37ee621c7e922ecf5db96f5fb5c Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Sun, 6 Mar 2016 15:08:55 +0200 Subject: [PATCH 0545/1715] iwlwifi: mvm: allow same PN for de-aggregated AMSDU The 9000 hardware will de-aggregate AMSDUs. In the process it will copy the mac header "as is" to the new MPDUs. This means driver should allow the same PN for MPDUs originated from the same AMSDU. Do that by incrementing the PN only for the last MPDU in the sequence. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index df6c32caa5f0..08d8a8abb918 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -132,7 +132,8 @@ static inline int iwl_mvm_check_pn(struct iwl_mvm *mvm, struct sk_buff *skb, IEEE80211_CCMP_PN_LEN) <= 0) return -1; - memcpy(ptk_pn->q[queue].pn[tid], pn, IEEE80211_CCMP_PN_LEN); + if (!(stats->flag & RX_FLAG_AMSDU_MORE)) + memcpy(ptk_pn->q[queue].pn[tid], pn, IEEE80211_CCMP_PN_LEN); stats->flag |= RX_FLAG_PN_VALIDATED; return 0; @@ -883,6 +884,9 @@ void iwl_mvm_rx_mpdu_mq(struct iwl_mvm *mvm, struct napi_struct *napi, u8 *qc = ieee80211_get_qos_ctl(hdr); *qc &= ~IEEE80211_QOS_CTL_A_MSDU_PRESENT; + if (!(desc->amsdu_info & + IWL_RX_MPDU_AMSDU_LAST_SUBFRAME)) + rx_status->flag |= RX_FLAG_AMSDU_MORE; } if (baid != IWL_RX_REORDER_DATA_INVALID_BAID) iwl_mvm_agg_rx_received(mvm, baid); From 8e160ab83a32a16cd45d82778aca1ec3e51b802b Mon Sep 17 00:00:00 2001 From: Ayala Beker Date: Mon, 11 Apr 2016 11:37:38 +0300 Subject: [PATCH 0546/1715] iwlwifi: mvm: support GMAC protocol Add support for installing and removing GMAC key for newer FW versions that support GCM and MFP. GMAC provides authentication and integrity for multicast management frames. Firmware API was changed, update the driver accordingly. Signed-off-by: Ayala Beker Signed-off-by: Luca Coelho --- .../wireless/intel/iwlwifi/mvm/fw-api-sta.h | 31 ++++++++++---- .../net/wireless/intel/iwlwifi/mvm/mac80211.c | 18 ++++++-- drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 2 +- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 42 ++++++++++++++++--- 4 files changed, 76 insertions(+), 17 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-sta.h b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-sta.h index d1c4fb849111..6c8e3ca79323 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-sta.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-sta.h @@ -432,26 +432,43 @@ struct iwl_mvm_rm_sta_cmd { u8 reserved[3]; } __packed; /* REMOVE_STA_CMD_API_S_VER_2 */ +/** + * struct iwl_mvm_mgmt_mcast_key_cmd_v1 + * ( MGMT_MCAST_KEY = 0x1f ) + * @ctrl_flags: %iwl_sta_key_flag + * @igtk: + * @k1: unused + * @k2: unused + * @sta_id: station ID that support IGTK + * @key_id: + * @receive_seq_cnt: initial RSC/PN needed for replay check + */ +struct iwl_mvm_mgmt_mcast_key_cmd_v1 { + __le32 ctrl_flags; + u8 igtk[16]; + u8 k1[16]; + u8 k2[16]; + __le32 key_id; + __le32 sta_id; + __le64 receive_seq_cnt; +} __packed; /* SEC_MGMT_MULTICAST_KEY_CMD_API_S_VER_1 */ + /** * struct iwl_mvm_mgmt_mcast_key_cmd * ( MGMT_MCAST_KEY = 0x1f ) * @ctrl_flags: %iwl_sta_key_flag - * @IGTK: - * @K1: unused - * @K2: unused + * @igtk: IGTK master key * @sta_id: station ID that support IGTK * @key_id: * @receive_seq_cnt: initial RSC/PN needed for replay check */ struct iwl_mvm_mgmt_mcast_key_cmd { __le32 ctrl_flags; - u8 IGTK[16]; - u8 K1[16]; - u8 K2[16]; + u8 igtk[32]; __le32 key_id; __le32 sta_id; __le64 receive_seq_cnt; -} __packed; /* SEC_MGMT_MULTICAST_KEY_CMD_API_S_VER_1 */ +} __packed; /* SEC_MGMT_MULTICAST_KEY_CMD_API_S_VER_2 */ struct iwl_mvm_wep_key { u8 key_index; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 6d6064534d59..f5290c4568ad 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -465,7 +465,7 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm) hw->uapsd_queues = IWL_MVM_UAPSD_QUEUES; hw->uapsd_max_sp_len = IWL_UAPSD_MAX_SP; - BUILD_BUG_ON(ARRAY_SIZE(mvm->ciphers) < ARRAY_SIZE(mvm_ciphers) + 4); + BUILD_BUG_ON(ARRAY_SIZE(mvm->ciphers) < ARRAY_SIZE(mvm_ciphers) + 6); memcpy(mvm->ciphers, mvm_ciphers, sizeof(mvm_ciphers)); hw->wiphy->n_cipher_suites = ARRAY_SIZE(mvm_ciphers); hw->wiphy->cipher_suites = mvm->ciphers; @@ -490,6 +490,14 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm) mvm->ciphers[hw->wiphy->n_cipher_suites] = WLAN_CIPHER_SUITE_AES_CMAC; hw->wiphy->n_cipher_suites++; + if (iwl_mvm_has_new_rx_api(mvm)) { + mvm->ciphers[hw->wiphy->n_cipher_suites] = + WLAN_CIPHER_SUITE_BIP_GMAC_128; + hw->wiphy->n_cipher_suites++; + mvm->ciphers[hw->wiphy->n_cipher_suites] = + WLAN_CIPHER_SUITE_BIP_GMAC_256; + hw->wiphy->n_cipher_suites++; + } } /* currently FW API supports only one optional cipher scheme */ @@ -2746,6 +2754,8 @@ static int iwl_mvm_mac_set_key(struct ieee80211_hw *hw, key->flags |= IEEE80211_KEY_FLAG_PUT_IV_SPACE; break; case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: WARN_ON_ONCE(!ieee80211_hw_check(hw, MFP_CAPABLE)); break; case WLAN_CIPHER_SUITE_WEP40: @@ -2779,9 +2789,11 @@ static int iwl_mvm_mac_set_key(struct ieee80211_hw *hw, * GTK on AP interface is a TX-only key, return 0; * on IBSS they're per-station and because we're lazy * we don't support them for RX, so do the same. - * CMAC in AP/IBSS modes must be done in software. + * CMAC/GMAC in AP/IBSS modes must be done in software. */ - if (key->cipher == WLAN_CIPHER_SUITE_AES_CMAC) + if (key->cipher == WLAN_CIPHER_SUITE_AES_CMAC || + key->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_128 || + key->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_256) ret = -EOPNOTSUPP; else ret = 0; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index b4fc86d5d7ef..0b0855ae092e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -707,7 +707,7 @@ enum iwl_mvm_queue_status { }; #define IWL_MVM_DQA_QUEUE_TIMEOUT (5 * HZ) -#define IWL_MVM_NUM_CIPHERS 8 +#define IWL_MVM_NUM_CIPHERS 10 struct iwl_mvm { /* for logger access */ diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index 3130b9c68a74..5960eb4fdf1f 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -2412,9 +2412,15 @@ static int iwl_mvm_send_sta_igtk(struct iwl_mvm *mvm, struct iwl_mvm_mgmt_mcast_key_cmd igtk_cmd = {}; /* verify the key details match the required command's expectations */ - if (WARN_ON((keyconf->cipher != WLAN_CIPHER_SUITE_AES_CMAC) || - (keyconf->flags & IEEE80211_KEY_FLAG_PAIRWISE) || - (keyconf->keyidx != 4 && keyconf->keyidx != 5))) + if (WARN_ON((keyconf->flags & IEEE80211_KEY_FLAG_PAIRWISE) || + (keyconf->keyidx != 4 && keyconf->keyidx != 5) || + (keyconf->cipher != WLAN_CIPHER_SUITE_AES_CMAC && + keyconf->cipher != WLAN_CIPHER_SUITE_BIP_GMAC_128 && + keyconf->cipher != WLAN_CIPHER_SUITE_BIP_GMAC_256))) + return -EINVAL; + + if (WARN_ON(!iwl_mvm_has_new_rx_api(mvm) && + keyconf->cipher != WLAN_CIPHER_SUITE_AES_CMAC)) return -EINVAL; igtk_cmd.key_id = cpu_to_le32(keyconf->keyidx); @@ -2430,11 +2436,18 @@ static int iwl_mvm_send_sta_igtk(struct iwl_mvm *mvm, case WLAN_CIPHER_SUITE_AES_CMAC: igtk_cmd.ctrl_flags |= cpu_to_le32(STA_KEY_FLG_CCM); break; + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + igtk_cmd.ctrl_flags |= cpu_to_le32(STA_KEY_FLG_GCMP); + break; default: return -EINVAL; } - memcpy(igtk_cmd.IGTK, keyconf->key, keyconf->keylen); + memcpy(igtk_cmd.igtk, keyconf->key, keyconf->keylen); + if (keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_256) + igtk_cmd.ctrl_flags |= + cpu_to_le32(STA_KEY_FLG_KEY_32BYTES); ieee80211_get_key_rx_seq(keyconf, 0, &seq); pn = seq.aes_cmac.pn; igtk_cmd.receive_seq_cnt = cpu_to_le64(((u64) pn[5] << 0) | @@ -2449,6 +2462,19 @@ static int iwl_mvm_send_sta_igtk(struct iwl_mvm *mvm, remove_key ? "removing" : "installing", igtk_cmd.sta_id); + if (!iwl_mvm_has_new_rx_api(mvm)) { + struct iwl_mvm_mgmt_mcast_key_cmd_v1 igtk_cmd_v1 = { + .ctrl_flags = igtk_cmd.ctrl_flags, + .key_id = igtk_cmd.key_id, + .sta_id = igtk_cmd.sta_id, + .receive_seq_cnt = igtk_cmd.receive_seq_cnt + }; + + memcpy(igtk_cmd_v1.igtk, igtk_cmd.igtk, + ARRAY_SIZE(igtk_cmd_v1.igtk)); + return iwl_mvm_send_cmd_pdu(mvm, MGMT_MCAST_KEY, 0, + sizeof(igtk_cmd_v1), &igtk_cmd_v1); + } return iwl_mvm_send_cmd_pdu(mvm, MGMT_MCAST_KEY, 0, sizeof(igtk_cmd), &igtk_cmd); } @@ -2573,7 +2599,9 @@ int iwl_mvm_set_sta_key(struct iwl_mvm *mvm, } sta_id = mvm_sta->sta_id; - if (keyconf->cipher == WLAN_CIPHER_SUITE_AES_CMAC) { + if (keyconf->cipher == WLAN_CIPHER_SUITE_AES_CMAC || + keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_128 || + keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_256) { ret = iwl_mvm_send_sta_igtk(mvm, keyconf, sta_id, false); goto end; } @@ -2659,7 +2687,9 @@ int iwl_mvm_remove_sta_key(struct iwl_mvm *mvm, IWL_DEBUG_WEP(mvm, "mvm remove dynamic key: idx=%d sta=%d\n", keyconf->keyidx, sta_id); - if (keyconf->cipher == WLAN_CIPHER_SUITE_AES_CMAC) + if (keyconf->cipher == WLAN_CIPHER_SUITE_AES_CMAC || + keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_128 || + keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_256) return iwl_mvm_send_sta_igtk(mvm, keyconf, sta_id, true); if (!__test_and_clear_bit(keyconf->hw_key_idx, mvm->fw_key_table)) { From d975d72016bb2540eff3018c3c0dd96688711748 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Mon, 4 Jul 2016 11:52:07 +0300 Subject: [PATCH 0547/1715] iwlwifi: mvm: support new paging command format For a000 devices there is a support of 64 bit DMA addressing. The paging command was changed accordingly - support it. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- .../net/wireless/intel/iwlwifi/mvm/fw-api.h | 8 ++++-- drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 28 +++++++++++++------ drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 6 ++++ 3 files changed, 32 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api.h b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api.h index 71076f02796e..57b574b2a092 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api.h @@ -482,13 +482,17 @@ struct iwl_nvm_access_cmd { * @block_size: the block size in powers of 2 * @block_num: number of blocks specified in the command. * @device_phy_addr: virtual addresses from device side + * 32 bit address for API version 1, 64 bit address for API version 2. */ struct iwl_fw_paging_cmd { __le32 flags; __le32 block_size; __le32 block_num; - __le32 device_phy_addr[NUM_OF_FW_PAGING_BLOCKS]; -} __packed; /* FW_PAGING_BLOCK_CMD_API_S_VER_1 */ + union { + __le32 addr32[NUM_OF_FW_PAGING_BLOCKS]; + __le64 addr64[NUM_OF_FW_PAGING_BLOCKS]; + } device_phy_addr; +} __packed; /* FW_PAGING_BLOCK_CMD_API_S_VER_2 */ /* * Fw items ID's diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index 7e0cdbf8bf74..47e8e70dcfac 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -385,9 +385,7 @@ static int iwl_save_fw_paging(struct iwl_mvm *mvm, /* send paging cmd to FW in case CPU2 has paging image */ static int iwl_send_paging_cmd(struct iwl_mvm *mvm, const struct fw_img *fw) { - int blk_idx; - __le32 dev_phy_addr; - struct iwl_fw_paging_cmd fw_paging_cmd = { + struct iwl_fw_paging_cmd paging_cmd = { .flags = cpu_to_le32(PAGING_CMD_IS_SECURED | PAGING_CMD_IS_ENABLED | @@ -396,18 +394,32 @@ static int iwl_send_paging_cmd(struct iwl_mvm *mvm, const struct fw_img *fw) .block_size = cpu_to_le32(BLOCK_2_EXP_SIZE), .block_num = cpu_to_le32(mvm->num_of_paging_blk), }; + int blk_idx, size = sizeof(paging_cmd); + + /* A bit hard coded - but this is the old API and will be deprecated */ + if (!iwl_mvm_has_new_tx_api(mvm)) + size -= NUM_OF_FW_PAGING_BLOCKS * 4; /* loop for for all paging blocks + CSS block */ for (blk_idx = 0; blk_idx < mvm->num_of_paging_blk + 1; blk_idx++) { - dev_phy_addr = - cpu_to_le32(mvm->fw_paging_db[blk_idx].fw_paging_phys >> - PAGE_2_EXP_SIZE); - fw_paging_cmd.device_phy_addr[blk_idx] = dev_phy_addr; + dma_addr_t addr = mvm->fw_paging_db[blk_idx].fw_paging_phys; + + addr = addr >> PAGE_2_EXP_SIZE; + + if (iwl_mvm_has_new_tx_api(mvm)) { + __le64 phy_addr = cpu_to_le64(addr); + + paging_cmd.device_phy_addr.addr64[blk_idx] = phy_addr; + } else { + __le32 phy_addr = cpu_to_le32(addr); + + paging_cmd.device_phy_addr.addr32[blk_idx] = phy_addr; + } } return iwl_mvm_send_cmd_pdu(mvm, iwl_cmd_id(FW_PAGING_BLOCK_CMD, IWL_ALWAYS_LONG_GROUP, 0), - 0, sizeof(fw_paging_cmd), &fw_paging_cmd); + 0, size, &paging_cmd); } /* diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index 0b0855ae092e..28ebc12f1fe0 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -1192,6 +1192,12 @@ static inline bool iwl_mvm_has_new_rx_api(struct iwl_mvm *mvm) IWL_UCODE_TLV_CAPA_MULTI_QUEUE_RX_SUPPORT); } +static inline bool iwl_mvm_has_new_tx_api(struct iwl_mvm *mvm) +{ + /* TODO - replace with TLV once defined */ + return mvm->trans->cfg->use_tfh; +} + static inline bool iwl_mvm_is_tt_in_fw(struct iwl_mvm *mvm) { #ifdef CONFIG_THERMAL From 95a8d19f28e6b29377a880c6264391a62e07fccc Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 25 Aug 2016 15:33:29 +0200 Subject: [PATCH 0548/1715] netfilter: restart search if moved to other chain In case nf_conntrack_tuple_taken did not find a conflicting entry check that all entries in this hash slot were tested and restart in case an entry was moved to another chain. Reported-by: Eric Dumazet Fixes: ea781f197d6a ("netfilter: nf_conntrack: use SLAB_DESTROY_BY_RCU and get rid of call_rcu()") Signed-off-by: Florian Westphal Acked-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_core.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 7d90a5d15113..887926aefc72 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -809,6 +809,7 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, zone = nf_ct_zone(ignored_conntrack); rcu_read_lock(); + begin: nf_conntrack_get_ht(&ct_hash, &hsize); hash = __hash_conntrack(net, tuple, hsize); @@ -822,6 +823,12 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, } NF_CT_STAT_INC_ATOMIC(net, searched); } + + if (get_nulls_value(n) != hash) { + NF_CT_STAT_INC_ATOMIC(net, search_restart); + goto begin; + } + rcu_read_unlock(); return 0; From 616b14b46957b52dc7e1f3ec2210d3f9051b1178 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 25 Aug 2016 15:33:30 +0200 Subject: [PATCH 0549/1715] netfilter: don't rely on DYING bit to detect when destroy event was sent The reliable event delivery mode currently (ab)uses the DYING bit to detect which entries on the dying list have to be skipped when re-delivering events from the eache worker in reliable event mode. Currently when we delete the conntrack from main table we only set this bit if we could also deliver the netlink destroy event to userspace. If we fail we move it to the dying list, the ecache worker will reattempt event delivery for all confirmed conntracks on the dying list that do not have the DYING bit set. Once timer is gone, we can no longer use if (del_timer()) to detect when we 'stole' the reference count owned by the timer/hash entry, so we need some other way to avoid racing with other cpu. Pablo suggested to add a marker in the ecache extension that skips entries that have been unhashed from main table but are still waiting for the last reference count to be dropped (e.g. because one skb waiting on nfqueue verdict still holds a reference). We do this by adding a tristate. If we fail to deliver the destroy event, make a note of this in the eache extension. The worker can then skip all entries that are in a different state. Either they never delivered a destroy event, e.g. because the netlink backend was not loaded, or redelivery took place already. Once the conntrack timer is removed we will now be able to replace del_timer() test with test_and_set_bit(DYING, &ct->status) to avoid racing with other cpu that tries to evict the same conntrack. Because DYING will then be set right before we report the destroy event we can no longer skip event reporting when dying bit is set. Suggested-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal Acked-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_ecache.h | 17 +++++++++++----- net/netfilter/nf_conntrack_ecache.c | 22 +++++++++++++-------- 2 files changed, 26 insertions(+), 13 deletions(-) diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index fa36447371c6..12d967b58726 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -12,12 +12,19 @@ #include #include +enum nf_ct_ecache_state { + NFCT_ECACHE_UNKNOWN, /* destroy event not sent */ + NFCT_ECACHE_DESTROY_FAIL, /* tried but failed to send destroy event */ + NFCT_ECACHE_DESTROY_SENT, /* sent destroy event after failure */ +}; + struct nf_conntrack_ecache { - unsigned long cache; /* bitops want long */ - unsigned long missed; /* missed events */ - u16 ctmask; /* bitmask of ct events to be delivered */ - u16 expmask; /* bitmask of expect events to be delivered */ - u32 portid; /* netlink portid of destroyer */ + unsigned long cache; /* bitops want long */ + unsigned long missed; /* missed events */ + u16 ctmask; /* bitmask of ct events to be delivered */ + u16 expmask; /* bitmask of expect events to be delivered */ + u32 portid; /* netlink portid of destroyer */ + enum nf_ct_ecache_state state; /* ecache state */ }; static inline struct nf_conntrack_ecache * diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index d28011b42845..da9df2d56e66 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -49,8 +49,13 @@ static enum retry_state ecache_work_evict_list(struct ct_pcpu *pcpu) hlist_nulls_for_each_entry(h, n, &pcpu->dying, hnnode) { struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); + struct nf_conntrack_ecache *e; - if (nf_ct_is_dying(ct)) + if (!nf_ct_is_confirmed(ct)) + continue; + + e = nf_ct_ecache_find(ct); + if (!e || e->state != NFCT_ECACHE_DESTROY_FAIL) continue; if (nf_conntrack_event(IPCT_DESTROY, ct)) { @@ -58,8 +63,7 @@ static enum retry_state ecache_work_evict_list(struct ct_pcpu *pcpu) break; } - /* we've got the event delivered, now it's dying */ - set_bit(IPS_DYING_BIT, &ct->status); + e->state = NFCT_ECACHE_DESTROY_SENT; refs[evicted] = ct; if (++evicted >= ARRAY_SIZE(refs)) { @@ -130,7 +134,7 @@ int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct, if (!e) goto out_unlock; - if (nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct)) { + if (nf_ct_is_confirmed(ct)) { struct nf_ct_event item = { .ct = ct, .portid = e->portid ? e->portid : portid, @@ -150,11 +154,13 @@ int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct, * triggered by a process, we store the PORTID * to include it in the retransmission. */ - if (eventmask & (1 << IPCT_DESTROY) && - e->portid == 0 && portid != 0) - e->portid = portid; - else + if (eventmask & (1 << IPCT_DESTROY)) { + if (e->portid == 0 && portid != 0) + e->portid = portid; + e->state = NFCT_ECACHE_DESTROY_FAIL; + } else { e->missed |= eventmask; + } } else { e->missed &= ~missed; } From f330a7fdbe1611104622faff7e614a246a7d20f0 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 25 Aug 2016 15:33:31 +0200 Subject: [PATCH 0550/1715] netfilter: conntrack: get rid of conntrack timer With stats enabled this eats 80 bytes on x86_64 per nf_conn entry, as Eric Dumazet pointed out during netfilter workshop 2016. Eric also says: "Another reason was the fact that Thomas was about to change max timer range [..]" (500462a9de657f8, 'timers: Switch to a non-cascading wheel'). Remove the timer and use a 32bit jiffies value containing timestamp until entry is valid. During conntrack lookup, even before doing tuple comparision, check the timeout value and evict the entry in case it is too old. The dying bit is used as a synchronization point to avoid races where multiple cpus try to evict the same entry. Because lookup is always lockless, we need to bump the refcnt once when we evict, else we could try to evict already-dead entry that is being recycled. This is the standard/expected way when conntrack entries are destroyed. Followup patches will introduce garbage colliction via work queue and further places where we can reap obsoleted entries (e.g. during netlink dumps), this is needed to avoid expired conntracks from hanging around for too long when lookup rate is low after a busy period. Signed-off-by: Florian Westphal Acked-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 23 +++++-- net/netfilter/nf_conntrack_core.c | 91 +++++++++++++++------------- net/netfilter/nf_conntrack_netlink.c | 14 ++--- net/netfilter/nf_conntrack_pptp.c | 3 +- net/netfilter/nf_nat_core.c | 6 -- 5 files changed, 74 insertions(+), 63 deletions(-) diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 2a127480d4cc..7277751128e8 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -42,7 +42,6 @@ union nf_conntrack_expect_proto { #include #include -#include #ifdef CONFIG_NETFILTER_DEBUG #define NF_CT_ASSERT(x) WARN_ON(!(x)) @@ -73,7 +72,7 @@ struct nf_conn_help { #include struct nf_conn { - /* Usage count in here is 1 for hash table/destruct timer, 1 per skb, + /* Usage count in here is 1 for hash table, 1 per skb, * plus 1 for any connection(s) we are `master' for * * Hint, SKB address this struct and refcnt via skb->nfct and @@ -96,8 +95,8 @@ struct nf_conn { /* Have we seen traffic both ways yet? (bitset) */ unsigned long status; - /* Timer function; drops refcnt when it goes off. */ - struct timer_list timeout; + /* jiffies32 when this ct is considered dead */ + u32 timeout; possible_net_t ct_net; @@ -291,14 +290,28 @@ static inline bool nf_is_loopback_packet(const struct sk_buff *skb) return skb->dev && skb->skb_iif && skb->dev->flags & IFF_LOOPBACK; } +#define nfct_time_stamp ((u32)(jiffies)) + /* jiffies until ct expires, 0 if already expired */ static inline unsigned long nf_ct_expires(const struct nf_conn *ct) { - long timeout = (long)ct->timeout.expires - (long)jiffies; + s32 timeout = ct->timeout - nfct_time_stamp; return timeout > 0 ? timeout : 0; } +static inline bool nf_ct_is_expired(const struct nf_conn *ct) +{ + return (__s32)(ct->timeout - nfct_time_stamp) <= 0; +} + +/* use after obtaining a reference count */ +static inline bool nf_ct_should_gc(const struct nf_conn *ct) +{ + return nf_ct_is_expired(ct) && nf_ct_is_confirmed(ct) && + !nf_ct_is_dying(ct); +} + struct kernel_param; int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 887926aefc72..87ee6dad777c 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -371,7 +371,6 @@ destroy_conntrack(struct nf_conntrack *nfct) pr_debug("destroy_conntrack(%p)\n", ct); NF_CT_ASSERT(atomic_read(&nfct->use) == 0); - NF_CT_ASSERT(!timer_pending(&ct->timeout)); if (unlikely(nf_ct_is_template(ct))) { nf_ct_tmpl_free(ct); @@ -434,35 +433,30 @@ bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report) { struct nf_conn_tstamp *tstamp; + if (test_and_set_bit(IPS_DYING_BIT, &ct->status)) + return false; + tstamp = nf_conn_tstamp_find(ct); if (tstamp && tstamp->stop == 0) tstamp->stop = ktime_get_real_ns(); - if (nf_ct_is_dying(ct)) - goto delete; - if (nf_conntrack_event_report(IPCT_DESTROY, ct, portid, report) < 0) { - /* destroy event was not delivered */ + /* destroy event was not delivered. nf_ct_put will + * be done by event cache worker on redelivery. + */ nf_ct_delete_from_lists(ct); nf_conntrack_ecache_delayed_work(nf_ct_net(ct)); return false; } nf_conntrack_ecache_work(nf_ct_net(ct)); - set_bit(IPS_DYING_BIT, &ct->status); - delete: nf_ct_delete_from_lists(ct); nf_ct_put(ct); return true; } EXPORT_SYMBOL_GPL(nf_ct_delete); -static void death_by_timeout(unsigned long ul_conntrack) -{ - nf_ct_delete((struct nf_conn *)ul_conntrack, 0, 0); -} - static inline bool nf_ct_key_equal(struct nf_conntrack_tuple_hash *h, const struct nf_conntrack_tuple *tuple, @@ -480,6 +474,18 @@ nf_ct_key_equal(struct nf_conntrack_tuple_hash *h, net_eq(net, nf_ct_net(ct)); } +/* caller must hold rcu readlock and none of the nf_conntrack_locks */ +static void nf_ct_gc_expired(struct nf_conn *ct) +{ + if (!atomic_inc_not_zero(&ct->ct_general.use)) + return; + + if (nf_ct_should_gc(ct)) + nf_ct_kill(ct); + + nf_ct_put(ct); +} + /* * Warning : * - Caller must take a reference on returned object @@ -499,6 +505,17 @@ begin: bucket = reciprocal_scale(hash, hsize); hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[bucket], hnnode) { + struct nf_conn *ct; + + ct = nf_ct_tuplehash_to_ctrack(h); + if (nf_ct_is_expired(ct)) { + nf_ct_gc_expired(ct); + continue; + } + + if (nf_ct_is_dying(ct)) + continue; + if (nf_ct_key_equal(h, tuple, zone, net)) { NF_CT_STAT_INC_ATOMIC(net, found); return h; @@ -597,7 +614,6 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct) zone, net)) goto out; - add_timer(&ct->timeout); smp_wmb(); /* The caller holds a reference to this object */ atomic_set(&ct->ct_general.use, 2); @@ -750,8 +766,7 @@ __nf_conntrack_confirm(struct sk_buff *skb) /* Timer relative to confirmation time, not original setting time, otherwise we'd get timer wrap in weird delay cases. */ - ct->timeout.expires += jiffies; - add_timer(&ct->timeout); + ct->timeout += nfct_time_stamp; atomic_inc(&ct->ct_general.use); ct->status |= IPS_CONFIRMED; @@ -815,8 +830,16 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[hash], hnnode) { ct = nf_ct_tuplehash_to_ctrack(h); - if (ct != ignored_conntrack && - nf_ct_key_equal(h, tuple, zone, net)) { + + if (ct == ignored_conntrack) + continue; + + if (nf_ct_is_expired(ct)) { + nf_ct_gc_expired(ct); + continue; + } + + if (nf_ct_key_equal(h, tuple, zone, net)) { NF_CT_STAT_INC_ATOMIC(net, found); rcu_read_unlock(); return 1; @@ -850,6 +873,11 @@ static unsigned int early_drop_list(struct net *net, hlist_nulls_for_each_entry_rcu(h, n, head, hnnode) { tmp = nf_ct_tuplehash_to_ctrack(h); + if (nf_ct_is_expired(tmp)) { + nf_ct_gc_expired(tmp); + continue; + } + if (test_bit(IPS_ASSURED_BIT, &tmp->status) || !net_eq(nf_ct_net(tmp), net) || nf_ct_is_dying(tmp)) @@ -867,7 +895,6 @@ static unsigned int early_drop_list(struct net *net, */ if (net_eq(nf_ct_net(tmp), net) && nf_ct_is_confirmed(tmp) && - del_timer(&tmp->timeout) && nf_ct_delete(tmp, 0, 0)) drops++; @@ -937,8 +964,6 @@ __nf_conntrack_alloc(struct net *net, /* save hash for reusing when confirming */ *(unsigned long *)(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev) = hash; ct->status = 0; - /* Don't set timer yet: wait for confirmation */ - setup_timer(&ct->timeout, death_by_timeout, (unsigned long)ct); write_pnet(&ct->ct_net, net); memset(&ct->__nfct_init_offset[0], 0, offsetof(struct nf_conn, proto) - @@ -1312,7 +1337,6 @@ void __nf_ct_refresh_acct(struct nf_conn *ct, unsigned long extra_jiffies, int do_acct) { - NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct); NF_CT_ASSERT(skb); /* Only update if this is not a fixed timeout */ @@ -1320,18 +1344,10 @@ void __nf_ct_refresh_acct(struct nf_conn *ct, goto acct; /* If not in hash table, timer will not be active yet */ - if (!nf_ct_is_confirmed(ct)) { - ct->timeout.expires = extra_jiffies; - } else { - unsigned long newtime = jiffies + extra_jiffies; - - /* Only update the timeout if the new timeout is at least - HZ jiffies from the old timeout. Need del_timer for race - avoidance (may already be dying). */ - if (newtime - ct->timeout.expires >= HZ) - mod_timer_pending(&ct->timeout, newtime); - } + if (nf_ct_is_confirmed(ct)) + extra_jiffies += nfct_time_stamp; + ct->timeout = extra_jiffies; acct: if (do_acct) nf_ct_acct_update(ct, ctinfo, skb->len); @@ -1346,11 +1362,7 @@ bool __nf_ct_kill_acct(struct nf_conn *ct, if (do_acct) nf_ct_acct_update(ct, ctinfo, skb->len); - if (del_timer(&ct->timeout)) { - ct->timeout.function((unsigned long)ct); - return true; - } - return false; + return nf_ct_delete(ct, 0, 0); } EXPORT_SYMBOL_GPL(__nf_ct_kill_acct); @@ -1485,11 +1497,8 @@ void nf_ct_iterate_cleanup(struct net *net, while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) { /* Time to push up daises... */ - if (del_timer(&ct->timeout)) - nf_ct_delete(ct, portid, report); - - /* ... else the timer will get him soon. */ + nf_ct_delete(ct, portid, report); nf_ct_put(ct); cond_resched(); } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 68800c10a320..81fd34ce0a57 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1144,9 +1144,7 @@ static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl, } } - if (del_timer(&ct->timeout)) - nf_ct_delete(ct, NETLINK_CB(skb).portid, nlmsg_report(nlh)); - + nf_ct_delete(ct, NETLINK_CB(skb).portid, nlmsg_report(nlh)); nf_ct_put(ct); return 0; @@ -1514,11 +1512,10 @@ static int ctnetlink_change_timeout(struct nf_conn *ct, { u_int32_t timeout = ntohl(nla_get_be32(cda[CTA_TIMEOUT])); - if (!del_timer(&ct->timeout)) - return -ETIME; + ct->timeout = nfct_time_stamp + timeout * HZ; - ct->timeout.expires = jiffies + timeout * HZ; - add_timer(&ct->timeout); + if (test_bit(IPS_DYING_BIT, &ct->status)) + return -ETIME; return 0; } @@ -1716,9 +1713,8 @@ ctnetlink_create_conntrack(struct net *net, if (!cda[CTA_TIMEOUT]) goto err1; - ct->timeout.expires = ntohl(nla_get_be32(cda[CTA_TIMEOUT])); - ct->timeout.expires = jiffies + ct->timeout.expires * HZ; + ct->timeout = nfct_time_stamp + ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ; rcu_read_lock(); if (cda[CTA_HELP]) { diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index 5588c7ae1ac2..f60a4755d71e 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -157,8 +157,7 @@ static int destroy_sibling_or_exp(struct net *net, struct nf_conn *ct, pr_debug("setting timeout of conntrack %p to 0\n", sibling); sibling->proto.gre.timeout = 0; sibling->proto.gre.stream_timeout = 0; - if (del_timer(&sibling->timeout)) - sibling->timeout.function((unsigned long)sibling); + nf_ct_kill(sibling); nf_ct_put(sibling); return 1; } else { diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index de31818417b8..81ae41f85d3a 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -565,16 +565,10 @@ static int nf_nat_proto_clean(struct nf_conn *ct, void *data) * Else, when the conntrack is destoyed, nf_nat_cleanup_conntrack() * will delete entry from already-freed table. */ - if (!del_timer(&ct->timeout)) - return 1; - ct->status &= ~IPS_NAT_DONE_MASK; - rhashtable_remove_fast(&nf_nat_bysource_table, &ct->nat_bysource, nf_nat_bysource_params); - add_timer(&ct->timeout); - /* don't delete conntrack. Although that would make things a lot * simpler, we'd end up flushing all conntracks on nat rmmod. */ From 2344d64ec71661f7201082cf727a7154f662e249 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 25 Aug 2016 15:33:32 +0200 Subject: [PATCH 0551/1715] netfilter: evict stale entries on netlink dumps When dumping we already have to look at the entire table, so we might as well toss those entries whose timeout value is in the past. We also look at every entry during resize operations. However, eviction there is not as simple because we hold the global resize lock so we can't evict without adding a 'expired' list to drop from later. Considering that resizes are very rare it doesn't seem worth doing it. Signed-off-by: Florian Westphal Acked-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_netlink.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 81fd34ce0a57..dedbe4b33f38 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -815,14 +815,23 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) struct hlist_nulls_node *n; struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); u_int8_t l3proto = nfmsg->nfgen_family; - int res; + struct nf_conn *nf_ct_evict[8]; + int res, i; spinlock_t *lockp; last = (struct nf_conn *)cb->args[1]; + i = 0; local_bh_disable(); for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++) { restart: + while (i) { + i--; + if (nf_ct_should_gc(nf_ct_evict[i])) + nf_ct_kill(nf_ct_evict[i]); + nf_ct_put(nf_ct_evict[i]); + } + lockp = &nf_conntrack_locks[cb->args[0] % CONNTRACK_LOCKS]; nf_conntrack_lock(lockp); if (cb->args[0] >= nf_conntrack_htable_size) { @@ -834,6 +843,13 @@ restart: if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL) continue; ct = nf_ct_tuplehash_to_ctrack(h); + if (nf_ct_is_expired(ct)) { + if (i < ARRAY_SIZE(nf_ct_evict) && + atomic_inc_not_zero(&ct->ct_general.use)) + nf_ct_evict[i++] = ct; + continue; + } + if (!net_eq(net, nf_ct_net(ct))) continue; @@ -875,6 +891,13 @@ out: if (last) nf_ct_put(last); + while (i) { + i--; + if (nf_ct_should_gc(nf_ct_evict[i])) + nf_ct_kill(nf_ct_evict[i]); + nf_ct_put(nf_ct_evict[i]); + } + return skb->len; } From b87a2f9199ea82eaadca5bcc596b62cb8214cc0a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 25 Aug 2016 15:33:33 +0200 Subject: [PATCH 0552/1715] netfilter: conntrack: add gc worker to remove timed-out entries Conntrack gc worker to evict stale entries. GC happens once every 5 seconds, but we only scan at most 1/64th of the table (and not more than 8k) buckets to avoid hogging cpu. This means that a complete scan of the table will take several minutes of wall-clock time. Considering that the gc run will never have to evict any entries during normal operation because those will happen from packet path this should be fine. We only need gc to make sure userspace (conntrack event listeners) eventually learn of the timeout, and for resource reclaim in case the system becomes idle. We do not disable BH and cond_resched for every bucket so this should not introduce noticeable latencies either. A followup patch will add a small change to speed up GC for the extreme case where most entries are timed out on an otherwise idle system. v2: Use cond_resched_rcu_qs & add comment wrt. missing restart on nulls value change in gc worker, suggested by Eric Dumazet. v3: don't call cancel_delayed_work_sync twice (again, Eric). Signed-off-by: Florian Westphal Acked-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_core.c | 76 +++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 87ee6dad777c..f95a9e999f09 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -72,11 +72,24 @@ EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock); struct hlist_nulls_head *nf_conntrack_hash __read_mostly; EXPORT_SYMBOL_GPL(nf_conntrack_hash); +struct conntrack_gc_work { + struct delayed_work dwork; + u32 last_bucket; + bool exiting; +}; + static __read_mostly struct kmem_cache *nf_conntrack_cachep; static __read_mostly spinlock_t nf_conntrack_locks_all_lock; static __read_mostly DEFINE_SPINLOCK(nf_conntrack_locks_all_lock); static __read_mostly bool nf_conntrack_locks_all; +#define GC_MAX_BUCKETS_DIV 64u +#define GC_MAX_BUCKETS 8192u +#define GC_INTERVAL (5 * HZ) +#define GC_MAX_EVICTS 256u + +static struct conntrack_gc_work conntrack_gc_work; + void nf_conntrack_lock(spinlock_t *lock) __acquires(lock) { spin_lock(lock); @@ -928,6 +941,63 @@ static noinline int early_drop(struct net *net, unsigned int _hash) return false; } +static void gc_worker(struct work_struct *work) +{ + unsigned int i, goal, buckets = 0, expired_count = 0; + unsigned long next_run = GC_INTERVAL; + struct conntrack_gc_work *gc_work; + + gc_work = container_of(work, struct conntrack_gc_work, dwork.work); + + goal = min(nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV, GC_MAX_BUCKETS); + i = gc_work->last_bucket; + + do { + struct nf_conntrack_tuple_hash *h; + struct hlist_nulls_head *ct_hash; + struct hlist_nulls_node *n; + unsigned int hashsz; + struct nf_conn *tmp; + + i++; + rcu_read_lock(); + + nf_conntrack_get_ht(&ct_hash, &hashsz); + if (i >= hashsz) + i = 0; + + hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) { + tmp = nf_ct_tuplehash_to_ctrack(h); + + if (nf_ct_is_expired(tmp)) { + nf_ct_gc_expired(tmp); + expired_count++; + continue; + } + } + + /* could check get_nulls_value() here and restart if ct + * was moved to another chain. But given gc is best-effort + * we will just continue with next hash slot. + */ + rcu_read_unlock(); + cond_resched_rcu_qs(); + } while (++buckets < goal && + expired_count < GC_MAX_EVICTS); + + if (gc_work->exiting) + return; + + gc_work->last_bucket = i; + schedule_delayed_work(&gc_work->dwork, next_run); +} + +static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work) +{ + INIT_DELAYED_WORK(&gc_work->dwork, gc_worker); + gc_work->exiting = false; +} + static struct nf_conn * __nf_conntrack_alloc(struct net *net, const struct nf_conntrack_zone *zone, @@ -1534,6 +1604,7 @@ static int untrack_refs(void) void nf_conntrack_cleanup_start(void) { + conntrack_gc_work.exiting = true; RCU_INIT_POINTER(ip_ct_attach, NULL); } @@ -1543,6 +1614,7 @@ void nf_conntrack_cleanup_end(void) while (untrack_refs() > 0) schedule(); + cancel_delayed_work_sync(&conntrack_gc_work.dwork); nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size); nf_conntrack_proto_fini(); @@ -1817,6 +1889,10 @@ int nf_conntrack_init_start(void) } /* - and look it like as a confirmed connection */ nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED); + + conntrack_gc_work_init(&conntrack_gc_work); + schedule_delayed_work(&conntrack_gc_work.dwork, GC_INTERVAL); + return 0; err_proto: From c023c0e4a0eb7f0dfebe29b41801bf7373bf4f1d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 25 Aug 2016 15:33:34 +0200 Subject: [PATCH 0553/1715] netfilter: conntrack: resched gc again if eviction rate is high If we evicted a large fraction of the scanned conntrack entries re-schedule the next gc cycle for immediate execution. This triggers during tests where load is high, then drops to zero and many connections will be in TW/CLOSE state with < 30 second timeouts. Without this change it will take several minutes until conntrack count comes back to normal. Signed-off-by: Florian Westphal Acked-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_core.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index f95a9e999f09..7c66ce401ce9 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -945,6 +945,7 @@ static void gc_worker(struct work_struct *work) { unsigned int i, goal, buckets = 0, expired_count = 0; unsigned long next_run = GC_INTERVAL; + unsigned int ratio, scanned = 0; struct conntrack_gc_work *gc_work; gc_work = container_of(work, struct conntrack_gc_work, dwork.work); @@ -969,6 +970,7 @@ static void gc_worker(struct work_struct *work) hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) { tmp = nf_ct_tuplehash_to_ctrack(h); + scanned++; if (nf_ct_is_expired(tmp)) { nf_ct_gc_expired(tmp); expired_count++; @@ -988,6 +990,10 @@ static void gc_worker(struct work_struct *work) if (gc_work->exiting) return; + ratio = scanned ? expired_count * 100 / scanned : 0; + if (ratio >= 90) + next_run = 0; + gc_work->last_bucket = i; schedule_delayed_work(&gc_work->dwork, next_run); } From ad66713f5a20034b3b2a0cbc184319b2ede93f11 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 25 Aug 2016 15:33:35 +0200 Subject: [PATCH 0554/1715] netfilter: remove __nf_ct_kill_acct helper After timer removal this just calls nf_ct_delete so remove the __ prefix version and make nf_ct_kill a shorthand for nf_ct_delete. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 13 +++---------- net/netfilter/nf_conntrack_core.c | 12 +++++------- 2 files changed, 8 insertions(+), 17 deletions(-) diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 7277751128e8..50418052a520 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -219,21 +219,14 @@ static inline void nf_ct_refresh(struct nf_conn *ct, __nf_ct_refresh_acct(ct, 0, skb, extra_jiffies, 0); } -bool __nf_ct_kill_acct(struct nf_conn *ct, enum ip_conntrack_info ctinfo, - const struct sk_buff *skb, int do_acct); - /* kill conntrack and do accounting */ -static inline bool nf_ct_kill_acct(struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - const struct sk_buff *skb) -{ - return __nf_ct_kill_acct(ct, ctinfo, skb, 1); -} +bool nf_ct_kill_acct(struct nf_conn *ct, enum ip_conntrack_info ctinfo, + const struct sk_buff *skb); /* kill conntrack without accounting */ static inline bool nf_ct_kill(struct nf_conn *ct) { - return __nf_ct_kill_acct(ct, 0, NULL, 0); + return nf_ct_delete(ct, 0, 0); } /* These are for NAT. Icky. */ diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 7c66ce401ce9..ac1db4019d5c 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1430,17 +1430,15 @@ acct: } EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct); -bool __nf_ct_kill_acct(struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - const struct sk_buff *skb, - int do_acct) +bool nf_ct_kill_acct(struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + const struct sk_buff *skb) { - if (do_acct) - nf_ct_acct_update(ct, ctinfo, skb->len); + nf_ct_acct_update(ct, ctinfo, skb->len); return nf_ct_delete(ct, 0, 0); } -EXPORT_SYMBOL_GPL(__nf_ct_kill_acct); +EXPORT_SYMBOL_GPL(nf_ct_kill_acct); #if IS_ENABLED(CONFIG_NF_CT_NETLINK) From 3cb27991aad74299af2cd55a9c12684f5b0c76c6 Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Sun, 28 Aug 2016 21:41:22 +0800 Subject: [PATCH 0555/1715] netfilter: log_arp: Use ARPHRD_ETHER instead of literal '1' There is one macro ARPHRD_ETHER which defines the ethernet proto for ARP, so we could use it instead of the literal number '1'. Signed-off-by: Gao Feng Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nf_log_arp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/netfilter/nf_log_arp.c b/net/ipv4/netfilter/nf_log_arp.c index e7ad950cf9ef..cf8f2d4e867a 100644 --- a/net/ipv4/netfilter/nf_log_arp.c +++ b/net/ipv4/netfilter/nf_log_arp.c @@ -62,7 +62,7 @@ static void dump_arp_packet(struct nf_log_buf *m, /* If it's for Ethernet and the lengths are OK, then log the ARP * payload. */ - if (ah->ar_hrd != htons(1) || + if (ah->ar_hrd != htons(ARPHRD_ETHER) || ah->ar_hln != ETH_ALEN || ah->ar_pln != sizeof(__be32)) return; From 779994fa3636d46848edb402fe7517968e036e6f Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Mon, 29 Aug 2016 18:25:28 +0800 Subject: [PATCH 0556/1715] netfilter: log: Check param to avoid overflow in nf_log_set The nf_log_set is an interface function, so it should do the strict sanity check of parameters. Convert the return value of nf_log_set as int instead of void. When the pf is invalid, return -EOPNOTSUPP. Signed-off-by: Gao Feng Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_log.h | 3 +-- net/bridge/netfilter/nf_log_bridge.c | 3 +-- net/ipv4/netfilter/nf_log_arp.c | 3 +-- net/ipv4/netfilter/nf_log_ipv4.c | 3 +-- net/ipv6/netfilter/nf_log_ipv6.c | 3 +-- net/netfilter/nf_log.c | 8 +++++--- 6 files changed, 10 insertions(+), 13 deletions(-) diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h index 83d855ba6af1..ee07dc8b0a7b 100644 --- a/include/net/netfilter/nf_log.h +++ b/include/net/netfilter/nf_log.h @@ -60,8 +60,7 @@ struct nf_logger { int nf_log_register(u_int8_t pf, struct nf_logger *logger); void nf_log_unregister(struct nf_logger *logger); -void nf_log_set(struct net *net, u_int8_t pf, - const struct nf_logger *logger); +int nf_log_set(struct net *net, u_int8_t pf, const struct nf_logger *logger); void nf_log_unset(struct net *net, const struct nf_logger *logger); int nf_log_bind_pf(struct net *net, u_int8_t pf, diff --git a/net/bridge/netfilter/nf_log_bridge.c b/net/bridge/netfilter/nf_log_bridge.c index 5d9953a90929..1663df598545 100644 --- a/net/bridge/netfilter/nf_log_bridge.c +++ b/net/bridge/netfilter/nf_log_bridge.c @@ -50,8 +50,7 @@ static struct nf_logger nf_bridge_logger __read_mostly = { static int __net_init nf_log_bridge_net_init(struct net *net) { - nf_log_set(net, NFPROTO_BRIDGE, &nf_bridge_logger); - return 0; + return nf_log_set(net, NFPROTO_BRIDGE, &nf_bridge_logger); } static void __net_exit nf_log_bridge_net_exit(struct net *net) diff --git a/net/ipv4/netfilter/nf_log_arp.c b/net/ipv4/netfilter/nf_log_arp.c index cf8f2d4e867a..8945c2653814 100644 --- a/net/ipv4/netfilter/nf_log_arp.c +++ b/net/ipv4/netfilter/nf_log_arp.c @@ -111,8 +111,7 @@ static struct nf_logger nf_arp_logger __read_mostly = { static int __net_init nf_log_arp_net_init(struct net *net) { - nf_log_set(net, NFPROTO_ARP, &nf_arp_logger); - return 0; + return nf_log_set(net, NFPROTO_ARP, &nf_arp_logger); } static void __net_exit nf_log_arp_net_exit(struct net *net) diff --git a/net/ipv4/netfilter/nf_log_ipv4.c b/net/ipv4/netfilter/nf_log_ipv4.c index 076aadda0473..20f225593a8b 100644 --- a/net/ipv4/netfilter/nf_log_ipv4.c +++ b/net/ipv4/netfilter/nf_log_ipv4.c @@ -347,8 +347,7 @@ static struct nf_logger nf_ip_logger __read_mostly = { static int __net_init nf_log_ipv4_net_init(struct net *net) { - nf_log_set(net, NFPROTO_IPV4, &nf_ip_logger); - return 0; + return nf_log_set(net, NFPROTO_IPV4, &nf_ip_logger); } static void __net_exit nf_log_ipv4_net_exit(struct net *net) diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c index 8dd869642f45..c1bcf699a23d 100644 --- a/net/ipv6/netfilter/nf_log_ipv6.c +++ b/net/ipv6/netfilter/nf_log_ipv6.c @@ -379,8 +379,7 @@ static struct nf_logger nf_ip6_logger __read_mostly = { static int __net_init nf_log_ipv6_net_init(struct net *net) { - nf_log_set(net, NFPROTO_IPV6, &nf_ip6_logger); - return 0; + return nf_log_set(net, NFPROTO_IPV6, &nf_ip6_logger); } static void __net_exit nf_log_ipv6_net_exit(struct net *net) diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index aa5847a16713..30a17d649a83 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -39,12 +39,12 @@ static struct nf_logger *__find_logger(int pf, const char *str_logger) return NULL; } -void nf_log_set(struct net *net, u_int8_t pf, const struct nf_logger *logger) +int nf_log_set(struct net *net, u_int8_t pf, const struct nf_logger *logger) { const struct nf_logger *log; - if (pf == NFPROTO_UNSPEC) - return; + if (pf == NFPROTO_UNSPEC || pf >= ARRAY_SIZE(net->nf.nf_loggers)) + return -EOPNOTSUPP; mutex_lock(&nf_log_mutex); log = nft_log_dereference(net->nf.nf_loggers[pf]); @@ -52,6 +52,8 @@ void nf_log_set(struct net *net, u_int8_t pf, const struct nf_logger *logger) rcu_assign_pointer(net->nf.nf_loggers[pf], logger); mutex_unlock(&nf_log_mutex); + + return 0; } EXPORT_SYMBOL(nf_log_set); From 9f9af3d7d303a5f622ceb219bd03bba3af553e76 Mon Sep 17 00:00:00 2001 From: Liad Kaufman Date: Wed, 23 Dec 2015 16:03:46 +0200 Subject: [PATCH 0557/1715] iwlwifi: mvm: re-aggregate shared queue after unsharing When a shared queue becomes unshared, aggregations should be re-enabled if they've existed before. Make sure that they do this, if required. Signed-off-by: Liad Kaufman Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 17 ++ drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 166 +++++++++++++++--- drivers/net/wireless/intel/iwlwifi/mvm/sta.h | 4 + drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 38 +++- .../net/wireless/intel/iwlwifi/mvm/utils.c | 8 +- 5 files changed, 205 insertions(+), 28 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index 28ebc12f1fe0..ee5a9adbf025 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -697,6 +697,10 @@ struct iwl_mvm_baid_data { * it. In this state, when a new queue is needed to be allocated but no * such free queue exists, an inactive queue might be freed and given to * the new RA/TID. + * @IWL_MVM_QUEUE_RECONFIGURING: queue is being reconfigured + * This is the state of a queue that has had traffic pass through it, but + * needs to be reconfigured for some reason, e.g. the queue needs to + * become unshared and aggregations re-enabled on. */ enum iwl_mvm_queue_status { IWL_MVM_QUEUE_FREE, @@ -704,6 +708,7 @@ enum iwl_mvm_queue_status { IWL_MVM_QUEUE_READY, IWL_MVM_QUEUE_SHARED, IWL_MVM_QUEUE_INACTIVE, + IWL_MVM_QUEUE_RECONFIGURING, }; #define IWL_MVM_DQA_QUEUE_TIMEOUT (5 * HZ) @@ -1122,6 +1127,18 @@ static inline bool iwl_mvm_enter_d0i3_on_suspend(struct iwl_mvm *mvm) (mvm->trans->runtime_pm_mode != IWL_PLAT_PM_MODE_D0I3); } +static inline bool iwl_mvm_is_dqa_data_queue(struct iwl_mvm *mvm, u8 queue) +{ + return (queue >= IWL_MVM_DQA_MIN_DATA_QUEUE) && + (queue <= IWL_MVM_DQA_MAX_DATA_QUEUE); +} + +static inline bool iwl_mvm_is_dqa_mgmt_queue(struct iwl_mvm *mvm, u8 queue) +{ + return (queue >= IWL_MVM_DQA_MIN_MGMT_QUEUE) && + (queue <= IWL_MVM_DQA_MAX_MGMT_QUEUE); +} + static inline bool iwl_mvm_is_lar_supported(struct iwl_mvm *mvm) { bool nvm_lar = mvm->nvm_data->lar_enabled; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index 5960eb4fdf1f..1f235e8d193a 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -468,6 +468,11 @@ static int iwl_mvm_get_shared_queue(struct iwl_mvm *mvm, i != IWL_MVM_DQA_BSS_CLIENT_QUEUE) continue; + /* Don't try and take queues being reconfigured */ + if (mvm->queue_info[queue].status == + IWL_MVM_QUEUE_RECONFIGURING) + continue; + ac_to_queue[mvm->queue_info[i].mac80211_ac] = i; } @@ -501,27 +506,33 @@ static int iwl_mvm_get_shared_queue(struct iwl_mvm *mvm, queue = ac_to_queue[IEEE80211_AC_VO]; /* Make sure queue found (or not) is legal */ - if (!((queue >= IWL_MVM_DQA_MIN_MGMT_QUEUE && - queue <= IWL_MVM_DQA_MAX_MGMT_QUEUE) || - (queue >= IWL_MVM_DQA_MIN_DATA_QUEUE && - queue <= IWL_MVM_DQA_MAX_DATA_QUEUE) || - (queue == IWL_MVM_DQA_BSS_CLIENT_QUEUE))) { + if (!iwl_mvm_is_dqa_data_queue(mvm, queue) && + !iwl_mvm_is_dqa_mgmt_queue(mvm, queue) && + (queue != IWL_MVM_DQA_BSS_CLIENT_QUEUE)) { IWL_ERR(mvm, "No DATA queues available to share\n"); - queue = -ENOSPC; + return -ENOSPC; + } + + /* Make sure the queue isn't in the middle of being reconfigured */ + if (mvm->queue_info[queue].status == IWL_MVM_QUEUE_RECONFIGURING) { + IWL_ERR(mvm, + "TXQ %d is in the middle of re-config - try again\n", + queue); + return -EBUSY; } return queue; } /* - * If a given queue has a higher AC than the TID stream that is being added to - * it, the queue needs to be redirected to the lower AC. This function does that + * If a given queue has a higher AC than the TID stream that is being compared + * to, the queue needs to be redirected to the lower AC. This function does that * in such a case, otherwise - if no redirection required - it does nothing, * unless the %force param is true. */ -static int iwl_mvm_scd_queue_redirect(struct iwl_mvm *mvm, int queue, int tid, - int ac, int ssn, unsigned int wdg_timeout, - bool force) +int iwl_mvm_scd_queue_redirect(struct iwl_mvm *mvm, int queue, int tid, + int ac, int ssn, unsigned int wdg_timeout, + bool force) { struct iwl_scd_txq_cfg_cmd cmd = { .scd_queue = queue, @@ -555,7 +566,7 @@ static int iwl_mvm_scd_queue_redirect(struct iwl_mvm *mvm, int queue, int tid, shared_queue = (mvm->queue_info[queue].hw_queue_refcount > 1); spin_unlock_bh(&mvm->queue_info_lock); - IWL_DEBUG_TX_QUEUES(mvm, "Redirecting shared TXQ #%d to FIFO #%d\n", + IWL_DEBUG_TX_QUEUES(mvm, "Redirecting TXQ #%d to FIFO #%d\n", queue, iwl_mvm_ac_to_tx_fifo[ac]); /* Stop MAC queues and wait for this queue to empty */ @@ -709,7 +720,7 @@ static int iwl_mvm_sta_alloc_queue(struct iwl_mvm *mvm, if (WARN_ON(queue <= 0)) { IWL_ERR(mvm, "No available queues for tid %d on sta_id %d\n", tid, cfg.sta_id); - return -ENOSPC; + return queue; } /* @@ -827,6 +838,84 @@ out_err: return ret; } +static void iwl_mvm_unshare_queue(struct iwl_mvm *mvm, int queue) +{ + struct ieee80211_sta *sta; + struct iwl_mvm_sta *mvmsta; + s8 sta_id; + int tid = -1; + unsigned long tid_bitmap; + unsigned int wdg_timeout; + int ssn; + int ret = true; + + lockdep_assert_held(&mvm->mutex); + + spin_lock_bh(&mvm->queue_info_lock); + sta_id = mvm->queue_info[queue].ra_sta_id; + tid_bitmap = mvm->queue_info[queue].tid_bitmap; + spin_unlock_bh(&mvm->queue_info_lock); + + /* Find TID for queue, and make sure it is the only one on the queue */ + tid = find_first_bit(&tid_bitmap, IWL_MAX_TID_COUNT + 1); + if (tid_bitmap != BIT(tid)) { + IWL_ERR(mvm, "Failed to unshare q %d, active tids=0x%lx\n", + queue, tid_bitmap); + return; + } + + IWL_DEBUG_TX_QUEUES(mvm, "Unsharing TXQ %d, keeping tid %d\n", queue, + tid); + + sta = rcu_dereference_protected(mvm->fw_id_to_mac_id[sta_id], + lockdep_is_held(&mvm->mutex)); + + if (WARN_ON_ONCE(IS_ERR_OR_NULL(sta))) + return; + + mvmsta = iwl_mvm_sta_from_mac80211(sta); + wdg_timeout = iwl_mvm_get_wd_timeout(mvm, mvmsta->vif, false, false); + + ssn = IEEE80211_SEQ_TO_SN(mvmsta->tid_data[tid].seq_number); + + ret = iwl_mvm_scd_queue_redirect(mvm, queue, tid, + tid_to_mac80211_ac[tid], ssn, + wdg_timeout, true); + if (ret) { + IWL_ERR(mvm, "Failed to redirect TXQ %d\n", queue); + return; + } + + /* If aggs should be turned back on - do it */ + if (mvmsta->tid_data[tid].state == IWL_AGG_ON) { + struct iwl_mvm_add_sta_cmd cmd; + + mvmsta->tid_disable_agg &= ~BIT(tid); + + cmd.mac_id_n_color = cpu_to_le32(mvmsta->mac_id_n_color); + cmd.sta_id = mvmsta->sta_id; + cmd.add_modify = STA_MODE_MODIFY; + cmd.modify_mask = STA_MODIFY_TID_DISABLE_TX; + cmd.tfd_queue_msk = cpu_to_le32(mvmsta->tfd_queue_msk); + cmd.tid_disable_tx = cpu_to_le16(mvmsta->tid_disable_agg); + + ret = iwl_mvm_send_cmd_pdu(mvm, ADD_STA, CMD_ASYNC, + iwl_mvm_add_sta_cmd_size(mvm), &cmd); + if (!ret) { + IWL_DEBUG_TX_QUEUES(mvm, + "TXQ #%d is now aggregated again\n", + queue); + + /* Mark queue intenally as aggregating again */ + iwl_trans_txq_set_shared_mode(mvm->trans, queue, false); + } + } + + spin_lock_bh(&mvm->queue_info_lock); + mvm->queue_info[queue].status = IWL_MVM_QUEUE_READY; + spin_unlock_bh(&mvm->queue_info_lock); +} + static inline u8 iwl_mvm_tid_to_ac_queue(int tid) { if (tid == IWL_MAX_TID_COUNT) @@ -894,13 +983,26 @@ void iwl_mvm_add_new_dqa_stream_wk(struct work_struct *wk) struct ieee80211_sta *sta; struct iwl_mvm_sta *mvmsta; unsigned long deferred_tid_traffic; - int sta_id, tid; + int queue, sta_id, tid; /* Check inactivity of queues */ iwl_mvm_inactivity_check(mvm); mutex_lock(&mvm->mutex); + /* Reconfigure queues requiring reconfiguation */ + for (queue = 0; queue < IWL_MAX_HW_QUEUES; queue++) { + bool reconfig; + + spin_lock_bh(&mvm->queue_info_lock); + reconfig = (mvm->queue_info[queue].status == + IWL_MVM_QUEUE_RECONFIGURING); + spin_unlock_bh(&mvm->queue_info_lock); + + if (reconfig) + iwl_mvm_unshare_queue(mvm, queue); + } + /* Go over all stations with deferred traffic */ for_each_set_bit(sta_id, mvm->sta_deferred_frames, IWL_MVM_STATION_COUNT) { @@ -1956,7 +2058,7 @@ int iwl_mvm_sta_tx_agg_start(struct iwl_mvm *mvm, struct ieee80211_vif *vif, return -EIO; } - spin_lock_bh(&mvm->queue_info_lock); + spin_lock(&mvm->queue_info_lock); /* * Note the possible cases: @@ -1967,14 +2069,20 @@ int iwl_mvm_sta_tx_agg_start(struct iwl_mvm *mvm, struct ieee80211_vif *vif, * non-DQA mode, since the TXQ hasn't yet been allocated */ txq_id = mvmsta->tid_data[tid].txq_id; - if (!iwl_mvm_is_dqa_supported(mvm) || + if (iwl_mvm_is_dqa_supported(mvm) && + unlikely(mvm->queue_info[txq_id].status == IWL_MVM_QUEUE_SHARED)) { + ret = -ENXIO; + IWL_DEBUG_TX_QUEUES(mvm, + "Can't start tid %d agg on shared queue!\n", + tid); + goto release_locks; + } else if (!iwl_mvm_is_dqa_supported(mvm) || mvm->queue_info[txq_id].status != IWL_MVM_QUEUE_READY) { txq_id = iwl_mvm_find_free_queue(mvm, mvmsta->sta_id, mvm->first_agg_queue, mvm->last_agg_queue); if (txq_id < 0) { ret = txq_id; - spin_unlock_bh(&mvm->queue_info_lock); IWL_ERR(mvm, "Failed to allocate agg queue\n"); goto release_locks; } @@ -1982,7 +2090,8 @@ int iwl_mvm_sta_tx_agg_start(struct iwl_mvm *mvm, struct ieee80211_vif *vif, /* TXQ hasn't yet been enabled, so mark it only as reserved */ mvm->queue_info[txq_id].status = IWL_MVM_QUEUE_RESERVED; } - spin_unlock_bh(&mvm->queue_info_lock); + + spin_unlock(&mvm->queue_info_lock); IWL_DEBUG_TX_QUEUES(mvm, "AGG for tid %d will be on queue #%d\n", @@ -2006,8 +2115,11 @@ int iwl_mvm_sta_tx_agg_start(struct iwl_mvm *mvm, struct ieee80211_vif *vif, } ret = 0; + goto out; release_locks: + spin_unlock(&mvm->queue_info_lock); +out: spin_unlock_bh(&mvmsta->lock); return ret; @@ -2023,6 +2135,7 @@ int iwl_mvm_sta_tx_agg_oper(struct iwl_mvm *mvm, struct ieee80211_vif *vif, iwl_mvm_get_wd_timeout(mvm, vif, sta->tdls, false); int queue, ret; bool alloc_queue = true; + enum iwl_mvm_queue_status queue_status; u16 ssn; struct iwl_trans_txq_scd_cfg cfg = { @@ -2048,13 +2161,15 @@ int iwl_mvm_sta_tx_agg_oper(struct iwl_mvm *mvm, struct ieee80211_vif *vif, cfg.fifo = iwl_mvm_ac_to_tx_fifo[tid_to_mac80211_ac[tid]]; + spin_lock_bh(&mvm->queue_info_lock); + queue_status = mvm->queue_info[queue].status; + spin_unlock_bh(&mvm->queue_info_lock); + /* In DQA mode, the existing queue might need to be reconfigured */ if (iwl_mvm_is_dqa_supported(mvm)) { - spin_lock_bh(&mvm->queue_info_lock); /* Maybe there is no need to even alloc a queue... */ if (mvm->queue_info[queue].status == IWL_MVM_QUEUE_READY) alloc_queue = false; - spin_unlock_bh(&mvm->queue_info_lock); /* * Only reconfig the SCD for the queue if the window size has @@ -2089,9 +2204,12 @@ int iwl_mvm_sta_tx_agg_oper(struct iwl_mvm *mvm, struct ieee80211_vif *vif, vif->hw_queue[tid_to_mac80211_ac[tid]], ssn, &cfg, wdg_timeout); - ret = iwl_mvm_sta_tx_agg(mvm, sta, tid, queue, true); - if (ret) - return -EIO; + /* Send ADD_STA command to enable aggs only if the queue isn't shared */ + if (queue_status != IWL_MVM_QUEUE_SHARED) { + ret = iwl_mvm_sta_tx_agg(mvm, sta, tid, queue, true); + if (ret) + return -EIO; + } /* No need to mark as reserved */ spin_lock_bh(&mvm->queue_info_lock); @@ -2123,7 +2241,6 @@ int iwl_mvm_sta_tx_agg_stop(struct iwl_mvm *mvm, struct ieee80211_vif *vif, u16 txq_id; int err; - /* * If mac80211 is cleaning its state, then say that we finished since * our state has been cleared anyway. @@ -2152,6 +2269,7 @@ int iwl_mvm_sta_tx_agg_stop(struct iwl_mvm *mvm, struct ieee80211_vif *vif, */ if (mvm->queue_info[txq_id].status == IWL_MVM_QUEUE_RESERVED) mvm->queue_info[txq_id].status = IWL_MVM_QUEUE_FREE; + spin_unlock_bh(&mvm->queue_info_lock); switch (tid_data->state) { diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h index bbc1cab2c3bf..709542bbfce5 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h @@ -554,4 +554,8 @@ void iwl_mvm_modify_all_sta_disable_tx(struct iwl_mvm *mvm, void iwl_mvm_csa_client_absent(struct iwl_mvm *mvm, struct ieee80211_vif *vif); void iwl_mvm_add_new_dqa_stream_wk(struct work_struct *wk); +int iwl_mvm_scd_queue_redirect(struct iwl_mvm *mvm, int queue, int tid, + int ac, int ssn, unsigned int wdg_timeout, + bool force); + #endif /* __sta_h__ */ diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index c6585ab48df3..8b91544e6220 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -838,6 +838,22 @@ static void iwl_mvm_tx_add_stream(struct iwl_mvm *mvm, } } +/* Check if there are any timed-out TIDs on a given shared TXQ */ +static bool iwl_mvm_txq_should_update(struct iwl_mvm *mvm, int txq_id) +{ + unsigned long queue_tid_bitmap = mvm->queue_info[txq_id].tid_bitmap; + unsigned long now = jiffies; + int tid; + + for_each_set_bit(tid, &queue_tid_bitmap, IWL_MAX_TID_COUNT + 1) { + if (time_before(mvm->queue_info[txq_id].last_frame_time[tid] + + IWL_MVM_DQA_QUEUE_TIMEOUT, now)) + return true; + } + + return false; +} + /* * Sets the fields in the Tx cmd that are crypto related */ @@ -940,7 +956,6 @@ static int iwl_mvm_tx_mpdu(struct iwl_mvm *mvm, struct sk_buff *skb, iwl_trans_free_tx_cmd(mvm->trans, dev_cmd); spin_unlock(&mvmsta->lock); return 0; - } /* If we are here - TXQ exists and needs to be re-activated */ @@ -953,8 +968,25 @@ static int iwl_mvm_tx_mpdu(struct iwl_mvm *mvm, struct sk_buff *skb, txq_id); } - /* Keep track of the time of the last frame for this RA/TID */ - mvm->queue_info[txq_id].last_frame_time[tid] = jiffies; + if (iwl_mvm_is_dqa_supported(mvm)) { + /* Keep track of the time of the last frame for this RA/TID */ + mvm->queue_info[txq_id].last_frame_time[tid] = jiffies; + + /* + * If we have timed-out TIDs - schedule the worker that will + * reconfig the queues and update them + * + * Note that the mvm->queue_info_lock isn't being taken here in + * order to not serialize the TX flow. This isn't dangerous + * because scheduling mvm->add_stream_wk can't ruin the state, + * and if we DON'T schedule it due to some race condition then + * next TX we get here we will. + */ + if (unlikely(mvm->queue_info[txq_id].status == + IWL_MVM_QUEUE_SHARED && + iwl_mvm_txq_should_update(mvm, txq_id))) + schedule_work(&mvm->add_stream_wk); + } IWL_DEBUG_TX(mvm, "TX to [%d|%d] Q:%d - seq: 0x%x\n", mvmsta->sta_id, tid, txq_id, IEEE80211_SEQ_TO_SN(seq_number)); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c index 68f4e7fdfc11..dae64a67a531 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c @@ -1131,7 +1131,13 @@ static void iwl_mvm_remove_inactive_tids(struct iwl_mvm *mvm, BIT(mvmsta->vif->hw_queue[tid_to_mac80211_ac[tid]]); } - /* TODO: if queue was shared - need to re-enable AGGs */ + /* If the queue is marked as shared - "unshare" it */ + if (mvm->queue_info[queue].hw_queue_refcount == 1 && + mvm->queue_info[queue].status == IWL_MVM_QUEUE_SHARED) { + mvm->queue_info[queue].status = IWL_MVM_QUEUE_RECONFIGURING; + IWL_DEBUG_TX_QUEUES(mvm, "Marking Q:%d for reconfig\n", + queue); + } } void iwl_mvm_inactivity_check(struct iwl_mvm *mvm) From edbe961cf44eed1d3b78f3b1eee0dad013ad927f Mon Sep 17 00:00:00 2001 From: Liad Kaufman Date: Tue, 2 Feb 2016 15:43:32 +0200 Subject: [PATCH 0558/1715] iwlwifi: mvm: keep track of tid associated with each queue When sending the SCD_QUEUE_CONFIG command, the queue is associated to a specific TID. If later there is a need to use this TID on a different queue instead, it first needs to be unassociated from the first queue. Keep track for every queue what TID is associated with it. Signed-off-by: Liad Kaufman Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 1 + drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 7 +++++++ drivers/net/wireless/intel/iwlwifi/mvm/utils.c | 3 +++ 3 files changed, 11 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index ee5a9adbf025..1806495aab57 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -772,6 +772,7 @@ struct iwl_mvm { u8 ra_sta_id; /* The RA this queue is mapped to, if exists */ bool reserved; /* Is this the TXQ reserved for a STA */ u8 mac80211_ac; /* The mac80211 AC this queue is mapped to */ + u8 txq_tid; /* The TID "owner" of this queue*/ u16 tid_bitmap; /* Bitmap of the TIDs mapped to this queue */ /* Timestamp for inactivation per TID of this queue */ unsigned long last_frame_time[IWL_MAX_TID_COUNT + 1]; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index 1f235e8d193a..730ba78955f2 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -562,6 +562,7 @@ int iwl_mvm_scd_queue_redirect(struct iwl_mvm *mvm, int queue, int tid, cmd.sta_id = mvm->queue_info[queue].ra_sta_id; cmd.tx_fifo = iwl_mvm_ac_to_tx_fifo[mvm->queue_info[queue].mac80211_ac]; + cmd.tid = mvm->queue_info[queue].txq_tid; mq = mvm->queue_info[queue].hw_queue_to_mac80211; shared_queue = (mvm->queue_info[queue].hw_queue_refcount > 1); spin_unlock_bh(&mvm->queue_info_lock); @@ -591,6 +592,11 @@ int iwl_mvm_scd_queue_redirect(struct iwl_mvm *mvm, int queue, int tid, cmd.sta_id, tid, LINK_QUAL_AGG_FRAME_LIMIT_DEF, ssn, wdg_timeout); + /* Update the TID "owner" of the queue */ + spin_lock_bh(&mvm->queue_info_lock); + mvm->queue_info[queue].txq_tid = tid; + spin_unlock_bh(&mvm->queue_info_lock); + /* TODO: Work-around SCD bug when moving back by multiples of 0x40 */ /* Redirect to lower AC */ @@ -749,6 +755,7 @@ static int iwl_mvm_sta_alloc_queue(struct iwl_mvm *mvm, ac = mvm->queue_info[queue].mac80211_ac; cmd.sta_id = mvm->queue_info[queue].ra_sta_id; cmd.tx_fifo = iwl_mvm_ac_to_tx_fifo[ac]; + cmd.tid = mvm->queue_info[queue].txq_tid; spin_unlock_bh(&mvm->queue_info_lock); /* Disable the queue */ diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c index dae64a67a531..423efab90607 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c @@ -669,6 +669,8 @@ void iwl_mvm_enable_txq(struct iwl_mvm *mvm, int queue, int mac80211_queue, tid_to_mac80211_ac[cfg->tid]; else mvm->queue_info[queue].mac80211_ac = IEEE80211_AC_VO; + + mvm->queue_info[queue].txq_tid = cfg->tid; } IWL_DEBUG_TX_QUEUES(mvm, @@ -761,6 +763,7 @@ void iwl_mvm_disable_txq(struct iwl_mvm *mvm, int queue, int mac80211_queue, } cmd.sta_id = mvm->queue_info[queue].ra_sta_id; + cmd.tid = mvm->queue_info[queue].txq_tid; /* Make sure queue info is correct even though we overwrite it */ WARN(mvm->queue_info[queue].hw_queue_refcount || From 8d98ae6eb0d51f75a7af51758072558ffbb8270f Mon Sep 17 00:00:00 2001 From: Liad Kaufman Date: Tue, 2 Feb 2016 16:02:46 +0200 Subject: [PATCH 0559/1715] iwlwifi: mvm: re-assign old queues after hw restart in dqa mode When working in DQA mode, if a queue is shared and a HW restart occurs, there might be a possible race condition between stations on the queues, and an existing queue might be left with no queues. To solve this, make sure in DQA mode to re-assign the same queues as before the HW restart. Signed-off-by: Liad Kaufman Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 72 +++++++++++++++++++- 1 file changed, 70 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index 730ba78955f2..1ddcbeacd5f4 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -759,8 +759,9 @@ static int iwl_mvm_sta_alloc_queue(struct iwl_mvm *mvm, spin_unlock_bh(&mvm->queue_info_lock); /* Disable the queue */ - iwl_mvm_invalidate_sta_queue(mvm, queue, disable_agg_tids, - true); + if (disable_agg_tids) + iwl_mvm_invalidate_sta_queue(mvm, queue, + disable_agg_tids, false); iwl_trans_txq_disable(mvm->trans, queue, false); ret = iwl_mvm_send_cmd_pdu(mvm, SCD_QUEUE_CFG, 0, sizeof(cmd), &cmd); @@ -776,6 +777,10 @@ static int iwl_mvm_sta_alloc_queue(struct iwl_mvm *mvm, return ret; } + + /* If TXQ is allocated to another STA, update removal in FW */ + if (cmd.sta_id != mvmsta->sta_id) + iwl_mvm_invalidate_sta_queue(mvm, queue, 0, true); } IWL_DEBUG_TX_QUEUES(mvm, @@ -1072,6 +1077,61 @@ static int iwl_mvm_reserve_sta_stream(struct iwl_mvm *mvm, return 0; } +/* + * In DQA mode, after a HW restart the queues should be allocated as before, in + * order to avoid race conditions when there are shared queues. This function + * does the re-mapping and queue allocation. + * + * Note that re-enabling aggregations isn't done in this function. + */ +static void iwl_mvm_realloc_queues_after_restart(struct iwl_mvm *mvm, + struct iwl_mvm_sta *mvm_sta) +{ + unsigned int wdg_timeout = + iwl_mvm_get_wd_timeout(mvm, mvm_sta->vif, false, false); + int i; + struct iwl_trans_txq_scd_cfg cfg = { + .sta_id = mvm_sta->sta_id, + .frame_limit = IWL_FRAME_LIMIT, + }; + + /* Make sure reserved queue is still marked as such (or allocated) */ + mvm->queue_info[mvm_sta->reserved_queue].status = + IWL_MVM_QUEUE_RESERVED; + + for (i = 0; i <= IWL_MAX_TID_COUNT; i++) { + struct iwl_mvm_tid_data *tid_data = &mvm_sta->tid_data[i]; + int txq_id = tid_data->txq_id; + int ac; + u8 mac_queue; + + if (txq_id == IEEE80211_INVAL_HW_QUEUE) + continue; + + skb_queue_head_init(&tid_data->deferred_tx_frames); + + ac = tid_to_mac80211_ac[i]; + mac_queue = mvm_sta->vif->hw_queue[ac]; + + cfg.tid = i; + cfg.fifo = iwl_mvm_ac_to_tx_fifo[ac]; + cfg.aggregate = (txq_id >= IWL_MVM_DQA_MIN_DATA_QUEUE || + txq_id == IWL_MVM_DQA_BSS_CLIENT_QUEUE); + + IWL_DEBUG_TX_QUEUES(mvm, + "Re-mapping sta %d tid %d to queue %d\n", + mvm_sta->sta_id, i, txq_id); + + iwl_mvm_enable_txq(mvm, txq_id, mac_queue, + IEEE80211_SEQ_TO_SN(tid_data->seq_number), + &cfg, wdg_timeout); + + mvm->queue_info[txq_id].status = IWL_MVM_QUEUE_READY; + } + + atomic_set(&mvm->pending_frames[mvm_sta->sta_id], 0); +} + int iwl_mvm_add_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif, struct ieee80211_sta *sta) @@ -1094,6 +1154,13 @@ int iwl_mvm_add_sta(struct iwl_mvm *mvm, spin_lock_init(&mvm_sta->lock); + /* In DQA mode, if this is a HW restart, re-alloc existing queues */ + if (iwl_mvm_is_dqa_supported(mvm) && + test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status)) { + iwl_mvm_realloc_queues_after_restart(mvm, mvm_sta); + goto update_fw; + } + mvm_sta->sta_id = sta_id; mvm_sta->mac_id_n_color = FW_CMD_ID_AND_COLOR(mvmvif->id, mvmvif->color); @@ -1157,6 +1224,7 @@ int iwl_mvm_add_sta(struct iwl_mvm *mvm, goto err; } +update_fw: ret = iwl_mvm_sta_send_to_fw(mvm, sta, false, 0); if (ret) goto err; From f7c692deef19ea953e3f792cc1c148dfcc74ba17 Mon Sep 17 00:00:00 2001 From: Liad Kaufman Date: Tue, 8 Mar 2016 10:41:32 +0200 Subject: [PATCH 0560/1715] iwlwifi: mvm: use defines for SCD_CONFIG_CMD enablement Due to the addition of another option in the SCD_CONFIG_CMD's %enable field, change the assignment of this field to use defines rather than hard-code the value itself. Signed-off-by: Liad Kaufman Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h | 12 ++++++++++-- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 4 ++-- drivers/net/wireless/intel/iwlwifi/mvm/utils.c | 13 +++++++------ 3 files changed, 19 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h index 4144623e1616..6b4c63a5e625 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h @@ -675,13 +675,21 @@ static inline u32 iwl_mvm_get_scd_ssn(struct iwl_mvm_tx_resp *tx_resp) tx_resp->frame_count) & 0xfff; } +/* Available options for the SCD_QUEUE_CFG HCMD */ +enum iwl_scd_cfg_actions { + SCD_CFG_DISABLE_QUEUE = 0x0, + SCD_CFG_ENABLE_QUEUE = 0x1, + SCD_CFG_UPDATE_QUEUE_TID = 0x2, +}; + /** * struct iwl_scd_txq_cfg_cmd - New txq hw scheduler config command * @token: * @sta_id: station id * @tid: * @scd_queue: scheduler queue to confiug - * @enable: 1 queue enable, 0 queue disable + * @action: 1 queue enable, 0 queue disable, 2 change txq's tid owner + * Value is one of %iwl_scd_cfg_actions options * @aggregate: 1 aggregated queue, 0 otherwise * @tx_fifo: %enum iwl_mvm_tx_fifo * @window: BA window size @@ -692,7 +700,7 @@ struct iwl_scd_txq_cfg_cmd { u8 sta_id; u8 tid; u8 scd_queue; - u8 enable; + u8 action; u8 aggregate; u8 tx_fifo; u8 window; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index 1ddcbeacd5f4..e87473a29df2 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -536,7 +536,7 @@ int iwl_mvm_scd_queue_redirect(struct iwl_mvm *mvm, int queue, int tid, { struct iwl_scd_txq_cfg_cmd cmd = { .scd_queue = queue, - .enable = 0, + .action = SCD_CFG_DISABLE_QUEUE, }; bool shared_queue; unsigned long mq; @@ -745,7 +745,7 @@ static int iwl_mvm_sta_alloc_queue(struct iwl_mvm *mvm, if (using_inactive_queue) { struct iwl_scd_txq_cfg_cmd cmd = { .scd_queue = queue, - .enable = 0, + .action = SCD_CFG_DISABLE_QUEUE, }; u8 ac; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c index 423efab90607..7c138fedcb19 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c @@ -610,7 +610,7 @@ int iwl_mvm_reconfig_scd(struct iwl_mvm *mvm, int queue, int fifo, int sta_id, { struct iwl_scd_txq_cfg_cmd cmd = { .scd_queue = queue, - .enable = 1, + .action = SCD_CFG_ENABLE_QUEUE, .window = frame_limit, .sta_id = sta_id, .ssn = cpu_to_le16(ssn), @@ -684,7 +684,7 @@ void iwl_mvm_enable_txq(struct iwl_mvm *mvm, int queue, int mac80211_queue, if (enable_queue) { struct iwl_scd_txq_cfg_cmd cmd = { .scd_queue = queue, - .enable = 1, + .action = SCD_CFG_ENABLE_QUEUE, .window = cfg->frame_limit, .sta_id = cfg->sta_id, .ssn = cpu_to_le16(ssn), @@ -711,7 +711,7 @@ void iwl_mvm_disable_txq(struct iwl_mvm *mvm, int queue, int mac80211_queue, { struct iwl_scd_txq_cfg_cmd cmd = { .scd_queue = queue, - .enable = 0, + .action = SCD_CFG_DISABLE_QUEUE, }; bool remove_mac_queue = true; int ret; @@ -746,8 +746,9 @@ void iwl_mvm_disable_txq(struct iwl_mvm *mvm, int queue, int mac80211_queue, ~BIT(mac80211_queue); mvm->queue_info[queue].hw_queue_refcount--; - cmd.enable = mvm->queue_info[queue].hw_queue_refcount ? 1 : 0; - if (!cmd.enable) + cmd.action = mvm->queue_info[queue].hw_queue_refcount ? + SCD_CFG_ENABLE_QUEUE : SCD_CFG_DISABLE_QUEUE; + if (cmd.action == SCD_CFG_DISABLE_QUEUE) mvm->queue_info[queue].status = IWL_MVM_QUEUE_FREE; IWL_DEBUG_TX_QUEUES(mvm, @@ -757,7 +758,7 @@ void iwl_mvm_disable_txq(struct iwl_mvm *mvm, int queue, int mac80211_queue, mvm->queue_info[queue].hw_queue_to_mac80211); /* If the queue is still enabled - nothing left to do in this func */ - if (cmd.enable) { + if (cmd.action == SCD_CFG_ENABLE_QUEUE) { spin_unlock_bh(&mvm->queue_info_lock); return; } From 19aefa45941d2d1f6220f0b9768cdef907e15086 Mon Sep 17 00:00:00 2001 From: Liad Kaufman Date: Tue, 8 Mar 2016 14:29:51 +0200 Subject: [PATCH 0561/1715] iwlwifi: mvm: support txq tid owner change Every active TXQ is assigned to a TID given through the SCD_CONFIG_CMD, and acts as an identifier in the FW. However, there may be cases this ownership needs to be changed. For example, in the following scenario: 1. TID x is owner of a queue 2. Due to a shortage of queues, TID y and z share with x 3. TID x becomes inactive and needs to be removed from the shared queue. In this scenario, if another queue is freed and traffic on x continues, we can't allocate it a new queue as long as it is the owner of the first queue. Support moving ownership of a TXQ to a different TID (same STA) without stopping the queue. Signed-off-by: Liad Kaufman Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 51 ++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index e87473a29df2..30fc3afe5241 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -850,6 +850,41 @@ out_err: return ret; } +static void iwl_mvm_change_queue_owner(struct iwl_mvm *mvm, int queue) +{ + struct iwl_scd_txq_cfg_cmd cmd = { + .scd_queue = queue, + .action = SCD_CFG_UPDATE_QUEUE_TID, + }; + s8 sta_id; + int tid; + unsigned long tid_bitmap; + int ret; + + lockdep_assert_held(&mvm->mutex); + + spin_lock_bh(&mvm->queue_info_lock); + sta_id = mvm->queue_info[queue].ra_sta_id; + tid_bitmap = mvm->queue_info[queue].tid_bitmap; + spin_unlock_bh(&mvm->queue_info_lock); + + if (WARN(!tid_bitmap, "TXQ %d has no tids assigned to it\n", queue)) + return; + + /* Find any TID for queue */ + tid = find_first_bit(&tid_bitmap, IWL_MAX_TID_COUNT + 1); + cmd.tid = tid; + cmd.tx_fifo = iwl_mvm_ac_to_tx_fifo[tid_to_mac80211_ac[tid]]; + + ret = iwl_mvm_send_cmd_pdu(mvm, SCD_QUEUE_CFG, 0, sizeof(cmd), &cmd); + if (ret) + IWL_ERR(mvm, "Failed to update owner of TXQ %d (ret=%d)\n", + queue, ret); + else + IWL_DEBUG_TX_QUEUES(mvm, "Changed TXQ %d ownership to tid %d\n", + queue, tid); +} + static void iwl_mvm_unshare_queue(struct iwl_mvm *mvm, int queue) { struct ieee80211_sta *sta; @@ -1005,14 +1040,30 @@ void iwl_mvm_add_new_dqa_stream_wk(struct work_struct *wk) /* Reconfigure queues requiring reconfiguation */ for (queue = 0; queue < IWL_MAX_HW_QUEUES; queue++) { bool reconfig; + bool change_owner; spin_lock_bh(&mvm->queue_info_lock); reconfig = (mvm->queue_info[queue].status == IWL_MVM_QUEUE_RECONFIGURING); + + /* + * We need to take into account a situation in which a TXQ was + * allocated to TID x, and then turned shared by adding TIDs y + * and z. If TID x becomes inactive and is removed from the TXQ, + * ownership must be given to one of the remaining TIDs. + * This is mainly because if TID x continues - a new queue can't + * be allocated for it as long as it is an owner of another TXQ. + */ + change_owner = !(mvm->queue_info[queue].tid_bitmap & + BIT(mvm->queue_info[queue].txq_tid)) && + (mvm->queue_info[queue].status == + IWL_MVM_QUEUE_SHARED); spin_unlock_bh(&mvm->queue_info_lock); if (reconfig) iwl_mvm_unshare_queue(mvm, queue); + else if (change_owner) + iwl_mvm_change_queue_owner(mvm, queue); } /* Go over all stations with deferred traffic */ From 486c96a753177a37e8dff4195df698df35d5c2bc Mon Sep 17 00:00:00 2001 From: Oren Givon Date: Thu, 23 Jun 2016 14:51:41 +0300 Subject: [PATCH 0562/1715] iwlwifi: rename and reorder 9000 series configuration structs Rename and reorder the 9000 series configuration structs: - struct containing configuration of 5165 was renamed to 9000. Signed-off-by: Oren Givon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/iwl-9000.c | 63 ++++++++++--------- .../net/wireless/intel/iwlwifi/iwl-config.h | 4 +- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 34 +++++----- 3 files changed, 50 insertions(+), 51 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-9000.c b/drivers/net/wireless/intel/iwlwifi/iwl-9000.c index fbaf705f3fa7..b8356a82c982 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-9000.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-9000.c @@ -72,15 +72,15 @@ #define IWL9000_SMEM_OFFSET 0x400000 #define IWL9000_SMEM_LEN 0x68000 -#define IWL9000_FW_PRE "iwlwifi-9000-pu-a0-lc-a0-" +#define IWL9000_FW_PRE "iwlwifi-9000-pu-a0-jf-a0-" #define IWL9260_FW_PRE "iwlwifi-9260-th-a0-jf-a0-" -#define IWL9260LC_FW_PRE "iwlwifi-9260-th-a0-lc-a0-" +#define IWL9000LC_FW_PRE "iwlwifi-9000-pu-a0-lc-a0-" #define IWL9000_MODULE_FIRMWARE(api) \ IWL9000_FW_PRE "-" __stringify(api) ".ucode" #define IWL9260_MODULE_FIRMWARE(api) \ IWL9260_FW_PRE "-" __stringify(api) ".ucode" -#define IWL9260LC_MODULE_FIRMWARE(api) \ - IWL9260LC_FW_PRE "-" __stringify(api) ".ucode" +#define IWL9000LC_MODULE_FIRMWARE(api) \ + IWL9000LC_FW_PRE "-" __stringify(api) ".ucode" #define NVM_HW_SECTION_NUM_FAMILY_9000 10 @@ -147,40 +147,41 @@ static const struct iwl_tt_params iwl9000_tt_params = { .rf_id = true const struct iwl_cfg iwl9260_2ac_cfg = { - .name = "Intel(R) Dual Band Wireless AC 9260", - .fw_name_pre = IWL9260_FW_PRE, - IWL_DEVICE_9000, - .ht_params = &iwl9000_ht_params, - .nvm_ver = IWL9000_NVM_VERSION, - .nvm_calib_ver = IWL9000_TX_POWER_VERSION, - .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, + .name = "Intel(R) Dual Band Wireless AC 9260", + .fw_name_pre = IWL9260_FW_PRE, + IWL_DEVICE_9000, + .ht_params = &iwl9000_ht_params, + .nvm_ver = IWL9000_NVM_VERSION, + .nvm_calib_ver = IWL9000_TX_POWER_VERSION, + .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, +}; + +const struct iwl_cfg iwl9000_2ac_cfg = { + .name = "Intel(R) Dual Band Wireless AC 9000", + .fw_name_pre = IWL9000_FW_PRE, + IWL_DEVICE_9000, + .ht_params = &iwl9000_ht_params, + .nvm_ver = IWL9000_NVM_VERSION, + .nvm_calib_ver = IWL9000_TX_POWER_VERSION, + .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, + .integrated = true, }; /* * TODO the struct below is for internal testing only this should be * removed by EO 2016~ */ -const struct iwl_cfg iwl9260lc_2ac_cfg = { - .name = "Intel(R) Dual Band Wireless AC 9260", - .fw_name_pre = IWL9260LC_FW_PRE, - IWL_DEVICE_9000, - .ht_params = &iwl9000_ht_params, - .nvm_ver = IWL9000_NVM_VERSION, - .nvm_calib_ver = IWL9000_TX_POWER_VERSION, - .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, -}; - -const struct iwl_cfg iwl5165_2ac_cfg = { - .name = "Intel(R) Dual Band Wireless AC 5165", - .fw_name_pre = IWL9000_FW_PRE, - IWL_DEVICE_9000, - .ht_params = &iwl9000_ht_params, - .nvm_ver = IWL9000_NVM_VERSION, - .nvm_calib_ver = IWL9000_TX_POWER_VERSION, - .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, - .integrated = true, +const struct iwl_cfg iwl9000lc_2ac_cfg = { + .name = "Intel(R) Dual Band Wireless AC 9000", + .fw_name_pre = IWL9000LC_FW_PRE, + IWL_DEVICE_9000, + .ht_params = &iwl9000_ht_params, + .nvm_ver = IWL9000_NVM_VERSION, + .nvm_calib_ver = IWL9000_TX_POWER_VERSION, + .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, + .integrated = true, }; MODULE_FIRMWARE(IWL9000_MODULE_FIRMWARE(IWL9000_UCODE_API_MAX)); MODULE_FIRMWARE(IWL9260_MODULE_FIRMWARE(IWL9000_UCODE_API_MAX)); -MODULE_FIRMWARE(IWL9260LC_MODULE_FIRMWARE(IWL9000_UCODE_API_MAX)); +MODULE_FIRMWARE(IWL9000LC_MODULE_FIRMWARE(IWL9000_UCODE_API_MAX)); diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h index 423b23320d4f..04260fc6216e 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h @@ -449,9 +449,9 @@ extern const struct iwl_cfg iwl4165_2ac_cfg; extern const struct iwl_cfg iwl8260_2ac_sdio_cfg; extern const struct iwl_cfg iwl8265_2ac_sdio_cfg; extern const struct iwl_cfg iwl4165_2ac_sdio_cfg; +extern const struct iwl_cfg iwl9000_2ac_cfg; +extern const struct iwl_cfg iwl9000lc_2ac_cfg; extern const struct iwl_cfg iwl9260_2ac_cfg; -extern const struct iwl_cfg iwl9260lc_2ac_cfg; -extern const struct iwl_cfg iwl5165_2ac_cfg; extern const struct iwl_cfg iwla000_2ac_cfg; #endif /* CONFIG_IWLMVM */ diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 78cf9a7f3eac..bf523f516a0c 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -504,18 +504,18 @@ static const struct pci_device_id iwl_hw_card_ids[] = { /* 9000 Series */ {IWL_PCI_DEVICE(0x2526, 0x0000, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x0010, iwl9260_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x0A10, iwl9260_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x0010, iwl9260_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x0210, iwl9260_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x0410, iwl9260_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x0610, iwl9260_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x0310, iwl5165_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x0000, iwl5165_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x0510, iwl5165_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x2010, iwl5165_2ac_cfg)}, - {IWL_PCI_DEVICE(0x2526, 0x1420, iwl5165_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x0710, iwl5165_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x2A10, iwl5165_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0A10, iwl9000_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0010, iwl9000_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0210, iwl9000_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0410, iwl9000_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0610, iwl9000_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0310, iwl9000_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0000, iwl9000_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0510, iwl9000_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x2010, iwl9000_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x1420, iwl9000_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0710, iwl9000_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x2A10, iwl9000_2ac_cfg)}, /* a000 Series */ {IWL_PCI_DEVICE(0x2720, 0x0A10, iwla000_2ac_cfg)}, @@ -608,7 +608,6 @@ static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { const struct iwl_cfg *cfg = (struct iwl_cfg *)(ent->driver_data); const struct iwl_cfg *cfg_7265d __maybe_unused = NULL; - const struct iwl_cfg *cfg_9260lc __maybe_unused = NULL; struct iwl_trans *iwl_trans; int ret; @@ -637,11 +636,10 @@ static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } if (iwl_trans->cfg->rf_id) { - if (cfg == &iwl9260_2ac_cfg) - cfg_9260lc = &iwl9260lc_2ac_cfg; - if (cfg_9260lc && iwl_trans->hw_rf_id == CSR_HW_RF_ID_TYPE_LC) { - cfg = cfg_9260lc; - iwl_trans->cfg = cfg_9260lc; + if (cfg == &iwl9000_2ac_cfg && + iwl_trans->hw_rf_id == CSR_HW_RF_ID_TYPE_LC) { + cfg = &iwl9000lc_2ac_cfg; + iwl_trans->cfg = cfg; } } #endif From 827e9ab85402dc876d23e27c64b837255460108e Mon Sep 17 00:00:00 2001 From: Oren Givon Date: Sun, 26 Jun 2016 14:15:27 +0300 Subject: [PATCH 0563/1715] iwlwifi: add a new series 9460 with new PCI ID Add a new series to the 9000 series called 9460. In addition, add a new PCI ID that is the 9460 new series. Signed-off-by: Oren Givon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/iwl-9000.c | 4 +-- .../net/wireless/intel/iwlwifi/iwl-config.h | 2 +- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 27 ++++++++++--------- 3 files changed, 17 insertions(+), 16 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-9000.c b/drivers/net/wireless/intel/iwlwifi/iwl-9000.c index b8356a82c982..5621cf28a901 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-9000.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-9000.c @@ -156,8 +156,8 @@ const struct iwl_cfg iwl9260_2ac_cfg = { .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, }; -const struct iwl_cfg iwl9000_2ac_cfg = { - .name = "Intel(R) Dual Band Wireless AC 9000", +const struct iwl_cfg iwl9460_2ac_cfg = { + .name = "Intel(R) Dual Band Wireless AC 9460", .fw_name_pre = IWL9000_FW_PRE, IWL_DEVICE_9000, .ht_params = &iwl9000_ht_params, diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h index 04260fc6216e..a18b57fdb10f 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h @@ -449,9 +449,9 @@ extern const struct iwl_cfg iwl4165_2ac_cfg; extern const struct iwl_cfg iwl8260_2ac_sdio_cfg; extern const struct iwl_cfg iwl8265_2ac_sdio_cfg; extern const struct iwl_cfg iwl4165_2ac_sdio_cfg; -extern const struct iwl_cfg iwl9000_2ac_cfg; extern const struct iwl_cfg iwl9000lc_2ac_cfg; extern const struct iwl_cfg iwl9260_2ac_cfg; +extern const struct iwl_cfg iwl9460_2ac_cfg; extern const struct iwl_cfg iwla000_2ac_cfg; #endif /* CONFIG_IWLMVM */ diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index bf523f516a0c..d053183cd5d6 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -504,18 +504,19 @@ static const struct pci_device_id iwl_hw_card_ids[] = { /* 9000 Series */ {IWL_PCI_DEVICE(0x2526, 0x0000, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x0010, iwl9260_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x0A10, iwl9000_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x0010, iwl9000_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x0210, iwl9000_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x0410, iwl9000_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x0610, iwl9000_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x0310, iwl9000_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x0000, iwl9000_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x0510, iwl9000_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x2010, iwl9000_2ac_cfg)}, - {IWL_PCI_DEVICE(0x2526, 0x1420, iwl9000_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x0710, iwl9000_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x2A10, iwl9000_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0A10, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0010, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0210, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0410, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0610, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0310, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0000, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0510, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x2010, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x1420, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0710, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x2A10, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x30DC, 0x0060, iwl9460_2ac_cfg)}, /* a000 Series */ {IWL_PCI_DEVICE(0x2720, 0x0A10, iwla000_2ac_cfg)}, @@ -636,7 +637,7 @@ static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } if (iwl_trans->cfg->rf_id) { - if (cfg == &iwl9000_2ac_cfg && + if (cfg == &iwl9460_2ac_cfg && iwl_trans->hw_rf_id == CSR_HW_RF_ID_TYPE_LC) { cfg = &iwl9000lc_2ac_cfg; iwl_trans->cfg = cfg; From c62446d2b028eab024e45f9f73e9496089f5fa7a Mon Sep 17 00:00:00 2001 From: Oren Givon Date: Thu, 7 Jul 2016 10:31:17 +0300 Subject: [PATCH 0564/1715] iwlwifi: add new 9460 series PCI IDs Add 4 more new 9460 series PCI IDs. Signed-off-by: Oren Givon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index d053183cd5d6..3e5a11a18b73 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -517,6 +517,10 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x9DF0, 0x0710, iwl9460_2ac_cfg)}, {IWL_PCI_DEVICE(0x9DF0, 0x2A10, iwl9460_2ac_cfg)}, {IWL_PCI_DEVICE(0x30DC, 0x0060, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x0060, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0060, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0xA370, 0x0060, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x31DC, 0x0060, iwl9460_2ac_cfg)}, /* a000 Series */ {IWL_PCI_DEVICE(0x2720, 0x0A10, iwla000_2ac_cfg)}, From 22ccabf17a2c0e4adf1b6e4ef0d2df79e93cf7b6 Mon Sep 17 00:00:00 2001 From: Oren Givon Date: Thu, 7 Jul 2016 09:23:56 +0300 Subject: [PATCH 0565/1715] iwlwifi: add the new 9270 series Add a new config struct for the new 9270 series and add the first PCI ID for it. Signed-off-by: Oren Givon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/iwl-9000.c | 10 ++++++++++ drivers/net/wireless/intel/iwlwifi/iwl-config.h | 1 + drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 1 + 3 files changed, 12 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-9000.c b/drivers/net/wireless/intel/iwlwifi/iwl-9000.c index 5621cf28a901..e442650bef13 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-9000.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-9000.c @@ -156,6 +156,16 @@ const struct iwl_cfg iwl9260_2ac_cfg = { .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, }; +const struct iwl_cfg iwl9270_2ac_cfg = { + .name = "Intel(R) Dual Band Wireless AC 9270", + .fw_name_pre = IWL9260_FW_PRE, + IWL_DEVICE_9000, + .ht_params = &iwl9000_ht_params, + .nvm_ver = IWL9000_NVM_VERSION, + .nvm_calib_ver = IWL9000_TX_POWER_VERSION, + .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, +}; + const struct iwl_cfg iwl9460_2ac_cfg = { .name = "Intel(R) Dual Band Wireless AC 9460", .fw_name_pre = IWL9000_FW_PRE, diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h index a18b57fdb10f..0ab415c5ce52 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h @@ -451,6 +451,7 @@ extern const struct iwl_cfg iwl8265_2ac_sdio_cfg; extern const struct iwl_cfg iwl4165_2ac_sdio_cfg; extern const struct iwl_cfg iwl9000lc_2ac_cfg; extern const struct iwl_cfg iwl9260_2ac_cfg; +extern const struct iwl_cfg iwl9270_2ac_cfg; extern const struct iwl_cfg iwl9460_2ac_cfg; extern const struct iwl_cfg iwla000_2ac_cfg; #endif /* CONFIG_IWLMVM */ diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 3e5a11a18b73..1be0ac2f402b 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -504,6 +504,7 @@ static const struct pci_device_id iwl_hw_card_ids[] = { /* 9000 Series */ {IWL_PCI_DEVICE(0x2526, 0x0000, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x0010, iwl9260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x1410, iwl9270_2ac_cfg)}, {IWL_PCI_DEVICE(0x9DF0, 0x0A10, iwl9460_2ac_cfg)}, {IWL_PCI_DEVICE(0x9DF0, 0x0010, iwl9460_2ac_cfg)}, {IWL_PCI_DEVICE(0x9DF0, 0x0210, iwl9460_2ac_cfg)}, From fe4a7249732de1fe18e7ceb41924e329a572cb2d Mon Sep 17 00:00:00 2001 From: Oren Givon Date: Thu, 7 Jul 2016 09:40:12 +0300 Subject: [PATCH 0566/1715] iwlwifi: add the new 9170 series Add a new config struct for the new 9170 series and add the first PCI ID for it. Signed-off-by: Oren Givon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/iwl-9000.c | 10 ++++++++++ drivers/net/wireless/intel/iwlwifi/iwl-config.h | 1 + drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 1 + 3 files changed, 12 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-9000.c b/drivers/net/wireless/intel/iwlwifi/iwl-9000.c index e442650bef13..1fec6afbe041 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-9000.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-9000.c @@ -146,6 +146,16 @@ static const struct iwl_tt_params iwl9000_tt_params = { .mac_addr_from_csr = true, \ .rf_id = true +const struct iwl_cfg iwl9160_2ac_cfg = { + .name = "Intel(R) Dual Band Wireless AC 9160", + .fw_name_pre = IWL9260_FW_PRE, + IWL_DEVICE_9000, + .ht_params = &iwl9000_ht_params, + .nvm_ver = IWL9000_NVM_VERSION, + .nvm_calib_ver = IWL9000_TX_POWER_VERSION, + .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, +}; + const struct iwl_cfg iwl9260_2ac_cfg = { .name = "Intel(R) Dual Band Wireless AC 9260", .fw_name_pre = IWL9260_FW_PRE, diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h index 0ab415c5ce52..7008319532ef 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h @@ -450,6 +450,7 @@ extern const struct iwl_cfg iwl8260_2ac_sdio_cfg; extern const struct iwl_cfg iwl8265_2ac_sdio_cfg; extern const struct iwl_cfg iwl4165_2ac_sdio_cfg; extern const struct iwl_cfg iwl9000lc_2ac_cfg; +extern const struct iwl_cfg iwl9160_2ac_cfg; extern const struct iwl_cfg iwl9260_2ac_cfg; extern const struct iwl_cfg iwl9270_2ac_cfg; extern const struct iwl_cfg iwl9460_2ac_cfg; diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 1be0ac2f402b..c6e24fb286be 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -502,6 +502,7 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x24FD, 0x0850, iwl8265_2ac_cfg)}, /* 9000 Series */ + {IWL_PCI_DEVICE(0x271B, 0x0010, iwl9160_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x0000, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x0010, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x1410, iwl9270_2ac_cfg)}, From ae79785f13972b2180d3d460510b7d1981d08cbc Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Thu, 30 Jun 2016 16:36:24 +0300 Subject: [PATCH 0567/1715] iwlwifi: pcie: refrain from SCD accesses Up till now we accessed SCD configuration only for initial configuration and for enabling command queue. For a000 generation the command queue is open by default and firmware configures the rest. No driver SCD accesses are expected. Make sure this is the case. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 4 ++++ drivers/net/wireless/intel/iwlwifi/pcie/tx.c | 13 ++++++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index 74f2f035bd28..559f1182055b 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -1960,6 +1960,10 @@ void iwl_trans_pcie_log_scd_error(struct iwl_trans *trans, struct iwl_txq *txq) IWL_ERR(trans, "Current SW read_ptr %d write_ptr %d\n", txq->q.read_ptr, txq->q.write_ptr); + if (trans->cfg->use_tfh) + /* TODO: access new SCD registers and dump them */ + return; + scd_sram_addr = trans_pcie->scd_base_addr + SCD_TX_STTS_QUEUE_OFFSET(txq->q.id); iwl_trans_read_mem_bytes(trans, scd_sram_addr, buf, sizeof(buf)); diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c index 18650dccdb58..9636dc89f6bd 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c @@ -703,6 +703,9 @@ void iwl_pcie_tx_start(struct iwl_trans *trans, u32 scd_base_addr) memset(trans_pcie->queue_stopped, 0, sizeof(trans_pcie->queue_stopped)); memset(trans_pcie->queue_used, 0, sizeof(trans_pcie->queue_used)); + if (trans->cfg->use_tfh) + return; + trans_pcie->scd_base_addr = iwl_read_prph(trans, SCD_SRAM_BASE_ADDR); @@ -970,11 +973,13 @@ int iwl_pcie_tx_init(struct iwl_trans *trans) } } - if (trans->cfg->use_tfh) + if (trans->cfg->use_tfh) { iwl_write_direct32(trans, TFH_TRANSFER_MODE, TFH_TRANSFER_MAX_PENDING_REQ | TFH_CHUNK_SIZE_128 | TFH_CHUNK_SPLIT_MODE); + return 0; + } iwl_set_bits_prph(trans, SCD_GP_CTRL, SCD_GP_CTRL_AUTO_ACTIVE_MODE); if (trans->cfg->base_params->num_of_queues > 20) @@ -1249,6 +1254,9 @@ void iwl_trans_pcie_txq_enable(struct iwl_trans *trans, int txq_id, u16 ssn, if (test_and_set_bit(txq_id, trans_pcie->queue_used)) WARN_ONCE(1, "queue %d already used - expect issues", txq_id); + if (cfg && trans->cfg->use_tfh) + WARN_ONCE(1, "Expected no calls to SCD configuration"); + txq->wd_timeout = msecs_to_jiffies(wdg_timeout); if (cfg) { @@ -1366,6 +1374,9 @@ void iwl_trans_pcie_txq_disable(struct iwl_trans *trans, int txq_id, return; } + if (configure_scd && trans->cfg->use_tfh) + WARN_ONCE(1, "Expected no calls to SCD configuration"); + if (configure_scd) { iwl_scd_txq_set_inactive(trans, txq_id); From d6a2c5c78dcbbbe9dc20ff6e126b83f088cd0501 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Wed, 29 Jun 2016 12:08:48 +0300 Subject: [PATCH 0568/1715] iwlwifi: pcie: fix ucode load flow for a000 devices Turns out we should access TFH relative addresses. Also, the FH_UCODE_LOAD_STATUS was replaced by UREG_UCODE_LOAD_STATUS. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/iwl-fh.h | 15 ++++----- drivers/net/wireless/intel/iwlwifi/iwl-prph.h | 8 +++++ .../net/wireless/intel/iwlwifi/pcie/trans.c | 33 ++++++++++++++----- 3 files changed, 40 insertions(+), 16 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-fh.h b/drivers/net/wireless/intel/iwlwifi/iwl-fh.h index 1d6f5d21a663..dd75ea7c936e 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-fh.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-fh.h @@ -77,7 +77,6 @@ */ #define FH_MEM_LOWER_BOUND (0x1000) #define FH_MEM_UPPER_BOUND (0x2000) -#define TFH_MEM_LOWER_BOUND (0xA06000) /** * Keep-Warm (KW) buffer base address. @@ -120,7 +119,7 @@ #define FH_MEM_CBBC_20_31_LOWER_BOUND (FH_MEM_LOWER_BOUND + 0xB20) #define FH_MEM_CBBC_20_31_UPPER_BOUND (FH_MEM_LOWER_BOUND + 0xB80) /* a000 TFD table address, 64 bit */ -#define TFH_TFDQ_CBB_TABLE (TFH_MEM_LOWER_BOUND + 0x1C00) +#define TFH_TFDQ_CBB_TABLE (0x1C00) /* Find TFD CB base pointer for given queue */ static inline unsigned int FH_MEM_CBBC_QUEUE(struct iwl_trans *trans, @@ -156,7 +155,7 @@ static inline unsigned int FH_MEM_CBBC_QUEUE(struct iwl_trans *trans, * In case of DRAM read address which is not aligned to 128B, the TFH will * enable transfer size which doesn't cross 64B DRAM address boundary. */ -#define TFH_TRANSFER_MODE (TFH_MEM_LOWER_BOUND + 0x1F40) +#define TFH_TRANSFER_MODE (0x1F40) #define TFH_TRANSFER_MAX_PENDING_REQ 0xc #define TFH_CHUNK_SIZE_128 BIT(8) #define TFH_CHUNK_SPLIT_MODE BIT(10) @@ -167,7 +166,7 @@ static inline unsigned int FH_MEM_CBBC_QUEUE(struct iwl_trans *trans, * the start of the TFD first TB. * In case of a DRAM Tx CMD update the TFH will update PN and Key ID */ -#define TFH_TXCMD_UPDATE_CFG (TFH_MEM_LOWER_BOUND + 0x1F48) +#define TFH_TXCMD_UPDATE_CFG (0x1F48) /* * Controls TX DMA operation * @@ -181,22 +180,22 @@ static inline unsigned int FH_MEM_CBBC_QUEUE(struct iwl_trans *trans, * set to 1 - interrupt is sent to the driver * Bit 0: Indicates the snoop configuration */ -#define TFH_SRV_DMA_CHNL0_CTRL (TFH_MEM_LOWER_BOUND + 0x1F60) +#define TFH_SRV_DMA_CHNL0_CTRL (0x1F60) #define TFH_SRV_DMA_SNOOP BIT(0) #define TFH_SRV_DMA_TO_DRIVER BIT(24) #define TFH_SRV_DMA_START BIT(31) /* Defines the DMA SRAM write start address to transfer a data block */ -#define TFH_SRV_DMA_CHNL0_SRAM_ADDR (TFH_MEM_LOWER_BOUND + 0x1F64) +#define TFH_SRV_DMA_CHNL0_SRAM_ADDR (0x1F64) /* Defines the 64bits DRAM start address to read the DMA data block from */ -#define TFH_SRV_DMA_CHNL0_DRAM_ADDR (TFH_MEM_LOWER_BOUND + 0x1F68) +#define TFH_SRV_DMA_CHNL0_DRAM_ADDR (0x1F68) /* * Defines the number of bytes to transfer from DRAM to SRAM. * Note that this register may be configured with non-dword aligned size. */ -#define TFH_SRV_DMA_CHNL0_BC (TFH_MEM_LOWER_BOUND + 0x1F70) +#define TFH_SRV_DMA_CHNL0_BC (0x1F70) /** * Rx SRAM Control and Status Registers (RSCSR) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-prph.h b/drivers/net/wireless/intel/iwlwifi/iwl-prph.h index 459bf736fd5b..849ee79cc094 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-prph.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-prph.h @@ -303,6 +303,14 @@ #define FH_UCODE_LOAD_STATUS (0x1AF0) #define CSR_UCODE_LOAD_STATUS_ADDR (0x1E70) + +/* + * Replacing FH_UCODE_LOAD_STATUS + * This register is writen by driver and is read by uCode during boot flow. + * Note this address is cleared after MAC reset. + */ +#define UREG_UCODE_LOAD_STATUS (0xa05c40) + enum secure_load_status_reg { LMPM_CPU_UCODE_LOADING_STARTED = 0x00000001, LMPM_CPU_HDRS_LOADING_COMPLETED = 0x00000003, diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index 559f1182055b..039eecaa6fcf 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -827,10 +827,16 @@ static int iwl_pcie_load_cpu_sections_8000(struct iwl_trans *trans, if (ret) return ret; - /* Notify the ucode of the loaded section number and status */ - val = iwl_read_direct32(trans, FH_UCODE_LOAD_STATUS); - val = val | (sec_num << shift_param); - iwl_write_direct32(trans, FH_UCODE_LOAD_STATUS, val); + /* Notify ucode of loaded section number and status */ + if (trans->cfg->use_tfh) { + val = iwl_read_prph(trans, UREG_UCODE_LOAD_STATUS); + val = val | (sec_num << shift_param); + iwl_write_prph(trans, UREG_UCODE_LOAD_STATUS, val); + } else { + val = iwl_read_direct32(trans, FH_UCODE_LOAD_STATUS); + val = val | (sec_num << shift_param); + iwl_write_direct32(trans, FH_UCODE_LOAD_STATUS, val); + } sec_num = (sec_num << 1) | 0x1; } @@ -838,10 +844,21 @@ static int iwl_pcie_load_cpu_sections_8000(struct iwl_trans *trans, iwl_enable_interrupts(trans); - if (cpu == 1) - iwl_write_direct32(trans, FH_UCODE_LOAD_STATUS, 0xFFFF); - else - iwl_write_direct32(trans, FH_UCODE_LOAD_STATUS, 0xFFFFFFFF); + if (trans->cfg->use_tfh) { + if (cpu == 1) + iwl_write_prph(trans, UREG_UCODE_LOAD_STATUS, + 0xFFFF); + else + iwl_write_prph(trans, UREG_UCODE_LOAD_STATUS, + 0xFFFFFFFF); + } else { + if (cpu == 1) + iwl_write_direct32(trans, FH_UCODE_LOAD_STATUS, + 0xFFFF); + else + iwl_write_direct32(trans, FH_UCODE_LOAD_STATUS, + 0xFFFFFFFF); + } return 0; } From 76f8c0e17edc6eba43f84952e5a87c7f50f69370 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Wed, 29 Jun 2016 12:23:06 +0300 Subject: [PATCH 0569/1715] iwlwifi: pcie: remove dead code If device family is 8000 then iwl_pcie_load_cpu_sections() won't be called at all (iwl_pcie_load_cpu_sections_8000() is called in that case) so this piece of code never gets called. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/iwl-prph.h | 13 ------------- drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 8 -------- 2 files changed, 21 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-prph.h b/drivers/net/wireless/intel/iwlwifi/iwl-prph.h index 849ee79cc094..406ef301b8ab 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-prph.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-prph.h @@ -302,7 +302,6 @@ #define OSC_CLK_FORCE_CONTROL (0x8) #define FH_UCODE_LOAD_STATUS (0x1AF0) -#define CSR_UCODE_LOAD_STATUS_ADDR (0x1E70) /* * Replacing FH_UCODE_LOAD_STATUS @@ -311,21 +310,9 @@ */ #define UREG_UCODE_LOAD_STATUS (0xa05c40) -enum secure_load_status_reg { - LMPM_CPU_UCODE_LOADING_STARTED = 0x00000001, - LMPM_CPU_HDRS_LOADING_COMPLETED = 0x00000003, - LMPM_CPU_UCODE_LOADING_COMPLETED = 0x00000007, - LMPM_CPU_STATUS_NUM_OF_LAST_COMPLETED = 0x000000F8, - LMPM_CPU_STATUS_NUM_OF_LAST_LOADED_BLOCK = 0x0000FF00, -}; - -#define LMPM_SECURE_INSPECTOR_CODE_ADDR (0x1E38) -#define LMPM_SECURE_INSPECTOR_DATA_ADDR (0x1E3C) #define LMPM_SECURE_UCODE_LOAD_CPU1_HDR_ADDR (0x1E78) #define LMPM_SECURE_UCODE_LOAD_CPU2_HDR_ADDR (0x1E7C) -#define LMPM_SECURE_INSPECTOR_CODE_MEM_SPACE (0x400000) -#define LMPM_SECURE_INSPECTOR_DATA_MEM_SPACE (0x402000) #define LMPM_SECURE_CPU1_HDR_MEM_SPACE (0x420000) #define LMPM_SECURE_CPU2_HDR_MEM_SPACE (0x420400) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index 039eecaa6fcf..2f46eedd7c4d 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -903,14 +903,6 @@ static int iwl_pcie_load_cpu_sections(struct iwl_trans *trans, return ret; } - if (trans->cfg->device_family == IWL_DEVICE_FAMILY_8000) - iwl_set_bits_prph(trans, - CSR_UCODE_LOAD_STATUS_ADDR, - (LMPM_CPU_UCODE_LOADING_COMPLETED | - LMPM_CPU_HDRS_LOADING_COMPLETED | - LMPM_CPU_UCODE_LOADING_STARTED) << - shift_param); - *first_ucode_section = last_read_idx; return 0; From ccbd3dbe85e1445231a7e0da2dada130cedce9d0 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 30 Aug 2016 09:49:28 +0100 Subject: [PATCH 0570/1715] rxrpc: Fix a potential NULL-pointer deref in rxrpc_abort_calls The call pointer in a channel on a connection will be NULL if there's no active call on that channel. rxrpc_abort_calls() needs to check for this before trying to take the call's state_lock. Signed-off-by: David Howells --- net/rxrpc/conn_event.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 6296374df840..bb81801fb805 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -149,19 +149,23 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn, int state, call = rcu_dereference_protected( conn->channels[i].call, lockdep_is_held(&conn->channel_lock)); - write_lock_bh(&call->state_lock); - if (call->state <= RXRPC_CALL_COMPLETE) { - call->state = state; - if (state == RXRPC_CALL_LOCALLY_ABORTED) { - call->local_abort = conn->local_abort; - set_bit(RXRPC_CALL_EV_CONN_ABORT, &call->events); - } else { - call->remote_abort = conn->remote_abort; - set_bit(RXRPC_CALL_EV_RCVD_ABORT, &call->events); + if (call) { + write_lock_bh(&call->state_lock); + if (call->state <= RXRPC_CALL_COMPLETE) { + call->state = state; + if (state == RXRPC_CALL_LOCALLY_ABORTED) { + call->local_abort = conn->local_abort; + set_bit(RXRPC_CALL_EV_CONN_ABORT, + &call->events); + } else { + call->remote_abort = conn->remote_abort; + set_bit(RXRPC_CALL_EV_RCVD_ABORT, + &call->events); + } + rxrpc_queue_call(call); } - rxrpc_queue_call(call); + write_unlock_bh(&call->state_lock); } - write_unlock_bh(&call->state_lock); } spin_unlock(&conn->channel_lock); From f5c17aaeb2aee9b6c30d082bbe652a7e5589adff Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 30 Aug 2016 09:49:28 +0100 Subject: [PATCH 0571/1715] rxrpc: Calls should only have one terminal state Condense the terminal states of a call state machine to a single state, plus a separate completion type value. The value is then set, along with error and abort code values, only when the call is transitioned to the completion state. Helpers are provided to simplify this. Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 116 +++++++++++++++++++++++++++++++--------- net/rxrpc/call_accept.c | 19 +++---- net/rxrpc/call_event.c | 42 ++++++--------- net/rxrpc/call_object.c | 43 ++++++--------- net/rxrpc/conn_client.c | 2 +- net/rxrpc/conn_event.c | 50 ++++++++--------- net/rxrpc/conn_object.c | 4 +- net/rxrpc/input.c | 68 +++++++++++------------ net/rxrpc/output.c | 27 ++++------ net/rxrpc/peer_event.c | 24 ++++++--- net/rxrpc/proc.c | 3 +- net/rxrpc/recvmsg.c | 12 +++-- 12 files changed, 226 insertions(+), 184 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index c761124961cc..ce6afd931e91 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -289,8 +289,6 @@ enum rxrpc_conn_proto_state { RXRPC_CONN_SERVICE, /* Service secured connection */ RXRPC_CONN_REMOTELY_ABORTED, /* Conn aborted by peer */ RXRPC_CONN_LOCALLY_ABORTED, /* Conn aborted locally */ - RXRPC_CONN_NETWORK_ERROR, /* Conn terminated by network error */ - RXRPC_CONN_LOCAL_ERROR, /* Conn terminated by local error */ RXRPC_CONN__NR_STATES }; @@ -344,7 +342,6 @@ struct rxrpc_connection { enum rxrpc_conn_proto_state state : 8; /* current state of connection */ u32 local_abort; /* local abort code */ u32 remote_abort; /* remote abort code */ - int error; /* local error incurred */ int debug_id; /* debug ID for printks */ atomic_t serial; /* packet serial number counter */ unsigned int hi_serial; /* highest serial number received */ @@ -411,13 +408,22 @@ enum rxrpc_call_state { RXRPC_CALL_SERVER_ACK_REQUEST, /* - server pending ACK of request */ RXRPC_CALL_SERVER_SEND_REPLY, /* - server sending reply */ RXRPC_CALL_SERVER_AWAIT_ACK, /* - server awaiting final ACK */ - RXRPC_CALL_COMPLETE, /* - call completed */ + RXRPC_CALL_COMPLETE, /* - call complete */ + RXRPC_CALL_DEAD, /* - call is dead */ + NR__RXRPC_CALL_STATES +}; + +/* + * Call completion condition (state == RXRPC_CALL_COMPLETE). + */ +enum rxrpc_call_completion { + RXRPC_CALL_SUCCEEDED, /* - Normal termination */ RXRPC_CALL_SERVER_BUSY, /* - call rejected by busy server */ RXRPC_CALL_REMOTELY_ABORTED, /* - call aborted by peer */ RXRPC_CALL_LOCALLY_ABORTED, /* - call aborted locally on error or close */ + RXRPC_CALL_LOCAL_ERROR, /* - call failed due to local error */ RXRPC_CALL_NETWORK_ERROR, /* - call terminated by network error */ - RXRPC_CALL_DEAD, /* - call is dead */ - NR__RXRPC_CALL_STATES + NR__RXRPC_CALL_COMPLETIONS }; /* @@ -451,14 +457,13 @@ struct rxrpc_call { unsigned long events; spinlock_t lock; rwlock_t state_lock; /* lock for state transition */ + u32 abort_code; /* Local/remote abort code */ + int error; /* Local error incurred */ + enum rxrpc_call_state state : 8; /* current state of call */ + enum rxrpc_call_completion completion : 8; /* Call completion condition */ atomic_t usage; atomic_t skb_count; /* Outstanding packets on this call */ atomic_t sequence; /* Tx data packet sequence counter */ - u32 local_abort; /* local abort code */ - u32 remote_abort; /* remote abort code */ - int error_report; /* Network error (ICMP/local transport) */ - int error; /* Local error incurred */ - enum rxrpc_call_state state : 8; /* current state of call */ u16 service_id; /* service ID */ u32 call_id; /* call ID on connection */ u32 cid; /* connection ID plus channel index */ @@ -493,20 +498,6 @@ struct rxrpc_call { unsigned long ackr_window[RXRPC_ACKR_WINDOW_ASZ + 1]; }; -/* - * locally abort an RxRPC call - */ -static inline void rxrpc_abort_call(struct rxrpc_call *call, u32 abort_code) -{ - write_lock_bh(&call->state_lock); - if (call->state < RXRPC_CALL_COMPLETE) { - call->local_abort = abort_code; - call->state = RXRPC_CALL_LOCALLY_ABORTED; - set_bit(RXRPC_CALL_EV_ABORT, &call->events); - } - write_unlock_bh(&call->state_lock); -} - #include /* @@ -534,6 +525,8 @@ void rxrpc_process_call(struct work_struct *); /* * call_object.c */ +extern const char *const rxrpc_call_states[]; +extern const char *const rxrpc_call_completions[]; extern unsigned int rxrpc_max_call_lifetime; extern unsigned int rxrpc_dead_call_expiry; extern struct kmem_cache *rxrpc_call_jar; @@ -563,6 +556,78 @@ static inline bool rxrpc_is_client_call(const struct rxrpc_call *call) return !rxrpc_is_service_call(call); } +/* + * Transition a call to the complete state. + */ +static inline bool __rxrpc_set_call_completion(struct rxrpc_call *call, + enum rxrpc_call_completion compl, + u32 abort_code, + int error) +{ + if (call->state < RXRPC_CALL_COMPLETE) { + call->abort_code = abort_code; + call->error = error; + call->completion = compl, + call->state = RXRPC_CALL_COMPLETE; + return true; + } + return false; +} + +static inline bool rxrpc_set_call_completion(struct rxrpc_call *call, + enum rxrpc_call_completion compl, + u32 abort_code, + int error) +{ + int ret; + + write_lock_bh(&call->state_lock); + ret = __rxrpc_set_call_completion(call, compl, abort_code, error); + write_unlock_bh(&call->state_lock); + return ret; +} + +/* + * Record that a call successfully completed. + */ +static inline void __rxrpc_call_completed(struct rxrpc_call *call) +{ + __rxrpc_set_call_completion(call, RXRPC_CALL_SUCCEEDED, 0, 0); +} + +static inline void rxrpc_call_completed(struct rxrpc_call *call) +{ + write_lock_bh(&call->state_lock); + __rxrpc_call_completed(call); + write_unlock_bh(&call->state_lock); +} + +/* + * Record that a call is locally aborted. + */ +static inline bool __rxrpc_abort_call(struct rxrpc_call *call, + u32 abort_code, int error) +{ + if (__rxrpc_set_call_completion(call, + RXRPC_CALL_LOCALLY_ABORTED, + abort_code, error)) { + set_bit(RXRPC_CALL_EV_ABORT, &call->events); + return true; + } + return false; +} + +static inline bool rxrpc_abort_call(struct rxrpc_call *call, + u32 abort_code, int error) +{ + bool ret; + + write_lock_bh(&call->state_lock); + ret = __rxrpc_abort_call(call, abort_code, error); + write_unlock_bh(&call->state_lock); + return ret; +} + /* * conn_client.c */ @@ -778,7 +843,6 @@ static inline void rxrpc_put_peer(struct rxrpc_peer *peer) /* * proc.c */ -extern const char *const rxrpc_call_states[]; extern const struct file_operations rxrpc_call_seq_fops; extern const struct file_operations rxrpc_connection_seq_fops; diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 669ac79d3b44..ef9ef0d6c917 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -329,12 +329,8 @@ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx, case RXRPC_CALL_SERVER_ACCEPTING: call->state = RXRPC_CALL_SERVER_RECV_REQUEST; break; - case RXRPC_CALL_REMOTELY_ABORTED: - case RXRPC_CALL_LOCALLY_ABORTED: - ret = -ECONNABORTED; - goto out_release; - case RXRPC_CALL_NETWORK_ERROR: - ret = call->conn->error; + case RXRPC_CALL_COMPLETE: + ret = call->error; goto out_release; case RXRPC_CALL_DEAD: ret = -ETIME; @@ -403,17 +399,14 @@ int rxrpc_reject_call(struct rxrpc_sock *rx) write_lock_bh(&call->state_lock); switch (call->state) { case RXRPC_CALL_SERVER_ACCEPTING: - call->state = RXRPC_CALL_SERVER_BUSY; + __rxrpc_set_call_completion(call, RXRPC_CALL_SERVER_BUSY, + 0, ECONNABORTED); if (test_and_set_bit(RXRPC_CALL_EV_REJECT_BUSY, &call->events)) rxrpc_queue_call(call); ret = 0; goto out_release; - case RXRPC_CALL_REMOTELY_ABORTED: - case RXRPC_CALL_LOCALLY_ABORTED: - ret = -ECONNABORTED; - goto out_release; - case RXRPC_CALL_NETWORK_ERROR: - ret = call->conn->error; + case RXRPC_CALL_COMPLETE: + ret = call->error; goto out_release; case RXRPC_CALL_DEAD: ret = -ETIME; diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 5292bcfd8816..94c7751fd99a 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -95,7 +95,7 @@ cancel_timer: _debug("cancel timer %%%u", serial); try_to_del_timer_sync(&call->ack_timer); read_lock_bh(&call->state_lock); - if (call->state <= RXRPC_CALL_COMPLETE && + if (call->state < RXRPC_CALL_COMPLETE && !test_and_set_bit(RXRPC_CALL_EV_ACK, &call->events)) rxrpc_queue_call(call); read_unlock_bh(&call->state_lock); @@ -123,7 +123,7 @@ static void rxrpc_set_resend(struct rxrpc_call *call, u8 resend, unsigned long resend_at) { read_lock_bh(&call->state_lock); - if (call->state >= RXRPC_CALL_COMPLETE) + if (call->state == RXRPC_CALL_COMPLETE) resend = 0; if (resend & 1) { @@ -230,7 +230,7 @@ static void rxrpc_resend_timer(struct rxrpc_call *call) _enter("%d,%d,%d", call->acks_tail, call->acks_unacked, call->acks_head); - if (call->state >= RXRPC_CALL_COMPLETE) + if (call->state == RXRPC_CALL_COMPLETE) return; resend = 0; @@ -711,7 +711,7 @@ all_acked: break; case RXRPC_CALL_SERVER_AWAIT_ACK: _debug("srv complete"); - call->state = RXRPC_CALL_COMPLETE; + __rxrpc_call_completed(call); post_ACK = true; break; case RXRPC_CALL_CLIENT_SEND_REQUEST: @@ -875,24 +875,22 @@ skip_msg_init: clear_bit(RXRPC_CALL_EV_REJECT_BUSY, &call->events); clear_bit(RXRPC_CALL_EV_ABORT, &call->events); - error = call->error_report; - if (error < RXRPC_LOCAL_ERROR_OFFSET) { + if (call->completion == RXRPC_CALL_NETWORK_ERROR) { mark = RXRPC_SKB_MARK_NET_ERROR; _debug("post net error %d", error); } else { mark = RXRPC_SKB_MARK_LOCAL_ERROR; - error -= RXRPC_LOCAL_ERROR_OFFSET; _debug("post net local error %d", error); } - if (rxrpc_post_message(call, mark, error, true) < 0) + if (rxrpc_post_message(call, mark, call->error, true) < 0) goto no_mem; clear_bit(RXRPC_CALL_EV_RCVD_ERROR, &call->events); goto kill_ACKs; } if (test_bit(RXRPC_CALL_EV_CONN_ABORT, &call->events)) { - ASSERTCMP(call->state, >, RXRPC_CALL_COMPLETE); + ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); clear_bit(RXRPC_CALL_EV_REJECT_BUSY, &call->events); clear_bit(RXRPC_CALL_EV_ABORT, &call->events); @@ -900,7 +898,7 @@ skip_msg_init: _debug("post conn abort"); if (rxrpc_post_message(call, RXRPC_SKB_MARK_LOCAL_ERROR, - call->conn->error, true) < 0) + call->error, true) < 0) goto no_mem; clear_bit(RXRPC_CALL_EV_CONN_ABORT, &call->events); goto kill_ACKs; @@ -913,13 +911,13 @@ skip_msg_init: } if (test_bit(RXRPC_CALL_EV_ABORT, &call->events)) { - ASSERTCMP(call->state, >, RXRPC_CALL_COMPLETE); + ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); if (rxrpc_post_message(call, RXRPC_SKB_MARK_LOCAL_ERROR, - ECONNABORTED, true) < 0) + call->error, true) < 0) goto no_mem; whdr.type = RXRPC_PACKET_TYPE_ABORT; - data = htonl(call->local_abort); + data = htonl(call->abort_code); iov[1].iov_base = &data; iov[1].iov_len = sizeof(data); genbit = RXRPC_CALL_EV_ABORT; @@ -979,13 +977,7 @@ skip_msg_init: } if (test_bit(RXRPC_CALL_EV_LIFE_TIMER, &call->events)) { - write_lock_bh(&call->state_lock); - if (call->state <= RXRPC_CALL_COMPLETE) { - call->state = RXRPC_CALL_LOCALLY_ABORTED; - call->local_abort = RX_CALL_TIMEOUT; - set_bit(RXRPC_CALL_EV_ABORT, &call->events); - } - write_unlock_bh(&call->state_lock); + rxrpc_abort_call(call, RX_CALL_TIMEOUT, ETIME); _debug("post timeout"); if (rxrpc_post_message(call, RXRPC_SKB_MARK_LOCAL_ERROR, @@ -998,7 +990,8 @@ skip_msg_init: /* deal with assorted inbound messages */ if (!skb_queue_empty(&call->rx_queue)) { - switch (rxrpc_process_rx_queue(call, &abort_code)) { + ret = rxrpc_process_rx_queue(call, &abort_code); + switch (ret) { case 0: case -EAGAIN: break; @@ -1007,7 +1000,7 @@ skip_msg_init: case -EKEYEXPIRED: case -EKEYREJECTED: case -EPROTO: - rxrpc_abort_call(call, abort_code); + rxrpc_abort_call(call, abort_code, -ret); goto kill_ACKs; } } @@ -1232,10 +1225,7 @@ send_message_2: goto kill_ACKs; case RXRPC_CALL_EV_ACK_FINAL: - write_lock_bh(&call->state_lock); - if (call->state == RXRPC_CALL_CLIENT_FINAL_ACK) - call->state = RXRPC_CALL_COMPLETE; - write_unlock_bh(&call->state_lock); + rxrpc_call_completed(call); goto kill_ACKs; default: diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index e7cbcc4a87cf..852c30dc7b75 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -30,7 +30,7 @@ unsigned int rxrpc_max_call_lifetime = 60 * HZ; unsigned int rxrpc_dead_call_expiry = 2 * HZ; const char *const rxrpc_call_states[NR__RXRPC_CALL_STATES] = { - [RXRPC_CALL_UNINITIALISED] = "Uninit", + [RXRPC_CALL_UNINITIALISED] = "Uninit ", [RXRPC_CALL_CLIENT_AWAIT_CONN] = "ClWtConn", [RXRPC_CALL_CLIENT_SEND_REQUEST] = "ClSndReq", [RXRPC_CALL_CLIENT_AWAIT_REPLY] = "ClAwtRpl", @@ -43,11 +43,16 @@ const char *const rxrpc_call_states[NR__RXRPC_CALL_STATES] = { [RXRPC_CALL_SERVER_SEND_REPLY] = "SvSndRpl", [RXRPC_CALL_SERVER_AWAIT_ACK] = "SvAwtACK", [RXRPC_CALL_COMPLETE] = "Complete", + [RXRPC_CALL_DEAD] = "Dead ", +}; + +const char *const rxrpc_call_completions[NR__RXRPC_CALL_COMPLETIONS] = { + [RXRPC_CALL_SUCCEEDED] = "Complete", [RXRPC_CALL_SERVER_BUSY] = "SvBusy ", [RXRPC_CALL_REMOTELY_ABORTED] = "RmtAbort", [RXRPC_CALL_LOCALLY_ABORTED] = "LocAbort", + [RXRPC_CALL_LOCAL_ERROR] = "LocError", [RXRPC_CALL_NETWORK_ERROR] = "NetError", - [RXRPC_CALL_DEAD] = "Dead ", }; struct kmem_cache *rxrpc_call_jar; @@ -358,7 +363,7 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx, _debug("CALL: %u { %s }", call->debug_id, rxrpc_call_states[call->state]); - if (call->state >= RXRPC_CALL_COMPLETE) { + if (call->state == RXRPC_CALL_COMPLETE) { __rxrpc_disconnect_call(conn, call); } else { spin_unlock(&conn->channel_lock); @@ -472,8 +477,7 @@ void rxrpc_release_call(struct rxrpc_call *call) if (call->state < RXRPC_CALL_COMPLETE && call->state != RXRPC_CALL_CLIENT_FINAL_ACK) { _debug("+++ ABORTING STATE %d +++\n", call->state); - call->state = RXRPC_CALL_LOCALLY_ABORTED; - call->local_abort = RX_CALL_DEAD; + __rxrpc_abort_call(call, RX_CALL_DEAD, ECONNRESET); } write_unlock_bh(&call->state_lock); @@ -538,20 +542,13 @@ static void rxrpc_mark_call_released(struct rxrpc_call *call) write_lock(&call->state_lock); if (call->state < RXRPC_CALL_DEAD) { - sched = false; - if (call->state < RXRPC_CALL_COMPLETE) { - _debug("abort call %p", call); - call->state = RXRPC_CALL_LOCALLY_ABORTED; - call->local_abort = RX_CALL_DEAD; - if (!test_and_set_bit(RXRPC_CALL_EV_ABORT, &call->events)) - sched = true; - } + sched = __rxrpc_abort_call(call, RX_CALL_DEAD, ECONNRESET); if (!test_and_set_bit(RXRPC_CALL_EV_RELEASE, &call->events)) sched = true; - if (sched) - rxrpc_queue_call(call); } write_unlock(&call->state_lock); + if (sched) + rxrpc_queue_call(call); } /* @@ -749,16 +746,13 @@ static void rxrpc_call_life_expired(unsigned long _call) { struct rxrpc_call *call = (struct rxrpc_call *) _call; + _enter("{%d}", call->debug_id); + if (call->state >= RXRPC_CALL_COMPLETE) return; - _enter("{%d}", call->debug_id); - read_lock_bh(&call->state_lock); - if (call->state < RXRPC_CALL_COMPLETE) { - set_bit(RXRPC_CALL_EV_LIFE_TIMER, &call->events); - rxrpc_queue_call(call); - } - read_unlock_bh(&call->state_lock); + set_bit(RXRPC_CALL_EV_LIFE_TIMER, &call->events); + rxrpc_queue_call(call); } /* @@ -791,9 +785,6 @@ static void rxrpc_ack_time_expired(unsigned long _call) if (call->state >= RXRPC_CALL_COMPLETE) return; - read_lock_bh(&call->state_lock); - if (call->state < RXRPC_CALL_COMPLETE && - !test_and_set_bit(RXRPC_CALL_EV_ACK, &call->events)) + if (!test_and_set_bit(RXRPC_CALL_EV_ACK, &call->events)) rxrpc_queue_call(call); - read_unlock_bh(&call->state_lock); } diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 349402b08e5a..44850a2d90b5 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -741,7 +741,7 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call) * terminal retransmission without requiring access to the call. */ if (test_bit(RXRPC_CALL_EXPOSED, &call->flags)) { - _debug("exposed %u,%u", call->call_id, call->local_abort); + _debug("exposed %u,%u", call->call_id, call->abort_code); __rxrpc_disconnect_call(conn, call); } diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index bb81801fb805..bcea99c73b40 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -27,8 +27,8 @@ /* * Retransmit terminal ACK or ABORT of the previous call. */ -static void rxrpc_conn_retransmit(struct rxrpc_connection *conn, - struct sk_buff *skb) +static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, + struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_channel *chan; @@ -135,14 +135,21 @@ static void rxrpc_conn_retransmit(struct rxrpc_connection *conn, /* * pass a connection-level abort onto all calls on that connection */ -static void rxrpc_abort_calls(struct rxrpc_connection *conn, int state, - u32 abort_code) +static void rxrpc_abort_calls(struct rxrpc_connection *conn, + enum rxrpc_call_completion compl, + u32 abort_code, int error) { struct rxrpc_call *call; - int i; + bool queue; + int i, bit; _enter("{%d},%x", conn->debug_id, abort_code); + if (compl == RXRPC_CALL_LOCALLY_ABORTED) + bit = RXRPC_CALL_EV_CONN_ABORT; + else + bit = RXRPC_CALL_EV_RCVD_ABORT; + spin_lock(&conn->channel_lock); for (i = 0; i < RXRPC_MAXCALLS; i++) { @@ -151,20 +158,14 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn, int state, lockdep_is_held(&conn->channel_lock)); if (call) { write_lock_bh(&call->state_lock); - if (call->state <= RXRPC_CALL_COMPLETE) { - call->state = state; - if (state == RXRPC_CALL_LOCALLY_ABORTED) { - call->local_abort = conn->local_abort; - set_bit(RXRPC_CALL_EV_CONN_ABORT, - &call->events); - } else { - call->remote_abort = conn->remote_abort; - set_bit(RXRPC_CALL_EV_RCVD_ABORT, - &call->events); - } - rxrpc_queue_call(call); + if (rxrpc_set_call_completion(call, compl, abort_code, + error)) { + set_bit(bit, &call->events); + queue = true; } write_unlock_bh(&call->state_lock); + if (queue) + rxrpc_queue_call(call); } } @@ -190,17 +191,16 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn, /* generate a connection-level abort */ spin_lock_bh(&conn->state_lock); - if (conn->state < RXRPC_CONN_REMOTELY_ABORTED) { - conn->state = RXRPC_CONN_LOCALLY_ABORTED; - conn->error = error; - spin_unlock_bh(&conn->state_lock); - } else { + if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) { spin_unlock_bh(&conn->state_lock); _leave(" = 0 [already dead]"); return 0; } - rxrpc_abort_calls(conn, RXRPC_CALL_LOCALLY_ABORTED, abort_code); + conn->state = RXRPC_CONN_LOCALLY_ABORTED; + spin_unlock_bh(&conn->state_lock); + + rxrpc_abort_calls(conn, RXRPC_CALL_LOCALLY_ABORTED, abort_code, error); msg.msg_name = &conn->params.peer->srx.transport; msg.msg_namelen = conn->params.peer->srx.transport_len; @@ -280,7 +280,7 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, switch (sp->hdr.type) { case RXRPC_PACKET_TYPE_DATA: case RXRPC_PACKET_TYPE_ACK: - rxrpc_conn_retransmit(conn, skb); + rxrpc_conn_retransmit_call(conn, skb); rxrpc_free_skb(skb); return 0; @@ -291,7 +291,7 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, _proto("Rx ABORT %%%u { ac=%d }", sp->hdr.serial, abort_code); conn->state = RXRPC_CONN_REMOTELY_ABORTED; - rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED, + rxrpc_abort_calls(conn, 0, RXRPC_CALL_REMOTELY_ABORTED, abort_code); return -ECONNABORTED; diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 5b45b6c367e7..9c6685b97e70 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -165,8 +165,8 @@ void __rxrpc_disconnect_call(struct rxrpc_connection *conn, * through the channel, whilst disposing of the actual call record. */ chan->last_service_id = call->service_id; - if (call->local_abort) { - chan->last_abort = call->local_abort; + if (call->abort_code) { + chan->last_abort = call->abort_code; chan->last_type = RXRPC_PACKET_TYPE_ABORT; } else { chan->last_seq = call->rx_data_eaten; diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 5e683dd21ab9..af49c2992c4a 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -341,14 +341,13 @@ void rxrpc_fast_process_packet(struct rxrpc_call *call, struct sk_buff *skb) abort_code = ntohl(wtmp); _proto("Rx ABORT %%%u { %x }", sp->hdr.serial, abort_code); - write_lock_bh(&call->state_lock); - if (call->state < RXRPC_CALL_COMPLETE) { - call->state = RXRPC_CALL_REMOTELY_ABORTED; - call->remote_abort = abort_code; + if (__rxrpc_set_call_completion(call, + RXRPC_CALL_REMOTELY_ABORTED, + abort_code, ECONNABORTED)) { set_bit(RXRPC_CALL_EV_RCVD_ABORT, &call->events); rxrpc_queue_call(call); } - goto free_packet_unlock; + goto free_packet; case RXRPC_PACKET_TYPE_BUSY: _proto("Rx BUSY %%%u", sp->hdr.serial); @@ -359,7 +358,9 @@ void rxrpc_fast_process_packet(struct rxrpc_call *call, struct sk_buff *skb) write_lock_bh(&call->state_lock); switch (call->state) { case RXRPC_CALL_CLIENT_SEND_REQUEST: - call->state = RXRPC_CALL_SERVER_BUSY; + __rxrpc_set_call_completion(call, + RXRPC_CALL_SERVER_BUSY, + 0, EBUSY); set_bit(RXRPC_CALL_EV_RCVD_BUSY, &call->events); rxrpc_queue_call(call); case RXRPC_CALL_SERVER_BUSY: @@ -415,12 +416,8 @@ protocol_error: _debug("protocol error"); write_lock_bh(&call->state_lock); protocol_error_locked: - if (call->state <= RXRPC_CALL_COMPLETE) { - call->state = RXRPC_CALL_LOCALLY_ABORTED; - call->local_abort = RX_PROTOCOL_ERROR; - set_bit(RXRPC_CALL_EV_ABORT, &call->events); + if (__rxrpc_abort_call(call, RX_PROTOCOL_ERROR, EPROTO)) rxrpc_queue_call(call); - } free_packet_unlock: write_unlock_bh(&call->state_lock); free_packet: @@ -486,14 +483,8 @@ protocol_error: _debug("protocol error"); rxrpc_free_skb(part); rxrpc_free_skb(jumbo); - write_lock_bh(&call->state_lock); - if (call->state <= RXRPC_CALL_COMPLETE) { - call->state = RXRPC_CALL_LOCALLY_ABORTED; - call->local_abort = RX_PROTOCOL_ERROR; - set_bit(RXRPC_CALL_EV_ABORT, &call->events); + if (rxrpc_abort_call(call, RX_PROTOCOL_ERROR, EPROTO)) rxrpc_queue_call(call); - } - write_unlock_bh(&call->state_lock); _leave(""); } @@ -514,26 +505,28 @@ static void rxrpc_post_packet_to_call(struct rxrpc_call *call, read_lock(&call->state_lock); switch (call->state) { - case RXRPC_CALL_LOCALLY_ABORTED: - if (!test_and_set_bit(RXRPC_CALL_EV_ABORT, &call->events)) { - rxrpc_queue_call(call); - goto free_unlock; - } - case RXRPC_CALL_REMOTELY_ABORTED: - case RXRPC_CALL_NETWORK_ERROR: case RXRPC_CALL_DEAD: goto dead_call; + case RXRPC_CALL_COMPLETE: - case RXRPC_CALL_CLIENT_FINAL_ACK: - /* complete server call */ - if (rxrpc_conn_is_service(call->conn)) + switch (call->completion) { + case RXRPC_CALL_LOCALLY_ABORTED: + if (!test_and_set_bit(RXRPC_CALL_EV_ABORT, + &call->events)) { + rxrpc_queue_call(call); + goto free_unlock; + } + default: goto dead_call; - /* resend last packet of a completed call */ - _debug("final ack again"); - rxrpc_get_call(call); - set_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events); - rxrpc_queue_call(call); - goto free_unlock; + case RXRPC_CALL_SUCCEEDED: + if (rxrpc_conn_is_service(call->conn)) + goto dead_call; + goto resend_final_ack; + } + + case RXRPC_CALL_CLIENT_FINAL_ACK: + goto resend_final_ack; + default: break; } @@ -550,6 +543,13 @@ static void rxrpc_post_packet_to_call(struct rxrpc_call *call, rxrpc_put_call(call); goto done; +resend_final_ack: + _debug("final ack again"); + rxrpc_get_call(call); + set_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events); + rxrpc_queue_call(call); + goto free_unlock; + dead_call: if (sp->hdr.type != RXRPC_PACKET_TYPE_ABORT) { skb->priority = RX_CALL_DEAD; diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 8a9917cba6fe..036e1112b0c5 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -115,12 +115,12 @@ static int rxrpc_sendmsg_cmsg(struct msghdr *msg, */ static void rxrpc_send_abort(struct rxrpc_call *call, u32 abort_code) { + if (call->state >= RXRPC_CALL_COMPLETE) + return; + write_lock_bh(&call->state_lock); - if (call->state <= RXRPC_CALL_COMPLETE) { - call->state = RXRPC_CALL_LOCALLY_ABORTED; - call->local_abort = abort_code; - set_bit(RXRPC_CALL_EV_ABORT, &call->events); + if (__rxrpc_abort_call(call, abort_code, ECONNABORTED)) { del_timer_sync(&call->resend_timer); del_timer_sync(&call->ack_timer); clear_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events); @@ -212,7 +212,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) if (call->state >= RXRPC_CALL_COMPLETE) { /* it's too late for this call */ - ret = -ECONNRESET; + ret = -ESHUTDOWN; } else if (cmd == RXRPC_CMD_SEND_ABORT) { rxrpc_send_abort(call, abort_code); ret = 0; @@ -295,8 +295,7 @@ void rxrpc_kernel_abort_call(struct rxrpc_call *call, u32 abort_code) _debug("CALL %d USR %lx ST %d on CONN %p", call->debug_id, call->user_call_ID, call->state, call->conn); - if (call->state < RXRPC_CALL_COMPLETE) - rxrpc_send_abort(call, abort_code); + rxrpc_send_abort(call, abort_code); release_sock(&call->socket->sk); _leave(""); @@ -640,8 +639,8 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, /* check for the far side aborting the call or a network error * occurring */ - if (call->state > RXRPC_CALL_COMPLETE) - goto call_aborted; + if (call->state == RXRPC_CALL_COMPLETE) + goto call_terminated; /* add the packet to the send queue if it's now full */ if (sp->remain <= 0 || @@ -702,15 +701,9 @@ out: _leave(" = %d", ret); return ret; -call_aborted: +call_terminated: rxrpc_free_skb(skb); - if (call->state == RXRPC_CALL_NETWORK_ERROR) - ret = call->error_report < RXRPC_LOCAL_ERROR_OFFSET ? - call->error_report : - call->error_report - RXRPC_LOCAL_ERROR_OFFSET; - else - ret = -ECONNABORTED; - _leave(" = %d", ret); + _leave(" = %d", -call->error); return ret; maybe_error: diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index 8940674b5e08..865078d76ad3 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -248,13 +248,21 @@ void rxrpc_peer_error_distributor(struct work_struct *work) struct rxrpc_peer *peer = container_of(work, struct rxrpc_peer, error_distributor); struct rxrpc_call *call; - int error_report; + enum rxrpc_call_completion compl; + bool queue; + int error; _enter(""); - error_report = READ_ONCE(peer->error_report); + error = READ_ONCE(peer->error_report); + if (error < RXRPC_LOCAL_ERROR_OFFSET) { + compl = RXRPC_CALL_NETWORK_ERROR; + } else { + compl = RXRPC_CALL_LOCAL_ERROR; + error -= RXRPC_LOCAL_ERROR_OFFSET; + } - _debug("ISSUE ERROR %d", error_report); + _debug("ISSUE ERROR %s %d", rxrpc_call_completions[compl], error); spin_lock_bh(&peer->lock); @@ -263,15 +271,15 @@ void rxrpc_peer_error_distributor(struct work_struct *work) struct rxrpc_call, error_link); hlist_del_init(&call->error_link); + queue = false; write_lock(&call->state_lock); - if (call->state != RXRPC_CALL_COMPLETE && - call->state < RXRPC_CALL_NETWORK_ERROR) { - call->error_report = error_report; - call->state = RXRPC_CALL_NETWORK_ERROR; + if (__rxrpc_set_call_completion(call, compl, 0, error)) { set_bit(RXRPC_CALL_EV_RCVD_ERROR, &call->events); - rxrpc_queue_call(call); + queue = true; } write_unlock(&call->state_lock); + if (queue) + rxrpc_queue_call(call); } spin_unlock_bh(&peer->lock); diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c index 060fb4892c39..82c64055449d 100644 --- a/net/rxrpc/proc.c +++ b/net/rxrpc/proc.c @@ -22,7 +22,6 @@ static const char *const rxrpc_conn_states[RXRPC_CONN__NR_STATES] = { [RXRPC_CONN_SERVICE] = "SvSecure", [RXRPC_CONN_REMOTELY_ABORTED] = "RmtAbort", [RXRPC_CONN_LOCALLY_ABORTED] = "LocAbort", - [RXRPC_CONN_NETWORK_ERROR] = "NetError", }; /* @@ -94,7 +93,7 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v) rxrpc_is_service_call(call) ? "Svc" : "Clt", atomic_read(&call->usage), rxrpc_call_states[call->state], - call->remote_abort ?: call->local_abort, + call->abort_code, call->user_call_ID); return 0; diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index b964c2d49a88..96d98a3a7087 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -294,12 +294,17 @@ receive_non_data_message: ret = put_cmsg(msg, SOL_RXRPC, RXRPC_BUSY, 0, &abort_code); break; case RXRPC_SKB_MARK_REMOTE_ABORT: - abort_code = call->remote_abort; + abort_code = call->abort_code; ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ABORT, 4, &abort_code); break; case RXRPC_SKB_MARK_LOCAL_ABORT: - abort_code = call->local_abort; + abort_code = call->abort_code; ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ABORT, 4, &abort_code); + if (call->error) { + abort_code = call->error; + ret = put_cmsg(msg, SOL_RXRPC, RXRPC_LOCAL_ERROR, 4, + &abort_code); + } break; case RXRPC_SKB_MARK_NET_ERROR: _debug("RECV NET ERROR %d", sp->error); @@ -392,9 +397,8 @@ u32 rxrpc_kernel_get_abort_code(struct sk_buff *skb) switch (skb->mark) { case RXRPC_SKB_MARK_REMOTE_ABORT: - return sp->call->remote_abort; case RXRPC_SKB_MARK_LOCAL_ABORT: - return sp->call->local_abort; + return sp->call->abort_code; default: BUG(); } From e34d4234b0b77a8a8b6dd7cf29aff468c288d9e4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 30 Aug 2016 09:49:29 +0100 Subject: [PATCH 0572/1715] rxrpc: Trace rxrpc_call usage Add a trace event for debuging rxrpc_call struct usage. Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 39 +++++++++++++++ net/rxrpc/ar-internal.h | 19 ++------ net/rxrpc/call_accept.c | 5 +- net/rxrpc/call_event.c | 11 ++--- net/rxrpc/call_object.c | 92 ++++++++++++++++++++++++++++++++---- net/rxrpc/conn_client.c | 1 + net/rxrpc/conn_event.c | 1 + net/rxrpc/input.c | 4 +- net/rxrpc/output.c | 1 + net/rxrpc/peer_event.c | 1 + net/rxrpc/recvmsg.c | 1 + net/rxrpc/skbuff.c | 4 +- 12 files changed, 144 insertions(+), 35 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 15283ee3e41a..cbe574ea674b 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -16,6 +16,45 @@ #include +TRACE_EVENT(rxrpc_call, + TP_PROTO(struct rxrpc_call *call, int op, int usage, int nskb, + const void *where, const void *aux), + + TP_ARGS(call, op, usage, nskb, where, aux), + + TP_STRUCT__entry( + __field(struct rxrpc_call *, call ) + __field(int, op ) + __field(int, usage ) + __field(int, nskb ) + __field(const void *, where ) + __field(const void *, aux ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->op = op; + __entry->usage = usage; + __entry->nskb = nskb; + __entry->where = where; + __entry->aux = aux; + ), + + TP_printk("c=%p %s u=%d s=%d p=%pSR a=%p", + __entry->call, + (__entry->op == 0 ? "NWc" : + __entry->op == 1 ? "NWs" : + __entry->op == 2 ? "SEE" : + __entry->op == 3 ? "GET" : + __entry->op == 4 ? "Gsb" : + __entry->op == 5 ? "PUT" : + "Psb"), + __entry->usage, + __entry->nskb, + __entry->where, + __entry->aux) + ); + TRACE_EVENT(rxrpc_skb, TP_PROTO(struct sk_buff *skb, int op, int usage, int mod_count, const void *where), diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index ce6afd931e91..0c320b2b7b43 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -543,7 +543,11 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *, struct sk_buff *); void rxrpc_release_call(struct rxrpc_call *); void rxrpc_release_calls_on_socket(struct rxrpc_sock *); -void __rxrpc_put_call(struct rxrpc_call *); +void rxrpc_see_call(struct rxrpc_call *); +void rxrpc_get_call(struct rxrpc_call *); +void rxrpc_put_call(struct rxrpc_call *); +void rxrpc_get_call_for_skb(struct rxrpc_call *, struct sk_buff *); +void rxrpc_put_call_for_skb(struct rxrpc_call *, struct sk_buff *); void __exit rxrpc_destroy_all_calls(void); static inline bool rxrpc_is_service_call(const struct rxrpc_call *call) @@ -1022,16 +1026,3 @@ do { \ } while (0) #endif /* __KDEBUGALL */ - - -#define rxrpc_get_call(CALL) \ -do { \ - CHECK_SLAB_OKAY(&(CALL)->usage); \ - if (atomic_inc_return(&(CALL)->usage) == 1) \ - BUG(); \ -} while (0) - -#define rxrpc_put_call(CALL) \ -do { \ - __rxrpc_put_call(CALL); \ -} while (0) diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index ef9ef0d6c917..03af88fe798b 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -129,8 +129,7 @@ static int rxrpc_accept_incoming_call(struct rxrpc_local *local, _debug("conn ready"); call->state = RXRPC_CALL_SERVER_ACCEPTING; list_add_tail(&call->accept_link, &rx->acceptq); - rxrpc_get_call(call); - atomic_inc(&call->skb_count); + rxrpc_get_call_for_skb(call, notification); nsp = rxrpc_skb(notification); nsp->call = call; @@ -323,6 +322,7 @@ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx, call = list_entry(rx->acceptq.next, struct rxrpc_call, accept_link); list_del_init(&call->accept_link); sk_acceptq_removed(&rx->sk); + rxrpc_see_call(call); write_lock_bh(&call->state_lock); switch (call->state) { @@ -395,6 +395,7 @@ int rxrpc_reject_call(struct rxrpc_sock *rx) call = list_entry(rx->acceptq.next, struct rxrpc_call, accept_link); list_del_init(&call->accept_link); sk_acceptq_removed(&rx->sk); + rxrpc_see_call(call); write_lock_bh(&call->state_lock); switch (call->state) { diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 94c7751fd99a..02fe4a4b60d9 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -465,8 +465,7 @@ static void rxrpc_insert_oos_packet(struct rxrpc_call *call, skb->destructor = rxrpc_packet_destructor; ASSERTCMP(sp->call, ==, NULL); sp->call = call; - rxrpc_get_call(call); - atomic_inc(&call->skb_count); + rxrpc_get_call_for_skb(call, skb); /* insert into the buffer in sequence order */ spin_lock_bh(&call->lock); @@ -741,8 +740,7 @@ all_acked: _debug("post ACK"); skb->mark = RXRPC_SKB_MARK_FINAL_ACK; sp->call = call; - rxrpc_get_call(call); - atomic_inc(&call->skb_count); + rxrpc_get_call_for_skb(call, skb); spin_lock_bh(&call->lock); if (rxrpc_queue_rcv_skb(call, skb, true, true) < 0) BUG(); @@ -801,8 +799,7 @@ static int rxrpc_post_message(struct rxrpc_call *call, u32 mark, u32 error, memset(sp, 0, sizeof(*sp)); sp->error = error; sp->call = call; - rxrpc_get_call(call); - atomic_inc(&call->skb_count); + rxrpc_get_call_for_skb(call, skb); spin_lock_bh(&call->lock); ret = rxrpc_queue_rcv_skb(call, skb, true, fatal); @@ -834,6 +831,8 @@ void rxrpc_process_call(struct work_struct *work) u32 serial, abort_code = RX_PROTOCOL_ERROR; u8 *acks = NULL; + rxrpc_see_call(call); + //printk("\n--------------------\n"); _enter("{%d,%s,%lx} [%lu]", call->debug_id, rxrpc_call_states[call->state], call->events, diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 852c30dc7b75..104ee8b1de06 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -219,6 +219,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, { struct rxrpc_call *call, *xcall; struct rb_node *parent, **pp; + const void *here = __builtin_return_address(0); int ret; _enter("%p,%lx", rx, user_call_ID); @@ -229,6 +230,9 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, return call; } + trace_rxrpc_call(call, 0, atomic_read(&call->usage), 0, here, + (const void *)user_call_ID); + /* Publish the call, even though it is incompletely set up as yet */ call->user_call_ID = user_call_ID; __set_bit(RXRPC_CALL_HAS_USERID, &call->flags); @@ -308,6 +312,7 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx, { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_call *call, *candidate; + const void *here = __builtin_return_address(0); u32 call_id, chan; _enter(",%d", conn->debug_id); @@ -318,6 +323,9 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx, if (!candidate) return ERR_PTR(-EBUSY); + trace_rxrpc_call(candidate, 1, atomic_read(&candidate->usage), + 0, here, NULL); + chan = sp->hdr.cid & RXRPC_CHANNELMASK; candidate->socket = rx; candidate->conn = conn; @@ -430,6 +438,44 @@ old_call: return ERR_PTR(-ECONNRESET); } +/* + * Note the re-emergence of a call. + */ +void rxrpc_see_call(struct rxrpc_call *call) +{ + const void *here = __builtin_return_address(0); + if (call) { + int n = atomic_read(&call->usage); + int m = atomic_read(&call->skb_count); + + trace_rxrpc_call(call, 2, n, m, here, 0); + } +} + +/* + * Note the addition of a ref on a call. + */ +void rxrpc_get_call(struct rxrpc_call *call) +{ + const void *here = __builtin_return_address(0); + int n = atomic_inc_return(&call->usage); + int m = atomic_read(&call->skb_count); + + trace_rxrpc_call(call, 3, n, m, here, 0); +} + +/* + * Note the addition of a ref on a call for a socket buffer. + */ +void rxrpc_get_call_for_skb(struct rxrpc_call *call, struct sk_buff *skb) +{ + const void *here = __builtin_return_address(0); + int n = atomic_inc_return(&call->usage); + int m = atomic_inc_return(&call->skb_count); + + trace_rxrpc_call(call, 4, n, m, here, skb); +} + /* * detach a call from a socket and set up for release */ @@ -443,6 +489,8 @@ void rxrpc_release_call(struct rxrpc_call *call) atomic_read(&call->ackr_not_idle), call->rx_first_oos); + rxrpc_see_call(call); + spin_lock_bh(&call->lock); if (test_and_set_bit(RXRPC_CALL_RELEASED, &call->flags)) BUG(); @@ -526,6 +574,7 @@ static void rxrpc_dead_call_expired(unsigned long _call) _enter("{%d}", call->debug_id); + rxrpc_see_call(call); write_lock_bh(&call->state_lock); call->state = RXRPC_CALL_DEAD; write_unlock_bh(&call->state_lock); @@ -540,6 +589,7 @@ static void rxrpc_mark_call_released(struct rxrpc_call *call) { bool sched; + rxrpc_see_call(call); write_lock(&call->state_lock); if (call->state < RXRPC_CALL_DEAD) { sched = __rxrpc_abort_call(call, RX_CALL_DEAD, ECONNRESET); @@ -585,21 +635,43 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx) /* * release a call */ -void __rxrpc_put_call(struct rxrpc_call *call) +void rxrpc_put_call(struct rxrpc_call *call) { + const void *here = __builtin_return_address(0); + int n, m; + ASSERT(call != NULL); - _enter("%p{u=%d}", call, atomic_read(&call->usage)); - - ASSERTCMP(atomic_read(&call->usage), >, 0); - - if (atomic_dec_and_test(&call->usage)) { + n = atomic_dec_return(&call->usage); + m = atomic_read(&call->skb_count); + trace_rxrpc_call(call, 5, n, m, here, NULL); + ASSERTCMP(n, >=, 0); + if (n == 0) { _debug("call %d dead", call->debug_id); - WARN_ON(atomic_read(&call->skb_count) != 0); + WARN_ON(m != 0); + ASSERTCMP(call->state, ==, RXRPC_CALL_DEAD); + rxrpc_queue_work(&call->destroyer); + } +} + +/* + * Release a call ref held by a socket buffer. + */ +void rxrpc_put_call_for_skb(struct rxrpc_call *call, struct sk_buff *skb) +{ + const void *here = __builtin_return_address(0); + int n, m; + + n = atomic_dec_return(&call->usage); + m = atomic_dec_return(&call->skb_count); + trace_rxrpc_call(call, 6, n, m, here, skb); + ASSERTCMP(n, >=, 0); + if (n == 0) { + _debug("call %d dead", call->debug_id); + WARN_ON(m != 0); ASSERTCMP(call->state, ==, RXRPC_CALL_DEAD); rxrpc_queue_work(&call->destroyer); } - _leave(""); } /* @@ -705,6 +777,7 @@ void __exit rxrpc_destroy_all_calls(void) call = list_entry(rxrpc_calls.next, struct rxrpc_call, link); _debug("Zapping call %p", call); + rxrpc_see_call(call); list_del_init(&call->link); switch (atomic_read(&call->usage)) { @@ -748,6 +821,7 @@ static void rxrpc_call_life_expired(unsigned long _call) _enter("{%d}", call->debug_id); + rxrpc_see_call(call); if (call->state >= RXRPC_CALL_COMPLETE) return; @@ -765,6 +839,7 @@ static void rxrpc_resend_time_expired(unsigned long _call) _enter("{%d}", call->debug_id); + rxrpc_see_call(call); if (call->state >= RXRPC_CALL_COMPLETE) return; @@ -782,6 +857,7 @@ static void rxrpc_ack_time_expired(unsigned long _call) _enter("{%d}", call->debug_id); + rxrpc_see_call(call); if (call->state >= RXRPC_CALL_COMPLETE) return; diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 44850a2d90b5..4b213bc0f554 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -537,6 +537,7 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn, struct rxrpc_call, chan_wait_link); u32 call_id = chan->call_counter + 1; + rxrpc_see_call(call); list_del_init(&call->chan_wait_link); conn->active_chans |= 1 << channel; call->peer = rxrpc_get_peer(conn->params.peer); diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index bcea99c73b40..bc9b05938ff5 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -157,6 +157,7 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn, conn->channels[i].call, lockdep_is_held(&conn->channel_lock)); if (call) { + rxrpc_see_call(call); write_lock_bh(&call->state_lock); if (rxrpc_set_call_completion(call, compl, abort_code, error)) { diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index af49c2992c4a..86bea9ad6c3d 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -196,8 +196,7 @@ static int rxrpc_fast_process_data(struct rxrpc_call *call, goto enqueue_packet; sp->call = call; - rxrpc_get_call(call); - atomic_inc(&call->skb_count); + rxrpc_get_call_for_skb(call, skb); terminal = ((flags & RXRPC_LAST_PACKET) && !(flags & RXRPC_CLIENT_INITIATED)); ret = rxrpc_queue_rcv_skb(call, skb, false, terminal); @@ -748,6 +747,7 @@ void rxrpc_data_ready(struct sock *sk) if (!call || atomic_read(&call->usage) == 0) goto cant_route_call; + rxrpc_see_call(call); rxrpc_post_packet_to_call(call, skb); goto out_unlock; } diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 036e1112b0c5..888fa87ed1d6 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -207,6 +207,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) return PTR_ERR(call); } + rxrpc_see_call(call); _debug("CALL %d USR %lx ST %d on CONN %p", call->debug_id, call->user_call_ID, call->state, call->conn); diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index 865078d76ad3..27b9ecad007e 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -270,6 +270,7 @@ void rxrpc_peer_error_distributor(struct work_struct *work) call = hlist_entry(peer->error_targets.first, struct rxrpc_call, error_link); hlist_del_init(&call->error_link); + rxrpc_see_call(call); queue = false; write_lock(&call->state_lock); diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 96d98a3a7087..c9b38c7fb448 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -115,6 +115,7 @@ int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, sp = rxrpc_skb(skb); call = sp->call; ASSERT(call != NULL); + rxrpc_see_call(call); _debug("next pkt %s", rxrpc_pkts[sp->hdr.type]); diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c index fbd8c74d9505..20529205bb8c 100644 --- a/net/rxrpc/skbuff.c +++ b/net/rxrpc/skbuff.c @@ -140,9 +140,7 @@ void rxrpc_packet_destructor(struct sk_buff *skb) _enter("%p{%p}", skb, call); if (call) { - if (atomic_dec_return(&call->skb_count) < 0) - BUG(); - rxrpc_put_call(call); + rxrpc_put_call_for_skb(call, skb); sp->call = NULL; } From 378c9c9603a48135336f9440995e5a342fbc5afa Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 30 Aug 2016 09:49:29 +0100 Subject: [PATCH 0573/1715] afs: Miscellaneous simple cleanups Remove one #ifndef'd-out variable and a couple of excessive blank lines. Signed-off-by: David Howells --- fs/afs/cmservice.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 85737e96ab8b..ca32d891bbc3 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -17,10 +17,6 @@ #include "internal.h" #include "afs_cm.h" -#if 0 -struct workqueue_struct *afs_cm_workqueue; -#endif /* 0 */ - static int afs_deliver_cb_init_call_back_state(struct afs_call *, struct sk_buff *, bool); static int afs_deliver_cb_init_call_back_state3(struct afs_call *, @@ -282,7 +278,6 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb, break; } - call->state = AFS_CALL_REPLYING; /* we'll need the file server record as that tells us which set of @@ -426,7 +421,6 @@ static void SRXAFSCB_ProbeUuid(struct work_struct *work) _enter(""); - if (memcmp(r, &afs_uuid, sizeof(afs_uuid)) == 0) reply.match = htonl(0); else From e0661dfc5961cf14f255fa5466041a961ca2ebdf Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 30 Aug 2016 16:05:14 +0100 Subject: [PATCH 0574/1715] afs: Need linux/random.h We should #include linux/random.h to use get_random(). Signed-off-by: David Howells --- fs/afs/main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/afs/main.c b/fs/afs/main.c index 35de0c04729f..0b187ef3b5b7 100644 --- a/fs/afs/main.c +++ b/fs/afs/main.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "internal.h" MODULE_DESCRIPTION("AFS Client File System"); From 8324f0bcfbfc645cf248e4b93ab58341b7d3b135 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 30 Aug 2016 09:49:29 +0100 Subject: [PATCH 0575/1715] rxrpc: Provide a way for AFS to ask for the peer address of a call Provide a function so that kernel users, such as AFS, can ask for the peer address of a call: void rxrpc_kernel_get_peer(struct rxrpc_call *call, struct sockaddr_rxrpc *_srx); In the future the kernel service won't get sk_buffs to look inside. Further, this allows us to hide any canonicalisation inside AF_RXRPC for when IPv6 support is added. Also propagate this through to afs_find_server() and issue a warning if we can't handle the address family yet. Signed-off-by: David Howells --- Documentation/networking/rxrpc.txt | 7 +++++++ fs/afs/cmservice.c | 20 +++++++++++--------- fs/afs/internal.h | 5 ++++- fs/afs/rxrpc.c | 2 +- fs/afs/server.c | 11 ++++++++--- include/net/af_rxrpc.h | 2 ++ net/rxrpc/peer_object.c | 15 +++++++++++++++ 7 files changed, 48 insertions(+), 14 deletions(-) diff --git a/Documentation/networking/rxrpc.txt b/Documentation/networking/rxrpc.txt index 70c926ae212d..dfe0b008df74 100644 --- a/Documentation/networking/rxrpc.txt +++ b/Documentation/networking/rxrpc.txt @@ -868,6 +868,13 @@ The kernel interface functions are as follows: This is used to allocate a null RxRPC key that can be used to indicate anonymous security for a particular domain. + (*) Get the peer address of a call. + + void rxrpc_kernel_get_peer(struct socket *sock, struct rxrpc_call *call, + struct sockaddr_rxrpc *_srx); + + This is used to find the remote peer address of a call. + ======================= CONFIGURABLE PARAMETERS diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index ca32d891bbc3..77ee481059ac 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -167,9 +167,9 @@ static void SRXAFSCB_CallBack(struct work_struct *work) static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb, bool last) { + struct sockaddr_rxrpc srx; struct afs_callback *cb; struct afs_server *server; - struct in_addr addr; __be32 *bp; u32 tmp; int ret, loop; @@ -178,6 +178,7 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb, switch (call->unmarshall) { case 0: + rxrpc_kernel_get_peer(afs_socket, call->rxcall, &srx); call->offset = 0; call->unmarshall++; @@ -282,8 +283,7 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb, /* we'll need the file server record as that tells us which set of * vnodes to operate upon */ - memcpy(&addr, &ip_hdr(skb)->saddr, 4); - server = afs_find_server(&addr); + server = afs_find_server(&srx); if (!server) return -ENOTCONN; call->server = server; @@ -314,12 +314,14 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call, struct sk_buff *skb, bool last) { + struct sockaddr_rxrpc srx; struct afs_server *server; - struct in_addr addr; int ret; _enter(",{%u},%d", skb->len, last); + rxrpc_kernel_get_peer(afs_socket, call->rxcall, &srx); + ret = afs_data_complete(call, skb, last); if (ret < 0) return ret; @@ -329,8 +331,7 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call, /* we'll need the file server record as that tells us which set of * vnodes to operate upon */ - memcpy(&addr, &ip_hdr(skb)->saddr, 4); - server = afs_find_server(&addr); + server = afs_find_server(&srx); if (!server) return -ENOTCONN; call->server = server; @@ -347,11 +348,13 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call, struct sk_buff *skb, bool last) { + struct sockaddr_rxrpc srx; struct afs_server *server; - struct in_addr addr; _enter(",{%u},%d", skb->len, last); + rxrpc_kernel_get_peer(afs_socket, call->rxcall, &srx); + /* There are some arguments that we ignore */ afs_data_consumed(call, skb); if (!last) @@ -362,8 +365,7 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call, /* we'll need the file server record as that tells us which set of * vnodes to operate upon */ - memcpy(&addr, &ip_hdr(skb)->saddr, 4); - server = afs_find_server(&addr); + server = afs_find_server(&srx); if (!server) return -ENOTCONN; call->server = server; diff --git a/fs/afs/internal.h b/fs/afs/internal.h index df976b2a7f40..d97552de9c59 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -20,6 +20,7 @@ #include #include #include +#include #include "afs.h" #include "afs_vl.h" @@ -607,6 +608,8 @@ extern void afs_proc_cell_remove(struct afs_cell *); /* * rxrpc.c */ +extern struct socket *afs_socket; + extern int afs_open_socket(void); extern void afs_close_socket(void); extern void afs_data_consumed(struct afs_call *, struct sk_buff *); @@ -654,7 +657,7 @@ do { \ extern struct afs_server *afs_lookup_server(struct afs_cell *, const struct in_addr *); -extern struct afs_server *afs_find_server(const struct in_addr *); +extern struct afs_server *afs_find_server(const struct sockaddr_rxrpc *); extern void afs_put_server(struct afs_server *); extern void __exit afs_purge_servers(void); diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 14d04c848465..a1916750e2f9 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -16,7 +16,7 @@ #include "internal.h" #include "afs_cm.h" -static struct socket *afs_socket; /* my RxRPC socket */ +struct socket *afs_socket; /* my RxRPC socket */ static struct workqueue_struct *afs_async_calls; static atomic_t afs_outstanding_calls; static atomic_t afs_outstanding_skbs; diff --git a/fs/afs/server.c b/fs/afs/server.c index f342acf3547d..d4066ab7dd55 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -178,13 +178,18 @@ server_in_two_cells: /* * look up a server by its IP address */ -struct afs_server *afs_find_server(const struct in_addr *_addr) +struct afs_server *afs_find_server(const struct sockaddr_rxrpc *srx) { struct afs_server *server = NULL; struct rb_node *p; - struct in_addr addr = *_addr; + struct in_addr addr = srx->transport.sin.sin_addr; - _enter("%pI4", &addr.s_addr); + _enter("{%d,%pI4}", srx->transport.family, &addr.s_addr); + + if (srx->transport.family != AF_INET) { + WARN(true, "AFS does not yes support non-IPv4 addresses\n"); + return NULL; + } read_lock(&afs_servers_lock); diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index 7b0f88699b25..f9224e835d43 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -49,5 +49,7 @@ int rxrpc_kernel_get_error_number(struct sk_buff *); void rxrpc_kernel_free_skb(struct sk_buff *); struct rxrpc_call *rxrpc_kernel_accept_call(struct socket *, unsigned long); int rxrpc_kernel_reject_call(struct socket *); +void rxrpc_kernel_get_peer(struct socket *, struct rxrpc_call *, + struct sockaddr_rxrpc *); #endif /* _NET_RXRPC_H */ diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index 538e9831c699..aebc73ac16dc 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -313,3 +313,18 @@ void __rxrpc_put_peer(struct rxrpc_peer *peer) kfree_rcu(peer, rcu); } + +/** + * rxrpc_kernel_get_peer - Get the peer address of a call + * @sock: The socket on which the call is in progress. + * @call: The call to query + * @_srx: Where to place the result + * + * Get the address of the remote peer in a call. + */ +void rxrpc_kernel_get_peer(struct socket *sock, struct rxrpc_call *call, + struct sockaddr_rxrpc *_srx) +{ + *_srx = call->peer->srx; +} +EXPORT_SYMBOL(rxrpc_kernel_get_peer); From ea82aaec9879e4df307ccbbf26491a8e0a52e4f1 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 30 Aug 2016 12:36:06 +0100 Subject: [PATCH 0576/1715] rxrpc: Use call->peer rather than going to the connection Use call->peer rather than call->conn->params.peer as call->conn may become NULL. Signed-off-by: David Howells --- net/rxrpc/call_event.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 02fe4a4b60d9..de72de662044 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -551,7 +551,7 @@ static void rxrpc_extract_ackinfo(struct rxrpc_call *call, struct sk_buff *skb, mtu = min(ntohl(ackinfo.rxMTU), ntohl(ackinfo.maxMTU)); - peer = call->conn->params.peer; + peer = call->peer; if (mtu < peer->maxdata) { spin_lock_bh(&peer->lock); peer->maxdata = mtu; @@ -843,8 +843,8 @@ void rxrpc_process_call(struct work_struct *work) /* there's a good chance we're going to have to send a message, so set * one up in advance */ - msg.msg_name = &call->conn->params.peer->srx.transport; - msg.msg_namelen = call->conn->params.peer->srx.transport_len; + msg.msg_name = &call->peer->srx.transport; + msg.msg_namelen = call->peer->srx.transport_len; msg.msg_control = NULL; msg.msg_controllen = 0; msg.msg_flags = 0; @@ -1151,8 +1151,8 @@ skip_msg_init: send_ACK_with_skew: ack.maxSkew = htons(call->ackr_skew); send_ACK: - mtu = call->conn->params.peer->if_mtu; - mtu -= call->conn->params.peer->hdrsize; + mtu = call->peer->if_mtu; + mtu -= call->peer->hdrsize; ackinfo.maxMTU = htonl(mtu); ackinfo.rwind = htonl(rxrpc_rx_window_size); From 4de48af663d88d8c9a2550e60725f5a5c660970b Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 30 Aug 2016 12:00:48 +0100 Subject: [PATCH 0577/1715] rxrpc: Pass struct socket * to more rxrpc kernel interface functions Pass struct socket * to more rxrpc kernel interface functions. They should be starting from this rather than the socket pointer in the rxrpc_call struct if they need to access the socket. I have left: rxrpc_kernel_is_data_last() rxrpc_kernel_get_abort_code() rxrpc_kernel_get_error_number() rxrpc_kernel_free_skb() rxrpc_kernel_data_consumed() unmodified as they're all about to be removed (and, in any case, don't touch the socket). Signed-off-by: David Howells --- Documentation/networking/rxrpc.txt | 11 ++++++++--- fs/afs/rxrpc.c | 26 +++++++++++++++----------- include/net/af_rxrpc.h | 10 +++++++--- net/rxrpc/af_rxrpc.c | 5 +++-- net/rxrpc/output.c | 20 +++++++++++--------- 5 files changed, 44 insertions(+), 28 deletions(-) diff --git a/Documentation/networking/rxrpc.txt b/Documentation/networking/rxrpc.txt index dfe0b008df74..cfc8cb91452f 100644 --- a/Documentation/networking/rxrpc.txt +++ b/Documentation/networking/rxrpc.txt @@ -725,7 +725,8 @@ The kernel interface functions are as follows: (*) End a client call. - void rxrpc_kernel_end_call(struct rxrpc_call *call); + void rxrpc_kernel_end_call(struct socket *sock, + struct rxrpc_call *call); This is used to end a previously begun call. The user_call_ID is expunged from AF_RXRPC's knowledge and will not be seen again in association with @@ -733,7 +734,9 @@ The kernel interface functions are as follows: (*) Send data through a call. - int rxrpc_kernel_send_data(struct rxrpc_call *call, struct msghdr *msg, + int rxrpc_kernel_send_data(struct socket *sock, + struct rxrpc_call *call, + struct msghdr *msg, size_t len); This is used to supply either the request part of a client call or the @@ -747,7 +750,9 @@ The kernel interface functions are as follows: (*) Abort a call. - void rxrpc_kernel_abort_call(struct rxrpc_call *call, u32 abort_code); + void rxrpc_kernel_abort_call(struct socket *sock, + struct rxrpc_call *call, + u32 abort_code); This is used to abort a call if it's still in an abortable state. The abort code specified will be placed in the ABORT message sent. diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index a1916750e2f9..7b0d18900f50 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -207,7 +207,7 @@ static void afs_free_call(struct afs_call *call) static void afs_end_call_nofree(struct afs_call *call) { if (call->rxcall) { - rxrpc_kernel_end_call(call->rxcall); + rxrpc_kernel_end_call(afs_socket, call->rxcall); call->rxcall = NULL; } if (call->type->destructor) @@ -325,8 +325,8 @@ static int afs_send_pages(struct afs_call *call, struct msghdr *msg, * returns from sending the request */ if (first + loop >= last) call->state = AFS_CALL_AWAIT_REPLY; - ret = rxrpc_kernel_send_data(call->rxcall, msg, - to - offset); + ret = rxrpc_kernel_send_data(afs_socket, call->rxcall, + msg, to - offset); kunmap(pages[loop]); if (ret < 0) break; @@ -406,7 +406,8 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, * request */ if (!call->send_pages) call->state = AFS_CALL_AWAIT_REPLY; - ret = rxrpc_kernel_send_data(rxcall, &msg, call->request_size); + ret = rxrpc_kernel_send_data(afs_socket, rxcall, + &msg, call->request_size); if (ret < 0) goto error_do_abort; @@ -421,7 +422,7 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, return wait_mode->wait(call); error_do_abort: - rxrpc_kernel_abort_call(rxcall, RX_USER_ABORT); + rxrpc_kernel_abort_call(afs_socket, rxcall, RX_USER_ABORT); while ((skb = skb_dequeue(&call->rx_queue))) afs_free_skb(skb); error_kill_call: @@ -509,7 +510,8 @@ static void afs_deliver_to_call(struct afs_call *call) if (call->state != AFS_CALL_AWAIT_REPLY) abort_code = RXGEN_SS_UNMARSHAL; do_abort: - rxrpc_kernel_abort_call(call->rxcall, + rxrpc_kernel_abort_call(afs_socket, + call->rxcall, abort_code); call->error = ret; call->state = AFS_CALL_ERROR; @@ -605,7 +607,7 @@ static int afs_wait_for_call_to_complete(struct afs_call *call) /* kill the call */ if (call->state < AFS_CALL_COMPLETE) { _debug("call incomplete"); - rxrpc_kernel_abort_call(call->rxcall, RX_CALL_DEAD); + rxrpc_kernel_abort_call(afs_socket, call->rxcall, RX_CALL_DEAD); while ((skb = skb_dequeue(&call->rx_queue))) afs_free_skb(skb); } @@ -823,14 +825,15 @@ void afs_send_empty_reply(struct afs_call *call) msg.msg_flags = 0; call->state = AFS_CALL_AWAIT_ACK; - switch (rxrpc_kernel_send_data(call->rxcall, &msg, 0)) { + switch (rxrpc_kernel_send_data(afs_socket, call->rxcall, &msg, 0)) { case 0: _leave(" [replied]"); return; case -ENOMEM: _debug("oom"); - rxrpc_kernel_abort_call(call->rxcall, RX_USER_ABORT); + rxrpc_kernel_abort_call(afs_socket, call->rxcall, + RX_USER_ABORT); default: afs_end_call(call); _leave(" [error]"); @@ -859,7 +862,7 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len) msg.msg_flags = 0; call->state = AFS_CALL_AWAIT_ACK; - n = rxrpc_kernel_send_data(call->rxcall, &msg, len); + n = rxrpc_kernel_send_data(afs_socket, call->rxcall, &msg, len); if (n >= 0) { /* Success */ _leave(" [replied]"); @@ -868,7 +871,8 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len) if (n == -ENOMEM) { _debug("oom"); - rxrpc_kernel_abort_call(call->rxcall, RX_USER_ABORT); + rxrpc_kernel_abort_call(afs_socket, call->rxcall, + RX_USER_ABORT); } afs_end_call(call); _leave(" [error]"); diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index f9224e835d43..f8d8079dc058 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -15,6 +15,9 @@ #include #include +struct key; +struct sock; +struct socket; struct rxrpc_call; /* @@ -39,10 +42,11 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *, struct key *, unsigned long, gfp_t); -int rxrpc_kernel_send_data(struct rxrpc_call *, struct msghdr *, size_t); +int rxrpc_kernel_send_data(struct socket *, struct rxrpc_call *, + struct msghdr *, size_t); void rxrpc_kernel_data_consumed(struct rxrpc_call *, struct sk_buff *); -void rxrpc_kernel_abort_call(struct rxrpc_call *, u32); -void rxrpc_kernel_end_call(struct rxrpc_call *); +void rxrpc_kernel_abort_call(struct socket *, struct rxrpc_call *, u32); +void rxrpc_kernel_end_call(struct socket *, struct rxrpc_call *); bool rxrpc_kernel_is_data_last(struct sk_buff *); u32 rxrpc_kernel_get_abort_code(struct sk_buff *); int rxrpc_kernel_get_error_number(struct sk_buff *); diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index c7cf356b42b8..e07c91acd904 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -279,15 +279,16 @@ EXPORT_SYMBOL(rxrpc_kernel_begin_call); /** * rxrpc_kernel_end_call - Allow a kernel service to end a call it was using + * @sock: The socket the call is on * @call: The call to end * * Allow a kernel service to end a call it was using. The call must be * complete before this is called (the call should be aborted if necessary). */ -void rxrpc_kernel_end_call(struct rxrpc_call *call) +void rxrpc_kernel_end_call(struct socket *sock, struct rxrpc_call *call) { _enter("%d{%d}", call->debug_id, atomic_read(&call->usage)); - rxrpc_remove_user_ID(call->socket, call); + rxrpc_remove_user_ID(rxrpc_sk(sock->sk), call); rxrpc_put_call(call); } EXPORT_SYMBOL(rxrpc_kernel_end_call); diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 888fa87ed1d6..b1e708a12151 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -239,6 +239,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) /** * rxrpc_kernel_send_data - Allow a kernel service to send data on a call + * @sock: The socket the call is on * @call: The call to send data through * @msg: The data to send * @len: The amount of data to send @@ -248,8 +249,8 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) * nor should an address be supplied. MSG_MORE should be flagged if there's * more data to come, otherwise this data will end the transmission phase. */ -int rxrpc_kernel_send_data(struct rxrpc_call *call, struct msghdr *msg, - size_t len) +int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call, + struct msghdr *msg, size_t len) { int ret; @@ -258,7 +259,7 @@ int rxrpc_kernel_send_data(struct rxrpc_call *call, struct msghdr *msg, ASSERTCMP(msg->msg_name, ==, NULL); ASSERTCMP(msg->msg_control, ==, NULL); - lock_sock(&call->socket->sk); + lock_sock(sock->sk); _debug("CALL %d USR %lx ST %d on CONN %p", call->debug_id, call->user_call_ID, call->state, call->conn); @@ -270,35 +271,36 @@ int rxrpc_kernel_send_data(struct rxrpc_call *call, struct msghdr *msg, call->state != RXRPC_CALL_SERVER_SEND_REPLY) { ret = -EPROTO; /* request phase complete for this client call */ } else { - ret = rxrpc_send_data(call->socket, call, msg, len); + ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len); } - release_sock(&call->socket->sk); + release_sock(sock->sk); _leave(" = %d", ret); return ret; } - EXPORT_SYMBOL(rxrpc_kernel_send_data); /** * rxrpc_kernel_abort_call - Allow a kernel service to abort a call + * @sock: The socket the call is on * @call: The call to be aborted * @abort_code: The abort code to stick into the ABORT packet * * Allow a kernel service to abort a call, if it's still in an abortable state. */ -void rxrpc_kernel_abort_call(struct rxrpc_call *call, u32 abort_code) +void rxrpc_kernel_abort_call(struct socket *sock, struct rxrpc_call *call, + u32 abort_code) { _enter("{%d},%d", call->debug_id, abort_code); - lock_sock(&call->socket->sk); + lock_sock(sock->sk); _debug("CALL %d USR %lx ST %d on CONN %p", call->debug_id, call->user_call_ID, call->state, call->conn); rxrpc_send_abort(call, abort_code); - release_sock(&call->socket->sk); + release_sock(sock->sk); _leave(""); } From cf4d13fecfe02ce695fc44d8a136d28505365e8e Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 26 Aug 2016 19:35:25 +0100 Subject: [PATCH 0578/1715] drivers: net: stmmac: fix spelling mistake "mulitcast" -> "multicast" Trivial fix to spelling mistake in dev_warn message. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c index cbefe9e2207c..6f6bbc54e9fe 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c @@ -145,7 +145,7 @@ static void dwmac1000_set_mchash(void __iomem *ioaddr, u32 *mcfilterbits, numhashregs = 8; break; default: - pr_debug("STMMAC: err in setting mulitcast filter\n"); + pr_debug("STMMAC: err in setting multicast filter\n"); return; break; } From ae3cb8cb20c87c0833a54360344ad4ee77bdb184 Mon Sep 17 00:00:00 2001 From: Mark Rustad Date: Tue, 30 Aug 2016 11:33:43 -0700 Subject: [PATCH 0579/1715] ixgbe: Eliminate useless message and improve logic Remove a useless log message and improve the logic for setting a PHY address from the contents of the MNG_IF_SEL register. Signed-off-by: Mark Rustad Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index fb1b819d8311..e092a8929413 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -2394,18 +2394,12 @@ static void ixgbe_read_mng_if_sel_x550em(struct ixgbe_hw *hw) /* If X552 (X550EM_a) and MDIO is connected to external PHY, then set * PHY address. This register field was has only been used for X552. */ - if (!hw->phy.nw_mng_if_sel) { - if (hw->mac.type == ixgbe_mac_x550em_a) { - struct ixgbe_adapter *adapter = hw->back; - - e_warn(drv, "nw_mng_if_sel not set\n"); - } - return; + if (hw->mac.type == ixgbe_mac_x550em_a && + hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_MDIO_ACT) { + hw->phy.mdio.prtad = (hw->phy.nw_mng_if_sel & + IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD) >> + IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD_SHIFT; } - - hw->phy.mdio.prtad = (hw->phy.nw_mng_if_sel & - IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD) >> - IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD_SHIFT; } /** ixgbe_init_phy_ops_X550em - PHY/SFP specific init From 41852497a9205964b958a245a9526040b980926f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 26 Aug 2016 12:50:39 -0700 Subject: [PATCH 0580/1715] net: batch calls to flush_all_backlogs() After commit 145dd5f9c88f ("net: flush the softnet backlog in process context"), we can easily batch calls to flush_all_backlogs() for all devices processed in rollback_registered_many() Tested: Before patch, on an idle host. modprobe dummy numdummies=10000 perf stat -e context-switches -a rmmod dummy Performance counter stats for 'system wide': 1,211,798 context-switches 1.302137465 seconds time elapsed After patch: perf stat -e context-switches -a rmmod dummy Performance counter stats for 'system wide': 225,523 context-switches 0.721623566 seconds time elapsed Signed-off-by: Eric Dumazet Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/core/dev.c | 32 ++++++++++++-------------------- 1 file changed, 12 insertions(+), 20 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 1d5c6dda1988..34b5322bc081 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4282,18 +4282,11 @@ int netif_receive_skb(struct sk_buff *skb) } EXPORT_SYMBOL(netif_receive_skb); -struct flush_work { - struct net_device *dev; - struct work_struct work; -}; - -DEFINE_PER_CPU(struct flush_work, flush_works); +DEFINE_PER_CPU(struct work_struct, flush_works); /* Network device is going away, flush any packets still pending */ static void flush_backlog(struct work_struct *work) { - struct flush_work *flush = container_of(work, typeof(*flush), work); - struct net_device *dev = flush->dev; struct sk_buff *skb, *tmp; struct softnet_data *sd; @@ -4303,7 +4296,7 @@ static void flush_backlog(struct work_struct *work) local_irq_disable(); rps_lock(sd); skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) { - if (skb->dev == dev) { + if (skb->dev->reg_state == NETREG_UNREGISTERING) { __skb_unlink(skb, &sd->input_pkt_queue); kfree_skb(skb); input_queue_head_incr(sd); @@ -4313,7 +4306,7 @@ static void flush_backlog(struct work_struct *work) local_irq_enable(); skb_queue_walk_safe(&sd->process_queue, skb, tmp) { - if (skb->dev == dev) { + if (skb->dev->reg_state == NETREG_UNREGISTERING) { __skb_unlink(skb, &sd->process_queue); kfree_skb(skb); input_queue_head_incr(sd); @@ -4322,22 +4315,18 @@ static void flush_backlog(struct work_struct *work) local_bh_enable(); } -static void flush_all_backlogs(struct net_device *dev) +static void flush_all_backlogs(void) { unsigned int cpu; get_online_cpus(); - for_each_online_cpu(cpu) { - struct flush_work *flush = per_cpu_ptr(&flush_works, cpu); - - INIT_WORK(&flush->work, flush_backlog); - flush->dev = dev; - queue_work_on(cpu, system_highpri_wq, &flush->work); - } + for_each_online_cpu(cpu) + queue_work_on(cpu, system_highpri_wq, + per_cpu_ptr(&flush_works, cpu)); for_each_online_cpu(cpu) - flush_work(&per_cpu_ptr(&flush_works, cpu)->work); + flush_work(per_cpu_ptr(&flush_works, cpu)); put_online_cpus(); } @@ -6725,8 +6714,8 @@ static void rollback_registered_many(struct list_head *head) unlist_netdevice(dev); dev->reg_state = NETREG_UNREGISTERING; - flush_all_backlogs(dev); } + flush_all_backlogs(); synchronize_net(); @@ -8291,8 +8280,11 @@ static int __init net_dev_init(void) */ for_each_possible_cpu(i) { + struct work_struct *flush = per_cpu_ptr(&flush_works, i); struct softnet_data *sd = &per_cpu(softnet_data, i); + INIT_WORK(flush, flush_backlog); + skb_queue_head_init(&sd->input_pkt_queue); skb_queue_head_init(&sd->process_queue); INIT_LIST_HEAD(&sd->poll_list); From 14972cbd34ff668c390cbd2e6497323484c9e812 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Wed, 24 Aug 2016 20:10:43 -0700 Subject: [PATCH 0581/1715] net: lwtunnel: Handle fragmentation Today mpls iptunnel lwtunnel_output redirect expects the tunnel output function to handle fragmentation. This is ok but can be avoided if we did not do the mpls output redirect too early. ie we could wait until ip fragmentation is done and then call mpls output for each ip fragment. To make this work we will need, 1) the lwtunnel state to carry encap headroom 2) and do the redirect to the encap output handler on the ip fragment (essentially do the output redirect after fragmentation) This patch adds tunnel headroom in lwtstate to make sure we account for tunnel data in mtu calculations during fragmentation and adds new xmit redirect handler to redirect to lwtunnel xmit func after ip fragmentation. This includes IPV6 and some mtu fixes and testing from David Ahern. Signed-off-by: Roopa Prabhu Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/lwtunnel.h | 44 ++++++++++++++++++++++++++++++++++++++++ net/core/lwtunnel.c | 35 ++++++++++++++++++++++++++++++++ net/ipv4/ip_output.c | 8 ++++++++ net/ipv4/route.c | 4 +++- net/ipv6/ip6_output.c | 8 ++++++++ net/ipv6/route.c | 4 +++- net/mpls/mpls_iptunnel.c | 9 ++++---- 7 files changed, 106 insertions(+), 6 deletions(-) diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h index e9f116e29c22..ea3f80f58fd6 100644 --- a/include/net/lwtunnel.h +++ b/include/net/lwtunnel.h @@ -13,6 +13,13 @@ /* lw tunnel state flags */ #define LWTUNNEL_STATE_OUTPUT_REDIRECT BIT(0) #define LWTUNNEL_STATE_INPUT_REDIRECT BIT(1) +#define LWTUNNEL_STATE_XMIT_REDIRECT BIT(2) + +enum { + LWTUNNEL_XMIT_DONE, + LWTUNNEL_XMIT_CONTINUE, +}; + struct lwtunnel_state { __u16 type; @@ -21,6 +28,7 @@ struct lwtunnel_state { int (*orig_output)(struct net *net, struct sock *sk, struct sk_buff *skb); int (*orig_input)(struct sk_buff *); int len; + __u16 headroom; __u8 data[0]; }; @@ -34,6 +42,7 @@ struct lwtunnel_encap_ops { struct lwtunnel_state *lwtstate); int (*get_encap_size)(struct lwtunnel_state *lwtstate); int (*cmp_encap)(struct lwtunnel_state *a, struct lwtunnel_state *b); + int (*xmit)(struct sk_buff *skb); }; #ifdef CONFIG_LWTUNNEL @@ -75,6 +84,24 @@ static inline bool lwtunnel_input_redirect(struct lwtunnel_state *lwtstate) return false; } + +static inline bool lwtunnel_xmit_redirect(struct lwtunnel_state *lwtstate) +{ + if (lwtstate && (lwtstate->flags & LWTUNNEL_STATE_XMIT_REDIRECT)) + return true; + + return false; +} + +static inline unsigned int lwtunnel_headroom(struct lwtunnel_state *lwtstate, + unsigned int mtu) +{ + if (lwtunnel_xmit_redirect(lwtstate) && lwtstate->headroom < mtu) + return lwtstate->headroom; + + return 0; +} + int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *op, unsigned int num); int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *op, @@ -90,6 +117,7 @@ struct lwtunnel_state *lwtunnel_state_alloc(int hdr_len); int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b); int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb); int lwtunnel_input(struct sk_buff *skb); +int lwtunnel_xmit(struct sk_buff *skb); #else @@ -117,6 +145,17 @@ static inline bool lwtunnel_input_redirect(struct lwtunnel_state *lwtstate) return false; } +static inline bool lwtunnel_xmit_redirect(struct lwtunnel_state *lwtstate) +{ + return false; +} + +static inline unsigned int lwtunnel_headroom(struct lwtunnel_state *lwtstate, + unsigned int mtu) +{ + return 0; +} + static inline int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *op, unsigned int num) { @@ -170,6 +209,11 @@ static inline int lwtunnel_input(struct sk_buff *skb) return -EOPNOTSUPP; } +static inline int lwtunnel_xmit(struct sk_buff *skb) +{ + return -EOPNOTSUPP; +} + #endif /* CONFIG_LWTUNNEL */ #define MODULE_ALIAS_RTNL_LWT(encap_type) MODULE_ALIAS("rtnl-lwt-" __stringify(encap_type)) diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c index 669ecc9f884e..e5f84c26ba1a 100644 --- a/net/core/lwtunnel.c +++ b/net/core/lwtunnel.c @@ -251,6 +251,41 @@ drop: } EXPORT_SYMBOL(lwtunnel_output); +int lwtunnel_xmit(struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + const struct lwtunnel_encap_ops *ops; + struct lwtunnel_state *lwtstate; + int ret = -EINVAL; + + if (!dst) + goto drop; + + lwtstate = dst->lwtstate; + + if (lwtstate->type == LWTUNNEL_ENCAP_NONE || + lwtstate->type > LWTUNNEL_ENCAP_MAX) + return 0; + + ret = -EOPNOTSUPP; + rcu_read_lock(); + ops = rcu_dereference(lwtun_encaps[lwtstate->type]); + if (likely(ops && ops->xmit)) + ret = ops->xmit(skb); + rcu_read_unlock(); + + if (ret == -EOPNOTSUPP) + goto drop; + + return ret; + +drop: + kfree_skb(skb); + + return ret; +} +EXPORT_SYMBOL(lwtunnel_xmit); + int lwtunnel_input(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index dde37fb340bf..65569274efb8 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -73,6 +73,7 @@ #include #include #include +#include #include #include #include @@ -197,6 +198,13 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s skb = skb2; } + if (lwtunnel_xmit_redirect(dst->lwtstate)) { + int res = lwtunnel_xmit(skb); + + if (res < 0 || res == LWTUNNEL_XMIT_DONE) + return res; + } + rcu_read_lock_bh(); nexthop = (__force u32) rt_nexthop(rt, ip_hdr(skb)->daddr); neigh = __ipv4_neigh_lookup_noref(dev, nexthop); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index a1f2830d8110..3e992783c1d0 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1246,7 +1246,9 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst) mtu = 576; } - return min_t(unsigned int, mtu, IP_MAX_MTU); + mtu = min_t(unsigned int, mtu, IP_MAX_MTU); + + return mtu - lwtunnel_headroom(dst->lwtstate, mtu); } static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 1dfc402d9ad1..993fd9666f1b 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -56,6 +56,7 @@ #include #include #include +#include static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb) { @@ -104,6 +105,13 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * } } + if (lwtunnel_xmit_redirect(dst->lwtstate)) { + int res = lwtunnel_xmit(skb); + + if (res < 0 || res == LWTUNNEL_XMIT_DONE) + return res; + } + rcu_read_lock_bh(); nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr); neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 49817555449e..09d43ff11a8d 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1604,7 +1604,9 @@ static unsigned int ip6_mtu(const struct dst_entry *dst) rcu_read_unlock(); out: - return min_t(unsigned int, mtu, IP6_MAX_MTU); + mtu = min_t(unsigned int, mtu, IP6_MAX_MTU); + + return mtu - lwtunnel_headroom(dst->lwtstate, mtu); } static struct dst_entry *icmp6_dst_gc_list; diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c index 644a8da6d4bd..aed872cc05a6 100644 --- a/net/mpls/mpls_iptunnel.c +++ b/net/mpls/mpls_iptunnel.c @@ -37,7 +37,7 @@ static unsigned int mpls_encap_size(struct mpls_iptunnel_encap *en) return en->labels * sizeof(struct mpls_shim_hdr); } -static int mpls_output(struct net *net, struct sock *sk, struct sk_buff *skb) +static int mpls_xmit(struct sk_buff *skb) { struct mpls_iptunnel_encap *tun_encap_info; struct mpls_shim_hdr *hdr; @@ -115,7 +115,7 @@ static int mpls_output(struct net *net, struct sock *sk, struct sk_buff *skb) net_dbg_ratelimited("%s: packet transmission failed: %d\n", __func__, err); - return 0; + return LWTUNNEL_XMIT_DONE; drop: kfree_skb(skb); @@ -153,7 +153,8 @@ static int mpls_build_state(struct net_device *dev, struct nlattr *nla, if (ret) goto errout; newts->type = LWTUNNEL_ENCAP_MPLS; - newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT; + newts->flags |= LWTUNNEL_STATE_XMIT_REDIRECT; + newts->headroom = mpls_encap_size(tun_encap_info); *ts = newts; @@ -209,7 +210,7 @@ static int mpls_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) static const struct lwtunnel_encap_ops mpls_iptun_ops = { .build_state = mpls_build_state, - .output = mpls_output, + .xmit = mpls_xmit, .fill_encap = mpls_fill_encap_info, .get_encap_size = mpls_encap_nlsize, .cmp_encap = mpls_encap_cmp, From 48d2ab609b6bbecb7698487c8579bc40de9d6dfa Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 24 Aug 2016 20:10:44 -0700 Subject: [PATCH 0582/1715] net: mpls: Fixups for GSO As reported by Lennert the MPLS GSO code is failing to properly segment large packets. There are a couple of problems: 1. the inner protocol is not set so the gso segment functions for inner protocol layers are not getting run, and 2 MPLS labels for packets that use the "native" (non-OVS) MPLS code are not properly accounted for in mpls_gso_segment. The MPLS GSO code was added for OVS. It is re-using skb_mac_gso_segment to call the gso segment functions for the higher layer protocols. That means skb_mac_gso_segment is called twice -- once with the network protocol set to MPLS and again with the network protocol set to the inner protocol. This patch sets the inner skb protocol addressing item 1 above and sets the network_header and inner_network_header to mark where the MPLS labels start and end. The MPLS code in OVS is also updated to set the two network markers. >From there the MPLS GSO code uses the difference between the network header and the inner network header to know the size of the MPLS header that was pushed. It then pulls the MPLS header, resets the mac_len and protocol for the inner protocol and then calls skb_mac_gso_segment to segment the skb. Afterward the inner protocol segmentation is done the skb protocol is set to mpls for each segment and the network and mac headers restored. Reported-by: Lennert Buytenhek Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/mpls/mpls_gso.c | 40 ++++++++++++++++++++++++++++----------- net/mpls/mpls_iptunnel.c | 4 ++++ net/openvswitch/actions.c | 9 +++++++-- 3 files changed, 40 insertions(+), 13 deletions(-) diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c index 2055e57ed1c3..b4da6d8e8632 100644 --- a/net/mpls/mpls_gso.c +++ b/net/mpls/mpls_gso.c @@ -23,32 +23,50 @@ static struct sk_buff *mpls_gso_segment(struct sk_buff *skb, netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EINVAL); + u16 mac_offset = skb->mac_header; netdev_features_t mpls_features; + u16 mac_len = skb->mac_len; __be16 mpls_protocol; + unsigned int mpls_hlen; + + skb_reset_network_header(skb); + mpls_hlen = skb_inner_network_header(skb) - skb_network_header(skb); + if (unlikely(!pskb_may_pull(skb, mpls_hlen))) + goto out; /* Setup inner SKB. */ mpls_protocol = skb->protocol; skb->protocol = skb->inner_protocol; - /* Push back the mac header that skb_mac_gso_segment() has pulled. - * It will be re-pulled by the call to skb_mac_gso_segment() below - */ - __skb_push(skb, skb->mac_len); + __skb_pull(skb, mpls_hlen); + + skb->mac_len = 0; + skb_reset_mac_header(skb); /* Segment inner packet. */ mpls_features = skb->dev->mpls_features & features; segs = skb_mac_gso_segment(skb, mpls_features); + if (IS_ERR_OR_NULL(segs)) { + skb_gso_error_unwind(skb, mpls_protocol, mpls_hlen, mac_offset, + mac_len); + goto out; + } + skb = segs; + mpls_hlen += mac_len; + do { + skb->mac_len = mac_len; + skb->protocol = mpls_protocol; - /* Restore outer protocol. */ - skb->protocol = mpls_protocol; + skb_reset_inner_network_header(skb); - /* Re-pull the mac header that the call to skb_mac_gso_segment() - * above pulled. It will be re-pushed after returning - * skb_mac_gso_segment(), an indirect caller of this function. - */ - __skb_pull(skb, skb->data - skb_mac_header(skb)); + __skb_push(skb, mpls_hlen); + skb_reset_mac_header(skb); + skb_set_network_header(skb, mac_len); + } while ((skb = skb->next)); + +out: return segs; } diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c index aed872cc05a6..cf52cf30ac4b 100644 --- a/net/mpls/mpls_iptunnel.c +++ b/net/mpls/mpls_iptunnel.c @@ -90,7 +90,11 @@ static int mpls_xmit(struct sk_buff *skb) if (skb_cow(skb, hh_len + new_header_size)) goto drop; + skb_set_inner_protocol(skb, skb->protocol); + skb_reset_inner_network_header(skb); + skb_push(skb, new_header_size); + skb_reset_network_header(skb); skb->dev = out_dev; diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 1ecbd7715f6d..ca91fc33f8a9 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -162,10 +162,16 @@ static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key, if (skb_cow_head(skb, MPLS_HLEN) < 0) return -ENOMEM; + if (!skb->inner_protocol) { + skb_set_inner_network_header(skb, skb->mac_len); + skb_set_inner_protocol(skb, skb->protocol); + } + skb_push(skb, MPLS_HLEN); memmove(skb_mac_header(skb) - MPLS_HLEN, skb_mac_header(skb), skb->mac_len); skb_reset_mac_header(skb); + skb_set_network_header(skb, skb->mac_len); new_mpls_lse = (__be32 *)skb_mpls_header(skb); *new_mpls_lse = mpls->mpls_lse; @@ -173,8 +179,6 @@ static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key, skb_postpush_rcsum(skb, new_mpls_lse, MPLS_HLEN); update_ethertype(skb, eth_hdr(skb), mpls->mpls_ethertype); - if (!skb->inner_protocol) - skb_set_inner_protocol(skb, skb->protocol); skb->protocol = mpls->mpls_ethertype; invalidate_flow_key(key); @@ -198,6 +202,7 @@ static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key, __skb_pull(skb, MPLS_HLEN); skb_reset_mac_header(skb); + skb_set_network_header(skb, skb->mac_len); /* skb_mpls_header() is used to locate the ethertype * field correctly in the presence of VLAN tags. From 607fca9acfb62a9caf7bd46771ecbc4d8fec7388 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 24 Aug 2016 20:10:45 -0700 Subject: [PATCH 0583/1715] net: veth: Set features for MPLS veth does not really transmit packets only moves the skb from one netdev to another so gso and checksum is not really needed. Add the features to mpls_features to get the same benefit and performance with MPLS as without it. Reported-by: Lennert Buytenhek Signed-off-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/veth.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 4bda502254fb..fbc853e64531 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -340,6 +340,7 @@ static void veth_setup(struct net_device *dev) dev->hw_features = VETH_FEATURES; dev->hw_enc_features = VETH_FEATURES; + dev->mpls_features = NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE; } /* From 1ddcf41ff3ee790d19978f7bee1ab76a77a5eb7b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 26 Aug 2016 17:30:29 +0200 Subject: [PATCH 0584/1715] ravb: avoid unused function warnings When CONFIG_PM_SLEEP is disabled, we get a couple of harmless warnings: drivers/net/ethernet/renesas/ravb_main.c:2117:12: error: 'ravb_resume' defined but not used [-Werror=unused-function] drivers/net/ethernet/renesas/ravb_main.c:2104:12: error: 'ravb_suspend' defined but not used [-Werror=unused-function] The simplest solution here is to replace the #ifdef with __maybe_unused annotations, which lets the compiler do the right thing by itself. Signed-off-by: Arnd Bergmann Fixes: 0184165b2f42 ("ravb: add sleep PM suspend/resume support") Acked-by: Sergei Shtylyov Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/ravb_main.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index cad23ba06904..630536bc72f9 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -2100,8 +2100,7 @@ static int ravb_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM -static int ravb_suspend(struct device *dev) +static int __maybe_unused ravb_suspend(struct device *dev) { struct net_device *ndev = dev_get_drvdata(dev); int ret = 0; @@ -2114,7 +2113,7 @@ static int ravb_suspend(struct device *dev) return ret; } -static int ravb_resume(struct device *dev) +static int __maybe_unused ravb_resume(struct device *dev) { struct net_device *ndev = dev_get_drvdata(dev); struct ravb_private *priv = netdev_priv(ndev); @@ -2149,7 +2148,7 @@ static int ravb_resume(struct device *dev) return ret; } -static int ravb_runtime_nop(struct device *dev) +static int __maybe_unused ravb_runtime_nop(struct device *dev) { /* Runtime PM callback shared between ->runtime_suspend() * and ->runtime_resume(). Simply returns success. @@ -2166,17 +2165,12 @@ static const struct dev_pm_ops ravb_dev_pm_ops = { SET_RUNTIME_PM_OPS(ravb_runtime_nop, ravb_runtime_nop, NULL) }; -#define RAVB_PM_OPS (&ravb_dev_pm_ops) -#else -#define RAVB_PM_OPS NULL -#endif - static struct platform_driver ravb_driver = { .probe = ravb_probe, .remove = ravb_remove, .driver = { .name = "ravb", - .pm = RAVB_PM_OPS, + .pm = &ravb_dev_pm_ops, .of_match_table = ravb_match_table, }, }; From 84fd1b191a9468dbb2e790fbc394378afc43462e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 26 Aug 2016 17:37:53 +0200 Subject: [PATCH 0585/1715] qede: hide 32-bit compile warning The addition of the per-queue statistics introduced a harmless warning on all 32-bit architectures: drivers/net/ethernet/qlogic/qede/qede_ethtool.c: In function 'qede_get_ethtool_stats': drivers/net/ethernet/qlogic/qede/qede_ethtool.c:244:31: error: cast from pointer to integer of different size [-Werror=pointer-to-int-cast] buf[cnt++] = QEDE_TQSTATS_DATA(edev, ^ drivers/net/ethernet/qlogic/qede/qede_ethtool.c:244:22: error: cast to pointer from integer of different size [-Werror=int-to-pointer-cast] buf[cnt++] = QEDE_TQSTATS_DATA(edev, ^ This changes the cast to 'void *' to shut up the warning, which avoids the assumptions on the size of the pointer type. Signed-off-by: Arnd Bergmann Fixes: 68db9ec2df07 ("qede: Add support for per-queue stats.") Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qede/qede_ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index 4d45945bc34c..14d5328e6ac9 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -60,7 +60,7 @@ static const struct { }; #define QEDE_TQSTATS_DATA(dev, sindex, tssid, tcid) \ - (*((u64 *)(((u64)(&dev->fp_array[tssid].txqs[tcid])) +\ + (*((u64 *)(((void *)(&dev->fp_array[tssid].txqs[tcid])) +\ qede_tqstats_arr[(sindex)].offset))) static const struct { From 5459c5d47608e7d66c89face4bb6084d0c4136a3 Mon Sep 17 00:00:00 2001 From: Tamizh chelvam Date: Fri, 19 Aug 2016 13:37:39 +0300 Subject: [PATCH 0586/1715] ath10k: move firmware_swap_code_seg_info to ath10k_fw_file Preparation to make use of firmware_swap_code_seg_info for UTF binary. Signed-off-by: Tamizh chelvam Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/core.c | 6 +++--- drivers/net/wireless/ath/ath10k/core.h | 13 +++++++++---- drivers/net/wireless/ath/ath10k/swap.c | 26 ++++++++++++++------------ drivers/net/wireless/ath/ath10k/swap.h | 11 ++++++++--- 4 files changed, 34 insertions(+), 22 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c index e88982921aa3..f82877d806a5 100644 --- a/drivers/net/wireless/ath/ath10k/core.c +++ b/drivers/net/wireless/ath/ath10k/core.c @@ -745,7 +745,7 @@ static int ath10k_download_fw(struct ath10k *ar) data = ar->running_fw->fw_file.firmware_data; data_len = ar->running_fw->fw_file.firmware_len; - ret = ath10k_swap_code_seg_configure(ar); + ret = ath10k_swap_code_seg_configure(ar, &ar->running_fw->fw_file); if (ret) { ath10k_err(ar, "failed to configure fw code swap: %d\n", ret); @@ -787,7 +787,7 @@ static void ath10k_core_free_firmware_files(struct ath10k *ar) if (!IS_ERR(ar->pre_cal_file)) release_firmware(ar->pre_cal_file); - ath10k_swap_code_seg_release(ar); + ath10k_swap_code_seg_release(ar, &ar->normal_mode_fw.fw_file); ar->normal_mode_fw.fw_file.otp_data = NULL; ar->normal_mode_fw.fw_file.otp_len = 0; @@ -2031,7 +2031,7 @@ static int ath10k_core_probe_fw(struct ath10k *ar) goto err_free_firmware_files; } - ret = ath10k_swap_code_seg_init(ar); + ret = ath10k_swap_code_seg_init(ar, &ar->normal_mode_fw.fw_file); if (ret) { ath10k_err(ar, "failed to initialize code swap segment: %d\n", ret); diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h index 30ae5bf81611..cc6e66fdf498 100644 --- a/drivers/net/wireless/ath/ath10k/core.h +++ b/drivers/net/wireless/ath/ath10k/core.h @@ -663,6 +663,15 @@ struct ath10k_fw_file { const void *codeswap_data; size_t codeswap_len; + + /* The original idea of struct ath10k_fw_file was that it only + * contains struct firmware and pointers to various parts (actual + * firmware binary, otp, metadata etc) of the file. This seg_info + * is actually created separate but as this is used similarly as + * the other firmware components it's more convenient to have it + * here. + */ + struct ath10k_swap_code_seg_info *firmware_swap_code_seg_info; }; struct ath10k_fw_components { @@ -774,10 +783,6 @@ struct ath10k { const struct firmware *pre_cal_file; const struct firmware *cal_file; - struct { - struct ath10k_swap_code_seg_info *firmware_swap_code_seg_info; - } swap; - struct { u32 vendor; u32 device; diff --git a/drivers/net/wireless/ath/ath10k/swap.c b/drivers/net/wireless/ath/ath10k/swap.c index 0c5f5863dac8..adf4592374b4 100644 --- a/drivers/net/wireless/ath/ath10k/swap.c +++ b/drivers/net/wireless/ath/ath10k/swap.c @@ -134,17 +134,18 @@ ath10k_swap_code_seg_alloc(struct ath10k *ar, size_t swap_bin_len) return seg_info; } -int ath10k_swap_code_seg_configure(struct ath10k *ar) +int ath10k_swap_code_seg_configure(struct ath10k *ar, + const struct ath10k_fw_file *fw_file) { int ret; struct ath10k_swap_code_seg_info *seg_info = NULL; - if (!ar->swap.firmware_swap_code_seg_info) + if (!fw_file->firmware_swap_code_seg_info) return 0; ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot found firmware code swap binary\n"); - seg_info = ar->swap.firmware_swap_code_seg_info; + seg_info = fw_file->firmware_swap_code_seg_info; ret = ath10k_bmi_write_memory(ar, seg_info->target_addr, &seg_info->seg_hw_info, @@ -158,28 +159,29 @@ int ath10k_swap_code_seg_configure(struct ath10k *ar) return 0; } -void ath10k_swap_code_seg_release(struct ath10k *ar) +void ath10k_swap_code_seg_release(struct ath10k *ar, + struct ath10k_fw_file *fw_file) { - ath10k_swap_code_seg_free(ar, ar->swap.firmware_swap_code_seg_info); + ath10k_swap_code_seg_free(ar, fw_file->firmware_swap_code_seg_info); /* FIXME: these two assignments look to bein wrong place! Shouldn't * they be in ath10k_core_free_firmware_files() like the rest? */ - ar->normal_mode_fw.fw_file.codeswap_data = NULL; - ar->normal_mode_fw.fw_file.codeswap_len = 0; + fw_file->codeswap_data = NULL; + fw_file->codeswap_len = 0; - ar->swap.firmware_swap_code_seg_info = NULL; + fw_file->firmware_swap_code_seg_info = NULL; } -int ath10k_swap_code_seg_init(struct ath10k *ar) +int ath10k_swap_code_seg_init(struct ath10k *ar, struct ath10k_fw_file *fw_file) { int ret; struct ath10k_swap_code_seg_info *seg_info; const void *codeswap_data; size_t codeswap_len; - codeswap_data = ar->normal_mode_fw.fw_file.codeswap_data; - codeswap_len = ar->normal_mode_fw.fw_file.codeswap_len; + codeswap_data = fw_file->codeswap_data; + codeswap_len = fw_file->codeswap_len; if (!codeswap_len || !codeswap_data) return 0; @@ -200,7 +202,7 @@ int ath10k_swap_code_seg_init(struct ath10k *ar) return ret; } - ar->swap.firmware_swap_code_seg_info = seg_info; + fw_file->firmware_swap_code_seg_info = seg_info; return 0; } diff --git a/drivers/net/wireless/ath/ath10k/swap.h b/drivers/net/wireless/ath/ath10k/swap.h index 36991c7b07a0..f5dc0476493e 100644 --- a/drivers/net/wireless/ath/ath10k/swap.h +++ b/drivers/net/wireless/ath/ath10k/swap.h @@ -23,6 +23,8 @@ /* Currently only one swap segment is supported */ #define ATH10K_SWAP_CODE_SEG_NUM_SUPPORTED 1 +struct ath10k_fw_file; + struct ath10k_swap_code_seg_tlv { __le32 address; __le32 length; @@ -58,8 +60,11 @@ struct ath10k_swap_code_seg_info { dma_addr_t paddr[ATH10K_SWAP_CODE_SEG_NUM_SUPPORTED]; }; -int ath10k_swap_code_seg_configure(struct ath10k *ar); -void ath10k_swap_code_seg_release(struct ath10k *ar); -int ath10k_swap_code_seg_init(struct ath10k *ar); +int ath10k_swap_code_seg_configure(struct ath10k *ar, + const struct ath10k_fw_file *fw_file); +void ath10k_swap_code_seg_release(struct ath10k *ar, + struct ath10k_fw_file *fw_file); +int ath10k_swap_code_seg_init(struct ath10k *ar, + struct ath10k_fw_file *fw_file); #endif From d912fc094bc458c7df5e1d8eadf74a35ffc07380 Mon Sep 17 00:00:00 2001 From: Tamizh chelvam Date: Fri, 19 Aug 2016 13:37:40 +0300 Subject: [PATCH 0587/1715] ath10k: handle testmode events for 10.2 and 10.4 based firmware Currently testmode events for 10.x firmware are processed from 10.1 wmi event processing. This patch is used to handle testmode events in 10.2 and 10.4 based firmware. Signed-off-by: Tamizh chelvam Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/wmi.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c index d2462886b75c..e67b254a8f8e 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.c +++ b/drivers/net/wireless/ath/ath10k/wmi.c @@ -5124,6 +5124,7 @@ static void ath10k_wmi_10_2_op_rx(struct ath10k *ar, struct sk_buff *skb) { struct wmi_cmd_hdr *cmd_hdr; enum wmi_10_2_event_id id; + bool consumed; cmd_hdr = (struct wmi_cmd_hdr *)skb->data; id = MS(__le32_to_cpu(cmd_hdr->cmd_id), WMI_CMD_HDR_CMD_ID); @@ -5133,6 +5134,18 @@ static void ath10k_wmi_10_2_op_rx(struct ath10k *ar, struct sk_buff *skb) trace_ath10k_wmi_event(ar, id, skb->data, skb->len); + consumed = ath10k_tm_event_wmi(ar, id, skb); + + /* Ready event must be handled normally also in UTF mode so that we + * know the UTF firmware has booted, others we are just bypass WMI + * events to testmode. + */ + if (consumed && id != WMI_10_2_READY_EVENTID) { + ath10k_dbg(ar, ATH10K_DBG_WMI, + "wmi testmode consumed 0x%x\n", id); + goto out; + } + switch (id) { case WMI_10_2_MGMT_RX_EVENTID: ath10k_wmi_event_mgmt_rx(ar, skb); @@ -5248,6 +5261,7 @@ static void ath10k_wmi_10_4_op_rx(struct ath10k *ar, struct sk_buff *skb) { struct wmi_cmd_hdr *cmd_hdr; enum wmi_10_4_event_id id; + bool consumed; cmd_hdr = (struct wmi_cmd_hdr *)skb->data; id = MS(__le32_to_cpu(cmd_hdr->cmd_id), WMI_CMD_HDR_CMD_ID); @@ -5257,6 +5271,18 @@ static void ath10k_wmi_10_4_op_rx(struct ath10k *ar, struct sk_buff *skb) trace_ath10k_wmi_event(ar, id, skb->data, skb->len); + consumed = ath10k_tm_event_wmi(ar, id, skb); + + /* Ready event must be handled normally also in UTF mode so that we + * know the UTF firmware has booted, others we are just bypass WMI + * events to testmode. + */ + if (consumed && id != WMI_10_4_READY_EVENTID) { + ath10k_dbg(ar, ATH10K_DBG_WMI, + "wmi testmode consumed 0x%x\n", id); + goto out; + } + switch (id) { case WMI_10_4_MGMT_RX_EVENTID: ath10k_wmi_event_mgmt_rx(ar, skb); From ebce1a5e3a0e2c035f201c21ae9f403b42efcbcb Mon Sep 17 00:00:00 2001 From: Tamizh chelvam Date: Fri, 19 Aug 2016 13:37:40 +0300 Subject: [PATCH 0588/1715] ath10k: add testmode support for 10.4 firmware This patch adds testmode support for 10.4 based chipsets and added code swap support for UTF binary. Signed-off-by: Tamizh chelvam Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/testmode.c | 23 ++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/net/wireless/ath/ath10k/testmode.c b/drivers/net/wireless/ath/ath10k/testmode.c index 120f4234d3b0..091f29d60465 100644 --- a/drivers/net/wireless/ath/ath10k/testmode.c +++ b/drivers/net/wireless/ath/ath10k/testmode.c @@ -23,6 +23,7 @@ #include "wmi.h" #include "hif.h" #include "hw.h" +#include "core.h" #include "testmode_i.h" @@ -240,6 +241,18 @@ static int ath10k_tm_cmd_utf_start(struct ath10k *ar, struct nlattr *tb[]) goto err; } + if (ar->testmode.utf_mode_fw.fw_file.codeswap_data && + ar->testmode.utf_mode_fw.fw_file.codeswap_len) { + ret = ath10k_swap_code_seg_init(ar, + &ar->testmode.utf_mode_fw.fw_file); + if (ret) { + ath10k_warn(ar, + "failed to init utf code swap segment: %d\n", + ret); + goto err_release_utf_mode_fw; + } + } + spin_lock_bh(&ar->data_lock); ar->testmode.utf_monitor = true; spin_unlock_bh(&ar->data_lock); @@ -279,6 +292,11 @@ err_power_down: ath10k_hif_power_down(ar); err_release_utf_mode_fw: + if (ar->testmode.utf_mode_fw.fw_file.codeswap_data && + ar->testmode.utf_mode_fw.fw_file.codeswap_len) + ath10k_swap_code_seg_release(ar, + &ar->testmode.utf_mode_fw.fw_file); + release_firmware(ar->testmode.utf_mode_fw.fw_file.firmware); ar->testmode.utf_mode_fw.fw_file.firmware = NULL; @@ -301,6 +319,11 @@ static void __ath10k_tm_cmd_utf_stop(struct ath10k *ar) spin_unlock_bh(&ar->data_lock); + if (ar->testmode.utf_mode_fw.fw_file.codeswap_data && + ar->testmode.utf_mode_fw.fw_file.codeswap_len) + ath10k_swap_code_seg_release(ar, + &ar->testmode.utf_mode_fw.fw_file); + release_firmware(ar->testmode.utf_mode_fw.fw_file.firmware); ar->testmode.utf_mode_fw.fw_file.firmware = NULL; From e25854f2404cc92882e42fe8002b0fd75a77d842 Mon Sep 17 00:00:00 2001 From: Michal Kazior Date: Fri, 19 Aug 2016 13:37:41 +0300 Subject: [PATCH 0589/1715] ath10k: implement wmi echo command Will be useful for implementing command barriers. Signed-off-by: Michal Kazior Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/wmi-ops.h | 17 +++++++++++++ drivers/net/wireless/ath/ath10k/wmi-tlv.c | 29 +++++++++++++++++++++++ drivers/net/wireless/ath/ath10k/wmi.c | 23 ++++++++++++++++++ 3 files changed, 69 insertions(+) diff --git a/drivers/net/wireless/ath/ath10k/wmi-ops.h b/drivers/net/wireless/ath/ath10k/wmi-ops.h index 64ebd304f907..b1d88fa60d11 100644 --- a/drivers/net/wireless/ath/ath10k/wmi-ops.h +++ b/drivers/net/wireless/ath/ath10k/wmi-ops.h @@ -194,6 +194,7 @@ struct wmi_ops { struct sk_buff *(*gen_pdev_bss_chan_info_req) (struct ath10k *ar, enum wmi_bss_survey_req_type type); + struct sk_buff *(*gen_echo)(struct ath10k *ar, u32 value); }; int ath10k_wmi_cmd_send(struct ath10k *ar, struct sk_buff *skb, u32 cmd_id); @@ -1382,4 +1383,20 @@ ath10k_wmi_pdev_bss_chan_info_request(struct ath10k *ar, wmi->cmd->pdev_bss_chan_info_request_cmdid); } +static inline int +ath10k_wmi_echo(struct ath10k *ar, u32 value) +{ + struct ath10k_wmi *wmi = &ar->wmi; + struct sk_buff *skb; + + if (!wmi->ops->gen_echo) + return -EOPNOTSUPP; + + skb = wmi->ops->gen_echo(ar, value); + if (IS_ERR(skb)) + return PTR_ERR(skb); + + return ath10k_wmi_cmd_send(ar, skb, wmi->cmd->echo_cmdid); +} + #endif diff --git a/drivers/net/wireless/ath/ath10k/wmi-tlv.c b/drivers/net/wireless/ath/ath10k/wmi-tlv.c index e09337ee7c96..cd595855af36 100644 --- a/drivers/net/wireless/ath/ath10k/wmi-tlv.c +++ b/drivers/net/wireless/ath/ath10k/wmi-tlv.c @@ -3081,6 +3081,34 @@ ath10k_wmi_tlv_op_gen_adaptive_qcs(struct ath10k *ar, bool enable) return skb; } +static struct sk_buff * +ath10k_wmi_tlv_op_gen_echo(struct ath10k *ar, u32 value) +{ + struct wmi_echo_cmd *cmd; + struct wmi_tlv *tlv; + struct sk_buff *skb; + void *ptr; + size_t len; + + len = sizeof(*tlv) + sizeof(*cmd); + skb = ath10k_wmi_alloc_skb(ar, len); + if (!skb) + return ERR_PTR(-ENOMEM); + + ptr = (void *)skb->data; + tlv = ptr; + tlv->tag = __cpu_to_le16(WMI_TLV_TAG_STRUCT_ECHO_CMD); + tlv->len = __cpu_to_le16(sizeof(*cmd)); + cmd = (void *)tlv->value; + cmd->value = cpu_to_le32(value); + + ptr += sizeof(*tlv); + ptr += sizeof(*cmd); + + ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi tlv echo value 0x%08x\n", value); + return skb; +} + /****************/ /* TLV mappings */ /****************/ @@ -3485,6 +3513,7 @@ static const struct wmi_ops wmi_tlv_ops = { .gen_adaptive_qcs = ath10k_wmi_tlv_op_gen_adaptive_qcs, .fw_stats_fill = ath10k_wmi_main_op_fw_stats_fill, .get_vdev_subtype = ath10k_wmi_op_get_vdev_subtype, + .gen_echo = ath10k_wmi_tlv_op_gen_echo, }; static const struct wmi_peer_flags_map wmi_tlv_peer_flags_map = { diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c index e67b254a8f8e..014c310208de 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.c +++ b/drivers/net/wireless/ath/ath10k/wmi.c @@ -7675,6 +7675,24 @@ ath10k_wmi_10_4_ext_resource_config(struct ath10k *ar, return skb; } +static struct sk_buff * +ath10k_wmi_op_gen_echo(struct ath10k *ar, u32 value) +{ + struct wmi_echo_cmd *cmd; + struct sk_buff *skb; + + skb = ath10k_wmi_alloc_skb(ar, sizeof(*cmd)); + if (!skb) + return ERR_PTR(-ENOMEM); + + cmd = (struct wmi_echo_cmd *)skb->data; + cmd->value = cpu_to_le32(value); + + ath10k_dbg(ar, ATH10K_DBG_WMI, + "wmi echo value 0x%08x\n", value); + return skb; +} + static const struct wmi_ops wmi_ops = { .rx = ath10k_wmi_op_rx, .map_svc = wmi_main_svc_map, @@ -7735,6 +7753,7 @@ static const struct wmi_ops wmi_ops = { .gen_delba_send = ath10k_wmi_op_gen_delba_send, .fw_stats_fill = ath10k_wmi_main_op_fw_stats_fill, .get_vdev_subtype = ath10k_wmi_op_get_vdev_subtype, + .gen_echo = ath10k_wmi_op_gen_echo, /* .gen_bcn_tmpl not implemented */ /* .gen_prb_tmpl not implemented */ /* .gen_p2p_go_bcn_ie not implemented */ @@ -7803,6 +7822,7 @@ static const struct wmi_ops wmi_10_1_ops = { .gen_delba_send = ath10k_wmi_op_gen_delba_send, .fw_stats_fill = ath10k_wmi_10x_op_fw_stats_fill, .get_vdev_subtype = ath10k_wmi_op_get_vdev_subtype, + .gen_echo = ath10k_wmi_op_gen_echo, /* .gen_bcn_tmpl not implemented */ /* .gen_prb_tmpl not implemented */ /* .gen_p2p_go_bcn_ie not implemented */ @@ -7822,6 +7842,7 @@ static const struct wmi_ops wmi_10_2_ops = { .pull_svc_rdy = ath10k_wmi_10x_op_pull_svc_rdy_ev, .gen_pdev_set_rd = ath10k_wmi_10x_op_gen_pdev_set_rd, .gen_start_scan = ath10k_wmi_10x_op_gen_start_scan, + .gen_echo = ath10k_wmi_op_gen_echo, .pull_scan = ath10k_wmi_op_pull_scan_ev, .pull_mgmt_rx = ath10k_wmi_op_pull_mgmt_rx_ev, @@ -7888,6 +7909,7 @@ static const struct wmi_ops wmi_10_2_4_ops = { .pull_svc_rdy = ath10k_wmi_10x_op_pull_svc_rdy_ev, .gen_pdev_set_rd = ath10k_wmi_10x_op_gen_pdev_set_rd, .gen_start_scan = ath10k_wmi_10x_op_gen_start_scan, + .gen_echo = ath10k_wmi_op_gen_echo, .pull_scan = ath10k_wmi_op_pull_scan_ev, .pull_mgmt_rx = ath10k_wmi_op_pull_mgmt_rx_ev, @@ -8010,6 +8032,7 @@ static const struct wmi_ops wmi_10_4_ops = { .gen_pdev_get_temperature = ath10k_wmi_10_2_op_gen_pdev_get_temperature, .get_vdev_subtype = ath10k_wmi_10_4_op_get_vdev_subtype, .gen_pdev_bss_chan_info_req = ath10k_wmi_10_2_op_gen_pdev_bss_chan_info, + .gen_echo = ath10k_wmi_op_gen_echo, }; int ath10k_wmi_attach(struct ath10k *ar) From 84d4911b7184dfa911ea089c2d6728b994de6cd9 Mon Sep 17 00:00:00 2001 From: Michal Kazior Date: Fri, 19 Aug 2016 13:37:42 +0300 Subject: [PATCH 0590/1715] ath10k: implement wmi echo event Will be useful for implementing command barriers. Signed-off-by: Michal Kazior Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/wmi-ops.h | 12 ++++++++++ drivers/net/wireless/ath/ath10k/wmi-tlv.c | 28 ++++++++++++++++++++++ drivers/net/wireless/ath/ath10k/wmi.c | 29 ++++++++++++++++++++++- drivers/net/wireless/ath/ath10k/wmi.h | 4 ++++ 4 files changed, 72 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath10k/wmi-ops.h b/drivers/net/wireless/ath/ath10k/wmi-ops.h index b1d88fa60d11..c67eda78b69e 100644 --- a/drivers/net/wireless/ath/ath10k/wmi-ops.h +++ b/drivers/net/wireless/ath/ath10k/wmi-ops.h @@ -51,6 +51,8 @@ struct wmi_ops { struct wmi_roam_ev_arg *arg); int (*pull_wow_event)(struct ath10k *ar, struct sk_buff *skb, struct wmi_wow_ev_arg *arg); + int (*pull_echo_ev)(struct ath10k *ar, struct sk_buff *skb, + struct wmi_echo_ev_arg *arg); enum wmi_txbf_conf (*get_txbf_conf_scheme)(struct ath10k *ar); struct sk_buff *(*gen_pdev_suspend)(struct ath10k *ar, u32 suspend_opt); @@ -350,6 +352,16 @@ ath10k_wmi_pull_wow_event(struct ath10k *ar, struct sk_buff *skb, return ar->wmi.ops->pull_wow_event(ar, skb, arg); } +static inline int +ath10k_wmi_pull_echo_ev(struct ath10k *ar, struct sk_buff *skb, + struct wmi_echo_ev_arg *arg) +{ + if (!ar->wmi.ops->pull_echo_ev) + return -EOPNOTSUPP; + + return ar->wmi.ops->pull_echo_ev(ar, skb, arg); +} + static inline enum wmi_txbf_conf ath10k_wmi_get_txbf_conf_scheme(struct ath10k *ar) { diff --git a/drivers/net/wireless/ath/ath10k/wmi-tlv.c b/drivers/net/wireless/ath/ath10k/wmi-tlv.c index cd595855af36..a42f52dd9a36 100644 --- a/drivers/net/wireless/ath/ath10k/wmi-tlv.c +++ b/drivers/net/wireless/ath/ath10k/wmi-tlv.c @@ -1223,6 +1223,33 @@ ath10k_wmi_tlv_op_pull_wow_ev(struct ath10k *ar, struct sk_buff *skb, return 0; } +static int ath10k_wmi_tlv_op_pull_echo_ev(struct ath10k *ar, + struct sk_buff *skb, + struct wmi_echo_ev_arg *arg) +{ + const void **tb; + const struct wmi_echo_event *ev; + int ret; + + tb = ath10k_wmi_tlv_parse_alloc(ar, skb->data, skb->len, GFP_ATOMIC); + if (IS_ERR(tb)) { + ret = PTR_ERR(tb); + ath10k_warn(ar, "failed to parse tlv: %d\n", ret); + return ret; + } + + ev = tb[WMI_TLV_TAG_STRUCT_ECHO_EVENT]; + if (!ev) { + kfree(tb); + return -EPROTO; + } + + arg->value = ev->value; + + kfree(tb); + return 0; +} + static struct sk_buff * ath10k_wmi_tlv_op_gen_pdev_suspend(struct ath10k *ar, u32 opt) { @@ -3457,6 +3484,7 @@ static const struct wmi_ops wmi_tlv_ops = { .pull_fw_stats = ath10k_wmi_tlv_op_pull_fw_stats, .pull_roam_ev = ath10k_wmi_tlv_op_pull_roam_ev, .pull_wow_event = ath10k_wmi_tlv_op_pull_wow_ev, + .pull_echo_ev = ath10k_wmi_tlv_op_pull_echo_ev, .get_txbf_conf_scheme = ath10k_wmi_tlv_txbf_conf_scheme, .gen_pdev_suspend = ath10k_wmi_tlv_op_gen_pdev_suspend, diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c index 014c310208de..b802ca9c0edf 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.c +++ b/drivers/net/wireless/ath/ath10k/wmi.c @@ -2495,7 +2495,18 @@ exit: void ath10k_wmi_event_echo(struct ath10k *ar, struct sk_buff *skb) { - ath10k_dbg(ar, ATH10K_DBG_WMI, "WMI_ECHO_EVENTID\n"); + struct wmi_echo_ev_arg arg = {}; + int ret; + + ret = ath10k_wmi_pull_echo_ev(ar, skb, &arg); + if (ret) { + ath10k_warn(ar, "failed to parse echo: %d\n", ret); + return; + } + + ath10k_dbg(ar, ATH10K_DBG_WMI, + "wmi event echo value 0x%08x\n", + le32_to_cpu(arg.value)); } int ath10k_wmi_event_debug_mesg(struct ath10k *ar, struct sk_buff *skb) @@ -4792,6 +4803,17 @@ static int ath10k_wmi_op_pull_roam_ev(struct ath10k *ar, struct sk_buff *skb, return 0; } +static int ath10k_wmi_op_pull_echo_ev(struct ath10k *ar, + struct sk_buff *skb, + struct wmi_echo_ev_arg *arg) +{ + struct wmi_echo_event *ev = (void *)skb->data; + + arg->value = ev->value; + + return 0; +} + int ath10k_wmi_event_ready(struct ath10k *ar, struct sk_buff *skb) { struct wmi_rdy_ev_arg arg = {}; @@ -7709,6 +7731,7 @@ static const struct wmi_ops wmi_ops = { .pull_rdy = ath10k_wmi_op_pull_rdy_ev, .pull_fw_stats = ath10k_wmi_main_op_pull_fw_stats, .pull_roam_ev = ath10k_wmi_op_pull_roam_ev, + .pull_echo_ev = ath10k_wmi_op_pull_echo_ev, .gen_pdev_suspend = ath10k_wmi_op_gen_pdev_suspend, .gen_pdev_resume = ath10k_wmi_op_gen_pdev_resume, @@ -7783,6 +7806,7 @@ static const struct wmi_ops wmi_10_1_ops = { .pull_phyerr = ath10k_wmi_op_pull_phyerr_ev, .pull_rdy = ath10k_wmi_op_pull_rdy_ev, .pull_roam_ev = ath10k_wmi_op_pull_roam_ev, + .pull_echo_ev = ath10k_wmi_op_pull_echo_ev, .gen_pdev_suspend = ath10k_wmi_op_gen_pdev_suspend, .gen_pdev_resume = ath10k_wmi_op_gen_pdev_resume, @@ -7854,6 +7878,7 @@ static const struct wmi_ops wmi_10_2_ops = { .pull_phyerr = ath10k_wmi_op_pull_phyerr_ev, .pull_rdy = ath10k_wmi_op_pull_rdy_ev, .pull_roam_ev = ath10k_wmi_op_pull_roam_ev, + .pull_echo_ev = ath10k_wmi_op_pull_echo_ev, .gen_pdev_suspend = ath10k_wmi_op_gen_pdev_suspend, .gen_pdev_resume = ath10k_wmi_op_gen_pdev_resume, @@ -7921,6 +7946,7 @@ static const struct wmi_ops wmi_10_2_4_ops = { .pull_phyerr = ath10k_wmi_op_pull_phyerr_ev, .pull_rdy = ath10k_wmi_op_pull_rdy_ev, .pull_roam_ev = ath10k_wmi_op_pull_roam_ev, + .pull_echo_ev = ath10k_wmi_op_pull_echo_ev, .gen_pdev_suspend = ath10k_wmi_op_gen_pdev_suspend, .gen_pdev_resume = ath10k_wmi_op_gen_pdev_resume, @@ -8028,6 +8054,7 @@ static const struct wmi_ops wmi_10_4_ops = { .ext_resource_config = ath10k_wmi_10_4_ext_resource_config, /* shared with 10.2 */ + .pull_echo_ev = ath10k_wmi_op_pull_echo_ev, .gen_request_stats = ath10k_wmi_op_gen_request_stats, .gen_pdev_get_temperature = ath10k_wmi_10_2_op_gen_pdev_get_temperature, .get_vdev_subtype = ath10k_wmi_10_4_op_get_vdev_subtype, diff --git a/drivers/net/wireless/ath/ath10k/wmi.h b/drivers/net/wireless/ath/ath10k/wmi.h index 3ef468893b3f..086d78807d2f 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.h +++ b/drivers/net/wireless/ath/ath10k/wmi.h @@ -6296,6 +6296,10 @@ struct wmi_roam_ev_arg { __le32 rssi; }; +struct wmi_echo_ev_arg { + __le32 value; +}; + struct wmi_pdev_temperature_event { /* temperature value in Celcius degree */ __le32 temperature; From 20ddca21dcf84fcae063f2f75f49cfd545bf5237 Mon Sep 17 00:00:00 2001 From: Michal Kazior Date: Fri, 19 Aug 2016 13:37:42 +0300 Subject: [PATCH 0591/1715] ath10k: add wmi command barrier utility This allows placing command barriers for explicit serializing and synchronizing state. Useful for future driver development. Signed-off-by: Michal Kazior Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/core.h | 1 + drivers/net/wireless/ath/ath10k/wmi.c | 31 ++++++++++++++++++++++++++ drivers/net/wireless/ath/ath10k/wmi.h | 1 + 3 files changed, 33 insertions(+) diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h index cc6e66fdf498..56daeb7926f9 100644 --- a/drivers/net/wireless/ath/ath10k/core.h +++ b/drivers/net/wireless/ath/ath10k/core.h @@ -142,6 +142,7 @@ struct ath10k_wmi { enum ath10k_htc_ep_id eid; struct completion service_ready; struct completion unified_ready; + struct completion barrier; wait_queue_head_t tx_credits_wq; DECLARE_BITMAP(svc_map, WMI_SERVICE_MAX); struct wmi_cmd_map *cmd; diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c index b802ca9c0edf..ae5f541ec966 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.c +++ b/drivers/net/wireless/ath/ath10k/wmi.c @@ -29,6 +29,9 @@ #include "p2p.h" #include "hw.h" +#define ATH10K_WMI_BARRIER_ECHO_ID 0xBA991E9 +#define ATH10K_WMI_BARRIER_TIMEOUT_HZ (3 * HZ) + /* MAIN WMI cmd track */ static struct wmi_cmd_map wmi_cmd_map = { .init_cmdid = WMI_INIT_CMDID, @@ -2507,6 +2510,9 @@ void ath10k_wmi_event_echo(struct ath10k *ar, struct sk_buff *skb) ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi event echo value 0x%08x\n", le32_to_cpu(arg.value)); + + if (le32_to_cpu(arg.value) == ATH10K_WMI_BARRIER_ECHO_ID) + complete(&ar->wmi.barrier); } int ath10k_wmi_event_debug_mesg(struct ath10k *ar, struct sk_buff *skb) @@ -7715,6 +7721,30 @@ ath10k_wmi_op_gen_echo(struct ath10k *ar, u32 value) return skb; } +int +ath10k_wmi_barrier(struct ath10k *ar) +{ + int ret; + int time_left; + + spin_lock_bh(&ar->data_lock); + reinit_completion(&ar->wmi.barrier); + spin_unlock_bh(&ar->data_lock); + + ret = ath10k_wmi_echo(ar, ATH10K_WMI_BARRIER_ECHO_ID); + if (ret) { + ath10k_warn(ar, "failed to submit wmi echo: %d\n", ret); + return ret; + } + + time_left = wait_for_completion_timeout(&ar->wmi.barrier, + ATH10K_WMI_BARRIER_TIMEOUT_HZ); + if (!time_left) + return -ETIMEDOUT; + + return 0; +} + static const struct wmi_ops wmi_ops = { .rx = ath10k_wmi_op_rx, .map_svc = wmi_main_svc_map, @@ -8112,6 +8142,7 @@ int ath10k_wmi_attach(struct ath10k *ar) init_completion(&ar->wmi.service_ready); init_completion(&ar->wmi.unified_ready); + init_completion(&ar->wmi.barrier); INIT_WORK(&ar->svc_rdy_work, ath10k_wmi_event_service_ready_work); diff --git a/drivers/net/wireless/ath/ath10k/wmi.h b/drivers/net/wireless/ath/ath10k/wmi.h index 086d78807d2f..89adfa90ee8d 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.h +++ b/drivers/net/wireless/ath/ath10k/wmi.h @@ -6628,5 +6628,6 @@ void ath10k_wmi_10_4_op_fw_stats_fill(struct ath10k *ar, char *buf); int ath10k_wmi_op_get_vdev_subtype(struct ath10k *ar, enum wmi_vdev_subtype subtype); +int ath10k_wmi_barrier(struct ath10k *ar); #endif /* _WMI_H_ */ From 47b1848d9fde5daf102f599be6e589a1d3c8da7d Mon Sep 17 00:00:00 2001 From: Michal Kazior Date: Fri, 19 Aug 2016 13:37:43 +0300 Subject: [PATCH 0592/1715] ath10k: fix spurious tx/rx during boot HW Rx filters and masks are not configured properly by firmware during boot sequences. The MAC_PCU_ADDR1 is set to 0s instead of 1s which allows the HW to ACK any frame that passes through MAC_PCU_RX_FILTER. The MAC_PCU_RX_FILTER itself is misconfigured on boot as well. The combination of these bugs ended up with the following manifestations: - "no channel configured; ignoring frame(s)!" warnings in the driver - spurious ACKs (transmission) on the air during firmware bootup sequences The former was a long standing and known bug originally though mostly harmless. However Marek recently discovered that this problem also involves ACKing *all* frames the HW receives (including beacons ;). Such frames are delivered to host and generate the former warning as well. This could be a problem with regulatory compliance in some rare cases (e.g. Taiwan which forbids transmissions on channel 36 which is the default bootup channel on 5Ghz band cards). The good news is that it'd require someone else to violate regulatory first to coerce our device to generate and transmit an ACK. The problem could be reproduced in a rather busy environment that has a lot of APs. The likelihood could be increased by injecting an msleep() of 5000 or longer immediately after ath10k_htt_setup() in ath10k_core_start(). The reason why the former warnings were only showing up seldom is because the device was either quickly reset again (i.e. during firmware probing) or wmi vdev was created (which fixes hw and fw states). It is technically possible for host driver to override adequate hw registers however this can't work reliably because the bug root cause lies in incorrect firmware state on boot (internal structure used to program MAC_PCU_ADDR1 is not properly initialized) and only vdev create/delete events can fix it. This is why the patch takes dummy vdev approach. This could be fixed in firmware as well but having this fixed in driver is more robust, most notably when thinking of users of older firmware such as 999.999.0.636. Reported-by: Marek Puzyniak Signed-off-by: Michal Kazior Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/core.c | 68 ++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c index f82877d806a5..6b49374b3956 100644 --- a/drivers/net/wireless/ath/ath10k/core.c +++ b/drivers/net/wireless/ath/ath10k/core.c @@ -1705,6 +1705,55 @@ static int ath10k_core_init_firmware_features(struct ath10k *ar) return 0; } +static int ath10k_core_reset_rx_filter(struct ath10k *ar) +{ + int ret; + int vdev_id; + int vdev_type; + int vdev_subtype; + const u8 *vdev_addr; + + vdev_id = 0; + vdev_type = WMI_VDEV_TYPE_STA; + vdev_subtype = ath10k_wmi_get_vdev_subtype(ar, WMI_VDEV_SUBTYPE_NONE); + vdev_addr = ar->mac_addr; + + ret = ath10k_wmi_vdev_create(ar, vdev_id, vdev_type, vdev_subtype, + vdev_addr); + if (ret) { + ath10k_err(ar, "failed to create dummy vdev: %d\n", ret); + return ret; + } + + ret = ath10k_wmi_vdev_delete(ar, vdev_id); + if (ret) { + ath10k_err(ar, "failed to delete dummy vdev: %d\n", ret); + return ret; + } + + /* WMI and HTT may use separate HIF pipes and are not guaranteed to be + * serialized properly implicitly. + * + * Moreover (most) WMI commands have no explicit acknowledges. It is + * possible to infer it implicitly by poking firmware with echo + * command - getting a reply means all preceding comments have been + * (mostly) processed. + * + * In case of vdev create/delete this is sufficient. + * + * Without this it's possible to end up with a race when HTT Rx ring is + * started before vdev create/delete hack is complete allowing a short + * window of opportunity to receive (and Tx ACK) a bunch of frames. + */ + ret = ath10k_wmi_barrier(ar); + if (ret) { + ath10k_err(ar, "failed to ping firmware: %d\n", ret); + return ret; + } + + return 0; +} + int ath10k_core_start(struct ath10k *ar, enum ath10k_firmware_mode mode, const struct ath10k_fw_components *fw) { @@ -1872,6 +1921,25 @@ int ath10k_core_start(struct ath10k *ar, enum ath10k_firmware_mode mode, goto err_hif_stop; } + /* Some firmware revisions do not properly set up hardware rx filter + * registers. + * + * A known example from QCA9880 and 10.2.4 is that MAC_PCU_ADDR1_MASK + * is filled with 0s instead of 1s allowing HW to respond with ACKs to + * any frames that matches MAC_PCU_RX_FILTER which is also + * misconfigured to accept anything. + * + * The ADDR1 is programmed using internal firmware structure field and + * can't be (easily/sanely) reached from the driver explicitly. It is + * possible to implicitly make it correct by creating a dummy vdev and + * then deleting it. + */ + status = ath10k_core_reset_rx_filter(ar); + if (status) { + ath10k_err(ar, "failed to reset rx filter: %d\n", status); + goto err_hif_stop; + } + /* If firmware indicates Full Rx Reorder support it must be used in a * slightly different manner. Let HTT code know. */ From 5ffae43208ec160e584117fabee2cddc5ad0e39a Mon Sep 17 00:00:00 2001 From: Lior David Date: Mon, 22 Aug 2016 12:42:19 +0300 Subject: [PATCH 0593/1715] wil6210: fix protection of wil->scan_request Currently the places that check wil->scan_request and call cfg80211_scan_done are not consistently protected, so there is a risk that cfg80211_scan_done will be called with NULL scan_request, causing a kernel crash. Fix this by using p2p_wdev_mutex in few other places that access scan_request. This makes sense since scan_request may point to p2p_wdev, and it is not worth the extra complexity of adding a new mutex. Signed-off-by: Lior David Signed-off-by: Maya Erez Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/wil6210/cfg80211.c | 3 +++ drivers/net/wireless/ath/wil6210/main.c | 4 ++++ drivers/net/wireless/ath/wil6210/wil6210.h | 2 +- drivers/net/wireless/ath/wil6210/wmi.c | 4 ++-- 4 files changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath/wil6210/cfg80211.c b/drivers/net/wireless/ath/wil6210/cfg80211.c index ffacc7648727..d117240d9a73 100644 --- a/drivers/net/wireless/ath/wil6210/cfg80211.c +++ b/drivers/net/wireless/ath/wil6210/cfg80211.c @@ -354,10 +354,13 @@ static int wil_cfg80211_scan(struct wiphy *wiphy, wil_dbg_misc(wil, "%s(), wdev=0x%p iftype=%d\n", __func__, wdev, wdev->iftype); + mutex_lock(&wil->p2p_wdev_mutex); if (wil->scan_request) { wil_err(wil, "Already scanning\n"); + mutex_unlock(&wil->p2p_wdev_mutex); return -EAGAIN; } + mutex_unlock(&wil->p2p_wdev_mutex); /* check we are client side */ switch (wdev->iftype) { diff --git a/drivers/net/wireless/ath/wil6210/main.c b/drivers/net/wireless/ath/wil6210/main.c index d0b180cc3c60..7b7619c1f9f3 100644 --- a/drivers/net/wireless/ath/wil6210/main.c +++ b/drivers/net/wireless/ath/wil6210/main.c @@ -852,6 +852,7 @@ int wil_reset(struct wil6210_priv *wil, bool load_fw) bitmap_zero(wil->status, wil_status_last); mutex_unlock(&wil->wmi_mutex); + mutex_lock(&wil->p2p_wdev_mutex); if (wil->scan_request) { struct cfg80211_scan_info info = { .aborted = true, @@ -863,6 +864,7 @@ int wil_reset(struct wil6210_priv *wil, bool load_fw) cfg80211_scan_done(wil->scan_request, &info); wil->scan_request = NULL; } + mutex_unlock(&wil->p2p_wdev_mutex); wil_mask_irq(wil); @@ -1055,6 +1057,7 @@ int __wil_down(struct wil6210_priv *wil) wil_p2p_stop_radio_operations(wil); + mutex_lock(&wil->p2p_wdev_mutex); if (wil->scan_request) { struct cfg80211_scan_info info = { .aborted = true, @@ -1066,6 +1069,7 @@ int __wil_down(struct wil6210_priv *wil) cfg80211_scan_done(wil->scan_request, &info); wil->scan_request = NULL; } + mutex_unlock(&wil->p2p_wdev_mutex); wil_reset(wil, false); diff --git a/drivers/net/wireless/ath/wil6210/wil6210.h b/drivers/net/wireless/ath/wil6210/wil6210.h index 608769120957..1eb7fe772273 100644 --- a/drivers/net/wireless/ath/wil6210/wil6210.h +++ b/drivers/net/wireless/ath/wil6210/wil6210.h @@ -657,7 +657,7 @@ struct wil6210_priv { /* P2P_DEVICE vif */ struct wireless_dev *p2p_wdev; - struct mutex p2p_wdev_mutex; /* protect @p2p_wdev */ + struct mutex p2p_wdev_mutex; /* protect @p2p_wdev and @scan_request */ struct wireless_dev *radio_wdev; /* High Access Latency Policy voting */ diff --git a/drivers/net/wireless/ath/wil6210/wmi.c b/drivers/net/wireless/ath/wil6210/wmi.c index 4d92541913c0..0b109b2a31ac 100644 --- a/drivers/net/wireless/ath/wil6210/wmi.c +++ b/drivers/net/wireless/ath/wil6210/wmi.c @@ -424,6 +424,7 @@ static void wmi_evt_tx_mgmt(struct wil6210_priv *wil, int id, void *d, int len) static void wmi_evt_scan_complete(struct wil6210_priv *wil, int id, void *d, int len) { + mutex_lock(&wil->p2p_wdev_mutex); if (wil->scan_request) { struct wmi_scan_complete_event *data = d; struct cfg80211_scan_info info = { @@ -435,14 +436,13 @@ static void wmi_evt_scan_complete(struct wil6210_priv *wil, int id, wil->scan_request, info.aborted); del_timer_sync(&wil->scan_timer); - mutex_lock(&wil->p2p_wdev_mutex); cfg80211_scan_done(wil->scan_request, &info); wil->radio_wdev = wil->wdev; - mutex_unlock(&wil->p2p_wdev_mutex); wil->scan_request = NULL; } else { wil_err(wil, "SCAN_COMPLETE while not scanning\n"); } + mutex_unlock(&wil->p2p_wdev_mutex); } static void wmi_evt_connect(struct wil6210_priv *wil, int id, void *d, int len) From 08989f9640a03939ec170916f80d371a8e3504b0 Mon Sep 17 00:00:00 2001 From: Lior David Date: Mon, 22 Aug 2016 12:42:20 +0300 Subject: [PATCH 0594/1715] wil6210: align to latest auto generated wmi.h Align to latest version of the auto generated wmi file describing the interface with FW. Signed-off-by: Lior David Signed-off-by: Maya Erez Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/wil6210/wmi.h | 292 +++++++++++++++++++++++++ 1 file changed, 292 insertions(+) diff --git a/drivers/net/wireless/ath/wil6210/wmi.h b/drivers/net/wireless/ath/wil6210/wmi.h index 349510cdb153..f430e8a80603 100644 --- a/drivers/net/wireless/ath/wil6210/wmi.h +++ b/drivers/net/wireless/ath/wil6210/wmi.h @@ -46,6 +46,16 @@ enum wmi_mid { MID_BROADCAST = 0xFF, }; +/* FW capability IDs + * Each ID maps to a bit in a 32-bit bitmask value provided by the FW to + * the host + */ +enum wmi_fw_capability { + WMI_FW_CAPABILITY_FTM = 0, + WMI_FW_CAPABILITY_PS_CONFIG = 1, + WMI_FW_CAPABILITY_MAX, +}; + /* WMI_CMD_HDR */ struct wmi_cmd_hdr { u8 mid; @@ -170,6 +180,13 @@ enum wmi_command_id { /* Not supported yet */ WMI_PS_MID_CFG_READ_CMDID = 0x920, WMI_RS_CFG_CMDID = 0x921, + WMI_GET_DETAILED_RS_RES_CMDID = 0x922, + WMI_AOA_MEAS_CMDID = 0x923, + WMI_TOF_SESSION_START_CMDID = 0x991, + WMI_TOF_GET_CAPABILITIES_CMDID = 0x992, + WMI_TOF_SET_LCR_CMDID = 0x993, + WMI_TOF_SET_LCI_CMDID = 0x994, + WMI_TOF_CHANNEL_INFO_CMDID = 0x995, WMI_SET_MAC_ADDRESS_CMDID = 0xF003, WMI_ABORT_SCAN_CMDID = 0xF007, WMI_SET_PROMISCUOUS_MODE_CMDID = 0xF041, @@ -843,6 +860,88 @@ struct wmi_pmc_cmd { __le64 mem_base; } __packed; +enum wmi_aoa_meas_type { + WMI_AOA_PHASE_MEAS = 0x00, + WMI_AOA_PHASE_AMP_MEAS = 0x01, +}; + +/* WMI_AOA_MEAS_CMDID */ +struct wmi_aoa_meas_cmd { + u8 mac_addr[WMI_MAC_LEN]; + /* channels IDs: + * 0 - 58320 MHz + * 1 - 60480 MHz + * 2 - 62640 MHz + */ + u8 channel; + /* enum wmi_aoa_meas_type */ + u8 aoa_meas_type; + __le32 meas_rf_mask; +} __packed; + +enum wmi_tof_burst_duration { + WMI_TOF_BURST_DURATION_250_USEC = 2, + WMI_TOF_BURST_DURATION_500_USEC = 3, + WMI_TOF_BURST_DURATION_1_MSEC = 4, + WMI_TOF_BURST_DURATION_2_MSEC = 5, + WMI_TOF_BURST_DURATION_4_MSEC = 6, + WMI_TOF_BURST_DURATION_8_MSEC = 7, + WMI_TOF_BURST_DURATION_16_MSEC = 8, + WMI_TOF_BURST_DURATION_32_MSEC = 9, + WMI_TOF_BURST_DURATION_64_MSEC = 10, + WMI_TOF_BURST_DURATION_128_MSEC = 11, + WMI_TOF_BURST_DURATION_NO_PREFERENCES = 15, +}; + +enum wmi_tof_session_start_flags { + WMI_TOF_SESSION_START_FLAG_SECURED = 0x1, + WMI_TOF_SESSION_START_FLAG_ASAP = 0x2, + WMI_TOF_SESSION_START_FLAG_LCI_REQ = 0x4, + WMI_TOF_SESSION_START_FLAG_LCR_REQ = 0x8, +}; + +/* WMI_TOF_SESSION_START_CMDID */ +struct wmi_ftm_dest_info { + u8 channel; + /* wmi_tof_session_start_flags_e */ + u8 flags; + u8 initial_token; + u8 num_of_ftm_per_burst; + u8 num_of_bursts_exp; + /* wmi_tof_burst_duration_e */ + u8 burst_duration; + /* Burst Period indicate interval between two consecutive burst + * instances, in units of 100 ms + */ + __le16 burst_period; + u8 dst_mac[WMI_MAC_LEN]; + __le16 reserved; +} __packed; + +/* WMI_TOF_SESSION_START_CMDID */ +struct wmi_tof_session_start_cmd { + __le32 session_id; + u8 num_of_aoa_measures; + u8 aoa_type; + __le16 num_of_dest; + u8 reserved[4]; + struct wmi_ftm_dest_info ftm_dest_info[0]; +} __packed; + +enum wmi_tof_channel_info_report_type { + WMI_TOF_CHANNEL_INFO_TYPE_CIR = 0x1, + WMI_TOF_CHANNEL_INFO_TYPE_RSSI = 0x2, + WMI_TOF_CHANNEL_INFO_TYPE_SNR = 0x4, + WMI_TOF_CHANNEL_INFO_TYPE_DEBUG_DATA = 0x8, + WMI_TOF_CHANNEL_INFO_TYPE_VENDOR_SPECIFIC = 0x10, +}; + +/* WMI_TOF_CHANNEL_INFO_CMDID */ +struct wmi_tof_channel_info_cmd { + /* wmi_tof_channel_info_report_type_e */ + __le32 channel_info_report_request; +} __packed; + /* WMI Events * List of Events (target to host) */ @@ -934,6 +1033,14 @@ enum wmi_event_id { /* Not supported yet */ WMI_PS_MID_CFG_READ_EVENTID = 0x1920, WMI_RS_CFG_DONE_EVENTID = 0x1921, + WMI_GET_DETAILED_RS_RES_EVENTID = 0x1922, + WMI_AOA_MEAS_EVENTID = 0x1923, + WMI_TOF_SESSION_END_EVENTID = 0x1991, + WMI_TOF_GET_CAPABILITIES_EVENTID = 0x1992, + WMI_TOF_SET_LCR_EVENTID = 0x1993, + WMI_TOF_SET_LCI_EVENTID = 0x1994, + WMI_TOF_FTM_PER_DEST_RES_EVENTID = 0x1995, + WMI_TOF_CHANNEL_INFO_EVENTID = 0x1996, WMI_SET_CHANNEL_EVENTID = 0x9000, WMI_ASSOC_REQ_EVENTID = 0x9001, WMI_EAPOL_RX_EVENTID = 0x9002, @@ -1003,6 +1110,13 @@ struct wmi_fw_ver_event { __le32 bl_minor; __le32 bl_subminor; __le32 bl_build; + /* The number of entries in the FW capabilies array */ + u8 fw_capabilities_len; + u8 reserved[3]; + /* FW capabilities info + * Must be the last member of the struct + */ + __le32 fw_capabilities[0]; } __packed; /* WMI_GET_RF_STATUS_EVENTID */ @@ -1565,6 +1679,41 @@ struct wmi_rs_cfg_done_event { u8 reserved[2]; } __packed; +/* WMI_GET_DETAILED_RS_RES_CMDID */ +struct wmi_get_detailed_rs_res_cmd { + /* connection id */ + u8 cid; + u8 reserved[3]; +} __packed; + +/* RS results status */ +enum wmi_rs_results_status { + WMI_RS_RES_VALID = 0x00, + WMI_RS_RES_INVALID = 0x01, +}; + +/* Rate search results */ +struct wmi_rs_results { + /* number of sent MPDUs */ + u8 num_of_tx_pkt[WMI_NUM_MCS]; + /* number of non-acked MPDUs */ + u8 num_of_non_acked_pkt[WMI_NUM_MCS]; + /* RS timestamp */ + __le32 tsf; + /* RS selected MCS */ + u8 mcs; +} __packed; + +/* WMI_GET_DETAILED_RS_RES_EVENTID */ +struct wmi_get_detailed_rs_res_event { + u8 cid; + /* enum wmi_rs_results_status */ + u8 status; + /* detailed rs results */ + struct wmi_rs_results rs_results; + u8 reserved[3]; +} __packed; + /* broadcast connection ID */ #define WMI_LINK_MAINTAIN_CFG_CID_BROADCAST (0xFFFFFFFF) @@ -1892,4 +2041,147 @@ struct wmi_ps_mid_cfg_read_event { __le32 status; } __packed; +#define WMI_AOA_MAX_DATA_SIZE (128) + +enum wmi_aoa_meas_status { + WMI_AOA_MEAS_SUCCESS = 0x00, + WMI_AOA_MEAS_PEER_INCAPABLE = 0x01, + WMI_AOA_MEAS_FAILURE = 0x02, +}; + +/* WMI_AOA_MEAS_EVENTID */ +struct wmi_aoa_meas_event { + u8 mac_addr[WMI_MAC_LEN]; + /* channels IDs: + * 0 - 58320 MHz + * 1 - 60480 MHz + * 2 - 62640 MHz + */ + u8 channel; + /* enum wmi_aoa_meas_type */ + u8 aoa_meas_type; + /* Measurments are from RFs, defined by the mask */ + __le32 meas_rf_mask; + /* enum wmi_aoa_meas_status */ + u8 meas_status; + u8 reserved; + /* Length of meas_data in bytes */ + __le16 length; + u8 meas_data[WMI_AOA_MAX_DATA_SIZE]; +} __packed; + +/* WMI_TOF_GET_CAPABILITIES_EVENTID */ +struct wmi_tof_get_capabilities_event { + u8 ftm_capability; + /* maximum supported number of destination to start TOF */ + u8 max_num_of_dest; + /* maximum supported number of measurements per burst */ + u8 max_num_of_meas_per_burst; + u8 reserved; + /* maximum supported multi bursts */ + __le16 max_multi_bursts_sessions; + /* maximum supported FTM burst duration , wmi_tof_burst_duration_e */ + __le16 max_ftm_burst_duration; + /* AOA supported types */ + __le32 aoa_supported_types; +} __packed; + +enum wmi_tof_session_end_status { + WMI_TOF_SESSION_END_NO_ERROR = 0x00, + WMI_TOF_SESSION_END_FAIL = 0x01, + WMI_TOF_SESSION_END_PARAMS_ERROR = 0x02, + WMI_TOF_SESSION_END_ABORTED = 0x03, +}; + +/* WMI_TOF_SESSION_END_EVENTID */ +struct wmi_tof_session_end_event { + /* FTM session ID */ + __le32 session_id; + /* wmi_tof_session_end_status_e */ + u8 status; + u8 reserved[3]; +} __packed; + +/* Responder FTM Results */ +struct wmi_responder_ftm_res { + u8 t1[6]; + u8 t2[6]; + u8 t3[6]; + u8 t4[6]; + __le16 tod_err; + __le16 toa_err; + __le16 tod_err_initiator; + __le16 toa_err_initiator; +} __packed; + +enum wmi_tof_ftm_per_dest_res_status { + WMI_PER_DEST_RES_NO_ERROR = 0x00, + WMI_PER_DEST_RES_TX_RX_FAIL = 0x01, + WMI_PER_DEST_RES_PARAM_DONT_MATCH = 0x02, +}; + +enum wmi_tof_ftm_per_dest_res_flags { + WMI_PER_DEST_RES_REQ_START = 0x01, + WMI_PER_DEST_RES_BURST_REPORT_END = 0x02, + WMI_PER_DEST_RES_REQ_END = 0x04, + WMI_PER_DEST_RES_PARAM_UPDATE = 0x08, +}; + +/* WMI_TOF_FTM_PER_DEST_RES_EVENTID */ +struct wmi_tof_ftm_per_dest_res_event { + /* FTM session ID */ + __le32 session_id; + /* destination MAC address */ + u8 dst_mac[WMI_MAC_LEN]; + /* wmi_tof_ftm_per_dest_res_flags_e */ + u8 flags; + /* wmi_tof_ftm_per_dest_res_status_e */ + u8 status; + /* responder ASAP */ + u8 responder_asap; + /* responder number of FTM per burst */ + u8 responder_num_ftm_per_burst; + /* responder number of FTM burst exponent */ + u8 responder_num_ftm_bursts_exp; + /* responder burst duration ,wmi_tof_burst_duration_e */ + u8 responder_burst_duration; + /* responder burst period, indicate interval between two consecutive + * burst instances, in units of 100 ms + */ + __le16 responder_burst_period; + /* receive burst counter */ + __le16 bursts_cnt; + /* tsf of responder start burst */ + __le32 tsf_sync; + /* actual received ftm per burst */ + u8 actual_ftm_per_burst; + u8 reserved0[7]; + struct wmi_responder_ftm_res responder_ftm_res[0]; +} __packed; + +enum wmi_tof_channel_info_type { + WMI_TOF_CHANNEL_INFO_AOA = 0x00, + WMI_TOF_CHANNEL_INFO_LCI = 0x01, + WMI_TOF_CHANNEL_INFO_LCR = 0x02, + WMI_TOF_CHANNEL_INFO_VENDOR_SPECIFIC = 0x03, + WMI_TOF_CHANNEL_INFO_CIR = 0x04, + WMI_TOF_CHANNEL_INFO_RSSI = 0x05, + WMI_TOF_CHANNEL_INFO_SNR = 0x06, + WMI_TOF_CHANNEL_INFO_DEBUG = 0x07, +}; + +/* WMI_TOF_CHANNEL_INFO_EVENTID */ +struct wmi_tof_channel_info_event { + /* FTM session ID */ + __le32 session_id; + /* destination MAC address */ + u8 dst_mac[WMI_MAC_LEN]; + /* wmi_tof_channel_info_type_e */ + u8 type; + /* data report length */ + u8 len; + /* data report payload */ + u8 report[0]; +} __packed; + #endif /* __WILOCITY_WMI_H__ */ From 12bace75704ec0d64621be6ebf6e51772ce2cb0f Mon Sep 17 00:00:00 2001 From: Lior David Date: Mon, 22 Aug 2016 12:42:21 +0300 Subject: [PATCH 0595/1715] wil6210: extract firmware capabilities from FW file When driver is loaded, extract a capabilities record from the FW file. This record contains bits indicating which optional features are supported by this FW. The driver can use this information to determine which functionality to support and/or expose to user space. The extraction is done before wiphy structure is registered, because the capabilities can affect information published by the this structure. Signed-off-by: Lior David Signed-off-by: Maya Erez Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/wil6210/debugfs.c | 25 ++++++ drivers/net/wireless/ath/wil6210/fw.h | 11 ++- drivers/net/wireless/ath/wil6210/fw_inc.c | 85 +++++++++++++++------ drivers/net/wireless/ath/wil6210/main.c | 4 +- drivers/net/wireless/ath/wil6210/pcie_bus.c | 4 + drivers/net/wireless/ath/wil6210/wil6210.h | 4 +- 6 files changed, 106 insertions(+), 27 deletions(-) diff --git a/drivers/net/wireless/ath/wil6210/debugfs.c b/drivers/net/wireless/ath/wil6210/debugfs.c index a8098b406cc0..a244a36c19bc 100644 --- a/drivers/net/wireless/ath/wil6210/debugfs.c +++ b/drivers/net/wireless/ath/wil6210/debugfs.c @@ -1553,6 +1553,30 @@ static const struct file_operations fops_led_blink_time = { .open = simple_open, }; +/*---------FW capabilities------------*/ +static int wil_fw_capabilities_debugfs_show(struct seq_file *s, void *data) +{ + struct wil6210_priv *wil = s->private; + + seq_printf(s, "fw_capabilities : %*pb\n", WMI_FW_CAPABILITY_MAX, + wil->fw_capabilities); + + return 0; +} + +static int wil_fw_capabilities_seq_open(struct inode *inode, struct file *file) +{ + return single_open(file, wil_fw_capabilities_debugfs_show, + inode->i_private); +} + +static const struct file_operations fops_fw_capabilities = { + .open = wil_fw_capabilities_seq_open, + .release = single_release, + .read = seq_read, + .llseek = seq_lseek, +}; + /*----------------*/ static void wil6210_debugfs_init_blobs(struct wil6210_priv *wil, struct dentry *dbg) @@ -1603,6 +1627,7 @@ static const struct { {"recovery", S_IRUGO | S_IWUSR, &fops_recovery}, {"led_cfg", S_IRUGO | S_IWUSR, &fops_led_cfg}, {"led_blink_time", S_IRUGO | S_IWUSR, &fops_led_blink_time}, + {"fw_capabilities", S_IRUGO, &fops_fw_capabilities}, }; static void wil6210_debugfs_init_files(struct wil6210_priv *wil, diff --git a/drivers/net/wireless/ath/wil6210/fw.h b/drivers/net/wireless/ath/wil6210/fw.h index 7a2c6c129ad5..c3191c61832c 100644 --- a/drivers/net/wireless/ath/wil6210/fw.h +++ b/drivers/net/wireless/ath/wil6210/fw.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014 Qualcomm Atheros, Inc. + * Copyright (c) 2014,2016 Qualcomm Atheros, Inc. * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -58,6 +58,15 @@ struct wil_fw_record_comment { /* type == wil_fw_type_comment */ u8 data[0]; /* free-form data [data_size], see above */ } __packed; +/* FW capabilities encoded inside a comment record */ +#define WIL_FW_CAPABILITIES_MAGIC (0xabcddcba) +struct wil_fw_record_capabilities { /* type == wil_fw_type_comment */ + /* identifies capabilities record */ + __le32 magic; + /* capabilities (variable size), see enum wmi_fw_capability */ + u8 capabilities[0]; +}; + /* perform action * data_size = @head.size - offsetof(struct wil_fw_record_action, data) */ diff --git a/drivers/net/wireless/ath/wil6210/fw_inc.c b/drivers/net/wireless/ath/wil6210/fw_inc.c index d30657ee7e83..3860238840ba 100644 --- a/drivers/net/wireless/ath/wil6210/fw_inc.c +++ b/drivers/net/wireless/ath/wil6210/fw_inc.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2015 Qualcomm Atheros, Inc. + * Copyright (c) 2014-2016 Qualcomm Atheros, Inc. * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -118,6 +118,12 @@ static int wil_fw_verify(struct wil6210_priv *wil, const u8 *data, size_t size) return (int)dlen; } +static int fw_ignore_section(struct wil6210_priv *wil, const void *data, + size_t size) +{ + return 0; +} + static int fw_handle_comment(struct wil6210_priv *wil, const void *data, size_t size) { @@ -126,6 +132,27 @@ static int fw_handle_comment(struct wil6210_priv *wil, const void *data, return 0; } +static int +fw_handle_capabilities(struct wil6210_priv *wil, const void *data, + size_t size) +{ + const struct wil_fw_record_capabilities *rec = data; + size_t capa_size; + + if (size < sizeof(*rec) || + le32_to_cpu(rec->magic) != WIL_FW_CAPABILITIES_MAGIC) + return 0; + + capa_size = size - offsetof(struct wil_fw_record_capabilities, + capabilities); + bitmap_zero(wil->fw_capabilities, WMI_FW_CAPABILITY_MAX); + memcpy(wil->fw_capabilities, rec->capabilities, + min(sizeof(wil->fw_capabilities), capa_size)); + wil_hex_dump_fw("CAPA", DUMP_PREFIX_OFFSET, 16, 1, + rec->capabilities, capa_size, false); + return 0; +} + static int fw_handle_data(struct wil6210_priv *wil, const void *data, size_t size) { @@ -383,42 +410,51 @@ static int fw_handle_gateway_data4(struct wil6210_priv *wil, const void *data, static const struct { int type; - int (*handler)(struct wil6210_priv *wil, const void *data, size_t size); + int (*load_handler)(struct wil6210_priv *wil, const void *data, + size_t size); + int (*parse_handler)(struct wil6210_priv *wil, const void *data, + size_t size); } wil_fw_handlers[] = { - {wil_fw_type_comment, fw_handle_comment}, - {wil_fw_type_data, fw_handle_data}, - {wil_fw_type_fill, fw_handle_fill}, + {wil_fw_type_comment, fw_handle_comment, fw_handle_capabilities}, + {wil_fw_type_data, fw_handle_data, fw_ignore_section}, + {wil_fw_type_fill, fw_handle_fill, fw_ignore_section}, /* wil_fw_type_action */ /* wil_fw_type_verify */ - {wil_fw_type_file_header, fw_handle_file_header}, - {wil_fw_type_direct_write, fw_handle_direct_write}, - {wil_fw_type_gateway_data, fw_handle_gateway_data}, - {wil_fw_type_gateway_data4, fw_handle_gateway_data4}, + {wil_fw_type_file_header, fw_handle_file_header, + fw_handle_file_header}, + {wil_fw_type_direct_write, fw_handle_direct_write, fw_ignore_section}, + {wil_fw_type_gateway_data, fw_handle_gateway_data, fw_ignore_section}, + {wil_fw_type_gateway_data4, fw_handle_gateway_data4, + fw_ignore_section}, }; static int wil_fw_handle_record(struct wil6210_priv *wil, int type, - const void *data, size_t size) + const void *data, size_t size, bool load) { int i; - for (i = 0; i < ARRAY_SIZE(wil_fw_handlers); i++) { + for (i = 0; i < ARRAY_SIZE(wil_fw_handlers); i++) if (wil_fw_handlers[i].type == type) - return wil_fw_handlers[i].handler(wil, data, size); - } + return load ? + wil_fw_handlers[i].load_handler( + wil, data, size) : + wil_fw_handlers[i].parse_handler( + wil, data, size); wil_err_fw(wil, "unknown record type: %d\n", type); return -EINVAL; } /** - * wil_fw_load - load FW into device - * - * Load the FW and uCode code and data to the corresponding device - * memory regions + * wil_fw_process - process section from FW file + * if load is true: Load the FW and uCode code and data to the + * corresponding device memory regions, + * otherwise only parse and look for capabilities * * Return error code */ -static int wil_fw_load(struct wil6210_priv *wil, const void *data, size_t size) +static int wil_fw_process(struct wil6210_priv *wil, const void *data, + size_t size, bool load) { int rc = 0; const struct wil_fw_record_head *hdr; @@ -437,7 +473,7 @@ static int wil_fw_load(struct wil6210_priv *wil, const void *data, size_t size) return -EINVAL; } rc = wil_fw_handle_record(wil, le16_to_cpu(hdr->type), - &hdr[1], hdr_sz); + &hdr[1], hdr_sz, load); if (rc) return rc; } @@ -456,13 +492,16 @@ static int wil_fw_load(struct wil6210_priv *wil, const void *data, size_t size) } /** - * wil_request_firmware - Request firmware and load to device + * wil_request_firmware - Request firmware * - * Request firmware image from the file and load it to device + * Request firmware image from the file + * If load is true, load firmware to device, otherwise + * only parse and extract capabilities * * Return error code */ -int wil_request_firmware(struct wil6210_priv *wil, const char *name) +int wil_request_firmware(struct wil6210_priv *wil, const char *name, + bool load) { int rc, rc1; const struct firmware *fw; @@ -482,7 +521,7 @@ int wil_request_firmware(struct wil6210_priv *wil, const char *name) rc = rc1; goto out; } - rc = wil_fw_load(wil, d, rc1); + rc = wil_fw_process(wil, d, rc1, load); if (rc < 0) goto out; } diff --git a/drivers/net/wireless/ath/wil6210/main.c b/drivers/net/wireless/ath/wil6210/main.c index 7b7619c1f9f3..7198c86e09f2 100644 --- a/drivers/net/wireless/ath/wil6210/main.c +++ b/drivers/net/wireless/ath/wil6210/main.c @@ -894,10 +894,10 @@ int wil_reset(struct wil6210_priv *wil, bool load_fw) wil_halt_cpu(wil); /* Loading f/w from the file */ - rc = wil_request_firmware(wil, WIL_FW_NAME); + rc = wil_request_firmware(wil, WIL_FW_NAME, true); if (rc) return rc; - rc = wil_request_firmware(wil, WIL_FW2_NAME); + rc = wil_request_firmware(wil, WIL_FW2_NAME, true); if (rc) return rc; diff --git a/drivers/net/wireless/ath/wil6210/pcie_bus.c b/drivers/net/wireless/ath/wil6210/pcie_bus.c index 5b7a9d2f0c29..44746ca0d2e6 100644 --- a/drivers/net/wireless/ath/wil6210/pcie_bus.c +++ b/drivers/net/wireless/ath/wil6210/pcie_bus.c @@ -39,6 +39,7 @@ void wil_set_capabilities(struct wil6210_priv *wil) u32 rev_id = wil_r(wil, RGF_USER_JTAG_DEV_ID); bitmap_zero(wil->hw_capabilities, hw_capability_last); + bitmap_zero(wil->fw_capabilities, WMI_FW_CAPABILITY_MAX); switch (rev_id) { case JTAG_DEV_ID_SPARROW_B0: @@ -52,6 +53,9 @@ void wil_set_capabilities(struct wil6210_priv *wil) } wil_info(wil, "Board hardware is %s\n", wil->hw_name); + + /* extract FW capabilities from file without loading the FW */ + wil_request_firmware(wil, WIL_FW_NAME, false); } void wil_disable_irq(struct wil6210_priv *wil) diff --git a/drivers/net/wireless/ath/wil6210/wil6210.h b/drivers/net/wireless/ath/wil6210/wil6210.h index 1eb7fe772273..979536ce4591 100644 --- a/drivers/net/wireless/ath/wil6210/wil6210.h +++ b/drivers/net/wireless/ath/wil6210/wil6210.h @@ -580,6 +580,7 @@ struct wil6210_priv { u32 hw_version; const char *hw_name; DECLARE_BITMAP(hw_capabilities, hw_capability_last); + DECLARE_BITMAP(fw_capabilities, WMI_FW_CAPABILITY_MAX); u8 n_mids; /* number of additional MIDs as reported by FW */ u32 recovery_count; /* num of FW recovery attempts in a short time */ u32 recovery_state; /* FW recovery state machine */ @@ -895,7 +896,8 @@ void wil6210_unmask_irq_rx(struct wil6210_priv *wil); int wil_iftype_nl2wmi(enum nl80211_iftype type); int wil_ioctl(struct wil6210_priv *wil, void __user *data, int cmd); -int wil_request_firmware(struct wil6210_priv *wil, const char *name); +int wil_request_firmware(struct wil6210_priv *wil, const char *name, + bool load); int wil_can_suspend(struct wil6210_priv *wil, bool is_runtime); int wil_suspend(struct wil6210_priv *wil, bool is_runtime); From 13cd9f758a555d1ab547b0dbed3f9d1e529230c3 Mon Sep 17 00:00:00 2001 From: Lior David Date: Mon, 22 Aug 2016 12:42:22 +0300 Subject: [PATCH 0596/1715] wil6210: extract firmware version from file header Currently the FW version is taken from the sw_version field of the FW ready event. This version is based on internal version control revision and it is difficult to map to actual FW version. Fix this by using the actual FW version stored in the FW file header record. Signed-off-by: Lior David Signed-off-by: Maya Erez Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/wil6210/debugfs.c | 28 +++++++++++++++++++++- drivers/net/wireless/ath/wil6210/fw.h | 3 +++ drivers/net/wireless/ath/wil6210/fw_inc.c | 7 ++++++ drivers/net/wireless/ath/wil6210/main.c | 1 + drivers/net/wireless/ath/wil6210/netdev.c | 2 ++ drivers/net/wireless/ath/wil6210/wil6210.h | 3 ++- drivers/net/wireless/ath/wil6210/wmi.c | 8 +++---- 7 files changed, 46 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/ath/wil6210/debugfs.c b/drivers/net/wireless/ath/wil6210/debugfs.c index a244a36c19bc..5e4058a4037b 100644 --- a/drivers/net/wireless/ath/wil6210/debugfs.c +++ b/drivers/net/wireless/ath/wil6210/debugfs.c @@ -1577,6 +1577,32 @@ static const struct file_operations fops_fw_capabilities = { .llseek = seq_lseek, }; +/*---------FW version------------*/ +static int wil_fw_version_debugfs_show(struct seq_file *s, void *data) +{ + struct wil6210_priv *wil = s->private; + + if (wil->fw_version[0]) + seq_printf(s, "%s\n", wil->fw_version); + else + seq_puts(s, "N/A\n"); + + return 0; +} + +static int wil_fw_version_seq_open(struct inode *inode, struct file *file) +{ + return single_open(file, wil_fw_version_debugfs_show, + inode->i_private); +} + +static const struct file_operations fops_fw_version = { + .open = wil_fw_version_seq_open, + .release = single_release, + .read = seq_read, + .llseek = seq_lseek, +}; + /*----------------*/ static void wil6210_debugfs_init_blobs(struct wil6210_priv *wil, struct dentry *dbg) @@ -1628,6 +1654,7 @@ static const struct { {"led_cfg", S_IRUGO | S_IWUSR, &fops_led_cfg}, {"led_blink_time", S_IRUGO | S_IWUSR, &fops_led_blink_time}, {"fw_capabilities", S_IRUGO, &fops_fw_capabilities}, + {"fw_version", S_IRUGO, &fops_fw_version}, }; static void wil6210_debugfs_init_files(struct wil6210_priv *wil, @@ -1668,7 +1695,6 @@ static void wil6210_debugfs_init_isr(struct wil6210_priv *wil, static const struct dbg_off dbg_wil_off[] = { WIL_FIELD(privacy, S_IRUGO, doff_u32), WIL_FIELD(status[0], S_IRUGO | S_IWUSR, doff_ulong), - WIL_FIELD(fw_version, S_IRUGO, doff_u32), WIL_FIELD(hw_version, S_IRUGO, doff_x32), WIL_FIELD(recovery_count, S_IRUGO, doff_u32), WIL_FIELD(ap_isolate, S_IRUGO, doff_u32), diff --git a/drivers/net/wireless/ath/wil6210/fw.h b/drivers/net/wireless/ath/wil6210/fw.h index c3191c61832c..2f2b910501ba 100644 --- a/drivers/net/wireless/ath/wil6210/fw.h +++ b/drivers/net/wireless/ath/wil6210/fw.h @@ -102,6 +102,9 @@ struct wil_fw_record_verify { /* type == wil_fw_verify */ /* file header * First record of every file */ +/* the FW version prefix in the comment */ +#define WIL_FW_VERSION_PREFIX "FW version: " +#define WIL_FW_VERSION_PREFIX_LEN (sizeof(WIL_FW_VERSION_PREFIX) - 1) struct wil_fw_record_file_header { __le32 signature ; /* Wilocity signature */ __le32 reserved; diff --git a/drivers/net/wireless/ath/wil6210/fw_inc.c b/drivers/net/wireless/ath/wil6210/fw_inc.c index 3860238840ba..8f40eb301924 100644 --- a/drivers/net/wireless/ath/wil6210/fw_inc.c +++ b/drivers/net/wireless/ath/wil6210/fw_inc.c @@ -223,6 +223,13 @@ static int fw_handle_file_header(struct wil6210_priv *wil, const void *data, wil_hex_dump_fw("", DUMP_PREFIX_OFFSET, 16, 1, d->comment, sizeof(d->comment), true); + if (!memcmp(d->comment, WIL_FW_VERSION_PREFIX, + WIL_FW_VERSION_PREFIX_LEN)) + memcpy(wil->fw_version, + d->comment + WIL_FW_VERSION_PREFIX_LEN, + min(sizeof(d->comment) - WIL_FW_VERSION_PREFIX_LEN, + sizeof(wil->fw_version) - 1)); + return 0; } diff --git a/drivers/net/wireless/ath/wil6210/main.c b/drivers/net/wireless/ath/wil6210/main.c index 7198c86e09f2..e7130b54d1d8 100644 --- a/drivers/net/wireless/ath/wil6210/main.c +++ b/drivers/net/wireless/ath/wil6210/main.c @@ -893,6 +893,7 @@ int wil_reset(struct wil6210_priv *wil, bool load_fw) WIL_FW2_NAME); wil_halt_cpu(wil); + memset(wil->fw_version, 0, sizeof(wil->fw_version)); /* Loading f/w from the file */ rc = wil_request_firmware(wil, WIL_FW_NAME, true); if (rc) diff --git a/drivers/net/wireless/ath/wil6210/netdev.c b/drivers/net/wireless/ath/wil6210/netdev.c index 4bc9bb0a6cb4..61de5e9f8ef0 100644 --- a/drivers/net/wireless/ath/wil6210/netdev.c +++ b/drivers/net/wireless/ath/wil6210/netdev.c @@ -216,6 +216,8 @@ int wil_if_add(struct wil6210_priv *wil) wil_dbg_misc(wil, "entered"); + strlcpy(wiphy->fw_version, wil->fw_version, sizeof(wiphy->fw_version)); + rc = wiphy_register(wiphy); if (rc < 0) { wil_err(wil, "failed to register wiphy, err %d\n", rc); diff --git a/drivers/net/wireless/ath/wil6210/wil6210.h b/drivers/net/wireless/ath/wil6210/wil6210.h index 979536ce4591..a949cd62bc4e 100644 --- a/drivers/net/wireless/ath/wil6210/wil6210.h +++ b/drivers/net/wireless/ath/wil6210/wil6210.h @@ -17,6 +17,7 @@ #ifndef __WIL6210_H__ #define __WIL6210_H__ +#include #include #include #include @@ -576,7 +577,7 @@ struct wil6210_priv { struct wireless_dev *wdev; void __iomem *csr; DECLARE_BITMAP(status, wil_status_last); - u32 fw_version; + u8 fw_version[ETHTOOL_FWVERS_LEN]; u32 hw_version; const char *hw_name; DECLARE_BITMAP(hw_capabilities, hw_capability_last); diff --git a/drivers/net/wireless/ath/wil6210/wmi.c b/drivers/net/wireless/ath/wil6210/wmi.c index 0b109b2a31ac..fae4f1285d08 100644 --- a/drivers/net/wireless/ath/wil6210/wmi.c +++ b/drivers/net/wireless/ath/wil6210/wmi.c @@ -312,14 +312,14 @@ static void wmi_evt_ready(struct wil6210_priv *wil, int id, void *d, int len) struct wireless_dev *wdev = wil->wdev; struct wmi_ready_event *evt = d; - wil->fw_version = le32_to_cpu(evt->sw_version); wil->n_mids = evt->numof_additional_mids; - wil_info(wil, "FW ver. %d; MAC %pM; %d MID's\n", wil->fw_version, + wil_info(wil, "FW ver. %s(SW %d); MAC %pM; %d MID's\n", + wil->fw_version, le32_to_cpu(evt->sw_version), evt->mac, wil->n_mids); /* ignore MAC address, we already have it from the boot loader */ - snprintf(wdev->wiphy->fw_version, sizeof(wdev->wiphy->fw_version), - "%d", wil->fw_version); + strlcpy(wdev->wiphy->fw_version, wil->fw_version, + sizeof(wdev->wiphy->fw_version)); wil_set_recovery_state(wil, fw_recovery_idle); set_bit(wil_status_fwready, wil->status); From 1eff7002e3220c2814213b3699ad8a3e41c2a422 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Wed, 31 Aug 2016 09:30:44 +0200 Subject: [PATCH 0597/1715] net: ethernet: et131x: constify ethtool_ops structures Check for ethtool_ops structures that are only stored in the ethtool_ops field of a net_device structure or passed as the second argument to netdev_set_default_ethtool_ops. These contexts are declared const, so ethtool_ops structures that have these properties can be declared as const also. The semantic patch that makes this change is as follows: (http://coccinelle.lip6.fr/) // @r disable optional_qualifier@ identifier i; position p; @@ static struct ethtool_ops i@p = { ... }; @ok1@ identifier r.i; struct net_device e; position p; @@ e.ethtool_ops = &i@p; @ok2@ identifier r.i; expression e; position p; @@ netdev_set_default_ethtool_ops(e, &i@p) @bad@ position p != {r.p,ok1.p,ok2.p}; identifier r.i; @@ i@p @depends on !bad disable optional_qualifier@ identifier r.i; @@ static +const struct ethtool_ops i = { ... }; // Signed-off-by: Julia Lawall Acked-by: Mark Einon Signed-off-by: David S. Miller --- drivers/net/ethernet/agere/et131x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/agere/et131x.c b/drivers/net/ethernet/agere/et131x.c index c83ebae73d91..906683851c7d 100644 --- a/drivers/net/ethernet/agere/et131x.c +++ b/drivers/net/ethernet/agere/et131x.c @@ -2961,7 +2961,7 @@ static void et131x_get_drvinfo(struct net_device *netdev, sizeof(info->bus_info)); } -static struct ethtool_ops et131x_ethtool_ops = { +static const struct ethtool_ops et131x_ethtool_ops = { .get_drvinfo = et131x_get_drvinfo, .get_regs_len = et131x_get_regs_len, .get_regs = et131x_get_regs, From 70591ab96e42fa4a540499b0befdfd2460556d9b Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Wed, 31 Aug 2016 09:30:45 +0200 Subject: [PATCH 0598/1715] net: bcmgenet: constify ethtool_ops structures Check for ethtool_ops structures that are only stored in the ethtool_ops field of a net_device structure or passed as the second argument to netdev_set_default_ethtool_ops. These contexts are declared const, so ethtool_ops structures that have these properties can be declared as const also. The semantic patch that makes this change is as follows: (http://coccinelle.lip6.fr/) // @r disable optional_qualifier@ identifier i; position p; @@ static struct ethtool_ops i@p = { ... }; @ok1@ identifier r.i; struct net_device e; position p; @@ e.ethtool_ops = &i@p; @ok2@ identifier r.i; expression e; position p; @@ netdev_set_default_ethtool_ops(e, &i@p) @bad@ position p != {r.p,ok1.p,ok2.p}; identifier r.i; @@ i@p @depends on !bad disable optional_qualifier@ identifier r.i; @@ static +const struct ethtool_ops i = { ... }; // Signed-off-by: Julia Lawall Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 8d4f8495dbb3..46f904392f88 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -973,7 +973,7 @@ static int bcmgenet_nway_reset(struct net_device *dev) } /* standard ethtool support functions. */ -static struct ethtool_ops bcmgenet_ethtool_ops = { +static const struct ethtool_ops bcmgenet_ethtool_ops = { .get_strings = bcmgenet_get_strings, .get_sset_count = bcmgenet_get_sset_count, .get_ethtool_stats = bcmgenet_get_ethtool_stats, From bc6f0136346a344a3fb328d3d16e954b2b6544b4 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Wed, 31 Aug 2016 09:30:46 +0200 Subject: [PATCH 0599/1715] net: hisilicon: constify ethtool_ops structures Check for ethtool_ops structures that are only stored in the ethtool_ops field of a net_device structure or passed as the second argument to netdev_set_default_ethtool_ops. These contexts are declared const, so ethtool_ops structures that have these properties can be declared as const also. The semantic patch that makes this change is as follows: (http://coccinelle.lip6.fr/) // @r disable optional_qualifier@ identifier i; position p; @@ static struct ethtool_ops i@p = { ... }; @ok1@ identifier r.i; struct net_device e; position p; @@ e.ethtool_ops = &i@p; @ok2@ identifier r.i; expression e; position p; @@ netdev_set_default_ethtool_ops(e, &i@p) @bad@ position p != {r.p,ok1.p,ok2.p}; identifier r.i; @@ i@p @depends on !bad disable optional_qualifier@ identifier r.i; @@ static +const struct ethtool_ops i = { ... }; // Signed-off-by: Julia Lawall Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hip04_eth.c | 2 +- drivers/net/ethernet/hisilicon/hisi_femac.c | 2 +- drivers/net/ethernet/hisilicon/hix5hd2_gmac.c | 2 +- drivers/net/ethernet/hisilicon/hns/hns_ethtool.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c index 0c4afe95ef54..a90ab40e55af 100644 --- a/drivers/net/ethernet/hisilicon/hip04_eth.c +++ b/drivers/net/ethernet/hisilicon/hip04_eth.c @@ -755,7 +755,7 @@ static void hip04_get_drvinfo(struct net_device *netdev, strlcpy(drvinfo->version, DRV_VERSION, sizeof(drvinfo->version)); } -static struct ethtool_ops hip04_ethtool_ops = { +static const struct ethtool_ops hip04_ethtool_ops = { .get_coalesce = hip04_get_coalesce, .set_coalesce = hip04_set_coalesce, .get_drvinfo = hip04_get_drvinfo, diff --git a/drivers/net/ethernet/hisilicon/hisi_femac.c b/drivers/net/ethernet/hisilicon/hisi_femac.c index b5d7ad0252a0..ca68e2208b5b 100644 --- a/drivers/net/ethernet/hisilicon/hisi_femac.c +++ b/drivers/net/ethernet/hisilicon/hisi_femac.c @@ -699,7 +699,7 @@ static int hisi_femac_net_ioctl(struct net_device *dev, return phy_mii_ioctl(dev->phydev, ifreq, cmd); } -static struct ethtool_ops hisi_femac_ethtools_ops = { +static const struct ethtool_ops hisi_femac_ethtools_ops = { .get_link = ethtool_op_get_link, .get_link_ksettings = phy_ethtool_get_link_ksettings, .set_link_ksettings = phy_ethtool_set_link_ksettings, diff --git a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c index 275618bb4646..e69a6bed31a9 100644 --- a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c +++ b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c @@ -750,7 +750,7 @@ static const struct net_device_ops hix5hd2_netdev_ops = { .ndo_set_mac_address = hix5hd2_net_set_mac_address, }; -static struct ethtool_ops hix5hd2_ethtools_ops = { +static const struct ethtool_ops hix5hd2_ethtools_ops = { .get_link = ethtool_op_get_link, .get_link_ksettings = phy_ethtool_get_link_ksettings, .set_link_ksettings = phy_ethtool_set_link_ksettings, diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c index ab33487a5321..5eb3245bdf86 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c @@ -1264,7 +1264,7 @@ static int hns_get_rxnfc(struct net_device *netdev, return 0; } -static struct ethtool_ops hns_ethtool_ops = { +static const struct ethtool_ops hns_ethtool_ops = { .get_drvinfo = hns_nic_get_drvinfo, .get_link = hns_nic_get_link, .get_settings = hns_nic_get_settings, From 777065e5ada22d3ae2fac62f9084d38f494bc1fe Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Wed, 31 Aug 2016 09:30:47 +0200 Subject: [PATCH 0600/1715] dwc_eth_qos: constify ethtool_ops structures Check for ethtool_ops structures that are only stored in the ethtool_ops field of a net_device structure or passed as the second argument to netdev_set_default_ethtool_ops. These contexts are declared const, so ethtool_ops structures that have these properties can be declared as const also. The semantic patch that makes this change is as follows: (http://coccinelle.lip6.fr/) // @r disable optional_qualifier@ identifier i; position p; @@ static struct ethtool_ops i@p = { ... }; @ok1@ identifier r.i; struct net_device e; position p; @@ e.ethtool_ops = &i@p; @ok2@ identifier r.i; expression e; position p; @@ netdev_set_default_ethtool_ops(e, &i@p) @bad@ position p != {r.p,ok1.p,ok2.p}; identifier r.i; @@ i@p @depends on !bad disable optional_qualifier@ identifier r.i; @@ static +const struct ethtool_ops i = { ... }; // Signed-off-by: Julia Lawall Signed-off-by: David S. Miller --- drivers/net/ethernet/synopsys/dwc_eth_qos.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/synopsys/dwc_eth_qos.c b/drivers/net/ethernet/synopsys/dwc_eth_qos.c index 5a3941bf250f..c25d9711a221 100644 --- a/drivers/net/ethernet/synopsys/dwc_eth_qos.c +++ b/drivers/net/ethernet/synopsys/dwc_eth_qos.c @@ -2743,7 +2743,7 @@ static void dwceqos_set_msglevel(struct net_device *ndev, u32 msglevel) lp->msg_enable = msglevel; } -static struct ethtool_ops dwceqos_ethtool_ops = { +static const struct ethtool_ops dwceqos_ethtool_ops = { .get_drvinfo = dwceqos_get_drvinfo, .get_link = ethtool_op_get_link, .get_pauseparam = dwceqos_get_pauseparam, From c1ab0e9cf94f52abe59cb63358518dac29b5b101 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Wed, 31 Aug 2016 09:30:48 +0200 Subject: [PATCH 0601/1715] net: systemport: constify ethtool_ops structures Check for ethtool_ops structures that are only stored in the ethtool_ops field of a net_device structure or passed as the second argument to netdev_set_default_ethtool_ops. These contexts are declared const, so ethtool_ops structures that have these properties can be declared as const also. The semantic patch that makes this change is as follows: (http://coccinelle.lip6.fr/) // @r disable optional_qualifier@ identifier i; position p; @@ static struct ethtool_ops i@p = { ... }; @ok1@ identifier r.i; struct net_device e; position p; @@ e.ethtool_ops = &i@p; @ok2@ identifier r.i; expression e; position p; @@ netdev_set_default_ethtool_ops(e, &i@p) @bad@ position p != {r.p,ok1.p,ok2.p}; identifier r.i; @@ i@p @depends on !bad disable optional_qualifier@ identifier r.i; @@ static +const struct ethtool_ops i = { ... }; // Signed-off-by: Julia Lawall Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bcmsysport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 2059911014db..c3354b9941d1 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -1692,7 +1692,7 @@ static int bcm_sysport_stop(struct net_device *dev) return 0; } -static struct ethtool_ops bcm_sysport_ethtool_ops = { +static const struct ethtool_ops bcm_sysport_ethtool_ops = { .get_drvinfo = bcm_sysport_get_drvinfo, .get_msglevel = bcm_sysport_get_msglvl, .set_msglevel = bcm_sysport_set_msglvl, From 650097cdc4ce72f40bceddcd7c7512c7099fa902 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 31 Aug 2016 11:41:22 +0200 Subject: [PATCH 0602/1715] net: wan: sbni: Spelling s/acknoweledge/acknowledge/, Grammar Signed-off-by: Geert Uytterhoeven Signed-off-by: David S. Miller --- drivers/net/wan/sbni.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wan/sbni.c b/drivers/net/wan/sbni.c index d98c7e57137d..3a421ca8a4d0 100644 --- a/drivers/net/wan/sbni.c +++ b/drivers/net/wan/sbni.c @@ -582,8 +582,8 @@ handle_channel( struct net_device *dev ) /* - * Routine returns 1 if it need to acknoweledge received frame. - * Empty frame received without errors won't be acknoweledged. + * Routine returns 1 if it needs to acknowledge received frame. + * Empty frame received without errors won't be acknowledged. */ static int From 3258124534f65c94423238b41fa72633529878c4 Mon Sep 17 00:00:00 2001 From: Raghu Vatsavayi Date: Wed, 31 Aug 2016 11:03:20 -0700 Subject: [PATCH 0603/1715] liquidio: Consolidate common functionality Consolidate common functionality of various devices from different files into lio_core.c/octeon_console.c. Signed-off-by: Derek Chickles Signed-off-by: Satanand Burla Signed-off-by: Felix Manlunas Signed-off-by: Raghu Vatsavayi Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/liquidio/Makefile | 23 +- .../ethernet/cavium/liquidio/cn66xx_device.c | 31 -- .../ethernet/cavium/liquidio/cn66xx_device.h | 1 - .../ethernet/cavium/liquidio/cn68xx_device.c | 1 - .../net/ethernet/cavium/liquidio/lio_core.c | 261 +++++++++++++++++ .../ethernet/cavium/liquidio/lio_ethtool.c | 18 +- .../net/ethernet/cavium/liquidio/lio_main.c | 276 +----------------- .../ethernet/cavium/liquidio/octeon_console.c | 117 +++++++- .../ethernet/cavium/liquidio/octeon_device.c | 104 ------- .../ethernet/cavium/liquidio/octeon_device.h | 1 - .../ethernet/cavium/liquidio/octeon_main.h | 24 +- .../ethernet/cavium/liquidio/octeon_mem_ops.c | 1 - .../ethernet/cavium/liquidio/octeon_network.h | 2 - .../net/ethernet/cavium/liquidio/octeon_nic.c | 8 +- .../net/ethernet/cavium/liquidio/octeon_nic.h | 2 +- 15 files changed, 426 insertions(+), 444 deletions(-) create mode 100644 drivers/net/ethernet/cavium/liquidio/lio_core.c diff --git a/drivers/net/ethernet/cavium/liquidio/Makefile b/drivers/net/ethernet/cavium/liquidio/Makefile index 2f366806835d..d44111df8452 100644 --- a/drivers/net/ethernet/cavium/liquidio/Makefile +++ b/drivers/net/ethernet/cavium/liquidio/Makefile @@ -3,14 +3,15 @@ # obj-$(CONFIG_LIQUIDIO) += liquidio.o -liquidio-objs := lio_main.o \ - lio_ethtool.o \ - request_manager.o \ - response_manager.o \ - octeon_device.o \ - cn66xx_device.o \ - cn68xx_device.o \ - octeon_mem_ops.o \ - octeon_droq.o \ - octeon_console.o \ - octeon_nic.o +liquidio-$(CONFIG_LIQUIDIO) += lio_ethtool.o \ + lio_core.o \ + request_manager.o \ + response_manager.o \ + octeon_device.o \ + cn66xx_device.o \ + cn68xx_device.o \ + octeon_mem_ops.o \ + octeon_droq.o \ + octeon_nic.o + +liquidio-objs := lio_main.o octeon_console.o $(liquidio-y) diff --git a/drivers/net/ethernet/cavium/liquidio/cn66xx_device.c b/drivers/net/ethernet/cavium/liquidio/cn66xx_device.c index c03d37016a48..dc5d14a7effa 100644 --- a/drivers/net/ethernet/cavium/liquidio/cn66xx_device.c +++ b/drivers/net/ethernet/cavium/liquidio/cn66xx_device.c @@ -418,36 +418,6 @@ void lio_cn6xxx_disable_io_queues(struct octeon_device *oct) octeon_write_csr(oct, CN6XXX_SLI_PKT_TIME_INT, d32); } -void lio_cn6xxx_reinit_regs(struct octeon_device *oct) -{ - int i; - - for (i = 0; i < MAX_OCTEON_INSTR_QUEUES(oct); i++) { - if (!(oct->io_qmask.iq & (1ULL << i))) - continue; - oct->fn_list.setup_iq_regs(oct, i); - } - - for (i = 0; i < MAX_OCTEON_OUTPUT_QUEUES(oct); i++) { - if (!(oct->io_qmask.oq & (1ULL << i))) - continue; - oct->fn_list.setup_oq_regs(oct, i); - } - - oct->fn_list.setup_device_regs(oct); - - oct->fn_list.enable_interrupt(oct->chip); - - oct->fn_list.enable_io_queues(oct); - - /* for (i = 0; i < oct->num_oqs; i++) { */ - for (i = 0; i < MAX_OCTEON_OUTPUT_QUEUES(oct); i++) { - if (!(oct->io_qmask.oq & (1ULL << i))) - continue; - writel(oct->droq[i]->max_count, oct->droq[i]->pkts_credit_reg); - } -} - void lio_cn6xxx_bar1_idx_setup(struct octeon_device *oct, u64 core_addr, @@ -714,7 +684,6 @@ int lio_setup_cn66xx_octeon_device(struct octeon_device *oct) oct->fn_list.soft_reset = lio_cn6xxx_soft_reset; oct->fn_list.setup_device_regs = lio_cn6xxx_setup_device_regs; - oct->fn_list.reinit_regs = lio_cn6xxx_reinit_regs; oct->fn_list.update_iq_read_idx = lio_cn6xxx_update_read_index; oct->fn_list.bar1_idx_setup = lio_cn6xxx_bar1_idx_setup; diff --git a/drivers/net/ethernet/cavium/liquidio/cn66xx_device.h b/drivers/net/ethernet/cavium/liquidio/cn66xx_device.h index 28c47224221a..2e4bc25065f0 100644 --- a/drivers/net/ethernet/cavium/liquidio/cn66xx_device.h +++ b/drivers/net/ethernet/cavium/liquidio/cn66xx_device.h @@ -83,7 +83,6 @@ void lio_cn6xxx_setup_oq_regs(struct octeon_device *oct, u32 oq_no); void lio_cn6xxx_enable_io_queues(struct octeon_device *oct); void lio_cn6xxx_disable_io_queues(struct octeon_device *oct); irqreturn_t lio_cn6xxx_process_interrupt_regs(void *dev); -void lio_cn6xxx_reinit_regs(struct octeon_device *oct); void lio_cn6xxx_bar1_idx_setup(struct octeon_device *oct, u64 core_addr, u32 idx, int valid); void lio_cn6xxx_bar1_idx_write(struct octeon_device *oct, u32 idx, u32 mask); diff --git a/drivers/net/ethernet/cavium/liquidio/cn68xx_device.c b/drivers/net/ethernet/cavium/liquidio/cn68xx_device.c index 29755bc68f12..dbf3566ead53 100644 --- a/drivers/net/ethernet/cavium/liquidio/cn68xx_device.c +++ b/drivers/net/ethernet/cavium/liquidio/cn68xx_device.c @@ -148,7 +148,6 @@ int lio_setup_cn68xx_octeon_device(struct octeon_device *oct) oct->fn_list.process_interrupt_regs = lio_cn6xxx_process_interrupt_regs; oct->fn_list.soft_reset = lio_cn68xx_soft_reset; oct->fn_list.setup_device_regs = lio_cn68xx_setup_device_regs; - oct->fn_list.reinit_regs = lio_cn6xxx_reinit_regs; oct->fn_list.update_iq_read_idx = lio_cn6xxx_update_read_index; oct->fn_list.bar1_idx_setup = lio_cn6xxx_bar1_idx_setup; diff --git a/drivers/net/ethernet/cavium/liquidio/lio_core.c b/drivers/net/ethernet/cavium/liquidio/lio_core.c new file mode 100644 index 000000000000..809179c040a7 --- /dev/null +++ b/drivers/net/ethernet/cavium/liquidio/lio_core.c @@ -0,0 +1,261 @@ +/********************************************************************** +* Author: Cavium, Inc. +* +* Contact: support@cavium.com +* Please include "LiquidIO" in the subject. +* +* Copyright (c) 2003-2015 Cavium, Inc. +* +* This file is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License, Version 2, as +* published by the Free Software Foundation. +* +* This file is distributed in the hope that it will be useful, but +* AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty +* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or +* NONINFRINGEMENT. See the GNU General Public License for more +* details. +* +* This file may also be available under a different license from Cavium. +* Contact Cavium, Inc. for more information +**********************************************************************/ +#include +#include +#include "liquidio_common.h" +#include "octeon_droq.h" +#include "octeon_iq.h" +#include "response_manager.h" +#include "octeon_device.h" +#include "octeon_nic.h" +#include "octeon_main.h" +#include "octeon_network.h" + +int liquidio_set_feature(struct net_device *netdev, int cmd, u16 param1) +{ + struct lio *lio = GET_LIO(netdev); + struct octeon_device *oct = lio->oct_dev; + struct octnic_ctrl_pkt nctrl; + int ret = 0; + + memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt)); + + nctrl.ncmd.u64 = 0; + nctrl.ncmd.s.cmd = cmd; + nctrl.ncmd.s.param1 = param1; + nctrl.iq_no = lio->linfo.txpciq[0].s.q_no; + nctrl.wait_time = 100; + nctrl.netpndev = (u64)netdev; + nctrl.cb_fn = liquidio_link_ctrl_cmd_completion; + + ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl); + if (ret < 0) { + dev_err(&oct->pci_dev->dev, "Feature change failed in core (ret: 0x%x)\n", + ret); + } + return ret; +} + +void octeon_report_tx_completion_to_bql(void *txq, unsigned int pkts_compl, + unsigned int bytes_compl) +{ + struct netdev_queue *netdev_queue = txq; + + netdev_tx_completed_queue(netdev_queue, pkts_compl, bytes_compl); +} + +void octeon_update_tx_completion_counters(void *buf, int reqtype, + unsigned int *pkts_compl, + unsigned int *bytes_compl) +{ + struct octnet_buf_free_info *finfo; + struct sk_buff *skb = NULL; + struct octeon_soft_command *sc; + + switch (reqtype) { + case REQTYPE_NORESP_NET: + case REQTYPE_NORESP_NET_SG: + finfo = buf; + skb = finfo->skb; + break; + + case REQTYPE_RESP_NET_SG: + case REQTYPE_RESP_NET: + sc = buf; + skb = sc->callback_arg; + break; + + default: + return; + } + + (*pkts_compl)++; +/*TODO, Use some other pound define to suggest + * the fact that iqs are not tied to netdevs + * and can take traffic from different netdevs + * hence bql reporting is done per packet + * than in bulk. Usage of NO_NAPI in txq completion is + * a little confusing + */ + *bytes_compl += skb->len; +} + +void octeon_report_sent_bytes_to_bql(void *buf, int reqtype) +{ + struct octnet_buf_free_info *finfo; + struct sk_buff *skb; + struct octeon_soft_command *sc; + struct netdev_queue *txq; + + switch (reqtype) { + case REQTYPE_NORESP_NET: + case REQTYPE_NORESP_NET_SG: + finfo = buf; + skb = finfo->skb; + break; + + case REQTYPE_RESP_NET_SG: + case REQTYPE_RESP_NET: + sc = buf; + skb = sc->callback_arg; + break; + + default: + return; + } + + txq = netdev_get_tx_queue(skb->dev, skb_get_queue_mapping(skb)); + netdev_tx_sent_queue(txq, skb->len); +} + +void liquidio_link_ctrl_cmd_completion(void *nctrl_ptr) +{ + struct octnic_ctrl_pkt *nctrl = (struct octnic_ctrl_pkt *)nctrl_ptr; + struct net_device *netdev = (struct net_device *)nctrl->netpndev; + struct lio *lio = GET_LIO(netdev); + struct octeon_device *oct = lio->oct_dev; + u8 *mac; + + switch (nctrl->ncmd.s.cmd) { + case OCTNET_CMD_CHANGE_DEVFLAGS: + case OCTNET_CMD_SET_MULTI_LIST: + break; + + case OCTNET_CMD_CHANGE_MACADDR: + mac = ((u8 *)&nctrl->udd[0]) + 2; + netif_info(lio, probe, lio->netdev, + "MACAddr changed to %2.2x:%2.2x:%2.2x:%2.2x:%2.2x:%2.2x\n", + mac[0], mac[1], + mac[2], mac[3], + mac[4], mac[5]); + break; + + case OCTNET_CMD_CHANGE_MTU: + /* If command is successful, change the MTU. */ + netif_info(lio, probe, lio->netdev, "MTU Changed from %d to %d\n", + netdev->mtu, nctrl->ncmd.s.param1); + dev_info(&oct->pci_dev->dev, "%s MTU Changed from %d to %d\n", + netdev->name, netdev->mtu, + nctrl->ncmd.s.param1); + rtnl_lock(); + netdev->mtu = nctrl->ncmd.s.param1; + call_netdevice_notifiers(NETDEV_CHANGEMTU, netdev); + rtnl_unlock(); + break; + + case OCTNET_CMD_GPIO_ACCESS: + netif_info(lio, probe, lio->netdev, "LED Flashing visual identification\n"); + break; + + case OCTNET_CMD_LRO_ENABLE: + dev_info(&oct->pci_dev->dev, "%s LRO Enabled\n", netdev->name); + break; + + case OCTNET_CMD_LRO_DISABLE: + dev_info(&oct->pci_dev->dev, "%s LRO Disabled\n", + netdev->name); + break; + + case OCTNET_CMD_VERBOSE_ENABLE: + dev_info(&oct->pci_dev->dev, "%s Firmware debug enabled\n", + netdev->name); + break; + + case OCTNET_CMD_VERBOSE_DISABLE: + dev_info(&oct->pci_dev->dev, "%s Firmware debug disabled\n", + netdev->name); + break; + + case OCTNET_CMD_ENABLE_VLAN_FILTER: + dev_info(&oct->pci_dev->dev, "%s VLAN filter enabled\n", + netdev->name); + break; + + case OCTNET_CMD_ADD_VLAN_FILTER: + dev_info(&oct->pci_dev->dev, "%s VLAN filter %d added\n", + netdev->name, nctrl->ncmd.s.param1); + break; + + case OCTNET_CMD_DEL_VLAN_FILTER: + dev_info(&oct->pci_dev->dev, "%s VLAN filter %d removed\n", + netdev->name, nctrl->ncmd.s.param1); + break; + + case OCTNET_CMD_SET_SETTINGS: + dev_info(&oct->pci_dev->dev, "%s settings changed\n", + netdev->name); + + break; + + /* Case to handle "OCTNET_CMD_TNL_RX_CSUM_CTL" + * Command passed by NIC driver + */ + case OCTNET_CMD_TNL_RX_CSUM_CTL: + if (nctrl->ncmd.s.param1 == OCTNET_CMD_RXCSUM_ENABLE) { + netif_info(lio, probe, lio->netdev, + "RX Checksum Offload Enabled\n"); + } else if (nctrl->ncmd.s.param1 == + OCTNET_CMD_RXCSUM_DISABLE) { + netif_info(lio, probe, lio->netdev, + "RX Checksum Offload Disabled\n"); + } + break; + + /* Case to handle "OCTNET_CMD_TNL_TX_CSUM_CTL" + * Command passed by NIC driver + */ + case OCTNET_CMD_TNL_TX_CSUM_CTL: + if (nctrl->ncmd.s.param1 == OCTNET_CMD_TXCSUM_ENABLE) { + netif_info(lio, probe, lio->netdev, + "TX Checksum Offload Enabled\n"); + } else if (nctrl->ncmd.s.param1 == + OCTNET_CMD_TXCSUM_DISABLE) { + netif_info(lio, probe, lio->netdev, + "TX Checksum Offload Disabled\n"); + } + break; + + /* Case to handle "OCTNET_CMD_VXLAN_PORT_CONFIG" + * Command passed by NIC driver + */ + case OCTNET_CMD_VXLAN_PORT_CONFIG: + if (nctrl->ncmd.s.more == OCTNET_CMD_VXLAN_PORT_ADD) { + netif_info(lio, probe, lio->netdev, + "VxLAN Destination UDP PORT:%d ADDED\n", + nctrl->ncmd.s.param1); + } else if (nctrl->ncmd.s.more == + OCTNET_CMD_VXLAN_PORT_DEL) { + netif_info(lio, probe, lio->netdev, + "VxLAN Destination UDP PORT:%d DELETED\n", + nctrl->ncmd.s.param1); + } + break; + + case OCTNET_CMD_SET_FLOW_CTL: + netif_info(lio, probe, lio->netdev, "Set RX/TX flow control parameters\n"); + break; + + default: + dev_err(&oct->pci_dev->dev, "%s Unknown cmd %d\n", __func__, + nctrl->ncmd.s.cmd); + } +} diff --git a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c index 289eb8907922..f3ce7441cb5f 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c @@ -290,18 +290,16 @@ lio_get_eeprom(struct net_device *netdev, struct ethtool_eeprom *eeprom, struct lio *lio = GET_LIO(netdev); struct octeon_device *oct_dev = lio->oct_dev; struct octeon_board_info *board_info; - int len; - if (eeprom->offset != 0) + if (eeprom->offset) return -EINVAL; eeprom->magic = oct_dev->pci_dev->vendor; board_info = (struct octeon_board_info *)(&oct_dev->boardinfo); - len = - sprintf((char *)bytes, - "boardname:%s serialnum:%s maj:%lld min:%lld\n", - board_info->name, board_info->serial_number, - board_info->major, board_info->minor); + sprintf((char *)bytes, + "boardname:%s serialnum:%s maj:%lld min:%lld\n", + board_info->name, board_info->serial_number, + board_info->major, board_info->minor); return 0; } @@ -406,7 +404,7 @@ octnet_mdio45_access(struct lio *lio, int op, int loc, int *value) dev_err(&oct_dev->pci_dev->dev, "octnet_mdio45_access instruction failed status: %x\n", retval); - retval = -EBUSY; + retval = -EBUSY; } else { /* Sleep on a wait queue till the cond flag indicates that the * response arrived @@ -1320,8 +1318,8 @@ oct_cfg_rx_intrcnt(struct lio *lio, struct ethtool_coalesce *intr_coal) return 0; } -static int oct_cfg_rx_intrtime(struct lio *lio, struct ethtool_coalesce - *intr_coal) +static int oct_cfg_rx_intrtime(struct lio *lio, + struct ethtool_coalesce *intr_coal) { struct octeon_device *oct = lio->oct_dev; u32 time_threshold, rx_coalesce_usecs; diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index f659a95ffc94..2abc110eaef8 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -21,8 +21,6 @@ **********************************************************************/ #include #include -#include -#include #include #include #include @@ -52,11 +50,6 @@ module_param(ddr_timeout, int, 0644); MODULE_PARM_DESC(ddr_timeout, "Number of milliseconds to wait for DDR initialization. 0 waits for ddr_timeout to be set to non-zero value before starting to check"); -static u32 console_bitmask; -module_param(console_bitmask, int, 0644); -MODULE_PARM_DESC(console_bitmask, - "Bitmask indicating which consoles have debug output redirected to syslog."); - #define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK) #define INCR_INSTRQUEUE_PKT_COUNT(octeon_dev_ptr, iq_no, field, count) \ @@ -162,27 +155,6 @@ struct octnic_gather { u64 sg_dma_ptr; }; -/** This structure is used by NIC driver to store information required - * to free the sk_buff when the packet has been fetched by Octeon. - * Bytes offset below assume worst-case of a 64-bit system. - */ -struct octnet_buf_free_info { - /** Bytes 1-8. Pointer to network device private structure. */ - struct lio *lio; - - /** Bytes 9-16. Pointer to sk_buff. */ - struct sk_buff *skb; - - /** Bytes 17-24. Pointer to gather list. */ - struct octnic_gather *g; - - /** Bytes 25-32. Physical address of skb->data or gather list. */ - u64 dptr; - - /** Bytes 33-47. Piggybacked soft command, if any */ - struct octeon_soft_command *sc; -}; - struct handshake { struct completion init; struct completion started; @@ -198,6 +170,7 @@ struct octeon_device_priv { }; static int octeon_device_init(struct octeon_device *); +static int liquidio_stop(struct net_device *netdev); static void liquidio_remove(struct pci_dev *pdev); static int liquidio_probe(struct pci_dev *pdev, const struct pci_device_id *ent); @@ -252,76 +225,6 @@ static int lio_wait_for_oq_pkts(struct octeon_device *oct) return pkt_cnt; } -void octeon_report_tx_completion_to_bql(void *txq, unsigned int pkts_compl, - unsigned int bytes_compl) -{ - struct netdev_queue *netdev_queue = txq; - - netdev_tx_completed_queue(netdev_queue, pkts_compl, bytes_compl); -} - -void octeon_update_tx_completion_counters(void *buf, int reqtype, - unsigned int *pkts_compl, - unsigned int *bytes_compl) -{ - struct octnet_buf_free_info *finfo; - struct sk_buff *skb = NULL; - struct octeon_soft_command *sc; - - switch (reqtype) { - case REQTYPE_NORESP_NET: - case REQTYPE_NORESP_NET_SG: - finfo = buf; - skb = finfo->skb; - break; - - case REQTYPE_RESP_NET_SG: - case REQTYPE_RESP_NET: - sc = buf; - skb = sc->callback_arg; - break; - - default: - return; - } - - (*pkts_compl)++; - *bytes_compl += skb->len; -} - -void octeon_report_sent_bytes_to_bql(void *buf, int reqtype) -{ - struct octnet_buf_free_info *finfo; - struct sk_buff *skb; - struct octeon_soft_command *sc; - struct netdev_queue *txq; - - switch (reqtype) { - case REQTYPE_NORESP_NET: - case REQTYPE_NORESP_NET_SG: - finfo = buf; - skb = finfo->skb; - break; - - case REQTYPE_RESP_NET_SG: - case REQTYPE_RESP_NET: - sc = buf; - skb = sc->callback_arg; - break; - - default: - return; - } - - txq = netdev_get_tx_queue(skb->dev, skb_get_queue_mapping(skb)); - netdev_tx_sent_queue(txq, skb->len); -} - -int octeon_console_debug_enabled(u32 console) -{ - return (console_bitmask >> (console)) & 0x1; -} - /** * \brief Forces all IO queues off on a given device * @param oct Pointer to Octeon device @@ -2173,7 +2076,7 @@ static inline int setup_io_queues(struct octeon_device *octeon_dev, lio->ifidx), NULL); if (retval) { dev_err(&octeon_dev->pci_dev->dev, - " %s : Runtime DROQ(RxQ) creation failed.\n", + "%s : Runtime DROQ(RxQ) creation failed.\n", __func__); return 1; } @@ -2340,143 +2243,6 @@ static int liquidio_stop(struct net_device *netdev) return 0; } -void liquidio_link_ctrl_cmd_completion(void *nctrl_ptr) -{ - struct octnic_ctrl_pkt *nctrl = (struct octnic_ctrl_pkt *)nctrl_ptr; - struct net_device *netdev = (struct net_device *)nctrl->netpndev; - struct lio *lio = GET_LIO(netdev); - struct octeon_device *oct = lio->oct_dev; - u8 *mac; - - switch (nctrl->ncmd.s.cmd) { - case OCTNET_CMD_CHANGE_DEVFLAGS: - case OCTNET_CMD_SET_MULTI_LIST: - break; - - case OCTNET_CMD_CHANGE_MACADDR: - mac = ((u8 *)&nctrl->udd[0]) + 2; - netif_info(lio, probe, lio->netdev, - "%s %2.2x:%2.2x:%2.2x:%2.2x:%2.2x:%2.2x\n", - "MACAddr changed to", mac[0], mac[1], - mac[2], mac[3], mac[4], mac[5]); - break; - - case OCTNET_CMD_CHANGE_MTU: - /* If command is successful, change the MTU. */ - netif_info(lio, probe, lio->netdev, " MTU Changed from %d to %d\n", - netdev->mtu, nctrl->ncmd.s.param1); - dev_info(&oct->pci_dev->dev, "%s MTU Changed from %d to %d\n", - netdev->name, netdev->mtu, - nctrl->ncmd.s.param1); - rtnl_lock(); - netdev->mtu = nctrl->ncmd.s.param1; - call_netdevice_notifiers(NETDEV_CHANGEMTU, netdev); - rtnl_unlock(); - break; - - case OCTNET_CMD_GPIO_ACCESS: - netif_info(lio, probe, lio->netdev, "LED Flashing visual identification\n"); - - break; - - case OCTNET_CMD_LRO_ENABLE: - dev_info(&oct->pci_dev->dev, "%s LRO Enabled\n", netdev->name); - break; - - case OCTNET_CMD_LRO_DISABLE: - dev_info(&oct->pci_dev->dev, "%s LRO Disabled\n", - netdev->name); - break; - - case OCTNET_CMD_VERBOSE_ENABLE: - dev_info(&oct->pci_dev->dev, "%s LRO Enabled\n", netdev->name); - break; - - case OCTNET_CMD_VERBOSE_DISABLE: - dev_info(&oct->pci_dev->dev, "%s LRO Disabled\n", - netdev->name); - break; - - case OCTNET_CMD_ENABLE_VLAN_FILTER: - dev_info(&oct->pci_dev->dev, "%s VLAN filter enabled\n", - netdev->name); - break; - - case OCTNET_CMD_ADD_VLAN_FILTER: - dev_info(&oct->pci_dev->dev, "%s VLAN filter %d added\n", - netdev->name, nctrl->ncmd.s.param1); - break; - - case OCTNET_CMD_DEL_VLAN_FILTER: - dev_info(&oct->pci_dev->dev, "%s VLAN filter %d removed\n", - netdev->name, nctrl->ncmd.s.param1); - break; - - case OCTNET_CMD_SET_SETTINGS: - dev_info(&oct->pci_dev->dev, "%s settings changed\n", - netdev->name); - - break; - /* Case to handle "OCTNET_CMD_TNL_RX_CSUM_CTL" - * Command passed by NIC driver - */ - case OCTNET_CMD_TNL_RX_CSUM_CTL: - if (nctrl->ncmd.s.param1 == OCTNET_CMD_RXCSUM_ENABLE) { - netif_info(lio, probe, lio->netdev, - "%s RX Checksum Offload Enabled\n", - netdev->name); - } else if (nctrl->ncmd.s.param1 == - OCTNET_CMD_RXCSUM_DISABLE) { - netif_info(lio, probe, lio->netdev, - "%s RX Checksum Offload Disabled\n", - netdev->name); - } - break; - - /* Case to handle "OCTNET_CMD_TNL_TX_CSUM_CTL" - * Command passed by NIC driver - */ - case OCTNET_CMD_TNL_TX_CSUM_CTL: - if (nctrl->ncmd.s.param1 == OCTNET_CMD_TXCSUM_ENABLE) { - netif_info(lio, probe, lio->netdev, - "%s TX Checksum Offload Enabled\n", - netdev->name); - } else if (nctrl->ncmd.s.param1 == - OCTNET_CMD_TXCSUM_DISABLE) { - netif_info(lio, probe, lio->netdev, - "%s TX Checksum Offload Disabled\n", - netdev->name); - } - break; - - /* Case to handle "OCTNET_CMD_VXLAN_PORT_CONFIG" - * Command passed by NIC driver - */ - case OCTNET_CMD_VXLAN_PORT_CONFIG: - if (nctrl->ncmd.s.more == OCTNET_CMD_VXLAN_PORT_ADD) { - netif_info(lio, probe, lio->netdev, - "%s VxLAN Destination UDP PORT:%d ADDED\n", - netdev->name, - nctrl->ncmd.s.param1); - } else if (nctrl->ncmd.s.more == - OCTNET_CMD_VXLAN_PORT_DEL) { - netif_info(lio, probe, lio->netdev, - "%s VxLAN Destination UDP PORT:%d DELETED\n", - netdev->name, - nctrl->ncmd.s.param1); - } - break; - - case OCTNET_CMD_SET_FLOW_CTL: - netif_info(lio, probe, lio->netdev, "Set RX/TX flow control parameters\n"); - break; - - default: - dev_err(&oct->pci_dev->dev, "%s Unknown cmd %d\n", __func__, - nctrl->ncmd.s.cmd); - } -} - /** * \brief Converts a mask based on net device flags * @param netdev network device @@ -2817,8 +2583,7 @@ static void handle_timestamp(struct octeon_device *oct, */ static inline int send_nic_timestamp_pkt(struct octeon_device *oct, struct octnic_data_pkt *ndata, - struct octnet_buf_free_info *finfo, - int xmit_more) + struct octnet_buf_free_info *finfo) { int retval; struct octeon_soft_command *sc; @@ -2848,7 +2613,7 @@ static inline int send_nic_timestamp_pkt(struct octeon_device *oct, len = (u32)((struct octeon_instr_ih2 *)(&sc->cmd.cmd2.ih2))->dlengsz; - ring_doorbell = !xmit_more; + ring_doorbell = 1; retval = octeon_send_command(oct, sc->iq_no, ring_doorbell, &sc->cmd, sc, len, ndata->reqtype); @@ -2881,7 +2646,7 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev) union tx_info *tx_info; int status = 0; int q_idx = 0, iq_no = 0; - int xmit_more, j; + int j; u64 dptr = 0; u32 tag = 0; @@ -3077,12 +2842,10 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev) irh->vlan = skb_vlan_tag_get(skb) & 0xfff; } - xmit_more = skb->xmit_more; - if (unlikely(cmdsetup.s.timestamp)) - status = send_nic_timestamp_pkt(oct, &ndata, finfo, xmit_more); + status = send_nic_timestamp_pkt(oct, &ndata, finfo); else - status = octnet_send_nic_data_pkt(oct, &ndata, xmit_more); + status = octnet_send_nic_data_pkt(oct, &ndata); if (status == IQ_SEND_FAILED) goto lio_xmit_failed; @@ -3249,31 +3012,6 @@ static int liquidio_vxlan_port_command(struct net_device *netdev, int command, return ret; } -int liquidio_set_feature(struct net_device *netdev, int cmd, u16 param1) -{ - struct lio *lio = GET_LIO(netdev); - struct octeon_device *oct = lio->oct_dev; - struct octnic_ctrl_pkt nctrl; - int ret = 0; - - memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt)); - - nctrl.ncmd.u64 = 0; - nctrl.ncmd.s.cmd = cmd; - nctrl.ncmd.s.param1 = param1; - nctrl.iq_no = lio->linfo.txpciq[0].s.q_no; - nctrl.wait_time = 100; - nctrl.netpndev = (u64)netdev; - nctrl.cb_fn = liquidio_link_ctrl_cmd_completion; - - ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl); - if (ret < 0) { - dev_err(&oct->pci_dev->dev, "Feature change failed in core (ret: 0x%x)\n", - ret); - } - return ret; -} - /** \brief Net device fix features * @param netdev pointer to network device * @param request features requested diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_console.c b/drivers/net/ethernet/cavium/liquidio/octeon_console.c index bbb50ea66f16..01a50f3b0c8e 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_console.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_console.c @@ -25,12 +25,13 @@ */ #include #include +#include #include "liquidio_common.h" #include "octeon_droq.h" #include "octeon_iq.h" #include "response_manager.h" #include "octeon_device.h" -#include "octeon_main.h" +#include "liquidio_image.h" #include "octeon_mem_ops.h" static void octeon_remote_lock(void); @@ -40,6 +41,10 @@ static u64 cvmx_bootmem_phy_named_block_find(struct octeon_device *oct, u32 flags); static int octeon_console_read(struct octeon_device *oct, u32 console_num, char *buffer, u32 buf_size); +static u32 console_bitmask; +module_param(console_bitmask, int, 0644); +MODULE_PARM_DESC(console_bitmask, + "Bitmask indicating which consoles have debug output redirected to syslog."); #define MIN(a, b) min((a), (b)) #define CAST_ULL(v) ((u64)(v)) @@ -177,6 +182,15 @@ struct octeon_pci_console_desc { __cvmx_bootmem_desc_get(oct, addr, \ offsetof(struct cvmx_bootmem_named_block_desc, field), \ SIZEOF_FIELD(struct cvmx_bootmem_named_block_desc, field)) +/** + * \brief determines if a given console has debug enabled. + * @param console console to check + * @returns 1 = enabled. 0 otherwise + */ +static int octeon_console_debug_enabled(u32 console) +{ + return (console_bitmask >> (console)) & 0x1; +} /** * This function is the implementation of the get macros defined @@ -709,3 +723,104 @@ static int octeon_console_read(struct octeon_device *oct, u32 console_num, return bytes_to_read; } + +#define FBUF_SIZE (4 * 1024 * 1024) +u8 fbuf[FBUF_SIZE]; + +int octeon_download_firmware(struct octeon_device *oct, const u8 *data, + size_t size) +{ + int ret = 0; + u8 *p = fbuf; + u32 crc32_result; + u64 load_addr; + u32 image_len; + struct octeon_firmware_file_header *h; + u32 i, rem; + + if (size < sizeof(struct octeon_firmware_file_header)) { + dev_err(&oct->pci_dev->dev, "Firmware file too small (%d < %d).\n", + (u32)size, + (u32)sizeof(struct octeon_firmware_file_header)); + return -EINVAL; + } + + h = (struct octeon_firmware_file_header *)data; + + if (be32_to_cpu(h->magic) != LIO_NIC_MAGIC) { + dev_err(&oct->pci_dev->dev, "Unrecognized firmware file.\n"); + return -EINVAL; + } + + crc32_result = crc32((unsigned int)~0, data, + sizeof(struct octeon_firmware_file_header) - + sizeof(u32)) ^ ~0U; + if (crc32_result != be32_to_cpu(h->crc32)) { + dev_err(&oct->pci_dev->dev, "Firmware CRC mismatch (0x%08x != 0x%08x).\n", + crc32_result, be32_to_cpu(h->crc32)); + return -EINVAL; + } + + if (strncmp(LIQUIDIO_PACKAGE, h->version, strlen(LIQUIDIO_PACKAGE))) { + dev_err(&oct->pci_dev->dev, "Unmatched firmware package type. Expected %s, got %s.\n", + LIQUIDIO_PACKAGE, h->version); + return -EINVAL; + } + + if (memcmp(LIQUIDIO_BASE_VERSION, h->version + strlen(LIQUIDIO_PACKAGE), + strlen(LIQUIDIO_BASE_VERSION))) { + dev_err(&oct->pci_dev->dev, "Unmatched firmware version. Expected %s.x, got %s.\n", + LIQUIDIO_BASE_VERSION, + h->version + strlen(LIQUIDIO_PACKAGE)); + return -EINVAL; + } + + if (be32_to_cpu(h->num_images) > LIO_MAX_IMAGES) { + dev_err(&oct->pci_dev->dev, "Too many images in firmware file (%d).\n", + be32_to_cpu(h->num_images)); + return -EINVAL; + } + + dev_info(&oct->pci_dev->dev, "Firmware version: %s\n", h->version); + snprintf(oct->fw_info.liquidio_firmware_version, 32, "LIQUIDIO: %s", + h->version); + + data += sizeof(struct octeon_firmware_file_header); + + dev_info(&oct->pci_dev->dev, "%s: Loading %d images\n", __func__, + be32_to_cpu(h->num_images)); + /* load all images */ + for (i = 0; i < be32_to_cpu(h->num_images); i++) { + load_addr = be64_to_cpu(h->desc[i].addr); + image_len = be32_to_cpu(h->desc[i].len); + + dev_info(&oct->pci_dev->dev, "Loading firmware %d at %llx\n", + image_len, load_addr); + + /* Write in 4MB chunks*/ + rem = image_len; + + while (rem) { + if (rem < FBUF_SIZE) + size = rem; + else + size = FBUF_SIZE; + + memcpy(p, data, size); + + /* download the image */ + octeon_pci_write_core_mem(oct, load_addr, p, (u32)size); + + data += size; + rem -= (u32)size; + load_addr += size; + } + } + dev_info(&oct->pci_dev->dev, "Writing boot command: %s\n", + h->bootcmd); + + /* Invoke the bootcmd */ + ret = octeon_console_send_cmd(oct, h->bootcmd, 50); + + return 0; +} diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.c b/drivers/net/ethernet/cavium/liquidio/octeon_device.c index 0eb504a4379a..cff845ce8625 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_device.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.c @@ -20,7 +20,6 @@ * Contact Cavium, Inc. for more information **********************************************************************/ #include -#include #include #include #include "liquidio_common.h" @@ -32,8 +31,6 @@ #include "octeon_network.h" #include "cn66xx_regs.h" #include "cn66xx_device.h" -#include "liquidio_image.h" -#include "octeon_mem_ops.h" /** Default configuration * for CN66XX OCTEON Models. @@ -541,107 +538,6 @@ static char *get_oct_app_string(u32 app_mode) return oct_dev_app_str[CVM_DRV_INVALID_APP - CVM_DRV_APP_START]; } -u8 fbuf[4 * 1024 * 1024]; - -int octeon_download_firmware(struct octeon_device *oct, const u8 *data, - size_t size) -{ - int ret = 0; - u8 *p = fbuf; - u32 crc32_result; - u64 load_addr; - u32 image_len; - struct octeon_firmware_file_header *h; - u32 i, rem, base_len = strlen(LIQUIDIO_BASE_VERSION); - char *base; - - if (size < sizeof(struct octeon_firmware_file_header)) { - dev_err(&oct->pci_dev->dev, "Firmware file too small (%d < %d).\n", - (u32)size, - (u32)sizeof(struct octeon_firmware_file_header)); - return -EINVAL; - } - - h = (struct octeon_firmware_file_header *)data; - - if (be32_to_cpu(h->magic) != LIO_NIC_MAGIC) { - dev_err(&oct->pci_dev->dev, "Unrecognized firmware file.\n"); - return -EINVAL; - } - - crc32_result = crc32((unsigned int)~0, data, - sizeof(struct octeon_firmware_file_header) - - sizeof(u32)) ^ ~0U; - if (crc32_result != be32_to_cpu(h->crc32)) { - dev_err(&oct->pci_dev->dev, "Firmware CRC mismatch (0x%08x != 0x%08x).\n", - crc32_result, be32_to_cpu(h->crc32)); - return -EINVAL; - } - - if (strncmp(LIQUIDIO_PACKAGE, h->version, strlen(LIQUIDIO_PACKAGE))) { - dev_err(&oct->pci_dev->dev, "Unmatched firmware package type. Expected %s, got %s.\n", - LIQUIDIO_PACKAGE, h->version); - return -EINVAL; - } - - base = h->version + strlen(LIQUIDIO_PACKAGE); - ret = memcmp(LIQUIDIO_BASE_VERSION, base, base_len); - if (ret) { - dev_err(&oct->pci_dev->dev, "Unmatched firmware version. Expected %s.x, got %s.\n", - LIQUIDIO_BASE_VERSION, base); - return -EINVAL; - } - - if (be32_to_cpu(h->num_images) > LIO_MAX_IMAGES) { - dev_err(&oct->pci_dev->dev, "Too many images in firmware file (%d).\n", - be32_to_cpu(h->num_images)); - return -EINVAL; - } - - dev_info(&oct->pci_dev->dev, "Firmware version: %s\n", h->version); - snprintf(oct->fw_info.liquidio_firmware_version, 32, "LIQUIDIO: %s", - h->version); - - data += sizeof(struct octeon_firmware_file_header); - - dev_info(&oct->pci_dev->dev, "%s: Loading %d images\n", __func__, - be32_to_cpu(h->num_images)); - /* load all images */ - for (i = 0; i < be32_to_cpu(h->num_images); i++) { - load_addr = be64_to_cpu(h->desc[i].addr); - image_len = be32_to_cpu(h->desc[i].len); - - dev_info(&oct->pci_dev->dev, "Loading firmware %d at %llx\n", - image_len, load_addr); - - /* Write in 4MB chunks*/ - rem = image_len; - - while (rem) { - if (rem < (4 * 1024 * 1024)) - size = rem; - else - size = 4 * 1024 * 1024; - - memcpy(p, data, size); - - /* download the image */ - octeon_pci_write_core_mem(oct, load_addr, p, (u32)size); - - data += size; - rem -= (u32)size; - load_addr += size; - } - } - dev_info(&oct->pci_dev->dev, "Writing boot command: %s\n", - h->bootcmd); - - /* Invoke the bootcmd */ - ret = octeon_console_send_cmd(oct, h->bootcmd, 50); - - return 0; -} - void octeon_free_device_mem(struct octeon_device *oct) { int i; diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.h b/drivers/net/ethernet/cavium/liquidio/octeon_device.h index 01edfb404346..d1251f4c55de 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_device.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.h @@ -200,7 +200,6 @@ struct octeon_fn_list { irqreturn_t (*process_interrupt_regs)(void *); int (*soft_reset)(struct octeon_device *); int (*setup_device_regs)(struct octeon_device *); - void (*reinit_regs)(struct octeon_device *); void (*bar1_idx_setup)(struct octeon_device *, u64, u32, int); void (*bar1_idx_write)(struct octeon_device *, u32, u32); u32 (*bar1_idx_read)(struct octeon_device *, u32); diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_main.h b/drivers/net/ethernet/cavium/liquidio/octeon_main.h index bc14e4c27332..ebeef95ed9b0 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_main.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_main.h @@ -38,12 +38,26 @@ #define DRV_NAME "LiquidIO" -/** - * \brief determines if a given console has debug enabled. - * @param console console to check - * @returns 1 = enabled. 0 otherwise +/** This structure is used by NIC driver to store information required + * to free the sk_buff when the packet has been fetched by Octeon. + * Bytes offset below assume worst-case of a 64-bit system. */ -int octeon_console_debug_enabled(u32 console); +struct octnet_buf_free_info { + /** Bytes 1-8. Pointer to network device private structure. */ + struct lio *lio; + + /** Bytes 9-16. Pointer to sk_buff. */ + struct sk_buff *skb; + + /** Bytes 17-24. Pointer to gather list. */ + struct octnic_gather *g; + + /** Bytes 25-32. Physical address of skb->data or gather list. */ + u64 dptr; + + /** Bytes 33-47. Piggybacked soft command, if any */ + struct octeon_soft_command *sc; +}; /* BQL-related functions */ void octeon_report_sent_bytes_to_bql(void *buf, int reqtype); diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_mem_ops.c b/drivers/net/ethernet/cavium/liquidio/octeon_mem_ops.c index 95a4bbedf557..0dc081a99b30 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_mem_ops.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_mem_ops.c @@ -19,7 +19,6 @@ * This file may also be available under a different license from Cavium. * Contact Cavium, Inc. for more information **********************************************************************/ -#include #include #include "liquidio_common.h" #include "octeon_droq.h" diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_network.h b/drivers/net/ethernet/cavium/liquidio/octeon_network.h index fb820dc7fcb7..7cfbdf9b039c 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_network.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_network.h @@ -26,8 +26,6 @@ #ifndef __OCTEON_NETWORK_H__ #define __OCTEON_NETWORK_H__ -#include -#include #include #define LIO_MAX_MTU_SIZE (OCTNET_MAX_FRM_SIZE - OCTNET_FRM_HEADER_SIZE) diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_nic.c b/drivers/net/ethernet/cavium/liquidio/octeon_nic.c index 166727be928f..0c4013de599a 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_nic.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_nic.c @@ -19,7 +19,6 @@ * This file may also be available under a different license from Cavium. * Contact Cavium, Inc. for more information **********************************************************************/ -#include #include #include #include "liquidio_common.h" @@ -73,12 +72,9 @@ octeon_alloc_soft_command_resp(struct octeon_device *oct, } int octnet_send_nic_data_pkt(struct octeon_device *oct, - struct octnic_data_pkt *ndata, - u32 xmit_more) + struct octnic_data_pkt *ndata) { - int ring_doorbell; - - ring_doorbell = !xmit_more; + int ring_doorbell = 1; return octeon_send_command(oct, ndata->q_no, ring_doorbell, &ndata->cmd, ndata->buf, ndata->datasize, diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_nic.h b/drivers/net/ethernet/cavium/liquidio/octeon_nic.h index b71a2bbe4bee..e55400c91574 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_nic.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_nic.h @@ -278,7 +278,7 @@ octeon_alloc_soft_command_resp(struct octeon_device *oct, * queue should be stopped, and IQ_SEND_OK if it sent okay. */ int octnet_send_nic_data_pkt(struct octeon_device *oct, - struct octnic_data_pkt *ndata, u32 xmit_more); + struct octnic_data_pkt *ndata); /** Send a NIC control packet to the device * @param oct - octeon device pointer From 83101ce338fc4448000ed0acb71352c1c2c4331b Mon Sep 17 00:00:00 2001 From: Raghu Vatsavayi Date: Wed, 31 Aug 2016 11:03:21 -0700 Subject: [PATCH 0604/1715] liquidio: Firmware version management This patch contains changes for firmware version management. Signed-off-by: Derek Chickles Signed-off-by: Satanand Burla Signed-off-by: Felix Manlunas Signed-off-by: Raghu Vatsavayi Signed-off-by: David S. Miller --- .../net/ethernet/cavium/liquidio/lio_main.c | 12 +++++++++-- .../cavium/liquidio/liquidio_common.h | 20 ++++++++++++++++--- 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index 2abc110eaef8..1bbeae880232 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -3230,8 +3230,9 @@ static int setup_nic_devices(struct octeon_device *octeon_dev) union oct_nic_if_cfg if_cfg; unsigned int base_queue; unsigned int gmx_port_id; - u32 resp_size, ctx_size; + u32 resp_size, ctx_size, data_size; u32 ifidx_or_pfnum; + struct lio_version *vdata; /* This is to handle link status changes */ octeon_register_dispatch_fn(octeon_dev, OPCODE_NIC, @@ -3253,11 +3254,18 @@ static int setup_nic_devices(struct octeon_device *octeon_dev) for (i = 0; i < octeon_dev->ifcount; i++) { resp_size = sizeof(struct liquidio_if_cfg_resp); ctx_size = sizeof(struct liquidio_if_cfg_context); + data_size = sizeof(struct lio_version); sc = (struct octeon_soft_command *) - octeon_alloc_soft_command(octeon_dev, 0, + octeon_alloc_soft_command(octeon_dev, data_size, resp_size, ctx_size); resp = (struct liquidio_if_cfg_resp *)sc->virtrptr; ctx = (struct liquidio_if_cfg_context *)sc->ctxptr; + vdata = (struct lio_version *)sc->virtdptr; + + *((u64 *)vdata) = 0; + vdata->major = cpu_to_be16(LIQUIDIO_BASE_MAJOR_VERSION); + vdata->minor = cpu_to_be16(LIQUIDIO_BASE_MINOR_VERSION); + vdata->micro = cpu_to_be16(LIQUIDIO_BASE_MICRO_VERSION); num_iqueues = CFG_GET_NUM_TXQS_NIC_IF(octeon_get_conf(octeon_dev), i); diff --git a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h index 199a8b9c7dc5..11df55a5b246 100644 --- a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h +++ b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h @@ -30,10 +30,24 @@ #include "octeon_config.h" -#define LIQUIDIO_BASE_VERSION "1.4" -#define LIQUIDIO_MICRO_VERSION ".1" #define LIQUIDIO_PACKAGE "" -#define LIQUIDIO_VERSION "1.4.1" +#define LIQUIDIO_BASE_MAJOR_VERSION 1 +#define LIQUIDIO_BASE_MINOR_VERSION 4 +#define LIQUIDIO_BASE_MICRO_VERSION 1 +#define LIQUIDIO_BASE_VERSION __stringify(LIQUIDIO_BASE_MAJOR_VERSION) "." \ + __stringify(LIQUIDIO_BASE_MINOR_VERSION) +#define LIQUIDIO_MICRO_VERSION "." __stringify(LIQUIDIO_BASE_MICRO_VERSION) +#define LIQUIDIO_VERSION LIQUIDIO_PACKAGE \ + __stringify(LIQUIDIO_BASE_MAJOR_VERSION) "." \ + __stringify(LIQUIDIO_BASE_MINOR_VERSION) \ + "." __stringify(LIQUIDIO_BASE_MICRO_VERSION) + +struct lio_version { + u16 major; + u16 minor; + u16 micro; + u16 reserved; +}; #define CONTROL_IQ 0 /** Tag types used by Octeon cores in its work. */ From cd8b1eb4e59e7d20409a1330abe662b996c54b00 Mon Sep 17 00:00:00 2001 From: Raghu Vatsavayi Date: Wed, 31 Aug 2016 11:03:22 -0700 Subject: [PATCH 0605/1715] liquidio: Common enable irq function Add support of common irq enable functionality for both iq(instruction queue) and oq(output queue). Signed-off-by: Derek Chickles Signed-off-by: Satanand Burla Signed-off-by: Felix Manlunas Signed-off-by: Raghu Vatsavayi Signed-off-by: David S. Miller --- .../net/ethernet/cavium/liquidio/lio_main.c | 1 + .../cavium/liquidio/liquidio_common.h | 2 +- .../ethernet/cavium/liquidio/octeon_device.c | 17 ++++++++++ .../ethernet/cavium/liquidio/octeon_device.h | 2 ++ .../ethernet/cavium/liquidio/octeon_droq.c | 33 +++++++++++-------- .../ethernet/cavium/liquidio/octeon_droq.h | 2 ++ .../net/ethernet/cavium/liquidio/octeon_iq.h | 2 ++ .../cavium/liquidio/request_manager.c | 3 ++ 8 files changed, 48 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index 1bbeae880232..8f11a0b4a7ad 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -192,6 +192,7 @@ static void octeon_droq_bh(unsigned long pdev) continue; reschedule |= octeon_droq_process_packets(oct, oct->droq[q_no], MAX_PACKET_BUDGET); + lio_enable_irq(oct->droq[q_no], NULL); } if (reschedule) diff --git a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h index 11df55a5b246..8ffd3b8c4d0f 100644 --- a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h +++ b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h @@ -846,7 +846,7 @@ struct oct_mdio_cmd { /* intrmod: max. packets to trigger interrupt */ #define LIO_INTRMOD_RXMAXCNT_TRIGGER 384 /* intrmod: min. packets to trigger interrupt */ -#define LIO_INTRMOD_RXMINCNT_TRIGGER 1 +#define LIO_INTRMOD_RXMINCNT_TRIGGER 0 /* intrmod: max. time to trigger interrupt */ #define LIO_INTRMOD_RXMAXTMR_TRIGGER 128 /* 66xx:intrmod: min. time to trigger interrupt diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.c b/drivers/net/ethernet/cavium/liquidio/octeon_device.c index cff845ce8625..541137a60145 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_device.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.c @@ -1122,3 +1122,20 @@ int lio_get_device_id(void *dev) return octeon_dev->octeon_id; return -1; } + +void lio_enable_irq(struct octeon_droq *droq, struct octeon_instr_queue *iq) +{ + /* the whole thing needs to be atomic, ideally */ + if (droq) { + spin_lock_bh(&droq->lock); + writel(droq->pkt_count, droq->pkts_sent_reg); + droq->pkt_count = 0; + spin_unlock_bh(&droq->lock); + } + if (iq) { + spin_lock_bh(&iq->lock); + writel(iq->pkt_in_done, iq->inst_cnt_reg); + iq->pkt_in_done = 0; + spin_unlock_bh(&iq->lock); + } +} diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.h b/drivers/net/ethernet/cavium/liquidio/octeon_device.h index d1251f4c55de..02e9854477a1 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_device.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.h @@ -660,6 +660,8 @@ void *oct_get_config_info(struct octeon_device *oct, u16 card_type); */ struct octeon_config *octeon_get_conf(struct octeon_device *oct); +void lio_enable_irq(struct octeon_droq *droq, struct octeon_instr_queue *iq); + /* LiquidIO driver pivate flags */ enum { OCT_PRIV_FLAG_TX_BYTES = 0, /* Tx interrupts by pending byte count */ diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c index e0afe4c1fd01..5dfc23d70d05 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c @@ -92,22 +92,25 @@ static inline void *octeon_get_dispatch_arg(struct octeon_device *octeon_dev, return fn_arg; } -/** Check for packets on Droq. This function should be called with - * lock held. +/** Check for packets on Droq. This function should be called with lock held. * @param droq - Droq on which count is checked. * @return Returns packet count. */ u32 octeon_droq_check_hw_for_pkts(struct octeon_droq *droq) { u32 pkt_count = 0; + u32 last_count; pkt_count = readl(droq->pkts_sent_reg); - if (pkt_count) { - atomic_add(pkt_count, &droq->pkts_pending); - writel(pkt_count, droq->pkts_sent_reg); - } - return pkt_count; + last_count = pkt_count - droq->pkt_count; + droq->pkt_count = pkt_count; + + /* we shall write to cnts at napi irq enable or end of droq tasklet */ + if (last_count) + atomic_add(last_count, &droq->pkts_pending); + + return last_count; } static void octeon_droq_compute_max_packet_bufs(struct octeon_droq *droq) @@ -735,16 +738,20 @@ octeon_droq_process_packets(struct octeon_device *oct, u32 pkt_count = 0, pkts_processed = 0; struct list_head *tmp, *tmp2; + /* Grab the droq lock */ + spin_lock(&droq->lock); + + octeon_droq_check_hw_for_pkts(droq); pkt_count = atomic_read(&droq->pkts_pending); - if (!pkt_count) + + if (!pkt_count) { + spin_unlock(&droq->lock); return 0; + } if (pkt_count > budget) pkt_count = budget; - /* Grab the droq lock */ - spin_lock(&droq->lock); - pkts_processed = octeon_droq_fast_process_packets(oct, droq, pkt_count); atomic_sub(pkts_processed, &droq->pkts_pending); @@ -789,6 +796,8 @@ octeon_droq_process_poll_pkts(struct octeon_device *oct, spin_lock(&droq->lock); while (total_pkts_processed < budget) { + octeon_droq_check_hw_for_pkts(droq); + pkts_available = CVM_MIN((budget - total_pkts_processed), (u32)(atomic_read(&droq->pkts_pending))); @@ -803,8 +812,6 @@ octeon_droq_process_poll_pkts(struct octeon_device *oct, atomic_sub(pkts_processed, &droq->pkts_pending); total_pkts_processed += pkts_processed; - - octeon_droq_check_hw_for_pkts(droq); } spin_unlock(&droq->lock); diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.h b/drivers/net/ethernet/cavium/liquidio/octeon_droq.h index 5a6fb9113bbd..5be002d5dba4 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.h @@ -261,6 +261,8 @@ struct octeon_droq { u32 q_no; + u32 pkt_count; + struct octeon_droq_ops ops; struct octeon_device *oct_dev; diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_iq.h b/drivers/net/ethernet/cavium/liquidio/octeon_iq.h index ff4b1d6f007b..e4d426ba18dc 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_iq.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_iq.h @@ -88,6 +88,8 @@ struct octeon_instr_queue { /** A spinlock to protect while posting on the ring. */ spinlock_t post_lock; + u32 pkt_in_done; + /** A spinlock to protect access to the input ring.*/ spinlock_t iq_flush_running_lock; diff --git a/drivers/net/ethernet/cavium/liquidio/request_manager.c b/drivers/net/ethernet/cavium/liquidio/request_manager.c index d32492f185ff..3ee5d023c789 100644 --- a/drivers/net/ethernet/cavium/liquidio/request_manager.c +++ b/drivers/net/ethernet/cavium/liquidio/request_manager.c @@ -499,6 +499,7 @@ static void __check_db_timeout(struct octeon_device *oct, u64 iq_no) if (!oct) return; + iq = oct->instr_queue[iq_no]; if (!iq) return; @@ -514,6 +515,8 @@ static void __check_db_timeout(struct octeon_device *oct, u64 iq_no) /* Flush the instruction queue */ octeon_flush_iq(oct, iq, 1, 0); + + lio_enable_irq(NULL, iq); } /* Called by the Poll thread at regular intervals to check the instruction From 5bc67f587ba7eaa8dd3976bfeff8f2b60718c871 Mon Sep 17 00:00:00 2001 From: Raghu Vatsavayi Date: Wed, 31 Aug 2016 11:03:23 -0700 Subject: [PATCH 0606/1715] liquidio: CN23XX register definitions This patch adds register definitions and structures for new device cn23xx. Signed-off-by: Derek Chickles Signed-off-by: Satanand Burla Signed-off-by: Felix Manlunas Signed-off-by: Raghu Vatsavayi Signed-off-by: David S. Miller --- .../ethernet/cavium/liquidio/cn23xx_pf_regs.h | 604 ++++++++++++++++++ 1 file changed, 604 insertions(+) create mode 100644 drivers/net/ethernet/cavium/liquidio/cn23xx_pf_regs.h diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_regs.h b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_regs.h new file mode 100644 index 000000000000..03d79d95ab75 --- /dev/null +++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_regs.h @@ -0,0 +1,604 @@ +/********************************************************************** +* Author: Cavium, Inc. +* +* Contact: support@cavium.com +* Please include "LiquidIO" in the subject. +* +* Copyright (c) 2003-2015 Cavium, Inc. +* +* This file is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License, Version 2, as +* published by the Free Software Foundation. +* +* This file is distributed in the hope that it will be useful, but +* AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty +* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or +* NONINFRINGEMENT. See the GNU General Public License for more +* details. +* +* This file may also be available under a different license from Cavium. +* Contact Cavium, Inc. for more information +**********************************************************************/ + +/*! \file cn23xx_regs.h + * \brief Host Driver: Register Address and Register Mask values for + * Octeon CN23XX devices. +*/ + +#ifndef __CN23XX_PF_REGS_H__ +#define __CN23XX_PF_REGS_H__ + +#define CN23XX_CONFIG_VENDOR_ID 0x00 +#define CN23XX_CONFIG_DEVICE_ID 0x02 + +#define CN23XX_CONFIG_XPANSION_BAR 0x38 + +#define CN23XX_CONFIG_MSIX_CAP 0x50 +#define CN23XX_CONFIG_MSIX_LMSI 0x54 +#define CN23XX_CONFIG_MSIX_UMSI 0x58 +#define CN23XX_CONFIG_MSIX_MSIMD 0x5C +#define CN23XX_CONFIG_MSIX_MSIMM 0x60 +#define CN23XX_CONFIG_MSIX_MSIMP 0x64 + +#define CN23XX_CONFIG_PCIE_CAP 0x70 +#define CN23XX_CONFIG_PCIE_DEVCAP 0x74 +#define CN23XX_CONFIG_PCIE_DEVCTL 0x78 +#define CN23XX_CONFIG_PCIE_LINKCAP 0x7C +#define CN23XX_CONFIG_PCIE_LINKCTL 0x80 +#define CN23XX_CONFIG_PCIE_SLOTCAP 0x84 +#define CN23XX_CONFIG_PCIE_SLOTCTL 0x88 +#define CN23XX_CONFIG_PCIE_DEVCTL2 0x98 +#define CN23XX_CONFIG_PCIE_LINKCTL2 0xA0 +#define CN23XX_CONFIG_PCIE_UNCORRECT_ERR_MASK 0x108 +#define CN23XX_CONFIG_PCIE_CORRECT_ERR_STATUS 0x110 +#define CN23XX_CONFIG_PCIE_DEVCTL_MASK 0x00040000 + +#define CN23XX_PCIE_SRIOV_FDL 0x188 +#define CN23XX_PCIE_SRIOV_FDL_BIT_POS 0x10 +#define CN23XX_PCIE_SRIOV_FDL_MASK 0xFF + +#define CN23XX_CONFIG_PCIE_FLTMSK 0x720 + +#define CN23XX_CONFIG_SRIOV_VFDEVID 0x190 + +#define CN23XX_CONFIG_SRIOV_BAR_START 0x19C +#define CN23XX_CONFIG_SRIOV_BARX(i) \ + (CN23XX_CONFIG_SRIOV_BAR_START + (i * 4)) +#define CN23XX_CONFIG_SRIOV_BAR_PF 0x08 +#define CN23XX_CONFIG_SRIOV_BAR_64BIT 0x04 +#define CN23XX_CONFIG_SRIOV_BAR_IO 0x01 + +/* ############## BAR0 Registers ################ */ + +#define CN23XX_SLI_CTL_PORT_START 0x286E0 +#define CN23XX_PORT_OFFSET 0x10 + +#define CN23XX_SLI_CTL_PORT(p) \ + (CN23XX_SLI_CTL_PORT_START + ((p) * CN23XX_PORT_OFFSET)) + +/* 2 scatch registers (64-bit) */ +#define CN23XX_SLI_WINDOW_CTL 0x282E0 +#define CN23XX_SLI_SCRATCH1 0x283C0 +#define CN23XX_SLI_SCRATCH2 0x283D0 +#define CN23XX_SLI_WINDOW_CTL_DEFAULT 0x200000ULL + +/* 1 registers (64-bit) - SLI_CTL_STATUS */ +#define CN23XX_SLI_CTL_STATUS 0x28570 + +/* SLI Packet Input Jabber Register (64 bit register) + * <31:0> for Byte count for limiting sizes of packet sizes + * that are allowed for sli packet inbound packets. + * the default value is 0xFA00(=64000). + */ +#define CN23XX_SLI_PKT_IN_JABBER 0x29170 +/* The input jabber is used to determine the TSO max size. + * Due to H/W limitation, this need to be reduced to 60000 + * in order to to H/W TSO and avoid the WQE malfarmation + * PKO_BUG_24989_WQE_LEN + */ +#define CN23XX_DEFAULT_INPUT_JABBER 0xEA60 /*60000*/ + +#define CN23XX_WIN_WR_ADDR_LO 0x20000 +#define CN23XX_WIN_WR_ADDR_HI 0x20004 +#define CN23XX_WIN_WR_ADDR64 CN23XX_WIN_WR_ADDR_LO + +#define CN23XX_WIN_RD_ADDR_LO 0x20010 +#define CN23XX_WIN_RD_ADDR_HI 0x20014 +#define CN23XX_WIN_RD_ADDR64 CN23XX_WIN_RD_ADDR_LO + +#define CN23XX_WIN_WR_DATA_LO 0x20020 +#define CN23XX_WIN_WR_DATA_HI 0x20024 +#define CN23XX_WIN_WR_DATA64 CN23XX_WIN_WR_DATA_LO + +#define CN23XX_WIN_RD_DATA_LO 0x20040 +#define CN23XX_WIN_RD_DATA_HI 0x20044 +#define CN23XX_WIN_RD_DATA64 CN23XX_WIN_RD_DATA_LO + +#define CN23XX_WIN_WR_MASK_LO 0x20030 +#define CN23XX_WIN_WR_MASK_HI 0x20034 +#define CN23XX_WIN_WR_MASK_REG CN23XX_WIN_WR_MASK_LO +#define CN23XX_SLI_MAC_CREDIT_CNT 0x23D70 + +/* 4 registers (64-bit) for mapping IOQs to MACs(PEMs)- + * SLI_PKT_MAC(0..3)_PF(0..1)_RINFO + */ +#define CN23XX_SLI_PKT_MAC_RINFO_START64 0x29030 + +/*1 register (64-bit) to determine whether IOQs are in reset. */ +#define CN23XX_SLI_PKT_IOQ_RING_RST 0x291E0 + +/* Each Input Queue register is at a 16-byte Offset in BAR0 */ +#define CN23XX_IQ_OFFSET 0x20000 + +#define CN23XX_MAC_RINFO_OFFSET 0x20 +#define CN23XX_PF_RINFO_OFFSET 0x10 + +#define CN23XX_SLI_PKT_MAC_RINFO64(mac, pf) \ + (CN23XX_SLI_PKT_MAC_RINFO_START64 + \ + ((mac) * CN23XX_MAC_RINFO_OFFSET) + \ + ((pf) * CN23XX_PF_RINFO_OFFSET)) + +/** mask for total rings, setting TRS to base */ +#define CN23XX_PKT_MAC_CTL_RINFO_TRS BIT_ULL(16) +/** mask for starting ring number: setting SRN <6:0> = 0x7F */ +#define CN23XX_PKT_MAC_CTL_RINFO_SRN (0x7F) + +/* Starting bit of the TRS field in CN23XX_SLI_PKT_MAC_RINFO64 register */ +#define CN23XX_PKT_MAC_CTL_RINFO_TRS_BIT_POS 16 +/* Starting bit of SRN field in CN23XX_SLI_PKT_MAC_RINFO64 register */ +#define CN23XX_PKT_MAC_CTL_RINFO_SRN_BIT_POS 0 +/* Starting bit of RPVF field in CN23XX_SLI_PKT_MAC_RINFO64 register */ +#define CN23XX_PKT_MAC_CTL_RINFO_RPVF_BIT_POS 32 +/* Starting bit of NVFS field in CN23XX_SLI_PKT_MAC_RINFO64 register */ +#define CN23XX_PKT_MAC_CTL_RINFO_NVFS_BIT_POS 48 + +/*###################### REQUEST QUEUE #########################*/ + +/* 64 registers for Input Queue Instr Count - SLI_PKT_IN_DONE0_CNTS */ +#define CN23XX_SLI_IQ_INSTR_COUNT_START64 0x10040 + +/* 64 registers for Input Queues Start Addr - SLI_PKT0_INSTR_BADDR */ +#define CN23XX_SLI_IQ_BASE_ADDR_START64 0x10010 + +/* 64 registers for Input Doorbell - SLI_PKT0_INSTR_BAOFF_DBELL */ +#define CN23XX_SLI_IQ_DOORBELL_START 0x10020 + +/* 64 registers for Input Queue size - SLI_PKT0_INSTR_FIFO_RSIZE */ +#define CN23XX_SLI_IQ_SIZE_START 0x10030 + +/* 64 registers (64-bit) - ES, RO, NS, Arbitration for Input Queue Data & + * gather list fetches. SLI_PKT(0..63)_INPUT_CONTROL. + */ +#define CN23XX_SLI_IQ_PKT_CONTROL_START64 0x10000 + +/*------- Request Queue Macros ---------*/ +#define CN23XX_SLI_IQ_PKT_CONTROL64(iq) \ + (CN23XX_SLI_IQ_PKT_CONTROL_START64 + ((iq) * CN23XX_IQ_OFFSET)) + +#define CN23XX_SLI_IQ_BASE_ADDR64(iq) \ + (CN23XX_SLI_IQ_BASE_ADDR_START64 + ((iq) * CN23XX_IQ_OFFSET)) + +#define CN23XX_SLI_IQ_SIZE(iq) \ + (CN23XX_SLI_IQ_SIZE_START + ((iq) * CN23XX_IQ_OFFSET)) + +#define CN23XX_SLI_IQ_DOORBELL(iq) \ + (CN23XX_SLI_IQ_DOORBELL_START + ((iq) * CN23XX_IQ_OFFSET)) + +#define CN23XX_SLI_IQ_INSTR_COUNT64(iq) \ + (CN23XX_SLI_IQ_INSTR_COUNT_START64 + ((iq) * CN23XX_IQ_OFFSET)) + +/*------------------ Masks ----------------*/ +#define CN23XX_PKT_INPUT_CTL_VF_NUM BIT_ULL(32) +#define CN23XX_PKT_INPUT_CTL_MAC_NUM BIT(29) +/* Number of instructions to be read in one MAC read request. + * setting to Max value(4) + */ +#define CN23XX_PKT_INPUT_CTL_RDSIZE (3 << 25) +#define CN23XX_PKT_INPUT_CTL_IS_64B BIT(24) +#define CN23XX_PKT_INPUT_CTL_RST BIT(23) +#define CN23XX_PKT_INPUT_CTL_QUIET BIT(28) +#define CN23XX_PKT_INPUT_CTL_RING_ENB BIT(22) +#define CN23XX_PKT_INPUT_CTL_DATA_NS BIT(8) +#define CN23XX_PKT_INPUT_CTL_DATA_ES_64B_SWAP BIT(6) +#define CN23XX_PKT_INPUT_CTL_DATA_RO BIT(5) +#define CN23XX_PKT_INPUT_CTL_USE_CSR BIT(4) +#define CN23XX_PKT_INPUT_CTL_GATHER_NS BIT(3) +#define CN23XX_PKT_INPUT_CTL_GATHER_ES_64B_SWAP (2) +#define CN23XX_PKT_INPUT_CTL_GATHER_RO (1) + +/** Rings per Virtual Function **/ +#define CN23XX_PKT_INPUT_CTL_RPVF_MASK (0x3F) +#define CN23XX_PKT_INPUT_CTL_RPVF_POS (48) +/** These bits[47:44] select the Physical function number within the MAC */ +#define CN23XX_PKT_INPUT_CTL_PF_NUM_MASK (0x7) +#define CN23XX_PKT_INPUT_CTL_PF_NUM_POS (45) +/** These bits[43:32] select the function number within the PF */ +#define CN23XX_PKT_INPUT_CTL_VF_NUM_MASK (0x1FFF) +#define CN23XX_PKT_INPUT_CTL_VF_NUM_POS (32) +#define CN23XX_PKT_INPUT_CTL_MAC_NUM_MASK (0x3) +#define CN23XX_PKT_INPUT_CTL_MAC_NUM_POS (29) +#define CN23XX_PKT_IN_DONE_WMARK_MASK (0xFFFFULL) +#define CN23XX_PKT_IN_DONE_WMARK_BIT_POS (32) +#define CN23XX_PKT_IN_DONE_CNT_MASK (0x00000000FFFFFFFFULL) + +#ifdef __LITTLE_ENDIAN_BITFIELD +#define CN23XX_PKT_INPUT_CTL_MASK \ + (CN23XX_PKT_INPUT_CTL_RDSIZE | \ + CN23XX_PKT_INPUT_CTL_DATA_ES_64B_SWAP | \ + CN23XX_PKT_INPUT_CTL_USE_CSR) +#else +#define CN23XX_PKT_INPUT_CTL_MASK \ + (CN23XX_PKT_INPUT_CTL_RDSIZE | \ + CN23XX_PKT_INPUT_CTL_DATA_ES_64B_SWAP | \ + CN23XX_PKT_INPUT_CTL_USE_CSR | \ + CN23XX_PKT_INPUT_CTL_GATHER_ES_64B_SWAP) +#endif + +/** Masks for SLI_PKT_IN_DONE(0..63)_CNTS Register */ +#define CN23XX_IN_DONE_CNTS_PI_INT BIT_ULL(62) +#define CN23XX_IN_DONE_CNTS_CINT_ENB BIT_ULL(48) + +/*############################ OUTPUT QUEUE #########################*/ + +/* 64 registers for Output queue control - SLI_PKT(0..63)_OUTPUT_CONTROL */ +#define CN23XX_SLI_OQ_PKT_CONTROL_START 0x10050 + +/* 64 registers for Output queue buffer and info size - SLI_PKT0_OUT_SIZE */ +#define CN23XX_SLI_OQ0_BUFF_INFO_SIZE 0x10060 + +/* 64 registers for Output Queue Start Addr - SLI_PKT0_SLIST_BADDR */ +#define CN23XX_SLI_OQ_BASE_ADDR_START64 0x10070 + +/* 64 registers for Output Queue Packet Credits - SLI_PKT0_SLIST_BAOFF_DBELL */ +#define CN23XX_SLI_OQ_PKT_CREDITS_START 0x10080 + +/* 64 registers for Output Queue size - SLI_PKT0_SLIST_FIFO_RSIZE */ +#define CN23XX_SLI_OQ_SIZE_START 0x10090 + +/* 64 registers for Output Queue Packet Count - SLI_PKT0_CNTS */ +#define CN23XX_SLI_OQ_PKT_SENT_START 0x100B0 + +/* 64 registers for Output Queue INT Levels - SLI_PKT0_INT_LEVELS */ +#define CN23XX_SLI_OQ_PKT_INT_LEVELS_START64 0x100A0 + +/* Each Output Queue register is at a 16-byte Offset in BAR0 */ +#define CN23XX_OQ_OFFSET 0x20000 + +/* 1 (64-bit register) for Output Queue backpressure across all rings. */ +#define CN23XX_SLI_OQ_WMARK 0x29180 + +/* Global pkt control register */ +#define CN23XX_SLI_GBL_CONTROL 0x29210 + +/* Backpressure enable register for PF0 */ +#define CN23XX_SLI_OUT_BP_EN_W1S 0x29260 + +/* Backpressure enable register for PF1 */ +#define CN23XX_SLI_OUT_BP_EN2_W1S 0x29270 + +/* Backpressure disable register for PF0 */ +#define CN23XX_SLI_OUT_BP_EN_W1C 0x29280 + +/* Backpressure disable register for PF1 */ +#define CN23XX_SLI_OUT_BP_EN2_W1C 0x29290 + +/*------- Output Queue Macros ---------*/ + +#define CN23XX_SLI_OQ_PKT_CONTROL(oq) \ + (CN23XX_SLI_OQ_PKT_CONTROL_START + ((oq) * CN23XX_OQ_OFFSET)) + +#define CN23XX_SLI_OQ_BASE_ADDR64(oq) \ + (CN23XX_SLI_OQ_BASE_ADDR_START64 + ((oq) * CN23XX_OQ_OFFSET)) + +#define CN23XX_SLI_OQ_SIZE(oq) \ + (CN23XX_SLI_OQ_SIZE_START + ((oq) * CN23XX_OQ_OFFSET)) + +#define CN23XX_SLI_OQ_BUFF_INFO_SIZE(oq) \ + (CN23XX_SLI_OQ0_BUFF_INFO_SIZE + ((oq) * CN23XX_OQ_OFFSET)) + +#define CN23XX_SLI_OQ_PKTS_SENT(oq) \ + (CN23XX_SLI_OQ_PKT_SENT_START + ((oq) * CN23XX_OQ_OFFSET)) + +#define CN23XX_SLI_OQ_PKTS_CREDIT(oq) \ + (CN23XX_SLI_OQ_PKT_CREDITS_START + ((oq) * CN23XX_OQ_OFFSET)) + +#define CN23XX_SLI_OQ_PKT_INT_LEVELS(oq) \ + (CN23XX_SLI_OQ_PKT_INT_LEVELS_START64 + \ + ((oq) * CN23XX_OQ_OFFSET)) + +/*Macro's for accessing CNT and TIME separately from INT_LEVELS*/ +#define CN23XX_SLI_OQ_PKT_INT_LEVELS_CNT(oq) \ + (CN23XX_SLI_OQ_PKT_INT_LEVELS_START64 + \ + ((oq) * CN23XX_OQ_OFFSET)) + +#define CN23XX_SLI_OQ_PKT_INT_LEVELS_TIME(oq) \ + (CN23XX_SLI_OQ_PKT_INT_LEVELS_START64 + \ + ((oq) * CN23XX_OQ_OFFSET) + 4) + +/*------------------ Masks ----------------*/ +#define CN23XX_PKT_OUTPUT_CTL_TENB BIT(13) +#define CN23XX_PKT_OUTPUT_CTL_CENB BIT(12) +#define CN23XX_PKT_OUTPUT_CTL_IPTR BIT(11) +#define CN23XX_PKT_OUTPUT_CTL_ES BIT(9) +#define CN23XX_PKT_OUTPUT_CTL_NSR BIT(8) +#define CN23XX_PKT_OUTPUT_CTL_ROR BIT(7) +#define CN23XX_PKT_OUTPUT_CTL_DPTR BIT(6) +#define CN23XX_PKT_OUTPUT_CTL_BMODE BIT(5) +#define CN23XX_PKT_OUTPUT_CTL_ES_P BIT(3) +#define CN23XX_PKT_OUTPUT_CTL_NSR_P BIT(2) +#define CN23XX_PKT_OUTPUT_CTL_ROR_P BIT(1) +#define CN23XX_PKT_OUTPUT_CTL_RING_ENB BIT(0) + +/*######################### Mailbox Reg Macros ########################*/ +#define CN23XX_SLI_PKT_MBOX_INT_START 0x10210 +#define CN23XX_SLI_PKT_PF_VF_MBOX_SIG_START 0x10200 +#define CN23XX_SLI_MAC_PF_MBOX_INT_START 0x27380 + +#define CN23XX_SLI_MBOX_OFFSET 0x20000 +#define CN23XX_SLI_MBOX_SIG_IDX_OFFSET 0x8 + +#define CN23XX_SLI_PKT_MBOX_INT(q) \ + (CN23XX_SLI_PKT_MBOX_INT_START + ((q) * CN23XX_SLI_MBOX_OFFSET)) + +#define CN23XX_SLI_PKT_PF_VF_MBOX_SIG(q, idx) \ + (CN23XX_SLI_PKT_PF_VF_MBOX_SIG_START + \ + ((q) * CN23XX_SLI_MBOX_OFFSET + \ + (idx) * CN23XX_SLI_MBOX_SIG_IDX_OFFSET)) + +#define CN23XX_SLI_MAC_PF_MBOX_INT(mac, pf) \ + (CN23XX_SLI_MAC_PF_MBOX_INT_START + \ + ((mac) * CN23XX_MAC_INT_OFFSET + \ + (pf) * CN23XX_PF_INT_OFFSET)) + +/*######################### DMA Counters #########################*/ + +/* 2 registers (64-bit) - DMA Count - 1 for each DMA counter 0/1. */ +#define CN23XX_DMA_CNT_START 0x28400 + +/* 2 registers (64-bit) - DMA Timer 0/1, contains DMA timer values */ +/* SLI_DMA_0_TIM */ +#define CN23XX_DMA_TIM_START 0x28420 + +/* 2 registers (64-bit) - DMA count & Time Interrupt threshold - + * SLI_DMA_0_INT_LEVEL + */ +#define CN23XX_DMA_INT_LEVEL_START 0x283E0 + +/* Each DMA register is at a 16-byte Offset in BAR0 */ +#define CN23XX_DMA_OFFSET 0x10 + +/*---------- DMA Counter Macros ---------*/ +#define CN23XX_DMA_CNT(dq) \ + (CN23XX_DMA_CNT_START + ((dq) * CN23XX_DMA_OFFSET)) + +#define CN23XX_DMA_INT_LEVEL(dq) \ + (CN23XX_DMA_INT_LEVEL_START + ((dq) * CN23XX_DMA_OFFSET)) + +#define CN23XX_DMA_PKT_INT_LEVEL(dq) \ + (CN23XX_DMA_INT_LEVEL_START + ((dq) * CN23XX_DMA_OFFSET)) + +#define CN23XX_DMA_TIME_INT_LEVEL(dq) \ + (CN23XX_DMA_INT_LEVEL_START + 4 + ((dq) * CN23XX_DMA_OFFSET)) + +#define CN23XX_DMA_TIM(dq) \ + (CN23XX_DMA_TIM_START + ((dq) * CN23XX_DMA_OFFSET)) + +/*######################## MSIX TABLE #########################*/ + +#define CN23XX_MSIX_TABLE_ADDR_START 0x0 +#define CN23XX_MSIX_TABLE_DATA_START 0x8 + +#define CN23XX_MSIX_TABLE_SIZE 0x10 +#define CN23XX_MSIX_TABLE_ENTRIES 0x41 + +#define CN23XX_MSIX_ENTRY_VECTOR_CTL BIT_ULL(32) + +#define CN23XX_MSIX_TABLE_ADDR(idx) \ + (CN23XX_MSIX_TABLE_ADDR_START + ((idx) * CN23XX_MSIX_TABLE_SIZE)) + +#define CN23XX_MSIX_TABLE_DATA(idx) \ + (CN23XX_MSIX_TABLE_DATA_START + ((idx) * CN23XX_MSIX_TABLE_SIZE)) + +/*######################## INTERRUPTS #########################*/ +#define CN23XX_MAC_INT_OFFSET 0x20 +#define CN23XX_PF_INT_OFFSET 0x10 + +/* 1 register (64-bit) for Interrupt Summary */ +#define CN23XX_SLI_INT_SUM64 0x27000 + +/* 4 registers (64-bit) for Interrupt Enable for each Port */ +#define CN23XX_SLI_INT_ENB64 0x27080 + +#define CN23XX_SLI_MAC_PF_INT_SUM64(mac, pf) \ + (CN23XX_SLI_INT_SUM64 + \ + ((mac) * CN23XX_MAC_INT_OFFSET) + \ + ((pf) * CN23XX_PF_INT_OFFSET)) + +#define CN23XX_SLI_MAC_PF_INT_ENB64(mac, pf) \ + (CN23XX_SLI_INT_ENB64 + \ + ((mac) * CN23XX_MAC_INT_OFFSET) + \ + ((pf) * CN23XX_PF_INT_OFFSET)) + +/* 1 register (64-bit) to indicate which Output Queue reached pkt threshold */ +#define CN23XX_SLI_PKT_CNT_INT 0x29130 + +/* 1 register (64-bit) to indicate which Output Queue reached time threshold */ +#define CN23XX_SLI_PKT_TIME_INT 0x29140 + +/*------------------ Interrupt Masks ----------------*/ + +#define CN23XX_INTR_PO_INT BIT_ULL(63) +#define CN23XX_INTR_PI_INT BIT_ULL(62) +#define CN23XX_INTR_MBOX_INT BIT_ULL(61) +#define CN23XX_INTR_RESEND BIT_ULL(60) + +#define CN23XX_INTR_CINT_ENB BIT_ULL(48) +#define CN23XX_INTR_MBOX_ENB BIT(0) + +#define CN23XX_INTR_RML_TIMEOUT_ERR (1) + +#define CN23XX_INTR_MIO_INT BIT(1) + +#define CN23XX_INTR_RESERVED1 (3 << 2) + +#define CN23XX_INTR_PKT_COUNT BIT(4) +#define CN23XX_INTR_PKT_TIME BIT(5) + +#define CN23XX_INTR_RESERVED2 (3 << 6) + +#define CN23XX_INTR_M0UPB0_ERR BIT(8) +#define CN23XX_INTR_M0UPWI_ERR BIT(9) +#define CN23XX_INTR_M0UNB0_ERR BIT(10) +#define CN23XX_INTR_M0UNWI_ERR BIT(11) + +#define CN23XX_INTR_RESERVED3 (0xFFFFFULL << 12) + +#define CN23XX_INTR_DMA0_FORCE BIT_ULL(32) +#define CN23XX_INTR_DMA1_FORCE BIT_ULL(33) + +#define CN23XX_INTR_DMA0_COUNT BIT_ULL(34) +#define CN23XX_INTR_DMA1_COUNT BIT_ULL(35) + +#define CN23XX_INTR_DMA0_TIME BIT_ULL(36) +#define CN23XX_INTR_DMA1_TIME BIT_ULL(37) + +#define CN23XX_INTR_RESERVED4 (0x7FFFFULL << 38) + +#define CN23XX_INTR_VF_MBOX BIT_ULL(57) +#define CN23XX_INTR_DMAVF_ERR BIT_ULL(58) +#define CN23XX_INTR_DMAPF_ERR BIT_ULL(59) + +#define CN23XX_INTR_PKTVF_ERR BIT_ULL(60) +#define CN23XX_INTR_PKTPF_ERR BIT_ULL(61) +#define CN23XX_INTR_PPVF_ERR BIT_ULL(62) +#define CN23XX_INTR_PPPF_ERR BIT_ULL(63) + +#define CN23XX_INTR_DMA0_DATA (CN23XX_INTR_DMA0_TIME) +#define CN23XX_INTR_DMA1_DATA (CN23XX_INTR_DMA1_TIME) + +#define CN23XX_INTR_DMA_DATA \ + (CN23XX_INTR_DMA0_DATA | CN23XX_INTR_DMA1_DATA) + +/* By fault only TIME based */ +#define CN23XX_INTR_PKT_DATA (CN23XX_INTR_PKT_TIME) +/* For both COUNT and TIME based */ +/* #define CN23XX_INTR_PKT_DATA \ + * (CN23XX_INTR_PKT_COUNT | CN23XX_INTR_PKT_TIME) + */ + +/* Sum of interrupts for all PCI-Express Data Interrupts */ +#define CN23XX_INTR_PCIE_DATA \ + (CN23XX_INTR_DMA_DATA | CN23XX_INTR_PKT_DAT) + +/* Sum of interrupts for error events */ +#define CN23XX_INTR_ERR \ + (CN23XX_INTR_M0UPB0_ERR | \ + CN23XX_INTR_M0UPWI_ERR | \ + CN23XX_INTR_M0UNB0_ERR | \ + CN23XX_INTR_M0UNWI_ERR | \ + CN23XX_INTR_DMAVF_ERR | \ + CN23XX_INTR_DMAPF_ERR | \ + CN23XX_INTR_PKTPF_ERR | \ + CN23XX_INTR_PPPF_ERR | \ + CN23XX_INTR_PPVF_ERR) + +/* Programmed Mask for Interrupt Sum */ +#define CN23XX_INTR_MASK \ + (CN23XX_INTR_DMA_DATA | \ + CN23XX_INTR_DMA0_FORCE | \ + CN23XX_INTR_DMA1_FORCE | \ + CN23XX_INTR_MIO_INT | \ + CN23XX_INTR_ERR) + +/* 4 Registers (64 - bit) */ +#define CN23XX_SLI_S2M_PORT_CTL_START 0x23D80 +#define CN23XX_SLI_S2M_PORTX_CTL(port) \ + (CN23XX_SLI_S2M_PORT_CTL_START + (port * 0x10)) + +#define CN23XX_SLI_MAC_NUMBER 0x20050 + +/** PEM(0..3)_BAR1_INDEX(0..15)address is defined as + * addr = (0x00011800C0000100 |port <<24 |idx <<3 ) + * Here, port is PEM(0..3) & idx is INDEX(0..15) + */ +#define CN23XX_PEM_BAR1_INDEX_START 0x00011800C0000100ULL +#define CN23XX_PEM_OFFSET 24 +#define CN23XX_BAR1_INDEX_OFFSET 3 + +#define CN23XX_PEM_BAR1_INDEX_REG(port, idx) \ + (CN23XX_PEM_BAR1_INDEX_START + ((port) << CN23XX_PEM_OFFSET) + \ + ((idx) << CN23XX_BAR1_INDEX_OFFSET)) + +/*############################ DPI #########################*/ + +/* 1 register (64-bit) - provides DMA Enable */ +#define CN23XX_DPI_CTL 0x0001df0000000040ULL + +/* 1 register (64-bit) - Controls the DMA IO Operation */ +#define CN23XX_DPI_DMA_CONTROL 0x0001df0000000048ULL + +/* 1 register (64-bit) - Provides DMA Instr'n Queue Enable */ +#define CN23XX_DPI_REQ_GBL_ENB 0x0001df0000000050ULL + +/* 1 register (64-bit) - DPI_REQ_ERR_RSP + * Indicates which Instr'n Queue received error response from the IO sub-system + */ +#define CN23XX_DPI_REQ_ERR_RSP 0x0001df0000000058ULL + +/* 1 register (64-bit) - DPI_REQ_ERR_RST + * Indicates which Instr'n Queue dropped an Instr'n + */ +#define CN23XX_DPI_REQ_ERR_RST 0x0001df0000000060ULL + +/* 6 register (64-bit) - DPI_DMA_ENG(0..5)_EN + * Provides DMA Engine Queue Enable + */ +#define CN23XX_DPI_DMA_ENG0_ENB 0x0001df0000000080ULL +#define CN23XX_DPI_DMA_ENG_ENB(eng) (CN23XX_DPI_DMA_ENG0_ENB + (eng * 8)) + +/* 8 register (64-bit) - DPI_DMA(0..7)_REQQ_CTL + * Provides control bits for transaction on 8 Queues + */ +#define CN23XX_DPI_DMA_REQQ0_CTL 0x0001df0000000180ULL +#define CN23XX_DPI_DMA_REQQ_CTL(q_no) \ + (CN23XX_DPI_DMA_REQQ0_CTL + (q_no * 8)) + +/* 6 register (64-bit) - DPI_ENG(0..5)_BUF + * Provides DMA Engine FIFO (Queue) Size + */ +#define CN23XX_DPI_DMA_ENG0_BUF 0x0001df0000000880ULL +#define CN23XX_DPI_DMA_ENG_BUF(eng) \ + (CN23XX_DPI_DMA_ENG0_BUF + (eng * 8)) + +/* 4 Registers (64-bit) */ +#define CN23XX_DPI_SLI_PRT_CFG_START 0x0001df0000000900ULL +#define CN23XX_DPI_SLI_PRTX_CFG(port) \ + (CN23XX_DPI_SLI_PRT_CFG_START + (port * 0x8)) + +/* Masks for DPI_DMA_CONTROL Register */ +#define CN23XX_DPI_DMA_COMMIT_MODE BIT_ULL(58) +#define CN23XX_DPI_DMA_PKT_EN BIT_ULL(56) +#define CN23XX_DPI_DMA_ENB (0x0FULL << 48) +/* Set the DMA Control, to update packet count not byte count sent by DMA, + * when we use Interrupt Coalescing (CA mode) + */ +#define CN23XX_DPI_DMA_O_ADD1 BIT(19) +/*selecting 64-bit Byte Swap Mode */ +#define CN23XX_DPI_DMA_O_ES BIT(15) +#define CN23XX_DPI_DMA_O_MODE BIT(14) + +#define CN23XX_DPI_DMA_CTL_MASK \ + (CN23XX_DPI_DMA_COMMIT_MODE | \ + CN23XX_DPI_DMA_PKT_EN | \ + CN23XX_DPI_DMA_O_ES | \ + CN23XX_DPI_DMA_O_MODE) + +/*############################ RST #########################*/ + +#define CN23XX_RST_BOOT 0x0001180006001600ULL +#define CN23XX_RST_SOFT_RST 0x0001180006001680ULL + +#define CN23XX_LMC0_RESET_CTL 0x0001180088000180ULL +#define CN23XX_LMC0_RESET_CTL_DDR3RST_MASK 0x0000000000000001ULL + +#endif From e86b1ab6866690691f19a456f24fef8e633e5546 Mon Sep 17 00:00:00 2001 From: Raghu Vatsavayi Date: Wed, 31 Aug 2016 11:03:24 -0700 Subject: [PATCH 0607/1715] liquidio: CN23XX queue definitions Add support for cn23xx specific queue definitions and features. Signed-off-by: Derek Chickles Signed-off-by: Satanand Burla Signed-off-by: Felix Manlunas Signed-off-by: Raghu Vatsavayi Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/Kconfig | 2 +- .../cavium/liquidio/cn23xx_pf_device.h | 48 ++++++ .../net/ethernet/cavium/liquidio/lio_main.c | 47 ++++-- .../ethernet/cavium/liquidio/octeon_config.h | 56 ++++++- .../ethernet/cavium/liquidio/octeon_device.c | 142 +++++++++++++++++- .../ethernet/cavium/liquidio/octeon_device.h | 24 ++- 6 files changed, 286 insertions(+), 33 deletions(-) create mode 100644 drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h diff --git a/drivers/net/ethernet/cavium/Kconfig b/drivers/net/ethernet/cavium/Kconfig index e1b78b500309..92f411c9f0df 100644 --- a/drivers/net/ethernet/cavium/Kconfig +++ b/drivers/net/ethernet/cavium/Kconfig @@ -58,7 +58,7 @@ config LIQUIDIO select LIBCRC32C ---help--- This driver supports Cavium LiquidIO Intelligent Server Adapters - based on CN66XX and CN68XX chips. + based on CN66XX, CN68XX and CN23XX chips. To compile this driver as a module, choose M here: the module will be called liquidio. This is recommended. diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h new file mode 100644 index 000000000000..e07d5306c6dc --- /dev/null +++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h @@ -0,0 +1,48 @@ +/********************************************************************** +* Author: Cavium, Inc. +* +* Contact: support@cavium.com +* Please include "LiquidIO" in the subject. +* +* Copyright (c) 2003-2015 Cavium, Inc. +* +* This file is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License, Version 2, as +* published by the Free Software Foundation. +* +* This file is distributed in the hope that it will be useful, but +* AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty +* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or +* NONINFRINGEMENT. See the GNU General Public License for more +* details. +* +* This file may also be available under a different license from Cavium. +* Contact Cavium, Inc. for more information +**********************************************************************/ + +/*! \file cn23xx_device.h + * \brief Host Driver: Routines that perform CN23XX specific operations. +*/ + +#ifndef __CN23XX_PF_DEVICE_H__ +#define __CN23XX_PF_DEVICE_H__ + +#include "cn23xx_pf_regs.h" + +/* Register address and configuration for a CN23XX devices. + * If device specific changes need to be made then add a struct to include + * device specific fields as shown in the commented section + */ +struct octeon_cn23xx_pf { + /** PCI interrupt summary register */ + u8 __iomem *intr_sum_reg64; + + /** PCI interrupt enable register */ + u8 __iomem *intr_enb_reg64; + + /** The PCI interrupt mask used by interrupt handler */ + u64 intr_mask64; + + struct octeon_config *conf; +}; +#endif diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index 8f11a0b4a7ad..7ef073825160 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -474,6 +474,9 @@ static const struct pci_device_id liquidio_pci_tbl[] = { { /* 66xx */ PCI_VENDOR_ID_CAVIUM, 0x92, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, + { /* 23xx pf */ + PCI_VENDOR_ID_CAVIUM, 0x9702, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 + }, { 0, 0, 0, 0, 0, 0, 0 } @@ -491,7 +494,6 @@ static struct pci_driver liquidio_pci_driver = { .suspend = liquidio_suspend, .resume = liquidio_resume, #endif - }; /** @@ -3268,15 +3270,24 @@ static int setup_nic_devices(struct octeon_device *octeon_dev) vdata->minor = cpu_to_be16(LIQUIDIO_BASE_MINOR_VERSION); vdata->micro = cpu_to_be16(LIQUIDIO_BASE_MICRO_VERSION); - num_iqueues = - CFG_GET_NUM_TXQS_NIC_IF(octeon_get_conf(octeon_dev), i); - num_oqueues = - CFG_GET_NUM_RXQS_NIC_IF(octeon_get_conf(octeon_dev), i); - base_queue = - CFG_GET_BASE_QUE_NIC_IF(octeon_get_conf(octeon_dev), i); - gmx_port_id = - CFG_GET_GMXID_NIC_IF(octeon_get_conf(octeon_dev), i); - ifidx_or_pfnum = i; + if (OCTEON_CN23XX_PF(octeon_dev)) { + num_iqueues = octeon_dev->sriov_info.num_pf_rings; + num_oqueues = octeon_dev->sriov_info.num_pf_rings; + base_queue = octeon_dev->sriov_info.pf_srn; + + gmx_port_id = octeon_dev->pf_num; + ifidx_or_pfnum = octeon_dev->pf_num; + } else { + num_iqueues = CFG_GET_NUM_TXQS_NIC_IF( + octeon_get_conf(octeon_dev), i); + num_oqueues = CFG_GET_NUM_RXQS_NIC_IF( + octeon_get_conf(octeon_dev), i); + base_queue = CFG_GET_BASE_QUE_NIC_IF( + octeon_get_conf(octeon_dev), i); + gmx_port_id = CFG_GET_GMXID_NIC_IF( + octeon_get_conf(octeon_dev), i); + ifidx_or_pfnum = i; + } dev_dbg(&octeon_dev->pci_dev->dev, "requesting config for interface %d, iqs %d, oqs %d\n", @@ -3380,12 +3391,16 @@ static int setup_nic_devices(struct octeon_device *octeon_dev) lio->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE); - lio->dev_capability = NETIF_F_HIGHDMA - | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM - | NETIF_F_SG | NETIF_F_RXCSUM - | NETIF_F_GRO - | NETIF_F_TSO | NETIF_F_TSO6 - | NETIF_F_LRO; + if (OCTEON_CN23XX_PF(octeon_dev) || + OCTEON_CN6XXX(octeon_dev)) { + lio->dev_capability = NETIF_F_HIGHDMA + | NETIF_F_IP_CSUM + | NETIF_F_IPV6_CSUM + | NETIF_F_SG | NETIF_F_RXCSUM + | NETIF_F_GRO + | NETIF_F_TSO | NETIF_F_TSO6 + | NETIF_F_LRO; + } netif_set_gso_max_size(netdev, OCTNIC_GSO_MAX_SIZE); /* Copy of transmit encapsulation capabilities: diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_config.h b/drivers/net/ethernet/cavium/liquidio/octeon_config.h index b3396e3a8bab..12ded6a4e7ef 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_config.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_config.h @@ -64,6 +64,34 @@ #define DEFAULT_NUM_NIC_PORTS_68XX 4 #define DEFAULT_NUM_NIC_PORTS_68XX_210NV 2 +/* CN23xx IQ configuration macros */ +#define CN23XX_MAX_RINGS_PER_PF_PASS_1_0 12 +#define CN23XX_MAX_RINGS_PER_PF_PASS_1_1 32 +#define CN23XX_MAX_RINGS_PER_PF 64 + +#define CN23XX_MAX_INPUT_QUEUES CN23XX_MAX_RINGS_PER_PF +#define CN23XX_MAX_IQ_DESCRIPTORS 2048 +#define CN23XX_DB_MIN 1 +#define CN23XX_DB_MAX 8 +#define CN23XX_DB_TIMEOUT 1 + +#define CN23XX_MAX_OUTPUT_QUEUES CN23XX_MAX_RINGS_PER_PF +#define CN23XX_MAX_OQ_DESCRIPTORS 2048 +#define CN23XX_OQ_BUF_SIZE 1536 +#define CN23XX_OQ_PKTSPER_INTR 128 +/*#define CAVIUM_ONLY_CN23XX_RX_PERF*/ +#define CN23XX_OQ_REFIL_THRESHOLD 128 + +#define CN23XX_OQ_INTR_PKT 64 +#define CN23XX_OQ_INTR_TIME 100 +#define DEFAULT_NUM_NIC_PORTS_23XX 1 + +#define CN23XX_CFG_IO_QUEUES CN23XX_MAX_RINGS_PER_PF +/* PEMs count */ +#define CN23XX_MAX_MACS 4 + +#define CN23XX_DEF_IQ_INTR_THRESHOLD 32 +#define CN23XX_DEF_IQ_INTR_BYTE_THRESHOLD (64 * 1024) /* common OCTEON configuration macros */ #define CN6XXX_CFG_IO_QUEUES 32 #define OCTEON_32BYTE_INSTR 32 @@ -140,19 +168,24 @@ enum lio_card_type { LIO_210SV = 0, /* Two port, 66xx */ LIO_210NV, /* Two port, 68xx */ - LIO_410NV /* Four port, 68xx */ + LIO_410NV, /* Four port, 68xx */ + LIO_23XX /* 23xx */ }; #define LIO_210SV_NAME "210sv" #define LIO_210NV_NAME "210nv" #define LIO_410NV_NAME "410nv" +#define LIO_23XX_NAME "23xx" /** Structure to define the configuration attributes for each Input queue. * Applicable to all Octeon processors **/ struct octeon_iq_config { #ifdef __BIG_ENDIAN_BITFIELD - u64 reserved:32; + u64 reserved:16; + + /** Tx interrupt packets. Applicable to 23xx only */ + u64 iq_intr_pkt:16; /** Minimum ticks to wait before checking for pending instructions. */ u64 db_timeout:16; @@ -192,7 +225,10 @@ struct octeon_iq_config { /** Minimum ticks to wait before checking for pending instructions. */ u64 db_timeout:16; - u64 reserved:32; + /** Tx interrupt packets. Applicable to 23xx only */ + u64 iq_intr_pkt:16; + + u64 reserved:16; #endif }; @@ -416,11 +452,15 @@ struct octeon_config { #define DISPATCH_LIST_SIZE BIT(OPCODE_MASK_BITS) /* Maximum number of Octeon Instruction (command) queues */ -#define MAX_OCTEON_INSTR_QUEUES(oct) CN6XXX_MAX_INPUT_QUEUES -/* Maximum number of Octeon Output queues */ -#define MAX_OCTEON_OUTPUT_QUEUES(oct) CN6XXX_MAX_OUTPUT_QUEUES +#define MAX_OCTEON_INSTR_QUEUES(oct) \ + (OCTEON_CN23XX_PF(oct) ? CN23XX_MAX_INPUT_QUEUES : \ + CN6XXX_MAX_INPUT_QUEUES) -#define MAX_POSSIBLE_OCTEON_INSTR_QUEUES CN6XXX_MAX_INPUT_QUEUES -#define MAX_POSSIBLE_OCTEON_OUTPUT_QUEUES CN6XXX_MAX_OUTPUT_QUEUES +/* Maximum number of Octeon Instruction (command) queues */ +#define MAX_OCTEON_OUTPUT_QUEUES(oct) \ + (OCTEON_CN23XX_PF(oct) ? CN23XX_MAX_OUTPUT_QUEUES : \ + CN6XXX_MAX_OUTPUT_QUEUES) +#define MAX_POSSIBLE_OCTEON_INSTR_QUEUES CN23XX_MAX_INPUT_QUEUES +#define MAX_POSSIBLE_OCTEON_OUTPUT_QUEUES CN23XX_MAX_OUTPUT_QUEUES #endif /* __OCTEON_CONFIG_H__ */ diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.c b/drivers/net/ethernet/cavium/liquidio/octeon_device.c index 541137a60145..120b78e8bc01 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_device.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.c @@ -31,6 +31,7 @@ #include "octeon_network.h" #include "cn66xx_regs.h" #include "cn66xx_device.h" +#include "cn23xx_pf_device.h" /** Default configuration * for CN66XX OCTEON Models. @@ -417,6 +418,108 @@ static struct octeon_config default_cn68xx_210nv_conf = { , }; +static struct octeon_config default_cn23xx_conf = { + .card_type = LIO_23XX, + .card_name = LIO_23XX_NAME, + /** IQ attributes */ + .iq = { + .max_iqs = CN23XX_CFG_IO_QUEUES, + .pending_list_size = (CN23XX_MAX_IQ_DESCRIPTORS * + CN23XX_CFG_IO_QUEUES), + .instr_type = OCTEON_64BYTE_INSTR, + .db_min = CN23XX_DB_MIN, + .db_timeout = CN23XX_DB_TIMEOUT, + .iq_intr_pkt = CN23XX_DEF_IQ_INTR_THRESHOLD, + }, + + /** OQ attributes */ + .oq = { + .max_oqs = CN23XX_CFG_IO_QUEUES, + .info_ptr = OCTEON_OQ_INFOPTR_MODE, + .pkts_per_intr = CN23XX_OQ_PKTSPER_INTR, + .refill_threshold = CN23XX_OQ_REFIL_THRESHOLD, + .oq_intr_pkt = CN23XX_OQ_INTR_PKT, + .oq_intr_time = CN23XX_OQ_INTR_TIME, + }, + + .num_nic_ports = DEFAULT_NUM_NIC_PORTS_23XX, + .num_def_rx_descs = CN23XX_MAX_OQ_DESCRIPTORS, + .num_def_tx_descs = CN23XX_MAX_IQ_DESCRIPTORS, + .def_rx_buf_size = CN23XX_OQ_BUF_SIZE, + + /* For ethernet interface 0: Port cfg Attributes */ + .nic_if_cfg[0] = { + /* Max Txqs: Half for each of the two ports :max_iq/2 */ + .max_txqs = MAX_TXQS_PER_INTF, + + /* Actual configured value. Range could be: 1...max_txqs */ + .num_txqs = DEF_TXQS_PER_INTF, + + /* Max Rxqs: Half for each of the two ports :max_oq/2 */ + .max_rxqs = MAX_RXQS_PER_INTF, + + /* Actual configured value. Range could be: 1...max_rxqs */ + .num_rxqs = DEF_RXQS_PER_INTF, + + /* Num of desc for rx rings */ + .num_rx_descs = CN23XX_MAX_OQ_DESCRIPTORS, + + /* Num of desc for tx rings */ + .num_tx_descs = CN23XX_MAX_IQ_DESCRIPTORS, + + /* SKB size, We need not change buf size even for Jumbo frames. + * Octeon can send jumbo frames in 4 consecutive descriptors, + */ + .rx_buf_size = CN23XX_OQ_BUF_SIZE, + + .base_queue = BASE_QUEUE_NOT_REQUESTED, + + .gmx_port_id = 0, + }, + + .nic_if_cfg[1] = { + /* Max Txqs: Half for each of the two ports :max_iq/2 */ + .max_txqs = MAX_TXQS_PER_INTF, + + /* Actual configured value. Range could be: 1...max_txqs */ + .num_txqs = DEF_TXQS_PER_INTF, + + /* Max Rxqs: Half for each of the two ports :max_oq/2 */ + .max_rxqs = MAX_RXQS_PER_INTF, + + /* Actual configured value. Range could be: 1...max_rxqs */ + .num_rxqs = DEF_RXQS_PER_INTF, + + /* Num of desc for rx rings */ + .num_rx_descs = CN23XX_MAX_OQ_DESCRIPTORS, + + /* Num of desc for tx rings */ + .num_tx_descs = CN23XX_MAX_IQ_DESCRIPTORS, + + /* SKB size, We need not change buf size even for Jumbo frames. + * Octeon can send jumbo frames in 4 consecutive descriptors, + */ + .rx_buf_size = CN23XX_OQ_BUF_SIZE, + + .base_queue = BASE_QUEUE_NOT_REQUESTED, + + .gmx_port_id = 1, + }, + + .misc = { + /* Host driver link query interval */ + .oct_link_query_interval = 100, + + /* Octeon link query interval */ + .host_link_query_interval = 500, + + .enable_sli_oq_bp = 0, + + /* Control queue group */ + .ctrlq_grp = 1, + } +}; + enum { OCTEON_CONFIG_TYPE_DEFAULT = 0, NUM_OCTEON_CONFS, @@ -484,6 +587,8 @@ static void *__retrieve_octeon_config_info(struct octeon_device *oct, } else if ((oct->chip_id == OCTEON_CN68XX) && (card_type == LIO_410NV)) { ret = (void *)&default_cn68xx_conf; + } else if (oct->chip_id == OCTEON_CN23XX_PF_VID) { + ret = (void *)&default_cn23xx_conf; } break; default: @@ -498,7 +603,8 @@ static int __verify_octeon_config_info(struct octeon_device *oct, void *conf) case OCTEON_CN66XX: case OCTEON_CN68XX: return lio_validate_cn6xxx_config_info(oct, conf); - + case OCTEON_CN23XX_PF_VID: + return 0; default: break; } @@ -572,6 +678,9 @@ static struct octeon_device *octeon_allocate_device_mem(u32 pci_id, configsize = sizeof(struct octeon_cn6xxx); break; + case OCTEON_CN23XX_PF_VID: + configsize = sizeof(struct octeon_cn23xx_pf); + break; default: pr_err("%s: Unknown PCI Device: 0x%x\n", __func__, @@ -649,6 +758,9 @@ int octeon_setup_instr_queues(struct octeon_device *oct) if (OCTEON_CN6XXX(oct)) num_descs = CFG_GET_NUM_DEF_TX_DESCS(CHIP_FIELD(oct, cn6xxx, conf)); + else if (OCTEON_CN23XX_PF(oct)) + num_descs = CFG_GET_NUM_DEF_TX_DESCS(CHIP_FIELD(oct, cn23xx_pf, + conf)); oct->num_iqs = 0; @@ -690,8 +802,12 @@ int octeon_setup_output_queues(struct octeon_device *oct) CFG_GET_NUM_DEF_RX_DESCS(CHIP_FIELD(oct, cn6xxx, conf)); desc_size = CFG_GET_DEF_RX_BUF_SIZE(CHIP_FIELD(oct, cn6xxx, conf)); + } else if (OCTEON_CN23XX_PF(oct)) { + num_descs = CFG_GET_NUM_DEF_RX_DESCS(CHIP_FIELD(oct, cn23xx_pf, + conf)); + desc_size = CFG_GET_DEF_RX_BUF_SIZE(CHIP_FIELD(oct, cn23xx_pf, + conf)); } - oct->num_oqs = 0; oct->droq[0] = vmalloc_node(sizeof(*oct->droq[0]), numa_node); if (!oct->droq[0]) @@ -915,6 +1031,9 @@ int octeon_core_drv_init(struct octeon_recv_info *recv_info, void *buf) if (OCTEON_CN6XXX(oct)) num_nic_ports = CFG_GET_NUM_NIC_PORTS(CHIP_FIELD(oct, cn6xxx, conf)); + else if (OCTEON_CN23XX_PF(oct)) + num_nic_ports = + CFG_GET_NUM_NIC_PORTS(CHIP_FIELD(oct, cn23xx_pf, conf)); if (atomic_read(&oct->status) >= OCT_DEV_RUNNING) { dev_err(&oct->pci_dev->dev, "Received CORE OK when device state is 0x%x\n", @@ -1004,8 +1123,10 @@ struct octeon_config *octeon_get_conf(struct octeon_device *oct) if (OCTEON_CN6XXX(oct)) { default_oct_conf = (struct octeon_config *)(CHIP_FIELD(oct, cn6xxx, conf)); + } else if (OCTEON_CN23XX_PF(oct)) { + default_oct_conf = (struct octeon_config *) + (CHIP_FIELD(oct, cn23xx_pf, conf)); } - return default_oct_conf; } @@ -1037,7 +1158,9 @@ u64 lio_pci_readq(struct octeon_device *oct, u64 addr) * So write MSB first */ addrhi = (addr >> 32); - if ((oct->chip_id == OCTEON_CN66XX) || (oct->chip_id == OCTEON_CN68XX)) + if ((oct->chip_id == OCTEON_CN66XX) || + (oct->chip_id == OCTEON_CN68XX) || + (oct->chip_id == OCTEON_CN23XX_PF_VID)) addrhi |= 0x00060000; writel(addrhi, oct->reg_list.pci_win_rd_addr_hi); @@ -1081,8 +1204,15 @@ int octeon_mem_access_ok(struct octeon_device *oct) u64 lmc0_reset_ctl; /* Check to make sure a DDR interface is enabled */ - lmc0_reset_ctl = lio_pci_readq(oct, CN6XXX_LMC0_RESET_CTL); - access_okay = (lmc0_reset_ctl & CN6XXX_LMC0_RESET_CTL_DDR3RST_MASK); + if (OCTEON_CN23XX_PF(oct)) { + lmc0_reset_ctl = lio_pci_readq(oct, CN23XX_LMC0_RESET_CTL); + access_okay = + (lmc0_reset_ctl & CN23XX_LMC0_RESET_CTL_DDR3RST_MASK); + } else { + lmc0_reset_ctl = lio_pci_readq(oct, CN6XXX_LMC0_RESET_CTL); + access_okay = + (lmc0_reset_ctl & CN6XXX_LMC0_RESET_CTL_DDR3RST_MASK); + } return access_okay ? 0 : 1; } diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.h b/drivers/net/ethernet/cavium/liquidio/octeon_device.h index 02e9854477a1..f10cdd45729e 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_device.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.h @@ -30,13 +30,19 @@ /** PCI VendorId Device Id */ #define OCTEON_CN68XX_PCIID 0x91177d #define OCTEON_CN66XX_PCIID 0x92177d - +#define OCTEON_CN23XX_PCIID_PF 0x9702177d /** Driver identifies chips by these Ids, created by clubbing together * DeviceId+RevisionId; Where Revision Id is not used to distinguish * between chips, a value of 0 is used for revision id. */ #define OCTEON_CN68XX 0x0091 #define OCTEON_CN66XX 0x0092 +#define OCTEON_CN23XX_PF_VID 0x9702 + +/**RevisionId for the chips */ +#define OCTEON_CN23XX_REV_1_0 0x00 +#define OCTEON_CN23XX_REV_1_1 0x01 +#define OCTEON_CN23XX_REV_2_0 0x80 /** Endian-swap modes supported by Octeon. */ enum octeon_pci_swap_mode { @@ -270,6 +276,17 @@ struct octdev_props { struct net_device *netdev; }; +struct octeon_sriov_info { + /* Actual rings left for PF device */ + u32 num_pf_rings; + + /* SRN of PF usable IO queues */ + u32 pf_srn; + /* total pf rings */ + u32 trs; + +}; + /** The Octeon device. * Each Octeon device has this structure to represent all its * components. @@ -295,7 +312,7 @@ struct octeon_device { /** Octeon Chip type. */ u16 chip_id; u16 rev_id; - + u16 pf_num; /** This device's id - set by the driver. */ u32 octeon_id; @@ -394,6 +411,8 @@ struct octeon_device { void *priv; + struct octeon_sriov_info sriov_info; + int rx_pause; int tx_pause; @@ -407,6 +426,7 @@ struct octeon_device { #define OCT_DRV_OFFLINE 2 #define OCTEON_CN6XXX(oct) ((oct->chip_id == OCTEON_CN66XX) || \ (oct->chip_id == OCTEON_CN68XX)) +#define OCTEON_CN23XX_PF(oct) (oct->chip_id == OCTEON_CN23XX_PF_VID) #define CHIP_FIELD(oct, TYPE, field) \ (((struct octeon_ ## TYPE *)(oct->chip))->field) From 72c0091293c0082e78fe6110058f808244bd574d Mon Sep 17 00:00:00 2001 From: Raghu Vatsavayi Date: Wed, 31 Aug 2016 11:03:25 -0700 Subject: [PATCH 0608/1715] liquidio: CN23XX device init and sriov config Add support for cn23xx device init and sriov queue config. Signed-off-by: Derek Chickles Signed-off-by: Satanand Burla Signed-off-by: Felix Manlunas Signed-off-by: Raghu Vatsavayi Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/liquidio/Makefile | 1 + .../cavium/liquidio/cn23xx_pf_device.c | 527 ++++++++++++++++++ .../cavium/liquidio/cn23xx_pf_device.h | 7 + .../net/ethernet/cavium/liquidio/lio_main.c | 10 +- 4 files changed, 544 insertions(+), 1 deletion(-) create mode 100644 drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c diff --git a/drivers/net/ethernet/cavium/liquidio/Makefile b/drivers/net/ethernet/cavium/liquidio/Makefile index d44111df8452..5a27b2a44039 100644 --- a/drivers/net/ethernet/cavium/liquidio/Makefile +++ b/drivers/net/ethernet/cavium/liquidio/Makefile @@ -10,6 +10,7 @@ liquidio-$(CONFIG_LIQUIDIO) += lio_ethtool.o \ octeon_device.o \ cn66xx_device.o \ cn68xx_device.o \ + cn23xx_pf_device.o \ octeon_mem_ops.o \ octeon_droq.o \ octeon_nic.o diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c new file mode 100644 index 000000000000..ccc3d5b54fab --- /dev/null +++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c @@ -0,0 +1,527 @@ +/********************************************************************** +* Author: Cavium, Inc. +* +* Contact: support@cavium.com +* Please include "LiquidIO" in the subject. +* +* Copyright (c) 2003-2015 Cavium, Inc. +* +* This file is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License, Version 2, as +* published by the Free Software Foundation. +* +* This file is distributed in the hope that it will be useful, but +* AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty +* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or +* NONINFRINGEMENT. See the GNU General Public License for more +* details. +* +* This file may also be available under a different license from Cavium. +* Contact Cavium, Inc. for more information +**********************************************************************/ + +#include +#include +#include +#include "liquidio_common.h" +#include "octeon_droq.h" +#include "octeon_iq.h" +#include "response_manager.h" +#include "octeon_device.h" +#include "cn23xx_pf_device.h" +#include "octeon_main.h" + +#define RESET_NOTDONE 0 +#define RESET_DONE 1 + +/* Change the value of SLI Packet Input Jabber Register to allow + * VXLAN TSO packets which can be 64424 bytes, exceeding the + * MAX_GSO_SIZE we supplied to the kernel + */ +#define CN23XX_INPUT_JABBER 64600 + +#define LIOLUT_RING_DISTRIBUTION 9 +const int liolut_num_vfs_to_rings_per_vf[LIOLUT_RING_DISTRIBUTION] = { + 0, 8, 4, 2, 2, 2, 1, 1, 1 +}; + +void cn23xx_dump_pf_initialized_regs(struct octeon_device *oct) +{ + int i = 0; + u32 regval = 0; + struct octeon_cn23xx_pf *cn23xx = (struct octeon_cn23xx_pf *)oct->chip; + + /*In cn23xx_soft_reset*/ + dev_dbg(&oct->pci_dev->dev, "%s[%llx] : 0x%llx\n", + "CN23XX_WIN_WR_MASK_REG", CVM_CAST64(CN23XX_WIN_WR_MASK_REG), + CVM_CAST64(octeon_read_csr64(oct, CN23XX_WIN_WR_MASK_REG))); + dev_dbg(&oct->pci_dev->dev, "%s[%llx] : 0x%016llx\n", + "CN23XX_SLI_SCRATCH1", CVM_CAST64(CN23XX_SLI_SCRATCH1), + CVM_CAST64(octeon_read_csr64(oct, CN23XX_SLI_SCRATCH1))); + dev_dbg(&oct->pci_dev->dev, "%s[%llx] : 0x%016llx\n", + "CN23XX_RST_SOFT_RST", CN23XX_RST_SOFT_RST, + lio_pci_readq(oct, CN23XX_RST_SOFT_RST)); + + /*In cn23xx_set_dpi_regs*/ + dev_dbg(&oct->pci_dev->dev, "%s[%llx] : 0x%016llx\n", + "CN23XX_DPI_DMA_CONTROL", CN23XX_DPI_DMA_CONTROL, + lio_pci_readq(oct, CN23XX_DPI_DMA_CONTROL)); + + for (i = 0; i < 6; i++) { + dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n", + "CN23XX_DPI_DMA_ENG_ENB", i, + CN23XX_DPI_DMA_ENG_ENB(i), + lio_pci_readq(oct, CN23XX_DPI_DMA_ENG_ENB(i))); + dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n", + "CN23XX_DPI_DMA_ENG_BUF", i, + CN23XX_DPI_DMA_ENG_BUF(i), + lio_pci_readq(oct, CN23XX_DPI_DMA_ENG_BUF(i))); + } + + dev_dbg(&oct->pci_dev->dev, "%s[%llx] : 0x%016llx\n", "CN23XX_DPI_CTL", + CN23XX_DPI_CTL, lio_pci_readq(oct, CN23XX_DPI_CTL)); + + /*In cn23xx_setup_pcie_mps and cn23xx_setup_pcie_mrrs */ + pci_read_config_dword(oct->pci_dev, CN23XX_CONFIG_PCIE_DEVCTL, ®val); + dev_dbg(&oct->pci_dev->dev, "%s[%llx] : 0x%016llx\n", + "CN23XX_CONFIG_PCIE_DEVCTL", + CVM_CAST64(CN23XX_CONFIG_PCIE_DEVCTL), CVM_CAST64(regval)); + + dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n", + "CN23XX_DPI_SLI_PRTX_CFG", oct->pcie_port, + CN23XX_DPI_SLI_PRTX_CFG(oct->pcie_port), + lio_pci_readq(oct, CN23XX_DPI_SLI_PRTX_CFG(oct->pcie_port))); + + /*In cn23xx_specific_regs_setup */ + dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n", + "CN23XX_SLI_S2M_PORTX_CTL", oct->pcie_port, + CVM_CAST64(CN23XX_SLI_S2M_PORTX_CTL(oct->pcie_port)), + CVM_CAST64(octeon_read_csr64( + oct, CN23XX_SLI_S2M_PORTX_CTL(oct->pcie_port)))); + + dev_dbg(&oct->pci_dev->dev, "%s[%llx] : 0x%016llx\n", + "CN23XX_SLI_RING_RST", CVM_CAST64(CN23XX_SLI_PKT_IOQ_RING_RST), + (u64)octeon_read_csr64(oct, CN23XX_SLI_PKT_IOQ_RING_RST)); + + /*In cn23xx_setup_global_mac_regs*/ + for (i = 0; i < CN23XX_MAX_MACS; i++) { + dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n", + "CN23XX_SLI_PKT_MAC_RINFO64", i, + CVM_CAST64(CN23XX_SLI_PKT_MAC_RINFO64(i, oct->pf_num)), + CVM_CAST64(octeon_read_csr64 + (oct, CN23XX_SLI_PKT_MAC_RINFO64 + (i, oct->pf_num)))); + } + + /*In cn23xx_setup_global_input_regs*/ + for (i = 0; i < CN23XX_MAX_INPUT_QUEUES; i++) { + dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n", + "CN23XX_SLI_IQ_PKT_CONTROL64", i, + CVM_CAST64(CN23XX_SLI_IQ_PKT_CONTROL64(i)), + CVM_CAST64(octeon_read_csr64 + (oct, CN23XX_SLI_IQ_PKT_CONTROL64(i)))); + } + + /*In cn23xx_setup_global_output_regs*/ + dev_dbg(&oct->pci_dev->dev, "%s[%llx] : 0x%016llx\n", + "CN23XX_SLI_OQ_WMARK", CVM_CAST64(CN23XX_SLI_OQ_WMARK), + CVM_CAST64(octeon_read_csr64(oct, CN23XX_SLI_OQ_WMARK))); + + for (i = 0; i < CN23XX_MAX_OUTPUT_QUEUES; i++) { + dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n", + "CN23XX_SLI_OQ_PKT_CONTROL", i, + CVM_CAST64(CN23XX_SLI_OQ_PKT_CONTROL(i)), + CVM_CAST64(octeon_read_csr( + oct, CN23XX_SLI_OQ_PKT_CONTROL(i)))); + dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n", + "CN23XX_SLI_OQ_PKT_INT_LEVELS", i, + CVM_CAST64(CN23XX_SLI_OQ_PKT_INT_LEVELS(i)), + CVM_CAST64(octeon_read_csr64( + oct, CN23XX_SLI_OQ_PKT_INT_LEVELS(i)))); + } + + /*In cn23xx_enable_interrupt and cn23xx_disable_interrupt*/ + dev_dbg(&oct->pci_dev->dev, "%s[%llx] : 0x%016llx\n", + "cn23xx->intr_enb_reg64", + CVM_CAST64((long)(cn23xx->intr_enb_reg64)), + CVM_CAST64(readq(cn23xx->intr_enb_reg64))); + + dev_dbg(&oct->pci_dev->dev, "%s[%llx] : 0x%016llx\n", + "cn23xx->intr_sum_reg64", + CVM_CAST64((long)(cn23xx->intr_sum_reg64)), + CVM_CAST64(readq(cn23xx->intr_sum_reg64))); + + /*In cn23xx_setup_iq_regs*/ + for (i = 0; i < CN23XX_MAX_INPUT_QUEUES; i++) { + dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n", + "CN23XX_SLI_IQ_BASE_ADDR64", i, + CVM_CAST64(CN23XX_SLI_IQ_BASE_ADDR64(i)), + CVM_CAST64(octeon_read_csr64( + oct, CN23XX_SLI_IQ_BASE_ADDR64(i)))); + dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n", + "CN23XX_SLI_IQ_SIZE", i, + CVM_CAST64(CN23XX_SLI_IQ_SIZE(i)), + CVM_CAST64(octeon_read_csr + (oct, CN23XX_SLI_IQ_SIZE(i)))); + dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n", + "CN23XX_SLI_IQ_DOORBELL", i, + CVM_CAST64(CN23XX_SLI_IQ_DOORBELL(i)), + CVM_CAST64(octeon_read_csr64( + oct, CN23XX_SLI_IQ_DOORBELL(i)))); + dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n", + "CN23XX_SLI_IQ_INSTR_COUNT64", i, + CVM_CAST64(CN23XX_SLI_IQ_INSTR_COUNT64(i)), + CVM_CAST64(octeon_read_csr64( + oct, CN23XX_SLI_IQ_INSTR_COUNT64(i)))); + } + + /*In cn23xx_setup_oq_regs*/ + for (i = 0; i < CN23XX_MAX_OUTPUT_QUEUES; i++) { + dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n", + "CN23XX_SLI_OQ_BASE_ADDR64", i, + CVM_CAST64(CN23XX_SLI_OQ_BASE_ADDR64(i)), + CVM_CAST64(octeon_read_csr64( + oct, CN23XX_SLI_OQ_BASE_ADDR64(i)))); + dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n", + "CN23XX_SLI_OQ_SIZE", i, + CVM_CAST64(CN23XX_SLI_OQ_SIZE(i)), + CVM_CAST64(octeon_read_csr + (oct, CN23XX_SLI_OQ_SIZE(i)))); + dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n", + "CN23XX_SLI_OQ_BUFF_INFO_SIZE", i, + CVM_CAST64(CN23XX_SLI_OQ_BUFF_INFO_SIZE(i)), + CVM_CAST64(octeon_read_csr( + oct, CN23XX_SLI_OQ_BUFF_INFO_SIZE(i)))); + dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n", + "CN23XX_SLI_OQ_PKTS_SENT", i, + CVM_CAST64(CN23XX_SLI_OQ_PKTS_SENT(i)), + CVM_CAST64(octeon_read_csr64( + oct, CN23XX_SLI_OQ_PKTS_SENT(i)))); + dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n", + "CN23XX_SLI_OQ_PKTS_CREDIT", i, + CVM_CAST64(CN23XX_SLI_OQ_PKTS_CREDIT(i)), + CVM_CAST64(octeon_read_csr64( + oct, CN23XX_SLI_OQ_PKTS_CREDIT(i)))); + } + + dev_dbg(&oct->pci_dev->dev, "%s[%llx] : 0x%016llx\n", + "CN23XX_SLI_PKT_TIME_INT", + CVM_CAST64(CN23XX_SLI_PKT_TIME_INT), + CVM_CAST64(octeon_read_csr64(oct, CN23XX_SLI_PKT_TIME_INT))); + dev_dbg(&oct->pci_dev->dev, "%s[%llx] : 0x%016llx\n", + "CN23XX_SLI_PKT_CNT_INT", + CVM_CAST64(CN23XX_SLI_PKT_CNT_INT), + CVM_CAST64(octeon_read_csr64(oct, CN23XX_SLI_PKT_CNT_INT))); +} + +static u32 cn23xx_coprocessor_clock(struct octeon_device *oct) +{ + /* Bits 29:24 of RST_BOOT[PNR_MUL] holds the ref.clock MULTIPLIER + * for SLI. + */ + + /* TBD: get the info in Hand-shake */ + return (((lio_pci_readq(oct, CN23XX_RST_BOOT) >> 24) & 0x3f) * 50); +} + +static void cn23xx_setup_iq_regs(struct octeon_device *oct, u32 iq_no) +{ + struct octeon_instr_queue *iq = oct->instr_queue[iq_no]; + u64 pkt_in_done; + + iq_no += oct->sriov_info.pf_srn; + + /* Write the start of the input queue's ring and its size */ + octeon_write_csr64(oct, CN23XX_SLI_IQ_BASE_ADDR64(iq_no), + iq->base_addr_dma); + octeon_write_csr(oct, CN23XX_SLI_IQ_SIZE(iq_no), iq->max_count); + + /* Remember the doorbell & instruction count register addr + * for this queue + */ + iq->doorbell_reg = + (u8 *)oct->mmio[0].hw_addr + CN23XX_SLI_IQ_DOORBELL(iq_no); + iq->inst_cnt_reg = + (u8 *)oct->mmio[0].hw_addr + CN23XX_SLI_IQ_INSTR_COUNT64(iq_no); + dev_dbg(&oct->pci_dev->dev, "InstQ[%d]:dbell reg @ 0x%p instcnt_reg @ 0x%p\n", + iq_no, iq->doorbell_reg, iq->inst_cnt_reg); + + /* Store the current instruction counter (used in flush_iq + * calculation) + */ + pkt_in_done = readq(iq->inst_cnt_reg); + + /* Clear the count by writing back what we read, but don't + * enable interrupts + */ + writeq(pkt_in_done, iq->inst_cnt_reg); + + iq->reset_instr_cnt = 0; +} + +static void cn23xx_setup_oq_regs(struct octeon_device *oct, u32 oq_no) +{ + u32 reg_val; + struct octeon_droq *droq = oct->droq[oq_no]; + + oq_no += oct->sriov_info.pf_srn; + + octeon_write_csr64(oct, CN23XX_SLI_OQ_BASE_ADDR64(oq_no), + droq->desc_ring_dma); + octeon_write_csr(oct, CN23XX_SLI_OQ_SIZE(oq_no), droq->max_count); + + octeon_write_csr(oct, CN23XX_SLI_OQ_BUFF_INFO_SIZE(oq_no), + (droq->buffer_size | (OCT_RH_SIZE << 16))); + + /* Get the mapped address of the pkt_sent and pkts_credit regs */ + droq->pkts_sent_reg = + (u8 *)oct->mmio[0].hw_addr + CN23XX_SLI_OQ_PKTS_SENT(oq_no); + droq->pkts_credit_reg = + (u8 *)oct->mmio[0].hw_addr + CN23XX_SLI_OQ_PKTS_CREDIT(oq_no); + + /* Enable this output queue to generate Packet Timer Interrupt + */ + reg_val = octeon_read_csr(oct, CN23XX_SLI_OQ_PKT_CONTROL(oq_no)); + reg_val |= CN23XX_PKT_OUTPUT_CTL_TENB; + octeon_write_csr(oct, CN23XX_SLI_OQ_PKT_CONTROL(oq_no), + reg_val); + + /* Enable this output queue to generate Packet Count Interrupt + */ + reg_val = octeon_read_csr(oct, CN23XX_SLI_OQ_PKT_CONTROL(oq_no)); + reg_val |= CN23XX_PKT_OUTPUT_CTL_CENB; + octeon_write_csr(oct, CN23XX_SLI_OQ_PKT_CONTROL(oq_no), + reg_val); +} + +static void cn23xx_get_pcie_qlmport(struct octeon_device *oct) +{ + oct->pcie_port = (octeon_read_csr(oct, CN23XX_SLI_MAC_NUMBER)) & 0xff; + + dev_dbg(&oct->pci_dev->dev, "OCTEON: CN23xx uses PCIE Port %d\n", + oct->pcie_port); +} + +static void cn23xx_get_pf_num(struct octeon_device *oct) +{ + u32 fdl_bit = 0; + + /** Read Function Dependency Link reg to get the function number */ + pci_read_config_dword(oct->pci_dev, CN23XX_PCIE_SRIOV_FDL, &fdl_bit); + oct->pf_num = ((fdl_bit >> CN23XX_PCIE_SRIOV_FDL_BIT_POS) & + CN23XX_PCIE_SRIOV_FDL_MASK); +} + +static void cn23xx_setup_reg_address(struct octeon_device *oct) +{ + u8 __iomem *bar0_pciaddr = oct->mmio[0].hw_addr; + struct octeon_cn23xx_pf *cn23xx = (struct octeon_cn23xx_pf *)oct->chip; + + oct->reg_list.pci_win_wr_addr_hi = + (u32 __iomem *)(bar0_pciaddr + CN23XX_WIN_WR_ADDR_HI); + oct->reg_list.pci_win_wr_addr_lo = + (u32 __iomem *)(bar0_pciaddr + CN23XX_WIN_WR_ADDR_LO); + oct->reg_list.pci_win_wr_addr = + (u64 __iomem *)(bar0_pciaddr + CN23XX_WIN_WR_ADDR64); + + oct->reg_list.pci_win_rd_addr_hi = + (u32 __iomem *)(bar0_pciaddr + CN23XX_WIN_RD_ADDR_HI); + oct->reg_list.pci_win_rd_addr_lo = + (u32 __iomem *)(bar0_pciaddr + CN23XX_WIN_RD_ADDR_LO); + oct->reg_list.pci_win_rd_addr = + (u64 __iomem *)(bar0_pciaddr + CN23XX_WIN_RD_ADDR64); + + oct->reg_list.pci_win_wr_data_hi = + (u32 __iomem *)(bar0_pciaddr + CN23XX_WIN_WR_DATA_HI); + oct->reg_list.pci_win_wr_data_lo = + (u32 __iomem *)(bar0_pciaddr + CN23XX_WIN_WR_DATA_LO); + oct->reg_list.pci_win_wr_data = + (u64 __iomem *)(bar0_pciaddr + CN23XX_WIN_WR_DATA64); + + oct->reg_list.pci_win_rd_data_hi = + (u32 __iomem *)(bar0_pciaddr + CN23XX_WIN_RD_DATA_HI); + oct->reg_list.pci_win_rd_data_lo = + (u32 __iomem *)(bar0_pciaddr + CN23XX_WIN_RD_DATA_LO); + oct->reg_list.pci_win_rd_data = + (u64 __iomem *)(bar0_pciaddr + CN23XX_WIN_RD_DATA64); + + cn23xx_get_pcie_qlmport(oct); + + cn23xx->intr_mask64 = CN23XX_INTR_MASK; + cn23xx->intr_mask64 |= CN23XX_INTR_PKT_TIME; + if (oct->rev_id >= OCTEON_CN23XX_REV_1_1) + cn23xx->intr_mask64 |= CN23XX_INTR_VF_MBOX; + + cn23xx->intr_sum_reg64 = + bar0_pciaddr + + CN23XX_SLI_MAC_PF_INT_SUM64(oct->pcie_port, oct->pf_num); + cn23xx->intr_enb_reg64 = + bar0_pciaddr + + CN23XX_SLI_MAC_PF_INT_ENB64(oct->pcie_port, oct->pf_num); +} + +static int cn23xx_sriov_config(struct octeon_device *oct) +{ + u32 total_rings; + struct octeon_cn23xx_pf *cn23xx = (struct octeon_cn23xx_pf *)oct->chip; + /* num_vfs is already filled for us */ + u32 pf_srn, num_pf_rings; + + cn23xx->conf = + (struct octeon_config *)oct_get_config_info(oct, LIO_23XX); + switch (oct->rev_id) { + case OCTEON_CN23XX_REV_1_0: + total_rings = CN23XX_MAX_RINGS_PER_PF_PASS_1_0; + break; + case OCTEON_CN23XX_REV_1_1: + total_rings = CN23XX_MAX_RINGS_PER_PF_PASS_1_1; + break; + default: + total_rings = CN23XX_MAX_RINGS_PER_PF; + break; + } + if (!oct->sriov_info.num_pf_rings) { + if (total_rings > num_present_cpus()) + num_pf_rings = num_present_cpus(); + else + num_pf_rings = total_rings; + } else { + num_pf_rings = oct->sriov_info.num_pf_rings; + + if (num_pf_rings > total_rings) { + dev_warn(&oct->pci_dev->dev, + "num_queues_per_pf requested %u is more than available rings. Reducing to %u\n", + num_pf_rings, total_rings); + num_pf_rings = total_rings; + } + } + + total_rings = num_pf_rings; + /* the first ring of the pf */ + pf_srn = total_rings - num_pf_rings; + + oct->sriov_info.trs = total_rings; + oct->sriov_info.pf_srn = pf_srn; + oct->sriov_info.num_pf_rings = num_pf_rings; + dev_dbg(&oct->pci_dev->dev, "trs:%d pf_srn:%d num_pf_rings:%d\n", + oct->sriov_info.trs, oct->sriov_info.pf_srn, + oct->sriov_info.num_pf_rings); + return 0; +} + +int setup_cn23xx_octeon_pf_device(struct octeon_device *oct) +{ + if (octeon_map_pci_barx(oct, 0, 0)) + return 1; + + if (octeon_map_pci_barx(oct, 1, MAX_BAR1_IOREMAP_SIZE)) { + dev_err(&oct->pci_dev->dev, "%s CN23XX BAR1 map failed\n", + __func__); + octeon_unmap_pci_barx(oct, 0); + return 1; + } + + cn23xx_get_pf_num(oct); + + if (cn23xx_sriov_config(oct)) { + octeon_unmap_pci_barx(oct, 0); + octeon_unmap_pci_barx(oct, 1); + return 1; + } + + octeon_write_csr64(oct, CN23XX_SLI_MAC_CREDIT_CNT, 0x3F802080802080ULL); + + oct->fn_list.setup_iq_regs = cn23xx_setup_iq_regs; + oct->fn_list.setup_oq_regs = cn23xx_setup_oq_regs; + + cn23xx_setup_reg_address(oct); + + oct->coproc_clock_rate = 1000000ULL * cn23xx_coprocessor_clock(oct); + + return 0; +} + +int validate_cn23xx_pf_config_info(struct octeon_device *oct, + struct octeon_config *conf23xx) +{ + if (CFG_GET_IQ_MAX_Q(conf23xx) > CN23XX_MAX_INPUT_QUEUES) { + dev_err(&oct->pci_dev->dev, "%s: Num IQ (%d) exceeds Max (%d)\n", + __func__, CFG_GET_IQ_MAX_Q(conf23xx), + CN23XX_MAX_INPUT_QUEUES); + return 1; + } + + if (CFG_GET_OQ_MAX_Q(conf23xx) > CN23XX_MAX_OUTPUT_QUEUES) { + dev_err(&oct->pci_dev->dev, "%s: Num OQ (%d) exceeds Max (%d)\n", + __func__, CFG_GET_OQ_MAX_Q(conf23xx), + CN23XX_MAX_OUTPUT_QUEUES); + return 1; + } + + if (CFG_GET_IQ_INSTR_TYPE(conf23xx) != OCTEON_32BYTE_INSTR && + CFG_GET_IQ_INSTR_TYPE(conf23xx) != OCTEON_64BYTE_INSTR) { + dev_err(&oct->pci_dev->dev, "%s: Invalid instr type for IQ\n", + __func__); + return 1; + } + + if (!(CFG_GET_OQ_INFO_PTR(conf23xx)) || + !(CFG_GET_OQ_REFILL_THRESHOLD(conf23xx))) { + dev_err(&oct->pci_dev->dev, "%s: Invalid parameter for OQ\n", + __func__); + return 1; + } + + if (!(CFG_GET_OQ_INTR_TIME(conf23xx))) { + dev_err(&oct->pci_dev->dev, "%s: Invalid parameter for OQ\n", + __func__); + return 1; + } + + return 0; +} + +void cn23xx_dump_iq_regs(struct octeon_device *oct) +{ + u32 regval, q_no; + + dev_dbg(&oct->pci_dev->dev, "SLI_IQ_DOORBELL_0 [0x%x]: 0x%016llx\n", + CN23XX_SLI_IQ_DOORBELL(0), + CVM_CAST64(octeon_read_csr64 + (oct, CN23XX_SLI_IQ_DOORBELL(0)))); + + dev_dbg(&oct->pci_dev->dev, "SLI_IQ_BASEADDR_0 [0x%x]: 0x%016llx\n", + CN23XX_SLI_IQ_BASE_ADDR64(0), + CVM_CAST64(octeon_read_csr64 + (oct, CN23XX_SLI_IQ_BASE_ADDR64(0)))); + + dev_dbg(&oct->pci_dev->dev, "SLI_IQ_FIFO_RSIZE_0 [0x%x]: 0x%016llx\n", + CN23XX_SLI_IQ_SIZE(0), + CVM_CAST64(octeon_read_csr64(oct, CN23XX_SLI_IQ_SIZE(0)))); + + dev_dbg(&oct->pci_dev->dev, "SLI_CTL_STATUS [0x%x]: 0x%016llx\n", + CN23XX_SLI_CTL_STATUS, + CVM_CAST64(octeon_read_csr64(oct, CN23XX_SLI_CTL_STATUS))); + + for (q_no = 0; q_no < CN23XX_MAX_INPUT_QUEUES; q_no++) { + dev_dbg(&oct->pci_dev->dev, "SLI_PKT[%d]_INPUT_CTL [0x%x]: 0x%016llx\n", + q_no, CN23XX_SLI_IQ_PKT_CONTROL64(q_no), + CVM_CAST64(octeon_read_csr64 + (oct, + CN23XX_SLI_IQ_PKT_CONTROL64(q_no)))); + } + + pci_read_config_dword(oct->pci_dev, CN23XX_CONFIG_PCIE_DEVCTL, ®val); + dev_dbg(&oct->pci_dev->dev, "Config DevCtl [0x%x]: 0x%08x\n", + CN23XX_CONFIG_PCIE_DEVCTL, regval); + + dev_dbg(&oct->pci_dev->dev, "SLI_PRT[%d]_CFG [0x%llx]: 0x%016llx\n", + oct->pcie_port, CN23XX_DPI_SLI_PRTX_CFG(oct->pcie_port), + CVM_CAST64(lio_pci_readq( + oct, CN23XX_DPI_SLI_PRTX_CFG(oct->pcie_port)))); + + dev_dbg(&oct->pci_dev->dev, "SLI_S2M_PORT[%d]_CTL [0x%x]: 0x%016llx\n", + oct->pcie_port, CN23XX_SLI_S2M_PORTX_CTL(oct->pcie_port), + CVM_CAST64(octeon_read_csr64( + oct, CN23XX_SLI_S2M_PORTX_CTL(oct->pcie_port)))); +} diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h index e07d5306c6dc..36252e7ced1a 100644 --- a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h +++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h @@ -45,4 +45,11 @@ struct octeon_cn23xx_pf { struct octeon_config *conf; }; + +int setup_cn23xx_octeon_pf_device(struct octeon_device *oct); + +int validate_cn23xx_pf_config_info(struct octeon_device *oct, + struct octeon_config *conf23xx); + +void cn23xx_dump_pf_initialized_regs(struct octeon_device *oct); #endif diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index 7ef073825160..a9e7c0711c15 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -35,6 +35,7 @@ #include "cn66xx_regs.h" #include "cn66xx_device.h" #include "cn68xx_device.h" +#include "cn23xx_pf_device.h" #include "liquidio_image.h" MODULE_AUTHOR("Cavium Networks, "); @@ -132,7 +133,8 @@ union tx_info { #define OCTNIC_MAX_SG (MAX_SKB_FRAGS) #define OCTNIC_GSO_MAX_HEADER_SIZE 128 -#define OCTNIC_GSO_MAX_SIZE (GSO_MAX_SIZE - OCTNIC_GSO_MAX_HEADER_SIZE) +#define OCTNIC_GSO_MAX_SIZE \ + (CN23XX_DEFAULT_INPUT_JABBER - OCTNIC_GSO_MAX_HEADER_SIZE) /** Structure of a node in list of gather components maintained by * NIC driver for each network device. @@ -1323,6 +1325,12 @@ static int octeon_chip_specific_setup(struct octeon_device *oct) s = "CN66XX"; break; + case OCTEON_CN23XX_PCIID_PF: + oct->chip_id = OCTEON_CN23XX_PF_VID; + ret = setup_cn23xx_octeon_pf_device(oct); + s = "CN23XX"; + break; + default: s = "?"; dev_err(&oct->pci_dev->dev, "Unknown device found (dev_id: %x)\n", From 3451b97cce2d7827bd76378ae6e9aeb7e8fc463d Mon Sep 17 00:00:00 2001 From: Raghu Vatsavayi Date: Wed, 31 Aug 2016 11:03:26 -0700 Subject: [PATCH 0609/1715] liquidio: CN23XX register setup Adds support for initializing cn23xx device registers related to mac, input/output and pf global config. Signed-off-by: Derek Chickles Signed-off-by: Satanand Burla Signed-off-by: Felix Manlunas Signed-off-by: Raghu Vatsavayi Signed-off-by: David S. Miller --- .../cavium/liquidio/cn23xx_pf_device.c | 259 ++++++++++++++++++ .../net/ethernet/cavium/liquidio/lio_main.c | 24 +- .../ethernet/cavium/liquidio/octeon_config.h | 3 + .../ethernet/cavium/liquidio/octeon_device.h | 38 +++ 4 files changed, 316 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c index ccc3d5b54fab..d614b0aae6d9 100644 --- a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c +++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c @@ -214,6 +214,36 @@ void cn23xx_dump_pf_initialized_regs(struct octeon_device *oct) CVM_CAST64(octeon_read_csr64(oct, CN23XX_SLI_PKT_CNT_INT))); } +static void cn23xx_enable_error_reporting(struct octeon_device *oct) +{ + u32 regval; + u32 uncorrectable_err_mask, corrtable_err_status; + + pci_read_config_dword(oct->pci_dev, CN23XX_CONFIG_PCIE_DEVCTL, ®val); + if (regval & CN23XX_CONFIG_PCIE_DEVCTL_MASK) { + uncorrectable_err_mask = 0; + corrtable_err_status = 0; + pci_read_config_dword(oct->pci_dev, + CN23XX_CONFIG_PCIE_UNCORRECT_ERR_MASK, + &uncorrectable_err_mask); + pci_read_config_dword(oct->pci_dev, + CN23XX_CONFIG_PCIE_CORRECT_ERR_STATUS, + &corrtable_err_status); + dev_err(&oct->pci_dev->dev, "PCI-E Fatal error detected;\n" + "\tdev_ctl_status_reg = 0x%08x\n" + "\tuncorrectable_error_mask_reg = 0x%08x\n" + "\tcorrectable_error_status_reg = 0x%08x\n", + regval, uncorrectable_err_mask, + corrtable_err_status); + } + + regval |= 0xf; /* Enable Link error reporting */ + + dev_dbg(&oct->pci_dev->dev, "OCTEON[%d]: Enabling PCI-E error reporting..\n", + oct->octeon_id); + pci_write_config_dword(oct->pci_dev, CN23XX_CONFIG_PCIE_DEVCTL, regval); +} + static u32 cn23xx_coprocessor_clock(struct octeon_device *oct) { /* Bits 29:24 of RST_BOOT[PNR_MUL] holds the ref.clock MULTIPLIER @@ -224,6 +254,234 @@ static u32 cn23xx_coprocessor_clock(struct octeon_device *oct) return (((lio_pci_readq(oct, CN23XX_RST_BOOT) >> 24) & 0x3f) * 50); } +u32 cn23xx_pf_get_oq_ticks(struct octeon_device *oct, u32 time_intr_in_us) +{ + /* This gives the SLI clock per microsec */ + u32 oqticks_per_us = cn23xx_coprocessor_clock(oct); + + oct->pfvf_hsword.coproc_tics_per_us = oqticks_per_us; + + /* This gives the clock cycles per millisecond */ + oqticks_per_us *= 1000; + + /* This gives the oq ticks (1024 core clock cycles) per millisecond */ + oqticks_per_us /= 1024; + + /* time_intr is in microseconds. The next 2 steps gives the oq ticks + * corressponding to time_intr. + */ + oqticks_per_us *= time_intr_in_us; + oqticks_per_us /= 1000; + + return oqticks_per_us; +} + +static void cn23xx_setup_global_mac_regs(struct octeon_device *oct) +{ + u64 reg_val; + u16 mac_no = oct->pcie_port; + u16 pf_num = oct->pf_num; + + /* programming SRN and TRS for each MAC(0..3) */ + + dev_dbg(&oct->pci_dev->dev, "%s:Using pcie port %d\n", + __func__, mac_no); + /* By default, mapping all 64 IOQs to a single MACs */ + + reg_val = + octeon_read_csr64(oct, CN23XX_SLI_PKT_MAC_RINFO64(mac_no, pf_num)); + + if (oct->rev_id == OCTEON_CN23XX_REV_1_1) { + /* setting SRN <6:0> */ + reg_val = pf_num * CN23XX_MAX_RINGS_PER_PF_PASS_1_1; + } else { + /* setting SRN <6:0> */ + reg_val = pf_num * CN23XX_MAX_RINGS_PER_PF; + } + + /* setting TRS <23:16> */ + reg_val = reg_val | + (oct->sriov_info.trs << CN23XX_PKT_MAC_CTL_RINFO_TRS_BIT_POS); + /* write these settings to MAC register */ + octeon_write_csr64(oct, CN23XX_SLI_PKT_MAC_RINFO64(mac_no, pf_num), + reg_val); + + dev_dbg(&oct->pci_dev->dev, "SLI_PKT_MAC(%d)_PF(%d)_RINFO : 0x%016llx\n", + mac_no, pf_num, (u64)octeon_read_csr64 + (oct, CN23XX_SLI_PKT_MAC_RINFO64(mac_no, pf_num))); +} + +static int cn23xx_pf_setup_global_input_regs(struct octeon_device *oct) +{ + u32 q_no, ern, srn; + u64 pf_num; + u64 intr_threshold, reg_val; + struct octeon_instr_queue *iq; + struct octeon_cn23xx_pf *cn23xx = (struct octeon_cn23xx_pf *)oct->chip; + + pf_num = oct->pf_num; + + srn = oct->sriov_info.pf_srn; + ern = srn + oct->sriov_info.num_pf_rings; + + /** Set the MAC_NUM and PVF_NUM in IQ_PKT_CONTROL reg + * for all queues.Only PF can set these bits. + * bits 29:30 indicate the MAC num. + * bits 32:47 indicate the PVF num. + */ + for (q_no = 0; q_no < ern; q_no++) { + reg_val = oct->pcie_port << CN23XX_PKT_INPUT_CTL_MAC_NUM_POS; + reg_val |= pf_num << CN23XX_PKT_INPUT_CTL_PF_NUM_POS; + + octeon_write_csr64(oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no), + reg_val); + } + + /* Select ES, RO, NS, RDSIZE,DPTR Fomat#0 for + * pf queues + */ + for (q_no = srn; q_no < ern; q_no++) { + void __iomem *inst_cnt_reg; + + iq = oct->instr_queue[q_no]; + if (iq) + inst_cnt_reg = iq->inst_cnt_reg; + else + inst_cnt_reg = (u8 *)oct->mmio[0].hw_addr + + CN23XX_SLI_IQ_INSTR_COUNT64(q_no); + + reg_val = + octeon_read_csr64(oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no)); + + reg_val |= CN23XX_PKT_INPUT_CTL_MASK; + + octeon_write_csr64(oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no), + reg_val); + + /* Set WMARK level for triggering PI_INT */ + /* intr_threshold = CN23XX_DEF_IQ_INTR_THRESHOLD & */ + intr_threshold = CFG_GET_IQ_INTR_PKT(cn23xx->conf) & + CN23XX_PKT_IN_DONE_WMARK_MASK; + + writeq((readq(inst_cnt_reg) & + ~(CN23XX_PKT_IN_DONE_WMARK_MASK << + CN23XX_PKT_IN_DONE_WMARK_BIT_POS)) | + (intr_threshold << CN23XX_PKT_IN_DONE_WMARK_BIT_POS), + inst_cnt_reg); + } + return 0; +} + +static void cn23xx_pf_setup_global_output_regs(struct octeon_device *oct) +{ + u32 reg_val; + u32 q_no, ern, srn; + u64 time_threshold; + + struct octeon_cn23xx_pf *cn23xx = (struct octeon_cn23xx_pf *)oct->chip; + + srn = oct->sriov_info.pf_srn; + ern = srn + oct->sriov_info.num_pf_rings; + + if (CFG_GET_IS_SLI_BP_ON(cn23xx->conf)) { + octeon_write_csr64(oct, CN23XX_SLI_OQ_WMARK, 32); + } else { + /** Set Output queue watermark to 0 to disable backpressure */ + octeon_write_csr64(oct, CN23XX_SLI_OQ_WMARK, 0); + } + + for (q_no = srn; q_no < ern; q_no++) { + reg_val = octeon_read_csr(oct, CN23XX_SLI_OQ_PKT_CONTROL(q_no)); + + /* set IPTR & DPTR */ + reg_val |= + (CN23XX_PKT_OUTPUT_CTL_IPTR | CN23XX_PKT_OUTPUT_CTL_DPTR); + + /* reset BMODE */ + reg_val &= ~(CN23XX_PKT_OUTPUT_CTL_BMODE); + + /* No Relaxed Ordering, No Snoop, 64-bit Byte swap + * for Output Queue ScatterList + * reset ROR_P, NSR_P + */ + reg_val &= ~(CN23XX_PKT_OUTPUT_CTL_ROR_P); + reg_val &= ~(CN23XX_PKT_OUTPUT_CTL_NSR_P); + +#ifdef __LITTLE_ENDIAN_BITFIELD + reg_val &= ~(CN23XX_PKT_OUTPUT_CTL_ES_P); +#else + reg_val |= (CN23XX_PKT_OUTPUT_CTL_ES_P); +#endif + /* No Relaxed Ordering, No Snoop, 64-bit Byte swap + * for Output Queue Data + * reset ROR, NSR + */ + reg_val &= ~(CN23XX_PKT_OUTPUT_CTL_ROR); + reg_val &= ~(CN23XX_PKT_OUTPUT_CTL_NSR); + /* set the ES bit */ + reg_val |= (CN23XX_PKT_OUTPUT_CTL_ES); + + /* write all the selected settings */ + octeon_write_csr(oct, CN23XX_SLI_OQ_PKT_CONTROL(q_no), reg_val); + + /* Enabling these interrupt in oct->fn_list.enable_interrupt() + * routine which called after IOQ init. + * Set up interrupt packet and time thresholds + * for all the OQs + */ + time_threshold = cn23xx_pf_get_oq_ticks( + oct, (u32)CFG_GET_OQ_INTR_TIME(cn23xx->conf)); + + octeon_write_csr64(oct, CN23XX_SLI_OQ_PKT_INT_LEVELS(q_no), + (CFG_GET_OQ_INTR_PKT(cn23xx->conf) | + (time_threshold << 32))); + } + + /** Setting the water mark level for pko back pressure **/ + writeq(0x40, (u8 *)oct->mmio[0].hw_addr + CN23XX_SLI_OQ_WMARK); + + /** Disabling setting OQs in reset when ring has no dorebells + * enabling this will cause of head of line blocking + */ + /* Do it only for pass1.1. and pass1.2 */ + if ((oct->rev_id == OCTEON_CN23XX_REV_1_0) || + (oct->rev_id == OCTEON_CN23XX_REV_1_1)) + writeq(readq((u8 *)oct->mmio[0].hw_addr + + CN23XX_SLI_GBL_CONTROL) | 0x2, + (u8 *)oct->mmio[0].hw_addr + CN23XX_SLI_GBL_CONTROL); + + /** Enable channel-level backpressure */ + if (oct->pf_num) + writeq(0xffffffffffffffffULL, + (u8 *)oct->mmio[0].hw_addr + CN23XX_SLI_OUT_BP_EN2_W1S); + else + writeq(0xffffffffffffffffULL, + (u8 *)oct->mmio[0].hw_addr + CN23XX_SLI_OUT_BP_EN_W1S); +} + +static int cn23xx_setup_pf_device_regs(struct octeon_device *oct) +{ + cn23xx_enable_error_reporting(oct); + + /* program the MAC(0..3)_RINFO before setting up input/output regs */ + cn23xx_setup_global_mac_regs(oct); + + if (cn23xx_pf_setup_global_input_regs(oct)) + return -1; + + cn23xx_pf_setup_global_output_regs(oct); + + /* Default error timeout value should be 0x200000 to avoid host hang + * when reads invalid register + */ + octeon_write_csr64(oct, CN23XX_SLI_WINDOW_CTL, + CN23XX_SLI_WINDOW_CTL_DEFAULT); + + /* set SLI_PKT_IN_JABBER to handle large VXLAN packets */ + octeon_write_csr64(oct, CN23XX_SLI_PKT_IN_JABBER, CN23XX_INPUT_JABBER); + return 0; +} + static void cn23xx_setup_iq_regs(struct octeon_device *oct, u32 iq_no) { struct octeon_instr_queue *iq = oct->instr_queue[iq_no]; @@ -433,6 +691,7 @@ int setup_cn23xx_octeon_pf_device(struct octeon_device *oct) oct->fn_list.setup_iq_regs = cn23xx_setup_iq_regs; oct->fn_list.setup_oq_regs = cn23xx_setup_oq_regs; + oct->fn_list.setup_device_regs = cn23xx_setup_pf_device_regs; cn23xx_setup_reg_address(oct); diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index a9e7c0711c15..a00bd6c82799 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -3695,6 +3695,22 @@ static int octeon_device_init(struct octeon_device *octeon_dev) octeon_set_io_queues_off(octeon_dev); + if (OCTEON_CN23XX_PF(octeon_dev)) { + ret = octeon_dev->fn_list.setup_device_regs(octeon_dev); + if (ret) { + dev_err(&octeon_dev->pci_dev->dev, "OCTEON: Failed to configure device registers\n"); + return ret; + } + } + + /* Initialize soft command buffer pool + */ + if (octeon_setup_sc_buffer_pool(octeon_dev)) { + dev_err(&octeon_dev->pci_dev->dev, "sc buffer pool allocation failed\n"); + return 1; + } + atomic_set(&octeon_dev->status, OCT_DEV_SC_BUFF_POOL_INIT_DONE); + /* Setup the data structures that manage this Octeon's Input queues. */ if (octeon_setup_instr_queues(octeon_dev)) { dev_err(&octeon_dev->pci_dev->dev, @@ -3706,14 +3722,6 @@ static int octeon_device_init(struct octeon_device *octeon_dev) } atomic_set(&octeon_dev->status, OCT_DEV_INSTR_QUEUE_INIT_DONE); - /* Initialize soft command buffer pool - */ - if (octeon_setup_sc_buffer_pool(octeon_dev)) { - dev_err(&octeon_dev->pci_dev->dev, "sc buffer pool allocation failed\n"); - return 1; - } - atomic_set(&octeon_dev->status, OCT_DEV_SC_BUFF_POOL_INIT_DONE); - /* Initialize lists to manage the requests of different types that * arrive from user & kernel applications for this octeon device. */ diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_config.h b/drivers/net/ethernet/cavium/liquidio/octeon_config.h index 12ded6a4e7ef..c76556809ed1 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_config.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_config.h @@ -120,6 +120,9 @@ #define CFG_GET_IQ_DB_MIN(cfg) ((cfg)->iq.db_min) #define CFG_GET_IQ_DB_TIMEOUT(cfg) ((cfg)->iq.db_timeout) +#define CFG_GET_IQ_INTR_PKT(cfg) ((cfg)->iq.iq_intr_pkt) +#define CFG_SET_IQ_INTR_PKT(cfg, val) (cfg)->iq.iq_intr_pkt = val + #define CFG_GET_OQ_MAX_Q(cfg) ((cfg)->oq.max_oqs) #define CFG_GET_OQ_INFO_PTR(cfg) ((cfg)->oq.info_ptr) #define CFG_GET_OQ_PKTS_PER_INTR(cfg) ((cfg)->oq.pkts_per_intr) diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.h b/drivers/net/ethernet/cavium/liquidio/octeon_device.h index f10cdd45729e..c120613f7b0d 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_device.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.h @@ -276,6 +276,42 @@ struct octdev_props { struct net_device *netdev; }; +struct octeon_pf_vf_hs_word { +#ifdef __LITTLE_ENDIAN_BITFIELD + /** PKIND value assigned for the DPI interface */ + u64 pkind : 8; + + /** OCTEON core clock multiplier */ + u64 core_tics_per_us : 16; + + /** OCTEON coprocessor clock multiplier */ + u64 coproc_tics_per_us : 16; + + /** app that currently running on OCTEON */ + u64 app_mode : 8; + + /** RESERVED */ + u64 reserved : 16; + +#else + + /** RESERVED */ + u64 reserved : 16; + + /** app that currently running on OCTEON */ + u64 app_mode : 8; + + /** OCTEON coprocessor clock multiplier */ + u64 coproc_tics_per_us : 16; + + /** OCTEON core clock multiplier */ + u64 core_tics_per_us : 16; + + /** PKIND value assigned for the DPI interface */ + u64 pkind : 8; +#endif +}; + struct octeon_sriov_info { /* Actual rings left for PF device */ u32 num_pf_rings; @@ -413,6 +449,8 @@ struct octeon_device { struct octeon_sriov_info sriov_info; + struct octeon_pf_vf_hs_word pfvf_hsword; + int rx_pause; int tx_pause; From 1b7c55c4538bac0eb850359c955f452a8b56c192 Mon Sep 17 00:00:00 2001 From: Raghu Vatsavayi Date: Wed, 31 Aug 2016 11:03:27 -0700 Subject: [PATCH 0610/1715] liquidio: CN23XX queue manipulation This patch adds support for cn23xx queue manipulation. Signed-off-by: Derek Chickles Signed-off-by: Satanand Burla Signed-off-by: Felix Manlunas Signed-off-by: Raghu Vatsavayi Signed-off-by: David S. Miller --- .../cavium/liquidio/cn23xx_pf_device.c | 213 ++++++++++++++++++ .../ethernet/cavium/liquidio/cn66xx_device.c | 4 +- .../ethernet/cavium/liquidio/cn66xx_device.h | 2 +- .../net/ethernet/cavium/liquidio/lio_main.c | 12 +- .../ethernet/cavium/liquidio/octeon_device.h | 2 +- 5 files changed, 225 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c index d614b0aae6d9..7e932a328e58 100644 --- a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c +++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c @@ -311,6 +311,61 @@ static void cn23xx_setup_global_mac_regs(struct octeon_device *oct) (oct, CN23XX_SLI_PKT_MAC_RINFO64(mac_no, pf_num))); } +static int cn23xx_reset_io_queues(struct octeon_device *oct) +{ + int ret_val = 0; + u64 d64; + u32 q_no, srn, ern; + u32 loop = 1000; + + srn = oct->sriov_info.pf_srn; + ern = srn + oct->sriov_info.num_pf_rings; + + /*As per HRM reg description, s/w cant write 0 to ENB. */ + /*to make the queue off, need to set the RST bit. */ + + /* Reset the Enable bit for all the 64 IQs. */ + for (q_no = srn; q_no < ern; q_no++) { + /* set RST bit to 1. This bit applies to both IQ and OQ */ + d64 = octeon_read_csr64(oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no)); + d64 = d64 | CN23XX_PKT_INPUT_CTL_RST; + octeon_write_csr64(oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no), d64); + } + + /*wait until the RST bit is clear or the RST and quite bits are set*/ + for (q_no = srn; q_no < ern; q_no++) { + u64 reg_val = octeon_read_csr64(oct, + CN23XX_SLI_IQ_PKT_CONTROL64(q_no)); + while ((READ_ONCE(reg_val) & CN23XX_PKT_INPUT_CTL_RST) && + !(READ_ONCE(reg_val) & CN23XX_PKT_INPUT_CTL_QUIET) && + loop--) { + WRITE_ONCE(reg_val, octeon_read_csr64( + oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no))); + } + if (!loop) { + dev_err(&oct->pci_dev->dev, + "clearing the reset reg failed or setting the quiet reg failed for qno: %u\n", + q_no); + return -1; + } + WRITE_ONCE(reg_val, READ_ONCE(reg_val) & + ~CN23XX_PKT_INPUT_CTL_RST); + octeon_write_csr64(oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no), + READ_ONCE(reg_val)); + + WRITE_ONCE(reg_val, octeon_read_csr64( + oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no))); + if (READ_ONCE(reg_val) & CN23XX_PKT_INPUT_CTL_RST) { + dev_err(&oct->pci_dev->dev, + "clearing the reset failed for qno: %u\n", + q_no); + ret_val = -1; + } + } + + return ret_val; +} + static int cn23xx_pf_setup_global_input_regs(struct octeon_device *oct) { u32 q_no, ern, srn; @@ -324,6 +379,9 @@ static int cn23xx_pf_setup_global_input_regs(struct octeon_device *oct) srn = oct->sriov_info.pf_srn; ern = srn + oct->sriov_info.num_pf_rings; + if (cn23xx_reset_io_queues(oct)) + return -1; + /** Set the MAC_NUM and PVF_NUM in IQ_PKT_CONTROL reg * for all queues.Only PF can set these bits. * bits 29:30 indicate the MAC num. @@ -552,6 +610,158 @@ static void cn23xx_setup_oq_regs(struct octeon_device *oct, u32 oq_no) reg_val); } +static int cn23xx_enable_io_queues(struct octeon_device *oct) +{ + u64 reg_val; + u32 srn, ern, q_no; + u32 loop = 1000; + + srn = oct->sriov_info.pf_srn; + ern = srn + oct->num_iqs; + + for (q_no = srn; q_no < ern; q_no++) { + /* set the corresponding IQ IS_64B bit */ + if (oct->io_qmask.iq64B & BIT_ULL(q_no - srn)) { + reg_val = octeon_read_csr64( + oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no)); + reg_val = reg_val | CN23XX_PKT_INPUT_CTL_IS_64B; + octeon_write_csr64( + oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no), reg_val); + } + + /* set the corresponding IQ ENB bit */ + if (oct->io_qmask.iq & BIT_ULL(q_no - srn)) { + /* IOQs are in reset by default in PEM2 mode, + * clearing reset bit + */ + reg_val = octeon_read_csr64( + oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no)); + + if (reg_val & CN23XX_PKT_INPUT_CTL_RST) { + while ((reg_val & CN23XX_PKT_INPUT_CTL_RST) && + !(reg_val & + CN23XX_PKT_INPUT_CTL_QUIET) && + loop--) { + reg_val = octeon_read_csr64( + oct, + CN23XX_SLI_IQ_PKT_CONTROL64(q_no)); + } + if (!loop) { + dev_err(&oct->pci_dev->dev, + "clearing the reset reg failed or setting the quiet reg failed for qno: %u\n", + q_no); + return -1; + } + reg_val = reg_val & ~CN23XX_PKT_INPUT_CTL_RST; + octeon_write_csr64( + oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no), + reg_val); + + reg_val = octeon_read_csr64( + oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no)); + if (reg_val & CN23XX_PKT_INPUT_CTL_RST) { + dev_err(&oct->pci_dev->dev, + "clearing the reset failed for qno: %u\n", + q_no); + return -1; + } + } + reg_val = octeon_read_csr64( + oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no)); + reg_val = reg_val | CN23XX_PKT_INPUT_CTL_RING_ENB; + octeon_write_csr64( + oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no), reg_val); + } + } + for (q_no = srn; q_no < ern; q_no++) { + u32 reg_val; + /* set the corresponding OQ ENB bit */ + if (oct->io_qmask.oq & BIT_ULL(q_no - srn)) { + reg_val = octeon_read_csr( + oct, CN23XX_SLI_OQ_PKT_CONTROL(q_no)); + reg_val = reg_val | CN23XX_PKT_OUTPUT_CTL_RING_ENB; + octeon_write_csr(oct, CN23XX_SLI_OQ_PKT_CONTROL(q_no), + reg_val); + } + } + return 0; +} + +static void cn23xx_disable_io_queues(struct octeon_device *oct) +{ + int q_no, loop; + u64 d64; + u32 d32; + u32 srn, ern; + + srn = oct->sriov_info.pf_srn; + ern = srn + oct->num_iqs; + + /*** Disable Input Queues. ***/ + for (q_no = srn; q_no < ern; q_no++) { + loop = HZ; + + /* start the Reset for a particular ring */ + WRITE_ONCE(d64, octeon_read_csr64( + oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no))); + WRITE_ONCE(d64, READ_ONCE(d64) & + (~(CN23XX_PKT_INPUT_CTL_RING_ENB))); + WRITE_ONCE(d64, READ_ONCE(d64) | CN23XX_PKT_INPUT_CTL_RST); + octeon_write_csr64(oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no), + READ_ONCE(d64)); + + /* Wait until hardware indicates that the particular IQ + * is out of reset. + */ + WRITE_ONCE(d64, octeon_read_csr64( + oct, CN23XX_SLI_PKT_IOQ_RING_RST)); + while (!(READ_ONCE(d64) & BIT_ULL(q_no)) && loop--) { + WRITE_ONCE(d64, octeon_read_csr64( + oct, CN23XX_SLI_PKT_IOQ_RING_RST)); + schedule_timeout_uninterruptible(1); + } + + /* Reset the doorbell register for this Input Queue. */ + octeon_write_csr(oct, CN23XX_SLI_IQ_DOORBELL(q_no), 0xFFFFFFFF); + while (octeon_read_csr64(oct, CN23XX_SLI_IQ_DOORBELL(q_no)) && + loop--) { + schedule_timeout_uninterruptible(1); + } + } + + /*** Disable Output Queues. ***/ + for (q_no = srn; q_no < ern; q_no++) { + loop = HZ; + + /* Wait until hardware indicates that the particular IQ + * is out of reset.It given that SLI_PKT_RING_RST is + * common for both IQs and OQs + */ + WRITE_ONCE(d64, octeon_read_csr64( + oct, CN23XX_SLI_PKT_IOQ_RING_RST)); + while (!(READ_ONCE(d64) & BIT_ULL(q_no)) && loop--) { + WRITE_ONCE(d64, octeon_read_csr64( + oct, CN23XX_SLI_PKT_IOQ_RING_RST)); + schedule_timeout_uninterruptible(1); + } + + /* Reset the doorbell register for this Output Queue. */ + octeon_write_csr(oct, CN23XX_SLI_OQ_PKTS_CREDIT(q_no), + 0xFFFFFFFF); + while (octeon_read_csr64(oct, + CN23XX_SLI_OQ_PKTS_CREDIT(q_no)) && + loop--) { + schedule_timeout_uninterruptible(1); + } + + /* clear the SLI_PKT(0..63)_CNTS[CNT] reg value */ + WRITE_ONCE(d32, octeon_read_csr( + oct, CN23XX_SLI_OQ_PKTS_SENT(q_no))); + octeon_write_csr(oct, CN23XX_SLI_OQ_PKTS_SENT(q_no), + READ_ONCE(d32)); + } +} + static void cn23xx_get_pcie_qlmport(struct octeon_device *oct) { oct->pcie_port = (octeon_read_csr(oct, CN23XX_SLI_MAC_NUMBER)) & 0xff; @@ -693,6 +903,9 @@ int setup_cn23xx_octeon_pf_device(struct octeon_device *oct) oct->fn_list.setup_oq_regs = cn23xx_setup_oq_regs; oct->fn_list.setup_device_regs = cn23xx_setup_pf_device_regs; + oct->fn_list.enable_io_queues = cn23xx_enable_io_queues; + oct->fn_list.disable_io_queues = cn23xx_disable_io_queues; + cn23xx_setup_reg_address(oct); oct->coproc_clock_rate = 1000000ULL * cn23xx_coprocessor_clock(oct); diff --git a/drivers/net/ethernet/cavium/liquidio/cn66xx_device.c b/drivers/net/ethernet/cavium/liquidio/cn66xx_device.c index dc5d14a7effa..7ff72476c47e 100644 --- a/drivers/net/ethernet/cavium/liquidio/cn66xx_device.c +++ b/drivers/net/ethernet/cavium/liquidio/cn66xx_device.c @@ -338,7 +338,7 @@ void lio_cn6xxx_setup_oq_regs(struct octeon_device *oct, u32 oq_no) octeon_write_csr(oct, CN6XXX_SLI_PKT_CNT_INT_ENB, intr); } -void lio_cn6xxx_enable_io_queues(struct octeon_device *oct) +int lio_cn6xxx_enable_io_queues(struct octeon_device *oct) { u32 mask; @@ -353,6 +353,8 @@ void lio_cn6xxx_enable_io_queues(struct octeon_device *oct) mask = octeon_read_csr(oct, CN6XXX_SLI_PKT_OUT_ENB); mask |= oct->io_qmask.oq; octeon_write_csr(oct, CN6XXX_SLI_PKT_OUT_ENB, mask); + + return 0; } void lio_cn6xxx_disable_io_queues(struct octeon_device *oct) diff --git a/drivers/net/ethernet/cavium/liquidio/cn66xx_device.h b/drivers/net/ethernet/cavium/liquidio/cn66xx_device.h index 2e4bc25065f0..5286f9ec2613 100644 --- a/drivers/net/ethernet/cavium/liquidio/cn66xx_device.h +++ b/drivers/net/ethernet/cavium/liquidio/cn66xx_device.h @@ -80,7 +80,7 @@ void lio_cn6xxx_setup_global_input_regs(struct octeon_device *oct); void lio_cn6xxx_setup_global_output_regs(struct octeon_device *oct); void lio_cn6xxx_setup_iq_regs(struct octeon_device *oct, u32 iq_no); void lio_cn6xxx_setup_oq_regs(struct octeon_device *oct, u32 oq_no); -void lio_cn6xxx_enable_io_queues(struct octeon_device *oct); +int lio_cn6xxx_enable_io_queues(struct octeon_device *oct); void lio_cn6xxx_disable_io_queues(struct octeon_device *oct); irqreturn_t lio_cn6xxx_process_interrupt_regs(void *dev); void lio_cn6xxx_bar1_idx_setup(struct octeon_device *oct, u64 core_addr, diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index a00bd6c82799..cb5838137910 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -2094,10 +2094,8 @@ static inline int setup_io_queues(struct octeon_device *octeon_dev, droq = octeon_dev->droq[q_no]; napi = &droq->napi; - dev_dbg(&octeon_dev->pci_dev->dev, - "netif_napi_add netdev:%llx oct:%llx\n", - (u64)netdev, - (u64)octeon_dev); + dev_dbg(&octeon_dev->pci_dev->dev, "netif_napi_add netdev:%llx oct:%llx pf_num:%d\n", + (u64)netdev, (u64)octeon_dev, octeon_dev->pf_num); netif_napi_add(netdev, napi, liquidio_napi_poll, 64); /* designate a CPU for this droq */ @@ -3766,7 +3764,11 @@ static int octeon_device_init(struct octeon_device *octeon_dev) octeon_dev->fn_list.enable_interrupt(octeon_dev->chip); /* Enable the input and output queues for this Octeon device */ - octeon_dev->fn_list.enable_io_queues(octeon_dev); + ret = octeon_dev->fn_list.enable_io_queues(octeon_dev); + if (ret) { + dev_err(&octeon_dev->pci_dev->dev, "Failed to enable input/output queues"); + return ret; + } atomic_set(&octeon_dev->status, OCT_DEV_IO_QUEUES_DONE); diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.h b/drivers/net/ethernet/cavium/liquidio/octeon_device.h index c120613f7b0d..2439cc5192b1 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_device.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.h @@ -217,7 +217,7 @@ struct octeon_fn_list { void (*enable_interrupt)(void *); void (*disable_interrupt)(void *); - void (*enable_io_queues)(struct octeon_device *); + int (*enable_io_queues)(struct octeon_device *); void (*disable_io_queues)(struct octeon_device *); }; From 5b07aee11227fa4ccbf9b084e4fb44f655b135c0 Mon Sep 17 00:00:00 2001 From: Raghu Vatsavayi Date: Wed, 31 Aug 2016 11:03:28 -0700 Subject: [PATCH 0611/1715] liquidio: MSIX support for CN23XX This patch adds support msix interrupt for cn23xx device. Signed-off-by: Derek Chickles Signed-off-by: Satanand Burla Signed-off-by: Felix Manlunas Signed-off-by: Raghu Vatsavayi Signed-off-by: David S. Miller --- .../cavium/liquidio/cn23xx_pf_device.c | 166 +++++++++-- .../ethernet/cavium/liquidio/cn66xx_device.c | 10 +- .../ethernet/cavium/liquidio/cn66xx_device.h | 4 +- .../net/ethernet/cavium/liquidio/lio_main.c | 267 +++++++++++++++--- .../ethernet/cavium/liquidio/octeon_device.c | 39 +++ .../ethernet/cavium/liquidio/octeon_device.h | 33 ++- 6 files changed, 451 insertions(+), 68 deletions(-) diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c index 7e932a328e58..2e78101c3653 100644 --- a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c +++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c @@ -567,10 +567,16 @@ static void cn23xx_setup_iq_regs(struct octeon_device *oct, u32 iq_no) */ pkt_in_done = readq(iq->inst_cnt_reg); - /* Clear the count by writing back what we read, but don't - * enable interrupts - */ - writeq(pkt_in_done, iq->inst_cnt_reg); + if (oct->msix_on) { + /* Set CINT_ENB to enable IQ interrupt */ + writeq((pkt_in_done | CN23XX_INTR_CINT_ENB), + iq->inst_cnt_reg); + } else { + /* Clear the count by writing back what we read, but don't + * enable interrupts + */ + writeq(pkt_in_done, iq->inst_cnt_reg); + } iq->reset_instr_cnt = 0; } @@ -579,6 +585,9 @@ static void cn23xx_setup_oq_regs(struct octeon_device *oct, u32 oq_no) { u32 reg_val; struct octeon_droq *droq = oct->droq[oq_no]; + struct octeon_cn23xx_pf *cn23xx = (struct octeon_cn23xx_pf *)oct->chip; + u64 time_threshold; + u64 cnt_threshold; oq_no += oct->sriov_info.pf_srn; @@ -595,19 +604,31 @@ static void cn23xx_setup_oq_regs(struct octeon_device *oct, u32 oq_no) droq->pkts_credit_reg = (u8 *)oct->mmio[0].hw_addr + CN23XX_SLI_OQ_PKTS_CREDIT(oq_no); - /* Enable this output queue to generate Packet Timer Interrupt - */ - reg_val = octeon_read_csr(oct, CN23XX_SLI_OQ_PKT_CONTROL(oq_no)); - reg_val |= CN23XX_PKT_OUTPUT_CTL_TENB; - octeon_write_csr(oct, CN23XX_SLI_OQ_PKT_CONTROL(oq_no), - reg_val); + if (!oct->msix_on) { + /* Enable this output queue to generate Packet Timer Interrupt + */ + reg_val = + octeon_read_csr(oct, CN23XX_SLI_OQ_PKT_CONTROL(oq_no)); + reg_val |= CN23XX_PKT_OUTPUT_CTL_TENB; + octeon_write_csr(oct, CN23XX_SLI_OQ_PKT_CONTROL(oq_no), + reg_val); - /* Enable this output queue to generate Packet Count Interrupt - */ - reg_val = octeon_read_csr(oct, CN23XX_SLI_OQ_PKT_CONTROL(oq_no)); - reg_val |= CN23XX_PKT_OUTPUT_CTL_CENB; - octeon_write_csr(oct, CN23XX_SLI_OQ_PKT_CONTROL(oq_no), - reg_val); + /* Enable this output queue to generate Packet Count Interrupt + */ + reg_val = + octeon_read_csr(oct, CN23XX_SLI_OQ_PKT_CONTROL(oq_no)); + reg_val |= CN23XX_PKT_OUTPUT_CTL_CENB; + octeon_write_csr(oct, CN23XX_SLI_OQ_PKT_CONTROL(oq_no), + reg_val); + } else { + time_threshold = cn23xx_pf_get_oq_ticks( + oct, (u32)CFG_GET_OQ_INTR_TIME(cn23xx->conf)); + cnt_threshold = (u32)CFG_GET_OQ_INTR_PKT(cn23xx->conf); + + octeon_write_csr64( + oct, CN23XX_SLI_OQ_PKT_INT_LEVELS(oq_no), + ((time_threshold << 32 | cnt_threshold))); + } } static int cn23xx_enable_io_queues(struct octeon_device *oct) @@ -762,6 +783,110 @@ static void cn23xx_disable_io_queues(struct octeon_device *oct) } } +static u64 cn23xx_pf_msix_interrupt_handler(void *dev) +{ + struct octeon_ioq_vector *ioq_vector = (struct octeon_ioq_vector *)dev; + struct octeon_device *oct = ioq_vector->oct_dev; + u64 pkts_sent; + u64 ret = 0; + struct octeon_droq *droq = oct->droq[ioq_vector->droq_index]; + + dev_dbg(&oct->pci_dev->dev, "In %s octeon_dev @ %p\n", __func__, oct); + + if (!droq) { + dev_err(&oct->pci_dev->dev, "23XX bringup FIXME: oct pfnum:%d ioq_vector->ioq_num :%d droq is NULL\n", + oct->pf_num, ioq_vector->ioq_num); + return 0; + } + + pkts_sent = readq(droq->pkts_sent_reg); + + /* If our device has interrupted, then proceed. Also check + * for all f's if interrupt was triggered on an error + * and the PCI read fails. + */ + if (!pkts_sent || (pkts_sent == 0xFFFFFFFFFFFFFFFFULL)) + return ret; + + /* Write count reg in sli_pkt_cnts to clear these int.*/ + if ((pkts_sent & CN23XX_INTR_PO_INT) || + (pkts_sent & CN23XX_INTR_PI_INT)) { + if (pkts_sent & CN23XX_INTR_PO_INT) + ret |= MSIX_PO_INT; + } + + if (pkts_sent & CN23XX_INTR_PI_INT) + /* We will clear the count when we update the read_index. */ + ret |= MSIX_PI_INT; + + /* Never need to handle msix mbox intr for pf. They arrive on the last + * msix + */ + return ret; +} + +static irqreturn_t cn23xx_interrupt_handler(void *dev) +{ + struct octeon_device *oct = (struct octeon_device *)dev; + struct octeon_cn23xx_pf *cn23xx = (struct octeon_cn23xx_pf *)oct->chip; + u64 intr64; + + dev_dbg(&oct->pci_dev->dev, "In %s octeon_dev @ %p\n", __func__, oct); + intr64 = readq(cn23xx->intr_sum_reg64); + + oct->int_status = 0; + + if (intr64 & CN23XX_INTR_ERR) + dev_err(&oct->pci_dev->dev, "OCTEON[%d]: Error Intr: 0x%016llx\n", + oct->octeon_id, CVM_CAST64(intr64)); + + if (oct->msix_on != LIO_FLAG_MSIX_ENABLED) { + if (intr64 & CN23XX_INTR_PKT_DATA) + oct->int_status |= OCT_DEV_INTR_PKT_DATA; + } + + if (intr64 & (CN23XX_INTR_DMA0_FORCE)) + oct->int_status |= OCT_DEV_INTR_DMA0_FORCE; + if (intr64 & (CN23XX_INTR_DMA1_FORCE)) + oct->int_status |= OCT_DEV_INTR_DMA1_FORCE; + + /* Clear the current interrupts */ + writeq(intr64, cn23xx->intr_sum_reg64); + + return IRQ_HANDLED; +} + +static void cn23xx_enable_pf_interrupt(struct octeon_device *oct, u8 intr_flag) +{ + struct octeon_cn23xx_pf *cn23xx = (struct octeon_cn23xx_pf *)oct->chip; + u64 intr_val = 0; + + /* Divide the single write to multiple writes based on the flag. */ + /* Enable Interrupt */ + if (intr_flag == OCTEON_ALL_INTR) { + writeq(cn23xx->intr_mask64, cn23xx->intr_enb_reg64); + } else if (intr_flag & OCTEON_OUTPUT_INTR) { + intr_val = readq(cn23xx->intr_enb_reg64); + intr_val |= CN23XX_INTR_PKT_DATA; + writeq(intr_val, cn23xx->intr_enb_reg64); + } +} + +static void cn23xx_disable_pf_interrupt(struct octeon_device *oct, u8 intr_flag) +{ + struct octeon_cn23xx_pf *cn23xx = (struct octeon_cn23xx_pf *)oct->chip; + u64 intr_val = 0; + + /* Disable Interrupts */ + if (intr_flag == OCTEON_ALL_INTR) { + writeq(0, cn23xx->intr_enb_reg64); + } else if (intr_flag & OCTEON_OUTPUT_INTR) { + intr_val = readq(cn23xx->intr_enb_reg64); + intr_val &= ~CN23XX_INTR_PKT_DATA; + writeq(intr_val, cn23xx->intr_enb_reg64); + } +} + static void cn23xx_get_pcie_qlmport(struct octeon_device *oct) { oct->pcie_port = (octeon_read_csr(oct, CN23XX_SLI_MAC_NUMBER)) & 0xff; @@ -816,7 +941,8 @@ static void cn23xx_setup_reg_address(struct octeon_device *oct) cn23xx_get_pcie_qlmport(oct); cn23xx->intr_mask64 = CN23XX_INTR_MASK; - cn23xx->intr_mask64 |= CN23XX_INTR_PKT_TIME; + if (!oct->msix_on) + cn23xx->intr_mask64 |= CN23XX_INTR_PKT_TIME; if (oct->rev_id >= OCTEON_CN23XX_REV_1_1) cn23xx->intr_mask64 |= CN23XX_INTR_VF_MBOX; @@ -901,8 +1027,14 @@ int setup_cn23xx_octeon_pf_device(struct octeon_device *oct) oct->fn_list.setup_iq_regs = cn23xx_setup_iq_regs; oct->fn_list.setup_oq_regs = cn23xx_setup_oq_regs; + oct->fn_list.process_interrupt_regs = cn23xx_interrupt_handler; + oct->fn_list.msix_interrupt_handler = cn23xx_pf_msix_interrupt_handler; + oct->fn_list.setup_device_regs = cn23xx_setup_pf_device_regs; + oct->fn_list.enable_interrupt = cn23xx_enable_pf_interrupt; + oct->fn_list.disable_interrupt = cn23xx_disable_pf_interrupt; + oct->fn_list.enable_io_queues = cn23xx_enable_io_queues; oct->fn_list.disable_io_queues = cn23xx_disable_io_queues; diff --git a/drivers/net/ethernet/cavium/liquidio/cn66xx_device.c b/drivers/net/ethernet/cavium/liquidio/cn66xx_device.c index 7ff72476c47e..e779af88621b 100644 --- a/drivers/net/ethernet/cavium/liquidio/cn66xx_device.c +++ b/drivers/net/ethernet/cavium/liquidio/cn66xx_device.c @@ -479,18 +479,20 @@ lio_cn6xxx_update_read_index(struct octeon_instr_queue *iq) return new_idx; } -void lio_cn6xxx_enable_interrupt(void *chip) +void lio_cn6xxx_enable_interrupt(struct octeon_device *oct, + u8 unused __attribute__((unused))) { - struct octeon_cn6xxx *cn6xxx = (struct octeon_cn6xxx *)chip; + struct octeon_cn6xxx *cn6xxx = (struct octeon_cn6xxx *)oct->chip; u64 mask = cn6xxx->intr_mask64 | CN6XXX_INTR_DMA0_FORCE; /* Enable Interrupt */ writeq(mask, cn6xxx->intr_enb_reg64); } -void lio_cn6xxx_disable_interrupt(void *chip) +void lio_cn6xxx_disable_interrupt(struct octeon_device *oct, + u8 unused __attribute__((unused))) { - struct octeon_cn6xxx *cn6xxx = (struct octeon_cn6xxx *)chip; + struct octeon_cn6xxx *cn6xxx = (struct octeon_cn6xxx *)oct->chip; /* Disable Interrupts */ writeq(0, cn6xxx->intr_enb_reg64); diff --git a/drivers/net/ethernet/cavium/liquidio/cn66xx_device.h b/drivers/net/ethernet/cavium/liquidio/cn66xx_device.h index 5286f9ec2613..a40a91394079 100644 --- a/drivers/net/ethernet/cavium/liquidio/cn66xx_device.h +++ b/drivers/net/ethernet/cavium/liquidio/cn66xx_device.h @@ -89,8 +89,8 @@ void lio_cn6xxx_bar1_idx_write(struct octeon_device *oct, u32 idx, u32 mask); u32 lio_cn6xxx_bar1_idx_read(struct octeon_device *oct, u32 idx); u32 lio_cn6xxx_update_read_index(struct octeon_instr_queue *iq); -void lio_cn6xxx_enable_interrupt(void *chip); -void lio_cn6xxx_disable_interrupt(void *chip); +void lio_cn6xxx_enable_interrupt(struct octeon_device *oct, u8 unused); +void lio_cn6xxx_disable_interrupt(struct octeon_device *oct, u8 unused); void cn6xxx_get_pcie_qlmport(struct octeon_device *oct); void lio_cn6xxx_setup_reg_address(struct octeon_device *oct, void *chip, struct octeon_reg_list *reg_list); diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index cb5838137910..464d42bdaca2 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -195,6 +195,19 @@ static void octeon_droq_bh(unsigned long pdev) reschedule |= octeon_droq_process_packets(oct, oct->droq[q_no], MAX_PACKET_BUDGET); lio_enable_irq(oct->droq[q_no], NULL); + + if (OCTEON_CN23XX_PF(oct) && oct->msix_on) { + /* set time and cnt interrupt thresholds for this DROQ + * for NAPI + */ + int adjusted_q_no = q_no + oct->sriov_info.pf_srn; + + octeon_write_csr64( + oct, CN23XX_SLI_OQ_PKT_INT_LEVELS(adjusted_q_no), + 0x5700000040ULL); + octeon_write_csr64( + oct, CN23XX_SLI_OQ_PKTS_SENT(adjusted_q_no), 0); + } } if (reschedule) @@ -347,7 +360,7 @@ static void stop_pci_io(struct octeon_device *oct) pci_disable_device(oct->pci_dev); /* Disable interrupts */ - oct->fn_list.disable_interrupt(oct->chip); + oct->fn_list.disable_interrupt(oct, OCTEON_ALL_INTR); pcierror_quiesce_device(oct); @@ -910,6 +923,27 @@ static void update_txq_status(struct octeon_device *oct, int iq_num) } } +static +int liquidio_schedule_msix_droq_pkt_handler(struct octeon_droq *droq, u64 ret) +{ + struct octeon_device *oct = droq->oct_dev; + struct octeon_device_priv *oct_priv = + (struct octeon_device_priv *)oct->priv; + + if (droq->ops.poll_mode) { + droq->ops.napi_fn(droq); + } else { + if (ret & MSIX_PO_INT) { + tasklet_schedule(&oct_priv->droq_tasklet); + return 1; + } + /* this will be flushed periodically by check iq db */ + if (ret & MSIX_PI_INT) + return 0; + } + return 0; +} + /** * \brief Droq packet processor sceduler * @param oct octeon device @@ -940,19 +974,36 @@ void liquidio_schedule_droq_pkt_handlers(struct octeon_device *oct) } } +static irqreturn_t +liquidio_msix_intr_handler(int irq __attribute__((unused)), void *dev) +{ + u64 ret; + struct octeon_ioq_vector *ioq_vector = (struct octeon_ioq_vector *)dev; + struct octeon_device *oct = ioq_vector->oct_dev; + struct octeon_droq *droq = oct->droq[ioq_vector->droq_index]; + + ret = oct->fn_list.msix_interrupt_handler(ioq_vector); + + if ((ret & MSIX_PO_INT) || (ret & MSIX_PI_INT)) + liquidio_schedule_msix_droq_pkt_handler(droq, ret); + + return IRQ_HANDLED; +} + /** * \brief Interrupt handler for octeon * @param irq unused * @param dev octeon device */ static -irqreturn_t liquidio_intr_handler(int irq __attribute__((unused)), void *dev) +irqreturn_t liquidio_legacy_intr_handler(int irq __attribute__((unused)), + void *dev) { struct octeon_device *oct = (struct octeon_device *)dev; irqreturn_t ret; /* Disable our interrupts for the duration of ISR */ - oct->fn_list.disable_interrupt(oct->chip); + oct->fn_list.disable_interrupt(oct, OCTEON_ALL_INTR); ret = oct->fn_list.process_interrupt_regs(oct); @@ -961,7 +1012,7 @@ irqreturn_t liquidio_intr_handler(int irq __attribute__((unused)), void *dev) /* Re-enable our interrupts */ if (!(atomic_read(&oct->status) == OCT_DEV_IN_RESET)) - oct->fn_list.enable_interrupt(oct->chip); + oct->fn_list.enable_interrupt(oct, OCTEON_ALL_INTR); return ret; } @@ -975,24 +1026,110 @@ irqreturn_t liquidio_intr_handler(int irq __attribute__((unused)), void *dev) static int octeon_setup_interrupt(struct octeon_device *oct) { int irqret, err; + struct msix_entry *msix_entries; + int i; + int num_ioq_vectors; + int num_alloc_ioq_vectors; - err = pci_enable_msi(oct->pci_dev); - if (err) - dev_warn(&oct->pci_dev->dev, "Reverting to legacy interrupts. Error: %d\n", - err); - else - oct->flags |= LIO_FLAG_MSI_ENABLED; + if (OCTEON_CN23XX_PF(oct) && oct->msix_on) { + oct->num_msix_irqs = oct->sriov_info.num_pf_rings; + /* one non ioq interrupt for handling sli_mac_pf_int_sum */ + oct->num_msix_irqs += 1; - irqret = request_irq(oct->pci_dev->irq, liquidio_intr_handler, - IRQF_SHARED, "octeon", oct); - if (irqret) { - if (oct->flags & LIO_FLAG_MSI_ENABLED) - pci_disable_msi(oct->pci_dev); - dev_err(&oct->pci_dev->dev, "Request IRQ failed with code: %d\n", - irqret); - return 1; + oct->msix_entries = kcalloc( + oct->num_msix_irqs, sizeof(struct msix_entry), GFP_KERNEL); + if (!oct->msix_entries) + return 1; + + msix_entries = (struct msix_entry *)oct->msix_entries; + /*Assumption is that pf msix vectors start from pf srn to pf to + * trs and not from 0. if not change this code + */ + for (i = 0; i < oct->num_msix_irqs - 1; i++) + msix_entries[i].entry = oct->sriov_info.pf_srn + i; + msix_entries[oct->num_msix_irqs - 1].entry = + oct->sriov_info.trs; + num_alloc_ioq_vectors = pci_enable_msix_range( + oct->pci_dev, msix_entries, + oct->num_msix_irqs, + oct->num_msix_irqs); + if (num_alloc_ioq_vectors < 0) { + dev_err(&oct->pci_dev->dev, "unable to Allocate MSI-X interrupts\n"); + kfree(oct->msix_entries); + oct->msix_entries = NULL; + return 1; + } + dev_dbg(&oct->pci_dev->dev, "OCTEON: Enough MSI-X interrupts are allocated...\n"); + + num_ioq_vectors = oct->num_msix_irqs; + + /** For PF, there is one non-ioq interrupt handler */ + num_ioq_vectors -= 1; + irqret = request_irq(msix_entries[num_ioq_vectors].vector, + liquidio_legacy_intr_handler, 0, "octeon", + oct); + if (irqret) { + dev_err(&oct->pci_dev->dev, + "OCTEON: Request_irq failed for MSIX interrupt Error: %d\n", + irqret); + pci_disable_msix(oct->pci_dev); + kfree(oct->msix_entries); + oct->msix_entries = NULL; + return 1; + } + + for (i = 0; i < num_ioq_vectors; i++) { + irqret = request_irq(msix_entries[i].vector, + liquidio_msix_intr_handler, 0, + "octeon", &oct->ioq_vector[i]); + if (irqret) { + dev_err(&oct->pci_dev->dev, + "OCTEON: Request_irq failed for MSIX interrupt Error: %d\n", + irqret); + /** Freeing the non-ioq irq vector here . */ + free_irq(msix_entries[num_ioq_vectors].vector, + oct); + + while (i) { + i--; + /** clearing affinity mask. */ + irq_set_affinity_hint( + msix_entries[i].vector, NULL); + free_irq(msix_entries[i].vector, + &oct->ioq_vector[i]); + } + pci_disable_msix(oct->pci_dev); + kfree(oct->msix_entries); + oct->msix_entries = NULL; + return 1; + } + oct->ioq_vector[i].vector = msix_entries[i].vector; + /* assign the cpu mask for this msix interrupt vector */ + irq_set_affinity_hint( + msix_entries[i].vector, + (&oct->ioq_vector[i].affinity_mask)); + } + dev_dbg(&oct->pci_dev->dev, "OCTEON[%d]: MSI-X enabled\n", + oct->octeon_id); + } else { + err = pci_enable_msi(oct->pci_dev); + if (err) + dev_warn(&oct->pci_dev->dev, "Reverting to legacy interrupts. Error: %d\n", + err); + else + oct->flags |= LIO_FLAG_MSI_ENABLED; + + irqret = request_irq(oct->pci_dev->irq, + liquidio_legacy_intr_handler, IRQF_SHARED, + "octeon", oct); + if (irqret) { + if (oct->flags & LIO_FLAG_MSI_ENABLED) + pci_disable_msi(oct->pci_dev); + dev_err(&oct->pci_dev->dev, "Request IRQ failed with code: %d\n", + irqret); + return 1; + } } - return 0; } @@ -1015,6 +1152,9 @@ liquidio_probe(struct pci_dev *pdev, return -ENOMEM; } + if (pdev->device == OCTEON_CN23XX_PF_VID) + oct_dev->msix_on = LIO_FLAG_MSIX_ENABLED; + dev_info(&pdev->dev, "Initializing device %x:%x.\n", (u32)pdev->vendor, (u32)pdev->device); @@ -1054,6 +1194,7 @@ liquidio_probe(struct pci_dev *pdev, static void octeon_destroy_resources(struct octeon_device *oct) { int i; + struct msix_entry *msix_entries; struct octeon_device_priv *oct_priv = (struct octeon_device_priv *)oct->priv; @@ -1098,21 +1239,40 @@ static void octeon_destroy_resources(struct octeon_device *oct) dev_err(&oct->pci_dev->dev, "OQ had pending packets\n"); /* Disable interrupts */ - oct->fn_list.disable_interrupt(oct->chip); + oct->fn_list.disable_interrupt(oct, OCTEON_ALL_INTR); - /* Release the interrupt line */ - free_irq(oct->pci_dev->irq, oct); + if (oct->msix_on) { + msix_entries = (struct msix_entry *)oct->msix_entries; + for (i = 0; i < oct->num_msix_irqs - 1; i++) { + /* clear the affinity_cpumask */ + irq_set_affinity_hint(msix_entries[i].vector, + NULL); + free_irq(msix_entries[i].vector, + &oct->ioq_vector[i]); + } + /* non-iov vector's argument is oct struct */ + free_irq(msix_entries[i].vector, oct); - if (oct->flags & LIO_FLAG_MSI_ENABLED) - pci_disable_msi(oct->pci_dev); + pci_disable_msix(oct->pci_dev); + kfree(oct->msix_entries); + oct->msix_entries = NULL; + } else { + /* Release the interrupt line */ + free_irq(oct->pci_dev->irq, oct); - /* fallthrough */ + if (oct->flags & LIO_FLAG_MSI_ENABLED) + pci_disable_msi(oct->pci_dev); + } + + if (OCTEON_CN23XX_PF(oct)) + octeon_free_ioq_vector(oct); + /* fallthrough */ case OCT_DEV_IN_RESET: case OCT_DEV_DROQ_INIT_DONE: /*atomic_set(&oct->status, OCT_DEV_DROQ_INIT_DONE);*/ mdelay(100); for (i = 0; i < MAX_OCTEON_OUTPUT_QUEUES(oct); i++) { - if (!(oct->io_qmask.oq & (1ULL << i))) + if (!(oct->io_qmask.oq & BIT_ULL(i))) continue; octeon_delete_droq(oct, i); } @@ -2147,7 +2307,7 @@ static void octnet_poll_check_txq_status(struct work_struct *work) * \brief Sets up the txq poll check * @param netdev network device */ -static inline void setup_tx_poll_fn(struct net_device *netdev) +static inline int setup_tx_poll_fn(struct net_device *netdev) { struct lio *lio = GET_LIO(netdev); struct octeon_device *oct = lio->oct_dev; @@ -2156,21 +2316,24 @@ static inline void setup_tx_poll_fn(struct net_device *netdev) WQ_MEM_RECLAIM, 0); if (!lio->txq_status_wq.wq) { dev_err(&oct->pci_dev->dev, "unable to create cavium txq status wq\n"); - return; + return -1; } INIT_DELAYED_WORK(&lio->txq_status_wq.wk.work, octnet_poll_check_txq_status); lio->txq_status_wq.wk.ctxptr = lio; queue_delayed_work(lio->txq_status_wq.wq, &lio->txq_status_wq.wk.work, msecs_to_jiffies(1)); + return 0; } static inline void cleanup_tx_poll_fn(struct net_device *netdev) { struct lio *lio = GET_LIO(netdev); - cancel_delayed_work_sync(&lio->txq_status_wq.wk.work); - destroy_workqueue(lio->txq_status_wq.wq); + if (lio->txq_status_wq.wq) { + cancel_delayed_work_sync(&lio->txq_status_wq.wk.work); + destroy_workqueue(lio->txq_status_wq.wq); + } } /** @@ -2194,7 +2357,14 @@ static int liquidio_open(struct net_device *netdev) ifstate_set(lio, LIO_IFSTATE_RUNNING); - setup_tx_poll_fn(netdev); + if (OCTEON_CN23XX_PF(oct)) { + if (!oct->msix_on) + if (setup_tx_poll_fn(netdev)) + return -1; + } else { + if (setup_tx_poll_fn(netdev)) + return -1; + } start_txq(netdev); @@ -2240,7 +2410,12 @@ static int liquidio_stop(struct net_device *netdev) /* Now it should be safe to tell Octeon that nic interface is down. */ send_rx_ctrl_cmd(lio, 0); - cleanup_tx_poll_fn(netdev); + if (OCTEON_CN23XX_PF(oct)) { + if (!oct->msix_on) + cleanup_tx_poll_fn(netdev); + } else { + cleanup_tx_poll_fn(netdev); + } if (lio->ptp_clock) { ptp_clock_unregister(lio->ptp_clock); @@ -3739,15 +3914,23 @@ static int octeon_device_init(struct octeon_device *octeon_dev) atomic_set(&octeon_dev->status, OCT_DEV_DROQ_INIT_DONE); - /* The input and output queue registers were setup earlier (the queues - * were not enabled). Any additional registers that need to be - * programmed should be done now. - */ - ret = octeon_dev->fn_list.setup_device_regs(octeon_dev); - if (ret) { - dev_err(&octeon_dev->pci_dev->dev, - "Failed to configure device registers\n"); - return ret; + if (OCTEON_CN23XX_PF(octeon_dev)) { + if (octeon_allocate_ioq_vector(octeon_dev)) { + dev_err(&octeon_dev->pci_dev->dev, "OCTEON: ioq vector allocation failed\n"); + return 1; + } + + } else { + /* The input and output queue registers were setup earlier (the + * queues were not enabled). Any additional registers + * that need to be programmed should be done now. + */ + ret = octeon_dev->fn_list.setup_device_regs(octeon_dev); + if (ret) { + dev_err(&octeon_dev->pci_dev->dev, + "Failed to configure device registers\n"); + return ret; + } } /* Initialize the tasklet that handles output queue packet processing.*/ @@ -3761,7 +3944,7 @@ static int octeon_device_init(struct octeon_device *octeon_dev) return 1; /* Enable Octeon device interrupts */ - octeon_dev->fn_list.enable_interrupt(octeon_dev->chip); + octeon_dev->fn_list.enable_interrupt(octeon_dev, OCTEON_ALL_INTR); /* Enable the input and output queues for this Octeon device */ ret = octeon_dev->fn_list.enable_io_queues(octeon_dev); diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.c b/drivers/net/ethernet/cavium/liquidio/octeon_device.c index 120b78e8bc01..52527638d413 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_device.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.c @@ -746,6 +746,45 @@ struct octeon_device *octeon_allocate_device(u32 pci_id, return oct; } +int +octeon_allocate_ioq_vector(struct octeon_device *oct) +{ + int i, num_ioqs = 0; + struct octeon_ioq_vector *ioq_vector; + int cpu_num; + int size; + + if (OCTEON_CN23XX_PF(oct)) + num_ioqs = oct->sriov_info.num_pf_rings; + size = sizeof(struct octeon_ioq_vector) * num_ioqs; + + oct->ioq_vector = vmalloc(size); + if (!oct->ioq_vector) + return 1; + memset(oct->ioq_vector, 0, size); + for (i = 0; i < num_ioqs; i++) { + ioq_vector = &oct->ioq_vector[i]; + ioq_vector->oct_dev = oct; + ioq_vector->iq_index = i; + ioq_vector->droq_index = i; + + cpu_num = i % num_online_cpus(); + cpumask_set_cpu(cpu_num, &ioq_vector->affinity_mask); + + if (oct->chip_id == OCTEON_CN23XX_PF_VID) + ioq_vector->ioq_num = i + oct->sriov_info.pf_srn; + else + ioq_vector->ioq_num = i; + } + return 0; +} + +void +octeon_free_ioq_vector(struct octeon_device *oct) +{ + vfree(oct->ioq_vector); +} + /* this function is only for setting up the first queue */ int octeon_setup_instr_queues(struct octeon_device *oct) { diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.h b/drivers/net/ethernet/cavium/liquidio/octeon_device.h index 2439cc5192b1..99fc1d899208 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_device.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.h @@ -52,6 +52,9 @@ enum octeon_pci_swap_mode { OCTEON_PCI_32BIT_LW_SWAP = 3 }; +#define OCTEON_OUTPUT_INTR (2) +#define OCTEON_ALL_INTR 0xff + /*--------------- PCI BAR1 index registers -------------*/ /* BAR1 Mask */ @@ -204,6 +207,7 @@ struct octeon_fn_list { void (*setup_oq_regs)(struct octeon_device *, u32); irqreturn_t (*process_interrupt_regs)(void *); + u64 (*msix_interrupt_handler)(void *); int (*soft_reset)(struct octeon_device *); int (*setup_device_regs)(struct octeon_device *); void (*bar1_idx_setup)(struct octeon_device *, u64, u32, int); @@ -214,8 +218,8 @@ struct octeon_fn_list { void (*enable_oq_pkt_time_intr)(struct octeon_device *, u32); void (*disable_oq_pkt_time_intr)(struct octeon_device *, u32); - void (*enable_interrupt)(void *); - void (*disable_interrupt)(void *); + void (*enable_interrupt)(struct octeon_device *, u8); + void (*disable_interrupt)(struct octeon_device *, u8); int (*enable_io_queues)(struct octeon_device *); void (*disable_io_queues)(struct octeon_device *); @@ -276,6 +280,10 @@ struct octdev_props { struct net_device *netdev; }; +#define LIO_FLAG_MSIX_ENABLED 0x1 +#define MSIX_PO_INT 0x1 +#define MSIX_PI_INT 0x2 + struct octeon_pf_vf_hs_word { #ifdef __LITTLE_ENDIAN_BITFIELD /** PKIND value assigned for the DPI interface */ @@ -323,6 +331,15 @@ struct octeon_sriov_info { }; +struct octeon_ioq_vector { + struct octeon_device *oct_dev; + int iq_index; + int droq_index; + int vector; + struct cpumask affinity_mask; + u32 ioq_num; +}; + /** The Octeon device. * Each Octeon device has this structure to represent all its * components. @@ -357,7 +374,6 @@ struct octeon_device { u16 flags; #define LIO_FLAG_MSI_ENABLED (u32)(1 << 1) -#define LIO_FLAG_MSIX_ENABLED (u32)(1 << 2) /** The state of this device */ atomic_t status; @@ -447,10 +463,19 @@ struct octeon_device { void *priv; + int num_msix_irqs; + + void *msix_entries; + struct octeon_sriov_info sriov_info; struct octeon_pf_vf_hs_word pfvf_hsword; + int msix_on; + + /** IOq information of it's corresponding MSI-X interrupt. */ + struct octeon_ioq_vector *ioq_vector; + int rx_pause; int tx_pause; @@ -718,6 +743,8 @@ void *oct_get_config_info(struct octeon_device *oct, u16 card_type); */ struct octeon_config *octeon_get_conf(struct octeon_device *oct); +void octeon_free_ioq_vector(struct octeon_device *oct); +int octeon_allocate_ioq_vector(struct octeon_device *oct); void lio_enable_irq(struct octeon_droq *droq, struct octeon_instr_queue *iq); /* LiquidIO driver pivate flags */ From c0eab5b3580af67196b6b4a59db8ed44fc5ed46c Mon Sep 17 00:00:00 2001 From: Raghu Vatsavayi Date: Wed, 31 Aug 2016 11:03:29 -0700 Subject: [PATCH 0612/1715] liquidio: CN23XX firmware download Add firmware download support for cn23xx device. Signed-off-by: Derek Chickles Signed-off-by: Satanand Burla Signed-off-by: Felix Manlunas Signed-off-by: Raghu Vatsavayi Signed-off-by: David S. Miller --- .../cavium/liquidio/cn23xx_pf_device.c | 40 ++++++ .../cavium/liquidio/cn23xx_pf_device.h | 2 + .../net/ethernet/cavium/liquidio/lio_main.c | 115 +++++++++++------- 3 files changed, 111 insertions(+), 46 deletions(-) diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c index 2e78101c3653..2d812064731a 100644 --- a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c +++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c @@ -214,6 +214,37 @@ void cn23xx_dump_pf_initialized_regs(struct octeon_device *oct) CVM_CAST64(octeon_read_csr64(oct, CN23XX_SLI_PKT_CNT_INT))); } +static int cn23xx_pf_soft_reset(struct octeon_device *oct) +{ + octeon_write_csr64(oct, CN23XX_WIN_WR_MASK_REG, 0xFF); + + dev_dbg(&oct->pci_dev->dev, "OCTEON[%d]: BIST enabled for CN23XX soft reset\n", + oct->octeon_id); + + octeon_write_csr64(oct, CN23XX_SLI_SCRATCH1, 0x1234ULL); + + /* Initiate chip-wide soft reset */ + lio_pci_readq(oct, CN23XX_RST_SOFT_RST); + lio_pci_writeq(oct, 1, CN23XX_RST_SOFT_RST); + + /* Wait for 100ms as Octeon resets. */ + mdelay(100); + + if (octeon_read_csr64(oct, CN23XX_SLI_SCRATCH1) == 0x1234ULL) { + dev_err(&oct->pci_dev->dev, "OCTEON[%d]: Soft reset failed\n", + oct->octeon_id); + return 1; + } + + dev_dbg(&oct->pci_dev->dev, "OCTEON[%d]: Reset completed\n", + oct->octeon_id); + + /* restore the reset value*/ + octeon_write_csr64(oct, CN23XX_WIN_WR_MASK_REG, 0xFF); + + return 0; +} + static void cn23xx_enable_error_reporting(struct octeon_device *oct) { u32 regval; @@ -1030,6 +1061,7 @@ int setup_cn23xx_octeon_pf_device(struct octeon_device *oct) oct->fn_list.process_interrupt_regs = cn23xx_interrupt_handler; oct->fn_list.msix_interrupt_handler = cn23xx_pf_msix_interrupt_handler; + oct->fn_list.soft_reset = cn23xx_pf_soft_reset; oct->fn_list.setup_device_regs = cn23xx_setup_pf_device_regs; oct->fn_list.enable_interrupt = cn23xx_enable_pf_interrupt; @@ -1129,3 +1161,11 @@ void cn23xx_dump_iq_regs(struct octeon_device *oct) CVM_CAST64(octeon_read_csr64( oct, CN23XX_SLI_S2M_PORTX_CTL(oct->pcie_port)))); } + +int cn23xx_fw_loaded(struct octeon_device *oct) +{ + u64 val; + + val = octeon_read_csr64(oct, CN23XX_SLI_SCRATCH1); + return (val >> 1) & 1ULL; +} diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h index 36252e7ced1a..33b758992eb2 100644 --- a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h +++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h @@ -52,4 +52,6 @@ int validate_cn23xx_pf_config_info(struct octeon_device *oct, struct octeon_config *conf23xx); void cn23xx_dump_pf_initialized_regs(struct octeon_device *oct); + +int cn23xx_fw_loaded(struct octeon_device *oct); #endif diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index 464d42bdaca2..866c075cc7ea 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -1312,9 +1312,9 @@ static void octeon_destroy_resources(struct octeon_device *oct) /* fallthrough */ case OCT_DEV_PCI_MAP_DONE: - /* Soft reset the octeon device before exiting */ - oct->fn_list.soft_reset(oct); + if ((!OCTEON_CN23XX_PF(oct)) || !oct->octeon_id) + oct->fn_list.soft_reset(oct); octeon_unmap_pci_barx(oct, 0); octeon_unmap_pci_barx(oct, 1); @@ -3823,6 +3823,7 @@ static void nic_starter(struct work_struct *work) static int octeon_device_init(struct octeon_device *octeon_dev) { int j, ret; + int fw_loaded = 0; char bootcmd[] = "\n"; struct octeon_device_priv *oct_priv = (struct octeon_device_priv *)octeon_dev->priv; @@ -3844,9 +3845,23 @@ static int octeon_device_init(struct octeon_device *octeon_dev) octeon_dev->app_mode = CVM_DRV_INVALID_APP; - /* Do a soft reset of the Octeon device. */ - if (octeon_dev->fn_list.soft_reset(octeon_dev)) + if (OCTEON_CN23XX_PF(octeon_dev)) { + if (!cn23xx_fw_loaded(octeon_dev)) { + fw_loaded = 0; + /* Do a soft reset of the Octeon device. */ + if (octeon_dev->fn_list.soft_reset(octeon_dev)) + return 1; + /* things might have changed */ + if (!cn23xx_fw_loaded(octeon_dev)) + fw_loaded = 0; + else + fw_loaded = 1; + } else { + fw_loaded = 1; + } + } else if (octeon_dev->fn_list.soft_reset(octeon_dev)) { return 1; + } /* Initialize the dispatch mechanism used to push packets arriving on * Octeon Output queues. @@ -3955,56 +3970,65 @@ static int octeon_device_init(struct octeon_device *octeon_dev) atomic_set(&octeon_dev->status, OCT_DEV_IO_QUEUES_DONE); - dev_dbg(&octeon_dev->pci_dev->dev, "Waiting for DDR initialization...\n"); + if ((!OCTEON_CN23XX_PF(octeon_dev)) || !fw_loaded) { + dev_dbg(&octeon_dev->pci_dev->dev, "Waiting for DDR initialization...\n"); + if (!ddr_timeout) { + dev_info(&octeon_dev->pci_dev->dev, + "WAITING. Set ddr_timeout to non-zero value to proceed with initialization.\n"); + } - if (ddr_timeout == 0) - dev_info(&octeon_dev->pci_dev->dev, "WAITING. Set ddr_timeout to non-zero value to proceed with initialization.\n"); + schedule_timeout_uninterruptible(HZ * LIO_RESET_SECS); - schedule_timeout_uninterruptible(HZ * LIO_RESET_SECS); - - /* Wait for the octeon to initialize DDR after the soft-reset. */ - while (ddr_timeout == 0) { - set_current_state(TASK_INTERRUPTIBLE); - if (schedule_timeout(HZ / 10)) { - /* user probably pressed Control-C */ + /* Wait for the octeon to initialize DDR after the soft-reset.*/ + while (!ddr_timeout) { + set_current_state(TASK_INTERRUPTIBLE); + if (schedule_timeout(HZ / 10)) { + /* user probably pressed Control-C */ + return 1; + } + } + ret = octeon_wait_for_ddr_init(octeon_dev, &ddr_timeout); + if (ret) { + dev_err(&octeon_dev->pci_dev->dev, + "DDR not initialized. Please confirm that board is configured to boot from Flash, ret: %d\n", + ret); return 1; } - } - ret = octeon_wait_for_ddr_init(octeon_dev, &ddr_timeout); - if (ret) { - dev_err(&octeon_dev->pci_dev->dev, - "DDR not initialized. Please confirm that board is configured to boot from Flash, ret: %d\n", - ret); - return 1; - } - if (octeon_wait_for_bootloader(octeon_dev, 1000) != 0) { - dev_err(&octeon_dev->pci_dev->dev, "Board not responding\n"); - return 1; - } + if (octeon_wait_for_bootloader(octeon_dev, 1000)) { + dev_err(&octeon_dev->pci_dev->dev, "Board not responding\n"); + return 1; + } - /* Divert uboot to take commands from host instead. */ - ret = octeon_console_send_cmd(octeon_dev, bootcmd, 50); + /* Divert uboot to take commands from host instead. */ + ret = octeon_console_send_cmd(octeon_dev, bootcmd, 50); - dev_dbg(&octeon_dev->pci_dev->dev, "Initializing consoles\n"); - ret = octeon_init_consoles(octeon_dev); - if (ret) { - dev_err(&octeon_dev->pci_dev->dev, "Could not access board consoles\n"); - return 1; - } - ret = octeon_add_console(octeon_dev, 0); - if (ret) { - dev_err(&octeon_dev->pci_dev->dev, "Could not access board console\n"); - return 1; - } + dev_dbg(&octeon_dev->pci_dev->dev, "Initializing consoles\n"); + ret = octeon_init_consoles(octeon_dev); + if (ret) { + dev_err(&octeon_dev->pci_dev->dev, "Could not access board consoles\n"); + return 1; + } + ret = octeon_add_console(octeon_dev, 0); + if (ret) { + dev_err(&octeon_dev->pci_dev->dev, "Could not access board console\n"); + return 1; + } - atomic_set(&octeon_dev->status, OCT_DEV_CONSOLE_INIT_DONE); + atomic_set(&octeon_dev->status, OCT_DEV_CONSOLE_INIT_DONE); - dev_dbg(&octeon_dev->pci_dev->dev, "Loading firmware\n"); - ret = load_firmware(octeon_dev); - if (ret) { - dev_err(&octeon_dev->pci_dev->dev, "Could not load firmware to board\n"); - return 1; + dev_dbg(&octeon_dev->pci_dev->dev, "Loading firmware\n"); + ret = load_firmware(octeon_dev); + if (ret) { + dev_err(&octeon_dev->pci_dev->dev, "Could not load firmware to board\n"); + return 1; + } + /* set bit 1 of SLI_SCRATCH_1 to indicate that firmware is + * loaded + */ + if (OCTEON_CN23XX_PF(octeon_dev)) + octeon_write_csr64(octeon_dev, CN23XX_SLI_SCRATCH1, + 2ULL); } handshake[octeon_dev->octeon_id].init_ok = 1; @@ -4020,7 +4044,6 @@ static int octeon_device_init(struct octeon_device *octeon_dev) octeon_dev->droq[j]->pkts_credit_reg); /* Packets can start arriving on the output queues from this point. */ - return 0; } From 8df3025520aaeba36aba867a4851f8968ac65b4d Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Wed, 31 Aug 2016 11:50:03 -0400 Subject: [PATCH 0613/1715] net: dsa: add MDB support Add SWITCHDEV_OBJ_ID_PORT_MDB support to the DSA layer. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- Documentation/networking/dsa/dsa.txt | 23 ++++++++++++ include/net/dsa.h | 16 ++++++++ net/dsa/slave.c | 55 ++++++++++++++++++++++++++++ 3 files changed, 94 insertions(+) diff --git a/Documentation/networking/dsa/dsa.txt b/Documentation/networking/dsa/dsa.txt index a4e55c76d371..6d6c07cf1a9a 100644 --- a/Documentation/networking/dsa/dsa.txt +++ b/Documentation/networking/dsa/dsa.txt @@ -584,6 +584,29 @@ of DSA, would be the its port-based VLAN, used by the associated bridge device. function that the driver has to call for each MAC address known to be behind the given port. A switchdev object is used to carry the VID and FDB info. +- port_mdb_prepare: bridge layer function invoked when the bridge prepares the + installation of a multicast database entry. If the operation is not supported, + this function should return -EOPNOTSUPP to inform the bridge code to fallback + to a software implementation. No hardware setup must be done in this function. + See port_fdb_add for this and details. + +- port_mdb_add: bridge layer function invoked when the bridge wants to install + a multicast database entry, the switch hardware should be programmed with the + specified address in the specified VLAN ID in the forwarding database + associated with this VLAN ID. + +Note: VLAN ID 0 corresponds to the port private database, which, in the context +of DSA, would be the its port-based VLAN, used by the associated bridge device. + +- port_mdb_del: bridge layer function invoked when the bridge wants to remove a + multicast database entry, the switch hardware should be programmed to delete + the specified MAC address from the specified VLAN ID if it was mapped into + this port forwarding database. + +- port_mdb_dump: bridge layer function invoked with a switchdev callback + function that the driver has to call for each MAC address known to be behind + the given port. A switchdev object is used to carry the VID and MDB info. + TODO ==== diff --git a/include/net/dsa.h b/include/net/dsa.h index 2ebeba44a461..e3eb230b970d 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -234,6 +234,7 @@ static inline u8 dsa_upstream_port(struct dsa_switch *ds) struct switchdev_trans; struct switchdev_obj; struct switchdev_obj_port_fdb; +struct switchdev_obj_port_mdb; struct switchdev_obj_port_vlan; struct dsa_switch_ops { @@ -369,6 +370,21 @@ struct dsa_switch_ops { int (*port_fdb_dump)(struct dsa_switch *ds, int port, struct switchdev_obj_port_fdb *fdb, int (*cb)(struct switchdev_obj *obj)); + + /* + * Multicast database + */ + int (*port_mdb_prepare)(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_mdb *mdb, + struct switchdev_trans *trans); + void (*port_mdb_add)(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_mdb *mdb, + struct switchdev_trans *trans); + int (*port_mdb_del)(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_mdb *mdb); + int (*port_mdb_dump)(struct dsa_switch *ds, int port, + struct switchdev_obj_port_mdb *mdb, + int (*cb)(struct switchdev_obj *obj)); }; void register_switch_driver(struct dsa_switch_ops *type); diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 9f6c2a20f6ff..9ecbe787f102 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -290,6 +290,50 @@ static int dsa_slave_port_fdb_dump(struct net_device *dev, return -EOPNOTSUPP; } +static int dsa_slave_port_mdb_add(struct net_device *dev, + const struct switchdev_obj_port_mdb *mdb, + struct switchdev_trans *trans) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->parent; + + if (switchdev_trans_ph_prepare(trans)) { + if (!ds->ops->port_mdb_prepare || !ds->ops->port_mdb_add) + return -EOPNOTSUPP; + + return ds->ops->port_mdb_prepare(ds, p->port, mdb, trans); + } + + ds->ops->port_mdb_add(ds, p->port, mdb, trans); + + return 0; +} + +static int dsa_slave_port_mdb_del(struct net_device *dev, + const struct switchdev_obj_port_mdb *mdb) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->parent; + + if (ds->ops->port_mdb_del) + return ds->ops->port_mdb_del(ds, p->port, mdb); + + return -EOPNOTSUPP; +} + +static int dsa_slave_port_mdb_dump(struct net_device *dev, + struct switchdev_obj_port_mdb *mdb, + switchdev_obj_dump_cb_t *cb) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->parent; + + if (ds->ops->port_mdb_dump) + return ds->ops->port_mdb_dump(ds, p->port, mdb, cb); + + return -EOPNOTSUPP; +} + static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { struct dsa_slave_priv *p = netdev_priv(dev); @@ -412,6 +456,10 @@ static int dsa_slave_port_obj_add(struct net_device *dev, SWITCHDEV_OBJ_PORT_FDB(obj), trans); break; + case SWITCHDEV_OBJ_ID_PORT_MDB: + err = dsa_slave_port_mdb_add(dev, SWITCHDEV_OBJ_PORT_MDB(obj), + trans); + break; case SWITCHDEV_OBJ_ID_PORT_VLAN: err = dsa_slave_port_vlan_add(dev, SWITCHDEV_OBJ_PORT_VLAN(obj), @@ -435,6 +483,9 @@ static int dsa_slave_port_obj_del(struct net_device *dev, err = dsa_slave_port_fdb_del(dev, SWITCHDEV_OBJ_PORT_FDB(obj)); break; + case SWITCHDEV_OBJ_ID_PORT_MDB: + err = dsa_slave_port_mdb_del(dev, SWITCHDEV_OBJ_PORT_MDB(obj)); + break; case SWITCHDEV_OBJ_ID_PORT_VLAN: err = dsa_slave_port_vlan_del(dev, SWITCHDEV_OBJ_PORT_VLAN(obj)); @@ -459,6 +510,10 @@ static int dsa_slave_port_obj_dump(struct net_device *dev, SWITCHDEV_OBJ_PORT_FDB(obj), cb); break; + case SWITCHDEV_OBJ_ID_PORT_MDB: + err = dsa_slave_port_mdb_dump(dev, SWITCHDEV_OBJ_PORT_MDB(obj), + cb); + break; case SWITCHDEV_OBJ_ID_PORT_VLAN: err = dsa_slave_port_vlan_dump(dev, SWITCHDEV_OBJ_PORT_VLAN(obj), From 83dabd1fa84cdf288303f8cbe7a56849bb553ea9 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Wed, 31 Aug 2016 11:50:04 -0400 Subject: [PATCH 0614/1715] net: dsa: mv88e6xxx: make switchdev DB ops generic The MDB support for the mv88e6xxx driver will be very similar to the FDB support, since it consists of loading/purging/dumping address to/from the Address Translation Unit (ATU). Prepare the support for MDB by making the FDB code accessing the ATU generic. The FDB operations now provide access to the unicast addresses while the MDB operations will provide access to the multicast addresses. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 138 +++++++++++++++++-------------- 1 file changed, 76 insertions(+), 62 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 4e697eea6e0f..acb3167e778e 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -2093,9 +2093,9 @@ static int _mv88e6xxx_atu_load(struct mv88e6xxx_chip *chip, return _mv88e6xxx_atu_cmd(chip, entry->fid, GLOBAL_ATU_OP_LOAD_DB); } -static int _mv88e6xxx_port_fdb_load(struct mv88e6xxx_chip *chip, int port, - const unsigned char *addr, u16 vid, - u8 state) +static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port, + const unsigned char *addr, u16 vid, + u8 state) { struct mv88e6xxx_atu_entry entry = { 0 }; struct mv88e6xxx_vtu_stu_entry vlan; @@ -2134,15 +2134,12 @@ static void mv88e6xxx_port_fdb_add(struct dsa_switch *ds, int port, const struct switchdev_obj_port_fdb *fdb, struct switchdev_trans *trans) { - int state = is_multicast_ether_addr(fdb->addr) ? - GLOBAL_ATU_DATA_STATE_MC_STATIC : - GLOBAL_ATU_DATA_STATE_UC_STATIC; struct mv88e6xxx_chip *chip = ds_to_priv(ds); mutex_lock(&chip->reg_lock); - if (_mv88e6xxx_port_fdb_load(chip, port, fdb->addr, fdb->vid, state)) - netdev_err(ds->ports[port].netdev, - "failed to load MAC address\n"); + if (mv88e6xxx_port_db_load_purge(chip, port, fdb->addr, fdb->vid, + GLOBAL_ATU_DATA_STATE_UC_STATIC)) + netdev_err(ds->ports[port].netdev, "failed to load unicast MAC address\n"); mutex_unlock(&chip->reg_lock); } @@ -2150,14 +2147,14 @@ static int mv88e6xxx_port_fdb_del(struct dsa_switch *ds, int port, const struct switchdev_obj_port_fdb *fdb) { struct mv88e6xxx_chip *chip = ds_to_priv(ds); - int ret; + int err; mutex_lock(&chip->reg_lock); - ret = _mv88e6xxx_port_fdb_load(chip, port, fdb->addr, fdb->vid, - GLOBAL_ATU_DATA_STATE_UNUSED); + err = mv88e6xxx_port_db_load_purge(chip, port, fdb->addr, fdb->vid, + GLOBAL_ATU_DATA_STATE_UNUSED); mutex_unlock(&chip->reg_lock); - return ret; + return err; } static int _mv88e6xxx_atu_getnext(struct mv88e6xxx_chip *chip, u16 fid, @@ -2205,10 +2202,10 @@ static int _mv88e6xxx_atu_getnext(struct mv88e6xxx_chip *chip, u16 fid, return 0; } -static int _mv88e6xxx_port_fdb_dump_one(struct mv88e6xxx_chip *chip, - u16 fid, u16 vid, int port, - struct switchdev_obj_port_fdb *fdb, - int (*cb)(struct switchdev_obj *obj)) +static int mv88e6xxx_port_db_dump_fid(struct mv88e6xxx_chip *chip, + u16 fid, u16 vid, int port, + struct switchdev_obj *obj, + int (*cb)(struct switchdev_obj *obj)) { struct mv88e6xxx_atu_entry addr = { .mac = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, @@ -2222,72 +2219,89 @@ static int _mv88e6xxx_port_fdb_dump_one(struct mv88e6xxx_chip *chip, do { err = _mv88e6xxx_atu_getnext(chip, fid, &addr); if (err) - break; + return err; if (addr.state == GLOBAL_ATU_DATA_STATE_UNUSED) break; - if (!addr.trunk && addr.portv_trunkid & BIT(port)) { - bool is_static = addr.state == - (is_multicast_ether_addr(addr.mac) ? - GLOBAL_ATU_DATA_STATE_MC_STATIC : - GLOBAL_ATU_DATA_STATE_UC_STATIC); + if (addr.trunk || (addr.portv_trunkid & BIT(port)) == 0) + continue; + if (obj->id == SWITCHDEV_OBJ_ID_PORT_FDB) { + struct switchdev_obj_port_fdb *fdb; + + if (!is_unicast_ether_addr(addr.mac)) + continue; + + fdb = SWITCHDEV_OBJ_PORT_FDB(obj); fdb->vid = vid; ether_addr_copy(fdb->addr, addr.mac); - fdb->ndm_state = is_static ? NUD_NOARP : NUD_REACHABLE; - - err = cb(&fdb->obj); - if (err) - break; + if (addr.state == GLOBAL_ATU_DATA_STATE_UC_STATIC) + fdb->ndm_state = NUD_NOARP; + else + fdb->ndm_state = NUD_REACHABLE; + } else { + return -EOPNOTSUPP; } + + err = cb(obj); + if (err) + return err; } while (!is_broadcast_ether_addr(addr.mac)); return err; } +static int mv88e6xxx_port_db_dump(struct mv88e6xxx_chip *chip, int port, + struct switchdev_obj *obj, + int (*cb)(struct switchdev_obj *obj)) +{ + struct mv88e6xxx_vtu_stu_entry vlan = { + .vid = GLOBAL_VTU_VID_MASK, /* all ones */ + }; + u16 fid; + int err; + + /* Dump port's default Filtering Information Database (VLAN ID 0) */ + err = _mv88e6xxx_port_fid_get(chip, port, &fid); + if (err) + return err; + + err = mv88e6xxx_port_db_dump_fid(chip, fid, 0, port, obj, cb); + if (err) + return err; + + /* Dump VLANs' Filtering Information Databases */ + err = _mv88e6xxx_vtu_vid_write(chip, vlan.vid); + if (err) + return err; + + do { + err = _mv88e6xxx_vtu_getnext(chip, &vlan); + if (err) + return err; + + if (!vlan.valid) + break; + + err = mv88e6xxx_port_db_dump_fid(chip, vlan.fid, vlan.vid, port, + obj, cb); + if (err) + return err; + } while (vlan.vid < GLOBAL_VTU_VID_MASK); + + return err; +} + static int mv88e6xxx_port_fdb_dump(struct dsa_switch *ds, int port, struct switchdev_obj_port_fdb *fdb, int (*cb)(struct switchdev_obj *obj)) { struct mv88e6xxx_chip *chip = ds_to_priv(ds); - struct mv88e6xxx_vtu_stu_entry vlan = { - .vid = GLOBAL_VTU_VID_MASK, /* all ones */ - }; - u16 fid; int err; mutex_lock(&chip->reg_lock); - - /* Dump port's default Filtering Information Database (VLAN ID 0) */ - err = _mv88e6xxx_port_fid_get(chip, port, &fid); - if (err) - goto unlock; - - err = _mv88e6xxx_port_fdb_dump_one(chip, fid, 0, port, fdb, cb); - if (err) - goto unlock; - - /* Dump VLANs' Filtering Information Databases */ - err = _mv88e6xxx_vtu_vid_write(chip, vlan.vid); - if (err) - goto unlock; - - do { - err = _mv88e6xxx_vtu_getnext(chip, &vlan); - if (err) - break; - - if (!vlan.valid) - break; - - err = _mv88e6xxx_port_fdb_dump_one(chip, vlan.fid, vlan.vid, - port, fdb, cb); - if (err) - break; - } while (vlan.vid < GLOBAL_VTU_VID_MASK); - -unlock: + err = mv88e6xxx_port_db_dump(chip, port, &fdb->obj, cb); mutex_unlock(&chip->reg_lock); return err; From 7df8fbdd44fa3c7ed53964c95e440b0286fd0836 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Wed, 31 Aug 2016 11:50:05 -0400 Subject: [PATCH 0615/1715] net: dsa: mv88e6xxx: add MDB support Add support for the MDB operations. This consists of loading/purging/dumping multicast addresses for a given port in the ATU. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 65 ++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index acb3167e778e..e2a953e7f5f5 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -2240,6 +2240,15 @@ static int mv88e6xxx_port_db_dump_fid(struct mv88e6xxx_chip *chip, fdb->ndm_state = NUD_NOARP; else fdb->ndm_state = NUD_REACHABLE; + } else if (obj->id == SWITCHDEV_OBJ_ID_PORT_MDB) { + struct switchdev_obj_port_mdb *mdb; + + if (!is_multicast_ether_addr(addr.mac)) + continue; + + mdb = SWITCHDEV_OBJ_PORT_MDB(obj); + mdb->vid = vid; + ether_addr_copy(mdb->addr, addr.mac); } else { return -EOPNOTSUPP; } @@ -3994,6 +4003,58 @@ free: return NULL; } +static int mv88e6xxx_port_mdb_prepare(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_mdb *mdb, + struct switchdev_trans *trans) +{ + /* We don't need any dynamic resource from the kernel (yet), + * so skip the prepare phase. + */ + + return 0; +} + +static void mv88e6xxx_port_mdb_add(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_mdb *mdb, + struct switchdev_trans *trans) +{ + struct mv88e6xxx_chip *chip = ds_to_priv(ds); + + mutex_lock(&chip->reg_lock); + if (mv88e6xxx_port_db_load_purge(chip, port, mdb->addr, mdb->vid, + GLOBAL_ATU_DATA_STATE_MC_STATIC)) + netdev_err(ds->ports[port].netdev, "failed to load multicast MAC address\n"); + mutex_unlock(&chip->reg_lock); +} + +static int mv88e6xxx_port_mdb_del(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_mdb *mdb) +{ + struct mv88e6xxx_chip *chip = ds_to_priv(ds); + int err; + + mutex_lock(&chip->reg_lock); + err = mv88e6xxx_port_db_load_purge(chip, port, mdb->addr, mdb->vid, + GLOBAL_ATU_DATA_STATE_UNUSED); + mutex_unlock(&chip->reg_lock); + + return err; +} + +static int mv88e6xxx_port_mdb_dump(struct dsa_switch *ds, int port, + struct switchdev_obj_port_mdb *mdb, + int (*cb)(struct switchdev_obj *obj)) +{ + struct mv88e6xxx_chip *chip = ds_to_priv(ds); + int err; + + mutex_lock(&chip->reg_lock); + err = mv88e6xxx_port_db_dump(chip, port, &mdb->obj, cb); + mutex_unlock(&chip->reg_lock); + + return err; +} + static struct dsa_switch_ops mv88e6xxx_switch_ops = { .probe = mv88e6xxx_drv_probe, .get_tag_protocol = mv88e6xxx_get_tag_protocol, @@ -4029,6 +4090,10 @@ static struct dsa_switch_ops mv88e6xxx_switch_ops = { .port_fdb_add = mv88e6xxx_port_fdb_add, .port_fdb_del = mv88e6xxx_port_fdb_del, .port_fdb_dump = mv88e6xxx_port_fdb_dump, + .port_mdb_prepare = mv88e6xxx_port_mdb_prepare, + .port_mdb_add = mv88e6xxx_port_mdb_add, + .port_mdb_del = mv88e6xxx_port_mdb_del, + .port_mdb_dump = mv88e6xxx_port_mdb_dump, }; static int mv88e6xxx_register_switch(struct mv88e6xxx_chip *chip, From ce0b15d1103102b78ad95739fa71e62fb16774bd Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 31 Aug 2016 08:57:36 -0700 Subject: [PATCH 0616/1715] xgbe: constify get_netdev_ops and get_ethtool_ops Casting away const is bad practice. Since this is ARM specific driver don't have hardware actually test this. Having getter functions for ops is really unnecessary code bloat, but not going to touch that. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 4 ++-- drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c | 4 ++-- drivers/net/ethernet/amd/xgbe/xgbe.h | 5 +++-- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index a9b2709567ec..7f9216db026f 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -1708,9 +1708,9 @@ static const struct net_device_ops xgbe_netdev_ops = { .ndo_set_features = xgbe_set_features, }; -struct net_device_ops *xgbe_get_netdev_ops(void) +const struct net_device_ops *xgbe_get_netdev_ops(void) { - return (struct net_device_ops *)&xgbe_netdev_ops; + return &xgbe_netdev_ops; } static void xgbe_rx_refresh(struct xgbe_channel *channel) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c index 11d9f0c5b78b..4007b429c80c 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c @@ -623,7 +623,7 @@ static const struct ethtool_ops xgbe_ethtool_ops = { .get_ts_info = xgbe_get_ts_info, }; -struct ethtool_ops *xgbe_get_ethtool_ops(void) +const struct ethtool_ops *xgbe_get_ethtool_ops(void) { - return (struct ethtool_ops *)&xgbe_ethtool_ops; + return &xgbe_ethtool_ops; } diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h index 98d9d63c4353..5dd17dcea2f8 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe.h @@ -956,8 +956,9 @@ struct xgbe_prv_data { void xgbe_init_function_ptrs_dev(struct xgbe_hw_if *); void xgbe_init_function_ptrs_phy(struct xgbe_phy_if *); void xgbe_init_function_ptrs_desc(struct xgbe_desc_if *); -struct net_device_ops *xgbe_get_netdev_ops(void); -struct ethtool_ops *xgbe_get_ethtool_ops(void); +const struct net_device_ops *xgbe_get_netdev_ops(void); +const struct ethtool_ops *xgbe_get_ethtool_ops(void); + #ifdef CONFIG_AMD_XGBE_DCB const struct dcbnl_rtnl_ops *xgbe_get_dcbnl_ops(void); #endif From 55454a565836e1cb002d433e901804dea4406a32 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Sat, 27 Aug 2016 22:22:32 +0200 Subject: [PATCH 0617/1715] ppp: avoid dealock on recursive xmit In case of misconfiguration, a virtual PPP channel might send packets back to their parent PPP interface. This typically happens in misconfigured L2TP setups, where PPP's peer IP address is set with the IP of the L2TP peer. When that happens the system hangs due to PPP trying to recursively lock its xmit path. [ 243.332155] BUG: spinlock recursion on CPU#1, accel-pppd/926 [ 243.333272] lock: 0xffff880033d90f18, .magic: dead4ead, .owner: accel-pppd/926, .owner_cpu: 1 [ 243.334859] CPU: 1 PID: 926 Comm: accel-pppd Not tainted 4.8.0-rc2 #1 [ 243.336010] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Debian-1.8.2-1 04/01/2014 [ 243.336018] ffff7fffffffffff ffff8800319a77a0 ffffffff8128de85 ffff880033d90f18 [ 243.336018] ffff880033ad8000 ffff8800319a77d8 ffffffff810ad7c0 ffffffff0000039e [ 243.336018] ffff880033d90f18 ffff880033d90f60 ffff880033d90f18 ffff880033d90f28 [ 243.336018] Call Trace: [ 243.336018] [] dump_stack+0x4f/0x65 [ 243.336018] [] spin_dump+0xe1/0xeb [ 243.336018] [] spin_bug+0x26/0x28 [ 243.336018] [] do_raw_spin_lock+0x5c/0x160 [ 243.336018] [] _raw_spin_lock_bh+0x35/0x3c [ 243.336018] [] ? ppp_push+0xa7/0x82d [ppp_generic] [ 243.336018] [] ppp_push+0xa7/0x82d [ppp_generic] [ 243.336018] [] ? do_raw_spin_unlock+0xc2/0xcc [ 243.336018] [] ? preempt_count_sub+0x13/0xc7 [ 243.336018] [] ? _raw_spin_unlock_irqrestore+0x34/0x49 [ 243.336018] [] ppp_xmit_process+0x48/0x877 [ppp_generic] [ 243.336018] [] ? preempt_count_sub+0x13/0xc7 [ 243.336018] [] ? skb_queue_tail+0x71/0x7c [ 243.336018] [] ppp_start_xmit+0x21b/0x22a [ppp_generic] [ 243.336018] [] dev_hard_start_xmit+0x15e/0x32c [ 243.336018] [] sch_direct_xmit+0xd6/0x221 [ 243.336018] [] __dev_queue_xmit+0x52a/0x820 [ 243.336018] [] dev_queue_xmit+0xb/0xd [ 243.336018] [] neigh_direct_output+0xc/0xe [ 243.336018] [] ip_finish_output2+0x4d2/0x548 [ 243.336018] [] ? dst_mtu+0x29/0x2e [ 243.336018] [] ip_finish_output+0x152/0x15e [ 243.336018] [] ? ip_output+0x74/0x96 [ 243.336018] [] ip_output+0x8c/0x96 [ 243.336018] [] ip_local_out+0x41/0x4a [ 243.336018] [] ip_queue_xmit+0x531/0x5c5 [ 243.336018] [] ? udp_set_csum+0x207/0x21e [ 243.336018] [] l2tp_xmit_skb+0x582/0x5d7 [l2tp_core] [ 243.336018] [] pppol2tp_xmit+0x1eb/0x257 [l2tp_ppp] [ 243.336018] [] ppp_channel_push+0x91/0x102 [ppp_generic] [ 243.336018] [] ppp_write+0x104/0x11c [ppp_generic] [ 243.336018] [] __vfs_write+0x56/0x120 [ 243.336018] [] ? fsnotify_perm+0x27/0x95 [ 243.336018] [] ? security_file_permission+0x4d/0x54 [ 243.336018] [] vfs_write+0xbd/0x11b [ 243.336018] [] SyS_write+0x5e/0x96 [ 243.336018] [] entry_SYSCALL_64_fastpath+0x13/0x94 The main entry points for sending packets over a PPP unit are the .write() and .ndo_start_xmit() callbacks (simplified view): .write(unit fd) or .ndo_start_xmit() \ CALL ppp_xmit_process() \ LOCK unit's xmit path (ppp->wlock) | CALL ppp_push() \ LOCK channel's xmit path (chan->downl) | CALL lower layer's .start_xmit() callback \ ... might recursively call .ndo_start_xmit() ... / RETURN from .start_xmit() | UNLOCK channel's xmit path / RETURN from ppp_push() | UNLOCK unit's xmit path / RETURN from ppp_xmit_process() Packets can also be directly sent on channels (e.g. LCP packets): .write(channel fd) or ppp_output_wakeup() \ CALL ppp_channel_push() \ LOCK channel's xmit path (chan->downl) | CALL lower layer's .start_xmit() callback \ ... might call .ndo_start_xmit() ... / RETURN from .start_xmit() | UNLOCK channel's xmit path / RETURN from ppp_channel_push() Key points about the lower layer's .start_xmit() callback: * It can be called directly by a channel fd .write() or by ppp_output_wakeup() or indirectly by a unit fd .write() or by .ndo_start_xmit(). * In any case, it's always called with chan->downl held. * It might route the packet back to its parent unit using .ndo_start_xmit() as entry point. This patch detects and breaks recursion in ppp_xmit_process(). This function is a good candidate for the task because it's called early enough after .ndo_start_xmit(), it's always part of the recursion loop and it's on the path of whatever entry point is used to send a packet on a PPP unit. Recursion detection is done using the per-cpu ppp_xmit_recursion variable. Since ppp_channel_push() too locks the channel's xmit path and calls the lower layer's .start_xmit() callback, we need to also increment ppp_xmit_recursion there. However there's no need to check for recursion, as it's out of the recursion loop. Reported-by: Feng Gao Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- drivers/net/ppp/ppp_generic.c | 52 +++++++++++++++++++++++++++-------- 1 file changed, 40 insertions(+), 12 deletions(-) diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 70cfa06ccd40..57fc550aa14b 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -1376,12 +1376,8 @@ static void ppp_setup(struct net_device *dev) * Transmit-side routines. */ -/* - * Called to do any work queued up on the transmit side - * that can now be done. - */ -static void -ppp_xmit_process(struct ppp *ppp) +/* Called to do any work queued up on the transmit side that can now be done */ +static void __ppp_xmit_process(struct ppp *ppp) { struct sk_buff *skb; @@ -1401,6 +1397,30 @@ ppp_xmit_process(struct ppp *ppp) ppp_xmit_unlock(ppp); } +static DEFINE_PER_CPU(int, ppp_xmit_recursion); + +static void ppp_xmit_process(struct ppp *ppp) +{ + local_bh_disable(); + + if (unlikely(__this_cpu_read(ppp_xmit_recursion))) + goto err; + + __this_cpu_inc(ppp_xmit_recursion); + __ppp_xmit_process(ppp); + __this_cpu_dec(ppp_xmit_recursion); + + local_bh_enable(); + + return; + +err: + local_bh_enable(); + + if (net_ratelimit()) + netdev_err(ppp->dev, "recursion detected\n"); +} + static inline struct sk_buff * pad_compress_skb(struct ppp *ppp, struct sk_buff *skb) { @@ -1856,11 +1876,8 @@ static int ppp_mp_explode(struct ppp *ppp, struct sk_buff *skb) } #endif /* CONFIG_PPP_MULTILINK */ -/* - * Try to send data out on a channel. - */ -static void -ppp_channel_push(struct channel *pch) +/* Try to send data out on a channel */ +static void __ppp_channel_push(struct channel *pch) { struct sk_buff *skb; struct ppp *ppp; @@ -1885,11 +1902,22 @@ ppp_channel_push(struct channel *pch) read_lock_bh(&pch->upl); ppp = pch->ppp; if (ppp) - ppp_xmit_process(ppp); + __ppp_xmit_process(ppp); read_unlock_bh(&pch->upl); } } +static void ppp_channel_push(struct channel *pch) +{ + local_bh_disable(); + + __this_cpu_inc(ppp_xmit_recursion); + __ppp_channel_push(pch); + __this_cpu_dec(ppp_xmit_recursion); + + local_bh_enable(); +} + /* * Receive-side routines. */ From 077127705acf80cdb9393b891d934509a7081d71 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Sat, 27 Aug 2016 22:22:51 +0200 Subject: [PATCH 0618/1715] ppp: declare PPP devices as LLTX ppp_xmit_process() already locks the xmit path. If HARD_TX_LOCK() tries to hold the _xmit_lock we can get lock inversion. [ 973.726130] ====================================================== [ 973.727311] [ INFO: possible circular locking dependency detected ] [ 973.728546] 4.8.0-rc2 #1 Tainted: G O [ 973.728986] ------------------------------------------------------- [ 973.728986] accel-pppd/1806 is trying to acquire lock: [ 973.728986] (&qdisc_xmit_lock_key){+.-...}, at: [] sch_direct_xmit+0x8d/0x221 [ 973.728986] [ 973.728986] but task is already holding lock: [ 973.728986] (l2tp_sock){+.-...}, at: [] l2tp_xmit_skb+0x1e8/0x5d7 [l2tp_core] [ 973.728986] [ 973.728986] which lock already depends on the new lock. [ 973.728986] [ 973.728986] [ 973.728986] the existing dependency chain (in reverse order) is: [ 973.728986] -> #3 (l2tp_sock){+.-...}: [ 973.728986] [] lock_acquire+0x150/0x217 [ 973.728986] [] _raw_spin_lock+0x2d/0x3c [ 973.728986] [] l2tp_xmit_skb+0x1e8/0x5d7 [l2tp_core] [ 973.728986] [] pppol2tp_xmit+0x1f2/0x25e [l2tp_ppp] [ 973.728986] [] ppp_channel_push+0xb5/0x14a [ppp_generic] [ 973.728986] [] ppp_write+0x104/0x11c [ppp_generic] [ 973.728986] [] __vfs_write+0x56/0x120 [ 973.728986] [] vfs_write+0xbd/0x11b [ 973.728986] [] SyS_write+0x5e/0x96 [ 973.728986] [] entry_SYSCALL_64_fastpath+0x18/0xa8 [ 973.728986] -> #2 (&(&pch->downl)->rlock){+.-...}: [ 973.728986] [] lock_acquire+0x150/0x217 [ 973.728986] [] _raw_spin_lock_bh+0x31/0x40 [ 973.728986] [] ppp_push+0xa7/0x82d [ppp_generic] [ 973.728986] [] __ppp_xmit_process+0x48/0x877 [ppp_generic] [ 973.728986] [] ppp_xmit_process+0x4b/0xaf [ppp_generic] [ 973.728986] [] ppp_write+0x10e/0x11c [ppp_generic] [ 973.728986] [] __vfs_write+0x56/0x120 [ 973.728986] [] vfs_write+0xbd/0x11b [ 973.728986] [] SyS_write+0x5e/0x96 [ 973.728986] [] entry_SYSCALL_64_fastpath+0x18/0xa8 [ 973.728986] -> #1 (&(&ppp->wlock)->rlock){+.-...}: [ 973.728986] [] lock_acquire+0x150/0x217 [ 973.728986] [] _raw_spin_lock_bh+0x31/0x40 [ 973.728986] [] __ppp_xmit_process+0x27/0x877 [ppp_generic] [ 973.728986] [] ppp_xmit_process+0x4b/0xaf [ppp_generic] [ 973.728986] [] ppp_start_xmit+0x21b/0x22a [ppp_generic] [ 973.728986] [] dev_hard_start_xmit+0x1a9/0x43d [ 973.728986] [] sch_direct_xmit+0xd6/0x221 [ 973.728986] [] __dev_queue_xmit+0x62a/0x912 [ 973.728986] [] dev_queue_xmit+0xb/0xd [ 973.728986] [] neigh_direct_output+0xc/0xe [ 973.728986] [] ip6_finish_output2+0x5a9/0x623 [ 973.728986] [] ip6_output+0x15e/0x16a [ 973.728986] [] dst_output+0x76/0x7f [ 973.728986] [] mld_sendpack+0x335/0x404 [ 973.728986] [] mld_send_initial_cr.part.21+0x99/0xa2 [ 973.728986] [] ipv6_mc_dad_complete+0x42/0x71 [ 973.728986] [] addrconf_dad_completed+0x1cf/0x2ea [ 973.728986] [] addrconf_dad_work+0x453/0x520 [ 973.728986] [] process_one_work+0x365/0x6f0 [ 973.728986] [] worker_thread+0x2de/0x421 [ 973.728986] [] kthread+0x121/0x130 [ 973.728986] [] ret_from_fork+0x1f/0x40 [ 973.728986] -> #0 (&qdisc_xmit_lock_key){+.-...}: [ 973.728986] [] __lock_acquire+0x1118/0x1483 [ 973.728986] [] lock_acquire+0x150/0x217 [ 973.728986] [] _raw_spin_lock+0x2d/0x3c [ 973.728986] [] sch_direct_xmit+0x8d/0x221 [ 973.728986] [] __dev_queue_xmit+0x62a/0x912 [ 973.728986] [] dev_queue_xmit+0xb/0xd [ 973.728986] [] neigh_direct_output+0xc/0xe [ 973.728986] [] ip_finish_output2+0x5db/0x609 [ 973.728986] [] ip_finish_output+0x152/0x15e [ 973.728986] [] ip_output+0x8c/0x96 [ 973.728986] [] ip_local_out+0x41/0x4a [ 973.728986] [] ip_queue_xmit+0x5a5/0x609 [ 973.728986] [] l2tp_xmit_skb+0x582/0x5d7 [l2tp_core] [ 973.728986] [] pppol2tp_xmit+0x1f2/0x25e [l2tp_ppp] [ 973.728986] [] ppp_channel_push+0xb5/0x14a [ppp_generic] [ 973.728986] [] ppp_write+0x104/0x11c [ppp_generic] [ 973.728986] [] __vfs_write+0x56/0x120 [ 973.728986] [] vfs_write+0xbd/0x11b [ 973.728986] [] SyS_write+0x5e/0x96 [ 973.728986] [] entry_SYSCALL_64_fastpath+0x18/0xa8 [ 973.728986] [ 973.728986] other info that might help us debug this: [ 973.728986] [ 973.728986] Chain exists of: &qdisc_xmit_lock_key --> &(&pch->downl)->rlock --> l2tp_sock [ 973.728986] Possible unsafe locking scenario: [ 973.728986] [ 973.728986] CPU0 CPU1 [ 973.728986] ---- ---- [ 973.728986] lock(l2tp_sock); [ 973.728986] lock(&(&pch->downl)->rlock); [ 973.728986] lock(l2tp_sock); [ 973.728986] lock(&qdisc_xmit_lock_key); [ 973.728986] [ 973.728986] *** DEADLOCK *** [ 973.728986] [ 973.728986] 6 locks held by accel-pppd/1806: [ 973.728986] #0: (&(&pch->downl)->rlock){+.-...}, at: [] ppp_channel_push+0x56/0x14a [ppp_generic] [ 973.728986] #1: (l2tp_sock){+.-...}, at: [] l2tp_xmit_skb+0x1e8/0x5d7 [l2tp_core] [ 973.728986] #2: (rcu_read_lock){......}, at: [] rcu_lock_acquire+0x0/0x20 [ 973.728986] #3: (rcu_read_lock_bh){......}, at: [] rcu_lock_acquire+0x0/0x20 [ 973.728986] #4: (rcu_read_lock_bh){......}, at: [] rcu_lock_acquire+0x0/0x20 [ 973.728986] #5: (dev->qdisc_running_key ?: &qdisc_running_key#2){+.....}, at: [] __dev_queue_xmit+0x564/0x912 [ 973.728986] [ 973.728986] stack backtrace: [ 973.728986] CPU: 2 PID: 1806 Comm: accel-pppd Tainted: G O 4.8.0-rc2 #1 [ 973.728986] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Debian-1.8.2-1 04/01/2014 [ 973.728986] ffff7fffffffffff ffff88003436f850 ffffffff812a20f4 ffffffff82156e30 [ 973.728986] ffffffff82156920 ffff88003436f890 ffffffff8115c759 ffff88003344ae00 [ 973.728986] ffff88003344b5c0 0000000000000002 0000000000000006 ffff88003344b5e8 [ 973.728986] Call Trace: [ 973.728986] [] dump_stack+0x67/0x90 [ 973.728986] [] print_circular_bug+0x22e/0x23c [ 973.728986] [] __lock_acquire+0x1118/0x1483 [ 973.728986] [] lock_acquire+0x150/0x217 [ 973.728986] [] ? lock_acquire+0x150/0x217 [ 973.728986] [] ? sch_direct_xmit+0x8d/0x221 [ 973.728986] [] _raw_spin_lock+0x2d/0x3c [ 973.728986] [] ? sch_direct_xmit+0x8d/0x221 [ 973.728986] [] sch_direct_xmit+0x8d/0x221 [ 973.728986] [] __dev_queue_xmit+0x62a/0x912 [ 973.728986] [] dev_queue_xmit+0xb/0xd [ 973.728986] [] neigh_direct_output+0xc/0xe [ 973.728986] [] ip_finish_output2+0x5db/0x609 [ 973.728986] [] ? dst_mtu+0x29/0x2e [ 973.728986] [] ip_finish_output+0x152/0x15e [ 973.728986] [] ? ip_output+0x74/0x96 [ 973.728986] [] ip_output+0x8c/0x96 [ 973.728986] [] ip_local_out+0x41/0x4a [ 973.728986] [] ip_queue_xmit+0x5a5/0x609 [ 973.728986] [] ? udp_set_csum+0x207/0x21e [ 973.728986] [] l2tp_xmit_skb+0x582/0x5d7 [l2tp_core] [ 973.728986] [] pppol2tp_xmit+0x1f2/0x25e [l2tp_ppp] [ 973.728986] [] ppp_channel_push+0xb5/0x14a [ppp_generic] [ 973.728986] [] ppp_write+0x104/0x11c [ppp_generic] [ 973.728986] [] __vfs_write+0x56/0x120 [ 973.728986] [] ? fsnotify_perm+0x27/0x95 [ 973.728986] [] ? security_file_permission+0x4d/0x54 [ 973.728986] [] vfs_write+0xbd/0x11b [ 973.728986] [] SyS_write+0x5e/0x96 [ 973.728986] [] entry_SYSCALL_64_fastpath+0x18/0xa8 [ 973.728986] [] ? trace_hardirqs_off_caller+0x121/0x12f Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- drivers/net/ppp/ppp_generic.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 57fc550aa14b..5489c0ec1d9a 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -1363,6 +1363,8 @@ static void ppp_setup(struct net_device *dev) dev->netdev_ops = &ppp_netdev_ops; SET_NETDEV_DEVTYPE(dev, &ppp_type); + dev->features |= NETIF_F_LLTX; + dev->hard_header_len = PPP_HDRLEN; dev->mtu = PPP_MRU; dev->addr_len = 0; From 6a38cb15a214e60ae69adf5df907c48dc0994d87 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Thu, 1 Sep 2016 00:21:19 +0200 Subject: [PATCH 0619/1715] net: mediatek: constify ethtool_ops structures Check for ethtool_ops structures that are only stored in the ethtool_ops field of a net_device structure or passed as the second argument to netdev_set_default_ethtool_ops. These contexts are declared const, so ethtool_ops structures that have these properties can be declared as const also. The semantic patch that makes this change is as follows: (http://coccinelle.lip6.fr/) // @r disable optional_qualifier@ identifier i; position p; @@ static struct ethtool_ops i@p = { ... }; @ok1@ identifier r.i; struct net_device e; position p; @@ e.ethtool_ops = &i@p; @ok2@ identifier r.i; expression e; position p; @@ netdev_set_default_ethtool_ops(e, &i@p) @bad@ position p != {r.p,ok1.p,ok2.p}; identifier r.i; @@ i@p @depends on !bad disable optional_qualifier@ identifier r.i; @@ static +const struct ethtool_ops i = { ... }; // Suggested-by: Stephen Hemminger Signed-off-by: Julia Lawall Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 0fd9fc8d2a79..2dadfa961898 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1703,7 +1703,7 @@ static void mtk_get_ethtool_stats(struct net_device *dev, } while (u64_stats_fetch_retry_irq(&hwstats->syncp, start)); } -static struct ethtool_ops mtk_ethtool_ops = { +static const struct ethtool_ops mtk_ethtool_ops = { .get_settings = mtk_get_settings, .set_settings = mtk_set_settings, .get_drvinfo = mtk_get_drvinfo, From 407a471d4d2157cb679220ceab95bb73407ffa0d Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Thu, 1 Sep 2016 00:21:22 +0200 Subject: [PATCH 0620/1715] r8152: constify ethtool_ops structures Check for ethtool_ops structures that are only stored in the ethtool_ops field of a net_device structure or passed as the second argument to netdev_set_default_ethtool_ops. These contexts are declared const, so ethtool_ops structures that have these properties can be declared as const also. The semantic patch that makes this change is as follows: (http://coccinelle.lip6.fr/) // @r disable optional_qualifier@ identifier i; position p; @@ static struct ethtool_ops i@p = { ... }; @ok1@ identifier r.i; struct net_device e; position p; @@ e.ethtool_ops = &i@p; @ok2@ identifier r.i; expression e; position p; @@ netdev_set_default_ethtool_ops(e, &i@p) @bad@ position p != {r.p,ok1.p,ok2.p}; identifier r.i; @@ i@p @depends on !bad disable optional_qualifier@ identifier r.i; @@ static +const struct ethtool_ops i = { ... }; // Suggested-by: Stephen Hemminger Signed-off-by: Julia Lawall Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index f41a8ad4740e..f72f807c7fc5 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -4032,7 +4032,7 @@ static int rtl8152_set_coalesce(struct net_device *netdev, return ret; } -static struct ethtool_ops ops = { +static const struct ethtool_ops ops = { .get_drvinfo = rtl8152_get_drvinfo, .get_settings = rtl8152_get_settings, .set_settings = rtl8152_set_settings, From c7735f1bac209e285839ccead00873b27afc013b Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Thu, 1 Sep 2016 00:21:23 +0200 Subject: [PATCH 0621/1715] net: axienet: constify ethtool_ops structures Check for ethtool_ops structures that are only stored in the ethtool_ops field of a net_device structure or passed as the second argument to netdev_set_default_ethtool_ops. These contexts are declared const, so ethtool_ops structures that have these properties can be declared as const also. The semantic patch that makes this change is as follows: (http://coccinelle.lip6.fr/) // @r disable optional_qualifier@ identifier i; position p; @@ static struct ethtool_ops i@p = { ... }; @ok1@ identifier r.i; struct net_device e; position p; @@ e.ethtool_ops = &i@p; @ok2@ identifier r.i; expression e; position p; @@ netdev_set_default_ethtool_ops(e, &i@p) @bad@ position p != {r.p,ok1.p,ok2.p}; identifier r.i; @@ i@p @depends on !bad disable optional_qualifier@ identifier r.i; @@ static +const struct ethtool_ops i = { ... }; // Suggested-by: Stephen Hemminger Signed-off-by: Julia Lawall Signed-off-by: David S. Miller --- drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 36ee7ab300ae..69e2a833a84f 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -1297,7 +1297,7 @@ static int axienet_ethtools_set_coalesce(struct net_device *ndev, return 0; } -static struct ethtool_ops axienet_ethtool_ops = { +static const struct ethtool_ops axienet_ethtool_ops = { .get_drvinfo = axienet_ethtools_get_drvinfo, .get_regs_len = axienet_ethtools_get_regs_len, .get_regs = axienet_ethtools_get_regs, From d9fe64e511144c1ee7d7555b4111f09dde9692ef Mon Sep 17 00:00:00 2001 From: Robert Foss Date: Mon, 29 Aug 2016 09:32:15 -0400 Subject: [PATCH 0622/1715] net: asix: Add in_pm parameter From: Freddy Xin In order to R/W registers in suspend/resume functions, in_pm flags are added to some functions to determine whether the nopm version of usb functions is called. Save BMCR and ANAR PHY registers in suspend function and restore them in resume function. Reset HW in resume function to ensure the PHY works correctly. Signed-off-by: Freddy Xin Signed-off-by: Robert Foss Tested-by: Robert Foss Signed-off-by: David S. Miller --- drivers/net/usb/asix.h | 40 +++- drivers/net/usb/asix_common.c | 180 +++++++++++---- drivers/net/usb/asix_devices.c | 407 +++++++++++++++++++++++++-------- drivers/net/usb/ax88172a.c | 29 +-- 4 files changed, 489 insertions(+), 167 deletions(-) diff --git a/drivers/net/usb/asix.h b/drivers/net/usb/asix.h index a2d3ea6efb20..d1092421aaa7 100644 --- a/drivers/net/usb/asix.h +++ b/drivers/net/usb/asix.h @@ -46,6 +46,7 @@ #define AX_CMD_SET_SW_MII 0x06 #define AX_CMD_READ_MII_REG 0x07 #define AX_CMD_WRITE_MII_REG 0x08 +#define AX_CMD_STATMNGSTS_REG 0x09 #define AX_CMD_SET_HW_MII 0x0a #define AX_CMD_READ_EEPROM 0x0b #define AX_CMD_WRITE_EEPROM 0x0c @@ -71,6 +72,17 @@ #define AX_CMD_SW_RESET 0x20 #define AX_CMD_SW_PHY_STATUS 0x21 #define AX_CMD_SW_PHY_SELECT 0x22 +#define AX_QCTCTRL 0x2A + +#define AX_CHIPCODE_MASK 0x70 +#define AX_AX88772_CHIPCODE 0x00 +#define AX_AX88772A_CHIPCODE 0x10 +#define AX_AX88772B_CHIPCODE 0x20 +#define AX_HOST_EN 0x01 + +#define AX_PHYSEL_PSEL 0x01 +#define AX_PHYSEL_SSMII 0 +#define AX_PHYSEL_SSEN 0x10 #define AX_PHY_SELECT_MASK (BIT(3) | BIT(2)) #define AX_PHY_SELECT_INTERNAL 0 @@ -173,6 +185,10 @@ struct asix_rx_fixup_info { }; struct asix_common_private { + void (*resume)(struct usbnet *dev); + void (*suspend)(struct usbnet *dev); + u16 presvd_phy_advertise; + u16 presvd_phy_bmcr; struct asix_rx_fixup_info rx_fixup_info; }; @@ -182,10 +198,10 @@ extern const struct driver_info ax88172a_info; #define FLAG_EEPROM_MAC (1UL << 0) /* init device MAC from eeprom */ int asix_read_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index, - u16 size, void *data); + u16 size, void *data, int in_pm); int asix_write_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index, - u16 size, void *data); + u16 size, void *data, int in_pm); void asix_write_cmd_async(struct usbnet *dev, u8 cmd, u16 value, u16 index, u16 size, void *data); @@ -197,27 +213,31 @@ int asix_rx_fixup_common(struct usbnet *dev, struct sk_buff *skb); struct sk_buff *asix_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags); -int asix_set_sw_mii(struct usbnet *dev); -int asix_set_hw_mii(struct usbnet *dev); +int asix_set_sw_mii(struct usbnet *dev, int in_pm); +int asix_set_hw_mii(struct usbnet *dev, int in_pm); int asix_read_phy_addr(struct usbnet *dev, int internal); int asix_get_phy_addr(struct usbnet *dev); -int asix_sw_reset(struct usbnet *dev, u8 flags); +int asix_sw_reset(struct usbnet *dev, u8 flags, int in_pm); -u16 asix_read_rx_ctl(struct usbnet *dev); -int asix_write_rx_ctl(struct usbnet *dev, u16 mode); +u16 asix_read_rx_ctl(struct usbnet *dev, int in_pm); +int asix_write_rx_ctl(struct usbnet *dev, u16 mode, int in_pm); -u16 asix_read_medium_status(struct usbnet *dev); -int asix_write_medium_mode(struct usbnet *dev, u16 mode); +u16 asix_read_medium_status(struct usbnet *dev, int in_pm); +int asix_write_medium_mode(struct usbnet *dev, u16 mode, int in_pm); -int asix_write_gpio(struct usbnet *dev, u16 value, int sleep); +int asix_write_gpio(struct usbnet *dev, u16 value, int sleep, int in_pm); void asix_set_multicast(struct net_device *net); int asix_mdio_read(struct net_device *netdev, int phy_id, int loc); void asix_mdio_write(struct net_device *netdev, int phy_id, int loc, int val); +int asix_mdio_read_nopm(struct net_device *netdev, int phy_id, int loc); +void asix_mdio_write_nopm(struct net_device *netdev, int phy_id, int loc, + int val); + void asix_get_wol(struct net_device *net, struct ethtool_wolinfo *wolinfo); int asix_set_wol(struct net_device *net, struct ethtool_wolinfo *wolinfo); diff --git a/drivers/net/usb/asix_common.c b/drivers/net/usb/asix_common.c index 7de5ab589e4e..25609eefc762 100644 --- a/drivers/net/usb/asix_common.c +++ b/drivers/net/usb/asix_common.c @@ -22,24 +22,49 @@ #include "asix.h" int asix_read_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index, - u16 size, void *data) + u16 size, void *data, int in_pm) { int ret; - ret = usbnet_read_cmd(dev, cmd, - USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, - value, index, data, size); + int (*fn)(struct usbnet *, u8, u8, u16, u16, void *, u16); + + BUG_ON(!dev); + + if (!in_pm) + fn = usbnet_read_cmd; + else + fn = usbnet_read_cmd_nopm; + + ret = fn(dev, cmd, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, + value, index, data, size); + + if (unlikely(ret < 0)) + netdev_warn(dev->net, "Failed to read reg index 0x%04x: %d\n", + index, ret); - if (ret != size && ret >= 0) - return -EINVAL; return ret; } int asix_write_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index, - u16 size, void *data) + u16 size, void *data, int in_pm) { - return usbnet_write_cmd(dev, cmd, - USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, - value, index, data, size); + int ret; + int (*fn)(struct usbnet *, u8, u8, u16, u16, const void *, u16); + + BUG_ON(!dev); + + if (!in_pm) + fn = usbnet_write_cmd; + else + fn = usbnet_write_cmd_nopm; + + ret = fn(dev, cmd, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, + value, index, data, size); + + if (unlikely(ret < 0)) + netdev_warn(dev->net, "Failed to write reg index 0x%04x: %d\n", + index, ret); + + return ret; } void asix_write_cmd_async(struct usbnet *dev, u8 cmd, u16 value, u16 index, @@ -225,19 +250,20 @@ struct sk_buff *asix_tx_fixup(struct usbnet *dev, struct sk_buff *skb, return skb; } -int asix_set_sw_mii(struct usbnet *dev) +int asix_set_sw_mii(struct usbnet *dev, int in_pm) { int ret; - ret = asix_write_cmd(dev, AX_CMD_SET_SW_MII, 0x0000, 0, 0, NULL); + ret = asix_write_cmd(dev, AX_CMD_SET_SW_MII, 0x0000, 0, 0, NULL, in_pm); + if (ret < 0) netdev_err(dev->net, "Failed to enable software MII access\n"); return ret; } -int asix_set_hw_mii(struct usbnet *dev) +int asix_set_hw_mii(struct usbnet *dev, int in_pm) { int ret; - ret = asix_write_cmd(dev, AX_CMD_SET_HW_MII, 0x0000, 0, 0, NULL); + ret = asix_write_cmd(dev, AX_CMD_SET_HW_MII, 0x0000, 0, 0, NULL, in_pm); if (ret < 0) netdev_err(dev->net, "Failed to enable hardware MII access\n"); return ret; @@ -247,7 +273,7 @@ int asix_read_phy_addr(struct usbnet *dev, int internal) { int offset = (internal ? 1 : 0); u8 buf[2]; - int ret = asix_read_cmd(dev, AX_CMD_READ_PHY_ID, 0, 0, 2, buf); + int ret = asix_read_cmd(dev, AX_CMD_READ_PHY_ID, 0, 0, 2, buf, 0); netdev_dbg(dev->net, "asix_get_phy_addr()\n"); @@ -270,21 +296,21 @@ int asix_get_phy_addr(struct usbnet *dev) } -int asix_sw_reset(struct usbnet *dev, u8 flags) +int asix_sw_reset(struct usbnet *dev, u8 flags, int in_pm) { int ret; - ret = asix_write_cmd(dev, AX_CMD_SW_RESET, flags, 0, 0, NULL); + ret = asix_write_cmd(dev, AX_CMD_SW_RESET, flags, 0, 0, NULL, in_pm); if (ret < 0) netdev_err(dev->net, "Failed to send software reset: %02x\n", ret); return ret; } -u16 asix_read_rx_ctl(struct usbnet *dev) +u16 asix_read_rx_ctl(struct usbnet *dev, int in_pm) { __le16 v; - int ret = asix_read_cmd(dev, AX_CMD_READ_RX_CTL, 0, 0, 2, &v); + int ret = asix_read_cmd(dev, AX_CMD_READ_RX_CTL, 0, 0, 2, &v, in_pm); if (ret < 0) { netdev_err(dev->net, "Error reading RX_CTL register: %02x\n", ret); @@ -295,12 +321,12 @@ out: return ret; } -int asix_write_rx_ctl(struct usbnet *dev, u16 mode) +int asix_write_rx_ctl(struct usbnet *dev, u16 mode, int in_pm) { int ret; netdev_dbg(dev->net, "asix_write_rx_ctl() - mode = 0x%04x\n", mode); - ret = asix_write_cmd(dev, AX_CMD_WRITE_RX_CTL, mode, 0, 0, NULL); + ret = asix_write_cmd(dev, AX_CMD_WRITE_RX_CTL, mode, 0, 0, NULL, in_pm); if (ret < 0) netdev_err(dev->net, "Failed to write RX_CTL mode to 0x%04x: %02x\n", mode, ret); @@ -308,10 +334,11 @@ int asix_write_rx_ctl(struct usbnet *dev, u16 mode) return ret; } -u16 asix_read_medium_status(struct usbnet *dev) +u16 asix_read_medium_status(struct usbnet *dev, int in_pm) { __le16 v; - int ret = asix_read_cmd(dev, AX_CMD_READ_MEDIUM_STATUS, 0, 0, 2, &v); + int ret = asix_read_cmd(dev, AX_CMD_READ_MEDIUM_STATUS, + 0, 0, 2, &v, in_pm); if (ret < 0) { netdev_err(dev->net, "Error reading Medium Status register: %02x\n", @@ -323,12 +350,13 @@ u16 asix_read_medium_status(struct usbnet *dev) } -int asix_write_medium_mode(struct usbnet *dev, u16 mode) +int asix_write_medium_mode(struct usbnet *dev, u16 mode, int in_pm) { int ret; netdev_dbg(dev->net, "asix_write_medium_mode() - mode = 0x%04x\n", mode); - ret = asix_write_cmd(dev, AX_CMD_WRITE_MEDIUM_MODE, mode, 0, 0, NULL); + ret = asix_write_cmd(dev, AX_CMD_WRITE_MEDIUM_MODE, + mode, 0, 0, NULL, in_pm); if (ret < 0) netdev_err(dev->net, "Failed to write Medium Mode mode to 0x%04x: %02x\n", mode, ret); @@ -336,12 +364,12 @@ int asix_write_medium_mode(struct usbnet *dev, u16 mode) return ret; } -int asix_write_gpio(struct usbnet *dev, u16 value, int sleep) +int asix_write_gpio(struct usbnet *dev, u16 value, int sleep, int in_pm) { int ret; netdev_dbg(dev->net, "asix_write_gpio() - value = 0x%04x\n", value); - ret = asix_write_cmd(dev, AX_CMD_WRITE_GPIOS, value, 0, 0, NULL); + ret = asix_write_cmd(dev, AX_CMD_WRITE_GPIOS, value, 0, 0, NULL, in_pm); if (ret < 0) netdev_err(dev->net, "Failed to write GPIO value 0x%04x: %02x\n", value, ret); @@ -398,16 +426,23 @@ int asix_mdio_read(struct net_device *netdev, int phy_id, int loc) { struct usbnet *dev = netdev_priv(netdev); __le16 res; + u8 smsr; + int i = 0; mutex_lock(&dev->phy_mutex); - asix_set_sw_mii(dev); + do { + asix_set_sw_mii(dev, 0); + usleep_range(1000, 1100); + asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG, 0, 0, 1, &smsr, 0); + } while (!(smsr & AX_HOST_EN) && (i++ < 30)); + asix_read_cmd(dev, AX_CMD_READ_MII_REG, phy_id, - (__u16)loc, 2, &res); - asix_set_hw_mii(dev); + (__u16)loc, 2, &res, 0); + asix_set_hw_mii(dev, 0); mutex_unlock(&dev->phy_mutex); netdev_dbg(dev->net, "asix_mdio_read() phy_id=0x%02x, loc=0x%02x, returns=0x%04x\n", - phy_id, loc, le16_to_cpu(res)); + phy_id, loc, le16_to_cpu(res)); return le16_to_cpu(res); } @@ -416,13 +451,71 @@ void asix_mdio_write(struct net_device *netdev, int phy_id, int loc, int val) { struct usbnet *dev = netdev_priv(netdev); __le16 res = cpu_to_le16(val); + u8 smsr; + int i = 0; netdev_dbg(dev->net, "asix_mdio_write() phy_id=0x%02x, loc=0x%02x, val=0x%04x\n", - phy_id, loc, val); + phy_id, loc, val); + mutex_lock(&dev->phy_mutex); - asix_set_sw_mii(dev); - asix_write_cmd(dev, AX_CMD_WRITE_MII_REG, phy_id, (__u16)loc, 2, &res); - asix_set_hw_mii(dev); + do { + asix_set_sw_mii(dev, 0); + usleep_range(1000, 1100); + asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG, 0, 0, 1, &smsr, 0); + } while (!(smsr & AX_HOST_EN) && (i++ < 30)); + + asix_write_cmd(dev, AX_CMD_WRITE_MII_REG, phy_id, + (__u16)loc, 2, &res, 0); + asix_set_hw_mii(dev, 0); + mutex_unlock(&dev->phy_mutex); +} + +int asix_mdio_read_nopm(struct net_device *netdev, int phy_id, int loc) +{ + struct usbnet *dev = netdev_priv(netdev); + __le16 res; + u8 smsr; + int i = 0; + + mutex_lock(&dev->phy_mutex); + do { + asix_set_sw_mii(dev, 1); + usleep_range(1000, 1100); + asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG, 0, 0, 1, &smsr, 1); + } while (!(smsr & AX_HOST_EN) && (i++ < 30)); + + asix_read_cmd(dev, AX_CMD_READ_MII_REG, phy_id, + (__u16)loc, 2, &res, 1); + asix_set_hw_mii(dev, 1); + mutex_unlock(&dev->phy_mutex); + + netdev_dbg(dev->net, "asix_mdio_read_nopm() phy_id=0x%02x, loc=0x%02x, returns=0x%04x\n", + phy_id, loc, le16_to_cpu(res)); + + return le16_to_cpu(res); +} + +void +asix_mdio_write_nopm(struct net_device *netdev, int phy_id, int loc, int val) +{ + struct usbnet *dev = netdev_priv(netdev); + __le16 res = cpu_to_le16(val); + u8 smsr; + int i = 0; + + netdev_dbg(dev->net, "asix_mdio_write() phy_id=0x%02x, loc=0x%02x, val=0x%04x\n", + phy_id, loc, val); + + mutex_lock(&dev->phy_mutex); + do { + asix_set_sw_mii(dev, 1); + usleep_range(1000, 1100); + asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG, 0, 0, 1, &smsr, 1); + } while (!(smsr & AX_HOST_EN) && (i++ < 30)); + + asix_write_cmd(dev, AX_CMD_WRITE_MII_REG, phy_id, + (__u16)loc, 2, &res, 1); + asix_set_hw_mii(dev, 1); mutex_unlock(&dev->phy_mutex); } @@ -431,7 +524,8 @@ void asix_get_wol(struct net_device *net, struct ethtool_wolinfo *wolinfo) struct usbnet *dev = netdev_priv(net); u8 opt; - if (asix_read_cmd(dev, AX_CMD_READ_MONITOR_MODE, 0, 0, 1, &opt) < 0) { + if (asix_read_cmd(dev, AX_CMD_READ_MONITOR_MODE, + 0, 0, 1, &opt, 0) < 0) { wolinfo->supported = 0; wolinfo->wolopts = 0; return; @@ -455,7 +549,7 @@ int asix_set_wol(struct net_device *net, struct ethtool_wolinfo *wolinfo) opt |= AX_MONITOR_MAGIC; if (asix_write_cmd(dev, AX_CMD_WRITE_MONITOR_MODE, - opt, 0, 0, NULL) < 0) + opt, 0, 0, NULL, 0) < 0) return -EINVAL; return 0; @@ -490,7 +584,7 @@ int asix_get_eeprom(struct net_device *net, struct ethtool_eeprom *eeprom, /* ax8817x returns 2 bytes from eeprom on read */ for (i = first_word; i <= last_word; i++) { if (asix_read_cmd(dev, AX_CMD_READ_EEPROM, i, 0, 2, - &(eeprom_buff[i - first_word])) < 0) { + &eeprom_buff[i - first_word], 0) < 0) { kfree(eeprom_buff); return -EIO; } @@ -531,7 +625,7 @@ int asix_set_eeprom(struct net_device *net, struct ethtool_eeprom *eeprom, the EEPROM */ if (eeprom->offset & 1) { ret = asix_read_cmd(dev, AX_CMD_READ_EEPROM, first_word, 0, 2, - &(eeprom_buff[0])); + &eeprom_buff[0], 0); if (ret < 0) { netdev_err(net, "Failed to read EEPROM at offset 0x%02x.\n", first_word); goto free; @@ -540,7 +634,7 @@ int asix_set_eeprom(struct net_device *net, struct ethtool_eeprom *eeprom, if ((eeprom->offset + eeprom->len) & 1) { ret = asix_read_cmd(dev, AX_CMD_READ_EEPROM, last_word, 0, 2, - &(eeprom_buff[last_word - first_word])); + &eeprom_buff[last_word - first_word], 0); if (ret < 0) { netdev_err(net, "Failed to read EEPROM at offset 0x%02x.\n", last_word); goto free; @@ -550,7 +644,7 @@ int asix_set_eeprom(struct net_device *net, struct ethtool_eeprom *eeprom, memcpy((u8 *)eeprom_buff + (eeprom->offset & 1), data, eeprom->len); /* write data to EEPROM */ - ret = asix_write_cmd(dev, AX_CMD_WRITE_ENABLE, 0x0000, 0, 0, NULL); + ret = asix_write_cmd(dev, AX_CMD_WRITE_ENABLE, 0x0000, 0, 0, NULL, 0); if (ret < 0) { netdev_err(net, "Failed to enable EEPROM write\n"); goto free; @@ -561,7 +655,7 @@ int asix_set_eeprom(struct net_device *net, struct ethtool_eeprom *eeprom, netdev_dbg(net, "write to EEPROM at offset 0x%02x, data 0x%04x\n", i, eeprom_buff[i - first_word]); ret = asix_write_cmd(dev, AX_CMD_WRITE_EEPROM, i, - eeprom_buff[i - first_word], 0, NULL); + eeprom_buff[i - first_word], 0, NULL, 0); if (ret < 0) { netdev_err(net, "Failed to write EEPROM at offset 0x%02x.\n", i); @@ -570,7 +664,7 @@ int asix_set_eeprom(struct net_device *net, struct ethtool_eeprom *eeprom, msleep(20); } - ret = asix_write_cmd(dev, AX_CMD_WRITE_DISABLE, 0x0000, 0, 0, NULL); + ret = asix_write_cmd(dev, AX_CMD_WRITE_DISABLE, 0x0000, 0, 0, NULL, 0); if (ret < 0) { netdev_err(net, "Failed to disable EEPROM write\n"); goto free; diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index 5cabefc23494..aaa42909d932 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -184,7 +184,7 @@ static int ax88172_link_reset(struct usbnet *dev) netdev_dbg(dev->net, "ax88172_link_reset() speed: %u duplex: %d setting mode to 0x%04x\n", ethtool_cmd_speed(&ecmd), ecmd.duplex, mode); - asix_write_medium_mode(dev, mode); + asix_write_medium_mode(dev, mode, 0); return 0; } @@ -213,18 +213,19 @@ static int ax88172_bind(struct usbnet *dev, struct usb_interface *intf) /* Toggle the GPIOs in a manufacturer/model specific way */ for (i = 2; i >= 0; i--) { ret = asix_write_cmd(dev, AX_CMD_WRITE_GPIOS, - (gpio_bits >> (i * 8)) & 0xff, 0, 0, NULL); + (gpio_bits >> (i * 8)) & 0xff, 0, 0, NULL, 0); if (ret < 0) goto out; msleep(5); } - ret = asix_write_rx_ctl(dev, 0x80); + ret = asix_write_rx_ctl(dev, 0x80, 0); if (ret < 0) goto out; /* Get the MAC address */ - ret = asix_read_cmd(dev, AX88172_CMD_READ_NODE_ID, 0, 0, ETH_ALEN, buf); + ret = asix_read_cmd(dev, AX88172_CMD_READ_NODE_ID, + 0, 0, ETH_ALEN, buf, 0); if (ret < 0) { netdev_dbg(dev->net, "read AX_CMD_READ_NODE_ID failed: %d\n", ret); @@ -290,7 +291,7 @@ static int ax88772_link_reset(struct usbnet *dev) netdev_dbg(dev->net, "ax88772_link_reset() speed: %u duplex: %d setting mode to 0x%04x\n", ethtool_cmd_speed(&ecmd), ecmd.duplex, mode); - asix_write_medium_mode(dev, mode); + asix_write_medium_mode(dev, mode, 0); return 0; } @@ -298,100 +299,115 @@ static int ax88772_link_reset(struct usbnet *dev) static int ax88772_reset(struct usbnet *dev) { struct asix_data *data = (struct asix_data *)&dev->data; - int ret, embd_phy; - u16 rx_ctl; + int ret; - ret = asix_write_gpio(dev, - AX_GPIO_RSE | AX_GPIO_GPO_2 | AX_GPIO_GPO2EN, 5); + /* Rewrite MAC address */ + ether_addr_copy(data->mac_addr, dev->net->dev_addr); + ret = asix_write_cmd(dev, AX_CMD_WRITE_NODE_ID, 0, 0, + ETH_ALEN, data->mac_addr, 0); if (ret < 0) goto out; - embd_phy = ((asix_get_phy_addr(dev) & 0x1f) == 0x10 ? 1 : 0); + /* Set RX_CTL to default values with 2k buffer, and enable cactus */ + ret = asix_write_rx_ctl(dev, AX_DEFAULT_RX_CTL, 0); + if (ret < 0) + goto out; - ret = asix_write_cmd(dev, AX_CMD_SW_PHY_SELECT, embd_phy, 0, 0, NULL); + asix_write_medium_mode(dev, AX88772_MEDIUM_DEFAULT, 0); + if (ret < 0) + goto out; + + return 0; + +out: + return ret; +} + +static int ax88772_hw_reset(struct usbnet *dev, int in_pm) +{ + struct asix_data *data = (struct asix_data *)&dev->data; + int ret, embd_phy; + u16 rx_ctl; + + ret = asix_write_gpio(dev, AX_GPIO_RSE | AX_GPIO_GPO_2 | + AX_GPIO_GPO2EN, 5, in_pm); + if (ret < 0) + goto out; + + embd_phy = ((dev->mii.phy_id & 0x1f) == 0x10 ? 1 : 0); + + ret = asix_write_cmd(dev, AX_CMD_SW_PHY_SELECT, embd_phy, + 0, 0, NULL, in_pm); if (ret < 0) { netdev_dbg(dev->net, "Select PHY #1 failed: %d\n", ret); goto out; } - ret = asix_sw_reset(dev, AX_SWRESET_IPPD | AX_SWRESET_PRL); - if (ret < 0) - goto out; - - msleep(150); - - ret = asix_sw_reset(dev, AX_SWRESET_CLEAR); - if (ret < 0) - goto out; - - msleep(150); - if (embd_phy) { - ret = asix_sw_reset(dev, AX_SWRESET_IPRL); + ret = asix_sw_reset(dev, AX_SWRESET_IPPD, in_pm); + if (ret < 0) + goto out; + + usleep_range(10000, 11000); + + ret = asix_sw_reset(dev, AX_SWRESET_CLEAR, in_pm); + if (ret < 0) + goto out; + + msleep(60); + + ret = asix_sw_reset(dev, AX_SWRESET_IPRL | AX_SWRESET_PRL, + in_pm); if (ret < 0) goto out; } else { - ret = asix_sw_reset(dev, AX_SWRESET_PRTE); + ret = asix_sw_reset(dev, AX_SWRESET_IPPD | AX_SWRESET_PRL, + in_pm); if (ret < 0) goto out; } msleep(150); - rx_ctl = asix_read_rx_ctl(dev); - netdev_dbg(dev->net, "RX_CTL is 0x%04x after software reset\n", rx_ctl); - ret = asix_write_rx_ctl(dev, 0x0000); + + if (in_pm && (!asix_mdio_read_nopm(dev->net, dev->mii.phy_id, + MII_PHYSID1))){ + ret = -EIO; + goto out; + } + + ret = asix_write_rx_ctl(dev, AX_DEFAULT_RX_CTL, in_pm); if (ret < 0) goto out; - rx_ctl = asix_read_rx_ctl(dev); - netdev_dbg(dev->net, "RX_CTL is 0x%04x setting to 0x0000\n", rx_ctl); - - ret = asix_sw_reset(dev, AX_SWRESET_PRL); - if (ret < 0) - goto out; - - msleep(150); - - ret = asix_sw_reset(dev, AX_SWRESET_IPRL | AX_SWRESET_PRL); - if (ret < 0) - goto out; - - msleep(150); - - asix_mdio_write(dev->net, dev->mii.phy_id, MII_BMCR, BMCR_RESET); - asix_mdio_write(dev->net, dev->mii.phy_id, MII_ADVERTISE, - ADVERTISE_ALL | ADVERTISE_CSMA); - mii_nway_restart(&dev->mii); - - ret = asix_write_medium_mode(dev, AX88772_MEDIUM_DEFAULT); + ret = asix_write_medium_mode(dev, AX88772_MEDIUM_DEFAULT, in_pm); if (ret < 0) goto out; ret = asix_write_cmd(dev, AX_CMD_WRITE_IPG0, - AX88772_IPG0_DEFAULT | AX88772_IPG1_DEFAULT, - AX88772_IPG2_DEFAULT, 0, NULL); + AX88772_IPG0_DEFAULT | AX88772_IPG1_DEFAULT, + AX88772_IPG2_DEFAULT, 0, NULL, in_pm); if (ret < 0) { netdev_dbg(dev->net, "Write IPG,IPG1,IPG2 failed: %d\n", ret); goto out; } /* Rewrite MAC address */ - memcpy(data->mac_addr, dev->net->dev_addr, ETH_ALEN); - ret = asix_write_cmd(dev, AX_CMD_WRITE_NODE_ID, 0, 0, ETH_ALEN, - data->mac_addr); + ether_addr_copy(data->mac_addr, dev->net->dev_addr); + ret = asix_write_cmd(dev, AX_CMD_WRITE_NODE_ID, 0, 0, + ETH_ALEN, data->mac_addr, in_pm); if (ret < 0) goto out; /* Set RX_CTL to default values with 2k buffer, and enable cactus */ - ret = asix_write_rx_ctl(dev, AX_DEFAULT_RX_CTL); + ret = asix_write_rx_ctl(dev, AX_DEFAULT_RX_CTL, in_pm); if (ret < 0) goto out; - rx_ctl = asix_read_rx_ctl(dev); + rx_ctl = asix_read_rx_ctl(dev, in_pm); netdev_dbg(dev->net, "RX_CTL is 0x%04x after all initializations\n", rx_ctl); - rx_ctl = asix_read_medium_status(dev); + rx_ctl = asix_read_medium_status(dev, in_pm); netdev_dbg(dev->net, "Medium Status is 0x%04x after all initializations\n", rx_ctl); @@ -400,7 +416,114 @@ static int ax88772_reset(struct usbnet *dev) out: return ret; +} +static int ax88772a_hw_reset(struct usbnet *dev, int in_pm) +{ + struct asix_data *data = (struct asix_data *)&dev->data; + int ret, embd_phy; + u16 rx_ctl; + u8 chipcode = 0; + + ret = asix_write_gpio(dev, AX_GPIO_RSE, 5, in_pm); + if (ret < 0) + goto out; + + embd_phy = ((dev->mii.phy_id & 0x1f) == 0x10 ? 1 : 0); + + ret = asix_write_cmd(dev, AX_CMD_SW_PHY_SELECT, embd_phy | + AX_PHYSEL_SSEN, 0, 0, NULL, in_pm); + if (ret < 0) { + netdev_dbg(dev->net, "Select PHY #1 failed: %d\n", ret); + goto out; + } + usleep_range(10000, 11000); + + ret = asix_sw_reset(dev, AX_SWRESET_IPPD | AX_SWRESET_IPRL, in_pm); + if (ret < 0) + goto out; + + usleep_range(10000, 11000); + + ret = asix_sw_reset(dev, AX_SWRESET_IPRL, in_pm); + if (ret < 0) + goto out; + + msleep(160); + + ret = asix_sw_reset(dev, AX_SWRESET_CLEAR, in_pm); + if (ret < 0) + goto out; + + ret = asix_sw_reset(dev, AX_SWRESET_IPRL, in_pm); + if (ret < 0) + goto out; + + msleep(200); + + if (in_pm && (!asix_mdio_read_nopm(dev->net, dev->mii.phy_id, + MII_PHYSID1))) { + ret = -1; + goto out; + } + + ret = asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG, 0, + 0, 1, &chipcode, in_pm); + if (ret < 0) + goto out; + + if ((chipcode & AX_CHIPCODE_MASK) == AX_AX88772B_CHIPCODE) { + ret = asix_write_cmd(dev, AX_QCTCTRL, 0x8000, 0x8001, + 0, NULL, in_pm); + if (ret < 0) { + netdev_dbg(dev->net, "Write BQ setting failed: %d\n", + ret); + goto out; + } + } + + ret = asix_write_cmd(dev, AX_CMD_WRITE_IPG0, + AX88772_IPG0_DEFAULT | AX88772_IPG1_DEFAULT, + AX88772_IPG2_DEFAULT, 0, NULL, in_pm); + if (ret < 0) { + netdev_dbg(dev->net, "Write IPG,IPG1,IPG2 failed: %d\n", ret); + goto out; + } + + /* Rewrite MAC address */ + memcpy(data->mac_addr, dev->net->dev_addr, ETH_ALEN); + ret = asix_write_cmd(dev, AX_CMD_WRITE_NODE_ID, 0, 0, ETH_ALEN, + data->mac_addr, in_pm); + if (ret < 0) + goto out; + + /* Set RX_CTL to default values with 2k buffer, and enable cactus */ + ret = asix_write_rx_ctl(dev, AX_DEFAULT_RX_CTL, in_pm); + if (ret < 0) + goto out; + + ret = asix_write_medium_mode(dev, AX88772_MEDIUM_DEFAULT, in_pm); + if (ret < 0) + return ret; + + /* Set RX_CTL to default values with 2k buffer, and enable cactus */ + ret = asix_write_rx_ctl(dev, AX_DEFAULT_RX_CTL, in_pm); + if (ret < 0) + goto out; + + rx_ctl = asix_read_rx_ctl(dev, in_pm); + netdev_dbg(dev->net, "RX_CTL is 0x%04x after all initializations\n", + rx_ctl); + + rx_ctl = asix_read_medium_status(dev, in_pm); + netdev_dbg(dev->net, + "Medium Status is 0x%04x after all initializations\n", + rx_ctl); + + return 0; + +out: + return ret; } static const struct net_device_ops ax88772_netdev_ops = { @@ -415,11 +538,87 @@ static const struct net_device_ops ax88772_netdev_ops = { .ndo_set_rx_mode = asix_set_multicast, }; +static void ax88772_suspend(struct usbnet *dev) +{ + struct asix_common_private *priv = dev->driver_priv; + + /* Preserve BMCR for restoring */ + priv->presvd_phy_bmcr = + asix_mdio_read_nopm(dev->net, dev->mii.phy_id, MII_BMCR); + + /* Preserve ANAR for restoring */ + priv->presvd_phy_advertise = + asix_mdio_read_nopm(dev->net, dev->mii.phy_id, MII_ADVERTISE); +} + +static int asix_suspend(struct usb_interface *intf, pm_message_t message) +{ + struct usbnet *dev = usb_get_intfdata(intf); + struct asix_common_private *priv = dev->driver_priv; + + if (priv->suspend) + priv->suspend(dev); + + return usbnet_suspend(intf, message); +} + +static void ax88772_restore_phy(struct usbnet *dev) +{ + struct asix_common_private *priv = dev->driver_priv; + + if (priv->presvd_phy_advertise) { + /* Restore Advertisement control reg */ + asix_mdio_write_nopm(dev->net, dev->mii.phy_id, MII_ADVERTISE, + priv->presvd_phy_advertise); + + /* Restore BMCR */ + asix_mdio_write_nopm(dev->net, dev->mii.phy_id, MII_BMCR, + priv->presvd_phy_bmcr); + + priv->presvd_phy_advertise = 0; + priv->presvd_phy_bmcr = 0; + } +} + +static void ax88772_resume(struct usbnet *dev) +{ + int i; + + for (i = 0; i < 3; i++) + if (!ax88772_hw_reset(dev, 1)) + break; + ax88772_restore_phy(dev); +} + +static void ax88772a_resume(struct usbnet *dev) +{ + int i; + + for (i = 0; i < 3; i++) { + if (!ax88772a_hw_reset(dev, 1)) + break; + } + + ax88772_restore_phy(dev); +} + +static int asix_resume(struct usb_interface *intf) +{ + struct usbnet *dev = usb_get_intfdata(intf); + struct asix_common_private *priv = dev->driver_priv; + + if (priv->resume) + priv->resume(dev); + + return usbnet_resume(intf); +} + static int ax88772_bind(struct usbnet *dev, struct usb_interface *intf) { - int ret, embd_phy, i; - u8 buf[ETH_ALEN]; + int ret, i; + u8 buf[ETH_ALEN], chipcode = 0; u32 phyid; + struct asix_common_private *priv; usbnet_get_endpoints(dev,intf); @@ -427,13 +626,13 @@ static int ax88772_bind(struct usbnet *dev, struct usb_interface *intf) if (dev->driver_info->data & FLAG_EEPROM_MAC) { for (i = 0; i < (ETH_ALEN >> 1); i++) { ret = asix_read_cmd(dev, AX_CMD_READ_EEPROM, 0x04 + i, - 0, 2, buf + i * 2); + 0, 2, buf + i * 2, 0); if (ret < 0) break; } } else { ret = asix_read_cmd(dev, AX_CMD_READ_NODE_ID, - 0, 0, ETH_ALEN, buf); + 0, 0, ETH_ALEN, buf, 0); } if (ret < 0) { @@ -456,16 +655,11 @@ static int ax88772_bind(struct usbnet *dev, struct usb_interface *intf) dev->net->needed_headroom = 4; /* cf asix_tx_fixup() */ dev->net->needed_tailroom = 4; /* cf asix_tx_fixup() */ - embd_phy = ((dev->mii.phy_id & 0x1f) == 0x10 ? 1 : 0); + asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG, 0, 0, 1, &chipcode, 0); + chipcode &= AX_CHIPCODE_MASK; - /* Reset the PHY to normal operation mode */ - ret = asix_write_cmd(dev, AX_CMD_SW_PHY_SELECT, embd_phy, 0, 0, NULL); - if (ret < 0) { - netdev_dbg(dev->net, "Select PHY #1 failed: %d\n", ret); - return ret; - } - - ax88772_reset(dev); + (chipcode == AX_AX88772_CHIPCODE) ? ax88772_hw_reset(dev, 0) : + ax88772a_hw_reset(dev, 0); /* Read PHYID register *AFTER* the PHY was reset properly */ phyid = asix_get_phyid(dev); @@ -482,6 +676,18 @@ static int ax88772_bind(struct usbnet *dev, struct usb_interface *intf) if (!dev->driver_priv) return -ENOMEM; + priv = dev->driver_priv; + + priv->presvd_phy_bmcr = 0; + priv->presvd_phy_advertise = 0; + if (chipcode == AX_AX88772_CHIPCODE) { + priv->resume = ax88772_resume; + priv->suspend = ax88772_suspend; + } else { + priv->resume = ax88772a_resume; + priv->suspend = ax88772_suspend; + } + return 0; } @@ -593,12 +799,12 @@ static int ax88178_reset(struct usbnet *dev) int gpio0 = 0; u32 phyid; - asix_read_cmd(dev, AX_CMD_READ_GPIOS, 0, 0, 1, &status); + asix_read_cmd(dev, AX_CMD_READ_GPIOS, 0, 0, 1, &status, 0); netdev_dbg(dev->net, "GPIO Status: 0x%04x\n", status); - asix_write_cmd(dev, AX_CMD_WRITE_ENABLE, 0, 0, 0, NULL); - asix_read_cmd(dev, AX_CMD_READ_EEPROM, 0x0017, 0, 2, &eeprom); - asix_write_cmd(dev, AX_CMD_WRITE_DISABLE, 0, 0, 0, NULL); + asix_write_cmd(dev, AX_CMD_WRITE_ENABLE, 0, 0, 0, NULL, 0); + asix_read_cmd(dev, AX_CMD_READ_EEPROM, 0x0017, 0, 2, &eeprom, 0); + asix_write_cmd(dev, AX_CMD_WRITE_DISABLE, 0, 0, 0, NULL, 0); netdev_dbg(dev->net, "EEPROM index 0x17 is 0x%04x\n", eeprom); @@ -614,15 +820,16 @@ static int ax88178_reset(struct usbnet *dev) netdev_dbg(dev->net, "GPIO0: %d, PhyMode: %d\n", gpio0, data->phymode); /* Power up external GigaPHY through AX88178 GPIO pin */ - asix_write_gpio(dev, AX_GPIO_RSE | AX_GPIO_GPO_1 | AX_GPIO_GPO1EN, 40); + asix_write_gpio(dev, AX_GPIO_RSE | AX_GPIO_GPO_1 | + AX_GPIO_GPO1EN, 40, 0); if ((le16_to_cpu(eeprom) >> 8) != 1) { - asix_write_gpio(dev, 0x003c, 30); - asix_write_gpio(dev, 0x001c, 300); - asix_write_gpio(dev, 0x003c, 30); + asix_write_gpio(dev, 0x003c, 30, 0); + asix_write_gpio(dev, 0x001c, 300, 0); + asix_write_gpio(dev, 0x003c, 30, 0); } else { netdev_dbg(dev->net, "gpio phymode == 1 path\n"); - asix_write_gpio(dev, AX_GPIO_GPO1EN, 30); - asix_write_gpio(dev, AX_GPIO_GPO1EN | AX_GPIO_GPO_1, 30); + asix_write_gpio(dev, AX_GPIO_GPO1EN, 30, 0); + asix_write_gpio(dev, AX_GPIO_GPO1EN | AX_GPIO_GPO_1, 30, 0); } /* Read PHYID register *AFTER* powering up PHY */ @@ -630,15 +837,15 @@ static int ax88178_reset(struct usbnet *dev) netdev_dbg(dev->net, "PHYID=0x%08x\n", phyid); /* Set AX88178 to enable MII/GMII/RGMII interface for external PHY */ - asix_write_cmd(dev, AX_CMD_SW_PHY_SELECT, 0, 0, 0, NULL); + asix_write_cmd(dev, AX_CMD_SW_PHY_SELECT, 0, 0, 0, NULL, 0); - asix_sw_reset(dev, 0); + asix_sw_reset(dev, 0, 0); msleep(150); - asix_sw_reset(dev, AX_SWRESET_PRL | AX_SWRESET_IPPD); + asix_sw_reset(dev, AX_SWRESET_PRL | AX_SWRESET_IPPD, 0); msleep(150); - asix_write_rx_ctl(dev, 0); + asix_write_rx_ctl(dev, 0, 0); if (data->phymode == PHY_MODE_MARVELL) { marvell_phy_init(dev); @@ -655,18 +862,18 @@ static int ax88178_reset(struct usbnet *dev) mii_nway_restart(&dev->mii); - ret = asix_write_medium_mode(dev, AX88178_MEDIUM_DEFAULT); + ret = asix_write_medium_mode(dev, AX88178_MEDIUM_DEFAULT, 0); if (ret < 0) return ret; /* Rewrite MAC address */ memcpy(data->mac_addr, dev->net->dev_addr, ETH_ALEN); ret = asix_write_cmd(dev, AX_CMD_WRITE_NODE_ID, 0, 0, ETH_ALEN, - data->mac_addr); + data->mac_addr, 0); if (ret < 0) return ret; - ret = asix_write_rx_ctl(dev, AX_DEFAULT_RX_CTL); + ret = asix_write_rx_ctl(dev, AX_DEFAULT_RX_CTL, 0); if (ret < 0) return ret; @@ -704,7 +911,7 @@ static int ax88178_link_reset(struct usbnet *dev) netdev_dbg(dev->net, "ax88178_link_reset() speed: %u duplex: %d setting mode to 0x%04x\n", speed, ecmd.duplex, mode); - asix_write_medium_mode(dev, mode); + asix_write_medium_mode(dev, mode, 0); if (data->phymode == PHY_MODE_MARVELL && data->ledmode) marvell_led_status(dev, speed); @@ -733,15 +940,15 @@ static void ax88178_set_mfb(struct usbnet *dev) mfb = AX_RX_CTL_MFB_16384; } - rxctl = asix_read_rx_ctl(dev); - asix_write_rx_ctl(dev, (rxctl & ~AX_RX_CTL_MFB_16384) | mfb); + rxctl = asix_read_rx_ctl(dev, 0); + asix_write_rx_ctl(dev, (rxctl & ~AX_RX_CTL_MFB_16384) | mfb, 0); - medium = asix_read_medium_status(dev); + medium = asix_read_medium_status(dev, 0); if (dev->net->mtu > 1500) medium |= AX_MEDIUM_JFE; else medium &= ~AX_MEDIUM_JFE; - asix_write_medium_mode(dev, medium); + asix_write_medium_mode(dev, medium, 0); if (dev->rx_urb_size > old_rx_urb_size) usbnet_unlink_rx_urbs(dev); @@ -790,7 +997,7 @@ static int ax88178_bind(struct usbnet *dev, struct usb_interface *intf) usbnet_get_endpoints(dev,intf); /* Get the MAC address */ - ret = asix_read_cmd(dev, AX_CMD_READ_NODE_ID, 0, 0, ETH_ALEN, buf); + ret = asix_read_cmd(dev, AX_CMD_READ_NODE_ID, 0, 0, ETH_ALEN, buf, 0); if (ret < 0) { netdev_dbg(dev->net, "Failed to read MAC address: %d\n", ret); return ret; @@ -811,10 +1018,10 @@ static int ax88178_bind(struct usbnet *dev, struct usb_interface *intf) dev->net->ethtool_ops = &ax88178_ethtool_ops; /* Blink LEDS so users know driver saw dongle */ - asix_sw_reset(dev, 0); + asix_sw_reset(dev, 0, 0); msleep(150); - asix_sw_reset(dev, AX_SWRESET_PRL | AX_SWRESET_IPPD); + asix_sw_reset(dev, AX_SWRESET_PRL | AX_SWRESET_IPPD, 0); msleep(150); /* Asix framing packs multiple eth frames into a 2K usb bulk transfer */ @@ -877,7 +1084,7 @@ static const struct driver_info ax88772_info = { .unbind = ax88772_unbind, .status = asix_status, .link_reset = ax88772_link_reset, - .reset = ax88772_link_reset, + .reset = ax88772_reset, .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_LINK_INTR | FLAG_MULTI_PACKET, .rx_fixup = asix_rx_fixup_common, .tx_fixup = asix_tx_fixup, @@ -1005,7 +1212,7 @@ static const struct usb_device_id products [] = { }, { // Lenovo U2L100P 10/100 USB_DEVICE (0x17ef, 0x7203), - .driver_info = (unsigned long) &ax88772_info, + .driver_info = (unsigned long)&ax88772b_info, }, { // ASIX AX88772B 10/100 USB_DEVICE (0x0b95, 0x772b), @@ -1073,7 +1280,7 @@ static const struct usb_device_id products [] = { }, { // Asus USB Ethernet Adapter USB_DEVICE (0x0b95, 0x7e2b), - .driver_info = (unsigned long) &ax88772_info, + .driver_info = (unsigned long)&ax88772b_info, }, { /* ASIX 88172a demo board */ USB_DEVICE(0x0b95, 0x172a), @@ -1095,8 +1302,8 @@ static struct usb_driver asix_driver = { .name = DRIVER_NAME, .id_table = products, .probe = usbnet_probe, - .suspend = usbnet_suspend, - .resume = usbnet_resume, + .suspend = asix_suspend, + .resume = asix_resume, .disconnect = usbnet_disconnect, .supports_autosuspend = 1, .disable_hub_initiated_lpm = 1, diff --git a/drivers/net/usb/ax88172a.c b/drivers/net/usb/ax88172a.c index 163a2c576e69..49a3bc107d05 100644 --- a/drivers/net/usb/ax88172a.c +++ b/drivers/net/usb/ax88172a.c @@ -81,7 +81,7 @@ static void ax88172a_adjust_link(struct net_device *netdev) } if (mode != priv->oldmode) { - asix_write_medium_mode(dev, mode); + asix_write_medium_mode(dev, mode, 0); priv->oldmode = mode; netdev_dbg(netdev, "speed %u duplex %d, setting mode to 0x%04x\n", phydev->speed, phydev->duplex, mode); @@ -176,18 +176,19 @@ static int ax88172a_reset_phy(struct usbnet *dev, int embd_phy) { int ret; - ret = asix_sw_reset(dev, AX_SWRESET_IPPD); + ret = asix_sw_reset(dev, AX_SWRESET_IPPD, 0); if (ret < 0) goto err; msleep(150); - ret = asix_sw_reset(dev, AX_SWRESET_CLEAR); + ret = asix_sw_reset(dev, AX_SWRESET_CLEAR, 0); if (ret < 0) goto err; msleep(150); - ret = asix_sw_reset(dev, embd_phy ? AX_SWRESET_IPRL : AX_SWRESET_IPPD); + ret = asix_sw_reset(dev, embd_phy ? AX_SWRESET_IPRL : AX_SWRESET_IPPD, + 0); if (ret < 0) goto err; @@ -213,7 +214,7 @@ static int ax88172a_bind(struct usbnet *dev, struct usb_interface *intf) dev->driver_priv = priv; /* Get the MAC address */ - ret = asix_read_cmd(dev, AX_CMD_READ_NODE_ID, 0, 0, ETH_ALEN, buf); + ret = asix_read_cmd(dev, AX_CMD_READ_NODE_ID, 0, 0, ETH_ALEN, buf, 0); if (ret < 0) { netdev_err(dev->net, "Failed to read MAC address: %d\n", ret); goto free; @@ -224,7 +225,7 @@ static int ax88172a_bind(struct usbnet *dev, struct usb_interface *intf) dev->net->ethtool_ops = &ax88172a_ethtool_ops; /* are we using the internal or the external phy? */ - ret = asix_read_cmd(dev, AX_CMD_SW_PHY_STATUS, 0, 0, 1, buf); + ret = asix_read_cmd(dev, AX_CMD_SW_PHY_STATUS, 0, 0, 1, buf, 0); if (ret < 0) { netdev_err(dev->net, "Failed to read software interface selection register: %d\n", ret); @@ -303,20 +304,20 @@ static int ax88172a_reset(struct usbnet *dev) ax88172a_reset_phy(dev, priv->use_embdphy); msleep(150); - rx_ctl = asix_read_rx_ctl(dev); + rx_ctl = asix_read_rx_ctl(dev, 0); netdev_dbg(dev->net, "RX_CTL is 0x%04x after software reset\n", rx_ctl); - ret = asix_write_rx_ctl(dev, 0x0000); + ret = asix_write_rx_ctl(dev, 0x0000, 0); if (ret < 0) goto out; - rx_ctl = asix_read_rx_ctl(dev); + rx_ctl = asix_read_rx_ctl(dev, 0); netdev_dbg(dev->net, "RX_CTL is 0x%04x setting to 0x0000\n", rx_ctl); msleep(150); ret = asix_write_cmd(dev, AX_CMD_WRITE_IPG0, AX88772_IPG0_DEFAULT | AX88772_IPG1_DEFAULT, - AX88772_IPG2_DEFAULT, 0, NULL); + AX88772_IPG2_DEFAULT, 0, NULL, 0); if (ret < 0) { netdev_err(dev->net, "Write IPG,IPG1,IPG2 failed: %d\n", ret); goto out; @@ -325,20 +326,20 @@ static int ax88172a_reset(struct usbnet *dev) /* Rewrite MAC address */ memcpy(data->mac_addr, dev->net->dev_addr, ETH_ALEN); ret = asix_write_cmd(dev, AX_CMD_WRITE_NODE_ID, 0, 0, ETH_ALEN, - data->mac_addr); + data->mac_addr, 0); if (ret < 0) goto out; /* Set RX_CTL to default values with 2k buffer, and enable cactus */ - ret = asix_write_rx_ctl(dev, AX_DEFAULT_RX_CTL); + ret = asix_write_rx_ctl(dev, AX_DEFAULT_RX_CTL, 0); if (ret < 0) goto out; - rx_ctl = asix_read_rx_ctl(dev); + rx_ctl = asix_read_rx_ctl(dev, 0); netdev_dbg(dev->net, "RX_CTL is 0x%04x after all initializations\n", rx_ctl); - rx_ctl = asix_read_medium_status(dev); + rx_ctl = asix_read_medium_status(dev, 0); netdev_dbg(dev->net, "Medium Status is 0x%04x after all initializations\n", rx_ctl); From 8a46f665833a2085e402bd0827be380f161f09ef Mon Sep 17 00:00:00 2001 From: Robert Foss Date: Mon, 29 Aug 2016 09:32:16 -0400 Subject: [PATCH 0623/1715] net: asix: Avoid looping when the device is disconnected From: Vincent Palatin Check the answers from the USB stack and avoid re-sending multiple times the request if the device has disappeared. Signed-off-by: Vincent Palatin Signed-off-by: Robert Foss Tested-by: Robert Foss Signed-off-by: David S. Miller --- drivers/net/usb/asix_common.c | 56 ++++++++++++++++++++++++++-------- drivers/net/usb/asix_devices.c | 2 ++ 2 files changed, 46 insertions(+), 12 deletions(-) diff --git a/drivers/net/usb/asix_common.c b/drivers/net/usb/asix_common.c index 25609eefc762..f79eb12c326a 100644 --- a/drivers/net/usb/asix_common.c +++ b/drivers/net/usb/asix_common.c @@ -428,13 +428,21 @@ int asix_mdio_read(struct net_device *netdev, int phy_id, int loc) __le16 res; u8 smsr; int i = 0; + int ret; mutex_lock(&dev->phy_mutex); do { - asix_set_sw_mii(dev, 0); + ret = asix_set_sw_mii(dev, 0); + if (ret == -ENODEV) + break; usleep_range(1000, 1100); - asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG, 0, 0, 1, &smsr, 0); - } while (!(smsr & AX_HOST_EN) && (i++ < 30)); + ret = asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG, + 0, 0, 1, &smsr, 0); + } while (!(smsr & AX_HOST_EN) && (i++ < 30) && (ret != -ENODEV)); + if (ret == -ENODEV) { + mutex_unlock(&dev->phy_mutex); + return ret; + } asix_read_cmd(dev, AX_CMD_READ_MII_REG, phy_id, (__u16)loc, 2, &res, 0); @@ -453,16 +461,24 @@ void asix_mdio_write(struct net_device *netdev, int phy_id, int loc, int val) __le16 res = cpu_to_le16(val); u8 smsr; int i = 0; + int ret; netdev_dbg(dev->net, "asix_mdio_write() phy_id=0x%02x, loc=0x%02x, val=0x%04x\n", phy_id, loc, val); mutex_lock(&dev->phy_mutex); do { - asix_set_sw_mii(dev, 0); + ret = asix_set_sw_mii(dev, 0); + if (ret == -ENODEV) + break; usleep_range(1000, 1100); - asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG, 0, 0, 1, &smsr, 0); - } while (!(smsr & AX_HOST_EN) && (i++ < 30)); + ret = asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG, + 0, 0, 1, &smsr, 0); + } while (!(smsr & AX_HOST_EN) && (i++ < 30) && (ret != -ENODEV)); + if (ret == -ENODEV) { + mutex_unlock(&dev->phy_mutex); + return; + } asix_write_cmd(dev, AX_CMD_WRITE_MII_REG, phy_id, (__u16)loc, 2, &res, 0); @@ -476,13 +492,21 @@ int asix_mdio_read_nopm(struct net_device *netdev, int phy_id, int loc) __le16 res; u8 smsr; int i = 0; + int ret; mutex_lock(&dev->phy_mutex); do { - asix_set_sw_mii(dev, 1); + ret = asix_set_sw_mii(dev, 1); + if (ret == -ENODEV) + break; usleep_range(1000, 1100); - asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG, 0, 0, 1, &smsr, 1); - } while (!(smsr & AX_HOST_EN) && (i++ < 30)); + ret = asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG, + 0, 0, 1, &smsr, 1); + } while (!(smsr & AX_HOST_EN) && (i++ < 30) && (ret != -ENODEV)); + if (ret == -ENODEV) { + mutex_unlock(&dev->phy_mutex); + return ret; + } asix_read_cmd(dev, AX_CMD_READ_MII_REG, phy_id, (__u16)loc, 2, &res, 1); @@ -502,16 +526,24 @@ asix_mdio_write_nopm(struct net_device *netdev, int phy_id, int loc, int val) __le16 res = cpu_to_le16(val); u8 smsr; int i = 0; + int ret; netdev_dbg(dev->net, "asix_mdio_write() phy_id=0x%02x, loc=0x%02x, val=0x%04x\n", phy_id, loc, val); mutex_lock(&dev->phy_mutex); do { - asix_set_sw_mii(dev, 1); + ret = asix_set_sw_mii(dev, 1); + if (ret == -ENODEV) + break; usleep_range(1000, 1100); - asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG, 0, 0, 1, &smsr, 1); - } while (!(smsr & AX_HOST_EN) && (i++ < 30)); + ret = asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG, + 0, 0, 1, &smsr, 1); + } while (!(smsr & AX_HOST_EN) && (i++ < 30) && (ret != -ENODEV)); + if (ret == -ENODEV) { + mutex_unlock(&dev->phy_mutex); + return; + } asix_write_cmd(dev, AX_CMD_WRITE_MII_REG, phy_id, (__u16)loc, 2, &res, 1); diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index aaa42909d932..ebeb73014fdf 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -79,6 +79,8 @@ static u32 asix_get_phyid(struct usbnet *dev) /* Poll for the rare case the FW or phy isn't ready yet. */ for (i = 0; i < 100; i++) { phy_reg = asix_mdio_read(dev->net, dev->mii.phy_id, MII_PHYSID1); + if (phy_reg < 0) + return 0; if (phy_reg != 0 && phy_reg != 0xFFFF) break; mdelay(1); From 4c1442aa8c2c0a2513516aeac184ea172bb04dec Mon Sep 17 00:00:00 2001 From: Robert Foss Date: Mon, 29 Aug 2016 09:32:17 -0400 Subject: [PATCH 0624/1715] net: asix: Fix AX88772x resume failures From: Allan Chou The change fixes AX88772x resume failure by - Restore incorrect AX88772A PHY registers when resetting - Need to stop MAC operation when suspending - Need to restart MII when restoring PHY Signed-off-by: Allan Chou Signed-off-by: Robert Foss Tested-by: Robert Foss Signed-off-by: David S. Miller --- drivers/net/usb/asix_devices.c | 47 +++++++++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index ebeb73014fdf..083dc2ef10c8 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -35,6 +35,15 @@ #define PHY_MODE_RTL8211CL 0x000C +#define AX88772A_PHY14H 0x14 +#define AX88772A_PHY14H_DEFAULT 0x442C + +#define AX88772A_PHY15H 0x15 +#define AX88772A_PHY15H_DEFAULT 0x03C8 + +#define AX88772A_PHY16H 0x16 +#define AX88772A_PHY16H_DEFAULT 0x4044 + struct ax88172_int_data { __le16 res1; u8 link; @@ -424,7 +433,7 @@ static int ax88772a_hw_reset(struct usbnet *dev, int in_pm) { struct asix_data *data = (struct asix_data *)&dev->data; int ret, embd_phy; - u16 rx_ctl; + u16 rx_ctl, phy14h, phy15h, phy16h; u8 chipcode = 0; ret = asix_write_gpio(dev, AX_GPIO_RSE, 5, in_pm); @@ -482,6 +491,32 @@ static int ax88772a_hw_reset(struct usbnet *dev, int in_pm) ret); goto out; } + } else if ((chipcode & AX_CHIPCODE_MASK) == AX_AX88772A_CHIPCODE) { + /* Check if the PHY registers have default settings */ + phy14h = asix_mdio_read_nopm(dev->net, dev->mii.phy_id, + AX88772A_PHY14H); + phy15h = asix_mdio_read_nopm(dev->net, dev->mii.phy_id, + AX88772A_PHY15H); + phy16h = asix_mdio_read_nopm(dev->net, dev->mii.phy_id, + AX88772A_PHY16H); + + netdev_dbg(dev->net, + "772a_hw_reset: MR20=0x%x MR21=0x%x MR22=0x%x\n", + phy14h, phy15h, phy16h); + + /* Restore PHY registers default setting if not */ + if (phy14h != AX88772A_PHY14H_DEFAULT) + asix_mdio_write_nopm(dev->net, dev->mii.phy_id, + AX88772A_PHY14H, + AX88772A_PHY14H_DEFAULT); + if (phy15h != AX88772A_PHY15H_DEFAULT) + asix_mdio_write_nopm(dev->net, dev->mii.phy_id, + AX88772A_PHY15H, + AX88772A_PHY15H_DEFAULT); + if (phy16h != AX88772A_PHY16H_DEFAULT) + asix_mdio_write_nopm(dev->net, dev->mii.phy_id, + AX88772A_PHY16H, + AX88772A_PHY16H_DEFAULT); } ret = asix_write_cmd(dev, AX_CMD_WRITE_IPG0, @@ -543,6 +578,15 @@ static const struct net_device_ops ax88772_netdev_ops = { static void ax88772_suspend(struct usbnet *dev) { struct asix_common_private *priv = dev->driver_priv; + u16 medium; + + /* Stop MAC operation */ + medium = asix_read_medium_status(dev, 0); + medium &= ~AX_MEDIUM_RE; + asix_write_medium_mode(dev, medium, 0); + + netdev_dbg(dev->net, "ax88772_suspend: medium=0x%04x\n", + asix_read_medium_status(dev, 0)); /* Preserve BMCR for restoring */ priv->presvd_phy_bmcr = @@ -577,6 +621,7 @@ static void ax88772_restore_phy(struct usbnet *dev) asix_mdio_write_nopm(dev->net, dev->mii.phy_id, MII_BMCR, priv->presvd_phy_bmcr); + mii_nway_restart(&dev->mii); priv->presvd_phy_advertise = 0; priv->presvd_phy_bmcr = 0; } From a243c2efb5774b29f1e0b43240cfcdcabf001497 Mon Sep 17 00:00:00 2001 From: Robert Foss Date: Mon, 29 Aug 2016 09:32:18 -0400 Subject: [PATCH 0625/1715] net: asix: see 802.3 spec for phy reset From: Grant Grundler https://lkml.org/lkml/2014/11/11/947 Ben Hutchings is correct. IEEE 802.3 spec section "22.2.4.1.1 Reset" requires up to 500ms delay. Mitigate the "max" delay by polling the phy until BCM_RESET bit is clear. Signed-off-by: Grant Grundler Signed-off-by: Robert Foss Tested-by: Robert Foss Signed-off-by: David S. Miller --- drivers/net/usb/asix_devices.c | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index 083dc2ef10c8..dbcdda2ebb18 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -212,6 +212,28 @@ static const struct net_device_ops ax88172_netdev_ops = { .ndo_set_rx_mode = ax88172_set_multicast, }; +static void asix_phy_reset(struct usbnet *dev, unsigned int reset_bits) +{ + unsigned int timeout = 5000; + + asix_mdio_write(dev->net, dev->mii.phy_id, MII_BMCR, reset_bits); + + /* give phy_id a chance to process reset */ + udelay(500); + + /* See IEEE 802.3 "22.2.4.1.1 Reset": 500ms max */ + while (timeout--) { + if (asix_mdio_read(dev->net, dev->mii.phy_id, MII_BMCR) + & BMCR_RESET) + udelay(100); + else + return; + } + + netdev_err(dev->net, "BMCR_RESET timeout on phy_id %d\n", + dev->mii.phy_id); +} + static int ax88172_bind(struct usbnet *dev, struct usb_interface *intf) { int ret = 0; @@ -258,7 +280,7 @@ static int ax88172_bind(struct usbnet *dev, struct usb_interface *intf) dev->net->needed_headroom = 4; /* cf asix_tx_fixup() */ dev->net->needed_tailroom = 4; /* cf asix_tx_fixup() */ - asix_mdio_write(dev->net, dev->mii.phy_id, MII_BMCR, BMCR_RESET); + asix_phy_reset(dev, BMCR_RESET); asix_mdio_write(dev->net, dev->mii.phy_id, MII_ADVERTISE, ADVERTISE_ALL | ADVERTISE_CSMA | ADVERTISE_PAUSE_CAP); mii_nway_restart(&dev->mii); @@ -900,8 +922,7 @@ static int ax88178_reset(struct usbnet *dev) } else if (data->phymode == PHY_MODE_RTL8211CL) rtl8211cl_phy_init(dev); - asix_mdio_write(dev->net, dev->mii.phy_id, MII_BMCR, - BMCR_RESET | BMCR_ANENABLE); + asix_phy_reset(dev, BMCR_RESET | BMCR_ANENABLE); asix_mdio_write(dev->net, dev->mii.phy_id, MII_ADVERTISE, ADVERTISE_ALL | ADVERTISE_CSMA | ADVERTISE_PAUSE_CAP); asix_mdio_write(dev->net, dev->mii.phy_id, MII_CTRL1000, From 535baf8588d04b177cb33700f81499f2b5203c2d Mon Sep 17 00:00:00 2001 From: Robert Foss Date: Mon, 29 Aug 2016 09:32:19 -0400 Subject: [PATCH 0626/1715] net: asix: autoneg will set WRITE_MEDIUM reg From: Grant Grundler The miii_nway_restart() causes a PHY link change activity and ax88772_link_reset will be called. link_reset will set AX_CMD_WRITE_MEDIUM_MODE register correctly. The asix_write_medium_mode in reset() fills in a default value to the register which may be different from the negotiation result. So do this first. Ignore the ret value since it's ignored in XXX_link_reset() functions. Signed-off-by: Grant Grundler Signed-off-by: Robert Foss Tested-by: Robert Foss Signed-off-by: David S. Miller --- drivers/net/usb/asix_devices.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index dbcdda2ebb18..cce24950a0ab 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -928,12 +928,9 @@ static int ax88178_reset(struct usbnet *dev) asix_mdio_write(dev->net, dev->mii.phy_id, MII_CTRL1000, ADVERTISE_1000FULL); + asix_write_medium_mode(dev, AX88178_MEDIUM_DEFAULT, 0); mii_nway_restart(&dev->mii); - ret = asix_write_medium_mode(dev, AX88178_MEDIUM_DEFAULT, 0); - if (ret < 0) - return ret; - /* Rewrite MAC address */ memcpy(data->mac_addr, dev->net->dev_addr, ETH_ALEN); ret = asix_write_cmd(dev, AX_CMD_WRITE_NODE_ID, 0, 0, ETH_ALEN, From 53700f0c9ac36000d35d1fd3645d1607b5c45fac Mon Sep 17 00:00:00 2001 From: hayeswang Date: Thu, 1 Sep 2016 17:01:42 +0800 Subject: [PATCH 0627/1715] r8152: fix the coding style with checkpatch.pl check the coding style with checkpatch.pl and fix the warnings and errors. Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index f72f807c7fc5..9338f5812b7e 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -1076,8 +1076,7 @@ static int vendor_mac_passthru_addr_read(struct r8152 *tp, struct sockaddr *sa) return -ENODEV; if (obj->type != ACPI_TYPE_BUFFER || obj->string.length != 0x17) { netif_warn(tp, probe, tp->netdev, - "Invalid buffer when reading pass-thru MAC addr: " - "(%d, %d)\n", + "Invalid buffer for pass-thru MAC addr: (%d, %d)\n", obj->type, obj->string.length); goto amacout; } @@ -1090,8 +1089,8 @@ static int vendor_mac_passthru_addr_read(struct r8152 *tp, struct sockaddr *sa) ret = hex2bin(buf, obj->string.pointer + 9, 6); if (!(ret == 0 && is_valid_ether_addr(buf))) { netif_warn(tp, probe, tp->netdev, - "Invalid MAC when reading pass-thru MAC addr: " - "%d, %pM\n", ret, buf); + "Invalid MAC for pass-thru MAC addr: %d, %pM\n", + ret, buf); ret = -EINVAL; goto amacout; } @@ -1111,9 +1110,9 @@ static int set_ethernet_addr(struct r8152 *tp) struct sockaddr sa; int ret; - if (tp->version == RTL_VER_01) + if (tp->version == RTL_VER_01) { ret = pla_ocp_read(tp, PLA_IDR, 8, sa.sa_data); - else { + } else { /* if this is not an RTL8153-AD, no eFuse mac pass thru set, * or system doesn't provide valid _SB.AMAC this will be * be expected to non-zero From ba3d0dda6d5facd6fe63a3276825bd2b9fa2990e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 29 Aug 2016 14:37:14 +0200 Subject: [PATCH 0628/1715] net: xgene: fix backward compatibility fix A bugfix for backward compatibility handling introduced undefined behavior for the case that of_parse_phandle() does not return a valid entry, as "gcc -Wmaybe-unused" reports: drivers/net/ethernet/apm/xgene/xgene_enet_hw.c: In function 'xgene_enet_phy_connect': drivers/net/ethernet/apm/xgene/xgene_enet_hw.c:776:6: error: 'phy_dev' may be used uninitialized in this function [-Werror=maybe-uninitialized] drivers/net/ethernet/apm/xgene/xgene_enet_hw.c: In function 'xgene_enet_mdio_config': drivers/net/ethernet/apm/xgene/xgene_enet_hw.c:776:6: error: 'phy_dev' may be used uninitialized in this function [-Werror=maybe-uninitialized] We can work around this by removing the check for zero "np", as of_phy_connect() will correctly handle a NULL argument so we fall back into the normal error handling case. Note that I had previously fixed another bug that resulted in the exact same warning, but this is a different problem that was introduced after my original fix. Signed-off-by: Arnd Bergmann Fixes: 03377e381bf4 ("drivers: net: xgene: Fix backward compatibility") Signed-off-by: David S. Miller --- drivers/net/ethernet/apm/xgene/xgene_enet_hw.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c index 321fb197621e..da413c8d3e19 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c @@ -761,10 +761,6 @@ int xgene_enet_phy_connect(struct net_device *ndev) if (dev->of_node) { for (i = 0 ; i < 2; i++) { np = of_parse_phandle(dev->of_node, "phy-handle", i); - - if (!np) - continue; - phy_dev = of_phy_connect(ndev, np, &xgene_enet_adjust_link, 0, pdata->phy_mode); From ce927bf174b69328e953a77730e12065c19e87cb Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Thu, 1 Sep 2016 08:51:07 -0700 Subject: [PATCH 0629/1715] mpls: get rid of trivial returns return at end of function is useless. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/mpls/af_mpls.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 5c161e7759b5..0e4334cbde17 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -961,9 +961,6 @@ static void mpls_ifdown(struct net_device *dev, int event) RCU_INIT_POINTER(nh->nh_dev, NULL); } endfor_nexthops(rt); } - - - return; } static void mpls_ifup(struct net_device *dev, unsigned int nh_flags) @@ -997,8 +994,6 @@ static void mpls_ifup(struct net_device *dev, unsigned int nh_flags) ACCESS_ONCE(rt->rt_nhn_alive) = alive; } - - return; } static int mpls_dev_notify(struct notifier_block *this, unsigned long event, From c6eec6f332a0504ba3af1d597e7624b9dfd7cfda Mon Sep 17 00:00:00 2001 From: Alexandre TORGUE Date: Mon, 29 Aug 2016 18:23:38 +0200 Subject: [PATCH 0630/1715] net: ethernet: dwmac: add Ethernet glue logic for stm32 chip stm324xx family chips support Synopsys MAC 3.510 IP. This patch adds settings for logical glue logic: -clocks -mode selection MII or RMII. Reviewed-by: Joachim Eastwood Acked-by: Giuseppe Cavallaro Tested-by: Maxime Coquelin Signed-off-by: Alexandre TORGUE Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/Kconfig | 12 ++ drivers/net/ethernet/stmicro/stmmac/Makefile | 1 + .../net/ethernet/stmicro/stmmac/dwmac-stm32.c | 193 ++++++++++++++++++ 3 files changed, 206 insertions(+) create mode 100644 drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig index 8f06a6621ab1..c732b8ce2528 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Kconfig +++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig @@ -104,6 +104,18 @@ config DWMAC_STI device driver. This driver is used on for the STi series SOCs GMAC ethernet controller. +config DWMAC_STM32 + tristate "STM32 DWMAC support" + default ARCH_STM32 + depends on OF && HAS_IOMEM + select MFD_SYSCON + ---help--- + Support for ethernet controller on STM32 SOCs. + + This selects STM32 SoC glue layer support for the stmmac + device driver. This driver is used on for the STM32 series + SOCs GMAC ethernet controller. + config DWMAC_SUNXI tristate "Allwinner GMAC support" default ARCH_SUNXI diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile index 44b630cd1755..f0c9396fa28e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Makefile +++ b/drivers/net/ethernet/stmicro/stmmac/Makefile @@ -13,6 +13,7 @@ obj-$(CONFIG_DWMAC_MESON) += dwmac-meson.o obj-$(CONFIG_DWMAC_ROCKCHIP) += dwmac-rk.o obj-$(CONFIG_DWMAC_SOCFPGA) += dwmac-altr-socfpga.o obj-$(CONFIG_DWMAC_STI) += dwmac-sti.o +obj-$(CONFIG_DWMAC_STM32) += dwmac-stm32.o obj-$(CONFIG_DWMAC_SUNXI) += dwmac-sunxi.o obj-$(CONFIG_DWMAC_GENERIC) += dwmac-generic.o stmmac-platform-objs:= stmmac_platform.o diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c new file mode 100644 index 000000000000..79d8b926f17c --- /dev/null +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c @@ -0,0 +1,193 @@ +/* + * dwmac-stm32.c - DWMAC Specific Glue layer for STM32 MCU + * + * Copyright (C) Alexandre Torgue 2015 + * Author: Alexandre Torgue + * License terms: GNU General Public License (GPL), version 2 + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "stmmac_platform.h" + +#define MII_PHY_SEL_MASK BIT(23) + +struct stm32_dwmac { + struct clk *clk_tx; + struct clk *clk_rx; + u32 mode_reg; /* MAC glue-logic mode register */ + struct regmap *regmap; + u32 speed; +}; + +static int stm32_dwmac_init(struct plat_stmmacenet_data *plat_dat) +{ + struct stm32_dwmac *dwmac = plat_dat->bsp_priv; + u32 reg = dwmac->mode_reg; + u32 val; + int ret; + + val = (plat_dat->interface == PHY_INTERFACE_MODE_MII) ? 0 : 1; + ret = regmap_update_bits(dwmac->regmap, reg, MII_PHY_SEL_MASK, val); + if (ret) + return ret; + + ret = clk_prepare_enable(dwmac->clk_tx); + if (ret) + return ret; + + ret = clk_prepare_enable(dwmac->clk_rx); + if (ret) + clk_disable_unprepare(dwmac->clk_tx); + + return ret; +} + +static void stm32_dwmac_clk_disable(struct stm32_dwmac *dwmac) +{ + clk_disable_unprepare(dwmac->clk_tx); + clk_disable_unprepare(dwmac->clk_rx); +} + +static int stm32_dwmac_parse_data(struct stm32_dwmac *dwmac, + struct device *dev) +{ + struct device_node *np = dev->of_node; + int err; + + /* Get TX/RX clocks */ + dwmac->clk_tx = devm_clk_get(dev, "mac-clk-tx"); + if (IS_ERR(dwmac->clk_tx)) { + dev_err(dev, "No tx clock provided...\n"); + return PTR_ERR(dwmac->clk_tx); + } + dwmac->clk_rx = devm_clk_get(dev, "mac-clk-rx"); + if (IS_ERR(dwmac->clk_rx)) { + dev_err(dev, "No rx clock provided...\n"); + return PTR_ERR(dwmac->clk_rx); + } + + /* Get mode register */ + dwmac->regmap = syscon_regmap_lookup_by_phandle(np, "st,syscon"); + if (IS_ERR(dwmac->regmap)) + return PTR_ERR(dwmac->regmap); + + err = of_property_read_u32_index(np, "st,syscon", 1, &dwmac->mode_reg); + if (err) + dev_err(dev, "Can't get sysconfig mode offset (%d)\n", err); + + return err; +} + +static int stm32_dwmac_probe(struct platform_device *pdev) +{ + struct plat_stmmacenet_data *plat_dat; + struct stmmac_resources stmmac_res; + struct stm32_dwmac *dwmac; + int ret; + + ret = stmmac_get_platform_resources(pdev, &stmmac_res); + if (ret) + return ret; + + plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); + if (IS_ERR(plat_dat)) + return PTR_ERR(plat_dat); + + dwmac = devm_kzalloc(&pdev->dev, sizeof(*dwmac), GFP_KERNEL); + if (!dwmac) + return -ENOMEM; + + ret = stm32_dwmac_parse_data(dwmac, &pdev->dev); + if (ret) { + dev_err(&pdev->dev, "Unable to parse OF data\n"); + return ret; + } + + plat_dat->bsp_priv = dwmac; + + ret = stm32_dwmac_init(plat_dat); + if (ret) + return ret; + + ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); + if (ret) + stm32_dwmac_clk_disable(dwmac); + + return ret; +} + +static int stm32_dwmac_remove(struct platform_device *pdev) +{ + struct net_device *ndev = platform_get_drvdata(pdev); + struct stmmac_priv *priv = netdev_priv(ndev); + int ret = stmmac_dvr_remove(&pdev->dev); + + stm32_dwmac_clk_disable(priv->plat->bsp_priv); + + return ret; +} + +#ifdef CONFIG_PM_SLEEP +static int stm32_dwmac_suspend(struct device *dev) +{ + struct net_device *ndev = dev_get_drvdata(dev); + struct stmmac_priv *priv = netdev_priv(ndev); + int ret; + + ret = stmmac_suspend(dev); + stm32_dwmac_clk_disable(priv->plat->bsp_priv); + + return ret; +} + +static int stm32_dwmac_resume(struct device *dev) +{ + struct net_device *ndev = dev_get_drvdata(dev); + struct stmmac_priv *priv = netdev_priv(ndev); + int ret; + + ret = stm32_dwmac_init(priv->plat); + if (ret) + return ret; + + ret = stmmac_resume(dev); + + return ret; +} +#endif /* CONFIG_PM_SLEEP */ + +SIMPLE_DEV_PM_OPS(stm32_dwmac_pm_ops, stm32_dwmac_suspend, stm32_dwmac_resume); + +static const struct of_device_id stm32_dwmac_match[] = { + { .compatible = "st,stm32-dwmac"}, + { } +}; +MODULE_DEVICE_TABLE(of, stm32_dwmac_match); + +static struct platform_driver stm32_dwmac_driver = { + .probe = stm32_dwmac_probe, + .remove = stm32_dwmac_remove, + .driver = { + .name = "stm32-dwmac", + .pm = &stm32_dwmac_pm_ops, + .of_match_table = stm32_dwmac_match, + }, +}; +module_platform_driver(stm32_dwmac_driver); + +MODULE_AUTHOR("Alexandre Torgue "); +MODULE_DESCRIPTION("STMicroelectronics MCU DWMAC Specific Glue layer"); +MODULE_LICENSE("GPL v2"); From 99abf9d6641e7857df67faa2ff7e4044cdea9683 Mon Sep 17 00:00:00 2001 From: Alexandre TORGUE Date: Mon, 29 Aug 2016 18:23:39 +0200 Subject: [PATCH 0631/1715] Documentation: Bindings: Add STM32 DWMAC glue Acked-by: Rob Herring Signed-off-by: Alexandre TORGUE Signed-off-by: David S. Miller --- .../devicetree/bindings/net/stm32-dwmac.txt | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 Documentation/devicetree/bindings/net/stm32-dwmac.txt diff --git a/Documentation/devicetree/bindings/net/stm32-dwmac.txt b/Documentation/devicetree/bindings/net/stm32-dwmac.txt new file mode 100644 index 000000000000..c35afb7e956a --- /dev/null +++ b/Documentation/devicetree/bindings/net/stm32-dwmac.txt @@ -0,0 +1,32 @@ +STMicroelectronics STM32 / MCU DWMAC glue layer controller + +This file documents platform glue layer for stmmac. +Please see stmmac.txt for the other unchanged properties. + +The device node has following properties. + +Required properties: +- compatible: Should be "st,stm32-dwmac" to select glue, and + "snps,dwmac-3.50a" to select IP version. +- clocks: Must contain a phandle for each entry in clock-names. +- clock-names: Should be "stmmaceth" for the host clock. + Should be "mac-clk-tx" for the MAC TX clock. + Should be "mac-clk-rx" for the MAC RX clock. +- st,syscon : Should be phandle/offset pair. The phandle to the syscon node which + encompases the glue register, and the offset of the control register. +Example: + + ethernet@40028000 { + compatible = "st,stm32-dwmac", "snps,dwmac-3.50a"; + status = "disabled"; + reg = <0x40028000 0x8000>; + reg-names = "stmmaceth"; + interrupts = <0 61 0>, <0 62 0>; + interrupt-names = "macirq", "eth_wake_irq"; + clock-names = "stmmaceth", "mac-clk-tx", "mac-clk-rx"; + clocks = <&rcc 0 25>, <&rcc 0 26>, <&rcc 0 27>; + st,syscon = <&syscfg 0x4>; + snps,pbl = <8>; + snps,mixed-burst; + dma-ranges; + }; From f9a09687a87887d1330dd5e5d08d9d7b3d209fb2 Mon Sep 17 00:00:00 2001 From: Alexandre TORGUE Date: Mon, 29 Aug 2016 18:23:40 +0200 Subject: [PATCH 0632/1715] net: ethernet: stmmac: add support of Synopsys 3.50a MAC IP Adds support of Synopsys 3.50a MAC IP in stmmac driver. Acked-by: Giuseppe Cavallaro Tested-by: Maxime Coquelin Signed-off-by: Alexandre TORGUE Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 756bb548e81a..0a0d6a86f397 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -265,6 +265,7 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac) * once needed on other platforms. */ if (of_device_is_compatible(np, "st,spear600-gmac") || + of_device_is_compatible(np, "snps,dwmac-3.50a") || of_device_is_compatible(np, "snps,dwmac-3.70a") || of_device_is_compatible(np, "snps,dwmac")) { /* Note that the max-frame-size parameter as defined in the From 12d8de6d952372102db2faedd19913dbfa883c5d Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 31 Aug 2016 15:22:00 -0700 Subject: [PATCH 0633/1715] net: make genetlink ctrl ops const Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/netlink/genetlink.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index a09132a69869..23cc12639ba7 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -977,7 +977,7 @@ static int genl_ctrl_event(int event, struct genl_family *family, return 0; } -static struct genl_ops genl_ctrl_ops[] = { +static const struct genl_ops genl_ctrl_ops[] = { { .cmd = CTRL_CMD_GETFAMILY, .doit = ctrl_getfamily, @@ -986,7 +986,7 @@ static struct genl_ops genl_ctrl_ops[] = { }, }; -static struct genl_multicast_group genl_ctrl_groups[] = { +static const struct genl_multicast_group genl_ctrl_groups[] = { { .name = "notify", }, }; From 85bae4bd8ae0a198dd26cca4673b67531ca10923 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 31 Aug 2016 15:15:23 -0700 Subject: [PATCH 0634/1715] drop_monitor: make genl_multicast_group const Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/core/drop_monitor.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index d6b3b579560d..72cfb0c61125 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -105,7 +105,7 @@ static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data) return skb; } -static struct genl_multicast_group dropmon_mcgrps[] = { +static const struct genl_multicast_group dropmon_mcgrps[] = { { .name = "events", }, }; From deeb91f59dbcdcb8044c6109b3d7b11448d9a72d Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 31 Aug 2016 15:17:00 -0700 Subject: [PATCH 0635/1715] batman: make netlink attributes const Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/batman-adv/netlink.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index 18831e72b0fb..64cb6acbe0a6 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -62,11 +62,11 @@ enum batadv_netlink_multicast_groups { BATADV_NL_MCGRP_TPMETER, }; -static struct genl_multicast_group batadv_netlink_mcgrps[] = { +static const struct genl_multicast_group batadv_netlink_mcgrps[] = { [BATADV_NL_MCGRP_TPMETER] = { .name = BATADV_NL_MCAST_GROUP_TPMETER }, }; -static struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = { +static const struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = { [BATADV_ATTR_VERSION] = { .type = NLA_STRING }, [BATADV_ATTR_ALGO_NAME] = { .type = NLA_STRING }, [BATADV_ATTR_MESH_IFINDEX] = { .type = NLA_U32 }, From 3ee5256da092a4047e54dc36e4d6d45ca49652a6 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 31 Aug 2016 15:17:49 -0700 Subject: [PATCH 0636/1715] netns: make nla_policy const Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/core/net_namespace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 1fe58167d39a..7a77dcabd4e8 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -533,7 +533,7 @@ static struct pernet_operations __net_initdata net_ns_ops = { .exit = net_ns_net_exit, }; -static struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = { +static const struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = { [NETNSA_NONE] = { .type = NLA_UNSPEC }, [NETNSA_NSID] = { .type = NLA_S32 }, [NETNSA_PID] = { .type = NLA_U32 }, From 3f18ff2b42b95d57df79af50f1149d698e699c0e Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 31 Aug 2016 15:19:37 -0700 Subject: [PATCH 0637/1715] fou: make nla_policy const Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv4/fou.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 321d57f825ce..cf50f7e2b012 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -631,7 +631,7 @@ static struct genl_family fou_nl_family = { .netnsok = true, }; -static struct nla_policy fou_nl_policy[FOU_ATTR_MAX + 1] = { +static const struct nla_policy fou_nl_policy[FOU_ATTR_MAX + 1] = { [FOU_ATTR_PORT] = { .type = NLA_U16, }, [FOU_ATTR_AF] = { .type = NLA_U8, }, [FOU_ATTR_IPPROTO] = { .type = NLA_U8, }, From 6501f34ff702d5edb0f3c6fb5170b852350ba8cb Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 31 Aug 2016 15:20:51 -0700 Subject: [PATCH 0638/1715] ila: make nla_policy const Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv6/ila/ila_lwt.c | 2 +- net/ipv6/ila/ila_xlat.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c index c8314c6b6154..e50c27a93e17 100644 --- a/net/ipv6/ila/ila_lwt.c +++ b/net/ipv6/ila/ila_lwt.c @@ -51,7 +51,7 @@ drop: return -EINVAL; } -static struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = { +static const struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = { [ILA_ATTR_LOCATOR] = { .type = NLA_U64, }, [ILA_ATTR_CSUM_MODE] = { .type = NLA_U8, }, }; diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c index e6eca5fdf4c9..e604013dd814 100644 --- a/net/ipv6/ila/ila_xlat.c +++ b/net/ipv6/ila/ila_xlat.c @@ -128,7 +128,7 @@ static struct genl_family ila_nl_family = { .parallel_ops = true, }; -static struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = { +static const struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = { [ILA_ATTR_LOCATOR] = { .type = NLA_U64, }, [ILA_ATTR_LOCATOR_MATCH] = { .type = NLA_U64, }, [ILA_ATTR_IFINDEX] = { .type = NLA_U32, }, From 4f70c96ffd184efabadc322efe6142fe3cd77f27 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 31 Aug 2016 15:21:37 -0700 Subject: [PATCH 0639/1715] tcp: make nla_policy const Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv4/tcp_metrics.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index b617826e2477..bf1f3b2b29d1 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -751,7 +751,7 @@ static struct genl_family tcp_metrics_nl_family = { .netnsok = true, }; -static struct nla_policy tcp_metrics_nl_policy[TCP_METRICS_ATTR_MAX + 1] = { +static const struct nla_policy tcp_metrics_nl_policy[TCP_METRICS_ATTR_MAX + 1] = { [TCP_METRICS_ATTR_ADDR_IPV4] = { .type = NLA_U32, }, [TCP_METRICS_ATTR_ADDR_IPV6] = { .type = NLA_BINARY, .len = sizeof(struct in6_addr), }, From f5bb341e1d1af7722a73fa8c96ca8c9a91f85e5b Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 31 Aug 2016 23:24:41 -0700 Subject: [PATCH 0640/1715] l2tp: make nla_policy const Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/l2tp/l2tp_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index 1d02e8d20e56..bf3117771822 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -867,7 +867,7 @@ out: return skb->len; } -static struct nla_policy l2tp_nl_policy[L2TP_ATTR_MAX + 1] = { +static const struct nla_policy l2tp_nl_policy[L2TP_ATTR_MAX + 1] = { [L2TP_ATTR_NONE] = { .type = NLA_UNSPEC, }, [L2TP_ATTR_PW_TYPE] = { .type = NLA_U16, }, [L2TP_ATTR_ENCAP_TYPE] = { .type = NLA_U16, }, From f4b63ea08fd9e8f9fbf9e45b5921114a0514ef76 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 29 Aug 2016 10:16:37 -0700 Subject: [PATCH 0641/1715] sky2: use napi_complete_done Update the sky2 driver to pass number of packets done to NAPI. The driver was never updated when napi_complete_done was added. Signed-off-by: Stephen Hemminger Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/sky2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index 467138b423d3..f05ea56dcff2 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -3070,7 +3070,7 @@ static int sky2_poll(struct napi_struct *napi, int work_limit) goto done; } - napi_complete(napi); + napi_complete_done(napi, work_done); sky2_read32(hw, B0_Y2_SP_LISR); done: From f9f225ebf54992dd52fea198d32a329e7fab6eb6 Mon Sep 17 00:00:00 2001 From: Bhaktipriya Shridhar Date: Tue, 30 Aug 2016 22:02:01 +0530 Subject: [PATCH 0642/1715] bonding: Remove deprecated create_singlethread_workqueue alloc_ordered_workqueue() with WQ_MEM_RECLAIM set, replaces deprecated create_singlethread_workqueue(). This is the identity conversion. The workqueue "wq" queues multiple work items viz &bond->mcast_work, &nnw->work, &bond->mii_work, &bond->arp_work, &bond->alb_work, &bond->mii_work, &bond->ad_work, &bond->slave_arr_work which require strict execution ordering. Hence, an ordered dedicated workqueue has been used. Since, it is a network driver, WQ_MEM_RECLAIM has been set to ensure forward progress under memory pressure. Signed-off-by: Bhaktipriya Shridhar Acked-by: Tejun Heo Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 217e8da0628c..c6a1309b2131 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4627,7 +4627,7 @@ static int bond_init(struct net_device *bond_dev) netdev_dbg(bond_dev, "Begin bond_init\n"); - bond->wq = create_singlethread_workqueue(bond_dev->name); + bond->wq = alloc_ordered_workqueue(bond_dev->name, WQ_MEM_RECLAIM); if (!bond->wq) return -ENOMEM; From 95ac3994514015823634ef1f7116dce24f26aa97 Mon Sep 17 00:00:00 2001 From: Bhaktipriya Shridhar Date: Tue, 30 Aug 2016 22:02:47 +0530 Subject: [PATCH 0643/1715] net: pegasus: Remove deprecated create_singlethread_workqueue The workqueue "pegasus_workqueue" queues a single work item per pegasus instance and hence it doesn't require execution ordering. Hence, alloc_workqueue has been used to replace the deprecated create_singlethread_workqueue instance. The WQ_MEM_RECLAIM flag has been set to ensure forward progress under memory pressure since it's a network driver. Since there are fixed number of work items, explicit concurrency limit is unnecessary here. Signed-off-by: Bhaktipriya Shridhar Acked-by: Tejun Heo Acked-by: Petko Manolov Signed-off-by: David S. Miller --- drivers/net/usb/pegasus.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c index 9bbe0161a2f4..1434e5dd5f9c 100644 --- a/drivers/net/usb/pegasus.c +++ b/drivers/net/usb/pegasus.c @@ -1129,7 +1129,8 @@ static int pegasus_probe(struct usb_interface *intf, return -ENODEV; if (pegasus_count == 0) { - pegasus_workqueue = create_singlethread_workqueue("pegasus"); + pegasus_workqueue = alloc_workqueue("pegasus", WQ_MEM_RECLAIM, + 0); if (!pegasus_workqueue) return -ENOMEM; } From d001648ec7cf8b21ae9eec8b9ba4a18295adfb14 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 30 Aug 2016 20:42:14 +0100 Subject: [PATCH 0644/1715] rxrpc: Don't expose skbs to in-kernel users [ver #2] Don't expose skbs to in-kernel users, such as the AFS filesystem, but instead provide a notification hook the indicates that a call needs attention and another that indicates that there's a new call to be collected. This makes the following possibilities more achievable: (1) Call refcounting can be made simpler if skbs don't hold refs to calls. (2) skbs referring to non-data events will be able to be freed much sooner rather than being queued for AFS to pick up as rxrpc_kernel_recv_data will be able to consult the call state. (3) We can shortcut the receive phase when a call is remotely aborted because we don't have to go through all the packets to get to the one cancelling the operation. (4) It makes it easier to do encryption/decryption directly between AFS's buffers and sk_buffs. (5) Encryption/decryption can more easily be done in the AFS's thread contexts - usually that of the userspace process that issued a syscall - rather than in one of rxrpc's background threads on a workqueue. (6) AFS will be able to wait synchronously on a call inside AF_RXRPC. To make this work, the following interface function has been added: int rxrpc_kernel_recv_data( struct socket *sock, struct rxrpc_call *call, void *buffer, size_t bufsize, size_t *_offset, bool want_more, u32 *_abort_code); This is the recvmsg equivalent. It allows the caller to find out about the state of a specific call and to transfer received data into a buffer piecemeal. afs_extract_data() and rxrpc_kernel_recv_data() now do all the extraction logic between them. They don't wait synchronously yet because the socket lock needs to be dealt with. Five interface functions have been removed: rxrpc_kernel_is_data_last() rxrpc_kernel_get_abort_code() rxrpc_kernel_get_error_number() rxrpc_kernel_free_skb() rxrpc_kernel_data_consumed() As a temporary hack, sk_buffs going to an in-kernel call are queued on the rxrpc_call struct (->knlrecv_queue) rather than being handed over to the in-kernel user. To process the queue internally, a temporary function, temp_deliver_data() has been added. This will be replaced with common code between the rxrpc_recvmsg() path and the kernel_rxrpc_recv_data() path in a future patch. Signed-off-by: David Howells Signed-off-by: David S. Miller --- Documentation/networking/rxrpc.txt | 72 ++--- fs/afs/cmservice.c | 142 +++++----- fs/afs/fsclient.c | 148 ++++------ fs/afs/internal.h | 33 +-- fs/afs/rxrpc.c | 437 +++++++++++------------------ fs/afs/vlclient.c | 7 +- include/net/af_rxrpc.h | 35 +-- net/rxrpc/af_rxrpc.c | 29 +- net/rxrpc/ar-internal.h | 23 +- net/rxrpc/call_accept.c | 13 +- net/rxrpc/call_object.c | 5 +- net/rxrpc/conn_event.c | 1 - net/rxrpc/input.c | 10 +- net/rxrpc/output.c | 2 +- net/rxrpc/recvmsg.c | 195 ++++++++++--- net/rxrpc/skbuff.c | 1 - 16 files changed, 566 insertions(+), 587 deletions(-) diff --git a/Documentation/networking/rxrpc.txt b/Documentation/networking/rxrpc.txt index cfc8cb91452f..1b63bbc6b94f 100644 --- a/Documentation/networking/rxrpc.txt +++ b/Documentation/networking/rxrpc.txt @@ -748,6 +748,37 @@ The kernel interface functions are as follows: The msg must not specify a destination address, control data or any flags other than MSG_MORE. len is the total amount of data to transmit. + (*) Receive data from a call. + + int rxrpc_kernel_recv_data(struct socket *sock, + struct rxrpc_call *call, + void *buf, + size_t size, + size_t *_offset, + bool want_more, + u32 *_abort) + + This is used to receive data from either the reply part of a client call + or the request part of a service call. buf and size specify how much + data is desired and where to store it. *_offset is added on to buf and + subtracted from size internally; the amount copied into the buffer is + added to *_offset before returning. + + want_more should be true if further data will be required after this is + satisfied and false if this is the last item of the receive phase. + + There are three normal returns: 0 if the buffer was filled and want_more + was true; 1 if the buffer was filled, the last DATA packet has been + emptied and want_more was false; and -EAGAIN if the function needs to be + called again. + + If the last DATA packet is processed but the buffer contains less than + the amount requested, EBADMSG is returned. If want_more wasn't set, but + more data was available, EMSGSIZE is returned. + + If a remote ABORT is detected, the abort code received will be stored in + *_abort and ECONNABORTED will be returned. + (*) Abort a call. void rxrpc_kernel_abort_call(struct socket *sock, @@ -825,47 +856,6 @@ The kernel interface functions are as follows: Other errors may be returned if the call had been aborted (-ECONNABORTED) or had timed out (-ETIME). - (*) Record the delivery of a data message. - - void rxrpc_kernel_data_consumed(struct rxrpc_call *call, - struct sk_buff *skb); - - This is used to record a data message as having been consumed and to - update the ACK state for the call. The message must still be passed to - rxrpc_kernel_free_skb() for disposal by the caller. - - (*) Free a message. - - void rxrpc_kernel_free_skb(struct sk_buff *skb); - - This is used to free a non-DATA socket buffer intercepted from an AF_RXRPC - socket. - - (*) Determine if a data message is the last one on a call. - - bool rxrpc_kernel_is_data_last(struct sk_buff *skb); - - This is used to determine if a socket buffer holds the last data message - to be received for a call (true will be returned if it does, false - if not). - - The data message will be part of the reply on a client call and the - request on an incoming call. In the latter case there will be more - messages, but in the former case there will not. - - (*) Get the abort code from an abort message. - - u32 rxrpc_kernel_get_abort_code(struct sk_buff *skb); - - This is used to extract the abort code from a remote abort message. - - (*) Get the error number from a local or network error message. - - int rxrpc_kernel_get_error_number(struct sk_buff *skb); - - This is used to extract the error number from a message indicating either - a local error occurred or a network error occurred. - (*) Allocate a null key for doing anonymous security. struct key *rxrpc_get_null_key(const char *keyname); diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 77ee481059ac..2037e7a77a37 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -17,15 +17,12 @@ #include "internal.h" #include "afs_cm.h" -static int afs_deliver_cb_init_call_back_state(struct afs_call *, - struct sk_buff *, bool); -static int afs_deliver_cb_init_call_back_state3(struct afs_call *, - struct sk_buff *, bool); -static int afs_deliver_cb_probe(struct afs_call *, struct sk_buff *, bool); -static int afs_deliver_cb_callback(struct afs_call *, struct sk_buff *, bool); -static int afs_deliver_cb_probe_uuid(struct afs_call *, struct sk_buff *, bool); -static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *, - struct sk_buff *, bool); +static int afs_deliver_cb_init_call_back_state(struct afs_call *); +static int afs_deliver_cb_init_call_back_state3(struct afs_call *); +static int afs_deliver_cb_probe(struct afs_call *); +static int afs_deliver_cb_callback(struct afs_call *); +static int afs_deliver_cb_probe_uuid(struct afs_call *); +static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *); static void afs_cm_destructor(struct afs_call *); /* @@ -130,7 +127,7 @@ static void afs_cm_destructor(struct afs_call *call) * received. The step number here must match the final number in * afs_deliver_cb_callback(). */ - if (call->unmarshall == 6) { + if (call->unmarshall == 5) { ASSERT(call->server && call->count && call->request); afs_break_callbacks(call->server, call->count, call->request); } @@ -164,8 +161,7 @@ static void SRXAFSCB_CallBack(struct work_struct *work) /* * deliver request data to a CB.CallBack call */ -static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb, - bool last) +static int afs_deliver_cb_callback(struct afs_call *call) { struct sockaddr_rxrpc srx; struct afs_callback *cb; @@ -174,7 +170,7 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb, u32 tmp; int ret, loop; - _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); + _enter("{%u}", call->unmarshall); switch (call->unmarshall) { case 0: @@ -185,7 +181,7 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb, /* extract the FID array and its count in two steps */ case 1: _debug("extract FID count"); - ret = afs_extract_data(call, skb, last, &call->tmp, 4); + ret = afs_extract_data(call, &call->tmp, 4, true); if (ret < 0) return ret; @@ -202,8 +198,8 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb, case 2: _debug("extract FID array"); - ret = afs_extract_data(call, skb, last, call->buffer, - call->count * 3 * 4); + ret = afs_extract_data(call, call->buffer, + call->count * 3 * 4, true); if (ret < 0) return ret; @@ -229,7 +225,7 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb, /* extract the callback array and its count in two steps */ case 3: _debug("extract CB count"); - ret = afs_extract_data(call, skb, last, &call->tmp, 4); + ret = afs_extract_data(call, &call->tmp, 4, true); if (ret < 0) return ret; @@ -239,13 +235,11 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb, return -EBADMSG; call->offset = 0; call->unmarshall++; - if (tmp == 0) - goto empty_cb_array; case 4: _debug("extract CB array"); - ret = afs_extract_data(call, skb, last, call->request, - call->count * 3 * 4); + ret = afs_extract_data(call, call->buffer, + call->count * 3 * 4, false); if (ret < 0) return ret; @@ -258,15 +252,9 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb, cb->type = ntohl(*bp++); } - empty_cb_array: call->offset = 0; call->unmarshall++; - case 5: - ret = afs_data_complete(call, skb, last); - if (ret < 0) - return ret; - /* Record that the message was unmarshalled successfully so * that the call destructor can know do the callback breaking * work, even if the final ACK isn't received. @@ -275,7 +263,7 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb, * updated also. */ call->unmarshall++; - case 6: + case 5: break; } @@ -310,19 +298,17 @@ static void SRXAFSCB_InitCallBackState(struct work_struct *work) /* * deliver request data to a CB.InitCallBackState call */ -static int afs_deliver_cb_init_call_back_state(struct afs_call *call, - struct sk_buff *skb, - bool last) +static int afs_deliver_cb_init_call_back_state(struct afs_call *call) { struct sockaddr_rxrpc srx; struct afs_server *server; int ret; - _enter(",{%u},%d", skb->len, last); + _enter(""); rxrpc_kernel_get_peer(afs_socket, call->rxcall, &srx); - ret = afs_data_complete(call, skb, last); + ret = afs_extract_data(call, NULL, 0, false); if (ret < 0) return ret; @@ -344,21 +330,61 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call, /* * deliver request data to a CB.InitCallBackState3 call */ -static int afs_deliver_cb_init_call_back_state3(struct afs_call *call, - struct sk_buff *skb, - bool last) +static int afs_deliver_cb_init_call_back_state3(struct afs_call *call) { struct sockaddr_rxrpc srx; struct afs_server *server; + struct afs_uuid *r; + unsigned loop; + __be32 *b; + int ret; - _enter(",{%u},%d", skb->len, last); + _enter(""); rxrpc_kernel_get_peer(afs_socket, call->rxcall, &srx); - /* There are some arguments that we ignore */ - afs_data_consumed(call, skb); - if (!last) - return -EAGAIN; + _enter("{%u}", call->unmarshall); + + switch (call->unmarshall) { + case 0: + call->offset = 0; + call->buffer = kmalloc(11 * sizeof(__be32), GFP_KERNEL); + if (!call->buffer) + return -ENOMEM; + call->unmarshall++; + + case 1: + _debug("extract UUID"); + ret = afs_extract_data(call, call->buffer, + 11 * sizeof(__be32), false); + switch (ret) { + case 0: break; + case -EAGAIN: return 0; + default: return ret; + } + + _debug("unmarshall UUID"); + call->request = kmalloc(sizeof(struct afs_uuid), GFP_KERNEL); + if (!call->request) + return -ENOMEM; + + b = call->buffer; + r = call->request; + r->time_low = ntohl(b[0]); + r->time_mid = ntohl(b[1]); + r->time_hi_and_version = ntohl(b[2]); + r->clock_seq_hi_and_reserved = ntohl(b[3]); + r->clock_seq_low = ntohl(b[4]); + + for (loop = 0; loop < 6; loop++) + r->node[loop] = ntohl(b[loop + 5]); + + call->offset = 0; + call->unmarshall++; + + case 2: + break; + } /* no unmarshalling required */ call->state = AFS_CALL_REPLYING; @@ -390,14 +416,13 @@ static void SRXAFSCB_Probe(struct work_struct *work) /* * deliver request data to a CB.Probe call */ -static int afs_deliver_cb_probe(struct afs_call *call, struct sk_buff *skb, - bool last) +static int afs_deliver_cb_probe(struct afs_call *call) { int ret; - _enter(",{%u},%d", skb->len, last); + _enter(""); - ret = afs_data_complete(call, skb, last); + ret = afs_extract_data(call, NULL, 0, false); if (ret < 0) return ret; @@ -435,19 +460,14 @@ static void SRXAFSCB_ProbeUuid(struct work_struct *work) /* * deliver request data to a CB.ProbeUuid call */ -static int afs_deliver_cb_probe_uuid(struct afs_call *call, struct sk_buff *skb, - bool last) +static int afs_deliver_cb_probe_uuid(struct afs_call *call) { struct afs_uuid *r; unsigned loop; __be32 *b; int ret; - _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); - - ret = afs_data_complete(call, skb, last); - if (ret < 0) - return ret; + _enter("{%u}", call->unmarshall); switch (call->unmarshall) { case 0: @@ -459,8 +479,8 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call, struct sk_buff *skb, case 1: _debug("extract UUID"); - ret = afs_extract_data(call, skb, last, call->buffer, - 11 * sizeof(__be32)); + ret = afs_extract_data(call, call->buffer, + 11 * sizeof(__be32), false); switch (ret) { case 0: break; case -EAGAIN: return 0; @@ -487,16 +507,9 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call, struct sk_buff *skb, call->unmarshall++; case 2: - _debug("trailer"); - if (skb->len != 0) - return -EBADMSG; break; } - ret = afs_data_complete(call, skb, last); - if (ret < 0) - return ret; - call->state = AFS_CALL_REPLYING; INIT_WORK(&call->work, SRXAFSCB_ProbeUuid); @@ -570,14 +583,13 @@ static void SRXAFSCB_TellMeAboutYourself(struct work_struct *work) /* * deliver request data to a CB.TellMeAboutYourself call */ -static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call, - struct sk_buff *skb, bool last) +static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call) { int ret; - _enter(",{%u},%d", skb->len, last); + _enter(""); - ret = afs_data_complete(call, skb, last); + ret = afs_extract_data(call, NULL, 0, false); if (ret < 0) return ret; diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 9312b92e54be..96f4d764d1a6 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -235,16 +235,15 @@ static void xdr_decode_AFSFetchVolumeStatus(const __be32 **_bp, /* * deliver reply data to an FS.FetchStatus */ -static int afs_deliver_fs_fetch_status(struct afs_call *call, - struct sk_buff *skb, bool last) +static int afs_deliver_fs_fetch_status(struct afs_call *call) { struct afs_vnode *vnode = call->reply; const __be32 *bp; int ret; - _enter(",,%u", last); + _enter(""); - ret = afs_transfer_reply(call, skb, last); + ret = afs_transfer_reply(call); if (ret < 0) return ret; @@ -307,8 +306,7 @@ int afs_fs_fetch_file_status(struct afs_server *server, /* * deliver reply data to an FS.FetchData */ -static int afs_deliver_fs_fetch_data(struct afs_call *call, - struct sk_buff *skb, bool last) +static int afs_deliver_fs_fetch_data(struct afs_call *call) { struct afs_vnode *vnode = call->reply; const __be32 *bp; @@ -316,7 +314,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call, void *buffer; int ret; - _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); + _enter("{%u}", call->unmarshall); switch (call->unmarshall) { case 0: @@ -332,7 +330,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call, * client) */ case 1: _debug("extract data length (MSW)"); - ret = afs_extract_data(call, skb, last, &call->tmp, 4); + ret = afs_extract_data(call, &call->tmp, 4, true); if (ret < 0) return ret; @@ -347,7 +345,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call, /* extract the returned data length */ case 2: _debug("extract data length"); - ret = afs_extract_data(call, skb, last, &call->tmp, 4); + ret = afs_extract_data(call, &call->tmp, 4, true); if (ret < 0) return ret; @@ -363,10 +361,10 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call, _debug("extract data"); if (call->count > 0) { page = call->reply3; - buffer = kmap_atomic(page); - ret = afs_extract_data(call, skb, last, buffer, - call->count); - kunmap_atomic(buffer); + buffer = kmap(page); + ret = afs_extract_data(call, buffer, + call->count, true); + kunmap(buffer); if (ret < 0) return ret; } @@ -376,8 +374,8 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call, /* extract the metadata */ case 4: - ret = afs_extract_data(call, skb, last, call->buffer, - (21 + 3 + 6) * 4); + ret = afs_extract_data(call, call->buffer, + (21 + 3 + 6) * 4, false); if (ret < 0) return ret; @@ -391,18 +389,15 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call, call->unmarshall++; case 5: - ret = afs_data_complete(call, skb, last); - if (ret < 0) - return ret; break; } if (call->count < PAGE_SIZE) { _debug("clear"); page = call->reply3; - buffer = kmap_atomic(page); + buffer = kmap(page); memset(buffer + call->count, 0, PAGE_SIZE - call->count); - kunmap_atomic(buffer); + kunmap(buffer); } _leave(" = 0 [done]"); @@ -515,13 +510,12 @@ int afs_fs_fetch_data(struct afs_server *server, /* * deliver reply data to an FS.GiveUpCallBacks */ -static int afs_deliver_fs_give_up_callbacks(struct afs_call *call, - struct sk_buff *skb, bool last) +static int afs_deliver_fs_give_up_callbacks(struct afs_call *call) { - _enter(",{%u},%d", skb->len, last); + _enter(""); /* shouldn't be any reply data */ - return afs_data_complete(call, skb, last); + return afs_extract_data(call, NULL, 0, false); } /* @@ -599,16 +593,15 @@ int afs_fs_give_up_callbacks(struct afs_server *server, /* * deliver reply data to an FS.CreateFile or an FS.MakeDir */ -static int afs_deliver_fs_create_vnode(struct afs_call *call, - struct sk_buff *skb, bool last) +static int afs_deliver_fs_create_vnode(struct afs_call *call) { struct afs_vnode *vnode = call->reply; const __be32 *bp; int ret; - _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); + _enter("{%u}", call->unmarshall); - ret = afs_transfer_reply(call, skb, last); + ret = afs_transfer_reply(call); if (ret < 0) return ret; @@ -696,16 +689,15 @@ int afs_fs_create(struct afs_server *server, /* * deliver reply data to an FS.RemoveFile or FS.RemoveDir */ -static int afs_deliver_fs_remove(struct afs_call *call, - struct sk_buff *skb, bool last) +static int afs_deliver_fs_remove(struct afs_call *call) { struct afs_vnode *vnode = call->reply; const __be32 *bp; int ret; - _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); + _enter("{%u}", call->unmarshall); - ret = afs_transfer_reply(call, skb, last); + ret = afs_transfer_reply(call); if (ret < 0) return ret; @@ -777,16 +769,15 @@ int afs_fs_remove(struct afs_server *server, /* * deliver reply data to an FS.Link */ -static int afs_deliver_fs_link(struct afs_call *call, - struct sk_buff *skb, bool last) +static int afs_deliver_fs_link(struct afs_call *call) { struct afs_vnode *dvnode = call->reply, *vnode = call->reply2; const __be32 *bp; int ret; - _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); + _enter("{%u}", call->unmarshall); - ret = afs_transfer_reply(call, skb, last); + ret = afs_transfer_reply(call); if (ret < 0) return ret; @@ -863,16 +854,15 @@ int afs_fs_link(struct afs_server *server, /* * deliver reply data to an FS.Symlink */ -static int afs_deliver_fs_symlink(struct afs_call *call, - struct sk_buff *skb, bool last) +static int afs_deliver_fs_symlink(struct afs_call *call) { struct afs_vnode *vnode = call->reply; const __be32 *bp; int ret; - _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); + _enter("{%u}", call->unmarshall); - ret = afs_transfer_reply(call, skb, last); + ret = afs_transfer_reply(call); if (ret < 0) return ret; @@ -968,16 +958,15 @@ int afs_fs_symlink(struct afs_server *server, /* * deliver reply data to an FS.Rename */ -static int afs_deliver_fs_rename(struct afs_call *call, - struct sk_buff *skb, bool last) +static int afs_deliver_fs_rename(struct afs_call *call) { struct afs_vnode *orig_dvnode = call->reply, *new_dvnode = call->reply2; const __be32 *bp; int ret; - _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); + _enter("{%u}", call->unmarshall); - ret = afs_transfer_reply(call, skb, last); + ret = afs_transfer_reply(call); if (ret < 0) return ret; @@ -1072,16 +1061,15 @@ int afs_fs_rename(struct afs_server *server, /* * deliver reply data to an FS.StoreData */ -static int afs_deliver_fs_store_data(struct afs_call *call, - struct sk_buff *skb, bool last) +static int afs_deliver_fs_store_data(struct afs_call *call) { struct afs_vnode *vnode = call->reply; const __be32 *bp; int ret; - _enter(",,%u", last); + _enter(""); - ret = afs_transfer_reply(call, skb, last); + ret = afs_transfer_reply(call); if (ret < 0) return ret; @@ -1251,17 +1239,16 @@ int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb, /* * deliver reply data to an FS.StoreStatus */ -static int afs_deliver_fs_store_status(struct afs_call *call, - struct sk_buff *skb, bool last) +static int afs_deliver_fs_store_status(struct afs_call *call) { afs_dataversion_t *store_version; struct afs_vnode *vnode = call->reply; const __be32 *bp; int ret; - _enter(",,%u", last); + _enter(""); - ret = afs_transfer_reply(call, skb, last); + ret = afs_transfer_reply(call); if (ret < 0) return ret; @@ -1443,14 +1430,13 @@ int afs_fs_setattr(struct afs_server *server, struct key *key, /* * deliver reply data to an FS.GetVolumeStatus */ -static int afs_deliver_fs_get_volume_status(struct afs_call *call, - struct sk_buff *skb, bool last) +static int afs_deliver_fs_get_volume_status(struct afs_call *call) { const __be32 *bp; char *p; int ret; - _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); + _enter("{%u}", call->unmarshall); switch (call->unmarshall) { case 0: @@ -1460,8 +1446,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, /* extract the returned status record */ case 1: _debug("extract status"); - ret = afs_extract_data(call, skb, last, call->buffer, - 12 * 4); + ret = afs_extract_data(call, call->buffer, + 12 * 4, true); if (ret < 0) return ret; @@ -1472,7 +1458,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, /* extract the volume name length */ case 2: - ret = afs_extract_data(call, skb, last, &call->tmp, 4); + ret = afs_extract_data(call, &call->tmp, 4, true); if (ret < 0) return ret; @@ -1487,8 +1473,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, case 3: _debug("extract volname"); if (call->count > 0) { - ret = afs_extract_data(call, skb, last, call->reply3, - call->count); + ret = afs_extract_data(call, call->reply3, + call->count, true); if (ret < 0) return ret; } @@ -1508,8 +1494,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, call->count = 4 - (call->count & 3); case 4: - ret = afs_extract_data(call, skb, last, call->buffer, - call->count); + ret = afs_extract_data(call, call->buffer, + call->count, true); if (ret < 0) return ret; @@ -1519,7 +1505,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, /* extract the offline message length */ case 5: - ret = afs_extract_data(call, skb, last, &call->tmp, 4); + ret = afs_extract_data(call, &call->tmp, 4, true); if (ret < 0) return ret; @@ -1534,8 +1520,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, case 6: _debug("extract offline"); if (call->count > 0) { - ret = afs_extract_data(call, skb, last, call->reply3, - call->count); + ret = afs_extract_data(call, call->reply3, + call->count, true); if (ret < 0) return ret; } @@ -1555,8 +1541,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, call->count = 4 - (call->count & 3); case 7: - ret = afs_extract_data(call, skb, last, call->buffer, - call->count); + ret = afs_extract_data(call, call->buffer, + call->count, true); if (ret < 0) return ret; @@ -1566,7 +1552,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, /* extract the message of the day length */ case 8: - ret = afs_extract_data(call, skb, last, &call->tmp, 4); + ret = afs_extract_data(call, &call->tmp, 4, true); if (ret < 0) return ret; @@ -1581,8 +1567,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, case 9: _debug("extract motd"); if (call->count > 0) { - ret = afs_extract_data(call, skb, last, call->reply3, - call->count); + ret = afs_extract_data(call, call->reply3, + call->count, true); if (ret < 0) return ret; } @@ -1595,26 +1581,17 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, call->unmarshall++; /* extract the message of the day padding */ - if ((call->count & 3) == 0) { - call->unmarshall++; - goto no_motd_padding; - } - call->count = 4 - (call->count & 3); + call->count = (4 - (call->count & 3)) & 3; case 10: - ret = afs_extract_data(call, skb, last, call->buffer, - call->count); + ret = afs_extract_data(call, call->buffer, + call->count, false); if (ret < 0) return ret; call->offset = 0; call->unmarshall++; - no_motd_padding: - case 11: - ret = afs_data_complete(call, skb, last); - if (ret < 0) - return ret; break; } @@ -1685,15 +1662,14 @@ int afs_fs_get_volume_status(struct afs_server *server, /* * deliver reply data to an FS.SetLock, FS.ExtendLock or FS.ReleaseLock */ -static int afs_deliver_fs_xxxx_lock(struct afs_call *call, - struct sk_buff *skb, bool last) +static int afs_deliver_fs_xxxx_lock(struct afs_call *call) { const __be32 *bp; int ret; - _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); + _enter("{%u}", call->unmarshall); - ret = afs_transfer_reply(call, skb, last); + ret = afs_transfer_reply(call); if (ret < 0) return ret; diff --git a/fs/afs/internal.h b/fs/afs/internal.h index d97552de9c59..5497c8496055 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include @@ -57,7 +56,7 @@ struct afs_mount_params { */ struct afs_wait_mode { /* RxRPC received message notification */ - void (*rx_wakeup)(struct afs_call *call); + rxrpc_notify_rx_t notify_rx; /* synchronous call waiter and call dispatched notification */ int (*wait)(struct afs_call *call); @@ -76,10 +75,8 @@ struct afs_call { const struct afs_call_type *type; /* type of call */ const struct afs_wait_mode *wait_mode; /* completion wait mode */ wait_queue_head_t waitq; /* processes awaiting completion */ - void (*async_workfn)(struct afs_call *call); /* asynchronous work function */ struct work_struct async_work; /* asynchronous work processor */ struct work_struct work; /* actual work processor */ - struct sk_buff_head rx_queue; /* received packets */ struct rxrpc_call *rxcall; /* RxRPC call handle */ struct key *key; /* security for this call */ struct afs_server *server; /* server affected by incoming CM call */ @@ -93,6 +90,7 @@ struct afs_call { void *reply4; /* reply buffer (fourth part) */ pgoff_t first; /* first page in mapping to deal with */ pgoff_t last; /* last page in mapping to deal with */ + size_t offset; /* offset into received data store */ enum { /* call state */ AFS_CALL_REQUESTING, /* request is being sent for outgoing call */ AFS_CALL_AWAIT_REPLY, /* awaiting reply to outgoing call */ @@ -100,21 +98,18 @@ struct afs_call { AFS_CALL_AWAIT_REQUEST, /* awaiting request data on incoming call */ AFS_CALL_REPLYING, /* replying to incoming call */ AFS_CALL_AWAIT_ACK, /* awaiting final ACK of incoming call */ - AFS_CALL_COMPLETE, /* successfully completed */ - AFS_CALL_BUSY, /* server was busy */ - AFS_CALL_ABORTED, /* call was aborted */ - AFS_CALL_ERROR, /* call failed due to error */ + AFS_CALL_COMPLETE, /* Completed or failed */ } state; int error; /* error code */ + u32 abort_code; /* Remote abort ID or 0 */ unsigned request_size; /* size of request data */ unsigned reply_max; /* maximum size of reply */ - unsigned reply_size; /* current size of reply */ unsigned first_offset; /* offset into mapping[first] */ unsigned last_to; /* amount of mapping[last] */ - unsigned offset; /* offset into received data store */ unsigned char unmarshall; /* unmarshalling phase */ bool incoming; /* T if incoming call */ bool send_pages; /* T if data from mapping should be sent */ + bool need_attention; /* T if RxRPC poked us */ u16 service_id; /* RxRPC service ID to call */ __be16 port; /* target UDP port */ __be32 operation_ID; /* operation ID for an incoming call */ @@ -129,8 +124,7 @@ struct afs_call_type { /* deliver request or reply data to an call * - returning an error will cause the call to be aborted */ - int (*deliver)(struct afs_call *call, struct sk_buff *skb, - bool last); + int (*deliver)(struct afs_call *call); /* map an abort code to an error number */ int (*abort_to_error)(u32 abort_code); @@ -612,27 +606,18 @@ extern struct socket *afs_socket; extern int afs_open_socket(void); extern void afs_close_socket(void); -extern void afs_data_consumed(struct afs_call *, struct sk_buff *); extern int afs_make_call(struct in_addr *, struct afs_call *, gfp_t, const struct afs_wait_mode *); extern struct afs_call *afs_alloc_flat_call(const struct afs_call_type *, size_t, size_t); extern void afs_flat_call_destructor(struct afs_call *); -extern int afs_transfer_reply(struct afs_call *, struct sk_buff *, bool); extern void afs_send_empty_reply(struct afs_call *); extern void afs_send_simple_reply(struct afs_call *, const void *, size_t); -extern int afs_extract_data(struct afs_call *, struct sk_buff *, bool, void *, - size_t); +extern int afs_extract_data(struct afs_call *, void *, size_t, bool); -static inline int afs_data_complete(struct afs_call *call, struct sk_buff *skb, - bool last) +static inline int afs_transfer_reply(struct afs_call *call) { - if (skb->len > 0) - return -EBADMSG; - afs_data_consumed(call, skb); - if (!last) - return -EAGAIN; - return 0; + return afs_extract_data(call, call->buffer, call->reply_max, false); } /* diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 7b0d18900f50..244896baf241 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -19,31 +19,31 @@ struct socket *afs_socket; /* my RxRPC socket */ static struct workqueue_struct *afs_async_calls; static atomic_t afs_outstanding_calls; -static atomic_t afs_outstanding_skbs; -static void afs_wake_up_call_waiter(struct afs_call *); +static void afs_free_call(struct afs_call *); +static void afs_wake_up_call_waiter(struct sock *, struct rxrpc_call *, unsigned long); static int afs_wait_for_call_to_complete(struct afs_call *); -static void afs_wake_up_async_call(struct afs_call *); +static void afs_wake_up_async_call(struct sock *, struct rxrpc_call *, unsigned long); static int afs_dont_wait_for_call_to_complete(struct afs_call *); -static void afs_process_async_call(struct afs_call *); -static void afs_rx_interceptor(struct sock *, unsigned long, struct sk_buff *); -static int afs_deliver_cm_op_id(struct afs_call *, struct sk_buff *, bool); +static void afs_process_async_call(struct work_struct *); +static void afs_rx_new_call(struct sock *); +static int afs_deliver_cm_op_id(struct afs_call *); /* synchronous call management */ const struct afs_wait_mode afs_sync_call = { - .rx_wakeup = afs_wake_up_call_waiter, + .notify_rx = afs_wake_up_call_waiter, .wait = afs_wait_for_call_to_complete, }; /* asynchronous call management */ const struct afs_wait_mode afs_async_call = { - .rx_wakeup = afs_wake_up_async_call, + .notify_rx = afs_wake_up_async_call, .wait = afs_dont_wait_for_call_to_complete, }; /* asynchronous incoming call management */ static const struct afs_wait_mode afs_async_incoming_call = { - .rx_wakeup = afs_wake_up_async_call, + .notify_rx = afs_wake_up_async_call, }; /* asynchronous incoming call initial processing */ @@ -55,16 +55,8 @@ static const struct afs_call_type afs_RXCMxxxx = { static void afs_collect_incoming_call(struct work_struct *); -static struct sk_buff_head afs_incoming_calls; static DECLARE_WORK(afs_collect_incoming_call_work, afs_collect_incoming_call); -static void afs_async_workfn(struct work_struct *work) -{ - struct afs_call *call = container_of(work, struct afs_call, async_work); - - call->async_workfn(call); -} - static int afs_wait_atomic_t(atomic_t *p) { schedule(); @@ -83,8 +75,6 @@ int afs_open_socket(void) _enter(""); - skb_queue_head_init(&afs_incoming_calls); - ret = -ENOMEM; afs_async_calls = create_singlethread_workqueue("kafsd"); if (!afs_async_calls) @@ -110,12 +100,12 @@ int afs_open_socket(void) if (ret < 0) goto error_2; + rxrpc_kernel_new_call_notification(socket, afs_rx_new_call); + ret = kernel_listen(socket, INT_MAX); if (ret < 0) goto error_2; - rxrpc_kernel_intercept_rx_messages(socket, afs_rx_interceptor); - afs_socket = socket; _leave(" = 0"); return 0; @@ -136,51 +126,19 @@ void afs_close_socket(void) { _enter(""); + _debug("outstanding %u", atomic_read(&afs_outstanding_calls)); wait_on_atomic_t(&afs_outstanding_calls, afs_wait_atomic_t, TASK_UNINTERRUPTIBLE); _debug("no outstanding calls"); + flush_workqueue(afs_async_calls); sock_release(afs_socket); _debug("dework"); destroy_workqueue(afs_async_calls); - - ASSERTCMP(atomic_read(&afs_outstanding_skbs), ==, 0); _leave(""); } -/* - * Note that the data in a socket buffer is now consumed. - */ -void afs_data_consumed(struct afs_call *call, struct sk_buff *skb) -{ - if (!skb) { - _debug("DLVR NULL [%d]", atomic_read(&afs_outstanding_skbs)); - dump_stack(); - } else { - _debug("DLVR %p{%u} [%d]", - skb, skb->mark, atomic_read(&afs_outstanding_skbs)); - rxrpc_kernel_data_consumed(call->rxcall, skb); - } -} - -/* - * free a socket buffer - */ -static void afs_free_skb(struct sk_buff *skb) -{ - if (!skb) { - _debug("FREE NULL [%d]", atomic_read(&afs_outstanding_skbs)); - dump_stack(); - } else { - _debug("FREE %p{%u} [%d]", - skb, skb->mark, atomic_read(&afs_outstanding_skbs)); - if (atomic_dec_return(&afs_outstanding_skbs) == -1) - BUG(); - rxrpc_kernel_free_skb(skb); - } -} - /* * free a call */ @@ -191,7 +149,6 @@ static void afs_free_call(struct afs_call *call) ASSERTCMP(call->rxcall, ==, NULL); ASSERT(!work_pending(&call->async_work)); - ASSERT(skb_queue_empty(&call->rx_queue)); ASSERT(call->type->name != NULL); kfree(call->request); @@ -227,7 +184,7 @@ static void afs_end_call(struct afs_call *call) * allocate a call with flat request and reply buffers */ struct afs_call *afs_alloc_flat_call(const struct afs_call_type *type, - size_t request_size, size_t reply_size) + size_t request_size, size_t reply_max) { struct afs_call *call; @@ -241,7 +198,7 @@ struct afs_call *afs_alloc_flat_call(const struct afs_call_type *type, call->type = type; call->request_size = request_size; - call->reply_max = reply_size; + call->reply_max = reply_max; if (request_size) { call->request = kmalloc(request_size, GFP_NOFS); @@ -249,14 +206,13 @@ struct afs_call *afs_alloc_flat_call(const struct afs_call_type *type, goto nomem_free; } - if (reply_size) { - call->buffer = kmalloc(reply_size, GFP_NOFS); + if (reply_max) { + call->buffer = kmalloc(reply_max, GFP_NOFS); if (!call->buffer) goto nomem_free; } init_waitqueue_head(&call->waitq); - skb_queue_head_init(&call->rx_queue); return call; nomem_free: @@ -354,7 +310,6 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, struct msghdr msg; struct kvec iov[1]; int ret; - struct sk_buff *skb; _enter("%x,{%d},", addr->s_addr, ntohs(call->port)); @@ -366,8 +321,7 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, atomic_read(&afs_outstanding_calls)); call->wait_mode = wait_mode; - call->async_workfn = afs_process_async_call; - INIT_WORK(&call->async_work, afs_async_workfn); + INIT_WORK(&call->async_work, afs_process_async_call); memset(&srx, 0, sizeof(srx)); srx.srx_family = AF_RXRPC; @@ -380,7 +334,8 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, /* create a call */ rxcall = rxrpc_kernel_begin_call(afs_socket, &srx, call->key, - (unsigned long) call, gfp); + (unsigned long) call, gfp, + wait_mode->notify_rx); call->key = NULL; if (IS_ERR(rxcall)) { ret = PTR_ERR(rxcall); @@ -423,150 +378,84 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, error_do_abort: rxrpc_kernel_abort_call(afs_socket, rxcall, RX_USER_ABORT); - while ((skb = skb_dequeue(&call->rx_queue))) - afs_free_skb(skb); error_kill_call: afs_end_call(call); _leave(" = %d", ret); return ret; } -/* - * Handles intercepted messages that were arriving in the socket's Rx queue. - * - * Called from the AF_RXRPC call processor in waitqueue process context. For - * each call, it is guaranteed this will be called in order of packet to be - * delivered. - */ -static void afs_rx_interceptor(struct sock *sk, unsigned long user_call_ID, - struct sk_buff *skb) -{ - struct afs_call *call = (struct afs_call *) user_call_ID; - - _enter("%p,,%u", call, skb->mark); - - _debug("ICPT %p{%u} [%d]", - skb, skb->mark, atomic_read(&afs_outstanding_skbs)); - - ASSERTCMP(sk, ==, afs_socket->sk); - atomic_inc(&afs_outstanding_skbs); - - if (!call) { - /* its an incoming call for our callback service */ - skb_queue_tail(&afs_incoming_calls, skb); - queue_work(afs_wq, &afs_collect_incoming_call_work); - } else { - /* route the messages directly to the appropriate call */ - skb_queue_tail(&call->rx_queue, skb); - call->wait_mode->rx_wakeup(call); - } - - _leave(""); -} - /* * deliver messages to a call */ static void afs_deliver_to_call(struct afs_call *call) { - struct sk_buff *skb; - bool last; u32 abort_code; int ret; - _enter(""); + _enter("%s", call->type->name); - while ((call->state == AFS_CALL_AWAIT_REPLY || - call->state == AFS_CALL_AWAIT_OP_ID || - call->state == AFS_CALL_AWAIT_REQUEST || - call->state == AFS_CALL_AWAIT_ACK) && - (skb = skb_dequeue(&call->rx_queue))) { - switch (skb->mark) { - case RXRPC_SKB_MARK_DATA: - _debug("Rcv DATA"); - last = rxrpc_kernel_is_data_last(skb); - ret = call->type->deliver(call, skb, last); - switch (ret) { - case -EAGAIN: - if (last) { - _debug("short data"); - goto unmarshal_error; - } - break; - case 0: - ASSERT(last); - if (call->state == AFS_CALL_AWAIT_REPLY) - call->state = AFS_CALL_COMPLETE; - break; - case -ENOTCONN: - abort_code = RX_CALL_DEAD; - goto do_abort; - case -ENOTSUPP: - abort_code = RX_INVALID_OPERATION; - goto do_abort; - default: - unmarshal_error: - abort_code = RXGEN_CC_UNMARSHAL; - if (call->state != AFS_CALL_AWAIT_REPLY) - abort_code = RXGEN_SS_UNMARSHAL; - do_abort: - rxrpc_kernel_abort_call(afs_socket, - call->rxcall, - abort_code); - call->error = ret; - call->state = AFS_CALL_ERROR; - break; + while (call->state == AFS_CALL_AWAIT_REPLY || + call->state == AFS_CALL_AWAIT_OP_ID || + call->state == AFS_CALL_AWAIT_REQUEST || + call->state == AFS_CALL_AWAIT_ACK + ) { + if (call->state == AFS_CALL_AWAIT_ACK) { + size_t offset = 0; + ret = rxrpc_kernel_recv_data(afs_socket, call->rxcall, + NULL, 0, &offset, false, + &call->abort_code); + if (ret == -EINPROGRESS || ret == -EAGAIN) + return; + if (ret == 1) { + call->state = AFS_CALL_COMPLETE; + goto done; } - break; - case RXRPC_SKB_MARK_FINAL_ACK: - _debug("Rcv ACK"); - call->state = AFS_CALL_COMPLETE; - break; - case RXRPC_SKB_MARK_BUSY: - _debug("Rcv BUSY"); - call->error = -EBUSY; - call->state = AFS_CALL_BUSY; - break; - case RXRPC_SKB_MARK_REMOTE_ABORT: - abort_code = rxrpc_kernel_get_abort_code(skb); - call->error = call->type->abort_to_error(abort_code); - call->state = AFS_CALL_ABORTED; - _debug("Rcv ABORT %u -> %d", abort_code, call->error); - break; - case RXRPC_SKB_MARK_LOCAL_ABORT: - abort_code = rxrpc_kernel_get_abort_code(skb); - call->error = call->type->abort_to_error(abort_code); - call->state = AFS_CALL_ABORTED; - _debug("Loc ABORT %u -> %d", abort_code, call->error); - break; - case RXRPC_SKB_MARK_NET_ERROR: - call->error = -rxrpc_kernel_get_error_number(skb); - call->state = AFS_CALL_ERROR; - _debug("Rcv NET ERROR %d", call->error); - break; - case RXRPC_SKB_MARK_LOCAL_ERROR: - call->error = -rxrpc_kernel_get_error_number(skb); - call->state = AFS_CALL_ERROR; - _debug("Rcv LOCAL ERROR %d", call->error); - break; - default: - BUG(); - break; + return; } - afs_free_skb(skb); - } - - /* make sure the queue is empty if the call is done with (we might have - * aborted the call early because of an unmarshalling error) */ - if (call->state >= AFS_CALL_COMPLETE) { - while ((skb = skb_dequeue(&call->rx_queue))) - afs_free_skb(skb); - if (call->incoming) - afs_end_call(call); + ret = call->type->deliver(call); + switch (ret) { + case 0: + if (call->state == AFS_CALL_AWAIT_REPLY) + call->state = AFS_CALL_COMPLETE; + goto done; + case -EINPROGRESS: + case -EAGAIN: + goto out; + case -ENOTCONN: + abort_code = RX_CALL_DEAD; + rxrpc_kernel_abort_call(afs_socket, call->rxcall, + abort_code); + goto do_abort; + case -ENOTSUPP: + abort_code = RX_INVALID_OPERATION; + rxrpc_kernel_abort_call(afs_socket, call->rxcall, + abort_code); + goto do_abort; + case -ENODATA: + case -EBADMSG: + case -EMSGSIZE: + default: + abort_code = RXGEN_CC_UNMARSHAL; + if (call->state != AFS_CALL_AWAIT_REPLY) + abort_code = RXGEN_SS_UNMARSHAL; + rxrpc_kernel_abort_call(afs_socket, call->rxcall, + abort_code); + goto do_abort; + } } +done: + if (call->state == AFS_CALL_COMPLETE && call->incoming) + afs_end_call(call); +out: _leave(""); + return; + +do_abort: + call->error = ret; + call->state = AFS_CALL_COMPLETE; + goto done; } /* @@ -574,7 +463,6 @@ static void afs_deliver_to_call(struct afs_call *call) */ static int afs_wait_for_call_to_complete(struct afs_call *call) { - struct sk_buff *skb; int ret; DECLARE_WAITQUEUE(myself, current); @@ -586,14 +474,15 @@ static int afs_wait_for_call_to_complete(struct afs_call *call) set_current_state(TASK_INTERRUPTIBLE); /* deliver any messages that are in the queue */ - if (!skb_queue_empty(&call->rx_queue)) { + if (call->state < AFS_CALL_COMPLETE && call->need_attention) { + call->need_attention = false; __set_current_state(TASK_RUNNING); afs_deliver_to_call(call); continue; } ret = call->error; - if (call->state >= AFS_CALL_COMPLETE) + if (call->state == AFS_CALL_COMPLETE) break; ret = -EINTR; if (signal_pending(current)) @@ -607,9 +496,8 @@ static int afs_wait_for_call_to_complete(struct afs_call *call) /* kill the call */ if (call->state < AFS_CALL_COMPLETE) { _debug("call incomplete"); - rxrpc_kernel_abort_call(afs_socket, call->rxcall, RX_CALL_DEAD); - while ((skb = skb_dequeue(&call->rx_queue))) - afs_free_skb(skb); + rxrpc_kernel_abort_call(afs_socket, call->rxcall, + RX_CALL_DEAD); } _debug("call complete"); @@ -621,17 +509,24 @@ static int afs_wait_for_call_to_complete(struct afs_call *call) /* * wake up a waiting call */ -static void afs_wake_up_call_waiter(struct afs_call *call) +static void afs_wake_up_call_waiter(struct sock *sk, struct rxrpc_call *rxcall, + unsigned long call_user_ID) { + struct afs_call *call = (struct afs_call *)call_user_ID; + + call->need_attention = true; wake_up(&call->waitq); } /* * wake up an asynchronous call */ -static void afs_wake_up_async_call(struct afs_call *call) +static void afs_wake_up_async_call(struct sock *sk, struct rxrpc_call *rxcall, + unsigned long call_user_ID) { - _enter(""); + struct afs_call *call = (struct afs_call *)call_user_ID; + + call->need_attention = true; queue_work(afs_async_calls, &call->async_work); } @@ -649,8 +544,10 @@ static int afs_dont_wait_for_call_to_complete(struct afs_call *call) /* * delete an asynchronous call */ -static void afs_delete_async_call(struct afs_call *call) +static void afs_delete_async_call(struct work_struct *work) { + struct afs_call *call = container_of(work, struct afs_call, async_work); + _enter(""); afs_free_call(call); @@ -660,17 +557,19 @@ static void afs_delete_async_call(struct afs_call *call) /* * perform processing on an asynchronous call - * - on a multiple-thread workqueue this work item may try to run on several - * CPUs at the same time */ -static void afs_process_async_call(struct afs_call *call) +static void afs_process_async_call(struct work_struct *work) { + struct afs_call *call = container_of(work, struct afs_call, async_work); + _enter(""); - if (!skb_queue_empty(&call->rx_queue)) + if (call->state < AFS_CALL_COMPLETE && call->need_attention) { + call->need_attention = false; afs_deliver_to_call(call); + } - if (call->state >= AFS_CALL_COMPLETE && call->wait_mode) { + if (call->state == AFS_CALL_COMPLETE && call->wait_mode) { if (call->wait_mode->async_complete) call->wait_mode->async_complete(call->reply, call->error); @@ -681,45 +580,13 @@ static void afs_process_async_call(struct afs_call *call) /* we can't just delete the call because the work item may be * queued */ - call->async_workfn = afs_delete_async_call; + call->async_work.func = afs_delete_async_call; queue_work(afs_async_calls, &call->async_work); } _leave(""); } -/* - * Empty a socket buffer into a flat reply buffer. - */ -int afs_transfer_reply(struct afs_call *call, struct sk_buff *skb, bool last) -{ - size_t len = skb->len; - - if (len > call->reply_max - call->reply_size) { - _leave(" = -EBADMSG [%zu > %u]", - len, call->reply_max - call->reply_size); - return -EBADMSG; - } - - if (len > 0) { - if (skb_copy_bits(skb, 0, call->buffer + call->reply_size, - len) < 0) - BUG(); - call->reply_size += len; - } - - afs_data_consumed(call, skb); - if (!last) - return -EAGAIN; - - if (call->reply_size != call->reply_max) { - _leave(" = -EBADMSG [%u != %u]", - call->reply_size, call->reply_max); - return -EBADMSG; - } - return 0; -} - /* * accept the backlog of incoming calls */ @@ -727,14 +594,10 @@ static void afs_collect_incoming_call(struct work_struct *work) { struct rxrpc_call *rxcall; struct afs_call *call = NULL; - struct sk_buff *skb; - while ((skb = skb_dequeue(&afs_incoming_calls))) { - _debug("new call"); - - /* don't need the notification */ - afs_free_skb(skb); + _enter(""); + do { if (!call) { call = kzalloc(sizeof(struct afs_call), GFP_KERNEL); if (!call) { @@ -742,12 +605,10 @@ static void afs_collect_incoming_call(struct work_struct *work) return; } - call->async_workfn = afs_process_async_call; - INIT_WORK(&call->async_work, afs_async_workfn); + INIT_WORK(&call->async_work, afs_process_async_call); call->wait_mode = &afs_async_incoming_call; call->type = &afs_RXCMxxxx; init_waitqueue_head(&call->waitq); - skb_queue_head_init(&call->rx_queue); call->state = AFS_CALL_AWAIT_OP_ID; _debug("CALL %p{%s} [%d]", @@ -757,46 +618,47 @@ static void afs_collect_incoming_call(struct work_struct *work) } rxcall = rxrpc_kernel_accept_call(afs_socket, - (unsigned long) call); + (unsigned long)call, + afs_wake_up_async_call); if (!IS_ERR(rxcall)) { call->rxcall = rxcall; + call->need_attention = true; + queue_work(afs_async_calls, &call->async_work); call = NULL; } - } + } while (!call); if (call) afs_free_call(call); } +/* + * Notification of an incoming call. + */ +static void afs_rx_new_call(struct sock *sk) +{ + queue_work(afs_wq, &afs_collect_incoming_call_work); +} + /* * Grab the operation ID from an incoming cache manager call. The socket * buffer is discarded on error or if we don't yet have sufficient data. */ -static int afs_deliver_cm_op_id(struct afs_call *call, struct sk_buff *skb, - bool last) +static int afs_deliver_cm_op_id(struct afs_call *call) { - size_t len = skb->len; - void *oibuf = (void *) &call->operation_ID; + int ret; - _enter("{%u},{%zu},%d", call->offset, len, last); + _enter("{%zu}", call->offset); ASSERTCMP(call->offset, <, 4); /* the operation ID forms the first four bytes of the request data */ - len = min_t(size_t, len, 4 - call->offset); - if (skb_copy_bits(skb, 0, oibuf + call->offset, len) < 0) - BUG(); - if (!pskb_pull(skb, len)) - BUG(); - call->offset += len; - - if (call->offset < 4) { - afs_data_consumed(call, skb); - _leave(" = -EAGAIN"); - return -EAGAIN; - } + ret = afs_extract_data(call, &call->operation_ID, 4, true); + if (ret < 0) + return ret; call->state = AFS_CALL_AWAIT_REQUEST; + call->offset = 0; /* ask the cache manager to route the call (it'll change the call type * if successful) */ @@ -805,7 +667,7 @@ static int afs_deliver_cm_op_id(struct afs_call *call, struct sk_buff *skb, /* pass responsibility for the remainer of this message off to the * cache manager op */ - return call->type->deliver(call, skb, last); + return call->type->deliver(call); } /* @@ -881,25 +743,40 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len) /* * Extract a piece of data from the received data socket buffers. */ -int afs_extract_data(struct afs_call *call, struct sk_buff *skb, - bool last, void *buf, size_t count) +int afs_extract_data(struct afs_call *call, void *buf, size_t count, + bool want_more) { - size_t len = skb->len; + int ret; - _enter("{%u},{%zu},%d,,%zu", call->offset, len, last, count); + _enter("{%s,%zu},,%zu,%d", + call->type->name, call->offset, count, want_more); - ASSERTCMP(call->offset, <, count); + ASSERTCMP(call->offset, <=, count); - len = min_t(size_t, len, count - call->offset); - if (skb_copy_bits(skb, 0, buf + call->offset, len) < 0 || - !pskb_pull(skb, len)) - BUG(); - call->offset += len; + ret = rxrpc_kernel_recv_data(afs_socket, call->rxcall, + buf, count, &call->offset, + want_more, &call->abort_code); + if (ret == 0 || ret == -EAGAIN) + return ret; - if (call->offset < count) { - afs_data_consumed(call, skb); - _leave(" = -EAGAIN"); - return -EAGAIN; + if (ret == 1) { + switch (call->state) { + case AFS_CALL_AWAIT_REPLY: + call->state = AFS_CALL_COMPLETE; + break; + case AFS_CALL_AWAIT_REQUEST: + call->state = AFS_CALL_REPLYING; + break; + default: + break; + } + return 0; } - return 0; + + if (ret == -ECONNABORTED) + call->error = call->type->abort_to_error(call->abort_code); + else + call->error = ret; + call->state = AFS_CALL_COMPLETE; + return ret; } diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c index f94d1abdc3eb..94bcd97d22b8 100644 --- a/fs/afs/vlclient.c +++ b/fs/afs/vlclient.c @@ -58,17 +58,16 @@ static int afs_vl_abort_to_error(u32 abort_code) /* * deliver reply data to a VL.GetEntryByXXX call */ -static int afs_deliver_vl_get_entry_by_xxx(struct afs_call *call, - struct sk_buff *skb, bool last) +static int afs_deliver_vl_get_entry_by_xxx(struct afs_call *call) { struct afs_cache_vlocation *entry; __be32 *bp; u32 tmp; int loop, ret; - _enter(",,%u", last); + _enter(""); - ret = afs_transfer_reply(call, skb, last); + ret = afs_transfer_reply(call); if (ret < 0) return ret; diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index f8d8079dc058..b4b6a3664dda 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -12,7 +12,6 @@ #ifndef _NET_RXRPC_H #define _NET_RXRPC_H -#include #include struct key; @@ -20,38 +19,26 @@ struct sock; struct socket; struct rxrpc_call; -/* - * the mark applied to socket buffers that may be intercepted - */ -enum rxrpc_skb_mark { - RXRPC_SKB_MARK_DATA, /* data message */ - RXRPC_SKB_MARK_FINAL_ACK, /* final ACK received message */ - RXRPC_SKB_MARK_BUSY, /* server busy message */ - RXRPC_SKB_MARK_REMOTE_ABORT, /* remote abort message */ - RXRPC_SKB_MARK_LOCAL_ABORT, /* local abort message */ - RXRPC_SKB_MARK_NET_ERROR, /* network error message */ - RXRPC_SKB_MARK_LOCAL_ERROR, /* local error message */ - RXRPC_SKB_MARK_NEW_CALL, /* local error message */ -}; +typedef void (*rxrpc_notify_rx_t)(struct sock *, struct rxrpc_call *, + unsigned long); +typedef void (*rxrpc_notify_new_call_t)(struct sock *); -typedef void (*rxrpc_interceptor_t)(struct sock *, unsigned long, - struct sk_buff *); -void rxrpc_kernel_intercept_rx_messages(struct socket *, rxrpc_interceptor_t); +void rxrpc_kernel_new_call_notification(struct socket *, + rxrpc_notify_new_call_t); struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *, struct sockaddr_rxrpc *, struct key *, unsigned long, - gfp_t); + gfp_t, + rxrpc_notify_rx_t); int rxrpc_kernel_send_data(struct socket *, struct rxrpc_call *, struct msghdr *, size_t); -void rxrpc_kernel_data_consumed(struct rxrpc_call *, struct sk_buff *); +int rxrpc_kernel_recv_data(struct socket *, struct rxrpc_call *, + void *, size_t, size_t *, bool, u32 *); void rxrpc_kernel_abort_call(struct socket *, struct rxrpc_call *, u32); void rxrpc_kernel_end_call(struct socket *, struct rxrpc_call *); -bool rxrpc_kernel_is_data_last(struct sk_buff *); -u32 rxrpc_kernel_get_abort_code(struct sk_buff *); -int rxrpc_kernel_get_error_number(struct sk_buff *); -void rxrpc_kernel_free_skb(struct sk_buff *); -struct rxrpc_call *rxrpc_kernel_accept_call(struct socket *, unsigned long); +struct rxrpc_call *rxrpc_kernel_accept_call(struct socket *, unsigned long, + rxrpc_notify_rx_t); int rxrpc_kernel_reject_call(struct socket *); void rxrpc_kernel_get_peer(struct socket *, struct rxrpc_call *, struct sockaddr_rxrpc *); diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index e07c91acd904..32d544995dda 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -231,6 +231,8 @@ static int rxrpc_listen(struct socket *sock, int backlog) * @srx: The address of the peer to contact * @key: The security context to use (defaults to socket setting) * @user_call_ID: The ID to use + * @gfp: The allocation constraints + * @notify_rx: Where to send notifications instead of socket queue * * Allow a kernel service to begin a call on the nominated socket. This just * sets up all the internal tracking structures and allocates connection and @@ -243,7 +245,8 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, struct sockaddr_rxrpc *srx, struct key *key, unsigned long user_call_ID, - gfp_t gfp) + gfp_t gfp, + rxrpc_notify_rx_t notify_rx) { struct rxrpc_conn_parameters cp; struct rxrpc_call *call; @@ -270,6 +273,8 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, cp.exclusive = false; cp.service_id = srx->srx_service; call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, gfp); + if (!IS_ERR(call)) + call->notify_rx = notify_rx; release_sock(&rx->sk); _leave(" = %p", call); @@ -289,31 +294,27 @@ void rxrpc_kernel_end_call(struct socket *sock, struct rxrpc_call *call) { _enter("%d{%d}", call->debug_id, atomic_read(&call->usage)); rxrpc_remove_user_ID(rxrpc_sk(sock->sk), call); + rxrpc_purge_queue(&call->knlrecv_queue); rxrpc_put_call(call); } EXPORT_SYMBOL(rxrpc_kernel_end_call); /** - * rxrpc_kernel_intercept_rx_messages - Intercept received RxRPC messages + * rxrpc_kernel_new_call_notification - Get notifications of new calls * @sock: The socket to intercept received messages on - * @interceptor: The function to pass the messages to + * @notify_new_call: Function to be called when new calls appear * - * Allow a kernel service to intercept messages heading for the Rx queue on an - * RxRPC socket. They get passed to the specified function instead. - * @interceptor should free the socket buffers it is given. @interceptor is - * called with the socket receive queue spinlock held and softirqs disabled - - * this ensures that the messages will be delivered in the right order. + * Allow a kernel service to be given notifications about new calls. */ -void rxrpc_kernel_intercept_rx_messages(struct socket *sock, - rxrpc_interceptor_t interceptor) +void rxrpc_kernel_new_call_notification( + struct socket *sock, + rxrpc_notify_new_call_t notify_new_call) { struct rxrpc_sock *rx = rxrpc_sk(sock->sk); - _enter(""); - rx->interceptor = interceptor; + rx->notify_new_call = notify_new_call; } - -EXPORT_SYMBOL(rxrpc_kernel_intercept_rx_messages); +EXPORT_SYMBOL(rxrpc_kernel_new_call_notification); /* * connect an RxRPC socket diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 0c320b2b7b43..4e86d248dc5e 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -39,6 +39,20 @@ struct rxrpc_crypt { struct rxrpc_connection; +/* + * Mark applied to socket buffers. + */ +enum rxrpc_skb_mark { + RXRPC_SKB_MARK_DATA, /* data message */ + RXRPC_SKB_MARK_FINAL_ACK, /* final ACK received message */ + RXRPC_SKB_MARK_BUSY, /* server busy message */ + RXRPC_SKB_MARK_REMOTE_ABORT, /* remote abort message */ + RXRPC_SKB_MARK_LOCAL_ABORT, /* local abort message */ + RXRPC_SKB_MARK_NET_ERROR, /* network error message */ + RXRPC_SKB_MARK_LOCAL_ERROR, /* local error message */ + RXRPC_SKB_MARK_NEW_CALL, /* local error message */ +}; + /* * sk_state for RxRPC sockets */ @@ -57,7 +71,7 @@ enum { struct rxrpc_sock { /* WARNING: sk has to be the first member */ struct sock sk; - rxrpc_interceptor_t interceptor; /* kernel service Rx interceptor function */ + rxrpc_notify_new_call_t notify_new_call; /* Func to notify of new call */ struct rxrpc_local *local; /* local endpoint */ struct list_head listen_link; /* link in the local endpoint's listen list */ struct list_head secureq; /* calls awaiting connection security clearance */ @@ -367,6 +381,7 @@ enum rxrpc_call_flag { RXRPC_CALL_EXPECT_OOS, /* expect out of sequence packets */ RXRPC_CALL_IS_SERVICE, /* Call is service call */ RXRPC_CALL_EXPOSED, /* The call was exposed to the world */ + RXRPC_CALL_RX_NO_MORE, /* Don't indicate MSG_MORE from recvmsg() */ }; /* @@ -441,6 +456,7 @@ struct rxrpc_call { struct timer_list resend_timer; /* Tx resend timer */ struct work_struct destroyer; /* call destroyer */ struct work_struct processor; /* packet processor and ACK generator */ + rxrpc_notify_rx_t notify_rx; /* kernel service Rx notification function */ struct list_head link; /* link in master call list */ struct list_head chan_wait_link; /* Link in conn->waiting_calls */ struct hlist_node error_link; /* link in error distribution list */ @@ -448,6 +464,7 @@ struct rxrpc_call { struct rb_node sock_node; /* node in socket call tree */ struct sk_buff_head rx_queue; /* received packets */ struct sk_buff_head rx_oos_queue; /* packets received out of sequence */ + struct sk_buff_head knlrecv_queue; /* Queue for kernel_recv [TODO: replace this] */ struct sk_buff *tx_pending; /* Tx socket buffer being filled */ wait_queue_head_t waitq; /* Wait queue for channel or Tx */ __be32 crypto_buf[2]; /* Temporary packet crypto buffer */ @@ -512,7 +529,8 @@ extern struct workqueue_struct *rxrpc_workqueue; * call_accept.c */ void rxrpc_accept_incoming_calls(struct rxrpc_local *); -struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *, unsigned long); +struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *, unsigned long, + rxrpc_notify_rx_t); int rxrpc_reject_call(struct rxrpc_sock *); /* @@ -874,6 +892,7 @@ int rxrpc_init_server_conn_security(struct rxrpc_connection *); /* * skbuff.c */ +void rxrpc_kernel_data_consumed(struct rxrpc_call *, struct sk_buff *); void rxrpc_packet_destructor(struct sk_buff *); void rxrpc_new_skb(struct sk_buff *); void rxrpc_see_skb(struct sk_buff *); diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 03af88fe798b..68a439e30df1 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -286,7 +286,8 @@ security_mismatch: * - assign the user call ID to the call at the front of the queue */ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx, - unsigned long user_call_ID) + unsigned long user_call_ID, + rxrpc_notify_rx_t notify_rx) { struct rxrpc_call *call; struct rb_node *parent, **pp; @@ -340,6 +341,7 @@ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx, } /* formalise the acceptance */ + call->notify_rx = notify_rx; call->user_call_ID = user_call_ID; rb_link_node(&call->sock_node, parent, pp); rb_insert_color(&call->sock_node, &rx->calls); @@ -437,17 +439,20 @@ out: * rxrpc_kernel_accept_call - Allow a kernel service to accept an incoming call * @sock: The socket on which the impending call is waiting * @user_call_ID: The tag to attach to the call + * @notify_rx: Where to send notifications instead of socket queue * * Allow a kernel service to accept an incoming call, assuming the incoming - * call is still valid. + * call is still valid. The caller should immediately trigger their own + * notification as there must be data waiting. */ struct rxrpc_call *rxrpc_kernel_accept_call(struct socket *sock, - unsigned long user_call_ID) + unsigned long user_call_ID, + rxrpc_notify_rx_t notify_rx) { struct rxrpc_call *call; _enter(",%lx", user_call_ID); - call = rxrpc_accept_call(rxrpc_sk(sock->sk), user_call_ID); + call = rxrpc_accept_call(rxrpc_sk(sock->sk), user_call_ID, notify_rx); _leave(" = %p", call); return call; } diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 104ee8b1de06..516d8ea82f02 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -136,6 +136,7 @@ static struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp) INIT_LIST_HEAD(&call->accept_link); skb_queue_head_init(&call->rx_queue); skb_queue_head_init(&call->rx_oos_queue); + skb_queue_head_init(&call->knlrecv_queue); init_waitqueue_head(&call->waitq); spin_lock_init(&call->lock); rwlock_init(&call->state_lock); @@ -552,8 +553,6 @@ void rxrpc_release_call(struct rxrpc_call *call) spin_lock_bh(&call->lock); } spin_unlock_bh(&call->lock); - - ASSERTCMP(call->state, !=, RXRPC_CALL_COMPLETE); } del_timer_sync(&call->resend_timer); @@ -682,6 +681,7 @@ static void rxrpc_rcu_destroy_call(struct rcu_head *rcu) struct rxrpc_call *call = container_of(rcu, struct rxrpc_call, rcu); rxrpc_purge_queue(&call->rx_queue); + rxrpc_purge_queue(&call->knlrecv_queue); rxrpc_put_peer(call->peer); kmem_cache_free(rxrpc_call_jar, call); } @@ -737,6 +737,7 @@ static void rxrpc_cleanup_call(struct rxrpc_call *call) rxrpc_purge_queue(&call->rx_queue); ASSERT(skb_queue_empty(&call->rx_oos_queue)); + rxrpc_purge_queue(&call->knlrecv_queue); sock_put(&call->socket->sk); call_rcu(&call->rcu, rxrpc_rcu_destroy_call); } diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index bc9b05938ff5..9db90f4f768d 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -282,7 +282,6 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, case RXRPC_PACKET_TYPE_DATA: case RXRPC_PACKET_TYPE_ACK: rxrpc_conn_retransmit_call(conn, skb); - rxrpc_free_skb(skb); return 0; case RXRPC_PACKET_TYPE_ABORT: diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 86bea9ad6c3d..72f016cfaaf5 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -90,9 +90,15 @@ int rxrpc_queue_rcv_skb(struct rxrpc_call *call, struct sk_buff *skb, } /* allow interception by a kernel service */ - if (rx->interceptor) { - rx->interceptor(sk, call->user_call_ID, skb); + if (skb->mark == RXRPC_SKB_MARK_NEW_CALL && + rx->notify_new_call) { spin_unlock_bh(&sk->sk_receive_queue.lock); + skb_queue_tail(&call->knlrecv_queue, skb); + rx->notify_new_call(&rx->sk); + } else if (call->notify_rx) { + spin_unlock_bh(&sk->sk_receive_queue.lock); + skb_queue_tail(&call->knlrecv_queue, skb); + call->notify_rx(&rx->sk, call, call->user_call_ID); } else { _net("post skb %p", skb); __skb_queue_tail(&sk->sk_receive_queue, skb); diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index b1e708a12151..817ae801e769 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -190,7 +190,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) if (cmd == RXRPC_CMD_ACCEPT) { if (rx->sk.sk_state != RXRPC_SERVER_LISTENING) return -EINVAL; - call = rxrpc_accept_call(rx, user_call_ID); + call = rxrpc_accept_call(rx, user_call_ID, NULL); if (IS_ERR(call)) return PTR_ERR(call); rxrpc_put_call(call); diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index c9b38c7fb448..0ab7b334bab1 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -369,55 +369,178 @@ wait_error: } -/** - * rxrpc_kernel_is_data_last - Determine if data message is last one - * @skb: Message holding data +/* + * Deliver messages to a call. This keeps processing packets until the buffer + * is filled and we find either more DATA (returns 0) or the end of the DATA + * (returns 1). If more packets are required, it returns -EAGAIN. * - * Determine if data message is last one for the parent call. + * TODO: Note that this is hacked in at the moment and will be replaced. */ -bool rxrpc_kernel_is_data_last(struct sk_buff *skb) +static int temp_deliver_data(struct socket *sock, struct rxrpc_call *call, + struct iov_iter *iter, size_t size, + size_t *_offset) { - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + struct rxrpc_skb_priv *sp; + struct sk_buff *skb; + size_t remain; + int ret, copy; - ASSERTCMP(skb->mark, ==, RXRPC_SKB_MARK_DATA); + _enter("%d", call->debug_id); - return sp->hdr.flags & RXRPC_LAST_PACKET; -} +next: + local_bh_disable(); + skb = skb_dequeue(&call->knlrecv_queue); + local_bh_enable(); + if (!skb) { + if (test_bit(RXRPC_CALL_RX_NO_MORE, &call->flags)) + return 1; + _leave(" = -EAGAIN [empty]"); + return -EAGAIN; + } -EXPORT_SYMBOL(rxrpc_kernel_is_data_last); - -/** - * rxrpc_kernel_get_abort_code - Get the abort code from an RxRPC abort message - * @skb: Message indicating an abort - * - * Get the abort code from an RxRPC abort message. - */ -u32 rxrpc_kernel_get_abort_code(struct sk_buff *skb) -{ - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + sp = rxrpc_skb(skb); + _debug("dequeued %p %u/%zu", skb, sp->offset, size); switch (skb->mark) { - case RXRPC_SKB_MARK_REMOTE_ABORT: - case RXRPC_SKB_MARK_LOCAL_ABORT: - return sp->call->abort_code; - default: - BUG(); - } -} + case RXRPC_SKB_MARK_DATA: + remain = size - *_offset; + if (remain > 0) { + copy = skb->len - sp->offset; + if (copy > remain) + copy = remain; + ret = skb_copy_datagram_iter(skb, sp->offset, iter, + copy); + if (ret < 0) + goto requeue_and_leave; -EXPORT_SYMBOL(rxrpc_kernel_get_abort_code); + /* handle piecemeal consumption of data packets */ + sp->offset += copy; + *_offset += copy; + } + + if (sp->offset < skb->len) + goto partially_used_skb; + + /* We consumed the whole packet */ + ASSERTCMP(sp->offset, ==, skb->len); + if (sp->hdr.flags & RXRPC_LAST_PACKET) + set_bit(RXRPC_CALL_RX_NO_MORE, &call->flags); + rxrpc_kernel_data_consumed(call, skb); + rxrpc_free_skb(skb); + goto next; + + default: + rxrpc_free_skb(skb); + goto next; + } + +partially_used_skb: + ASSERTCMP(*_offset, ==, size); + ret = 0; +requeue_and_leave: + skb_queue_head(&call->knlrecv_queue, skb); + return ret; +} /** - * rxrpc_kernel_get_error - Get the error number from an RxRPC error message - * @skb: Message indicating an error + * rxrpc_kernel_recv_data - Allow a kernel service to receive data/info + * @sock: The socket that the call exists on + * @call: The call to send data through + * @buf: The buffer to receive into + * @size: The size of the buffer, including data already read + * @_offset: The running offset into the buffer. + * @want_more: True if more data is expected to be read + * @_abort: Where the abort code is stored if -ECONNABORTED is returned * - * Get the error number from an RxRPC error message. + * Allow a kernel service to receive data and pick up information about the + * state of a call. Returns 0 if got what was asked for and there's more + * available, 1 if we got what was asked for and we're at the end of the data + * and -EAGAIN if we need more data. + * + * Note that we may return -EAGAIN to drain empty packets at the end of the + * data, even if we've already copied over the requested data. + * + * This function adds the amount it transfers to *_offset, so this should be + * precleared as appropriate. Note that the amount remaining in the buffer is + * taken to be size - *_offset. + * + * *_abort should also be initialised to 0. */ -int rxrpc_kernel_get_error_number(struct sk_buff *skb) +int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call, + void *buf, size_t size, size_t *_offset, + bool want_more, u32 *_abort) { - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + struct iov_iter iter; + struct kvec iov; + int ret; - return sp->error; + _enter("{%d,%s},%zu,%d", + call->debug_id, rxrpc_call_states[call->state], size, want_more); + + ASSERTCMP(*_offset, <=, size); + ASSERTCMP(call->state, !=, RXRPC_CALL_SERVER_ACCEPTING); + + iov.iov_base = buf + *_offset; + iov.iov_len = size - *_offset; + iov_iter_kvec(&iter, ITER_KVEC | READ, &iov, 1, size - *_offset); + + lock_sock(sock->sk); + + switch (call->state) { + case RXRPC_CALL_CLIENT_RECV_REPLY: + case RXRPC_CALL_SERVER_RECV_REQUEST: + case RXRPC_CALL_SERVER_ACK_REQUEST: + ret = temp_deliver_data(sock, call, &iter, size, _offset); + if (ret < 0) + goto out; + + /* We can only reach here with a partially full buffer if we + * have reached the end of the data. We must otherwise have a + * full buffer or have been given -EAGAIN. + */ + if (ret == 1) { + if (*_offset < size) + goto short_data; + if (!want_more) + goto read_phase_complete; + ret = 0; + goto out; + } + + if (!want_more) + goto excess_data; + goto out; + + case RXRPC_CALL_COMPLETE: + goto call_complete; + + default: + *_offset = 0; + ret = -EINPROGRESS; + goto out; + } + +read_phase_complete: + ret = 1; +out: + release_sock(sock->sk); + _leave(" = %d [%zu,%d]", ret, *_offset, *_abort); + return ret; + +short_data: + ret = -EBADMSG; + goto out; +excess_data: + ret = -EMSGSIZE; + goto out; +call_complete: + *_abort = call->abort_code; + ret = call->error; + if (call->completion == RXRPC_CALL_SUCCEEDED) { + ret = 1; + if (size > 0) + ret = -ECONNRESET; + } + goto out; } - -EXPORT_SYMBOL(rxrpc_kernel_get_error_number); +EXPORT_SYMBOL(rxrpc_kernel_recv_data); diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c index 20529205bb8c..9752f8b1fdd0 100644 --- a/net/rxrpc/skbuff.c +++ b/net/rxrpc/skbuff.c @@ -127,7 +127,6 @@ void rxrpc_kernel_data_consumed(struct rxrpc_call *call, struct sk_buff *skb) call->rx_data_recv = sp->hdr.seq; rxrpc_hard_ACK_data(call, skb); } -EXPORT_SYMBOL(rxrpc_kernel_data_consumed); /* * Destroy a packet that has an RxRPC control buffer From 66fdd05e7a85564f86d9b220de946aa98e8bc048 Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Wed, 31 Aug 2016 11:16:22 +0800 Subject: [PATCH 0645/1715] rps: flow_dissector: Add the const for the parameter of flow_keys_have_l4 Add the const for the parameter of flow_keys_have_l4 for the readability. Signed-off-by: Gao Feng Signed-off-by: David S. Miller --- include/net/flow_dissector.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index f266b512c3bd..d9534927d93b 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -183,7 +183,7 @@ struct flow_keys_digest { void make_flow_keys_digest(struct flow_keys_digest *digest, const struct flow_keys *flow); -static inline bool flow_keys_have_l4(struct flow_keys *keys) +static inline bool flow_keys_have_l4(const struct flow_keys *keys) { return (keys->ports.ports || keys->tags.flow_label); } From d297653dd6f07afbe7e6c702a4bcd7615680002e Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Tue, 30 Aug 2016 21:56:45 -0700 Subject: [PATCH 0646/1715] rtnetlink: fdb dump: optimize by saving last interface markers fdb dumps spanning multiple skb's currently restart from the first interface again for every skb. This results in unnecessary iterations on the already visited interfaces and their fdb entries. In large scale setups, we have seen this to slow down fdb dumps considerably. On a system with 30k macs we see fdb dumps spanning across more than 300 skbs. To fix the problem, this patch replaces the existing single fdb marker with three markers: netdev hash entries, netdevs and fdb index to continue where we left off instead of restarting from the first netdev. This is consistent with link dumps. In the process of fixing the performance issue, this patch also re-implements fix done by commit 472681d57a5d ("net: ndo_fdb_dump should report -EMSGSIZE to rtnl_fdb_dump") (with an internal fix from Wilson Kok) in the following ways: - change ndo_fdb_dump handlers to return error code instead of the last fdb index - use cb->args strictly for dump frag markers and not error codes. This is consistent with other dump functions. Below results were taken on a system with 1000 netdevs and 35085 fdb entries: before patch: $time bridge fdb show | wc -l 15065 real 1m11.791s user 0m0.070s sys 1m8.395s (existing code does not return all macs) after patch: $time bridge fdb show | wc -l 35085 real 0m2.017s user 0m0.113s sys 0m1.942s Signed-off-by: Roopa Prabhu Signed-off-by: Wilson Kok Signed-off-by: David S. Miller --- .../net/ethernet/qlogic/qlcnic/qlcnic_main.c | 7 +- drivers/net/vxlan.c | 14 +-- include/linux/netdevice.h | 4 +- include/linux/rtnetlink.h | 2 +- include/net/switchdev.h | 4 +- net/bridge/br_fdb.c | 23 ++-- net/bridge/br_private.h | 2 +- net/core/rtnetlink.c | 109 +++++++++++------- net/switchdev/switchdev.c | 10 +- 9 files changed, 100 insertions(+), 75 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index 3ebef27e0964..3ae3968b0edf 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -432,18 +432,19 @@ static int qlcnic_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], static int qlcnic_fdb_dump(struct sk_buff *skb, struct netlink_callback *ncb, struct net_device *netdev, - struct net_device *filter_dev, int idx) + struct net_device *filter_dev, int *idx) { struct qlcnic_adapter *adapter = netdev_priv(netdev); + int err = 0; if (!adapter->fdb_mac_learn) return ndo_dflt_fdb_dump(skb, ncb, netdev, filter_dev, idx); if ((adapter->flags & QLCNIC_ESWITCH_ENABLED) || qlcnic_sriov_check(adapter)) - idx = ndo_dflt_fdb_dump(skb, ncb, netdev, filter_dev, idx); + err = ndo_dflt_fdb_dump(skb, ncb, netdev, filter_dev, idx); - return idx; + return err; } static void qlcnic_82xx_cancel_idc_work(struct qlcnic_adapter *adapter) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 3f7e0d2dd21a..f605a3684a7f 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -860,20 +860,20 @@ out: /* Dump forwarding table */ static int vxlan_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, - struct net_device *filter_dev, int idx) + struct net_device *filter_dev, int *idx) { struct vxlan_dev *vxlan = netdev_priv(dev); unsigned int h; + int err = 0; for (h = 0; h < FDB_HASH_SIZE; ++h) { struct vxlan_fdb *f; - int err; hlist_for_each_entry_rcu(f, &vxlan->fdb_head[h], hlist) { struct vxlan_rdst *rd; list_for_each_entry_rcu(rd, &f->remotes, list) { - if (idx < cb->args[0]) + if (*idx < cb->args[2]) goto skip; err = vxlan_fdb_info(skb, vxlan, f, @@ -881,17 +881,15 @@ static int vxlan_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, cb->nlh->nlmsg_seq, RTM_NEWNEIGH, NLM_F_MULTI, rd); - if (err < 0) { - cb->args[1] = err; + if (err < 0) goto out; - } skip: - ++idx; + *idx += 1; } } } out: - return idx; + return err; } /* Watch incoming packets to learn mapping between Ethernet address diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d122be9345c7..67bb978470dc 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1031,7 +1031,7 @@ struct netdev_xdp { * Deletes the FDB entry from dev coresponding to addr. * int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb, * struct net_device *dev, struct net_device *filter_dev, - * int idx) + * int *idx) * Used to add FDB entries to dump requests. Implementers should add * entries to skb and update idx with the number of entries. * @@ -1263,7 +1263,7 @@ struct net_device_ops { struct netlink_callback *cb, struct net_device *dev, struct net_device *filter_dev, - int idx); + int *idx); int (*ndo_bridge_setlink)(struct net_device *dev, struct nlmsghdr *nlh, diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 2daece8979f7..57e54847b0b9 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -105,7 +105,7 @@ extern int ndo_dflt_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, struct net_device *filter_dev, - int idx); + int *idx); extern int ndo_dflt_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 82f5e0462021..6279f2f179ec 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -222,7 +222,7 @@ int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], u16 vid); int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, - struct net_device *filter_dev, int idx); + struct net_device *filter_dev, int *idx); void switchdev_port_fwd_mark_set(struct net_device *dev, struct net_device *group_dev, bool joining); @@ -342,7 +342,7 @@ static inline int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, struct net_device *filter_dev, - int idx) + int *idx) { return idx; } diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index cd620fab41b0..6b43c8c88f19 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -710,24 +710,27 @@ int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, struct net_device *filter_dev, - int idx) + int *idx) { struct net_bridge *br = netdev_priv(dev); + int err = 0; int i; if (!(dev->priv_flags & IFF_EBRIDGE)) goto out; - if (!filter_dev) - idx = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx); + if (!filter_dev) { + err = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx); + if (err < 0) + goto out; + } for (i = 0; i < BR_HASH_SIZE; i++) { struct net_bridge_fdb_entry *f; hlist_for_each_entry_rcu(f, &br->hash[i], hlist) { - int err; - if (idx < cb->args[0]) + if (*idx < cb->args[2]) goto skip; if (filter_dev && @@ -750,17 +753,15 @@ int br_fdb_dump(struct sk_buff *skb, cb->nlh->nlmsg_seq, RTM_NEWNEIGH, NLM_F_MULTI); - if (err < 0) { - cb->args[1] = err; - break; - } + if (err < 0) + goto out; skip: - ++idx; + *idx += 1; } } out: - return idx; + return err; } /* Update (create or replace) forwarding database entry */ diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 2379b2b865c9..3d36493f4487 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -508,7 +508,7 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, u16 nlh_flags); int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, - struct net_device *dev, struct net_device *fdev, int idx); + struct net_device *dev, struct net_device *fdev, int *idx); int br_fdb_sync_static(struct net_bridge *br, struct net_bridge_port *p); void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p); int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 318fc5231b2b..1dfca1c3f8f5 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3068,7 +3068,7 @@ static int nlmsg_populate_fdb(struct sk_buff *skb, seq = cb->nlh->nlmsg_seq; list_for_each_entry(ha, &list->list, list) { - if (*idx < cb->args[0]) + if (*idx < cb->args[2]) goto skip; err = nlmsg_populate_fdb_fill(skb, dev, ha->addr, 0, @@ -3095,19 +3095,18 @@ int ndo_dflt_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, struct net_device *filter_dev, - int idx) + int *idx) { int err; netif_addr_lock_bh(dev); - err = nlmsg_populate_fdb(skb, cb, dev, &idx, &dev->uc); + err = nlmsg_populate_fdb(skb, cb, dev, idx, &dev->uc); if (err) goto out; - nlmsg_populate_fdb(skb, cb, dev, &idx, &dev->mc); + nlmsg_populate_fdb(skb, cb, dev, idx, &dev->mc); out: netif_addr_unlock_bh(dev); - cb->args[1] = err; - return idx; + return err; } EXPORT_SYMBOL(ndo_dflt_fdb_dump); @@ -3120,9 +3119,13 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) const struct net_device_ops *cops = NULL; struct ifinfomsg *ifm = nlmsg_data(cb->nlh); struct net *net = sock_net(skb->sk); + struct hlist_head *head; int brport_idx = 0; int br_idx = 0; - int idx = 0; + int h, s_h; + int idx = 0, s_idx; + int err = 0; + int fidx = 0; if (nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX, ifla_policy) == 0) { @@ -3140,49 +3143,71 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) ops = br_dev->netdev_ops; } - cb->args[1] = 0; - for_each_netdev(net, dev) { - if (brport_idx && (dev->ifindex != brport_idx)) - continue; + s_h = cb->args[0]; + s_idx = cb->args[1]; - if (!br_idx) { /* user did not specify a specific bridge */ - if (dev->priv_flags & IFF_BRIDGE_PORT) { - br_dev = netdev_master_upper_dev_get(dev); - cops = br_dev->netdev_ops; + for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { + idx = 0; + head = &net->dev_index_head[h]; + hlist_for_each_entry(dev, head, index_hlist) { + + if (brport_idx && (dev->ifindex != brport_idx)) + continue; + + if (!br_idx) { /* user did not specify a specific bridge */ + if (dev->priv_flags & IFF_BRIDGE_PORT) { + br_dev = netdev_master_upper_dev_get(dev); + cops = br_dev->netdev_ops; + } + } else { + if (dev != br_dev && + !(dev->priv_flags & IFF_BRIDGE_PORT)) + continue; + + if (br_dev != netdev_master_upper_dev_get(dev) && + !(dev->priv_flags & IFF_EBRIDGE)) + continue; + cops = ops; } - } else { - if (dev != br_dev && - !(dev->priv_flags & IFF_BRIDGE_PORT)) - continue; + if (idx < s_idx) + goto cont; - if (br_dev != netdev_master_upper_dev_get(dev) && - !(dev->priv_flags & IFF_EBRIDGE)) - continue; + if (dev->priv_flags & IFF_BRIDGE_PORT) { + if (cops && cops->ndo_fdb_dump) { + err = cops->ndo_fdb_dump(skb, cb, + br_dev, dev, + &fidx); + if (err == -EMSGSIZE) + goto out; + } + } - cops = ops; + if (dev->netdev_ops->ndo_fdb_dump) + err = dev->netdev_ops->ndo_fdb_dump(skb, cb, + dev, NULL, + &fidx); + else + err = ndo_dflt_fdb_dump(skb, cb, dev, NULL, + &fidx); + if (err == -EMSGSIZE) + goto out; + + cops = NULL; + + /* reset fdb offset to 0 for rest of the interfaces */ + cb->args[2] = 0; + fidx = 0; +cont: + idx++; } - - if (dev->priv_flags & IFF_BRIDGE_PORT) { - if (cops && cops->ndo_fdb_dump) - idx = cops->ndo_fdb_dump(skb, cb, br_dev, dev, - idx); - } - if (cb->args[1] == -EMSGSIZE) - break; - - if (dev->netdev_ops->ndo_fdb_dump) - idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, dev, NULL, - idx); - else - idx = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx); - if (cb->args[1] == -EMSGSIZE) - break; - - cops = NULL; } - cb->args[0] = idx; +out: + cb->args[0] = h; + cb->args[1] = idx; + cb->args[2] = fidx; + return skb->len; } diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 1031a0327fff..10b819308439 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -1042,7 +1042,7 @@ static int switchdev_port_fdb_dump_cb(struct switchdev_obj *obj) struct nlmsghdr *nlh; struct ndmsg *ndm; - if (dump->idx < dump->cb->args[0]) + if (dump->idx < dump->cb->args[2]) goto skip; nlh = nlmsg_put(dump->skb, portid, seq, RTM_NEWNEIGH, @@ -1089,7 +1089,7 @@ nla_put_failure: */ int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, - struct net_device *filter_dev, int idx) + struct net_device *filter_dev, int *idx) { struct switchdev_fdb_dump dump = { .fdb.obj.orig_dev = dev, @@ -1097,14 +1097,14 @@ int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, .dev = dev, .skb = skb, .cb = cb, - .idx = idx, + .idx = *idx, }; int err; err = switchdev_port_obj_dump(dev, &dump.fdb.obj, switchdev_port_fdb_dump_cb); - cb->args[1] = err; - return dump.idx; + *idx = dump.idx; + return err; } EXPORT_SYMBOL_GPL(switchdev_port_fdb_dump); From 8addd5e7d3a5c118a214a7794ae299787198aa25 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 31 Aug 2016 15:36:51 +0200 Subject: [PATCH 0647/1715] net: bridge: change unicast boolean to exact pkt_type Remove the unicast flag and introduce an exact pkt_type. That would help us for the upcoming per-port multicast flood flag and also slightly reduce the tests in the input fast path. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_device.c | 8 ++++---- net/bridge/br_forward.c | 4 ++-- net/bridge/br_input.c | 40 +++++++++++++++++++++++++--------------- net/bridge/br_private.h | 7 ++++++- 4 files changed, 37 insertions(+), 22 deletions(-) diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 09f26940aba5..89a687f3c0a3 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -62,10 +62,10 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) goto out; if (is_broadcast_ether_addr(dest)) { - br_flood(br, skb, false, false, true); + br_flood(br, skb, BR_PKT_BROADCAST, false, true); } else if (is_multicast_ether_addr(dest)) { if (unlikely(netpoll_tx_running(dev))) { - br_flood(br, skb, false, false, true); + br_flood(br, skb, BR_PKT_MULTICAST, false, true); goto out; } if (br_multicast_rcv(br, NULL, skb, vid)) { @@ -78,11 +78,11 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) br_multicast_querier_exists(br, eth_hdr(skb))) br_multicast_flood(mdst, skb, false, true); else - br_flood(br, skb, false, false, true); + br_flood(br, skb, BR_PKT_MULTICAST, false, true); } else if ((dst = __br_fdb_get(br, dest, vid)) != NULL) { br_forward(dst->dst, skb, false, true); } else { - br_flood(br, skb, true, false, true); + br_flood(br, skb, BR_PKT_UNICAST, false, true); } out: rcu_read_unlock(); diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 32a02de39cd2..5de854ed3340 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -176,7 +176,7 @@ out: /* called under rcu_read_lock */ void br_flood(struct net_bridge *br, struct sk_buff *skb, - bool unicast, bool local_rcv, bool local_orig) + enum br_pkt_type pkt_type, bool local_rcv, bool local_orig) { u8 igmp_type = br_multicast_igmp_type(skb); struct net_bridge_port *prev = NULL; @@ -184,7 +184,7 @@ void br_flood(struct net_bridge *br, struct sk_buff *skb, list_for_each_entry_rcu(p, &br->port_list, list) { /* Do not flood unicast traffic to ports that turn it off */ - if (unicast && !(p->flags & BR_FLOOD)) + if (pkt_type == BR_PKT_UNICAST && !(p->flags & BR_FLOOD)) continue; /* Do not flood to ports that enable proxy ARP */ diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 3132cfc80e9d..8a4368461fb0 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -131,11 +131,12 @@ static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br, /* note: already called with rcu_read_lock */ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { - bool local_rcv = false, mcast_hit = false, unicast = true; struct net_bridge_port *p = br_port_get_rcu(skb->dev); const unsigned char *dest = eth_hdr(skb)->h_dest; + enum br_pkt_type pkt_type = BR_PKT_UNICAST; struct net_bridge_fdb_entry *dst = NULL; struct net_bridge_mdb_entry *mdst; + bool local_rcv, mcast_hit = false; struct net_bridge *br; u16 vid = 0; @@ -152,24 +153,29 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb if (p->flags & BR_LEARNING) br_fdb_update(br, p, eth_hdr(skb)->h_source, vid, false); - if (!is_broadcast_ether_addr(dest) && is_multicast_ether_addr(dest) && - br_multicast_rcv(br, p, skb, vid)) - goto drop; + local_rcv = !!(br->dev->flags & IFF_PROMISC); + if (is_multicast_ether_addr(dest)) { + /* by definition the broadcast is also a multicast address */ + if (is_broadcast_ether_addr(dest)) { + pkt_type = BR_PKT_BROADCAST; + local_rcv = true; + } else { + pkt_type = BR_PKT_MULTICAST; + if (br_multicast_rcv(br, p, skb, vid)) + goto drop; + } + } if (p->state == BR_STATE_LEARNING) goto drop; BR_INPUT_SKB_CB(skb)->brdev = br->dev; - local_rcv = !!(br->dev->flags & IFF_PROMISC); - if (IS_ENABLED(CONFIG_INET) && skb->protocol == htons(ETH_P_ARP)) br_do_proxy_arp(skb, br, vid, p); - if (is_broadcast_ether_addr(dest)) { - local_rcv = true; - unicast = false; - } else if (is_multicast_ether_addr(dest)) { + switch (pkt_type) { + case BR_PKT_MULTICAST: mdst = br_mdb_get(br, skb, vid); if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) && br_multicast_querier_exists(br, eth_hdr(skb))) { @@ -183,18 +189,22 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb local_rcv = true; br->dev->stats.multicast++; } - unicast = false; - } else if ((dst = __br_fdb_get(br, dest, vid)) && dst->is_local) { - /* Do not forward the packet since it's local. */ - return br_pass_frame_up(skb); + break; + case BR_PKT_UNICAST: + dst = __br_fdb_get(br, dest, vid); + default: + break; } if (dst) { + if (dst->is_local) + return br_pass_frame_up(skb); + dst->used = jiffies; br_forward(dst->dst, skb, local_rcv, false); } else { if (!mcast_hit) - br_flood(br, skb, unicast, local_rcv, false); + br_flood(br, skb, pkt_type, local_rcv, false); else br_multicast_flood(mdst, skb, local_rcv, false); } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 3d36493f4487..1b63177e0ccd 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -517,12 +517,17 @@ int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p, const unsigned char *addr, u16 vid); /* br_forward.c */ +enum br_pkt_type { + BR_PKT_UNICAST, + BR_PKT_MULTICAST, + BR_PKT_BROADCAST +}; int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb); void br_forward(const struct net_bridge_port *to, struct sk_buff *skb, bool local_rcv, bool local_orig); int br_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb); void br_flood(struct net_bridge *br, struct sk_buff *skb, - bool unicast, bool local_rcv, bool local_orig); + enum br_pkt_type pkt_type, bool local_rcv, bool local_orig); /* br_if.c */ void br_port_carrier_check(struct net_bridge_port *p); From b6cb5ac8331b6bcfe9ce38c7f7f58db6e1d6270a Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 31 Aug 2016 15:36:52 +0200 Subject: [PATCH 0648/1715] net: bridge: add per-port multicast flood flag Add a per-port flag to control the unknown multicast flood, similar to the unknown unicast flood flag and break a few long lines in the netlink flag exports. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/linux/if_bridge.h | 1 + include/uapi/linux/if_link.h | 1 + net/bridge/br_forward.c | 3 +++ net/bridge/br_if.c | 2 +- net/bridge/br_netlink.c | 12 +++++++++--- net/bridge/br_sysfs_if.c | 1 + 6 files changed, 16 insertions(+), 4 deletions(-) diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index dcb89e3515db..c6587c01d951 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -45,6 +45,7 @@ struct br_ip_list { #define BR_PROXYARP BIT(8) #define BR_LEARNING_SYNC BIT(9) #define BR_PROXYARP_WIFI BIT(10) +#define BR_MCAST_FLOOD BIT(11) #define BR_DEFAULT_AGEING_TIME (300 * HZ) diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index a1b5202c5f6b..9bf3aecfe05b 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -318,6 +318,7 @@ enum { IFLA_BRPORT_FLUSH, IFLA_BRPORT_MULTICAST_ROUTER, IFLA_BRPORT_PAD, + IFLA_BRPORT_MCAST_FLOOD, __IFLA_BRPORT_MAX }; #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 5de854ed3340..7cb41aee4c82 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -186,6 +186,9 @@ void br_flood(struct net_bridge *br, struct sk_buff *skb, /* Do not flood unicast traffic to ports that turn it off */ if (pkt_type == BR_PKT_UNICAST && !(p->flags & BR_FLOOD)) continue; + if (pkt_type == BR_PKT_MULTICAST && + !(p->flags & BR_MCAST_FLOOD)) + continue; /* Do not flood to ports that enable proxy ARP */ if (p->flags & BR_PROXYARP) diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 1da3221845f1..ed0dd3340084 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -362,7 +362,7 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br, p->path_cost = port_cost(dev); p->priority = 0x8000 >> BR_PORT_BITS; p->port_no = index; - p->flags = BR_LEARNING | BR_FLOOD; + p->flags = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD; br_init_port(p); br_set_state(p, BR_STATE_DISABLED); br_stp_port_timer_init(p); diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 190a5bc00f4a..e99037c6f7b7 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -169,10 +169,15 @@ static int br_port_fill_attrs(struct sk_buff *skb, nla_put_u32(skb, IFLA_BRPORT_COST, p->path_cost) || nla_put_u8(skb, IFLA_BRPORT_MODE, mode) || nla_put_u8(skb, IFLA_BRPORT_GUARD, !!(p->flags & BR_BPDU_GUARD)) || - nla_put_u8(skb, IFLA_BRPORT_PROTECT, !!(p->flags & BR_ROOT_BLOCK)) || - nla_put_u8(skb, IFLA_BRPORT_FAST_LEAVE, !!(p->flags & BR_MULTICAST_FAST_LEAVE)) || + nla_put_u8(skb, IFLA_BRPORT_PROTECT, + !!(p->flags & BR_ROOT_BLOCK)) || + nla_put_u8(skb, IFLA_BRPORT_FAST_LEAVE, + !!(p->flags & BR_MULTICAST_FAST_LEAVE)) || nla_put_u8(skb, IFLA_BRPORT_LEARNING, !!(p->flags & BR_LEARNING)) || - nla_put_u8(skb, IFLA_BRPORT_UNICAST_FLOOD, !!(p->flags & BR_FLOOD)) || + nla_put_u8(skb, IFLA_BRPORT_UNICAST_FLOOD, + !!(p->flags & BR_FLOOD)) || + nla_put_u8(skb, IFLA_BRPORT_MCAST_FLOOD, + !!(p->flags & BR_MCAST_FLOOD)) || nla_put_u8(skb, IFLA_BRPORT_PROXYARP, !!(p->flags & BR_PROXYARP)) || nla_put_u8(skb, IFLA_BRPORT_PROXYARP_WIFI, !!(p->flags & BR_PROXYARP_WIFI)) || @@ -630,6 +635,7 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[]) br_set_port_flag(p, tb, IFLA_BRPORT_PROTECT, BR_ROOT_BLOCK); br_set_port_flag(p, tb, IFLA_BRPORT_LEARNING, BR_LEARNING); br_set_port_flag(p, tb, IFLA_BRPORT_UNICAST_FLOOD, BR_FLOOD); + br_set_port_flag(p, tb, IFLA_BRPORT_MCAST_FLOOD, BR_MCAST_FLOOD); br_set_port_flag(p, tb, IFLA_BRPORT_PROXYARP, BR_PROXYARP); br_set_port_flag(p, tb, IFLA_BRPORT_PROXYARP_WIFI, BR_PROXYARP_WIFI); diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index 1e04d4d44273..e657258e1f2c 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -171,6 +171,7 @@ BRPORT_ATTR_FLAG(learning, BR_LEARNING); BRPORT_ATTR_FLAG(unicast_flood, BR_FLOOD); BRPORT_ATTR_FLAG(proxyarp, BR_PROXYARP); BRPORT_ATTR_FLAG(proxyarp_wifi, BR_PROXYARP_WIFI); +BRPORT_ATTR_FLAG(multicast_flood, BR_MCAST_FLOOD); #ifdef CONFIG_BRIDGE_IGMP_SNOOPING static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf) From 04bed1434df256b07e78fa5deae848bba18a90f3 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Wed, 31 Aug 2016 18:06:13 -0400 Subject: [PATCH 0649/1715] net: dsa: remove ds_to_priv Access the priv member of the dsa_switch structure directly, instead of having an unnecessary helper. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_common.c | 42 +++++++++---------- drivers/net/dsa/bcm_sf2.h | 2 +- drivers/net/dsa/mv88e6060.c | 4 +- drivers/net/dsa/mv88e6xxx/chip.c | 72 ++++++++++++++++---------------- include/net/dsa.h | 5 --- 5 files changed, 60 insertions(+), 65 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 1299104a87d4..0afc2e5a04dc 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -477,7 +477,7 @@ static int b53_fast_age_vlan(struct b53_device *dev, u16 vid) static void b53_imp_vlan_setup(struct dsa_switch *ds, int cpu_port) { - struct b53_device *dev = ds_to_priv(ds); + struct b53_device *dev = ds->priv; unsigned int i; u16 pvlan; @@ -495,7 +495,7 @@ static void b53_imp_vlan_setup(struct dsa_switch *ds, int cpu_port) static int b53_enable_port(struct dsa_switch *ds, int port, struct phy_device *phy) { - struct b53_device *dev = ds_to_priv(ds); + struct b53_device *dev = ds->priv; unsigned int cpu_port = dev->cpu_port; u16 pvlan; @@ -520,7 +520,7 @@ static int b53_enable_port(struct dsa_switch *ds, int port, static void b53_disable_port(struct dsa_switch *ds, int port, struct phy_device *phy) { - struct b53_device *dev = ds_to_priv(ds); + struct b53_device *dev = ds->priv; u8 reg; /* Disable Tx/Rx for the port */ @@ -629,7 +629,7 @@ static int b53_switch_reset(struct b53_device *dev) static int b53_phy_read16(struct dsa_switch *ds, int addr, int reg) { - struct b53_device *priv = ds_to_priv(ds); + struct b53_device *priv = ds->priv; u16 value = 0; int ret; @@ -644,7 +644,7 @@ static int b53_phy_read16(struct dsa_switch *ds, int addr, int reg) static int b53_phy_write16(struct dsa_switch *ds, int addr, int reg, u16 val) { - struct b53_device *priv = ds_to_priv(ds); + struct b53_device *priv = ds->priv; if (priv->ops->phy_write16) return priv->ops->phy_write16(priv, addr, reg, val); @@ -714,7 +714,7 @@ static unsigned int b53_get_mib_size(struct b53_device *dev) static void b53_get_strings(struct dsa_switch *ds, int port, uint8_t *data) { - struct b53_device *dev = ds_to_priv(ds); + struct b53_device *dev = ds->priv; const struct b53_mib_desc *mibs = b53_get_mib(dev); unsigned int mib_size = b53_get_mib_size(dev); unsigned int i; @@ -727,7 +727,7 @@ static void b53_get_strings(struct dsa_switch *ds, int port, uint8_t *data) static void b53_get_ethtool_stats(struct dsa_switch *ds, int port, uint64_t *data) { - struct b53_device *dev = ds_to_priv(ds); + struct b53_device *dev = ds->priv; const struct b53_mib_desc *mibs = b53_get_mib(dev); unsigned int mib_size = b53_get_mib_size(dev); const struct b53_mib_desc *s; @@ -759,7 +759,7 @@ static void b53_get_ethtool_stats(struct dsa_switch *ds, int port, static int b53_get_sset_count(struct dsa_switch *ds) { - struct b53_device *dev = ds_to_priv(ds); + struct b53_device *dev = ds->priv; return b53_get_mib_size(dev); } @@ -771,7 +771,7 @@ static int b53_set_addr(struct dsa_switch *ds, u8 *addr) static int b53_setup(struct dsa_switch *ds) { - struct b53_device *dev = ds_to_priv(ds); + struct b53_device *dev = ds->priv; unsigned int port; int ret; @@ -802,7 +802,7 @@ static int b53_setup(struct dsa_switch *ds) static void b53_adjust_link(struct dsa_switch *ds, int port, struct phy_device *phydev) { - struct b53_device *dev = ds_to_priv(ds); + struct b53_device *dev = ds->priv; u8 rgmii_ctrl = 0, reg = 0, off; if (!phy_is_pseudo_fixed_link(phydev)) @@ -936,7 +936,7 @@ static int b53_vlan_prepare(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan, struct switchdev_trans *trans) { - struct b53_device *dev = ds_to_priv(ds); + struct b53_device *dev = ds->priv; if ((is5325(dev) || is5365(dev)) && vlan->vid_begin == 0) return -EOPNOTSUPP; @@ -953,7 +953,7 @@ static void b53_vlan_add(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan, struct switchdev_trans *trans) { - struct b53_device *dev = ds_to_priv(ds); + struct b53_device *dev = ds->priv; bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID; unsigned int cpu_port = dev->cpu_port; @@ -987,7 +987,7 @@ static void b53_vlan_add(struct dsa_switch *ds, int port, static int b53_vlan_del(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan) { - struct b53_device *dev = ds_to_priv(ds); + struct b53_device *dev = ds->priv; bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; unsigned int cpu_port = dev->cpu_port; struct b53_vlan *vl; @@ -1033,7 +1033,7 @@ static int b53_vlan_dump(struct dsa_switch *ds, int port, struct switchdev_obj_port_vlan *vlan, int (*cb)(struct switchdev_obj *obj)) { - struct b53_device *dev = ds_to_priv(ds); + struct b53_device *dev = ds->priv; u16 vid, vid_start = 0, pvid; struct b53_vlan *vl; int err = 0; @@ -1192,7 +1192,7 @@ static int b53_fdb_prepare(struct dsa_switch *ds, int port, const struct switchdev_obj_port_fdb *fdb, struct switchdev_trans *trans) { - struct b53_device *priv = ds_to_priv(ds); + struct b53_device *priv = ds->priv; /* 5325 and 5365 require some more massaging, but could * be supported eventually @@ -1207,7 +1207,7 @@ static void b53_fdb_add(struct dsa_switch *ds, int port, const struct switchdev_obj_port_fdb *fdb, struct switchdev_trans *trans) { - struct b53_device *priv = ds_to_priv(ds); + struct b53_device *priv = ds->priv; if (b53_arl_op(priv, 0, port, fdb->addr, fdb->vid, true)) pr_err("%s: failed to add MAC address\n", __func__); @@ -1216,7 +1216,7 @@ static void b53_fdb_add(struct dsa_switch *ds, int port, static int b53_fdb_del(struct dsa_switch *ds, int port, const struct switchdev_obj_port_fdb *fdb) { - struct b53_device *priv = ds_to_priv(ds); + struct b53_device *priv = ds->priv; return b53_arl_op(priv, 0, port, fdb->addr, fdb->vid, false); } @@ -1275,7 +1275,7 @@ static int b53_fdb_dump(struct dsa_switch *ds, int port, struct switchdev_obj_port_fdb *fdb, int (*cb)(struct switchdev_obj *obj)) { - struct b53_device *priv = ds_to_priv(ds); + struct b53_device *priv = ds->priv; struct net_device *dev = ds->ports[port].netdev; struct b53_arl_entry results[2]; unsigned int count = 0; @@ -1314,7 +1314,7 @@ static int b53_fdb_dump(struct dsa_switch *ds, int port, static int b53_br_join(struct dsa_switch *ds, int port, struct net_device *bridge) { - struct b53_device *dev = ds_to_priv(ds); + struct b53_device *dev = ds->priv; s8 cpu_port = ds->dst->cpu_port; u16 pvlan, reg; unsigned int i; @@ -1359,7 +1359,7 @@ static int b53_br_join(struct dsa_switch *ds, int port, static void b53_br_leave(struct dsa_switch *ds, int port) { - struct b53_device *dev = ds_to_priv(ds); + struct b53_device *dev = ds->priv; struct net_device *bridge = dev->ports[port].bridge_dev; struct b53_vlan *vl = &dev->vlans[0]; s8 cpu_port = ds->dst->cpu_port; @@ -1410,7 +1410,7 @@ static void b53_br_leave(struct dsa_switch *ds, int port) static void b53_br_set_stp_state(struct dsa_switch *ds, int port, u8 state) { - struct b53_device *dev = ds_to_priv(ds); + struct b53_device *dev = ds->priv; u8 hw_state, cur_hw_state; u8 reg; diff --git a/drivers/net/dsa/bcm_sf2.h b/drivers/net/dsa/bcm_sf2.h index afe56686b3d7..46c4ea796574 100644 --- a/drivers/net/dsa/bcm_sf2.h +++ b/drivers/net/dsa/bcm_sf2.h @@ -101,7 +101,7 @@ struct bcm_sf2_priv { static inline struct bcm_sf2_priv *bcm_sf2_to_priv(struct dsa_switch *ds) { - struct b53_device *dev = ds_to_priv(ds); + struct b53_device *dev = ds->priv; return dev->priv; } diff --git a/drivers/net/dsa/mv88e6060.c b/drivers/net/dsa/mv88e6060.c index 7ff9d373a9ee..7ce36dbd9b62 100644 --- a/drivers/net/dsa/mv88e6060.c +++ b/drivers/net/dsa/mv88e6060.c @@ -19,7 +19,7 @@ static int reg_read(struct dsa_switch *ds, int addr, int reg) { - struct mv88e6060_priv *priv = ds_to_priv(ds); + struct mv88e6060_priv *priv = ds->priv; return mdiobus_read_nested(priv->bus, priv->sw_addr + addr, reg); } @@ -37,7 +37,7 @@ static int reg_read(struct dsa_switch *ds, int addr, int reg) static int reg_write(struct dsa_switch *ds, int addr, int reg, u16 val) { - struct mv88e6060_priv *priv = ds_to_priv(ds); + struct mv88e6060_priv *priv = ds->priv; return mdiobus_write_nested(priv->bus, priv->sw_addr + addr, reg, val); } diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index e2a953e7f5f5..d6b0f78e9598 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -586,7 +586,7 @@ static bool mv88e6xxx_has_fid_reg(struct mv88e6xxx_chip *chip) static void mv88e6xxx_adjust_link(struct dsa_switch *ds, int port, struct phy_device *phydev) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; u32 reg; int ret; @@ -832,7 +832,7 @@ static uint64_t _mv88e6xxx_get_ethtool_stat(struct mv88e6xxx_chip *chip, static void mv88e6xxx_get_strings(struct dsa_switch *ds, int port, uint8_t *data) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; struct mv88e6xxx_hw_stat *stat; int i, j; @@ -848,7 +848,7 @@ static void mv88e6xxx_get_strings(struct dsa_switch *ds, int port, static int mv88e6xxx_get_sset_count(struct dsa_switch *ds) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; struct mv88e6xxx_hw_stat *stat; int i, j; @@ -863,7 +863,7 @@ static int mv88e6xxx_get_sset_count(struct dsa_switch *ds) static void mv88e6xxx_get_ethtool_stats(struct dsa_switch *ds, int port, uint64_t *data) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; struct mv88e6xxx_hw_stat *stat; int ret; int i, j; @@ -894,7 +894,7 @@ static int mv88e6xxx_get_regs_len(struct dsa_switch *ds, int port) static void mv88e6xxx_get_regs(struct dsa_switch *ds, int port, struct ethtool_regs *regs, void *_p) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; u16 *p = _p; int i; @@ -924,7 +924,7 @@ static int _mv88e6xxx_atu_wait(struct mv88e6xxx_chip *chip) static int mv88e6xxx_get_eee(struct dsa_switch *ds, int port, struct ethtool_eee *e) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; u16 reg; int err; @@ -954,7 +954,7 @@ out: static int mv88e6xxx_set_eee(struct dsa_switch *ds, int port, struct phy_device *phydev, struct ethtool_eee *e) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; u16 reg; int err; @@ -1185,7 +1185,7 @@ static int _mv88e6xxx_port_based_vlan_map(struct mv88e6xxx_chip *chip, int port) static void mv88e6xxx_port_stp_state_set(struct dsa_switch *ds, int port, u8 state) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; int stp_state; int err; @@ -1434,7 +1434,7 @@ static int mv88e6xxx_port_vlan_dump(struct dsa_switch *ds, int port, struct switchdev_obj_port_vlan *vlan, int (*cb)(struct switchdev_obj *obj)) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; struct mv88e6xxx_vtu_stu_entry next; u16 pvid; int err; @@ -1803,7 +1803,7 @@ static int _mv88e6xxx_vtu_get(struct mv88e6xxx_chip *chip, u16 vid, static int mv88e6xxx_port_check_hw_vlan(struct dsa_switch *ds, int port, u16 vid_begin, u16 vid_end) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; struct mv88e6xxx_vtu_stu_entry vlan; int i, err; @@ -1864,7 +1864,7 @@ static const char * const mv88e6xxx_port_8021q_mode_names[] = { static int mv88e6xxx_port_vlan_filtering(struct dsa_switch *ds, int port, bool vlan_filtering) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; u16 old, new = vlan_filtering ? PORT_CONTROL_2_8021Q_SECURE : PORT_CONTROL_2_8021Q_DISABLED; int ret; @@ -1906,7 +1906,7 @@ mv88e6xxx_port_vlan_prepare(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan, struct switchdev_trans *trans) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; int err; if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_VTU)) @@ -1947,7 +1947,7 @@ static void mv88e6xxx_port_vlan_add(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan, struct switchdev_trans *trans) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID; u16 vid; @@ -2009,7 +2009,7 @@ static int _mv88e6xxx_port_vlan_del(struct mv88e6xxx_chip *chip, static int mv88e6xxx_port_vlan_del(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; u16 pvid, vid; int err = 0; @@ -2134,7 +2134,7 @@ static void mv88e6xxx_port_fdb_add(struct dsa_switch *ds, int port, const struct switchdev_obj_port_fdb *fdb, struct switchdev_trans *trans) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; mutex_lock(&chip->reg_lock); if (mv88e6xxx_port_db_load_purge(chip, port, fdb->addr, fdb->vid, @@ -2146,7 +2146,7 @@ static void mv88e6xxx_port_fdb_add(struct dsa_switch *ds, int port, static int mv88e6xxx_port_fdb_del(struct dsa_switch *ds, int port, const struct switchdev_obj_port_fdb *fdb) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; int err; mutex_lock(&chip->reg_lock); @@ -2306,7 +2306,7 @@ static int mv88e6xxx_port_fdb_dump(struct dsa_switch *ds, int port, struct switchdev_obj_port_fdb *fdb, int (*cb)(struct switchdev_obj *obj)) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; int err; mutex_lock(&chip->reg_lock); @@ -2319,7 +2319,7 @@ static int mv88e6xxx_port_fdb_dump(struct dsa_switch *ds, int port, static int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, int port, struct net_device *bridge) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; int i, err = 0; mutex_lock(&chip->reg_lock); @@ -2342,7 +2342,7 @@ static int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, int port, static void mv88e6xxx_port_bridge_leave(struct dsa_switch *ds, int port) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; struct net_device *bridge = chip->ports[port].bridge_dev; int i; @@ -2763,7 +2763,7 @@ static int mv88e6xxx_g1_set_age_time(struct mv88e6xxx_chip *chip, static int mv88e6xxx_set_ageing_time(struct dsa_switch *ds, unsigned int ageing_time) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; int err; mutex_lock(&chip->reg_lock); @@ -3204,7 +3204,7 @@ static int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip) static int mv88e6xxx_setup(struct dsa_switch *ds) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; int err; int i; @@ -3244,7 +3244,7 @@ unlock: static int mv88e6xxx_set_addr(struct dsa_switch *ds, u8 *addr) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; int err; mutex_lock(&chip->reg_lock); @@ -3352,7 +3352,7 @@ static void mv88e6xxx_mdio_unregister(struct mv88e6xxx_chip *chip) static int mv88e61xx_get_temp(struct dsa_switch *ds, int *temp) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; u16 val; int ret; @@ -3395,7 +3395,7 @@ error: static int mv88e63xx_get_temp(struct dsa_switch *ds, int *temp) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; int phy = mv88e6xxx_6320_family(chip) ? 3 : 0; u16 val; int ret; @@ -3415,7 +3415,7 @@ static int mv88e63xx_get_temp(struct dsa_switch *ds, int *temp) static int mv88e6xxx_get_temp(struct dsa_switch *ds, int *temp) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_TEMP)) return -EOPNOTSUPP; @@ -3428,7 +3428,7 @@ static int mv88e6xxx_get_temp(struct dsa_switch *ds, int *temp) static int mv88e6xxx_get_temp_limit(struct dsa_switch *ds, int *temp) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; int phy = mv88e6xxx_6320_family(chip) ? 3 : 0; u16 val; int ret; @@ -3451,7 +3451,7 @@ static int mv88e6xxx_get_temp_limit(struct dsa_switch *ds, int *temp) static int mv88e6xxx_set_temp_limit(struct dsa_switch *ds, int temp) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; int phy = mv88e6xxx_6320_family(chip) ? 3 : 0; u16 val; int err; @@ -3474,7 +3474,7 @@ unlock: static int mv88e6xxx_get_temp_alarm(struct dsa_switch *ds, bool *alarm) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; int phy = mv88e6xxx_6320_family(chip) ? 3 : 0; u16 val; int ret; @@ -3498,7 +3498,7 @@ static int mv88e6xxx_get_temp_alarm(struct dsa_switch *ds, bool *alarm) static int mv88e6xxx_get_eeprom_len(struct dsa_switch *ds) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; return chip->eeprom_len; } @@ -3556,7 +3556,7 @@ static int mv88e6xxx_get_eeprom16(struct mv88e6xxx_chip *chip, static int mv88e6xxx_get_eeprom(struct dsa_switch *ds, struct ethtool_eeprom *eeprom, u8 *data) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; int err; mutex_lock(&chip->reg_lock); @@ -3645,7 +3645,7 @@ static int mv88e6xxx_set_eeprom16(struct mv88e6xxx_chip *chip, static int mv88e6xxx_set_eeprom(struct dsa_switch *ds, struct ethtool_eeprom *eeprom, u8 *data) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; int err; if (eeprom->magic != 0xc3ec4951) @@ -3953,7 +3953,7 @@ static int mv88e6xxx_smi_init(struct mv88e6xxx_chip *chip, static enum dsa_tag_protocol mv88e6xxx_get_tag_protocol(struct dsa_switch *ds) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_EDSA)) return DSA_TAG_PROTO_EDSA; @@ -4018,7 +4018,7 @@ static void mv88e6xxx_port_mdb_add(struct dsa_switch *ds, int port, const struct switchdev_obj_port_mdb *mdb, struct switchdev_trans *trans) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; mutex_lock(&chip->reg_lock); if (mv88e6xxx_port_db_load_purge(chip, port, mdb->addr, mdb->vid, @@ -4030,7 +4030,7 @@ static void mv88e6xxx_port_mdb_add(struct dsa_switch *ds, int port, static int mv88e6xxx_port_mdb_del(struct dsa_switch *ds, int port, const struct switchdev_obj_port_mdb *mdb) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; int err; mutex_lock(&chip->reg_lock); @@ -4045,7 +4045,7 @@ static int mv88e6xxx_port_mdb_dump(struct dsa_switch *ds, int port, struct switchdev_obj_port_mdb *mdb, int (*cb)(struct switchdev_obj *obj)) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; int err; mutex_lock(&chip->reg_lock); @@ -4173,7 +4173,7 @@ static int mv88e6xxx_probe(struct mdio_device *mdiodev) static void mv88e6xxx_remove(struct mdio_device *mdiodev) { struct dsa_switch *ds = dev_get_drvdata(&mdiodev->dev); - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; mv88e6xxx_phy_destroy(chip); mv88e6xxx_unregister_switch(chip); diff --git a/include/net/dsa.h b/include/net/dsa.h index e3eb230b970d..9d97c5214341 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -391,11 +391,6 @@ void register_switch_driver(struct dsa_switch_ops *type); void unregister_switch_driver(struct dsa_switch_ops *type); struct mii_bus *dsa_host_dev_to_mii_bus(struct device *dev); -static inline void *ds_to_priv(struct dsa_switch *ds) -{ - return ds->priv; -} - static inline bool dsa_uses_tagged_protocol(struct dsa_switch_tree *dst) { return dst->rcv != NULL; From b9b17debc69d27cd55e21ee51a5ba7fc50a426cf Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Wed, 31 Aug 2016 18:22:08 -0500 Subject: [PATCH 0650/1715] net: emac: emac gigabit ethernet controller driver Add support for the Qualcomm Technologies, Inc. EMAC gigabit Ethernet controller. This driver supports the following features: 1) Checksum offload. 2) Interrupt coalescing support. 3) SGMII phy. 4) phylib interface for external phy Based on original work by Niranjana Vishwanathapura Gilad Avidov Signed-off-by: Timur Tabi Signed-off-by: David S. Miller --- .../devicetree/bindings/net/qcom-emac.txt | 111 ++ MAINTAINERS | 6 + drivers/net/ethernet/qualcomm/Kconfig | 12 + drivers/net/ethernet/qualcomm/Makefile | 2 + drivers/net/ethernet/qualcomm/emac/Makefile | 7 + drivers/net/ethernet/qualcomm/emac/emac-mac.c | 1528 +++++++++++++++++ drivers/net/ethernet/qualcomm/emac/emac-mac.h | 248 +++ drivers/net/ethernet/qualcomm/emac/emac-phy.c | 204 +++ drivers/net/ethernet/qualcomm/emac/emac-phy.h | 33 + .../net/ethernet/qualcomm/emac/emac-sgmii.c | 721 ++++++++ .../net/ethernet/qualcomm/emac/emac-sgmii.h | 24 + drivers/net/ethernet/qualcomm/emac/emac.c | 743 ++++++++ drivers/net/ethernet/qualcomm/emac/emac.h | 335 ++++ 13 files changed, 3974 insertions(+) create mode 100644 Documentation/devicetree/bindings/net/qcom-emac.txt create mode 100644 drivers/net/ethernet/qualcomm/emac/Makefile create mode 100644 drivers/net/ethernet/qualcomm/emac/emac-mac.c create mode 100644 drivers/net/ethernet/qualcomm/emac/emac-mac.h create mode 100644 drivers/net/ethernet/qualcomm/emac/emac-phy.c create mode 100644 drivers/net/ethernet/qualcomm/emac/emac-phy.h create mode 100644 drivers/net/ethernet/qualcomm/emac/emac-sgmii.c create mode 100644 drivers/net/ethernet/qualcomm/emac/emac-sgmii.h create mode 100644 drivers/net/ethernet/qualcomm/emac/emac.c create mode 100644 drivers/net/ethernet/qualcomm/emac/emac.h diff --git a/Documentation/devicetree/bindings/net/qcom-emac.txt b/Documentation/devicetree/bindings/net/qcom-emac.txt new file mode 100644 index 000000000000..346e6c7f47b7 --- /dev/null +++ b/Documentation/devicetree/bindings/net/qcom-emac.txt @@ -0,0 +1,111 @@ +Qualcomm Technologies EMAC Gigabit Ethernet Controller + +This network controller consists of two devices: a MAC and an SGMII +internal PHY. Each device is represented by a device tree node. A phandle +connects the MAC node to its corresponding internal phy node. Another +phandle points to the external PHY node. + +Required properties: + +MAC node: +- compatible : Should be "qcom,fsm9900-emac". +- reg : Offset and length of the register regions for the device +- interrupts : Interrupt number used by this controller +- mac-address : The 6-byte MAC address. If present, it is the default + MAC address. +- internal-phy : phandle to the internal PHY node +- phy-handle : phandle the the external PHY node + +Internal PHY node: +- compatible : Should be "qcom,fsm9900-emac-sgmii" or "qcom,qdf2432-emac-sgmii". +- reg : Offset and length of the register region(s) for the device +- interrupts : Interrupt number used by this controller + +The external phy child node: +- reg : The phy address + +Example: + +FSM9900: + +soc { + #address-cells = <1>; + #size-cells = <1>; + + emac0: ethernet@feb20000 { + compatible = "qcom,fsm9900-emac"; + reg = <0xfeb20000 0x10000>, + <0xfeb36000 0x1000>; + interrupts = <76>; + + clocks = <&gcc 0>, <&gcc 1>, <&gcc 3>, <&gcc 4>, <&gcc 5>, + <&gcc 6>, <&gcc 7>; + clock-names = "axi_clk", "cfg_ahb_clk", "high_speed_clk", + "mdio_clk", "tx_clk", "rx_clk", "sys_clk"; + + internal-phy = <&emac_sgmii>; + + phy-handle = <&phy0>; + + #address-cells = <1>; + #size-cells = <0>; + phy0: ethernet-phy@0 { + reg = <0>; + }; + + pinctrl-names = "default"; + pinctrl-0 = <&mdio_pins_a>; + }; + + emac_sgmii: ethernet@feb38000 { + compatible = "qcom,fsm9900-emac-sgmii"; + reg = <0xfeb38000 0x1000>; + interrupts = <80>; + }; + + tlmm: pinctrl@fd510000 { + compatible = "qcom,fsm9900-pinctrl"; + + mdio_pins_a: mdio { + state { + pins = "gpio123", "gpio124"; + function = "mdio"; + }; + }; + }; + + +QDF2432: + +soc { + #address-cells = <2>; + #size-cells = <2>; + + emac0: ethernet@38800000 { + compatible = "qcom,fsm9900-emac"; + reg = <0x0 0x38800000 0x0 0x10000>, + <0x0 0x38816000 0x0 0x1000>; + interrupts = <0 256 4>; + + clocks = <&gcc 0>, <&gcc 1>, <&gcc 3>, <&gcc 4>, <&gcc 5>, + <&gcc 6>, <&gcc 7>; + clock-names = "axi_clk", "cfg_ahb_clk", "high_speed_clk", + "mdio_clk", "tx_clk", "rx_clk", "sys_clk"; + + internal-phy = <&emac_sgmii>; + + phy-handle = <&phy0>; + + #address-cells = <1>; + #size-cells = <0>; + phy0: ethernet-phy@4 { + reg = <4>; + }; + }; + + emac_sgmii: ethernet@410400 { + compatible = "qcom,qdf2432-emac-sgmii"; + reg = <0x0 0x00410400 0x0 0xc00>, /* Base address */ + <0x0 0x00410000 0x0 0x400>; /* Per-lane digital */ + interrupts = <0 254 1>; + }; diff --git a/MAINTAINERS b/MAINTAINERS index 58b50296e0ee..734e03556e2c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9696,6 +9696,12 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git S: Supported F: drivers/net/wireless/ath/ath10k/ +QUALCOMM EMAC GIGABIT ETHERNET DRIVER +M: Timur Tabi +L: netdev@vger.kernel.org +S: Supported +F: drivers/net/ethernet/qualcomm/emac/ + QUALCOMM HEXAGON ARCHITECTURE M: Richard Kuo L: linux-hexagon@vger.kernel.org diff --git a/drivers/net/ethernet/qualcomm/Kconfig b/drivers/net/ethernet/qualcomm/Kconfig index a76e380cf89a..9ba568db576f 100644 --- a/drivers/net/ethernet/qualcomm/Kconfig +++ b/drivers/net/ethernet/qualcomm/Kconfig @@ -24,4 +24,16 @@ config QCA7000 To compile this driver as a module, choose M here. The module will be called qcaspi. +config QCOM_EMAC + tristate "Qualcomm Technologies, Inc. EMAC Gigabit Ethernet support" + select CRC32 + select PHYLIB + ---help--- + This driver supports the Qualcomm Technologies, Inc. Gigabit + Ethernet Media Access Controller (EMAC). The controller + supports IEEE 802.3-2002, half-duplex mode at 10/100 Mb/s, + full-duplex mode at 10/100/1000Mb/s, Wake On LAN (WOL) for + low power, Receive-Side Scaling (RSS), and IEEE 1588-2008 + Precision Clock Synchronization Protocol. + endif # NET_VENDOR_QUALCOMM diff --git a/drivers/net/ethernet/qualcomm/Makefile b/drivers/net/ethernet/qualcomm/Makefile index 9da2d75db700..aacb0a585c68 100644 --- a/drivers/net/ethernet/qualcomm/Makefile +++ b/drivers/net/ethernet/qualcomm/Makefile @@ -4,3 +4,5 @@ obj-$(CONFIG_QCA7000) += qcaspi.o qcaspi-objs := qca_spi.o qca_framing.o qca_7k.o qca_debug.o + +obj-y += emac/ diff --git a/drivers/net/ethernet/qualcomm/emac/Makefile b/drivers/net/ethernet/qualcomm/emac/Makefile new file mode 100644 index 000000000000..01ee144c6386 --- /dev/null +++ b/drivers/net/ethernet/qualcomm/emac/Makefile @@ -0,0 +1,7 @@ +# +# Makefile for the Qualcomm Technologies, Inc. EMAC Gigabit Ethernet driver +# + +obj-$(CONFIG_QCOM_EMAC) += qcom-emac.o + +qcom-emac-objs := emac.o emac-mac.o emac-phy.o emac-sgmii.o diff --git a/drivers/net/ethernet/qualcomm/emac/emac-mac.c b/drivers/net/ethernet/qualcomm/emac/emac-mac.c new file mode 100644 index 000000000000..e97968ed4b8f --- /dev/null +++ b/drivers/net/ethernet/qualcomm/emac/emac-mac.c @@ -0,0 +1,1528 @@ +/* Copyright (c) 2013-2016, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +/* Qualcomm Technologies, Inc. EMAC Ethernet Controller MAC layer support + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "emac.h" +#include "emac-sgmii.h" + +/* EMAC base register offsets */ +#define EMAC_MAC_CTRL 0x001480 +#define EMAC_WOL_CTRL0 0x0014a0 +#define EMAC_RSS_KEY0 0x0014b0 +#define EMAC_H1TPD_BASE_ADDR_LO 0x0014e0 +#define EMAC_H2TPD_BASE_ADDR_LO 0x0014e4 +#define EMAC_H3TPD_BASE_ADDR_LO 0x0014e8 +#define EMAC_INTER_SRAM_PART9 0x001534 +#define EMAC_DESC_CTRL_0 0x001540 +#define EMAC_DESC_CTRL_1 0x001544 +#define EMAC_DESC_CTRL_2 0x001550 +#define EMAC_DESC_CTRL_10 0x001554 +#define EMAC_DESC_CTRL_12 0x001558 +#define EMAC_DESC_CTRL_13 0x00155c +#define EMAC_DESC_CTRL_3 0x001560 +#define EMAC_DESC_CTRL_4 0x001564 +#define EMAC_DESC_CTRL_5 0x001568 +#define EMAC_DESC_CTRL_14 0x00156c +#define EMAC_DESC_CTRL_15 0x001570 +#define EMAC_DESC_CTRL_16 0x001574 +#define EMAC_DESC_CTRL_6 0x001578 +#define EMAC_DESC_CTRL_8 0x001580 +#define EMAC_DESC_CTRL_9 0x001584 +#define EMAC_DESC_CTRL_11 0x001588 +#define EMAC_TXQ_CTRL_0 0x001590 +#define EMAC_TXQ_CTRL_1 0x001594 +#define EMAC_TXQ_CTRL_2 0x001598 +#define EMAC_RXQ_CTRL_0 0x0015a0 +#define EMAC_RXQ_CTRL_1 0x0015a4 +#define EMAC_RXQ_CTRL_2 0x0015a8 +#define EMAC_RXQ_CTRL_3 0x0015ac +#define EMAC_BASE_CPU_NUMBER 0x0015b8 +#define EMAC_DMA_CTRL 0x0015c0 +#define EMAC_MAILBOX_0 0x0015e0 +#define EMAC_MAILBOX_5 0x0015e4 +#define EMAC_MAILBOX_6 0x0015e8 +#define EMAC_MAILBOX_13 0x0015ec +#define EMAC_MAILBOX_2 0x0015f4 +#define EMAC_MAILBOX_3 0x0015f8 +#define EMAC_MAILBOX_11 0x00160c +#define EMAC_AXI_MAST_CTRL 0x001610 +#define EMAC_MAILBOX_12 0x001614 +#define EMAC_MAILBOX_9 0x001618 +#define EMAC_MAILBOX_10 0x00161c +#define EMAC_ATHR_HEADER_CTRL 0x001620 +#define EMAC_CLK_GATE_CTRL 0x001814 +#define EMAC_MISC_CTRL 0x001990 +#define EMAC_MAILBOX_7 0x0019e0 +#define EMAC_MAILBOX_8 0x0019e4 +#define EMAC_MAILBOX_15 0x001bd4 +#define EMAC_MAILBOX_16 0x001bd8 + +/* EMAC_MAC_CTRL */ +#define SINGLE_PAUSE_MODE 0x10000000 +#define DEBUG_MODE 0x08000000 +#define BROAD_EN 0x04000000 +#define MULTI_ALL 0x02000000 +#define RX_CHKSUM_EN 0x01000000 +#define HUGE 0x00800000 +#define SPEED(x) (((x) & 0x3) << 20) +#define SPEED_MASK SPEED(0x3) +#define SIMR 0x00080000 +#define TPAUSE 0x00010000 +#define PROM_MODE 0x00008000 +#define VLAN_STRIP 0x00004000 +#define PRLEN_BMSK 0x00003c00 +#define PRLEN_SHFT 10 +#define HUGEN 0x00000200 +#define FLCHK 0x00000100 +#define PCRCE 0x00000080 +#define CRCE 0x00000040 +#define FULLD 0x00000020 +#define MAC_LP_EN 0x00000010 +#define RXFC 0x00000008 +#define TXFC 0x00000004 +#define RXEN 0x00000002 +#define TXEN 0x00000001 + + +/* EMAC_WOL_CTRL0 */ +#define LK_CHG_PME 0x20 +#define LK_CHG_EN 0x10 +#define MG_FRAME_PME 0x8 +#define MG_FRAME_EN 0x4 +#define WK_FRAME_EN 0x1 + +/* EMAC_DESC_CTRL_3 */ +#define RFD_RING_SIZE_BMSK 0xfff + +/* EMAC_DESC_CTRL_4 */ +#define RX_BUFFER_SIZE_BMSK 0xffff + +/* EMAC_DESC_CTRL_6 */ +#define RRD_RING_SIZE_BMSK 0xfff + +/* EMAC_DESC_CTRL_9 */ +#define TPD_RING_SIZE_BMSK 0xffff + +/* EMAC_TXQ_CTRL_0 */ +#define NUM_TXF_BURST_PREF_BMSK 0xffff0000 +#define NUM_TXF_BURST_PREF_SHFT 16 +#define LS_8023_SP 0x80 +#define TXQ_MODE 0x40 +#define TXQ_EN 0x20 +#define IP_OP_SP 0x10 +#define NUM_TPD_BURST_PREF_BMSK 0xf +#define NUM_TPD_BURST_PREF_SHFT 0 + +/* EMAC_TXQ_CTRL_1 */ +#define JUMBO_TASK_OFFLOAD_THRESHOLD_BMSK 0x7ff + +/* EMAC_TXQ_CTRL_2 */ +#define TXF_HWM_BMSK 0xfff0000 +#define TXF_LWM_BMSK 0xfff + +/* EMAC_RXQ_CTRL_0 */ +#define RXQ_EN BIT(31) +#define CUT_THRU_EN BIT(30) +#define RSS_HASH_EN BIT(29) +#define NUM_RFD_BURST_PREF_BMSK 0x3f00000 +#define NUM_RFD_BURST_PREF_SHFT 20 +#define IDT_TABLE_SIZE_BMSK 0x1ff00 +#define IDT_TABLE_SIZE_SHFT 8 +#define SP_IPV6 0x80 + +/* EMAC_RXQ_CTRL_1 */ +#define JUMBO_1KAH_BMSK 0xf000 +#define JUMBO_1KAH_SHFT 12 +#define RFD_PREF_LOW_TH 0x10 +#define RFD_PREF_LOW_THRESHOLD_BMSK 0xfc0 +#define RFD_PREF_LOW_THRESHOLD_SHFT 6 +#define RFD_PREF_UP_TH 0x10 +#define RFD_PREF_UP_THRESHOLD_BMSK 0x3f +#define RFD_PREF_UP_THRESHOLD_SHFT 0 + +/* EMAC_RXQ_CTRL_2 */ +#define RXF_DOF_THRESFHOLD 0x1a0 +#define RXF_DOF_THRESHOLD_BMSK 0xfff0000 +#define RXF_DOF_THRESHOLD_SHFT 16 +#define RXF_UOF_THRESFHOLD 0xbe +#define RXF_UOF_THRESHOLD_BMSK 0xfff +#define RXF_UOF_THRESHOLD_SHFT 0 + +/* EMAC_RXQ_CTRL_3 */ +#define RXD_TIMER_BMSK 0xffff0000 +#define RXD_THRESHOLD_BMSK 0xfff +#define RXD_THRESHOLD_SHFT 0 + +/* EMAC_DMA_CTRL */ +#define DMAW_DLY_CNT_BMSK 0xf0000 +#define DMAW_DLY_CNT_SHFT 16 +#define DMAR_DLY_CNT_BMSK 0xf800 +#define DMAR_DLY_CNT_SHFT 11 +#define DMAR_REQ_PRI 0x400 +#define REGWRBLEN_BMSK 0x380 +#define REGWRBLEN_SHFT 7 +#define REGRDBLEN_BMSK 0x70 +#define REGRDBLEN_SHFT 4 +#define OUT_ORDER_MODE 0x4 +#define ENH_ORDER_MODE 0x2 +#define IN_ORDER_MODE 0x1 + +/* EMAC_MAILBOX_13 */ +#define RFD3_PROC_IDX_BMSK 0xfff0000 +#define RFD3_PROC_IDX_SHFT 16 +#define RFD3_PROD_IDX_BMSK 0xfff +#define RFD3_PROD_IDX_SHFT 0 + +/* EMAC_MAILBOX_2 */ +#define NTPD_CONS_IDX_BMSK 0xffff0000 +#define NTPD_CONS_IDX_SHFT 16 + +/* EMAC_MAILBOX_3 */ +#define RFD0_CONS_IDX_BMSK 0xfff +#define RFD0_CONS_IDX_SHFT 0 + +/* EMAC_MAILBOX_11 */ +#define H3TPD_PROD_IDX_BMSK 0xffff0000 +#define H3TPD_PROD_IDX_SHFT 16 + +/* EMAC_AXI_MAST_CTRL */ +#define DATA_BYTE_SWAP 0x8 +#define MAX_BOUND 0x2 +#define MAX_BTYPE 0x1 + +/* EMAC_MAILBOX_12 */ +#define H3TPD_CONS_IDX_BMSK 0xffff0000 +#define H3TPD_CONS_IDX_SHFT 16 + +/* EMAC_MAILBOX_9 */ +#define H2TPD_PROD_IDX_BMSK 0xffff +#define H2TPD_PROD_IDX_SHFT 0 + +/* EMAC_MAILBOX_10 */ +#define H1TPD_CONS_IDX_BMSK 0xffff0000 +#define H1TPD_CONS_IDX_SHFT 16 +#define H2TPD_CONS_IDX_BMSK 0xffff +#define H2TPD_CONS_IDX_SHFT 0 + +/* EMAC_ATHR_HEADER_CTRL */ +#define HEADER_CNT_EN 0x2 +#define HEADER_ENABLE 0x1 + +/* EMAC_MAILBOX_0 */ +#define RFD0_PROC_IDX_BMSK 0xfff0000 +#define RFD0_PROC_IDX_SHFT 16 +#define RFD0_PROD_IDX_BMSK 0xfff +#define RFD0_PROD_IDX_SHFT 0 + +/* EMAC_MAILBOX_5 */ +#define RFD1_PROC_IDX_BMSK 0xfff0000 +#define RFD1_PROC_IDX_SHFT 16 +#define RFD1_PROD_IDX_BMSK 0xfff +#define RFD1_PROD_IDX_SHFT 0 + +/* EMAC_MISC_CTRL */ +#define RX_UNCPL_INT_EN 0x1 + +/* EMAC_MAILBOX_7 */ +#define RFD2_CONS_IDX_BMSK 0xfff0000 +#define RFD2_CONS_IDX_SHFT 16 +#define RFD1_CONS_IDX_BMSK 0xfff +#define RFD1_CONS_IDX_SHFT 0 + +/* EMAC_MAILBOX_8 */ +#define RFD3_CONS_IDX_BMSK 0xfff +#define RFD3_CONS_IDX_SHFT 0 + +/* EMAC_MAILBOX_15 */ +#define NTPD_PROD_IDX_BMSK 0xffff +#define NTPD_PROD_IDX_SHFT 0 + +/* EMAC_MAILBOX_16 */ +#define H1TPD_PROD_IDX_BMSK 0xffff +#define H1TPD_PROD_IDX_SHFT 0 + +#define RXQ0_RSS_HSTYP_IPV6_TCP_EN 0x20 +#define RXQ0_RSS_HSTYP_IPV6_EN 0x10 +#define RXQ0_RSS_HSTYP_IPV4_TCP_EN 0x8 +#define RXQ0_RSS_HSTYP_IPV4_EN 0x4 + +/* EMAC_EMAC_WRAPPER_TX_TS_INX */ +#define EMAC_WRAPPER_TX_TS_EMPTY BIT(31) +#define EMAC_WRAPPER_TX_TS_INX_BMSK 0xffff + +struct emac_skb_cb { + u32 tpd_idx; + unsigned long jiffies; +}; + +#define EMAC_SKB_CB(skb) ((struct emac_skb_cb *)(skb)->cb) +#define EMAC_RSS_IDT_SIZE 256 +#define JUMBO_1KAH 0x4 +#define RXD_TH 0x100 +#define EMAC_TPD_LAST_FRAGMENT 0x80000000 +#define EMAC_TPD_TSTAMP_SAVE 0x80000000 + +/* EMAC Errors in emac_rrd.word[3] */ +#define EMAC_RRD_L4F BIT(14) +#define EMAC_RRD_IPF BIT(15) +#define EMAC_RRD_CRC BIT(21) +#define EMAC_RRD_FAE BIT(22) +#define EMAC_RRD_TRN BIT(23) +#define EMAC_RRD_RNT BIT(24) +#define EMAC_RRD_INC BIT(25) +#define EMAC_RRD_FOV BIT(29) +#define EMAC_RRD_LEN BIT(30) + +/* Error bits that will result in a received frame being discarded */ +#define EMAC_RRD_ERROR (EMAC_RRD_IPF | EMAC_RRD_CRC | EMAC_RRD_FAE | \ + EMAC_RRD_TRN | EMAC_RRD_RNT | EMAC_RRD_INC | \ + EMAC_RRD_FOV | EMAC_RRD_LEN) +#define EMAC_RRD_STATS_DW_IDX 3 + +#define EMAC_RRD(RXQ, SIZE, IDX) ((RXQ)->rrd.v_addr + (SIZE * (IDX))) +#define EMAC_RFD(RXQ, SIZE, IDX) ((RXQ)->rfd.v_addr + (SIZE * (IDX))) +#define EMAC_TPD(TXQ, SIZE, IDX) ((TXQ)->tpd.v_addr + (SIZE * (IDX))) + +#define GET_RFD_BUFFER(RXQ, IDX) (&((RXQ)->rfd.rfbuff[(IDX)])) +#define GET_TPD_BUFFER(RTQ, IDX) (&((RTQ)->tpd.tpbuff[(IDX)])) + +#define EMAC_TX_POLL_HWTXTSTAMP_THRESHOLD 8 + +#define ISR_RX_PKT (\ + RX_PKT_INT0 |\ + RX_PKT_INT1 |\ + RX_PKT_INT2 |\ + RX_PKT_INT3) + +#define EMAC_MAC_IRQ_RES "core0" + +void emac_mac_multicast_addr_set(struct emac_adapter *adpt, u8 *addr) +{ + u32 crc32, bit, reg, mta; + + /* Calculate the CRC of the MAC address */ + crc32 = ether_crc(ETH_ALEN, addr); + + /* The HASH Table is an array of 2 32-bit registers. It is + * treated like an array of 64 bits (BitArray[hash_value]). + * Use the upper 6 bits of the above CRC as the hash value. + */ + reg = (crc32 >> 31) & 0x1; + bit = (crc32 >> 26) & 0x1F; + + mta = readl(adpt->base + EMAC_HASH_TAB_REG0 + (reg << 2)); + mta |= BIT(bit); + writel(mta, adpt->base + EMAC_HASH_TAB_REG0 + (reg << 2)); +} + +void emac_mac_multicast_addr_clear(struct emac_adapter *adpt) +{ + writel(0, adpt->base + EMAC_HASH_TAB_REG0); + writel(0, adpt->base + EMAC_HASH_TAB_REG1); +} + +/* definitions for RSS */ +#define EMAC_RSS_KEY(_i, _type) \ + (EMAC_RSS_KEY0 + ((_i) * sizeof(_type))) +#define EMAC_RSS_TBL(_i, _type) \ + (EMAC_IDT_TABLE0 + ((_i) * sizeof(_type))) + +/* Config MAC modes */ +void emac_mac_mode_config(struct emac_adapter *adpt) +{ + struct net_device *netdev = adpt->netdev; + u32 mac; + + mac = readl(adpt->base + EMAC_MAC_CTRL); + mac &= ~(VLAN_STRIP | PROM_MODE | MULTI_ALL | MAC_LP_EN); + + if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX) + mac |= VLAN_STRIP; + + if (netdev->flags & IFF_PROMISC) + mac |= PROM_MODE; + + if (netdev->flags & IFF_ALLMULTI) + mac |= MULTI_ALL; + + writel(mac, adpt->base + EMAC_MAC_CTRL); +} + +/* Config descriptor rings */ +static void emac_mac_dma_rings_config(struct emac_adapter *adpt) +{ + static const unsigned short tpd_q_offset[] = { + EMAC_DESC_CTRL_8, EMAC_H1TPD_BASE_ADDR_LO, + EMAC_H2TPD_BASE_ADDR_LO, EMAC_H3TPD_BASE_ADDR_LO}; + static const unsigned short rfd_q_offset[] = { + EMAC_DESC_CTRL_2, EMAC_DESC_CTRL_10, + EMAC_DESC_CTRL_12, EMAC_DESC_CTRL_13}; + static const unsigned short rrd_q_offset[] = { + EMAC_DESC_CTRL_5, EMAC_DESC_CTRL_14, + EMAC_DESC_CTRL_15, EMAC_DESC_CTRL_16}; + + /* TPD (Transmit Packet Descriptor) */ + writel(upper_32_bits(adpt->tx_q.tpd.dma_addr), + adpt->base + EMAC_DESC_CTRL_1); + + writel(lower_32_bits(adpt->tx_q.tpd.dma_addr), + adpt->base + tpd_q_offset[0]); + + writel(adpt->tx_q.tpd.count & TPD_RING_SIZE_BMSK, + adpt->base + EMAC_DESC_CTRL_9); + + /* RFD (Receive Free Descriptor) & RRD (Receive Return Descriptor) */ + writel(upper_32_bits(adpt->rx_q.rfd.dma_addr), + adpt->base + EMAC_DESC_CTRL_0); + + writel(lower_32_bits(adpt->rx_q.rfd.dma_addr), + adpt->base + rfd_q_offset[0]); + writel(lower_32_bits(adpt->rx_q.rrd.dma_addr), + adpt->base + rrd_q_offset[0]); + + writel(adpt->rx_q.rfd.count & RFD_RING_SIZE_BMSK, + adpt->base + EMAC_DESC_CTRL_3); + writel(adpt->rx_q.rrd.count & RRD_RING_SIZE_BMSK, + adpt->base + EMAC_DESC_CTRL_6); + + writel(adpt->rxbuf_size & RX_BUFFER_SIZE_BMSK, + adpt->base + EMAC_DESC_CTRL_4); + + writel(0, adpt->base + EMAC_DESC_CTRL_11); + + /* Load all of the base addresses above and ensure that triggering HW to + * read ring pointers is flushed + */ + writel(1, adpt->base + EMAC_INTER_SRAM_PART9); +} + +/* Config transmit parameters */ +static void emac_mac_tx_config(struct emac_adapter *adpt) +{ + u32 val; + + writel((EMAC_MAX_TX_OFFLOAD_THRESH >> 3) & + JUMBO_TASK_OFFLOAD_THRESHOLD_BMSK, adpt->base + EMAC_TXQ_CTRL_1); + + val = (adpt->tpd_burst << NUM_TPD_BURST_PREF_SHFT) & + NUM_TPD_BURST_PREF_BMSK; + + val |= TXQ_MODE | LS_8023_SP; + val |= (0x0100 << NUM_TXF_BURST_PREF_SHFT) & + NUM_TXF_BURST_PREF_BMSK; + + writel(val, adpt->base + EMAC_TXQ_CTRL_0); + emac_reg_update32(adpt->base + EMAC_TXQ_CTRL_2, + (TXF_HWM_BMSK | TXF_LWM_BMSK), 0); +} + +/* Config receive parameters */ +static void emac_mac_rx_config(struct emac_adapter *adpt) +{ + u32 val; + + val = (adpt->rfd_burst << NUM_RFD_BURST_PREF_SHFT) & + NUM_RFD_BURST_PREF_BMSK; + val |= (SP_IPV6 | CUT_THRU_EN); + + writel(val, adpt->base + EMAC_RXQ_CTRL_0); + + val = readl(adpt->base + EMAC_RXQ_CTRL_1); + val &= ~(JUMBO_1KAH_BMSK | RFD_PREF_LOW_THRESHOLD_BMSK | + RFD_PREF_UP_THRESHOLD_BMSK); + val |= (JUMBO_1KAH << JUMBO_1KAH_SHFT) | + (RFD_PREF_LOW_TH << RFD_PREF_LOW_THRESHOLD_SHFT) | + (RFD_PREF_UP_TH << RFD_PREF_UP_THRESHOLD_SHFT); + writel(val, adpt->base + EMAC_RXQ_CTRL_1); + + val = readl(adpt->base + EMAC_RXQ_CTRL_2); + val &= ~(RXF_DOF_THRESHOLD_BMSK | RXF_UOF_THRESHOLD_BMSK); + val |= (RXF_DOF_THRESFHOLD << RXF_DOF_THRESHOLD_SHFT) | + (RXF_UOF_THRESFHOLD << RXF_UOF_THRESHOLD_SHFT); + writel(val, adpt->base + EMAC_RXQ_CTRL_2); + + val = readl(adpt->base + EMAC_RXQ_CTRL_3); + val &= ~(RXD_TIMER_BMSK | RXD_THRESHOLD_BMSK); + val |= RXD_TH << RXD_THRESHOLD_SHFT; + writel(val, adpt->base + EMAC_RXQ_CTRL_3); +} + +/* Config dma */ +static void emac_mac_dma_config(struct emac_adapter *adpt) +{ + u32 dma_ctrl = DMAR_REQ_PRI; + + switch (adpt->dma_order) { + case emac_dma_ord_in: + dma_ctrl |= IN_ORDER_MODE; + break; + case emac_dma_ord_enh: + dma_ctrl |= ENH_ORDER_MODE; + break; + case emac_dma_ord_out: + dma_ctrl |= OUT_ORDER_MODE; + break; + default: + break; + } + + dma_ctrl |= (((u32)adpt->dmar_block) << REGRDBLEN_SHFT) & + REGRDBLEN_BMSK; + dma_ctrl |= (((u32)adpt->dmaw_block) << REGWRBLEN_SHFT) & + REGWRBLEN_BMSK; + dma_ctrl |= (((u32)adpt->dmar_dly_cnt) << DMAR_DLY_CNT_SHFT) & + DMAR_DLY_CNT_BMSK; + dma_ctrl |= (((u32)adpt->dmaw_dly_cnt) << DMAW_DLY_CNT_SHFT) & + DMAW_DLY_CNT_BMSK; + + /* config DMA and ensure that configuration is flushed to HW */ + writel(dma_ctrl, adpt->base + EMAC_DMA_CTRL); +} + +/* set MAC address */ +static void emac_set_mac_address(struct emac_adapter *adpt, u8 *addr) +{ + u32 sta; + + /* for example: 00-A0-C6-11-22-33 + * 0<-->C6112233, 1<-->00A0. + */ + + /* low 32bit word */ + sta = (((u32)addr[2]) << 24) | (((u32)addr[3]) << 16) | + (((u32)addr[4]) << 8) | (((u32)addr[5])); + writel(sta, adpt->base + EMAC_MAC_STA_ADDR0); + + /* hight 32bit word */ + sta = (((u32)addr[0]) << 8) | (u32)addr[1]; + writel(sta, adpt->base + EMAC_MAC_STA_ADDR1); +} + +static void emac_mac_config(struct emac_adapter *adpt) +{ + struct net_device *netdev = adpt->netdev; + unsigned int max_frame; + u32 val; + + emac_set_mac_address(adpt, netdev->dev_addr); + + max_frame = netdev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; + adpt->rxbuf_size = netdev->mtu > EMAC_DEF_RX_BUF_SIZE ? + ALIGN(max_frame, 8) : EMAC_DEF_RX_BUF_SIZE; + + emac_mac_dma_rings_config(adpt); + + writel(netdev->mtu + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN, + adpt->base + EMAC_MAX_FRAM_LEN_CTRL); + + emac_mac_tx_config(adpt); + emac_mac_rx_config(adpt); + emac_mac_dma_config(adpt); + + val = readl(adpt->base + EMAC_AXI_MAST_CTRL); + val &= ~(DATA_BYTE_SWAP | MAX_BOUND); + val |= MAX_BTYPE; + writel(val, adpt->base + EMAC_AXI_MAST_CTRL); + writel(0, adpt->base + EMAC_CLK_GATE_CTRL); + writel(RX_UNCPL_INT_EN, adpt->base + EMAC_MISC_CTRL); +} + +void emac_mac_reset(struct emac_adapter *adpt) +{ + emac_mac_stop(adpt); + + emac_reg_update32(adpt->base + EMAC_DMA_MAS_CTRL, 0, SOFT_RST); + usleep_range(100, 150); /* reset may take up to 100usec */ + + /* interrupt clear-on-read */ + emac_reg_update32(adpt->base + EMAC_DMA_MAS_CTRL, 0, INT_RD_CLR_EN); +} + +void emac_mac_start(struct emac_adapter *adpt) +{ + struct phy_device *phydev = adpt->phydev; + u32 mac, csr1; + + /* enable tx queue */ + emac_reg_update32(adpt->base + EMAC_TXQ_CTRL_0, 0, TXQ_EN); + + /* enable rx queue */ + emac_reg_update32(adpt->base + EMAC_RXQ_CTRL_0, 0, RXQ_EN); + + /* enable mac control */ + mac = readl(adpt->base + EMAC_MAC_CTRL); + csr1 = readl(adpt->csr + EMAC_EMAC_WRAPPER_CSR1); + + mac |= TXEN | RXEN; /* enable RX/TX */ + + /* We don't have ethtool support yet, so force flow-control mode + * to 'full' always. + */ + mac |= TXFC | RXFC; + + /* setup link speed */ + mac &= ~SPEED_MASK; + if (phydev->speed == SPEED_1000) { + mac |= SPEED(2); + csr1 |= FREQ_MODE; + } else { + mac |= SPEED(1); + csr1 &= ~FREQ_MODE; + } + + if (phydev->duplex == DUPLEX_FULL) + mac |= FULLD; + else + mac &= ~FULLD; + + /* other parameters */ + mac |= (CRCE | PCRCE); + mac |= ((adpt->preamble << PRLEN_SHFT) & PRLEN_BMSK); + mac |= BROAD_EN; + mac |= FLCHK; + mac &= ~RX_CHKSUM_EN; + mac &= ~(HUGEN | VLAN_STRIP | TPAUSE | SIMR | HUGE | MULTI_ALL | + DEBUG_MODE | SINGLE_PAUSE_MODE); + + writel_relaxed(csr1, adpt->csr + EMAC_EMAC_WRAPPER_CSR1); + + writel_relaxed(mac, adpt->base + EMAC_MAC_CTRL); + + /* enable interrupt read clear, low power sleep mode and + * the irq moderators + */ + + writel_relaxed(adpt->irq_mod, adpt->base + EMAC_IRQ_MOD_TIM_INIT); + writel_relaxed(INT_RD_CLR_EN | LPW_MODE | IRQ_MODERATOR_EN | + IRQ_MODERATOR2_EN, adpt->base + EMAC_DMA_MAS_CTRL); + + emac_mac_mode_config(adpt); + + emac_reg_update32(adpt->base + EMAC_ATHR_HEADER_CTRL, + (HEADER_ENABLE | HEADER_CNT_EN), 0); + + emac_reg_update32(adpt->csr + EMAC_EMAC_WRAPPER_CSR2, 0, WOL_EN); +} + +void emac_mac_stop(struct emac_adapter *adpt) +{ + emac_reg_update32(adpt->base + EMAC_RXQ_CTRL_0, RXQ_EN, 0); + emac_reg_update32(adpt->base + EMAC_TXQ_CTRL_0, TXQ_EN, 0); + emac_reg_update32(adpt->base + EMAC_MAC_CTRL, TXEN | RXEN, 0); + usleep_range(1000, 1050); /* stopping mac may take upto 1msec */ +} + +/* Free all descriptors of given transmit queue */ +static void emac_tx_q_descs_free(struct emac_adapter *adpt) +{ + struct emac_tx_queue *tx_q = &adpt->tx_q; + unsigned int i; + size_t size; + + /* ring already cleared, nothing to do */ + if (!tx_q->tpd.tpbuff) + return; + + for (i = 0; i < tx_q->tpd.count; i++) { + struct emac_buffer *tpbuf = GET_TPD_BUFFER(tx_q, i); + + if (tpbuf->dma_addr) { + dma_unmap_single(adpt->netdev->dev.parent, + tpbuf->dma_addr, tpbuf->length, + DMA_TO_DEVICE); + tpbuf->dma_addr = 0; + } + if (tpbuf->skb) { + dev_kfree_skb_any(tpbuf->skb); + tpbuf->skb = NULL; + } + } + + size = sizeof(struct emac_buffer) * tx_q->tpd.count; + memset(tx_q->tpd.tpbuff, 0, size); + + /* clear the descriptor ring */ + memset(tx_q->tpd.v_addr, 0, tx_q->tpd.size); + + tx_q->tpd.consume_idx = 0; + tx_q->tpd.produce_idx = 0; +} + +/* Free all descriptors of given receive queue */ +static void emac_rx_q_free_descs(struct emac_adapter *adpt) +{ + struct device *dev = adpt->netdev->dev.parent; + struct emac_rx_queue *rx_q = &adpt->rx_q; + unsigned int i; + size_t size; + + /* ring already cleared, nothing to do */ + if (!rx_q->rfd.rfbuff) + return; + + for (i = 0; i < rx_q->rfd.count; i++) { + struct emac_buffer *rfbuf = GET_RFD_BUFFER(rx_q, i); + + if (rfbuf->dma_addr) { + dma_unmap_single(dev, rfbuf->dma_addr, rfbuf->length, + DMA_FROM_DEVICE); + rfbuf->dma_addr = 0; + } + if (rfbuf->skb) { + dev_kfree_skb(rfbuf->skb); + rfbuf->skb = NULL; + } + } + + size = sizeof(struct emac_buffer) * rx_q->rfd.count; + memset(rx_q->rfd.rfbuff, 0, size); + + /* clear the descriptor rings */ + memset(rx_q->rrd.v_addr, 0, rx_q->rrd.size); + rx_q->rrd.produce_idx = 0; + rx_q->rrd.consume_idx = 0; + + memset(rx_q->rfd.v_addr, 0, rx_q->rfd.size); + rx_q->rfd.produce_idx = 0; + rx_q->rfd.consume_idx = 0; +} + +/* Free all buffers associated with given transmit queue */ +static void emac_tx_q_bufs_free(struct emac_adapter *adpt) +{ + struct emac_tx_queue *tx_q = &adpt->tx_q; + + emac_tx_q_descs_free(adpt); + + kfree(tx_q->tpd.tpbuff); + tx_q->tpd.tpbuff = NULL; + tx_q->tpd.v_addr = NULL; + tx_q->tpd.dma_addr = 0; + tx_q->tpd.size = 0; +} + +/* Allocate TX descriptor ring for the given transmit queue */ +static int emac_tx_q_desc_alloc(struct emac_adapter *adpt, + struct emac_tx_queue *tx_q) +{ + struct emac_ring_header *ring_header = &adpt->ring_header; + size_t size; + + size = sizeof(struct emac_buffer) * tx_q->tpd.count; + tx_q->tpd.tpbuff = kzalloc(size, GFP_KERNEL); + if (!tx_q->tpd.tpbuff) + return -ENOMEM; + + tx_q->tpd.size = tx_q->tpd.count * (adpt->tpd_size * 4); + tx_q->tpd.dma_addr = ring_header->dma_addr + ring_header->used; + tx_q->tpd.v_addr = ring_header->v_addr + ring_header->used; + ring_header->used += ALIGN(tx_q->tpd.size, 8); + tx_q->tpd.produce_idx = 0; + tx_q->tpd.consume_idx = 0; + + return 0; +} + +/* Free all buffers associated with given transmit queue */ +static void emac_rx_q_bufs_free(struct emac_adapter *adpt) +{ + struct emac_rx_queue *rx_q = &adpt->rx_q; + + emac_rx_q_free_descs(adpt); + + kfree(rx_q->rfd.rfbuff); + rx_q->rfd.rfbuff = NULL; + + rx_q->rfd.v_addr = NULL; + rx_q->rfd.dma_addr = 0; + rx_q->rfd.size = 0; + + rx_q->rrd.v_addr = NULL; + rx_q->rrd.dma_addr = 0; + rx_q->rrd.size = 0; +} + +/* Allocate RX descriptor rings for the given receive queue */ +static int emac_rx_descs_alloc(struct emac_adapter *adpt) +{ + struct emac_ring_header *ring_header = &adpt->ring_header; + struct emac_rx_queue *rx_q = &adpt->rx_q; + size_t size; + + size = sizeof(struct emac_buffer) * rx_q->rfd.count; + rx_q->rfd.rfbuff = kzalloc(size, GFP_KERNEL); + if (!rx_q->rfd.rfbuff) + return -ENOMEM; + + rx_q->rrd.size = rx_q->rrd.count * (adpt->rrd_size * 4); + rx_q->rfd.size = rx_q->rfd.count * (adpt->rfd_size * 4); + + rx_q->rrd.dma_addr = ring_header->dma_addr + ring_header->used; + rx_q->rrd.v_addr = ring_header->v_addr + ring_header->used; + ring_header->used += ALIGN(rx_q->rrd.size, 8); + + rx_q->rfd.dma_addr = ring_header->dma_addr + ring_header->used; + rx_q->rfd.v_addr = ring_header->v_addr + ring_header->used; + ring_header->used += ALIGN(rx_q->rfd.size, 8); + + rx_q->rrd.produce_idx = 0; + rx_q->rrd.consume_idx = 0; + + rx_q->rfd.produce_idx = 0; + rx_q->rfd.consume_idx = 0; + + return 0; +} + +/* Allocate all TX and RX descriptor rings */ +int emac_mac_rx_tx_rings_alloc_all(struct emac_adapter *adpt) +{ + struct emac_ring_header *ring_header = &adpt->ring_header; + struct device *dev = adpt->netdev->dev.parent; + unsigned int num_tx_descs = adpt->tx_desc_cnt; + unsigned int num_rx_descs = adpt->rx_desc_cnt; + int ret; + + adpt->tx_q.tpd.count = adpt->tx_desc_cnt; + + adpt->rx_q.rrd.count = adpt->rx_desc_cnt; + adpt->rx_q.rfd.count = adpt->rx_desc_cnt; + + /* Ring DMA buffer. Each ring may need up to 8 bytes for alignment, + * hence the additional padding bytes are allocated. + */ + ring_header->size = num_tx_descs * (adpt->tpd_size * 4) + + num_rx_descs * (adpt->rfd_size * 4) + + num_rx_descs * (adpt->rrd_size * 4) + + 8 + 2 * 8; /* 8 byte per one Tx and two Rx rings */ + + ring_header->used = 0; + ring_header->v_addr = dma_zalloc_coherent(dev, ring_header->size, + &ring_header->dma_addr, + GFP_KERNEL); + if (!ring_header->v_addr) + return -ENOMEM; + + ring_header->used = ALIGN(ring_header->dma_addr, 8) - + ring_header->dma_addr; + + ret = emac_tx_q_desc_alloc(adpt, &adpt->tx_q); + if (ret) { + netdev_err(adpt->netdev, "error: Tx Queue alloc failed\n"); + goto err_alloc_tx; + } + + ret = emac_rx_descs_alloc(adpt); + if (ret) { + netdev_err(adpt->netdev, "error: Rx Queue alloc failed\n"); + goto err_alloc_rx; + } + + return 0; + +err_alloc_rx: + emac_tx_q_bufs_free(adpt); +err_alloc_tx: + dma_free_coherent(dev, ring_header->size, + ring_header->v_addr, ring_header->dma_addr); + + ring_header->v_addr = NULL; + ring_header->dma_addr = 0; + ring_header->size = 0; + ring_header->used = 0; + + return ret; +} + +/* Free all TX and RX descriptor rings */ +void emac_mac_rx_tx_rings_free_all(struct emac_adapter *adpt) +{ + struct emac_ring_header *ring_header = &adpt->ring_header; + struct device *dev = adpt->netdev->dev.parent; + + emac_tx_q_bufs_free(adpt); + emac_rx_q_bufs_free(adpt); + + dma_free_coherent(dev, ring_header->size, + ring_header->v_addr, ring_header->dma_addr); + + ring_header->v_addr = NULL; + ring_header->dma_addr = 0; + ring_header->size = 0; + ring_header->used = 0; +} + +/* Initialize descriptor rings */ +static void emac_mac_rx_tx_ring_reset_all(struct emac_adapter *adpt) +{ + unsigned int i; + + adpt->tx_q.tpd.produce_idx = 0; + adpt->tx_q.tpd.consume_idx = 0; + for (i = 0; i < adpt->tx_q.tpd.count; i++) + adpt->tx_q.tpd.tpbuff[i].dma_addr = 0; + + adpt->rx_q.rrd.produce_idx = 0; + adpt->rx_q.rrd.consume_idx = 0; + adpt->rx_q.rfd.produce_idx = 0; + adpt->rx_q.rfd.consume_idx = 0; + for (i = 0; i < adpt->rx_q.rfd.count; i++) + adpt->rx_q.rfd.rfbuff[i].dma_addr = 0; +} + +/* Produce new receive free descriptor */ +static void emac_mac_rx_rfd_create(struct emac_adapter *adpt, + struct emac_rx_queue *rx_q, + dma_addr_t addr) +{ + u32 *hw_rfd = EMAC_RFD(rx_q, adpt->rfd_size, rx_q->rfd.produce_idx); + + *(hw_rfd++) = lower_32_bits(addr); + *hw_rfd = upper_32_bits(addr); + + if (++rx_q->rfd.produce_idx == rx_q->rfd.count) + rx_q->rfd.produce_idx = 0; +} + +/* Fill up receive queue's RFD with preallocated receive buffers */ +static void emac_mac_rx_descs_refill(struct emac_adapter *adpt, + struct emac_rx_queue *rx_q) +{ + struct emac_buffer *curr_rxbuf; + struct emac_buffer *next_rxbuf; + unsigned int count = 0; + u32 next_produce_idx; + + next_produce_idx = rx_q->rfd.produce_idx + 1; + if (next_produce_idx == rx_q->rfd.count) + next_produce_idx = 0; + + curr_rxbuf = GET_RFD_BUFFER(rx_q, rx_q->rfd.produce_idx); + next_rxbuf = GET_RFD_BUFFER(rx_q, next_produce_idx); + + /* this always has a blank rx_buffer*/ + while (!next_rxbuf->dma_addr) { + struct sk_buff *skb; + int ret; + + skb = netdev_alloc_skb_ip_align(adpt->netdev, adpt->rxbuf_size); + if (!skb) + break; + + curr_rxbuf->dma_addr = + dma_map_single(adpt->netdev->dev.parent, skb->data, + curr_rxbuf->length, DMA_FROM_DEVICE); + ret = dma_mapping_error(adpt->netdev->dev.parent, + curr_rxbuf->dma_addr); + if (ret) { + dev_kfree_skb(skb); + break; + } + curr_rxbuf->skb = skb; + curr_rxbuf->length = adpt->rxbuf_size; + + emac_mac_rx_rfd_create(adpt, rx_q, curr_rxbuf->dma_addr); + next_produce_idx = rx_q->rfd.produce_idx + 1; + if (next_produce_idx == rx_q->rfd.count) + next_produce_idx = 0; + + curr_rxbuf = GET_RFD_BUFFER(rx_q, rx_q->rfd.produce_idx); + next_rxbuf = GET_RFD_BUFFER(rx_q, next_produce_idx); + count++; + } + + if (count) { + u32 prod_idx = (rx_q->rfd.produce_idx << rx_q->produce_shift) & + rx_q->produce_mask; + emac_reg_update32(adpt->base + rx_q->produce_reg, + rx_q->produce_mask, prod_idx); + } +} + +static void emac_adjust_link(struct net_device *netdev) +{ + struct emac_adapter *adpt = netdev_priv(netdev); + struct phy_device *phydev = netdev->phydev; + + if (phydev->link) + emac_mac_start(adpt); + else + emac_mac_stop(adpt); + + phy_print_status(phydev); +} + +/* Bringup the interface/HW */ +int emac_mac_up(struct emac_adapter *adpt) +{ + struct net_device *netdev = adpt->netdev; + struct emac_irq *irq = &adpt->irq; + int ret; + + emac_mac_rx_tx_ring_reset_all(adpt); + emac_mac_config(adpt); + + ret = request_irq(irq->irq, emac_isr, 0, EMAC_MAC_IRQ_RES, irq); + if (ret) { + netdev_err(adpt->netdev, "could not request %s irq\n", + EMAC_MAC_IRQ_RES); + return ret; + } + + emac_mac_rx_descs_refill(adpt, &adpt->rx_q); + + ret = phy_connect_direct(netdev, adpt->phydev, emac_adjust_link, + PHY_INTERFACE_MODE_SGMII); + if (ret) { + netdev_err(adpt->netdev, "could not connect phy\n"); + free_irq(irq->irq, irq); + return ret; + } + + /* enable mac irq */ + writel((u32)~DIS_INT, adpt->base + EMAC_INT_STATUS); + writel(adpt->irq.mask, adpt->base + EMAC_INT_MASK); + + adpt->phydev->irq = PHY_IGNORE_INTERRUPT; + phy_start(adpt->phydev); + + napi_enable(&adpt->rx_q.napi); + netif_start_queue(netdev); + + return 0; +} + +/* Bring down the interface/HW */ +void emac_mac_down(struct emac_adapter *adpt) +{ + struct net_device *netdev = adpt->netdev; + + netif_stop_queue(netdev); + napi_disable(&adpt->rx_q.napi); + + phy_stop(adpt->phydev); + phy_disconnect(adpt->phydev); + + /* disable mac irq */ + writel(DIS_INT, adpt->base + EMAC_INT_STATUS); + writel(0, adpt->base + EMAC_INT_MASK); + synchronize_irq(adpt->irq.irq); + free_irq(adpt->irq.irq, &adpt->irq); + + emac_mac_reset(adpt); + + emac_tx_q_descs_free(adpt); + netdev_reset_queue(adpt->netdev); + emac_rx_q_free_descs(adpt); +} + +/* Consume next received packet descriptor */ +static bool emac_rx_process_rrd(struct emac_adapter *adpt, + struct emac_rx_queue *rx_q, + struct emac_rrd *rrd) +{ + u32 *hw_rrd = EMAC_RRD(rx_q, adpt->rrd_size, rx_q->rrd.consume_idx); + + rrd->word[3] = *(hw_rrd + 3); + + if (!RRD_UPDT(rrd)) + return false; + + rrd->word[4] = 0; + rrd->word[5] = 0; + + rrd->word[0] = *(hw_rrd++); + rrd->word[1] = *(hw_rrd++); + rrd->word[2] = *(hw_rrd++); + + if (unlikely(RRD_NOR(rrd) != 1)) { + netdev_err(adpt->netdev, + "error: multi-RFD not support yet! nor:%lu\n", + RRD_NOR(rrd)); + } + + /* mark rrd as processed */ + RRD_UPDT_SET(rrd, 0); + *hw_rrd = rrd->word[3]; + + if (++rx_q->rrd.consume_idx == rx_q->rrd.count) + rx_q->rrd.consume_idx = 0; + + return true; +} + +/* Produce new transmit descriptor */ +static void emac_tx_tpd_create(struct emac_adapter *adpt, + struct emac_tx_queue *tx_q, struct emac_tpd *tpd) +{ + u32 *hw_tpd; + + tx_q->tpd.last_produce_idx = tx_q->tpd.produce_idx; + hw_tpd = EMAC_TPD(tx_q, adpt->tpd_size, tx_q->tpd.produce_idx); + + if (++tx_q->tpd.produce_idx == tx_q->tpd.count) + tx_q->tpd.produce_idx = 0; + + *(hw_tpd++) = tpd->word[0]; + *(hw_tpd++) = tpd->word[1]; + *(hw_tpd++) = tpd->word[2]; + *hw_tpd = tpd->word[3]; +} + +/* Mark the last transmit descriptor as such (for the transmit packet) */ +static void emac_tx_tpd_mark_last(struct emac_adapter *adpt, + struct emac_tx_queue *tx_q) +{ + u32 *hw_tpd = + EMAC_TPD(tx_q, adpt->tpd_size, tx_q->tpd.last_produce_idx); + u32 tmp_tpd; + + tmp_tpd = *(hw_tpd + 1); + tmp_tpd |= EMAC_TPD_LAST_FRAGMENT; + *(hw_tpd + 1) = tmp_tpd; +} + +static void emac_rx_rfd_clean(struct emac_rx_queue *rx_q, struct emac_rrd *rrd) +{ + struct emac_buffer *rfbuf = rx_q->rfd.rfbuff; + u32 consume_idx = RRD_SI(rrd); + unsigned int i; + + for (i = 0; i < RRD_NOR(rrd); i++) { + rfbuf[consume_idx].skb = NULL; + if (++consume_idx == rx_q->rfd.count) + consume_idx = 0; + } + + rx_q->rfd.consume_idx = consume_idx; + rx_q->rfd.process_idx = consume_idx; +} + +/* Push the received skb to upper layers */ +static void emac_receive_skb(struct emac_rx_queue *rx_q, + struct sk_buff *skb, + u16 vlan_tag, bool vlan_flag) +{ + if (vlan_flag) { + u16 vlan; + + EMAC_TAG_TO_VLAN(vlan_tag, vlan); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan); + } + + napi_gro_receive(&rx_q->napi, skb); +} + +/* Process receive event */ +void emac_mac_rx_process(struct emac_adapter *adpt, struct emac_rx_queue *rx_q, + int *num_pkts, int max_pkts) +{ + u32 proc_idx, hw_consume_idx, num_consume_pkts; + struct net_device *netdev = adpt->netdev; + struct emac_buffer *rfbuf; + unsigned int count = 0; + struct emac_rrd rrd; + struct sk_buff *skb; + u32 reg; + + reg = readl_relaxed(adpt->base + rx_q->consume_reg); + + hw_consume_idx = (reg & rx_q->consume_mask) >> rx_q->consume_shift; + num_consume_pkts = (hw_consume_idx >= rx_q->rrd.consume_idx) ? + (hw_consume_idx - rx_q->rrd.consume_idx) : + (hw_consume_idx + rx_q->rrd.count - rx_q->rrd.consume_idx); + + do { + if (!num_consume_pkts) + break; + + if (!emac_rx_process_rrd(adpt, rx_q, &rrd)) + break; + + if (likely(RRD_NOR(&rrd) == 1)) { + /* good receive */ + rfbuf = GET_RFD_BUFFER(rx_q, RRD_SI(&rrd)); + dma_unmap_single(adpt->netdev->dev.parent, + rfbuf->dma_addr, rfbuf->length, + DMA_FROM_DEVICE); + rfbuf->dma_addr = 0; + skb = rfbuf->skb; + } else { + netdev_err(adpt->netdev, + "error: multi-RFD not support yet!\n"); + break; + } + emac_rx_rfd_clean(rx_q, &rrd); + num_consume_pkts--; + count++; + + /* Due to a HW issue in L4 check sum detection (UDP/TCP frags + * with DF set are marked as error), drop packets based on the + * error mask rather than the summary bit (ignoring L4F errors) + */ + if (rrd.word[EMAC_RRD_STATS_DW_IDX] & EMAC_RRD_ERROR) { + netif_dbg(adpt, rx_status, adpt->netdev, + "Drop error packet[RRD: 0x%x:0x%x:0x%x:0x%x]\n", + rrd.word[0], rrd.word[1], + rrd.word[2], rrd.word[3]); + + dev_kfree_skb(skb); + continue; + } + + skb_put(skb, RRD_PKT_SIZE(&rrd) - ETH_FCS_LEN); + skb->dev = netdev; + skb->protocol = eth_type_trans(skb, skb->dev); + if (netdev->features & NETIF_F_RXCSUM) + skb->ip_summed = RRD_L4F(&rrd) ? + CHECKSUM_NONE : CHECKSUM_UNNECESSARY; + else + skb_checksum_none_assert(skb); + + emac_receive_skb(rx_q, skb, (u16)RRD_CVALN_TAG(&rrd), + (bool)RRD_CVTAG(&rrd)); + + netdev->last_rx = jiffies; + (*num_pkts)++; + } while (*num_pkts < max_pkts); + + if (count) { + proc_idx = (rx_q->rfd.process_idx << rx_q->process_shft) & + rx_q->process_mask; + emac_reg_update32(adpt->base + rx_q->process_reg, + rx_q->process_mask, proc_idx); + emac_mac_rx_descs_refill(adpt, rx_q); + } +} + +/* get the number of free transmit descriptors */ +static unsigned int emac_tpd_num_free_descs(struct emac_tx_queue *tx_q) +{ + u32 produce_idx = tx_q->tpd.produce_idx; + u32 consume_idx = tx_q->tpd.consume_idx; + + return (consume_idx > produce_idx) ? + (consume_idx - produce_idx - 1) : + (tx_q->tpd.count + consume_idx - produce_idx - 1); +} + +/* Process transmit event */ +void emac_mac_tx_process(struct emac_adapter *adpt, struct emac_tx_queue *tx_q) +{ + u32 reg = readl_relaxed(adpt->base + tx_q->consume_reg); + u32 hw_consume_idx, pkts_compl = 0, bytes_compl = 0; + struct emac_buffer *tpbuf; + + hw_consume_idx = (reg & tx_q->consume_mask) >> tx_q->consume_shift; + + while (tx_q->tpd.consume_idx != hw_consume_idx) { + tpbuf = GET_TPD_BUFFER(tx_q, tx_q->tpd.consume_idx); + if (tpbuf->dma_addr) { + dma_unmap_single(adpt->netdev->dev.parent, + tpbuf->dma_addr, tpbuf->length, + DMA_TO_DEVICE); + tpbuf->dma_addr = 0; + } + + if (tpbuf->skb) { + pkts_compl++; + bytes_compl += tpbuf->skb->len; + dev_kfree_skb_irq(tpbuf->skb); + tpbuf->skb = NULL; + } + + if (++tx_q->tpd.consume_idx == tx_q->tpd.count) + tx_q->tpd.consume_idx = 0; + } + + netdev_completed_queue(adpt->netdev, pkts_compl, bytes_compl); + + if (netif_queue_stopped(adpt->netdev)) + if (emac_tpd_num_free_descs(tx_q) > (MAX_SKB_FRAGS + 1)) + netif_wake_queue(adpt->netdev); +} + +/* Initialize all queue data structures */ +void emac_mac_rx_tx_ring_init_all(struct platform_device *pdev, + struct emac_adapter *adpt) +{ + adpt->rx_q.netdev = adpt->netdev; + + adpt->rx_q.produce_reg = EMAC_MAILBOX_0; + adpt->rx_q.produce_mask = RFD0_PROD_IDX_BMSK; + adpt->rx_q.produce_shift = RFD0_PROD_IDX_SHFT; + + adpt->rx_q.process_reg = EMAC_MAILBOX_0; + adpt->rx_q.process_mask = RFD0_PROC_IDX_BMSK; + adpt->rx_q.process_shft = RFD0_PROC_IDX_SHFT; + + adpt->rx_q.consume_reg = EMAC_MAILBOX_3; + adpt->rx_q.consume_mask = RFD0_CONS_IDX_BMSK; + adpt->rx_q.consume_shift = RFD0_CONS_IDX_SHFT; + + adpt->rx_q.irq = &adpt->irq; + adpt->rx_q.intr = adpt->irq.mask & ISR_RX_PKT; + + adpt->tx_q.produce_reg = EMAC_MAILBOX_15; + adpt->tx_q.produce_mask = NTPD_PROD_IDX_BMSK; + adpt->tx_q.produce_shift = NTPD_PROD_IDX_SHFT; + + adpt->tx_q.consume_reg = EMAC_MAILBOX_2; + adpt->tx_q.consume_mask = NTPD_CONS_IDX_BMSK; + adpt->tx_q.consume_shift = NTPD_CONS_IDX_SHFT; +} + +/* Fill up transmit descriptors with TSO and Checksum offload information */ +static int emac_tso_csum(struct emac_adapter *adpt, + struct emac_tx_queue *tx_q, + struct sk_buff *skb, + struct emac_tpd *tpd) +{ + unsigned int hdr_len; + int ret; + + if (skb_is_gso(skb)) { + if (skb_header_cloned(skb)) { + ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); + if (unlikely(ret)) + return ret; + } + + if (skb->protocol == htons(ETH_P_IP)) { + u32 pkt_len = ((unsigned char *)ip_hdr(skb) - skb->data) + + ntohs(ip_hdr(skb)->tot_len); + if (skb->len > pkt_len) + pskb_trim(skb, pkt_len); + } + + hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); + if (unlikely(skb->len == hdr_len)) { + /* we only need to do csum */ + netif_warn(adpt, tx_err, adpt->netdev, + "tso not needed for packet with 0 data\n"); + goto do_csum; + } + + if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) { + ip_hdr(skb)->check = 0; + tcp_hdr(skb)->check = + ~csum_tcpudp_magic(ip_hdr(skb)->saddr, + ip_hdr(skb)->daddr, + 0, IPPROTO_TCP, 0); + TPD_IPV4_SET(tpd, 1); + } + + if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) { + /* ipv6 tso need an extra tpd */ + struct emac_tpd extra_tpd; + + memset(tpd, 0, sizeof(*tpd)); + memset(&extra_tpd, 0, sizeof(extra_tpd)); + + ipv6_hdr(skb)->payload_len = 0; + tcp_hdr(skb)->check = + ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, + &ipv6_hdr(skb)->daddr, + 0, IPPROTO_TCP, 0); + TPD_PKT_LEN_SET(&extra_tpd, skb->len); + TPD_LSO_SET(&extra_tpd, 1); + TPD_LSOV_SET(&extra_tpd, 1); + emac_tx_tpd_create(adpt, tx_q, &extra_tpd); + TPD_LSOV_SET(tpd, 1); + } + + TPD_LSO_SET(tpd, 1); + TPD_TCPHDR_OFFSET_SET(tpd, skb_transport_offset(skb)); + TPD_MSS_SET(tpd, skb_shinfo(skb)->gso_size); + return 0; + } + +do_csum: + if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) { + unsigned int css, cso; + + cso = skb_transport_offset(skb); + if (unlikely(cso & 0x1)) { + netdev_err(adpt->netdev, + "error: payload offset should be even\n"); + return -EINVAL; + } + css = cso + skb->csum_offset; + + TPD_PAYLOAD_OFFSET_SET(tpd, cso >> 1); + TPD_CXSUM_OFFSET_SET(tpd, css >> 1); + TPD_CSX_SET(tpd, 1); + } + + return 0; +} + +/* Fill up transmit descriptors */ +static void emac_tx_fill_tpd(struct emac_adapter *adpt, + struct emac_tx_queue *tx_q, struct sk_buff *skb, + struct emac_tpd *tpd) +{ + unsigned int nr_frags = skb_shinfo(skb)->nr_frags; + unsigned int first = tx_q->tpd.produce_idx; + unsigned int len = skb_headlen(skb); + struct emac_buffer *tpbuf = NULL; + unsigned int mapped_len = 0; + unsigned int i; + int count = 0; + int ret; + + /* if Large Segment Offload is (in TCP Segmentation Offload struct) */ + if (TPD_LSO(tpd)) { + mapped_len = skb_transport_offset(skb) + tcp_hdrlen(skb); + + tpbuf = GET_TPD_BUFFER(tx_q, tx_q->tpd.produce_idx); + tpbuf->length = mapped_len; + tpbuf->dma_addr = dma_map_single(adpt->netdev->dev.parent, + skb->data, tpbuf->length, + DMA_TO_DEVICE); + ret = dma_mapping_error(adpt->netdev->dev.parent, + tpbuf->dma_addr); + if (ret) + goto error; + + TPD_BUFFER_ADDR_L_SET(tpd, lower_32_bits(tpbuf->dma_addr)); + TPD_BUFFER_ADDR_H_SET(tpd, upper_32_bits(tpbuf->dma_addr)); + TPD_BUF_LEN_SET(tpd, tpbuf->length); + emac_tx_tpd_create(adpt, tx_q, tpd); + count++; + } + + if (mapped_len < len) { + tpbuf = GET_TPD_BUFFER(tx_q, tx_q->tpd.produce_idx); + tpbuf->length = len - mapped_len; + tpbuf->dma_addr = dma_map_single(adpt->netdev->dev.parent, + skb->data + mapped_len, + tpbuf->length, DMA_TO_DEVICE); + ret = dma_mapping_error(adpt->netdev->dev.parent, + tpbuf->dma_addr); + if (ret) + goto error; + + TPD_BUFFER_ADDR_L_SET(tpd, lower_32_bits(tpbuf->dma_addr)); + TPD_BUFFER_ADDR_H_SET(tpd, upper_32_bits(tpbuf->dma_addr)); + TPD_BUF_LEN_SET(tpd, tpbuf->length); + emac_tx_tpd_create(adpt, tx_q, tpd); + count++; + } + + for (i = 0; i < nr_frags; i++) { + struct skb_frag_struct *frag; + + frag = &skb_shinfo(skb)->frags[i]; + + tpbuf = GET_TPD_BUFFER(tx_q, tx_q->tpd.produce_idx); + tpbuf->length = frag->size; + tpbuf->dma_addr = dma_map_page(adpt->netdev->dev.parent, + frag->page.p, frag->page_offset, + tpbuf->length, DMA_TO_DEVICE); + ret = dma_mapping_error(adpt->netdev->dev.parent, + tpbuf->dma_addr); + if (ret) + goto error; + + TPD_BUFFER_ADDR_L_SET(tpd, lower_32_bits(tpbuf->dma_addr)); + TPD_BUFFER_ADDR_H_SET(tpd, upper_32_bits(tpbuf->dma_addr)); + TPD_BUF_LEN_SET(tpd, tpbuf->length); + emac_tx_tpd_create(adpt, tx_q, tpd); + count++; + } + + /* The last tpd */ + wmb(); + emac_tx_tpd_mark_last(adpt, tx_q); + + /* The last buffer info contain the skb address, + * so it will be freed after unmap + */ + tpbuf->skb = skb; + + return; + +error: + /* One of the memory mappings failed, so undo everything */ + tx_q->tpd.produce_idx = first; + + while (count--) { + tpbuf = GET_TPD_BUFFER(tx_q, first); + dma_unmap_page(adpt->netdev->dev.parent, tpbuf->dma_addr, + tpbuf->length, DMA_TO_DEVICE); + tpbuf->dma_addr = 0; + tpbuf->length = 0; + + if (++first == tx_q->tpd.count) + first = 0; + } + + dev_kfree_skb(skb); +} + +/* Transmit the packet using specified transmit queue */ +int emac_mac_tx_buf_send(struct emac_adapter *adpt, struct emac_tx_queue *tx_q, + struct sk_buff *skb) +{ + struct emac_tpd tpd; + u32 prod_idx; + + memset(&tpd, 0, sizeof(tpd)); + + if (emac_tso_csum(adpt, tx_q, skb, &tpd) != 0) { + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; + } + + if (skb_vlan_tag_present(skb)) { + u16 tag; + + EMAC_VLAN_TO_TAG(skb_vlan_tag_get(skb), tag); + TPD_CVLAN_TAG_SET(&tpd, tag); + TPD_INSTC_SET(&tpd, 1); + } + + if (skb_network_offset(skb) != ETH_HLEN) + TPD_TYP_SET(&tpd, 1); + + emac_tx_fill_tpd(adpt, tx_q, skb, &tpd); + + netdev_sent_queue(adpt->netdev, skb->len); + + /* Make sure the are enough free descriptors to hold one + * maximum-sized SKB. We need one desc for each fragment, + * one for the checksum (emac_tso_csum), one for TSO, and + * and one for the SKB header. + */ + if (emac_tpd_num_free_descs(tx_q) < (MAX_SKB_FRAGS + 3)) + netif_stop_queue(adpt->netdev); + + /* update produce idx */ + prod_idx = (tx_q->tpd.produce_idx << tx_q->produce_shift) & + tx_q->produce_mask; + emac_reg_update32(adpt->base + tx_q->produce_reg, + tx_q->produce_mask, prod_idx); + + return NETDEV_TX_OK; +} diff --git a/drivers/net/ethernet/qualcomm/emac/emac-mac.h b/drivers/net/ethernet/qualcomm/emac/emac-mac.h new file mode 100644 index 000000000000..f3aa24dc4a29 --- /dev/null +++ b/drivers/net/ethernet/qualcomm/emac/emac-mac.h @@ -0,0 +1,248 @@ +/* Copyright (c) 2013-2016, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +/* EMAC DMA HW engine uses three rings: + * Tx: + * TPD: Transmit Packet Descriptor ring. + * Rx: + * RFD: Receive Free Descriptor ring. + * Ring of descriptors with empty buffers to be filled by Rx HW. + * RRD: Receive Return Descriptor ring. + * Ring of descriptors with buffers filled with received data. + */ + +#ifndef _EMAC_HW_H_ +#define _EMAC_HW_H_ + +/* EMAC_CSR register offsets */ +#define EMAC_EMAC_WRAPPER_CSR1 0x000000 +#define EMAC_EMAC_WRAPPER_CSR2 0x000004 +#define EMAC_EMAC_WRAPPER_TX_TS_LO 0x000104 +#define EMAC_EMAC_WRAPPER_TX_TS_HI 0x000108 +#define EMAC_EMAC_WRAPPER_TX_TS_INX 0x00010c + +/* DMA Order Settings */ +enum emac_dma_order { + emac_dma_ord_in = 1, + emac_dma_ord_enh = 2, + emac_dma_ord_out = 4 +}; + +enum emac_dma_req_block { + emac_dma_req_128 = 0, + emac_dma_req_256 = 1, + emac_dma_req_512 = 2, + emac_dma_req_1024 = 3, + emac_dma_req_2048 = 4, + emac_dma_req_4096 = 5 +}; + +/* Returns the value of bits idx...idx+n_bits */ +#define BITS_GET(val, lo, hi) ((le32_to_cpu(val) & GENMASK((hi), (lo))) >> lo) +#define BITS_SET(val, lo, hi, new_val) \ + val = cpu_to_le32((le32_to_cpu(val) & (~GENMASK((hi), (lo)))) | \ + (((new_val) << (lo)) & GENMASK((hi), (lo)))) + +/* RRD (Receive Return Descriptor) */ +struct emac_rrd { + u32 word[6]; + +/* number of RFD */ +#define RRD_NOR(rrd) BITS_GET((rrd)->word[0], 16, 19) +/* start consumer index of rfd-ring */ +#define RRD_SI(rrd) BITS_GET((rrd)->word[0], 20, 31) +/* vlan-tag (CVID, CFI and PRI) */ +#define RRD_CVALN_TAG(rrd) BITS_GET((rrd)->word[2], 0, 15) +/* length of the packet */ +#define RRD_PKT_SIZE(rrd) BITS_GET((rrd)->word[3], 0, 13) +/* L4(TCP/UDP) checksum failed */ +#define RRD_L4F(rrd) BITS_GET((rrd)->word[3], 14, 14) +/* vlan tagged */ +#define RRD_CVTAG(rrd) BITS_GET((rrd)->word[3], 16, 16) +/* When set, indicates that the descriptor is updated by the IP core. + * When cleared, indicates that the descriptor is invalid. + */ +#define RRD_UPDT(rrd) BITS_GET((rrd)->word[3], 31, 31) +#define RRD_UPDT_SET(rrd, val) BITS_SET((rrd)->word[3], 31, 31, val) +/* timestamp low */ +#define RRD_TS_LOW(rrd) BITS_GET((rrd)->word[4], 0, 29) +/* timestamp high */ +#define RRD_TS_HI(rrd) le32_to_cpu((rrd)->word[5]) +}; + +/* TPD (Transmit Packet Descriptor) */ +struct emac_tpd { + u32 word[4]; + +/* Number of bytes of the transmit packet. (include 4-byte CRC) */ +#define TPD_BUF_LEN_SET(tpd, val) BITS_SET((tpd)->word[0], 0, 15, val) +/* Custom Checksum Offload: When set, ask IP core to offload custom checksum */ +#define TPD_CSX_SET(tpd, val) BITS_SET((tpd)->word[1], 8, 8, val) +/* TCP Large Send Offload: When set, ask IP core to do offload TCP Large Send */ +#define TPD_LSO(tpd) BITS_GET((tpd)->word[1], 12, 12) +#define TPD_LSO_SET(tpd, val) BITS_SET((tpd)->word[1], 12, 12, val) +/* Large Send Offload Version: When set, indicates this is an LSOv2 + * (for both IPv4 and IPv6). When cleared, indicates this is an LSOv1 + * (only for IPv4). + */ +#define TPD_LSOV_SET(tpd, val) BITS_SET((tpd)->word[1], 13, 13, val) +/* IPv4 packet: When set, indicates this is an IPv4 packet, this bit is only + * for LSOV2 format. + */ +#define TPD_IPV4_SET(tpd, val) BITS_SET((tpd)->word[1], 16, 16, val) +/* 0: Ethernet frame (DA+SA+TYPE+DATA+CRC) + * 1: IEEE 802.3 frame (DA+SA+LEN+DSAP+SSAP+CTL+ORG+TYPE+DATA+CRC) + */ +#define TPD_TYP_SET(tpd, val) BITS_SET((tpd)->word[1], 17, 17, val) +/* Low-32bit Buffer Address */ +#define TPD_BUFFER_ADDR_L_SET(tpd, val) ((tpd)->word[2] = cpu_to_le32(val)) +/* CVLAN Tag to be inserted if INS_VLAN_TAG is set, CVLAN TPID based on global + * register configuration. + */ +#define TPD_CVLAN_TAG_SET(tpd, val) BITS_SET((tpd)->word[3], 0, 15, val) +/* Insert CVlan Tag: When set, ask MAC to insert CVLAN TAG to outgoing packet + */ +#define TPD_INSTC_SET(tpd, val) BITS_SET((tpd)->word[3], 17, 17, val) +/* High-14bit Buffer Address, So, the 64b-bit address is + * {DESC_CTRL_11_TX_DATA_HIADDR[17:0],(register) BUFFER_ADDR_H, BUFFER_ADDR_L} + */ +#define TPD_BUFFER_ADDR_H_SET(tpd, val) BITS_SET((tpd)->word[3], 18, 30, val) +/* Format D. Word offset from the 1st byte of this packet to start to calculate + * the custom checksum. + */ +#define TPD_PAYLOAD_OFFSET_SET(tpd, val) BITS_SET((tpd)->word[1], 0, 7, val) +/* Format D. Word offset from the 1st byte of this packet to fill the custom + * checksum to + */ +#define TPD_CXSUM_OFFSET_SET(tpd, val) BITS_SET((tpd)->word[1], 18, 25, val) + +/* Format C. TCP Header offset from the 1st byte of this packet. (byte unit) */ +#define TPD_TCPHDR_OFFSET_SET(tpd, val) BITS_SET((tpd)->word[1], 0, 7, val) +/* Format C. MSS (Maximum Segment Size) got from the protocol layer. (byte unit) + */ +#define TPD_MSS_SET(tpd, val) BITS_SET((tpd)->word[1], 18, 30, val) +/* packet length in ext tpd */ +#define TPD_PKT_LEN_SET(tpd, val) ((tpd)->word[2] = cpu_to_le32(val)) +}; + +/* emac_ring_header represents a single, contiguous block of DMA space + * mapped for the three descriptor rings (tpd, rfd, rrd) + */ +struct emac_ring_header { + void *v_addr; /* virtual address */ + dma_addr_t dma_addr; /* dma address */ + size_t size; /* length in bytes */ + size_t used; +}; + +/* emac_buffer is wrapper around a pointer to a socket buffer + * so a DMA handle can be stored along with the skb + */ +struct emac_buffer { + struct sk_buff *skb; /* socket buffer */ + u16 length; /* rx buffer length */ + dma_addr_t dma_addr; /* dma address */ +}; + +/* receive free descriptor (rfd) ring */ +struct emac_rfd_ring { + struct emac_buffer *rfbuff; + u32 *v_addr; /* virtual address */ + dma_addr_t dma_addr; /* dma address */ + size_t size; /* length in bytes */ + unsigned int count; /* number of desc in the ring */ + unsigned int produce_idx; + unsigned int process_idx; + unsigned int consume_idx; /* unused */ +}; + +/* Receive Return Desciptor (RRD) ring */ +struct emac_rrd_ring { + u32 *v_addr; /* virtual address */ + dma_addr_t dma_addr; /* physical address */ + size_t size; /* length in bytes */ + unsigned int count; /* number of desc in the ring */ + unsigned int produce_idx; /* unused */ + unsigned int consume_idx; +}; + +/* Rx queue */ +struct emac_rx_queue { + struct net_device *netdev; /* netdev ring belongs to */ + struct emac_rrd_ring rrd; + struct emac_rfd_ring rfd; + struct napi_struct napi; + struct emac_irq *irq; + + u32 intr; + u32 produce_mask; + u32 process_mask; + u32 consume_mask; + + u16 produce_reg; + u16 process_reg; + u16 consume_reg; + + u8 produce_shift; + u8 process_shft; + u8 consume_shift; +}; + +/* Transimit Packet Descriptor (tpd) ring */ +struct emac_tpd_ring { + struct emac_buffer *tpbuff; + u32 *v_addr; /* virtual address */ + dma_addr_t dma_addr; /* dma address */ + + size_t size; /* length in bytes */ + unsigned int count; /* number of desc in the ring */ + unsigned int produce_idx; + unsigned int consume_idx; + unsigned int last_produce_idx; +}; + +/* Tx queue */ +struct emac_tx_queue { + struct emac_tpd_ring tpd; + + u32 produce_mask; + u32 consume_mask; + + u16 max_packets; /* max packets per interrupt */ + u16 produce_reg; + u16 consume_reg; + + u8 produce_shift; + u8 consume_shift; +}; + +struct emac_adapter; + +int emac_mac_up(struct emac_adapter *adpt); +void emac_mac_down(struct emac_adapter *adpt); +void emac_mac_reset(struct emac_adapter *adpt); +void emac_mac_start(struct emac_adapter *adpt); +void emac_mac_stop(struct emac_adapter *adpt); +void emac_mac_mode_config(struct emac_adapter *adpt); +void emac_mac_rx_process(struct emac_adapter *adpt, struct emac_rx_queue *rx_q, + int *num_pkts, int max_pkts); +int emac_mac_tx_buf_send(struct emac_adapter *adpt, struct emac_tx_queue *tx_q, + struct sk_buff *skb); +void emac_mac_tx_process(struct emac_adapter *adpt, struct emac_tx_queue *tx_q); +void emac_mac_rx_tx_ring_init_all(struct platform_device *pdev, + struct emac_adapter *adpt); +int emac_mac_rx_tx_rings_alloc_all(struct emac_adapter *adpt); +void emac_mac_rx_tx_rings_free_all(struct emac_adapter *adpt); +void emac_mac_multicast_addr_clear(struct emac_adapter *adpt); +void emac_mac_multicast_addr_set(struct emac_adapter *adpt, u8 *addr); + +#endif /*_EMAC_HW_H_*/ diff --git a/drivers/net/ethernet/qualcomm/emac/emac-phy.c b/drivers/net/ethernet/qualcomm/emac/emac-phy.c new file mode 100644 index 000000000000..c412ba9a27e7 --- /dev/null +++ b/drivers/net/ethernet/qualcomm/emac/emac-phy.c @@ -0,0 +1,204 @@ +/* Copyright (c) 2013-2016, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +/* Qualcomm Technologies, Inc. EMAC PHY Controller driver. + */ + +#include +#include +#include +#include +#include +#include +#include "emac.h" +#include "emac-mac.h" +#include "emac-phy.h" +#include "emac-sgmii.h" + +/* EMAC base register offsets */ +#define EMAC_MDIO_CTRL 0x001414 +#define EMAC_PHY_STS 0x001418 +#define EMAC_MDIO_EX_CTRL 0x001440 + +/* EMAC_MDIO_CTRL */ +#define MDIO_MODE BIT(30) +#define MDIO_PR BIT(29) +#define MDIO_AP_EN BIT(28) +#define MDIO_BUSY BIT(27) +#define MDIO_CLK_SEL_BMSK 0x7000000 +#define MDIO_CLK_SEL_SHFT 24 +#define MDIO_START BIT(23) +#define SUP_PREAMBLE BIT(22) +#define MDIO_RD_NWR BIT(21) +#define MDIO_REG_ADDR_BMSK 0x1f0000 +#define MDIO_REG_ADDR_SHFT 16 +#define MDIO_DATA_BMSK 0xffff +#define MDIO_DATA_SHFT 0 + +/* EMAC_PHY_STS */ +#define PHY_ADDR_BMSK 0x1f0000 +#define PHY_ADDR_SHFT 16 + +#define MDIO_CLK_25_4 0 +#define MDIO_CLK_25_28 7 + +#define MDIO_WAIT_TIMES 1000 + +#define EMAC_LINK_SPEED_DEFAULT (\ + EMAC_LINK_SPEED_10_HALF |\ + EMAC_LINK_SPEED_10_FULL |\ + EMAC_LINK_SPEED_100_HALF |\ + EMAC_LINK_SPEED_100_FULL |\ + EMAC_LINK_SPEED_1GB_FULL) + +/** + * emac_phy_mdio_autopoll_disable() - disable mdio autopoll + * @adpt: the emac adapter + * + * The autopoll feature takes over the MDIO bus. In order for + * the PHY driver to be able to talk to the PHY over the MDIO + * bus, we need to temporarily disable the autopoll feature. + */ +static int emac_phy_mdio_autopoll_disable(struct emac_adapter *adpt) +{ + u32 val; + + /* disable autopoll */ + emac_reg_update32(adpt->base + EMAC_MDIO_CTRL, MDIO_AP_EN, 0); + + /* wait for any mdio polling to complete */ + if (!readl_poll_timeout(adpt->base + EMAC_MDIO_CTRL, val, + !(val & MDIO_BUSY), 100, MDIO_WAIT_TIMES * 100)) + return 0; + + /* failed to disable; ensure it is enabled before returning */ + emac_reg_update32(adpt->base + EMAC_MDIO_CTRL, 0, MDIO_AP_EN); + + return -EBUSY; +} + +/** + * emac_phy_mdio_autopoll_disable() - disable mdio autopoll + * @adpt: the emac adapter + * + * The EMAC has the ability to poll the external PHY on the MDIO + * bus for link state changes. This eliminates the need for the + * driver to poll the phy. If if the link state does change, + * the EMAC issues an interrupt on behalf of the PHY. + */ +static void emac_phy_mdio_autopoll_enable(struct emac_adapter *adpt) +{ + emac_reg_update32(adpt->base + EMAC_MDIO_CTRL, 0, MDIO_AP_EN); +} + +static int emac_mdio_read(struct mii_bus *bus, int addr, int regnum) +{ + struct emac_adapter *adpt = bus->priv; + u32 reg; + int ret; + + ret = emac_phy_mdio_autopoll_disable(adpt); + if (ret) + return ret; + + emac_reg_update32(adpt->base + EMAC_PHY_STS, PHY_ADDR_BMSK, + (addr << PHY_ADDR_SHFT)); + + reg = SUP_PREAMBLE | + ((MDIO_CLK_25_4 << MDIO_CLK_SEL_SHFT) & MDIO_CLK_SEL_BMSK) | + ((regnum << MDIO_REG_ADDR_SHFT) & MDIO_REG_ADDR_BMSK) | + MDIO_START | MDIO_RD_NWR; + + writel(reg, adpt->base + EMAC_MDIO_CTRL); + + if (readl_poll_timeout(adpt->base + EMAC_MDIO_CTRL, reg, + !(reg & (MDIO_START | MDIO_BUSY)), + 100, MDIO_WAIT_TIMES * 100)) + ret = -EIO; + else + ret = (reg >> MDIO_DATA_SHFT) & MDIO_DATA_BMSK; + + emac_phy_mdio_autopoll_enable(adpt); + + return ret; +} + +static int emac_mdio_write(struct mii_bus *bus, int addr, int regnum, u16 val) +{ + struct emac_adapter *adpt = bus->priv; + u32 reg; + int ret; + + ret = emac_phy_mdio_autopoll_disable(adpt); + if (ret) + return ret; + + emac_reg_update32(adpt->base + EMAC_PHY_STS, PHY_ADDR_BMSK, + (addr << PHY_ADDR_SHFT)); + + reg = SUP_PREAMBLE | + ((MDIO_CLK_25_4 << MDIO_CLK_SEL_SHFT) & MDIO_CLK_SEL_BMSK) | + ((regnum << MDIO_REG_ADDR_SHFT) & MDIO_REG_ADDR_BMSK) | + ((val << MDIO_DATA_SHFT) & MDIO_DATA_BMSK) | + MDIO_START; + + writel(reg, adpt->base + EMAC_MDIO_CTRL); + + if (readl_poll_timeout(adpt->base + EMAC_MDIO_CTRL, reg, + !(reg & (MDIO_START | MDIO_BUSY)), 100, + MDIO_WAIT_TIMES * 100)) + ret = -EIO; + + emac_phy_mdio_autopoll_enable(adpt); + + return ret; +} + +/* Configure the MDIO bus and connect the external PHY */ +int emac_phy_config(struct platform_device *pdev, struct emac_adapter *adpt) +{ + struct device_node *np = pdev->dev.of_node; + struct device_node *phy_np; + struct mii_bus *mii_bus; + int ret; + + /* Create the mii_bus object for talking to the MDIO bus */ + adpt->mii_bus = mii_bus = devm_mdiobus_alloc(&pdev->dev); + if (!mii_bus) + return -ENOMEM; + + mii_bus->name = "emac-mdio"; + snprintf(mii_bus->id, MII_BUS_ID_SIZE, "%s", pdev->name); + mii_bus->read = emac_mdio_read; + mii_bus->write = emac_mdio_write; + mii_bus->parent = &pdev->dev; + mii_bus->priv = adpt; + + ret = of_mdiobus_register(mii_bus, np); + if (ret) { + dev_err(&pdev->dev, "could not register mdio bus\n"); + return ret; + } + + phy_np = of_parse_phandle(np, "phy-handle", 0); + adpt->phydev = of_phy_find_device(phy_np); + if (!adpt->phydev) { + dev_err(&pdev->dev, "could not find external phy\n"); + mdiobus_unregister(mii_bus); + return -ENODEV; + } + + if (adpt->phydev->drv) + phy_attached_print(adpt->phydev, NULL); + + return 0; +} diff --git a/drivers/net/ethernet/qualcomm/emac/emac-phy.h b/drivers/net/ethernet/qualcomm/emac/emac-phy.h new file mode 100644 index 000000000000..49f3701a6dd7 --- /dev/null +++ b/drivers/net/ethernet/qualcomm/emac/emac-phy.h @@ -0,0 +1,33 @@ +/* Copyright (c) 2015-2016, The Linux Foundation. All rights reserved. +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License version 2 and +* only version 2 as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +*/ + +#ifndef _EMAC_PHY_H_ +#define _EMAC_PHY_H_ + +typedef int (*emac_sgmii_initialize)(struct emac_adapter *adpt); + +/** emac_phy - internal emac phy + * @base base address + * @digital per-lane digital block + * @initialize initialization function + */ +struct emac_phy { + void __iomem *base; + void __iomem *digital; + emac_sgmii_initialize initialize; +}; + +struct emac_adapter; + +int emac_phy_config(struct platform_device *pdev, struct emac_adapter *adpt); + +#endif /* _EMAC_PHY_H_ */ diff --git a/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c new file mode 100644 index 000000000000..6ab0a3c96431 --- /dev/null +++ b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c @@ -0,0 +1,721 @@ +/* Copyright (c) 2015-2016, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +/* Qualcomm Technologies, Inc. EMAC SGMII Controller driver. + */ + +#include +#include +#include "emac.h" +#include "emac-mac.h" +#include "emac-sgmii.h" + +/* EMAC_QSERDES register offsets */ +#define EMAC_QSERDES_COM_SYS_CLK_CTRL 0x000000 +#define EMAC_QSERDES_COM_PLL_CNTRL 0x000014 +#define EMAC_QSERDES_COM_PLL_IP_SETI 0x000018 +#define EMAC_QSERDES_COM_PLL_CP_SETI 0x000024 +#define EMAC_QSERDES_COM_PLL_IP_SETP 0x000028 +#define EMAC_QSERDES_COM_PLL_CP_SETP 0x00002c +#define EMAC_QSERDES_COM_SYSCLK_EN_SEL 0x000038 +#define EMAC_QSERDES_COM_RESETSM_CNTRL 0x000040 +#define EMAC_QSERDES_COM_PLLLOCK_CMP1 0x000044 +#define EMAC_QSERDES_COM_PLLLOCK_CMP2 0x000048 +#define EMAC_QSERDES_COM_PLLLOCK_CMP3 0x00004c +#define EMAC_QSERDES_COM_PLLLOCK_CMP_EN 0x000050 +#define EMAC_QSERDES_COM_DEC_START1 0x000064 +#define EMAC_QSERDES_COM_DIV_FRAC_START1 0x000098 +#define EMAC_QSERDES_COM_DIV_FRAC_START2 0x00009c +#define EMAC_QSERDES_COM_DIV_FRAC_START3 0x0000a0 +#define EMAC_QSERDES_COM_DEC_START2 0x0000a4 +#define EMAC_QSERDES_COM_PLL_CRCTRL 0x0000ac +#define EMAC_QSERDES_COM_RESET_SM 0x0000bc +#define EMAC_QSERDES_TX_BIST_MODE_LANENO 0x000100 +#define EMAC_QSERDES_TX_TX_EMP_POST1_LVL 0x000108 +#define EMAC_QSERDES_TX_TX_DRV_LVL 0x00010c +#define EMAC_QSERDES_TX_LANE_MODE 0x000150 +#define EMAC_QSERDES_TX_TRAN_DRVR_EMP_EN 0x000170 +#define EMAC_QSERDES_RX_CDR_CONTROL 0x000200 +#define EMAC_QSERDES_RX_CDR_CONTROL2 0x000210 +#define EMAC_QSERDES_RX_RX_EQ_GAIN12 0x000230 + +/* EMAC_SGMII register offsets */ +#define EMAC_SGMII_PHY_SERDES_START 0x000000 +#define EMAC_SGMII_PHY_CMN_PWR_CTRL 0x000004 +#define EMAC_SGMII_PHY_RX_PWR_CTRL 0x000008 +#define EMAC_SGMII_PHY_TX_PWR_CTRL 0x00000C +#define EMAC_SGMII_PHY_LANE_CTRL1 0x000018 +#define EMAC_SGMII_PHY_AUTONEG_CFG2 0x000048 +#define EMAC_SGMII_PHY_CDR_CTRL0 0x000058 +#define EMAC_SGMII_PHY_SPEED_CFG1 0x000074 +#define EMAC_SGMII_PHY_POW_DWN_CTRL0 0x000080 +#define EMAC_SGMII_PHY_RESET_CTRL 0x0000a8 +#define EMAC_SGMII_PHY_IRQ_CMD 0x0000ac +#define EMAC_SGMII_PHY_INTERRUPT_CLEAR 0x0000b0 +#define EMAC_SGMII_PHY_INTERRUPT_MASK 0x0000b4 +#define EMAC_SGMII_PHY_INTERRUPT_STATUS 0x0000b8 +#define EMAC_SGMII_PHY_RX_CHK_STATUS 0x0000d4 +#define EMAC_SGMII_PHY_AUTONEG0_STATUS 0x0000e0 +#define EMAC_SGMII_PHY_AUTONEG1_STATUS 0x0000e4 + +/* EMAC_QSERDES_COM_PLL_IP_SETI */ +#define PLL_IPSETI(x) ((x) & 0x3f) + +/* EMAC_QSERDES_COM_PLL_CP_SETI */ +#define PLL_CPSETI(x) ((x) & 0xff) + +/* EMAC_QSERDES_COM_PLL_IP_SETP */ +#define PLL_IPSETP(x) ((x) & 0x3f) + +/* EMAC_QSERDES_COM_PLL_CP_SETP */ +#define PLL_CPSETP(x) ((x) & 0x1f) + +/* EMAC_QSERDES_COM_PLL_CRCTRL */ +#define PLL_RCTRL(x) (((x) & 0xf) << 4) +#define PLL_CCTRL(x) ((x) & 0xf) + +/* SGMII v2 PHY registers per lane */ +#define EMAC_SGMII_PHY_LN_OFFSET 0x0400 + +/* SGMII v2 digital lane registers */ +#define EMAC_SGMII_LN_DRVR_CTRL0 0x00C +#define EMAC_SGMII_LN_DRVR_TAP_EN 0x018 +#define EMAC_SGMII_LN_TX_MARGINING 0x01C +#define EMAC_SGMII_LN_TX_PRE 0x020 +#define EMAC_SGMII_LN_TX_POST 0x024 +#define EMAC_SGMII_LN_TX_BAND_MODE 0x060 +#define EMAC_SGMII_LN_LANE_MODE 0x064 +#define EMAC_SGMII_LN_PARALLEL_RATE 0x078 +#define EMAC_SGMII_LN_CML_CTRL_MODE0 0x0B8 +#define EMAC_SGMII_LN_MIXER_CTRL_MODE0 0x0D0 +#define EMAC_SGMII_LN_VGA_INITVAL 0x134 +#define EMAC_SGMII_LN_UCDR_FO_GAIN_MODE0 0x17C +#define EMAC_SGMII_LN_UCDR_SO_GAIN_MODE0 0x188 +#define EMAC_SGMII_LN_UCDR_SO_CONFIG 0x194 +#define EMAC_SGMII_LN_RX_BAND 0x19C +#define EMAC_SGMII_LN_RX_RCVR_PATH1_MODE0 0x1B8 +#define EMAC_SGMII_LN_RSM_CONFIG 0x1F0 +#define EMAC_SGMII_LN_SIGDET_ENABLES 0x224 +#define EMAC_SGMII_LN_SIGDET_CNTRL 0x228 +#define EMAC_SGMII_LN_SIGDET_DEGLITCH_CNTRL 0x22C +#define EMAC_SGMII_LN_RX_EN_SIGNAL 0x2A0 +#define EMAC_SGMII_LN_RX_MISC_CNTRL0 0x2AC +#define EMAC_SGMII_LN_DRVR_LOGIC_CLKDIV 0x2BC + +/* SGMII v2 digital lane register values */ +#define UCDR_STEP_BY_TWO_MODE0 BIT(7) +#define UCDR_xO_GAIN_MODE(x) ((x) & 0x7f) +#define UCDR_ENABLE BIT(6) +#define UCDR_SO_SATURATION(x) ((x) & 0x3f) +#define SIGDET_LP_BYP_PS4 BIT(7) +#define SIGDET_EN_PS0_TO_PS2 BIT(6) +#define EN_ACCOUPLEVCM_SW_MUX BIT(5) +#define EN_ACCOUPLEVCM_SW BIT(4) +#define RX_SYNC_EN BIT(3) +#define RXTERM_HIGHZ_PS5 BIT(2) +#define SIGDET_EN_PS3 BIT(1) +#define EN_ACCOUPLE_VCM_PS3 BIT(0) +#define UFS_MODE BIT(5) +#define TXVAL_VALID_INIT BIT(4) +#define TXVAL_VALID_MUX BIT(3) +#define TXVAL_VALID BIT(2) +#define USB3P1_MODE BIT(1) +#define KR_PCIGEN3_MODE BIT(0) +#define PRE_EN BIT(3) +#define POST_EN BIT(2) +#define MAIN_EN_MUX BIT(1) +#define MAIN_EN BIT(0) +#define TX_MARGINING_MUX BIT(6) +#define TX_MARGINING(x) ((x) & 0x3f) +#define TX_PRE_MUX BIT(6) +#define TX_PRE(x) ((x) & 0x3f) +#define TX_POST_MUX BIT(6) +#define TX_POST(x) ((x) & 0x3f) +#define CML_GEAR_MODE(x) (((x) & 7) << 3) +#define CML2CMOS_IBOOST_MODE(x) ((x) & 7) +#define MIXER_LOADB_MODE(x) (((x) & 0xf) << 2) +#define MIXER_DATARATE_MODE(x) ((x) & 3) +#define VGA_THRESH_DFE(x) ((x) & 0x3f) +#define SIGDET_LP_BYP_PS0_TO_PS2 BIT(5) +#define SIGDET_LP_BYP_MUX BIT(4) +#define SIGDET_LP_BYP BIT(3) +#define SIGDET_EN_MUX BIT(2) +#define SIGDET_EN BIT(1) +#define SIGDET_FLT_BYP BIT(0) +#define SIGDET_LVL(x) (((x) & 0xf) << 4) +#define SIGDET_BW_CTRL(x) ((x) & 0xf) +#define SIGDET_DEGLITCH_CTRL(x) (((x) & 0xf) << 1) +#define SIGDET_DEGLITCH_BYP BIT(0) +#define INVERT_PCS_RX_CLK BIT(7) +#define PWM_EN BIT(6) +#define RXBIAS_SEL(x) (((x) & 0x3) << 4) +#define EBDAC_SIGN BIT(3) +#define EDAC_SIGN BIT(2) +#define EN_AUXTAP1SIGN_INVERT BIT(1) +#define EN_DAC_CHOPPING BIT(0) +#define DRVR_LOGIC_CLK_EN BIT(4) +#define DRVR_LOGIC_CLK_DIV(x) ((x) & 0xf) +#define PARALLEL_RATE_MODE2(x) (((x) & 0x3) << 4) +#define PARALLEL_RATE_MODE1(x) (((x) & 0x3) << 2) +#define PARALLEL_RATE_MODE0(x) ((x) & 0x3) +#define BAND_MODE2(x) (((x) & 0x3) << 4) +#define BAND_MODE1(x) (((x) & 0x3) << 2) +#define BAND_MODE0(x) ((x) & 0x3) +#define LANE_SYNC_MODE BIT(5) +#define LANE_MODE(x) ((x) & 0x1f) +#define CDR_PD_SEL_MODE0(x) (((x) & 0x3) << 5) +#define EN_DLL_MODE0 BIT(4) +#define EN_IQ_DCC_MODE0 BIT(3) +#define EN_IQCAL_MODE0 BIT(2) +#define EN_QPATH_MODE0 BIT(1) +#define EN_EPATH_MODE0 BIT(0) +#define FORCE_TSYNC_ACK BIT(7) +#define FORCE_CMN_ACK BIT(6) +#define FORCE_CMN_READY BIT(5) +#define EN_RCLK_DEGLITCH BIT(4) +#define BYPASS_RSM_CDR_RESET BIT(3) +#define BYPASS_RSM_TSYNC BIT(2) +#define BYPASS_RSM_SAMP_CAL BIT(1) +#define BYPASS_RSM_DLL_CAL BIT(0) + +/* EMAC_QSERDES_COM_SYS_CLK_CTRL */ +#define SYSCLK_CM BIT(4) +#define SYSCLK_AC_COUPLE BIT(3) + +/* EMAC_QSERDES_COM_PLL_CNTRL */ +#define OCP_EN BIT(5) +#define PLL_DIV_FFEN BIT(2) +#define PLL_DIV_ORD BIT(1) + +/* EMAC_QSERDES_COM_SYSCLK_EN_SEL */ +#define SYSCLK_SEL_CMOS BIT(3) + +/* EMAC_QSERDES_COM_RESETSM_CNTRL */ +#define FRQ_TUNE_MODE BIT(4) + +/* EMAC_QSERDES_COM_PLLLOCK_CMP_EN */ +#define PLLLOCK_CMP_EN BIT(0) + +/* EMAC_QSERDES_COM_DEC_START1 */ +#define DEC_START1_MUX BIT(7) +#define DEC_START1(x) ((x) & 0x7f) + +/* EMAC_QSERDES_COM_DIV_FRAC_START1 * EMAC_QSERDES_COM_DIV_FRAC_START2 */ +#define DIV_FRAC_START_MUX BIT(7) +#define DIV_FRAC_START(x) ((x) & 0x7f) + +/* EMAC_QSERDES_COM_DIV_FRAC_START3 */ +#define DIV_FRAC_START3_MUX BIT(4) +#define DIV_FRAC_START3(x) ((x) & 0xf) + +/* EMAC_QSERDES_COM_DEC_START2 */ +#define DEC_START2_MUX BIT(1) +#define DEC_START2 BIT(0) + +/* EMAC_QSERDES_COM_RESET_SM */ +#define READY BIT(5) + +/* EMAC_QSERDES_TX_TX_EMP_POST1_LVL */ +#define TX_EMP_POST1_LVL_MUX BIT(5) +#define TX_EMP_POST1_LVL(x) ((x) & 0x1f) +#define TX_EMP_POST1_LVL_BMSK 0x1f +#define TX_EMP_POST1_LVL_SHFT 0 + +/* EMAC_QSERDES_TX_TX_DRV_LVL */ +#define TX_DRV_LVL_MUX BIT(4) +#define TX_DRV_LVL(x) ((x) & 0xf) + +/* EMAC_QSERDES_TX_TRAN_DRVR_EMP_EN */ +#define EMP_EN_MUX BIT(1) +#define EMP_EN BIT(0) + +/* EMAC_QSERDES_RX_CDR_CONTROL & EMAC_QSERDES_RX_CDR_CONTROL2 */ +#define HBW_PD_EN BIT(7) +#define SECONDORDERENABLE BIT(6) +#define FIRSTORDER_THRESH(x) (((x) & 0x7) << 3) +#define SECONDORDERGAIN(x) ((x) & 0x7) + +/* EMAC_QSERDES_RX_RX_EQ_GAIN12 */ +#define RX_EQ_GAIN2(x) (((x) & 0xf) << 4) +#define RX_EQ_GAIN1(x) ((x) & 0xf) + +/* EMAC_SGMII_PHY_SERDES_START */ +#define SERDES_START BIT(0) + +/* EMAC_SGMII_PHY_CMN_PWR_CTRL */ +#define BIAS_EN BIT(6) +#define PLL_EN BIT(5) +#define SYSCLK_EN BIT(4) +#define CLKBUF_L_EN BIT(3) +#define PLL_TXCLK_EN BIT(1) +#define PLL_RXCLK_EN BIT(0) + +/* EMAC_SGMII_PHY_RX_PWR_CTRL */ +#define L0_RX_SIGDET_EN BIT(7) +#define L0_RX_TERM_MODE(x) (((x) & 3) << 4) +#define L0_RX_I_EN BIT(1) + +/* EMAC_SGMII_PHY_TX_PWR_CTRL */ +#define L0_TX_EN BIT(5) +#define L0_CLKBUF_EN BIT(4) +#define L0_TRAN_BIAS_EN BIT(1) + +/* EMAC_SGMII_PHY_LANE_CTRL1 */ +#define L0_RX_EQUALIZE_ENABLE BIT(6) +#define L0_RESET_TSYNC_EN BIT(4) +#define L0_DRV_LVL(x) ((x) & 0xf) + +/* EMAC_SGMII_PHY_AUTONEG_CFG2 */ +#define FORCE_AN_TX_CFG BIT(5) +#define FORCE_AN_RX_CFG BIT(4) +#define AN_ENABLE BIT(0) + +/* EMAC_SGMII_PHY_SPEED_CFG1 */ +#define DUPLEX_MODE BIT(4) +#define SPDMODE_1000 BIT(1) +#define SPDMODE_100 BIT(0) +#define SPDMODE_10 0 +#define SPDMODE_BMSK 3 +#define SPDMODE_SHFT 0 + +/* EMAC_SGMII_PHY_POW_DWN_CTRL0 */ +#define PWRDN_B BIT(0) +#define CDR_MAX_CNT(x) ((x) & 0xff) + +/* EMAC_QSERDES_TX_BIST_MODE_LANENO */ +#define BIST_LANE_NUMBER(x) (((x) & 3) << 5) +#define BISTMODE(x) ((x) & 0x1f) + +/* EMAC_QSERDES_COM_PLLLOCK_CMPx */ +#define PLLLOCK_CMP(x) ((x) & 0xff) + +/* EMAC_SGMII_PHY_RESET_CTRL */ +#define PHY_SW_RESET BIT(0) + +/* EMAC_SGMII_PHY_IRQ_CMD */ +#define IRQ_GLOBAL_CLEAR BIT(0) + +/* EMAC_SGMII_PHY_INTERRUPT_MASK */ +#define DECODE_CODE_ERR BIT(7) +#define DECODE_DISP_ERR BIT(6) +#define PLL_UNLOCK BIT(5) +#define AN_ILLEGAL_TERM BIT(4) +#define SYNC_FAIL BIT(3) +#define AN_START BIT(2) +#define AN_END BIT(1) +#define AN_REQUEST BIT(0) + +#define SGMII_PHY_IRQ_CLR_WAIT_TIME 10 + +#define SGMII_PHY_INTERRUPT_ERR (\ + DECODE_CODE_ERR |\ + DECODE_DISP_ERR) + +#define SGMII_ISR_AN_MASK (\ + AN_REQUEST |\ + AN_START |\ + AN_END |\ + AN_ILLEGAL_TERM |\ + PLL_UNLOCK |\ + SYNC_FAIL) + +#define SGMII_ISR_MASK (\ + SGMII_PHY_INTERRUPT_ERR |\ + SGMII_ISR_AN_MASK) + +/* SGMII TX_CONFIG */ +#define TXCFG_LINK 0x8000 +#define TXCFG_MODE_BMSK 0x1c00 +#define TXCFG_1000_FULL 0x1800 +#define TXCFG_100_FULL 0x1400 +#define TXCFG_100_HALF 0x0400 +#define TXCFG_10_FULL 0x1000 +#define TXCFG_10_HALF 0x0000 + +#define SERDES_START_WAIT_TIMES 100 + +struct emac_reg_write { + unsigned int offset; + u32 val; +}; + +static void emac_reg_write_all(void __iomem *base, + const struct emac_reg_write *itr, size_t size) +{ + size_t i; + + for (i = 0; i < size; ++itr, ++i) + writel(itr->val, base + itr->offset); +} + +static const struct emac_reg_write physical_coding_sublayer_programming_v1[] = { + {EMAC_SGMII_PHY_CDR_CTRL0, CDR_MAX_CNT(15)}, + {EMAC_SGMII_PHY_POW_DWN_CTRL0, PWRDN_B}, + {EMAC_SGMII_PHY_CMN_PWR_CTRL, + BIAS_EN | SYSCLK_EN | CLKBUF_L_EN | PLL_TXCLK_EN | PLL_RXCLK_EN}, + {EMAC_SGMII_PHY_TX_PWR_CTRL, L0_TX_EN | L0_CLKBUF_EN | L0_TRAN_BIAS_EN}, + {EMAC_SGMII_PHY_RX_PWR_CTRL, + L0_RX_SIGDET_EN | L0_RX_TERM_MODE(1) | L0_RX_I_EN}, + {EMAC_SGMII_PHY_CMN_PWR_CTRL, + BIAS_EN | PLL_EN | SYSCLK_EN | CLKBUF_L_EN | PLL_TXCLK_EN | + PLL_RXCLK_EN}, + {EMAC_SGMII_PHY_LANE_CTRL1, + L0_RX_EQUALIZE_ENABLE | L0_RESET_TSYNC_EN | L0_DRV_LVL(15)}, +}; + +static const struct emac_reg_write sysclk_refclk_setting[] = { + {EMAC_QSERDES_COM_SYSCLK_EN_SEL, SYSCLK_SEL_CMOS}, + {EMAC_QSERDES_COM_SYS_CLK_CTRL, SYSCLK_CM | SYSCLK_AC_COUPLE}, +}; + +static const struct emac_reg_write pll_setting[] = { + {EMAC_QSERDES_COM_PLL_IP_SETI, PLL_IPSETI(1)}, + {EMAC_QSERDES_COM_PLL_CP_SETI, PLL_CPSETI(59)}, + {EMAC_QSERDES_COM_PLL_IP_SETP, PLL_IPSETP(10)}, + {EMAC_QSERDES_COM_PLL_CP_SETP, PLL_CPSETP(9)}, + {EMAC_QSERDES_COM_PLL_CRCTRL, PLL_RCTRL(15) | PLL_CCTRL(11)}, + {EMAC_QSERDES_COM_PLL_CNTRL, OCP_EN | PLL_DIV_FFEN | PLL_DIV_ORD}, + {EMAC_QSERDES_COM_DEC_START1, DEC_START1_MUX | DEC_START1(2)}, + {EMAC_QSERDES_COM_DEC_START2, DEC_START2_MUX | DEC_START2}, + {EMAC_QSERDES_COM_DIV_FRAC_START1, + DIV_FRAC_START_MUX | DIV_FRAC_START(85)}, + {EMAC_QSERDES_COM_DIV_FRAC_START2, + DIV_FRAC_START_MUX | DIV_FRAC_START(42)}, + {EMAC_QSERDES_COM_DIV_FRAC_START3, + DIV_FRAC_START3_MUX | DIV_FRAC_START3(3)}, + {EMAC_QSERDES_COM_PLLLOCK_CMP1, PLLLOCK_CMP(43)}, + {EMAC_QSERDES_COM_PLLLOCK_CMP2, PLLLOCK_CMP(104)}, + {EMAC_QSERDES_COM_PLLLOCK_CMP3, PLLLOCK_CMP(0)}, + {EMAC_QSERDES_COM_PLLLOCK_CMP_EN, PLLLOCK_CMP_EN}, + {EMAC_QSERDES_COM_RESETSM_CNTRL, FRQ_TUNE_MODE}, +}; + +static const struct emac_reg_write cdr_setting[] = { + {EMAC_QSERDES_RX_CDR_CONTROL, + SECONDORDERENABLE | FIRSTORDER_THRESH(3) | SECONDORDERGAIN(2)}, + {EMAC_QSERDES_RX_CDR_CONTROL2, + SECONDORDERENABLE | FIRSTORDER_THRESH(3) | SECONDORDERGAIN(4)}, +}; + +static const struct emac_reg_write tx_rx_setting[] = { + {EMAC_QSERDES_TX_BIST_MODE_LANENO, 0}, + {EMAC_QSERDES_TX_TX_DRV_LVL, TX_DRV_LVL_MUX | TX_DRV_LVL(15)}, + {EMAC_QSERDES_TX_TRAN_DRVR_EMP_EN, EMP_EN_MUX | EMP_EN}, + {EMAC_QSERDES_TX_TX_EMP_POST1_LVL, + TX_EMP_POST1_LVL_MUX | TX_EMP_POST1_LVL(1)}, + {EMAC_QSERDES_RX_RX_EQ_GAIN12, RX_EQ_GAIN2(15) | RX_EQ_GAIN1(15)}, + {EMAC_QSERDES_TX_LANE_MODE, LANE_MODE(8)}, +}; + +static const struct emac_reg_write sgmii_v2_laned[] = { + /* CDR Settings */ + {EMAC_SGMII_LN_UCDR_FO_GAIN_MODE0, + UCDR_STEP_BY_TWO_MODE0 | UCDR_xO_GAIN_MODE(10)}, + {EMAC_SGMII_LN_UCDR_SO_GAIN_MODE0, UCDR_xO_GAIN_MODE(6)}, + {EMAC_SGMII_LN_UCDR_SO_CONFIG, UCDR_ENABLE | UCDR_SO_SATURATION(12)}, + + /* TX/RX Settings */ + {EMAC_SGMII_LN_RX_EN_SIGNAL, SIGDET_LP_BYP_PS4 | SIGDET_EN_PS0_TO_PS2}, + + {EMAC_SGMII_LN_DRVR_CTRL0, TXVAL_VALID_INIT | KR_PCIGEN3_MODE}, + {EMAC_SGMII_LN_DRVR_TAP_EN, MAIN_EN}, + {EMAC_SGMII_LN_TX_MARGINING, TX_MARGINING_MUX | TX_MARGINING(25)}, + {EMAC_SGMII_LN_TX_PRE, TX_PRE_MUX}, + {EMAC_SGMII_LN_TX_POST, TX_POST_MUX}, + + {EMAC_SGMII_LN_CML_CTRL_MODE0, + CML_GEAR_MODE(1) | CML2CMOS_IBOOST_MODE(1)}, + {EMAC_SGMII_LN_MIXER_CTRL_MODE0, + MIXER_LOADB_MODE(12) | MIXER_DATARATE_MODE(1)}, + {EMAC_SGMII_LN_VGA_INITVAL, VGA_THRESH_DFE(31)}, + {EMAC_SGMII_LN_SIGDET_ENABLES, + SIGDET_LP_BYP_PS0_TO_PS2 | SIGDET_FLT_BYP}, + {EMAC_SGMII_LN_SIGDET_CNTRL, SIGDET_LVL(8)}, + + {EMAC_SGMII_LN_SIGDET_DEGLITCH_CNTRL, SIGDET_DEGLITCH_CTRL(4)}, + {EMAC_SGMII_LN_RX_MISC_CNTRL0, 0}, + {EMAC_SGMII_LN_DRVR_LOGIC_CLKDIV, + DRVR_LOGIC_CLK_EN | DRVR_LOGIC_CLK_DIV(4)}, + + {EMAC_SGMII_LN_PARALLEL_RATE, PARALLEL_RATE_MODE0(1)}, + {EMAC_SGMII_LN_TX_BAND_MODE, BAND_MODE0(2)}, + {EMAC_SGMII_LN_RX_BAND, BAND_MODE0(3)}, + {EMAC_SGMII_LN_LANE_MODE, LANE_MODE(26)}, + {EMAC_SGMII_LN_RX_RCVR_PATH1_MODE0, CDR_PD_SEL_MODE0(3)}, + {EMAC_SGMII_LN_RSM_CONFIG, BYPASS_RSM_SAMP_CAL | BYPASS_RSM_DLL_CAL}, +}; + +static const struct emac_reg_write physical_coding_sublayer_programming_v2[] = { + {EMAC_SGMII_PHY_POW_DWN_CTRL0, PWRDN_B}, + {EMAC_SGMII_PHY_CDR_CTRL0, CDR_MAX_CNT(15)}, + {EMAC_SGMII_PHY_TX_PWR_CTRL, 0}, + {EMAC_SGMII_PHY_LANE_CTRL1, L0_RX_EQUALIZE_ENABLE}, +}; + +static int emac_sgmii_link_init(struct emac_adapter *adpt) +{ + struct phy_device *phydev = adpt->phydev; + struct emac_phy *phy = &adpt->phy; + u32 val; + + val = readl(phy->base + EMAC_SGMII_PHY_AUTONEG_CFG2); + + if (phydev->autoneg == AUTONEG_ENABLE) { + val &= ~(FORCE_AN_RX_CFG | FORCE_AN_TX_CFG); + val |= AN_ENABLE; + writel(val, phy->base + EMAC_SGMII_PHY_AUTONEG_CFG2); + } else { + u32 speed_cfg; + + switch (phydev->speed) { + case SPEED_10: + speed_cfg = SPDMODE_10; + break; + case SPEED_100: + speed_cfg = SPDMODE_100; + break; + case SPEED_1000: + speed_cfg = SPDMODE_1000; + break; + default: + return -EINVAL; + } + + if (phydev->duplex == DUPLEX_FULL) + speed_cfg |= DUPLEX_MODE; + + val &= ~AN_ENABLE; + writel(speed_cfg, phy->base + EMAC_SGMII_PHY_SPEED_CFG1); + writel(val, phy->base + EMAC_SGMII_PHY_AUTONEG_CFG2); + } + + return 0; +} + +static int emac_sgmii_irq_clear(struct emac_adapter *adpt, u32 irq_bits) +{ + struct emac_phy *phy = &adpt->phy; + u32 status; + + writel_relaxed(irq_bits, phy->base + EMAC_SGMII_PHY_INTERRUPT_CLEAR); + writel_relaxed(IRQ_GLOBAL_CLEAR, phy->base + EMAC_SGMII_PHY_IRQ_CMD); + /* Ensure interrupt clear command is written to HW */ + wmb(); + + /* After set the IRQ_GLOBAL_CLEAR bit, the status clearing must + * be confirmed before clearing the bits in other registers. + * It takes a few cycles for hw to clear the interrupt status. + */ + if (readl_poll_timeout_atomic(phy->base + + EMAC_SGMII_PHY_INTERRUPT_STATUS, + status, !(status & irq_bits), 1, + SGMII_PHY_IRQ_CLR_WAIT_TIME)) { + netdev_err(adpt->netdev, + "error: failed clear SGMII irq: status:0x%x bits:0x%x\n", + status, irq_bits); + return -EIO; + } + + /* Finalize clearing procedure */ + writel_relaxed(0, phy->base + EMAC_SGMII_PHY_IRQ_CMD); + writel_relaxed(0, phy->base + EMAC_SGMII_PHY_INTERRUPT_CLEAR); + + /* Ensure that clearing procedure finalization is written to HW */ + wmb(); + + return 0; +} + +int emac_sgmii_init_v1(struct emac_adapter *adpt) +{ + struct emac_phy *phy = &adpt->phy; + unsigned int i; + int ret; + + ret = emac_sgmii_link_init(adpt); + if (ret) + return ret; + + emac_reg_write_all(phy->base, physical_coding_sublayer_programming_v1, + ARRAY_SIZE(physical_coding_sublayer_programming_v1)); + emac_reg_write_all(phy->base, sysclk_refclk_setting, + ARRAY_SIZE(sysclk_refclk_setting)); + emac_reg_write_all(phy->base, pll_setting, ARRAY_SIZE(pll_setting)); + emac_reg_write_all(phy->base, cdr_setting, ARRAY_SIZE(cdr_setting)); + emac_reg_write_all(phy->base, tx_rx_setting, + ARRAY_SIZE(tx_rx_setting)); + + /* Power up the Ser/Des engine */ + writel(SERDES_START, phy->base + EMAC_SGMII_PHY_SERDES_START); + + for (i = 0; i < SERDES_START_WAIT_TIMES; i++) { + if (readl(phy->base + EMAC_QSERDES_COM_RESET_SM) & READY) + break; + usleep_range(100, 200); + } + + if (i == SERDES_START_WAIT_TIMES) { + netdev_err(adpt->netdev, "error: ser/des failed to start\n"); + return -EIO; + } + /* Mask out all the SGMII Interrupt */ + writel(0, phy->base + EMAC_SGMII_PHY_INTERRUPT_MASK); + + emac_sgmii_irq_clear(adpt, SGMII_PHY_INTERRUPT_ERR); + + return 0; +} + +int emac_sgmii_init_v2(struct emac_adapter *adpt) +{ + struct emac_phy *phy = &adpt->phy; + void __iomem *phy_regs = phy->base; + void __iomem *laned = phy->digital; + unsigned int i; + u32 lnstatus; + int ret; + + ret = emac_sgmii_link_init(adpt); + if (ret) + return ret; + + /* PCS lane-x init */ + emac_reg_write_all(phy->base, physical_coding_sublayer_programming_v2, + ARRAY_SIZE(physical_coding_sublayer_programming_v2)); + + /* SGMII lane-x init */ + emac_reg_write_all(phy->digital, + sgmii_v2_laned, ARRAY_SIZE(sgmii_v2_laned)); + + /* Power up PCS and start reset lane state machine */ + + writel(0, phy_regs + EMAC_SGMII_PHY_RESET_CTRL); + writel(1, laned + SGMII_LN_RSM_START); + + /* Wait for c_ready assertion */ + for (i = 0; i < SERDES_START_WAIT_TIMES; i++) { + lnstatus = readl(phy_regs + SGMII_PHY_LN_LANE_STATUS); + if (lnstatus & BIT(1)) + break; + usleep_range(100, 200); + } + + if (i == SERDES_START_WAIT_TIMES) { + netdev_err(adpt->netdev, "SGMII failed to start\n"); + return -EIO; + } + + /* Disable digital and SERDES loopback */ + writel(0, phy_regs + SGMII_PHY_LN_BIST_GEN0); + writel(0, phy_regs + SGMII_PHY_LN_BIST_GEN2); + writel(0, phy_regs + SGMII_PHY_LN_CDR_CTRL1); + + /* Mask out all the SGMII Interrupt */ + writel(0, phy_regs + EMAC_SGMII_PHY_INTERRUPT_MASK); + + emac_sgmii_irq_clear(adpt, SGMII_PHY_INTERRUPT_ERR); + + return 0; +} + +static void emac_sgmii_reset_prepare(struct emac_adapter *adpt) +{ + struct emac_phy *phy = &adpt->phy; + u32 val; + + /* Reset PHY */ + val = readl(phy->base + EMAC_EMAC_WRAPPER_CSR2); + writel(((val & ~PHY_RESET) | PHY_RESET), phy->base + + EMAC_EMAC_WRAPPER_CSR2); + /* Ensure phy-reset command is written to HW before the release cmd */ + msleep(50); + val = readl(phy->base + EMAC_EMAC_WRAPPER_CSR2); + writel((val & ~PHY_RESET), phy->base + EMAC_EMAC_WRAPPER_CSR2); + /* Ensure phy-reset release command is written to HW before initializing + * SGMII + */ + msleep(50); +} + +void emac_sgmii_reset(struct emac_adapter *adpt) +{ + int ret; + + clk_set_rate(adpt->clk[EMAC_CLK_HIGH_SPEED], 19200000); + emac_sgmii_reset_prepare(adpt); + + ret = adpt->phy.initialize(adpt); + if (ret) + netdev_err(adpt->netdev, + "could not reinitialize internal PHY (error=%i)\n", + ret); + + clk_set_rate(adpt->clk[EMAC_CLK_HIGH_SPEED], 125000000); +} + +static const struct of_device_id emac_sgmii_dt_match[] = { + { + .compatible = "qcom,fsm9900-emac-sgmii", + .data = emac_sgmii_init_v1, + }, + { + .compatible = "qcom,qdf2432-emac-sgmii", + .data = emac_sgmii_init_v2, + }, + {} +}; + +int emac_sgmii_config(struct platform_device *pdev, struct emac_adapter *adpt) +{ + struct platform_device *sgmii_pdev = NULL; + struct emac_phy *phy = &adpt->phy; + struct resource *res; + const struct of_device_id *match; + struct device_node *np; + + np = of_parse_phandle(pdev->dev.of_node, "internal-phy", 0); + if (!np) { + dev_err(&pdev->dev, "missing internal-phy property\n"); + return -ENODEV; + } + + sgmii_pdev = of_find_device_by_node(np); + if (!sgmii_pdev) { + dev_err(&pdev->dev, "invalid internal-phy property\n"); + return -ENODEV; + } + + match = of_match_device(emac_sgmii_dt_match, &sgmii_pdev->dev); + if (!match) { + dev_err(&pdev->dev, "unrecognized internal phy node\n"); + return -ENODEV; + } + + phy->initialize = (emac_sgmii_initialize)match->data; + + /* Base address is the first address */ + res = platform_get_resource(sgmii_pdev, IORESOURCE_MEM, 0); + phy->base = devm_ioremap_resource(&sgmii_pdev->dev, res); + if (IS_ERR(phy->base)) + return PTR_ERR(phy->base); + + /* v2 SGMII has a per-lane digital digital, so parse it if it exists */ + res = platform_get_resource(sgmii_pdev, IORESOURCE_MEM, 1); + if (res) { + phy->digital = devm_ioremap_resource(&sgmii_pdev->dev, res); + if (IS_ERR(phy->base)) + return PTR_ERR(phy->base); + + } + + return phy->initialize(adpt); +} diff --git a/drivers/net/ethernet/qualcomm/emac/emac-sgmii.h b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.h new file mode 100644 index 000000000000..ce79212ff403 --- /dev/null +++ b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.h @@ -0,0 +1,24 @@ +/* Copyright (c) 2015-2016, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _EMAC_SGMII_H_ +#define _EMAC_SGMII_H_ + +struct emac_adapter; +struct platform_device; + +int emac_sgmii_init_v1(struct emac_adapter *adpt); +int emac_sgmii_init_v2(struct emac_adapter *adpt); +int emac_sgmii_config(struct platform_device *pdev, struct emac_adapter *adpt); +void emac_sgmii_reset(struct emac_adapter *adpt); + +#endif diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c new file mode 100644 index 000000000000..56e0a9f1c5ec --- /dev/null +++ b/drivers/net/ethernet/qualcomm/emac/emac.c @@ -0,0 +1,743 @@ +/* Copyright (c) 2013-2016, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +/* Qualcomm Technologies, Inc. EMAC Gigabit Ethernet Driver */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "emac.h" +#include "emac-mac.h" +#include "emac-phy.h" +#include "emac-sgmii.h" + +#define EMAC_MSG_DEFAULT (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | \ + NETIF_MSG_TIMER | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP) + +#define EMAC_RRD_SIZE 4 +/* The RRD size if timestamping is enabled: */ +#define EMAC_TS_RRD_SIZE 6 +#define EMAC_TPD_SIZE 4 +#define EMAC_RFD_SIZE 2 + +#define REG_MAC_RX_STATUS_BIN EMAC_RXMAC_STATC_REG0 +#define REG_MAC_RX_STATUS_END EMAC_RXMAC_STATC_REG22 +#define REG_MAC_TX_STATUS_BIN EMAC_TXMAC_STATC_REG0 +#define REG_MAC_TX_STATUS_END EMAC_TXMAC_STATC_REG24 + +#define RXQ0_NUM_RFD_PREF_DEF 8 +#define TXQ0_NUM_TPD_PREF_DEF 5 + +#define EMAC_PREAMBLE_DEF 7 + +#define DMAR_DLY_CNT_DEF 15 +#define DMAW_DLY_CNT_DEF 4 + +#define IMR_NORMAL_MASK (\ + ISR_ERROR |\ + ISR_GPHY_LINK |\ + ISR_TX_PKT |\ + GPHY_WAKEUP_INT) + +#define IMR_EXTENDED_MASK (\ + SW_MAN_INT |\ + ISR_OVER |\ + ISR_ERROR |\ + ISR_GPHY_LINK |\ + ISR_TX_PKT |\ + GPHY_WAKEUP_INT) + +#define ISR_TX_PKT (\ + TX_PKT_INT |\ + TX_PKT_INT1 |\ + TX_PKT_INT2 |\ + TX_PKT_INT3) + +#define ISR_GPHY_LINK (\ + GPHY_LINK_UP_INT |\ + GPHY_LINK_DOWN_INT) + +#define ISR_OVER (\ + RFD0_UR_INT |\ + RFD1_UR_INT |\ + RFD2_UR_INT |\ + RFD3_UR_INT |\ + RFD4_UR_INT |\ + RXF_OF_INT |\ + TXF_UR_INT) + +#define ISR_ERROR (\ + DMAR_TO_INT |\ + DMAW_TO_INT |\ + TXQ_TO_INT) + +/* in sync with enum emac_clk_id */ +static const char * const emac_clk_name[] = { + "axi_clk", "cfg_ahb_clk", "high_speed_clk", "mdio_clk", "tx_clk", + "rx_clk", "sys_clk" +}; + +void emac_reg_update32(void __iomem *addr, u32 mask, u32 val) +{ + u32 data = readl(addr); + + writel(((data & ~mask) | val), addr); +} + +/* reinitialize */ +int emac_reinit_locked(struct emac_adapter *adpt) +{ + int ret; + + mutex_lock(&adpt->reset_lock); + + emac_mac_down(adpt); + emac_sgmii_reset(adpt); + ret = emac_mac_up(adpt); + + mutex_unlock(&adpt->reset_lock); + + return ret; +} + +/* NAPI */ +static int emac_napi_rtx(struct napi_struct *napi, int budget) +{ + struct emac_rx_queue *rx_q = + container_of(napi, struct emac_rx_queue, napi); + struct emac_adapter *adpt = netdev_priv(rx_q->netdev); + struct emac_irq *irq = rx_q->irq; + int work_done = 0; + + emac_mac_rx_process(adpt, rx_q, &work_done, budget); + + if (work_done < budget) { + napi_complete(napi); + + irq->mask |= rx_q->intr; + writel(irq->mask, adpt->base + EMAC_INT_MASK); + } + + return work_done; +} + +/* Transmit the packet */ +static int emac_start_xmit(struct sk_buff *skb, struct net_device *netdev) +{ + struct emac_adapter *adpt = netdev_priv(netdev); + + return emac_mac_tx_buf_send(adpt, &adpt->tx_q, skb); +} + +irqreturn_t emac_isr(int _irq, void *data) +{ + struct emac_irq *irq = data; + struct emac_adapter *adpt = + container_of(irq, struct emac_adapter, irq); + struct emac_rx_queue *rx_q = &adpt->rx_q; + u32 isr, status; + + /* disable the interrupt */ + writel(0, adpt->base + EMAC_INT_MASK); + + isr = readl_relaxed(adpt->base + EMAC_INT_STATUS); + + status = isr & irq->mask; + if (status == 0) + goto exit; + + if (status & ISR_ERROR) { + netif_warn(adpt, intr, adpt->netdev, + "warning: error irq status 0x%lx\n", + status & ISR_ERROR); + /* reset MAC */ + schedule_work(&adpt->work_thread); + } + + /* Schedule the napi for receive queue with interrupt + * status bit set + */ + if (status & rx_q->intr) { + if (napi_schedule_prep(&rx_q->napi)) { + irq->mask &= ~rx_q->intr; + __napi_schedule(&rx_q->napi); + } + } + + if (status & TX_PKT_INT) + emac_mac_tx_process(adpt, &adpt->tx_q); + + if (status & ISR_OVER) + net_warn_ratelimited("warning: TX/RX overflow\n"); + + /* link event */ + if (status & ISR_GPHY_LINK) + phy_mac_interrupt(adpt->phydev, !!(status & GPHY_LINK_UP_INT)); + +exit: + /* enable the interrupt */ + writel(irq->mask, adpt->base + EMAC_INT_MASK); + + return IRQ_HANDLED; +} + +/* Configure VLAN tag strip/insert feature */ +static int emac_set_features(struct net_device *netdev, + netdev_features_t features) +{ + netdev_features_t changed = features ^ netdev->features; + struct emac_adapter *adpt = netdev_priv(netdev); + + /* We only need to reprogram the hardware if the VLAN tag features + * have changed, and if it's already running. + */ + if (!(changed & (NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX))) + return 0; + + if (!netif_running(netdev)) + return 0; + + /* emac_mac_mode_config() uses netdev->features to configure the EMAC, + * so make sure it's set first. + */ + netdev->features = features; + + return emac_reinit_locked(adpt); +} + +/* Configure Multicast and Promiscuous modes */ +static void emac_rx_mode_set(struct net_device *netdev) +{ + struct emac_adapter *adpt = netdev_priv(netdev); + struct netdev_hw_addr *ha; + + emac_mac_mode_config(adpt); + + /* update multicast address filtering */ + emac_mac_multicast_addr_clear(adpt); + netdev_for_each_mc_addr(ha, netdev) + emac_mac_multicast_addr_set(adpt, ha->addr); +} + +/* Change the Maximum Transfer Unit (MTU) */ +static int emac_change_mtu(struct net_device *netdev, int new_mtu) +{ + unsigned int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; + struct emac_adapter *adpt = netdev_priv(netdev); + + if ((max_frame < EMAC_MIN_ETH_FRAME_SIZE) || + (max_frame > EMAC_MAX_ETH_FRAME_SIZE)) { + netdev_err(adpt->netdev, "error: invalid MTU setting\n"); + return -EINVAL; + } + + netif_info(adpt, hw, adpt->netdev, + "changing MTU from %d to %d\n", netdev->mtu, + new_mtu); + netdev->mtu = new_mtu; + + if (netif_running(netdev)) + return emac_reinit_locked(adpt); + + return 0; +} + +/* Called when the network interface is made active */ +static int emac_open(struct net_device *netdev) +{ + struct emac_adapter *adpt = netdev_priv(netdev); + int ret; + + /* allocate rx/tx dma buffer & descriptors */ + ret = emac_mac_rx_tx_rings_alloc_all(adpt); + if (ret) { + netdev_err(adpt->netdev, "error allocating rx/tx rings\n"); + return ret; + } + + ret = emac_mac_up(adpt); + if (ret) { + emac_mac_rx_tx_rings_free_all(adpt); + return ret; + } + + emac_mac_start(adpt); + + return 0; +} + +/* Called when the network interface is disabled */ +static int emac_close(struct net_device *netdev) +{ + struct emac_adapter *adpt = netdev_priv(netdev); + + mutex_lock(&adpt->reset_lock); + + emac_mac_down(adpt); + emac_mac_rx_tx_rings_free_all(adpt); + + mutex_unlock(&adpt->reset_lock); + + return 0; +} + +/* Respond to a TX hang */ +static void emac_tx_timeout(struct net_device *netdev) +{ + struct emac_adapter *adpt = netdev_priv(netdev); + + schedule_work(&adpt->work_thread); +} + +/* IOCTL support for the interface */ +static int emac_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) +{ + if (!netif_running(netdev)) + return -EINVAL; + + if (!netdev->phydev) + return -ENODEV; + + return phy_mii_ioctl(netdev->phydev, ifr, cmd); +} + +/* Provide network statistics info for the interface */ +static struct rtnl_link_stats64 *emac_get_stats64(struct net_device *netdev, + struct rtnl_link_stats64 *net_stats) +{ + struct emac_adapter *adpt = netdev_priv(netdev); + unsigned int addr = REG_MAC_RX_STATUS_BIN; + struct emac_stats *stats = &adpt->stats; + u64 *stats_itr = &adpt->stats.rx_ok; + u32 val; + + spin_lock(&stats->lock); + + while (addr <= REG_MAC_RX_STATUS_END) { + val = readl_relaxed(adpt->base + addr); + *stats_itr += val; + stats_itr++; + addr += sizeof(u32); + } + + /* additional rx status */ + val = readl_relaxed(adpt->base + EMAC_RXMAC_STATC_REG23); + adpt->stats.rx_crc_align += val; + val = readl_relaxed(adpt->base + EMAC_RXMAC_STATC_REG24); + adpt->stats.rx_jabbers += val; + + /* update tx status */ + addr = REG_MAC_TX_STATUS_BIN; + stats_itr = &adpt->stats.tx_ok; + + while (addr <= REG_MAC_TX_STATUS_END) { + val = readl_relaxed(adpt->base + addr); + *stats_itr += val; + ++stats_itr; + addr += sizeof(u32); + } + + /* additional tx status */ + val = readl_relaxed(adpt->base + EMAC_TXMAC_STATC_REG25); + adpt->stats.tx_col += val; + + /* return parsed statistics */ + net_stats->rx_packets = stats->rx_ok; + net_stats->tx_packets = stats->tx_ok; + net_stats->rx_bytes = stats->rx_byte_cnt; + net_stats->tx_bytes = stats->tx_byte_cnt; + net_stats->multicast = stats->rx_mcast; + net_stats->collisions = stats->tx_1_col + stats->tx_2_col * 2 + + stats->tx_late_col + stats->tx_abort_col; + + net_stats->rx_errors = stats->rx_frag + stats->rx_fcs_err + + stats->rx_len_err + stats->rx_sz_ov + + stats->rx_align_err; + net_stats->rx_fifo_errors = stats->rx_rxf_ov; + net_stats->rx_length_errors = stats->rx_len_err; + net_stats->rx_crc_errors = stats->rx_fcs_err; + net_stats->rx_frame_errors = stats->rx_align_err; + net_stats->rx_over_errors = stats->rx_rxf_ov; + net_stats->rx_missed_errors = stats->rx_rxf_ov; + + net_stats->tx_errors = stats->tx_late_col + stats->tx_abort_col + + stats->tx_underrun + stats->tx_trunc; + net_stats->tx_fifo_errors = stats->tx_underrun; + net_stats->tx_aborted_errors = stats->tx_abort_col; + net_stats->tx_window_errors = stats->tx_late_col; + + spin_unlock(&stats->lock); + + return net_stats; +} + +static const struct net_device_ops emac_netdev_ops = { + .ndo_open = emac_open, + .ndo_stop = emac_close, + .ndo_validate_addr = eth_validate_addr, + .ndo_start_xmit = emac_start_xmit, + .ndo_set_mac_address = eth_mac_addr, + .ndo_change_mtu = emac_change_mtu, + .ndo_do_ioctl = emac_ioctl, + .ndo_tx_timeout = emac_tx_timeout, + .ndo_get_stats64 = emac_get_stats64, + .ndo_set_features = emac_set_features, + .ndo_set_rx_mode = emac_rx_mode_set, +}; + +/* Watchdog task routine, called to reinitialize the EMAC */ +static void emac_work_thread(struct work_struct *work) +{ + struct emac_adapter *adpt = + container_of(work, struct emac_adapter, work_thread); + + emac_reinit_locked(adpt); +} + +/* Initialize various data structures */ +static void emac_init_adapter(struct emac_adapter *adpt) +{ + u32 reg; + + /* descriptors */ + adpt->tx_desc_cnt = EMAC_DEF_TX_DESCS; + adpt->rx_desc_cnt = EMAC_DEF_RX_DESCS; + + /* dma */ + adpt->dma_order = emac_dma_ord_out; + adpt->dmar_block = emac_dma_req_4096; + adpt->dmaw_block = emac_dma_req_128; + adpt->dmar_dly_cnt = DMAR_DLY_CNT_DEF; + adpt->dmaw_dly_cnt = DMAW_DLY_CNT_DEF; + adpt->tpd_burst = TXQ0_NUM_TPD_PREF_DEF; + adpt->rfd_burst = RXQ0_NUM_RFD_PREF_DEF; + + /* irq moderator */ + reg = ((EMAC_DEF_RX_IRQ_MOD >> 1) << IRQ_MODERATOR2_INIT_SHFT) | + ((EMAC_DEF_TX_IRQ_MOD >> 1) << IRQ_MODERATOR_INIT_SHFT); + adpt->irq_mod = reg; + + /* others */ + adpt->preamble = EMAC_PREAMBLE_DEF; +} + +/* Get the clock */ +static int emac_clks_get(struct platform_device *pdev, + struct emac_adapter *adpt) +{ + unsigned int i; + + for (i = 0; i < EMAC_CLK_CNT; i++) { + struct clk *clk = devm_clk_get(&pdev->dev, emac_clk_name[i]); + + if (IS_ERR(clk)) { + dev_err(&pdev->dev, + "could not claim clock %s (error=%li)\n", + emac_clk_name[i], PTR_ERR(clk)); + + return PTR_ERR(clk); + } + + adpt->clk[i] = clk; + } + + return 0; +} + +/* Initialize clocks */ +static int emac_clks_phase1_init(struct platform_device *pdev, + struct emac_adapter *adpt) +{ + int ret; + + ret = emac_clks_get(pdev, adpt); + if (ret) + return ret; + + ret = clk_prepare_enable(adpt->clk[EMAC_CLK_AXI]); + if (ret) + return ret; + + ret = clk_prepare_enable(adpt->clk[EMAC_CLK_CFG_AHB]); + if (ret) + return ret; + + ret = clk_set_rate(adpt->clk[EMAC_CLK_HIGH_SPEED], 19200000); + if (ret) + return ret; + + return clk_prepare_enable(adpt->clk[EMAC_CLK_HIGH_SPEED]); +} + +/* Enable clocks; needs emac_clks_phase1_init to be called before */ +static int emac_clks_phase2_init(struct platform_device *pdev, + struct emac_adapter *adpt) +{ + int ret; + + ret = clk_set_rate(adpt->clk[EMAC_CLK_TX], 125000000); + if (ret) + return ret; + + ret = clk_prepare_enable(adpt->clk[EMAC_CLK_TX]); + if (ret) + return ret; + + ret = clk_set_rate(adpt->clk[EMAC_CLK_HIGH_SPEED], 125000000); + if (ret) + return ret; + + ret = clk_set_rate(adpt->clk[EMAC_CLK_MDIO], 25000000); + if (ret) + return ret; + + ret = clk_prepare_enable(adpt->clk[EMAC_CLK_MDIO]); + if (ret) + return ret; + + ret = clk_prepare_enable(adpt->clk[EMAC_CLK_RX]); + if (ret) + return ret; + + return clk_prepare_enable(adpt->clk[EMAC_CLK_SYS]); +} + +static void emac_clks_teardown(struct emac_adapter *adpt) +{ + + unsigned int i; + + for (i = 0; i < EMAC_CLK_CNT; i++) + clk_disable_unprepare(adpt->clk[i]); +} + +/* Get the resources */ +static int emac_probe_resources(struct platform_device *pdev, + struct emac_adapter *adpt) +{ + struct device_node *node = pdev->dev.of_node; + struct net_device *netdev = adpt->netdev; + struct resource *res; + const void *maddr; + int ret = 0; + + /* get mac address */ + maddr = of_get_mac_address(node); + if (!maddr) + eth_hw_addr_random(netdev); + else + ether_addr_copy(netdev->dev_addr, maddr); + + /* Core 0 interrupt */ + ret = platform_get_irq(pdev, 0); + if (ret < 0) { + dev_err(&pdev->dev, + "error: missing core0 irq resource (error=%i)\n", ret); + return ret; + } + adpt->irq.irq = ret; + + /* base register address */ + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + adpt->base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(adpt->base)) + return PTR_ERR(adpt->base); + + /* CSR register address */ + res = platform_get_resource(pdev, IORESOURCE_MEM, 1); + adpt->csr = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(adpt->csr)) + return PTR_ERR(adpt->csr); + + netdev->base_addr = (unsigned long)adpt->base; + + return 0; +} + +static const struct of_device_id emac_dt_match[] = { + { + .compatible = "qcom,fsm9900-emac", + }, + {} +}; + +static int emac_probe(struct platform_device *pdev) +{ + struct net_device *netdev; + struct emac_adapter *adpt; + struct emac_phy *phy; + u16 devid, revid; + u32 reg; + int ret; + + /* The EMAC itself is capable of 64-bit DMA, so try that first. */ + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (ret) { + /* Some platforms may restrict the EMAC's address bus to less + * then the size of DDR. In this case, we need to try a + * smaller mask. We could try every possible smaller mask, + * but that's overkill. Instead, just fall to 32-bit, which + * should always work. + */ + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); + if (ret) { + dev_err(&pdev->dev, "could not set DMA mask\n"); + return ret; + } + } + + netdev = alloc_etherdev(sizeof(struct emac_adapter)); + if (!netdev) + return -ENOMEM; + + dev_set_drvdata(&pdev->dev, netdev); + SET_NETDEV_DEV(netdev, &pdev->dev); + + adpt = netdev_priv(netdev); + adpt->netdev = netdev; + adpt->msg_enable = EMAC_MSG_DEFAULT; + + phy = &adpt->phy; + + mutex_init(&adpt->reset_lock); + spin_lock_init(&adpt->stats.lock); + + adpt->irq.mask = RX_PKT_INT0 | IMR_NORMAL_MASK; + + ret = emac_probe_resources(pdev, adpt); + if (ret) + goto err_undo_netdev; + + /* initialize clocks */ + ret = emac_clks_phase1_init(pdev, adpt); + if (ret) { + dev_err(&pdev->dev, "could not initialize clocks\n"); + goto err_undo_netdev; + } + + netdev->watchdog_timeo = EMAC_WATCHDOG_TIME; + netdev->irq = adpt->irq.irq; + + adpt->rrd_size = EMAC_RRD_SIZE; + adpt->tpd_size = EMAC_TPD_SIZE; + adpt->rfd_size = EMAC_RFD_SIZE; + + netdev->netdev_ops = &emac_netdev_ops; + + emac_init_adapter(adpt); + + /* init external phy */ + ret = emac_phy_config(pdev, adpt); + if (ret) + goto err_undo_clocks; + + /* init internal sgmii phy */ + ret = emac_sgmii_config(pdev, adpt); + if (ret) + goto err_undo_mdiobus; + + /* enable clocks */ + ret = emac_clks_phase2_init(pdev, adpt); + if (ret) { + dev_err(&pdev->dev, "could not initialize clocks\n"); + goto err_undo_mdiobus; + } + + emac_mac_reset(adpt); + + /* set hw features */ + netdev->features = NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM | + NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_CTAG_TX; + netdev->hw_features = netdev->features; + + netdev->vlan_features |= NETIF_F_SG | NETIF_F_HW_CSUM | + NETIF_F_TSO | NETIF_F_TSO6; + + INIT_WORK(&adpt->work_thread, emac_work_thread); + + /* Initialize queues */ + emac_mac_rx_tx_ring_init_all(pdev, adpt); + + netif_napi_add(netdev, &adpt->rx_q.napi, emac_napi_rtx, + NAPI_POLL_WEIGHT); + + ret = register_netdev(netdev); + if (ret) { + dev_err(&pdev->dev, "could not register net device\n"); + goto err_undo_napi; + } + + reg = readl_relaxed(adpt->base + EMAC_DMA_MAS_CTRL); + devid = (reg & DEV_ID_NUM_BMSK) >> DEV_ID_NUM_SHFT; + revid = (reg & DEV_REV_NUM_BMSK) >> DEV_REV_NUM_SHFT; + reg = readl_relaxed(adpt->base + EMAC_CORE_HW_VERSION); + + netif_info(adpt, probe, netdev, + "hardware id %d.%d, hardware version %d.%d.%d\n", + devid, revid, + (reg & MAJOR_BMSK) >> MAJOR_SHFT, + (reg & MINOR_BMSK) >> MINOR_SHFT, + (reg & STEP_BMSK) >> STEP_SHFT); + + return 0; + +err_undo_napi: + netif_napi_del(&adpt->rx_q.napi); +err_undo_mdiobus: + mdiobus_unregister(adpt->mii_bus); +err_undo_clocks: + emac_clks_teardown(adpt); +err_undo_netdev: + free_netdev(netdev); + + return ret; +} + +static int emac_remove(struct platform_device *pdev) +{ + struct net_device *netdev = dev_get_drvdata(&pdev->dev); + struct emac_adapter *adpt = netdev_priv(netdev); + + unregister_netdev(netdev); + netif_napi_del(&adpt->rx_q.napi); + + emac_clks_teardown(adpt); + + mdiobus_unregister(adpt->mii_bus); + free_netdev(netdev); + dev_set_drvdata(&pdev->dev, NULL); + + return 0; +} + +static struct platform_driver emac_platform_driver = { + .probe = emac_probe, + .remove = emac_remove, + .driver = { + .owner = THIS_MODULE, + .name = "qcom-emac", + .of_match_table = emac_dt_match, + }, +}; + +module_platform_driver(emac_platform_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("platform:qcom-emac"); diff --git a/drivers/net/ethernet/qualcomm/emac/emac.h b/drivers/net/ethernet/qualcomm/emac/emac.h new file mode 100644 index 000000000000..0c76e6cb8c9e --- /dev/null +++ b/drivers/net/ethernet/qualcomm/emac/emac.h @@ -0,0 +1,335 @@ +/* Copyright (c) 2013-2016, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _EMAC_H_ +#define _EMAC_H_ + +#include +#include +#include +#include +#include "emac-mac.h" +#include "emac-phy.h" + +/* EMAC base register offsets */ +#define EMAC_DMA_MAS_CTRL 0x001400 +#define EMAC_IRQ_MOD_TIM_INIT 0x001408 +#define EMAC_BLK_IDLE_STS 0x00140c +#define EMAC_PHY_LINK_DELAY 0x00141c +#define EMAC_SYS_ALIV_CTRL 0x001434 +#define EMAC_MAC_IPGIFG_CTRL 0x001484 +#define EMAC_MAC_STA_ADDR0 0x001488 +#define EMAC_MAC_STA_ADDR1 0x00148c +#define EMAC_HASH_TAB_REG0 0x001490 +#define EMAC_HASH_TAB_REG1 0x001494 +#define EMAC_MAC_HALF_DPLX_CTRL 0x001498 +#define EMAC_MAX_FRAM_LEN_CTRL 0x00149c +#define EMAC_INT_STATUS 0x001600 +#define EMAC_INT_MASK 0x001604 +#define EMAC_RXMAC_STATC_REG0 0x001700 +#define EMAC_RXMAC_STATC_REG22 0x001758 +#define EMAC_TXMAC_STATC_REG0 0x001760 +#define EMAC_TXMAC_STATC_REG24 0x0017c0 +#define EMAC_CORE_HW_VERSION 0x001974 +#define EMAC_IDT_TABLE0 0x001b00 +#define EMAC_RXMAC_STATC_REG23 0x001bc8 +#define EMAC_RXMAC_STATC_REG24 0x001bcc +#define EMAC_TXMAC_STATC_REG25 0x001bd0 +#define EMAC_INT1_MASK 0x001bf0 +#define EMAC_INT1_STATUS 0x001bf4 +#define EMAC_INT2_MASK 0x001bf8 +#define EMAC_INT2_STATUS 0x001bfc +#define EMAC_INT3_MASK 0x001c00 +#define EMAC_INT3_STATUS 0x001c04 + +/* EMAC_DMA_MAS_CTRL */ +#define DEV_ID_NUM_BMSK 0x7f000000 +#define DEV_ID_NUM_SHFT 24 +#define DEV_REV_NUM_BMSK 0xff0000 +#define DEV_REV_NUM_SHFT 16 +#define INT_RD_CLR_EN 0x4000 +#define IRQ_MODERATOR2_EN 0x800 +#define IRQ_MODERATOR_EN 0x400 +#define LPW_CLK_SEL 0x80 +#define LPW_STATE 0x20 +#define LPW_MODE 0x10 +#define SOFT_RST 0x1 + +/* EMAC_IRQ_MOD_TIM_INIT */ +#define IRQ_MODERATOR2_INIT_BMSK 0xffff0000 +#define IRQ_MODERATOR2_INIT_SHFT 16 +#define IRQ_MODERATOR_INIT_BMSK 0xffff +#define IRQ_MODERATOR_INIT_SHFT 0 + +/* EMAC_INT_STATUS */ +#define DIS_INT BIT(31) +#define PTP_INT BIT(30) +#define RFD4_UR_INT BIT(29) +#define TX_PKT_INT3 BIT(26) +#define TX_PKT_INT2 BIT(25) +#define TX_PKT_INT1 BIT(24) +#define RX_PKT_INT3 BIT(19) +#define RX_PKT_INT2 BIT(18) +#define RX_PKT_INT1 BIT(17) +#define RX_PKT_INT0 BIT(16) +#define TX_PKT_INT BIT(15) +#define TXQ_TO_INT BIT(14) +#define GPHY_WAKEUP_INT BIT(13) +#define GPHY_LINK_DOWN_INT BIT(12) +#define GPHY_LINK_UP_INT BIT(11) +#define DMAW_TO_INT BIT(10) +#define DMAR_TO_INT BIT(9) +#define TXF_UR_INT BIT(8) +#define RFD3_UR_INT BIT(7) +#define RFD2_UR_INT BIT(6) +#define RFD1_UR_INT BIT(5) +#define RFD0_UR_INT BIT(4) +#define RXF_OF_INT BIT(3) +#define SW_MAN_INT BIT(2) + +/* EMAC_MAILBOX_6 */ +#define RFD2_PROC_IDX_BMSK 0xfff0000 +#define RFD2_PROC_IDX_SHFT 16 +#define RFD2_PROD_IDX_BMSK 0xfff +#define RFD2_PROD_IDX_SHFT 0 + +/* EMAC_CORE_HW_VERSION */ +#define MAJOR_BMSK 0xf0000000 +#define MAJOR_SHFT 28 +#define MINOR_BMSK 0xfff0000 +#define MINOR_SHFT 16 +#define STEP_BMSK 0xffff +#define STEP_SHFT 0 + +/* EMAC_EMAC_WRAPPER_CSR1 */ +#define TX_INDX_FIFO_SYNC_RST BIT(23) +#define TX_TS_FIFO_SYNC_RST BIT(22) +#define RX_TS_FIFO2_SYNC_RST BIT(21) +#define RX_TS_FIFO1_SYNC_RST BIT(20) +#define TX_TS_ENABLE BIT(16) +#define DIS_1588_CLKS BIT(11) +#define FREQ_MODE BIT(9) +#define ENABLE_RRD_TIMESTAMP BIT(3) + +/* EMAC_EMAC_WRAPPER_CSR2 */ +#define HDRIVE_BMSK 0x3000 +#define HDRIVE_SHFT 12 +#define SLB_EN BIT(9) +#define PLB_EN BIT(8) +#define WOL_EN BIT(3) +#define PHY_RESET BIT(0) + +#define EMAC_DEV_ID 0x0040 + +/* SGMII v2 per lane registers */ +#define SGMII_LN_RSM_START 0x029C + +/* SGMII v2 PHY common registers */ +#define SGMII_PHY_CMN_CTRL 0x0408 +#define SGMII_PHY_CMN_RESET_CTRL 0x0410 + +/* SGMII v2 PHY registers per lane */ +#define SGMII_PHY_LN_OFFSET 0x0400 +#define SGMII_PHY_LN_LANE_STATUS 0x00DC +#define SGMII_PHY_LN_BIST_GEN0 0x008C +#define SGMII_PHY_LN_BIST_GEN1 0x0090 +#define SGMII_PHY_LN_BIST_GEN2 0x0094 +#define SGMII_PHY_LN_BIST_GEN3 0x0098 +#define SGMII_PHY_LN_CDR_CTRL1 0x005C + +enum emac_clk_id { + EMAC_CLK_AXI, + EMAC_CLK_CFG_AHB, + EMAC_CLK_HIGH_SPEED, + EMAC_CLK_MDIO, + EMAC_CLK_TX, + EMAC_CLK_RX, + EMAC_CLK_SYS, + EMAC_CLK_CNT +}; + +#define EMAC_LINK_SPEED_UNKNOWN 0x0 +#define EMAC_LINK_SPEED_10_HALF BIT(0) +#define EMAC_LINK_SPEED_10_FULL BIT(1) +#define EMAC_LINK_SPEED_100_HALF BIT(2) +#define EMAC_LINK_SPEED_100_FULL BIT(3) +#define EMAC_LINK_SPEED_1GB_FULL BIT(5) + +#define EMAC_MAX_SETUP_LNK_CYCLE 100 + +/* Wake On Lan */ +#define EMAC_WOL_PHY 0x00000001 /* PHY Status Change */ +#define EMAC_WOL_MAGIC 0x00000002 /* Magic Packet */ + +struct emac_stats { + /* rx */ + u64 rx_ok; /* good packets */ + u64 rx_bcast; /* good broadcast packets */ + u64 rx_mcast; /* good multicast packets */ + u64 rx_pause; /* pause packet */ + u64 rx_ctrl; /* control packets other than pause frame. */ + u64 rx_fcs_err; /* packets with bad FCS. */ + u64 rx_len_err; /* packets with length mismatch */ + u64 rx_byte_cnt; /* good bytes count (without FCS) */ + u64 rx_runt; /* runt packets */ + u64 rx_frag; /* fragment count */ + u64 rx_sz_64; /* packets that are 64 bytes */ + u64 rx_sz_65_127; /* packets that are 65-127 bytes */ + u64 rx_sz_128_255; /* packets that are 128-255 bytes */ + u64 rx_sz_256_511; /* packets that are 256-511 bytes */ + u64 rx_sz_512_1023; /* packets that are 512-1023 bytes */ + u64 rx_sz_1024_1518; /* packets that are 1024-1518 bytes */ + u64 rx_sz_1519_max; /* packets that are 1519-MTU bytes*/ + u64 rx_sz_ov; /* packets that are >MTU bytes (truncated) */ + u64 rx_rxf_ov; /* packets dropped due to RX FIFO overflow */ + u64 rx_align_err; /* alignment errors */ + u64 rx_bcast_byte_cnt; /* broadcast packets byte count (without FCS) */ + u64 rx_mcast_byte_cnt; /* multicast packets byte count (without FCS) */ + u64 rx_err_addr; /* packets dropped due to address filtering */ + u64 rx_crc_align; /* CRC align errors */ + u64 rx_jabbers; /* jabbers */ + + /* tx */ + u64 tx_ok; /* good packets */ + u64 tx_bcast; /* good broadcast packets */ + u64 tx_mcast; /* good multicast packets */ + u64 tx_pause; /* pause packets */ + u64 tx_exc_defer; /* packets with excessive deferral */ + u64 tx_ctrl; /* control packets other than pause frame */ + u64 tx_defer; /* packets that are deferred. */ + u64 tx_byte_cnt; /* good bytes count (without FCS) */ + u64 tx_sz_64; /* packets that are 64 bytes */ + u64 tx_sz_65_127; /* packets that are 65-127 bytes */ + u64 tx_sz_128_255; /* packets that are 128-255 bytes */ + u64 tx_sz_256_511; /* packets that are 256-511 bytes */ + u64 tx_sz_512_1023; /* packets that are 512-1023 bytes */ + u64 tx_sz_1024_1518; /* packets that are 1024-1518 bytes */ + u64 tx_sz_1519_max; /* packets that are 1519-MTU bytes */ + u64 tx_1_col; /* packets single prior collision */ + u64 tx_2_col; /* packets with multiple prior collisions */ + u64 tx_late_col; /* packets with late collisions */ + u64 tx_abort_col; /* packets aborted due to excess collisions */ + u64 tx_underrun; /* packets aborted due to FIFO underrun */ + u64 tx_rd_eop; /* count of reads beyond EOP */ + u64 tx_len_err; /* packets with length mismatch */ + u64 tx_trunc; /* packets truncated due to size >MTU */ + u64 tx_bcast_byte; /* broadcast packets byte count (without FCS) */ + u64 tx_mcast_byte; /* multicast packets byte count (without FCS) */ + u64 tx_col; /* collisions */ + + spinlock_t lock; /* prevent multiple simultaneous readers */ +}; + +/* RSS hstype Definitions */ +#define EMAC_RSS_HSTYP_IPV4_EN 0x00000001 +#define EMAC_RSS_HSTYP_TCP4_EN 0x00000002 +#define EMAC_RSS_HSTYP_IPV6_EN 0x00000004 +#define EMAC_RSS_HSTYP_TCP6_EN 0x00000008 +#define EMAC_RSS_HSTYP_ALL_EN (\ + EMAC_RSS_HSTYP_IPV4_EN |\ + EMAC_RSS_HSTYP_TCP4_EN |\ + EMAC_RSS_HSTYP_IPV6_EN |\ + EMAC_RSS_HSTYP_TCP6_EN) + +#define EMAC_VLAN_TO_TAG(_vlan, _tag) \ + (_tag = ((((_vlan) >> 8) & 0xFF) | (((_vlan) & 0xFF) << 8))) + +#define EMAC_TAG_TO_VLAN(_tag, _vlan) \ + (_vlan = ((((_tag) >> 8) & 0xFF) | (((_tag) & 0xFF) << 8))) + +#define EMAC_DEF_RX_BUF_SIZE 1536 +#define EMAC_MAX_JUMBO_PKT_SIZE (9 * 1024) +#define EMAC_MAX_TX_OFFLOAD_THRESH (9 * 1024) + +#define EMAC_MAX_ETH_FRAME_SIZE EMAC_MAX_JUMBO_PKT_SIZE +#define EMAC_MIN_ETH_FRAME_SIZE 68 + +#define EMAC_DEF_TX_QUEUES 1 +#define EMAC_DEF_RX_QUEUES 1 + +#define EMAC_MIN_TX_DESCS 128 +#define EMAC_MIN_RX_DESCS 128 + +#define EMAC_MAX_TX_DESCS 16383 +#define EMAC_MAX_RX_DESCS 2047 + +#define EMAC_DEF_TX_DESCS 512 +#define EMAC_DEF_RX_DESCS 256 + +#define EMAC_DEF_RX_IRQ_MOD 250 +#define EMAC_DEF_TX_IRQ_MOD 250 + +#define EMAC_WATCHDOG_TIME (5 * HZ) + +/* by default check link every 4 seconds */ +#define EMAC_TRY_LINK_TIMEOUT (4 * HZ) + +/* emac_irq per-device (per-adapter) irq properties. + * @irq: irq number. + * @mask mask to use over status register. + */ +struct emac_irq { + unsigned int irq; + u32 mask; +}; + +/* The device's main data structure */ +struct emac_adapter { + struct net_device *netdev; + struct mii_bus *mii_bus; + struct phy_device *phydev; + + void __iomem *base; + void __iomem *csr; + + struct emac_phy phy; + struct emac_stats stats; + + struct emac_irq irq; + struct clk *clk[EMAC_CLK_CNT]; + + /* All Descriptor memory */ + struct emac_ring_header ring_header; + struct emac_tx_queue tx_q; + struct emac_rx_queue rx_q; + unsigned int tx_desc_cnt; + unsigned int rx_desc_cnt; + unsigned int rrd_size; /* in quad words */ + unsigned int rfd_size; /* in quad words */ + unsigned int tpd_size; /* in quad words */ + + unsigned int rxbuf_size; + + /* Ring parameter */ + u8 tpd_burst; + u8 rfd_burst; + unsigned int dmaw_dly_cnt; + unsigned int dmar_dly_cnt; + enum emac_dma_req_block dmar_block; + enum emac_dma_req_block dmaw_block; + enum emac_dma_order dma_order; + + u32 irq_mod; + u32 preamble; + + struct work_struct work_thread; + + u16 msg_enable; + + struct mutex reset_lock; +}; + +int emac_reinit_locked(struct emac_adapter *adpt); +void emac_reg_update32(void __iomem *addr, u32 mask, u32 val); +irqreturn_t emac_isr(int irq, void *data); + +#endif /* _EMAC_H_ */ From 569e937e0100c8841619bd2150d15682141a6b0b Mon Sep 17 00:00:00 2001 From: Baoyou Xie Date: Thu, 1 Sep 2016 14:16:24 +0800 Subject: [PATCH 0651/1715] mISDN: mark symbols static where possible We get a few warnings when building kernel with W=1: drivers/isdn/hardware/mISDN/hfcmulti.c:568:1: warning: no previous declaration for 'enablepcibridge' [-Wmissing-declarations] drivers/isdn/hardware/mISDN/hfcmulti.c:574:1: warning: no previous declaration for 'disablepcibridge' [-Wmissing-declarations] drivers/isdn/hardware/mISDN/hfcmulti.c:580:1: warning: no previous declaration for 'readpcibridge' [-Wmissing-declarations] drivers/isdn/hardware/mISDN/hfcmulti.c:608:1: warning: no previous declaration for 'writepcibridge' [-Wmissing-declarations] drivers/isdn/hardware/mISDN/hfcmulti.c:638:1: warning: no previous declaration for 'cpld_set_reg' [-Wmissing-declarations] drivers/isdn/hardware/mISDN/hfcmulti.c:645:1: warning: no previous declaration for 'cpld_write_reg' [-Wmissing-declarations] drivers/isdn/hardware/mISDN/hfcmulti.c:657:1: warning: no previous declaration for 'cpld_read_reg' [-Wmissing-declarations] drivers/isdn/hardware/mISDN/hfcmulti.c:674:1: warning: no previous declaration for 'vpm_write_address' [-Wmissing-declarations] drivers/isdn/hardware/mISDN/hfcmulti.c:681:1: warning: no previous declaration for 'vpm_read_address' [-Wmissing-declarations] drivers/isdn/hardware/mISDN/hfcmulti.c:695:1: warning: no previous declaration for 'vpm_in' [-Wmissing-declarations] drivers/isdn/hardware/mISDN/hfcmulti.c:716:1: warning: no previous declaration for 'vpm_out' [-Wmissing-declarations] drivers/isdn/hardware/mISDN/hfcmulti.c:1028:1: warning: no previous declaration for 'plxsd_checksync' [-Wmissing-declarations] .... In fact, these functions are only used in the file in which they are declared and don't need a declaration, but can be made static. so this patch marks these functions with 'static'. Signed-off-by: Baoyou Xie Acked-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/isdn/hardware/mISDN/avmfritz.c | 6 +++--- drivers/isdn/hardware/mISDN/hfcmulti.c | 24 ++++++++++++------------ drivers/isdn/hardware/mISDN/mISDNipac.c | 2 +- drivers/isdn/hardware/mISDN/w6692.c | 2 +- 4 files changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/isdn/hardware/mISDN/avmfritz.c b/drivers/isdn/hardware/mISDN/avmfritz.c index 292991c90c02..e3fa1cd64470 100644 --- a/drivers/isdn/hardware/mISDN/avmfritz.c +++ b/drivers/isdn/hardware/mISDN/avmfritz.c @@ -284,7 +284,7 @@ __write_ctrl_pciv2(struct fritzcard *fc, struct hdlc_hw *hdlc, u32 channel) { AVM_HDLC_STATUS_1)); } -void +static void write_ctrl(struct bchannel *bch, int which) { struct fritzcard *fc = bch->hw; struct hdlc_hw *hdlc; @@ -741,7 +741,7 @@ inithdlc(struct fritzcard *fc) modehdlc(&fc->bch[1], -1); } -void +static void clear_pending_hdlc_ints(struct fritzcard *fc) { u32 val; @@ -962,7 +962,7 @@ avm_dctrl(struct mISDNchannel *ch, u32 cmd, void *arg) return err; } -int +static int setup_fritz(struct fritzcard *fc) { u32 val, ver; diff --git a/drivers/isdn/hardware/mISDN/hfcmulti.c b/drivers/isdn/hardware/mISDN/hfcmulti.c index 28543d795188..480c2d7794eb 100644 --- a/drivers/isdn/hardware/mISDN/hfcmulti.c +++ b/drivers/isdn/hardware/mISDN/hfcmulti.c @@ -564,19 +564,19 @@ disable_hwirq(struct hfc_multi *hc) #define MAX_TDM_CHAN 32 -inline void +static inline void enablepcibridge(struct hfc_multi *c) { HFC_outb(c, R_BRG_PCM_CFG, (0x0 << 6) | 0x3); /* was _io before */ } -inline void +static inline void disablepcibridge(struct hfc_multi *c) { HFC_outb(c, R_BRG_PCM_CFG, (0x0 << 6) | 0x2); /* was _io before */ } -inline unsigned char +static inline unsigned char readpcibridge(struct hfc_multi *hc, unsigned char address) { unsigned short cipv; @@ -604,7 +604,7 @@ readpcibridge(struct hfc_multi *hc, unsigned char address) return data; } -inline void +static inline void writepcibridge(struct hfc_multi *hc, unsigned char address, unsigned char data) { unsigned short cipv; @@ -634,14 +634,14 @@ writepcibridge(struct hfc_multi *hc, unsigned char address, unsigned char data) outl(datav, hc->pci_iobase); } -inline void +static inline void cpld_set_reg(struct hfc_multi *hc, unsigned char reg) { /* Do data pin read low byte */ HFC_outb(hc, R_GPIO_OUT1, reg); } -inline void +static inline void cpld_write_reg(struct hfc_multi *hc, unsigned char reg, unsigned char val) { cpld_set_reg(hc, reg); @@ -653,7 +653,7 @@ cpld_write_reg(struct hfc_multi *hc, unsigned char reg, unsigned char val) return; } -inline unsigned char +static inline unsigned char cpld_read_reg(struct hfc_multi *hc, unsigned char reg) { unsigned char bytein; @@ -670,14 +670,14 @@ cpld_read_reg(struct hfc_multi *hc, unsigned char reg) return bytein; } -inline void +static inline void vpm_write_address(struct hfc_multi *hc, unsigned short addr) { cpld_write_reg(hc, 0, 0xff & addr); cpld_write_reg(hc, 1, 0x01 & (addr >> 8)); } -inline unsigned short +static inline unsigned short vpm_read_address(struct hfc_multi *c) { unsigned short addr; @@ -691,7 +691,7 @@ vpm_read_address(struct hfc_multi *c) return addr & 0x1ff; } -inline unsigned char +static inline unsigned char vpm_in(struct hfc_multi *c, int which, unsigned short addr) { unsigned char res; @@ -712,7 +712,7 @@ vpm_in(struct hfc_multi *c, int which, unsigned short addr) return res; } -inline void +static inline void vpm_out(struct hfc_multi *c, int which, unsigned short addr, unsigned char data) { @@ -1024,7 +1024,7 @@ hfcmulti_resync(struct hfc_multi *locked, struct hfc_multi *newmaster, int rm) } /* This must be called AND hc must be locked irqsave!!! */ -inline void +static inline void plxsd_checksync(struct hfc_multi *hc, int rm) { if (hc->syncronized) { diff --git a/drivers/isdn/hardware/mISDN/mISDNipac.c b/drivers/isdn/hardware/mISDN/mISDNipac.c index aa9b6c3cadc1..8d338ba366d0 100644 --- a/drivers/isdn/hardware/mISDN/mISDNipac.c +++ b/drivers/isdn/hardware/mISDN/mISDNipac.c @@ -113,7 +113,7 @@ isac_ph_state_bh(struct dchannel *dch) pr_debug("%s: TE newstate %x\n", isac->name, dch->state); } -void +static void isac_empty_fifo(struct isac_hw *isac, int count) { u8 *ptr; diff --git a/drivers/isdn/hardware/mISDN/w6692.c b/drivers/isdn/hardware/mISDN/w6692.c index 741675525b53..3b067ea656bd 100644 --- a/drivers/isdn/hardware/mISDN/w6692.c +++ b/drivers/isdn/hardware/mISDN/w6692.c @@ -848,7 +848,7 @@ dbusy_timer_handler(struct dchannel *dch) } } -void initW6692(struct w6692_hw *card) +static void initW6692(struct w6692_hw *card) { u8 val; From 64ed5771aca2fcfb8ea440fc679741054011fd7e Mon Sep 17 00:00:00 2001 From: Tamizh chelvam Date: Tue, 2 Aug 2016 16:13:14 +0530 Subject: [PATCH 0652/1715] ath10k: Add WMI_SERVICE_PERIODIC_CHAN_STAT_SUPPORT wmi service WMI_SERVICE_PERIODIC_CHAN_STAT_SUPPORT service has missed in the commit 7e247a9e88dc ("ath10k: add dynamic tx mode switch config support for qca4019"). This patch adds the service to avoid mismatch between host and target. Fixes: 7e247a9e88dc ("ath10k: add dynamic tx mode switch config support for qca4019") Signed-off-by: Tamizh chelvam Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/wmi.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/wireless/ath/ath10k/wmi.h b/drivers/net/wireless/ath/ath10k/wmi.h index 89adfa90ee8d..2f89c4b2d4f2 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.h +++ b/drivers/net/wireless/ath/ath10k/wmi.h @@ -180,6 +180,7 @@ enum wmi_service { WMI_SERVICE_MESH_NON_11S, WMI_SERVICE_PEER_STATS, WMI_SERVICE_RESTRT_CHNL_SUPPORT, + WMI_SERVICE_PERIODIC_CHAN_STAT_SUPPORT, WMI_SERVICE_TX_MODE_PUSH_ONLY, WMI_SERVICE_TX_MODE_PUSH_PULL, WMI_SERVICE_TX_MODE_DYNAMIC, @@ -305,6 +306,7 @@ enum wmi_10_4_service { WMI_10_4_SERVICE_RESTRT_CHNL_SUPPORT, WMI_10_4_SERVICE_PEER_STATS, WMI_10_4_SERVICE_MESH_11S, + WMI_10_4_SERVICE_PERIODIC_CHAN_STAT_SUPPORT, WMI_10_4_SERVICE_TX_MODE_PUSH_ONLY, WMI_10_4_SERVICE_TX_MODE_PUSH_PULL, WMI_10_4_SERVICE_TX_MODE_DYNAMIC, @@ -402,6 +404,7 @@ static inline char *wmi_service_name(int service_id) SVCSTR(WMI_SERVICE_MESH_NON_11S); SVCSTR(WMI_SERVICE_PEER_STATS); SVCSTR(WMI_SERVICE_RESTRT_CHNL_SUPPORT); + SVCSTR(WMI_SERVICE_PERIODIC_CHAN_STAT_SUPPORT); SVCSTR(WMI_SERVICE_TX_MODE_PUSH_ONLY); SVCSTR(WMI_SERVICE_TX_MODE_PUSH_PULL); SVCSTR(WMI_SERVICE_TX_MODE_DYNAMIC); @@ -652,6 +655,8 @@ static inline void wmi_10_4_svc_map(const __le32 *in, unsigned long *out, WMI_SERVICE_PEER_STATS, len); SVCMAP(WMI_10_4_SERVICE_MESH_11S, WMI_SERVICE_MESH_11S, len); + SVCMAP(WMI_10_4_SERVICE_PERIODIC_CHAN_STAT_SUPPORT, + WMI_SERVICE_PERIODIC_CHAN_STAT_SUPPORT, len); SVCMAP(WMI_10_4_SERVICE_TX_MODE_PUSH_ONLY, WMI_SERVICE_TX_MODE_PUSH_ONLY, len); SVCMAP(WMI_10_4_SERVICE_TX_MODE_PUSH_PULL, From 75b34800a228b5cadc7196485fa0fdabfb9e7684 Mon Sep 17 00:00:00 2001 From: Maharaja Kennadyrajan Date: Thu, 4 Aug 2016 19:21:51 +0530 Subject: [PATCH 0653/1715] ath10k: hide kernel addresses from logs using %pK format specifier With the %pK format specifier we hide the kernel addresses with the help of kptr_restrict sysctl. In this patch, %p is changed to %pK in the driver code. The sysctl is documented in Documentation/sysctl/kernel.txt. Signed-off-by: Maharaja Kennadyrajan Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/ahb.c | 2 +- drivers/net/wireless/ath/ath10k/bmi.c | 4 ++-- drivers/net/wireless/ath/ath10k/ce.c | 4 ++-- drivers/net/wireless/ath/ath10k/core.c | 4 ++-- drivers/net/wireless/ath/ath10k/htc.c | 6 +++--- drivers/net/wireless/ath/ath10k/htt_rx.c | 2 +- drivers/net/wireless/ath/ath10k/mac.c | 20 ++++++++++---------- drivers/net/wireless/ath/ath10k/pci.c | 2 +- drivers/net/wireless/ath/ath10k/testmode.c | 4 ++-- drivers/net/wireless/ath/ath10k/txrx.c | 2 +- drivers/net/wireless/ath/ath10k/wmi.c | 4 ++-- 11 files changed, 27 insertions(+), 27 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/ahb.c b/drivers/net/wireless/ath/ath10k/ahb.c index acec16b9cf49..dede02668128 100644 --- a/drivers/net/wireless/ath/ath10k/ahb.c +++ b/drivers/net/wireless/ath/ath10k/ahb.c @@ -577,7 +577,7 @@ static int ath10k_ahb_resource_init(struct ath10k *ar) ath10k_dbg(ar, ATH10K_DBG_BOOT, "irq: %d\n", ar_ahb->irq); - ath10k_dbg(ar, ATH10K_DBG_BOOT, "mem: 0x%p mem_len: %lu gcc mem: 0x%p tcsr_mem: 0x%p\n", + ath10k_dbg(ar, ATH10K_DBG_BOOT, "mem: 0x%pK mem_len: %lu gcc mem: 0x%pK tcsr_mem: 0x%pK\n", ar_ahb->mem, ar_ahb->mem_len, ar_ahb->gcc_mem, ar_ahb->tcsr_mem); return 0; diff --git a/drivers/net/wireless/ath/ath10k/bmi.c b/drivers/net/wireless/ath/ath10k/bmi.c index 3d29b0875b3e..2872d347ea78 100644 --- a/drivers/net/wireless/ath/ath10k/bmi.c +++ b/drivers/net/wireless/ath/ath10k/bmi.c @@ -221,7 +221,7 @@ int ath10k_bmi_lz_data(struct ath10k *ar, const void *buffer, u32 length) u32 txlen; int ret; - ath10k_dbg(ar, ATH10K_DBG_BMI, "bmi lz data buffer 0x%p length %d\n", + ath10k_dbg(ar, ATH10K_DBG_BMI, "bmi lz data buffer 0x%pK length %d\n", buffer, length); if (ar->bmi.done_sent) { @@ -287,7 +287,7 @@ int ath10k_bmi_fast_download(struct ath10k *ar, int ret; ath10k_dbg(ar, ATH10K_DBG_BMI, - "bmi fast download address 0x%x buffer 0x%p length %d\n", + "bmi fast download address 0x%x buffer 0x%pK length %d\n", address, buffer, length); ret = ath10k_bmi_lz_stream_start(ar, address); diff --git a/drivers/net/wireless/ath/ath10k/ce.c b/drivers/net/wireless/ath/ath10k/ce.c index 9fb8d7472d18..65d8d714e917 100644 --- a/drivers/net/wireless/ath/ath10k/ce.c +++ b/drivers/net/wireless/ath/ath10k/ce.c @@ -840,7 +840,7 @@ static int ath10k_ce_init_src_ring(struct ath10k *ar, ath10k_ce_src_ring_highmark_set(ar, ctrl_addr, nentries); ath10k_dbg(ar, ATH10K_DBG_BOOT, - "boot init ce src ring id %d entries %d base_addr %p\n", + "boot init ce src ring id %d entries %d base_addr %pK\n", ce_id, nentries, src_ring->base_addr_owner_space); return 0; @@ -874,7 +874,7 @@ static int ath10k_ce_init_dest_ring(struct ath10k *ar, ath10k_ce_dest_ring_highmark_set(ar, ctrl_addr, nentries); ath10k_dbg(ar, ATH10K_DBG_BOOT, - "boot ce dest ring id %d entries %d base_addr %p\n", + "boot ce dest ring id %d entries %d base_addr %pK\n", ce_id, nentries, dest_ring->base_addr_owner_space); return 0; diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c index 6b49374b3956..f46f91648b06 100644 --- a/drivers/net/wireless/ath/ath10k/core.c +++ b/drivers/net/wireless/ath/ath10k/core.c @@ -699,7 +699,7 @@ static int ath10k_download_and_run_otp(struct ath10k *ar) if (!ar->running_fw->fw_file.otp_data || !ar->running_fw->fw_file.otp_len) { - ath10k_warn(ar, "Not running otp, calibration will be incorrect (otp-data %p otp_len %zd)!\n", + ath10k_warn(ar, "Not running otp, calibration will be incorrect (otp-data %pK otp_len %zd)!\n", ar->running_fw->fw_file.otp_data, ar->running_fw->fw_file.otp_len); return 0; @@ -753,7 +753,7 @@ static int ath10k_download_fw(struct ath10k *ar) } ath10k_dbg(ar, ATH10K_DBG_BOOT, - "boot uploading firmware image %p len %d\n", + "boot uploading firmware image %pK len %d\n", data, data_len); ret = ath10k_bmi_fast_download(ar, address, data, data_len); diff --git a/drivers/net/wireless/ath/ath10k/htc.c b/drivers/net/wireless/ath/ath10k/htc.c index 5b3c6bcf9598..175aae38c375 100644 --- a/drivers/net/wireless/ath/ath10k/htc.c +++ b/drivers/net/wireless/ath/ath10k/htc.c @@ -44,7 +44,7 @@ static struct sk_buff *ath10k_htc_build_tx_ctrl_skb(void *ar) skb_cb = ATH10K_SKB_CB(skb); memset(skb_cb, 0, sizeof(*skb_cb)); - ath10k_dbg(ar, ATH10K_DBG_HTC, "%s: skb %p\n", __func__, skb); + ath10k_dbg(ar, ATH10K_DBG_HTC, "%s: skb %pK\n", __func__, skb); return skb; } @@ -62,7 +62,7 @@ static void ath10k_htc_notify_tx_completion(struct ath10k_htc_ep *ep, { struct ath10k *ar = ep->htc->ar; - ath10k_dbg(ar, ATH10K_DBG_HTC, "%s: ep %d skb %p\n", __func__, + ath10k_dbg(ar, ATH10K_DBG_HTC, "%s: ep %d skb %pK\n", __func__, ep->eid, skb); ath10k_htc_restore_tx_skb(ep->htc, skb); @@ -404,7 +404,7 @@ void ath10k_htc_rx_completion_handler(struct ath10k *ar, struct sk_buff *skb) goto out; } - ath10k_dbg(ar, ATH10K_DBG_HTC, "htc rx completion ep %d skb %p\n", + ath10k_dbg(ar, ATH10K_DBG_HTC, "htc rx completion ep %d skb %pK\n", eid, skb); ep->ep_ops.ep_rx_complete(ar, skb); diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c index 78db5d679f19..ae6931b99da9 100644 --- a/drivers/net/wireless/ath/ath10k/htt_rx.c +++ b/drivers/net/wireless/ath/ath10k/htt_rx.c @@ -931,7 +931,7 @@ static void ath10k_process_rx(struct ath10k *ar, *status = *rx_status; ath10k_dbg(ar, ATH10K_DBG_DATA, - "rx skb %p len %u peer %pM %s %s sn %u %s%s%s%s%s %srate_idx %u vht_nss %u freq %u band %u flag 0x%llx fcs-err %i mic-err %i amsdu-more %i\n", + "rx skb %pK len %u peer %pM %s %s sn %u %s%s%s%s%s %srate_idx %u vht_nss %u freq %u band %u flag 0x%llx fcs-err %i mic-err %i amsdu-more %i\n", skb, skb->len, ieee80211_get_SA(hdr), diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index 0bbd0a00edcc..2a1d9fde13e7 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -824,7 +824,7 @@ static void ath10k_peer_cleanup(struct ath10k *ar, u32 vdev_id) */ for (i = 0; i < ARRAY_SIZE(ar->peer_map); i++) { if (ar->peer_map[i] == peer) { - ath10k_warn(ar, "removing stale peer_map entry for %pM (ptr %p idx %d)\n", + ath10k_warn(ar, "removing stale peer_map entry for %pM (ptr %pK idx %d)\n", peer->addr, peer, i); ar->peer_map[i] = NULL; } @@ -3524,7 +3524,7 @@ static int ath10k_mac_tx(struct ath10k *ar, if (info->flags & IEEE80211_TX_CTL_TX_OFFCHAN) { if (!ath10k_mac_tx_frm_has_freq(ar)) { - ath10k_dbg(ar, ATH10K_DBG_MAC, "queued offchannel skb %p\n", + ath10k_dbg(ar, ATH10K_DBG_MAC, "queued offchannel skb %pK\n", skb); skb_queue_tail(&ar->offchan_tx_queue, skb); @@ -3586,7 +3586,7 @@ void ath10k_offchan_tx_work(struct work_struct *work) mutex_lock(&ar->conf_mutex); - ath10k_dbg(ar, ATH10K_DBG_MAC, "mac offchannel skb %p\n", + ath10k_dbg(ar, ATH10K_DBG_MAC, "mac offchannel skb %pK\n", skb); hdr = (struct ieee80211_hdr *)skb->data; @@ -3643,7 +3643,7 @@ void ath10k_offchan_tx_work(struct work_struct *work) time_left = wait_for_completion_timeout(&ar->offchan_tx_completed, 3 * HZ); if (time_left == 0) - ath10k_warn(ar, "timed out waiting for offchannel skb %p\n", + ath10k_warn(ar, "timed out waiting for offchannel skb %pK\n", skb); if (!peer && tmp_peer_created) { @@ -6001,7 +6001,7 @@ static int ath10k_sta_state(struct ieee80211_hw *hw, continue; if (peer->sta == sta) { - ath10k_warn(ar, "found sta peer %pM (ptr %p id %d) entry on vdev %i after it was supposedly removed\n", + ath10k_warn(ar, "found sta peer %pM (ptr %pK id %d) entry on vdev %i after it was supposedly removed\n", sta->addr, peer, i, arvif->vdev_id); peer->sta = NULL; @@ -7134,7 +7134,7 @@ ath10k_mac_op_add_chanctx(struct ieee80211_hw *hw, struct ath10k *ar = hw->priv; ath10k_dbg(ar, ATH10K_DBG_MAC, - "mac chanctx add freq %hu width %d ptr %p\n", + "mac chanctx add freq %hu width %d ptr %pK\n", ctx->def.chan->center_freq, ctx->def.width, ctx); mutex_lock(&ar->conf_mutex); @@ -7158,7 +7158,7 @@ ath10k_mac_op_remove_chanctx(struct ieee80211_hw *hw, struct ath10k *ar = hw->priv; ath10k_dbg(ar, ATH10K_DBG_MAC, - "mac chanctx remove freq %hu width %d ptr %p\n", + "mac chanctx remove freq %hu width %d ptr %pK\n", ctx->def.chan->center_freq, ctx->def.width, ctx); mutex_lock(&ar->conf_mutex); @@ -7223,7 +7223,7 @@ ath10k_mac_op_change_chanctx(struct ieee80211_hw *hw, mutex_lock(&ar->conf_mutex); ath10k_dbg(ar, ATH10K_DBG_MAC, - "mac chanctx change freq %hu width %d ptr %p changed %x\n", + "mac chanctx change freq %hu width %d ptr %pK changed %x\n", ctx->def.chan->center_freq, ctx->def.width, ctx, changed); /* This shouldn't really happen because channel switching should use @@ -7281,7 +7281,7 @@ ath10k_mac_op_assign_vif_chanctx(struct ieee80211_hw *hw, mutex_lock(&ar->conf_mutex); ath10k_dbg(ar, ATH10K_DBG_MAC, - "mac chanctx assign ptr %p vdev_id %i\n", + "mac chanctx assign ptr %pK vdev_id %i\n", ctx, arvif->vdev_id); if (WARN_ON(arvif->is_started)) { @@ -7342,7 +7342,7 @@ ath10k_mac_op_unassign_vif_chanctx(struct ieee80211_hw *hw, mutex_lock(&ar->conf_mutex); ath10k_dbg(ar, ATH10K_DBG_MAC, - "mac chanctx unassign ptr %p vdev_id %i\n", + "mac chanctx unassign ptr %pK vdev_id %i\n", ctx, arvif->vdev_id); WARN_ON(!arvif->is_started); diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c index 9a22c478dd1b..1b841adfb574 100644 --- a/drivers/net/wireless/ath/ath10k/pci.c +++ b/drivers/net/wireless/ath/ath10k/pci.c @@ -3062,7 +3062,7 @@ static int ath10k_pci_claim(struct ath10k *ar) goto err_master; } - ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot pci_mem 0x%p\n", ar_pci->mem); + ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot pci_mem 0x%pK\n", ar_pci->mem); return 0; err_master: diff --git a/drivers/net/wireless/ath/ath10k/testmode.c b/drivers/net/wireless/ath/ath10k/testmode.c index 091f29d60465..ed85f938e3c0 100644 --- a/drivers/net/wireless/ath/ath10k/testmode.c +++ b/drivers/net/wireless/ath/ath10k/testmode.c @@ -46,7 +46,7 @@ bool ath10k_tm_event_wmi(struct ath10k *ar, u32 cmd_id, struct sk_buff *skb) int ret; ath10k_dbg(ar, ATH10K_DBG_TESTMODE, - "testmode event wmi cmd_id %d skb %p skb->len %d\n", + "testmode event wmi cmd_id %d skb %pK skb->len %d\n", cmd_id, skb, skb->len); ath10k_dbg_dump(ar, ATH10K_DBG_TESTMODE, NULL, "", skb->data, skb->len); @@ -383,7 +383,7 @@ static int ath10k_tm_cmd_wmi(struct ath10k *ar, struct nlattr *tb[]) cmd_id = nla_get_u32(tb[ATH10K_TM_ATTR_WMI_CMDID]); ath10k_dbg(ar, ATH10K_DBG_TESTMODE, - "testmode cmd wmi cmd_id %d buf %p buf_len %d\n", + "testmode cmd wmi cmd_id %d buf %pK buf_len %d\n", cmd_id, buf, buf_len); ath10k_dbg_dump(ar, ATH10K_DBG_TESTMODE, NULL, "", buf, buf_len); diff --git a/drivers/net/wireless/ath/ath10k/txrx.c b/drivers/net/wireless/ath/ath10k/txrx.c index b29a86a26c13..1e695d1b4692 100644 --- a/drivers/net/wireless/ath/ath10k/txrx.c +++ b/drivers/net/wireless/ath/ath10k/txrx.c @@ -44,7 +44,7 @@ static void ath10k_report_offchan_tx(struct ath10k *ar, struct sk_buff *skb) complete(&ar->offchan_tx_completed); ar->offchan_tx_skb = NULL; /* just for sanity */ - ath10k_dbg(ar, ATH10K_DBG_HTT, "completed offchannel skb %p\n", skb); + ath10k_dbg(ar, ATH10K_DBG_HTT, "completed offchannel skb %pK\n", skb); out: spin_unlock_bh(&ar->data_lock); } diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c index ae5f541ec966..b9a3b09ec38a 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.c +++ b/drivers/net/wireless/ath/ath10k/wmi.c @@ -1877,7 +1877,7 @@ ath10k_wmi_op_gen_mgmt_tx(struct ath10k *ar, struct sk_buff *msdu) ether_addr_copy(cmd->hdr.peer_macaddr.addr, ieee80211_get_DA(hdr)); memcpy(cmd->buf, msdu->data, msdu->len); - ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi mgmt tx skb %p len %d ftype %02x stype %02x\n", + ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi mgmt tx skb %pK len %d ftype %02x stype %02x\n", msdu, skb->len, fc & IEEE80211_FCTL_FTYPE, fc & IEEE80211_FCTL_STYPE); trace_ath10k_tx_hdr(ar, skb->data, skb->len); @@ -2350,7 +2350,7 @@ int ath10k_wmi_event_mgmt_rx(struct ath10k *ar, struct sk_buff *skb) ath10k_mac_handle_beacon(ar, skb); ath10k_dbg(ar, ATH10K_DBG_MGMT, - "event mgmt rx skb %p len %d ftype %02x stype %02x\n", + "event mgmt rx skb %pK len %d ftype %02x stype %02x\n", skb, skb->len, fc & IEEE80211_FCTL_FTYPE, fc & IEEE80211_FCTL_STYPE); From 7d42298eb43d27442e64d1e52e9f55f9cf9387e1 Mon Sep 17 00:00:00 2001 From: Rajkumar Manoharan Date: Tue, 9 Aug 2016 12:01:51 +0530 Subject: [PATCH 0654/1715] ath10k: fix group privacy action frame decryption for qca4019 Recent commit 46f6b06050b7 ("mac80211: Encrypt "Group addressed privacy" action frames") encrypts group privacy action frames. But qca99x0 family chipset delivers broadcast/multicast management frames as encrypted and it should be decrypted by mac80211. Setting RX_FLAG_DECRYPTED stats for those frames is breaking mesh connection establishment. Signed-off-by: Rajkumar Manoharan Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/core.c | 4 ++++ drivers/net/wireless/ath/ath10k/core.h | 5 +++++ drivers/net/wireless/ath/ath10k/wmi.c | 29 +++++++++++++++++++++----- 3 files changed, 33 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c index f46f91648b06..ffedc0386c67 100644 --- a/drivers/net/wireless/ath/ath10k/core.c +++ b/drivers/net/wireless/ath/ath10k/core.c @@ -182,6 +182,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .board_size = QCA99X0_BOARD_DATA_SZ, .board_ext_size = QCA99X0_BOARD_EXT_DATA_SZ, }, + .sw_decrypt_mcast_mgmt = true, }, { .id = QCA9984_HW_1_0_DEV_VERSION, @@ -205,6 +206,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .board_size = QCA99X0_BOARD_DATA_SZ, .board_ext_size = QCA99X0_BOARD_EXT_DATA_SZ, }, + .sw_decrypt_mcast_mgmt = true, }, { .id = QCA9888_HW_2_0_DEV_VERSION, @@ -227,6 +229,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .board_size = QCA99X0_BOARD_DATA_SZ, .board_ext_size = QCA99X0_BOARD_EXT_DATA_SZ, }, + .sw_decrypt_mcast_mgmt = true, }, { .id = QCA9377_HW_1_0_DEV_VERSION, @@ -285,6 +288,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .board_size = QCA4019_BOARD_DATA_SZ, .board_ext_size = QCA4019_BOARD_EXT_DATA_SZ, }, + .sw_decrypt_mcast_mgmt = true, }, }; diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h index 56daeb7926f9..7e329dcf9f4d 100644 --- a/drivers/net/wireless/ath/ath10k/core.h +++ b/drivers/net/wireless/ath/ath10k/core.h @@ -771,6 +771,11 @@ struct ath10k { size_t board_size; size_t board_ext_size; } fw; + + /* qca99x0 family chips deliver broadcast/multicast management + * frames encrypted and expect software do decryption. + */ + bool sw_decrypt_mcast_mgmt; } hw_params; /* contains the firmware images used with ATH10K_FIRMWARE_MODE_NORMAL */ diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c index b9a3b09ec38a..4b4bf23a398d 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.c +++ b/drivers/net/wireless/ath/ath10k/wmi.c @@ -2243,6 +2243,29 @@ static int ath10k_wmi_10_4_op_pull_mgmt_rx_ev(struct ath10k *ar, return 0; } +static bool ath10k_wmi_rx_is_decrypted(struct ath10k *ar, + struct ieee80211_hdr *hdr) +{ + if (!ieee80211_has_protected(hdr->frame_control)) + return false; + + /* FW delivers WEP Shared Auth frame with Protected Bit set and + * encrypted payload. However in case of PMF it delivers decrypted + * frames with Protected Bit set. + */ + if (ieee80211_is_auth(hdr->frame_control)) + return false; + + /* qca99x0 based FW delivers broadcast or multicast management frames + * (ex: group privacy action frames in mesh) as encrypted payload. + */ + if (is_multicast_ether_addr(ieee80211_get_DA(hdr)) && + ar->hw_params.sw_decrypt_mcast_mgmt) + return false; + + return true; +} + int ath10k_wmi_event_mgmt_rx(struct ath10k *ar, struct sk_buff *skb) { struct wmi_mgmt_rx_ev_arg arg = {}; @@ -2329,11 +2352,7 @@ int ath10k_wmi_event_mgmt_rx(struct ath10k *ar, struct sk_buff *skb) ath10k_wmi_handle_wep_reauth(ar, skb, status); - /* FW delivers WEP Shared Auth frame with Protected Bit set and - * encrypted payload. However in case of PMF it delivers decrypted - * frames with Protected Bit set. */ - if (ieee80211_has_protected(hdr->frame_control) && - !ieee80211_is_auth(hdr->frame_control)) { + if (ath10k_wmi_rx_is_decrypted(ar, hdr)) { status->flag |= RX_FLAG_DECRYPTED; if (!ieee80211_is_action(hdr->frame_control) && From 03c41cc126c8868ef483c2480acfcd5490a844b3 Mon Sep 17 00:00:00 2001 From: Mohammed Shafi Shajakhan Date: Wed, 17 Aug 2016 16:58:00 +0530 Subject: [PATCH 0655/1715] ath10k: suppress warnings when getting wmi WDS peer event id 'WMI_10_4_WDS_PEER_EVENTID' is not yet handled/implemented for WDS mode, as of now suppress the warning message "Unknown eventid: 36903" Signed-off-by: Mohammed Shafi Shajakhan Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/wmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c index 4b4bf23a398d..15b7efc184a4 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.c +++ b/drivers/net/wireless/ath/ath10k/wmi.c @@ -5379,6 +5379,7 @@ static void ath10k_wmi_10_4_op_rx(struct ath10k *ar, struct sk_buff *skb) break; case WMI_10_4_WOW_WAKEUP_HOST_EVENTID: case WMI_10_4_PEER_RATECODE_LIST_EVENTID: + case WMI_10_4_WDS_PEER_EVENTID: ath10k_dbg(ar, ATH10K_DBG_WMI, "received event id %d not implemented\n", id); break; From 83e164b7679d46a6a172ca0fd0ead68b48e22103 Mon Sep 17 00:00:00 2001 From: Rajkumar Manoharan Date: Wed, 17 Aug 2016 21:02:53 +0530 Subject: [PATCH 0656/1715] ath10k: improve wake_tx_queue ops performance txqs_lock is interfering with wake_tx_queue submitting more frames. so queues don't get filled in and don't keep firmware/hardware busy enough. This change helps to reduce the txqs_lock contention and wake_tx_queue() blockage to being possible in txrx_unref(). To reduce turn around time of wake_tx_queue ops and to maintain fairness among all txqs, the callback is updated to push first txq alone from pending list for every wake_tx_queue call. Remaining txqs will be processed later upon tx completion. Below improvements are observed in push-only mode and validated on IPQ4019 platform. With this change, in AP mode ~10Mbps increase is observed in downlink (AP -> STA) traffic and approx. 5-10% of CPU usage is reduced. Major improvement is observed in 1-hop Mesh mode topology in 11ACVHT80. Compared to Infra mode, CPU overhead is higher in Mesh mode due to path lookup and no fast-xmit support. So reducing spin lock contention is helping in Mesh. TOT +change -------- -------- TCP DL 545 Mbps 595 Mbps TCP UL 555 Mbps 585 Mbps Signed-off-by: Rajkumar Manoharan Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/mac.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index 2a1d9fde13e7..05250afef33c 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -4100,13 +4100,29 @@ static void ath10k_mac_op_wake_tx_queue(struct ieee80211_hw *hw, { struct ath10k *ar = hw->priv; struct ath10k_txq *artxq = (void *)txq->drv_priv; + struct ieee80211_txq *f_txq; + struct ath10k_txq *f_artxq; + int ret = 0; + int max = 16; spin_lock_bh(&ar->txqs_lock); if (list_empty(&artxq->list)) list_add_tail(&artxq->list, &ar->txqs); + + f_artxq = list_first_entry(&ar->txqs, struct ath10k_txq, list); + f_txq = container_of((void *)f_artxq, struct ieee80211_txq, drv_priv); + list_del_init(&f_artxq->list); + + while (ath10k_mac_tx_can_push(hw, f_txq) && max--) { + ret = ath10k_mac_tx_push_txq(hw, f_txq); + if (ret) + break; + } + if (ret != -ENOENT) + list_add_tail(&f_artxq->list, &ar->txqs); spin_unlock_bh(&ar->txqs_lock); - ath10k_mac_tx_push_pending(ar); + ath10k_htt_tx_txq_update(hw, f_txq); ath10k_htt_tx_txq_update(hw, txq); } From e4fd726f21cdae0dc9cea6cbfcb7e27f21393f88 Mon Sep 17 00:00:00 2001 From: Ashok Raj Nagarajan Date: Thu, 18 Aug 2016 15:30:04 +0530 Subject: [PATCH 0657/1715] ath10k: fix sending frame in management path in push txq logic In the wake tx queue path, we are not checking if the frame to be sent takes management path or not. For eg. QOS null func frame coming here will take the management path. Since we are not incrementing the descriptor counter (num_pending_mgmt_tx) w.r.t tx management, on tx completion it is possible to see negative values. When the above counter reaches a negative value, we will not be sending a probe response out. if (is_presp && ar->hw_params.max_probe_resp_desc_thres < htt->num_pending_mgmt_tx) For IPQ4019, max_probe_resp_desc_thres (u32) is 24 is compared against num_pending_mgmt_tx (int) and the above condtions comes true if the counter is negative and we drop the probe response. To avoid this, check on the wake tx queue path as well for the tx path of the frame and increment the appropriate counters Fixes: cac085524cf1 "ath10k: move mgmt descriptor limit handle under mgmt_tx" Signed-off-by: Ashok Raj Nagarajan Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/mac.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index 05250afef33c..ac7a368690e9 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -3777,7 +3777,9 @@ int ath10k_mac_tx_push_txq(struct ieee80211_hw *hw, enum ath10k_hw_txrx_mode txmode; enum ath10k_mac_tx_path txpath; struct sk_buff *skb; + struct ieee80211_hdr *hdr; size_t skb_len; + bool is_mgmt, is_presp; int ret; spin_lock_bh(&ar->htt.tx_lock); @@ -3801,6 +3803,22 @@ int ath10k_mac_tx_push_txq(struct ieee80211_hw *hw, skb_len = skb->len; txmode = ath10k_mac_tx_h_get_txmode(ar, vif, sta, skb); txpath = ath10k_mac_tx_h_get_txpath(ar, skb, txmode); + is_mgmt = (txpath == ATH10K_MAC_TX_HTT_MGMT); + + if (is_mgmt) { + hdr = (struct ieee80211_hdr *)skb->data; + is_presp = ieee80211_is_probe_resp(hdr->frame_control); + + spin_lock_bh(&ar->htt.tx_lock); + ret = ath10k_htt_tx_mgmt_inc_pending(htt, is_mgmt, is_presp); + + if (ret) { + ath10k_htt_tx_dec_pending(htt); + spin_unlock_bh(&ar->htt.tx_lock); + return ret; + } + spin_unlock_bh(&ar->htt.tx_lock); + } ret = ath10k_mac_tx(ar, vif, sta, txmode, txpath, skb); if (unlikely(ret)) { @@ -3808,6 +3826,8 @@ int ath10k_mac_tx_push_txq(struct ieee80211_hw *hw, spin_lock_bh(&ar->htt.tx_lock); ath10k_htt_tx_dec_pending(htt); + if (is_mgmt) + ath10k_htt_tx_mgmt_dec_pending(htt); spin_unlock_bh(&ar->htt.tx_lock); return ret; From 881ed54ecc138776adc20058c43d93f9b24f8b6d Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Thu, 18 Aug 2016 15:12:06 +0200 Subject: [PATCH 0658/1715] ath10k: use complete() instead complete_all() There is only one waiter for the completion, therefore there is no need to use complete_all(). Let's make that clear by using complete() instead of complete_all(). The usage pattern of the completion is: waiter context waker context scan.started ------------ ath10k_start_scan() lockdep_assert_held(conf_mutex) auth10k_wmi_start_scan() wait_for_completion_timeout(scan.started) ath10k_wmi_event_scan_start_failed() complete(scan.started) ath10k_wmi_event_scan_started() complete(scan.started) scan.completed -------------- ath10k_scan_stop() lockdep_assert_held(conf_mutex) ath10k_wmi_stop_scan() wait_for_completion_timeout(scan.completed) __ath10k_scan_finish() complete(scan.completed) scan.on_channel --------------- ath10k_remain_on_channel() mutex_lock(conf_mutex) ath10k_start_scan() wait_for_completion_timeout(scan.on_channel) ath10k_wmi_event_scan_foreign_chan() complete(scan.on_channel) offchan_tx_completed -------------------- ath10k_offchan_tx_work() mutex_lock(conf_mutex) reinit_completion(offchan_tx_completed) wait_for_completion_timeout(offchan_tx_completed) ath10k_report_offchain_tx() complete(offchan_tx_completed) install_key_done ---------------- ath10k_install_key() lockep_assert_held(conf_mutex) reinit_completion(install_key_done) wait_for_completion_timeout(install_key_done) ath10k_htt_t2h_msg_handler() complete(install_key_done) vdev_setup_done --------------- ath10k_monitor_vdev_start() lockdep_assert_held(conf_mutex) reinit_completion(vdev_setup_done) ath10k_vdev_setup_sync() wait_for_completion_timeout(vdev_setup_done) ath10k_wmi_event_vdev_start_resp() complete(vdev_setup_done) ath10k_monitor_vdev_stop() lockdep_assert_held(conf_mutex) reinit_completion(vdev_setup_done() ath10k_vdev_setup_sync() wait_for_completion_timeout(vdev_setup_done) ath10k_wmi_event_vdev_stopped() complete(vdev_setup_done) thermal.wmi_sync ---------------- ath10k_thermal_show_temp() mutex_lock(conf_mutex) reinit_completion(thermal.wmi_sync) wait_for_completion_timeout(thermal.wmi_sync) ath10k_thermal_event_temperature() complete(thermal.wmi_sync) bss_survey_done --------------- ath10k_mac_update_bss_chan_survey lockdep_assert_held(conf_mutex) reinit_completion(bss_survey_done) wait_for_completion_timeout(bss_survey_done) ath10k_wmi_event_pdev_bss_chan_info() complete(bss_survey_done) All complete() calls happen while the conf_mutex is taken. That means at max one waiter is possible. Signed-off-by: Daniel Wagner Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/core.c | 16 ++++++++-------- drivers/net/wireless/ath/ath10k/mac.c | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c index ffedc0386c67..a9b9fb01d9e6 100644 --- a/drivers/net/wireless/ath/ath10k/core.c +++ b/drivers/net/wireless/ath/ath10k/core.c @@ -1501,14 +1501,14 @@ static void ath10k_core_restart(struct work_struct *work) ieee80211_stop_queues(ar->hw); ath10k_drain_tx(ar); - complete_all(&ar->scan.started); - complete_all(&ar->scan.completed); - complete_all(&ar->scan.on_channel); - complete_all(&ar->offchan_tx_completed); - complete_all(&ar->install_key_done); - complete_all(&ar->vdev_setup_done); - complete_all(&ar->thermal.wmi_sync); - complete_all(&ar->bss_survey_done); + complete(&ar->scan.started); + complete(&ar->scan.completed); + complete(&ar->scan.on_channel); + complete(&ar->offchan_tx_completed); + complete(&ar->install_key_done); + complete(&ar->vdev_setup_done); + complete(&ar->thermal.wmi_sync); + complete(&ar->bss_survey_done); wake_up(&ar->htt.empty_tx_wq); wake_up(&ar->wmi.tx_credits_wq); wake_up(&ar->peer_mapping_wq); diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index ac7a368690e9..de6e65f612f9 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -3914,7 +3914,7 @@ void __ath10k_scan_finish(struct ath10k *ar) ar->scan.roc_freq = 0; ath10k_offchan_tx_purge(ar); cancel_delayed_work(&ar->scan.timeout); - complete_all(&ar->scan.completed); + complete(&ar->scan.completed); break; } } From afcbc82cea527a046d66ff3088a75e56417abfc5 Mon Sep 17 00:00:00 2001 From: Maharaja Kennadyrajan Date: Tue, 23 Aug 2016 15:35:36 +0530 Subject: [PATCH 0659/1715] ath10k: Added support for extended dbglog module id for 10.4 For 10.4 fw versions, dbglog module id has been extended from u32 to u64, hence this patch fixes the same in the ath10k driver side. This patch doesn't break the older 10.4 releases. The FW change is already present in the older FWs. Signed-off-by: Maharaja Kennadyrajan Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/core.h | 2 +- drivers/net/wireless/ath/ath10k/debug.c | 11 +++--- drivers/net/wireless/ath/ath10k/wmi-ops.h | 4 +-- drivers/net/wireless/ath/ath10k/wmi-tlv.c | 2 +- drivers/net/wireless/ath/ath10k/wmi.c | 42 +++++++++++++++++++++-- drivers/net/wireless/ath/ath10k/wmi.h | 14 ++++++++ 6 files changed, 64 insertions(+), 11 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h index 7e329dcf9f4d..e13e0781fcae 100644 --- a/drivers/net/wireless/ath/ath10k/core.h +++ b/drivers/net/wireless/ath/ath10k/core.h @@ -441,7 +441,7 @@ struct ath10k_debug { struct completion tpc_complete; /* protected by conf_mutex */ - u32 fw_dbglog_mask; + u64 fw_dbglog_mask; u32 fw_dbglog_level; u32 pktlog_filter; u32 reg_addr; diff --git a/drivers/net/wireless/ath/ath10k/debug.c b/drivers/net/wireless/ath/ath10k/debug.c index 8f0fd41dfd4b..832da6ed9f13 100644 --- a/drivers/net/wireless/ath/ath10k/debug.c +++ b/drivers/net/wireless/ath/ath10k/debug.c @@ -1228,9 +1228,9 @@ static ssize_t ath10k_read_fw_dbglog(struct file *file, { struct ath10k *ar = file->private_data; unsigned int len; - char buf[64]; + char buf[96]; - len = scnprintf(buf, sizeof(buf), "0x%08x %u\n", + len = scnprintf(buf, sizeof(buf), "0x%16llx %u\n", ar->debug.fw_dbglog_mask, ar->debug.fw_dbglog_level); return simple_read_from_buffer(user_buf, count, ppos, buf, len); @@ -1242,15 +1242,16 @@ static ssize_t ath10k_write_fw_dbglog(struct file *file, { struct ath10k *ar = file->private_data; int ret; - char buf[64]; - unsigned int log_level, mask; + char buf[96]; + unsigned int log_level; + u64 mask; simple_write_to_buffer(buf, sizeof(buf) - 1, ppos, user_buf, count); /* make sure that buf is null terminated */ buf[sizeof(buf) - 1] = 0; - ret = sscanf(buf, "%x %u", &mask, &log_level); + ret = sscanf(buf, "%llx %u", &mask, &log_level); if (!ret) return -EINVAL; diff --git a/drivers/net/wireless/ath/ath10k/wmi-ops.h b/drivers/net/wireless/ath/ath10k/wmi-ops.h index c67eda78b69e..c9a8bb1186f2 100644 --- a/drivers/net/wireless/ath/ath10k/wmi-ops.h +++ b/drivers/net/wireless/ath/ath10k/wmi-ops.h @@ -125,7 +125,7 @@ struct wmi_ops { enum wmi_force_fw_hang_type type, u32 delay_ms); struct sk_buff *(*gen_mgmt_tx)(struct ath10k *ar, struct sk_buff *skb); - struct sk_buff *(*gen_dbglog_cfg)(struct ath10k *ar, u32 module_enable, + struct sk_buff *(*gen_dbglog_cfg)(struct ath10k *ar, u64 module_enable, u32 log_level); struct sk_buff *(*gen_pktlog_enable)(struct ath10k *ar, u32 filter); struct sk_buff *(*gen_pktlog_disable)(struct ath10k *ar); @@ -945,7 +945,7 @@ ath10k_wmi_force_fw_hang(struct ath10k *ar, } static inline int -ath10k_wmi_dbglog_cfg(struct ath10k *ar, u32 module_enable, u32 log_level) +ath10k_wmi_dbglog_cfg(struct ath10k *ar, u64 module_enable, u32 log_level) { struct sk_buff *skb; diff --git a/drivers/net/wireless/ath/ath10k/wmi-tlv.c b/drivers/net/wireless/ath/ath10k/wmi-tlv.c index a42f52dd9a36..e64f59300a7c 100644 --- a/drivers/net/wireless/ath/ath10k/wmi-tlv.c +++ b/drivers/net/wireless/ath/ath10k/wmi-tlv.c @@ -2468,7 +2468,7 @@ ath10k_wmi_tlv_op_gen_force_fw_hang(struct ath10k *ar, } static struct sk_buff * -ath10k_wmi_tlv_op_gen_dbglog_cfg(struct ath10k *ar, u32 module_enable, +ath10k_wmi_tlv_op_gen_dbglog_cfg(struct ath10k *ar, u64 module_enable, u32 log_level) { struct wmi_tlv_dbglog_cmd *cmd; struct wmi_tlv *tlv; diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c index 15b7efc184a4..eb4ab6fa688f 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.c +++ b/drivers/net/wireless/ath/ath10k/wmi.c @@ -6937,7 +6937,7 @@ ath10k_wmi_op_gen_force_fw_hang(struct ath10k *ar, } static struct sk_buff * -ath10k_wmi_op_gen_dbglog_cfg(struct ath10k *ar, u32 module_enable, +ath10k_wmi_op_gen_dbglog_cfg(struct ath10k *ar, u64 module_enable, u32 log_level) { struct wmi_dbglog_cfg_cmd *cmd; @@ -6974,6 +6974,44 @@ ath10k_wmi_op_gen_dbglog_cfg(struct ath10k *ar, u32 module_enable, return skb; } +static struct sk_buff * +ath10k_wmi_10_4_op_gen_dbglog_cfg(struct ath10k *ar, u64 module_enable, + u32 log_level) +{ + struct wmi_10_4_dbglog_cfg_cmd *cmd; + struct sk_buff *skb; + u32 cfg; + + skb = ath10k_wmi_alloc_skb(ar, sizeof(*cmd)); + if (!skb) + return ERR_PTR(-ENOMEM); + + cmd = (struct wmi_10_4_dbglog_cfg_cmd *)skb->data; + + if (module_enable) { + cfg = SM(log_level, + ATH10K_DBGLOG_CFG_LOG_LVL); + } else { + /* set back defaults, all modules with WARN level */ + cfg = SM(ATH10K_DBGLOG_LEVEL_WARN, + ATH10K_DBGLOG_CFG_LOG_LVL); + module_enable = ~0; + } + + cmd->module_enable = __cpu_to_le64(module_enable); + cmd->module_valid = __cpu_to_le64(~0); + cmd->config_enable = __cpu_to_le32(cfg); + cmd->config_valid = __cpu_to_le32(ATH10K_DBGLOG_CFG_LOG_LVL_MASK); + + ath10k_dbg(ar, ATH10K_DBG_WMI, + "wmi dbglog cfg modules 0x%016llx 0x%016llx config %08x %08x\n", + __le64_to_cpu(cmd->module_enable), + __le64_to_cpu(cmd->module_valid), + __le32_to_cpu(cmd->config_enable), + __le32_to_cpu(cmd->config_valid)); + return skb; +} + static struct sk_buff * ath10k_wmi_op_gen_pktlog_enable(struct ath10k *ar, u32 ev_bitmap) { @@ -8092,7 +8130,7 @@ static const struct wmi_ops wmi_10_4_ops = { .gen_pdev_set_wmm = ath10k_wmi_op_gen_pdev_set_wmm, .gen_force_fw_hang = ath10k_wmi_op_gen_force_fw_hang, .gen_mgmt_tx = ath10k_wmi_op_gen_mgmt_tx, - .gen_dbglog_cfg = ath10k_wmi_op_gen_dbglog_cfg, + .gen_dbglog_cfg = ath10k_wmi_10_4_op_gen_dbglog_cfg, .gen_pktlog_enable = ath10k_wmi_op_gen_pktlog_enable, .gen_pktlog_disable = ath10k_wmi_op_gen_pktlog_disable, .gen_pdev_set_quiet_mode = ath10k_wmi_op_gen_pdev_set_quiet_mode, diff --git a/drivers/net/wireless/ath/ath10k/wmi.h b/drivers/net/wireless/ath/ath10k/wmi.h index 2f89c4b2d4f2..48e04b92e231 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.h +++ b/drivers/net/wireless/ath/ath10k/wmi.h @@ -6174,6 +6174,20 @@ struct wmi_dbglog_cfg_cmd { __le32 config_valid; } __packed; +struct wmi_10_4_dbglog_cfg_cmd { + /* bitmask to hold mod id config*/ + __le64 module_enable; + + /* see ATH10K_DBGLOG_CFG_ */ + __le32 config_enable; + + /* mask of module id bits to be changed */ + __le64 module_valid; + + /* mask of config bits to be changed, see ATH10K_DBGLOG_CFG_ */ + __le32 config_valid; +} __packed; + enum wmi_roam_reason { WMI_ROAM_REASON_BETTER_AP = 1, WMI_ROAM_REASON_BEACON_MISS = 2, From 749bc03ae2cd763df19ab8000d21b4342ed3383c Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 24 Aug 2016 01:27:26 +0900 Subject: [PATCH 0660/1715] ath10k: replace config_enabled() with IS_REACHABLE() Commit 97f2645f358b ("tree-wide: replace config_enabled() with IS_ENABLED()") mostly did away with config_enabled(). This is one of the postponed TODO items as config_enabled() is used for a tristate option here. Theoretically, config_enabled() is equivalent to IS_BUILTIN(), but I guess IS_REACHABLE() is the best fit for this case because both CONFIG_HWMON and CONFIG_ATH10K are tristate. Signed-off-by: Masahiro Yamada Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/thermal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath10k/thermal.c b/drivers/net/wireless/ath/ath10k/thermal.c index 444b52c7e4f3..0a47269be289 100644 --- a/drivers/net/wireless/ath/ath10k/thermal.c +++ b/drivers/net/wireless/ath/ath10k/thermal.c @@ -192,7 +192,7 @@ int ath10k_thermal_register(struct ath10k *ar) /* Avoid linking error on devm_hwmon_device_register_with_groups, I * guess linux/hwmon.h is missing proper stubs. */ - if (!config_enabled(CONFIG_HWMON)) + if (!IS_REACHABLE(CONFIG_HWMON)) return 0; hwmon_dev = devm_hwmon_device_register_with_groups(ar->dev, From 2cdce425aa3301648e3a68a361f7f48b681fc5a6 Mon Sep 17 00:00:00 2001 From: Mohammed Shafi Shajakhan Date: Fri, 26 Aug 2016 13:42:20 +0530 Subject: [PATCH 0661/1715] ath10k: Fix broken NULL func data frame status for 10.4 Older firmware with HTT delivers incorrect tx status for null func frames to driver, but this fixed in 10.2 and 10.4 firmware versions. Also this workaround results in reporting of incorrect null func status for 10.4. Fix this is by introducing a firmware feature flag for 10.4 so that this workaround is skipped and proper tx status for null func frames are reported Signed-off-by: Tamizh chelvam Signed-off-by: Mohammed Shafi Shajakhan Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/core.c | 1 + drivers/net/wireless/ath/ath10k/core.h | 7 +++++++ drivers/net/wireless/ath/ath10k/mac.c | 2 ++ 3 files changed, 10 insertions(+) diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c index a9b9fb01d9e6..c9d163e0f374 100644 --- a/drivers/net/wireless/ath/ath10k/core.c +++ b/drivers/net/wireless/ath/ath10k/core.c @@ -308,6 +308,7 @@ static const char *const ath10k_core_fw_feature_str[] = { [ATH10K_FW_FEATURE_MFP_SUPPORT] = "mfp", [ATH10K_FW_FEATURE_PEER_FLOW_CONTROL] = "peer-flow-ctrl", [ATH10K_FW_FEATURE_BTCOEX_PARAM] = "btcoex-param", + [ATH10K_FW_FEATURE_SKIP_NULL_FUNC_WAR] = "skip-null-func-war", }; static unsigned int ath10k_core_get_fw_feature_str(char *buf, diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h index e13e0781fcae..b367e9cfcfd4 100644 --- a/drivers/net/wireless/ath/ath10k/core.h +++ b/drivers/net/wireless/ath/ath10k/core.h @@ -552,6 +552,13 @@ enum ath10k_fw_features { */ ATH10K_FW_FEATURE_BTCOEX_PARAM = 14, + /* Older firmware with HTT delivers incorrect tx status for null func + * frames to driver, but this fixed in 10.2 and 10.4 firmware versions. + * Also this workaround results in reporting of incorrect null func + * status for 10.4. This flag is used to skip the workaround. + */ + ATH10K_FW_FEATURE_SKIP_NULL_FUNC_WAR = 15, + /* keep last */ ATH10K_FW_FEATURE_COUNT, }; diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index de6e65f612f9..a110325897a7 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -3255,6 +3255,8 @@ ath10k_mac_tx_h_get_txmode(struct ath10k *ar, if (ar->htt.target_version_major < 3 && (ieee80211_is_nullfunc(fc) || ieee80211_is_qos_nullfunc(fc)) && !test_bit(ATH10K_FW_FEATURE_HAS_WMI_MGMT_TX, + ar->running_fw->fw_file.fw_features) && + !test_bit(ATH10K_FW_FEATURE_SKIP_NULL_FUNC_WAR, ar->running_fw->fw_file.fw_features)) return ATH10K_HW_TXRX_MGMT; From 7f03d3069381266278c058c4ce8349a0d172da7b Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 26 Aug 2016 19:08:52 +0100 Subject: [PATCH 0662/1715] ath10k: fix spelling mistake "montior" -> "monitor" Trivial fix to spelling mistake in ath10k_warn message. Signed-off-by: Colin Ian King Reviewed-by: Julian Calaby Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/mac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index a110325897a7..4565321ad762 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -5224,7 +5224,7 @@ static void ath10k_configure_filter(struct ieee80211_hw *hw, ret = ath10k_monitor_recalc(ar); if (ret) - ath10k_warn(ar, "failed to recalc montior: %d\n", ret); + ath10k_warn(ar, "failed to recalc monitor: %d\n", ret); mutex_unlock(&ar->conf_mutex); } From c39265f72ae6dbcb0367be808837e2f182095d15 Mon Sep 17 00:00:00 2001 From: Baoyou Xie Date: Mon, 29 Aug 2016 20:21:13 +0800 Subject: [PATCH 0663/1715] ath9k: mark ath_fill_led_pin() static We get 1 warning about global functions without a declaration in the ath9k gpio driver when building with W=1: drivers/net/wireless/ath/ath9k/gpio.c:25:6: warning: no previous prototype for 'ath_fill_led_pin' [-Wmissing-prototypes] In fact, this function is only used in the file in which it is declared and don't need a declaration, but can be made static. so this patch marks it 'static'. Signed-off-by: Baoyou Xie Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath9k/gpio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath9k/gpio.c b/drivers/net/wireless/ath/ath9k/gpio.c index 490f74d9ddf0..ddb28861e7fe 100644 --- a/drivers/net/wireless/ath/ath9k/gpio.c +++ b/drivers/net/wireless/ath/ath9k/gpio.c @@ -22,7 +22,7 @@ #ifdef CONFIG_MAC80211_LEDS -void ath_fill_led_pin(struct ath_softc *sc) +static void ath_fill_led_pin(struct ath_softc *sc) { struct ath_hw *ah = sc->sc_ah; From ea2e7ce5d0fc878463ba39deb46cf2ab20398fd2 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 1 Sep 2016 18:37:21 -0700 Subject: [PATCH 0664/1715] bpf: support 8-byte metafield access The verifier supported only 4-byte metafields in struct __sk_buff and struct xdp_md. The metafields in upcoming struct bpf_perf_event are 8-byte to match register width in struct pt_regs. Teach verifier to recognize 8-byte metafield access. The patch doesn't affect safety of sockets and xdp programs. They check for 4-byte only ctx access before these conditions are hit. Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index abb61f3f6900..c1c9e441f0f5 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2333,7 +2333,8 @@ static int do_check(struct verifier_env *env) if (err) return err; - if (BPF_SIZE(insn->code) != BPF_W) { + if (BPF_SIZE(insn->code) != BPF_W && + BPF_SIZE(insn->code) != BPF_DW) { insn_idx++; continue; } @@ -2642,9 +2643,11 @@ static int convert_ctx_accesses(struct verifier_env *env) for (i = 0; i < insn_cnt; i++, insn++) { u32 insn_delta, cnt; - if (insn->code == (BPF_LDX | BPF_MEM | BPF_W)) + if (insn->code == (BPF_LDX | BPF_MEM | BPF_W) || + insn->code == (BPF_LDX | BPF_MEM | BPF_DW)) type = BPF_READ; - else if (insn->code == (BPF_STX | BPF_MEM | BPF_W)) + else if (insn->code == (BPF_STX | BPF_MEM | BPF_W) || + insn->code == (BPF_STX | BPF_MEM | BPF_DW)) type = BPF_WRITE; else continue; From 0515e5999a466dfe6e1924f460da599bb6821487 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 1 Sep 2016 18:37:22 -0700 Subject: [PATCH 0665/1715] bpf: introduce BPF_PROG_TYPE_PERF_EVENT program type Introduce BPF_PROG_TYPE_PERF_EVENT programs that can be attached to HW and SW perf events (PERF_TYPE_HARDWARE and PERF_TYPE_SOFTWARE correspondingly in uapi/linux/perf_event.h) The program visible context meta structure is struct bpf_perf_event_data { struct pt_regs regs; __u64 sample_period; }; which is accessible directly from the program: int bpf_prog(struct bpf_perf_event_data *ctx) { ... ctx->sample_period ... ... ctx->regs.ip ... } The bpf verifier rewrites the accesses into kernel internal struct bpf_perf_event_data_kern which allows changing struct perf_sample_data without affecting bpf programs. New fields can be added to the end of struct bpf_perf_event_data in the future. Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/perf_event.h | 5 +++ include/uapi/linux/Kbuild | 1 + include/uapi/linux/bpf.h | 1 + include/uapi/linux/bpf_perf_event.h | 18 +++++++++ kernel/trace/bpf_trace.c | 61 +++++++++++++++++++++++++++++ 5 files changed, 86 insertions(+) create mode 100644 include/uapi/linux/bpf_perf_event.h diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 2b6b43cc0dd5..97bfe62f30d7 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -788,6 +788,11 @@ struct perf_output_handle { int page; }; +struct bpf_perf_event_data_kern { + struct pt_regs *regs; + struct perf_sample_data *data; +}; + #ifdef CONFIG_CGROUP_PERF /* diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index 185f8ea2702f..d0352a971ebd 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -71,6 +71,7 @@ header-y += binfmts.h header-y += blkpg.h header-y += blktrace_api.h header-y += bpf_common.h +header-y += bpf_perf_event.h header-y += bpf.h header-y += bpqether.h header-y += bsg.h diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e4c5a1baa993..f896dfac4ac0 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -95,6 +95,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_SCHED_ACT, BPF_PROG_TYPE_TRACEPOINT, BPF_PROG_TYPE_XDP, + BPF_PROG_TYPE_PERF_EVENT, }; #define BPF_PSEUDO_MAP_FD 1 diff --git a/include/uapi/linux/bpf_perf_event.h b/include/uapi/linux/bpf_perf_event.h new file mode 100644 index 000000000000..067427259820 --- /dev/null +++ b/include/uapi/linux/bpf_perf_event.h @@ -0,0 +1,18 @@ +/* Copyright (c) 2016 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#ifndef _UAPI__LINUX_BPF_PERF_EVENT_H__ +#define _UAPI__LINUX_BPF_PERF_EVENT_H__ + +#include +#include + +struct bpf_perf_event_data { + struct pt_regs regs; + __u64 sample_period; +}; + +#endif /* _UAPI__LINUX_BPF_PERF_EVENT_H__ */ diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index ad35213b8405..d3869b03d9fe 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1,4 +1,5 @@ /* Copyright (c) 2011-2015 PLUMgrid, http://plumgrid.com + * Copyright (c) 2016 Facebook * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public @@ -8,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -552,10 +554,69 @@ static struct bpf_prog_type_list tracepoint_tl = { .type = BPF_PROG_TYPE_TRACEPOINT, }; +static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type, + enum bpf_reg_type *reg_type) +{ + if (off < 0 || off >= sizeof(struct bpf_perf_event_data)) + return false; + if (type != BPF_READ) + return false; + if (off % size != 0) + return false; + if (off == offsetof(struct bpf_perf_event_data, sample_period)) { + if (size != sizeof(u64)) + return false; + } else { + if (size != sizeof(long)) + return false; + } + return true; +} + +static u32 pe_prog_convert_ctx_access(enum bpf_access_type type, int dst_reg, + int src_reg, int ctx_off, + struct bpf_insn *insn_buf, + struct bpf_prog *prog) +{ + struct bpf_insn *insn = insn_buf; + + switch (ctx_off) { + case offsetof(struct bpf_perf_event_data, sample_period): + BUILD_BUG_ON(FIELD_SIZEOF(struct perf_sample_data, period) != sizeof(u64)); + *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct bpf_perf_event_data_kern, data)), + dst_reg, src_reg, + offsetof(struct bpf_perf_event_data_kern, data)); + *insn++ = BPF_LDX_MEM(BPF_DW, dst_reg, dst_reg, + offsetof(struct perf_sample_data, period)); + break; + default: + *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct bpf_perf_event_data_kern, regs)), + dst_reg, src_reg, + offsetof(struct bpf_perf_event_data_kern, regs)); + *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(sizeof(long)), + dst_reg, dst_reg, ctx_off); + break; + } + + return insn - insn_buf; +} + +static const struct bpf_verifier_ops perf_event_prog_ops = { + .get_func_proto = tp_prog_func_proto, + .is_valid_access = pe_prog_is_valid_access, + .convert_ctx_access = pe_prog_convert_ctx_access, +}; + +static struct bpf_prog_type_list perf_event_tl = { + .ops = &perf_event_prog_ops, + .type = BPF_PROG_TYPE_PERF_EVENT, +}; + static int __init register_kprobe_prog_ops(void) { bpf_register_prog_type(&kprobe_tl); bpf_register_prog_type(&tracepoint_tl); + bpf_register_prog_type(&perf_event_tl); return 0; } late_initcall(register_kprobe_prog_ops); From fdc15d388d600d5a1599e14c700af105a5b60761 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 1 Sep 2016 18:37:23 -0700 Subject: [PATCH 0666/1715] bpf: perf_event progs should only use preallocated maps Make sure that BPF_PROG_TYPE_PERF_EVENT programs only use preallocated hash maps, since doing memory allocation in overflow_handler can crash depending on where nmi got triggered. Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index c1c9e441f0f5..48c2705db22c 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2511,6 +2511,20 @@ process_bpf_exit: return 0; } +static int check_map_prog_compatibility(struct bpf_map *map, + struct bpf_prog *prog) + +{ + if (prog->type == BPF_PROG_TYPE_PERF_EVENT && + (map->map_type == BPF_MAP_TYPE_HASH || + map->map_type == BPF_MAP_TYPE_PERCPU_HASH) && + (map->map_flags & BPF_F_NO_PREALLOC)) { + verbose("perf_event programs can only use preallocated hash map\n"); + return -EINVAL; + } + return 0; +} + /* look for pseudo eBPF instructions that access map FDs and * replace them with actual map pointers */ @@ -2518,7 +2532,7 @@ static int replace_map_fd_with_map_ptr(struct verifier_env *env) { struct bpf_insn *insn = env->prog->insnsi; int insn_cnt = env->prog->len; - int i, j; + int i, j, err; for (i = 0; i < insn_cnt; i++, insn++) { if (BPF_CLASS(insn->code) == BPF_LDX && @@ -2562,6 +2576,12 @@ static int replace_map_fd_with_map_ptr(struct verifier_env *env) return PTR_ERR(map); } + err = check_map_prog_compatibility(map, env->prog); + if (err) { + fdput(f); + return err; + } + /* store map pointer inside BPF_LD_IMM64 instruction */ insn[0].imm = (u32) (unsigned long) map; insn[1].imm = ((u64) (unsigned long) map) >> 32; From aa6a5f3cb2b2edc5b9aab0b4fdfdfa9c3b5096a8 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 1 Sep 2016 18:37:24 -0700 Subject: [PATCH 0667/1715] perf, bpf: add perf events core support for BPF_PROG_TYPE_PERF_EVENT programs Allow attaching BPF_PROG_TYPE_PERF_EVENT programs to sw and hw perf events via overflow_handler mechanism. When program is attached the overflow_handlers become stacked. The program acts as a filter. Returning zero from the program means that the normal perf_event_output handler will not be called and sampling event won't be stored in the ring buffer. The overflow_handler_context==NULL is an additional safety check to make sure programs are not attached to hw breakpoints and watchdog in case other checks (that prevent that now anyway) get accidentally relaxed in the future. The program refcnt is incremented in case perf_events are inhereted when target task is forked. Similar to kprobe and tracepoint programs there is no ioctl to detach the program or swap already attached program. The user space expected to close(perf_event_fd) like it does right now for kprobe+bpf. That restriction simplifies the code quite a bit. The invocation of overflow_handler in __perf_event_overflow() is now done via READ_ONCE, since that pointer can be replaced when the program is attached while perf_event itself could have been active already. There is no need to do similar treatment for event->prog, since it's assigned only once before it's accessed. Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 4 ++ include/linux/perf_event.h | 4 ++ kernel/events/core.c | 89 +++++++++++++++++++++++++++++++++++++- 3 files changed, 96 insertions(+), 1 deletion(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 11134238417d..9a904f63f8c1 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -297,6 +297,10 @@ static inline struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i) static inline void bpf_prog_put(struct bpf_prog *prog) { } +static inline struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog) +{ + return ERR_PTR(-EOPNOTSUPP); +} #endif /* CONFIG_BPF_SYSCALL */ /* verifier prototypes for helper functions called from eBPF programs */ diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 97bfe62f30d7..ccb73a58113d 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -679,6 +679,10 @@ struct perf_event { u64 (*clock)(void); perf_overflow_handler_t overflow_handler; void *overflow_handler_context; +#ifdef CONFIG_BPF_SYSCALL + perf_overflow_handler_t orig_overflow_handler; + struct bpf_prog *prog; +#endif #ifdef CONFIG_EVENT_TRACING struct trace_event_call *tp_event; diff --git a/kernel/events/core.c b/kernel/events/core.c index 3cfabdf7b942..85bf4c37911f 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7022,7 +7022,7 @@ static int __perf_event_overflow(struct perf_event *event, irq_work_queue(&event->pending); } - event->overflow_handler(event, data, regs); + READ_ONCE(event->overflow_handler)(event, data, regs); if (*perf_event_fasync(event) && event->pending_kill) { event->pending_wakeup = 1; @@ -7637,11 +7637,83 @@ static void perf_event_free_filter(struct perf_event *event) ftrace_profile_free_filter(event); } +#ifdef CONFIG_BPF_SYSCALL +static void bpf_overflow_handler(struct perf_event *event, + struct perf_sample_data *data, + struct pt_regs *regs) +{ + struct bpf_perf_event_data_kern ctx = { + .data = data, + .regs = regs, + }; + int ret = 0; + + preempt_disable(); + if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) + goto out; + rcu_read_lock(); + ret = BPF_PROG_RUN(event->prog, (void *)&ctx); + rcu_read_unlock(); +out: + __this_cpu_dec(bpf_prog_active); + preempt_enable(); + if (!ret) + return; + + event->orig_overflow_handler(event, data, regs); +} + +static int perf_event_set_bpf_handler(struct perf_event *event, u32 prog_fd) +{ + struct bpf_prog *prog; + + if (event->overflow_handler_context) + /* hw breakpoint or kernel counter */ + return -EINVAL; + + if (event->prog) + return -EEXIST; + + prog = bpf_prog_get_type(prog_fd, BPF_PROG_TYPE_PERF_EVENT); + if (IS_ERR(prog)) + return PTR_ERR(prog); + + event->prog = prog; + event->orig_overflow_handler = READ_ONCE(event->overflow_handler); + WRITE_ONCE(event->overflow_handler, bpf_overflow_handler); + return 0; +} + +static void perf_event_free_bpf_handler(struct perf_event *event) +{ + struct bpf_prog *prog = event->prog; + + if (!prog) + return; + + WRITE_ONCE(event->overflow_handler, event->orig_overflow_handler); + event->prog = NULL; + bpf_prog_put(prog); +} +#else +static int perf_event_set_bpf_handler(struct perf_event *event, u32 prog_fd) +{ + return -EOPNOTSUPP; +} +static void perf_event_free_bpf_handler(struct perf_event *event) +{ +} +#endif + static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd) { bool is_kprobe, is_tracepoint; struct bpf_prog *prog; + if (event->attr.type == PERF_TYPE_HARDWARE || + event->attr.type == PERF_TYPE_SOFTWARE) + return perf_event_set_bpf_handler(event, prog_fd); + if (event->attr.type != PERF_TYPE_TRACEPOINT) return -EINVAL; @@ -7682,6 +7754,8 @@ static void perf_event_free_bpf_prog(struct perf_event *event) { struct bpf_prog *prog; + perf_event_free_bpf_handler(event); + if (!event->tp_event) return; @@ -8998,6 +9072,19 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, if (!overflow_handler && parent_event) { overflow_handler = parent_event->overflow_handler; context = parent_event->overflow_handler_context; +#ifdef CONFIG_BPF_SYSCALL + if (overflow_handler == bpf_overflow_handler) { + struct bpf_prog *prog = bpf_prog_inc(parent_event->prog); + + if (IS_ERR(prog)) { + err = PTR_ERR(prog); + goto err_ns; + } + event->prog = prog; + event->orig_overflow_handler = + parent_event->orig_overflow_handler; + } +#endif } if (overflow_handler) { From 1c47910ef80135ac89e4d0b471d123572cee5535 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 1 Sep 2016 18:37:25 -0700 Subject: [PATCH 0668/1715] samples/bpf: add perf_event+bpf example The bpf program is called 50 times a second and does hashmap[kern&user_stackid]++ It's primary purpose to check that key bpf helpers like map lookup, update, get_stackid, trace_printk and ctx access are all working. It checks: - PERF_COUNT_HW_CPU_CYCLES on all cpus - PERF_COUNT_HW_CPU_CYCLES for current process and inherited perf_events to children - PERF_COUNT_SW_CPU_CLOCK on all cpus - PERF_COUNT_SW_CPU_CLOCK for current process Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- samples/bpf/Makefile | 4 + samples/bpf/bpf_helpers.h | 2 + samples/bpf/bpf_load.c | 7 +- samples/bpf/trace_event_kern.c | 65 ++++++++++ samples/bpf/trace_event_user.c | 213 +++++++++++++++++++++++++++++++++ 5 files changed, 290 insertions(+), 1 deletion(-) create mode 100644 samples/bpf/trace_event_kern.c create mode 100644 samples/bpf/trace_event_user.c diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index db3cb061bfcd..a69cf9045285 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -25,6 +25,7 @@ hostprogs-y += test_cgrp2_array_pin hostprogs-y += xdp1 hostprogs-y += xdp2 hostprogs-y += test_current_task_under_cgroup +hostprogs-y += trace_event test_verifier-objs := test_verifier.o libbpf.o test_maps-objs := test_maps.o libbpf.o @@ -52,6 +53,7 @@ xdp1-objs := bpf_load.o libbpf.o xdp1_user.o xdp2-objs := bpf_load.o libbpf.o xdp1_user.o test_current_task_under_cgroup-objs := bpf_load.o libbpf.o \ test_current_task_under_cgroup_user.o +trace_event-objs := bpf_load.o libbpf.o trace_event_user.o # Tell kbuild to always build the programs always := $(hostprogs-y) @@ -79,6 +81,7 @@ always += test_cgrp2_tc_kern.o always += xdp1_kern.o always += xdp2_kern.o always += test_current_task_under_cgroup_kern.o +always += trace_event_kern.o HOSTCFLAGS += -I$(objtree)/usr/include @@ -103,6 +106,7 @@ HOSTLOADLIBES_test_overhead += -lelf -lrt HOSTLOADLIBES_xdp1 += -lelf HOSTLOADLIBES_xdp2 += -lelf HOSTLOADLIBES_test_current_task_under_cgroup += -lelf +HOSTLOADLIBES_trace_event += -lelf # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline: # make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h index bbdf62a1e45e..90f44bd2045e 100644 --- a/samples/bpf/bpf_helpers.h +++ b/samples/bpf/bpf_helpers.h @@ -55,6 +55,8 @@ static int (*bpf_skb_get_tunnel_opt)(void *ctx, void *md, int size) = (void *) BPF_FUNC_skb_get_tunnel_opt; static int (*bpf_skb_set_tunnel_opt)(void *ctx, void *md, int size) = (void *) BPF_FUNC_skb_set_tunnel_opt; +static unsigned long long (*bpf_get_prandom_u32)(void) = + (void *) BPF_FUNC_get_prandom_u32; /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c index 0cfda2320320..97913e109b14 100644 --- a/samples/bpf/bpf_load.c +++ b/samples/bpf/bpf_load.c @@ -51,6 +51,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) bool is_kretprobe = strncmp(event, "kretprobe/", 10) == 0; bool is_tracepoint = strncmp(event, "tracepoint/", 11) == 0; bool is_xdp = strncmp(event, "xdp", 3) == 0; + bool is_perf_event = strncmp(event, "perf_event", 10) == 0; enum bpf_prog_type prog_type; char buf[256]; int fd, efd, err, id; @@ -69,6 +70,8 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) prog_type = BPF_PROG_TYPE_TRACEPOINT; } else if (is_xdp) { prog_type = BPF_PROG_TYPE_XDP; + } else if (is_perf_event) { + prog_type = BPF_PROG_TYPE_PERF_EVENT; } else { printf("Unknown event '%s'\n", event); return -1; @@ -82,7 +85,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) prog_fd[prog_cnt++] = fd; - if (is_xdp) + if (is_xdp || is_perf_event) return 0; if (is_socket) { @@ -326,6 +329,7 @@ int load_bpf_file(char *path) memcmp(shname_prog, "kretprobe/", 10) == 0 || memcmp(shname_prog, "tracepoint/", 11) == 0 || memcmp(shname_prog, "xdp", 3) == 0 || + memcmp(shname_prog, "perf_event", 10) == 0 || memcmp(shname_prog, "socket", 6) == 0) load_and_attach(shname_prog, insns, data_prog->d_size); } @@ -344,6 +348,7 @@ int load_bpf_file(char *path) memcmp(shname, "kretprobe/", 10) == 0 || memcmp(shname, "tracepoint/", 11) == 0 || memcmp(shname, "xdp", 3) == 0 || + memcmp(shname, "perf_event", 10) == 0 || memcmp(shname, "socket", 6) == 0) load_and_attach(shname, data->d_buf, data->d_size); } diff --git a/samples/bpf/trace_event_kern.c b/samples/bpf/trace_event_kern.c new file mode 100644 index 000000000000..71a8ed32823e --- /dev/null +++ b/samples/bpf/trace_event_kern.c @@ -0,0 +1,65 @@ +/* Copyright (c) 2016 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include "bpf_helpers.h" + +struct key_t { + char comm[TASK_COMM_LEN]; + u32 kernstack; + u32 userstack; +}; + +struct bpf_map_def SEC("maps") counts = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(struct key_t), + .value_size = sizeof(u64), + .max_entries = 10000, +}; + +struct bpf_map_def SEC("maps") stackmap = { + .type = BPF_MAP_TYPE_STACK_TRACE, + .key_size = sizeof(u32), + .value_size = PERF_MAX_STACK_DEPTH * sizeof(u64), + .max_entries = 10000, +}; + +#define KERN_STACKID_FLAGS (0 | BPF_F_FAST_STACK_CMP) +#define USER_STACKID_FLAGS (0 | BPF_F_FAST_STACK_CMP | BPF_F_USER_STACK) + +SEC("perf_event") +int bpf_prog1(struct bpf_perf_event_data *ctx) +{ + char fmt[] = "CPU-%d period %lld ip %llx"; + u32 cpu = bpf_get_smp_processor_id(); + struct key_t key; + u64 *val, one = 1; + + if (ctx->sample_period < 10000) + /* ignore warmup */ + return 0; + bpf_get_current_comm(&key.comm, sizeof(key.comm)); + key.kernstack = bpf_get_stackid(ctx, &stackmap, KERN_STACKID_FLAGS); + key.userstack = bpf_get_stackid(ctx, &stackmap, USER_STACKID_FLAGS); + if ((int)key.kernstack < 0 && (int)key.userstack < 0) { + bpf_trace_printk(fmt, sizeof(fmt), cpu, ctx->sample_period, + ctx->regs.ip); + return 0; + } + + val = bpf_map_lookup_elem(&counts, &key); + if (val) + (*val)++; + else + bpf_map_update_elem(&counts, &key, &one, BPF_NOEXIST); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/trace_event_user.c b/samples/bpf/trace_event_user.c new file mode 100644 index 000000000000..9a130d31ecf2 --- /dev/null +++ b/samples/bpf/trace_event_user.c @@ -0,0 +1,213 @@ +/* Copyright (c) 2016 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "libbpf.h" +#include "bpf_load.h" + +#define SAMPLE_FREQ 50 + +static bool sys_read_seen, sys_write_seen; + +static void print_ksym(__u64 addr) +{ + struct ksym *sym; + + if (!addr) + return; + sym = ksym_search(addr); + printf("%s;", sym->name); + if (!strcmp(sym->name, "sys_read")) + sys_read_seen = true; + else if (!strcmp(sym->name, "sys_write")) + sys_write_seen = true; +} + +static void print_addr(__u64 addr) +{ + if (!addr) + return; + printf("%llx;", addr); +} + +#define TASK_COMM_LEN 16 + +struct key_t { + char comm[TASK_COMM_LEN]; + __u32 kernstack; + __u32 userstack; +}; + +static void print_stack(struct key_t *key, __u64 count) +{ + __u64 ip[PERF_MAX_STACK_DEPTH] = {}; + static bool warned; + int i; + + printf("%3lld %s;", count, key->comm); + if (bpf_lookup_elem(map_fd[1], &key->kernstack, ip) != 0) { + printf("---;"); + } else { + for (i = PERF_MAX_STACK_DEPTH - 1; i >= 0; i--) + print_ksym(ip[i]); + } + printf("-;"); + if (bpf_lookup_elem(map_fd[1], &key->userstack, ip) != 0) { + printf("---;"); + } else { + for (i = PERF_MAX_STACK_DEPTH - 1; i >= 0; i--) + print_addr(ip[i]); + } + printf("\n"); + + if (key->kernstack == -EEXIST && !warned) { + printf("stackmap collisions seen. Consider increasing size\n"); + warned = true; + } else if ((int)key->kernstack < 0 && (int)key->userstack < 0) { + printf("err stackid %d %d\n", key->kernstack, key->userstack); + } +} + +static void int_exit(int sig) +{ + kill(0, SIGKILL); + exit(0); +} + +static void print_stacks(void) +{ + struct key_t key = {}, next_key; + __u64 value; + __u32 stackid = 0, next_id; + int fd = map_fd[0], stack_map = map_fd[1]; + + sys_read_seen = sys_write_seen = false; + while (bpf_get_next_key(fd, &key, &next_key) == 0) { + bpf_lookup_elem(fd, &next_key, &value); + print_stack(&next_key, value); + bpf_delete_elem(fd, &next_key); + key = next_key; + } + + if (!sys_read_seen || !sys_write_seen) { + printf("BUG kernel stack doesn't contain sys_read() and sys_write()\n"); + int_exit(0); + } + + /* clear stack map */ + while (bpf_get_next_key(stack_map, &stackid, &next_id) == 0) { + bpf_delete_elem(stack_map, &next_id); + stackid = next_id; + } +} + +static void test_perf_event_all_cpu(struct perf_event_attr *attr) +{ + int nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + int *pmu_fd = malloc(nr_cpus * sizeof(int)); + int i; + + /* open perf_event on all cpus */ + for (i = 0; i < nr_cpus; i++) { + pmu_fd[i] = perf_event_open(attr, -1, i, -1, 0); + if (pmu_fd[i] < 0) { + printf("perf_event_open failed\n"); + goto all_cpu_err; + } + assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_SET_BPF, prog_fd[0]) == 0); + assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_ENABLE, 0) == 0); + } + system("dd if=/dev/zero of=/dev/null count=5000k"); + print_stacks(); +all_cpu_err: + for (i--; i >= 0; i--) + close(pmu_fd[i]); + free(pmu_fd); +} + +static void test_perf_event_task(struct perf_event_attr *attr) +{ + int pmu_fd; + + /* open task bound event */ + pmu_fd = perf_event_open(attr, 0, -1, -1, 0); + if (pmu_fd < 0) { + printf("perf_event_open failed\n"); + return; + } + assert(ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd[0]) == 0); + assert(ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0) == 0); + system("dd if=/dev/zero of=/dev/null count=5000k"); + print_stacks(); + close(pmu_fd); +} + +static void test_bpf_perf_event(void) +{ + struct perf_event_attr attr_type_hw = { + .sample_freq = SAMPLE_FREQ, + .freq = 1, + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_CPU_CYCLES, + .inherit = 1, + }; + struct perf_event_attr attr_type_sw = { + .sample_freq = SAMPLE_FREQ, + .freq = 1, + .type = PERF_TYPE_SOFTWARE, + .config = PERF_COUNT_SW_CPU_CLOCK, + .inherit = 1, + }; + + test_perf_event_all_cpu(&attr_type_hw); + test_perf_event_task(&attr_type_hw); + test_perf_event_all_cpu(&attr_type_sw); + test_perf_event_task(&attr_type_sw); +} + + +int main(int argc, char **argv) +{ + struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; + char filename[256]; + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + setrlimit(RLIMIT_MEMLOCK, &r); + + signal(SIGINT, int_exit); + + if (load_kallsyms()) { + printf("failed to process /proc/kallsyms\n"); + return 1; + } + + if (load_bpf_file(filename)) { + printf("%s", bpf_log_buf); + return 2; + } + + if (fork() == 0) { + read_trace_pipe(); + return 0; + } + test_bpf_perf_event(); + + int_exit(0); + return 0; +} From 72874418e4b9e2673c26a810b0ae9f418b573ee3 Mon Sep 17 00:00:00 2001 From: Brendan Gregg Date: Thu, 1 Sep 2016 18:37:26 -0700 Subject: [PATCH 0669/1715] samples/bpf: add sampleip example sample instruction pointer and frequency count in a BPF map Signed-off-by: Brendan Gregg Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- samples/bpf/Makefile | 4 + samples/bpf/sampleip_kern.c | 38 +++++++ samples/bpf/sampleip_user.c | 196 ++++++++++++++++++++++++++++++++++++ 3 files changed, 238 insertions(+) create mode 100644 samples/bpf/sampleip_kern.c create mode 100644 samples/bpf/sampleip_user.c diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index a69cf9045285..12b7304d55dc 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -26,6 +26,7 @@ hostprogs-y += xdp1 hostprogs-y += xdp2 hostprogs-y += test_current_task_under_cgroup hostprogs-y += trace_event +hostprogs-y += sampleip test_verifier-objs := test_verifier.o libbpf.o test_maps-objs := test_maps.o libbpf.o @@ -54,6 +55,7 @@ xdp2-objs := bpf_load.o libbpf.o xdp1_user.o test_current_task_under_cgroup-objs := bpf_load.o libbpf.o \ test_current_task_under_cgroup_user.o trace_event-objs := bpf_load.o libbpf.o trace_event_user.o +sampleip-objs := bpf_load.o libbpf.o sampleip_user.o # Tell kbuild to always build the programs always := $(hostprogs-y) @@ -82,6 +84,7 @@ always += xdp1_kern.o always += xdp2_kern.o always += test_current_task_under_cgroup_kern.o always += trace_event_kern.o +always += sampleip_kern.o HOSTCFLAGS += -I$(objtree)/usr/include @@ -107,6 +110,7 @@ HOSTLOADLIBES_xdp1 += -lelf HOSTLOADLIBES_xdp2 += -lelf HOSTLOADLIBES_test_current_task_under_cgroup += -lelf HOSTLOADLIBES_trace_event += -lelf +HOSTLOADLIBES_sampleip += -lelf # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline: # make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang diff --git a/samples/bpf/sampleip_kern.c b/samples/bpf/sampleip_kern.c new file mode 100644 index 000000000000..774a681f374a --- /dev/null +++ b/samples/bpf/sampleip_kern.c @@ -0,0 +1,38 @@ +/* Copyright 2016 Netflix, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include "bpf_helpers.h" + +#define MAX_IPS 8192 + +struct bpf_map_def SEC("maps") ip_map = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(u64), + .value_size = sizeof(u32), + .max_entries = MAX_IPS, +}; + +SEC("perf_event") +int do_sample(struct bpf_perf_event_data *ctx) +{ + u64 ip; + u32 *value, init_val = 1; + + ip = ctx->regs.ip; + value = bpf_map_lookup_elem(&ip_map, &ip); + if (value) + *value += 1; + else + /* E2BIG not tested for this example only */ + bpf_map_update_elem(&ip_map, &ip, &init_val, BPF_NOEXIST); + + return 0; +} +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/sampleip_user.c b/samples/bpf/sampleip_user.c new file mode 100644 index 000000000000..260a6bdd6413 --- /dev/null +++ b/samples/bpf/sampleip_user.c @@ -0,0 +1,196 @@ +/* + * sampleip: sample instruction pointer and frequency count in a BPF map. + * + * Copyright 2016 Netflix, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "libbpf.h" +#include "bpf_load.h" + +#define DEFAULT_FREQ 99 +#define DEFAULT_SECS 5 +#define MAX_IPS 8192 +#define PAGE_OFFSET 0xffff880000000000 + +static int nr_cpus; + +static void usage(void) +{ + printf("USAGE: sampleip [-F freq] [duration]\n"); + printf(" -F freq # sample frequency (Hertz), default 99\n"); + printf(" duration # sampling duration (seconds), default 5\n"); +} + +static int sampling_start(int *pmu_fd, int freq) +{ + int i; + + struct perf_event_attr pe_sample_attr = { + .type = PERF_TYPE_SOFTWARE, + .freq = 1, + .sample_period = freq, + .config = PERF_COUNT_SW_CPU_CLOCK, + .inherit = 1, + }; + + for (i = 0; i < nr_cpus; i++) { + pmu_fd[i] = perf_event_open(&pe_sample_attr, -1 /* pid */, i, + -1 /* group_fd */, 0 /* flags */); + if (pmu_fd[i] < 0) { + fprintf(stderr, "ERROR: Initializing perf sampling\n"); + return 1; + } + assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_SET_BPF, + prog_fd[0]) == 0); + assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_ENABLE, 0) == 0); + } + + return 0; +} + +static void sampling_end(int *pmu_fd) +{ + int i; + + for (i = 0; i < nr_cpus; i++) + close(pmu_fd[i]); +} + +struct ipcount { + __u64 ip; + __u32 count; +}; + +/* used for sorting */ +struct ipcount counts[MAX_IPS]; + +static int count_cmp(const void *p1, const void *p2) +{ + return ((struct ipcount *)p1)->count - ((struct ipcount *)p2)->count; +} + +static void print_ip_map(int fd) +{ + struct ksym *sym; + __u64 key, next_key; + __u32 value; + int i, max; + + printf("%-19s %-32s %s\n", "ADDR", "KSYM", "COUNT"); + + /* fetch IPs and counts */ + key = 0, i = 0; + while (bpf_get_next_key(fd, &key, &next_key) == 0) { + bpf_lookup_elem(fd, &next_key, &value); + counts[i].ip = next_key; + counts[i++].count = value; + key = next_key; + } + max = i; + + /* sort and print */ + qsort(counts, max, sizeof(struct ipcount), count_cmp); + for (i = 0; i < max; i++) { + if (counts[i].ip > PAGE_OFFSET) { + sym = ksym_search(counts[i].ip); + printf("0x%-17llx %-32s %u\n", counts[i].ip, sym->name, + counts[i].count); + } else { + printf("0x%-17llx %-32s %u\n", counts[i].ip, "(user)", + counts[i].count); + } + } + + if (max == MAX_IPS) { + printf("WARNING: IP hash was full (max %d entries); ", max); + printf("may have dropped samples\n"); + } +} + +static void int_exit(int sig) +{ + printf("\n"); + print_ip_map(map_fd[0]); + exit(0); +} + +int main(int argc, char **argv) +{ + char filename[256]; + int *pmu_fd, opt, freq = DEFAULT_FREQ, secs = DEFAULT_SECS; + + /* process arguments */ + while ((opt = getopt(argc, argv, "F:h")) != -1) { + switch (opt) { + case 'F': + freq = atoi(optarg); + break; + case 'h': + default: + usage(); + return 0; + } + } + if (argc - optind == 1) + secs = atoi(argv[optind]); + if (freq == 0 || secs == 0) { + usage(); + return 1; + } + + /* initialize kernel symbol translation */ + if (load_kallsyms()) { + fprintf(stderr, "ERROR: loading /proc/kallsyms\n"); + return 2; + } + + /* create perf FDs for each CPU */ + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + pmu_fd = malloc(nr_cpus * sizeof(int)); + if (pmu_fd == NULL) { + fprintf(stderr, "ERROR: malloc of pmu_fd\n"); + return 1; + } + + /* load BPF program */ + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + if (load_bpf_file(filename)) { + fprintf(stderr, "ERROR: loading BPF program (errno %d):\n", + errno); + if (strcmp(bpf_log_buf, "") == 0) + fprintf(stderr, "Try: ulimit -l unlimited\n"); + else + fprintf(stderr, "%s", bpf_log_buf); + return 1; + } + signal(SIGINT, int_exit); + + /* do sampling */ + printf("Sampling at %d Hertz for %d seconds. Ctrl-C also ends.\n", + freq, secs); + if (sampling_start(pmu_fd, freq) != 0) + return 1; + sleep(secs); + sampling_end(pmu_fd); + free(pmu_fd); + + /* output sample counts */ + print_ip_map(map_fd[0]); + + return 0; +} From dd19bde36739702bbd9a832b5d4995bc0fa8d6d7 Mon Sep 17 00:00:00 2001 From: "Rosen, Rami" Date: Fri, 2 Sep 2016 14:11:57 +0300 Subject: [PATCH 0670/1715] switchdev: Fix return value of switchdev_port_fdb_dump(). This patch fixes the retun value of switchdev_port_fdb_dump() when CONFIG_NET_SWITCHDEV is not set. This avoids getting "warning: return makes integer from pointer without a cast [-Wint-conversion]" when building when CONFIG_NET_SWITCHDEV is not set under several compiler versions. This warning is due to commit d297653dd6f07afbe7e6c702a4bcd7615680002e ("rtnetlink: fdb dump: optimize by saving last interface markers"). Signed-off-by: Rami Rosen Acked-by: Roopa Prabhu Reported-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/switchdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 6279f2f179ec..729fe1534160 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -344,7 +344,7 @@ static inline int switchdev_port_fdb_dump(struct sk_buff *skb, struct net_device *filter_dev, int *idx) { - return idx; + return *idx; } static inline bool switchdev_port_same_parent_id(struct net_device *a, From 30787a417086df301c7eb2f4ae14f2acab70e4b2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 2 Sep 2016 22:39:44 +0100 Subject: [PATCH 0671/1715] rxrpc: fix undefined behavior in rxrpc_mark_call_released gcc -Wmaybe-initialized correctly points out a newly introduced bug through which we can end up calling rxrpc_queue_call() for a dead connection: net/rxrpc/call_object.c: In function 'rxrpc_mark_call_released': net/rxrpc/call_object.c:600:5: error: 'sched' may be used uninitialized in this function [-Werror=maybe-uninitialized] This sets the 'sched' variable to zero to restore the previous behavior. Signed-off-by: Arnd Bergmann Fixes: f5c17aaeb2ae ("rxrpc: Calls should only have one terminal state") Signed-off-by: David Howells --- net/rxrpc/call_object.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 516d8ea82f02..57e00fc9cff2 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -586,7 +586,7 @@ static void rxrpc_dead_call_expired(unsigned long _call) */ static void rxrpc_mark_call_released(struct rxrpc_call *call) { - bool sched; + bool sched = false; rxrpc_see_call(call); write_lock(&call->state_lock); From 00b5407e427ac2588a2496b92035a94602b3cd1b Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 2 Sep 2016 22:39:44 +0100 Subject: [PATCH 0672/1715] rxrpc: Fix uninitialised variable warning Fix the following uninitialised variable warning: ../net/rxrpc/call_event.c: In function 'rxrpc_process_call': ../net/rxrpc/call_event.c:879:58: warning: 'error' may be used uninitialized in this function [-Wmaybe-uninitialized] _debug("post net error %d", error); ^ Signed-off-by: David Howells --- net/rxrpc/call_event.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index de72de662044..4754c7fb6242 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -868,7 +868,6 @@ skip_msg_init: /* deal with events of a final nature */ if (test_bit(RXRPC_CALL_EV_RCVD_ERROR, &call->events)) { enum rxrpc_skb_mark mark; - int error; clear_bit(RXRPC_CALL_EV_CONN_ABORT, &call->events); clear_bit(RXRPC_CALL_EV_REJECT_BUSY, &call->events); @@ -876,10 +875,10 @@ skip_msg_init: if (call->completion == RXRPC_CALL_NETWORK_ERROR) { mark = RXRPC_SKB_MARK_NET_ERROR; - _debug("post net error %d", error); + _debug("post net error %d", call->error); } else { mark = RXRPC_SKB_MARK_LOCAL_ERROR; - _debug("post net local error %d", error); + _debug("post net local error %d", call->error); } if (rxrpc_post_message(call, mark, call->error, true) < 0) From ba289af8020a6e81eac424e1d4ef3fcc8ff1b23d Mon Sep 17 00:00:00 2001 From: Roger Chen Date: Fri, 2 Sep 2016 01:49:59 +0800 Subject: [PATCH 0673/1715] net: stmmac: dwmac-rk: add rk3366 & rk3399 specific data Add constants and callback functions for the dwmac on rk3228/rk3229 socs. As can be seen, the base structure is the same, only registers and the bits in them moved slightly. Signed-off-by: Roger Chen Signed-off-by: Caesar Wang Reviewed-by: Heiko Stuebner Signed-off-by: David S. Miller --- .../bindings/net/rockchip-dwmac.txt | 8 +- .../net/ethernet/stmicro/stmmac/dwmac-rk.c | 226 ++++++++++++++++++ 2 files changed, 232 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/net/rockchip-dwmac.txt b/Documentation/devicetree/bindings/net/rockchip-dwmac.txt index cccd945fc45b..95383c5131fc 100644 --- a/Documentation/devicetree/bindings/net/rockchip-dwmac.txt +++ b/Documentation/devicetree/bindings/net/rockchip-dwmac.txt @@ -3,8 +3,12 @@ Rockchip SoC RK3288 10/100/1000 Ethernet driver(GMAC) The device node has following properties. Required properties: - - compatible: Can be one of "rockchip,rk3228-gmac", "rockchip,rk3288-gmac", - "rockchip,rk3368-gmac" + - compatible: should be "rockchip,-gamc" + "rockchip,rk3228-gmac": found on RK322x SoCs + "rockchip,rk3288-gmac": found on RK3288 SoCs + "rockchip,rk3366-gmac": found on RK3366 SoCs + "rockchip,rk3368-gmac": found on RK3368 SoCs + "rockchip,rk3399-gmac": found on RK3399 SoCs - reg: addresses and length of the register sets for the device. - interrupts: Should contain the GMAC interrupts. - interrupt-names: Should contain the interrupt names "macirq". diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c index 92105916ef40..4e6a27088313 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c @@ -301,6 +301,118 @@ static const struct rk_gmac_ops rk3288_ops = { .set_rmii_speed = rk3288_set_rmii_speed, }; +#define RK3366_GRF_SOC_CON6 0x0418 +#define RK3366_GRF_SOC_CON7 0x041c + +/* RK3366_GRF_SOC_CON6 */ +#define RK3366_GMAC_PHY_INTF_SEL_RGMII (GRF_BIT(9) | GRF_CLR_BIT(10) | \ + GRF_CLR_BIT(11)) +#define RK3366_GMAC_PHY_INTF_SEL_RMII (GRF_CLR_BIT(9) | GRF_CLR_BIT(10) | \ + GRF_BIT(11)) +#define RK3366_GMAC_FLOW_CTRL GRF_BIT(8) +#define RK3366_GMAC_FLOW_CTRL_CLR GRF_CLR_BIT(8) +#define RK3366_GMAC_SPEED_10M GRF_CLR_BIT(7) +#define RK3366_GMAC_SPEED_100M GRF_BIT(7) +#define RK3366_GMAC_RMII_CLK_25M GRF_BIT(3) +#define RK3366_GMAC_RMII_CLK_2_5M GRF_CLR_BIT(3) +#define RK3366_GMAC_CLK_125M (GRF_CLR_BIT(4) | GRF_CLR_BIT(5)) +#define RK3366_GMAC_CLK_25M (GRF_BIT(4) | GRF_BIT(5)) +#define RK3366_GMAC_CLK_2_5M (GRF_CLR_BIT(4) | GRF_BIT(5)) +#define RK3366_GMAC_RMII_MODE GRF_BIT(6) +#define RK3366_GMAC_RMII_MODE_CLR GRF_CLR_BIT(6) + +/* RK3366_GRF_SOC_CON7 */ +#define RK3366_GMAC_TXCLK_DLY_ENABLE GRF_BIT(7) +#define RK3366_GMAC_TXCLK_DLY_DISABLE GRF_CLR_BIT(7) +#define RK3366_GMAC_RXCLK_DLY_ENABLE GRF_BIT(15) +#define RK3366_GMAC_RXCLK_DLY_DISABLE GRF_CLR_BIT(15) +#define RK3366_GMAC_CLK_RX_DL_CFG(val) HIWORD_UPDATE(val, 0x7F, 8) +#define RK3366_GMAC_CLK_TX_DL_CFG(val) HIWORD_UPDATE(val, 0x7F, 0) + +static void rk3366_set_to_rgmii(struct rk_priv_data *bsp_priv, + int tx_delay, int rx_delay) +{ + struct device *dev = &bsp_priv->pdev->dev; + + if (IS_ERR(bsp_priv->grf)) { + dev_err(dev, "%s: Missing rockchip,grf property\n", __func__); + return; + } + + regmap_write(bsp_priv->grf, RK3366_GRF_SOC_CON6, + RK3366_GMAC_PHY_INTF_SEL_RGMII | + RK3366_GMAC_RMII_MODE_CLR); + regmap_write(bsp_priv->grf, RK3366_GRF_SOC_CON7, + RK3366_GMAC_RXCLK_DLY_ENABLE | + RK3366_GMAC_TXCLK_DLY_ENABLE | + RK3366_GMAC_CLK_RX_DL_CFG(rx_delay) | + RK3366_GMAC_CLK_TX_DL_CFG(tx_delay)); +} + +static void rk3366_set_to_rmii(struct rk_priv_data *bsp_priv) +{ + struct device *dev = &bsp_priv->pdev->dev; + + if (IS_ERR(bsp_priv->grf)) { + dev_err(dev, "%s: Missing rockchip,grf property\n", __func__); + return; + } + + regmap_write(bsp_priv->grf, RK3366_GRF_SOC_CON6, + RK3366_GMAC_PHY_INTF_SEL_RMII | RK3366_GMAC_RMII_MODE); +} + +static void rk3366_set_rgmii_speed(struct rk_priv_data *bsp_priv, int speed) +{ + struct device *dev = &bsp_priv->pdev->dev; + + if (IS_ERR(bsp_priv->grf)) { + dev_err(dev, "%s: Missing rockchip,grf property\n", __func__); + return; + } + + if (speed == 10) + regmap_write(bsp_priv->grf, RK3366_GRF_SOC_CON6, + RK3366_GMAC_CLK_2_5M); + else if (speed == 100) + regmap_write(bsp_priv->grf, RK3366_GRF_SOC_CON6, + RK3366_GMAC_CLK_25M); + else if (speed == 1000) + regmap_write(bsp_priv->grf, RK3366_GRF_SOC_CON6, + RK3366_GMAC_CLK_125M); + else + dev_err(dev, "unknown speed value for RGMII! speed=%d", speed); +} + +static void rk3366_set_rmii_speed(struct rk_priv_data *bsp_priv, int speed) +{ + struct device *dev = &bsp_priv->pdev->dev; + + if (IS_ERR(bsp_priv->grf)) { + dev_err(dev, "%s: Missing rockchip,grf property\n", __func__); + return; + } + + if (speed == 10) { + regmap_write(bsp_priv->grf, RK3366_GRF_SOC_CON6, + RK3366_GMAC_RMII_CLK_2_5M | + RK3366_GMAC_SPEED_10M); + } else if (speed == 100) { + regmap_write(bsp_priv->grf, RK3366_GRF_SOC_CON6, + RK3366_GMAC_RMII_CLK_25M | + RK3366_GMAC_SPEED_100M); + } else { + dev_err(dev, "unknown speed value for RMII! speed=%d", speed); + } +} + +static const struct rk_gmac_ops rk3366_ops = { + .set_to_rgmii = rk3366_set_to_rgmii, + .set_to_rmii = rk3366_set_to_rmii, + .set_rgmii_speed = rk3366_set_rgmii_speed, + .set_rmii_speed = rk3366_set_rmii_speed, +}; + #define RK3368_GRF_SOC_CON15 0x043c #define RK3368_GRF_SOC_CON16 0x0440 @@ -413,6 +525,118 @@ static const struct rk_gmac_ops rk3368_ops = { .set_rmii_speed = rk3368_set_rmii_speed, }; +#define RK3399_GRF_SOC_CON5 0xc214 +#define RK3399_GRF_SOC_CON6 0xc218 + +/* RK3399_GRF_SOC_CON5 */ +#define RK3399_GMAC_PHY_INTF_SEL_RGMII (GRF_BIT(9) | GRF_CLR_BIT(10) | \ + GRF_CLR_BIT(11)) +#define RK3399_GMAC_PHY_INTF_SEL_RMII (GRF_CLR_BIT(9) | GRF_CLR_BIT(10) | \ + GRF_BIT(11)) +#define RK3399_GMAC_FLOW_CTRL GRF_BIT(8) +#define RK3399_GMAC_FLOW_CTRL_CLR GRF_CLR_BIT(8) +#define RK3399_GMAC_SPEED_10M GRF_CLR_BIT(7) +#define RK3399_GMAC_SPEED_100M GRF_BIT(7) +#define RK3399_GMAC_RMII_CLK_25M GRF_BIT(3) +#define RK3399_GMAC_RMII_CLK_2_5M GRF_CLR_BIT(3) +#define RK3399_GMAC_CLK_125M (GRF_CLR_BIT(4) | GRF_CLR_BIT(5)) +#define RK3399_GMAC_CLK_25M (GRF_BIT(4) | GRF_BIT(5)) +#define RK3399_GMAC_CLK_2_5M (GRF_CLR_BIT(4) | GRF_BIT(5)) +#define RK3399_GMAC_RMII_MODE GRF_BIT(6) +#define RK3399_GMAC_RMII_MODE_CLR GRF_CLR_BIT(6) + +/* RK3399_GRF_SOC_CON6 */ +#define RK3399_GMAC_TXCLK_DLY_ENABLE GRF_BIT(7) +#define RK3399_GMAC_TXCLK_DLY_DISABLE GRF_CLR_BIT(7) +#define RK3399_GMAC_RXCLK_DLY_ENABLE GRF_BIT(15) +#define RK3399_GMAC_RXCLK_DLY_DISABLE GRF_CLR_BIT(15) +#define RK3399_GMAC_CLK_RX_DL_CFG(val) HIWORD_UPDATE(val, 0x7F, 8) +#define RK3399_GMAC_CLK_TX_DL_CFG(val) HIWORD_UPDATE(val, 0x7F, 0) + +static void rk3399_set_to_rgmii(struct rk_priv_data *bsp_priv, + int tx_delay, int rx_delay) +{ + struct device *dev = &bsp_priv->pdev->dev; + + if (IS_ERR(bsp_priv->grf)) { + dev_err(dev, "%s: Missing rockchip,grf property\n", __func__); + return; + } + + regmap_write(bsp_priv->grf, RK3399_GRF_SOC_CON5, + RK3399_GMAC_PHY_INTF_SEL_RGMII | + RK3399_GMAC_RMII_MODE_CLR); + regmap_write(bsp_priv->grf, RK3399_GRF_SOC_CON6, + RK3399_GMAC_RXCLK_DLY_ENABLE | + RK3399_GMAC_TXCLK_DLY_ENABLE | + RK3399_GMAC_CLK_RX_DL_CFG(rx_delay) | + RK3399_GMAC_CLK_TX_DL_CFG(tx_delay)); +} + +static void rk3399_set_to_rmii(struct rk_priv_data *bsp_priv) +{ + struct device *dev = &bsp_priv->pdev->dev; + + if (IS_ERR(bsp_priv->grf)) { + dev_err(dev, "%s: Missing rockchip,grf property\n", __func__); + return; + } + + regmap_write(bsp_priv->grf, RK3399_GRF_SOC_CON5, + RK3399_GMAC_PHY_INTF_SEL_RMII | RK3399_GMAC_RMII_MODE); +} + +static void rk3399_set_rgmii_speed(struct rk_priv_data *bsp_priv, int speed) +{ + struct device *dev = &bsp_priv->pdev->dev; + + if (IS_ERR(bsp_priv->grf)) { + dev_err(dev, "%s: Missing rockchip,grf property\n", __func__); + return; + } + + if (speed == 10) + regmap_write(bsp_priv->grf, RK3399_GRF_SOC_CON5, + RK3399_GMAC_CLK_2_5M); + else if (speed == 100) + regmap_write(bsp_priv->grf, RK3399_GRF_SOC_CON5, + RK3399_GMAC_CLK_25M); + else if (speed == 1000) + regmap_write(bsp_priv->grf, RK3399_GRF_SOC_CON5, + RK3399_GMAC_CLK_125M); + else + dev_err(dev, "unknown speed value for RGMII! speed=%d", speed); +} + +static void rk3399_set_rmii_speed(struct rk_priv_data *bsp_priv, int speed) +{ + struct device *dev = &bsp_priv->pdev->dev; + + if (IS_ERR(bsp_priv->grf)) { + dev_err(dev, "%s: Missing rockchip,grf property\n", __func__); + return; + } + + if (speed == 10) { + regmap_write(bsp_priv->grf, RK3399_GRF_SOC_CON5, + RK3399_GMAC_RMII_CLK_2_5M | + RK3399_GMAC_SPEED_10M); + } else if (speed == 100) { + regmap_write(bsp_priv->grf, RK3399_GRF_SOC_CON5, + RK3399_GMAC_RMII_CLK_25M | + RK3399_GMAC_SPEED_100M); + } else { + dev_err(dev, "unknown speed value for RMII! speed=%d", speed); + } +} + +static const struct rk_gmac_ops rk3399_ops = { + .set_to_rgmii = rk3399_set_to_rgmii, + .set_to_rmii = rk3399_set_to_rmii, + .set_rgmii_speed = rk3399_set_rgmii_speed, + .set_rmii_speed = rk3399_set_rmii_speed, +}; + static int gmac_clk_init(struct rk_priv_data *bsp_priv) { struct device *dev = &bsp_priv->pdev->dev; @@ -760,7 +984,9 @@ static int rk_gmac_probe(struct platform_device *pdev) static const struct of_device_id rk_gmac_dwmac_match[] = { { .compatible = "rockchip,rk3228-gmac", .data = &rk3228_ops }, { .compatible = "rockchip,rk3288-gmac", .data = &rk3288_ops }, + { .compatible = "rockchip,rk3366-gmac", .data = &rk3366_ops }, { .compatible = "rockchip,rk3368-gmac", .data = &rk3368_ops }, + { .compatible = "rockchip,rk3399-gmac", .data = &rk3399_ops }, { } }; MODULE_DEVICE_TABLE(of, rk_gmac_dwmac_match); From 45383f528fcf417539892440dd0eb9fb96990ef7 Mon Sep 17 00:00:00 2001 From: Roger Chen Date: Fri, 2 Sep 2016 01:50:00 +0800 Subject: [PATCH 0674/1715] net: stmmac: dwmac-rk: fixes the gmac resume after PD on/off GMAC Power Domain(PD) will be disabled during suspend. That will causes GRF registers reset. So corresponding GRF registers for GMAC must be setup again. Signed-off-by: Roger Chen Signed-off-by: Caesar Wang Signed-off-by: David S. Miller --- .../net/ethernet/stmicro/stmmac/dwmac-rk.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c index 4e6a27088313..e2ba6c4619fd 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c @@ -853,6 +853,16 @@ static struct rk_priv_data *rk_gmac_setup(struct platform_device *pdev, "rockchip,grf"); bsp_priv->pdev = pdev; + gmac_clk_init(bsp_priv); + + return bsp_priv; +} + +static int rk_gmac_powerup(struct rk_priv_data *bsp_priv) +{ + int ret; + struct device *dev = &bsp_priv->pdev->dev; + /*rmii or rgmii*/ if (bsp_priv->phy_iface == PHY_INTERFACE_MODE_RGMII) { dev_info(dev, "init for RGMII\n"); @@ -865,15 +875,6 @@ static struct rk_priv_data *rk_gmac_setup(struct platform_device *pdev, dev_err(dev, "NO interface defined!\n"); } - gmac_clk_init(bsp_priv); - - return bsp_priv; -} - -static int rk_gmac_powerup(struct rk_priv_data *bsp_priv) -{ - int ret; - ret = phy_power_on(bsp_priv, true); if (ret) return ret; From 2c896fb02e7f65299646f295a007bda043e0f382 Mon Sep 17 00:00:00 2001 From: David Wu Date: Fri, 2 Sep 2016 01:50:01 +0800 Subject: [PATCH 0675/1715] net: stmmac: dwmac-rk: add pd_gmac support for rk3399 Add the gmac power domain support for rk3399, in order to save more power consumption. Signed-off-by: David Wu Signed-off-by: Caesar Wang Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c index e2ba6c4619fd..3740a4417fa0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "stmmac_platform.h" @@ -883,11 +884,19 @@ static int rk_gmac_powerup(struct rk_priv_data *bsp_priv) if (ret) return ret; + pm_runtime_enable(dev); + pm_runtime_get_sync(dev); + return 0; } static void rk_gmac_powerdown(struct rk_priv_data *gmac) { + struct device *dev = &gmac->pdev->dev; + + pm_runtime_put_sync(dev); + pm_runtime_disable(dev); + phy_power_on(gmac, false); gmac_clk_enable(gmac, false); } From 02d11ca20091fcef904f05defda80c53e5b4e793 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 1 Sep 2016 13:52:49 -0400 Subject: [PATCH 0676/1715] tipc: transfer broadcast nacks in link state messages When we send broadcasts in clusters of more 70-80 nodes, we sometimes see the broadcast link resetting because of an excessive number of retransmissions. This is caused by a combination of two factors: 1) A 'NACK crunch", where loss of broadcast packets is discovered and NACK'ed by several nodes simultaneously, leading to multiple redundant broadcast retransmissions. 2) The fact that the NACKS as such also are sent as broadcast, leading to excessive load and packet loss on the transmitting switch/bridge. This commit deals with the latter problem, by moving sending of broadcast nacks from the dedicated BCAST_PROTOCOL/NACK message type to regular unicast LINK_PROTOCOL/STATE messages. We allocate 10 unused bits in word 8 of the said message for this purpose, and introduce a new capability bit, TIPC_BCAST_STATE_NACK in order to keep the change backwards compatible. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/bcast.c | 8 +++--- net/tipc/bcast.h | 4 +-- net/tipc/link.c | 64 ++++++++++++++++++++++++++++++++++++++---------- net/tipc/link.h | 6 ++--- net/tipc/msg.h | 10 ++++++++ net/tipc/node.c | 32 ++++++++++++++++++++++-- net/tipc/node.h | 11 ++++++--- 7 files changed, 108 insertions(+), 27 deletions(-) diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index ae469b37d852..753f774cb46f 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -269,18 +269,19 @@ void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l, u32 acked) * * RCU is locked, no other locks set */ -void tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l, - struct tipc_msg *hdr) +int tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l, + struct tipc_msg *hdr) { struct sk_buff_head *inputq = &tipc_bc_base(net)->inputq; struct sk_buff_head xmitq; + int rc = 0; __skb_queue_head_init(&xmitq); tipc_bcast_lock(net); if (msg_type(hdr) == STATE_MSG) { tipc_link_bc_ack_rcv(l, msg_bcast_ack(hdr), &xmitq); - tipc_link_bc_sync_rcv(l, hdr, &xmitq); + rc = tipc_link_bc_sync_rcv(l, hdr, &xmitq); } else { tipc_link_bc_init_rcv(l, hdr); } @@ -291,6 +292,7 @@ void tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l, /* Any socket wakeup messages ? */ if (!skb_queue_empty(inputq)) tipc_sk_rcv(net, inputq); + return rc; } /* tipc_bcast_add_peer - add a peer node to broadcast link and bearer diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index d5e79b3767fd..5ffe34472ccd 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -56,8 +56,8 @@ int tipc_bcast_get_mtu(struct net *net); int tipc_bcast_xmit(struct net *net, struct sk_buff_head *list); int tipc_bcast_rcv(struct net *net, struct tipc_link *l, struct sk_buff *skb); void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l, u32 acked); -void tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l, - struct tipc_msg *hdr); +int tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l, + struct tipc_msg *hdr); int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg); int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]); int tipc_bclink_reset_stats(struct net *net); diff --git a/net/tipc/link.c b/net/tipc/link.c index 2c6e1b9e024b..136316fb37ec 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -367,6 +367,18 @@ int tipc_link_bc_peers(struct tipc_link *l) return l->ackers; } +u16 link_bc_rcv_gap(struct tipc_link *l) +{ + struct sk_buff *skb = skb_peek(&l->deferdq); + u16 gap = 0; + + if (more(l->snd_nxt, l->rcv_nxt)) + gap = l->snd_nxt - l->rcv_nxt; + if (skb) + gap = buf_seqno(skb) - l->rcv_nxt; + return gap; +} + void tipc_link_set_mtu(struct tipc_link *l, int mtu) { l->mtu = mtu; @@ -1135,7 +1147,10 @@ int tipc_link_build_state_msg(struct tipc_link *l, struct sk_buff_head *xmitq) if (((l->rcv_nxt ^ tipc_own_addr(l->net)) & 0xf) != 0xf) return 0; l->rcv_unacked = 0; - return TIPC_LINK_SND_BC_ACK; + + /* Use snd_nxt to store peer's snd_nxt in broadcast rcv link */ + l->snd_nxt = l->rcv_nxt; + return TIPC_LINK_SND_STATE; } /* Unicast ACK */ @@ -1236,7 +1251,7 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, rc |= tipc_link_input(l, skb, l->inputq); if (unlikely(++l->rcv_unacked >= TIPC_MIN_LINK_WIN)) rc |= tipc_link_build_state_msg(l, xmitq); - if (unlikely(rc & ~TIPC_LINK_SND_BC_ACK)) + if (unlikely(rc & ~TIPC_LINK_SND_STATE)) break; } while ((skb = __skb_dequeue(defq))); @@ -1250,10 +1265,11 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, u16 rcvgap, int tolerance, int priority, struct sk_buff_head *xmitq) { + struct tipc_link *bcl = l->bc_rcvlink; struct sk_buff *skb; struct tipc_msg *hdr; struct sk_buff_head *dfq = &l->deferdq; - bool node_up = link_is_up(l->bc_rcvlink); + bool node_up = link_is_up(bcl); struct tipc_mon_state *mstate = &l->mon_state; int dlen = 0; void *data; @@ -1281,7 +1297,7 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, msg_set_net_plane(hdr, l->net_plane); msg_set_next_sent(hdr, l->snd_nxt); msg_set_ack(hdr, l->rcv_nxt - 1); - msg_set_bcast_ack(hdr, l->bc_rcvlink->rcv_nxt - 1); + msg_set_bcast_ack(hdr, bcl->rcv_nxt - 1); msg_set_last_bcast(hdr, l->bc_sndlink->snd_nxt - 1); msg_set_link_tolerance(hdr, tolerance); msg_set_linkprio(hdr, priority); @@ -1291,6 +1307,7 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, if (mtyp == STATE_MSG) { msg_set_seq_gap(hdr, rcvgap); + msg_set_bc_gap(hdr, link_bc_rcv_gap(bcl)); msg_set_probe(hdr, probe); tipc_mon_prep(l->net, data, &dlen, mstate, l->bearer_id); msg_set_size(hdr, INT_H_SIZE + dlen); @@ -1575,49 +1592,68 @@ void tipc_link_bc_init_rcv(struct tipc_link *l, struct tipc_msg *hdr) /* tipc_link_bc_sync_rcv - update rcv link according to peer's send state */ -void tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr, - struct sk_buff_head *xmitq) +int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr, + struct sk_buff_head *xmitq) { u16 peers_snd_nxt = msg_bc_snd_nxt(hdr); + u16 from = msg_bcast_ack(hdr) + 1; + u16 to = from + msg_bc_gap(hdr) - 1; + int rc = 0; if (!link_is_up(l)) - return; + return rc; if (!msg_peer_node_is_up(hdr)) - return; + return rc; /* Open when peer ackowledges our bcast init msg (pkt #1) */ if (msg_ack(hdr)) l->bc_peer_is_up = true; if (!l->bc_peer_is_up) - return; + return rc; /* Ignore if peers_snd_nxt goes beyond receive window */ if (more(peers_snd_nxt, l->rcv_nxt + l->window)) - return; + return rc; + + if (!less(to, from)) { + rc = tipc_link_retrans(l->bc_sndlink, from, to, xmitq); + l->stats.recv_nacks++; + } + + l->snd_nxt = peers_snd_nxt; + if (link_bc_rcv_gap(l)) + rc |= TIPC_LINK_SND_STATE; + + /* Return now if sender supports nack via STATE messages */ + if (l->peer_caps & TIPC_BCAST_STATE_NACK) + return rc; + + /* Otherwise, be backwards compatible */ if (!more(peers_snd_nxt, l->rcv_nxt)) { l->nack_state = BC_NACK_SND_CONDITIONAL; - return; + return 0; } /* Don't NACK if one was recently sent or peeked */ if (l->nack_state == BC_NACK_SND_SUPPRESS) { l->nack_state = BC_NACK_SND_UNCONDITIONAL; - return; + return 0; } /* Conditionally delay NACK sending until next synch rcv */ if (l->nack_state == BC_NACK_SND_CONDITIONAL) { l->nack_state = BC_NACK_SND_UNCONDITIONAL; if ((peers_snd_nxt - l->rcv_nxt) < TIPC_MIN_LINK_WIN) - return; + return 0; } /* Send NACK now but suppress next one */ tipc_link_build_bc_proto_msg(l, true, peers_snd_nxt, xmitq); l->nack_state = BC_NACK_SND_SUPPRESS; + return 0; } void tipc_link_bc_ack_rcv(struct tipc_link *l, u16 acked, @@ -1654,6 +1690,8 @@ void tipc_link_bc_ack_rcv(struct tipc_link *l, u16 acked, } /* tipc_link_bc_nack_rcv(): receive broadcast nack message + * This function is here for backwards compatibility, since + * no BCAST_PROTOCOL/STATE messages occur from TIPC v2.5. */ int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb, struct sk_buff_head *xmitq) diff --git a/net/tipc/link.h b/net/tipc/link.h index d7e9d42fcb2d..d1bd1787a768 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -63,7 +63,7 @@ enum { enum { TIPC_LINK_UP_EVT = 1, TIPC_LINK_DOWN_EVT = (1 << 1), - TIPC_LINK_SND_BC_ACK = (1 << 2) + TIPC_LINK_SND_STATE = (1 << 2) }; /* Starting value for maximum packet size negotiation on unicast links @@ -138,8 +138,8 @@ void tipc_link_bc_ack_rcv(struct tipc_link *l, u16 acked, void tipc_link_build_bc_sync_msg(struct tipc_link *l, struct sk_buff_head *xmitq); void tipc_link_bc_init_rcv(struct tipc_link *l, struct tipc_msg *hdr); -void tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr, - struct sk_buff_head *xmitq); +int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr, + struct sk_buff_head *xmitq); int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb, struct sk_buff_head *xmitq); #endif diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 7cf52fb39bee..c3832cdf2278 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -719,6 +719,16 @@ static inline char *msg_media_addr(struct tipc_msg *m) return (char *)&m->hdr[TIPC_MEDIA_INFO_OFFSET]; } +static inline u32 msg_bc_gap(struct tipc_msg *m) +{ + return msg_bits(m, 8, 0, 0x3ff); +} + +static inline void msg_set_bc_gap(struct tipc_msg *m, u32 n) +{ + msg_set_bits(m, 8, 0, 0x3ff, n); +} + /* * Word 9 */ diff --git a/net/tipc/node.c b/net/tipc/node.c index 7e8b75fd1a02..7ef14e2d2356 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1262,6 +1262,34 @@ void tipc_node_broadcast(struct net *net, struct sk_buff *skb) kfree_skb(skb); } +static void tipc_node_bc_sync_rcv(struct tipc_node *n, struct tipc_msg *hdr, + int bearer_id, struct sk_buff_head *xmitq) +{ + struct tipc_link *ucl; + int rc; + + rc = tipc_bcast_sync_rcv(n->net, n->bc_entry.link, hdr); + + if (rc & TIPC_LINK_DOWN_EVT) { + tipc_bearer_reset_all(n->net); + return; + } + + if (!(rc & TIPC_LINK_SND_STATE)) + return; + + /* If probe message, a STATE response will be sent anyway */ + if (msg_probe(hdr)) + return; + + /* Produce a STATE message carrying broadcast NACK */ + tipc_node_read_lock(n); + ucl = n->links[bearer_id].link; + if (ucl) + tipc_link_build_state_msg(ucl, xmitq); + tipc_node_read_unlock(n); +} + /** * tipc_node_bc_rcv - process TIPC broadcast packet arriving from off-node * @net: the applicable net namespace @@ -1298,7 +1326,7 @@ static void tipc_node_bc_rcv(struct net *net, struct sk_buff *skb, int bearer_id rc = tipc_bcast_rcv(net, be->link, skb); /* Broadcast ACKs are sent on a unicast link */ - if (rc & TIPC_LINK_SND_BC_ACK) { + if (rc & TIPC_LINK_SND_STATE) { tipc_node_read_lock(n); tipc_link_build_state_msg(le->link, &xmitq); tipc_node_read_unlock(n); @@ -1505,7 +1533,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) /* Ensure broadcast reception is in synch with peer's send state */ if (unlikely(usr == LINK_PROTOCOL)) - tipc_bcast_sync_rcv(net, n->bc_entry.link, hdr); + tipc_node_bc_sync_rcv(n, hdr, bearer_id, &xmitq); else if (unlikely(tipc_link_acked(n->bc_entry.link) != bc_ack)) tipc_bcast_ack_rcv(net, n->bc_entry.link, bc_ack); diff --git a/net/tipc/node.h b/net/tipc/node.h index 4578b34c7dca..39ef54c1f2ad 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -1,7 +1,7 @@ /* * net/tipc/node.h: Include file for TIPC node management routines * - * Copyright (c) 2000-2006, 2014-2015, Ericsson AB + * Copyright (c) 2000-2006, 2014-2016, Ericsson AB * Copyright (c) 2005, 2010-2014, Wind River Systems * All rights reserved. * @@ -45,11 +45,14 @@ /* Optional capabilities supported by this code version */ enum { - TIPC_BCAST_SYNCH = (1 << 1), - TIPC_BLOCK_FLOWCTL = (2 << 1) + TIPC_BCAST_SYNCH = (1 << 1), + TIPC_BCAST_STATE_NACK = (1 << 2), + TIPC_BLOCK_FLOWCTL = (1 << 3) }; -#define TIPC_NODE_CAPABILITIES (TIPC_BCAST_SYNCH | TIPC_BLOCK_FLOWCTL) +#define TIPC_NODE_CAPABILITIES (TIPC_BCAST_SYNCH | \ + TIPC_BCAST_STATE_NACK | \ + TIPC_BLOCK_FLOWCTL) #define INVALID_BEARER_ID -1 void tipc_node_stop(struct net *net); From 7c4a54b963b68eee5ef3bd7ca740630d965616e2 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 1 Sep 2016 13:52:50 -0400 Subject: [PATCH 0677/1715] tipc: rate limit broadcast retransmissions As cluster sizes grow, so does the amount of identical or overlapping broadcast NACKs generated by the packet receivers. This often leads to 'NACK crunches' resulting in huge numbers of redundant retransmissions of the same packet ranges. In this commit, we introduce rate control of broadcast retransmissions, so that a retransmitted range cannot be retransmitted again until after at least 10 ms. This reduces the frequency of duplicate, redundant retransmissions by an order of magnitude, while having a significant positive impact on overall throughput and scalability. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/link.c | 52 ++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 47 insertions(+), 5 deletions(-) diff --git a/net/tipc/link.c b/net/tipc/link.c index 136316fb37ec..58bb44d95f95 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -181,7 +181,10 @@ struct tipc_link { u16 acked; struct tipc_link *bc_rcvlink; struct tipc_link *bc_sndlink; - int nack_state; + unsigned long prev_retr; + u16 prev_from; + u16 prev_to; + u8 nack_state; bool bc_peer_is_up; /* Statistics */ @@ -202,6 +205,8 @@ enum { BC_NACK_SND_SUPPRESS, }; +#define TIPC_BC_RETR_LIMIT 10 /* [ms] */ + /* * Interval between NACKs when packets arrive out of order */ @@ -1590,11 +1595,48 @@ void tipc_link_bc_init_rcv(struct tipc_link *l, struct tipc_msg *hdr) l->rcv_nxt = peers_snd_nxt; } +/* link_bc_retr eval()- check if the indicated range can be retransmitted now + * - Adjust permitted range if there is overlap with previous retransmission + */ +static bool link_bc_retr_eval(struct tipc_link *l, u16 *from, u16 *to) +{ + unsigned long elapsed = jiffies_to_msecs(jiffies - l->prev_retr); + + if (less(*to, *from)) + return false; + + /* New retransmission request */ + if ((elapsed > TIPC_BC_RETR_LIMIT) || + less(*to, l->prev_from) || more(*from, l->prev_to)) { + l->prev_from = *from; + l->prev_to = *to; + l->prev_retr = jiffies; + return true; + } + + /* Inside range of previous retransmit */ + if (!less(*from, l->prev_from) && !more(*to, l->prev_to)) + return false; + + /* Fully or partially outside previous range => exclude overlap */ + if (less(*from, l->prev_from)) { + *to = l->prev_from - 1; + l->prev_from = *from; + } + if (more(*to, l->prev_to)) { + *from = l->prev_to + 1; + l->prev_to = *to; + } + l->prev_retr = jiffies; + return true; +} + /* tipc_link_bc_sync_rcv - update rcv link according to peer's send state */ int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr, struct sk_buff_head *xmitq) { + struct tipc_link *snd_l = l->bc_sndlink; u16 peers_snd_nxt = msg_bc_snd_nxt(hdr); u16 from = msg_bcast_ack(hdr) + 1; u16 to = from + msg_bc_gap(hdr) - 1; @@ -1613,14 +1655,14 @@ int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr, if (!l->bc_peer_is_up) return rc; + l->stats.recv_nacks++; + /* Ignore if peers_snd_nxt goes beyond receive window */ if (more(peers_snd_nxt, l->rcv_nxt + l->window)) return rc; - if (!less(to, from)) { - rc = tipc_link_retrans(l->bc_sndlink, from, to, xmitq); - l->stats.recv_nacks++; - } + if (link_bc_retr_eval(snd_l, &from, &to)) + rc = tipc_link_retrans(snd_l, from, to, xmitq); l->snd_nxt = peers_snd_nxt; if (link_bc_rcv_gap(l)) From e0a05ebe26c07c4f649a7f5c251a3d4d8bf0402d Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 1 Sep 2016 13:52:51 -0400 Subject: [PATCH 0678/1715] tipc: send broadcast nack directly upon sequence gap detection Because of the risk of an excessive number of NACK messages and retransissions, receivers have until now abstained from sending broadcast NACKS directly upon detection of a packet sequence number gap. We have instead relied on such gaps being detected by link protocol STATE message exchange, something that by necessity delays such detection and subsequent retransmissions. With the introduction of unicast NACK transmission and rate control of retransmissions we can now remove this limitation. We now allow receiving nodes to send NACKS immediately, while coordinating the permission to do so among the nodes in order to avoid NACK storms. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/link.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/net/tipc/link.c b/net/tipc/link.c index 58bb44d95f95..b36e16cdc945 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -242,8 +242,8 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, u16 rcvgap, int tolerance, int priority, struct sk_buff_head *xmitq); static void link_print(struct tipc_link *l, const char *str); -static void tipc_link_build_nack_msg(struct tipc_link *l, - struct sk_buff_head *xmitq); +static int tipc_link_build_nack_msg(struct tipc_link *l, + struct sk_buff_head *xmitq); static void tipc_link_build_bc_init_msg(struct tipc_link *l, struct sk_buff_head *xmitq); static bool tipc_link_release_pkts(struct tipc_link *l, u16 to); @@ -1184,17 +1184,26 @@ void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq) } /* tipc_link_build_nack_msg: prepare link nack message for transmission + * Note that sending of broadcast NACK is coordinated among nodes, to + * reduce the risk of NACK storms towards the sender */ -static void tipc_link_build_nack_msg(struct tipc_link *l, - struct sk_buff_head *xmitq) +static int tipc_link_build_nack_msg(struct tipc_link *l, + struct sk_buff_head *xmitq) { u32 def_cnt = ++l->stats.deferred_recv; + int match1, match2; - if (link_is_bc_rcvlink(l)) - return; + if (link_is_bc_rcvlink(l)) { + match1 = def_cnt & 0xf; + match2 = tipc_own_addr(l->net) & 0xf; + if (match1 == match2) + return TIPC_LINK_SND_STATE; + return 0; + } if ((skb_queue_len(&l->deferdq) == 1) || !(def_cnt % TIPC_NACK_INTV)) tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, xmitq); + return 0; } /* tipc_link_rcv - process TIPC packets/messages arriving from off-node @@ -1245,7 +1254,7 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, /* Defer delivery if sequence gap */ if (unlikely(seqno != rcv_nxt)) { __tipc_skb_queue_sorted(defq, seqno, skb); - tipc_link_build_nack_msg(l, xmitq); + rc |= tipc_link_build_nack_msg(l, xmitq); break; } From 7b6b6c9556f9bbad4efd85ca6448ab89993294cf Mon Sep 17 00:00:00 2001 From: Raghu Vatsavayi Date: Thu, 1 Sep 2016 11:16:04 -0700 Subject: [PATCH 0679/1715] liquidio: link and control commands This patch adds work queue support for link status and control commands. Signed-off-by: Derek Chickles Signed-off-by: Satanand Burla Signed-off-by: Felix Manlunas Signed-off-by: Raghu Vatsavayi Signed-off-by: David S. Miller --- .../net/ethernet/cavium/liquidio/lio_core.c | 5 +- .../net/ethernet/cavium/liquidio/lio_main.c | 75 +++++++++++++++++-- .../ethernet/cavium/liquidio/octeon_network.h | 4 + 3 files changed, 76 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/cavium/liquidio/lio_core.c b/drivers/net/ethernet/cavium/liquidio/lio_core.c index 809179c040a7..5fb444da3b9c 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_core.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_core.c @@ -156,10 +156,9 @@ void liquidio_link_ctrl_cmd_completion(void *nctrl_ptr) dev_info(&oct->pci_dev->dev, "%s MTU Changed from %d to %d\n", netdev->name, netdev->mtu, nctrl->ncmd.s.param1); - rtnl_lock(); netdev->mtu = nctrl->ncmd.s.param1; - call_netdevice_notifiers(NETDEV_CHANGEMTU, netdev); - rtnl_unlock(); + queue_delayed_work(lio->link_status_wq.wq, + &lio->link_status_wq.wk.work, 0); break; case OCTNET_CMD_GPIO_ACCESS: diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index 866c075cc7ea..9bc5e2316fa1 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -856,6 +856,52 @@ static void print_link_info(struct net_device *netdev) } } +/** + * \brief Routine to notify MTU change + * @param work work_struct data structure + */ +static void octnet_link_status_change(struct work_struct *work) +{ + struct cavium_wk *wk = (struct cavium_wk *)work; + struct lio *lio = (struct lio *)wk->ctxptr; + + rtnl_lock(); + call_netdevice_notifiers(NETDEV_CHANGEMTU, lio->netdev); + rtnl_unlock(); +} + +/** + * \brief Sets up the mtu status change work + * @param netdev network device + */ +static inline int setup_link_status_change_wq(struct net_device *netdev) +{ + struct lio *lio = GET_LIO(netdev); + struct octeon_device *oct = lio->oct_dev; + + lio->link_status_wq.wq = alloc_workqueue("link-status", + WQ_MEM_RECLAIM, 0); + if (!lio->link_status_wq.wq) { + dev_err(&oct->pci_dev->dev, "unable to create cavium link status wq\n"); + return -1; + } + INIT_DELAYED_WORK(&lio->link_status_wq.wk.work, + octnet_link_status_change); + lio->link_status_wq.wk.ctxptr = lio; + + return 0; +} + +static inline void cleanup_link_status_change_wq(struct net_device *netdev) +{ + struct lio *lio = GET_LIO(netdev); + + if (lio->link_status_wq.wq) { + cancel_delayed_work_sync(&lio->link_status_wq.wk.work); + destroy_workqueue(lio->link_status_wq.wq); + } +} + /** * \brief Update link status * @param netdev network device @@ -1385,11 +1431,16 @@ static void liquidio_destroy_nic_device(struct octeon_device *oct, int ifidx) napi_disable(napi); oct->props[lio->ifidx].napi_enabled = 0; + + if (OCTEON_CN23XX_PF(oct)) + oct->droq[0]->ops.poll_mode = 0; } if (atomic_read(&lio->ifstate) & LIO_IFSTATE_REGISTERED) unregister_netdev(netdev); + cleanup_link_status_change_wq(netdev); + delete_glists(lio); free_netdev(netdev); @@ -2267,6 +2318,14 @@ static inline int setup_io_queues(struct octeon_device *octeon_dev, octeon_register_droq_ops(octeon_dev, q_no, &droq_ops); } + if (OCTEON_CN23XX_PF(octeon_dev)) { + /* 23XX PF can receive control messages (via the first PF-owned + * droq) from the firmware even if the ethX interface is down, + * so that's why poll_mode must be off for the first droq. + */ + octeon_dev->droq[0]->ops.poll_mode = 0; + } + /* set up IQs. */ for (q = 0; q < lio->linfo.num_txpciq; q++) { num_tx_descs = CFG_GET_NUM_TX_DESCS_NIC_IF(octeon_get_conf @@ -2351,12 +2410,20 @@ static int liquidio_open(struct net_device *netdev) napi_enable(napi); oct->props[lio->ifidx].napi_enabled = 1; + + if (OCTEON_CN23XX_PF(oct)) + oct->droq[0]->ops.poll_mode = 1; } oct_ptp_open(netdev); ifstate_set(lio, LIO_IFSTATE_RUNNING); + /* Ready for link status updates */ + lio->intf_open = 1; + + netif_info(lio, ifup, lio->netdev, "Interface Open, ready for traffic\n"); + if (OCTEON_CN23XX_PF(oct)) { if (!oct->msix_on) if (setup_tx_poll_fn(netdev)) @@ -2368,14 +2435,9 @@ static int liquidio_open(struct net_device *netdev) start_txq(netdev); - netif_info(lio, ifup, lio->netdev, "Interface Open, ready for traffic\n"); - /* tell Octeon to start forwarding packets to host */ send_rx_ctrl_cmd(lio, 1); - /* Ready for link status updates */ - lio->intf_open = 1; - dev_info(&oct->pci_dev->dev, "%s interface is opened\n", netdev->name); @@ -3668,6 +3730,9 @@ static int setup_nic_devices(struct octeon_device *octeon_dev) liquidio_set_feature(netdev, OCTNET_CMD_VERBOSE_ENABLE, 0); + if (setup_link_status_change_wq(netdev)) + goto setup_nic_dev_fail; + /* Register the network device with the OS */ if (register_netdev(netdev)) { dev_err(&octeon_dev->pci_dev->dev, "Device registration failed\n"); diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_network.h b/drivers/net/ethernet/cavium/liquidio/octeon_network.h index 7cfbdf9b039c..d3b68d8279e2 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_network.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_network.h @@ -122,6 +122,10 @@ struct lio { /* work queue for txq status */ struct cavium_wq txq_status_wq; + + /* work queue for link status */ + struct cavium_wq link_status_wq; + }; #define LIO_SIZE (sizeof(struct lio)) From afdf841fee732bd89d53530dd06ea8b4748ed8be Mon Sep 17 00:00:00 2001 From: Raghu Vatsavayi Date: Thu, 1 Sep 2016 11:16:05 -0700 Subject: [PATCH 0680/1715] liquidio: RX control commands Adds support for RX control commands on cn23xx device. Signed-off-by: Derek Chickles Signed-off-by: Satanand Burla Signed-off-by: Felix Manlunas Signed-off-by: Raghu Vatsavayi Signed-off-by: David S. Miller --- .../net/ethernet/cavium/liquidio/lio_main.c | 100 ++++++++++++++++-- .../ethernet/cavium/liquidio/octeon_device.h | 1 + .../ethernet/cavium/liquidio/octeon_main.h | 8 +- 3 files changed, 96 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index 9bc5e2316fa1..78d322c72030 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -96,6 +96,14 @@ struct liquidio_if_cfg_resp { u64 status; }; +struct liquidio_rx_ctl_context { + int octeon_id; + + wait_queue_head_t wc; + + int cond; +}; + struct oct_link_status_resp { u64 rh; struct oct_link_info link_info; @@ -1377,6 +1385,34 @@ static void octeon_destroy_resources(struct octeon_device *oct) tasklet_kill(&oct_priv->droq_tasklet); } +/** + * \brief Callback for rx ctrl + * @param status status of request + * @param buf pointer to resp structure + */ +static void rx_ctl_callback(struct octeon_device *oct, + u32 status, + void *buf) +{ + struct octeon_soft_command *sc = (struct octeon_soft_command *)buf; + struct liquidio_rx_ctl_context *ctx; + + ctx = (struct liquidio_rx_ctl_context *)sc->ctxptr; + + oct = lio_get_device(ctx->octeon_id); + if (status) + dev_err(&oct->pci_dev->dev, "rx ctl instruction failed. Status: %llx\n", + CVM_CAST64(status)); + WRITE_ONCE(ctx->cond, 1); + + /* This barrier is required to be sure that the response has been + * written fully before waking up the handler + */ + wmb(); + + wake_up_interruptible(&ctx->wc); +} + /** * \brief Send Rx control command * @param lio per-network private data @@ -1384,17 +1420,55 @@ static void octeon_destroy_resources(struct octeon_device *oct) */ static void send_rx_ctrl_cmd(struct lio *lio, int start_stop) { - struct octnic_ctrl_pkt nctrl; + struct octeon_soft_command *sc; + struct liquidio_rx_ctl_context *ctx; + union octnet_cmd *ncmd; + int ctx_size = sizeof(struct liquidio_rx_ctl_context); + struct octeon_device *oct = (struct octeon_device *)lio->oct_dev; + int retval; - memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt)); + if (oct->props[lio->ifidx].rx_on == start_stop) + return; - nctrl.ncmd.s.cmd = OCTNET_CMD_RX_CTL; - nctrl.ncmd.s.param1 = start_stop; - nctrl.iq_no = lio->linfo.txpciq[0].s.q_no; - nctrl.netpndev = (u64)lio->netdev; + sc = (struct octeon_soft_command *) + octeon_alloc_soft_command(oct, OCTNET_CMD_SIZE, + 16, ctx_size); - if (octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl) < 0) + ncmd = (union octnet_cmd *)sc->virtdptr; + ctx = (struct liquidio_rx_ctl_context *)sc->ctxptr; + + WRITE_ONCE(ctx->cond, 0); + ctx->octeon_id = lio_get_device_id(oct); + init_waitqueue_head(&ctx->wc); + + ncmd->u64 = 0; + ncmd->s.cmd = OCTNET_CMD_RX_CTL; + ncmd->s.param1 = start_stop; + + octeon_swap_8B_data((u64 *)ncmd, (OCTNET_CMD_SIZE >> 3)); + + sc->iq_no = lio->linfo.txpciq[0].s.q_no; + + octeon_prepare_soft_command(oct, sc, OPCODE_NIC, + OPCODE_NIC_CMD, 0, 0, 0); + + sc->callback = rx_ctl_callback; + sc->callback_arg = sc; + sc->wait_time = 5000; + + retval = octeon_send_soft_command(oct, sc); + if (retval == IQ_SEND_FAILED) { netif_info(lio, rx_err, lio->netdev, "Failed to send RX Control message\n"); + } else { + /* Sleep on a wait queue till the cond flag indicates that the + * response arrived or timed-out. + */ + if (sleep_cond(&ctx->wc, &ctx->cond) == -EINTR) + return; + oct->props[lio->ifidx].rx_on = start_stop; + } + + octeon_free_soft_command(oct, sc); } /** @@ -1421,10 +1495,8 @@ static void liquidio_destroy_nic_device(struct octeon_device *oct, int ifidx) dev_dbg(&oct->pci_dev->dev, "NIC device cleanup\n"); - send_rx_ctrl_cmd(lio, 0); - if (atomic_read(&lio->ifstate) & LIO_IFSTATE_RUNNING) - txqs_stop(netdev); + liquidio_stop(netdev); if (oct->props[lio->ifidx].napi_enabled == 1) { list_for_each_entry_safe(napi, n, &netdev->napi_list, dev_list) @@ -3567,7 +3639,11 @@ static int setup_nic_devices(struct octeon_device *octeon_dev) /* Sleep on a wait queue till the cond flag indicates that the * response arrived or timed-out. */ - sleep_cond(&ctx->wc, &ctx->cond); + if (sleep_cond(&ctx->wc, &ctx->cond) == -EINTR) { + dev_err(&octeon_dev->pci_dev->dev, "Wait interrupted\n"); + goto setup_nic_wait_intr; + } + retval = resp->status; if (retval) { dev_err(&octeon_dev->pci_dev->dev, "iq/oq config failed\n"); @@ -3768,6 +3844,8 @@ setup_nic_dev_fail: octeon_free_soft_command(octeon_dev, sc); +setup_nic_wait_intr: + while (i--) { dev_err(&octeon_dev->pci_dev->dev, "NIC ifidx:%d Setup failed\n", i); diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.h b/drivers/net/ethernet/cavium/liquidio/octeon_device.h index 99fc1d899208..07efadc42ab5 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_device.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.h @@ -275,6 +275,7 @@ struct octdev_props { /* Each interface in the Octeon device has a network * device pointer (used for OS specific calls). */ + int rx_on; int napi_enabled; int gmxport; struct net_device *netdev; diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_main.h b/drivers/net/ethernet/cavium/liquidio/octeon_main.h index ebeef95ed9b0..366298f7bcb2 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_main.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_main.h @@ -181,22 +181,26 @@ cnnic_numa_alloc_aligned_dma(u32 size, #define cnnic_free_aligned_dma(pci_dev, ptr, size, orig_ptr, dma_addr) \ free_pages(orig_ptr, get_order(size)) -static inline void +static inline int sleep_cond(wait_queue_head_t *wait_queue, int *condition) { + int errno = 0; wait_queue_t we; init_waitqueue_entry(&we, current); add_wait_queue(wait_queue, &we); while (!(READ_ONCE(*condition))) { set_current_state(TASK_INTERRUPTIBLE); - if (signal_pending(current)) + if (signal_pending(current)) { + errno = -EINTR; goto out; + } schedule(); } out: set_current_state(TASK_RUNNING); remove_wait_queue(wait_queue, &we); + return errno; } static inline void From 9bdd46095fb9476323c490674b4c5e1ec61f8a53 Mon Sep 17 00:00:00 2001 From: Raghu Vatsavayi Date: Thu, 1 Sep 2016 11:16:06 -0700 Subject: [PATCH 0681/1715] liquidio: CN23XX IQ access Adds support for Instruction Queue(IQ) index manipulation routines through bar1 of cn23xx. Signed-off-by: Derek Chickles Signed-off-by: Satanand Burla Signed-off-by: Felix Manlunas Signed-off-by: Raghu Vatsavayi Signed-off-by: David S. Miller --- .../cavium/liquidio/cn23xx_pf_device.c | 66 +++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c index 2d812064731a..bddb198c0b74 100644 --- a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c +++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c @@ -887,6 +887,67 @@ static irqreturn_t cn23xx_interrupt_handler(void *dev) return IRQ_HANDLED; } +static void cn23xx_bar1_idx_setup(struct octeon_device *oct, u64 core_addr, + u32 idx, int valid) +{ + u64 bar1; + u64 reg_adr; + + if (!valid) { + reg_adr = lio_pci_readq( + oct, CN23XX_PEM_BAR1_INDEX_REG(oct->pcie_port, idx)); + WRITE_ONCE(bar1, reg_adr); + lio_pci_writeq(oct, (READ_ONCE(bar1) & 0xFFFFFFFEULL), + CN23XX_PEM_BAR1_INDEX_REG(oct->pcie_port, idx)); + reg_adr = lio_pci_readq( + oct, CN23XX_PEM_BAR1_INDEX_REG(oct->pcie_port, idx)); + WRITE_ONCE(bar1, reg_adr); + return; + } + + /* The PEM(0..3)_BAR1_INDEX(0..15)[ADDR_IDX]<23:4> stores + * bits <41:22> of the Core Addr + */ + lio_pci_writeq(oct, (((core_addr >> 22) << 4) | PCI_BAR1_MASK), + CN23XX_PEM_BAR1_INDEX_REG(oct->pcie_port, idx)); + + WRITE_ONCE(bar1, lio_pci_readq( + oct, CN23XX_PEM_BAR1_INDEX_REG(oct->pcie_port, idx))); +} + +static void cn23xx_bar1_idx_write(struct octeon_device *oct, u32 idx, u32 mask) +{ + lio_pci_writeq(oct, mask, + CN23XX_PEM_BAR1_INDEX_REG(oct->pcie_port, idx)); +} + +static u32 cn23xx_bar1_idx_read(struct octeon_device *oct, u32 idx) +{ + return (u32)lio_pci_readq( + oct, CN23XX_PEM_BAR1_INDEX_REG(oct->pcie_port, idx)); +} + +/* always call with lock held */ +static u32 cn23xx_update_read_index(struct octeon_instr_queue *iq) +{ + u32 new_idx; + u32 last_done; + u32 pkt_in_done = readl(iq->inst_cnt_reg); + + last_done = pkt_in_done - iq->pkt_in_done; + iq->pkt_in_done = pkt_in_done; + + /* Modulo of the new index with the IQ size will give us + * the new index. The iq->reset_instr_cnt is always zero for + * cn23xx, so no extra adjustments are needed. + */ + new_idx = (iq->octeon_read_index + + (u32)(last_done & CN23XX_PKT_IN_DONE_CNT_MASK)) % + iq->max_count; + + return new_idx; +} + static void cn23xx_enable_pf_interrupt(struct octeon_device *oct, u8 intr_flag) { struct octeon_cn23xx_pf *cn23xx = (struct octeon_cn23xx_pf *)oct->chip; @@ -1063,6 +1124,11 @@ int setup_cn23xx_octeon_pf_device(struct octeon_device *oct) oct->fn_list.soft_reset = cn23xx_pf_soft_reset; oct->fn_list.setup_device_regs = cn23xx_setup_pf_device_regs; + oct->fn_list.update_iq_read_idx = cn23xx_update_read_index; + + oct->fn_list.bar1_idx_setup = cn23xx_bar1_idx_setup; + oct->fn_list.bar1_idx_write = cn23xx_bar1_idx_write; + oct->fn_list.bar1_idx_read = cn23xx_bar1_idx_read; oct->fn_list.enable_interrupt = cn23xx_enable_pf_interrupt; oct->fn_list.disable_interrupt = cn23xx_disable_pf_interrupt; From 5b823514ae31dc50a36454a6a14d40d712dbe8d0 Mon Sep 17 00:00:00 2001 From: Raghu Vatsavayi Date: Thu, 1 Sep 2016 11:16:07 -0700 Subject: [PATCH 0682/1715] liquidio: CN23XX octeon3 instruction Adds support for data path related changes based on octeon3 instruction header(ih3) for cn23xx. Signed-off-by: Derek Chickles Signed-off-by: Satanand Burla Signed-off-by: Felix Manlunas Signed-off-by: Raghu Vatsavayi Signed-off-by: David S. Miller --- .../net/ethernet/cavium/liquidio/lio_main.c | 37 ++-- .../cavium/liquidio/liquidio_common.h | 7 + .../ethernet/cavium/liquidio/octeon_device.c | 31 ++-- .../ethernet/cavium/liquidio/octeon_droq.c | 6 + .../net/ethernet/cavium/liquidio/octeon_nic.c | 27 ++- .../net/ethernet/cavium/liquidio/octeon_nic.h | 4 +- .../cavium/liquidio/request_manager.c | 159 +++++++++++++----- .../cavium/liquidio/response_manager.c | 9 +- 8 files changed, 208 insertions(+), 72 deletions(-) diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index 78d322c72030..7a32358e7e22 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -1347,17 +1347,16 @@ static void octeon_destroy_resources(struct octeon_device *oct) case OCT_DEV_RESP_LIST_INIT_DONE: octeon_delete_response_list(oct); - /* fallthrough */ - case OCT_DEV_SC_BUFF_POOL_INIT_DONE: - octeon_free_sc_buffer_pool(oct); - /* fallthrough */ case OCT_DEV_INSTR_QUEUE_INIT_DONE: for (i = 0; i < MAX_OCTEON_INSTR_QUEUES(oct); i++) { - if (!(oct->io_qmask.iq & (1ULL << i))) + if (!(oct->io_qmask.iq & BIT_ULL(i))) continue; octeon_delete_instr_queue(oct, i); } + /* fallthrough */ + case OCT_DEV_SC_BUFF_POOL_INIT_DONE: + octeon_free_sc_buffer_pool(oct); /* fallthrough */ case OCT_DEV_DISPATCH_INIT_DONE: @@ -2929,9 +2928,15 @@ static inline int send_nic_timestamp_pkt(struct octeon_device *oct, sc->callback_arg = finfo->skb; sc->iq_no = ndata->q_no; - len = (u32)((struct octeon_instr_ih2 *)(&sc->cmd.cmd2.ih2))->dlengsz; + if (OCTEON_CN23XX_PF(oct)) + len = (u32)((struct octeon_instr_ih3 *) + (&sc->cmd.cmd3.ih3))->dlengsz; + else + len = (u32)((struct octeon_instr_ih2 *) + (&sc->cmd.cmd2.ih2))->dlengsz; ring_doorbell = 1; + retval = octeon_send_command(oct, sc->iq_no, ring_doorbell, &sc->cmd, sc, len, ndata->reqtype); @@ -3063,7 +3068,10 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev) return NETDEV_TX_BUSY; } - ndata.cmd.cmd2.dptr = dptr; + if (OCTEON_CN23XX_PF(oct)) + ndata.cmd.cmd3.dptr = dptr; + else + ndata.cmd.cmd2.dptr = dptr; finfo->dptr = dptr; ndata.reqtype = REQTYPE_NORESP_NET; @@ -3138,15 +3146,23 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev) g->sg_size, DMA_TO_DEVICE); dptr = g->sg_dma_ptr; - ndata.cmd.cmd2.dptr = dptr; + if (OCTEON_CN23XX_PF(oct)) + ndata.cmd.cmd3.dptr = dptr; + else + ndata.cmd.cmd2.dptr = dptr; finfo->dptr = dptr; finfo->g = g; ndata.reqtype = REQTYPE_NORESP_NET_SG; } - irh = (struct octeon_instr_irh *)&ndata.cmd.cmd2.irh; - tx_info = (union tx_info *)&ndata.cmd.cmd2.ossp[0]; + if (OCTEON_CN23XX_PF(oct)) { + irh = (struct octeon_instr_irh *)&ndata.cmd.cmd3.irh; + tx_info = (union tx_info *)&ndata.cmd.cmd3.ossp[0]; + } else { + irh = (struct octeon_instr_irh *)&ndata.cmd.cmd2.irh; + tx_info = (union tx_info *)&ndata.cmd.cmd2.ossp[0]; + } if (skb_shinfo(skb)->gso_size) { tx_info->s.gso_size = skb_shinfo(skb)->gso_size; @@ -3904,6 +3920,7 @@ static int liquidio_init_nic_module(struct octeon_device *oct) intrmod_cfg->tx_mincnt_trigger = LIO_INTRMOD_TXMINCNT_TRIGGER; intrmod_cfg->rx_frames = CFG_GET_OQ_INTR_PKT(octeon_get_conf(oct)); intrmod_cfg->rx_usecs = CFG_GET_OQ_INTR_TIME(octeon_get_conf(oct)); + intrmod_cfg->tx_frames = CFG_GET_IQ_INTR_PKT(octeon_get_conf(oct)); dev_dbg(&oct->pci_dev->dev, "Network interfaces ready\n"); return retval; diff --git a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h index 8ffd3b8c4d0f..5552f6737423 100644 --- a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h +++ b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h @@ -310,6 +310,13 @@ union octnet_cmd { #define OCTNET_CMD_SIZE (sizeof(union octnet_cmd)) +/*pkiih3 + irh + ossp[0] + ossp[1] + rdp + rptr = 40 bytes */ +#define LIO_SOFTCMDRESP_IH2 40 +#define LIO_SOFTCMDRESP_IH3 (40 + 8) + +#define LIO_PCICMD_O2 24 +#define LIO_PCICMD_O3 (24 + 8) + /* Instruction Header(DPI) - for OCTEON-III models */ struct octeon_instr_ih3 { #ifdef __BIG_ENDIAN_BITFIELD diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.c b/drivers/net/ethernet/cavium/liquidio/octeon_device.c index 52527638d413..85e312381dcc 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_device.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.c @@ -793,7 +793,6 @@ int octeon_setup_instr_queues(struct octeon_device *oct) union oct_txpciq txpciq; int numa_node = cpu_to_node(iq_no % num_online_cpus()); - /* this causes queue 0 to be default queue */ if (OCTEON_CN6XXX(oct)) num_descs = CFG_GET_NUM_DEF_TX_DESCS(CHIP_FIELD(oct, cn6xxx, conf)); @@ -816,6 +815,7 @@ int octeon_setup_instr_queues(struct octeon_device *oct) oct->instr_queue[0]->ifidx = 0; txpciq.u64 = 0; txpciq.s.q_no = iq_no; + txpciq.s.pkind = oct->pfvf_hsword.pkind; txpciq.s.use_qpg = 0; txpciq.s.qpg = 0; if (octeon_init_instr_queue(oct, txpciq, num_descs)) { @@ -835,7 +835,6 @@ int octeon_setup_output_queues(struct octeon_device *oct) u32 oq_no = 0; int numa_node = cpu_to_node(oq_no % num_online_cpus()); - /* this causes queue 0 to be default queue */ if (OCTEON_CN6XXX(oct)) { num_descs = CFG_GET_NUM_DEF_RX_DESCS(CHIP_FIELD(oct, cn6xxx, conf)); @@ -863,10 +862,10 @@ int octeon_setup_output_queues(struct octeon_device *oct) void octeon_set_io_queues_off(struct octeon_device *oct) { - /* Disable the i/p and o/p queues for this Octeon. */ - - octeon_write_csr(oct, CN6XXX_SLI_PKT_INSTR_ENB, 0); - octeon_write_csr(oct, CN6XXX_SLI_PKT_OUT_ENB, 0); + if (OCTEON_CN6XXX(oct)) { + octeon_write_csr(oct, CN6XXX_SLI_PKT_INSTR_ENB, 0); + octeon_write_csr(oct, CN6XXX_SLI_PKT_OUT_ENB, 0); + } } void octeon_set_droq_pkt_op(struct octeon_device *oct, @@ -876,14 +875,16 @@ void octeon_set_droq_pkt_op(struct octeon_device *oct, u32 reg_val = 0; /* Disable the i/p and o/p queues for this Octeon. */ - reg_val = octeon_read_csr(oct, CN6XXX_SLI_PKT_OUT_ENB); + if (OCTEON_CN6XXX(oct)) { + reg_val = octeon_read_csr(oct, CN6XXX_SLI_PKT_OUT_ENB); - if (enable) - reg_val = reg_val | (1 << q_no); - else - reg_val = reg_val & (~(1 << q_no)); + if (enable) + reg_val = reg_val | (1 << q_no); + else + reg_val = reg_val & (~(1 << q_no)); - octeon_write_csr(oct, CN6XXX_SLI_PKT_OUT_ENB, reg_val); + octeon_write_csr(oct, CN6XXX_SLI_PKT_OUT_ENB, reg_val); + } } int octeon_init_dispatch_list(struct octeon_device *oct) @@ -1100,6 +1101,12 @@ int octeon_core_drv_init(struct octeon_recv_info *recv_info, void *buf) } oct->fw_info.app_cap_flags = recv_pkt->rh.r_core_drv_init.app_cap_flags; oct->fw_info.app_mode = (u32)recv_pkt->rh.r_core_drv_init.app_mode; + oct->pfvf_hsword.app_mode = (u32)recv_pkt->rh.r_core_drv_init.app_mode; + + oct->pfvf_hsword.pkind = recv_pkt->rh.r_core_drv_init.pkind; + + for (i = 0; i < oct->num_iqs; i++) + oct->instr_queue[i]->txpciq.s.pkind = oct->pfvf_hsword.pkind; atomic_set(&oct->status, OCT_DEV_CORE_OK); diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c index 5dfc23d70d05..8848ce2711fb 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c @@ -31,6 +31,7 @@ #include "octeon_network.h" #include "cn66xx_regs.h" #include "cn66xx_device.h" +#include "cn23xx_pf_device.h" #define CVM_MIN(d1, d2) (((d1) < (d2)) ? (d1) : (d2)) #define CVM_MAX(d1, d2) (((d1) > (d2)) ? (d1) : (d2)) @@ -262,6 +263,11 @@ int octeon_init_droq(struct octeon_device *oct, c_pkts_per_intr = (u32)CFG_GET_OQ_PKTS_PER_INTR(conf6x); c_refill_threshold = (u32)CFG_GET_OQ_REFILL_THRESHOLD(conf6x); + } else if (OCTEON_CN23XX_PF(oct)) { + struct octeon_config *conf23 = CHIP_FIELD(oct, cn23xx_pf, conf); + + c_pkts_per_intr = (u32)CFG_GET_OQ_PKTS_PER_INTR(conf23); + c_refill_threshold = (u32)CFG_GET_OQ_REFILL_THRESHOLD(conf23); } else { return 1; } diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_nic.c b/drivers/net/ethernet/cavium/liquidio/octeon_nic.c index 0c4013de599a..40ac1fe88956 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_nic.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_nic.c @@ -35,6 +35,7 @@ octeon_alloc_soft_command_resp(struct octeon_device *oct, u32 rdatasize) { struct octeon_soft_command *sc; + struct octeon_instr_ih3 *ih3; struct octeon_instr_ih2 *ih2; struct octeon_instr_irh *irh; struct octeon_instr_rdp *rdp; @@ -51,10 +52,19 @@ octeon_alloc_soft_command_resp(struct octeon_device *oct, /* Add in the response related fields. Opcode and Param are already * there. */ - ih2 = (struct octeon_instr_ih2 *)&sc->cmd.cmd2.ih2; - rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd2.rdp; - irh = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh; - ih2->fsz = 40; /* irh + ossp[0] + ossp[1] + rdp + rptr = 40 bytes */ + if (OCTEON_CN23XX_PF(oct)) { + ih3 = (struct octeon_instr_ih3 *)&sc->cmd.cmd3.ih3; + rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd3.rdp; + irh = (struct octeon_instr_irh *)&sc->cmd.cmd3.irh; + /*pkiih3 + irh + ossp[0] + ossp[1] + rdp + rptr = 40 bytes */ + ih3->fsz = LIO_SOFTCMDRESP_IH3; + } else { + ih2 = (struct octeon_instr_ih2 *)&sc->cmd.cmd2.ih2; + rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd2.rdp; + irh = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh; + /* irh + ossp[0] + ossp[1] + rdp + rptr = 40 bytes */ + ih2->fsz = LIO_SOFTCMDRESP_IH2; + } irh->rflag = 1; /* a response is required */ @@ -63,7 +73,10 @@ octeon_alloc_soft_command_resp(struct octeon_device *oct, *sc->status_word = COMPLETION_WORD_INIT; - sc->cmd.cmd2.rptr = sc->dmarptr; + if (OCTEON_CN23XX_PF(oct)) + sc->cmd.cmd3.rptr = sc->dmarptr; + else + sc->cmd.cmd2.rptr = sc->dmarptr; sc->wait_time = 1000; sc->timeout = jiffies + sc->wait_time; @@ -179,8 +192,8 @@ octnet_send_nic_ctrl_pkt(struct octeon_device *oct, retval = octeon_send_soft_command(oct, sc); if (retval == IQ_SEND_FAILED) { octeon_free_soft_command(oct, sc); - dev_err(&oct->pci_dev->dev, "%s soft command:%d send failed status: %x\n", - __func__, nctrl->ncmd.s.cmd, retval); + dev_err(&oct->pci_dev->dev, "%s pf_num:%d soft command:%d send failed status: %x\n", + __func__, oct->pf_num, nctrl->ncmd.s.cmd, retval); spin_unlock_bh(&oct->cmd_resp_wqlock); return -1; } diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_nic.h b/drivers/net/ethernet/cavium/liquidio/octeon_nic.h index e55400c91574..4b8da67b995f 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_nic.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_nic.h @@ -138,7 +138,7 @@ octnet_prepare_pci_cmd_o2(struct octeon_device *oct, /* assume that rflag is cleared so therefore front data will only have * irh and ossp[0], ossp[1] for a total of 32 bytes */ - ih2->fsz = 24; + ih2->fsz = LIO_PCICMD_O2; ih2->tagtype = ORDERED_TAG; ih2->grp = DEFAULT_POW_GRP; @@ -196,7 +196,7 @@ octnet_prepare_pci_cmd_o3(struct octeon_device *oct, */ ih3->pkind = oct->instr_queue[setup->s.iq_no]->txpciq.s.pkind; /*PKI IH*/ - ih3->fsz = 24 + 8; + ih3->fsz = LIO_PCICMD_O3; if (!setup->s.gather) { ih3->dlengsz = setup->s.u.datasize; diff --git a/drivers/net/ethernet/cavium/liquidio/request_manager.c b/drivers/net/ethernet/cavium/liquidio/request_manager.c index 3ee5d023c789..90866bb50033 100644 --- a/drivers/net/ethernet/cavium/liquidio/request_manager.c +++ b/drivers/net/ethernet/cavium/liquidio/request_manager.c @@ -30,6 +30,7 @@ #include "octeon_main.h" #include "octeon_network.h" #include "cn66xx_device.h" +#include "cn23xx_pf_device.h" #define INCR_INSTRQUEUE_PKT_COUNT(octeon_dev_ptr, iq_no, field, count) \ (octeon_dev_ptr->instr_queue[iq_no]->stats.field += count) @@ -71,7 +72,8 @@ int octeon_init_instr_queue(struct octeon_device *oct, if (OCTEON_CN6XXX(oct)) conf = &(CFG_GET_IQ_CFG(CHIP_FIELD(oct, cn6xxx, conf))); - + else if (OCTEON_CN23XX_PF(oct)) + conf = &(CFG_GET_IQ_CFG(CHIP_FIELD(oct, cn23xx_pf, conf))); if (!conf) { dev_err(&oct->pci_dev->dev, "Unsupported Chip %x\n", oct->chip_id); @@ -88,6 +90,7 @@ int octeon_init_instr_queue(struct octeon_device *oct, q_size = (u32)conf->instr_type * num_descs; iq = oct->instr_queue[iq_no]; + iq->oct_dev = oct; set_dev_node(&oct->pci_dev->dev, numa_node); @@ -181,6 +184,9 @@ int octeon_delete_instr_queue(struct octeon_device *oct, u32 iq_no) if (OCTEON_CN6XXX(oct)) desc_size = CFG_GET_IQ_INSTR_TYPE(CHIP_FIELD(oct, cn6xxx, conf)); + else if (OCTEON_CN23XX_PF(oct)) + desc_size = + CFG_GET_IQ_INSTR_TYPE(CHIP_FIELD(oct, cn23xx_pf, conf)); vfree(iq->request_list); @@ -383,7 +389,12 @@ lio_process_iq_request_list(struct octeon_device *oct, case REQTYPE_SOFT_COMMAND: sc = buf; - irh = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh; + if (OCTEON_CN23XX_PF(oct)) + irh = (struct octeon_instr_irh *) + &sc->cmd.cmd3.irh; + else + irh = (struct octeon_instr_irh *) + &sc->cmd.cmd2.irh; if (irh->rflag) { /* We're expecting a response from Octeon. * It's up to lio_process_ordered_list() to @@ -583,6 +594,8 @@ octeon_prepare_soft_command(struct octeon_device *oct, { struct octeon_config *oct_cfg; struct octeon_instr_ih2 *ih2; + struct octeon_instr_ih3 *ih3; + struct octeon_instr_pki_ih3 *pki_ih3; struct octeon_instr_irh *irh; struct octeon_instr_rdp *rdp; @@ -591,36 +604,88 @@ octeon_prepare_soft_command(struct octeon_device *oct, oct_cfg = octeon_get_conf(oct); - ih2 = (struct octeon_instr_ih2 *)&sc->cmd.cmd2.ih2; - ih2->tagtype = ATOMIC_TAG; - ih2->tag = LIO_CONTROL; - ih2->raw = 1; - ih2->grp = CFG_GET_CTRL_Q_GRP(oct_cfg); + if (OCTEON_CN23XX_PF(oct)) { + ih3 = (struct octeon_instr_ih3 *)&sc->cmd.cmd3.ih3; - if (sc->datasize) { - ih2->dlengsz = sc->datasize; - ih2->rs = 1; - } + ih3->pkind = oct->instr_queue[sc->iq_no]->txpciq.s.pkind; - irh = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh; - irh->opcode = opcode; - irh->subcode = subcode; + pki_ih3 = (struct octeon_instr_pki_ih3 *)&sc->cmd.cmd3.pki_ih3; - /* opcode/subcode specific parameters (ossp) */ - irh->ossp = irh_ossp; - sc->cmd.cmd2.ossp[0] = ossp0; - sc->cmd.cmd2.ossp[1] = ossp1; + pki_ih3->w = 1; + pki_ih3->raw = 1; + pki_ih3->utag = 1; + pki_ih3->uqpg = + oct->instr_queue[sc->iq_no]->txpciq.s.use_qpg; + pki_ih3->utt = 1; + pki_ih3->tag = LIO_CONTROL; + pki_ih3->tagtype = ATOMIC_TAG; + pki_ih3->qpg = + oct->instr_queue[sc->iq_no]->txpciq.s.qpg; + pki_ih3->pm = 0x7; + pki_ih3->sl = 8; - if (sc->rdatasize) { - rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd2.rdp; - rdp->pcie_port = oct->pcie_port; - rdp->rlen = sc->rdatasize; + if (sc->datasize) + ih3->dlengsz = sc->datasize; + + irh = (struct octeon_instr_irh *)&sc->cmd.cmd3.irh; + irh->opcode = opcode; + irh->subcode = subcode; + + /* opcode/subcode specific parameters (ossp) */ + irh->ossp = irh_ossp; + sc->cmd.cmd3.ossp[0] = ossp0; + sc->cmd.cmd3.ossp[1] = ossp1; + + if (sc->rdatasize) { + rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd3.rdp; + rdp->pcie_port = oct->pcie_port; + rdp->rlen = sc->rdatasize; + + irh->rflag = 1; + /*PKI IH3*/ + /* pki_ih3 irh+ossp[0]+ossp[1]+rdp+rptr = 48 bytes */ + ih3->fsz = LIO_SOFTCMDRESP_IH3; + } else { + irh->rflag = 0; + /*PKI IH3*/ + /* pki_h3 + irh + ossp[0] + ossp[1] = 32 bytes */ + ih3->fsz = LIO_PCICMD_O3; + } - irh->rflag = 1; - ih2->fsz = 40; /* irh+ossp[0]+ossp[1]+rdp+rptr = 40 bytes */ } else { - irh->rflag = 0; - ih2->fsz = 24; /* irh + ossp[0] + ossp[1] = 24 bytes */ + ih2 = (struct octeon_instr_ih2 *)&sc->cmd.cmd2.ih2; + ih2->tagtype = ATOMIC_TAG; + ih2->tag = LIO_CONTROL; + ih2->raw = 1; + ih2->grp = CFG_GET_CTRL_Q_GRP(oct_cfg); + + if (sc->datasize) { + ih2->dlengsz = sc->datasize; + ih2->rs = 1; + } + + irh = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh; + irh->opcode = opcode; + irh->subcode = subcode; + + /* opcode/subcode specific parameters (ossp) */ + irh->ossp = irh_ossp; + sc->cmd.cmd2.ossp[0] = ossp0; + sc->cmd.cmd2.ossp[1] = ossp1; + + if (sc->rdatasize) { + rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd2.rdp; + rdp->pcie_port = oct->pcie_port; + rdp->rlen = sc->rdatasize; + + irh->rflag = 1; + /* irh+ossp[0]+ossp[1]+rdp+rptr = 40 bytes */ + ih2->fsz = LIO_SOFTCMDRESP_IH2; + } else { + irh->rflag = 0; + /* irh + ossp[0] + ossp[1] = 24 bytes */ + ih2->fsz = LIO_PCICMD_O2; + } } } @@ -628,23 +693,39 @@ int octeon_send_soft_command(struct octeon_device *oct, struct octeon_soft_command *sc) { struct octeon_instr_ih2 *ih2; + struct octeon_instr_ih3 *ih3; struct octeon_instr_irh *irh; u32 len; - ih2 = (struct octeon_instr_ih2 *)&sc->cmd.cmd2.ih2; - if (ih2->dlengsz) { - WARN_ON(!sc->dmadptr); - sc->cmd.cmd2.dptr = sc->dmadptr; + if (OCTEON_CN23XX_PF(oct)) { + ih3 = (struct octeon_instr_ih3 *)&sc->cmd.cmd3.ih3; + if (ih3->dlengsz) { + WARN_ON(!sc->dmadptr); + sc->cmd.cmd3.dptr = sc->dmadptr; + } + irh = (struct octeon_instr_irh *)&sc->cmd.cmd3.irh; + if (irh->rflag) { + WARN_ON(!sc->dmarptr); + WARN_ON(!sc->status_word); + *sc->status_word = COMPLETION_WORD_INIT; + sc->cmd.cmd3.rptr = sc->dmarptr; + } + len = (u32)ih3->dlengsz; + } else { + ih2 = (struct octeon_instr_ih2 *)&sc->cmd.cmd2.ih2; + if (ih2->dlengsz) { + WARN_ON(!sc->dmadptr); + sc->cmd.cmd2.dptr = sc->dmadptr; + } + irh = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh; + if (irh->rflag) { + WARN_ON(!sc->dmarptr); + WARN_ON(!sc->status_word); + *sc->status_word = COMPLETION_WORD_INIT; + sc->cmd.cmd2.rptr = sc->dmarptr; + } + len = (u32)ih2->dlengsz; } - irh = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh; - if (irh->rflag) { - WARN_ON(!sc->dmarptr); - WARN_ON(!sc->status_word); - *sc->status_word = COMPLETION_WORD_INIT; - - sc->cmd.cmd2.rptr = sc->dmarptr; - } - len = (u32)ih2->dlengsz; if (sc->wait_time) sc->timeout = jiffies + sc->wait_time; diff --git a/drivers/net/ethernet/cavium/liquidio/response_manager.c b/drivers/net/ethernet/cavium/liquidio/response_manager.c index 709049e36627..be52178d8cb6 100644 --- a/drivers/net/ethernet/cavium/liquidio/response_manager.c +++ b/drivers/net/ethernet/cavium/liquidio/response_manager.c @@ -91,8 +91,13 @@ int lio_process_ordered_list(struct octeon_device *octeon_dev, sc = (struct octeon_soft_command *)ordered_sc_list-> head.next; - rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd2.rdp; - rptr = sc->cmd.cmd2.rptr; + if (OCTEON_CN23XX_PF(octeon_dev)) { + rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd3.rdp; + rptr = sc->cmd.cmd3.rptr; + } else { + rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd2.rdp; + rptr = sc->cmd.cmd2.rptr; + } status = OCTEON_REQUEST_PENDING; From dc3abcbeaeb9593d8c0892718dcfe2bd0882832c Mon Sep 17 00:00:00 2001 From: Raghu Vatsavayi Date: Thu, 1 Sep 2016 11:16:08 -0700 Subject: [PATCH 0683/1715] liquidio: ethtool and led control support This patch adds support for some control operations like LED identification, ethtool statistics and intr config for cn23xx device. Signed-off-by: Derek Chickles Signed-off-by: Satanand Burla Signed-off-by: Felix Manlunas Signed-off-by: Raghu Vatsavayi Signed-off-by: David S. Miller --- .../cavium/liquidio/cn23xx_pf_device.h | 2 + .../net/ethernet/cavium/liquidio/lio_core.c | 6 + .../ethernet/cavium/liquidio/lio_ethtool.c | 385 +++++++++++++++++- .../cavium/liquidio/liquidio_common.h | 5 + 4 files changed, 390 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h index 33b758992eb2..21b5c9051967 100644 --- a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h +++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h @@ -51,6 +51,8 @@ int setup_cn23xx_octeon_pf_device(struct octeon_device *oct); int validate_cn23xx_pf_config_info(struct octeon_device *oct, struct octeon_config *conf23xx); +u32 cn23xx_pf_get_oq_ticks(struct octeon_device *oct, u32 time_intr_in_us); + void cn23xx_dump_pf_initialized_regs(struct octeon_device *oct); int cn23xx_fw_loaded(struct octeon_device *oct); diff --git a/drivers/net/ethernet/cavium/liquidio/lio_core.c b/drivers/net/ethernet/cavium/liquidio/lio_core.c index 5fb444da3b9c..201eddb3013a 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_core.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_core.c @@ -163,6 +163,12 @@ void liquidio_link_ctrl_cmd_completion(void *nctrl_ptr) case OCTNET_CMD_GPIO_ACCESS: netif_info(lio, probe, lio->netdev, "LED Flashing visual identification\n"); + + break; + + case OCTNET_CMD_ID_ACTIVE: + netif_info(lio, probe, lio->netdev, "LED Flashing visual identification\n"); + break; case OCTNET_CMD_LRO_ENABLE: diff --git a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c index f3ce7441cb5f..fb29a64b9e51 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c @@ -32,6 +32,7 @@ #include "octeon_network.h" #include "cn66xx_regs.h" #include "cn66xx_device.h" +#include "cn23xx_pf_device.h" static int octnet_get_link_stats(struct net_device *netdev); @@ -75,6 +76,7 @@ enum { #define ARRAY_LENGTH(a) (sizeof(a) / sizeof((a)[0])) #define OCT_ETHTOOL_REGDUMP_LEN 4096 +#define OCT_ETHTOOL_REGDUMP_LEN_23XX (4096 * 11) #define OCT_ETHTOOL_REGSVER 1 /* statistics of PF */ @@ -259,6 +261,13 @@ lio_ethtool_get_channels(struct net_device *dev, max_tx = CFG_GET_IQ_MAX_Q(conf6x); rx_count = CFG_GET_NUM_RXQS_NIC_IF(conf6x, lio->ifidx); tx_count = CFG_GET_NUM_TXQS_NIC_IF(conf6x, lio->ifidx); + } else if (OCTEON_CN23XX_PF(oct)) { + struct octeon_config *conf23 = CHIP_FIELD(oct, cn23xx_pf, conf); + + max_rx = CFG_GET_OQ_MAX_Q(conf23); + max_tx = CFG_GET_IQ_MAX_Q(conf23); + rx_count = CFG_GET_NUM_RXQS_NIC_IF(conf23, lio->ifidx); + tx_count = CFG_GET_NUM_TXQS_NIC_IF(conf23, lio->ifidx); } channel->max_rx = max_rx; @@ -331,6 +340,32 @@ static int octnet_gpio_access(struct net_device *netdev, int addr, int val) return 0; } +static int octnet_id_active(struct net_device *netdev, int val) +{ + struct lio *lio = GET_LIO(netdev); + struct octeon_device *oct = lio->oct_dev; + struct octnic_ctrl_pkt nctrl; + int ret = 0; + + memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt)); + + nctrl.ncmd.u64 = 0; + nctrl.ncmd.s.cmd = OCTNET_CMD_ID_ACTIVE; + nctrl.ncmd.s.param1 = val; + nctrl.iq_no = lio->linfo.txpciq[0].s.q_no; + nctrl.wait_time = 100; + nctrl.netpndev = (u64)netdev; + nctrl.cb_fn = liquidio_link_ctrl_cmd_completion; + + ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl); + if (ret < 0) { + dev_err(&oct->pci_dev->dev, "Failed to configure gpio value\n"); + return -EINVAL; + } + + return 0; +} + /* Callback for when mdio command response arrives */ static void octnet_mdio_resp_callback(struct octeon_device *oct, @@ -474,6 +509,11 @@ static int lio_set_phys_id(struct net_device *netdev, &value); if (ret) return ret; + } else if (oct->chip_id == OCTEON_CN23XX_PF_VID) { + octnet_id_active(netdev, LED_IDENTIFICATION_ON); + + /* returns 0 since updates are asynchronous */ + return 0; } else { return -EINVAL; } @@ -519,7 +559,10 @@ static int lio_set_phys_id(struct net_device *netdev, &lio->phy_beacon_val); if (ret) return ret; + } else if (oct->chip_id == OCTEON_CN23XX_PF_VID) { + octnet_id_active(netdev, LED_IDENTIFICATION_OFF); + return 0; } else { return -EINVAL; } @@ -548,6 +591,13 @@ lio_ethtool_get_ringparam(struct net_device *netdev, rx_max_pending = CN6XXX_MAX_OQ_DESCRIPTORS; rx_pending = CFG_GET_NUM_RX_DESCS_NIC_IF(conf6x, lio->ifidx); tx_pending = CFG_GET_NUM_TX_DESCS_NIC_IF(conf6x, lio->ifidx); + } else if (OCTEON_CN23XX_PF(oct)) { + struct octeon_config *conf23 = CHIP_FIELD(oct, cn23xx_pf, conf); + + tx_max_pending = CN23XX_MAX_IQ_DESCRIPTORS; + rx_max_pending = CN23XX_MAX_OQ_DESCRIPTORS; + rx_pending = CFG_GET_NUM_RX_DESCS_NIC_IF(conf23, lio->ifidx); + tx_pending = CFG_GET_NUM_TX_DESCS_NIC_IF(conf23, lio->ifidx); } if (lio->mtu > OCTNET_DEFAULT_FRM_SIZE - OCTNET_FRM_HEADER_SIZE) { @@ -946,6 +996,16 @@ static int lio_get_intr_coalesce(struct net_device *netdev, intrmod_cfg = &oct->intrmod; switch (oct->chip_id) { + case OCTEON_CN23XX_PF_VID: + if (!intrmod_cfg->rx_enable) { + intr_coal->rx_coalesce_usecs = intrmod_cfg->rx_usecs; + intr_coal->rx_max_coalesced_frames = + intrmod_cfg->rx_frames; + } + if (!intrmod_cfg->tx_enable) + intr_coal->tx_max_coalesced_frames = + intrmod_cfg->tx_frames; + break; case OCTEON_CN68XX: case OCTEON_CN66XX: { struct octeon_cn6xxx *cn6xxx = @@ -981,7 +1041,15 @@ static int lio_get_intr_coalesce(struct net_device *netdev, intr_coal->rx_coalesce_usecs_low = intrmod_cfg->rx_mintmr_trigger; intr_coal->rx_max_coalesced_frames_low = - intrmod_cfg->rx_mincnt_trigger; + intrmod_cfg->rx_mincnt_trigger; + } + if (OCTEON_CN23XX_PF(oct) && + (intrmod_cfg->tx_enable)) { + intr_coal->use_adaptive_tx_coalesce = intrmod_cfg->tx_enable; + intr_coal->tx_max_coalesced_frames_high = + intrmod_cfg->tx_maxcnt_trigger; + intr_coal->tx_max_coalesced_frames_low = + intrmod_cfg->tx_mincnt_trigger; } return 0; } @@ -1058,11 +1126,11 @@ static void octnet_nic_stats_callback(struct octeon_device *oct_dev, u32 status, void *ptr) { - struct octeon_soft_command *sc = (struct octeon_soft_command *)ptr; - struct oct_nic_stats_resp *resp = (struct oct_nic_stats_resp *) - sc->virtrptr; - struct oct_nic_stats_ctrl *ctrl = (struct oct_nic_stats_ctrl *) - sc->ctxptr; + struct octeon_soft_command *sc = (struct octeon_soft_command *)ptr; + struct oct_nic_stats_resp *resp = + (struct oct_nic_stats_resp *)sc->virtrptr; + struct oct_nic_stats_ctrl *ctrl = + (struct oct_nic_stats_ctrl *)sc->ctxptr; struct nic_rx_stats *rsp_rstats = &resp->stats.fromwire; struct nic_tx_stats *rsp_tstats = &resp->stats.fromhost; @@ -1312,6 +1380,27 @@ oct_cfg_rx_intrcnt(struct lio *lio, struct ethtool_coalesce *intr_coal) CFG_SET_OQ_INTR_PKT(cn6xxx->conf, rx_max_coalesced_frames); break; } + case OCTEON_CN23XX_PF_VID: { + int q_no; + + if (!intr_coal->rx_max_coalesced_frames) + rx_max_coalesced_frames = oct->intrmod.rx_frames; + else + rx_max_coalesced_frames = + intr_coal->rx_max_coalesced_frames; + for (q_no = 0; q_no < oct->num_oqs; q_no++) { + q_no += oct->sriov_info.pf_srn; + octeon_write_csr64( + oct, CN23XX_SLI_OQ_PKT_INT_LEVELS(q_no), + (octeon_read_csr64( + oct, CN23XX_SLI_OQ_PKT_INT_LEVELS(q_no)) & + (0x3fffff00000000UL)) | + rx_max_coalesced_frames); + /*consider setting resend bit*/ + } + oct->intrmod.rx_frames = rx_max_coalesced_frames; + break; + } default: return -EINVAL; } @@ -1344,6 +1433,27 @@ static int oct_cfg_rx_intrtime(struct lio *lio, CFG_SET_OQ_INTR_TIME(cn6xxx->conf, rx_coalesce_usecs); break; } + case OCTEON_CN23XX_PF_VID: { + u64 time_threshold; + int q_no; + + if (!intr_coal->rx_coalesce_usecs) + rx_coalesce_usecs = oct->intrmod.rx_usecs; + else + rx_coalesce_usecs = intr_coal->rx_coalesce_usecs; + time_threshold = + cn23xx_pf_get_oq_ticks(oct, (u32)rx_coalesce_usecs); + for (q_no = 0; q_no < oct->num_oqs; q_no++) { + q_no += oct->sriov_info.pf_srn; + octeon_write_csr64(oct, + CN23XX_SLI_OQ_PKT_INT_LEVELS(q_no), + (oct->intrmod.rx_frames | + (time_threshold << 32))); + /*consider writing to resend bit here*/ + } + oct->intrmod.rx_usecs = rx_coalesce_usecs; + break; + } default: return -EINVAL; } @@ -1356,12 +1466,37 @@ oct_cfg_tx_intrcnt(struct lio *lio, struct ethtool_coalesce *intr_coal __attribute__((unused))) { struct octeon_device *oct = lio->oct_dev; + u32 iq_intr_pkt; + void __iomem *inst_cnt_reg; + u64 val; /* Config Cnt based interrupt values */ switch (oct->chip_id) { case OCTEON_CN68XX: case OCTEON_CN66XX: break; + case OCTEON_CN23XX_PF_VID: { + int q_no; + + if (!intr_coal->tx_max_coalesced_frames) + iq_intr_pkt = CN23XX_DEF_IQ_INTR_THRESHOLD & + CN23XX_PKT_IN_DONE_WMARK_MASK; + else + iq_intr_pkt = intr_coal->tx_max_coalesced_frames & + CN23XX_PKT_IN_DONE_WMARK_MASK; + for (q_no = 0; q_no < oct->num_iqs; q_no++) { + inst_cnt_reg = (oct->instr_queue[q_no])->inst_cnt_reg; + val = readq(inst_cnt_reg); + /*clear wmark and count.dont want to write count back*/ + val = (val & 0xFFFF000000000000ULL) | + ((u64)iq_intr_pkt + << CN23XX_PKT_IN_DONE_WMARK_BIT_POS); + writeq(val, inst_cnt_reg); + /*consider setting resend bit*/ + } + oct->intrmod.tx_frames = iq_intr_pkt; + break; + } default: return -EINVAL; } @@ -1397,6 +1532,8 @@ static int lio_set_intr_coalesce(struct net_device *netdev, return -EINVAL; } break; + case OCTEON_CN23XX_PF_VID: + break; default: return -EINVAL; } @@ -1539,9 +1676,237 @@ static int lio_nway_reset(struct net_device *netdev) } /* Return register dump len. */ -static int lio_get_regs_len(struct net_device *dev __attribute__((unused))) +static int lio_get_regs_len(struct net_device *dev) { - return OCT_ETHTOOL_REGDUMP_LEN; + struct lio *lio = GET_LIO(dev); + struct octeon_device *oct = lio->oct_dev; + + switch (oct->chip_id) { + case OCTEON_CN23XX_PF_VID: + return OCT_ETHTOOL_REGDUMP_LEN_23XX; + default: + return OCT_ETHTOOL_REGDUMP_LEN; + } +} + +static int cn23xx_read_csr_reg(char *s, struct octeon_device *oct) +{ + u32 reg; + u8 pf_num = oct->pf_num; + int len = 0; + int i; + + /* PCI Window Registers */ + + len += sprintf(s + len, "\n\t Octeon CSR Registers\n\n"); + + /*0x29030 or 0x29040*/ + reg = CN23XX_SLI_PKT_MAC_RINFO64(oct->pcie_port, oct->pf_num); + len += sprintf(s + len, + "\n[%08x] (SLI_PKT_MAC%d_PF%d_RINFO): %016llx\n", + reg, oct->pcie_port, oct->pf_num, + (u64)octeon_read_csr64(oct, reg)); + + /*0x27080 or 0x27090*/ + reg = CN23XX_SLI_MAC_PF_INT_ENB64(oct->pcie_port, oct->pf_num); + len += + sprintf(s + len, "\n[%08x] (SLI_MAC%d_PF%d_INT_ENB): %016llx\n", + reg, oct->pcie_port, oct->pf_num, + (u64)octeon_read_csr64(oct, reg)); + + /*0x27000 or 0x27010*/ + reg = CN23XX_SLI_MAC_PF_INT_SUM64(oct->pcie_port, oct->pf_num); + len += + sprintf(s + len, "\n[%08x] (SLI_MAC%d_PF%d_INT_SUM): %016llx\n", + reg, oct->pcie_port, oct->pf_num, + (u64)octeon_read_csr64(oct, reg)); + + /*0x29120*/ + reg = 0x29120; + len += sprintf(s + len, "\n[%08x] (SLI_PKT_MEM_CTL): %016llx\n", reg, + (u64)octeon_read_csr64(oct, reg)); + + /*0x27300*/ + reg = 0x27300 + oct->pcie_port * CN23XX_MAC_INT_OFFSET + + (oct->pf_num) * CN23XX_PF_INT_OFFSET; + len += sprintf( + s + len, "\n[%08x] (SLI_MAC%d_PF%d_PKT_VF_INT): %016llx\n", reg, + oct->pcie_port, oct->pf_num, (u64)octeon_read_csr64(oct, reg)); + + /*0x27200*/ + reg = 0x27200 + oct->pcie_port * CN23XX_MAC_INT_OFFSET + + (oct->pf_num) * CN23XX_PF_INT_OFFSET; + len += sprintf(s + len, + "\n[%08x] (SLI_MAC%d_PF%d_PP_VF_INT): %016llx\n", + reg, oct->pcie_port, oct->pf_num, + (u64)octeon_read_csr64(oct, reg)); + + /*29130*/ + reg = CN23XX_SLI_PKT_CNT_INT; + len += sprintf(s + len, "\n[%08x] (SLI_PKT_CNT_INT): %016llx\n", reg, + (u64)octeon_read_csr64(oct, reg)); + + /*0x29140*/ + reg = CN23XX_SLI_PKT_TIME_INT; + len += sprintf(s + len, "\n[%08x] (SLI_PKT_TIME_INT): %016llx\n", reg, + (u64)octeon_read_csr64(oct, reg)); + + /*0x29160*/ + reg = 0x29160; + len += sprintf(s + len, "\n[%08x] (SLI_PKT_INT): %016llx\n", reg, + (u64)octeon_read_csr64(oct, reg)); + + /*0x29180*/ + reg = CN23XX_SLI_OQ_WMARK; + len += sprintf(s + len, "\n[%08x] (SLI_PKT_OUTPUT_WMARK): %016llx\n", + reg, (u64)octeon_read_csr64(oct, reg)); + + /*0x291E0*/ + reg = CN23XX_SLI_PKT_IOQ_RING_RST; + len += sprintf(s + len, "\n[%08x] (SLI_PKT_RING_RST): %016llx\n", reg, + (u64)octeon_read_csr64(oct, reg)); + + /*0x29210*/ + reg = CN23XX_SLI_GBL_CONTROL; + len += sprintf(s + len, + "\n[%08x] (SLI_PKT_GBL_CONTROL): %016llx\n", reg, + (u64)octeon_read_csr64(oct, reg)); + + /*0x29220*/ + reg = 0x29220; + len += sprintf(s + len, "\n[%08x] (SLI_PKT_BIST_STATUS): %016llx\n", + reg, (u64)octeon_read_csr64(oct, reg)); + + /*PF only*/ + if (pf_num == 0) { + /*0x29260*/ + reg = CN23XX_SLI_OUT_BP_EN_W1S; + len += sprintf(s + len, + "\n[%08x] (SLI_PKT_OUT_BP_EN_W1S): %016llx\n", + reg, (u64)octeon_read_csr64(oct, reg)); + } else if (pf_num == 1) { + /*0x29270*/ + reg = CN23XX_SLI_OUT_BP_EN2_W1S; + len += sprintf(s + len, + "\n[%08x] (SLI_PKT_OUT_BP_EN2_W1S): %016llx\n", + reg, (u64)octeon_read_csr64(oct, reg)); + } + + for (i = 0; i < CN23XX_MAX_OUTPUT_QUEUES; i++) { + reg = CN23XX_SLI_OQ_BUFF_INFO_SIZE(i); + len += + sprintf(s + len, "\n[%08x] (SLI_PKT%d_OUT_SIZE): %016llx\n", + reg, i, (u64)octeon_read_csr64(oct, reg)); + } + + /*0x10040*/ + for (i = 0; i < CN23XX_MAX_INPUT_QUEUES; i++) { + reg = CN23XX_SLI_IQ_INSTR_COUNT64(i); + len += sprintf(s + len, + "\n[%08x] (SLI_PKT_IN_DONE%d_CNTS): %016llx\n", + reg, i, (u64)octeon_read_csr64(oct, reg)); + } + + /*0x10080*/ + for (i = 0; i < CN23XX_MAX_OUTPUT_QUEUES; i++) { + reg = CN23XX_SLI_OQ_PKTS_CREDIT(i); + len += sprintf(s + len, + "\n[%08x] (SLI_PKT%d_SLIST_BAOFF_DBELL): %016llx\n", + reg, i, (u64)octeon_read_csr64(oct, reg)); + } + + /*0x10090*/ + for (i = 0; i < CN23XX_MAX_OUTPUT_QUEUES; i++) { + reg = CN23XX_SLI_OQ_SIZE(i); + len += sprintf( + s + len, "\n[%08x] (SLI_PKT%d_SLIST_FIFO_RSIZE): %016llx\n", + reg, i, (u64)octeon_read_csr64(oct, reg)); + } + + /*0x10050*/ + for (i = 0; i < CN23XX_MAX_OUTPUT_QUEUES; i++) { + reg = CN23XX_SLI_OQ_PKT_CONTROL(i); + len += sprintf( + s + len, + "\n[%08x] (SLI_PKT%d__OUTPUT_CONTROL): %016llx\n", + reg, i, (u64)octeon_read_csr64(oct, reg)); + } + + /*0x10070*/ + for (i = 0; i < CN23XX_MAX_OUTPUT_QUEUES; i++) { + reg = CN23XX_SLI_OQ_BASE_ADDR64(i); + len += sprintf(s + len, + "\n[%08x] (SLI_PKT%d_SLIST_BADDR): %016llx\n", + reg, i, (u64)octeon_read_csr64(oct, reg)); + } + + /*0x100a0*/ + for (i = 0; i < CN23XX_MAX_OUTPUT_QUEUES; i++) { + reg = CN23XX_SLI_OQ_PKT_INT_LEVELS(i); + len += sprintf(s + len, + "\n[%08x] (SLI_PKT%d_INT_LEVELS): %016llx\n", + reg, i, (u64)octeon_read_csr64(oct, reg)); + } + + /*0x100b0*/ + for (i = 0; i < CN23XX_MAX_OUTPUT_QUEUES; i++) { + reg = CN23XX_SLI_OQ_PKTS_SENT(i); + len += sprintf(s + len, "\n[%08x] (SLI_PKT%d_CNTS): %016llx\n", + reg, i, (u64)octeon_read_csr64(oct, reg)); + } + + /*0x100c0*/ + for (i = 0; i < CN23XX_MAX_OUTPUT_QUEUES; i++) { + reg = 0x100c0 + i * CN23XX_OQ_OFFSET; + len += sprintf(s + len, + "\n[%08x] (SLI_PKT%d_ERROR_INFO): %016llx\n", + reg, i, (u64)octeon_read_csr64(oct, reg)); + + /*0x10000*/ + for (i = 0; i < CN23XX_MAX_INPUT_QUEUES; i++) { + reg = CN23XX_SLI_IQ_PKT_CONTROL64(i); + len += sprintf( + s + len, + "\n[%08x] (SLI_PKT%d_INPUT_CONTROL): %016llx\n", + reg, i, (u64)octeon_read_csr64(oct, reg)); + } + + /*0x10010*/ + for (i = 0; i < CN23XX_MAX_INPUT_QUEUES; i++) { + reg = CN23XX_SLI_IQ_BASE_ADDR64(i); + len += sprintf( + s + len, + "\n[%08x] (SLI_PKT%d_INSTR_BADDR): %016llx\n", reg, + i, (u64)octeon_read_csr64(oct, reg)); + } + + /*0x10020*/ + for (i = 0; i < CN23XX_MAX_INPUT_QUEUES; i++) { + reg = CN23XX_SLI_IQ_DOORBELL(i); + len += sprintf( + s + len, + "\n[%08x] (SLI_PKT%d_INSTR_BAOFF_DBELL): %016llx\n", + reg, i, (u64)octeon_read_csr64(oct, reg)); + } + + /*0x10030*/ + for (i = 0; i < CN23XX_MAX_INPUT_QUEUES; i++) { + reg = CN23XX_SLI_IQ_SIZE(i); + len += sprintf( + s + len, + "\n[%08x] (SLI_PKT%d_INSTR_FIFO_RSIZE): %016llx\n", + reg, i, (u64)octeon_read_csr64(oct, reg)); + } + + /*0x10040*/ + for (i = 0; i < CN23XX_MAX_INPUT_QUEUES; i++) + reg = CN23XX_SLI_IQ_INSTR_COUNT64(i); + len += sprintf(s + len, + "\n[%08x] (SLI_PKT_IN_DONE%d_CNTS): %016llx\n", + reg, i, (u64)octeon_read_csr64(oct, reg)); + } + + return len; } static int cn6xxx_read_csr_reg(char *s, struct octeon_device *oct) @@ -1686,6 +2051,10 @@ static void lio_get_regs(struct net_device *dev, regs->version = OCT_ETHTOOL_REGSVER; switch (oct->chip_id) { + case OCTEON_CN23XX_PF_VID: + memset(regbuf, 0, OCT_ETHTOOL_REGDUMP_LEN_23XX); + len += cn23xx_read_csr_reg(regbuf + len, oct); + break; case OCTEON_CN68XX: case OCTEON_CN66XX: memset(regbuf, 0, OCT_ETHTOOL_REGDUMP_LEN); diff --git a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h index 5552f6737423..0d990accb65e 100644 --- a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h +++ b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h @@ -232,6 +232,9 @@ static inline void add_sg_size(struct octeon_sg_entry *sg_entry, #define OCTNET_CMD_ADD_VLAN_FILTER 0x17 #define OCTNET_CMD_DEL_VLAN_FILTER 0x18 #define OCTNET_CMD_VXLAN_PORT_CONFIG 0x19 + +#define OCTNET_CMD_ID_ACTIVE 0x1a + #define OCTNET_CMD_VXLAN_PORT_ADD 0x0 #define OCTNET_CMD_VXLAN_PORT_DEL 0x1 #define OCTNET_CMD_RXCSUM_ENABLE 0x0 @@ -835,6 +838,8 @@ struct oct_link_stats { #define VITESSE_PHY_GPIO_DRIVEOFF 0x4 #define VITESSE_PHY_GPIO_HIGH 0x2 #define VITESSE_PHY_GPIO_LOW 0x3 +#define LED_IDENTIFICATION_ON 0x1 +#define LED_IDENTIFICATION_OFF 0x0 struct oct_mdio_cmd { u64 op; From 9ff1a9bad867215e4a7ceeef4e9311d1232902fa Mon Sep 17 00:00:00 2001 From: Raghu Vatsavayi Date: Thu, 1 Sep 2016 11:16:09 -0700 Subject: [PATCH 0684/1715] liquidio: CN23XX health monitoring Adds support for watchdog based health monitoring of octeon cores on cn23xx device. Signed-off-by: Derek Chickles Signed-off-by: Satanand Burla Signed-off-by: Felix Manlunas Signed-off-by: Raghu Vatsavayi Signed-off-by: David S. Miller --- .../net/ethernet/cavium/liquidio/lio_main.c | 126 +++++++++++++++++- .../ethernet/cavium/liquidio/octeon_device.h | 2 + .../ethernet/cavium/liquidio/octeon_network.h | 6 + 3 files changed, 132 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index 7a32358e7e22..a3910a631284 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "liquidio_common.h" #include "octeon_droq.h" #include "octeon_iq.h" @@ -948,8 +949,6 @@ static void update_txq_status(struct octeon_device *oct, int iq_num) struct lio *lio; struct octeon_instr_queue *iq = oct->instr_queue[iq_num]; - /*octeon_update_iq_read_idx(oct, iq);*/ - netdev = oct->props[iq->ifidx].netdev; /* This is needed because the first IQ does not have @@ -1187,6 +1186,102 @@ static int octeon_setup_interrupt(struct octeon_device *oct) return 0; } +static int liquidio_watchdog(void *param) +{ + u64 wdog; + u16 mask_of_stuck_cores = 0; + u16 mask_of_crashed_cores = 0; + int core_num; + u8 core_is_stuck[LIO_MAX_CORES]; + u8 core_crashed[LIO_MAX_CORES]; + struct octeon_device *oct = param; + + memset(core_is_stuck, 0, sizeof(core_is_stuck)); + memset(core_crashed, 0, sizeof(core_crashed)); + + while (!kthread_should_stop()) { + mask_of_crashed_cores = + (u16)octeon_read_csr64(oct, CN23XX_SLI_SCRATCH2); + + for (core_num = 0; core_num < LIO_MAX_CORES; core_num++) { + if (!core_is_stuck[core_num]) { + wdog = lio_pci_readq(oct, CIU3_WDOG(core_num)); + + /* look at watchdog state field */ + wdog &= CIU3_WDOG_MASK; + if (wdog) { + /* this watchdog timer has expired */ + core_is_stuck[core_num] = + LIO_MONITOR_WDOG_EXPIRE; + mask_of_stuck_cores |= (1 << core_num); + } + } + + if (!core_crashed[core_num]) + core_crashed[core_num] = + (mask_of_crashed_cores >> core_num) & 1; + } + + if (mask_of_stuck_cores) { + for (core_num = 0; core_num < LIO_MAX_CORES; + core_num++) { + if (core_is_stuck[core_num] == 1) { + dev_err(&oct->pci_dev->dev, + "ERROR: Octeon core %d is stuck!\n", + core_num); + /* 2 means we have printk'd an error + * so no need to repeat the same printk + */ + core_is_stuck[core_num] = + LIO_MONITOR_CORE_STUCK_MSGD; + } + } + } + + if (mask_of_crashed_cores) { + for (core_num = 0; core_num < LIO_MAX_CORES; + core_num++) { + if (core_crashed[core_num] == 1) { + dev_err(&oct->pci_dev->dev, + "ERROR: Octeon core %d crashed! See oct-fwdump for details.\n", + core_num); + /* 2 means we have printk'd an error + * so no need to repeat the same printk + */ + core_crashed[core_num] = + LIO_MONITOR_CORE_STUCK_MSGD; + } + } + } +#ifdef CONFIG_MODULE_UNLOAD + if (mask_of_stuck_cores || mask_of_crashed_cores) { + /* make module refcount=0 so that rmmod will work */ + long refcount; + + refcount = module_refcount(THIS_MODULE); + + while (refcount > 0) { + module_put(THIS_MODULE); + refcount = module_refcount(THIS_MODULE); + } + + /* compensate for and withstand an unlikely (but still + * possible) race condition + */ + while (refcount < 0) { + try_module_get(THIS_MODULE); + refcount = module_refcount(THIS_MODULE); + } + } +#endif + /* sleep for two seconds */ + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(2 * HZ); + } + + return 0; +} + /** * \brief PCI probe handler * @param pdev PCI device structure @@ -1232,6 +1327,30 @@ liquidio_probe(struct pci_dev *pdev, return -ENOMEM; } + if (OCTEON_CN23XX_PF(oct_dev)) { + u64 scratch1; + u8 bus, device, function; + + scratch1 = octeon_read_csr64(oct_dev, CN23XX_SLI_SCRATCH1); + if (!(scratch1 & 4ULL)) { + /* Bit 2 of SLI_SCRATCH_1 is a flag that indicates that + * the lio watchdog kernel thread is running for this + * NIC. Each NIC gets one watchdog kernel thread. + */ + scratch1 |= 4ULL; + octeon_write_csr64(oct_dev, CN23XX_SLI_SCRATCH1, + scratch1); + + bus = pdev->bus->number; + device = PCI_SLOT(pdev->devfn); + function = PCI_FUNC(pdev->devfn); + oct_dev->watchdog_task = kthread_create( + liquidio_watchdog, oct_dev, + "liowd/%02hhx:%02hhx.%hhx", bus, device, function); + wake_up_process(oct_dev->watchdog_task); + } + } + oct_dev->rx_pause = 1; oct_dev->tx_pause = 1; @@ -1564,6 +1683,9 @@ static void liquidio_remove(struct pci_dev *pdev) dev_dbg(&oct_dev->pci_dev->dev, "Stopping device\n"); + if (oct_dev->watchdog_task) + kthread_stop(oct_dev->watchdog_task); + if (oct_dev->app_mode && (oct_dev->app_mode == CVM_DRV_NIC_APP)) liquidio_stop_nic_module(oct_dev); diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.h b/drivers/net/ethernet/cavium/liquidio/octeon_device.h index 07efadc42ab5..6062ff332ad2 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_device.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.h @@ -484,6 +484,8 @@ struct octeon_device { /* private flags to control driver-specific features through ethtool */ u32 priv_flags; + + void *watchdog_task; }; #define OCT_DRV_ONLINE 1 diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_network.h b/drivers/net/ethernet/cavium/liquidio/octeon_network.h index d3b68d8279e2..e5d1debd05ad 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_network.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_network.h @@ -131,6 +131,12 @@ struct lio { #define LIO_SIZE (sizeof(struct lio)) #define GET_LIO(netdev) ((struct lio *)netdev_priv(netdev)) +#define CIU3_WDOG(c) (0x1010000020000ULL + (c << 3)) +#define CIU3_WDOG_MASK 12ULL +#define LIO_MONITOR_WDOG_EXPIRE 1 +#define LIO_MONITOR_CORE_STUCK_MSGD 2 +#define LIO_MAX_CORES 12 + /** * \brief Enable or disable feature * @param netdev pointer to network device From 9ded1a512f9de8d47074d208b41dead3c267fcee Mon Sep 17 00:00:00 2001 From: Raghu Vatsavayi Date: Thu, 1 Sep 2016 11:16:10 -0700 Subject: [PATCH 0685/1715] liquidio: CN23XX napi support This patch adds NAPI related support for cn23xx device. Signed-off-by: Derek Chickles Signed-off-by: Satanand Burla Signed-off-by: Felix Manlunas Signed-off-by: Raghu Vatsavayi Signed-off-by: David S. Miller --- .../net/ethernet/cavium/liquidio/lio_main.c | 10 ++++++---- .../ethernet/cavium/liquidio/octeon_device.c | 19 +++++++++++++++++++ .../ethernet/cavium/liquidio/octeon_droq.c | 7 +++++-- 3 files changed, 30 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index a3910a631284..a2460e52fca9 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -1001,8 +1001,7 @@ int liquidio_schedule_msix_droq_pkt_handler(struct octeon_droq *droq, u64 ret) * \brief Droq packet processor sceduler * @param oct octeon device */ -static -void liquidio_schedule_droq_pkt_handlers(struct octeon_device *oct) +static void liquidio_schedule_droq_pkt_handlers(struct octeon_device *oct) { struct octeon_device_priv *oct_priv = (struct octeon_device_priv *)oct->priv; @@ -2378,11 +2377,14 @@ static void napi_schedule_wrapper(void *param) */ static void liquidio_napi_drv_callback(void *arg) { + struct octeon_device *oct; struct octeon_droq *droq = arg; int this_cpu = smp_processor_id(); - if (droq->cpu_id == this_cpu) { - napi_schedule(&droq->napi); + oct = droq->oct_dev; + + if (OCTEON_CN23XX_PF(oct) || droq->cpu_id == this_cpu) { + napi_schedule_irqoff(&droq->napi); } else { struct call_single_data *csd = &droq->csd; diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.c b/drivers/net/ethernet/cavium/liquidio/octeon_device.c index 85e312381dcc..586b68899b06 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_device.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.c @@ -1301,17 +1301,36 @@ int lio_get_device_id(void *dev) void lio_enable_irq(struct octeon_droq *droq, struct octeon_instr_queue *iq) { + u64 instr_cnt; + struct octeon_device *oct = NULL; + /* the whole thing needs to be atomic, ideally */ if (droq) { spin_lock_bh(&droq->lock); writel(droq->pkt_count, droq->pkts_sent_reg); droq->pkt_count = 0; spin_unlock_bh(&droq->lock); + oct = droq->oct_dev; } if (iq) { spin_lock_bh(&iq->lock); writel(iq->pkt_in_done, iq->inst_cnt_reg); iq->pkt_in_done = 0; spin_unlock_bh(&iq->lock); + oct = iq->oct_dev; + } + /*write resend. Writing RESEND in SLI_PKTX_CNTS should be enough + *to trigger tx interrupts as well, if they are pending. + */ + if (oct && OCTEON_CN23XX_PF(oct)) { + if (droq) + writeq(CN23XX_INTR_RESEND, droq->pkts_sent_reg); + /*we race with firmrware here. read and write the IN_DONE_CNTS*/ + else if (iq) { + instr_cnt = readq(iq->inst_cnt_reg); + writeq(((instr_cnt & 0xFFFFFFFF00000000ULL) | + CN23XX_INTR_RESEND), + iq->inst_cnt_reg); + } } } diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c index 8848ce2711fb..f60e5320daf4 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c @@ -573,7 +573,7 @@ octeon_droq_dispatch_pkt(struct octeon_device *oct, (unsigned int)rh->r.opcode, (unsigned int)rh->r.subcode); droq->stats.dropped_nodispatch++; - } /* else (dispatch_fn ... */ + } return cnt; } @@ -887,8 +887,11 @@ octeon_process_droq_poll_cmd(struct octeon_device *oct, u32 q_no, int cmd, return 0; } break; + case OCTEON_CN23XX_PF_VID: { + lio_enable_irq(oct->droq[q_no], oct->instr_queue[q_no]); + } + break; } - return 0; } From 30136395a2f63e1aca9a62bfd631feb3eb213428 Mon Sep 17 00:00:00 2001 From: Raghu Vatsavayi Date: Thu, 1 Sep 2016 11:16:11 -0700 Subject: [PATCH 0686/1715] liquidio:CN23XX pause frame support Adds support for pause frame and priv flag for cn23xx device. Signed-off-by: Derek Chickles Signed-off-by: Satanand Burla Signed-off-by: Felix Manlunas Signed-off-by: Raghu Vatsavayi Signed-off-by: David S. Miller --- .../ethernet/cavium/liquidio/lio_ethtool.c | 110 ++++++++++++++++++ .../net/ethernet/cavium/liquidio/lio_main.c | 12 +- .../ethernet/cavium/liquidio/octeon_device.h | 11 +- 3 files changed, 126 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c index fb29a64b9e51..f163e0abbeb2 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c @@ -190,6 +190,10 @@ static const char oct_droq_stats_strings[][ETH_GSTRING_LEN] = { "buffer_alloc_failure", }; +/* LiquidIO driver private flags */ +static const char oct_priv_flags_strings[][ETH_GSTRING_LEN] = { +}; + #define OCTNIC_NCMD_AUTONEG_ON 0x1 #define OCTNIC_NCMD_PHY_ON 0x2 @@ -658,6 +662,69 @@ lio_get_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause) pause->rx_pause = oct->rx_pause; } +static int +lio_set_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause) +{ + /* Notes: Not supporting any auto negotiation in these + * drivers. + */ + struct lio *lio = GET_LIO(netdev); + struct octeon_device *oct = lio->oct_dev; + struct octnic_ctrl_pkt nctrl; + struct oct_link_info *linfo = &lio->linfo; + + int ret = 0; + + if (oct->chip_id != OCTEON_CN23XX_PF_VID) + return -EINVAL; + + if (linfo->link.s.duplex == 0) { + /*no flow control for half duplex*/ + if (pause->rx_pause || pause->tx_pause) + return -EINVAL; + } + + /*do not support autoneg of link flow control*/ + if (pause->autoneg == AUTONEG_ENABLE) + return -EINVAL; + + memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt)); + + nctrl.ncmd.u64 = 0; + nctrl.ncmd.s.cmd = OCTNET_CMD_SET_FLOW_CTL; + nctrl.iq_no = lio->linfo.txpciq[0].s.q_no; + nctrl.wait_time = 100; + nctrl.netpndev = (u64)netdev; + nctrl.cb_fn = liquidio_link_ctrl_cmd_completion; + + if (pause->rx_pause) { + /*enable rx pause*/ + nctrl.ncmd.s.param1 = 1; + } else { + /*disable rx pause*/ + nctrl.ncmd.s.param1 = 0; + } + + if (pause->tx_pause) { + /*enable tx pause*/ + nctrl.ncmd.s.param2 = 1; + } else { + /*disable tx pause*/ + nctrl.ncmd.s.param2 = 0; + } + + ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl); + if (ret < 0) { + dev_err(&oct->pci_dev->dev, "Failed to set pause parameter\n"); + return -EINVAL; + } + + oct->rx_pause = pause->rx_pause; + oct->tx_pause = pause->tx_pause; + + return 0; +} + static void lio_get_ethtool_stats(struct net_device *netdev, struct ethtool_stats *stats __attribute__((unused)), @@ -925,6 +992,27 @@ lio_get_ethtool_stats(struct net_device *netdev, } } +static void lio_get_priv_flags_strings(struct lio *lio, u8 *data) +{ + struct octeon_device *oct_dev = lio->oct_dev; + int i; + + switch (oct_dev->chip_id) { + case OCTEON_CN23XX_PF_VID: + for (i = 0; i < ARRAY_SIZE(oct_priv_flags_strings); i++) { + sprintf(data, "%s", oct_priv_flags_strings[i]); + data += ETH_GSTRING_LEN; + } + break; + case OCTEON_CN68XX: + case OCTEON_CN66XX: + break; + default: + netif_info(lio, drv, lio->netdev, "Unknown Chip !!\n"); + break; + } +} + static void lio_get_strings(struct net_device *netdev, u32 stringset, u8 *data) { struct lio *lio = GET_LIO(netdev); @@ -964,12 +1052,31 @@ static void lio_get_strings(struct net_device *netdev, u32 stringset, u8 *data) } break; + case ETH_SS_PRIV_FLAGS: + lio_get_priv_flags_strings(lio, data); + break; default: netif_info(lio, drv, lio->netdev, "Unknown Stringset !!\n"); break; } } +static int lio_get_priv_flags_ss_count(struct lio *lio) +{ + struct octeon_device *oct_dev = lio->oct_dev; + + switch (oct_dev->chip_id) { + case OCTEON_CN23XX_PF_VID: + return ARRAY_SIZE(oct_priv_flags_strings); + case OCTEON_CN68XX: + case OCTEON_CN66XX: + return -EOPNOTSUPP; + default: + netif_info(lio, drv, lio->netdev, "Unknown Chip !!\n"); + return -EOPNOTSUPP; + } +} + static int lio_get_sset_count(struct net_device *netdev, int sset) { struct lio *lio = GET_LIO(netdev); @@ -980,6 +1087,8 @@ static int lio_get_sset_count(struct net_device *netdev, int sset) return (ARRAY_SIZE(oct_stats_strings) + ARRAY_SIZE(oct_iq_stats_strings) * oct_dev->num_iqs + ARRAY_SIZE(oct_droq_stats_strings) * oct_dev->num_oqs); + case ETH_SS_PRIV_FLAGS: + return lio_get_priv_flags_ss_count(lio); default: return -EOPNOTSUPP; } @@ -2096,6 +2205,7 @@ static const struct ethtool_ops lio_ethtool_ops = { .get_strings = lio_get_strings, .get_ethtool_stats = lio_get_ethtool_stats, .get_pauseparam = lio_get_pauseparam, + .set_pauseparam = lio_set_pauseparam, .get_regs_len = lio_get_regs_len, .get_regs = lio_get_regs, .get_msglevel = lio_get_msglevel, diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index a2460e52fca9..afc6f9dc8119 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -2184,7 +2184,7 @@ static void if_cfg_callback(struct octeon_device *oct, struct liquidio_if_cfg_context *ctx; resp = (struct liquidio_if_cfg_resp *)sc->virtrptr; - ctx = (struct liquidio_if_cfg_context *)sc->ctxptr; + ctx = (struct liquidio_if_cfg_context *)sc->ctxptr; oct = lio_get_device(ctx->octeon_id); if (resp->status) @@ -3934,7 +3934,10 @@ static int setup_nic_devices(struct octeon_device *octeon_dev) /* Register ethtool support */ liquidio_set_ethtool_ops(netdev); - octeon_dev->priv_flags = 0x0; + if (lio->oct_dev->chip_id == OCTEON_CN23XX_PF_VID) + octeon_dev->priv_flags = OCT_PRIV_FLAG_DEFAULT; + else + octeon_dev->priv_flags = 0x0; if (netdev->features & NETIF_F_LRO) liquidio_set_feature(netdev, OCTNET_CMD_LRO_ENABLE, @@ -4015,8 +4018,7 @@ static int liquidio_init_nic_module(struct octeon_device *oct) /* run port_config command for each port */ oct->ifcount = num_nic_ports; - memset(oct->props, 0, - sizeof(struct octdev_props) * num_nic_ports); + memset(oct->props, 0, sizeof(struct octdev_props) * num_nic_ports); for (i = 0; i < MAX_OCTEON_LINKS; i++) oct->props[i].gmxport = -1; @@ -4032,7 +4034,7 @@ static int liquidio_init_nic_module(struct octeon_device *oct) /* Initialize interrupt moderation params */ intrmod_cfg = &((struct octeon_device *)oct)->intrmod; intrmod_cfg->rx_enable = 1; - intrmod_cfg->check_intrvl = LIO_INTRMOD_CHECK_INTERVAL; + intrmod_cfg->check_intrvl = LIO_INTRMOD_CHECK_INTERVAL; intrmod_cfg->maxpkt_ratethr = LIO_INTRMOD_MAXPKT_RATETHR; intrmod_cfg->minpkt_ratethr = LIO_INTRMOD_MINPKT_RATETHR; intrmod_cfg->rx_maxcnt_trigger = LIO_INTRMOD_RXMAXCNT_TRIGGER; diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.h b/drivers/net/ethernet/cavium/liquidio/octeon_device.h index 6062ff332ad2..da15c2ae9330 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_device.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.h @@ -755,8 +755,15 @@ enum { OCT_PRIV_FLAG_TX_BYTES = 0, /* Tx interrupts by pending byte count */ }; -static inline void lio_set_priv_flag(struct octeon_device *octdev, u32 flag, - u32 val) +#define OCT_PRIV_FLAG_DEFAULT 0x0 + +static inline u32 lio_get_priv_flag(struct octeon_device *octdev, u32 flag) +{ + return !!(octdev->priv_flags & (0x1 << flag)); +} + +static inline void lio_set_priv_flag(struct octeon_device *octdev, + u32 flag, u32 val) { if (val) octdev->priv_flags |= (0x1 << flag); From 8432ebd66205ef1e088005ae3738600dedc7d9b4 Mon Sep 17 00:00:00 2001 From: Rajan Vaja Date: Thu, 21 Jul 2016 13:44:44 +0530 Subject: [PATCH 0687/1715] hostap: Use memdup_user() to reuse code Fix coccicheck warning which recommends to use memdup_user() instead of reimplementing its code. This patch fixes below coccicheck warnings: drivers/net/wireless/intersil/hostap/hostap_ioctl.c:3044:9-16: WARNING opportunity for memdup_user drivers/net/wireless/intersil/hostap/hostap_ioctl.c:3806:9-16: WARNING opportunity for memdup_user Signed-off-by: Rajan Vaja Reviewed-by: Julian Calaby Signed-off-by: Kalle Valo --- .../wireless/intersil/hostap/hostap_ioctl.c | 20 ++++++------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/drivers/net/wireless/intersil/hostap/hostap_ioctl.c b/drivers/net/wireless/intersil/hostap/hostap_ioctl.c index 3e5fa7872b64..a5656bc0e6aa 100644 --- a/drivers/net/wireless/intersil/hostap/hostap_ioctl.c +++ b/drivers/net/wireless/intersil/hostap/hostap_ioctl.c @@ -3041,13 +3041,9 @@ static int prism2_ioctl_priv_download(local_info_t *local, struct iw_point *p) p->length > 1024 || !p->pointer) return -EINVAL; - param = kmalloc(p->length, GFP_KERNEL); - if (param == NULL) - return -ENOMEM; - - if (copy_from_user(param, p->pointer, p->length)) { - ret = -EFAULT; - goto out; + param = memdup_user(p->pointer, p->length); + if (IS_ERR(param)) { + return PTR_ERR(param); } if (p->length < sizeof(struct prism2_download_param) + @@ -3803,13 +3799,9 @@ static int prism2_ioctl_priv_hostapd(local_info_t *local, struct iw_point *p) p->length > PRISM2_HOSTAPD_MAX_BUF_SIZE || !p->pointer) return -EINVAL; - param = kmalloc(p->length, GFP_KERNEL); - if (param == NULL) - return -ENOMEM; - - if (copy_from_user(param, p->pointer, p->length)) { - ret = -EFAULT; - goto out; + param = memdup_user(p->pointer, p->length); + if (IS_ERR(param)) { + return PTR_ERR(param); } switch (param->cmd) { From 4ad0579a28c0a02613c1d4a53c03ae746f14b0ac Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 22 Jul 2016 14:08:08 +0000 Subject: [PATCH 0688/1715] wlcore: spi: fix non static symbol warning Fixes the following sparse warning: drivers/net/wireless/ti/wlcore/spi.c:87:34: warning: symbol 'wilink_data' was not declared. Should it be static? Signed-off-by: Wei Yongjun Signed-off-by: Kalle Valo --- drivers/net/wireless/ti/wlcore/spi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ti/wlcore/spi.c b/drivers/net/wireless/ti/wlcore/spi.c index 6d24040889b8..0ed526e4c0df 100644 --- a/drivers/net/wireless/ti/wlcore/spi.c +++ b/drivers/net/wireless/ti/wlcore/spi.c @@ -84,7 +84,7 @@ struct wilink_familiy_data { char name[8]; }; -const struct wilink_familiy_data *wilink_data; +static const struct wilink_familiy_data *wilink_data; static const struct wilink_familiy_data wl18xx_data = { .name = "wl18xx", From 902831a7629b8b72d333d214b031a717309bb1eb Mon Sep 17 00:00:00 2001 From: Karthik D A Date: Mon, 25 Jul 2016 21:21:04 +0530 Subject: [PATCH 0689/1715] mwifiex: Fixed endianness problem for big endian platform The driver sends and recives information to and from the firmware. Correct endianness should be ensured as firmware follows little endian format and host can be little/big endian. Signed-off-by: Karthik D A Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/fw.h | 30 +++++++++---------- .../net/wireless/marvell/mwifiex/sta_cmd.c | 27 +++++++++-------- .../wireless/marvell/mwifiex/sta_cmdresp.c | 2 +- 3 files changed, 31 insertions(+), 28 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/fw.h b/drivers/net/wireless/marvell/mwifiex/fw.h index 5596b6be1898..c46267b96462 100644 --- a/drivers/net/wireless/marvell/mwifiex/fw.h +++ b/drivers/net/wireless/marvell/mwifiex/fw.h @@ -1646,7 +1646,7 @@ struct mwifiex_ie_types_sta_info { }; struct host_cmd_ds_sta_list { - u16 sta_count; + __le16 sta_count; u8 tlv[0]; } __packed; @@ -2034,26 +2034,26 @@ struct host_cmd_ds_set_bss_mode { struct host_cmd_ds_pcie_details { /* TX buffer descriptor ring address */ - u32 txbd_addr_lo; - u32 txbd_addr_hi; + __le32 txbd_addr_lo; + __le32 txbd_addr_hi; /* TX buffer descriptor ring count */ - u32 txbd_count; + __le32 txbd_count; /* RX buffer descriptor ring address */ - u32 rxbd_addr_lo; - u32 rxbd_addr_hi; + __le32 rxbd_addr_lo; + __le32 rxbd_addr_hi; /* RX buffer descriptor ring count */ - u32 rxbd_count; + __le32 rxbd_count; /* Event buffer descriptor ring address */ - u32 evtbd_addr_lo; - u32 evtbd_addr_hi; + __le32 evtbd_addr_lo; + __le32 evtbd_addr_hi; /* Event buffer descriptor ring count */ - u32 evtbd_count; + __le32 evtbd_count; /* Sleep cookie buffer physical address */ - u32 sleep_cookie_addr_lo; - u32 sleep_cookie_addr_hi; + __le32 sleep_cookie_addr_lo; + __le32 sleep_cookie_addr_hi; } __packed; struct mwifiex_ie_types_rssi_threshold { @@ -2093,8 +2093,8 @@ struct mwifiex_ie_types_mc_group_info { u8 chan_buf_weight; u8 band_config; u8 chan_num; - u32 chan_time; - u32 reserved; + __le32 chan_time; + __le32 reserved; union { u8 sdio_func_num; u8 usb_ep_num; @@ -2185,7 +2185,7 @@ struct host_cmd_ds_robust_coex { } __packed; struct host_cmd_ds_wakeup_reason { - u16 wakeup_reason; + __le16 wakeup_reason; } __packed; struct host_cmd_ds_gtk_rekey_params { diff --git a/drivers/net/wireless/marvell/mwifiex/sta_cmd.c b/drivers/net/wireless/marvell/mwifiex/sta_cmd.c index 7897037b0992..108c11cc0c16 100644 --- a/drivers/net/wireless/marvell/mwifiex/sta_cmd.c +++ b/drivers/net/wireless/marvell/mwifiex/sta_cmd.c @@ -1244,20 +1244,23 @@ mwifiex_cmd_pcie_host_spec(struct mwifiex_private *priv, return 0; /* Send the ring base addresses and count to firmware */ - host_spec->txbd_addr_lo = (u32)(card->txbd_ring_pbase); - host_spec->txbd_addr_hi = (u32)(((u64)card->txbd_ring_pbase)>>32); - host_spec->txbd_count = MWIFIEX_MAX_TXRX_BD; - host_spec->rxbd_addr_lo = (u32)(card->rxbd_ring_pbase); - host_spec->rxbd_addr_hi = (u32)(((u64)card->rxbd_ring_pbase)>>32); - host_spec->rxbd_count = MWIFIEX_MAX_TXRX_BD; - host_spec->evtbd_addr_lo = (u32)(card->evtbd_ring_pbase); - host_spec->evtbd_addr_hi = (u32)(((u64)card->evtbd_ring_pbase)>>32); - host_spec->evtbd_count = MWIFIEX_MAX_EVT_BD; + host_spec->txbd_addr_lo = cpu_to_le32((u32)(card->txbd_ring_pbase)); + host_spec->txbd_addr_hi = + cpu_to_le32((u32)(((u64)card->txbd_ring_pbase) >> 32)); + host_spec->txbd_count = cpu_to_le32(MWIFIEX_MAX_TXRX_BD); + host_spec->rxbd_addr_lo = cpu_to_le32((u32)(card->rxbd_ring_pbase)); + host_spec->rxbd_addr_hi = + cpu_to_le32((u32)(((u64)card->rxbd_ring_pbase) >> 32)); + host_spec->rxbd_count = cpu_to_le32(MWIFIEX_MAX_TXRX_BD); + host_spec->evtbd_addr_lo = cpu_to_le32((u32)(card->evtbd_ring_pbase)); + host_spec->evtbd_addr_hi = + cpu_to_le32((u32)(((u64)card->evtbd_ring_pbase) >> 32)); + host_spec->evtbd_count = cpu_to_le32(MWIFIEX_MAX_EVT_BD); if (card->sleep_cookie_vbase) { host_spec->sleep_cookie_addr_lo = - (u32)(card->sleep_cookie_pbase); - host_spec->sleep_cookie_addr_hi = - (u32)(((u64)(card->sleep_cookie_pbase)) >> 32); + cpu_to_le32((u32)(card->sleep_cookie_pbase)); + host_spec->sleep_cookie_addr_hi = cpu_to_le32((u32)(((u64) + (card->sleep_cookie_pbase)) >> 32)); mwifiex_dbg(priv->adapter, INFO, "sleep_cook_lo phy addr: 0x%x\n", host_spec->sleep_cookie_addr_lo); diff --git a/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c b/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c index ccf54932e321..90e191bf1343 100644 --- a/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c +++ b/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c @@ -962,7 +962,7 @@ static int mwifiex_ret_uap_sta_list(struct mwifiex_private *priv, int i; struct mwifiex_sta_node *sta_node; - for (i = 0; i < sta_list->sta_count; i++) { + for (i = 0; i < (le16_to_cpu(sta_list->sta_count)); i++) { sta_node = mwifiex_get_sta_entry(priv, sta_info->mac); if (unlikely(!sta_node)) continue; From e5988c62b9e6e5fb279188db916c51fdb5981403 Mon Sep 17 00:00:00 2001 From: Karthik D A Date: Mon, 25 Jul 2016 21:21:05 +0530 Subject: [PATCH 0690/1715] mwifiex: add region code information in debugfs region code is an EEPROM setting received from firmware. Let's display this in debugfs along with other information. Signed-off-by: Karthik D A Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/debugfs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/marvell/mwifiex/debugfs.c b/drivers/net/wireless/marvell/mwifiex/debugfs.c index bccf17ad588e..b9284b533294 100644 --- a/drivers/net/wireless/marvell/mwifiex/debugfs.c +++ b/drivers/net/wireless/marvell/mwifiex/debugfs.c @@ -118,6 +118,8 @@ mwifiex_info_read(struct file *file, char __user *ubuf, p += sprintf(p, "bssid=\"%pM\"\n", info.bssid); p += sprintf(p, "channel=\"%d\"\n", (int) info.bss_chan); p += sprintf(p, "country_code = \"%s\"\n", info.country_code); + p += sprintf(p, "region_code=\"0x%x\"\n", + priv->adapter->region_code); netdev_for_each_mc_addr(ha, netdev) p += sprintf(p, "multicast_address[%d]=\"%pM\"\n", From c8ccf3ade7851054f82bf88f5fcd393a394038a3 Mon Sep 17 00:00:00 2001 From: Amitkumar Karwar Date: Mon, 25 Jul 2016 21:21:06 +0530 Subject: [PATCH 0691/1715] mwifiex: fix failed to reconnect after interface disabled/enabled Recent patch "mwifiex: fix NULL pointer" skips extended scan event handling when suspend is in progress. It created a problem for scan after interface disabled/enabled case. This patch solves the problem by checking netif_running() status. Fixes:16d25da94f3d654 ("mwifiex: fix NULL pointer dereference during suspend") Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/sta_event.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/marvell/mwifiex/sta_event.c b/drivers/net/wireless/marvell/mwifiex/sta_event.c index a422f3306d4d..7e394d485f54 100644 --- a/drivers/net/wireless/marvell/mwifiex/sta_event.c +++ b/drivers/net/wireless/marvell/mwifiex/sta_event.c @@ -708,7 +708,11 @@ int mwifiex_process_sta_event(struct mwifiex_private *priv) case EVENT_EXT_SCAN_REPORT: mwifiex_dbg(adapter, EVENT, "event: EXT_SCAN Report\n"); - if (adapter->ext_scan && !priv->scan_aborting) + /* We intend to skip this event during suspend, but handle + * it in interface disabled case + */ + if (adapter->ext_scan && (!priv->scan_aborting || + !netif_running(priv->netdev))) ret = mwifiex_handle_event_ext_scan_report(priv, adapter->event_skb->data); From c2a8f0ff9c6ca8d04adb68b7959a56a3cbb665b3 Mon Sep 17 00:00:00 2001 From: Ganapathi Bhat Date: Mon, 25 Jul 2016 21:21:07 +0530 Subject: [PATCH 0692/1715] mwifiex: support random MAC address for scanning This patch advertises RANDOM_MAC_ADDR feature to cfg80211. It allow the application to issue scan with a MAC address and mask. Random MACs are generated and used in probe requests sent for scanning until it is changed by the application or device is restarted. Signed-off-by: Ganapathi Bhat Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/cfg80211.c | 15 ++++++++++++++- drivers/net/wireless/marvell/mwifiex/fw.h | 7 +++++++ drivers/net/wireless/marvell/mwifiex/main.h | 1 + drivers/net/wireless/marvell/mwifiex/scan.c | 16 ++++++++++++++++ 4 files changed, 38 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c index a8ff969c95c2..871ad8a9868f 100644 --- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c +++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c @@ -2485,6 +2485,16 @@ mwifiex_cfg80211_scan(struct wiphy *wiphy, priv->scan_request = request; + if (request->flags & NL80211_SCAN_FLAG_RANDOM_ADDR) { + ether_addr_copy(priv->random_mac, request->mac_addr); + for (i = 0; i < ETH_ALEN; i++) { + priv->random_mac[i] &= request->mac_addr_mask[i]; + priv->random_mac[i] |= get_random_int() & + ~(request->mac_addr_mask[i]); + } + } + + ether_addr_copy(user_scan_cfg->random_mac, priv->random_mac); user_scan_cfg->num_ssids = request->n_ssids; user_scan_cfg->ssid_list = request->ssids; @@ -4173,7 +4183,10 @@ int mwifiex_register_cfg80211(struct mwifiex_adapter *adapter) wiphy->features |= NL80211_FEATURE_HT_IBSS | NL80211_FEATURE_INACTIVITY_TIMER | NL80211_FEATURE_LOW_PRIORITY_SCAN | - NL80211_FEATURE_NEED_OBSS_SCAN; + NL80211_FEATURE_NEED_OBSS_SCAN | + NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR | + NL80211_FEATURE_SCHED_SCAN_RANDOM_MAC_ADDR | + NL80211_FEATURE_ND_RANDOM_MAC_ADDR; if (ISSUPP_TDLS_ENABLED(adapter->fw_cap_info)) wiphy->features |= NL80211_FEATURE_TDLS_CHANNEL_SWITCH; diff --git a/drivers/net/wireless/marvell/mwifiex/fw.h b/drivers/net/wireless/marvell/mwifiex/fw.h index c46267b96462..3797ef41887e 100644 --- a/drivers/net/wireless/marvell/mwifiex/fw.h +++ b/drivers/net/wireless/marvell/mwifiex/fw.h @@ -188,6 +188,7 @@ enum MWIFIEX_802_11_PRIVACY_FILTER { #define TLV_BTCOEX_WL_AGGR_WINSIZE (PROPRIETARY_TLV_BASE_ID + 202) #define TLV_BTCOEX_WL_SCANTIME (PROPRIETARY_TLV_BASE_ID + 203) #define TLV_TYPE_BSS_MODE (PROPRIETARY_TLV_BASE_ID + 206) +#define TLV_TYPE_RANDOM_MAC (PROPRIETARY_TLV_BASE_ID + 236) #define MWIFIEX_TX_DATA_BUF_SIZE_2K 2048 @@ -780,6 +781,11 @@ struct mwifiex_ie_types_scan_chan_gap { __le16 chan_gap; } __packed; +struct mwifiex_ie_types_random_mac { + struct mwifiex_ie_types_header header; + u8 mac[ETH_ALEN]; +} __packed; + struct mwifiex_ietypes_chanstats { struct mwifiex_ie_types_header header; struct mwifiex_fw_chan_stats chanstats[0]; @@ -1464,6 +1470,7 @@ struct mwifiex_user_scan_cfg { /* Variable number (fixed maximum) of channels to scan up */ struct mwifiex_user_scan_chan chan_list[MWIFIEX_USER_SCAN_CHAN_MAX]; u16 scan_chan_gap; + u8 random_mac[ETH_ALEN]; } __packed; #define MWIFIEX_BG_SCAN_CHAN_MAX 38 diff --git a/drivers/net/wireless/marvell/mwifiex/main.h b/drivers/net/wireless/marvell/mwifiex/main.h index 9f6bb400bdae..59026005e4d7 100644 --- a/drivers/net/wireless/marvell/mwifiex/main.h +++ b/drivers/net/wireless/marvell/mwifiex/main.h @@ -675,6 +675,7 @@ struct mwifiex_private { struct mwifiex_user_scan_chan hidden_chan[MWIFIEX_USER_SCAN_CHAN_MAX]; u8 assoc_resp_ht_param; bool ht_param_present; + u8 random_mac[ETH_ALEN]; }; diff --git a/drivers/net/wireless/marvell/mwifiex/scan.c b/drivers/net/wireless/marvell/mwifiex/scan.c index 21ec84794d0c..8daf0d8657b9 100644 --- a/drivers/net/wireless/marvell/mwifiex/scan.c +++ b/drivers/net/wireless/marvell/mwifiex/scan.c @@ -820,6 +820,7 @@ mwifiex_config_scan(struct mwifiex_private *priv, struct mwifiex_adapter *adapter = priv->adapter; struct mwifiex_ie_types_num_probes *num_probes_tlv; struct mwifiex_ie_types_scan_chan_gap *chan_gap_tlv; + struct mwifiex_ie_types_random_mac *random_mac_tlv; struct mwifiex_ie_types_wildcard_ssid_params *wildcard_ssid_tlv; struct mwifiex_ie_types_bssid_list *bssid_tlv; u8 *tlv_pos; @@ -835,6 +836,7 @@ mwifiex_config_scan(struct mwifiex_private *priv, u8 ssid_filter; struct mwifiex_ie_types_htcap *ht_cap; struct mwifiex_ie_types_bss_mode *bss_mode; + const u8 zero_mac[6] = {0, 0, 0, 0, 0, 0}; /* The tlv_buf_len is calculated for each scan command. The TLVs added in this routine will be preserved since the routine that sends the @@ -967,6 +969,18 @@ mwifiex_config_scan(struct mwifiex_private *priv, tlv_pos += sizeof(struct mwifiex_ie_types_scan_chan_gap); } + + if (!ether_addr_equal(user_scan_in->random_mac, zero_mac)) { + random_mac_tlv = (void *)tlv_pos; + random_mac_tlv->header.type = + cpu_to_le16(TLV_TYPE_RANDOM_MAC); + random_mac_tlv->header.len = + cpu_to_le16(sizeof(random_mac_tlv->mac)); + ether_addr_copy(random_mac_tlv->mac, + user_scan_in->random_mac); + tlv_pos += + sizeof(struct mwifiex_ie_types_random_mac); + } } else { scan_cfg_out->bss_mode = (u8) adapter->scan_mode; num_probes = adapter->scan_probes; @@ -1922,6 +1936,7 @@ mwifiex_active_scan_req_for_passive_chan(struct mwifiex_private *priv) } adapter->active_scan_triggered = true; + ether_addr_copy(user_scan_cfg->random_mac, priv->random_mac); user_scan_cfg->num_ssids = priv->scan_request->n_ssids; user_scan_cfg->ssid_list = priv->scan_request->ssids; @@ -2761,6 +2776,7 @@ static int mwifiex_scan_specific_ssid(struct mwifiex_private *priv, if (!scan_cfg) return -ENOMEM; + ether_addr_copy(scan_cfg->random_mac, priv->random_mac); scan_cfg->ssid_list = req_ssid; scan_cfg->num_ssids = 1; From 99ffe72cdae4f7c326d094c85167802ee0ecacbb Mon Sep 17 00:00:00 2001 From: Xinming Hu Date: Mon, 25 Jul 2016 21:21:08 +0530 Subject: [PATCH 0693/1715] mwifiex: process rxba_sync event Firmware may filter and drop packets under certain condition, for example, ARP SA=DA packet. this event will be used to synchronize the Rx Block Acknowledgment (BA) window bitmap and to fill any holes in driver side. Signed-off-by: Xinming Hu Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- .../wireless/marvell/mwifiex/11n_rxreorder.c | 78 ++++++++++++++++++- .../wireless/marvell/mwifiex/11n_rxreorder.h | 3 +- drivers/net/wireless/marvell/mwifiex/fw.h | 12 +++ .../net/wireless/marvell/mwifiex/sta_event.c | 6 ++ .../net/wireless/marvell/mwifiex/uap_event.c | 7 +- 5 files changed, 103 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c index a74cc43b1953..94480123efa3 100644 --- a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c +++ b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c @@ -78,8 +78,15 @@ static int mwifiex_11n_dispatch_amsdu_pkt(struct mwifiex_private *priv, */ static int mwifiex_11n_dispatch_pkt(struct mwifiex_private *priv, void *payload) { - int ret = mwifiex_11n_dispatch_amsdu_pkt(priv, payload); + int ret; + + if (!payload) { + mwifiex_dbg(priv->adapter, INFO, "info: fw drop data\n"); + return 0; + } + + ret = mwifiex_11n_dispatch_amsdu_pkt(priv, payload); if (!ret) return 0; @@ -921,3 +928,72 @@ void mwifiex_coex_ampdu_rxwinsize(struct mwifiex_adapter *adapter) else mwifiex_update_ampdu_rxwinsize(adapter, false); } + +/* This function handles rxba_sync event + */ +void mwifiex_11n_rxba_sync_event(struct mwifiex_private *priv, + u8 *event_buf, u16 len) +{ + struct mwifiex_ie_types_rxba_sync *tlv_rxba = (void *)event_buf; + u16 tlv_type, tlv_len; + struct mwifiex_rx_reorder_tbl *rx_reor_tbl_ptr; + u8 i, j; + u16 seq_num, tlv_seq_num, tlv_bitmap_len; + int tlv_buf_left = len; + int ret; + u8 *tmp; + + mwifiex_dbg_dump(priv->adapter, EVT_D, "RXBA_SYNC event:", + event_buf, len); + while (tlv_buf_left >= sizeof(*tlv_rxba)) { + tlv_type = le16_to_cpu(tlv_rxba->header.type); + tlv_len = le16_to_cpu(tlv_rxba->header.len); + if (tlv_type != TLV_TYPE_RXBA_SYNC) { + mwifiex_dbg(priv->adapter, ERROR, + "Wrong TLV id=0x%x\n", tlv_type); + return; + } + + tlv_seq_num = le16_to_cpu(tlv_rxba->seq_num); + tlv_bitmap_len = le16_to_cpu(tlv_rxba->bitmap_len); + mwifiex_dbg(priv->adapter, INFO, + "%pM tid=%d seq_num=%d bitmap_len=%d\n", + tlv_rxba->mac, tlv_rxba->tid, tlv_seq_num, + tlv_bitmap_len); + + rx_reor_tbl_ptr = + mwifiex_11n_get_rx_reorder_tbl(priv, tlv_rxba->tid, + tlv_rxba->mac); + if (!rx_reor_tbl_ptr) { + mwifiex_dbg(priv->adapter, ERROR, + "Can not find rx_reorder_tbl!"); + return; + } + + for (i = 0; i < tlv_bitmap_len; i++) { + for (j = 0 ; j < 8; j++) { + if (tlv_rxba->bitmap[i] & (1 << j)) { + seq_num = (MAX_TID_VALUE - 1) & + (tlv_seq_num + i * 8 + j); + + mwifiex_dbg(priv->adapter, ERROR, + "drop packet,seq=%d\n", + seq_num); + + ret = mwifiex_11n_rx_reorder_pkt + (priv, seq_num, tlv_rxba->tid, + tlv_rxba->mac, 0, NULL); + + if (ret) + mwifiex_dbg(priv->adapter, + ERROR, + "Fail to drop packet"); + } + } + } + + tlv_buf_left -= (sizeof(*tlv_rxba) + tlv_len); + tmp = (u8 *)tlv_rxba + tlv_len + sizeof(*tlv_rxba); + tlv_rxba = (struct mwifiex_ie_types_rxba_sync *)tmp; + } +} diff --git a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.h b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.h index 63ecea89b4ab..22d991f514c8 100644 --- a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.h +++ b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.h @@ -81,5 +81,6 @@ struct mwifiex_rx_reorder_tbl * mwifiex_11n_get_rx_reorder_tbl(struct mwifiex_private *priv, int tid, u8 *ta); void mwifiex_11n_del_rx_reorder_tbl_by_ta(struct mwifiex_private *priv, u8 *ta); void mwifiex_update_rxreor_flags(struct mwifiex_adapter *adapter, u8 flags); - +void mwifiex_11n_rxba_sync_event(struct mwifiex_private *priv, + u8 *event_buf, u16 len); #endif /* _MWIFIEX_11N_RXREORDER_H_ */ diff --git a/drivers/net/wireless/marvell/mwifiex/fw.h b/drivers/net/wireless/marvell/mwifiex/fw.h index 3797ef41887e..3b40e0d6f59f 100644 --- a/drivers/net/wireless/marvell/mwifiex/fw.h +++ b/drivers/net/wireless/marvell/mwifiex/fw.h @@ -176,6 +176,7 @@ enum MWIFIEX_802_11_PRIVACY_FILTER { #define TLV_TYPE_PWK_CIPHER (PROPRIETARY_TLV_BASE_ID + 145) #define TLV_TYPE_GWK_CIPHER (PROPRIETARY_TLV_BASE_ID + 146) #define TLV_TYPE_TX_PAUSE (PROPRIETARY_TLV_BASE_ID + 148) +#define TLV_TYPE_RXBA_SYNC (PROPRIETARY_TLV_BASE_ID + 153) #define TLV_TYPE_COALESCE_RULE (PROPRIETARY_TLV_BASE_ID + 154) #define TLV_TYPE_KEY_PARAM_V2 (PROPRIETARY_TLV_BASE_ID + 156) #define TLV_TYPE_REPEAT_COUNT (PROPRIETARY_TLV_BASE_ID + 176) @@ -532,6 +533,7 @@ enum P2P_MODES { #define EVENT_CHANNEL_REPORT_RDY 0x00000054 #define EVENT_TX_DATA_PAUSE 0x00000055 #define EVENT_EXT_SCAN_REPORT 0x00000058 +#define EVENT_RXBA_SYNC 0x00000059 #define EVENT_BG_SCAN_STOPPED 0x00000065 #define EVENT_REMAIN_ON_CHAN_EXPIRED 0x0000005f #define EVENT_MULTI_CHAN_INFO 0x0000006a @@ -735,6 +737,16 @@ struct mwifiex_ie_types_chan_list_param_set { struct mwifiex_chan_scan_param_set chan_scan_param[1]; } __packed; +struct mwifiex_ie_types_rxba_sync { + struct mwifiex_ie_types_header header; + u8 mac[ETH_ALEN]; + u8 tid; + u8 reserved; + __le16 seq_num; + __le16 bitmap_len; + u8 bitmap[1]; +} __packed; + struct chan_band_param_set { u8 radio_type; u8 chan_number; diff --git a/drivers/net/wireless/marvell/mwifiex/sta_event.c b/drivers/net/wireless/marvell/mwifiex/sta_event.c index 7e394d485f54..b973ee87047a 100644 --- a/drivers/net/wireless/marvell/mwifiex/sta_event.c +++ b/drivers/net/wireless/marvell/mwifiex/sta_event.c @@ -873,6 +873,12 @@ int mwifiex_process_sta_event(struct mwifiex_private *priv) mwifiex_bt_coex_wlan_param_update_event(priv, adapter->event_skb); break; + case EVENT_RXBA_SYNC: + dev_dbg(adapter->dev, "EVENT: RXBA_SYNC\n"); + mwifiex_11n_rxba_sync_event(priv, adapter->event_body, + adapter->event_skb->len - + sizeof(eventcause)); + break; default: mwifiex_dbg(adapter, ERROR, "event: unknown event id: %#x\n", eventcause); diff --git a/drivers/net/wireless/marvell/mwifiex/uap_event.c b/drivers/net/wireless/marvell/mwifiex/uap_event.c index 86ff54296f39..d24eca34ac11 100644 --- a/drivers/net/wireless/marvell/mwifiex/uap_event.c +++ b/drivers/net/wireless/marvell/mwifiex/uap_event.c @@ -306,7 +306,12 @@ int mwifiex_process_uap_event(struct mwifiex_private *priv) mwifiex_dbg(adapter, EVENT, "event: multi-chan info\n"); mwifiex_process_multi_chan_event(priv, adapter->event_skb); break; - + case EVENT_RXBA_SYNC: + dev_dbg(adapter->dev, "EVENT: RXBA_SYNC\n"); + mwifiex_11n_rxba_sync_event(priv, adapter->event_body, + adapter->event_skb->len - + sizeof(eventcause)); + break; default: mwifiex_dbg(adapter, EVENT, "event: unknown event id: %#x\n", eventcause); From 5536c4aafcac094fb7ea6c3c1e6d999ae586171d Mon Sep 17 00:00:00 2001 From: Amitkumar Karwar Date: Mon, 25 Jul 2016 21:21:09 +0530 Subject: [PATCH 0694/1715] mwifiex: remove misleading disconnect message Disconnect message in mwifiex_reset_connect_state() would displays necessary information. We unnecessarily have exactly same message in cfg80211_disconnect(). As priv->cfg_bssid is cleared at this point of time, it prints incorrect(all zero) MAC. This message is removed here. Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/cfg80211.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c index 871ad8a9868f..235fb3931e0c 100644 --- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c +++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c @@ -2012,10 +2012,6 @@ mwifiex_cfg80211_disconnect(struct wiphy *wiphy, struct net_device *dev, if (mwifiex_deauthenticate(priv, NULL)) return -EFAULT; - mwifiex_dbg(priv->adapter, MSG, - "info: successfully disconnected from %pM:\t" - "reason code %d\n", priv->cfg_bssid, reason_code); - eth_zero_addr(priv->cfg_bssid); priv->hs2_enabled = false; From 432da7d243da32651e1fae677f3a83c16b346d47 Mon Sep 17 00:00:00 2001 From: Xinming Hu Date: Mon, 25 Jul 2016 21:21:10 +0530 Subject: [PATCH 0695/1715] mwifiex: add HT aggregation support for adhoc mode This patch adds HT support for adhoc station. Firmware will upload ibss sta connect event with beacon data, whenever new station joins the adhoc network. Driver will check the HT IE and decide whether to support HT aggreagation or not. Signed-off-by: Xinming Hu Signed-off-by: Cathy Luo Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/11n.h | 7 +- drivers/net/wireless/marvell/mwifiex/fw.h | 9 ++ .../net/wireless/marvell/mwifiex/sta_event.c | 132 +++++++++++++++++- 3 files changed, 144 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/11n.h b/drivers/net/wireless/marvell/mwifiex/11n.h index afdd58aa90de..ea0fa68b9913 100644 --- a/drivers/net/wireless/marvell/mwifiex/11n.h +++ b/drivers/net/wireless/marvell/mwifiex/11n.h @@ -171,9 +171,10 @@ mwifiex_find_stream_to_delete(struct mwifiex_private *priv, int ptr_tid, static inline int mwifiex_is_sta_11n_enabled(struct mwifiex_private *priv, struct mwifiex_sta_node *node) { - - if (!node || (priv->bss_role != MWIFIEX_BSS_ROLE_UAP) || - !priv->ap_11n_enabled) + if (!node || ((priv->bss_role == MWIFIEX_BSS_ROLE_UAP) && + !priv->ap_11n_enabled) || + ((priv->bss_mode == NL80211_IFTYPE_ADHOC) && + !priv->adapter->adhoc_11n_enabled)) return 0; return node->is_11n_enabled; diff --git a/drivers/net/wireless/marvell/mwifiex/fw.h b/drivers/net/wireless/marvell/mwifiex/fw.h index 3b40e0d6f59f..a88030a747b3 100644 --- a/drivers/net/wireless/marvell/mwifiex/fw.h +++ b/drivers/net/wireless/marvell/mwifiex/fw.h @@ -210,6 +210,7 @@ enum MWIFIEX_802_11_PRIVACY_FILTER { #define MWIFIEX_TX_DATA_BUF_SIZE_4K 4096 #define MWIFIEX_TX_DATA_BUF_SIZE_8K 8192 +#define MWIFIEX_TX_DATA_BUF_SIZE_12K 12288 #define ISSUPP_11NENABLED(FwCapInfo) (FwCapInfo & BIT(11)) #define ISSUPP_TDLS_ENABLED(FwCapInfo) (FwCapInfo & BIT(14)) @@ -506,6 +507,8 @@ enum P2P_MODES { #define EVENT_RSSI_HIGH 0x0000001c #define EVENT_SNR_HIGH 0x0000001d #define EVENT_IBSS_COALESCED 0x0000001e +#define EVENT_IBSS_STA_CONNECT 0x00000020 +#define EVENT_IBSS_STA_DISCONNECT 0x00000021 #define EVENT_DATA_RSSI_LOW 0x00000024 #define EVENT_DATA_SNR_LOW 0x00000025 #define EVENT_DATA_RSSI_HIGH 0x00000026 @@ -1686,6 +1689,12 @@ struct mwifiex_ie_types_wmm_param_set { u8 wmm_ie[1]; }; +struct mwifiex_ie_types_mgmt_frame { + struct mwifiex_ie_types_header header; + __le16 frame_control; + u8 frame_contents[0]; +}; + struct mwifiex_ie_types_wmm_queue_status { struct mwifiex_ie_types_header header; u8 queue_index; diff --git a/drivers/net/wireless/marvell/mwifiex/sta_event.c b/drivers/net/wireless/marvell/mwifiex/sta_event.c index b973ee87047a..9df0c4dc06ed 100644 --- a/drivers/net/wireless/marvell/mwifiex/sta_event.c +++ b/drivers/net/wireless/marvell/mwifiex/sta_event.c @@ -25,6 +25,99 @@ #include "wmm.h" #include "11n.h" +#define MWIFIEX_IBSS_CONNECT_EVT_FIX_SIZE 12 + +static int mwifiex_check_ibss_peer_capabilties(struct mwifiex_private *priv, + struct mwifiex_sta_node *sta_ptr, + struct sk_buff *event) +{ + int evt_len, ele_len; + u8 *curr; + struct ieee_types_header *ele_hdr; + struct mwifiex_ie_types_mgmt_frame *tlv_mgmt_frame; + const struct ieee80211_ht_cap *ht_cap; + const struct ieee80211_vht_cap *vht_cap; + + skb_pull(event, MWIFIEX_IBSS_CONNECT_EVT_FIX_SIZE); + evt_len = event->len; + curr = event->data; + + mwifiex_dbg_dump(priv->adapter, EVT_D, "ibss peer capabilties:", + event->data, event->len); + + skb_push(event, MWIFIEX_IBSS_CONNECT_EVT_FIX_SIZE); + + tlv_mgmt_frame = (void *)curr; + if (evt_len >= sizeof(*tlv_mgmt_frame) && + le16_to_cpu(tlv_mgmt_frame->header.type) == + TLV_TYPE_UAP_MGMT_FRAME) { + /* Locate curr pointer to the start of beacon tlv, + * timestamp 8 bytes, beacon intervel 2 bytes, + * capability info 2 bytes, totally 12 byte beacon header + */ + evt_len = le16_to_cpu(tlv_mgmt_frame->header.len); + curr += (sizeof(*tlv_mgmt_frame) + 12); + } else { + mwifiex_dbg(priv->adapter, MSG, + "management frame tlv not found!\n"); + return 0; + } + + while (evt_len >= sizeof(*ele_hdr)) { + ele_hdr = (struct ieee_types_header *)curr; + ele_len = ele_hdr->len; + + if (evt_len < ele_len + sizeof(*ele_hdr)) + break; + + switch (ele_hdr->element_id) { + case WLAN_EID_HT_CAPABILITY: + sta_ptr->is_11n_enabled = true; + ht_cap = (void *)(ele_hdr + 2); + sta_ptr->max_amsdu = le16_to_cpu(ht_cap->cap_info) & + IEEE80211_HT_CAP_MAX_AMSDU ? + MWIFIEX_TX_DATA_BUF_SIZE_8K : + MWIFIEX_TX_DATA_BUF_SIZE_4K; + mwifiex_dbg(priv->adapter, INFO, + "11n enabled!, max_amsdu : %d\n", + sta_ptr->max_amsdu); + break; + + case WLAN_EID_VHT_CAPABILITY: + sta_ptr->is_11ac_enabled = true; + vht_cap = (void *)(ele_hdr + 2); + /* check VHT MAXMPDU capability */ + switch (le32_to_cpu(vht_cap->vht_cap_info) & 0x3) { + case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454: + sta_ptr->max_amsdu = + MWIFIEX_TX_DATA_BUF_SIZE_12K; + break; + case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991: + sta_ptr->max_amsdu = + MWIFIEX_TX_DATA_BUF_SIZE_8K; + break; + case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_3895: + sta_ptr->max_amsdu = + MWIFIEX_TX_DATA_BUF_SIZE_4K; + default: + break; + } + + mwifiex_dbg(priv->adapter, INFO, + "11ac enabled!, max_amsdu : %d\n", + sta_ptr->max_amsdu); + break; + default: + break; + } + + curr += (ele_len + sizeof(*ele_hdr)); + evt_len -= (ele_len + sizeof(*ele_hdr)); + } + + return 0; +} + /* * This function resets the connection state. * @@ -519,6 +612,8 @@ void mwifiex_bt_coex_wlan_param_update_event(struct mwifiex_private *priv, * - EVENT_LINK_QUALITY * - EVENT_PRE_BEACON_LOST * - EVENT_IBSS_COALESCED + * - EVENT_IBSS_STA_CONNECT + * - EVENT_IBSS_STA_DISCONNECT * - EVENT_WEP_ICV_ERR * - EVENT_BW_CHANGE * - EVENT_HOSTWAKE_STAIE @@ -547,9 +642,11 @@ void mwifiex_bt_coex_wlan_param_update_event(struct mwifiex_private *priv, int mwifiex_process_sta_event(struct mwifiex_private *priv) { struct mwifiex_adapter *adapter = priv->adapter; - int ret = 0; + int ret = 0, i; u32 eventcause = adapter->event_cause; u16 ctrl, reason_code; + u8 ibss_sta_addr[ETH_ALEN]; + struct mwifiex_sta_node *sta_ptr; switch (eventcause) { case EVENT_DUMMY_HOST_WAKEUP_SIGNAL: @@ -775,6 +872,39 @@ int mwifiex_process_sta_event(struct mwifiex_private *priv) HostCmd_CMD_802_11_IBSS_COALESCING_STATUS, HostCmd_ACT_GEN_GET, 0, NULL, false); break; + case EVENT_IBSS_STA_CONNECT: + ether_addr_copy(ibss_sta_addr, adapter->event_body + 2); + mwifiex_dbg(adapter, EVENT, "event: IBSS_STA_CONNECT %pM\n", + ibss_sta_addr); + sta_ptr = mwifiex_add_sta_entry(priv, ibss_sta_addr); + if (sta_ptr && adapter->adhoc_11n_enabled) { + mwifiex_check_ibss_peer_capabilties(priv, sta_ptr, + adapter->event_skb); + if (sta_ptr->is_11n_enabled) + for (i = 0; i < MAX_NUM_TID; i++) + sta_ptr->ampdu_sta[i] = + priv->aggr_prio_tbl[i].ampdu_user; + else + for (i = 0; i < MAX_NUM_TID; i++) + sta_ptr->ampdu_sta[i] = + BA_STREAM_NOT_ALLOWED; + memset(sta_ptr->rx_seq, 0xff, sizeof(sta_ptr->rx_seq)); + } + + break; + case EVENT_IBSS_STA_DISCONNECT: + ether_addr_copy(ibss_sta_addr, adapter->event_body + 2); + mwifiex_dbg(adapter, EVENT, "event: IBSS_STA_DISCONNECT %pM\n", + ibss_sta_addr); + sta_ptr = mwifiex_get_sta_entry(priv, ibss_sta_addr); + if (sta_ptr && sta_ptr->is_11n_enabled) { + mwifiex_11n_del_rx_reorder_tbl_by_ta(priv, + ibss_sta_addr); + mwifiex_del_tx_ba_stream_tbl_by_ra(priv, ibss_sta_addr); + } + mwifiex_wmm_del_peer_ra_list(priv, ibss_sta_addr); + mwifiex_del_sta_entry(priv, ibss_sta_addr); + break; case EVENT_ADDBA: mwifiex_dbg(adapter, EVENT, "event: ADDBA Request\n"); mwifiex_send_cmd(priv, HostCmd_CMD_11N_ADDBA_RSP, From 441756b6a6e3818dc6f2e76b9526558d450ce778 Mon Sep 17 00:00:00 2001 From: Ganapathi Bhat Date: Mon, 25 Jul 2016 21:21:11 +0530 Subject: [PATCH 0696/1715] mwifiex: fix radar detection issue It's been observed that firmware sends RADAR detected event without specifying bss_num/bss_type. Also, the event body is empty. Currently the event is being ignored by driver. This patch checks on which interface 11H is active, accordingly fills bss_num/bss_type and handles the event. Condition "if (le32_to_cpu(rdr_event->passed))" which always fails is also removed. Signed-off-by: Ganapathi Bhat Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/11h.c | 27 ++++++++----------- drivers/net/wireless/marvell/mwifiex/cmdevt.c | 18 +++++++++++-- 2 files changed, 27 insertions(+), 18 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/11h.c b/drivers/net/wireless/marvell/mwifiex/11h.c index 81c60d0a1bda..43dccd5b0291 100644 --- a/drivers/net/wireless/marvell/mwifiex/11h.c +++ b/drivers/net/wireless/marvell/mwifiex/11h.c @@ -260,22 +260,17 @@ int mwifiex_11h_handle_radar_detected(struct mwifiex_private *priv, rdr_event = (void *)(skb->data + sizeof(u32)); - if (le32_to_cpu(rdr_event->passed)) { - mwifiex_dbg(priv->adapter, MSG, - "radar detected; indicating kernel\n"); - if (mwifiex_stop_radar_detection(priv, &priv->dfs_chandef)) - mwifiex_dbg(priv->adapter, ERROR, - "Failed to stop CAC in FW\n"); - cfg80211_radar_event(priv->adapter->wiphy, &priv->dfs_chandef, - GFP_KERNEL); - mwifiex_dbg(priv->adapter, MSG, "regdomain: %d\n", - rdr_event->reg_domain); - mwifiex_dbg(priv->adapter, MSG, "radar detection type: %d\n", - rdr_event->det_type); - } else { - mwifiex_dbg(priv->adapter, MSG, - "false radar detection event!\n"); - } + mwifiex_dbg(priv->adapter, MSG, + "radar detected; indicating kernel\n"); + if (mwifiex_stop_radar_detection(priv, &priv->dfs_chandef)) + mwifiex_dbg(priv->adapter, ERROR, + "Failed to stop CAC in FW\n"); + cfg80211_radar_event(priv->adapter->wiphy, &priv->dfs_chandef, + GFP_KERNEL); + mwifiex_dbg(priv->adapter, MSG, "regdomain: %d\n", + rdr_event->reg_domain); + mwifiex_dbg(priv->adapter, MSG, "radar detection type: %d\n", + rdr_event->det_type); return 0; } diff --git a/drivers/net/wireless/marvell/mwifiex/cmdevt.c b/drivers/net/wireless/marvell/mwifiex/cmdevt.c index c29f26d8baf2..d433aa01fdbb 100644 --- a/drivers/net/wireless/marvell/mwifiex/cmdevt.c +++ b/drivers/net/wireless/marvell/mwifiex/cmdevt.c @@ -480,13 +480,27 @@ int mwifiex_free_cmd_buffer(struct mwifiex_adapter *adapter) */ int mwifiex_process_event(struct mwifiex_adapter *adapter) { - int ret; + int ret, i; struct mwifiex_private *priv = mwifiex_get_priv(adapter, MWIFIEX_BSS_ROLE_ANY); struct sk_buff *skb = adapter->event_skb; - u32 eventcause = adapter->event_cause; + u32 eventcause; struct mwifiex_rxinfo *rx_info; + if ((adapter->event_cause & EVENT_ID_MASK) == EVENT_RADAR_DETECTED) { + for (i = 0; i < adapter->priv_num; i++) { + priv = adapter->priv[i]; + if (priv && mwifiex_is_11h_active(priv)) { + adapter->event_cause |= + ((priv->bss_num & 0xff) << 16) | + ((priv->bss_type & 0xff) << 24); + break; + } + } + } + + eventcause = adapter->event_cause; + /* Save the last event to debug log */ adapter->dbg.last_event_index = (adapter->dbg.last_event_index + 1) % DBG_CMD_NUM; From 3f37ec79dd21fbdbbab8143a48a87272b22fef22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Mon, 25 Jul 2016 20:33:56 +0200 Subject: [PATCH 0697/1715] bcma: support BCM53573 series of wireless SoCs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BCM53573 seems to be the first series of Northstar family with wireless on the chip. The base models are BCM53573-s (A0, A1) and there is also BCM47189B0 which seems to be some small modification. The only problem with these chipsets seems to be watchdog. It's totally unavailable on 53573A0 / 53573A1 and preferable PMU watchdog is broken on 53573B0 / 53573B1. Signed-off-by: Rafał Miłecki Signed-off-by: Kalle Valo --- drivers/bcma/driver_chipcommon.c | 32 +++++++++++++++++++++++++++++--- include/linux/bcma/bcma.h | 3 +++ 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/drivers/bcma/driver_chipcommon.c b/drivers/bcma/driver_chipcommon.c index 921ce1834673..b4f6520e74f0 100644 --- a/drivers/bcma/driver_chipcommon.c +++ b/drivers/bcma/driver_chipcommon.c @@ -36,12 +36,31 @@ u32 bcma_chipco_get_alp_clock(struct bcma_drv_cc *cc) } EXPORT_SYMBOL_GPL(bcma_chipco_get_alp_clock); +static bool bcma_core_cc_has_pmu_watchdog(struct bcma_drv_cc *cc) +{ + struct bcma_bus *bus = cc->core->bus; + + if (cc->capabilities & BCMA_CC_CAP_PMU) { + if (bus->chipinfo.id == BCMA_CHIP_ID_BCM53573) { + WARN(bus->chipinfo.rev <= 1, "No watchdog available\n"); + /* 53573B0 and 53573B1 have bugged PMU watchdog. It can + * be enabled but timer can't be bumped. Use CC one + * instead. + */ + return false; + } + return true; + } else { + return false; + } +} + static u32 bcma_chipco_watchdog_get_max_timer(struct bcma_drv_cc *cc) { struct bcma_bus *bus = cc->core->bus; u32 nb; - if (cc->capabilities & BCMA_CC_CAP_PMU) { + if (bcma_core_cc_has_pmu_watchdog(cc)) { if (bus->chipinfo.id == BCMA_CHIP_ID_BCM4706) nb = 32; else if (cc->core->id.rev < 26) @@ -95,9 +114,16 @@ static int bcma_chipco_watchdog_ticks_per_ms(struct bcma_drv_cc *cc) int bcma_chipco_watchdog_register(struct bcma_drv_cc *cc) { + struct bcma_bus *bus = cc->core->bus; struct bcm47xx_wdt wdt = {}; struct platform_device *pdev; + if (bus->chipinfo.id == BCMA_CHIP_ID_BCM53573 && + bus->chipinfo.rev <= 1) { + pr_debug("No watchdog on 53573A0 / 53573A1\n"); + return 0; + } + wdt.driver_data = cc; wdt.timer_set = bcma_chipco_watchdog_timer_set_wdt; wdt.timer_set_ms = bcma_chipco_watchdog_timer_set_ms_wdt; @@ -105,7 +131,7 @@ int bcma_chipco_watchdog_register(struct bcma_drv_cc *cc) bcma_chipco_watchdog_get_max_timer(cc) / cc->ticks_per_ms; pdev = platform_device_register_data(NULL, "bcm47xx-wdt", - cc->core->bus->num, &wdt, + bus->num, &wdt, sizeof(wdt)); if (IS_ERR(pdev)) return PTR_ERR(pdev); @@ -217,7 +243,7 @@ u32 bcma_chipco_watchdog_timer_set(struct bcma_drv_cc *cc, u32 ticks) u32 maxt; maxt = bcma_chipco_watchdog_get_max_timer(cc); - if (cc->capabilities & BCMA_CC_CAP_PMU) { + if (bcma_core_cc_has_pmu_watchdog(cc)) { if (ticks == 1) ticks = 2; else if (ticks > maxt) diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h index 3db25df396cb..8eeedb2db924 100644 --- a/include/linux/bcma/bcma.h +++ b/include/linux/bcma/bcma.h @@ -205,6 +205,9 @@ struct bcma_host_ops { #define BCMA_PKG_ID_BCM4709 0 #define BCMA_CHIP_ID_BCM47094 53030 #define BCMA_CHIP_ID_BCM53018 53018 +#define BCMA_CHIP_ID_BCM53573 53573 +#define BCMA_PKG_ID_BCM53573 0 +#define BCMA_PKG_ID_BCM47189 1 /* Board types (on PCI usually equals to the subsystem dev id) */ /* BCM4313 */ From 2f69e67058fbe9750a4f66ea30b4b6a8648a2fdc Mon Sep 17 00:00:00 2001 From: Heinrich Schuchardt Date: Sun, 31 Jul 2016 12:39:15 +0200 Subject: [PATCH 0698/1715] mwifiex: remove superfluous condition for_each_property_of_node is only executed if the property prop is not NULL. Signed-off-by: Heinrich Schuchardt Acked-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/sta_cmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/marvell/mwifiex/sta_cmd.c b/drivers/net/wireless/marvell/mwifiex/sta_cmd.c index 108c11cc0c16..d1f8011080bc 100644 --- a/drivers/net/wireless/marvell/mwifiex/sta_cmd.c +++ b/drivers/net/wireless/marvell/mwifiex/sta_cmd.c @@ -1485,7 +1485,7 @@ int mwifiex_dnld_dt_cfgdata(struct mwifiex_private *priv, continue; /* property header is 6 bytes, data must fit in cmd buffer */ - if (prop && prop->value && prop->length > 6 && + if (prop->value && prop->length > 6 && prop->length <= MWIFIEX_SIZE_OF_CMD_BUFFER - S_DS_GEN) { ret = mwifiex_send_cmd(priv, HostCmd_CMD_CFG_DATA, HostCmd_ACT_GEN_SET, 0, From b0d80f19c14fc5752e806860fe2c702448f5b442 Mon Sep 17 00:00:00 2001 From: Heinrich Schuchardt Date: Sun, 31 Jul 2016 14:11:21 +0200 Subject: [PATCH 0699/1715] mwifiex: key_material_v2 remove superfluous condition We are using mac as source address in a memcpy. In the lines below we can assume mac is not NULL. Signed-off-by: Heinrich Schuchardt Acked-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/sta_cmd.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/sta_cmd.c b/drivers/net/wireless/marvell/mwifiex/sta_cmd.c index d1f8011080bc..1d8f2844a878 100644 --- a/drivers/net/wireless/marvell/mwifiex/sta_cmd.c +++ b/drivers/net/wireless/marvell/mwifiex/sta_cmd.c @@ -706,15 +706,10 @@ mwifiex_cmd_802_11_key_material_v2(struct mwifiex_private *priv, (priv->wep_key_curr_index & KEY_INDEX_MASK)) key_info |= KEY_DEFAULT; } else { - if (mac) { - if (is_broadcast_ether_addr(mac)) - key_info |= KEY_MCAST; - else - key_info |= KEY_UNICAST | - KEY_DEFAULT; - } else { + if (is_broadcast_ether_addr(mac)) key_info |= KEY_MCAST; - } + else + key_info |= KEY_UNICAST | KEY_DEFAULT; } } km->key_param_set.key_info = cpu_to_le16(key_info); From bd6b0242652a8a284eed2adec5ac1fc043b8f2ef Mon Sep 17 00:00:00 2001 From: Pavel Andrianov Date: Tue, 2 Aug 2016 12:41:53 +0300 Subject: [PATCH 0700/1715] wl3501_cs: Add spinlock to wl3501_reset Likely wl3501_reset should acquire spinlock as wl3501_{open, close}. One of calls of wl3501_reset has been already protected. The others were unprotected and might lead to a race condition. The patch adds spinlock into the wl3501_reset and removes it from wl3501_tx_timeout. Found by Linux Driver Verification project (linuxtesting.org) Signed-off-by: Pavel Andrianov Acked-by: Vaishali Thakkar Signed-off-by: Kalle Valo --- drivers/net/wireless/wl3501_cs.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/wl3501_cs.c b/drivers/net/wireless/wl3501_cs.c index 82d94f83b6b4..932f3f81e8cf 100644 --- a/drivers/net/wireless/wl3501_cs.c +++ b/drivers/net/wireless/wl3501_cs.c @@ -1258,7 +1258,9 @@ static int wl3501_reset(struct net_device *dev) { struct wl3501_card *this = netdev_priv(dev); int rc = -ENODEV; + unsigned long flags; + spin_lock_irqsave(&this->lock, flags); wl3501_block_interrupt(this); if (wl3501_init_firmware(this)) { @@ -1280,20 +1282,17 @@ static int wl3501_reset(struct net_device *dev) pr_debug("%s: device reset", dev->name); rc = 0; out: + spin_unlock_irqrestore(&this->lock, flags); return rc; } static void wl3501_tx_timeout(struct net_device *dev) { - struct wl3501_card *this = netdev_priv(dev); struct net_device_stats *stats = &dev->stats; - unsigned long flags; int rc; stats->tx_errors++; - spin_lock_irqsave(&this->lock, flags); rc = wl3501_reset(dev); - spin_unlock_irqrestore(&this->lock, flags); if (rc) printk(KERN_ERR "%s: Error %d resetting card on Tx timeout!\n", dev->name, rc); From af8a9a67c3466f70ab28a112d18eb5d327d40ca2 Mon Sep 17 00:00:00 2001 From: Sergey Ryazanov Date: Tue, 2 Aug 2016 14:19:28 +0300 Subject: [PATCH 0701/1715] ath5k: fix EEPROM dumping via debugfs EEPROM size calculated in 16-bit words, so we should take into account this fact during buffer allocation. CC: Jiri Slaby CC: Nick Kossifidis CC: Luis R. Rodriguez Signed-off-by: Sergey Ryazanov Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath5k/debug.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath/ath5k/debug.c b/drivers/net/wireless/ath/ath5k/debug.c index 929d7ccc031c..4f8d9ed04f5e 100644 --- a/drivers/net/wireless/ath/ath5k/debug.c +++ b/drivers/net/wireless/ath/ath5k/debug.c @@ -909,7 +909,7 @@ static int open_file_eeprom(struct inode *inode, struct file *file) struct ath5k_hw *ah = inode->i_private; bool res; int i, ret; - u32 eesize; + u32 eesize; /* NB: in 16-bit words */ u16 val, *buf; /* Get eeprom size */ @@ -932,7 +932,7 @@ static int open_file_eeprom(struct inode *inode, struct file *file) /* Create buffer and read in eeprom */ - buf = vmalloc(eesize); + buf = vmalloc(eesize * 2); if (!buf) { ret = -ENOMEM; goto err; @@ -952,7 +952,7 @@ static int open_file_eeprom(struct inode *inode, struct file *file) } ep->buf = buf; - ep->len = i; + ep->len = eesize * 2; file->private_data = (void *)ep; From f898005ff99f348febba88dff8840df6e4367758 Mon Sep 17 00:00:00 2001 From: Heinrich Schuchardt Date: Tue, 2 Aug 2016 21:26:21 +0200 Subject: [PATCH 0702/1715] rtlwifi: remove superfluous condition If sta == NULL, the changed line will not be reached. So no need to check that sta != NULL here. Signed-off-by: Heinrich Schuchardt Acked-by: Larry Finger Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/core.c b/drivers/net/wireless/realtek/rtlwifi/core.c index 41f77f8a309e..7aee5ebb147d 100644 --- a/drivers/net/wireless/realtek/rtlwifi/core.c +++ b/drivers/net/wireless/realtek/rtlwifi/core.c @@ -1135,7 +1135,7 @@ static void rtl_op_bss_info_changed(struct ieee80211_hw *hw, mac->mode = WIRELESS_MODE_AC_24G; } - if (vif->type == NL80211_IFTYPE_STATION && sta) + if (vif->type == NL80211_IFTYPE_STATION) rtlpriv->cfg->ops->update_rate_tbl(hw, sta, 0); rcu_read_unlock(); From ba852018d493c99d3183fdcc7e41b725f2ec1321 Mon Sep 17 00:00:00 2001 From: Christophe Jaillet Date: Mon, 8 Aug 2016 09:38:48 +0200 Subject: [PATCH 0703/1715] mwifiex: fix the length parameter of a memset In 'mwifiex_get_ver_ext', we have: struct mwifiex_ver_ext ver_ext; memset(&ver_ext, 0, sizeof(struct host_cmd_ds_version_ext)); This is likely that memset'ing sizeof(struct mwifiex_ver_ext) was expected. Remove the ambiguity by using the variable name directly instead of its type. Signed-off-by: Christophe JAILLET Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/sta_ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c b/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c index e06647a327b6..78819e8018ab 100644 --- a/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c +++ b/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c @@ -1180,7 +1180,7 @@ mwifiex_get_ver_ext(struct mwifiex_private *priv, u32 version_str_sel) { struct mwifiex_ver_ext ver_ext; - memset(&ver_ext, 0, sizeof(struct host_cmd_ds_version_ext)); + memset(&ver_ext, 0, sizeof(ver_ext)); ver_ext.version_str_sel = version_str_sel; if (mwifiex_send_cmd(priv, HostCmd_CMD_VERSION_EXT, HostCmd_ACT_GEN_GET, 0, &ver_ext, true)) From 6a1622000ac92244ae605e2345c348c0bf281122 Mon Sep 17 00:00:00 2001 From: Christophe Jaillet Date: Mon, 8 Aug 2016 09:39:00 +0200 Subject: [PATCH 0704/1715] mwifiex: simplify length computation for some memset This patch should be a no-op. It just simplifies code by using the name of a variable instead of its type when calling 'sizeof'. Signed-off-by: Christophe JAILLET Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/sta_ioctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c b/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c index 78819e8018ab..644f3a248741 100644 --- a/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c +++ b/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c @@ -574,7 +574,7 @@ int mwifiex_enable_hs(struct mwifiex_adapter *adapter) adapter->hs_activate_wait_q_woken = false; - memset(&hscfg, 0, sizeof(struct mwifiex_ds_hs_cfg)); + memset(&hscfg, 0, sizeof(hscfg)); hscfg.is_invoke_hostcmd = true; adapter->hs_enabling = true; @@ -1138,7 +1138,7 @@ int mwifiex_set_encode(struct mwifiex_private *priv, struct key_params *kp, { struct mwifiex_ds_encrypt_key encrypt_key; - memset(&encrypt_key, 0, sizeof(struct mwifiex_ds_encrypt_key)); + memset(&encrypt_key, 0, sizeof(encrypt_key)); encrypt_key.key_len = key_len; encrypt_key.key_index = key_index; From b64db1b252e9974a43a51ba083fa7d03e4716167 Mon Sep 17 00:00:00 2001 From: Xinming Hu Date: Tue, 9 Aug 2016 20:20:44 +0530 Subject: [PATCH 0705/1715] mwifiex: correct aid value during tdls setup AID gets updated during TDLS setup, but modified value isn't reflected in "priv->assoc_rsp_buf". This causes TDLS setup failure. The problem is fixed here. Fixes: 4aff53ef18e4a4 ("mwifiex: parsing aid while receiving..") Signed-off-by: Xinming Hu Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/join.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/join.c b/drivers/net/wireless/marvell/mwifiex/join.c index 1c7b00630b90..b89596c18b41 100644 --- a/drivers/net/wireless/marvell/mwifiex/join.c +++ b/drivers/net/wireless/marvell/mwifiex/join.c @@ -669,9 +669,8 @@ int mwifiex_ret_802_11_associate(struct mwifiex_private *priv, priv->assoc_rsp_size = min(le16_to_cpu(resp->size) - S_DS_GEN, sizeof(priv->assoc_rsp_buf)); - memcpy(priv->assoc_rsp_buf, &resp->params, priv->assoc_rsp_size); - assoc_rsp->a_id = cpu_to_le16(aid); + memcpy(priv->assoc_rsp_buf, &resp->params, priv->assoc_rsp_size); if (status_code) { priv->adapter->dbg.num_cmd_assoc_failure++; From 41960b4dfdfce7d669dbec6a492202d1b18accb7 Mon Sep 17 00:00:00 2001 From: Amitkumar Karwar Date: Tue, 9 Aug 2016 20:20:45 +0530 Subject: [PATCH 0706/1715] mwifiex: add CHAN_REGION_CFG command This patch adds command preparation and response handling for CHAN_REGION_CFG command. These changes are prerequisites for adding custom regulatory domain support. Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/fw.h | 7 ++++ .../net/wireless/marvell/mwifiex/sta_cmd.c | 21 ++++++++++ .../wireless/marvell/mwifiex/sta_cmdresp.c | 41 +++++++++++++++++++ 3 files changed, 69 insertions(+) diff --git a/drivers/net/wireless/marvell/mwifiex/fw.h b/drivers/net/wireless/marvell/mwifiex/fw.h index a88030a747b3..085db9921c38 100644 --- a/drivers/net/wireless/marvell/mwifiex/fw.h +++ b/drivers/net/wireless/marvell/mwifiex/fw.h @@ -190,6 +190,7 @@ enum MWIFIEX_802_11_PRIVACY_FILTER { #define TLV_BTCOEX_WL_SCANTIME (PROPRIETARY_TLV_BASE_ID + 203) #define TLV_TYPE_BSS_MODE (PROPRIETARY_TLV_BASE_ID + 206) #define TLV_TYPE_RANDOM_MAC (PROPRIETARY_TLV_BASE_ID + 236) +#define TLV_TYPE_CHAN_ATTR_CFG (PROPRIETARY_TLV_BASE_ID + 237) #define MWIFIEX_TX_DATA_BUF_SIZE_2K 2048 @@ -382,6 +383,7 @@ enum MWIFIEX_802_11_PRIVACY_FILTER { #define HostCmd_CMD_MC_POLICY 0x0121 #define HostCmd_CMD_TDLS_OPER 0x0122 #define HostCmd_CMD_SDIO_SP_RX_AGGR_CFG 0x0223 +#define HostCmd_CMD_CHAN_REGION_CFG 0x0242 #define PROTOCOL_NO_SECURITY 0x01 #define PROTOCOL_STATIC_WEP 0x02 @@ -2224,6 +2226,10 @@ struct host_cmd_ds_gtk_rekey_params { __le32 replay_ctr_high; } __packed; +struct host_cmd_ds_chan_region_cfg { + __le16 action; +} __packed; + struct host_cmd_ds_command { __le16 command; __le16 size; @@ -2298,6 +2304,7 @@ struct host_cmd_ds_command { struct host_cmd_ds_robust_coex coex; struct host_cmd_ds_wakeup_reason hs_wakeup_reason; struct host_cmd_ds_gtk_rekey_params rekey; + struct host_cmd_ds_chan_region_cfg reg_cfg; } params; } __packed; diff --git a/drivers/net/wireless/marvell/mwifiex/sta_cmd.c b/drivers/net/wireless/marvell/mwifiex/sta_cmd.c index 1d8f2844a878..49048b41fd36 100644 --- a/drivers/net/wireless/marvell/mwifiex/sta_cmd.c +++ b/drivers/net/wireless/marvell/mwifiex/sta_cmd.c @@ -1594,6 +1594,21 @@ static int mwifiex_cmd_gtk_rekey_offload(struct mwifiex_private *priv, return 0; } +static int mwifiex_cmd_chan_region_cfg(struct mwifiex_private *priv, + struct host_cmd_ds_command *cmd, + u16 cmd_action) +{ + struct host_cmd_ds_chan_region_cfg *reg = &cmd->params.reg_cfg; + + cmd->command = cpu_to_le16(HostCmd_CMD_CHAN_REGION_CFG); + cmd->size = cpu_to_le16(sizeof(*reg) + S_DS_GEN); + + if (cmd_action == HostCmd_ACT_GEN_GET) + reg->action = cpu_to_le16(cmd_action); + + return 0; +} + static int mwifiex_cmd_coalesce_cfg(struct mwifiex_private *priv, struct host_cmd_ds_command *cmd, @@ -2134,6 +2149,9 @@ int mwifiex_sta_prepare_cmd(struct mwifiex_private *priv, uint16_t cmd_no, ret = mwifiex_cmd_gtk_rekey_offload(priv, cmd_ptr, cmd_action, data_buf); break; + case HostCmd_CMD_CHAN_REGION_CFG: + ret = mwifiex_cmd_chan_region_cfg(priv, cmd_ptr, cmd_action); + break; default: mwifiex_dbg(priv->adapter, ERROR, "PREP_CMD: unknown cmd- %#x\n", cmd_no); @@ -2271,6 +2289,9 @@ int mwifiex_sta_init_cmd(struct mwifiex_private *priv, u8 first_sta, bool init) if (ret) return -1; } + + mwifiex_send_cmd(priv, HostCmd_CMD_CHAN_REGION_CFG, + HostCmd_ACT_GEN_GET, 0, NULL, true); } /* get tx rate */ diff --git a/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c b/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c index 90e191bf1343..db85330ffb20 100644 --- a/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c +++ b/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c @@ -1022,6 +1022,44 @@ static int mwifiex_ret_robust_coex(struct mwifiex_private *priv, return 0; } +static int mwifiex_ret_chan_region_cfg(struct mwifiex_private *priv, + struct host_cmd_ds_command *resp) +{ + struct host_cmd_ds_chan_region_cfg *reg = &resp->params.reg_cfg; + u16 action = le16_to_cpu(reg->action); + u16 tlv, tlv_buf_len, tlv_buf_left; + struct mwifiex_ie_types_header *head; + u8 *tlv_buf; + + if (action != HostCmd_ACT_GEN_GET) + return 0; + + tlv_buf = (u8 *)reg + sizeof(*reg); + tlv_buf_left = le16_to_cpu(resp->size) - S_DS_GEN - sizeof(*reg); + + while (tlv_buf_left >= sizeof(*head)) { + head = (struct mwifiex_ie_types_header *)tlv_buf; + tlv = le16_to_cpu(head->type); + tlv_buf_len = le16_to_cpu(head->len); + + if (tlv_buf_left < (sizeof(*head) + tlv_buf_len)) + break; + + switch (tlv) { + case TLV_TYPE_CHAN_ATTR_CFG: + mwifiex_dbg_dump(priv->adapter, CMD_D, "CHAN:", + (u8 *)head + sizeof(*head), + tlv_buf_len); + break; + } + + tlv_buf += (sizeof(*head) + tlv_buf_len); + tlv_buf_left -= (sizeof(*head) + tlv_buf_len); + } + + return 0; +} + /* * This function handles the command responses. * @@ -1239,6 +1277,9 @@ int mwifiex_process_sta_cmdresp(struct mwifiex_private *priv, u16 cmdresp_no, break; case HostCmd_CMD_GTK_REKEY_OFFLOAD_CFG: break; + case HostCmd_CMD_CHAN_REGION_CFG: + ret = mwifiex_ret_chan_region_cfg(priv, resp); + break; default: mwifiex_dbg(adapter, ERROR, "CMD_RESP: unknown cmd response %#x\n", From 72539799104d4d70c2afcb8f0fe2a7a507a41c81 Mon Sep 17 00:00:00 2001 From: Amitkumar Karwar Date: Tue, 9 Aug 2016 20:20:46 +0530 Subject: [PATCH 0707/1715] mwifiex: add custom regulatory domain support This patch creates custom regulatory rules based on the information received from firmware and enable them during wiphy registration. Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- .../net/wireless/marvell/mwifiex/cfg80211.c | 39 +++++--- drivers/net/wireless/marvell/mwifiex/fw.h | 8 ++ drivers/net/wireless/marvell/mwifiex/main.c | 2 + drivers/net/wireless/marvell/mwifiex/main.h | 1 + .../wireless/marvell/mwifiex/sta_cmdresp.c | 91 +++++++++++++++++++ 5 files changed, 127 insertions(+), 14 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c index 235fb3931e0c..876d420e936e 100644 --- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c +++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c @@ -4141,9 +4141,12 @@ int mwifiex_register_cfg80211(struct mwifiex_adapter *adapter) wiphy->cipher_suites = mwifiex_cipher_suites; wiphy->n_cipher_suites = ARRAY_SIZE(mwifiex_cipher_suites); - if (adapter->region_code) - wiphy->regulatory_flags |= REGULATORY_DISABLE_BEACON_HINTS | + if (adapter->regd) { + wiphy->regulatory_flags |= REGULATORY_CUSTOM_REG | + REGULATORY_DISABLE_BEACON_HINTS | REGULATORY_COUNTRY_IE_IGNORE; + wiphy_apply_custom_regulatory(wiphy, adapter->regd); + } ether_addr_copy(wiphy->perm_addr, adapter->perm_addr); wiphy->signal_type = CFG80211_SIGNAL_TYPE_MBM; @@ -4209,19 +4212,27 @@ int mwifiex_register_cfg80211(struct mwifiex_adapter *adapter) return ret; } - if (reg_alpha2 && mwifiex_is_valid_alpha2(reg_alpha2)) { - mwifiex_dbg(adapter, INFO, - "driver hint alpha2: %2.2s\n", reg_alpha2); - regulatory_hint(wiphy, reg_alpha2); - } else { - if (adapter->region_code == 0x00) { - mwifiex_dbg(adapter, WARN, "Ignore world regulatory domain\n"); + if (!adapter->regd) { + if (reg_alpha2 && mwifiex_is_valid_alpha2(reg_alpha2)) { + mwifiex_dbg(adapter, INFO, + "driver hint alpha2: %2.2s\n", reg_alpha2); + regulatory_hint(wiphy, reg_alpha2); } else { - country_code = - mwifiex_11d_code_2_region(adapter->region_code); - if (country_code && - regulatory_hint(wiphy, country_code)) - mwifiex_dbg(priv->adapter, ERROR, "regulatory_hint() failed\n"); + if (adapter->region_code == 0x00) { + mwifiex_dbg(adapter, WARN, + "Ignore world regulatory domain\n"); + } else { + wiphy->regulatory_flags |= + REGULATORY_DISABLE_BEACON_HINTS | + REGULATORY_COUNTRY_IE_IGNORE; + country_code = + mwifiex_11d_code_2_region( + adapter->region_code); + if (country_code && + regulatory_hint(wiphy, country_code)) + mwifiex_dbg(priv->adapter, ERROR, + "regulatory_hint() failed\n"); + } } } diff --git a/drivers/net/wireless/marvell/mwifiex/fw.h b/drivers/net/wireless/marvell/mwifiex/fw.h index 085db9921c38..18aa5256e88b 100644 --- a/drivers/net/wireless/marvell/mwifiex/fw.h +++ b/drivers/net/wireless/marvell/mwifiex/fw.h @@ -416,6 +416,14 @@ enum P2P_MODES { P2P_MODE_CLIENT = 3, }; +enum mwifiex_channel_flags { + MWIFIEX_CHANNEL_PASSIVE = BIT(0), + MWIFIEX_CHANNEL_DFS = BIT(1), + MWIFIEX_CHANNEL_NOHT40 = BIT(2), + MWIFIEX_CHANNEL_NOHT80 = BIT(3), + MWIFIEX_CHANNEL_DISABLED = BIT(7), +}; + #define HostCmd_RET_BIT 0x8000 #define HostCmd_ACT_GEN_GET 0x0000 #define HostCmd_ACT_GEN_SET 0x0001 diff --git a/drivers/net/wireless/marvell/mwifiex/main.c b/drivers/net/wireless/marvell/mwifiex/main.c index db4925db39aa..51d4dfcf1214 100644 --- a/drivers/net/wireless/marvell/mwifiex/main.c +++ b/drivers/net/wireless/marvell/mwifiex/main.c @@ -139,6 +139,8 @@ static int mwifiex_unregister(struct mwifiex_adapter *adapter) adapter->nd_info = NULL; } + kfree(adapter->regd); + vfree(adapter->chan_stats); kfree(adapter); return 0; diff --git a/drivers/net/wireless/marvell/mwifiex/main.h b/drivers/net/wireless/marvell/mwifiex/main.h index 59026005e4d7..cd9a4f152530 100644 --- a/drivers/net/wireless/marvell/mwifiex/main.h +++ b/drivers/net/wireless/marvell/mwifiex/main.h @@ -1005,6 +1005,7 @@ struct mwifiex_adapter { bool usb_mc_status; bool usb_mc_setup; struct cfg80211_wowlan_nd_info *nd_info; + struct ieee80211_regdomain *regd; }; void mwifiex_process_tx_queue(struct mwifiex_adapter *adapter); diff --git a/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c b/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c index db85330ffb20..3344a26bed03 100644 --- a/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c +++ b/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c @@ -1022,6 +1022,93 @@ static int mwifiex_ret_robust_coex(struct mwifiex_private *priv, return 0; } +static struct ieee80211_regdomain * +mwifiex_create_custom_regdomain(struct mwifiex_private *priv, + u8 *buf, u16 buf_len) +{ + u16 num_chan = buf_len / 2; + struct ieee80211_regdomain *regd; + struct ieee80211_reg_rule *rule; + bool new_rule; + int regd_size, idx, freq, prev_freq = 0; + u32 bw, prev_bw = 0; + u8 chflags, prev_chflags = 0, valid_rules = 0; + + if (WARN_ON_ONCE(num_chan > NL80211_MAX_SUPP_REG_RULES)) + return ERR_PTR(-EINVAL); + + regd_size = sizeof(struct ieee80211_regdomain) + + num_chan * sizeof(struct ieee80211_reg_rule); + + regd = kzalloc(regd_size, GFP_KERNEL); + if (!regd) + return ERR_PTR(-ENOMEM); + + for (idx = 0; idx < num_chan; idx++) { + u8 chan; + enum nl80211_band band; + + chan = *buf++; + if (!chan) + return NULL; + chflags = *buf++; + band = (chan <= 14) ? NL80211_BAND_2GHZ : NL80211_BAND_5GHZ; + freq = ieee80211_channel_to_frequency(chan, band); + new_rule = false; + + if (chflags & MWIFIEX_CHANNEL_DISABLED) + continue; + + if (band == NL80211_BAND_5GHZ) { + if (!(chflags & MWIFIEX_CHANNEL_NOHT80)) + bw = MHZ_TO_KHZ(80); + else if (!(chflags & MWIFIEX_CHANNEL_NOHT40)) + bw = MHZ_TO_KHZ(40); + else + bw = MHZ_TO_KHZ(20); + } else { + if (!(chflags & MWIFIEX_CHANNEL_NOHT40)) + bw = MHZ_TO_KHZ(40); + else + bw = MHZ_TO_KHZ(20); + } + + if (idx == 0 || prev_chflags != chflags || prev_bw != bw || + freq - prev_freq > 20) { + valid_rules++; + new_rule = true; + } + + rule = ®d->reg_rules[valid_rules - 1]; + + rule->freq_range.end_freq_khz = MHZ_TO_KHZ(freq + 10); + + prev_chflags = chflags; + prev_freq = freq; + prev_bw = bw; + + if (!new_rule) + continue; + + rule->freq_range.start_freq_khz = MHZ_TO_KHZ(freq - 10); + rule->power_rule.max_eirp = DBM_TO_MBM(19); + + if (chflags & MWIFIEX_CHANNEL_PASSIVE) + rule->flags = NL80211_RRF_NO_IR; + + if (chflags & MWIFIEX_CHANNEL_DFS) + rule->flags = NL80211_RRF_DFS; + + rule->freq_range.max_bandwidth_khz = bw; + } + + regd->n_reg_rules = valid_rules; + regd->alpha2[0] = '9'; + regd->alpha2[1] = '9'; + + return regd; +} + static int mwifiex_ret_chan_region_cfg(struct mwifiex_private *priv, struct host_cmd_ds_command *resp) { @@ -1050,6 +1137,10 @@ static int mwifiex_ret_chan_region_cfg(struct mwifiex_private *priv, mwifiex_dbg_dump(priv->adapter, CMD_D, "CHAN:", (u8 *)head + sizeof(*head), tlv_buf_len); + priv->adapter->regd = + mwifiex_create_custom_regdomain(priv, + (u8 *)head + + sizeof(*head), tlv_buf_len); break; } From 410280bac6224e066f4cf0b87db08f0418a135b6 Mon Sep 17 00:00:00 2001 From: Christophe Jaillet Date: Thu, 11 Aug 2016 16:38:54 +0200 Subject: [PATCH 0708/1715] rt2x00usb: Fix error return code We know that 'retval = 0' because it has been tested a few lines above. So, if 'devm_kmalloc' fails, 0 will be returned instead of an error code. Return -ENOMEM instead. Fixes: 8b4c0009313f ("rt2x00usb: Use usb anchor to manage URB") Signed-off-by: Christophe JAILLET Acked-by: Stanislaw Gruszka Signed-off-by: Kalle Valo --- drivers/net/wireless/ralink/rt2x00/rt2x00usb.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00usb.c b/drivers/net/wireless/ralink/rt2x00/rt2x00usb.c index 7cf26c6124d1..6005e14213ca 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2x00usb.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2x00usb.c @@ -831,8 +831,10 @@ int rt2x00usb_probe(struct usb_interface *usb_intf, rt2x00dev->anchor = devm_kmalloc(&usb_dev->dev, sizeof(struct usb_anchor), GFP_KERNEL); - if (!rt2x00dev->anchor) + if (!rt2x00dev->anchor) { + retval = -ENOMEM; goto exit_free_reg; + } init_usb_anchor(rt2x00dev->anchor); return 0; From d393be3ed0bebb30a4666d7f5ed4486cd6b31716 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 16 Aug 2016 13:10:13 +0100 Subject: [PATCH 0709/1715] mwifiex: fix missing break on IEEE80211_STYPE_ACTION case The IEEE80211_STYPE_ACTION case is missing a break in the switch statement, causing it to fall through to the default case that reports a debug message about an unknown frame subtype. Fix this by adding in the missing break statement. Signed-off-by: Colin Ian King Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/util.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/marvell/mwifiex/util.c b/drivers/net/wireless/marvell/mwifiex/util.c index 6681be0511c7..18fbb96a46e9 100644 --- a/drivers/net/wireless/marvell/mwifiex/util.c +++ b/drivers/net/wireless/marvell/mwifiex/util.c @@ -386,6 +386,7 @@ mwifiex_parse_mgmt_packet(struct mwifiex_private *priv, u8 *payload, u16 len, "unknown public action frame category %d\n", category); } + break; default: mwifiex_dbg(priv->adapter, INFO, "unknown mgmt frame subtype %#x\n", stype); From 8af92af3f2d55db143417a5d401696f4b642009a Mon Sep 17 00:00:00 2001 From: Baoyou Xie Date: Mon, 29 Aug 2016 20:39:35 +0800 Subject: [PATCH 0710/1715] brcmfmac: add missing header dependencies We get 1 warning when building kernel with W=1: drivers/net/wireless/broadcom/brcm80211/brcmfmac/tracepoint.c:23:6: warning: no previous prototype for '__brcmf_err' [-Wmissing-prototypes] In fact, this function is declared in brcmfmac/debug.h, so this patch adds missing header dependencies. Signed-off-by: Baoyou Xie Acked-by: Arnd Bergmann Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/tracepoint.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/tracepoint.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/tracepoint.c index a10f35c5eb3d..fe6755944b7b 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/tracepoint.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/tracepoint.c @@ -19,6 +19,7 @@ #ifndef __CHECKER__ #define CREATE_TRACE_POINTS #include "tracepoint.h" +#include "debug.h" void __brcmf_err(const char *func, const char *fmt, ...) { From a0c7858e74793242733a09a3e34356f434bc1571 Mon Sep 17 00:00:00 2001 From: Christian Engelmayer Date: Tue, 9 Aug 2016 21:19:57 +0200 Subject: [PATCH 0711/1715] rtlwifi: rtl8192de: Fix leak in _rtl92de_read_adapter_info() In case rtl_get_hwinfo() fails, the function directly returns and leaks the already allocated hwinfo memory. Go through the correct exit path. Signed-off-by: Christian Engelmayer Acked-by: Larry Finger Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c index b0f632462335..57205514801c 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c @@ -1757,7 +1757,7 @@ static void _rtl92de_read_adapter_info(struct ieee80211_hw *hw) return; if (rtl_get_hwinfo(hw, rtlpriv, HWSET_MAX_SIZE, hwinfo, params)) - return; + goto exit; _rtl92de_efuse_update_chip_version(hw); _rtl92de_read_macphymode_and_bandtype(hw, hwinfo); @@ -1790,6 +1790,7 @@ static void _rtl92de_read_adapter_info(struct ieee80211_hw *hw) break; } rtlefuse->txpwr_fromeprom = true; +exit: kfree(hwinfo); } From 3eeacaa902a31bdf06bc53f23087dcb1c5f260d6 Mon Sep 17 00:00:00 2001 From: Christian Engelmayer Date: Tue, 9 Aug 2016 21:54:12 +0200 Subject: [PATCH 0712/1715] rtlwifi: rtl8723ae: Fix leak in _rtl8723e_read_adapter_info() In case of (rtlhal->oem_id != RT_CID_DEFAULT), the function directly returns and leaks the already allocated hwinfo memory. Go through the correct exit path. Signed-off-by: Christian Engelmayer Acked-by: Larry Finger Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c index b88c7ee72dbf..ba30efc2d195 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c @@ -1654,7 +1654,7 @@ static void _rtl8723e_read_adapter_info(struct ieee80211_hw *hw, rtlefuse->autoload_failflag, hwinfo); if (rtlhal->oem_id != RT_CID_DEFAULT) - return; + goto exit; switch (rtlefuse->eeprom_oemid) { case EEPROM_CID_DEFAULT: From d8c872b57e0f71b5eccb83b06898d678c7f3353f Mon Sep 17 00:00:00 2001 From: Maxim Altshul Date: Mon, 15 Aug 2016 11:23:38 +0300 Subject: [PATCH 0713/1715] wlcore: Remove wl pointer from wl_sta structure This field was added to wl_sta struct to get hw in situations where it was not given to driver by mac80211. In our case, get_expected_throughput op did not send hw to driver. This patch reverts the change, as it is no longer needed due to commit 4fdbc67a25ce ("mac80211: call get_expected_throughput only after adding station") as hw is now sent as a parameter. Signed-off-by: Maxim Altshul Signed-off-by: Kalle Valo --- drivers/net/wireless/ti/wlcore/main.c | 1 - drivers/net/wireless/ti/wlcore/wlcore_i.h | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c index 9e1f2d9c9865..ef6c15b952cc 100644 --- a/drivers/net/wireless/ti/wlcore/main.c +++ b/drivers/net/wireless/ti/wlcore/main.c @@ -4986,7 +4986,6 @@ static int wl12xx_sta_add(struct wl1271 *wl, return ret; wl_sta = (struct wl1271_station *)sta->drv_priv; - wl_sta->wl = wl; hlid = wl_sta->hlid; ret = wl12xx_cmd_add_peer(wl, wlvif, sta, hlid); diff --git a/drivers/net/wireless/ti/wlcore/wlcore_i.h b/drivers/net/wireless/ti/wlcore/wlcore_i.h index 242b4e37b94c..0277ae508b8a 100644 --- a/drivers/net/wireless/ti/wlcore/wlcore_i.h +++ b/drivers/net/wireless/ti/wlcore/wlcore_i.h @@ -347,7 +347,6 @@ struct wl1271_station { * Used in both AP and STA mode. */ u64 total_freed_pkts; - struct wl1271 *wl; }; struct wl12xx_vif { From b81669b9e0b4864f59421e7681512731fae01ab9 Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Fri, 19 Aug 2016 17:46:23 -0400 Subject: [PATCH 0714/1715] rtl8xxxu: Mark 0x20f4:0x648b as tested Successfully tested by Jocelyn Mayer Reported-by: J. Mayer Signed-off-by: Jes Sorensen Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index 77048db3b32a..47d0868cc463 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -5971,6 +5971,10 @@ static int rtl8xxxu_probe(struct usb_interface *interface, if (id->idProduct == 0x1004) untested = 0; break; + case 0x20f4: + if (id->idProduct == 0x648b) + untested = 0; + break; default: break; } @@ -6140,6 +6144,9 @@ static struct usb_device_id dev_table[] = { /* Tested by Andrea Merello */ {USB_DEVICE_AND_INTERFACE_INFO(0x050d, 0x1004, 0xff, 0xff, 0xff), .driver_info = (unsigned long)&rtl8192cu_fops}, +/* Tested by Jocelyn Mayer */ +{USB_DEVICE_AND_INTERFACE_INFO(0x20f4, 0x648b, 0xff, 0xff, 0xff), + .driver_info = (unsigned long)&rtl8192cu_fops}, /* Currently untested 8188 series devices */ {USB_DEVICE_AND_INTERFACE_INFO(USB_VENDOR_ID_REALTEK, 0x8191, 0xff, 0xff, 0xff), .driver_info = (unsigned long)&rtl8192cu_fops}, @@ -6199,8 +6206,6 @@ static struct usb_device_id dev_table[] = { .driver_info = (unsigned long)&rtl8192cu_fops}, {USB_DEVICE_AND_INTERFACE_INFO(0x2019, 0xed17, 0xff, 0xff, 0xff), .driver_info = (unsigned long)&rtl8192cu_fops}, -{USB_DEVICE_AND_INTERFACE_INFO(0x20f4, 0x648b, 0xff, 0xff, 0xff), - .driver_info = (unsigned long)&rtl8192cu_fops}, {USB_DEVICE_AND_INTERFACE_INFO(0x4855, 0x0090, 0xff, 0xff, 0xff), .driver_info = (unsigned long)&rtl8192cu_fops}, {USB_DEVICE_AND_INTERFACE_INFO(0x4856, 0x0091, 0xff, 0xff, 0xff), From 76a8e07d49b65294ed006ff1e37fa152a1bfd230 Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Fri, 19 Aug 2016 17:46:24 -0400 Subject: [PATCH 0715/1715] rtl8xxxu: Mark 0x2001:0x3308 as tested D-Link DWA-121 is reported as working. Reported-by: Stefano Bravi Signed-off-by: Jes Sorensen Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index 47d0868cc463..ca4c40d4bc28 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -5975,6 +5975,10 @@ static int rtl8xxxu_probe(struct usb_interface *interface, if (id->idProduct == 0x648b) untested = 0; break; + case 0x2001: + if (id->idProduct == 0x3308) + untested = 0; + break; default: break; } @@ -6147,6 +6151,9 @@ static struct usb_device_id dev_table[] = { /* Tested by Jocelyn Mayer */ {USB_DEVICE_AND_INTERFACE_INFO(0x20f4, 0x648b, 0xff, 0xff, 0xff), .driver_info = (unsigned long)&rtl8192cu_fops}, +/* Tested by Stefano Bravi */ +{USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x3308, 0xff, 0xff, 0xff), + .driver_info = (unsigned long)&rtl8192cu_fops}, /* Currently untested 8188 series devices */ {USB_DEVICE_AND_INTERFACE_INFO(USB_VENDOR_ID_REALTEK, 0x8191, 0xff, 0xff, 0xff), .driver_info = (unsigned long)&rtl8192cu_fops}, @@ -6194,8 +6201,6 @@ static struct usb_device_id dev_table[] = { .driver_info = (unsigned long)&rtl8192cu_fops}, {USB_DEVICE_AND_INTERFACE_INFO(0x13d3, 0x3357, 0xff, 0xff, 0xff), .driver_info = (unsigned long)&rtl8192cu_fops}, -{USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x3308, 0xff, 0xff, 0xff), - .driver_info = (unsigned long)&rtl8192cu_fops}, {USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x330b, 0xff, 0xff, 0xff), .driver_info = (unsigned long)&rtl8192cu_fops}, {USB_DEVICE_AND_INTERFACE_INFO(0x2019, 0x4902, 0xff, 0xff, 0xff), From deb6176e561324884652dd6a49a862bd8b152959 Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Fri, 19 Aug 2016 17:46:25 -0400 Subject: [PATCH 0716/1715] rtl8xxxu: Fix error handling if rtl8xxxu_init_device() fails For some reason we lost the code bailing if rtl8xxxu_init_device() returned an error. This catches the error and also cleans up the error handling. Signed-off-by: Jes Sorensen Signed-off-by: Kalle Valo --- .../wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 20 ++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index ca4c40d4bc28..9c6305b0a5e4 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -5947,7 +5947,7 @@ static int rtl8xxxu_probe(struct usb_interface *interface, struct ieee80211_hw *hw; struct usb_device *udev; struct ieee80211_supported_band *sband; - int ret = 0; + int ret; int untested = 1; udev = usb_get_dev(interface_to_usbdev(interface)); @@ -5995,6 +5995,7 @@ static int rtl8xxxu_probe(struct usb_interface *interface, hw = ieee80211_alloc_hw(sizeof(struct rtl8xxxu_priv), &rtl8xxxu_ops); if (!hw) { ret = -ENOMEM; + priv = NULL; goto exit; } @@ -6043,6 +6044,8 @@ static int rtl8xxxu_probe(struct usb_interface *interface, } ret = rtl8xxxu_init_device(hw); + if (ret) + goto exit; hw->wiphy->max_scan_ssids = 1; hw->wiphy->max_scan_ie_len = IEEE80211_MAX_DATA_LEN; @@ -6093,9 +6096,20 @@ static int rtl8xxxu_probe(struct usb_interface *interface, goto exit; } + return 0; + exit: - if (ret < 0) - usb_put_dev(udev); + usb_set_intfdata(interface, NULL); + + if (priv) { + kfree(priv->fw_data); + mutex_destroy(&priv->usb_buf_mutex); + mutex_destroy(&priv->h2c_mutex); + } + usb_put_dev(udev); + + ieee80211_free_hw(hw); + return ret; } From 690a6d268bdf85f8d233823a18d3200b99e5568d Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Fri, 19 Aug 2016 17:46:26 -0400 Subject: [PATCH 0717/1715] rtl8xxxu: Add TP-Link TL-WN823N v2 to list of supported devices This is an rtl8192eu based dongle (the v1 is an rtl8192cu). Reported and tested by Myckel Habets. Reported-by: Myckel Habets Signed-off-by: Jes Sorensen Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index 9c6305b0a5e4..948f153a9fee 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -5979,6 +5979,10 @@ static int rtl8xxxu_probe(struct usb_interface *interface, if (id->idProduct == 0x3308) untested = 0; break; + case 0x2357: + if (id->idProduct == 0x0109) + untested = 0; + break; default: break; } @@ -6146,6 +6150,9 @@ static struct usb_device_id dev_table[] = { .driver_info = (unsigned long)&rtl8723au_fops}, {USB_DEVICE_AND_INTERFACE_INFO(USB_VENDOR_ID_REALTEK, 0x818b, 0xff, 0xff, 0xff), .driver_info = (unsigned long)&rtl8192eu_fops}, +/* Tested by Myckel Habets */ +{USB_DEVICE_AND_INTERFACE_INFO(0x2357, 0x0109, 0xff, 0xff, 0xff), + .driver_info = (unsigned long)&rtl8192eu_fops}, {USB_DEVICE_AND_INTERFACE_INFO(USB_VENDOR_ID_REALTEK, 0xb720, 0xff, 0xff, 0xff), .driver_info = (unsigned long)&rtl8723bu_fops}, #ifdef CONFIG_RTL8XXXU_UNTESTED From 44abaa08d002235e1bbc2b9e0fd46a64e4694596 Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Fri, 19 Aug 2016 17:46:27 -0400 Subject: [PATCH 0718/1715] rtl8xxxu: Add TX page defines for 8723b This switches the 8723b driver to use the new rtl8xxxu_init_queue_reserved_page() function. Signed-off-by: Jes Sorensen Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h | 6 ++++++ drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c | 4 ++++ 2 files changed, 10 insertions(+) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h index 4341d56805f8..da4f1487e292 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h @@ -43,6 +43,7 @@ #define TX_TOTAL_PAGE_NUM 0xf8 #define TX_TOTAL_PAGE_NUM_8192E 0xf3 +#define TX_TOTAL_PAGE_NUM_8723B 0xf7 /* (HPQ + LPQ + NPQ + PUBQ) = TX_TOTAL_PAGE_NUM */ #define TX_PAGE_NUM_PUBQ 0xe7 #define TX_PAGE_NUM_HI_PQ 0x0c @@ -54,6 +55,11 @@ #define TX_PAGE_NUM_LO_PQ_8192E 0x0c #define TX_PAGE_NUM_NORM_PQ_8192E 0x00 +#define TX_PAGE_NUM_PUBQ_8723B 0xe7 +#define TX_PAGE_NUM_HI_PQ_8723B 0x0c +#define TX_PAGE_NUM_LO_PQ_8723B 0x02 +#define TX_PAGE_NUM_NORM_PQ_8723B 0x02 + #define RTL_FW_PAGE_SIZE 4096 #define RTL8XXXU_FIRMWARE_POLL_MAX 1000 diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c index 9d45afb0e3fd..c1323f0851f7 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c @@ -1674,4 +1674,8 @@ struct rtl8xxxu_fileops rtl8723bu_fops = { .pbp_rx = PBP_PAGE_SIZE_256, .pbp_tx = PBP_PAGE_SIZE_256, .mactable = rtl8723b_mac_init_table, + .total_page_num = TX_TOTAL_PAGE_NUM_8723B, + .page_num_hi = TX_PAGE_NUM_HI_PQ_8723B, + .page_num_lo = TX_PAGE_NUM_LO_PQ_8723B, + .page_num_norm = TX_PAGE_NUM_NORM_PQ_8723B, }; From e366f45d36275b80a5615c68123c88a3de75cc22 Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Fri, 19 Aug 2016 17:46:28 -0400 Subject: [PATCH 0719/1715] rtl8xxxu: Switch 8723a to use new rtl8xxxu_init_queue_reserved_page() routine This changes the pub-queue value written to REQ_RQPN, however the old code used a hard coded minimum value assuming there would always be an active lo-queue, even when no USB EP was found for it. Signed-off-by: Jes Sorensen Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723a.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723a.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723a.c index 686c551581b1..d6f496899be0 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723a.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723a.c @@ -396,4 +396,8 @@ struct rtl8xxxu_fileops rtl8723au_fops = { .pbp_rx = PBP_PAGE_SIZE_128, .pbp_tx = PBP_PAGE_SIZE_128, .mactable = rtl8xxxu_gen1_mac_init_table, + .total_page_num = TX_TOTAL_PAGE_NUM, + .page_num_hi = TX_PAGE_NUM_HI_PQ, + .page_num_lo = TX_PAGE_NUM_LO_PQ, + .page_num_norm = TX_PAGE_NUM_NORM_PQ, }; From b492940dc1f7372fb95930dc5bde8d7dbc560cd3 Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Fri, 19 Aug 2016 17:46:29 -0400 Subject: [PATCH 0720/1715] rtl8xxxu: Switch 8192cu/8188cu devices to use rtl8xxxu_init_queue_reserved_page() This was the last user of the old rtl8xxxu_old_init_queue_reserved_page() which can now be removed. Signed-off-by: Jes Sorensen Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192c.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192c.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192c.c index 69d1a1453ede..cd13c2538f0d 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192c.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192c.c @@ -579,5 +579,9 @@ struct rtl8xxxu_fileops rtl8192cu_fops = { .pbp_rx = PBP_PAGE_SIZE_128, .pbp_tx = PBP_PAGE_SIZE_128, .mactable = rtl8xxxu_gen1_mac_init_table, + .total_page_num = TX_TOTAL_PAGE_NUM, + .page_num_hi = TX_PAGE_NUM_HI_PQ, + .page_num_lo = TX_PAGE_NUM_LO_PQ, + .page_num_norm = TX_PAGE_NUM_NORM_PQ, }; #endif From efeb8ce7a98cfb60932c6a53ce86359bf6b33b67 Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Fri, 19 Aug 2016 17:46:30 -0400 Subject: [PATCH 0721/1715] rtl8xxxu: Remove now obsolete rtl8xxxu_old_init_queue_reserved_page() Switching over the old devices to use the new function allows us to get rid of this legacy. Signed-off-by: Jes Sorensen Signed-off-by: Kalle Valo --- .../wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 30 ++----------------- 1 file changed, 2 insertions(+), 28 deletions(-) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index 948f153a9fee..efe24a60e131 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -3847,28 +3847,6 @@ void rtl8xxxu_gen2_disable_rf(struct rtl8xxxu_priv *priv) rtl8xxxu_write32(priv, REG_RX_WAIT_CCA, val32); } -static void rtl8xxxu_old_init_queue_reserved_page(struct rtl8xxxu_priv *priv) -{ - u8 val8; - u32 val32; - - if (priv->ep_tx_normal_queue) - val8 = TX_PAGE_NUM_NORM_PQ; - else - val8 = 0; - - rtl8xxxu_write8(priv, REG_RQPN_NPQ, val8); - - val32 = (TX_PAGE_NUM_PUBQ << RQPN_PUB_PQ_SHIFT) | RQPN_LOAD; - - if (priv->ep_tx_high_queue) - val32 |= (TX_PAGE_NUM_HI_PQ << RQPN_HI_PQ_SHIFT); - if (priv->ep_tx_low_queue) - val32 |= (TX_PAGE_NUM_LO_PQ << RQPN_LO_PQ_SHIFT); - - rtl8xxxu_write32(priv, REG_RQPN, val32); -} - static void rtl8xxxu_init_queue_reserved_page(struct rtl8xxxu_priv *priv) { struct rtl8xxxu_fileops *fops = priv->fops; @@ -3929,12 +3907,8 @@ static int rtl8xxxu_init_device(struct ieee80211_hw *hw) goto exit; } - if (!macpower) { - if (priv->fops->total_page_num) - rtl8xxxu_init_queue_reserved_page(priv); - else - rtl8xxxu_old_init_queue_reserved_page(priv); - } + if (!macpower) + rtl8xxxu_init_queue_reserved_page(priv); ret = rtl8xxxu_init_queue_priority(priv); dev_dbg(dev, "%s: init_queue_priority %i\n", __func__, ret); From e02aa3eef786aab82f9929fea25f5f7e9f77ba69 Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Fri, 19 Aug 2016 17:46:31 -0400 Subject: [PATCH 0722/1715] rtl8xxxu: Simplify code setting TX buffer boundary With all devices now offering fops->total_page_num, get rid of the if mess for setting the TX buffer boundary. Signed-off-by: Jes Sorensen Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index efe24a60e131..7a697c0384a6 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -3968,13 +3968,7 @@ static int rtl8xxxu_init_device(struct ieee80211_hw *hw) /* * Set TX buffer boundary */ - if (priv->rtl_chip == RTL8192E) - val8 = TX_TOTAL_PAGE_NUM_8192E + 1; - else - val8 = TX_TOTAL_PAGE_NUM + 1; - - if (priv->rtl_chip == RTL8723B) - val8 -= 1; + val8 = priv->fops->total_page_num + 1; rtl8xxxu_write8(priv, REG_TXPKTBUF_BCNQ_BDNY, val8); rtl8xxxu_write8(priv, REG_TXPKTBUF_MGQ_BDNY, val8); From dce7548fd9700c4c4ecda936dcf7326b9241a6d3 Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Fri, 19 Aug 2016 17:46:32 -0400 Subject: [PATCH 0723/1715] rtl8xxxu: Add bit definitions for REG_FPGA0_TX_INFO This adds TX antenna selection bit defines for OFDM mode. Signed-off-by: Jes Sorensen Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_regs.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_regs.h b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_regs.h index 921c5653fff2..a3388906781f 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_regs.h +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_regs.h @@ -780,6 +780,10 @@ #define FPGA_RF_MODE_OFDM BIT(25) #define REG_FPGA0_TX_INFO 0x0804 +#define FPGA0_TX_INFO_OFDM_PATH_A BIT(0) +#define FPGA0_TX_INFO_OFDM_PATH_B BIT(1) +#define FPGA0_TX_INFO_OFDM_PATH_C BIT(2) +#define FPGA0_TX_INFO_OFDM_PATH_D BIT(3) #define REG_FPGA0_PSD_FUNC 0x0808 #define REG_FPGA0_TX_GAIN 0x080c #define REG_FPGA0_RF_TIMING1 0x0810 From 0b09628948bce970e14ef61a6788caa93285a132 Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Fri, 19 Aug 2016 17:46:33 -0400 Subject: [PATCH 0724/1715] rtl8xxxu: Add interrupt bit definitions for gen2 parts These are primarily needed for SDIO/PCI parts, but the vendor driver still sets them for some USB devices. Signed-off-by: Jes Sorensen Signed-off-by: Kalle Valo --- .../wireless/realtek/rtl8xxxu/rtl8xxxu_regs.h | 56 +++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_regs.h b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_regs.h index a3388906781f..3555a2f24285 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_regs.h +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_regs.h @@ -213,10 +213,66 @@ #define REG_HMBOX_EXT_1 0x008a #define REG_HMBOX_EXT_2 0x008c #define REG_HMBOX_EXT_3 0x008e + /* Interrupt registers for 8192e/8723bu/8812 */ #define REG_HIMR0 0x00b0 +#define IMR0_TXCCK BIT(30) /* TXRPT interrupt when CCX bit + of the packet is set */ +#define IMR0_PSTIMEOUT BIT(29) /* Power Save Time Out Int */ +#define IMR0_GTINT4 BIT(28) /* Set when GTIMER4 expires */ +#define IMR0_GTINT3 BIT(27) /* Set when GTIMER3 expires */ +#define IMR0_TBDER BIT(26) /* Transmit Beacon0 Error */ +#define IMR0_TBDOK BIT(25) /* Transmit Beacon0 OK */ +#define IMR0_TSF_BIT32_TOGGLE BIT(24) /* TSF Timer BIT32 toggle + indication interrupt */ +#define IMR0_BCNDMAINT0 BIT(20) /* Beacon DMA Interrupt 0 */ +#define IMR0_BCNDERR0 BIT(16) /* Beacon Queue DMA Error 0 */ +#define IMR0_HSISR_IND_ON_INT BIT(15) /* HSISR Indicator (HSIMR & + HSISR is true) */ +#define IMR0_BCNDMAINT_E BIT(14) /* Beacon DMA Interrupt + Extension for Win7 */ +#define IMR0_ATIMEND BIT(12) /* CTWidnow End or + ATIM Window End */ +#define IMR0_HISR1_IND_INT BIT(11) /* HISR1 Indicator + (HISR1 & HIMR1 is true) */ +#define IMR0_C2HCMD BIT(10) /* CPU to Host Command INT + Status, Write 1 to clear */ +#define IMR0_CPWM2 BIT(9) /* CPU power Mode exchange INT + Status, Write 1 to clear */ +#define IMR0_CPWM BIT(8) /* CPU power Mode exchange INT + Status, Write 1 to clear */ +#define IMR0_HIGHDOK BIT(7) /* High Queue DMA OK */ +#define IMR0_MGNTDOK BIT(6) /* Management Queue DMA OK */ +#define IMR0_BKDOK BIT(5) /* AC_BK DMA OK */ +#define IMR0_BEDOK BIT(4) /* AC_BE DMA OK */ +#define IMR0_VIDOK BIT(3) /* AC_VI DMA OK */ +#define IMR0_VODOK BIT(2) /* AC_VO DMA OK */ +#define IMR0_RDU BIT(1) /* Rx Descriptor Unavailable */ +#define IMR0_ROK BIT(0) /* Receive DMA OK */ #define REG_HISR0 0x00b4 #define REG_HIMR1 0x00b8 +#define IMR1_BCNDMAINT7 BIT(27) /* Beacon DMA Interrupt 7 */ +#define IMR1_BCNDMAINT6 BIT(26) /* Beacon DMA Interrupt 6 */ +#define IMR1_BCNDMAINT5 BIT(25) /* Beacon DMA Interrupt 5 */ +#define IMR1_BCNDMAINT4 BIT(24) /* Beacon DMA Interrupt 4 */ +#define IMR1_BCNDMAINT3 BIT(23) /* Beacon DMA Interrupt 3 */ +#define IMR1_BCNDMAINT2 BIT(22) /* Beacon DMA Interrupt 2 */ +#define IMR1_BCNDMAINT1 BIT(21) /* Beacon DMA Interrupt 1 */ +#define IMR1_BCNDERR7 BIT(20) /* Beacon Queue DMA Err Int 7 */ +#define IMR1_BCNDERR6 BIT(19) /* Beacon Queue DMA Err Int 6 */ +#define IMR1_BCNDERR5 BIT(18) /* Beacon Queue DMA Err Int 5 */ +#define IMR1_BCNDERR4 BIT(17) /* Beacon Queue DMA Err Int 4 */ +#define IMR1_BCNDERR3 BIT(16) /* Beacon Queue DMA Err Int 3 */ +#define IMR1_BCNDERR2 BIT(15) /* Beacon Queue DMA Err Int 2 */ +#define IMR1_BCNDERR1 BIT(14) /* Beacon Queue DMA Err Int 1 */ +#define IMR1_ATIMEND_E BIT(13) /* ATIM Window End Extension + for Win7 */ +#define IMR1_TXERR BIT(11) /* Tx Error Flag Int Status, + write 1 to clear */ +#define IMR1_RXERR BIT(10) /* Rx Error Flag Int Status, + write 1 to clear */ +#define IMR1_TXFOVW BIT(9) /* Transmit FIFO Overflow */ +#define IMR1_RXFOVW BIT(8) /* Receive FIFO Overflow */ #define REG_HISR1 0x00bc /* Host suspend counter on FPGA platform */ From e3ebcd7428c142fb7b74ded9a624cddfb403d2c5 Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Fri, 19 Aug 2016 17:46:34 -0400 Subject: [PATCH 0725/1715] rtl8xxxu: Use flag to indicate whether device has TX report timer support Use a fileops flag to indicate whether the device has TX report timer support. This will make it easier to include future devices such as 8188eu to use the TX report timer. Signed-off-by: Jes Sorensen Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h | 1 + drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c | 1 + drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 5 ++--- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h index da4f1487e292..c8d7075aefb9 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h @@ -1341,6 +1341,7 @@ struct rtl8xxxu_fileops { char tx_desc_size; char rx_desc_size; char has_s0s1; + char has_tx_report; u32 adda_1t_init; u32 adda_1t_path_on; u32 adda_2t_path_on_a; diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c index c1323f0851f7..0b6a6ca40b98 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c @@ -1666,6 +1666,7 @@ struct rtl8xxxu_fileops rtl8723bu_fops = { .tx_desc_size = sizeof(struct rtl8xxxu_txdesc40), .rx_desc_size = sizeof(struct rtl8xxxu_rxdesc24), .has_s0s1 = 1, + .has_tx_report = 1, .adda_1t_init = 0x01c00014, .adda_1t_path_on = 0x01c00014, .adda_2t_path_on_a = 0x01c00014, diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index 7a697c0384a6..81b95825bc84 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -4000,10 +4000,9 @@ static int rtl8xxxu_init_device(struct ieee80211_hw *hw) priv->fops->usb_quirks(priv); /* - * Presumably this is for 8188EU as well - * Enable TX report and TX report timer + * Enable TX report and TX report timer for 8723bu/8188eu/... */ - if (priv->rtl_chip == RTL8723B) { + if (priv->fops->has_tx_report) { val8 = rtl8xxxu_read8(priv, REG_TX_REPORT_CTRL); val8 |= TX_REPORT_CTRL_TIMER_ENABLE; rtl8xxxu_write8(priv, REG_TX_REPORT_CTRL, val8); From ee675cc30e07cc925489e9663539e69873b04100 Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Fri, 19 Aug 2016 17:46:35 -0400 Subject: [PATCH 0726/1715] rtl8xxxu: Convert flags in rtl8xxxu_fileops to bitflags This leaves space for a few more flags within the same space. Signed-off-by: Jes Sorensen Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h index c8d7075aefb9..c5b684622ed6 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h @@ -1340,8 +1340,8 @@ struct rtl8xxxu_fileops { int rx_agg_buf_size; char tx_desc_size; char rx_desc_size; - char has_s0s1; - char has_tx_report; + u8 has_s0s1:1; + u8 has_tx_report:1; u32 adda_1t_init; u32 adda_1t_path_on; u32 adda_2t_path_on_a; From eed145ab25a3eeba4caf2d4be0b5c7a3097f39bd Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Fri, 19 Aug 2016 17:46:36 -0400 Subject: [PATCH 0727/1715] rtl8xxxu: Introduce fops bitflag indicating type of thermal meter Do not rely on TX descriptor size to determine the thermal meter type. Signed-off-by: Jes Sorensen Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h | 1 + drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c | 1 + drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c | 1 + drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 2 +- 4 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h index c5b684622ed6..68d80c437449 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h @@ -1342,6 +1342,7 @@ struct rtl8xxxu_fileops { char rx_desc_size; u8 has_s0s1:1; u8 has_tx_report:1; + u8 gen2_thermal_meter:1; u32 adda_1t_init; u32 adda_1t_path_on; u32 adda_2t_path_on_a; diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c index 9a1994f49b7b..99011999829a 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c @@ -1505,6 +1505,7 @@ struct rtl8xxxu_fileops rtl8192eu_fops = { .tx_desc_size = sizeof(struct rtl8xxxu_txdesc40), .rx_desc_size = sizeof(struct rtl8xxxu_rxdesc24), .has_s0s1 = 0, + .gen2_thermal_meter = 1, .adda_1t_init = 0x0fc01616, .adda_1t_path_on = 0x0fc01616, .adda_2t_path_on_a = 0x0fc01616, diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c index 0b6a6ca40b98..dd2c0f0ec0aa 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c @@ -1667,6 +1667,7 @@ struct rtl8xxxu_fileops rtl8723bu_fops = { .rx_desc_size = sizeof(struct rtl8xxxu_rxdesc24), .has_s0s1 = 1, .has_tx_report = 1, + .gen2_thermal_meter = 1, .adda_1t_init = 0x01c00014, .adda_1t_path_on = 0x01c00014, .adda_2t_path_on_a = 0x01c00014, diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index 81b95825bc84..6de5b8f9340f 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -4195,7 +4195,7 @@ static int rtl8xxxu_init_device(struct ieee80211_hw *hw) /* * This should enable thermal meter */ - if (priv->fops->tx_desc_size == sizeof(struct rtl8xxxu_txdesc40)) + if (priv->fops->gen2_thermal_meter) rtl8xxxu_write_rfreg(priv, RF_A, RF6052_REG_T_METER_8723B, 0x37cf8); else From be49b1f111c76ce20a60a90587e2b8e8ab809a06 Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Fri, 19 Aug 2016 17:46:37 -0400 Subject: [PATCH 0728/1715] rtl8xxxu: Simplify calculating of hw value used for setting TX rate Calculating the value in one place rather than using one calculation in one place and a different one for management frames in another location makes little sense. Signed-off-by: Jes Sorensen Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index 6de5b8f9340f..2f951cf788aa 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -4848,7 +4848,8 @@ static void rtl8xxxu_tx(struct ieee80211_hw *hw, } } - if (rate_flag & IEEE80211_TX_RC_MCS) + if (rate_flag & IEEE80211_TX_RC_MCS && + !ieee80211_is_mgmt(hdr->frame_control)) rate = tx_info->control.rates[0].idx + DESC_RATE_MCS0; else rate = tx_rate->hw_value; @@ -4869,7 +4870,7 @@ static void rtl8xxxu_tx(struct ieee80211_hw *hw, tx_desc->txdw1 |= cpu_to_le32(TXDESC32_AGG_BREAK); if (ieee80211_is_mgmt(hdr->frame_control)) { - tx_desc->txdw5 = cpu_to_le32(tx_rate->hw_value); + tx_desc->txdw5 = cpu_to_le32(rate); tx_desc->txdw4 |= cpu_to_le32(TXDESC32_USE_DRIVER_RATE); tx_desc->txdw5 |= @@ -4923,7 +4924,7 @@ static void rtl8xxxu_tx(struct ieee80211_hw *hw, tx_desc40->txdw2 |= cpu_to_le32(TXDESC40_AGG_BREAK); if (ieee80211_is_mgmt(hdr->frame_control)) { - tx_desc40->txdw4 = cpu_to_le32(tx_rate->hw_value); + tx_desc40->txdw4 = cpu_to_le32(rate); tx_desc40->txdw3 |= cpu_to_le32(TXDESC40_USE_DRIVER_RATE); tx_desc40->txdw4 |= From 3972cc579140e48e8390fff5e94f6e9e78c3dd87 Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Fri, 19 Aug 2016 17:46:38 -0400 Subject: [PATCH 0729/1715] rtl8xxxu: Determine the need for SGI before handling specific TX desc formats In order to be able to split out the TX descriptor handling code, determine in advance the need to mark SGI. Signed-off-by: Jes Sorensen Signed-off-by: Kalle Valo --- .../net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index 2f951cf788aa..6dc3f52d0aaf 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -4770,7 +4770,7 @@ static void rtl8xxxu_tx(struct ieee80211_hw *hw, u16 rate_flag = tx_info->control.rates[0].flags; int tx_desc_size = priv->fops->tx_desc_size; int ret; - bool usedesc40, ampdu_enable; + bool usedesc40, ampdu_enable, sgi = false; if (skb_headroom(skb) < tx_desc_size) { dev_warn(dev, @@ -4854,6 +4854,12 @@ static void rtl8xxxu_tx(struct ieee80211_hw *hw, else rate = tx_rate->hw_value; + if (rate_flag & IEEE80211_TX_RC_SHORT_GI || + (ieee80211_is_data_qos(hdr->frame_control) && + sta && sta->ht_cap.cap & + (IEEE80211_HT_CAP_SGI_40 | IEEE80211_HT_CAP_SGI_20))) + sgi = true; + seq_number = IEEE80211_SEQ_TO_SN(le16_to_cpu(hdr->seq_ctrl)); if (!usedesc40) { tx_desc->txdw5 = cpu_to_le32(rate); @@ -4886,12 +4892,8 @@ static void rtl8xxxu_tx(struct ieee80211_hw *hw, (sta && vif && vif->bss_conf.use_short_preamble)) tx_desc->txdw4 |= cpu_to_le32(TXDESC32_SHORT_PREAMBLE); - if (rate_flag & IEEE80211_TX_RC_SHORT_GI || - (ieee80211_is_data_qos(hdr->frame_control) && - sta && sta->ht_cap.cap & - (IEEE80211_HT_CAP_SGI_40 | IEEE80211_HT_CAP_SGI_20))) { + if (sgi) tx_desc->txdw5 |= cpu_to_le32(TXDESC32_SHORT_GI); - } if (rate_flag & IEEE80211_TX_RC_USE_RTS_CTS) { /* From 99afaac4278c9581cef17ddc0c842b51f9b6206d Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Fri, 19 Aug 2016 17:46:39 -0400 Subject: [PATCH 0730/1715] rtl8xxxu: Determine need for shore preamble before updating TX descriptors Another patch to move this detection out of the code handling the TX descriptor update. Signed-off-by: Jes Sorensen Signed-off-by: Kalle Valo --- .../net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index 6dc3f52d0aaf..cfb2dfd54277 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -4770,7 +4770,7 @@ static void rtl8xxxu_tx(struct ieee80211_hw *hw, u16 rate_flag = tx_info->control.rates[0].flags; int tx_desc_size = priv->fops->tx_desc_size; int ret; - bool usedesc40, ampdu_enable, sgi = false; + bool usedesc40, ampdu_enable, sgi = false, short_preamble = false; if (skb_headroom(skb) < tx_desc_size) { dev_warn(dev, @@ -4860,6 +4860,10 @@ static void rtl8xxxu_tx(struct ieee80211_hw *hw, (IEEE80211_HT_CAP_SGI_40 | IEEE80211_HT_CAP_SGI_20))) sgi = true; + if (rate_flag & IEEE80211_TX_RC_USE_SHORT_PREAMBLE || + (sta && vif && vif->bss_conf.use_short_preamble)) + short_preamble = true; + seq_number = IEEE80211_SEQ_TO_SN(le16_to_cpu(hdr->seq_ctrl)); if (!usedesc40) { tx_desc->txdw5 = cpu_to_le32(rate); @@ -4888,8 +4892,7 @@ static void rtl8xxxu_tx(struct ieee80211_hw *hw, if (ieee80211_is_data_qos(hdr->frame_control)) tx_desc->txdw4 |= cpu_to_le32(TXDESC32_QOS); - if (rate_flag & IEEE80211_TX_RC_USE_SHORT_PREAMBLE || - (sta && vif && vif->bss_conf.use_short_preamble)) + if (short_preamble) tx_desc->txdw4 |= cpu_to_le32(TXDESC32_SHORT_PREAMBLE); if (sgi) @@ -4935,8 +4938,7 @@ static void rtl8xxxu_tx(struct ieee80211_hw *hw, cpu_to_le32(TXDESC40_RETRY_LIMIT_ENABLE); } - if (rate_flag & IEEE80211_TX_RC_USE_SHORT_PREAMBLE || - (sta && vif && vif->bss_conf.use_short_preamble)) + if (short_preamble) tx_desc40->txdw5 |= cpu_to_le32(TXDESC40_SHORT_PREAMBLE); From b59415c2dd088ffce62fbde737c8b2d04fb6e015 Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Fri, 19 Aug 2016 17:46:40 -0400 Subject: [PATCH 0731/1715] rtl8xxxu: Split filling of TX descriptors into separate functions Split the filling of TX descriptors into a generic portion used on all devices, and format specific helper functions provided in the fops structure. This also cleaned up some mess, even if non harmful, in the handling of txdesc40 descriptors, where the code randomly would switch between the pointer to tx_desc and tx_desc40. Signed-off-by: Jes Sorensen Signed-off-by: Kalle Valo --- .../net/wireless/realtek/rtl8xxxu/rtl8xxxu.h | 12 ++ .../realtek/rtl8xxxu/rtl8xxxu_8192c.c | 1 + .../realtek/rtl8xxxu/rtl8xxxu_8192e.c | 1 + .../realtek/rtl8xxxu/rtl8xxxu_8723a.c | 1 + .../realtek/rtl8xxxu/rtl8xxxu_8723b.c | 1 + .../wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 198 ++++++++++-------- 6 files changed, 125 insertions(+), 89 deletions(-) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h index 68d80c437449..1f54b8928d3c 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h @@ -1336,6 +1336,10 @@ struct rtl8xxxu_fileops { u32 ramask, int sgi); void (*report_connect) (struct rtl8xxxu_priv *priv, u8 macid, bool connect); + void (*fill_txdesc) (struct ieee80211_hdr *hdr, + struct rtl8xxxu_txdesc32 *tx_desc, u32 rate, + u16 rate_flag, bool sgi, bool short_preamble, + bool ampdu_enable); int writeN_block_size; int rx_agg_buf_size; char tx_desc_size; @@ -1429,6 +1433,14 @@ int rtl8xxxu_parse_rxdesc24(struct rtl8xxxu_priv *priv, struct sk_buff *skb); int rtl8xxxu_gen2_channel_to_group(int channel); bool rtl8xxxu_gen2_simularity_compare(struct rtl8xxxu_priv *priv, int result[][8], int c1, int c2); +void rtl8xxxu_fill_txdesc_v1(struct ieee80211_hdr *hdr, + struct rtl8xxxu_txdesc32 *tx_desc, u32 rate, + u16 rate_flag, bool sgi, bool short_preamble, + bool ampdu_enable); +void rtl8xxxu_fill_txdesc_v2(struct ieee80211_hdr *hdr, + struct rtl8xxxu_txdesc32 *tx_desc32, u32 rate, + u16 rate_flag, bool sgi, bool short_preamble, + bool ampdu_enable); extern struct rtl8xxxu_fileops rtl8192cu_fops; extern struct rtl8xxxu_fileops rtl8192eu_fops; diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192c.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192c.c index cd13c2538f0d..f9e2050812ab 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192c.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192c.c @@ -567,6 +567,7 @@ struct rtl8xxxu_fileops rtl8192cu_fops = { .set_tx_power = rtl8xxxu_gen1_set_tx_power, .update_rate_mask = rtl8xxxu_update_rate_mask, .report_connect = rtl8xxxu_gen1_report_connect, + .fill_txdesc = rtl8xxxu_fill_txdesc_v1, .writeN_block_size = 128, .rx_agg_buf_size = 16000, .tx_desc_size = sizeof(struct rtl8xxxu_txdesc32), diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c index 99011999829a..841522ee6379 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c @@ -1501,6 +1501,7 @@ struct rtl8xxxu_fileops rtl8192eu_fops = { .set_tx_power = rtl8192e_set_tx_power, .update_rate_mask = rtl8xxxu_gen2_update_rate_mask, .report_connect = rtl8xxxu_gen2_report_connect, + .fill_txdesc = rtl8xxxu_fill_txdesc_v2, .writeN_block_size = 128, .tx_desc_size = sizeof(struct rtl8xxxu_txdesc40), .rx_desc_size = sizeof(struct rtl8xxxu_rxdesc24), diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723a.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723a.c index d6f496899be0..aef373028155 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723a.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723a.c @@ -384,6 +384,7 @@ struct rtl8xxxu_fileops rtl8723au_fops = { .set_tx_power = rtl8xxxu_gen1_set_tx_power, .update_rate_mask = rtl8xxxu_update_rate_mask, .report_connect = rtl8xxxu_gen1_report_connect, + .fill_txdesc = rtl8xxxu_fill_txdesc_v1, .writeN_block_size = 1024, .rx_agg_buf_size = 16000, .tx_desc_size = sizeof(struct rtl8xxxu_txdesc32), diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c index dd2c0f0ec0aa..6c086b5657e9 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c @@ -1662,6 +1662,7 @@ struct rtl8xxxu_fileops rtl8723bu_fops = { .set_tx_power = rtl8723b_set_tx_power, .update_rate_mask = rtl8xxxu_gen2_update_rate_mask, .report_connect = rtl8xxxu_gen2_report_connect, + .fill_txdesc = rtl8xxxu_fill_txdesc_v2, .writeN_block_size = 1024, .tx_desc_size = sizeof(struct rtl8xxxu_txdesc40), .rx_desc_size = sizeof(struct rtl8xxxu_rxdesc24), diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index cfb2dfd54277..e02fab099496 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -4750,6 +4750,113 @@ static void rtl8xxxu_dump_action(struct device *dev, } } +/* + * Fill in v1 (gen1) specific TX descriptor bits. + * This format is used on 8188cu/8192cu/8723au + */ +void +rtl8xxxu_fill_txdesc_v1(struct ieee80211_hdr *hdr, + struct rtl8xxxu_txdesc32 *tx_desc, u32 rate, + u16 rate_flag, bool sgi, bool short_preamble, + bool ampdu_enable) +{ + u16 seq_number; + + seq_number = IEEE80211_SEQ_TO_SN(le16_to_cpu(hdr->seq_ctrl)); + + tx_desc->txdw5 = cpu_to_le32(rate); + + if (ieee80211_is_data(hdr->frame_control)) + tx_desc->txdw5 |= cpu_to_le32(0x0001ff00); + + tx_desc->txdw3 = cpu_to_le32((u32)seq_number << TXDESC32_SEQ_SHIFT); + + if (ampdu_enable) + tx_desc->txdw1 |= cpu_to_le32(TXDESC32_AGG_ENABLE); + else + tx_desc->txdw1 |= cpu_to_le32(TXDESC32_AGG_BREAK); + + if (ieee80211_is_mgmt(hdr->frame_control)) { + tx_desc->txdw5 = cpu_to_le32(rate); + tx_desc->txdw4 |= cpu_to_le32(TXDESC32_USE_DRIVER_RATE); + tx_desc->txdw5 |= cpu_to_le32(6 << TXDESC32_RETRY_LIMIT_SHIFT); + tx_desc->txdw5 |= cpu_to_le32(TXDESC32_RETRY_LIMIT_ENABLE); + } + + if (ieee80211_is_data_qos(hdr->frame_control)) + tx_desc->txdw4 |= cpu_to_le32(TXDESC32_QOS); + + if (short_preamble) + tx_desc->txdw4 |= cpu_to_le32(TXDESC32_SHORT_PREAMBLE); + + if (sgi) + tx_desc->txdw5 |= cpu_to_le32(TXDESC32_SHORT_GI); + + if (rate_flag & IEEE80211_TX_RC_USE_RTS_CTS) { + /* + * Use RTS rate 24M - does the mac80211 tell + * us which to use? + */ + tx_desc->txdw4 |= cpu_to_le32(DESC_RATE_24M << + TXDESC32_RTS_RATE_SHIFT); + tx_desc->txdw4 |= cpu_to_le32(TXDESC32_RTS_CTS_ENABLE); + tx_desc->txdw4 |= cpu_to_le32(TXDESC32_HW_RTS_ENABLE); + } +} + +/* + * Fill in v2 (gen2) specific TX descriptor bits. + * This format is used on 8192eu/8723bu + */ +void +rtl8xxxu_fill_txdesc_v2(struct ieee80211_hdr *hdr, + struct rtl8xxxu_txdesc32 *tx_desc32, u32 rate, + u16 rate_flag, bool sgi, bool short_preamble, + bool ampdu_enable) +{ + struct rtl8xxxu_txdesc40 *tx_desc40; + u16 seq_number; + + tx_desc40 = (struct rtl8xxxu_txdesc40 *)tx_desc32; + + seq_number = IEEE80211_SEQ_TO_SN(le16_to_cpu(hdr->seq_ctrl)); + + tx_desc40->txdw4 = cpu_to_le32(rate); + if (ieee80211_is_data(hdr->frame_control)) { + tx_desc40->txdw4 |= cpu_to_le32(0x1f << + TXDESC40_DATA_RATE_FB_SHIFT); + } + + tx_desc40->txdw9 = cpu_to_le32((u32)seq_number << TXDESC40_SEQ_SHIFT); + + if (ampdu_enable) + tx_desc40->txdw2 |= cpu_to_le32(TXDESC40_AGG_ENABLE); + else + tx_desc40->txdw2 |= cpu_to_le32(TXDESC40_AGG_BREAK); + + if (ieee80211_is_mgmt(hdr->frame_control)) { + tx_desc40->txdw4 = cpu_to_le32(rate); + tx_desc40->txdw3 |= cpu_to_le32(TXDESC40_USE_DRIVER_RATE); + tx_desc40->txdw4 |= + cpu_to_le32(6 << TXDESC40_RETRY_LIMIT_SHIFT); + tx_desc40->txdw4 |= cpu_to_le32(TXDESC40_RETRY_LIMIT_ENABLE); + } + + if (short_preamble) + tx_desc40->txdw5 |= cpu_to_le32(TXDESC40_SHORT_PREAMBLE); + + if (rate_flag & IEEE80211_TX_RC_USE_RTS_CTS) { + /* + * Use RTS rate 24M - does the mac80211 tell + * us which to use? + */ + tx_desc40->txdw4 |= cpu_to_le32(DESC_RATE_24M << + TXDESC40_RTS_RATE_SHIFT); + tx_desc40->txdw3 |= cpu_to_le32(TXDESC40_RTS_CTS_ENABLE); + tx_desc40->txdw3 |= cpu_to_le32(TXDESC40_HW_RTS_ENABLE); + } +} + static void rtl8xxxu_tx(struct ieee80211_hw *hw, struct ieee80211_tx_control *control, struct sk_buff *skb) @@ -4759,7 +4866,6 @@ static void rtl8xxxu_tx(struct ieee80211_hw *hw, struct ieee80211_rate *tx_rate = ieee80211_get_tx_rate(hw, tx_info); struct rtl8xxxu_priv *priv = hw->priv; struct rtl8xxxu_txdesc32 *tx_desc; - struct rtl8xxxu_txdesc40 *tx_desc40; struct rtl8xxxu_tx_urb *tx_urb; struct ieee80211_sta *sta = NULL; struct ieee80211_vif *vif = tx_info->control.vif; @@ -4865,95 +4971,9 @@ static void rtl8xxxu_tx(struct ieee80211_hw *hw, short_preamble = true; seq_number = IEEE80211_SEQ_TO_SN(le16_to_cpu(hdr->seq_ctrl)); - if (!usedesc40) { - tx_desc->txdw5 = cpu_to_le32(rate); - if (ieee80211_is_data(hdr->frame_control)) - tx_desc->txdw5 |= cpu_to_le32(0x0001ff00); - - tx_desc->txdw3 = - cpu_to_le32((u32)seq_number << TXDESC32_SEQ_SHIFT); - - if (ampdu_enable) - tx_desc->txdw1 |= cpu_to_le32(TXDESC32_AGG_ENABLE); - else - tx_desc->txdw1 |= cpu_to_le32(TXDESC32_AGG_BREAK); - - if (ieee80211_is_mgmt(hdr->frame_control)) { - tx_desc->txdw5 = cpu_to_le32(rate); - tx_desc->txdw4 |= - cpu_to_le32(TXDESC32_USE_DRIVER_RATE); - tx_desc->txdw5 |= - cpu_to_le32(6 << TXDESC32_RETRY_LIMIT_SHIFT); - tx_desc->txdw5 |= - cpu_to_le32(TXDESC32_RETRY_LIMIT_ENABLE); - } - - if (ieee80211_is_data_qos(hdr->frame_control)) - tx_desc->txdw4 |= cpu_to_le32(TXDESC32_QOS); - - if (short_preamble) - tx_desc->txdw4 |= cpu_to_le32(TXDESC32_SHORT_PREAMBLE); - - if (sgi) - tx_desc->txdw5 |= cpu_to_le32(TXDESC32_SHORT_GI); - - if (rate_flag & IEEE80211_TX_RC_USE_RTS_CTS) { - /* - * Use RTS rate 24M - does the mac80211 tell - * us which to use? - */ - tx_desc->txdw4 |= - cpu_to_le32(DESC_RATE_24M << - TXDESC32_RTS_RATE_SHIFT); - tx_desc->txdw4 |= - cpu_to_le32(TXDESC32_RTS_CTS_ENABLE); - tx_desc->txdw4 |= cpu_to_le32(TXDESC32_HW_RTS_ENABLE); - } - } else { - tx_desc40 = (struct rtl8xxxu_txdesc40 *)tx_desc; - - tx_desc40->txdw4 = cpu_to_le32(rate); - if (ieee80211_is_data(hdr->frame_control)) { - tx_desc->txdw4 |= - cpu_to_le32(0x1f << - TXDESC40_DATA_RATE_FB_SHIFT); - } - - tx_desc40->txdw9 = - cpu_to_le32((u32)seq_number << TXDESC40_SEQ_SHIFT); - - if (ampdu_enable) - tx_desc40->txdw2 |= cpu_to_le32(TXDESC40_AGG_ENABLE); - else - tx_desc40->txdw2 |= cpu_to_le32(TXDESC40_AGG_BREAK); - - if (ieee80211_is_mgmt(hdr->frame_control)) { - tx_desc40->txdw4 = cpu_to_le32(rate); - tx_desc40->txdw3 |= - cpu_to_le32(TXDESC40_USE_DRIVER_RATE); - tx_desc40->txdw4 |= - cpu_to_le32(6 << TXDESC40_RETRY_LIMIT_SHIFT); - tx_desc40->txdw4 |= - cpu_to_le32(TXDESC40_RETRY_LIMIT_ENABLE); - } - - if (short_preamble) - tx_desc40->txdw5 |= - cpu_to_le32(TXDESC40_SHORT_PREAMBLE); - - if (rate_flag & IEEE80211_TX_RC_USE_RTS_CTS) { - /* - * Use RTS rate 24M - does the mac80211 tell - * us which to use? - */ - tx_desc->txdw4 |= - cpu_to_le32(DESC_RATE_24M << - TXDESC40_RTS_RATE_SHIFT); - tx_desc->txdw3 |= cpu_to_le32(TXDESC40_RTS_CTS_ENABLE); - tx_desc->txdw3 |= cpu_to_le32(TXDESC40_HW_RTS_ENABLE); - } - } + priv->fops->fill_txdesc(hdr, tx_desc, rate, rate_flag, + sgi, short_preamble, ampdu_enable); rtl8xxxu_calc_tx_desc_csum(tx_desc); From 77e3980201e7d54b2e63653b46a6baba26040cef Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 19 Aug 2016 17:46:41 -0400 Subject: [PATCH 0732/1715] rtl8xxxu: gen1: Fix non static symbol warning Fixes the following sparse warning: drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c:898:1: warning: symbol 'rtl8xxxu_gen1_h2c_cmd' was not declared. Should it be static? Signed-off-by: Wei Yongjun Signed-off-by: Jes Sorensen Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index e02fab099496..508d46f6db7b 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -894,7 +894,7 @@ int rtl8xxxu_write_rfreg(struct rtl8xxxu_priv *priv, return retval; } -int +static int rtl8xxxu_gen1_h2c_cmd(struct rtl8xxxu_priv *priv, struct h2c_cmd *h2c, int len) { struct device *dev = &priv->udev->dev; From 7329dc13107b2315a7b8ba5a75048935304c55a0 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 19 Aug 2016 17:46:42 -0400 Subject: [PATCH 0733/1715] rtl8xxxu: Make rtl8xxxu_ampdu_action less chatty On my home network rtl8xxxu is spamming the log with IEEE80211_AMPDU_RX_START / IEEE80211_AMPDU_RX_STOP every few seconds turn these messages into debug messages. Signed-off-by: Hans de Goede Signed-off-by: Jes Sorensen [kvalo@codeaurora.org: fix commit title] Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index 508d46f6db7b..c3620833db7a 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -5696,7 +5696,7 @@ rtl8xxxu_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif, switch (action) { case IEEE80211_AMPDU_TX_START: - dev_info(dev, "%s: IEEE80211_AMPDU_TX_START\n", __func__); + dev_dbg(dev, "%s: IEEE80211_AMPDU_TX_START\n", __func__); ampdu_factor = sta->ht_cap.ampdu_factor; ampdu_density = sta->ht_cap.ampdu_density; rtl8xxxu_set_ampdu_factor(priv, ampdu_factor); @@ -5706,21 +5706,21 @@ rtl8xxxu_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif, ampdu_factor, ampdu_density); break; case IEEE80211_AMPDU_TX_STOP_FLUSH: - dev_info(dev, "%s: IEEE80211_AMPDU_TX_STOP_FLUSH\n", __func__); + dev_dbg(dev, "%s: IEEE80211_AMPDU_TX_STOP_FLUSH\n", __func__); rtl8xxxu_set_ampdu_factor(priv, 0); rtl8xxxu_set_ampdu_min_space(priv, 0); break; case IEEE80211_AMPDU_TX_STOP_FLUSH_CONT: - dev_info(dev, "%s: IEEE80211_AMPDU_TX_STOP_FLUSH_CONT\n", + dev_dbg(dev, "%s: IEEE80211_AMPDU_TX_STOP_FLUSH_CONT\n", __func__); rtl8xxxu_set_ampdu_factor(priv, 0); rtl8xxxu_set_ampdu_min_space(priv, 0); break; case IEEE80211_AMPDU_RX_START: - dev_info(dev, "%s: IEEE80211_AMPDU_RX_START\n", __func__); + dev_dbg(dev, "%s: IEEE80211_AMPDU_RX_START\n", __func__); break; case IEEE80211_AMPDU_RX_STOP: - dev_info(dev, "%s: IEEE80211_AMPDU_RX_STOP\n", __func__); + dev_dbg(dev, "%s: IEEE80211_AMPDU_RX_STOP\n", __func__); break; default: break; From b46b599328e6d46397ca353266d7ec5b7a04bb02 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 22 Aug 2016 19:35:05 +0100 Subject: [PATCH 0734/1715] zd1211rw: fix spelling mistake "firmeware" -> "firmware" Trivial fix to spelling mistake in dev_err message. Signed-off-by: Colin Ian King Reviewed-by: Julian Calaby Signed-off-by: Kalle Valo --- drivers/net/wireless/zydas/zd1211rw/zd_usb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/zydas/zd1211rw/zd_usb.c b/drivers/net/wireless/zydas/zd1211rw/zd_usb.c index a912dc051111..c5effd6c6be9 100644 --- a/drivers/net/wireless/zydas/zd1211rw/zd_usb.c +++ b/drivers/net/wireless/zydas/zd1211rw/zd_usb.c @@ -193,7 +193,7 @@ static int upload_code(struct usb_device *udev, 0, 0, p, sizeof(ret), 5000 /* ms */); if (r != sizeof(ret)) { dev_err(&udev->dev, - "control request firmeware confirmation failed." + "control request firmware confirmation failed." " Return value %d\n", r); if (r >= 0) r = -ENODEV; From 0c9d3491530773858ff9d705ec2a9c382f449230 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Mon, 22 Aug 2016 14:27:59 -0500 Subject: [PATCH 0735/1715] rtlwifi: Fix missing country code for Great Britain Some RTL8821AE devices sold in Great Britain have the country code of 0x25 encoded in their EEPROM. This value is not tested in the routine that establishes the regulatory info for the chip. The fix is to set this code to have the same capabilities as the EU countries. In addition, the channels allowed for COUNTRY_CODE_ETSI were more properly suited for China and Israel, not the EU. This problem has also been fixed. Signed-off-by: Larry Finger Cc: Stable Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/regd.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/regd.c b/drivers/net/wireless/realtek/rtlwifi/regd.c index 3524441fd516..6ee6bf8e7eaf 100644 --- a/drivers/net/wireless/realtek/rtlwifi/regd.c +++ b/drivers/net/wireless/realtek/rtlwifi/regd.c @@ -345,9 +345,9 @@ static const struct ieee80211_regdomain *_rtl_regdomain_select( return &rtl_regdom_no_midband; case COUNTRY_CODE_IC: return &rtl_regdom_11; - case COUNTRY_CODE_ETSI: case COUNTRY_CODE_TELEC_NETGEAR: return &rtl_regdom_60_64; + case COUNTRY_CODE_ETSI: case COUNTRY_CODE_SPAIN: case COUNTRY_CODE_FRANCE: case COUNTRY_CODE_ISRAEL: @@ -406,6 +406,8 @@ static u8 channel_plan_to_country_code(u8 channelplan) return COUNTRY_CODE_WORLD_WIDE_13; case 0x22: return COUNTRY_CODE_IC; + case 0x25: + return COUNTRY_CODE_ETSI; case 0x32: return COUNTRY_CODE_TELEC_NETGEAR; case 0x41: From bccf3ffc8c6d8e0251a15541bb4d12b423c4f729 Mon Sep 17 00:00:00 2001 From: Ismael Luceno Date: Mon, 22 Aug 2016 19:40:07 -0300 Subject: [PATCH 0736/1715] brcmfmac: Add USB ID for Cisco Linksys AE1200 The AE1200 comes with different revisions of the BCM43235 chipset, but all have the same USB ID. Only revision 3 can be supported. Signed-off-by: Ismael Luceno Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c | 4 ++++ drivers/net/wireless/broadcom/brcm80211/include/brcm_hw_ids.h | 2 ++ 2 files changed, 6 insertions(+) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c index fa26619a7945..2f978a39b58a 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c @@ -1458,11 +1458,15 @@ static int brcmf_usb_reset_resume(struct usb_interface *intf) #define BRCMF_USB_DEVICE(dev_id) \ { USB_DEVICE(BRCM_USB_VENDOR_ID_BROADCOM, dev_id) } +#define LINKSYS_USB_DEVICE(dev_id) \ + { USB_DEVICE(BRCM_USB_VENDOR_ID_LINKSYS, dev_id) } + static struct usb_device_id brcmf_usb_devid_table[] = { BRCMF_USB_DEVICE(BRCM_USB_43143_DEVICE_ID), BRCMF_USB_DEVICE(BRCM_USB_43236_DEVICE_ID), BRCMF_USB_DEVICE(BRCM_USB_43242_DEVICE_ID), BRCMF_USB_DEVICE(BRCM_USB_43569_DEVICE_ID), + LINKSYS_USB_DEVICE(BRCM_USB_43235_LINKSYS_DEVICE_ID), { USB_DEVICE(BRCM_USB_VENDOR_ID_LG, BRCM_USB_43242_LG_DEVICE_ID) }, /* special entry for device with firmware loaded and running */ BRCMF_USB_DEVICE(BRCM_USB_BCMFW_DEVICE_ID), diff --git a/drivers/net/wireless/broadcom/brcm80211/include/brcm_hw_ids.h b/drivers/net/wireless/broadcom/brcm80211/include/brcm_hw_ids.h index 3cc42bef6245..d0407d9ad782 100644 --- a/drivers/net/wireless/broadcom/brcm80211/include/brcm_hw_ids.h +++ b/drivers/net/wireless/broadcom/brcm80211/include/brcm_hw_ids.h @@ -22,6 +22,7 @@ #define BRCM_USB_VENDOR_ID_BROADCOM 0x0a5c #define BRCM_USB_VENDOR_ID_LG 0x043e +#define BRCM_USB_VENDOR_ID_LINKSYS 0x13b1 #define BRCM_PCIE_VENDOR_ID_BROADCOM PCI_VENDOR_ID_BROADCOM /* Chipcommon Core Chip IDs */ @@ -58,6 +59,7 @@ /* USB Device IDs */ #define BRCM_USB_43143_DEVICE_ID 0xbd1e +#define BRCM_USB_43235_LINKSYS_DEVICE_ID 0x0039 #define BRCM_USB_43236_DEVICE_ID 0xbd17 #define BRCM_USB_43242_DEVICE_ID 0xbd1f #define BRCM_USB_43242_LG_DEVICE_ID 0x3101 From 7703773ef1d85b40433902a8da20167331597e4a Mon Sep 17 00:00:00 2001 From: Nicolas Iooss Date: Tue, 23 Aug 2016 11:37:17 +0200 Subject: [PATCH 0737/1715] brcmfmac: fix pmksa->bssid usage The struct cfg80211_pmksa defines its bssid field as: const u8 *bssid; contrary to struct brcmf_pmksa, which uses: u8 bssid[ETH_ALEN]; Therefore in brcmf_cfg80211_del_pmksa(), &pmksa->bssid takes the address of this field (of type u8**), not the one of its content (which would be u8*). Remove the & operator to make brcmf_dbg("%pM") and memcmp() behave as expected. This bug have been found using a custom static checker (which checks the usage of %p... attributes at build time). It has been introduced in commit 6c404f34f2bd ("brcmfmac: Cleanup pmksa cache handling code"), which replaced pmksa->bssid by &pmksa->bssid while refactoring the code, without modifying struct cfg80211_pmksa definition. Replace &pmk[i].bssid with pmk[i].bssid too to make the code clearer, this change does not affect the semantic. Fixes: 6c404f34f2bd ("brcmfmac: Cleanup pmksa cache handling code") Cc: stable@vger.kernel.org Signed-off-by: Nicolas Iooss Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index 2628d5e12c64..201a98016142 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -3884,11 +3884,11 @@ brcmf_cfg80211_del_pmksa(struct wiphy *wiphy, struct net_device *ndev, if (!check_vif_up(ifp->vif)) return -EIO; - brcmf_dbg(CONN, "del_pmksa - PMK bssid = %pM\n", &pmksa->bssid); + brcmf_dbg(CONN, "del_pmksa - PMK bssid = %pM\n", pmksa->bssid); npmk = le32_to_cpu(cfg->pmk_list.npmk); for (i = 0; i < npmk; i++) - if (!memcmp(&pmksa->bssid, &pmk[i].bssid, ETH_ALEN)) + if (!memcmp(pmksa->bssid, pmk[i].bssid, ETH_ALEN)) break; if ((npmk > 0) && (i < npmk)) { From c5aa9541818a1aacf05ab9a30c3f525841cdc1c9 Mon Sep 17 00:00:00 2001 From: Guy Mishol Date: Wed, 24 Aug 2016 14:35:27 +0300 Subject: [PATCH 0738/1715] wl18xx: add time sync configuration api Add time sync configuration api. The new api allows to configure the synchronization mode (STA/AP/MESH) and (in case of Mesh mode) the master address of each zone. Signed-off-by: Guy Mishol Signed-off-by: Kalle Valo --- drivers/net/wireless/ti/wl18xx/acx.c | 29 +++++++++++++++++++++++++ drivers/net/wireless/ti/wl18xx/acx.h | 13 +++++++++++ drivers/net/wireless/ti/wl18xx/event.c | 1 + drivers/net/wireless/ti/wlcore/wlcore.h | 3 +++ 4 files changed, 46 insertions(+) diff --git a/drivers/net/wireless/ti/wl18xx/acx.c b/drivers/net/wireless/ti/wl18xx/acx.c index 4be0409308cb..b5525a38264b 100644 --- a/drivers/net/wireless/ti/wl18xx/acx.c +++ b/drivers/net/wireless/ti/wl18xx/acx.c @@ -309,3 +309,32 @@ out: kfree(acx); return ret; } + +int wl18xx_acx_time_sync_cfg(struct wl1271 *wl) +{ + struct acx_time_sync_cfg *acx; + int ret; + + wl1271_debug(DEBUG_ACX, "acx time sync cfg: mode %d, addr: %pM", + wl->conf.sg.params[WL18XX_CONF_SG_TIME_SYNC], + wl->zone_master_mac_addr); + + acx = kzalloc(sizeof(*acx), GFP_KERNEL); + if (!acx) { + ret = -ENOMEM; + goto out; + } + + acx->sync_mode = wl->conf.sg.params[WL18XX_CONF_SG_TIME_SYNC]; + memcpy(acx->zone_mac_addr, wl->zone_master_mac_addr, ETH_ALEN); + + ret = wl1271_cmd_configure(wl, ACX_TIME_SYNC_CFG, + acx, sizeof(*acx)); + if (ret < 0) { + wl1271_warning("acx time sync cfg failed: %d", ret); + goto out; + } +out: + kfree(acx); + return ret; +} diff --git a/drivers/net/wireless/ti/wl18xx/acx.h b/drivers/net/wireless/ti/wl18xx/acx.h index 342a2993ef98..2edbbbfd8421 100644 --- a/drivers/net/wireless/ti/wl18xx/acx.h +++ b/drivers/net/wireless/ti/wl18xx/acx.h @@ -37,6 +37,7 @@ enum { ACX_RX_BA_FILTER = 0x0058, ACX_AP_SLEEP_CFG = 0x0059, ACX_DYNAMIC_TRACES_CFG = 0x005A, + ACX_TIME_SYNC_CFG = 0x005B, }; /* numbers of bits the length field takes (add 1 for the actual number) */ @@ -388,6 +389,17 @@ struct acx_dynamic_fw_traces_cfg { __le32 dynamic_fw_traces; } __packed; +/* + * ACX_TIME_SYNC_CFG + * configure the time sync parameters + */ +struct acx_time_sync_cfg { + struct acx_header header; + u8 sync_mode; + u8 zone_mac_addr[ETH_ALEN]; + u8 padding[1]; +} __packed; + int wl18xx_acx_host_if_cfg_bitmap(struct wl1271 *wl, u32 host_cfg_bitmap, u32 sdio_blk_size, u32 extra_mem_blks, u32 len_field_size); @@ -402,5 +414,6 @@ int wl18xx_acx_interrupt_notify_config(struct wl1271 *wl, bool action); int wl18xx_acx_rx_ba_filter(struct wl1271 *wl, bool action); int wl18xx_acx_ap_sleep(struct wl1271 *wl); int wl18xx_acx_dynamic_fw_traces(struct wl1271 *wl); +int wl18xx_acx_time_sync_cfg(struct wl1271 *wl); #endif /* __WL18XX_ACX_H__ */ diff --git a/drivers/net/wireless/ti/wl18xx/event.c b/drivers/net/wireless/ti/wl18xx/event.c index 2c5df43b8ed9..b36ce185c9f2 100644 --- a/drivers/net/wireless/ti/wl18xx/event.c +++ b/drivers/net/wireless/ti/wl18xx/event.c @@ -22,6 +22,7 @@ #include #include "event.h" #include "scan.h" +#include "conf.h" #include "../wlcore/cmd.h" #include "../wlcore/debug.h" #include "../wlcore/vendor_cmd.h" diff --git a/drivers/net/wireless/ti/wlcore/wlcore.h b/drivers/net/wireless/ti/wlcore/wlcore.h index 8f28aa02230c..1827546ba807 100644 --- a/drivers/net/wireless/ti/wlcore/wlcore.h +++ b/drivers/net/wireless/ti/wlcore/wlcore.h @@ -501,6 +501,9 @@ struct wl1271 { /* dynamic fw traces */ u32 dynamic_fw_traces; + + /* time sync zone master */ + u8 zone_master_mac_addr[ETH_ALEN]; }; int wlcore_probe(struct wl1271 *wl, struct platform_device *pdev); From fd3fbb65cab86c07f5881ccb919a440497c0960d Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Thu, 25 Aug 2016 17:05:16 +0200 Subject: [PATCH 0739/1715] mwifiex: make "PCI-E is not the winner" print more informative Printing ret and adapter->winner do not provide any useful information as those are always 0 at point where the massage is printed. Print value read from reg->fw_status register instead. Stanislaw Gruszka Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/pcie.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/pcie.c b/drivers/net/wireless/marvell/mwifiex/pcie.c index 453ab6ad4784..8abbbfe41785 100644 --- a/drivers/net/wireless/marvell/mwifiex/pcie.c +++ b/drivers/net/wireless/marvell/mwifiex/pcie.c @@ -2074,8 +2074,7 @@ mwifiex_check_winner_status(struct mwifiex_adapter *adapter) adapter->winner = 1; } else { mwifiex_dbg(adapter, ERROR, - "PCI-E is not the winner <%#x,%d>, exit dnld\n", - ret, adapter->winner); + "PCI-E is not the winner <%#x>", winner); } return ret; From 09dd9ec598c3fdcb9874d13d9bbab0e2ece6748b Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Thu, 25 Aug 2016 17:05:17 +0200 Subject: [PATCH 0740/1715] mwifiex: print status of FW ready event For debugging purpose print content of reg->fw_status register and other variables values when waiting for firmware ready event. Signed-off-by: Stanislaw Gruszka Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/pcie.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/wireless/marvell/mwifiex/pcie.c b/drivers/net/wireless/marvell/mwifiex/pcie.c index 8abbbfe41785..2f555b0ebf7d 100644 --- a/drivers/net/wireless/marvell/mwifiex/pcie.c +++ b/drivers/net/wireless/marvell/mwifiex/pcie.c @@ -2043,6 +2043,10 @@ mwifiex_check_fw_status(struct mwifiex_adapter *adapter, u32 poll_num) ret = -1; else ret = 0; + + mwifiex_dbg(adapter, INFO, "Try %d if FW is ready <%d,%#x>", + tries, ret, firmware_stat); + if (ret) continue; if (firmware_stat == FIRMWARE_READY_PCIE) { From b9aebb69ecd33f5163b24c2d0f928260caf6a86b Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Thu, 25 Aug 2016 17:05:18 +0200 Subject: [PATCH 0741/1715] mwifiex: do not print dot when downloading FW Printing about 3000 lines like this [ 20.691850] mwifiex_pcie 0000:02:00.0: . [ 20.693466] mwifiex_pcie 0000:02:00.0: . is not useful. If FW downloading will be interrupted, we will get proper error message about that. Signed-off-by: Stanislaw Gruszka Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/pcie.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/pcie.c b/drivers/net/wireless/marvell/mwifiex/pcie.c index 2f555b0ebf7d..50a6a531ed18 100644 --- a/drivers/net/wireless/marvell/mwifiex/pcie.c +++ b/drivers/net/wireless/marvell/mwifiex/pcie.c @@ -1956,8 +1956,6 @@ static int mwifiex_prog_fw_w_helper(struct mwifiex_adapter *adapter, if (firmware_len - offset < txlen) txlen = firmware_len - offset; - mwifiex_dbg(adapter, INFO, "."); - tx_blocks = (txlen + card->pcie.blksz_fw_dl - 1) / card->pcie.blksz_fw_dl; From 5856cd5b8dda5ee013a2b0abbab0552a6f14d72d Mon Sep 17 00:00:00 2001 From: Oleg Drokin Date: Fri, 26 Aug 2016 23:12:23 -0400 Subject: [PATCH 0742/1715] rtlwifi/rtl8192de: Fix print format string %ul was likely meant as %lu to print an unsigned long, not an unsigned with a letter l at the end. But in fact the value printed is u32 anyway, so just drop the l completely. Signed-off-by: Oleg Drokin Acked-by: Larry Finger Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/rtl8192de/phy.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/phy.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/phy.c index d334d2a5ea63..2a4810d32182 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/phy.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/phy.c @@ -588,7 +588,7 @@ static bool _rtl92d_phy_config_bb_with_headerfile(struct ieee80211_hw *hw, * setting. */ udelay(1); RT_TRACE(rtlpriv, COMP_INIT, DBG_TRACE, - "The Rtl819XAGCTAB_Array_Table[0] is %ul Rtl819XPHY_REGArray[1] is %ul\n", + "The Rtl819XAGCTAB_Array_Table[0] is %u Rtl819XPHY_REGArray[1] is %u\n", agctab_array_table[i], agctab_array_table[i + 1]); } @@ -604,7 +604,7 @@ static bool _rtl92d_phy_config_bb_with_headerfile(struct ieee80211_hw *hw, * setting. */ udelay(1); RT_TRACE(rtlpriv, COMP_INIT, DBG_TRACE, - "The Rtl819XAGCTAB_Array_Table[0] is %ul Rtl819XPHY_REGArray[1] is %ul\n", + "The Rtl819XAGCTAB_Array_Table[0] is %u Rtl819XPHY_REGArray[1] is %u\n", agctab_array_table[i], agctab_array_table[i + 1]); } @@ -620,7 +620,7 @@ static bool _rtl92d_phy_config_bb_with_headerfile(struct ieee80211_hw *hw, * setting. */ udelay(1); RT_TRACE(rtlpriv, COMP_INIT, DBG_TRACE, - "The Rtl819XAGCTAB_5GArray_Table[0] is %ul Rtl819XPHY_REGArray[1] is %ul\n", + "The Rtl819XAGCTAB_5GArray_Table[0] is %u Rtl819XPHY_REGArray[1] is %u\n", agctab_5garray_table[i], agctab_5garray_table[i + 1]); } From af338a9ea60acc6337fe9fcdcf664aec2520e541 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sun, 4 Sep 2016 13:10:10 +0100 Subject: [PATCH 0743/1715] rxrpc: The client call state must be changed before attachment to conn We must set the client call state to RXRPC_CALL_CLIENT_SEND_REQUEST before attaching the call to the connection struct, not after, as it's liable to receive errors and conn aborts as soon as the assignment is made - and these will cause its state to be changed outside of the initiating thread's control. Signed-off-by: David Howells --- net/rxrpc/call_object.c | 2 -- net/rxrpc/conn_client.c | 4 ++++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 57e00fc9cff2..65691742199b 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -197,8 +197,6 @@ static int rxrpc_begin_client_call(struct rxrpc_call *call, if (ret < 0) return ret; - call->state = RXRPC_CALL_CLIENT_SEND_REQUEST; - spin_lock(&call->conn->params.peer->lock); hlist_add_head(&call->error_link, &call->conn->params.peer->error_targets); spin_unlock(&call->conn->params.peer->lock); diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 4b213bc0f554..e19804dd6c8d 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -537,6 +537,10 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn, struct rxrpc_call, chan_wait_link); u32 call_id = chan->call_counter + 1; + write_lock_bh(&call->state_lock); + call->state = RXRPC_CALL_CLIENT_SEND_REQUEST; + write_unlock_bh(&call->state_lock); + rxrpc_see_call(call); list_del_init(&call->chan_wait_link); conn->active_chans |= 1 << channel; From 76644232e6122808947aa329bb995ea68ad01442 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Thu, 1 Sep 2016 18:01:47 -0700 Subject: [PATCH 0744/1715] openvswitch: Free tmpl with tmpl_free. When an error occurs during conntrack template creation as part of actions validation, we need to free the template. Previously we've been using nf_ct_put() to do this, but nf_ct_tmpl_free() is more appropriate. Signed-off-by: Joe Stringer Signed-off-by: David S. Miller --- net/openvswitch/conntrack.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index e054a748ff25..31045ef44a82 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -1367,7 +1367,7 @@ static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info) if (ct_info->helper) module_put(ct_info->helper->me); if (ct_info->ct) - nf_ct_put(ct_info->ct); + nf_ct_tmpl_free(ct_info->ct); } void ovs_ct_init(struct net *net) From 38f507f1ba013effec3d7deb84273ac1829e6b87 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Thu, 1 Sep 2016 21:53:44 -0700 Subject: [PATCH 0745/1715] vxlan: call peernet2id() in fdb notification netns id should be already allocated each time we change netns, that is, in dev_change_net_namespace() (more precisely in rtnl_fill_ifinfo()). It is safe to just call peernet2id() here. Cc: Nicolas Dichtel Signed-off-by: Cong Wang Acked-by: Nicolas Dichtel Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 2 +- net/core/net_namespace.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index f605a3684a7f..9735059dee99 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -287,7 +287,7 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan, if (!net_eq(dev_net(vxlan->dev), vxlan->net) && nla_put_s32(skb, NDA_LINK_NETNSID, - peernet2id_alloc(dev_net(vxlan->dev), vxlan->net))) + peernet2id(dev_net(vxlan->dev), vxlan->net))) goto nla_put_failure; if (send_eth && nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->eth_addr)) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 7a77dcabd4e8..f3fa43506f16 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -227,7 +227,6 @@ int peernet2id_alloc(struct net *net, struct net *peer) rtnl_net_notifyid(net, RTM_NEWNSID, id); return id; } -EXPORT_SYMBOL(peernet2id_alloc); /* This function returns, if assigned, the id of a peer netns. */ int peernet2id(struct net *net, struct net *peer) @@ -240,6 +239,7 @@ int peernet2id(struct net *net, struct net *peer) spin_unlock_irqrestore(&net->nsid_lock, flags); return id; } +EXPORT_SYMBOL(peernet2id); /* This function returns true is the peer netns has an id assigned into the * current netns. From bc51dddf98c907b598e645ae4b277ed1295b6d5f Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Thu, 1 Sep 2016 21:53:45 -0700 Subject: [PATCH 0746/1715] netns: avoid disabling irq for netns id We never read or change netns id in hardirq context, the only place we read netns id in softirq context is in vxlan_xmit(). So, it should be enough to just disable BH. Cc: Nicolas Dichtel Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/core/net_namespace.c | 35 +++++++++++++++-------------------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index f3fa43506f16..42bdda0e616b 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -215,14 +215,13 @@ static void rtnl_net_notifyid(struct net *net, int cmd, int id); */ int peernet2id_alloc(struct net *net, struct net *peer) { - unsigned long flags; bool alloc; int id; - spin_lock_irqsave(&net->nsid_lock, flags); + spin_lock_bh(&net->nsid_lock); alloc = atomic_read(&peer->count) == 0 ? false : true; id = __peernet2id_alloc(net, peer, &alloc); - spin_unlock_irqrestore(&net->nsid_lock, flags); + spin_unlock_bh(&net->nsid_lock); if (alloc && id >= 0) rtnl_net_notifyid(net, RTM_NEWNSID, id); return id; @@ -231,12 +230,11 @@ int peernet2id_alloc(struct net *net, struct net *peer) /* This function returns, if assigned, the id of a peer netns. */ int peernet2id(struct net *net, struct net *peer) { - unsigned long flags; int id; - spin_lock_irqsave(&net->nsid_lock, flags); + spin_lock_bh(&net->nsid_lock); id = __peernet2id(net, peer); - spin_unlock_irqrestore(&net->nsid_lock, flags); + spin_unlock_bh(&net->nsid_lock); return id; } EXPORT_SYMBOL(peernet2id); @@ -251,18 +249,17 @@ bool peernet_has_id(struct net *net, struct net *peer) struct net *get_net_ns_by_id(struct net *net, int id) { - unsigned long flags; struct net *peer; if (id < 0) return NULL; rcu_read_lock(); - spin_lock_irqsave(&net->nsid_lock, flags); + spin_lock_bh(&net->nsid_lock); peer = idr_find(&net->netns_ids, id); if (peer) get_net(peer); - spin_unlock_irqrestore(&net->nsid_lock, flags); + spin_unlock_bh(&net->nsid_lock); rcu_read_unlock(); return peer; @@ -406,17 +403,17 @@ static void cleanup_net(struct work_struct *work) for_each_net(tmp) { int id; - spin_lock_irq(&tmp->nsid_lock); + spin_lock_bh(&tmp->nsid_lock); id = __peernet2id(tmp, net); if (id >= 0) idr_remove(&tmp->netns_ids, id); - spin_unlock_irq(&tmp->nsid_lock); + spin_unlock_bh(&tmp->nsid_lock); if (id >= 0) rtnl_net_notifyid(tmp, RTM_DELNSID, id); } - spin_lock_irq(&net->nsid_lock); + spin_lock_bh(&net->nsid_lock); idr_destroy(&net->netns_ids); - spin_unlock_irq(&net->nsid_lock); + spin_unlock_bh(&net->nsid_lock); } rtnl_unlock(); @@ -544,7 +541,6 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct nlattr *tb[NETNSA_MAX + 1]; - unsigned long flags; struct net *peer; int nsid, err; @@ -565,15 +561,15 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh) if (IS_ERR(peer)) return PTR_ERR(peer); - spin_lock_irqsave(&net->nsid_lock, flags); + spin_lock_bh(&net->nsid_lock); if (__peernet2id(net, peer) >= 0) { - spin_unlock_irqrestore(&net->nsid_lock, flags); + spin_unlock_bh(&net->nsid_lock); err = -EEXIST; goto out; } err = alloc_netid(net, peer, nsid); - spin_unlock_irqrestore(&net->nsid_lock, flags); + spin_unlock_bh(&net->nsid_lock); if (err >= 0) { rtnl_net_notifyid(net, RTM_NEWNSID, err); err = 0; @@ -695,11 +691,10 @@ static int rtnl_net_dumpid(struct sk_buff *skb, struct netlink_callback *cb) .idx = 0, .s_idx = cb->args[0], }; - unsigned long flags; - spin_lock_irqsave(&net->nsid_lock, flags); + spin_lock_bh(&net->nsid_lock); idr_for_each(&net->netns_ids, rtnl_net_dumpid_one, &net_cb); - spin_unlock_irqrestore(&net->nsid_lock, flags); + spin_unlock_bh(&net->nsid_lock); cb->args[0] = net_cb.idx; return skb->len; From 661dbeb9d6e6e698d469c334527eae8177810b1f Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Fri, 2 Sep 2016 19:13:53 +0530 Subject: [PATCH 0747/1715] cxgb4: Add support for ndo_get_vf_config Adds support for ndo_get_vf_config, also fill the default mac address that will be provided to the VF by firmware, in case user doesn't provide one. So user can get the default MAC address address also through ndo_get_vf_config. Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 8 +++ .../net/ethernet/chelsio/cxgb4/cxgb4_main.c | 64 ++++++++++++++++++- drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 2 +- 3 files changed, 72 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 3f7b33aa5ec5..45955691edc7 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -788,6 +788,11 @@ struct uld_msix_info { char desc[IFNAMSIZ + 10]; }; +struct vf_info { + unsigned char vf_mac_addr[ETH_ALEN]; + bool pf_set_mac; +}; + struct adapter { void __iomem *regs; void __iomem *bar2; @@ -821,6 +826,9 @@ struct adapter { struct net_device *port[MAX_NPORTS]; u8 chan_map[NCHAN]; /* channel -> port map */ + struct vf_info *vfinfo; + u8 num_vfs; + u32 filter_mode; unsigned int l2t_start; unsigned int l2t_end; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 44019bdd526d..44cc9767936f 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -3094,10 +3094,44 @@ static int dummy_open(struct net_device *dev) return 0; } +/* Fill MAC address that will be assigned by the FW */ +static void fill_vf_station_mac_addr(struct adapter *adap) +{ + unsigned int i; + u8 hw_addr[ETH_ALEN], macaddr[ETH_ALEN]; + int err; + u8 *na; + u16 a, b; + + err = t4_get_raw_vpd_params(adap, &adap->params.vpd); + if (!err) { + na = adap->params.vpd.na; + for (i = 0; i < ETH_ALEN; i++) + hw_addr[i] = (hex2val(na[2 * i + 0]) * 16 + + hex2val(na[2 * i + 1])); + a = (hw_addr[0] << 8) | hw_addr[1]; + b = (hw_addr[1] << 8) | hw_addr[2]; + a ^= b; + a |= 0x0200; /* locally assigned Ethernet MAC address */ + a &= ~0x0100; /* not a multicast Ethernet MAC address */ + macaddr[0] = a >> 8; + macaddr[1] = a & 0xff; + + for (i = 2; i < 5; i++) + macaddr[i] = hw_addr[i + 1]; + + for (i = 0; i < adap->num_vfs; i++) { + macaddr[5] = adap->pf * 16 + i; + ether_addr_copy(adap->vfinfo[i].vf_mac_addr, macaddr); + } + } +} + static int cxgb_set_vf_mac(struct net_device *dev, int vf, u8 *mac) { struct port_info *pi = netdev_priv(dev); struct adapter *adap = pi->adapter; + int ret; /* verify MAC addr is valid */ if (!is_valid_ether_addr(mac)) { @@ -3109,7 +3143,23 @@ static int cxgb_set_vf_mac(struct net_device *dev, int vf, u8 *mac) dev_info(pi->adapter->pdev_dev, "Setting MAC %pM on VF %d\n", mac, vf); - return t4_set_vf_mac_acl(adap, vf + 1, 1, mac); + ret = t4_set_vf_mac_acl(adap, vf + 1, 1, mac); + if (!ret) + ether_addr_copy(adap->vfinfo[vf].vf_mac_addr, mac); + return ret; +} + +static int cxgb_get_vf_config(struct net_device *dev, + int vf, struct ifla_vf_info *ivi) +{ + struct port_info *pi = netdev_priv(dev); + struct adapter *adap = pi->adapter; + + if (vf >= adap->num_vfs) + return -EINVAL; + ivi->vf = vf; + ether_addr_copy(ivi->mac, adap->vfinfo[vf].vf_mac_addr); + return 0; } #endif @@ -3259,6 +3309,7 @@ static const struct net_device_ops cxgb4_netdev_ops = { static const struct net_device_ops cxgb4_mgmt_netdev_ops = { .ndo_open = dummy_open, .ndo_set_vf_mac = cxgb_set_vf_mac, + .ndo_get_vf_config = cxgb_get_vf_config, }; #endif @@ -5116,6 +5167,10 @@ static int cxgb4_iov_configure(struct pci_dev *pdev, int num_vfs) unregister_netdev(adap->port[0]); adap->port[0] = NULL; } + /* free VF resources */ + kfree(adap->vfinfo); + adap->vfinfo = NULL; + adap->num_vfs = 0; return num_vfs; } @@ -5124,10 +5179,16 @@ static int cxgb4_iov_configure(struct pci_dev *pdev, int num_vfs) if (err) return err; + adap->num_vfs = num_vfs; err = config_mgmt_dev(pdev); if (err) return err; } + + adap->vfinfo = kcalloc(adap->num_vfs, + sizeof(struct vf_info), GFP_KERNEL); + if (adap->vfinfo) + fill_vf_station_mac_addr(adap); return num_vfs; } #endif @@ -5621,6 +5682,7 @@ static void remove_one(struct pci_dev *pdev) if (adapter->port[0]) unregister_netdev(adapter->port[0]); iounmap(adapter->regs); + kfree(adapter->vfinfo); kfree(adapter); pci_disable_sriov(pdev); pci_release_regions(pdev); diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index de451ee2ba75..15be54324e3d 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -2729,7 +2729,7 @@ int t4_get_raw_vpd_params(struct adapter *adapter, struct vpd_params *p) out: vfree(vpd); - return ret; + return ret < 0 ? ret : 0; } /** From 2f5281ba2a8feaf6f0aee93356f350855bb530fc Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Fri, 2 Sep 2016 15:22:48 +0100 Subject: [PATCH 0748/1715] net: ti: cpmac: Fix compiler warning due to type confusion cpmac_start_xmit() used the max() macro on skb->len (an unsigned int) and ETH_ZLEN (a signed int literal). This led to the following compiler warning: In file included from include/linux/list.h:8:0, from include/linux/module.h:9, from drivers/net/ethernet/ti/cpmac.c:19: drivers/net/ethernet/ti/cpmac.c: In function 'cpmac_start_xmit': include/linux/kernel.h:748:17: warning: comparison of distinct pointer types lacks a cast (void) (&_max1 == &_max2); \ ^ drivers/net/ethernet/ti/cpmac.c:560:8: note: in expansion of macro 'max' len = max(skb->len, ETH_ZLEN); ^ On top of this, it assigned the result of the max() macro to a signed integer whilst all further uses of it result in it being cast to varying widths of unsigned integer. Fix this up by using max_t to ensure the comparison is performed as unsigned integers, and for consistency change the type of the len variable to unsigned int. Signed-off-by: Paul Burton Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpmac.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ti/cpmac.c b/drivers/net/ethernet/ti/cpmac.c index d300d536d06f..fa0cfda24fd9 100644 --- a/drivers/net/ethernet/ti/cpmac.c +++ b/drivers/net/ethernet/ti/cpmac.c @@ -546,7 +546,8 @@ fatal_error: static int cpmac_start_xmit(struct sk_buff *skb, struct net_device *dev) { - int queue, len; + int queue; + unsigned int len; struct cpmac_desc *desc; struct cpmac_priv *priv = netdev_priv(dev); @@ -556,7 +557,7 @@ static int cpmac_start_xmit(struct sk_buff *skb, struct net_device *dev) if (unlikely(skb_padto(skb, ETH_ZLEN))) return NETDEV_TX_OK; - len = max(skb->len, ETH_ZLEN); + len = max_t(unsigned int, skb->len, ETH_ZLEN); queue = skb_get_queue_mapping(skb); netif_stop_subqueue(dev, queue); From 5f2d9c44389e7cd9fe192570f6f20199bc861eb8 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 2 Sep 2016 22:39:45 +0100 Subject: [PATCH 0749/1715] rxrpc: Randomise epoch and starting client conn ID values Create a random epoch value rather than a time-based one on startup and set the top bit to indicate that this is the case. Also create a random starting client connection ID value. This will be incremented from here as new client connections are created. Signed-off-by: David Howells --- include/rxrpc/packet.h | 1 + net/rxrpc/af_rxrpc.c | 9 ++++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/include/rxrpc/packet.h b/include/rxrpc/packet.h index b2017440b765..3c6128e1fdbe 100644 --- a/include/rxrpc/packet.h +++ b/include/rxrpc/packet.h @@ -24,6 +24,7 @@ typedef __be32 rxrpc_serial_net_t; /* on-the-wire Rx message serial number */ */ struct rxrpc_wire_header { __be32 epoch; /* client boot timestamp */ +#define RXRPC_RANDOM_EPOCH 0x80000000 /* Random if set, date-based if not */ __be32 cid; /* connection and channel ID */ #define RXRPC_MAXCALLS 4 /* max active calls per conn */ diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 32d544995dda..b66a9e6f8d04 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -700,7 +701,13 @@ static int __init af_rxrpc_init(void) BUILD_BUG_ON(sizeof(struct rxrpc_skb_priv) > FIELD_SIZEOF(struct sk_buff, cb)); - rxrpc_epoch = get_seconds(); + get_random_bytes(&rxrpc_epoch, sizeof(rxrpc_epoch)); + rxrpc_epoch |= RXRPC_RANDOM_EPOCH; + get_random_bytes(&rxrpc_client_conn_ids.cur, + sizeof(rxrpc_client_conn_ids.cur)); + rxrpc_client_conn_ids.cur &= 0x3fffffff; + if (rxrpc_client_conn_ids.cur == 0) + rxrpc_client_conn_ids.cur = 1; ret = -ENOMEM; rxrpc_call_jar = kmem_cache_create( From 090f85deb6e88f0edff1a18d610abd857e30c753 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sun, 4 Sep 2016 13:14:46 +0100 Subject: [PATCH 0750/1715] rxrpc: Don't change the epoch It seems the local epoch should only be changed on boot, so remove the code that changes it for client connections. Signed-off-by: David Howells --- net/rxrpc/conn_client.c | 32 ++++++++------------------------ 1 file changed, 8 insertions(+), 24 deletions(-) diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index e19804dd6c8d..82de1aeaef21 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -108,12 +108,12 @@ static DECLARE_DELAYED_WORK(rxrpc_client_conn_reap, /* * Get a connection ID and epoch for a client connection from the global pool. * The connection struct pointer is then recorded in the idr radix tree. The - * epoch is changed if this wraps. + * epoch doesn't change until the client is rebooted (or, at least, unless the + * module is unloaded). */ static int rxrpc_get_client_connection_id(struct rxrpc_connection *conn, gfp_t gfp) { - u32 epoch; int id; _enter(""); @@ -121,34 +121,18 @@ static int rxrpc_get_client_connection_id(struct rxrpc_connection *conn, idr_preload(gfp); spin_lock(&rxrpc_conn_id_lock); - epoch = rxrpc_epoch; - - /* We could use idr_alloc_cyclic() here, but we really need to know - * when the thing wraps so that we can advance the epoch. - */ - if (rxrpc_client_conn_ids.cur == 0) - rxrpc_client_conn_ids.cur = 1; - id = idr_alloc(&rxrpc_client_conn_ids, conn, - rxrpc_client_conn_ids.cur, 0x40000000, GFP_NOWAIT); - if (id < 0) { - if (id != -ENOSPC) - goto error; - id = idr_alloc(&rxrpc_client_conn_ids, conn, - 1, 0x40000000, GFP_NOWAIT); - if (id < 0) - goto error; - epoch++; - rxrpc_epoch = epoch; - } - rxrpc_client_conn_ids.cur = id + 1; + id = idr_alloc_cyclic(&rxrpc_client_conn_ids, conn, + 1, 0x40000000, GFP_NOWAIT); + if (id < 0) + goto error; spin_unlock(&rxrpc_conn_id_lock); idr_preload_end(); - conn->proto.epoch = epoch; + conn->proto.epoch = rxrpc_epoch; conn->proto.cid = id << RXRPC_CIDSHIFT; set_bit(RXRPC_CONN_HAS_IDR, &conn->flags); - _leave(" [CID %x:%x]", epoch, conn->proto.cid); + _leave(" [CID %x]", conn->proto.cid); return 0; error: From 9ce4d7d3850d1af0f3732c3da8e324cb83a45ca0 Mon Sep 17 00:00:00 2001 From: Bhaktipriya Shridhar Date: Sun, 4 Sep 2016 20:52:39 +0530 Subject: [PATCH 0751/1715] fs/afs/vlocation: Remove deprecated create_singlethread_workqueue The workqueue "afs_vlocation_update_worker" queues a single work item &afs_vlocation_update and hence it doesn't require execution ordering. Hence, alloc_workqueue has been used to replace the deprecated create_singlethread_workqueue instance. Since the workqueue is being used on a memory reclaim path, WQ_MEM_RECLAIM flag has been set to ensure forward progress under memory pressure. Since there are fixed number of work items, explicit concurrency limit is unnecessary here. Signed-off-by: Bhaktipriya Shridhar Signed-off-by: David Howells --- fs/afs/vlocation.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c index 52976785a32c..45a86396fd2d 100644 --- a/fs/afs/vlocation.c +++ b/fs/afs/vlocation.c @@ -594,8 +594,8 @@ static void afs_vlocation_reaper(struct work_struct *work) */ int __init afs_vlocation_update_init(void) { - afs_vlocation_update_worker = - create_singlethread_workqueue("kafs_vlupdated"); + afs_vlocation_update_worker = alloc_workqueue("kafs_vlupdated", + WQ_MEM_RECLAIM, 0); return afs_vlocation_update_worker ? 0 : -ENOMEM; } From 69ad052aec6bb9a3f376b6ae117dfde28ed337c8 Mon Sep 17 00:00:00 2001 From: Bhaktipriya Shridhar Date: Sun, 4 Sep 2016 20:53:42 +0530 Subject: [PATCH 0752/1715] fs/afs/rxrpc: Remove deprecated create_singlethread_workqueue The workqueue "afs_async_calls" queues work item &call->async_work per afs_call. Since there could be multiple calls and since these calls can be run concurrently, alloc_workqueue has been used to replace the deprecated create_singlethread_workqueue instance. The WQ_MEM_RECLAIM flag has been set to ensure forward progress under memory pressure because the workqueue is being used on a memory reclaim path. Since there are fixed number of work items, explicit concurrency limit is unnecessary here. Signed-off-by: Bhaktipriya Shridhar Signed-off-by: David Howells --- fs/afs/rxrpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 244896baf241..37608be52abd 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -76,7 +76,7 @@ int afs_open_socket(void) _enter(""); ret = -ENOMEM; - afs_async_calls = create_singlethread_workqueue("kafsd"); + afs_async_calls = alloc_workqueue("kafsd", WQ_MEM_RECLAIM, 0); if (!afs_async_calls) goto error_0; From 0b58b8a18be4932849ec88a820b08345c6528ea5 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 2 Sep 2016 22:39:45 +0100 Subject: [PATCH 0753/1715] rxrpc: Split sendmsg from packet transmission code Split the sendmsg code from the packet transmission code (mostly to be found in output.c). Signed-off-by: David Howells --- net/rxrpc/Makefile | 1 + net/rxrpc/ar-internal.h | 9 +- net/rxrpc/misc.c | 5 + net/rxrpc/output.c | 630 --------------------------------------- net/rxrpc/sendmsg.c | 645 ++++++++++++++++++++++++++++++++++++++++ 5 files changed, 657 insertions(+), 633 deletions(-) create mode 100644 net/rxrpc/sendmsg.c diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile index 10f3f48a16a8..8fc6ea347182 100644 --- a/net/rxrpc/Makefile +++ b/net/rxrpc/Makefile @@ -22,6 +22,7 @@ af-rxrpc-y := \ peer_object.o \ recvmsg.o \ security.o \ + sendmsg.o \ skbuff.o \ utils.o diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 4e86d248dc5e..464dfda2a995 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -814,6 +814,7 @@ extern unsigned int rxrpc_idle_ack_delay; extern unsigned int rxrpc_rx_window_size; extern unsigned int rxrpc_rx_mtu; extern unsigned int rxrpc_rx_jumbo_max; +extern unsigned int rxrpc_resend_timeout; extern const char *const rxrpc_pkts[]; extern const s8 rxrpc_ack_priority[]; @@ -823,10 +824,7 @@ extern const char *rxrpc_acks(u8 reason); /* * output.c */ -extern unsigned int rxrpc_resend_timeout; - int rxrpc_send_data_packet(struct rxrpc_connection *, struct sk_buff *); -int rxrpc_do_sendmsg(struct rxrpc_sock *, struct msghdr *, size_t); /* * peer_event.c @@ -888,6 +886,11 @@ int __init rxrpc_init_security(void); void rxrpc_exit_security(void); int rxrpc_init_client_conn_security(struct rxrpc_connection *); int rxrpc_init_server_conn_security(struct rxrpc_connection *); + +/* + * sendmsg.c + */ +int rxrpc_do_sendmsg(struct rxrpc_sock *, struct msghdr *, size_t); /* * skbuff.c diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index bdc5e42fe600..39e7cc37c392 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -64,6 +64,11 @@ unsigned int rxrpc_rx_mtu = 5692; */ unsigned int rxrpc_rx_jumbo_max = 4; +/* + * Time till packet resend (in jiffies). + */ +unsigned int rxrpc_resend_timeout = 4 * HZ; + const char *const rxrpc_pkts[] = { "?00", "DATA", "ACK", "BUSY", "ABORT", "ACKALL", "CHALL", "RESP", "DEBUG", diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 817ae801e769..5b5508f6fc2a 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -14,298 +14,11 @@ #include #include #include -#include #include #include #include #include "ar-internal.h" -/* - * Time till packet resend (in jiffies). - */ -unsigned int rxrpc_resend_timeout = 4 * HZ; - -static int rxrpc_send_data(struct rxrpc_sock *rx, - struct rxrpc_call *call, - struct msghdr *msg, size_t len); - -/* - * extract control messages from the sendmsg() control buffer - */ -static int rxrpc_sendmsg_cmsg(struct msghdr *msg, - unsigned long *user_call_ID, - enum rxrpc_command *command, - u32 *abort_code, - bool *_exclusive) -{ - struct cmsghdr *cmsg; - bool got_user_ID = false; - int len; - - *command = RXRPC_CMD_SEND_DATA; - - if (msg->msg_controllen == 0) - return -EINVAL; - - for_each_cmsghdr(cmsg, msg) { - if (!CMSG_OK(msg, cmsg)) - return -EINVAL; - - len = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr)); - _debug("CMSG %d, %d, %d", - cmsg->cmsg_level, cmsg->cmsg_type, len); - - if (cmsg->cmsg_level != SOL_RXRPC) - continue; - - switch (cmsg->cmsg_type) { - case RXRPC_USER_CALL_ID: - if (msg->msg_flags & MSG_CMSG_COMPAT) { - if (len != sizeof(u32)) - return -EINVAL; - *user_call_ID = *(u32 *) CMSG_DATA(cmsg); - } else { - if (len != sizeof(unsigned long)) - return -EINVAL; - *user_call_ID = *(unsigned long *) - CMSG_DATA(cmsg); - } - _debug("User Call ID %lx", *user_call_ID); - got_user_ID = true; - break; - - case RXRPC_ABORT: - if (*command != RXRPC_CMD_SEND_DATA) - return -EINVAL; - *command = RXRPC_CMD_SEND_ABORT; - if (len != sizeof(*abort_code)) - return -EINVAL; - *abort_code = *(unsigned int *) CMSG_DATA(cmsg); - _debug("Abort %x", *abort_code); - if (*abort_code == 0) - return -EINVAL; - break; - - case RXRPC_ACCEPT: - if (*command != RXRPC_CMD_SEND_DATA) - return -EINVAL; - *command = RXRPC_CMD_ACCEPT; - if (len != 0) - return -EINVAL; - break; - - case RXRPC_EXCLUSIVE_CALL: - *_exclusive = true; - if (len != 0) - return -EINVAL; - break; - default: - return -EINVAL; - } - } - - if (!got_user_ID) - return -EINVAL; - _leave(" = 0"); - return 0; -} - -/* - * abort a call, sending an ABORT packet to the peer - */ -static void rxrpc_send_abort(struct rxrpc_call *call, u32 abort_code) -{ - if (call->state >= RXRPC_CALL_COMPLETE) - return; - - write_lock_bh(&call->state_lock); - - if (__rxrpc_abort_call(call, abort_code, ECONNABORTED)) { - del_timer_sync(&call->resend_timer); - del_timer_sync(&call->ack_timer); - clear_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events); - clear_bit(RXRPC_CALL_EV_ACK, &call->events); - clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags); - rxrpc_queue_call(call); - } - - write_unlock_bh(&call->state_lock); -} - -/* - * Create a new client call for sendmsg(). - */ -static struct rxrpc_call * -rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, - unsigned long user_call_ID, bool exclusive) -{ - struct rxrpc_conn_parameters cp; - struct rxrpc_call *call; - struct key *key; - - DECLARE_SOCKADDR(struct sockaddr_rxrpc *, srx, msg->msg_name); - - _enter(""); - - if (!msg->msg_name) - return ERR_PTR(-EDESTADDRREQ); - - key = rx->key; - if (key && !rx->key->payload.data[0]) - key = NULL; - - memset(&cp, 0, sizeof(cp)); - cp.local = rx->local; - cp.key = rx->key; - cp.security_level = rx->min_sec_level; - cp.exclusive = rx->exclusive | exclusive; - cp.service_id = srx->srx_service; - call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, GFP_KERNEL); - - _leave(" = %p\n", call); - return call; -} - -/* - * send a message forming part of a client call through an RxRPC socket - * - caller holds the socket locked - * - the socket may be either a client socket or a server socket - */ -int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) -{ - enum rxrpc_command cmd; - struct rxrpc_call *call; - unsigned long user_call_ID = 0; - bool exclusive = false; - u32 abort_code = 0; - int ret; - - _enter(""); - - ret = rxrpc_sendmsg_cmsg(msg, &user_call_ID, &cmd, &abort_code, - &exclusive); - if (ret < 0) - return ret; - - if (cmd == RXRPC_CMD_ACCEPT) { - if (rx->sk.sk_state != RXRPC_SERVER_LISTENING) - return -EINVAL; - call = rxrpc_accept_call(rx, user_call_ID, NULL); - if (IS_ERR(call)) - return PTR_ERR(call); - rxrpc_put_call(call); - return 0; - } - - call = rxrpc_find_call_by_user_ID(rx, user_call_ID); - if (!call) { - if (cmd != RXRPC_CMD_SEND_DATA) - return -EBADSLT; - call = rxrpc_new_client_call_for_sendmsg(rx, msg, user_call_ID, - exclusive); - if (IS_ERR(call)) - return PTR_ERR(call); - } - - rxrpc_see_call(call); - _debug("CALL %d USR %lx ST %d on CONN %p", - call->debug_id, call->user_call_ID, call->state, call->conn); - - if (call->state >= RXRPC_CALL_COMPLETE) { - /* it's too late for this call */ - ret = -ESHUTDOWN; - } else if (cmd == RXRPC_CMD_SEND_ABORT) { - rxrpc_send_abort(call, abort_code); - ret = 0; - } else if (cmd != RXRPC_CMD_SEND_DATA) { - ret = -EINVAL; - } else if (rxrpc_is_client_call(call) && - call->state != RXRPC_CALL_CLIENT_SEND_REQUEST) { - /* request phase complete for this client call */ - ret = -EPROTO; - } else if (rxrpc_is_service_call(call) && - call->state != RXRPC_CALL_SERVER_ACK_REQUEST && - call->state != RXRPC_CALL_SERVER_SEND_REPLY) { - /* Reply phase not begun or not complete for service call. */ - ret = -EPROTO; - } else { - ret = rxrpc_send_data(rx, call, msg, len); - } - - rxrpc_put_call(call); - _leave(" = %d", ret); - return ret; -} - -/** - * rxrpc_kernel_send_data - Allow a kernel service to send data on a call - * @sock: The socket the call is on - * @call: The call to send data through - * @msg: The data to send - * @len: The amount of data to send - * - * Allow a kernel service to send data on a call. The call must be in an state - * appropriate to sending data. No control data should be supplied in @msg, - * nor should an address be supplied. MSG_MORE should be flagged if there's - * more data to come, otherwise this data will end the transmission phase. - */ -int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call, - struct msghdr *msg, size_t len) -{ - int ret; - - _enter("{%d,%s},", call->debug_id, rxrpc_call_states[call->state]); - - ASSERTCMP(msg->msg_name, ==, NULL); - ASSERTCMP(msg->msg_control, ==, NULL); - - lock_sock(sock->sk); - - _debug("CALL %d USR %lx ST %d on CONN %p", - call->debug_id, call->user_call_ID, call->state, call->conn); - - if (call->state >= RXRPC_CALL_COMPLETE) { - ret = -ESHUTDOWN; /* it's too late for this call */ - } else if (call->state != RXRPC_CALL_CLIENT_SEND_REQUEST && - call->state != RXRPC_CALL_SERVER_ACK_REQUEST && - call->state != RXRPC_CALL_SERVER_SEND_REPLY) { - ret = -EPROTO; /* request phase complete for this client call */ - } else { - ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len); - } - - release_sock(sock->sk); - _leave(" = %d", ret); - return ret; -} -EXPORT_SYMBOL(rxrpc_kernel_send_data); - -/** - * rxrpc_kernel_abort_call - Allow a kernel service to abort a call - * @sock: The socket the call is on - * @call: The call to be aborted - * @abort_code: The abort code to stick into the ABORT packet - * - * Allow a kernel service to abort a call, if it's still in an abortable state. - */ -void rxrpc_kernel_abort_call(struct socket *sock, struct rxrpc_call *call, - u32 abort_code) -{ - _enter("{%d},%d", call->debug_id, abort_code); - - lock_sock(sock->sk); - - _debug("CALL %d USR %lx ST %d on CONN %p", - call->debug_id, call->user_call_ID, call->state, call->conn); - - rxrpc_send_abort(call, abort_code); - - release_sock(sock->sk); - _leave(""); -} - -EXPORT_SYMBOL(rxrpc_kernel_abort_call); - /* * send a packet through the transport endpoint */ @@ -375,346 +88,3 @@ send_fragmentable: _leave(" = %d [frag %u]", ret, conn->params.peer->maxdata); return ret; } - -/* - * wait for space to appear in the transmit/ACK window - * - caller holds the socket locked - */ -static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx, - struct rxrpc_call *call, - long *timeo) -{ - DECLARE_WAITQUEUE(myself, current); - int ret; - - _enter(",{%d},%ld", - CIRC_SPACE(call->acks_head, ACCESS_ONCE(call->acks_tail), - call->acks_winsz), - *timeo); - - add_wait_queue(&call->waitq, &myself); - - for (;;) { - set_current_state(TASK_INTERRUPTIBLE); - ret = 0; - if (CIRC_SPACE(call->acks_head, ACCESS_ONCE(call->acks_tail), - call->acks_winsz) > 0) - break; - if (signal_pending(current)) { - ret = sock_intr_errno(*timeo); - break; - } - - release_sock(&rx->sk); - *timeo = schedule_timeout(*timeo); - lock_sock(&rx->sk); - } - - remove_wait_queue(&call->waitq, &myself); - set_current_state(TASK_RUNNING); - _leave(" = %d", ret); - return ret; -} - -/* - * attempt to schedule an instant Tx resend - */ -static inline void rxrpc_instant_resend(struct rxrpc_call *call) -{ - read_lock_bh(&call->state_lock); - if (try_to_del_timer_sync(&call->resend_timer) >= 0) { - clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags); - if (call->state < RXRPC_CALL_COMPLETE && - !test_and_set_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events)) - rxrpc_queue_call(call); - } - read_unlock_bh(&call->state_lock); -} - -/* - * queue a packet for transmission, set the resend timer and attempt - * to send the packet immediately - */ -static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, - bool last) -{ - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - int ret; - - _net("queue skb %p [%d]", skb, call->acks_head); - - ASSERT(call->acks_window != NULL); - call->acks_window[call->acks_head] = (unsigned long) skb; - smp_wmb(); - call->acks_head = (call->acks_head + 1) & (call->acks_winsz - 1); - - if (last || call->state == RXRPC_CALL_SERVER_ACK_REQUEST) { - _debug("________awaiting reply/ACK__________"); - write_lock_bh(&call->state_lock); - switch (call->state) { - case RXRPC_CALL_CLIENT_SEND_REQUEST: - call->state = RXRPC_CALL_CLIENT_AWAIT_REPLY; - break; - case RXRPC_CALL_SERVER_ACK_REQUEST: - call->state = RXRPC_CALL_SERVER_SEND_REPLY; - if (!last) - break; - case RXRPC_CALL_SERVER_SEND_REPLY: - call->state = RXRPC_CALL_SERVER_AWAIT_ACK; - break; - default: - break; - } - write_unlock_bh(&call->state_lock); - } - - _proto("Tx DATA %%%u { #%u }", sp->hdr.serial, sp->hdr.seq); - - sp->need_resend = false; - sp->resend_at = jiffies + rxrpc_resend_timeout; - if (!test_and_set_bit(RXRPC_CALL_RUN_RTIMER, &call->flags)) { - _debug("run timer"); - call->resend_timer.expires = sp->resend_at; - add_timer(&call->resend_timer); - } - - /* attempt to cancel the rx-ACK timer, deferring reply transmission if - * we're ACK'ing the request phase of an incoming call */ - ret = -EAGAIN; - if (try_to_del_timer_sync(&call->ack_timer) >= 0) { - /* the packet may be freed by rxrpc_process_call() before this - * returns */ - if (rxrpc_is_client_call(call)) - rxrpc_expose_client_call(call); - ret = rxrpc_send_data_packet(call->conn, skb); - _net("sent skb %p", skb); - } else { - _debug("failed to delete ACK timer"); - } - - if (ret < 0) { - _debug("need instant resend %d", ret); - sp->need_resend = true; - rxrpc_instant_resend(call); - } - - _leave(""); -} - -/* - * Convert a host-endian header into a network-endian header. - */ -static void rxrpc_insert_header(struct sk_buff *skb) -{ - struct rxrpc_wire_header whdr; - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - - whdr.epoch = htonl(sp->hdr.epoch); - whdr.cid = htonl(sp->hdr.cid); - whdr.callNumber = htonl(sp->hdr.callNumber); - whdr.seq = htonl(sp->hdr.seq); - whdr.serial = htonl(sp->hdr.serial); - whdr.type = sp->hdr.type; - whdr.flags = sp->hdr.flags; - whdr.userStatus = sp->hdr.userStatus; - whdr.securityIndex = sp->hdr.securityIndex; - whdr._rsvd = htons(sp->hdr._rsvd); - whdr.serviceId = htons(sp->hdr.serviceId); - - memcpy(skb->head, &whdr, sizeof(whdr)); -} - -/* - * send data through a socket - * - must be called in process context - * - caller holds the socket locked - */ -static int rxrpc_send_data(struct rxrpc_sock *rx, - struct rxrpc_call *call, - struct msghdr *msg, size_t len) -{ - struct rxrpc_skb_priv *sp; - struct sk_buff *skb; - struct sock *sk = &rx->sk; - long timeo; - bool more; - int ret, copied; - - timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); - - /* this should be in poll */ - sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); - - if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) - return -EPIPE; - - more = msg->msg_flags & MSG_MORE; - - skb = call->tx_pending; - call->tx_pending = NULL; - rxrpc_see_skb(skb); - - copied = 0; - do { - if (!skb) { - size_t size, chunk, max, space; - - _debug("alloc"); - - if (CIRC_SPACE(call->acks_head, - ACCESS_ONCE(call->acks_tail), - call->acks_winsz) <= 0) { - ret = -EAGAIN; - if (msg->msg_flags & MSG_DONTWAIT) - goto maybe_error; - ret = rxrpc_wait_for_tx_window(rx, call, - &timeo); - if (ret < 0) - goto maybe_error; - } - - max = call->conn->params.peer->maxdata; - max -= call->conn->security_size; - max &= ~(call->conn->size_align - 1UL); - - chunk = max; - if (chunk > msg_data_left(msg) && !more) - chunk = msg_data_left(msg); - - space = chunk + call->conn->size_align; - space &= ~(call->conn->size_align - 1UL); - - size = space + call->conn->header_size; - - _debug("SIZE: %zu/%zu/%zu", chunk, space, size); - - /* create a buffer that we can retain until it's ACK'd */ - skb = sock_alloc_send_skb( - sk, size, msg->msg_flags & MSG_DONTWAIT, &ret); - if (!skb) - goto maybe_error; - - rxrpc_new_skb(skb); - - _debug("ALLOC SEND %p", skb); - - ASSERTCMP(skb->mark, ==, 0); - - _debug("HS: %u", call->conn->header_size); - skb_reserve(skb, call->conn->header_size); - skb->len += call->conn->header_size; - - sp = rxrpc_skb(skb); - sp->remain = chunk; - if (sp->remain > skb_tailroom(skb)) - sp->remain = skb_tailroom(skb); - - _net("skb: hr %d, tr %d, hl %d, rm %d", - skb_headroom(skb), - skb_tailroom(skb), - skb_headlen(skb), - sp->remain); - - skb->ip_summed = CHECKSUM_UNNECESSARY; - } - - _debug("append"); - sp = rxrpc_skb(skb); - - /* append next segment of data to the current buffer */ - if (msg_data_left(msg) > 0) { - int copy = skb_tailroom(skb); - ASSERTCMP(copy, >, 0); - if (copy > msg_data_left(msg)) - copy = msg_data_left(msg); - if (copy > sp->remain) - copy = sp->remain; - - _debug("add"); - ret = skb_add_data(skb, &msg->msg_iter, copy); - _debug("added"); - if (ret < 0) - goto efault; - sp->remain -= copy; - skb->mark += copy; - copied += copy; - } - - /* check for the far side aborting the call or a network error - * occurring */ - if (call->state == RXRPC_CALL_COMPLETE) - goto call_terminated; - - /* add the packet to the send queue if it's now full */ - if (sp->remain <= 0 || - (msg_data_left(msg) == 0 && !more)) { - struct rxrpc_connection *conn = call->conn; - uint32_t seq; - size_t pad; - - /* pad out if we're using security */ - if (conn->security_ix) { - pad = conn->security_size + skb->mark; - pad = conn->size_align - pad; - pad &= conn->size_align - 1; - _debug("pad %zu", pad); - if (pad) - memset(skb_put(skb, pad), 0, pad); - } - - seq = atomic_inc_return(&call->sequence); - - sp->hdr.epoch = conn->proto.epoch; - sp->hdr.cid = call->cid; - sp->hdr.callNumber = call->call_id; - sp->hdr.seq = seq; - sp->hdr.serial = atomic_inc_return(&conn->serial); - sp->hdr.type = RXRPC_PACKET_TYPE_DATA; - sp->hdr.userStatus = 0; - sp->hdr.securityIndex = conn->security_ix; - sp->hdr._rsvd = 0; - sp->hdr.serviceId = call->service_id; - - sp->hdr.flags = conn->out_clientflag; - if (msg_data_left(msg) == 0 && !more) - sp->hdr.flags |= RXRPC_LAST_PACKET; - else if (CIRC_SPACE(call->acks_head, - ACCESS_ONCE(call->acks_tail), - call->acks_winsz) > 1) - sp->hdr.flags |= RXRPC_MORE_PACKETS; - if (more && seq & 1) - sp->hdr.flags |= RXRPC_REQUEST_ACK; - - ret = conn->security->secure_packet( - call, skb, skb->mark, - skb->head + sizeof(struct rxrpc_wire_header)); - if (ret < 0) - goto out; - - rxrpc_insert_header(skb); - rxrpc_queue_packet(call, skb, !msg_data_left(msg) && !more); - skb = NULL; - } - } while (msg_data_left(msg) > 0); - -success: - ret = copied; -out: - call->tx_pending = skb; - _leave(" = %d", ret); - return ret; - -call_terminated: - rxrpc_free_skb(skb); - _leave(" = %d", -call->error); - return ret; - -maybe_error: - if (copied) - goto success; - goto out; - -efault: - ret = -EFAULT; - goto out; -} diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c new file mode 100644 index 000000000000..ff3e28ddc6d8 --- /dev/null +++ b/net/rxrpc/sendmsg.c @@ -0,0 +1,645 @@ +/* AF_RXRPC sendmsg() implementation. + * + * Copyright (C) 2007, 2016 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include "ar-internal.h" + +static int rxrpc_send_data(struct rxrpc_sock *rx, + struct rxrpc_call *call, + struct msghdr *msg, size_t len); + +/* + * extract control messages from the sendmsg() control buffer + */ +static int rxrpc_sendmsg_cmsg(struct msghdr *msg, + unsigned long *user_call_ID, + enum rxrpc_command *command, + u32 *abort_code, + bool *_exclusive) +{ + struct cmsghdr *cmsg; + bool got_user_ID = false; + int len; + + *command = RXRPC_CMD_SEND_DATA; + + if (msg->msg_controllen == 0) + return -EINVAL; + + for_each_cmsghdr(cmsg, msg) { + if (!CMSG_OK(msg, cmsg)) + return -EINVAL; + + len = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr)); + _debug("CMSG %d, %d, %d", + cmsg->cmsg_level, cmsg->cmsg_type, len); + + if (cmsg->cmsg_level != SOL_RXRPC) + continue; + + switch (cmsg->cmsg_type) { + case RXRPC_USER_CALL_ID: + if (msg->msg_flags & MSG_CMSG_COMPAT) { + if (len != sizeof(u32)) + return -EINVAL; + *user_call_ID = *(u32 *) CMSG_DATA(cmsg); + } else { + if (len != sizeof(unsigned long)) + return -EINVAL; + *user_call_ID = *(unsigned long *) + CMSG_DATA(cmsg); + } + _debug("User Call ID %lx", *user_call_ID); + got_user_ID = true; + break; + + case RXRPC_ABORT: + if (*command != RXRPC_CMD_SEND_DATA) + return -EINVAL; + *command = RXRPC_CMD_SEND_ABORT; + if (len != sizeof(*abort_code)) + return -EINVAL; + *abort_code = *(unsigned int *) CMSG_DATA(cmsg); + _debug("Abort %x", *abort_code); + if (*abort_code == 0) + return -EINVAL; + break; + + case RXRPC_ACCEPT: + if (*command != RXRPC_CMD_SEND_DATA) + return -EINVAL; + *command = RXRPC_CMD_ACCEPT; + if (len != 0) + return -EINVAL; + break; + + case RXRPC_EXCLUSIVE_CALL: + *_exclusive = true; + if (len != 0) + return -EINVAL; + break; + default: + return -EINVAL; + } + } + + if (!got_user_ID) + return -EINVAL; + _leave(" = 0"); + return 0; +} + +/* + * abort a call, sending an ABORT packet to the peer + */ +static void rxrpc_send_abort(struct rxrpc_call *call, u32 abort_code) +{ + if (call->state >= RXRPC_CALL_COMPLETE) + return; + + write_lock_bh(&call->state_lock); + + if (__rxrpc_abort_call(call, abort_code, ECONNABORTED)) { + del_timer_sync(&call->resend_timer); + del_timer_sync(&call->ack_timer); + clear_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events); + clear_bit(RXRPC_CALL_EV_ACK, &call->events); + clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags); + rxrpc_queue_call(call); + } + + write_unlock_bh(&call->state_lock); +} + +/* + * Create a new client call for sendmsg(). + */ +static struct rxrpc_call * +rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, + unsigned long user_call_ID, bool exclusive) +{ + struct rxrpc_conn_parameters cp; + struct rxrpc_call *call; + struct key *key; + + DECLARE_SOCKADDR(struct sockaddr_rxrpc *, srx, msg->msg_name); + + _enter(""); + + if (!msg->msg_name) + return ERR_PTR(-EDESTADDRREQ); + + key = rx->key; + if (key && !rx->key->payload.data[0]) + key = NULL; + + memset(&cp, 0, sizeof(cp)); + cp.local = rx->local; + cp.key = rx->key; + cp.security_level = rx->min_sec_level; + cp.exclusive = rx->exclusive | exclusive; + cp.service_id = srx->srx_service; + call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, GFP_KERNEL); + + _leave(" = %p\n", call); + return call; +} + +/* + * send a message forming part of a client call through an RxRPC socket + * - caller holds the socket locked + * - the socket may be either a client socket or a server socket + */ +int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) +{ + enum rxrpc_command cmd; + struct rxrpc_call *call; + unsigned long user_call_ID = 0; + bool exclusive = false; + u32 abort_code = 0; + int ret; + + _enter(""); + + ret = rxrpc_sendmsg_cmsg(msg, &user_call_ID, &cmd, &abort_code, + &exclusive); + if (ret < 0) + return ret; + + if (cmd == RXRPC_CMD_ACCEPT) { + if (rx->sk.sk_state != RXRPC_SERVER_LISTENING) + return -EINVAL; + call = rxrpc_accept_call(rx, user_call_ID, NULL); + if (IS_ERR(call)) + return PTR_ERR(call); + rxrpc_put_call(call); + return 0; + } + + call = rxrpc_find_call_by_user_ID(rx, user_call_ID); + if (!call) { + if (cmd != RXRPC_CMD_SEND_DATA) + return -EBADSLT; + call = rxrpc_new_client_call_for_sendmsg(rx, msg, user_call_ID, + exclusive); + if (IS_ERR(call)) + return PTR_ERR(call); + } + + rxrpc_see_call(call); + _debug("CALL %d USR %lx ST %d on CONN %p", + call->debug_id, call->user_call_ID, call->state, call->conn); + + if (call->state >= RXRPC_CALL_COMPLETE) { + /* it's too late for this call */ + ret = -ESHUTDOWN; + } else if (cmd == RXRPC_CMD_SEND_ABORT) { + rxrpc_send_abort(call, abort_code); + ret = 0; + } else if (cmd != RXRPC_CMD_SEND_DATA) { + ret = -EINVAL; + } else if (rxrpc_is_client_call(call) && + call->state != RXRPC_CALL_CLIENT_SEND_REQUEST) { + /* request phase complete for this client call */ + ret = -EPROTO; + } else if (rxrpc_is_service_call(call) && + call->state != RXRPC_CALL_SERVER_ACK_REQUEST && + call->state != RXRPC_CALL_SERVER_SEND_REPLY) { + /* Reply phase not begun or not complete for service call. */ + ret = -EPROTO; + } else { + ret = rxrpc_send_data(rx, call, msg, len); + } + + rxrpc_put_call(call); + _leave(" = %d", ret); + return ret; +} + +/** + * rxrpc_kernel_send_data - Allow a kernel service to send data on a call + * @sock: The socket the call is on + * @call: The call to send data through + * @msg: The data to send + * @len: The amount of data to send + * + * Allow a kernel service to send data on a call. The call must be in an state + * appropriate to sending data. No control data should be supplied in @msg, + * nor should an address be supplied. MSG_MORE should be flagged if there's + * more data to come, otherwise this data will end the transmission phase. + */ +int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call, + struct msghdr *msg, size_t len) +{ + int ret; + + _enter("{%d,%s},", call->debug_id, rxrpc_call_states[call->state]); + + ASSERTCMP(msg->msg_name, ==, NULL); + ASSERTCMP(msg->msg_control, ==, NULL); + + lock_sock(sock->sk); + + _debug("CALL %d USR %lx ST %d on CONN %p", + call->debug_id, call->user_call_ID, call->state, call->conn); + + if (call->state >= RXRPC_CALL_COMPLETE) { + ret = -ESHUTDOWN; /* it's too late for this call */ + } else if (call->state != RXRPC_CALL_CLIENT_SEND_REQUEST && + call->state != RXRPC_CALL_SERVER_ACK_REQUEST && + call->state != RXRPC_CALL_SERVER_SEND_REPLY) { + ret = -EPROTO; /* request phase complete for this client call */ + } else { + ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len); + } + + release_sock(sock->sk); + _leave(" = %d", ret); + return ret; +} +EXPORT_SYMBOL(rxrpc_kernel_send_data); + +/** + * rxrpc_kernel_abort_call - Allow a kernel service to abort a call + * @sock: The socket the call is on + * @call: The call to be aborted + * @abort_code: The abort code to stick into the ABORT packet + * + * Allow a kernel service to abort a call, if it's still in an abortable state. + */ +void rxrpc_kernel_abort_call(struct socket *sock, struct rxrpc_call *call, + u32 abort_code) +{ + _enter("{%d},%d", call->debug_id, abort_code); + + lock_sock(sock->sk); + + _debug("CALL %d USR %lx ST %d on CONN %p", + call->debug_id, call->user_call_ID, call->state, call->conn); + + rxrpc_send_abort(call, abort_code); + + release_sock(sock->sk); + _leave(""); +} + +EXPORT_SYMBOL(rxrpc_kernel_abort_call); + +/* + * wait for space to appear in the transmit/ACK window + * - caller holds the socket locked + */ +static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx, + struct rxrpc_call *call, + long *timeo) +{ + DECLARE_WAITQUEUE(myself, current); + int ret; + + _enter(",{%d},%ld", + CIRC_SPACE(call->acks_head, ACCESS_ONCE(call->acks_tail), + call->acks_winsz), + *timeo); + + add_wait_queue(&call->waitq, &myself); + + for (;;) { + set_current_state(TASK_INTERRUPTIBLE); + ret = 0; + if (CIRC_SPACE(call->acks_head, ACCESS_ONCE(call->acks_tail), + call->acks_winsz) > 0) + break; + if (signal_pending(current)) { + ret = sock_intr_errno(*timeo); + break; + } + + release_sock(&rx->sk); + *timeo = schedule_timeout(*timeo); + lock_sock(&rx->sk); + } + + remove_wait_queue(&call->waitq, &myself); + set_current_state(TASK_RUNNING); + _leave(" = %d", ret); + return ret; +} + +/* + * attempt to schedule an instant Tx resend + */ +static inline void rxrpc_instant_resend(struct rxrpc_call *call) +{ + read_lock_bh(&call->state_lock); + if (try_to_del_timer_sync(&call->resend_timer) >= 0) { + clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags); + if (call->state < RXRPC_CALL_COMPLETE && + !test_and_set_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events)) + rxrpc_queue_call(call); + } + read_unlock_bh(&call->state_lock); +} + +/* + * queue a packet for transmission, set the resend timer and attempt + * to send the packet immediately + */ +static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, + bool last) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + int ret; + + _net("queue skb %p [%d]", skb, call->acks_head); + + ASSERT(call->acks_window != NULL); + call->acks_window[call->acks_head] = (unsigned long) skb; + smp_wmb(); + call->acks_head = (call->acks_head + 1) & (call->acks_winsz - 1); + + if (last || call->state == RXRPC_CALL_SERVER_ACK_REQUEST) { + _debug("________awaiting reply/ACK__________"); + write_lock_bh(&call->state_lock); + switch (call->state) { + case RXRPC_CALL_CLIENT_SEND_REQUEST: + call->state = RXRPC_CALL_CLIENT_AWAIT_REPLY; + break; + case RXRPC_CALL_SERVER_ACK_REQUEST: + call->state = RXRPC_CALL_SERVER_SEND_REPLY; + if (!last) + break; + case RXRPC_CALL_SERVER_SEND_REPLY: + call->state = RXRPC_CALL_SERVER_AWAIT_ACK; + break; + default: + break; + } + write_unlock_bh(&call->state_lock); + } + + _proto("Tx DATA %%%u { #%u }", sp->hdr.serial, sp->hdr.seq); + + sp->need_resend = false; + sp->resend_at = jiffies + rxrpc_resend_timeout; + if (!test_and_set_bit(RXRPC_CALL_RUN_RTIMER, &call->flags)) { + _debug("run timer"); + call->resend_timer.expires = sp->resend_at; + add_timer(&call->resend_timer); + } + + /* attempt to cancel the rx-ACK timer, deferring reply transmission if + * we're ACK'ing the request phase of an incoming call */ + ret = -EAGAIN; + if (try_to_del_timer_sync(&call->ack_timer) >= 0) { + /* the packet may be freed by rxrpc_process_call() before this + * returns */ + if (rxrpc_is_client_call(call)) + rxrpc_expose_client_call(call); + ret = rxrpc_send_data_packet(call->conn, skb); + _net("sent skb %p", skb); + } else { + _debug("failed to delete ACK timer"); + } + + if (ret < 0) { + _debug("need instant resend %d", ret); + sp->need_resend = true; + rxrpc_instant_resend(call); + } + + _leave(""); +} + +/* + * Convert a host-endian header into a network-endian header. + */ +static void rxrpc_insert_header(struct sk_buff *skb) +{ + struct rxrpc_wire_header whdr; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + + whdr.epoch = htonl(sp->hdr.epoch); + whdr.cid = htonl(sp->hdr.cid); + whdr.callNumber = htonl(sp->hdr.callNumber); + whdr.seq = htonl(sp->hdr.seq); + whdr.serial = htonl(sp->hdr.serial); + whdr.type = sp->hdr.type; + whdr.flags = sp->hdr.flags; + whdr.userStatus = sp->hdr.userStatus; + whdr.securityIndex = sp->hdr.securityIndex; + whdr._rsvd = htons(sp->hdr._rsvd); + whdr.serviceId = htons(sp->hdr.serviceId); + + memcpy(skb->head, &whdr, sizeof(whdr)); +} + +/* + * send data through a socket + * - must be called in process context + * - caller holds the socket locked + */ +static int rxrpc_send_data(struct rxrpc_sock *rx, + struct rxrpc_call *call, + struct msghdr *msg, size_t len) +{ + struct rxrpc_skb_priv *sp; + struct sk_buff *skb; + struct sock *sk = &rx->sk; + long timeo; + bool more; + int ret, copied; + + timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); + + /* this should be in poll */ + sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); + + if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) + return -EPIPE; + + more = msg->msg_flags & MSG_MORE; + + skb = call->tx_pending; + call->tx_pending = NULL; + rxrpc_see_skb(skb); + + copied = 0; + do { + if (!skb) { + size_t size, chunk, max, space; + + _debug("alloc"); + + if (CIRC_SPACE(call->acks_head, + ACCESS_ONCE(call->acks_tail), + call->acks_winsz) <= 0) { + ret = -EAGAIN; + if (msg->msg_flags & MSG_DONTWAIT) + goto maybe_error; + ret = rxrpc_wait_for_tx_window(rx, call, + &timeo); + if (ret < 0) + goto maybe_error; + } + + max = call->conn->params.peer->maxdata; + max -= call->conn->security_size; + max &= ~(call->conn->size_align - 1UL); + + chunk = max; + if (chunk > msg_data_left(msg) && !more) + chunk = msg_data_left(msg); + + space = chunk + call->conn->size_align; + space &= ~(call->conn->size_align - 1UL); + + size = space + call->conn->header_size; + + _debug("SIZE: %zu/%zu/%zu", chunk, space, size); + + /* create a buffer that we can retain until it's ACK'd */ + skb = sock_alloc_send_skb( + sk, size, msg->msg_flags & MSG_DONTWAIT, &ret); + if (!skb) + goto maybe_error; + + rxrpc_new_skb(skb); + + _debug("ALLOC SEND %p", skb); + + ASSERTCMP(skb->mark, ==, 0); + + _debug("HS: %u", call->conn->header_size); + skb_reserve(skb, call->conn->header_size); + skb->len += call->conn->header_size; + + sp = rxrpc_skb(skb); + sp->remain = chunk; + if (sp->remain > skb_tailroom(skb)) + sp->remain = skb_tailroom(skb); + + _net("skb: hr %d, tr %d, hl %d, rm %d", + skb_headroom(skb), + skb_tailroom(skb), + skb_headlen(skb), + sp->remain); + + skb->ip_summed = CHECKSUM_UNNECESSARY; + } + + _debug("append"); + sp = rxrpc_skb(skb); + + /* append next segment of data to the current buffer */ + if (msg_data_left(msg) > 0) { + int copy = skb_tailroom(skb); + ASSERTCMP(copy, >, 0); + if (copy > msg_data_left(msg)) + copy = msg_data_left(msg); + if (copy > sp->remain) + copy = sp->remain; + + _debug("add"); + ret = skb_add_data(skb, &msg->msg_iter, copy); + _debug("added"); + if (ret < 0) + goto efault; + sp->remain -= copy; + skb->mark += copy; + copied += copy; + } + + /* check for the far side aborting the call or a network error + * occurring */ + if (call->state == RXRPC_CALL_COMPLETE) + goto call_terminated; + + /* add the packet to the send queue if it's now full */ + if (sp->remain <= 0 || + (msg_data_left(msg) == 0 && !more)) { + struct rxrpc_connection *conn = call->conn; + uint32_t seq; + size_t pad; + + /* pad out if we're using security */ + if (conn->security_ix) { + pad = conn->security_size + skb->mark; + pad = conn->size_align - pad; + pad &= conn->size_align - 1; + _debug("pad %zu", pad); + if (pad) + memset(skb_put(skb, pad), 0, pad); + } + + seq = atomic_inc_return(&call->sequence); + + sp->hdr.epoch = conn->proto.epoch; + sp->hdr.cid = call->cid; + sp->hdr.callNumber = call->call_id; + sp->hdr.seq = seq; + sp->hdr.serial = atomic_inc_return(&conn->serial); + sp->hdr.type = RXRPC_PACKET_TYPE_DATA; + sp->hdr.userStatus = 0; + sp->hdr.securityIndex = conn->security_ix; + sp->hdr._rsvd = 0; + sp->hdr.serviceId = call->service_id; + + sp->hdr.flags = conn->out_clientflag; + if (msg_data_left(msg) == 0 && !more) + sp->hdr.flags |= RXRPC_LAST_PACKET; + else if (CIRC_SPACE(call->acks_head, + ACCESS_ONCE(call->acks_tail), + call->acks_winsz) > 1) + sp->hdr.flags |= RXRPC_MORE_PACKETS; + if (more && seq & 1) + sp->hdr.flags |= RXRPC_REQUEST_ACK; + + ret = conn->security->secure_packet( + call, skb, skb->mark, + skb->head + sizeof(struct rxrpc_wire_header)); + if (ret < 0) + goto out; + + rxrpc_insert_header(skb); + rxrpc_queue_packet(call, skb, !msg_data_left(msg) && !more); + skb = NULL; + } + } while (msg_data_left(msg) > 0); + +success: + ret = copied; +out: + call->tx_pending = skb; + _leave(" = %d", ret); + return ret; + +call_terminated: + rxrpc_free_skb(skb); + _leave(" = %d", -call->error); + return ret; + +maybe_error: + if (copied) + goto success; + goto out; + +efault: + ret = -EFAULT; + goto out; +} From 4c136dae62f7a62383a9d6563c8613e732bfefaf Mon Sep 17 00:00:00 2001 From: Bhaktipriya Shridhar Date: Sun, 4 Sep 2016 20:54:11 +0530 Subject: [PATCH 0754/1715] fs/afs/callback: Remove deprecated create_singlethread_workqueue The workqueue "afs_callback_update_worker" queues multiple work items viz &vnode->cb_broken_work, &server->cb_break_work which require strict execution ordering. Hence, an ordered dedicated workqueue has been used. Since the workqueue is being used on a memory reclaim path, WQ_MEM_RECLAIM has been set to ensure forward progress under memory pressure. Signed-off-by: Bhaktipriya Shridhar Signed-off-by: David Howells --- fs/afs/callback.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/afs/callback.c b/fs/afs/callback.c index 7ef637d7f3a5..1e9d2f84e5b5 100644 --- a/fs/afs/callback.c +++ b/fs/afs/callback.c @@ -461,8 +461,8 @@ static void afs_callback_updater(struct work_struct *work) */ int __init afs_callback_update_init(void) { - afs_callback_update_worker = - create_singlethread_workqueue("kafs_callbackd"); + afs_callback_update_worker = alloc_ordered_workqueue("kafs_callbackd", + WQ_MEM_RECLAIM); return afs_callback_update_worker ? 0 : -ENOMEM; } From df423a4af125f5b3d6f71b630e5c209774c353fd Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 2 Sep 2016 22:39:45 +0100 Subject: [PATCH 0755/1715] rxrpc: Rearrange net/rxrpc/sendmsg.c Rearrange net/rxrpc/sendmsg.c to be in a more logical order. This makes it easier to follow and eliminates forward declarations. Signed-off-by: David Howells --- net/rxrpc/sendmsg.c | 558 ++++++++++++++++++++++---------------------- 1 file changed, 277 insertions(+), 281 deletions(-) diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index ff3e28ddc6d8..17a9ebbc2346 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -20,287 +20,6 @@ #include #include "ar-internal.h" -static int rxrpc_send_data(struct rxrpc_sock *rx, - struct rxrpc_call *call, - struct msghdr *msg, size_t len); - -/* - * extract control messages from the sendmsg() control buffer - */ -static int rxrpc_sendmsg_cmsg(struct msghdr *msg, - unsigned long *user_call_ID, - enum rxrpc_command *command, - u32 *abort_code, - bool *_exclusive) -{ - struct cmsghdr *cmsg; - bool got_user_ID = false; - int len; - - *command = RXRPC_CMD_SEND_DATA; - - if (msg->msg_controllen == 0) - return -EINVAL; - - for_each_cmsghdr(cmsg, msg) { - if (!CMSG_OK(msg, cmsg)) - return -EINVAL; - - len = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr)); - _debug("CMSG %d, %d, %d", - cmsg->cmsg_level, cmsg->cmsg_type, len); - - if (cmsg->cmsg_level != SOL_RXRPC) - continue; - - switch (cmsg->cmsg_type) { - case RXRPC_USER_CALL_ID: - if (msg->msg_flags & MSG_CMSG_COMPAT) { - if (len != sizeof(u32)) - return -EINVAL; - *user_call_ID = *(u32 *) CMSG_DATA(cmsg); - } else { - if (len != sizeof(unsigned long)) - return -EINVAL; - *user_call_ID = *(unsigned long *) - CMSG_DATA(cmsg); - } - _debug("User Call ID %lx", *user_call_ID); - got_user_ID = true; - break; - - case RXRPC_ABORT: - if (*command != RXRPC_CMD_SEND_DATA) - return -EINVAL; - *command = RXRPC_CMD_SEND_ABORT; - if (len != sizeof(*abort_code)) - return -EINVAL; - *abort_code = *(unsigned int *) CMSG_DATA(cmsg); - _debug("Abort %x", *abort_code); - if (*abort_code == 0) - return -EINVAL; - break; - - case RXRPC_ACCEPT: - if (*command != RXRPC_CMD_SEND_DATA) - return -EINVAL; - *command = RXRPC_CMD_ACCEPT; - if (len != 0) - return -EINVAL; - break; - - case RXRPC_EXCLUSIVE_CALL: - *_exclusive = true; - if (len != 0) - return -EINVAL; - break; - default: - return -EINVAL; - } - } - - if (!got_user_ID) - return -EINVAL; - _leave(" = 0"); - return 0; -} - -/* - * abort a call, sending an ABORT packet to the peer - */ -static void rxrpc_send_abort(struct rxrpc_call *call, u32 abort_code) -{ - if (call->state >= RXRPC_CALL_COMPLETE) - return; - - write_lock_bh(&call->state_lock); - - if (__rxrpc_abort_call(call, abort_code, ECONNABORTED)) { - del_timer_sync(&call->resend_timer); - del_timer_sync(&call->ack_timer); - clear_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events); - clear_bit(RXRPC_CALL_EV_ACK, &call->events); - clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags); - rxrpc_queue_call(call); - } - - write_unlock_bh(&call->state_lock); -} - -/* - * Create a new client call for sendmsg(). - */ -static struct rxrpc_call * -rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, - unsigned long user_call_ID, bool exclusive) -{ - struct rxrpc_conn_parameters cp; - struct rxrpc_call *call; - struct key *key; - - DECLARE_SOCKADDR(struct sockaddr_rxrpc *, srx, msg->msg_name); - - _enter(""); - - if (!msg->msg_name) - return ERR_PTR(-EDESTADDRREQ); - - key = rx->key; - if (key && !rx->key->payload.data[0]) - key = NULL; - - memset(&cp, 0, sizeof(cp)); - cp.local = rx->local; - cp.key = rx->key; - cp.security_level = rx->min_sec_level; - cp.exclusive = rx->exclusive | exclusive; - cp.service_id = srx->srx_service; - call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, GFP_KERNEL); - - _leave(" = %p\n", call); - return call; -} - -/* - * send a message forming part of a client call through an RxRPC socket - * - caller holds the socket locked - * - the socket may be either a client socket or a server socket - */ -int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) -{ - enum rxrpc_command cmd; - struct rxrpc_call *call; - unsigned long user_call_ID = 0; - bool exclusive = false; - u32 abort_code = 0; - int ret; - - _enter(""); - - ret = rxrpc_sendmsg_cmsg(msg, &user_call_ID, &cmd, &abort_code, - &exclusive); - if (ret < 0) - return ret; - - if (cmd == RXRPC_CMD_ACCEPT) { - if (rx->sk.sk_state != RXRPC_SERVER_LISTENING) - return -EINVAL; - call = rxrpc_accept_call(rx, user_call_ID, NULL); - if (IS_ERR(call)) - return PTR_ERR(call); - rxrpc_put_call(call); - return 0; - } - - call = rxrpc_find_call_by_user_ID(rx, user_call_ID); - if (!call) { - if (cmd != RXRPC_CMD_SEND_DATA) - return -EBADSLT; - call = rxrpc_new_client_call_for_sendmsg(rx, msg, user_call_ID, - exclusive); - if (IS_ERR(call)) - return PTR_ERR(call); - } - - rxrpc_see_call(call); - _debug("CALL %d USR %lx ST %d on CONN %p", - call->debug_id, call->user_call_ID, call->state, call->conn); - - if (call->state >= RXRPC_CALL_COMPLETE) { - /* it's too late for this call */ - ret = -ESHUTDOWN; - } else if (cmd == RXRPC_CMD_SEND_ABORT) { - rxrpc_send_abort(call, abort_code); - ret = 0; - } else if (cmd != RXRPC_CMD_SEND_DATA) { - ret = -EINVAL; - } else if (rxrpc_is_client_call(call) && - call->state != RXRPC_CALL_CLIENT_SEND_REQUEST) { - /* request phase complete for this client call */ - ret = -EPROTO; - } else if (rxrpc_is_service_call(call) && - call->state != RXRPC_CALL_SERVER_ACK_REQUEST && - call->state != RXRPC_CALL_SERVER_SEND_REPLY) { - /* Reply phase not begun or not complete for service call. */ - ret = -EPROTO; - } else { - ret = rxrpc_send_data(rx, call, msg, len); - } - - rxrpc_put_call(call); - _leave(" = %d", ret); - return ret; -} - -/** - * rxrpc_kernel_send_data - Allow a kernel service to send data on a call - * @sock: The socket the call is on - * @call: The call to send data through - * @msg: The data to send - * @len: The amount of data to send - * - * Allow a kernel service to send data on a call. The call must be in an state - * appropriate to sending data. No control data should be supplied in @msg, - * nor should an address be supplied. MSG_MORE should be flagged if there's - * more data to come, otherwise this data will end the transmission phase. - */ -int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call, - struct msghdr *msg, size_t len) -{ - int ret; - - _enter("{%d,%s},", call->debug_id, rxrpc_call_states[call->state]); - - ASSERTCMP(msg->msg_name, ==, NULL); - ASSERTCMP(msg->msg_control, ==, NULL); - - lock_sock(sock->sk); - - _debug("CALL %d USR %lx ST %d on CONN %p", - call->debug_id, call->user_call_ID, call->state, call->conn); - - if (call->state >= RXRPC_CALL_COMPLETE) { - ret = -ESHUTDOWN; /* it's too late for this call */ - } else if (call->state != RXRPC_CALL_CLIENT_SEND_REQUEST && - call->state != RXRPC_CALL_SERVER_ACK_REQUEST && - call->state != RXRPC_CALL_SERVER_SEND_REPLY) { - ret = -EPROTO; /* request phase complete for this client call */ - } else { - ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len); - } - - release_sock(sock->sk); - _leave(" = %d", ret); - return ret; -} -EXPORT_SYMBOL(rxrpc_kernel_send_data); - -/** - * rxrpc_kernel_abort_call - Allow a kernel service to abort a call - * @sock: The socket the call is on - * @call: The call to be aborted - * @abort_code: The abort code to stick into the ABORT packet - * - * Allow a kernel service to abort a call, if it's still in an abortable state. - */ -void rxrpc_kernel_abort_call(struct socket *sock, struct rxrpc_call *call, - u32 abort_code) -{ - _enter("{%d},%d", call->debug_id, abort_code); - - lock_sock(sock->sk); - - _debug("CALL %d USR %lx ST %d on CONN %p", - call->debug_id, call->user_call_ID, call->state, call->conn); - - rxrpc_send_abort(call, abort_code); - - release_sock(sock->sk); - _leave(""); -} - -EXPORT_SYMBOL(rxrpc_kernel_abort_call); - /* * wait for space to appear in the transmit/ACK window * - caller holds the socket locked @@ -643,3 +362,280 @@ efault: ret = -EFAULT; goto out; } + +/* + * extract control messages from the sendmsg() control buffer + */ +static int rxrpc_sendmsg_cmsg(struct msghdr *msg, + unsigned long *user_call_ID, + enum rxrpc_command *command, + u32 *abort_code, + bool *_exclusive) +{ + struct cmsghdr *cmsg; + bool got_user_ID = false; + int len; + + *command = RXRPC_CMD_SEND_DATA; + + if (msg->msg_controllen == 0) + return -EINVAL; + + for_each_cmsghdr(cmsg, msg) { + if (!CMSG_OK(msg, cmsg)) + return -EINVAL; + + len = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr)); + _debug("CMSG %d, %d, %d", + cmsg->cmsg_level, cmsg->cmsg_type, len); + + if (cmsg->cmsg_level != SOL_RXRPC) + continue; + + switch (cmsg->cmsg_type) { + case RXRPC_USER_CALL_ID: + if (msg->msg_flags & MSG_CMSG_COMPAT) { + if (len != sizeof(u32)) + return -EINVAL; + *user_call_ID = *(u32 *) CMSG_DATA(cmsg); + } else { + if (len != sizeof(unsigned long)) + return -EINVAL; + *user_call_ID = *(unsigned long *) + CMSG_DATA(cmsg); + } + _debug("User Call ID %lx", *user_call_ID); + got_user_ID = true; + break; + + case RXRPC_ABORT: + if (*command != RXRPC_CMD_SEND_DATA) + return -EINVAL; + *command = RXRPC_CMD_SEND_ABORT; + if (len != sizeof(*abort_code)) + return -EINVAL; + *abort_code = *(unsigned int *) CMSG_DATA(cmsg); + _debug("Abort %x", *abort_code); + if (*abort_code == 0) + return -EINVAL; + break; + + case RXRPC_ACCEPT: + if (*command != RXRPC_CMD_SEND_DATA) + return -EINVAL; + *command = RXRPC_CMD_ACCEPT; + if (len != 0) + return -EINVAL; + break; + + case RXRPC_EXCLUSIVE_CALL: + *_exclusive = true; + if (len != 0) + return -EINVAL; + break; + default: + return -EINVAL; + } + } + + if (!got_user_ID) + return -EINVAL; + _leave(" = 0"); + return 0; +} + +/* + * abort a call, sending an ABORT packet to the peer + */ +static void rxrpc_send_abort(struct rxrpc_call *call, u32 abort_code) +{ + if (call->state >= RXRPC_CALL_COMPLETE) + return; + + write_lock_bh(&call->state_lock); + + if (__rxrpc_abort_call(call, abort_code, ECONNABORTED)) { + del_timer_sync(&call->resend_timer); + del_timer_sync(&call->ack_timer); + clear_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events); + clear_bit(RXRPC_CALL_EV_ACK, &call->events); + clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags); + rxrpc_queue_call(call); + } + + write_unlock_bh(&call->state_lock); +} + +/* + * Create a new client call for sendmsg(). + */ +static struct rxrpc_call * +rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, + unsigned long user_call_ID, bool exclusive) +{ + struct rxrpc_conn_parameters cp; + struct rxrpc_call *call; + struct key *key; + + DECLARE_SOCKADDR(struct sockaddr_rxrpc *, srx, msg->msg_name); + + _enter(""); + + if (!msg->msg_name) + return ERR_PTR(-EDESTADDRREQ); + + key = rx->key; + if (key && !rx->key->payload.data[0]) + key = NULL; + + memset(&cp, 0, sizeof(cp)); + cp.local = rx->local; + cp.key = rx->key; + cp.security_level = rx->min_sec_level; + cp.exclusive = rx->exclusive | exclusive; + cp.service_id = srx->srx_service; + call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, GFP_KERNEL); + + _leave(" = %p\n", call); + return call; +} + +/* + * send a message forming part of a client call through an RxRPC socket + * - caller holds the socket locked + * - the socket may be either a client socket or a server socket + */ +int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) +{ + enum rxrpc_command cmd; + struct rxrpc_call *call; + unsigned long user_call_ID = 0; + bool exclusive = false; + u32 abort_code = 0; + int ret; + + _enter(""); + + ret = rxrpc_sendmsg_cmsg(msg, &user_call_ID, &cmd, &abort_code, + &exclusive); + if (ret < 0) + return ret; + + if (cmd == RXRPC_CMD_ACCEPT) { + if (rx->sk.sk_state != RXRPC_SERVER_LISTENING) + return -EINVAL; + call = rxrpc_accept_call(rx, user_call_ID, NULL); + if (IS_ERR(call)) + return PTR_ERR(call); + rxrpc_put_call(call); + return 0; + } + + call = rxrpc_find_call_by_user_ID(rx, user_call_ID); + if (!call) { + if (cmd != RXRPC_CMD_SEND_DATA) + return -EBADSLT; + call = rxrpc_new_client_call_for_sendmsg(rx, msg, user_call_ID, + exclusive); + if (IS_ERR(call)) + return PTR_ERR(call); + } + + rxrpc_see_call(call); + _debug("CALL %d USR %lx ST %d on CONN %p", + call->debug_id, call->user_call_ID, call->state, call->conn); + + if (call->state >= RXRPC_CALL_COMPLETE) { + /* it's too late for this call */ + ret = -ESHUTDOWN; + } else if (cmd == RXRPC_CMD_SEND_ABORT) { + rxrpc_send_abort(call, abort_code); + ret = 0; + } else if (cmd != RXRPC_CMD_SEND_DATA) { + ret = -EINVAL; + } else if (rxrpc_is_client_call(call) && + call->state != RXRPC_CALL_CLIENT_SEND_REQUEST) { + /* request phase complete for this client call */ + ret = -EPROTO; + } else if (rxrpc_is_service_call(call) && + call->state != RXRPC_CALL_SERVER_ACK_REQUEST && + call->state != RXRPC_CALL_SERVER_SEND_REPLY) { + /* Reply phase not begun or not complete for service call. */ + ret = -EPROTO; + } else { + ret = rxrpc_send_data(rx, call, msg, len); + } + + rxrpc_put_call(call); + _leave(" = %d", ret); + return ret; +} + +/** + * rxrpc_kernel_send_data - Allow a kernel service to send data on a call + * @sock: The socket the call is on + * @call: The call to send data through + * @msg: The data to send + * @len: The amount of data to send + * + * Allow a kernel service to send data on a call. The call must be in an state + * appropriate to sending data. No control data should be supplied in @msg, + * nor should an address be supplied. MSG_MORE should be flagged if there's + * more data to come, otherwise this data will end the transmission phase. + */ +int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call, + struct msghdr *msg, size_t len) +{ + int ret; + + _enter("{%d,%s},", call->debug_id, rxrpc_call_states[call->state]); + + ASSERTCMP(msg->msg_name, ==, NULL); + ASSERTCMP(msg->msg_control, ==, NULL); + + lock_sock(sock->sk); + + _debug("CALL %d USR %lx ST %d on CONN %p", + call->debug_id, call->user_call_ID, call->state, call->conn); + + if (call->state >= RXRPC_CALL_COMPLETE) { + ret = -ESHUTDOWN; /* it's too late for this call */ + } else if (call->state != RXRPC_CALL_CLIENT_SEND_REQUEST && + call->state != RXRPC_CALL_SERVER_ACK_REQUEST && + call->state != RXRPC_CALL_SERVER_SEND_REPLY) { + ret = -EPROTO; /* request phase complete for this client call */ + } else { + ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len); + } + + release_sock(sock->sk); + _leave(" = %d", ret); + return ret; +} +EXPORT_SYMBOL(rxrpc_kernel_send_data); + +/** + * rxrpc_kernel_abort_call - Allow a kernel service to abort a call + * @sock: The socket the call is on + * @call: The call to be aborted + * @abort_code: The abort code to stick into the ABORT packet + * + * Allow a kernel service to abort a call, if it's still in an abortable state. + */ +void rxrpc_kernel_abort_call(struct socket *sock, struct rxrpc_call *call, + u32 abort_code) +{ + _enter("{%d},%d", call->debug_id, abort_code); + + lock_sock(sock->sk); + + _debug("CALL %d USR %lx ST %d on CONN %p", + call->debug_id, call->user_call_ID, call->state, call->conn); + + rxrpc_send_abort(call, abort_code); + + release_sock(sock->sk); + _leave(""); +} + +EXPORT_SYMBOL(rxrpc_kernel_abort_call); From 434e6120036d1dd1004cadf5a99941cdd9c1a59f Mon Sep 17 00:00:00 2001 From: Bhaktipriya Shridhar Date: Sun, 4 Sep 2016 20:54:58 +0530 Subject: [PATCH 0756/1715] fs/afs/flock: Remove deprecated create_singlethread_workqueue The workqueue "afs_lock_manager" queues work item &vnode->lock_work, per vnode. Since there can be multiple vnodes and since their work items can be executed concurrently, alloc_workqueue has been used to replace the deprecated create_singlethread_workqueue instance. The WQ_MEM_RECLAIM flag has been set to ensure forward progress under memory pressure because the workqueue is being used on a memory reclaim path. Since there are fixed number of work items, explicit concurrency limit is unnecessary here. Signed-off-by: Bhaktipriya Shridhar Signed-off-by: David Howells --- fs/afs/flock.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/afs/flock.c b/fs/afs/flock.c index d91a9c9cfbd0..3191dff2c156 100644 --- a/fs/afs/flock.c +++ b/fs/afs/flock.c @@ -36,8 +36,8 @@ static int afs_init_lock_manager(void) if (!afs_lock_manager) { mutex_lock(&afs_lock_manager_mutex); if (!afs_lock_manager) { - afs_lock_manager = - create_singlethread_workqueue("kafs_lockd"); + afs_lock_manager = alloc_workqueue("kafs_lockd", + WQ_MEM_RECLAIM, 0); if (!afs_lock_manager) ret = -ENOMEM; } From 3dc20f090d84a7b08bc1d5729c874ebbd0465468 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sun, 4 Sep 2016 13:25:21 +0100 Subject: [PATCH 0757/1715] rxrpc Move enum rxrpc_command to sendmsg.c Move enum rxrpc_command to sendmsg.c as it's now only used in that file. Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 7 ------- net/rxrpc/sendmsg.c | 7 +++++++ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 464dfda2a995..bb342f5fe7e4 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -131,13 +131,6 @@ struct rxrpc_skb_priv { #define rxrpc_skb(__skb) ((struct rxrpc_skb_priv *) &(__skb)->cb) -enum rxrpc_command { - RXRPC_CMD_SEND_DATA, /* send data message */ - RXRPC_CMD_SEND_ABORT, /* request abort generation */ - RXRPC_CMD_ACCEPT, /* [server] accept incoming call */ - RXRPC_CMD_REJECT_BUSY, /* [server] reject a call as busy */ -}; - /* * RxRPC security module interface */ diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 17a9ebbc2346..7376794a0308 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -20,6 +20,13 @@ #include #include "ar-internal.h" +enum rxrpc_command { + RXRPC_CMD_SEND_DATA, /* send data message */ + RXRPC_CMD_SEND_ABORT, /* request abort generation */ + RXRPC_CMD_ACCEPT, /* [server] accept incoming call */ + RXRPC_CMD_REJECT_BUSY, /* [server] reject a call as busy */ +}; + /* * wait for space to appear in the transmit/ACK window * - caller holds the socket locked From 6654d0bff91ca090feab4bb2ed94a3a009a03337 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 2 Sep 2016 14:45:32 -0400 Subject: [PATCH 0758/1715] net: dsa: mv88e6xxx: fix module naming Since the mv88e6xxx.c file has been renamed, the driver compiled as a module is called chip.ko instead of mv88e6xxx.ko. Fix this. Fixes: fad09c73c270 ("net: dsa: mv88e6xxx: rename single-chip support") Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/mv88e6xxx/Makefile b/drivers/net/dsa/mv88e6xxx/Makefile index 6e29a75ee2f7..e58b745c3c4d 100644 --- a/drivers/net/dsa/mv88e6xxx/Makefile +++ b/drivers/net/dsa/mv88e6xxx/Makefile @@ -1 +1,2 @@ -obj-$(CONFIG_NET_DSA_MV88E6XXX) += chip.o +obj-$(CONFIG_NET_DSA_MV88E6XXX) += mv88e6xxx.o +mv88e6xxx-objs := chip.o From ec5612761c47ec7ab91b61df1bbcfd65cb0dbec8 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 2 Sep 2016 14:45:33 -0400 Subject: [PATCH 0759/1715] net: dsa: mv88e6xxx: move Global2 code Marvell chips are composed of multiple SMI devices. One of them at address 0x1C is called Global2. It provides an extended set of registers, used for interrupt control, EEPROM access, indirect PHY access (to bypass the PHY Polling Unit) and cross-chip related setup. Most chips have it, but some others don't (older ones such as 6060). Now that its related code is isolated in mv88e6xxx_g2_* functions, move it to its own global2.c file, making most of its setup code static. Document each registers in the meantime. Its compilation can be later avoided for chips without such registers. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/Makefile | 1 + drivers/net/dsa/mv88e6xxx/chip.c | 463 +------------------------ drivers/net/dsa/mv88e6xxx/global2.c | 471 ++++++++++++++++++++++++++ drivers/net/dsa/mv88e6xxx/global2.h | 30 ++ drivers/net/dsa/mv88e6xxx/mv88e6xxx.h | 6 + 5 files changed, 521 insertions(+), 450 deletions(-) create mode 100644 drivers/net/dsa/mv88e6xxx/global2.c create mode 100644 drivers/net/dsa/mv88e6xxx/global2.h diff --git a/drivers/net/dsa/mv88e6xxx/Makefile b/drivers/net/dsa/mv88e6xxx/Makefile index e58b745c3c4d..5a4206652e92 100644 --- a/drivers/net/dsa/mv88e6xxx/Makefile +++ b/drivers/net/dsa/mv88e6xxx/Makefile @@ -1,2 +1,3 @@ obj-$(CONFIG_NET_DSA_MV88E6XXX) += mv88e6xxx.o mv88e6xxx-objs := chip.o +mv88e6xxx-objs += global2.o diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index d6b0f78e9598..745e1588d83c 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -29,7 +29,9 @@ #include #include #include + #include "mv88e6xxx.h" +#include "global2.h" static void assert_reg_lock(struct mv88e6xxx_chip *chip) { @@ -182,8 +184,7 @@ static const struct mv88e6xxx_ops mv88e6xxx_smi_multi_chip_ops = { .write = mv88e6xxx_smi_multi_chip_write, }; -static int mv88e6xxx_read(struct mv88e6xxx_chip *chip, - int addr, int reg, u16 *val) +int mv88e6xxx_read(struct mv88e6xxx_chip *chip, int addr, int reg, u16 *val) { int err; @@ -199,8 +200,7 @@ static int mv88e6xxx_read(struct mv88e6xxx_chip *chip, return 0; } -static int mv88e6xxx_write(struct mv88e6xxx_chip *chip, - int addr, int reg, u16 val) +int mv88e6xxx_write(struct mv88e6xxx_chip *chip, int addr, int reg, u16 val) { int err; @@ -306,8 +306,7 @@ static int mv88e6xxx_serdes_write(struct mv88e6xxx_chip *chip, int reg, u16 val) reg, val); } -static int mv88e6xxx_wait(struct mv88e6xxx_chip *chip, int addr, int reg, - u16 mask) +int mv88e6xxx_wait(struct mv88e6xxx_chip *chip, int addr, int reg, u16 mask) { int i; @@ -330,8 +329,7 @@ static int mv88e6xxx_wait(struct mv88e6xxx_chip *chip, int addr, int reg, } /* Indirect write to single pointer-data register with an Update bit */ -static int mv88e6xxx_update(struct mv88e6xxx_chip *chip, int addr, int reg, - u16 update) +int mv88e6xxx_update(struct mv88e6xxx_chip *chip, int addr, int reg, u16 update) { u16 val; int err; @@ -2878,330 +2876,6 @@ static int mv88e6xxx_g1_setup(struct mv88e6xxx_chip *chip) return 0; } -static int mv88e6xxx_g2_device_mapping_write(struct mv88e6xxx_chip *chip, - int target, int port) -{ - u16 val = (target << 8) | (port & 0xf); - - return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_DEVICE_MAPPING, val); -} - -static int mv88e6xxx_g2_set_device_mapping(struct mv88e6xxx_chip *chip) -{ - int target, port; - int err; - - /* Initialize the routing port to the 32 possible target devices */ - for (target = 0; target < 32; ++target) { - port = 0xf; - - if (target < DSA_MAX_SWITCHES) { - port = chip->ds->rtable[target]; - if (port == DSA_RTABLE_NONE) - port = 0xf; - } - - err = mv88e6xxx_g2_device_mapping_write(chip, target, port); - if (err) - break; - } - - return err; -} - -static int mv88e6xxx_g2_trunk_mask_write(struct mv88e6xxx_chip *chip, int num, - bool hask, u16 mask) -{ - const u16 port_mask = BIT(chip->info->num_ports) - 1; - u16 val = (num << 12) | (mask & port_mask); - - if (hask) - val |= GLOBAL2_TRUNK_MASK_HASK; - - return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_TRUNK_MASK, val); -} - -static int mv88e6xxx_g2_trunk_mapping_write(struct mv88e6xxx_chip *chip, int id, - u16 map) -{ - const u16 port_mask = BIT(chip->info->num_ports) - 1; - u16 val = (id << 11) | (map & port_mask); - - return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_TRUNK_MAPPING, val); -} - -static int mv88e6xxx_g2_clear_trunk(struct mv88e6xxx_chip *chip) -{ - const u16 port_mask = BIT(chip->info->num_ports) - 1; - int i, err; - - /* Clear all eight possible Trunk Mask vectors */ - for (i = 0; i < 8; ++i) { - err = mv88e6xxx_g2_trunk_mask_write(chip, i, false, port_mask); - if (err) - return err; - } - - /* Clear all sixteen possible Trunk ID routing vectors */ - for (i = 0; i < 16; ++i) { - err = mv88e6xxx_g2_trunk_mapping_write(chip, i, 0); - if (err) - return err; - } - - return 0; -} - -static int mv88e6xxx_g2_clear_irl(struct mv88e6xxx_chip *chip) -{ - int port, err; - - /* Init all Ingress Rate Limit resources of all ports */ - for (port = 0; port < chip->info->num_ports; ++port) { - /* XXX newer chips (like 88E6390) have different 2-bit ops */ - err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_IRL_CMD, - GLOBAL2_IRL_CMD_OP_INIT_ALL | - (port << 8)); - if (err) - break; - - /* Wait for the operation to complete */ - err = mv88e6xxx_wait(chip, REG_GLOBAL2, GLOBAL2_IRL_CMD, - GLOBAL2_IRL_CMD_BUSY); - if (err) - break; - } - - return err; -} - -/* Indirect write to the Switch MAC/WoL/WoF register */ -static int mv88e6xxx_g2_switch_mac_write(struct mv88e6xxx_chip *chip, - unsigned int pointer, u8 data) -{ - u16 val = (pointer << 8) | data; - - return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_SWITCH_MAC, val); -} - -static int mv88e6xxx_g2_set_switch_mac(struct mv88e6xxx_chip *chip, u8 *addr) -{ - int i, err; - - for (i = 0; i < 6; i++) { - err = mv88e6xxx_g2_switch_mac_write(chip, i, addr[i]); - if (err) - break; - } - - return err; -} - -static int mv88e6xxx_g2_pot_write(struct mv88e6xxx_chip *chip, int pointer, - u8 data) -{ - u16 val = (pointer << 8) | (data & 0x7); - - return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_PRIO_OVERRIDE, val); -} - -static int mv88e6xxx_g2_clear_pot(struct mv88e6xxx_chip *chip) -{ - int i, err; - - /* Clear all sixteen possible Priority Override entries */ - for (i = 0; i < 16; i++) { - err = mv88e6xxx_g2_pot_write(chip, i, 0); - if (err) - break; - } - - return err; -} - -static int mv88e6xxx_g2_eeprom_wait(struct mv88e6xxx_chip *chip) -{ - return mv88e6xxx_wait(chip, REG_GLOBAL2, GLOBAL2_EEPROM_CMD, - GLOBAL2_EEPROM_CMD_BUSY | - GLOBAL2_EEPROM_CMD_RUNNING); -} - -static int mv88e6xxx_g2_eeprom_cmd(struct mv88e6xxx_chip *chip, u16 cmd) -{ - int err; - - err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_EEPROM_CMD, cmd); - if (err) - return err; - - return mv88e6xxx_g2_eeprom_wait(chip); -} - -static int mv88e6xxx_g2_eeprom_read16(struct mv88e6xxx_chip *chip, - u8 addr, u16 *data) -{ - u16 cmd = GLOBAL2_EEPROM_CMD_OP_READ | addr; - int err; - - err = mv88e6xxx_g2_eeprom_wait(chip); - if (err) - return err; - - err = mv88e6xxx_g2_eeprom_cmd(chip, cmd); - if (err) - return err; - - return mv88e6xxx_read(chip, REG_GLOBAL2, GLOBAL2_EEPROM_DATA, data); -} - -static int mv88e6xxx_g2_eeprom_write16(struct mv88e6xxx_chip *chip, - u8 addr, u16 data) -{ - u16 cmd = GLOBAL2_EEPROM_CMD_OP_WRITE | addr; - int err; - - err = mv88e6xxx_g2_eeprom_wait(chip); - if (err) - return err; - - err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_EEPROM_DATA, data); - if (err) - return err; - - return mv88e6xxx_g2_eeprom_cmd(chip, cmd); -} - -static int mv88e6xxx_g2_smi_phy_wait(struct mv88e6xxx_chip *chip) -{ - return mv88e6xxx_wait(chip, REG_GLOBAL2, GLOBAL2_SMI_PHY_CMD, - GLOBAL2_SMI_PHY_CMD_BUSY); -} - -static int mv88e6xxx_g2_smi_phy_cmd(struct mv88e6xxx_chip *chip, u16 cmd) -{ - int err; - - err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_SMI_PHY_CMD, cmd); - if (err) - return err; - - return mv88e6xxx_g2_smi_phy_wait(chip); -} - -static int mv88e6xxx_g2_smi_phy_read(struct mv88e6xxx_chip *chip, int addr, - int reg, u16 *val) -{ - u16 cmd = GLOBAL2_SMI_PHY_CMD_OP_22_READ_DATA | (addr << 5) | reg; - int err; - - err = mv88e6xxx_g2_smi_phy_wait(chip); - if (err) - return err; - - err = mv88e6xxx_g2_smi_phy_cmd(chip, cmd); - if (err) - return err; - - return mv88e6xxx_read(chip, REG_GLOBAL2, GLOBAL2_SMI_PHY_DATA, val); -} - -static int mv88e6xxx_g2_smi_phy_write(struct mv88e6xxx_chip *chip, int addr, - int reg, u16 val) -{ - u16 cmd = GLOBAL2_SMI_PHY_CMD_OP_22_WRITE_DATA | (addr << 5) | reg; - int err; - - err = mv88e6xxx_g2_smi_phy_wait(chip); - if (err) - return err; - - err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_SMI_PHY_DATA, val); - if (err) - return err; - - return mv88e6xxx_g2_smi_phy_cmd(chip, cmd); -} - -static const struct mv88e6xxx_ops mv88e6xxx_g2_smi_phy_ops = { - .read = mv88e6xxx_g2_smi_phy_read, - .write = mv88e6xxx_g2_smi_phy_write, -}; - -static int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip) -{ - u16 reg; - int err; - - if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_2X)) { - /* Consider the frames with reserved multicast destination - * addresses matching 01:80:c2:00:00:2x as MGMT. - */ - err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_MGMT_EN_2X, - 0xffff); - if (err) - return err; - } - - if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_0X)) { - /* Consider the frames with reserved multicast destination - * addresses matching 01:80:c2:00:00:0x as MGMT. - */ - err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_MGMT_EN_0X, - 0xffff); - if (err) - return err; - } - - /* Ignore removed tag data on doubly tagged packets, disable - * flow control messages, force flow control priority to the - * highest, and send all special multicast frames to the CPU - * port at the highest priority. - */ - reg = GLOBAL2_SWITCH_MGMT_FORCE_FLOW_CTRL_PRI | (0x7 << 4); - if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_0X) || - mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_2X)) - reg |= GLOBAL2_SWITCH_MGMT_RSVD2CPU | 0x7; - err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_SWITCH_MGMT, reg); - if (err) - return err; - - /* Program the DSA routing table. */ - err = mv88e6xxx_g2_set_device_mapping(chip); - if (err) - return err; - - /* Clear all trunk masks and mapping. */ - err = mv88e6xxx_g2_clear_trunk(chip); - if (err) - return err; - - if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_IRL)) { - /* Disable ingress rate limiting by resetting all per port - * ingress rate limit resources to their initial state. - */ - err = mv88e6xxx_g2_clear_irl(chip); - if (err) - return err; - } - - if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_PVT)) { - /* Initialize Cross-chip Port VLAN Table to reset defaults */ - err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_PVT_ADDR, - GLOBAL2_PVT_ADDR_OP_INIT_ONES); - if (err) - return err; - } - - if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_POT)) { - /* Clear the priority override table. */ - err = mv88e6xxx_g2_clear_pot(chip); - if (err) - return err; - } - - return 0; -} - static int mv88e6xxx_setup(struct dsa_switch *ds) { struct mv88e6xxx_chip *chip = ds->priv; @@ -3503,56 +3177,6 @@ static int mv88e6xxx_get_eeprom_len(struct dsa_switch *ds) return chip->eeprom_len; } -static int mv88e6xxx_get_eeprom16(struct mv88e6xxx_chip *chip, - struct ethtool_eeprom *eeprom, u8 *data) -{ - unsigned int offset = eeprom->offset; - unsigned int len = eeprom->len; - u16 val; - int err; - - eeprom->len = 0; - - if (offset & 1) { - err = mv88e6xxx_g2_eeprom_read16(chip, offset >> 1, &val); - if (err) - return err; - - *data++ = (val >> 8) & 0xff; - - offset++; - len--; - eeprom->len++; - } - - while (len >= 2) { - err = mv88e6xxx_g2_eeprom_read16(chip, offset >> 1, &val); - if (err) - return err; - - *data++ = val & 0xff; - *data++ = (val >> 8) & 0xff; - - offset += 2; - len -= 2; - eeprom->len += 2; - } - - if (len) { - err = mv88e6xxx_g2_eeprom_read16(chip, offset >> 1, &val); - if (err) - return err; - - *data++ = val & 0xff; - - offset++; - len--; - eeprom->len++; - } - - return 0; -} - static int mv88e6xxx_get_eeprom(struct dsa_switch *ds, struct ethtool_eeprom *eeprom, u8 *data) { @@ -3562,7 +3186,7 @@ static int mv88e6xxx_get_eeprom(struct dsa_switch *ds, mutex_lock(&chip->reg_lock); if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_EEPROM16)) - err = mv88e6xxx_get_eeprom16(chip, eeprom, data); + err = mv88e6xxx_g2_get_eeprom16(chip, eeprom, data); else err = -EOPNOTSUPP; @@ -3576,72 +3200,6 @@ static int mv88e6xxx_get_eeprom(struct dsa_switch *ds, return 0; } -static int mv88e6xxx_set_eeprom16(struct mv88e6xxx_chip *chip, - struct ethtool_eeprom *eeprom, u8 *data) -{ - unsigned int offset = eeprom->offset; - unsigned int len = eeprom->len; - u16 val; - int err; - - /* Ensure the RO WriteEn bit is set */ - err = mv88e6xxx_read(chip, REG_GLOBAL2, GLOBAL2_EEPROM_CMD, &val); - if (err) - return err; - - if (!(val & GLOBAL2_EEPROM_CMD_WRITE_EN)) - return -EROFS; - - eeprom->len = 0; - - if (offset & 1) { - err = mv88e6xxx_g2_eeprom_read16(chip, offset >> 1, &val); - if (err) - return err; - - val = (*data++ << 8) | (val & 0xff); - - err = mv88e6xxx_g2_eeprom_write16(chip, offset >> 1, val); - if (err) - return err; - - offset++; - len--; - eeprom->len++; - } - - while (len >= 2) { - val = *data++; - val |= *data++ << 8; - - err = mv88e6xxx_g2_eeprom_write16(chip, offset >> 1, val); - if (err) - return err; - - offset += 2; - len -= 2; - eeprom->len += 2; - } - - if (len) { - err = mv88e6xxx_g2_eeprom_read16(chip, offset >> 1, &val); - if (err) - return err; - - val = (val & 0xff00) | *data++; - - err = mv88e6xxx_g2_eeprom_write16(chip, offset >> 1, val); - if (err) - return err; - - offset++; - len--; - eeprom->len++; - } - - return 0; -} - static int mv88e6xxx_set_eeprom(struct dsa_switch *ds, struct ethtool_eeprom *eeprom, u8 *data) { @@ -3654,7 +3212,7 @@ static int mv88e6xxx_set_eeprom(struct dsa_switch *ds, mutex_lock(&chip->reg_lock); if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_EEPROM16)) - err = mv88e6xxx_set_eeprom16(chip, eeprom, data); + err = mv88e6xxx_g2_set_eeprom16(chip, eeprom, data); else err = -EOPNOTSUPP; @@ -3907,6 +3465,11 @@ static struct mv88e6xxx_chip *mv88e6xxx_alloc_chip(struct device *dev) return chip; } +static const struct mv88e6xxx_ops mv88e6xxx_g2_smi_phy_ops = { + .read = mv88e6xxx_g2_smi_phy_read, + .write = mv88e6xxx_g2_smi_phy_write, +}; + static const struct mv88e6xxx_ops mv88e6xxx_phy_ops = { .read = mv88e6xxx_read, .write = mv88e6xxx_write, diff --git a/drivers/net/dsa/mv88e6xxx/global2.c b/drivers/net/dsa/mv88e6xxx/global2.c new file mode 100644 index 000000000000..99ed028298ac --- /dev/null +++ b/drivers/net/dsa/mv88e6xxx/global2.c @@ -0,0 +1,471 @@ +/* + * Marvell 88E6xxx Switch Global 2 Registers support (device address 0x1C) + * + * Copyright (c) 2008 Marvell Semiconductor + * + * Copyright (c) 2016 Vivien Didelot + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include "mv88e6xxx.h" +#include "global2.h" + +/* Offset 0x06: Device Mapping Table register */ + +static int mv88e6xxx_g2_device_mapping_write(struct mv88e6xxx_chip *chip, + int target, int port) +{ + u16 val = (target << 8) | (port & 0xf); + + return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_DEVICE_MAPPING, val); +} + +static int mv88e6xxx_g2_set_device_mapping(struct mv88e6xxx_chip *chip) +{ + int target, port; + int err; + + /* Initialize the routing port to the 32 possible target devices */ + for (target = 0; target < 32; ++target) { + port = 0xf; + + if (target < DSA_MAX_SWITCHES) { + port = chip->ds->rtable[target]; + if (port == DSA_RTABLE_NONE) + port = 0xf; + } + + err = mv88e6xxx_g2_device_mapping_write(chip, target, port); + if (err) + break; + } + + return err; +} + +/* Offset 0x07: Trunk Mask Table register */ + +static int mv88e6xxx_g2_trunk_mask_write(struct mv88e6xxx_chip *chip, int num, + bool hask, u16 mask) +{ + const u16 port_mask = BIT(chip->info->num_ports) - 1; + u16 val = (num << 12) | (mask & port_mask); + + if (hask) + val |= GLOBAL2_TRUNK_MASK_HASK; + + return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_TRUNK_MASK, val); +} + +/* Offset 0x08: Trunk Mapping Table register */ + +static int mv88e6xxx_g2_trunk_mapping_write(struct mv88e6xxx_chip *chip, int id, + u16 map) +{ + const u16 port_mask = BIT(chip->info->num_ports) - 1; + u16 val = (id << 11) | (map & port_mask); + + return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_TRUNK_MAPPING, val); +} + +static int mv88e6xxx_g2_clear_trunk(struct mv88e6xxx_chip *chip) +{ + const u16 port_mask = BIT(chip->info->num_ports) - 1; + int i, err; + + /* Clear all eight possible Trunk Mask vectors */ + for (i = 0; i < 8; ++i) { + err = mv88e6xxx_g2_trunk_mask_write(chip, i, false, port_mask); + if (err) + return err; + } + + /* Clear all sixteen possible Trunk ID routing vectors */ + for (i = 0; i < 16; ++i) { + err = mv88e6xxx_g2_trunk_mapping_write(chip, i, 0); + if (err) + return err; + } + + return 0; +} + +/* Offset 0x09: Ingress Rate Command register + * Offset 0x0A: Ingress Rate Data register + */ + +static int mv88e6xxx_g2_clear_irl(struct mv88e6xxx_chip *chip) +{ + int port, err; + + /* Init all Ingress Rate Limit resources of all ports */ + for (port = 0; port < chip->info->num_ports; ++port) { + /* XXX newer chips (like 88E6390) have different 2-bit ops */ + err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_IRL_CMD, + GLOBAL2_IRL_CMD_OP_INIT_ALL | + (port << 8)); + if (err) + break; + + /* Wait for the operation to complete */ + err = mv88e6xxx_wait(chip, REG_GLOBAL2, GLOBAL2_IRL_CMD, + GLOBAL2_IRL_CMD_BUSY); + if (err) + break; + } + + return err; +} + +/* Offset 0x0D: Switch MAC/WoL/WoF register */ + +static int mv88e6xxx_g2_switch_mac_write(struct mv88e6xxx_chip *chip, + unsigned int pointer, u8 data) +{ + u16 val = (pointer << 8) | data; + + return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_SWITCH_MAC, val); +} + +int mv88e6xxx_g2_set_switch_mac(struct mv88e6xxx_chip *chip, u8 *addr) +{ + int i, err; + + for (i = 0; i < 6; i++) { + err = mv88e6xxx_g2_switch_mac_write(chip, i, addr[i]); + if (err) + break; + } + + return err; +} + +/* Offset 0x0F: Priority Override Table */ + +static int mv88e6xxx_g2_pot_write(struct mv88e6xxx_chip *chip, int pointer, + u8 data) +{ + u16 val = (pointer << 8) | (data & 0x7); + + return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_PRIO_OVERRIDE, val); +} + +static int mv88e6xxx_g2_clear_pot(struct mv88e6xxx_chip *chip) +{ + int i, err; + + /* Clear all sixteen possible Priority Override entries */ + for (i = 0; i < 16; i++) { + err = mv88e6xxx_g2_pot_write(chip, i, 0); + if (err) + break; + } + + return err; +} + +/* Offset 0x14: EEPROM Command + * Offset 0x15: EEPROM Data + */ + +static int mv88e6xxx_g2_eeprom_wait(struct mv88e6xxx_chip *chip) +{ + return mv88e6xxx_wait(chip, REG_GLOBAL2, GLOBAL2_EEPROM_CMD, + GLOBAL2_EEPROM_CMD_BUSY | + GLOBAL2_EEPROM_CMD_RUNNING); +} + +static int mv88e6xxx_g2_eeprom_cmd(struct mv88e6xxx_chip *chip, u16 cmd) +{ + int err; + + err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_EEPROM_CMD, cmd); + if (err) + return err; + + return mv88e6xxx_g2_eeprom_wait(chip); +} + +static int mv88e6xxx_g2_eeprom_read16(struct mv88e6xxx_chip *chip, + u8 addr, u16 *data) +{ + u16 cmd = GLOBAL2_EEPROM_CMD_OP_READ | addr; + int err; + + err = mv88e6xxx_g2_eeprom_wait(chip); + if (err) + return err; + + err = mv88e6xxx_g2_eeprom_cmd(chip, cmd); + if (err) + return err; + + return mv88e6xxx_read(chip, REG_GLOBAL2, GLOBAL2_EEPROM_DATA, data); +} + +static int mv88e6xxx_g2_eeprom_write16(struct mv88e6xxx_chip *chip, + u8 addr, u16 data) +{ + u16 cmd = GLOBAL2_EEPROM_CMD_OP_WRITE | addr; + int err; + + err = mv88e6xxx_g2_eeprom_wait(chip); + if (err) + return err; + + err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_EEPROM_DATA, data); + if (err) + return err; + + return mv88e6xxx_g2_eeprom_cmd(chip, cmd); +} + +int mv88e6xxx_g2_get_eeprom16(struct mv88e6xxx_chip *chip, + struct ethtool_eeprom *eeprom, u8 *data) +{ + unsigned int offset = eeprom->offset; + unsigned int len = eeprom->len; + u16 val; + int err; + + eeprom->len = 0; + + if (offset & 1) { + err = mv88e6xxx_g2_eeprom_read16(chip, offset >> 1, &val); + if (err) + return err; + + *data++ = (val >> 8) & 0xff; + + offset++; + len--; + eeprom->len++; + } + + while (len >= 2) { + err = mv88e6xxx_g2_eeprom_read16(chip, offset >> 1, &val); + if (err) + return err; + + *data++ = val & 0xff; + *data++ = (val >> 8) & 0xff; + + offset += 2; + len -= 2; + eeprom->len += 2; + } + + if (len) { + err = mv88e6xxx_g2_eeprom_read16(chip, offset >> 1, &val); + if (err) + return err; + + *data++ = val & 0xff; + + offset++; + len--; + eeprom->len++; + } + + return 0; +} + +int mv88e6xxx_g2_set_eeprom16(struct mv88e6xxx_chip *chip, + struct ethtool_eeprom *eeprom, u8 *data) +{ + unsigned int offset = eeprom->offset; + unsigned int len = eeprom->len; + u16 val; + int err; + + /* Ensure the RO WriteEn bit is set */ + err = mv88e6xxx_read(chip, REG_GLOBAL2, GLOBAL2_EEPROM_CMD, &val); + if (err) + return err; + + if (!(val & GLOBAL2_EEPROM_CMD_WRITE_EN)) + return -EROFS; + + eeprom->len = 0; + + if (offset & 1) { + err = mv88e6xxx_g2_eeprom_read16(chip, offset >> 1, &val); + if (err) + return err; + + val = (*data++ << 8) | (val & 0xff); + + err = mv88e6xxx_g2_eeprom_write16(chip, offset >> 1, val); + if (err) + return err; + + offset++; + len--; + eeprom->len++; + } + + while (len >= 2) { + val = *data++; + val |= *data++ << 8; + + err = mv88e6xxx_g2_eeprom_write16(chip, offset >> 1, val); + if (err) + return err; + + offset += 2; + len -= 2; + eeprom->len += 2; + } + + if (len) { + err = mv88e6xxx_g2_eeprom_read16(chip, offset >> 1, &val); + if (err) + return err; + + val = (val & 0xff00) | *data++; + + err = mv88e6xxx_g2_eeprom_write16(chip, offset >> 1, val); + if (err) + return err; + + offset++; + len--; + eeprom->len++; + } + + return 0; +} + +/* Offset 0x18: SMI PHY Command Register + * Offset 0x19: SMI PHY Data Register + */ + +static int mv88e6xxx_g2_smi_phy_wait(struct mv88e6xxx_chip *chip) +{ + return mv88e6xxx_wait(chip, REG_GLOBAL2, GLOBAL2_SMI_PHY_CMD, + GLOBAL2_SMI_PHY_CMD_BUSY); +} + +static int mv88e6xxx_g2_smi_phy_cmd(struct mv88e6xxx_chip *chip, u16 cmd) +{ + int err; + + err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_SMI_PHY_CMD, cmd); + if (err) + return err; + + return mv88e6xxx_g2_smi_phy_wait(chip); +} + +int mv88e6xxx_g2_smi_phy_read(struct mv88e6xxx_chip *chip, int addr, int reg, + u16 *val) +{ + u16 cmd = GLOBAL2_SMI_PHY_CMD_OP_22_READ_DATA | (addr << 5) | reg; + int err; + + err = mv88e6xxx_g2_smi_phy_wait(chip); + if (err) + return err; + + err = mv88e6xxx_g2_smi_phy_cmd(chip, cmd); + if (err) + return err; + + return mv88e6xxx_read(chip, REG_GLOBAL2, GLOBAL2_SMI_PHY_DATA, val); +} + +int mv88e6xxx_g2_smi_phy_write(struct mv88e6xxx_chip *chip, int addr, int reg, + u16 val) +{ + u16 cmd = GLOBAL2_SMI_PHY_CMD_OP_22_WRITE_DATA | (addr << 5) | reg; + int err; + + err = mv88e6xxx_g2_smi_phy_wait(chip); + if (err) + return err; + + err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_SMI_PHY_DATA, val); + if (err) + return err; + + return mv88e6xxx_g2_smi_phy_cmd(chip, cmd); +} + +int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip) +{ + u16 reg; + int err; + + if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_2X)) { + /* Consider the frames with reserved multicast destination + * addresses matching 01:80:c2:00:00:2x as MGMT. + */ + err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_MGMT_EN_2X, + 0xffff); + if (err) + return err; + } + + if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_0X)) { + /* Consider the frames with reserved multicast destination + * addresses matching 01:80:c2:00:00:0x as MGMT. + */ + err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_MGMT_EN_0X, + 0xffff); + if (err) + return err; + } + + /* Ignore removed tag data on doubly tagged packets, disable + * flow control messages, force flow control priority to the + * highest, and send all special multicast frames to the CPU + * port at the highest priority. + */ + reg = GLOBAL2_SWITCH_MGMT_FORCE_FLOW_CTRL_PRI | (0x7 << 4); + if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_0X) || + mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_2X)) + reg |= GLOBAL2_SWITCH_MGMT_RSVD2CPU | 0x7; + err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_SWITCH_MGMT, reg); + if (err) + return err; + + /* Program the DSA routing table. */ + err = mv88e6xxx_g2_set_device_mapping(chip); + if (err) + return err; + + /* Clear all trunk masks and mapping. */ + err = mv88e6xxx_g2_clear_trunk(chip); + if (err) + return err; + + if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_IRL)) { + /* Disable ingress rate limiting by resetting all per port + * ingress rate limit resources to their initial state. + */ + err = mv88e6xxx_g2_clear_irl(chip); + if (err) + return err; + } + + if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_PVT)) { + /* Initialize Cross-chip Port VLAN Table to reset defaults */ + err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_PVT_ADDR, + GLOBAL2_PVT_ADDR_OP_INIT_ONES); + if (err) + return err; + } + + if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_POT)) { + /* Clear the priority override table. */ + err = mv88e6xxx_g2_clear_pot(chip); + if (err) + return err; + } + + return 0; +} diff --git a/drivers/net/dsa/mv88e6xxx/global2.h b/drivers/net/dsa/mv88e6xxx/global2.h new file mode 100644 index 000000000000..bee1bc98fd97 --- /dev/null +++ b/drivers/net/dsa/mv88e6xxx/global2.h @@ -0,0 +1,30 @@ +/* + * Marvell 88E6xxx Switch Global 2 Registers support (device address 0x1C) + * + * Copyright (c) 2008 Marvell Semiconductor + * + * Copyright (c) 2016 Vivien Didelot + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef _MV88E6XXX_GLOBAL2_H +#define _MV88E6XXX_GLOBAL2_H + +#include "mv88e6xxx.h" + +int mv88e6xxx_g2_smi_phy_read(struct mv88e6xxx_chip *chip, int addr, int reg, + u16 *val); +int mv88e6xxx_g2_smi_phy_write(struct mv88e6xxx_chip *chip, int addr, int reg, + u16 val); +int mv88e6xxx_g2_set_switch_mac(struct mv88e6xxx_chip *chip, u8 *addr); +int mv88e6xxx_g2_get_eeprom16(struct mv88e6xxx_chip *chip, + struct ethtool_eeprom *eeprom, u8 *data); +int mv88e6xxx_g2_set_eeprom16(struct mv88e6xxx_chip *chip, + struct ethtool_eeprom *eeprom, u8 *data); +int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip); + +#endif /* _MV88E6XXX_GLOBAL2_H */ diff --git a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h index e157d4f69864..52f3f5231f51 100644 --- a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h +++ b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h @@ -718,4 +718,10 @@ static inline bool mv88e6xxx_has(struct mv88e6xxx_chip *chip, return (chip->info->flags & flags) == flags; } +int mv88e6xxx_read(struct mv88e6xxx_chip *chip, int addr, int reg, u16 *val); +int mv88e6xxx_write(struct mv88e6xxx_chip *chip, int addr, int reg, u16 val); +int mv88e6xxx_update(struct mv88e6xxx_chip *chip, int addr, int reg, + u16 update); +int mv88e6xxx_wait(struct mv88e6xxx_chip *chip, int addr, int reg, u16 mask); + #endif From ca070c1097eb647e87619af5037d8a8a53a3248b Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 2 Sep 2016 14:45:34 -0400 Subject: [PATCH 0760/1715] net: dsa: mv88e6xxx: make global2 code optional Since not every chip has a Global2 set of registers, make its support optional, in which case the related functions will return -EOPNOTSUPP. This also allows to reduce the size of the mv88e6xxx driver for devices such as home routers embedding Ethernet chips without Global2 support. It is present on most recent chips, thus enable its support by default. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/Kconfig | 11 ++++++ drivers/net/dsa/mv88e6xxx/Makefile | 2 +- drivers/net/dsa/mv88e6xxx/chip.c | 4 ++ drivers/net/dsa/mv88e6xxx/global2.h | 58 +++++++++++++++++++++++++++++ 4 files changed, 74 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/mv88e6xxx/Kconfig b/drivers/net/dsa/mv88e6xxx/Kconfig index ac77737bbd87..486668813e15 100644 --- a/drivers/net/dsa/mv88e6xxx/Kconfig +++ b/drivers/net/dsa/mv88e6xxx/Kconfig @@ -6,3 +6,14 @@ config NET_DSA_MV88E6XXX help This driver adds support for most of the Marvell 88E6xxx models of Ethernet switch chips, except 88E6060. + +config NET_DSA_MV88E6XXX_GLOBAL2 + bool "Switch Global 2 Registers support" + default y + depends on NET_DSA_MV88E6XXX + help + This registers set at internal SMI address 0x1C provides extended + features like EEPROM interface, trunking, cross-chip setup, etc. + + It is required on most chips. If the chip you compile the support for + doesn't have such registers set, say N here. In doubt, say Y. diff --git a/drivers/net/dsa/mv88e6xxx/Makefile b/drivers/net/dsa/mv88e6xxx/Makefile index 5a4206652e92..697103934317 100644 --- a/drivers/net/dsa/mv88e6xxx/Makefile +++ b/drivers/net/dsa/mv88e6xxx/Makefile @@ -1,3 +1,3 @@ obj-$(CONFIG_NET_DSA_MV88E6XXX) += mv88e6xxx.o mv88e6xxx-objs := chip.o -mv88e6xxx-objs += global2.o +mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_GLOBAL2) += global2.o diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 745e1588d83c..70a812d159c9 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -3444,6 +3444,10 @@ static int mv88e6xxx_detect(struct mv88e6xxx_chip *chip) /* Update the compatible info with the probed one */ chip->info = info; + err = mv88e6xxx_g2_require(chip); + if (err) + return err; + dev_info(chip->dev, "switch 0x%x detected: %s, revision %u\n", chip->info->prod_num, chip->info->name, rev); diff --git a/drivers/net/dsa/mv88e6xxx/global2.h b/drivers/net/dsa/mv88e6xxx/global2.h index bee1bc98fd97..c4bb9035ee3a 100644 --- a/drivers/net/dsa/mv88e6xxx/global2.h +++ b/drivers/net/dsa/mv88e6xxx/global2.h @@ -16,6 +16,13 @@ #include "mv88e6xxx.h" +#ifdef CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 + +static inline int mv88e6xxx_g2_require(struct mv88e6xxx_chip *chip) +{ + return 0; +} + int mv88e6xxx_g2_smi_phy_read(struct mv88e6xxx_chip *chip, int addr, int reg, u16 *val); int mv88e6xxx_g2_smi_phy_write(struct mv88e6xxx_chip *chip, int addr, int reg, @@ -27,4 +34,55 @@ int mv88e6xxx_g2_set_eeprom16(struct mv88e6xxx_chip *chip, struct ethtool_eeprom *eeprom, u8 *data); int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip); +#else /* !CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 */ + +static inline int mv88e6xxx_g2_require(struct mv88e6xxx_chip *chip) +{ + if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_GLOBAL2)) { + dev_err(chip->dev, "this chip requires CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 enabled\n"); + return -EOPNOTSUPP; + } + + return 0; +} + +static inline int mv88e6xxx_g2_smi_phy_read(struct mv88e6xxx_chip *chip, + int addr, int reg, u16 *val) +{ + return -EOPNOTSUPP; +} + +static inline int mv88e6xxx_g2_smi_phy_write(struct mv88e6xxx_chip *chip, + int addr, int reg, u16 val) +{ + return -EOPNOTSUPP; +} + +static inline int mv88e6xxx_g2_set_switch_mac(struct mv88e6xxx_chip *chip, + u8 *addr) +{ + return -EOPNOTSUPP; +} + +static inline int mv88e6xxx_g2_get_eeprom16(struct mv88e6xxx_chip *chip, + struct ethtool_eeprom *eeprom, + u8 *data) +{ + return -EOPNOTSUPP; +} + +static inline int mv88e6xxx_g2_set_eeprom16(struct mv88e6xxx_chip *chip, + struct ethtool_eeprom *eeprom, + u8 *data) +{ + return -EOPNOTSUPP; +} + +static inline int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip) +{ + return -EOPNOTSUPP; +} + +#endif /* CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 */ + #endif /* _MV88E6XXX_GLOBAL2_H */ From 983ccd74a14dd6fd28f579fc2a50c0c416f276d1 Mon Sep 17 00:00:00 2001 From: Woojung Huh Date: Fri, 2 Sep 2016 20:34:19 +0000 Subject: [PATCH 0761/1715] smsc95xx: Add maintainer Add Microchip Linux Driver Support as maintainer because this driver is maintaining by Microchip. Signed-off-by: Woojung Huh Acked-by: Steve Glendinning Signed-off-by: David S. Miller --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 734e03556e2c..9a1783547baf 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12285,6 +12285,7 @@ F: drivers/net/usb/smsc75xx.* USB SMSC95XX ETHERNET DRIVER M: Steve Glendinning +M: Microchip Linux Driver Support L: netdev@vger.kernel.org S: Maintained F: drivers/net/usb/smsc95xx.* From 273bf288f91ef922ce8ee851b45243bafc35034f Mon Sep 17 00:00:00 2001 From: Woojung Huh Date: Fri, 2 Sep 2016 20:34:20 +0000 Subject: [PATCH 0762/1715] smsc95xx: Add register define Add STRAP_STATUS defines. Signed-off-by: Woojung Huh Signed-off-by: David S. Miller --- drivers/net/usb/smsc95xx.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/usb/smsc95xx.h b/drivers/net/usb/smsc95xx.h index 526faa0c44e6..29a4d9efce7c 100644 --- a/drivers/net/usb/smsc95xx.h +++ b/drivers/net/usb/smsc95xx.h @@ -144,6 +144,14 @@ #define BURST_CAP (0x38) +#define STRAP_STATUS (0x3C) +#define STRAP_STATUS_PWR_SEL_ (0x00000020) +#define STRAP_STATUS_AMDIX_EN_ (0x00000010) +#define STRAP_STATUS_PORT_SWAP_ (0x00000008) +#define STRAP_STATUS_EEP_SIZE_ (0x00000004) +#define STRAP_STATUS_RMT_WKP_ (0x00000002) +#define STRAP_STATUS_EEP_DISABLE_ (0x00000001) + #define GPIO_WAKE (0x64) #define INT_EP_CTL (0x68) From 13722bbe97b55bbd31651a2e1e12b460f33de134 Mon Sep 17 00:00:00 2001 From: Woojung Huh Date: Fri, 2 Sep 2016 20:34:22 +0000 Subject: [PATCH 0763/1715] smsc95xx: Add mdix control via ethtool Add mdix control through ethtool. Signed-off-by: Woojung Huh Signed-off-by: David S. Miller --- drivers/net/usb/smsc95xx.c | 109 ++++++++++++++++++++++++++++++++++++- 1 file changed, 106 insertions(+), 3 deletions(-) diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index dc989a8b5afb..831aa33d078a 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -33,7 +33,7 @@ #include "smsc95xx.h" #define SMSC_CHIPNAME "smsc95xx" -#define SMSC_DRIVER_VERSION "1.0.4" +#define SMSC_DRIVER_VERSION "1.0.5" #define HS_USB_PKT_SIZE (512) #define FS_USB_PKT_SIZE (64) #define DEFAULT_HS_BURST_CAP_SIZE (16 * 1024 + 5 * HS_USB_PKT_SIZE) @@ -64,6 +64,7 @@ #define CARRIER_CHECK_DELAY (2 * HZ) struct smsc95xx_priv { + u32 chip_id; u32 mac_cr; u32 hash_hi; u32 hash_lo; @@ -71,6 +72,7 @@ struct smsc95xx_priv { spinlock_t mac_cr_lock; u8 features; u8 suspend_flags; + u8 mdix_ctrl; bool link_ok; struct delayed_work carrier_check; struct usbnet *dev; @@ -782,14 +784,113 @@ static int smsc95xx_ethtool_set_wol(struct net_device *net, return ret; } +static int get_mdix_status(struct net_device *net) +{ + struct usbnet *dev = netdev_priv(net); + u32 val; + int buf; + + buf = smsc95xx_mdio_read(dev->net, dev->mii.phy_id, SPECIAL_CTRL_STS); + if (buf & SPECIAL_CTRL_STS_OVRRD_AMDIX_) { + if (buf & SPECIAL_CTRL_STS_AMDIX_ENABLE_) + return ETH_TP_MDI_AUTO; + else if (buf & SPECIAL_CTRL_STS_AMDIX_STATE_) + return ETH_TP_MDI_X; + } else { + buf = smsc95xx_read_reg(dev, STRAP_STATUS, &val); + if (val & STRAP_STATUS_AMDIX_EN_) + return ETH_TP_MDI_AUTO; + } + + return ETH_TP_MDI; +} + +static void set_mdix_status(struct net_device *net, __u8 mdix_ctrl) +{ + struct usbnet *dev = netdev_priv(net); + struct smsc95xx_priv *pdata = (struct smsc95xx_priv *)(dev->data[0]); + int buf; + + if ((pdata->chip_id == ID_REV_CHIP_ID_9500A_) || + (pdata->chip_id == ID_REV_CHIP_ID_9530_) || + (pdata->chip_id == ID_REV_CHIP_ID_89530_) || + (pdata->chip_id == ID_REV_CHIP_ID_9730_)) { + /* Extend Manual AutoMDIX timer for 9500A/9500Ai */ + buf = smsc95xx_mdio_read(dev->net, dev->mii.phy_id, + PHY_EDPD_CONFIG); + buf |= PHY_EDPD_CONFIG_EXT_CROSSOVER_; + smsc95xx_mdio_write(dev->net, dev->mii.phy_id, + PHY_EDPD_CONFIG, buf); + } + + if (mdix_ctrl == ETH_TP_MDI) { + buf = smsc95xx_mdio_read(dev->net, dev->mii.phy_id, + SPECIAL_CTRL_STS); + buf |= SPECIAL_CTRL_STS_OVRRD_AMDIX_; + buf &= ~(SPECIAL_CTRL_STS_AMDIX_ENABLE_ | + SPECIAL_CTRL_STS_AMDIX_STATE_); + smsc95xx_mdio_write(dev->net, dev->mii.phy_id, + SPECIAL_CTRL_STS, buf); + } else if (mdix_ctrl == ETH_TP_MDI_X) { + buf = smsc95xx_mdio_read(dev->net, dev->mii.phy_id, + SPECIAL_CTRL_STS); + buf |= SPECIAL_CTRL_STS_OVRRD_AMDIX_; + buf &= ~(SPECIAL_CTRL_STS_AMDIX_ENABLE_ | + SPECIAL_CTRL_STS_AMDIX_STATE_); + buf |= SPECIAL_CTRL_STS_AMDIX_STATE_; + smsc95xx_mdio_write(dev->net, dev->mii.phy_id, + SPECIAL_CTRL_STS, buf); + } else if (mdix_ctrl == ETH_TP_MDI_AUTO) { + buf = smsc95xx_mdio_read(dev->net, dev->mii.phy_id, + SPECIAL_CTRL_STS); + buf &= ~SPECIAL_CTRL_STS_OVRRD_AMDIX_; + buf &= ~(SPECIAL_CTRL_STS_AMDIX_ENABLE_ | + SPECIAL_CTRL_STS_AMDIX_STATE_); + buf |= SPECIAL_CTRL_STS_AMDIX_ENABLE_; + smsc95xx_mdio_write(dev->net, dev->mii.phy_id, + SPECIAL_CTRL_STS, buf); + } + pdata->mdix_ctrl = mdix_ctrl; +} + +static int smsc95xx_get_settings(struct net_device *net, + struct ethtool_cmd *cmd) +{ + struct usbnet *dev = netdev_priv(net); + struct smsc95xx_priv *pdata = (struct smsc95xx_priv *)(dev->data[0]); + int retval; + + retval = usbnet_get_settings(net, cmd); + + cmd->eth_tp_mdix = pdata->mdix_ctrl; + cmd->eth_tp_mdix_ctrl = pdata->mdix_ctrl; + + return retval; +} + +static int smsc95xx_set_settings(struct net_device *net, + struct ethtool_cmd *cmd) +{ + struct usbnet *dev = netdev_priv(net); + struct smsc95xx_priv *pdata = (struct smsc95xx_priv *)(dev->data[0]); + int retval; + + if (pdata->mdix_ctrl != cmd->eth_tp_mdix_ctrl) + set_mdix_status(net, cmd->eth_tp_mdix_ctrl); + + retval = usbnet_set_settings(net, cmd); + + return retval; +} + static const struct ethtool_ops smsc95xx_ethtool_ops = { .get_link = usbnet_get_link, .nway_reset = usbnet_nway_reset, .get_drvinfo = usbnet_get_drvinfo, .get_msglevel = usbnet_get_msglevel, .set_msglevel = usbnet_set_msglevel, - .get_settings = usbnet_get_settings, - .set_settings = usbnet_set_settings, + .get_settings = smsc95xx_get_settings, + .set_settings = smsc95xx_set_settings, .get_eeprom_len = smsc95xx_ethtool_get_eeprom_len, .get_eeprom = smsc95xx_ethtool_get_eeprom, .set_eeprom = smsc95xx_ethtool_set_eeprom, @@ -1194,6 +1295,8 @@ static int smsc95xx_bind(struct usbnet *dev, struct usb_interface *intf) if (ret < 0) return ret; val >>= 16; + pdata->chip_id = val; + pdata->mdix_ctrl = get_mdix_status(dev->net); if ((val == ID_REV_CHIP_ID_9500A_) || (val == ID_REV_CHIP_ID_9530_) || (val == ID_REV_CHIP_ID_89530_) || (val == ID_REV_CHIP_ID_9730_)) From 95a691179725fb8b3463d4d0f09a4d77e3c6f808 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Fri, 2 Sep 2016 15:58:01 -0700 Subject: [PATCH 0764/1715] hso: Use a more common logging style Macros that end in an underscore are just odd. Add hso_dbg(level, fmt, ...) and use it everwhere instead. Several uses had additional unnecessary newlines as the macro added a newline. Remove the newline from the macro and add newlines to each use as appropriate. Remove now unused D macros. Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- drivers/net/usb/hso.c | 97 ++++++++++++++++++++----------------------- 1 file changed, 44 insertions(+), 53 deletions(-) diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c index c5544d36c54f..6c3751299b1e 100644 --- a/drivers/net/usb/hso.c +++ b/drivers/net/usb/hso.c @@ -108,23 +108,12 @@ /*****************************************************************************/ /* Debugging functions */ /*****************************************************************************/ -#define D__(lvl_, fmt, arg...) \ - do { \ - printk(lvl_ "[%d:%s]: " fmt "\n", \ - __LINE__, __func__, ## arg); \ - } while (0) - -#define D_(lvl, args...) \ - do { \ - if (lvl & debug) \ - D__(KERN_INFO, args); \ - } while (0) - -#define D1(args...) D_(0x01, ##args) -#define D2(args...) D_(0x02, ##args) -#define D3(args...) D_(0x04, ##args) -#define D4(args...) D_(0x08, ##args) -#define D5(args...) D_(0x10, ##args) +#define hso_dbg(lvl, fmt, ...) \ +do { \ + if ((lvl) & debug) \ + pr_info("[%d:%s] " fmt, \ + __LINE__, __func__, ##__VA_ARGS__); \ +} while (0) /*****************************************************************************/ /* Enumerators */ @@ -709,7 +698,8 @@ static void handle_usb_error(int status, const char *function, } /* log a meaningful explanation of an USB status */ - D1("%s: received USB status - %s (%d)", function, explanation, status); + hso_dbg(0x1, "%s: received USB status - %s (%d)\n", + function, explanation, status); } /* Network interface functions */ @@ -808,7 +798,7 @@ static netdev_tx_t hso_net_start_xmit(struct sk_buff *skb, DUMP1(skb->data, skb->len); /* Copy it from kernel memory to OUR memory */ memcpy(odev->mux_bulk_tx_buf, skb->data, skb->len); - D1("len: %d/%d", skb->len, MUX_BULK_TX_BUF_SIZE); + hso_dbg(0x1, "len: %d/%d\n", skb->len, MUX_BULK_TX_BUF_SIZE); /* Fill in the URB for shipping it out. */ usb_fill_bulk_urb(odev->mux_bulk_tx_urb, @@ -872,7 +862,7 @@ static void packetizeRx(struct hso_net *odev, unsigned char *ip_pkt, unsigned char *tmp_rx_buf; /* log if needed */ - D1("Rx %d bytes", count); + hso_dbg(0x1, "Rx %d bytes\n", count); DUMP(ip_pkt, min(128, (int)count)); while (count) { @@ -912,7 +902,7 @@ static void packetizeRx(struct hso_net *odev, unsigned char *ip_pkt, frame_len); if (!odev->skb_rx_buf) { /* We got no receive buffer. */ - D1("could not allocate memory"); + hso_dbg(0x1, "could not allocate memory\n"); odev->rx_parse_state = WAIT_SYNC; continue; } @@ -972,11 +962,11 @@ static void packetizeRx(struct hso_net *odev, unsigned char *ip_pkt, break; case WAIT_SYNC: - D1(" W_S"); + hso_dbg(0x1, " W_S\n"); count = 0; break; default: - D1(" "); + hso_dbg(0x1, "\n"); count--; break; } @@ -1020,7 +1010,7 @@ static void read_bulk_callback(struct urb *urb) /* Sanity check */ if (!odev || !test_bit(HSO_NET_RUNNING, &odev->flags)) { - D1("BULK IN callback but driver is not active!"); + hso_dbg(0x1, "BULK IN callback but driver is not active!\n"); return; } usb_mark_last_busy(urb->dev); @@ -1116,7 +1106,7 @@ static void _hso_serial_set_termios(struct tty_struct *tty, return; } - D4("port %d", serial->minor); + hso_dbg(0x8, "port %d\n", serial->minor); /* * Fix up unsupported bits @@ -1205,11 +1195,11 @@ static void hso_std_serial_read_bulk_callback(struct urb *urb) struct hso_serial *serial = urb->context; int status = urb->status; - D4("\n--- Got serial_read_bulk callback %02x ---", status); + hso_dbg(0x8, "--- Got serial_read_bulk callback %02x ---\n", status); /* sanity check */ if (!serial) { - D1("serial == NULL"); + hso_dbg(0x1, "serial == NULL\n"); return; } if (status) { @@ -1217,7 +1207,7 @@ static void hso_std_serial_read_bulk_callback(struct urb *urb) return; } - D1("Actual length = %d\n", urb->actual_length); + hso_dbg(0x1, "Actual length = %d\n", urb->actual_length); DUMP1(urb->transfer_buffer, urb->actual_length); /* Anyone listening? */ @@ -1266,7 +1256,7 @@ static int hso_serial_open(struct tty_struct *tty, struct file *filp) if (serial == NULL || serial->magic != HSO_SERIAL_MAGIC) { WARN_ON(1); tty->driver_data = NULL; - D1("Failed to open port"); + hso_dbg(0x1, "Failed to open port\n"); return -ENODEV; } @@ -1275,7 +1265,7 @@ static int hso_serial_open(struct tty_struct *tty, struct file *filp) if (result < 0) goto err_out; - D1("Opening %d", serial->minor); + hso_dbg(0x1, "Opening %d\n", serial->minor); /* setup */ tty->driver_data = serial; @@ -1298,7 +1288,7 @@ static int hso_serial_open(struct tty_struct *tty, struct file *filp) kref_get(&serial->parent->ref); } } else { - D1("Port was already open"); + hso_dbg(0x1, "Port was already open\n"); } usb_autopm_put_interface(serial->parent->interface); @@ -1317,7 +1307,7 @@ static void hso_serial_close(struct tty_struct *tty, struct file *filp) struct hso_serial *serial = tty->driver_data; u8 usb_gone; - D1("Closing serial port"); + hso_dbg(0x1, "Closing serial port\n"); /* Open failed, no close cleanup required */ if (serial == NULL) @@ -1412,8 +1402,8 @@ static void hso_serial_set_termios(struct tty_struct *tty, struct ktermios *old) unsigned long flags; if (old) - D5("Termios called with: cflags new[%d] - old[%d]", - tty->termios.c_cflag, old->c_cflag); + hso_dbg(0x16, "Termios called with: cflags new[%d] - old[%d]\n", + tty->termios.c_cflag, old->c_cflag); /* the actual setup */ spin_lock_irqsave(&serial->serial_lock, flags); @@ -1649,7 +1639,7 @@ static int hso_serial_tiocmget(struct tty_struct *tty) /* sanity check */ if (!serial) { - D1("no tty structures"); + hso_dbg(0x1, "no tty structures\n"); return -EINVAL; } spin_lock_irq(&serial->serial_lock); @@ -1682,7 +1672,7 @@ static int hso_serial_tiocmset(struct tty_struct *tty, /* sanity check */ if (!serial) { - D1("no tty structures"); + hso_dbg(0x1, "no tty structures\n"); return -EINVAL; } @@ -1721,7 +1711,7 @@ static int hso_serial_ioctl(struct tty_struct *tty, { struct hso_serial *serial = tty->driver_data; int ret = 0; - D4("IOCTL cmd: %d, arg: %ld", cmd, arg); + hso_dbg(0x8, "IOCTL cmd: %d, arg: %ld\n", cmd, arg); if (!serial) return -ENODEV; @@ -1808,9 +1798,9 @@ static int mux_device_request(struct hso_serial *serial, u8 type, u16 port, pipe = usb_sndctrlpipe(serial->parent->usb, 0); } /* syslog */ - D2("%s command (%02x) len: %d, port: %d", - type == USB_CDC_GET_ENCAPSULATED_RESPONSE ? "Read" : "Write", - ctrl_req->bRequestType, ctrl_req->wLength, port); + hso_dbg(0x2, "%s command (%02x) len: %d, port: %d\n", + type == USB_CDC_GET_ENCAPSULATED_RESPONSE ? "Read" : "Write", + ctrl_req->bRequestType, ctrl_req->wLength, port); /* Load ctrl urb */ ctrl_urb->transfer_flags = 0; @@ -1876,11 +1866,11 @@ static void intr_callback(struct urb *urb) handle_usb_error(status, __func__, NULL); return; } - D4("\n--- Got intr callback 0x%02X ---", status); + hso_dbg(0x8, "--- Got intr callback 0x%02X ---\n", status); /* what request? */ port_req = urb->transfer_buffer; - D4(" port_req = 0x%.2X\n", *port_req); + hso_dbg(0x8, "port_req = 0x%.2X\n", *port_req); /* loop over all muxed ports to find the one sending this */ for (i = 0; i < 8; i++) { /* max 8 channels on MUX */ @@ -1888,7 +1878,8 @@ static void intr_callback(struct urb *urb) serial = get_serial_by_shared_int_and_type(shared_int, (1 << i)); if (serial != NULL) { - D1("Pending read interrupt on port %d\n", i); + hso_dbg(0x1, "Pending read interrupt on port %d\n", + i); spin_lock(&serial->serial_lock); if (serial->rx_state == RX_IDLE && serial->port.count > 0) { @@ -1900,8 +1891,8 @@ static void intr_callback(struct urb *urb) } else serial->rx_state = RX_PENDING; } else { - D1("Already a read pending on " - "port %d or port not open\n", i); + hso_dbg(0x1, "Already a read pending on port %d or port not open\n", + i); } spin_unlock(&serial->serial_lock); } @@ -1933,7 +1924,7 @@ static void hso_std_serial_write_bulk_callback(struct urb *urb) /* sanity check */ if (!serial) { - D1("serial == NULL"); + hso_dbg(0x1, "serial == NULL\n"); return; } @@ -1948,7 +1939,7 @@ static void hso_std_serial_write_bulk_callback(struct urb *urb) tty_port_tty_wakeup(&serial->port); hso_kick_transmit(serial); - D1(" "); + hso_dbg(0x1, "\n"); } /* called for writing diag or CS serial port */ @@ -1996,8 +1987,8 @@ static void ctrl_callback(struct urb *urb) /* what request? */ req = (struct usb_ctrlrequest *)(urb->setup_packet); - D4("\n--- Got muxed ctrl callback 0x%02X ---", status); - D4("Actual length of urb = %d\n", urb->actual_length); + hso_dbg(0x8, "--- Got muxed ctrl callback 0x%02X ---\n", status); + hso_dbg(0x8, "Actual length of urb = %d\n", urb->actual_length); DUMP1(urb->transfer_buffer, urb->actual_length); if (req->bRequestType == @@ -2023,7 +2014,7 @@ static int put_rxbuf_data(struct urb *urb, struct hso_serial *serial) /* Sanity check */ if (urb == NULL || serial == NULL) { - D1("serial = NULL"); + hso_dbg(0x1, "serial = NULL\n"); return -2; } @@ -2035,7 +2026,7 @@ static int put_rxbuf_data(struct urb *urb, struct hso_serial *serial) } /* Push data to tty */ - D1("data to push to tty"); + hso_dbg(0x1, "data to push to tty\n"); count = tty_buffer_request_room(&serial->port, urb->actual_length); if (count >= urb->actual_length) { tty_insert_flip_string(&serial->port, urb->transfer_buffer, @@ -2415,7 +2406,7 @@ static void hso_net_init(struct net_device *net) { struct hso_net *hso_net = netdev_priv(net); - D1("sizeof hso_net is %d", (int)sizeof(*hso_net)); + hso_dbg(0x1, "sizeof hso_net is %zu\n", sizeof(*hso_net)); /* fill in the other fields */ net->netdev_ops = &hso_netdev_ops; @@ -3301,7 +3292,7 @@ MODULE_DESCRIPTION(MOD_DESCRIPTION); MODULE_LICENSE(MOD_LICENSE); /* change the debug level (eg: insmod hso.ko debug=0x04) */ -MODULE_PARM_DESC(debug, "Level of debug [0x01 | 0x02 | 0x04 | 0x08 | 0x10]"); +MODULE_PARM_DESC(debug, "debug level mask [0x01 | 0x02 | 0x04 | 0x08 | 0x10]"); module_param(debug, int, S_IRUGO | S_IWUSR); /* set the major tty number (eg: insmod hso.ko tty_major=245) */ From 3981cce673c9a37649d79ccef402a8c1651fb3cf Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Fri, 2 Sep 2016 15:58:02 -0700 Subject: [PATCH 0765/1715] hso: Convert printk to pr_ Use a more common logging style Miscellanea: o Add pr_fmt to prefix each output message o Realign arguments Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- drivers/net/usb/hso.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c index 6c3751299b1e..e7b516342678 100644 --- a/drivers/net/usb/hso.c +++ b/drivers/net/usb/hso.c @@ -50,6 +50,8 @@ * *****************************************************************************/ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -638,7 +640,7 @@ static int get_free_serial_index(void) } spin_unlock_irqrestore(&serial_table_lock, flags); - printk(KERN_ERR "%s: no free serial devices in table\n", __func__); + pr_err("%s: no free serial devices in table\n", __func__); return -1; } @@ -1102,7 +1104,7 @@ static void _hso_serial_set_termios(struct tty_struct *tty, struct hso_serial *serial = tty->driver_data; if (!serial) { - printk(KERN_ERR "%s: no tty structures", __func__); + pr_err("%s: no tty structures", __func__); return; } @@ -1347,7 +1349,7 @@ static int hso_serial_write(struct tty_struct *tty, const unsigned char *buf, /* sanity check */ if (serial == NULL) { - printk(KERN_ERR "%s: serial is NULL\n", __func__); + pr_err("%s: serial is NULL\n", __func__); return -ENODEV; } @@ -1773,7 +1775,7 @@ static int mux_device_request(struct hso_serial *serial, u8 type, u16 port, /* Sanity check */ if (!serial || !ctrl_urb || !ctrl_req) { - printk(KERN_ERR "%s: Wrong arguments\n", __func__); + pr_err("%s: Wrong arguments\n", __func__); return -EINVAL; } @@ -3220,7 +3222,7 @@ static int __init hso_init(void) int result; /* put it in the log */ - printk(KERN_INFO "hso: %s\n", version); + pr_info("%s\n", version); /* Initialise the serial table semaphore and table */ spin_lock_init(&serial_table_lock); @@ -3251,16 +3253,15 @@ static int __init hso_init(void) /* register the tty driver */ result = tty_register_driver(tty_drv); if (result) { - printk(KERN_ERR "%s - tty_register_driver failed(%d)\n", - __func__, result); + pr_err("%s - tty_register_driver failed(%d)\n", + __func__, result); goto err_free_tty; } /* register this module as an usb driver */ result = usb_register(&hso_driver); if (result) { - printk(KERN_ERR "Could not register hso driver? error: %d\n", - result); + pr_err("Could not register hso driver - error: %d\n", result); goto err_unreg_tty; } @@ -3275,7 +3276,7 @@ err_free_tty: static void __exit hso_exit(void) { - printk(KERN_INFO "hso: unloaded\n"); + pr_info("unloaded\n"); tty_unregister_driver(tty_drv); put_tty_driver(tty_drv); From 635372ada412967b359421e88c2c0acd35d4e3b5 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Sat, 3 Sep 2016 17:59:26 +0800 Subject: [PATCH 0766/1715] net: ethernet: mediatek: enhance RX path by reducing the frequency of the memory barrier used The patch makes move wmb() to outside the loop that could help RX path handling more faster although that RX descriptors aren't freed for DMA to use as soon as possible, but based on my experiment and the result shows it still can reach about 943Mbpis without performance drop that is tested based on the setup with one port using Giga PHY and 256 RX descriptors for DMA to move. Signed-off-by: Sean Wang Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 2dadfa961898..b43700587c88 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -895,13 +895,14 @@ release_desc: rxd->rxd2 = RX_DMA_PLEN0(ring->buf_size); ring->calc_idx = idx; - /* make sure that all changes to the dma ring are flushed before - * we continue - */ - wmb(); - mtk_w32(eth, ring->calc_idx, MTK_PRX_CRX_IDX0); + done++; } + /* make sure that all changes to the dma ring are flushed before + * we continue + */ + wmb(); + mtk_w32(eth, ring->calc_idx, MTK_PRX_CRX_IDX0); if (done < budget) mtk_w32(eth, MTK_RX_DONE_INT, MTK_PDMA_INT_STATUS); From 41156cea94618d7b5dbb3d2d4e42f9214581c737 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Sat, 3 Sep 2016 17:59:27 +0800 Subject: [PATCH 0767/1715] net: ethernet: mediatek: enhance RX path by aggregating more SKBs into NAPI The patch adds support for aggregating more SKBs feed into NAPI in order to get more benefits from generic receive offload (GRO) by peeking at the RX ring status and moving more packets right before returning from NAPI RX polling handler if NAPI budgets are still available and some packets already present in hardware. Signed-off-by: Sean Wang Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 31 +++++++++++---------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index b43700587c88..473977d6e9e8 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -898,14 +898,14 @@ release_desc: done++; } - /* make sure that all changes to the dma ring are flushed before - * we continue - */ - wmb(); - mtk_w32(eth, ring->calc_idx, MTK_PRX_CRX_IDX0); - if (done < budget) - mtk_w32(eth, MTK_RX_DONE_INT, MTK_PDMA_INT_STATUS); + if (done) { + /* make sure that all changes to the dma ring are flushed before + * we continue + */ + wmb(); + mtk_w32(eth, ring->calc_idx, MTK_PRX_CRX_IDX0); + } return done; } @@ -1025,10 +1025,13 @@ static int mtk_napi_rx(struct napi_struct *napi, int budget) struct mtk_eth *eth = container_of(napi, struct mtk_eth, rx_napi); u32 status, mask; int rx_done = 0; + int remain_budget = budget; mtk_handle_status_irq(eth); + +poll_again: mtk_w32(eth, MTK_RX_DONE_INT, MTK_PDMA_INT_STATUS); - rx_done = mtk_poll_rx(napi, budget, eth); + rx_done = mtk_poll_rx(napi, remain_budget, eth); if (unlikely(netif_msg_intr(eth))) { status = mtk_r32(eth, MTK_PDMA_INT_STATUS); @@ -1037,18 +1040,18 @@ static int mtk_napi_rx(struct napi_struct *napi, int budget) "done rx %d, intr 0x%08x/0x%x\n", rx_done, status, mask); } - - if (rx_done == budget) + if (rx_done == remain_budget) return budget; status = mtk_r32(eth, MTK_PDMA_INT_STATUS); - if (status & MTK_RX_DONE_INT) - return budget; - + if (status & MTK_RX_DONE_INT) { + remain_budget -= rx_done; + goto poll_again; + } napi_complete(napi); mtk_irq_enable(eth, MTK_PDMA_INT_MASK, MTK_RX_DONE_INT); - return rx_done; + return rx_done + budget - remain_budget; } static int mtk_tx_alloc(struct mtk_eth *eth) From 326fe02d1ed61a63c30fa1f144758f34f0ef11cb Mon Sep 17 00:00:00 2001 From: Brenden Blanco Date: Sat, 3 Sep 2016 21:29:58 -0700 Subject: [PATCH 0768/1715] net/mlx4_en: protect ring->xdp_prog with rcu_read_lock Depending on the preempt mode, the bpf_prog stored in xdp_prog may be freed despite the use of call_rcu inside bpf_prog_put. The situation is possible when running in PREEMPT_RCU=y mode, for instance, since the rcu callback for destroying the bpf prog can run even during the bh handling in the mlx4 rx path. Several options were considered before this patch was settled on: Add a napi_synchronize loop in mlx4_xdp_set, which would occur after all of the rings are updated with the new program. This approach has the disadvantage that as the number of rings increases, the speed of update will slow down significantly due to napi_synchronize's msleep(1). Add a new rcu_head in bpf_prog_aux, to be used by a new bpf_prog_put_bh. The action of the bpf_prog_put_bh would be to then call bpf_prog_put later. Those drivers that consume a bpf prog in a bh context (like mlx4) would then use the bpf_prog_put_bh instead when the ring is up. This has the problem of complexity, in maintaining proper refcnts and rcu lists, and would likely be harder to review. In addition, this approach to freeing must be exclusive with other frees of the bpf prog, for instance a _bh prog must not be referenced from a prog array that is consumed by a non-_bh prog. The placement of rcu_read_lock in this patch is functionally the same as putting an rcu_read_lock in napi_poll. Actually doing so could be a potentially controversial change, but would bring the implementation in line with sk_busy_loop (though of course the nature of those two paths is substantially different), and would also avoid future copy/paste problems with future supporters of XDP. Still, this patch does not take that opinionated option. Testing was done with kernels in either PREEMPT_RCU=y or CONFIG_PREEMPT_VOLUNTARY=y+PREEMPT_RCU=n modes, with neither exhibiting any drawback. With PREEMPT_RCU=n, the extra call to rcu_read_lock did not show up in the perf report whatsoever, and with PREEMPT_RCU=y the overhead of rcu_read_lock (according to perf) was the same before/after. In the rx path, rcu_read_lock is eventually called for every packet from netif_receive_skb_internal, so the napi poll call's rcu_read_lock is easily amortized. v2: Remove extra rcu_read_lock in mlx4_en_process_rx_cq body Annotate xdp_prog with __rcu, and convert all usages to rcu_assign or rcu_dereference[_protected] as appropriate. Add explicit mutex lock around rcu_assign instead of xchg loop. Fixes: d576acf0a22 ("net/mlx4_en: add page recycle to prepare rx ring for tx support") Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: Brenden Blanco Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 13 ++++++++++--- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 15 ++++++++------- drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 2 +- 3 files changed, 19 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 4198e9bf89d0..31a41add5b4c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2642,12 +2642,16 @@ static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog) if (IS_ERR(prog)) return PTR_ERR(prog); } + mutex_lock(&mdev->state_lock); for (i = 0; i < priv->rx_ring_num; i++) { - /* This xchg is paired with READ_ONCE in the fastpath */ - old_prog = xchg(&priv->rx_ring[i]->xdp_prog, prog); + old_prog = rcu_dereference_protected( + priv->rx_ring[i]->xdp_prog, + lockdep_is_held(&mdev->state_lock)); + rcu_assign_pointer(priv->rx_ring[i]->xdp_prog, prog); if (old_prog) bpf_prog_put(old_prog); } + mutex_unlock(&mdev->state_lock); return 0; } @@ -2680,7 +2684,10 @@ static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog) priv->xdp_ring_num); for (i = 0; i < priv->rx_ring_num; i++) { - old_prog = xchg(&priv->rx_ring[i]->xdp_prog, prog); + old_prog = rcu_dereference_protected( + priv->rx_ring[i]->xdp_prog, + lockdep_is_held(&mdev->state_lock)); + rcu_assign_pointer(priv->rx_ring[i]->xdp_prog, prog); if (old_prog) bpf_prog_put(old_prog); } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 2040dad8611d..6758292311f4 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -537,7 +537,9 @@ void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring = *pring; struct bpf_prog *old_prog; - old_prog = READ_ONCE(ring->xdp_prog); + old_prog = rcu_dereference_protected( + ring->xdp_prog, + lockdep_is_held(&mdev->state_lock)); if (old_prog) bpf_prog_put(old_prog); mlx4_free_hwq_res(mdev->dev, &ring->wqres, size * stride + TXBB_SIZE); @@ -800,7 +802,9 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud if (budget <= 0) return polled; - xdp_prog = READ_ONCE(ring->xdp_prog); + /* Protect accesses to: ring->xdp_prog, priv->mac_hash list */ + rcu_read_lock(); + xdp_prog = rcu_dereference(ring->xdp_prog); doorbell_pending = 0; tx_index = (priv->tx_ring_num - priv->xdp_ring_num) + cq->ring; @@ -858,15 +862,11 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud /* Drop the packet, since HW loopback-ed it */ mac_hash = ethh->h_source[MLX4_EN_MAC_HASH_IDX]; bucket = &priv->mac_hash[mac_hash]; - rcu_read_lock(); hlist_for_each_entry_rcu(entry, bucket, hlist) { if (ether_addr_equal_64bits(entry->mac, - ethh->h_source)) { - rcu_read_unlock(); + ethh->h_source)) goto next; - } } - rcu_read_unlock(); } } @@ -1077,6 +1077,7 @@ consumed: } out: + rcu_read_unlock(); if (doorbell_pending) mlx4_en_xmit_doorbell(priv->tx_ring[tx_index]); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 2c2913dcae98..47867c49f91c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -340,7 +340,7 @@ struct mlx4_en_rx_ring { u8 fcs_del; void *buf; void *rx_info; - struct bpf_prog *xdp_prog; + struct bpf_prog __rcu *xdp_prog; struct mlx4_en_page_cache page_cache; unsigned long bytes; unsigned long packets; From 5e1e61a33f987eb5d87c5acb199da99b6a9da93d Mon Sep 17 00:00:00 2001 From: Haishuang Yan Date: Sun, 4 Sep 2016 18:52:51 +0800 Subject: [PATCH 0769/1715] vxlan: Update tx_errors statistics if vxlan_build_skb return err. If vxlan_build_skb return err < 0, tx_errors should be also increased. Signed-off-by: Haishuang Yan Acked-by: Jiri Benc Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 9735059dee99..199dec033cf8 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2103,6 +2103,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, vni, md, flags, udp_sum); if (err < 0) { dst_release(ndst); + dev->stats.tx_errors++; return; } udp_tunnel6_xmit_skb(ndst, sk, skb, dev, From 2591c280c375c547022eb619895c1aebbbc38219 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sun, 4 Sep 2016 14:24:03 -0700 Subject: [PATCH 0770/1715] qed: Remove OOM messages These messages are unnecessary as OOM allocation failures already do a dump_stack() giving more or less the same information. $ size drivers/net/ethernet/qlogic/qed/built-in.o* (defconfig x86-64) text data bss dec hex filename 127817 27969 32800 188586 2e0aa drivers/net/ethernet/qlogic/qed/built-in.o.new 132474 27969 32800 193243 2f2db drivers/net/ethernet/qlogic/qed/built-in.o.old Miscellanea: o Change allocs to the generally preferred forms where possible. Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_cxt.c | 20 ++----- drivers/net/ethernet/qlogic/qed/qed_dcbx.c | 13 +--- drivers/net/ethernet/qlogic/qed/qed_dev.c | 60 +++++-------------- drivers/net/ethernet/qlogic/qed/qed_hw.c | 12 +--- .../net/ethernet/qlogic/qed/qed_init_ops.c | 4 +- drivers/net/ethernet/qlogic/qed/qed_int.c | 29 +++------ drivers/net/ethernet/qlogic/qed/qed_main.c | 4 +- drivers/net/ethernet/qlogic/qed/qed_mcp.c | 1 - drivers/net/ethernet/qlogic/qed/qed_spq.c | 28 +++------ drivers/net/ethernet/qlogic/qed/qed_sriov.c | 9 +-- drivers/net/ethernet/qlogic/qed/qed_vf.c | 14 +---- 11 files changed, 50 insertions(+), 144 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c index 547692759d06..dd579b2ef224 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c +++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c @@ -792,10 +792,9 @@ static int qed_cxt_src_t2_alloc(struct qed_hwfn *p_hwfn) p_mngr->t2_num_pages = DIV_ROUND_UP(total_size, psz); /* allocate t2 */ - p_mngr->t2 = kzalloc(p_mngr->t2_num_pages * sizeof(struct qed_dma_mem), + p_mngr->t2 = kcalloc(p_mngr->t2_num_pages, sizeof(struct qed_dma_mem), GFP_KERNEL); if (!p_mngr->t2) { - DP_NOTICE(p_hwfn, "Failed to allocate t2 table\n"); rc = -ENOMEM; goto t2_fail; } @@ -957,7 +956,6 @@ static int qed_ilt_shadow_alloc(struct qed_hwfn *p_hwfn) p_mngr->ilt_shadow = kcalloc(size, sizeof(struct qed_dma_mem), GFP_KERNEL); if (!p_mngr->ilt_shadow) { - DP_NOTICE(p_hwfn, "Failed to allocate ilt shadow table\n"); rc = -ENOMEM; goto ilt_shadow_fail; } @@ -1050,10 +1048,8 @@ int qed_cxt_mngr_alloc(struct qed_hwfn *p_hwfn) u32 i; p_mngr = kzalloc(sizeof(*p_mngr), GFP_KERNEL); - if (!p_mngr) { - DP_NOTICE(p_hwfn, "Failed to allocate `struct qed_cxt_mngr'\n"); + if (!p_mngr) return -ENOMEM; - } /* Initialize ILT client registers */ clients = p_mngr->clients; @@ -1105,24 +1101,18 @@ int qed_cxt_tables_alloc(struct qed_hwfn *p_hwfn) /* Allocate the ILT shadow table */ rc = qed_ilt_shadow_alloc(p_hwfn); - if (rc) { - DP_NOTICE(p_hwfn, "Failed to allocate ilt memory\n"); + if (rc) goto tables_alloc_fail; - } /* Allocate the T2 table */ rc = qed_cxt_src_t2_alloc(p_hwfn); - if (rc) { - DP_NOTICE(p_hwfn, "Failed to allocate T2 memory\n"); + if (rc) goto tables_alloc_fail; - } /* Allocate and initialize the acquired cids bitmaps */ rc = qed_cid_map_alloc(p_hwfn); - if (rc) { - DP_NOTICE(p_hwfn, "Failed to allocate cid maps\n"); + if (rc) goto tables_alloc_fail; - } return 0; diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c index b900dfbb57ff..be7b3dc7c9a7 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c @@ -874,11 +874,8 @@ int qed_dcbx_info_alloc(struct qed_hwfn *p_hwfn) int rc = 0; p_hwfn->p_dcbx_info = kzalloc(sizeof(*p_hwfn->p_dcbx_info), GFP_KERNEL); - if (!p_hwfn->p_dcbx_info) { - DP_NOTICE(p_hwfn, - "Failed to allocate 'struct qed_dcbx_info'\n"); + if (!p_hwfn->p_dcbx_info) rc = -ENOMEM; - } return rc; } @@ -1176,10 +1173,8 @@ int qed_dcbx_get_config_params(struct qed_hwfn *p_hwfn, } dcbx_info = kmalloc(sizeof(*dcbx_info), GFP_KERNEL); - if (!dcbx_info) { - DP_ERR(p_hwfn, "Failed to allocate struct qed_dcbx_info\n"); + if (!dcbx_info) return -ENOMEM; - } rc = qed_dcbx_query_params(p_hwfn, dcbx_info, QED_DCBX_OPERATIONAL_MIB); if (rc) { @@ -1213,10 +1208,8 @@ static struct qed_dcbx_get *qed_dcbnl_get_dcbx(struct qed_hwfn *hwfn, struct qed_dcbx_get *dcbx_info; dcbx_info = kmalloc(sizeof(*dcbx_info), GFP_KERNEL); - if (!dcbx_info) { - DP_ERR(hwfn->cdev, "Failed to allocate memory for dcbx_info\n"); + if (!dcbx_info) return NULL; - } if (qed_dcbx_query_params(hwfn, dcbx_info, type)) { kfree(dcbx_info); diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index 5ae27f2d2fa5..13d8b4075b01 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -340,7 +340,6 @@ static int qed_init_qm_info(struct qed_hwfn *p_hwfn, bool b_sleepable) return 0; alloc_err: - DP_NOTICE(p_hwfn, "Failed to allocate memory for QM params\n"); qed_qm_info_free(p_hwfn); return -ENOMEM; } @@ -424,18 +423,12 @@ int qed_resc_alloc(struct qed_dev *cdev) RESC_NUM(p_hwfn, QED_L2_QUEUE); p_hwfn->p_tx_cids = kzalloc(tx_size, GFP_KERNEL); - if (!p_hwfn->p_tx_cids) { - DP_NOTICE(p_hwfn, - "Failed to allocate memory for Tx Cids\n"); + if (!p_hwfn->p_tx_cids) goto alloc_no_mem; - } p_hwfn->p_rx_cids = kzalloc(rx_size, GFP_KERNEL); - if (!p_hwfn->p_rx_cids) { - DP_NOTICE(p_hwfn, - "Failed to allocate memory for Rx Cids\n"); + if (!p_hwfn->p_rx_cids) goto alloc_no_mem; - } } for_each_hwfn(cdev, i) { @@ -522,26 +515,18 @@ int qed_resc_alloc(struct qed_dev *cdev) /* DMA info initialization */ rc = qed_dmae_info_alloc(p_hwfn); - if (rc) { - DP_NOTICE(p_hwfn, - "Failed to allocate memory for dmae_info structure\n"); + if (rc) goto alloc_err; - } /* DCBX initialization */ rc = qed_dcbx_info_alloc(p_hwfn); - if (rc) { - DP_NOTICE(p_hwfn, - "Failed to allocate memory for dcbx structure\n"); + if (rc) goto alloc_err; - } } cdev->reset_stats = kzalloc(sizeof(*cdev->reset_stats), GFP_KERNEL); - if (!cdev->reset_stats) { - DP_NOTICE(cdev, "Failed to allocate reset statistics\n"); + if (!cdev->reset_stats) goto alloc_no_mem; - } return 0; @@ -1713,10 +1698,8 @@ static int qed_hw_prepare_single(struct qed_hwfn *p_hwfn, /* Allocate PTT pool */ rc = qed_ptt_pool_alloc(p_hwfn); - if (rc) { - DP_NOTICE(p_hwfn, "Failed to prepare hwfn's hw\n"); + if (rc) goto err0; - } /* Allocate the main PTT */ p_hwfn->p_main_ptt = qed_get_reserved_ptt(p_hwfn, RESERVED_PTT_MAIN); @@ -1746,10 +1729,8 @@ static int qed_hw_prepare_single(struct qed_hwfn *p_hwfn, /* Allocate the init RT array and initialize the init-ops engine */ rc = qed_init_alloc(p_hwfn); - if (rc) { - DP_NOTICE(p_hwfn, "Failed to allocate the init array\n"); + if (rc) goto err2; - } return rc; err2: @@ -1957,10 +1938,8 @@ qed_chain_alloc_next_ptr(struct qed_dev *cdev, struct qed_chain *p_chain) p_virt = dma_alloc_coherent(&cdev->pdev->dev, QED_CHAIN_PAGE_SIZE, &p_phys, GFP_KERNEL); - if (!p_virt) { - DP_NOTICE(cdev, "Failed to allocate chain memory\n"); + if (!p_virt) return -ENOMEM; - } if (i == 0) { qed_chain_init_mem(p_chain, p_virt, p_phys); @@ -1990,10 +1969,8 @@ qed_chain_alloc_single(struct qed_dev *cdev, struct qed_chain *p_chain) p_virt = dma_alloc_coherent(&cdev->pdev->dev, QED_CHAIN_PAGE_SIZE, &p_phys, GFP_KERNEL); - if (!p_virt) { - DP_NOTICE(cdev, "Failed to allocate chain memory\n"); + if (!p_virt) return -ENOMEM; - } qed_chain_init_mem(p_chain, p_virt, p_phys); qed_chain_reset(p_chain); @@ -2010,13 +1987,9 @@ static int qed_chain_alloc_pbl(struct qed_dev *cdev, struct qed_chain *p_chain) void *p_virt = NULL; size = page_cnt * sizeof(*pp_virt_addr_tbl); - pp_virt_addr_tbl = vmalloc(size); - if (!pp_virt_addr_tbl) { - DP_NOTICE(cdev, - "Failed to allocate memory for the chain virtual addresses table\n"); + pp_virt_addr_tbl = vzalloc(size); + if (!pp_virt_addr_tbl) return -ENOMEM; - } - memset(pp_virt_addr_tbl, 0, size); /* The allocation of the PBL table is done with its full size, since it * is expected to be successive. @@ -2029,19 +2002,15 @@ static int qed_chain_alloc_pbl(struct qed_dev *cdev, struct qed_chain *p_chain) size, &p_pbl_phys, GFP_KERNEL); qed_chain_init_pbl_mem(p_chain, p_pbl_virt, p_pbl_phys, pp_virt_addr_tbl); - if (!p_pbl_virt) { - DP_NOTICE(cdev, "Failed to allocate chain pbl memory\n"); + if (!p_pbl_virt) return -ENOMEM; - } for (i = 0; i < page_cnt; i++) { p_virt = dma_alloc_coherent(&cdev->pdev->dev, QED_CHAIN_PAGE_SIZE, &p_phys, GFP_KERNEL); - if (!p_virt) { - DP_NOTICE(cdev, "Failed to allocate chain memory\n"); + if (!p_virt) return -ENOMEM; - } if (i == 0) { qed_chain_init_mem(p_chain, p_virt, p_phys); @@ -2076,7 +2045,8 @@ int qed_chain_alloc(struct qed_dev *cdev, rc = qed_chain_alloc_sanity_check(cdev, cnt_type, elem_size, page_cnt); if (rc) { DP_NOTICE(cdev, - "Cannot allocate a chain with the given arguments:\n" + "Cannot allocate a chain with the given arguments:\n"); + DP_NOTICE(cdev, "[use_mode %d, mode %d, cnt_type %d, num_elems %d, elem_size %zu]\n", intended_use, mode, cnt_type, num_elems, elem_size); return rc; diff --git a/drivers/net/ethernet/qlogic/qed/qed_hw.c b/drivers/net/ethernet/qlogic/qed/qed_hw.c index 8ebdc79b3850..6e4fae9b1430 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hw.c +++ b/drivers/net/ethernet/qlogic/qed/qed_hw.c @@ -482,28 +482,22 @@ int qed_dmae_info_alloc(struct qed_hwfn *p_hwfn) *p_comp = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev, sizeof(u32), p_addr, GFP_KERNEL); - if (!*p_comp) { - DP_NOTICE(p_hwfn, "Failed to allocate `p_completion_word'\n"); + if (!*p_comp) goto err; - } p_addr = &p_hwfn->dmae_info.dmae_cmd_phys_addr; *p_cmd = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev, sizeof(struct dmae_cmd), p_addr, GFP_KERNEL); - if (!*p_cmd) { - DP_NOTICE(p_hwfn, "Failed to allocate `struct dmae_cmd'\n"); + if (!*p_cmd) goto err; - } p_addr = &p_hwfn->dmae_info.intermediate_buffer_phys_addr; *p_buff = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev, sizeof(u32) * DMAE_MAX_RW_SIZE, p_addr, GFP_KERNEL); - if (!*p_buff) { - DP_NOTICE(p_hwfn, "Failed to allocate `intermediate_buffer'\n"); + if (!*p_buff) goto err; - } p_hwfn->dmae_info.channel = p_hwfn->rel_pf_id; diff --git a/drivers/net/ethernet/qlogic/qed/qed_init_ops.c b/drivers/net/ethernet/qlogic/qed/qed_init_ops.c index 8ce8564061d5..d567ba94c8d1 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_init_ops.c +++ b/drivers/net/ethernet/qlogic/qed/qed_init_ops.c @@ -460,10 +460,8 @@ int qed_init_run(struct qed_hwfn *p_hwfn, init_ops = cdev->fw_data->init_ops; p_hwfn->unzip_buf = kzalloc(MAX_ZIPPED_SIZE * 4, GFP_ATOMIC); - if (!p_hwfn->unzip_buf) { - DP_NOTICE(p_hwfn, "Failed to allocate unzip buffer\n"); + if (!p_hwfn->unzip_buf) return -ENOMEM; - } for (cmd_num = 0; cmd_num < num_init_ops; cmd_num++) { union init_op *cmd = &init_ops[cmd_num]; diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c index 61ec973a06c7..2adedc6fb6cf 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_int.c +++ b/drivers/net/ethernet/qlogic/qed/qed_int.c @@ -2370,10 +2370,8 @@ static int qed_int_sb_attn_alloc(struct qed_hwfn *p_hwfn, /* SB struct */ p_sb = kmalloc(sizeof(*p_sb), GFP_KERNEL); - if (!p_sb) { - DP_NOTICE(cdev, "Failed to allocate `struct qed_sb_attn_info'\n"); + if (!p_sb) return -ENOMEM; - } /* SB ring */ p_virt = dma_alloc_coherent(&cdev->pdev->dev, @@ -2381,7 +2379,6 @@ static int qed_int_sb_attn_alloc(struct qed_hwfn *p_hwfn, &p_phys, GFP_KERNEL); if (!p_virt) { - DP_NOTICE(cdev, "Failed to allocate status block (attentions)\n"); kfree(p_sb); return -ENOMEM; } @@ -2667,17 +2664,14 @@ static int qed_int_sp_sb_alloc(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) /* SB struct */ p_sb = kmalloc(sizeof(*p_sb), GFP_KERNEL); - if (!p_sb) { - DP_NOTICE(p_hwfn, "Failed to allocate `struct qed_sb_info'\n"); + if (!p_sb) return -ENOMEM; - } /* SB ring */ p_virt = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev, SB_ALIGNED_SIZE(p_hwfn), &p_phys, GFP_KERNEL); if (!p_virt) { - DP_NOTICE(p_hwfn, "Failed to allocate status block\n"); kfree(p_sb); return -ENOMEM; } @@ -2959,7 +2953,6 @@ int qed_int_igu_read_cam(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) u16 prev_sb_id = 0xFF; p_hwfn->hw_info.p_igu_info = kzalloc(sizeof(*p_igu_info), GFP_KERNEL); - if (!p_hwfn->hw_info.p_igu_info) return -ENOMEM; @@ -3121,18 +3114,14 @@ int qed_int_alloc(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) int rc = 0; rc = qed_int_sp_dpc_alloc(p_hwfn); - if (rc) { - DP_ERR(p_hwfn->cdev, "Failed to allocate sp dpc mem\n"); - return rc; - } - rc = qed_int_sp_sb_alloc(p_hwfn, p_ptt); - if (rc) { - DP_ERR(p_hwfn->cdev, "Failed to allocate sp sb mem\n"); - return rc; - } - rc = qed_int_sb_attn_alloc(p_hwfn, p_ptt); if (rc) - DP_ERR(p_hwfn->cdev, "Failed to allocate sb attn mem\n"); + return rc; + + rc = qed_int_sp_sb_alloc(p_hwfn, p_ptt); + if (rc) + return rc; + + rc = qed_int_sb_attn_alloc(p_hwfn, p_ptt); return rc; } diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 32f71ee57191..d22e3f8816ae 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -841,10 +841,8 @@ static int qed_slowpath_start(struct qed_dev *cdev, if (IS_PF(cdev)) { /* Allocate stream for unzipping */ rc = qed_alloc_stream_mem(cdev); - if (rc) { - DP_NOTICE(cdev, "Failed to allocate stream memory\n"); + if (rc) goto err2; - } /* First Dword used to diffrentiate between various sources */ data = cdev->firmware->data + sizeof(u32); diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index 4c212667b482..472268c021db 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -171,7 +171,6 @@ int qed_mcp_cmd_init(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) return 0; err: - DP_NOTICE(p_hwfn, "Failed to allocate mcp memory\n"); qed_mcp_free(p_hwfn); return -ENOMEM; } diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c index 0265a32c8681..349af182d085 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_spq.c +++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c @@ -323,10 +323,8 @@ struct qed_eq *qed_eq_alloc(struct qed_hwfn *p_hwfn, u16 num_elem) /* Allocate EQ struct */ p_eq = kzalloc(sizeof(*p_eq), GFP_KERNEL); - if (!p_eq) { - DP_NOTICE(p_hwfn, "Failed to allocate `struct qed_eq'\n"); + if (!p_eq) return NULL; - } /* Allocate and initialize EQ chain*/ if (qed_chain_alloc(p_hwfn->cdev, @@ -335,10 +333,8 @@ struct qed_eq *qed_eq_alloc(struct qed_hwfn *p_hwfn, u16 num_elem) QED_CHAIN_CNT_TYPE_U16, num_elem, sizeof(union event_ring_element), - &p_eq->chain)) { - DP_NOTICE(p_hwfn, "Failed to allocate eq chain\n"); + &p_eq->chain)) goto eq_allocate_fail; - } /* register EQ completion on the SP SB */ qed_int_register_cb(p_hwfn, qed_eq_completion, @@ -451,10 +447,8 @@ int qed_spq_alloc(struct qed_hwfn *p_hwfn) /* SPQ struct */ p_spq = kzalloc(sizeof(struct qed_spq), GFP_KERNEL); - if (!p_spq) { - DP_NOTICE(p_hwfn, "Failed to allocate `struct qed_spq'\n"); + if (!p_spq) return -ENOMEM; - } /* SPQ ring */ if (qed_chain_alloc(p_hwfn->cdev, @@ -463,18 +457,14 @@ int qed_spq_alloc(struct qed_hwfn *p_hwfn) QED_CHAIN_CNT_TYPE_U16, 0, /* N/A when the mode is SINGLE */ sizeof(struct slow_path_element), - &p_spq->chain)) { - DP_NOTICE(p_hwfn, "Failed to allocate spq chain\n"); + &p_spq->chain)) goto spq_allocate_fail; - } /* allocate and fill the SPQ elements (incl. ramrod data list) */ capacity = qed_chain_get_capacity(&p_spq->chain); p_virt = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev, - capacity * - sizeof(struct qed_spq_entry), + capacity * sizeof(struct qed_spq_entry), &p_phys, GFP_KERNEL); - if (!p_virt) goto spq_allocate_fail; @@ -863,10 +853,8 @@ struct qed_consq *qed_consq_alloc(struct qed_hwfn *p_hwfn) /* Allocate ConsQ struct */ p_consq = kzalloc(sizeof(*p_consq), GFP_KERNEL); - if (!p_consq) { - DP_NOTICE(p_hwfn, "Failed to allocate `struct qed_consq'\n"); + if (!p_consq) return NULL; - } /* Allocate and initialize EQ chain*/ if (qed_chain_alloc(p_hwfn->cdev, @@ -874,10 +862,8 @@ struct qed_consq *qed_consq_alloc(struct qed_hwfn *p_hwfn) QED_CHAIN_MODE_PBL, QED_CHAIN_CNT_TYPE_U16, QED_CHAIN_PAGE_SIZE / 0x80, - 0x80, &p_consq->chain)) { - DP_NOTICE(p_hwfn, "Failed to allocate consq chain"); + 0x80, &p_consq->chain)) goto consq_allocate_fail; - } return p_consq; diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c index cb68674640f9..d7c21cdc608c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c @@ -455,10 +455,8 @@ int qed_iov_alloc(struct qed_hwfn *p_hwfn) } p_sriov = kzalloc(sizeof(*p_sriov), GFP_KERNEL); - if (!p_sriov) { - DP_NOTICE(p_hwfn, "Failed to allocate `struct qed_sriov'\n"); + if (!p_sriov) return -ENOMEM; - } p_hwfn->pf_iov_info = p_sriov; @@ -507,10 +505,9 @@ int qed_iov_hw_info(struct qed_hwfn *p_hwfn) /* Allocate a new struct for IOV information */ cdev->p_iov_info = kzalloc(sizeof(*cdev->p_iov_info), GFP_KERNEL); - if (!cdev->p_iov_info) { - DP_NOTICE(p_hwfn, "Can't support IOV due to lack of memory\n"); + if (!cdev->p_iov_info) return -ENOMEM; - } + cdev->p_iov_info->pos = pos; rc = qed_iov_pci_cfg_info(cdev); diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.c b/drivers/net/ethernet/qlogic/qed/qed_vf.c index 3c9071de5472..de0acbcaf85c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_vf.c +++ b/drivers/net/ethernet/qlogic/qed/qed_vf.c @@ -331,31 +331,23 @@ int qed_vf_hw_prepare(struct qed_hwfn *p_hwfn) /* Allocate vf sriov info */ p_iov = kzalloc(sizeof(*p_iov), GFP_KERNEL); - if (!p_iov) { - DP_NOTICE(p_hwfn, "Failed to allocate `struct qed_sriov'\n"); + if (!p_iov) return -ENOMEM; - } /* Allocate vf2pf msg */ p_iov->vf2pf_request = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev, sizeof(union vfpf_tlvs), &p_iov->vf2pf_request_phys, GFP_KERNEL); - if (!p_iov->vf2pf_request) { - DP_NOTICE(p_hwfn, - "Failed to allocate `vf2pf_request' DMA memory\n"); + if (!p_iov->vf2pf_request) goto free_p_iov; - } p_iov->pf2vf_reply = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev, sizeof(union pfvf_tlvs), &p_iov->pf2vf_reply_phys, GFP_KERNEL); - if (!p_iov->pf2vf_reply) { - DP_NOTICE(p_hwfn, - "Failed to allocate `pf2vf_reply' DMA memory\n"); + if (!p_iov->pf2vf_reply) goto free_vf2pf_request; - } DP_VERBOSE(p_hwfn, QED_MSG_IOV, From 736c4c1da76bc78d3013e791581725c11cd20ead Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 5 Sep 2016 13:26:33 +0300 Subject: [PATCH 0771/1715] bnx2x: Add support for segmentation of tunnels with outer checksums Signed-off-by: Alexander Duyck Tested-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index de2d32690394..6f9104b5d599 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -13238,13 +13238,22 @@ static int bnx2x_init_dev(struct bnx2x *bp, struct pci_dev *pdev, NETIF_F_RXCSUM | NETIF_F_LRO | NETIF_F_GRO | NETIF_F_RXHASH | NETIF_F_HW_VLAN_CTAG_TX; if (!chip_is_e1x) { - dev->hw_features |= NETIF_F_GSO_GRE | NETIF_F_GSO_UDP_TUNNEL | - NETIF_F_GSO_IPXIP4; + dev->hw_features |= NETIF_F_GSO_GRE | NETIF_F_GSO_GRE_CSUM | + NETIF_F_GSO_IPXIP4 | + NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM | + NETIF_F_GSO_PARTIAL; + dev->hw_enc_features = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_GSO_IPXIP4 | - NETIF_F_GSO_GRE | NETIF_F_GSO_UDP_TUNNEL; + NETIF_F_GSO_GRE | NETIF_F_GSO_GRE_CSUM | + NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_UDP_TUNNEL_CSUM | + NETIF_F_GSO_PARTIAL; + + dev->gso_partial_features = NETIF_F_GSO_GRE_CSUM | + NETIF_F_GSO_UDP_TUNNEL_CSUM; } dev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | From 4102426f9b7b3627c8c23a54d70363e81c93f9b7 Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Mon, 5 Sep 2016 14:35:10 +0300 Subject: [PATCH 0772/1715] qed: Add infrastructure for debug data collection Adds support for several infrastructure operations that are done as part of debug data collection. Signed-off-by: Tomer Tayar Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_hsi.h | 3 + drivers/net/ethernet/qlogic/qed/qed_mcp.c | 76 +++++++++++++++++++ drivers/net/ethernet/qlogic/qed/qed_mcp.h | 46 +++++++++++ .../net/ethernet/qlogic/qed/qed_reg_addr.h | 6 ++ 4 files changed, 131 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h index a67b3554aabd..855691f18412 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h +++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h @@ -7638,6 +7638,7 @@ struct public_drv_mb { #define DRV_MSG_CODE_CFG_VF_MSIX 0xc0010000 #define DRV_MSG_CODE_MCP_RESET 0x00090000 #define DRV_MSG_CODE_SET_VERSION 0x000f0000 +#define DRV_MSG_CODE_MCP_HALT 0x00100000 #define DRV_MSG_CODE_GET_STATS 0x00130000 #define DRV_MSG_CODE_STATS_TYPE_LAN 1 @@ -7645,6 +7646,8 @@ struct public_drv_mb { #define DRV_MSG_CODE_STATS_TYPE_ISCSI 3 #define DRV_MSG_CODE_STATS_TYPE_RDMA 4 +#define DRV_MSG_CODE_MASK_PARITIES 0x001a0000 + #define DRV_MSG_CODE_BIST_TEST 0x001e0000 #define DRV_MSG_CODE_SET_LED_MODE 0x00200000 diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index 472268c021db..7d39cb9b775b 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -389,6 +389,34 @@ int qed_mcp_cmd(struct qed_hwfn *p_hwfn, return 0; } +int qed_mcp_nvm_rd_cmd(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 cmd, + u32 param, + u32 *o_mcp_resp, + u32 *o_mcp_param, u32 *o_txn_size, u32 *o_buf) +{ + struct qed_mcp_mb_params mb_params; + union drv_union_data union_data; + int rc; + + memset(&mb_params, 0, sizeof(mb_params)); + mb_params.cmd = cmd; + mb_params.param = param; + mb_params.p_data_dst = &union_data; + rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params); + if (rc) + return rc; + + *o_mcp_resp = mb_params.mcp_resp; + *o_mcp_param = mb_params.mcp_param; + + *o_txn_size = *o_mcp_param; + memcpy(o_buf, &union_data.raw_data, *o_txn_size); + + return 0; +} + int qed_mcp_load_req(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u32 *p_load_code) { @@ -1168,6 +1196,33 @@ qed_mcp_send_drv_version(struct qed_hwfn *p_hwfn, return rc; } +int qed_mcp_halt(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) +{ + u32 resp = 0, param = 0; + int rc; + + rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_MCP_HALT, 0, &resp, + ¶m); + if (rc) + DP_ERR(p_hwfn, "MCP response failure, aborting\n"); + + return rc; +} + +int qed_mcp_resume(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) +{ + u32 value, cpu_mode; + + qed_wr(p_hwfn, p_ptt, MCP_REG_CPU_STATE, 0xffffffff); + + value = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_MODE); + value &= ~MCP_REG_CPU_MODE_SOFT_HALT; + qed_wr(p_hwfn, p_ptt, MCP_REG_CPU_MODE, value); + cpu_mode = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_MODE); + + return (cpu_mode & MCP_REG_CPU_MODE_SOFT_HALT) ? -EAGAIN : 0; +} + int qed_mcp_set_led(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, enum qed_led_mode mode) { @@ -1195,6 +1250,27 @@ int qed_mcp_set_led(struct qed_hwfn *p_hwfn, return rc; } +int qed_mcp_mask_parities(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u32 mask_parities) +{ + u32 resp = 0, param = 0; + int rc; + + rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_MASK_PARITIES, + mask_parities, &resp, ¶m); + + if (rc) { + DP_ERR(p_hwfn, + "MCP response failure for mask parities, aborting\n"); + } else if (resp != FW_MSG_CODE_OK) { + DP_ERR(p_hwfn, + "MCP did not acknowledge mask parity request. Old MFW?\n"); + rc = -EINVAL; + } + + return rc; +} + int qed_mcp_bist_register_test(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { u32 drv_mb_param = 0, rsp, param; diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h index c6372fa574b7..dff520ed069b 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h @@ -467,6 +467,29 @@ int qed_mcp_fill_shmem_func_info(struct qed_hwfn *p_hwfn, int qed_mcp_reset(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt); +/** + * @brief - Sends an NVM read command request to the MFW to get + * a buffer. + * + * @param p_hwfn + * @param p_ptt + * @param cmd - Command: DRV_MSG_CODE_NVM_GET_FILE_DATA or + * DRV_MSG_CODE_NVM_READ_NVRAM commands + * @param param - [0:23] - Offset [24:31] - Size + * @param o_mcp_resp - MCP response + * @param o_mcp_param - MCP response param + * @param o_txn_size - Buffer size output + * @param o_buf - Pointer to the buffer returned by the MFW. + * + * @param return 0 upon success. + */ +int qed_mcp_nvm_rd_cmd(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 cmd, + u32 param, + u32 *o_mcp_resp, + u32 *o_mcp_param, u32 *o_txn_size, u32 *o_buf); + /** * @brief indicates whether the MFW objects [under mcp_info] are accessible * @@ -489,6 +512,26 @@ bool qed_mcp_is_init(struct qed_hwfn *p_hwfn); int qed_mcp_config_vf_msix(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u8 vf_id, u8 num); +/** + * @brief - Halt the MCP. + * + * @param p_hwfn + * @param p_ptt + * + * @param return 0 upon success. + */ +int qed_mcp_halt(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt); + +/** + * @brief - Wake up the MCP. + * + * @param p_hwfn + * @param p_ptt + * + * @param return 0 upon success. + */ +int qed_mcp_resume(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt); + int qed_configure_pf_min_bandwidth(struct qed_dev *cdev, u8 min_bw); int qed_configure_pf_max_bandwidth(struct qed_dev *cdev, u8 max_bw); int __qed_configure_pf_max_bandwidth(struct qed_hwfn *p_hwfn, @@ -500,4 +543,7 @@ int __qed_configure_pf_min_bandwidth(struct qed_hwfn *p_hwfn, struct qed_mcp_link_state *p_link, u8 min_bw); +int qed_mcp_mask_parities(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u32 mask_parities); + #endif diff --git a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h index b49d47f3de71..b44f09b18eb1 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h +++ b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h @@ -527,4 +527,10 @@ #define QM_REG_WFQPFWEIGHT 0x2f4e80UL #define QM_REG_WFQVPWEIGHT 0x2fa000UL + +#define MCP_REG_CPU_MODE \ + 0xe05000UL +#define MCP_REG_CPU_MODE_SOFT_HALT \ + (0x1 << 10) + #endif From e0c79ff6286f534f3e9fadab529b0ce1e2c6d917 Mon Sep 17 00:00:00 2001 From: Baoyou Xie Date: Tue, 6 Sep 2016 16:19:02 +0800 Subject: [PATCH 0773/1715] lan78xx: mark symbols static where possible We get a few warnings when building kernel with W=1: drivers/net/usb/lan78xx.c:1182:6: warning: no previous prototype for 'lan78xx_defer_kevent' [-Wmissing-prototypes] drivers/net/usb/lan78xx.c:1409:5: warning: no previous prototype for 'lan78xx_nway_reset' [-Wmissing-prototypes] drivers/net/usb/lan78xx.c:2000:5: warning: no previous prototype for 'lan78xx_set_mac_addr' [-Wmissing-prototypes] .... In fact, these functions are only used in the file in which they are declared and don't need a declaration, but can be made static. so this patch marks these functions with 'static'. Signed-off-by: Baoyou Xie Signed-off-by: David S. Miller --- drivers/net/usb/lan78xx.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 432b8a3ae354..db558b8b32fe 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -1179,7 +1179,7 @@ static int lan78xx_link_reset(struct lan78xx_net *dev) * NOTE: annoying asymmetry: if it's active, schedule_work() fails, * but tasklet_schedule() doesn't. hope the failure is rare. */ -void lan78xx_defer_kevent(struct lan78xx_net *dev, int work) +static void lan78xx_defer_kevent(struct lan78xx_net *dev, int work) { set_bit(work, &dev->flags); if (!schedule_delayed_work(&dev->wq, 0)) @@ -1406,7 +1406,7 @@ static u32 lan78xx_get_link(struct net_device *net) return net->phydev->link; } -int lan78xx_nway_reset(struct net_device *net) +static int lan78xx_nway_reset(struct net_device *net) { return phy_start_aneg(net->phydev); } @@ -1997,7 +1997,7 @@ static int lan78xx_change_mtu(struct net_device *netdev, int new_mtu) return 0; } -int lan78xx_set_mac_addr(struct net_device *netdev, void *p) +static int lan78xx_set_mac_addr(struct net_device *netdev, void *p) { struct lan78xx_net *dev = netdev_priv(netdev); struct sockaddr *addr = p; @@ -2371,7 +2371,7 @@ static void lan78xx_terminate_urbs(struct lan78xx_net *dev) remove_wait_queue(&unlink_wakeup, &wait); } -int lan78xx_stop(struct net_device *net) +static int lan78xx_stop(struct net_device *net) { struct lan78xx_net *dev = netdev_priv(net); @@ -2533,7 +2533,8 @@ static void lan78xx_queue_skb(struct sk_buff_head *list, entry->state = state; } -netdev_tx_t lan78xx_start_xmit(struct sk_buff *skb, struct net_device *net) +static netdev_tx_t +lan78xx_start_xmit(struct sk_buff *skb, struct net_device *net) { struct lan78xx_net *dev = netdev_priv(net); struct sk_buff *skb2 = NULL; @@ -2562,7 +2563,8 @@ netdev_tx_t lan78xx_start_xmit(struct sk_buff *skb, struct net_device *net) return NETDEV_TX_OK; } -int lan78xx_get_endpoints(struct lan78xx_net *dev, struct usb_interface *intf) +static int +lan78xx_get_endpoints(struct lan78xx_net *dev, struct usb_interface *intf) { int tmp; struct usb_host_interface *alt = NULL; @@ -2700,7 +2702,7 @@ static void lan78xx_rx_csum_offload(struct lan78xx_net *dev, } } -void lan78xx_skb_return(struct lan78xx_net *dev, struct sk_buff *skb) +static void lan78xx_skb_return(struct lan78xx_net *dev, struct sk_buff *skb) { int status; @@ -3283,7 +3285,7 @@ static void lan78xx_disconnect(struct usb_interface *intf) usb_put_dev(udev); } -void lan78xx_tx_timeout(struct net_device *net) +static void lan78xx_tx_timeout(struct net_device *net) { struct lan78xx_net *dev = netdev_priv(net); @@ -3603,7 +3605,7 @@ static int lan78xx_set_suspend(struct lan78xx_net *dev, u32 wol) return 0; } -int lan78xx_suspend(struct usb_interface *intf, pm_message_t message) +static int lan78xx_suspend(struct usb_interface *intf, pm_message_t message) { struct lan78xx_net *dev = usb_get_intfdata(intf); struct lan78xx_priv *pdata = (struct lan78xx_priv *)(dev->data[0]); @@ -3699,7 +3701,7 @@ out: return ret; } -int lan78xx_resume(struct usb_interface *intf) +static int lan78xx_resume(struct usb_interface *intf) { struct lan78xx_net *dev = usb_get_intfdata(intf); struct sk_buff *skb; @@ -3766,7 +3768,7 @@ int lan78xx_resume(struct usb_interface *intf) return 0; } -int lan78xx_reset_resume(struct usb_interface *intf) +static int lan78xx_reset_resume(struct usb_interface *intf) { struct lan78xx_net *dev = usb_get_intfdata(intf); From 3f591997b1484cbf869d21b511c8d84902444cfb Mon Sep 17 00:00:00 2001 From: Baoyou Xie Date: Tue, 6 Sep 2016 16:21:56 +0800 Subject: [PATCH 0774/1715] net: arc_emac: mark arc_mdio_reset() static We get 1 warning when building kernel with W=1: drivers/net/ethernet/arc/emac_mdio.c:107:5: warning: no previous prototype for 'arc_mdio_reset' [-Wmissing-prototypes] In fact, this function is only used in the file in which it is declared and don't need a declaration, but can be made static. so this patch marks this function with 'static'. Signed-off-by: Baoyou Xie Signed-off-by: David S. Miller --- drivers/net/ethernet/arc/emac_mdio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/arc/emac_mdio.c b/drivers/net/ethernet/arc/emac_mdio.c index 058460bdd5a6..a22403c688c9 100644 --- a/drivers/net/ethernet/arc/emac_mdio.c +++ b/drivers/net/ethernet/arc/emac_mdio.c @@ -104,7 +104,7 @@ static int arc_mdio_write(struct mii_bus *bus, int phy_addr, * @bus: points to the mii_bus structure * Description: reset the MII bus */ -int arc_mdio_reset(struct mii_bus *bus) +static int arc_mdio_reset(struct mii_bus *bus) { struct arc_emac_priv *priv = bus->priv; struct arc_emac_mdio_bus_data *data = &priv->bus_data; From f1e4ba5b6a6555d1f6b174d4fd0a96a9363bb57f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 6 Sep 2016 15:10:22 +0200 Subject: [PATCH 0775/1715] perf, bpf: fix conditional call to bpf_overflow_handler The newly added bpf_overflow_handler function is only built of both CONFIG_EVENT_TRACING and CONFIG_BPF_SYSCALL are enabled, but the caller only checks the latter: kernel/events/core.c: In function 'perf_event_alloc': kernel/events/core.c:9106:27: error: 'bpf_overflow_handler' undeclared (first use in this function) This changes the caller so we also skip this call if CONFIG_EVENT_TRACING is disabled entirely. Signed-off-by: Arnd Bergmann Fixes: aa6a5f3cb2b2 ("perf, bpf: add perf events core support for BPF_PROG_TYPE_PERF_EVENT programs") Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/events/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 85bf4c37911f..a7b8c1c75fa7 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -9072,7 +9072,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, if (!overflow_handler && parent_event) { overflow_handler = parent_event->overflow_handler; context = parent_event->overflow_handler_context; -#ifdef CONFIG_BPF_SYSCALL +#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_EVENT_TRACING) if (overflow_handler == bpf_overflow_handler) { struct bpf_prog *prog = bpf_prog_inc(parent_event->prog); From 72a31d85a56581f0369f881c453d9c212a2bad38 Mon Sep 17 00:00:00 2001 From: Bert Kenward Date: Tue, 6 Sep 2016 17:50:00 +0100 Subject: [PATCH 0776/1715] sfc: check MTU against minimum threshold Reported-by: Ma Yuying Suggested-by: Jarod Wilson Signed-off-by: Bert Kenward Reviewed-by: Jarod Wilson Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/efx.c | 12 +++++++++++- drivers/net/ethernet/sfc/net_driver.h | 3 +++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index f3826ae28bac..3cf3557106c2 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -2263,8 +2263,18 @@ static int efx_change_mtu(struct net_device *net_dev, int new_mtu) rc = efx_check_disabled(efx); if (rc) return rc; - if (new_mtu > EFX_MAX_MTU) + if (new_mtu > EFX_MAX_MTU) { + netif_err(efx, drv, efx->net_dev, + "Requested MTU of %d too big (max: %d)\n", + new_mtu, EFX_MAX_MTU); return -EINVAL; + } + if (new_mtu < EFX_MIN_MTU) { + netif_err(efx, drv, efx->net_dev, + "Requested MTU of %d too small (min: %d)\n", + new_mtu, EFX_MIN_MTU); + return -EINVAL; + } netif_dbg(efx, drv, efx->net_dev, "changing MTU to %d\n", new_mtu); diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index 0a2504b5dad5..99d8c82124bb 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -76,6 +76,9 @@ /* Maximum possible MTU the driver supports */ #define EFX_MAX_MTU (9 * 1024) +/* Minimum MTU, from RFC791 (IP) */ +#define EFX_MIN_MTU 68 + /* Size of an RX scatter buffer. Small enough to pack 2 into a 4K page, * and should be a multiple of the cache line size. */ From cf86799e816fe3a6e47eab3f0beb735d5944f01a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 6 Sep 2016 21:20:36 +0200 Subject: [PATCH 0777/1715] ptp: ixp46x: remove NO_IRQ handling gpio_to_irq does not return NO_IRQ but instead returns a negative error code on failure. Returning NO_IRQ from the function has no negative effects as we only compare the result to the expected interrupt number, but it's better to return a proper failure code for consistency, and we should remove NO_IRQ from the kernel entirely. Signed-off-by: Arnd Bergmann Acked-by: Richard Cochran Signed-off-by: David S. Miller --- drivers/ptp/ptp_ixp46x.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/ptp/ptp_ixp46x.c b/drivers/ptp/ptp_ixp46x.c index ee4f183ef9ee..344a3bac210b 100644 --- a/drivers/ptp/ptp_ixp46x.c +++ b/drivers/ptp/ptp_ixp46x.c @@ -268,18 +268,19 @@ static int setup_interrupt(int gpio) return err; irq = gpio_to_irq(gpio); + if (irq < 0) + return irq; - if (NO_IRQ == irq) - return NO_IRQ; - - if (irq_set_irq_type(irq, IRQF_TRIGGER_FALLING)) { + err = irq_set_irq_type(irq, IRQF_TRIGGER_FALLING); + if (err) { pr_err("cannot set trigger type for irq %d\n", irq); - return NO_IRQ; + return err; } - if (request_irq(irq, isr, 0, DRIVER, &ixp_clock)) { + err = request_irq(irq, isr, 0, DRIVER, &ixp_clock); + if (err) { pr_err("request_irq failed for irq %d\n", irq); - return NO_IRQ; + return err; } return irq; From ecc6569f3503b39f45bc6b86197b5e0a8533fb72 Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Thu, 25 Aug 2016 23:08:11 +0800 Subject: [PATCH 0778/1715] netfilter: gre: Use consistent GRE_* macros instead of ones defined by netfilter. There are already some GRE_* macros in kernel, so it is unnecessary to define these macros. And remove some useless macros Signed-off-by: Gao Feng Signed-off-by: Pablo Neira Ayuso --- .../linux/netfilter/nf_conntrack_proto_gre.h | 22 ++----------------- include/uapi/linux/if_tunnel.h | 1 + net/ipv4/netfilter/nf_nat_proto_gre.c | 4 ++-- net/netfilter/nf_conntrack_proto_gre.c | 4 ++-- 4 files changed, 7 insertions(+), 24 deletions(-) diff --git a/include/linux/netfilter/nf_conntrack_proto_gre.h b/include/linux/netfilter/nf_conntrack_proto_gre.h index df78dc2b5524..0189747f2691 100644 --- a/include/linux/netfilter/nf_conntrack_proto_gre.h +++ b/include/linux/netfilter/nf_conntrack_proto_gre.h @@ -1,29 +1,11 @@ #ifndef _CONNTRACK_PROTO_GRE_H #define _CONNTRACK_PROTO_GRE_H #include +#include +#include /* GRE PROTOCOL HEADER */ -/* GRE Version field */ -#define GRE_VERSION_1701 0x0 -#define GRE_VERSION_PPTP 0x1 - -/* GRE Protocol field */ -#define GRE_PROTOCOL_PPTP 0x880B - -/* GRE Flags */ -#define GRE_FLAG_C 0x80 -#define GRE_FLAG_R 0x40 -#define GRE_FLAG_K 0x20 -#define GRE_FLAG_S 0x10 -#define GRE_FLAG_A 0x80 - -#define GRE_IS_C(f) ((f)&GRE_FLAG_C) -#define GRE_IS_R(f) ((f)&GRE_FLAG_R) -#define GRE_IS_K(f) ((f)&GRE_FLAG_K) -#define GRE_IS_S(f) ((f)&GRE_FLAG_S) -#define GRE_IS_A(f) ((f)&GRE_FLAG_A) - /* GRE is a mess: Four different standards */ struct gre_hdr { #if defined(__LITTLE_ENDIAN_BITFIELD) diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h index 9865c8caedde..fb7337d6b985 100644 --- a/include/uapi/linux/if_tunnel.h +++ b/include/uapi/linux/if_tunnel.h @@ -39,6 +39,7 @@ #define GRE_IS_REC(f) ((f) & GRE_REC) #define GRE_IS_ACK(f) ((f) & GRE_ACK) +#define GRE_VERSION_0 __cpu_to_be16(0x0000) #define GRE_VERSION_1 __cpu_to_be16(0x0001) #define GRE_PROTO_PPP __cpu_to_be16(0x880b) #define GRE_PPTP_KEY_MASK __cpu_to_be32(0xffff) diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c index 9414923f1e15..93198d71dbb6 100644 --- a/net/ipv4/netfilter/nf_nat_proto_gre.c +++ b/net/ipv4/netfilter/nf_nat_proto_gre.c @@ -104,11 +104,11 @@ gre_manip_pkt(struct sk_buff *skb, if (maniptype != NF_NAT_MANIP_DST) return true; switch (greh->version) { - case GRE_VERSION_1701: + case ntohs(GRE_VERSION_0): /* We do not currently NAT any GREv0 packets. * Try to behave like "nf_nat_proto_unknown" */ break; - case GRE_VERSION_PPTP: + case ntohs(GRE_VERSION_1): pr_debug("call_id -> 0x%04x\n", ntohs(tuple->dst.u.gre.key)); pgreh->call_id = tuple->dst.u.gre.key; break; diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index a96451a7af20..deb239a014e4 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -200,7 +200,7 @@ static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, /* first only delinearize old RFC1701 GRE header */ grehdr = skb_header_pointer(skb, dataoff, sizeof(_grehdr), &_grehdr); - if (!grehdr || grehdr->version != GRE_VERSION_PPTP) { + if (!grehdr || grehdr->version != ntohs(GRE_VERSION_1)) { /* try to behave like "nf_conntrack_proto_generic" */ tuple->src.u.all = 0; tuple->dst.u.all = 0; @@ -212,7 +212,7 @@ static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, if (!pgrehdr) return true; - if (ntohs(grehdr->protocol) != GRE_PROTOCOL_PPTP) { + if (grehdr->protocol != GRE_PROTO_PPP) { pr_debug("GRE_VERSION_PPTP but unknown proto\n"); return false; } From c579a9e7d58f66030a144c7a33cc9bdf827a4b6d Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Thu, 25 Aug 2016 23:08:47 +0800 Subject: [PATCH 0779/1715] netfilter: gre: Use consistent GRE and PTTP header structure instead of the ones defined by netfilter There are two existing strutures which defines the GRE and PPTP header. So use these two structures instead of the ones defined by netfilter to keep consitent with other codes. Signed-off-by: Gao Feng Signed-off-by: Pablo Neira Ayuso --- .../linux/netfilter/nf_conntrack_proto_gre.h | 42 ------------------- net/ipv4/netfilter/nf_nat_proto_gre.c | 13 +++--- net/netfilter/nf_conntrack_proto_gre.c | 12 +++--- 3 files changed, 13 insertions(+), 54 deletions(-) diff --git a/include/linux/netfilter/nf_conntrack_proto_gre.h b/include/linux/netfilter/nf_conntrack_proto_gre.h index 0189747f2691..dee0acd0dd31 100644 --- a/include/linux/netfilter/nf_conntrack_proto_gre.h +++ b/include/linux/netfilter/nf_conntrack_proto_gre.h @@ -4,48 +4,6 @@ #include #include -/* GRE PROTOCOL HEADER */ - -/* GRE is a mess: Four different standards */ -struct gre_hdr { -#if defined(__LITTLE_ENDIAN_BITFIELD) - __u16 rec:3, - srr:1, - seq:1, - key:1, - routing:1, - csum:1, - version:3, - reserved:4, - ack:1; -#elif defined(__BIG_ENDIAN_BITFIELD) - __u16 csum:1, - routing:1, - key:1, - seq:1, - srr:1, - rec:3, - ack:1, - reserved:4, - version:3; -#else -#error "Adjust your defines" -#endif - __be16 protocol; -}; - -/* modified GRE header for PPTP */ -struct gre_hdr_pptp { - __u8 flags; /* bitfield */ - __u8 version; /* should be GRE_VERSION_PPTP */ - __be16 protocol; /* should be GRE_PROTOCOL_PPTP */ - __be16 payload_len; /* size of ppp payload, not inc. gre header */ - __be16 call_id; /* peer's call_id for this session */ - __be32 seq; /* sequence number. Present if S==1 */ - __be32 ack; /* seq number of highest packet received by */ - /* sender in this session */ -}; - struct nf_ct_gre { unsigned int stream_timeout; unsigned int timeout; diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c index 93198d71dbb6..edf05002d674 100644 --- a/net/ipv4/netfilter/nf_nat_proto_gre.c +++ b/net/ipv4/netfilter/nf_nat_proto_gre.c @@ -88,8 +88,8 @@ gre_manip_pkt(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype) { - const struct gre_hdr *greh; - struct gre_hdr_pptp *pgreh; + const struct gre_base_hdr *greh; + struct pptp_gre_header *pgreh; /* pgreh includes two optional 32bit fields which are not required * to be there. That's where the magic '8' comes from */ @@ -97,18 +97,19 @@ gre_manip_pkt(struct sk_buff *skb, return false; greh = (void *)skb->data + hdroff; - pgreh = (struct gre_hdr_pptp *)greh; + pgreh = (struct pptp_gre_header *)greh; /* we only have destination manip of a packet, since 'source key' * is not present in the packet itself */ if (maniptype != NF_NAT_MANIP_DST) return true; - switch (greh->version) { - case ntohs(GRE_VERSION_0): + + switch (greh->flags & GRE_VERSION) { + case GRE_VERSION_0: /* We do not currently NAT any GREv0 packets. * Try to behave like "nf_nat_proto_unknown" */ break; - case ntohs(GRE_VERSION_1): + case GRE_VERSION_1: pr_debug("call_id -> 0x%04x\n", ntohs(tuple->dst.u.gre.key)); pgreh->call_id = tuple->dst.u.gre.key; break; diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index deb239a014e4..9a715f88b2f1 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -192,15 +192,15 @@ static bool gre_invert_tuple(struct nf_conntrack_tuple *tuple, static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, struct net *net, struct nf_conntrack_tuple *tuple) { - const struct gre_hdr_pptp *pgrehdr; - struct gre_hdr_pptp _pgrehdr; + const struct pptp_gre_header *pgrehdr; + struct pptp_gre_header _pgrehdr; __be16 srckey; - const struct gre_hdr *grehdr; - struct gre_hdr _grehdr; + const struct gre_base_hdr *grehdr; + struct gre_base_hdr _grehdr; /* first only delinearize old RFC1701 GRE header */ grehdr = skb_header_pointer(skb, dataoff, sizeof(_grehdr), &_grehdr); - if (!grehdr || grehdr->version != ntohs(GRE_VERSION_1)) { + if (!grehdr || (grehdr->flags & GRE_VERSION) != GRE_VERSION_1) { /* try to behave like "nf_conntrack_proto_generic" */ tuple->src.u.all = 0; tuple->dst.u.all = 0; @@ -213,7 +213,7 @@ static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, return true; if (grehdr->protocol != GRE_PROTO_PPP) { - pr_debug("GRE_VERSION_PPTP but unknown proto\n"); + pr_debug("Unsupported GRE proto(0x%x)\n", ntohs(grehdr->protocol)); return false; } From 68cb9fe47ea661bffd48c8ca35790be26935e1c5 Mon Sep 17 00:00:00 2001 From: Marco Angaroni Date: Tue, 30 Aug 2016 18:48:19 +0200 Subject: [PATCH 0780/1715] netfilter: nf_ct_sip: correct parsing of continuation lines in SIP headers Current parsing methods for SIP headers do not properly manage continuation lines: in case of Call-ID header the first character of Call-ID header value is truncated. As a result IPVS SIP persistence engine hashes over a call-id that is not exactly the one present in the originale message. Example: "Call-ID: \r\n abcdeABCDE1234" results in extracted call-id equal to "bcdeABCDE1234". In above example Call-ID is represented as a string in C language. Obviously in real message the first bytes after colon (":") are "20 0d 0a 20". Proposed fix is in nf_conntrack_sip module. Since sip_follow_continuation() function walks past the leading spaces or tabs of the continuation line, sip_skip_whitespace() should simply return the ouput of sip_follow_continuation(). Otherwise another iteration of the for loop is done and dptr is incremented by one pointing to the second character of the first word in the header. Below is an extract of relevant SIP ABNF syntax. Call-ID = ( "Call-ID" / "i" ) HCOLON callid callid = word [ "@" word ] HCOLON = *( SP / HTAB ) ":" SWS SWS = [LWS] ; sep whitespace LWS = [*WSP CRLF] 1*WSP ; linear whitespace WSP = SP / HTAB word = 1*(alphanum / "-" / "." / "!" / "%" / "*" / "_" / "+" / "`" / "'" / "~" / "(" / ")" / "<" / ">" / ":" / "\" / DQUOTE / "/" / "[" / "]" / "?" / "{" / "}" ) Signed-off-by: Marco Angaroni Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_sip.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 7d77217de6a3..251a9a44d189 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -334,8 +334,7 @@ static const char *sip_skip_whitespace(const char *dptr, const char *limit) if (*dptr != '\r' && *dptr != '\n') break; dptr = sip_follow_continuation(dptr, limit); - if (dptr == NULL) - return NULL; + break; } return dptr; } From f0608ceaa79d99d24e97517f9a9a0fed2b9698b4 Mon Sep 17 00:00:00 2001 From: Marco Angaroni Date: Tue, 30 Aug 2016 18:48:24 +0200 Subject: [PATCH 0781/1715] netfilter: nf_ct_sip: correct allowed characters in Call-ID SIP header Current parsing methods for SIP header Call-ID do not check correctly all characters allowed by RFC 3261. In particular "," character is allowed instead of "'" character. As a result Call-ID headers like the following are discarded by IPVS SIP persistence engine. Call-ID: -.!%*_+`'~()<>:\"/[]?{} Above example is composed using all non-alphanumeric characters listed in RFC 3261 for Call-ID header syntax. Proposed fix is in nf_conntrack_sip module; function iswordc() checks this range: (c >= '(' && c <= '/') which includes these characters: ()*+,-./ They are all allowed except ",". Instead "'" is not included in the list. Below is an extract of relevant SIP ABNF syntax. Call-ID = ( "Call-ID" / "i" ) HCOLON callid callid = word [ "@" word ] HCOLON = *( SP / HTAB ) ":" SWS SWS = [LWS] ; sep whitespace LWS = [*WSP CRLF] 1*WSP ; linear whitespace WSP = SP / HTAB word = 1*(alphanum / "-" / "." / "!" / "%" / "*" / "_" / "+" / "`" / "'" / "~" / "(" / ")" / "<" / ">" / ":" / "\" / DQUOTE / "/" / "[" / "]" / "?" / "{" / "}" ) Signed-off-by: Marco Angaroni Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_sip.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 251a9a44d189..d8035351aff5 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -83,9 +83,10 @@ static int digits_len(const struct nf_conn *ct, const char *dptr, static int iswordc(const char c) { if (isalnum(c) || c == '!' || c == '"' || c == '%' || - (c >= '(' && c <= '/') || c == ':' || c == '<' || c == '>' || + (c >= '(' && c <= '+') || c == ':' || c == '<' || c == '>' || c == '?' || (c >= '[' && c <= ']') || c == '_' || c == '`' || - c == '{' || c == '}' || c == '~') + c == '{' || c == '}' || c == '~' || (c >= '-' && c <= '/') || + c == '\'') return 1; return 0; } From 723eb299de62ce75dbd31e7ee25d45887c32a602 Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Thu, 1 Sep 2016 18:58:29 +0800 Subject: [PATCH 0782/1715] netfilter: ftp: Remove the useless dlen==0 condition check in find_pattern The caller function "help" has already make sure the datalen could not be zero before invoke find_pattern as a parameter by the following codes if (dataoff >= skb->len) { pr_debug("ftp: dataoff(%u) >= skblen(%u)\n", dataoff, skb->len); return NF_ACCEPT; } datalen = skb->len - dataoff; And the latter codes "ends_in_nl = (fb_ptr[datalen - 1] == '\n');" use datalen directly without checking if it is zero. So it is unneccessary to check it in find_pattern too. Signed-off-by: Gao Feng Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_ftp.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index b6934b5edf7a..d49a2d410813 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -301,8 +301,6 @@ static int find_pattern(const char *data, size_t dlen, size_t i = plen; pr_debug("find_pattern `%s': dlen = %Zu\n", pattern, dlen); - if (dlen == 0) - return 0; if (dlen <= plen) { /* Short packet: try for partial? */ From ddb075b0cdbca140d6c6503db9cc9990d3461308 Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Thu, 1 Sep 2016 18:59:02 +0800 Subject: [PATCH 0783/1715] netfilter: ftp: Remove the useless code There are some debug code which are commented out in find_pattern by #if 0. Now remove them. Signed-off-by: Gao Feng Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_ftp.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index d49a2d410813..e3ed20060878 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -309,19 +309,8 @@ static int find_pattern(const char *data, size_t dlen, else return 0; } - if (strncasecmp(data, pattern, plen) != 0) { -#if 0 - size_t i; - - pr_debug("ftp: string mismatch\n"); - for (i = 0; i < plen; i++) { - pr_debug("ftp:char %u `%c'(%u) vs `%c'(%u)\n", - i, data[i], data[i], - pattern[i], pattern[i]); - } -#endif + if (strncasecmp(data, pattern, plen) != 0) return 0; - } pr_debug("Pattern matches!\n"); /* Now we've found the constant string, try to skip From 0d9932b2875f568d679f2af33ce610da3903ac11 Mon Sep 17 00:00:00 2001 From: Laura Garcia Liebana Date: Fri, 2 Sep 2016 15:05:57 +0200 Subject: [PATCH 0784/1715] netfilter: nft_numgen: rename until attribute by modulus The _until_ attribute is renamed to _modulus_ as the behaviour is similar to other expresions with number limits (ex. nft_hash). Renaming is possible because there isn't a kernel release yet with these changes. Signed-off-by: Laura Garcia Liebana Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 4 ++-- net/netfilter/nft_numgen.c | 30 ++++++++++++------------ 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 28ce01d79707..24161e25576d 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -1126,13 +1126,13 @@ enum nft_trace_types { * enum nft_ng_attributes - nf_tables number generator expression netlink attributes * * @NFTA_NG_DREG: destination register (NLA_U32) - * @NFTA_NG_UNTIL: source value to increment the counter until reset (NLA_U32) + * @NFTA_NG_MODULUS: maximum counter value (NLA_U32) * @NFTA_NG_TYPE: operation type (NLA_U32) */ enum nft_ng_attributes { NFTA_NG_UNSPEC, NFTA_NG_DREG, - NFTA_NG_UNTIL, + NFTA_NG_MODULUS, NFTA_NG_TYPE, __NFTA_NG_MAX }; diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c index 294745ecb0fc..f51a3ede3932 100644 --- a/net/netfilter/nft_numgen.c +++ b/net/netfilter/nft_numgen.c @@ -21,7 +21,7 @@ static DEFINE_PER_CPU(struct rnd_state, nft_numgen_prandom_state); struct nft_ng_inc { enum nft_registers dreg:8; - u32 until; + u32 modulus; atomic_t counter; }; @@ -34,7 +34,7 @@ static void nft_ng_inc_eval(const struct nft_expr *expr, do { oval = atomic_read(&priv->counter); - nval = (oval + 1 < priv->until) ? oval + 1 : 0; + nval = (oval + 1 < priv->modulus) ? oval + 1 : 0; } while (atomic_cmpxchg(&priv->counter, oval, nval) != oval); memcpy(®s->data[priv->dreg], &priv->counter, sizeof(u32)); @@ -42,7 +42,7 @@ static void nft_ng_inc_eval(const struct nft_expr *expr, static const struct nla_policy nft_ng_policy[NFTA_NG_MAX + 1] = { [NFTA_NG_DREG] = { .type = NLA_U32 }, - [NFTA_NG_UNTIL] = { .type = NLA_U32 }, + [NFTA_NG_MODULUS] = { .type = NLA_U32 }, [NFTA_NG_TYPE] = { .type = NLA_U32 }, }; @@ -52,8 +52,8 @@ static int nft_ng_inc_init(const struct nft_ctx *ctx, { struct nft_ng_inc *priv = nft_expr_priv(expr); - priv->until = ntohl(nla_get_be32(tb[NFTA_NG_UNTIL])); - if (priv->until == 0) + priv->modulus = ntohl(nla_get_be32(tb[NFTA_NG_MODULUS])); + if (priv->modulus == 0) return -ERANGE; priv->dreg = nft_parse_register(tb[NFTA_NG_DREG]); @@ -64,11 +64,11 @@ static int nft_ng_inc_init(const struct nft_ctx *ctx, } static int nft_ng_dump(struct sk_buff *skb, enum nft_registers dreg, - u32 until, enum nft_ng_types type) + u32 modulus, enum nft_ng_types type) { if (nft_dump_register(skb, NFTA_NG_DREG, dreg)) goto nla_put_failure; - if (nla_put_be32(skb, NFTA_NG_UNTIL, htonl(until))) + if (nla_put_be32(skb, NFTA_NG_MODULUS, htonl(modulus))) goto nla_put_failure; if (nla_put_be32(skb, NFTA_NG_TYPE, htonl(type))) goto nla_put_failure; @@ -83,12 +83,12 @@ static int nft_ng_inc_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct nft_ng_inc *priv = nft_expr_priv(expr); - return nft_ng_dump(skb, priv->dreg, priv->until, NFT_NG_INCREMENTAL); + return nft_ng_dump(skb, priv->dreg, priv->modulus, NFT_NG_INCREMENTAL); } struct nft_ng_random { enum nft_registers dreg:8; - u32 until; + u32 modulus; }; static void nft_ng_random_eval(const struct nft_expr *expr, @@ -99,7 +99,7 @@ static void nft_ng_random_eval(const struct nft_expr *expr, struct rnd_state *state = this_cpu_ptr(&nft_numgen_prandom_state); regs->data[priv->dreg] = reciprocal_scale(prandom_u32_state(state), - priv->until); + priv->modulus); } static int nft_ng_random_init(const struct nft_ctx *ctx, @@ -108,8 +108,8 @@ static int nft_ng_random_init(const struct nft_ctx *ctx, { struct nft_ng_random *priv = nft_expr_priv(expr); - priv->until = ntohl(nla_get_be32(tb[NFTA_NG_UNTIL])); - if (priv->until == 0) + priv->modulus = ntohl(nla_get_be32(tb[NFTA_NG_MODULUS])); + if (priv->modulus == 0) return -ERANGE; prandom_init_once(&nft_numgen_prandom_state); @@ -124,7 +124,7 @@ static int nft_ng_random_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct nft_ng_random *priv = nft_expr_priv(expr); - return nft_ng_dump(skb, priv->dreg, priv->until, NFT_NG_RANDOM); + return nft_ng_dump(skb, priv->dreg, priv->modulus, NFT_NG_RANDOM); } static struct nft_expr_type nft_ng_type; @@ -149,8 +149,8 @@ nft_ng_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { u32 type; - if (!tb[NFTA_NG_DREG] || - !tb[NFTA_NG_UNTIL] || + if (!tb[NFTA_NG_DREG] || + !tb[NFTA_NG_MODULUS] || !tb[NFTA_NG_TYPE]) return ERR_PTR(-EINVAL); From db6d857b819a00627a3bd911f49ee3156766bba8 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 2 Sep 2016 21:00:58 +0200 Subject: [PATCH 0785/1715] netfilter: nft_quota: fix overquota logic Use xor to decide to break further rule evaluation or not, since the existing logic doesn't achieve the expected inversion. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_quota.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c index 6eafbf987ed9..92b6ff16dbb3 100644 --- a/net/netfilter/nft_quota.c +++ b/net/netfilter/nft_quota.c @@ -33,7 +33,7 @@ static void nft_quota_eval(const struct nft_expr *expr, { struct nft_quota *priv = nft_expr_priv(expr); - if (nft_quota(priv, pkt) < 0 && !priv->invert) + if ((nft_quota(priv, pkt) < 0) ^ priv->invert) regs->verdict.code = NFT_BREAK; } From 22609b43b194917dce2188ae9a78bc40a14e67b5 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 2 Sep 2016 21:00:59 +0200 Subject: [PATCH 0786/1715] netfilter: nft_quota: introduce nft_overquota() This is patch renames the existing function to nft_overquota() and make it return a boolean that tells us if we have exceeded our byte quota. Just a cleanup. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_quota.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c index 92b6ff16dbb3..c00104c07095 100644 --- a/net/netfilter/nft_quota.c +++ b/net/netfilter/nft_quota.c @@ -21,10 +21,10 @@ struct nft_quota { atomic64_t remain; }; -static inline long nft_quota(struct nft_quota *priv, - const struct nft_pktinfo *pkt) +static inline bool nft_overquota(struct nft_quota *priv, + const struct nft_pktinfo *pkt) { - return atomic64_sub_return(pkt->skb->len, &priv->remain); + return atomic64_sub_return(pkt->skb->len, &priv->remain) < 0; } static void nft_quota_eval(const struct nft_expr *expr, @@ -33,7 +33,7 @@ static void nft_quota_eval(const struct nft_expr *expr, { struct nft_quota *priv = nft_expr_priv(expr); - if ((nft_quota(priv, pkt) < 0) ^ priv->invert) + if (nft_overquota(priv, pkt) ^ priv->invert) regs->verdict.code = NFT_BREAK; } From 1bcabc81ee94c0a65989128258f8c1d3e1c1b0ea Mon Sep 17 00:00:00 2001 From: Marco Angaroni Date: Tue, 30 Aug 2016 18:52:22 +0200 Subject: [PATCH 0787/1715] netfilter: nf_ct_sip: allow tab character in SIP headers Current parsing methods for SIP headers do not allow the presence of tab characters between header name and header value. As a result Call-ID SIP headers like the following are discarded by IPVS SIP persistence engine: "Call-ID\t: mycallid@abcde" "Call-ID:\tmycallid@abcde" In above examples Call-IDs are represented as strings in C language. Obviously in real message we have byte "09" before/after colon (":"). Proposed fix is in nf_conntrack_sip module. Function sip_skip_whitespace() should skip tabs in addition to spaces, since in SIP grammar whitespace (WSP) corresponds to space or tab. Below is an extract of relevant SIP ABNF syntax. Call-ID = ( "Call-ID" / "i" ) HCOLON callid callid = word [ "@" word ] HCOLON = *( SP / HTAB ) ":" SWS SWS = [LWS] ; sep whitespace LWS = [*WSP CRLF] 1*WSP ; linear whitespace WSP = SP / HTAB word = 1*(alphanum / "-" / "." / "!" / "%" / "*" / "_" / "+" / "`" / "'" / "~" / "(" / ")" / "<" / ">" / ":" / "\" / DQUOTE / "/" / "[" / "]" / "?" / "{" / "}" ) Signed-off-by: Marco Angaroni Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_sip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index d8035351aff5..621b81c7bddc 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -330,7 +330,7 @@ static const char *sip_follow_continuation(const char *dptr, const char *limit) static const char *sip_skip_whitespace(const char *dptr, const char *limit) { for (; dptr < limit; dptr++) { - if (*dptr == ' ') + if (*dptr == ' ' || *dptr == '\t') continue; if (*dptr != '\r' && *dptr != '\n') break; From 71a17de30733822b1ca6fbb3792581f5e7ee13de Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 7 Sep 2016 14:43:39 +0100 Subject: [PATCH 0788/1715] rxrpc: Whitespace cleanup Remove some whitespace. Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index bb342f5fe7e4..ad702f9f8d1f 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -720,7 +720,6 @@ static inline void rxrpc_put_connection(struct rxrpc_connection *conn) } } - static inline bool rxrpc_queue_conn(struct rxrpc_connection *conn) { if (!rxrpc_get_connection_maybe(conn)) @@ -879,7 +878,7 @@ int __init rxrpc_init_security(void); void rxrpc_exit_security(void); int rxrpc_init_client_conn_security(struct rxrpc_connection *); int rxrpc_init_server_conn_security(struct rxrpc_connection *); - + /* * sendmsg.c */ From e796cb419237f54b96442ae7feca1859c693865c Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 7 Sep 2016 14:42:15 +0100 Subject: [PATCH 0789/1715] rxrpc: Delete unused rxrpc_kernel_free_skb() Delete rxrpc_kernel_free_skb() as it's unused. Signed-off-by: David Howells --- net/rxrpc/skbuff.c | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c index 9752f8b1fdd0..a546a2ba6341 100644 --- a/net/rxrpc/skbuff.c +++ b/net/rxrpc/skbuff.c @@ -148,19 +148,6 @@ void rxrpc_packet_destructor(struct sk_buff *skb) _leave(""); } -/** - * rxrpc_kernel_free_skb - Free an RxRPC socket buffer - * @skb: The socket buffer to be freed - * - * Let RxRPC free its own socket buffer, permitting it to maintain debug - * accounting. - */ -void rxrpc_kernel_free_skb(struct sk_buff *skb) -{ - rxrpc_free_skb(skb); -} -EXPORT_SYMBOL(rxrpc_kernel_free_skb); - /* * Note the existence of a new-to-us socket buffer (allocated or dequeued). */ From fff72429c2e83bdbe32dc7f1ad6398dfe50750c6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 7 Sep 2016 14:34:21 +0100 Subject: [PATCH 0790/1715] rxrpc: Improve the call tracking tracepoint Improve the call tracking tracepoint by showing more differentiation between some of the put and get events, including: (1) Getting and putting refs for the socket call user ID tree. (2) Getting and putting refs for queueing and failing to queue the call processor work item. Note that these aren't necessarily used in this patch, but will be taken advantage of in future patches. An enum is added for the event subtype numbers rather than coding them directly as decimal numbers and a table of 3-letter strings is provided rather than a sequence of ?: operators. Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 11 +++------ net/rxrpc/af_rxrpc.c | 2 +- net/rxrpc/ar-internal.h | 22 +++++++++++++++-- net/rxrpc/call_accept.c | 10 ++++---- net/rxrpc/call_event.c | 2 +- net/rxrpc/call_object.c | 48 +++++++++++++++++++++++------------- net/rxrpc/input.c | 6 ++--- net/rxrpc/recvmsg.c | 23 ++++++++--------- net/rxrpc/sendmsg.c | 4 +-- net/rxrpc/skbuff.c | 2 +- 10 files changed, 79 insertions(+), 51 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index cbe574ea674b..30164896f1f6 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -17,7 +17,8 @@ #include TRACE_EVENT(rxrpc_call, - TP_PROTO(struct rxrpc_call *call, int op, int usage, int nskb, + TP_PROTO(struct rxrpc_call *call, enum rxrpc_call_trace op, + int usage, int nskb, const void *where, const void *aux), TP_ARGS(call, op, usage, nskb, where, aux), @@ -42,13 +43,7 @@ TRACE_EVENT(rxrpc_call, TP_printk("c=%p %s u=%d s=%d p=%pSR a=%p", __entry->call, - (__entry->op == 0 ? "NWc" : - __entry->op == 1 ? "NWs" : - __entry->op == 2 ? "SEE" : - __entry->op == 3 ? "GET" : - __entry->op == 4 ? "Gsb" : - __entry->op == 5 ? "PUT" : - "Psb"), + rxrpc_call_traces[__entry->op], __entry->usage, __entry->nskb, __entry->where, diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index b66a9e6f8d04..8356cd003d51 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -296,7 +296,7 @@ void rxrpc_kernel_end_call(struct socket *sock, struct rxrpc_call *call) _enter("%d{%d}", call->debug_id, atomic_read(&call->usage)); rxrpc_remove_user_ID(rxrpc_sk(sock->sk), call); rxrpc_purge_queue(&call->knlrecv_queue); - rxrpc_put_call(call); + rxrpc_put_call(call, rxrpc_call_put); } EXPORT_SYMBOL(rxrpc_kernel_end_call); diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index ad702f9f8d1f..913255a53564 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -508,6 +508,24 @@ struct rxrpc_call { unsigned long ackr_window[RXRPC_ACKR_WINDOW_ASZ + 1]; }; +enum rxrpc_call_trace { + rxrpc_call_new_client, + rxrpc_call_new_service, + rxrpc_call_queued, + rxrpc_call_queued_ref, + rxrpc_call_seen, + rxrpc_call_got, + rxrpc_call_got_skb, + rxrpc_call_got_userid, + rxrpc_call_put, + rxrpc_call_put_skb, + rxrpc_call_put_userid, + rxrpc_call_put_noqueue, + rxrpc_call__nr_trace +}; + +extern const char rxrpc_call_traces[rxrpc_call__nr_trace][4]; + #include /* @@ -555,8 +573,8 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *, void rxrpc_release_call(struct rxrpc_call *); void rxrpc_release_calls_on_socket(struct rxrpc_sock *); void rxrpc_see_call(struct rxrpc_call *); -void rxrpc_get_call(struct rxrpc_call *); -void rxrpc_put_call(struct rxrpc_call *); +void rxrpc_get_call(struct rxrpc_call *, enum rxrpc_call_trace); +void rxrpc_put_call(struct rxrpc_call *, enum rxrpc_call_trace); void rxrpc_get_call_for_skb(struct rxrpc_call *, struct sk_buff *); void rxrpc_put_call_for_skb(struct rxrpc_call *, struct sk_buff *); void __exit rxrpc_destroy_all_calls(void); diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 68a439e30df1..487ae7aa86db 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -115,7 +115,7 @@ static int rxrpc_accept_incoming_call(struct rxrpc_local *local, write_lock(&rx->call_lock); if (!test_and_set_bit(RXRPC_CALL_INIT_ACCEPT, &call->flags)) { - rxrpc_get_call(call); + rxrpc_get_call(call, rxrpc_call_got); spin_lock(&call->conn->state_lock); if (sp->hdr.securityIndex > 0 && @@ -155,7 +155,7 @@ static int rxrpc_accept_incoming_call(struct rxrpc_local *local, _debug("done"); read_unlock_bh(&local->services_lock); rxrpc_free_skb(notification); - rxrpc_put_call(call); + rxrpc_put_call(call, rxrpc_call_put); _leave(" = 0"); return 0; @@ -166,11 +166,11 @@ invalid_service: read_lock_bh(&call->state_lock); if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) && !test_and_set_bit(RXRPC_CALL_EV_RELEASE, &call->events)) { - rxrpc_get_call(call); + rxrpc_get_call(call, rxrpc_call_got); rxrpc_queue_call(call); } read_unlock_bh(&call->state_lock); - rxrpc_put_call(call); + rxrpc_put_call(call, rxrpc_call_put); ret = -ECONNREFUSED; error: rxrpc_free_skb(notification); @@ -341,6 +341,7 @@ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx, } /* formalise the acceptance */ + rxrpc_get_call(call, rxrpc_call_got_userid); call->notify_rx = notify_rx; call->user_call_ID = user_call_ID; rb_link_node(&call->sock_node, parent, pp); @@ -351,7 +352,6 @@ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx, BUG(); rxrpc_queue_call(call); - rxrpc_get_call(call); write_unlock_bh(&call->state_lock); write_unlock(&rx->call_lock); _leave(" = %p{%d}", call, call->debug_id); diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 4754c7fb6242..fee8b6ddb334 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -1246,7 +1246,7 @@ send_message_2: kill_ACKs: del_timer_sync(&call->ack_timer); if (test_and_clear_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events)) - rxrpc_put_call(call); + rxrpc_put_call(call, rxrpc_call_put); clear_bit(RXRPC_CALL_EV_ACK, &call->events); maybe_reschedule: diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 65691742199b..3166b5222435 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -55,6 +55,21 @@ const char *const rxrpc_call_completions[NR__RXRPC_CALL_COMPLETIONS] = { [RXRPC_CALL_NETWORK_ERROR] = "NetError", }; +const char rxrpc_call_traces[rxrpc_call__nr_trace][4] = { + [rxrpc_call_new_client] = "NWc", + [rxrpc_call_new_service] = "NWs", + [rxrpc_call_queued] = "QUE", + [rxrpc_call_queued_ref] = "QUR", + [rxrpc_call_seen] = "SEE", + [rxrpc_call_got] = "GOT", + [rxrpc_call_got_skb] = "Gsk", + [rxrpc_call_got_userid] = "Gus", + [rxrpc_call_put] = "PUT", + [rxrpc_call_put_skb] = "Psk", + [rxrpc_call_put_userid] = "Pus", + [rxrpc_call_put_noqueue] = "PNQ", +}; + struct kmem_cache *rxrpc_call_jar; LIST_HEAD(rxrpc_calls); DEFINE_RWLOCK(rxrpc_call_lock); @@ -96,7 +111,7 @@ struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *rx, return NULL; found_extant_call: - rxrpc_get_call(call); + rxrpc_get_call(call, rxrpc_call_got); read_unlock(&rx->call_lock); _leave(" = %p [%d]", call, atomic_read(&call->usage)); return call; @@ -252,8 +267,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, goto found_user_ID_now_present; } - rxrpc_get_call(call); - + rxrpc_get_call(call, rxrpc_call_got_userid); rb_link_node(&call->sock_node, parent, pp); rb_insert_color(&call->sock_node, &rx->calls); write_unlock(&rx->call_lock); @@ -275,7 +289,7 @@ error: write_lock(&rx->call_lock); rb_erase(&call->sock_node, &rx->calls); write_unlock(&rx->call_lock); - rxrpc_put_call(call); + rxrpc_put_call(call, rxrpc_call_put_userid); write_lock_bh(&rxrpc_call_lock); list_del_init(&call->link); @@ -283,7 +297,7 @@ error: set_bit(RXRPC_CALL_RELEASED, &call->flags); call->state = RXRPC_CALL_DEAD; - rxrpc_put_call(call); + rxrpc_put_call(call, rxrpc_call_put); _leave(" = %d", ret); return ERR_PTR(ret); @@ -296,7 +310,7 @@ found_user_ID_now_present: write_unlock(&rx->call_lock); set_bit(RXRPC_CALL_RELEASED, &call->flags); call->state = RXRPC_CALL_DEAD; - rxrpc_put_call(call); + rxrpc_put_call(call, rxrpc_call_put); _leave(" = -EEXIST [%p]", call); return ERR_PTR(-EEXIST); } @@ -322,8 +336,8 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx, if (!candidate) return ERR_PTR(-EBUSY); - trace_rxrpc_call(candidate, 1, atomic_read(&candidate->usage), - 0, here, NULL); + trace_rxrpc_call(candidate, rxrpc_call_new_service, + atomic_read(&candidate->usage), 0, here, NULL); chan = sp->hdr.cid & RXRPC_CHANNELMASK; candidate->socket = rx; @@ -358,7 +372,7 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx, read_unlock(&call->state_lock); goto aborted_call; default: - rxrpc_get_call(call); + rxrpc_get_call(call, rxrpc_call_got); read_unlock(&call->state_lock); goto extant_call; } @@ -447,20 +461,20 @@ void rxrpc_see_call(struct rxrpc_call *call) int n = atomic_read(&call->usage); int m = atomic_read(&call->skb_count); - trace_rxrpc_call(call, 2, n, m, here, 0); + trace_rxrpc_call(call, rxrpc_call_seen, n, m, here, NULL); } } /* * Note the addition of a ref on a call. */ -void rxrpc_get_call(struct rxrpc_call *call) +void rxrpc_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op) { const void *here = __builtin_return_address(0); int n = atomic_inc_return(&call->usage); int m = atomic_read(&call->skb_count); - trace_rxrpc_call(call, 3, n, m, here, 0); + trace_rxrpc_call(call, op, n, m, here, NULL); } /* @@ -472,7 +486,7 @@ void rxrpc_get_call_for_skb(struct rxrpc_call *call, struct sk_buff *skb) int n = atomic_inc_return(&call->usage); int m = atomic_inc_return(&call->skb_count); - trace_rxrpc_call(call, 4, n, m, here, skb); + trace_rxrpc_call(call, rxrpc_call_got_skb, n, m, here, skb); } /* @@ -575,7 +589,7 @@ static void rxrpc_dead_call_expired(unsigned long _call) write_lock_bh(&call->state_lock); call->state = RXRPC_CALL_DEAD; write_unlock_bh(&call->state_lock); - rxrpc_put_call(call); + rxrpc_put_call(call, rxrpc_call_put); } /* @@ -632,7 +646,7 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx) /* * release a call */ -void rxrpc_put_call(struct rxrpc_call *call) +void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace op) { const void *here = __builtin_return_address(0); int n, m; @@ -641,7 +655,7 @@ void rxrpc_put_call(struct rxrpc_call *call) n = atomic_dec_return(&call->usage); m = atomic_read(&call->skb_count); - trace_rxrpc_call(call, 5, n, m, here, NULL); + trace_rxrpc_call(call, op, n, m, here, NULL); ASSERTCMP(n, >=, 0); if (n == 0) { _debug("call %d dead", call->debug_id); @@ -661,7 +675,7 @@ void rxrpc_put_call_for_skb(struct rxrpc_call *call, struct sk_buff *skb) n = atomic_dec_return(&call->usage); m = atomic_dec_return(&call->skb_count); - trace_rxrpc_call(call, 6, n, m, here, skb); + trace_rxrpc_call(call, rxrpc_call_put_skb, n, m, here, skb); ASSERTCMP(n, >=, 0); if (n == 0) { _debug("call %d dead", call->debug_id); diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 72f016cfaaf5..f7239a6f9181 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -537,7 +537,7 @@ static void rxrpc_post_packet_to_call(struct rxrpc_call *call, } read_unlock(&call->state_lock); - rxrpc_get_call(call); + rxrpc_get_call(call, rxrpc_call_got); if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA && sp->hdr.flags & RXRPC_JUMBO_PACKET) @@ -545,12 +545,12 @@ static void rxrpc_post_packet_to_call(struct rxrpc_call *call, else rxrpc_fast_process_packet(call, skb); - rxrpc_put_call(call); + rxrpc_put_call(call, rxrpc_call_put); goto done; resend_final_ack: _debug("final ack again"); - rxrpc_get_call(call); + rxrpc_get_call(call, rxrpc_call_got); set_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events); rxrpc_queue_call(call); goto free_unlock; diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 0ab7b334bab1..97f8ee76c67c 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -79,7 +79,8 @@ int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, if (rx->sk.sk_state != RXRPC_SERVER_LISTENING) { release_sock(&rx->sk); if (continue_call) - rxrpc_put_call(continue_call); + rxrpc_put_call(continue_call, + rxrpc_call_put); return -ENODATA; } } @@ -137,13 +138,13 @@ int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, if (call != continue_call || skb->mark != RXRPC_SKB_MARK_DATA) { release_sock(&rx->sk); - rxrpc_put_call(continue_call); + rxrpc_put_call(continue_call, rxrpc_call_put); _leave(" = %d [noncont]", copied); return copied; } } - rxrpc_get_call(call); + rxrpc_get_call(call, rxrpc_call_got); /* copy the peer address and timestamp */ if (!continue_call) { @@ -233,7 +234,7 @@ int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, if (!continue_call) continue_call = sp->call; else - rxrpc_put_call(call); + rxrpc_put_call(call, rxrpc_call_put); call = NULL; if (flags & MSG_PEEK) { @@ -255,9 +256,9 @@ int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, out: release_sock(&rx->sk); if (call) - rxrpc_put_call(call); + rxrpc_put_call(call, rxrpc_call_put); if (continue_call) - rxrpc_put_call(continue_call); + rxrpc_put_call(continue_call, rxrpc_call_put); _leave(" = %d [data]", copied); return copied; @@ -341,18 +342,18 @@ terminal_message: } release_sock(&rx->sk); - rxrpc_put_call(call); + rxrpc_put_call(call, rxrpc_call_put); if (continue_call) - rxrpc_put_call(continue_call); + rxrpc_put_call(continue_call, rxrpc_call_put); _leave(" = %d", ret); return ret; copy_error: _debug("copy error"); release_sock(&rx->sk); - rxrpc_put_call(call); + rxrpc_put_call(call, rxrpc_call_put); if (continue_call) - rxrpc_put_call(continue_call); + rxrpc_put_call(continue_call, rxrpc_call_put); _leave(" = %d", ret); return ret; @@ -361,7 +362,7 @@ wait_interrupted: wait_error: finish_wait(sk_sleep(&rx->sk), &wait); if (continue_call) - rxrpc_put_call(continue_call); + rxrpc_put_call(continue_call, rxrpc_call_put); if (copied) copied = ret; _leave(" = %d [waitfail %d]", copied, ret); diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 7376794a0308..803078bea507 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -534,7 +534,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) call = rxrpc_accept_call(rx, user_call_ID, NULL); if (IS_ERR(call)) return PTR_ERR(call); - rxrpc_put_call(call); + rxrpc_put_call(call, rxrpc_call_put); return 0; } @@ -573,7 +573,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) ret = rxrpc_send_data(rx, call, msg, len); } - rxrpc_put_call(call); + rxrpc_put_call(call, rxrpc_call_put); _leave(" = %d", ret); return ret; } diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c index a546a2ba6341..c0613ab6d2d5 100644 --- a/net/rxrpc/skbuff.c +++ b/net/rxrpc/skbuff.c @@ -35,7 +35,7 @@ static void rxrpc_request_final_ACK(struct rxrpc_call *call) /* get an extra ref on the call for the final-ACK generator to * release */ - rxrpc_get_call(call); + rxrpc_get_call(call, rxrpc_call_got); set_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events); if (try_to_del_timer_sync(&call->ack_timer) >= 0) rxrpc_queue_call(call); From f4fdb3525b247e1b4f9cc563641494c96c80f91a Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 7 Sep 2016 14:45:26 +0100 Subject: [PATCH 0791/1715] rxrpc: Use call->peer rather than call->conn->params.peer Use call->peer rather than call->conn->params.peer to avoid the possibility of call->conn being NULL and, whilst we're at it, check it for NULL before we access it. Signed-off-by: David Howells --- net/rxrpc/call_object.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 3166b5222435..060ddc32a85e 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -514,9 +514,11 @@ void rxrpc_release_call(struct rxrpc_call *call) */ _debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, conn); - spin_lock(&conn->params.peer->lock); - hlist_del_init(&call->error_link); - spin_unlock(&conn->params.peer->lock); + if (call->peer) { + spin_lock(&call->peer->lock); + hlist_del_init(&call->error_link); + spin_unlock(&call->peer->lock); + } write_lock_bh(&rx->call_lock); if (!list_empty(&call->accept_link)) { From 278ac0cdd5e516bdef2b9b8f5a4dd6366a5bccfe Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 7 Sep 2016 15:19:25 +0100 Subject: [PATCH 0792/1715] rxrpc: Cache the security index in the rxrpc_call struct Cache the security index in the rxrpc_call struct so that we can get at it even when the call has been disconnected and the connection pointer cleared. Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 1 + net/rxrpc/call_object.c | 1 + net/rxrpc/conn_client.c | 3 +++ net/rxrpc/input.c | 2 +- net/rxrpc/sendmsg.c | 2 +- 5 files changed, 7 insertions(+), 2 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 913255a53564..e3dfc9da05fe 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -475,6 +475,7 @@ struct rxrpc_call { atomic_t skb_count; /* Outstanding packets on this call */ atomic_t sequence; /* Tx data packet sequence counter */ u16 service_id; /* service ID */ + u8 security_ix; /* Security type */ u32 call_id; /* call ID on connection */ u32 cid; /* connection ID plus channel index */ int debug_id; /* debug ID for printks */ diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 060ddc32a85e..83019e489555 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -345,6 +345,7 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx, candidate->peer = conn->params.peer; candidate->cid = sp->hdr.cid; candidate->call_id = sp->hdr.callNumber; + candidate->security_ix = sp->hdr.securityIndex; candidate->rx_data_post = 0; candidate->state = RXRPC_CALL_SERVER_ACCEPTING; candidate->flags |= (1 << RXRPC_CALL_IS_SERVICE); diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 82de1aeaef21..9344a8416ceb 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -348,6 +348,7 @@ static int rxrpc_get_client_conn(struct rxrpc_call *call, if (cp->exclusive) { call->conn = candidate; + call->security_ix = candidate->security_ix; _leave(" = 0 [exclusive %d]", candidate->debug_id); return 0; } @@ -395,6 +396,7 @@ static int rxrpc_get_client_conn(struct rxrpc_call *call, candidate_published: set_bit(RXRPC_CONN_IN_CLIENT_CONNS, &candidate->flags); call->conn = candidate; + call->security_ix = candidate->security_ix; spin_unlock(&local->client_conns_lock); _leave(" = 0 [new %d]", candidate->debug_id); return 0; @@ -412,6 +414,7 @@ found_extant_conn: spin_lock(&conn->channel_lock); call->conn = conn; + call->security_ix = conn->security_ix; list_add(&call->chan_wait_link, &conn->waiting_calls); spin_unlock(&conn->channel_lock); _leave(" = 0 [extant %d]", conn->debug_id); diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index f7239a6f9181..9242fefd7f40 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -198,7 +198,7 @@ static int rxrpc_fast_process_data(struct rxrpc_call *call, /* if the packet need security things doing to it, then it goes down * the slow path */ - if (call->conn->security_ix) + if (call->security_ix) goto enqueue_packet; sp->call = call; diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 803078bea507..2439aff131c7 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -322,7 +322,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, sp->hdr.serial = atomic_inc_return(&conn->serial); sp->hdr.type = RXRPC_PACKET_TYPE_DATA; sp->hdr.userStatus = 0; - sp->hdr.securityIndex = conn->security_ix; + sp->hdr.securityIndex = call->security_ix; sp->hdr._rsvd = 0; sp->hdr.serviceId = call->service_id; From 8b7fac50ab7f2668c43795c135025c472922a344 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 7 Sep 2016 15:28:54 +0100 Subject: [PATCH 0793/1715] rxrpc: Pass the connection pointer to rxrpc_post_packet_to_call() Pass the connection pointer to rxrpc_post_packet_to_call() as the call might get disconnected whilst we're looking at it, but the connection pointer determined by rxrpc_data_read() is guaranteed by RCU for the duration of the call. Signed-off-by: David Howells --- net/rxrpc/input.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 9242fefd7f40..52da4373131f 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -497,7 +497,8 @@ protocol_error: * post an incoming packet to the appropriate call/socket to deal with * - must get rid of the sk_buff, either by freeing it or by queuing it */ -static void rxrpc_post_packet_to_call(struct rxrpc_call *call, +static void rxrpc_post_packet_to_call(struct rxrpc_connection *conn, + struct rxrpc_call *call, struct sk_buff *skb) { struct rxrpc_skb_priv *sp; @@ -558,7 +559,7 @@ resend_final_ack: dead_call: if (sp->hdr.type != RXRPC_PACKET_TYPE_ABORT) { skb->priority = RX_CALL_DEAD; - rxrpc_reject_packet(call->conn->params.local, skb); + rxrpc_reject_packet(conn->params.local, skb); goto unlock; } free_unlock: @@ -754,7 +755,7 @@ void rxrpc_data_ready(struct sock *sk) goto cant_route_call; rxrpc_see_call(call); - rxrpc_post_packet_to_call(call, skb); + rxrpc_post_packet_to_call(conn, call, skb); goto out_unlock; } From 6543ac523558b2392271f3f8088e6455b3f00bb1 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 7 Sep 2016 15:26:39 +0100 Subject: [PATCH 0794/1715] rxrpc: Use rxrpc_is_service_call() rather than rxrpc_conn_is_service() Use rxrpc_is_service_call() rather than rxrpc_conn_is_service() if the call is available just in case call->conn is NULL. Signed-off-by: David Howells --- net/rxrpc/input.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 52da4373131f..8267f42a7753 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -357,7 +357,7 @@ void rxrpc_fast_process_packet(struct rxrpc_call *call, struct sk_buff *skb) case RXRPC_PACKET_TYPE_BUSY: _proto("Rx BUSY %%%u", sp->hdr.serial); - if (rxrpc_conn_is_service(call->conn)) + if (rxrpc_is_service_call(call)) goto protocol_error; write_lock_bh(&call->state_lock); @@ -525,7 +525,7 @@ static void rxrpc_post_packet_to_call(struct rxrpc_connection *conn, default: goto dead_call; case RXRPC_CALL_SUCCEEDED: - if (rxrpc_conn_is_service(call->conn)) + if (rxrpc_is_service_call(call)) goto dead_call; goto resend_final_ack; } From 8d94aa381dab19f3c0f524f5d255248b0ae50125 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 7 Sep 2016 09:19:31 +0100 Subject: [PATCH 0795/1715] rxrpc: Calls shouldn't hold socket refs rxrpc calls shouldn't hold refs on the sock struct. This was done so that the socket wouldn't go away whilst the call was in progress, such that the call could reach the socket's queues. However, we can mark the socket as requiring an RCU release and rely on the RCU read lock. To make this work, we do: (1) rxrpc_release_call() removes the call's call user ID. This is now only called from socket operations and not from the call processor: rxrpc_accept_call() / rxrpc_kernel_accept_call() rxrpc_reject_call() / rxrpc_kernel_reject_call() rxrpc_kernel_end_call() rxrpc_release_calls_on_socket() rxrpc_recvmsg() Though it is also called in the cleanup path of rxrpc_accept_incoming_call() before we assign a user ID. (2) Pass the socket pointer into rxrpc_release_call() rather than getting it from the call so that we can get rid of uninitialised calls. (3) Fix call processor queueing to pass a ref to the work queue and to release that ref at the end of the processor function (or to pass it back to the work queue if we have to requeue). (4) Skip out of the call processor function asap if the call is complete and don't requeue it if the call is complete. (5) Clean up the call immediately that the refcount reaches 0 rather than trying to defer it. Actual deallocation is deferred to RCU, however. (6) Don't hold socket refs for allocated calls. (7) Use the RCU read lock when queueing a message on a socket and treat the call's socket pointer according to RCU rules and check it for NULL. We also need to use the RCU read lock when viewing a call through procfs. (8) Transmit the final ACK/ABORT to a client call in rxrpc_release_call() if this hasn't been done yet so that we can then disconnect the call. Once the call is disconnected, it won't have any access to the connection struct and the UDP socket for the call work processor to be able to send the ACK. Terminal retransmission will be handled by the connection processor. (9) Release all calls immediately on the closing of a socket rather than trying to defer this. Incomplete calls will be aborted. The call refcount model is much simplified. Refs are held on the call by: (1) A socket's user ID tree. (2) A socket's incoming call secureq and acceptq. (3) A kernel service that has a call in progress. (4) A queued call work processor. We have to take care to put any call that we failed to queue. (5) sk_buffs on a socket's receive queue. A future patch will get rid of this. Whilst we're at it, we can do: (1) Get rid of the RXRPC_CALL_EV_RELEASE event. Release is now done entirely from the socket routines and never from the call's processor. (2) Get rid of the RXRPC_CALL_DEAD state. Calls now end in the RXRPC_CALL_COMPLETE state. (3) Get rid of the rxrpc_call::destroyer work item. Calls are now torn down when their refcount reaches 0 and then handed over to RCU for final cleanup. (4) Get rid of the rxrpc_call::deadspan timer. Calls are cleaned up immediately they're finished with and don't hang around. Post-completion retransmission is handled by the connection processor once the call is disconnected. (5) Get rid of the dead call expiry setting as there's no longer a timer to set. (6) rxrpc_destroy_all_calls() can just check that the call list is empty. Signed-off-by: David Howells --- net/rxrpc/af_rxrpc.c | 4 +- net/rxrpc/ar-internal.h | 15 +-- net/rxrpc/call_accept.c | 55 +++------- net/rxrpc/call_event.c | 74 ++++++------- net/rxrpc/call_object.c | 224 ++++++++++++++++------------------------ net/rxrpc/input.c | 26 ++--- net/rxrpc/output.c | 145 ++++++++++++++++++++++++++ net/rxrpc/proc.c | 4 +- net/rxrpc/recvmsg.c | 24 +---- net/rxrpc/skbuff.c | 3 - net/rxrpc/sysctl.c | 8 -- 11 files changed, 303 insertions(+), 279 deletions(-) diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 8356cd003d51..77a132abf140 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -294,8 +294,7 @@ EXPORT_SYMBOL(rxrpc_kernel_begin_call); void rxrpc_kernel_end_call(struct socket *sock, struct rxrpc_call *call) { _enter("%d{%d}", call->debug_id, atomic_read(&call->usage)); - rxrpc_remove_user_ID(rxrpc_sk(sock->sk), call); - rxrpc_purge_queue(&call->knlrecv_queue); + rxrpc_release_call(rxrpc_sk(sock->sk), call); rxrpc_put_call(call, rxrpc_call_put); } EXPORT_SYMBOL(rxrpc_kernel_end_call); @@ -558,6 +557,7 @@ static int rxrpc_create(struct net *net, struct socket *sock, int protocol, return -ENOMEM; sock_init_data(sock, sk); + sock_set_flag(sk, SOCK_RCU_FREE); sk->sk_state = RXRPC_UNBOUND; sk->sk_write_space = rxrpc_write_space; sk->sk_max_ack_backlog = 0; diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index e3dfc9da05fe..3addda4bfa6b 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -35,8 +35,6 @@ struct rxrpc_crypt { #define rxrpc_queue_delayed_work(WS,D) \ queue_delayed_work(rxrpc_workqueue, (WS), (D)) -#define rxrpc_queue_call(CALL) rxrpc_queue_work(&(CALL)->processor) - struct rxrpc_connection; /* @@ -397,7 +395,6 @@ enum rxrpc_call_event { RXRPC_CALL_EV_ACCEPTED, /* incoming call accepted by userspace app */ RXRPC_CALL_EV_SECURED, /* incoming call's connection is now secure */ RXRPC_CALL_EV_POST_ACCEPT, /* need to post an "accept?" message to the app */ - RXRPC_CALL_EV_RELEASE, /* need to release the call's resources */ }; /* @@ -417,7 +414,6 @@ enum rxrpc_call_state { RXRPC_CALL_SERVER_SEND_REPLY, /* - server sending reply */ RXRPC_CALL_SERVER_AWAIT_ACK, /* - server awaiting final ACK */ RXRPC_CALL_COMPLETE, /* - call complete */ - RXRPC_CALL_DEAD, /* - call is dead */ NR__RXRPC_CALL_STATES }; @@ -442,12 +438,10 @@ struct rxrpc_call { struct rcu_head rcu; struct rxrpc_connection *conn; /* connection carrying call */ struct rxrpc_peer *peer; /* Peer record for remote address */ - struct rxrpc_sock *socket; /* socket responsible */ + struct rxrpc_sock __rcu *socket; /* socket responsible */ struct timer_list lifetimer; /* lifetime remaining on call */ - struct timer_list deadspan; /* reap timer for re-ACK'ing, etc */ struct timer_list ack_timer; /* ACK generation timer */ struct timer_list resend_timer; /* Tx resend timer */ - struct work_struct destroyer; /* call destroyer */ struct work_struct processor; /* packet processor and ACK generator */ rxrpc_notify_rx_t notify_rx; /* kernel service Rx notification function */ struct list_head link; /* link in master call list */ @@ -558,7 +552,6 @@ void rxrpc_process_call(struct work_struct *); extern const char *const rxrpc_call_states[]; extern const char *const rxrpc_call_completions[]; extern unsigned int rxrpc_max_call_lifetime; -extern unsigned int rxrpc_dead_call_expiry; extern struct kmem_cache *rxrpc_call_jar; extern struct list_head rxrpc_calls; extern rwlock_t rxrpc_call_lock; @@ -571,8 +564,10 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *, struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *, struct rxrpc_connection *, struct sk_buff *); -void rxrpc_release_call(struct rxrpc_call *); +void rxrpc_release_call(struct rxrpc_sock *, struct rxrpc_call *); void rxrpc_release_calls_on_socket(struct rxrpc_sock *); +bool __rxrpc_queue_call(struct rxrpc_call *); +bool rxrpc_queue_call(struct rxrpc_call *); void rxrpc_see_call(struct rxrpc_call *); void rxrpc_get_call(struct rxrpc_call *, enum rxrpc_call_trace); void rxrpc_put_call(struct rxrpc_call *, enum rxrpc_call_trace); @@ -835,6 +830,7 @@ extern const char *rxrpc_acks(u8 reason); /* * output.c */ +int rxrpc_send_call_packet(struct rxrpc_call *, u8); int rxrpc_send_data_packet(struct rxrpc_connection *, struct sk_buff *); /* @@ -880,7 +876,6 @@ extern const struct file_operations rxrpc_connection_seq_fops; /* * recvmsg.c */ -void rxrpc_remove_user_ID(struct rxrpc_sock *, struct rxrpc_call *); int rxrpc_recvmsg(struct socket *, struct msghdr *, size_t, int); /* diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 487ae7aa86db..879a964de80c 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -163,13 +163,7 @@ invalid_service: _debug("invalid"); read_unlock_bh(&local->services_lock); - read_lock_bh(&call->state_lock); - if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) && - !test_and_set_bit(RXRPC_CALL_EV_RELEASE, &call->events)) { - rxrpc_get_call(call, rxrpc_call_got); - rxrpc_queue_call(call); - } - read_unlock_bh(&call->state_lock); + rxrpc_release_call(rx, call); rxrpc_put_call(call, rxrpc_call_put); ret = -ECONNREFUSED; error: @@ -236,13 +230,11 @@ found_service: if (sk_acceptq_is_full(&rx->sk)) goto backlog_full; sk_acceptq_added(&rx->sk); - sock_hold(&rx->sk); read_unlock_bh(&local->services_lock); ret = rxrpc_accept_incoming_call(local, rx, skb, &srx); if (ret < 0) sk_acceptq_removed(&rx->sk); - sock_put(&rx->sk); switch (ret) { case -ECONNRESET: /* old calls are ignored */ case -ECONNABORTED: /* aborted calls are reaborted or ignored */ @@ -333,9 +325,6 @@ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx, case RXRPC_CALL_COMPLETE: ret = call->error; goto out_release; - case RXRPC_CALL_DEAD: - ret = -ETIME; - goto out_discard; default: BUG(); } @@ -350,24 +339,20 @@ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx, BUG(); if (test_and_set_bit(RXRPC_CALL_EV_ACCEPTED, &call->events)) BUG(); - rxrpc_queue_call(call); write_unlock_bh(&call->state_lock); write_unlock(&rx->call_lock); + rxrpc_queue_call(call); _leave(" = %p{%d}", call, call->debug_id); return call; - /* if the call is already dying or dead, then we leave the socket's ref - * on it to be released by rxrpc_dead_call_expired() as induced by - * rxrpc_release_call() */ out_release: - _debug("release %p", call); - if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) && - !test_and_set_bit(RXRPC_CALL_EV_RELEASE, &call->events)) - rxrpc_queue_call(call); -out_discard: write_unlock_bh(&call->state_lock); - _debug("discard %p", call); + write_unlock(&rx->call_lock); + _debug("release %p", call); + rxrpc_release_call(rx, call); + _leave(" = %d", ret); + return ERR_PTR(ret); out: write_unlock(&rx->call_lock); _leave(" = %d", ret); @@ -390,8 +375,11 @@ int rxrpc_reject_call(struct rxrpc_sock *rx) write_lock(&rx->call_lock); ret = -ENODATA; - if (list_empty(&rx->acceptq)) - goto out; + if (list_empty(&rx->acceptq)) { + write_unlock(&rx->call_lock); + _leave(" = -ENODATA"); + return -ENODATA; + } /* dequeue the first call and check it's still valid */ call = list_entry(rx->acceptq.next, struct rxrpc_call, accept_link); @@ -407,30 +395,17 @@ int rxrpc_reject_call(struct rxrpc_sock *rx) if (test_and_set_bit(RXRPC_CALL_EV_REJECT_BUSY, &call->events)) rxrpc_queue_call(call); ret = 0; - goto out_release; + break; case RXRPC_CALL_COMPLETE: ret = call->error; - goto out_release; - case RXRPC_CALL_DEAD: - ret = -ETIME; - goto out_discard; + break; default: BUG(); } - /* if the call is already dying or dead, then we leave the socket's ref - * on it to be released by rxrpc_dead_call_expired() as induced by - * rxrpc_release_call() */ -out_release: - _debug("release %p", call); - if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) && - !test_and_set_bit(RXRPC_CALL_EV_RELEASE, &call->events)) - rxrpc_queue_call(call); -out_discard: write_unlock_bh(&call->state_lock); - _debug("discard %p", call); -out: write_unlock(&rx->call_lock); + rxrpc_release_call(rx, call); _leave(" = %d", ret); return ret; } diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index fee8b6ddb334..8365d3366114 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -811,8 +811,9 @@ static int rxrpc_post_message(struct rxrpc_call *call, u32 mark, u32 error, } /* - * handle background processing of incoming call packets and ACK / abort - * generation + * Handle background processing of incoming call packets and ACK / abort + * generation. A ref on the call is donated to us by whoever queued the work + * item. */ void rxrpc_process_call(struct work_struct *work) { @@ -827,6 +828,7 @@ void rxrpc_process_call(struct work_struct *work) unsigned long bits; __be32 data, pad; size_t len; + bool requeue = false; int loop, nbit, ioc, ret, mtu; u32 serial, abort_code = RX_PROTOCOL_ERROR; u8 *acks = NULL; @@ -838,6 +840,11 @@ void rxrpc_process_call(struct work_struct *work) call->debug_id, rxrpc_call_states[call->state], call->events, (jiffies - call->creation_jif) / (HZ / 10)); + if (call->state >= RXRPC_CALL_COMPLETE) { + rxrpc_put_call(call, rxrpc_call_put); + return; + } + if (!call->conn) goto skip_msg_init; @@ -1088,16 +1095,21 @@ skip_msg_init: spin_lock_bh(&call->lock); if (call->state == RXRPC_CALL_SERVER_SECURING) { + struct rxrpc_sock *rx; _debug("securing"); - write_lock(&call->socket->call_lock); - if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) && - !test_bit(RXRPC_CALL_EV_RELEASE, &call->events)) { - _debug("not released"); - call->state = RXRPC_CALL_SERVER_ACCEPTING; - list_move_tail(&call->accept_link, - &call->socket->acceptq); + rcu_read_lock(); + rx = rcu_dereference(call->socket); + if (rx) { + write_lock(&rx->call_lock); + if (!test_bit(RXRPC_CALL_RELEASED, &call->flags)) { + _debug("not released"); + call->state = RXRPC_CALL_SERVER_ACCEPTING; + list_move_tail(&call->accept_link, + &rx->acceptq); + } + write_unlock(&rx->call_lock); } - write_unlock(&call->socket->call_lock); + rcu_read_unlock(); read_lock(&call->state_lock); if (call->state < RXRPC_CALL_COMPLETE) set_bit(RXRPC_CALL_EV_POST_ACCEPT, &call->events); @@ -1139,11 +1151,6 @@ skip_msg_init: goto maybe_reschedule; } - if (test_bit(RXRPC_CALL_EV_RELEASE, &call->events)) { - rxrpc_release_call(call); - clear_bit(RXRPC_CALL_EV_RELEASE, &call->events); - } - /* other events may have been raised since we started checking */ goto maybe_reschedule; @@ -1209,10 +1216,8 @@ send_message_2: &msg, iov, ioc, len); if (ret < 0) { _debug("sendmsg failed: %d", ret); - read_lock_bh(&call->state_lock); - if (call->state < RXRPC_CALL_DEAD) - rxrpc_queue_call(call); - read_unlock_bh(&call->state_lock); + if (call->state < RXRPC_CALL_COMPLETE) + requeue = true; goto error; } @@ -1245,41 +1250,22 @@ send_message_2: kill_ACKs: del_timer_sync(&call->ack_timer); - if (test_and_clear_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events)) - rxrpc_put_call(call, rxrpc_call_put); clear_bit(RXRPC_CALL_EV_ACK, &call->events); maybe_reschedule: if (call->events || !skb_queue_empty(&call->rx_queue)) { - read_lock_bh(&call->state_lock); - if (call->state < RXRPC_CALL_DEAD) - rxrpc_queue_call(call); - read_unlock_bh(&call->state_lock); - } - - /* don't leave aborted connections on the accept queue */ - if (call->state >= RXRPC_CALL_COMPLETE && - !list_empty(&call->accept_link)) { - _debug("X unlinking once-pending call %p { e=%lx f=%lx c=%x }", - call, call->events, call->flags, call->conn->proto.cid); - - read_lock_bh(&call->state_lock); - if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) && - !test_and_set_bit(RXRPC_CALL_EV_RELEASE, &call->events)) - rxrpc_queue_call(call); - read_unlock_bh(&call->state_lock); + if (call->state < RXRPC_CALL_COMPLETE) + requeue = true; } error: kfree(acks); - /* because we don't want two CPUs both processing the work item for one - * call at the same time, we use a flag to note when it's busy; however - * this means there's a race between clearing the flag and setting the - * work pending bit and the work item being processed again */ - if (call->events && !work_pending(&call->processor)) { + if ((requeue || call->events) && !work_pending(&call->processor)) { _debug("jumpstart %x", call->conn->proto.cid); - rxrpc_queue_call(call); + __rxrpc_queue_call(call); + } else { + rxrpc_put_call(call, rxrpc_call_put); } _leave(""); diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 83019e489555..be5733d55794 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -24,11 +24,6 @@ */ unsigned int rxrpc_max_call_lifetime = 60 * HZ; -/* - * Time till dead call expires after last use (in jiffies). - */ -unsigned int rxrpc_dead_call_expiry = 2 * HZ; - const char *const rxrpc_call_states[NR__RXRPC_CALL_STATES] = { [RXRPC_CALL_UNINITIALISED] = "Uninit ", [RXRPC_CALL_CLIENT_AWAIT_CONN] = "ClWtConn", @@ -43,7 +38,6 @@ const char *const rxrpc_call_states[NR__RXRPC_CALL_STATES] = { [RXRPC_CALL_SERVER_SEND_REPLY] = "SvSndRpl", [RXRPC_CALL_SERVER_AWAIT_ACK] = "SvAwtACK", [RXRPC_CALL_COMPLETE] = "Complete", - [RXRPC_CALL_DEAD] = "Dead ", }; const char *const rxrpc_call_completions[NR__RXRPC_CALL_COMPLETIONS] = { @@ -74,11 +68,10 @@ struct kmem_cache *rxrpc_call_jar; LIST_HEAD(rxrpc_calls); DEFINE_RWLOCK(rxrpc_call_lock); -static void rxrpc_destroy_call(struct work_struct *work); static void rxrpc_call_life_expired(unsigned long _call); -static void rxrpc_dead_call_expired(unsigned long _call); static void rxrpc_ack_time_expired(unsigned long _call); static void rxrpc_resend_time_expired(unsigned long _call); +static void rxrpc_cleanup_call(struct rxrpc_call *call); /* * find an extant server call @@ -138,13 +131,10 @@ static struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp) setup_timer(&call->lifetimer, &rxrpc_call_life_expired, (unsigned long) call); - setup_timer(&call->deadspan, &rxrpc_dead_call_expired, - (unsigned long) call); setup_timer(&call->ack_timer, &rxrpc_ack_time_expired, (unsigned long) call); setup_timer(&call->resend_timer, &rxrpc_resend_time_expired, (unsigned long) call); - INIT_WORK(&call->destroyer, &rxrpc_destroy_call); INIT_WORK(&call->processor, &rxrpc_process_call); INIT_LIST_HEAD(&call->link); INIT_LIST_HEAD(&call->chan_wait_link); @@ -185,11 +175,9 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx, if (!call) return ERR_PTR(-ENOMEM); call->state = RXRPC_CALL_CLIENT_AWAIT_CONN; - - sock_hold(&rx->sk); - call->socket = rx; call->rx_data_post = 1; call->service_id = srx->srx_service; + rcu_assign_pointer(call->socket, rx); _leave(" = %p", call); return call; @@ -244,8 +232,9 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, return call; } - trace_rxrpc_call(call, 0, atomic_read(&call->usage), 0, here, - (const void *)user_call_ID); + trace_rxrpc_call(call, rxrpc_call_new_client, + atomic_read(&call->usage), 0, + here, (const void *)user_call_ID); /* Publish the call, even though it is incompletely set up as yet */ call->user_call_ID = user_call_ID; @@ -295,8 +284,10 @@ error: list_del_init(&call->link); write_unlock_bh(&rxrpc_call_lock); +error_out: + __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, + RX_CALL_DEAD, ret); set_bit(RXRPC_CALL_RELEASED, &call->flags); - call->state = RXRPC_CALL_DEAD; rxrpc_put_call(call, rxrpc_call_put); _leave(" = %d", ret); return ERR_PTR(ret); @@ -308,11 +299,8 @@ error: */ found_user_ID_now_present: write_unlock(&rx->call_lock); - set_bit(RXRPC_CALL_RELEASED, &call->flags); - call->state = RXRPC_CALL_DEAD; - rxrpc_put_call(call, rxrpc_call_put); - _leave(" = -EEXIST [%p]", call); - return ERR_PTR(-EEXIST); + ret = -EEXIST; + goto error_out; } /* @@ -340,7 +328,6 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx, atomic_read(&candidate->usage), 0, here, NULL); chan = sp->hdr.cid & RXRPC_CHANNELMASK; - candidate->socket = rx; candidate->conn = conn; candidate->peer = conn->params.peer; candidate->cid = sp->hdr.cid; @@ -351,6 +338,7 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx, candidate->flags |= (1 << RXRPC_CALL_IS_SERVICE); if (conn->security_ix > 0) candidate->state = RXRPC_CALL_SERVER_SECURING; + rcu_assign_pointer(candidate->socket, rx); spin_lock(&conn->channel_lock); @@ -411,7 +399,6 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx, candidate = NULL; conn->channels[chan].call_counter = call_id; rcu_assign_pointer(conn->channels[chan].call, call); - sock_hold(&rx->sk); rxrpc_get_connection(conn); rxrpc_get_peer(call->peer); spin_unlock(&conn->channel_lock); @@ -452,6 +439,39 @@ old_call: return ERR_PTR(-ECONNRESET); } +/* + * Queue a call's work processor, getting a ref to pass to the work queue. + */ +bool rxrpc_queue_call(struct rxrpc_call *call) +{ + const void *here = __builtin_return_address(0); + int n = __atomic_add_unless(&call->usage, 1, 0); + int m = atomic_read(&call->skb_count); + if (n == 0) + return false; + if (rxrpc_queue_work(&call->processor)) + trace_rxrpc_call(call, rxrpc_call_queued, n + 1, m, here, NULL); + else + rxrpc_put_call(call, rxrpc_call_put_noqueue); + return true; +} + +/* + * Queue a call's work processor, passing the callers ref to the work queue. + */ +bool __rxrpc_queue_call(struct rxrpc_call *call) +{ + const void *here = __builtin_return_address(0); + int n = atomic_read(&call->usage); + int m = atomic_read(&call->skb_count); + ASSERTCMP(n, >=, 1); + if (rxrpc_queue_work(&call->processor)) + trace_rxrpc_call(call, rxrpc_call_queued_ref, n, m, here, NULL); + else + rxrpc_put_call(call, rxrpc_call_put_noqueue); + return true; +} + /* * Note the re-emergence of a call. */ @@ -493,11 +513,8 @@ void rxrpc_get_call_for_skb(struct rxrpc_call *call, struct sk_buff *skb) /* * detach a call from a socket and set up for release */ -void rxrpc_release_call(struct rxrpc_call *call) +void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) { - struct rxrpc_connection *conn = call->conn; - struct rxrpc_sock *rx = call->socket; - _enter("{%d,%d,%d,%d}", call->debug_id, atomic_read(&call->usage), atomic_read(&call->ackr_not_idle), @@ -513,7 +530,7 @@ void rxrpc_release_call(struct rxrpc_call *call) /* dissociate from the socket * - the socket's ref on the call is passed to the death timer */ - _debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, conn); + _debug("RELEASE CALL %p (%d)", call, call->debug_id); if (call->peer) { spin_lock(&call->peer->lock); @@ -532,20 +549,30 @@ void rxrpc_release_call(struct rxrpc_call *call) rb_erase(&call->sock_node, &rx->calls); memset(&call->sock_node, 0xdd, sizeof(call->sock_node)); clear_bit(RXRPC_CALL_HAS_USERID, &call->flags); + rxrpc_put_call(call, rxrpc_call_put_userid); } write_unlock_bh(&rx->call_lock); /* free up the channel for reuse */ - write_lock_bh(&call->state_lock); + if (call->state == RXRPC_CALL_CLIENT_FINAL_ACK) { + clear_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events); + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK); + rxrpc_call_completed(call); + } else { + write_lock_bh(&call->state_lock); - if (call->state < RXRPC_CALL_COMPLETE && - call->state != RXRPC_CALL_CLIENT_FINAL_ACK) { - _debug("+++ ABORTING STATE %d +++\n", call->state); - __rxrpc_abort_call(call, RX_CALL_DEAD, ECONNRESET); + if (call->state < RXRPC_CALL_COMPLETE) { + _debug("+++ ABORTING STATE %d +++\n", call->state); + __rxrpc_abort_call(call, RX_CALL_DEAD, ECONNRESET); + clear_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events); + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); + } + + write_unlock_bh(&call->state_lock); } - write_unlock_bh(&call->state_lock); - rxrpc_disconnect_call(call); + if (call->conn) + rxrpc_disconnect_call(call); /* clean up the Rx queue */ if (!skb_queue_empty(&call->rx_queue) || @@ -569,52 +596,15 @@ void rxrpc_release_call(struct rxrpc_call *call) } spin_unlock_bh(&call->lock); } + rxrpc_purge_queue(&call->knlrecv_queue); del_timer_sync(&call->resend_timer); del_timer_sync(&call->ack_timer); del_timer_sync(&call->lifetimer); - call->deadspan.expires = jiffies + rxrpc_dead_call_expiry; - add_timer(&call->deadspan); _leave(""); } -/* - * handle a dead call being ready for reaping - */ -static void rxrpc_dead_call_expired(unsigned long _call) -{ - struct rxrpc_call *call = (struct rxrpc_call *) _call; - - _enter("{%d}", call->debug_id); - - rxrpc_see_call(call); - write_lock_bh(&call->state_lock); - call->state = RXRPC_CALL_DEAD; - write_unlock_bh(&call->state_lock); - rxrpc_put_call(call, rxrpc_call_put); -} - -/* - * mark a call as to be released, aborting it if it's still in progress - * - called with softirqs disabled - */ -static void rxrpc_mark_call_released(struct rxrpc_call *call) -{ - bool sched = false; - - rxrpc_see_call(call); - write_lock(&call->state_lock); - if (call->state < RXRPC_CALL_DEAD) { - sched = __rxrpc_abort_call(call, RX_CALL_DEAD, ECONNRESET); - if (!test_and_set_bit(RXRPC_CALL_EV_RELEASE, &call->events)) - sched = true; - } - write_unlock(&call->state_lock); - if (sched) - rxrpc_queue_call(call); -} - /* * release all the calls associated with a socket */ @@ -629,17 +619,17 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx) /* kill the not-yet-accepted incoming calls */ list_for_each_entry(call, &rx->secureq, accept_link) { - rxrpc_mark_call_released(call); + rxrpc_release_call(rx, call); } list_for_each_entry(call, &rx->acceptq, accept_link) { - rxrpc_mark_call_released(call); + rxrpc_release_call(rx, call); } /* mark all the calls as no longer wanting incoming packets */ for (p = rb_first(&rx->calls); p; p = rb_next(p)) { call = rb_entry(p, struct rxrpc_call, sock_node); - rxrpc_mark_call_released(call); + rxrpc_release_call(rx, call); } read_unlock_bh(&rx->call_lock); @@ -663,8 +653,7 @@ void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace op) if (n == 0) { _debug("call %d dead", call->debug_id); WARN_ON(m != 0); - ASSERTCMP(call->state, ==, RXRPC_CALL_DEAD); - rxrpc_queue_work(&call->destroyer); + rxrpc_cleanup_call(call); } } @@ -683,8 +672,7 @@ void rxrpc_put_call_for_skb(struct rxrpc_call *call, struct sk_buff *skb) if (n == 0) { _debug("call %d dead", call->debug_id); WARN_ON(m != 0); - ASSERTCMP(call->state, ==, RXRPC_CALL_DEAD); - rxrpc_queue_work(&call->destroyer); + rxrpc_cleanup_call(call); } } @@ -708,23 +696,19 @@ static void rxrpc_cleanup_call(struct rxrpc_call *call) { _net("DESTROY CALL %d", call->debug_id); - ASSERT(call->socket); + write_lock_bh(&rxrpc_call_lock); + list_del_init(&call->link); + write_unlock_bh(&rxrpc_call_lock); memset(&call->sock_node, 0xcd, sizeof(call->sock_node)); del_timer_sync(&call->lifetimer); - del_timer_sync(&call->deadspan); del_timer_sync(&call->ack_timer); del_timer_sync(&call->resend_timer); + ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); ASSERT(test_bit(RXRPC_CALL_RELEASED, &call->flags)); - ASSERTCMP(call->events, ==, 0); - if (work_pending(&call->processor)) { - _debug("defer destroy"); - rxrpc_queue_work(&call->destroyer); - return; - } - + ASSERT(!work_pending(&call->processor)); ASSERTCMP(call->conn, ==, NULL); if (call->acks_window) { @@ -753,40 +737,21 @@ static void rxrpc_cleanup_call(struct rxrpc_call *call) rxrpc_purge_queue(&call->rx_queue); ASSERT(skb_queue_empty(&call->rx_oos_queue)); rxrpc_purge_queue(&call->knlrecv_queue); - sock_put(&call->socket->sk); call_rcu(&call->rcu, rxrpc_rcu_destroy_call); } /* - * destroy a call - */ -static void rxrpc_destroy_call(struct work_struct *work) -{ - struct rxrpc_call *call = - container_of(work, struct rxrpc_call, destroyer); - - _enter("%p{%d,%x,%p}", - call, atomic_read(&call->usage), call->cid, call->conn); - - ASSERTCMP(call->state, ==, RXRPC_CALL_DEAD); - - write_lock_bh(&rxrpc_call_lock); - list_del_init(&call->link); - write_unlock_bh(&rxrpc_call_lock); - - rxrpc_cleanup_call(call); - _leave(""); -} - -/* - * preemptively destroy all the call records from a transport endpoint rather - * than waiting for them to time out + * Make sure that all calls are gone. */ void __exit rxrpc_destroy_all_calls(void) { struct rxrpc_call *call; _enter(""); + + if (list_empty(&rxrpc_calls)) + return; + write_lock_bh(&rxrpc_call_lock); while (!list_empty(&rxrpc_calls)) { @@ -796,28 +761,15 @@ void __exit rxrpc_destroy_all_calls(void) rxrpc_see_call(call); list_del_init(&call->link); - switch (atomic_read(&call->usage)) { - case 0: - ASSERTCMP(call->state, ==, RXRPC_CALL_DEAD); - break; - case 1: - if (del_timer_sync(&call->deadspan) != 0 && - call->state != RXRPC_CALL_DEAD) - rxrpc_dead_call_expired((unsigned long) call); - if (call->state != RXRPC_CALL_DEAD) - break; - default: - pr_err("Call %p still in use (%d,%d,%s,%lx,%lx)!\n", - call, atomic_read(&call->usage), - atomic_read(&call->ackr_not_idle), - rxrpc_call_states[call->state], - call->flags, call->events); - if (!skb_queue_empty(&call->rx_queue)) - pr_err("Rx queue occupied\n"); - if (!skb_queue_empty(&call->rx_oos_queue)) - pr_err("OOS queue occupied\n"); - break; - } + pr_err("Call %p still in use (%d,%d,%s,%lx,%lx)!\n", + call, atomic_read(&call->usage), + atomic_read(&call->ackr_not_idle), + rxrpc_call_states[call->state], + call->flags, call->events); + if (!skb_queue_empty(&call->rx_queue)) + pr_err("Rx queue occupied\n"); + if (!skb_queue_empty(&call->rx_oos_queue)) + pr_err("OOS queue occupied\n"); write_unlock_bh(&rxrpc_call_lock); cond_resched(); diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 8267f42a7753..79f3f585cdc3 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -39,7 +39,7 @@ int rxrpc_queue_rcv_skb(struct rxrpc_call *call, struct sk_buff *skb, bool force, bool terminal) { struct rxrpc_skb_priv *sp; - struct rxrpc_sock *rx = call->socket; + struct rxrpc_sock *rx; struct sock *sk; int ret; @@ -59,7 +59,15 @@ int rxrpc_queue_rcv_skb(struct rxrpc_call *call, struct sk_buff *skb, return 0; } + /* The socket may go away under us */ + ret = 0; + rcu_read_lock(); + rx = rcu_dereference(call->socket); + if (!rx) + goto out; sk = &rx->sk; + if (sock_flag(sk, SOCK_DEAD)) + goto out; if (!force) { /* cast skb->rcvbuf to unsigned... It's pointless, but @@ -78,7 +86,7 @@ int rxrpc_queue_rcv_skb(struct rxrpc_call *call, struct sk_buff *skb, spin_lock_bh(&sk->sk_receive_queue.lock); if (!test_bit(RXRPC_CALL_TERMINAL_MSG, &call->flags) && !test_bit(RXRPC_CALL_RELEASED, &call->flags) && - call->socket->sk.sk_state != RXRPC_CLOSE) { + sk->sk_state != RXRPC_CLOSE) { skb->destructor = rxrpc_packet_destructor; skb->dev = NULL; skb->sk = sk; @@ -104,8 +112,7 @@ int rxrpc_queue_rcv_skb(struct rxrpc_call *call, struct sk_buff *skb, __skb_queue_tail(&sk->sk_receive_queue, skb); spin_unlock_bh(&sk->sk_receive_queue.lock); - if (!sock_flag(sk, SOCK_DEAD)) - sk->sk_data_ready(sk); + sk->sk_data_ready(sk); } skb = NULL; } else { @@ -115,6 +122,7 @@ int rxrpc_queue_rcv_skb(struct rxrpc_call *call, struct sk_buff *skb, out: rxrpc_free_skb(skb); + rcu_read_unlock(); _leave(" = %d", ret); return ret; @@ -266,7 +274,7 @@ enqueue_packet: skb_queue_tail(&call->rx_queue, skb); atomic_inc(&call->ackr_not_idle); read_lock(&call->state_lock); - if (call->state < RXRPC_CALL_DEAD) + if (call->state < RXRPC_CALL_COMPLETE) rxrpc_queue_call(call); read_unlock(&call->state_lock); _leave(" = 0 [queued]"); @@ -408,7 +416,7 @@ void rxrpc_fast_process_packet(struct rxrpc_call *call, struct sk_buff *skb) case RXRPC_PACKET_TYPE_ACK: /* ACK processing is done in process context */ read_lock_bh(&call->state_lock); - if (call->state < RXRPC_CALL_DEAD) { + if (call->state < RXRPC_CALL_COMPLETE) { skb_queue_tail(&call->rx_queue, skb); rxrpc_queue_call(call); skb = NULL; @@ -511,9 +519,6 @@ static void rxrpc_post_packet_to_call(struct rxrpc_connection *conn, read_lock(&call->state_lock); switch (call->state) { - case RXRPC_CALL_DEAD: - goto dead_call; - case RXRPC_CALL_COMPLETE: switch (call->completion) { case RXRPC_CALL_LOCALLY_ABORTED: @@ -538,7 +543,6 @@ static void rxrpc_post_packet_to_call(struct rxrpc_connection *conn, } read_unlock(&call->state_lock); - rxrpc_get_call(call, rxrpc_call_got); if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA && sp->hdr.flags & RXRPC_JUMBO_PACKET) @@ -546,12 +550,10 @@ static void rxrpc_post_packet_to_call(struct rxrpc_connection *conn, else rxrpc_fast_process_packet(call, skb); - rxrpc_put_call(call, rxrpc_call_put); goto done; resend_final_ack: _debug("final ack again"); - rxrpc_get_call(call, rxrpc_call_got); set_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events); rxrpc_queue_call(call); goto free_unlock; diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 5b5508f6fc2a..8756d74fd74b 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -19,6 +19,151 @@ #include #include "ar-internal.h" +struct rxrpc_pkt_buffer { + struct rxrpc_wire_header whdr; + union { + struct { + struct rxrpc_ackpacket ack; + u8 acks[255]; + u8 pad[3]; + }; + __be32 abort_code; + }; + struct rxrpc_ackinfo ackinfo; +}; + +/* + * Fill out an ACK packet. + */ +static size_t rxrpc_fill_out_ack(struct rxrpc_call *call, + struct rxrpc_pkt_buffer *pkt) +{ + u32 mtu, jmax; + u8 *ackp = pkt->acks; + + pkt->ack.bufferSpace = htons(8); + pkt->ack.maxSkew = htons(0); + pkt->ack.firstPacket = htonl(call->rx_data_eaten + 1); + pkt->ack.previousPacket = htonl(call->ackr_prev_seq); + pkt->ack.serial = htonl(call->ackr_serial); + pkt->ack.reason = RXRPC_ACK_IDLE; + pkt->ack.nAcks = 0; + + mtu = call->peer->if_mtu; + mtu -= call->peer->hdrsize; + jmax = rxrpc_rx_jumbo_max; + pkt->ackinfo.rxMTU = htonl(rxrpc_rx_mtu); + pkt->ackinfo.maxMTU = htonl(mtu); + pkt->ackinfo.rwind = htonl(rxrpc_rx_window_size); + pkt->ackinfo.jumbo_max = htonl(jmax); + + *ackp++ = 0; + *ackp++ = 0; + *ackp++ = 0; + return 3; +} + +/* + * Send a final ACK or ABORT call packet. + */ +int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) +{ + struct rxrpc_connection *conn = NULL; + struct rxrpc_pkt_buffer *pkt; + struct msghdr msg; + struct kvec iov[2]; + rxrpc_serial_t serial; + size_t len, n; + int ioc, ret; + u32 abort_code; + + _enter("%u,%s", call->debug_id, rxrpc_pkts[type]); + + spin_lock_bh(&call->lock); + if (call->conn) + conn = rxrpc_get_connection_maybe(call->conn); + spin_unlock_bh(&call->lock); + if (!conn) + return -ECONNRESET; + + pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); + if (!pkt) { + rxrpc_put_connection(conn); + return -ENOMEM; + } + + serial = atomic_inc_return(&conn->serial); + + msg.msg_name = &call->peer->srx.transport; + msg.msg_namelen = call->peer->srx.transport_len; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = 0; + + pkt->whdr.epoch = htonl(conn->proto.epoch); + pkt->whdr.cid = htonl(call->cid); + pkt->whdr.callNumber = htonl(call->call_id); + pkt->whdr.seq = 0; + pkt->whdr.serial = htonl(serial); + pkt->whdr.type = type; + pkt->whdr.flags = conn->out_clientflag; + pkt->whdr.userStatus = 0; + pkt->whdr.securityIndex = call->security_ix; + pkt->whdr._rsvd = 0; + pkt->whdr.serviceId = htons(call->service_id); + + iov[0].iov_base = pkt; + iov[0].iov_len = sizeof(pkt->whdr); + len = sizeof(pkt->whdr); + + switch (type) { + case RXRPC_PACKET_TYPE_ACK: + spin_lock_bh(&call->lock); + n = rxrpc_fill_out_ack(call, pkt); + call->ackr_reason = 0; + + spin_unlock_bh(&call->lock); + + _proto("Tx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }", + serial, + ntohs(pkt->ack.maxSkew), + ntohl(pkt->ack.firstPacket), + ntohl(pkt->ack.previousPacket), + ntohl(pkt->ack.serial), + rxrpc_acks(pkt->ack.reason), + pkt->ack.nAcks); + + iov[0].iov_len += sizeof(pkt->ack) + n; + iov[1].iov_base = &pkt->ackinfo; + iov[1].iov_len = sizeof(pkt->ackinfo); + len += sizeof(pkt->ack) + n + sizeof(pkt->ackinfo); + ioc = 2; + break; + + case RXRPC_PACKET_TYPE_ABORT: + abort_code = call->abort_code; + pkt->abort_code = htonl(abort_code); + _proto("Tx ABORT %%%u { %d }", serial, abort_code); + iov[0].iov_len += sizeof(pkt->abort_code); + len += sizeof(pkt->abort_code); + ioc = 1; + break; + + default: + BUG(); + ret = -ENOANO; + goto out; + } + + ret = kernel_sendmsg(conn->params.local->socket, + &msg, iov, ioc, len); + +out: + rxrpc_put_connection(conn); + kfree(pkt); + return ret; +} + /* * send a packet through the transport endpoint */ diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c index 82c64055449d..dfad23821a62 100644 --- a/net/rxrpc/proc.c +++ b/net/rxrpc/proc.c @@ -29,6 +29,7 @@ static const char *const rxrpc_conn_states[RXRPC_CONN__NR_STATES] = { */ static void *rxrpc_call_seq_start(struct seq_file *seq, loff_t *_pos) { + rcu_read_lock(); read_lock(&rxrpc_call_lock); return seq_list_start_head(&rxrpc_calls, *_pos); } @@ -41,6 +42,7 @@ static void *rxrpc_call_seq_next(struct seq_file *seq, void *v, loff_t *pos) static void rxrpc_call_seq_stop(struct seq_file *seq, void *v) { read_unlock(&rxrpc_call_lock); + rcu_read_unlock(); } static int rxrpc_call_seq_show(struct seq_file *seq, void *v) @@ -61,7 +63,7 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v) call = list_entry(v, struct rxrpc_call, link); - rx = READ_ONCE(call->socket); + rx = rcu_dereference(call->socket); if (rx) { local = READ_ONCE(rx->local); if (local) diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 97f8ee76c67c..6876ffb3b410 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -18,28 +18,6 @@ #include #include "ar-internal.h" -/* - * removal a call's user ID from the socket tree to make the user ID available - * again and so that it won't be seen again in association with that call - */ -void rxrpc_remove_user_ID(struct rxrpc_sock *rx, struct rxrpc_call *call) -{ - _debug("RELEASE CALL %d", call->debug_id); - - if (test_bit(RXRPC_CALL_HAS_USERID, &call->flags)) { - write_lock_bh(&rx->call_lock); - rb_erase(&call->sock_node, &call->socket->calls); - clear_bit(RXRPC_CALL_HAS_USERID, &call->flags); - write_unlock_bh(&rx->call_lock); - } - - read_lock_bh(&call->state_lock); - if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) && - !test_and_set_bit(RXRPC_CALL_EV_RELEASE, &call->events)) - rxrpc_queue_call(call); - read_unlock_bh(&call->state_lock); -} - /* * receive a message from an RxRPC socket * - we need to be careful about two or more threads calling recvmsg @@ -338,7 +316,7 @@ terminal_message: if (skb_dequeue(&rx->sk.sk_receive_queue) != skb) BUG(); rxrpc_free_skb(skb); - rxrpc_remove_user_ID(rx, call); + rxrpc_release_call(rx, call); } release_sock(&rx->sk); diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c index c0613ab6d2d5..9b8f8456d3bf 100644 --- a/net/rxrpc/skbuff.c +++ b/net/rxrpc/skbuff.c @@ -33,9 +33,6 @@ static void rxrpc_request_final_ACK(struct rxrpc_call *call) call->state = RXRPC_CALL_CLIENT_FINAL_ACK; _debug("request final ACK"); - /* get an extra ref on the call for the final-ACK generator to - * release */ - rxrpc_get_call(call, rxrpc_call_got); set_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events); if (try_to_del_timer_sync(&call->ack_timer) >= 0) rxrpc_queue_call(call); diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c index dc380af8a81e..b7ca8cf13c84 100644 --- a/net/rxrpc/sysctl.c +++ b/net/rxrpc/sysctl.c @@ -88,14 +88,6 @@ static struct ctl_table rxrpc_sysctl_table[] = { .proc_handler = proc_dointvec_jiffies, .extra1 = (void *)&one, }, - { - .procname = "dead_call_expiry", - .data = &rxrpc_dead_call_expiry, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - .extra1 = (void *)&one, - }, /* Non-time values */ { From e8d6bbb05aa5cb985c3661d0db4f858f1d251326 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 7 Sep 2016 16:34:12 +0100 Subject: [PATCH 0796/1715] rxrpc: Fix returns of call completion helpers rxrpc_set_call_completion() returns bool, not int, so the ret variable should match this. rxrpc_call_completed() and __rxrpc_call_completed() should return the value of rxrpc_set_call_completion(). Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 3addda4bfa6b..0353399792b6 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -608,7 +608,7 @@ static inline bool rxrpc_set_call_completion(struct rxrpc_call *call, u32 abort_code, int error) { - int ret; + bool ret; write_lock_bh(&call->state_lock); ret = __rxrpc_set_call_completion(call, compl, abort_code, error); @@ -619,16 +619,19 @@ static inline bool rxrpc_set_call_completion(struct rxrpc_call *call, /* * Record that a call successfully completed. */ -static inline void __rxrpc_call_completed(struct rxrpc_call *call) +static inline bool __rxrpc_call_completed(struct rxrpc_call *call) { - __rxrpc_set_call_completion(call, RXRPC_CALL_SUCCEEDED, 0, 0); + return __rxrpc_set_call_completion(call, RXRPC_CALL_SUCCEEDED, 0, 0); } -static inline void rxrpc_call_completed(struct rxrpc_call *call) +static inline bool rxrpc_call_completed(struct rxrpc_call *call) { + bool ret; + write_lock_bh(&call->state_lock); - __rxrpc_call_completed(call); + ret = __rxrpc_call_completed(call); write_unlock_bh(&call->state_lock); + return ret; } /* From 5a42976d4fe5d7fddce133de995c742c87b1b7e3 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 6 Sep 2016 22:19:51 +0100 Subject: [PATCH 0797/1715] rxrpc: Add tracepoint for working out where aborts happen Add a tracepoint for working out where local aborts happen. Each tracepoint call is labelled with a 3-letter code so that they can be distinguished - and the DATA sequence number is added too where available. rxrpc_kernel_abort_call() also takes a 3-letter code so that AFS can indicate the circumstances when it aborts a call. Signed-off-by: David Howells --- fs/afs/rxrpc.c | 17 +++--- include/net/af_rxrpc.h | 3 +- include/trace/events/rxrpc.h | 29 ++++++++++ net/rxrpc/ar-internal.h | 14 +++-- net/rxrpc/call_event.c | 7 ++- net/rxrpc/call_object.c | 2 +- net/rxrpc/conn_event.c | 6 ++ net/rxrpc/input.c | 7 ++- net/rxrpc/insecure.c | 19 +++--- net/rxrpc/rxkad.c | 108 ++++++++++++++++------------------- net/rxrpc/sendmsg.c | 18 +++--- 11 files changed, 132 insertions(+), 98 deletions(-) diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 37608be52abd..53750dece80e 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -377,7 +377,7 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, return wait_mode->wait(call); error_do_abort: - rxrpc_kernel_abort_call(afs_socket, rxcall, RX_USER_ABORT); + rxrpc_kernel_abort_call(afs_socket, rxcall, RX_USER_ABORT, -ret, "KSD"); error_kill_call: afs_end_call(call); _leave(" = %d", ret); @@ -425,12 +425,12 @@ static void afs_deliver_to_call(struct afs_call *call) case -ENOTCONN: abort_code = RX_CALL_DEAD; rxrpc_kernel_abort_call(afs_socket, call->rxcall, - abort_code); + abort_code, -ret, "KNC"); goto do_abort; case -ENOTSUPP: abort_code = RX_INVALID_OPERATION; rxrpc_kernel_abort_call(afs_socket, call->rxcall, - abort_code); + abort_code, -ret, "KIV"); goto do_abort; case -ENODATA: case -EBADMSG: @@ -440,7 +440,7 @@ static void afs_deliver_to_call(struct afs_call *call) if (call->state != AFS_CALL_AWAIT_REPLY) abort_code = RXGEN_SS_UNMARSHAL; rxrpc_kernel_abort_call(afs_socket, call->rxcall, - abort_code); + abort_code, EBADMSG, "KUM"); goto do_abort; } } @@ -463,6 +463,7 @@ do_abort: */ static int afs_wait_for_call_to_complete(struct afs_call *call) { + const char *abort_why; int ret; DECLARE_WAITQUEUE(myself, current); @@ -481,9 +482,11 @@ static int afs_wait_for_call_to_complete(struct afs_call *call) continue; } + abort_why = "KWC"; ret = call->error; if (call->state == AFS_CALL_COMPLETE) break; + abort_why = "KWI"; ret = -EINTR; if (signal_pending(current)) break; @@ -497,7 +500,7 @@ static int afs_wait_for_call_to_complete(struct afs_call *call) if (call->state < AFS_CALL_COMPLETE) { _debug("call incomplete"); rxrpc_kernel_abort_call(afs_socket, call->rxcall, - RX_CALL_DEAD); + RX_CALL_DEAD, -ret, abort_why); } _debug("call complete"); @@ -695,7 +698,7 @@ void afs_send_empty_reply(struct afs_call *call) case -ENOMEM: _debug("oom"); rxrpc_kernel_abort_call(afs_socket, call->rxcall, - RX_USER_ABORT); + RX_USER_ABORT, ENOMEM, "KOO"); default: afs_end_call(call); _leave(" [error]"); @@ -734,7 +737,7 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len) if (n == -ENOMEM) { _debug("oom"); rxrpc_kernel_abort_call(afs_socket, call->rxcall, - RX_USER_ABORT); + RX_USER_ABORT, ENOMEM, "KOO"); } afs_end_call(call); _leave(" [error]"); diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index b4b6a3664dda..08ed8729126c 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -35,7 +35,8 @@ int rxrpc_kernel_send_data(struct socket *, struct rxrpc_call *, struct msghdr *, size_t); int rxrpc_kernel_recv_data(struct socket *, struct rxrpc_call *, void *, size_t, size_t *, bool, u32 *); -void rxrpc_kernel_abort_call(struct socket *, struct rxrpc_call *, u32); +void rxrpc_kernel_abort_call(struct socket *, struct rxrpc_call *, + u32, int, const char *); void rxrpc_kernel_end_call(struct socket *, struct rxrpc_call *); struct rxrpc_call *rxrpc_kernel_accept_call(struct socket *, unsigned long, rxrpc_notify_rx_t); diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 30164896f1f6..85ee035774ae 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -84,6 +84,35 @@ TRACE_EVENT(rxrpc_skb, __entry->where) ); +TRACE_EVENT(rxrpc_abort, + TP_PROTO(const char *why, u32 cid, u32 call_id, rxrpc_seq_t seq, + int abort_code, int error), + + TP_ARGS(why, cid, call_id, seq, abort_code, error), + + TP_STRUCT__entry( + __array(char, why, 4 ) + __field(u32, cid ) + __field(u32, call_id ) + __field(rxrpc_seq_t, seq ) + __field(int, abort_code ) + __field(int, error ) + ), + + TP_fast_assign( + memcpy(__entry->why, why, 4); + __entry->cid = cid; + __entry->call_id = call_id; + __entry->abort_code = abort_code; + __entry->error = error; + __entry->seq = seq; + ), + + TP_printk("%08x:%08x s=%u a=%d e=%d %s", + __entry->cid, __entry->call_id, __entry->seq, + __entry->abort_code, __entry->error, __entry->why) + ); + #endif /* _TRACE_RXRPC_H */ /* This part must be outside protection */ diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 0353399792b6..dbfb9ed17483 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -155,7 +155,8 @@ struct rxrpc_security { void *); /* verify the security on a received packet */ - int (*verify_packet)(struct rxrpc_call *, struct sk_buff *, u32 *); + int (*verify_packet)(struct rxrpc_call *, struct sk_buff *, + rxrpc_seq_t, u16); /* issue a challenge */ int (*issue_challenge)(struct rxrpc_connection *); @@ -637,9 +638,12 @@ static inline bool rxrpc_call_completed(struct rxrpc_call *call) /* * Record that a call is locally aborted. */ -static inline bool __rxrpc_abort_call(struct rxrpc_call *call, +static inline bool __rxrpc_abort_call(const char *why, struct rxrpc_call *call, + rxrpc_seq_t seq, u32 abort_code, int error) { + trace_rxrpc_abort(why, call->cid, call->call_id, seq, + abort_code, error); if (__rxrpc_set_call_completion(call, RXRPC_CALL_LOCALLY_ABORTED, abort_code, error)) { @@ -649,13 +653,13 @@ static inline bool __rxrpc_abort_call(struct rxrpc_call *call, return false; } -static inline bool rxrpc_abort_call(struct rxrpc_call *call, - u32 abort_code, int error) +static inline bool rxrpc_abort_call(const char *why, struct rxrpc_call *call, + rxrpc_seq_t seq, u32 abort_code, int error) { bool ret; write_lock_bh(&call->state_lock); - ret = __rxrpc_abort_call(call, abort_code, error); + ret = __rxrpc_abort_call(why, call, seq, abort_code, error); write_unlock_bh(&call->state_lock); return ret; } diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 8365d3366114..af88ad7d2cf9 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -598,7 +598,8 @@ process_further: /* secured packets must be verified and possibly decrypted */ if (call->conn->security->verify_packet(call, skb, - _abort_code) < 0) + sp->hdr.seq, + sp->hdr.cksum) < 0) goto protocol_error; rxrpc_insert_oos_packet(call, skb); @@ -982,7 +983,7 @@ skip_msg_init: } if (test_bit(RXRPC_CALL_EV_LIFE_TIMER, &call->events)) { - rxrpc_abort_call(call, RX_CALL_TIMEOUT, ETIME); + rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, ETIME); _debug("post timeout"); if (rxrpc_post_message(call, RXRPC_SKB_MARK_LOCAL_ERROR, @@ -1005,7 +1006,7 @@ skip_msg_init: case -EKEYEXPIRED: case -EKEYREJECTED: case -EPROTO: - rxrpc_abort_call(call, abort_code, -ret); + rxrpc_abort_call("PRO", call, 0, abort_code, -ret); goto kill_ACKs; } } diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index be5733d55794..9efd9b0b0bdf 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -563,7 +563,7 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) if (call->state < RXRPC_CALL_COMPLETE) { _debug("+++ ABORTING STATE %d +++\n", call->state); - __rxrpc_abort_call(call, RX_CALL_DEAD, ECONNRESET); + __rxrpc_abort_call("SKT", call, 0, RX_CALL_DEAD, ECONNRESET); clear_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events); rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); } diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 9db90f4f768d..8c7938ba6a84 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -158,6 +158,11 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn, lockdep_is_held(&conn->channel_lock)); if (call) { rxrpc_see_call(call); + if (compl == RXRPC_CALL_LOCALLY_ABORTED) + trace_rxrpc_abort("CON", call->cid, + call->call_id, 0, + abort_code, error); + write_lock_bh(&call->state_lock); if (rxrpc_set_call_completion(call, compl, abort_code, error)) { @@ -167,6 +172,7 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn, write_unlock_bh(&call->state_lock); if (queue) rxrpc_queue_call(call); + } } diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 79f3f585cdc3..8e624109750a 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -429,7 +429,7 @@ protocol_error: _debug("protocol error"); write_lock_bh(&call->state_lock); protocol_error_locked: - if (__rxrpc_abort_call(call, RX_PROTOCOL_ERROR, EPROTO)) + if (__rxrpc_abort_call("FPR", call, 0, RX_PROTOCOL_ERROR, EPROTO)) rxrpc_queue_call(call); free_packet_unlock: write_unlock_bh(&call->state_lock); @@ -495,9 +495,10 @@ static void rxrpc_process_jumbo_packet(struct rxrpc_call *call, protocol_error: _debug("protocol error"); rxrpc_free_skb(part); - rxrpc_free_skb(jumbo); - if (rxrpc_abort_call(call, RX_PROTOCOL_ERROR, EPROTO)) + if (rxrpc_abort_call("PJP", call, sp->hdr.seq, + RX_PROTOCOL_ERROR, EPROTO)) rxrpc_queue_call(call); + rxrpc_free_skb(jumbo); _leave(""); } diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c index c21ad213b337..a4aba0246731 100644 --- a/net/rxrpc/insecure.c +++ b/net/rxrpc/insecure.c @@ -23,31 +23,32 @@ static int none_prime_packet_security(struct rxrpc_connection *conn) } static int none_secure_packet(struct rxrpc_call *call, - struct sk_buff *skb, - size_t data_size, - void *sechdr) + struct sk_buff *skb, + size_t data_size, + void *sechdr) { return 0; } static int none_verify_packet(struct rxrpc_call *call, - struct sk_buff *skb, - u32 *_abort_code) + struct sk_buff *skb, + rxrpc_seq_t seq, + u16 expected_cksum) { return 0; } static int none_respond_to_challenge(struct rxrpc_connection *conn, - struct sk_buff *skb, - u32 *_abort_code) + struct sk_buff *skb, + u32 *_abort_code) { *_abort_code = RX_PROTOCOL_ERROR; return -EPROTO; } static int none_verify_response(struct rxrpc_connection *conn, - struct sk_buff *skb, - u32 *_abort_code) + struct sk_buff *skb, + u32 *_abort_code) { *_abort_code = RX_PROTOCOL_ERROR; return -EPROTO; diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index 89f475febfd7..3777432df10b 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -316,12 +316,10 @@ static int rxkad_secure_packet(struct rxrpc_call *call, /* * decrypt partial encryption on a packet (level 1 security) */ -static int rxkad_verify_packet_auth(const struct rxrpc_call *call, - struct sk_buff *skb, - u32 *_abort_code) +static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, + rxrpc_seq_t seq) { struct rxkad_level1_hdr sechdr; - struct rxrpc_skb_priv *sp; SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher); struct rxrpc_crypt iv; struct scatterlist sg[16]; @@ -332,7 +330,10 @@ static int rxkad_verify_packet_auth(const struct rxrpc_call *call, _enter(""); - sp = rxrpc_skb(skb); + if (skb->len < 8) { + rxrpc_abort_call("V1H", call, seq, RXKADSEALEDINCON, EPROTO); + goto protocol_error; + } /* we want to decrypt the skbuff in-place */ nsg = skb_cow_data(skb, 0, &trailer); @@ -351,9 +352,11 @@ static int rxkad_verify_packet_auth(const struct rxrpc_call *call, crypto_skcipher_decrypt(req); skcipher_request_zero(req); - /* remove the decrypted packet length */ - if (skb_copy_bits(skb, 0, &sechdr, sizeof(sechdr)) < 0) - goto datalen_error; + /* Extract the decrypted packet length */ + if (skb_copy_bits(skb, 0, &sechdr, sizeof(sechdr)) < 0) { + rxrpc_abort_call("XV1", call, seq, RXKADDATALEN, EPROTO); + goto protocol_error; + } if (!skb_pull(skb, sizeof(sechdr))) BUG(); @@ -361,24 +364,24 @@ static int rxkad_verify_packet_auth(const struct rxrpc_call *call, data_size = buf & 0xffff; check = buf >> 16; - check ^= sp->hdr.seq ^ sp->hdr.callNumber; + check ^= seq ^ call->call_id; check &= 0xffff; if (check != 0) { - *_abort_code = RXKADSEALEDINCON; + rxrpc_abort_call("V1C", call, seq, RXKADSEALEDINCON, EPROTO); goto protocol_error; } /* shorten the packet to remove the padding */ - if (data_size > skb->len) - goto datalen_error; - else if (data_size < skb->len) + if (data_size > skb->len) { + rxrpc_abort_call("V1L", call, seq, RXKADDATALEN, EPROTO); + goto protocol_error; + } + if (data_size < skb->len) skb->len = data_size; _leave(" = 0 [dlen=%x]", data_size); return 0; -datalen_error: - *_abort_code = RXKADDATALEN; protocol_error: _leave(" = -EPROTO"); return -EPROTO; @@ -391,13 +394,11 @@ nomem: /* * wholly decrypt a packet (level 2 security) */ -static int rxkad_verify_packet_encrypt(const struct rxrpc_call *call, - struct sk_buff *skb, - u32 *_abort_code) +static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, + rxrpc_seq_t seq) { const struct rxrpc_key_token *token; struct rxkad_level2_hdr sechdr; - struct rxrpc_skb_priv *sp; SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher); struct rxrpc_crypt iv; struct scatterlist _sg[4], *sg; @@ -408,7 +409,10 @@ static int rxkad_verify_packet_encrypt(const struct rxrpc_call *call, _enter(",{%d}", skb->len); - sp = rxrpc_skb(skb); + if (skb->len < 8) { + rxrpc_abort_call("V2H", call, seq, RXKADSEALEDINCON, EPROTO); + goto protocol_error; + } /* we want to decrypt the skbuff in-place */ nsg = skb_cow_data(skb, 0, &trailer); @@ -437,9 +441,11 @@ static int rxkad_verify_packet_encrypt(const struct rxrpc_call *call, if (sg != _sg) kfree(sg); - /* remove the decrypted packet length */ - if (skb_copy_bits(skb, 0, &sechdr, sizeof(sechdr)) < 0) - goto datalen_error; + /* Extract the decrypted packet length */ + if (skb_copy_bits(skb, 0, &sechdr, sizeof(sechdr)) < 0) { + rxrpc_abort_call("XV2", call, seq, RXKADDATALEN, EPROTO); + goto protocol_error; + } if (!skb_pull(skb, sizeof(sechdr))) BUG(); @@ -447,24 +453,23 @@ static int rxkad_verify_packet_encrypt(const struct rxrpc_call *call, data_size = buf & 0xffff; check = buf >> 16; - check ^= sp->hdr.seq ^ sp->hdr.callNumber; + check ^= seq ^ call->call_id; check &= 0xffff; if (check != 0) { - *_abort_code = RXKADSEALEDINCON; + rxrpc_abort_call("V2C", call, seq, RXKADSEALEDINCON, EPROTO); goto protocol_error; } - /* shorten the packet to remove the padding */ - if (data_size > skb->len) - goto datalen_error; - else if (data_size < skb->len) + if (data_size > skb->len) { + rxrpc_abort_call("V2L", call, seq, RXKADDATALEN, EPROTO); + goto protocol_error; + } + if (data_size < skb->len) skb->len = data_size; _leave(" = 0 [dlen=%x]", data_size); return 0; -datalen_error: - *_abort_code = RXKADDATALEN; protocol_error: _leave(" = -EPROTO"); return -EPROTO; @@ -475,40 +480,30 @@ nomem: } /* - * verify the security on a received packet + * Verify the security on a received packet or subpacket (if part of a + * jumbo packet). */ -static int rxkad_verify_packet(struct rxrpc_call *call, - struct sk_buff *skb, - u32 *_abort_code) +static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb, + rxrpc_seq_t seq, u16 expected_cksum) { SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher); - struct rxrpc_skb_priv *sp; struct rxrpc_crypt iv; struct scatterlist sg; u16 cksum; u32 x, y; - int ret; - - sp = rxrpc_skb(skb); _enter("{%d{%x}},{#%u}", - call->debug_id, key_serial(call->conn->params.key), sp->hdr.seq); + call->debug_id, key_serial(call->conn->params.key), seq); if (!call->conn->cipher) return 0; - if (sp->hdr.securityIndex != RXRPC_SECURITY_RXKAD) { - *_abort_code = RXKADINCONSISTENCY; - _leave(" = -EPROTO [not rxkad]"); - return -EPROTO; - } - /* continue encrypting from where we left off */ memcpy(&iv, call->conn->csum_iv.x, sizeof(iv)); /* validate the security checksum */ x = (call->cid & RXRPC_CHANNELMASK) << (32 - RXRPC_CIDSHIFT); - x |= sp->hdr.seq & 0x3fffffff; + x |= seq & 0x3fffffff; call->crypto_buf[0] = htonl(call->call_id); call->crypto_buf[1] = htonl(x); @@ -524,29 +519,22 @@ static int rxkad_verify_packet(struct rxrpc_call *call, if (cksum == 0) cksum = 1; /* zero checksums are not permitted */ - if (sp->hdr.cksum != cksum) { - *_abort_code = RXKADSEALEDINCON; + if (cksum != expected_cksum) { + rxrpc_abort_call("VCK", call, seq, RXKADSEALEDINCON, EPROTO); _leave(" = -EPROTO [csum failed]"); return -EPROTO; } switch (call->conn->params.security_level) { case RXRPC_SECURITY_PLAIN: - ret = 0; - break; + return 0; case RXRPC_SECURITY_AUTH: - ret = rxkad_verify_packet_auth(call, skb, _abort_code); - break; + return rxkad_verify_packet_1(call, skb, seq); case RXRPC_SECURITY_ENCRYPT: - ret = rxkad_verify_packet_encrypt(call, skb, _abort_code); - break; + return rxkad_verify_packet_2(call, skb, seq); default: - ret = -ENOANO; - break; + return -ENOANO; } - - _leave(" = %d", ret); - return ret; } /* diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 2439aff131c7..9a4af992fcdf 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -454,14 +454,15 @@ static int rxrpc_sendmsg_cmsg(struct msghdr *msg, /* * abort a call, sending an ABORT packet to the peer */ -static void rxrpc_send_abort(struct rxrpc_call *call, u32 abort_code) +static void rxrpc_send_abort(struct rxrpc_call *call, const char *why, + u32 abort_code, int error) { if (call->state >= RXRPC_CALL_COMPLETE) return; write_lock_bh(&call->state_lock); - if (__rxrpc_abort_call(call, abort_code, ECONNABORTED)) { + if (__rxrpc_abort_call(why, call, 0, abort_code, error)) { del_timer_sync(&call->resend_timer); del_timer_sync(&call->ack_timer); clear_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events); @@ -556,7 +557,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) /* it's too late for this call */ ret = -ESHUTDOWN; } else if (cmd == RXRPC_CMD_SEND_ABORT) { - rxrpc_send_abort(call, abort_code); + rxrpc_send_abort(call, "CMD", abort_code, ECONNABORTED); ret = 0; } else if (cmd != RXRPC_CMD_SEND_DATA) { ret = -EINVAL; @@ -626,20 +627,19 @@ EXPORT_SYMBOL(rxrpc_kernel_send_data); * @sock: The socket the call is on * @call: The call to be aborted * @abort_code: The abort code to stick into the ABORT packet + * @error: Local error value + * @why: 3-char string indicating why. * * Allow a kernel service to abort a call, if it's still in an abortable state. */ void rxrpc_kernel_abort_call(struct socket *sock, struct rxrpc_call *call, - u32 abort_code) + u32 abort_code, int error, const char *why) { - _enter("{%d},%d", call->debug_id, abort_code); + _enter("{%d},%d,%d,%s", call->debug_id, abort_code, error, why); lock_sock(sock->sk); - _debug("CALL %d USR %lx ST %d on CONN %p", - call->debug_id, call->user_call_ID, call->state, call->conn); - - rxrpc_send_abort(call, abort_code); + rxrpc_send_abort(call, why, abort_code, error); release_sock(sock->sk); _leave(""); From 733ade23de1b72c1f11c5e4a1a9020a6f48decd2 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Tue, 6 Sep 2016 21:31:17 -0700 Subject: [PATCH 0798/1715] netlink: don't forget to release a rhashtable_iter structure This bug was detected by kmemleak: unreferenced object 0xffff8804269cc3c0 (size 64): comm "criu", pid 1042, jiffies 4294907360 (age 13.713s) hex dump (first 32 bytes): a0 32 cc 2c 04 88 ff ff 00 00 00 00 00 00 00 00 .2.,............ 00 01 00 00 00 00 ad de 00 02 00 00 00 00 ad de ................ backtrace: [] kmemleak_alloc+0x4a/0xa0 [] kmem_cache_alloc_trace+0x10f/0x280 [] __netlink_diag_dump+0x26c/0x290 [netlink_diag] v2: don't remove a reference on a rhashtable_iter structure to release it from netlink_diag_dump_done Cc: Herbert Xu Fixes: ad202074320c ("netlink: Use rhashtable walk interface in diag dump") Signed-off-by: Andrei Vagin Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/netlink/diag.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/netlink/diag.c b/net/netlink/diag.c index 3e3e2534478a..b2f0e986a6f4 100644 --- a/net/netlink/diag.c +++ b/net/netlink/diag.c @@ -127,7 +127,6 @@ stop: goto done; rhashtable_walk_exit(hti); - cb->args[2] = 0; num++; mc_list: From f95bf346226b9b79352e05508beececc807cc37a Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Wed, 7 Sep 2016 13:38:35 +0900 Subject: [PATCH 0799/1715] net: diag: make udp_diag_destroy work for mapped addresses. udp_diag_destroy does look up the IPv4 UDP hashtable for mapped addresses, but it gets the IPv4 address to look up from the beginning of the IPv6 address instead of the end. Tested: https://android-review.googlesource.com/269874 Fixes: 5d77dca82839 ("net: diag: support SOCK_DESTROY for UDP sockets") Signed-off-by: Lorenzo Colitti Acked-by: Eric Dumazet Acked-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/udp_diag.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index 8a9f6e535caa..58b79c0c0d69 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -186,8 +186,8 @@ static int __udp_diag_destroy(struct sk_buff *in_skb, if (ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_dst) && ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_src)) sk = __udp4_lib_lookup(net, - req->id.idiag_dst[0], req->id.idiag_dport, - req->id.idiag_src[0], req->id.idiag_sport, + req->id.idiag_dst[3], req->id.idiag_dport, + req->id.idiag_src[3], req->id.idiag_sport, req->id.idiag_if, tbl, NULL); else From 72e8d5fdf58b7d398b31612e63cc376f43c9da1b Mon Sep 17 00:00:00 2001 From: Baoyou Xie Date: Wed, 7 Sep 2016 19:07:00 +0800 Subject: [PATCH 0800/1715] qed: add missing header dependencies We get 4 warnings when building kernel with W=1: drivers/net/ethernet/qlogic/qed/qed_selftest.c:6:5: warning: no previous prototype for 'qed_selftest_memory' [-Wmissing-prototypes] drivers/net/ethernet/qlogic/qed/qed_selftest.c:19:5: warning: no previous prototype for 'qed_selftest_interrupt' [-Wmissing-prototypes] drivers/net/ethernet/qlogic/qed/qed_selftest.c:32:5: warning: no previous prototype for 'qed_selftest_register' [-Wmissing-prototypes] drivers/net/ethernet/qlogic/qed/qed_selftest.c:55:5: warning: no previous prototype for 'qed_selftest_clock' [-Wmissing-prototypes] In fact, these functions are declared in qed_selftest.h, so this patch add missing header dependencies. Signed-off-by: Baoyou Xie Acked-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_selftest.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_selftest.c b/drivers/net/ethernet/qlogic/qed/qed_selftest.c index a342bfe4280d..9b7678f26909 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_selftest.c +++ b/drivers/net/ethernet/qlogic/qed/qed_selftest.c @@ -2,6 +2,7 @@ #include "qed_dev_api.h" #include "qed_mcp.h" #include "qed_sp.h" +#include "qed_selftest.h" int qed_selftest_memory(struct qed_dev *cdev) { From 936f0600de541416ec8d82037e0e277538c9f945 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Wed, 7 Sep 2016 15:27:09 +0200 Subject: [PATCH 0801/1715] kaweth: remove obsolete debugging statements SOme statements in the driver only served to inform which functions were entered. Ftrace can do that just as good without needing memory. Remove the statements. Signed-off-by: Oliver Neukum Signed-off-by: David S. Miller --- drivers/net/usb/kaweth.c | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/drivers/net/usb/kaweth.c b/drivers/net/usb/kaweth.c index 528b9c9c4e60..66b34ddbe216 100644 --- a/drivers/net/usb/kaweth.c +++ b/drivers/net/usb/kaweth.c @@ -265,8 +265,6 @@ static int kaweth_control(struct kaweth_device *kaweth, struct usb_ctrlrequest *dr; int retval; - netdev_dbg(kaweth->net, "kaweth_control()\n"); - if(in_interrupt()) { netdev_dbg(kaweth->net, "in_interrupt()\n"); return -EBUSY; @@ -300,8 +298,6 @@ static int kaweth_read_configuration(struct kaweth_device *kaweth) { int retval; - netdev_dbg(kaweth->net, "Reading kaweth configuration\n"); - retval = kaweth_control(kaweth, usb_rcvctrlpipe(kaweth->dev, 0), KAWETH_COMMAND_GET_ETHERNET_DESC, @@ -451,8 +447,6 @@ static int kaweth_trigger_firmware(struct kaweth_device *kaweth, kaweth->firmware_buf[6] = 0x00; kaweth->firmware_buf[7] = 0x00; - netdev_dbg(kaweth->net, "Triggering firmware\n"); - return kaweth_control(kaweth, usb_sndctrlpipe(kaweth->dev, 0), KAWETH_COMMAND_SCAN, @@ -471,7 +465,6 @@ static int kaweth_reset(struct kaweth_device *kaweth) { int result; - netdev_dbg(kaweth->net, "kaweth_reset(%p)\n", kaweth); result = usb_reset_configuration(kaweth->dev); mdelay(10); @@ -685,8 +678,6 @@ static int kaweth_open(struct net_device *net) struct kaweth_device *kaweth = netdev_priv(net); int res; - netdev_dbg(kaweth->net, "Opening network device.\n"); - res = usb_autopm_get_interface(kaweth->intf); if (res) { dev_err(&kaweth->intf->dev, "Interface cannot be resumed.\n"); @@ -951,7 +942,6 @@ static int kaweth_suspend(struct usb_interface *intf, pm_message_t message) struct kaweth_device *kaweth = usb_get_intfdata(intf); unsigned long flags; - dev_dbg(&intf->dev, "Suspending device\n"); spin_lock_irqsave(&kaweth->device_lock, flags); kaweth->status |= KAWETH_STATUS_SUSPENDING; spin_unlock_irqrestore(&kaweth->device_lock, flags); @@ -968,7 +958,6 @@ static int kaweth_resume(struct usb_interface *intf) struct kaweth_device *kaweth = usb_get_intfdata(intf); unsigned long flags; - dev_dbg(&intf->dev, "Resuming device\n"); spin_lock_irqsave(&kaweth->device_lock, flags); kaweth->status &= ~KAWETH_STATUS_SUSPENDING; spin_unlock_irqrestore(&kaweth->device_lock, flags); @@ -1190,8 +1179,6 @@ err_fw: dev_info(dev, "kaweth interface created at %s\n", kaweth->net->name); - dev_dbg(dev, "Kaweth probe returning.\n"); - return 0; err_intfdata: @@ -1219,8 +1206,6 @@ static void kaweth_disconnect(struct usb_interface *intf) struct kaweth_device *kaweth = usb_get_intfdata(intf); struct net_device *netdev; - dev_info(&intf->dev, "Unregistering\n"); - usb_set_intfdata(intf, NULL); if (!kaweth) { dev_warn(&intf->dev, "unregistering non-existent device\n"); From c965db44462919f613973aa618271f6c3f5a1e64 Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Wed, 7 Sep 2016 16:36:24 +0300 Subject: [PATCH 0802/1715] qed: Add support for debug data collection This patch adds the support for dumping and formatting the HW/FW debug data. Signed-off-by: Tomer Tayar Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/Makefile | 2 +- drivers/net/ethernet/qlogic/qed/qed.h | 20 + drivers/net/ethernet/qlogic/qed/qed_debug.c | 6898 +++++++++++++++++ drivers/net/ethernet/qlogic/qed/qed_debug.h | 54 + drivers/net/ethernet/qlogic/qed/qed_hsi.h | 1056 ++- drivers/net/ethernet/qlogic/qed/qed_main.c | 4 + .../net/ethernet/qlogic/qed/qed_reg_addr.h | 894 +++ include/linux/qed/common_hsi.h | 3 + 8 files changed, 8919 insertions(+), 12 deletions(-) create mode 100644 drivers/net/ethernet/qlogic/qed/qed_debug.c create mode 100644 drivers/net/ethernet/qlogic/qed/qed_debug.h diff --git a/drivers/net/ethernet/qlogic/qed/Makefile b/drivers/net/ethernet/qlogic/qed/Makefile index d1f157e439cf..86a5b4f5f870 100644 --- a/drivers/net/ethernet/qlogic/qed/Makefile +++ b/drivers/net/ethernet/qlogic/qed/Makefile @@ -2,5 +2,5 @@ obj-$(CONFIG_QED) := qed.o qed-y := qed_cxt.o qed_dev.o qed_hw.o qed_init_fw_funcs.o qed_init_ops.o \ qed_int.o qed_main.o qed_mcp.o qed_sp_commands.o qed_spq.o qed_l2.o \ - qed_selftest.o qed_dcbx.o + qed_selftest.o qed_dcbx.o qed_debug.o qed-$(CONFIG_QED_SRIOV) += qed_sriov.o qed_vf.o diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index 2d67469eb8f6..0929582fc82b 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -23,6 +23,7 @@ #include #include #include +#include "qed_debug.h" #include "qed_hsi.h" extern const struct qed_common_ops qed_common_ops_pass; @@ -395,6 +396,8 @@ struct qed_hwfn { /* Buffer for unzipping firmware data */ void *unzip_buf; + struct dbg_tools_data dbg_info; + struct qed_simd_fp_handler simd_proto_handler[64]; #ifdef CONFIG_QED_SRIOV @@ -430,6 +433,19 @@ struct qed_int_params { u8 fp_msix_cnt; }; +struct qed_dbg_feature { + struct dentry *dentry; + u8 *dump_buf; + u32 buf_size; + u32 dumped_dwords; +}; + +struct qed_dbg_params { + struct qed_dbg_feature features[DBG_FEATURE_NUM]; + u8 engine_for_debug; + bool print_data; +}; + struct qed_dev { u32 dp_module; u8 dp_level; @@ -444,6 +460,8 @@ struct qed_dev { CHIP_REV_IS_A0(dev)) #define QED_IS_BB_B0(dev) (QED_IS_BB(dev) && \ CHIP_REV_IS_B0(dev)) +#define QED_IS_AH(dev) ((dev)->type == QED_DEV_TYPE_AH) +#define QED_IS_K2(dev) QED_IS_AH(dev) #define QED_GET_TYPE(dev) (QED_IS_BB_A0(dev) ? CHIP_BB_A0 : \ QED_IS_BB_B0(dev) ? CHIP_BB_B0 : CHIP_K2) @@ -544,6 +562,8 @@ struct qed_dev { } protocol_ops; void *ops_cookie; + struct qed_dbg_params dbg_params; + const struct firmware *firmware; }; diff --git a/drivers/net/ethernet/qlogic/qed/qed_debug.c b/drivers/net/ethernet/qlogic/qed/qed_debug.c new file mode 100644 index 000000000000..88e7d5bef909 --- /dev/null +++ b/drivers/net/ethernet/qlogic/qed/qed_debug.c @@ -0,0 +1,6898 @@ +/* QLogic qed NIC Driver + * Copyright (c) 2015 QLogic Corporation + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. + */ + +#include +#include +#include +#include "qed.h" +#include "qed_hsi.h" +#include "qed_hw.h" +#include "qed_mcp.h" +#include "qed_reg_addr.h" + +/* Chip IDs enum */ +enum chip_ids { + CHIP_RESERVED, + CHIP_BB_B0, + CHIP_K2, + MAX_CHIP_IDS +}; + +/* Memory groups enum */ +enum mem_groups { + MEM_GROUP_PXP_MEM, + MEM_GROUP_DMAE_MEM, + MEM_GROUP_CM_MEM, + MEM_GROUP_QM_MEM, + MEM_GROUP_TM_MEM, + MEM_GROUP_BRB_RAM, + MEM_GROUP_BRB_MEM, + MEM_GROUP_PRS_MEM, + MEM_GROUP_SDM_MEM, + MEM_GROUP_PBUF, + MEM_GROUP_IOR, + MEM_GROUP_RAM, + MEM_GROUP_BTB_RAM, + MEM_GROUP_RDIF_CTX, + MEM_GROUP_TDIF_CTX, + MEM_GROUP_CONN_CFC_MEM, + MEM_GROUP_TASK_CFC_MEM, + MEM_GROUP_CAU_PI, + MEM_GROUP_CAU_MEM, + MEM_GROUP_PXP_ILT, + MEM_GROUP_MULD_MEM, + MEM_GROUP_BTB_MEM, + MEM_GROUP_IGU_MEM, + MEM_GROUP_IGU_MSIX, + MEM_GROUP_CAU_SB, + MEM_GROUP_BMB_RAM, + MEM_GROUP_BMB_MEM, + MEM_GROUPS_NUM +}; + +/* Memory groups names */ +static const char * const s_mem_group_names[] = { + "PXP_MEM", + "DMAE_MEM", + "CM_MEM", + "QM_MEM", + "TM_MEM", + "BRB_RAM", + "BRB_MEM", + "PRS_MEM", + "SDM_MEM", + "PBUF", + "IOR", + "RAM", + "BTB_RAM", + "RDIF_CTX", + "TDIF_CTX", + "CONN_CFC_MEM", + "TASK_CFC_MEM", + "CAU_PI", + "CAU_MEM", + "PXP_ILT", + "MULD_MEM", + "BTB_MEM", + "IGU_MEM", + "IGU_MSIX", + "CAU_SB", + "BMB_RAM", + "BMB_MEM", +}; + +/* Idle check conditions */ +static u32 cond4(const u32 *r, const u32 *imm) +{ + return ((r[0] & imm[0]) != imm[1]) && ((r[1] & imm[2]) != imm[3]); +} + +static u32 cond6(const u32 *r, const u32 *imm) +{ + return ((r[0] >> imm[0]) & imm[1]) != imm[2]; +} + +static u32 cond5(const u32 *r, const u32 *imm) +{ + return (r[0] & imm[0]) != imm[1]; +} + +static u32 cond8(const u32 *r, const u32 *imm) +{ + return ((r[0] & imm[0]) >> imm[1]) != + (((r[0] & imm[2]) >> imm[3]) | ((r[1] & imm[4]) << imm[5])); +} + +static u32 cond9(const u32 *r, const u32 *imm) +{ + return ((r[0] & imm[0]) >> imm[1]) != (r[0] & imm[2]); +} + +static u32 cond1(const u32 *r, const u32 *imm) +{ + return (r[0] & ~imm[0]) != imm[1]; +} + +static u32 cond0(const u32 *r, const u32 *imm) +{ + return r[0] != imm[0]; +} + +static u32 cond10(const u32 *r, const u32 *imm) +{ + return r[0] != r[1] && r[2] == imm[0]; +} + +static u32 cond11(const u32 *r, const u32 *imm) +{ + return r[0] != r[1] && r[2] > imm[0]; +} + +static u32 cond3(const u32 *r, const u32 *imm) +{ + return r[0] != r[1]; +} + +static u32 cond12(const u32 *r, const u32 *imm) +{ + return r[0] & imm[0]; +} + +static u32 cond7(const u32 *r, const u32 *imm) +{ + return r[0] < (r[1] - imm[0]); +} + +static u32 cond2(const u32 *r, const u32 *imm) +{ + return r[0] > imm[0]; +} + +/* Array of Idle Check conditions */ +static u32(*cond_arr[]) (const u32 *r, const u32 *imm) = { + cond0, + cond1, + cond2, + cond3, + cond4, + cond5, + cond6, + cond7, + cond8, + cond9, + cond10, + cond11, + cond12, +}; + +/******************************* Data Types **********************************/ + +enum platform_ids { + PLATFORM_ASIC, + PLATFORM_RESERVED, + PLATFORM_RESERVED2, + PLATFORM_RESERVED3, + MAX_PLATFORM_IDS +}; + +struct dbg_array { + const u32 *ptr; + u32 size_in_dwords; +}; + +/* Chip constant definitions */ +struct chip_defs { + const char *name; + struct { + u8 num_ports; + u8 num_pfs; + } per_platform[MAX_PLATFORM_IDS]; +}; + +/* Platform constant definitions */ +struct platform_defs { + const char *name; + u32 delay_factor; +}; + +/* Storm constant definitions */ +struct storm_defs { + char letter; + enum block_id block_id; + enum dbg_bus_clients dbg_client_id[MAX_CHIP_IDS]; + bool has_vfc; + u32 sem_fast_mem_addr; + u32 sem_frame_mode_addr; + u32 sem_slow_enable_addr; + u32 sem_slow_mode_addr; + u32 sem_slow_mode1_conf_addr; + u32 sem_sync_dbg_empty_addr; + u32 sem_slow_dbg_empty_addr; + u32 cm_ctx_wr_addr; + u32 cm_conn_ag_ctx_lid_size; /* In quad-regs */ + u32 cm_conn_ag_ctx_rd_addr; + u32 cm_conn_st_ctx_lid_size; /* In quad-regs */ + u32 cm_conn_st_ctx_rd_addr; + u32 cm_task_ag_ctx_lid_size; /* In quad-regs */ + u32 cm_task_ag_ctx_rd_addr; + u32 cm_task_st_ctx_lid_size; /* In quad-regs */ + u32 cm_task_st_ctx_rd_addr; +}; + +/* Block constant definitions */ +struct block_defs { + const char *name; + bool has_dbg_bus[MAX_CHIP_IDS]; + bool associated_to_storm; + u32 storm_id; /* Valid only if associated_to_storm is true */ + enum dbg_bus_clients dbg_client_id[MAX_CHIP_IDS]; + u32 dbg_select_addr; + u32 dbg_cycle_enable_addr; + u32 dbg_shift_addr; + u32 dbg_force_valid_addr; + u32 dbg_force_frame_addr; + bool has_reset_bit; + bool unreset; /* If true, the block is taken out of reset before dump */ + enum dbg_reset_regs reset_reg; + u8 reset_bit_offset; /* Bit offset in reset register */ +}; + +/* Reset register definitions */ +struct reset_reg_defs { + u32 addr; + u32 unreset_val; + bool exists[MAX_CHIP_IDS]; +}; + +struct grc_param_defs { + u32 default_val[MAX_CHIP_IDS]; + u32 min; + u32 max; + bool is_preset; + u32 exclude_all_preset_val; + u32 crash_preset_val; +}; + +struct rss_mem_defs { + const char *mem_name; + const char *type_name; + u32 addr; /* In 128b units */ + u32 num_entries[MAX_CHIP_IDS]; + u32 entry_width[MAX_CHIP_IDS]; /* In bits */ +}; + +struct vfc_ram_defs { + const char *mem_name; + const char *type_name; + u32 base_row; + u32 num_rows; +}; + +struct big_ram_defs { + const char *instance_name; + enum mem_groups mem_group_id; + enum mem_groups ram_mem_group_id; + enum dbg_grc_params grc_param; + u32 addr_reg_addr; + u32 data_reg_addr; + u32 num_of_blocks[MAX_CHIP_IDS]; +}; + +struct phy_defs { + const char *phy_name; + u32 base_addr; + u32 tbus_addr_lo_addr; + u32 tbus_addr_hi_addr; + u32 tbus_data_lo_addr; + u32 tbus_data_hi_addr; +}; + +/******************************** Constants **********************************/ + +#define MAX_LCIDS 320 +#define MAX_LTIDS 320 +#define NUM_IOR_SETS 2 +#define IORS_PER_SET 176 +#define IOR_SET_OFFSET(set_id) ((set_id) * 256) +#define BYTES_IN_DWORD sizeof(u32) + +/* In the macros below, size and offset are specified in bits */ +#define CEIL_DWORDS(size) DIV_ROUND_UP(size, 32) +#define FIELD_BIT_OFFSET(type, field) type ## _ ## field ## _ ## OFFSET +#define FIELD_BIT_SIZE(type, field) type ## _ ## field ## _ ## SIZE +#define FIELD_DWORD_OFFSET(type, field) \ + (int)(FIELD_BIT_OFFSET(type, field) / 32) +#define FIELD_DWORD_SHIFT(type, field) (FIELD_BIT_OFFSET(type, field) % 32) +#define FIELD_BIT_MASK(type, field) \ + (((1 << FIELD_BIT_SIZE(type, field)) - 1) << \ + FIELD_DWORD_SHIFT(type, field)) +#define SET_VAR_FIELD(var, type, field, val) \ + do { \ + var[FIELD_DWORD_OFFSET(type, field)] &= \ + (~FIELD_BIT_MASK(type, field)); \ + var[FIELD_DWORD_OFFSET(type, field)] |= \ + (val) << FIELD_DWORD_SHIFT(type, field); \ + } while (0) +#define ARR_REG_WR(dev, ptt, addr, arr, arr_size) \ + do { \ + for (i = 0; i < (arr_size); i++) \ + qed_wr(dev, ptt, addr, (arr)[i]); \ + } while (0) +#define ARR_REG_RD(dev, ptt, addr, arr, arr_size) \ + do { \ + for (i = 0; i < (arr_size); i++) \ + (arr)[i] = qed_rd(dev, ptt, addr); \ + } while (0) + +#define DWORDS_TO_BYTES(dwords) ((dwords) * BYTES_IN_DWORD) +#define BYTES_TO_DWORDS(bytes) ((bytes) / BYTES_IN_DWORD) +#define RAM_LINES_TO_DWORDS(lines) ((lines) * 2) +#define RAM_LINES_TO_BYTES(lines) \ + DWORDS_TO_BYTES(RAM_LINES_TO_DWORDS(lines)) +#define REG_DUMP_LEN_SHIFT 24 +#define MEM_DUMP_ENTRY_SIZE_DWORDS \ + BYTES_TO_DWORDS(sizeof(struct dbg_dump_mem)) +#define IDLE_CHK_RULE_SIZE_DWORDS \ + BYTES_TO_DWORDS(sizeof(struct dbg_idle_chk_rule)) +#define IDLE_CHK_RESULT_HDR_DWORDS \ + BYTES_TO_DWORDS(sizeof(struct dbg_idle_chk_result_hdr)) +#define IDLE_CHK_RESULT_REG_HDR_DWORDS \ + BYTES_TO_DWORDS(sizeof(struct dbg_idle_chk_result_reg_hdr)) +#define IDLE_CHK_MAX_ENTRIES_SIZE 32 + +/* The sizes and offsets below are specified in bits */ +#define VFC_CAM_CMD_STRUCT_SIZE 64 +#define VFC_CAM_CMD_ROW_OFFSET 48 +#define VFC_CAM_CMD_ROW_SIZE 9 +#define VFC_CAM_ADDR_STRUCT_SIZE 16 +#define VFC_CAM_ADDR_OP_OFFSET 0 +#define VFC_CAM_ADDR_OP_SIZE 4 +#define VFC_CAM_RESP_STRUCT_SIZE 256 +#define VFC_RAM_ADDR_STRUCT_SIZE 16 +#define VFC_RAM_ADDR_OP_OFFSET 0 +#define VFC_RAM_ADDR_OP_SIZE 2 +#define VFC_RAM_ADDR_ROW_OFFSET 2 +#define VFC_RAM_ADDR_ROW_SIZE 10 +#define VFC_RAM_RESP_STRUCT_SIZE 256 +#define VFC_CAM_CMD_DWORDS CEIL_DWORDS(VFC_CAM_CMD_STRUCT_SIZE) +#define VFC_CAM_ADDR_DWORDS CEIL_DWORDS(VFC_CAM_ADDR_STRUCT_SIZE) +#define VFC_CAM_RESP_DWORDS CEIL_DWORDS(VFC_CAM_RESP_STRUCT_SIZE) +#define VFC_RAM_CMD_DWORDS VFC_CAM_CMD_DWORDS +#define VFC_RAM_ADDR_DWORDS CEIL_DWORDS(VFC_RAM_ADDR_STRUCT_SIZE) +#define VFC_RAM_RESP_DWORDS CEIL_DWORDS(VFC_RAM_RESP_STRUCT_SIZE) +#define NUM_VFC_RAM_TYPES 4 +#define VFC_CAM_NUM_ROWS 512 +#define VFC_OPCODE_CAM_RD 14 +#define VFC_OPCODE_RAM_RD 0 +#define NUM_RSS_MEM_TYPES 5 +#define NUM_BIG_RAM_TYPES 3 +#define BIG_RAM_BLOCK_SIZE_BYTES 128 +#define BIG_RAM_BLOCK_SIZE_DWORDS \ + BYTES_TO_DWORDS(BIG_RAM_BLOCK_SIZE_BYTES) +#define NUM_PHY_TBUS_ADDRESSES 2048 +#define PHY_DUMP_SIZE_DWORDS (NUM_PHY_TBUS_ADDRESSES / 2) +#define RESET_REG_UNRESET_OFFSET 4 +#define STALL_DELAY_MS 500 +#define STATIC_DEBUG_LINE_DWORDS 9 +#define NUM_DBG_BUS_LINES 256 +#define NUM_COMMON_GLOBAL_PARAMS 8 +#define FW_IMG_MAIN 1 +#define REG_FIFO_DEPTH_ELEMENTS 32 +#define REG_FIFO_ELEMENT_DWORDS 2 +#define REG_FIFO_DEPTH_DWORDS \ + (REG_FIFO_ELEMENT_DWORDS * REG_FIFO_DEPTH_ELEMENTS) +#define IGU_FIFO_DEPTH_ELEMENTS 64 +#define IGU_FIFO_ELEMENT_DWORDS 4 +#define IGU_FIFO_DEPTH_DWORDS \ + (IGU_FIFO_ELEMENT_DWORDS * IGU_FIFO_DEPTH_ELEMENTS) +#define PROTECTION_OVERRIDE_DEPTH_ELEMENTS 20 +#define PROTECTION_OVERRIDE_ELEMENT_DWORDS 2 +#define PROTECTION_OVERRIDE_DEPTH_DWORDS \ + (PROTECTION_OVERRIDE_DEPTH_ELEMENTS * \ + PROTECTION_OVERRIDE_ELEMENT_DWORDS) +#define MCP_SPAD_TRACE_OFFSIZE_ADDR \ + (MCP_REG_SCRATCH + \ + offsetof(struct static_init, sections[SPAD_SECTION_TRACE])) +#define MCP_TRACE_META_IMAGE_SIGNATURE 0x669955aa +#define EMPTY_FW_VERSION_STR "???_???_???_???" +#define EMPTY_FW_IMAGE_STR "???????????????" + +/***************************** Constant Arrays *******************************/ + +/* Debug arrays */ +static struct dbg_array s_dbg_arrays[MAX_BIN_DBG_BUFFER_TYPE] = { {0} }; + +/* Chip constant definitions array */ +static struct chip_defs s_chip_defs[MAX_CHIP_IDS] = { + { "reserved", { {0, 0}, {0, 0}, {0, 0}, {0, 0} } }, + { "bb_b0", + { {MAX_NUM_PORTS_BB, MAX_NUM_PFS_BB}, {0, 0}, {0, 0}, {0, 0} } }, + { "k2", { {MAX_NUM_PORTS_K2, MAX_NUM_PFS_K2}, {0, 0}, {0, 0}, {0, 0} } } +}; + +/* Storm constant definitions array */ +static struct storm_defs s_storm_defs[] = { + /* Tstorm */ + {'T', BLOCK_TSEM, + {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, + DBG_BUS_CLIENT_RBCT}, true, + TSEM_REG_FAST_MEMORY, + TSEM_REG_DBG_FRAME_MODE, TSEM_REG_SLOW_DBG_ACTIVE, + TSEM_REG_SLOW_DBG_MODE, TSEM_REG_DBG_MODE1_CFG, + TSEM_REG_SYNC_DBG_EMPTY, TSEM_REG_SLOW_DBG_EMPTY, + TCM_REG_CTX_RBC_ACCS, + 4, TCM_REG_AGG_CON_CTX, + 16, TCM_REG_SM_CON_CTX, + 2, TCM_REG_AGG_TASK_CTX, + 4, TCM_REG_SM_TASK_CTX}, + /* Mstorm */ + {'M', BLOCK_MSEM, + {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, + DBG_BUS_CLIENT_RBCM}, false, + MSEM_REG_FAST_MEMORY, + MSEM_REG_DBG_FRAME_MODE, MSEM_REG_SLOW_DBG_ACTIVE, + MSEM_REG_SLOW_DBG_MODE, MSEM_REG_DBG_MODE1_CFG, + MSEM_REG_SYNC_DBG_EMPTY, MSEM_REG_SLOW_DBG_EMPTY, + MCM_REG_CTX_RBC_ACCS, + 1, MCM_REG_AGG_CON_CTX, + 10, MCM_REG_SM_CON_CTX, + 2, MCM_REG_AGG_TASK_CTX, + 7, MCM_REG_SM_TASK_CTX}, + /* Ustorm */ + {'U', BLOCK_USEM, + {DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU, + DBG_BUS_CLIENT_RBCU}, false, + USEM_REG_FAST_MEMORY, + USEM_REG_DBG_FRAME_MODE, USEM_REG_SLOW_DBG_ACTIVE, + USEM_REG_SLOW_DBG_MODE, USEM_REG_DBG_MODE1_CFG, + USEM_REG_SYNC_DBG_EMPTY, USEM_REG_SLOW_DBG_EMPTY, + UCM_REG_CTX_RBC_ACCS, + 2, UCM_REG_AGG_CON_CTX, + 13, UCM_REG_SM_CON_CTX, + 3, UCM_REG_AGG_TASK_CTX, + 3, UCM_REG_SM_TASK_CTX}, + /* Xstorm */ + {'X', BLOCK_XSEM, + {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX, + DBG_BUS_CLIENT_RBCX}, false, + XSEM_REG_FAST_MEMORY, + XSEM_REG_DBG_FRAME_MODE, XSEM_REG_SLOW_DBG_ACTIVE, + XSEM_REG_SLOW_DBG_MODE, XSEM_REG_DBG_MODE1_CFG, + XSEM_REG_SYNC_DBG_EMPTY, XSEM_REG_SLOW_DBG_EMPTY, + XCM_REG_CTX_RBC_ACCS, + 9, XCM_REG_AGG_CON_CTX, + 15, XCM_REG_SM_CON_CTX, + 0, 0, + 0, 0}, + /* Ystorm */ + {'Y', BLOCK_YSEM, + {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX, + DBG_BUS_CLIENT_RBCY}, false, + YSEM_REG_FAST_MEMORY, + YSEM_REG_DBG_FRAME_MODE, YSEM_REG_SLOW_DBG_ACTIVE, + YSEM_REG_SLOW_DBG_MODE, YSEM_REG_DBG_MODE1_CFG, + YSEM_REG_SYNC_DBG_EMPTY, TSEM_REG_SLOW_DBG_EMPTY, + YCM_REG_CTX_RBC_ACCS, + 2, YCM_REG_AGG_CON_CTX, + 3, YCM_REG_SM_CON_CTX, + 2, YCM_REG_AGG_TASK_CTX, + 12, YCM_REG_SM_TASK_CTX}, + /* Pstorm */ + {'P', BLOCK_PSEM, + {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS, + DBG_BUS_CLIENT_RBCS}, true, + PSEM_REG_FAST_MEMORY, + PSEM_REG_DBG_FRAME_MODE, PSEM_REG_SLOW_DBG_ACTIVE, + PSEM_REG_SLOW_DBG_MODE, PSEM_REG_DBG_MODE1_CFG, + PSEM_REG_SYNC_DBG_EMPTY, PSEM_REG_SLOW_DBG_EMPTY, + PCM_REG_CTX_RBC_ACCS, + 0, 0, + 10, PCM_REG_SM_CON_CTX, + 0, 0, + 0, 0} +}; + +/* Block definitions array */ +static struct block_defs block_grc_defs = { + "grc", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCN, DBG_BUS_CLIENT_RBCN, DBG_BUS_CLIENT_RBCN}, + GRC_REG_DBG_SELECT, GRC_REG_DBG_DWORD_ENABLE, + GRC_REG_DBG_SHIFT, GRC_REG_DBG_FORCE_VALID, + GRC_REG_DBG_FORCE_FRAME, + true, false, DBG_RESET_REG_MISC_PL_UA, 1 +}; + +static struct block_defs block_miscs_defs = { + "miscs", {false, false, false}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS}, + 0, 0, 0, 0, 0, + false, false, MAX_DBG_RESET_REGS, 0 +}; + +static struct block_defs block_misc_defs = { + "misc", {false, false, false}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS}, + 0, 0, 0, 0, 0, + false, false, MAX_DBG_RESET_REGS, 0 +}; + +static struct block_defs block_dbu_defs = { + "dbu", {false, false, false}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS}, + 0, 0, 0, 0, 0, + false, false, MAX_DBG_RESET_REGS, 0 +}; + +static struct block_defs block_pglue_b_defs = { + "pglue_b", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCH, DBG_BUS_CLIENT_RBCH, DBG_BUS_CLIENT_RBCH}, + PGLUE_B_REG_DBG_SELECT, PGLUE_B_REG_DBG_DWORD_ENABLE, + PGLUE_B_REG_DBG_SHIFT, PGLUE_B_REG_DBG_FORCE_VALID, + PGLUE_B_REG_DBG_FORCE_FRAME, + true, false, DBG_RESET_REG_MISCS_PL_HV, 1 +}; + +static struct block_defs block_cnig_defs = { + "cnig", {false, false, true}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCW}, + CNIG_REG_DBG_SELECT_K2, CNIG_REG_DBG_DWORD_ENABLE_K2, + CNIG_REG_DBG_SHIFT_K2, CNIG_REG_DBG_FORCE_VALID_K2, + CNIG_REG_DBG_FORCE_FRAME_K2, + true, false, DBG_RESET_REG_MISCS_PL_HV, 0 +}; + +static struct block_defs block_cpmu_defs = { + "cpmu", {false, false, false}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS}, + 0, 0, 0, 0, 0, + true, false, DBG_RESET_REG_MISCS_PL_HV, 8 +}; + +static struct block_defs block_ncsi_defs = { + "ncsi", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCZ, DBG_BUS_CLIENT_RBCZ, DBG_BUS_CLIENT_RBCZ}, + NCSI_REG_DBG_SELECT, NCSI_REG_DBG_DWORD_ENABLE, + NCSI_REG_DBG_SHIFT, NCSI_REG_DBG_FORCE_VALID, + NCSI_REG_DBG_FORCE_FRAME, + true, false, DBG_RESET_REG_MISCS_PL_HV, 5 +}; + +static struct block_defs block_opte_defs = { + "opte", {false, false, false}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS}, + 0, 0, 0, 0, 0, + true, false, DBG_RESET_REG_MISCS_PL_HV, 4 +}; + +static struct block_defs block_bmb_defs = { + "bmb", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCZ, DBG_BUS_CLIENT_RBCZ, DBG_BUS_CLIENT_RBCB}, + BMB_REG_DBG_SELECT, BMB_REG_DBG_DWORD_ENABLE, + BMB_REG_DBG_SHIFT, BMB_REG_DBG_FORCE_VALID, + BMB_REG_DBG_FORCE_FRAME, + true, false, DBG_RESET_REG_MISCS_PL_UA, 7 +}; + +static struct block_defs block_pcie_defs = { + "pcie", {false, false, true}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCH}, + PCIE_REG_DBG_COMMON_SELECT, PCIE_REG_DBG_COMMON_DWORD_ENABLE, + PCIE_REG_DBG_COMMON_SHIFT, PCIE_REG_DBG_COMMON_FORCE_VALID, + PCIE_REG_DBG_COMMON_FORCE_FRAME, + false, false, MAX_DBG_RESET_REGS, 0 +}; + +static struct block_defs block_mcp_defs = { + "mcp", {false, false, false}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS}, + 0, 0, 0, 0, 0, + false, false, MAX_DBG_RESET_REGS, 0 +}; + +static struct block_defs block_mcp2_defs = { + "mcp2", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCZ, DBG_BUS_CLIENT_RBCZ, DBG_BUS_CLIENT_RBCZ}, + MCP2_REG_DBG_SELECT, MCP2_REG_DBG_DWORD_ENABLE, + MCP2_REG_DBG_SHIFT, MCP2_REG_DBG_FORCE_VALID, + MCP2_REG_DBG_FORCE_FRAME, + false, false, MAX_DBG_RESET_REGS, 0 +}; + +static struct block_defs block_pswhst_defs = { + "pswhst", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP}, + PSWHST_REG_DBG_SELECT, PSWHST_REG_DBG_DWORD_ENABLE, + PSWHST_REG_DBG_SHIFT, PSWHST_REG_DBG_FORCE_VALID, + PSWHST_REG_DBG_FORCE_FRAME, + true, false, DBG_RESET_REG_MISC_PL_HV, 0 +}; + +static struct block_defs block_pswhst2_defs = { + "pswhst2", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP}, + PSWHST2_REG_DBG_SELECT, PSWHST2_REG_DBG_DWORD_ENABLE, + PSWHST2_REG_DBG_SHIFT, PSWHST2_REG_DBG_FORCE_VALID, + PSWHST2_REG_DBG_FORCE_FRAME, + true, false, DBG_RESET_REG_MISC_PL_HV, 0 +}; + +static struct block_defs block_pswrd_defs = { + "pswrd", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP}, + PSWRD_REG_DBG_SELECT, PSWRD_REG_DBG_DWORD_ENABLE, + PSWRD_REG_DBG_SHIFT, PSWRD_REG_DBG_FORCE_VALID, + PSWRD_REG_DBG_FORCE_FRAME, + true, false, DBG_RESET_REG_MISC_PL_HV, 2 +}; + +static struct block_defs block_pswrd2_defs = { + "pswrd2", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP}, + PSWRD2_REG_DBG_SELECT, PSWRD2_REG_DBG_DWORD_ENABLE, + PSWRD2_REG_DBG_SHIFT, PSWRD2_REG_DBG_FORCE_VALID, + PSWRD2_REG_DBG_FORCE_FRAME, + true, false, DBG_RESET_REG_MISC_PL_HV, 2 +}; + +static struct block_defs block_pswwr_defs = { + "pswwr", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP}, + PSWWR_REG_DBG_SELECT, PSWWR_REG_DBG_DWORD_ENABLE, + PSWWR_REG_DBG_SHIFT, PSWWR_REG_DBG_FORCE_VALID, + PSWWR_REG_DBG_FORCE_FRAME, + true, false, DBG_RESET_REG_MISC_PL_HV, 3 +}; + +static struct block_defs block_pswwr2_defs = { + "pswwr2", {false, false, false}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS}, + 0, 0, 0, 0, 0, + true, false, DBG_RESET_REG_MISC_PL_HV, 3 +}; + +static struct block_defs block_pswrq_defs = { + "pswrq", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP}, + PSWRQ_REG_DBG_SELECT, PSWRQ_REG_DBG_DWORD_ENABLE, + PSWRQ_REG_DBG_SHIFT, PSWRQ_REG_DBG_FORCE_VALID, + PSWRQ_REG_DBG_FORCE_FRAME, + true, false, DBG_RESET_REG_MISC_PL_HV, 1 +}; + +static struct block_defs block_pswrq2_defs = { + "pswrq2", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP}, + PSWRQ2_REG_DBG_SELECT, PSWRQ2_REG_DBG_DWORD_ENABLE, + PSWRQ2_REG_DBG_SHIFT, PSWRQ2_REG_DBG_FORCE_VALID, + PSWRQ2_REG_DBG_FORCE_FRAME, + true, false, DBG_RESET_REG_MISC_PL_HV, 1 +}; + +static struct block_defs block_pglcs_defs = { + "pglcs", {false, false, true}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCH}, + PGLCS_REG_DBG_SELECT, PGLCS_REG_DBG_DWORD_ENABLE, + PGLCS_REG_DBG_SHIFT, PGLCS_REG_DBG_FORCE_VALID, + PGLCS_REG_DBG_FORCE_FRAME, + true, false, DBG_RESET_REG_MISCS_PL_HV, 2 +}; + +static struct block_defs block_ptu_defs = { + "ptu", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP}, + PTU_REG_DBG_SELECT, PTU_REG_DBG_DWORD_ENABLE, + PTU_REG_DBG_SHIFT, PTU_REG_DBG_FORCE_VALID, + PTU_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 20 +}; + +static struct block_defs block_dmae_defs = { + "dmae", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP}, + DMAE_REG_DBG_SELECT, DMAE_REG_DBG_DWORD_ENABLE, + DMAE_REG_DBG_SHIFT, DMAE_REG_DBG_FORCE_VALID, + DMAE_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 28 +}; + +static struct block_defs block_tcm_defs = { + "tcm", {true, true, true}, true, DBG_TSTORM_ID, + {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT}, + TCM_REG_DBG_SELECT, TCM_REG_DBG_DWORD_ENABLE, + TCM_REG_DBG_SHIFT, TCM_REG_DBG_FORCE_VALID, + TCM_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 5 +}; + +static struct block_defs block_mcm_defs = { + "mcm", {true, true, true}, true, DBG_MSTORM_ID, + {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCM}, + MCM_REG_DBG_SELECT, MCM_REG_DBG_DWORD_ENABLE, + MCM_REG_DBG_SHIFT, MCM_REG_DBG_FORCE_VALID, + MCM_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 3 +}; + +static struct block_defs block_ucm_defs = { + "ucm", {true, true, true}, true, DBG_USTORM_ID, + {DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU}, + UCM_REG_DBG_SELECT, UCM_REG_DBG_DWORD_ENABLE, + UCM_REG_DBG_SHIFT, UCM_REG_DBG_FORCE_VALID, + UCM_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 8 +}; + +static struct block_defs block_xcm_defs = { + "xcm", {true, true, true}, true, DBG_XSTORM_ID, + {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX}, + XCM_REG_DBG_SELECT, XCM_REG_DBG_DWORD_ENABLE, + XCM_REG_DBG_SHIFT, XCM_REG_DBG_FORCE_VALID, + XCM_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 19 +}; + +static struct block_defs block_ycm_defs = { + "ycm", {true, true, true}, true, DBG_YSTORM_ID, + {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCY}, + YCM_REG_DBG_SELECT, YCM_REG_DBG_DWORD_ENABLE, + YCM_REG_DBG_SHIFT, YCM_REG_DBG_FORCE_VALID, + YCM_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 5 +}; + +static struct block_defs block_pcm_defs = { + "pcm", {true, true, true}, true, DBG_PSTORM_ID, + {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS}, + PCM_REG_DBG_SELECT, PCM_REG_DBG_DWORD_ENABLE, + PCM_REG_DBG_SHIFT, PCM_REG_DBG_FORCE_VALID, + PCM_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 4 +}; + +static struct block_defs block_qm_defs = { + "qm", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCQ}, + QM_REG_DBG_SELECT, QM_REG_DBG_DWORD_ENABLE, + QM_REG_DBG_SHIFT, QM_REG_DBG_FORCE_VALID, + QM_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 16 +}; + +static struct block_defs block_tm_defs = { + "tm", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS}, + TM_REG_DBG_SELECT, TM_REG_DBG_DWORD_ENABLE, + TM_REG_DBG_SHIFT, TM_REG_DBG_FORCE_VALID, + TM_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 17 +}; + +static struct block_defs block_dorq_defs = { + "dorq", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCY}, + DORQ_REG_DBG_SELECT, DORQ_REG_DBG_DWORD_ENABLE, + DORQ_REG_DBG_SHIFT, DORQ_REG_DBG_FORCE_VALID, + DORQ_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 18 +}; + +static struct block_defs block_brb_defs = { + "brb", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCR, DBG_BUS_CLIENT_RBCR, DBG_BUS_CLIENT_RBCR}, + BRB_REG_DBG_SELECT, BRB_REG_DBG_DWORD_ENABLE, + BRB_REG_DBG_SHIFT, BRB_REG_DBG_FORCE_VALID, + BRB_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 0 +}; + +static struct block_defs block_src_defs = { + "src", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCF, DBG_BUS_CLIENT_RBCF, DBG_BUS_CLIENT_RBCF}, + SRC_REG_DBG_SELECT, SRC_REG_DBG_DWORD_ENABLE, + SRC_REG_DBG_SHIFT, SRC_REG_DBG_FORCE_VALID, + SRC_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 2 +}; + +static struct block_defs block_prs_defs = { + "prs", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCR, DBG_BUS_CLIENT_RBCR, DBG_BUS_CLIENT_RBCR}, + PRS_REG_DBG_SELECT, PRS_REG_DBG_DWORD_ENABLE, + PRS_REG_DBG_SHIFT, PRS_REG_DBG_FORCE_VALID, + PRS_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 1 +}; + +static struct block_defs block_tsdm_defs = { + "tsdm", {true, true, true}, true, DBG_TSTORM_ID, + {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT}, + TSDM_REG_DBG_SELECT, TSDM_REG_DBG_DWORD_ENABLE, + TSDM_REG_DBG_SHIFT, TSDM_REG_DBG_FORCE_VALID, + TSDM_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 3 +}; + +static struct block_defs block_msdm_defs = { + "msdm", {true, true, true}, true, DBG_MSTORM_ID, + {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCM}, + MSDM_REG_DBG_SELECT, MSDM_REG_DBG_DWORD_ENABLE, + MSDM_REG_DBG_SHIFT, MSDM_REG_DBG_FORCE_VALID, + MSDM_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 6 +}; + +static struct block_defs block_usdm_defs = { + "usdm", {true, true, true}, true, DBG_USTORM_ID, + {DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU}, + USDM_REG_DBG_SELECT, USDM_REG_DBG_DWORD_ENABLE, + USDM_REG_DBG_SHIFT, USDM_REG_DBG_FORCE_VALID, + USDM_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 7 +}; + +static struct block_defs block_xsdm_defs = { + "xsdm", {true, true, true}, true, DBG_XSTORM_ID, + {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX}, + XSDM_REG_DBG_SELECT, XSDM_REG_DBG_DWORD_ENABLE, + XSDM_REG_DBG_SHIFT, XSDM_REG_DBG_FORCE_VALID, + XSDM_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 20 +}; + +static struct block_defs block_ysdm_defs = { + "ysdm", {true, true, true}, true, DBG_YSTORM_ID, + {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCY}, + YSDM_REG_DBG_SELECT, YSDM_REG_DBG_DWORD_ENABLE, + YSDM_REG_DBG_SHIFT, YSDM_REG_DBG_FORCE_VALID, + YSDM_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 8 +}; + +static struct block_defs block_psdm_defs = { + "psdm", {true, true, true}, true, DBG_PSTORM_ID, + {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS}, + PSDM_REG_DBG_SELECT, PSDM_REG_DBG_DWORD_ENABLE, + PSDM_REG_DBG_SHIFT, PSDM_REG_DBG_FORCE_VALID, + PSDM_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 7 +}; + +static struct block_defs block_tsem_defs = { + "tsem", {true, true, true}, true, DBG_TSTORM_ID, + {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT}, + TSEM_REG_DBG_SELECT, TSEM_REG_DBG_DWORD_ENABLE, + TSEM_REG_DBG_SHIFT, TSEM_REG_DBG_FORCE_VALID, + TSEM_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 4 +}; + +static struct block_defs block_msem_defs = { + "msem", {true, true, true}, true, DBG_MSTORM_ID, + {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCM}, + MSEM_REG_DBG_SELECT, MSEM_REG_DBG_DWORD_ENABLE, + MSEM_REG_DBG_SHIFT, MSEM_REG_DBG_FORCE_VALID, + MSEM_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 9 +}; + +static struct block_defs block_usem_defs = { + "usem", {true, true, true}, true, DBG_USTORM_ID, + {DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU}, + USEM_REG_DBG_SELECT, USEM_REG_DBG_DWORD_ENABLE, + USEM_REG_DBG_SHIFT, USEM_REG_DBG_FORCE_VALID, + USEM_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 9 +}; + +static struct block_defs block_xsem_defs = { + "xsem", {true, true, true}, true, DBG_XSTORM_ID, + {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX}, + XSEM_REG_DBG_SELECT, XSEM_REG_DBG_DWORD_ENABLE, + XSEM_REG_DBG_SHIFT, XSEM_REG_DBG_FORCE_VALID, + XSEM_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 21 +}; + +static struct block_defs block_ysem_defs = { + "ysem", {true, true, true}, true, DBG_YSTORM_ID, + {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCY}, + YSEM_REG_DBG_SELECT, YSEM_REG_DBG_DWORD_ENABLE, + YSEM_REG_DBG_SHIFT, YSEM_REG_DBG_FORCE_VALID, + YSEM_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 11 +}; + +static struct block_defs block_psem_defs = { + "psem", {true, true, true}, true, DBG_PSTORM_ID, + {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS}, + PSEM_REG_DBG_SELECT, PSEM_REG_DBG_DWORD_ENABLE, + PSEM_REG_DBG_SHIFT, PSEM_REG_DBG_FORCE_VALID, + PSEM_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 10 +}; + +static struct block_defs block_rss_defs = { + "rss", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT}, + RSS_REG_DBG_SELECT, RSS_REG_DBG_DWORD_ENABLE, + RSS_REG_DBG_SHIFT, RSS_REG_DBG_FORCE_VALID, + RSS_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 18 +}; + +static struct block_defs block_tmld_defs = { + "tmld", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCM}, + TMLD_REG_DBG_SELECT, TMLD_REG_DBG_DWORD_ENABLE, + TMLD_REG_DBG_SHIFT, TMLD_REG_DBG_FORCE_VALID, + TMLD_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 13 +}; + +static struct block_defs block_muld_defs = { + "muld", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU}, + MULD_REG_DBG_SELECT, MULD_REG_DBG_DWORD_ENABLE, + MULD_REG_DBG_SHIFT, MULD_REG_DBG_FORCE_VALID, + MULD_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 14 +}; + +static struct block_defs block_yuld_defs = { + "yuld", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU}, + YULD_REG_DBG_SELECT, YULD_REG_DBG_DWORD_ENABLE, + YULD_REG_DBG_SHIFT, YULD_REG_DBG_FORCE_VALID, + YULD_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 15 +}; + +static struct block_defs block_xyld_defs = { + "xyld", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX}, + XYLD_REG_DBG_SELECT, XYLD_REG_DBG_DWORD_ENABLE, + XYLD_REG_DBG_SHIFT, XYLD_REG_DBG_FORCE_VALID, + XYLD_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 12 +}; + +static struct block_defs block_prm_defs = { + "prm", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCM}, + PRM_REG_DBG_SELECT, PRM_REG_DBG_DWORD_ENABLE, + PRM_REG_DBG_SHIFT, PRM_REG_DBG_FORCE_VALID, + PRM_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 21 +}; + +static struct block_defs block_pbf_pb1_defs = { + "pbf_pb1", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCV}, + PBF_PB1_REG_DBG_SELECT, PBF_PB1_REG_DBG_DWORD_ENABLE, + PBF_PB1_REG_DBG_SHIFT, PBF_PB1_REG_DBG_FORCE_VALID, + PBF_PB1_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, + 11 +}; + +static struct block_defs block_pbf_pb2_defs = { + "pbf_pb2", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCV}, + PBF_PB2_REG_DBG_SELECT, PBF_PB2_REG_DBG_DWORD_ENABLE, + PBF_PB2_REG_DBG_SHIFT, PBF_PB2_REG_DBG_FORCE_VALID, + PBF_PB2_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, + 12 +}; + +static struct block_defs block_rpb_defs = { + "rpb", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCM}, + RPB_REG_DBG_SELECT, RPB_REG_DBG_DWORD_ENABLE, + RPB_REG_DBG_SHIFT, RPB_REG_DBG_FORCE_VALID, + RPB_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 13 +}; + +static struct block_defs block_btb_defs = { + "btb", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCR, DBG_BUS_CLIENT_RBCR, DBG_BUS_CLIENT_RBCV}, + BTB_REG_DBG_SELECT, BTB_REG_DBG_DWORD_ENABLE, + BTB_REG_DBG_SHIFT, BTB_REG_DBG_FORCE_VALID, + BTB_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 10 +}; + +static struct block_defs block_pbf_defs = { + "pbf", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCV}, + PBF_REG_DBG_SELECT, PBF_REG_DBG_DWORD_ENABLE, + PBF_REG_DBG_SHIFT, PBF_REG_DBG_FORCE_VALID, + PBF_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 15 +}; + +static struct block_defs block_rdif_defs = { + "rdif", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCM}, + RDIF_REG_DBG_SELECT, RDIF_REG_DBG_DWORD_ENABLE, + RDIF_REG_DBG_SHIFT, RDIF_REG_DBG_FORCE_VALID, + RDIF_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 16 +}; + +static struct block_defs block_tdif_defs = { + "tdif", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS}, + TDIF_REG_DBG_SELECT, TDIF_REG_DBG_DWORD_ENABLE, + TDIF_REG_DBG_SHIFT, TDIF_REG_DBG_FORCE_VALID, + TDIF_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 17 +}; + +static struct block_defs block_cdu_defs = { + "cdu", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCF, DBG_BUS_CLIENT_RBCF, DBG_BUS_CLIENT_RBCF}, + CDU_REG_DBG_SELECT, CDU_REG_DBG_DWORD_ENABLE, + CDU_REG_DBG_SHIFT, CDU_REG_DBG_FORCE_VALID, + CDU_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 23 +}; + +static struct block_defs block_ccfc_defs = { + "ccfc", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCF, DBG_BUS_CLIENT_RBCF, DBG_BUS_CLIENT_RBCF}, + CCFC_REG_DBG_SELECT, CCFC_REG_DBG_DWORD_ENABLE, + CCFC_REG_DBG_SHIFT, CCFC_REG_DBG_FORCE_VALID, + CCFC_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 24 +}; + +static struct block_defs block_tcfc_defs = { + "tcfc", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCF, DBG_BUS_CLIENT_RBCF, DBG_BUS_CLIENT_RBCF}, + TCFC_REG_DBG_SELECT, TCFC_REG_DBG_DWORD_ENABLE, + TCFC_REG_DBG_SHIFT, TCFC_REG_DBG_FORCE_VALID, + TCFC_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 25 +}; + +static struct block_defs block_igu_defs = { + "igu", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP}, + IGU_REG_DBG_SELECT, IGU_REG_DBG_DWORD_ENABLE, + IGU_REG_DBG_SHIFT, IGU_REG_DBG_FORCE_VALID, + IGU_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 27 +}; + +static struct block_defs block_cau_defs = { + "cau", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP}, + CAU_REG_DBG_SELECT, CAU_REG_DBG_DWORD_ENABLE, + CAU_REG_DBG_SHIFT, CAU_REG_DBG_FORCE_VALID, + CAU_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 19 +}; + +static struct block_defs block_umac_defs = { + "umac", {false, false, true}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCZ}, + UMAC_REG_DBG_SELECT, UMAC_REG_DBG_DWORD_ENABLE, + UMAC_REG_DBG_SHIFT, UMAC_REG_DBG_FORCE_VALID, + UMAC_REG_DBG_FORCE_FRAME, + true, false, DBG_RESET_REG_MISCS_PL_HV, 6 +}; + +static struct block_defs block_xmac_defs = { + "xmac", {false, false, false}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS}, + 0, 0, 0, 0, 0, + false, false, MAX_DBG_RESET_REGS, 0 +}; + +static struct block_defs block_dbg_defs = { + "dbg", {false, false, false}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS}, + 0, 0, 0, 0, 0, + true, true, DBG_RESET_REG_MISC_PL_PDA_VAUX, 3 +}; + +static struct block_defs block_nig_defs = { + "nig", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCN, DBG_BUS_CLIENT_RBCN, DBG_BUS_CLIENT_RBCN}, + NIG_REG_DBG_SELECT, NIG_REG_DBG_DWORD_ENABLE, + NIG_REG_DBG_SHIFT, NIG_REG_DBG_FORCE_VALID, + NIG_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VAUX, 0 +}; + +static struct block_defs block_wol_defs = { + "wol", {false, false, true}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCZ}, + WOL_REG_DBG_SELECT, WOL_REG_DBG_DWORD_ENABLE, + WOL_REG_DBG_SHIFT, WOL_REG_DBG_FORCE_VALID, + WOL_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VAUX, 7 +}; + +static struct block_defs block_bmbn_defs = { + "bmbn", {false, false, true}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCB}, + BMBN_REG_DBG_SELECT, BMBN_REG_DBG_DWORD_ENABLE, + BMBN_REG_DBG_SHIFT, BMBN_REG_DBG_FORCE_VALID, + BMBN_REG_DBG_FORCE_FRAME, + false, false, MAX_DBG_RESET_REGS, 0 +}; + +static struct block_defs block_ipc_defs = { + "ipc", {false, false, false}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS}, + 0, 0, 0, 0, 0, + true, false, DBG_RESET_REG_MISCS_PL_UA, 8 +}; + +static struct block_defs block_nwm_defs = { + "nwm", {false, false, true}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCW}, + NWM_REG_DBG_SELECT, NWM_REG_DBG_DWORD_ENABLE, + NWM_REG_DBG_SHIFT, NWM_REG_DBG_FORCE_VALID, + NWM_REG_DBG_FORCE_FRAME, + true, false, DBG_RESET_REG_MISCS_PL_HV_2, 0 +}; + +static struct block_defs block_nws_defs = { + "nws", {false, false, false}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS}, + 0, 0, 0, 0, 0, + true, false, DBG_RESET_REG_MISCS_PL_HV, 12 +}; + +static struct block_defs block_ms_defs = { + "ms", {false, false, false}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS}, + 0, 0, 0, 0, 0, + true, false, DBG_RESET_REG_MISCS_PL_HV, 13 +}; + +static struct block_defs block_phy_pcie_defs = { + "phy_pcie", {false, false, true}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCH}, + PCIE_REG_DBG_COMMON_SELECT, PCIE_REG_DBG_COMMON_DWORD_ENABLE, + PCIE_REG_DBG_COMMON_SHIFT, PCIE_REG_DBG_COMMON_FORCE_VALID, + PCIE_REG_DBG_COMMON_FORCE_FRAME, + false, false, MAX_DBG_RESET_REGS, 0 +}; + +static struct block_defs block_led_defs = { + "led", {false, false, false}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS}, + 0, 0, 0, 0, 0, + true, true, DBG_RESET_REG_MISCS_PL_HV, 14 +}; + +static struct block_defs block_misc_aeu_defs = { + "misc_aeu", {false, false, false}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS}, + 0, 0, 0, 0, 0, + false, false, MAX_DBG_RESET_REGS, 0 +}; + +static struct block_defs block_bar0_map_defs = { + "bar0_map", {false, false, false}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS}, + 0, 0, 0, 0, 0, + false, false, MAX_DBG_RESET_REGS, 0 +}; + +static struct block_defs *s_block_defs[MAX_BLOCK_ID] = { + &block_grc_defs, + &block_miscs_defs, + &block_misc_defs, + &block_dbu_defs, + &block_pglue_b_defs, + &block_cnig_defs, + &block_cpmu_defs, + &block_ncsi_defs, + &block_opte_defs, + &block_bmb_defs, + &block_pcie_defs, + &block_mcp_defs, + &block_mcp2_defs, + &block_pswhst_defs, + &block_pswhst2_defs, + &block_pswrd_defs, + &block_pswrd2_defs, + &block_pswwr_defs, + &block_pswwr2_defs, + &block_pswrq_defs, + &block_pswrq2_defs, + &block_pglcs_defs, + &block_dmae_defs, + &block_ptu_defs, + &block_tcm_defs, + &block_mcm_defs, + &block_ucm_defs, + &block_xcm_defs, + &block_ycm_defs, + &block_pcm_defs, + &block_qm_defs, + &block_tm_defs, + &block_dorq_defs, + &block_brb_defs, + &block_src_defs, + &block_prs_defs, + &block_tsdm_defs, + &block_msdm_defs, + &block_usdm_defs, + &block_xsdm_defs, + &block_ysdm_defs, + &block_psdm_defs, + &block_tsem_defs, + &block_msem_defs, + &block_usem_defs, + &block_xsem_defs, + &block_ysem_defs, + &block_psem_defs, + &block_rss_defs, + &block_tmld_defs, + &block_muld_defs, + &block_yuld_defs, + &block_xyld_defs, + &block_prm_defs, + &block_pbf_pb1_defs, + &block_pbf_pb2_defs, + &block_rpb_defs, + &block_btb_defs, + &block_pbf_defs, + &block_rdif_defs, + &block_tdif_defs, + &block_cdu_defs, + &block_ccfc_defs, + &block_tcfc_defs, + &block_igu_defs, + &block_cau_defs, + &block_umac_defs, + &block_xmac_defs, + &block_dbg_defs, + &block_nig_defs, + &block_wol_defs, + &block_bmbn_defs, + &block_ipc_defs, + &block_nwm_defs, + &block_nws_defs, + &block_ms_defs, + &block_phy_pcie_defs, + &block_led_defs, + &block_misc_aeu_defs, + &block_bar0_map_defs, +}; + +static struct platform_defs s_platform_defs[] = { + {"asic", 1}, + {"reserved", 0}, + {"reserved2", 0}, + {"reserved3", 0} +}; + +static struct grc_param_defs s_grc_param_defs[] = { + {{1, 1, 1}, 0, 1, false, 1, 1}, /* DBG_GRC_PARAM_DUMP_TSTORM */ + {{1, 1, 1}, 0, 1, false, 1, 1}, /* DBG_GRC_PARAM_DUMP_MSTORM */ + {{1, 1, 1}, 0, 1, false, 1, 1}, /* DBG_GRC_PARAM_DUMP_USTORM */ + {{1, 1, 1}, 0, 1, false, 1, 1}, /* DBG_GRC_PARAM_DUMP_XSTORM */ + {{1, 1, 1}, 0, 1, false, 1, 1}, /* DBG_GRC_PARAM_DUMP_YSTORM */ + {{1, 1, 1}, 0, 1, false, 1, 1}, /* DBG_GRC_PARAM_DUMP_PSTORM */ + {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_REGS */ + {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_RAM */ + {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_PBUF */ + {{0, 0, 0}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_IOR */ + {{0, 0, 0}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_VFC */ + {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_CM_CTX */ + {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_ILT */ + {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_RSS */ + {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_CAU */ + {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_QM */ + {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_MCP */ + {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_RESERVED */ + {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_CFC */ + {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_IGU */ + {{0, 0, 0}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_BRB */ + {{0, 0, 0}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_BTB */ + {{0, 0, 0}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_BMB */ + {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_NIG */ + {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_MULD */ + {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_PRS */ + {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_DMAE */ + {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_TM */ + {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_SDM */ + {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_DIF */ + {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_STATIC */ + {{0, 0, 0}, 0, 1, false, 0, 0}, /* DBG_GRC_PARAM_UNSTALL */ + {{MAX_LCIDS, MAX_LCIDS, MAX_LCIDS}, 1, MAX_LCIDS, false, MAX_LCIDS, + MAX_LCIDS}, /* DBG_GRC_PARAM_NUM_LCIDS */ + {{MAX_LTIDS, MAX_LTIDS, MAX_LTIDS}, 1, MAX_LTIDS, false, MAX_LTIDS, + MAX_LTIDS}, /* DBG_GRC_PARAM_NUM_LTIDS */ + {{0, 0, 0}, 0, 1, true, 0, 0}, /* DBG_GRC_PARAM_EXCLUDE_ALL */ + {{0, 0, 0}, 0, 1, true, 0, 0}, /* DBG_GRC_PARAM_CRASH */ + {{0, 0, 0}, 0, 1, false, 1, 0}, /* DBG_GRC_PARAM_PARITY_SAFE */ + {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_CM */ + {{1, 1, 1}, 0, 1, false, 0, 1} /* DBG_GRC_PARAM_DUMP_PHY */ +}; + +static struct rss_mem_defs s_rss_mem_defs[] = { + { "rss_mem_cid", "rss_cid", 0, + {256, 256, 320}, + {32, 32, 32} }, + { "rss_mem_key_msb", "rss_key", 1024, + {128, 128, 208}, + {256, 256, 256} }, + { "rss_mem_key_lsb", "rss_key", 2048, + {128, 128, 208}, + {64, 64, 64} }, + { "rss_mem_info", "rss_info", 3072, + {128, 128, 208}, + {16, 16, 16} }, + { "rss_mem_ind", "rss_ind", 4096, + {(128 * 128), (128 * 128), (128 * 208)}, + {16, 16, 16} } +}; + +static struct vfc_ram_defs s_vfc_ram_defs[] = { + {"vfc_ram_tt1", "vfc_ram", 0, 512}, + {"vfc_ram_mtt2", "vfc_ram", 512, 128}, + {"vfc_ram_stt2", "vfc_ram", 640, 32}, + {"vfc_ram_ro_vect", "vfc_ram", 672, 32} +}; + +static struct big_ram_defs s_big_ram_defs[] = { + { "BRB", MEM_GROUP_BRB_MEM, MEM_GROUP_BRB_RAM, DBG_GRC_PARAM_DUMP_BRB, + BRB_REG_BIG_RAM_ADDRESS, BRB_REG_BIG_RAM_DATA, + {4800, 4800, 5632} }, + { "BTB", MEM_GROUP_BTB_MEM, MEM_GROUP_BTB_RAM, DBG_GRC_PARAM_DUMP_BTB, + BTB_REG_BIG_RAM_ADDRESS, BTB_REG_BIG_RAM_DATA, + {2880, 2880, 3680} }, + { "BMB", MEM_GROUP_BMB_MEM, MEM_GROUP_BMB_RAM, DBG_GRC_PARAM_DUMP_BMB, + BMB_REG_BIG_RAM_ADDRESS, BMB_REG_BIG_RAM_DATA, + {1152, 1152, 1152} } +}; + +static struct reset_reg_defs s_reset_regs_defs[] = { + { MISCS_REG_RESET_PL_UA, 0x0, + {true, true, true} }, /* DBG_RESET_REG_MISCS_PL_UA */ + { MISCS_REG_RESET_PL_HV, 0x0, + {true, true, true} }, /* DBG_RESET_REG_MISCS_PL_HV */ + { MISCS_REG_RESET_PL_HV_2, 0x0, + {false, false, true} }, /* DBG_RESET_REG_MISCS_PL_HV_2 */ + { MISC_REG_RESET_PL_UA, 0x0, + {true, true, true} }, /* DBG_RESET_REG_MISC_PL_UA */ + { MISC_REG_RESET_PL_HV, 0x0, + {true, true, true} }, /* DBG_RESET_REG_MISC_PL_HV */ + { MISC_REG_RESET_PL_PDA_VMAIN_1, 0x4404040, + {true, true, true} }, /* DBG_RESET_REG_MISC_PL_PDA_VMAIN_1 */ + { MISC_REG_RESET_PL_PDA_VMAIN_2, 0x7c00007, + {true, true, true} }, /* DBG_RESET_REG_MISC_PL_PDA_VMAIN_2 */ + { MISC_REG_RESET_PL_PDA_VAUX, 0x2, + {true, true, true} }, /* DBG_RESET_REG_MISC_PL_PDA_VAUX */ +}; + +static struct phy_defs s_phy_defs[] = { + {"nw_phy", NWS_REG_NWS_CMU, PHY_NW_IP_REG_PHY0_TOP_TBUS_ADDR_7_0, + PHY_NW_IP_REG_PHY0_TOP_TBUS_ADDR_15_8, + PHY_NW_IP_REG_PHY0_TOP_TBUS_DATA_7_0, + PHY_NW_IP_REG_PHY0_TOP_TBUS_DATA_11_8}, + {"sgmii_phy", MS_REG_MS_CMU, PHY_SGMII_IP_REG_AHB_CMU_CSR_0_X132, + PHY_SGMII_IP_REG_AHB_CMU_CSR_0_X133, + PHY_SGMII_IP_REG_AHB_CMU_CSR_0_X130, + PHY_SGMII_IP_REG_AHB_CMU_CSR_0_X131}, + {"pcie_phy0", PHY_PCIE_REG_PHY0, PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X132, + PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X133, + PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X130, + PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X131}, + {"pcie_phy1", PHY_PCIE_REG_PHY1, PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X132, + PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X133, + PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X130, + PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X131}, +}; + +/**************************** Private Functions ******************************/ + +/* Reads and returns a single dword from the specified unaligned buffer */ +static u32 qed_read_unaligned_dword(u8 *buf) +{ + u32 dword; + + memcpy((u8 *)&dword, buf, sizeof(dword)); + return dword; +} + +/* Initializes debug data for the specified device */ +static enum dbg_status qed_dbg_dev_init(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt) +{ + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + + if (dev_data->initialized) + return DBG_STATUS_OK; + + if (QED_IS_K2(p_hwfn->cdev)) { + dev_data->chip_id = CHIP_K2; + dev_data->mode_enable[MODE_K2] = 1; + } else if (QED_IS_BB_B0(p_hwfn->cdev)) { + dev_data->chip_id = CHIP_BB_B0; + dev_data->mode_enable[MODE_BB_B0] = 1; + } else { + return DBG_STATUS_UNKNOWN_CHIP; + } + + dev_data->platform_id = PLATFORM_ASIC; + dev_data->mode_enable[MODE_ASIC] = 1; + dev_data->initialized = true; + return DBG_STATUS_OK; +} + +/* Reads the FW info structure for the specified Storm from the chip, + * and writes it to the specified fw_info pointer. + */ +static void qed_read_fw_info(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u8 storm_id, struct fw_info *fw_info) +{ + /* Read first the address that points to fw_info location. + * The address is located in the last line of the Storm RAM. + */ + u32 addr = s_storm_defs[storm_id].sem_fast_mem_addr + + SEM_FAST_REG_INT_RAM + + DWORDS_TO_BYTES(SEM_FAST_REG_INT_RAM_SIZE) - + sizeof(struct fw_info_location); + struct fw_info_location fw_info_location; + u32 *dest = (u32 *)&fw_info_location; + u32 i; + + memset(&fw_info_location, 0, sizeof(fw_info_location)); + memset(fw_info, 0, sizeof(*fw_info)); + for (i = 0; i < BYTES_TO_DWORDS(sizeof(fw_info_location)); + i++, addr += BYTES_IN_DWORD) + dest[i] = qed_rd(p_hwfn, p_ptt, addr); + if (fw_info_location.size > 0 && fw_info_location.size <= + sizeof(*fw_info)) { + /* Read FW version info from Storm RAM */ + addr = fw_info_location.grc_addr; + dest = (u32 *)fw_info; + for (i = 0; i < BYTES_TO_DWORDS(fw_info_location.size); + i++, addr += BYTES_IN_DWORD) + dest[i] = qed_rd(p_hwfn, p_ptt, addr); + } +} + +/* Dumps the specified string to the specified buffer. Returns the dumped size + * in bytes (actual length + 1 for the null character termination). + */ +static u32 qed_dump_str(char *dump_buf, bool dump, const char *str) +{ + if (dump) + strcpy(dump_buf, str); + return (u32)strlen(str) + 1; +} + +/* Dumps zeros to align the specified buffer to dwords. Returns the dumped size + * in bytes. + */ +static u32 qed_dump_align(char *dump_buf, bool dump, u32 byte_offset) +{ + u8 offset_in_dword = (u8)(byte_offset & 0x3), align_size; + + align_size = offset_in_dword ? BYTES_IN_DWORD - offset_in_dword : 0; + + if (dump && align_size) + memset(dump_buf, 0, align_size); + return align_size; +} + +/* Writes the specified string param to the specified buffer. + * Returns the dumped size in dwords. + */ +static u32 qed_dump_str_param(u32 *dump_buf, + bool dump, + const char *param_name, const char *param_val) +{ + char *char_buf = (char *)dump_buf; + u32 offset = 0; + + /* Dump param name */ + offset += qed_dump_str(char_buf + offset, dump, param_name); + + /* Indicate a string param value */ + if (dump) + *(char_buf + offset) = 1; + offset++; + + /* Dump param value */ + offset += qed_dump_str(char_buf + offset, dump, param_val); + + /* Align buffer to next dword */ + offset += qed_dump_align(char_buf + offset, dump, offset); + return BYTES_TO_DWORDS(offset); +} + +/* Writes the specified numeric param to the specified buffer. + * Returns the dumped size in dwords. + */ +static u32 qed_dump_num_param(u32 *dump_buf, + bool dump, const char *param_name, u32 param_val) +{ + char *char_buf = (char *)dump_buf; + u32 offset = 0; + + /* Dump param name */ + offset += qed_dump_str(char_buf + offset, dump, param_name); + + /* Indicate a numeric param value */ + if (dump) + *(char_buf + offset) = 0; + offset++; + + /* Align buffer to next dword */ + offset += qed_dump_align(char_buf + offset, dump, offset); + + /* Dump param value (and change offset from bytes to dwords) */ + offset = BYTES_TO_DWORDS(offset); + if (dump) + *(dump_buf + offset) = param_val; + offset++; + return offset; +} + +/* Reads the FW version and writes it as a param to the specified buffer. + * Returns the dumped size in dwords. + */ +static u32 qed_dump_fw_ver_param(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, bool dump) +{ + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + char fw_ver_str[16] = EMPTY_FW_VERSION_STR; + char fw_img_str[16] = EMPTY_FW_IMAGE_STR; + struct fw_info fw_info = { {0}, {0} }; + int printed_chars; + u32 offset = 0; + + if (dump) { + /* Read FW image/version from PRAM in a non-reset SEMI */ + bool found = false; + u8 storm_id; + + for (storm_id = 0; storm_id < MAX_DBG_STORMS && !found; + storm_id++) { + /* Read FW version/image */ + if (!dev_data->block_in_reset + [s_storm_defs[storm_id].block_id]) { + /* read FW info for the current Storm */ + qed_read_fw_info(p_hwfn, + p_ptt, storm_id, &fw_info); + + /* Create FW version/image strings */ + printed_chars = + snprintf(fw_ver_str, + sizeof(fw_ver_str), + "%d_%d_%d_%d", + fw_info.ver.num.major, + fw_info.ver.num.minor, + fw_info.ver.num.rev, + fw_info.ver.num.eng); + if (printed_chars < 0 || printed_chars >= + sizeof(fw_ver_str)) + DP_NOTICE(p_hwfn, + "Unexpected debug error: invalid FW version string\n"); + switch (fw_info.ver.image_id) { + case FW_IMG_MAIN: + strcpy(fw_img_str, "main"); + break; + default: + strcpy(fw_img_str, "unknown"); + break; + } + + found = true; + } + } + } + + /* Dump FW version, image and timestamp */ + offset += qed_dump_str_param(dump_buf + offset, + dump, "fw-version", fw_ver_str); + offset += qed_dump_str_param(dump_buf + offset, + dump, "fw-image", fw_img_str); + offset += qed_dump_num_param(dump_buf + offset, + dump, + "fw-timestamp", fw_info.ver.timestamp); + return offset; +} + +/* Reads the MFW version and writes it as a param to the specified buffer. + * Returns the dumped size in dwords. + */ +static u32 qed_dump_mfw_ver_param(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, bool dump) +{ + char mfw_ver_str[16] = EMPTY_FW_VERSION_STR; + + if (dump) { + u32 global_section_offsize, global_section_addr, mfw_ver; + u32 public_data_addr, global_section_offsize_addr; + int printed_chars; + + /* Find MCP public data GRC address. + * Needs to be ORed with MCP_REG_SCRATCH due to a HW bug. + */ + public_data_addr = qed_rd(p_hwfn, p_ptt, + MISC_REG_SHARED_MEM_ADDR) | + MCP_REG_SCRATCH; + + /* Find MCP public global section offset */ + global_section_offsize_addr = public_data_addr + + offsetof(struct mcp_public_data, + sections) + + sizeof(offsize_t) * PUBLIC_GLOBAL; + global_section_offsize = qed_rd(p_hwfn, p_ptt, + global_section_offsize_addr); + global_section_addr = MCP_REG_SCRATCH + + (global_section_offsize & + OFFSIZE_OFFSET_MASK) * 4; + + /* Read MFW version from MCP public global section */ + mfw_ver = qed_rd(p_hwfn, p_ptt, + global_section_addr + + offsetof(struct public_global, mfw_ver)); + + /* Dump MFW version param */ + printed_chars = snprintf(mfw_ver_str, sizeof(mfw_ver_str), + "%d_%d_%d_%d", + (u8) (mfw_ver >> 24), + (u8) (mfw_ver >> 16), + (u8) (mfw_ver >> 8), + (u8) mfw_ver); + if (printed_chars < 0 || printed_chars >= sizeof(mfw_ver_str)) + DP_NOTICE(p_hwfn, + "Unexpected debug error: invalid MFW version string\n"); + } + + return qed_dump_str_param(dump_buf, dump, "mfw-version", mfw_ver_str); +} + +/* Writes a section header to the specified buffer. + * Returns the dumped size in dwords. + */ +static u32 qed_dump_section_hdr(u32 *dump_buf, + bool dump, const char *name, u32 num_params) +{ + return qed_dump_num_param(dump_buf, dump, name, num_params); +} + +/* Writes the common global params to the specified buffer. + * Returns the dumped size in dwords. + */ +static u32 qed_dump_common_global_params(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + bool dump, + u8 num_specific_global_params) +{ + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + u32 offset = 0; + + /* Find platform string and dump global params section header */ + offset += qed_dump_section_hdr(dump_buf + offset, + dump, + "global_params", + NUM_COMMON_GLOBAL_PARAMS + + num_specific_global_params); + + /* Store params */ + offset += qed_dump_fw_ver_param(p_hwfn, p_ptt, dump_buf + offset, dump); + offset += qed_dump_mfw_ver_param(p_hwfn, + p_ptt, dump_buf + offset, dump); + offset += qed_dump_num_param(dump_buf + offset, + dump, "tools-version", TOOLS_VERSION); + offset += qed_dump_str_param(dump_buf + offset, + dump, + "chip", + s_chip_defs[dev_data->chip_id].name); + offset += qed_dump_str_param(dump_buf + offset, + dump, + "platform", + s_platform_defs[dev_data->platform_id]. + name); + offset += + qed_dump_num_param(dump_buf + offset, dump, "pci-func", + p_hwfn->abs_pf_id); + return offset; +} + +/* Writes the last section to the specified buffer at the given offset. + * Returns the dumped size in dwords. + */ +static u32 qed_dump_last_section(u32 *dump_buf, u32 offset, bool dump) +{ + u32 start_offset = offset, crc = ~0; + + /* Dump CRC section header */ + offset += qed_dump_section_hdr(dump_buf + offset, dump, "last", 0); + + /* Calculate CRC32 and add it to the dword following the "last" section. + */ + if (dump) + *(dump_buf + offset) = ~crc32(crc, (u8 *)dump_buf, + DWORDS_TO_BYTES(offset)); + offset++; + return offset - start_offset; +} + +/* Update blocks reset state */ +static void qed_update_blocks_reset_state(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt) +{ + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + u32 reg_val[MAX_DBG_RESET_REGS] = { 0 }; + u32 i; + + /* Read reset registers */ + for (i = 0; i < MAX_DBG_RESET_REGS; i++) + if (s_reset_regs_defs[i].exists[dev_data->chip_id]) + reg_val[i] = qed_rd(p_hwfn, + p_ptt, s_reset_regs_defs[i].addr); + + /* Check if blocks are in reset */ + for (i = 0; i < MAX_BLOCK_ID; i++) + dev_data->block_in_reset[i] = + s_block_defs[i]->has_reset_bit && + !(reg_val[s_block_defs[i]->reset_reg] & + BIT(s_block_defs[i]->reset_bit_offset)); +} + +/* Enable / disable the Debug block */ +static void qed_bus_enable_dbg_block(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, bool enable) +{ + qed_wr(p_hwfn, p_ptt, DBG_REG_DBG_BLOCK_ON, enable ? 1 : 0); +} + +/* Resets the Debug block */ +static void qed_bus_reset_dbg_block(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt) +{ + u32 dbg_reset_reg_addr, old_reset_reg_val, new_reset_reg_val; + + dbg_reset_reg_addr = + s_reset_regs_defs[s_block_defs[BLOCK_DBG]->reset_reg].addr; + old_reset_reg_val = qed_rd(p_hwfn, p_ptt, dbg_reset_reg_addr); + new_reset_reg_val = old_reset_reg_val & + ~BIT(s_block_defs[BLOCK_DBG]->reset_bit_offset); + + qed_wr(p_hwfn, p_ptt, dbg_reset_reg_addr, new_reset_reg_val); + qed_wr(p_hwfn, p_ptt, dbg_reset_reg_addr, old_reset_reg_val); +} + +static void qed_bus_set_framing_mode(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + enum dbg_bus_frame_modes mode) +{ + qed_wr(p_hwfn, p_ptt, DBG_REG_FRAMING_MODE, (u8)mode); +} + +/* Enable / disable Debug Bus clients according to the specified mask. + * (1 = enable, 0 = disable) + */ +static void qed_bus_enable_clients(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u32 client_mask) +{ + qed_wr(p_hwfn, p_ptt, DBG_REG_CLIENT_ENABLE, client_mask); +} + +static bool qed_is_mode_match(struct qed_hwfn *p_hwfn, u16 *modes_buf_offset) +{ + const u32 *ptr = s_dbg_arrays[BIN_BUF_DBG_MODE_TREE].ptr; + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + u8 tree_val = ((u8 *)ptr)[(*modes_buf_offset)++]; + bool arg1, arg2; + + switch (tree_val) { + case INIT_MODE_OP_NOT: + return !qed_is_mode_match(p_hwfn, modes_buf_offset); + case INIT_MODE_OP_OR: + case INIT_MODE_OP_AND: + arg1 = qed_is_mode_match(p_hwfn, modes_buf_offset); + arg2 = qed_is_mode_match(p_hwfn, modes_buf_offset); + return (tree_val == INIT_MODE_OP_OR) ? (arg1 || + arg2) : (arg1 && arg2); + default: + return dev_data->mode_enable[tree_val - MAX_INIT_MODE_OPS] > 0; + } +} + +/* Returns the value of the specified GRC param */ +static u32 qed_grc_get_param(struct qed_hwfn *p_hwfn, + enum dbg_grc_params grc_param) +{ + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + + return dev_data->grc.param_val[grc_param]; +} + +/* Clear all GRC params */ +static void qed_dbg_grc_clear_params(struct qed_hwfn *p_hwfn) +{ + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + u32 i; + + for (i = 0; i < MAX_DBG_GRC_PARAMS; i++) + dev_data->grc.param_set_by_user[i] = 0; +} + +/* Assign default GRC param values */ +static void qed_dbg_grc_set_params_default(struct qed_hwfn *p_hwfn) +{ + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + u32 i; + + for (i = 0; i < MAX_DBG_GRC_PARAMS; i++) + if (!dev_data->grc.param_set_by_user[i]) + dev_data->grc.param_val[i] = + s_grc_param_defs[i].default_val[dev_data->chip_id]; +} + +/* Returns true if the specified entity (indicated by GRC param) should be + * included in the dump, false otherwise. + */ +static bool qed_grc_is_included(struct qed_hwfn *p_hwfn, + enum dbg_grc_params grc_param) +{ + return qed_grc_get_param(p_hwfn, grc_param) > 0; +} + +/* Returns true of the specified Storm should be included in the dump, false + * otherwise. + */ +static bool qed_grc_is_storm_included(struct qed_hwfn *p_hwfn, + enum dbg_storms storm) +{ + return qed_grc_get_param(p_hwfn, (enum dbg_grc_params)storm) > 0; +} + +/* Returns true if the specified memory should be included in the dump, false + * otherwise. + */ +static bool qed_grc_is_mem_included(struct qed_hwfn *p_hwfn, + enum block_id block_id, u8 mem_group_id) +{ + u8 i; + + /* Check Storm match */ + if (s_block_defs[block_id]->associated_to_storm && + !qed_grc_is_storm_included(p_hwfn, + (enum dbg_storms)s_block_defs[block_id]->storm_id)) + return false; + + for (i = 0; i < NUM_BIG_RAM_TYPES; i++) + if (mem_group_id == s_big_ram_defs[i].mem_group_id || + mem_group_id == s_big_ram_defs[i].ram_mem_group_id) + return qed_grc_is_included(p_hwfn, + s_big_ram_defs[i].grc_param); + if (mem_group_id == MEM_GROUP_PXP_ILT || mem_group_id == + MEM_GROUP_PXP_MEM) + return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_PXP); + if (mem_group_id == MEM_GROUP_RAM) + return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_RAM); + if (mem_group_id == MEM_GROUP_PBUF) + return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_PBUF); + if (mem_group_id == MEM_GROUP_CAU_MEM || + mem_group_id == MEM_GROUP_CAU_SB || + mem_group_id == MEM_GROUP_CAU_PI) + return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_CAU); + if (mem_group_id == MEM_GROUP_QM_MEM) + return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_QM); + if (mem_group_id == MEM_GROUP_CONN_CFC_MEM || + mem_group_id == MEM_GROUP_TASK_CFC_MEM) + return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_CFC); + if (mem_group_id == MEM_GROUP_IGU_MEM || mem_group_id == + MEM_GROUP_IGU_MSIX) + return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_IGU); + if (mem_group_id == MEM_GROUP_MULD_MEM) + return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_MULD); + if (mem_group_id == MEM_GROUP_PRS_MEM) + return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_PRS); + if (mem_group_id == MEM_GROUP_DMAE_MEM) + return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_DMAE); + if (mem_group_id == MEM_GROUP_TM_MEM) + return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_TM); + if (mem_group_id == MEM_GROUP_SDM_MEM) + return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_SDM); + if (mem_group_id == MEM_GROUP_TDIF_CTX || mem_group_id == + MEM_GROUP_RDIF_CTX) + return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_DIF); + if (mem_group_id == MEM_GROUP_CM_MEM) + return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_CM); + if (mem_group_id == MEM_GROUP_IOR) + return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_IOR); + + return true; +} + +/* Stalls all Storms */ +static void qed_grc_stall_storms(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, bool stall) +{ + u8 reg_val = stall ? 1 : 0; + u8 storm_id; + + for (storm_id = 0; storm_id < MAX_DBG_STORMS; storm_id++) { + if (qed_grc_is_storm_included(p_hwfn, + (enum dbg_storms)storm_id)) { + u32 reg_addr = + s_storm_defs[storm_id].sem_fast_mem_addr + + SEM_FAST_REG_STALL_0; + + qed_wr(p_hwfn, p_ptt, reg_addr, reg_val); + } + } + + msleep(STALL_DELAY_MS); +} + +/* Takes all blocks out of reset */ +static void qed_grc_unreset_blocks(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt) +{ + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + u32 reg_val[MAX_DBG_RESET_REGS] = { 0 }; + u32 i; + + /* Fill reset regs values */ + for (i = 0; i < MAX_BLOCK_ID; i++) + if (s_block_defs[i]->has_reset_bit && s_block_defs[i]->unreset) + reg_val[s_block_defs[i]->reset_reg] |= + BIT(s_block_defs[i]->reset_bit_offset); + + /* Write reset registers */ + for (i = 0; i < MAX_DBG_RESET_REGS; i++) { + if (s_reset_regs_defs[i].exists[dev_data->chip_id]) { + reg_val[i] |= s_reset_regs_defs[i].unreset_val; + if (reg_val[i]) + qed_wr(p_hwfn, + p_ptt, + s_reset_regs_defs[i].addr + + RESET_REG_UNRESET_OFFSET, reg_val[i]); + } + } +} + +/* Returns the attention name offsets of the specified block */ +static const struct dbg_attn_block_type_data * +qed_get_block_attn_data(enum block_id block_id, enum dbg_attn_type attn_type) +{ + const struct dbg_attn_block *base_attn_block_arr = + (const struct dbg_attn_block *) + s_dbg_arrays[BIN_BUF_DBG_ATTN_BLOCKS].ptr; + + return &base_attn_block_arr[block_id].per_type_data[attn_type]; +} + +/* Returns the attention registers of the specified block */ +static const struct dbg_attn_reg * +qed_get_block_attn_regs(enum block_id block_id, enum dbg_attn_type attn_type, + u8 *num_attn_regs) +{ + const struct dbg_attn_block_type_data *block_type_data = + qed_get_block_attn_data(block_id, attn_type); + + *num_attn_regs = block_type_data->num_regs; + return &((const struct dbg_attn_reg *) + s_dbg_arrays[BIN_BUF_DBG_ATTN_REGS].ptr)[block_type_data-> + regs_offset]; +} + +/* For each block, clear the status of all parities */ +static void qed_grc_clear_all_prty(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt) +{ + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + u8 reg_idx, num_attn_regs; + u32 block_id; + + for (block_id = 0; block_id < MAX_BLOCK_ID; block_id++) { + const struct dbg_attn_reg *attn_reg_arr; + + if (dev_data->block_in_reset[block_id]) + continue; + + attn_reg_arr = qed_get_block_attn_regs((enum block_id)block_id, + ATTN_TYPE_PARITY, + &num_attn_regs); + for (reg_idx = 0; reg_idx < num_attn_regs; reg_idx++) { + const struct dbg_attn_reg *reg_data = + &attn_reg_arr[reg_idx]; + + /* Check mode */ + bool eval_mode = GET_FIELD(reg_data->mode.data, + DBG_MODE_HDR_EVAL_MODE) > 0; + u16 modes_buf_offset = + GET_FIELD(reg_data->mode.data, + DBG_MODE_HDR_MODES_BUF_OFFSET); + + if (!eval_mode || + qed_is_mode_match(p_hwfn, &modes_buf_offset)) + /* Mode match - read parity status read-clear + * register. + */ + qed_rd(p_hwfn, p_ptt, + DWORDS_TO_BYTES(reg_data-> + sts_clr_address)); + } + } +} + +/* Dumps GRC registers section header. Returns the dumped size in dwords. + * The following parameters are dumped: + * - 'count' = num_dumped_entries + * - 'split' = split_type + * - 'id'i = split_id (dumped only if split_id >= 0) + * - 'param_name' = param_val (user param, dumped only if param_name != NULL and + * param_val != NULL) + */ +static u32 qed_grc_dump_regs_hdr(u32 *dump_buf, + bool dump, + u32 num_reg_entries, + const char *split_type, + int split_id, + const char *param_name, const char *param_val) +{ + u8 num_params = 2 + (split_id >= 0 ? 1 : 0) + (param_name ? 1 : 0); + u32 offset = 0; + + offset += qed_dump_section_hdr(dump_buf + offset, + dump, "grc_regs", num_params); + offset += qed_dump_num_param(dump_buf + offset, + dump, "count", num_reg_entries); + offset += qed_dump_str_param(dump_buf + offset, + dump, "split", split_type); + if (split_id >= 0) + offset += qed_dump_num_param(dump_buf + offset, + dump, "id", split_id); + if (param_name && param_val) + offset += qed_dump_str_param(dump_buf + offset, + dump, param_name, param_val); + return offset; +} + +/* Dumps GRC register/memory. Returns the dumped size in dwords. */ +static u32 qed_grc_dump_reg_entry(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u32 *dump_buf, + bool dump, u32 addr, u32 len) +{ + u32 offset = 0, i; + + if (dump) { + *(dump_buf + offset++) = addr | (len << REG_DUMP_LEN_SHIFT); + for (i = 0; i < len; i++, addr++, offset++) + *(dump_buf + offset) = qed_rd(p_hwfn, + p_ptt, + DWORDS_TO_BYTES(addr)); + } else { + offset += len + 1; + } + + return offset; +} + +/* Dumps GRC registers entries. Returns the dumped size in dwords. */ +static u32 qed_grc_dump_regs_entries(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + struct dbg_array input_regs_arr, + u32 *dump_buf, + bool dump, + bool block_enable[MAX_BLOCK_ID], + u32 *num_dumped_reg_entries) +{ + u32 i, offset = 0, input_offset = 0; + bool mode_match = true; + + *num_dumped_reg_entries = 0; + while (input_offset < input_regs_arr.size_in_dwords) { + const struct dbg_dump_cond_hdr *cond_hdr = + (const struct dbg_dump_cond_hdr *) + &input_regs_arr.ptr[input_offset++]; + bool eval_mode = GET_FIELD(cond_hdr->mode.data, + DBG_MODE_HDR_EVAL_MODE) > 0; + + /* Check mode/block */ + if (eval_mode) { + u16 modes_buf_offset = + GET_FIELD(cond_hdr->mode.data, + DBG_MODE_HDR_MODES_BUF_OFFSET); + mode_match = qed_is_mode_match(p_hwfn, + &modes_buf_offset); + } + + if (mode_match && block_enable[cond_hdr->block_id]) { + for (i = 0; i < cond_hdr->data_size; + i++, input_offset++) { + const struct dbg_dump_reg *reg = + (const struct dbg_dump_reg *) + &input_regs_arr.ptr[input_offset]; + + offset += + qed_grc_dump_reg_entry(p_hwfn, p_ptt, + dump_buf + offset, dump, + GET_FIELD(reg->data, + DBG_DUMP_REG_ADDRESS), + GET_FIELD(reg->data, + DBG_DUMP_REG_LENGTH)); + (*num_dumped_reg_entries)++; + } + } else { + input_offset += cond_hdr->data_size; + } + } + + return offset; +} + +/* Dumps GRC registers entries. Returns the dumped size in dwords. */ +static u32 qed_grc_dump_split_data(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + struct dbg_array input_regs_arr, + u32 *dump_buf, + bool dump, + bool block_enable[MAX_BLOCK_ID], + const char *split_type_name, + u32 split_id, + const char *param_name, + const char *param_val) +{ + u32 num_dumped_reg_entries, offset; + + /* Calculate register dump header size (and skip it for now) */ + offset = qed_grc_dump_regs_hdr(dump_buf, + false, + 0, + split_type_name, + split_id, param_name, param_val); + + /* Dump registers */ + offset += qed_grc_dump_regs_entries(p_hwfn, + p_ptt, + input_regs_arr, + dump_buf + offset, + dump, + block_enable, + &num_dumped_reg_entries); + + /* Write register dump header */ + if (dump && num_dumped_reg_entries > 0) + qed_grc_dump_regs_hdr(dump_buf, + dump, + num_dumped_reg_entries, + split_type_name, + split_id, param_name, param_val); + + return num_dumped_reg_entries > 0 ? offset : 0; +} + +/* Dumps registers according to the input registers array. + * Returns the dumped size in dwords. + */ +static u32 qed_grc_dump_registers(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + bool dump, + bool block_enable[MAX_BLOCK_ID], + const char *param_name, const char *param_val) +{ + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + u32 offset = 0, input_offset = 0; + u8 port_id, pf_id; + + if (dump) + DP_VERBOSE(p_hwfn, QED_MSG_DEBUG, "Dumping registers...\n"); + while (input_offset < + s_dbg_arrays[BIN_BUF_DBG_DUMP_REG].size_in_dwords) { + const struct dbg_dump_split_hdr *split_hdr = + (const struct dbg_dump_split_hdr *) + &s_dbg_arrays[BIN_BUF_DBG_DUMP_REG].ptr[input_offset++]; + u8 split_type_id = GET_FIELD(split_hdr->hdr, + DBG_DUMP_SPLIT_HDR_SPLIT_TYPE_ID); + u32 split_data_size = GET_FIELD(split_hdr->hdr, + DBG_DUMP_SPLIT_HDR_DATA_SIZE); + struct dbg_array curr_input_regs_arr = { + &s_dbg_arrays[BIN_BUF_DBG_DUMP_REG].ptr[input_offset], + split_data_size}; + + switch (split_type_id) { + case SPLIT_TYPE_NONE: + case SPLIT_TYPE_VF: + offset += qed_grc_dump_split_data(p_hwfn, + p_ptt, + curr_input_regs_arr, + dump_buf + offset, + dump, + block_enable, + "eng", + (u32)(-1), + param_name, + param_val); + break; + case SPLIT_TYPE_PORT: + for (port_id = 0; + port_id < + s_chip_defs[dev_data->chip_id]. + per_platform[dev_data->platform_id].num_ports; + port_id++) { + if (dump) + qed_port_pretend(p_hwfn, p_ptt, + port_id); + offset += + qed_grc_dump_split_data(p_hwfn, p_ptt, + curr_input_regs_arr, + dump_buf + offset, + dump, block_enable, + "port", port_id, + param_name, + param_val); + } + break; + case SPLIT_TYPE_PF: + case SPLIT_TYPE_PORT_PF: + for (pf_id = 0; + pf_id < + s_chip_defs[dev_data->chip_id]. + per_platform[dev_data->platform_id].num_pfs; + pf_id++) { + if (dump) + qed_fid_pretend(p_hwfn, p_ptt, pf_id); + offset += qed_grc_dump_split_data(p_hwfn, + p_ptt, + curr_input_regs_arr, + dump_buf + offset, + dump, block_enable, + "pf", pf_id, param_name, + param_val); + } + break; + default: + break; + } + + input_offset += split_data_size; + } + + /* Pretend to original PF */ + if (dump) + qed_fid_pretend(p_hwfn, p_ptt, p_hwfn->rel_pf_id); + return offset; +} + +/* Dump reset registers. Returns the dumped size in dwords. */ +static u32 qed_grc_dump_reset_regs(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, bool dump) +{ + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + u32 i, offset = 0, num_regs = 0; + + /* Calculate header size */ + offset += qed_grc_dump_regs_hdr(dump_buf, + false, 0, "eng", -1, NULL, NULL); + + /* Write reset registers */ + for (i = 0; i < MAX_DBG_RESET_REGS; i++) { + if (s_reset_regs_defs[i].exists[dev_data->chip_id]) { + offset += qed_grc_dump_reg_entry(p_hwfn, + p_ptt, + dump_buf + offset, + dump, + BYTES_TO_DWORDS + (s_reset_regs_defs + [i].addr), 1); + num_regs++; + } + } + + /* Write header */ + if (dump) + qed_grc_dump_regs_hdr(dump_buf, + true, num_regs, "eng", -1, NULL, NULL); + return offset; +} + +/* Dump registers that are modified during GRC Dump and therefore must be dumped + * first. Returns the dumped size in dwords. + */ +static u32 qed_grc_dump_modified_regs(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, bool dump) +{ + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + u32 offset = 0, num_reg_entries = 0, block_id; + u8 storm_id, reg_idx, num_attn_regs; + + /* Calculate header size */ + offset += qed_grc_dump_regs_hdr(dump_buf, + false, 0, "eng", -1, NULL, NULL); + + /* Write parity registers */ + for (block_id = 0; block_id < MAX_BLOCK_ID; block_id++) { + const struct dbg_attn_reg *attn_reg_arr; + + if (dev_data->block_in_reset[block_id] && dump) + continue; + + attn_reg_arr = qed_get_block_attn_regs((enum block_id)block_id, + ATTN_TYPE_PARITY, + &num_attn_regs); + for (reg_idx = 0; reg_idx < num_attn_regs; reg_idx++) { + const struct dbg_attn_reg *reg_data = + &attn_reg_arr[reg_idx]; + u16 modes_buf_offset; + bool eval_mode; + + /* Check mode */ + eval_mode = GET_FIELD(reg_data->mode.data, + DBG_MODE_HDR_EVAL_MODE) > 0; + modes_buf_offset = + GET_FIELD(reg_data->mode.data, + DBG_MODE_HDR_MODES_BUF_OFFSET); + if (!eval_mode || + qed_is_mode_match(p_hwfn, &modes_buf_offset)) { + /* Mode match - read and dump registers */ + offset += qed_grc_dump_reg_entry(p_hwfn, + p_ptt, + dump_buf + offset, + dump, + reg_data->mask_address, + 1); + offset += qed_grc_dump_reg_entry(p_hwfn, + p_ptt, + dump_buf + offset, + dump, + GET_FIELD(reg_data->data, + DBG_ATTN_REG_STS_ADDRESS), + 1); + num_reg_entries += 2; + } + } + } + + /* Write storm stall status registers */ + for (storm_id = 0; storm_id < MAX_DBG_STORMS; storm_id++) { + if (dev_data->block_in_reset[s_storm_defs[storm_id].block_id] && + dump) + continue; + + offset += qed_grc_dump_reg_entry(p_hwfn, + p_ptt, + dump_buf + offset, + dump, + BYTES_TO_DWORDS(s_storm_defs[storm_id]. + sem_fast_mem_addr + + SEM_FAST_REG_STALLED), + 1); + num_reg_entries++; + } + + /* Write header */ + if (dump) + qed_grc_dump_regs_hdr(dump_buf, + true, + num_reg_entries, "eng", -1, NULL, NULL); + return offset; +} + +/* Dumps a GRC memory header (section and params). + * The following parameters are dumped: + * name - name is dumped only if it's not NULL. + * addr - byte_addr is dumped only if name is NULL. + * len - dword_len is always dumped. + * width - bit_width is dumped if it's not zero. + * packed - packed=1 is dumped if it's not false. + * mem_group - mem_group is always dumped. + * is_storm - true only if the memory is related to a Storm. + * storm_letter - storm letter (valid only if is_storm is true). + * Returns the dumped size in dwords. + */ +static u32 qed_grc_dump_mem_hdr(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + bool dump, + const char *name, + u32 byte_addr, + u32 dword_len, + u32 bit_width, + bool packed, + const char *mem_group, + bool is_storm, char storm_letter) +{ + u8 num_params = 3; + u32 offset = 0; + char buf[64]; + + if (!dword_len) + DP_NOTICE(p_hwfn, + "Unexpected GRC Dump error: dumped memory size must be non-zero\n"); + if (bit_width) + num_params++; + if (packed) + num_params++; + + /* Dump section header */ + offset += qed_dump_section_hdr(dump_buf + offset, + dump, "grc_mem", num_params); + if (name) { + /* Dump name */ + if (is_storm) { + strcpy(buf, "?STORM_"); + buf[0] = storm_letter; + strcpy(buf + strlen(buf), name); + } else { + strcpy(buf, name); + } + + offset += qed_dump_str_param(dump_buf + offset, + dump, "name", buf); + if (dump) + DP_VERBOSE(p_hwfn, + QED_MSG_DEBUG, + "Dumping %d registers from %s...\n", + dword_len, buf); + } else { + /* Dump address */ + offset += qed_dump_num_param(dump_buf + offset, + dump, "addr", byte_addr); + if (dump && dword_len > 64) + DP_VERBOSE(p_hwfn, + QED_MSG_DEBUG, + "Dumping %d registers from address 0x%x...\n", + dword_len, byte_addr); + } + + /* Dump len */ + offset += qed_dump_num_param(dump_buf + offset, dump, "len", dword_len); + + /* Dump bit width */ + if (bit_width) + offset += qed_dump_num_param(dump_buf + offset, + dump, "width", bit_width); + + /* Dump packed */ + if (packed) + offset += qed_dump_num_param(dump_buf + offset, + dump, "packed", 1); + + /* Dump reg type */ + if (is_storm) { + strcpy(buf, "?STORM_"); + buf[0] = storm_letter; + strcpy(buf + strlen(buf), mem_group); + } else { + strcpy(buf, mem_group); + } + + offset += qed_dump_str_param(dump_buf + offset, dump, "type", buf); + return offset; +} + +/* Dumps a single GRC memory. If name is NULL, the memory is stored by address. + * Returns the dumped size in dwords. + */ +static u32 qed_grc_dump_mem(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + bool dump, + const char *name, + u32 byte_addr, + u32 dword_len, + u32 bit_width, + bool packed, + const char *mem_group, + bool is_storm, char storm_letter) +{ + u32 offset = 0; + + offset += qed_grc_dump_mem_hdr(p_hwfn, + dump_buf + offset, + dump, + name, + byte_addr, + dword_len, + bit_width, + packed, + mem_group, is_storm, storm_letter); + if (dump) { + u32 i; + + for (i = 0; i < dword_len; + i++, byte_addr += BYTES_IN_DWORD, offset++) + *(dump_buf + offset) = qed_rd(p_hwfn, p_ptt, byte_addr); + } else { + offset += dword_len; + } + + return offset; +} + +/* Dumps GRC memories entries. Returns the dumped size in dwords. */ +static u32 qed_grc_dump_mem_entries(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + struct dbg_array input_mems_arr, + u32 *dump_buf, bool dump) +{ + u32 i, offset = 0, input_offset = 0; + bool mode_match = true; + + while (input_offset < input_mems_arr.size_in_dwords) { + const struct dbg_dump_cond_hdr *cond_hdr; + u32 num_entries; + bool eval_mode; + + cond_hdr = (const struct dbg_dump_cond_hdr *) + &input_mems_arr.ptr[input_offset++]; + eval_mode = GET_FIELD(cond_hdr->mode.data, + DBG_MODE_HDR_EVAL_MODE) > 0; + + /* Check required mode */ + if (eval_mode) { + u16 modes_buf_offset = + GET_FIELD(cond_hdr->mode.data, + DBG_MODE_HDR_MODES_BUF_OFFSET); + + mode_match = qed_is_mode_match(p_hwfn, + &modes_buf_offset); + } + + if (!mode_match) { + input_offset += cond_hdr->data_size; + continue; + } + + num_entries = cond_hdr->data_size / MEM_DUMP_ENTRY_SIZE_DWORDS; + for (i = 0; i < num_entries; + i++, input_offset += MEM_DUMP_ENTRY_SIZE_DWORDS) { + const struct dbg_dump_mem *mem = + (const struct dbg_dump_mem *) + &input_mems_arr.ptr[input_offset]; + u8 mem_group_id; + + mem_group_id = GET_FIELD(mem->dword0, + DBG_DUMP_MEM_MEM_GROUP_ID); + if (mem_group_id >= MEM_GROUPS_NUM) { + DP_NOTICE(p_hwfn, "Invalid mem_group_id\n"); + return 0; + } + + if (qed_grc_is_mem_included(p_hwfn, + (enum block_id)cond_hdr->block_id, + mem_group_id)) { + u32 mem_byte_addr = + DWORDS_TO_BYTES(GET_FIELD(mem->dword0, + DBG_DUMP_MEM_ADDRESS)); + u32 mem_len = GET_FIELD(mem->dword1, + DBG_DUMP_MEM_LENGTH); + char storm_letter = 'a'; + bool is_storm = false; + + /* Update memory length for CCFC/TCFC memories + * according to number of LCIDs/LTIDs. + */ + if (mem_group_id == MEM_GROUP_CONN_CFC_MEM) + mem_len = qed_grc_get_param(p_hwfn, + DBG_GRC_PARAM_NUM_LCIDS) + * (mem_len / MAX_LCIDS); + else if (mem_group_id == MEM_GROUP_TASK_CFC_MEM) + mem_len = qed_grc_get_param(p_hwfn, + DBG_GRC_PARAM_NUM_LTIDS) + * (mem_len / MAX_LTIDS); + + /* If memory is associated with Storm, update + * Storm details. + */ + if (s_block_defs[cond_hdr->block_id]-> + associated_to_storm) { + is_storm = true; + storm_letter = + s_storm_defs[s_block_defs[ + cond_hdr->block_id]-> + storm_id].letter; + } + + /* Dump memory */ + offset += qed_grc_dump_mem(p_hwfn, p_ptt, + dump_buf + offset, dump, NULL, + mem_byte_addr, mem_len, 0, + false, + s_mem_group_names[mem_group_id], + is_storm, storm_letter); + } + } + } + + return offset; +} + +/* Dumps GRC memories according to the input array dump_mem. + * Returns the dumped size in dwords. + */ +static u32 qed_grc_dump_memories(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, bool dump) +{ + u32 offset = 0, input_offset = 0; + + while (input_offset < + s_dbg_arrays[BIN_BUF_DBG_DUMP_MEM].size_in_dwords) { + const struct dbg_dump_split_hdr *split_hdr = + (const struct dbg_dump_split_hdr *) + &s_dbg_arrays[BIN_BUF_DBG_DUMP_MEM].ptr[input_offset++]; + u8 split_type_id = GET_FIELD(split_hdr->hdr, + DBG_DUMP_SPLIT_HDR_SPLIT_TYPE_ID); + u32 split_data_size = GET_FIELD(split_hdr->hdr, + DBG_DUMP_SPLIT_HDR_DATA_SIZE); + struct dbg_array curr_input_mems_arr = { + &s_dbg_arrays[BIN_BUF_DBG_DUMP_MEM].ptr[input_offset], + split_data_size}; + + switch (split_type_id) { + case SPLIT_TYPE_NONE: + offset += qed_grc_dump_mem_entries(p_hwfn, + p_ptt, + curr_input_mems_arr, + dump_buf + offset, + dump); + break; + default: + DP_NOTICE(p_hwfn, + "Dumping split memories is currently not supported\n"); + break; + } + + input_offset += split_data_size; + } + + return offset; +} + +/* Dumps GRC context data for the specified Storm. + * Returns the dumped size in dwords. + */ +static u32 qed_grc_dump_ctx_data(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + bool dump, + const char *name, + u32 num_lids, + u32 lid_size, + u32 rd_reg_addr, + u8 storm_id) +{ + u32 i, lid, total_size; + u32 offset = 0; + + if (!lid_size) + return 0; + lid_size *= BYTES_IN_DWORD; + total_size = num_lids * lid_size; + offset += qed_grc_dump_mem_hdr(p_hwfn, + dump_buf + offset, + dump, + name, + 0, + total_size, + lid_size * 32, + false, + name, + true, s_storm_defs[storm_id].letter); + + /* Dump context data */ + if (dump) { + for (lid = 0; lid < num_lids; lid++) { + for (i = 0; i < lid_size; i++, offset++) { + qed_wr(p_hwfn, + p_ptt, + s_storm_defs[storm_id].cm_ctx_wr_addr, + BIT(9) | lid); + *(dump_buf + offset) = qed_rd(p_hwfn, + p_ptt, + rd_reg_addr); + } + } + } else { + offset += total_size; + } + + return offset; +} + +/* Dumps GRC contexts. Returns the dumped size in dwords. */ +static u32 qed_grc_dump_ctx(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u32 *dump_buf, bool dump) +{ + u32 offset = 0; + u8 storm_id; + + for (storm_id = 0; storm_id < MAX_DBG_STORMS; storm_id++) { + if (!qed_grc_is_storm_included(p_hwfn, + (enum dbg_storms)storm_id)) + continue; + + /* Dump Conn AG context size */ + offset += + qed_grc_dump_ctx_data(p_hwfn, + p_ptt, + dump_buf + offset, + dump, + "CONN_AG_CTX", + qed_grc_get_param(p_hwfn, + DBG_GRC_PARAM_NUM_LCIDS), + s_storm_defs[storm_id]. + cm_conn_ag_ctx_lid_size, + s_storm_defs[storm_id]. + cm_conn_ag_ctx_rd_addr, + storm_id); + + /* Dump Conn ST context size */ + offset += + qed_grc_dump_ctx_data(p_hwfn, + p_ptt, + dump_buf + offset, + dump, + "CONN_ST_CTX", + qed_grc_get_param(p_hwfn, + DBG_GRC_PARAM_NUM_LCIDS), + s_storm_defs[storm_id]. + cm_conn_st_ctx_lid_size, + s_storm_defs[storm_id]. + cm_conn_st_ctx_rd_addr, + storm_id); + + /* Dump Task AG context size */ + offset += + qed_grc_dump_ctx_data(p_hwfn, + p_ptt, + dump_buf + offset, + dump, + "TASK_AG_CTX", + qed_grc_get_param(p_hwfn, + DBG_GRC_PARAM_NUM_LTIDS), + s_storm_defs[storm_id]. + cm_task_ag_ctx_lid_size, + s_storm_defs[storm_id]. + cm_task_ag_ctx_rd_addr, + storm_id); + + /* Dump Task ST context size */ + offset += + qed_grc_dump_ctx_data(p_hwfn, + p_ptt, + dump_buf + offset, + dump, + "TASK_ST_CTX", + qed_grc_get_param(p_hwfn, + DBG_GRC_PARAM_NUM_LTIDS), + s_storm_defs[storm_id]. + cm_task_st_ctx_lid_size, + s_storm_defs[storm_id]. + cm_task_st_ctx_rd_addr, + storm_id); + } + + return offset; +} + +/* Dumps GRC IORs data. Returns the dumped size in dwords. */ +static u32 qed_grc_dump_iors(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u32 *dump_buf, bool dump) +{ + char buf[10] = "IOR_SET_?"; + u8 storm_id, set_id; + u32 offset = 0; + + for (storm_id = 0; storm_id < MAX_DBG_STORMS; storm_id++) { + if (qed_grc_is_storm_included(p_hwfn, + (enum dbg_storms)storm_id)) { + for (set_id = 0; set_id < NUM_IOR_SETS; set_id++) { + u32 addr = + s_storm_defs[storm_id].sem_fast_mem_addr + + SEM_FAST_REG_STORM_REG_FILE + + DWORDS_TO_BYTES(IOR_SET_OFFSET(set_id)); + + buf[strlen(buf) - 1] = '0' + set_id; + offset += qed_grc_dump_mem(p_hwfn, + p_ptt, + dump_buf + offset, + dump, + buf, + addr, + IORS_PER_SET, + 32, + false, + "ior", + true, + s_storm_defs + [storm_id].letter); + } + } + } + + return offset; +} + +/* Dump VFC CAM. Returns the dumped size in dwords. */ +static u32 qed_grc_dump_vfc_cam(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, bool dump, u8 storm_id) +{ + u32 total_size = VFC_CAM_NUM_ROWS * VFC_CAM_RESP_DWORDS; + u32 cam_addr[VFC_CAM_ADDR_DWORDS] = { 0 }; + u32 cam_cmd[VFC_CAM_CMD_DWORDS] = { 0 }; + u32 offset = 0; + u32 row, i; + + offset += qed_grc_dump_mem_hdr(p_hwfn, + dump_buf + offset, + dump, + "vfc_cam", + 0, + total_size, + 256, + false, + "vfc_cam", + true, s_storm_defs[storm_id].letter); + if (dump) { + /* Prepare CAM address */ + SET_VAR_FIELD(cam_addr, VFC_CAM_ADDR, OP, VFC_OPCODE_CAM_RD); + for (row = 0; row < VFC_CAM_NUM_ROWS; + row++, offset += VFC_CAM_RESP_DWORDS) { + /* Write VFC CAM command */ + SET_VAR_FIELD(cam_cmd, VFC_CAM_CMD, ROW, row); + ARR_REG_WR(p_hwfn, + p_ptt, + s_storm_defs[storm_id].sem_fast_mem_addr + + SEM_FAST_REG_VFC_DATA_WR, + cam_cmd, VFC_CAM_CMD_DWORDS); + + /* Write VFC CAM address */ + ARR_REG_WR(p_hwfn, + p_ptt, + s_storm_defs[storm_id].sem_fast_mem_addr + + SEM_FAST_REG_VFC_ADDR, + cam_addr, VFC_CAM_ADDR_DWORDS); + + /* Read VFC CAM read response */ + ARR_REG_RD(p_hwfn, + p_ptt, + s_storm_defs[storm_id].sem_fast_mem_addr + + SEM_FAST_REG_VFC_DATA_RD, + dump_buf + offset, VFC_CAM_RESP_DWORDS); + } + } else { + offset += total_size; + } + + return offset; +} + +/* Dump VFC RAM. Returns the dumped size in dwords. */ +static u32 qed_grc_dump_vfc_ram(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + bool dump, + u8 storm_id, struct vfc_ram_defs *ram_defs) +{ + u32 total_size = ram_defs->num_rows * VFC_RAM_RESP_DWORDS; + u32 ram_addr[VFC_RAM_ADDR_DWORDS] = { 0 }; + u32 ram_cmd[VFC_RAM_CMD_DWORDS] = { 0 }; + u32 offset = 0; + u32 row, i; + + offset += qed_grc_dump_mem_hdr(p_hwfn, + dump_buf + offset, + dump, + ram_defs->mem_name, + 0, + total_size, + 256, + false, + ram_defs->type_name, + true, s_storm_defs[storm_id].letter); + + /* Prepare RAM address */ + SET_VAR_FIELD(ram_addr, VFC_RAM_ADDR, OP, VFC_OPCODE_RAM_RD); + + if (!dump) + return offset + total_size; + + for (row = ram_defs->base_row; + row < ram_defs->base_row + ram_defs->num_rows; + row++, offset += VFC_RAM_RESP_DWORDS) { + /* Write VFC RAM command */ + ARR_REG_WR(p_hwfn, + p_ptt, + s_storm_defs[storm_id].sem_fast_mem_addr + + SEM_FAST_REG_VFC_DATA_WR, + ram_cmd, VFC_RAM_CMD_DWORDS); + + /* Write VFC RAM address */ + SET_VAR_FIELD(ram_addr, VFC_RAM_ADDR, ROW, row); + ARR_REG_WR(p_hwfn, + p_ptt, + s_storm_defs[storm_id].sem_fast_mem_addr + + SEM_FAST_REG_VFC_ADDR, + ram_addr, VFC_RAM_ADDR_DWORDS); + + /* Read VFC RAM read response */ + ARR_REG_RD(p_hwfn, + p_ptt, + s_storm_defs[storm_id].sem_fast_mem_addr + + SEM_FAST_REG_VFC_DATA_RD, + dump_buf + offset, VFC_RAM_RESP_DWORDS); + } + + return offset; +} + +/* Dumps GRC VFC data. Returns the dumped size in dwords. */ +static u32 qed_grc_dump_vfc(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u32 *dump_buf, bool dump) +{ + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + u8 storm_id, i; + u32 offset = 0; + + for (storm_id = 0; storm_id < MAX_DBG_STORMS; storm_id++) { + if (qed_grc_is_storm_included(p_hwfn, + (enum dbg_storms)storm_id) && + s_storm_defs[storm_id].has_vfc && + (storm_id != DBG_PSTORM_ID || + dev_data->platform_id == PLATFORM_ASIC)) { + /* Read CAM */ + offset += qed_grc_dump_vfc_cam(p_hwfn, + p_ptt, + dump_buf + offset, + dump, storm_id); + + /* Read RAM */ + for (i = 0; i < NUM_VFC_RAM_TYPES; i++) + offset += qed_grc_dump_vfc_ram(p_hwfn, + p_ptt, + dump_buf + + offset, + dump, + storm_id, + &s_vfc_ram_defs + [i]); + } + } + + return offset; +} + +/* Dumps GRC RSS data. Returns the dumped size in dwords. */ +static u32 qed_grc_dump_rss(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u32 *dump_buf, bool dump) +{ + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + u32 offset = 0; + u8 rss_mem_id; + + for (rss_mem_id = 0; rss_mem_id < NUM_RSS_MEM_TYPES; rss_mem_id++) { + struct rss_mem_defs *rss_defs = &s_rss_mem_defs[rss_mem_id]; + u32 num_entries = rss_defs->num_entries[dev_data->chip_id]; + u32 entry_width = rss_defs->entry_width[dev_data->chip_id]; + u32 total_size = (num_entries * entry_width) / 32; + bool packed = (entry_width == 16); + u32 addr = rss_defs->addr; + u32 i, j; + + offset += qed_grc_dump_mem_hdr(p_hwfn, + dump_buf + offset, + dump, + rss_defs->mem_name, + addr, + total_size, + entry_width, + packed, + rss_defs->type_name, false, 0); + + if (!dump) { + offset += total_size; + continue; + } + + /* Dump RSS data */ + for (i = 0; i < BYTES_TO_DWORDS(total_size); i++, addr++) { + qed_wr(p_hwfn, p_ptt, RSS_REG_RSS_RAM_ADDR, addr); + for (j = 0; j < BYTES_IN_DWORD; j++, offset++) + *(dump_buf + offset) = + qed_rd(p_hwfn, p_ptt, + RSS_REG_RSS_RAM_DATA + + DWORDS_TO_BYTES(j)); + } + } + + return offset; +} + +/* Dumps GRC Big RAM. Returns the dumped size in dwords. */ +static u32 qed_grc_dump_big_ram(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, bool dump, u8 big_ram_id) +{ + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + char mem_name[12] = "???_BIG_RAM"; + char type_name[8] = "???_RAM"; + u32 ram_size, total_blocks; + u32 offset = 0, i, j; + + total_blocks = + s_big_ram_defs[big_ram_id].num_of_blocks[dev_data->chip_id]; + ram_size = total_blocks * BIG_RAM_BLOCK_SIZE_DWORDS; + + strncpy(type_name, s_big_ram_defs[big_ram_id].instance_name, + strlen(s_big_ram_defs[big_ram_id].instance_name)); + strncpy(mem_name, s_big_ram_defs[big_ram_id].instance_name, + strlen(s_big_ram_defs[big_ram_id].instance_name)); + + /* Dump memory header */ + offset += qed_grc_dump_mem_hdr(p_hwfn, + dump_buf + offset, + dump, + mem_name, + 0, + ram_size, + BIG_RAM_BLOCK_SIZE_BYTES * 8, + false, type_name, false, 0); + + if (!dump) + return offset + ram_size; + + /* Read and dump Big RAM data */ + for (i = 0; i < total_blocks / 2; i++) { + qed_wr(p_hwfn, p_ptt, s_big_ram_defs[big_ram_id].addr_reg_addr, + i); + for (j = 0; j < 2 * BIG_RAM_BLOCK_SIZE_DWORDS; j++, offset++) + *(dump_buf + offset) = qed_rd(p_hwfn, p_ptt, + s_big_ram_defs[big_ram_id]. + data_reg_addr + + DWORDS_TO_BYTES(j)); + } + + return offset; +} + +static u32 qed_grc_dump_mcp(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u32 *dump_buf, bool dump) +{ + bool block_enable[MAX_BLOCK_ID] = { 0 }; + bool halted = false; + u32 offset = 0; + + /* Halt MCP */ + if (dump) { + halted = !qed_mcp_halt(p_hwfn, p_ptt); + if (!halted) + DP_NOTICE(p_hwfn, "MCP halt failed!\n"); + } + + /* Dump MCP scratchpad */ + offset += qed_grc_dump_mem(p_hwfn, + p_ptt, + dump_buf + offset, + dump, + NULL, + MCP_REG_SCRATCH, + MCP_REG_SCRATCH_SIZE, + 0, false, "MCP", false, 0); + + /* Dump MCP cpu_reg_file */ + offset += qed_grc_dump_mem(p_hwfn, + p_ptt, + dump_buf + offset, + dump, + NULL, + MCP_REG_CPU_REG_FILE, + MCP_REG_CPU_REG_FILE_SIZE, + 0, false, "MCP", false, 0); + + /* Dump MCP registers */ + block_enable[BLOCK_MCP] = true; + offset += qed_grc_dump_registers(p_hwfn, + p_ptt, + dump_buf + offset, + dump, block_enable, "block", "MCP"); + + /* Dump required non-MCP registers */ + offset += qed_grc_dump_regs_hdr(dump_buf + offset, + dump, 1, "eng", -1, "block", "MCP"); + offset += qed_grc_dump_reg_entry(p_hwfn, + p_ptt, + dump_buf + offset, + dump, + BYTES_TO_DWORDS + (MISC_REG_SHARED_MEM_ADDR), 1); + + /* Release MCP */ + if (halted && qed_mcp_resume(p_hwfn, p_ptt)) + DP_NOTICE(p_hwfn, "Failed to resume MCP after halt!\n"); + return offset; +} + +/* Dumps the tbus indirect memory for all PHYs. */ +static u32 qed_grc_dump_phy(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u32 *dump_buf, bool dump) +{ + u32 offset = 0, tbus_lo_offset, tbus_hi_offset; + char mem_name[32]; + u8 phy_id; + + for (phy_id = 0; phy_id < ARRAY_SIZE(s_phy_defs); phy_id++) { + struct phy_defs *phy_defs = &s_phy_defs[phy_id]; + int printed_chars; + + printed_chars = snprintf(mem_name, sizeof(mem_name), "tbus_%s", + phy_defs->phy_name); + if (printed_chars < 0 || printed_chars >= sizeof(mem_name)) + DP_NOTICE(p_hwfn, + "Unexpected debug error: invalid PHY memory name\n"); + offset += qed_grc_dump_mem_hdr(p_hwfn, + dump_buf + offset, + dump, + mem_name, + 0, + PHY_DUMP_SIZE_DWORDS, + 16, true, mem_name, false, 0); + if (dump) { + u32 addr_lo_addr = phy_defs->base_addr + + phy_defs->tbus_addr_lo_addr; + u32 addr_hi_addr = phy_defs->base_addr + + phy_defs->tbus_addr_hi_addr; + u32 data_lo_addr = phy_defs->base_addr + + phy_defs->tbus_data_lo_addr; + u32 data_hi_addr = phy_defs->base_addr + + phy_defs->tbus_data_hi_addr; + u8 *bytes_buf = (u8 *)(dump_buf + offset); + + for (tbus_hi_offset = 0; + tbus_hi_offset < (NUM_PHY_TBUS_ADDRESSES >> 8); + tbus_hi_offset++) { + qed_wr(p_hwfn, + p_ptt, addr_hi_addr, tbus_hi_offset); + for (tbus_lo_offset = 0; tbus_lo_offset < 256; + tbus_lo_offset++) { + qed_wr(p_hwfn, + p_ptt, + addr_lo_addr, tbus_lo_offset); + *(bytes_buf++) = + (u8)qed_rd(p_hwfn, p_ptt, + data_lo_addr); + *(bytes_buf++) = + (u8)qed_rd(p_hwfn, p_ptt, + data_hi_addr); + } + } + } + + offset += PHY_DUMP_SIZE_DWORDS; + } + + return offset; +} + +static void qed_config_dbg_line(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + enum block_id block_id, + u8 line_id, + u8 cycle_en, + u8 right_shift, u8 force_valid, u8 force_frame) +{ + struct block_defs *p_block_defs = s_block_defs[block_id]; + + qed_wr(p_hwfn, p_ptt, p_block_defs->dbg_select_addr, line_id); + qed_wr(p_hwfn, p_ptt, p_block_defs->dbg_cycle_enable_addr, cycle_en); + qed_wr(p_hwfn, p_ptt, p_block_defs->dbg_shift_addr, right_shift); + qed_wr(p_hwfn, p_ptt, p_block_defs->dbg_force_valid_addr, force_valid); + qed_wr(p_hwfn, p_ptt, p_block_defs->dbg_force_frame_addr, force_frame); +} + +/* Dumps Static Debug data. Returns the dumped size in dwords. */ +static u32 qed_grc_dump_static_debug(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, bool dump) +{ + u32 block_dwords = NUM_DBG_BUS_LINES * STATIC_DEBUG_LINE_DWORDS; + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + u32 offset = 0, block_id, line_id, addr, i; + struct block_defs *p_block_defs; + + if (dump) { + DP_VERBOSE(p_hwfn, + QED_MSG_DEBUG, "Dumping static debug data...\n"); + + /* Disable all blocks debug output */ + for (block_id = 0; block_id < MAX_BLOCK_ID; block_id++) { + p_block_defs = s_block_defs[block_id]; + + if (p_block_defs->has_dbg_bus[dev_data->chip_id]) + qed_wr(p_hwfn, p_ptt, + p_block_defs->dbg_cycle_enable_addr, 0); + } + + qed_bus_reset_dbg_block(p_hwfn, p_ptt); + qed_bus_set_framing_mode(p_hwfn, + p_ptt, DBG_BUS_FRAME_MODE_8HW_0ST); + qed_wr(p_hwfn, + p_ptt, DBG_REG_DEBUG_TARGET, DBG_BUS_TARGET_ID_INT_BUF); + qed_wr(p_hwfn, p_ptt, DBG_REG_FULL_MODE, 1); + qed_bus_enable_dbg_block(p_hwfn, p_ptt, true); + } + + /* Dump all static debug lines for each relevant block */ + for (block_id = 0; block_id < MAX_BLOCK_ID; block_id++) { + p_block_defs = s_block_defs[block_id]; + + if (!p_block_defs->has_dbg_bus[dev_data->chip_id]) + continue; + + /* Dump static section params */ + offset += qed_grc_dump_mem_hdr(p_hwfn, + dump_buf + offset, + dump, + p_block_defs->name, 0, + block_dwords, 32, false, + "STATIC", false, 0); + + if (dump && !dev_data->block_in_reset[block_id]) { + u8 dbg_client_id = + p_block_defs->dbg_client_id[dev_data->chip_id]; + + /* Enable block's client */ + qed_bus_enable_clients(p_hwfn, p_ptt, + BIT(dbg_client_id)); + + for (line_id = 0; line_id < NUM_DBG_BUS_LINES; + line_id++) { + /* Configure debug line ID */ + qed_config_dbg_line(p_hwfn, + p_ptt, + (enum block_id)block_id, + (u8)line_id, + 0xf, 0, 0, 0); + + /* Read debug line info */ + for (i = 0, addr = DBG_REG_CALENDAR_OUT_DATA; + i < STATIC_DEBUG_LINE_DWORDS; + i++, offset++, addr += BYTES_IN_DWORD) + dump_buf[offset] = qed_rd(p_hwfn, p_ptt, + addr); + } + + /* Disable block's client and debug output */ + qed_bus_enable_clients(p_hwfn, p_ptt, 0); + qed_wr(p_hwfn, p_ptt, + p_block_defs->dbg_cycle_enable_addr, 0); + } else { + /* All lines are invalid - dump zeros */ + if (dump) + memset(dump_buf + offset, 0, + DWORDS_TO_BYTES(block_dwords)); + offset += block_dwords; + } + } + + if (dump) { + qed_bus_enable_dbg_block(p_hwfn, p_ptt, false); + qed_bus_enable_clients(p_hwfn, p_ptt, 0); + } + + return offset; +} + +/* Performs GRC Dump to the specified buffer. + * Returns the dumped size in dwords. + */ +static enum dbg_status qed_grc_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + bool dump, u32 *num_dumped_dwords) +{ + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + bool parities_masked = false; + u8 i, port_mode = 0; + u32 offset = 0; + + /* Check if emulation platform */ + *num_dumped_dwords = 0; + + /* Fill GRC parameters that were not set by the user with their default + * value. + */ + qed_dbg_grc_set_params_default(p_hwfn); + + /* Find port mode */ + if (dump) { + switch (qed_rd(p_hwfn, p_ptt, MISC_REG_PORT_MODE)) { + case 0: + port_mode = 1; + break; + case 1: + port_mode = 2; + break; + case 2: + port_mode = 4; + break; + } + } + + /* Update reset state */ + if (dump) + qed_update_blocks_reset_state(p_hwfn, p_ptt); + + /* Dump global params */ + offset += qed_dump_common_global_params(p_hwfn, + p_ptt, + dump_buf + offset, dump, 4); + offset += qed_dump_str_param(dump_buf + offset, + dump, "dump-type", "grc-dump"); + offset += qed_dump_num_param(dump_buf + offset, + dump, + "num-lcids", + qed_grc_get_param(p_hwfn, + DBG_GRC_PARAM_NUM_LCIDS)); + offset += qed_dump_num_param(dump_buf + offset, + dump, + "num-ltids", + qed_grc_get_param(p_hwfn, + DBG_GRC_PARAM_NUM_LTIDS)); + offset += qed_dump_num_param(dump_buf + offset, + dump, "num-ports", port_mode); + + /* Dump reset registers (dumped before taking blocks out of reset ) */ + if (qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_REGS)) + offset += qed_grc_dump_reset_regs(p_hwfn, + p_ptt, + dump_buf + offset, dump); + + /* Take all blocks out of reset (using reset registers) */ + if (dump) { + qed_grc_unreset_blocks(p_hwfn, p_ptt); + qed_update_blocks_reset_state(p_hwfn, p_ptt); + } + + /* Disable all parities using MFW command */ + if (dump) { + parities_masked = !qed_mcp_mask_parities(p_hwfn, p_ptt, 1); + if (!parities_masked) { + if (qed_grc_get_param + (p_hwfn, DBG_GRC_PARAM_PARITY_SAFE)) + return DBG_STATUS_MCP_COULD_NOT_MASK_PRTY; + else + DP_NOTICE(p_hwfn, + "Failed to mask parities using MFW\n"); + } + } + + /* Dump modified registers (dumped before modifying them) */ + if (qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_REGS)) + offset += qed_grc_dump_modified_regs(p_hwfn, + p_ptt, + dump_buf + offset, dump); + + /* Stall storms */ + if (dump && + (qed_grc_is_included(p_hwfn, + DBG_GRC_PARAM_DUMP_IOR) || + qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_VFC))) + qed_grc_stall_storms(p_hwfn, p_ptt, true); + + /* Dump all regs */ + if (qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_REGS)) { + /* Dump all blocks except MCP */ + bool block_enable[MAX_BLOCK_ID]; + + for (i = 0; i < MAX_BLOCK_ID; i++) + block_enable[i] = true; + block_enable[BLOCK_MCP] = false; + offset += qed_grc_dump_registers(p_hwfn, + p_ptt, + dump_buf + + offset, + dump, + block_enable, NULL, NULL); + } + + /* Dump memories */ + offset += qed_grc_dump_memories(p_hwfn, p_ptt, dump_buf + offset, dump); + + /* Dump MCP */ + if (qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_MCP)) + offset += qed_grc_dump_mcp(p_hwfn, + p_ptt, dump_buf + offset, dump); + + /* Dump context */ + if (qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_CM_CTX)) + offset += qed_grc_dump_ctx(p_hwfn, + p_ptt, dump_buf + offset, dump); + + /* Dump RSS memories */ + if (qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_RSS)) + offset += qed_grc_dump_rss(p_hwfn, + p_ptt, dump_buf + offset, dump); + + /* Dump Big RAM */ + for (i = 0; i < NUM_BIG_RAM_TYPES; i++) + if (qed_grc_is_included(p_hwfn, s_big_ram_defs[i].grc_param)) + offset += qed_grc_dump_big_ram(p_hwfn, + p_ptt, + dump_buf + offset, + dump, i); + + /* Dump IORs */ + if (qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_IOR)) + offset += qed_grc_dump_iors(p_hwfn, + p_ptt, dump_buf + offset, dump); + + /* Dump VFC */ + if (qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_VFC)) + offset += qed_grc_dump_vfc(p_hwfn, + p_ptt, dump_buf + offset, dump); + + /* Dump PHY tbus */ + if (qed_grc_is_included(p_hwfn, + DBG_GRC_PARAM_DUMP_PHY) && dev_data->chip_id == + CHIP_K2 && dev_data->platform_id == PLATFORM_ASIC) + offset += qed_grc_dump_phy(p_hwfn, + p_ptt, dump_buf + offset, dump); + + /* Dump static debug data */ + if (qed_grc_is_included(p_hwfn, + DBG_GRC_PARAM_DUMP_STATIC) && + dev_data->bus.state == DBG_BUS_STATE_IDLE) + offset += qed_grc_dump_static_debug(p_hwfn, + p_ptt, + dump_buf + offset, dump); + + /* Dump last section */ + offset += qed_dump_last_section(dump_buf, offset, dump); + if (dump) { + /* Unstall storms */ + if (qed_grc_get_param(p_hwfn, DBG_GRC_PARAM_UNSTALL)) + qed_grc_stall_storms(p_hwfn, p_ptt, false); + + /* Clear parity status */ + qed_grc_clear_all_prty(p_hwfn, p_ptt); + + /* Enable all parities using MFW command */ + if (parities_masked) + qed_mcp_mask_parities(p_hwfn, p_ptt, 0); + } + + *num_dumped_dwords = offset; + + return DBG_STATUS_OK; +} + +/* Writes the specified failing Idle Check rule to the specified buffer. + * Returns the dumped size in dwords. + */ +static u32 qed_idle_chk_dump_failure(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 * + dump_buf, + bool dump, + u16 rule_id, + const struct dbg_idle_chk_rule *rule, + u16 fail_entry_id, u32 *cond_reg_values) +{ + const union dbg_idle_chk_reg *regs = &((const union dbg_idle_chk_reg *) + s_dbg_arrays + [BIN_BUF_DBG_IDLE_CHK_REGS]. + ptr)[rule->reg_offset]; + const struct dbg_idle_chk_cond_reg *cond_regs = ®s[0].cond_reg; + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + struct dbg_idle_chk_result_hdr *hdr = + (struct dbg_idle_chk_result_hdr *)dump_buf; + const struct dbg_idle_chk_info_reg *info_regs = + ®s[rule->num_cond_regs].info_reg; + u32 next_reg_offset = 0, i, offset = 0; + u8 reg_id; + + /* Dump rule data */ + if (dump) { + memset(hdr, 0, sizeof(*hdr)); + hdr->rule_id = rule_id; + hdr->mem_entry_id = fail_entry_id; + hdr->severity = rule->severity; + hdr->num_dumped_cond_regs = rule->num_cond_regs; + } + + offset += IDLE_CHK_RESULT_HDR_DWORDS; + + /* Dump condition register values */ + for (reg_id = 0; reg_id < rule->num_cond_regs; reg_id++) { + const struct dbg_idle_chk_cond_reg *reg = &cond_regs[reg_id]; + + /* Write register header */ + if (dump) { + struct dbg_idle_chk_result_reg_hdr *reg_hdr = + (struct dbg_idle_chk_result_reg_hdr *)(dump_buf + + offset); + offset += IDLE_CHK_RESULT_REG_HDR_DWORDS; + memset(reg_hdr, 0, + sizeof(struct dbg_idle_chk_result_reg_hdr)); + reg_hdr->start_entry = reg->start_entry; + reg_hdr->size = reg->entry_size; + SET_FIELD(reg_hdr->data, + DBG_IDLE_CHK_RESULT_REG_HDR_IS_MEM, + reg->num_entries > 1 || reg->start_entry > 0 + ? 1 : 0); + SET_FIELD(reg_hdr->data, + DBG_IDLE_CHK_RESULT_REG_HDR_REG_ID, reg_id); + + /* Write register values */ + for (i = 0; i < reg_hdr->size; + i++, next_reg_offset++, offset++) + dump_buf[offset] = + cond_reg_values[next_reg_offset]; + } else { + offset += IDLE_CHK_RESULT_REG_HDR_DWORDS + + reg->entry_size; + } + } + + /* Dump info register values */ + for (reg_id = 0; reg_id < rule->num_info_regs; reg_id++) { + const struct dbg_idle_chk_info_reg *reg = &info_regs[reg_id]; + u32 block_id; + + if (!dump) { + offset += IDLE_CHK_RESULT_REG_HDR_DWORDS + reg->size; + continue; + } + + /* Check if register's block is in reset */ + block_id = GET_FIELD(reg->data, DBG_IDLE_CHK_INFO_REG_BLOCK_ID); + if (block_id >= MAX_BLOCK_ID) { + DP_NOTICE(p_hwfn, "Invalid block_id\n"); + return 0; + } + + if (!dev_data->block_in_reset[block_id]) { + bool eval_mode = GET_FIELD(reg->mode.data, + DBG_MODE_HDR_EVAL_MODE) > 0; + bool mode_match = true; + + /* Check mode */ + if (eval_mode) { + u16 modes_buf_offset = + GET_FIELD(reg->mode.data, + DBG_MODE_HDR_MODES_BUF_OFFSET); + mode_match = + qed_is_mode_match(p_hwfn, + &modes_buf_offset); + } + + if (mode_match) { + u32 grc_addr = + DWORDS_TO_BYTES(GET_FIELD(reg->data, + DBG_IDLE_CHK_INFO_REG_ADDRESS)); + + /* Write register header */ + struct dbg_idle_chk_result_reg_hdr *reg_hdr = + (struct dbg_idle_chk_result_reg_hdr *) + (dump_buf + offset); + + offset += IDLE_CHK_RESULT_REG_HDR_DWORDS; + hdr->num_dumped_info_regs++; + memset(reg_hdr, 0, sizeof(*reg_hdr)); + reg_hdr->size = reg->size; + SET_FIELD(reg_hdr->data, + DBG_IDLE_CHK_RESULT_REG_HDR_REG_ID, + rule->num_cond_regs + reg_id); + + /* Write register values */ + for (i = 0; i < reg->size; + i++, offset++, grc_addr += 4) + dump_buf[offset] = + qed_rd(p_hwfn, p_ptt, grc_addr); + } + } + } + + return offset; +} + +/* Dumps idle check rule entries. Returns the dumped size in dwords. */ +static u32 +qed_idle_chk_dump_rule_entries(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, + u32 *dump_buf, bool dump, + const struct dbg_idle_chk_rule *input_rules, + u32 num_input_rules, u32 *num_failing_rules) +{ + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + u32 cond_reg_values[IDLE_CHK_MAX_ENTRIES_SIZE]; + u32 i, j, offset = 0; + u16 entry_id; + u8 reg_id; + + *num_failing_rules = 0; + for (i = 0; i < num_input_rules; i++) { + const struct dbg_idle_chk_cond_reg *cond_regs; + const struct dbg_idle_chk_rule *rule; + const union dbg_idle_chk_reg *regs; + u16 num_reg_entries = 1; + bool check_rule = true; + const u32 *imm_values; + + rule = &input_rules[i]; + regs = &((const union dbg_idle_chk_reg *) + s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_REGS].ptr) + [rule->reg_offset]; + cond_regs = ®s[0].cond_reg; + imm_values = &s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_IMMS].ptr + [rule->imm_offset]; + + /* Check if all condition register blocks are out of reset, and + * find maximal number of entries (all condition registers that + * are memories must have the same size, which is > 1). + */ + for (reg_id = 0; reg_id < rule->num_cond_regs && check_rule; + reg_id++) { + u32 block_id = GET_FIELD(cond_regs[reg_id].data, + DBG_IDLE_CHK_COND_REG_BLOCK_ID); + + if (block_id >= MAX_BLOCK_ID) { + DP_NOTICE(p_hwfn, "Invalid block_id\n"); + return 0; + } + + check_rule = !dev_data->block_in_reset[block_id]; + if (cond_regs[reg_id].num_entries > num_reg_entries) + num_reg_entries = cond_regs[reg_id].num_entries; + } + + if (!check_rule && dump) + continue; + + /* Go over all register entries (number of entries is the same + * for all condition registers). + */ + for (entry_id = 0; entry_id < num_reg_entries; entry_id++) { + /* Read current entry of all condition registers */ + if (dump) { + u32 next_reg_offset = 0; + + for (reg_id = 0; + reg_id < rule->num_cond_regs; + reg_id++) { + const struct dbg_idle_chk_cond_reg + *reg = &cond_regs[reg_id]; + + /* Find GRC address (if it's a memory, + * the address of the specific entry is + * calculated). + */ + u32 grc_addr = + DWORDS_TO_BYTES( + GET_FIELD(reg->data, + DBG_IDLE_CHK_COND_REG_ADDRESS)); + + if (reg->num_entries > 1 || + reg->start_entry > 0) { + u32 padded_entry_size = + reg->entry_size > 1 ? + roundup_pow_of_two + (reg->entry_size) : 1; + + grc_addr += + DWORDS_TO_BYTES( + (reg->start_entry + + entry_id) + * padded_entry_size); + } + + /* Read registers */ + if (next_reg_offset + reg->entry_size >= + IDLE_CHK_MAX_ENTRIES_SIZE) { + DP_NOTICE(p_hwfn, + "idle check registers entry is too large\n"); + return 0; + } + + for (j = 0; j < reg->entry_size; + j++, next_reg_offset++, + grc_addr += 4) + cond_reg_values[next_reg_offset] = + qed_rd(p_hwfn, p_ptt, grc_addr); + } + } + + /* Call rule's condition function - a return value of + * true indicates failure. + */ + if ((*cond_arr[rule->cond_id])(cond_reg_values, + imm_values) || !dump) { + offset += + qed_idle_chk_dump_failure(p_hwfn, + p_ptt, + dump_buf + offset, + dump, + rule->rule_id, + rule, + entry_id, + cond_reg_values); + (*num_failing_rules)++; + break; + } + } + } + + return offset; +} + +/* Performs Idle Check Dump to the specified buffer. + * Returns the dumped size in dwords. + */ +static u32 qed_idle_chk_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u32 *dump_buf, bool dump) +{ + u32 offset = 0, input_offset = 0, num_failing_rules = 0; + u32 num_failing_rules_offset; + + /* Dump global params */ + offset += qed_dump_common_global_params(p_hwfn, + p_ptt, + dump_buf + offset, dump, 1); + offset += qed_dump_str_param(dump_buf + offset, + dump, "dump-type", "idle-chk"); + + /* Dump idle check section header with a single parameter */ + offset += qed_dump_section_hdr(dump_buf + offset, dump, "idle_chk", 1); + num_failing_rules_offset = offset; + offset += qed_dump_num_param(dump_buf + offset, dump, "num_rules", 0); + while (input_offset < + s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_RULES].size_in_dwords) { + const struct dbg_idle_chk_cond_hdr *cond_hdr = + (const struct dbg_idle_chk_cond_hdr *) + &s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_RULES].ptr + [input_offset++]; + bool eval_mode = GET_FIELD(cond_hdr->mode.data, + DBG_MODE_HDR_EVAL_MODE) > 0; + bool mode_match = true; + + /* Check mode */ + if (eval_mode) { + u16 modes_buf_offset = + GET_FIELD(cond_hdr->mode.data, + DBG_MODE_HDR_MODES_BUF_OFFSET); + + mode_match = qed_is_mode_match(p_hwfn, + &modes_buf_offset); + } + + if (mode_match) { + u32 curr_failing_rules; + + offset += + qed_idle_chk_dump_rule_entries(p_hwfn, + p_ptt, + dump_buf + offset, + dump, + (const struct dbg_idle_chk_rule *) + &s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_RULES]. + ptr[input_offset], + cond_hdr->data_size / IDLE_CHK_RULE_SIZE_DWORDS, + &curr_failing_rules); + num_failing_rules += curr_failing_rules; + } + + input_offset += cond_hdr->data_size; + } + + /* Overwrite num_rules parameter */ + if (dump) + qed_dump_num_param(dump_buf + num_failing_rules_offset, + dump, "num_rules", num_failing_rules); + + return offset; +} + +/* Finds the meta data image in NVRAM. */ +static enum dbg_status qed_find_nvram_image(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 image_type, + u32 *nvram_offset_bytes, + u32 *nvram_size_bytes) +{ + u32 ret_mcp_resp, ret_mcp_param, ret_txn_size; + struct mcp_file_att file_att; + + /* Call NVRAM get file command */ + if (qed_mcp_nvm_rd_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_NVM_GET_FILE_ATT, + image_type, &ret_mcp_resp, &ret_mcp_param, + &ret_txn_size, (u32 *)&file_att) != 0) + return DBG_STATUS_NVRAM_GET_IMAGE_FAILED; + + /* Check response */ + if ((ret_mcp_resp & FW_MSG_CODE_MASK) != FW_MSG_CODE_NVM_OK) + return DBG_STATUS_NVRAM_GET_IMAGE_FAILED; + + /* Update return values */ + *nvram_offset_bytes = file_att.nvm_start_addr; + *nvram_size_bytes = file_att.len; + DP_VERBOSE(p_hwfn, + QED_MSG_DEBUG, + "find_nvram_image: found NVRAM image of type %d in NVRAM offset %d bytes with size %d bytes\n", + image_type, *nvram_offset_bytes, *nvram_size_bytes); + + /* Check alignment */ + if (*nvram_size_bytes & 0x3) + return DBG_STATUS_NON_ALIGNED_NVRAM_IMAGE; + return DBG_STATUS_OK; +} + +static enum dbg_status qed_nvram_read(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 nvram_offset_bytes, + u32 nvram_size_bytes, u32 *ret_buf) +{ + u32 ret_mcp_resp, ret_mcp_param, ret_read_size; + u32 bytes_to_copy, read_offset = 0; + s32 bytes_left = nvram_size_bytes; + + DP_VERBOSE(p_hwfn, + QED_MSG_DEBUG, + "nvram_read: reading image of size %d bytes from NVRAM\n", + nvram_size_bytes); + do { + bytes_to_copy = + (bytes_left > + MCP_DRV_NVM_BUF_LEN) ? MCP_DRV_NVM_BUF_LEN : bytes_left; + + /* Call NVRAM read command */ + if (qed_mcp_nvm_rd_cmd(p_hwfn, p_ptt, + DRV_MSG_CODE_NVM_READ_NVRAM, + (nvram_offset_bytes + + read_offset) | + (bytes_to_copy << + DRV_MB_PARAM_NVM_LEN_SHIFT), + &ret_mcp_resp, &ret_mcp_param, + &ret_read_size, + (u32 *)((u8 *)ret_buf + + read_offset)) != 0) + return DBG_STATUS_NVRAM_READ_FAILED; + + /* Check response */ + if ((ret_mcp_resp & FW_MSG_CODE_MASK) != FW_MSG_CODE_NVM_OK) + return DBG_STATUS_NVRAM_READ_FAILED; + + /* Update read offset */ + read_offset += ret_read_size; + bytes_left -= ret_read_size; + } while (bytes_left > 0); + + return DBG_STATUS_OK; +} + +/* Get info on the MCP Trace data in the scratchpad: + * - trace_data_grc_addr - the GRC address of the trace data + * - trace_data_size_bytes - the size in bytes of the MCP Trace data (without + * the header) + */ +static enum dbg_status qed_mcp_trace_get_data_info(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *trace_data_grc_addr, + u32 *trace_data_size_bytes) +{ + /* Read MCP trace section offsize structure from MCP scratchpad */ + u32 spad_trace_offsize = qed_rd(p_hwfn, + p_ptt, + MCP_SPAD_TRACE_OFFSIZE_ADDR); + u32 signature; + + /* Extract MCP trace section GRC address from offsize structure (within + * scratchpad). + */ + *trace_data_grc_addr = + MCP_REG_SCRATCH + SECTION_OFFSET(spad_trace_offsize); + + /* Read signature from MCP trace section */ + signature = qed_rd(p_hwfn, p_ptt, + *trace_data_grc_addr + + offsetof(struct mcp_trace, signature)); + if (signature != MFW_TRACE_SIGNATURE) + return DBG_STATUS_INVALID_TRACE_SIGNATURE; + + /* Read trace size from MCP trace section */ + *trace_data_size_bytes = qed_rd(p_hwfn, + p_ptt, + *trace_data_grc_addr + + offsetof(struct mcp_trace, size)); + return DBG_STATUS_OK; +} + +/* Reads MCP trace meta data image from NVRAM. + * - running_bundle_id (OUT) - the running bundle ID (invalid when loaded from + * file) + * - trace_meta_offset_bytes (OUT) - the NVRAM offset in bytes in which the MCP + * Trace meta data starts (invalid when loaded from file) + * - trace_meta_size_bytes (OUT) - the size in bytes of the MCP Trace meta data + */ +static enum dbg_status qed_mcp_trace_get_meta_info(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 trace_data_size_bytes, + u32 *running_bundle_id, + u32 *trace_meta_offset_bytes, + u32 *trace_meta_size_bytes) +{ + /* Read MCP trace section offsize structure from MCP scratchpad */ + u32 spad_trace_offsize = qed_rd(p_hwfn, + p_ptt, + MCP_SPAD_TRACE_OFFSIZE_ADDR); + + /* Find running bundle ID */ + u32 running_mfw_addr = + MCP_REG_SCRATCH + SECTION_OFFSET(spad_trace_offsize) + + QED_SECTION_SIZE(spad_trace_offsize) + trace_data_size_bytes; + enum dbg_status status; + u32 nvram_image_type; + + *running_bundle_id = qed_rd(p_hwfn, p_ptt, running_mfw_addr); + if (*running_bundle_id > 1) + return DBG_STATUS_INVALID_NVRAM_BUNDLE; + + /* Find image in NVRAM */ + nvram_image_type = + (*running_bundle_id == + DIR_ID_1) ? NVM_TYPE_MFW_TRACE1 : NVM_TYPE_MFW_TRACE2; + status = qed_find_nvram_image(p_hwfn, + p_ptt, + nvram_image_type, + trace_meta_offset_bytes, + trace_meta_size_bytes); + + return status; +} + +/* Reads the MCP Trace data from the specified GRC address into the specified + * buffer. + */ +static void qed_mcp_trace_read_data(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 grc_addr, u32 size_in_dwords, u32 *buf) +{ + u32 i; + + DP_VERBOSE(p_hwfn, + QED_MSG_DEBUG, + "mcp_trace_read_data: reading trace data of size %d dwords from GRC address 0x%x\n", + size_in_dwords, grc_addr); + for (i = 0; i < size_in_dwords; i++, grc_addr += BYTES_IN_DWORD) + buf[i] = qed_rd(p_hwfn, p_ptt, grc_addr); +} + +/* Reads the MCP Trace meta data (from NVRAM or buffer) into the specified + * buffer. + */ +static enum dbg_status qed_mcp_trace_read_meta(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 nvram_offset_in_bytes, + u32 size_in_bytes, u32 *buf) +{ + u8 *byte_buf = (u8 *)buf; + u8 modules_num, i; + u32 signature; + + /* Read meta data from NVRAM */ + enum dbg_status status = qed_nvram_read(p_hwfn, + p_ptt, + nvram_offset_in_bytes, + size_in_bytes, + buf); + + if (status != DBG_STATUS_OK) + return status; + + /* Extract and check first signature */ + signature = qed_read_unaligned_dword(byte_buf); + byte_buf += sizeof(u32); + if (signature != MCP_TRACE_META_IMAGE_SIGNATURE) + return DBG_STATUS_INVALID_TRACE_SIGNATURE; + + /* Extract number of modules */ + modules_num = *(byte_buf++); + + /* Skip all modules */ + for (i = 0; i < modules_num; i++) { + u8 module_len = *(byte_buf++); + + byte_buf += module_len; + } + + /* Extract and check second signature */ + signature = qed_read_unaligned_dword(byte_buf); + byte_buf += sizeof(u32); + if (signature != MCP_TRACE_META_IMAGE_SIGNATURE) + return DBG_STATUS_INVALID_TRACE_SIGNATURE; + return DBG_STATUS_OK; +} + +/* Dump MCP Trace */ +enum dbg_status qed_mcp_trace_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + bool dump, u32 *num_dumped_dwords) +{ + u32 trace_data_grc_addr, trace_data_size_bytes, trace_data_size_dwords; + u32 trace_meta_size_dwords, running_bundle_id, offset = 0; + u32 trace_meta_offset_bytes, trace_meta_size_bytes; + enum dbg_status status; + int halted = 0; + + *num_dumped_dwords = 0; + + /* Get trace data info */ + status = qed_mcp_trace_get_data_info(p_hwfn, + p_ptt, + &trace_data_grc_addr, + &trace_data_size_bytes); + if (status != DBG_STATUS_OK) + return status; + + /* Dump global params */ + offset += qed_dump_common_global_params(p_hwfn, + p_ptt, + dump_buf + offset, dump, 1); + offset += qed_dump_str_param(dump_buf + offset, + dump, "dump-type", "mcp-trace"); + + /* Halt MCP while reading from scratchpad so the read data will be + * consistent if halt fails, MCP trace is taken anyway, with a small + * risk that it may be corrupt. + */ + if (dump) { + halted = !qed_mcp_halt(p_hwfn, p_ptt); + if (!halted) + DP_NOTICE(p_hwfn, "MCP halt failed!\n"); + } + + /* Find trace data size */ + trace_data_size_dwords = + DIV_ROUND_UP(trace_data_size_bytes + sizeof(struct mcp_trace), + BYTES_IN_DWORD); + + /* Dump trace data section header and param */ + offset += qed_dump_section_hdr(dump_buf + offset, + dump, "mcp_trace_data", 1); + offset += qed_dump_num_param(dump_buf + offset, + dump, "size", trace_data_size_dwords); + + /* Read trace data from scratchpad into dump buffer */ + if (dump) + qed_mcp_trace_read_data(p_hwfn, + p_ptt, + trace_data_grc_addr, + trace_data_size_dwords, + dump_buf + offset); + offset += trace_data_size_dwords; + + /* Resume MCP (only if halt succeeded) */ + if (halted && qed_mcp_resume(p_hwfn, p_ptt) != 0) + DP_NOTICE(p_hwfn, "Failed to resume MCP after halt!\n"); + + /* Dump trace meta section header */ + offset += qed_dump_section_hdr(dump_buf + offset, + dump, "mcp_trace_meta", 1); + + /* Read trace meta info */ + status = qed_mcp_trace_get_meta_info(p_hwfn, + p_ptt, + trace_data_size_bytes, + &running_bundle_id, + &trace_meta_offset_bytes, + &trace_meta_size_bytes); + if (status != DBG_STATUS_OK) + return status; + + /* Dump trace meta size param (trace_meta_size_bytes is always + * dword-aligned). + */ + trace_meta_size_dwords = BYTES_TO_DWORDS(trace_meta_size_bytes); + offset += qed_dump_num_param(dump_buf + offset, dump, "size", + trace_meta_size_dwords); + + /* Read trace meta image into dump buffer */ + if (dump) { + status = qed_mcp_trace_read_meta(p_hwfn, + p_ptt, + trace_meta_offset_bytes, + trace_meta_size_bytes, + dump_buf + offset); + if (status != DBG_STATUS_OK) + return status; + } + + offset += trace_meta_size_dwords; + + *num_dumped_dwords = offset; + + return DBG_STATUS_OK; +} + +/* Dump GRC FIFO */ +enum dbg_status qed_reg_fifo_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + bool dump, u32 *num_dumped_dwords) +{ + u32 offset = 0, dwords_read, size_param_offset; + bool fifo_has_data; + + *num_dumped_dwords = 0; + + /* Dump global params */ + offset += qed_dump_common_global_params(p_hwfn, + p_ptt, + dump_buf + offset, dump, 1); + offset += qed_dump_str_param(dump_buf + offset, + dump, "dump-type", "reg-fifo"); + + /* Dump fifo data section header and param. The size param is 0 for now, + * and is overwritten after reading the FIFO. + */ + offset += qed_dump_section_hdr(dump_buf + offset, + dump, "reg_fifo_data", 1); + size_param_offset = offset; + offset += qed_dump_num_param(dump_buf + offset, dump, "size", 0); + + if (!dump) { + /* FIFO max size is REG_FIFO_DEPTH_DWORDS. There is no way to + * test how much data is available, except for reading it. + */ + offset += REG_FIFO_DEPTH_DWORDS; + *num_dumped_dwords = offset; + return DBG_STATUS_OK; + } + + fifo_has_data = qed_rd(p_hwfn, p_ptt, + GRC_REG_TRACE_FIFO_VALID_DATA) > 0; + + /* Pull available data from fifo. Use DMAE since this is widebus memory + * and must be accessed atomically. Test for dwords_read not passing + * buffer size since more entries could be added to the buffer as we are + * emptying it. + */ + for (dwords_read = 0; + fifo_has_data && dwords_read < REG_FIFO_DEPTH_DWORDS; + dwords_read += REG_FIFO_ELEMENT_DWORDS, offset += + REG_FIFO_ELEMENT_DWORDS) { + if (qed_dmae_grc2host(p_hwfn, p_ptt, GRC_REG_TRACE_FIFO, + (u64)(uintptr_t)(&dump_buf[offset]), + REG_FIFO_ELEMENT_DWORDS, 0)) + return DBG_STATUS_DMAE_FAILED; + fifo_has_data = qed_rd(p_hwfn, p_ptt, + GRC_REG_TRACE_FIFO_VALID_DATA) > 0; + } + + qed_dump_num_param(dump_buf + size_param_offset, dump, "size", + dwords_read); + + *num_dumped_dwords = offset; + return DBG_STATUS_OK; +} + +/* Dump IGU FIFO */ +enum dbg_status qed_igu_fifo_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + bool dump, u32 *num_dumped_dwords) +{ + u32 offset = 0, dwords_read, size_param_offset; + bool fifo_has_data; + + *num_dumped_dwords = 0; + + /* Dump global params */ + offset += qed_dump_common_global_params(p_hwfn, + p_ptt, + dump_buf + offset, dump, 1); + offset += qed_dump_str_param(dump_buf + offset, + dump, "dump-type", "igu-fifo"); + + /* Dump fifo data section header and param. The size param is 0 for now, + * and is overwritten after reading the FIFO. + */ + offset += qed_dump_section_hdr(dump_buf + offset, + dump, "igu_fifo_data", 1); + size_param_offset = offset; + offset += qed_dump_num_param(dump_buf + offset, dump, "size", 0); + + if (!dump) { + /* FIFO max size is IGU_FIFO_DEPTH_DWORDS. There is no way to + * test how much data is available, except for reading it. + */ + offset += IGU_FIFO_DEPTH_DWORDS; + *num_dumped_dwords = offset; + return DBG_STATUS_OK; + } + + fifo_has_data = qed_rd(p_hwfn, p_ptt, + IGU_REG_ERROR_HANDLING_DATA_VALID) > 0; + + /* Pull available data from fifo. Use DMAE since this is widebus memory + * and must be accessed atomically. Test for dwords_read not passing + * buffer size since more entries could be added to the buffer as we are + * emptying it. + */ + for (dwords_read = 0; + fifo_has_data && dwords_read < IGU_FIFO_DEPTH_DWORDS; + dwords_read += IGU_FIFO_ELEMENT_DWORDS, offset += + IGU_FIFO_ELEMENT_DWORDS) { + if (qed_dmae_grc2host(p_hwfn, p_ptt, + IGU_REG_ERROR_HANDLING_MEMORY, + (u64)(uintptr_t)(&dump_buf[offset]), + IGU_FIFO_ELEMENT_DWORDS, 0)) + return DBG_STATUS_DMAE_FAILED; + fifo_has_data = qed_rd(p_hwfn, p_ptt, + IGU_REG_ERROR_HANDLING_DATA_VALID) > 0; + } + + qed_dump_num_param(dump_buf + size_param_offset, dump, "size", + dwords_read); + + *num_dumped_dwords = offset; + return DBG_STATUS_OK; +} + +/* Protection Override dump */ +enum dbg_status qed_protection_override_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + bool dump, u32 *num_dumped_dwords) +{ + u32 offset = 0, size_param_offset, override_window_dwords; + + *num_dumped_dwords = 0; + + /* Dump global params */ + offset += qed_dump_common_global_params(p_hwfn, + p_ptt, + dump_buf + offset, dump, 1); + offset += qed_dump_str_param(dump_buf + offset, + dump, "dump-type", "protection-override"); + + /* Dump data section header and param. The size param is 0 for now, and + * is overwritten after reading the data. + */ + offset += qed_dump_section_hdr(dump_buf + offset, + dump, "protection_override_data", 1); + size_param_offset = offset; + offset += qed_dump_num_param(dump_buf + offset, dump, "size", 0); + + if (!dump) { + offset += PROTECTION_OVERRIDE_DEPTH_DWORDS; + *num_dumped_dwords = offset; + return DBG_STATUS_OK; + } + + /* Add override window info to buffer */ + override_window_dwords = + qed_rd(p_hwfn, p_ptt, + GRC_REG_NUMBER_VALID_OVERRIDE_WINDOW) * + PROTECTION_OVERRIDE_ELEMENT_DWORDS; + if (qed_dmae_grc2host(p_hwfn, p_ptt, + GRC_REG_PROTECTION_OVERRIDE_WINDOW, + (u64)(uintptr_t)(dump_buf + offset), + override_window_dwords, 0)) + return DBG_STATUS_DMAE_FAILED; + offset += override_window_dwords; + qed_dump_num_param(dump_buf + size_param_offset, dump, "size", + override_window_dwords); + + *num_dumped_dwords = offset; + return DBG_STATUS_OK; +} + +/* Performs FW Asserts Dump to the specified buffer. + * Returns the dumped size in dwords. + */ +static u32 qed_fw_asserts_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u32 *dump_buf, bool dump) +{ + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + char storm_letter_str[2] = "?"; + struct fw_info fw_info; + u32 offset = 0, i; + u8 storm_id; + + /* Dump global params */ + offset += qed_dump_common_global_params(p_hwfn, + p_ptt, + dump_buf + offset, dump, 1); + offset += qed_dump_str_param(dump_buf + offset, + dump, "dump-type", "fw-asserts"); + for (storm_id = 0; storm_id < MAX_DBG_STORMS; storm_id++) { + u32 fw_asserts_section_addr, next_list_idx_addr, next_list_idx, + last_list_idx, element_addr; + + if (dev_data->block_in_reset[s_storm_defs[storm_id].block_id]) + continue; + + /* Read FW info for the current Storm */ + qed_read_fw_info(p_hwfn, p_ptt, storm_id, &fw_info); + + /* Dump FW Asserts section header and params */ + storm_letter_str[0] = s_storm_defs[storm_id].letter; + offset += qed_dump_section_hdr(dump_buf + offset, dump, + "fw_asserts", 2); + offset += qed_dump_str_param(dump_buf + offset, dump, "storm", + storm_letter_str); + offset += qed_dump_num_param(dump_buf + offset, dump, "size", + fw_info.fw_asserts_section. + list_element_dword_size); + + if (!dump) { + offset += fw_info.fw_asserts_section. + list_element_dword_size; + continue; + } + + /* Read and dump FW Asserts data */ + fw_asserts_section_addr = + s_storm_defs[storm_id].sem_fast_mem_addr + + SEM_FAST_REG_INT_RAM + + RAM_LINES_TO_BYTES(fw_info.fw_asserts_section. + section_ram_line_offset); + next_list_idx_addr = + fw_asserts_section_addr + + DWORDS_TO_BYTES(fw_info.fw_asserts_section. + list_next_index_dword_offset); + next_list_idx = qed_rd(p_hwfn, p_ptt, next_list_idx_addr); + last_list_idx = (next_list_idx > 0 + ? next_list_idx + : fw_info.fw_asserts_section.list_num_elements) + - 1; + element_addr = + fw_asserts_section_addr + + DWORDS_TO_BYTES(fw_info.fw_asserts_section. + list_dword_offset) + + last_list_idx * + DWORDS_TO_BYTES(fw_info.fw_asserts_section. + list_element_dword_size); + for (i = 0; + i < fw_info.fw_asserts_section.list_element_dword_size; + i++, offset++, element_addr += BYTES_IN_DWORD) + dump_buf[offset] = qed_rd(p_hwfn, p_ptt, element_addr); + } + + /* Dump last section */ + offset += qed_dump_section_hdr(dump_buf + offset, dump, "last", 0); + return offset; +} + +/***************************** Public Functions *******************************/ + +enum dbg_status qed_dbg_set_bin_ptr(const u8 * const bin_ptr) +{ + /* Convert binary data to debug arrays */ + u32 num_of_buffers = *(u32 *)bin_ptr; + struct bin_buffer_hdr *buf_array; + u8 buf_id; + + buf_array = (struct bin_buffer_hdr *)((u32 *)bin_ptr + 1); + + for (buf_id = 0; buf_id < num_of_buffers; buf_id++) { + s_dbg_arrays[buf_id].ptr = + (u32 *)(bin_ptr + buf_array[buf_id].offset); + s_dbg_arrays[buf_id].size_in_dwords = + BYTES_TO_DWORDS(buf_array[buf_id].length); + } + + return DBG_STATUS_OK; +} + +enum dbg_status qed_dbg_grc_get_dump_buf_size(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *buf_size) +{ + enum dbg_status status = qed_dbg_dev_init(p_hwfn, p_ptt); + + *buf_size = 0; + if (status != DBG_STATUS_OK) + return status; + if (!s_dbg_arrays[BIN_BUF_DBG_MODE_TREE].ptr || + !s_dbg_arrays[BIN_BUF_DBG_DUMP_REG].ptr || + !s_dbg_arrays[BIN_BUF_DBG_DUMP_MEM].ptr || + !s_dbg_arrays[BIN_BUF_DBG_ATTN_BLOCKS].ptr || + !s_dbg_arrays[BIN_BUF_DBG_ATTN_REGS].ptr) + return DBG_STATUS_DBG_ARRAY_NOT_SET; + return qed_grc_dump(p_hwfn, p_ptt, NULL, false, buf_size); +} + +enum dbg_status qed_dbg_grc_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + u32 buf_size_in_dwords, + u32 *num_dumped_dwords) +{ + u32 needed_buf_size_in_dwords; + enum dbg_status status; + + status = qed_dbg_grc_get_dump_buf_size(p_hwfn, p_ptt, + &needed_buf_size_in_dwords); + + *num_dumped_dwords = 0; + if (status != DBG_STATUS_OK) + return status; + if (buf_size_in_dwords < needed_buf_size_in_dwords) + return DBG_STATUS_DUMP_BUF_TOO_SMALL; + + /* GRC Dump */ + status = qed_grc_dump(p_hwfn, p_ptt, dump_buf, true, num_dumped_dwords); + + /* Clear all GRC params */ + qed_dbg_grc_clear_params(p_hwfn); + return status; +} + +enum dbg_status qed_dbg_idle_chk_get_dump_buf_size(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *buf_size) +{ + enum dbg_status status = qed_dbg_dev_init(p_hwfn, p_ptt); + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + + *buf_size = 0; + if (status != DBG_STATUS_OK) + return status; + if (!s_dbg_arrays[BIN_BUF_DBG_MODE_TREE].ptr || + !s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_REGS].ptr || + !s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_IMMS].ptr || + !s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_RULES].ptr) + return DBG_STATUS_DBG_ARRAY_NOT_SET; + if (!dev_data->idle_chk.buf_size_set) { + dev_data->idle_chk.buf_size = qed_idle_chk_dump(p_hwfn, + p_ptt, + NULL, false); + dev_data->idle_chk.buf_size_set = true; + } + + *buf_size = dev_data->idle_chk.buf_size; + return DBG_STATUS_OK; +} + +enum dbg_status qed_dbg_idle_chk_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + u32 buf_size_in_dwords, + u32 *num_dumped_dwords) +{ + u32 needed_buf_size_in_dwords; + enum dbg_status status; + + status = qed_dbg_idle_chk_get_dump_buf_size(p_hwfn, p_ptt, + &needed_buf_size_in_dwords); + + *num_dumped_dwords = 0; + if (status != DBG_STATUS_OK) + return status; + if (buf_size_in_dwords < needed_buf_size_in_dwords) + return DBG_STATUS_DUMP_BUF_TOO_SMALL; + + /* Update reset state */ + qed_update_blocks_reset_state(p_hwfn, p_ptt); + + /* Idle Check Dump */ + *num_dumped_dwords = qed_idle_chk_dump(p_hwfn, p_ptt, dump_buf, true); + return DBG_STATUS_OK; +} + +enum dbg_status qed_dbg_mcp_trace_get_dump_buf_size(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *buf_size) +{ + enum dbg_status status = qed_dbg_dev_init(p_hwfn, p_ptt); + + *buf_size = 0; + if (status != DBG_STATUS_OK) + return status; + return qed_mcp_trace_dump(p_hwfn, p_ptt, NULL, false, buf_size); +} + +enum dbg_status qed_dbg_mcp_trace_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + u32 buf_size_in_dwords, + u32 *num_dumped_dwords) +{ + u32 needed_buf_size_in_dwords; + enum dbg_status status; + + status = qed_dbg_mcp_trace_get_dump_buf_size(p_hwfn, p_ptt, + &needed_buf_size_in_dwords); + + if (status != DBG_STATUS_OK) + return status; + if (buf_size_in_dwords < needed_buf_size_in_dwords) + return DBG_STATUS_DUMP_BUF_TOO_SMALL; + + /* Update reset state */ + qed_update_blocks_reset_state(p_hwfn, p_ptt); + + /* Perform dump */ + return qed_mcp_trace_dump(p_hwfn, + p_ptt, dump_buf, true, num_dumped_dwords); +} + +enum dbg_status qed_dbg_reg_fifo_get_dump_buf_size(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *buf_size) +{ + enum dbg_status status = qed_dbg_dev_init(p_hwfn, p_ptt); + + *buf_size = 0; + if (status != DBG_STATUS_OK) + return status; + return qed_reg_fifo_dump(p_hwfn, p_ptt, NULL, false, buf_size); +} + +enum dbg_status qed_dbg_reg_fifo_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + u32 buf_size_in_dwords, + u32 *num_dumped_dwords) +{ + u32 needed_buf_size_in_dwords; + enum dbg_status status; + + status = qed_dbg_reg_fifo_get_dump_buf_size(p_hwfn, p_ptt, + &needed_buf_size_in_dwords); + + *num_dumped_dwords = 0; + if (status != DBG_STATUS_OK) + return status; + if (buf_size_in_dwords < needed_buf_size_in_dwords) + return DBG_STATUS_DUMP_BUF_TOO_SMALL; + + /* Update reset state */ + qed_update_blocks_reset_state(p_hwfn, p_ptt); + return qed_reg_fifo_dump(p_hwfn, + p_ptt, dump_buf, true, num_dumped_dwords); +} + +enum dbg_status qed_dbg_igu_fifo_get_dump_buf_size(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *buf_size) +{ + enum dbg_status status = qed_dbg_dev_init(p_hwfn, p_ptt); + + *buf_size = 0; + if (status != DBG_STATUS_OK) + return status; + return qed_igu_fifo_dump(p_hwfn, p_ptt, NULL, false, buf_size); +} + +enum dbg_status qed_dbg_igu_fifo_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + u32 buf_size_in_dwords, + u32 *num_dumped_dwords) +{ + u32 needed_buf_size_in_dwords; + enum dbg_status status; + + status = qed_dbg_igu_fifo_get_dump_buf_size(p_hwfn, p_ptt, + &needed_buf_size_in_dwords); + + *num_dumped_dwords = 0; + if (status != DBG_STATUS_OK) + return status; + if (buf_size_in_dwords < needed_buf_size_in_dwords) + return DBG_STATUS_DUMP_BUF_TOO_SMALL; + + /* Update reset state */ + qed_update_blocks_reset_state(p_hwfn, p_ptt); + return qed_igu_fifo_dump(p_hwfn, + p_ptt, dump_buf, true, num_dumped_dwords); +} + +enum dbg_status +qed_dbg_protection_override_get_dump_buf_size(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *buf_size) +{ + enum dbg_status status = qed_dbg_dev_init(p_hwfn, p_ptt); + + *buf_size = 0; + if (status != DBG_STATUS_OK) + return status; + return qed_protection_override_dump(p_hwfn, + p_ptt, NULL, false, buf_size); +} + +enum dbg_status qed_dbg_protection_override_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + u32 buf_size_in_dwords, + u32 *num_dumped_dwords) +{ + u32 needed_buf_size_in_dwords; + enum dbg_status status; + + status = qed_dbg_protection_override_get_dump_buf_size(p_hwfn, p_ptt, + &needed_buf_size_in_dwords); + + *num_dumped_dwords = 0; + if (status != DBG_STATUS_OK) + return status; + if (buf_size_in_dwords < needed_buf_size_in_dwords) + return DBG_STATUS_DUMP_BUF_TOO_SMALL; + + /* Update reset state */ + qed_update_blocks_reset_state(p_hwfn, p_ptt); + return qed_protection_override_dump(p_hwfn, + p_ptt, + dump_buf, true, num_dumped_dwords); +} + +enum dbg_status qed_dbg_fw_asserts_get_dump_buf_size(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *buf_size) +{ + enum dbg_status status = qed_dbg_dev_init(p_hwfn, p_ptt); + + *buf_size = 0; + if (status != DBG_STATUS_OK) + return status; + + /* Update reset state */ + qed_update_blocks_reset_state(p_hwfn, p_ptt); + *buf_size = qed_fw_asserts_dump(p_hwfn, p_ptt, NULL, false); + return DBG_STATUS_OK; +} + +enum dbg_status qed_dbg_fw_asserts_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + u32 buf_size_in_dwords, + u32 *num_dumped_dwords) +{ + u32 needed_buf_size_in_dwords; + enum dbg_status status; + + status = qed_dbg_fw_asserts_get_dump_buf_size(p_hwfn, p_ptt, + &needed_buf_size_in_dwords); + + *num_dumped_dwords = 0; + if (status != DBG_STATUS_OK) + return status; + if (buf_size_in_dwords < needed_buf_size_in_dwords) + return DBG_STATUS_DUMP_BUF_TOO_SMALL; + + *num_dumped_dwords = qed_fw_asserts_dump(p_hwfn, p_ptt, dump_buf, true); + return DBG_STATUS_OK; +} + +/******************************* Data Types **********************************/ + +struct mcp_trace_format { + u32 data; +#define MCP_TRACE_FORMAT_MODULE_MASK 0x0000ffff +#define MCP_TRACE_FORMAT_MODULE_SHIFT 0 +#define MCP_TRACE_FORMAT_LEVEL_MASK 0x00030000 +#define MCP_TRACE_FORMAT_LEVEL_SHIFT 16 +#define MCP_TRACE_FORMAT_P1_SIZE_MASK 0x000c0000 +#define MCP_TRACE_FORMAT_P1_SIZE_SHIFT 18 +#define MCP_TRACE_FORMAT_P2_SIZE_MASK 0x00300000 +#define MCP_TRACE_FORMAT_P2_SIZE_SHIFT 20 +#define MCP_TRACE_FORMAT_P3_SIZE_MASK 0x00c00000 +#define MCP_TRACE_FORMAT_P3_SIZE_SHIFT 22 +#define MCP_TRACE_FORMAT_LEN_MASK 0xff000000 +#define MCP_TRACE_FORMAT_LEN_SHIFT 24 + char *format_str; +}; + +struct mcp_trace_meta { + u32 modules_num; + char **modules; + u32 formats_num; + struct mcp_trace_format *formats; +}; + +/* Reg fifo element */ +struct reg_fifo_element { + u64 data; +#define REG_FIFO_ELEMENT_ADDRESS_SHIFT 0 +#define REG_FIFO_ELEMENT_ADDRESS_MASK 0x7fffff +#define REG_FIFO_ELEMENT_ACCESS_SHIFT 23 +#define REG_FIFO_ELEMENT_ACCESS_MASK 0x1 +#define REG_FIFO_ELEMENT_PF_SHIFT 24 +#define REG_FIFO_ELEMENT_PF_MASK 0xf +#define REG_FIFO_ELEMENT_VF_SHIFT 28 +#define REG_FIFO_ELEMENT_VF_MASK 0xff +#define REG_FIFO_ELEMENT_PORT_SHIFT 36 +#define REG_FIFO_ELEMENT_PORT_MASK 0x3 +#define REG_FIFO_ELEMENT_PRIVILEGE_SHIFT 38 +#define REG_FIFO_ELEMENT_PRIVILEGE_MASK 0x3 +#define REG_FIFO_ELEMENT_PROTECTION_SHIFT 40 +#define REG_FIFO_ELEMENT_PROTECTION_MASK 0x7 +#define REG_FIFO_ELEMENT_MASTER_SHIFT 43 +#define REG_FIFO_ELEMENT_MASTER_MASK 0xf +#define REG_FIFO_ELEMENT_ERROR_SHIFT 47 +#define REG_FIFO_ELEMENT_ERROR_MASK 0x1f +}; + +/* IGU fifo element */ +struct igu_fifo_element { + u32 dword0; +#define IGU_FIFO_ELEMENT_DWORD0_FID_SHIFT 0 +#define IGU_FIFO_ELEMENT_DWORD0_FID_MASK 0xff +#define IGU_FIFO_ELEMENT_DWORD0_IS_PF_SHIFT 8 +#define IGU_FIFO_ELEMENT_DWORD0_IS_PF_MASK 0x1 +#define IGU_FIFO_ELEMENT_DWORD0_SOURCE_SHIFT 9 +#define IGU_FIFO_ELEMENT_DWORD0_SOURCE_MASK 0xf +#define IGU_FIFO_ELEMENT_DWORD0_ERR_TYPE_SHIFT 13 +#define IGU_FIFO_ELEMENT_DWORD0_ERR_TYPE_MASK 0xf +#define IGU_FIFO_ELEMENT_DWORD0_CMD_ADDR_SHIFT 17 +#define IGU_FIFO_ELEMENT_DWORD0_CMD_ADDR_MASK 0x7fff + u32 dword1; + u32 dword2; +#define IGU_FIFO_ELEMENT_DWORD12_IS_WR_CMD_SHIFT 0 +#define IGU_FIFO_ELEMENT_DWORD12_IS_WR_CMD_MASK 0x1 +#define IGU_FIFO_ELEMENT_DWORD12_WR_DATA_SHIFT 1 +#define IGU_FIFO_ELEMENT_DWORD12_WR_DATA_MASK 0xffffffff + u32 reserved; +}; + +struct igu_fifo_wr_data { + u32 data; +#define IGU_FIFO_WR_DATA_PROD_CONS_SHIFT 0 +#define IGU_FIFO_WR_DATA_PROD_CONS_MASK 0xffffff +#define IGU_FIFO_WR_DATA_UPDATE_FLAG_SHIFT 24 +#define IGU_FIFO_WR_DATA_UPDATE_FLAG_MASK 0x1 +#define IGU_FIFO_WR_DATA_EN_DIS_INT_FOR_SB_SHIFT 25 +#define IGU_FIFO_WR_DATA_EN_DIS_INT_FOR_SB_MASK 0x3 +#define IGU_FIFO_WR_DATA_SEGMENT_SHIFT 27 +#define IGU_FIFO_WR_DATA_SEGMENT_MASK 0x1 +#define IGU_FIFO_WR_DATA_TIMER_MASK_SHIFT 28 +#define IGU_FIFO_WR_DATA_TIMER_MASK_MASK 0x1 +#define IGU_FIFO_WR_DATA_CMD_TYPE_SHIFT 31 +#define IGU_FIFO_WR_DATA_CMD_TYPE_MASK 0x1 +}; + +struct igu_fifo_cleanup_wr_data { + u32 data; +#define IGU_FIFO_CLEANUP_WR_DATA_RESERVED_SHIFT 0 +#define IGU_FIFO_CLEANUP_WR_DATA_RESERVED_MASK 0x7ffffff +#define IGU_FIFO_CLEANUP_WR_DATA_CLEANUP_VAL_SHIFT 27 +#define IGU_FIFO_CLEANUP_WR_DATA_CLEANUP_VAL_MASK 0x1 +#define IGU_FIFO_CLEANUP_WR_DATA_CLEANUP_TYPE_SHIFT 28 +#define IGU_FIFO_CLEANUP_WR_DATA_CLEANUP_TYPE_MASK 0x7 +#define IGU_FIFO_CLEANUP_WR_DATA_CMD_TYPE_SHIFT 31 +#define IGU_FIFO_CLEANUP_WR_DATA_CMD_TYPE_MASK 0x1 +}; + +/* Protection override element */ +struct protection_override_element { + u64 data; +#define PROTECTION_OVERRIDE_ELEMENT_ADDRESS_SHIFT 0 +#define PROTECTION_OVERRIDE_ELEMENT_ADDRESS_MASK 0x7fffff +#define PROTECTION_OVERRIDE_ELEMENT_WINDOW_SIZE_SHIFT 23 +#define PROTECTION_OVERRIDE_ELEMENT_WINDOW_SIZE_MASK 0xffffff +#define PROTECTION_OVERRIDE_ELEMENT_READ_SHIFT 47 +#define PROTECTION_OVERRIDE_ELEMENT_READ_MASK 0x1 +#define PROTECTION_OVERRIDE_ELEMENT_WRITE_SHIFT 48 +#define PROTECTION_OVERRIDE_ELEMENT_WRITE_MASK 0x1 +#define PROTECTION_OVERRIDE_ELEMENT_READ_PROTECTION_SHIFT 49 +#define PROTECTION_OVERRIDE_ELEMENT_READ_PROTECTION_MASK 0x7 +#define PROTECTION_OVERRIDE_ELEMENT_WRITE_PROTECTION_SHIFT 52 +#define PROTECTION_OVERRIDE_ELEMENT_WRITE_PROTECTION_MASK 0x7 +}; + +enum igu_fifo_sources { + IGU_SRC_PXP0, + IGU_SRC_PXP1, + IGU_SRC_PXP2, + IGU_SRC_PXP3, + IGU_SRC_PXP4, + IGU_SRC_PXP5, + IGU_SRC_PXP6, + IGU_SRC_PXP7, + IGU_SRC_CAU, + IGU_SRC_ATTN, + IGU_SRC_GRC +}; + +enum igu_fifo_addr_types { + IGU_ADDR_TYPE_MSIX_MEM, + IGU_ADDR_TYPE_WRITE_PBA, + IGU_ADDR_TYPE_WRITE_INT_ACK, + IGU_ADDR_TYPE_WRITE_ATTN_BITS, + IGU_ADDR_TYPE_READ_INT, + IGU_ADDR_TYPE_WRITE_PROD_UPDATE, + IGU_ADDR_TYPE_RESERVED +}; + +struct igu_fifo_addr_data { + u16 start_addr; + u16 end_addr; + char *desc; + char *vf_desc; + enum igu_fifo_addr_types type; +}; + +/******************************** Constants **********************************/ + +#define MAX_MSG_LEN 1024 +#define MCP_TRACE_MAX_MODULE_LEN 8 +#define MCP_TRACE_FORMAT_MAX_PARAMS 3 +#define MCP_TRACE_FORMAT_PARAM_WIDTH \ + (MCP_TRACE_FORMAT_P2_SIZE_SHIFT - MCP_TRACE_FORMAT_P1_SIZE_SHIFT) +#define REG_FIFO_ELEMENT_ADDR_FACTOR 4 +#define REG_FIFO_ELEMENT_IS_PF_VF_VAL 127 +#define PROTECTION_OVERRIDE_ELEMENT_ADDR_FACTOR 4 + +/********************************* Macros ************************************/ + +#define BYTES_TO_DWORDS(bytes) ((bytes) / BYTES_IN_DWORD) + +/***************************** Constant Arrays *******************************/ + +/* Status string array */ +static const char * const s_status_str[] = { + "Operation completed successfully", + "Debug application version wasn't set", + "Unsupported debug application version", + "The debug block wasn't reset since the last recording", + "Invalid arguments", + "The debug output was already set", + "Invalid PCI buffer size", + "PCI buffer allocation failed", + "A PCI buffer wasn't allocated", + "Too many inputs were enabled. Enabled less inputs, or set 'unifyInputs' to true", + "GRC/Timestamp input overlap in cycle dword 0", + "Cannot record Storm data since the entire recording cycle is used by HW", + "The Storm was already enabled", + "The specified Storm wasn't enabled", + "The block was already enabled", + "The specified block wasn't enabled", + "No input was enabled for recording", + "Filters and triggers are not allowed when recording in 64b units", + "The filter was already enabled", + "The trigger was already enabled", + "The trigger wasn't enabled", + "A constraint can be added only after a filter was enabled or a trigger state was added", + "Cannot add more than 3 trigger states", + "Cannot add more than 4 constraints per filter or trigger state", + "The recording wasn't started", + "A trigger was configured, but it didn't trigger", + "No data was recorded", + "Dump buffer is too small", + "Dumped data is not aligned to chunks", + "Unknown chip", + "Failed allocating virtual memory", + "The input block is in reset", + "Invalid MCP trace signature found in NVRAM", + "Invalid bundle ID found in NVRAM", + "Failed getting NVRAM image", + "NVRAM image is not dword-aligned", + "Failed reading from NVRAM", + "Idle check parsing failed", + "MCP Trace data is corrupt", + "Dump doesn't contain meta data - it must be provided in an image file", + "Failed to halt MCP", + "Failed to resume MCP after halt", + "DMAE transaction failed", + "Failed to empty SEMI sync FIFO", + "IGU FIFO data is corrupt", + "MCP failed to mask parities", + "FW Asserts parsing failed", + "GRC FIFO data is corrupt", + "Protection Override data is corrupt", + "Debug arrays were not set (when using binary files, dbg_set_bin_ptr must be called)", + "When a block is filtered, no other blocks can be recorded unless inputs are unified (due to a HW bug)" +}; + +/* Idle check severity names array */ +static const char * const s_idle_chk_severity_str[] = { + "Error", + "Error if no traffic", + "Warning" +}; + +/* MCP Trace level names array */ +static const char * const s_mcp_trace_level_str[] = { + "ERROR", + "TRACE", + "DEBUG" +}; + +/* Parsing strings */ +static const char * const s_access_strs[] = { + "read", + "write" +}; + +static const char * const s_privilege_strs[] = { + "VF", + "PDA", + "HV", + "UA" +}; + +static const char * const s_protection_strs[] = { + "(default)", + "(default)", + "(default)", + "(default)", + "override VF", + "override PDA", + "override HV", + "override UA" +}; + +static const char * const s_master_strs[] = { + "???", + "pxp", + "mcp", + "msdm", + "psdm", + "ysdm", + "usdm", + "tsdm", + "xsdm", + "dbu", + "dmae", + "???", + "???", + "???", + "???", + "???" +}; + +static const char * const s_reg_fifo_error_strs[] = { + "grc timeout", + "address doesn't belong to any block", + "reserved address in block or write to read-only address", + "privilege/protection mismatch", + "path isolation error" +}; + +static const char * const s_igu_fifo_source_strs[] = { + "TSTORM", + "MSTORM", + "USTORM", + "XSTORM", + "YSTORM", + "PSTORM", + "PCIE", + "NIG_QM_PBF", + "CAU", + "ATTN", + "GRC", +}; + +static const char * const s_igu_fifo_error_strs[] = { + "no error", + "length error", + "function disabled", + "VF sent command to attnetion address", + "host sent prod update command", + "read of during interrupt register while in MIMD mode", + "access to PXP BAR reserved address", + "producer update command to attention index", + "unknown error", + "SB index not valid", + "SB relative index and FID not found", + "FID not match", + "command with error flag asserted (PCI error or CAU discard)", + "VF sent cleanup and RF cleanup is disabled", + "cleanup command on type bigger than 4" +}; + +/* IGU FIFO address data */ +static const struct igu_fifo_addr_data s_igu_fifo_addr_data[] = { + {0x0, 0x101, "MSI-X Memory", NULL, IGU_ADDR_TYPE_MSIX_MEM}, + {0x102, 0x1ff, "reserved", NULL, IGU_ADDR_TYPE_RESERVED}, + {0x200, 0x200, "Write PBA[0:63]", NULL, IGU_ADDR_TYPE_WRITE_PBA}, + {0x201, 0x201, "Write PBA[64:127]", "reserved", + IGU_ADDR_TYPE_WRITE_PBA}, + {0x202, 0x202, "Write PBA[128]", "reserved", IGU_ADDR_TYPE_WRITE_PBA}, + {0x203, 0x3ff, "reserved", NULL, IGU_ADDR_TYPE_RESERVED}, + {0x400, 0x5ef, "Write interrupt acknowledgment", NULL, + IGU_ADDR_TYPE_WRITE_INT_ACK}, + {0x5f0, 0x5f0, "Attention bits update", NULL, + IGU_ADDR_TYPE_WRITE_ATTN_BITS}, + {0x5f1, 0x5f1, "Attention bits set", NULL, + IGU_ADDR_TYPE_WRITE_ATTN_BITS}, + {0x5f2, 0x5f2, "Attention bits clear", NULL, + IGU_ADDR_TYPE_WRITE_ATTN_BITS}, + {0x5f3, 0x5f3, "Read interrupt 0:63 with mask", NULL, + IGU_ADDR_TYPE_READ_INT}, + {0x5f4, 0x5f4, "Read interrupt 0:31 with mask", NULL, + IGU_ADDR_TYPE_READ_INT}, + {0x5f5, 0x5f5, "Read interrupt 32:63 with mask", NULL, + IGU_ADDR_TYPE_READ_INT}, + {0x5f6, 0x5f6, "Read interrupt 0:63 without mask", NULL, + IGU_ADDR_TYPE_READ_INT}, + {0x5f7, 0x5ff, "reserved", NULL, IGU_ADDR_TYPE_RESERVED}, + {0x600, 0x7ff, "Producer update", NULL, IGU_ADDR_TYPE_WRITE_PROD_UPDATE} +}; + +/******************************** Variables **********************************/ + +/* MCP Trace meta data - used in case the dump doesn't contain the meta data + * (e.g. due to no NVRAM access). + */ +static struct dbg_array s_mcp_trace_meta = { NULL, 0 }; + +/* Temporary buffer, used for print size calculations */ +static char s_temp_buf[MAX_MSG_LEN]; + +/***************************** Public Functions *******************************/ + +enum dbg_status qed_dbg_user_set_bin_ptr(const u8 * const bin_ptr) +{ + /* Convert binary data to debug arrays */ + u32 num_of_buffers = *(u32 *)bin_ptr; + struct bin_buffer_hdr *buf_array; + u8 buf_id; + + buf_array = (struct bin_buffer_hdr *)((u32 *)bin_ptr + 1); + + for (buf_id = 0; buf_id < num_of_buffers; buf_id++) { + s_dbg_arrays[buf_id].ptr = + (u32 *)(bin_ptr + buf_array[buf_id].offset); + s_dbg_arrays[buf_id].size_in_dwords = + BYTES_TO_DWORDS(buf_array[buf_id].length); + } + + return DBG_STATUS_OK; +} + +static u32 qed_cyclic_add(u32 a, u32 b, u32 size) +{ + return (a + b) % size; +} + +static u32 qed_cyclic_sub(u32 a, u32 b, u32 size) +{ + return (size + a - b) % size; +} + +/* Reads the specified number of bytes from the specified cyclic buffer (up to 4 + * bytes) and returns them as a dword value. the specified buffer offset is + * updated. + */ +static u32 qed_read_from_cyclic_buf(void *buf, + u32 *offset, + u32 buf_size, u8 num_bytes_to_read) +{ + u8 *bytes_buf = (u8 *)buf; + u8 *val_ptr; + u32 val = 0; + u8 i; + + val_ptr = (u8 *)&val; + + for (i = 0; i < num_bytes_to_read; i++) { + val_ptr[i] = bytes_buf[*offset]; + *offset = qed_cyclic_add(*offset, 1, buf_size); + } + + return val; +} + +/* Reads and returns the next byte from the specified buffer. + * The specified buffer offset is updated. + */ +static u8 qed_read_byte_from_buf(void *buf, u32 *offset) +{ + return ((u8 *)buf)[(*offset)++]; +} + +/* Reads and returns the next dword from the specified buffer. + * The specified buffer offset is updated. + */ +static u32 qed_read_dword_from_buf(void *buf, u32 *offset) +{ + u32 dword_val = *(u32 *)&((u8 *)buf)[*offset]; + + *offset += 4; + return dword_val; +} + +/* Reads the next string from the specified buffer, and copies it to the + * specified pointer. The specified buffer offset is updated. + */ +static void qed_read_str_from_buf(void *buf, u32 *offset, u32 size, char *dest) +{ + const char *source_str = &((const char *)buf)[*offset]; + + strncpy(dest, source_str, size); + dest[size - 1] = '\0'; + *offset += size; +} + +/* Returns a pointer to the specified offset (in bytes) of the specified buffer. + * If the specified buffer in NULL, a temporary buffer pointer is returned. + */ +static char *qed_get_buf_ptr(void *buf, u32 offset) +{ + return buf ? (char *)buf + offset : s_temp_buf; +} + +/* Reads a param from the specified buffer. Returns the number of dwords read. + * If the returned str_param is NULL, the param is numeric and its value is + * returned in num_param. + * Otheriwise, the param is a string and its pointer is returned in str_param. + */ +static u32 qed_read_param(u32 *dump_buf, + const char **param_name, + const char **param_str_val, u32 *param_num_val) +{ + char *char_buf = (char *)dump_buf; + u32 offset = 0; /* In bytes */ + + /* Extract param name */ + *param_name = char_buf; + offset += strlen(*param_name) + 1; + + /* Check param type */ + if (*(char_buf + offset++)) { + /* String param */ + *param_str_val = char_buf + offset; + offset += strlen(*param_str_val) + 1; + if (offset & 0x3) + offset += (4 - (offset & 0x3)); + } else { + /* Numeric param */ + *param_str_val = NULL; + if (offset & 0x3) + offset += (4 - (offset & 0x3)); + *param_num_val = *(u32 *)(char_buf + offset); + offset += 4; + } + + return offset / 4; +} + +/* Reads a section header from the specified buffer. + * Returns the number of dwords read. + */ +static u32 qed_read_section_hdr(u32 *dump_buf, + const char **section_name, + u32 *num_section_params) +{ + const char *param_str_val; + + return qed_read_param(dump_buf, + section_name, ¶m_str_val, num_section_params); +} + +/* Reads section params from the specified buffer and prints them to the results + * buffer. Returns the number of dwords read. + */ +static u32 qed_print_section_params(u32 *dump_buf, + u32 num_section_params, + char *results_buf, u32 *num_chars_printed) +{ + u32 i, dump_offset = 0, results_offset = 0; + + for (i = 0; i < num_section_params; i++) { + const char *param_name; + const char *param_str_val; + u32 param_num_val = 0; + + dump_offset += qed_read_param(dump_buf + dump_offset, + ¶m_name, + ¶m_str_val, ¶m_num_val); + if (param_str_val) + /* String param */ + results_offset += + sprintf(qed_get_buf_ptr(results_buf, + results_offset), + "%s: %s\n", param_name, param_str_val); + else if (strcmp(param_name, "fw-timestamp")) + /* Numeric param */ + results_offset += + sprintf(qed_get_buf_ptr(results_buf, + results_offset), + "%s: %d\n", param_name, param_num_val); + } + + results_offset += + sprintf(qed_get_buf_ptr(results_buf, results_offset), "\n"); + *num_chars_printed = results_offset; + return dump_offset; +} + +const char *qed_dbg_get_status_str(enum dbg_status status) +{ + return (status < + MAX_DBG_STATUS) ? s_status_str[status] : "Invalid debug status"; +} + +/* Parses the idle check rules and returns the number of characters printed. + * In case of parsing error, returns 0. + */ +static u32 qed_parse_idle_chk_dump_rules(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 *dump_buf_end, + u32 num_rules, + bool print_fw_idle_chk, + char *results_buf, + u32 *num_errors, u32 *num_warnings) +{ + u32 rule_idx, results_offset = 0; /* Offset in results_buf in bytes */ + u16 i, j; + + *num_errors = 0; + *num_warnings = 0; + + /* Go over dumped results */ + for (rule_idx = 0; rule_idx < num_rules && dump_buf < dump_buf_end; + rule_idx++) { + const struct dbg_idle_chk_rule_parsing_data *rule_parsing_data; + struct dbg_idle_chk_result_hdr *hdr; + const char *parsing_str; + u32 parsing_str_offset; + const char *lsi_msg; + u8 curr_reg_id = 0; + bool has_fw_msg; + + hdr = (struct dbg_idle_chk_result_hdr *)dump_buf; + rule_parsing_data = + (const struct dbg_idle_chk_rule_parsing_data *) + &s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_PARSING_DATA]. + ptr[hdr->rule_id]; + parsing_str_offset = + GET_FIELD(rule_parsing_data->data, + DBG_IDLE_CHK_RULE_PARSING_DATA_STR_OFFSET); + has_fw_msg = + GET_FIELD(rule_parsing_data->data, + DBG_IDLE_CHK_RULE_PARSING_DATA_HAS_FW_MSG) > 0; + parsing_str = &((const char *) + s_dbg_arrays[BIN_BUF_DBG_PARSING_STRINGS].ptr) + [parsing_str_offset]; + lsi_msg = parsing_str; + + if (hdr->severity >= MAX_DBG_IDLE_CHK_SEVERITY_TYPES) + return 0; + + /* Skip rule header */ + dump_buf += (sizeof(struct dbg_idle_chk_result_hdr) / 4); + + /* Update errors/warnings count */ + if (hdr->severity == IDLE_CHK_SEVERITY_ERROR || + hdr->severity == IDLE_CHK_SEVERITY_ERROR_NO_TRAFFIC) + (*num_errors)++; + else + (*num_warnings)++; + + /* Print rule severity */ + results_offset += + sprintf(qed_get_buf_ptr(results_buf, + results_offset), "%s: ", + s_idle_chk_severity_str[hdr->severity]); + + /* Print rule message */ + if (has_fw_msg) + parsing_str += strlen(parsing_str) + 1; + results_offset += + sprintf(qed_get_buf_ptr(results_buf, + results_offset), "%s.", + has_fw_msg && + print_fw_idle_chk ? parsing_str : lsi_msg); + parsing_str += strlen(parsing_str) + 1; + + /* Print register values */ + results_offset += + sprintf(qed_get_buf_ptr(results_buf, + results_offset), " Registers:"); + for (i = 0; + i < hdr->num_dumped_cond_regs + hdr->num_dumped_info_regs; + i++) { + struct dbg_idle_chk_result_reg_hdr *reg_hdr + = (struct dbg_idle_chk_result_reg_hdr *) + dump_buf; + bool is_mem = + GET_FIELD(reg_hdr->data, + DBG_IDLE_CHK_RESULT_REG_HDR_IS_MEM); + u8 reg_id = + GET_FIELD(reg_hdr->data, + DBG_IDLE_CHK_RESULT_REG_HDR_REG_ID); + + /* Skip reg header */ + dump_buf += + (sizeof(struct dbg_idle_chk_result_reg_hdr) / 4); + + /* Skip register names until the required reg_id is + * reached. + */ + for (; reg_id > curr_reg_id; + curr_reg_id++, + parsing_str += strlen(parsing_str) + 1); + + results_offset += + sprintf(qed_get_buf_ptr(results_buf, + results_offset), " %s", + parsing_str); + if (i < hdr->num_dumped_cond_regs && is_mem) + results_offset += + sprintf(qed_get_buf_ptr(results_buf, + results_offset), + "[%d]", hdr->mem_entry_id + + reg_hdr->start_entry); + results_offset += + sprintf(qed_get_buf_ptr(results_buf, + results_offset), "="); + for (j = 0; j < reg_hdr->size; j++, dump_buf++) { + results_offset += + sprintf(qed_get_buf_ptr(results_buf, + results_offset), + "0x%x", *dump_buf); + if (j < reg_hdr->size - 1) + results_offset += + sprintf(qed_get_buf_ptr + (results_buf, + results_offset), ","); + } + } + + results_offset += + sprintf(qed_get_buf_ptr(results_buf, results_offset), "\n"); + } + + /* Check if end of dump buffer was exceeded */ + if (dump_buf > dump_buf_end) + return 0; + return results_offset; +} + +/* Parses an idle check dump buffer. + * If result_buf is not NULL, the idle check results are printed to it. + * In any case, the required results buffer size is assigned to + * parsed_results_bytes. + * The parsing status is returned. + */ +static enum dbg_status qed_parse_idle_chk_dump(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + char *results_buf, + u32 *parsed_results_bytes, + u32 *num_errors, + u32 *num_warnings) +{ + const char *section_name, *param_name, *param_str_val; + u32 *dump_buf_end = dump_buf + num_dumped_dwords; + u32 num_section_params = 0, num_rules; + u32 results_offset = 0; /* Offset in results_buf in bytes */ + + *parsed_results_bytes = 0; + *num_errors = 0; + *num_warnings = 0; + if (!s_dbg_arrays[BIN_BUF_DBG_PARSING_STRINGS].ptr || + !s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_PARSING_DATA].ptr) + return DBG_STATUS_DBG_ARRAY_NOT_SET; + + /* Read global_params section */ + dump_buf += qed_read_section_hdr(dump_buf, + §ion_name, &num_section_params); + if (strcmp(section_name, "global_params")) + return DBG_STATUS_IDLE_CHK_PARSE_FAILED; + + /* Print global params */ + dump_buf += qed_print_section_params(dump_buf, + num_section_params, + results_buf, &results_offset); + + /* Read idle_chk section */ + dump_buf += qed_read_section_hdr(dump_buf, + §ion_name, &num_section_params); + if (strcmp(section_name, "idle_chk") || num_section_params != 1) + return DBG_STATUS_IDLE_CHK_PARSE_FAILED; + + dump_buf += qed_read_param(dump_buf, + ¶m_name, ¶m_str_val, &num_rules); + if (strcmp(param_name, "num_rules") != 0) + return DBG_STATUS_IDLE_CHK_PARSE_FAILED; + + if (num_rules) { + u32 rules_print_size; + + /* Print FW output */ + results_offset += + sprintf(qed_get_buf_ptr(results_buf, + results_offset), + "FW_IDLE_CHECK:\n"); + rules_print_size = + qed_parse_idle_chk_dump_rules(p_hwfn, dump_buf, + dump_buf_end, num_rules, + true, + results_buf ? + results_buf + + results_offset : NULL, + num_errors, num_warnings); + results_offset += rules_print_size; + if (rules_print_size == 0) + return DBG_STATUS_IDLE_CHK_PARSE_FAILED; + + /* Print LSI output */ + results_offset += + sprintf(qed_get_buf_ptr(results_buf, + results_offset), + "\nLSI_IDLE_CHECK:\n"); + rules_print_size = + qed_parse_idle_chk_dump_rules(p_hwfn, dump_buf, + dump_buf_end, num_rules, + false, + results_buf ? + results_buf + + results_offset : NULL, + num_errors, num_warnings); + results_offset += rules_print_size; + if (rules_print_size == 0) + return DBG_STATUS_IDLE_CHK_PARSE_FAILED; + } + + /* Print errors/warnings count */ + if (*num_errors) { + results_offset += + sprintf(qed_get_buf_ptr(results_buf, + results_offset), + "\nIdle Check failed!!! (with %d errors and %d warnings)\n", + *num_errors, *num_warnings); + } else if (*num_warnings) { + results_offset += + sprintf(qed_get_buf_ptr(results_buf, + results_offset), + "\nIdle Check completed successfuly (with %d warnings)\n", + *num_warnings); + } else { + results_offset += + sprintf(qed_get_buf_ptr(results_buf, + results_offset), + "\nIdle Check completed successfuly\n"); + } + + /* Add 1 for string NULL termination */ + *parsed_results_bytes = results_offset + 1; + return DBG_STATUS_OK; +} + +enum dbg_status qed_get_idle_chk_results_buf_size(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + u32 *results_buf_size) +{ + u32 num_errors, num_warnings; + + return qed_parse_idle_chk_dump(p_hwfn, + dump_buf, + num_dumped_dwords, + NULL, + results_buf_size, + &num_errors, &num_warnings); +} + +enum dbg_status qed_print_idle_chk_results(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + char *results_buf, + u32 *num_errors, u32 *num_warnings) +{ + u32 parsed_buf_size; + + return qed_parse_idle_chk_dump(p_hwfn, + dump_buf, + num_dumped_dwords, + results_buf, + &parsed_buf_size, + num_errors, num_warnings); +} + +/* Frees the specified MCP Trace meta data */ +static void qed_mcp_trace_free_meta(struct qed_hwfn *p_hwfn, + struct mcp_trace_meta *meta) +{ + u32 i; + + /* Release modules */ + if (meta->modules) { + for (i = 0; i < meta->modules_num; i++) + kfree(meta->modules[i]); + kfree(meta->modules); + } + + /* Release formats */ + if (meta->formats) { + for (i = 0; i < meta->formats_num; i++) + kfree(meta->formats[i].format_str); + kfree(meta->formats); + } +} + +/* Allocates and fills MCP Trace meta data based on the specified meta data + * dump buffer. + * Returns debug status code. + */ +static enum dbg_status qed_mcp_trace_alloc_meta(struct qed_hwfn *p_hwfn, + const u32 *meta_buf, + struct mcp_trace_meta *meta) +{ + u8 *meta_buf_bytes = (u8 *)meta_buf; + u32 offset = 0, signature, i; + + memset(meta, 0, sizeof(*meta)); + + /* Read first signature */ + signature = qed_read_dword_from_buf(meta_buf_bytes, &offset); + if (signature != MCP_TRACE_META_IMAGE_SIGNATURE) + return DBG_STATUS_INVALID_TRACE_SIGNATURE; + + /* Read number of modules and allocate memory for all the modules + * pointers. + */ + meta->modules_num = qed_read_byte_from_buf(meta_buf_bytes, &offset); + meta->modules = kzalloc(meta->modules_num * sizeof(char *), GFP_KERNEL); + if (!meta->modules) + return DBG_STATUS_VIRT_MEM_ALLOC_FAILED; + + /* Allocate and read all module strings */ + for (i = 0; i < meta->modules_num; i++) { + u8 module_len = qed_read_byte_from_buf(meta_buf_bytes, &offset); + + *(meta->modules + i) = kzalloc(module_len, GFP_KERNEL); + if (!(*(meta->modules + i))) { + /* Update number of modules to be released */ + meta->modules_num = i ? i - 1 : 0; + return DBG_STATUS_VIRT_MEM_ALLOC_FAILED; + } + + qed_read_str_from_buf(meta_buf_bytes, &offset, module_len, + *(meta->modules + i)); + if (module_len > MCP_TRACE_MAX_MODULE_LEN) + (*(meta->modules + i))[MCP_TRACE_MAX_MODULE_LEN] = '\0'; + } + + /* Read second signature */ + signature = qed_read_dword_from_buf(meta_buf_bytes, &offset); + if (signature != MCP_TRACE_META_IMAGE_SIGNATURE) + return DBG_STATUS_INVALID_TRACE_SIGNATURE; + + /* Read number of formats and allocate memory for all formats */ + meta->formats_num = qed_read_dword_from_buf(meta_buf_bytes, &offset); + meta->formats = kzalloc(meta->formats_num * + sizeof(struct mcp_trace_format), + GFP_KERNEL); + if (!meta->formats) + return DBG_STATUS_VIRT_MEM_ALLOC_FAILED; + + /* Allocate and read all strings */ + for (i = 0; i < meta->formats_num; i++) { + struct mcp_trace_format *format_ptr = &meta->formats[i]; + u8 format_len; + + format_ptr->data = qed_read_dword_from_buf(meta_buf_bytes, + &offset); + format_len = + (format_ptr->data & + MCP_TRACE_FORMAT_LEN_MASK) >> MCP_TRACE_FORMAT_LEN_SHIFT; + format_ptr->format_str = kzalloc(format_len, GFP_KERNEL); + if (!format_ptr->format_str) { + /* Update number of modules to be released */ + meta->formats_num = i ? i - 1 : 0; + return DBG_STATUS_VIRT_MEM_ALLOC_FAILED; + } + + qed_read_str_from_buf(meta_buf_bytes, + &offset, + format_len, format_ptr->format_str); + } + + return DBG_STATUS_OK; +} + +/* Parses an MCP Trace dump buffer. + * If result_buf is not NULL, the MCP Trace results are printed to it. + * In any case, the required results buffer size is assigned to + * parsed_results_bytes. + * The parsing status is returned. + */ +static enum dbg_status qed_parse_mcp_trace_dump(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + char *results_buf, + u32 *parsed_results_bytes) +{ + u32 results_offset = 0, param_mask, param_shift, param_num_val; + u32 num_section_params, offset, end_offset, bytes_left; + const char *section_name, *param_name, *param_str_val; + u32 trace_data_dwords, trace_meta_dwords; + struct mcp_trace_meta meta; + struct mcp_trace *trace; + enum dbg_status status; + const u32 *meta_buf; + u8 *trace_buf; + + *parsed_results_bytes = 0; + + /* Read global_params section */ + dump_buf += qed_read_section_hdr(dump_buf, + §ion_name, &num_section_params); + if (strcmp(section_name, "global_params")) + return DBG_STATUS_MCP_TRACE_BAD_DATA; + + /* Print global params */ + dump_buf += qed_print_section_params(dump_buf, + num_section_params, + results_buf, &results_offset); + + /* Read trace_data section */ + dump_buf += qed_read_section_hdr(dump_buf, + §ion_name, &num_section_params); + if (strcmp(section_name, "mcp_trace_data") || num_section_params != 1) + return DBG_STATUS_MCP_TRACE_BAD_DATA; + dump_buf += qed_read_param(dump_buf, + ¶m_name, ¶m_str_val, ¶m_num_val); + if (strcmp(param_name, "size")) + return DBG_STATUS_MCP_TRACE_BAD_DATA; + trace_data_dwords = param_num_val; + + /* Prepare trace info */ + trace = (struct mcp_trace *)dump_buf; + trace_buf = (u8 *)dump_buf + sizeof(struct mcp_trace); + offset = trace->trace_oldest; + end_offset = trace->trace_prod; + bytes_left = qed_cyclic_sub(end_offset, offset, trace->size); + dump_buf += trace_data_dwords; + + /* Read meta_data section */ + dump_buf += qed_read_section_hdr(dump_buf, + §ion_name, &num_section_params); + if (strcmp(section_name, "mcp_trace_meta")) + return DBG_STATUS_MCP_TRACE_BAD_DATA; + dump_buf += qed_read_param(dump_buf, + ¶m_name, ¶m_str_val, ¶m_num_val); + if (strcmp(param_name, "size") != 0) + return DBG_STATUS_MCP_TRACE_BAD_DATA; + trace_meta_dwords = param_num_val; + + /* Choose meta data buffer */ + if (!trace_meta_dwords) { + /* Dump doesn't include meta data */ + if (!s_mcp_trace_meta.ptr) + return DBG_STATUS_MCP_TRACE_NO_META; + meta_buf = s_mcp_trace_meta.ptr; + } else { + /* Dump includes meta data */ + meta_buf = dump_buf; + } + + /* Allocate meta data memory */ + status = qed_mcp_trace_alloc_meta(p_hwfn, meta_buf, &meta); + if (status != DBG_STATUS_OK) + goto free_mem; + + /* Ignore the level and modules masks - just print everything that is + * already in the buffer. + */ + while (bytes_left) { + struct mcp_trace_format *format_ptr; + u8 format_level, format_module; + u32 params[3] = { 0, 0, 0 }; + u32 header, format_idx, i; + + if (bytes_left < MFW_TRACE_ENTRY_SIZE) { + status = DBG_STATUS_MCP_TRACE_BAD_DATA; + goto free_mem; + } + + header = qed_read_from_cyclic_buf(trace_buf, + &offset, + trace->size, + MFW_TRACE_ENTRY_SIZE); + bytes_left -= MFW_TRACE_ENTRY_SIZE; + format_idx = header & MFW_TRACE_EVENTID_MASK; + + /* Skip message if its index doesn't exist in the meta data */ + if (format_idx > meta.formats_num) { + u8 format_size = + (u8)((header & + MFW_TRACE_PRM_SIZE_MASK) >> + MFW_TRACE_PRM_SIZE_SHIFT); + + if (bytes_left < format_size) { + status = DBG_STATUS_MCP_TRACE_BAD_DATA; + goto free_mem; + } + + offset = qed_cyclic_add(offset, + format_size, trace->size); + bytes_left -= format_size; + continue; + } + + format_ptr = &meta.formats[format_idx]; + for (i = 0, + param_mask = MCP_TRACE_FORMAT_P1_SIZE_MASK, param_shift = + MCP_TRACE_FORMAT_P1_SIZE_SHIFT; + i < MCP_TRACE_FORMAT_MAX_PARAMS; + i++, param_mask <<= MCP_TRACE_FORMAT_PARAM_WIDTH, + param_shift += MCP_TRACE_FORMAT_PARAM_WIDTH) { + /* Extract param size (0..3) */ + u8 param_size = + (u8)((format_ptr->data & + param_mask) >> param_shift); + + /* If the param size is zero, there are no other + * parameters. + */ + if (!param_size) + break; + + /* Size is encoded using 2 bits, where 3 is used to + * encode 4. + */ + if (param_size == 3) + param_size = 4; + if (bytes_left < param_size) { + status = DBG_STATUS_MCP_TRACE_BAD_DATA; + goto free_mem; + } + + params[i] = qed_read_from_cyclic_buf(trace_buf, + &offset, + trace->size, + param_size); + bytes_left -= param_size; + } + + format_level = + (u8)((format_ptr->data & + MCP_TRACE_FORMAT_LEVEL_MASK) >> + MCP_TRACE_FORMAT_LEVEL_SHIFT); + format_module = + (u8)((format_ptr->data & + MCP_TRACE_FORMAT_MODULE_MASK) >> + MCP_TRACE_FORMAT_MODULE_SHIFT); + if (format_level >= ARRAY_SIZE(s_mcp_trace_level_str)) { + status = DBG_STATUS_MCP_TRACE_BAD_DATA; + goto free_mem; + } + + /* Print current message to results buffer */ + results_offset += + sprintf(qed_get_buf_ptr(results_buf, + results_offset), "%s %-8s: ", + s_mcp_trace_level_str[format_level], + meta.modules[format_module]); + results_offset += + sprintf(qed_get_buf_ptr(results_buf, + results_offset), + format_ptr->format_str, params[0], params[1], + params[2]); + } + +free_mem: + *parsed_results_bytes = results_offset + 1; + qed_mcp_trace_free_meta(p_hwfn, &meta); + return status; +} + +enum dbg_status qed_get_mcp_trace_results_buf_size(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + u32 *results_buf_size) +{ + return qed_parse_mcp_trace_dump(p_hwfn, + dump_buf, + num_dumped_dwords, + NULL, results_buf_size); +} + +enum dbg_status qed_print_mcp_trace_results(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + char *results_buf) +{ + u32 parsed_buf_size; + + return qed_parse_mcp_trace_dump(p_hwfn, + dump_buf, + num_dumped_dwords, + results_buf, &parsed_buf_size); +} + +/* Parses a Reg FIFO dump buffer. + * If result_buf is not NULL, the Reg FIFO results are printed to it. + * In any case, the required results buffer size is assigned to + * parsed_results_bytes. + * The parsing status is returned. + */ +static enum dbg_status qed_parse_reg_fifo_dump(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + char *results_buf, + u32 *parsed_results_bytes) +{ + u32 results_offset = 0, param_num_val, num_section_params, num_elements; + const char *section_name, *param_name, *param_str_val; + struct reg_fifo_element *elements; + u8 i, j, err_val, vf_val; + char vf_str[4]; + + /* Read global_params section */ + dump_buf += qed_read_section_hdr(dump_buf, + §ion_name, &num_section_params); + if (strcmp(section_name, "global_params")) + return DBG_STATUS_REG_FIFO_BAD_DATA; + + /* Print global params */ + dump_buf += qed_print_section_params(dump_buf, + num_section_params, + results_buf, &results_offset); + + /* Read reg_fifo_data section */ + dump_buf += qed_read_section_hdr(dump_buf, + §ion_name, &num_section_params); + if (strcmp(section_name, "reg_fifo_data")) + return DBG_STATUS_REG_FIFO_BAD_DATA; + dump_buf += qed_read_param(dump_buf, + ¶m_name, ¶m_str_val, ¶m_num_val); + if (strcmp(param_name, "size")) + return DBG_STATUS_REG_FIFO_BAD_DATA; + if (param_num_val % REG_FIFO_ELEMENT_DWORDS) + return DBG_STATUS_REG_FIFO_BAD_DATA; + num_elements = param_num_val / REG_FIFO_ELEMENT_DWORDS; + elements = (struct reg_fifo_element *)dump_buf; + + /* Decode elements */ + for (i = 0; i < num_elements; i++) { + bool err_printed = false; + + /* Discover if element belongs to a VF or a PF */ + vf_val = GET_FIELD(elements[i].data, REG_FIFO_ELEMENT_VF); + if (vf_val == REG_FIFO_ELEMENT_IS_PF_VF_VAL) + sprintf(vf_str, "%s", "N/A"); + else + sprintf(vf_str, "%d", vf_val); + + /* Add parsed element to parsed buffer */ + results_offset += + sprintf(qed_get_buf_ptr(results_buf, + results_offset), + "raw: 0x%016llx, address: 0x%07llx, access: %-5s, pf: %2lld, vf: %s, port: %lld, privilege: %-3s, protection: %-12s, master: %-4s, errors: ", + elements[i].data, + GET_FIELD(elements[i].data, + REG_FIFO_ELEMENT_ADDRESS) * + REG_FIFO_ELEMENT_ADDR_FACTOR, + s_access_strs[GET_FIELD(elements[i].data, + REG_FIFO_ELEMENT_ACCESS)], + GET_FIELD(elements[i].data, + REG_FIFO_ELEMENT_PF), vf_str, + GET_FIELD(elements[i].data, + REG_FIFO_ELEMENT_PORT), + s_privilege_strs[GET_FIELD(elements[i]. + data, + REG_FIFO_ELEMENT_PRIVILEGE)], + s_protection_strs[GET_FIELD(elements[i].data, + REG_FIFO_ELEMENT_PROTECTION)], + s_master_strs[GET_FIELD(elements[i].data, + REG_FIFO_ELEMENT_MASTER)]); + + /* Print errors */ + for (j = 0, + err_val = GET_FIELD(elements[i].data, + REG_FIFO_ELEMENT_ERROR); + j < ARRAY_SIZE(s_reg_fifo_error_strs); + j++, err_val >>= 1) { + if (!(err_val & 0x1)) + continue; + if (err_printed) + results_offset += + sprintf(qed_get_buf_ptr(results_buf, + results_offset), + ", "); + results_offset += + sprintf(qed_get_buf_ptr(results_buf, + results_offset), "%s", + s_reg_fifo_error_strs[j]); + err_printed = true; + } + + results_offset += + sprintf(qed_get_buf_ptr(results_buf, results_offset), "\n"); + } + + results_offset += sprintf(qed_get_buf_ptr(results_buf, + results_offset), + "fifo contained %d elements", num_elements); + + /* Add 1 for string NULL termination */ + *parsed_results_bytes = results_offset + 1; + return DBG_STATUS_OK; +} + +enum dbg_status qed_get_reg_fifo_results_buf_size(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + u32 *results_buf_size) +{ + return qed_parse_reg_fifo_dump(p_hwfn, + dump_buf, + num_dumped_dwords, + NULL, results_buf_size); +} + +enum dbg_status qed_print_reg_fifo_results(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + char *results_buf) +{ + u32 parsed_buf_size; + + return qed_parse_reg_fifo_dump(p_hwfn, + dump_buf, + num_dumped_dwords, + results_buf, &parsed_buf_size); +} + +/* Parses an IGU FIFO dump buffer. + * If result_buf is not NULL, the IGU FIFO results are printed to it. + * In any case, the required results buffer size is assigned to + * parsed_results_bytes. + * The parsing status is returned. + */ +static enum dbg_status qed_parse_igu_fifo_dump(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + char *results_buf, + u32 *parsed_results_bytes) +{ + u32 results_offset = 0, param_num_val, num_section_params, num_elements; + const char *section_name, *param_name, *param_str_val; + struct igu_fifo_element *elements; + char parsed_addr_data[32]; + char parsed_wr_data[256]; + u8 i, j; + + /* Read global_params section */ + dump_buf += qed_read_section_hdr(dump_buf, + §ion_name, &num_section_params); + if (strcmp(section_name, "global_params")) + return DBG_STATUS_IGU_FIFO_BAD_DATA; + + /* Print global params */ + dump_buf += qed_print_section_params(dump_buf, + num_section_params, + results_buf, &results_offset); + + /* Read igu_fifo_data section */ + dump_buf += qed_read_section_hdr(dump_buf, + §ion_name, &num_section_params); + if (strcmp(section_name, "igu_fifo_data")) + return DBG_STATUS_IGU_FIFO_BAD_DATA; + dump_buf += qed_read_param(dump_buf, + ¶m_name, ¶m_str_val, ¶m_num_val); + if (strcmp(param_name, "size")) + return DBG_STATUS_IGU_FIFO_BAD_DATA; + if (param_num_val % IGU_FIFO_ELEMENT_DWORDS) + return DBG_STATUS_IGU_FIFO_BAD_DATA; + num_elements = param_num_val / IGU_FIFO_ELEMENT_DWORDS; + elements = (struct igu_fifo_element *)dump_buf; + + /* Decode elements */ + for (i = 0; i < num_elements; i++) { + /* dword12 (dword index 1 and 2) contains bits 32..95 of the + * FIFO element. + */ + u64 dword12 = + ((u64)elements[i].dword2 << 32) | elements[i].dword1; + bool is_wr_cmd = GET_FIELD(dword12, + IGU_FIFO_ELEMENT_DWORD12_IS_WR_CMD); + bool is_pf = GET_FIELD(elements[i].dword0, + IGU_FIFO_ELEMENT_DWORD0_IS_PF); + u16 cmd_addr = GET_FIELD(elements[i].dword0, + IGU_FIFO_ELEMENT_DWORD0_CMD_ADDR); + u8 source = GET_FIELD(elements[i].dword0, + IGU_FIFO_ELEMENT_DWORD0_SOURCE); + u8 err_type = GET_FIELD(elements[i].dword0, + IGU_FIFO_ELEMENT_DWORD0_ERR_TYPE); + const struct igu_fifo_addr_data *addr_data = NULL; + + if (source >= ARRAY_SIZE(s_igu_fifo_source_strs)) + return DBG_STATUS_IGU_FIFO_BAD_DATA; + if (err_type >= ARRAY_SIZE(s_igu_fifo_error_strs)) + return DBG_STATUS_IGU_FIFO_BAD_DATA; + + /* Find address data */ + for (j = 0; j < ARRAY_SIZE(s_igu_fifo_addr_data) && !addr_data; + j++) + if (cmd_addr >= s_igu_fifo_addr_data[j].start_addr && + cmd_addr <= s_igu_fifo_addr_data[j].end_addr) + addr_data = &s_igu_fifo_addr_data[j]; + if (!addr_data) + return DBG_STATUS_IGU_FIFO_BAD_DATA; + + /* Prepare parsed address data */ + switch (addr_data->type) { + case IGU_ADDR_TYPE_MSIX_MEM: + sprintf(parsed_addr_data, + " vector_num=0x%x", cmd_addr / 2); + break; + case IGU_ADDR_TYPE_WRITE_INT_ACK: + case IGU_ADDR_TYPE_WRITE_PROD_UPDATE: + sprintf(parsed_addr_data, + " SB=0x%x", cmd_addr - addr_data->start_addr); + break; + default: + parsed_addr_data[0] = '\0'; + } + + /* Prepare parsed write data */ + if (is_wr_cmd) { + u32 wr_data = GET_FIELD(dword12, + IGU_FIFO_ELEMENT_DWORD12_WR_DATA); + u32 prod_cons = GET_FIELD(wr_data, + IGU_FIFO_WR_DATA_PROD_CONS); + u8 is_cleanup = GET_FIELD(wr_data, + IGU_FIFO_WR_DATA_CMD_TYPE); + + if (source == IGU_SRC_ATTN) { + sprintf(parsed_wr_data, + "prod: 0x%x, ", prod_cons); + } else { + if (is_cleanup) { + u8 cleanup_val = GET_FIELD(wr_data, + IGU_FIFO_CLEANUP_WR_DATA_CLEANUP_VAL); + u8 cleanup_type = GET_FIELD(wr_data, + IGU_FIFO_CLEANUP_WR_DATA_CLEANUP_TYPE); + + sprintf(parsed_wr_data, + "cmd_type: cleanup, cleanup_val: %s, cleanup_type: %d, ", + cleanup_val ? "set" : "clear", + cleanup_type); + } else { + u8 update_flag = GET_FIELD(wr_data, + IGU_FIFO_WR_DATA_UPDATE_FLAG); + u8 en_dis_int_for_sb = + GET_FIELD(wr_data, + IGU_FIFO_WR_DATA_EN_DIS_INT_FOR_SB); + u8 segment = GET_FIELD(wr_data, + IGU_FIFO_WR_DATA_SEGMENT); + u8 timer_mask = GET_FIELD(wr_data, + IGU_FIFO_WR_DATA_TIMER_MASK); + + sprintf(parsed_wr_data, + "cmd_type: prod/cons update, prod/cons: 0x%x, update_flag: %s, en_dis_int_for_sb: %s, segment: %s, timer_mask=%d, ", + prod_cons, + update_flag ? "update" : "nop", + en_dis_int_for_sb + ? (en_dis_int_for_sb == + 1 ? "disable" : "nop") : + "enable", + segment ? "attn" : "regular", + timer_mask); + } + } + } else { + parsed_wr_data[0] = '\0'; + } + + /* Add parsed element to parsed buffer */ + results_offset += + sprintf(qed_get_buf_ptr(results_buf, + results_offset), + "raw: 0x%01x%08x%08x, %s: %d, source: %s, type: %s, cmd_addr: 0x%x (%s%s), %serror: %s\n", + elements[i].dword2, elements[i].dword1, + elements[i].dword0, + is_pf ? "pf" : "vf", + GET_FIELD(elements[i].dword0, + IGU_FIFO_ELEMENT_DWORD0_FID), + s_igu_fifo_source_strs[source], + is_wr_cmd ? "wr" : "rd", cmd_addr, + (!is_pf && addr_data->vf_desc) + ? addr_data->vf_desc : addr_data->desc, + parsed_addr_data, parsed_wr_data, + s_igu_fifo_error_strs[err_type]); + } + + results_offset += sprintf(qed_get_buf_ptr(results_buf, + results_offset), + "fifo contained %d elements", num_elements); + + /* Add 1 for string NULL termination */ + *parsed_results_bytes = results_offset + 1; + return DBG_STATUS_OK; +} + +enum dbg_status qed_get_igu_fifo_results_buf_size(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + u32 *results_buf_size) +{ + return qed_parse_igu_fifo_dump(p_hwfn, + dump_buf, + num_dumped_dwords, + NULL, results_buf_size); +} + +enum dbg_status qed_print_igu_fifo_results(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + char *results_buf) +{ + u32 parsed_buf_size; + + return qed_parse_igu_fifo_dump(p_hwfn, + dump_buf, + num_dumped_dwords, + results_buf, &parsed_buf_size); +} + +static enum dbg_status +qed_parse_protection_override_dump(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + char *results_buf, + u32 *parsed_results_bytes) +{ + u32 results_offset = 0, param_num_val, num_section_params, num_elements; + const char *section_name, *param_name, *param_str_val; + struct protection_override_element *elements; + u8 i; + + /* Read global_params section */ + dump_buf += qed_read_section_hdr(dump_buf, + §ion_name, &num_section_params); + if (strcmp(section_name, "global_params")) + return DBG_STATUS_PROTECTION_OVERRIDE_BAD_DATA; + + /* Print global params */ + dump_buf += qed_print_section_params(dump_buf, + num_section_params, + results_buf, &results_offset); + + /* Read protection_override_data section */ + dump_buf += qed_read_section_hdr(dump_buf, + §ion_name, &num_section_params); + if (strcmp(section_name, "protection_override_data")) + return DBG_STATUS_PROTECTION_OVERRIDE_BAD_DATA; + dump_buf += qed_read_param(dump_buf, + ¶m_name, ¶m_str_val, ¶m_num_val); + if (strcmp(param_name, "size")) + return DBG_STATUS_PROTECTION_OVERRIDE_BAD_DATA; + if (param_num_val % PROTECTION_OVERRIDE_ELEMENT_DWORDS != 0) + return DBG_STATUS_PROTECTION_OVERRIDE_BAD_DATA; + num_elements = param_num_val / PROTECTION_OVERRIDE_ELEMENT_DWORDS; + elements = (struct protection_override_element *)dump_buf; + + /* Decode elements */ + for (i = 0; i < num_elements; i++) { + u32 address = GET_FIELD(elements[i].data, + PROTECTION_OVERRIDE_ELEMENT_ADDRESS) * + PROTECTION_OVERRIDE_ELEMENT_ADDR_FACTOR; + + results_offset += + sprintf(qed_get_buf_ptr(results_buf, + results_offset), + "window %2d, address: 0x%07x, size: %7lld regs, read: %lld, write: %lld, read protection: %-12s, write protection: %-12s\n", + i, address, + GET_FIELD(elements[i].data, + PROTECTION_OVERRIDE_ELEMENT_WINDOW_SIZE), + GET_FIELD(elements[i].data, + PROTECTION_OVERRIDE_ELEMENT_READ), + GET_FIELD(elements[i].data, + PROTECTION_OVERRIDE_ELEMENT_WRITE), + s_protection_strs[GET_FIELD(elements[i].data, + PROTECTION_OVERRIDE_ELEMENT_READ_PROTECTION)], + s_protection_strs[GET_FIELD(elements[i].data, + PROTECTION_OVERRIDE_ELEMENT_WRITE_PROTECTION)]); + } + + results_offset += sprintf(qed_get_buf_ptr(results_buf, + results_offset), + "protection override contained %d elements", + num_elements); + + /* Add 1 for string NULL termination */ + *parsed_results_bytes = results_offset + 1; + return DBG_STATUS_OK; +} + +enum dbg_status +qed_get_protection_override_results_buf_size(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + u32 *results_buf_size) +{ + return qed_parse_protection_override_dump(p_hwfn, + dump_buf, + num_dumped_dwords, + NULL, results_buf_size); +} + +enum dbg_status qed_print_protection_override_results(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + char *results_buf) +{ + u32 parsed_buf_size; + + return qed_parse_protection_override_dump(p_hwfn, + dump_buf, + num_dumped_dwords, + results_buf, + &parsed_buf_size); +} + +/* Parses a FW Asserts dump buffer. + * If result_buf is not NULL, the FW Asserts results are printed to it. + * In any case, the required results buffer size is assigned to + * parsed_results_bytes. + * The parsing status is returned. + */ +static enum dbg_status qed_parse_fw_asserts_dump(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + char *results_buf, + u32 *parsed_results_bytes) +{ + u32 results_offset = 0, num_section_params, param_num_val, i; + const char *param_name, *param_str_val, *section_name; + bool last_section_found = false; + + *parsed_results_bytes = 0; + + /* Read global_params section */ + dump_buf += qed_read_section_hdr(dump_buf, + §ion_name, &num_section_params); + if (strcmp(section_name, "global_params")) + return DBG_STATUS_FW_ASSERTS_PARSE_FAILED; + + /* Print global params */ + dump_buf += qed_print_section_params(dump_buf, + num_section_params, + results_buf, &results_offset); + while (!last_section_found) { + const char *storm_letter = NULL; + u32 storm_dump_size = 0; + + dump_buf += qed_read_section_hdr(dump_buf, + §ion_name, + &num_section_params); + if (!strcmp(section_name, "last")) { + last_section_found = true; + continue; + } else if (strcmp(section_name, "fw_asserts")) { + return DBG_STATUS_FW_ASSERTS_PARSE_FAILED; + } + + /* Extract params */ + for (i = 0; i < num_section_params; i++) { + dump_buf += qed_read_param(dump_buf, + ¶m_name, + ¶m_str_val, + ¶m_num_val); + if (!strcmp(param_name, "storm")) + storm_letter = param_str_val; + else if (!strcmp(param_name, "size")) + storm_dump_size = param_num_val; + else + return DBG_STATUS_FW_ASSERTS_PARSE_FAILED; + } + + if (!storm_letter || !storm_dump_size) + return DBG_STATUS_FW_ASSERTS_PARSE_FAILED; + + /* Print data */ + results_offset += sprintf(qed_get_buf_ptr(results_buf, + results_offset), + "\n%sSTORM_ASSERT: size=%d\n", + storm_letter, storm_dump_size); + for (i = 0; i < storm_dump_size; i++, dump_buf++) + results_offset += + sprintf(qed_get_buf_ptr(results_buf, + results_offset), + "%08x\n", *dump_buf); + } + + /* Add 1 for string NULL termination */ + *parsed_results_bytes = results_offset + 1; + return DBG_STATUS_OK; +} + +enum dbg_status qed_get_fw_asserts_results_buf_size(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + u32 *results_buf_size) +{ + return qed_parse_fw_asserts_dump(p_hwfn, + dump_buf, + num_dumped_dwords, + NULL, results_buf_size); +} + +enum dbg_status qed_print_fw_asserts_results(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + char *results_buf) +{ + u32 parsed_buf_size; + + return qed_parse_fw_asserts_dump(p_hwfn, + dump_buf, + num_dumped_dwords, + results_buf, &parsed_buf_size); +} + +/* Wrapper for unifying the idle_chk and mcp_trace api */ +enum dbg_status qed_print_idle_chk_results_wrapper(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + char *results_buf) +{ + u32 num_errors, num_warnnings; + + return qed_print_idle_chk_results(p_hwfn, dump_buf, num_dumped_dwords, + results_buf, &num_errors, + &num_warnnings); +} + +/* Feature meta data lookup table */ +static struct { + char *name; + enum dbg_status (*get_size)(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u32 *size); + enum dbg_status (*perform_dump)(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u32 *dump_buf, + u32 buf_size, u32 *dumped_dwords); + enum dbg_status (*print_results)(struct qed_hwfn *p_hwfn, + u32 *dump_buf, u32 num_dumped_dwords, + char *results_buf); + enum dbg_status (*results_buf_size)(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + u32 *results_buf_size); +} qed_features_lookup[] = { + { + "grc", qed_dbg_grc_get_dump_buf_size, + qed_dbg_grc_dump, NULL, NULL}, { + "idle_chk", + qed_dbg_idle_chk_get_dump_buf_size, + qed_dbg_idle_chk_dump, + qed_print_idle_chk_results_wrapper, + qed_get_idle_chk_results_buf_size}, { + "mcp_trace", + qed_dbg_mcp_trace_get_dump_buf_size, + qed_dbg_mcp_trace_dump, qed_print_mcp_trace_results, + qed_get_mcp_trace_results_buf_size}, { + "reg_fifo", + qed_dbg_reg_fifo_get_dump_buf_size, + qed_dbg_reg_fifo_dump, qed_print_reg_fifo_results, + qed_get_reg_fifo_results_buf_size}, { + "igu_fifo", + qed_dbg_igu_fifo_get_dump_buf_size, + qed_dbg_igu_fifo_dump, qed_print_igu_fifo_results, + qed_get_igu_fifo_results_buf_size}, { + "protection_override", + qed_dbg_protection_override_get_dump_buf_size, + qed_dbg_protection_override_dump, + qed_print_protection_override_results, + qed_get_protection_override_results_buf_size}, { + "fw_asserts", + qed_dbg_fw_asserts_get_dump_buf_size, + qed_dbg_fw_asserts_dump, + qed_print_fw_asserts_results, + qed_get_fw_asserts_results_buf_size},}; + +static void qed_dbg_print_feature(u8 *p_text_buf, u32 text_size) +{ + u32 i, precision = 80; + + if (!p_text_buf) + return; + + pr_notice("\n%.*s", precision, p_text_buf); + for (i = precision; i < text_size; i += precision) + pr_cont("%.*s", precision, p_text_buf + i); + pr_cont("\n"); +} + +#define QED_RESULTS_BUF_MIN_SIZE 16 +/* Generic function for decoding debug feature info */ +enum dbg_status format_feature(struct qed_hwfn *p_hwfn, + enum qed_dbg_features feature_idx) +{ + struct qed_dbg_feature *feature = + &p_hwfn->cdev->dbg_params.features[feature_idx]; + u32 text_size_bytes, null_char_pos, i; + enum dbg_status rc; + char *text_buf; + + /* Check if feature supports formatting capability */ + if (!qed_features_lookup[feature_idx].results_buf_size) + return DBG_STATUS_OK; + + /* Obtain size of formatted output */ + rc = qed_features_lookup[feature_idx]. + results_buf_size(p_hwfn, (u32 *)feature->dump_buf, + feature->dumped_dwords, &text_size_bytes); + if (rc != DBG_STATUS_OK) + return rc; + + /* Make sure that the allocated size is a multiple of dword (4 bytes) */ + null_char_pos = text_size_bytes - 1; + text_size_bytes = (text_size_bytes + 3) & ~0x3; + + if (text_size_bytes < QED_RESULTS_BUF_MIN_SIZE) { + DP_NOTICE(p_hwfn->cdev, + "formatted size of feature was too small %d. Aborting\n", + text_size_bytes); + return DBG_STATUS_INVALID_ARGS; + } + + /* Allocate temp text buf */ + text_buf = vzalloc(text_size_bytes); + if (!text_buf) + return DBG_STATUS_VIRT_MEM_ALLOC_FAILED; + + /* Decode feature opcodes to string on temp buf */ + rc = qed_features_lookup[feature_idx]. + print_results(p_hwfn, (u32 *)feature->dump_buf, + feature->dumped_dwords, text_buf); + if (rc != DBG_STATUS_OK) { + vfree(text_buf); + return rc; + } + + /* Replace the original null character with a '\n' character. + * The bytes that were added as a result of the dword alignment are also + * padded with '\n' characters. + */ + for (i = null_char_pos; i < text_size_bytes; i++) + text_buf[i] = '\n'; + + /* Dump printable feature to log */ + if (p_hwfn->cdev->dbg_params.print_data) + qed_dbg_print_feature(text_buf, text_size_bytes); + + /* Free the old dump_buf and point the dump_buf to the newly allocagted + * and formatted text buffer. + */ + vfree(feature->dump_buf); + feature->dump_buf = text_buf; + feature->buf_size = text_size_bytes; + feature->dumped_dwords = text_size_bytes / 4; + return rc; +} + +/* Generic function for performing the dump of a debug feature. */ +enum dbg_status qed_dbg_dump(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, + enum qed_dbg_features feature_idx) +{ + struct qed_dbg_feature *feature = + &p_hwfn->cdev->dbg_params.features[feature_idx]; + u32 buf_size_dwords; + enum dbg_status rc; + + DP_NOTICE(p_hwfn->cdev, "Collecting a debug feature [\"%s\"]\n", + qed_features_lookup[feature_idx].name); + + /* Dump_buf was already allocated need to free (this can happen if dump + * was called but file was never read). + * We can't use the buffer as is since size may have changed. + */ + if (feature->dump_buf) { + vfree(feature->dump_buf); + feature->dump_buf = NULL; + } + + /* Get buffer size from hsi, allocate accordingly, and perform the + * dump. + */ + rc = qed_features_lookup[feature_idx].get_size(p_hwfn, p_ptt, + &buf_size_dwords); + if (rc != DBG_STATUS_OK) + return rc; + feature->buf_size = buf_size_dwords * sizeof(u32); + feature->dump_buf = vmalloc(feature->buf_size); + if (!feature->dump_buf) + return DBG_STATUS_VIRT_MEM_ALLOC_FAILED; + + rc = qed_features_lookup[feature_idx]. + perform_dump(p_hwfn, p_ptt, (u32 *)feature->dump_buf, + feature->buf_size / sizeof(u32), + &feature->dumped_dwords); + + /* If mcp is stuck we get DBG_STATUS_NVRAM_GET_IMAGE_FAILED error. + * In this case the buffer holds valid binary data, but we wont able + * to parse it (since parsing relies on data in NVRAM which is only + * accessible when MFW is responsive). skip the formatting but return + * success so that binary data is provided. + */ + if (rc == DBG_STATUS_NVRAM_GET_IMAGE_FAILED) + return DBG_STATUS_OK; + + if (rc != DBG_STATUS_OK) + return rc; + + /* Format output */ + rc = format_feature(p_hwfn, feature_idx); + return rc; +} + +int qed_dbg_grc(struct qed_dev *cdev, void *buffer, u32 *num_dumped_bytes) +{ + return qed_dbg_feature(cdev, buffer, DBG_FEATURE_GRC, num_dumped_bytes); +} + +int qed_dbg_grc_size(struct qed_dev *cdev) +{ + return qed_dbg_feature_size(cdev, DBG_FEATURE_GRC); +} + +int qed_dbg_idle_chk(struct qed_dev *cdev, void *buffer, u32 *num_dumped_bytes) +{ + return qed_dbg_feature(cdev, buffer, DBG_FEATURE_IDLE_CHK, + num_dumped_bytes); +} + +int qed_dbg_idle_chk_size(struct qed_dev *cdev) +{ + return qed_dbg_feature_size(cdev, DBG_FEATURE_IDLE_CHK); +} + +int qed_dbg_reg_fifo(struct qed_dev *cdev, void *buffer, u32 *num_dumped_bytes) +{ + return qed_dbg_feature(cdev, buffer, DBG_FEATURE_REG_FIFO, + num_dumped_bytes); +} + +int qed_dbg_reg_fifo_size(struct qed_dev *cdev) +{ + return qed_dbg_feature_size(cdev, DBG_FEATURE_REG_FIFO); +} + +int qed_dbg_igu_fifo(struct qed_dev *cdev, void *buffer, u32 *num_dumped_bytes) +{ + return qed_dbg_feature(cdev, buffer, DBG_FEATURE_IGU_FIFO, + num_dumped_bytes); +} + +int qed_dbg_igu_fifo_size(struct qed_dev *cdev) +{ + return qed_dbg_feature_size(cdev, DBG_FEATURE_IGU_FIFO); +} + +int qed_dbg_protection_override(struct qed_dev *cdev, void *buffer, + u32 *num_dumped_bytes) +{ + return qed_dbg_feature(cdev, buffer, DBG_FEATURE_PROTECTION_OVERRIDE, + num_dumped_bytes); +} + +int qed_dbg_protection_override_size(struct qed_dev *cdev) +{ + return qed_dbg_feature_size(cdev, DBG_FEATURE_PROTECTION_OVERRIDE); +} + +int qed_dbg_fw_asserts(struct qed_dev *cdev, void *buffer, + u32 *num_dumped_bytes) +{ + return qed_dbg_feature(cdev, buffer, DBG_FEATURE_FW_ASSERTS, + num_dumped_bytes); +} + +int qed_dbg_fw_asserts_size(struct qed_dev *cdev) +{ + return qed_dbg_feature_size(cdev, DBG_FEATURE_FW_ASSERTS); +} + +int qed_dbg_mcp_trace(struct qed_dev *cdev, void *buffer, + u32 *num_dumped_bytes) +{ + return qed_dbg_feature(cdev, buffer, DBG_FEATURE_MCP_TRACE, + num_dumped_bytes); +} + +int qed_dbg_mcp_trace_size(struct qed_dev *cdev) +{ + return qed_dbg_feature_size(cdev, DBG_FEATURE_MCP_TRACE); +} + +/* Defines the amount of bytes allocated for recording the length of debugfs + * feature buffer. + */ +#define REGDUMP_HEADER_SIZE sizeof(u32) +#define REGDUMP_HEADER_FEATURE_SHIFT 24 +#define REGDUMP_HEADER_ENGINE_SHIFT 31 +#define REGDUMP_HEADER_OMIT_ENGINE_SHIFT 30 +enum debug_print_features { + OLD_MODE = 0, + IDLE_CHK = 1, + GRC_DUMP = 2, + MCP_TRACE = 3, + REG_FIFO = 4, + PROTECTION_OVERRIDE = 5, + IGU_FIFO = 6, + PHY = 7, + FW_ASSERTS = 8, +}; + +static u32 qed_calc_regdump_header(enum debug_print_features feature, + int engine, u32 feature_size, u8 omit_engine) +{ + /* Insert the engine, feature and mode inside the header and combine it + * with feature size. + */ + return feature_size | (feature << REGDUMP_HEADER_FEATURE_SHIFT) | + (omit_engine << REGDUMP_HEADER_OMIT_ENGINE_SHIFT) | + (engine << REGDUMP_HEADER_ENGINE_SHIFT); +} + +int qed_dbg_all_data(struct qed_dev *cdev, void *buffer) +{ + u8 cur_engine, omit_engine = 0, org_engine; + u32 offset = 0, feature_size; + int rc; + + if (cdev->num_hwfns == 1) + omit_engine = 1; + + org_engine = qed_get_debug_engine(cdev); + for (cur_engine = 0; cur_engine < cdev->num_hwfns; cur_engine++) { + /* Collect idle_chks and grcDump for each hw function */ + DP_VERBOSE(cdev, QED_MSG_DEBUG, + "obtaining idle_chk and grcdump for current engine\n"); + qed_set_debug_engine(cdev, cur_engine); + + /* First idle_chk */ + rc = qed_dbg_idle_chk(cdev, (u8 *)buffer + offset + + REGDUMP_HEADER_SIZE, &feature_size); + if (!rc) { + *(u32 *)((u8 *)buffer + offset) = + qed_calc_regdump_header(IDLE_CHK, cur_engine, + feature_size, omit_engine); + offset += (feature_size + REGDUMP_HEADER_SIZE); + } else { + DP_ERR(cdev, "qed_dbg_idle_chk failed. rc = %d\n", rc); + } + + /* Second idle_chk */ + rc = qed_dbg_idle_chk(cdev, (u8 *)buffer + offset + + REGDUMP_HEADER_SIZE, &feature_size); + if (!rc) { + *(u32 *)((u8 *)buffer + offset) = + qed_calc_regdump_header(IDLE_CHK, cur_engine, + feature_size, omit_engine); + offset += (feature_size + REGDUMP_HEADER_SIZE); + } else { + DP_ERR(cdev, "qed_dbg_idle_chk failed. rc = %d\n", rc); + } + + /* reg_fifo dump */ + rc = qed_dbg_reg_fifo(cdev, (u8 *)buffer + offset + + REGDUMP_HEADER_SIZE, &feature_size); + if (!rc) { + *(u32 *)((u8 *)buffer + offset) = + qed_calc_regdump_header(REG_FIFO, cur_engine, + feature_size, omit_engine); + offset += (feature_size + REGDUMP_HEADER_SIZE); + } else { + DP_ERR(cdev, "qed_dbg_reg_fifo failed. rc = %d\n", rc); + } + + /* igu_fifo dump */ + rc = qed_dbg_igu_fifo(cdev, (u8 *)buffer + offset + + REGDUMP_HEADER_SIZE, &feature_size); + if (!rc) { + *(u32 *)((u8 *)buffer + offset) = + qed_calc_regdump_header(IGU_FIFO, cur_engine, + feature_size, omit_engine); + offset += (feature_size + REGDUMP_HEADER_SIZE); + } else { + DP_ERR(cdev, "qed_dbg_igu_fifo failed. rc = %d", rc); + } + + /* protection_override dump */ + rc = qed_dbg_protection_override(cdev, (u8 *)buffer + offset + + REGDUMP_HEADER_SIZE, + &feature_size); + if (!rc) { + *(u32 *)((u8 *)buffer + offset) = + qed_calc_regdump_header(PROTECTION_OVERRIDE, + cur_engine, + feature_size, omit_engine); + offset += (feature_size + REGDUMP_HEADER_SIZE); + } else { + DP_ERR(cdev, + "qed_dbg_protection_override failed. rc = %d\n", + rc); + } + + /* fw_asserts dump */ + rc = qed_dbg_fw_asserts(cdev, (u8 *)buffer + offset + + REGDUMP_HEADER_SIZE, &feature_size); + if (!rc) { + *(u32 *)((u8 *)buffer + offset) = + qed_calc_regdump_header(FW_ASSERTS, cur_engine, + feature_size, omit_engine); + offset += (feature_size + REGDUMP_HEADER_SIZE); + } else { + DP_ERR(cdev, "qed_dbg_fw_asserts failed. rc = %d\n", + rc); + } + + /* GRC dump - must be last because when mcp stuck it will + * clutter idle_chk, reg_fifo, ... + */ + rc = qed_dbg_grc(cdev, (u8 *)buffer + offset + + REGDUMP_HEADER_SIZE, &feature_size); + if (!rc) { + *(u32 *)((u8 *)buffer + offset) = + qed_calc_regdump_header(GRC_DUMP, cur_engine, + feature_size, omit_engine); + offset += (feature_size + REGDUMP_HEADER_SIZE); + } else { + DP_ERR(cdev, "qed_dbg_grc failed. rc = %d", rc); + } + } + + /* mcp_trace */ + rc = qed_dbg_mcp_trace(cdev, (u8 *)buffer + offset + + REGDUMP_HEADER_SIZE, &feature_size); + if (!rc) { + *(u32 *)((u8 *)buffer + offset) = + qed_calc_regdump_header(MCP_TRACE, cur_engine, + feature_size, omit_engine); + offset += (feature_size + REGDUMP_HEADER_SIZE); + } else { + DP_ERR(cdev, "qed_dbg_mcp_trace failed. rc = %d\n", rc); + } + + qed_set_debug_engine(cdev, org_engine); + + return 0; +} + +int qed_dbg_all_data_size(struct qed_dev *cdev) +{ + u8 cur_engine, org_engine; + u32 regs_len = 0; + + org_engine = qed_get_debug_engine(cdev); + for (cur_engine = 0; cur_engine < cdev->num_hwfns; cur_engine++) { + /* Engine specific */ + DP_VERBOSE(cdev, QED_MSG_DEBUG, + "calculating idle_chk and grcdump register length for current engine\n"); + qed_set_debug_engine(cdev, cur_engine); + regs_len += REGDUMP_HEADER_SIZE + qed_dbg_idle_chk_size(cdev) + + REGDUMP_HEADER_SIZE + qed_dbg_idle_chk_size(cdev) + + REGDUMP_HEADER_SIZE + qed_dbg_grc_size(cdev) + + REGDUMP_HEADER_SIZE + qed_dbg_reg_fifo_size(cdev) + + REGDUMP_HEADER_SIZE + qed_dbg_igu_fifo_size(cdev) + + REGDUMP_HEADER_SIZE + + qed_dbg_protection_override_size(cdev) + + REGDUMP_HEADER_SIZE + qed_dbg_fw_asserts_size(cdev); + } + + /* Engine common */ + regs_len += REGDUMP_HEADER_SIZE + qed_dbg_mcp_trace_size(cdev); + qed_set_debug_engine(cdev, org_engine); + + return regs_len; +} + +int qed_dbg_feature(struct qed_dev *cdev, void *buffer, + enum qed_dbg_features feature, u32 *num_dumped_bytes) +{ + struct qed_hwfn *p_hwfn = + &cdev->hwfns[cdev->dbg_params.engine_for_debug]; + struct qed_dbg_feature *qed_feature = + &cdev->dbg_params.features[feature]; + enum dbg_status dbg_rc; + struct qed_ptt *p_ptt; + int rc = 0; + + /* Acquire ptt */ + p_ptt = qed_ptt_acquire(p_hwfn); + if (!p_ptt) + return -EINVAL; + + /* Get dump */ + dbg_rc = qed_dbg_dump(p_hwfn, p_ptt, feature); + if (dbg_rc != DBG_STATUS_OK) { + DP_VERBOSE(cdev, QED_MSG_DEBUG, "%s\n", + qed_dbg_get_status_str(dbg_rc)); + *num_dumped_bytes = 0; + rc = -EINVAL; + goto out; + } + + DP_VERBOSE(cdev, QED_MSG_DEBUG, + "copying debugfs feature to external buffer\n"); + memcpy(buffer, qed_feature->dump_buf, qed_feature->buf_size); + *num_dumped_bytes = cdev->dbg_params.features[feature].dumped_dwords * + 4; + +out: + qed_ptt_release(p_hwfn, p_ptt); + return rc; +} + +int qed_dbg_feature_size(struct qed_dev *cdev, enum qed_dbg_features feature) +{ + struct qed_hwfn *p_hwfn = + &cdev->hwfns[cdev->dbg_params.engine_for_debug]; + struct qed_ptt *p_ptt = qed_ptt_acquire(p_hwfn); + struct qed_dbg_feature *qed_feature = + &cdev->dbg_params.features[feature]; + u32 buf_size_dwords; + enum dbg_status rc; + + if (!p_ptt) + return -EINVAL; + + rc = qed_features_lookup[feature].get_size(p_hwfn, p_ptt, + &buf_size_dwords); + if (rc != DBG_STATUS_OK) + buf_size_dwords = 0; + + qed_ptt_release(p_hwfn, p_ptt); + qed_feature->buf_size = buf_size_dwords * sizeof(u32); + return qed_feature->buf_size; +} + +u8 qed_get_debug_engine(struct qed_dev *cdev) +{ + return cdev->dbg_params.engine_for_debug; +} + +void qed_set_debug_engine(struct qed_dev *cdev, int engine_number) +{ + DP_VERBOSE(cdev, QED_MSG_DEBUG, "set debug engine to %d\n", + engine_number); + cdev->dbg_params.engine_for_debug = engine_number; +} + +void qed_dbg_pf_init(struct qed_dev *cdev) +{ + const u8 *dbg_values; + + /* Debug values are after init values. + * The offset is the first dword of the file. + */ + dbg_values = cdev->firmware->data + *(u32 *)cdev->firmware->data; + qed_dbg_set_bin_ptr((u8 *)dbg_values); + qed_dbg_user_set_bin_ptr((u8 *)dbg_values); +} + +void qed_dbg_pf_exit(struct qed_dev *cdev) +{ + struct qed_dbg_feature *feature = NULL; + enum qed_dbg_features feature_idx; + + /* Debug features' buffers may be allocated if debug feature was used + * but dump wasn't called. + */ + for (feature_idx = 0; feature_idx < DBG_FEATURE_NUM; feature_idx++) { + feature = &cdev->dbg_params.features[feature_idx]; + if (feature->dump_buf) { + vfree(feature->dump_buf); + feature->dump_buf = NULL; + } + } +} diff --git a/drivers/net/ethernet/qlogic/qed/qed_debug.h b/drivers/net/ethernet/qlogic/qed/qed_debug.h new file mode 100644 index 000000000000..f872d7324814 --- /dev/null +++ b/drivers/net/ethernet/qlogic/qed/qed_debug.h @@ -0,0 +1,54 @@ +/* QLogic qed NIC Driver + * Copyright (c) 2015 QLogic Corporation + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. + */ + +#ifndef _QED_DEBUGFS_H +#define _QED_DEBUGFS_H + +enum qed_dbg_features { + DBG_FEATURE_GRC, + DBG_FEATURE_IDLE_CHK, + DBG_FEATURE_MCP_TRACE, + DBG_FEATURE_REG_FIFO, + DBG_FEATURE_IGU_FIFO, + DBG_FEATURE_PROTECTION_OVERRIDE, + DBG_FEATURE_FW_ASSERTS, + DBG_FEATURE_NUM +}; + +int qed_dbg_grc(struct qed_dev *cdev, void *buffer, u32 *num_dumped_bytes); +int qed_dbg_grc_size(struct qed_dev *cdev); +int qed_dbg_idle_chk(struct qed_dev *cdev, void *buffer, + u32 *num_dumped_bytes); +int qed_dbg_idle_chk_size(struct qed_dev *cdev); +int qed_dbg_reg_fifo(struct qed_dev *cdev, void *buffer, + u32 *num_dumped_bytes); +int qed_dbg_reg_fifo_size(struct qed_dev *cdev); +int qed_dbg_igu_fifo(struct qed_dev *cdev, void *buffer, + u32 *num_dumped_bytes); +int qed_dbg_igu_fifo_size(struct qed_dev *cdev); +int qed_dbg_protection_override(struct qed_dev *cdev, void *buffer, + u32 *num_dumped_bytes); +int qed_dbg_protection_override_size(struct qed_dev *cdev); +int qed_dbg_fw_asserts(struct qed_dev *cdev, void *buffer, + u32 *num_dumped_bytes); +int qed_dbg_fw_asserts_size(struct qed_dev *cdev); +int qed_dbg_mcp_trace(struct qed_dev *cdev, void *buffer, + u32 *num_dumped_bytes); +int qed_dbg_mcp_trace_size(struct qed_dev *cdev); +int qed_dbg_all_data(struct qed_dev *cdev, void *buffer); +int qed_dbg_all_data_size(struct qed_dev *cdev); +u8 qed_get_debug_engine(struct qed_dev *cdev); +void qed_set_debug_engine(struct qed_dev *cdev, int engine_number); +int qed_dbg_feature(struct qed_dev *cdev, void *buffer, + enum qed_dbg_features feature, u32 *num_dumped_bytes); +int qed_dbg_feature_size(struct qed_dev *cdev, enum qed_dbg_features feature); + +void qed_dbg_pf_init(struct qed_dev *cdev); +void qed_dbg_pf_exit(struct qed_dev *cdev); + +#endif diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h index 855691f18412..2777d5bb4380 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h +++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h @@ -1728,13 +1728,6 @@ enum bin_dbg_buffer_type { MAX_BIN_DBG_BUFFER_TYPE }; -/* Chip IDs */ -enum chip_ids { - CHIP_RESERVED, - CHIP_BB_B0, - CHIP_RESERVED2, - MAX_CHIP_IDS -}; /* Attention bit mapping */ struct dbg_attn_bit_mapping { @@ -1813,6 +1806,371 @@ enum dbg_attn_type { MAX_DBG_ATTN_TYPE }; +/* condition header for registers dump */ +struct dbg_dump_cond_hdr { + struct dbg_mode_hdr mode; /* Mode header */ + u8 block_id; /* block ID */ + u8 data_size; /* size in dwords of the data following this header */ +}; + +/* memory data for registers dump */ +struct dbg_dump_mem { + __le32 dword0; +#define DBG_DUMP_MEM_ADDRESS_MASK 0xFFFFFF +#define DBG_DUMP_MEM_ADDRESS_SHIFT 0 +#define DBG_DUMP_MEM_MEM_GROUP_ID_MASK 0xFF +#define DBG_DUMP_MEM_MEM_GROUP_ID_SHIFT 24 + __le32 dword1; +#define DBG_DUMP_MEM_LENGTH_MASK 0xFFFFFF +#define DBG_DUMP_MEM_LENGTH_SHIFT 0 +#define DBG_DUMP_MEM_RESERVED_MASK 0xFF +#define DBG_DUMP_MEM_RESERVED_SHIFT 24 +}; + +/* register data for registers dump */ +struct dbg_dump_reg { + __le32 data; +#define DBG_DUMP_REG_ADDRESS_MASK 0xFFFFFF /* register address (in dwords) */ +#define DBG_DUMP_REG_ADDRESS_SHIFT 0 +#define DBG_DUMP_REG_LENGTH_MASK 0xFF /* register size (in dwords) */ +#define DBG_DUMP_REG_LENGTH_SHIFT 24 +}; + +/* split header for registers dump */ +struct dbg_dump_split_hdr { + __le32 hdr; +#define DBG_DUMP_SPLIT_HDR_DATA_SIZE_MASK 0xFFFFFF +#define DBG_DUMP_SPLIT_HDR_DATA_SIZE_SHIFT 0 +#define DBG_DUMP_SPLIT_HDR_SPLIT_TYPE_ID_MASK 0xFF +#define DBG_DUMP_SPLIT_HDR_SPLIT_TYPE_ID_SHIFT 24 +}; + +/* condition header for idle check */ +struct dbg_idle_chk_cond_hdr { + struct dbg_mode_hdr mode; /* Mode header */ + __le16 data_size; /* size in dwords of the data following this header */ +}; + +/* Idle Check condition register */ +struct dbg_idle_chk_cond_reg { + __le32 data; +#define DBG_IDLE_CHK_COND_REG_ADDRESS_MASK 0xFFFFFF +#define DBG_IDLE_CHK_COND_REG_ADDRESS_SHIFT 0 +#define DBG_IDLE_CHK_COND_REG_BLOCK_ID_MASK 0xFF +#define DBG_IDLE_CHK_COND_REG_BLOCK_ID_SHIFT 24 + __le16 num_entries; /* number of registers entries to check */ + u8 entry_size; /* size of registers entry (in dwords) */ + u8 start_entry; /* index of the first entry to check */ +}; + +/* Idle Check info register */ +struct dbg_idle_chk_info_reg { + __le32 data; +#define DBG_IDLE_CHK_INFO_REG_ADDRESS_MASK 0xFFFFFF +#define DBG_IDLE_CHK_INFO_REG_ADDRESS_SHIFT 0 +#define DBG_IDLE_CHK_INFO_REG_BLOCK_ID_MASK 0xFF +#define DBG_IDLE_CHK_INFO_REG_BLOCK_ID_SHIFT 24 + __le16 size; /* register size in dwords */ + struct dbg_mode_hdr mode; /* Mode header */ +}; + +/* Idle Check register */ +union dbg_idle_chk_reg { + struct dbg_idle_chk_cond_reg cond_reg; /* condition register */ + struct dbg_idle_chk_info_reg info_reg; /* info register */ +}; + +/* Idle Check result header */ +struct dbg_idle_chk_result_hdr { + __le16 rule_id; /* Failing rule index */ + __le16 mem_entry_id; /* Failing memory entry index */ + u8 num_dumped_cond_regs; /* number of dumped condition registers */ + u8 num_dumped_info_regs; /* number of dumped condition registers */ + u8 severity; /* from dbg_idle_chk_severity_types enum */ + u8 reserved; +}; + +/* Idle Check result register header */ +struct dbg_idle_chk_result_reg_hdr { + u8 data; +#define DBG_IDLE_CHK_RESULT_REG_HDR_IS_MEM_MASK 0x1 +#define DBG_IDLE_CHK_RESULT_REG_HDR_IS_MEM_SHIFT 0 +#define DBG_IDLE_CHK_RESULT_REG_HDR_REG_ID_MASK 0x7F +#define DBG_IDLE_CHK_RESULT_REG_HDR_REG_ID_SHIFT 1 + u8 start_entry; /* index of the first checked entry */ + __le16 size; /* register size in dwords */ +}; + +/* Idle Check rule */ +struct dbg_idle_chk_rule { + __le16 rule_id; /* Idle Check rule ID */ + u8 severity; /* value from dbg_idle_chk_severity_types enum */ + u8 cond_id; /* Condition ID */ + u8 num_cond_regs; /* number of condition registers */ + u8 num_info_regs; /* number of info registers */ + u8 num_imms; /* number of immediates in the condition */ + u8 reserved1; + __le16 reg_offset; /* offset of this rules registers in the idle check + * register array (in dbg_idle_chk_reg units). + */ + __le16 imm_offset; /* offset of this rules immediate values in the + * immediate values array (in dwords). + */ +}; + +/* Idle Check rule parsing data */ +struct dbg_idle_chk_rule_parsing_data { + __le32 data; +#define DBG_IDLE_CHK_RULE_PARSING_DATA_HAS_FW_MSG_MASK 0x1 +#define DBG_IDLE_CHK_RULE_PARSING_DATA_HAS_FW_MSG_SHIFT 0 +#define DBG_IDLE_CHK_RULE_PARSING_DATA_STR_OFFSET_MASK 0x7FFFFFFF +#define DBG_IDLE_CHK_RULE_PARSING_DATA_STR_OFFSET_SHIFT 1 +}; + +/* idle check severity types */ +enum dbg_idle_chk_severity_types { + /* idle check failure should cause an error */ + IDLE_CHK_SEVERITY_ERROR, + /* idle check failure should cause an error only if theres no traffic */ + IDLE_CHK_SEVERITY_ERROR_NO_TRAFFIC, + /* idle check failure should cause a warning */ + IDLE_CHK_SEVERITY_WARNING, + MAX_DBG_IDLE_CHK_SEVERITY_TYPES +}; + +/* Debug Bus block data */ +struct dbg_bus_block_data { + u8 enabled; /* Indicates if the block is enabled for recording (0/1) */ + u8 hw_id; /* HW ID associated with the block */ + u8 line_num; /* Debug line number to select */ + u8 right_shift; /* Number of units to right the debug data (0-3) */ + u8 cycle_en; /* 4-bit value: bit i set -> unit i is enabled. */ + u8 force_valid; /* 4-bit value: bit i set -> unit i is forced valid. */ + u8 force_frame; /* 4-bit value: bit i set -> unit i frame bit is forced. + */ + u8 reserved; +}; + +/* Debug Bus Clients */ +enum dbg_bus_clients { + DBG_BUS_CLIENT_RBCN, + DBG_BUS_CLIENT_RBCP, + DBG_BUS_CLIENT_RBCR, + DBG_BUS_CLIENT_RBCT, + DBG_BUS_CLIENT_RBCU, + DBG_BUS_CLIENT_RBCF, + DBG_BUS_CLIENT_RBCX, + DBG_BUS_CLIENT_RBCS, + DBG_BUS_CLIENT_RBCH, + DBG_BUS_CLIENT_RBCZ, + DBG_BUS_CLIENT_OTHER_ENGINE, + DBG_BUS_CLIENT_TIMESTAMP, + DBG_BUS_CLIENT_CPU, + DBG_BUS_CLIENT_RBCY, + DBG_BUS_CLIENT_RBCQ, + DBG_BUS_CLIENT_RBCM, + DBG_BUS_CLIENT_RBCB, + DBG_BUS_CLIENT_RBCW, + DBG_BUS_CLIENT_RBCV, + MAX_DBG_BUS_CLIENTS +}; + +/* Debug Bus memory address */ +struct dbg_bus_mem_addr { + __le32 lo; + __le32 hi; +}; + +/* Debug Bus PCI buffer data */ +struct dbg_bus_pci_buf_data { + struct dbg_bus_mem_addr phys_addr; /* PCI buffer physical address */ + struct dbg_bus_mem_addr virt_addr; /* PCI buffer virtual address */ + __le32 size; /* PCI buffer size in bytes */ +}; + +/* Debug Bus Storm EID range filter params */ +struct dbg_bus_storm_eid_range_params { + u8 min; /* Minimal event ID to filter on */ + u8 max; /* Maximal event ID to filter on */ +}; + +/* Debug Bus Storm EID mask filter params */ +struct dbg_bus_storm_eid_mask_params { + u8 val; /* Event ID value */ + u8 mask; /* Event ID mask. 1s in the mask = dont care bits. */ +}; + +/* Debug Bus Storm EID filter params */ +union dbg_bus_storm_eid_params { + struct dbg_bus_storm_eid_range_params range; + struct dbg_bus_storm_eid_mask_params mask; +}; + +/* Debug Bus Storm data */ +struct dbg_bus_storm_data { + u8 fast_enabled; + u8 fast_mode; + u8 slow_enabled; + u8 slow_mode; + u8 hw_id; + u8 eid_filter_en; + u8 eid_range_not_mask; + u8 cid_filter_en; + union dbg_bus_storm_eid_params eid_filter_params; + __le16 reserved; + __le32 cid; +}; + +/* Debug Bus data */ +struct dbg_bus_data { + __le32 app_version; /* The tools version number of the application */ + u8 state; /* The current debug bus state */ + u8 hw_dwords; /* HW dwords per cycle */ + u8 next_hw_id; /* Next HW ID to be associated with an input */ + u8 num_enabled_blocks; /* Number of blocks enabled for recording */ + u8 num_enabled_storms; /* Number of Storms enabled for recording */ + u8 target; /* Output target */ + u8 next_trigger_state; /* ID of next trigger state to be added */ + u8 next_constraint_id; /* ID of next filter/trigger constraint to be + * added. + */ + u8 one_shot_en; /* Indicates if one-shot mode is enabled (0/1) */ + u8 grc_input_en; /* Indicates if GRC recording is enabled (0/1) */ + u8 timestamp_input_en; /* Indicates if timestamp recording is enabled + * (0/1). + */ + u8 filter_en; /* Indicates if the recording filter is enabled (0/1) */ + u8 trigger_en; /* Indicates if the recording trigger is enabled (0/1) */ + u8 adding_filter; /* If true, the next added constraint belong to the + * filter. Otherwise, it belongs to the last added + * trigger state. Valid only if either filter or + * triggers are enabled. + */ + u8 filter_pre_trigger; /* Indicates if the recording filter should be + * applied before the trigger. Valid only if both + * filter and trigger are enabled (0/1). + */ + u8 filter_post_trigger; /* Indicates if the recording filter should be + * applied after the trigger. Valid only if both + * filter and trigger are enabled (0/1). + */ + u8 unify_inputs; /* If true, all inputs are associated with HW ID 0. + * Otherwise, each input is assigned a different HW ID + * (0/1). + */ + u8 rcv_from_other_engine; /* Indicates if the other engine sends it NW + * recording to this engine (0/1). + */ + struct dbg_bus_pci_buf_data pci_buf; /* Debug Bus PCI buffer data. Valid + * only when the target is + * DBG_BUS_TARGET_ID_PCI. + */ + __le16 reserved; + struct dbg_bus_block_data blocks[80];/* Debug Bus data for each block */ + struct dbg_bus_storm_data storms[6]; /* Debug Bus data for each block */ +}; + +/* Debug bus frame modes */ +enum dbg_bus_frame_modes { + DBG_BUS_FRAME_MODE_0HW_4ST = 0, /* 0 HW dwords, 4 Storm dwords */ + DBG_BUS_FRAME_MODE_4HW_0ST = 3, /* 4 HW dwords, 0 Storm dwords */ + DBG_BUS_FRAME_MODE_8HW_0ST = 4, /* 8 HW dwords, 0 Storm dwords */ + MAX_DBG_BUS_FRAME_MODES +}; + +/* Debug bus states */ +enum dbg_bus_states { + DBG_BUS_STATE_IDLE, /* debug bus idle state (not recording) */ + DBG_BUS_STATE_READY, /* debug bus is ready for configuration and + * recording. + */ + DBG_BUS_STATE_RECORDING, /* debug bus is currently recording */ + DBG_BUS_STATE_STOPPED, /* debug bus recording has stopped */ + MAX_DBG_BUS_STATES +}; + +/* Debug bus target IDs */ +enum dbg_bus_targets { + /* records debug bus to DBG block internal buffer */ + DBG_BUS_TARGET_ID_INT_BUF, + /* records debug bus to the NW */ + DBG_BUS_TARGET_ID_NIG, + /* records debug bus to a PCI buffer */ + DBG_BUS_TARGET_ID_PCI, + MAX_DBG_BUS_TARGETS +}; + +/* GRC Dump data */ +struct dbg_grc_data { + __le32 param_val[40]; /* Value of each GRC parameter. Array size must + * match the enum dbg_grc_params. + */ + u8 param_set_by_user[40]; /* Indicates for each GRC parameter if it was + * set by the user (0/1). Array size must + * match the enum dbg_grc_params. + */ +}; + +/* Debug GRC params */ +enum dbg_grc_params { + DBG_GRC_PARAM_DUMP_TSTORM, /* dump Tstorm memories (0/1) */ + DBG_GRC_PARAM_DUMP_MSTORM, /* dump Mstorm memories (0/1) */ + DBG_GRC_PARAM_DUMP_USTORM, /* dump Ustorm memories (0/1) */ + DBG_GRC_PARAM_DUMP_XSTORM, /* dump Xstorm memories (0/1) */ + DBG_GRC_PARAM_DUMP_YSTORM, /* dump Ystorm memories (0/1) */ + DBG_GRC_PARAM_DUMP_PSTORM, /* dump Pstorm memories (0/1) */ + DBG_GRC_PARAM_DUMP_REGS, /* dump non-memory registers (0/1) */ + DBG_GRC_PARAM_DUMP_RAM, /* dump Storm internal RAMs (0/1) */ + DBG_GRC_PARAM_DUMP_PBUF, /* dump Storm passive buffer (0/1) */ + DBG_GRC_PARAM_DUMP_IOR, /* dump Storm IORs (0/1) */ + DBG_GRC_PARAM_DUMP_VFC, /* dump VFC memories (0/1) */ + DBG_GRC_PARAM_DUMP_CM_CTX, /* dump CM contexts (0/1) */ + DBG_GRC_PARAM_DUMP_PXP, /* dump PXP memories (0/1) */ + DBG_GRC_PARAM_DUMP_RSS, /* dump RSS memories (0/1) */ + DBG_GRC_PARAM_DUMP_CAU, /* dump CAU memories (0/1) */ + DBG_GRC_PARAM_DUMP_QM, /* dump QM memories (0/1) */ + DBG_GRC_PARAM_DUMP_MCP, /* dump MCP memories (0/1) */ + DBG_GRC_PARAM_RESERVED, /* reserved */ + DBG_GRC_PARAM_DUMP_CFC, /* dump CFC memories (0/1) */ + DBG_GRC_PARAM_DUMP_IGU, /* dump IGU memories (0/1) */ + DBG_GRC_PARAM_DUMP_BRB, /* dump BRB memories (0/1) */ + DBG_GRC_PARAM_DUMP_BTB, /* dump BTB memories (0/1) */ + DBG_GRC_PARAM_DUMP_BMB, /* dump BMB memories (0/1) */ + DBG_GRC_PARAM_DUMP_NIG, /* dump NIG memories (0/1) */ + DBG_GRC_PARAM_DUMP_MULD, /* dump MULD memories (0/1) */ + DBG_GRC_PARAM_DUMP_PRS, /* dump PRS memories (0/1) */ + DBG_GRC_PARAM_DUMP_DMAE, /* dump PRS memories (0/1) */ + DBG_GRC_PARAM_DUMP_TM, /* dump TM (timers) memories (0/1) */ + DBG_GRC_PARAM_DUMP_SDM, /* dump SDM memories (0/1) */ + DBG_GRC_PARAM_DUMP_DIF, /* dump DIF memories (0/1) */ + DBG_GRC_PARAM_DUMP_STATIC, /* dump static debug data (0/1) */ + DBG_GRC_PARAM_UNSTALL, /* un-stall Storms after dump (0/1) */ + DBG_GRC_PARAM_NUM_LCIDS, /* number of LCIDs (0..320) */ + DBG_GRC_PARAM_NUM_LTIDS, /* number of LTIDs (0..320) */ + /* preset: exclude all memories from dump (1 only) */ + DBG_GRC_PARAM_EXCLUDE_ALL, + /* preset: include memories for crash dump (1 only) */ + DBG_GRC_PARAM_CRASH, + /* perform dump only if MFW is responding (0/1) */ + DBG_GRC_PARAM_PARITY_SAFE, + DBG_GRC_PARAM_DUMP_CM, /* dump CM memories (0/1) */ + DBG_GRC_PARAM_DUMP_PHY, /* dump PHY memories (0/1) */ + MAX_DBG_GRC_PARAMS +}; + +/* Debug reset registers */ +enum dbg_reset_regs { + DBG_RESET_REG_MISCS_PL_UA, + DBG_RESET_REG_MISCS_PL_HV, + DBG_RESET_REG_MISCS_PL_HV_2, + DBG_RESET_REG_MISC_PL_UA, + DBG_RESET_REG_MISC_PL_HV, + DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, + DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, + DBG_RESET_REG_MISC_PL_PDA_VAUX, + MAX_DBG_RESET_REGS +}; + /* Debug status codes */ enum dbg_status { DBG_STATUS_OK, @@ -1869,6 +2227,41 @@ enum dbg_status { MAX_DBG_STATUS }; +/* Debug Storms IDs */ +enum dbg_storms { + DBG_TSTORM_ID, + DBG_MSTORM_ID, + DBG_USTORM_ID, + DBG_XSTORM_ID, + DBG_YSTORM_ID, + DBG_PSTORM_ID, + MAX_DBG_STORMS +}; + +/* Idle Check data */ +struct idle_chk_data { + __le32 buf_size; /* Idle check buffer size in dwords */ + u8 buf_size_set; /* Indicates if the idle check buffer size was set + * (0/1). + */ + u8 reserved1; + __le16 reserved2; +}; + +/* Debug Tools data (per HW function) */ +struct dbg_tools_data { + struct dbg_grc_data grc; /* GRC Dump data */ + struct dbg_bus_data bus; /* Debug Bus data */ + struct idle_chk_data idle_chk; /* Idle Check data */ + u8 mode_enable[40]; /* Indicates if a mode is enabled (0/1) */ + u8 block_in_reset[80]; /* Indicates if a block is in reset state (0/1). + */ + u8 chip_id; /* Chip ID (from enum chip_ids) */ + u8 platform_id; /* Platform ID (from enum platform_ids) */ + u8 initialized; /* Indicates if the data was initialized */ + u8 reserved; +}; + /********************************/ /* HSI Init Functions constants */ /********************************/ @@ -1948,15 +2341,50 @@ struct init_qm_vport_params { /* Max size in dwords of a zipped array */ #define MAX_ZIPPED_SIZE 8192 +struct fw_asserts_ram_section { + __le16 section_ram_line_offset; + __le16 section_ram_line_size; + u8 list_dword_offset; + u8 list_element_dword_size; + u8 list_num_elements; + u8 list_next_index_dword_offset; +}; + +struct fw_ver_num { + u8 major; /* Firmware major version number */ + u8 minor; /* Firmware minor version number */ + u8 rev; /* Firmware revision version number */ + u8 eng; /* Firmware engineering version number (for bootleg versions) */ +}; + +struct fw_ver_info { + __le16 tools_ver; /* Tools version number */ + u8 image_id; /* FW image ID (e.g. main) */ + u8 reserved1; + struct fw_ver_num num; /* FW version number */ + __le32 timestamp; /* FW Timestamp in unix time (sec. since 1970) */ + __le32 reserved2; +}; + +struct fw_info { + struct fw_ver_info ver; + struct fw_asserts_ram_section fw_asserts_section; +}; + +struct fw_info_location { + __le32 grc_addr; + __le32 size; +}; + enum init_modes { MODE_RESERVED, MODE_BB_B0, - MODE_RESERVED2, + MODE_K2, MODE_ASIC, + MODE_RESERVED2, MODE_RESERVED3, MODE_RESERVED4, MODE_RESERVED5, - MODE_RESERVED6, MODE_SF, MODE_MF_SD, MODE_MF_SI, @@ -1965,7 +2393,7 @@ enum init_modes { MODE_PORTS_PER_ENG_4, MODE_100G, MODE_40G, - MODE_RESERVED7, + MODE_RESERVED6, MAX_INIT_MODES }; @@ -2223,8 +2651,276 @@ struct iro { __le16 size; }; +/***************************** Public Functions *******************************/ /** - * @brief qed_dbg_print_attn - Prints attention registers values in the specified results struct. + * @brief qed_dbg_set_bin_ptr - Sets a pointer to the binary data with debug + * arrays. + * + * @param bin_ptr - a pointer to the binary data with debug arrays. + */ +enum dbg_status qed_dbg_set_bin_ptr(const u8 * const bin_ptr); +/** + * @brief qed_dbg_grc_get_dump_buf_size - Returns the required buffer size for + * GRC Dump. + * + * @param p_hwfn - HW device data + * @param p_ptt - Ptt window used for writing the registers. + * @param buf_size - OUT: required buffer size (in dwords) for the GRC Dump + * data. + * + * @return error if one of the following holds: + * - the version wasn't set + * Otherwise, returns ok. + */ +enum dbg_status qed_dbg_grc_get_dump_buf_size(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *buf_size); +/** + * @brief qed_dbg_grc_dump - Dumps GRC data into the specified buffer. + * + * @param p_hwfn - HW device data + * @param p_ptt - Ptt window used for writing the registers. + * @param dump_buf - Pointer to write the collected GRC data into. + * @param buf_size_in_dwords - Size of the specified buffer in dwords. + * @param num_dumped_dwords - OUT: number of dumped dwords. + * + * @return error if one of the following holds: + * - the version wasn't set + * - the specified dump buffer is too small + * Otherwise, returns ok. + */ +enum dbg_status qed_dbg_grc_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + u32 buf_size_in_dwords, + u32 *num_dumped_dwords); +/** + * @brief qed_dbg_idle_chk_get_dump_buf_size - Returns the required buffer size + * for idle check results. + * + * @param p_hwfn - HW device data + * @param p_ptt - Ptt window used for writing the registers. + * @param buf_size - OUT: required buffer size (in dwords) for the idle check + * data. + * + * @return error if one of the following holds: + * - the version wasn't set + * Otherwise, returns ok. + */ +enum dbg_status qed_dbg_idle_chk_get_dump_buf_size(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *buf_size); +/** + * @brief qed_dbg_idle_chk_dump - Performs idle check and writes the results + * into the specified buffer. + * + * @param p_hwfn - HW device data + * @param p_ptt - Ptt window used for writing the registers. + * @param dump_buf - Pointer to write the idle check data into. + * @param buf_size_in_dwords - Size of the specified buffer in dwords. + * @param num_dumped_dwords - OUT: number of dumped dwords. + * + * @return error if one of the following holds: + * - the version wasn't set + * - the specified buffer is too small + * Otherwise, returns ok. + */ +enum dbg_status qed_dbg_idle_chk_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + u32 buf_size_in_dwords, + u32 *num_dumped_dwords); +/** + * @brief qed_dbg_mcp_trace_get_dump_buf_size - Returns the required buffer size + * for mcp trace results. + * + * @param p_hwfn - HW device data + * @param p_ptt - Ptt window used for writing the registers. + * @param buf_size - OUT: required buffer size (in dwords) for mcp trace data. + * + * @return error if one of the following holds: + * - the version wasn't set + * - the trace data in MCP scratchpad contain an invalid signature + * - the bundle ID in NVRAM is invalid + * - the trace meta data cannot be found (in NVRAM or image file) + * Otherwise, returns ok. + */ +enum dbg_status qed_dbg_mcp_trace_get_dump_buf_size(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *buf_size); +/** + * @brief qed_dbg_mcp_trace_dump - Performs mcp trace and writes the results + * into the specified buffer. + * + * @param p_hwfn - HW device data + * @param p_ptt - Ptt window used for writing the registers. + * @param dump_buf - Pointer to write the mcp trace data into. + * @param buf_size_in_dwords - Size of the specified buffer in dwords. + * @param num_dumped_dwords - OUT: number of dumped dwords. + * + * @return error if one of the following holds: + * - the version wasn't set + * - the specified buffer is too small + * - the trace data in MCP scratchpad contain an invalid signature + * - the bundle ID in NVRAM is invalid + * - the trace meta data cannot be found (in NVRAM or image file) + * - the trace meta data cannot be read (from NVRAM or image file) + * Otherwise, returns ok. + */ +enum dbg_status qed_dbg_mcp_trace_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + u32 buf_size_in_dwords, + u32 *num_dumped_dwords); +/** + * @brief qed_dbg_reg_fifo_get_dump_buf_size - Returns the required buffer size + * for grc trace fifo results. + * + * @param p_hwfn - HW device data + * @param p_ptt - Ptt window used for writing the registers. + * @param buf_size - OUT: required buffer size (in dwords) for reg fifo data. + * + * @return error if one of the following holds: + * - the version wasn't set + * Otherwise, returns ok. + */ +enum dbg_status qed_dbg_reg_fifo_get_dump_buf_size(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *buf_size); +/** + * @brief qed_dbg_reg_fifo_dump - Reads the reg fifo and writes the results into + * the specified buffer. + * + * @param p_hwfn - HW device data + * @param p_ptt - Ptt window used for writing the registers. + * @param dump_buf - Pointer to write the reg fifo data into. + * @param buf_size_in_dwords - Size of the specified buffer in dwords. + * @param num_dumped_dwords - OUT: number of dumped dwords. + * + * @return error if one of the following holds: + * - the version wasn't set + * - the specified buffer is too small + * - DMAE transaction failed + * Otherwise, returns ok. + */ +enum dbg_status qed_dbg_reg_fifo_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + u32 buf_size_in_dwords, + u32 *num_dumped_dwords); +/** + * @brief qed_dbg_igu_fifo_get_dump_buf_size - Returns the required buffer size + * for the IGU fifo results. + * + * @param p_hwfn - HW device data + * @param p_ptt - Ptt window used for writing the registers. + * @param buf_size - OUT: required buffer size (in dwords) for the IGU fifo + * data. + * + * @return error if one of the following holds: + * - the version wasn't set + * Otherwise, returns ok. + */ +enum dbg_status qed_dbg_igu_fifo_get_dump_buf_size(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *buf_size); +/** + * @brief qed_dbg_igu_fifo_dump - Reads the IGU fifo and writes the results into + * the specified buffer. + * + * @param p_hwfn - HW device data + * @param p_ptt - Ptt window used for writing the registers. + * @param dump_buf - Pointer to write the IGU fifo data into. + * @param buf_size_in_dwords - Size of the specified buffer in dwords. + * @param num_dumped_dwords - OUT: number of dumped dwords. + * + * @return error if one of the following holds: + * - the version wasn't set + * - the specified buffer is too small + * - DMAE transaction failed + * Otherwise, returns ok. + */ +enum dbg_status qed_dbg_igu_fifo_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + u32 buf_size_in_dwords, + u32 *num_dumped_dwords); +/** + * @brief qed_dbg_protection_override_get_dump_buf_size - Returns the required + * buffer size for protection override window results. + * + * @param p_hwfn - HW device data + * @param p_ptt - Ptt window used for writing the registers. + * @param buf_size - OUT: required buffer size (in dwords) for protection + * override data. + * + * @return error if one of the following holds: + * - the version wasn't set + * Otherwise, returns ok. + */ +enum dbg_status +qed_dbg_protection_override_get_dump_buf_size(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *buf_size); +/** + * @brief qed_dbg_protection_override_dump - Reads protection override window + * entries and writes the results into the specified buffer. + * + * @param p_hwfn - HW device data + * @param p_ptt - Ptt window used for writing the registers. + * @param dump_buf - Pointer to write the protection override data into. + * @param buf_size_in_dwords - Size of the specified buffer in dwords. + * @param num_dumped_dwords - OUT: number of dumped dwords. + * + * @return error if one of the following holds: + * - the version wasn't set + * - the specified buffer is too small + * - DMAE transaction failed + * Otherwise, returns ok. + */ +enum dbg_status qed_dbg_protection_override_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + u32 buf_size_in_dwords, + u32 *num_dumped_dwords); +/** + * @brief qed_dbg_fw_asserts_get_dump_buf_size - Returns the required buffer + * size for FW Asserts results. + * + * @param p_hwfn - HW device data + * @param p_ptt - Ptt window used for writing the registers. + * @param buf_size - OUT: required buffer size (in dwords) for FW Asserts data. + * + * @return error if one of the following holds: + * - the version wasn't set + * Otherwise, returns ok. + */ +enum dbg_status qed_dbg_fw_asserts_get_dump_buf_size(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *buf_size); +/** + * @brief qed_dbg_fw_asserts_dump - Reads the FW Asserts and writes the results + * into the specified buffer. + * + * @param p_hwfn - HW device data + * @param p_ptt - Ptt window used for writing the registers. + * @param dump_buf - Pointer to write the FW Asserts data into. + * @param buf_size_in_dwords - Size of the specified buffer in dwords. + * @param num_dumped_dwords - OUT: number of dumped dwords. + * + * @return error if one of the following holds: + * - the version wasn't set + * - the specified buffer is too small + * Otherwise, returns ok. + */ +enum dbg_status qed_dbg_fw_asserts_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + u32 buf_size_in_dwords, + u32 *num_dumped_dwords); +/** + * @brief qed_dbg_print_attn - Prints attention registers values in the + * specified results struct. * * @param p_hwfn * @param results - Pointer to the attention read results @@ -2236,8 +2932,212 @@ struct iro { enum dbg_status qed_dbg_print_attn(struct qed_hwfn *p_hwfn, struct dbg_attn_block_result *results); +/******************************** Constants **********************************/ + #define MAX_NAME_LEN 16 +/***************************** Public Functions *******************************/ +/** + * @brief qed_dbg_user_set_bin_ptr - Sets a pointer to the binary data with + * debug arrays. + * + * @param bin_ptr - a pointer to the binary data with debug arrays. + */ +enum dbg_status qed_dbg_user_set_bin_ptr(const u8 * const bin_ptr); +/** + * @brief qed_dbg_get_status_str - Returns a string for the specified status. + * + * @param status - a debug status code. + * + * @return a string for the specified status + */ +const char *qed_dbg_get_status_str(enum dbg_status status); +/** + * @brief qed_get_idle_chk_results_buf_size - Returns the required buffer size + * for idle check results (in bytes). + * + * @param p_hwfn - HW device data + * @param dump_buf - idle check dump buffer. + * @param num_dumped_dwords - number of dwords that were dumped. + * @param results_buf_size - OUT: required buffer size (in bytes) for the parsed + * results. + * + * @return error if the parsing fails, ok otherwise. + */ +enum dbg_status qed_get_idle_chk_results_buf_size(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + u32 *results_buf_size); +/** + * @brief qed_print_idle_chk_results - Prints idle check results + * + * @param p_hwfn - HW device data + * @param dump_buf - idle check dump buffer. + * @param num_dumped_dwords - number of dwords that were dumped. + * @param results_buf - buffer for printing the idle check results. + * @param num_errors - OUT: number of errors found in idle check. + * @param num_warnings - OUT: number of warnings found in idle check. + * + * @return error if the parsing fails, ok otherwise. + */ +enum dbg_status qed_print_idle_chk_results(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + char *results_buf, + u32 *num_errors, + u32 *num_warnings); +/** + * @brief qed_get_mcp_trace_results_buf_size - Returns the required buffer size + * for MCP Trace results (in bytes). + * + * @param p_hwfn - HW device data + * @param dump_buf - MCP Trace dump buffer. + * @param num_dumped_dwords - number of dwords that were dumped. + * @param results_buf_size - OUT: required buffer size (in bytes) for the parsed + * results. + * + * @return error if the parsing fails, ok otherwise. + */ +enum dbg_status qed_get_mcp_trace_results_buf_size(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + u32 *results_buf_size); +/** + * @brief qed_print_mcp_trace_results - Prints MCP Trace results + * + * @param p_hwfn - HW device data + * @param dump_buf - mcp trace dump buffer, starting from the header. + * @param num_dumped_dwords - number of dwords that were dumped. + * @param results_buf - buffer for printing the mcp trace results. + * + * @return error if the parsing fails, ok otherwise. + */ +enum dbg_status qed_print_mcp_trace_results(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + char *results_buf); +/** + * @brief qed_get_reg_fifo_results_buf_size - Returns the required buffer size + * for reg_fifo results (in bytes). + * + * @param p_hwfn - HW device data + * @param dump_buf - reg fifo dump buffer. + * @param num_dumped_dwords - number of dwords that were dumped. + * @param results_buf_size - OUT: required buffer size (in bytes) for the parsed + * results. + * + * @return error if the parsing fails, ok otherwise. + */ +enum dbg_status qed_get_reg_fifo_results_buf_size(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + u32 *results_buf_size); +/** + * @brief qed_print_reg_fifo_results - Prints reg fifo results + * + * @param p_hwfn - HW device data + * @param dump_buf - reg fifo dump buffer, starting from the header. + * @param num_dumped_dwords - number of dwords that were dumped. + * @param results_buf - buffer for printing the reg fifo results. + * + * @return error if the parsing fails, ok otherwise. + */ +enum dbg_status qed_print_reg_fifo_results(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + char *results_buf); +/** + * @brief qed_get_igu_fifo_results_buf_size - Returns the required buffer size + * for igu_fifo results (in bytes). + * + * @param p_hwfn - HW device data + * @param dump_buf - IGU fifo dump buffer. + * @param num_dumped_dwords - number of dwords that were dumped. + * @param results_buf_size - OUT: required buffer size (in bytes) for the parsed + * results. + * + * @return error if the parsing fails, ok otherwise. + */ +enum dbg_status qed_get_igu_fifo_results_buf_size(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + u32 *results_buf_size); +/** + * @brief qed_print_igu_fifo_results - Prints IGU fifo results + * + * @param p_hwfn - HW device data + * @param dump_buf - IGU fifo dump buffer, starting from the header. + * @param num_dumped_dwords - number of dwords that were dumped. + * @param results_buf - buffer for printing the IGU fifo results. + * + * @return error if the parsing fails, ok otherwise. + */ +enum dbg_status qed_print_igu_fifo_results(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + char *results_buf); +/** + * @brief qed_get_protection_override_results_buf_size - Returns the required + * buffer size for protection override results (in bytes). + * + * @param p_hwfn - HW device data + * @param dump_buf - protection override dump buffer. + * @param num_dumped_dwords - number of dwords that were dumped. + * @param results_buf_size - OUT: required buffer size (in bytes) for the parsed + * results. + * + * @return error if the parsing fails, ok otherwise. + */ +enum dbg_status +qed_get_protection_override_results_buf_size(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + u32 *results_buf_size); +/** + * @brief qed_print_protection_override_results - Prints protection override + * results. + * + * @param p_hwfn - HW device data + * @param dump_buf - protection override dump buffer, starting from the header. + * @param num_dumped_dwords - number of dwords that were dumped. + * @param results_buf - buffer for printing the reg fifo results. + * + * @return error if the parsing fails, ok otherwise. + */ +enum dbg_status qed_print_protection_override_results(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + char *results_buf); +/** + * @brief qed_get_fw_asserts_results_buf_size - Returns the required buffer size + * for FW Asserts results (in bytes). + * + * @param p_hwfn - HW device data + * @param dump_buf - FW Asserts dump buffer. + * @param num_dumped_dwords - number of dwords that were dumped. + * @param results_buf_size - OUT: required buffer size (in bytes) for the parsed + * results. + * + * @return error if the parsing fails, ok otherwise. + */ +enum dbg_status qed_get_fw_asserts_results_buf_size(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + u32 *results_buf_size); +/** + * @brief qed_print_fw_asserts_results - Prints FW Asserts results + * + * @param p_hwfn - HW device data + * @param dump_buf - FW Asserts dump buffer, starting from the header. + * @param num_dumped_dwords - number of dwords that were dumped. + * @param results_buf - buffer for printing the FW Asserts results. + * + * @return error if the parsing fails, ok otherwise. + */ +enum dbg_status qed_print_fw_asserts_results(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + char *results_buf); /* Win 2 */ #define GTT_BAR0_MAP_REG_IGU_CMD 0x00f000UL @@ -7039,6 +7939,35 @@ struct ystorm_iscsi_conn_ag_ctx { __le32 reg2; __le32 reg3; }; + +#define MFW_TRACE_SIGNATURE 0x25071946 + +/* The trace in the buffer */ +#define MFW_TRACE_EVENTID_MASK 0x00ffff +#define MFW_TRACE_PRM_SIZE_MASK 0x0f0000 +#define MFW_TRACE_PRM_SIZE_SHIFT 16 +#define MFW_TRACE_ENTRY_SIZE 3 + +struct mcp_trace { + u32 signature; /* Help to identify that the trace is valid */ + u32 size; /* the size of the trace buffer in bytes */ + u32 curr_level; /* 2 - all will be written to the buffer + * 1 - debug trace will not be written + * 0 - just errors will be written to the buffer + */ + u32 modules_mask[2]; /* a bit per module, 1 means write it, 0 means + * mask it. + */ + + /* Warning: the following pointers are assumed to be 32bits as they are + * used only in the MFW. + */ + u32 trace_prod; /* The next trace will be written to this offset */ + u32 trace_oldest; /* The oldest valid trace starts at this offset + * (usually very close after the current producer). + */ +}; + #define VF_MAX_STATIC 192 #define MCP_GLOB_PATH_MAX 2 @@ -7046,6 +7975,7 @@ struct ystorm_iscsi_conn_ag_ctx { #define MCP_GLOB_PORT_MAX 4 #define MCP_GLOB_FUNC_MAX 16 +typedef u32 offsize_t; /* In DWORDS !!! */ /* Offset from the beginning of the MCP scratchpad */ #define OFFSIZE_OFFSET_SHIFT 0 #define OFFSIZE_OFFSET_MASK 0x0000ffff @@ -7636,6 +8566,8 @@ struct public_drv_mb { #define DRV_MSG_CODE_NIG_DRAIN 0x30000000 #define DRV_MSG_CODE_VF_DISABLED_DONE 0xc0000000 #define DRV_MSG_CODE_CFG_VF_MSIX 0xc0010000 +#define DRV_MSG_CODE_NVM_GET_FILE_ATT 0x00030000 +#define DRV_MSG_CODE_NVM_READ_NVRAM 0x00050000 #define DRV_MSG_CODE_MCP_RESET 0x00090000 #define DRV_MSG_CODE_SET_VERSION 0x000f0000 #define DRV_MSG_CODE_MCP_HALT 0x00100000 @@ -7657,6 +8589,9 @@ struct public_drv_mb { #define DRV_MB_PARAM_UNLOAD_WOL_MCP 0x00000001 #define DRV_MB_PARAM_DCBX_NOTIFY_MASK 0x000000FF #define DRV_MB_PARAM_DCBX_NOTIFY_SHIFT 3 + +#define DRV_MB_PARAM_NVM_LEN_SHIFT 24 + #define DRV_MB_PARAM_CFG_VF_MSIX_VF_ID_SHIFT 0 #define DRV_MB_PARAM_CFG_VF_MSIX_VF_ID_MASK 0x000000FF #define DRV_MB_PARAM_CFG_VF_MSIX_SB_NUM_SHIFT 8 @@ -7694,6 +8629,8 @@ struct public_drv_mb { #define FW_MSG_CODE_DRV_UNLOAD_FUNCTION 0x20130000 #define FW_MSG_CODE_DRV_UNLOAD_DONE 0x21100000 #define FW_MSG_CODE_DRV_CFG_VF_MSIX_DONE 0xb0010000 + +#define FW_MSG_CODE_NVM_OK 0x00010000 #define FW_MSG_CODE_OK 0x00160000 #define FW_MSG_SEQ_NUMBER_MASK 0x0000ffff @@ -7930,4 +8867,101 @@ struct nvm_cfg1 { struct nvm_cfg1_port port[MCP_GLOB_PORT_MAX]; struct nvm_cfg1_func func[MCP_GLOB_FUNC_MAX]; }; + +enum spad_sections { + SPAD_SECTION_TRACE, + SPAD_SECTION_NVM_CFG, + SPAD_SECTION_PUBLIC, + SPAD_SECTION_PRIVATE, + SPAD_SECTION_MAX +}; + +#define MCP_TRACE_SIZE 2048 /* 2kb */ + +/* This section is located at a fixed location in the beginning of the + * scratchpad, to ensure that the MCP trace is not run over during MFW upgrade. + * All the rest of data has a floating location which differs from version to + * version, and is pointed by the mcp_meta_data below. + * Moreover, the spad_layout section is part of the MFW firmware, and is loaded + * with it from nvram in order to clear this portion. + */ +struct static_init { + u32 num_sections; + offsize_t sections[SPAD_SECTION_MAX]; +#define SECTION(_sec_) (*((offsize_t *)(STRUCT_OFFSET(sections[_sec_])))) + + struct mcp_trace trace; +#define MCP_TRACE_P ((struct mcp_trace *)(STRUCT_OFFSET(trace))) + u8 trace_buffer[MCP_TRACE_SIZE]; +#define MCP_TRACE_BUF ((u8 *)(STRUCT_OFFSET(trace_buffer))) + /* running_mfw has the same definition as in nvm_map.h. + * This bit indicate both the running dir, and the running bundle. + * It is set once when the LIM is loaded. + */ + u32 running_mfw; +#define RUNNING_MFW (*((u32 *)(STRUCT_OFFSET(running_mfw)))) + u32 build_time; +#define MFW_BUILD_TIME (*((u32 *)(STRUCT_OFFSET(build_time)))) + u32 reset_type; +#define RESET_TYPE (*((u32 *)(STRUCT_OFFSET(reset_type)))) + u32 mfw_secure_mode; +#define MFW_SECURE_MODE (*((u32 *)(STRUCT_OFFSET(mfw_secure_mode)))) + u16 pme_status_pf_bitmap; +#define PME_STATUS_PF_BITMAP (*((u16 *)(STRUCT_OFFSET(pme_status_pf_bitmap)))) + u16 pme_enable_pf_bitmap; +#define PME_ENABLE_PF_BITMAP (*((u16 *)(STRUCT_OFFSET(pme_enable_pf_bitmap)))) + u32 mim_nvm_addr; + u32 mim_start_addr; + u32 ah_pcie_link_params; +#define AH_PCIE_LINK_PARAMS_LINK_SPEED_MASK (0x000000ff) +#define AH_PCIE_LINK_PARAMS_LINK_SPEED_SHIFT (0) +#define AH_PCIE_LINK_PARAMS_LINK_WIDTH_MASK (0x0000ff00) +#define AH_PCIE_LINK_PARAMS_LINK_WIDTH_SHIFT (8) +#define AH_PCIE_LINK_PARAMS_ASPM_MODE_MASK (0x00ff0000) +#define AH_PCIE_LINK_PARAMS_ASPM_MODE_SHIFT (16) +#define AH_PCIE_LINK_PARAMS_ASPM_CAP_MASK (0xff000000) +#define AH_PCIE_LINK_PARAMS_ASPM_CAP_SHIFT (24) +#define AH_PCIE_LINK_PARAMS (*((u32 *)(STRUCT_OFFSET(ah_pcie_link_params)))) + + u32 rsrv_persist[5]; /* Persist reserved for MFW upgrades */ +}; + +enum nvm_image_type { + NVM_TYPE_TIM1 = 0x01, + NVM_TYPE_TIM2 = 0x02, + NVM_TYPE_MIM1 = 0x03, + NVM_TYPE_MIM2 = 0x04, + NVM_TYPE_MBA = 0x05, + NVM_TYPE_MODULES_PN = 0x06, + NVM_TYPE_VPD = 0x07, + NVM_TYPE_MFW_TRACE1 = 0x08, + NVM_TYPE_MFW_TRACE2 = 0x09, + NVM_TYPE_NVM_CFG1 = 0x0a, + NVM_TYPE_L2B = 0x0b, + NVM_TYPE_DIR1 = 0x0c, + NVM_TYPE_EAGLE_FW1 = 0x0d, + NVM_TYPE_FALCON_FW1 = 0x0e, + NVM_TYPE_PCIE_FW1 = 0x0f, + NVM_TYPE_HW_SET = 0x10, + NVM_TYPE_LIM = 0x11, + NVM_TYPE_AVS_FW1 = 0x12, + NVM_TYPE_DIR2 = 0x13, + NVM_TYPE_CCM = 0x14, + NVM_TYPE_EAGLE_FW2 = 0x15, + NVM_TYPE_FALCON_FW2 = 0x16, + NVM_TYPE_PCIE_FW2 = 0x17, + NVM_TYPE_AVS_FW2 = 0x18, + NVM_TYPE_INIT_HW = 0x19, + NVM_TYPE_DEFAULT_CFG = 0x1a, + NVM_TYPE_MDUMP = 0x1b, + NVM_TYPE_META = 0x1c, + NVM_TYPE_ISCSI_CFG = 0x1d, + NVM_TYPE_FCOE_CFG = 0x1f, + NVM_TYPE_ETH_PHY_FW1 = 0x20, + NVM_TYPE_ETH_PHY_FW2 = 0x21, + NVM_TYPE_MAX, +}; + +#define DIR_ID_1 (0) + #endif diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index d22e3f8816ae..250efd1d270d 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -588,6 +588,8 @@ static int qed_nic_stop(struct qed_dev *cdev) } } + qed_dbg_pf_exit(cdev); + return rc; } @@ -846,6 +848,8 @@ static int qed_slowpath_start(struct qed_dev *cdev, /* First Dword used to diffrentiate between various sources */ data = cdev->firmware->data + sizeof(u32); + + qed_dbg_pf_init(cdev); } memset(&tunn_info, 0, sizeof(tunn_info)); diff --git a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h index b44f09b18eb1..759cb04e02b0 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h +++ b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h @@ -528,9 +528,903 @@ #define QM_REG_WFQPFWEIGHT 0x2f4e80UL #define QM_REG_WFQVPWEIGHT 0x2fa000UL +#define PGLCS_REG_DBG_SELECT \ + 0x001d14UL +#define PGLCS_REG_DBG_DWORD_ENABLE \ + 0x001d18UL +#define PGLCS_REG_DBG_SHIFT \ + 0x001d1cUL +#define PGLCS_REG_DBG_FORCE_VALID \ + 0x001d20UL +#define PGLCS_REG_DBG_FORCE_FRAME \ + 0x001d24UL +#define MISC_REG_RESET_PL_PDA_VMAIN_1 \ + 0x008070UL +#define MISC_REG_RESET_PL_PDA_VMAIN_2 \ + 0x008080UL +#define MISC_REG_RESET_PL_PDA_VAUX \ + 0x008090UL +#define MISCS_REG_RESET_PL_UA \ + 0x009050UL +#define MISCS_REG_RESET_PL_HV \ + 0x009060UL +#define MISCS_REG_RESET_PL_HV_2 \ + 0x009150UL +#define DMAE_REG_DBG_SELECT \ + 0x00c510UL +#define DMAE_REG_DBG_DWORD_ENABLE \ + 0x00c514UL +#define DMAE_REG_DBG_SHIFT \ + 0x00c518UL +#define DMAE_REG_DBG_FORCE_VALID \ + 0x00c51cUL +#define DMAE_REG_DBG_FORCE_FRAME \ + 0x00c520UL +#define NCSI_REG_DBG_SELECT \ + 0x040474UL +#define NCSI_REG_DBG_DWORD_ENABLE \ + 0x040478UL +#define NCSI_REG_DBG_SHIFT \ + 0x04047cUL +#define NCSI_REG_DBG_FORCE_VALID \ + 0x040480UL +#define NCSI_REG_DBG_FORCE_FRAME \ + 0x040484UL +#define GRC_REG_DBG_SELECT \ + 0x0500a4UL +#define GRC_REG_DBG_DWORD_ENABLE \ + 0x0500a8UL +#define GRC_REG_DBG_SHIFT \ + 0x0500acUL +#define GRC_REG_DBG_FORCE_VALID \ + 0x0500b0UL +#define GRC_REG_DBG_FORCE_FRAME \ + 0x0500b4UL +#define UMAC_REG_DBG_SELECT \ + 0x051094UL +#define UMAC_REG_DBG_DWORD_ENABLE \ + 0x051098UL +#define UMAC_REG_DBG_SHIFT \ + 0x05109cUL +#define UMAC_REG_DBG_FORCE_VALID \ + 0x0510a0UL +#define UMAC_REG_DBG_FORCE_FRAME \ + 0x0510a4UL +#define MCP2_REG_DBG_SELECT \ + 0x052400UL +#define MCP2_REG_DBG_DWORD_ENABLE \ + 0x052404UL +#define MCP2_REG_DBG_SHIFT \ + 0x052408UL +#define MCP2_REG_DBG_FORCE_VALID \ + 0x052440UL +#define MCP2_REG_DBG_FORCE_FRAME \ + 0x052444UL +#define PCIE_REG_DBG_SELECT \ + 0x0547e8UL +#define PCIE_REG_DBG_DWORD_ENABLE \ + 0x0547ecUL +#define PCIE_REG_DBG_SHIFT \ + 0x0547f0UL +#define PCIE_REG_DBG_FORCE_VALID \ + 0x0547f4UL +#define PCIE_REG_DBG_FORCE_FRAME \ + 0x0547f8UL +#define DORQ_REG_DBG_SELECT \ + 0x100ad0UL +#define DORQ_REG_DBG_DWORD_ENABLE \ + 0x100ad4UL +#define DORQ_REG_DBG_SHIFT \ + 0x100ad8UL +#define DORQ_REG_DBG_FORCE_VALID \ + 0x100adcUL +#define DORQ_REG_DBG_FORCE_FRAME \ + 0x100ae0UL +#define IGU_REG_DBG_SELECT \ + 0x181578UL +#define IGU_REG_DBG_DWORD_ENABLE \ + 0x18157cUL +#define IGU_REG_DBG_SHIFT \ + 0x181580UL +#define IGU_REG_DBG_FORCE_VALID \ + 0x181584UL +#define IGU_REG_DBG_FORCE_FRAME \ + 0x181588UL +#define CAU_REG_DBG_SELECT \ + 0x1c0ea8UL +#define CAU_REG_DBG_DWORD_ENABLE \ + 0x1c0eacUL +#define CAU_REG_DBG_SHIFT \ + 0x1c0eb0UL +#define CAU_REG_DBG_FORCE_VALID \ + 0x1c0eb4UL +#define CAU_REG_DBG_FORCE_FRAME \ + 0x1c0eb8UL +#define PRS_REG_DBG_SELECT \ + 0x1f0b6cUL +#define PRS_REG_DBG_DWORD_ENABLE \ + 0x1f0b70UL +#define PRS_REG_DBG_SHIFT \ + 0x1f0b74UL +#define PRS_REG_DBG_FORCE_VALID \ + 0x1f0ba0UL +#define PRS_REG_DBG_FORCE_FRAME \ + 0x1f0ba4UL +#define CNIG_REG_DBG_SELECT_K2 \ + 0x218254UL +#define CNIG_REG_DBG_DWORD_ENABLE_K2 \ + 0x218258UL +#define CNIG_REG_DBG_SHIFT_K2 \ + 0x21825cUL +#define CNIG_REG_DBG_FORCE_VALID_K2 \ + 0x218260UL +#define CNIG_REG_DBG_FORCE_FRAME_K2 \ + 0x218264UL +#define PRM_REG_DBG_SELECT \ + 0x2306a8UL +#define PRM_REG_DBG_DWORD_ENABLE \ + 0x2306acUL +#define PRM_REG_DBG_SHIFT \ + 0x2306b0UL +#define PRM_REG_DBG_FORCE_VALID \ + 0x2306b4UL +#define PRM_REG_DBG_FORCE_FRAME \ + 0x2306b8UL +#define SRC_REG_DBG_SELECT \ + 0x238700UL +#define SRC_REG_DBG_DWORD_ENABLE \ + 0x238704UL +#define SRC_REG_DBG_SHIFT \ + 0x238708UL +#define SRC_REG_DBG_FORCE_VALID \ + 0x23870cUL +#define SRC_REG_DBG_FORCE_FRAME \ + 0x238710UL +#define RSS_REG_DBG_SELECT \ + 0x238c4cUL +#define RSS_REG_DBG_DWORD_ENABLE \ + 0x238c50UL +#define RSS_REG_DBG_SHIFT \ + 0x238c54UL +#define RSS_REG_DBG_FORCE_VALID \ + 0x238c58UL +#define RSS_REG_DBG_FORCE_FRAME \ + 0x238c5cUL +#define RPB_REG_DBG_SELECT \ + 0x23c728UL +#define RPB_REG_DBG_DWORD_ENABLE \ + 0x23c72cUL +#define RPB_REG_DBG_SHIFT \ + 0x23c730UL +#define RPB_REG_DBG_FORCE_VALID \ + 0x23c734UL +#define RPB_REG_DBG_FORCE_FRAME \ + 0x23c738UL +#define PSWRQ2_REG_DBG_SELECT \ + 0x240100UL +#define PSWRQ2_REG_DBG_DWORD_ENABLE \ + 0x240104UL +#define PSWRQ2_REG_DBG_SHIFT \ + 0x240108UL +#define PSWRQ2_REG_DBG_FORCE_VALID \ + 0x24010cUL +#define PSWRQ2_REG_DBG_FORCE_FRAME \ + 0x240110UL +#define PSWRQ_REG_DBG_SELECT \ + 0x280020UL +#define PSWRQ_REG_DBG_DWORD_ENABLE \ + 0x280024UL +#define PSWRQ_REG_DBG_SHIFT \ + 0x280028UL +#define PSWRQ_REG_DBG_FORCE_VALID \ + 0x28002cUL +#define PSWRQ_REG_DBG_FORCE_FRAME \ + 0x280030UL +#define PSWWR_REG_DBG_SELECT \ + 0x29a084UL +#define PSWWR_REG_DBG_DWORD_ENABLE \ + 0x29a088UL +#define PSWWR_REG_DBG_SHIFT \ + 0x29a08cUL +#define PSWWR_REG_DBG_FORCE_VALID \ + 0x29a090UL +#define PSWWR_REG_DBG_FORCE_FRAME \ + 0x29a094UL +#define PSWRD_REG_DBG_SELECT \ + 0x29c040UL +#define PSWRD_REG_DBG_DWORD_ENABLE \ + 0x29c044UL +#define PSWRD_REG_DBG_SHIFT \ + 0x29c048UL +#define PSWRD_REG_DBG_FORCE_VALID \ + 0x29c04cUL +#define PSWRD_REG_DBG_FORCE_FRAME \ + 0x29c050UL +#define PSWRD2_REG_DBG_SELECT \ + 0x29d400UL +#define PSWRD2_REG_DBG_DWORD_ENABLE \ + 0x29d404UL +#define PSWRD2_REG_DBG_SHIFT \ + 0x29d408UL +#define PSWRD2_REG_DBG_FORCE_VALID \ + 0x29d40cUL +#define PSWRD2_REG_DBG_FORCE_FRAME \ + 0x29d410UL +#define PSWHST2_REG_DBG_SELECT \ + 0x29e058UL +#define PSWHST2_REG_DBG_DWORD_ENABLE \ + 0x29e05cUL +#define PSWHST2_REG_DBG_SHIFT \ + 0x29e060UL +#define PSWHST2_REG_DBG_FORCE_VALID \ + 0x29e064UL +#define PSWHST2_REG_DBG_FORCE_FRAME \ + 0x29e068UL +#define PSWHST_REG_DBG_SELECT \ + 0x2a0100UL +#define PSWHST_REG_DBG_DWORD_ENABLE \ + 0x2a0104UL +#define PSWHST_REG_DBG_SHIFT \ + 0x2a0108UL +#define PSWHST_REG_DBG_FORCE_VALID \ + 0x2a010cUL +#define PSWHST_REG_DBG_FORCE_FRAME \ + 0x2a0110UL +#define PGLUE_B_REG_DBG_SELECT \ + 0x2a8400UL +#define PGLUE_B_REG_DBG_DWORD_ENABLE \ + 0x2a8404UL +#define PGLUE_B_REG_DBG_SHIFT \ + 0x2a8408UL +#define PGLUE_B_REG_DBG_FORCE_VALID \ + 0x2a840cUL +#define PGLUE_B_REG_DBG_FORCE_FRAME \ + 0x2a8410UL +#define TM_REG_DBG_SELECT \ + 0x2c07a8UL +#define TM_REG_DBG_DWORD_ENABLE \ + 0x2c07acUL +#define TM_REG_DBG_SHIFT \ + 0x2c07b0UL +#define TM_REG_DBG_FORCE_VALID \ + 0x2c07b4UL +#define TM_REG_DBG_FORCE_FRAME \ + 0x2c07b8UL +#define TCFC_REG_DBG_SELECT \ + 0x2d0500UL +#define TCFC_REG_DBG_DWORD_ENABLE \ + 0x2d0504UL +#define TCFC_REG_DBG_SHIFT \ + 0x2d0508UL +#define TCFC_REG_DBG_FORCE_VALID \ + 0x2d050cUL +#define TCFC_REG_DBG_FORCE_FRAME \ + 0x2d0510UL +#define CCFC_REG_DBG_SELECT \ + 0x2e0500UL +#define CCFC_REG_DBG_DWORD_ENABLE \ + 0x2e0504UL +#define CCFC_REG_DBG_SHIFT \ + 0x2e0508UL +#define CCFC_REG_DBG_FORCE_VALID \ + 0x2e050cUL +#define CCFC_REG_DBG_FORCE_FRAME \ + 0x2e0510UL +#define QM_REG_DBG_SELECT \ + 0x2f2e74UL +#define QM_REG_DBG_DWORD_ENABLE \ + 0x2f2e78UL +#define QM_REG_DBG_SHIFT \ + 0x2f2e7cUL +#define QM_REG_DBG_FORCE_VALID \ + 0x2f2e80UL +#define QM_REG_DBG_FORCE_FRAME \ + 0x2f2e84UL +#define RDIF_REG_DBG_SELECT \ + 0x300500UL +#define RDIF_REG_DBG_DWORD_ENABLE \ + 0x300504UL +#define RDIF_REG_DBG_SHIFT \ + 0x300508UL +#define RDIF_REG_DBG_FORCE_VALID \ + 0x30050cUL +#define RDIF_REG_DBG_FORCE_FRAME \ + 0x300510UL +#define TDIF_REG_DBG_SELECT \ + 0x310500UL +#define TDIF_REG_DBG_DWORD_ENABLE \ + 0x310504UL +#define TDIF_REG_DBG_SHIFT \ + 0x310508UL +#define TDIF_REG_DBG_FORCE_VALID \ + 0x31050cUL +#define TDIF_REG_DBG_FORCE_FRAME \ + 0x310510UL +#define BRB_REG_DBG_SELECT \ + 0x340ed0UL +#define BRB_REG_DBG_DWORD_ENABLE \ + 0x340ed4UL +#define BRB_REG_DBG_SHIFT \ + 0x340ed8UL +#define BRB_REG_DBG_FORCE_VALID \ + 0x340edcUL +#define BRB_REG_DBG_FORCE_FRAME \ + 0x340ee0UL +#define XYLD_REG_DBG_SELECT \ + 0x4c1600UL +#define XYLD_REG_DBG_DWORD_ENABLE \ + 0x4c1604UL +#define XYLD_REG_DBG_SHIFT \ + 0x4c1608UL +#define XYLD_REG_DBG_FORCE_VALID \ + 0x4c160cUL +#define XYLD_REG_DBG_FORCE_FRAME \ + 0x4c1610UL +#define YULD_REG_DBG_SELECT \ + 0x4c9600UL +#define YULD_REG_DBG_DWORD_ENABLE \ + 0x4c9604UL +#define YULD_REG_DBG_SHIFT \ + 0x4c9608UL +#define YULD_REG_DBG_FORCE_VALID \ + 0x4c960cUL +#define YULD_REG_DBG_FORCE_FRAME \ + 0x4c9610UL +#define TMLD_REG_DBG_SELECT \ + 0x4d1600UL +#define TMLD_REG_DBG_DWORD_ENABLE \ + 0x4d1604UL +#define TMLD_REG_DBG_SHIFT \ + 0x4d1608UL +#define TMLD_REG_DBG_FORCE_VALID \ + 0x4d160cUL +#define TMLD_REG_DBG_FORCE_FRAME \ + 0x4d1610UL +#define MULD_REG_DBG_SELECT \ + 0x4e1600UL +#define MULD_REG_DBG_DWORD_ENABLE \ + 0x4e1604UL +#define MULD_REG_DBG_SHIFT \ + 0x4e1608UL +#define MULD_REG_DBG_FORCE_VALID \ + 0x4e160cUL +#define MULD_REG_DBG_FORCE_FRAME \ + 0x4e1610UL +#define NIG_REG_DBG_SELECT \ + 0x502140UL +#define NIG_REG_DBG_DWORD_ENABLE \ + 0x502144UL +#define NIG_REG_DBG_SHIFT \ + 0x502148UL +#define NIG_REG_DBG_FORCE_VALID \ + 0x50214cUL +#define NIG_REG_DBG_FORCE_FRAME \ + 0x502150UL +#define BMB_REG_DBG_SELECT \ + 0x540a7cUL +#define BMB_REG_DBG_DWORD_ENABLE \ + 0x540a80UL +#define BMB_REG_DBG_SHIFT \ + 0x540a84UL +#define BMB_REG_DBG_FORCE_VALID \ + 0x540a88UL +#define BMB_REG_DBG_FORCE_FRAME \ + 0x540a8cUL +#define PTU_REG_DBG_SELECT \ + 0x560100UL +#define PTU_REG_DBG_DWORD_ENABLE \ + 0x560104UL +#define PTU_REG_DBG_SHIFT \ + 0x560108UL +#define PTU_REG_DBG_FORCE_VALID \ + 0x56010cUL +#define PTU_REG_DBG_FORCE_FRAME \ + 0x560110UL +#define CDU_REG_DBG_SELECT \ + 0x580704UL +#define CDU_REG_DBG_DWORD_ENABLE \ + 0x580708UL +#define CDU_REG_DBG_SHIFT \ + 0x58070cUL +#define CDU_REG_DBG_FORCE_VALID \ + 0x580710UL +#define CDU_REG_DBG_FORCE_FRAME \ + 0x580714UL +#define WOL_REG_DBG_SELECT \ + 0x600140UL +#define WOL_REG_DBG_DWORD_ENABLE \ + 0x600144UL +#define WOL_REG_DBG_SHIFT \ + 0x600148UL +#define WOL_REG_DBG_FORCE_VALID \ + 0x60014cUL +#define WOL_REG_DBG_FORCE_FRAME \ + 0x600150UL +#define BMBN_REG_DBG_SELECT \ + 0x610140UL +#define BMBN_REG_DBG_DWORD_ENABLE \ + 0x610144UL +#define BMBN_REG_DBG_SHIFT \ + 0x610148UL +#define BMBN_REG_DBG_FORCE_VALID \ + 0x61014cUL +#define BMBN_REG_DBG_FORCE_FRAME \ + 0x610150UL +#define NWM_REG_DBG_SELECT \ + 0x8000ecUL +#define NWM_REG_DBG_DWORD_ENABLE \ + 0x8000f0UL +#define NWM_REG_DBG_SHIFT \ + 0x8000f4UL +#define NWM_REG_DBG_FORCE_VALID \ + 0x8000f8UL +#define NWM_REG_DBG_FORCE_FRAME \ + 0x8000fcUL +#define PBF_REG_DBG_SELECT \ + 0xd80060UL +#define PBF_REG_DBG_DWORD_ENABLE \ + 0xd80064UL +#define PBF_REG_DBG_SHIFT \ + 0xd80068UL +#define PBF_REG_DBG_FORCE_VALID \ + 0xd8006cUL +#define PBF_REG_DBG_FORCE_FRAME \ + 0xd80070UL +#define PBF_PB1_REG_DBG_SELECT \ + 0xda0728UL +#define PBF_PB1_REG_DBG_DWORD_ENABLE \ + 0xda072cUL +#define PBF_PB1_REG_DBG_SHIFT \ + 0xda0730UL +#define PBF_PB1_REG_DBG_FORCE_VALID \ + 0xda0734UL +#define PBF_PB1_REG_DBG_FORCE_FRAME \ + 0xda0738UL +#define PBF_PB2_REG_DBG_SELECT \ + 0xda4728UL +#define PBF_PB2_REG_DBG_DWORD_ENABLE \ + 0xda472cUL +#define PBF_PB2_REG_DBG_SHIFT \ + 0xda4730UL +#define PBF_PB2_REG_DBG_FORCE_VALID \ + 0xda4734UL +#define PBF_PB2_REG_DBG_FORCE_FRAME \ + 0xda4738UL +#define BTB_REG_DBG_SELECT \ + 0xdb08c8UL +#define BTB_REG_DBG_DWORD_ENABLE \ + 0xdb08ccUL +#define BTB_REG_DBG_SHIFT \ + 0xdb08d0UL +#define BTB_REG_DBG_FORCE_VALID \ + 0xdb08d4UL +#define BTB_REG_DBG_FORCE_FRAME \ + 0xdb08d8UL +#define XSDM_REG_DBG_SELECT \ + 0xf80e28UL +#define XSDM_REG_DBG_DWORD_ENABLE \ + 0xf80e2cUL +#define XSDM_REG_DBG_SHIFT \ + 0xf80e30UL +#define XSDM_REG_DBG_FORCE_VALID \ + 0xf80e34UL +#define XSDM_REG_DBG_FORCE_FRAME \ + 0xf80e38UL +#define YSDM_REG_DBG_SELECT \ + 0xf90e28UL +#define YSDM_REG_DBG_DWORD_ENABLE \ + 0xf90e2cUL +#define YSDM_REG_DBG_SHIFT \ + 0xf90e30UL +#define YSDM_REG_DBG_FORCE_VALID \ + 0xf90e34UL +#define YSDM_REG_DBG_FORCE_FRAME \ + 0xf90e38UL +#define PSDM_REG_DBG_SELECT \ + 0xfa0e28UL +#define PSDM_REG_DBG_DWORD_ENABLE \ + 0xfa0e2cUL +#define PSDM_REG_DBG_SHIFT \ + 0xfa0e30UL +#define PSDM_REG_DBG_FORCE_VALID \ + 0xfa0e34UL +#define PSDM_REG_DBG_FORCE_FRAME \ + 0xfa0e38UL +#define TSDM_REG_DBG_SELECT \ + 0xfb0e28UL +#define TSDM_REG_DBG_DWORD_ENABLE \ + 0xfb0e2cUL +#define TSDM_REG_DBG_SHIFT \ + 0xfb0e30UL +#define TSDM_REG_DBG_FORCE_VALID \ + 0xfb0e34UL +#define TSDM_REG_DBG_FORCE_FRAME \ + 0xfb0e38UL +#define MSDM_REG_DBG_SELECT \ + 0xfc0e28UL +#define MSDM_REG_DBG_DWORD_ENABLE \ + 0xfc0e2cUL +#define MSDM_REG_DBG_SHIFT \ + 0xfc0e30UL +#define MSDM_REG_DBG_FORCE_VALID \ + 0xfc0e34UL +#define MSDM_REG_DBG_FORCE_FRAME \ + 0xfc0e38UL +#define USDM_REG_DBG_SELECT \ + 0xfd0e28UL +#define USDM_REG_DBG_DWORD_ENABLE \ + 0xfd0e2cUL +#define USDM_REG_DBG_SHIFT \ + 0xfd0e30UL +#define USDM_REG_DBG_FORCE_VALID \ + 0xfd0e34UL +#define USDM_REG_DBG_FORCE_FRAME \ + 0xfd0e38UL +#define XCM_REG_DBG_SELECT \ + 0x1000040UL +#define XCM_REG_DBG_DWORD_ENABLE \ + 0x1000044UL +#define XCM_REG_DBG_SHIFT \ + 0x1000048UL +#define XCM_REG_DBG_FORCE_VALID \ + 0x100004cUL +#define XCM_REG_DBG_FORCE_FRAME \ + 0x1000050UL +#define YCM_REG_DBG_SELECT \ + 0x1080040UL +#define YCM_REG_DBG_DWORD_ENABLE \ + 0x1080044UL +#define YCM_REG_DBG_SHIFT \ + 0x1080048UL +#define YCM_REG_DBG_FORCE_VALID \ + 0x108004cUL +#define YCM_REG_DBG_FORCE_FRAME \ + 0x1080050UL +#define PCM_REG_DBG_SELECT \ + 0x1100040UL +#define PCM_REG_DBG_DWORD_ENABLE \ + 0x1100044UL +#define PCM_REG_DBG_SHIFT \ + 0x1100048UL +#define PCM_REG_DBG_FORCE_VALID \ + 0x110004cUL +#define PCM_REG_DBG_FORCE_FRAME \ + 0x1100050UL +#define TCM_REG_DBG_SELECT \ + 0x1180040UL +#define TCM_REG_DBG_DWORD_ENABLE \ + 0x1180044UL +#define TCM_REG_DBG_SHIFT \ + 0x1180048UL +#define TCM_REG_DBG_FORCE_VALID \ + 0x118004cUL +#define TCM_REG_DBG_FORCE_FRAME \ + 0x1180050UL +#define MCM_REG_DBG_SELECT \ + 0x1200040UL +#define MCM_REG_DBG_DWORD_ENABLE \ + 0x1200044UL +#define MCM_REG_DBG_SHIFT \ + 0x1200048UL +#define MCM_REG_DBG_FORCE_VALID \ + 0x120004cUL +#define MCM_REG_DBG_FORCE_FRAME \ + 0x1200050UL +#define UCM_REG_DBG_SELECT \ + 0x1280050UL +#define UCM_REG_DBG_DWORD_ENABLE \ + 0x1280054UL +#define UCM_REG_DBG_SHIFT \ + 0x1280058UL +#define UCM_REG_DBG_FORCE_VALID \ + 0x128005cUL +#define UCM_REG_DBG_FORCE_FRAME \ + 0x1280060UL +#define XSEM_REG_DBG_SELECT \ + 0x1401528UL +#define XSEM_REG_DBG_DWORD_ENABLE \ + 0x140152cUL +#define XSEM_REG_DBG_SHIFT \ + 0x1401530UL +#define XSEM_REG_DBG_FORCE_VALID \ + 0x1401534UL +#define XSEM_REG_DBG_FORCE_FRAME \ + 0x1401538UL +#define YSEM_REG_DBG_SELECT \ + 0x1501528UL +#define YSEM_REG_DBG_DWORD_ENABLE \ + 0x150152cUL +#define YSEM_REG_DBG_SHIFT \ + 0x1501530UL +#define YSEM_REG_DBG_FORCE_VALID \ + 0x1501534UL +#define YSEM_REG_DBG_FORCE_FRAME \ + 0x1501538UL +#define PSEM_REG_DBG_SELECT \ + 0x1601528UL +#define PSEM_REG_DBG_DWORD_ENABLE \ + 0x160152cUL +#define PSEM_REG_DBG_SHIFT \ + 0x1601530UL +#define PSEM_REG_DBG_FORCE_VALID \ + 0x1601534UL +#define PSEM_REG_DBG_FORCE_FRAME \ + 0x1601538UL +#define TSEM_REG_DBG_SELECT \ + 0x1701528UL +#define TSEM_REG_DBG_DWORD_ENABLE \ + 0x170152cUL +#define TSEM_REG_DBG_SHIFT \ + 0x1701530UL +#define TSEM_REG_DBG_FORCE_VALID \ + 0x1701534UL +#define TSEM_REG_DBG_FORCE_FRAME \ + 0x1701538UL +#define MSEM_REG_DBG_SELECT \ + 0x1801528UL +#define MSEM_REG_DBG_DWORD_ENABLE \ + 0x180152cUL +#define MSEM_REG_DBG_SHIFT \ + 0x1801530UL +#define MSEM_REG_DBG_FORCE_VALID \ + 0x1801534UL +#define MSEM_REG_DBG_FORCE_FRAME \ + 0x1801538UL +#define USEM_REG_DBG_SELECT \ + 0x1901528UL +#define USEM_REG_DBG_DWORD_ENABLE \ + 0x190152cUL +#define USEM_REG_DBG_SHIFT \ + 0x1901530UL +#define USEM_REG_DBG_FORCE_VALID \ + 0x1901534UL +#define USEM_REG_DBG_FORCE_FRAME \ + 0x1901538UL +#define PCIE_REG_DBG_COMMON_SELECT \ + 0x054398UL +#define PCIE_REG_DBG_COMMON_DWORD_ENABLE \ + 0x05439cUL +#define PCIE_REG_DBG_COMMON_SHIFT \ + 0x0543a0UL +#define PCIE_REG_DBG_COMMON_FORCE_VALID \ + 0x0543a4UL +#define PCIE_REG_DBG_COMMON_FORCE_FRAME \ + 0x0543a8UL +#define MISC_REG_RESET_PL_UA \ + 0x008050UL +#define MISC_REG_RESET_PL_HV \ + 0x008060UL +#define XCM_REG_CTX_RBC_ACCS \ + 0x1001800UL +#define XCM_REG_AGG_CON_CTX \ + 0x1001804UL +#define XCM_REG_SM_CON_CTX \ + 0x1001808UL +#define YCM_REG_CTX_RBC_ACCS \ + 0x1081800UL +#define YCM_REG_AGG_CON_CTX \ + 0x1081804UL +#define YCM_REG_AGG_TASK_CTX \ + 0x1081808UL +#define YCM_REG_SM_CON_CTX \ + 0x108180cUL +#define YCM_REG_SM_TASK_CTX \ + 0x1081810UL +#define PCM_REG_CTX_RBC_ACCS \ + 0x1101440UL +#define PCM_REG_SM_CON_CTX \ + 0x1101444UL +#define TCM_REG_CTX_RBC_ACCS \ + 0x11814c0UL +#define TCM_REG_AGG_CON_CTX \ + 0x11814c4UL +#define TCM_REG_AGG_TASK_CTX \ + 0x11814c8UL +#define TCM_REG_SM_CON_CTX \ + 0x11814ccUL +#define TCM_REG_SM_TASK_CTX \ + 0x11814d0UL +#define MCM_REG_CTX_RBC_ACCS \ + 0x1201800UL +#define MCM_REG_AGG_CON_CTX \ + 0x1201804UL +#define MCM_REG_AGG_TASK_CTX \ + 0x1201808UL +#define MCM_REG_SM_CON_CTX \ + 0x120180cUL +#define MCM_REG_SM_TASK_CTX \ + 0x1201810UL +#define UCM_REG_CTX_RBC_ACCS \ + 0x1281700UL +#define UCM_REG_AGG_CON_CTX \ + 0x1281704UL +#define UCM_REG_AGG_TASK_CTX \ + 0x1281708UL +#define UCM_REG_SM_CON_CTX \ + 0x128170cUL +#define UCM_REG_SM_TASK_CTX \ + 0x1281710UL +#define XSEM_REG_SLOW_DBG_EMPTY \ + 0x1401140UL +#define XSEM_REG_SYNC_DBG_EMPTY \ + 0x1401160UL +#define XSEM_REG_SLOW_DBG_ACTIVE \ + 0x1401400UL +#define XSEM_REG_SLOW_DBG_MODE \ + 0x1401404UL +#define XSEM_REG_DBG_FRAME_MODE \ + 0x1401408UL +#define XSEM_REG_DBG_MODE1_CFG \ + 0x1401420UL +#define XSEM_REG_FAST_MEMORY \ + 0x1440000UL +#define YSEM_REG_SYNC_DBG_EMPTY \ + 0x1501160UL +#define YSEM_REG_SLOW_DBG_ACTIVE \ + 0x1501400UL +#define YSEM_REG_SLOW_DBG_MODE \ + 0x1501404UL +#define YSEM_REG_DBG_FRAME_MODE \ + 0x1501408UL +#define YSEM_REG_DBG_MODE1_CFG \ + 0x1501420UL +#define YSEM_REG_FAST_MEMORY \ + 0x1540000UL +#define PSEM_REG_SLOW_DBG_EMPTY \ + 0x1601140UL +#define PSEM_REG_SYNC_DBG_EMPTY \ + 0x1601160UL +#define PSEM_REG_SLOW_DBG_ACTIVE \ + 0x1601400UL +#define PSEM_REG_SLOW_DBG_MODE \ + 0x1601404UL +#define PSEM_REG_DBG_FRAME_MODE \ + 0x1601408UL +#define PSEM_REG_DBG_MODE1_CFG \ + 0x1601420UL +#define PSEM_REG_FAST_MEMORY \ + 0x1640000UL +#define TSEM_REG_SLOW_DBG_EMPTY \ + 0x1701140UL +#define TSEM_REG_SYNC_DBG_EMPTY \ + 0x1701160UL +#define TSEM_REG_SLOW_DBG_ACTIVE \ + 0x1701400UL +#define TSEM_REG_SLOW_DBG_MODE \ + 0x1701404UL +#define TSEM_REG_DBG_FRAME_MODE \ + 0x1701408UL +#define TSEM_REG_DBG_MODE1_CFG \ + 0x1701420UL +#define TSEM_REG_FAST_MEMORY \ + 0x1740000UL +#define MSEM_REG_SLOW_DBG_EMPTY \ + 0x1801140UL +#define MSEM_REG_SYNC_DBG_EMPTY \ + 0x1801160UL +#define MSEM_REG_SLOW_DBG_ACTIVE \ + 0x1801400UL +#define MSEM_REG_SLOW_DBG_MODE \ + 0x1801404UL +#define MSEM_REG_DBG_FRAME_MODE \ + 0x1801408UL +#define MSEM_REG_DBG_MODE1_CFG \ + 0x1801420UL +#define MSEM_REG_FAST_MEMORY \ + 0x1840000UL +#define USEM_REG_SLOW_DBG_EMPTY \ + 0x1901140UL +#define USEM_REG_SYNC_DBG_EMPTY \ + 0x1901160UL +#define USEM_REG_SLOW_DBG_ACTIVE \ + 0x1901400UL +#define USEM_REG_SLOW_DBG_MODE \ + 0x1901404UL +#define USEM_REG_DBG_FRAME_MODE \ + 0x1901408UL +#define USEM_REG_DBG_MODE1_CFG \ + 0x1901420UL +#define USEM_REG_FAST_MEMORY \ + 0x1940000UL +#define SEM_FAST_REG_INT_RAM \ + 0x020000UL +#define SEM_FAST_REG_INT_RAM_SIZE \ + 20480 +#define GRC_REG_TRACE_FIFO_VALID_DATA \ + 0x050064UL +#define GRC_REG_NUMBER_VALID_OVERRIDE_WINDOW \ + 0x05040cUL +#define GRC_REG_PROTECTION_OVERRIDE_WINDOW \ + 0x050500UL +#define IGU_REG_ERROR_HANDLING_MEMORY \ + 0x181520UL #define MCP_REG_CPU_MODE \ 0xe05000UL #define MCP_REG_CPU_MODE_SOFT_HALT \ (0x1 << 10) +#define BRB_REG_BIG_RAM_ADDRESS \ + 0x340800UL +#define BRB_REG_BIG_RAM_DATA \ + 0x341500UL +#define SEM_FAST_REG_STALL_0 \ + 0x000488UL +#define SEM_FAST_REG_STALLED \ + 0x000494UL +#define BTB_REG_BIG_RAM_ADDRESS \ + 0xdb0800UL +#define BTB_REG_BIG_RAM_DATA \ + 0xdb0c00UL +#define BMB_REG_BIG_RAM_ADDRESS \ + 0x540800UL +#define BMB_REG_BIG_RAM_DATA \ + 0x540f00UL +#define SEM_FAST_REG_STORM_REG_FILE \ + 0x008000UL +#define RSS_REG_RSS_RAM_ADDR \ + 0x238c30UL +#define MISCS_REG_BLOCK_256B_EN \ + 0x009074UL +#define MCP_REG_SCRATCH_SIZE \ + 57344 +#define MCP_REG_CPU_REG_FILE \ + 0xe05200UL +#define MCP_REG_CPU_REG_FILE_SIZE \ + 32 +#define DBG_REG_DEBUG_TARGET \ + 0x01005cUL +#define DBG_REG_FULL_MODE \ + 0x010060UL +#define DBG_REG_CALENDAR_OUT_DATA \ + 0x010480UL +#define GRC_REG_TRACE_FIFO \ + 0x050068UL +#define IGU_REG_ERROR_HANDLING_DATA_VALID \ + 0x181530UL +#define DBG_REG_DBG_BLOCK_ON \ + 0x010454UL +#define DBG_REG_FRAMING_MODE \ + 0x010058UL +#define SEM_FAST_REG_VFC_DATA_WR \ + 0x000b40UL +#define SEM_FAST_REG_VFC_ADDR \ + 0x000b44UL +#define SEM_FAST_REG_VFC_DATA_RD \ + 0x000b48UL +#define RSS_REG_RSS_RAM_DATA \ + 0x238c20UL +#define MISC_REG_BLOCK_256B_EN \ + 0x008c14UL +#define NWS_REG_NWS_CMU \ + 0x720000UL +#define PHY_NW_IP_REG_PHY0_TOP_TBUS_ADDR_7_0 \ + 0x000680UL +#define PHY_NW_IP_REG_PHY0_TOP_TBUS_ADDR_15_8 \ + 0x000684UL +#define PHY_NW_IP_REG_PHY0_TOP_TBUS_DATA_7_0 \ + 0x0006c0UL +#define PHY_NW_IP_REG_PHY0_TOP_TBUS_DATA_11_8 \ + 0x0006c4UL +#define MS_REG_MS_CMU \ + 0x6a4000UL +#define PHY_SGMII_IP_REG_AHB_CMU_CSR_0_X130 \ + 0x000208UL +#define PHY_SGMII_IP_REG_AHB_CMU_CSR_0_X132 \ + 0x000210UL +#define PHY_SGMII_IP_REG_AHB_CMU_CSR_0_X131 \ + 0x00020cUL +#define PHY_SGMII_IP_REG_AHB_CMU_CSR_0_X133 \ + 0x000214UL +#define PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X130 \ + 0x000208UL +#define PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X131 \ + 0x00020cUL +#define PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X132 \ + 0x000210UL +#define PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X133 \ + 0x000214UL +#define PHY_PCIE_REG_PHY0 \ + 0x620000UL +#define PHY_PCIE_REG_PHY1 \ + 0x624000UL #endif diff --git a/include/linux/qed/common_hsi.h b/include/linux/qed/common_hsi.h index 70b30e4d3cc4..19027635df0d 100644 --- a/include/linux/qed/common_hsi.h +++ b/include/linux/qed/common_hsi.h @@ -143,6 +143,9 @@ #define GTT_BYTE_SIZE_BITS (GTT_DWORD_SIZE_BITS + 2) #define GTT_DWORD_SIZE BIT(GTT_DWORD_SIZE_BITS) +/* Tools Version */ +#define TOOLS_VERSION 10 + /*****************/ /* CDU CONSTANTS */ /*****************/ From e0971c832af4cd906ab931c9f6e9e1791a62fc98 Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Wed, 7 Sep 2016 16:36:25 +0300 Subject: [PATCH 0803/1715] qed*: Add support for the ethtool get_regs operation Signed-off-by: Tomer Tayar Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_main.c | 2 ++ .../net/ethernet/qlogic/qede/qede_ethtool.c | 24 +++++++++++++++++++ include/linux/qed/qed_if.h | 4 ++++ 3 files changed, 30 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 250efd1d270d..b730a632c383 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -1398,6 +1398,8 @@ const struct qed_common_ops qed_common_ops_pass = { .get_link = &qed_get_current_link, .drain = &qed_drain, .update_msglvl = &qed_init_dp, + .dbg_all_data = &qed_dbg_all_data, + .dbg_all_data_size = &qed_dbg_all_data_size, .chain_alloc = &qed_chain_alloc, .chain_free = &qed_chain_free, .get_coalesce = &qed_get_coalesce, diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index 14d5328e6ac9..25a9b293ee8f 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -695,6 +695,28 @@ static int qede_set_pauseparam(struct net_device *dev, return 0; } +static void qede_get_regs(struct net_device *ndev, + struct ethtool_regs *regs, void *buffer) +{ + struct qede_dev *edev = netdev_priv(ndev); + + regs->version = 0; + memset(buffer, 0, regs->len); + + if (edev->ops && edev->ops->common) + edev->ops->common->dbg_all_data(edev->cdev, buffer); +} + +static int qede_get_regs_len(struct net_device *ndev) +{ + struct qede_dev *edev = netdev_priv(ndev); + + if (edev->ops && edev->ops->common) + return edev->ops->common->dbg_all_data_size(edev->cdev); + else + return -EINVAL; +} + static void qede_update_mtu(struct qede_dev *edev, union qede_reload_args *args) { edev->ndev->mtu = args->mtu; @@ -1395,6 +1417,8 @@ static const struct ethtool_ops qede_ethtool_ops = { .get_link_ksettings = qede_get_link_ksettings, .set_link_ksettings = qede_set_link_ksettings, .get_drvinfo = qede_get_drvinfo, + .get_regs_len = qede_get_regs_len, + .get_regs = qede_get_regs, .get_msglevel = qede_get_msglevel, .set_msglevel = qede_set_msglevel, .nway_reset = qede_nway_reset, diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index d8dc5c2243d5..e4546abcea08 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -455,6 +455,10 @@ struct qed_common_ops { void (*simd_handler_clean)(struct qed_dev *cdev, int index); + int (*dbg_all_data) (struct qed_dev *cdev, void *buffer); + + int (*dbg_all_data_size) (struct qed_dev *cdev); + /** * @brief can_link_change - can the instance change the link or not * From 216559d9032704a7a5d2fcd3c9c086dd9f7cd557 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 7 Sep 2016 15:53:31 +0200 Subject: [PATCH 0804/1715] net: smsc911x: augment device tree bindings This adds device tree bindings for: - An optional GPIO line for releasing the RESET signal to the SMSC911x devices - An optional PME (power management event) interrupt line that can be utilized to wake up the system on network activity. This signal exist on all the SMSC911x devices, it is just not very often routed. Both these lines are routed to the SoC on the Qualcomm APQ8060 Dragonboard and thus needs to be bound in the device tree. Cc: devicetree@vger.kernel.org Cc: Jeremy Linton Signed-off-by: Linus Walleij Acked-by: Arnd Bergmann Signed-off-by: David S. Miller --- .../devicetree/bindings/net/smsc911x.txt | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/Documentation/devicetree/bindings/net/smsc911x.txt b/Documentation/devicetree/bindings/net/smsc911x.txt index 3fed3c124411..16c3a9501f5d 100644 --- a/Documentation/devicetree/bindings/net/smsc911x.txt +++ b/Documentation/devicetree/bindings/net/smsc911x.txt @@ -3,9 +3,11 @@ Required properties: - compatible : Should be "smsc,lan", "smsc,lan9115" - reg : Address and length of the io space for SMSC LAN -- interrupts : Should contain SMSC LAN interrupt line -- interrupt-parent : Should be the phandle for the interrupt controller - that services interrupts for this device +- interrupts : one or two interrupt specifiers + - The first interrupt is the SMSC LAN interrupt line + - The second interrupt (if present) is the PME (power + management event) interrupt that is able to wake up the host + system with a 50ms pulse on network activity - phy-mode : See ethernet.txt file in the same directory Optional properties: @@ -21,6 +23,10 @@ Optional properties: external PHY - smsc,save-mac-address : Indicates that mac address needs to be saved before resetting the controller +- reset-gpios : a GPIO line connected to the RESET (active low) signal + of the device. On many systems this is wired high so the device goes + out of reset at power-on, but if it is under program control, this + optional GPIO can wake up in response to it. Examples: @@ -29,7 +35,8 @@ lan9220@f4000000 { reg = <0xf4000000 0x2000000>; phy-mode = "mii"; interrupt-parent = <&gpio1>; - interrupts = <31>; + interrupts = <31>, <32>; + reset-gpios = <&gpio1 30 GPIO_ACTIVE_LOW>; reg-io-width = <4>; smsc,irq-push-pull; }; From dd0cb7dbb065f4acdd8d0597f122d0ed9e93f12e Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 7 Sep 2016 15:53:42 +0200 Subject: [PATCH 0805/1715] net: smsc911x: request and deassert optional RESET GPIO On some systems (such as the Qualcomm APQ8060 Dragonboard) the RESET signal of the SMSC911x is not pulled up by a resistor (or the internal pull-up that will pull it up if the pin is not even connected) but instead connected to a GPIO line, so that the operating system must explicitly deassert RESET before use. Support this in the SMSC911x driver so this ethernet connector can be used on such targets. Notice that we request the line to go logical low (deassert) whilst the line on the actual component is active low. This is managed in the respective hardware description when specifying the GPIO line with e.g. device tree or ACPI. With device tree it looks like this in one case: reset-gpios = <&tlmm 30 GPIO_ACTIVE_LOW>; Which means that logically requesting the RESET line to be deasserted will result in the line being driven high, taking the device out of reset. Cc: Jeremy Linton Signed-off-by: Linus Walleij Reviewed-by: Jeremy Linton Signed-off-by: David S. Miller --- drivers/net/ethernet/smsc/smsc911x.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c index ca3134540d2d..8ab8d4b9614b 100644 --- a/drivers/net/ethernet/smsc/smsc911x.c +++ b/drivers/net/ethernet/smsc/smsc911x.c @@ -62,6 +62,7 @@ #include #include #include +#include #include "smsc911x.h" @@ -147,6 +148,9 @@ struct smsc911x_data { /* regulators */ struct regulator_bulk_data supplies[SMSC911X_NUM_SUPPLIES]; + /* Reset GPIO */ + struct gpio_desc *reset_gpiod; + /* clock */ struct clk *clk; }; @@ -438,6 +442,11 @@ static int smsc911x_request_resources(struct platform_device *pdev) netdev_err(ndev, "couldn't get regulators %d\n", ret); + /* Request optional RESET GPIO */ + pdata->reset_gpiod = devm_gpiod_get_optional(&pdev->dev, + "reset", + GPIOD_OUT_LOW); + /* Request clock */ pdata->clk = clk_get(&pdev->dev, NULL); if (IS_ERR(pdata->clk)) From 710f3e5961a71dd58fe367eac48deecd5af45a48 Mon Sep 17 00:00:00 2001 From: Sriharsha Basavapatna Date: Wed, 7 Sep 2016 19:57:49 +0530 Subject: [PATCH 0806/1715] be2net: Support UE recovery in BEx/Skyhawk adapters This patch supports recovery from UEs caused due to Transient Parity Errors (TPE), in BE2, BE3 and Skyhawk adapters. This change avoids system reboot when such errors occur. The driver recovers from these errors such that the adapter resumes full operational status as prior to the UE. Following is the list of changes in the driver to support this: o The driver registers its UE recoverable capability with ARM FW at init time. This also allows the driver to know if the feature is supported in the FW. o As the UE recovery requires precise time bound processing, the driver creates its own error recovery work queue with a single worker thread (per module, shared across functions). o Each function runs an error detection task at an interval of 1 second as required by the FW. The error detection logic already exists for BEx/SH, but it now runs in the context of a separate worker thread. o When an error is detected by the task, if it is recoverable, the PF0 driver instance initiates a soft reset, while other PF driver instances wait for the reset to complete and the chip to become ready. Once the chip is ready, all driver instances including PF0, resume to reinitialize the respective functions. o The PF0 driver checks for some recovery criteria, to determine if the recovery can be initiated. If the criteria is not met, the PF0 driver does not initiate a soft reset, it retains the existing behavior to stop further processing and requires a reboot to get the chip to operational state again. o To allow each function to share the workq, while also making progress in its recovery process, a per-function recovery state machine is used. The per-function tasks avoid blocking operations like msleep() while in this state machine (until reinit state) and instead reschedule for the required delay. o With these changes, the existing error recovery code for Lancer also runs in the context of the new worker thread. Signed-off-by: Sriharsha Basavapatna Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be.h | 68 ++++- drivers/net/ethernet/emulex/benet/be_cmds.c | 53 +++- drivers/net/ethernet/emulex/benet/be_cmds.h | 41 ++- .../net/ethernet/emulex/benet/be_ethtool.c | 40 +++ drivers/net/ethernet/emulex/benet/be_hw.h | 7 +- drivers/net/ethernet/emulex/benet/be_main.c | 256 ++++++++++++++++-- 6 files changed, 430 insertions(+), 35 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h index 86780b5c40ef..eecf24e92fd3 100644 --- a/drivers/net/ethernet/emulex/benet/be.h +++ b/drivers/net/ethernet/emulex/benet/be.h @@ -399,13 +399,13 @@ enum vf_state { #define BE_FLAGS_PHY_MISCONFIGURED BIT(10) #define BE_FLAGS_ERR_DETECTION_SCHEDULED BIT(11) #define BE_FLAGS_OS2BMC BIT(12) +#define BE_FLAGS_TRY_RECOVERY BIT(13) #define BE_UC_PMAC_COUNT 30 #define BE_VF_UC_PMAC_COUNT 2 #define MAX_ERR_RECOVERY_RETRY_COUNT 3 #define ERR_DETECTION_DELAY 1000 -#define ERR_RECOVERY_RETRY_DELAY 30000 /* Ethtool set_dump flags */ #define LANCER_INITIATE_FW_DUMP 0x1 @@ -512,6 +512,66 @@ struct be_eth_addr { unsigned char mac[ETH_ALEN]; }; +#define BE_SEC 1000 /* in msec */ +#define BE_MIN (60 * BE_SEC) /* in msec */ +#define BE_HOUR (60 * BE_MIN) /* in msec */ + +#define ERR_RECOVERY_MAX_RETRY_COUNT 3 +#define ERR_RECOVERY_DETECTION_DELAY BE_SEC +#define ERR_RECOVERY_RETRY_DELAY (30 * BE_SEC) + +/* UE-detection-duration in BEx/Skyhawk: + * All PFs must wait for this duration after they detect UE before reading + * SLIPORT_SEMAPHORE register. At the end of this duration, the Firmware + * guarantees that the SLIPORT_SEMAPHORE register is updated to indicate + * if the UE is recoverable. + */ +#define ERR_RECOVERY_UE_DETECT_DURATION BE_SEC + +/* Initial idle time (in msec) to elapse after driver load, + * before UE recovery is allowed. + */ +#define ERR_IDLE_HR 24 +#define ERR_RECOVERY_IDLE_TIME (ERR_IDLE_HR * BE_HOUR) + +/* Time interval (in msec) after which UE recovery can be repeated */ +#define ERR_INTERVAL_HR 72 +#define ERR_RECOVERY_INTERVAL (ERR_INTERVAL_HR * BE_HOUR) + +/* BEx/SH UE recovery state machine */ +enum { + ERR_RECOVERY_ST_NONE = 0, /* No Recovery */ + ERR_RECOVERY_ST_DETECT = 1, /* UE detection duration */ + ERR_RECOVERY_ST_RESET = 2, /* Reset Phase (PF0 only) */ + ERR_RECOVERY_ST_PRE_POLL = 3, /* Pre-Poll Phase (all PFs) */ + ERR_RECOVERY_ST_REINIT = 4 /* Re-initialize Phase */ +}; + +struct be_error_recovery { + /* Lancer error recovery variables */ + u8 recovery_retries; + + /* BEx/Skyhawk error recovery variables */ + u8 recovery_state; + u16 ue_to_reset_time; /* Time after UE, to soft reset + * the chip - PF0 only + */ + u16 ue_to_poll_time; /* Time after UE, to Restart Polling + * of SLIPORT_SEMAPHORE reg + */ + u16 last_err_code; + bool recovery_supported; + unsigned long probe_time; + unsigned long last_recovery_time; + + /* Common to both Lancer & BEx/SH error recovery */ + u32 resched_delay; + struct delayed_work err_detection_work; +}; + +/* Ethtool priv_flags */ +#define BE_DISABLE_TPE_RECOVERY 0x1 + struct be_adapter { struct pci_dev *pdev; struct net_device *netdev; @@ -560,7 +620,6 @@ struct be_adapter { struct delayed_work work; u16 work_counter; - struct delayed_work be_err_detection_work; u8 recovery_retries; u8 err_flags; bool pcicfg_mapped; /* pcicfg obtained via pci_iomap() */ @@ -634,6 +693,8 @@ struct be_adapter { u32 fat_dump_len; u16 serial_num[CNTL_SERIAL_NUM_WORDS]; u8 phy_state; /* state of sfp optics (functional, faulted, etc.,) */ + u32 priv_flags; /* ethtool get/set_priv_flags() */ + struct be_error_recovery error_recovery; }; /* Used for defered FW config cmds. Add fields to this struct as reqd */ @@ -867,6 +928,9 @@ static inline bool is_ipv4_pkt(struct sk_buff *skb) return skb->protocol == htons(ETH_P_IP) && ip_hdr(skb)->version == 4; } +#define be_error_recovering(adapter) \ + (adapter->flags & BE_FLAGS_TRY_RECOVERY) + #define BE_ERROR_EEH 1 #define BE_ERROR_UE BIT(1) #define BE_ERROR_FW BIT(2) diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index fa11a5a8c354..92794f345bcb 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -705,7 +705,7 @@ static int be_mbox_notify_wait(struct be_adapter *adapter) return 0; } -static u16 be_POST_stage_get(struct be_adapter *adapter) +u16 be_POST_stage_get(struct be_adapter *adapter) { u32 sem; @@ -4954,6 +4954,57 @@ int be_cmd_set_logical_link_config(struct be_adapter *adapter, 1, domain); return status; } + +int be_cmd_set_features(struct be_adapter *adapter) +{ + struct be_cmd_resp_set_features *resp; + struct be_cmd_req_set_features *req; + struct be_mcc_wrb *wrb; + int status; + + if (mutex_lock_interruptible(&adapter->mcc_lock)) + return -1; + + wrb = wrb_from_mccq(adapter); + if (!wrb) { + status = -EBUSY; + goto err; + } + + req = embedded_payload(wrb); + + be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON, + OPCODE_COMMON_SET_FEATURES, + sizeof(*req), wrb, NULL); + + req->features = cpu_to_le32(BE_FEATURE_UE_RECOVERY); + req->parameter_len = cpu_to_le32(sizeof(struct be_req_ue_recovery)); + req->parameter.req.uer = cpu_to_le32(BE_UE_RECOVERY_UER_MASK); + + status = be_mcc_notify_wait(adapter); + if (status) + goto err; + + resp = embedded_payload(wrb); + + adapter->error_recovery.ue_to_poll_time = + le16_to_cpu(resp->parameter.resp.ue2rp); + adapter->error_recovery.ue_to_reset_time = + le16_to_cpu(resp->parameter.resp.ue2sr); + adapter->error_recovery.recovery_supported = true; +err: + /* Checking "MCC_STATUS_INVALID_LENGTH" for SKH as FW + * returns this error in older firmware versions + */ + if (base_status(status) == MCC_STATUS_ILLEGAL_REQUEST || + base_status(status) == MCC_STATUS_INVALID_LENGTH) + dev_info(&adapter->pdev->dev, + "Adapter does not support HW error recovery\n"); + + mutex_unlock(&adapter->mcc_lock); + return status; +} + int be_roce_mcc_cmd(void *netdev_handle, void *wrb_payload, int wrb_payload_size, u16 *cmd_status, u16 *ext_status) { diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h index 0d6be224a787..686cbe03e368 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.h +++ b/drivers/net/ethernet/emulex/benet/be_cmds.h @@ -58,7 +58,8 @@ enum mcc_base_status { MCC_STATUS_INSUFFICIENT_BUFFER = 4, MCC_STATUS_UNAUTHORIZED_REQUEST = 5, MCC_STATUS_NOT_SUPPORTED = 66, - MCC_STATUS_FEATURE_NOT_SUPPORTED = 68 + MCC_STATUS_FEATURE_NOT_SUPPORTED = 68, + MCC_STATUS_INVALID_LENGTH = 116 }; /* Additional status */ @@ -308,6 +309,7 @@ struct be_mcc_mailbox { #define OPCODE_COMMON_READ_OBJECT 171 #define OPCODE_COMMON_WRITE_OBJECT 172 #define OPCODE_COMMON_DELETE_OBJECT 174 +#define OPCODE_COMMON_SET_FEATURES 191 #define OPCODE_COMMON_MANAGE_IFACE_FILTERS 193 #define OPCODE_COMMON_GET_IFACE_LIST 194 #define OPCODE_COMMON_ENABLE_DISABLE_VF 196 @@ -2315,6 +2317,41 @@ struct be_cmd_resp_get_iface_list { struct be_if_desc if_desc; }; +/************** Set Features *******************/ +#define BE_FEATURE_UE_RECOVERY 0x10 +#define BE_UE_RECOVERY_UER_MASK 0x1 + +struct be_req_ue_recovery { + u32 uer; + u32 rsvd; +}; + +struct be_cmd_req_set_features { + struct be_cmd_req_hdr hdr; + u32 features; + u32 parameter_len; + union { + struct be_req_ue_recovery req; + u32 rsvd[2]; + } parameter; +}; + +struct be_resp_ue_recovery { + u32 uer; + u16 ue2rp; + u16 ue2sr; +}; + +struct be_cmd_resp_set_features { + struct be_cmd_resp_hdr hdr; + u32 features; + u32 parameter_len; + union { + struct be_resp_ue_recovery resp; + u32 rsvd[2]; + } parameter; +}; + /*************** Set logical link ********************/ #define PLINK_ENABLE BIT(0) #define PLINK_TRACK BIT(8) @@ -2343,6 +2380,7 @@ struct be_cmd_req_manage_iface_filters { u32 cap_control_flags; } __packed; +u16 be_POST_stage_get(struct be_adapter *adapter); int be_pci_fnum_get(struct be_adapter *adapter); int be_fw_wait_ready(struct be_adapter *adapter); int be_cmd_mac_addr_query(struct be_adapter *adapter, u8 *mac_addr, @@ -2470,3 +2508,4 @@ int be_cmd_manage_iface(struct be_adapter *adapter, u32 iface, u8 op); int be_cmd_set_sriov_config(struct be_adapter *adapter, struct be_resources res, u16 num_vfs, struct be_resources *vft_res); +int be_cmd_set_features(struct be_adapter *adapter); diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c index 50e7be5da50c..0a48a31225e6 100644 --- a/drivers/net/ethernet/emulex/benet/be_ethtool.c +++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c @@ -421,6 +421,10 @@ static void be_get_ethtool_stats(struct net_device *netdev, } } +static const char be_priv_flags[][ETH_GSTRING_LEN] = { + "disable-tpe-recovery" +}; + static void be_get_stat_strings(struct net_device *netdev, uint32_t stringset, uint8_t *data) { @@ -454,6 +458,10 @@ static void be_get_stat_strings(struct net_device *netdev, uint32_t stringset, data += ETH_GSTRING_LEN; } break; + case ETH_SS_PRIV_FLAGS: + for (i = 0; i < ARRAY_SIZE(be_priv_flags); i++) + strcpy(data + i * ETH_GSTRING_LEN, be_priv_flags[i]); + break; } } @@ -468,6 +476,8 @@ static int be_get_sset_count(struct net_device *netdev, int stringset) return ETHTOOL_STATS_NUM + adapter->num_rx_qs * ETHTOOL_RXSTATS_NUM + adapter->num_tx_qs * ETHTOOL_TXSTATS_NUM; + case ETH_SS_PRIV_FLAGS: + return ARRAY_SIZE(be_priv_flags); default: return -EINVAL; } @@ -1360,6 +1370,34 @@ err: return be_cmd_status(status); } +static u32 be_get_priv_flags(struct net_device *netdev) +{ + struct be_adapter *adapter = netdev_priv(netdev); + + return adapter->priv_flags; +} + +static int be_set_priv_flags(struct net_device *netdev, u32 flags) +{ + struct be_adapter *adapter = netdev_priv(netdev); + bool tpe_old = !!(adapter->priv_flags & BE_DISABLE_TPE_RECOVERY); + bool tpe_new = !!(flags & BE_DISABLE_TPE_RECOVERY); + + if (tpe_old != tpe_new) { + if (tpe_new) { + adapter->priv_flags |= BE_DISABLE_TPE_RECOVERY; + dev_info(&adapter->pdev->dev, + "HW error recovery is disabled\n"); + } else { + adapter->priv_flags &= ~BE_DISABLE_TPE_RECOVERY; + dev_info(&adapter->pdev->dev, + "HW error recovery is enabled\n"); + } + } + + return 0; +} + const struct ethtool_ops be_ethtool_ops = { .get_settings = be_get_settings, .get_drvinfo = be_get_drvinfo, @@ -1373,6 +1411,8 @@ const struct ethtool_ops be_ethtool_ops = { .get_ringparam = be_get_ringparam, .get_pauseparam = be_get_pauseparam, .set_pauseparam = be_set_pauseparam, + .set_priv_flags = be_set_priv_flags, + .get_priv_flags = be_get_priv_flags, .get_strings = be_get_stat_strings, .set_phys_id = be_set_phys_id, .set_dump = be_set_dump, diff --git a/drivers/net/ethernet/emulex/benet/be_hw.h b/drivers/net/ethernet/emulex/benet/be_hw.h index c684bb32b487..92942c84d329 100644 --- a/drivers/net/ethernet/emulex/benet/be_hw.h +++ b/drivers/net/ethernet/emulex/benet/be_hw.h @@ -32,18 +32,23 @@ #define MPU_EP_CONTROL 0 /********** MPU semphore: used for SH & BE *************/ +#define SLIPORT_SOFTRESET_OFFSET 0x5c /* CSR BAR offset */ #define SLIPORT_SEMAPHORE_OFFSET_BEx 0xac /* CSR BAR offset */ #define SLIPORT_SEMAPHORE_OFFSET_SH 0x94 /* PCI-CFG offset */ #define POST_STAGE_MASK 0x0000FFFF #define POST_ERR_MASK 0x1 #define POST_ERR_SHIFT 31 +#define POST_ERR_RECOVERY_CODE_MASK 0xFFF + +/* Soft Reset register masks */ +#define SLIPORT_SOFTRESET_SR_MASK 0x00000080 /* SR bit */ /* MPU semphore POST stage values */ #define POST_STAGE_AWAITING_HOST_RDY 0x1 /* FW awaiting goahead from host */ #define POST_STAGE_HOST_RDY 0x2 /* Host has given go-ahed to FW */ #define POST_STAGE_BE_RESET 0x3 /* Host wants to reset chip */ #define POST_STAGE_ARMFW_RDY 0xc000 /* FW is done with POST */ - +#define POST_STAGE_RECOVERABLE_ERR 0xE000 /* Recoverable err detected */ /* Lancer SLIPORT registers */ #define SLIPORT_STATUS_OFFSET 0x404 diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index f7584d4139ff..3be5d61aa875 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -41,6 +41,11 @@ static ushort rx_frag_size = 2048; module_param(rx_frag_size, ushort, S_IRUGO); MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data."); +/* Per-module error detection/recovery workq shared across all functions. + * Each function schedules its own work request on this shared workq. + */ +struct workqueue_struct *be_err_recovery_workq; + static const struct pci_device_id be_dev_ids[] = { { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) }, { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) }, @@ -3358,9 +3363,7 @@ void be_detect_error(struct be_adapter *adapter) */ if (ue_lo || ue_hi) { - dev_err(dev, - "Unrecoverable Error detected in the adapter"); - dev_err(dev, "Please reboot server to recover"); + dev_err(dev, "Error detected in the adapter"); if (skyhawk_chip(adapter)) be_set_error(adapter, BE_ERROR_UE); @@ -3903,8 +3906,13 @@ static void be_cancel_worker(struct be_adapter *adapter) static void be_cancel_err_detection(struct be_adapter *adapter) { + struct be_error_recovery *err_rec = &adapter->error_recovery; + + if (!be_err_recovery_workq) + return; + if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) { - cancel_delayed_work_sync(&adapter->be_err_detection_work); + cancel_delayed_work_sync(&err_rec->err_detection_work); adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED; } } @@ -4503,10 +4511,25 @@ static void be_schedule_worker(struct be_adapter *adapter) adapter->flags |= BE_FLAGS_WORKER_SCHEDULED; } +static void be_destroy_err_recovery_workq(void) +{ + if (!be_err_recovery_workq) + return; + + flush_workqueue(be_err_recovery_workq); + destroy_workqueue(be_err_recovery_workq); + be_err_recovery_workq = NULL; +} + static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay) { - schedule_delayed_work(&adapter->be_err_detection_work, - msecs_to_jiffies(delay)); + struct be_error_recovery *err_rec = &adapter->error_recovery; + + if (!be_err_recovery_workq) + return; + + queue_delayed_work(be_err_recovery_workq, &err_rec->err_detection_work, + msecs_to_jiffies(delay)); adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED; } @@ -4635,10 +4658,15 @@ static inline int fw_major_num(const char *fw_ver) return fw_major; } -/* If any VFs are already enabled don't FLR the PF */ +/* If it is error recovery, FLR the PF + * Else if any VFs are already enabled don't FLR the PF + */ static bool be_reset_required(struct be_adapter *adapter) { - return pci_num_vf(adapter->pdev) ? false : true; + if (be_error_recovering(adapter)) + return true; + else + return pci_num_vf(adapter->pdev) == 0; } /* Wait for the FW to be ready and perform the required initialization */ @@ -4650,6 +4678,9 @@ static int be_func_init(struct be_adapter *adapter) if (status) return status; + /* FW is now ready; clear errors to allow cmds/doorbell */ + be_clear_error(adapter, BE_CLEAR_ALL); + if (be_reset_required(adapter)) { status = be_cmd_reset_function(adapter); if (status) @@ -4657,9 +4688,6 @@ static int be_func_init(struct be_adapter *adapter) /* Wait for interrupts to quiesce after an FLR */ msleep(100); - - /* We can clear all errors when function reset succeeds */ - be_clear_error(adapter, BE_CLEAR_ALL); } /* Tell FW we're ready to fire cmds */ @@ -4767,6 +4795,9 @@ static int be_setup(struct be_adapter *adapter) if (!status && be_pause_supported(adapter)) adapter->phy.fc_autoneg = 1; + if (be_physfn(adapter) && !lancer_chip(adapter)) + be_cmd_set_features(adapter); + be_schedule_worker(adapter); adapter->flags |= BE_FLAGS_SETUP_DONE; return 0; @@ -5210,13 +5241,145 @@ static int be_resume(struct be_adapter *adapter) return 0; } +static void be_soft_reset(struct be_adapter *adapter) +{ + u32 val; + + dev_info(&adapter->pdev->dev, "Initiating chip soft reset\n"); + val = ioread32(adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET); + val |= SLIPORT_SOFTRESET_SR_MASK; + iowrite32(val, adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET); +} + +static bool be_err_is_recoverable(struct be_adapter *adapter) +{ + struct be_error_recovery *err_rec = &adapter->error_recovery; + unsigned long initial_idle_time = + msecs_to_jiffies(ERR_RECOVERY_IDLE_TIME); + unsigned long recovery_interval = + msecs_to_jiffies(ERR_RECOVERY_INTERVAL); + u16 ue_err_code; + u32 val; + + val = be_POST_stage_get(adapter); + if ((val & POST_STAGE_RECOVERABLE_ERR) != POST_STAGE_RECOVERABLE_ERR) + return false; + ue_err_code = val & POST_ERR_RECOVERY_CODE_MASK; + if (ue_err_code == 0) + return false; + + dev_err(&adapter->pdev->dev, "Recoverable HW error code: 0x%x\n", + ue_err_code); + + if (jiffies - err_rec->probe_time <= initial_idle_time) { + dev_err(&adapter->pdev->dev, + "Cannot recover within %lu sec from driver load\n", + jiffies_to_msecs(initial_idle_time) / MSEC_PER_SEC); + return false; + } + + if (err_rec->last_recovery_time && + (jiffies - err_rec->last_recovery_time <= recovery_interval)) { + dev_err(&adapter->pdev->dev, + "Cannot recover within %lu sec from last recovery\n", + jiffies_to_msecs(recovery_interval) / MSEC_PER_SEC); + return false; + } + + if (ue_err_code == err_rec->last_err_code) { + dev_err(&adapter->pdev->dev, + "Cannot recover from a consecutive TPE error\n"); + return false; + } + + err_rec->last_recovery_time = jiffies; + err_rec->last_err_code = ue_err_code; + return true; +} + +static int be_tpe_recover(struct be_adapter *adapter) +{ + struct be_error_recovery *err_rec = &adapter->error_recovery; + int status = -EAGAIN; + u32 val; + + switch (err_rec->recovery_state) { + case ERR_RECOVERY_ST_NONE: + err_rec->recovery_state = ERR_RECOVERY_ST_DETECT; + err_rec->resched_delay = ERR_RECOVERY_UE_DETECT_DURATION; + break; + + case ERR_RECOVERY_ST_DETECT: + val = be_POST_stage_get(adapter); + if ((val & POST_STAGE_RECOVERABLE_ERR) != + POST_STAGE_RECOVERABLE_ERR) { + dev_err(&adapter->pdev->dev, + "Unrecoverable HW error detected: 0x%x\n", val); + status = -EINVAL; + err_rec->resched_delay = 0; + break; + } + + dev_err(&adapter->pdev->dev, "Recoverable HW error detected\n"); + + /* Only PF0 initiates Chip Soft Reset. But PF0 must wait UE2SR + * milliseconds before it checks for final error status in + * SLIPORT_SEMAPHORE to determine if recovery criteria is met. + * If it does, then PF0 initiates a Soft Reset. + */ + if (adapter->pf_num == 0) { + err_rec->recovery_state = ERR_RECOVERY_ST_RESET; + err_rec->resched_delay = err_rec->ue_to_reset_time - + ERR_RECOVERY_UE_DETECT_DURATION; + break; + } + + err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL; + err_rec->resched_delay = err_rec->ue_to_poll_time - + ERR_RECOVERY_UE_DETECT_DURATION; + break; + + case ERR_RECOVERY_ST_RESET: + if (!be_err_is_recoverable(adapter)) { + dev_err(&adapter->pdev->dev, + "Failed to meet recovery criteria\n"); + status = -EIO; + err_rec->resched_delay = 0; + break; + } + be_soft_reset(adapter); + err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL; + err_rec->resched_delay = err_rec->ue_to_poll_time - + err_rec->ue_to_reset_time; + break; + + case ERR_RECOVERY_ST_PRE_POLL: + err_rec->recovery_state = ERR_RECOVERY_ST_REINIT; + err_rec->resched_delay = 0; + status = 0; /* done */ + break; + + default: + status = -EINVAL; + err_rec->resched_delay = 0; + break; + } + + return status; +} + static int be_err_recover(struct be_adapter *adapter) { int status; - /* Error recovery is supported only Lancer as of now */ - if (!lancer_chip(adapter)) - return -EIO; + if (!lancer_chip(adapter)) { + if (!adapter->error_recovery.recovery_supported || + adapter->priv_flags & BE_DISABLE_TPE_RECOVERY) + return -EIO; + status = be_tpe_recover(adapter); + if (status) + goto err; + } /* Wait for adapter to reach quiescent state before * destroying queues @@ -5225,59 +5388,74 @@ static int be_err_recover(struct be_adapter *adapter) if (status) goto err; + adapter->flags |= BE_FLAGS_TRY_RECOVERY; + be_cleanup(adapter); status = be_resume(adapter); if (status) goto err; - return 0; + adapter->flags &= ~BE_FLAGS_TRY_RECOVERY; + err: return status; } static void be_err_detection_task(struct work_struct *work) { + struct be_error_recovery *err_rec = + container_of(work, struct be_error_recovery, + err_detection_work.work); struct be_adapter *adapter = - container_of(work, struct be_adapter, - be_err_detection_work.work); + container_of(err_rec, struct be_adapter, + error_recovery); + u32 resched_delay = ERR_RECOVERY_DETECTION_DELAY; struct device *dev = &adapter->pdev->dev; int recovery_status; - int delay = ERR_DETECTION_DELAY; be_detect_error(adapter); - - if (be_check_error(adapter, BE_ERROR_HW)) - recovery_status = be_err_recover(adapter); - else + if (!be_check_error(adapter, BE_ERROR_HW)) goto reschedule_task; + recovery_status = be_err_recover(adapter); if (!recovery_status) { - adapter->recovery_retries = 0; + err_rec->recovery_retries = 0; + err_rec->recovery_state = ERR_RECOVERY_ST_NONE; dev_info(dev, "Adapter recovery successful\n"); goto reschedule_task; - } else if (be_virtfn(adapter)) { + } else if (!lancer_chip(adapter) && err_rec->resched_delay) { + /* BEx/SH recovery state machine */ + if (adapter->pf_num == 0 && + err_rec->recovery_state > ERR_RECOVERY_ST_DETECT) + dev_err(&adapter->pdev->dev, + "Adapter recovery in progress\n"); + resched_delay = err_rec->resched_delay; + goto reschedule_task; + } else if (lancer_chip(adapter) && be_virtfn(adapter)) { /* For VFs, check if PF have allocated resources * every second. */ dev_err(dev, "Re-trying adapter recovery\n"); goto reschedule_task; - } else if (adapter->recovery_retries++ < - MAX_ERR_RECOVERY_RETRY_COUNT) { + } else if (lancer_chip(adapter) && err_rec->recovery_retries++ < + ERR_RECOVERY_MAX_RETRY_COUNT) { /* In case of another error during recovery, it takes 30 sec * for adapter to come out of error. Retry error recovery after * this time interval. */ dev_err(&adapter->pdev->dev, "Re-trying adapter recovery\n"); - delay = ERR_RECOVERY_RETRY_DELAY; + resched_delay = ERR_RECOVERY_RETRY_DELAY; goto reschedule_task; } else { dev_err(dev, "Adapter recovery failed\n"); + dev_err(dev, "Please reboot server to recover\n"); } return; + reschedule_task: - be_schedule_err_detection(adapter, delay); + be_schedule_err_detection(adapter, resched_delay); } static void be_log_sfp_info(struct be_adapter *adapter) @@ -5490,7 +5668,10 @@ static int be_drv_init(struct be_adapter *adapter) pci_save_state(adapter->pdev); INIT_DELAYED_WORK(&adapter->work, be_worker); - INIT_DELAYED_WORK(&adapter->be_err_detection_work, + + adapter->error_recovery.recovery_state = ERR_RECOVERY_ST_NONE; + adapter->error_recovery.resched_delay = 0; + INIT_DELAYED_WORK(&adapter->error_recovery.err_detection_work, be_err_detection_task); adapter->rx_fc = true; @@ -5681,6 +5862,7 @@ static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id) be_roce_dev_add(adapter); be_schedule_err_detection(adapter, ERR_DETECTION_DELAY); + adapter->error_recovery.probe_time = jiffies; /* On Die temperature not supported for VF. */ if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) { @@ -5926,6 +6108,8 @@ static struct pci_driver be_driver = { static int __init be_init_module(void) { + int status; + if (rx_frag_size != 8192 && rx_frag_size != 4096 && rx_frag_size != 2048) { printk(KERN_WARNING DRV_NAME @@ -5945,7 +6129,17 @@ static int __init be_init_module(void) return -1; } - return pci_register_driver(&be_driver); + be_err_recovery_workq = + create_singlethread_workqueue("be_err_recover"); + if (!be_err_recovery_workq) + pr_warn(DRV_NAME "Could not create error recovery workqueue\n"); + + status = pci_register_driver(&be_driver); + if (status) { + destroy_workqueue(be_wq); + be_destroy_err_recovery_workq(); + } + return status; } module_init(be_init_module); @@ -5953,6 +6147,8 @@ static void __exit be_exit_module(void) { pci_unregister_driver(&be_driver); + be_destroy_err_recovery_workq(); + if (be_wq) destroy_workqueue(be_wq); } From f72099e057c0b3ea3cfd16301cff9202c4db8ef4 Mon Sep 17 00:00:00 2001 From: Somnath Kotur Date: Wed, 7 Sep 2016 19:57:50 +0530 Subject: [PATCH 0807/1715] be2net: Issue COMMON_RESET_FUNCTION cmd during driver unload As per SLI guideline, drivers need to issue COMMON_RESET_FUNCTION SLI cmd during driver unload to clean up any non-persistent state information. Issue this cmd only if VFs are not assigned to VMs as it is possible for PF driver to unload while it\'s VF remains functional and assigned to a VM. Signed-off-by: Somnath Kotur Signed-off-by: Sriharsha Basavapatna Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 3be5d61aa875..95d2fa3de035 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -5706,6 +5706,9 @@ static void be_remove(struct pci_dev *pdev) be_clear(adapter); + if (!pci_vfs_assigned(adapter->pdev)) + be_cmd_reset_function(adapter); + /* tell fw we're done with firing cmds */ be_cmd_fw_clean(adapter); From 62259ac4b36e348077635e673f253cc139dd6032 Mon Sep 17 00:00:00 2001 From: Somnath Kotur Date: Wed, 7 Sep 2016 19:57:51 +0530 Subject: [PATCH 0808/1715] be2net: Add privilege level check for OPCODE_COMMON_GET_EXT_FAT_CAPABILITIES SLI cmd. Driver issues OPCODE_COMMON_GET_EXT_FAT_CAPABILITIES cmd during init which when issued by VFs results in the logging of a cmd failure message since they don't have the required privilege for this cmd. Fix by checking privilege before issuing the cmd. Also fixed typo in CAPABILITIES. Signed-off-by: Somnath Kotur Signed-off-by: Sriharsha Basavapatna Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_cmds.c | 13 +++++++++++-- drivers/net/ethernet/emulex/benet/be_cmds.h | 4 ++-- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index 92794f345bcb..15d02da08d8f 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -92,6 +92,11 @@ static struct be_cmd_priv_map cmd_priv_map[] = { CMD_SUBSYSTEM_COMMON, BE_PRIV_DEVCFG | BE_PRIV_VHADM }, + { + OPCODE_COMMON_GET_EXT_FAT_CAPABILITIES, + CMD_SUBSYSTEM_COMMON, + BE_PRIV_DEVCFG + } }; static bool be_cmd_allowed(struct be_adapter *adapter, u8 opcode, u8 subsystem) @@ -4127,6 +4132,10 @@ int be_cmd_get_ext_fat_capabilites(struct be_adapter *adapter, struct be_cmd_req_get_ext_fat_caps *req; int status; + if (!be_cmd_allowed(adapter, OPCODE_COMMON_GET_EXT_FAT_CAPABILITIES, + CMD_SUBSYSTEM_COMMON)) + return -EPERM; + if (mutex_lock_interruptible(&adapter->mbox_lock)) return -1; @@ -4138,7 +4147,7 @@ int be_cmd_get_ext_fat_capabilites(struct be_adapter *adapter, req = cmd->va; be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON, - OPCODE_COMMON_GET_EXT_FAT_CAPABILITES, + OPCODE_COMMON_GET_EXT_FAT_CAPABILITIES, cmd->size, wrb, cmd); req->parameter_type = cpu_to_le32(1); @@ -4167,7 +4176,7 @@ int be_cmd_set_ext_fat_capabilites(struct be_adapter *adapter, req = cmd->va; memcpy(&req->set_params, configs, sizeof(struct be_fat_conf_params)); be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON, - OPCODE_COMMON_SET_EXT_FAT_CAPABILITES, + OPCODE_COMMON_SET_EXT_FAT_CAPABILITIES, cmd->size, wrb, cmd); status = be_mcc_notify_wait(adapter); diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h index 686cbe03e368..1bd82bcb3be5 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.h +++ b/drivers/net/ethernet/emulex/benet/be_cmds.h @@ -295,8 +295,8 @@ struct be_mcc_mailbox { #define OPCODE_COMMON_GET_PHY_DETAILS 102 #define OPCODE_COMMON_SET_DRIVER_FUNCTION_CAP 103 #define OPCODE_COMMON_GET_CNTL_ADDITIONAL_ATTRIBUTES 121 -#define OPCODE_COMMON_GET_EXT_FAT_CAPABILITES 125 -#define OPCODE_COMMON_SET_EXT_FAT_CAPABILITES 126 +#define OPCODE_COMMON_GET_EXT_FAT_CAPABILITIES 125 +#define OPCODE_COMMON_SET_EXT_FAT_CAPABILITIES 126 #define OPCODE_COMMON_GET_MAC_LIST 147 #define OPCODE_COMMON_SET_MAC_LIST 148 #define OPCODE_COMMON_GET_HSW_CONFIG 152 From 988d44b1636d7f608bd7926493e0f61a034b61db Mon Sep 17 00:00:00 2001 From: Suresh Reddy Date: Wed, 7 Sep 2016 19:57:52 +0530 Subject: [PATCH 0809/1715] be2net: Avoid redundant addition of mac address in HW If a mac address is added to the uc_list and later the same mac address is added via ndo_set_mac_address() or vice versa, the driver does not detect this condition and tries to add it again. This results in a mac address collision error when the FW rejects it. Fix this by checking if the given mac address is present in uc_list while setting the device mac address and vice versa. Similarly skip deletion if the address is still in use in the other form. Signed-off-by: Suresh Reddy Signed-off-by: Sathya Perla Signed-off-by: Sriharsha Basavapatna Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_main.c | 86 ++++++++++++++++----- 1 file changed, 66 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 95d2fa3de035..a1c9920b2452 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -269,6 +269,38 @@ void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped) iowrite32(val, adapter->db + DB_CQ_OFFSET); } +static int be_dev_mac_add(struct be_adapter *adapter, u8 *mac) +{ + int i; + + /* Check if mac has already been added as part of uc-list */ + for (i = 0; i < adapter->uc_macs; i++) { + if (ether_addr_equal((u8 *)&adapter->uc_list[i * ETH_ALEN], + mac)) { + /* mac already added, skip addition */ + adapter->pmac_id[0] = adapter->pmac_id[i + 1]; + return 0; + } + } + + return be_cmd_pmac_add(adapter, mac, adapter->if_handle, + &adapter->pmac_id[0], 0); +} + +static void be_dev_mac_del(struct be_adapter *adapter, int pmac_id) +{ + int i; + + /* Skip deletion if the programmed mac is + * being used in uc-list + */ + for (i = 0; i < adapter->uc_macs; i++) { + if (adapter->pmac_id[i + 1] == pmac_id) + return; + } + be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0); +} + static int be_mac_addr_set(struct net_device *netdev, void *p) { struct be_adapter *adapter = netdev_priv(netdev); @@ -276,7 +308,7 @@ static int be_mac_addr_set(struct net_device *netdev, void *p) struct sockaddr *addr = p; int status; u8 mac[ETH_ALEN]; - u32 old_pmac_id = adapter->pmac_id[0], curr_pmac_id = 0; + u32 old_pmac_id = adapter->pmac_id[0]; if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; @@ -297,23 +329,22 @@ static int be_mac_addr_set(struct net_device *netdev, void *p) * FILTMGMT privilege. This failure is OK, only if the PF programmed * the MAC for the VF. */ - status = be_cmd_pmac_add(adapter, (u8 *)addr->sa_data, - adapter->if_handle, &adapter->pmac_id[0], 0); + mutex_lock(&adapter->rx_filter_lock); + status = be_dev_mac_add(adapter, (u8 *)addr->sa_data); if (!status) { - curr_pmac_id = adapter->pmac_id[0]; /* Delete the old programmed MAC. This call may fail if the * old MAC was already deleted by the PF driver. */ if (adapter->pmac_id[0] != old_pmac_id) - be_cmd_pmac_del(adapter, adapter->if_handle, - old_pmac_id, 0); + be_dev_mac_del(adapter, old_pmac_id); } + mutex_unlock(&adapter->rx_filter_lock); /* Decide if the new MAC is successfully activated only after * querying the FW */ - status = be_cmd_get_active_mac(adapter, curr_pmac_id, mac, + status = be_cmd_get_active_mac(adapter, adapter->pmac_id[0], mac, adapter->if_handle, true, 0); if (status) goto err; @@ -1628,6 +1659,28 @@ static void be_clear_mc_list(struct be_adapter *adapter) adapter->mc_count = 0; } +static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx) +{ + if (ether_addr_equal((u8 *)&adapter->uc_list[uc_idx * ETH_ALEN], + adapter->netdev->dev_addr)) { + adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0]; + return 0; + } + + return be_cmd_pmac_add(adapter, + (u8 *)&adapter->uc_list[uc_idx * ETH_ALEN], + adapter->if_handle, + &adapter->pmac_id[uc_idx + 1], 0); +} + +static void be_uc_mac_del(struct be_adapter *adapter, int pmac_id) +{ + if (pmac_id == adapter->pmac_id[0]) + return; + + be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0); +} + static void be_set_uc_list(struct be_adapter *adapter) { struct net_device *netdev = adapter->netdev; @@ -1668,13 +1721,10 @@ static void be_set_uc_list(struct be_adapter *adapter) be_clear_uc_promisc(adapter); for (i = 0; i < adapter->uc_macs; i++) - be_cmd_pmac_del(adapter, adapter->if_handle, - adapter->pmac_id[i + 1], 0); + be_uc_mac_del(adapter, adapter->pmac_id[i + 1]); for (i = 0; i < curr_uc_macs; i++) - be_cmd_pmac_add(adapter, adapter->uc_list[i].mac, - adapter->if_handle, - &adapter->pmac_id[i + 1], 0); + be_uc_mac_add(adapter, i); adapter->uc_macs = curr_uc_macs; adapter->update_uc_list = false; } @@ -1687,8 +1737,8 @@ static void be_clear_uc_list(struct be_adapter *adapter) __dev_uc_unsync(netdev, NULL); for (i = 0; i < adapter->uc_macs; i++) - be_cmd_pmac_del(adapter, adapter->if_handle, - adapter->pmac_id[i + 1], 0); + be_uc_mac_del(adapter, adapter->pmac_id[i + 1]); + adapter->uc_macs = 0; } @@ -3566,9 +3616,7 @@ static void be_rx_qs_destroy(struct be_adapter *adapter) static void be_disable_if_filters(struct be_adapter *adapter) { - be_cmd_pmac_del(adapter, adapter->if_handle, - adapter->pmac_id[0], 0); - + be_dev_mac_del(adapter, adapter->pmac_id[0]); be_clear_uc_list(adapter); be_clear_mc_list(adapter); @@ -3723,9 +3771,7 @@ static int be_enable_if_filters(struct be_adapter *adapter) /* For BE3 VFs, the PF programs the initial MAC address */ if (!(BEx_chip(adapter) && be_virtfn(adapter))) { - status = be_cmd_pmac_add(adapter, adapter->netdev->dev_addr, - adapter->if_handle, - &adapter->pmac_id[0], 0); + status = be_dev_mac_add(adapter, adapter->netdev->dev_addr); if (status) return status; } From c27ebf58517536c0006813007680b24db17def47 Mon Sep 17 00:00:00 2001 From: Suresh Reddy Date: Wed, 7 Sep 2016 19:57:53 +0530 Subject: [PATCH 0810/1715] be2net: Fix mac address collision in some configurations If the device mac address is updated using ndo_set_mac_address(), while the same mac address is already programmed, the driver does not detect this condition if its netdev->dev_addr has been changed. The driver tries to add the same mac address resulting in mac address collision error. This has been observed in bonding mode-5 configuration. To fix this, store the mac address configured in HW in the adapter structure. Use this to compare against the new address being updated to avoid collision. Signed-off-by: Suresh Reddy Signed-off-by: Sathya Perla Signed-off-by: Sriharsha Basavapatna Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be.h | 1 + drivers/net/ethernet/emulex/benet/be_main.c | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h index eecf24e92fd3..30a26aa39a2b 100644 --- a/drivers/net/ethernet/emulex/benet/be.h +++ b/drivers/net/ethernet/emulex/benet/be.h @@ -693,6 +693,7 @@ struct be_adapter { u32 fat_dump_len; u16 serial_num[CNTL_SERIAL_NUM_WORDS]; u8 phy_state; /* state of sfp optics (functional, faulted, etc.,) */ + u8 dev_mac[ETH_ALEN]; u32 priv_flags; /* ethtool get/set_priv_flags() */ struct be_error_recovery error_recovery; }; diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index a1c9920b2452..34f63eff6e8a 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -316,7 +316,7 @@ static int be_mac_addr_set(struct net_device *netdev, void *p) /* Proceed further only if, User provided MAC is different * from active MAC */ - if (ether_addr_equal(addr->sa_data, netdev->dev_addr)) + if (ether_addr_equal(addr->sa_data, adapter->dev_mac)) return 0; /* if device is not running, copy MAC to netdev->dev_addr */ @@ -357,6 +357,7 @@ static int be_mac_addr_set(struct net_device *netdev, void *p) goto err; } done: + ether_addr_copy(adapter->dev_mac, addr->sa_data); ether_addr_copy(netdev->dev_addr, addr->sa_data); dev_info(dev, "MAC address changed to %pM\n", addr->sa_data); return 0; @@ -1662,7 +1663,7 @@ static void be_clear_mc_list(struct be_adapter *adapter) static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx) { if (ether_addr_equal((u8 *)&adapter->uc_list[uc_idx * ETH_ALEN], - adapter->netdev->dev_addr)) { + adapter->dev_mac)) { adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0]; return 0; } @@ -3774,6 +3775,7 @@ static int be_enable_if_filters(struct be_adapter *adapter) status = be_dev_mac_add(adapter, adapter->netdev->dev_addr); if (status) return status; + ether_addr_copy(adapter->dev_mac, adapter->netdev->dev_addr); } if (adapter->vlans_added) From 368f2f137f5401f37d2acb42c4ca4e5867570495 Mon Sep 17 00:00:00 2001 From: Sriharsha Basavapatna Date: Wed, 7 Sep 2016 19:57:54 +0530 Subject: [PATCH 0811/1715] be2net: Update the driver version to 11.1.0.0 Signed-off-by: Sriharsha Basavapatna Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h index 30a26aa39a2b..6cfa63a5e9b4 100644 --- a/drivers/net/ethernet/emulex/benet/be.h +++ b/drivers/net/ethernet/emulex/benet/be.h @@ -37,7 +37,7 @@ #include "be_hw.h" #include "be_roce.h" -#define DRV_VER "11.0.0.0" +#define DRV_VER "11.1.0.0" #define DRV_NAME "be2net" #define BE_NAME "Emulex BladeEngine2" #define BE3_NAME "Emulex BladeEngine3" From 0f76d2564469fd3a337de088f533364cef206130 Mon Sep 17 00:00:00 2001 From: "subashab@codeaurora.org" Date: Tue, 6 Sep 2016 18:09:31 -0600 Subject: [PATCH 0812/1715] net: xfrm: Change u32 sysctl entries to use proc_douintvec proc_dointvec limits the values to INT_MAX in u32 sysctl entries. proc_douintvec allows to write upto UINT_MAX. Signed-off-by: Subash Abhinov Kasiviswanathan Signed-off-by: David S. Miller --- net/xfrm/xfrm_sysctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/xfrm/xfrm_sysctl.c b/net/xfrm/xfrm_sysctl.c index 05a6e3d9c258..35a7e794ad04 100644 --- a/net/xfrm/xfrm_sysctl.c +++ b/net/xfrm/xfrm_sysctl.c @@ -17,13 +17,13 @@ static struct ctl_table xfrm_table[] = { .procname = "xfrm_aevent_etime", .maxlen = sizeof(u32), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_douintvec }, { .procname = "xfrm_aevent_rseqth", .maxlen = sizeof(u32), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_douintvec }, { .procname = "xfrm_larval_drop", From cf13258fd4cb86478dfcb7e2c93a0d844307abc6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 8 Sep 2016 11:10:11 +0100 Subject: [PATCH 0813/1715] rxrpc: Fix ASSERTCMP and ASSERTIFCMP to handle signed values Fix ASSERTCMP and ASSERTIFCMP to be able to handle signed values by casting both parameters to the type of the first before comparing. Without this, both values are cast to unsigned long, which means that checks for values less than zero don't work. The downside of this is that the state enum values in struct rxrpc_call and struct rxrpc_connection can't be bitfields as __typeof__ can't handle them. Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index dbfb9ed17483..6dc3a59ce0f2 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -344,8 +344,8 @@ struct rxrpc_connection { unsigned long events; unsigned long idle_timestamp; /* Time at which last became idle */ spinlock_t state_lock; /* state-change lock */ - enum rxrpc_conn_cache_state cache_state : 8; - enum rxrpc_conn_proto_state state : 8; /* current state of connection */ + enum rxrpc_conn_cache_state cache_state; + enum rxrpc_conn_proto_state state; /* current state of connection */ u32 local_abort; /* local abort code */ u32 remote_abort; /* remote abort code */ int debug_id; /* debug ID for printks */ @@ -464,8 +464,8 @@ struct rxrpc_call { rwlock_t state_lock; /* lock for state transition */ u32 abort_code; /* Local/remote abort code */ int error; /* Local error incurred */ - enum rxrpc_call_state state : 8; /* current state of call */ - enum rxrpc_call_completion completion : 8; /* Call completion condition */ + enum rxrpc_call_state state; /* current state of call */ + enum rxrpc_call_completion completion; /* Call completion condition */ atomic_t usage; atomic_t skb_count; /* Outstanding packets on this call */ atomic_t sequence; /* Tx data packet sequence counter */ @@ -1014,11 +1014,12 @@ do { \ #define ASSERTCMP(X, OP, Y) \ do { \ - unsigned long _x = (unsigned long)(X); \ - unsigned long _y = (unsigned long)(Y); \ + __typeof__(X) _x = (X); \ + __typeof__(Y) _y = (__typeof__(X))(Y); \ if (unlikely(!(_x OP _y))) { \ - pr_err("Assertion failed - %lu(0x%lx) %s %lu(0x%lx) is false\n", \ - _x, _x, #OP, _y, _y); \ + pr_err("Assertion failed - %lu(0x%lx) %s %lu(0x%lx) is false\n", \ + (unsigned long)_x, (unsigned long)_x, #OP, \ + (unsigned long)_y, (unsigned long)_y); \ BUG(); \ } \ } while (0) @@ -1033,11 +1034,12 @@ do { \ #define ASSERTIFCMP(C, X, OP, Y) \ do { \ - unsigned long _x = (unsigned long)(X); \ - unsigned long _y = (unsigned long)(Y); \ + __typeof__(X) _x = (X); \ + __typeof__(Y) _y = (__typeof__(X))(Y); \ if (unlikely((C) && !(_x OP _y))) { \ pr_err("Assertion failed - %lu(0x%lx) %s %lu(0x%lx) is false\n", \ - _x, _x, #OP, _y, _y); \ + (unsigned long)_x, (unsigned long)_x, #OP, \ + (unsigned long)_y, (unsigned long)_y); \ BUG(); \ } \ } while (0) From 18f1387c7d7c6827b3ed6adf6ae20f65a58dc7b0 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 8 Sep 2016 11:10:11 +0100 Subject: [PATCH 0814/1715] rxrpc: Update protocol definitions slightly Update the protocol definitions in include/rxrpc/packet.h slightly: (1) Get rid of RXRPC_PROCESS_MAXCALLS as it's redundant (same as RXRPC_MAXCALLS). (2) In struct rxrpc_jumbo_header, put _rsvd in a union with a field called cksum to match struct rxrpc_wire_header. (3) Provide RXRPC_JUMBO_SUBPKTLEN which is the total of the amount of data in a non-terminal subpacket plus the following secondary header for the next packet included in the jumbo packet. Signed-off-by: David Howells --- include/rxrpc/packet.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/include/rxrpc/packet.h b/include/rxrpc/packet.h index 3c6128e1fdbe..b0ae5c1a6ce6 100644 --- a/include/rxrpc/packet.h +++ b/include/rxrpc/packet.h @@ -34,8 +34,6 @@ struct rxrpc_wire_header { #define RXRPC_CID_INC (1 << RXRPC_CIDSHIFT) /* connection ID increment */ __be32 callNumber; /* call ID (0 for connection-level packets) */ -#define RXRPC_PROCESS_MAXCALLS (1<<2) /* maximum number of active calls per conn (power of 2) */ - __be32 seq; /* sequence number of pkt in call stream */ __be32 serial; /* serial number of pkt sent to network */ @@ -93,10 +91,14 @@ struct rxrpc_wire_header { struct rxrpc_jumbo_header { uint8_t flags; /* packet flags (as per rxrpc_header) */ uint8_t pad; - __be16 _rsvd; /* reserved (used by kerberos security as cksum) */ + union { + __be16 _rsvd; /* reserved */ + __be16 cksum; /* kerberos security checksum */ + }; }; #define RXRPC_JUMBO_DATALEN 1412 /* non-terminal jumbo packet data length */ +#define RXRPC_JUMBO_SUBPKTLEN (RXRPC_JUMBO_DATALEN + sizeof(struct rxrpc_jumbo_header)) /*****************************************************************************/ /* From de8d6c7401ae8f25db3788804c86887ad7347bee Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 8 Sep 2016 11:10:11 +0100 Subject: [PATCH 0815/1715] rxrpc: Convert rxrpc_local::services to an hlist Convert the rxrpc_local::services list to an hlist so that it can be accessed under RCU conditions more readily. Signed-off-by: David Howells --- net/rxrpc/af_rxrpc.c | 10 +++++----- net/rxrpc/ar-internal.h | 4 ++-- net/rxrpc/call_accept.c | 2 +- net/rxrpc/local_object.c | 4 ++-- net/rxrpc/security.c | 2 +- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 77a132abf140..f13cca1e973e 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -156,13 +156,13 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len) if (rx->srx.srx_service) { write_lock_bh(&local->services_lock); - list_for_each_entry(prx, &local->services, listen_link) { + hlist_for_each_entry(prx, &local->services, listen_link) { if (prx->srx.srx_service == rx->srx.srx_service) goto service_in_use; } rx->local = local; - list_add_tail(&rx->listen_link, &local->services); + hlist_add_head_rcu(&rx->listen_link, &local->services); write_unlock_bh(&local->services_lock); rx->sk.sk_state = RXRPC_SERVER_BOUND; @@ -567,7 +567,7 @@ static int rxrpc_create(struct net *net, struct socket *sock, int protocol, rx->family = protocol; rx->calls = RB_ROOT; - INIT_LIST_HEAD(&rx->listen_link); + INIT_HLIST_NODE(&rx->listen_link); INIT_LIST_HEAD(&rx->secureq); INIT_LIST_HEAD(&rx->acceptq); rwlock_init(&rx->call_lock); @@ -615,9 +615,9 @@ static int rxrpc_release_sock(struct sock *sk) ASSERTCMP(rx->listen_link.next, !=, LIST_POISON1); - if (!list_empty(&rx->listen_link)) { + if (!hlist_unhashed(&rx->listen_link)) { write_lock_bh(&rx->local->services_lock); - list_del(&rx->listen_link); + hlist_del_rcu(&rx->listen_link); write_unlock_bh(&rx->local->services_lock); } diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 6dc3a59ce0f2..fd438dc93ee9 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -71,7 +71,7 @@ struct rxrpc_sock { struct sock sk; rxrpc_notify_new_call_t notify_new_call; /* Func to notify of new call */ struct rxrpc_local *local; /* local endpoint */ - struct list_head listen_link; /* link in the local endpoint's listen list */ + struct hlist_node listen_link; /* link in the local endpoint's listen list */ struct list_head secureq; /* calls awaiting connection security clearance */ struct list_head acceptq; /* calls awaiting acceptance */ struct key *key; /* security for this socket */ @@ -186,7 +186,7 @@ struct rxrpc_local { struct list_head link; struct socket *socket; /* my UDP socket */ struct work_struct processor; - struct list_head services; /* services listening on this endpoint */ + struct hlist_head services; /* services listening on this endpoint */ struct rw_semaphore defrag_sem; /* control re-enablement of IP DF bit */ struct sk_buff_head accept_queue; /* incoming calls awaiting acceptance */ struct sk_buff_head reject_queue; /* packets awaiting rejection */ diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 879a964de80c..4c71efcf82ed 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -217,7 +217,7 @@ void rxrpc_accept_incoming_calls(struct rxrpc_local *local) /* get the socket providing the service */ read_lock_bh(&local->services_lock); - list_for_each_entry(rx, &local->services, listen_link) { + hlist_for_each_entry(rx, &local->services, listen_link) { if (rx->srx.srx_service == sp->hdr.serviceId && rx->sk.sk_state != RXRPC_CLOSE) goto found_service; diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index a753796fbe8f..610916f4ae34 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -75,7 +75,7 @@ static struct rxrpc_local *rxrpc_alloc_local(const struct sockaddr_rxrpc *srx) atomic_set(&local->usage, 1); INIT_LIST_HEAD(&local->link); INIT_WORK(&local->processor, rxrpc_local_processor); - INIT_LIST_HEAD(&local->services); + INIT_HLIST_HEAD(&local->services); init_rwsem(&local->defrag_sem); skb_queue_head_init(&local->accept_queue); skb_queue_head_init(&local->reject_queue); @@ -296,7 +296,7 @@ static void rxrpc_local_destroyer(struct rxrpc_local *local) mutex_unlock(&rxrpc_local_mutex); ASSERT(RB_EMPTY_ROOT(&local->client_conns)); - ASSERT(list_empty(&local->services)); + ASSERT(hlist_empty(&local->services)); if (socket) { local->socket = NULL; diff --git a/net/rxrpc/security.c b/net/rxrpc/security.c index 814d285ff802..5d79d5a9c944 100644 --- a/net/rxrpc/security.c +++ b/net/rxrpc/security.c @@ -131,7 +131,7 @@ int rxrpc_init_server_conn_security(struct rxrpc_connection *conn) /* find the service */ read_lock_bh(&local->services_lock); - list_for_each_entry(rx, &local->services, listen_link) { + hlist_for_each_entry(rx, &local->services, listen_link) { if (rx->srx.srx_service == conn->params.service_id) goto found_service; } From 2ab27215ea27475a0b279732ba8a934bfab57ef0 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 8 Sep 2016 11:10:12 +0100 Subject: [PATCH 0816/1715] rxrpc: Remove skb_count from struct rxrpc_call Remove the sk_buff count from the rxrpc_call struct as it's less useful once we stop queueing sk_buffs. Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 10 +++------- net/rxrpc/ar-internal.h | 1 - net/rxrpc/call_object.c | 34 ++++++++++++---------------------- 3 files changed, 15 insertions(+), 30 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 85ee035774ae..6b06cf050bc0 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -18,16 +18,14 @@ TRACE_EVENT(rxrpc_call, TP_PROTO(struct rxrpc_call *call, enum rxrpc_call_trace op, - int usage, int nskb, - const void *where, const void *aux), + int usage, const void *where, const void *aux), - TP_ARGS(call, op, usage, nskb, where, aux), + TP_ARGS(call, op, usage, where, aux), TP_STRUCT__entry( __field(struct rxrpc_call *, call ) __field(int, op ) __field(int, usage ) - __field(int, nskb ) __field(const void *, where ) __field(const void *, aux ) ), @@ -36,16 +34,14 @@ TRACE_EVENT(rxrpc_call, __entry->call = call; __entry->op = op; __entry->usage = usage; - __entry->nskb = nskb; __entry->where = where; __entry->aux = aux; ), - TP_printk("c=%p %s u=%d s=%d p=%pSR a=%p", + TP_printk("c=%p %s u=%d sp=%pSR a=%p", __entry->call, rxrpc_call_traces[__entry->op], __entry->usage, - __entry->nskb, __entry->where, __entry->aux) ); diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index fd438dc93ee9..027791261768 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -467,7 +467,6 @@ struct rxrpc_call { enum rxrpc_call_state state; /* current state of call */ enum rxrpc_call_completion completion; /* Call completion condition */ atomic_t usage; - atomic_t skb_count; /* Outstanding packets on this call */ atomic_t sequence; /* Tx data packet sequence counter */ u16 service_id; /* service ID */ u8 security_ix; /* Security type */ diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 9efd9b0b0bdf..f843397e03b6 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -232,9 +232,8 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, return call; } - trace_rxrpc_call(call, rxrpc_call_new_client, - atomic_read(&call->usage), 0, - here, (const void *)user_call_ID); + trace_rxrpc_call(call, 0, atomic_read(&call->usage), here, + (const void *)user_call_ID); /* Publish the call, even though it is incompletely set up as yet */ call->user_call_ID = user_call_ID; @@ -325,7 +324,7 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx, return ERR_PTR(-EBUSY); trace_rxrpc_call(candidate, rxrpc_call_new_service, - atomic_read(&candidate->usage), 0, here, NULL); + atomic_read(&candidate->usage), here, NULL); chan = sp->hdr.cid & RXRPC_CHANNELMASK; candidate->conn = conn; @@ -446,11 +445,10 @@ bool rxrpc_queue_call(struct rxrpc_call *call) { const void *here = __builtin_return_address(0); int n = __atomic_add_unless(&call->usage, 1, 0); - int m = atomic_read(&call->skb_count); if (n == 0) return false; if (rxrpc_queue_work(&call->processor)) - trace_rxrpc_call(call, rxrpc_call_queued, n + 1, m, here, NULL); + trace_rxrpc_call(call, rxrpc_call_queued, n + 1, here, NULL); else rxrpc_put_call(call, rxrpc_call_put_noqueue); return true; @@ -463,10 +461,9 @@ bool __rxrpc_queue_call(struct rxrpc_call *call) { const void *here = __builtin_return_address(0); int n = atomic_read(&call->usage); - int m = atomic_read(&call->skb_count); ASSERTCMP(n, >=, 1); if (rxrpc_queue_work(&call->processor)) - trace_rxrpc_call(call, rxrpc_call_queued_ref, n, m, here, NULL); + trace_rxrpc_call(call, rxrpc_call_queued_ref, n, here, NULL); else rxrpc_put_call(call, rxrpc_call_put_noqueue); return true; @@ -480,9 +477,8 @@ void rxrpc_see_call(struct rxrpc_call *call) const void *here = __builtin_return_address(0); if (call) { int n = atomic_read(&call->usage); - int m = atomic_read(&call->skb_count); - trace_rxrpc_call(call, rxrpc_call_seen, n, m, here, NULL); + trace_rxrpc_call(call, rxrpc_call_seen, n, here, NULL); } } @@ -493,9 +489,8 @@ void rxrpc_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op) { const void *here = __builtin_return_address(0); int n = atomic_inc_return(&call->usage); - int m = atomic_read(&call->skb_count); - trace_rxrpc_call(call, op, n, m, here, NULL); + trace_rxrpc_call(call, op, n, here, NULL); } /* @@ -505,9 +500,8 @@ void rxrpc_get_call_for_skb(struct rxrpc_call *call, struct sk_buff *skb) { const void *here = __builtin_return_address(0); int n = atomic_inc_return(&call->usage); - int m = atomic_inc_return(&call->skb_count); - trace_rxrpc_call(call, rxrpc_call_got_skb, n, m, here, skb); + trace_rxrpc_call(call, rxrpc_call_got_skb, n, here, skb); } /* @@ -642,17 +636,15 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx) void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace op) { const void *here = __builtin_return_address(0); - int n, m; + int n; ASSERT(call != NULL); n = atomic_dec_return(&call->usage); - m = atomic_read(&call->skb_count); - trace_rxrpc_call(call, op, n, m, here, NULL); + trace_rxrpc_call(call, op, n, here, NULL); ASSERTCMP(n, >=, 0); if (n == 0) { _debug("call %d dead", call->debug_id); - WARN_ON(m != 0); rxrpc_cleanup_call(call); } } @@ -663,15 +655,13 @@ void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace op) void rxrpc_put_call_for_skb(struct rxrpc_call *call, struct sk_buff *skb) { const void *here = __builtin_return_address(0); - int n, m; + int n; n = atomic_dec_return(&call->usage); - m = atomic_dec_return(&call->skb_count); - trace_rxrpc_call(call, rxrpc_call_put_skb, n, m, here, skb); + trace_rxrpc_call(call, rxrpc_call_put_skb, n, here, skb); ASSERTCMP(n, >=, 0); if (n == 0) { _debug("call %d dead", call->debug_id); - WARN_ON(m != 0); rxrpc_cleanup_call(call); } } From 49e19ec7d3499f79d2b3a45bb28418e89512fd7a Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 8 Sep 2016 11:10:12 +0100 Subject: [PATCH 0817/1715] rxrpc: Add tracepoints to record received packets and end of data_ready Add two tracepoints: (1) Record the RxRPC protocol header of packets retrieved from the UDP socket by the data_ready handler. (2) Record the outcome of the data_ready handler. Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 38 ++++++++++++++++++++++++++++++++++++ net/rxrpc/input.c | 8 ++++++-- 2 files changed, 44 insertions(+), 2 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 6b06cf050bc0..ea3b10ed91a8 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -80,6 +80,44 @@ TRACE_EVENT(rxrpc_skb, __entry->where) ); +TRACE_EVENT(rxrpc_rx_packet, + TP_PROTO(struct rxrpc_skb_priv *sp), + + TP_ARGS(sp), + + TP_STRUCT__entry( + __field_struct(struct rxrpc_host_header, hdr ) + ), + + TP_fast_assign( + memcpy(&__entry->hdr, &sp->hdr, sizeof(__entry->hdr)); + ), + + TP_printk("%08x:%08x:%08x:%04x %08x %08x %02x %02x", + __entry->hdr.epoch, __entry->hdr.cid, + __entry->hdr.callNumber, __entry->hdr.serviceId, + __entry->hdr.serial, __entry->hdr.seq, + __entry->hdr.type, __entry->hdr.flags) + ); + +TRACE_EVENT(rxrpc_rx_done, + TP_PROTO(int result, int abort_code), + + TP_ARGS(result, abort_code), + + TP_STRUCT__entry( + __field(int, result ) + __field(int, abort_code ) + ), + + TP_fast_assign( + __entry->result = result; + __entry->abort_code = abort_code; + ), + + TP_printk("r=%d a=%d", __entry->result, __entry->abort_code) + ); + TRACE_EVENT(rxrpc_abort, TP_PROTO(const char *why, u32 cid, u32 call_id, rxrpc_seq_t seq, int abort_code, int error), diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 8e624109750a..6c4b7df05e95 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -683,6 +683,7 @@ void rxrpc_data_ready(struct sock *sk) /* dig out the RxRPC connection details */ if (rxrpc_extract_header(sp, skb) < 0) goto bad_message; + trace_rxrpc_rx_packet(sp); _net("Rx RxRPC %s ep=%x call=%x:%x", sp->hdr.flags & RXRPC_CLIENT_INITIATED ? "ToServer" : "ToClient", @@ -767,6 +768,7 @@ discard_unlock: out_unlock: rcu_read_unlock(); out: + trace_rxrpc_rx_done(0, 0); return; cant_route_call: @@ -780,7 +782,7 @@ cant_route_call: skb_queue_tail(&local->accept_queue, skb); rxrpc_queue_work(&local->processor); _leave(" [incoming]"); - return; + goto out; } skb->priority = RX_INVALID_OPERATION; } else { @@ -789,7 +791,7 @@ cant_route_call: if (sp->hdr.type != RXRPC_PACKET_TYPE_ABORT) { _debug("reject type %d",sp->hdr.type); - rxrpc_reject_packet(local, skb); + goto reject_packet; } else { rxrpc_free_skb(skb); } @@ -798,6 +800,8 @@ cant_route_call: bad_message: skb->priority = RX_PROTOCOL_ERROR; +reject_packet: + trace_rxrpc_rx_done(skb->mark, skb->priority); rxrpc_reject_packet(local, skb); _leave(" [badmsg]"); } From 00e907127e6f86d0f9b122d9b4347a8aa09a8b61 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 8 Sep 2016 11:10:12 +0100 Subject: [PATCH 0818/1715] rxrpc: Preallocate peers, conns and calls for incoming service requests Make it possible for the data_ready handler called from the UDP transport socket to completely instantiate an rxrpc_call structure and make it immediately live by preallocating all the memory it might need. The idea is to cut out the background thread usage as much as possible. [Note that the preallocated structs are not actually used in this patch - that will be done in a future patch.] If insufficient resources are available in the preallocation buffers, it will be possible to discard the DATA packet in the data_ready handler or schedule a BUSY packet without the need to schedule an attempt at allocation in a background thread. To this end: (1) Preallocate rxrpc_peer, rxrpc_connection and rxrpc_call structs to a maximum number each of the listen backlog size. The backlog size is limited to a maxmimum of 32. Only this many of each can be in the preallocation buffer. (2) For userspace sockets, the preallocation is charged initially by listen() and will be recharged by accepting or rejecting pending new incoming calls. (3) For kernel services {,re,dis}charging of the preallocation buffers is handled manually. Two notifier callbacks have to be provided before kernel_listen() is invoked: (a) An indication that a new call has been instantiated. This can be used to trigger background recharging. (b) An indication that a call is being discarded. This is used when the socket is being released. A function, rxrpc_kernel_charge_accept() is called by the kernel service to preallocate a single call. It should be passed the user ID to be used for that call and a callback to associate the rxrpc call with the kernel service's side of the ID. (4) Discard the preallocation when the socket is closed. (5) Temporarily bump the refcount on the call allocated in rxrpc_incoming_call() so that rxrpc_release_call() can ditch the preallocation ref on service calls unconditionally. This will no longer be necessary once the preallocation is used. Note that this does not yet control the number of active service calls on a client - that will come in a later patch. A future development would be to provide a setsockopt() call that allows a userspace server to manually charge the preallocation buffer. This would allow user call IDs to be provided in advance and the awkward manual accept stage to be bypassed. Signed-off-by: David Howells --- fs/afs/rxrpc.c | 71 +++++++++++- include/net/af_rxrpc.h | 10 +- net/rxrpc/af_rxrpc.c | 16 ++- net/rxrpc/ar-internal.h | 32 +++++- net/rxrpc/call_accept.c | 229 +++++++++++++++++++++++++++++++++++++++ net/rxrpc/call_object.c | 12 +- net/rxrpc/conn_object.c | 2 + net/rxrpc/conn_service.c | 24 ++++ net/rxrpc/input.c | 2 +- net/rxrpc/proc.c | 8 +- 10 files changed, 391 insertions(+), 15 deletions(-) diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 53750dece80e..720ef05a24fe 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -18,6 +18,7 @@ struct socket *afs_socket; /* my RxRPC socket */ static struct workqueue_struct *afs_async_calls; +static struct afs_call *afs_spare_incoming_call; static atomic_t afs_outstanding_calls; static void afs_free_call(struct afs_call *); @@ -26,7 +27,8 @@ static int afs_wait_for_call_to_complete(struct afs_call *); static void afs_wake_up_async_call(struct sock *, struct rxrpc_call *, unsigned long); static int afs_dont_wait_for_call_to_complete(struct afs_call *); static void afs_process_async_call(struct work_struct *); -static void afs_rx_new_call(struct sock *); +static void afs_rx_new_call(struct sock *, struct rxrpc_call *, unsigned long); +static void afs_rx_discard_new_call(struct rxrpc_call *, unsigned long); static int afs_deliver_cm_op_id(struct afs_call *); /* synchronous call management */ @@ -54,8 +56,10 @@ static const struct afs_call_type afs_RXCMxxxx = { }; static void afs_collect_incoming_call(struct work_struct *); +static void afs_charge_preallocation(struct work_struct *); static DECLARE_WORK(afs_collect_incoming_call_work, afs_collect_incoming_call); +static DECLARE_WORK(afs_charge_preallocation_work, afs_charge_preallocation); static int afs_wait_atomic_t(atomic_t *p) { @@ -100,13 +104,15 @@ int afs_open_socket(void) if (ret < 0) goto error_2; - rxrpc_kernel_new_call_notification(socket, afs_rx_new_call); + rxrpc_kernel_new_call_notification(socket, afs_rx_new_call, + afs_rx_discard_new_call); ret = kernel_listen(socket, INT_MAX); if (ret < 0) goto error_2; afs_socket = socket; + afs_charge_preallocation(NULL); _leave(" = 0"); return 0; @@ -126,6 +132,12 @@ void afs_close_socket(void) { _enter(""); + if (afs_spare_incoming_call) { + atomic_inc(&afs_outstanding_calls); + afs_free_call(afs_spare_incoming_call); + afs_spare_incoming_call = NULL; + } + _debug("outstanding %u", atomic_read(&afs_outstanding_calls)); wait_on_atomic_t(&afs_outstanding_calls, afs_wait_atomic_t, TASK_UNINTERRUPTIBLE); @@ -635,12 +647,65 @@ static void afs_collect_incoming_call(struct work_struct *work) afs_free_call(call); } +static void afs_rx_attach(struct rxrpc_call *rxcall, unsigned long user_call_ID) +{ + struct afs_call *call = (struct afs_call *)user_call_ID; + + call->rxcall = rxcall; +} + +/* + * Charge the incoming call preallocation. + */ +static void afs_charge_preallocation(struct work_struct *work) +{ + struct afs_call *call = afs_spare_incoming_call; + + for (;;) { + if (!call) { + call = kzalloc(sizeof(struct afs_call), GFP_KERNEL); + if (!call) + break; + + INIT_WORK(&call->async_work, afs_process_async_call); + call->wait_mode = &afs_async_incoming_call; + call->type = &afs_RXCMxxxx; + init_waitqueue_head(&call->waitq); + call->state = AFS_CALL_AWAIT_OP_ID; + } + + if (rxrpc_kernel_charge_accept(afs_socket, + afs_wake_up_async_call, + afs_rx_attach, + (unsigned long)call, + GFP_KERNEL) < 0) + break; + call = NULL; + } + afs_spare_incoming_call = call; +} + +/* + * Discard a preallocated call when a socket is shut down. + */ +static void afs_rx_discard_new_call(struct rxrpc_call *rxcall, + unsigned long user_call_ID) +{ + struct afs_call *call = (struct afs_call *)user_call_ID; + + atomic_inc(&afs_outstanding_calls); + call->rxcall = NULL; + afs_free_call(call); +} + /* * Notification of an incoming call. */ -static void afs_rx_new_call(struct sock *sk) +static void afs_rx_new_call(struct sock *sk, struct rxrpc_call *rxcall, + unsigned long user_call_ID) { queue_work(afs_wq, &afs_collect_incoming_call_work); + queue_work(afs_wq, &afs_charge_preallocation_work); } /* diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index 08ed8729126c..9cf551be916b 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -21,10 +21,14 @@ struct rxrpc_call; typedef void (*rxrpc_notify_rx_t)(struct sock *, struct rxrpc_call *, unsigned long); -typedef void (*rxrpc_notify_new_call_t)(struct sock *); +typedef void (*rxrpc_notify_new_call_t)(struct sock *, struct rxrpc_call *, + unsigned long); +typedef void (*rxrpc_discard_new_call_t)(struct rxrpc_call *, unsigned long); +typedef void (*rxrpc_user_attach_call_t)(struct rxrpc_call *, unsigned long); void rxrpc_kernel_new_call_notification(struct socket *, - rxrpc_notify_new_call_t); + rxrpc_notify_new_call_t, + rxrpc_discard_new_call_t); struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *, struct sockaddr_rxrpc *, struct key *, @@ -43,5 +47,7 @@ struct rxrpc_call *rxrpc_kernel_accept_call(struct socket *, unsigned long, int rxrpc_kernel_reject_call(struct socket *); void rxrpc_kernel_get_peer(struct socket *, struct rxrpc_call *, struct sockaddr_rxrpc *); +int rxrpc_kernel_charge_accept(struct socket *, rxrpc_notify_rx_t, + rxrpc_user_attach_call_t, unsigned long, gfp_t); #endif /* _NET_RXRPC_H */ diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index f13cca1e973e..1e8cf3ded81f 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -193,7 +193,7 @@ static int rxrpc_listen(struct socket *sock, int backlog) { struct sock *sk = sock->sk; struct rxrpc_sock *rx = rxrpc_sk(sk); - unsigned int max; + unsigned int max, old; int ret; _enter("%p,%d", rx, backlog); @@ -212,9 +212,13 @@ static int rxrpc_listen(struct socket *sock, int backlog) backlog = max; else if (backlog < 0 || backlog > max) break; + old = sk->sk_max_ack_backlog; sk->sk_max_ack_backlog = backlog; - rx->sk.sk_state = RXRPC_SERVER_LISTENING; - ret = 0; + ret = rxrpc_service_prealloc(rx, GFP_KERNEL); + if (ret == 0) + rx->sk.sk_state = RXRPC_SERVER_LISTENING; + else + sk->sk_max_ack_backlog = old; break; default: ret = -EBUSY; @@ -303,16 +307,19 @@ EXPORT_SYMBOL(rxrpc_kernel_end_call); * rxrpc_kernel_new_call_notification - Get notifications of new calls * @sock: The socket to intercept received messages on * @notify_new_call: Function to be called when new calls appear + * @discard_new_call: Function to discard preallocated calls * * Allow a kernel service to be given notifications about new calls. */ void rxrpc_kernel_new_call_notification( struct socket *sock, - rxrpc_notify_new_call_t notify_new_call) + rxrpc_notify_new_call_t notify_new_call, + rxrpc_discard_new_call_t discard_new_call) { struct rxrpc_sock *rx = rxrpc_sk(sock->sk); rx->notify_new_call = notify_new_call; + rx->discard_new_call = discard_new_call; } EXPORT_SYMBOL(rxrpc_kernel_new_call_notification); @@ -622,6 +629,7 @@ static int rxrpc_release_sock(struct sock *sk) } /* try to flush out this socket */ + rxrpc_discard_prealloc(rx); rxrpc_release_calls_on_socket(rx); flush_workqueue(rxrpc_workqueue); rxrpc_purge_queue(&sk->sk_receive_queue); diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 027791261768..45e1c269f90e 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -63,6 +63,27 @@ enum { RXRPC_CLOSE, /* socket is being closed */ }; +/* + * Service backlog preallocation. + * + * This contains circular buffers of preallocated peers, connections and calls + * for incoming service calls and their head and tail pointers. This allows + * calls to be set up in the data_ready handler, thereby avoiding the need to + * shuffle packets around so much. + */ +struct rxrpc_backlog { + unsigned short peer_backlog_head; + unsigned short peer_backlog_tail; + unsigned short conn_backlog_head; + unsigned short conn_backlog_tail; + unsigned short call_backlog_head; + unsigned short call_backlog_tail; +#define RXRPC_BACKLOG_MAX 32 + struct rxrpc_peer *peer_backlog[RXRPC_BACKLOG_MAX]; + struct rxrpc_connection *conn_backlog[RXRPC_BACKLOG_MAX]; + struct rxrpc_call *call_backlog[RXRPC_BACKLOG_MAX]; +}; + /* * RxRPC socket definition */ @@ -70,13 +91,15 @@ struct rxrpc_sock { /* WARNING: sk has to be the first member */ struct sock sk; rxrpc_notify_new_call_t notify_new_call; /* Func to notify of new call */ + rxrpc_discard_new_call_t discard_new_call; /* Func to discard a new call */ struct rxrpc_local *local; /* local endpoint */ struct hlist_node listen_link; /* link in the local endpoint's listen list */ struct list_head secureq; /* calls awaiting connection security clearance */ struct list_head acceptq; /* calls awaiting acceptance */ + struct rxrpc_backlog *backlog; /* Preallocation for services */ struct key *key; /* security for this socket */ struct key *securities; /* list of server security descriptors */ - struct rb_root calls; /* outstanding calls on this socket */ + struct rb_root calls; /* User ID -> call mapping */ unsigned long flags; #define RXRPC_SOCK_CONNECTED 0 /* connect_srx is set */ rwlock_t call_lock; /* lock for calls */ @@ -290,6 +313,7 @@ enum rxrpc_conn_cache_state { enum rxrpc_conn_proto_state { RXRPC_CONN_UNUSED, /* Connection not yet attempted */ RXRPC_CONN_CLIENT, /* Client connection */ + RXRPC_CONN_SERVICE_PREALLOC, /* Service connection preallocation */ RXRPC_CONN_SERVICE_UNSECURED, /* Service unsecured connection */ RXRPC_CONN_SERVICE_CHALLENGING, /* Service challenging for security */ RXRPC_CONN_SERVICE, /* Service secured connection */ @@ -408,6 +432,7 @@ enum rxrpc_call_state { RXRPC_CALL_CLIENT_AWAIT_REPLY, /* - client awaiting reply */ RXRPC_CALL_CLIENT_RECV_REPLY, /* - client receiving reply phase */ RXRPC_CALL_CLIENT_FINAL_ACK, /* - client sending final ACK phase */ + RXRPC_CALL_SERVER_PREALLOC, /* - service preallocation */ RXRPC_CALL_SERVER_SECURING, /* - server securing request connection */ RXRPC_CALL_SERVER_ACCEPTING, /* - server accepting request */ RXRPC_CALL_SERVER_RECV_REQUEST, /* - server receiving request */ @@ -534,6 +559,8 @@ extern struct workqueue_struct *rxrpc_workqueue; /* * call_accept.c */ +int rxrpc_service_prealloc(struct rxrpc_sock *, gfp_t); +void rxrpc_discard_prealloc(struct rxrpc_sock *); void rxrpc_accept_incoming_calls(struct rxrpc_local *); struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *, unsigned long, rxrpc_notify_rx_t); @@ -557,6 +584,7 @@ extern struct list_head rxrpc_calls; extern rwlock_t rxrpc_call_lock; struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *, unsigned long); +struct rxrpc_call *rxrpc_alloc_call(gfp_t); struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *, struct rxrpc_conn_parameters *, struct sockaddr_rxrpc *, @@ -573,6 +601,7 @@ void rxrpc_get_call(struct rxrpc_call *, enum rxrpc_call_trace); void rxrpc_put_call(struct rxrpc_call *, enum rxrpc_call_trace); void rxrpc_get_call_for_skb(struct rxrpc_call *, struct sk_buff *); void rxrpc_put_call_for_skb(struct rxrpc_call *, struct sk_buff *); +void rxrpc_cleanup_call(struct rxrpc_call *); void __exit rxrpc_destroy_all_calls(void); static inline bool rxrpc_is_service_call(const struct rxrpc_call *call) @@ -757,6 +786,7 @@ struct rxrpc_connection *rxrpc_find_service_conn_rcu(struct rxrpc_peer *, struct rxrpc_connection *rxrpc_incoming_connection(struct rxrpc_local *, struct sockaddr_rxrpc *, struct sk_buff *); +struct rxrpc_connection *rxrpc_prealloc_service_connection(gfp_t); void rxrpc_unpublish_service_conn(struct rxrpc_connection *); /* diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 4c71efcf82ed..cc7194e05a15 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -20,11 +20,209 @@ #include #include #include +#include #include #include #include #include "ar-internal.h" +/* + * Preallocate a single service call, connection and peer and, if possible, + * give them a user ID and attach the user's side of the ID to them. + */ +static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, + struct rxrpc_backlog *b, + rxrpc_notify_rx_t notify_rx, + rxrpc_user_attach_call_t user_attach_call, + unsigned long user_call_ID, gfp_t gfp) +{ + const void *here = __builtin_return_address(0); + struct rxrpc_call *call; + int max, tmp; + unsigned int size = RXRPC_BACKLOG_MAX; + unsigned int head, tail, call_head, call_tail; + + max = rx->sk.sk_max_ack_backlog; + tmp = rx->sk.sk_ack_backlog; + if (tmp >= max) { + _leave(" = -ENOBUFS [full %u]", max); + return -ENOBUFS; + } + max -= tmp; + + /* We don't need more conns and peers than we have calls, but on the + * other hand, we shouldn't ever use more peers than conns or conns + * than calls. + */ + call_head = b->call_backlog_head; + call_tail = READ_ONCE(b->call_backlog_tail); + tmp = CIRC_CNT(call_head, call_tail, size); + if (tmp >= max) { + _leave(" = -ENOBUFS [enough %u]", tmp); + return -ENOBUFS; + } + max = tmp + 1; + + head = b->peer_backlog_head; + tail = READ_ONCE(b->peer_backlog_tail); + if (CIRC_CNT(head, tail, size) < max) { + struct rxrpc_peer *peer = rxrpc_alloc_peer(rx->local, gfp); + if (!peer) + return -ENOMEM; + b->peer_backlog[head] = peer; + smp_store_release(&b->peer_backlog_head, + (head + 1) & (size - 1)); + } + + head = b->conn_backlog_head; + tail = READ_ONCE(b->conn_backlog_tail); + if (CIRC_CNT(head, tail, size) < max) { + struct rxrpc_connection *conn; + + conn = rxrpc_prealloc_service_connection(gfp); + if (!conn) + return -ENOMEM; + b->conn_backlog[head] = conn; + smp_store_release(&b->conn_backlog_head, + (head + 1) & (size - 1)); + } + + /* Now it gets complicated, because calls get registered with the + * socket here, particularly if a user ID is preassigned by the user. + */ + call = rxrpc_alloc_call(gfp); + if (!call) + return -ENOMEM; + call->flags |= (1 << RXRPC_CALL_IS_SERVICE); + call->state = RXRPC_CALL_SERVER_PREALLOC; + + trace_rxrpc_call(call, rxrpc_call_new_service, + atomic_read(&call->usage), + here, (const void *)user_call_ID); + + write_lock(&rx->call_lock); + if (user_attach_call) { + struct rxrpc_call *xcall; + struct rb_node *parent, **pp; + + /* Check the user ID isn't already in use */ + pp = &rx->calls.rb_node; + parent = NULL; + while (*pp) { + parent = *pp; + xcall = rb_entry(parent, struct rxrpc_call, sock_node); + if (user_call_ID < call->user_call_ID) + pp = &(*pp)->rb_left; + else if (user_call_ID > call->user_call_ID) + pp = &(*pp)->rb_right; + else + goto id_in_use; + } + + call->user_call_ID = user_call_ID; + call->notify_rx = notify_rx; + rxrpc_get_call(call, rxrpc_call_got); + user_attach_call(call, user_call_ID); + rxrpc_get_call(call, rxrpc_call_got_userid); + rb_link_node(&call->sock_node, parent, pp); + rb_insert_color(&call->sock_node, &rx->calls); + set_bit(RXRPC_CALL_HAS_USERID, &call->flags); + } + + write_unlock(&rx->call_lock); + + write_lock(&rxrpc_call_lock); + list_add_tail(&call->link, &rxrpc_calls); + write_unlock(&rxrpc_call_lock); + + b->call_backlog[call_head] = call; + smp_store_release(&b->call_backlog_head, (call_head + 1) & (size - 1)); + _leave(" = 0 [%d -> %lx]", call->debug_id, user_call_ID); + return 0; + +id_in_use: + write_unlock(&rx->call_lock); + rxrpc_cleanup_call(call); + _leave(" = -EBADSLT"); + return -EBADSLT; +} + +/* + * Preallocate sufficient service connections, calls and peers to cover the + * entire backlog of a socket. When a new call comes in, if we don't have + * sufficient of each available, the call gets rejected as busy or ignored. + * + * The backlog is replenished when a connection is accepted or rejected. + */ +int rxrpc_service_prealloc(struct rxrpc_sock *rx, gfp_t gfp) +{ + struct rxrpc_backlog *b = rx->backlog; + + if (!b) { + b = kzalloc(sizeof(struct rxrpc_backlog), gfp); + if (!b) + return -ENOMEM; + rx->backlog = b; + } + + if (rx->discard_new_call) + return 0; + + while (rxrpc_service_prealloc_one(rx, b, NULL, NULL, 0, gfp) == 0) + ; + + return 0; +} + +/* + * Discard the preallocation on a service. + */ +void rxrpc_discard_prealloc(struct rxrpc_sock *rx) +{ + struct rxrpc_backlog *b = rx->backlog; + unsigned int size = RXRPC_BACKLOG_MAX, head, tail; + + if (!b) + return; + rx->backlog = NULL; + + head = b->peer_backlog_head; + tail = b->peer_backlog_tail; + while (CIRC_CNT(head, tail, size) > 0) { + struct rxrpc_peer *peer = b->peer_backlog[tail]; + kfree(peer); + tail = (tail + 1) & (size - 1); + } + + head = b->conn_backlog_head; + tail = b->conn_backlog_tail; + while (CIRC_CNT(head, tail, size) > 0) { + struct rxrpc_connection *conn = b->conn_backlog[tail]; + write_lock(&rxrpc_connection_lock); + list_del(&conn->link); + list_del(&conn->proc_link); + write_unlock(&rxrpc_connection_lock); + kfree(conn); + tail = (tail + 1) & (size - 1); + } + + head = b->call_backlog_head; + tail = b->call_backlog_tail; + while (CIRC_CNT(head, tail, size) > 0) { + struct rxrpc_call *call = b->call_backlog[tail]; + if (rx->discard_new_call) { + _debug("discard %lx", call->user_call_ID); + rx->discard_new_call(call, call->user_call_ID); + } + rxrpc_call_completed(call); + rxrpc_release_call(rx, call); + rxrpc_put_call(call, rxrpc_call_put); + tail = (tail + 1) & (size - 1); + } + + kfree(b); +} + /* * generate a connection-level abort */ @@ -450,3 +648,34 @@ int rxrpc_kernel_reject_call(struct socket *sock) return ret; } EXPORT_SYMBOL(rxrpc_kernel_reject_call); + +/* + * rxrpc_kernel_charge_accept - Charge up socket with preallocated calls + * @sock: The socket on which to preallocate + * @notify_rx: Event notification function for the call + * @user_attach_call: Func to attach call to user_call_ID + * @user_call_ID: The tag to attach to the preallocated call + * @gfp: The allocation conditions. + * + * Charge up the socket with preallocated calls, each with a user ID. A + * function should be provided to effect the attachment from the user's side. + * The user is given a ref to hold on the call. + * + * Note that the call may be come connected before this function returns. + */ +int rxrpc_kernel_charge_accept(struct socket *sock, + rxrpc_notify_rx_t notify_rx, + rxrpc_user_attach_call_t user_attach_call, + unsigned long user_call_ID, gfp_t gfp) +{ + struct rxrpc_sock *rx = rxrpc_sk(sock->sk); + struct rxrpc_backlog *b = rx->backlog; + + if (sock->sk->sk_state == RXRPC_CLOSE) + return -ESHUTDOWN; + + return rxrpc_service_prealloc_one(rx, b, notify_rx, + user_attach_call, user_call_ID, + gfp); +} +EXPORT_SYMBOL(rxrpc_kernel_charge_accept); diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index f843397e03b6..d233adc9b5e5 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -31,6 +31,7 @@ const char *const rxrpc_call_states[NR__RXRPC_CALL_STATES] = { [RXRPC_CALL_CLIENT_AWAIT_REPLY] = "ClAwtRpl", [RXRPC_CALL_CLIENT_RECV_REPLY] = "ClRcvRpl", [RXRPC_CALL_CLIENT_FINAL_ACK] = "ClFnlACK", + [RXRPC_CALL_SERVER_PREALLOC] = "SvPrealc", [RXRPC_CALL_SERVER_SECURING] = "SvSecure", [RXRPC_CALL_SERVER_ACCEPTING] = "SvAccept", [RXRPC_CALL_SERVER_RECV_REQUEST] = "SvRcvReq", @@ -71,7 +72,6 @@ DEFINE_RWLOCK(rxrpc_call_lock); static void rxrpc_call_life_expired(unsigned long _call); static void rxrpc_ack_time_expired(unsigned long _call); static void rxrpc_resend_time_expired(unsigned long _call); -static void rxrpc_cleanup_call(struct rxrpc_call *call); /* * find an extant server call @@ -113,7 +113,7 @@ found_extant_call: /* * allocate a new call */ -static struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp) +struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp) { struct rxrpc_call *call; @@ -392,6 +392,9 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx, if (call_id <= conn->channels[chan].call_counter) goto old_call; /* TODO: Just drop packet */ + /* Temporary: Mirror the backlog prealloc ref (TODO: use prealloc) */ + rxrpc_get_call(candidate, rxrpc_call_got); + /* make the call available */ _debug("new call"); call = candidate; @@ -596,6 +599,9 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) del_timer_sync(&call->ack_timer); del_timer_sync(&call->lifetimer); + /* We have to release the prealloc backlog ref */ + if (rxrpc_is_service_call(call)) + rxrpc_put_call(call, rxrpc_call_put); _leave(""); } @@ -682,7 +688,7 @@ static void rxrpc_rcu_destroy_call(struct rcu_head *rcu) /* * clean up a call */ -static void rxrpc_cleanup_call(struct rxrpc_call *call) +void rxrpc_cleanup_call(struct rxrpc_call *call) { _net("DESTROY CALL %d", call->debug_id); diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 9c6685b97e70..8da82e3aa00e 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -286,6 +286,8 @@ static void rxrpc_connection_reaper(struct work_struct *work) ASSERTCMP(atomic_read(&conn->usage), >, 0); if (likely(atomic_read(&conn->usage) > 1)) continue; + if (conn->state == RXRPC_CONN_SERVICE_PREALLOC) + continue; idle_timestamp = READ_ONCE(conn->idle_timestamp); _debug("reap CONN %d { u=%d,t=%ld }", diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c index 316a92107fee..189338a60457 100644 --- a/net/rxrpc/conn_service.c +++ b/net/rxrpc/conn_service.c @@ -118,6 +118,30 @@ replace_old_connection: goto conn_published; } +/* + * Preallocate a service connection. The connection is placed on the proc and + * reap lists so that we don't have to get the lock from BH context. + */ +struct rxrpc_connection *rxrpc_prealloc_service_connection(gfp_t gfp) +{ + struct rxrpc_connection *conn = rxrpc_alloc_connection(gfp); + + if (conn) { + /* We maintain an extra ref on the connection whilst it is on + * the rxrpc_connections list. + */ + conn->state = RXRPC_CONN_SERVICE_PREALLOC; + atomic_set(&conn->usage, 2); + + write_lock(&rxrpc_connection_lock); + list_add_tail(&conn->link, &rxrpc_connections); + list_add_tail(&conn->proc_link, &rxrpc_connection_proc_list); + write_unlock(&rxrpc_connection_lock); + } + + return conn; +} + /* * get a record of an incoming connection */ diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 6c4b7df05e95..5906579060cd 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -102,7 +102,7 @@ int rxrpc_queue_rcv_skb(struct rxrpc_call *call, struct sk_buff *skb, rx->notify_new_call) { spin_unlock_bh(&sk->sk_receive_queue.lock); skb_queue_tail(&call->knlrecv_queue, skb); - rx->notify_new_call(&rx->sk); + rx->notify_new_call(&rx->sk, NULL, 0); } else if (call->notify_rx) { spin_unlock_bh(&sk->sk_receive_queue.lock); skb_queue_tail(&call->knlrecv_queue, skb); diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c index dfad23821a62..d529d1b4021c 100644 --- a/net/rxrpc/proc.c +++ b/net/rxrpc/proc.c @@ -17,6 +17,7 @@ static const char *const rxrpc_conn_states[RXRPC_CONN__NR_STATES] = { [RXRPC_CONN_UNUSED] = "Unused ", [RXRPC_CONN_CLIENT] = "Client ", + [RXRPC_CONN_SERVICE_PREALLOC] = "SvPrealc", [RXRPC_CONN_SERVICE_UNSECURED] = "SvUnsec ", [RXRPC_CONN_SERVICE_CHALLENGING] = "SvChall ", [RXRPC_CONN_SERVICE] = "SvSecure", @@ -156,6 +157,11 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v) } conn = list_entry(v, struct rxrpc_connection, proc_link); + if (conn->state == RXRPC_CONN_SERVICE_PREALLOC) { + strcpy(lbuff, "no_local"); + strcpy(rbuff, "no_connection"); + goto print; + } sprintf(lbuff, "%pI4:%u", &conn->params.local->srx.transport.sin.sin_addr, @@ -164,7 +170,7 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v) sprintf(rbuff, "%pI4:%u", &conn->params.peer->srx.transport.sin.sin_addr, ntohs(conn->params.peer->srx.transport.sin.sin_port)); - +print: seq_printf(seq, "UDP %-22.22s %-22.22s %4x %08x %s %3u" " %s %08x %08x %08x\n", From 248f219cb8bcbfbd7f132752d44afa2df7c241d1 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 8 Sep 2016 11:10:12 +0100 Subject: [PATCH 0819/1715] rxrpc: Rewrite the data and ack handling code Rewrite the data and ack handling code such that: (1) Parsing of received ACK and ABORT packets and the distribution and the filing of DATA packets happens entirely within the data_ready context called from the UDP socket. This allows us to process and discard ACK and ABORT packets much more quickly (they're no longer stashed on a queue for a background thread to process). (2) We avoid calling skb_clone(), pskb_pull() and pskb_trim(). We instead keep track of the offset and length of the content of each packet in the sk_buff metadata. This means we don't do any allocation in the receive path. (3) Jumbo DATA packet parsing is now done in data_ready context. Rather than cloning the packet once for each subpacket and pulling/trimming it, we file the packet multiple times with an annotation for each indicating which subpacket is there. From that we can directly calculate the offset and length. (4) A call's receive queue can be accessed without taking locks (memory barriers do have to be used, though). (5) Incoming calls are set up from preallocated resources and immediately made live. They can than have packets queued upon them and ACKs generated. If insufficient resources exist, DATA packet #1 is given a BUSY reply and other DATA packets are discarded). (6) sk_buffs no longer take a ref on their parent call. To make this work, the following changes are made: (1) Each call's receive buffer is now a circular buffer of sk_buff pointers (rxtx_buffer) rather than a number of sk_buff_heads spread between the call and the socket. This permits each sk_buff to be in the buffer multiple times. The receive buffer is reused for the transmit buffer. (2) A circular buffer of annotations (rxtx_annotations) is kept parallel to the data buffer. Transmission phase annotations indicate whether a buffered packet has been ACK'd or not and whether it needs retransmission. Receive phase annotations indicate whether a slot holds a whole packet or a jumbo subpacket and, if the latter, which subpacket. They also note whether the packet has been decrypted in place. (3) DATA packet window tracking is much simplified. Each phase has just two numbers representing the window (rx_hard_ack/rx_top and tx_hard_ack/tx_top). The hard_ack number is the sequence number before base of the window, representing the last packet the other side says it has consumed. hard_ack starts from 0 and the first packet is sequence number 1. The top number is the sequence number of the highest-numbered packet residing in the buffer. Packets between hard_ack+1 and top are soft-ACK'd to indicate they've been received, but not yet consumed. Four macros, before(), before_eq(), after() and after_eq() are added to compare sequence numbers within the window. This allows for the top of the window to wrap when the hard-ack sequence number gets close to the limit. Two flags, RXRPC_CALL_RX_LAST and RXRPC_CALL_TX_LAST, are added also to indicate when rx_top and tx_top point at the packets with the LAST_PACKET bit set, indicating the end of the phase. (4) Calls are queued on the socket 'receive queue' rather than packets. This means that we don't need have to invent dummy packets to queue to indicate abnormal/terminal states and we don't have to keep metadata packets (such as ABORTs) around (5) The offset and length of a (sub)packet's content are now passed to the verify_packet security op. This is currently expected to decrypt the packet in place and validate it. However, there's now nowhere to store the revised offset and length of the actual data within the decrypted blob (there may be a header and padding to skip) because an sk_buff may represent multiple packets, so a locate_data security op is added to retrieve these details from the sk_buff content when needed. (6) recvmsg() now has to handle jumbo subpackets, where each subpacket is individually secured and needs to be individually decrypted. The code to do this is broken out into rxrpc_recvmsg_data() and shared with the kernel API. It now iterates over the call's receive buffer rather than walking the socket receive queue. Additional changes: (1) The timers are condensed to a single timer that is set for the soonest of three timeouts (delayed ACK generation, DATA retransmission and call lifespan). (2) Transmission of ACK and ABORT packets is effected immediately from process-context socket ops/kernel API calls that cause them instead of them being punted off to a background work item. The data_ready handler still has to defer to the background, though. (3) A shutdown op is added to the AF_RXRPC socket so that the AFS filesystem can shut down the socket and flush its own work items before closing the socket to deal with any in-progress service calls. Future additional changes that will need to be considered: (1) Make sure that a call doesn't hog the front of the queue by receiving data from the network as fast as userspace is consuming it to the exclusion of other calls. (2) Transmit delayed ACKs from within recvmsg() when we've consumed sufficiently more packets to avoid the background work item needing to run. Signed-off-by: David Howells --- fs/afs/rxrpc.c | 51 +- include/net/af_rxrpc.h | 3 - include/rxrpc/packet.h | 7 + net/rxrpc/af_rxrpc.c | 57 +- net/rxrpc/ar-internal.h | 177 ++--- net/rxrpc/call_accept.c | 474 ++++++------- net/rxrpc/call_event.c | 1361 ++++++-------------------------------- net/rxrpc/call_object.c | 529 ++++----------- net/rxrpc/conn_event.c | 137 +--- net/rxrpc/conn_object.c | 6 +- net/rxrpc/conn_service.c | 101 +-- net/rxrpc/input.c | 1152 ++++++++++++++++---------------- net/rxrpc/insecure.c | 13 +- net/rxrpc/local_event.c | 2 +- net/rxrpc/local_object.c | 7 - net/rxrpc/misc.c | 2 +- net/rxrpc/output.c | 125 +++- net/rxrpc/peer_event.c | 17 +- net/rxrpc/peer_object.c | 84 ++- net/rxrpc/recvmsg.c | 798 ++++++++++++---------- net/rxrpc/rxkad.c | 108 ++- net/rxrpc/security.c | 10 +- net/rxrpc/sendmsg.c | 126 ++-- net/rxrpc/skbuff.c | 127 ---- 24 files changed, 2065 insertions(+), 3409 deletions(-) diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 720ef05a24fe..59bdaa7527b6 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -55,10 +55,8 @@ static const struct afs_call_type afs_RXCMxxxx = { .abort_to_error = afs_abort_to_error, }; -static void afs_collect_incoming_call(struct work_struct *); static void afs_charge_preallocation(struct work_struct *); -static DECLARE_WORK(afs_collect_incoming_call_work, afs_collect_incoming_call); static DECLARE_WORK(afs_charge_preallocation_work, afs_charge_preallocation); static int afs_wait_atomic_t(atomic_t *p) @@ -143,6 +141,8 @@ void afs_close_socket(void) TASK_UNINTERRUPTIBLE); _debug("no outstanding calls"); + flush_workqueue(afs_async_calls); + kernel_sock_shutdown(afs_socket, SHUT_RDWR); flush_workqueue(afs_async_calls); sock_release(afs_socket); @@ -602,51 +602,6 @@ static void afs_process_async_call(struct work_struct *work) _leave(""); } -/* - * accept the backlog of incoming calls - */ -static void afs_collect_incoming_call(struct work_struct *work) -{ - struct rxrpc_call *rxcall; - struct afs_call *call = NULL; - - _enter(""); - - do { - if (!call) { - call = kzalloc(sizeof(struct afs_call), GFP_KERNEL); - if (!call) { - rxrpc_kernel_reject_call(afs_socket); - return; - } - - INIT_WORK(&call->async_work, afs_process_async_call); - call->wait_mode = &afs_async_incoming_call; - call->type = &afs_RXCMxxxx; - init_waitqueue_head(&call->waitq); - call->state = AFS_CALL_AWAIT_OP_ID; - - _debug("CALL %p{%s} [%d]", - call, call->type->name, - atomic_read(&afs_outstanding_calls)); - atomic_inc(&afs_outstanding_calls); - } - - rxcall = rxrpc_kernel_accept_call(afs_socket, - (unsigned long)call, - afs_wake_up_async_call); - if (!IS_ERR(rxcall)) { - call->rxcall = rxcall; - call->need_attention = true; - queue_work(afs_async_calls, &call->async_work); - call = NULL; - } - } while (!call); - - if (call) - afs_free_call(call); -} - static void afs_rx_attach(struct rxrpc_call *rxcall, unsigned long user_call_ID) { struct afs_call *call = (struct afs_call *)user_call_ID; @@ -704,7 +659,7 @@ static void afs_rx_discard_new_call(struct rxrpc_call *rxcall, static void afs_rx_new_call(struct sock *sk, struct rxrpc_call *rxcall, unsigned long user_call_ID) { - queue_work(afs_wq, &afs_collect_incoming_call_work); + atomic_inc(&afs_outstanding_calls); queue_work(afs_wq, &afs_charge_preallocation_work); } diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index 9cf551be916b..1061a472a3e3 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -42,9 +42,6 @@ int rxrpc_kernel_recv_data(struct socket *, struct rxrpc_call *, void rxrpc_kernel_abort_call(struct socket *, struct rxrpc_call *, u32, int, const char *); void rxrpc_kernel_end_call(struct socket *, struct rxrpc_call *); -struct rxrpc_call *rxrpc_kernel_accept_call(struct socket *, unsigned long, - rxrpc_notify_rx_t); -int rxrpc_kernel_reject_call(struct socket *); void rxrpc_kernel_get_peer(struct socket *, struct rxrpc_call *, struct sockaddr_rxrpc *); int rxrpc_kernel_charge_accept(struct socket *, rxrpc_notify_rx_t, diff --git a/include/rxrpc/packet.h b/include/rxrpc/packet.h index b0ae5c1a6ce6..fd6eb3a60a8c 100644 --- a/include/rxrpc/packet.h +++ b/include/rxrpc/packet.h @@ -133,6 +133,13 @@ struct rxrpc_ackpacket { } __packed; +/* Some ACKs refer to specific packets and some are general and can be updated. */ +#define RXRPC_ACK_UPDATEABLE ((1 << RXRPC_ACK_REQUESTED) | \ + (1 << RXRPC_ACK_PING_RESPONSE) | \ + (1 << RXRPC_ACK_DELAY) | \ + (1 << RXRPC_ACK_IDLE)) + + /* * ACK packets can have a further piece of information tagged on the end */ diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 1e8cf3ded81f..caa226dd436e 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -155,7 +155,7 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len) } if (rx->srx.srx_service) { - write_lock_bh(&local->services_lock); + write_lock(&local->services_lock); hlist_for_each_entry(prx, &local->services, listen_link) { if (prx->srx.srx_service == rx->srx.srx_service) goto service_in_use; @@ -163,7 +163,7 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len) rx->local = local; hlist_add_head_rcu(&rx->listen_link, &local->services); - write_unlock_bh(&local->services_lock); + write_unlock(&local->services_lock); rx->sk.sk_state = RXRPC_SERVER_BOUND; } else { @@ -176,7 +176,7 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len) return 0; service_in_use: - write_unlock_bh(&local->services_lock); + write_unlock(&local->services_lock); rxrpc_put_local(local); ret = -EADDRINUSE; error_unlock: @@ -515,15 +515,16 @@ error: static unsigned int rxrpc_poll(struct file *file, struct socket *sock, poll_table *wait) { - unsigned int mask; struct sock *sk = sock->sk; + struct rxrpc_sock *rx = rxrpc_sk(sk); + unsigned int mask; sock_poll_wait(file, sk_sleep(sk), wait); mask = 0; /* the socket is readable if there are any messages waiting on the Rx * queue */ - if (!skb_queue_empty(&sk->sk_receive_queue)) + if (!list_empty(&rx->recvmsg_q)) mask |= POLLIN | POLLRDNORM; /* the socket is writable if there is space to add new data to the @@ -575,8 +576,11 @@ static int rxrpc_create(struct net *net, struct socket *sock, int protocol, rx->calls = RB_ROOT; INIT_HLIST_NODE(&rx->listen_link); - INIT_LIST_HEAD(&rx->secureq); - INIT_LIST_HEAD(&rx->acceptq); + spin_lock_init(&rx->incoming_lock); + INIT_LIST_HEAD(&rx->sock_calls); + INIT_LIST_HEAD(&rx->to_be_accepted); + INIT_LIST_HEAD(&rx->recvmsg_q); + rwlock_init(&rx->recvmsg_lock); rwlock_init(&rx->call_lock); memset(&rx->srx, 0, sizeof(rx->srx)); @@ -584,6 +588,39 @@ static int rxrpc_create(struct net *net, struct socket *sock, int protocol, return 0; } +/* + * Kill all the calls on a socket and shut it down. + */ +static int rxrpc_shutdown(struct socket *sock, int flags) +{ + struct sock *sk = sock->sk; + struct rxrpc_sock *rx = rxrpc_sk(sk); + int ret = 0; + + _enter("%p,%d", sk, flags); + + if (flags != SHUT_RDWR) + return -EOPNOTSUPP; + if (sk->sk_state == RXRPC_CLOSE) + return -ESHUTDOWN; + + lock_sock(sk); + + spin_lock_bh(&sk->sk_receive_queue.lock); + if (sk->sk_state < RXRPC_CLOSE) { + sk->sk_state = RXRPC_CLOSE; + sk->sk_shutdown = SHUTDOWN_MASK; + } else { + ret = -ESHUTDOWN; + } + spin_unlock_bh(&sk->sk_receive_queue.lock); + + rxrpc_discard_prealloc(rx); + + release_sock(sk); + return ret; +} + /* * RxRPC socket destructor */ @@ -623,9 +660,9 @@ static int rxrpc_release_sock(struct sock *sk) ASSERTCMP(rx->listen_link.next, !=, LIST_POISON1); if (!hlist_unhashed(&rx->listen_link)) { - write_lock_bh(&rx->local->services_lock); + write_lock(&rx->local->services_lock); hlist_del_rcu(&rx->listen_link); - write_unlock_bh(&rx->local->services_lock); + write_unlock(&rx->local->services_lock); } /* try to flush out this socket */ @@ -678,7 +715,7 @@ static const struct proto_ops rxrpc_rpc_ops = { .poll = rxrpc_poll, .ioctl = sock_no_ioctl, .listen = rxrpc_listen, - .shutdown = sock_no_shutdown, + .shutdown = rxrpc_shutdown, .setsockopt = rxrpc_setsockopt, .getsockopt = sock_no_getsockopt, .sendmsg = rxrpc_sendmsg, diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 45e1c269f90e..b1cb79ec4e96 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -94,9 +94,12 @@ struct rxrpc_sock { rxrpc_discard_new_call_t discard_new_call; /* Func to discard a new call */ struct rxrpc_local *local; /* local endpoint */ struct hlist_node listen_link; /* link in the local endpoint's listen list */ - struct list_head secureq; /* calls awaiting connection security clearance */ - struct list_head acceptq; /* calls awaiting acceptance */ struct rxrpc_backlog *backlog; /* Preallocation for services */ + spinlock_t incoming_lock; /* Incoming call vs service shutdown lock */ + struct list_head sock_calls; /* List of calls owned by this socket */ + struct list_head to_be_accepted; /* calls awaiting acceptance */ + struct list_head recvmsg_q; /* Calls awaiting recvmsg's attention */ + rwlock_t recvmsg_lock; /* Lock for recvmsg_q */ struct key *key; /* security for this socket */ struct key *securities; /* list of server security descriptors */ struct rb_root calls; /* User ID -> call mapping */ @@ -138,13 +141,16 @@ struct rxrpc_host_header { * - max 48 bytes (struct sk_buff::cb) */ struct rxrpc_skb_priv { - struct rxrpc_call *call; /* call with which associated */ - unsigned long resend_at; /* time in jiffies at which to resend */ + union { + unsigned long resend_at; /* time in jiffies at which to resend */ + struct { + u8 nr_jumbo; /* Number of jumbo subpackets */ + }; + }; union { unsigned int offset; /* offset into buffer of next read */ int remain; /* amount of space remaining for next write */ u32 error; /* network error code */ - bool need_resend; /* T if needs resending */ }; struct rxrpc_host_header hdr; /* RxRPC packet header from this packet */ @@ -179,7 +185,11 @@ struct rxrpc_security { /* verify the security on a received packet */ int (*verify_packet)(struct rxrpc_call *, struct sk_buff *, - rxrpc_seq_t, u16); + unsigned int, unsigned int, rxrpc_seq_t, u16); + + /* Locate the data in a received packet that has been verified. */ + void (*locate_data)(struct rxrpc_call *, struct sk_buff *, + unsigned int *, unsigned int *); /* issue a challenge */ int (*issue_challenge)(struct rxrpc_connection *); @@ -211,7 +221,6 @@ struct rxrpc_local { struct work_struct processor; struct hlist_head services; /* services listening on this endpoint */ struct rw_semaphore defrag_sem; /* control re-enablement of IP DF bit */ - struct sk_buff_head accept_queue; /* incoming calls awaiting acceptance */ struct sk_buff_head reject_queue; /* packets awaiting rejection */ struct sk_buff_head event_queue; /* endpoint event packets awaiting processing */ struct rb_root client_conns; /* Client connections by socket params */ @@ -388,38 +397,21 @@ struct rxrpc_connection { */ enum rxrpc_call_flag { RXRPC_CALL_RELEASED, /* call has been released - no more message to userspace */ - RXRPC_CALL_TERMINAL_MSG, /* call has given the socket its final message */ - RXRPC_CALL_RCVD_LAST, /* all packets received */ - RXRPC_CALL_RUN_RTIMER, /* Tx resend timer started */ - RXRPC_CALL_TX_SOFT_ACK, /* sent some soft ACKs */ - RXRPC_CALL_INIT_ACCEPT, /* acceptance was initiated */ RXRPC_CALL_HAS_USERID, /* has a user ID attached */ - RXRPC_CALL_EXPECT_OOS, /* expect out of sequence packets */ RXRPC_CALL_IS_SERVICE, /* Call is service call */ RXRPC_CALL_EXPOSED, /* The call was exposed to the world */ - RXRPC_CALL_RX_NO_MORE, /* Don't indicate MSG_MORE from recvmsg() */ + RXRPC_CALL_RX_LAST, /* Received the last packet (at rxtx_top) */ + RXRPC_CALL_TX_LAST, /* Last packet in Tx buffer (at rxtx_top) */ }; /* * Events that can be raised on a call. */ enum rxrpc_call_event { - RXRPC_CALL_EV_RCVD_ACKALL, /* ACKALL or reply received */ - RXRPC_CALL_EV_RCVD_BUSY, /* busy packet received */ - RXRPC_CALL_EV_RCVD_ABORT, /* abort packet received */ - RXRPC_CALL_EV_RCVD_ERROR, /* network error received */ - RXRPC_CALL_EV_ACK_FINAL, /* need to generate final ACK (and release call) */ RXRPC_CALL_EV_ACK, /* need to generate ACK */ - RXRPC_CALL_EV_REJECT_BUSY, /* need to generate busy message */ RXRPC_CALL_EV_ABORT, /* need to generate abort */ - RXRPC_CALL_EV_CONN_ABORT, /* local connection abort generated */ - RXRPC_CALL_EV_RESEND_TIMER, /* Tx resend timer expired */ + RXRPC_CALL_EV_TIMER, /* Timer expired */ RXRPC_CALL_EV_RESEND, /* Tx resend required */ - RXRPC_CALL_EV_DRAIN_RX_OOS, /* drain the Rx out of sequence queue */ - RXRPC_CALL_EV_LIFE_TIMER, /* call's lifetimer ran out */ - RXRPC_CALL_EV_ACCEPTED, /* incoming call accepted by userspace app */ - RXRPC_CALL_EV_SECURED, /* incoming call's connection is now secure */ - RXRPC_CALL_EV_POST_ACCEPT, /* need to post an "accept?" message to the app */ }; /* @@ -431,7 +423,6 @@ enum rxrpc_call_state { RXRPC_CALL_CLIENT_SEND_REQUEST, /* - client sending request phase */ RXRPC_CALL_CLIENT_AWAIT_REPLY, /* - client awaiting reply */ RXRPC_CALL_CLIENT_RECV_REPLY, /* - client receiving reply phase */ - RXRPC_CALL_CLIENT_FINAL_ACK, /* - client sending final ACK phase */ RXRPC_CALL_SERVER_PREALLOC, /* - service preallocation */ RXRPC_CALL_SERVER_SECURING, /* - server securing request connection */ RXRPC_CALL_SERVER_ACCEPTING, /* - server accepting request */ @@ -448,7 +439,6 @@ enum rxrpc_call_state { */ enum rxrpc_call_completion { RXRPC_CALL_SUCCEEDED, /* - Normal termination */ - RXRPC_CALL_SERVER_BUSY, /* - call rejected by busy server */ RXRPC_CALL_REMOTELY_ABORTED, /* - call aborted by peer */ RXRPC_CALL_LOCALLY_ABORTED, /* - call aborted locally on error or close */ RXRPC_CALL_LOCAL_ERROR, /* - call failed due to local error */ @@ -465,24 +455,23 @@ struct rxrpc_call { struct rxrpc_connection *conn; /* connection carrying call */ struct rxrpc_peer *peer; /* Peer record for remote address */ struct rxrpc_sock __rcu *socket; /* socket responsible */ - struct timer_list lifetimer; /* lifetime remaining on call */ - struct timer_list ack_timer; /* ACK generation timer */ - struct timer_list resend_timer; /* Tx resend timer */ - struct work_struct processor; /* packet processor and ACK generator */ + unsigned long ack_at; /* When deferred ACK needs to happen */ + unsigned long resend_at; /* When next resend needs to happen */ + unsigned long expire_at; /* When the call times out */ + struct timer_list timer; /* Combined event timer */ + struct work_struct processor; /* Event processor */ rxrpc_notify_rx_t notify_rx; /* kernel service Rx notification function */ struct list_head link; /* link in master call list */ struct list_head chan_wait_link; /* Link in conn->waiting_calls */ struct hlist_node error_link; /* link in error distribution list */ - struct list_head accept_link; /* calls awaiting acceptance */ - struct rb_node sock_node; /* node in socket call tree */ - struct sk_buff_head rx_queue; /* received packets */ - struct sk_buff_head rx_oos_queue; /* packets received out of sequence */ - struct sk_buff_head knlrecv_queue; /* Queue for kernel_recv [TODO: replace this] */ + struct list_head accept_link; /* Link in rx->acceptq */ + struct list_head recvmsg_link; /* Link in rx->recvmsg_q */ + struct list_head sock_link; /* Link in rx->sock_calls */ + struct rb_node sock_node; /* Node in rx->calls */ struct sk_buff *tx_pending; /* Tx socket buffer being filled */ wait_queue_head_t waitq; /* Wait queue for channel or Tx */ __be32 crypto_buf[2]; /* Temporary packet crypto buffer */ unsigned long user_call_ID; /* user-defined call ID */ - unsigned long creation_jif; /* time of call creation */ unsigned long flags; unsigned long events; spinlock_t lock; @@ -492,40 +481,55 @@ struct rxrpc_call { enum rxrpc_call_state state; /* current state of call */ enum rxrpc_call_completion completion; /* Call completion condition */ atomic_t usage; - atomic_t sequence; /* Tx data packet sequence counter */ u16 service_id; /* service ID */ u8 security_ix; /* Security type */ u32 call_id; /* call ID on connection */ u32 cid; /* connection ID plus channel index */ int debug_id; /* debug ID for printks */ - /* transmission-phase ACK management */ - u8 acks_head; /* offset into window of first entry */ - u8 acks_tail; /* offset into window of last entry */ - u8 acks_winsz; /* size of un-ACK'd window */ - u8 acks_unacked; /* lowest unacked packet in last ACK received */ - int acks_latest; /* serial number of latest ACK received */ - rxrpc_seq_t acks_hard; /* highest definitively ACK'd msg seq */ - unsigned long *acks_window; /* sent packet window - * - elements are pointers with LSB set if ACK'd + /* Rx/Tx circular buffer, depending on phase. + * + * In the Rx phase, packets are annotated with 0 or the number of the + * segment of a jumbo packet each buffer refers to. There can be up to + * 47 segments in a maximum-size UDP packet. + * + * In the Tx phase, packets are annotated with which buffers have been + * acked. + */ +#define RXRPC_RXTX_BUFF_SIZE 64 +#define RXRPC_RXTX_BUFF_MASK (RXRPC_RXTX_BUFF_SIZE - 1) + struct sk_buff **rxtx_buffer; + u8 *rxtx_annotations; +#define RXRPC_TX_ANNO_ACK 0 +#define RXRPC_TX_ANNO_UNACK 1 +#define RXRPC_TX_ANNO_NAK 2 +#define RXRPC_TX_ANNO_RETRANS 3 +#define RXRPC_RX_ANNO_JUMBO 0x3f /* Jumbo subpacket number + 1 if not zero */ +#define RXRPC_RX_ANNO_JLAST 0x40 /* Set if last element of a jumbo packet */ +#define RXRPC_RX_ANNO_VERIFIED 0x80 /* Set if verified and decrypted */ + rxrpc_seq_t tx_hard_ack; /* Dead slot in buffer; the first transmitted but + * not hard-ACK'd packet follows this. */ + rxrpc_seq_t tx_top; /* Highest Tx slot allocated. */ + rxrpc_seq_t rx_hard_ack; /* Dead slot in buffer; the first received but not + * consumed packet follows this. + */ + rxrpc_seq_t rx_top; /* Highest Rx slot allocated. */ + rxrpc_seq_t rx_expect_next; /* Expected next packet sequence number */ + u8 rx_winsize; /* Size of Rx window */ + u8 tx_winsize; /* Maximum size of Tx window */ + u8 nr_jumbo_dup; /* Number of jumbo duplicates */ /* receive-phase ACK management */ - rxrpc_seq_t rx_data_expect; /* next data seq ID expected to be received */ - rxrpc_seq_t rx_data_post; /* next data seq ID expected to be posted */ - rxrpc_seq_t rx_data_recv; /* last data seq ID encountered by recvmsg */ - rxrpc_seq_t rx_data_eaten; /* last data seq ID consumed by recvmsg */ - rxrpc_seq_t rx_first_oos; /* first packet in rx_oos_queue (or 0) */ - rxrpc_seq_t ackr_win_top; /* top of ACK window (rx_data_eaten is bottom) */ - rxrpc_seq_t ackr_prev_seq; /* previous sequence number received */ u8 ackr_reason; /* reason to ACK */ u16 ackr_skew; /* skew on packet being ACK'd */ rxrpc_serial_t ackr_serial; /* serial of packet being ACK'd */ - atomic_t ackr_not_idle; /* number of packets in Rx queue */ + rxrpc_seq_t ackr_prev_seq; /* previous sequence number received */ + unsigned short rx_pkt_offset; /* Current recvmsg packet offset */ + unsigned short rx_pkt_len; /* Current recvmsg packet len */ - /* received packet records, 1 bit per record */ -#define RXRPC_ACKR_WINDOW_ASZ DIV_ROUND_UP(RXRPC_MAXACKS, BITS_PER_LONG) - unsigned long ackr_window[RXRPC_ACKR_WINDOW_ASZ + 1]; + /* transmission-phase ACK management */ + rxrpc_serial_t acks_latest; /* serial number of latest ACK received */ }; enum rxrpc_call_trace { @@ -535,10 +539,8 @@ enum rxrpc_call_trace { rxrpc_call_queued_ref, rxrpc_call_seen, rxrpc_call_got, - rxrpc_call_got_skb, rxrpc_call_got_userid, rxrpc_call_put, - rxrpc_call_put_skb, rxrpc_call_put_userid, rxrpc_call_put_noqueue, rxrpc_call__nr_trace @@ -561,6 +563,9 @@ extern struct workqueue_struct *rxrpc_workqueue; */ int rxrpc_service_prealloc(struct rxrpc_sock *, gfp_t); void rxrpc_discard_prealloc(struct rxrpc_sock *); +struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *, + struct rxrpc_connection *, + struct sk_buff *); void rxrpc_accept_incoming_calls(struct rxrpc_local *); struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *, unsigned long, rxrpc_notify_rx_t); @@ -569,8 +574,7 @@ int rxrpc_reject_call(struct rxrpc_sock *); /* * call_event.c */ -void __rxrpc_propose_ACK(struct rxrpc_call *, u8, u16, u32, bool); -void rxrpc_propose_ACK(struct rxrpc_call *, u8, u16, u32, bool); +void rxrpc_propose_ACK(struct rxrpc_call *, u8, u16, u32, bool, bool); void rxrpc_process_call(struct work_struct *); /* @@ -589,9 +593,8 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *, struct rxrpc_conn_parameters *, struct sockaddr_rxrpc *, unsigned long, gfp_t); -struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *, - struct rxrpc_connection *, - struct sk_buff *); +void rxrpc_incoming_call(struct rxrpc_sock *, struct rxrpc_call *, + struct sk_buff *); void rxrpc_release_call(struct rxrpc_sock *, struct rxrpc_call *); void rxrpc_release_calls_on_socket(struct rxrpc_sock *); bool __rxrpc_queue_call(struct rxrpc_call *); @@ -599,8 +602,6 @@ bool rxrpc_queue_call(struct rxrpc_call *); void rxrpc_see_call(struct rxrpc_call *); void rxrpc_get_call(struct rxrpc_call *, enum rxrpc_call_trace); void rxrpc_put_call(struct rxrpc_call *, enum rxrpc_call_trace); -void rxrpc_get_call_for_skb(struct rxrpc_call *, struct sk_buff *); -void rxrpc_put_call_for_skb(struct rxrpc_call *, struct sk_buff *); void rxrpc_cleanup_call(struct rxrpc_call *); void __exit rxrpc_destroy_all_calls(void); @@ -672,13 +673,8 @@ static inline bool __rxrpc_abort_call(const char *why, struct rxrpc_call *call, { trace_rxrpc_abort(why, call->cid, call->call_id, seq, abort_code, error); - if (__rxrpc_set_call_completion(call, - RXRPC_CALL_LOCALLY_ABORTED, - abort_code, error)) { - set_bit(RXRPC_CALL_EV_ABORT, &call->events); - return true; - } - return false; + return __rxrpc_set_call_completion(call, RXRPC_CALL_LOCALLY_ABORTED, + abort_code, error); } static inline bool rxrpc_abort_call(const char *why, struct rxrpc_call *call, @@ -713,8 +709,6 @@ void __exit rxrpc_destroy_all_client_connections(void); * conn_event.c */ void rxrpc_process_connection(struct work_struct *); -void rxrpc_reject_packet(struct rxrpc_local *, struct sk_buff *); -void rxrpc_reject_packets(struct rxrpc_local *); /* * conn_object.c @@ -783,18 +777,14 @@ static inline bool rxrpc_queue_conn(struct rxrpc_connection *conn) */ struct rxrpc_connection *rxrpc_find_service_conn_rcu(struct rxrpc_peer *, struct sk_buff *); -struct rxrpc_connection *rxrpc_incoming_connection(struct rxrpc_local *, - struct sockaddr_rxrpc *, - struct sk_buff *); struct rxrpc_connection *rxrpc_prealloc_service_connection(gfp_t); +void rxrpc_new_incoming_connection(struct rxrpc_connection *, struct sk_buff *); void rxrpc_unpublish_service_conn(struct rxrpc_connection *); /* * input.c */ void rxrpc_data_ready(struct sock *); -int rxrpc_queue_rcv_skb(struct rxrpc_call *, struct sk_buff *, bool, bool); -void rxrpc_fast_process_packet(struct rxrpc_call *, struct sk_buff *); /* * insecure.c @@ -868,6 +858,7 @@ extern const char *rxrpc_acks(u8 reason); */ int rxrpc_send_call_packet(struct rxrpc_call *, u8); int rxrpc_send_data_packet(struct rxrpc_connection *, struct sk_buff *); +void rxrpc_reject_packets(struct rxrpc_local *); /* * peer_event.c @@ -883,6 +874,8 @@ struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *, struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *, struct sockaddr_rxrpc *, gfp_t); struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *, gfp_t); +struct rxrpc_peer *rxrpc_lookup_incoming_peer(struct rxrpc_local *, + struct rxrpc_peer *); static inline struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *peer) { @@ -912,6 +905,7 @@ extern const struct file_operations rxrpc_connection_seq_fops; /* * recvmsg.c */ +void rxrpc_notify_socket(struct rxrpc_call *); int rxrpc_recvmsg(struct socket *, struct msghdr *, size_t, int); /* @@ -961,6 +955,23 @@ static inline void rxrpc_sysctl_exit(void) {} */ int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *, struct sk_buff *); +static inline bool before(u32 seq1, u32 seq2) +{ + return (s32)(seq1 - seq2) < 0; +} +static inline bool before_eq(u32 seq1, u32 seq2) +{ + return (s32)(seq1 - seq2) <= 0; +} +static inline bool after(u32 seq1, u32 seq2) +{ + return (s32)(seq1 - seq2) > 0; +} +static inline bool after_eq(u32 seq1, u32 seq2) +{ + return (s32)(seq1 - seq2) >= 0; +} + /* * debug tracing */ diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index cc7194e05a15..b8acec0d596e 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -129,6 +129,8 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, set_bit(RXRPC_CALL_HAS_USERID, &call->flags); } + list_add(&call->sock_link, &rx->sock_calls); + write_unlock(&rx->call_lock); write_lock(&rxrpc_call_lock); @@ -186,6 +188,12 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx) return; rx->backlog = NULL; + /* Make sure that there aren't any incoming calls in progress before we + * clear the preallocation buffers. + */ + spin_lock_bh(&rx->incoming_lock); + spin_unlock_bh(&rx->incoming_lock); + head = b->peer_backlog_head; tail = b->peer_backlog_tail; while (CIRC_CNT(head, tail, size) > 0) { @@ -224,251 +232,179 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx) } /* - * generate a connection-level abort + * Allocate a new incoming call from the prealloc pool, along with a connection + * and a peer as necessary. */ -static int rxrpc_busy(struct rxrpc_local *local, struct sockaddr_rxrpc *srx, - struct rxrpc_wire_header *whdr) +static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, + struct rxrpc_local *local, + struct rxrpc_connection *conn, + struct sk_buff *skb) { - struct msghdr msg; - struct kvec iov[1]; - size_t len; - int ret; + struct rxrpc_backlog *b = rx->backlog; + struct rxrpc_peer *peer, *xpeer; + struct rxrpc_call *call; + unsigned short call_head, conn_head, peer_head; + unsigned short call_tail, conn_tail, peer_tail; + unsigned short call_count, conn_count; - _enter("%d,,", local->debug_id); + /* #calls >= #conns >= #peers must hold true. */ + call_head = smp_load_acquire(&b->call_backlog_head); + call_tail = b->call_backlog_tail; + call_count = CIRC_CNT(call_head, call_tail, RXRPC_BACKLOG_MAX); + conn_head = smp_load_acquire(&b->conn_backlog_head); + conn_tail = b->conn_backlog_tail; + conn_count = CIRC_CNT(conn_head, conn_tail, RXRPC_BACKLOG_MAX); + ASSERTCMP(conn_count, >=, call_count); + peer_head = smp_load_acquire(&b->peer_backlog_head); + peer_tail = b->peer_backlog_tail; + ASSERTCMP(CIRC_CNT(peer_head, peer_tail, RXRPC_BACKLOG_MAX), >=, + conn_count); - whdr->type = RXRPC_PACKET_TYPE_BUSY; - whdr->serial = htonl(1); + if (call_count == 0) + return NULL; - msg.msg_name = &srx->transport.sin; - msg.msg_namelen = sizeof(srx->transport.sin); - msg.msg_control = NULL; - msg.msg_controllen = 0; - msg.msg_flags = 0; + if (!conn) { + /* No connection. We're going to need a peer to start off + * with. If one doesn't yet exist, use a spare from the + * preallocation set. We dump the address into the spare in + * anticipation - and to save on stack space. + */ + xpeer = b->peer_backlog[peer_tail]; + if (rxrpc_extract_addr_from_skb(&xpeer->srx, skb) < 0) + return NULL; - iov[0].iov_base = whdr; - iov[0].iov_len = sizeof(*whdr); + peer = rxrpc_lookup_incoming_peer(local, xpeer); + if (peer == xpeer) { + b->peer_backlog[peer_tail] = NULL; + smp_store_release(&b->peer_backlog_tail, + (peer_tail + 1) & + (RXRPC_BACKLOG_MAX - 1)); + } - len = iov[0].iov_len; - - _proto("Tx BUSY %%1"); - - ret = kernel_sendmsg(local->socket, &msg, iov, 1, len); - if (ret < 0) { - _leave(" = -EAGAIN [sendmsg failed: %d]", ret); - return -EAGAIN; + /* Now allocate and set up the connection */ + conn = b->conn_backlog[conn_tail]; + b->conn_backlog[conn_tail] = NULL; + smp_store_release(&b->conn_backlog_tail, + (conn_tail + 1) & (RXRPC_BACKLOG_MAX - 1)); + rxrpc_get_local(local); + conn->params.local = local; + conn->params.peer = peer; + rxrpc_new_incoming_connection(conn, skb); + } else { + rxrpc_get_connection(conn); } - _leave(" = 0"); - return 0; + /* And now we can allocate and set up a new call */ + call = b->call_backlog[call_tail]; + b->call_backlog[call_tail] = NULL; + smp_store_release(&b->call_backlog_tail, + (call_tail + 1) & (RXRPC_BACKLOG_MAX - 1)); + + call->conn = conn; + call->peer = rxrpc_get_peer(conn->params.peer); + return call; } /* - * accept an incoming call that needs peer, transport and/or connection setting - * up + * Set up a new incoming call. Called in BH context with the RCU read lock + * held. + * + * If this is for a kernel service, when we allocate the call, it will have + * three refs on it: (1) the kernel service, (2) the user_call_ID tree, (3) the + * retainer ref obtained from the backlog buffer. Prealloc calls for userspace + * services only have the ref from the backlog buffer. We want to pass this + * ref to non-BH context to dispose of. + * + * If we want to report an error, we mark the skb with the packet type and + * abort code and return NULL. */ -static int rxrpc_accept_incoming_call(struct rxrpc_local *local, - struct rxrpc_sock *rx, - struct sk_buff *skb, - struct sockaddr_rxrpc *srx) +struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, + struct rxrpc_connection *conn, + struct sk_buff *skb) { - struct rxrpc_connection *conn; - struct rxrpc_skb_priv *sp, *nsp; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + struct rxrpc_sock *rx; struct rxrpc_call *call; - struct sk_buff *notification; - int ret; _enter(""); - sp = rxrpc_skb(skb); - - /* get a notification message to send to the server app */ - notification = alloc_skb(0, GFP_NOFS); - if (!notification) { - _debug("no memory"); - ret = -ENOMEM; - goto error_nofree; - } - rxrpc_new_skb(notification); - notification->mark = RXRPC_SKB_MARK_NEW_CALL; - - conn = rxrpc_incoming_connection(local, srx, skb); - if (IS_ERR(conn)) { - _debug("no conn"); - ret = PTR_ERR(conn); - goto error; - } - - call = rxrpc_incoming_call(rx, conn, skb); - rxrpc_put_connection(conn); - if (IS_ERR(call)) { - _debug("no call"); - ret = PTR_ERR(call); - goto error; - } - - /* attach the call to the socket */ - read_lock_bh(&local->services_lock); - if (rx->sk.sk_state == RXRPC_CLOSE) - goto invalid_service; - - write_lock(&rx->call_lock); - if (!test_and_set_bit(RXRPC_CALL_INIT_ACCEPT, &call->flags)) { - rxrpc_get_call(call, rxrpc_call_got); - - spin_lock(&call->conn->state_lock); - if (sp->hdr.securityIndex > 0 && - call->conn->state == RXRPC_CONN_SERVICE_UNSECURED) { - _debug("await conn sec"); - list_add_tail(&call->accept_link, &rx->secureq); - call->conn->state = RXRPC_CONN_SERVICE_CHALLENGING; - set_bit(RXRPC_CONN_EV_CHALLENGE, &call->conn->events); - rxrpc_queue_conn(call->conn); - } else { - _debug("conn ready"); - call->state = RXRPC_CALL_SERVER_ACCEPTING; - list_add_tail(&call->accept_link, &rx->acceptq); - rxrpc_get_call_for_skb(call, notification); - nsp = rxrpc_skb(notification); - nsp->call = call; - - ASSERTCMP(atomic_read(&call->usage), >=, 3); - - _debug("notify"); - spin_lock(&call->lock); - ret = rxrpc_queue_rcv_skb(call, notification, true, - false); - spin_unlock(&call->lock); - notification = NULL; - BUG_ON(ret < 0); - } - spin_unlock(&call->conn->state_lock); - - _debug("queued"); - } - write_unlock(&rx->call_lock); - - _debug("process"); - rxrpc_fast_process_packet(call, skb); - - _debug("done"); - read_unlock_bh(&local->services_lock); - rxrpc_free_skb(notification); - rxrpc_put_call(call, rxrpc_call_put); - _leave(" = 0"); - return 0; - -invalid_service: - _debug("invalid"); - read_unlock_bh(&local->services_lock); - - rxrpc_release_call(rx, call); - rxrpc_put_call(call, rxrpc_call_put); - ret = -ECONNREFUSED; -error: - rxrpc_free_skb(notification); -error_nofree: - _leave(" = %d", ret); - return ret; -} - -/* - * accept incoming calls that need peer, transport and/or connection setting up - * - the packets we get are all incoming client DATA packets that have seq == 1 - */ -void rxrpc_accept_incoming_calls(struct rxrpc_local *local) -{ - struct rxrpc_skb_priv *sp; - struct sockaddr_rxrpc srx; - struct rxrpc_sock *rx; - struct rxrpc_wire_header whdr; - struct sk_buff *skb; - int ret; - - _enter("%d", local->debug_id); - - skb = skb_dequeue(&local->accept_queue); - if (!skb) { - _leave("\n"); - return; - } - - _net("incoming call skb %p", skb); - - rxrpc_see_skb(skb); - sp = rxrpc_skb(skb); - - /* Set up a response packet header in case we need it */ - whdr.epoch = htonl(sp->hdr.epoch); - whdr.cid = htonl(sp->hdr.cid); - whdr.callNumber = htonl(sp->hdr.callNumber); - whdr.seq = htonl(sp->hdr.seq); - whdr.serial = 0; - whdr.flags = 0; - whdr.type = 0; - whdr.userStatus = 0; - whdr.securityIndex = sp->hdr.securityIndex; - whdr._rsvd = 0; - whdr.serviceId = htons(sp->hdr.serviceId); - - if (rxrpc_extract_addr_from_skb(&srx, skb) < 0) - goto drop; - - /* get the socket providing the service */ - read_lock_bh(&local->services_lock); - hlist_for_each_entry(rx, &local->services, listen_link) { - if (rx->srx.srx_service == sp->hdr.serviceId && - rx->sk.sk_state != RXRPC_CLOSE) + /* Get the socket providing the service */ + hlist_for_each_entry_rcu_bh(rx, &local->services, listen_link) { + if (rx->srx.srx_service == sp->hdr.serviceId) goto found_service; } - read_unlock_bh(&local->services_lock); - goto invalid_service; + + trace_rxrpc_abort("INV", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, + RX_INVALID_OPERATION, EOPNOTSUPP); + skb->mark = RXRPC_SKB_MARK_LOCAL_ABORT; + skb->priority = RX_INVALID_OPERATION; + _leave(" = NULL [service]"); + return NULL; found_service: - _debug("found service %hd", rx->srx.srx_service); - if (sk_acceptq_is_full(&rx->sk)) - goto backlog_full; - sk_acceptq_added(&rx->sk); - read_unlock_bh(&local->services_lock); + spin_lock(&rx->incoming_lock); + if (rx->sk.sk_state == RXRPC_CLOSE) { + trace_rxrpc_abort("CLS", sp->hdr.cid, sp->hdr.callNumber, + sp->hdr.seq, RX_INVALID_OPERATION, ESHUTDOWN); + skb->mark = RXRPC_SKB_MARK_LOCAL_ABORT; + skb->priority = RX_INVALID_OPERATION; + _leave(" = NULL [close]"); + call = NULL; + goto out; + } - ret = rxrpc_accept_incoming_call(local, rx, skb, &srx); - if (ret < 0) - sk_acceptq_removed(&rx->sk); - switch (ret) { - case -ECONNRESET: /* old calls are ignored */ - case -ECONNABORTED: /* aborted calls are reaborted or ignored */ - case 0: - return; - case -ECONNREFUSED: - goto invalid_service; - case -EBUSY: - goto busy; - case -EKEYREJECTED: - goto security_mismatch; + call = rxrpc_alloc_incoming_call(rx, local, conn, skb); + if (!call) { + skb->mark = RXRPC_SKB_MARK_BUSY; + _leave(" = NULL [busy]"); + call = NULL; + goto out; + } + + /* Make the call live. */ + rxrpc_incoming_call(rx, call, skb); + conn = call->conn; + + if (rx->notify_new_call) + rx->notify_new_call(&rx->sk, call, call->user_call_ID); + + spin_lock(&conn->state_lock); + switch (conn->state) { + case RXRPC_CONN_SERVICE_UNSECURED: + conn->state = RXRPC_CONN_SERVICE_CHALLENGING; + set_bit(RXRPC_CONN_EV_CHALLENGE, &call->conn->events); + rxrpc_queue_conn(call->conn); + break; + + case RXRPC_CONN_SERVICE: + write_lock(&call->state_lock); + if (rx->discard_new_call) + call->state = RXRPC_CALL_SERVER_RECV_REQUEST; + else + call->state = RXRPC_CALL_SERVER_ACCEPTING; + write_unlock(&call->state_lock); + break; + + case RXRPC_CONN_REMOTELY_ABORTED: + rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED, + conn->remote_abort, ECONNABORTED); + break; + case RXRPC_CONN_LOCALLY_ABORTED: + rxrpc_abort_call("CON", call, sp->hdr.seq, + conn->local_abort, ECONNABORTED); + break; default: BUG(); } + spin_unlock(&conn->state_lock); -backlog_full: - read_unlock_bh(&local->services_lock); -busy: - rxrpc_busy(local, &srx, &whdr); - rxrpc_free_skb(skb); - return; + if (call->state == RXRPC_CALL_SERVER_ACCEPTING) + rxrpc_notify_socket(call); -drop: - rxrpc_free_skb(skb); - return; - -invalid_service: - skb->priority = RX_INVALID_OPERATION; - rxrpc_reject_packet(local, skb); - return; - - /* can't change connection security type mid-flow */ -security_mismatch: - skb->priority = RX_PROTOCOL_ERROR; - rxrpc_reject_packet(local, skb); - return; + _leave(" = %p{%d}", call, call->debug_id); +out: + spin_unlock(&rx->incoming_lock); + return call; } /* @@ -490,11 +426,10 @@ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx, write_lock(&rx->call_lock); ret = -ENODATA; - if (list_empty(&rx->acceptq)) + if (list_empty(&rx->to_be_accepted)) goto out; /* check the user ID isn't already in use */ - ret = -EBADSLT; pp = &rx->calls.rb_node; parent = NULL; while (*pp) { @@ -506,11 +441,14 @@ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx, else if (user_call_ID > call->user_call_ID) pp = &(*pp)->rb_right; else - goto out; + goto id_in_use; } - /* dequeue the first call and check it's still valid */ - call = list_entry(rx->acceptq.next, struct rxrpc_call, accept_link); + /* Dequeue the first call and check it's still valid. We gain + * responsibility for the queue's reference. + */ + call = list_entry(rx->to_be_accepted.next, + struct rxrpc_call, accept_link); list_del_init(&call->accept_link); sk_acceptq_removed(&rx->sk); rxrpc_see_call(call); @@ -528,31 +466,35 @@ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx, } /* formalise the acceptance */ - rxrpc_get_call(call, rxrpc_call_got_userid); + rxrpc_get_call(call, rxrpc_call_got); call->notify_rx = notify_rx; call->user_call_ID = user_call_ID; + rxrpc_get_call(call, rxrpc_call_got_userid); rb_link_node(&call->sock_node, parent, pp); rb_insert_color(&call->sock_node, &rx->calls); if (test_and_set_bit(RXRPC_CALL_HAS_USERID, &call->flags)) BUG(); - if (test_and_set_bit(RXRPC_CALL_EV_ACCEPTED, &call->events)) - BUG(); write_unlock_bh(&call->state_lock); write_unlock(&rx->call_lock); - rxrpc_queue_call(call); + rxrpc_notify_socket(call); + rxrpc_service_prealloc(rx, GFP_KERNEL); _leave(" = %p{%d}", call, call->debug_id); return call; out_release: + _debug("release %p", call); write_unlock_bh(&call->state_lock); write_unlock(&rx->call_lock); - _debug("release %p", call); rxrpc_release_call(rx, call); - _leave(" = %d", ret); - return ERR_PTR(ret); -out: + rxrpc_put_call(call, rxrpc_call_put); + goto out; + +id_in_use: + ret = -EBADSLT; write_unlock(&rx->call_lock); +out: + rxrpc_service_prealloc(rx, GFP_KERNEL); _leave(" = %d", ret); return ERR_PTR(ret); } @@ -564,6 +506,7 @@ out: int rxrpc_reject_call(struct rxrpc_sock *rx) { struct rxrpc_call *call; + bool abort = false; int ret; _enter(""); @@ -572,15 +515,16 @@ int rxrpc_reject_call(struct rxrpc_sock *rx) write_lock(&rx->call_lock); - ret = -ENODATA; - if (list_empty(&rx->acceptq)) { + if (list_empty(&rx->to_be_accepted)) { write_unlock(&rx->call_lock); - _leave(" = -ENODATA"); return -ENODATA; } - /* dequeue the first call and check it's still valid */ - call = list_entry(rx->acceptq.next, struct rxrpc_call, accept_link); + /* Dequeue the first call and check it's still valid. We gain + * responsibility for the queue's reference. + */ + call = list_entry(rx->to_be_accepted.next, + struct rxrpc_call, accept_link); list_del_init(&call->accept_link); sk_acceptq_removed(&rx->sk); rxrpc_see_call(call); @@ -588,67 +532,29 @@ int rxrpc_reject_call(struct rxrpc_sock *rx) write_lock_bh(&call->state_lock); switch (call->state) { case RXRPC_CALL_SERVER_ACCEPTING: - __rxrpc_set_call_completion(call, RXRPC_CALL_SERVER_BUSY, - 0, ECONNABORTED); - if (test_and_set_bit(RXRPC_CALL_EV_REJECT_BUSY, &call->events)) - rxrpc_queue_call(call); - ret = 0; - break; + __rxrpc_abort_call("REJ", call, 1, RX_USER_ABORT, ECONNABORTED); + abort = true; + /* fall through */ case RXRPC_CALL_COMPLETE: ret = call->error; - break; + goto out_discard; default: BUG(); } +out_discard: write_unlock_bh(&call->state_lock); write_unlock(&rx->call_lock); - rxrpc_release_call(rx, call); + if (abort) { + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); + rxrpc_release_call(rx, call); + rxrpc_put_call(call, rxrpc_call_put); + } + rxrpc_service_prealloc(rx, GFP_KERNEL); _leave(" = %d", ret); return ret; } -/** - * rxrpc_kernel_accept_call - Allow a kernel service to accept an incoming call - * @sock: The socket on which the impending call is waiting - * @user_call_ID: The tag to attach to the call - * @notify_rx: Where to send notifications instead of socket queue - * - * Allow a kernel service to accept an incoming call, assuming the incoming - * call is still valid. The caller should immediately trigger their own - * notification as there must be data waiting. - */ -struct rxrpc_call *rxrpc_kernel_accept_call(struct socket *sock, - unsigned long user_call_ID, - rxrpc_notify_rx_t notify_rx) -{ - struct rxrpc_call *call; - - _enter(",%lx", user_call_ID); - call = rxrpc_accept_call(rxrpc_sk(sock->sk), user_call_ID, notify_rx); - _leave(" = %p", call); - return call; -} -EXPORT_SYMBOL(rxrpc_kernel_accept_call); - -/** - * rxrpc_kernel_reject_call - Allow a kernel service to reject an incoming call - * @sock: The socket on which the impending call is waiting - * - * Allow a kernel service to reject an incoming call with a BUSY message, - * assuming the incoming call is still valid. - */ -int rxrpc_kernel_reject_call(struct socket *sock) -{ - int ret; - - _enter(""); - ret = rxrpc_reject_call(rxrpc_sk(sock->sk)); - _leave(" = %d", ret); - return ret; -} -EXPORT_SYMBOL(rxrpc_kernel_reject_call); - /* * rxrpc_kernel_charge_accept - Charge up socket with preallocated calls * @sock: The socket on which to preallocate diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index af88ad7d2cf9..2b976e789562 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -21,1258 +21,287 @@ #include #include "ar-internal.h" +/* + * Set the timer + */ +static void rxrpc_set_timer(struct rxrpc_call *call) +{ + unsigned long t, now = jiffies; + + _enter("{%ld,%ld,%ld:%ld}", + call->ack_at - now, call->resend_at - now, call->expire_at - now, + call->timer.expires - now); + + read_lock_bh(&call->state_lock); + + if (call->state < RXRPC_CALL_COMPLETE) { + t = call->ack_at; + if (time_before(call->resend_at, t)) + t = call->resend_at; + if (time_before(call->expire_at, t)) + t = call->expire_at; + if (!timer_pending(&call->timer) || + time_before(t, call->timer.expires)) { + _debug("set timer %ld", t - now); + mod_timer(&call->timer, t); + } + } + read_unlock_bh(&call->state_lock); +} + /* * propose an ACK be sent */ -void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, - u16 skew, u32 serial, bool immediate) +static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, + u16 skew, u32 serial, bool immediate, + bool background) { - unsigned long expiry; + unsigned long now, ack_at, expiry = rxrpc_soft_ack_delay; s8 prior = rxrpc_ack_priority[ack_reason]; - ASSERTCMP(prior, >, 0); - _enter("{%d},%s,%%%x,%u", call->debug_id, rxrpc_acks(ack_reason), serial, immediate); - if (prior < rxrpc_ack_priority[call->ackr_reason]) { - if (immediate) - goto cancel_timer; - return; - } - - /* update DELAY, IDLE, REQUESTED and PING_RESPONSE ACK serial - * numbers */ - if (prior == rxrpc_ack_priority[call->ackr_reason]) { - if (prior <= 4) { - call->ackr_skew = skew; + /* Update DELAY, IDLE, REQUESTED and PING_RESPONSE ACK serial + * numbers, but we don't alter the timeout. + */ + _debug("prior %u %u vs %u %u", + ack_reason, prior, + call->ackr_reason, rxrpc_ack_priority[call->ackr_reason]); + if (ack_reason == call->ackr_reason) { + if (RXRPC_ACK_UPDATEABLE & (1 << ack_reason)) { call->ackr_serial = serial; + call->ackr_skew = skew; } - if (immediate) - goto cancel_timer; - return; + if (!immediate) + return; + } else if (prior > rxrpc_ack_priority[call->ackr_reason]) { + call->ackr_reason = ack_reason; + call->ackr_serial = serial; + call->ackr_skew = skew; } - call->ackr_reason = ack_reason; - call->ackr_serial = serial; - switch (ack_reason) { + case RXRPC_ACK_REQUESTED: + if (rxrpc_requested_ack_delay < expiry) + expiry = rxrpc_requested_ack_delay; + if (serial == 1) + immediate = false; + break; + case RXRPC_ACK_DELAY: - _debug("run delay timer"); - expiry = rxrpc_soft_ack_delay; - goto run_timer; + if (rxrpc_soft_ack_delay < expiry) + expiry = rxrpc_soft_ack_delay; + break; case RXRPC_ACK_IDLE: - if (!immediate) { - _debug("run defer timer"); + if (rxrpc_soft_ack_delay < expiry) expiry = rxrpc_idle_ack_delay; - goto run_timer; - } - goto cancel_timer; - - case RXRPC_ACK_REQUESTED: - expiry = rxrpc_requested_ack_delay; - if (!expiry) - goto cancel_timer; - if (!immediate || serial == 1) { - _debug("run defer timer"); - goto run_timer; - } + break; default: - _debug("immediate ACK"); - goto cancel_timer; + immediate = true; + break; } -run_timer: - expiry += jiffies; - if (!timer_pending(&call->ack_timer) || - time_after(call->ack_timer.expires, expiry)) - mod_timer(&call->ack_timer, expiry); - return; - -cancel_timer: - _debug("cancel timer %%%u", serial); - try_to_del_timer_sync(&call->ack_timer); - read_lock_bh(&call->state_lock); - if (call->state < RXRPC_CALL_COMPLETE && - !test_and_set_bit(RXRPC_CALL_EV_ACK, &call->events)) - rxrpc_queue_call(call); - read_unlock_bh(&call->state_lock); + now = jiffies; + if (test_bit(RXRPC_CALL_EV_ACK, &call->events)) { + _debug("already scheduled"); + } else if (immediate || expiry == 0) { + _debug("immediate ACK %lx", call->events); + if (!test_and_set_bit(RXRPC_CALL_EV_ACK, &call->events) && + background) + rxrpc_queue_call(call); + } else { + ack_at = now + expiry; + _debug("deferred ACK %ld < %ld", expiry, call->ack_at - now); + if (time_before(ack_at, call->ack_at)) { + call->ack_at = ack_at; + rxrpc_set_timer(call); + } + } } /* * propose an ACK be sent, locking the call structure */ void rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, - u16 skew, u32 serial, bool immediate) + u16 skew, u32 serial, bool immediate, bool background) { - s8 prior = rxrpc_ack_priority[ack_reason]; - - if (prior > rxrpc_ack_priority[call->ackr_reason]) { - spin_lock_bh(&call->lock); - __rxrpc_propose_ACK(call, ack_reason, skew, serial, immediate); - spin_unlock_bh(&call->lock); - } + spin_lock_bh(&call->lock); + __rxrpc_propose_ACK(call, ack_reason, skew, serial, + immediate, background); + spin_unlock_bh(&call->lock); } /* - * set the resend timer - */ -static void rxrpc_set_resend(struct rxrpc_call *call, u8 resend, - unsigned long resend_at) -{ - read_lock_bh(&call->state_lock); - if (call->state == RXRPC_CALL_COMPLETE) - resend = 0; - - if (resend & 1) { - _debug("SET RESEND"); - set_bit(RXRPC_CALL_EV_RESEND, &call->events); - } - - if (resend & 2) { - _debug("MODIFY RESEND TIMER"); - set_bit(RXRPC_CALL_RUN_RTIMER, &call->flags); - mod_timer(&call->resend_timer, resend_at); - } else { - _debug("KILL RESEND TIMER"); - del_timer_sync(&call->resend_timer); - clear_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events); - clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags); - } - read_unlock_bh(&call->state_lock); -} - -/* - * resend packets + * Perform retransmission of NAK'd and unack'd packets. */ static void rxrpc_resend(struct rxrpc_call *call) { struct rxrpc_wire_header *whdr; - struct rxrpc_skb_priv *sp; - struct sk_buff *txb; - unsigned long *p_txb, resend_at; - bool stop; - int loop; - u8 resend; - - _enter("{%d,%d,%d,%d},", - call->acks_hard, call->acks_unacked, - atomic_read(&call->sequence), - CIRC_CNT(call->acks_head, call->acks_tail, call->acks_winsz)); - - stop = false; - resend = 0; - resend_at = 0; - - for (loop = call->acks_tail; - loop != call->acks_head || stop; - loop = (loop + 1) & (call->acks_winsz - 1) - ) { - p_txb = call->acks_window + loop; - smp_read_barrier_depends(); - if (*p_txb & 1) - continue; - - txb = (struct sk_buff *) *p_txb; - sp = rxrpc_skb(txb); - - if (sp->need_resend) { - sp->need_resend = false; - - /* each Tx packet has a new serial number */ - sp->hdr.serial = atomic_inc_return(&call->conn->serial); - - whdr = (struct rxrpc_wire_header *)txb->head; - whdr->serial = htonl(sp->hdr.serial); - - _proto("Tx DATA %%%u { #%d }", - sp->hdr.serial, sp->hdr.seq); - if (rxrpc_send_data_packet(call->conn, txb) < 0) { - stop = true; - sp->resend_at = jiffies + 3; - } else { - if (rxrpc_is_client_call(call)) - rxrpc_expose_client_call(call); - sp->resend_at = - jiffies + rxrpc_resend_timeout; - } - } - - if (time_after_eq(jiffies + 1, sp->resend_at)) { - sp->need_resend = true; - resend |= 1; - } else if (resend & 2) { - if (time_before(sp->resend_at, resend_at)) - resend_at = sp->resend_at; - } else { - resend_at = sp->resend_at; - resend |= 2; - } - } - - rxrpc_set_resend(call, resend, resend_at); - _leave(""); -} - -/* - * handle resend timer expiry - */ -static void rxrpc_resend_timer(struct rxrpc_call *call) -{ - struct rxrpc_skb_priv *sp; - struct sk_buff *txb; - unsigned long *p_txb, resend_at; - int loop; - u8 resend; - - _enter("%d,%d,%d", - call->acks_tail, call->acks_unacked, call->acks_head); - - if (call->state == RXRPC_CALL_COMPLETE) - return; - - resend = 0; - resend_at = 0; - - for (loop = call->acks_unacked; - loop != call->acks_head; - loop = (loop + 1) & (call->acks_winsz - 1) - ) { - p_txb = call->acks_window + loop; - smp_read_barrier_depends(); - txb = (struct sk_buff *) (*p_txb & ~1); - sp = rxrpc_skb(txb); - - ASSERT(!(*p_txb & 1)); - - if (sp->need_resend) { - ; - } else if (time_after_eq(jiffies + 1, sp->resend_at)) { - sp->need_resend = true; - resend |= 1; - } else if (resend & 2) { - if (time_before(sp->resend_at, resend_at)) - resend_at = sp->resend_at; - } else { - resend_at = sp->resend_at; - resend |= 2; - } - } - - rxrpc_set_resend(call, resend, resend_at); - _leave(""); -} - -/* - * process soft ACKs of our transmitted packets - * - these indicate packets the peer has or has not received, but hasn't yet - * given to the consumer, and so can still be discarded and re-requested - */ -static int rxrpc_process_soft_ACKs(struct rxrpc_call *call, - struct rxrpc_ackpacket *ack, - struct sk_buff *skb) -{ - struct rxrpc_skb_priv *sp; - struct sk_buff *txb; - unsigned long *p_txb, resend_at; - int loop; - u8 sacks[RXRPC_MAXACKS], resend; - - _enter("{%d,%d},{%d},", - call->acks_hard, - CIRC_CNT(call->acks_head, call->acks_tail, call->acks_winsz), - ack->nAcks); - - if (skb_copy_bits(skb, 0, sacks, ack->nAcks) < 0) - goto protocol_error; - - resend = 0; - resend_at = 0; - for (loop = 0; loop < ack->nAcks; loop++) { - p_txb = call->acks_window; - p_txb += (call->acks_tail + loop) & (call->acks_winsz - 1); - smp_read_barrier_depends(); - txb = (struct sk_buff *) (*p_txb & ~1); - sp = rxrpc_skb(txb); - - switch (sacks[loop]) { - case RXRPC_ACK_TYPE_ACK: - sp->need_resend = false; - *p_txb |= 1; - break; - case RXRPC_ACK_TYPE_NACK: - sp->need_resend = true; - *p_txb &= ~1; - resend = 1; - break; - default: - _debug("Unsupported ACK type %d", sacks[loop]); - goto protocol_error; - } - } - - smp_mb(); - call->acks_unacked = (call->acks_tail + loop) & (call->acks_winsz - 1); - - /* anything not explicitly ACK'd is implicitly NACK'd, but may just not - * have been received or processed yet by the far end */ - for (loop = call->acks_unacked; - loop != call->acks_head; - loop = (loop + 1) & (call->acks_winsz - 1) - ) { - p_txb = call->acks_window + loop; - smp_read_barrier_depends(); - txb = (struct sk_buff *) (*p_txb & ~1); - sp = rxrpc_skb(txb); - - if (*p_txb & 1) { - /* packet must have been discarded */ - sp->need_resend = true; - *p_txb &= ~1; - resend |= 1; - } else if (sp->need_resend) { - ; - } else if (time_after_eq(jiffies + 1, sp->resend_at)) { - sp->need_resend = true; - resend |= 1; - } else if (resend & 2) { - if (time_before(sp->resend_at, resend_at)) - resend_at = sp->resend_at; - } else { - resend_at = sp->resend_at; - resend |= 2; - } - } - - rxrpc_set_resend(call, resend, resend_at); - _leave(" = 0"); - return 0; - -protocol_error: - _leave(" = -EPROTO"); - return -EPROTO; -} - -/* - * discard hard-ACK'd packets from the Tx window - */ -static void rxrpc_rotate_tx_window(struct rxrpc_call *call, u32 hard) -{ - unsigned long _skb; - int tail = call->acks_tail, old_tail; - int win = CIRC_CNT(call->acks_head, tail, call->acks_winsz); - - _enter("{%u,%u},%u", call->acks_hard, win, hard); - - ASSERTCMP(hard - call->acks_hard, <=, win); - - while (call->acks_hard < hard) { - smp_read_barrier_depends(); - _skb = call->acks_window[tail] & ~1; - rxrpc_free_skb((struct sk_buff *) _skb); - old_tail = tail; - tail = (tail + 1) & (call->acks_winsz - 1); - call->acks_tail = tail; - if (call->acks_unacked == old_tail) - call->acks_unacked = tail; - call->acks_hard++; - } - - wake_up(&call->waitq); -} - -/* - * clear the Tx window in the event of a failure - */ -static void rxrpc_clear_tx_window(struct rxrpc_call *call) -{ - rxrpc_rotate_tx_window(call, atomic_read(&call->sequence)); -} - -/* - * drain the out of sequence received packet queue into the packet Rx queue - */ -static int rxrpc_drain_rx_oos_queue(struct rxrpc_call *call) -{ struct rxrpc_skb_priv *sp; struct sk_buff *skb; - bool terminal; - int ret; + rxrpc_seq_t cursor, seq, top; + unsigned long resend_at, now; + int ix; + u8 annotation; - _enter("{%d,%d}", call->rx_data_post, call->rx_first_oos); + _enter("{%d,%d}", call->tx_hard_ack, call->tx_top); spin_lock_bh(&call->lock); - ret = -ECONNRESET; - if (test_bit(RXRPC_CALL_RELEASED, &call->flags)) - goto socket_unavailable; + cursor = call->tx_hard_ack; + top = call->tx_top; + ASSERT(before_eq(cursor, top)); + if (cursor == top) + goto out_unlock; - skb = skb_dequeue(&call->rx_oos_queue); - if (skb) { + /* Scan the packet list without dropping the lock and decide which of + * the packets in the Tx buffer we're going to resend and what the new + * resend timeout will be. + */ + now = jiffies; + resend_at = now + rxrpc_resend_timeout; + seq = cursor + 1; + do { + ix = seq & RXRPC_RXTX_BUFF_MASK; + annotation = call->rxtx_annotations[ix]; + if (annotation == RXRPC_TX_ANNO_ACK) + continue; + + skb = call->rxtx_buffer[ix]; rxrpc_see_skb(skb); sp = rxrpc_skb(skb); - _debug("drain OOS packet %d [%d]", - sp->hdr.seq, call->rx_first_oos); - - if (sp->hdr.seq != call->rx_first_oos) { - skb_queue_head(&call->rx_oos_queue, skb); - call->rx_first_oos = rxrpc_skb(skb)->hdr.seq; - _debug("requeue %p {%u}", skb, call->rx_first_oos); - } else { - skb->mark = RXRPC_SKB_MARK_DATA; - terminal = ((sp->hdr.flags & RXRPC_LAST_PACKET) && - !(sp->hdr.flags & RXRPC_CLIENT_INITIATED)); - ret = rxrpc_queue_rcv_skb(call, skb, true, terminal); - BUG_ON(ret < 0); - _debug("drain #%u", call->rx_data_post); - call->rx_data_post++; - - /* find out what the next packet is */ - skb = skb_peek(&call->rx_oos_queue); - rxrpc_see_skb(skb); - if (skb) - call->rx_first_oos = rxrpc_skb(skb)->hdr.seq; - else - call->rx_first_oos = 0; - _debug("peek %p {%u}", skb, call->rx_first_oos); + if (annotation == RXRPC_TX_ANNO_UNACK) { + if (time_after(sp->resend_at, now)) { + if (time_before(sp->resend_at, resend_at)) + resend_at = sp->resend_at; + continue; + } } - } - ret = 0; -socket_unavailable: - spin_unlock_bh(&call->lock); - _leave(" = %d", ret); - return ret; -} + /* Okay, we need to retransmit a packet. */ + call->rxtx_annotations[ix] = RXRPC_TX_ANNO_RETRANS; + seq++; + } while (before_eq(seq, top)); -/* - * insert an out of sequence packet into the buffer - */ -static void rxrpc_insert_oos_packet(struct rxrpc_call *call, - struct sk_buff *skb) -{ - struct rxrpc_skb_priv *sp, *psp; - struct sk_buff *p; - u32 seq; + call->resend_at = resend_at; - sp = rxrpc_skb(skb); - seq = sp->hdr.seq; - _enter(",,{%u}", seq); + /* Now go through the Tx window and perform the retransmissions. We + * have to drop the lock for each send. If an ACK comes in whilst the + * lock is dropped, it may clear some of the retransmission markers for + * packets that it soft-ACKs. + */ + seq = cursor + 1; + do { + ix = seq & RXRPC_RXTX_BUFF_MASK; + annotation = call->rxtx_annotations[ix]; + if (annotation != RXRPC_TX_ANNO_RETRANS) + continue; - skb->destructor = rxrpc_packet_destructor; - ASSERTCMP(sp->call, ==, NULL); - sp->call = call; - rxrpc_get_call_for_skb(call, skb); - - /* insert into the buffer in sequence order */ - spin_lock_bh(&call->lock); - - skb_queue_walk(&call->rx_oos_queue, p) { - psp = rxrpc_skb(p); - if (psp->hdr.seq > seq) { - _debug("insert oos #%u before #%u", seq, psp->hdr.seq); - skb_insert(p, skb, &call->rx_oos_queue); - goto inserted; - } - } - - _debug("append oos #%u", seq); - skb_queue_tail(&call->rx_oos_queue, skb); -inserted: - - /* we might now have a new front to the queue */ - if (call->rx_first_oos == 0 || seq < call->rx_first_oos) - call->rx_first_oos = seq; - - read_lock(&call->state_lock); - if (call->state < RXRPC_CALL_COMPLETE && - call->rx_data_post == call->rx_first_oos) { - _debug("drain rx oos now"); - set_bit(RXRPC_CALL_EV_DRAIN_RX_OOS, &call->events); - } - read_unlock(&call->state_lock); - - spin_unlock_bh(&call->lock); - _leave(" [stored #%u]", call->rx_first_oos); -} - -/* - * clear the Tx window on final ACK reception - */ -static void rxrpc_zap_tx_window(struct rxrpc_call *call) -{ - struct rxrpc_skb_priv *sp; - struct sk_buff *skb; - unsigned long _skb, *acks_window; - u8 winsz = call->acks_winsz; - int tail; - - acks_window = call->acks_window; - call->acks_window = NULL; - - while (CIRC_CNT(call->acks_head, call->acks_tail, winsz) > 0) { - tail = call->acks_tail; - smp_read_barrier_depends(); - _skb = acks_window[tail] & ~1; - smp_mb(); - call->acks_tail = (call->acks_tail + 1) & (winsz - 1); - - skb = (struct sk_buff *) _skb; + skb = call->rxtx_buffer[ix]; + rxrpc_get_skb(skb); + spin_unlock_bh(&call->lock); sp = rxrpc_skb(skb); - _debug("+++ clear Tx %u", sp->hdr.seq); + + /* Each Tx packet needs a new serial number */ + sp->hdr.serial = atomic_inc_return(&call->conn->serial); + + whdr = (struct rxrpc_wire_header *)skb->head; + whdr->serial = htonl(sp->hdr.serial); + + if (rxrpc_send_data_packet(call->conn, skb) < 0) { + call->resend_at = now + 2; + rxrpc_free_skb(skb); + return; + } + + if (rxrpc_is_client_call(call)) + rxrpc_expose_client_call(call); + sp->resend_at = now + rxrpc_resend_timeout; + rxrpc_free_skb(skb); - } - - kfree(acks_window); -} - -/* - * process the extra information that may be appended to an ACK packet - */ -static void rxrpc_extract_ackinfo(struct rxrpc_call *call, struct sk_buff *skb, - unsigned int latest, int nAcks) -{ - struct rxrpc_ackinfo ackinfo; - struct rxrpc_peer *peer; - unsigned int mtu; - - if (skb_copy_bits(skb, nAcks + 3, &ackinfo, sizeof(ackinfo)) < 0) { - _leave(" [no ackinfo]"); - return; - } - - _proto("Rx ACK %%%u Info { rx=%u max=%u rwin=%u jm=%u }", - latest, - ntohl(ackinfo.rxMTU), ntohl(ackinfo.maxMTU), - ntohl(ackinfo.rwind), ntohl(ackinfo.jumbo_max)); - - mtu = min(ntohl(ackinfo.rxMTU), ntohl(ackinfo.maxMTU)); - - peer = call->peer; - if (mtu < peer->maxdata) { - spin_lock_bh(&peer->lock); - peer->maxdata = mtu; - peer->mtu = mtu + peer->hdrsize; - spin_unlock_bh(&peer->lock); - _net("Net MTU %u (maxdata %u)", peer->mtu, peer->maxdata); - } -} - -/* - * process packets in the reception queue - */ -static int rxrpc_process_rx_queue(struct rxrpc_call *call, - u32 *_abort_code) -{ - struct rxrpc_ackpacket ack; - struct rxrpc_skb_priv *sp; - struct sk_buff *skb; - bool post_ACK; - int latest; - u32 hard, tx; - - _enter(""); - -process_further: - skb = skb_dequeue(&call->rx_queue); - if (!skb) - return -EAGAIN; - - rxrpc_see_skb(skb); - _net("deferred skb %p", skb); - - sp = rxrpc_skb(skb); - - _debug("process %s [st %d]", rxrpc_pkts[sp->hdr.type], call->state); - - post_ACK = false; - - switch (sp->hdr.type) { - /* data packets that wind up here have been received out of - * order, need security processing or are jumbo packets */ - case RXRPC_PACKET_TYPE_DATA: - _proto("OOSQ DATA %%%u { #%u }", sp->hdr.serial, sp->hdr.seq); - - /* secured packets must be verified and possibly decrypted */ - if (call->conn->security->verify_packet(call, skb, - sp->hdr.seq, - sp->hdr.cksum) < 0) - goto protocol_error; - - rxrpc_insert_oos_packet(call, skb); - goto process_further; - - /* partial ACK to process */ - case RXRPC_PACKET_TYPE_ACK: - if (skb_copy_bits(skb, 0, &ack, sizeof(ack)) < 0) { - _debug("extraction failure"); - goto protocol_error; - } - if (!skb_pull(skb, sizeof(ack))) - BUG(); - - latest = sp->hdr.serial; - hard = ntohl(ack.firstPacket); - tx = atomic_read(&call->sequence); - - _proto("Rx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }", - latest, - ntohs(ack.maxSkew), - hard, - ntohl(ack.previousPacket), - ntohl(ack.serial), - rxrpc_acks(ack.reason), - ack.nAcks); - - rxrpc_extract_ackinfo(call, skb, latest, ack.nAcks); - - if (ack.reason == RXRPC_ACK_PING) { - _proto("Rx ACK %%%u PING Request", latest); - rxrpc_propose_ACK(call, RXRPC_ACK_PING_RESPONSE, - skb->priority, sp->hdr.serial, true); - } - - /* discard any out-of-order or duplicate ACKs */ - if (latest - call->acks_latest <= 0) { - _debug("discard ACK %d <= %d", - latest, call->acks_latest); - goto discard; - } - call->acks_latest = latest; - - if (call->state != RXRPC_CALL_CLIENT_SEND_REQUEST && - call->state != RXRPC_CALL_CLIENT_AWAIT_REPLY && - call->state != RXRPC_CALL_SERVER_SEND_REPLY && - call->state != RXRPC_CALL_SERVER_AWAIT_ACK) - goto discard; - - _debug("Tx=%d H=%u S=%d", tx, call->acks_hard, call->state); - - if (hard > 0) { - if (hard - 1 > tx) { - _debug("hard-ACK'd packet %d not transmitted" - " (%d top)", - hard - 1, tx); - goto protocol_error; - } - - if ((call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY || - call->state == RXRPC_CALL_SERVER_AWAIT_ACK) && - hard > tx) { - call->acks_hard = tx; - goto all_acked; - } - - smp_rmb(); - rxrpc_rotate_tx_window(call, hard - 1); - } - - if (ack.nAcks > 0) { - if (hard - 1 + ack.nAcks > tx) { - _debug("soft-ACK'd packet %d+%d not" - " transmitted (%d top)", - hard - 1, ack.nAcks, tx); - goto protocol_error; - } - - if (rxrpc_process_soft_ACKs(call, &ack, skb) < 0) - goto protocol_error; - } - goto discard; - - /* complete ACK to process */ - case RXRPC_PACKET_TYPE_ACKALL: - goto all_acked; - - /* abort and busy are handled elsewhere */ - case RXRPC_PACKET_TYPE_BUSY: - case RXRPC_PACKET_TYPE_ABORT: - BUG(); - - /* connection level events - also handled elsewhere */ - case RXRPC_PACKET_TYPE_CHALLENGE: - case RXRPC_PACKET_TYPE_RESPONSE: - case RXRPC_PACKET_TYPE_DEBUG: - BUG(); - } - - /* if we've had a hard ACK that covers all the packets we've sent, then - * that ends that phase of the operation */ -all_acked: - write_lock_bh(&call->state_lock); - _debug("ack all %d", call->state); - - switch (call->state) { - case RXRPC_CALL_CLIENT_AWAIT_REPLY: - call->state = RXRPC_CALL_CLIENT_RECV_REPLY; - break; - case RXRPC_CALL_SERVER_AWAIT_ACK: - _debug("srv complete"); - __rxrpc_call_completed(call); - post_ACK = true; - break; - case RXRPC_CALL_CLIENT_SEND_REQUEST: - case RXRPC_CALL_SERVER_RECV_REQUEST: - goto protocol_error_unlock; /* can't occur yet */ - default: - write_unlock_bh(&call->state_lock); - goto discard; /* assume packet left over from earlier phase */ - } - - write_unlock_bh(&call->state_lock); - - /* if all the packets we sent are hard-ACK'd, then we can discard - * whatever we've got left */ - _debug("clear Tx %d", - CIRC_CNT(call->acks_head, call->acks_tail, call->acks_winsz)); - - del_timer_sync(&call->resend_timer); - clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags); - clear_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events); - - if (call->acks_window) - rxrpc_zap_tx_window(call); - - if (post_ACK) { - /* post the final ACK message for userspace to pick up */ - _debug("post ACK"); - skb->mark = RXRPC_SKB_MARK_FINAL_ACK; - sp->call = call; - rxrpc_get_call_for_skb(call, skb); spin_lock_bh(&call->lock); - if (rxrpc_queue_rcv_skb(call, skb, true, true) < 0) - BUG(); - spin_unlock_bh(&call->lock); - goto process_further; - } -discard: - rxrpc_free_skb(skb); - goto process_further; + /* We need to clear the retransmit state, but there are two + * things we need to be aware of: A new ACK/NAK might have been + * received and the packet might have been hard-ACK'd (in which + * case it will no longer be in the buffer). + */ + if (after(seq, call->tx_hard_ack) && + (call->rxtx_annotations[ix] == RXRPC_TX_ANNO_RETRANS || + call->rxtx_annotations[ix] == RXRPC_TX_ANNO_NAK)) + call->rxtx_annotations[ix] = RXRPC_TX_ANNO_UNACK; -protocol_error_unlock: - write_unlock_bh(&call->state_lock); -protocol_error: - rxrpc_free_skb(skb); - _leave(" = -EPROTO"); - return -EPROTO; + if (after(call->tx_hard_ack, seq)) + seq = call->tx_hard_ack; + seq++; + } while (before_eq(seq, top)); + +out_unlock: + spin_unlock_bh(&call->lock); + _leave(""); } /* - * post a message to the socket Rx queue for recvmsg() to pick up - */ -static int rxrpc_post_message(struct rxrpc_call *call, u32 mark, u32 error, - bool fatal) -{ - struct rxrpc_skb_priv *sp; - struct sk_buff *skb; - int ret; - - _enter("{%d,%lx},%u,%u,%d", - call->debug_id, call->flags, mark, error, fatal); - - /* remove timers and things for fatal messages */ - if (fatal) { - del_timer_sync(&call->resend_timer); - del_timer_sync(&call->ack_timer); - clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags); - } - - if (mark != RXRPC_SKB_MARK_NEW_CALL && - !test_bit(RXRPC_CALL_HAS_USERID, &call->flags)) { - _leave("[no userid]"); - return 0; - } - - if (!test_bit(RXRPC_CALL_TERMINAL_MSG, &call->flags)) { - skb = alloc_skb(0, GFP_NOFS); - if (!skb) - return -ENOMEM; - - rxrpc_new_skb(skb); - - skb->mark = mark; - - sp = rxrpc_skb(skb); - memset(sp, 0, sizeof(*sp)); - sp->error = error; - sp->call = call; - rxrpc_get_call_for_skb(call, skb); - - spin_lock_bh(&call->lock); - ret = rxrpc_queue_rcv_skb(call, skb, true, fatal); - spin_unlock_bh(&call->lock); - BUG_ON(ret < 0); - } - - return 0; -} - -/* - * Handle background processing of incoming call packets and ACK / abort - * generation. A ref on the call is donated to us by whoever queued the work - * item. + * Handle retransmission and deferred ACK/abort generation. */ void rxrpc_process_call(struct work_struct *work) { struct rxrpc_call *call = container_of(work, struct rxrpc_call, processor); - struct rxrpc_wire_header whdr; - struct rxrpc_ackpacket ack; - struct rxrpc_ackinfo ackinfo; - struct msghdr msg; - struct kvec iov[5]; - enum rxrpc_call_event genbit; - unsigned long bits; - __be32 data, pad; - size_t len; - bool requeue = false; - int loop, nbit, ioc, ret, mtu; - u32 serial, abort_code = RX_PROTOCOL_ERROR; - u8 *acks = NULL; + unsigned long now; rxrpc_see_call(call); //printk("\n--------------------\n"); - _enter("{%d,%s,%lx} [%lu]", - call->debug_id, rxrpc_call_states[call->state], call->events, - (jiffies - call->creation_jif) / (HZ / 10)); + _enter("{%d,%s,%lx}", + call->debug_id, rxrpc_call_states[call->state], call->events); - if (call->state >= RXRPC_CALL_COMPLETE) { - rxrpc_put_call(call, rxrpc_call_put); - return; +recheck_state: + if (test_and_clear_bit(RXRPC_CALL_EV_ABORT, &call->events)) { + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); + goto recheck_state; } - if (!call->conn) - goto skip_msg_init; - - /* there's a good chance we're going to have to send a message, so set - * one up in advance */ - msg.msg_name = &call->peer->srx.transport; - msg.msg_namelen = call->peer->srx.transport_len; - msg.msg_control = NULL; - msg.msg_controllen = 0; - msg.msg_flags = 0; - - whdr.epoch = htonl(call->conn->proto.epoch); - whdr.cid = htonl(call->cid); - whdr.callNumber = htonl(call->call_id); - whdr.seq = 0; - whdr.type = RXRPC_PACKET_TYPE_ACK; - whdr.flags = call->conn->out_clientflag; - whdr.userStatus = 0; - whdr.securityIndex = call->conn->security_ix; - whdr._rsvd = 0; - whdr.serviceId = htons(call->service_id); - - memset(iov, 0, sizeof(iov)); - iov[0].iov_base = &whdr; - iov[0].iov_len = sizeof(whdr); -skip_msg_init: - - /* deal with events of a final nature */ - if (test_bit(RXRPC_CALL_EV_RCVD_ERROR, &call->events)) { - enum rxrpc_skb_mark mark; - - clear_bit(RXRPC_CALL_EV_CONN_ABORT, &call->events); - clear_bit(RXRPC_CALL_EV_REJECT_BUSY, &call->events); - clear_bit(RXRPC_CALL_EV_ABORT, &call->events); - - if (call->completion == RXRPC_CALL_NETWORK_ERROR) { - mark = RXRPC_SKB_MARK_NET_ERROR; - _debug("post net error %d", call->error); - } else { - mark = RXRPC_SKB_MARK_LOCAL_ERROR; - _debug("post net local error %d", call->error); - } - - if (rxrpc_post_message(call, mark, call->error, true) < 0) - goto no_mem; - clear_bit(RXRPC_CALL_EV_RCVD_ERROR, &call->events); - goto kill_ACKs; + if (call->state == RXRPC_CALL_COMPLETE) { + del_timer_sync(&call->timer); + goto out_put; } - if (test_bit(RXRPC_CALL_EV_CONN_ABORT, &call->events)) { - ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); - - clear_bit(RXRPC_CALL_EV_REJECT_BUSY, &call->events); - clear_bit(RXRPC_CALL_EV_ABORT, &call->events); - - _debug("post conn abort"); - - if (rxrpc_post_message(call, RXRPC_SKB_MARK_LOCAL_ERROR, - call->error, true) < 0) - goto no_mem; - clear_bit(RXRPC_CALL_EV_CONN_ABORT, &call->events); - goto kill_ACKs; - } - - if (test_bit(RXRPC_CALL_EV_REJECT_BUSY, &call->events)) { - whdr.type = RXRPC_PACKET_TYPE_BUSY; - genbit = RXRPC_CALL_EV_REJECT_BUSY; - goto send_message; - } - - if (test_bit(RXRPC_CALL_EV_ABORT, &call->events)) { - ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); - - if (rxrpc_post_message(call, RXRPC_SKB_MARK_LOCAL_ERROR, - call->error, true) < 0) - goto no_mem; - whdr.type = RXRPC_PACKET_TYPE_ABORT; - data = htonl(call->abort_code); - iov[1].iov_base = &data; - iov[1].iov_len = sizeof(data); - genbit = RXRPC_CALL_EV_ABORT; - goto send_message; - } - - if (test_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events)) { - genbit = RXRPC_CALL_EV_ACK_FINAL; - - ack.bufferSpace = htons(8); - ack.maxSkew = 0; - ack.serial = 0; - ack.reason = RXRPC_ACK_IDLE; - ack.nAcks = 0; - call->ackr_reason = 0; - - spin_lock_bh(&call->lock); - ack.serial = htonl(call->ackr_serial); - ack.previousPacket = htonl(call->ackr_prev_seq); - ack.firstPacket = htonl(call->rx_data_eaten + 1); - spin_unlock_bh(&call->lock); - - pad = 0; - - iov[1].iov_base = &ack; - iov[1].iov_len = sizeof(ack); - iov[2].iov_base = &pad; - iov[2].iov_len = 3; - iov[3].iov_base = &ackinfo; - iov[3].iov_len = sizeof(ackinfo); - goto send_ACK; - } - - if (call->events & ((1 << RXRPC_CALL_EV_RCVD_BUSY) | - (1 << RXRPC_CALL_EV_RCVD_ABORT)) - ) { - u32 mark; - - if (test_bit(RXRPC_CALL_EV_RCVD_ABORT, &call->events)) - mark = RXRPC_SKB_MARK_REMOTE_ABORT; - else - mark = RXRPC_SKB_MARK_BUSY; - - _debug("post abort/busy"); - rxrpc_clear_tx_window(call); - if (rxrpc_post_message(call, mark, ECONNABORTED, true) < 0) - goto no_mem; - - clear_bit(RXRPC_CALL_EV_RCVD_BUSY, &call->events); - clear_bit(RXRPC_CALL_EV_RCVD_ABORT, &call->events); - goto kill_ACKs; - } - - if (test_and_clear_bit(RXRPC_CALL_EV_RCVD_ACKALL, &call->events)) { - _debug("do implicit ackall"); - rxrpc_clear_tx_window(call); - } - - if (test_bit(RXRPC_CALL_EV_LIFE_TIMER, &call->events)) { + now = jiffies; + if (time_after_eq(now, call->expire_at)) { rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, ETIME); - - _debug("post timeout"); - if (rxrpc_post_message(call, RXRPC_SKB_MARK_LOCAL_ERROR, - ETIME, true) < 0) - goto no_mem; - - clear_bit(RXRPC_CALL_EV_LIFE_TIMER, &call->events); - goto kill_ACKs; + set_bit(RXRPC_CALL_EV_ABORT, &call->events); } - /* deal with assorted inbound messages */ - if (!skb_queue_empty(&call->rx_queue)) { - ret = rxrpc_process_rx_queue(call, &abort_code); - switch (ret) { - case 0: - case -EAGAIN: - break; - case -ENOMEM: - goto no_mem; - case -EKEYEXPIRED: - case -EKEYREJECTED: - case -EPROTO: - rxrpc_abort_call("PRO", call, 0, abort_code, -ret); - goto kill_ACKs; + if (test_and_clear_bit(RXRPC_CALL_EV_ACK, &call->events) || + time_after_eq(now, call->ack_at)) { + call->ack_at = call->expire_at; + if (call->ackr_reason) { + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK); + goto recheck_state; } } - /* handle resending */ - if (test_and_clear_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events)) - rxrpc_resend_timer(call); - if (test_and_clear_bit(RXRPC_CALL_EV_RESEND, &call->events)) + if (test_and_clear_bit(RXRPC_CALL_EV_RESEND, &call->events) || + time_after_eq(now, call->resend_at)) { rxrpc_resend(call); - - /* consider sending an ordinary ACK */ - if (test_bit(RXRPC_CALL_EV_ACK, &call->events)) { - _debug("send ACK: window: %d - %d { %lx }", - call->rx_data_eaten, call->ackr_win_top, - call->ackr_window[0]); - - if (call->state > RXRPC_CALL_SERVER_ACK_REQUEST && - call->ackr_reason != RXRPC_ACK_PING_RESPONSE) { - /* ACK by sending reply DATA packet in this state */ - clear_bit(RXRPC_CALL_EV_ACK, &call->events); - goto maybe_reschedule; - } - - genbit = RXRPC_CALL_EV_ACK; - - acks = kzalloc(call->ackr_win_top - call->rx_data_eaten, - GFP_NOFS); - if (!acks) - goto no_mem; - - //hdr.flags = RXRPC_SLOW_START_OK; - ack.bufferSpace = htons(8); - ack.maxSkew = 0; - - spin_lock_bh(&call->lock); - ack.reason = call->ackr_reason; - ack.serial = htonl(call->ackr_serial); - ack.previousPacket = htonl(call->ackr_prev_seq); - ack.firstPacket = htonl(call->rx_data_eaten + 1); - - ack.nAcks = 0; - for (loop = 0; loop < RXRPC_ACKR_WINDOW_ASZ; loop++) { - nbit = loop * BITS_PER_LONG; - for (bits = call->ackr_window[loop]; bits; bits >>= 1 - ) { - _debug("- l=%d n=%d b=%lx", loop, nbit, bits); - if (bits & 1) { - acks[nbit] = RXRPC_ACK_TYPE_ACK; - ack.nAcks = nbit + 1; - } - nbit++; - } - } - call->ackr_reason = 0; - spin_unlock_bh(&call->lock); - - pad = 0; - - iov[1].iov_base = &ack; - iov[1].iov_len = sizeof(ack); - iov[2].iov_base = acks; - iov[2].iov_len = ack.nAcks; - iov[3].iov_base = &pad; - iov[3].iov_len = 3; - iov[4].iov_base = &ackinfo; - iov[4].iov_len = sizeof(ackinfo); - - switch (ack.reason) { - case RXRPC_ACK_REQUESTED: - case RXRPC_ACK_DUPLICATE: - case RXRPC_ACK_OUT_OF_SEQUENCE: - case RXRPC_ACK_EXCEEDS_WINDOW: - case RXRPC_ACK_NOSPACE: - case RXRPC_ACK_PING: - case RXRPC_ACK_PING_RESPONSE: - goto send_ACK_with_skew; - case RXRPC_ACK_DELAY: - case RXRPC_ACK_IDLE: - goto send_ACK; - } + goto recheck_state; } - /* handle completion of security negotiations on an incoming - * connection */ - if (test_and_clear_bit(RXRPC_CALL_EV_SECURED, &call->events)) { - _debug("secured"); - spin_lock_bh(&call->lock); - - if (call->state == RXRPC_CALL_SERVER_SECURING) { - struct rxrpc_sock *rx; - _debug("securing"); - rcu_read_lock(); - rx = rcu_dereference(call->socket); - if (rx) { - write_lock(&rx->call_lock); - if (!test_bit(RXRPC_CALL_RELEASED, &call->flags)) { - _debug("not released"); - call->state = RXRPC_CALL_SERVER_ACCEPTING; - list_move_tail(&call->accept_link, - &rx->acceptq); - } - write_unlock(&rx->call_lock); - } - rcu_read_unlock(); - read_lock(&call->state_lock); - if (call->state < RXRPC_CALL_COMPLETE) - set_bit(RXRPC_CALL_EV_POST_ACCEPT, &call->events); - read_unlock(&call->state_lock); - } - - spin_unlock_bh(&call->lock); - if (!test_bit(RXRPC_CALL_EV_POST_ACCEPT, &call->events)) - goto maybe_reschedule; - } - - /* post a notification of an acceptable connection to the app */ - if (test_bit(RXRPC_CALL_EV_POST_ACCEPT, &call->events)) { - _debug("post accept"); - if (rxrpc_post_message(call, RXRPC_SKB_MARK_NEW_CALL, - 0, false) < 0) - goto no_mem; - clear_bit(RXRPC_CALL_EV_POST_ACCEPT, &call->events); - goto maybe_reschedule; - } - - /* handle incoming call acceptance */ - if (test_and_clear_bit(RXRPC_CALL_EV_ACCEPTED, &call->events)) { - _debug("accepted"); - ASSERTCMP(call->rx_data_post, ==, 0); - call->rx_data_post = 1; - read_lock_bh(&call->state_lock); - if (call->state < RXRPC_CALL_COMPLETE) - set_bit(RXRPC_CALL_EV_DRAIN_RX_OOS, &call->events); - read_unlock_bh(&call->state_lock); - } - - /* drain the out of sequence received packet queue into the packet Rx - * queue */ - if (test_and_clear_bit(RXRPC_CALL_EV_DRAIN_RX_OOS, &call->events)) { - while (call->rx_data_post == call->rx_first_oos) - if (rxrpc_drain_rx_oos_queue(call) < 0) - break; - goto maybe_reschedule; - } + rxrpc_set_timer(call); /* other events may have been raised since we started checking */ - goto maybe_reschedule; - -send_ACK_with_skew: - ack.maxSkew = htons(call->ackr_skew); -send_ACK: - mtu = call->peer->if_mtu; - mtu -= call->peer->hdrsize; - ackinfo.maxMTU = htonl(mtu); - ackinfo.rwind = htonl(rxrpc_rx_window_size); - - /* permit the peer to send us jumbo packets if it wants to */ - ackinfo.rxMTU = htonl(rxrpc_rx_mtu); - ackinfo.jumbo_max = htonl(rxrpc_rx_jumbo_max); - - serial = atomic_inc_return(&call->conn->serial); - whdr.serial = htonl(serial); - _proto("Tx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }", - serial, - ntohs(ack.maxSkew), - ntohl(ack.firstPacket), - ntohl(ack.previousPacket), - ntohl(ack.serial), - rxrpc_acks(ack.reason), - ack.nAcks); - - del_timer_sync(&call->ack_timer); - if (ack.nAcks > 0) - set_bit(RXRPC_CALL_TX_SOFT_ACK, &call->flags); - goto send_message_2; - -send_message: - _debug("send message"); - - serial = atomic_inc_return(&call->conn->serial); - whdr.serial = htonl(serial); - _proto("Tx %s %%%u", rxrpc_pkts[whdr.type], serial); -send_message_2: - - len = iov[0].iov_len; - ioc = 1; - if (iov[4].iov_len) { - ioc = 5; - len += iov[4].iov_len; - len += iov[3].iov_len; - len += iov[2].iov_len; - len += iov[1].iov_len; - } else if (iov[3].iov_len) { - ioc = 4; - len += iov[3].iov_len; - len += iov[2].iov_len; - len += iov[1].iov_len; - } else if (iov[2].iov_len) { - ioc = 3; - len += iov[2].iov_len; - len += iov[1].iov_len; - } else if (iov[1].iov_len) { - ioc = 2; - len += iov[1].iov_len; - } - - ret = kernel_sendmsg(call->conn->params.local->socket, - &msg, iov, ioc, len); - if (ret < 0) { - _debug("sendmsg failed: %d", ret); - if (call->state < RXRPC_CALL_COMPLETE) - requeue = true; - goto error; - } - - switch (genbit) { - case RXRPC_CALL_EV_ABORT: - clear_bit(genbit, &call->events); - clear_bit(RXRPC_CALL_EV_RCVD_ABORT, &call->events); - goto kill_ACKs; - - case RXRPC_CALL_EV_ACK_FINAL: - rxrpc_call_completed(call); - goto kill_ACKs; - - default: - clear_bit(genbit, &call->events); - switch (call->state) { - case RXRPC_CALL_CLIENT_AWAIT_REPLY: - case RXRPC_CALL_CLIENT_RECV_REPLY: - case RXRPC_CALL_SERVER_RECV_REQUEST: - case RXRPC_CALL_SERVER_ACK_REQUEST: - _debug("start ACK timer"); - rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, - call->ackr_skew, call->ackr_serial, - false); - default: - break; - } - goto maybe_reschedule; - } - -kill_ACKs: - del_timer_sync(&call->ack_timer); - clear_bit(RXRPC_CALL_EV_ACK, &call->events); - -maybe_reschedule: - if (call->events || !skb_queue_empty(&call->rx_queue)) { - if (call->state < RXRPC_CALL_COMPLETE) - requeue = true; - } - -error: - kfree(acks); - - if ((requeue || call->events) && !work_pending(&call->processor)) { - _debug("jumpstart %x", call->conn->proto.cid); + if (call->events && call->state < RXRPC_CALL_COMPLETE) { __rxrpc_queue_call(call); - } else { - rxrpc_put_call(call, rxrpc_call_put); + goto out; } +out_put: + rxrpc_put_call(call, rxrpc_call_put); +out: _leave(""); - return; - -no_mem: - _debug("out of memory"); - goto maybe_reschedule; } diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index d233adc9b5e5..18ab13f82f6e 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -30,7 +30,6 @@ const char *const rxrpc_call_states[NR__RXRPC_CALL_STATES] = { [RXRPC_CALL_CLIENT_SEND_REQUEST] = "ClSndReq", [RXRPC_CALL_CLIENT_AWAIT_REPLY] = "ClAwtRpl", [RXRPC_CALL_CLIENT_RECV_REPLY] = "ClRcvRpl", - [RXRPC_CALL_CLIENT_FINAL_ACK] = "ClFnlACK", [RXRPC_CALL_SERVER_PREALLOC] = "SvPrealc", [RXRPC_CALL_SERVER_SECURING] = "SvSecure", [RXRPC_CALL_SERVER_ACCEPTING] = "SvAccept", @@ -43,7 +42,6 @@ const char *const rxrpc_call_states[NR__RXRPC_CALL_STATES] = { const char *const rxrpc_call_completions[NR__RXRPC_CALL_COMPLETIONS] = { [RXRPC_CALL_SUCCEEDED] = "Complete", - [RXRPC_CALL_SERVER_BUSY] = "SvBusy ", [RXRPC_CALL_REMOTELY_ABORTED] = "RmtAbort", [RXRPC_CALL_LOCALLY_ABORTED] = "LocAbort", [RXRPC_CALL_LOCAL_ERROR] = "LocError", @@ -57,10 +55,8 @@ const char rxrpc_call_traces[rxrpc_call__nr_trace][4] = { [rxrpc_call_queued_ref] = "QUR", [rxrpc_call_seen] = "SEE", [rxrpc_call_got] = "GOT", - [rxrpc_call_got_skb] = "Gsk", [rxrpc_call_got_userid] = "Gus", [rxrpc_call_put] = "PUT", - [rxrpc_call_put_skb] = "Psk", [rxrpc_call_put_userid] = "Pus", [rxrpc_call_put_noqueue] = "PNQ", }; @@ -69,9 +65,15 @@ struct kmem_cache *rxrpc_call_jar; LIST_HEAD(rxrpc_calls); DEFINE_RWLOCK(rxrpc_call_lock); -static void rxrpc_call_life_expired(unsigned long _call); -static void rxrpc_ack_time_expired(unsigned long _call); -static void rxrpc_resend_time_expired(unsigned long _call); +static void rxrpc_call_timer_expired(unsigned long _call) +{ + struct rxrpc_call *call = (struct rxrpc_call *)_call; + + _enter("%d", call->debug_id); + + if (call->state < RXRPC_CALL_COMPLETE) + rxrpc_queue_call(call); +} /* * find an extant server call @@ -121,27 +123,24 @@ struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp) if (!call) return NULL; - call->acks_winsz = 16; - call->acks_window = kmalloc(call->acks_winsz * sizeof(unsigned long), + call->rxtx_buffer = kcalloc(RXRPC_RXTX_BUFF_SIZE, + sizeof(struct sk_buff *), gfp); - if (!call->acks_window) { - kmem_cache_free(rxrpc_call_jar, call); - return NULL; - } + if (!call->rxtx_buffer) + goto nomem; - setup_timer(&call->lifetimer, &rxrpc_call_life_expired, - (unsigned long) call); - setup_timer(&call->ack_timer, &rxrpc_ack_time_expired, - (unsigned long) call); - setup_timer(&call->resend_timer, &rxrpc_resend_time_expired, - (unsigned long) call); + call->rxtx_annotations = kcalloc(RXRPC_RXTX_BUFF_SIZE, sizeof(u8), gfp); + if (!call->rxtx_annotations) + goto nomem_2; + + setup_timer(&call->timer, rxrpc_call_timer_expired, + (unsigned long)call); INIT_WORK(&call->processor, &rxrpc_process_call); INIT_LIST_HEAD(&call->link); INIT_LIST_HEAD(&call->chan_wait_link); INIT_LIST_HEAD(&call->accept_link); - skb_queue_head_init(&call->rx_queue); - skb_queue_head_init(&call->rx_oos_queue); - skb_queue_head_init(&call->knlrecv_queue); + INIT_LIST_HEAD(&call->recvmsg_link); + INIT_LIST_HEAD(&call->sock_link); init_waitqueue_head(&call->waitq); spin_lock_init(&call->lock); rwlock_init(&call->state_lock); @@ -150,63 +149,52 @@ struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp) memset(&call->sock_node, 0xed, sizeof(call->sock_node)); - call->rx_data_expect = 1; - call->rx_data_eaten = 0; - call->rx_first_oos = 0; - call->ackr_win_top = call->rx_data_eaten + 1 + rxrpc_rx_window_size; - call->creation_jif = jiffies; + /* Leave space in the ring to handle a maxed-out jumbo packet */ + call->rx_winsize = RXRPC_RXTX_BUFF_SIZE - 1 - 46; + call->tx_winsize = 16; + call->rx_expect_next = 1; return call; + +nomem_2: + kfree(call->rxtx_buffer); +nomem: + kmem_cache_free(rxrpc_call_jar, call); + return NULL; } /* * Allocate a new client call. */ -static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx, - struct sockaddr_rxrpc *srx, +static struct rxrpc_call *rxrpc_alloc_client_call(struct sockaddr_rxrpc *srx, gfp_t gfp) { struct rxrpc_call *call; _enter(""); - ASSERT(rx->local != NULL); - call = rxrpc_alloc_call(gfp); if (!call) return ERR_PTR(-ENOMEM); call->state = RXRPC_CALL_CLIENT_AWAIT_CONN; - call->rx_data_post = 1; call->service_id = srx->srx_service; - rcu_assign_pointer(call->socket, rx); _leave(" = %p", call); return call; } /* - * Begin client call. + * Initiate the call ack/resend/expiry timer. */ -static int rxrpc_begin_client_call(struct rxrpc_call *call, - struct rxrpc_conn_parameters *cp, - struct sockaddr_rxrpc *srx, - gfp_t gfp) +static void rxrpc_start_call_timer(struct rxrpc_call *call) { - int ret; + unsigned long expire_at; - /* Set up or get a connection record and set the protocol parameters, - * including channel number and call ID. - */ - ret = rxrpc_connect_call(call, cp, srx, gfp); - if (ret < 0) - return ret; - - spin_lock(&call->conn->params.peer->lock); - hlist_add_head(&call->error_link, &call->conn->params.peer->error_targets); - spin_unlock(&call->conn->params.peer->lock); - - call->lifetimer.expires = jiffies + rxrpc_max_call_lifetime; - add_timer(&call->lifetimer); - return 0; + expire_at = jiffies + rxrpc_max_call_lifetime; + call->expire_at = expire_at; + call->ack_at = expire_at; + call->resend_at = expire_at; + call->timer.expires = expire_at; + add_timer(&call->timer); } /* @@ -226,7 +214,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, _enter("%p,%lx", rx, user_call_ID); - call = rxrpc_alloc_client_call(rx, srx, gfp); + call = rxrpc_alloc_client_call(srx, gfp); if (IS_ERR(call)) { _leave(" = %ld", PTR_ERR(call)); return call; @@ -255,19 +243,32 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, goto found_user_ID_now_present; } + rcu_assign_pointer(call->socket, rx); rxrpc_get_call(call, rxrpc_call_got_userid); rb_link_node(&call->sock_node, parent, pp); rb_insert_color(&call->sock_node, &rx->calls); + list_add(&call->sock_link, &rx->sock_calls); + write_unlock(&rx->call_lock); - write_lock_bh(&rxrpc_call_lock); + write_lock(&rxrpc_call_lock); list_add_tail(&call->link, &rxrpc_calls); - write_unlock_bh(&rxrpc_call_lock); + write_unlock(&rxrpc_call_lock); - ret = rxrpc_begin_client_call(call, cp, srx, gfp); + /* Set up or get a connection record and set the protocol parameters, + * including channel number and call ID. + */ + ret = rxrpc_connect_call(call, cp, srx, gfp); if (ret < 0) goto error; + spin_lock_bh(&call->conn->params.peer->lock); + hlist_add_head(&call->error_link, + &call->conn->params.peer->error_targets); + spin_unlock_bh(&call->conn->params.peer->lock); + + rxrpc_start_call_timer(call); + _net("CALL new %d on CONN %d", call->debug_id, call->conn->debug_id); _leave(" = %p [new]", call); @@ -279,9 +280,9 @@ error: write_unlock(&rx->call_lock); rxrpc_put_call(call, rxrpc_call_put_userid); - write_lock_bh(&rxrpc_call_lock); + write_lock(&rxrpc_call_lock); list_del_init(&call->link); - write_unlock_bh(&rxrpc_call_lock); + write_unlock(&rxrpc_call_lock); error_out: __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, @@ -303,142 +304,46 @@ found_user_ID_now_present: } /* - * set up an incoming call - * - called in process context with IRQs enabled + * Set up an incoming call. call->conn points to the connection. + * This is called in BH context and isn't allowed to fail. */ -struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx, - struct rxrpc_connection *conn, - struct sk_buff *skb) +void rxrpc_incoming_call(struct rxrpc_sock *rx, + struct rxrpc_call *call, + struct sk_buff *skb) { + struct rxrpc_connection *conn = call->conn; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - struct rxrpc_call *call, *candidate; - const void *here = __builtin_return_address(0); - u32 call_id, chan; + u32 chan; - _enter(",%d", conn->debug_id); + _enter(",%d", call->conn->debug_id); - ASSERT(rx != NULL); + rcu_assign_pointer(call->socket, rx); + call->call_id = sp->hdr.callNumber; + call->service_id = sp->hdr.serviceId; + call->cid = sp->hdr.cid; + call->state = RXRPC_CALL_SERVER_ACCEPTING; + if (sp->hdr.securityIndex > 0) + call->state = RXRPC_CALL_SERVER_SECURING; - candidate = rxrpc_alloc_call(GFP_NOIO); - if (!candidate) - return ERR_PTR(-EBUSY); - - trace_rxrpc_call(candidate, rxrpc_call_new_service, - atomic_read(&candidate->usage), here, NULL); - - chan = sp->hdr.cid & RXRPC_CHANNELMASK; - candidate->conn = conn; - candidate->peer = conn->params.peer; - candidate->cid = sp->hdr.cid; - candidate->call_id = sp->hdr.callNumber; - candidate->security_ix = sp->hdr.securityIndex; - candidate->rx_data_post = 0; - candidate->state = RXRPC_CALL_SERVER_ACCEPTING; - candidate->flags |= (1 << RXRPC_CALL_IS_SERVICE); - if (conn->security_ix > 0) - candidate->state = RXRPC_CALL_SERVER_SECURING; - rcu_assign_pointer(candidate->socket, rx); - - spin_lock(&conn->channel_lock); - - /* set the channel for this call */ - call = rcu_dereference_protected(conn->channels[chan].call, - lockdep_is_held(&conn->channel_lock)); - - _debug("channel[%u] is %p", candidate->cid & RXRPC_CHANNELMASK, call); - if (call && call->call_id == sp->hdr.callNumber) { - /* already set; must've been a duplicate packet */ - _debug("extant call [%d]", call->state); - ASSERTCMP(call->conn, ==, conn); - - read_lock(&call->state_lock); - switch (call->state) { - case RXRPC_CALL_LOCALLY_ABORTED: - if (!test_and_set_bit(RXRPC_CALL_EV_ABORT, &call->events)) - rxrpc_queue_call(call); - case RXRPC_CALL_REMOTELY_ABORTED: - read_unlock(&call->state_lock); - goto aborted_call; - default: - rxrpc_get_call(call, rxrpc_call_got); - read_unlock(&call->state_lock); - goto extant_call; - } - } - - if (call) { - /* it seems the channel is still in use from the previous call - * - ditch the old binding if its call is now complete */ - _debug("CALL: %u { %s }", - call->debug_id, rxrpc_call_states[call->state]); - - if (call->state == RXRPC_CALL_COMPLETE) { - __rxrpc_disconnect_call(conn, call); - } else { - spin_unlock(&conn->channel_lock); - kmem_cache_free(rxrpc_call_jar, candidate); - _leave(" = -EBUSY"); - return ERR_PTR(-EBUSY); - } - } - - /* check the call number isn't duplicate */ - _debug("check dup"); - call_id = sp->hdr.callNumber; - - /* We just ignore calls prior to the current call ID. Terminated calls - * are handled via the connection. + /* Set the channel for this call. We don't get channel_lock as we're + * only defending against the data_ready handler (which we're called + * from) and the RESPONSE packet parser (which is only really + * interested in call_counter and can cope with a disagreement with the + * call pointer). */ - if (call_id <= conn->channels[chan].call_counter) - goto old_call; /* TODO: Just drop packet */ - - /* Temporary: Mirror the backlog prealloc ref (TODO: use prealloc) */ - rxrpc_get_call(candidate, rxrpc_call_got); - - /* make the call available */ - _debug("new call"); - call = candidate; - candidate = NULL; - conn->channels[chan].call_counter = call_id; + chan = sp->hdr.cid & RXRPC_CHANNELMASK; + conn->channels[chan].call_counter = call->call_id; + conn->channels[chan].call_id = call->call_id; rcu_assign_pointer(conn->channels[chan].call, call); - rxrpc_get_connection(conn); - rxrpc_get_peer(call->peer); - spin_unlock(&conn->channel_lock); spin_lock(&conn->params.peer->lock); hlist_add_head(&call->error_link, &conn->params.peer->error_targets); spin_unlock(&conn->params.peer->lock); - write_lock_bh(&rxrpc_call_lock); - list_add_tail(&call->link, &rxrpc_calls); - write_unlock_bh(&rxrpc_call_lock); - - call->service_id = conn->params.service_id; - _net("CALL incoming %d on CONN %d", call->debug_id, call->conn->debug_id); - call->lifetimer.expires = jiffies + rxrpc_max_call_lifetime; - add_timer(&call->lifetimer); - _leave(" = %p {%d} [new]", call, call->debug_id); - return call; - -extant_call: - spin_unlock(&conn->channel_lock); - kmem_cache_free(rxrpc_call_jar, candidate); - _leave(" = %p {%d} [extant]", call, call ? call->debug_id : -1); - return call; - -aborted_call: - spin_unlock(&conn->channel_lock); - kmem_cache_free(rxrpc_call_jar, candidate); - _leave(" = -ECONNABORTED"); - return ERR_PTR(-ECONNABORTED); - -old_call: - spin_unlock(&conn->channel_lock); - kmem_cache_free(rxrpc_call_jar, candidate); - _leave(" = -ECONNRESET [old]"); - return ERR_PTR(-ECONNRESET); + rxrpc_start_call_timer(call); + _leave(""); } /* @@ -497,25 +402,17 @@ void rxrpc_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op) } /* - * Note the addition of a ref on a call for a socket buffer. - */ -void rxrpc_get_call_for_skb(struct rxrpc_call *call, struct sk_buff *skb) -{ - const void *here = __builtin_return_address(0); - int n = atomic_inc_return(&call->usage); - - trace_rxrpc_call(call, rxrpc_call_got_skb, n, here, skb); -} - -/* - * detach a call from a socket and set up for release + * Detach a call from its owning socket. */ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) { - _enter("{%d,%d,%d,%d}", - call->debug_id, atomic_read(&call->usage), - atomic_read(&call->ackr_not_idle), - call->rx_first_oos); + struct rxrpc_connection *conn = call->conn; + bool put = false; + int i; + + _enter("{%d,%d}", call->debug_id, atomic_read(&call->usage)); + + ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); rxrpc_see_call(call); @@ -524,80 +421,46 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) BUG(); spin_unlock_bh(&call->lock); - /* dissociate from the socket - * - the socket's ref on the call is passed to the death timer - */ - _debug("RELEASE CALL %p (%d)", call, call->debug_id); + del_timer_sync(&call->timer); - if (call->peer) { - spin_lock(&call->peer->lock); - hlist_del_init(&call->error_link); - spin_unlock(&call->peer->lock); - } + /* Make sure we don't get any more notifications */ + write_lock_bh(&rx->recvmsg_lock); - write_lock_bh(&rx->call_lock); - if (!list_empty(&call->accept_link)) { + if (!list_empty(&call->recvmsg_link)) { _debug("unlinking once-pending call %p { e=%lx f=%lx }", call, call->events, call->flags); - ASSERT(!test_bit(RXRPC_CALL_HAS_USERID, &call->flags)); - list_del_init(&call->accept_link); - sk_acceptq_removed(&rx->sk); - } else if (test_bit(RXRPC_CALL_HAS_USERID, &call->flags)) { + list_del(&call->recvmsg_link); + put = true; + } + + /* list_empty() must return false in rxrpc_notify_socket() */ + call->recvmsg_link.next = NULL; + call->recvmsg_link.prev = NULL; + + write_unlock_bh(&rx->recvmsg_lock); + if (put) + rxrpc_put_call(call, rxrpc_call_put); + + write_lock(&rx->call_lock); + + if (test_and_clear_bit(RXRPC_CALL_HAS_USERID, &call->flags)) { rb_erase(&call->sock_node, &rx->calls); memset(&call->sock_node, 0xdd, sizeof(call->sock_node)); - clear_bit(RXRPC_CALL_HAS_USERID, &call->flags); rxrpc_put_call(call, rxrpc_call_put_userid); } - write_unlock_bh(&rx->call_lock); - /* free up the channel for reuse */ - if (call->state == RXRPC_CALL_CLIENT_FINAL_ACK) { - clear_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events); - rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK); - rxrpc_call_completed(call); - } else { - write_lock_bh(&call->state_lock); + list_del(&call->sock_link); + write_unlock(&rx->call_lock); - if (call->state < RXRPC_CALL_COMPLETE) { - _debug("+++ ABORTING STATE %d +++\n", call->state); - __rxrpc_abort_call("SKT", call, 0, RX_CALL_DEAD, ECONNRESET); - clear_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events); - rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); - } + _debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, conn); - write_unlock_bh(&call->state_lock); - } - - if (call->conn) + if (conn) rxrpc_disconnect_call(call); - /* clean up the Rx queue */ - if (!skb_queue_empty(&call->rx_queue) || - !skb_queue_empty(&call->rx_oos_queue)) { - struct rxrpc_skb_priv *sp; - struct sk_buff *skb; - - _debug("purge Rx queues"); - - spin_lock_bh(&call->lock); - while ((skb = skb_dequeue(&call->rx_queue)) || - (skb = skb_dequeue(&call->rx_oos_queue))) { - spin_unlock_bh(&call->lock); - - sp = rxrpc_skb(skb); - _debug("- zap %s %%%u #%u", - rxrpc_pkts[sp->hdr.type], - sp->hdr.serial, sp->hdr.seq); - rxrpc_free_skb(skb); - spin_lock_bh(&call->lock); - } - spin_unlock_bh(&call->lock); + for (i = 0; i < RXRPC_RXTX_BUFF_SIZE; i++) { + rxrpc_free_skb(call->rxtx_buffer[i]); + call->rxtx_buffer[i] = NULL; } - rxrpc_purge_queue(&call->knlrecv_queue); - - del_timer_sync(&call->resend_timer); - del_timer_sync(&call->ack_timer); - del_timer_sync(&call->lifetimer); /* We have to release the prealloc backlog ref */ if (rxrpc_is_service_call(call)) @@ -611,28 +474,19 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx) { struct rxrpc_call *call; - struct rb_node *p; _enter("%p", rx); - read_lock_bh(&rx->call_lock); - - /* kill the not-yet-accepted incoming calls */ - list_for_each_entry(call, &rx->secureq, accept_link) { + while (!list_empty(&rx->sock_calls)) { + call = list_entry(rx->sock_calls.next, + struct rxrpc_call, sock_link); + rxrpc_get_call(call, rxrpc_call_got); + rxrpc_abort_call("SKT", call, 0, RX_CALL_DEAD, ECONNRESET); + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); rxrpc_release_call(rx, call); + rxrpc_put_call(call, rxrpc_call_put); } - list_for_each_entry(call, &rx->acceptq, accept_link) { - rxrpc_release_call(rx, call); - } - - /* mark all the calls as no longer wanting incoming packets */ - for (p = rb_first(&rx->calls); p; p = rb_next(p)) { - call = rb_entry(p, struct rxrpc_call, sock_node); - rxrpc_release_call(rx, call); - } - - read_unlock_bh(&rx->call_lock); _leave(""); } @@ -651,23 +505,12 @@ void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace op) ASSERTCMP(n, >=, 0); if (n == 0) { _debug("call %d dead", call->debug_id); - rxrpc_cleanup_call(call); - } -} + ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); -/* - * Release a call ref held by a socket buffer. - */ -void rxrpc_put_call_for_skb(struct rxrpc_call *call, struct sk_buff *skb) -{ - const void *here = __builtin_return_address(0); - int n; + write_lock(&rxrpc_call_lock); + list_del_init(&call->link); + write_unlock(&rxrpc_call_lock); - n = atomic_dec_return(&call->usage); - trace_rxrpc_call(call, rxrpc_call_put_skb, n, here, skb); - ASSERTCMP(n, >=, 0); - if (n == 0) { - _debug("call %d dead", call->debug_id); rxrpc_cleanup_call(call); } } @@ -679,9 +522,9 @@ static void rxrpc_rcu_destroy_call(struct rcu_head *rcu) { struct rxrpc_call *call = container_of(rcu, struct rxrpc_call, rcu); - rxrpc_purge_queue(&call->rx_queue); - rxrpc_purge_queue(&call->knlrecv_queue); rxrpc_put_peer(call->peer); + kfree(call->rxtx_buffer); + kfree(call->rxtx_annotations); kmem_cache_free(rxrpc_call_jar, call); } @@ -690,49 +533,24 @@ static void rxrpc_rcu_destroy_call(struct rcu_head *rcu) */ void rxrpc_cleanup_call(struct rxrpc_call *call) { - _net("DESTROY CALL %d", call->debug_id); + int i; - write_lock_bh(&rxrpc_call_lock); - list_del_init(&call->link); - write_unlock_bh(&rxrpc_call_lock); + _net("DESTROY CALL %d", call->debug_id); memset(&call->sock_node, 0xcd, sizeof(call->sock_node)); - del_timer_sync(&call->lifetimer); - del_timer_sync(&call->ack_timer); - del_timer_sync(&call->resend_timer); + del_timer_sync(&call->timer); ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); ASSERT(test_bit(RXRPC_CALL_RELEASED, &call->flags)); - ASSERT(!work_pending(&call->processor)); ASSERTCMP(call->conn, ==, NULL); - if (call->acks_window) { - _debug("kill Tx window %d", - CIRC_CNT(call->acks_head, call->acks_tail, - call->acks_winsz)); - smp_mb(); - while (CIRC_CNT(call->acks_head, call->acks_tail, - call->acks_winsz) > 0) { - struct rxrpc_skb_priv *sp; - unsigned long _skb; - - _skb = call->acks_window[call->acks_tail] & ~1; - sp = rxrpc_skb((struct sk_buff *)_skb); - _debug("+++ clear Tx %u", sp->hdr.seq); - rxrpc_free_skb((struct sk_buff *)_skb); - call->acks_tail = - (call->acks_tail + 1) & (call->acks_winsz - 1); - } - - kfree(call->acks_window); - } + /* Clean up the Rx/Tx buffer */ + for (i = 0; i < RXRPC_RXTX_BUFF_SIZE; i++) + rxrpc_free_skb(call->rxtx_buffer[i]); rxrpc_free_skb(call->tx_pending); - rxrpc_purge_queue(&call->rx_queue); - ASSERT(skb_queue_empty(&call->rx_oos_queue)); - rxrpc_purge_queue(&call->knlrecv_queue); call_rcu(&call->rcu, rxrpc_rcu_destroy_call); } @@ -747,8 +565,8 @@ void __exit rxrpc_destroy_all_calls(void) if (list_empty(&rxrpc_calls)) return; - - write_lock_bh(&rxrpc_call_lock); + + write_lock(&rxrpc_call_lock); while (!list_empty(&rxrpc_calls)) { call = list_entry(rxrpc_calls.next, struct rxrpc_call, link); @@ -757,74 +575,15 @@ void __exit rxrpc_destroy_all_calls(void) rxrpc_see_call(call); list_del_init(&call->link); - pr_err("Call %p still in use (%d,%d,%s,%lx,%lx)!\n", + pr_err("Call %p still in use (%d,%s,%lx,%lx)!\n", call, atomic_read(&call->usage), - atomic_read(&call->ackr_not_idle), rxrpc_call_states[call->state], call->flags, call->events); - if (!skb_queue_empty(&call->rx_queue)) - pr_err("Rx queue occupied\n"); - if (!skb_queue_empty(&call->rx_oos_queue)) - pr_err("OOS queue occupied\n"); - write_unlock_bh(&rxrpc_call_lock); + write_unlock(&rxrpc_call_lock); cond_resched(); - write_lock_bh(&rxrpc_call_lock); + write_lock(&rxrpc_call_lock); } - write_unlock_bh(&rxrpc_call_lock); - _leave(""); -} - -/* - * handle call lifetime being exceeded - */ -static void rxrpc_call_life_expired(unsigned long _call) -{ - struct rxrpc_call *call = (struct rxrpc_call *) _call; - - _enter("{%d}", call->debug_id); - - rxrpc_see_call(call); - if (call->state >= RXRPC_CALL_COMPLETE) - return; - - set_bit(RXRPC_CALL_EV_LIFE_TIMER, &call->events); - rxrpc_queue_call(call); -} - -/* - * handle resend timer expiry - * - may not take call->state_lock as this can deadlock against del_timer_sync() - */ -static void rxrpc_resend_time_expired(unsigned long _call) -{ - struct rxrpc_call *call = (struct rxrpc_call *) _call; - - _enter("{%d}", call->debug_id); - - rxrpc_see_call(call); - if (call->state >= RXRPC_CALL_COMPLETE) - return; - - clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags); - if (!test_and_set_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events)) - rxrpc_queue_call(call); -} - -/* - * handle ACK timer expiry - */ -static void rxrpc_ack_time_expired(unsigned long _call) -{ - struct rxrpc_call *call = (struct rxrpc_call *) _call; - - _enter("{%d}", call->debug_id); - - rxrpc_see_call(call); - if (call->state >= RXRPC_CALL_COMPLETE) - return; - - if (!test_and_set_bit(RXRPC_CALL_EV_ACK, &call->events)) - rxrpc_queue_call(call); + write_unlock(&rxrpc_call_lock); } diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 8c7938ba6a84..0691007cfc02 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -15,10 +15,6 @@ #include #include #include -#include -#include -#include -#include #include #include #include @@ -140,16 +136,10 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn, u32 abort_code, int error) { struct rxrpc_call *call; - bool queue; - int i, bit; + int i; _enter("{%d},%x", conn->debug_id, abort_code); - if (compl == RXRPC_CALL_LOCALLY_ABORTED) - bit = RXRPC_CALL_EV_CONN_ABORT; - else - bit = RXRPC_CALL_EV_RCVD_ABORT; - spin_lock(&conn->channel_lock); for (i = 0; i < RXRPC_MAXCALLS; i++) { @@ -157,22 +147,13 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn, conn->channels[i].call, lockdep_is_held(&conn->channel_lock)); if (call) { - rxrpc_see_call(call); if (compl == RXRPC_CALL_LOCALLY_ABORTED) trace_rxrpc_abort("CON", call->cid, call->call_id, 0, abort_code, error); - - write_lock_bh(&call->state_lock); - if (rxrpc_set_call_completion(call, compl, abort_code, - error)) { - set_bit(bit, &call->events); - queue = true; - } - write_unlock_bh(&call->state_lock); - if (queue) - rxrpc_queue_call(call); - + if (rxrpc_set_call_completion(call, compl, + abort_code, error)) + rxrpc_notify_socket(call); } } @@ -251,17 +232,18 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn, /* * mark a call as being on a now-secured channel - * - must be called with softirqs disabled + * - must be called with BH's disabled. */ static void rxrpc_call_is_secure(struct rxrpc_call *call) { _enter("%p", call); if (call) { - read_lock(&call->state_lock); - if (call->state < RXRPC_CALL_COMPLETE && - !test_and_set_bit(RXRPC_CALL_EV_SECURED, &call->events)) - rxrpc_queue_call(call); - read_unlock(&call->state_lock); + write_lock_bh(&call->state_lock); + if (call->state == RXRPC_CALL_SERVER_SECURING) { + call->state = RXRPC_CALL_SERVER_ACCEPTING; + rxrpc_notify_socket(call); + } + write_unlock_bh(&call->state_lock); } } @@ -278,7 +260,7 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, int loop, ret; if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) { - kleave(" = -ECONNABORTED [%u]", conn->state); + _leave(" = -ECONNABORTED [%u]", conn->state); return -ECONNABORTED; } @@ -291,14 +273,14 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, return 0; case RXRPC_PACKET_TYPE_ABORT: - if (skb_copy_bits(skb, 0, &wtmp, sizeof(wtmp)) < 0) + if (skb_copy_bits(skb, sp->offset, &wtmp, sizeof(wtmp)) < 0) return -EPROTO; abort_code = ntohl(wtmp); _proto("Rx ABORT %%%u { ac=%d }", sp->hdr.serial, abort_code); conn->state = RXRPC_CONN_REMOTELY_ABORTED; - rxrpc_abort_calls(conn, 0, RXRPC_CALL_REMOTELY_ABORTED, - abort_code); + rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED, + abort_code, ECONNABORTED); return -ECONNABORTED; case RXRPC_PACKET_TYPE_CHALLENGE: @@ -323,14 +305,16 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, if (conn->state == RXRPC_CONN_SERVICE_CHALLENGING) { conn->state = RXRPC_CONN_SERVICE; + spin_unlock(&conn->state_lock); for (loop = 0; loop < RXRPC_MAXCALLS; loop++) rxrpc_call_is_secure( rcu_dereference_protected( conn->channels[loop].call, lockdep_is_held(&conn->channel_lock))); + } else { + spin_unlock(&conn->state_lock); } - spin_unlock(&conn->state_lock); spin_unlock(&conn->channel_lock); return 0; @@ -433,88 +417,3 @@ protocol_error: _leave(" [EPROTO]"); goto out; } - -/* - * put a packet up for transport-level abort - */ -void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb) -{ - CHECK_SLAB_OKAY(&local->usage); - - skb_queue_tail(&local->reject_queue, skb); - rxrpc_queue_local(local); -} - -/* - * reject packets through the local endpoint - */ -void rxrpc_reject_packets(struct rxrpc_local *local) -{ - union { - struct sockaddr sa; - struct sockaddr_in sin; - } sa; - struct rxrpc_skb_priv *sp; - struct rxrpc_wire_header whdr; - struct sk_buff *skb; - struct msghdr msg; - struct kvec iov[2]; - size_t size; - __be32 code; - - _enter("%d", local->debug_id); - - iov[0].iov_base = &whdr; - iov[0].iov_len = sizeof(whdr); - iov[1].iov_base = &code; - iov[1].iov_len = sizeof(code); - size = sizeof(whdr) + sizeof(code); - - msg.msg_name = &sa; - msg.msg_control = NULL; - msg.msg_controllen = 0; - msg.msg_flags = 0; - - memset(&sa, 0, sizeof(sa)); - sa.sa.sa_family = local->srx.transport.family; - switch (sa.sa.sa_family) { - case AF_INET: - msg.msg_namelen = sizeof(sa.sin); - break; - default: - msg.msg_namelen = 0; - break; - } - - memset(&whdr, 0, sizeof(whdr)); - whdr.type = RXRPC_PACKET_TYPE_ABORT; - - while ((skb = skb_dequeue(&local->reject_queue))) { - rxrpc_see_skb(skb); - sp = rxrpc_skb(skb); - switch (sa.sa.sa_family) { - case AF_INET: - sa.sin.sin_port = udp_hdr(skb)->source; - sa.sin.sin_addr.s_addr = ip_hdr(skb)->saddr; - code = htonl(skb->priority); - - whdr.epoch = htonl(sp->hdr.epoch); - whdr.cid = htonl(sp->hdr.cid); - whdr.callNumber = htonl(sp->hdr.callNumber); - whdr.serviceId = htons(sp->hdr.serviceId); - whdr.flags = sp->hdr.flags; - whdr.flags ^= RXRPC_CLIENT_INITIATED; - whdr.flags &= RXRPC_CLIENT_INITIATED; - - kernel_sendmsg(local->socket, &msg, iov, 2, size); - break; - - default: - break; - } - - rxrpc_free_skb(skb); - } - - _leave(""); -} diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 8da82e3aa00e..ffa9addb97b2 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -169,7 +169,7 @@ void __rxrpc_disconnect_call(struct rxrpc_connection *conn, chan->last_abort = call->abort_code; chan->last_type = RXRPC_PACKET_TYPE_ABORT; } else { - chan->last_seq = call->rx_data_eaten; + chan->last_seq = call->rx_hard_ack; chan->last_type = RXRPC_PACKET_TYPE_ACK; } /* Sync with rxrpc_conn_retransmit(). */ @@ -191,6 +191,10 @@ void rxrpc_disconnect_call(struct rxrpc_call *call) { struct rxrpc_connection *conn = call->conn; + spin_lock_bh(&conn->params.peer->lock); + hlist_del_init(&call->error_link); + spin_unlock_bh(&conn->params.peer->lock); + if (rxrpc_is_client_call(call)) return rxrpc_disconnect_client_call(call); diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c index 189338a60457..83d54da4ce8b 100644 --- a/net/rxrpc/conn_service.c +++ b/net/rxrpc/conn_service.c @@ -65,9 +65,8 @@ done: * Insert a service connection into a peer's tree, thereby making it a target * for incoming packets. */ -static struct rxrpc_connection * -rxrpc_publish_service_conn(struct rxrpc_peer *peer, - struct rxrpc_connection *conn) +static void rxrpc_publish_service_conn(struct rxrpc_peer *peer, + struct rxrpc_connection *conn) { struct rxrpc_connection *cursor = NULL; struct rxrpc_conn_proto k = conn->proto; @@ -96,7 +95,7 @@ conn_published: set_bit(RXRPC_CONN_IN_SERVICE_CONNS, &conn->flags); write_sequnlock_bh(&peer->service_conn_lock); _leave(" = %d [new]", conn->debug_id); - return conn; + return; found_extant_conn: if (atomic_read(&cursor->usage) == 0) @@ -143,106 +142,30 @@ struct rxrpc_connection *rxrpc_prealloc_service_connection(gfp_t gfp) } /* - * get a record of an incoming connection + * Set up an incoming connection. This is called in BH context with the RCU + * read lock held. */ -struct rxrpc_connection *rxrpc_incoming_connection(struct rxrpc_local *local, - struct sockaddr_rxrpc *srx, - struct sk_buff *skb) +void rxrpc_new_incoming_connection(struct rxrpc_connection *conn, + struct sk_buff *skb) { - struct rxrpc_connection *conn; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - struct rxrpc_peer *peer; - const char *new = "old"; _enter(""); - peer = rxrpc_lookup_peer(local, srx, GFP_NOIO); - if (!peer) { - _debug("no peer"); - return ERR_PTR(-EBUSY); - } - - ASSERT(sp->hdr.flags & RXRPC_CLIENT_INITIATED); - - rcu_read_lock(); - peer = rxrpc_lookup_peer_rcu(local, srx); - if (peer) { - conn = rxrpc_find_service_conn_rcu(peer, skb); - if (conn) { - if (sp->hdr.securityIndex != conn->security_ix) - goto security_mismatch_rcu; - if (rxrpc_get_connection_maybe(conn)) - goto found_extant_connection_rcu; - - /* The conn has expired but we can't remove it without - * the appropriate lock, so we attempt to replace it - * when we have a new candidate. - */ - } - - if (!rxrpc_get_peer_maybe(peer)) - peer = NULL; - } - rcu_read_unlock(); - - if (!peer) { - peer = rxrpc_lookup_peer(local, srx, GFP_NOIO); - if (!peer) - goto enomem; - } - - /* We don't have a matching record yet. */ - conn = rxrpc_alloc_connection(GFP_NOIO); - if (!conn) - goto enomem_peer; - conn->proto.epoch = sp->hdr.epoch; conn->proto.cid = sp->hdr.cid & RXRPC_CIDMASK; - conn->params.local = local; - conn->params.peer = peer; conn->params.service_id = sp->hdr.serviceId; conn->security_ix = sp->hdr.securityIndex; conn->out_clientflag = 0; - conn->state = RXRPC_CONN_SERVICE; - if (conn->params.service_id) + if (conn->security_ix) conn->state = RXRPC_CONN_SERVICE_UNSECURED; - - rxrpc_get_local(local); - - /* We maintain an extra ref on the connection whilst it is on - * the rxrpc_connections list. - */ - atomic_set(&conn->usage, 2); - - write_lock(&rxrpc_connection_lock); - list_add_tail(&conn->link, &rxrpc_connections); - list_add_tail(&conn->proc_link, &rxrpc_connection_proc_list); - write_unlock(&rxrpc_connection_lock); + else + conn->state = RXRPC_CONN_SERVICE; /* Make the connection a target for incoming packets. */ - rxrpc_publish_service_conn(peer, conn); + rxrpc_publish_service_conn(conn->params.peer, conn); - new = "new"; - -success: - _net("CONNECTION %s %d {%x}", new, conn->debug_id, conn->proto.cid); - _leave(" = %p {u=%d}", conn, atomic_read(&conn->usage)); - return conn; - -found_extant_connection_rcu: - rcu_read_unlock(); - goto success; - -security_mismatch_rcu: - rcu_read_unlock(); - _leave(" = -EKEYREJECTED"); - return ERR_PTR(-EKEYREJECTED); - -enomem_peer: - rxrpc_put_peer(peer); -enomem: - _leave(" = -ENOMEM"); - return ERR_PTR(-ENOMEM); + _net("CONNECTION new %d {%x}", conn->debug_id, conn->proto.cid); } /* diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 5906579060cd..afeba98004b1 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -1,6 +1,6 @@ /* RxRPC packet reception * - * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2007, 2016 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or @@ -27,549 +27,547 @@ #include #include "ar-internal.h" -/* - * queue a packet for recvmsg to pass to userspace - * - the caller must hold a lock on call->lock - * - must not be called with interrupts disabled (sk_filter() disables BH's) - * - eats the packet whether successful or not - * - there must be just one reference to the packet, which the caller passes to - * this function - */ -int rxrpc_queue_rcv_skb(struct rxrpc_call *call, struct sk_buff *skb, - bool force, bool terminal) +static void rxrpc_proto_abort(const char *why, + struct rxrpc_call *call, rxrpc_seq_t seq) { - struct rxrpc_skb_priv *sp; - struct rxrpc_sock *rx; - struct sock *sk; - int ret; - - _enter(",,%d,%d", force, terminal); - - ASSERT(!irqs_disabled()); - - sp = rxrpc_skb(skb); - ASSERTCMP(sp->call, ==, call); - - /* if we've already posted the terminal message for a call, then we - * don't post any more */ - if (test_bit(RXRPC_CALL_TERMINAL_MSG, &call->flags)) { - _debug("already terminated"); - ASSERTCMP(call->state, >=, RXRPC_CALL_COMPLETE); - rxrpc_free_skb(skb); - return 0; + if (rxrpc_abort_call(why, call, seq, RX_PROTOCOL_ERROR, EBADMSG)) { + set_bit(RXRPC_CALL_EV_ABORT, &call->events); + rxrpc_queue_call(call); } - - /* The socket may go away under us */ - ret = 0; - rcu_read_lock(); - rx = rcu_dereference(call->socket); - if (!rx) - goto out; - sk = &rx->sk; - if (sock_flag(sk, SOCK_DEAD)) - goto out; - - if (!force) { - /* cast skb->rcvbuf to unsigned... It's pointless, but - * reduces number of warnings when compiling with -W - * --ANK */ -// ret = -ENOBUFS; -// if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= -// (unsigned int) sk->sk_rcvbuf) -// goto out; - - ret = sk_filter(sk, skb); - if (ret < 0) - goto out; - } - - spin_lock_bh(&sk->sk_receive_queue.lock); - if (!test_bit(RXRPC_CALL_TERMINAL_MSG, &call->flags) && - !test_bit(RXRPC_CALL_RELEASED, &call->flags) && - sk->sk_state != RXRPC_CLOSE) { - skb->destructor = rxrpc_packet_destructor; - skb->dev = NULL; - skb->sk = sk; - atomic_add(skb->truesize, &sk->sk_rmem_alloc); - - if (terminal) { - _debug("<<<< TERMINAL MESSAGE >>>>"); - set_bit(RXRPC_CALL_TERMINAL_MSG, &call->flags); - } - - /* allow interception by a kernel service */ - if (skb->mark == RXRPC_SKB_MARK_NEW_CALL && - rx->notify_new_call) { - spin_unlock_bh(&sk->sk_receive_queue.lock); - skb_queue_tail(&call->knlrecv_queue, skb); - rx->notify_new_call(&rx->sk, NULL, 0); - } else if (call->notify_rx) { - spin_unlock_bh(&sk->sk_receive_queue.lock); - skb_queue_tail(&call->knlrecv_queue, skb); - call->notify_rx(&rx->sk, call, call->user_call_ID); - } else { - _net("post skb %p", skb); - __skb_queue_tail(&sk->sk_receive_queue, skb); - spin_unlock_bh(&sk->sk_receive_queue.lock); - - sk->sk_data_ready(sk); - } - skb = NULL; - } else { - spin_unlock_bh(&sk->sk_receive_queue.lock); - } - ret = 0; - -out: - rxrpc_free_skb(skb); - rcu_read_unlock(); - - _leave(" = %d", ret); - return ret; } /* - * process a DATA packet, posting the packet to the appropriate queue - * - eats the packet if successful + * Apply a hard ACK by advancing the Tx window. */ -static int rxrpc_fast_process_data(struct rxrpc_call *call, - struct sk_buff *skb, u32 seq) +static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to) { - struct rxrpc_skb_priv *sp; - bool terminal; - int ret, ackbit, ack; - u32 serial; - u16 skew; - u8 flags; - - _enter("{%u,%u},,{%u}", call->rx_data_post, call->rx_first_oos, seq); - - sp = rxrpc_skb(skb); - ASSERTCMP(sp->call, ==, NULL); - flags = sp->hdr.flags; - serial = sp->hdr.serial; - skew = skb->priority; + struct sk_buff *skb, *list = NULL; + int ix; spin_lock(&call->lock); - if (call->state > RXRPC_CALL_COMPLETE) - goto discard; - - ASSERTCMP(call->rx_data_expect, >=, call->rx_data_post); - ASSERTCMP(call->rx_data_post, >=, call->rx_data_recv); - ASSERTCMP(call->rx_data_recv, >=, call->rx_data_eaten); - - if (seq < call->rx_data_post) { - _debug("dup #%u [-%u]", seq, call->rx_data_post); - ack = RXRPC_ACK_DUPLICATE; - ret = -ENOBUFS; - goto discard_and_ack; - } - - /* we may already have the packet in the out of sequence queue */ - ackbit = seq - (call->rx_data_eaten + 1); - ASSERTCMP(ackbit, >=, 0); - if (__test_and_set_bit(ackbit, call->ackr_window)) { - _debug("dup oos #%u [%u,%u]", - seq, call->rx_data_eaten, call->rx_data_post); - ack = RXRPC_ACK_DUPLICATE; - goto discard_and_ack; - } - - if (seq >= call->ackr_win_top) { - _debug("exceed #%u [%u]", seq, call->ackr_win_top); - __clear_bit(ackbit, call->ackr_window); - ack = RXRPC_ACK_EXCEEDS_WINDOW; - goto discard_and_ack; - } - - if (seq == call->rx_data_expect) { - clear_bit(RXRPC_CALL_EXPECT_OOS, &call->flags); - call->rx_data_expect++; - } else if (seq > call->rx_data_expect) { - _debug("oos #%u [%u]", seq, call->rx_data_expect); - call->rx_data_expect = seq + 1; - if (test_and_set_bit(RXRPC_CALL_EXPECT_OOS, &call->flags)) { - ack = RXRPC_ACK_OUT_OF_SEQUENCE; - goto enqueue_and_ack; - } - goto enqueue_packet; - } - - if (seq != call->rx_data_post) { - _debug("ahead #%u [%u]", seq, call->rx_data_post); - goto enqueue_packet; - } - - if (test_bit(RXRPC_CALL_RCVD_LAST, &call->flags)) - goto protocol_error; - - /* if the packet need security things doing to it, then it goes down - * the slow path */ - if (call->security_ix) - goto enqueue_packet; - - sp->call = call; - rxrpc_get_call_for_skb(call, skb); - terminal = ((flags & RXRPC_LAST_PACKET) && - !(flags & RXRPC_CLIENT_INITIATED)); - ret = rxrpc_queue_rcv_skb(call, skb, false, terminal); - if (ret < 0) { - if (ret == -ENOMEM || ret == -ENOBUFS) { - __clear_bit(ackbit, call->ackr_window); - ack = RXRPC_ACK_NOSPACE; - goto discard_and_ack; - } - goto out; - } - - skb = NULL; - sp = NULL; - - _debug("post #%u", seq); - ASSERTCMP(call->rx_data_post, ==, seq); - call->rx_data_post++; - - if (flags & RXRPC_LAST_PACKET) - set_bit(RXRPC_CALL_RCVD_LAST, &call->flags); - - /* if we've reached an out of sequence packet then we need to drain - * that queue into the socket Rx queue now */ - if (call->rx_data_post == call->rx_first_oos) { - _debug("drain rx oos now"); - read_lock(&call->state_lock); - if (call->state < RXRPC_CALL_COMPLETE && - !test_and_set_bit(RXRPC_CALL_EV_DRAIN_RX_OOS, &call->events)) - rxrpc_queue_call(call); - read_unlock(&call->state_lock); + while (before(call->tx_hard_ack, to)) { + call->tx_hard_ack++; + ix = call->tx_hard_ack & RXRPC_RXTX_BUFF_MASK; + skb = call->rxtx_buffer[ix]; + rxrpc_see_skb(skb); + call->rxtx_buffer[ix] = NULL; + call->rxtx_annotations[ix] = 0; + skb->next = list; + list = skb; } spin_unlock(&call->lock); - atomic_inc(&call->ackr_not_idle); - rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, skew, serial, false); - _leave(" = 0 [posted]"); - return 0; -protocol_error: - ret = -EBADMSG; -out: - spin_unlock(&call->lock); - _leave(" = %d", ret); - return ret; - -discard_and_ack: - _debug("discard and ACK packet %p", skb); - __rxrpc_propose_ACK(call, ack, skew, serial, true); -discard: - spin_unlock(&call->lock); - rxrpc_free_skb(skb); - _leave(" = 0 [discarded]"); - return 0; - -enqueue_and_ack: - __rxrpc_propose_ACK(call, ack, skew, serial, true); -enqueue_packet: - _net("defer skb %p", skb); - spin_unlock(&call->lock); - skb_queue_tail(&call->rx_queue, skb); - atomic_inc(&call->ackr_not_idle); - read_lock(&call->state_lock); - if (call->state < RXRPC_CALL_COMPLETE) - rxrpc_queue_call(call); - read_unlock(&call->state_lock); - _leave(" = 0 [queued]"); - return 0; + while (list) { + skb = list; + list = skb->next; + skb->next = NULL; + rxrpc_free_skb(skb); + } } /* - * assume an implicit ACKALL of the transmission phase of a client socket upon - * reception of the first reply packet + * End the transmission phase of a call. + * + * This occurs when we get an ACKALL packet, the first DATA packet of a reply, + * or a final ACK packet. */ -static void rxrpc_assume_implicit_ackall(struct rxrpc_call *call, u32 serial) +static bool rxrpc_end_tx_phase(struct rxrpc_call *call, const char *abort_why) { - write_lock_bh(&call->state_lock); + _enter(""); switch (call->state) { + case RXRPC_CALL_CLIENT_RECV_REPLY: + return true; + case RXRPC_CALL_CLIENT_AWAIT_REPLY: + case RXRPC_CALL_SERVER_AWAIT_ACK: + break; + default: + rxrpc_proto_abort(abort_why, call, call->tx_top); + return false; + } + + rxrpc_rotate_tx_window(call, call->tx_top); + + write_lock(&call->state_lock); + + switch (call->state) { + default: + break; case RXRPC_CALL_CLIENT_AWAIT_REPLY: call->state = RXRPC_CALL_CLIENT_RECV_REPLY; - call->acks_latest = serial; - - _debug("implicit ACKALL %%%u", call->acks_latest); - set_bit(RXRPC_CALL_EV_RCVD_ACKALL, &call->events); - write_unlock_bh(&call->state_lock); - - if (try_to_del_timer_sync(&call->resend_timer) >= 0) { - clear_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events); - clear_bit(RXRPC_CALL_EV_RESEND, &call->events); - clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags); - } break; - - default: - write_unlock_bh(&call->state_lock); + case RXRPC_CALL_SERVER_AWAIT_ACK: + __rxrpc_call_completed(call); + rxrpc_notify_socket(call); break; } + + write_unlock(&call->state_lock); + _leave(" = ok"); + return true; +} + +/* + * Scan a jumbo packet to validate its structure and to work out how many + * subpackets it contains. + * + * A jumbo packet is a collection of consecutive packets glued together with + * little headers between that indicate how to change the initial header for + * each subpacket. + * + * RXRPC_JUMBO_PACKET must be set on all but the last subpacket - and all but + * the last are RXRPC_JUMBO_DATALEN in size. The last subpacket may be of any + * size. + */ +static bool rxrpc_validate_jumbo(struct sk_buff *skb) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + unsigned int offset = sp->offset; + unsigned int len = skb->data_len; + int nr_jumbo = 1; + u8 flags = sp->hdr.flags; + + do { + nr_jumbo++; + if (len - offset < RXRPC_JUMBO_SUBPKTLEN) + goto protocol_error; + if (flags & RXRPC_LAST_PACKET) + goto protocol_error; + offset += RXRPC_JUMBO_DATALEN; + if (skb_copy_bits(skb, offset, &flags, 1) < 0) + goto protocol_error; + offset += sizeof(struct rxrpc_jumbo_header); + } while (flags & RXRPC_JUMBO_PACKET); + + sp->nr_jumbo = nr_jumbo; + return true; + +protocol_error: + return false; +} + +/* + * Handle reception of a duplicate packet. + * + * We have to take care to avoid an attack here whereby we're given a series of + * jumbograms, each with a sequence number one before the preceding one and + * filled up to maximum UDP size. If they never send us the first packet in + * the sequence, they can cause us to have to hold on to around 2MiB of kernel + * space until the call times out. + * + * We limit the space usage by only accepting three duplicate jumbo packets per + * call. After that, we tell the other side we're no longer accepting jumbos + * (that information is encoded in the ACK packet). + */ +static void rxrpc_input_dup_data(struct rxrpc_call *call, rxrpc_seq_t seq, + u8 annotation, bool *_jumbo_dup) +{ + /* Discard normal packets that are duplicates. */ + if (annotation == 0) + return; + + /* Skip jumbo subpackets that are duplicates. When we've had three or + * more partially duplicate jumbo packets, we refuse to take any more + * jumbos for this call. + */ + if (!*_jumbo_dup) { + call->nr_jumbo_dup++; + *_jumbo_dup = true; + } } /* - * post an incoming packet to the nominated call to deal with - * - must get rid of the sk_buff, either by freeing it or by queuing it + * Process a DATA packet, adding the packet to the Rx ring. */ -void rxrpc_fast_process_packet(struct rxrpc_call *call, struct sk_buff *skb) +static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb, + u16 skew) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + unsigned int offset = sp->offset; + unsigned int ix; + rxrpc_serial_t serial = sp->hdr.serial, ack_serial = 0; + rxrpc_seq_t seq = sp->hdr.seq, hard_ack; + bool immediate_ack = false, jumbo_dup = false, queued; + u16 len; + u8 ack = 0, flags, annotation = 0; + + _enter("{%u,%u},{%u,%u}", + call->rx_hard_ack, call->rx_top, skb->data_len, seq); + + _proto("Rx DATA %%%u { #%u f=%02x }", + sp->hdr.serial, seq, sp->hdr.flags); + + if (call->state >= RXRPC_CALL_COMPLETE) + return; + + /* Received data implicitly ACKs all of the request packets we sent + * when we're acting as a client. + */ + if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY && + !rxrpc_end_tx_phase(call, "ETD")) + return; + + call->ackr_prev_seq = seq; + + hard_ack = READ_ONCE(call->rx_hard_ack); + if (after(seq, hard_ack + call->rx_winsize)) { + ack = RXRPC_ACK_EXCEEDS_WINDOW; + ack_serial = serial; + goto ack; + } + + flags = sp->hdr.flags; + if (flags & RXRPC_JUMBO_PACKET) { + if (call->nr_jumbo_dup > 3) { + ack = RXRPC_ACK_NOSPACE; + ack_serial = serial; + goto ack; + } + annotation = 1; + } + +next_subpacket: + queued = false; + ix = seq & RXRPC_RXTX_BUFF_MASK; + len = skb->data_len; + if (flags & RXRPC_JUMBO_PACKET) + len = RXRPC_JUMBO_DATALEN; + + if (flags & RXRPC_LAST_PACKET) { + if (test_and_set_bit(RXRPC_CALL_RX_LAST, &call->flags) && + seq != call->rx_top) + return rxrpc_proto_abort("LSN", call, seq); + } else { + if (test_bit(RXRPC_CALL_RX_LAST, &call->flags) && + after_eq(seq, call->rx_top)) + return rxrpc_proto_abort("LSA", call, seq); + } + + if (before_eq(seq, hard_ack)) { + ack = RXRPC_ACK_DUPLICATE; + ack_serial = serial; + goto skip; + } + + if (flags & RXRPC_REQUEST_ACK && !ack) { + ack = RXRPC_ACK_REQUESTED; + ack_serial = serial; + } + + if (call->rxtx_buffer[ix]) { + rxrpc_input_dup_data(call, seq, annotation, &jumbo_dup); + if (ack != RXRPC_ACK_DUPLICATE) { + ack = RXRPC_ACK_DUPLICATE; + ack_serial = serial; + } + immediate_ack = true; + goto skip; + } + + /* Queue the packet. We use a couple of memory barriers here as need + * to make sure that rx_top is perceived to be set after the buffer + * pointer and that the buffer pointer is set after the annotation and + * the skb data. + * + * Barriers against rxrpc_recvmsg_data() and rxrpc_rotate_rx_window() + * and also rxrpc_fill_out_ack(). + */ + rxrpc_get_skb(skb); + call->rxtx_annotations[ix] = annotation; + smp_wmb(); + call->rxtx_buffer[ix] = skb; + if (after(seq, call->rx_top)) + smp_store_release(&call->rx_top, seq); + queued = true; + + if (after_eq(seq, call->rx_expect_next)) { + if (after(seq, call->rx_expect_next)) { + _net("OOS %u > %u", seq, call->rx_expect_next); + ack = RXRPC_ACK_OUT_OF_SEQUENCE; + ack_serial = serial; + } + call->rx_expect_next = seq + 1; + } + +skip: + offset += len; + if (flags & RXRPC_JUMBO_PACKET) { + if (skb_copy_bits(skb, offset, &flags, 1) < 0) + return rxrpc_proto_abort("XJF", call, seq); + offset += sizeof(struct rxrpc_jumbo_header); + seq++; + serial++; + annotation++; + if (flags & RXRPC_JUMBO_PACKET) + annotation |= RXRPC_RX_ANNO_JLAST; + + _proto("Rx DATA Jumbo %%%u", serial); + goto next_subpacket; + } + + if (queued && flags & RXRPC_LAST_PACKET && !ack) { + ack = RXRPC_ACK_DELAY; + ack_serial = serial; + } + +ack: + if (ack) + rxrpc_propose_ACK(call, ack, skew, ack_serial, + immediate_ack, true); + + if (sp->hdr.seq == READ_ONCE(call->rx_hard_ack) + 1) + rxrpc_notify_socket(call); + _leave(" [queued]"); +} + +/* + * Process the extra information that may be appended to an ACK packet + */ +static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb, + struct rxrpc_ackinfo *ackinfo) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + struct rxrpc_peer *peer; + unsigned int mtu; + + _proto("Rx ACK %%%u Info { rx=%u max=%u rwin=%u jm=%u }", + sp->hdr.serial, + ntohl(ackinfo->rxMTU), ntohl(ackinfo->maxMTU), + ntohl(ackinfo->rwind), ntohl(ackinfo->jumbo_max)); + + if (call->tx_winsize > ntohl(ackinfo->rwind)) + call->tx_winsize = ntohl(ackinfo->rwind); + + mtu = min(ntohl(ackinfo->rxMTU), ntohl(ackinfo->maxMTU)); + + peer = call->peer; + if (mtu < peer->maxdata) { + spin_lock_bh(&peer->lock); + peer->maxdata = mtu; + peer->mtu = mtu + peer->hdrsize; + spin_unlock_bh(&peer->lock); + _net("Net MTU %u (maxdata %u)", peer->mtu, peer->maxdata); + } +} + +/* + * Process individual soft ACKs. + * + * Each ACK in the array corresponds to one packet and can be either an ACK or + * a NAK. If we get find an explicitly NAK'd packet we resend immediately; + * packets that lie beyond the end of the ACK list are scheduled for resend by + * the timer on the basis that the peer might just not have processed them at + * the time the ACK was sent. + */ +static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks, + rxrpc_seq_t seq, int nr_acks) +{ + bool resend = false; + int ix; + + for (; nr_acks > 0; nr_acks--, seq++) { + ix = seq & RXRPC_RXTX_BUFF_MASK; + switch (*acks) { + case RXRPC_ACK_TYPE_ACK: + call->rxtx_annotations[ix] = RXRPC_TX_ANNO_ACK; + break; + case RXRPC_ACK_TYPE_NACK: + if (call->rxtx_annotations[ix] == RXRPC_TX_ANNO_NAK) + continue; + call->rxtx_annotations[ix] = RXRPC_TX_ANNO_NAK; + resend = true; + break; + default: + return rxrpc_proto_abort("SFT", call, 0); + } + } + + if (resend && + !test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events)) + rxrpc_queue_call(call); +} + +/* + * Process an ACK packet. + * + * ack.firstPacket is the sequence number of the first soft-ACK'd/NAK'd packet + * in the ACK array. Anything before that is hard-ACK'd and may be discarded. + * + * A hard-ACK means that a packet has been processed and may be discarded; a + * soft-ACK means that the packet may be discarded and retransmission + * requested. A phase is complete when all packets are hard-ACK'd. + */ +static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, + u16 skew) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + union { + struct rxrpc_ackpacket ack; + struct rxrpc_ackinfo info; + u8 acks[RXRPC_MAXACKS]; + } buf; + rxrpc_seq_t first_soft_ack, hard_ack; + int nr_acks, offset; + + _enter(""); + + if (skb_copy_bits(skb, sp->offset, &buf.ack, sizeof(buf.ack)) < 0) { + _debug("extraction failure"); + return rxrpc_proto_abort("XAK", call, 0); + } + sp->offset += sizeof(buf.ack); + + first_soft_ack = ntohl(buf.ack.firstPacket); + hard_ack = first_soft_ack - 1; + nr_acks = buf.ack.nAcks; + + _proto("Rx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }", + sp->hdr.serial, + ntohs(buf.ack.maxSkew), + first_soft_ack, + ntohl(buf.ack.previousPacket), + ntohl(buf.ack.serial), + rxrpc_acks(buf.ack.reason), + buf.ack.nAcks); + + if (buf.ack.reason == RXRPC_ACK_PING) { + _proto("Rx ACK %%%u PING Request", sp->hdr.serial); + rxrpc_propose_ACK(call, RXRPC_ACK_PING_RESPONSE, + skew, sp->hdr.serial, true, true); + } else if (sp->hdr.flags & RXRPC_REQUEST_ACK) { + rxrpc_propose_ACK(call, RXRPC_ACK_REQUESTED, + skew, sp->hdr.serial, true, true); + } + + offset = sp->offset + nr_acks + 3; + if (skb->data_len >= offset + sizeof(buf.info)) { + if (skb_copy_bits(skb, offset, &buf.info, sizeof(buf.info)) < 0) + return rxrpc_proto_abort("XAI", call, 0); + rxrpc_input_ackinfo(call, skb, &buf.info); + } + + if (first_soft_ack == 0) + return rxrpc_proto_abort("AK0", call, 0); + + /* Ignore ACKs unless we are or have just been transmitting. */ + switch (call->state) { + case RXRPC_CALL_CLIENT_SEND_REQUEST: + case RXRPC_CALL_CLIENT_AWAIT_REPLY: + case RXRPC_CALL_SERVER_SEND_REPLY: + case RXRPC_CALL_SERVER_AWAIT_ACK: + break; + default: + return; + } + + /* Discard any out-of-order or duplicate ACKs. */ + if ((int)sp->hdr.serial - (int)call->acks_latest <= 0) { + _debug("discard ACK %d <= %d", + sp->hdr.serial, call->acks_latest); + return; + } + call->acks_latest = sp->hdr.serial; + + if (test_bit(RXRPC_CALL_TX_LAST, &call->flags) && + hard_ack == call->tx_top) { + rxrpc_end_tx_phase(call, "ETA"); + return; + } + + if (before(hard_ack, call->tx_hard_ack) || + after(hard_ack, call->tx_top)) + return rxrpc_proto_abort("AKW", call, 0); + + if (after(hard_ack, call->tx_hard_ack)) + rxrpc_rotate_tx_window(call, hard_ack); + + if (after(first_soft_ack, call->tx_top)) + return; + + if (nr_acks > call->tx_top - first_soft_ack + 1) + nr_acks = first_soft_ack - call->tx_top + 1; + if (skb_copy_bits(skb, sp->offset, buf.acks, nr_acks) < 0) + return rxrpc_proto_abort("XSA", call, 0); + rxrpc_input_soft_acks(call, buf.acks, first_soft_ack, nr_acks); +} + +/* + * Process an ACKALL packet. + */ +static void rxrpc_input_ackall(struct rxrpc_call *call, struct sk_buff *skb) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + + _proto("Rx ACKALL %%%u", sp->hdr.serial); + + rxrpc_end_tx_phase(call, "ETL"); +} + +/* + * Process an ABORT packet. + */ +static void rxrpc_input_abort(struct rxrpc_call *call, struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); __be32 wtmp; - u32 abort_code; + u32 abort_code = RX_CALL_DEAD; + + _enter(""); + + if (skb->len >= 4 && + skb_copy_bits(skb, sp->offset, &wtmp, sizeof(wtmp)) >= 0) + abort_code = ntohl(wtmp); + + _proto("Rx ABORT %%%u { %x }", sp->hdr.serial, abort_code); + + if (rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED, + abort_code, ECONNABORTED)) + rxrpc_notify_socket(call); +} + +/* + * Process an incoming call packet. + */ +static void rxrpc_input_call_packet(struct rxrpc_call *call, + struct sk_buff *skb, u16 skew) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); _enter("%p,%p", call, skb); - ASSERT(!irqs_disabled()); - -#if 0 // INJECT RX ERROR - if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA) { - static int skip = 0; - if (++skip == 3) { - printk("DROPPED 3RD PACKET!!!!!!!!!!!!!\n"); - skip = 0; - goto free_packet; - } - } -#endif - - /* request ACK generation for any ACK or DATA packet that requests - * it */ - if (sp->hdr.flags & RXRPC_REQUEST_ACK) { - _proto("ACK Requested on %%%u", sp->hdr.serial); - rxrpc_propose_ACK(call, RXRPC_ACK_REQUESTED, - skb->priority, sp->hdr.serial, false); - } - switch (sp->hdr.type) { - case RXRPC_PACKET_TYPE_ABORT: - _debug("abort"); + case RXRPC_PACKET_TYPE_DATA: + rxrpc_input_data(call, skb, skew); + break; - if (skb_copy_bits(skb, 0, &wtmp, sizeof(wtmp)) < 0) - goto protocol_error; - - abort_code = ntohl(wtmp); - _proto("Rx ABORT %%%u { %x }", sp->hdr.serial, abort_code); - - if (__rxrpc_set_call_completion(call, - RXRPC_CALL_REMOTELY_ABORTED, - abort_code, ECONNABORTED)) { - set_bit(RXRPC_CALL_EV_RCVD_ABORT, &call->events); - rxrpc_queue_call(call); - } - goto free_packet; + case RXRPC_PACKET_TYPE_ACK: + rxrpc_input_ack(call, skb, skew); + break; case RXRPC_PACKET_TYPE_BUSY: _proto("Rx BUSY %%%u", sp->hdr.serial); - if (rxrpc_is_service_call(call)) - goto protocol_error; + /* Just ignore BUSY packets from the server; the retry and + * lifespan timers will take care of business. BUSY packets + * from the client don't make sense. + */ + break; - write_lock_bh(&call->state_lock); - switch (call->state) { - case RXRPC_CALL_CLIENT_SEND_REQUEST: - __rxrpc_set_call_completion(call, - RXRPC_CALL_SERVER_BUSY, - 0, EBUSY); - set_bit(RXRPC_CALL_EV_RCVD_BUSY, &call->events); - rxrpc_queue_call(call); - case RXRPC_CALL_SERVER_BUSY: - goto free_packet_unlock; - default: - goto protocol_error_locked; - } + case RXRPC_PACKET_TYPE_ABORT: + rxrpc_input_abort(call, skb); + break; + + case RXRPC_PACKET_TYPE_ACKALL: + rxrpc_input_ackall(call, skb); + break; default: _proto("Rx %s %%%u", rxrpc_pkts[sp->hdr.type], sp->hdr.serial); - goto protocol_error; - - case RXRPC_PACKET_TYPE_DATA: - _proto("Rx DATA %%%u { #%u }", sp->hdr.serial, sp->hdr.seq); - - if (sp->hdr.seq == 0) - goto protocol_error; - - call->ackr_prev_seq = sp->hdr.seq; - - /* received data implicitly ACKs all of the request packets we - * sent when we're acting as a client */ - if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY) - rxrpc_assume_implicit_ackall(call, sp->hdr.serial); - - switch (rxrpc_fast_process_data(call, skb, sp->hdr.seq)) { - case 0: - skb = NULL; - goto done; - - default: - BUG(); - - /* data packet received beyond the last packet */ - case -EBADMSG: - goto protocol_error; - } - - case RXRPC_PACKET_TYPE_ACKALL: - case RXRPC_PACKET_TYPE_ACK: - /* ACK processing is done in process context */ - read_lock_bh(&call->state_lock); - if (call->state < RXRPC_CALL_COMPLETE) { - skb_queue_tail(&call->rx_queue, skb); - rxrpc_queue_call(call); - skb = NULL; - } - read_unlock_bh(&call->state_lock); - goto free_packet; - } - -protocol_error: - _debug("protocol error"); - write_lock_bh(&call->state_lock); -protocol_error_locked: - if (__rxrpc_abort_call("FPR", call, 0, RX_PROTOCOL_ERROR, EPROTO)) - rxrpc_queue_call(call); -free_packet_unlock: - write_unlock_bh(&call->state_lock); -free_packet: - rxrpc_free_skb(skb); -done: - _leave(""); -} - -/* - * split up a jumbo data packet - */ -static void rxrpc_process_jumbo_packet(struct rxrpc_call *call, - struct sk_buff *jumbo) -{ - struct rxrpc_jumbo_header jhdr; - struct rxrpc_skb_priv *sp; - struct sk_buff *part; - - _enter(",{%u,%u}", jumbo->data_len, jumbo->len); - - sp = rxrpc_skb(jumbo); - - do { - sp->hdr.flags &= ~RXRPC_JUMBO_PACKET; - - /* make a clone to represent the first subpacket in what's left - * of the jumbo packet */ - part = skb_clone(jumbo, GFP_ATOMIC); - if (!part) { - /* simply ditch the tail in the event of ENOMEM */ - pskb_trim(jumbo, RXRPC_JUMBO_DATALEN); - break; - } - rxrpc_new_skb(part); - - pskb_trim(part, RXRPC_JUMBO_DATALEN); - - if (!pskb_pull(jumbo, RXRPC_JUMBO_DATALEN)) - goto protocol_error; - - if (skb_copy_bits(jumbo, 0, &jhdr, sizeof(jhdr)) < 0) - goto protocol_error; - if (!pskb_pull(jumbo, sizeof(jhdr))) - BUG(); - - sp->hdr.seq += 1; - sp->hdr.serial += 1; - sp->hdr.flags = jhdr.flags; - sp->hdr._rsvd = ntohs(jhdr._rsvd); - - _proto("Rx DATA Jumbo %%%u", sp->hdr.serial - 1); - - rxrpc_fast_process_packet(call, part); - part = NULL; - - } while (sp->hdr.flags & RXRPC_JUMBO_PACKET); - - rxrpc_fast_process_packet(call, jumbo); - _leave(""); - return; - -protocol_error: - _debug("protocol error"); - rxrpc_free_skb(part); - if (rxrpc_abort_call("PJP", call, sp->hdr.seq, - RX_PROTOCOL_ERROR, EPROTO)) - rxrpc_queue_call(call); - rxrpc_free_skb(jumbo); - _leave(""); -} - -/* - * post an incoming packet to the appropriate call/socket to deal with - * - must get rid of the sk_buff, either by freeing it or by queuing it - */ -static void rxrpc_post_packet_to_call(struct rxrpc_connection *conn, - struct rxrpc_call *call, - struct sk_buff *skb) -{ - struct rxrpc_skb_priv *sp; - - _enter("%p,%p", call, skb); - - sp = rxrpc_skb(skb); - - _debug("extant call [%d]", call->state); - - read_lock(&call->state_lock); - switch (call->state) { - case RXRPC_CALL_COMPLETE: - switch (call->completion) { - case RXRPC_CALL_LOCALLY_ABORTED: - if (!test_and_set_bit(RXRPC_CALL_EV_ABORT, - &call->events)) { - rxrpc_queue_call(call); - goto free_unlock; - } - default: - goto dead_call; - case RXRPC_CALL_SUCCEEDED: - if (rxrpc_is_service_call(call)) - goto dead_call; - goto resend_final_ack; - } - - case RXRPC_CALL_CLIENT_FINAL_ACK: - goto resend_final_ack; - - default: break; } - read_unlock(&call->state_lock); - - if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA && - sp->hdr.flags & RXRPC_JUMBO_PACKET) - rxrpc_process_jumbo_packet(call, skb); - else - rxrpc_fast_process_packet(call, skb); - - goto done; - -resend_final_ack: - _debug("final ack again"); - set_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events); - rxrpc_queue_call(call); - goto free_unlock; - -dead_call: - if (sp->hdr.type != RXRPC_PACKET_TYPE_ABORT) { - skb->priority = RX_CALL_DEAD; - rxrpc_reject_packet(conn->params.local, skb); - goto unlock; - } -free_unlock: - rxrpc_free_skb(skb); -unlock: - read_unlock(&call->state_lock); -done: _leave(""); } @@ -600,6 +598,17 @@ static void rxrpc_post_packet_to_local(struct rxrpc_local *local, rxrpc_queue_local(local); } +/* + * put a packet up for transport-level abort + */ +static void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb) +{ + CHECK_SLAB_OKAY(&local->usage); + + skb_queue_tail(&local->reject_queue, skb); + rxrpc_queue_local(local); +} + /* * Extract the wire header from a packet and translate the byte order. */ @@ -611,8 +620,6 @@ int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb) /* dig out the RxRPC connection details */ if (skb_copy_bits(skb, 0, &whdr, sizeof(whdr)) < 0) return -EBADMSG; - if (!pskb_pull(skb, sizeof(whdr))) - BUG(); memset(sp, 0, sizeof(*sp)); sp->hdr.epoch = ntohl(whdr.epoch); @@ -626,6 +633,7 @@ int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb) sp->hdr.securityIndex = whdr.securityIndex; sp->hdr._rsvd = ntohs(whdr._rsvd); sp->hdr.serviceId = ntohs(whdr.serviceId); + sp->offset = sizeof(whdr); return 0; } @@ -637,19 +645,22 @@ int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb) * shut down and the local endpoint from going away, thus sk_user_data will not * be cleared until this function returns. */ -void rxrpc_data_ready(struct sock *sk) +void rxrpc_data_ready(struct sock *udp_sk) { struct rxrpc_connection *conn; + struct rxrpc_channel *chan; + struct rxrpc_call *call; struct rxrpc_skb_priv *sp; - struct rxrpc_local *local = sk->sk_user_data; + struct rxrpc_local *local = udp_sk->sk_user_data; struct sk_buff *skb; + unsigned int channel; int ret, skew; - _enter("%p", sk); + _enter("%p", udp_sk); ASSERT(!irqs_disabled()); - skb = skb_recv_datagram(sk, 0, 1, &ret); + skb = skb_recv_datagram(udp_sk, 0, 1, &ret); if (!skb) { if (ret == -EAGAIN) return; @@ -695,111 +706,122 @@ void rxrpc_data_ready(struct sock *sk) goto bad_message; } - if (sp->hdr.type == RXRPC_PACKET_TYPE_VERSION) { + switch (sp->hdr.type) { + case RXRPC_PACKET_TYPE_VERSION: rxrpc_post_packet_to_local(local, skb); goto out; - } - if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA && - (sp->hdr.callNumber == 0 || sp->hdr.seq == 0)) - goto bad_message; + case RXRPC_PACKET_TYPE_BUSY: + if (sp->hdr.flags & RXRPC_CLIENT_INITIATED) + goto discard; + + case RXRPC_PACKET_TYPE_DATA: + if (sp->hdr.callNumber == 0) + goto bad_message; + if (sp->hdr.flags & RXRPC_JUMBO_PACKET && + !rxrpc_validate_jumbo(skb)) + goto bad_message; + break; + } rcu_read_lock(); conn = rxrpc_find_connection_rcu(local, skb); - if (!conn) { - skb->priority = 0; - goto cant_route_call; - } + if (conn) { + if (sp->hdr.securityIndex != conn->security_ix) + goto wrong_security; - /* Note the serial number skew here */ - skew = (int)sp->hdr.serial - (int)conn->hi_serial; - if (skew >= 0) { - if (skew > 0) - conn->hi_serial = sp->hdr.serial; - skb->priority = 0; - } else { - skew = -skew; - skb->priority = min(skew, 65535); - } + if (sp->hdr.callNumber == 0) { + /* Connection-level packet */ + _debug("CONN %p {%d}", conn, conn->debug_id); + rxrpc_post_packet_to_conn(conn, skb); + goto out_unlock; + } + + /* Note the serial number skew here */ + skew = (int)sp->hdr.serial - (int)conn->hi_serial; + if (skew >= 0) { + if (skew > 0) + conn->hi_serial = sp->hdr.serial; + } else { + skew = -skew; + skew = min(skew, 65535); + } - if (sp->hdr.callNumber == 0) { - /* Connection-level packet */ - _debug("CONN %p {%d}", conn, conn->debug_id); - rxrpc_post_packet_to_conn(conn, skb); - goto out_unlock; - } else { /* Call-bound packets are routed by connection channel. */ - unsigned int channel = sp->hdr.cid & RXRPC_CHANNELMASK; - struct rxrpc_channel *chan = &conn->channels[channel]; - struct rxrpc_call *call; + channel = sp->hdr.cid & RXRPC_CHANNELMASK; + chan = &conn->channels[channel]; /* Ignore really old calls */ if (sp->hdr.callNumber < chan->last_call) goto discard_unlock; if (sp->hdr.callNumber == chan->last_call) { - /* For the previous service call, if completed - * successfully, we discard all further packets. + /* For the previous service call, if completed successfully, we + * discard all further packets. */ if (rxrpc_conn_is_service(conn) && (chan->last_type == RXRPC_PACKET_TYPE_ACK || sp->hdr.type == RXRPC_PACKET_TYPE_ABORT)) goto discard_unlock; - /* But otherwise we need to retransmit the final packet - * from data cached in the connection record. + /* But otherwise we need to retransmit the final packet from + * data cached in the connection record. */ rxrpc_post_packet_to_conn(conn, skb); goto out_unlock; } call = rcu_dereference(chan->call); - if (!call || atomic_read(&call->usage) == 0) - goto cant_route_call; - - rxrpc_see_call(call); - rxrpc_post_packet_to_call(conn, call, skb); - goto out_unlock; + } else { + skew = 0; + call = NULL; } + if (!call || atomic_read(&call->usage) == 0) { + if (!(sp->hdr.type & RXRPC_CLIENT_INITIATED) || + sp->hdr.callNumber == 0 || + sp->hdr.type != RXRPC_PACKET_TYPE_DATA) + goto bad_message_unlock; + if (sp->hdr.seq != 1) + goto discard_unlock; + call = rxrpc_new_incoming_call(local, conn, skb); + if (!call) { + rcu_read_unlock(); + goto reject_packet; + } + } + + rxrpc_input_call_packet(call, skb, skew); + goto discard_unlock; + discard_unlock: - rxrpc_free_skb(skb); -out_unlock: rcu_read_unlock(); +discard: + rxrpc_free_skb(skb); out: trace_rxrpc_rx_done(0, 0); return; -cant_route_call: +out_unlock: rcu_read_unlock(); + goto out; - _debug("can't route call"); - if (sp->hdr.flags & RXRPC_CLIENT_INITIATED && - sp->hdr.type == RXRPC_PACKET_TYPE_DATA) { - if (sp->hdr.seq == 1) { - _debug("first packet"); - skb_queue_tail(&local->accept_queue, skb); - rxrpc_queue_work(&local->processor); - _leave(" [incoming]"); - goto out; - } - skb->priority = RX_INVALID_OPERATION; - } else { - skb->priority = RX_CALL_DEAD; - } - - if (sp->hdr.type != RXRPC_PACKET_TYPE_ABORT) { - _debug("reject type %d",sp->hdr.type); - goto reject_packet; - } else { - rxrpc_free_skb(skb); - } - _leave(" [no call]"); - return; +wrong_security: + rcu_read_unlock(); + trace_rxrpc_abort("SEC", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, + RXKADINCONSISTENCY, EBADMSG); + skb->priority = RXKADINCONSISTENCY; + goto post_abort; +bad_message_unlock: + rcu_read_unlock(); bad_message: + trace_rxrpc_abort("BAD", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, + RX_PROTOCOL_ERROR, EBADMSG); skb->priority = RX_PROTOCOL_ERROR; +post_abort: + skb->mark = RXRPC_SKB_MARK_LOCAL_ABORT; reject_packet: trace_rxrpc_rx_done(skb->mark, skb->priority); rxrpc_reject_packet(local, skb); diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c index a4aba0246731..7d4375e557e6 100644 --- a/net/rxrpc/insecure.c +++ b/net/rxrpc/insecure.c @@ -30,14 +30,18 @@ static int none_secure_packet(struct rxrpc_call *call, return 0; } -static int none_verify_packet(struct rxrpc_call *call, - struct sk_buff *skb, - rxrpc_seq_t seq, - u16 expected_cksum) +static int none_verify_packet(struct rxrpc_call *call, struct sk_buff *skb, + unsigned int offset, unsigned int len, + rxrpc_seq_t seq, u16 expected_cksum) { return 0; } +static void none_locate_data(struct rxrpc_call *call, struct sk_buff *skb, + unsigned int *_offset, unsigned int *_len) +{ +} + static int none_respond_to_challenge(struct rxrpc_connection *conn, struct sk_buff *skb, u32 *_abort_code) @@ -79,6 +83,7 @@ const struct rxrpc_security rxrpc_no_security = { .prime_packet_security = none_prime_packet_security, .secure_packet = none_secure_packet, .verify_packet = none_verify_packet, + .locate_data = none_locate_data, .respond_to_challenge = none_respond_to_challenge, .verify_response = none_verify_response, .clear = none_clear, diff --git a/net/rxrpc/local_event.c b/net/rxrpc/local_event.c index bcc6593b4cdb..cdd58e6e9fbd 100644 --- a/net/rxrpc/local_event.c +++ b/net/rxrpc/local_event.c @@ -98,7 +98,7 @@ void rxrpc_process_local_events(struct rxrpc_local *local) switch (sp->hdr.type) { case RXRPC_PACKET_TYPE_VERSION: - if (skb_copy_bits(skb, 0, &v, 1) < 0) + if (skb_copy_bits(skb, sp->offset, &v, 1) < 0) return; _proto("Rx VERSION { %02x }", v); if (v == 0) diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index 610916f4ae34..782b9adf67cb 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -77,7 +77,6 @@ static struct rxrpc_local *rxrpc_alloc_local(const struct sockaddr_rxrpc *srx) INIT_WORK(&local->processor, rxrpc_local_processor); INIT_HLIST_HEAD(&local->services); init_rwsem(&local->defrag_sem); - skb_queue_head_init(&local->accept_queue); skb_queue_head_init(&local->reject_queue); skb_queue_head_init(&local->event_queue); local->client_conns = RB_ROOT; @@ -308,7 +307,6 @@ static void rxrpc_local_destroyer(struct rxrpc_local *local) /* At this point, there should be no more packets coming in to the * local endpoint. */ - rxrpc_purge_queue(&local->accept_queue); rxrpc_purge_queue(&local->reject_queue); rxrpc_purge_queue(&local->event_queue); @@ -332,11 +330,6 @@ static void rxrpc_local_processor(struct work_struct *work) if (atomic_read(&local->usage) == 0) return rxrpc_local_destroyer(local); - if (!skb_queue_empty(&local->accept_queue)) { - rxrpc_accept_incoming_calls(local); - again = true; - } - if (!skb_queue_empty(&local->reject_queue)) { rxrpc_reject_packets(local); again = true; diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index 39e7cc37c392..fd096f742e4b 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -50,7 +50,7 @@ unsigned int rxrpc_idle_ack_delay = 0.5 * HZ; * limit is hit, we should generate an EXCEEDS_WINDOW ACK and discard further * packets. */ -unsigned int rxrpc_rx_window_size = 32; +unsigned int rxrpc_rx_window_size = RXRPC_RXTX_BUFF_SIZE - 46; /* * Maximum Rx MTU size. This indicates to the sender the size of jumbo packet diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 8756d74fd74b..719a4c23f09d 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -15,6 +15,8 @@ #include #include #include +#include +#include #include #include #include "ar-internal.h" @@ -38,20 +40,38 @@ struct rxrpc_pkt_buffer { static size_t rxrpc_fill_out_ack(struct rxrpc_call *call, struct rxrpc_pkt_buffer *pkt) { + rxrpc_seq_t hard_ack, top, seq; + int ix; u32 mtu, jmax; u8 *ackp = pkt->acks; + /* Barrier against rxrpc_input_data(). */ + hard_ack = READ_ONCE(call->rx_hard_ack); + top = smp_load_acquire(&call->rx_top); + pkt->ack.bufferSpace = htons(8); - pkt->ack.maxSkew = htons(0); - pkt->ack.firstPacket = htonl(call->rx_data_eaten + 1); + pkt->ack.maxSkew = htons(call->ackr_skew); + pkt->ack.firstPacket = htonl(hard_ack + 1); pkt->ack.previousPacket = htonl(call->ackr_prev_seq); pkt->ack.serial = htonl(call->ackr_serial); - pkt->ack.reason = RXRPC_ACK_IDLE; - pkt->ack.nAcks = 0; + pkt->ack.reason = call->ackr_reason; + pkt->ack.nAcks = top - hard_ack; - mtu = call->peer->if_mtu; - mtu -= call->peer->hdrsize; - jmax = rxrpc_rx_jumbo_max; + if (after(top, hard_ack)) { + seq = hard_ack + 1; + do { + ix = seq & RXRPC_RXTX_BUFF_MASK; + if (call->rxtx_buffer[ix]) + *ackp++ = RXRPC_ACK_TYPE_ACK; + else + *ackp++ = RXRPC_ACK_TYPE_NACK; + seq++; + } while (before_eq(seq, top)); + } + + mtu = call->conn->params.peer->if_mtu; + mtu -= call->conn->params.peer->hdrsize; + jmax = (call->nr_jumbo_dup > 3) ? 1 : rxrpc_rx_jumbo_max; pkt->ackinfo.rxMTU = htonl(rxrpc_rx_mtu); pkt->ackinfo.maxMTU = htonl(mtu); pkt->ackinfo.rwind = htonl(rxrpc_rx_window_size); @@ -60,11 +80,11 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_call *call, *ackp++ = 0; *ackp++ = 0; *ackp++ = 0; - return 3; + return top - hard_ack + 3; } /* - * Send a final ACK or ABORT call packet. + * Send an ACK or ABORT call packet. */ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) { @@ -158,6 +178,19 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, ioc, len); + if (ret < 0 && call->state < RXRPC_CALL_COMPLETE) { + switch (pkt->whdr.type) { + case RXRPC_PACKET_TYPE_ACK: + rxrpc_propose_ACK(call, pkt->ack.reason, + ntohs(pkt->ack.maxSkew), + ntohl(pkt->ack.serial), + true, true); + break; + case RXRPC_PACKET_TYPE_ABORT: + break; + } + } + out: rxrpc_put_connection(conn); kfree(pkt); @@ -233,3 +266,77 @@ send_fragmentable: _leave(" = %d [frag %u]", ret, conn->params.peer->maxdata); return ret; } + +/* + * reject packets through the local endpoint + */ +void rxrpc_reject_packets(struct rxrpc_local *local) +{ + union { + struct sockaddr sa; + struct sockaddr_in sin; + } sa; + struct rxrpc_skb_priv *sp; + struct rxrpc_wire_header whdr; + struct sk_buff *skb; + struct msghdr msg; + struct kvec iov[2]; + size_t size; + __be32 code; + + _enter("%d", local->debug_id); + + iov[0].iov_base = &whdr; + iov[0].iov_len = sizeof(whdr); + iov[1].iov_base = &code; + iov[1].iov_len = sizeof(code); + size = sizeof(whdr) + sizeof(code); + + msg.msg_name = &sa; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = 0; + + memset(&sa, 0, sizeof(sa)); + sa.sa.sa_family = local->srx.transport.family; + switch (sa.sa.sa_family) { + case AF_INET: + msg.msg_namelen = sizeof(sa.sin); + break; + default: + msg.msg_namelen = 0; + break; + } + + memset(&whdr, 0, sizeof(whdr)); + whdr.type = RXRPC_PACKET_TYPE_ABORT; + + while ((skb = skb_dequeue(&local->reject_queue))) { + rxrpc_see_skb(skb); + sp = rxrpc_skb(skb); + switch (sa.sa.sa_family) { + case AF_INET: + sa.sin.sin_port = udp_hdr(skb)->source; + sa.sin.sin_addr.s_addr = ip_hdr(skb)->saddr; + code = htonl(skb->priority); + + whdr.epoch = htonl(sp->hdr.epoch); + whdr.cid = htonl(sp->hdr.cid); + whdr.callNumber = htonl(sp->hdr.callNumber); + whdr.serviceId = htons(sp->hdr.serviceId); + whdr.flags = sp->hdr.flags; + whdr.flags ^= RXRPC_CLIENT_INITIATED; + whdr.flags &= RXRPC_CLIENT_INITIATED; + + kernel_sendmsg(local->socket, &msg, iov, 2, size); + break; + + default: + break; + } + + rxrpc_free_skb(skb); + } + + _leave(""); +} diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index 27b9ecad007e..c8948936c6fc 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -129,15 +129,14 @@ void rxrpc_error_report(struct sock *sk) _leave("UDP socket errqueue empty"); return; } + rxrpc_new_skb(skb); serr = SKB_EXT_ERR(skb); if (!skb->len && serr->ee.ee_origin == SO_EE_ORIGIN_TIMESTAMPING) { _leave("UDP empty message"); - kfree_skb(skb); + rxrpc_free_skb(skb); return; } - rxrpc_new_skb(skb); - rcu_read_lock(); peer = rxrpc_lookup_peer_icmp_rcu(local, skb); if (peer && !rxrpc_get_peer_maybe(peer)) @@ -249,7 +248,6 @@ void rxrpc_peer_error_distributor(struct work_struct *work) container_of(work, struct rxrpc_peer, error_distributor); struct rxrpc_call *call; enum rxrpc_call_completion compl; - bool queue; int error; _enter(""); @@ -272,15 +270,8 @@ void rxrpc_peer_error_distributor(struct work_struct *work) hlist_del_init(&call->error_link); rxrpc_see_call(call); - queue = false; - write_lock(&call->state_lock); - if (__rxrpc_set_call_completion(call, compl, 0, error)) { - set_bit(RXRPC_CALL_EV_RCVD_ERROR, &call->events); - queue = true; - } - write_unlock(&call->state_lock); - if (queue) - rxrpc_queue_call(call); + if (rxrpc_set_call_completion(call, compl, 0, error)) + rxrpc_notify_socket(call); } spin_unlock_bh(&peer->lock); diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index aebc73ac16dc..2efe29a4c232 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -198,6 +198,32 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp) return peer; } +/* + * Initialise peer record. + */ +static void rxrpc_init_peer(struct rxrpc_peer *peer, unsigned long hash_key) +{ + rxrpc_assess_MTU_size(peer); + peer->mtu = peer->if_mtu; + + if (peer->srx.transport.family == AF_INET) { + peer->hdrsize = sizeof(struct iphdr); + switch (peer->srx.transport_type) { + case SOCK_DGRAM: + peer->hdrsize += sizeof(struct udphdr); + break; + default: + BUG(); + break; + } + } else { + BUG(); + } + + peer->hdrsize += sizeof(struct rxrpc_wire_header); + peer->maxdata = peer->mtu - peer->hdrsize; +} + /* * Set up a new peer. */ @@ -214,32 +240,42 @@ static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_local *local, if (peer) { peer->hash_key = hash_key; memcpy(&peer->srx, srx, sizeof(*srx)); - - rxrpc_assess_MTU_size(peer); - peer->mtu = peer->if_mtu; - - if (srx->transport.family == AF_INET) { - peer->hdrsize = sizeof(struct iphdr); - switch (srx->transport_type) { - case SOCK_DGRAM: - peer->hdrsize += sizeof(struct udphdr); - break; - default: - BUG(); - break; - } - } else { - BUG(); - } - - peer->hdrsize += sizeof(struct rxrpc_wire_header); - peer->maxdata = peer->mtu - peer->hdrsize; + rxrpc_init_peer(peer, hash_key); } _leave(" = %p", peer); return peer; } +/* + * Set up a new incoming peer. The address is prestored in the preallocated + * peer. + */ +struct rxrpc_peer *rxrpc_lookup_incoming_peer(struct rxrpc_local *local, + struct rxrpc_peer *prealloc) +{ + struct rxrpc_peer *peer; + unsigned long hash_key; + + hash_key = rxrpc_peer_hash_key(local, &prealloc->srx); + prealloc->local = local; + rxrpc_init_peer(prealloc, hash_key); + + spin_lock(&rxrpc_peer_hash_lock); + + /* Need to check that we aren't racing with someone else */ + peer = __rxrpc_lookup_peer_rcu(local, &prealloc->srx, hash_key); + if (peer && !rxrpc_get_peer_maybe(peer)) + peer = NULL; + if (!peer) { + peer = prealloc; + hash_add_rcu(rxrpc_peer_hash, &peer->hash_link, hash_key); + } + + spin_unlock(&rxrpc_peer_hash_lock); + return peer; +} + /* * obtain a remote transport endpoint for the specified address */ @@ -272,7 +308,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local, return NULL; } - spin_lock(&rxrpc_peer_hash_lock); + spin_lock_bh(&rxrpc_peer_hash_lock); /* Need to check that we aren't racing with someone else */ peer = __rxrpc_lookup_peer_rcu(local, srx, hash_key); @@ -282,7 +318,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local, hash_add_rcu(rxrpc_peer_hash, &candidate->hash_link, hash_key); - spin_unlock(&rxrpc_peer_hash_lock); + spin_unlock_bh(&rxrpc_peer_hash_lock); if (peer) kfree(candidate); @@ -307,9 +343,9 @@ void __rxrpc_put_peer(struct rxrpc_peer *peer) { ASSERT(hlist_empty(&peer->error_targets)); - spin_lock(&rxrpc_peer_hash_lock); + spin_lock_bh(&rxrpc_peer_hash_lock); hash_del_rcu(&peer->hash_link); - spin_unlock(&rxrpc_peer_hash_lock); + spin_unlock_bh(&rxrpc_peer_hash_lock); kfree_rcu(peer, rcu); } diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 6876ffb3b410..20d0b5c6f81b 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -19,20 +19,357 @@ #include "ar-internal.h" /* - * receive a message from an RxRPC socket + * Post a call for attention by the socket or kernel service. Further + * notifications are suppressed by putting recvmsg_link on a dummy queue. + */ +void rxrpc_notify_socket(struct rxrpc_call *call) +{ + struct rxrpc_sock *rx; + struct sock *sk; + + _enter("%d", call->debug_id); + + if (!list_empty(&call->recvmsg_link)) + return; + + rcu_read_lock(); + + rx = rcu_dereference(call->socket); + sk = &rx->sk; + if (rx && sk->sk_state < RXRPC_CLOSE) { + if (call->notify_rx) { + call->notify_rx(sk, call, call->user_call_ID); + } else { + write_lock_bh(&rx->recvmsg_lock); + if (list_empty(&call->recvmsg_link)) { + rxrpc_get_call(call, rxrpc_call_got); + list_add_tail(&call->recvmsg_link, &rx->recvmsg_q); + } + write_unlock_bh(&rx->recvmsg_lock); + + if (!sock_flag(sk, SOCK_DEAD)) { + _debug("call %ps", sk->sk_data_ready); + sk->sk_data_ready(sk); + } + } + } + + rcu_read_unlock(); + _leave(""); +} + +/* + * Pass a call terminating message to userspace. + */ +static int rxrpc_recvmsg_term(struct rxrpc_call *call, struct msghdr *msg) +{ + u32 tmp = 0; + int ret; + + switch (call->completion) { + case RXRPC_CALL_SUCCEEDED: + ret = 0; + if (rxrpc_is_service_call(call)) + ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ACK, 0, &tmp); + break; + case RXRPC_CALL_REMOTELY_ABORTED: + tmp = call->abort_code; + ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ABORT, 4, &tmp); + break; + case RXRPC_CALL_LOCALLY_ABORTED: + tmp = call->abort_code; + ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ABORT, 4, &tmp); + break; + case RXRPC_CALL_NETWORK_ERROR: + tmp = call->error; + ret = put_cmsg(msg, SOL_RXRPC, RXRPC_NET_ERROR, 4, &tmp); + break; + case RXRPC_CALL_LOCAL_ERROR: + tmp = call->error; + ret = put_cmsg(msg, SOL_RXRPC, RXRPC_LOCAL_ERROR, 4, &tmp); + break; + default: + pr_err("Invalid terminal call state %u\n", call->state); + BUG(); + break; + } + + return ret; +} + +/* + * Pass back notification of a new call. The call is added to the + * to-be-accepted list. This means that the next call to be accepted might not + * be the last call seen awaiting acceptance, but unless we leave this on the + * front of the queue and block all other messages until someone gives us a + * user_ID for it, there's not a lot we can do. + */ +static int rxrpc_recvmsg_new_call(struct rxrpc_sock *rx, + struct rxrpc_call *call, + struct msghdr *msg, int flags) +{ + int tmp = 0, ret; + + ret = put_cmsg(msg, SOL_RXRPC, RXRPC_NEW_CALL, 0, &tmp); + + if (ret == 0 && !(flags & MSG_PEEK)) { + _debug("to be accepted"); + write_lock_bh(&rx->recvmsg_lock); + list_del_init(&call->recvmsg_link); + write_unlock_bh(&rx->recvmsg_lock); + + write_lock(&rx->call_lock); + list_add_tail(&call->accept_link, &rx->to_be_accepted); + write_unlock(&rx->call_lock); + } + + return ret; +} + +/* + * End the packet reception phase. + */ +static void rxrpc_end_rx_phase(struct rxrpc_call *call) +{ + _enter("%d,%s", call->debug_id, rxrpc_call_states[call->state]); + + if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY) { + rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, 0, true, false); + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK); + } else { + rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, 0, false, false); + } + + write_lock_bh(&call->state_lock); + + switch (call->state) { + case RXRPC_CALL_CLIENT_RECV_REPLY: + __rxrpc_call_completed(call); + break; + + case RXRPC_CALL_SERVER_RECV_REQUEST: + call->state = RXRPC_CALL_SERVER_ACK_REQUEST; + break; + default: + break; + } + + write_unlock_bh(&call->state_lock); +} + +/* + * Discard a packet we've used up and advance the Rx window by one. + */ +static void rxrpc_rotate_rx_window(struct rxrpc_call *call) +{ + struct sk_buff *skb; + rxrpc_seq_t hard_ack, top; + int ix; + + _enter("%d", call->debug_id); + + hard_ack = call->rx_hard_ack; + top = smp_load_acquire(&call->rx_top); + ASSERT(before(hard_ack, top)); + + hard_ack++; + ix = hard_ack & RXRPC_RXTX_BUFF_MASK; + skb = call->rxtx_buffer[ix]; + rxrpc_see_skb(skb); + call->rxtx_buffer[ix] = NULL; + call->rxtx_annotations[ix] = 0; + /* Barrier against rxrpc_input_data(). */ + smp_store_release(&call->rx_hard_ack, hard_ack); + + rxrpc_free_skb(skb); + + _debug("%u,%u,%lx", hard_ack, top, call->flags); + if (hard_ack == top && test_bit(RXRPC_CALL_RX_LAST, &call->flags)) + rxrpc_end_rx_phase(call); +} + +/* + * Decrypt and verify a (sub)packet. The packet's length may be changed due to + * padding, but if this is the case, the packet length will be resident in the + * socket buffer. Note that we can't modify the master skb info as the skb may + * be the home to multiple subpackets. + */ +static int rxrpc_verify_packet(struct rxrpc_call *call, struct sk_buff *skb, + u8 annotation, + unsigned int offset, unsigned int len) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + rxrpc_seq_t seq = sp->hdr.seq; + u16 cksum = sp->hdr.cksum; + + _enter(""); + + /* For all but the head jumbo subpacket, the security checksum is in a + * jumbo header immediately prior to the data. + */ + if ((annotation & RXRPC_RX_ANNO_JUMBO) > 1) { + __be16 tmp; + if (skb_copy_bits(skb, offset - 2, &tmp, 2) < 0) + BUG(); + cksum = ntohs(tmp); + seq += (annotation & RXRPC_RX_ANNO_JUMBO) - 1; + } + + return call->conn->security->verify_packet(call, skb, offset, len, + seq, cksum); +} + +/* + * Locate the data within a packet. This is complicated by: + * + * (1) An skb may contain a jumbo packet - so we have to find the appropriate + * subpacket. + * + * (2) The (sub)packets may be encrypted and, if so, the encrypted portion + * contains an extra header which includes the true length of the data, + * excluding any encrypted padding. + */ +static int rxrpc_locate_data(struct rxrpc_call *call, struct sk_buff *skb, + u8 *_annotation, + unsigned int *_offset, unsigned int *_len) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + unsigned int offset = *_offset; + unsigned int len = *_len; + int ret; + u8 annotation = *_annotation; + + if (offset > 0) + return 0; + + /* Locate the subpacket */ + offset = sp->offset; + len = skb->len - sp->offset; + if ((annotation & RXRPC_RX_ANNO_JUMBO) > 0) { + offset += (((annotation & RXRPC_RX_ANNO_JUMBO) - 1) * + RXRPC_JUMBO_SUBPKTLEN); + len = (annotation & RXRPC_RX_ANNO_JLAST) ? + skb->len - offset : RXRPC_JUMBO_SUBPKTLEN; + } + + if (!(annotation & RXRPC_RX_ANNO_VERIFIED)) { + ret = rxrpc_verify_packet(call, skb, annotation, offset, len); + if (ret < 0) + return ret; + *_annotation |= RXRPC_RX_ANNO_VERIFIED; + } + + *_offset = offset; + *_len = len; + call->conn->security->locate_data(call, skb, _offset, _len); + return 0; +} + +/* + * Deliver messages to a call. This keeps processing packets until the buffer + * is filled and we find either more DATA (returns 0) or the end of the DATA + * (returns 1). If more packets are required, it returns -EAGAIN. + */ +static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, + struct msghdr *msg, struct iov_iter *iter, + size_t len, int flags, size_t *_offset) +{ + struct rxrpc_skb_priv *sp; + struct sk_buff *skb; + rxrpc_seq_t hard_ack, top, seq; + size_t remain; + bool last; + unsigned int rx_pkt_offset, rx_pkt_len; + int ix, copy, ret = 0; + + _enter(""); + + rx_pkt_offset = call->rx_pkt_offset; + rx_pkt_len = call->rx_pkt_len; + + /* Barriers against rxrpc_input_data(). */ + hard_ack = call->rx_hard_ack; + top = smp_load_acquire(&call->rx_top); + for (seq = hard_ack + 1; before_eq(seq, top); seq++) { + ix = seq & RXRPC_RXTX_BUFF_MASK; + skb = call->rxtx_buffer[ix]; + if (!skb) + break; + smp_rmb(); + rxrpc_see_skb(skb); + sp = rxrpc_skb(skb); + + if (msg) + sock_recv_timestamp(msg, sock->sk, skb); + + ret = rxrpc_locate_data(call, skb, &call->rxtx_annotations[ix], + &rx_pkt_offset, &rx_pkt_len); + _debug("recvmsg %x DATA #%u { %d, %d }", + sp->hdr.callNumber, seq, rx_pkt_offset, rx_pkt_len); + + /* We have to handle short, empty and used-up DATA packets. */ + remain = len - *_offset; + copy = rx_pkt_len; + if (copy > remain) + copy = remain; + if (copy > 0) { + ret = skb_copy_datagram_iter(skb, rx_pkt_offset, iter, + copy); + if (ret < 0) + goto out; + + /* handle piecemeal consumption of data packets */ + _debug("copied %d @%zu", copy, *_offset); + + rx_pkt_offset += copy; + rx_pkt_len -= copy; + *_offset += copy; + } + + if (rx_pkt_len > 0) { + _debug("buffer full"); + ASSERTCMP(*_offset, ==, len); + break; + } + + /* The whole packet has been transferred. */ + last = sp->hdr.flags & RXRPC_LAST_PACKET; + if (!(flags & MSG_PEEK)) + rxrpc_rotate_rx_window(call); + rx_pkt_offset = 0; + rx_pkt_len = 0; + + ASSERTIFCMP(last, seq, ==, top); + } + + if (after(seq, top)) { + ret = -EAGAIN; + if (test_bit(RXRPC_CALL_RX_LAST, &call->flags)) + ret = 1; + } +out: + if (!(flags & MSG_PEEK)) { + call->rx_pkt_offset = rx_pkt_offset; + call->rx_pkt_len = rx_pkt_len; + } + _leave(" = %d [%u/%u]", ret, seq, top); + return ret; +} + +/* + * Receive a message from an RxRPC socket * - we need to be careful about two or more threads calling recvmsg * simultaneously */ int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { - struct rxrpc_skb_priv *sp; - struct rxrpc_call *call = NULL, *continue_call = NULL; + struct rxrpc_call *call; struct rxrpc_sock *rx = rxrpc_sk(sock->sk); - struct sk_buff *skb; + struct list_head *l; + size_t copied = 0; long timeo; - int copy, ret, ullen, offset, copied = 0; - u32 abort_code; + int ret; DEFINE_WAIT(wait); @@ -41,297 +378,120 @@ int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, if (flags & (MSG_OOB | MSG_TRUNC)) return -EOPNOTSUPP; - ullen = msg->msg_flags & MSG_CMSG_COMPAT ? 4 : sizeof(unsigned long); - timeo = sock_rcvtimeo(&rx->sk, flags & MSG_DONTWAIT); - msg->msg_flags |= MSG_MORE; +try_again: lock_sock(&rx->sk); - for (;;) { - /* return immediately if a client socket has no outstanding - * calls */ - if (RB_EMPTY_ROOT(&rx->calls)) { - if (copied) - goto out; - if (rx->sk.sk_state != RXRPC_SERVER_LISTENING) { - release_sock(&rx->sk); - if (continue_call) - rxrpc_put_call(continue_call, - rxrpc_call_put); - return -ENODATA; - } - } - - /* get the next message on the Rx queue */ - skb = skb_peek(&rx->sk.sk_receive_queue); - if (!skb) { - /* nothing remains on the queue */ - if (copied && - (flags & MSG_PEEK || timeo == 0)) - goto out; - - /* wait for a message to turn up */ - release_sock(&rx->sk); - prepare_to_wait_exclusive(sk_sleep(&rx->sk), &wait, - TASK_INTERRUPTIBLE); - ret = sock_error(&rx->sk); - if (ret) - goto wait_error; - - if (skb_queue_empty(&rx->sk.sk_receive_queue)) { - if (signal_pending(current)) - goto wait_interrupted; - timeo = schedule_timeout(timeo); - } - finish_wait(sk_sleep(&rx->sk), &wait); - lock_sock(&rx->sk); - continue; - } - - peek_next_packet: - rxrpc_see_skb(skb); - sp = rxrpc_skb(skb); - call = sp->call; - ASSERT(call != NULL); - rxrpc_see_call(call); - - _debug("next pkt %s", rxrpc_pkts[sp->hdr.type]); - - /* make sure we wait for the state to be updated in this call */ - spin_lock_bh(&call->lock); - spin_unlock_bh(&call->lock); - - if (test_bit(RXRPC_CALL_RELEASED, &call->flags)) { - _debug("packet from released call"); - if (skb_dequeue(&rx->sk.sk_receive_queue) != skb) - BUG(); - rxrpc_free_skb(skb); - continue; - } - - /* determine whether to continue last data receive */ - if (continue_call) { - _debug("maybe cont"); - if (call != continue_call || - skb->mark != RXRPC_SKB_MARK_DATA) { - release_sock(&rx->sk); - rxrpc_put_call(continue_call, rxrpc_call_put); - _leave(" = %d [noncont]", copied); - return copied; - } + /* Return immediately if a client socket has no outstanding calls */ + if (RB_EMPTY_ROOT(&rx->calls) && + list_empty(&rx->recvmsg_q) && + rx->sk.sk_state != RXRPC_SERVER_LISTENING) { + release_sock(&rx->sk); + return -ENODATA; + } + + if (list_empty(&rx->recvmsg_q)) { + ret = -EWOULDBLOCK; + if (timeo == 0) + goto error_no_call; + + release_sock(&rx->sk); + + /* Wait for something to happen */ + prepare_to_wait_exclusive(sk_sleep(&rx->sk), &wait, + TASK_INTERRUPTIBLE); + ret = sock_error(&rx->sk); + if (ret) + goto wait_error; + + if (list_empty(&rx->recvmsg_q)) { + if (signal_pending(current)) + goto wait_interrupted; + timeo = schedule_timeout(timeo); } + finish_wait(sk_sleep(&rx->sk), &wait); + goto try_again; + } + /* Find the next call and dequeue it if we're not just peeking. If we + * do dequeue it, that comes with a ref that we will need to release. + */ + write_lock_bh(&rx->recvmsg_lock); + l = rx->recvmsg_q.next; + call = list_entry(l, struct rxrpc_call, recvmsg_link); + if (!(flags & MSG_PEEK)) + list_del_init(&call->recvmsg_link); + else rxrpc_get_call(call, rxrpc_call_got); + write_unlock_bh(&rx->recvmsg_lock); - /* copy the peer address and timestamp */ - if (!continue_call) { - if (msg->msg_name) { - size_t len = - sizeof(call->conn->params.peer->srx); - memcpy(msg->msg_name, - &call->conn->params.peer->srx, len); - msg->msg_namelen = len; - } - sock_recv_timestamp(msg, &rx->sk, skb); - } + _debug("recvmsg call %p", call); - /* receive the message */ - if (skb->mark != RXRPC_SKB_MARK_DATA) - goto receive_non_data_message; - - _debug("recvmsg DATA #%u { %d, %d }", - sp->hdr.seq, skb->len, sp->offset); - - if (!continue_call) { - /* only set the control data once per recvmsg() */ - ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID, - ullen, &call->user_call_ID); - if (ret < 0) - goto copy_error; - ASSERT(test_bit(RXRPC_CALL_HAS_USERID, &call->flags)); - } - - ASSERTCMP(sp->hdr.seq, >=, call->rx_data_recv); - ASSERTCMP(sp->hdr.seq, <=, call->rx_data_recv + 1); - call->rx_data_recv = sp->hdr.seq; - - ASSERTCMP(sp->hdr.seq, >, call->rx_data_eaten); - - offset = sp->offset; - copy = skb->len - offset; - if (copy > len - copied) - copy = len - copied; - - ret = skb_copy_datagram_msg(skb, offset, msg, copy); - - if (ret < 0) - goto copy_error; - - /* handle piecemeal consumption of data packets */ - _debug("copied %d+%d", copy, copied); - - offset += copy; - copied += copy; - - if (!(flags & MSG_PEEK)) - sp->offset = offset; - - if (sp->offset < skb->len) { - _debug("buffer full"); - ASSERTCMP(copied, ==, len); - break; - } - - /* we transferred the whole data packet */ - if (!(flags & MSG_PEEK)) - rxrpc_kernel_data_consumed(call, skb); - - if (sp->hdr.flags & RXRPC_LAST_PACKET) { - _debug("last"); - if (rxrpc_conn_is_client(call->conn)) { - /* last byte of reply received */ - ret = copied; - goto terminal_message; - } - - /* last bit of request received */ - if (!(flags & MSG_PEEK)) { - _debug("eat packet"); - if (skb_dequeue(&rx->sk.sk_receive_queue) != - skb) - BUG(); - rxrpc_free_skb(skb); - } - msg->msg_flags &= ~MSG_MORE; - break; - } - - /* move on to the next data message */ - _debug("next"); - if (!continue_call) - continue_call = sp->call; - else - rxrpc_put_call(call, rxrpc_call_put); - call = NULL; - - if (flags & MSG_PEEK) { - _debug("peek next"); - skb = skb->next; - if (skb == (struct sk_buff *) &rx->sk.sk_receive_queue) - break; - goto peek_next_packet; - } - - _debug("eat packet"); - if (skb_dequeue(&rx->sk.sk_receive_queue) != skb) - BUG(); - rxrpc_free_skb(skb); - } - - /* end of non-terminal data packet reception for the moment */ - _debug("end rcv data"); -out: - release_sock(&rx->sk); - if (call) - rxrpc_put_call(call, rxrpc_call_put); - if (continue_call) - rxrpc_put_call(continue_call, rxrpc_call_put); - _leave(" = %d [data]", copied); - return copied; - - /* handle non-DATA messages such as aborts, incoming connections and - * final ACKs */ -receive_non_data_message: - _debug("non-data"); - - if (skb->mark == RXRPC_SKB_MARK_NEW_CALL) { - _debug("RECV NEW CALL"); - ret = put_cmsg(msg, SOL_RXRPC, RXRPC_NEW_CALL, 0, &abort_code); - if (ret < 0) - goto copy_error; - if (!(flags & MSG_PEEK)) { - if (skb_dequeue(&rx->sk.sk_receive_queue) != skb) - BUG(); - rxrpc_free_skb(skb); - } - goto out; - } - - ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID, - ullen, &call->user_call_ID); - if (ret < 0) - goto copy_error; - ASSERT(test_bit(RXRPC_CALL_HAS_USERID, &call->flags)); - - switch (skb->mark) { - case RXRPC_SKB_MARK_DATA: + if (test_bit(RXRPC_CALL_RELEASED, &call->flags)) BUG(); - case RXRPC_SKB_MARK_FINAL_ACK: - ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ACK, 0, &abort_code); - break; - case RXRPC_SKB_MARK_BUSY: - ret = put_cmsg(msg, SOL_RXRPC, RXRPC_BUSY, 0, &abort_code); - break; - case RXRPC_SKB_MARK_REMOTE_ABORT: - abort_code = call->abort_code; - ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ABORT, 4, &abort_code); - break; - case RXRPC_SKB_MARK_LOCAL_ABORT: - abort_code = call->abort_code; - ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ABORT, 4, &abort_code); - if (call->error) { - abort_code = call->error; - ret = put_cmsg(msg, SOL_RXRPC, RXRPC_LOCAL_ERROR, 4, - &abort_code); + + if (test_bit(RXRPC_CALL_HAS_USERID, &call->flags)) { + if (flags & MSG_CMSG_COMPAT) { + unsigned int id32 = call->user_call_ID; + + ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID, + sizeof(unsigned int), &id32); + } else { + ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID, + sizeof(unsigned long), + &call->user_call_ID); } + if (ret < 0) + goto error; + } + + if (msg->msg_name) { + size_t len = sizeof(call->conn->params.peer->srx); + memcpy(msg->msg_name, &call->conn->params.peer->srx, len); + msg->msg_namelen = len; + } + + switch (call->state) { + case RXRPC_CALL_SERVER_ACCEPTING: + ret = rxrpc_recvmsg_new_call(rx, call, msg, flags); break; - case RXRPC_SKB_MARK_NET_ERROR: - _debug("RECV NET ERROR %d", sp->error); - abort_code = sp->error; - ret = put_cmsg(msg, SOL_RXRPC, RXRPC_NET_ERROR, 4, &abort_code); - break; - case RXRPC_SKB_MARK_LOCAL_ERROR: - _debug("RECV LOCAL ERROR %d", sp->error); - abort_code = sp->error; - ret = put_cmsg(msg, SOL_RXRPC, RXRPC_LOCAL_ERROR, 4, - &abort_code); + case RXRPC_CALL_CLIENT_RECV_REPLY: + case RXRPC_CALL_SERVER_RECV_REQUEST: + case RXRPC_CALL_SERVER_ACK_REQUEST: + ret = rxrpc_recvmsg_data(sock, call, msg, &msg->msg_iter, len, + flags, &copied); + if (ret == -EAGAIN) + ret = 0; break; default: - pr_err("Unknown packet mark %u\n", skb->mark); - BUG(); + ret = 0; break; } if (ret < 0) - goto copy_error; + goto error; -terminal_message: - _debug("terminal"); - msg->msg_flags &= ~MSG_MORE; - msg->msg_flags |= MSG_EOR; - - if (!(flags & MSG_PEEK)) { - _net("free terminal skb %p", skb); - if (skb_dequeue(&rx->sk.sk_receive_queue) != skb) - BUG(); - rxrpc_free_skb(skb); - rxrpc_release_call(rx, call); + if (call->state == RXRPC_CALL_COMPLETE) { + ret = rxrpc_recvmsg_term(call, msg); + if (ret < 0) + goto error; + if (!(flags & MSG_PEEK)) + rxrpc_release_call(rx, call); + msg->msg_flags |= MSG_EOR; + ret = 1; } - release_sock(&rx->sk); - rxrpc_put_call(call, rxrpc_call_put); - if (continue_call) - rxrpc_put_call(continue_call, rxrpc_call_put); - _leave(" = %d", ret); - return ret; + if (ret == 0) + msg->msg_flags |= MSG_MORE; + else + msg->msg_flags &= ~MSG_MORE; + ret = copied; -copy_error: - _debug("copy error"); - release_sock(&rx->sk); +error: rxrpc_put_call(call, rxrpc_call_put); - if (continue_call) - rxrpc_put_call(continue_call, rxrpc_call_put); +error_no_call: + release_sock(&rx->sk); _leave(" = %d", ret); return ret; @@ -339,85 +499,8 @@ wait_interrupted: ret = sock_intr_errno(timeo); wait_error: finish_wait(sk_sleep(&rx->sk), &wait); - if (continue_call) - rxrpc_put_call(continue_call, rxrpc_call_put); - if (copied) - copied = ret; - _leave(" = %d [waitfail %d]", copied, ret); - return copied; - -} - -/* - * Deliver messages to a call. This keeps processing packets until the buffer - * is filled and we find either more DATA (returns 0) or the end of the DATA - * (returns 1). If more packets are required, it returns -EAGAIN. - * - * TODO: Note that this is hacked in at the moment and will be replaced. - */ -static int temp_deliver_data(struct socket *sock, struct rxrpc_call *call, - struct iov_iter *iter, size_t size, - size_t *_offset) -{ - struct rxrpc_skb_priv *sp; - struct sk_buff *skb; - size_t remain; - int ret, copy; - - _enter("%d", call->debug_id); - -next: - local_bh_disable(); - skb = skb_dequeue(&call->knlrecv_queue); - local_bh_enable(); - if (!skb) { - if (test_bit(RXRPC_CALL_RX_NO_MORE, &call->flags)) - return 1; - _leave(" = -EAGAIN [empty]"); - return -EAGAIN; - } - - sp = rxrpc_skb(skb); - _debug("dequeued %p %u/%zu", skb, sp->offset, size); - - switch (skb->mark) { - case RXRPC_SKB_MARK_DATA: - remain = size - *_offset; - if (remain > 0) { - copy = skb->len - sp->offset; - if (copy > remain) - copy = remain; - ret = skb_copy_datagram_iter(skb, sp->offset, iter, - copy); - if (ret < 0) - goto requeue_and_leave; - - /* handle piecemeal consumption of data packets */ - sp->offset += copy; - *_offset += copy; - } - - if (sp->offset < skb->len) - goto partially_used_skb; - - /* We consumed the whole packet */ - ASSERTCMP(sp->offset, ==, skb->len); - if (sp->hdr.flags & RXRPC_LAST_PACKET) - set_bit(RXRPC_CALL_RX_NO_MORE, &call->flags); - rxrpc_kernel_data_consumed(call, skb); - rxrpc_free_skb(skb); - goto next; - - default: - rxrpc_free_skb(skb); - goto next; - } - -partially_used_skb: - ASSERTCMP(*_offset, ==, size); - ret = 0; -requeue_and_leave: - skb_queue_head(&call->knlrecv_queue, skb); + release_sock(&rx->sk); + _leave(" = %d [wait]", ret); return ret; } @@ -453,8 +536,9 @@ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call, struct kvec iov; int ret; - _enter("{%d,%s},%zu,%d", - call->debug_id, rxrpc_call_states[call->state], size, want_more); + _enter("{%d,%s},%zu/%zu,%d", + call->debug_id, rxrpc_call_states[call->state], + *_offset, size, want_more); ASSERTCMP(*_offset, <=, size); ASSERTCMP(call->state, !=, RXRPC_CALL_SERVER_ACCEPTING); @@ -469,7 +553,8 @@ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call, case RXRPC_CALL_CLIENT_RECV_REPLY: case RXRPC_CALL_SERVER_RECV_REQUEST: case RXRPC_CALL_SERVER_ACK_REQUEST: - ret = temp_deliver_data(sock, call, &iter, size, _offset); + ret = rxrpc_recvmsg_data(sock, call, NULL, &iter, size, 0, + _offset); if (ret < 0) goto out; @@ -494,7 +579,6 @@ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call, goto call_complete; default: - *_offset = 0; ret = -EINPROGRESS; goto out; } diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index 3777432df10b..ae392558829d 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -317,6 +317,7 @@ static int rxkad_secure_packet(struct rxrpc_call *call, * decrypt partial encryption on a packet (level 1 security) */ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, + unsigned int offset, unsigned int len, rxrpc_seq_t seq) { struct rxkad_level1_hdr sechdr; @@ -330,18 +331,20 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, _enter(""); - if (skb->len < 8) { + if (len < 8) { rxrpc_abort_call("V1H", call, seq, RXKADSEALEDINCON, EPROTO); goto protocol_error; } - /* we want to decrypt the skbuff in-place */ + /* Decrypt the skbuff in-place. TODO: We really want to decrypt + * directly into the target buffer. + */ nsg = skb_cow_data(skb, 0, &trailer); if (nsg < 0 || nsg > 16) goto nomem; sg_init_table(sg, nsg); - skb_to_sgvec(skb, sg, 0, 8); + skb_to_sgvec(skb, sg, offset, 8); /* start the decryption afresh */ memset(&iv, 0, sizeof(iv)); @@ -353,12 +356,12 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, skcipher_request_zero(req); /* Extract the decrypted packet length */ - if (skb_copy_bits(skb, 0, &sechdr, sizeof(sechdr)) < 0) { + if (skb_copy_bits(skb, offset, &sechdr, sizeof(sechdr)) < 0) { rxrpc_abort_call("XV1", call, seq, RXKADDATALEN, EPROTO); goto protocol_error; } - if (!skb_pull(skb, sizeof(sechdr))) - BUG(); + offset += sizeof(sechdr); + len -= sizeof(sechdr); buf = ntohl(sechdr.data_size); data_size = buf & 0xffff; @@ -371,18 +374,16 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, goto protocol_error; } - /* shorten the packet to remove the padding */ - if (data_size > skb->len) { + if (data_size > len) { rxrpc_abort_call("V1L", call, seq, RXKADDATALEN, EPROTO); goto protocol_error; } - if (data_size < skb->len) - skb->len = data_size; _leave(" = 0 [dlen=%x]", data_size); return 0; protocol_error: + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); _leave(" = -EPROTO"); return -EPROTO; @@ -395,6 +396,7 @@ nomem: * wholly decrypt a packet (level 2 security) */ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, + unsigned int offset, unsigned int len, rxrpc_seq_t seq) { const struct rxrpc_key_token *token; @@ -409,12 +411,14 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, _enter(",{%d}", skb->len); - if (skb->len < 8) { + if (len < 8) { rxrpc_abort_call("V2H", call, seq, RXKADSEALEDINCON, EPROTO); goto protocol_error; } - /* we want to decrypt the skbuff in-place */ + /* Decrypt the skbuff in-place. TODO: We really want to decrypt + * directly into the target buffer. + */ nsg = skb_cow_data(skb, 0, &trailer); if (nsg < 0) goto nomem; @@ -427,7 +431,7 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, } sg_init_table(sg, nsg); - skb_to_sgvec(skb, sg, 0, skb->len); + skb_to_sgvec(skb, sg, offset, len); /* decrypt from the session key */ token = call->conn->params.key->payload.data[0]; @@ -435,19 +439,19 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, skcipher_request_set_tfm(req, call->conn->cipher); skcipher_request_set_callback(req, 0, NULL, NULL); - skcipher_request_set_crypt(req, sg, sg, skb->len, iv.x); + skcipher_request_set_crypt(req, sg, sg, len, iv.x); crypto_skcipher_decrypt(req); skcipher_request_zero(req); if (sg != _sg) kfree(sg); /* Extract the decrypted packet length */ - if (skb_copy_bits(skb, 0, &sechdr, sizeof(sechdr)) < 0) { + if (skb_copy_bits(skb, offset, &sechdr, sizeof(sechdr)) < 0) { rxrpc_abort_call("XV2", call, seq, RXKADDATALEN, EPROTO); goto protocol_error; } - if (!skb_pull(skb, sizeof(sechdr))) - BUG(); + offset += sizeof(sechdr); + len -= sizeof(sechdr); buf = ntohl(sechdr.data_size); data_size = buf & 0xffff; @@ -460,17 +464,16 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, goto protocol_error; } - if (data_size > skb->len) { + if (data_size > len) { rxrpc_abort_call("V2L", call, seq, RXKADDATALEN, EPROTO); goto protocol_error; } - if (data_size < skb->len) - skb->len = data_size; _leave(" = 0 [dlen=%x]", data_size); return 0; protocol_error: + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); _leave(" = -EPROTO"); return -EPROTO; @@ -484,6 +487,7 @@ nomem: * jumbo packet). */ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb, + unsigned int offset, unsigned int len, rxrpc_seq_t seq, u16 expected_cksum) { SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher); @@ -521,6 +525,7 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb, if (cksum != expected_cksum) { rxrpc_abort_call("VCK", call, seq, RXKADSEALEDINCON, EPROTO); + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); _leave(" = -EPROTO [csum failed]"); return -EPROTO; } @@ -529,14 +534,60 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb, case RXRPC_SECURITY_PLAIN: return 0; case RXRPC_SECURITY_AUTH: - return rxkad_verify_packet_1(call, skb, seq); + return rxkad_verify_packet_1(call, skb, offset, len, seq); case RXRPC_SECURITY_ENCRYPT: - return rxkad_verify_packet_2(call, skb, seq); + return rxkad_verify_packet_2(call, skb, offset, len, seq); default: return -ENOANO; } } +/* + * Locate the data contained in a packet that was partially encrypted. + */ +static void rxkad_locate_data_1(struct rxrpc_call *call, struct sk_buff *skb, + unsigned int *_offset, unsigned int *_len) +{ + struct rxkad_level1_hdr sechdr; + + if (skb_copy_bits(skb, *_offset, &sechdr, sizeof(sechdr)) < 0) + BUG(); + *_offset += sizeof(sechdr); + *_len = ntohl(sechdr.data_size) & 0xffff; +} + +/* + * Locate the data contained in a packet that was completely encrypted. + */ +static void rxkad_locate_data_2(struct rxrpc_call *call, struct sk_buff *skb, + unsigned int *_offset, unsigned int *_len) +{ + struct rxkad_level2_hdr sechdr; + + if (skb_copy_bits(skb, *_offset, &sechdr, sizeof(sechdr)) < 0) + BUG(); + *_offset += sizeof(sechdr); + *_len = ntohl(sechdr.data_size) & 0xffff; +} + +/* + * Locate the data contained in an already decrypted packet. + */ +static void rxkad_locate_data(struct rxrpc_call *call, struct sk_buff *skb, + unsigned int *_offset, unsigned int *_len) +{ + switch (call->conn->params.security_level) { + case RXRPC_SECURITY_AUTH: + rxkad_locate_data_1(call, skb, _offset, _len); + return; + case RXRPC_SECURITY_ENCRYPT: + rxkad_locate_data_2(call, skb, _offset, _len); + return; + default: + return; + } +} + /* * issue a challenge */ @@ -704,7 +755,7 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn, struct rxkad_challenge challenge; struct rxkad_response resp __attribute__((aligned(8))); /* must be aligned for crypto */ - struct rxrpc_skb_priv *sp; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); u32 version, nonce, min_level, abort_code; int ret; @@ -722,8 +773,7 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn, } abort_code = RXKADPACKETSHORT; - sp = rxrpc_skb(skb); - if (skb_copy_bits(skb, 0, &challenge, sizeof(challenge)) < 0) + if (skb_copy_bits(skb, sp->offset, &challenge, sizeof(challenge)) < 0) goto protocol_error; version = ntohl(challenge.version); @@ -969,7 +1019,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, { struct rxkad_response response __attribute__((aligned(8))); /* must be aligned for crypto */ - struct rxrpc_skb_priv *sp; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_crypt session_key; time_t expiry; void *ticket; @@ -980,7 +1030,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, _enter("{%d,%x}", conn->debug_id, key_serial(conn->server_key)); abort_code = RXKADPACKETSHORT; - if (skb_copy_bits(skb, 0, &response, sizeof(response)) < 0) + if (skb_copy_bits(skb, sp->offset, &response, sizeof(response)) < 0) goto protocol_error; if (!pskb_pull(skb, sizeof(response))) BUG(); @@ -988,7 +1038,6 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, version = ntohl(response.version); ticket_len = ntohl(response.ticket_len); kvno = ntohl(response.kvno); - sp = rxrpc_skb(skb); _proto("Rx RESPONSE %%%u { v=%u kv=%u tl=%u }", sp->hdr.serial, version, kvno, ticket_len); @@ -1010,7 +1059,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, return -ENOMEM; abort_code = RXKADPACKETSHORT; - if (skb_copy_bits(skb, 0, ticket, ticket_len) < 0) + if (skb_copy_bits(skb, sp->offset, ticket, ticket_len) < 0) goto protocol_error_free; ret = rxkad_decrypt_ticket(conn, ticket, ticket_len, &session_key, @@ -1135,6 +1184,7 @@ const struct rxrpc_security rxkad = { .prime_packet_security = rxkad_prime_packet_security, .secure_packet = rxkad_secure_packet, .verify_packet = rxkad_verify_packet, + .locate_data = rxkad_locate_data, .issue_challenge = rxkad_issue_challenge, .respond_to_challenge = rxkad_respond_to_challenge, .verify_response = rxkad_verify_response, diff --git a/net/rxrpc/security.c b/net/rxrpc/security.c index 5d79d5a9c944..82d8134e9287 100644 --- a/net/rxrpc/security.c +++ b/net/rxrpc/security.c @@ -130,20 +130,20 @@ int rxrpc_init_server_conn_security(struct rxrpc_connection *conn) } /* find the service */ - read_lock_bh(&local->services_lock); + read_lock(&local->services_lock); hlist_for_each_entry(rx, &local->services, listen_link) { if (rx->srx.srx_service == conn->params.service_id) goto found_service; } /* the service appears to have died */ - read_unlock_bh(&local->services_lock); + read_unlock(&local->services_lock); _leave(" = -ENOENT"); return -ENOENT; found_service: if (!rx->securities) { - read_unlock_bh(&local->services_lock); + read_unlock(&local->services_lock); _leave(" = -ENOKEY"); return -ENOKEY; } @@ -152,13 +152,13 @@ found_service: kref = keyring_search(make_key_ref(rx->securities, 1UL), &key_type_rxrpc_s, kdesc); if (IS_ERR(kref)) { - read_unlock_bh(&local->services_lock); + read_unlock(&local->services_lock); _leave(" = %ld [search]", PTR_ERR(kref)); return PTR_ERR(kref); } key = key_ref_to_ptr(kref); - read_unlock_bh(&local->services_lock); + read_unlock(&local->services_lock); conn->server_key = key; conn->security = sec; diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 9a4af992fcdf..cba236575073 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include "ar-internal.h" @@ -38,19 +37,20 @@ static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx, DECLARE_WAITQUEUE(myself, current); int ret; - _enter(",{%d},%ld", - CIRC_SPACE(call->acks_head, ACCESS_ONCE(call->acks_tail), - call->acks_winsz), - *timeo); + _enter(",{%u,%u,%u}", + call->tx_hard_ack, call->tx_top, call->tx_winsize); add_wait_queue(&call->waitq, &myself); for (;;) { set_current_state(TASK_INTERRUPTIBLE); ret = 0; - if (CIRC_SPACE(call->acks_head, ACCESS_ONCE(call->acks_tail), - call->acks_winsz) > 0) + if (call->tx_top - call->tx_hard_ack < call->tx_winsize) break; + if (call->state >= RXRPC_CALL_COMPLETE) { + ret = -call->error; + break; + } if (signal_pending(current)) { ret = sock_intr_errno(*timeo); break; @@ -68,36 +68,44 @@ static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx, } /* - * attempt to schedule an instant Tx resend + * Schedule an instant Tx resend. */ -static inline void rxrpc_instant_resend(struct rxrpc_call *call) +static inline void rxrpc_instant_resend(struct rxrpc_call *call, int ix) { - read_lock_bh(&call->state_lock); - if (try_to_del_timer_sync(&call->resend_timer) >= 0) { - clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags); - if (call->state < RXRPC_CALL_COMPLETE && - !test_and_set_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events)) + spin_lock_bh(&call->lock); + + if (call->state < RXRPC_CALL_COMPLETE) { + call->rxtx_annotations[ix] = RXRPC_TX_ANNO_RETRANS; + if (!test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events)) rxrpc_queue_call(call); } - read_unlock_bh(&call->state_lock); + + spin_unlock_bh(&call->lock); } /* - * queue a packet for transmission, set the resend timer and attempt - * to send the packet immediately + * Queue a DATA packet for transmission, set the resend timeout and send the + * packet immediately */ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, bool last) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - int ret; + rxrpc_seq_t seq = sp->hdr.seq; + int ret, ix; - _net("queue skb %p [%d]", skb, call->acks_head); + _net("queue skb %p [%d]", skb, seq); - ASSERT(call->acks_window != NULL); - call->acks_window[call->acks_head] = (unsigned long) skb; + ASSERTCMP(seq, ==, call->tx_top + 1); + + ix = seq & RXRPC_RXTX_BUFF_MASK; + rxrpc_get_skb(skb); + call->rxtx_annotations[ix] = RXRPC_TX_ANNO_UNACK; smp_wmb(); - call->acks_head = (call->acks_head + 1) & (call->acks_winsz - 1); + call->rxtx_buffer[ix] = skb; + call->tx_top = seq; + if (last) + set_bit(RXRPC_CALL_TX_LAST, &call->flags); if (last || call->state == RXRPC_CALL_SERVER_ACK_REQUEST) { _debug("________awaiting reply/ACK__________"); @@ -121,34 +129,17 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, _proto("Tx DATA %%%u { #%u }", sp->hdr.serial, sp->hdr.seq); - sp->need_resend = false; + if (seq == 1 && rxrpc_is_client_call(call)) + rxrpc_expose_client_call(call); + sp->resend_at = jiffies + rxrpc_resend_timeout; - if (!test_and_set_bit(RXRPC_CALL_RUN_RTIMER, &call->flags)) { - _debug("run timer"); - call->resend_timer.expires = sp->resend_at; - add_timer(&call->resend_timer); - } - - /* attempt to cancel the rx-ACK timer, deferring reply transmission if - * we're ACK'ing the request phase of an incoming call */ - ret = -EAGAIN; - if (try_to_del_timer_sync(&call->ack_timer) >= 0) { - /* the packet may be freed by rxrpc_process_call() before this - * returns */ - if (rxrpc_is_client_call(call)) - rxrpc_expose_client_call(call); - ret = rxrpc_send_data_packet(call->conn, skb); - _net("sent skb %p", skb); - } else { - _debug("failed to delete ACK timer"); - } - + ret = rxrpc_send_data_packet(call->conn, skb); if (ret < 0) { _debug("need instant resend %d", ret); - sp->need_resend = true; - rxrpc_instant_resend(call); + rxrpc_instant_resend(call, ix); } + rxrpc_free_skb(skb); _leave(""); } @@ -212,9 +203,8 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, _debug("alloc"); - if (CIRC_SPACE(call->acks_head, - ACCESS_ONCE(call->acks_tail), - call->acks_winsz) <= 0) { + if (call->tx_top - call->tx_hard_ack >= + call->tx_winsize) { ret = -EAGAIN; if (msg->msg_flags & MSG_DONTWAIT) goto maybe_error; @@ -313,7 +303,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, memset(skb_put(skb, pad), 0, pad); } - seq = atomic_inc_return(&call->sequence); + seq = call->tx_top + 1; sp->hdr.epoch = conn->proto.epoch; sp->hdr.cid = call->cid; @@ -329,9 +319,8 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, sp->hdr.flags = conn->out_clientflag; if (msg_data_left(msg) == 0 && !more) sp->hdr.flags |= RXRPC_LAST_PACKET; - else if (CIRC_SPACE(call->acks_head, - ACCESS_ONCE(call->acks_tail), - call->acks_winsz) > 1) + else if (call->tx_top - call->tx_hard_ack < + call->tx_winsize) sp->hdr.flags |= RXRPC_MORE_PACKETS; if (more && seq & 1) sp->hdr.flags |= RXRPC_REQUEST_ACK; @@ -358,7 +347,7 @@ out: call_terminated: rxrpc_free_skb(skb); _leave(" = %d", -call->error); - return ret; + return -call->error; maybe_error: if (copied) @@ -451,29 +440,6 @@ static int rxrpc_sendmsg_cmsg(struct msghdr *msg, return 0; } -/* - * abort a call, sending an ABORT packet to the peer - */ -static void rxrpc_send_abort(struct rxrpc_call *call, const char *why, - u32 abort_code, int error) -{ - if (call->state >= RXRPC_CALL_COMPLETE) - return; - - write_lock_bh(&call->state_lock); - - if (__rxrpc_abort_call(why, call, 0, abort_code, error)) { - del_timer_sync(&call->resend_timer); - del_timer_sync(&call->ack_timer); - clear_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events); - clear_bit(RXRPC_CALL_EV_ACK, &call->events); - clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags); - rxrpc_queue_call(call); - } - - write_unlock_bh(&call->state_lock); -} - /* * Create a new client call for sendmsg(). */ @@ -549,7 +515,6 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) return PTR_ERR(call); } - rxrpc_see_call(call); _debug("CALL %d USR %lx ST %d on CONN %p", call->debug_id, call->user_call_ID, call->state, call->conn); @@ -557,8 +522,10 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) /* it's too late for this call */ ret = -ESHUTDOWN; } else if (cmd == RXRPC_CMD_SEND_ABORT) { - rxrpc_send_abort(call, "CMD", abort_code, ECONNABORTED); ret = 0; + if (rxrpc_abort_call("CMD", call, 0, abort_code, ECONNABORTED)) + ret = rxrpc_send_call_packet(call, + RXRPC_PACKET_TYPE_ABORT); } else if (cmd != RXRPC_CMD_SEND_DATA) { ret = -EINVAL; } else if (rxrpc_is_client_call(call) && @@ -639,7 +606,8 @@ void rxrpc_kernel_abort_call(struct socket *sock, struct rxrpc_call *call, lock_sock(sock->sk); - rxrpc_send_abort(call, why, abort_code, error); + if (rxrpc_abort_call(why, call, 0, abort_code, error)) + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); release_sock(sock->sk); _leave(""); diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c index 9b8f8456d3bf..620d9ccaf3c1 100644 --- a/net/rxrpc/skbuff.c +++ b/net/rxrpc/skbuff.c @@ -18,133 +18,6 @@ #include #include "ar-internal.h" -/* - * set up for the ACK at the end of the receive phase when we discard the final - * receive phase data packet - * - called with softirqs disabled - */ -static void rxrpc_request_final_ACK(struct rxrpc_call *call) -{ - /* the call may be aborted before we have a chance to ACK it */ - write_lock(&call->state_lock); - - switch (call->state) { - case RXRPC_CALL_CLIENT_RECV_REPLY: - call->state = RXRPC_CALL_CLIENT_FINAL_ACK; - _debug("request final ACK"); - - set_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events); - if (try_to_del_timer_sync(&call->ack_timer) >= 0) - rxrpc_queue_call(call); - break; - - case RXRPC_CALL_SERVER_RECV_REQUEST: - call->state = RXRPC_CALL_SERVER_ACK_REQUEST; - default: - break; - } - - write_unlock(&call->state_lock); -} - -/* - * drop the bottom ACK off of the call ACK window and advance the window - */ -static void rxrpc_hard_ACK_data(struct rxrpc_call *call, struct sk_buff *skb) -{ - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - int loop; - u32 seq; - - spin_lock_bh(&call->lock); - - _debug("hard ACK #%u", sp->hdr.seq); - - for (loop = 0; loop < RXRPC_ACKR_WINDOW_ASZ; loop++) { - call->ackr_window[loop] >>= 1; - call->ackr_window[loop] |= - call->ackr_window[loop + 1] << (BITS_PER_LONG - 1); - } - - seq = sp->hdr.seq; - ASSERTCMP(seq, ==, call->rx_data_eaten + 1); - call->rx_data_eaten = seq; - - if (call->ackr_win_top < UINT_MAX) - call->ackr_win_top++; - - ASSERTIFCMP(call->state <= RXRPC_CALL_COMPLETE, - call->rx_data_post, >=, call->rx_data_recv); - ASSERTIFCMP(call->state <= RXRPC_CALL_COMPLETE, - call->rx_data_recv, >=, call->rx_data_eaten); - - if (sp->hdr.flags & RXRPC_LAST_PACKET) { - rxrpc_request_final_ACK(call); - } else if (atomic_dec_and_test(&call->ackr_not_idle) && - test_and_clear_bit(RXRPC_CALL_TX_SOFT_ACK, &call->flags)) { - /* We previously soft-ACK'd some received packets that have now - * been consumed, so send a hard-ACK if no more packets are - * immediately forthcoming to allow the transmitter to free up - * its Tx bufferage. - */ - _debug("send Rx idle ACK"); - __rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, - skb->priority, sp->hdr.serial, false); - } - - spin_unlock_bh(&call->lock); -} - -/** - * rxrpc_kernel_data_consumed - Record consumption of data message - * @call: The call to which the message pertains. - * @skb: Message holding data - * - * Record the consumption of a data message and generate an ACK if appropriate. - * The call state is shifted if this was the final packet. The caller must be - * in process context with no spinlocks held. - * - * TODO: Actually generate the ACK here rather than punting this to the - * workqueue. - */ -void rxrpc_kernel_data_consumed(struct rxrpc_call *call, struct sk_buff *skb) -{ - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - - _enter("%d,%p{%u}", call->debug_id, skb, sp->hdr.seq); - - ASSERTCMP(sp->call, ==, call); - ASSERTCMP(sp->hdr.type, ==, RXRPC_PACKET_TYPE_DATA); - - /* TODO: Fix the sequence number tracking */ - ASSERTCMP(sp->hdr.seq, >=, call->rx_data_recv); - ASSERTCMP(sp->hdr.seq, <=, call->rx_data_recv + 1); - ASSERTCMP(sp->hdr.seq, >, call->rx_data_eaten); - - call->rx_data_recv = sp->hdr.seq; - rxrpc_hard_ACK_data(call, skb); -} - -/* - * Destroy a packet that has an RxRPC control buffer - */ -void rxrpc_packet_destructor(struct sk_buff *skb) -{ - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - struct rxrpc_call *call = sp->call; - - _enter("%p{%p}", skb, call); - - if (call) { - rxrpc_put_call_for_skb(call, skb); - sp->call = NULL; - } - - if (skb->sk) - sock_rfree(skb); - _leave(""); -} - /* * Note the existence of a new-to-us socket buffer (allocated or dequeued). */ From 74f13c80e210ff5a9e7b13b9853d8a866972f385 Mon Sep 17 00:00:00 2001 From: Zubair Lutfullah Kakakhel Date: Mon, 5 Sep 2016 13:07:54 +0100 Subject: [PATCH 0820/1715] net: ethernet: xilinx: Enable emaclite for MIPS The MIPS based xilfpga platform uses this driver. Enable it for MIPS Signed-off-by: Zubair Lutfullah Kakakhel Signed-off-by: David S. Miller --- drivers/net/ethernet/xilinx/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/xilinx/Kconfig b/drivers/net/ethernet/xilinx/Kconfig index 4f5c024c6192..6d68c8a8f4f2 100644 --- a/drivers/net/ethernet/xilinx/Kconfig +++ b/drivers/net/ethernet/xilinx/Kconfig @@ -5,7 +5,7 @@ config NET_VENDOR_XILINX bool "Xilinx devices" default y - depends on PPC || PPC32 || MICROBLAZE || ARCH_ZYNQ + depends on PPC || PPC32 || MICROBLAZE || ARCH_ZYNQ || MIPS ---help--- If you have a network (Ethernet) card belonging to this class, say Y. @@ -18,7 +18,7 @@ if NET_VENDOR_XILINX config XILINX_EMACLITE tristate "Xilinx 10/100 Ethernet Lite support" - depends on (PPC32 || MICROBLAZE || ARCH_ZYNQ) + depends on PPC32 || MICROBLAZE || ARCH_ZYNQ || MIPS select PHYLIB ---help--- This driver supports the 10/100 Ethernet Lite from Xilinx. From d545caca827b65aab557a9e9dcdcf1e5a3823c2d Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Thu, 8 Sep 2016 00:42:25 +0900 Subject: [PATCH 0821/1715] net: inet: diag: expose the socket mark to privileged processes. This adds the capability for a process that has CAP_NET_ADMIN on a socket to see the socket mark in socket dumps. Commit a52e95abf772 ("net: diag: allow socket bytecode filters to match socket marks") recently gave privileged processes the ability to filter socket dumps based on mark. This patch is complementary: it ensures that the mark is also passed to userspace in the socket's netlink attributes. It is useful for tools like ss which display information about sockets. Tested: https://android-review.googlesource.com/270210 Signed-off-by: Lorenzo Colitti Signed-off-by: David S. Miller --- include/linux/inet_diag.h | 4 +-- include/uapi/linux/inet_diag.h | 1 + net/ipv4/inet_diag.c | 49 +++++++++++++++++++++++----------- net/ipv4/udp_diag.c | 10 ++++--- net/sctp/sctp_diag.c | 20 +++++++++----- 5 files changed, 56 insertions(+), 28 deletions(-) diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index feb04ea20f11..65da430e260f 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -37,7 +37,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, struct sk_buff *skb, const struct inet_diag_req_v2 *req, struct user_namespace *user_ns, u32 pid, u32 seq, u16 nlmsg_flags, - const struct nlmsghdr *unlh); + const struct nlmsghdr *unlh, bool net_admin); void inet_diag_dump_icsk(struct inet_hashinfo *h, struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *r, @@ -56,7 +56,7 @@ void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk); int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, struct inet_diag_msg *r, int ext, - struct user_namespace *user_ns); + struct user_namespace *user_ns, bool net_admin); extern int inet_diag_register(const struct inet_diag_handler *handler); extern void inet_diag_unregister(const struct inet_diag_handler *handler); diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h index 5581206a08ae..b5c366f87b3e 100644 --- a/include/uapi/linux/inet_diag.h +++ b/include/uapi/linux/inet_diag.h @@ -123,6 +123,7 @@ enum { INET_DIAG_LOCALS, INET_DIAG_PEERS, INET_DIAG_PAD, + INET_DIAG_MARK, __INET_DIAG_MAX, }; diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index abfbe492ebfe..e4d16fc5bbb3 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -99,6 +99,7 @@ static size_t inet_sk_attr_size(void) + nla_total_size(1) /* INET_DIAG_SHUTDOWN */ + nla_total_size(1) /* INET_DIAG_TOS */ + nla_total_size(1) /* INET_DIAG_TCLASS */ + + nla_total_size(4) /* INET_DIAG_MARK */ + nla_total_size(sizeof(struct inet_diag_meminfo)) + nla_total_size(sizeof(struct inet_diag_msg)) + nla_total_size(SK_MEMINFO_VARS * sizeof(u32)) @@ -109,7 +110,8 @@ static size_t inet_sk_attr_size(void) int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, struct inet_diag_msg *r, int ext, - struct user_namespace *user_ns) + struct user_namespace *user_ns, + bool net_admin) { const struct inet_sock *inet = inet_sk(sk); @@ -136,6 +138,9 @@ int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, } #endif + if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, sk->sk_mark)) + goto errout; + r->idiag_uid = from_kuid_munged(user_ns, sock_i_uid(sk)); r->idiag_inode = sock_i_ino(sk); @@ -149,7 +154,8 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, struct sk_buff *skb, const struct inet_diag_req_v2 *req, struct user_namespace *user_ns, u32 portid, u32 seq, u16 nlmsg_flags, - const struct nlmsghdr *unlh) + const struct nlmsghdr *unlh, + bool net_admin) { const struct tcp_congestion_ops *ca_ops; const struct inet_diag_handler *handler; @@ -175,7 +181,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, r->idiag_timer = 0; r->idiag_retrans = 0; - if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns)) + if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns, net_admin)) goto errout; if (ext & (1 << (INET_DIAG_MEMINFO - 1))) { @@ -274,10 +280,11 @@ static int inet_csk_diag_fill(struct sock *sk, const struct inet_diag_req_v2 *req, struct user_namespace *user_ns, u32 portid, u32 seq, u16 nlmsg_flags, - const struct nlmsghdr *unlh) + const struct nlmsghdr *unlh, + bool net_admin) { - return inet_sk_diag_fill(sk, inet_csk(sk), skb, req, - user_ns, portid, seq, nlmsg_flags, unlh); + return inet_sk_diag_fill(sk, inet_csk(sk), skb, req, user_ns, + portid, seq, nlmsg_flags, unlh, net_admin); } static int inet_twsk_diag_fill(struct sock *sk, @@ -319,8 +326,9 @@ static int inet_twsk_diag_fill(struct sock *sk, static int inet_req_diag_fill(struct sock *sk, struct sk_buff *skb, u32 portid, u32 seq, u16 nlmsg_flags, - const struct nlmsghdr *unlh) + const struct nlmsghdr *unlh, bool net_admin) { + struct request_sock *reqsk = inet_reqsk(sk); struct inet_diag_msg *r; struct nlmsghdr *nlh; long tmo; @@ -334,7 +342,7 @@ static int inet_req_diag_fill(struct sock *sk, struct sk_buff *skb, inet_diag_msg_common_fill(r, sk); r->idiag_state = TCP_SYN_RECV; r->idiag_timer = 1; - r->idiag_retrans = inet_reqsk(sk)->num_retrans; + r->idiag_retrans = reqsk->num_retrans; BUILD_BUG_ON(offsetof(struct inet_request_sock, ir_cookie) != offsetof(struct sock, sk_cookie)); @@ -346,6 +354,10 @@ static int inet_req_diag_fill(struct sock *sk, struct sk_buff *skb, r->idiag_uid = 0; r->idiag_inode = 0; + if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, + inet_rsk(reqsk)->ir_mark)) + return -EMSGSIZE; + nlmsg_end(skb, nlh); return 0; } @@ -354,7 +366,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, const struct inet_diag_req_v2 *r, struct user_namespace *user_ns, u32 portid, u32 seq, u16 nlmsg_flags, - const struct nlmsghdr *unlh) + const struct nlmsghdr *unlh, bool net_admin) { if (sk->sk_state == TCP_TIME_WAIT) return inet_twsk_diag_fill(sk, skb, portid, seq, @@ -362,10 +374,10 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, if (sk->sk_state == TCP_NEW_SYN_RECV) return inet_req_diag_fill(sk, skb, portid, seq, - nlmsg_flags, unlh); + nlmsg_flags, unlh, net_admin); return inet_csk_diag_fill(sk, skb, r, user_ns, portid, seq, - nlmsg_flags, unlh); + nlmsg_flags, unlh, net_admin); } struct sock *inet_diag_find_one_icsk(struct net *net, @@ -435,7 +447,8 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, err = sk_diag_fill(sk, rep, req, sk_user_ns(NETLINK_CB(in_skb).sk), NETLINK_CB(in_skb).portid, - nlh->nlmsg_seq, 0, nlh); + nlh->nlmsg_seq, 0, nlh, + netlink_net_capable(in_skb, CAP_NET_ADMIN)); if (err < 0) { WARN_ON(err == -EMSGSIZE); nlmsg_free(rep); @@ -796,7 +809,8 @@ static int inet_csk_diag_dump(struct sock *sk, struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *r, - const struct nlattr *bc) + const struct nlattr *bc, + bool net_admin) { if (!inet_diag_bc_sk(bc, sk)) return 0; @@ -804,7 +818,8 @@ static int inet_csk_diag_dump(struct sock *sk, return inet_csk_diag_fill(sk, skb, r, sk_user_ns(NETLINK_CB(cb->skb).sk), NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); + cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh, + net_admin); } static void twsk_build_assert(void) @@ -840,6 +855,7 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb, struct net *net = sock_net(skb->sk); int i, num, s_i, s_num; u32 idiag_states = r->idiag_states; + bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN); if (idiag_states & TCPF_SYN_RECV) idiag_states |= TCPF_NEW_SYN_RECV; @@ -880,7 +896,8 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb, cb->args[3] > 0) goto next_listen; - if (inet_csk_diag_dump(sk, skb, cb, r, bc) < 0) { + if (inet_csk_diag_dump(sk, skb, cb, r, + bc, net_admin) < 0) { spin_unlock_bh(&ilb->lock); goto done; } @@ -948,7 +965,7 @@ skip_listen_ht: sk_user_ns(NETLINK_CB(cb->skb).sk), NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - cb->nlh); + cb->nlh, net_admin); if (res < 0) { spin_unlock_bh(lock); goto done; diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index 58b79c0c0d69..9a89c10a55f0 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -20,7 +20,7 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *req, - struct nlattr *bc) + struct nlattr *bc, bool net_admin) { if (!inet_diag_bc_sk(bc, sk)) return 0; @@ -28,7 +28,7 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, return inet_sk_diag_fill(sk, NULL, skb, req, sk_user_ns(NETLINK_CB(cb->skb).sk), NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); + cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh, net_admin); } static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, @@ -76,7 +76,8 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, err = inet_sk_diag_fill(sk, NULL, rep, req, sk_user_ns(NETLINK_CB(in_skb).sk), NETLINK_CB(in_skb).portid, - nlh->nlmsg_seq, 0, nlh); + nlh->nlmsg_seq, 0, nlh, + netlink_net_capable(in_skb, CAP_NET_ADMIN)); if (err < 0) { WARN_ON(err == -EMSGSIZE); kfree_skb(rep); @@ -97,6 +98,7 @@ static void udp_dump(struct udp_table *table, struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *r, struct nlattr *bc) { + bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN); struct net *net = sock_net(skb->sk); int num, s_num, slot, s_slot; @@ -132,7 +134,7 @@ static void udp_dump(struct udp_table *table, struct sk_buff *skb, r->id.idiag_dport) goto next; - if (sk_diag_dump(sk, skb, cb, r, bc) < 0) { + if (sk_diag_dump(sk, skb, cb, r, bc, net_admin) < 0) { spin_unlock_bh(&hslot->lock); goto done; } diff --git a/net/sctp/sctp_diag.c b/net/sctp/sctp_diag.c index f3508aa75815..807158e32f5f 100644 --- a/net/sctp/sctp_diag.c +++ b/net/sctp/sctp_diag.c @@ -106,7 +106,8 @@ static int inet_sctp_diag_fill(struct sock *sk, struct sctp_association *asoc, const struct inet_diag_req_v2 *req, struct user_namespace *user_ns, int portid, u32 seq, u16 nlmsg_flags, - const struct nlmsghdr *unlh) + const struct nlmsghdr *unlh, + bool net_admin) { struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct list_head *addr_list; @@ -133,7 +134,7 @@ static int inet_sctp_diag_fill(struct sock *sk, struct sctp_association *asoc, r->idiag_retrans = 0; } - if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns)) + if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns, net_admin)) goto errout; if (ext & (1 << (INET_DIAG_SKMEMINFO - 1))) { @@ -203,6 +204,7 @@ struct sctp_comm_param { struct netlink_callback *cb; const struct inet_diag_req_v2 *r; const struct nlmsghdr *nlh; + bool net_admin; }; static size_t inet_assoc_attr_size(struct sctp_association *asoc) @@ -219,6 +221,7 @@ static size_t inet_assoc_attr_size(struct sctp_association *asoc) + nla_total_size(1) /* INET_DIAG_SHUTDOWN */ + nla_total_size(1) /* INET_DIAG_TOS */ + nla_total_size(1) /* INET_DIAG_TCLASS */ + + nla_total_size(4) /* INET_DIAG_MARK */ + nla_total_size(addrlen * asoc->peer.transport_count) + nla_total_size(addrlen * addrcnt) + nla_total_size(sizeof(struct inet_diag_meminfo)) @@ -256,7 +259,8 @@ static int sctp_tsp_dump_one(struct sctp_transport *tsp, void *p) err = inet_sctp_diag_fill(sk, assoc, rep, req, sk_user_ns(NETLINK_CB(in_skb).sk), NETLINK_CB(in_skb).portid, - nlh->nlmsg_seq, 0, nlh); + nlh->nlmsg_seq, 0, nlh, + commp->net_admin); release_sock(sk); if (err < 0) { WARN_ON(err == -EMSGSIZE); @@ -310,7 +314,8 @@ static int sctp_tsp_dump(struct sctp_transport *tsp, void *p) sk_user_ns(NETLINK_CB(cb->skb).sk), NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - NLM_F_MULTI, cb->nlh) < 0) { + NLM_F_MULTI, cb->nlh, + commp->net_admin) < 0) { cb->args[3] = 1; err = 2; goto release; @@ -320,7 +325,8 @@ static int sctp_tsp_dump(struct sctp_transport *tsp, void *p) if (inet_sctp_diag_fill(sk, assoc, skb, r, sk_user_ns(NETLINK_CB(cb->skb).sk), NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, 0, cb->nlh) < 0) { + cb->nlh->nlmsg_seq, 0, cb->nlh, + commp->net_admin) < 0) { err = 2; goto release; } @@ -375,7 +381,7 @@ static int sctp_ep_dump(struct sctp_endpoint *ep, void *p) sk_user_ns(NETLINK_CB(cb->skb).sk), NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - cb->nlh) < 0) { + cb->nlh, commp->net_admin) < 0) { err = 2; goto out; } @@ -412,6 +418,7 @@ static int sctp_diag_dump_one(struct sk_buff *in_skb, .skb = in_skb, .r = req, .nlh = nlh, + .net_admin = netlink_net_capable(in_skb, CAP_NET_ADMIN), }; if (req->sdiag_family == AF_INET) { @@ -447,6 +454,7 @@ static void sctp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, .skb = skb, .cb = cb, .r = r, + .net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN), }; /* eps hashtable dumps From 8c146bb9d59aa2ac45222171916ece186c4b3943 Mon Sep 17 00:00:00 2001 From: Thomas F Herbert Date: Wed, 7 Sep 2016 12:56:57 -0400 Subject: [PATCH 0822/1715] openvswitch: 802.1ad uapi changes. openvswitch: Add support for 8021.AD Change the description of the VLAN tpid field. Signed-off-by: Thomas F Herbert Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 54c3b4f4aceb..59ed3992c760 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -605,13 +605,13 @@ struct ovs_action_push_mpls { * @vlan_tci: Tag control identifier (TCI) to push. The CFI bit must be set * (but it will not be set in the 802.1Q header that is pushed). * - * The @vlan_tpid value is typically %ETH_P_8021Q. The only acceptable TPID - * values are those that the kernel module also parses as 802.1Q headers, to - * prevent %OVS_ACTION_ATTR_PUSH_VLAN followed by %OVS_ACTION_ATTR_POP_VLAN - * from having surprising results. + * The @vlan_tpid value is typically %ETH_P_8021Q or %ETH_P_8021AD. + * The only acceptable TPID values are those that the kernel module also parses + * as 802.1Q or 802.1AD headers, to prevent %OVS_ACTION_ATTR_PUSH_VLAN followed + * by %OVS_ACTION_ATTR_POP_VLAN from having surprising results. */ struct ovs_action_push_vlan { - __be16 vlan_tpid; /* 802.1Q TPID. */ + __be16 vlan_tpid; /* 802.1Q or 802.1ad TPID. */ __be16 vlan_tci; /* 802.1Q TCI (VLAN ID and priority). */ }; @@ -721,9 +721,10 @@ enum ovs_nat_attr { * is copied from the value to the packet header field, rest of the bits are * left unchanged. The non-masked value bits must be passed in as zeroes. * Masking is not supported for the %OVS_KEY_ATTR_TUNNEL attribute. - * @OVS_ACTION_ATTR_PUSH_VLAN: Push a new outermost 802.1Q header onto the - * packet. - * @OVS_ACTION_ATTR_POP_VLAN: Pop the outermost 802.1Q header off the packet. + * @OVS_ACTION_ATTR_PUSH_VLAN: Push a new outermost 802.1Q or 802.1ad header + * onto the packet. + * @OVS_ACTION_ATTR_POP_VLAN: Pop the outermost 802.1Q or 802.1ad header + * from the packet. * @OVS_ACTION_ATTR_SAMPLE: Probabilitically executes actions, as specified in * the nested %OVS_SAMPLE_ATTR_* attributes. * @OVS_ACTION_ATTR_PUSH_MPLS: Push a new MPLS label stack entry onto the From fe19c4f971a55cea3be442d8032a5f6021702791 Mon Sep 17 00:00:00 2001 From: Eric Garver Date: Wed, 7 Sep 2016 12:56:58 -0400 Subject: [PATCH 0823/1715] vlan: Check for vlan ethernet types for 8021.q or 802.1ad This is to simplify using double tagged vlans. This function allows all valid vlan ethertypes to be checked in a single function call. Also replace some instances that check for both ETH_P_8021Q and ETH_P_8021AD. Patch based on one originally by Thomas F Herbert. Signed-off-by: Thomas F Herbert Signed-off-by: Eric Garver Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 49d4aef1f789..3319d97d789d 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -272,6 +272,23 @@ static inline int vlan_get_encap_level(struct net_device *dev) } #endif +/** + * eth_type_vlan - check for valid vlan ether type. + * @ethertype: ether type to check + * + * Returns true if the ether type is a vlan ether type. + */ +static inline bool eth_type_vlan(__be16 ethertype) +{ + switch (ethertype) { + case htons(ETH_P_8021Q): + case htons(ETH_P_8021AD): + return true; + default: + return false; + } +} + static inline bool vlan_hw_offload_capable(netdev_features_t features, __be16 proto) { @@ -425,8 +442,7 @@ static inline int __vlan_get_tag(const struct sk_buff *skb, u16 *vlan_tci) { struct vlan_ethhdr *veth = (struct vlan_ethhdr *)skb->data; - if (veth->h_vlan_proto != htons(ETH_P_8021Q) && - veth->h_vlan_proto != htons(ETH_P_8021AD)) + if (!eth_type_vlan(veth->h_vlan_proto)) return -EINVAL; *vlan_tci = ntohs(veth->h_vlan_TCI); @@ -488,7 +504,7 @@ static inline __be16 __vlan_get_protocol(struct sk_buff *skb, __be16 type, * present at mac_len - VLAN_HLEN (if mac_len > 0), or at * ETH_HLEN otherwise */ - if (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) { + if (eth_type_vlan(type)) { if (vlan_depth) { if (WARN_ON(vlan_depth < VLAN_HLEN)) return 0; @@ -506,8 +522,7 @@ static inline __be16 __vlan_get_protocol(struct sk_buff *skb, __be16 type, vh = (struct vlan_hdr *)(skb->data + vlan_depth); type = vh->h_vlan_encapsulated_proto; vlan_depth += VLAN_HLEN; - } while (type == htons(ETH_P_8021Q) || - type == htons(ETH_P_8021AD)); + } while (eth_type_vlan(type)); } if (depth) @@ -572,8 +587,7 @@ static inline void vlan_set_encap_proto(struct sk_buff *skb, static inline bool skb_vlan_tagged(const struct sk_buff *skb) { if (!skb_vlan_tag_present(skb) && - likely(skb->protocol != htons(ETH_P_8021Q) && - skb->protocol != htons(ETH_P_8021AD))) + likely(!eth_type_vlan(skb->protocol))) return false; return true; @@ -593,15 +607,14 @@ static inline bool skb_vlan_tagged_multi(const struct sk_buff *skb) if (!skb_vlan_tag_present(skb)) { struct vlan_ethhdr *veh; - if (likely(protocol != htons(ETH_P_8021Q) && - protocol != htons(ETH_P_8021AD))) + if (likely(!eth_type_vlan(protocol))) return false; veh = (struct vlan_ethhdr *)skb->data; protocol = veh->h_vlan_encapsulated_proto; } - if (protocol != htons(ETH_P_8021Q) && protocol != htons(ETH_P_8021AD)) + if (!eth_type_vlan(protocol)) return false; return true; From 018c1dda5ff1e7bd1fe2d9fd1d0f5b82dc6fc0cd Mon Sep 17 00:00:00 2001 From: Eric Garver Date: Wed, 7 Sep 2016 12:56:59 -0400 Subject: [PATCH 0824/1715] openvswitch: 802.1AD Flow handling, actions, vlan parsing, netlink attributes Add support for 802.1ad including the ability to push and pop double tagged vlans. Add support for 802.1ad to netlink parsing and flow conversion. Uses double nested encap attributes to represent double tagged vlan. Inner TPID encoded along with ctci in nested attributes. This is based on Thomas F Herbert's original v20 patch. I made some small clean ups and bug fixes. Signed-off-by: Thomas F Herbert Signed-off-by: Eric Garver Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/actions.c | 16 +- net/openvswitch/flow.c | 65 +++++-- net/openvswitch/flow.h | 8 +- net/openvswitch/flow_netlink.c | 310 ++++++++++++++++++++++----------- net/openvswitch/vport.c | 7 +- 5 files changed, 282 insertions(+), 124 deletions(-) diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index ca91fc33f8a9..4fe9032b1160 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -246,20 +246,24 @@ static int pop_vlan(struct sk_buff *skb, struct sw_flow_key *key) int err; err = skb_vlan_pop(skb); - if (skb_vlan_tag_present(skb)) + if (skb_vlan_tag_present(skb)) { invalidate_flow_key(key); - else - key->eth.tci = 0; + } else { + key->eth.vlan.tci = 0; + key->eth.vlan.tpid = 0; + } return err; } static int push_vlan(struct sk_buff *skb, struct sw_flow_key *key, const struct ovs_action_push_vlan *vlan) { - if (skb_vlan_tag_present(skb)) + if (skb_vlan_tag_present(skb)) { invalidate_flow_key(key); - else - key->eth.tci = vlan->vlan_tci; + } else { + key->eth.vlan.tci = vlan->vlan_tci; + key->eth.vlan.tpid = vlan->vlan_tpid; + } return skb_vlan_push(skb, vlan->vlan_tpid, ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT); } diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 0ea128eeeab2..1240ae3b88d2 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -302,24 +302,57 @@ static bool icmp6hdr_ok(struct sk_buff *skb) sizeof(struct icmp6hdr)); } -static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key) +/** + * Parse vlan tag from vlan header. + * Returns ERROR on memory error. + * Returns 0 if it encounters a non-vlan or incomplete packet. + * Returns 1 after successfully parsing vlan tag. + */ +static int parse_vlan_tag(struct sk_buff *skb, struct vlan_head *key_vh) { - struct qtag_prefix { - __be16 eth_type; /* ETH_P_8021Q */ - __be16 tci; - }; - struct qtag_prefix *qp; + struct vlan_head *vh = (struct vlan_head *)skb->data; - if (unlikely(skb->len < sizeof(struct qtag_prefix) + sizeof(__be16))) + if (likely(!eth_type_vlan(vh->tpid))) return 0; - if (unlikely(!pskb_may_pull(skb, sizeof(struct qtag_prefix) + - sizeof(__be16)))) + if (unlikely(skb->len < sizeof(struct vlan_head) + sizeof(__be16))) + return 0; + + if (unlikely(!pskb_may_pull(skb, sizeof(struct vlan_head) + + sizeof(__be16)))) return -ENOMEM; - qp = (struct qtag_prefix *) skb->data; - key->eth.tci = qp->tci | htons(VLAN_TAG_PRESENT); - __skb_pull(skb, sizeof(struct qtag_prefix)); + vh = (struct vlan_head *)skb->data; + key_vh->tci = vh->tci | htons(VLAN_TAG_PRESENT); + key_vh->tpid = vh->tpid; + + __skb_pull(skb, sizeof(struct vlan_head)); + return 1; +} + +static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key) +{ + int res; + + key->eth.vlan.tci = 0; + key->eth.vlan.tpid = 0; + key->eth.cvlan.tci = 0; + key->eth.cvlan.tpid = 0; + + if (likely(skb_vlan_tag_present(skb))) { + key->eth.vlan.tci = htons(skb->vlan_tci); + key->eth.vlan.tpid = skb->vlan_proto; + } else { + /* Parse outer vlan tag in the non-accelerated case. */ + res = parse_vlan_tag(skb, &key->eth.vlan); + if (res <= 0) + return res; + } + + /* Parse inner vlan tag. */ + res = parse_vlan_tag(skb, &key->eth.cvlan); + if (res <= 0) + return res; return 0; } @@ -480,12 +513,8 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) * update skb->csum here. */ - key->eth.tci = 0; - if (skb_vlan_tag_present(skb)) - key->eth.tci = htons(skb->vlan_tci); - else if (eth->h_proto == htons(ETH_P_8021Q)) - if (unlikely(parse_vlan(skb, key))) - return -ENOMEM; + if (unlikely(parse_vlan(skb, key))) + return -ENOMEM; key->eth.type = parse_ethertype(skb); if (unlikely(key->eth.type == htons(0))) diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index 03378e75a67c..156a3029c17b 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -50,6 +50,11 @@ struct ovs_tunnel_info { struct metadata_dst *tun_dst; }; +struct vlan_head { + __be16 tpid; /* Vlan type. Generally 802.1q or 802.1ad.*/ + __be16 tci; /* 0 if no VLAN, VLAN_TAG_PRESENT set otherwise. */ +}; + #define OVS_SW_FLOW_KEY_METADATA_SIZE \ (offsetof(struct sw_flow_key, recirc_id) + \ FIELD_SIZEOF(struct sw_flow_key, recirc_id)) @@ -69,7 +74,8 @@ struct sw_flow_key { struct { u8 src[ETH_ALEN]; /* Ethernet source address. */ u8 dst[ETH_ALEN]; /* Ethernet destination address. */ - __be16 tci; /* 0 if no VLAN, VLAN_TAG_PRESENT set otherwise. */ + struct vlan_head vlan; + struct vlan_head cvlan; __be16 type; /* Ethernet frame type. */ } eth; union { diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index c78a6a1476fb..8efa718ddb5e 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -808,6 +808,167 @@ int ovs_nla_put_tunnel_info(struct sk_buff *skb, ip_tunnel_info_af(tun_info)); } +static int encode_vlan_from_nlattrs(struct sw_flow_match *match, + const struct nlattr *a[], + bool is_mask, bool inner) +{ + __be16 tci = 0; + __be16 tpid = 0; + + if (a[OVS_KEY_ATTR_VLAN]) + tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); + + if (a[OVS_KEY_ATTR_ETHERTYPE]) + tpid = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); + + if (likely(!inner)) { + SW_FLOW_KEY_PUT(match, eth.vlan.tpid, tpid, is_mask); + SW_FLOW_KEY_PUT(match, eth.vlan.tci, tci, is_mask); + } else { + SW_FLOW_KEY_PUT(match, eth.cvlan.tpid, tpid, is_mask); + SW_FLOW_KEY_PUT(match, eth.cvlan.tci, tci, is_mask); + } + return 0; +} + +static int validate_vlan_from_nlattrs(const struct sw_flow_match *match, + u64 key_attrs, bool inner, + const struct nlattr **a, bool log) +{ + __be16 tci = 0; + + if (!((key_attrs & (1 << OVS_KEY_ATTR_ETHERNET)) && + (key_attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) && + eth_type_vlan(nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE])))) { + /* Not a VLAN. */ + return 0; + } + + if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) && + (key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) { + OVS_NLERR(log, "Invalid %s frame", (inner) ? "C-VLAN" : "VLAN"); + return -EINVAL; + } + + if (a[OVS_KEY_ATTR_VLAN]) + tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); + + if (!(tci & htons(VLAN_TAG_PRESENT))) { + if (tci) { + OVS_NLERR(log, "%s TCI does not have VLAN_TAG_PRESENT bit set.", + (inner) ? "C-VLAN" : "VLAN"); + return -EINVAL; + } else if (nla_len(a[OVS_KEY_ATTR_ENCAP])) { + /* Corner case for truncated VLAN header. */ + OVS_NLERR(log, "Truncated %s header has non-zero encap attribute.", + (inner) ? "C-VLAN" : "VLAN"); + return -EINVAL; + } + } + + return 1; +} + +static int validate_vlan_mask_from_nlattrs(const struct sw_flow_match *match, + u64 key_attrs, bool inner, + const struct nlattr **a, bool log) +{ + __be16 tci = 0; + __be16 tpid = 0; + bool encap_valid = !!(match->key->eth.vlan.tci & + htons(VLAN_TAG_PRESENT)); + bool i_encap_valid = !!(match->key->eth.cvlan.tci & + htons(VLAN_TAG_PRESENT)); + + if (!(key_attrs & (1 << OVS_KEY_ATTR_ENCAP))) { + /* Not a VLAN. */ + return 0; + } + + if ((!inner && !encap_valid) || (inner && !i_encap_valid)) { + OVS_NLERR(log, "Encap mask attribute is set for non-%s frame.", + (inner) ? "C-VLAN" : "VLAN"); + return -EINVAL; + } + + if (a[OVS_KEY_ATTR_VLAN]) + tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); + + if (a[OVS_KEY_ATTR_ETHERTYPE]) + tpid = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); + + if (tpid != htons(0xffff)) { + OVS_NLERR(log, "Must have an exact match on %s TPID (mask=%x).", + (inner) ? "C-VLAN" : "VLAN", ntohs(tpid)); + return -EINVAL; + } + if (!(tci & htons(VLAN_TAG_PRESENT))) { + OVS_NLERR(log, "%s TCI mask does not have exact match for VLAN_TAG_PRESENT bit.", + (inner) ? "C-VLAN" : "VLAN"); + return -EINVAL; + } + + return 1; +} + +static int __parse_vlan_from_nlattrs(struct sw_flow_match *match, + u64 *key_attrs, bool inner, + const struct nlattr **a, bool is_mask, + bool log) +{ + int err; + const struct nlattr *encap; + + if (!is_mask) + err = validate_vlan_from_nlattrs(match, *key_attrs, inner, + a, log); + else + err = validate_vlan_mask_from_nlattrs(match, *key_attrs, inner, + a, log); + if (err <= 0) + return err; + + err = encode_vlan_from_nlattrs(match, a, is_mask, inner); + if (err) + return err; + + *key_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP); + *key_attrs &= ~(1 << OVS_KEY_ATTR_VLAN); + *key_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); + + encap = a[OVS_KEY_ATTR_ENCAP]; + + if (!is_mask) + err = parse_flow_nlattrs(encap, a, key_attrs, log); + else + err = parse_flow_mask_nlattrs(encap, a, key_attrs, log); + + return err; +} + +static int parse_vlan_from_nlattrs(struct sw_flow_match *match, + u64 *key_attrs, const struct nlattr **a, + bool is_mask, bool log) +{ + int err; + bool encap_valid = false; + + err = __parse_vlan_from_nlattrs(match, key_attrs, false, a, + is_mask, log); + if (err) + return err; + + encap_valid = !!(match->key->eth.vlan.tci & htons(VLAN_TAG_PRESENT)); + if (encap_valid) { + err = __parse_vlan_from_nlattrs(match, key_attrs, true, a, + is_mask, log); + if (err) + return err; + } + + return 0; +} + static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match, u64 *attrs, const struct nlattr **a, bool is_mask, bool log) @@ -923,20 +1084,11 @@ static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match, } if (attrs & (1 << OVS_KEY_ATTR_VLAN)) { - __be16 tci; - - tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); - if (!(tci & htons(VLAN_TAG_PRESENT))) { - if (is_mask) - OVS_NLERR(log, "VLAN TCI mask does not have exact match for VLAN_TAG_PRESENT bit."); - else - OVS_NLERR(log, "VLAN TCI does not have VLAN_TAG_PRESENT bit set."); - - return -EINVAL; - } - - SW_FLOW_KEY_PUT(match, eth.tci, tci, is_mask); - attrs &= ~(1 << OVS_KEY_ATTR_VLAN); + /* VLAN attribute is always parsed before getting here since it + * may occur multiple times. + */ + OVS_NLERR(log, "VLAN attribute unexpected."); + return -EINVAL; } if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) { @@ -1182,49 +1334,18 @@ int ovs_nla_get_match(struct net *net, struct sw_flow_match *match, bool log) { const struct nlattr *a[OVS_KEY_ATTR_MAX + 1]; - const struct nlattr *encap; struct nlattr *newmask = NULL; u64 key_attrs = 0; u64 mask_attrs = 0; - bool encap_valid = false; int err; err = parse_flow_nlattrs(nla_key, a, &key_attrs, log); if (err) return err; - if ((key_attrs & (1 << OVS_KEY_ATTR_ETHERNET)) && - (key_attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) && - (nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]) == htons(ETH_P_8021Q))) { - __be16 tci; - - if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) && - (key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) { - OVS_NLERR(log, "Invalid Vlan frame."); - return -EINVAL; - } - - key_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); - tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); - encap = a[OVS_KEY_ATTR_ENCAP]; - key_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP); - encap_valid = true; - - if (tci & htons(VLAN_TAG_PRESENT)) { - err = parse_flow_nlattrs(encap, a, &key_attrs, log); - if (err) - return err; - } else if (!tci) { - /* Corner case for truncated 802.1Q header. */ - if (nla_len(encap)) { - OVS_NLERR(log, "Truncated 802.1Q header has non-zero encap attribute."); - return -EINVAL; - } - } else { - OVS_NLERR(log, "Encap attr is set for non-VLAN frame"); - return -EINVAL; - } - } + err = parse_vlan_from_nlattrs(match, &key_attrs, a, false, log); + if (err) + return err; err = ovs_key_from_nlattrs(net, match, key_attrs, a, false, log); if (err) @@ -1265,46 +1386,12 @@ int ovs_nla_get_match(struct net *net, struct sw_flow_match *match, goto free_newmask; /* Always match on tci. */ - SW_FLOW_KEY_PUT(match, eth.tci, htons(0xffff), true); + SW_FLOW_KEY_PUT(match, eth.vlan.tci, htons(0xffff), true); + SW_FLOW_KEY_PUT(match, eth.cvlan.tci, htons(0xffff), true); - if (mask_attrs & 1 << OVS_KEY_ATTR_ENCAP) { - __be16 eth_type = 0; - __be16 tci = 0; - - if (!encap_valid) { - OVS_NLERR(log, "Encap mask attribute is set for non-VLAN frame."); - err = -EINVAL; - goto free_newmask; - } - - mask_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP); - if (a[OVS_KEY_ATTR_ETHERTYPE]) - eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); - - if (eth_type == htons(0xffff)) { - mask_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); - encap = a[OVS_KEY_ATTR_ENCAP]; - err = parse_flow_mask_nlattrs(encap, a, - &mask_attrs, log); - if (err) - goto free_newmask; - } else { - OVS_NLERR(log, "VLAN frames must have an exact match on the TPID (mask=%x).", - ntohs(eth_type)); - err = -EINVAL; - goto free_newmask; - } - - if (a[OVS_KEY_ATTR_VLAN]) - tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); - - if (!(tci & htons(VLAN_TAG_PRESENT))) { - OVS_NLERR(log, "VLAN tag present bit must have an exact match (tci_mask=%x).", - ntohs(tci)); - err = -EINVAL; - goto free_newmask; - } - } + err = parse_vlan_from_nlattrs(match, &mask_attrs, a, true, log); + if (err) + goto free_newmask; err = ovs_key_from_nlattrs(net, match, mask_attrs, a, true, log); @@ -1410,12 +1497,25 @@ int ovs_nla_get_flow_metadata(struct net *net, const struct nlattr *attr, return metadata_from_nlattrs(net, &match, &attrs, a, false, log); } +static int ovs_nla_put_vlan(struct sk_buff *skb, const struct vlan_head *vh, + bool is_mask) +{ + __be16 eth_type = !is_mask ? vh->tpid : htons(0xffff); + + if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) || + nla_put_be16(skb, OVS_KEY_ATTR_VLAN, vh->tci)) + return -EMSGSIZE; + return 0; +} + static int __ovs_nla_put_key(const struct sw_flow_key *swkey, const struct sw_flow_key *output, bool is_mask, struct sk_buff *skb) { struct ovs_key_ethernet *eth_key; - struct nlattr *nla, *encap; + struct nlattr *nla; + struct nlattr *encap = NULL; + struct nlattr *in_encap = NULL; if (nla_put_u32(skb, OVS_KEY_ATTR_RECIRC_ID, output->recirc_id)) goto nla_put_failure; @@ -1464,17 +1564,21 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey, ether_addr_copy(eth_key->eth_src, output->eth.src); ether_addr_copy(eth_key->eth_dst, output->eth.dst); - if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) { - __be16 eth_type; - eth_type = !is_mask ? htons(ETH_P_8021Q) : htons(0xffff); - if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) || - nla_put_be16(skb, OVS_KEY_ATTR_VLAN, output->eth.tci)) + if (swkey->eth.vlan.tci || eth_type_vlan(swkey->eth.type)) { + if (ovs_nla_put_vlan(skb, &output->eth.vlan, is_mask)) goto nla_put_failure; encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP); - if (!swkey->eth.tci) + if (!swkey->eth.vlan.tci) goto unencap; - } else - encap = NULL; + + if (swkey->eth.cvlan.tci || eth_type_vlan(swkey->eth.type)) { + if (ovs_nla_put_vlan(skb, &output->eth.cvlan, is_mask)) + goto nla_put_failure; + in_encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP); + if (!swkey->eth.cvlan.tci) + goto unencap; + } + } if (swkey->eth.type == htons(ETH_P_802_2)) { /* @@ -1493,6 +1597,14 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey, if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type)) goto nla_put_failure; + if (eth_type_vlan(swkey->eth.type)) { + /* There are 3 VLAN tags, we don't know anything about the rest + * of the packet, so truncate here. + */ + WARN_ON_ONCE(!(encap && in_encap)); + goto unencap; + } + if (swkey->eth.type == htons(ETH_P_IP)) { struct ovs_key_ipv4 *ipv4_key; @@ -1619,6 +1731,8 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey, } unencap: + if (in_encap) + nla_nest_end(skb, in_encap); if (encap) nla_nest_end(skb, encap); @@ -2283,7 +2397,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, case OVS_ACTION_ATTR_PUSH_VLAN: vlan = nla_data(a); - if (vlan->vlan_tpid != htons(ETH_P_8021Q)) + if (!eth_type_vlan(vlan->vlan_tpid)) return -EINVAL; if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT))) return -EINVAL; @@ -2388,7 +2502,7 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, (*sfa)->orig_len = nla_len(attr); err = __ovs_nla_copy_actions(net, attr, key, 0, sfa, key->eth.type, - key->eth.tci, log); + key->eth.vlan.tci, log); if (err) ovs_nla_free_flow_actions(*sfa); diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 6b21fd068d87..8f198437c724 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -485,9 +485,14 @@ static unsigned int packet_length(const struct sk_buff *skb) { unsigned int length = skb->len - ETH_HLEN; - if (skb->protocol == htons(ETH_P_8021Q)) + if (skb_vlan_tagged(skb)) length -= VLAN_HLEN; + /* Don't subtract for multiple VLAN tags. Most (all?) drivers allow + * (ETH_LEN + VLAN_HLEN) in addition to the mtu value, but almost none + * account for 802.1ad. e.g. is_skb_forwardable(). + */ + return length; } From 9f5afeae51526b3ad7b7cb21ee8b145ce6ea7a7a Mon Sep 17 00:00:00 2001 From: Yaogong Wang Date: Wed, 7 Sep 2016 14:49:28 -0700 Subject: [PATCH 0825/1715] tcp: use an RB tree for ooo receive queue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Over the years, TCP BDP has increased by several orders of magnitude, and some people are considering to reach the 2 Gbytes limit. Even with current window scale limit of 14, ~1 Gbytes maps to ~740,000 MSS. In presence of packet losses (or reorders), TCP stores incoming packets into an out of order queue, and number of skbs sitting there waiting for the missing packets to be received can be in the 10^5 range. Most packets are appended to the tail of this queue, and when packets can finally be transferred to receive queue, we scan the queue from its head. However, in presence of heavy losses, we might have to find an arbitrary point in this queue, involving a linear scan for every incoming packet, throwing away cpu caches. This patch converts it to a RB tree, to get bounded latencies. Yaogong wrote a preliminary patch about 2 years ago. Eric did the rebase, added ofo_last_skb cache, polishing and tests. Tested with network dropping between 1 and 10 % packets, with good success (about 30 % increase of throughput in stress tests) Next step would be to also use an RB tree for the write queue at sender side ;) Signed-off-by: Yaogong Wang Signed-off-by: Eric Dumazet Cc: Yuchung Cheng Cc: Neal Cardwell Cc: Ilpo Järvinen Acked-By: Ilpo Järvinen Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 + include/linux/tcp.h | 7 +- include/net/tcp.h | 2 +- net/core/skbuff.c | 19 +++ net/ipv4/tcp.c | 4 +- net/ipv4/tcp_input.c | 332 ++++++++++++++++++++++----------------- net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/tcp_minisocks.c | 1 - 8 files changed, 219 insertions(+), 150 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index cfb7219be665..4c5662f05bda 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2402,6 +2402,8 @@ static inline void __skb_queue_purge(struct sk_buff_head *list) kfree_skb(skb); } +void skb_rbtree_purge(struct rb_root *root); + void *netdev_alloc_frag(unsigned int fragsz); struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int length, diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 7be9b1242354..c723a465125d 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -281,10 +281,9 @@ struct tcp_sock { struct sk_buff* lost_skb_hint; struct sk_buff *retransmit_skb_hint; - /* OOO segments go in this list. Note that socket lock must be held, - * as we do not use sk_buff_head lock. - */ - struct sk_buff_head out_of_order_queue; + /* OOO segments go in this rbtree. Socket lock must be held. */ + struct rb_root out_of_order_queue; + struct sk_buff *ooo_last_skb; /* cache rb_last(out_of_order_queue) */ /* SACKs data, these 2 need to be together (see tcp_options_write) */ struct tcp_sack_block duplicate_sack[1]; /* D-SACK block */ diff --git a/include/net/tcp.h b/include/net/tcp.h index d6ae36512429..fdfbedd61c67 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -640,7 +640,7 @@ static inline void tcp_fast_path_check(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - if (skb_queue_empty(&tp->out_of_order_queue) && + if (RB_EMPTY_ROOT(&tp->out_of_order_queue) && tp->rcv_wnd && atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf && !tp->urg_data) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 3864b4b68fa1..1e329d411242 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2444,6 +2444,25 @@ void skb_queue_purge(struct sk_buff_head *list) } EXPORT_SYMBOL(skb_queue_purge); +/** + * skb_rbtree_purge - empty a skb rbtree + * @root: root of the rbtree to empty + * + * Delete all buffers on an &sk_buff rbtree. Each buffer is removed from + * the list and one reference dropped. This function does not take + * any lock. Synchronization should be handled by the caller (e.g., TCP + * out-of-order queue is protected by the socket lock). + */ +void skb_rbtree_purge(struct rb_root *root) +{ + struct sk_buff *skb, *next; + + rbtree_postorder_for_each_entry_safe(skb, next, root, rbnode) + kfree_skb(skb); + + *root = RB_ROOT; +} + /** * skb_queue_head - queue a buffer at the list head * @list: list to use diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 77311a92275c..a13fcb369f52 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -380,7 +380,7 @@ void tcp_init_sock(struct sock *sk) struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); - __skb_queue_head_init(&tp->out_of_order_queue); + tp->out_of_order_queue = RB_ROOT; tcp_init_xmit_timers(sk); tcp_prequeue_init(tp); INIT_LIST_HEAD(&tp->tsq_node); @@ -2243,7 +2243,7 @@ int tcp_disconnect(struct sock *sk, int flags) tcp_clear_xmit_timers(sk); __skb_queue_purge(&sk->sk_receive_queue); tcp_write_queue_purge(sk); - __skb_queue_purge(&tp->out_of_order_queue); + skb_rbtree_purge(&tp->out_of_order_queue); inet->inet_dport = 0; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 8cd02c0b056c..a5934c4c8cd4 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4108,7 +4108,7 @@ void tcp_fin(struct sock *sk) /* It _is_ possible, that we have something out-of-order _after_ FIN. * Probably, we should reset in this case. For now drop them. */ - __skb_queue_purge(&tp->out_of_order_queue); + skb_rbtree_purge(&tp->out_of_order_queue); if (tcp_is_sack(tp)) tcp_sack_reset(&tp->rx_opt); sk_mem_reclaim(sk); @@ -4268,7 +4268,7 @@ static void tcp_sack_remove(struct tcp_sock *tp) int this_sack; /* Empty ofo queue, hence, all the SACKs are eaten. Clear. */ - if (skb_queue_empty(&tp->out_of_order_queue)) { + if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) { tp->rx_opt.num_sacks = 0; return; } @@ -4344,10 +4344,13 @@ static void tcp_ofo_queue(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); __u32 dsack_high = tp->rcv_nxt; + bool fin, fragstolen, eaten; struct sk_buff *skb, *tail; - bool fragstolen, eaten; + struct rb_node *p; - while ((skb = skb_peek(&tp->out_of_order_queue)) != NULL) { + p = rb_first(&tp->out_of_order_queue); + while (p) { + skb = rb_entry(p, struct sk_buff, rbnode); if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) break; @@ -4357,9 +4360,10 @@ static void tcp_ofo_queue(struct sock *sk) dsack_high = TCP_SKB_CB(skb)->end_seq; tcp_dsack_extend(sk, TCP_SKB_CB(skb)->seq, dsack); } + p = rb_next(p); + rb_erase(&skb->rbnode, &tp->out_of_order_queue); - __skb_unlink(skb, &tp->out_of_order_queue); - if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) { + if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) { SOCK_DEBUG(sk, "ofo packet was already received\n"); tcp_drop(sk, skb); continue; @@ -4371,12 +4375,19 @@ static void tcp_ofo_queue(struct sock *sk) tail = skb_peek_tail(&sk->sk_receive_queue); eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen); tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq); + fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN; if (!eaten) __skb_queue_tail(&sk->sk_receive_queue, skb); - if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) - tcp_fin(sk); - if (eaten) + else kfree_skb_partial(skb, fragstolen); + + if (unlikely(fin)) { + tcp_fin(sk); + /* tcp_fin() purges tp->out_of_order_queue, + * so we must end this loop right now. + */ + break; + } } } @@ -4403,8 +4414,10 @@ static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb, static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); + struct rb_node **p, *q, *parent; struct sk_buff *skb1; u32 seq, end_seq; + bool fragstolen; tcp_ecn_check_ce(tp, skb); @@ -4419,88 +4432,85 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) inet_csk_schedule_ack(sk); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOQUEUE); + seq = TCP_SKB_CB(skb)->seq; + end_seq = TCP_SKB_CB(skb)->end_seq; SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n", - tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); + tp->rcv_nxt, seq, end_seq); - skb1 = skb_peek_tail(&tp->out_of_order_queue); - if (!skb1) { + p = &tp->out_of_order_queue.rb_node; + if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) { /* Initial out of order segment, build 1 SACK. */ if (tcp_is_sack(tp)) { tp->rx_opt.num_sacks = 1; - tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq; - tp->selective_acks[0].end_seq = - TCP_SKB_CB(skb)->end_seq; + tp->selective_acks[0].start_seq = seq; + tp->selective_acks[0].end_seq = end_seq; } - __skb_queue_head(&tp->out_of_order_queue, skb); + rb_link_node(&skb->rbnode, NULL, p); + rb_insert_color(&skb->rbnode, &tp->out_of_order_queue); + tp->ooo_last_skb = skb; goto end; } - seq = TCP_SKB_CB(skb)->seq; - end_seq = TCP_SKB_CB(skb)->end_seq; - - if (seq == TCP_SKB_CB(skb1)->end_seq) { - bool fragstolen; - - if (!tcp_try_coalesce(sk, skb1, skb, &fragstolen)) { - __skb_queue_after(&tp->out_of_order_queue, skb1, skb); - } else { - tcp_grow_window(sk, skb); - kfree_skb_partial(skb, fragstolen); - skb = NULL; - } - - if (!tp->rx_opt.num_sacks || - tp->selective_acks[0].end_seq != seq) - goto add_sack; - - /* Common case: data arrive in order after hole. */ - tp->selective_acks[0].end_seq = end_seq; - goto end; + /* In the typical case, we are adding an skb to the end of the list. + * Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup. + */ + if (tcp_try_coalesce(sk, tp->ooo_last_skb, skb, &fragstolen)) { +coalesce_done: + tcp_grow_window(sk, skb); + kfree_skb_partial(skb, fragstolen); + skb = NULL; + goto add_sack; } - /* Find place to insert this segment. */ - while (1) { - if (!after(TCP_SKB_CB(skb1)->seq, seq)) - break; - if (skb_queue_is_first(&tp->out_of_order_queue, skb1)) { - skb1 = NULL; - break; + /* Find place to insert this segment. Handle overlaps on the way. */ + parent = NULL; + while (*p) { + parent = *p; + skb1 = rb_entry(parent, struct sk_buff, rbnode); + if (before(seq, TCP_SKB_CB(skb1)->seq)) { + p = &parent->rb_left; + continue; } - skb1 = skb_queue_prev(&tp->out_of_order_queue, skb1); + if (before(seq, TCP_SKB_CB(skb1)->end_seq)) { + if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) { + /* All the bits are present. Drop. */ + NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPOFOMERGE); + __kfree_skb(skb); + skb = NULL; + tcp_dsack_set(sk, seq, end_seq); + goto add_sack; + } + if (after(seq, TCP_SKB_CB(skb1)->seq)) { + /* Partial overlap. */ + tcp_dsack_set(sk, seq, TCP_SKB_CB(skb1)->end_seq); + } else { + /* skb's seq == skb1's seq and skb covers skb1. + * Replace skb1 with skb. + */ + rb_replace_node(&skb1->rbnode, &skb->rbnode, + &tp->out_of_order_queue); + tcp_dsack_extend(sk, + TCP_SKB_CB(skb1)->seq, + TCP_SKB_CB(skb1)->end_seq); + NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPOFOMERGE); + __kfree_skb(skb1); + goto add_sack; + } + } else if (tcp_try_coalesce(sk, skb1, skb, &fragstolen)) { + goto coalesce_done; + } + p = &parent->rb_right; } - /* Do skb overlap to previous one? */ - if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) { - if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) { - /* All the bits are present. Drop. */ - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE); - tcp_drop(sk, skb); - skb = NULL; - tcp_dsack_set(sk, seq, end_seq); - goto add_sack; - } - if (after(seq, TCP_SKB_CB(skb1)->seq)) { - /* Partial overlap. */ - tcp_dsack_set(sk, seq, - TCP_SKB_CB(skb1)->end_seq); - } else { - if (skb_queue_is_first(&tp->out_of_order_queue, - skb1)) - skb1 = NULL; - else - skb1 = skb_queue_prev( - &tp->out_of_order_queue, - skb1); - } - } - if (!skb1) - __skb_queue_head(&tp->out_of_order_queue, skb); - else - __skb_queue_after(&tp->out_of_order_queue, skb1, skb); + /* Insert segment into RB tree. */ + rb_link_node(&skb->rbnode, parent, p); + rb_insert_color(&skb->rbnode, &tp->out_of_order_queue); - /* And clean segments covered by new one as whole. */ - while (!skb_queue_is_last(&tp->out_of_order_queue, skb)) { - skb1 = skb_queue_next(&tp->out_of_order_queue, skb); + /* Remove other segments covered by skb. */ + while ((q = rb_next(&skb->rbnode)) != NULL) { + skb1 = rb_entry(q, struct sk_buff, rbnode); if (!after(end_seq, TCP_SKB_CB(skb1)->seq)) break; @@ -4509,12 +4519,15 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) end_seq); break; } - __skb_unlink(skb1, &tp->out_of_order_queue); + rb_erase(&skb1->rbnode, &tp->out_of_order_queue); tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq, TCP_SKB_CB(skb1)->end_seq); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE); tcp_drop(sk, skb1); } + /* If there is no skb after us, we are the last_skb ! */ + if (!q) + tp->ooo_last_skb = skb; add_sack: if (tcp_is_sack(tp)) @@ -4651,13 +4664,13 @@ queue_and_out: if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) tcp_fin(sk); - if (!skb_queue_empty(&tp->out_of_order_queue)) { + if (!RB_EMPTY_ROOT(&tp->out_of_order_queue)) { tcp_ofo_queue(sk); /* RFC2581. 4.2. SHOULD send immediate ACK, when * gap in queue is filled. */ - if (skb_queue_empty(&tp->out_of_order_queue)) + if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) inet_csk(sk)->icsk_ack.pingpong = 0; } @@ -4711,48 +4724,76 @@ drop: tcp_data_queue_ofo(sk, skb); } -static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb, - struct sk_buff_head *list) +static struct sk_buff *tcp_skb_next(struct sk_buff *skb, struct sk_buff_head *list) { - struct sk_buff *next = NULL; + if (list) + return !skb_queue_is_last(list, skb) ? skb->next : NULL; - if (!skb_queue_is_last(list, skb)) - next = skb_queue_next(list, skb); + return rb_entry_safe(rb_next(&skb->rbnode), struct sk_buff, rbnode); +} + +static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb, + struct sk_buff_head *list, + struct rb_root *root) +{ + struct sk_buff *next = tcp_skb_next(skb, list); + + if (list) + __skb_unlink(skb, list); + else + rb_erase(&skb->rbnode, root); - __skb_unlink(skb, list); __kfree_skb(skb); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED); return next; } +/* Insert skb into rb tree, ordered by TCP_SKB_CB(skb)->seq */ +static void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb) +{ + struct rb_node **p = &root->rb_node; + struct rb_node *parent = NULL; + struct sk_buff *skb1; + + while (*p) { + parent = *p; + skb1 = rb_entry(parent, struct sk_buff, rbnode); + if (before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb1)->seq)) + p = &parent->rb_left; + else + p = &parent->rb_right; + } + rb_link_node(&skb->rbnode, parent, p); + rb_insert_color(&skb->rbnode, root); +} + /* Collapse contiguous sequence of skbs head..tail with * sequence numbers start..end. * - * If tail is NULL, this means until the end of the list. + * If tail is NULL, this means until the end of the queue. * * Segments with FIN/SYN are not collapsed (only because this * simplifies code) */ static void -tcp_collapse(struct sock *sk, struct sk_buff_head *list, - struct sk_buff *head, struct sk_buff *tail, - u32 start, u32 end) +tcp_collapse(struct sock *sk, struct sk_buff_head *list, struct rb_root *root, + struct sk_buff *head, struct sk_buff *tail, u32 start, u32 end) { - struct sk_buff *skb, *n; + struct sk_buff *skb = head, *n; + struct sk_buff_head tmp; bool end_of_skbs; /* First, check that queue is collapsible and find - * the point where collapsing can be useful. */ - skb = head; + * the point where collapsing can be useful. + */ restart: - end_of_skbs = true; - skb_queue_walk_from_safe(list, skb, n) { - if (skb == tail) - break; + for (end_of_skbs = true; skb != NULL && skb != tail; skb = n) { + n = tcp_skb_next(skb, list); + /* No new bits? It is possible on ofo queue. */ if (!before(start, TCP_SKB_CB(skb)->end_seq)) { - skb = tcp_collapse_one(sk, skb, list); + skb = tcp_collapse_one(sk, skb, list, root); if (!skb) break; goto restart; @@ -4770,13 +4811,10 @@ restart: break; } - if (!skb_queue_is_last(list, skb)) { - struct sk_buff *next = skb_queue_next(list, skb); - if (next != tail && - TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(next)->seq) { - end_of_skbs = false; - break; - } + if (n && n != tail && + TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(n)->seq) { + end_of_skbs = false; + break; } /* Decided to skip this, advance start seq. */ @@ -4786,17 +4824,22 @@ restart: (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN))) return; + __skb_queue_head_init(&tmp); + while (before(start, end)) { int copy = min_t(int, SKB_MAX_ORDER(0, 0), end - start); struct sk_buff *nskb; nskb = alloc_skb(copy, GFP_ATOMIC); if (!nskb) - return; + break; memcpy(nskb->cb, skb->cb, sizeof(skb->cb)); TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start; - __skb_queue_before(list, skb, nskb); + if (list) + __skb_queue_before(list, skb, nskb); + else + __skb_queue_tail(&tmp, nskb); /* defer rbtree insertion */ skb_set_owner_r(nskb, sk); /* Copy data, releasing collapsed skbs. */ @@ -4814,14 +4857,17 @@ restart: start += size; } if (!before(start, TCP_SKB_CB(skb)->end_seq)) { - skb = tcp_collapse_one(sk, skb, list); + skb = tcp_collapse_one(sk, skb, list, root); if (!skb || skb == tail || (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN))) - return; + goto end; } } } +end: + skb_queue_walk_safe(&tmp, skb, n) + tcp_rbtree_insert(root, skb); } /* Collapse ofo queue. Algorithm: select contiguous sequence of skbs @@ -4830,43 +4876,43 @@ restart: static void tcp_collapse_ofo_queue(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *skb = skb_peek(&tp->out_of_order_queue); - struct sk_buff *head; + struct sk_buff *skb, *head; + struct rb_node *p; u32 start, end; - if (!skb) + p = rb_first(&tp->out_of_order_queue); + skb = rb_entry_safe(p, struct sk_buff, rbnode); +new_range: + if (!skb) { + p = rb_last(&tp->out_of_order_queue); + /* Note: This is possible p is NULL here. We do not + * use rb_entry_safe(), as ooo_last_skb is valid only + * if rbtree is not empty. + */ + tp->ooo_last_skb = rb_entry(p, struct sk_buff, rbnode); return; - + } start = TCP_SKB_CB(skb)->seq; end = TCP_SKB_CB(skb)->end_seq; - head = skb; - for (;;) { - struct sk_buff *next = NULL; + for (head = skb;;) { + skb = tcp_skb_next(skb, NULL); - if (!skb_queue_is_last(&tp->out_of_order_queue, skb)) - next = skb_queue_next(&tp->out_of_order_queue, skb); - skb = next; - - /* Segment is terminated when we see gap or when - * we are at the end of all the queue. */ + /* Range is terminated when we see a gap or when + * we are at the queue end. + */ if (!skb || after(TCP_SKB_CB(skb)->seq, end) || before(TCP_SKB_CB(skb)->end_seq, start)) { - tcp_collapse(sk, &tp->out_of_order_queue, + tcp_collapse(sk, NULL, &tp->out_of_order_queue, head, skb, start, end); - head = skb; - if (!skb) - break; - /* Start new segment */ - start = TCP_SKB_CB(skb)->seq; - end = TCP_SKB_CB(skb)->end_seq; - } else { - if (before(TCP_SKB_CB(skb)->seq, start)) - start = TCP_SKB_CB(skb)->seq; - if (after(TCP_SKB_CB(skb)->end_seq, end)) - end = TCP_SKB_CB(skb)->end_seq; + goto new_range; } + + if (unlikely(before(TCP_SKB_CB(skb)->seq, start))) + start = TCP_SKB_CB(skb)->seq; + if (after(TCP_SKB_CB(skb)->end_seq, end)) + end = TCP_SKB_CB(skb)->end_seq; } } @@ -4883,20 +4929,24 @@ static void tcp_collapse_ofo_queue(struct sock *sk) static bool tcp_prune_ofo_queue(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *skb; + struct rb_node *node, *prev; - if (skb_queue_empty(&tp->out_of_order_queue)) + if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) return false; NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED); - - while ((skb = __skb_dequeue_tail(&tp->out_of_order_queue)) != NULL) { - tcp_drop(sk, skb); + node = &tp->ooo_last_skb->rbnode; + do { + prev = rb_prev(node); + rb_erase(node, &tp->out_of_order_queue); + tcp_drop(sk, rb_entry(node, struct sk_buff, rbnode)); sk_mem_reclaim(sk); if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf && !tcp_under_memory_pressure(sk)) break; - } + node = prev; + } while (node); + tp->ooo_last_skb = rb_entry(prev, struct sk_buff, rbnode); /* Reset SACK state. A conforming SACK implementation will * do the same at a timeout based retransmit. When a connection @@ -4930,7 +4980,7 @@ static int tcp_prune_queue(struct sock *sk) tcp_collapse_ofo_queue(sk); if (!skb_queue_empty(&sk->sk_receive_queue)) - tcp_collapse(sk, &sk->sk_receive_queue, + tcp_collapse(sk, &sk->sk_receive_queue, NULL, skb_peek(&sk->sk_receive_queue), NULL, tp->copied_seq, tp->rcv_nxt); @@ -5035,7 +5085,7 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) /* We ACK each frame or... */ tcp_in_quickack_mode(sk) || /* We have out of order data. */ - (ofo_possible && skb_peek(&tp->out_of_order_queue))) { + (ofo_possible && !RB_EMPTY_ROOT(&tp->out_of_order_queue))) { /* Then ack it now */ tcp_send_ack(sk); } else { diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a75bf48d7950..04b989328558 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1845,7 +1845,7 @@ void tcp_v4_destroy_sock(struct sock *sk) tcp_write_queue_purge(sk); /* Cleans up our, hopefully empty, out_of_order_queue. */ - __skb_queue_purge(&tp->out_of_order_queue); + skb_rbtree_purge(&tp->out_of_order_queue); #ifdef CONFIG_TCP_MD5SIG /* Clean up the MD5 key list, if any */ diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 4b95ec4ed2c8..f63c73dc0acb 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -488,7 +488,6 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->snd_cwnd_cnt = 0; tcp_init_xmit_timers(newsk); - __skb_queue_head_init(&newtp->out_of_order_queue); newtp->write_seq = newtp->pushed_seq = treq->snt_isn + 1; newtp->rx_opt.saw_tstamp = 0; From 2d2be8cab26ed918e94d2deae89580003242a123 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 8 Sep 2016 01:03:42 +0200 Subject: [PATCH 0826/1715] bpf: fix range propagation on direct packet access LLVM can generate code that tests for direct packet access via skb->data/data_end in a way that currently gets rejected by the verifier, example: [...] 7: (61) r3 = *(u32 *)(r6 +80) 8: (61) r9 = *(u32 *)(r6 +76) 9: (bf) r2 = r9 10: (07) r2 += 54 11: (3d) if r3 >= r2 goto pc+12 R1=inv R2=pkt(id=0,off=54,r=0) R3=pkt_end R4=inv R6=ctx R9=pkt(id=0,off=0,r=0) R10=fp 12: (18) r4 = 0xffffff7a 14: (05) goto pc+430 [...] from 11 to 24: R1=inv R2=pkt(id=0,off=54,r=0) R3=pkt_end R4=inv R6=ctx R9=pkt(id=0,off=0,r=0) R10=fp 24: (7b) *(u64 *)(r10 -40) = r1 25: (b7) r1 = 0 26: (63) *(u32 *)(r6 +56) = r1 27: (b7) r2 = 40 28: (71) r8 = *(u8 *)(r9 +20) invalid access to packet, off=20 size=1, R9(id=0,off=0,r=0) The reason why this gets rejected despite a proper test is that we currently call find_good_pkt_pointers() only in case where we detect tests like rX > pkt_end, where rX is of type pkt(id=Y,off=Z,r=0) and derived, for example, from a register of type pkt(id=Y,off=0,r=0) pointing to skb->data. find_good_pkt_pointers() then fills the range in the current branch to pkt(id=Y,off=0,r=Z) on success. For above case, we need to extend that to recognize pkt_end >= rX pattern and mark the other branch that is taken on success with the appropriate pkt(id=Y,off=0,r=Z) type via find_good_pkt_pointers(). Since eBPF operates on BPF_JGT (>) and BPF_JGE (>=), these are the only two practical options to test for from what LLVM could have generated, since there's no such thing as BPF_JLT (<) or BPF_JLE (<=) that we would need to take into account as well. After the fix: [...] 7: (61) r3 = *(u32 *)(r6 +80) 8: (61) r9 = *(u32 *)(r6 +76) 9: (bf) r2 = r9 10: (07) r2 += 54 11: (3d) if r3 >= r2 goto pc+12 R1=inv R2=pkt(id=0,off=54,r=0) R3=pkt_end R4=inv R6=ctx R9=pkt(id=0,off=0,r=0) R10=fp 12: (18) r4 = 0xffffff7a 14: (05) goto pc+430 [...] from 11 to 24: R1=inv R2=pkt(id=0,off=54,r=54) R3=pkt_end R4=inv R6=ctx R9=pkt(id=0,off=0,r=54) R10=fp 24: (7b) *(u64 *)(r10 -40) = r1 25: (b7) r1 = 0 26: (63) *(u32 *)(r6 +56) = r1 27: (b7) r2 = 40 28: (71) r8 = *(u8 *)(r9 +20) 29: (bf) r1 = r8 30: (25) if r8 > 0x3c goto pc+47 R1=inv56 R2=imm40 R3=pkt_end R4=inv R6=ctx R8=inv56 R9=pkt(id=0,off=0,r=54) R10=fp 31: (b7) r1 = 1 [...] Verifier test cases are also added in this work, one that demonstrates the mentioned example here and one that tries a bad packet access for the current/fall-through branch (the one with types pkt(id=X,off=Y,r=0), pkt(id=X,off=0,r=0)), then a case with good and bad accesses, and two with both test variants (>, >=). Fixes: 969bf05eb3ce ("bpf: direct packet access") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 55 +++++++++++++------ samples/bpf/test_verifier.c | 102 ++++++++++++++++++++++++++++++++++++ 2 files changed, 142 insertions(+), 15 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 48c2705db22c..90493a66dddd 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1637,21 +1637,42 @@ static int check_alu_op(struct verifier_env *env, struct bpf_insn *insn) return 0; } -static void find_good_pkt_pointers(struct verifier_env *env, - struct reg_state *dst_reg) +static void find_good_pkt_pointers(struct verifier_state *state, + const struct reg_state *dst_reg) { - struct verifier_state *state = &env->cur_state; struct reg_state *regs = state->regs, *reg; int i; - /* r2 = r3; - * r2 += 8 - * if (r2 > pkt_end) goto somewhere - * r2 == dst_reg, pkt_end == src_reg, - * r2=pkt(id=n,off=8,r=0) - * r3=pkt(id=n,off=0,r=0) - * find register r3 and mark its range as r3=pkt(id=n,off=0,r=8) - * so that range of bytes [r3, r3 + 8) is safe to access + + /* LLVM can generate two kind of checks: + * + * Type 1: + * + * r2 = r3; + * r2 += 8; + * if (r2 > pkt_end) goto + * + * + * Where: + * r2 == dst_reg, pkt_end == src_reg + * r2=pkt(id=n,off=8,r=0) + * r3=pkt(id=n,off=0,r=0) + * + * Type 2: + * + * r2 = r3; + * r2 += 8; + * if (pkt_end >= r2) goto + * + * + * Where: + * pkt_end == dst_reg, r2 == src_reg + * r2=pkt(id=n,off=8,r=0) + * r3=pkt(id=n,off=0,r=0) + * + * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8) + * so that range of bytes [r3, r3 + 8) is safe to access. */ + for (i = 0; i < MAX_BPF_REG; i++) if (regs[i].type == PTR_TO_PACKET && regs[i].id == dst_reg->id) regs[i].range = dst_reg->off; @@ -1668,8 +1689,8 @@ static void find_good_pkt_pointers(struct verifier_env *env, static int check_cond_jmp_op(struct verifier_env *env, struct bpf_insn *insn, int *insn_idx) { - struct reg_state *regs = env->cur_state.regs, *dst_reg; - struct verifier_state *other_branch; + struct verifier_state *other_branch, *this_branch = &env->cur_state; + struct reg_state *regs = this_branch->regs, *dst_reg; u8 opcode = BPF_OP(insn->code); int err; @@ -1750,13 +1771,17 @@ static int check_cond_jmp_op(struct verifier_env *env, } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGT && dst_reg->type == PTR_TO_PACKET && regs[insn->src_reg].type == PTR_TO_PACKET_END) { - find_good_pkt_pointers(env, dst_reg); + find_good_pkt_pointers(this_branch, dst_reg); + } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGE && + dst_reg->type == PTR_TO_PACKET_END && + regs[insn->src_reg].type == PTR_TO_PACKET) { + find_good_pkt_pointers(other_branch, ®s[insn->src_reg]); } else if (is_pointer_value(env, insn->dst_reg)) { verbose("R%d pointer comparison prohibited\n", insn->dst_reg); return -EACCES; } if (log_level) - print_verifier_state(&env->cur_state); + print_verifier_state(this_branch); return 0; } diff --git a/samples/bpf/test_verifier.c b/samples/bpf/test_verifier.c index 78c6f131d94f..1f6cc9b6a23b 100644 --- a/samples/bpf/test_verifier.c +++ b/samples/bpf/test_verifier.c @@ -1528,6 +1528,108 @@ static struct bpf_test tests[] = { .result = REJECT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, + { + "direct packet access: test5 (pkt_end >= reg, good access)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "direct packet access: test6 (pkt_end >= reg, bad access)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 3), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "invalid access to packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "direct packet access: test7 (pkt_end >= reg, both accesses)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 3), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "invalid access to packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "direct packet access: test8 (double test, variant 1)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 4), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "direct packet access: test9 (double test, variant 2)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, { "helper access to packet: test1, valid packet_ptr range", .insns = { From e895cdce683161081e3626c4f5a5c55cb72089f8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 7 Sep 2016 21:52:56 -0700 Subject: [PATCH 0827/1715] ipv4: accept u8 in IP_TOS ancillary data In commit f02db315b8d8 ("ipv4: IP_TOS and IP_TTL can be specified as ancillary data") Francesco added IP_TOS values specified as integer. However, kernel sends to userspace (at recvmsg() time) an IP_TOS value in a single byte, when IP_RECVTOS is set on the socket. It can be very useful to reflect all ancillary options as given by the kernel in a subsequent sendmsg(), instead of aborting the sendmsg() with EINVAL after Francesco patch. So this patch extends IP_TOS ancillary to accept an u8, so that an UDP server can simply reuse same ancillary block without having to mangle it. Jesper can then augment https://github.com/netoptimizer/network-testing/blob/master/src/udp_example02.c to add TOS reflection ;) Fixes: f02db315b8d8 ("ipv4: IP_TOS and IP_TTL can be specified as ancillary data") Signed-off-by: Eric Dumazet Cc: Francesco Fusco Cc: Jesper Dangaard Brouer Acked-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- net/ipv4/ip_sockglue.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 71a52f4d4cff..af4919792b6a 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -284,9 +284,12 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc, ipc->ttl = val; break; case IP_TOS: - if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) + if (cmsg->cmsg_len == CMSG_LEN(sizeof(int))) + val = *(int *)CMSG_DATA(cmsg); + else if (cmsg->cmsg_len == CMSG_LEN(sizeof(u8))) + val = *(u8 *)CMSG_DATA(cmsg); + else return -EINVAL; - val = *(int *)CMSG_DATA(cmsg); if (val < 0 || val > 255) return -EINVAL; ipc->tos = val; From 34a3d4b2d1f1b7c81af79f6f93a6cef4c3a0f54a Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Thu, 8 Sep 2016 13:55:56 -0400 Subject: [PATCH 0828/1715] xfrm: fix header file comment reference to struct xfrm_replay_state_esn Reported-by: Paul Wouters Signed-off-by: Richard Guy Briggs Signed-off-by: Steffen Klassert --- include/uapi/linux/xfrm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index 143338978b48..1fc62b239f1b 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h @@ -298,7 +298,7 @@ enum xfrm_attr_type_t { XFRMA_ALG_AUTH_TRUNC, /* struct xfrm_algo_auth */ XFRMA_MARK, /* struct xfrm_mark */ XFRMA_TFCPAD, /* __u32 */ - XFRMA_REPLAY_ESN_VAL, /* struct xfrm_replay_esn */ + XFRMA_REPLAY_ESN_VAL, /* struct xfrm_replay_state_esn */ XFRMA_SA_EXTRA_FLAGS, /* __u32 */ XFRMA_PROTO, /* __u8 */ XFRMA_ADDRESS_FILTER, /* struct xfrm_address_filter */ From defb893fffef89ac6db4e68fccae1783d7c93977 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 17 Mar 2016 10:39:17 +0100 Subject: [PATCH 0829/1715] bcma: use of_dma_configure() to set initial dma mask While fixing another bug, I noticed that bcma manually sets up a dma_mask pointer for its child devices. We have a generic helper for that now, which should be able to cope better with any variations that might be needed to deal with cache coherency, unusual DMA address offsets, iommus, or limited DMA masks, none of which are currently handled here. This changes the core to use the of_dma_configure(), like we do for platform devices that are probed directly from DT. Signed-off-by: Arnd Bergmann Signed-off-by: Kalle Valo --- drivers/bcma/main.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/bcma/main.c b/drivers/bcma/main.c index 1f635471f318..2c1798e38abd 100644 --- a/drivers/bcma/main.c +++ b/drivers/bcma/main.c @@ -209,6 +209,8 @@ static void bcma_of_fill_device(struct platform_device *parent, core->dev.of_node = node; core->irq = bcma_of_get_irq(parent, core, 0); + + of_dma_configure(&core->dev, node); } unsigned int bcma_core_irq(struct bcma_device *core, int num) @@ -248,12 +250,12 @@ void bcma_prepare_core(struct bcma_bus *bus, struct bcma_device *core) core->irq = bus->host_pci->irq; break; case BCMA_HOSTTYPE_SOC: - core->dev.dma_mask = &core->dev.coherent_dma_mask; - if (bus->host_pdev) { + if (IS_ENABLED(CONFIG_OF) && bus->host_pdev) { core->dma_dev = &bus->host_pdev->dev; core->dev.parent = &bus->host_pdev->dev; bcma_of_fill_device(bus->host_pdev, core); } else { + core->dev.dma_mask = &core->dev.coherent_dma_mask; core->dma_dev = &core->dev; } break; From cf5383b088d07f304d189986fdbd4efbd7d41538 Mon Sep 17 00:00:00 2001 From: Xinming Hu Date: Fri, 2 Sep 2016 13:05:06 +0530 Subject: [PATCH 0830/1715] mwifiex: add manufacturing mode support By default normal mode is chosen when driver is loaded. This patch adds a provision to choose manufacturing mode via module parameters. Below command loads driver in manufacturing mode insmod mwifiex.ko mfg_mode=1. Tested-by: chunfan chen Signed-off-by: Xinming Hu Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/cmdevt.c | 8 ++++++ drivers/net/wireless/marvell/mwifiex/init.c | 22 +++++++++++----- drivers/net/wireless/marvell/mwifiex/main.c | 26 ++++++++++++++++--- drivers/net/wireless/marvell/mwifiex/main.h | 2 ++ drivers/net/wireless/marvell/mwifiex/pcie.c | 2 +- drivers/net/wireless/marvell/mwifiex/sdio.c | 2 +- drivers/net/wireless/marvell/mwifiex/usb.c | 2 +- 7 files changed, 50 insertions(+), 14 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/cmdevt.c b/drivers/net/wireless/marvell/mwifiex/cmdevt.c index d433aa01fdbb..53477280f39c 100644 --- a/drivers/net/wireless/marvell/mwifiex/cmdevt.c +++ b/drivers/net/wireless/marvell/mwifiex/cmdevt.c @@ -595,6 +595,14 @@ int mwifiex_send_cmd(struct mwifiex_private *priv, u16 cmd_no, return -1; } } + /* We don't expect commands in manufacturing mode. They are cooked + * in application and ready to download buffer is passed to the driver + */ + if (adapter->mfg_mode && cmd_no) { + dev_dbg(adapter->dev, "Ignoring commands in manufacturing mode\n"); + return -1; + } + /* Get a new command node */ cmd_node = mwifiex_get_cmd_node(adapter); diff --git a/drivers/net/wireless/marvell/mwifiex/init.c b/drivers/net/wireless/marvell/mwifiex/init.c index 1489c90192bd..82839d9f079f 100644 --- a/drivers/net/wireless/marvell/mwifiex/init.c +++ b/drivers/net/wireless/marvell/mwifiex/init.c @@ -298,6 +298,7 @@ static void mwifiex_init_adapter(struct mwifiex_adapter *adapter) memset(&adapter->arp_filter, 0, sizeof(adapter->arp_filter)); adapter->arp_filter_size = 0; adapter->max_mgmt_ie_index = MAX_MGMT_IE_INDEX; + adapter->mfg_mode = mfg_mode; adapter->key_api_major_ver = 0; adapter->key_api_minor_ver = 0; eth_broadcast_addr(adapter->perm_addr); @@ -553,15 +554,22 @@ int mwifiex_init_fw(struct mwifiex_adapter *adapter) return -1; } } + if (adapter->mfg_mode) { + adapter->hw_status = MWIFIEX_HW_STATUS_READY; + ret = -EINPROGRESS; + } else { + for (i = 0; i < adapter->priv_num; i++) { + if (adapter->priv[i]) { + ret = mwifiex_sta_init_cmd(adapter->priv[i], + first_sta, true); + if (ret == -1) + return -1; + + first_sta = false; + } + - for (i = 0; i < adapter->priv_num; i++) { - if (adapter->priv[i]) { - ret = mwifiex_sta_init_cmd(adapter->priv[i], first_sta, - true); - if (ret == -1) - return -1; - first_sta = false; } } diff --git a/drivers/net/wireless/marvell/mwifiex/main.c b/drivers/net/wireless/marvell/mwifiex/main.c index 51d4dfcf1214..029e5da1e6fd 100644 --- a/drivers/net/wireless/marvell/mwifiex/main.c +++ b/drivers/net/wireless/marvell/mwifiex/main.c @@ -23,6 +23,7 @@ #include "11n.h" #define VERSION "1.0" +#define MFG_FIRMWARE "mwifiex_mfg.bin" static unsigned int debug_mask = MWIFIEX_DEFAULT_DEBUG_MASK; module_param(debug_mask, uint, 0); @@ -37,6 +38,10 @@ module_param(driver_mode, ushort, 0); MODULE_PARM_DESC(driver_mode, "station=0x1(default), ap-sta=0x3, station-p2p=0x5, ap-sta-p2p=0x7"); +bool mfg_mode; +module_param(mfg_mode, bool, 0); +MODULE_PARM_DESC(mfg_mode, "manufacturing mode enable:1, disable:0"); + /* * This function registers the device and performs all the necessary * initializations. @@ -561,10 +566,12 @@ static void mwifiex_fw_dpc(const struct firmware *firmware, void *context) goto done; } /* Wait for mwifiex_init to complete */ - wait_event_interruptible(adapter->init_wait_q, - adapter->init_wait_q_woken); - if (adapter->hw_status != MWIFIEX_HW_STATUS_READY) - goto err_init_fw; + if (!adapter->mfg_mode) { + wait_event_interruptible(adapter->init_wait_q, + adapter->init_wait_q_woken); + if (adapter->hw_status != MWIFIEX_HW_STATUS_READY) + goto err_init_fw; + } priv = adapter->priv[MWIFIEX_BSS_ROLE_STA]; if (mwifiex_register_cfg80211(adapter)) { @@ -668,6 +675,17 @@ static int mwifiex_init_hw_fw(struct mwifiex_adapter *adapter) { int ret; + /* Override default firmware with manufacturing one if + * manufacturing mode is enabled + */ + if (mfg_mode) { + if (strlcpy(adapter->fw_name, MFG_FIRMWARE, + sizeof(adapter->fw_name)) >= + sizeof(adapter->fw_name)) { + pr_err("%s: fw_name too long!\n", __func__); + return -1; + } + } ret = request_firmware_nowait(THIS_MODULE, 1, adapter->fw_name, adapter->dev, GFP_KERNEL, adapter, mwifiex_fw_dpc); diff --git a/drivers/net/wireless/marvell/mwifiex/main.h b/drivers/net/wireless/marvell/mwifiex/main.h index cd9a4f152530..17221c4f681f 100644 --- a/drivers/net/wireless/marvell/mwifiex/main.h +++ b/drivers/net/wireless/marvell/mwifiex/main.h @@ -58,6 +58,7 @@ #include "sdio.h" extern const char driver_version[]; +extern bool mfg_mode; struct mwifiex_adapter; struct mwifiex_private; @@ -990,6 +991,7 @@ struct mwifiex_adapter { u32 drv_info_size; bool scan_chan_gap_enabled; struct sk_buff_head rx_data_q; + bool mfg_mode; struct mwifiex_chan_stats *chan_stats; u32 num_in_chan_stats; int survey_idx; diff --git a/drivers/net/wireless/marvell/mwifiex/pcie.c b/drivers/net/wireless/marvell/mwifiex/pcie.c index 50a6a531ed18..c1b3e8ef34ec 100644 --- a/drivers/net/wireless/marvell/mwifiex/pcie.c +++ b/drivers/net/wireless/marvell/mwifiex/pcie.c @@ -225,7 +225,7 @@ static void mwifiex_pcie_remove(struct pci_dev *pdev) if (!adapter || !adapter->priv_num) return; - if (user_rmmod) { + if (user_rmmod && !adapter->mfg_mode) { #ifdef CONFIG_PM_SLEEP if (adapter->is_suspended) mwifiex_pcie_resume(&pdev->dev); diff --git a/drivers/net/wireless/marvell/mwifiex/sdio.c b/drivers/net/wireless/marvell/mwifiex/sdio.c index d3e1561ca075..6dba40998a66 100644 --- a/drivers/net/wireless/marvell/mwifiex/sdio.c +++ b/drivers/net/wireless/marvell/mwifiex/sdio.c @@ -289,7 +289,7 @@ mwifiex_sdio_remove(struct sdio_func *func) mwifiex_dbg(adapter, INFO, "info: SDIO func num=%d\n", func->num); - if (user_rmmod) { + if (user_rmmod && !adapter->mfg_mode) { if (adapter->is_suspended) mwifiex_sdio_resume(adapter->dev); diff --git a/drivers/net/wireless/marvell/mwifiex/usb.c b/drivers/net/wireless/marvell/mwifiex/usb.c index 3bd04f52f369..92135167a822 100644 --- a/drivers/net/wireless/marvell/mwifiex/usb.c +++ b/drivers/net/wireless/marvell/mwifiex/usb.c @@ -611,7 +611,7 @@ static void mwifiex_usb_disconnect(struct usb_interface *intf) if (!adapter->priv_num) return; - if (user_rmmod) { + if (user_rmmod && !adapter->mfg_mode) { #ifdef CONFIG_PM if (adapter->is_suspended) mwifiex_usb_resume(intf); From 3935ccc14d2c68488bd96448fc073da48eaeebf0 Mon Sep 17 00:00:00 2001 From: Xinming Hu Date: Fri, 2 Sep 2016 13:05:07 +0530 Subject: [PATCH 0831/1715] mwifiex: add cfg80211 testmode support This patch adds cfg80211 testmode support so that userspace tools can download necessary commands to firmware during manufacturing mode tests. Signed-off-by: Xinming Hu Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- .../net/wireless/marvell/mwifiex/cfg80211.c | 83 +++++++++++++++++++ 1 file changed, 83 insertions(+) diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c index 876d420e936e..0a03d3f90b56 100644 --- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c +++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c @@ -3919,6 +3919,88 @@ static int mwifiex_cfg80211_get_channel(struct wiphy *wiphy, return ret; } +#ifdef CONFIG_NL80211_TESTMODE + +enum mwifiex_tm_attr { + __MWIFIEX_TM_ATTR_INVALID = 0, + MWIFIEX_TM_ATTR_CMD = 1, + MWIFIEX_TM_ATTR_DATA = 2, + + /* keep last */ + __MWIFIEX_TM_ATTR_AFTER_LAST, + MWIFIEX_TM_ATTR_MAX = __MWIFIEX_TM_ATTR_AFTER_LAST - 1, +}; + +static const struct nla_policy mwifiex_tm_policy[MWIFIEX_TM_ATTR_MAX + 1] = { + [MWIFIEX_TM_ATTR_CMD] = { .type = NLA_U32 }, + [MWIFIEX_TM_ATTR_DATA] = { .type = NLA_BINARY, + .len = MWIFIEX_SIZE_OF_CMD_BUFFER }, +}; + +enum mwifiex_tm_command { + MWIFIEX_TM_CMD_HOSTCMD = 0, +}; + +static int mwifiex_tm_cmd(struct wiphy *wiphy, struct wireless_dev *wdev, + void *data, int len) +{ + struct mwifiex_private *priv = mwifiex_netdev_get_priv(wdev->netdev); + struct mwifiex_ds_misc_cmd *hostcmd; + struct nlattr *tb[MWIFIEX_TM_ATTR_MAX + 1]; + struct mwifiex_adapter *adapter; + struct sk_buff *skb; + int err; + + if (!priv) + return -EINVAL; + adapter = priv->adapter; + + err = nla_parse(tb, MWIFIEX_TM_ATTR_MAX, data, len, + mwifiex_tm_policy); + if (err) + return err; + + if (!tb[MWIFIEX_TM_ATTR_CMD]) + return -EINVAL; + + switch (nla_get_u32(tb[MWIFIEX_TM_ATTR_CMD])) { + case MWIFIEX_TM_CMD_HOSTCMD: + if (!tb[MWIFIEX_TM_ATTR_DATA]) + return -EINVAL; + + hostcmd = kzalloc(sizeof(*hostcmd), GFP_KERNEL); + if (!hostcmd) + return -ENOMEM; + + hostcmd->len = nla_len(tb[MWIFIEX_TM_ATTR_DATA]); + memcpy(hostcmd->cmd, nla_data(tb[MWIFIEX_TM_ATTR_DATA]), + hostcmd->len); + + if (mwifiex_send_cmd(priv, 0, 0, 0, hostcmd, true)) { + dev_err(priv->adapter->dev, "Failed to process hostcmd\n"); + return -EFAULT; + } + + /* process hostcmd response*/ + skb = cfg80211_testmode_alloc_reply_skb(wiphy, hostcmd->len); + if (!skb) + return -ENOMEM; + err = nla_put(skb, MWIFIEX_TM_ATTR_DATA, + hostcmd->len, hostcmd->cmd); + if (err) { + kfree_skb(skb); + return -EMSGSIZE; + } + + err = cfg80211_testmode_reply(skb); + kfree(hostcmd); + return err; + default: + return -EOPNOTSUPP; + } +} +#endif + static int mwifiex_cfg80211_start_radar_detection(struct wiphy *wiphy, struct net_device *dev, @@ -4031,6 +4113,7 @@ static struct cfg80211_ops mwifiex_cfg80211_ops = { .tdls_cancel_channel_switch = mwifiex_cfg80211_tdls_cancel_chan_switch, .add_station = mwifiex_cfg80211_add_station, .change_station = mwifiex_cfg80211_change_station, + CFG80211_TESTMODE_CMD(mwifiex_tm_cmd) .get_channel = mwifiex_cfg80211_get_channel, .start_radar_detection = mwifiex_cfg80211_start_radar_detection, .channel_switch = mwifiex_cfg80211_channel_switch, From 3e9b3112ec74f192eaab976c3889e34255cae940 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 31 Aug 2016 12:46:44 +0100 Subject: [PATCH 0832/1715] add basic register-field manipulation macros Common approach to accessing register fields is to define structures or sets of macros containing mask and shift pair. Operations on the register are then performed as follows: field = (reg >> shift) & mask; reg &= ~(mask << shift); reg |= (field & mask) << shift; Defining shift and mask separately is tedious. Ivo van Doorn came up with an idea of computing them at compilation time based on a single shifted mask (later refined by Felix) which can be used like this: #define REG_FIELD 0x000ff000 field = FIELD_GET(REG_FIELD, reg); reg &= ~REG_FIELD; reg |= FIELD_PREP(REG_FIELD, field); FIELD_{GET,PREP} macros take care of finding out what the appropriate shift is based on compilation time ffs operation. GENMASK can be used to define registers (which is usually less error-prone and easier to match with datasheets). This approach is the most convenient I've seen so to limit code multiplication let's move the macros to a global header file. Attempts to use static inlines instead of macros failed due to false positive triggering of BUILD_BUG_ON()s, especially with GCC < 6.0. Signed-off-by: Jakub Kicinski Reviewed-by: Dinan Gunawardena Signed-off-by: Kalle Valo --- include/linux/bitfield.h | 93 ++++++++++++++++++++++++++++++++++++++++ include/linux/bug.h | 3 ++ 2 files changed, 96 insertions(+) create mode 100644 include/linux/bitfield.h diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h new file mode 100644 index 000000000000..f6505d83069d --- /dev/null +++ b/include/linux/bitfield.h @@ -0,0 +1,93 @@ +/* + * Copyright (C) 2014 Felix Fietkau + * Copyright (C) 2004 - 2009 Ivo van Doorn + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _LINUX_BITFIELD_H +#define _LINUX_BITFIELD_H + +#include + +/* + * Bitfield access macros + * + * FIELD_{GET,PREP} macros take as first parameter shifted mask + * from which they extract the base mask and shift amount. + * Mask must be a compilation time constant. + * + * Example: + * + * #define REG_FIELD_A GENMASK(6, 0) + * #define REG_FIELD_B BIT(7) + * #define REG_FIELD_C GENMASK(15, 8) + * #define REG_FIELD_D GENMASK(31, 16) + * + * Get: + * a = FIELD_GET(REG_FIELD_A, reg); + * b = FIELD_GET(REG_FIELD_B, reg); + * + * Set: + * reg = FIELD_PREP(REG_FIELD_A, 1) | + * FIELD_PREP(REG_FIELD_B, 0) | + * FIELD_PREP(REG_FIELD_C, c) | + * FIELD_PREP(REG_FIELD_D, 0x40); + * + * Modify: + * reg &= ~REG_FIELD_C; + * reg |= FIELD_PREP(REG_FIELD_C, c); + */ + +#define __bf_shf(x) (__builtin_ffsll(x) - 1) + +#define __BF_FIELD_CHECK(_mask, _reg, _val, _pfx) \ + ({ \ + BUILD_BUG_ON_MSG(!__builtin_constant_p(_mask), \ + _pfx "mask is not constant"); \ + BUILD_BUG_ON_MSG(!(_mask), _pfx "mask is zero"); \ + BUILD_BUG_ON_MSG(__builtin_constant_p(_val) ? \ + ~((_mask) >> __bf_shf(_mask)) & (_val) : 0, \ + _pfx "value too large for the field"); \ + BUILD_BUG_ON_MSG((_mask) > (typeof(_reg))~0ull, \ + _pfx "type of reg too small for mask"); \ + __BUILD_BUG_ON_NOT_POWER_OF_2((_mask) + \ + (1ULL << __bf_shf(_mask))); \ + }) + +/** + * FIELD_PREP() - prepare a bitfield element + * @_mask: shifted mask defining the field's length and position + * @_val: value to put in the field + * + * FIELD_PREP() masks and shifts up the value. The result should + * be combined with other fields of the bitfield using logical OR. + */ +#define FIELD_PREP(_mask, _val) \ + ({ \ + __BF_FIELD_CHECK(_mask, 0ULL, _val, "FIELD_PREP: "); \ + ((typeof(_mask))(_val) << __bf_shf(_mask)) & (_mask); \ + }) + +/** + * FIELD_GET() - extract a bitfield element + * @_mask: shifted mask defining the field's length and position + * @_reg: 32bit value of entire bitfield + * + * FIELD_GET() extracts the field specified by @_mask from the + * bitfield passed in as @_reg by masking and shifting it down. + */ +#define FIELD_GET(_mask, _reg) \ + ({ \ + __BF_FIELD_CHECK(_mask, _reg, 0U, "FIELD_GET: "); \ + (typeof(_mask))(((_reg) & (_mask)) >> __bf_shf(_mask)); \ + }) + +#endif diff --git a/include/linux/bug.h b/include/linux/bug.h index e51b0709e78d..292d6a10b0c2 100644 --- a/include/linux/bug.h +++ b/include/linux/bug.h @@ -13,6 +13,7 @@ enum bug_trap_type { struct pt_regs; #ifdef __CHECKER__ +#define __BUILD_BUG_ON_NOT_POWER_OF_2(n) (0) #define BUILD_BUG_ON_NOT_POWER_OF_2(n) (0) #define BUILD_BUG_ON_ZERO(e) (0) #define BUILD_BUG_ON_NULL(e) ((void*)0) @@ -24,6 +25,8 @@ struct pt_regs; #else /* __CHECKER__ */ /* Force a compilation error if a constant expression is not a power of 2 */ +#define __BUILD_BUG_ON_NOT_POWER_OF_2(n) \ + BUILD_BUG_ON(((n) & ((n) - 1)) != 0) #define BUILD_BUG_ON_NOT_POWER_OF_2(n) \ BUILD_BUG_ON((n) == 0 || (((n) & ((n) - 1)) != 0)) From faad5433b7227e0091c390c68be9076fe846627f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 31 Aug 2016 12:46:45 +0100 Subject: [PATCH 0833/1715] mt7601u: remove redefinition of GENMASK Remove redefinition of GENMASK which should not be there in the upstream version of the code. Signed-off-by: Jakub Kicinski Reviewed-by: Dinan Gunawardena Signed-off-by: Kalle Valo --- drivers/net/wireless/mediatek/mt7601u/regs.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt7601u/regs.h b/drivers/net/wireless/mediatek/mt7601u/regs.h index afd8978e83fa..27a429d90cec 100644 --- a/drivers/net/wireless/mediatek/mt7601u/regs.h +++ b/drivers/net/wireless/mediatek/mt7601u/regs.h @@ -17,10 +17,6 @@ #include -#ifndef GENMASK -#define GENMASK(h, l) (((U32_C(1) << ((h) - (l) + 1)) - 1) << (l)) -#endif - #define MT_ASIC_VERSION 0x0000 #define MT76XX_REV_E3 0x22 From adcc710d0a9e260ae315bc7d31b68c5697f58b43 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 31 Aug 2016 12:46:46 +0100 Subject: [PATCH 0834/1715] mt7601u: remove unnecessary include There is no need to include linux/version.h in a in-tree driver. Signed-off-by: Jakub Kicinski Reviewed-by: Dinan Gunawardena Signed-off-by: Kalle Valo --- drivers/net/wireless/mediatek/mt7601u/main.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt7601u/main.c b/drivers/net/wireless/mediatek/mt7601u/main.c index e70dd9523911..43ebd460ba86 100644 --- a/drivers/net/wireless/mediatek/mt7601u/main.c +++ b/drivers/net/wireless/mediatek/mt7601u/main.c @@ -15,7 +15,6 @@ #include "mt7601u.h" #include "mac.h" #include -#include static int mt7601u_start(struct ieee80211_hw *hw) { From d43af50566b43fb4abce42789ba999a7e9dc45bb Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 31 Aug 2016 12:46:47 +0100 Subject: [PATCH 0835/1715] mt7601u: use linux/bitfield.h Use the newly added linux/bitfield.h. Signed-off-by: Jakub Kicinski Reviewed-by: Dinan Gunawardena Signed-off-by: Kalle Valo --- drivers/net/wireless/mediatek/mt7601u/dma.c | 2 +- drivers/net/wireless/mediatek/mt7601u/dma.h | 10 +-- .../net/wireless/mediatek/mt7601u/eeprom.c | 12 +-- drivers/net/wireless/mediatek/mt7601u/init.c | 10 ++- drivers/net/wireless/mediatek/mt7601u/mac.c | 38 ++++----- drivers/net/wireless/mediatek/mt7601u/mcu.c | 20 ++--- .../net/wireless/mediatek/mt7601u/mt7601u.h | 4 +- drivers/net/wireless/mediatek/mt7601u/phy.c | 44 ++++++----- drivers/net/wireless/mediatek/mt7601u/tx.c | 19 ++--- drivers/net/wireless/mediatek/mt7601u/util.h | 77 ------------------- 10 files changed, 81 insertions(+), 155 deletions(-) delete mode 100644 drivers/net/wireless/mediatek/mt7601u/util.h diff --git a/drivers/net/wireless/mediatek/mt7601u/dma.c b/drivers/net/wireless/mediatek/mt7601u/dma.c index 57a80cfa39b1..a8bc064bc14f 100644 --- a/drivers/net/wireless/mediatek/mt7601u/dma.c +++ b/drivers/net/wireless/mediatek/mt7601u/dma.c @@ -103,7 +103,7 @@ static void mt7601u_rx_process_seg(struct mt7601u_dev *dev, u8 *data, if (unlikely(rxwi->zero[0] || rxwi->zero[1] || rxwi->zero[2])) dev_err_once(dev->dev, "Error: RXWI zero fields are set\n"); - if (unlikely(MT76_GET(MT_RXD_INFO_TYPE, fce_info))) + if (unlikely(FIELD_GET(MT_RXD_INFO_TYPE, fce_info))) dev_err_once(dev->dev, "Error: RX path seen a non-pkt urb\n"); trace_mt_rx(dev, rxwi, fce_info); diff --git a/drivers/net/wireless/mediatek/mt7601u/dma.h b/drivers/net/wireless/mediatek/mt7601u/dma.h index 978e8a90b87f..270d126880c0 100644 --- a/drivers/net/wireless/mediatek/mt7601u/dma.h +++ b/drivers/net/wireless/mediatek/mt7601u/dma.h @@ -18,8 +18,6 @@ #include #include -#include "util.h" - #define MT_DMA_HDR_LEN 4 #define MT_RX_INFO_LEN 4 #define MT_FCE_INFO_LEN 4 @@ -79,9 +77,9 @@ static inline int mt7601u_dma_skb_wrap(struct sk_buff *skb, */ info = flags | - MT76_SET(MT_TXD_INFO_LEN, round_up(skb->len, 4)) | - MT76_SET(MT_TXD_INFO_D_PORT, d_port) | - MT76_SET(MT_TXD_INFO_TYPE, type); + FIELD_PREP(MT_TXD_INFO_LEN, round_up(skb->len, 4)) | + FIELD_PREP(MT_TXD_INFO_D_PORT, d_port) | + FIELD_PREP(MT_TXD_INFO_TYPE, type); put_unaligned_le32(info, skb_push(skb, sizeof(info))); return skb_put_padto(skb, round_up(skb->len, 4) + 4); @@ -90,7 +88,7 @@ static inline int mt7601u_dma_skb_wrap(struct sk_buff *skb, static inline int mt7601u_dma_skb_wrap_pkt(struct sk_buff *skb, enum mt76_qsel qsel, u32 flags) { - flags |= MT76_SET(MT_TXD_PKT_INFO_QSEL, qsel); + flags |= FIELD_PREP(MT_TXD_PKT_INFO_QSEL, qsel); return mt7601u_dma_skb_wrap(skb, WLAN_PORT, DMA_PACKET, flags); } diff --git a/drivers/net/wireless/mediatek/mt7601u/eeprom.c b/drivers/net/wireless/mediatek/mt7601u/eeprom.c index 8d8ee0344f7b..da6faea092d6 100644 --- a/drivers/net/wireless/mediatek/mt7601u/eeprom.c +++ b/drivers/net/wireless/mediatek/mt7601u/eeprom.c @@ -45,8 +45,8 @@ mt7601u_efuse_read(struct mt7601u_dev *dev, u16 addr, u8 *data, val = mt76_rr(dev, MT_EFUSE_CTRL); val &= ~(MT_EFUSE_CTRL_AIN | MT_EFUSE_CTRL_MODE); - val |= MT76_SET(MT_EFUSE_CTRL_AIN, addr & ~0xf) | - MT76_SET(MT_EFUSE_CTRL_MODE, mode) | + val |= FIELD_PREP(MT_EFUSE_CTRL_AIN, addr & ~0xf) | + FIELD_PREP(MT_EFUSE_CTRL_MODE, mode) | MT_EFUSE_CTRL_KICK; mt76_wr(dev, MT_EFUSE_CTRL, val); @@ -128,8 +128,8 @@ mt7601u_set_chip_cap(struct mt7601u_dev *dev, u8 *eeprom) if (!field_valid(nic_conf0 >> 8)) return; - if (MT76_GET(MT_EE_NIC_CONF_0_RX_PATH, nic_conf0) > 1 || - MT76_GET(MT_EE_NIC_CONF_0_TX_PATH, nic_conf0) > 1) + if (FIELD_GET(MT_EE_NIC_CONF_0_RX_PATH, nic_conf0) > 1 || + FIELD_GET(MT_EE_NIC_CONF_0_TX_PATH, nic_conf0) > 1) dev_err(dev->dev, "Error: device has more than 1 RX/TX stream!\n"); } @@ -150,7 +150,7 @@ mt7601u_set_macaddr(struct mt7601u_dev *dev, const u8 *eeprom) mt76_wr(dev, MT_MAC_ADDR_DW0, get_unaligned_le32(dev->macaddr)); mt76_wr(dev, MT_MAC_ADDR_DW1, get_unaligned_le16(dev->macaddr + 4) | - MT76_SET(MT_MAC_ADDR_DW1_U2ME_MASK, 0xff)); + FIELD_PREP(MT_MAC_ADDR_DW1_U2ME_MASK, 0xff)); return 0; } @@ -176,7 +176,7 @@ mt7601u_set_channel_power(struct mt7601u_dev *dev, u8 *eeprom) u8 max_pwr; val = mt7601u_rr(dev, MT_TX_ALC_CFG_0); - max_pwr = MT76_GET(MT_TX_ALC_CFG_0_LIMIT_0, val); + max_pwr = FIELD_GET(MT_TX_ALC_CFG_0_LIMIT_0, val); if (mt7601u_has_tssi(dev, eeprom)) { mt7601u_set_channel_target_power(dev, eeprom, max_pwr); diff --git a/drivers/net/wireless/mediatek/mt7601u/init.c b/drivers/net/wireless/mediatek/mt7601u/init.c index 8fa78d7156be..44d46e25db80 100644 --- a/drivers/net/wireless/mediatek/mt7601u/init.c +++ b/drivers/net/wireless/mediatek/mt7601u/init.c @@ -108,8 +108,9 @@ static void mt7601u_init_usb_dma(struct mt7601u_dev *dev) { u32 val; - val = MT76_SET(MT_USB_DMA_CFG_RX_BULK_AGG_TOUT, MT_USB_AGGR_TIMEOUT) | - MT76_SET(MT_USB_DMA_CFG_RX_BULK_AGG_LMT, MT_USB_AGGR_SIZE_LIMIT) | + val = FIELD_PREP(MT_USB_DMA_CFG_RX_BULK_AGG_TOUT, MT_USB_AGGR_TIMEOUT) | + FIELD_PREP(MT_USB_DMA_CFG_RX_BULK_AGG_LMT, + MT_USB_AGGR_SIZE_LIMIT) | MT_USB_DMA_CFG_RX_BULK_EN | MT_USB_DMA_CFG_TX_BULK_EN; if (dev->in_max_packet == 512) @@ -396,8 +397,9 @@ int mt7601u_init_hardware(struct mt7601u_dev *dev) mt7601u_rmw(dev, MT_US_CYC_CFG, MT_US_CYC_CNT, 0x1e); - mt7601u_wr(dev, MT_TXOP_CTRL_CFG, MT76_SET(MT_TXOP_TRUN_EN, 0x3f) | - MT76_SET(MT_TXOP_EXT_CCA_DLY, 0x58)); + mt7601u_wr(dev, MT_TXOP_CTRL_CFG, + FIELD_PREP(MT_TXOP_TRUN_EN, 0x3f) | + FIELD_PREP(MT_TXOP_EXT_CCA_DLY, 0x58)); ret = mt7601u_eeprom_init(dev); if (ret) diff --git a/drivers/net/wireless/mediatek/mt7601u/mac.c b/drivers/net/wireless/mediatek/mt7601u/mac.c index e21c53ed09fb..3c576392ed89 100644 --- a/drivers/net/wireless/mediatek/mt7601u/mac.c +++ b/drivers/net/wireless/mediatek/mt7601u/mac.c @@ -19,13 +19,13 @@ static void mt76_mac_process_tx_rate(struct ieee80211_tx_rate *txrate, u16 rate) { - u8 idx = MT76_GET(MT_TXWI_RATE_MCS, rate); + u8 idx = FIELD_GET(MT_TXWI_RATE_MCS, rate); txrate->idx = 0; txrate->flags = 0; txrate->count = 1; - switch (MT76_GET(MT_TXWI_RATE_PHY_MODE, rate)) { + switch (FIELD_GET(MT_TXWI_RATE_PHY_MODE, rate)) { case MT_PHY_TYPE_OFDM: txrate->idx = idx + 4; return; @@ -47,7 +47,7 @@ mt76_mac_process_tx_rate(struct ieee80211_tx_rate *txrate, u16 rate) return; } - if (MT76_GET(MT_TXWI_RATE_BW, rate) == MT_PHY_BW_40) + if (FIELD_GET(MT_TXWI_RATE_BW, rate) == MT_PHY_BW_40) txrate->flags |= IEEE80211_TX_RC_40_MHZ_WIDTH; if (rate & MT_TXWI_RATE_SGI) @@ -125,9 +125,9 @@ u16 mt76_mac_tx_rate_val(struct mt7601u_dev *dev, bw = 0; } - rateval = MT76_SET(MT_RXWI_RATE_MCS, rate_idx); - rateval |= MT76_SET(MT_RXWI_RATE_PHY, phy); - rateval |= MT76_SET(MT_RXWI_RATE_BW, bw); + rateval = FIELD_PREP(MT_RXWI_RATE_MCS, rate_idx); + rateval |= FIELD_PREP(MT_RXWI_RATE_PHY, phy); + rateval |= FIELD_PREP(MT_RXWI_RATE_BW, bw); if (rate->flags & IEEE80211_TX_RC_SHORT_GI) rateval |= MT_RXWI_RATE_SGI; @@ -156,9 +156,9 @@ struct mt76_tx_status mt7601u_mac_fetch_tx_status(struct mt7601u_dev *dev) stat.success = !!(val & MT_TX_STAT_FIFO_SUCCESS); stat.aggr = !!(val & MT_TX_STAT_FIFO_AGGR); stat.ack_req = !!(val & MT_TX_STAT_FIFO_ACKREQ); - stat.pktid = MT76_GET(MT_TX_STAT_FIFO_PID_TYPE, val); - stat.wcid = MT76_GET(MT_TX_STAT_FIFO_WCID, val); - stat.rate = MT76_GET(MT_TX_STAT_FIFO_RATE, val); + stat.pktid = FIELD_GET(MT_TX_STAT_FIFO_PID_TYPE, val); + stat.wcid = FIELD_GET(MT_TX_STAT_FIFO_WCID, val); + stat.rate = FIELD_GET(MT_TX_STAT_FIFO_RATE, val); return stat; } @@ -270,7 +270,7 @@ void mt7601u_mac_config_tsf(struct mt7601u_dev *dev, bool enable, int interval) } val &= ~MT_BEACON_TIME_CFG_INTVAL; - val |= MT76_SET(MT_BEACON_TIME_CFG_INTVAL, interval << 4) | + val |= FIELD_PREP(MT_BEACON_TIME_CFG_INTVAL, interval << 4) | MT_BEACON_TIME_CFG_TIMER_EN | MT_BEACON_TIME_CFG_SYNC_MODE | MT_BEACON_TIME_CFG_TBTT_EN; @@ -349,8 +349,8 @@ mt7601u_mac_wcid_setup(struct mt7601u_dev *dev, u8 idx, u8 vif_idx, u8 *mac) u8 zmac[ETH_ALEN] = {}; u32 attr; - attr = MT76_SET(MT_WCID_ATTR_BSS_IDX, vif_idx & 7) | - MT76_SET(MT_WCID_ATTR_BSS_IDX_EXT, !!(vif_idx & 8)); + attr = FIELD_PREP(MT_WCID_ATTR_BSS_IDX, vif_idx & 7) | + FIELD_PREP(MT_WCID_ATTR_BSS_IDX_EXT, !!(vif_idx & 8)); mt76_wr(dev, MT_WCID_ATTR(idx), attr); @@ -382,15 +382,15 @@ void mt7601u_mac_set_ampdu_factor(struct mt7601u_dev *dev) rcu_read_unlock(); mt7601u_wr(dev, MT_MAX_LEN_CFG, 0xa0fff | - MT76_SET(MT_MAX_LEN_CFG_AMPDU, min_factor)); + FIELD_PREP(MT_MAX_LEN_CFG_AMPDU, min_factor)); } static void mt76_mac_process_rate(struct ieee80211_rx_status *status, u16 rate) { - u8 idx = MT76_GET(MT_RXWI_RATE_MCS, rate); + u8 idx = FIELD_GET(MT_RXWI_RATE_MCS, rate); - switch (MT76_GET(MT_RXWI_RATE_PHY, rate)) { + switch (FIELD_GET(MT_RXWI_RATE_PHY, rate)) { case MT_PHY_TYPE_OFDM: if (WARN_ON(idx >= 8)) idx = 0; @@ -436,7 +436,7 @@ mt7601u_rx_monitor_beacon(struct mt7601u_dev *dev, struct mt7601u_rxwi *rxwi, u16 rate, int rssi) { dev->bcn_freq_off = rxwi->freq_off; - dev->bcn_phy_mode = MT76_GET(MT_RXWI_RATE_PHY, rate); + dev->bcn_phy_mode = FIELD_GET(MT_RXWI_RATE_PHY, rate); dev->avg_rssi = (dev->avg_rssi * 15) / 16 + (rssi << 8); } @@ -458,7 +458,7 @@ u32 mt76_mac_process_rx(struct mt7601u_dev *dev, struct sk_buff *skb, u16 rate = le16_to_cpu(rxwi->rate); int rssi; - len = MT76_GET(MT_RXWI_CTL_MPDU_LEN, ctl); + len = FIELD_GET(MT_RXWI_CTL_MPDU_LEN, ctl); if (len < 10) return 0; @@ -542,8 +542,8 @@ int mt76_mac_wcid_set_key(struct mt7601u_dev *dev, u8 idx, val = mt7601u_rr(dev, MT_WCID_ATTR(idx)); val &= ~MT_WCID_ATTR_PKEY_MODE & ~MT_WCID_ATTR_PKEY_MODE_EXT; - val |= MT76_SET(MT_WCID_ATTR_PKEY_MODE, cipher & 7) | - MT76_SET(MT_WCID_ATTR_PKEY_MODE_EXT, cipher >> 3); + val |= FIELD_PREP(MT_WCID_ATTR_PKEY_MODE, cipher & 7) | + FIELD_PREP(MT_WCID_ATTR_PKEY_MODE_EXT, cipher >> 3); val &= ~MT_WCID_ATTR_PAIRWISE; val |= MT_WCID_ATTR_PAIRWISE * !!(key && key->flags & IEEE80211_KEY_FLAG_PAIRWISE); diff --git a/drivers/net/wireless/mediatek/mt7601u/mcu.c b/drivers/net/wireless/mediatek/mt7601u/mcu.c index 91c4b3427965..dbdfb3f5c507 100644 --- a/drivers/net/wireless/mediatek/mt7601u/mcu.c +++ b/drivers/net/wireless/mediatek/mt7601u/mcu.c @@ -43,8 +43,8 @@ static inline void mt7601u_dma_skb_wrap_cmd(struct sk_buff *skb, u8 seq, enum mcu_cmd cmd) { WARN_ON(mt7601u_dma_skb_wrap(skb, CPU_TX_PORT, DMA_COMMAND, - MT76_SET(MT_TXD_CMD_INFO_SEQ, seq) | - MT76_SET(MT_TXD_CMD_INFO_TYPE, cmd))); + FIELD_PREP(MT_TXD_CMD_INFO_SEQ, seq) | + FIELD_PREP(MT_TXD_CMD_INFO_TYPE, cmd))); } static inline void trace_mt_mcu_msg_send_cs(struct mt7601u_dev *dev, @@ -100,13 +100,13 @@ static int mt7601u_mcu_wait_resp(struct mt7601u_dev *dev, u8 seq) dev_err(dev->dev, "Error: MCU resp urb failed:%d\n", urb_status); - if (MT76_GET(MT_RXD_CMD_INFO_CMD_SEQ, rxfce) == seq && - MT76_GET(MT_RXD_CMD_INFO_EVT_TYPE, rxfce) == CMD_DONE) + if (FIELD_GET(MT_RXD_CMD_INFO_CMD_SEQ, rxfce) == seq && + FIELD_GET(MT_RXD_CMD_INFO_EVT_TYPE, rxfce) == CMD_DONE) return 0; - dev_err(dev->dev, "Error: MCU resp evt:%hhx seq:%hhx-%hhx!\n", - MT76_GET(MT_RXD_CMD_INFO_EVT_TYPE, rxfce), - seq, MT76_GET(MT_RXD_CMD_INFO_CMD_SEQ, rxfce)); + dev_err(dev->dev, "Error: MCU resp evt:%lx seq:%hhx-%lx!\n", + FIELD_GET(MT_RXD_CMD_INFO_EVT_TYPE, rxfce), + seq, FIELD_GET(MT_RXD_CMD_INFO_CMD_SEQ, rxfce)); } dev_err(dev->dev, "Error: %s timed out\n", __func__); @@ -291,9 +291,9 @@ static int __mt7601u_dma_fw(struct mt7601u_dev *dev, u32 val; int ret; - reg = cpu_to_le32(MT76_SET(MT_TXD_INFO_TYPE, DMA_PACKET) | - MT76_SET(MT_TXD_INFO_D_PORT, CPU_TX_PORT) | - MT76_SET(MT_TXD_INFO_LEN, len)); + reg = cpu_to_le32(FIELD_PREP(MT_TXD_INFO_TYPE, DMA_PACKET) | + FIELD_PREP(MT_TXD_INFO_D_PORT, CPU_TX_PORT) | + FIELD_PREP(MT_TXD_INFO_LEN, len)); memcpy(buf.buf, ®, sizeof(reg)); memcpy(buf.buf + sizeof(reg), data, len); memset(buf.buf + sizeof(reg) + len, 0, 8); diff --git a/drivers/net/wireless/mediatek/mt7601u/mt7601u.h b/drivers/net/wireless/mediatek/mt7601u/mt7601u.h index 428bd2f10b7b..c7ec40475a5f 100644 --- a/drivers/net/wireless/mediatek/mt7601u/mt7601u.h +++ b/drivers/net/wireless/mediatek/mt7601u/mt7601u.h @@ -15,6 +15,7 @@ #ifndef MT7601U_H #define MT7601U_H +#include #include #include #include @@ -24,7 +25,6 @@ #include #include "regs.h" -#include "util.h" #define MT_CALIBRATE_INTERVAL (4 * HZ) @@ -299,7 +299,7 @@ bool mt76_poll_msec(struct mt7601u_dev *dev, u32 offset, u32 mask, u32 val, /* Compatibility with mt76 */ #define mt76_rmw_field(_dev, _reg, _field, _val) \ - mt76_rmw(_dev, _reg, _field, MT76_SET(_field, _val)) + mt76_rmw(_dev, _reg, _field, FIELD_PREP(_field, _val)) static inline u32 mt76_rr(struct mt7601u_dev *dev, u32 offset) { diff --git a/drivers/net/wireless/mediatek/mt7601u/phy.c b/drivers/net/wireless/mediatek/mt7601u/phy.c index 1908af6add87..ca09a5d4305e 100644 --- a/drivers/net/wireless/mediatek/mt7601u/phy.c +++ b/drivers/net/wireless/mediatek/mt7601u/phy.c @@ -41,11 +41,12 @@ mt7601u_rf_wr(struct mt7601u_dev *dev, u8 bank, u8 offset, u8 value) goto out; } - mt7601u_wr(dev, MT_RF_CSR_CFG, MT76_SET(MT_RF_CSR_CFG_DATA, value) | - MT76_SET(MT_RF_CSR_CFG_REG_BANK, bank) | - MT76_SET(MT_RF_CSR_CFG_REG_ID, offset) | - MT_RF_CSR_CFG_WR | - MT_RF_CSR_CFG_KICK); + mt7601u_wr(dev, MT_RF_CSR_CFG, + FIELD_PREP(MT_RF_CSR_CFG_DATA, value) | + FIELD_PREP(MT_RF_CSR_CFG_REG_BANK, bank) | + FIELD_PREP(MT_RF_CSR_CFG_REG_ID, offset) | + MT_RF_CSR_CFG_WR | + MT_RF_CSR_CFG_KICK); trace_rf_write(dev, bank, offset, value); out: mutex_unlock(&dev->reg_atomic_mutex); @@ -74,17 +75,18 @@ mt7601u_rf_rr(struct mt7601u_dev *dev, u8 bank, u8 offset) if (!mt76_poll(dev, MT_RF_CSR_CFG, MT_RF_CSR_CFG_KICK, 0, 100)) goto out; - mt7601u_wr(dev, MT_RF_CSR_CFG, MT76_SET(MT_RF_CSR_CFG_REG_BANK, bank) | - MT76_SET(MT_RF_CSR_CFG_REG_ID, offset) | - MT_RF_CSR_CFG_KICK); + mt7601u_wr(dev, MT_RF_CSR_CFG, + FIELD_PREP(MT_RF_CSR_CFG_REG_BANK, bank) | + FIELD_PREP(MT_RF_CSR_CFG_REG_ID, offset) | + MT_RF_CSR_CFG_KICK); if (!mt76_poll(dev, MT_RF_CSR_CFG, MT_RF_CSR_CFG_KICK, 0, 100)) goto out; val = mt7601u_rr(dev, MT_RF_CSR_CFG); - if (MT76_GET(MT_RF_CSR_CFG_REG_ID, val) == offset && - MT76_GET(MT_RF_CSR_CFG_REG_BANK, val) == bank) { - ret = MT76_GET(MT_RF_CSR_CFG_DATA, val); + if (FIELD_GET(MT_RF_CSR_CFG_REG_ID, val) == offset && + FIELD_GET(MT_RF_CSR_CFG_REG_BANK, val) == bank) { + ret = FIELD_GET(MT_RF_CSR_CFG_DATA, val); trace_rf_read(dev, bank, offset, ret); } out: @@ -139,8 +141,8 @@ static void mt7601u_bbp_wr(struct mt7601u_dev *dev, u8 offset, u8 val) } mt7601u_wr(dev, MT_BBP_CSR_CFG, - MT76_SET(MT_BBP_CSR_CFG_VAL, val) | - MT76_SET(MT_BBP_CSR_CFG_REG_NUM, offset) | + FIELD_PREP(MT_BBP_CSR_CFG_VAL, val) | + FIELD_PREP(MT_BBP_CSR_CFG_REG_NUM, offset) | MT_BBP_CSR_CFG_RW_MODE | MT_BBP_CSR_CFG_BUSY); trace_bbp_write(dev, offset, val); out: @@ -163,7 +165,7 @@ static int mt7601u_bbp_rr(struct mt7601u_dev *dev, u8 offset) goto out; mt7601u_wr(dev, MT_BBP_CSR_CFG, - MT76_SET(MT_BBP_CSR_CFG_REG_NUM, offset) | + FIELD_PREP(MT_BBP_CSR_CFG_REG_NUM, offset) | MT_BBP_CSR_CFG_RW_MODE | MT_BBP_CSR_CFG_BUSY | MT_BBP_CSR_CFG_READ); @@ -171,8 +173,8 @@ static int mt7601u_bbp_rr(struct mt7601u_dev *dev, u8 offset) goto out; val = mt7601u_rr(dev, MT_BBP_CSR_CFG); - if (MT76_GET(MT_BBP_CSR_CFG_REG_NUM, val) == offset) { - ret = MT76_GET(MT_BBP_CSR_CFG_VAL, val); + if (FIELD_GET(MT_BBP_CSR_CFG_REG_NUM, val) == offset) { + ret = FIELD_GET(MT_BBP_CSR_CFG_VAL, val); trace_bbp_read(dev, offset, ret); } out: @@ -249,9 +251,9 @@ int mt7601u_phy_get_rssi(struct mt7601u_dev *dev, /* bw40 */ { -2, 16, 34 } } }; - int bw = MT76_GET(MT_RXWI_RATE_BW, rate); - int aux_lna = MT76_GET(MT_RXWI_ANT_AUX_LNA, rxwi->ant); - int lna_id = MT76_GET(MT_RXWI_GAIN_RSSI_LNA_ID, rxwi->gain); + int bw = FIELD_GET(MT_RXWI_RATE_BW, rate); + int aux_lna = FIELD_GET(MT_RXWI_ANT_AUX_LNA, rxwi->ant); + int lna_id = FIELD_GET(MT_RXWI_GAIN_RSSI_LNA_ID, rxwi->gain); int val; if (lna_id) /* LNA id can be 0, 2, 3. */ @@ -259,7 +261,7 @@ int mt7601u_phy_get_rssi(struct mt7601u_dev *dev, val = 8; val -= lna[aux_lna][bw][lna_id]; - val -= MT76_GET(MT_RXWI_GAIN_RSSI_VAL, rxwi->gain); + val -= FIELD_GET(MT_RXWI_GAIN_RSSI_VAL, rxwi->gain); val -= dev->ee->lna_gain; val -= dev->ee->rssi_offset[0]; @@ -939,7 +941,7 @@ static int mt7601u_tssi_cal(struct mt7601u_dev *dev) dev_dbg(dev->dev, "final diff: %08x\n", diff_pwr); val = mt7601u_rr(dev, MT_TX_ALC_CFG_1); - curr_pwr = s6_to_int(MT76_GET(MT_TX_ALC_CFG_1_TEMP_COMP, val)); + curr_pwr = s6_to_int(FIELD_GET(MT_TX_ALC_CFG_1_TEMP_COMP, val)); diff_pwr += curr_pwr; val = (val & ~MT_TX_ALC_CFG_1_TEMP_COMP) | int_to_s6(diff_pwr); mt7601u_wr(dev, MT_TX_ALC_CFG_1, val); diff --git a/drivers/net/wireless/mediatek/mt7601u/tx.c b/drivers/net/wireless/mediatek/mt7601u/tx.c index a0a33dc8f6bc..ad77bec1ba0f 100644 --- a/drivers/net/wireless/mediatek/mt7601u/tx.c +++ b/drivers/net/wireless/mediatek/mt7601u/tx.c @@ -175,11 +175,12 @@ mt7601u_push_txwi(struct mt7601u_dev *dev, struct sk_buff *skb, ba_size = min_t(int, 63, ba_size); if (info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) ba_size = 0; - txwi->ack_ctl |= MT76_SET(MT_TXWI_ACK_CTL_BA_WINDOW, ba_size); + txwi->ack_ctl |= FIELD_PREP(MT_TXWI_ACK_CTL_BA_WINDOW, ba_size); - txwi->flags = cpu_to_le16(MT_TXWI_FLAGS_AMPDU | - MT76_SET(MT_TXWI_FLAGS_MPDU_DENSITY, - sta->ht_cap.ampdu_density)); + txwi->flags = + cpu_to_le16(MT_TXWI_FLAGS_AMPDU | + FIELD_PREP(MT_TXWI_FLAGS_MPDU_DENSITY, + sta->ht_cap.ampdu_density)); if (info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) txwi->flags = 0; } @@ -188,7 +189,7 @@ mt7601u_push_txwi(struct mt7601u_dev *dev, struct sk_buff *skb, is_probe = !!(info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE); pkt_id = mt7601u_tx_pktid_enc(dev, rate_ctl & 0x7, is_probe); - pkt_len |= MT76_SET(MT_TXWI_LEN_PKTID, pkt_id); + pkt_len |= FIELD_PREP(MT_TXWI_LEN_PKTID, pkt_id); txwi->len_ctl = cpu_to_le16(pkt_len); return txwi; @@ -285,9 +286,9 @@ int mt7601u_conf_tx(struct ieee80211_hw *hw, struct ieee80211_vif *vif, WARN_ON(cw_min > 0xf); WARN_ON(cw_max > 0xf); - val = MT76_SET(MT_EDCA_CFG_AIFSN, params->aifs) | - MT76_SET(MT_EDCA_CFG_CWMIN, cw_min) | - MT76_SET(MT_EDCA_CFG_CWMAX, cw_max); + val = FIELD_PREP(MT_EDCA_CFG_AIFSN, params->aifs) | + FIELD_PREP(MT_EDCA_CFG_CWMIN, cw_min) | + FIELD_PREP(MT_EDCA_CFG_CWMAX, cw_max); /* TODO: based on user-controlled EnableTxBurst var vendor drv sets * a really long txop on AC0 (see connect.c:2009) but only on * connect? When not connected should be 0. @@ -295,7 +296,7 @@ int mt7601u_conf_tx(struct ieee80211_hw *hw, struct ieee80211_vif *vif, if (!hw_q) val |= 0x60; else - val |= MT76_SET(MT_EDCA_CFG_TXOP, params->txop); + val |= FIELD_PREP(MT_EDCA_CFG_TXOP, params->txop); mt76_wr(dev, MT_EDCA_CFG_AC(hw_q), val); val = mt76_rr(dev, MT_WMM_TXOP(hw_q)); diff --git a/drivers/net/wireless/mediatek/mt7601u/util.h b/drivers/net/wireless/mediatek/mt7601u/util.h deleted file mode 100644 index b89140bf1210..000000000000 --- a/drivers/net/wireless/mediatek/mt7601u/util.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (C) 2014 Felix Fietkau - * Copyright (C) 2004 - 2009 Ivo van Doorn - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#ifndef __MT76_UTIL_H -#define __MT76_UTIL_H - -/* - * Power of two check, this will check - * if the mask that has been given contains and contiguous set of bits. - * Note that we cannot use the is_power_of_2() function since this - * check must be done at compile-time. - */ -#define is_power_of_two(x) ( !((x) & ((x)-1)) ) -#define low_bit_mask(x) ( ((x)-1) & ~(x) ) -#define is_valid_mask(x) is_power_of_two(1LU + (x) + low_bit_mask(x)) - -/* - * Macros to find first set bit in a variable. - * These macros behave the same as the __ffs() functions but - * the most important difference that this is done during - * compile-time rather then run-time. - */ -#define compile_ffs2(__x) \ - __builtin_choose_expr(((__x) & 0x1), 0, 1) - -#define compile_ffs4(__x) \ - __builtin_choose_expr(((__x) & 0x3), \ - (compile_ffs2((__x))), \ - (compile_ffs2((__x) >> 2) + 2)) - -#define compile_ffs8(__x) \ - __builtin_choose_expr(((__x) & 0xf), \ - (compile_ffs4((__x))), \ - (compile_ffs4((__x) >> 4) + 4)) - -#define compile_ffs16(__x) \ - __builtin_choose_expr(((__x) & 0xff), \ - (compile_ffs8((__x))), \ - (compile_ffs8((__x) >> 8) + 8)) - -#define compile_ffs32(__x) \ - __builtin_choose_expr(((__x) & 0xffff), \ - (compile_ffs16((__x))), \ - (compile_ffs16((__x) >> 16) + 16)) - -/* - * This macro will check the requirements for the FIELD{8,16,32} macros - * The mask should be a constant non-zero contiguous set of bits which - * does not exceed the given typelimit. - */ -#define FIELD_CHECK(__mask) \ - BUILD_BUG_ON(!(__mask) || !is_valid_mask(__mask)) - -#define MT76_SET(_mask, _val) \ - ({ \ - FIELD_CHECK(_mask); \ - (((u32) (_val)) << compile_ffs32(_mask)) & _mask; \ - }) - -#define MT76_GET(_mask, _val) \ - ({ \ - FIELD_CHECK(_mask); \ - (u32) (((_val) & _mask) >> compile_ffs32(_mask)); \ - }) - -#endif From 634faf3686900ccdee87b77e2c56df8b2159912b Mon Sep 17 00:00:00 2001 From: Arend Van Spriel Date: Mon, 5 Sep 2016 11:42:12 +0100 Subject: [PATCH 0836/1715] brcmfmac: add support for bcm4339 chip with modalias sdio:c00v02D0d4339 The driver already supports the bcm4339 chipset but only for the variant that shares the same modalias as the bcm4335, ie. sdio:c00v02D0d4335. It turns out that there are also bcm4339 devices out there that have a more distiguishable modalias sdio:c00v02D0d4339. Reported-by: Steve deRosier Reviewed-by: Hante Meuleman Reviewed-by: Pieter-Paul Giesberts Reviewed-by: Franky Lin Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c | 1 + drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c | 3 ++- include/linux/mmc/sdio_ids.h | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c index f549c25608d6..03404cbe9237 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c @@ -1101,6 +1101,7 @@ static const struct sdio_device_id brcmf_sdmmc_ids[] = { BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_43341), BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_43362), BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_4335_4339), + BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_4339), BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_43430), BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_4345), BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_4354), diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c index 68ab3ac15650..589a49cd9cd5 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c @@ -3757,7 +3757,8 @@ static u32 brcmf_sdio_buscore_read32(void *ctx, u32 addr) u32 val, rev; val = brcmf_sdiod_regrl(sdiodev, addr, NULL); - if (sdiodev->func[0]->device == SDIO_DEVICE_ID_BROADCOM_4335_4339 && + if ((sdiodev->func[0]->device == SDIO_DEVICE_ID_BROADCOM_4335_4339 || + sdiodev->func[0]->device == SDIO_DEVICE_ID_BROADCOM_4339) && addr == CORE_CC_REG(SI_ENUM_BASE, chipid)) { rev = (val & CID_REV_MASK) >> CID_REV_SHIFT; if (rev >= 2) { diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h index 0d126aeb3ec0..d43ef96bf075 100644 --- a/include/linux/mmc/sdio_ids.h +++ b/include/linux/mmc/sdio_ids.h @@ -32,6 +32,7 @@ #define SDIO_DEVICE_ID_BROADCOM_43340 0xa94c #define SDIO_DEVICE_ID_BROADCOM_43341 0xa94d #define SDIO_DEVICE_ID_BROADCOM_4335_4339 0x4335 +#define SDIO_DEVICE_ID_BROADCOM_4339 0x4339 #define SDIO_DEVICE_ID_BROADCOM_43362 0xa962 #define SDIO_DEVICE_ID_BROADCOM_43430 0xa9a6 #define SDIO_DEVICE_ID_BROADCOM_4345 0x4345 From 5251b6be8bb5c5675bdf12347c7b83937a5c91e5 Mon Sep 17 00:00:00 2001 From: Arend Van Spriel Date: Mon, 5 Sep 2016 11:42:13 +0100 Subject: [PATCH 0837/1715] brcmfmac: sdio: shorten retry loop in brcmf_sdio_kso_control() In brcmf_sdio_kso_control() there is a retry loop as hardware may take time to settle. However, when the call to brcmf_sdiod_regrb() returns an error it is due to SDIO access failure and it makes no sense to wait for hardware to settle. This patch aborts the loop after a number of subsequent access errors. Reviewed-by: Hante Meuleman Reviewed-by: Pieter-Paul Giesberts Reviewed-by: Franky Lin Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- .../net/wireless/broadcom/brcm80211/brcmfmac/sdio.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c index 589a49cd9cd5..b892dac70f4b 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c @@ -313,6 +313,7 @@ struct rte_console { #define KSO_WAIT_US 50 #define MAX_KSO_ATTEMPTS (PMU_MAX_TRANSITION_DLY/KSO_WAIT_US) +#define BRCMF_SDIO_MAX_ACCESS_ERRORS 5 /* * Conversion of 802.1D priority to precedence level @@ -677,6 +678,7 @@ brcmf_sdio_kso_control(struct brcmf_sdio *bus, bool on) { u8 wr_val = 0, rd_val, cmp_val, bmask; int err = 0; + int err_cnt = 0; int try_cnt = 0; brcmf_dbg(TRACE, "Enter: on=%d\n", on); @@ -712,9 +714,14 @@ brcmf_sdio_kso_control(struct brcmf_sdio *bus, bool on) */ rd_val = brcmf_sdiod_regrb(bus->sdiodev, SBSDIO_FUNC1_SLEEPCSR, &err); - if (((rd_val & bmask) == cmp_val) && !err) + if (!err) { + if ((rd_val & bmask) == cmp_val) + break; + err_cnt = 0; + } + /* bail out upon subsequent access errors */ + if (err && (err_cnt++ > BRCMF_SDIO_MAX_ACCESS_ERRORS)) break; - udelay(KSO_WAIT_US); brcmf_sdiod_regwb(bus->sdiodev, SBSDIO_FUNC1_SLEEPCSR, wr_val, &err); From 4c5dae59d2e9386c706a2f3c7c2746ae277bf568 Mon Sep 17 00:00:00 2001 From: Amitkumar Karwar Date: Tue, 26 Jul 2016 19:31:44 +0530 Subject: [PATCH 0838/1715] mwifiex: add PCIe function level reset support This patch implements pre and post FLR handlers to support PCIe FLR functionality. Software cleanup is performed in pre-FLR whereas firmware is downloaded and software is re-initialised in post-FLR handler. Following command triggers FLR. echo "1" > /sys/bus/pci/devices/$NUMBER/reset This feature can be used as a recovery mechanism when firmware gets hang. Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/main.c | 242 ++++++++++++++++++-- drivers/net/wireless/marvell/mwifiex/main.h | 3 + drivers/net/wireless/marvell/mwifiex/pcie.c | 136 ++++++++++- drivers/net/wireless/marvell/mwifiex/pcie.h | 1 + 4 files changed, 365 insertions(+), 17 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/main.c b/drivers/net/wireless/marvell/mwifiex/main.c index 029e5da1e6fd..9b2e98cfe090 100644 --- a/drivers/net/wireless/marvell/mwifiex/main.c +++ b/drivers/net/wireless/marvell/mwifiex/main.c @@ -493,9 +493,11 @@ static void mwifiex_free_adapter(struct mwifiex_adapter *adapter) */ static void mwifiex_terminate_workqueue(struct mwifiex_adapter *adapter) { - flush_workqueue(adapter->workqueue); - destroy_workqueue(adapter->workqueue); - adapter->workqueue = NULL; + if (adapter->workqueue) { + flush_workqueue(adapter->workqueue); + destroy_workqueue(adapter->workqueue); + adapter->workqueue = NULL; + } if (adapter->rx_workqueue) { flush_workqueue(adapter->rx_workqueue); @@ -574,10 +576,13 @@ static void mwifiex_fw_dpc(const struct firmware *firmware, void *context) } priv = adapter->priv[MWIFIEX_BSS_ROLE_STA]; - if (mwifiex_register_cfg80211(adapter)) { - mwifiex_dbg(adapter, ERROR, - "cannot register with cfg80211\n"); - goto err_init_fw; + + if (!adapter->wiphy) { + if (mwifiex_register_cfg80211(adapter)) { + mwifiex_dbg(adapter, ERROR, + "cannot register with cfg80211\n"); + goto err_init_fw; + } } if (mwifiex_init_channel_scan_gap(adapter)) { @@ -671,7 +676,8 @@ done: /* * This function initializes the hardware and gets firmware. */ -static int mwifiex_init_hw_fw(struct mwifiex_adapter *adapter) +static int mwifiex_init_hw_fw(struct mwifiex_adapter *adapter, + bool req_fw_nowait) { int ret; @@ -686,12 +692,25 @@ static int mwifiex_init_hw_fw(struct mwifiex_adapter *adapter) return -1; } } - ret = request_firmware_nowait(THIS_MODULE, 1, adapter->fw_name, - adapter->dev, GFP_KERNEL, adapter, - mwifiex_fw_dpc); - if (ret < 0) - mwifiex_dbg(adapter, ERROR, - "request_firmware_nowait error %d\n", ret); + + if (req_fw_nowait) { + ret = request_firmware_nowait(THIS_MODULE, 1, adapter->fw_name, + adapter->dev, GFP_KERNEL, adapter, + mwifiex_fw_dpc); + if (ret < 0) + mwifiex_dbg(adapter, ERROR, + "request_firmware_nowait error %d\n", ret); + } else { + ret = request_firmware(&adapter->firmware, + adapter->fw_name, + adapter->dev); + if (ret < 0) + mwifiex_dbg(adapter, ERROR, + "request_firmware error %d\n", ret); + else + mwifiex_fw_dpc(adapter->firmware, (void *)adapter); + } + return ret; } @@ -1340,6 +1359,199 @@ static void mwifiex_main_work_queue(struct work_struct *work) mwifiex_main_process(adapter); } +/* + * This function gets called during PCIe function level reset. Required + * code is extracted from mwifiex_remove_card() + */ +static int +mwifiex_shutdown_sw(struct mwifiex_adapter *adapter, struct semaphore *sem) +{ + struct mwifiex_private *priv; + int i; + + if (down_interruptible(sem)) + goto exit_sem_err; + + if (!adapter) + goto exit_remove; + + priv = mwifiex_get_priv(adapter, MWIFIEX_BSS_ROLE_ANY); + mwifiex_deauthenticate(priv, NULL); + + /* We can no longer handle interrupts once we start doing the teardown + * below. + */ + if (adapter->if_ops.disable_int) + adapter->if_ops.disable_int(adapter); + + adapter->surprise_removed = true; + mwifiex_terminate_workqueue(adapter); + + /* Stop data */ + for (i = 0; i < adapter->priv_num; i++) { + priv = adapter->priv[i]; + if (priv && priv->netdev) { + mwifiex_stop_net_dev_queue(priv->netdev, adapter); + if (netif_carrier_ok(priv->netdev)) + netif_carrier_off(priv->netdev); + netif_device_detach(priv->netdev); + } + } + + mwifiex_dbg(adapter, CMD, "cmd: calling mwifiex_shutdown_drv...\n"); + adapter->init_wait_q_woken = false; + + if (mwifiex_shutdown_drv(adapter) == -EINPROGRESS) + wait_event_interruptible(adapter->init_wait_q, + adapter->init_wait_q_woken); + if (adapter->if_ops.down_dev) + adapter->if_ops.down_dev(adapter); + + mwifiex_dbg(adapter, CMD, "cmd: mwifiex_shutdown_drv done\n"); + if (atomic_read(&adapter->rx_pending) || + atomic_read(&adapter->tx_pending) || + atomic_read(&adapter->cmd_pending)) { + mwifiex_dbg(adapter, ERROR, + "rx_pending=%d, tx_pending=%d,\t" + "cmd_pending=%d\n", + atomic_read(&adapter->rx_pending), + atomic_read(&adapter->tx_pending), + atomic_read(&adapter->cmd_pending)); + } + + for (i = 0; i < adapter->priv_num; i++) { + priv = adapter->priv[i]; + if (!priv) + continue; + rtnl_lock(); + if (priv->netdev && + priv->wdev.iftype != NL80211_IFTYPE_UNSPECIFIED) + mwifiex_del_virtual_intf(adapter->wiphy, &priv->wdev); + rtnl_unlock(); + } + +exit_remove: + up(sem); +exit_sem_err: + mwifiex_dbg(adapter, INFO, "%s, successful\n", __func__); + return 0; +} + +/* This function gets called during PCIe function level reset. Required + * code is extracted from mwifiex_add_card() + */ +static int +mwifiex_reinit_sw(struct mwifiex_adapter *adapter, struct semaphore *sem, + struct mwifiex_if_ops *if_ops, u8 iface_type) +{ + char fw_name[32]; + struct pcie_service_card *card = adapter->card; + + if (down_interruptible(sem)) + goto exit_sem_err; + + mwifiex_init_lock_list(adapter); + if (adapter->if_ops.up_dev) + adapter->if_ops.up_dev(adapter); + + adapter->iface_type = iface_type; + adapter->card_sem = sem; + + adapter->hw_status = MWIFIEX_HW_STATUS_INITIALIZING; + adapter->surprise_removed = false; + init_waitqueue_head(&adapter->init_wait_q); + adapter->is_suspended = false; + adapter->hs_activated = false; + init_waitqueue_head(&adapter->hs_activate_wait_q); + init_waitqueue_head(&adapter->cmd_wait_q.wait); + adapter->cmd_wait_q.status = 0; + adapter->scan_wait_q_woken = false; + + if ((num_possible_cpus() > 1) || adapter->iface_type == MWIFIEX_USB) + adapter->rx_work_enabled = true; + + adapter->workqueue = + alloc_workqueue("MWIFIEX_WORK_QUEUE", + WQ_HIGHPRI | WQ_MEM_RECLAIM | WQ_UNBOUND, 1); + if (!adapter->workqueue) + goto err_kmalloc; + + INIT_WORK(&adapter->main_work, mwifiex_main_work_queue); + + if (adapter->rx_work_enabled) { + adapter->rx_workqueue = alloc_workqueue("MWIFIEX_RX_WORK_QUEUE", + WQ_HIGHPRI | + WQ_MEM_RECLAIM | + WQ_UNBOUND, 1); + if (!adapter->rx_workqueue) + goto err_kmalloc; + INIT_WORK(&adapter->rx_work, mwifiex_rx_work_queue); + } + + /* Register the device. Fill up the private data structure with + * relevant information from the card. Some code extracted from + * mwifiex_register_dev() + */ + mwifiex_dbg(adapter, INFO, "%s, mwifiex_init_hw_fw()...\n", __func__); + strcpy(fw_name, adapter->fw_name); + strcpy(adapter->fw_name, PCIE8997_DEFAULT_WIFIFW_NAME); + + adapter->tx_buf_size = card->pcie.tx_buf_size; + adapter->ext_scan = card->pcie.can_ext_scan; + if (mwifiex_init_hw_fw(adapter, false)) { + strcpy(adapter->fw_name, fw_name); + mwifiex_dbg(adapter, ERROR, + "%s: firmware init failed\n", __func__); + goto err_init_fw; + } + strcpy(adapter->fw_name, fw_name); + mwifiex_dbg(adapter, INFO, "%s, successful\n", __func__); + up(sem); + return 0; + +err_init_fw: + mwifiex_dbg(adapter, ERROR, "info: %s: unregister device\n", __func__); + if (adapter->if_ops.unregister_dev) + adapter->if_ops.unregister_dev(adapter); + if (adapter->hw_status == MWIFIEX_HW_STATUS_READY) { + mwifiex_dbg(adapter, ERROR, + "info: %s: shutdown mwifiex\n", __func__); + adapter->init_wait_q_woken = false; + + if (mwifiex_shutdown_drv(adapter) == -EINPROGRESS) + wait_event_interruptible(adapter->init_wait_q, + adapter->init_wait_q_woken); + } + +err_kmalloc: + mwifiex_terminate_workqueue(adapter); + adapter->surprise_removed = true; + up(sem); +exit_sem_err: + mwifiex_dbg(adapter, INFO, "%s, error\n", __func__); + + return -1; +} + +/* This function processes pre and post PCIe function level resets. + * It performs software cleanup without touching PCIe specific code. + * Also, during initialization PCIe stuff is skipped. + */ +void mwifiex_do_flr(struct mwifiex_adapter *adapter, bool prepare) +{ + struct mwifiex_if_ops if_ops; + + if (!prepare) { + mwifiex_reinit_sw(adapter, adapter->card_sem, &if_ops, + adapter->iface_type); + } else { + memcpy(&if_ops, &adapter->if_ops, + sizeof(struct mwifiex_if_ops)); + mwifiex_shutdown_sw(adapter, adapter->card_sem); + } +} +EXPORT_SYMBOL_GPL(mwifiex_do_flr); + /* * This function adds the card. * @@ -1411,7 +1623,7 @@ mwifiex_add_card(void *card, struct semaphore *sem, goto err_registerdev; } - if (mwifiex_init_hw_fw(adapter)) { + if (mwifiex_init_hw_fw(adapter, true)) { pr_err("%s: firmware init failed\n", __func__); goto err_init_fw; } diff --git a/drivers/net/wireless/marvell/mwifiex/main.h b/drivers/net/wireless/marvell/mwifiex/main.h index 17221c4f681f..26df28f4bfb2 100644 --- a/drivers/net/wireless/marvell/mwifiex/main.h +++ b/drivers/net/wireless/marvell/mwifiex/main.h @@ -829,6 +829,8 @@ struct mwifiex_if_ops { void (*deaggr_pkt)(struct mwifiex_adapter *, struct sk_buff *); void (*multi_port_resync)(struct mwifiex_adapter *); bool (*is_port_ready)(struct mwifiex_private *); + void (*down_dev)(struct mwifiex_adapter *); + void (*up_dev)(struct mwifiex_adapter *); }; struct mwifiex_adapter { @@ -1629,4 +1631,5 @@ void mwifiex_debugfs_remove(void); void mwifiex_dev_debugfs_init(struct mwifiex_private *priv); void mwifiex_dev_debugfs_remove(struct mwifiex_private *priv); #endif +void mwifiex_do_flr(struct mwifiex_adapter *adapter, bool prepare); #endif /* !_MWIFIEX_MAIN_H_ */ diff --git a/drivers/net/wireless/marvell/mwifiex/pcie.c b/drivers/net/wireless/marvell/mwifiex/pcie.c index c1b3e8ef34ec..e4ef90e222d9 100644 --- a/drivers/net/wireless/marvell/mwifiex/pcie.c +++ b/drivers/net/wireless/marvell/mwifiex/pcie.c @@ -277,6 +277,52 @@ static const struct pci_device_id mwifiex_ids[] = { MODULE_DEVICE_TABLE(pci, mwifiex_ids); +static void mwifiex_pcie_reset_notify(struct pci_dev *pdev, bool prepare) +{ + struct mwifiex_adapter *adapter; + struct pcie_service_card *card; + + if (!pdev) { + pr_err("%s: PCIe device is not specified\n", __func__); + return; + } + + card = (struct pcie_service_card *)pci_get_drvdata(pdev); + if (!card || !card->adapter) { + pr_err("%s: Card or adapter structure is not valid (%ld)\n", + __func__, (long)card); + return; + } + + adapter = card->adapter; + mwifiex_dbg(adapter, INFO, + "%s: vendor=0x%4.04x device=0x%4.04x rev=%d %s\n", + __func__, pdev->vendor, pdev->device, + pdev->revision, + prepare ? "Pre-FLR" : "Post-FLR"); + + if (prepare) { + /* Kernel would be performing FLR after this notification. + * Cleanup all software without cleaning anything related to + * PCIe and HW. + */ + mwifiex_do_flr(adapter, prepare); + adapter->surprise_removed = true; + } else { + /* Kernel stores and restores PCIe function context before and + * after performing FLR respectively. Reconfigure the software + * and firmware including firmware redownload + */ + adapter->surprise_removed = false; + mwifiex_do_flr(adapter, prepare); + } + mwifiex_dbg(adapter, INFO, "%s, successful\n", __func__); +} + +static const struct pci_error_handlers mwifiex_pcie_err_handler[] = { + { .reset_notify = mwifiex_pcie_reset_notify, }, +}; + #ifdef CONFIG_PM_SLEEP /* Power Management Hooks */ static SIMPLE_DEV_PM_OPS(mwifiex_pcie_pm_ops, mwifiex_pcie_suspend, @@ -295,6 +341,7 @@ static struct pci_driver __refdata mwifiex_pcie = { }, #endif .shutdown = mwifiex_pcie_shutdown, + .err_handler = mwifiex_pcie_err_handler, }; /* @@ -2953,7 +3000,6 @@ static int mwifiex_register_dev(struct mwifiex_adapter *adapter) static void mwifiex_unregister_dev(struct mwifiex_adapter *adapter) { struct pcie_service_card *card = adapter->card; - const struct mwifiex_pcie_card_reg *reg; struct pci_dev *pdev; int i; @@ -2977,8 +3023,90 @@ static void mwifiex_unregister_dev(struct mwifiex_adapter *adapter) if (card->msi_enable) pci_disable_msi(pdev); } + } +} - reg = card->pcie.reg; +/* This function initializes the PCI-E host memory space, WCB rings, etc. + * + * The following initializations steps are followed - + * - Allocate TXBD ring buffers + * - Allocate RXBD ring buffers + * - Allocate event BD ring buffers + * - Allocate command response ring buffer + * - Allocate sleep cookie buffer + * Part of mwifiex_pcie_init(), not reset the PCIE registers + */ +static void mwifiex_pcie_up_dev(struct mwifiex_adapter *adapter) +{ + struct pcie_service_card *card = adapter->card; + int ret; + struct pci_dev *pdev = card->dev; + const struct mwifiex_pcie_card_reg *reg = card->pcie.reg; + + card->cmdrsp_buf = NULL; + ret = mwifiex_pcie_create_txbd_ring(adapter); + if (ret) { + mwifiex_dbg(adapter, ERROR, "Failed to create txbd ring\n"); + goto err_cre_txbd; + } + + ret = mwifiex_pcie_create_rxbd_ring(adapter); + if (ret) { + mwifiex_dbg(adapter, ERROR, "Failed to create rxbd ring\n"); + goto err_cre_rxbd; + } + + ret = mwifiex_pcie_create_evtbd_ring(adapter); + if (ret) { + mwifiex_dbg(adapter, ERROR, "Failed to create evtbd ring\n"); + goto err_cre_evtbd; + } + + ret = mwifiex_pcie_alloc_cmdrsp_buf(adapter); + if (ret) { + mwifiex_dbg(adapter, ERROR, "Failed to allocate cmdbuf buffer\n"); + goto err_alloc_cmdbuf; + } + + if (reg->sleep_cookie) { + ret = mwifiex_pcie_alloc_sleep_cookie_buf(adapter); + if (ret) { + mwifiex_dbg(adapter, ERROR, "Failed to allocate sleep_cookie buffer\n"); + goto err_alloc_cookie; + } + } else { + card->sleep_cookie_vbase = NULL; + } + return; + +err_alloc_cookie: + mwifiex_pcie_delete_cmdrsp_buf(adapter); +err_alloc_cmdbuf: + mwifiex_pcie_delete_evtbd_ring(adapter); +err_cre_evtbd: + mwifiex_pcie_delete_rxbd_ring(adapter); +err_cre_rxbd: + mwifiex_pcie_delete_txbd_ring(adapter); +err_cre_txbd: + pci_iounmap(pdev, card->pci_mmap1); +} + +/* This function cleans up the PCI-E host memory space. + * Some code is extracted from mwifiex_unregister_dev() + * + */ +static void mwifiex_pcie_down_dev(struct mwifiex_adapter *adapter) +{ + struct pcie_service_card *card = adapter->card; + const struct mwifiex_pcie_card_reg *reg = card->pcie.reg; + + if (mwifiex_write_reg(adapter, reg->drv_rdy, 0x00000000)) + mwifiex_dbg(adapter, ERROR, "Failed to write driver not-ready signature\n"); + + adapter->seq_num = 0; + adapter->tx_buf_size = MWIFIEX_TX_DATA_BUF_SIZE_4K; + + if (card) { if (reg->sleep_cookie) mwifiex_pcie_delete_sleep_cookie_buf(adapter); @@ -2988,6 +3116,8 @@ static void mwifiex_unregister_dev(struct mwifiex_adapter *adapter) mwifiex_pcie_delete_txbd_ring(adapter); card->cmdrsp_buf = NULL; } + + return; } static struct mwifiex_if_ops pcie_ops = { @@ -3014,6 +3144,8 @@ static struct mwifiex_if_ops pcie_ops = { .clean_pcie_ring = mwifiex_clean_pcie_ring_buf, .reg_dump = mwifiex_pcie_reg_dump, .device_dump = mwifiex_pcie_device_dump, + .down_dev = mwifiex_pcie_down_dev, + .up_dev = mwifiex_pcie_up_dev, }; /* diff --git a/drivers/net/wireless/marvell/mwifiex/pcie.h b/drivers/net/wireless/marvell/mwifiex/pcie.h index f05061cea5cd..f6992f0dbf17 100644 --- a/drivers/net/wireless/marvell/mwifiex/pcie.h +++ b/drivers/net/wireless/marvell/mwifiex/pcie.h @@ -37,6 +37,7 @@ #define PCIEUART8997_FW_NAME_V2 "mrvl/pcieuart8997_combo_v2.bin" #define PCIEUSB8997_FW_NAME_Z "mrvl/pcieusb8997_combo.bin" #define PCIEUSB8997_FW_NAME_V2 "mrvl/pcieusb8997_combo_v2.bin" +#define PCIE8997_DEFAULT_WIFIFW_NAME "mrvl/pcie8997_wlan.bin" #define PCIE_VENDOR_ID_MARVELL (0x11ab) #define PCIE_VENDOR_ID_V2_MARVELL (0x1b4b) From b711657616947e7b4c15f6825d259324216b23f2 Mon Sep 17 00:00:00 2001 From: Christophe Jaillet Date: Wed, 31 Aug 2016 13:50:59 +0200 Subject: [PATCH 0839/1715] mwifiex: scan: Simplify code This patch: - improves code layout - removes a useless memset(0) for some memory allocated with kzalloc - removes a useless if. We know that 'if (chan_band_tlv)' will succeed because it has been tested a few lines above Signed-off-by: Christophe JAILLET Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/scan.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/scan.c b/drivers/net/wireless/marvell/mwifiex/scan.c index 8daf0d8657b9..97c9765b5bc6 100644 --- a/drivers/net/wireless/marvell/mwifiex/scan.c +++ b/drivers/net/wireless/marvell/mwifiex/scan.c @@ -2194,18 +2194,14 @@ int mwifiex_ret_802_11_scan(struct mwifiex_private *priv, if (chan_band_tlv && adapter->nd_info) { adapter->nd_info->matches[idx] = - kzalloc(sizeof(*pmatch) + - sizeof(u32), GFP_ATOMIC); + kzalloc(sizeof(*pmatch) + sizeof(u32), + GFP_ATOMIC); pmatch = adapter->nd_info->matches[idx]; if (pmatch) { - memset(pmatch, 0, sizeof(*pmatch)); - if (chan_band_tlv) { - pmatch->n_channels = 1; - pmatch->channels[0] = - chan_band->chan_number; - } + pmatch->n_channels = 1; + pmatch->channels[0] = chan_band->chan_number; } } From 75696fe704774039e0e2ca65be24d79739ed206d Mon Sep 17 00:00:00 2001 From: Amitkumar Karwar Date: Fri, 29 Jul 2016 16:08:51 +0530 Subject: [PATCH 0840/1715] mwifiex: PCIe8997 chip specific handling The patch corrects the revision id register and uses it along with magic value and chip version registers to download appropriate firmware image. PCIe8997 Z chipset variant code has been removed, as it won't be used in production. Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/pcie.c | 35 +++++++-------------- drivers/net/wireless/marvell/mwifiex/pcie.h | 14 ++++----- 2 files changed, 18 insertions(+), 31 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/pcie.c b/drivers/net/wireless/marvell/mwifiex/pcie.c index e4ef90e222d9..3c3c4f197da8 100644 --- a/drivers/net/wireless/marvell/mwifiex/pcie.c +++ b/drivers/net/wireless/marvell/mwifiex/pcie.c @@ -2911,7 +2911,7 @@ static int mwifiex_pcie_request_irq(struct mwifiex_adapter *adapter) static void mwifiex_pcie_get_fw_name(struct mwifiex_adapter *adapter) { int revision_id = 0; - int version; + int version, magic; struct pcie_service_card *card = adapter->card; switch (card->dev->device) { @@ -2936,30 +2936,19 @@ static void mwifiex_pcie_get_fw_name(struct mwifiex_adapter *adapter) } break; case PCIE_DEVICE_ID_MARVELL_88W8997: - mwifiex_read_reg(adapter, 0x0c48, &revision_id); + mwifiex_read_reg(adapter, 0x8, &revision_id); mwifiex_read_reg(adapter, 0x0cd0, &version); + mwifiex_read_reg(adapter, 0x0cd4, &magic); + revision_id &= 0xff; version &= 0x7; - switch (revision_id) { - case PCIE8997_V2: - if (version == CHIP_VER_PCIEUART) - strcpy(adapter->fw_name, - PCIEUART8997_FW_NAME_V2); - else - strcpy(adapter->fw_name, - PCIEUSB8997_FW_NAME_V2); - break; - case PCIE8997_Z: - if (version == CHIP_VER_PCIEUART) - strcpy(adapter->fw_name, - PCIEUART8997_FW_NAME_Z); - else - strcpy(adapter->fw_name, - PCIEUSB8997_FW_NAME_Z); - break; - default: - strcpy(adapter->fw_name, PCIE8997_DEFAULT_FW_NAME); - break; - } + magic &= 0xff; + if (revision_id == PCIE8997_A1 && + magic == CHIP_MAGIC_VALUE && + version == CHIP_VER_PCIEUART) + strcpy(adapter->fw_name, PCIEUART8997_FW_NAME_V4); + else + strcpy(adapter->fw_name, PCIEUSB8997_FW_NAME_V4); + break; default: break; } diff --git a/drivers/net/wireless/marvell/mwifiex/pcie.h b/drivers/net/wireless/marvell/mwifiex/pcie.h index f6992f0dbf17..46f99cae9399 100644 --- a/drivers/net/wireless/marvell/mwifiex/pcie.h +++ b/drivers/net/wireless/marvell/mwifiex/pcie.h @@ -32,12 +32,9 @@ #define PCIE8897_DEFAULT_FW_NAME "mrvl/pcie8897_uapsta.bin" #define PCIE8897_A0_FW_NAME "mrvl/pcie8897_uapsta_a0.bin" #define PCIE8897_B0_FW_NAME "mrvl/pcie8897_uapsta.bin" -#define PCIE8997_DEFAULT_FW_NAME "mrvl/pcieusb8997_combo_v2.bin" -#define PCIEUART8997_FW_NAME_Z "mrvl/pcieuart8997_combo.bin" -#define PCIEUART8997_FW_NAME_V2 "mrvl/pcieuart8997_combo_v2.bin" -#define PCIEUSB8997_FW_NAME_Z "mrvl/pcieusb8997_combo.bin" -#define PCIEUSB8997_FW_NAME_V2 "mrvl/pcieusb8997_combo_v2.bin" -#define PCIE8997_DEFAULT_WIFIFW_NAME "mrvl/pcie8997_wlan.bin" +#define PCIEUART8997_FW_NAME_V4 "mrvl/pcieuart8997_combo_v4.bin" +#define PCIEUSB8997_FW_NAME_V4 "mrvl/pcieusb8997_combo_v4.bin" +#define PCIE8997_DEFAULT_WIFIFW_NAME "mrvl/pcie8997_wlan_v4.bin" #define PCIE_VENDOR_ID_MARVELL (0x11ab) #define PCIE_VENDOR_ID_V2_MARVELL (0x1b4b) @@ -47,9 +44,10 @@ #define PCIE8897_A0 0x1100 #define PCIE8897_B0 0x1200 -#define PCIE8997_Z 0x0 -#define PCIE8997_V2 0x471 +#define PCIE8997_A0 0x10 +#define PCIE8997_A1 0x11 #define CHIP_VER_PCIEUART 0x3 +#define CHIP_MAGIC_VALUE 0x24 /* Constants for Buffer Descriptor (BD) rings */ #define MWIFIEX_MAX_TXRX_BD 0x20 From 3c97f5de1f282492335a6aec1f94b77f7f899b8c Mon Sep 17 00:00:00 2001 From: Rajkumar Manoharan Date: Fri, 2 Sep 2016 19:46:09 +0300 Subject: [PATCH 0841/1715] ath10k: implement NAPI support Add NAPI support for rx and tx completion. NAPI poll is scheduled from interrupt handler. The design is as below - on interrupt - schedule napi and mask interrupts - on poll - process all pipes (no actual Tx/Rx) - process Rx within budget - if quota exceeds budget reschedule napi poll by returning budget - process Tx completions and update budget if necessary - process Tx fetch indications (pull-push) - push any other pending Tx (if possible) - before resched or napi completion replenish htt rx ring buffer - if work done < budget, complete napi poll and unmask interrupts This change also get rid of two tasklets (intr_tq and txrx_compl_task). Measured peak throughput with NAPI on IPQ4019 platform in controlled environment. No noticeable reduction in throughput is seen and also observed improvements in CPU usage. Approx. 15% CPU usage got reduced in UDP uplink case. DL: AP DUT Tx UL: AP DUT Rx IPQ4019 (avg. cpu usage %) ======== TOT +NAPI =========== ============= TCP DL 644 Mbps (42%) 645 Mbps (36%) TCP UL 673 Mbps (30%) 675 Mbps (26%) UDP DL 682 Mbps (49%) 680 Mbps (49%) UDP UL 720 Mbps (28%) 717 Mbps (11%) Signed-off-by: Rajkumar Manoharan Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/ahb.c | 10 +- drivers/net/wireless/ath/ath10k/core.c | 2 + drivers/net/wireless/ath/ath10k/core.h | 8 ++ drivers/net/wireless/ath/ath10k/htt.h | 2 +- drivers/net/wireless/ath/ath10k/htt_rx.c | 154 ++++++++++++++--------- drivers/net/wireless/ath/ath10k/htt_tx.c | 2 - drivers/net/wireless/ath/ath10k/pci.c | 71 +++++++---- drivers/net/wireless/ath/ath10k/pci.h | 6 +- 8 files changed, 159 insertions(+), 96 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/ahb.c b/drivers/net/wireless/ath/ath10k/ahb.c index dede02668128..b99ad5df383d 100644 --- a/drivers/net/wireless/ath/ath10k/ahb.c +++ b/drivers/net/wireless/ath/ath10k/ahb.c @@ -462,13 +462,13 @@ static void ath10k_ahb_halt_chip(struct ath10k *ar) static irqreturn_t ath10k_ahb_interrupt_handler(int irq, void *arg) { struct ath10k *ar = arg; - struct ath10k_pci *ar_pci = ath10k_pci_priv(ar); if (!ath10k_pci_irq_pending(ar)) return IRQ_NONE; ath10k_pci_disable_and_clear_legacy_irq(ar); - tasklet_schedule(&ar_pci->intr_tq); + ath10k_pci_irq_msi_fw_mask(ar); + napi_schedule(&ar->napi); return IRQ_HANDLED; } @@ -717,6 +717,9 @@ static void ath10k_ahb_hif_stop(struct ath10k *ar) synchronize_irq(ar_ahb->irq); ath10k_pci_flush(ar); + + napi_synchronize(&ar->napi); + napi_disable(&ar->napi); } static int ath10k_ahb_hif_power_up(struct ath10k *ar) @@ -748,6 +751,7 @@ static int ath10k_ahb_hif_power_up(struct ath10k *ar) ath10k_err(ar, "could not wake up target CPU: %d\n", ret); goto err_ce_deinit; } + napi_enable(&ar->napi); return 0; @@ -831,7 +835,7 @@ static int ath10k_ahb_probe(struct platform_device *pdev) goto err_resource_deinit; } - ath10k_pci_init_irq_tasklets(ar); + ath10k_pci_init_napi(ar); ret = ath10k_ahb_request_irq_legacy(ar); if (ret) diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c index c9d163e0f374..3abf8d617d32 100644 --- a/drivers/net/wireless/ath/ath10k/core.c +++ b/drivers/net/wireless/ath/ath10k/core.c @@ -2322,6 +2322,8 @@ struct ath10k *ath10k_core_create(size_t priv_size, struct device *dev, INIT_WORK(&ar->register_work, ath10k_core_register_work); INIT_WORK(&ar->restart_work, ath10k_core_restart); + init_dummy_netdev(&ar->napi_dev); + ret = ath10k_debug_create(ar); if (ret) goto err_free_aux_wq; diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h index b367e9cfcfd4..c22391394ad1 100644 --- a/drivers/net/wireless/ath/ath10k/core.h +++ b/drivers/net/wireless/ath/ath10k/core.h @@ -65,6 +65,10 @@ #define ATH10K_KEEPALIVE_MAX_IDLE 3895 #define ATH10K_KEEPALIVE_MAX_UNRESPONSIVE 3900 +/* NAPI poll budget */ +#define ATH10K_NAPI_BUDGET 64 +#define ATH10K_NAPI_QUOTA_LIMIT 60 + struct ath10k; enum ath10k_bus { @@ -954,6 +958,10 @@ struct ath10k { struct ath10k_thermal thermal; struct ath10k_wow wow; + /* NAPI */ + struct net_device napi_dev; + struct napi_struct napi; + /* must be last */ u8 drv_priv[0] __aligned(sizeof(void *)); }; diff --git a/drivers/net/wireless/ath/ath10k/htt.h b/drivers/net/wireless/ath/ath10k/htt.h index 430a83e142aa..98c14247021b 100644 --- a/drivers/net/wireless/ath/ath10k/htt.h +++ b/drivers/net/wireless/ath/ath10k/htt.h @@ -1665,7 +1665,6 @@ struct ath10k_htt { /* This is used to group tx/rx completions separately and process them * in batches to reduce cache stalls */ - struct tasklet_struct txrx_compl_task; struct sk_buff_head rx_compl_q; struct sk_buff_head rx_in_ord_compl_q; struct sk_buff_head tx_fetch_ind_q; @@ -1798,5 +1797,6 @@ int ath10k_htt_tx(struct ath10k_htt *htt, struct sk_buff *msdu); void ath10k_htt_rx_pktlog_completion_handler(struct ath10k *ar, struct sk_buff *skb); +int ath10k_htt_txrx_compl_task(struct ath10k *ar, int budget); #endif diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c index ae6931b99da9..740b0faae927 100644 --- a/drivers/net/wireless/ath/ath10k/htt_rx.c +++ b/drivers/net/wireless/ath/ath10k/htt_rx.c @@ -34,7 +34,6 @@ #define HTT_RX_RING_REFILL_RESCHED_MS 5 static int ath10k_htt_rx_get_csum_state(struct sk_buff *skb); -static void ath10k_htt_txrx_compl_task(unsigned long ptr); static struct sk_buff * ath10k_htt_rx_find_skb_paddr(struct ath10k *ar, u32 paddr) @@ -226,7 +225,6 @@ int ath10k_htt_rx_ring_refill(struct ath10k *ar) void ath10k_htt_rx_free(struct ath10k_htt *htt) { del_timer_sync(&htt->rx_ring.refill_retry_timer); - tasklet_kill(&htt->txrx_compl_task); skb_queue_purge(&htt->rx_compl_q); skb_queue_purge(&htt->rx_in_ord_compl_q); @@ -520,9 +518,6 @@ int ath10k_htt_rx_alloc(struct ath10k_htt *htt) skb_queue_head_init(&htt->tx_fetch_ind_q); atomic_set(&htt->num_mpdus_ready, 0); - tasklet_init(&htt->txrx_compl_task, ath10k_htt_txrx_compl_task, - (unsigned long)htt); - ath10k_dbg(ar, ATH10K_DBG_BOOT, "htt rx ring size %d fill_level %d\n", htt->rx_ring.size, htt->rx_ring.fill_level); return 0; @@ -958,7 +953,7 @@ static void ath10k_process_rx(struct ath10k *ar, trace_ath10k_rx_hdr(ar, skb->data, skb->len); trace_ath10k_rx_payload(ar, skb->data, skb->len); - ieee80211_rx(ar->hw, skb); + ieee80211_rx_napi(ar->hw, NULL, skb, &ar->napi); } static int ath10k_htt_rx_nwifi_hdrlen(struct ath10k *ar, @@ -1527,7 +1522,7 @@ static int ath10k_htt_rx_handle_amsdu(struct ath10k_htt *htt) struct ath10k *ar = htt->ar; static struct ieee80211_rx_status rx_status; struct sk_buff_head amsdu; - int ret; + int ret, num_msdus; __skb_queue_head_init(&amsdu); @@ -1549,13 +1544,14 @@ static int ath10k_htt_rx_handle_amsdu(struct ath10k_htt *htt) return ret; } + num_msdus = skb_queue_len(&amsdu); ath10k_htt_rx_h_ppdu(ar, &amsdu, &rx_status, 0xffff); ath10k_htt_rx_h_unchain(ar, &amsdu, ret > 0); ath10k_htt_rx_h_filter(ar, &amsdu, &rx_status); ath10k_htt_rx_h_mpdu(ar, &amsdu, &rx_status); ath10k_htt_rx_h_deliver(ar, &amsdu, &rx_status); - return 0; + return num_msdus; } static void ath10k_htt_rx_proc_rx_ind(struct ath10k_htt *htt, @@ -1579,15 +1575,6 @@ static void ath10k_htt_rx_proc_rx_ind(struct ath10k_htt *htt, mpdu_count += mpdu_ranges[i].mpdu_count; atomic_add(mpdu_count, &htt->num_mpdus_ready); - - tasklet_schedule(&htt->txrx_compl_task); -} - -static void ath10k_htt_rx_frag_handler(struct ath10k_htt *htt) -{ - atomic_inc(&htt->num_mpdus_ready); - - tasklet_schedule(&htt->txrx_compl_task); } static void ath10k_htt_rx_tx_compl_ind(struct ath10k *ar, @@ -1772,14 +1759,15 @@ static void ath10k_htt_rx_h_rx_offload_prot(struct ieee80211_rx_status *status, RX_FLAG_MMIC_STRIPPED; } -static void ath10k_htt_rx_h_rx_offload(struct ath10k *ar, - struct sk_buff_head *list) +static int ath10k_htt_rx_h_rx_offload(struct ath10k *ar, + struct sk_buff_head *list) { struct ath10k_htt *htt = &ar->htt; struct ieee80211_rx_status *status = &htt->rx_status; struct htt_rx_offload_msdu *rx; struct sk_buff *msdu; size_t offset; + int num_msdu = 0; while ((msdu = __skb_dequeue(list))) { /* Offloaded frames don't have Rx descriptor. Instead they have @@ -1819,10 +1807,12 @@ static void ath10k_htt_rx_h_rx_offload(struct ath10k *ar, ath10k_htt_rx_h_rx_offload_prot(status, msdu); ath10k_htt_rx_h_channel(ar, status, NULL, rx->vdev_id); ath10k_process_rx(ar, status, msdu); + num_msdu++; } + return num_msdu; } -static void ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb) +static int ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb) { struct ath10k_htt *htt = &ar->htt; struct htt_resp *resp = (void *)skb->data; @@ -1835,12 +1825,12 @@ static void ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb) u8 tid; bool offload; bool frag; - int ret; + int ret, num_msdus = 0; lockdep_assert_held(&htt->rx_ring.lock); if (htt->rx_confused) - return; + return -EIO; skb_pull(skb, sizeof(resp->hdr)); skb_pull(skb, sizeof(resp->rx_in_ord_ind)); @@ -1859,7 +1849,7 @@ static void ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb) if (skb->len < msdu_count * sizeof(*resp->rx_in_ord_ind.msdu_descs)) { ath10k_warn(ar, "dropping invalid in order rx indication\n"); - return; + return -EINVAL; } /* The event can deliver more than 1 A-MSDU. Each A-MSDU is later @@ -1870,14 +1860,14 @@ static void ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb) if (ret < 0) { ath10k_warn(ar, "failed to pop paddr list: %d\n", ret); htt->rx_confused = true; - return; + return -EIO; } /* Offloaded frames are very different and need to be handled * separately. */ if (offload) - ath10k_htt_rx_h_rx_offload(ar, &list); + num_msdus = ath10k_htt_rx_h_rx_offload(ar, &list); while (!skb_queue_empty(&list)) { __skb_queue_head_init(&amsdu); @@ -1890,6 +1880,7 @@ static void ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb) * better to report something than nothing though. This * should still give an idea about rx rate to the user. */ + num_msdus += skb_queue_len(&amsdu); ath10k_htt_rx_h_ppdu(ar, &amsdu, status, vdev_id); ath10k_htt_rx_h_filter(ar, &amsdu, status); ath10k_htt_rx_h_mpdu(ar, &amsdu, status); @@ -1902,9 +1893,10 @@ static void ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb) ath10k_warn(ar, "failed to extract amsdu: %d\n", ret); htt->rx_confused = true; __skb_queue_purge(&list); - return; + return -EIO; } } + return num_msdus; } static void ath10k_htt_rx_tx_fetch_resp_id_confirm(struct ath10k *ar, @@ -2267,7 +2259,6 @@ bool ath10k_htt_t2h_msg_handler(struct ath10k *ar, struct sk_buff *skb) } case HTT_T2H_MSG_TYPE_TX_COMPL_IND: ath10k_htt_rx_tx_compl_ind(htt->ar, skb); - tasklet_schedule(&htt->txrx_compl_task); break; case HTT_T2H_MSG_TYPE_SEC_IND: { struct ath10k *ar = htt->ar; @@ -2284,7 +2275,7 @@ bool ath10k_htt_t2h_msg_handler(struct ath10k *ar, struct sk_buff *skb) case HTT_T2H_MSG_TYPE_RX_FRAG_IND: { ath10k_dbg_dump(ar, ATH10K_DBG_HTT_DUMP, NULL, "htt event: ", skb->data, skb->len); - ath10k_htt_rx_frag_handler(htt); + atomic_inc(&htt->num_mpdus_ready); break; } case HTT_T2H_MSG_TYPE_TEST: @@ -2320,8 +2311,7 @@ bool ath10k_htt_t2h_msg_handler(struct ath10k *ar, struct sk_buff *skb) break; } case HTT_T2H_MSG_TYPE_RX_IN_ORD_PADDR_IND: { - skb_queue_tail(&htt->rx_in_ord_compl_q, skb); - tasklet_schedule(&htt->txrx_compl_task); + __skb_queue_tail(&htt->rx_in_ord_compl_q, skb); return false; } case HTT_T2H_MSG_TYPE_TX_CREDIT_UPDATE_IND: @@ -2347,7 +2337,6 @@ bool ath10k_htt_t2h_msg_handler(struct ath10k *ar, struct sk_buff *skb) break; } skb_queue_tail(&htt->tx_fetch_ind_q, tx_fetch_ind); - tasklet_schedule(&htt->txrx_compl_task); break; } case HTT_T2H_MSG_TYPE_TX_FETCH_CONFIRM: @@ -2376,27 +2365,77 @@ void ath10k_htt_rx_pktlog_completion_handler(struct ath10k *ar, } EXPORT_SYMBOL(ath10k_htt_rx_pktlog_completion_handler); -static void ath10k_htt_txrx_compl_task(unsigned long ptr) +int ath10k_htt_txrx_compl_task(struct ath10k *ar, int budget) { - struct ath10k_htt *htt = (struct ath10k_htt *)ptr; - struct ath10k *ar = htt->ar; + struct ath10k_htt *htt = &ar->htt; struct htt_tx_done tx_done = {}; - struct sk_buff_head rx_ind_q; struct sk_buff_head tx_ind_q; struct sk_buff *skb; unsigned long flags; - int num_mpdus; + int quota = 0, done, num_rx_msdus; + bool resched_napi = false; - __skb_queue_head_init(&rx_ind_q); __skb_queue_head_init(&tx_ind_q); - spin_lock_irqsave(&htt->rx_in_ord_compl_q.lock, flags); - skb_queue_splice_init(&htt->rx_in_ord_compl_q, &rx_ind_q); - spin_unlock_irqrestore(&htt->rx_in_ord_compl_q.lock, flags); + /* Since in-ord-ind can deliver more than 1 A-MSDU in single event, + * process it first to utilize full available quota. + */ + while (quota < budget) { + if (skb_queue_empty(&htt->rx_in_ord_compl_q)) + break; - spin_lock_irqsave(&htt->tx_fetch_ind_q.lock, flags); - skb_queue_splice_init(&htt->tx_fetch_ind_q, &tx_ind_q); - spin_unlock_irqrestore(&htt->tx_fetch_ind_q.lock, flags); + skb = __skb_dequeue(&htt->rx_in_ord_compl_q); + if (!skb) { + resched_napi = true; + goto exit; + } + + spin_lock_bh(&htt->rx_ring.lock); + num_rx_msdus = ath10k_htt_rx_in_ord_ind(ar, skb); + spin_unlock_bh(&htt->rx_ring.lock); + if (num_rx_msdus < 0) { + resched_napi = true; + goto exit; + } + + dev_kfree_skb_any(skb); + if (num_rx_msdus > 0) + quota += num_rx_msdus; + + if ((quota > ATH10K_NAPI_QUOTA_LIMIT) && + !skb_queue_empty(&htt->rx_in_ord_compl_q)) { + resched_napi = true; + goto exit; + } + } + + while (quota < budget) { + /* no more data to receive */ + if (!atomic_read(&htt->num_mpdus_ready)) + break; + + num_rx_msdus = ath10k_htt_rx_handle_amsdu(htt); + if (num_rx_msdus < 0) { + resched_napi = true; + goto exit; + } + + quota += num_rx_msdus; + atomic_dec(&htt->num_mpdus_ready); + if ((quota > ATH10K_NAPI_QUOTA_LIMIT) && + atomic_read(&htt->num_mpdus_ready)) { + resched_napi = true; + goto exit; + } + } + + /* From NAPI documentation: + * The napi poll() function may also process TX completions, in which + * case if it processes the entire TX ring then it should count that + * work as the rest of the budget. + */ + if ((quota < budget) && !kfifo_is_empty(&htt->txdone_fifo)) + quota = budget; /* kfifo_get: called only within txrx_tasklet so it's neatly serialized. * From kfifo_get() documentation: @@ -2406,27 +2445,22 @@ static void ath10k_htt_txrx_compl_task(unsigned long ptr) while (kfifo_get(&htt->txdone_fifo, &tx_done)) ath10k_txrx_tx_unref(htt, &tx_done); + spin_lock_irqsave(&htt->tx_fetch_ind_q.lock, flags); + skb_queue_splice_init(&htt->tx_fetch_ind_q, &tx_ind_q); + spin_unlock_irqrestore(&htt->tx_fetch_ind_q.lock, flags); + while ((skb = __skb_dequeue(&tx_ind_q))) { ath10k_htt_rx_tx_fetch_ind(ar, skb); dev_kfree_skb_any(skb); } - num_mpdus = atomic_read(&htt->num_mpdus_ready); - - while (num_mpdus) { - if (ath10k_htt_rx_handle_amsdu(htt)) - break; - - num_mpdus--; - atomic_dec(&htt->num_mpdus_ready); - } - - while ((skb = __skb_dequeue(&rx_ind_q))) { - spin_lock_bh(&htt->rx_ring.lock); - ath10k_htt_rx_in_ord_ind(ar, skb); - spin_unlock_bh(&htt->rx_ring.lock); - dev_kfree_skb_any(skb); - } - +exit: ath10k_htt_rx_msdu_buff_replenish(htt); + /* In case of rx failure or more data to read, report budget + * to reschedule NAPI poll + */ + done = resched_napi ? budget : quota; + + return done; } +EXPORT_SYMBOL(ath10k_htt_txrx_compl_task); diff --git a/drivers/net/wireless/ath/ath10k/htt_tx.c b/drivers/net/wireless/ath/ath10k/htt_tx.c index 7c072b605bc7..ae5b33fe5ba8 100644 --- a/drivers/net/wireless/ath/ath10k/htt_tx.c +++ b/drivers/net/wireless/ath/ath10k/htt_tx.c @@ -390,8 +390,6 @@ void ath10k_htt_tx_free(struct ath10k_htt *htt) { int size; - tasklet_kill(&htt->txrx_compl_task); - idr_for_each(&htt->pending_tx, ath10k_htt_tx_clean_up_pending, htt->ar); idr_destroy(&htt->pending_tx); diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c index 1b841adfb574..930e9511a917 100644 --- a/drivers/net/wireless/ath/ath10k/pci.c +++ b/drivers/net/wireless/ath/ath10k/pci.c @@ -1506,12 +1506,10 @@ void ath10k_pci_hif_send_complete_check(struct ath10k *ar, u8 pipe, ath10k_ce_per_engine_service(ar, pipe); } -void ath10k_pci_kill_tasklet(struct ath10k *ar) +static void ath10k_pci_rx_retry_sync(struct ath10k *ar) { struct ath10k_pci *ar_pci = ath10k_pci_priv(ar); - tasklet_kill(&ar_pci->intr_tq); - del_timer_sync(&ar_pci->rx_post_retry); } @@ -1570,7 +1568,7 @@ void ath10k_pci_hif_get_default_pipe(struct ath10k *ar, ul_pipe, dl_pipe); } -static void ath10k_pci_irq_msi_fw_mask(struct ath10k *ar) +void ath10k_pci_irq_msi_fw_mask(struct ath10k *ar) { u32 val; @@ -1753,7 +1751,7 @@ void ath10k_pci_ce_deinit(struct ath10k *ar) void ath10k_pci_flush(struct ath10k *ar) { - ath10k_pci_kill_tasklet(ar); + ath10k_pci_rx_retry_sync(ar); ath10k_pci_buffer_cleanup(ar); } @@ -1780,6 +1778,8 @@ static void ath10k_pci_hif_stop(struct ath10k *ar) ath10k_pci_irq_disable(ar); ath10k_pci_irq_sync(ar); ath10k_pci_flush(ar); + napi_synchronize(&ar->napi); + napi_disable(&ar->napi); spin_lock_irqsave(&ar_pci->ps_lock, flags); WARN_ON(ar_pci->ps_wake_refcount > 0); @@ -2533,6 +2533,7 @@ static int ath10k_pci_hif_power_up(struct ath10k *ar) ath10k_err(ar, "could not wake up target CPU: %d\n", ret); goto err_ce; } + napi_enable(&ar->napi); return 0; @@ -2772,35 +2773,53 @@ static irqreturn_t ath10k_pci_interrupt_handler(int irq, void *arg) return IRQ_NONE; } - if (ar_pci->oper_irq_mode == ATH10K_PCI_IRQ_LEGACY) { - if (!ath10k_pci_irq_pending(ar)) - return IRQ_NONE; + if ((ar_pci->oper_irq_mode == ATH10K_PCI_IRQ_LEGACY) && + !ath10k_pci_irq_pending(ar)) + return IRQ_NONE; - ath10k_pci_disable_and_clear_legacy_irq(ar); - } - - tasklet_schedule(&ar_pci->intr_tq); + ath10k_pci_disable_and_clear_legacy_irq(ar); + ath10k_pci_irq_msi_fw_mask(ar); + napi_schedule(&ar->napi); return IRQ_HANDLED; } -static void ath10k_pci_tasklet(unsigned long data) +static int ath10k_pci_napi_poll(struct napi_struct *ctx, int budget) { - struct ath10k *ar = (struct ath10k *)data; - struct ath10k_pci *ar_pci = ath10k_pci_priv(ar); + struct ath10k *ar = container_of(ctx, struct ath10k, napi); + int done = 0; if (ath10k_pci_has_fw_crashed(ar)) { - ath10k_pci_irq_disable(ar); ath10k_pci_fw_crashed_clear(ar); ath10k_pci_fw_crashed_dump(ar); - return; + napi_complete(ctx); + return done; } ath10k_ce_per_engine_service_any(ar); - /* Re-enable legacy irq that was disabled in the irq handler */ - if (ar_pci->oper_irq_mode == ATH10K_PCI_IRQ_LEGACY) + done = ath10k_htt_txrx_compl_task(ar, budget); + + if (done < budget) { + napi_complete(ctx); + /* In case of MSI, it is possible that interrupts are received + * while NAPI poll is inprogress. So pending interrupts that are + * received after processing all copy engine pipes by NAPI poll + * will not be handled again. This is causing failure to + * complete boot sequence in x86 platform. So before enabling + * interrupts safer to check for pending interrupts for + * immediate servicing. + */ + if (CE_INTERRUPT_SUMMARY(ar)) { + napi_reschedule(ctx); + goto out; + } ath10k_pci_enable_legacy_irq(ar); + ath10k_pci_irq_msi_fw_unmask(ar); + } + +out: + return done; } static int ath10k_pci_request_irq_msi(struct ath10k *ar) @@ -2858,11 +2877,10 @@ static void ath10k_pci_free_irq(struct ath10k *ar) free_irq(ar_pci->pdev->irq, ar); } -void ath10k_pci_init_irq_tasklets(struct ath10k *ar) +void ath10k_pci_init_napi(struct ath10k *ar) { - struct ath10k_pci *ar_pci = ath10k_pci_priv(ar); - - tasklet_init(&ar_pci->intr_tq, ath10k_pci_tasklet, (unsigned long)ar); + netif_napi_add(&ar->napi_dev, &ar->napi, ath10k_pci_napi_poll, + ATH10K_NAPI_BUDGET); } static int ath10k_pci_init_irq(struct ath10k *ar) @@ -2870,7 +2888,7 @@ static int ath10k_pci_init_irq(struct ath10k *ar) struct ath10k_pci *ar_pci = ath10k_pci_priv(ar); int ret; - ath10k_pci_init_irq_tasklets(ar); + ath10k_pci_init_napi(ar); if (ath10k_pci_irq_mode != ATH10K_PCI_IRQ_AUTO) ath10k_info(ar, "limiting irq mode to: %d\n", @@ -3131,7 +3149,8 @@ int ath10k_pci_setup_resource(struct ath10k *ar) void ath10k_pci_release_resource(struct ath10k *ar) { - ath10k_pci_kill_tasklet(ar); + ath10k_pci_rx_retry_sync(ar); + netif_napi_del(&ar->napi); ath10k_pci_ce_deinit(ar); ath10k_pci_free_pipes(ar); } @@ -3298,7 +3317,7 @@ static int ath10k_pci_probe(struct pci_dev *pdev, err_free_irq: ath10k_pci_free_irq(ar); - ath10k_pci_kill_tasklet(ar); + ath10k_pci_rx_retry_sync(ar); err_deinit_irq: ath10k_pci_deinit_irq(ar); diff --git a/drivers/net/wireless/ath/ath10k/pci.h b/drivers/net/wireless/ath/ath10k/pci.h index 6eca1df2ce60..9854ad56b2de 100644 --- a/drivers/net/wireless/ath/ath10k/pci.h +++ b/drivers/net/wireless/ath/ath10k/pci.h @@ -177,8 +177,6 @@ struct ath10k_pci { /* Operating interrupt mode */ enum ath10k_pci_irq_mode oper_irq_mode; - struct tasklet_struct intr_tq; - struct ath10k_pci_pipe pipe_info[CE_COUNT_MAX]; /* Copy Engine used for Diagnostic Accesses */ @@ -294,8 +292,7 @@ void ath10k_pci_free_pipes(struct ath10k *ar); void ath10k_pci_free_pipes(struct ath10k *ar); void ath10k_pci_rx_replenish_retry(unsigned long ptr); void ath10k_pci_ce_deinit(struct ath10k *ar); -void ath10k_pci_init_irq_tasklets(struct ath10k *ar); -void ath10k_pci_kill_tasklet(struct ath10k *ar); +void ath10k_pci_init_napi(struct ath10k *ar); int ath10k_pci_init_pipes(struct ath10k *ar); int ath10k_pci_init_config(struct ath10k *ar); void ath10k_pci_rx_post(struct ath10k *ar); @@ -303,6 +300,7 @@ void ath10k_pci_flush(struct ath10k *ar); void ath10k_pci_enable_legacy_irq(struct ath10k *ar); bool ath10k_pci_irq_pending(struct ath10k *ar); void ath10k_pci_disable_and_clear_legacy_irq(struct ath10k *ar); +void ath10k_pci_irq_msi_fw_mask(struct ath10k *ar); int ath10k_pci_wait_for_target_init(struct ath10k *ar); int ath10k_pci_setup_resource(struct ath10k *ar); void ath10k_pci_release_resource(struct ath10k *ar); From 18f53fe0f30331e826b075709ed7b26b9283235e Mon Sep 17 00:00:00 2001 From: Rajkumar Manoharan Date: Fri, 2 Sep 2016 19:46:10 +0300 Subject: [PATCH 0842/1715] ath10k: fix throughput regression in multi client mode commit 7a0adc83f34d ("ath10k: improve tx scheduling") is causing severe throughput drop in multi client mode. This issue is originally reported in veriwave setup with 50 clients with TCP downlink traffic. While increasing number of clients, the average throughput drops gradually. With 50 clients, the combined peak throughput is decreased to 98 Mbps whereas reverting given commit restored it to 550 Mbps. Processing txqs for every tx completion is causing overhead. Ideally for management frame tx completion, pending txqs processing can be avoided. The change partly reverts the commit "ath10k: improve tx scheduling". Processing pending txqs after all skbs tx completion will yeild enough room to burst tx frames. Fixes: 7a0adc83f34d ("ath10k: improve tx scheduling") Signed-off-by: Rajkumar Manoharan Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/htt_rx.c | 2 ++ drivers/net/wireless/ath/ath10k/txrx.c | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c index 740b0faae927..2067ac24d71e 100644 --- a/drivers/net/wireless/ath/ath10k/htt_rx.c +++ b/drivers/net/wireless/ath/ath10k/htt_rx.c @@ -2445,6 +2445,8 @@ int ath10k_htt_txrx_compl_task(struct ath10k *ar, int budget) while (kfifo_get(&htt->txdone_fifo, &tx_done)) ath10k_txrx_tx_unref(htt, &tx_done); + ath10k_mac_tx_push_pending(ar); + spin_lock_irqsave(&htt->tx_fetch_ind_q.lock, flags); skb_queue_splice_init(&htt->tx_fetch_ind_q, &tx_ind_q); spin_unlock_irqrestore(&htt->tx_fetch_ind_q.lock, flags); diff --git a/drivers/net/wireless/ath/ath10k/txrx.c b/drivers/net/wireless/ath/ath10k/txrx.c index 1e695d1b4692..9852c5d51139 100644 --- a/drivers/net/wireless/ath/ath10k/txrx.c +++ b/drivers/net/wireless/ath/ath10k/txrx.c @@ -119,8 +119,6 @@ int ath10k_txrx_tx_unref(struct ath10k_htt *htt, ieee80211_tx_status(htt->ar->hw, msdu); /* we do not own the msdu anymore */ - ath10k_mac_tx_push_pending(ar); - return 0; } From d94a461d7a7df68991fb9663531173f60ef89c68 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 2 Sep 2016 19:46:12 +0300 Subject: [PATCH 0843/1715] ath9k: use ieee80211_tx_status_noskb where possible It removes the need for undoing the padding changes to skb->data and it improves performance by eliminating one tx status lookup per MPDU in the status path. It is also useful for preparing a follow-up fix to better handle powersave filtering. A side effect is that these counters, available via debugfs, become now invalid: * dot11TransmittedFragmentCount * dot11FrameDuplicateCount, * dot11ReceivedFragmentCount * dot11MulticastReceivedFrameCount Signed-off-by: Felix Fietkau [kvalo@qca.qualcomm.com: add a note about counters, thanks to Zefir Kurtisi] Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath9k/xmit.c | 94 ++++++++++++++++++--------- 1 file changed, 62 insertions(+), 32 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c index 8ddd604bd00c..b99ffa035c77 100644 --- a/drivers/net/wireless/ath/ath9k/xmit.c +++ b/drivers/net/wireless/ath/ath9k/xmit.c @@ -50,9 +50,11 @@ static u16 bits_per_symbol[][2] = { static void ath_tx_send_normal(struct ath_softc *sc, struct ath_txq *txq, struct ath_atx_tid *tid, struct sk_buff *skb); static void ath_tx_complete(struct ath_softc *sc, struct sk_buff *skb, - int tx_flags, struct ath_txq *txq); + int tx_flags, struct ath_txq *txq, + struct ieee80211_sta *sta); static void ath_tx_complete_buf(struct ath_softc *sc, struct ath_buf *bf, struct ath_txq *txq, struct list_head *bf_q, + struct ieee80211_sta *sta, struct ath_tx_status *ts, int txok); static void ath_tx_txqaddbuf(struct ath_softc *sc, struct ath_txq *txq, struct list_head *head, bool internal); @@ -77,6 +79,22 @@ enum { /* Aggregation logic */ /*********************/ +static void ath_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) +{ + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_sta *sta = info->status.status_driver_data[0]; + + if (info->flags & IEEE80211_TX_CTL_REQ_TX_STATUS) { + ieee80211_tx_status(hw, skb); + return; + } + + if (sta) + ieee80211_tx_status_noskb(hw, sta, info); + + dev_kfree_skb(skb); +} + void ath_txq_lock(struct ath_softc *sc, struct ath_txq *txq) __acquires(&txq->axq_lock) { @@ -92,6 +110,7 @@ void ath_txq_unlock(struct ath_softc *sc, struct ath_txq *txq) void ath_txq_unlock_complete(struct ath_softc *sc, struct ath_txq *txq) __releases(&txq->axq_lock) { + struct ieee80211_hw *hw = sc->hw; struct sk_buff_head q; struct sk_buff *skb; @@ -100,7 +119,7 @@ void ath_txq_unlock_complete(struct ath_softc *sc, struct ath_txq *txq) spin_unlock_bh(&txq->axq_lock); while ((skb = __skb_dequeue(&q))) - ieee80211_tx_status(sc->hw, skb); + ath_tx_status(hw, skb); } static void ath_tx_queue_tid(struct ath_softc *sc, struct ath_txq *txq, @@ -253,7 +272,7 @@ static void ath_tx_flush_tid(struct ath_softc *sc, struct ath_atx_tid *tid) } list_add_tail(&bf->list, &bf_head); - ath_tx_complete_buf(sc, bf, txq, &bf_head, &ts, 0); + ath_tx_complete_buf(sc, bf, txq, &bf_head, NULL, &ts, 0); } if (sendbar) { @@ -318,12 +337,12 @@ static void ath_tid_drain(struct ath_softc *sc, struct ath_txq *txq, bf = fi->bf; if (!bf) { - ath_tx_complete(sc, skb, ATH_TX_ERROR, txq); + ath_tx_complete(sc, skb, ATH_TX_ERROR, txq, NULL); continue; } list_add_tail(&bf->list, &bf_head); - ath_tx_complete_buf(sc, bf, txq, &bf_head, &ts, 0); + ath_tx_complete_buf(sc, bf, txq, &bf_head, NULL, &ts, 0); } } @@ -426,12 +445,11 @@ static void ath_tx_count_frames(struct ath_softc *sc, struct ath_buf *bf, static void ath_tx_complete_aggr(struct ath_softc *sc, struct ath_txq *txq, struct ath_buf *bf, struct list_head *bf_q, + struct ieee80211_sta *sta, struct ath_tx_status *ts, int txok) { struct ath_node *an = NULL; struct sk_buff *skb; - struct ieee80211_sta *sta; - struct ieee80211_hw *hw = sc->hw; struct ieee80211_hdr *hdr; struct ieee80211_tx_info *tx_info; struct ath_atx_tid *tid = NULL; @@ -460,12 +478,7 @@ static void ath_tx_complete_aggr(struct ath_softc *sc, struct ath_txq *txq, for (i = 0; i < ts->ts_rateindex; i++) retries += rates[i].count; - rcu_read_lock(); - - sta = ieee80211_find_sta_by_ifaddr(hw, hdr->addr1, hdr->addr2); if (!sta) { - rcu_read_unlock(); - INIT_LIST_HEAD(&bf_head); while (bf) { bf_next = bf->bf_next; @@ -473,7 +486,7 @@ static void ath_tx_complete_aggr(struct ath_softc *sc, struct ath_txq *txq, if (!bf->bf_state.stale || bf_next != NULL) list_move_tail(&bf->list, &bf_head); - ath_tx_complete_buf(sc, bf, txq, &bf_head, ts, 0); + ath_tx_complete_buf(sc, bf, txq, &bf_head, NULL, ts, 0); bf = bf_next; } @@ -583,7 +596,7 @@ static void ath_tx_complete_aggr(struct ath_softc *sc, struct ath_txq *txq, ts); } - ath_tx_complete_buf(sc, bf, txq, &bf_head, ts, + ath_tx_complete_buf(sc, bf, txq, &bf_head, sta, ts, !txfail); } else { if (tx_info->flags & IEEE80211_TX_STATUS_EOSP) { @@ -604,7 +617,8 @@ static void ath_tx_complete_aggr(struct ath_softc *sc, struct ath_txq *txq, ath_tx_update_baw(sc, tid, seqno); ath_tx_complete_buf(sc, bf, txq, - &bf_head, ts, 0); + &bf_head, NULL, ts, + 0); bar_index = max_t(int, bar_index, ATH_BA_INDEX(seq_first, seqno)); break; @@ -648,8 +662,6 @@ static void ath_tx_complete_aggr(struct ath_softc *sc, struct ath_txq *txq, ath_txq_lock(sc, txq); } - rcu_read_unlock(); - if (needreset) ath9k_queue_reset(sc, RESET_TYPE_TX_ERROR); } @@ -664,7 +676,10 @@ static void ath_tx_process_buffer(struct ath_softc *sc, struct ath_txq *txq, struct ath_tx_status *ts, struct ath_buf *bf, struct list_head *bf_head) { + struct ieee80211_hw *hw = sc->hw; struct ieee80211_tx_info *info; + struct ieee80211_sta *sta; + struct ieee80211_hdr *hdr; bool txok, flush; txok = !(ts->ts_status & ATH9K_TXERR_MASK); @@ -677,6 +692,10 @@ static void ath_tx_process_buffer(struct ath_softc *sc, struct ath_txq *txq, ts->duration = ath9k_hw_get_duration(sc->sc_ah, bf->bf_desc, ts->ts_rateindex); + + hdr = (struct ieee80211_hdr *) bf->bf_mpdu->data; + sta = ieee80211_find_sta_by_ifaddr(hw, hdr->addr1, hdr->addr2); + if (!bf_isampdu(bf)) { if (!flush) { info = IEEE80211_SKB_CB(bf->bf_mpdu); @@ -685,9 +704,9 @@ static void ath_tx_process_buffer(struct ath_softc *sc, struct ath_txq *txq, ath_tx_rc_status(sc, bf, ts, 1, txok ? 0 : 1, txok); ath_dynack_sample_tx_ts(sc->sc_ah, bf->bf_mpdu, ts); } - ath_tx_complete_buf(sc, bf, txq, bf_head, ts, txok); + ath_tx_complete_buf(sc, bf, txq, bf_head, sta, ts, txok); } else - ath_tx_complete_aggr(sc, txq, bf, bf_head, ts, txok); + ath_tx_complete_aggr(sc, txq, bf, bf_head, sta, ts, txok); if (!flush) ath_txq_schedule(sc, txq); @@ -923,7 +942,7 @@ ath_tx_get_tid_subframe(struct ath_softc *sc, struct ath_txq *txq, list_add(&bf->list, &bf_head); __skb_unlink(skb, *q); ath_tx_update_baw(sc, tid, seqno); - ath_tx_complete_buf(sc, bf, txq, &bf_head, &ts, 0); + ath_tx_complete_buf(sc, bf, txq, &bf_head, NULL, &ts, 0); continue; } @@ -1832,6 +1851,7 @@ static void ath_drain_txq_list(struct ath_softc *sc, struct ath_txq *txq, */ void ath_draintxq(struct ath_softc *sc, struct ath_txq *txq) { + rcu_read_lock(); ath_txq_lock(sc, txq); if (sc->sc_ah->caps.hw_caps & ATH9K_HW_CAP_EDMA) { @@ -1850,6 +1870,7 @@ void ath_draintxq(struct ath_softc *sc, struct ath_txq *txq) ath_drain_txq_list(sc, txq, &txq->axq_q); ath_txq_unlock_complete(sc, txq); + rcu_read_unlock(); } bool ath_drain_all_txq(struct ath_softc *sc) @@ -2472,7 +2493,8 @@ void ath_tx_cabq(struct ieee80211_hw *hw, struct ieee80211_vif *vif, /*****************/ static void ath_tx_complete(struct ath_softc *sc, struct sk_buff *skb, - int tx_flags, struct ath_txq *txq) + int tx_flags, struct ath_txq *txq, + struct ieee80211_sta *sta) { struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(skb); struct ath_common *common = ath9k_hw_common(sc->sc_ah); @@ -2492,15 +2514,17 @@ static void ath_tx_complete(struct ath_softc *sc, struct sk_buff *skb, tx_info->flags |= IEEE80211_TX_STAT_ACK; } - padpos = ieee80211_hdrlen(hdr->frame_control); - padsize = padpos & 3; - if (padsize && skb->len>padpos+padsize) { - /* - * Remove MAC header padding before giving the frame back to - * mac80211. - */ - memmove(skb->data + padsize, skb->data, padpos); - skb_pull(skb, padsize); + if (tx_info->flags & IEEE80211_TX_CTL_REQ_TX_STATUS) { + padpos = ieee80211_hdrlen(hdr->frame_control); + padsize = padpos & 3; + if (padsize && skb->len>padpos+padsize) { + /* + * Remove MAC header padding before giving the frame back to + * mac80211. + */ + memmove(skb->data + padsize, skb->data, padpos); + skb_pull(skb, padsize); + } } spin_lock_irqsave(&sc->sc_pm_lock, flags); @@ -2515,12 +2539,14 @@ static void ath_tx_complete(struct ath_softc *sc, struct sk_buff *skb, } spin_unlock_irqrestore(&sc->sc_pm_lock, flags); - __skb_queue_tail(&txq->complete_q, skb); ath_txq_skb_done(sc, txq, skb); + tx_info->status.status_driver_data[0] = sta; + __skb_queue_tail(&txq->complete_q, skb); } static void ath_tx_complete_buf(struct ath_softc *sc, struct ath_buf *bf, struct ath_txq *txq, struct list_head *bf_q, + struct ieee80211_sta *sta, struct ath_tx_status *ts, int txok) { struct sk_buff *skb = bf->bf_mpdu; @@ -2548,7 +2574,7 @@ static void ath_tx_complete_buf(struct ath_softc *sc, struct ath_buf *bf, complete(&sc->paprd_complete); } else { ath_debug_stat_tx(sc, bf, ts, txq, tx_flags); - ath_tx_complete(sc, skb, tx_flags, txq); + ath_tx_complete(sc, skb, tx_flags, txq, sta); } skip_tx_complete: /* At this point, skb (bf->bf_mpdu) is consumed...make sure we don't @@ -2700,10 +2726,12 @@ void ath_tx_tasklet(struct ath_softc *sc) u32 qcumask = ((1 << ATH9K_NUM_TX_QUEUES) - 1) & ah->intr_txqs; int i; + rcu_read_lock(); for (i = 0; i < ATH9K_NUM_TX_QUEUES; i++) { if (ATH_TXQ_SETUP(sc, i) && (qcumask & (1 << i))) ath_tx_processq(sc, &sc->tx.txq[i]); } + rcu_read_unlock(); } void ath_tx_edma_tasklet(struct ath_softc *sc) @@ -2717,6 +2745,7 @@ void ath_tx_edma_tasklet(struct ath_softc *sc) struct list_head *fifo_list; int status; + rcu_read_lock(); for (;;) { if (test_bit(ATH_OP_HW_RESET, &common->op_flags)) break; @@ -2787,6 +2816,7 @@ void ath_tx_edma_tasklet(struct ath_softc *sc) ath_tx_process_buffer(sc, txq, &ts, bf, &bf_head); ath_txq_unlock_complete(sc, txq); } + rcu_read_unlock(); } /*****************/ From 315c457ff123d5e36eb5fa41ed41512609f64bee Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 2 Sep 2016 19:46:13 +0300 Subject: [PATCH 0844/1715] ath9k: improve powersave filter handling For non-aggregated frames, ath9k was leaving handling of powersave filtered packets to mac80211. This can be too slow if the intermediate queue is already filled with packets and mac80211 does not immediately send a new packet via drv_tx(). Improve response time with filtered frames by triggering clearing the powersave filter internally. Signed-off-by: Felix Fietkau Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath9k/xmit.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c index b99ffa035c77..52bfbb988611 100644 --- a/drivers/net/wireless/ath/ath9k/xmit.c +++ b/drivers/net/wireless/ath/ath9k/xmit.c @@ -446,13 +446,13 @@ static void ath_tx_count_frames(struct ath_softc *sc, struct ath_buf *bf, static void ath_tx_complete_aggr(struct ath_softc *sc, struct ath_txq *txq, struct ath_buf *bf, struct list_head *bf_q, struct ieee80211_sta *sta, + struct ath_atx_tid *tid, struct ath_tx_status *ts, int txok) { struct ath_node *an = NULL; struct sk_buff *skb; struct ieee80211_hdr *hdr; struct ieee80211_tx_info *tx_info; - struct ath_atx_tid *tid = NULL; struct ath_buf *bf_next, *bf_last = bf->bf_lastbf; struct list_head bf_head; struct sk_buff_head bf_pending; @@ -494,7 +494,6 @@ static void ath_tx_complete_aggr(struct ath_softc *sc, struct ath_txq *txq, } an = (struct ath_node *)sta->drv_priv; - tid = ath_get_skb_tid(sc, an, skb); seq_first = tid->seq_start; isba = ts->ts_flags & ATH9K_TX_BA; @@ -680,6 +679,7 @@ static void ath_tx_process_buffer(struct ath_softc *sc, struct ath_txq *txq, struct ieee80211_tx_info *info; struct ieee80211_sta *sta; struct ieee80211_hdr *hdr; + struct ath_atx_tid *tid = NULL; bool txok, flush; txok = !(ts->ts_status & ATH9K_TXERR_MASK); @@ -695,6 +695,12 @@ static void ath_tx_process_buffer(struct ath_softc *sc, struct ath_txq *txq, hdr = (struct ieee80211_hdr *) bf->bf_mpdu->data; sta = ieee80211_find_sta_by_ifaddr(hw, hdr->addr1, hdr->addr2); + if (sta) { + struct ath_node *an = (struct ath_node *)sta->drv_priv; + tid = ath_get_skb_tid(sc, an, bf->bf_mpdu); + if (ts->ts_status & (ATH9K_TXERR_FILT | ATH9K_TXERR_XRETRY)) + tid->clear_ps_filter = true; + } if (!bf_isampdu(bf)) { if (!flush) { @@ -706,7 +712,7 @@ static void ath_tx_process_buffer(struct ath_softc *sc, struct ath_txq *txq, } ath_tx_complete_buf(sc, bf, txq, bf_head, sta, ts, txok); } else - ath_tx_complete_aggr(sc, txq, bf, bf_head, sta, ts, txok); + ath_tx_complete_aggr(sc, txq, bf, bf_head, sta, tid, ts, txok); if (!flush) ath_txq_schedule(sc, txq); From fe79f6314a717cf031a6c04f180910583633c37e Mon Sep 17 00:00:00 2001 From: Mohammed Shafi Shajakhan Date: Thu, 1 Sep 2016 21:43:36 +0530 Subject: [PATCH 0845/1715] ath10k: remove unnecessary error code assignment The error assigned does not seems to be used anywhere, fixes nothing just a small cleanup Signed-off-by: Mohammed Shafi Shajakhan Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/wmi.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c index eb4ab6fa688f..38993d72f5e6 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.c +++ b/drivers/net/wireless/ath/ath10k/wmi.c @@ -3563,7 +3563,6 @@ void ath10k_wmi_event_host_swba(struct ath10k *ar, struct sk_buff *skb) ath10k_warn(ar, "failed to map beacon: %d\n", ret); dev_kfree_skb_any(bcn); - ret = -EIO; goto skip; } From 77eb3d693182b4eaa88c6ba406fbb92b1f1bd636 Mon Sep 17 00:00:00 2001 From: Ashok Raj Nagarajan Date: Fri, 2 Sep 2016 10:59:53 +0530 Subject: [PATCH 0846/1715] ath10k: fix reporting channel survey data When user requests for survey dump data, driver is providing wrong survey information. This information we sent is the survey data that we have collected during previous user request. This issue occurs because we request survey dump for wrong channel. With this change, we correctly display the correct and current survey information to userspace. Fixes: fa7937e3d5c2 ("ath10k: update bss channel survey information") Signed-off-by: Ashok Raj Nagarajan Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/mac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index 4565321ad762..c4d965fe990e 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -6576,7 +6576,7 @@ static int ath10k_get_survey(struct ieee80211_hw *hw, int idx, goto exit; } - ath10k_mac_update_bss_chan_survey(ar, survey->channel); + ath10k_mac_update_bss_chan_survey(ar, &sband->channels[idx]); spin_lock_bh(&ar->data_lock); memcpy(survey, ar_survey, sizeof(*survey)); From b5182e157d3a1d94d7ee6b4f4cb8267f9d7ad606 Mon Sep 17 00:00:00 2001 From: Bob Copeland Date: Sat, 16 Apr 2016 10:54:37 -0400 Subject: [PATCH 0847/1715] ath9k: remove repetitions of mask array size The constant "123", which is the number of elements in mask_m / mask_p, is repeated several times in this function. Replace memsets with array initialization, and replace a loop conditional with ARRAY_SIZE() so that we don't repeat ourselves. Signed-off-by: Bob Copeland Reviewed-by: Oleksij Rempel Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath9k/ar5008_phy.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/ar5008_phy.c b/drivers/net/wireless/ath/ath9k/ar5008_phy.c index 1b271b99c49e..8eea8d22e72e 100644 --- a/drivers/net/wireless/ath/ath9k/ar5008_phy.c +++ b/drivers/net/wireless/ath/ath9k/ar5008_phy.c @@ -260,8 +260,8 @@ void ar5008_hw_cmn_spur_mitigate(struct ath_hw *ah, int cur_bin; int upper, lower, cur_vit_mask; int i; - int8_t mask_m[123]; - int8_t mask_p[123]; + int8_t mask_m[123] = {0}; + int8_t mask_p[123] = {0}; int8_t mask_amt; int tmp_mask; static const int pilot_mask_reg[4] = { @@ -274,9 +274,6 @@ void ar5008_hw_cmn_spur_mitigate(struct ath_hw *ah, }; static const int inc[4] = { 0, 100, 0, 0 }; - memset(&mask_m, 0, sizeof(int8_t) * 123); - memset(&mask_p, 0, sizeof(int8_t) * 123); - cur_bin = -6000; upper = bin + 100; lower = bin - 100; @@ -302,7 +299,7 @@ void ar5008_hw_cmn_spur_mitigate(struct ath_hw *ah, upper = bin + 120; lower = bin - 120; - for (i = 0; i < 123; i++) { + for (i = 0; i < ARRAY_SIZE(mask_m); i++) { if ((cur_vit_mask > lower) && (cur_vit_mask < upper)) { /* workaround for gcc bug #37014 */ volatile int tmp_v = abs(cur_vit_mask - bin); From f3fa63144482d6dffd26d8b0a94b06a55d22d940 Mon Sep 17 00:00:00 2001 From: Dan Kephart Date: Wed, 3 Aug 2016 16:43:43 -0400 Subject: [PATCH 0848/1715] ath6kl: enable firmware crash dumps on the AR6004 The firmware crash dumps on the 6004 are the same as the 6003. Remove the statement guarding it from dumping on the 6004. Renamed the REG_DUMP_COUNT_AR6003 to reflect support on both chips. Signed-off-by: Dan Kephart Reviewed-by: Steve deRosier Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath6kl/hif.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/ath/ath6kl/hif.c b/drivers/net/wireless/ath/ath6kl/hif.c index 18c070850a09..d1942537ea10 100644 --- a/drivers/net/wireless/ath/ath6kl/hif.c +++ b/drivers/net/wireless/ath/ath6kl/hif.c @@ -64,7 +64,7 @@ int ath6kl_hif_rw_comp_handler(void *context, int status) } EXPORT_SYMBOL(ath6kl_hif_rw_comp_handler); -#define REG_DUMP_COUNT_AR6003 60 +#define REGISTER_DUMP_COUNT 60 #define REGISTER_DUMP_LEN_MAX 60 static void ath6kl_hif_dump_fw_crash(struct ath6kl *ar) @@ -73,9 +73,6 @@ static void ath6kl_hif_dump_fw_crash(struct ath6kl *ar) u32 i, address, regdump_addr = 0; int ret; - if (ar->target_type != TARGET_TYPE_AR6003) - return; - /* the reg dump pointer is copied to the host interest area */ address = ath6kl_get_hi_item_addr(ar, HI_ITEM(hi_failure_state)); address = TARG_VTOP(ar->target_type, address); @@ -95,7 +92,7 @@ static void ath6kl_hif_dump_fw_crash(struct ath6kl *ar) /* fetch register dump data */ ret = ath6kl_diag_read(ar, regdump_addr, (u8 *)®dump_val[0], - REG_DUMP_COUNT_AR6003 * (sizeof(u32))); + REGISTER_DUMP_COUNT * (sizeof(u32))); if (ret) { ath6kl_warn("failed to get register dump: %d\n", ret); return; @@ -105,9 +102,9 @@ static void ath6kl_hif_dump_fw_crash(struct ath6kl *ar) ath6kl_info("hw 0x%x fw %s\n", ar->wiphy->hw_version, ar->wiphy->fw_version); - BUILD_BUG_ON(REG_DUMP_COUNT_AR6003 % 4); + BUILD_BUG_ON(REGISTER_DUMP_COUNT % 4); - for (i = 0; i < REG_DUMP_COUNT_AR6003; i += 4) { + for (i = 0; i < REGISTER_DUMP_COUNT; i += 4) { ath6kl_info("%d: 0x%8.8x 0x%8.8x 0x%8.8x 0x%8.8x\n", i, le32_to_cpu(regdump_val[i]), From cabd34d0e9c47ac8747e7a1d871e10acdb8e980f Mon Sep 17 00:00:00 2001 From: Eric Bentley Date: Tue, 30 Aug 2016 02:16:19 -0400 Subject: [PATCH 0849/1715] ath6kl: Allow the radio to report 0 dbm txpower without timing out The ath6kl driver attempts to get the txpower value from the radio by first clearing the existing stored value and then watching the value to become non-zero. APs allow setting client power to values from -127..127, but this radio is not capable of setting values less then 0 and so will report txpower as 0dbm for both negative and 0 client power values. When the radio has txpower set to 0dbm txpower (equivalent to 1mw) the ath6kl_cfg80211_get_txpower() function will remain in the wait_event_interruptible_timeout() loop waiting for the value to be non-zero, and will eventually timeout. This results in a 5 second delay in response. However, the correct value of zero is eventually returned. The 6004 defaults to 63dbm which is then limited by regulatory and hardware limits with max of 18dbm (6003 max is 16dbm), therefore we can use values larger then these to be able to determine when the value has been updated. To correct the issue, set the value to a nonsensical value (255) and wait for it to change to the valid value. Tested on both 6003 and 6004 based radios. Return value of zero is correctly returned in an expected amount of time (similar to when returning non-zero values) when AP client power is set to both 0 and negative values. Signed-off-by: Eric Bentley Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath6kl/cfg80211.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c index 72e2ec67768d..b7fe0af4cb24 100644 --- a/drivers/net/wireless/ath/ath6kl/cfg80211.c +++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c @@ -1449,14 +1449,14 @@ static int ath6kl_cfg80211_get_txpower(struct wiphy *wiphy, return -EIO; if (test_bit(CONNECTED, &vif->flags)) { - ar->tx_pwr = 0; + ar->tx_pwr = 255; if (ath6kl_wmi_get_tx_pwr_cmd(ar->wmi, vif->fw_vif_idx) != 0) { ath6kl_err("ath6kl_wmi_get_tx_pwr_cmd failed\n"); return -EIO; } - wait_event_interruptible_timeout(ar->event_wq, ar->tx_pwr != 0, + wait_event_interruptible_timeout(ar->event_wq, ar->tx_pwr != 255, 5 * HZ); if (signal_pending(current)) { From 78a9e170388b672f609cb6e8e097e0ddca24e6f5 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Thu, 18 Aug 2016 15:12:05 +0200 Subject: [PATCH 0850/1715] carl9170: Fix wrong completion usage carl9170_usb_stop() is used from several places to flush and cleanup any pending work. The normal pattern is to send a request and wait for the irq handler to call complete(). The completion is not reinitialized during normal operation and as the old comment indicates it is important to keep calls to wait_for_completion_timeout() and complete() balanced. Calling complete_all() brings this equilibirum out of balance and needs to be fixed by a reinit_completion(). But that opens a small race window. It is possible that the sequence of complete_all(), reinit_completion() is faster than the wait_for_completion_timeout() can do its work. The wake up is not lost but the done counter test is after reinit_completion() has been executed. The only reason we don't see carl9170_exec_cmd() hang forever is we use the timeout version of wait_for_copletion(). Let's fix this by reinitializing the completion (that is just setting done counter to 0) just before we send out an request. Now, carl9170_usb_stop() can be sure a complete() call is enough to make progess since there is only one waiter at max. This is a common pattern also seen in various drivers which use completion. Signed-off-by: Daniel Wagner Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/carl9170/usb.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/ath/carl9170/usb.c b/drivers/net/wireless/ath/carl9170/usb.c index 76842e6ca38e..99ab20334d21 100644 --- a/drivers/net/wireless/ath/carl9170/usb.c +++ b/drivers/net/wireless/ath/carl9170/usb.c @@ -670,6 +670,7 @@ int carl9170_exec_cmd(struct ar9170 *ar, const enum carl9170_cmd_oids cmd, ar->readlen = outlen; spin_unlock_bh(&ar->cmd_lock); + reinit_completion(&ar->cmd_wait); err = __carl9170_exec_cmd(ar, &ar->cmd, false); if (!(cmd & CARL9170_CMD_ASYNC_FLAG)) { @@ -778,10 +779,7 @@ void carl9170_usb_stop(struct ar9170 *ar) spin_lock_bh(&ar->cmd_lock); ar->readlen = 0; spin_unlock_bh(&ar->cmd_lock); - complete_all(&ar->cmd_wait); - - /* This is required to prevent an early completion on _start */ - reinit_completion(&ar->cmd_wait); + complete(&ar->cmd_wait); /* * Note: From fe01111d23810c0cf6830ce5af1c14c6d3df6dc5 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Tue, 6 Sep 2016 22:33:37 +0800 Subject: [PATCH 0851/1715] netfilter: nft_queue: check the validation of queues_total and queuenum Although the validation of queues_total and queuenum is checked in nft utility, but user can add nft rules via nfnetlink, so it is necessary to check the validation at the nft_queue expr init routine too. Tested by run ./nft-test.py any/queue.t: any/queue.t: 6 unit tests, 0 error, 0 warning Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_queue.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c index 61d216eb7917..d16d59959ff6 100644 --- a/net/netfilter/nft_queue.c +++ b/net/netfilter/nft_queue.c @@ -65,6 +65,7 @@ static int nft_queue_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_queue *priv = nft_expr_priv(expr); + u32 maxid; if (tb[NFTA_QUEUE_NUM] == NULL) return -EINVAL; @@ -74,6 +75,16 @@ static int nft_queue_init(const struct nft_ctx *ctx, if (tb[NFTA_QUEUE_TOTAL] != NULL) priv->queues_total = ntohs(nla_get_be16(tb[NFTA_QUEUE_TOTAL])); + else + priv->queues_total = 1; + + if (priv->queues_total == 0) + return -EINVAL; + + maxid = priv->queues_total - 1 + priv->queuenum; + if (maxid > U16_MAX) + return -ERANGE; + if (tb[NFTA_QUEUE_FLAGS] != NULL) { priv->flags = ntohs(nla_get_be16(tb[NFTA_QUEUE_FLAGS])); if (priv->flags & ~NFT_QUEUE_FLAG_MASK) From 3c15b8e112d7351b2586c649a759318548523364 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Tue, 6 Sep 2016 22:35:47 +0800 Subject: [PATCH 0852/1715] netfilter: nf_conntrack: remove unused ctl_table_path member in nf_conntrack_l3proto After commit adf0516845bc ("netfilter: remove ip_conntrack* sysctl compat code"), ctl_table_path member in struct nf_conntrack_l3proto{} is not used anymore, remove it. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l3proto.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h index cdc920b4c4c2..8992e4229da9 100644 --- a/include/net/netfilter/nf_conntrack_l3proto.h +++ b/include/net/netfilter/nf_conntrack_l3proto.h @@ -63,10 +63,6 @@ struct nf_conntrack_l3proto { size_t nla_size; -#ifdef CONFIG_SYSCTL - const char *ctl_table_path; -#endif /* CONFIG_SYSCTL */ - /* Init l3proto pernet data */ int (*init_net)(struct net *net); From b93e1fa7106582e3a81cc818b719e0341585ff1b Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 7 Sep 2016 17:20:46 +0200 Subject: [PATCH 0853/1715] ipv4: fix value of ->nlmsg_flags reported in RTM_NEWROUTE events fib_table_insert() inconsistently fills the nlmsg_flags field in its notification messages. Since commit b8f558313506 ("[RTNETLINK]: Fix sending netlink message when replace route."), the netlink message has its nlmsg_flags set to NLM_F_REPLACE if the route replaced a preexisting one. Then commit a2bb6d7d6f42 ("ipv4: include NLM_F_APPEND flag in append route notifications") started setting nlmsg_flags to NLM_F_APPEND if the route matched a preexisting one but was appended. In other cases (exclusive creation or prepend), nlmsg_flags is 0. This patch sets ->nlmsg_flags in all situations, preserving the semantic of the NLM_F_* bits: * NLM_F_CREATE: a new fib entry has been created for this route. * NLM_F_EXCL: no other fib entry existed for this route. * NLM_F_REPLACE: this route has overwritten a preexisting fib entry. * NLM_F_APPEND: the new fib entry was added after other entries for the same route. As a result, the possible flag combination can now be reported (iproute2's terminology into parentheses): * NLM_F_CREATE | NLM_F_EXCL: route didn't exist, exclusive creation ("add"). * NLM_F_CREATE | NLM_F_APPEND: route did already exist, new route added after preexisting ones ("append"). * NLM_F_CREATE: route did already exist, new route added before preexisting ones ("prepend"). * NLM_F_REPLACE: route did already exist, new route replaced the first preexisting one ("change"). Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index e2ffc2a5c7db..241f27bbd7ad 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1081,7 +1081,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) struct trie *t = (struct trie *)tb->tb_data; struct fib_alias *fa, *new_fa; struct key_vector *l, *tp; - unsigned int nlflags = 0; + u16 nlflags = NLM_F_EXCL; struct fib_info *fi; u8 plen = cfg->fc_dst_len; u8 slen = KEYLENGTH - plen; @@ -1126,6 +1126,8 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) if (cfg->fc_nlflags & NLM_F_EXCL) goto out; + nlflags &= ~NLM_F_EXCL; + /* We have 2 goals: * 1. Find exact match for type, scope, fib_info to avoid * duplicate routes @@ -1151,6 +1153,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) struct fib_info *fi_drop; u8 state; + nlflags |= NLM_F_REPLACE; fa = fa_first; if (fa_match) { if (fa == fa_match) @@ -1191,7 +1194,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) if (state & FA_S_ACCESSED) rt_cache_flush(cfg->fc_nlinfo.nl_net); rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, - tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE); + tb->tb_id, &cfg->fc_nlinfo, nlflags); goto succeeded; } @@ -1203,7 +1206,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) goto out; if (cfg->fc_nlflags & NLM_F_APPEND) - nlflags = NLM_F_APPEND; + nlflags |= NLM_F_APPEND; else fa = fa_first; } @@ -1211,6 +1214,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) if (!(cfg->fc_nlflags & NLM_F_CREATE)) goto out; + nlflags |= NLM_F_CREATE; err = -ENOBUFS; new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL); if (!new_fa) From 73483c1289d148282be3aac3ad30b4aa1f8fac87 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 7 Sep 2016 17:21:40 +0200 Subject: [PATCH 0854/1715] ipv6: report NLM_F_CREATE and NLM_F_EXCL flags in RTM_NEWROUTE events Since commit 37a1d3611c12 ("ipv6: include NLM_F_REPLACE in route replace notifications"), RTM_NEWROUTE notifications have their NLM_F_REPLACE flag set if the new route replaced a preexisting one. However, other flags aren't set. This patch reports the missing NLM_F_CREATE and NLM_F_EXCL flag bits. NLM_F_APPEND is not reported, because in ipv6 a NLM_F_CREATE request is interpreted as an append request (contrary to ipv4, "prepend" is not supported, so if NLM_F_EXCL is not set then NLM_F_APPEND is implicit). As a result, the possible flag combination can now be reported (iproute2's terminology into parentheses): * NLM_F_CREATE | NLM_F_EXCL: route didn't exist, exclusive creation ("add"). * NLM_F_CREATE: route did already exist, new route added after preexisting ones ("append"). * NLM_F_REPLACE: route did already exist, new route replaced the first preexisting one ("change"). Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 771be1fa4176..ef5485204522 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -743,6 +743,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, (info->nlh->nlmsg_flags & NLM_F_CREATE)); int found = 0; bool rt_can_ecmp = rt6_qualify_for_ecmp(rt); + u16 nlflags = NLM_F_EXCL; int err; ins = &fn->leaf; @@ -759,6 +760,8 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, if (info->nlh && (info->nlh->nlmsg_flags & NLM_F_EXCL)) return -EEXIST; + + nlflags &= ~NLM_F_EXCL; if (replace) { if (rt_can_ecmp == rt6_qualify_for_ecmp(iter)) { found++; @@ -856,6 +859,7 @@ next_iter: pr_warn("NLM_F_CREATE should be set when creating new route\n"); add: + nlflags |= NLM_F_CREATE; err = fib6_commit_metrics(&rt->dst, mxc); if (err) return err; @@ -864,7 +868,7 @@ add: *ins = rt; rt->rt6i_node = fn; atomic_inc(&rt->rt6i_ref); - inet6_rt_notify(RTM_NEWROUTE, rt, info, 0); + inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags); info->nl_net->ipv6.rt6_stats->fib_rt_entries++; if (!(fn->fn_flags & RTN_RTINFO)) { From b8b867e132d2c32f16a49b3ce5c11ee289a92c4e Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 7 Sep 2016 13:57:36 -0700 Subject: [PATCH 0855/1715] rtnetlink: remove unused ifla_stats_policy This structure is defined but never used. Flagged with W=1 Signed-off-by: Stephen Hemminger Acked-by: Roopa Prabhu Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 1dfca1c3f8f5..937e459bdaa9 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3669,10 +3669,6 @@ nla_put_failure: return -EMSGSIZE; } -static const struct nla_policy ifla_stats_policy[IFLA_STATS_MAX + 1] = { - [IFLA_STATS_LINK_64] = { .len = sizeof(struct rtnl_link_stats64) }, -}; - static size_t if_nlmsg_stats_size(const struct net_device *dev, u32 filter_mask) { From c24acf03c7352bd10a99e58b0366b2acf8722856 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 7 Sep 2016 14:07:32 -0700 Subject: [PATCH 0856/1715] macsec: set network devtype The netdevice type structure for macsec was being defined but never used. To set the network device type the macro SET_NETDEV_DEVTYPE must be called. Compile tested only, I don't use macsec. Signed-off-by: Stephen Hemminger Acked-by: Sabrina Dubroca Signed-off-by: David S. Miller --- drivers/net/macsec.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 351e701eb043..3ea47f28e143 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -2973,6 +2973,7 @@ static void macsec_setup(struct net_device *dev) dev->priv_flags |= IFF_NO_QUEUE; dev->netdev_ops = &macsec_netdev_ops; dev->destructor = macsec_free_netdev; + SET_NETDEV_DEVTYPE(dev, &macsec_type); eth_zero_addr(dev->broadcast); } From 05f1b12f71a49848730a0eb9acda032d5c43432b Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 8 Sep 2016 08:42:06 +0100 Subject: [PATCH 0857/1715] net: x25: remove null checks on arrays calling_ae and called_ae dtefacs.calling_ae and called_ae are both 20 element __u8 arrays and cannot be null and hence are redundant checks. Remove these. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- net/x25/af_x25.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index a750f330b8dd..f83b74d3e2ac 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -1500,12 +1500,8 @@ out_fac_release: goto out_dtefac_release; if (dtefacs.calling_len > X25_MAX_AE_LEN) goto out_dtefac_release; - if (dtefacs.calling_ae == NULL) - goto out_dtefac_release; if (dtefacs.called_len > X25_MAX_AE_LEN) goto out_dtefac_release; - if (dtefacs.called_ae == NULL) - goto out_dtefac_release; x25->dte_facilities = dtefacs; rc = 0; out_dtefac_release: From 4ffd03f5e47d18e06543f585d71a5540e7e61f0e Mon Sep 17 00:00:00 2001 From: Raju Lakkaraju Date: Thu, 8 Sep 2016 14:09:31 +0530 Subject: [PATCH 0858/1715] net: phy: Fixed checkpatch errors for Microsemi PHYs. The existing VSC85xx PHY driver did not follow the coding style and caused "checkpatch" to complain. This commit fixes this. Signed-off-by: Raju Lakkaraju Signed-off-by: David S. Miller --- drivers/net/phy/Kconfig | 6 +- drivers/net/phy/mscc.c | 174 ++++++++++++++++++++-------------------- 2 files changed, 90 insertions(+), 90 deletions(-) diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index 1c3e07c3d0b8..87b566f54cc1 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -274,9 +274,9 @@ config MICROCHIP_PHY Supports the LAN88XX PHYs. config MICROSEMI_PHY - tristate "Microsemi PHYs" - ---help--- - Currently supports the VSC8531 and VSC8541 PHYs + tristate "Microsemi PHYs" + ---help--- + Currently supports the VSC8531 and VSC8541 PHYs config NATIONAL_PHY tristate "National Semiconductor PHYs" diff --git a/drivers/net/phy/mscc.c b/drivers/net/phy/mscc.c index ad33390b382a..c09cc4a3d166 100644 --- a/drivers/net/phy/mscc.c +++ b/drivers/net/phy/mscc.c @@ -13,135 +13,135 @@ #include enum rgmii_rx_clock_delay { - RGMII_RX_CLK_DELAY_0_2_NS = 0, - RGMII_RX_CLK_DELAY_0_8_NS = 1, - RGMII_RX_CLK_DELAY_1_1_NS = 2, - RGMII_RX_CLK_DELAY_1_7_NS = 3, - RGMII_RX_CLK_DELAY_2_0_NS = 4, - RGMII_RX_CLK_DELAY_2_3_NS = 5, - RGMII_RX_CLK_DELAY_2_6_NS = 6, - RGMII_RX_CLK_DELAY_3_4_NS = 7 + RGMII_RX_CLK_DELAY_0_2_NS = 0, + RGMII_RX_CLK_DELAY_0_8_NS = 1, + RGMII_RX_CLK_DELAY_1_1_NS = 2, + RGMII_RX_CLK_DELAY_1_7_NS = 3, + RGMII_RX_CLK_DELAY_2_0_NS = 4, + RGMII_RX_CLK_DELAY_2_3_NS = 5, + RGMII_RX_CLK_DELAY_2_6_NS = 6, + RGMII_RX_CLK_DELAY_3_4_NS = 7 }; -#define MII_VSC85XX_INT_MASK 25 -#define MII_VSC85XX_INT_MASK_MASK 0xa000 -#define MII_VSC85XX_INT_STATUS 26 +#define MII_VSC85XX_INT_MASK 25 +#define MII_VSC85XX_INT_MASK_MASK 0xa000 +#define MII_VSC85XX_INT_STATUS 26 -#define MSCC_EXT_PAGE_ACCESS 31 -#define MSCC_PHY_PAGE_STANDARD 0x0000 /* Standard registers */ -#define MSCC_PHY_PAGE_EXTENDED_2 0x0002 /* Extended reg - page 2 */ +#define MSCC_EXT_PAGE_ACCESS 31 +#define MSCC_PHY_PAGE_STANDARD 0x0000 /* Standard registers */ +#define MSCC_PHY_PAGE_EXTENDED_2 0x0002 /* Extended reg - page 2 */ /* Extended Page 2 Registers */ -#define MSCC_PHY_RGMII_CNTL 20 -#define RGMII_RX_CLK_DELAY_MASK 0x0070 -#define RGMII_RX_CLK_DELAY_POS 4 +#define MSCC_PHY_RGMII_CNTL 20 +#define RGMII_RX_CLK_DELAY_MASK 0x0070 +#define RGMII_RX_CLK_DELAY_POS 4 /* Microsemi PHY ID's */ -#define PHY_ID_VSC8531 0x00070570 -#define PHY_ID_VSC8541 0x00070770 +#define PHY_ID_VSC8531 0x00070570 +#define PHY_ID_VSC8541 0x00070770 static int vsc85xx_phy_page_set(struct phy_device *phydev, u8 page) { - int rc; + int rc; - rc = phy_write(phydev, MSCC_EXT_PAGE_ACCESS, page); - return rc; + rc = phy_write(phydev, MSCC_EXT_PAGE_ACCESS, page); + return rc; } static int vsc85xx_default_config(struct phy_device *phydev) { - int rc; - u16 reg_val; + int rc; + u16 reg_val; - mutex_lock(&phydev->lock); - rc = vsc85xx_phy_page_set(phydev, MSCC_PHY_PAGE_EXTENDED_2); - if (rc != 0) - goto out_unlock; + mutex_lock(&phydev->lock); + rc = vsc85xx_phy_page_set(phydev, MSCC_PHY_PAGE_EXTENDED_2); + if (rc != 0) + goto out_unlock; - reg_val = phy_read(phydev, MSCC_PHY_RGMII_CNTL); - reg_val &= ~(RGMII_RX_CLK_DELAY_MASK); - reg_val |= (RGMII_RX_CLK_DELAY_1_1_NS << RGMII_RX_CLK_DELAY_POS); - phy_write(phydev, MSCC_PHY_RGMII_CNTL, reg_val); - rc = vsc85xx_phy_page_set(phydev, MSCC_PHY_PAGE_STANDARD); + reg_val = phy_read(phydev, MSCC_PHY_RGMII_CNTL); + reg_val &= ~(RGMII_RX_CLK_DELAY_MASK); + reg_val |= (RGMII_RX_CLK_DELAY_1_1_NS << RGMII_RX_CLK_DELAY_POS); + phy_write(phydev, MSCC_PHY_RGMII_CNTL, reg_val); + rc = vsc85xx_phy_page_set(phydev, MSCC_PHY_PAGE_STANDARD); out_unlock: - mutex_unlock(&phydev->lock); + mutex_unlock(&phydev->lock); - return rc; + return rc; } static int vsc85xx_config_init(struct phy_device *phydev) { - int rc; + int rc; - rc = vsc85xx_default_config(phydev); - if (rc) - return rc; - rc = genphy_config_init(phydev); + rc = vsc85xx_default_config(phydev); + if (rc) + return rc; + rc = genphy_config_init(phydev); - return rc; + return rc; } static int vsc85xx_ack_interrupt(struct phy_device *phydev) { - int rc = 0; + int rc = 0; - if (phydev->interrupts == PHY_INTERRUPT_ENABLED) - rc = phy_read(phydev, MII_VSC85XX_INT_STATUS); + if (phydev->interrupts == PHY_INTERRUPT_ENABLED) + rc = phy_read(phydev, MII_VSC85XX_INT_STATUS); - return (rc < 0) ? rc : 0; + return (rc < 0) ? rc : 0; } static int vsc85xx_config_intr(struct phy_device *phydev) { - int rc; + int rc; - if (phydev->interrupts == PHY_INTERRUPT_ENABLED) { - rc = phy_write(phydev, MII_VSC85XX_INT_MASK, - MII_VSC85XX_INT_MASK_MASK); - } else { - rc = phy_write(phydev, MII_VSC85XX_INT_MASK, 0); - if (rc < 0) - return rc; - rc = phy_read(phydev, MII_VSC85XX_INT_STATUS); - } + if (phydev->interrupts == PHY_INTERRUPT_ENABLED) { + rc = phy_write(phydev, MII_VSC85XX_INT_MASK, + MII_VSC85XX_INT_MASK_MASK); + } else { + rc = phy_write(phydev, MII_VSC85XX_INT_MASK, 0); + if (rc < 0) + return rc; + rc = phy_read(phydev, MII_VSC85XX_INT_STATUS); + } - return rc; + return rc; } /* Microsemi VSC85xx PHYs */ static struct phy_driver vsc85xx_driver[] = { { - .phy_id = PHY_ID_VSC8531, - .name = "Microsemi VSC8531", - .phy_id_mask = 0xfffffff0, - .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, - .soft_reset = &genphy_soft_reset, - .config_init = &vsc85xx_config_init, - .config_aneg = &genphy_config_aneg, - .aneg_done = &genphy_aneg_done, - .read_status = &genphy_read_status, - .ack_interrupt = &vsc85xx_ack_interrupt, - .config_intr = &vsc85xx_config_intr, - .suspend = &genphy_suspend, - .resume = &genphy_resume, + .phy_id = PHY_ID_VSC8531, + .name = "Microsemi VSC8531", + .phy_id_mask = 0xfffffff0, + .features = PHY_GBIT_FEATURES, + .flags = PHY_HAS_INTERRUPT, + .soft_reset = &genphy_soft_reset, + .config_init = &vsc85xx_config_init, + .config_aneg = &genphy_config_aneg, + .aneg_done = &genphy_aneg_done, + .read_status = &genphy_read_status, + .ack_interrupt = &vsc85xx_ack_interrupt, + .config_intr = &vsc85xx_config_intr, + .suspend = &genphy_suspend, + .resume = &genphy_resume, }, { - .phy_id = PHY_ID_VSC8541, - .name = "Microsemi VSC8541 SyncE", - .phy_id_mask = 0xfffffff0, - .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, - .soft_reset = &genphy_soft_reset, - .config_init = &vsc85xx_config_init, - .config_aneg = &genphy_config_aneg, - .aneg_done = &genphy_aneg_done, - .read_status = &genphy_read_status, - .ack_interrupt = &vsc85xx_ack_interrupt, - .config_intr = &vsc85xx_config_intr, - .suspend = &genphy_suspend, - .resume = &genphy_resume, + .phy_id = PHY_ID_VSC8541, + .name = "Microsemi VSC8541 SyncE", + .phy_id_mask = 0xfffffff0, + .features = PHY_GBIT_FEATURES, + .flags = PHY_HAS_INTERRUPT, + .soft_reset = &genphy_soft_reset, + .config_init = &vsc85xx_config_init, + .config_aneg = &genphy_config_aneg, + .aneg_done = &genphy_aneg_done, + .read_status = &genphy_read_status, + .ack_interrupt = &vsc85xx_ack_interrupt, + .config_intr = &vsc85xx_config_intr, + .suspend = &genphy_suspend, + .resume = &genphy_resume, } }; @@ -149,9 +149,9 @@ static struct phy_driver vsc85xx_driver[] = { module_phy_driver(vsc85xx_driver); static struct mdio_device_id __maybe_unused vsc85xx_tbl[] = { - { PHY_ID_VSC8531, 0xfffffff0, }, - { PHY_ID_VSC8541, 0xfffffff0, }, - { } + { PHY_ID_VSC8531, 0xfffffff0, }, + { PHY_ID_VSC8541, 0xfffffff0, }, + { } }; MODULE_DEVICE_TABLE(mdio, vsc85xx_tbl); From 9438451e7325815fb38db04b1da0670ecc601b5e Mon Sep 17 00:00:00 2001 From: Baoyou Xie Date: Thu, 8 Sep 2016 16:43:23 +0800 Subject: [PATCH 0859/1715] qede: mark qede_set_features() static We get 1 warning when building kernel with W=1: drivers/net/ethernet/qlogic/qede/qede_main.c:2113:5: warning: no previous prototype for 'qede_set_features' [-Wmissing-prototypes] In fact, this function is only used in the file in which it is declared and don't need a declaration, but can be made static. so this patch marks this function with 'static'. Signed-off-by: Baoyou Xie Acked-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qede/qede_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index b4a56e61631a..578bbec1ce18 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -2116,7 +2116,7 @@ static void qede_vlan_mark_nonconfigured(struct qede_dev *edev) edev->accept_any_vlan = false; } -int qede_set_features(struct net_device *dev, netdev_features_t features) +static int qede_set_features(struct net_device *dev, netdev_features_t features) { struct qede_dev *edev = netdev_priv(dev); netdev_features_t changes = features ^ dev->features; From 46dfc23e9e0823616abee670cd24acde0d900ca9 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 8 Sep 2016 10:04:24 +0100 Subject: [PATCH 0860/1715] via-velocity: remove null pointer check on array tdinfo->skb_dma tdinfo->skb_dma is a 7 element array of dma_addr_t hence cannot be null, so the pull pointer check on tdinfo->skb_dma is redundant. Remove it. Signed-off-by: Colin Ian King Acked-by: Francois Romieu Signed-off-by: David S. Miller --- drivers/net/ethernet/via/via-velocity.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/via/via-velocity.c b/drivers/net/ethernet/via/via-velocity.c index f38696ceee74..908e72e18ef7 100644 --- a/drivers/net/ethernet/via/via-velocity.c +++ b/drivers/net/ethernet/via/via-velocity.c @@ -1724,24 +1724,21 @@ static void velocity_free_tx_buf(struct velocity_info *vptr, struct velocity_td_info *tdinfo, struct tx_desc *td) { struct sk_buff *skb = tdinfo->skb; + int i; /* * Don't unmap the pre-allocated tx_bufs */ - if (tdinfo->skb_dma) { - int i; + for (i = 0; i < tdinfo->nskb_dma; i++) { + size_t pktlen = max_t(size_t, skb->len, ETH_ZLEN); - for (i = 0; i < tdinfo->nskb_dma; i++) { - size_t pktlen = max_t(size_t, skb->len, ETH_ZLEN); + /* For scatter-gather */ + if (skb_shinfo(skb)->nr_frags > 0) + pktlen = max_t(size_t, pktlen, + td->td_buf[i].size & ~TD_QUEUE); - /* For scatter-gather */ - if (skb_shinfo(skb)->nr_frags > 0) - pktlen = max_t(size_t, pktlen, - td->td_buf[i].size & ~TD_QUEUE); - - dma_unmap_single(vptr->dev, tdinfo->skb_dma[i], - le16_to_cpu(pktlen), DMA_TO_DEVICE); - } + dma_unmap_single(vptr->dev, tdinfo->skb_dma[i], + le16_to_cpu(pktlen), DMA_TO_DEVICE); } dev_kfree_skb_irq(skb); tdinfo->skb = NULL; From d9e6620c8ee108f068cd703b3b82d9a8d38c1ada Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Thu, 8 Sep 2016 14:20:17 +0200 Subject: [PATCH 0861/1715] ATM-ENI: Use kmalloc_array() in eni_start() * A multiplication for the size determination of a memory allocation indicated that an array data structure should be processed. Thus use the corresponding function "kmalloc_array". This issue was detected by using the Coccinelle software. * Replace the specification of a data structure by a pointer dereference to make the corresponding size determination a bit safer according to the Linux coding style convention. Signed-off-by: Markus Elfring Signed-off-by: David S. Miller --- drivers/atm/eni.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c index 6339efd32697..f2aaf9e32a36 100644 --- a/drivers/atm/eni.c +++ b/drivers/atm/eni.c @@ -1845,8 +1845,9 @@ static int eni_start(struct atm_dev *dev) /* initialize memory management */ buffer_mem = eni_dev->mem - (buf - eni_dev->ram); eni_dev->free_list_size = buffer_mem/MID_MIN_BUF_SIZE/2; - eni_dev->free_list = kmalloc( - sizeof(struct eni_free)*(eni_dev->free_list_size+1),GFP_KERNEL); + eni_dev->free_list = kmalloc_array(eni_dev->free_list_size + 1, + sizeof(*eni_dev->free_list), + GFP_KERNEL); if (!eni_dev->free_list) { printk(KERN_ERR DEV_LABEL "(itf %d): couldn't get free page\n", dev->number); From 2c4f414f0290e019d463f1f7f447807fd6a6470c Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Thu, 8 Sep 2016 15:43:37 +0200 Subject: [PATCH 0862/1715] ATM-ForeRunnerHE: Use kmalloc_array() in he_init_group() * Multiplications for the size determination of memory allocations indicated that array data structures should be processed. Thus use the corresponding function "kmalloc_array". This issue was detected by using the Coccinelle software. * Replace the specification of data types by pointer dereferences to make the corresponding size determination a bit safer according to the Linux coding style convention. Signed-off-by: Markus Elfring Signed-off-by: David S. Miller --- drivers/atm/he.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/atm/he.c b/drivers/atm/he.c index 0f5cb37636bc..31b513a23ae0 100644 --- a/drivers/atm/he.c +++ b/drivers/atm/he.c @@ -779,8 +779,9 @@ static int he_init_group(struct he_dev *he_dev, int group) G0_RBPS_BS + (group * 32)); /* bitmap table */ - he_dev->rbpl_table = kmalloc(BITS_TO_LONGS(RBPL_TABLE_SIZE) - * sizeof(unsigned long), GFP_KERNEL); + he_dev->rbpl_table = kmalloc_array(BITS_TO_LONGS(RBPL_TABLE_SIZE), + sizeof(*he_dev->rbpl_table), + GFP_KERNEL); if (!he_dev->rbpl_table) { hprintk("unable to allocate rbpl bitmap table\n"); return -ENOMEM; @@ -788,8 +789,9 @@ static int he_init_group(struct he_dev *he_dev, int group) bitmap_zero(he_dev->rbpl_table, RBPL_TABLE_SIZE); /* rbpl_virt 64-bit pointers */ - he_dev->rbpl_virt = kmalloc(RBPL_TABLE_SIZE - * sizeof(struct he_buff *), GFP_KERNEL); + he_dev->rbpl_virt = kmalloc_array(RBPL_TABLE_SIZE, + sizeof(*he_dev->rbpl_virt), + GFP_KERNEL); if (!he_dev->rbpl_virt) { hprintk("unable to allocate rbpl virt table\n"); goto out_free_rbpl_table; From bf8d85d4f907d2156e6e5d2831378527957d9bde Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 8 Sep 2016 15:40:48 -0700 Subject: [PATCH 0863/1715] ip_tunnel: do not clear l4 hashes If skb has a valid l4 hash, there is no point clearing hash and force a further flow dissection when a tunnel encapsulation is added. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/ip_tunnel_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 0f227db0e9ac..777bc1883870 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -69,7 +69,7 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, skb_scrub_packet(skb, xnet); - skb_clear_hash(skb); + skb_clear_hash_if_not_l4(skb); skb_dst_set(skb, &rt->dst); memset(IPCB(skb), 0, sizeof(*IPCB(skb))); From 6088b5823b4cb132a838878747384cbfb5ce6646 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 9 Sep 2016 02:45:28 +0200 Subject: [PATCH 0864/1715] bpf: minor cleanups in helpers Some minor misc cleanups, f.e. use sizeof(__u32) instead of hardcoding and in __bpf_skb_max_len(), I missed that we always have skb->dev valid anyway, so we can drop the unneeded test for dev; also few more other misc bits addressed here. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/helpers.c | 6 +++--- net/core/filter.c | 7 +++---- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 1ea3afba1a4f..6df73bd1ba34 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -126,7 +126,7 @@ static u64 bpf_get_current_pid_tgid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) { struct task_struct *task = current; - if (!task) + if (unlikely(!task)) return -EINVAL; return (u64) task->tgid << 32 | task->pid; @@ -144,12 +144,12 @@ static u64 bpf_get_current_uid_gid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) kuid_t uid; kgid_t gid; - if (!task) + if (unlikely(!task)) return -EINVAL; current_uid_gid(&uid, &gid); return (u64) from_kgid(&init_user_ns, gid) << 32 | - from_kuid(&init_user_ns, uid); + from_kuid(&init_user_ns, uid); } const struct bpf_func_proto bpf_get_current_uid_gid_proto = { diff --git a/net/core/filter.c b/net/core/filter.c index a83766be1ad2..628ed8c7d38d 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2010,8 +2010,7 @@ static u32 __bpf_skb_min_len(const struct sk_buff *skb) static u32 __bpf_skb_max_len(const struct sk_buff *skb) { - return skb->dev ? skb->dev->mtu + skb->dev->hard_header_len : - 65536; + return skb->dev->mtu + skb->dev->hard_header_len; } static int bpf_skb_grow_rcsum(struct sk_buff *skb, unsigned int new_len) @@ -2605,7 +2604,7 @@ static bool __is_valid_xdp_access(int off, int size, return false; if (off % size != 0) return false; - if (size != 4) + if (size != sizeof(__u32)) return false; return true; @@ -2727,7 +2726,7 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg, dst_reg, src_reg, insn); case offsetof(struct __sk_buff, cb[0]) ... - offsetof(struct __sk_buff, cb[4]): + offsetof(struct __sk_buff, cb[4]): BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, data) < 20); prog->cb_access = 1; From f035a51536af9802f55d8c79bd87f184ebffb093 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 9 Sep 2016 02:45:29 +0200 Subject: [PATCH 0865/1715] bpf: add BPF_SIZEOF and BPF_FIELD_SIZEOF macros Add BPF_SIZEOF() and BPF_FIELD_SIZEOF() macros to improve the code a bit which otherwise often result in overly long bytes_to_bpf_size(sizeof()) and bytes_to_bpf_size(FIELD_SIZEOF()) lines. So place them into a macro helper instead. Moreover, we currently have a BUILD_BUG_ON(BPF_FIELD_SIZEOF()) check in convert_bpf_extensions(), but we should rather make that generic as well and add a BUILD_BUG_ON() test in all BPF_SIZEOF()/BPF_FIELD_SIZEOF() users to detect any rewriter size issues at compile time. Note, there are currently none, but we want to assert that it stays this way. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 14 ++++++++++++++ kernel/trace/bpf_trace.c | 12 ++++++------ net/core/filter.c | 15 +++++++-------- 3 files changed, 27 insertions(+), 14 deletions(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index a16439b99fd9..7fabad8dc3fc 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -314,6 +314,20 @@ struct bpf_prog_aux; bpf_size; \ }) +#define BPF_SIZEOF(type) \ + ({ \ + const int __size = bytes_to_bpf_size(sizeof(type)); \ + BUILD_BUG_ON(__size < 0); \ + __size; \ + }) + +#define BPF_FIELD_SIZEOF(type, field) \ + ({ \ + const int __size = bytes_to_bpf_size(FIELD_SIZEOF(type, field)); \ + BUILD_BUG_ON(__size < 0); \ + __size; \ + }) + #ifdef CONFIG_COMPAT /* A struct sock_filter is architecture independent. */ struct compat_sock_fprog { diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index d3869b03d9fe..e63d7d435796 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -583,18 +583,18 @@ static u32 pe_prog_convert_ctx_access(enum bpf_access_type type, int dst_reg, switch (ctx_off) { case offsetof(struct bpf_perf_event_data, sample_period): BUILD_BUG_ON(FIELD_SIZEOF(struct perf_sample_data, period) != sizeof(u64)); - *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct bpf_perf_event_data_kern, data)), - dst_reg, src_reg, + + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern, + data), dst_reg, src_reg, offsetof(struct bpf_perf_event_data_kern, data)); *insn++ = BPF_LDX_MEM(BPF_DW, dst_reg, dst_reg, offsetof(struct perf_sample_data, period)); break; default: - *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct bpf_perf_event_data_kern, regs)), - dst_reg, src_reg, + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern, + regs), dst_reg, src_reg, offsetof(struct bpf_perf_event_data_kern, regs)); - *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(sizeof(long)), - dst_reg, dst_reg, ctx_off); + *insn++ = BPF_LDX_MEM(BPF_SIZEOF(long), dst_reg, dst_reg, ctx_off); break; } diff --git a/net/core/filter.c b/net/core/filter.c index 628ed8c7d38d..120c813ef030 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -233,9 +233,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp, case SKF_AD_OFF + SKF_AD_HATYPE: BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4); BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2); - BUILD_BUG_ON(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, dev)) < 0); - *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, dev)), + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev), BPF_REG_TMP, BPF_REG_CTX, offsetof(struct sk_buff, dev)); /* if (tmp != 0) goto pc + 1 */ @@ -2685,7 +2684,7 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg, case offsetof(struct __sk_buff, ifindex): BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4); - *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, dev)), + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev), dst_reg, src_reg, offsetof(struct sk_buff, dev)); *insn++ = BPF_JMP_IMM(BPF_JEQ, dst_reg, 0, 1); @@ -2750,7 +2749,7 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg, break; case offsetof(struct __sk_buff, data): - *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, data)), + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data), dst_reg, src_reg, offsetof(struct sk_buff, data)); break; @@ -2759,8 +2758,8 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg, ctx_off -= offsetof(struct __sk_buff, data_end); ctx_off += offsetof(struct sk_buff, cb); ctx_off += offsetof(struct bpf_skb_data_end, data_end); - *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(sizeof(void *)), - dst_reg, src_reg, ctx_off); + *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), dst_reg, src_reg, + ctx_off); break; case offsetof(struct __sk_buff, tc_index): @@ -2795,12 +2794,12 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type, int dst_reg, switch (ctx_off) { case offsetof(struct xdp_md, data): - *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct xdp_buff, data)), + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data), dst_reg, src_reg, offsetof(struct xdp_buff, data)); break; case offsetof(struct xdp_md, data_end): - *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct xdp_buff, data_end)), + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_end), dst_reg, src_reg, offsetof(struct xdp_buff, data_end)); break; From 374fb54eeaaa6b2cb82bca73a11273687bb2a96a Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 9 Sep 2016 02:45:30 +0200 Subject: [PATCH 0866/1715] bpf: add own ctx rewriter on ifindex for clsact progs When fetching ifindex, we don't need to test dev for being NULL since we're always guaranteed to have a valid dev for clsact programs. Thus, avoid this test in fast path. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- net/core/filter.c | 37 +++++++++++++++++++++++++++++++------ 1 file changed, 31 insertions(+), 6 deletions(-) diff --git a/net/core/filter.c b/net/core/filter.c index 120c813ef030..d6d9bb89ce3a 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2634,10 +2634,10 @@ void bpf_warn_invalid_xdp_action(u32 act) } EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action); -static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg, - int src_reg, int ctx_off, - struct bpf_insn *insn_buf, - struct bpf_prog *prog) +static u32 sk_filter_convert_ctx_access(enum bpf_access_type type, int dst_reg, + int src_reg, int ctx_off, + struct bpf_insn *insn_buf, + struct bpf_prog *prog) { struct bpf_insn *insn = insn_buf; @@ -2785,6 +2785,31 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg, return insn - insn_buf; } +static u32 tc_cls_act_convert_ctx_access(enum bpf_access_type type, int dst_reg, + int src_reg, int ctx_off, + struct bpf_insn *insn_buf, + struct bpf_prog *prog) +{ + struct bpf_insn *insn = insn_buf; + + switch (ctx_off) { + case offsetof(struct __sk_buff, ifindex): + BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4); + + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev), + dst_reg, src_reg, + offsetof(struct sk_buff, dev)); + *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, dst_reg, + offsetof(struct net_device, ifindex)); + break; + default: + return sk_filter_convert_ctx_access(type, dst_reg, src_reg, + ctx_off, insn_buf, prog); + } + + return insn - insn_buf; +} + static u32 xdp_convert_ctx_access(enum bpf_access_type type, int dst_reg, int src_reg, int ctx_off, struct bpf_insn *insn_buf, @@ -2811,13 +2836,13 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type, int dst_reg, static const struct bpf_verifier_ops sk_filter_ops = { .get_func_proto = sk_filter_func_proto, .is_valid_access = sk_filter_is_valid_access, - .convert_ctx_access = bpf_net_convert_ctx_access, + .convert_ctx_access = sk_filter_convert_ctx_access, }; static const struct bpf_verifier_ops tc_cls_act_ops = { .get_func_proto = tc_cls_act_func_proto, .is_valid_access = tc_cls_act_is_valid_access, - .convert_ctx_access = bpf_net_convert_ctx_access, + .convert_ctx_access = tc_cls_act_convert_ctx_access, }; static const struct bpf_verifier_ops xdp_ops = { From f3694e00123802d688180e7ae90b240669910e3c Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 9 Sep 2016 02:45:31 +0200 Subject: [PATCH 0867/1715] bpf: add BPF_CALL_x macros for declaring helpers This work adds BPF_CALL_() macros and converts all the eBPF helper functions to use them, in a similar fashion like we do with SYSCALL_DEFINE() macros that are used today. Motivation for this is to hide all the register handling and all necessary casts from the user, so that it is done automatically in the background when adding a BPF_CALL_() call. This makes current helpers easier to review, eases to write future helpers, avoids getting the casting mess wrong, and allows for extending all helpers at once (f.e. build time checks, etc). It also helps detecting more easily in code reviews that unused registers are not instrumented in the code by accident, breaking compatibility with existing programs. BPF_CALL_() internals are quite similar to SYSCALL_DEFINE() ones with some fundamental differences, for example, for generating the actual helper function that carries all u64 regs, we need to fill unused regs, so that we always end up with 5 u64 regs as an argument. I reviewed several 0-5 generated BPF_CALL_() variants of the .i results and they look all as expected. No sparse issue spotted. We let this also sit for a few days with Fengguang's kbuild test robot, and there were no issues seen. On s390, it barked on the "uses dynamic stack allocation" notice, which is an old one from bpf_perf_event_output{,_tp}() reappearing here due to the conversion to the call wrapper, just telling that the perf raw record/frag sits on stack (gcc with s390's -mwarn-dynamicstack), but that's all. Did various runtime tests and they were fine as well. All eBPF helpers are now converted to use these macros, getting rid of a good chunk of all the raw castings. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 50 +++++++++++++++ kernel/bpf/core.c | 2 +- kernel/bpf/helpers.c | 46 ++++---------- kernel/bpf/stackmap.c | 5 +- kernel/trace/bpf_trace.c | 75 +++++++++++------------ net/core/filter.c | 129 +++++++++++++++------------------------ 6 files changed, 149 insertions(+), 158 deletions(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index 7fabad8dc3fc..1f09c521adfe 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -328,6 +328,56 @@ struct bpf_prog_aux; __size; \ }) +#define __BPF_MAP_0(m, v, ...) v +#define __BPF_MAP_1(m, v, t, a, ...) m(t, a) +#define __BPF_MAP_2(m, v, t, a, ...) m(t, a), __BPF_MAP_1(m, v, __VA_ARGS__) +#define __BPF_MAP_3(m, v, t, a, ...) m(t, a), __BPF_MAP_2(m, v, __VA_ARGS__) +#define __BPF_MAP_4(m, v, t, a, ...) m(t, a), __BPF_MAP_3(m, v, __VA_ARGS__) +#define __BPF_MAP_5(m, v, t, a, ...) m(t, a), __BPF_MAP_4(m, v, __VA_ARGS__) + +#define __BPF_REG_0(...) __BPF_PAD(5) +#define __BPF_REG_1(...) __BPF_MAP(1, __VA_ARGS__), __BPF_PAD(4) +#define __BPF_REG_2(...) __BPF_MAP(2, __VA_ARGS__), __BPF_PAD(3) +#define __BPF_REG_3(...) __BPF_MAP(3, __VA_ARGS__), __BPF_PAD(2) +#define __BPF_REG_4(...) __BPF_MAP(4, __VA_ARGS__), __BPF_PAD(1) +#define __BPF_REG_5(...) __BPF_MAP(5, __VA_ARGS__) + +#define __BPF_MAP(n, ...) __BPF_MAP_##n(__VA_ARGS__) +#define __BPF_REG(n, ...) __BPF_REG_##n(__VA_ARGS__) + +#define __BPF_CAST(t, a) \ + (__force t) \ + (__force \ + typeof(__builtin_choose_expr(sizeof(t) == sizeof(unsigned long), \ + (unsigned long)0, (t)0))) a +#define __BPF_V void +#define __BPF_N + +#define __BPF_DECL_ARGS(t, a) t a +#define __BPF_DECL_REGS(t, a) u64 a + +#define __BPF_PAD(n) \ + __BPF_MAP(n, __BPF_DECL_ARGS, __BPF_N, u64, __ur_1, u64, __ur_2, \ + u64, __ur_3, u64, __ur_4, u64, __ur_5) + +#define BPF_CALL_x(x, name, ...) \ + static __always_inline \ + u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)); \ + u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)); \ + u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)) \ + { \ + return ____##name(__BPF_MAP(x,__BPF_CAST,__BPF_N,__VA_ARGS__));\ + } \ + static __always_inline \ + u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)) + +#define BPF_CALL_0(name, ...) BPF_CALL_x(0, name, __VA_ARGS__) +#define BPF_CALL_1(name, ...) BPF_CALL_x(1, name, __VA_ARGS__) +#define BPF_CALL_2(name, ...) BPF_CALL_x(2, name, __VA_ARGS__) +#define BPF_CALL_3(name, ...) BPF_CALL_x(3, name, __VA_ARGS__) +#define BPF_CALL_4(name, ...) BPF_CALL_x(4, name, __VA_ARGS__) +#define BPF_CALL_5(name, ...) BPF_CALL_x(5, name, __VA_ARGS__) + #ifdef CONFIG_COMPAT /* A struct sock_filter is architecture independent. */ struct compat_sock_fprog { diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 03fd23d4d587..7b7baaed9ed4 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1018,7 +1018,7 @@ void bpf_user_rnd_init_once(void) prandom_init_once(&bpf_user_rnd_state); } -u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_0(bpf_user_rnd_u32) { /* Should someone ever have the rather unwise idea to use some * of the registers passed into this function, then note that diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 6df73bd1ba34..a5b8bf8cfcfd 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -16,6 +16,7 @@ #include #include #include +#include /* If kernel subsystem is allowing eBPF programs to call this function, * inside its own verifier_ops->get_func_proto() callback it should return @@ -26,24 +27,10 @@ * if program is allowed to access maps, so check rcu_read_lock_held in * all three functions. */ -static u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_2(bpf_map_lookup_elem, struct bpf_map *, map, void *, key) { - /* verifier checked that R1 contains a valid pointer to bpf_map - * and R2 points to a program stack and map->key_size bytes were - * initialized - */ - struct bpf_map *map = (struct bpf_map *) (unsigned long) r1; - void *key = (void *) (unsigned long) r2; - void *value; - WARN_ON_ONCE(!rcu_read_lock_held()); - - value = map->ops->map_lookup_elem(map, key); - - /* lookup() returns either pointer to element value or NULL - * which is the meaning of PTR_TO_MAP_VALUE_OR_NULL type - */ - return (unsigned long) value; + return (unsigned long) map->ops->map_lookup_elem(map, key); } const struct bpf_func_proto bpf_map_lookup_elem_proto = { @@ -54,15 +41,11 @@ const struct bpf_func_proto bpf_map_lookup_elem_proto = { .arg2_type = ARG_PTR_TO_MAP_KEY, }; -static u64 bpf_map_update_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_4(bpf_map_update_elem, struct bpf_map *, map, void *, key, + void *, value, u64, flags) { - struct bpf_map *map = (struct bpf_map *) (unsigned long) r1; - void *key = (void *) (unsigned long) r2; - void *value = (void *) (unsigned long) r3; - WARN_ON_ONCE(!rcu_read_lock_held()); - - return map->ops->map_update_elem(map, key, value, r4); + return map->ops->map_update_elem(map, key, value, flags); } const struct bpf_func_proto bpf_map_update_elem_proto = { @@ -75,13 +58,9 @@ const struct bpf_func_proto bpf_map_update_elem_proto = { .arg4_type = ARG_ANYTHING, }; -static u64 bpf_map_delete_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_2(bpf_map_delete_elem, struct bpf_map *, map, void *, key) { - struct bpf_map *map = (struct bpf_map *) (unsigned long) r1; - void *key = (void *) (unsigned long) r2; - WARN_ON_ONCE(!rcu_read_lock_held()); - return map->ops->map_delete_elem(map, key); } @@ -99,7 +78,7 @@ const struct bpf_func_proto bpf_get_prandom_u32_proto = { .ret_type = RET_INTEGER, }; -static u64 bpf_get_smp_processor_id(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_0(bpf_get_smp_processor_id) { return smp_processor_id(); } @@ -110,7 +89,7 @@ const struct bpf_func_proto bpf_get_smp_processor_id_proto = { .ret_type = RET_INTEGER, }; -static u64 bpf_ktime_get_ns(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_0(bpf_ktime_get_ns) { /* NMI safe access to clock monotonic */ return ktime_get_mono_fast_ns(); @@ -122,7 +101,7 @@ const struct bpf_func_proto bpf_ktime_get_ns_proto = { .ret_type = RET_INTEGER, }; -static u64 bpf_get_current_pid_tgid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_0(bpf_get_current_pid_tgid) { struct task_struct *task = current; @@ -138,7 +117,7 @@ const struct bpf_func_proto bpf_get_current_pid_tgid_proto = { .ret_type = RET_INTEGER, }; -static u64 bpf_get_current_uid_gid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_0(bpf_get_current_uid_gid) { struct task_struct *task = current; kuid_t uid; @@ -158,10 +137,9 @@ const struct bpf_func_proto bpf_get_current_uid_gid_proto = { .ret_type = RET_INTEGER, }; -static u64 bpf_get_current_comm(u64 r1, u64 size, u64 r3, u64 r4, u64 r5) +BPF_CALL_2(bpf_get_current_comm, char *, buf, u32, size) { struct task_struct *task = current; - char *buf = (char *) (long) r1; if (unlikely(!task)) goto err_clear; diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index bf4495fcd25d..732ae16d12b7 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -116,10 +116,9 @@ free_smap: return ERR_PTR(err); } -u64 bpf_get_stackid(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5) +BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map, + u64, flags) { - struct pt_regs *regs = (struct pt_regs *) (long) r1; - struct bpf_map *map = (struct bpf_map *) (long) r2; struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); struct perf_callchain_entry *trace; struct stack_map_bucket *bucket, *new_bucket, *old_bucket; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index e63d7d435796..5dcb99281259 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -61,11 +61,9 @@ unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx) } EXPORT_SYMBOL_GPL(trace_call_bpf); -static u64 bpf_probe_read(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const void *, unsafe_ptr) { - void *dst = (void *) (long) r1; - int ret, size = (int) r2; - void *unsafe_ptr = (void *) (long) r3; + int ret; ret = probe_kernel_read(dst, unsafe_ptr, size); if (unlikely(ret < 0)) @@ -83,12 +81,9 @@ static const struct bpf_func_proto bpf_probe_read_proto = { .arg3_type = ARG_ANYTHING, }; -static u64 bpf_probe_write_user(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_3(bpf_probe_write_user, void *, unsafe_ptr, const void *, src, + u32, size) { - void *unsafe_ptr = (void *) (long) r1; - void *src = (void *) (long) r2; - int size = (int) r3; - /* * Ensure we're in user context which is safe for the helper to * run. This helper has no business in a kthread. @@ -130,9 +125,9 @@ static const struct bpf_func_proto *bpf_get_probe_write_proto(void) * limited trace_printk() * only %d %u %x %ld %lu %lx %lld %llu %llx %p %s conversion specifiers allowed */ -static u64 bpf_trace_printk(u64 r1, u64 fmt_size, u64 r3, u64 r4, u64 r5) +BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1, + u64, arg2, u64, arg3) { - char *fmt = (char *) (long) r1; bool str_seen = false; int mod[3] = {}; int fmt_cnt = 0; @@ -178,16 +173,16 @@ static u64 bpf_trace_printk(u64 r1, u64 fmt_size, u64 r3, u64 r4, u64 r5) switch (fmt_cnt) { case 1: - unsafe_addr = r3; - r3 = (long) buf; + unsafe_addr = arg1; + arg1 = (long) buf; break; case 2: - unsafe_addr = r4; - r4 = (long) buf; + unsafe_addr = arg2; + arg2 = (long) buf; break; case 3: - unsafe_addr = r5; - r5 = (long) buf; + unsafe_addr = arg3; + arg3 = (long) buf; break; } buf[0] = 0; @@ -209,9 +204,9 @@ static u64 bpf_trace_printk(u64 r1, u64 fmt_size, u64 r3, u64 r4, u64 r5) } return __trace_printk(1/* fake ip will not be printed */, fmt, - mod[0] == 2 ? r3 : mod[0] == 1 ? (long) r3 : (u32) r3, - mod[1] == 2 ? r4 : mod[1] == 1 ? (long) r4 : (u32) r4, - mod[2] == 2 ? r5 : mod[2] == 1 ? (long) r5 : (u32) r5); + mod[0] == 2 ? arg1 : mod[0] == 1 ? (long) arg1 : (u32) arg1, + mod[1] == 2 ? arg2 : mod[1] == 1 ? (long) arg2 : (u32) arg2, + mod[2] == 2 ? arg3 : mod[2] == 1 ? (long) arg3 : (u32) arg3); } static const struct bpf_func_proto bpf_trace_printk_proto = { @@ -233,9 +228,8 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void) return &bpf_trace_printk_proto; } -static u64 bpf_perf_event_read(u64 r1, u64 flags, u64 r3, u64 r4, u64 r5) +BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags) { - struct bpf_map *map = (struct bpf_map *) (unsigned long) r1; struct bpf_array *array = container_of(map, struct bpf_array, map); unsigned int cpu = smp_processor_id(); u64 index = flags & BPF_F_INDEX_MASK; @@ -312,11 +306,9 @@ __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map, return 0; } -static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 flags, u64 r4, u64 size) +BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map, + u64, flags, void *, data, u64, size) { - struct pt_regs *regs = (struct pt_regs *)(long) r1; - struct bpf_map *map = (struct bpf_map *)(long) r2; - void *data = (void *)(long) r4; struct perf_raw_record raw = { .frag = { .size = size, @@ -367,7 +359,7 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, return __bpf_perf_event_output(regs, map, flags, &raw); } -static u64 bpf_get_current_task(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_0(bpf_get_current_task) { return (long) current; } @@ -378,16 +370,13 @@ static const struct bpf_func_proto bpf_get_current_task_proto = { .ret_type = RET_INTEGER, }; -static u64 bpf_current_task_under_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_2(bpf_current_task_under_cgroup, struct bpf_map *, map, u32, idx) { - struct bpf_map *map = (struct bpf_map *)(long)r1; struct bpf_array *array = container_of(map, struct bpf_array, map); struct cgroup *cgrp; - u32 idx = (u32)r2; if (unlikely(in_interrupt())) return -EINVAL; - if (unlikely(idx >= array->map.max_entries)) return -E2BIG; @@ -481,16 +470,17 @@ static struct bpf_prog_type_list kprobe_tl = { .type = BPF_PROG_TYPE_KPROBE, }; -static u64 bpf_perf_event_output_tp(u64 r1, u64 r2, u64 index, u64 r4, u64 size) +BPF_CALL_5(bpf_perf_event_output_tp, void *, tp_buff, struct bpf_map *, map, + u64, flags, void *, data, u64, size) { + struct pt_regs *regs = *(struct pt_regs **)tp_buff; + /* * r1 points to perf tracepoint buffer where first 8 bytes are hidden * from bpf program and contain a pointer to 'struct pt_regs'. Fetch it - * from there and call the same bpf_perf_event_output() helper + * from there and call the same bpf_perf_event_output() helper inline. */ - u64 ctx = *(long *)(uintptr_t)r1; - - return bpf_perf_event_output(ctx, r2, index, r4, size); + return ____bpf_perf_event_output(regs, map, flags, data, size); } static const struct bpf_func_proto bpf_perf_event_output_proto_tp = { @@ -504,11 +494,18 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_tp = { .arg5_type = ARG_CONST_STACK_SIZE, }; -static u64 bpf_get_stackid_tp(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_3(bpf_get_stackid_tp, void *, tp_buff, struct bpf_map *, map, + u64, flags) { - u64 ctx = *(long *)(uintptr_t)r1; + struct pt_regs *regs = *(struct pt_regs **)tp_buff; - return bpf_get_stackid(ctx, r2, r3, r4, r5); + /* + * Same comment as in bpf_perf_event_output_tp(), only that this time + * the other helper's function body cannot be inlined due to being + * external, thus we need to call raw helper function. + */ + return bpf_get_stackid((unsigned long) regs, (unsigned long) map, + flags, 0, 0); } static const struct bpf_func_proto bpf_get_stackid_proto_tp = { diff --git a/net/core/filter.c b/net/core/filter.c index d6d9bb89ce3a..298b146b47e7 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -94,14 +94,13 @@ int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap) } EXPORT_SYMBOL(sk_filter_trim_cap); -static u64 __skb_get_pay_offset(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) +BPF_CALL_1(__skb_get_pay_offset, struct sk_buff *, skb) { - return skb_get_poff((struct sk_buff *)(unsigned long) ctx); + return skb_get_poff(skb); } -static u64 __skb_get_nlattr(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) +BPF_CALL_3(__skb_get_nlattr, struct sk_buff *, skb, u32, a, u32, x) { - struct sk_buff *skb = (struct sk_buff *)(unsigned long) ctx; struct nlattr *nla; if (skb_is_nonlinear(skb)) @@ -120,9 +119,8 @@ static u64 __skb_get_nlattr(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) return 0; } -static u64 __skb_get_nlattr_nest(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) +BPF_CALL_3(__skb_get_nlattr_nest, struct sk_buff *, skb, u32, a, u32, x) { - struct sk_buff *skb = (struct sk_buff *)(unsigned long) ctx; struct nlattr *nla; if (skb_is_nonlinear(skb)) @@ -145,7 +143,7 @@ static u64 __skb_get_nlattr_nest(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) return 0; } -static u64 __get_raw_cpu_id(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) +BPF_CALL_0(__get_raw_cpu_id) { return raw_smp_processor_id(); } @@ -1376,12 +1374,9 @@ static inline void bpf_pull_mac_rcsum(struct sk_buff *skb) skb_postpull_rcsum(skb, skb_mac_header(skb), skb->mac_len); } -static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) +BPF_CALL_5(bpf_skb_store_bytes, struct sk_buff *, skb, u32, offset, + const void *, from, u32, len, u64, flags) { - struct sk_buff *skb = (struct sk_buff *) (long) r1; - unsigned int offset = (unsigned int) r2; - void *from = (void *) (long) r3; - unsigned int len = (unsigned int) r4; void *ptr; if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM | BPF_F_INVALIDATE_HASH))) @@ -1416,12 +1411,9 @@ static const struct bpf_func_proto bpf_skb_store_bytes_proto = { .arg5_type = ARG_ANYTHING, }; -static u64 bpf_skb_load_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_4(bpf_skb_load_bytes, const struct sk_buff *, skb, u32, offset, + void *, to, u32, len) { - const struct sk_buff *skb = (const struct sk_buff *)(unsigned long) r1; - unsigned int offset = (unsigned int) r2; - void *to = (void *)(unsigned long) r3; - unsigned int len = (unsigned int) r4; void *ptr; if (unlikely(offset > 0xffff)) @@ -1449,10 +1441,9 @@ static const struct bpf_func_proto bpf_skb_load_bytes_proto = { .arg4_type = ARG_CONST_STACK_SIZE, }; -static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags) +BPF_CALL_5(bpf_l3_csum_replace, struct sk_buff *, skb, u32, offset, + u64, from, u64, to, u64, flags) { - struct sk_buff *skb = (struct sk_buff *) (long) r1; - unsigned int offset = (unsigned int) r2; __sum16 *ptr; if (unlikely(flags & ~(BPF_F_HDR_FIELD_MASK))) @@ -1494,12 +1485,11 @@ static const struct bpf_func_proto bpf_l3_csum_replace_proto = { .arg5_type = ARG_ANYTHING, }; -static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags) +BPF_CALL_5(bpf_l4_csum_replace, struct sk_buff *, skb, u32, offset, + u64, from, u64, to, u64, flags) { - struct sk_buff *skb = (struct sk_buff *) (long) r1; bool is_pseudo = flags & BPF_F_PSEUDO_HDR; bool is_mmzero = flags & BPF_F_MARK_MANGLED_0; - unsigned int offset = (unsigned int) r2; __sum16 *ptr; if (unlikely(flags & ~(BPF_F_MARK_MANGLED_0 | BPF_F_PSEUDO_HDR | @@ -1547,12 +1537,11 @@ static const struct bpf_func_proto bpf_l4_csum_replace_proto = { .arg5_type = ARG_ANYTHING, }; -static u64 bpf_csum_diff(u64 r1, u64 from_size, u64 r3, u64 to_size, u64 seed) +BPF_CALL_5(bpf_csum_diff, __be32 *, from, u32, from_size, + __be32 *, to, u32, to_size, __wsum, seed) { struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp); - u64 diff_size = from_size + to_size; - __be32 *from = (__be32 *) (long) r1; - __be32 *to = (__be32 *) (long) r3; + u32 diff_size = from_size + to_size; int i, j = 0; /* This is quite flexible, some examples: @@ -1610,9 +1599,8 @@ static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb) return ret; } -static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5) +BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags) { - struct sk_buff *skb = (struct sk_buff *) (long) r1; struct net_device *dev; if (unlikely(flags & ~(BPF_F_INGRESS))) @@ -1648,7 +1636,7 @@ struct redirect_info { static DEFINE_PER_CPU(struct redirect_info, redirect_info); -static u64 bpf_redirect(u64 ifindex, u64 flags, u64 r3, u64 r4, u64 r5) +BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags) { struct redirect_info *ri = this_cpu_ptr(&redirect_info); @@ -1687,9 +1675,9 @@ static const struct bpf_func_proto bpf_redirect_proto = { .arg2_type = ARG_ANYTHING, }; -static u64 bpf_get_cgroup_classid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb) { - return task_get_classid((struct sk_buff *) (unsigned long) r1); + return task_get_classid(skb); } static const struct bpf_func_proto bpf_get_cgroup_classid_proto = { @@ -1699,9 +1687,9 @@ static const struct bpf_func_proto bpf_get_cgroup_classid_proto = { .arg1_type = ARG_PTR_TO_CTX, }; -static u64 bpf_get_route_realm(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_1(bpf_get_route_realm, const struct sk_buff *, skb) { - return dst_tclassid((struct sk_buff *) (unsigned long) r1); + return dst_tclassid(skb); } static const struct bpf_func_proto bpf_get_route_realm_proto = { @@ -1711,14 +1699,14 @@ static const struct bpf_func_proto bpf_get_route_realm_proto = { .arg1_type = ARG_PTR_TO_CTX, }; -static u64 bpf_get_hash_recalc(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_1(bpf_get_hash_recalc, struct sk_buff *, skb) { /* If skb_clear_hash() was called due to mangling, we can * trigger SW recalculation here. Later access to hash * can then use the inline skb->hash via context directly * instead of calling this helper again. */ - return skb_get_hash((struct sk_buff *) (unsigned long) r1); + return skb_get_hash(skb); } static const struct bpf_func_proto bpf_get_hash_recalc_proto = { @@ -1728,10 +1716,9 @@ static const struct bpf_func_proto bpf_get_hash_recalc_proto = { .arg1_type = ARG_PTR_TO_CTX, }; -static u64 bpf_skb_vlan_push(u64 r1, u64 r2, u64 vlan_tci, u64 r4, u64 r5) +BPF_CALL_3(bpf_skb_vlan_push, struct sk_buff *, skb, __be16, vlan_proto, + u16, vlan_tci) { - struct sk_buff *skb = (struct sk_buff *) (long) r1; - __be16 vlan_proto = (__force __be16) r2; int ret; if (unlikely(vlan_proto != htons(ETH_P_8021Q) && @@ -1756,9 +1743,8 @@ const struct bpf_func_proto bpf_skb_vlan_push_proto = { }; EXPORT_SYMBOL_GPL(bpf_skb_vlan_push_proto); -static u64 bpf_skb_vlan_pop(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_1(bpf_skb_vlan_pop, struct sk_buff *, skb) { - struct sk_buff *skb = (struct sk_buff *) (long) r1; int ret; bpf_push_mac_rcsum(skb); @@ -1933,10 +1919,9 @@ static int bpf_skb_proto_xlat(struct sk_buff *skb, __be16 to_proto) return -ENOTSUPP; } -static u64 bpf_skb_change_proto(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5) +BPF_CALL_3(bpf_skb_change_proto, struct sk_buff *, skb, __be16, proto, + u64, flags) { - struct sk_buff *skb = (struct sk_buff *) (long) r1; - __be16 proto = (__force __be16) r2; int ret; if (unlikely(flags)) @@ -1973,11 +1958,8 @@ static const struct bpf_func_proto bpf_skb_change_proto_proto = { .arg3_type = ARG_ANYTHING, }; -static u64 bpf_skb_change_type(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_2(bpf_skb_change_type, struct sk_buff *, skb, u32, pkt_type) { - struct sk_buff *skb = (struct sk_buff *) (long) r1; - u32 pkt_type = r2; - /* We only allow a restricted subset to be changed for now. */ if (unlikely(!skb_pkt_type_ok(skb->pkt_type) || !skb_pkt_type_ok(pkt_type))) @@ -2028,12 +2010,11 @@ static int bpf_skb_trim_rcsum(struct sk_buff *skb, unsigned int new_len) return __skb_trim_rcsum(skb, new_len); } -static u64 bpf_skb_change_tail(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5) +BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len, + u64, flags) { - struct sk_buff *skb = (struct sk_buff *)(long) r1; u32 max_len = __bpf_skb_max_len(skb); u32 min_len = __bpf_skb_min_len(skb); - u32 new_len = (u32) r2; int ret; if (unlikely(flags || new_len > max_len || new_len < min_len)) @@ -2113,13 +2094,10 @@ static unsigned long bpf_skb_copy(void *dst_buff, const void *skb, return 0; } -static u64 bpf_skb_event_output(u64 r1, u64 r2, u64 flags, u64 r4, - u64 meta_size) +BPF_CALL_5(bpf_skb_event_output, struct sk_buff *, skb, struct bpf_map *, map, + u64, flags, void *, meta, u64, meta_size) { - struct sk_buff *skb = (struct sk_buff *)(long) r1; - struct bpf_map *map = (struct bpf_map *)(long) r2; u64 skb_size = (flags & BPF_F_CTXLEN_MASK) >> 32; - void *meta = (void *)(long) r4; if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK))) return -EINVAL; @@ -2146,10 +2124,9 @@ static unsigned short bpf_tunnel_key_af(u64 flags) return flags & BPF_F_TUNINFO_IPV6 ? AF_INET6 : AF_INET; } -static u64 bpf_skb_get_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5) +BPF_CALL_4(bpf_skb_get_tunnel_key, struct sk_buff *, skb, struct bpf_tunnel_key *, to, + u32, size, u64, flags) { - struct sk_buff *skb = (struct sk_buff *) (long) r1; - struct bpf_tunnel_key *to = (struct bpf_tunnel_key *) (long) r2; const struct ip_tunnel_info *info = skb_tunnel_info(skb); u8 compat[sizeof(struct bpf_tunnel_key)]; void *to_orig = to; @@ -2214,10 +2191,8 @@ static const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = { .arg4_type = ARG_ANYTHING, }; -static u64 bpf_skb_get_tunnel_opt(u64 r1, u64 r2, u64 size, u64 r4, u64 r5) +BPF_CALL_3(bpf_skb_get_tunnel_opt, struct sk_buff *, skb, u8 *, to, u32, size) { - struct sk_buff *skb = (struct sk_buff *) (long) r1; - u8 *to = (u8 *) (long) r2; const struct ip_tunnel_info *info = skb_tunnel_info(skb); int err; @@ -2252,10 +2227,9 @@ static const struct bpf_func_proto bpf_skb_get_tunnel_opt_proto = { static struct metadata_dst __percpu *md_dst; -static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5) +BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb, + const struct bpf_tunnel_key *, from, u32, size, u64, flags) { - struct sk_buff *skb = (struct sk_buff *) (long) r1; - struct bpf_tunnel_key *from = (struct bpf_tunnel_key *) (long) r2; struct metadata_dst *md = this_cpu_ptr(md_dst); u8 compat[sizeof(struct bpf_tunnel_key)]; struct ip_tunnel_info *info; @@ -2273,7 +2247,7 @@ static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5) */ memcpy(compat, from, size); memset(compat + size, 0, sizeof(compat) - size); - from = (struct bpf_tunnel_key *)compat; + from = (const struct bpf_tunnel_key *) compat; break; default: return -EINVAL; @@ -2323,10 +2297,9 @@ static const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = { .arg4_type = ARG_ANYTHING, }; -static u64 bpf_skb_set_tunnel_opt(u64 r1, u64 r2, u64 size, u64 r4, u64 r5) +BPF_CALL_3(bpf_skb_set_tunnel_opt, struct sk_buff *, skb, + const u8 *, from, u32, size) { - struct sk_buff *skb = (struct sk_buff *) (long) r1; - u8 *from = (u8 *) (long) r2; struct ip_tunnel_info *info = skb_tunnel_info(skb); const struct metadata_dst *md = this_cpu_ptr(md_dst); @@ -2372,23 +2345,20 @@ bpf_get_skb_set_tunnel_proto(enum bpf_func_id which) } } -static u64 bpf_skb_under_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_3(bpf_skb_under_cgroup, struct sk_buff *, skb, struct bpf_map *, map, + u32, idx) { - struct sk_buff *skb = (struct sk_buff *)(long)r1; - struct bpf_map *map = (struct bpf_map *)(long)r2; struct bpf_array *array = container_of(map, struct bpf_array, map); struct cgroup *cgrp; struct sock *sk; - u32 i = (u32)r3; sk = skb->sk; if (!sk || !sk_fullsock(sk)) return -ENOENT; - - if (unlikely(i >= array->map.max_entries)) + if (unlikely(idx >= array->map.max_entries)) return -E2BIG; - cgrp = READ_ONCE(array->ptrs[i]); + cgrp = READ_ONCE(array->ptrs[idx]); if (unlikely(!cgrp)) return -EAGAIN; @@ -2411,13 +2381,10 @@ static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff, return 0; } -static u64 bpf_xdp_event_output(u64 r1, u64 r2, u64 flags, u64 r4, - u64 meta_size) +BPF_CALL_5(bpf_xdp_event_output, struct xdp_buff *, xdp, struct bpf_map *, map, + u64, flags, void *, meta, u64, meta_size) { - struct xdp_buff *xdp = (struct xdp_buff *)(long) r1; - struct bpf_map *map = (struct bpf_map *)(long) r2; u64 xdp_size = (flags & BPF_F_CTXLEN_MASK) >> 32; - void *meta = (void *)(long) r4; if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK))) return -EINVAL; From ba56947a33541fd8c2e2e6fafd0126a5f6faaf15 Mon Sep 17 00:00:00 2001 From: Baoyou Xie Date: Fri, 9 Sep 2016 09:21:15 +0800 Subject: [PATCH 0868/1715] qed: mark symbols static where possible We get a few warnings when building kernel with W=1: drivers/net/ethernet/qlogic/qed/qed_l2.c:112:5: warning: no previous prototype for 'qed_sp_vport_start' [-Wmissing-prototypes] drivers/net/ethernet/qlogic/qed/qed_sriov.c:110:6: warning: no previous prototype for 'qed_iov_is_valid_vfid' [-Wmissing-prototypes] drivers/net/ethernet/qlogic/qed/qed_sriov.c:188:5: warning: no previous prototype for 'qed_iov_post_vf_bulletin' [-Wmissing-prototypes] drivers/net/ethernet/qlogic/qed/qed_sriov.c:578:6: warning: no previous prototype for 'qed_iov_set_vfs_to_disable' [-Wmissing-prototypes] drivers/net/ethernet/qlogic/qed/qed_sriov.c:1135:28: warning: no previous prototype for 'qed_iov_get_public_vf_info' [-Wmissing-prototypes] drivers/net/ethernet/qlogic/qed/qed_sriov.c:1148:6: warning: no previous prototype for 'qed_iov_clean_vf' [-Wmissing-prototypes] drivers/net/ethernet/qlogic/qed/qed_sriov.c:2444:5: warning: no previous prototype for 'qed_iov_chk_ucast' [-Wmissing-prototypes] drivers/net/ethernet/qlogic/qed/qed_sriov.c:2762:5: warning: no previous prototype for 'qed_iov_vf_flr_cleanup' [-Wmissing-prototypes] .... In fact, these functions are only used in the file in which they are declared and don't need a declaration, but can be made static. so this patch marks these functions with 'static'. Signed-off-by: Baoyou Xie Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_dcbx.c | 10 +++-- drivers/net/ethernet/qlogic/qed/qed_l2.c | 4 +- drivers/net/ethernet/qlogic/qed/qed_sriov.c | 45 ++++++++++++--------- drivers/net/ethernet/qlogic/qed/qed_vf.c | 4 +- 4 files changed, 35 insertions(+), 28 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c index be7b3dc7c9a7..12c399b1a40a 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c @@ -2130,17 +2130,19 @@ static int qed_dcbnl_ieee_setets(struct qed_dev *cdev, struct ieee_ets *ets) return rc; } -int qed_dcbnl_ieee_peer_getets(struct qed_dev *cdev, struct ieee_ets *ets) +static int +qed_dcbnl_ieee_peer_getets(struct qed_dev *cdev, struct ieee_ets *ets) { return qed_dcbnl_get_ieee_ets(cdev, ets, true); } -int qed_dcbnl_ieee_peer_getpfc(struct qed_dev *cdev, struct ieee_pfc *pfc) +static int +qed_dcbnl_ieee_peer_getpfc(struct qed_dev *cdev, struct ieee_pfc *pfc) { return qed_dcbnl_get_ieee_pfc(cdev, pfc, true); } -int qed_dcbnl_ieee_getapp(struct qed_dev *cdev, struct dcb_app *app) +static int qed_dcbnl_ieee_getapp(struct qed_dev *cdev, struct dcb_app *app) { struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev); struct qed_dcbx_get *dcbx_info; @@ -2184,7 +2186,7 @@ int qed_dcbnl_ieee_getapp(struct qed_dev *cdev, struct dcb_app *app) return 0; } -int qed_dcbnl_ieee_setapp(struct qed_dev *cdev, struct dcb_app *app) +static int qed_dcbnl_ieee_setapp(struct qed_dev *cdev, struct dcb_app *app) { struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev); struct qed_dcbx_get *dcbx_info; diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index 4409ea3f7d40..ddd410a91e13 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -111,8 +111,8 @@ int qed_sp_eth_vport_start(struct qed_hwfn *p_hwfn, return qed_spq_post(p_hwfn, p_ent, NULL); } -int qed_sp_vport_start(struct qed_hwfn *p_hwfn, - struct qed_sp_vport_start_params *p_params) +static int qed_sp_vport_start(struct qed_hwfn *p_hwfn, + struct qed_sp_vport_start_params *p_params) { if (IS_VF(p_hwfn->cdev)) { return qed_vf_pf_vport_start(p_hwfn, p_params->vport_id, diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c index d7c21cdc608c..a4a3cead15bb 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c @@ -108,8 +108,8 @@ static int qed_sp_vf_stop(struct qed_hwfn *p_hwfn, return qed_spq_post(p_hwfn, p_ent, NULL); } -bool qed_iov_is_valid_vfid(struct qed_hwfn *p_hwfn, - int rel_vf_id, bool b_enabled_only) +static bool qed_iov_is_valid_vfid(struct qed_hwfn *p_hwfn, + int rel_vf_id, bool b_enabled_only) { if (!p_hwfn->pf_iov_info) { DP_NOTICE(p_hwfn->cdev, "No iov info\n"); @@ -186,8 +186,8 @@ static bool qed_iov_validate_sb(struct qed_hwfn *p_hwfn, return false; } -int qed_iov_post_vf_bulletin(struct qed_hwfn *p_hwfn, - int vfid, struct qed_ptt *p_ptt) +static int qed_iov_post_vf_bulletin(struct qed_hwfn *p_hwfn, + int vfid, struct qed_ptt *p_ptt) { struct qed_bulletin_content *p_bulletin; int crc_size = sizeof(p_bulletin->crc); @@ -573,7 +573,7 @@ static void qed_iov_set_vf_to_disable(struct qed_dev *cdev, } } -void qed_iov_set_vfs_to_disable(struct qed_dev *cdev, u8 to_disable) +static void qed_iov_set_vfs_to_disable(struct qed_dev *cdev, u8 to_disable) { u16 i; @@ -1130,9 +1130,10 @@ static void qed_iov_prepare_resp(struct qed_hwfn *p_hwfn, qed_iov_send_response(p_hwfn, p_ptt, vf_info, length, status); } -struct qed_public_vf_info *qed_iov_get_public_vf_info(struct qed_hwfn *p_hwfn, - u16 relative_vf_id, - bool b_enabled_only) +static struct +qed_public_vf_info *qed_iov_get_public_vf_info(struct qed_hwfn *p_hwfn, + u16 relative_vf_id, + bool b_enabled_only) { struct qed_vf_info *vf = NULL; @@ -1143,7 +1144,7 @@ struct qed_public_vf_info *qed_iov_get_public_vf_info(struct qed_hwfn *p_hwfn, return &vf->p_vf_info; } -void qed_iov_clean_vf(struct qed_hwfn *p_hwfn, u8 vfid) +static void qed_iov_clean_vf(struct qed_hwfn *p_hwfn, u8 vfid) { struct qed_public_vf_info *vf_info; @@ -2510,8 +2511,8 @@ qed_iov_vf_update_unicast_shadow(struct qed_hwfn *p_hwfn, return rc; } -int qed_iov_chk_ucast(struct qed_hwfn *hwfn, - int vfid, struct qed_filter_ucast *params) +static int qed_iov_chk_ucast(struct qed_hwfn *hwfn, + int vfid, struct qed_filter_ucast *params) { struct qed_public_vf_info *vf; @@ -2828,7 +2829,8 @@ cleanup: return rc; } -int qed_iov_vf_flr_cleanup(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) +static int +qed_iov_vf_flr_cleanup(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { u32 ack_vfs[VF_MAX_STATIC / 32]; int rc = 0; @@ -3015,7 +3017,7 @@ static void qed_iov_process_mbx_req(struct qed_hwfn *p_hwfn, } } -void qed_iov_pf_add_pending_events(struct qed_hwfn *p_hwfn, u8 vfid) +static void qed_iov_pf_add_pending_events(struct qed_hwfn *p_hwfn, u8 vfid) { u64 add_bit = 1ULL << (vfid % 64); @@ -3138,8 +3140,8 @@ static void qed_iov_bulletin_set_forced_mac(struct qed_hwfn *p_hwfn, qed_iov_configure_vport_forced(p_hwfn, vf_info, feature); } -void qed_iov_bulletin_set_forced_vlan(struct qed_hwfn *p_hwfn, - u16 pvid, int vfid) +static void qed_iov_bulletin_set_forced_vlan(struct qed_hwfn *p_hwfn, + u16 pvid, int vfid) { struct qed_vf_info *vf_info; u64 feature; @@ -3172,7 +3174,7 @@ static bool qed_iov_vf_has_vport_instance(struct qed_hwfn *p_hwfn, int vfid) return !!p_vf_info->vport_instance; } -bool qed_iov_is_vf_stopped(struct qed_hwfn *p_hwfn, int vfid) +static bool qed_iov_is_vf_stopped(struct qed_hwfn *p_hwfn, int vfid) { struct qed_vf_info *p_vf_info; @@ -3194,7 +3196,7 @@ static bool qed_iov_spoofchk_get(struct qed_hwfn *p_hwfn, int vfid) return vf_info->spoof_chk; } -int qed_iov_spoofchk_set(struct qed_hwfn *p_hwfn, int vfid, bool val) +static int qed_iov_spoofchk_set(struct qed_hwfn *p_hwfn, int vfid, bool val) { struct qed_vf_info *vf; int rc = -EINVAL; @@ -3237,7 +3239,8 @@ static u8 *qed_iov_bulletin_get_forced_mac(struct qed_hwfn *p_hwfn, return p_vf->bulletin.p_virt->mac; } -u16 qed_iov_bulletin_get_forced_vlan(struct qed_hwfn *p_hwfn, u16 rel_vf_id) +static u16 +qed_iov_bulletin_get_forced_vlan(struct qed_hwfn *p_hwfn, u16 rel_vf_id) { struct qed_vf_info *p_vf; @@ -3269,7 +3272,8 @@ static int qed_iov_configure_tx_rate(struct qed_hwfn *p_hwfn, return qed_init_vport_rl(p_hwfn, p_ptt, abs_vp_id, (u32)val); } -int qed_iov_configure_min_tx_rate(struct qed_dev *cdev, int vfid, u32 rate) +static int +qed_iov_configure_min_tx_rate(struct qed_dev *cdev, int vfid, u32 rate) { struct qed_vf_info *vf; u8 vport_id; @@ -3828,7 +3832,8 @@ static void qed_handle_bulletin_post(struct qed_hwfn *hwfn) qed_ptt_release(hwfn, ptt); } -void qed_iov_pf_task(struct work_struct *work) +static void qed_iov_pf_task(struct work_struct *work) + { struct qed_hwfn *hwfn = container_of(work, struct qed_hwfn, iov_task.work); diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.c b/drivers/net/ethernet/qlogic/qed/qed_vf.c index de0acbcaf85c..85334ceaf69c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_vf.c +++ b/drivers/net/ethernet/qlogic/qed/qed_vf.c @@ -1186,8 +1186,8 @@ bool qed_vf_check_mac(struct qed_hwfn *p_hwfn, u8 *mac) return false; } -bool qed_vf_bulletin_get_forced_mac(struct qed_hwfn *hwfn, - u8 *dst_mac, u8 *p_is_forced) +static bool qed_vf_bulletin_get_forced_mac(struct qed_hwfn *hwfn, + u8 *dst_mac, u8 *p_is_forced) { struct qed_bulletin_content *bulletin; From 9ee7b683ea6313e9cd27bf9c4f70a3d360abe5df Mon Sep 17 00:00:00 2001 From: Tobias Regnery Date: Fri, 9 Sep 2016 12:19:52 +0200 Subject: [PATCH 0869/1715] alx: refactor msi enablement and disablement Introduce a new flag field for the advanced interrupt capatibilities and add new functions to enable and disable msi interrupts. These functions will be extended later to cover msi-x interrupts. We enable msi interrupts earlier in alx_init_intr because with msi-x and multi queue support the number of queues must be set before we allocate resources for the rx and tx paths. Signed-off-by: Tobias Regnery Signed-off-by: David S. Miller --- drivers/net/ethernet/atheros/alx/alx.h | 5 ++++- drivers/net/ethernet/atheros/alx/main.c | 30 ++++++++++++++++++------- 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/atheros/alx/alx.h b/drivers/net/ethernet/atheros/alx/alx.h index 8fc93c5f6abc..16ca3f4fa2cc 100644 --- a/drivers/net/ethernet/atheros/alx/alx.h +++ b/drivers/net/ethernet/atheros/alx/alx.h @@ -76,6 +76,9 @@ enum alx_device_quirks { ALX_DEV_QUIRK_MSI_INTX_DISABLE_BUG = BIT(0), }; +#define ALX_FLAG_USING_MSIX BIT(0) +#define ALX_FLAG_USING_MSI BIT(1) + struct alx_priv { struct net_device *dev; @@ -105,7 +108,7 @@ struct alx_priv { u16 msg_enable; - bool msi; + int flags; /* protects hw.stats */ spinlock_t stats_lock; diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c index d29a4f3102d6..6dc1539205eb 100644 --- a/drivers/net/ethernet/atheros/alx/main.c +++ b/drivers/net/ethernet/atheros/alx/main.c @@ -620,6 +620,22 @@ static void alx_config_vector_mapping(struct alx_priv *alx) alx_write_mem32(hw, ALX_MSI_ID_MAP, 0); } +static void alx_init_intr(struct alx_priv *alx, bool msix) +{ + if (!(alx->flags & ALX_FLAG_USING_MSIX)) { + if (!pci_enable_msi(alx->hw.pdev)) + alx->flags |= ALX_FLAG_USING_MSI; + } +} + +static void alx_disable_advanced_intr(struct alx_priv *alx) +{ + if (alx->flags & ALX_FLAG_USING_MSI) { + pci_disable_msi(alx->hw.pdev); + alx->flags &= ~ALX_FLAG_USING_MSI; + } +} + static void alx_irq_enable(struct alx_priv *alx) { struct alx_hw *hw = &alx->hw; @@ -650,9 +666,7 @@ static int alx_request_irq(struct alx_priv *alx) msi_ctrl = (hw->imt >> 1) << ALX_MSI_RETRANS_TM_SHIFT; - if (!pci_enable_msi(alx->hw.pdev)) { - alx->msi = true; - + if (alx->flags & ALX_FLAG_USING_MSI) { alx_write_mem32(hw, ALX_MSI_RETRANS_TIMER, msi_ctrl | ALX_MSI_MASK_SEL_LINE); err = request_irq(pdev->irq, alx_intr_msi, 0, @@ -660,6 +674,7 @@ static int alx_request_irq(struct alx_priv *alx) if (!err) goto out; /* fall back to legacy interrupt */ + alx->flags &= ~ALX_FLAG_USING_MSI; pci_disable_msi(alx->hw.pdev); } @@ -678,10 +693,7 @@ static void alx_free_irq(struct alx_priv *alx) free_irq(pdev->irq, alx); - if (alx->msi) { - pci_disable_msi(alx->hw.pdev); - alx->msi = false; - } + alx_disable_advanced_intr(alx); } static int alx_identify_hw(struct alx_priv *alx) @@ -847,6 +859,8 @@ static int __alx_open(struct alx_priv *alx, bool resume) { int err; + alx_init_intr(alx, false); + if (!resume) netif_carrier_off(alx->dev); @@ -1236,7 +1250,7 @@ static void alx_poll_controller(struct net_device *netdev) { struct alx_priv *alx = netdev_priv(netdev); - if (alx->msi) + if (alx->flags & ALX_FLAG_USING_MSI) alx_intr_msi(0, alx); else alx_intr_legacy(0, alx); From a0373aef3ecf12d97a8332f953f0e16092f068b4 Mon Sep 17 00:00:00 2001 From: Tobias Regnery Date: Fri, 9 Sep 2016 12:19:53 +0200 Subject: [PATCH 0870/1715] alx: factor out part of the interrupt handler Factor out the handling of misc interrupts into a new function. This function can be reused later for msi-x interrupts. Signed-off-by: Tobias Regnery Signed-off-by: David S. Miller --- drivers/net/ethernet/atheros/alx/main.c | 34 +++++++++++++++---------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c index 6dc1539205eb..b34f7b693cec 100644 --- a/drivers/net/ethernet/atheros/alx/main.c +++ b/drivers/net/ethernet/atheros/alx/main.c @@ -302,22 +302,15 @@ static int alx_poll(struct napi_struct *napi, int budget) return work; } -static irqreturn_t alx_intr_handle(struct alx_priv *alx, u32 intr) +static bool alx_intr_handle_misc(struct alx_priv *alx, u32 intr) { struct alx_hw *hw = &alx->hw; - bool write_int_mask = false; - - spin_lock(&alx->irq_lock); - - /* ACK interrupt */ - alx_write_mem32(hw, ALX_ISR, intr | ALX_ISR_DIS); - intr &= alx->int_mask; if (intr & ALX_ISR_FATAL) { netif_warn(alx, hw, alx->dev, "fatal interrupt 0x%x, resetting\n", intr); alx_schedule_reset(alx); - goto out; + return true; } if (intr & ALX_ISR_ALERT) @@ -329,19 +322,32 @@ static irqreturn_t alx_intr_handle(struct alx_priv *alx, u32 intr) * is cleared, the interrupt status could be cleared. */ alx->int_mask &= ~ALX_ISR_PHY; - write_int_mask = true; + alx_write_mem32(hw, ALX_IMR, alx->int_mask); alx_schedule_link_check(alx); } + return false; +} + +static irqreturn_t alx_intr_handle(struct alx_priv *alx, u32 intr) +{ + struct alx_hw *hw = &alx->hw; + + spin_lock(&alx->irq_lock); + + /* ACK interrupt */ + alx_write_mem32(hw, ALX_ISR, intr | ALX_ISR_DIS); + intr &= alx->int_mask; + + if (alx_intr_handle_misc(alx, intr)) + goto out; + if (intr & (ALX_ISR_TX_Q0 | ALX_ISR_RX_Q0)) { napi_schedule(&alx->napi); /* mask rx/tx interrupt, enable them when napi complete */ alx->int_mask &= ~ALX_ISR_ALL_QUEUES; - write_int_mask = true; - } - - if (write_int_mask) alx_write_mem32(hw, ALX_IMR, alx->int_mask); + } alx_write_mem32(hw, ALX_ISR, 0); From dc39a78b3c6113dcad5e0f52e3b9deba7ad2fa3d Mon Sep 17 00:00:00 2001 From: Tobias Regnery Date: Fri, 9 Sep 2016 12:19:54 +0200 Subject: [PATCH 0871/1715] alx: add msi-x support Add msi-x support to the alx driver. This is in preparation for multi queue support. msi-x interrupts are disabled by default because without multi queue support there is no advantage over msi interrupts. The performance numbers observed with iperf stay the same. Based on information of the downstream driver at github.com/qca/alx Signed-off-by: Tobias Regnery Signed-off-by: David S. Miller --- drivers/net/ethernet/atheros/alx/alx.h | 5 + drivers/net/ethernet/atheros/alx/hw.c | 14 ++ drivers/net/ethernet/atheros/alx/hw.h | 1 + drivers/net/ethernet/atheros/alx/main.c | 172 ++++++++++++++++++++++-- 4 files changed, 184 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/atheros/alx/alx.h b/drivers/net/ethernet/atheros/alx/alx.h index 16ca3f4fa2cc..6cac919272ea 100644 --- a/drivers/net/ethernet/atheros/alx/alx.h +++ b/drivers/net/ethernet/atheros/alx/alx.h @@ -84,6 +84,11 @@ struct alx_priv { struct alx_hw hw; + /* msi-x vectors */ + int num_vec; + struct msix_entry *msix_entries; + char irq_lbl[IFNAMSIZ + 8]; + /* all descriptor memory */ struct { dma_addr_t dma; diff --git a/drivers/net/ethernet/atheros/alx/hw.c b/drivers/net/ethernet/atheros/alx/hw.c index 1fe35e453d43..6ac40b0003a3 100644 --- a/drivers/net/ethernet/atheros/alx/hw.c +++ b/drivers/net/ethernet/atheros/alx/hw.c @@ -1031,6 +1031,20 @@ void alx_configure_basic(struct alx_hw *hw) alx_write_mem32(hw, ALX_WRR, val); } +void alx_mask_msix(struct alx_hw *hw, int index, bool mask) +{ + u32 reg, val; + + reg = ALX_MSIX_ENTRY_BASE + index * PCI_MSIX_ENTRY_SIZE + + PCI_MSIX_ENTRY_VECTOR_CTRL; + + val = mask ? PCI_MSIX_ENTRY_CTRL_MASKBIT : 0; + + alx_write_mem32(hw, reg, val); + alx_post_write(hw); +} + + bool alx_get_phy_info(struct alx_hw *hw) { u16 devs1, devs2; diff --git a/drivers/net/ethernet/atheros/alx/hw.h b/drivers/net/ethernet/atheros/alx/hw.h index f289c05f5cb4..0191477ace51 100644 --- a/drivers/net/ethernet/atheros/alx/hw.h +++ b/drivers/net/ethernet/atheros/alx/hw.h @@ -562,6 +562,7 @@ int alx_reset_mac(struct alx_hw *hw); void alx_set_macaddr(struct alx_hw *hw, const u8 *addr); bool alx_phy_configured(struct alx_hw *hw); void alx_configure_basic(struct alx_hw *hw); +void alx_mask_msix(struct alx_hw *hw, int index, bool mask); void alx_disable_rss(struct alx_hw *hw); bool alx_get_phy_info(struct alx_hw *hw); void alx_update_hw_stats(struct alx_hw *hw); diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c index b34f7b693cec..a4f74d4a976c 100644 --- a/drivers/net/ethernet/atheros/alx/main.c +++ b/drivers/net/ethernet/atheros/alx/main.c @@ -292,10 +292,14 @@ static int alx_poll(struct napi_struct *napi, int budget) napi_complete(&alx->napi); /* enable interrupt */ - spin_lock_irqsave(&alx->irq_lock, flags); - alx->int_mask |= ALX_ISR_TX_Q0 | ALX_ISR_RX_Q0; - alx_write_mem32(hw, ALX_IMR, alx->int_mask); - spin_unlock_irqrestore(&alx->irq_lock, flags); + if (alx->flags & ALX_FLAG_USING_MSIX) { + alx_mask_msix(hw, 1, false); + } else { + spin_lock_irqsave(&alx->irq_lock, flags); + alx->int_mask |= ALX_ISR_TX_Q0 | ALX_ISR_RX_Q0; + alx_write_mem32(hw, ALX_IMR, alx->int_mask); + spin_unlock_irqrestore(&alx->irq_lock, flags); + } alx_post_write(hw); @@ -356,6 +360,46 @@ static irqreturn_t alx_intr_handle(struct alx_priv *alx, u32 intr) return IRQ_HANDLED; } +static irqreturn_t alx_intr_msix_ring(int irq, void *data) +{ + struct alx_priv *alx = data; + struct alx_hw *hw = &alx->hw; + + /* mask interrupt to ACK chip */ + alx_mask_msix(hw, 1, true); + /* clear interrupt status */ + alx_write_mem32(hw, ALX_ISR, (ALX_ISR_TX_Q0 | ALX_ISR_RX_Q0)); + + napi_schedule(&alx->napi); + + return IRQ_HANDLED; +} + +static irqreturn_t alx_intr_msix_misc(int irq, void *data) +{ + struct alx_priv *alx = data; + struct alx_hw *hw = &alx->hw; + u32 intr; + + /* mask interrupt to ACK chip */ + alx_mask_msix(hw, 0, true); + + /* read interrupt status */ + intr = alx_read_mem32(hw, ALX_ISR); + intr &= (alx->int_mask & ~ALX_ISR_ALL_QUEUES); + + if (alx_intr_handle_misc(alx, intr)) + return IRQ_HANDLED; + + /* clear interrupt status */ + alx_write_mem32(hw, ALX_ISR, intr); + + /* enable interrupt again */ + alx_mask_msix(hw, 0, false); + + return IRQ_HANDLED; +} + static irqreturn_t alx_intr_msi(int irq, void *data) { struct alx_priv *alx = data; @@ -620,15 +664,84 @@ static void alx_free_rings(struct alx_priv *alx) static void alx_config_vector_mapping(struct alx_priv *alx) { struct alx_hw *hw = &alx->hw; + u32 tbl = 0; - alx_write_mem32(hw, ALX_MSI_MAP_TBL1, 0); + if (alx->flags & ALX_FLAG_USING_MSIX) { + tbl |= 1 << ALX_MSI_MAP_TBL1_TXQ0_SHIFT; + tbl |= 1 << ALX_MSI_MAP_TBL1_RXQ0_SHIFT; + } + + alx_write_mem32(hw, ALX_MSI_MAP_TBL1, tbl); alx_write_mem32(hw, ALX_MSI_MAP_TBL2, 0); alx_write_mem32(hw, ALX_MSI_ID_MAP, 0); } +static bool alx_enable_msix(struct alx_priv *alx) +{ + int i, err, num_vec = 2; + + alx->msix_entries = kcalloc(num_vec, sizeof(struct msix_entry), + GFP_KERNEL); + if (!alx->msix_entries) { + netdev_warn(alx->dev, "Allocation of msix entries failed!\n"); + return false; + } + + for (i = 0; i < num_vec; i++) + alx->msix_entries[i].entry = i; + + err = pci_enable_msix(alx->hw.pdev, alx->msix_entries, num_vec); + if (err) { + kfree(alx->msix_entries); + netdev_warn(alx->dev, "Enabling MSI-X interrupts failed!\n"); + return false; + } + + alx->num_vec = num_vec; + return true; +} + +static int alx_request_msix(struct alx_priv *alx) +{ + struct net_device *netdev = alx->dev; + int i, err, vector = 0, free_vector = 0; + + err = request_irq(alx->msix_entries[0].vector, alx_intr_msix_misc, + 0, netdev->name, alx); + if (err) + goto out_err; + + vector++; + sprintf(alx->irq_lbl, "%s-TxRx-0", netdev->name); + + err = request_irq(alx->msix_entries[vector].vector, + alx_intr_msix_ring, 0, alx->irq_lbl, alx); + if (err) + goto out_free; + + return 0; + +out_free: + free_irq(alx->msix_entries[free_vector++].vector, alx); + + vector--; + for (i = 0; i < vector; i++) + free_irq(alx->msix_entries[free_vector++].vector, alx); + +out_err: + return err; +} + static void alx_init_intr(struct alx_priv *alx, bool msix) { + if (msix) { + if (alx_enable_msix(alx)) + alx->flags |= ALX_FLAG_USING_MSIX; + } + if (!(alx->flags & ALX_FLAG_USING_MSIX)) { + alx->num_vec = 1; + if (!pci_enable_msi(alx->hw.pdev)) alx->flags |= ALX_FLAG_USING_MSI; } @@ -636,6 +749,12 @@ static void alx_init_intr(struct alx_priv *alx, bool msix) static void alx_disable_advanced_intr(struct alx_priv *alx) { + if (alx->flags & ALX_FLAG_USING_MSIX) { + kfree(alx->msix_entries); + pci_disable_msix(alx->hw.pdev); + alx->flags &= ~ALX_FLAG_USING_MSIX; + } + if (alx->flags & ALX_FLAG_USING_MSI) { pci_disable_msi(alx->hw.pdev); alx->flags &= ~ALX_FLAG_USING_MSI; @@ -645,22 +764,36 @@ static void alx_disable_advanced_intr(struct alx_priv *alx) static void alx_irq_enable(struct alx_priv *alx) { struct alx_hw *hw = &alx->hw; + int i; /* level-1 interrupt switch */ alx_write_mem32(hw, ALX_ISR, 0); alx_write_mem32(hw, ALX_IMR, alx->int_mask); alx_post_write(hw); + + if (alx->flags & ALX_FLAG_USING_MSIX) + /* enable all msix irqs */ + for (i = 0; i < alx->num_vec; i++) + alx_mask_msix(hw, i, false); } static void alx_irq_disable(struct alx_priv *alx) { struct alx_hw *hw = &alx->hw; + int i; alx_write_mem32(hw, ALX_ISR, ALX_ISR_DIS); alx_write_mem32(hw, ALX_IMR, 0); alx_post_write(hw); - synchronize_irq(alx->hw.pdev->irq); + if (alx->flags & ALX_FLAG_USING_MSIX) { + for (i = 0; i < alx->num_vec; i++) { + alx_mask_msix(hw, i, true); + synchronize_irq(alx->msix_entries[i].vector); + } + } else { + synchronize_irq(alx->hw.pdev->irq); + } } static int alx_request_irq(struct alx_priv *alx) @@ -672,6 +805,17 @@ static int alx_request_irq(struct alx_priv *alx) msi_ctrl = (hw->imt >> 1) << ALX_MSI_RETRANS_TM_SHIFT; + if (alx->flags & ALX_FLAG_USING_MSIX) { + alx_write_mem32(hw, ALX_MSI_RETRANS_TIMER, msi_ctrl); + err = alx_request_msix(alx); + if (!err) + goto out; + + /* msix request failed, realloc resources */ + alx_disable_advanced_intr(alx); + alx_init_intr(alx, false); + } + if (alx->flags & ALX_FLAG_USING_MSI) { alx_write_mem32(hw, ALX_MSI_RETRANS_TIMER, msi_ctrl | ALX_MSI_MASK_SEL_LINE); @@ -690,14 +834,23 @@ static int alx_request_irq(struct alx_priv *alx) out: if (!err) alx_config_vector_mapping(alx); + else + netdev_err(alx->dev, "IRQ registration failed!\n"); return err; } static void alx_free_irq(struct alx_priv *alx) { struct pci_dev *pdev = alx->hw.pdev; + int i; - free_irq(pdev->irq, alx); + if (alx->flags & ALX_FLAG_USING_MSIX) { + /* we have only 2 vectors without multi queue support */ + for (i = 0; i < 2; i++) + free_irq(alx->msix_entries[i].vector, alx); + } else { + free_irq(pdev->irq, alx); + } alx_disable_advanced_intr(alx); } @@ -1256,7 +1409,10 @@ static void alx_poll_controller(struct net_device *netdev) { struct alx_priv *alx = netdev_priv(netdev); - if (alx->flags & ALX_FLAG_USING_MSI) + if (alx->flags & ALX_FLAG_USING_MSIX) { + alx_intr_msix_misc(0, alx); + alx_intr_msix_ring(0, alx); + } else if (alx->flags & ALX_FLAG_USING_MSI) alx_intr_msi(0, alx); else alx_intr_legacy(0, alx); From 0c58ee0bfa28ad06dbc2b6305b1b950f7c392cdf Mon Sep 17 00:00:00 2001 From: Tobias Regnery Date: Fri, 9 Sep 2016 12:19:55 +0200 Subject: [PATCH 0872/1715] alx: add module parameter to enable msi-x support msi-x support is default disabled in the alx driver. In order to test msi-x interrupts for regressions add a module parameter to the driver. Signed-off-by: Tobias Regnery Signed-off-by: David S. Miller --- drivers/net/ethernet/atheros/alx/main.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c index a4f74d4a976c..9887cee434dd 100644 --- a/drivers/net/ethernet/atheros/alx/main.c +++ b/drivers/net/ethernet/atheros/alx/main.c @@ -51,6 +51,9 @@ const char alx_drv_name[] = "alx"; +static bool msix = false; +module_param(msix, bool, 0); +MODULE_PARM_DESC(msix, "Enable msi-x interrupt support"); static void alx_free_txbuf(struct alx_priv *alx, int entry) { @@ -1018,7 +1021,7 @@ static int __alx_open(struct alx_priv *alx, bool resume) { int err; - alx_init_intr(alx, false); + alx_init_intr(alx, msix); if (!resume) netif_carrier_off(alx->dev); From e808bb6ed042020d3a15cb6e85ca646bc7c9eda8 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Fri, 9 Sep 2016 20:40:16 +0200 Subject: [PATCH 0873/1715] ATM-iphase: Use kmalloc_array() in tx_init() * Multiplications for the size determination of memory allocations indicated that array data structures should be processed. Thus use the corresponding function "kmalloc_array". This issue was detected by using the Coccinelle software. * Replace the specification of data types by pointer dereferences to make the corresponding size determination a bit safer according to the Linux coding style convention. Signed-off-by: Markus Elfring Signed-off-by: David S. Miller --- drivers/atm/iphase.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/atm/iphase.c b/drivers/atm/iphase.c index 809dd1e02091..9d8807e76639 100644 --- a/drivers/atm/iphase.c +++ b/drivers/atm/iphase.c @@ -1975,7 +1975,9 @@ static int tx_init(struct atm_dev *dev) buf_desc_ptr++; tx_pkt_start += iadev->tx_buf_sz; } - iadev->tx_buf = kmalloc(iadev->num_tx_desc*sizeof(struct cpcs_trailer_desc), GFP_KERNEL); + iadev->tx_buf = kmalloc_array(iadev->num_tx_desc, + sizeof(*iadev->tx_buf), + GFP_KERNEL); if (!iadev->tx_buf) { printk(KERN_ERR DEV_LABEL " couldn't get mem\n"); goto err_free_dle; @@ -1995,8 +1997,9 @@ static int tx_init(struct atm_dev *dev) sizeof(*cpcs), DMA_TO_DEVICE); } - iadev->desc_tbl = kmalloc(iadev->num_tx_desc * - sizeof(struct desc_tbl_t), GFP_KERNEL); + iadev->desc_tbl = kmalloc_array(iadev->num_tx_desc, + sizeof(*iadev->desc_tbl), + GFP_KERNEL); if (!iadev->desc_tbl) { printk(KERN_ERR DEV_LABEL " couldn't get mem\n"); goto err_free_all_tx_bufs; @@ -2124,7 +2127,9 @@ static int tx_init(struct atm_dev *dev) memset((caddr_t)(iadev->seg_ram+i), 0, iadev->num_vc*4); vc = (struct main_vc *)iadev->MAIN_VC_TABLE_ADDR; evc = (struct ext_vc *)iadev->EXT_VC_TABLE_ADDR; - iadev->testTable = kmalloc(sizeof(long)*iadev->num_vc, GFP_KERNEL); + iadev->testTable = kmalloc_array(iadev->num_vc, + sizeof(*iadev->testTable), + GFP_KERNEL); if (!iadev->testTable) { printk("Get freepage failed\n"); goto err_free_desc_tbl; From d817f432c2ab7639a4f69de73eafdc55e57c45ad Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Thu, 8 Sep 2016 16:23:45 +0300 Subject: [PATCH 0874/1715] net/ip_tunnels: Introduce tunnel_id_to_key32() and key32_to_tunnel_id() Add utility functions to convert a 32 bits key into a 64 bits tunnel and vice versa. These functions will be used instead of cloning code in GRE and VXLAN, and in tc act_iptunnel which will be introduced in a following patch in this patchset. Signed-off-by: Amir Vadai Signed-off-by: Hadar Hen Zion Reviewed-by: Shmulik Ladkani Acked-by: Jiri Benc Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 4 ++-- include/net/ip_tunnels.h | 19 +++++++++++++++++++ include/net/vxlan.h | 18 ------------------ net/ipv4/ip_gre.c | 23 ++--------------------- 4 files changed, 23 insertions(+), 41 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 199dec033cf8..4bfeb9765c55 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1291,7 +1291,7 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) struct metadata_dst *tun_dst; tun_dst = udp_tun_rx_dst(skb, vxlan_get_sk_family(vs), TUNNEL_KEY, - vxlan_vni_to_tun_id(vni), sizeof(*md)); + key32_to_tunnel_id(vni), sizeof(*md)); if (!tun_dst) goto drop; @@ -1945,7 +1945,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, goto drop; } dst_port = info->key.tp_dst ? : vxlan->cfg.dst_port; - vni = vxlan_tun_id_to_vni(info->key.tun_id); + vni = tunnel_id_to_key32(info->key.tun_id); remote_ip.sa.sa_family = ip_tunnel_info_af(info); if (remote_ip.sa.sa_family == AF_INET) { remote_ip.sin.sin_addr.s_addr = info->key.u.ipv4.dst; diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index a5e7035fb93f..e598c639aa6f 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -222,6 +222,25 @@ static inline unsigned short ip_tunnel_info_af(const struct ip_tunnel_info return tun_info->mode & IP_TUNNEL_INFO_IPV6 ? AF_INET6 : AF_INET; } +static inline __be64 key32_to_tunnel_id(__be32 key) +{ +#ifdef __BIG_ENDIAN + return (__force __be64)key; +#else + return (__force __be64)((__force u64)key << 32); +#endif +} + +/* Returns the least-significant 32 bits of a __be64. */ +static inline __be32 tunnel_id_to_key32(__be64 tun_id) +{ +#ifdef __BIG_ENDIAN + return (__force __be32)tun_id; +#else + return (__force __be32)((__force u64)tun_id >> 32); +#endif +} + #ifdef CONFIG_INET int ip_tunnel_init(struct net_device *dev); diff --git a/include/net/vxlan.h b/include/net/vxlan.h index b96d0360c095..0255613a54a4 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -350,24 +350,6 @@ static inline __be32 vxlan_vni_field(__be32 vni) #endif } -static inline __be32 vxlan_tun_id_to_vni(__be64 tun_id) -{ -#if defined(__BIG_ENDIAN) - return (__force __be32)tun_id; -#else - return (__force __be32)((__force u64)tun_id >> 32); -#endif -} - -static inline __be64 vxlan_vni_to_tun_id(__be32 vni) -{ -#if defined(__BIG_ENDIAN) - return (__force __be64)vni; -#else - return (__force __be64)((u64)(__force u32)vni << 32); -#endif -} - static inline size_t vxlan_rco_start(__be32 vni_field) { return be32_to_cpu(vni_field & VXLAN_RCO_MASK) << VXLAN_RCO_SHIFT; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 113cc43df789..576f705d8180 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -246,25 +246,6 @@ static void gre_err(struct sk_buff *skb, u32 info) ipgre_err(skb, info, &tpi); } -static __be64 key_to_tunnel_id(__be32 key) -{ -#ifdef __BIG_ENDIAN - return (__force __be64)((__force u32)key); -#else - return (__force __be64)((__force u64)key << 32); -#endif -} - -/* Returns the least-significant 32 bits of a __be64. */ -static __be32 tunnel_id_to_key(__be64 x) -{ -#ifdef __BIG_ENDIAN - return (__force __be32)x; -#else - return (__force __be32)((__force u64)x >> 32); -#endif -} - static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi, struct ip_tunnel_net *itn, int hdr_len, bool raw_proto) { @@ -290,7 +271,7 @@ static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi, __be64 tun_id; flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY); - tun_id = key_to_tunnel_id(tpi->key); + tun_id = key32_to_tunnel_id(tpi->key); tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0); if (!tun_dst) return PACKET_REJECT; @@ -446,7 +427,7 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev, flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY); gre_build_header(skb, tunnel_hlen, flags, proto, - tunnel_id_to_key(tun_info->key.tun_id), 0); + tunnel_id_to_key32(tun_info->key.tun_id), 0); df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; From 2ff378b7474feac1ec665d01e4dfc6907cccc11c Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Thu, 8 Sep 2016 16:23:46 +0300 Subject: [PATCH 0875/1715] net/dst: Utility functions to build dst_metadata without supplying an skb Extract __ip_tun_set_dst() and __ipv6_tun_set_dst() out of ip_tun_rx_dst() and ipv6_tun_rx_dst(), to be used without supplying an skb. Signed-off-by: Amir Vadai Signed-off-by: Hadar Hen Zion Acked-by: Jiri Pirko Reviewed-by: Shmulik Ladkani Signed-off-by: David S. Miller --- include/net/dst_metadata.h | 52 ++++++++++++++++++++++++++++---------- 1 file changed, 39 insertions(+), 13 deletions(-) diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h index 5db9f5910428..6965c8f68ade 100644 --- a/include/net/dst_metadata.h +++ b/include/net/dst_metadata.h @@ -112,12 +112,13 @@ static inline struct ip_tunnel_info *skb_tunnel_info_unclone(struct sk_buff *skb return &dst->u.tun_info; } -static inline struct metadata_dst *ip_tun_rx_dst(struct sk_buff *skb, - __be16 flags, - __be64 tunnel_id, - int md_size) +static inline struct metadata_dst *__ip_tun_set_dst(__be32 saddr, + __be32 daddr, + __u8 tos, __u8 ttl, + __be16 flags, + __be64 tunnel_id, + int md_size) { - const struct iphdr *iph = ip_hdr(skb); struct metadata_dst *tun_dst; tun_dst = tun_rx_dst(md_size); @@ -125,17 +126,30 @@ static inline struct metadata_dst *ip_tun_rx_dst(struct sk_buff *skb, return NULL; ip_tunnel_key_init(&tun_dst->u.tun_info.key, - iph->saddr, iph->daddr, iph->tos, iph->ttl, + saddr, daddr, tos, ttl, 0, 0, 0, tunnel_id, flags); return tun_dst; } -static inline struct metadata_dst *ipv6_tun_rx_dst(struct sk_buff *skb, +static inline struct metadata_dst *ip_tun_rx_dst(struct sk_buff *skb, __be16 flags, __be64 tunnel_id, int md_size) { - const struct ipv6hdr *ip6h = ipv6_hdr(skb); + const struct iphdr *iph = ip_hdr(skb); + + return __ip_tun_set_dst(iph->saddr, iph->daddr, iph->tos, iph->ttl, + flags, tunnel_id, md_size); +} + +static inline struct metadata_dst *__ipv6_tun_set_dst(const struct in6_addr *saddr, + const struct in6_addr *daddr, + __u8 tos, __u8 ttl, + __be32 label, + __be16 flags, + __be64 tunnel_id, + int md_size) +{ struct metadata_dst *tun_dst; struct ip_tunnel_info *info; @@ -150,14 +164,26 @@ static inline struct metadata_dst *ipv6_tun_rx_dst(struct sk_buff *skb, info->key.tp_src = 0; info->key.tp_dst = 0; - info->key.u.ipv6.src = ip6h->saddr; - info->key.u.ipv6.dst = ip6h->daddr; + info->key.u.ipv6.src = *saddr; + info->key.u.ipv6.dst = *daddr; - info->key.tos = ipv6_get_dsfield(ip6h); - info->key.ttl = ip6h->hop_limit; - info->key.label = ip6_flowlabel(ip6h); + info->key.tos = tos; + info->key.ttl = ttl; + info->key.label = label; return tun_dst; } +static inline struct metadata_dst *ipv6_tun_rx_dst(struct sk_buff *skb, + __be16 flags, + __be64 tunnel_id, + int md_size) +{ + const struct ipv6hdr *ip6h = ipv6_hdr(skb); + + return __ipv6_tun_set_dst(&ip6h->saddr, &ip6h->daddr, + ipv6_get_dsfield(ip6h), ip6h->hop_limit, + ip6_flowlabel(ip6h), flags, tunnel_id, + md_size); +} #endif /* __NET_DST_METADATA_H */ From bc3103f1ed405de587fa43d8b0671e615505a700 Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Thu, 8 Sep 2016 16:23:47 +0300 Subject: [PATCH 0876/1715] net/sched: cls_flower: Classify packet in ip tunnels Introduce classifying by metadata extracted by the tunnel device. Outer header fields - source/dest ip and tunnel id, are extracted from the metadata when classifying. For example, the following will add a filter on the ingress Qdisc of shared vxlan device named 'vxlan0'. To forward packets with outer src ip 11.11.0.2, dst ip 11.11.0.1 and tunnel id 11. The packets will be forwarded to tap device 'vnet0' (after metadata is released): $ tc filter add dev vxlan0 protocol ip parent ffff: \ flower \ enc_src_ip 11.11.0.2 \ enc_dst_ip 11.11.0.1 \ enc_key_id 11 \ dst_ip 11.11.11.1 \ action tunnel_key release \ action mirred egress redirect dev vnet0 The action tunnel_key, will be introduced in the next patch in this series. Signed-off-by: Amir Vadai Signed-off-by: Hadar Hen Zion Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/pkt_cls.h | 11 ++++ net/sched/cls_flower.c | 100 ++++++++++++++++++++++++++++++++++- 2 files changed, 110 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 51b5b247fb5a..f9c287c67eae 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -431,6 +431,17 @@ enum { TCA_FLOWER_KEY_VLAN_ID, TCA_FLOWER_KEY_VLAN_PRIO, TCA_FLOWER_KEY_VLAN_ETH_TYPE, + + TCA_FLOWER_KEY_ENC_KEY_ID, /* be32 */ + TCA_FLOWER_KEY_ENC_IPV4_SRC, /* be32 */ + TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK,/* be32 */ + TCA_FLOWER_KEY_ENC_IPV4_DST, /* be32 */ + TCA_FLOWER_KEY_ENC_IPV4_DST_MASK,/* be32 */ + TCA_FLOWER_KEY_ENC_IPV6_SRC, /* struct in6_addr */ + TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK,/* struct in6_addr */ + TCA_FLOWER_KEY_ENC_IPV6_DST, /* struct in6_addr */ + TCA_FLOWER_KEY_ENC_IPV6_DST_MASK,/* struct in6_addr */ + __TCA_FLOWER_MAX, }; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index cf9ad5b50889..b084b2aab2d7 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -23,9 +23,13 @@ #include #include +#include +#include + struct fl_flow_key { int indev_ifindex; struct flow_dissector_key_control control; + struct flow_dissector_key_control enc_control; struct flow_dissector_key_basic basic; struct flow_dissector_key_eth_addrs eth; struct flow_dissector_key_vlan vlan; @@ -35,6 +39,11 @@ struct fl_flow_key { struct flow_dissector_key_ipv6_addrs ipv6; }; struct flow_dissector_key_ports tp; + struct flow_dissector_key_keyid enc_key_id; + union { + struct flow_dissector_key_ipv4_addrs enc_ipv4; + struct flow_dissector_key_ipv6_addrs enc_ipv6; + }; } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */ struct fl_flow_mask_range { @@ -124,11 +133,31 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct cls_fl_filter *f; struct fl_flow_key skb_key; struct fl_flow_key skb_mkey; + struct ip_tunnel_info *info; if (!atomic_read(&head->ht.nelems)) return -1; fl_clear_masked_range(&skb_key, &head->mask); + + info = skb_tunnel_info(skb); + if (info) { + struct ip_tunnel_key *key = &info->key; + + switch (ip_tunnel_info_af(info)) { + case AF_INET: + skb_key.enc_ipv4.src = key->u.ipv4.src; + skb_key.enc_ipv4.dst = key->u.ipv4.dst; + break; + case AF_INET6: + skb_key.enc_ipv6.src = key->u.ipv6.src; + skb_key.enc_ipv6.dst = key->u.ipv6.dst; + break; + } + + skb_key.enc_key_id.keyid = tunnel_id_to_key32(key->tun_id); + } + skb_key.indev_ifindex = skb->skb_iif; /* skb_flow_dissect() does not set n_proto in case an unknown protocol, * so do it rather here. @@ -297,7 +326,15 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { [TCA_FLOWER_KEY_VLAN_ID] = { .type = NLA_U16 }, [TCA_FLOWER_KEY_VLAN_PRIO] = { .type = NLA_U8 }, [TCA_FLOWER_KEY_VLAN_ETH_TYPE] = { .type = NLA_U16 }, - + [TCA_FLOWER_KEY_ENC_KEY_ID] = { .type = NLA_U32 }, + [TCA_FLOWER_KEY_ENC_IPV4_SRC] = { .type = NLA_U32 }, + [TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK] = { .type = NLA_U32 }, + [TCA_FLOWER_KEY_ENC_IPV4_DST] = { .type = NLA_U32 }, + [TCA_FLOWER_KEY_ENC_IPV4_DST_MASK] = { .type = NLA_U32 }, + [TCA_FLOWER_KEY_ENC_IPV6_SRC] = { .len = sizeof(struct in6_addr) }, + [TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK] = { .len = sizeof(struct in6_addr) }, + [TCA_FLOWER_KEY_ENC_IPV6_DST] = { .len = sizeof(struct in6_addr) }, + [TCA_FLOWER_KEY_ENC_IPV6_DST_MASK] = { .len = sizeof(struct in6_addr) }, }; static void fl_set_key_val(struct nlattr **tb, @@ -409,6 +446,40 @@ static int fl_set_key(struct net *net, struct nlattr **tb, sizeof(key->tp.dst)); } + if (tb[TCA_FLOWER_KEY_ENC_IPV4_SRC] || + tb[TCA_FLOWER_KEY_ENC_IPV4_DST]) { + key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; + fl_set_key_val(tb, &key->enc_ipv4.src, + TCA_FLOWER_KEY_ENC_IPV4_SRC, + &mask->enc_ipv4.src, + TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK, + sizeof(key->enc_ipv4.src)); + fl_set_key_val(tb, &key->enc_ipv4.dst, + TCA_FLOWER_KEY_ENC_IPV4_DST, + &mask->enc_ipv4.dst, + TCA_FLOWER_KEY_ENC_IPV4_DST_MASK, + sizeof(key->enc_ipv4.dst)); + } + + if (tb[TCA_FLOWER_KEY_ENC_IPV6_SRC] || + tb[TCA_FLOWER_KEY_ENC_IPV6_DST]) { + key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; + fl_set_key_val(tb, &key->enc_ipv6.src, + TCA_FLOWER_KEY_ENC_IPV6_SRC, + &mask->enc_ipv6.src, + TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK, + sizeof(key->enc_ipv6.src)); + fl_set_key_val(tb, &key->enc_ipv6.dst, + TCA_FLOWER_KEY_ENC_IPV6_DST, + &mask->enc_ipv6.dst, + TCA_FLOWER_KEY_ENC_IPV6_DST_MASK, + sizeof(key->enc_ipv6.dst)); + } + + fl_set_key_val(tb, &key->enc_key_id.keyid, TCA_FLOWER_KEY_ENC_KEY_ID, + &mask->enc_key_id.keyid, TCA_FLOWER_KEY_ENC_KEY_ID, + sizeof(key->enc_key_id.keyid)); + return 0; } @@ -821,6 +892,33 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, sizeof(key->tp.dst)))) goto nla_put_failure; + if (key->enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS && + (fl_dump_key_val(skb, &key->enc_ipv4.src, + TCA_FLOWER_KEY_ENC_IPV4_SRC, &mask->enc_ipv4.src, + TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK, + sizeof(key->enc_ipv4.src)) || + fl_dump_key_val(skb, &key->enc_ipv4.dst, + TCA_FLOWER_KEY_ENC_IPV4_DST, &mask->enc_ipv4.dst, + TCA_FLOWER_KEY_ENC_IPV4_DST_MASK, + sizeof(key->enc_ipv4.dst)))) + goto nla_put_failure; + else if (key->enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS && + (fl_dump_key_val(skb, &key->enc_ipv6.src, + TCA_FLOWER_KEY_ENC_IPV6_SRC, &mask->enc_ipv6.src, + TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK, + sizeof(key->enc_ipv6.src)) || + fl_dump_key_val(skb, &key->enc_ipv6.dst, + TCA_FLOWER_KEY_ENC_IPV6_DST, + &mask->enc_ipv6.dst, + TCA_FLOWER_KEY_ENC_IPV6_DST_MASK, + sizeof(key->enc_ipv6.dst)))) + goto nla_put_failure; + + if (fl_dump_key_val(skb, &key->enc_key_id, TCA_FLOWER_KEY_ENC_KEY_ID, + &mask->enc_key_id, TCA_FLOWER_KEY_ENC_KEY_ID, + sizeof(key->enc_key_id))) + goto nla_put_failure; + nla_put_u32(skb, TCA_FLOWER_FLAGS, f->flags); if (tcf_exts_dump(skb, &f->exts)) From d0f6dd8a914f42c6f1a3a8c08caa16559d3d9a1b Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Thu, 8 Sep 2016 16:23:48 +0300 Subject: [PATCH 0877/1715] net/sched: Introduce act_tunnel_key This action could be used before redirecting packets to a shared tunnel device, or when redirecting packets arriving from a such a device. The action will release the metadata created by the tunnel device (decap), or set the metadata with the specified values for encap operation. For example, the following flower filter will forward all ICMP packets destined to 11.11.11.2 through the shared vxlan device 'vxlan0'. Before redirecting, a metadata for the vxlan tunnel is created using the tunnel_key action and it's arguments: $ tc filter add dev net0 protocol ip parent ffff: \ flower \ ip_proto 1 \ dst_ip 11.11.11.2 \ action tunnel_key set \ src_ip 11.11.0.1 \ dst_ip 11.11.0.2 \ id 11 \ action mirred egress redirect dev vxlan0 Signed-off-by: Amir Vadai Signed-off-by: Hadar Hen Zion Reviewed-by: Shmulik Ladkani Acked-by: Jamal Hadi Salim Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tc_act/tc_tunnel_key.h | 30 ++ include/uapi/linux/tc_act/tc_tunnel_key.h | 41 +++ net/sched/Kconfig | 11 + net/sched/Makefile | 1 + net/sched/act_tunnel_key.c | 351 ++++++++++++++++++++++ 5 files changed, 434 insertions(+) create mode 100644 include/net/tc_act/tc_tunnel_key.h create mode 100644 include/uapi/linux/tc_act/tc_tunnel_key.h create mode 100644 net/sched/act_tunnel_key.c diff --git a/include/net/tc_act/tc_tunnel_key.h b/include/net/tc_act/tc_tunnel_key.h new file mode 100644 index 000000000000..253f8da6c2a6 --- /dev/null +++ b/include/net/tc_act/tc_tunnel_key.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2016, Amir Vadai + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef __NET_TC_TUNNEL_KEY_H +#define __NET_TC_TUNNEL_KEY_H + +#include + +struct tcf_tunnel_key_params { + struct rcu_head rcu; + int tcft_action; + int action; + struct metadata_dst *tcft_enc_metadata; +}; + +struct tcf_tunnel_key { + struct tc_action common; + struct tcf_tunnel_key_params __rcu *params; +}; + +#define to_tunnel_key(a) ((struct tcf_tunnel_key *)a) + +#endif /* __NET_TC_TUNNEL_KEY_H */ diff --git a/include/uapi/linux/tc_act/tc_tunnel_key.h b/include/uapi/linux/tc_act/tc_tunnel_key.h new file mode 100644 index 000000000000..890106ff16e6 --- /dev/null +++ b/include/uapi/linux/tc_act/tc_tunnel_key.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2016, Amir Vadai + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef __LINUX_TC_TUNNEL_KEY_H +#define __LINUX_TC_TUNNEL_KEY_H + +#include + +#define TCA_ACT_TUNNEL_KEY 17 + +#define TCA_TUNNEL_KEY_ACT_SET 1 +#define TCA_TUNNEL_KEY_ACT_RELEASE 2 + +struct tc_tunnel_key { + tc_gen; + int t_action; +}; + +enum { + TCA_TUNNEL_KEY_UNSPEC, + TCA_TUNNEL_KEY_TM, + TCA_TUNNEL_KEY_PARMS, + TCA_TUNNEL_KEY_ENC_IPV4_SRC, /* be32 */ + TCA_TUNNEL_KEY_ENC_IPV4_DST, /* be32 */ + TCA_TUNNEL_KEY_ENC_IPV6_SRC, /* struct in6_addr */ + TCA_TUNNEL_KEY_ENC_IPV6_DST, /* struct in6_addr */ + TCA_TUNNEL_KEY_ENC_KEY_ID, /* be64 */ + TCA_TUNNEL_KEY_PAD, + __TCA_TUNNEL_KEY_MAX, +}; + +#define TCA_TUNNEL_KEY_MAX (__TCA_TUNNEL_KEY_MAX - 1) + +#endif diff --git a/net/sched/Kconfig b/net/sched/Kconfig index ccf931b3b94c..72e3426fa48f 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -761,6 +761,17 @@ config NET_ACT_IFE To compile this code as a module, choose M here: the module will be called act_ife. +config NET_ACT_TUNNEL_KEY + tristate "IP tunnel metadata manipulation" + depends on NET_CLS_ACT + ---help--- + Say Y here to set/release ip tunnel metadata. + + If unsure, say N. + + To compile this code as a module, choose M here: the + module will be called act_tunnel_key. + config NET_IFE_SKBMARK tristate "Support to encoding decoding skb mark on IFE action" depends on NET_ACT_IFE diff --git a/net/sched/Makefile b/net/sched/Makefile index ae088a5a9d95..b9d046b9535a 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -22,6 +22,7 @@ obj-$(CONFIG_NET_ACT_CONNMARK) += act_connmark.o obj-$(CONFIG_NET_ACT_IFE) += act_ife.o obj-$(CONFIG_NET_IFE_SKBMARK) += act_meta_mark.o obj-$(CONFIG_NET_IFE_SKBPRIO) += act_meta_skbprio.o +obj-$(CONFIG_NET_ACT_TUNNEL_KEY)+= act_tunnel_key.o obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c new file mode 100644 index 000000000000..dceff7412dc3 --- /dev/null +++ b/net/sched/act_tunnel_key.c @@ -0,0 +1,351 @@ +/* + * Copyright (c) 2016, Amir Vadai + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define TUNNEL_KEY_TAB_MASK 15 + +static int tunnel_key_net_id; +static struct tc_action_ops act_tunnel_key_ops; + +static int tunnel_key_act(struct sk_buff *skb, const struct tc_action *a, + struct tcf_result *res) +{ + struct tcf_tunnel_key *t = to_tunnel_key(a); + struct tcf_tunnel_key_params *params; + int action; + + rcu_read_lock(); + + params = rcu_dereference(t->params); + + tcf_lastuse_update(&t->tcf_tm); + bstats_cpu_update(this_cpu_ptr(t->common.cpu_bstats), skb); + action = params->action; + + switch (params->tcft_action) { + case TCA_TUNNEL_KEY_ACT_RELEASE: + skb_dst_drop(skb); + break; + case TCA_TUNNEL_KEY_ACT_SET: + skb_dst_drop(skb); + skb_dst_set(skb, dst_clone(¶ms->tcft_enc_metadata->dst)); + break; + default: + WARN_ONCE(1, "Bad tunnel_key action %d.\n", + params->tcft_action); + break; + } + + rcu_read_unlock(); + + return action; +} + +static const struct nla_policy tunnel_key_policy[TCA_TUNNEL_KEY_MAX + 1] = { + [TCA_TUNNEL_KEY_PARMS] = { .len = sizeof(struct tc_tunnel_key) }, + [TCA_TUNNEL_KEY_ENC_IPV4_SRC] = { .type = NLA_U32 }, + [TCA_TUNNEL_KEY_ENC_IPV4_DST] = { .type = NLA_U32 }, + [TCA_TUNNEL_KEY_ENC_IPV6_SRC] = { .len = sizeof(struct in6_addr) }, + [TCA_TUNNEL_KEY_ENC_IPV6_DST] = { .len = sizeof(struct in6_addr) }, + [TCA_TUNNEL_KEY_ENC_KEY_ID] = { .type = NLA_U32 }, +}; + +static int tunnel_key_init(struct net *net, struct nlattr *nla, + struct nlattr *est, struct tc_action **a, + int ovr, int bind) +{ + struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); + struct nlattr *tb[TCA_TUNNEL_KEY_MAX + 1]; + struct tcf_tunnel_key_params *params_old; + struct tcf_tunnel_key_params *params_new; + struct metadata_dst *metadata = NULL; + struct tc_tunnel_key *parm; + struct tcf_tunnel_key *t; + bool exists = false; + __be64 key_id; + int ret = 0; + int err; + + if (!nla) + return -EINVAL; + + err = nla_parse_nested(tb, TCA_TUNNEL_KEY_MAX, nla, tunnel_key_policy); + if (err < 0) + return err; + + if (!tb[TCA_TUNNEL_KEY_PARMS]) + return -EINVAL; + + parm = nla_data(tb[TCA_TUNNEL_KEY_PARMS]); + exists = tcf_hash_check(tn, parm->index, a, bind); + if (exists && bind) + return 0; + + switch (parm->t_action) { + case TCA_TUNNEL_KEY_ACT_RELEASE: + break; + case TCA_TUNNEL_KEY_ACT_SET: + if (!tb[TCA_TUNNEL_KEY_ENC_KEY_ID]) { + ret = -EINVAL; + goto err_out; + } + + key_id = key32_to_tunnel_id(nla_get_be32(tb[TCA_TUNNEL_KEY_ENC_KEY_ID])); + + if (tb[TCA_TUNNEL_KEY_ENC_IPV4_SRC] && + tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]) { + __be32 saddr; + __be32 daddr; + + saddr = nla_get_in_addr(tb[TCA_TUNNEL_KEY_ENC_IPV4_SRC]); + daddr = nla_get_in_addr(tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]); + + metadata = __ip_tun_set_dst(saddr, daddr, 0, 0, + TUNNEL_KEY, key_id, 0); + } else if (tb[TCA_TUNNEL_KEY_ENC_IPV6_SRC] && + tb[TCA_TUNNEL_KEY_ENC_IPV6_DST]) { + struct in6_addr saddr; + struct in6_addr daddr; + + saddr = nla_get_in6_addr(tb[TCA_TUNNEL_KEY_ENC_IPV6_SRC]); + daddr = nla_get_in6_addr(tb[TCA_TUNNEL_KEY_ENC_IPV6_DST]); + + metadata = __ipv6_tun_set_dst(&saddr, &daddr, 0, 0, 0, + TUNNEL_KEY, key_id, 0); + } + + if (!metadata) { + ret = -EINVAL; + goto err_out; + } + + metadata->u.tun_info.mode |= IP_TUNNEL_INFO_TX; + break; + default: + goto err_out; + } + + if (!exists) { + ret = tcf_hash_create(tn, parm->index, est, a, + &act_tunnel_key_ops, bind, true); + if (ret) + return ret; + + ret = ACT_P_CREATED; + } else { + tcf_hash_release(*a, bind); + if (!ovr) + return -EEXIST; + } + + t = to_tunnel_key(*a); + + ASSERT_RTNL(); + params_new = kzalloc(sizeof(*params_new), GFP_KERNEL); + if (unlikely(!params_new)) { + if (ret == ACT_P_CREATED) + tcf_hash_release(*a, bind); + return -ENOMEM; + } + + params_old = rtnl_dereference(t->params); + + params_new->action = parm->action; + params_new->tcft_action = parm->t_action; + params_new->tcft_enc_metadata = metadata; + + rcu_assign_pointer(t->params, params_new); + + if (params_old) + kfree_rcu(params_old, rcu); + + if (ret == ACT_P_CREATED) + tcf_hash_insert(tn, *a); + + return ret; + +err_out: + if (exists) + tcf_hash_release(*a, bind); + return ret; +} + +static void tunnel_key_release(struct tc_action *a, int bind) +{ + struct tcf_tunnel_key *t = to_tunnel_key(a); + struct tcf_tunnel_key_params *params; + + rcu_read_lock(); + params = rcu_dereference(t->params); + + if (params->tcft_action == TCA_TUNNEL_KEY_ACT_SET) + dst_release(¶ms->tcft_enc_metadata->dst); + + kfree_rcu(params, rcu); + + rcu_read_unlock(); +} + +static int tunnel_key_dump_addresses(struct sk_buff *skb, + const struct ip_tunnel_info *info) +{ + unsigned short family = ip_tunnel_info_af(info); + + if (family == AF_INET) { + __be32 saddr = info->key.u.ipv4.src; + __be32 daddr = info->key.u.ipv4.dst; + + if (!nla_put_in_addr(skb, TCA_TUNNEL_KEY_ENC_IPV4_SRC, saddr) && + !nla_put_in_addr(skb, TCA_TUNNEL_KEY_ENC_IPV4_DST, daddr)) + return 0; + } + + if (family == AF_INET6) { + const struct in6_addr *saddr6 = &info->key.u.ipv6.src; + const struct in6_addr *daddr6 = &info->key.u.ipv6.dst; + + if (!nla_put_in6_addr(skb, + TCA_TUNNEL_KEY_ENC_IPV6_SRC, saddr6) && + !nla_put_in6_addr(skb, + TCA_TUNNEL_KEY_ENC_IPV6_DST, daddr6)) + return 0; + } + + return -EINVAL; +} + +static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a, + int bind, int ref) +{ + unsigned char *b = skb_tail_pointer(skb); + struct tcf_tunnel_key *t = to_tunnel_key(a); + struct tcf_tunnel_key_params *params; + struct tc_tunnel_key opt = { + .index = t->tcf_index, + .refcnt = t->tcf_refcnt - ref, + .bindcnt = t->tcf_bindcnt - bind, + }; + struct tcf_t tm; + int ret = -1; + + rcu_read_lock(); + params = rcu_dereference(t->params); + + opt.t_action = params->tcft_action; + opt.action = params->action; + + if (nla_put(skb, TCA_TUNNEL_KEY_PARMS, sizeof(opt), &opt)) + goto nla_put_failure; + + if (params->tcft_action == TCA_TUNNEL_KEY_ACT_SET) { + struct ip_tunnel_key *key = + ¶ms->tcft_enc_metadata->u.tun_info.key; + __be32 key_id = tunnel_id_to_key32(key->tun_id); + + if (nla_put_be32(skb, TCA_TUNNEL_KEY_ENC_KEY_ID, key_id) || + tunnel_key_dump_addresses(skb, + ¶ms->tcft_enc_metadata->u.tun_info)) + goto nla_put_failure; + } + + tcf_tm_dump(&tm, &t->tcf_tm); + if (nla_put_64bit(skb, TCA_TUNNEL_KEY_TM, sizeof(tm), + &tm, TCA_TUNNEL_KEY_PAD)) + goto nla_put_failure; + + ret = skb->len; + goto out; + +nla_put_failure: + nlmsg_trim(skb, b); +out: + rcu_read_unlock(); + + return ret; +} + +static int tunnel_key_walker(struct net *net, struct sk_buff *skb, + struct netlink_callback *cb, int type, + const struct tc_action_ops *ops) +{ + struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); + + return tcf_generic_walker(tn, skb, cb, type, ops); +} + +static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index) +{ + struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); + + return tcf_hash_search(tn, a, index); +} + +static struct tc_action_ops act_tunnel_key_ops = { + .kind = "tunnel_key", + .type = TCA_ACT_TUNNEL_KEY, + .owner = THIS_MODULE, + .act = tunnel_key_act, + .dump = tunnel_key_dump, + .init = tunnel_key_init, + .cleanup = tunnel_key_release, + .walk = tunnel_key_walker, + .lookup = tunnel_key_search, + .size = sizeof(struct tcf_tunnel_key), +}; + +static __net_init int tunnel_key_init_net(struct net *net) +{ + struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); + + return tc_action_net_init(tn, &act_tunnel_key_ops, TUNNEL_KEY_TAB_MASK); +} + +static void __net_exit tunnel_key_exit_net(struct net *net) +{ + struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); + + tc_action_net_exit(tn); +} + +static struct pernet_operations tunnel_key_net_ops = { + .init = tunnel_key_init_net, + .exit = tunnel_key_exit_net, + .id = &tunnel_key_net_id, + .size = sizeof(struct tc_action_net), +}; + +static int __init tunnel_key_init_module(void) +{ + return tcf_register_action(&act_tunnel_key_ops, &tunnel_key_net_ops); +} + +static void __exit tunnel_key_cleanup_module(void) +{ + tcf_unregister_action(&act_tunnel_key_ops, &tunnel_key_net_ops); +} + +module_init(tunnel_key_init_module); +module_exit(tunnel_key_cleanup_module); + +MODULE_AUTHOR("Amir Vadai "); +MODULE_DESCRIPTION("ip tunnel manipulation actions"); +MODULE_LICENSE("GPL v2"); From 8572763af48728561f8bf708b6c0ea9f4db5929e Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 9 Sep 2016 14:26:21 +0200 Subject: [PATCH 0878/1715] net: fs_enet: merge NAPI RX and NAPI TX Initially, a NAPI TX routine has been implemented separately from NAPI RX, as done on the freescale/gianfar driver. By merging NAPI RX and NAPI TX, we reduce the amount of TX completion interrupts. Handling of the budget in association with TX interrupts is based on indications provided at https://wiki.linuxfoundation.org/networking/napi We never proceed more than the complete TX ring on a single run. At the same time, we fix an issue in the handling of fep->tx_free: It is only when fep->tx_free goes up to MAX_SKB_FRAGS that we need to wake up the queue. There is no need to call netif_wake_queue() at every packet successfully transmitted. Signed-off-by: Christophe Leroy Signed-off-by: David S. Miller --- .../ethernet/freescale/fs_enet/fs_enet-main.c | 268 ++++++++---------- .../net/ethernet/freescale/fs_enet/fs_enet.h | 16 +- .../net/ethernet/freescale/fs_enet/mac-fcc.c | 57 +--- .../net/ethernet/freescale/fs_enet/mac-fec.c | 57 +--- .../net/ethernet/freescale/fs_enet/mac-scc.c | 57 +--- 5 files changed, 160 insertions(+), 295 deletions(-) diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c index 61fd486c50bb..37574a97a61c 100644 --- a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c +++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c @@ -60,6 +60,9 @@ module_param(fs_enet_debug, int, 0); MODULE_PARM_DESC(fs_enet_debug, "Freescale bitmapped debugging message enable value"); +#define RX_RING_SIZE 32 +#define TX_RING_SIZE 64 + #ifdef CONFIG_NET_POLL_CONTROLLER static void fs_enet_netpoll(struct net_device *dev); #endif @@ -79,8 +82,8 @@ static void skb_align(struct sk_buff *skb, int align) skb_reserve(skb, align - off); } -/* NAPI receive function */ -static int fs_enet_rx_napi(struct napi_struct *napi, int budget) +/* NAPI function */ +static int fs_enet_napi(struct napi_struct *napi, int budget) { struct fs_enet_private *fep = container_of(napi, struct fs_enet_private, napi); struct net_device *dev = fep->ndev; @@ -90,9 +93,102 @@ static int fs_enet_rx_napi(struct napi_struct *napi, int budget) int received = 0; u16 pkt_len, sc; int curidx; + int dirtyidx, do_wake, do_restart; + int tx_left = TX_RING_SIZE; - if (budget <= 0) - return received; + spin_lock(&fep->tx_lock); + bdp = fep->dirty_tx; + + /* clear status bits for napi*/ + (*fep->ops->napi_clear_event)(dev); + + do_wake = do_restart = 0; + while (((sc = CBDR_SC(bdp)) & BD_ENET_TX_READY) == 0 && tx_left) { + dirtyidx = bdp - fep->tx_bd_base; + + if (fep->tx_free == fep->tx_ring) + break; + + skb = fep->tx_skbuff[dirtyidx]; + + /* + * Check for errors. + */ + if (sc & (BD_ENET_TX_HB | BD_ENET_TX_LC | + BD_ENET_TX_RL | BD_ENET_TX_UN | BD_ENET_TX_CSL)) { + + if (sc & BD_ENET_TX_HB) /* No heartbeat */ + fep->stats.tx_heartbeat_errors++; + if (sc & BD_ENET_TX_LC) /* Late collision */ + fep->stats.tx_window_errors++; + if (sc & BD_ENET_TX_RL) /* Retrans limit */ + fep->stats.tx_aborted_errors++; + if (sc & BD_ENET_TX_UN) /* Underrun */ + fep->stats.tx_fifo_errors++; + if (sc & BD_ENET_TX_CSL) /* Carrier lost */ + fep->stats.tx_carrier_errors++; + + if (sc & (BD_ENET_TX_LC | BD_ENET_TX_RL | BD_ENET_TX_UN)) { + fep->stats.tx_errors++; + do_restart = 1; + } + } else + fep->stats.tx_packets++; + + if (sc & BD_ENET_TX_READY) { + dev_warn(fep->dev, + "HEY! Enet xmit interrupt and TX_READY.\n"); + } + + /* + * Deferred means some collisions occurred during transmit, + * but we eventually sent the packet OK. + */ + if (sc & BD_ENET_TX_DEF) + fep->stats.collisions++; + + /* unmap */ + if (fep->mapped_as_page[dirtyidx]) + dma_unmap_page(fep->dev, CBDR_BUFADDR(bdp), + CBDR_DATLEN(bdp), DMA_TO_DEVICE); + else + dma_unmap_single(fep->dev, CBDR_BUFADDR(bdp), + CBDR_DATLEN(bdp), DMA_TO_DEVICE); + + /* + * Free the sk buffer associated with this last transmit. + */ + if (skb) { + dev_kfree_skb(skb); + fep->tx_skbuff[dirtyidx] = NULL; + } + + /* + * Update pointer to next buffer descriptor to be transmitted. + */ + if ((sc & BD_ENET_TX_WRAP) == 0) + bdp++; + else + bdp = fep->tx_bd_base; + + /* + * Since we have freed up a buffer, the ring is no longer + * full. + */ + if (++fep->tx_free == MAX_SKB_FRAGS) + do_wake = 1; + tx_left--; + } + + fep->dirty_tx = bdp; + + if (do_restart) + (*fep->ops->tx_restart)(dev); + + spin_unlock(&fep->tx_lock); + + if (do_wake) + netif_wake_queue(dev); /* * First, grab all of the stats for the incoming packet. @@ -100,10 +196,8 @@ static int fs_enet_rx_napi(struct napi_struct *napi, int budget) */ bdp = fep->cur_rx; - /* clear RX status bits for napi*/ - (*fep->ops->napi_clear_rx_event)(dev); - - while (((sc = CBDR_SC(bdp)) & BD_ENET_RX_EMPTY) == 0) { + while (((sc = CBDR_SC(bdp)) & BD_ENET_RX_EMPTY) == 0 && + received < budget) { curidx = bdp - fep->rx_bd_base; /* @@ -197,134 +291,19 @@ static int fs_enet_rx_napi(struct napi_struct *napi, int budget) bdp = fep->rx_bd_base; (*fep->ops->rx_bd_done)(dev); - - if (received >= budget) - break; } fep->cur_rx = bdp; - if (received < budget) { + if (received < budget && tx_left) { /* done */ napi_complete(napi); - (*fep->ops->napi_enable_rx)(dev); - } - return received; -} + (*fep->ops->napi_enable)(dev); -static int fs_enet_tx_napi(struct napi_struct *napi, int budget) -{ - struct fs_enet_private *fep = container_of(napi, struct fs_enet_private, - napi_tx); - struct net_device *dev = fep->ndev; - cbd_t __iomem *bdp; - struct sk_buff *skb; - int dirtyidx, do_wake, do_restart; - u16 sc; - int has_tx_work = 0; - - spin_lock(&fep->tx_lock); - bdp = fep->dirty_tx; - - /* clear TX status bits for napi*/ - (*fep->ops->napi_clear_tx_event)(dev); - - do_wake = do_restart = 0; - while (((sc = CBDR_SC(bdp)) & BD_ENET_TX_READY) == 0) { - dirtyidx = bdp - fep->tx_bd_base; - - if (fep->tx_free == fep->tx_ring) - break; - - skb = fep->tx_skbuff[dirtyidx]; - - /* - * Check for errors. - */ - if (sc & (BD_ENET_TX_HB | BD_ENET_TX_LC | - BD_ENET_TX_RL | BD_ENET_TX_UN | BD_ENET_TX_CSL)) { - - if (sc & BD_ENET_TX_HB) /* No heartbeat */ - fep->stats.tx_heartbeat_errors++; - if (sc & BD_ENET_TX_LC) /* Late collision */ - fep->stats.tx_window_errors++; - if (sc & BD_ENET_TX_RL) /* Retrans limit */ - fep->stats.tx_aborted_errors++; - if (sc & BD_ENET_TX_UN) /* Underrun */ - fep->stats.tx_fifo_errors++; - if (sc & BD_ENET_TX_CSL) /* Carrier lost */ - fep->stats.tx_carrier_errors++; - - if (sc & (BD_ENET_TX_LC | BD_ENET_TX_RL | BD_ENET_TX_UN)) { - fep->stats.tx_errors++; - do_restart = 1; - } - } else - fep->stats.tx_packets++; - - if (sc & BD_ENET_TX_READY) { - dev_warn(fep->dev, - "HEY! Enet xmit interrupt and TX_READY.\n"); - } - - /* - * Deferred means some collisions occurred during transmit, - * but we eventually sent the packet OK. - */ - if (sc & BD_ENET_TX_DEF) - fep->stats.collisions++; - - /* unmap */ - if (fep->mapped_as_page[dirtyidx]) - dma_unmap_page(fep->dev, CBDR_BUFADDR(bdp), - CBDR_DATLEN(bdp), DMA_TO_DEVICE); - else - dma_unmap_single(fep->dev, CBDR_BUFADDR(bdp), - CBDR_DATLEN(bdp), DMA_TO_DEVICE); - - /* - * Free the sk buffer associated with this last transmit. - */ - if (skb) { - dev_kfree_skb(skb); - fep->tx_skbuff[dirtyidx] = NULL; - } - - /* - * Update pointer to next buffer descriptor to be transmitted. - */ - if ((sc & BD_ENET_TX_WRAP) == 0) - bdp++; - else - bdp = fep->tx_bd_base; - - /* - * Since we have freed up a buffer, the ring is no longer - * full. - */ - if (++fep->tx_free >= MAX_SKB_FRAGS) - do_wake = 1; - has_tx_work = 1; + return received; } - fep->dirty_tx = bdp; - - if (do_restart) - (*fep->ops->tx_restart)(dev); - - if (!has_tx_work) { - napi_complete(napi); - (*fep->ops->napi_enable_tx)(dev); - } - - spin_unlock(&fep->tx_lock); - - if (do_wake) - netif_wake_queue(dev); - - if (has_tx_work) - return budget; - return 0; + return budget; } /* @@ -350,18 +329,18 @@ fs_enet_interrupt(int irq, void *dev_id) nr++; int_clr_events = int_events; - int_clr_events &= ~fep->ev_napi_rx; + int_clr_events &= ~fep->ev_napi; (*fep->ops->clear_int_events)(dev, int_clr_events); if (int_events & fep->ev_err) (*fep->ops->ev_error)(dev, int_events); - if (int_events & fep->ev_rx) { + if (int_events & fep->ev) { napi_ok = napi_schedule_prep(&fep->napi); - (*fep->ops->napi_disable_rx)(dev); - (*fep->ops->clear_int_events)(dev, fep->ev_napi_rx); + (*fep->ops->napi_disable)(dev); + (*fep->ops->clear_int_events)(dev, fep->ev_napi); /* NOTE: it is possible for FCCs in NAPI mode */ /* to submit a spurious interrupt while in poll */ @@ -369,17 +348,6 @@ fs_enet_interrupt(int irq, void *dev_id) __napi_schedule(&fep->napi); } - if (int_events & fep->ev_tx) { - napi_ok = napi_schedule_prep(&fep->napi_tx); - - (*fep->ops->napi_disable_tx)(dev); - (*fep->ops->clear_int_events)(dev, fep->ev_napi_tx); - - /* NOTE: it is possible for FCCs in NAPI mode */ - /* to submit a spurious interrupt while in poll */ - if (napi_ok) - __napi_schedule(&fep->napi_tx); - } } handled = nr > 0; @@ -659,7 +627,8 @@ static void fs_timeout(struct net_device *dev) } phy_start(dev->phydev); - wake = fep->tx_free && !(CBDR_SC(fep->cur_tx) & BD_ENET_TX_READY); + wake = fep->tx_free >= MAX_SKB_FRAGS && + !(CBDR_SC(fep->cur_tx) & BD_ENET_TX_READY); spin_unlock_irqrestore(&fep->lock, flags); if (wake) @@ -751,11 +720,10 @@ static int fs_enet_open(struct net_device *dev) int err; /* to initialize the fep->cur_rx,... */ - /* not doing this, will cause a crash in fs_enet_rx_napi */ + /* not doing this, will cause a crash in fs_enet_napi */ fs_init_bds(fep->ndev); napi_enable(&fep->napi); - napi_enable(&fep->napi_tx); /* Install our interrupt handler. */ r = request_irq(fep->interrupt, fs_enet_interrupt, IRQF_SHARED, @@ -763,7 +731,6 @@ static int fs_enet_open(struct net_device *dev) if (r != 0) { dev_err(fep->dev, "Could not allocate FS_ENET IRQ!"); napi_disable(&fep->napi); - napi_disable(&fep->napi_tx); return -EINVAL; } @@ -771,7 +738,6 @@ static int fs_enet_open(struct net_device *dev) if (err) { free_irq(fep->interrupt, dev); napi_disable(&fep->napi); - napi_disable(&fep->napi_tx); return err; } phy_start(dev->phydev); @@ -789,7 +755,6 @@ static int fs_enet_close(struct net_device *dev) netif_stop_queue(dev); netif_carrier_off(dev); napi_disable(&fep->napi); - napi_disable(&fep->napi_tx); phy_stop(dev->phydev); spin_lock_irqsave(&fep->lock, flags); @@ -939,8 +904,8 @@ static int fs_enet_probe(struct platform_device *ofdev) fpi->cp_command = *data; } - fpi->rx_ring = 32; - fpi->tx_ring = 64; + fpi->rx_ring = RX_RING_SIZE; + fpi->tx_ring = TX_RING_SIZE; fpi->rx_copybreak = 240; fpi->napi_weight = 17; fpi->phy_node = of_parse_phandle(ofdev->dev.of_node, "phy-handle", 0); @@ -1024,8 +989,7 @@ static int fs_enet_probe(struct platform_device *ofdev) ndev->netdev_ops = &fs_enet_netdev_ops; ndev->watchdog_timeo = 2 * HZ; - netif_napi_add(ndev, &fep->napi, fs_enet_rx_napi, fpi->napi_weight); - netif_tx_napi_add(ndev, &fep->napi_tx, fs_enet_tx_napi, 2); + netif_napi_add(ndev, &fep->napi, fs_enet_napi, fpi->napi_weight); ndev->ethtool_ops = &fs_ethtool_ops; diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet.h b/drivers/net/ethernet/freescale/fs_enet/fs_enet.h index e29f54a35210..fee24c822fad 100644 --- a/drivers/net/ethernet/freescale/fs_enet/fs_enet.h +++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet.h @@ -81,12 +81,9 @@ struct fs_ops { void (*adjust_link)(struct net_device *dev); void (*restart)(struct net_device *dev); void (*stop)(struct net_device *dev); - void (*napi_clear_rx_event)(struct net_device *dev); - void (*napi_enable_rx)(struct net_device *dev); - void (*napi_disable_rx)(struct net_device *dev); - void (*napi_clear_tx_event)(struct net_device *dev); - void (*napi_enable_tx)(struct net_device *dev); - void (*napi_disable_tx)(struct net_device *dev); + void (*napi_clear_event)(struct net_device *dev); + void (*napi_enable)(struct net_device *dev); + void (*napi_disable)(struct net_device *dev); void (*rx_bd_done)(struct net_device *dev); void (*tx_kickstart)(struct net_device *dev); u32 (*get_int_events)(struct net_device *dev); @@ -122,7 +119,6 @@ struct phy_info { struct fs_enet_private { struct napi_struct napi; - struct napi_struct napi_tx; struct device *dev; /* pointer back to the device (must be initialized first) */ struct net_device *ndev; spinlock_t lock; /* during all ops except TX pckt processing */ @@ -152,10 +148,8 @@ struct fs_enet_private { int oldduplex, oldspeed, oldlink; /* current settings */ /* event masks */ - u32 ev_napi_rx; /* mask of NAPI rx events */ - u32 ev_napi_tx; /* mask of NAPI rx events */ - u32 ev_rx; /* rx event mask */ - u32 ev_tx; /* tx event mask */ + u32 ev_napi; /* mask of NAPI events */ + u32 ev; /* event mask */ u32 ev_err; /* error event mask */ u16 bd_rx_empty; /* mask of BD rx empty */ diff --git a/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c b/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c index d71761a34022..7919896a9a4e 100644 --- a/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c +++ b/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c @@ -124,10 +124,8 @@ out: return ret; } -#define FCC_NAPI_RX_EVENT_MSK (FCC_ENET_RXF | FCC_ENET_RXB) -#define FCC_NAPI_TX_EVENT_MSK (FCC_ENET_TXB) -#define FCC_RX_EVENT (FCC_ENET_RXF) -#define FCC_TX_EVENT (FCC_ENET_TXB) +#define FCC_NAPI_EVENT_MSK (FCC_ENET_RXF | FCC_ENET_RXB | FCC_ENET_TXB) +#define FCC_EVENT (FCC_ENET_RXF | FCC_ENET_TXB) #define FCC_ERR_EVENT_MSK (FCC_ENET_TXE) static int setup_data(struct net_device *dev) @@ -137,10 +135,8 @@ static int setup_data(struct net_device *dev) if (do_pd_setup(fep) != 0) return -EINVAL; - fep->ev_napi_rx = FCC_NAPI_RX_EVENT_MSK; - fep->ev_napi_tx = FCC_NAPI_TX_EVENT_MSK; - fep->ev_rx = FCC_RX_EVENT; - fep->ev_tx = FCC_TX_EVENT; + fep->ev_napi = FCC_NAPI_EVENT_MSK; + fep->ev = FCC_EVENT; fep->ev_err = FCC_ERR_EVENT_MSK; return 0; @@ -424,52 +420,28 @@ static void stop(struct net_device *dev) fs_cleanup_bds(dev); } -static void napi_clear_rx_event(struct net_device *dev) +static void napi_clear_event_fs(struct net_device *dev) { struct fs_enet_private *fep = netdev_priv(dev); fcc_t __iomem *fccp = fep->fcc.fccp; - W16(fccp, fcc_fcce, FCC_NAPI_RX_EVENT_MSK); + W16(fccp, fcc_fcce, FCC_NAPI_EVENT_MSK); } -static void napi_enable_rx(struct net_device *dev) +static void napi_enable_fs(struct net_device *dev) { struct fs_enet_private *fep = netdev_priv(dev); fcc_t __iomem *fccp = fep->fcc.fccp; - S16(fccp, fcc_fccm, FCC_NAPI_RX_EVENT_MSK); + S16(fccp, fcc_fccm, FCC_NAPI_EVENT_MSK); } -static void napi_disable_rx(struct net_device *dev) +static void napi_disable_fs(struct net_device *dev) { struct fs_enet_private *fep = netdev_priv(dev); fcc_t __iomem *fccp = fep->fcc.fccp; - C16(fccp, fcc_fccm, FCC_NAPI_RX_EVENT_MSK); -} - -static void napi_clear_tx_event(struct net_device *dev) -{ - struct fs_enet_private *fep = netdev_priv(dev); - fcc_t __iomem *fccp = fep->fcc.fccp; - - W16(fccp, fcc_fcce, FCC_NAPI_TX_EVENT_MSK); -} - -static void napi_enable_tx(struct net_device *dev) -{ - struct fs_enet_private *fep = netdev_priv(dev); - fcc_t __iomem *fccp = fep->fcc.fccp; - - S16(fccp, fcc_fccm, FCC_NAPI_TX_EVENT_MSK); -} - -static void napi_disable_tx(struct net_device *dev) -{ - struct fs_enet_private *fep = netdev_priv(dev); - fcc_t __iomem *fccp = fep->fcc.fccp; - - C16(fccp, fcc_fccm, FCC_NAPI_TX_EVENT_MSK); + C16(fccp, fcc_fccm, FCC_NAPI_EVENT_MSK); } static void rx_bd_done(struct net_device *dev) @@ -595,12 +567,9 @@ const struct fs_ops fs_fcc_ops = { .set_multicast_list = set_multicast_list, .restart = restart, .stop = stop, - .napi_clear_rx_event = napi_clear_rx_event, - .napi_enable_rx = napi_enable_rx, - .napi_disable_rx = napi_disable_rx, - .napi_clear_tx_event = napi_clear_tx_event, - .napi_enable_tx = napi_enable_tx, - .napi_disable_tx = napi_disable_tx, + .napi_clear_event = napi_clear_event_fs, + .napi_enable = napi_enable_fs, + .napi_disable = napi_disable_fs, .rx_bd_done = rx_bd_done, .tx_kickstart = tx_kickstart, .get_int_events = get_int_events, diff --git a/drivers/net/ethernet/freescale/fs_enet/mac-fec.c b/drivers/net/ethernet/freescale/fs_enet/mac-fec.c index 35a318ed3a62..21fbaaf3a5fc 100644 --- a/drivers/net/ethernet/freescale/fs_enet/mac-fec.c +++ b/drivers/net/ethernet/freescale/fs_enet/mac-fec.c @@ -109,10 +109,8 @@ static int do_pd_setup(struct fs_enet_private *fep) return 0; } -#define FEC_NAPI_RX_EVENT_MSK (FEC_ENET_RXF | FEC_ENET_RXB) -#define FEC_NAPI_TX_EVENT_MSK (FEC_ENET_TXF) -#define FEC_RX_EVENT (FEC_ENET_RXF) -#define FEC_TX_EVENT (FEC_ENET_TXF) +#define FEC_NAPI_EVENT_MSK (FEC_ENET_RXF | FEC_ENET_RXB | FEC_ENET_TXF) +#define FEC_EVENT (FEC_ENET_RXF | FEC_ENET_TXF) #define FEC_ERR_EVENT_MSK (FEC_ENET_HBERR | FEC_ENET_BABR | \ FEC_ENET_BABT | FEC_ENET_EBERR) @@ -126,10 +124,8 @@ static int setup_data(struct net_device *dev) fep->fec.hthi = 0; fep->fec.htlo = 0; - fep->ev_napi_rx = FEC_NAPI_RX_EVENT_MSK; - fep->ev_napi_tx = FEC_NAPI_TX_EVENT_MSK; - fep->ev_rx = FEC_RX_EVENT; - fep->ev_tx = FEC_TX_EVENT; + fep->ev_napi = FEC_NAPI_EVENT_MSK; + fep->ev = FEC_EVENT; fep->ev_err = FEC_ERR_EVENT_MSK; return 0; @@ -396,52 +392,28 @@ static void stop(struct net_device *dev) } } -static void napi_clear_rx_event(struct net_device *dev) +static void napi_clear_event_fs(struct net_device *dev) { struct fs_enet_private *fep = netdev_priv(dev); struct fec __iomem *fecp = fep->fec.fecp; - FW(fecp, ievent, FEC_NAPI_RX_EVENT_MSK); + FW(fecp, ievent, FEC_NAPI_EVENT_MSK); } -static void napi_enable_rx(struct net_device *dev) +static void napi_enable_fs(struct net_device *dev) { struct fs_enet_private *fep = netdev_priv(dev); struct fec __iomem *fecp = fep->fec.fecp; - FS(fecp, imask, FEC_NAPI_RX_EVENT_MSK); + FS(fecp, imask, FEC_NAPI_EVENT_MSK); } -static void napi_disable_rx(struct net_device *dev) +static void napi_disable_fs(struct net_device *dev) { struct fs_enet_private *fep = netdev_priv(dev); struct fec __iomem *fecp = fep->fec.fecp; - FC(fecp, imask, FEC_NAPI_RX_EVENT_MSK); -} - -static void napi_clear_tx_event(struct net_device *dev) -{ - struct fs_enet_private *fep = netdev_priv(dev); - struct fec __iomem *fecp = fep->fec.fecp; - - FW(fecp, ievent, FEC_NAPI_TX_EVENT_MSK); -} - -static void napi_enable_tx(struct net_device *dev) -{ - struct fs_enet_private *fep = netdev_priv(dev); - struct fec __iomem *fecp = fep->fec.fecp; - - FS(fecp, imask, FEC_NAPI_TX_EVENT_MSK); -} - -static void napi_disable_tx(struct net_device *dev) -{ - struct fs_enet_private *fep = netdev_priv(dev); - struct fec __iomem *fecp = fep->fec.fecp; - - FC(fecp, imask, FEC_NAPI_TX_EVENT_MSK); + FC(fecp, imask, FEC_NAPI_EVENT_MSK); } static void rx_bd_done(struct net_device *dev) @@ -513,12 +485,9 @@ const struct fs_ops fs_fec_ops = { .set_multicast_list = set_multicast_list, .restart = restart, .stop = stop, - .napi_clear_rx_event = napi_clear_rx_event, - .napi_enable_rx = napi_enable_rx, - .napi_disable_rx = napi_disable_rx, - .napi_clear_tx_event = napi_clear_tx_event, - .napi_enable_tx = napi_enable_tx, - .napi_disable_tx = napi_disable_tx, + .napi_clear_event = napi_clear_event_fs, + .napi_enable = napi_enable_fs, + .napi_disable = napi_disable_fs, .rx_bd_done = rx_bd_done, .tx_kickstart = tx_kickstart, .get_int_events = get_int_events, diff --git a/drivers/net/ethernet/freescale/fs_enet/mac-scc.c b/drivers/net/ethernet/freescale/fs_enet/mac-scc.c index e8b9c33d35b4..9d52e1ec0052 100644 --- a/drivers/net/ethernet/freescale/fs_enet/mac-scc.c +++ b/drivers/net/ethernet/freescale/fs_enet/mac-scc.c @@ -115,10 +115,8 @@ static int do_pd_setup(struct fs_enet_private *fep) return 0; } -#define SCC_NAPI_RX_EVENT_MSK (SCCE_ENET_RXF | SCCE_ENET_RXB) -#define SCC_NAPI_TX_EVENT_MSK (SCCE_ENET_TXB) -#define SCC_RX_EVENT (SCCE_ENET_RXF) -#define SCC_TX_EVENT (SCCE_ENET_TXB) +#define SCC_NAPI_EVENT_MSK (SCCE_ENET_RXF | SCCE_ENET_RXB | SCCE_ENET_TXB) +#define SCC_EVENT (SCCE_ENET_RXF | SCCE_ENET_TXB) #define SCC_ERR_EVENT_MSK (SCCE_ENET_TXE | SCCE_ENET_BSY) static int setup_data(struct net_device *dev) @@ -130,10 +128,8 @@ static int setup_data(struct net_device *dev) fep->scc.hthi = 0; fep->scc.htlo = 0; - fep->ev_napi_rx = SCC_NAPI_RX_EVENT_MSK; - fep->ev_napi_tx = SCC_NAPI_TX_EVENT_MSK; - fep->ev_rx = SCC_RX_EVENT; - fep->ev_tx = SCC_TX_EVENT | SCCE_ENET_TXE; + fep->ev_napi = SCC_NAPI_EVENT_MSK; + fep->ev = SCC_EVENT | SCCE_ENET_TXE; fep->ev_err = SCC_ERR_EVENT_MSK; return 0; @@ -379,52 +375,28 @@ static void stop(struct net_device *dev) fs_cleanup_bds(dev); } -static void napi_clear_rx_event(struct net_device *dev) +static void napi_clear_event_fs(struct net_device *dev) { struct fs_enet_private *fep = netdev_priv(dev); scc_t __iomem *sccp = fep->scc.sccp; - W16(sccp, scc_scce, SCC_NAPI_RX_EVENT_MSK); + W16(sccp, scc_scce, SCC_NAPI_EVENT_MSK); } -static void napi_enable_rx(struct net_device *dev) +static void napi_enable_fs(struct net_device *dev) { struct fs_enet_private *fep = netdev_priv(dev); scc_t __iomem *sccp = fep->scc.sccp; - S16(sccp, scc_sccm, SCC_NAPI_RX_EVENT_MSK); + S16(sccp, scc_sccm, SCC_NAPI_EVENT_MSK); } -static void napi_disable_rx(struct net_device *dev) +static void napi_disable_fs(struct net_device *dev) { struct fs_enet_private *fep = netdev_priv(dev); scc_t __iomem *sccp = fep->scc.sccp; - C16(sccp, scc_sccm, SCC_NAPI_RX_EVENT_MSK); -} - -static void napi_clear_tx_event(struct net_device *dev) -{ - struct fs_enet_private *fep = netdev_priv(dev); - scc_t __iomem *sccp = fep->scc.sccp; - - W16(sccp, scc_scce, SCC_NAPI_TX_EVENT_MSK); -} - -static void napi_enable_tx(struct net_device *dev) -{ - struct fs_enet_private *fep = netdev_priv(dev); - scc_t __iomem *sccp = fep->scc.sccp; - - S16(sccp, scc_sccm, SCC_NAPI_TX_EVENT_MSK); -} - -static void napi_disable_tx(struct net_device *dev) -{ - struct fs_enet_private *fep = netdev_priv(dev); - scc_t __iomem *sccp = fep->scc.sccp; - - C16(sccp, scc_sccm, SCC_NAPI_TX_EVENT_MSK); + C16(sccp, scc_sccm, SCC_NAPI_EVENT_MSK); } static void rx_bd_done(struct net_device *dev) @@ -497,12 +469,9 @@ const struct fs_ops fs_scc_ops = { .set_multicast_list = set_multicast_list, .restart = restart, .stop = stop, - .napi_clear_rx_event = napi_clear_rx_event, - .napi_enable_rx = napi_enable_rx, - .napi_disable_rx = napi_disable_rx, - .napi_clear_tx_event = napi_clear_tx_event, - .napi_enable_tx = napi_enable_tx, - .napi_disable_tx = napi_disable_tx, + .napi_clear_event = napi_clear_event_fs, + .napi_enable = napi_enable_fs, + .napi_disable = napi_disable_fs, .rx_bd_done = rx_bd_done, .tx_kickstart = tx_kickstart, .get_int_events = get_int_events, From 070e1f01827c658b76bef6e3fa79046b4e4a7693 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 9 Sep 2016 14:26:23 +0200 Subject: [PATCH 0879/1715] net: fs_enet: don't unmap DMA when packet len is below copybreak When the length of the packet is below the defined copybreak limit, the received packet is copied into a newly allocated skb in order to reuse the skb. This is only interesting if it allow us to avoid a new DMA mapping. We shall therefore not DMA unmap and remap the skb->data. Instead, we invalidate the cache with dma_sync_single_for_cpu() once the received data has been copied into the new skb. The following measures have been obtained on a mpc885 running at 132Mhz. Measurement is done using the timebase with packets sent to the target with 'ping -s 1' (packet len is 60): * Without this patch: 182 TB ticks * With this patch: 143 TB ticks As a comparison, if we set the copybreak limit to 0, then we get 148 TB ticks. It means that without this patch, duration is even worse when copying received data to a new skb instead of allocating a new skb for next packet to be received Signed-off-by: Christophe Leroy Signed-off-by: David S. Miller --- .../ethernet/freescale/fs_enet/fs_enet-main.c | 36 ++++++++++--------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c index 37574a97a61c..f2a60cd1feed 100644 --- a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c +++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c @@ -226,21 +226,10 @@ static int fs_enet_napi(struct napi_struct *napi, int budget) if (sc & BD_ENET_RX_OV) fep->stats.rx_crc_errors++; - skb = fep->rx_skbuff[curidx]; - - dma_unmap_single(fep->dev, CBDR_BUFADDR(bdp), - L1_CACHE_ALIGN(PKT_MAXBUF_SIZE), - DMA_FROM_DEVICE); - - skbn = skb; - + skbn = fep->rx_skbuff[curidx]; } else { skb = fep->rx_skbuff[curidx]; - dma_unmap_single(fep->dev, CBDR_BUFADDR(bdp), - L1_CACHE_ALIGN(PKT_MAXBUF_SIZE), - DMA_FROM_DEVICE); - /* * Process the incoming frame. */ @@ -256,12 +245,30 @@ static int fs_enet_napi(struct napi_struct *napi, int budget) skb_copy_from_linear_data(skb, skbn->data, pkt_len); swap(skb, skbn); + dma_sync_single_for_cpu(fep->dev, + CBDR_BUFADDR(bdp), + L1_CACHE_ALIGN(pkt_len), + DMA_FROM_DEVICE); } } else { skbn = netdev_alloc_skb(dev, ENET_RX_FRSIZE); - if (skbn) + if (skbn) { + dma_addr_t dma; + skb_align(skbn, ENET_RX_ALIGN); + + dma_unmap_single(fep->dev, + CBDR_BUFADDR(bdp), + L1_CACHE_ALIGN(PKT_MAXBUF_SIZE), + DMA_FROM_DEVICE); + + dma = dma_map_single(fep->dev, + skbn->data, + L1_CACHE_ALIGN(PKT_MAXBUF_SIZE), + DMA_FROM_DEVICE); + CBDW_BUFADDR(bdp, dma); + } } if (skbn != NULL) { @@ -276,9 +283,6 @@ static int fs_enet_napi(struct napi_struct *napi, int budget) } fep->rx_skbuff[curidx] = skbn; - CBDW_BUFADDR(bdp, dma_map_single(fep->dev, skbn->data, - L1_CACHE_ALIGN(PKT_MAXBUF_SIZE), - DMA_FROM_DEVICE)); CBDW_DATLEN(bdp, 0); CBDW_SC(bdp, (sc & ~BD_ENET_RX_STATS) | BD_ENET_RX_EMPTY); From b0ba357bfb463bb10fe486c99c5dff892fa207fa Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 9 Sep 2016 14:26:25 +0200 Subject: [PATCH 0880/1715] net: fs_enet: make rx_copybreak value configurable Measurement shows that on a MPC8xx running at 132MHz, the optimal limit is 112: * 114 bytes packets are processed in 147 TB ticks with higher copybreak * 114 bytes packets are processed in 148 TB ticks with lower copybreak * 128 bytes packets are processed in 154 TB ticks with higher copybreak * 128 bytes packets are processed in 148 TB ticks with lower copybreak * 238 bytes packets are processed in 172 TB ticks with higher copybreak * 238 bytes packets are processed in 148 TB ticks with lower copybreak However it might be different on other processors and/or frequencies. So it is useful to make it configurable. Signed-off-by: Christophe Leroy Signed-off-by: David S. Miller --- .../ethernet/freescale/fs_enet/fs_enet-main.c | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c index f2a60cd1feed..dc120c148d97 100644 --- a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c +++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c @@ -830,6 +830,44 @@ static void fs_set_msglevel(struct net_device *dev, u32 value) fep->msg_enable = value; } +static int fs_get_tunable(struct net_device *dev, + const struct ethtool_tunable *tuna, void *data) +{ + struct fs_enet_private *fep = netdev_priv(dev); + struct fs_platform_info *fpi = fep->fpi; + int ret = 0; + + switch (tuna->id) { + case ETHTOOL_RX_COPYBREAK: + *(u32 *)data = fpi->rx_copybreak; + break; + default: + ret = -EINVAL; + break; + } + + return ret; +} + +static int fs_set_tunable(struct net_device *dev, + const struct ethtool_tunable *tuna, const void *data) +{ + struct fs_enet_private *fep = netdev_priv(dev); + struct fs_platform_info *fpi = fep->fpi; + int ret = 0; + + switch (tuna->id) { + case ETHTOOL_RX_COPYBREAK: + fpi->rx_copybreak = *(u32 *)data; + break; + default: + ret = -EINVAL; + break; + } + + return ret; +} + static const struct ethtool_ops fs_ethtool_ops = { .get_drvinfo = fs_get_drvinfo, .get_regs_len = fs_get_regs_len, @@ -841,6 +879,8 @@ static const struct ethtool_ops fs_ethtool_ops = { .get_ts_info = ethtool_op_get_ts_info, .get_link_ksettings = phy_ethtool_get_link_ksettings, .set_link_ksettings = phy_ethtool_set_link_ksettings, + .get_tunable = fs_get_tunable, + .set_tunable = fs_set_tunable, }; static int fs_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) From a73ec314a0d28cdbc29b4e4ad10871df0829986d Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Fri, 9 Sep 2016 08:43:13 -0400 Subject: [PATCH 0881/1715] appletalk: use IS_ENABLED() instead of checking for built-in or module The IS_ENABLED() macro checks if a Kconfig symbol has been enabled either built-in or as a module, use that macro instead of open coding the same. Using the macro makes the code more readable by helping abstract away some of the Kconfig built-in and module enable details. Signed-off-by: Javier Martinez Canillas Signed-off-by: David S. Miller --- net/appletalk/ddp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index f066781be3c8..10d2bdce686e 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1278,7 +1278,7 @@ out: return err; } -#if defined(CONFIG_IPDDP) || defined(CONFIG_IPDDP_MODULE) +#if IS_ENABLED(CONFIG_IPDDP) static __inline__ int is_ip_over_ddp(struct sk_buff *skb) { return skb->data[12] == 22; From 9a81c34ace3598188f633d4654a2a57b7f7a2c2a Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Fri, 9 Sep 2016 08:43:14 -0400 Subject: [PATCH 0882/1715] lec: use IS_ENABLED() instead of checking for built-in or module The IS_ENABLED() macro checks if a Kconfig symbol has been enabled either built-in or as a module, use that macro instead of open coding the same. Using the macro makes the code more readable by helping abstract away some of the Kconfig built-in and module enable details. Signed-off-by: Javier Martinez Canillas Signed-off-by: David S. Miller --- net/atm/lec.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/atm/lec.c b/net/atm/lec.c index e574a7e9db6f..5d2693826afb 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -31,7 +31,7 @@ #include /* Proxy LEC knows about bridging */ -#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) +#if IS_ENABLED(CONFIG_BRIDGE) #include "../bridge/br_private.h" static unsigned char bridge_ula_lec[] = { 0x01, 0x80, 0xc2, 0x00, 0x00 }; @@ -121,7 +121,7 @@ static unsigned char bus_mac[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; /* Device structures */ static struct net_device *dev_lec[MAX_LEC_ITF]; -#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) +#if IS_ENABLED(CONFIG_BRIDGE) static void lec_handle_bridge(struct sk_buff *skb, struct net_device *dev) { char *buff; @@ -155,7 +155,7 @@ static void lec_handle_bridge(struct sk_buff *skb, struct net_device *dev) sk->sk_data_ready(sk); } } -#endif /* defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) */ +#endif /* IS_ENABLED(CONFIG_BRIDGE) */ /* * Open/initialize the netdevice. This is called (in the current kernel) @@ -222,7 +222,7 @@ static netdev_tx_t lec_start_xmit(struct sk_buff *skb, pr_debug("skbuff head:%lx data:%lx tail:%lx end:%lx\n", (long)skb->head, (long)skb->data, (long)skb_tail_pointer(skb), (long)skb_end_pointer(skb)); -#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) +#if IS_ENABLED(CONFIG_BRIDGE) if (memcmp(skb->data, bridge_ula_lec, sizeof(bridge_ula_lec)) == 0) lec_handle_bridge(skb, dev); #endif @@ -426,7 +426,7 @@ static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb) (unsigned short)(0xffff & mesg->content.normal.flag); break; case l_should_bridge: -#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) +#if IS_ENABLED(CONFIG_BRIDGE) { pr_debug("%s: bridge zeppelin asks about %pM\n", dev->name, mesg->content.proxy.mac_addr); @@ -452,7 +452,7 @@ static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb) sk->sk_data_ready(sk); } } -#endif /* defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) */ +#endif /* IS_ENABLED(CONFIG_BRIDGE) */ break; default: pr_info("%s: Unknown message type %d\n", dev->name, mesg->type); From 181402a5c7899fad945485130ded47ca2bf1161e Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Fri, 9 Sep 2016 08:43:15 -0400 Subject: [PATCH 0883/1715] net: use IS_ENABLED() instead of checking for built-in or module The IS_ENABLED() macro checks if a Kconfig symbol has been enabled either built-in or as a module, use that macro instead of open coding the same. Using the macro makes the code more readable by helping abstract away some of the Kconfig built-in and module enable details. Signed-off-by: Javier Martinez Canillas Signed-off-by: David S. Miller --- net/core/dev.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 34b5322bc081..b0d307b6af19 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3904,8 +3904,7 @@ static void net_tx_action(struct softirq_action *h) } } -#if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \ - (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)) +#if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_ATM_LANE) /* This hook is defined here for ATM LANE */ int (*br_fdb_test_addr_hook)(struct net_device *dev, unsigned char *addr) __read_mostly; From 6ca40d4e8463c53e6b778010b9331268865725a6 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Fri, 9 Sep 2016 08:43:16 -0400 Subject: [PATCH 0884/1715] ipv4: use IS_ENABLED() instead of checking for built-in or module The IS_ENABLED() macro checks if a Kconfig symbol has been enabled either built-in or as a module, use that macro instead of open coding the same. Using the macro makes the code more readable by helping abstract away some of the Kconfig built-in and module enable details. Signed-off-by: Javier Martinez Canillas Signed-off-by: David S. Miller --- net/ipv4/ip_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 65569274efb8..b913f5bf0757 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -490,7 +490,7 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) to->tc_index = from->tc_index; #endif nf_copy(to, from); -#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) +#if IS_ENABLED(CONFIG_IP_VS) to->ipvs_property = from->ipvs_property; #endif skb_copy_secmark(to, from); From 9dd79945b0f846ca5282c7df7ecf3823f0243898 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Fri, 9 Sep 2016 08:43:17 -0400 Subject: [PATCH 0885/1715] l2tp: use IS_ENABLED() instead of checking for built-in or module The IS_ENABLED() macro checks if a Kconfig symbol has been enabled either built-in or as a module, use that macro instead of open coding the same. Using the macro makes the code more readable by helping abstract away some of the Kconfig built-in and module enable details. Signed-off-by: Javier Martinez Canillas Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.h | 2 +- net/l2tp/l2tp_eth.c | 4 ++-- net/l2tp/l2tp_ppp.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 5871537af387..2599af6378e4 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -139,7 +139,7 @@ struct l2tp_session { void (*session_close)(struct l2tp_session *session); void (*ref)(struct l2tp_session *session); void (*deref)(struct l2tp_session *session); -#if defined(CONFIG_L2TP_DEBUGFS) || defined(CONFIG_L2TP_DEBUGFS_MODULE) +#if IS_ENABLED(CONFIG_L2TP_DEBUGFS) void (*show)(struct seq_file *m, void *priv); #endif uint8_t priv[0]; /* private data */ diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index 57fc5a46ce06..ef2cd30ca06e 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -195,7 +195,7 @@ static void l2tp_eth_delete(struct l2tp_session *session) } } -#if defined(CONFIG_L2TP_DEBUGFS) || defined(CONFIG_L2TP_DEBUGFS_MODULE) +#if IS_ENABLED(CONFIG_L2TP_DEBUGFS) static void l2tp_eth_show(struct seq_file *m, void *arg) { struct l2tp_session *session = arg; @@ -268,7 +268,7 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p priv->tunnel_sock = tunnel->sock; session->recv_skb = l2tp_eth_dev_recv; session->session_close = l2tp_eth_delete; -#if defined(CONFIG_L2TP_DEBUGFS) || defined(CONFIG_L2TP_DEBUGFS_MODULE) +#if IS_ENABLED(CONFIG_L2TP_DEBUGFS) session->show = l2tp_eth_show; #endif diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 34eff77982cf..41d47bfda15c 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -552,7 +552,7 @@ out: return error; } -#if defined(CONFIG_L2TP_DEBUGFS) || defined(CONFIG_L2TP_DEBUGFS_MODULE) +#if IS_ENABLED(CONFIG_L2TP_DEBUGFS) static void pppol2tp_show(struct seq_file *m, void *arg) { struct l2tp_session *session = arg; @@ -723,7 +723,7 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, session->recv_skb = pppol2tp_recv; session->session_close = pppol2tp_session_close; -#if defined(CONFIG_L2TP_DEBUGFS) || defined(CONFIG_L2TP_DEBUGFS_MODULE) +#if IS_ENABLED(CONFIG_L2TP_DEBUGFS) session->show = pppol2tp_show; #endif From 0013de38a829db3f83a36e3e178ff386eb589c51 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Fri, 9 Sep 2016 08:43:18 -0400 Subject: [PATCH 0886/1715] net: sched: use IS_ENABLED() instead of checking for built-in or module The IS_ENABLED() macro checks if a Kconfig symbol has been enabled either built-in or as a module, use that macro instead of open coding the same. Using the macro makes the code more readable by helping abstract away some of the Kconfig built-in and module enable details. Signed-off-by: Javier Martinez Canillas Signed-off-by: David S. Miller --- net/sched/cls_flow.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 2c1ae549edbf..a379bae1d74e 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -29,7 +29,7 @@ #include #include -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +#if IS_ENABLED(CONFIG_NF_CONNTRACK) #include #endif @@ -125,14 +125,14 @@ static u32 flow_get_mark(const struct sk_buff *skb) static u32 flow_get_nfct(const struct sk_buff *skb) { -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +#if IS_ENABLED(CONFIG_NF_CONNTRACK) return addr_fold(skb->nfct); #else return 0; #endif } -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +#if IS_ENABLED(CONFIG_NF_CONNTRACK) #define CTTUPLE(skb, member) \ ({ \ enum ip_conntrack_info ctinfo; \ From aebf5de07aabd44db740c9d33b6daa1abd19fa56 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Fri, 9 Sep 2016 08:43:19 -0400 Subject: [PATCH 0887/1715] sctp: use IS_ENABLED() instead of checking for built-in or module The IS_ENABLED() macro checks if a Kconfig symbol has been enabled either built-in or as a module, use that macro instead of open coding the same. Using the macro makes the code more readable by helping abstract away some of the Kconfig built-in and module enable details. Signed-off-by: Javier Martinez Canillas Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/auth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/auth.c b/net/sctp/auth.c index 912eb1685a5d..f99d4855d3de 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -48,7 +48,7 @@ static struct sctp_hmac sctp_hmac_list[SCTP_AUTH_NUM_HMACS] = { /* id 2 is reserved as well */ .hmac_id = SCTP_AUTH_HMAC_ID_RESERVED_2, }, -#if defined (CONFIG_CRYPTO_SHA256) || defined (CONFIG_CRYPTO_SHA256_MODULE) +#if IS_ENABLED(CONFIG_CRYPTO_SHA256) { .hmac_id = SCTP_AUTH_HMAC_ID_SHA256, .hmac_name = "hmac(sha256)", From 65b323e2ffbb05db4136ee822e08a9b0ec6ac716 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Fri, 9 Sep 2016 08:43:20 -0400 Subject: [PATCH 0888/1715] xfrm: use IS_ENABLED() instead of checking for built-in or module The IS_ENABLED() macro checks if a Kconfig symbol has been enabled either built-in or as a module, use that macro instead of open coding the same. Using the macro makes the code more readable by helping abstract away some of the Kconfig built-in and module enable details. Signed-off-by: Javier Martinez Canillas Signed-off-by: David S. Miller --- net/xfrm/xfrm_algo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index 250e567ba3d6..44ac85fe2bc9 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -17,7 +17,7 @@ #include #include #include -#if defined(CONFIG_INET_ESP) || defined(CONFIG_INET_ESP_MODULE) || defined(CONFIG_INET6_ESP) || defined(CONFIG_INET6_ESP_MODULE) +#if IS_ENABLED(CONFIG_INET_ESP) || IS_ENABLED(CONFIG_INET6_ESP) #include #endif From d62292e85028e553943a285cb6006de0f17dea1e Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Fri, 9 Sep 2016 17:35:17 +0300 Subject: [PATCH 0889/1715] net/mlx5: Skip waiting for vf pages in internal error In case of device in internal error state there is no need to wait for vf pages since they will be reclaimed manually later in the unload flow. Signed-off-by: Mohamad Haj Yahia Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index 673a7c96479a..d4585154151d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -326,6 +326,7 @@ static int reclaim_pages_cmd(struct mlx5_core_dev *dev, { struct fw_page *fwp; struct rb_node *p; + u32 func_id; u32 npages; u32 i = 0; @@ -334,12 +335,16 @@ static int reclaim_pages_cmd(struct mlx5_core_dev *dev, /* No hard feelings, we want our pages back! */ npages = MLX5_GET(manage_pages_in, in, input_num_entries); + func_id = MLX5_GET(manage_pages_in, in, function_id); p = rb_first(&dev->priv.page_root); while (p && i < npages) { fwp = rb_entry(p, struct fw_page, rb_node); - MLX5_SET64(manage_pages_out, out, pas[i], fwp->addr); p = rb_next(p); + if (fwp->func_id != func_id) + continue; + + MLX5_SET64(manage_pages_out, out, pas[i], fwp->addr); i++; } @@ -540,6 +545,12 @@ int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev) unsigned long end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS); int prev_vfs_pages = dev->priv.vfs_pages; + /* In case of internal error we will free the pages manually later */ + if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { + mlx5_core_warn(dev, "Skipping wait for vf pages stage"); + return 0; + } + mlx5_core_dbg(dev, "Waiting for %d pages from %s\n", prev_vfs_pages, dev->priv.name); while (dev->priv.vfs_pages) { From 6b6adee3dad25bbe568ee24fc843372d02fb425f Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Fri, 9 Sep 2016 17:35:18 +0300 Subject: [PATCH 0890/1715] net/mlx5: SRIOV core code refactoring Simplify the code and makes it look modular and symmetric. Split sriov enable/disable to two levels: device level and pci level. When user enable/disable sriov (via sriov_configure driver callback) we will enable/disable both device and pci sriov. When driver load/unload we will enable/disable (on demand) only device sriov while keeping the PCI sriov enabled for next driver load. On internal/pci error, VFs will be kept enabled on PCI and the reset is done only in device level. Signed-off-by: Mohamad Haj Yahia Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/main.c | 12 +- .../ethernet/mellanox/mlx5/core/mlx5_core.h | 2 + .../net/ethernet/mellanox/mlx5/core/sriov.c | 252 ++++++++---------- include/linux/mlx5/driver.h | 2 - 4 files changed, 119 insertions(+), 149 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index c132ef1faefe..baba53fb58b7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1180,8 +1180,7 @@ out: return 0; err_sriov: - if (mlx5_sriov_cleanup(dev)) - dev_err(&dev->pdev->dev, "sriov cleanup failed\n"); + mlx5_sriov_cleanup(dev); #ifdef CONFIG_MLX5_CORE_EN mlx5_eswitch_cleanup(dev->priv.eswitch); @@ -1241,19 +1240,14 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) { int err = 0; - err = mlx5_sriov_cleanup(dev); - if (err) { - dev_warn(&dev->pdev->dev, "%s: sriov cleanup failed - abort\n", - __func__); - return err; - } - mutex_lock(&dev->intf_state_mutex); if (test_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state)) { dev_warn(&dev->pdev->dev, "%s: interface is down, NOP\n", __func__); goto out; } + + mlx5_sriov_cleanup(dev); mlx5_unregister_device(dev); #ifdef CONFIG_MLX5_CORE_EN mlx5_eswitch_cleanup(dev->priv.eswitch); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 714b71bed2be..7dd14cf73181 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -89,6 +89,8 @@ void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, unsigned long param); void mlx5_enter_error_state(struct mlx5_core_dev *dev); void mlx5_disable_device(struct mlx5_core_dev *dev); +int mlx5_sriov_init(struct mlx5_core_dev *dev); +void mlx5_sriov_cleanup(struct mlx5_core_dev *dev); int mlx5_core_sriov_configure(struct pci_dev *dev, int num_vfs); bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev); int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index 78e789245183..72a8215a2d02 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -44,108 +44,132 @@ bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev) return !!sriov->num_vfs; } -static void enable_vfs(struct mlx5_core_dev *dev, int num_vfs) +static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs) { struct mlx5_core_sriov *sriov = &dev->priv.sriov; int err; int vf; - for (vf = 1; vf <= num_vfs; vf++) { - err = mlx5_core_enable_hca(dev, vf); - if (err) { - mlx5_core_warn(dev, "failed to enable VF %d\n", vf - 1); - } else { - sriov->vfs_ctx[vf - 1].enabled = 1; - mlx5_core_dbg(dev, "successfully enabled VF %d\n", vf - 1); - } + if (sriov->enabled_vfs) { + mlx5_core_warn(dev, + "failed to enable SRIOV on device, already enabled with %d vfs\n", + sriov->enabled_vfs); + return -EBUSY; } -} -static void disable_vfs(struct mlx5_core_dev *dev, int num_vfs) -{ - struct mlx5_core_sriov *sriov = &dev->priv.sriov; - int vf; - - for (vf = 1; vf <= num_vfs; vf++) { - if (sriov->vfs_ctx[vf - 1].enabled) { - if (mlx5_core_disable_hca(dev, vf)) - mlx5_core_warn(dev, "failed to disable VF %d\n", vf - 1); - else - sriov->vfs_ctx[vf - 1].enabled = 0; - } - } -} - -static int mlx5_core_create_vfs(struct pci_dev *pdev, int num_vfs) -{ - struct mlx5_core_dev *dev = pci_get_drvdata(pdev); - int err; - - if (pci_num_vf(pdev)) - pci_disable_sriov(pdev); - - enable_vfs(dev, num_vfs); - - err = pci_enable_sriov(pdev, num_vfs); +#ifdef CONFIG_MLX5_CORE_EN + err = mlx5_eswitch_enable_sriov(dev->priv.eswitch, num_vfs, SRIOV_LEGACY); if (err) { - dev_warn(&pdev->dev, "enable sriov failed %d\n", err); - goto ex; + mlx5_core_warn(dev, + "failed to enable eswitch SRIOV (%d)\n", err); + return err; + } +#endif + + for (vf = 0; vf < num_vfs; vf++) { + err = mlx5_core_enable_hca(dev, vf + 1); + if (err) { + mlx5_core_warn(dev, "failed to enable VF %d (%d)\n", vf, err); + continue; + } + sriov->vfs_ctx[vf].enabled = 1; + sriov->enabled_vfs++; + mlx5_core_dbg(dev, "successfully enabled VF* %d\n", vf); + } return 0; +} + +static void mlx5_device_disable_sriov(struct mlx5_core_dev *dev) +{ + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + int err; + int vf; + + if (!sriov->enabled_vfs) + return; + + for (vf = 0; vf < sriov->num_vfs; vf++) { + if (!sriov->vfs_ctx[vf].enabled) + continue; + err = mlx5_core_disable_hca(dev, vf + 1); + if (err) { + mlx5_core_warn(dev, "failed to disable VF %d\n", vf); + continue; + } + sriov->vfs_ctx[vf].enabled = 0; + sriov->enabled_vfs--; + } + +#ifdef CONFIG_MLX5_CORE_EN + mlx5_eswitch_disable_sriov(dev->priv.eswitch); +#endif + + if (mlx5_wait_for_vf_pages(dev)) + mlx5_core_warn(dev, "timeout reclaiming VFs pages\n"); +} + +static int mlx5_pci_enable_sriov(struct pci_dev *pdev, int num_vfs) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + int err = 0; + + if (pci_num_vf(pdev)) { + mlx5_core_warn(dev, "Unable to enable pci sriov, already enabled\n"); + return -EBUSY; + } + + err = pci_enable_sriov(pdev, num_vfs); + if (err) + mlx5_core_warn(dev, "pci_enable_sriov failed : %d\n", err); -ex: - disable_vfs(dev, num_vfs); return err; } -static int mlx5_core_sriov_enable(struct pci_dev *pdev, int num_vfs) +static void mlx5_pci_disable_sriov(struct pci_dev *pdev) +{ + pci_disable_sriov(pdev); +} + +static int mlx5_sriov_enable(struct pci_dev *pdev, int num_vfs) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); struct mlx5_core_sriov *sriov = &dev->priv.sriov; - int err; + int err = 0; - kfree(sriov->vfs_ctx); - sriov->vfs_ctx = kcalloc(num_vfs, sizeof(*sriov->vfs_ctx), GFP_ATOMIC); - if (!sriov->vfs_ctx) - return -ENOMEM; - - sriov->enabled_vfs = num_vfs; - err = mlx5_core_create_vfs(pdev, num_vfs); + err = mlx5_device_enable_sriov(dev, num_vfs); if (err) { - kfree(sriov->vfs_ctx); - sriov->vfs_ctx = NULL; + mlx5_core_warn(dev, "mlx5_device_enable_sriov failed : %d\n", err); return err; } + err = mlx5_pci_enable_sriov(pdev, num_vfs); + if (err) { + mlx5_core_warn(dev, "mlx5_pci_enable_sriov failed : %d\n", err); + mlx5_device_disable_sriov(dev); + return err; + } + + sriov->num_vfs = num_vfs; + return 0; } -static void mlx5_core_init_vfs(struct mlx5_core_dev *dev, int num_vfs) +static void mlx5_sriov_disable(struct pci_dev *pdev) { + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); struct mlx5_core_sriov *sriov = &dev->priv.sriov; - sriov->num_vfs = num_vfs; -} - -static void mlx5_core_cleanup_vfs(struct mlx5_core_dev *dev) -{ - struct mlx5_core_sriov *sriov; - - sriov = &dev->priv.sriov; - disable_vfs(dev, sriov->num_vfs); - - if (mlx5_wait_for_vf_pages(dev)) - mlx5_core_warn(dev, "timeout claiming VFs pages\n"); - + mlx5_pci_disable_sriov(pdev); + mlx5_device_disable_sriov(dev); sriov->num_vfs = 0; } int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); - struct mlx5_core_sriov *sriov = &dev->priv.sriov; - int err; + int err = 0; mlx5_core_dbg(dev, "requested num_vfs %d\n", num_vfs); if (!mlx5_core_is_pf(dev)) @@ -156,92 +180,44 @@ int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs) return -EINVAL; } - mlx5_core_cleanup_vfs(dev); + if (num_vfs) + err = mlx5_sriov_enable(pdev, num_vfs); + else + mlx5_sriov_disable(pdev); - if (!num_vfs) { -#ifdef CONFIG_MLX5_CORE_EN - mlx5_eswitch_disable_sriov(dev->priv.eswitch); -#endif - kfree(sriov->vfs_ctx); - sriov->vfs_ctx = NULL; - if (!pci_vfs_assigned(pdev)) - pci_disable_sriov(pdev); - else - mlx5_core_info(dev, "unloading PF driver while leaving orphan VFs\n"); - return 0; - } - - err = mlx5_core_sriov_enable(pdev, num_vfs); - if (err) { - mlx5_core_warn(dev, "mlx5_core_sriov_enable failed %d\n", err); - return err; - } - - mlx5_core_init_vfs(dev, num_vfs); -#ifdef CONFIG_MLX5_CORE_EN - mlx5_eswitch_enable_sriov(dev->priv.eswitch, num_vfs, SRIOV_LEGACY); -#endif - - return num_vfs; -} - -static int sync_required(struct pci_dev *pdev) -{ - struct mlx5_core_dev *dev = pci_get_drvdata(pdev); - struct mlx5_core_sriov *sriov = &dev->priv.sriov; - int cur_vfs = pci_num_vf(pdev); - - if (cur_vfs != sriov->num_vfs) { - mlx5_core_warn(dev, "current VFs %d, registered %d - sync needed\n", - cur_vfs, sriov->num_vfs); - return 1; - } - - return 0; + return err ? err : num_vfs; } int mlx5_sriov_init(struct mlx5_core_dev *dev) { struct mlx5_core_sriov *sriov = &dev->priv.sriov; struct pci_dev *pdev = dev->pdev; - int cur_vfs; + int total_vfs; if (!mlx5_core_is_pf(dev)) return 0; - if (!sync_required(dev->pdev)) - return 0; - - cur_vfs = pci_num_vf(pdev); - sriov->vfs_ctx = kcalloc(cur_vfs, sizeof(*sriov->vfs_ctx), GFP_KERNEL); + total_vfs = pci_sriov_get_totalvfs(pdev); + sriov->num_vfs = pci_num_vf(pdev); + sriov->vfs_ctx = kcalloc(total_vfs, sizeof(*sriov->vfs_ctx), GFP_KERNEL); if (!sriov->vfs_ctx) return -ENOMEM; - sriov->enabled_vfs = cur_vfs; - - mlx5_core_init_vfs(dev, cur_vfs); -#ifdef CONFIG_MLX5_CORE_EN - if (cur_vfs) - mlx5_eswitch_enable_sriov(dev->priv.eswitch, cur_vfs, - SRIOV_LEGACY); -#endif - - enable_vfs(dev, cur_vfs); - - return 0; -} - -int mlx5_sriov_cleanup(struct mlx5_core_dev *dev) -{ - struct pci_dev *pdev = dev->pdev; - int err; - - if (!mlx5_core_is_pf(dev)) + /* If sriov VFs exist in PCI level, enable them in device level */ + if (!sriov->num_vfs) return 0; - err = mlx5_core_sriov_configure(pdev, 0); - if (err) - return err; - + mlx5_device_enable_sriov(dev, sriov->num_vfs); return 0; } + +void mlx5_sriov_cleanup(struct mlx5_core_dev *dev) +{ + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + + if (!mlx5_core_is_pf(dev)) + return; + + mlx5_device_disable_sriov(dev); + kfree(sriov->vfs_ctx); +} diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 5cb9fa7aec61..0d7aedfce1d7 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -828,8 +828,6 @@ void mlx5_pagealloc_init(struct mlx5_core_dev *dev); void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev); int mlx5_pagealloc_start(struct mlx5_core_dev *dev); void mlx5_pagealloc_stop(struct mlx5_core_dev *dev); -int mlx5_sriov_init(struct mlx5_core_dev *dev); -int mlx5_sriov_cleanup(struct mlx5_core_dev *dev); void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id, s32 npages); int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot); From 737a234bb6384800a5b632be85c6b0ad6221d137 Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Fri, 9 Sep 2016 17:35:19 +0300 Subject: [PATCH 0891/1715] net/mlx5: Introduce attach/detach to interface API Add attach/detach callbacks to interface API. This is crucial for implementing seamless reset flow which releases the hardware and it's resources upon detach while keeping software structures and state (e.g netdev) then reset and reallocate the hardware needed resources upon attach. Signed-off-by: Mohamad Haj Yahia Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/main.c | 149 +++++++++++++++--- include/linux/mlx5/driver.h | 2 + 2 files changed, 131 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index baba53fb58b7..108d8f2e73e1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -81,6 +81,7 @@ struct mlx5_device_context { struct list_head list; struct mlx5_interface *intf; void *context; + unsigned long state; }; enum { @@ -778,6 +779,11 @@ static int mlx5_core_set_issi(struct mlx5_core_dev *dev) return -ENOTSUPP; } +enum { + MLX5_INTERFACE_ADDED, + MLX5_INTERFACE_ATTACHED, +}; + static void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv) { struct mlx5_device_context *dev_ctx; @@ -786,12 +792,15 @@ static void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv) if (!mlx5_lag_intf_add(intf, priv)) return; - dev_ctx = kmalloc(sizeof(*dev_ctx), GFP_KERNEL); + dev_ctx = kzalloc(sizeof(*dev_ctx), GFP_KERNEL); if (!dev_ctx) return; - dev_ctx->intf = intf; + dev_ctx->intf = intf; dev_ctx->context = intf->add(dev); + set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state); + if (intf->attach) + set_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state); if (dev_ctx->context) { spin_lock_irq(&priv->ctx_lock); @@ -802,21 +811,114 @@ static void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv) } } +static struct mlx5_device_context *mlx5_get_device(struct mlx5_interface *intf, + struct mlx5_priv *priv) +{ + struct mlx5_device_context *dev_ctx; + + list_for_each_entry(dev_ctx, &priv->ctx_list, list) + if (dev_ctx->intf == intf) + return dev_ctx; + return NULL; +} + static void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv) { struct mlx5_device_context *dev_ctx; struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv); - list_for_each_entry(dev_ctx, &priv->ctx_list, list) - if (dev_ctx->intf == intf) { - spin_lock_irq(&priv->ctx_lock); - list_del(&dev_ctx->list); - spin_unlock_irq(&priv->ctx_lock); + dev_ctx = mlx5_get_device(intf, priv); + if (!dev_ctx) + return; - intf->remove(dev, dev_ctx->context); - kfree(dev_ctx); + spin_lock_irq(&priv->ctx_lock); + list_del(&dev_ctx->list); + spin_unlock_irq(&priv->ctx_lock); + + if (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state)) + intf->remove(dev, dev_ctx->context); + + kfree(dev_ctx); +} + +static void mlx5_attach_interface(struct mlx5_interface *intf, struct mlx5_priv *priv) +{ + struct mlx5_device_context *dev_ctx; + struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv); + + dev_ctx = mlx5_get_device(intf, priv); + if (!dev_ctx) + return; + + if (intf->attach) { + if (test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state)) return; - } + intf->attach(dev, dev_ctx->context); + set_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state); + } else { + if (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state)) + return; + dev_ctx->context = intf->add(dev); + set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state); + } +} + +static void mlx5_attach_device(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + struct mlx5_interface *intf; + + mutex_lock(&mlx5_intf_mutex); + list_for_each_entry(intf, &intf_list, list) + mlx5_attach_interface(intf, priv); + mutex_unlock(&mlx5_intf_mutex); +} + +static void mlx5_detach_interface(struct mlx5_interface *intf, struct mlx5_priv *priv) +{ + struct mlx5_device_context *dev_ctx; + struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv); + + dev_ctx = mlx5_get_device(intf, priv); + if (!dev_ctx) + return; + + if (intf->detach) { + if (!test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state)) + return; + intf->detach(dev, dev_ctx->context); + clear_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state); + } else { + if (!test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state)) + return; + intf->remove(dev, dev_ctx->context); + clear_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state); + } +} + +static void mlx5_detach_device(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + struct mlx5_interface *intf; + + mutex_lock(&mlx5_intf_mutex); + list_for_each_entry(intf, &intf_list, list) + mlx5_detach_interface(intf, priv); + mutex_unlock(&mlx5_intf_mutex); +} + +static bool mlx5_device_registered(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv; + bool found = false; + + mutex_lock(&mlx5_intf_mutex); + list_for_each_entry(priv, &mlx5_dev_list, dev_list) + if (priv == &dev->priv) + found = true; + mutex_unlock(&mlx5_intf_mutex); + + return found; } static int mlx5_register_device(struct mlx5_core_dev *dev) @@ -1162,16 +1264,16 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) goto err_sriov; } - err = mlx5_register_device(dev); - if (err) { - dev_err(&pdev->dev, "mlx5_register_device failed %d\n", err); - goto err_reg_dev; + if (mlx5_device_registered(dev)) { + mlx5_attach_device(dev); + } else { + err = mlx5_register_device(dev); + if (err) { + dev_err(&pdev->dev, "mlx5_register_device failed %d\n", err); + goto err_reg_dev; + } } - err = request_module_nowait(MLX5_IB_MOD); - if (err) - pr_info("failed request module on %s\n", MLX5_IB_MOD); - clear_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state); set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); out: @@ -1247,12 +1349,13 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) goto out; } + if (mlx5_device_registered(dev)) + mlx5_detach_device(dev); + mlx5_sriov_cleanup(dev); - mlx5_unregister_device(dev); #ifdef CONFIG_MLX5_CORE_EN mlx5_eswitch_cleanup(dev->priv.eswitch); #endif - mlx5_cleanup_rl_table(dev); mlx5_cleanup_fs(dev); mlx5_cleanup_mkey_table(dev); @@ -1364,6 +1467,9 @@ static int init_one(struct pci_dev *pdev, dev_err(&pdev->dev, "mlx5_load_one failed with error code %d\n", err); goto clean_health; } + err = request_module_nowait(MLX5_IB_MOD); + if (err) + pr_info("failed request module on %s\n", MLX5_IB_MOD); err = devlink_register(devlink, &pdev->dev); if (err) @@ -1391,11 +1497,14 @@ static void remove_one(struct pci_dev *pdev) struct mlx5_priv *priv = &dev->priv; devlink_unregister(devlink); + mlx5_unregister_device(dev); + if (mlx5_unload_one(dev, priv)) { dev_err(&dev->pdev->dev, "mlx5_unload_one failed\n"); mlx5_health_cleanup(dev); return; } + mlx5_health_cleanup(dev); mlx5_pci_close(dev, priv); pci_set_drvdata(pdev, NULL); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 0d7aedfce1d7..85c4786427e4 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -930,6 +930,8 @@ enum { struct mlx5_interface { void * (*add)(struct mlx5_core_dev *dev); void (*remove)(struct mlx5_core_dev *dev, void *context); + int (*attach)(struct mlx5_core_dev *dev, void *context); + void (*detach)(struct mlx5_core_dev *dev, void *context); void (*event)(struct mlx5_core_dev *dev, void *context, enum mlx5_dev_event event, unsigned long param); void * (*get_dev)(void *context); From 59211bd3b6329c3e5f4a90ac3d7f87ffa7867073 Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Fri, 9 Sep 2016 17:35:20 +0300 Subject: [PATCH 0892/1715] net/mlx5: Split the load/unload flow into hardware and software flows Gather all software context creating/destroying in one function and call it once in the first load and in the last unload. load/unload functions will now receive indication if we need to create/destroy the software contexts. In internal/pci error do the unload/load flows without releasing the software objects. In this way we perserve the sw core state and it help us restoring old driver state after PCI error/shutdown. Signed-off-by: Mohamad Haj Yahia Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/main.c | 171 +++++++++++------- 1 file changed, 107 insertions(+), 64 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 108d8f2e73e1..966647f74db5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1093,8 +1093,76 @@ static void mlx5_pci_close(struct mlx5_core_dev *dev, struct mlx5_priv *priv) debugfs_remove(priv->dbg_root); } -#define MLX5_IB_MOD "mlx5_ib" -static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) +static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv) +{ + struct pci_dev *pdev = dev->pdev; + int err; + + err = mlx5_query_hca_caps(dev); + if (err) { + dev_err(&pdev->dev, "query hca failed\n"); + goto out; + } + + err = mlx5_query_board_id(dev); + if (err) { + dev_err(&pdev->dev, "query board id failed\n"); + goto out; + } + + err = mlx5_eq_init(dev); + if (err) { + dev_err(&pdev->dev, "failed to initialize eq\n"); + goto out; + } + + MLX5_INIT_DOORBELL_LOCK(&priv->cq_uar_lock); + + err = mlx5_init_cq_table(dev); + if (err) { + dev_err(&pdev->dev, "failed to initialize cq table\n"); + goto err_eq_cleanup; + } + + mlx5_init_qp_table(dev); + + mlx5_init_srq_table(dev); + + mlx5_init_mkey_table(dev); + + err = mlx5_init_rl_table(dev); + if (err) { + dev_err(&pdev->dev, "Failed to init rate limiting\n"); + goto err_tables_cleanup; + } + + return 0; + +err_tables_cleanup: + mlx5_cleanup_mkey_table(dev); + mlx5_cleanup_srq_table(dev); + mlx5_cleanup_qp_table(dev); + mlx5_cleanup_cq_table(dev); + +err_eq_cleanup: + mlx5_eq_cleanup(dev); + +out: + return err; +} + +static void mlx5_cleanup_once(struct mlx5_core_dev *dev) +{ + mlx5_cleanup_rl_table(dev); + mlx5_cleanup_mkey_table(dev); + mlx5_cleanup_srq_table(dev); + mlx5_cleanup_qp_table(dev); + mlx5_cleanup_cq_table(dev); + mlx5_eq_cleanup(dev); +} + +static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, + bool boot) { struct pci_dev *pdev = dev->pdev; int err; @@ -1127,12 +1195,10 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) goto out_err; } - mlx5_pagealloc_init(dev); - err = mlx5_core_enable_hca(dev, 0); if (err) { dev_err(&pdev->dev, "enable hca failed\n"); - goto err_pagealloc_cleanup; + goto err_cmd_cleanup; } err = mlx5_core_set_issi(dev); @@ -1185,34 +1251,21 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) mlx5_start_health_poll(dev); - err = mlx5_query_hca_caps(dev); - if (err) { - dev_err(&pdev->dev, "query hca failed\n"); - goto err_stop_poll; - } - - err = mlx5_query_board_id(dev); - if (err) { - dev_err(&pdev->dev, "query board id failed\n"); + if (boot && mlx5_init_once(dev, priv)) { + dev_err(&pdev->dev, "sw objs init failed\n"); goto err_stop_poll; } err = mlx5_enable_msix(dev); if (err) { dev_err(&pdev->dev, "enable msix failed\n"); - goto err_stop_poll; - } - - err = mlx5_eq_init(dev); - if (err) { - dev_err(&pdev->dev, "failed to initialize eq\n"); - goto disable_msix; + goto err_cleanup_once; } err = mlx5_alloc_uuars(dev, &priv->uuari); if (err) { dev_err(&pdev->dev, "Failed allocating uar, aborting\n"); - goto err_eq_cleanup; + goto err_disable_msix; } err = mlx5_start_eqs(dev); @@ -1228,15 +1281,10 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) } err = mlx5_irq_set_affinity_hints(dev); - if (err) + if (err) { dev_err(&pdev->dev, "Failed to alloc affinity hint cpumask\n"); - - MLX5_INIT_DOORBELL_LOCK(&priv->cq_uar_lock); - - mlx5_init_cq_table(dev); - mlx5_init_qp_table(dev); - mlx5_init_srq_table(dev); - mlx5_init_mkey_table(dev); + goto err_affinity_hints; + } err = mlx5_init_fs(dev); if (err) { @@ -1244,12 +1292,6 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) goto err_fs; } - err = mlx5_init_rl_table(dev); - if (err) { - dev_err(&pdev->dev, "Failed to init rate limiting\n"); - goto err_rl; - } - #ifdef CONFIG_MLX5_CORE_EN err = mlx5_eswitch_init(dev); if (err) { @@ -1281,22 +1323,19 @@ out: return 0; -err_sriov: +err_reg_dev: mlx5_sriov_cleanup(dev); +err_sriov: #ifdef CONFIG_MLX5_CORE_EN mlx5_eswitch_cleanup(dev->priv.eswitch); #endif -err_reg_dev: - mlx5_cleanup_rl_table(dev); -err_rl: mlx5_cleanup_fs(dev); + err_fs: - mlx5_cleanup_mkey_table(dev); - mlx5_cleanup_srq_table(dev); - mlx5_cleanup_qp_table(dev); - mlx5_cleanup_cq_table(dev); mlx5_irq_clear_affinity_hints(dev); + +err_affinity_hints: free_comp_eqs(dev); err_stop_eqs: @@ -1305,12 +1344,13 @@ err_stop_eqs: err_free_uar: mlx5_free_uuars(dev, &priv->uuari); -err_eq_cleanup: - mlx5_eq_cleanup(dev); - -disable_msix: +err_disable_msix: mlx5_disable_msix(dev); +err_cleanup_once: + if (boot) + mlx5_cleanup_once(dev); + err_stop_poll: mlx5_stop_health_poll(dev); if (mlx5_cmd_teardown_hca(dev)) { @@ -1327,8 +1367,7 @@ reclaim_boot_pages: err_disable_hca: mlx5_core_disable_hca(dev, 0); -err_pagealloc_cleanup: - mlx5_pagealloc_cleanup(dev); +err_cmd_cleanup: mlx5_cmd_cleanup(dev); out_err: @@ -1338,7 +1377,8 @@ out_err: return err; } -static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) +static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, + bool cleanup) { int err = 0; @@ -1346,6 +1386,8 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) if (test_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state)) { dev_warn(&dev->pdev->dev, "%s: interface is down, NOP\n", __func__); + if (cleanup) + mlx5_cleanup_once(dev); goto out; } @@ -1356,18 +1398,14 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) #ifdef CONFIG_MLX5_CORE_EN mlx5_eswitch_cleanup(dev->priv.eswitch); #endif - mlx5_cleanup_rl_table(dev); mlx5_cleanup_fs(dev); - mlx5_cleanup_mkey_table(dev); - mlx5_cleanup_srq_table(dev); - mlx5_cleanup_qp_table(dev); - mlx5_cleanup_cq_table(dev); mlx5_irq_clear_affinity_hints(dev); free_comp_eqs(dev); mlx5_stop_eqs(dev); mlx5_free_uuars(dev, &priv->uuari); - mlx5_eq_cleanup(dev); mlx5_disable_msix(dev); + if (cleanup) + mlx5_cleanup_once(dev); mlx5_stop_health_poll(dev); err = mlx5_cmd_teardown_hca(dev); if (err) { @@ -1377,7 +1415,6 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) mlx5_pagealloc_stop(dev); mlx5_reclaim_startup_pages(dev); mlx5_core_disable_hca(dev, 0); - mlx5_pagealloc_cleanup(dev); mlx5_cmd_cleanup(dev); out: @@ -1416,6 +1453,7 @@ static const struct devlink_ops mlx5_devlink_ops = { #endif }; +#define MLX5_IB_MOD "mlx5_ib" static int init_one(struct pci_dev *pdev, const struct pci_device_id *id) { @@ -1462,11 +1500,14 @@ static int init_one(struct pci_dev *pdev, goto close_pci; } - err = mlx5_load_one(dev, priv); + mlx5_pagealloc_init(dev); + + err = mlx5_load_one(dev, priv, true); if (err) { dev_err(&pdev->dev, "mlx5_load_one failed with error code %d\n", err); goto clean_health; } + err = request_module_nowait(MLX5_IB_MOD); if (err) pr_info("failed request module on %s\n", MLX5_IB_MOD); @@ -1478,8 +1519,9 @@ static int init_one(struct pci_dev *pdev, return 0; clean_load: - mlx5_unload_one(dev, priv); + mlx5_unload_one(dev, priv, true); clean_health: + mlx5_pagealloc_cleanup(dev); mlx5_health_cleanup(dev); close_pci: mlx5_pci_close(dev, priv); @@ -1499,12 +1541,13 @@ static void remove_one(struct pci_dev *pdev) devlink_unregister(devlink); mlx5_unregister_device(dev); - if (mlx5_unload_one(dev, priv)) { + if (mlx5_unload_one(dev, priv, true)) { dev_err(&dev->pdev->dev, "mlx5_unload_one failed\n"); mlx5_health_cleanup(dev); return; } + mlx5_pagealloc_cleanup(dev); mlx5_health_cleanup(dev); mlx5_pci_close(dev, priv); pci_set_drvdata(pdev, NULL); @@ -1519,7 +1562,7 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev, dev_info(&pdev->dev, "%s was called\n", __func__); mlx5_enter_error_state(dev); - mlx5_unload_one(dev, priv); + mlx5_unload_one(dev, priv, false); pci_save_state(pdev); mlx5_pci_disable_device(dev); return state == pci_channel_io_perm_failure ? @@ -1591,7 +1634,7 @@ static void mlx5_pci_resume(struct pci_dev *pdev) dev_info(&pdev->dev, "%s was called\n", __func__); - err = mlx5_load_one(dev, priv); + err = mlx5_load_one(dev, priv, false); if (err) dev_err(&pdev->dev, "%s: mlx5_load_one failed with error code: %d\n" , __func__, err); @@ -1613,7 +1656,7 @@ static void shutdown(struct pci_dev *pdev) dev_info(&pdev->dev, "Shutdown was called\n"); /* Notify mlx5 clients that the kernel is being shut down */ set_bit(MLX5_INTERFACE_STATE_SHUTDOWN, &dev->intf_state); - mlx5_unload_one(dev, priv); + mlx5_unload_one(dev, priv, false); mlx5_pci_disable_device(dev); } From acab721b5d8d9431cc80acc827973eeeda4dec24 Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Fri, 9 Sep 2016 17:35:21 +0300 Subject: [PATCH 0893/1715] net/mlx5: Implement SRIOV attach/detach flows Needed for lightweight and modular internal/pci error handling. Implement sriov attach function which enables pre-saved number of vfs on the device side. Implement sriov detach function which disable the current vfs on the device side. Init/cleanup function only handles sriov software context allocation and destruction. Signed-off-by: Mohamad Haj Yahia Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlx5/core/mlx5_core.h | 2 ++ .../net/ethernet/mellanox/mlx5/core/sriov.c | 29 ++++++++++++++----- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 7dd14cf73181..04b719a53313 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -91,6 +91,8 @@ void mlx5_enter_error_state(struct mlx5_core_dev *dev); void mlx5_disable_device(struct mlx5_core_dev *dev); int mlx5_sriov_init(struct mlx5_core_dev *dev); void mlx5_sriov_cleanup(struct mlx5_core_dev *dev); +int mlx5_sriov_attach(struct mlx5_core_dev *dev); +void mlx5_sriov_detach(struct mlx5_core_dev *dev); int mlx5_core_sriov_configure(struct pci_dev *dev, int num_vfs); bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev); int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index 72a8215a2d02..f4f02b64f552 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -188,6 +188,25 @@ int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs) return err ? err : num_vfs; } +int mlx5_sriov_attach(struct mlx5_core_dev *dev) +{ + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + + if (!mlx5_core_is_pf(dev) || !sriov->num_vfs) + return 0; + + /* If sriov VFs exist in PCI level, enable them in device level */ + return mlx5_device_enable_sriov(dev, sriov->num_vfs); +} + +void mlx5_sriov_detach(struct mlx5_core_dev *dev) +{ + if (!mlx5_core_is_pf(dev)) + return; + + mlx5_device_disable_sriov(dev); +} + int mlx5_sriov_init(struct mlx5_core_dev *dev) { struct mlx5_core_sriov *sriov = &dev->priv.sriov; @@ -203,12 +222,7 @@ int mlx5_sriov_init(struct mlx5_core_dev *dev) if (!sriov->vfs_ctx) return -ENOMEM; - /* If sriov VFs exist in PCI level, enable them in device level */ - if (!sriov->num_vfs) - return 0; - - mlx5_device_enable_sriov(dev, sriov->num_vfs); - return 0; + return mlx5_sriov_attach(dev); } void mlx5_sriov_cleanup(struct mlx5_core_dev *dev) @@ -217,7 +231,6 @@ void mlx5_sriov_cleanup(struct mlx5_core_dev *dev) if (!mlx5_core_is_pf(dev)) return; - - mlx5_device_disable_sriov(dev); + mlx5_sriov_detach(dev); kfree(sriov->vfs_ctx); } From 62a9b90ad83ebe584bf22a2e716a96e75f82e137 Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Fri, 9 Sep 2016 17:35:22 +0300 Subject: [PATCH 0894/1715] net/mlx5: Implement eswitch attach/detach flows Needed for lightweight and modular internal/pci error handling. Implement eswitch attach function which allocates/starts hw related resources. Implement eswitch detach function which releases/stops hw related resources. Init/cleanup function only handle eswitch software context allocation and destruction. Signed-off-by: Mohamad Haj Yahia Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 24 ++++++++++++++++--- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 2 ++ 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 101430571d6d..2405889421a5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1559,6 +1559,25 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) esw_enable_vport(esw, 0, UC_ADDR_CHANGE); } +void mlx5_eswitch_attach(struct mlx5_eswitch *esw) +{ + if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) || + MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return; + + esw_enable_vport(esw, 0, UC_ADDR_CHANGE); + /* VF Vports will be enabled when SRIOV is enabled */ +} + +void mlx5_eswitch_detach(struct mlx5_eswitch *esw) +{ + if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) || + MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return; + + esw_disable_vport(esw, 0); +} + int mlx5_eswitch_init(struct mlx5_core_dev *dev) { int l2_table_size = 1 << MLX5_CAP_GEN(dev, log_max_l2_table); @@ -1635,9 +1654,8 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) esw->enabled_vports = 0; esw->mode = SRIOV_NONE; + mlx5_eswitch_attach(esw); dev->priv.eswitch = esw; - esw_enable_vport(esw, 0, UC_ADDR_CHANGE); - /* VF Vports will be enabled when SRIOV is enabled */ return 0; abort: if (esw->work_queue) @@ -1656,8 +1674,8 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) return; esw_info(esw->dev, "cleanup\n"); - esw_disable_vport(esw, 0); + mlx5_eswitch_detach(esw); esw->dev->priv.eswitch = NULL; destroy_workqueue(esw->work_queue); kfree(esw->l2_table.bitmap); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index a96140971d77..48c273d5b709 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -204,6 +204,8 @@ struct mlx5_eswitch { /* E-Switch API */ int mlx5_eswitch_init(struct mlx5_core_dev *dev); void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw); +void mlx5_eswitch_attach(struct mlx5_eswitch *esw); +void mlx5_eswitch_detach(struct mlx5_eswitch *esw); void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe); int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode); void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw); From c2d6e31a0008f8188f935f8dd81c81c44697b256 Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Fri, 9 Sep 2016 17:35:23 +0300 Subject: [PATCH 0895/1715] net/mlx5: Align sriov/eswitch modules with the new load/unload flow. Init/cleanup sriov/eswitch in the core software context init/cleanup flows. Attach/detach sriov/eswitch in the core load/unload flows. Signed-off-by: Mohamad Haj Yahia Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 2 - .../net/ethernet/mellanox/mlx5/core/main.c | 42 ++++++++++++++----- .../net/ethernet/mellanox/mlx5/core/sriov.c | 4 +- 3 files changed, 34 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 2405889421a5..015f1bfec7d7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1654,7 +1654,6 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) esw->enabled_vports = 0; esw->mode = SRIOV_NONE; - mlx5_eswitch_attach(esw); dev->priv.eswitch = esw; return 0; abort: @@ -1675,7 +1674,6 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) esw_info(esw->dev, "cleanup\n"); - mlx5_eswitch_detach(esw); esw->dev->priv.eswitch = NULL; destroy_workqueue(esw->work_queue); kfree(esw->l2_table.bitmap); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 966647f74db5..16660cf16e0d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1136,8 +1136,30 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv) goto err_tables_cleanup; } +#ifdef CONFIG_MLX5_CORE_EN + err = mlx5_eswitch_init(dev); + if (err) { + dev_err(&pdev->dev, "Failed to init eswitch %d\n", err); + goto err_rl_cleanup; + } +#endif + + err = mlx5_sriov_init(dev); + if (err) { + dev_err(&pdev->dev, "Failed to init sriov %d\n", err); + goto err_eswitch_cleanup; + } + return 0; +err_eswitch_cleanup: +#ifdef CONFIG_MLX5_CORE_EN + mlx5_eswitch_cleanup(dev->priv.eswitch); + +err_rl_cleanup: +#endif + mlx5_cleanup_rl_table(dev); + err_tables_cleanup: mlx5_cleanup_mkey_table(dev); mlx5_cleanup_srq_table(dev); @@ -1153,6 +1175,10 @@ out: static void mlx5_cleanup_once(struct mlx5_core_dev *dev) { + mlx5_sriov_cleanup(dev); +#ifdef CONFIG_MLX5_CORE_EN + mlx5_eswitch_cleanup(dev->priv.eswitch); +#endif mlx5_cleanup_rl_table(dev); mlx5_cleanup_mkey_table(dev); mlx5_cleanup_srq_table(dev); @@ -1293,14 +1319,10 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, } #ifdef CONFIG_MLX5_CORE_EN - err = mlx5_eswitch_init(dev); - if (err) { - dev_err(&pdev->dev, "eswitch init failed %d\n", err); - goto err_reg_dev; - } + mlx5_eswitch_attach(dev->priv.eswitch); #endif - err = mlx5_sriov_init(dev); + err = mlx5_sriov_attach(dev); if (err) { dev_err(&pdev->dev, "sriov init failed %d\n", err); goto err_sriov; @@ -1324,11 +1346,11 @@ out: return 0; err_reg_dev: - mlx5_sriov_cleanup(dev); + mlx5_sriov_detach(dev); err_sriov: #ifdef CONFIG_MLX5_CORE_EN - mlx5_eswitch_cleanup(dev->priv.eswitch); + mlx5_eswitch_detach(dev->priv.eswitch); #endif mlx5_cleanup_fs(dev); @@ -1394,9 +1416,9 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, if (mlx5_device_registered(dev)) mlx5_detach_device(dev); - mlx5_sriov_cleanup(dev); + mlx5_sriov_detach(dev); #ifdef CONFIG_MLX5_CORE_EN - mlx5_eswitch_cleanup(dev->priv.eswitch); + mlx5_eswitch_detach(dev->priv.eswitch); #endif mlx5_cleanup_fs(dev); mlx5_irq_clear_affinity_hints(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index f4f02b64f552..e08627785590 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -222,7 +222,7 @@ int mlx5_sriov_init(struct mlx5_core_dev *dev) if (!sriov->vfs_ctx) return -ENOMEM; - return mlx5_sriov_attach(dev); + return 0; } void mlx5_sriov_cleanup(struct mlx5_core_dev *dev) @@ -231,6 +231,6 @@ void mlx5_sriov_cleanup(struct mlx5_core_dev *dev) if (!mlx5_core_is_pf(dev)) return; - mlx5_sriov_detach(dev); + kfree(sriov->vfs_ctx); } From 1ab2068a4c663cbb2e0e0cfea934bc4e163abed0 Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Fri, 9 Sep 2016 17:35:24 +0300 Subject: [PATCH 0896/1715] net/mlx5: Implement vports admin state backup/restore Save the user configuration in the vport sturct. Restore saved old configuration upon vport enable. Signed-off-by: Mohamad Haj Yahia Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 249 ++++++++---------- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 16 +- 2 files changed, 124 insertions(+), 141 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 015f1bfec7d7..654b76ff962f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -116,57 +116,6 @@ static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport, } /* E-Switch vport context HW commands */ -static int query_esw_vport_context_cmd(struct mlx5_core_dev *mdev, u32 vport, - u32 *out, int outlen) -{ - u32 in[MLX5_ST_SZ_DW(query_esw_vport_context_in)] = {0}; - - MLX5_SET(query_nic_vport_context_in, in, opcode, - MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT); - MLX5_SET(query_esw_vport_context_in, in, vport_number, vport); - if (vport) - MLX5_SET(query_esw_vport_context_in, in, other_vport, 1); - return mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen); -} - -static int query_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport, - u16 *vlan, u8 *qos) -{ - u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {0}; - int err; - bool cvlan_strip; - bool cvlan_insert; - - *vlan = 0; - *qos = 0; - - if (!MLX5_CAP_ESW(dev, vport_cvlan_strip) || - !MLX5_CAP_ESW(dev, vport_cvlan_insert_if_not_exist)) - return -ENOTSUPP; - - err = query_esw_vport_context_cmd(dev, vport, out, sizeof(out)); - if (err) - goto out; - - cvlan_strip = MLX5_GET(query_esw_vport_context_out, out, - esw_vport_context.vport_cvlan_strip); - - cvlan_insert = MLX5_GET(query_esw_vport_context_out, out, - esw_vport_context.vport_cvlan_insert); - - if (cvlan_strip || cvlan_insert) { - *vlan = MLX5_GET(query_esw_vport_context_out, out, - esw_vport_context.cvlan_id); - *qos = MLX5_GET(query_esw_vport_context_out, out, - esw_vport_context.cvlan_pcp); - } - - esw_debug(dev, "Query Vport[%d] cvlan: VLAN %d qos=%d\n", - vport, *vlan, *qos); -out: - return err; -} - static int modify_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport, void *in, int inlen) { @@ -921,7 +870,7 @@ static void esw_update_vport_rx_mode(struct mlx5_eswitch *esw, u32 vport_num) esw_debug(esw->dev, "vport[%d] context update rx mode promisc_all=%d, all_multi=%d\n", vport_num, promisc_all, promisc_mc); - if (!vport->trusted || !vport->enabled) { + if (!vport->info.trusted || !vport->enabled) { promisc_uc = 0; promisc_mc = 0; promisc_all = 0; @@ -1257,30 +1206,20 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { struct mlx5_flow_spec *spec; - u8 smac[ETH_ALEN]; int err = 0; u8 *smac_v; - if (vport->spoofchk) { - err = mlx5_query_nic_vport_mac_address(esw->dev, vport->vport, smac); - if (err) { - esw_warn(esw->dev, - "vport[%d] configure ingress rules failed, query smac failed, err(%d)\n", - vport->vport, err); - return err; - } + if (vport->info.spoofchk && !is_valid_ether_addr(vport->info.mac)) { + mlx5_core_warn(esw->dev, + "vport[%d] configure ingress rules failed, illegal mac with spoofchk\n", + vport->vport); + return -EPERM; - if (!is_valid_ether_addr(smac)) { - mlx5_core_warn(esw->dev, - "vport[%d] configure ingress rules failed, illegal mac with spoofchk\n", - vport->vport); - return -EPERM; - } } esw_vport_cleanup_ingress_rules(esw, vport); - if (!vport->vlan && !vport->qos && !vport->spoofchk) { + if (!vport->info.vlan && !vport->info.qos && !vport->info.spoofchk) { esw_vport_disable_ingress_acl(esw, vport); return 0; } @@ -1289,7 +1228,7 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, esw_debug(esw->dev, "vport[%d] configure ingress rules, vlan(%d) qos(%d)\n", - vport->vport, vport->vlan, vport->qos); + vport->vport, vport->info.vlan, vport->info.qos); spec = mlx5_vzalloc(sizeof(*spec)); if (!spec) { @@ -1299,16 +1238,16 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, goto out; } - if (vport->vlan || vport->qos) + if (vport->info.vlan || vport->info.qos) MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.vlan_tag); - if (vport->spoofchk) { + if (vport->info.spoofchk) { MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.smac_47_16); MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.smac_15_0); smac_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers.smac_47_16); - ether_addr_copy(smac_v, smac); + ether_addr_copy(smac_v, vport->info.mac); } spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; @@ -1354,7 +1293,7 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, esw_vport_cleanup_egress_rules(esw, vport); - if (!vport->vlan && !vport->qos) { + if (!vport->info.vlan && !vport->info.qos) { esw_vport_disable_egress_acl(esw, vport); return 0; } @@ -1363,7 +1302,7 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, esw_debug(esw->dev, "vport[%d] configure egress rules, vlan(%d) qos(%d)\n", - vport->vport, vport->vlan, vport->qos); + vport->vport, vport->info.vlan, vport->info.qos); spec = mlx5_vzalloc(sizeof(*spec)); if (!spec) { @@ -1377,7 +1316,7 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.vlan_tag); MLX5_SET_TO_ONES(fte_match_param, spec->match_value, outer_headers.vlan_tag); MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.first_vid); - MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, vport->vlan); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, vport->info.vlan); spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; vport->egress.allowed_vlan = @@ -1411,6 +1350,41 @@ out: return err; } +static void node_guid_gen_from_mac(u64 *node_guid, u8 mac[ETH_ALEN]) +{ + ((u8 *)node_guid)[7] = mac[0]; + ((u8 *)node_guid)[6] = mac[1]; + ((u8 *)node_guid)[5] = mac[2]; + ((u8 *)node_guid)[4] = 0xff; + ((u8 *)node_guid)[3] = 0xfe; + ((u8 *)node_guid)[2] = mac[3]; + ((u8 *)node_guid)[1] = mac[4]; + ((u8 *)node_guid)[0] = mac[5]; +} + +static void esw_apply_vport_conf(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + int vport_num = vport->vport; + + if (!vport_num) + return; + + mlx5_modify_vport_admin_state(esw->dev, + MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT, + vport_num, + vport->info.link_state); + mlx5_modify_nic_vport_mac_address(esw->dev, vport_num, vport->info.mac); + mlx5_modify_nic_vport_node_guid(esw->dev, vport_num, vport->info.node_guid); + modify_esw_vport_cvlan(esw->dev, vport_num, vport->info.vlan, vport->info.qos, + (vport->info.vlan || vport->info.qos)); + + /* Only legacy mode needs ACLs */ + if (esw->mode == SRIOV_LEGACY) { + esw_vport_ingress_config(esw, vport); + esw_vport_egress_config(esw, vport); + } +} static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num, int enable_events) { @@ -1421,23 +1395,17 @@ static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num, esw_debug(esw->dev, "Enabling VPORT(%d)\n", vport_num); - /* Only VFs need ACLs for VST and spoofchk filtering */ - if (vport_num && esw->mode == SRIOV_LEGACY) { - esw_vport_ingress_config(esw, vport); - esw_vport_egress_config(esw, vport); - } - - mlx5_modify_vport_admin_state(esw->dev, - MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT, - vport_num, - MLX5_ESW_VPORT_ADMIN_STATE_AUTO); + /* Restore old vport configuration */ + esw_apply_vport_conf(esw, vport); /* Sync with current vport context */ vport->enabled_events = enable_events; vport->enabled = true; /* only PF is trusted by default */ - vport->trusted = (vport_num) ? false : true; + if (!vport_num) + vport->info.trusted = true; + esw_vport_change_handle_locked(vport); esw->enabled_vports++; @@ -1457,11 +1425,6 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) vport->enabled = false; synchronize_irq(mlx5_get_msix_vec(esw->dev, MLX5_EQ_VEC_ASYNC)); - - mlx5_modify_vport_admin_state(esw->dev, - MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT, - vport_num, - MLX5_ESW_VPORT_ADMIN_STATE_DOWN); /* Wait for current already scheduled events to complete */ flush_workqueue(esw->work_queue); /* Disable events from this vport */ @@ -1473,7 +1436,12 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) */ esw_vport_change_handle_locked(vport); vport->enabled_events = 0; + if (vport_num && esw->mode == SRIOV_LEGACY) { + mlx5_modify_vport_admin_state(esw->dev, + MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT, + vport_num, + MLX5_ESW_VPORT_ADMIN_STATE_DOWN); esw_vport_disable_egress_acl(esw, vport); esw_vport_disable_ingress_acl(esw, vport); } @@ -1645,6 +1613,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) struct mlx5_vport *vport = &esw->vports[vport_num]; vport->vport = vport_num; + vport->info.link_state = MLX5_ESW_VPORT_ADMIN_STATE_AUTO; vport->dev = dev; INIT_WORK(&vport->vport_change_handler, esw_vport_change_handler); @@ -1705,18 +1674,6 @@ void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe) (esw && MLX5_CAP_GEN(esw->dev, vport_group_manager) && mlx5_core_is_pf(esw->dev)) #define LEGAL_VPORT(esw, vport) (vport >= 0 && vport < esw->total_vports) -static void node_guid_gen_from_mac(u64 *node_guid, u8 mac[ETH_ALEN]) -{ - ((u8 *)node_guid)[7] = mac[0]; - ((u8 *)node_guid)[6] = mac[1]; - ((u8 *)node_guid)[5] = mac[2]; - ((u8 *)node_guid)[4] = 0xff; - ((u8 *)node_guid)[3] = 0xfe; - ((u8 *)node_guid)[2] = mac[3]; - ((u8 *)node_guid)[1] = mac[4]; - ((u8 *)node_guid)[0] = mac[5]; -} - int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, int vport, u8 mac[ETH_ALEN]) { @@ -1729,13 +1686,15 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, if (!LEGAL_VPORT(esw, vport)) return -EINVAL; + mutex_lock(&esw->state_lock); evport = &esw->vports[vport]; - if (evport->spoofchk && !is_valid_ether_addr(mac)) { + if (evport->info.spoofchk && !is_valid_ether_addr(mac)) { mlx5_core_warn(esw->dev, "MAC invalidation is not allowed when spoofchk is on, vport(%d)\n", vport); - return -EPERM; + err = -EPERM; + goto unlock; } err = mlx5_modify_nic_vport_mac_address(esw->dev, vport, mac); @@ -1743,7 +1702,7 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, mlx5_core_warn(esw->dev, "Failed to mlx5_modify_nic_vport_mac vport(%d) err=(%d)\n", vport, err); - return err; + goto unlock; } node_guid_gen_from_mac(&node_guid, mac); @@ -1753,9 +1712,12 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, "Failed to set vport %d node guid, err = %d. RDMA_CM will not function properly for this VF.\n", vport, err); - mutex_lock(&esw->state_lock); + ether_addr_copy(evport->info.mac, mac); + evport->info.node_guid = node_guid; if (evport->enabled && esw->mode == SRIOV_LEGACY) err = esw_vport_ingress_config(esw, evport); + +unlock: mutex_unlock(&esw->state_lock); return err; } @@ -1763,22 +1725,38 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, int vport, int link_state) { + struct mlx5_vport *evport; + int err = 0; + if (!ESW_ALLOWED(esw)) return -EPERM; if (!LEGAL_VPORT(esw, vport)) return -EINVAL; - return mlx5_modify_vport_admin_state(esw->dev, - MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT, - vport, link_state); + mutex_lock(&esw->state_lock); + evport = &esw->vports[vport]; + + err = mlx5_modify_vport_admin_state(esw->dev, + MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT, + vport, link_state); + if (err) { + mlx5_core_warn(esw->dev, + "Failed to set vport %d link state, err = %d", + vport, err); + goto unlock; + } + + evport->info.link_state = link_state; + +unlock: + mutex_unlock(&esw->state_lock); + return 0; } int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, int vport, struct ifla_vf_info *ivi) { struct mlx5_vport *evport; - u16 vlan; - u8 qos; if (!ESW_ALLOWED(esw)) return -EPERM; @@ -1790,14 +1768,14 @@ int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, memset(ivi, 0, sizeof(*ivi)); ivi->vf = vport - 1; - mlx5_query_nic_vport_mac_address(esw->dev, vport, ivi->mac); - ivi->linkstate = mlx5_query_vport_admin_state(esw->dev, - MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT, - vport); - query_esw_vport_cvlan(esw->dev, vport, &vlan, &qos); - ivi->vlan = vlan; - ivi->qos = qos; - ivi->spoofchk = evport->spoofchk; + mutex_lock(&esw->state_lock); + ether_addr_copy(ivi->mac, evport->info.mac); + ivi->linkstate = evport->info.link_state; + ivi->vlan = evport->info.vlan; + ivi->qos = evport->info.qos; + ivi->spoofchk = evport->info.spoofchk; + ivi->trusted = evport->info.trusted; + mutex_unlock(&esw->state_lock); return 0; } @@ -1817,23 +1795,23 @@ int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, if (vlan || qos) set = 1; + mutex_lock(&esw->state_lock); evport = &esw->vports[vport]; err = modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set); if (err) - return err; + goto unlock; - mutex_lock(&esw->state_lock); - evport->vlan = vlan; - evport->qos = qos; + evport->info.vlan = vlan; + evport->info.qos = qos; if (evport->enabled && esw->mode == SRIOV_LEGACY) { err = esw_vport_ingress_config(esw, evport); if (err) - goto out; + goto unlock; err = esw_vport_egress_config(esw, evport); } -out: +unlock: mutex_unlock(&esw->state_lock); return err; } @@ -1850,16 +1828,14 @@ int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw, if (!LEGAL_VPORT(esw, vport)) return -EINVAL; - evport = &esw->vports[vport]; - mutex_lock(&esw->state_lock); - pschk = evport->spoofchk; - evport->spoofchk = spoofchk; - if (evport->enabled && esw->mode == SRIOV_LEGACY) { + evport = &esw->vports[vport]; + pschk = evport->info.spoofchk; + evport->info.spoofchk = spoofchk; + if (evport->enabled && esw->mode == SRIOV_LEGACY) err = esw_vport_ingress_config(esw, evport); - if (err) - evport->spoofchk = pschk; - } + if (err) + evport->info.spoofchk = pschk; mutex_unlock(&esw->state_lock); return err; @@ -1875,10 +1851,9 @@ int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw, if (!LEGAL_VPORT(esw, vport)) return -EINVAL; - evport = &esw->vports[vport]; - mutex_lock(&esw->state_lock); - evport->trusted = setting; + evport = &esw->vports[vport]; + evport->info.trusted = setting; if (evport->enabled) esw_vport_change_handle_locked(evport); mutex_unlock(&esw->state_lock); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 48c273d5b709..6855783f3bb3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -109,6 +109,16 @@ struct vport_egress { struct mlx5_flow_rule *drop_rule; }; +struct mlx5_vport_info { + u8 mac[ETH_ALEN]; + u16 vlan; + u8 qos; + u64 node_guid; + int link_state; + bool spoofchk; + bool trusted; +}; + struct mlx5_vport { struct mlx5_core_dev *dev; int vport; @@ -121,10 +131,8 @@ struct mlx5_vport { struct vport_ingress ingress; struct vport_egress egress; - u16 vlan; - u8 qos; - bool spoofchk; - bool trusted; + struct mlx5_vport_info info; + bool enabled; u16 enabled_events; }; From 26e59d8077a31972dc81fe5ff75aa4fd5b260b71 Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Fri, 9 Sep 2016 17:35:25 +0300 Subject: [PATCH 0897/1715] net/mlx5e: Implement mlx5e interface attach/detach callbacks Needed to support seamless and lightweight PCI/Internal error recovery. Implement the attach/detach interface callbacks. In attach callback we only allocate HW resources. In detach callback we only deallocate HW resources. All SW/kernel objects initialzing/destroying is kept in add/remove callbacks. Signed-off-by: Mohamad Haj Yahia Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 7 +- .../net/ethernet/mellanox/mlx5/core/en_main.c | 208 +++++++++++++----- .../net/ethernet/mellanox/mlx5/core/en_rep.c | 39 +++- 3 files changed, 187 insertions(+), 67 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 96995609f205..a9358cf7386a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -844,9 +844,12 @@ void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv); int mlx5e_close(struct net_device *netdev); int mlx5e_open(struct net_device *netdev); void mlx5e_update_stats_work(struct work_struct *work); -void *mlx5e_create_netdev(struct mlx5_core_dev *mdev, - const struct mlx5e_profile *profile, void *ppriv); +struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev, + const struct mlx5e_profile *profile, + void *ppriv); void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv); +int mlx5e_attach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev); +void mlx5e_detach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev); struct rtnl_link_stats64 * mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 03586ee68fc4..af4c61e6d589 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1883,6 +1883,9 @@ int mlx5e_close(struct net_device *netdev) struct mlx5e_priv *priv = netdev_priv(netdev); int err; + if (!netif_device_present(netdev)) + return -ENODEV; + mutex_lock(&priv->state_lock); err = mlx5e_close_locked(netdev); mutex_unlock(&priv->state_lock); @@ -3401,13 +3404,13 @@ static const struct mlx5e_profile mlx5e_nic_profile = { .max_tc = MLX5E_MAX_NUM_TC, }; -void *mlx5e_create_netdev(struct mlx5_core_dev *mdev, - const struct mlx5e_profile *profile, void *ppriv) +struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev, + const struct mlx5e_profile *profile, + void *ppriv) { + int nch = profile->max_nch(mdev); struct net_device *netdev; struct mlx5e_priv *priv; - int nch = profile->max_nch(mdev); - int err; netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv), nch * profile->max_tc, @@ -3425,12 +3428,31 @@ void *mlx5e_create_netdev(struct mlx5_core_dev *mdev, priv->wq = create_singlethread_workqueue("mlx5e"); if (!priv->wq) - goto err_free_netdev; + goto err_cleanup_nic; + + return netdev; + +err_cleanup_nic: + profile->cleanup(priv); + free_netdev(netdev); + + return NULL; +} + +int mlx5e_attach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev) +{ + const struct mlx5e_profile *profile; + struct mlx5e_priv *priv; + int err; + + priv = netdev_priv(netdev); + profile = priv->profile; + clear_bit(MLX5E_STATE_DESTROYING, &priv->state); err = mlx5e_create_umr_mkey(priv); if (err) { mlx5_core_err(mdev, "create umr mkey failed, %d\n", err); - goto err_destroy_wq; + goto out; } err = profile->init_tx(priv); @@ -3453,20 +3475,16 @@ void *mlx5e_create_netdev(struct mlx5_core_dev *mdev, mlx5e_set_dev_port_mtu(netdev); - err = register_netdev(netdev); - if (err) { - mlx5_core_err(mdev, "register_netdev failed, %d\n", err); - goto err_dealloc_q_counters; - } - if (profile->enable) profile->enable(priv); - return priv; + rtnl_lock(); + if (netif_running(netdev)) + mlx5e_open(netdev); + netif_device_attach(netdev); + rtnl_unlock(); -err_dealloc_q_counters: - mlx5e_destroy_q_counter(priv); - profile->cleanup_rx(priv); + return 0; err_close_drop_rq: mlx5e_close_drop_rq(priv); @@ -3477,13 +3495,8 @@ err_cleanup_tx: err_destroy_umr_mkey: mlx5_core_destroy_mkey(mdev, &priv->umr_mkey); -err_destroy_wq: - destroy_workqueue(priv->wq); - -err_free_netdev: - free_netdev(netdev); - - return NULL; +out: + return err; } static void mlx5e_register_vport_rep(struct mlx5_core_dev *mdev) @@ -3509,47 +3522,22 @@ static void mlx5e_register_vport_rep(struct mlx5_core_dev *mdev) } } -static void *mlx5e_add(struct mlx5_core_dev *mdev) -{ - struct mlx5_eswitch *esw = mdev->priv.eswitch; - void *ppriv = NULL; - void *ret; - - if (mlx5e_check_required_hca_cap(mdev)) - return NULL; - - if (mlx5e_create_mdev_resources(mdev)) - return NULL; - - mlx5e_register_vport_rep(mdev); - - if (MLX5_CAP_GEN(mdev, vport_group_manager)) - ppriv = &esw->offloads.vport_reps[0]; - - ret = mlx5e_create_netdev(mdev, &mlx5e_nic_profile, ppriv); - if (!ret) { - mlx5e_destroy_mdev_resources(mdev); - return NULL; - } - return ret; -} - -void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv) +void mlx5e_detach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev) { + struct mlx5e_priv *priv = netdev_priv(netdev); const struct mlx5e_profile *profile = priv->profile; - struct net_device *netdev = priv->netdev; set_bit(MLX5E_STATE_DESTROYING, &priv->state); if (profile->disable) profile->disable(priv); flush_workqueue(priv->wq); - if (test_bit(MLX5_INTERFACE_STATE_SHUTDOWN, &mdev->intf_state)) { - netif_device_detach(netdev); + + rtnl_lock(); + if (netif_running(netdev)) mlx5e_close(netdev); - } else { - unregister_netdev(netdev); - } + netif_device_detach(netdev); + rtnl_unlock(); mlx5e_destroy_q_counter(priv); profile->cleanup_rx(priv); @@ -3557,12 +3545,109 @@ void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv) profile->cleanup_tx(priv); mlx5_core_destroy_mkey(priv->mdev, &priv->umr_mkey); cancel_delayed_work_sync(&priv->update_stats_work); +} + +/* mlx5e_attach and mlx5e_detach scope should be only creating/destroying + * hardware contexts and to connect it to the current netdev. + */ +static int mlx5e_attach(struct mlx5_core_dev *mdev, void *vpriv) +{ + struct mlx5e_priv *priv = vpriv; + struct net_device *netdev = priv->netdev; + int err; + + if (netif_device_present(netdev)) + return 0; + + err = mlx5e_create_mdev_resources(mdev); + if (err) + return err; + + err = mlx5e_attach_netdev(mdev, netdev); + if (err) { + mlx5e_destroy_mdev_resources(mdev); + return err; + } + + return 0; +} + +static void mlx5e_detach(struct mlx5_core_dev *mdev, void *vpriv) +{ + struct mlx5e_priv *priv = vpriv; + struct net_device *netdev = priv->netdev; + + if (!netif_device_present(netdev)) + return; + + mlx5e_detach_netdev(mdev, netdev); + mlx5e_destroy_mdev_resources(mdev); +} + +static void *mlx5e_add(struct mlx5_core_dev *mdev) +{ + struct mlx5_eswitch *esw = mdev->priv.eswitch; + int total_vfs = MLX5_TOTAL_VPORTS(mdev); + void *ppriv = NULL; + void *priv; + int vport; + int err; + struct net_device *netdev; + + err = mlx5e_check_required_hca_cap(mdev); + if (err) + return NULL; + + mlx5e_register_vport_rep(mdev); + + if (MLX5_CAP_GEN(mdev, vport_group_manager)) + ppriv = &esw->offloads.vport_reps[0]; + + netdev = mlx5e_create_netdev(mdev, &mlx5e_nic_profile, ppriv); + if (!netdev) { + mlx5_core_err(mdev, "mlx5e_create_netdev failed\n"); + goto err_unregister_reps; + } + + priv = netdev_priv(netdev); + + err = mlx5e_attach(mdev, priv); + if (err) { + mlx5_core_err(mdev, "mlx5e_attach failed, %d\n", err); + goto err_destroy_netdev; + } + + err = register_netdev(netdev); + if (err) { + mlx5_core_err(mdev, "register_netdev failed, %d\n", err); + goto err_detach; + } + + return priv; + +err_detach: + mlx5e_detach(mdev, priv); + +err_destroy_netdev: + mlx5e_destroy_netdev(mdev, priv); + +err_unregister_reps: + for (vport = 1; vport < total_vfs; vport++) + mlx5_eswitch_unregister_vport_rep(esw, vport); + + return NULL; +} + +void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv) +{ + const struct mlx5e_profile *profile = priv->profile; + struct net_device *netdev = priv->netdev; + + unregister_netdev(netdev); destroy_workqueue(priv->wq); if (profile->cleanup) profile->cleanup(priv); - - if (!test_bit(MLX5_INTERFACE_STATE_SHUTDOWN, &mdev->intf_state)) - free_netdev(netdev); + free_netdev(netdev); } static void mlx5e_remove(struct mlx5_core_dev *mdev, void *vpriv) @@ -3572,12 +3657,11 @@ static void mlx5e_remove(struct mlx5_core_dev *mdev, void *vpriv) struct mlx5e_priv *priv = vpriv; int vport; - mlx5e_destroy_netdev(mdev, priv); - for (vport = 1; vport < total_vfs; vport++) mlx5_eswitch_unregister_vport_rep(esw, vport); - mlx5e_destroy_mdev_resources(mdev); + mlx5e_detach(mdev, vpriv); + mlx5e_destroy_netdev(mdev, priv); } static void *mlx5e_get_netdev(void *vpriv) @@ -3590,6 +3674,8 @@ static void *mlx5e_get_netdev(void *vpriv) static struct mlx5_interface mlx5e_interface = { .add = mlx5e_add, .remove = mlx5e_remove, + .attach = mlx5e_attach, + .detach = mlx5e_detach, .event = mlx5e_async_event, .protocol = MLX5_INTERFACE_PROTOCOL_ETH, .get_dev = mlx5e_get_netdev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 29db4735182a..3c97da103d30 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -413,19 +413,50 @@ static struct mlx5e_profile mlx5e_rep_profile = { int mlx5e_vport_rep_load(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep) { - rep->priv_data = mlx5e_create_netdev(esw->dev, &mlx5e_rep_profile, rep); - if (!rep->priv_data) { - mlx5_core_warn(esw->dev, "Failed to create representor for vport %d\n", - rep->vport); + struct net_device *netdev; + int err; + + netdev = mlx5e_create_netdev(esw->dev, &mlx5e_rep_profile, rep); + if (!netdev) { + pr_warn("Failed to create representor netdev for vport %d\n", + rep->vport); return -EINVAL; } + + rep->priv_data = netdev_priv(netdev); + + err = mlx5e_attach_netdev(esw->dev, netdev); + if (err) { + pr_warn("Failed to attach representor netdev for vport %d\n", + rep->vport); + goto err_destroy_netdev; + } + + err = register_netdev(netdev); + if (err) { + pr_warn("Failed to register representor netdev for vport %d\n", + rep->vport); + goto err_detach_netdev; + } + return 0; + +err_detach_netdev: + mlx5e_detach_netdev(esw->dev, netdev); + +err_destroy_netdev: + mlx5e_destroy_netdev(esw->dev, rep->priv_data); + + return err; + } void mlx5e_vport_rep_unload(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep) { struct mlx5e_priv *priv = rep->priv_data; + struct net_device *netdev = priv->netdev; + mlx5e_detach_netdev(esw->dev, netdev); mlx5e_destroy_netdev(esw->dev, priv); } From 9df30601c843aeb9877c966d9d75d4947117c923 Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Fri, 9 Sep 2016 17:35:26 +0300 Subject: [PATCH 0898/1715] net/mlx5e: Restore vlan filter after seamless reset When detaching the mlx5e interface clear all the vlans rules from the vlan flow table. When attaching it back restore all the active vlans rules to the HW. Signed-off-by: Mohamad Haj Yahia Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/en_fs.c | 38 ++++++++++++++++--- 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index 1587a9fd5724..36fbc6b21a33 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -294,6 +294,36 @@ int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto, return 0; } +static void mlx5e_add_vlan_rules(struct mlx5e_priv *priv) +{ + int i; + + mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); + + for_each_set_bit(i, priv->fs.vlan.active_vlans, VLAN_N_VID) { + mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, i); + } + + if (priv->fs.vlan.filter_disabled && + !(priv->netdev->flags & IFF_PROMISC)) + mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, 0); +} + +static void mlx5e_del_vlan_rules(struct mlx5e_priv *priv) +{ + int i; + + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); + + for_each_set_bit(i, priv->fs.vlan.active_vlans, VLAN_N_VID) { + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, i); + } + + if (priv->fs.vlan.filter_disabled && + !(priv->netdev->flags & IFF_PROMISC)) + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, 0); +} + #define mlx5e_for_each_hash_node(hn, tmp, hash, i) \ for (i = 0; i < MLX5E_L2_ADDR_HASH_SIZE; i++) \ hlist_for_each_entry_safe(hn, tmp, &hash[i], hlist) @@ -1024,14 +1054,10 @@ static int mlx5e_create_vlan_table(struct mlx5e_priv *priv) if (err) goto err_free_g; - err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); - if (err) - goto err_destroy_vlan_flow_groups; + mlx5e_add_vlan_rules(priv); return 0; -err_destroy_vlan_flow_groups: - mlx5e_destroy_groups(ft); err_free_g: kfree(ft->g); err_destroy_vlan_table: @@ -1043,6 +1069,7 @@ err_destroy_vlan_table: static void mlx5e_destroy_vlan_table(struct mlx5e_priv *priv) { + mlx5e_del_vlan_rules(priv); mlx5e_destroy_flow_table(&priv->fs.vlan.ft); } @@ -1100,7 +1127,6 @@ err_destroy_arfs_tables: void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv) { - mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); mlx5e_destroy_vlan_table(priv); mlx5e_destroy_l2_table(priv); mlx5e_destroy_ttc_table(priv); From f1ee87fe55c86d4c5adc804db15b3ed06169fba5 Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Fri, 9 Sep 2016 17:35:27 +0300 Subject: [PATCH 0899/1715] net/mlx5: Organize device list API in one place Hide the exposed (external) mlx5_dev_list and mlx5_intf_mutex and expose an organized modular API to manage and manipulate mlx5 devices list. Signed-off-by: Mohamad Haj Yahia Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/Makefile | 2 +- drivers/net/ethernet/mellanox/mlx5/core/dev.c | 345 ++++++++++++++++++ drivers/net/ethernet/mellanox/mlx5/core/lag.c | 24 +- .../net/ethernet/mellanox/mlx5/core/main.c | 270 -------------- .../ethernet/mellanox/mlx5/core/mlx5_core.h | 17 +- 5 files changed, 362 insertions(+), 296 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/dev.c diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index dad326ccd4dd..0343725d7f44 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -3,7 +3,7 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \ mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \ - fs_counters.o rl.o lag.o + fs_counters.o rl.o lag.o dev.o mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o eswitch_offloads.o \ en_main.o en_common.o en_fs.o en_ethtool.o en_tx.o \ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c new file mode 100644 index 000000000000..a9dbc28f6b97 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -0,0 +1,345 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include "mlx5_core.h" + +static LIST_HEAD(intf_list); +static LIST_HEAD(mlx5_dev_list); +/* intf dev list mutex */ +static DEFINE_MUTEX(mlx5_intf_mutex); + +struct mlx5_device_context { + struct list_head list; + struct mlx5_interface *intf; + void *context; + unsigned long state; +}; + +enum { + MLX5_INTERFACE_ADDED, + MLX5_INTERFACE_ATTACHED, +}; + +void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv) +{ + struct mlx5_device_context *dev_ctx; + struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv); + + if (!mlx5_lag_intf_add(intf, priv)) + return; + + dev_ctx = kzalloc(sizeof(*dev_ctx), GFP_KERNEL); + if (!dev_ctx) + return; + + dev_ctx->intf = intf; + dev_ctx->context = intf->add(dev); + set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state); + if (intf->attach) + set_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state); + + if (dev_ctx->context) { + spin_lock_irq(&priv->ctx_lock); + list_add_tail(&dev_ctx->list, &priv->ctx_list); + spin_unlock_irq(&priv->ctx_lock); + } else { + kfree(dev_ctx); + } +} + +static struct mlx5_device_context *mlx5_get_device(struct mlx5_interface *intf, + struct mlx5_priv *priv) +{ + struct mlx5_device_context *dev_ctx; + + list_for_each_entry(dev_ctx, &priv->ctx_list, list) + if (dev_ctx->intf == intf) + return dev_ctx; + return NULL; +} + +void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv) +{ + struct mlx5_device_context *dev_ctx; + struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv); + + dev_ctx = mlx5_get_device(intf, priv); + if (!dev_ctx) + return; + + spin_lock_irq(&priv->ctx_lock); + list_del(&dev_ctx->list); + spin_unlock_irq(&priv->ctx_lock); + + if (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state)) + intf->remove(dev, dev_ctx->context); + + kfree(dev_ctx); +} + +static void mlx5_attach_interface(struct mlx5_interface *intf, struct mlx5_priv *priv) +{ + struct mlx5_device_context *dev_ctx; + struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv); + + dev_ctx = mlx5_get_device(intf, priv); + if (!dev_ctx) + return; + + if (intf->attach) { + if (test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state)) + return; + intf->attach(dev, dev_ctx->context); + set_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state); + } else { + if (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state)) + return; + dev_ctx->context = intf->add(dev); + set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state); + } +} + +void mlx5_attach_device(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + struct mlx5_interface *intf; + + mutex_lock(&mlx5_intf_mutex); + list_for_each_entry(intf, &intf_list, list) + mlx5_attach_interface(intf, priv); + mutex_unlock(&mlx5_intf_mutex); +} + +static void mlx5_detach_interface(struct mlx5_interface *intf, struct mlx5_priv *priv) +{ + struct mlx5_device_context *dev_ctx; + struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv); + + dev_ctx = mlx5_get_device(intf, priv); + if (!dev_ctx) + return; + + if (intf->detach) { + if (!test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state)) + return; + intf->detach(dev, dev_ctx->context); + clear_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state); + } else { + if (!test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state)) + return; + intf->remove(dev, dev_ctx->context); + clear_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state); + } +} + +void mlx5_detach_device(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + struct mlx5_interface *intf; + + mutex_lock(&mlx5_intf_mutex); + list_for_each_entry(intf, &intf_list, list) + mlx5_detach_interface(intf, priv); + mutex_unlock(&mlx5_intf_mutex); +} + +bool mlx5_device_registered(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv; + bool found = false; + + mutex_lock(&mlx5_intf_mutex); + list_for_each_entry(priv, &mlx5_dev_list, dev_list) + if (priv == &dev->priv) + found = true; + mutex_unlock(&mlx5_intf_mutex); + + return found; +} + +int mlx5_register_device(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + struct mlx5_interface *intf; + + mutex_lock(&mlx5_intf_mutex); + list_add_tail(&priv->dev_list, &mlx5_dev_list); + list_for_each_entry(intf, &intf_list, list) + mlx5_add_device(intf, priv); + mutex_unlock(&mlx5_intf_mutex); + + return 0; +} + +void mlx5_unregister_device(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + struct mlx5_interface *intf; + + mutex_lock(&mlx5_intf_mutex); + list_for_each_entry(intf, &intf_list, list) + mlx5_remove_device(intf, priv); + list_del(&priv->dev_list); + mutex_unlock(&mlx5_intf_mutex); +} + +int mlx5_register_interface(struct mlx5_interface *intf) +{ + struct mlx5_priv *priv; + + if (!intf->add || !intf->remove) + return -EINVAL; + + mutex_lock(&mlx5_intf_mutex); + list_add_tail(&intf->list, &intf_list); + list_for_each_entry(priv, &mlx5_dev_list, dev_list) + mlx5_add_device(intf, priv); + mutex_unlock(&mlx5_intf_mutex); + + return 0; +} +EXPORT_SYMBOL(mlx5_register_interface); + +void mlx5_unregister_interface(struct mlx5_interface *intf) +{ + struct mlx5_priv *priv; + + mutex_lock(&mlx5_intf_mutex); + list_for_each_entry(priv, &mlx5_dev_list, dev_list) + mlx5_remove_device(intf, priv); + list_del(&intf->list); + mutex_unlock(&mlx5_intf_mutex); +} +EXPORT_SYMBOL(mlx5_unregister_interface); + +void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol) +{ + struct mlx5_priv *priv = &mdev->priv; + struct mlx5_device_context *dev_ctx; + unsigned long flags; + void *result = NULL; + + spin_lock_irqsave(&priv->ctx_lock, flags); + + list_for_each_entry(dev_ctx, &mdev->priv.ctx_list, list) + if ((dev_ctx->intf->protocol == protocol) && + dev_ctx->intf->get_dev) { + result = dev_ctx->intf->get_dev(dev_ctx->context); + break; + } + + spin_unlock_irqrestore(&priv->ctx_lock, flags); + + return result; +} +EXPORT_SYMBOL(mlx5_get_protocol_dev); + +/* Must be called with intf_mutex held */ +void mlx5_add_dev_by_protocol(struct mlx5_core_dev *dev, int protocol) +{ + struct mlx5_interface *intf; + + list_for_each_entry(intf, &intf_list, list) + if (intf->protocol == protocol) { + mlx5_add_device(intf, &dev->priv); + break; + } +} + +/* Must be called with intf_mutex held */ +void mlx5_remove_dev_by_protocol(struct mlx5_core_dev *dev, int protocol) +{ + struct mlx5_interface *intf; + + list_for_each_entry(intf, &intf_list, list) + if (intf->protocol == protocol) { + mlx5_remove_device(intf, &dev->priv); + break; + } +} + +static u16 mlx5_gen_pci_id(struct mlx5_core_dev *dev) +{ + return (u16)((dev->pdev->bus->number << 8) | + PCI_SLOT(dev->pdev->devfn)); +} + +/* Must be called with intf_mutex held */ +struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev) +{ + u16 pci_id = mlx5_gen_pci_id(dev); + struct mlx5_core_dev *res = NULL; + struct mlx5_core_dev *tmp_dev; + struct mlx5_priv *priv; + + list_for_each_entry(priv, &mlx5_dev_list, dev_list) { + tmp_dev = container_of(priv, struct mlx5_core_dev, priv); + if ((dev != tmp_dev) && (mlx5_gen_pci_id(tmp_dev) == pci_id)) { + res = tmp_dev; + break; + } + } + + return res; +} + +void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, + unsigned long param) +{ + struct mlx5_priv *priv = &dev->priv; + struct mlx5_device_context *dev_ctx; + unsigned long flags; + + spin_lock_irqsave(&priv->ctx_lock, flags); + + list_for_each_entry(dev_ctx, &priv->ctx_list, list) + if (dev_ctx->intf->event) + dev_ctx->intf->event(dev, dev_ctx->context, event, param); + + spin_unlock_irqrestore(&priv->ctx_lock, flags); +} + +void mlx5_dev_list_lock(void) +{ + mutex_lock(&mlx5_intf_mutex); +} + +void mlx5_dev_list_unlock(void) +{ + mutex_unlock(&mlx5_intf_mutex); +} + +int mlx5_dev_list_trylock(void) +{ + return mutex_trylock(&mlx5_intf_mutex); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c index 92c3e0dbcbdc..55957246c0e8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -277,7 +277,7 @@ static void mlx5_do_bond_work(struct work_struct *work) bond_work); int status; - status = mutex_trylock(&mlx5_intf_mutex); + status = mlx5_dev_list_trylock(); if (!status) { /* 1 sec delay. */ mlx5_queue_bond_work(ldev, HZ); @@ -285,7 +285,7 @@ static void mlx5_do_bond_work(struct work_struct *work) } mlx5_do_bond(ldev); - mutex_unlock(&mlx5_intf_mutex); + mlx5_dev_list_unlock(); } static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev, @@ -466,35 +466,21 @@ static void mlx5_lag_dev_remove_pf(struct mlx5_lag *ldev, mutex_unlock(&lag_mutex); } -static u16 mlx5_gen_pci_id(struct mlx5_core_dev *dev) -{ - return (u16)((dev->pdev->bus->number << 8) | - PCI_SLOT(dev->pdev->devfn)); -} /* Must be called with intf_mutex held */ void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev) { struct mlx5_lag *ldev = NULL; struct mlx5_core_dev *tmp_dev; - struct mlx5_priv *priv; - u16 pci_id; if (!MLX5_CAP_GEN(dev, vport_group_manager) || !MLX5_CAP_GEN(dev, lag_master) || (MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_MAX_PORTS)) return; - pci_id = mlx5_gen_pci_id(dev); - - mlx5_core_for_each_priv(priv) { - tmp_dev = container_of(priv, struct mlx5_core_dev, priv); - if ((dev != tmp_dev) && - (mlx5_gen_pci_id(tmp_dev) == pci_id)) { - ldev = tmp_dev->priv.lag; - break; - } - } + tmp_dev = mlx5_get_next_phys_dev(dev); + if (tmp_dev) + ldev = tmp_dev->priv.lag; if (!ldev) { ldev = mlx5_lag_dev_alloc(); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 16660cf16e0d..d9c3c70b29e4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -72,18 +72,6 @@ static int prof_sel = MLX5_DEFAULT_PROF; module_param_named(prof_sel, prof_sel, int, 0444); MODULE_PARM_DESC(prof_sel, "profile selector. Valid range 0 - 2"); -static LIST_HEAD(intf_list); - -LIST_HEAD(mlx5_dev_list); -DEFINE_MUTEX(mlx5_intf_mutex); - -struct mlx5_device_context { - struct list_head list; - struct mlx5_interface *intf; - void *context; - unsigned long state; -}; - enum { MLX5_ATOMIC_REQ_MODE_BE = 0x0, MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS = 0x1, @@ -779,248 +767,6 @@ static int mlx5_core_set_issi(struct mlx5_core_dev *dev) return -ENOTSUPP; } -enum { - MLX5_INTERFACE_ADDED, - MLX5_INTERFACE_ATTACHED, -}; - -static void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv) -{ - struct mlx5_device_context *dev_ctx; - struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv); - - if (!mlx5_lag_intf_add(intf, priv)) - return; - - dev_ctx = kzalloc(sizeof(*dev_ctx), GFP_KERNEL); - if (!dev_ctx) - return; - - dev_ctx->intf = intf; - dev_ctx->context = intf->add(dev); - set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state); - if (intf->attach) - set_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state); - - if (dev_ctx->context) { - spin_lock_irq(&priv->ctx_lock); - list_add_tail(&dev_ctx->list, &priv->ctx_list); - spin_unlock_irq(&priv->ctx_lock); - } else { - kfree(dev_ctx); - } -} - -static struct mlx5_device_context *mlx5_get_device(struct mlx5_interface *intf, - struct mlx5_priv *priv) -{ - struct mlx5_device_context *dev_ctx; - - list_for_each_entry(dev_ctx, &priv->ctx_list, list) - if (dev_ctx->intf == intf) - return dev_ctx; - return NULL; -} - -static void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv) -{ - struct mlx5_device_context *dev_ctx; - struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv); - - dev_ctx = mlx5_get_device(intf, priv); - if (!dev_ctx) - return; - - spin_lock_irq(&priv->ctx_lock); - list_del(&dev_ctx->list); - spin_unlock_irq(&priv->ctx_lock); - - if (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state)) - intf->remove(dev, dev_ctx->context); - - kfree(dev_ctx); -} - -static void mlx5_attach_interface(struct mlx5_interface *intf, struct mlx5_priv *priv) -{ - struct mlx5_device_context *dev_ctx; - struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv); - - dev_ctx = mlx5_get_device(intf, priv); - if (!dev_ctx) - return; - - if (intf->attach) { - if (test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state)) - return; - intf->attach(dev, dev_ctx->context); - set_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state); - } else { - if (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state)) - return; - dev_ctx->context = intf->add(dev); - set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state); - } -} - -static void mlx5_attach_device(struct mlx5_core_dev *dev) -{ - struct mlx5_priv *priv = &dev->priv; - struct mlx5_interface *intf; - - mutex_lock(&mlx5_intf_mutex); - list_for_each_entry(intf, &intf_list, list) - mlx5_attach_interface(intf, priv); - mutex_unlock(&mlx5_intf_mutex); -} - -static void mlx5_detach_interface(struct mlx5_interface *intf, struct mlx5_priv *priv) -{ - struct mlx5_device_context *dev_ctx; - struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv); - - dev_ctx = mlx5_get_device(intf, priv); - if (!dev_ctx) - return; - - if (intf->detach) { - if (!test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state)) - return; - intf->detach(dev, dev_ctx->context); - clear_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state); - } else { - if (!test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state)) - return; - intf->remove(dev, dev_ctx->context); - clear_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state); - } -} - -static void mlx5_detach_device(struct mlx5_core_dev *dev) -{ - struct mlx5_priv *priv = &dev->priv; - struct mlx5_interface *intf; - - mutex_lock(&mlx5_intf_mutex); - list_for_each_entry(intf, &intf_list, list) - mlx5_detach_interface(intf, priv); - mutex_unlock(&mlx5_intf_mutex); -} - -static bool mlx5_device_registered(struct mlx5_core_dev *dev) -{ - struct mlx5_priv *priv; - bool found = false; - - mutex_lock(&mlx5_intf_mutex); - list_for_each_entry(priv, &mlx5_dev_list, dev_list) - if (priv == &dev->priv) - found = true; - mutex_unlock(&mlx5_intf_mutex); - - return found; -} - -static int mlx5_register_device(struct mlx5_core_dev *dev) -{ - struct mlx5_priv *priv = &dev->priv; - struct mlx5_interface *intf; - - mutex_lock(&mlx5_intf_mutex); - list_add_tail(&priv->dev_list, &mlx5_dev_list); - list_for_each_entry(intf, &intf_list, list) - mlx5_add_device(intf, priv); - mutex_unlock(&mlx5_intf_mutex); - - return 0; -} - -static void mlx5_unregister_device(struct mlx5_core_dev *dev) -{ - struct mlx5_priv *priv = &dev->priv; - struct mlx5_interface *intf; - - mutex_lock(&mlx5_intf_mutex); - list_for_each_entry(intf, &intf_list, list) - mlx5_remove_device(intf, priv); - list_del(&priv->dev_list); - mutex_unlock(&mlx5_intf_mutex); -} - -int mlx5_register_interface(struct mlx5_interface *intf) -{ - struct mlx5_priv *priv; - - if (!intf->add || !intf->remove) - return -EINVAL; - - mutex_lock(&mlx5_intf_mutex); - list_add_tail(&intf->list, &intf_list); - list_for_each_entry(priv, &mlx5_dev_list, dev_list) - mlx5_add_device(intf, priv); - mutex_unlock(&mlx5_intf_mutex); - - return 0; -} -EXPORT_SYMBOL(mlx5_register_interface); - -void mlx5_unregister_interface(struct mlx5_interface *intf) -{ - struct mlx5_priv *priv; - - mutex_lock(&mlx5_intf_mutex); - list_for_each_entry(priv, &mlx5_dev_list, dev_list) - mlx5_remove_device(intf, priv); - list_del(&intf->list); - mutex_unlock(&mlx5_intf_mutex); -} -EXPORT_SYMBOL(mlx5_unregister_interface); - -void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol) -{ - struct mlx5_priv *priv = &mdev->priv; - struct mlx5_device_context *dev_ctx; - unsigned long flags; - void *result = NULL; - - spin_lock_irqsave(&priv->ctx_lock, flags); - - list_for_each_entry(dev_ctx, &mdev->priv.ctx_list, list) - if ((dev_ctx->intf->protocol == protocol) && - dev_ctx->intf->get_dev) { - result = dev_ctx->intf->get_dev(dev_ctx->context); - break; - } - - spin_unlock_irqrestore(&priv->ctx_lock, flags); - - return result; -} -EXPORT_SYMBOL(mlx5_get_protocol_dev); - -/* Must be called with intf_mutex held */ -void mlx5_add_dev_by_protocol(struct mlx5_core_dev *dev, int protocol) -{ - struct mlx5_interface *intf; - - list_for_each_entry(intf, &intf_list, list) - if (intf->protocol == protocol) { - mlx5_add_device(intf, &dev->priv); - break; - } -} - -/* Must be called with intf_mutex held */ -void mlx5_remove_dev_by_protocol(struct mlx5_core_dev *dev, int protocol) -{ - struct mlx5_interface *intf; - - list_for_each_entry(intf, &intf_list, list) - if (intf->protocol == protocol) { - mlx5_remove_device(intf, &dev->priv); - break; - } -} static int mlx5_pci_init(struct mlx5_core_dev *dev, struct mlx5_priv *priv) { @@ -1446,22 +1192,6 @@ out: return err; } -void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, - unsigned long param) -{ - struct mlx5_priv *priv = &dev->priv; - struct mlx5_device_context *dev_ctx; - unsigned long flags; - - spin_lock_irqsave(&priv->ctx_lock, flags); - - list_for_each_entry(dev_ctx, &priv->ctx_list, list) - if (dev_ctx->intf->event) - dev_ctx->intf->event(dev, dev_ctx->context, event, param); - - spin_unlock_irqrestore(&priv->ctx_lock, flags); -} - struct mlx5_core_event_handler { void (*event)(struct mlx5_core_dev *dev, enum mlx5_dev_event event, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 04b719a53313..3d0cfb9f18f9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -46,9 +46,6 @@ extern int mlx5_core_debug_mask; -extern struct list_head mlx5_dev_list; -extern struct mutex mlx5_intf_mutex; - #define mlx5_core_dbg(__dev, format, ...) \ dev_dbg(&(__dev)->pdev->dev, "%s:%s:%d:(pid %d): " format, \ (__dev)->priv.name, __func__, __LINE__, current->pid, \ @@ -73,9 +70,6 @@ do { \ #define mlx5_core_info(__dev, format, ...) \ dev_info(&(__dev)->pdev->dev, format, ##__VA_ARGS__) -#define mlx5_core_for_each_priv(__priv) \ - list_for_each_entry(__priv, &mlx5_dev_list, dev_list) - enum { MLX5_CMD_DATA, /* print command payload only */ MLX5_CMD_TIME, /* print command execution time */ @@ -106,8 +100,19 @@ void mlx5_cq_tasklet_cb(unsigned long data); void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev); void mlx5_lag_remove(struct mlx5_core_dev *dev); +void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv); +void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv); +void mlx5_attach_device(struct mlx5_core_dev *dev); +void mlx5_detach_device(struct mlx5_core_dev *dev); +bool mlx5_device_registered(struct mlx5_core_dev *dev); +int mlx5_register_device(struct mlx5_core_dev *dev); +void mlx5_unregister_device(struct mlx5_core_dev *dev); void mlx5_add_dev_by_protocol(struct mlx5_core_dev *dev, int protocol); void mlx5_remove_dev_by_protocol(struct mlx5_core_dev *dev, int protocol); +struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev); +void mlx5_dev_list_lock(void); +void mlx5_dev_list_unlock(void); +int mlx5_dev_list_trylock(void); bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv); From 3a8963acc70e69606729404713cfa9a03b58b18c Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 9 Sep 2016 12:45:24 -0700 Subject: [PATCH 0900/1715] Revert "hv_netvsc: make inline functions static" These functions are used by other code misc-next tree. This reverts commit 30d1de08c87ddde6f73936c3350e7e153988fe02. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc.c | 85 +------------------------------------ include/linux/hyperv.h | 84 ++++++++++++++++++++++++++++++++++++ 2 files changed, 85 insertions(+), 84 deletions(-) diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 2a9ccc4d9e3c..ff05b9b0837f 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -33,89 +33,6 @@ #include "hyperv_net.h" -/* - * An API to support in-place processing of incoming VMBUS packets. - */ -#define VMBUS_PKT_TRAILER 8 - -static struct vmpacket_descriptor * -get_next_pkt_raw(struct vmbus_channel *channel) -{ - struct hv_ring_buffer_info *ring_info = &channel->inbound; - u32 read_loc = ring_info->priv_read_index; - void *ring_buffer = hv_get_ring_buffer(ring_info); - struct vmpacket_descriptor *cur_desc; - u32 packetlen; - u32 dsize = ring_info->ring_datasize; - u32 delta = read_loc - ring_info->ring_buffer->read_index; - u32 bytes_avail_toread = (hv_get_bytes_to_read(ring_info) - delta); - - if (bytes_avail_toread < sizeof(struct vmpacket_descriptor)) - return NULL; - - if ((read_loc + sizeof(*cur_desc)) > dsize) - return NULL; - - cur_desc = ring_buffer + read_loc; - packetlen = cur_desc->len8 << 3; - - /* - * If the packet under consideration is wrapping around, - * return failure. - */ - if ((read_loc + packetlen + VMBUS_PKT_TRAILER) > (dsize - 1)) - return NULL; - - return cur_desc; -} - -/* - * A helper function to step through packets "in-place" - * This API is to be called after each successful call - * get_next_pkt_raw(). - */ -static void put_pkt_raw(struct vmbus_channel *channel, - struct vmpacket_descriptor *desc) -{ - struct hv_ring_buffer_info *ring_info = &channel->inbound; - u32 read_loc = ring_info->priv_read_index; - u32 packetlen = desc->len8 << 3; - u32 dsize = ring_info->ring_datasize; - - BUG_ON((read_loc + packetlen + VMBUS_PKT_TRAILER) > dsize); - - /* - * Include the packet trailer. - */ - ring_info->priv_read_index += packetlen + VMBUS_PKT_TRAILER; -} - -/* - * This call commits the read index and potentially signals the host. - * Here is the pattern for using the "in-place" consumption APIs: - * - * while (get_next_pkt_raw() { - * process the packet "in-place"; - * put_pkt_raw(); - * } - * if (packets processed in place) - * commit_rd_index(); - */ -static void commit_rd_index(struct vmbus_channel *channel) -{ - struct hv_ring_buffer_info *ring_info = &channel->inbound; - /* - * Make sure all reads are done before we update the read index since - * the writer may start writing to the read area once the read index - * is updated. - */ - virt_rmb(); - ring_info->ring_buffer->read_index = ring_info->priv_read_index; - - if (hv_need_to_signal_on_read(ring_info)) - vmbus_set_event(channel); -} - /* * Switch the data path from the synthetic interface to the VF * interface. @@ -840,7 +757,7 @@ static u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device, return msg_size; } -static int netvsc_send_pkt( +static inline int netvsc_send_pkt( struct hv_device *device, struct hv_netvsc_packet *packet, struct netvsc_device *net_device, diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index b01c8c3dd531..5df444b1ac18 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1429,4 +1429,88 @@ static inline bool hv_need_to_signal_on_read(struct hv_ring_buffer_info *rbi) return false; } +/* + * An API to support in-place processing of incoming VMBUS packets. + */ +#define VMBUS_PKT_TRAILER 8 + +static inline struct vmpacket_descriptor * +get_next_pkt_raw(struct vmbus_channel *channel) +{ + struct hv_ring_buffer_info *ring_info = &channel->inbound; + u32 read_loc = ring_info->priv_read_index; + void *ring_buffer = hv_get_ring_buffer(ring_info); + struct vmpacket_descriptor *cur_desc; + u32 packetlen; + u32 dsize = ring_info->ring_datasize; + u32 delta = read_loc - ring_info->ring_buffer->read_index; + u32 bytes_avail_toread = (hv_get_bytes_to_read(ring_info) - delta); + + if (bytes_avail_toread < sizeof(struct vmpacket_descriptor)) + return NULL; + + if ((read_loc + sizeof(*cur_desc)) > dsize) + return NULL; + + cur_desc = ring_buffer + read_loc; + packetlen = cur_desc->len8 << 3; + + /* + * If the packet under consideration is wrapping around, + * return failure. + */ + if ((read_loc + packetlen + VMBUS_PKT_TRAILER) > (dsize - 1)) + return NULL; + + return cur_desc; +} + +/* + * A helper function to step through packets "in-place" + * This API is to be called after each successful call + * get_next_pkt_raw(). + */ +static inline void put_pkt_raw(struct vmbus_channel *channel, + struct vmpacket_descriptor *desc) +{ + struct hv_ring_buffer_info *ring_info = &channel->inbound; + u32 read_loc = ring_info->priv_read_index; + u32 packetlen = desc->len8 << 3; + u32 dsize = ring_info->ring_datasize; + + if ((read_loc + packetlen + VMBUS_PKT_TRAILER) > dsize) + BUG(); + /* + * Include the packet trailer. + */ + ring_info->priv_read_index += packetlen + VMBUS_PKT_TRAILER; +} + +/* + * This call commits the read index and potentially signals the host. + * Here is the pattern for using the "in-place" consumption APIs: + * + * while (get_next_pkt_raw() { + * process the packet "in-place"; + * put_pkt_raw(); + * } + * if (packets processed in place) + * commit_rd_index(); + */ +static inline void commit_rd_index(struct vmbus_channel *channel) +{ + struct hv_ring_buffer_info *ring_info = &channel->inbound; + /* + * Make sure all reads are done before we update the read index since + * the writer may start writing to the read area once the read index + * is updated. + */ + virt_rmb(); + ring_info->ring_buffer->read_index = ring_info->priv_read_index; + + if (hv_need_to_signal_on_read(ring_info)) + vmbus_set_event(channel); +} + + #endif /* _HYPERV_H */ From ed227099dac95128e2aecd62af51bb9d922e5977 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Fri, 9 Sep 2016 17:42:30 -0300 Subject: [PATCH 0901/1715] openvswitch: use alias for genetlink family names When userspace tries to create datapaths and the module is not loaded, it will simply fail. With this patch, the module will be automatically loaded. Signed-off-by: Thadeu Lima de Souza Cascardo Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/datapath.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 524c0fd3078e..0536ab3504d5 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -2437,3 +2437,7 @@ module_exit(dp_cleanup); MODULE_DESCRIPTION("Open vSwitch switching datapath"); MODULE_LICENSE("GPL"); +MODULE_ALIAS_GENL_FAMILY(OVS_DATAPATH_FAMILY); +MODULE_ALIAS_GENL_FAMILY(OVS_VPORT_FAMILY); +MODULE_ALIAS_GENL_FAMILY(OVS_FLOW_FAMILY); +MODULE_ALIAS_GENL_FAMILY(OVS_PACKET_FAMILY); From 2594a2a928a010bf27e6545f90bc2de7ed5ed075 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 9 Sep 2016 14:22:45 -0700 Subject: [PATCH 0902/1715] tcp: better use ooo_last_skb in tcp_data_queue_ofo() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Willem noticed that we could avoid an rbtree lookup if the the attempt to coalesce incoming skb to the last skb failed for some reason. Since most ooo additions are at the tail, this is definitely worth adding a test and fast path. Suggested-by: Willem de Bruijn Signed-off-by: Eric Dumazet Cc: Yaogong Wang Cc: Yuchung Cheng Cc: Neal Cardwell Cc: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a5934c4c8cd4..70b892db9901 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4461,6 +4461,12 @@ coalesce_done: skb = NULL; goto add_sack; } + /* Can avoid an rbtree lookup if we are adding skb after ooo_last_skb */ + if (!before(seq, TCP_SKB_CB(tp->ooo_last_skb)->end_seq)) { + parent = &tp->ooo_last_skb->rbnode; + p = &parent->rb_right; + goto insert; + } /* Find place to insert this segment. Handle overlaps on the way. */ parent = NULL; @@ -4503,7 +4509,7 @@ coalesce_done: } p = &parent->rb_right; } - +insert: /* Insert segment into RB tree. */ rb_link_node(&skb->rbnode, parent, p); rb_insert_color(&skb->rbnode, &tp->out_of_order_queue); From 78706121d59d0692534d087df6eefef5469fc5a1 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Sat, 10 Sep 2016 08:02:06 +0200 Subject: [PATCH 0903/1715] ATM-nicstar: Use kmalloc_array() in get_scq() * A multiplication for the size determination of a memory allocation indicated that an array data structure should be processed. Thus use the corresponding function "kmalloc_array". This issue was detected by using the Coccinelle software. * Replace the specification of a data type by a pointer dereference to make the corresponding size determination a bit safer according to the Linux coding style convention. Signed-off-by: Markus Elfring Signed-off-by: David S. Miller --- drivers/atm/nicstar.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c index 700ed15c2362..50dec1349e14 100644 --- a/drivers/atm/nicstar.c +++ b/drivers/atm/nicstar.c @@ -871,8 +871,9 @@ static scq_info *get_scq(ns_dev *card, int size, u32 scd) kfree(scq); return NULL; } - scq->skb = kmalloc(sizeof(struct sk_buff *) * - (size / NS_SCQE_SIZE), GFP_KERNEL); + scq->skb = kmalloc_array(size / NS_SCQE_SIZE, + sizeof(*scq->skb), + GFP_KERNEL); if (!scq->skb) { dma_free_coherent(&card->pcidev->dev, 2 * size, scq->org, scq->dma); From 24310fd565b16ec7c39e3db5b0240f0cdbbd0475 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Sat, 10 Sep 2016 08:18:10 +0200 Subject: [PATCH 0904/1715] ATM-nicstar: Improve another size determination in get_scq() Replace the specification of a data structure by a pointer dereference as the parameter for the operator "sizeof" to make the corresponding size determination a bit safer according to the Linux coding style convention. Signed-off-by: Markus Elfring Signed-off-by: David S. Miller --- drivers/atm/nicstar.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c index 50dec1349e14..96062e10c8eb 100644 --- a/drivers/atm/nicstar.c +++ b/drivers/atm/nicstar.c @@ -862,7 +862,7 @@ static scq_info *get_scq(ns_dev *card, int size, u32 scd) if (size != VBR_SCQSIZE && size != CBR_SCQSIZE) return NULL; - scq = kmalloc(sizeof(scq_info), GFP_KERNEL); + scq = kmalloc(sizeof(*scq), GFP_KERNEL); if (!scq) return NULL; scq->org = dma_alloc_coherent(&card->pcidev->dev, From ee41f07c2f2c944465ade7c9591a6139a783be41 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Sat, 10 Sep 2016 08:30:09 +0200 Subject: [PATCH 0905/1715] ATM-nicstar: Improve another size determination in ns_init_card() Replace the specification of a data structure by a reference for a field in a local variable as the parameter for the operator "sizeof" to make the corresponding size determination a bit safer according to the Linux coding style convention. Signed-off-by: Markus Elfring Signed-off-by: David S. Miller --- drivers/atm/nicstar.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c index 96062e10c8eb..ef977bf54a40 100644 --- a/drivers/atm/nicstar.c +++ b/drivers/atm/nicstar.c @@ -611,7 +611,7 @@ static int ns_init_card(int i, struct pci_dev *pcidev) for (j = 0; j < card->rct_size; j++) ns_write_sram(card, j * 4, u32d, 4); - memset(card->vcmap, 0, NS_MAX_RCTSIZE * sizeof(vc_map)); + memset(card->vcmap, 0, sizeof(card->vcmap)); for (j = 0; j < NS_FRSCD_NUM; j++) card->scd2vc[j] = NULL; From 304f0a4edbbb2d273d7c6043df4126b623cd472c Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Sat, 10 Sep 2016 08:48:17 +0200 Subject: [PATCH 0906/1715] ATM-nicstar: Refactor a kmalloc() call in ns_init_card() * The script "checkpatch.pl" can point out that assignments should usually not be performed within condition checks. Thus move an assignment for a local variable to a separate statement in this function. * Replace the specification of a data structure by a pointer dereference as the parameter for the operator "sizeof" to make the corresponding size determination a bit safer according to the Linux coding style convention. Signed-off-by: Markus Elfring Signed-off-by: David S. Miller --- drivers/atm/nicstar.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c index ef977bf54a40..04f5781dca30 100644 --- a/drivers/atm/nicstar.c +++ b/drivers/atm/nicstar.c @@ -370,7 +370,8 @@ static int ns_init_card(int i, struct pci_dev *pcidev) return error; } - if ((card = kmalloc(sizeof(ns_dev), GFP_KERNEL)) == NULL) { + card = kmalloc(sizeof(*card), GFP_KERNEL); + if (!card) { printk ("nicstar%d: can't allocate memory for device structure.\n", i); From 0ba8abb770a8771ffb05cce2a5a7441530f06a55 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Sat, 10 Sep 2016 08:56:03 +0200 Subject: [PATCH 0907/1715] ATM-nicstar: Refactor a dev_alloc_skb() call in dequeue_rx() The script "checkpatch.pl" can point out that assignments should usually not be performed within condition checks. Thus move an assignment for a local variable to a separate statement in this function. Signed-off-by: Markus Elfring Signed-off-by: David S. Miller --- drivers/atm/nicstar.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c index 04f5781dca30..c7296b583787 100644 --- a/drivers/atm/nicstar.c +++ b/drivers/atm/nicstar.c @@ -2023,7 +2023,8 @@ static void dequeue_rx(ns_dev * card, ns_rsqe * rsqe) cell = skb->data; for (i = ns_rsqe_cellcount(rsqe); i; i--) { - if ((sb = dev_alloc_skb(NS_SMSKBSIZE)) == NULL) { + sb = dev_alloc_skb(NS_SMSKBSIZE); + if (!sb) { printk ("nicstar%d: Can't allocate buffers for aal0.\n", card->index); From 32230ac1ccbd66f36bd6955eddc45fc06861c1b5 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Sat, 10 Sep 2016 09:55:53 +0200 Subject: [PATCH 0908/1715] ATM-ZeitNet: Use kmalloc_array() in start_tx() * A multiplication for the size determination of a memory allocation indicated that an array data structure should be processed. Thus use the corresponding function "kmalloc_array". This issue was detected by using the Coccinelle software. * Replace the specification of a data type by a pointer dereference to make the corresponding size determination a bit safer according to the Linux coding style convention. Signed-off-by: Markus Elfring Signed-off-by: David S. Miller --- drivers/atm/zatm.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/atm/zatm.c b/drivers/atm/zatm.c index cecfb943762f..d378ff2d3925 100644 --- a/drivers/atm/zatm.c +++ b/drivers/atm/zatm.c @@ -998,8 +998,9 @@ static int start_tx(struct atm_dev *dev) DPRINTK("start_tx\n"); zatm_dev = ZATM_DEV(dev); - zatm_dev->tx_map = kmalloc(sizeof(struct atm_vcc *)* - zatm_dev->chans,GFP_KERNEL); + zatm_dev->tx_map = kmalloc_array(zatm_dev->chans, + sizeof(*zatm_dev->tx_map), + GFP_KERNEL); if (!zatm_dev->tx_map) return -ENOMEM; zatm_dev->tx_bw = ATM_OC3_PCR; zatm_dev->free_shapers = (1 << NR_SHAPERS)-1; From 5ad3ea3d3952dcbb8047f97fbfa49804ea53a53a Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Sat, 10 Sep 2016 10:07:38 +0200 Subject: [PATCH 0909/1715] ATM-ZeitNet: Improve a size determination in zatm_open() Replace the specification of a data structure by a pointer dereference as the parameter for the operator "sizeof" to make the corresponding size determination a bit safer according to the Linux coding style convention. Signed-off-by: Markus Elfring Signed-off-by: David S. Miller --- drivers/atm/zatm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/atm/zatm.c b/drivers/atm/zatm.c index d378ff2d3925..218c6af60f21 100644 --- a/drivers/atm/zatm.c +++ b/drivers/atm/zatm.c @@ -1399,7 +1399,7 @@ static int zatm_open(struct atm_vcc *vcc) DPRINTK(DEV_LABEL "(itf %d): open %d.%d\n",vcc->dev->number,vcc->vpi, vcc->vci); if (!test_bit(ATM_VF_PARTIAL,&vcc->flags)) { - zatm_vcc = kmalloc(sizeof(struct zatm_vcc),GFP_KERNEL); + zatm_vcc = kmalloc(sizeof(*zatm_vcc), GFP_KERNEL); if (!zatm_vcc) { clear_bit(ATM_VF_ADDR,&vcc->flags); return -ENOMEM; From 0f0d0ed0870eca21e36dc520d7d9be292c103f80 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Sat, 10 Sep 2016 10:21:15 +0200 Subject: [PATCH 0910/1715] ATM-ZeitNet: Replace one kzalloc() call by kcalloc() * The script "checkpatch.pl" can point information out like the following. WARNING: Prefer kcalloc over kzalloc with multiply Thus fix the affected source code place. * Replace the specification of a data type by a pointer dereference to make the corresponding size determination a bit safer according to the Linux coding style convention. * Delete the local variable "size" which became unnecessary with this refactoring. Signed-off-by: Markus Elfring Signed-off-by: David S. Miller --- drivers/atm/zatm.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/atm/zatm.c b/drivers/atm/zatm.c index 218c6af60f21..2cc9e2a63d3e 100644 --- a/drivers/atm/zatm.c +++ b/drivers/atm/zatm.c @@ -598,12 +598,13 @@ static void close_rx(struct atm_vcc *vcc) static int start_rx(struct atm_dev *dev) { struct zatm_dev *zatm_dev; - int size,i; + int i; DPRINTK("start_rx\n"); zatm_dev = ZATM_DEV(dev); - size = sizeof(struct atm_vcc *)*zatm_dev->chans; - zatm_dev->rx_map = kzalloc(size,GFP_KERNEL); + zatm_dev->rx_map = kcalloc(zatm_dev->chans, + sizeof(*zatm_dev->rx_map), + GFP_KERNEL); if (!zatm_dev->rx_map) return -ENOMEM; /* set VPI/VCI split (use all VCIs and give what's left to VPIs) */ zpokel(zatm_dev,(1 << dev->ci_range.vci_bits)-1,uPD98401_VRR); From cf9932a9414e241571008edd7412ab22f02b5704 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Sat, 10 Sep 2016 10:38:04 +0200 Subject: [PATCH 0911/1715] ATM-ZeitNet: Fix indentation for one DPRINTK() call in start_rx() Adjust the indentation for a call of the macro "DPRINTK" in this function. Signed-off-by: Markus Elfring Signed-off-by: David S. Miller --- drivers/atm/zatm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/atm/zatm.c b/drivers/atm/zatm.c index 2cc9e2a63d3e..d3dc95484161 100644 --- a/drivers/atm/zatm.c +++ b/drivers/atm/zatm.c @@ -600,7 +600,7 @@ static int start_rx(struct atm_dev *dev) struct zatm_dev *zatm_dev; int i; -DPRINTK("start_rx\n"); + DPRINTK("start_rx\n"); zatm_dev = ZATM_DEV(dev); zatm_dev->rx_map = kcalloc(zatm_dev->chans, sizeof(*zatm_dev->rx_map), From 9ee0034b8f49aaaa7e7c2da8db1038915db99c19 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 10 Sep 2016 12:09:52 -0700 Subject: [PATCH 0912/1715] net: flow: Add l3mdev flow update Add l3mdev hook to set FLOWI_FLAG_SKIP_NH_OIF flag and update oif/iif in flow struct if its oif or iif points to a device enslaved to an L3 Master device. Only 1 needs to be converted to match the l3mdev FIB rule. This moves the flow adjustment for l3mdev to a single point catching all lookups. It is redundant for existing hooks (those are removed in later patches) but is needed for missed lookups such as PMTU updates. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/l3mdev.h | 6 ++++++ net/ipv4/fib_rules.c | 3 +++ net/ipv6/fib6_rules.c | 3 +++ net/l3mdev/l3mdev.c | 35 +++++++++++++++++++++++++++++++++++ 4 files changed, 47 insertions(+) diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index e90095091aa0..81e175e80537 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -49,6 +49,8 @@ struct l3mdev_ops { int l3mdev_fib_rule_match(struct net *net, struct flowi *fl, struct fib_lookup_arg *arg); +void l3mdev_update_flow(struct net *net, struct flowi *fl); + int l3mdev_master_ifindex_rcu(const struct net_device *dev); static inline int l3mdev_master_ifindex(struct net_device *dev) { @@ -290,6 +292,10 @@ int l3mdev_fib_rule_match(struct net *net, struct flowi *fl, { return 1; } +static inline +void l3mdev_update_flow(struct net *net, struct flowi *fl) +{ +} #endif #endif /* _NET_L3MDEV_H_ */ diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 6e9ea69e5f75..770bebed6b28 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -56,6 +56,9 @@ int __fib_lookup(struct net *net, struct flowi4 *flp, }; int err; + /* update flow if oif or iif point to device enslaved to l3mdev */ + l3mdev_update_flow(net, flowi4_to_flowi(flp)); + err = fib_rules_lookup(net->ipv4.rules_ops, flowi4_to_flowi(flp), 0, &arg); #ifdef CONFIG_IP_ROUTE_CLASSID if (arg.rule) diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 5857c1fc8b67..eea23b57c6a5 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -38,6 +38,9 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, .flags = FIB_LOOKUP_NOREF, }; + /* update flow if oif or iif point to device enslaved to l3mdev */ + l3mdev_update_flow(net, flowi6_to_flowi(fl6)); + fib_rules_lookup(net->ipv6.fib6_rules_ops, flowi6_to_flowi(fl6), flags, &arg); diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c index c4a1c3e84e12..43610e5acc4e 100644 --- a/net/l3mdev/l3mdev.c +++ b/net/l3mdev/l3mdev.c @@ -222,3 +222,38 @@ out: return rc; } + +void l3mdev_update_flow(struct net *net, struct flowi *fl) +{ + struct net_device *dev; + int ifindex; + + rcu_read_lock(); + + if (fl->flowi_oif) { + dev = dev_get_by_index_rcu(net, fl->flowi_oif); + if (dev) { + ifindex = l3mdev_master_ifindex_rcu(dev); + if (ifindex) { + fl->flowi_oif = ifindex; + fl->flowi_flags |= FLOWI_FLAG_SKIP_NH_OIF; + goto out; + } + } + } + + if (fl->flowi_iif) { + dev = dev_get_by_index_rcu(net, fl->flowi_iif); + if (dev) { + ifindex = l3mdev_master_ifindex_rcu(dev); + if (ifindex) { + fl->flowi_iif = ifindex; + fl->flowi_flags |= FLOWI_FLAG_SKIP_NH_OIF; + } + } + } + +out: + rcu_read_unlock(); +} +EXPORT_SYMBOL_GPL(l3mdev_update_flow); From a8e3e1a9f02094145580ea7920c6a1d9aabd5539 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 10 Sep 2016 12:09:53 -0700 Subject: [PATCH 0913/1715] net: l3mdev: Add hook to output path This patch adds the infrastructure to the output path to pass an skb to an l3mdev device if it has a hook registered. This is the Tx parallel to l3mdev_ip{6}_rcv in the receive path and is the basis for removing the existing hook that returns the vrf dst on the fib lookup. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/l3mdev.h | 48 ++++++++++++++++++++++++++++++++++++++++++ net/ipv4/ip_output.c | 8 +++++++ net/ipv6/ip6_output.c | 8 +++++++ net/ipv6/output_core.c | 7 ++++++ net/ipv6/raw.c | 7 ++++++ 5 files changed, 78 insertions(+) diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index 81e175e80537..53d5274920e3 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -11,6 +11,7 @@ #ifndef _NET_L3MDEV_H_ #define _NET_L3MDEV_H_ +#include #include /** @@ -18,6 +19,10 @@ * * @l3mdev_fib_table: Get FIB table id to use for lookups * + * @l3mdev_l3_rcv: Hook in L3 receive path + * + * @l3mdev_l3_out: Hook in L3 output path + * * @l3mdev_get_rtable: Get cached IPv4 rtable (dst_entry) for device * * @l3mdev_get_saddr: Get source address for a flow @@ -29,6 +34,9 @@ struct l3mdev_ops { u32 (*l3mdev_fib_table)(const struct net_device *dev); struct sk_buff * (*l3mdev_l3_rcv)(struct net_device *dev, struct sk_buff *skb, u16 proto); + struct sk_buff * (*l3mdev_l3_out)(struct net_device *dev, + struct sock *sk, struct sk_buff *skb, + u16 proto); /* IPv4 ops */ struct rtable * (*l3mdev_get_rtable)(const struct net_device *dev, @@ -201,6 +209,34 @@ struct sk_buff *l3mdev_ip6_rcv(struct sk_buff *skb) return l3mdev_l3_rcv(skb, AF_INET6); } +static inline +struct sk_buff *l3mdev_l3_out(struct sock *sk, struct sk_buff *skb, u16 proto) +{ + struct net_device *dev = skb_dst(skb)->dev; + + if (netif_is_l3_slave(dev)) { + struct net_device *master; + + master = netdev_master_upper_dev_get_rcu(dev); + if (master && master->l3mdev_ops->l3mdev_l3_out) + skb = master->l3mdev_ops->l3mdev_l3_out(master, sk, + skb, proto); + } + + return skb; +} + +static inline +struct sk_buff *l3mdev_ip_out(struct sock *sk, struct sk_buff *skb) +{ + return l3mdev_l3_out(sk, skb, AF_INET); +} + +static inline +struct sk_buff *l3mdev_ip6_out(struct sock *sk, struct sk_buff *skb) +{ + return l3mdev_l3_out(sk, skb, AF_INET6); +} #else static inline int l3mdev_master_ifindex_rcu(const struct net_device *dev) @@ -286,6 +322,18 @@ struct sk_buff *l3mdev_ip6_rcv(struct sk_buff *skb) return skb; } +static inline +struct sk_buff *l3mdev_ip_out(struct sock *sk, struct sk_buff *skb) +{ + return skb; +} + +static inline +struct sk_buff *l3mdev_ip6_out(struct sock *sk, struct sk_buff *skb) +{ + return skb; +} + static inline int l3mdev_fib_rule_match(struct net *net, struct flowi *fl, struct fib_lookup_arg *arg) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index b913f5bf0757..41e10e34769c 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -99,6 +99,14 @@ int __ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) iph->tot_len = htons(skb->len); ip_send_check(iph); + + /* if egress device is enslaved to an L3 master device pass the + * skb to its handler for processing + */ + skb = l3mdev_ip_out(sk, skb); + if (unlikely(!skb)) + return 0; + return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk, skb, NULL, skb_dst(skb)->dev, dst_output); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 993fd9666f1b..6ea6caace3a8 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -236,6 +236,14 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) { IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUT, skb->len); + + /* if egress device is enslaved to an L3 master device pass the + * skb to its handler for processing + */ + skb = l3mdev_ip6_out((struct sock *)sk, skb); + if (unlikely(!skb)) + return 0; + /* hooks should never assume socket lock is held. * we promote our socket to non const */ diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index 462f2a76b5c2..7cca8ac66fe9 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -148,6 +148,13 @@ int __ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) ipv6_hdr(skb)->payload_len = htons(len); IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr); + /* if egress device is enslaved to an L3 master device pass the + * skb to its handler for processing + */ + skb = l3mdev_ip6_out(sk, skb); + if (unlikely(!skb)) + return 0; + return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, NULL, skb_dst(skb)->dev, dst_output); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 590dd1f7746f..54404f08efcc 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -653,6 +653,13 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, if (err) goto error_fault; + /* if egress device is enslaved to an L3 master device pass the + * skb to its handler for processing + */ + skb = l3mdev_ip6_out(sk, skb); + if (unlikely(!skb)) + return 0; + IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, NULL, rt->dst.dev, dst_output); From 5f02ce24c2696fec33f2a5dfcf753996f5fdd211 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 10 Sep 2016 12:09:54 -0700 Subject: [PATCH 0914/1715] net: l3mdev: Allow the l3mdev to be a loopback Allow an L3 master device to act as the loopback for that L3 domain. For IPv4 the device can also have the address 127.0.0.1. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/l3mdev.h | 6 +++--- net/ipv4/route.c | 8 ++++++-- net/ipv6/route.c | 12 ++++++++++-- 3 files changed, 19 insertions(+), 7 deletions(-) diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index 53d5274920e3..3ee110518584 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -90,7 +90,7 @@ static inline int l3mdev_master_ifindex_by_index(struct net *net, int ifindex) } static inline -const struct net_device *l3mdev_master_dev_rcu(const struct net_device *_dev) +struct net_device *l3mdev_master_dev_rcu(const struct net_device *_dev) { /* netdev_master_upper_dev_get_rcu calls * list_first_or_null_rcu to walk the upper dev list. @@ -99,7 +99,7 @@ const struct net_device *l3mdev_master_dev_rcu(const struct net_device *_dev) * typecast to remove the const */ struct net_device *dev = (struct net_device *)_dev; - const struct net_device *master; + struct net_device *master; if (!dev) return NULL; @@ -254,7 +254,7 @@ static inline int l3mdev_master_ifindex_by_index(struct net *net, int ifindex) } static inline -const struct net_device *l3mdev_master_dev_rcu(const struct net_device *dev) +struct net_device *l3mdev_master_dev_rcu(const struct net_device *dev) { return NULL; } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 3e992783c1d0..f49b2c534e92 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2018,7 +2018,9 @@ static struct rtable *__mkroute_output(const struct fib_result *res, return ERR_PTR(-EINVAL); if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev))) - if (ipv4_is_loopback(fl4->saddr) && !(dev_out->flags & IFF_LOOPBACK)) + if (ipv4_is_loopback(fl4->saddr) && + !(dev_out->flags & IFF_LOOPBACK) && + !netif_is_l3_master(dev_out)) return ERR_PTR(-EINVAL); if (ipv4_is_lbcast(fl4->daddr)) @@ -2302,7 +2304,9 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, else fl4->saddr = fl4->daddr; } - dev_out = net->loopback_dev; + + /* L3 master device is the loopback for that domain */ + dev_out = l3mdev_master_dev_rcu(dev_out) ? : net->loopback_dev; fl4->flowi4_oif = dev_out->ifindex; flags |= RTCF_LOCAL; goto make_route; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 09d43ff11a8d..2c681113c055 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2558,8 +2558,16 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, { u32 tb_id; struct net *net = dev_net(idev->dev); - struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, - DST_NOCOUNT); + struct net_device *dev = net->loopback_dev; + struct rt6_info *rt; + + /* use L3 Master device as loopback for host routes if device + * is enslaved and address is not link local or multicast + */ + if (!rt6_need_strict(addr)) + dev = l3mdev_master_dev_rcu(idev->dev) ? : dev; + + rt = ip6_dst_alloc(net, dev, DST_NOCOUNT); if (!rt) return ERR_PTR(-ENOMEM); From ebfc102c566d0d9c174ff9b721fd35ebda01f7eb Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 10 Sep 2016 12:09:55 -0700 Subject: [PATCH 0915/1715] net: vrf: Flip IPv4 output path from FIB lookup hook to out hook Flip the IPv4 output path to use the l3mdev tx out hook. The VRF dst is not returned on the first FIB lookup. Instead, the dst on the skb is switched at the beginning of the IPv4 output processing to send the packet to the VRF driver on xmit. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/vrf.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++- net/ipv4/route.c | 4 --- 2 files changed, 63 insertions(+), 5 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 1ce7420322ee..08540b96ec18 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -227,6 +227,20 @@ static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb, } #endif +/* based on ip_local_out; can't use it b/c the dst is switched pointing to us */ +static int vrf_ip_local_out(struct net *net, struct sock *sk, + struct sk_buff *skb) +{ + int err; + + err = nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk, + skb, NULL, skb_dst(skb)->dev, dst_output); + if (likely(err == 1)) + err = dst_output(net, sk, skb); + + return err; +} + static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb, struct net_device *vrf_dev) { @@ -292,7 +306,7 @@ static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb, RT_SCOPE_LINK); } - ret = ip_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb); + ret = vrf_ip_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb); if (unlikely(net_xmit_eval(ret))) vrf_dev->stats.tx_errors++; else @@ -531,6 +545,53 @@ static int vrf_output(struct net *net, struct sock *sk, struct sk_buff *skb) !(IPCB(skb)->flags & IPSKB_REROUTED)); } +/* set dst on skb to send packet to us via dev_xmit path. Allows + * packet to go through device based features such as qdisc, netfilter + * hooks and packet sockets with skb->dev set to vrf device. + */ +static struct sk_buff *vrf_ip_out(struct net_device *vrf_dev, + struct sock *sk, + struct sk_buff *skb) +{ + struct net_vrf *vrf = netdev_priv(vrf_dev); + struct dst_entry *dst = NULL; + struct rtable *rth; + + rcu_read_lock(); + + rth = rcu_dereference(vrf->rth); + if (likely(rth)) { + dst = &rth->dst; + dst_hold(dst); + } + + rcu_read_unlock(); + + if (unlikely(!dst)) { + vrf_tx_error(vrf_dev, skb); + return NULL; + } + + skb_dst_drop(skb); + skb_dst_set(skb, dst); + + return skb; +} + +/* called with rcu lock held */ +static struct sk_buff *vrf_l3_out(struct net_device *vrf_dev, + struct sock *sk, + struct sk_buff *skb, + u16 proto) +{ + switch (proto) { + case AF_INET: + return vrf_ip_out(vrf_dev, sk, skb); + } + + return skb; +} + /* holding rtnl */ static void vrf_rtable_release(struct net_device *dev, struct net_vrf *vrf) { @@ -1067,6 +1128,7 @@ static const struct l3mdev_ops vrf_l3mdev_ops = { .l3mdev_get_rtable = vrf_get_rtable, .l3mdev_get_saddr = vrf_get_saddr, .l3mdev_l3_rcv = vrf_l3_rcv, + .l3mdev_l3_out = vrf_l3_out, #if IS_ENABLED(CONFIG_IPV6) .l3mdev_get_rt6_dst = vrf_get_rt6_dst, .l3mdev_get_saddr6 = vrf_get_saddr6, diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f49b2c534e92..ad83f85fb240 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2246,10 +2246,6 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, fl4->saddr = inet_select_addr(dev_out, 0, RT_SCOPE_HOST); } - - rth = l3mdev_get_rtable(dev_out, fl4); - if (rth) - goto out; } if (!fl4->daddr) { From 4c1feac58e06270321cc500b85c2d94a11495775 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 10 Sep 2016 12:09:56 -0700 Subject: [PATCH 0916/1715] net: vrf: Flip IPv6 output path from FIB lookup hook to out hook Flip the IPv6 output path to use the l3mdev tx out hook. The VRF dst is not returned on the first FIB lookup. Instead, the dst on the skb is switched at the beginning of the IPv6 output processing to send the packet to the VRF driver on xmit. Link scope addresses (linklocal and multicast) need special handling: specifically the oif the flow struct can not be changed because we want the lookup tied to the enslaved interface. ie., the source address and the returned route MUST point to the interface scope passed in. Convert the existing vrf_get_rt6_dst to handle only link scope addresses. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/vrf.c | 124 ++++++++++++++++++++++++++++--------------- include/net/l3mdev.h | 8 +-- net/ipv6/route.c | 11 ++-- net/l3mdev/l3mdev.c | 15 +++--- 4 files changed, 100 insertions(+), 58 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 08540b96ec18..f5372edf6edc 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -137,6 +137,20 @@ static int vrf_local_xmit(struct sk_buff *skb, struct net_device *dev, } #if IS_ENABLED(CONFIG_IPV6) +static int vrf_ip6_local_out(struct net *net, struct sock *sk, + struct sk_buff *skb) +{ + int err; + + err = nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, + sk, skb, NULL, skb_dst(skb)->dev, dst_output); + + if (likely(err == 1)) + err = dst_output(net, sk, skb); + + return err; +} + static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb, struct net_device *dev) { @@ -207,7 +221,7 @@ static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb, /* strip the ethernet header added for pass through VRF device */ __skb_pull(skb, skb_network_offset(skb)); - ret = ip6_local_out(net, skb->sk, skb); + ret = vrf_ip6_local_out(net, skb->sk, skb); if (unlikely(net_xmit_eval(ret))) dev->stats.tx_errors++; else @@ -391,6 +405,43 @@ static int vrf_output6(struct net *net, struct sock *sk, struct sk_buff *skb) !(IP6CB(skb)->flags & IP6SKB_REROUTED)); } +/* set dst on skb to send packet to us via dev_xmit path. Allows + * packet to go through device based features such as qdisc, netfilter + * hooks and packet sockets with skb->dev set to vrf device. + */ +static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev, + struct sock *sk, + struct sk_buff *skb) +{ + struct net_vrf *vrf = netdev_priv(vrf_dev); + struct dst_entry *dst = NULL; + struct rt6_info *rt6; + + /* don't divert link scope packets */ + if (rt6_need_strict(&ipv6_hdr(skb)->daddr)) + return skb; + + rcu_read_lock(); + + rt6 = rcu_dereference(vrf->rt6); + if (likely(rt6)) { + dst = &rt6->dst; + dst_hold(dst); + } + + rcu_read_unlock(); + + if (unlikely(!dst)) { + vrf_tx_error(vrf_dev, skb); + return NULL; + } + + skb_dst_drop(skb); + skb_dst_set(skb, dst); + + return skb; +} + /* holding rtnl */ static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf) { @@ -477,6 +528,13 @@ out: return rc; } #else +static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev, + struct sock *sk, + struct sk_buff *skb) +{ + return skb; +} + static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf) { } @@ -587,6 +645,8 @@ static struct sk_buff *vrf_l3_out(struct net_device *vrf_dev, switch (proto) { case AF_INET: return vrf_ip_out(vrf_dev, sk, skb); + case AF_INET6: + return vrf_ip6_out(vrf_dev, sk, skb); } return skb; @@ -1031,53 +1091,33 @@ static struct sk_buff *vrf_l3_rcv(struct net_device *vrf_dev, } #if IS_ENABLED(CONFIG_IPV6) -static struct dst_entry *vrf_get_rt6_dst(const struct net_device *dev, - struct flowi6 *fl6) +/* send to link-local or multicast address via interface enslaved to + * VRF device. Force lookup to VRF table without changing flow struct + */ +static struct dst_entry *vrf_link_scope_lookup(const struct net_device *dev, + struct flowi6 *fl6) { - bool need_strict = rt6_need_strict(&fl6->daddr); - struct net_vrf *vrf = netdev_priv(dev); struct net *net = dev_net(dev); + int flags = RT6_LOOKUP_F_IFACE; struct dst_entry *dst = NULL; struct rt6_info *rt; - /* send to link-local or multicast address */ - if (need_strict) { - int flags = RT6_LOOKUP_F_IFACE; - - /* VRF device does not have a link-local address and - * sending packets to link-local or mcast addresses over - * a VRF device does not make sense - */ - if (fl6->flowi6_oif == dev->ifindex) { - struct dst_entry *dst = &net->ipv6.ip6_null_entry->dst; - - dst_hold(dst); - return dst; - } - - if (!ipv6_addr_any(&fl6->saddr)) - flags |= RT6_LOOKUP_F_HAS_SADDR; - - rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif, flags); - if (rt) - dst = &rt->dst; - - } else if (!(fl6->flowi6_flags & FLOWI_FLAG_L3MDEV_SRC)) { - - rcu_read_lock(); - - rt = rcu_dereference(vrf->rt6); - if (likely(rt)) { - dst = &rt->dst; - dst_hold(dst); - } - - rcu_read_unlock(); + /* VRF device does not have a link-local address and + * sending packets to link-local or mcast addresses over + * a VRF device does not make sense + */ + if (fl6->flowi6_oif == dev->ifindex) { + dst = &net->ipv6.ip6_null_entry->dst; + dst_hold(dst); + return dst; } - /* make sure oif is set to VRF device for lookup */ - if (!need_strict) - fl6->flowi6_oif = dev->ifindex; + if (!ipv6_addr_any(&fl6->saddr)) + flags |= RT6_LOOKUP_F_HAS_SADDR; + + rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif, flags); + if (rt) + dst = &rt->dst; return dst; } @@ -1130,7 +1170,7 @@ static const struct l3mdev_ops vrf_l3mdev_ops = { .l3mdev_l3_rcv = vrf_l3_rcv, .l3mdev_l3_out = vrf_l3_out, #if IS_ENABLED(CONFIG_IPV6) - .l3mdev_get_rt6_dst = vrf_get_rt6_dst, + .l3mdev_link_scope_lookup = vrf_link_scope_lookup, .l3mdev_get_saddr6 = vrf_get_saddr6, #endif }; diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index 3ee110518584..51aab20a4d0a 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -27,7 +27,7 @@ * * @l3mdev_get_saddr: Get source address for a flow * - * @l3mdev_get_rt6_dst: Get cached IPv6 rt6_info (dst_entry) for device + * @l3mdev_link_scope_lookup: IPv6 lookup for linklocal and mcast destinations */ struct l3mdev_ops { @@ -45,7 +45,7 @@ struct l3mdev_ops { struct flowi4 *fl4); /* IPv6 ops */ - struct dst_entry * (*l3mdev_get_rt6_dst)(const struct net_device *dev, + struct dst_entry * (*l3mdev_link_scope_lookup)(const struct net_device *dev, struct flowi6 *fl6); int (*l3mdev_get_saddr6)(struct net_device *dev, const struct sock *sk, @@ -177,7 +177,7 @@ static inline bool netif_index_is_l3_master(struct net *net, int ifindex) int l3mdev_get_saddr(struct net *net, int ifindex, struct flowi4 *fl4); -struct dst_entry *l3mdev_get_rt6_dst(struct net *net, struct flowi6 *fl6); +struct dst_entry *l3mdev_link_scope_lookup(struct net *net, struct flowi6 *fl6); int l3mdev_get_saddr6(struct net *net, const struct sock *sk, struct flowi6 *fl6); @@ -299,7 +299,7 @@ static inline int l3mdev_get_saddr(struct net *net, int ifindex, } static inline -struct dst_entry *l3mdev_get_rt6_dst(struct net *net, struct flowi6 *fl6) +struct dst_entry *l3mdev_link_scope_lookup(struct net *net, struct flowi6 *fl6) { return NULL; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 2c681113c055..87e0a01ce744 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1188,12 +1188,15 @@ static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk, struct flowi6 *fl6, int flags) { - struct dst_entry *dst; bool any_src; - dst = l3mdev_get_rt6_dst(net, fl6); - if (dst) - return dst; + if (rt6_need_strict(&fl6->daddr)) { + struct dst_entry *dst; + + dst = l3mdev_link_scope_lookup(net, fl6); + if (dst) + return dst; + } fl6->flowi6_iif = LOOPBACK_IFINDEX; diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c index 43610e5acc4e..ac9d928d0a9e 100644 --- a/net/l3mdev/l3mdev.c +++ b/net/l3mdev/l3mdev.c @@ -100,15 +100,14 @@ u32 l3mdev_fib_table_by_index(struct net *net, int ifindex) EXPORT_SYMBOL_GPL(l3mdev_fib_table_by_index); /** - * l3mdev_get_rt6_dst - IPv6 route lookup based on flow. Returns - * cached route for L3 master device if relevant - * to flow + * l3mdev_link_scope_lookup - IPv6 route lookup based on flow for link + * local and multicast addresses * @net: network namespace for device index lookup * @fl6: IPv6 flow struct for lookup */ -struct dst_entry *l3mdev_get_rt6_dst(struct net *net, - struct flowi6 *fl6) +struct dst_entry *l3mdev_link_scope_lookup(struct net *net, + struct flowi6 *fl6) { struct dst_entry *dst = NULL; struct net_device *dev; @@ -121,15 +120,15 @@ struct dst_entry *l3mdev_get_rt6_dst(struct net *net, dev = netdev_master_upper_dev_get_rcu(dev); if (dev && netif_is_l3_master(dev) && - dev->l3mdev_ops->l3mdev_get_rt6_dst) - dst = dev->l3mdev_ops->l3mdev_get_rt6_dst(dev, fl6); + dev->l3mdev_ops->l3mdev_link_scope_lookup) + dst = dev->l3mdev_ops->l3mdev_link_scope_lookup(dev, fl6); rcu_read_unlock(); } return dst; } -EXPORT_SYMBOL_GPL(l3mdev_get_rt6_dst); +EXPORT_SYMBOL_GPL(l3mdev_link_scope_lookup); /** * l3mdev_get_saddr - get source address for a flow based on an interface From e0d56fdd734224666e7bd5fafbc620286d2a7ee8 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 10 Sep 2016 12:09:57 -0700 Subject: [PATCH 0917/1715] net: l3mdev: remove redundant calls A previous patch added l3mdev flow update making these hooks redundant. Remove them. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/ip_output.c | 3 +-- net/ipv4/route.c | 12 ++---------- net/ipv4/xfrm4_policy.c | 2 +- net/ipv6/ip6_output.c | 2 -- net/ipv6/ndisc.c | 11 ++--------- net/ipv6/route.c | 7 +------ net/ipv6/tcp_ipv6.c | 8 ++------ net/ipv6/xfrm6_policy.c | 2 +- 8 files changed, 10 insertions(+), 37 deletions(-) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 41e10e34769c..05d105832bdb 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1582,8 +1582,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, } oif = arg->bound_dev_if; - if (!oif && netif_index_is_l3_master(net, skb->skb_iif)) - oif = skb->skb_iif; + oif = oif ? : skb->skb_iif; flowi4_init_output(&fl4, oif, IP4_REPLY_MARK(net, skb->mark), diff --git a/net/ipv4/route.c b/net/ipv4/route.c index ad83f85fb240..b52496fd5107 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1831,7 +1831,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, * Now we are ready to route packet. */ fl4.flowi4_oif = 0; - fl4.flowi4_iif = l3mdev_fib_oif_rcu(dev); + fl4.flowi4_iif = dev->ifindex; fl4.flowi4_mark = skb->mark; fl4.flowi4_tos = tos; fl4.flowi4_scope = RT_SCOPE_UNIVERSE; @@ -2150,7 +2150,6 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, unsigned int flags = 0; struct fib_result res; struct rtable *rth; - int master_idx; int orig_oif; int err = -ENETUNREACH; @@ -2160,9 +2159,6 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, orig_oif = fl4->flowi4_oif; - master_idx = l3mdev_master_ifindex_by_index(net, fl4->flowi4_oif); - if (master_idx) - fl4->flowi4_oif = master_idx; fl4->flowi4_iif = LOOPBACK_IFINDEX; fl4->flowi4_tos = tos & IPTOS_RT_MASK; fl4->flowi4_scope = ((tos & RTO_ONLINK) ? @@ -2263,8 +2259,7 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, if (err) { res.fi = NULL; res.table = NULL; - if (fl4->flowi4_oif && - !netif_index_is_l3_master(net, fl4->flowi4_oif)) { + if (fl4->flowi4_oif) { /* Apparently, routing tables are wrong. Assume, that the destination is on link. @@ -2577,9 +2572,6 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0; fl4.flowi4_mark = mark; - if (netif_index_is_l3_master(net, fl4.flowi4_oif)) - fl4.flowi4_flags = FLOWI_FLAG_L3MDEV_SRC | FLOWI_FLAG_SKIP_NH_OIF; - if (iif) { struct net_device *dev; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index b644a23c3db0..3155ed73d3b3 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -112,7 +112,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) int oif = 0; if (skb_dst(skb)) - oif = l3mdev_fib_oif(skb_dst(skb)->dev); + oif = skb_dst(skb)->dev->ifindex; memset(fl4, 0, sizeof(struct flowi4)); fl4->flowi4_mark = skb->mark; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 6ea6caace3a8..1cb41b365048 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1070,8 +1070,6 @@ struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6, return ERR_PTR(err); if (final_dst) fl6->daddr = *final_dst; - if (!fl6->flowi6_oif) - fl6->flowi6_oif = l3mdev_fib_oif(dst->dev); return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index fe65cdc28a45..d8e671457d10 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -67,7 +67,6 @@ #include #include #include -#include #include #include @@ -457,11 +456,9 @@ static void ndisc_send_skb(struct sk_buff *skb, if (!dst) { struct flowi6 fl6; - int oif = l3mdev_fib_oif(skb->dev); + int oif = skb->dev->ifindex; icmpv6_flow_init(sk, &fl6, type, saddr, daddr, oif); - if (oif != skb->dev->ifindex) - fl6.flowi6_flags |= FLOWI_FLAG_L3MDEV_SRC; dst = icmp6_dst_alloc(skb->dev, &fl6); if (IS_ERR(dst)) { kfree_skb(skb); @@ -1538,7 +1535,6 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) int rd_len; u8 ha_buf[MAX_ADDR_LEN], *ha = NULL, ops_data_buf[NDISC_OPS_REDIRECT_DATA_SPACE], *ops_data = NULL; - int oif = l3mdev_fib_oif(dev); bool ret; if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) { @@ -1555,10 +1551,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) } icmpv6_flow_init(sk, &fl6, NDISC_REDIRECT, - &saddr_buf, &ipv6_hdr(skb)->saddr, oif); - - if (oif != skb->dev->ifindex) - fl6.flowi6_flags |= FLOWI_FLAG_L3MDEV_SRC; + &saddr_buf, &ipv6_hdr(skb)->saddr, dev->ifindex); dst = ip6_route_output(net, NULL, &fl6); if (dst->error) { diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 87e0a01ce744..ad4a7ff301fc 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1164,7 +1164,7 @@ void ip6_route_input(struct sk_buff *skb) int flags = RT6_LOOKUP_F_HAS_SADDR; struct ip_tunnel_info *tun_info; struct flowi6 fl6 = { - .flowi6_iif = l3mdev_fib_oif(skb->dev), + .flowi6_iif = skb->dev->ifindex, .daddr = iph->daddr, .saddr = iph->saddr, .flowlabel = ip6_flowinfo(iph), @@ -3349,11 +3349,6 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) } else { fl6.flowi6_oif = oif; - if (netif_index_is_l3_master(net, oif)) { - fl6.flowi6_flags = FLOWI_FLAG_L3MDEV_SRC | - FLOWI_FLAG_SKIP_NH_OIF; - } - rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6); } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 04529a3d42cb..54cf7197c7ab 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -818,12 +818,8 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 fl6.flowi6_proto = IPPROTO_TCP; if (rt6_need_strict(&fl6.daddr) && !oif) fl6.flowi6_oif = tcp_v6_iif(skb); - else { - if (!oif && netif_index_is_l3_master(net, skb->skb_iif)) - oif = skb->skb_iif; - - fl6.flowi6_oif = oif; - } + else + fl6.flowi6_oif = oif ? : skb->skb_iif; fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark); fl6.fl6_dport = t1->dest; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 6cc97003e4a9..b7b7e863a2bb 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -134,7 +134,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) nexthdr = nh[nhoff]; if (skb_dst(skb)) - oif = l3mdev_fib_oif(skb_dst(skb)->dev); + oif = skb_dst(skb)->dev->ifindex; memset(fl6, 0, sizeof(struct flowi6)); fl6->flowi6_mark = skb->mark; From d66f6c0a8f3c0bcc4ee7a9b1da4b0ebe7ee555a3 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 10 Sep 2016 12:09:58 -0700 Subject: [PATCH 0918/1715] net: ipv4: Remove l3mdev_get_saddr No longer needed Signed-off-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/vrf.c | 38 -------------------------------------- include/net/l3mdev.h | 12 ------------ include/net/route.h | 10 ---------- net/ipv4/raw.c | 6 ------ net/ipv4/udp.c | 6 ------ net/l3mdev/l3mdev.c | 31 ------------------------------- 6 files changed, 103 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index f5372edf6edc..9ad2a169485f 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -863,43 +863,6 @@ static struct rtable *vrf_get_rtable(const struct net_device *dev, return rth; } -/* called under rcu_read_lock */ -static int vrf_get_saddr(struct net_device *dev, struct flowi4 *fl4) -{ - struct fib_result res = { .tclassid = 0 }; - struct net *net = dev_net(dev); - u32 orig_tos = fl4->flowi4_tos; - u8 flags = fl4->flowi4_flags; - u8 scope = fl4->flowi4_scope; - u8 tos = RT_FL_TOS(fl4); - int rc; - - if (unlikely(!fl4->daddr)) - return 0; - - fl4->flowi4_flags |= FLOWI_FLAG_SKIP_NH_OIF; - fl4->flowi4_iif = LOOPBACK_IFINDEX; - /* make sure oif is set to VRF device for lookup */ - fl4->flowi4_oif = dev->ifindex; - fl4->flowi4_tos = tos & IPTOS_RT_MASK; - fl4->flowi4_scope = ((tos & RTO_ONLINK) ? - RT_SCOPE_LINK : RT_SCOPE_UNIVERSE); - - rc = fib_lookup(net, fl4, &res, 0); - if (!rc) { - if (res.type == RTN_LOCAL) - fl4->saddr = res.fi->fib_prefsrc ? : fl4->daddr; - else - fib_select_path(net, &res, fl4, -1); - } - - fl4->flowi4_flags = flags; - fl4->flowi4_tos = orig_tos; - fl4->flowi4_scope = scope; - - return rc; -} - static int vrf_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { return 0; @@ -1166,7 +1129,6 @@ static int vrf_get_saddr6(struct net_device *dev, const struct sock *sk, static const struct l3mdev_ops vrf_l3mdev_ops = { .l3mdev_fib_table = vrf_fib_table, .l3mdev_get_rtable = vrf_get_rtable, - .l3mdev_get_saddr = vrf_get_saddr, .l3mdev_l3_rcv = vrf_l3_rcv, .l3mdev_l3_out = vrf_l3_out, #if IS_ENABLED(CONFIG_IPV6) diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index 51aab20a4d0a..1129e1d8cd6e 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -25,8 +25,6 @@ * * @l3mdev_get_rtable: Get cached IPv4 rtable (dst_entry) for device * - * @l3mdev_get_saddr: Get source address for a flow - * * @l3mdev_link_scope_lookup: IPv6 lookup for linklocal and mcast destinations */ @@ -41,8 +39,6 @@ struct l3mdev_ops { /* IPv4 ops */ struct rtable * (*l3mdev_get_rtable)(const struct net_device *dev, const struct flowi4 *fl4); - int (*l3mdev_get_saddr)(struct net_device *dev, - struct flowi4 *fl4); /* IPv6 ops */ struct dst_entry * (*l3mdev_link_scope_lookup)(const struct net_device *dev, @@ -175,8 +171,6 @@ static inline bool netif_index_is_l3_master(struct net *net, int ifindex) return rc; } -int l3mdev_get_saddr(struct net *net, int ifindex, struct flowi4 *fl4); - struct dst_entry *l3mdev_link_scope_lookup(struct net *net, struct flowi6 *fl6); int l3mdev_get_saddr6(struct net *net, const struct sock *sk, struct flowi6 *fl6); @@ -292,12 +286,6 @@ static inline bool netif_index_is_l3_master(struct net *net, int ifindex) return false; } -static inline int l3mdev_get_saddr(struct net *net, int ifindex, - struct flowi4 *fl4) -{ - return 0; -} - static inline struct dst_entry *l3mdev_link_scope_lookup(struct net *net, struct flowi6 *fl6) { diff --git a/include/net/route.h b/include/net/route.h index ad777d79af94..0429d47cad25 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -29,7 +29,6 @@ #include #include #include -#include #include #include #include @@ -285,15 +284,6 @@ static inline struct rtable *ip_route_connect(struct flowi4 *fl4, ip_route_connect_init(fl4, dst, src, tos, oif, protocol, sport, dport, sk); - if (!src && oif) { - int rc; - - rc = l3mdev_get_saddr(net, oif, fl4); - if (rc < 0) - return ERR_PTR(rc); - - src = fl4->saddr; - } if (!dst || !src) { rt = __ip_route_output_key(net, fl4); if (IS_ERR(rt)) diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 438f50c1a676..90a85c955872 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -606,12 +606,6 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) (inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0), daddr, saddr, 0, 0); - if (!saddr && ipc.oif) { - err = l3mdev_get_saddr(net, ipc.oif, &fl4); - if (err < 0) - goto done; - } - if (!inet->hdrincl) { rfv.msg = msg; rfv.hlen = 0; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 058c31286ce1..7d96dc2d3d08 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1021,12 +1021,6 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) flow_flags, faddr, saddr, dport, inet->inet_sport); - if (!saddr && ipc.oif) { - err = l3mdev_get_saddr(net, ipc.oif, fl4); - if (err < 0) - goto out; - } - security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) { diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c index ac9d928d0a9e..be40df60703c 100644 --- a/net/l3mdev/l3mdev.c +++ b/net/l3mdev/l3mdev.c @@ -130,37 +130,6 @@ struct dst_entry *l3mdev_link_scope_lookup(struct net *net, } EXPORT_SYMBOL_GPL(l3mdev_link_scope_lookup); -/** - * l3mdev_get_saddr - get source address for a flow based on an interface - * enslaved to an L3 master device - * @net: network namespace for device index lookup - * @ifindex: Interface index - * @fl4: IPv4 flow struct - */ - -int l3mdev_get_saddr(struct net *net, int ifindex, struct flowi4 *fl4) -{ - struct net_device *dev; - int rc = 0; - - if (ifindex) { - rcu_read_lock(); - - dev = dev_get_by_index_rcu(net, ifindex); - if (dev && netif_is_l3_slave(dev)) - dev = netdev_master_upper_dev_get_rcu(dev); - - if (dev && netif_is_l3_master(dev) && - dev->l3mdev_ops->l3mdev_get_saddr) - rc = dev->l3mdev_ops->l3mdev_get_saddr(dev, fl4); - - rcu_read_unlock(); - } - - return rc; -} -EXPORT_SYMBOL_GPL(l3mdev_get_saddr); - int l3mdev_get_saddr6(struct net *net, const struct sock *sk, struct flowi6 *fl6) { From 8a966fc016b67d2a8ab4a83d22ded8cde032a0eb Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 10 Sep 2016 12:09:59 -0700 Subject: [PATCH 0919/1715] net: ipv6: Remove l3mdev_get_saddr6 No longer needed Signed-off-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/vrf.c | 41 ----------------------------------------- include/net/l3mdev.h | 11 ----------- net/ipv6/ip6_output.c | 9 +-------- net/l3mdev/l3mdev.c | 24 ------------------------ 4 files changed, 1 insertion(+), 84 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 9ad2a169485f..3a34f547c578 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -1084,46 +1084,6 @@ static struct dst_entry *vrf_link_scope_lookup(const struct net_device *dev, return dst; } - -/* called under rcu_read_lock */ -static int vrf_get_saddr6(struct net_device *dev, const struct sock *sk, - struct flowi6 *fl6) -{ - struct net *net = dev_net(dev); - struct dst_entry *dst; - struct rt6_info *rt; - int err; - - if (rt6_need_strict(&fl6->daddr)) { - rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif, - RT6_LOOKUP_F_IFACE); - if (unlikely(!rt)) - return 0; - - dst = &rt->dst; - } else { - __u8 flags = fl6->flowi6_flags; - - fl6->flowi6_flags |= FLOWI_FLAG_L3MDEV_SRC; - fl6->flowi6_flags |= FLOWI_FLAG_SKIP_NH_OIF; - - dst = ip6_route_output(net, sk, fl6); - rt = (struct rt6_info *)dst; - - fl6->flowi6_flags = flags; - } - - err = dst->error; - if (!err) { - err = ip6_route_get_saddr(net, rt, &fl6->daddr, - sk ? inet6_sk(sk)->srcprefs : 0, - &fl6->saddr); - } - - dst_release(dst); - - return err; -} #endif static const struct l3mdev_ops vrf_l3mdev_ops = { @@ -1133,7 +1093,6 @@ static const struct l3mdev_ops vrf_l3mdev_ops = { .l3mdev_l3_out = vrf_l3_out, #if IS_ENABLED(CONFIG_IPV6) .l3mdev_link_scope_lookup = vrf_link_scope_lookup, - .l3mdev_get_saddr6 = vrf_get_saddr6, #endif }; diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index 1129e1d8cd6e..a5e506eb51de 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -43,9 +43,6 @@ struct l3mdev_ops { /* IPv6 ops */ struct dst_entry * (*l3mdev_link_scope_lookup)(const struct net_device *dev, struct flowi6 *fl6); - int (*l3mdev_get_saddr6)(struct net_device *dev, - const struct sock *sk, - struct flowi6 *fl6); }; #ifdef CONFIG_NET_L3_MASTER_DEV @@ -172,8 +169,6 @@ static inline bool netif_index_is_l3_master(struct net *net, int ifindex) } struct dst_entry *l3mdev_link_scope_lookup(struct net *net, struct flowi6 *fl6); -int l3mdev_get_saddr6(struct net *net, const struct sock *sk, - struct flowi6 *fl6); static inline struct sk_buff *l3mdev_l3_rcv(struct sk_buff *skb, u16 proto) @@ -292,12 +287,6 @@ struct dst_entry *l3mdev_link_scope_lookup(struct net *net, struct flowi6 *fl6) return NULL; } -static inline int l3mdev_get_saddr6(struct net *net, const struct sock *sk, - struct flowi6 *fl6) -{ - return 0; -} - static inline struct sk_buff *l3mdev_ip_rcv(struct sk_buff *skb) { diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 1cb41b365048..6001e781164e 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -926,13 +926,6 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, int err; int flags = 0; - if (ipv6_addr_any(&fl6->saddr) && fl6->flowi6_oif && - (!*dst || !(*dst)->error)) { - err = l3mdev_get_saddr6(net, sk, fl6); - if (err) - goto out_err; - } - /* The correct way to handle this would be to do * ip6_route_get_saddr, and then ip6_route_output; however, * the route-specific preferred source forces the @@ -1024,7 +1017,7 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, out_err_release: dst_release(*dst); *dst = NULL; -out_err: + if (err == -ENETUNREACH) IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES); return err; diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c index be40df60703c..8da86ceca33d 100644 --- a/net/l3mdev/l3mdev.c +++ b/net/l3mdev/l3mdev.c @@ -130,30 +130,6 @@ struct dst_entry *l3mdev_link_scope_lookup(struct net *net, } EXPORT_SYMBOL_GPL(l3mdev_link_scope_lookup); -int l3mdev_get_saddr6(struct net *net, const struct sock *sk, - struct flowi6 *fl6) -{ - struct net_device *dev; - int rc = 0; - - if (fl6->flowi6_oif) { - rcu_read_lock(); - - dev = dev_get_by_index_rcu(net, fl6->flowi6_oif); - if (dev && netif_is_l3_slave(dev)) - dev = netdev_master_upper_dev_get_rcu(dev); - - if (dev && netif_is_l3_master(dev) && - dev->l3mdev_ops->l3mdev_get_saddr6) - rc = dev->l3mdev_ops->l3mdev_get_saddr6(dev, sk, fl6); - - rcu_read_unlock(); - } - - return rc; -} -EXPORT_SYMBOL_GPL(l3mdev_get_saddr6); - /** * l3mdev_fib_rule_match - Determine if flowi references an * L3 master device From ca28b8f2b8f316b9973693c72770c98da3e9500e Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 10 Sep 2016 12:10:00 -0700 Subject: [PATCH 0920/1715] net: l3mdev: Remove l3mdev_fib_oif No longer used Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/l3mdev.h | 29 ----------------------------- 1 file changed, 29 deletions(-) diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index a5e506eb51de..a586035c97cb 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -107,26 +107,6 @@ struct net_device *l3mdev_master_dev_rcu(const struct net_device *_dev) return master; } -/* get index of an interface to use for FIB lookups. For devices - * enslaved to an L3 master device FIB lookups are based on the - * master index - */ -static inline int l3mdev_fib_oif_rcu(struct net_device *dev) -{ - return l3mdev_master_ifindex_rcu(dev) ? : dev->ifindex; -} - -static inline int l3mdev_fib_oif(struct net_device *dev) -{ - int oif; - - rcu_read_lock(); - oif = l3mdev_fib_oif_rcu(dev); - rcu_read_unlock(); - - return oif; -} - u32 l3mdev_fib_table_rcu(const struct net_device *dev); u32 l3mdev_fib_table_by_index(struct net *net, int ifindex); static inline u32 l3mdev_fib_table(const struct net_device *dev) @@ -248,15 +228,6 @@ struct net_device *l3mdev_master_dev_rcu(const struct net_device *dev) return NULL; } -static inline int l3mdev_fib_oif_rcu(struct net_device *dev) -{ - return dev ? dev->ifindex : 0; -} -static inline int l3mdev_fib_oif(struct net_device *dev) -{ - return dev ? dev->ifindex : 0; -} - static inline u32 l3mdev_fib_table_rcu(const struct net_device *dev) { return 0; From afb460fe0ef0af6d98ed51006153acb01278df2d Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 10 Sep 2016 12:10:01 -0700 Subject: [PATCH 0921/1715] net: l3mdev: remove get_rtable method No longer used Signed-off-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/vrf.c | 21 --------------------- include/net/l3mdev.h | 21 --------------------- 2 files changed, 42 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 3a34f547c578..ccce59fbb2b3 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -843,26 +843,6 @@ static u32 vrf_fib_table(const struct net_device *dev) return vrf->tb_id; } -static struct rtable *vrf_get_rtable(const struct net_device *dev, - const struct flowi4 *fl4) -{ - struct rtable *rth = NULL; - - if (!(fl4->flowi4_flags & FLOWI_FLAG_L3MDEV_SRC)) { - struct net_vrf *vrf = netdev_priv(dev); - - rcu_read_lock(); - - rth = rcu_dereference(vrf->rth); - if (likely(rth)) - dst_hold(&rth->dst); - - rcu_read_unlock(); - } - - return rth; -} - static int vrf_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { return 0; @@ -1088,7 +1068,6 @@ static struct dst_entry *vrf_link_scope_lookup(const struct net_device *dev, static const struct l3mdev_ops vrf_l3mdev_ops = { .l3mdev_fib_table = vrf_fib_table, - .l3mdev_get_rtable = vrf_get_rtable, .l3mdev_l3_rcv = vrf_l3_rcv, .l3mdev_l3_out = vrf_l3_out, #if IS_ENABLED(CONFIG_IPV6) diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index a586035c97cb..3832099289c5 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -23,8 +23,6 @@ * * @l3mdev_l3_out: Hook in L3 output path * - * @l3mdev_get_rtable: Get cached IPv4 rtable (dst_entry) for device - * * @l3mdev_link_scope_lookup: IPv6 lookup for linklocal and mcast destinations */ @@ -36,10 +34,6 @@ struct l3mdev_ops { struct sock *sk, struct sk_buff *skb, u16 proto); - /* IPv4 ops */ - struct rtable * (*l3mdev_get_rtable)(const struct net_device *dev, - const struct flowi4 *fl4); - /* IPv6 ops */ struct dst_entry * (*l3mdev_link_scope_lookup)(const struct net_device *dev, struct flowi6 *fl6); @@ -120,15 +114,6 @@ static inline u32 l3mdev_fib_table(const struct net_device *dev) return tb_id; } -static inline struct rtable *l3mdev_get_rtable(const struct net_device *dev, - const struct flowi4 *fl4) -{ - if (netif_is_l3_master(dev) && dev->l3mdev_ops->l3mdev_get_rtable) - return dev->l3mdev_ops->l3mdev_get_rtable(dev, fl4); - - return NULL; -} - static inline bool netif_index_is_l3_master(struct net *net, int ifindex) { struct net_device *dev; @@ -241,12 +226,6 @@ static inline u32 l3mdev_fib_table_by_index(struct net *net, int ifindex) return 0; } -static inline struct rtable *l3mdev_get_rtable(const struct net_device *dev, - const struct flowi4 *fl4) -{ - return NULL; -} - static inline bool netif_index_is_l3_master(struct net *net, int ifindex) { return false; From c71ad3d45a5e928e617ca436f3ce88bb773fb766 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 10 Sep 2016 12:10:02 -0700 Subject: [PATCH 0922/1715] net: flow: Remove FLOWI_FLAG_L3MDEV_SRC flag No longer used Signed-off-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/vrf.c | 5 ++--- include/net/flow.h | 3 +-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index ccce59fbb2b3..55674b0e65b7 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -165,7 +165,7 @@ static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb, .flowlabel = ip6_flowinfo(iph), .flowi6_mark = skb->mark, .flowi6_proto = iph->nexthdr, - .flowi6_flags = FLOWI_FLAG_L3MDEV_SRC | FLOWI_FLAG_SKIP_NH_OIF, + .flowi6_flags = FLOWI_FLAG_SKIP_NH_OIF, }; int ret = NET_XMIT_DROP; struct dst_entry *dst; @@ -265,8 +265,7 @@ static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb, .flowi4_oif = vrf_dev->ifindex, .flowi4_iif = LOOPBACK_IFINDEX, .flowi4_tos = RT_TOS(ip4h->tos), - .flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_L3MDEV_SRC | - FLOWI_FLAG_SKIP_NH_OIF, + .flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_SKIP_NH_OIF, .daddr = ip4h->daddr, }; struct net *net = dev_net(vrf_dev); diff --git a/include/net/flow.h b/include/net/flow.h index d47ef4bb5423..035aa7716967 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -34,8 +34,7 @@ struct flowi_common { __u8 flowic_flags; #define FLOWI_FLAG_ANYSRC 0x01 #define FLOWI_FLAG_KNOWN_NH 0x02 -#define FLOWI_FLAG_L3MDEV_SRC 0x04 -#define FLOWI_FLAG_SKIP_NH_OIF 0x08 +#define FLOWI_FLAG_SKIP_NH_OIF 0x04 __u32 flowic_secid; struct flowi_tunnel flowic_tun_key; }; From 02154927c115c7599677df57203988e05b576346 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sat, 10 Sep 2016 12:39:03 -0700 Subject: [PATCH 0923/1715] net: dsa: bcm_sf2: Get VLAN_PORT_MASK from b53_device While migrating the bcm_sf2 driver to use b53_common, we left a small piece untouched where we kept our local copy of the per-port port_vlan_ctl bitmask value. This value is now maintained by b53_device so we need to use it instead of our local (and now stale) copy of it. Fixes: f458995b9ad8 ("net: dsa: bcm_sf2: Utilize core B53 driver when possible") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/bcm_sf2.c | 2 +- drivers/net/dsa/bcm_sf2.h | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 51f1fc0dddc5..5bf4f3452676 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -256,7 +256,7 @@ static int bcm_sf2_port_setup(struct dsa_switch *ds, int port, reg = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(port)); reg &= ~PORT_VLAN_CTRL_MASK; reg |= (1 << port); - reg |= priv->port_sts[port].vlan_ctl_mask; + reg |= priv->dev->ports[port].vlan_ctl_mask; core_writel(priv, reg, CORE_PORT_VLAN_CTL_PORT(port)); bcm_sf2_imp_vlan_setup(ds, cpu_port); diff --git a/drivers/net/dsa/bcm_sf2.h b/drivers/net/dsa/bcm_sf2.h index 46c4ea796574..44692673e1d5 100644 --- a/drivers/net/dsa/bcm_sf2.h +++ b/drivers/net/dsa/bcm_sf2.h @@ -50,8 +50,6 @@ struct bcm_sf2_port_status { unsigned int link; struct ethtool_eee eee; - - u16 vlan_ctl_mask; }; struct bcm_sf2_priv { From b7fb44dacae04219c82f20897382ba34860d1a16 Mon Sep 17 00:00:00 2001 From: Denis Kenzior Date: Wed, 3 Aug 2016 17:02:15 -0500 Subject: [PATCH 0924/1715] nl80211: Allow GET_INTERFACE dumps to be filtered This patch allows GET_INTERFACE dumps to be filtered based on NL80211_ATTR_WIPHY or NL80211_ATTR_WDEV. The documentation for GET_INTERFACE mentions that this is possible: "Request an interface's configuration; either a dump request on a %NL80211_ATTR_WIPHY or ..." However, this behavior has not been implemented until now. Johannes: rewrite most of the patch: * use nl80211_dump_wiphy_parse() to also allow passing an interface to be able to dump its siblings * fix locking (must hold rtnl around using nl80211_fam.attrbuf) * make init self-contained instead of relying on other cb->args Signed-off-by: Denis Kenzior Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 499785778983..7ebad350abc1 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2525,10 +2525,35 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback * int if_idx = 0; int wp_start = cb->args[0]; int if_start = cb->args[1]; + int filter_wiphy = -1; struct cfg80211_registered_device *rdev; struct wireless_dev *wdev; rtnl_lock(); + if (!cb->args[2]) { + struct nl80211_dump_wiphy_state state = { + .filter_wiphy = -1, + }; + int ret; + + ret = nl80211_dump_wiphy_parse(skb, cb, &state); + if (ret) + return ret; + + filter_wiphy = state.filter_wiphy; + + /* + * if filtering, set cb->args[2] to +1 since 0 is the default + * value needed to determine that parsing is necessary. + */ + if (filter_wiphy >= 0) + cb->args[2] = filter_wiphy + 1; + else + cb->args[2] = -1; + } else if (cb->args[2] > 0) { + filter_wiphy = cb->args[2] - 1; + } + list_for_each_entry(rdev, &cfg80211_rdev_list, list) { if (!net_eq(wiphy_net(&rdev->wiphy), sock_net(skb->sk))) continue; @@ -2536,6 +2561,10 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback * wp_idx++; continue; } + + if (filter_wiphy >= 0 && filter_wiphy != rdev->wiphy_idx) + continue; + if_idx = 0; list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { From 5a1f044b5048e834f936fbb33a93e5d8410779ec Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Mon, 29 Aug 2016 23:25:14 +0300 Subject: [PATCH 0925/1715] cfg80211: clarify the requirements of .disconnect() cfg80211 expects the .disconnect() handler to call cfg80211_disconnect() when done. Make this requirement more explicit. Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 9c23f4d33e06..d5e7f690bad9 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2423,7 +2423,8 @@ struct cfg80211_qos_map { * cases, the result of roaming is indicated with a call to * cfg80211_roamed() or cfg80211_roamed_bss(). * (invoked with the wireless_dev mutex held) - * @disconnect: Disconnect from the BSS/ESS. + * @disconnect: Disconnect from the BSS/ESS. Once done, call + * cfg80211_disconnected(). * (invoked with the wireless_dev mutex held) * * @join_ibss: Join the specified IBSS (or create if necessary). Once done, call From d82121845d44334f5ec3c98906d1e4a592350beb Mon Sep 17 00:00:00 2001 From: Aviya Erenfeld Date: Mon, 29 Aug 2016 23:25:15 +0300 Subject: [PATCH 0926/1715] mac80211: refactor monitor representation in sdata Insert the u32 monitor flags variable in a new structure that represents a monitor interface. This will allow to add more configuration variables to that structure which will happen in an upcoming change. Signed-off-by: Aviya Erenfeld Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 8 ++++---- net/mac80211/driver-ops.c | 2 +- net/mac80211/ieee80211_i.h | 6 +++++- net/mac80211/iface.c | 16 ++++++++-------- net/mac80211/rx.c | 4 ++-- net/mac80211/status.c | 2 +- net/mac80211/tx.c | 2 +- net/mac80211/util.c | 2 +- 8 files changed, 23 insertions(+), 19 deletions(-) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 543b1d4fc33d..f2c8cd22d317 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -39,7 +39,7 @@ static struct wireless_dev *ieee80211_add_iface(struct wiphy *wiphy, if (type == NL80211_IFTYPE_MONITOR && flags) { sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); - sdata->u.mntr_flags = *flags; + sdata->u.mntr.flags = *flags; } return wdev; @@ -89,11 +89,11 @@ static int ieee80211_change_iface(struct wiphy *wiphy, * cooked_mntrs, monitor and all fif_* counters * reconfigure hardware */ - if ((*flags & mask) != (sdata->u.mntr_flags & mask)) + if ((*flags & mask) != (sdata->u.mntr.flags & mask)) return -EBUSY; ieee80211_adjust_monitor_flags(sdata, -1); - sdata->u.mntr_flags = *flags; + sdata->u.mntr.flags = *flags; ieee80211_adjust_monitor_flags(sdata, 1); ieee80211_configure_filter(local); @@ -103,7 +103,7 @@ static int ieee80211_change_iface(struct wiphy *wiphy, * and ieee80211_do_open take care of "everything" * mentioned in the comment above. */ - sdata->u.mntr_flags = *flags; + sdata->u.mntr.flags = *flags; } } diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c index c258f1041d33..c701b6438bd9 100644 --- a/net/mac80211/driver-ops.c +++ b/net/mac80211/driver-ops.c @@ -62,7 +62,7 @@ int drv_add_interface(struct ieee80211_local *local, if (WARN_ON(sdata->vif.type == NL80211_IFTYPE_AP_VLAN || (sdata->vif.type == NL80211_IFTYPE_MONITOR && !ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF) && - !(sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE)))) + !(sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE)))) return -EINVAL; trace_drv_add_interface(local, sdata); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index f56d342c31b8..9211cce10d3e 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -824,6 +824,10 @@ struct txq_info { struct ieee80211_txq txq; }; +struct ieee80211_if_mntr { + u32 flags; +}; + struct ieee80211_sub_if_data { struct list_head list; @@ -922,7 +926,7 @@ struct ieee80211_sub_if_data { struct ieee80211_if_ibss ibss; struct ieee80211_if_mesh mesh; struct ieee80211_if_ocb ocb; - u32 mntr_flags; + struct ieee80211_if_mntr mntr; } u; #ifdef CONFIG_MAC80211_DEBUGFS diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index b123a9e325b3..c8509d95e09d 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -188,7 +188,7 @@ static int ieee80211_verify_mac(struct ieee80211_sub_if_data *sdata, u8 *addr, continue; if (iter->vif.type == NL80211_IFTYPE_MONITOR && - !(iter->u.mntr_flags & MONITOR_FLAG_ACTIVE)) + !(iter->u.mntr.flags & MONITOR_FLAG_ACTIVE)) continue; m = iter->vif.addr; @@ -217,7 +217,7 @@ static int ieee80211_change_mac(struct net_device *dev, void *addr) return -EBUSY; if (sdata->vif.type == NL80211_IFTYPE_MONITOR && - !(sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE)) + !(sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE)) check_dup = false; ret = ieee80211_verify_mac(sdata, sa->sa_data, check_dup); @@ -357,7 +357,7 @@ void ieee80211_adjust_monitor_flags(struct ieee80211_sub_if_data *sdata, const int offset) { struct ieee80211_local *local = sdata->local; - u32 flags = sdata->u.mntr_flags; + u32 flags = sdata->u.mntr.flags; #define ADJUST(_f, _s) do { \ if (flags & MONITOR_FLAG_##_f) \ @@ -589,12 +589,12 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) } break; case NL80211_IFTYPE_MONITOR: - if (sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES) { + if (sdata->u.mntr.flags & MONITOR_FLAG_COOK_FRAMES) { local->cooked_mntrs++; break; } - if (sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE) { + if (sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE) { res = drv_add_interface(local, sdata); if (res) goto err_stop; @@ -926,7 +926,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, /* no need to tell driver */ break; case NL80211_IFTYPE_MONITOR: - if (sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES) { + if (sdata->u.mntr.flags & MONITOR_FLAG_COOK_FRAMES) { local->cooked_mntrs--; break; } @@ -1012,7 +1012,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, ieee80211_recalc_idle(local); mutex_unlock(&local->mtx); - if (!(sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE)) + if (!(sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE)) break; /* fall through */ @@ -1444,7 +1444,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, case NL80211_IFTYPE_MONITOR: sdata->dev->type = ARPHRD_IEEE80211_RADIOTAP; sdata->dev->netdev_ops = &ieee80211_monitorif_ops; - sdata->u.mntr_flags = MONITOR_FLAG_CONTROL | + sdata->u.mntr.flags = MONITOR_FLAG_CONTROL | MONITOR_FLAG_OTHER_BSS; break; case NL80211_IFTYPE_WDS: diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 9dce3b157908..708c3b1e49a1 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -567,7 +567,7 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, if (sdata->vif.type != NL80211_IFTYPE_MONITOR) continue; - if (sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES) + if (sdata->u.mntr.flags & MONITOR_FLAG_COOK_FRAMES) continue; if (!ieee80211_sdata_running(sdata)) @@ -3147,7 +3147,7 @@ static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx, continue; if (sdata->vif.type != NL80211_IFTYPE_MONITOR || - !(sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES)) + !(sdata->u.mntr.flags & MONITOR_FLAG_COOK_FRAMES)) continue; if (prev_dev) { diff --git a/net/mac80211/status.c b/net/mac80211/status.c index a2a68269675d..fabd9ff710d9 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -709,7 +709,7 @@ void ieee80211_tx_monitor(struct ieee80211_local *local, struct sk_buff *skb, if (!ieee80211_sdata_running(sdata)) continue; - if ((sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES) && + if ((sdata->u.mntr.flags & MONITOR_FLAG_COOK_FRAMES) && !send_to_cooked) continue; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 1d0746dfea57..efc38e7b90b9 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1643,7 +1643,7 @@ static bool __ieee80211_tx(struct ieee80211_local *local, switch (sdata->vif.type) { case NL80211_IFTYPE_MONITOR: - if (sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE) { + if (sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE) { vif = &sdata->vif; break; } diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 42bf0b6685e8..e777c2a6568f 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -598,7 +598,7 @@ static void __iterate_interfaces(struct ieee80211_local *local, list_for_each_entry_rcu(sdata, &local->interfaces, list) { switch (sdata->vif.type) { case NL80211_IFTYPE_MONITOR: - if (!(sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE)) + if (!(sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE)) continue; break; case NL80211_IFTYPE_AP_VLAN: From e4819013840bbad025ed6da660c1e8b3e9e8430a Mon Sep 17 00:00:00 2001 From: Bhaktipriya Shridhar Date: Wed, 31 Aug 2016 00:35:07 +0530 Subject: [PATCH 0927/1715] cfg80211: Remove deprecated create_singlethread_workqueue The workqueue "cfg80211_wq" is involved in cleanup, scan and event related works. It queues multiple work items &rdev->event_work, &rdev->dfs_update_channels_wk, &wiphy_to_rdev(request->wiphy)->scan_done_wk, &wiphy_to_rdev(wiphy)->sched_scan_results_wk, which require strict execution ordering. Hence, an ordered dedicated workqueue has been used. Since it's a wireless driver, WQ_MEM_RECLAIM has been set to ensure forward progress under memory pressure. Signed-off-by: Bhaktipriya Shridhar Acked-by: Tejun Heo Signed-off-by: Johannes Berg --- net/wireless/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/core.c b/net/wireless/core.c index 2029b49a1df3..4911cd997b9a 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -1252,7 +1252,7 @@ static int __init cfg80211_init(void) if (err) goto out_fail_reg; - cfg80211_wq = create_singlethread_workqueue("cfg80211"); + cfg80211_wq = alloc_ordered_workqueue("cfg80211", WQ_MEM_RECLAIM); if (!cfg80211_wq) { err = -ENOMEM; goto out_fail_wq; From 480dd46b9d6812e5fb7172c305ee0f1154c26eed Mon Sep 17 00:00:00 2001 From: Maxim Altshul Date: Mon, 22 Aug 2016 17:14:04 +0300 Subject: [PATCH 0928/1715] mac80211: RX BA support for sta max_rx_aggregation_subframes The ability to change the max_rx_aggregation frames is useful in cases of IOP. There exist some devices (latest mobile phones and some AP's) that tend to not respect a BA sessions maximum size (in Kbps). These devices won't respect the AMPDU size that was negotiated during association (even though they do respect the maximal number of packets). This violation is characterized by a valid number of packets in a single AMPDU. Even so, the total size will exceed the size negotiated during association. Eventually, this will cause some undefined behavior, which in turn causes the hw to drop packets, causing the throughput to plummet. This patch will make the subframe limitation to be held by each station, instead of being held only by hw. Signed-off-by: Maxim Altshul Signed-off-by: Johannes Berg --- include/net/mac80211.h | 4 ++++ net/mac80211/agg-rx.c | 7 +++++-- net/mac80211/sta_info.c | 3 +++ 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/include/net/mac80211.h b/include/net/mac80211.h index cca510a585c3..a1457ca2a30c 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1735,6 +1735,9 @@ struct ieee80211_sta_rates { * @supp_rates: Bitmap of supported rates (per band) * @ht_cap: HT capabilities of this STA; restricted to our own capabilities * @vht_cap: VHT capabilities of this STA; restricted to our own capabilities + * @max_rx_aggregation_subframes: maximal amount of frames in a single AMPDU + * that this station is allowed to transmit to us. + * Can be modified by driver. * @wme: indicates whether the STA supports QoS/WME (if local devices does, * otherwise always false) * @drv_priv: data area for driver use, will always be aligned to @@ -1775,6 +1778,7 @@ struct ieee80211_sta { u16 aid; struct ieee80211_sta_ht_cap ht_cap; struct ieee80211_sta_vht_cap vht_cap; + u8 max_rx_aggregation_subframes; bool wme; u8 uapsd_queues; u8 max_sp; diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index a9aff6079c42..282e99bdb301 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -298,10 +298,13 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta, buf_size = IEEE80211_MAX_AMPDU_BUF; /* make sure the size doesn't exceed the maximum supported by the hw */ - if (buf_size > local->hw.max_rx_aggregation_subframes) - buf_size = local->hw.max_rx_aggregation_subframes; + if (buf_size > sta->sta.max_rx_aggregation_subframes) + buf_size = sta->sta.max_rx_aggregation_subframes; params.buf_size = buf_size; + ht_dbg(sta->sdata, "AddBA Req buf_size=%d for %pM\n", + buf_size, sta->sta.addr); + /* examine state machine */ mutex_lock(&sta->ampdu_mlme.mtx); diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 19f14c907d74..5e70fa52e1ff 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -340,6 +340,9 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, memcpy(sta->addr, addr, ETH_ALEN); memcpy(sta->sta.addr, addr, ETH_ALEN); + sta->sta.max_rx_aggregation_subframes = + local->hw.max_rx_aggregation_subframes; + sta->local = local; sta->sdata = sdata; sta->rx_stats.last_rx = jiffies; From 42bd20d99857e69e368d5421ea402127d5835cd3 Mon Sep 17 00:00:00 2001 From: Aviya Erenfeld Date: Mon, 29 Aug 2016 23:25:16 +0300 Subject: [PATCH 0929/1715] mac80211: add support for MU-MIMO air sniffer add support to MU-MIMO air sniffer according groupID: in monitor mode, use a given MU-MIMO groupID to monitor stations that belongs to that group using MU-MIMO. add support for following a station according to its MAC address using VHT MU-MIMO sniffer: the monitors wait until they get an action MU-MIMO notification frame, then parses it in order to find the groupID that corresponds to the given MAC address and monitors packets destined to that groupID using VHT MU-MIMO. Signed-off-by: Aviya Erenfeld Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 23 ++++++++++++++++++++++- net/mac80211/driver-ops.h | 3 ++- net/mac80211/ieee80211_i.h | 3 ++- net/mac80211/iface.c | 5 +++++ net/mac80211/rx.c | 20 ++++++++++++++++++++ 5 files changed, 51 insertions(+), 3 deletions(-) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index f2c8cd22d317..5d4afead804e 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -73,8 +73,29 @@ static int ieee80211_change_iface(struct wiphy *wiphy, sdata->u.mgd.use_4addr = params->use_4addr; } - if (sdata->vif.type == NL80211_IFTYPE_MONITOR && flags) { + if (sdata->vif.type == NL80211_IFTYPE_MONITOR) { struct ieee80211_local *local = sdata->local; + struct ieee80211_sub_if_data *monitor_sdata; + u32 mu_mntr_cap_flag = NL80211_EXT_FEATURE_MU_MIMO_AIR_SNIFFER; + + monitor_sdata = rtnl_dereference(local->monitor_sdata); + if (monitor_sdata && + wiphy_ext_feature_isset(wiphy, mu_mntr_cap_flag)) { + memcpy(monitor_sdata->vif.bss_conf.mu_group.membership, + params->vht_mumimo_groups, WLAN_MEMBERSHIP_LEN); + memcpy(monitor_sdata->vif.bss_conf.mu_group.position, + params->vht_mumimo_groups + WLAN_MEMBERSHIP_LEN, + WLAN_USER_POSITION_LEN); + monitor_sdata->vif.mu_mimo_owner = true; + ieee80211_bss_info_change_notify(monitor_sdata, + BSS_CHANGED_MU_GROUPS); + + ether_addr_copy(monitor_sdata->u.mntr.mu_follow_addr, + params->macaddr); + } + + if (!flags) + return 0; if (ieee80211_sdata_running(sdata)) { u32 mask = MONITOR_FLAG_COOK_FRAMES | diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 42a41ae405ba..c39f93b48791 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -162,7 +162,8 @@ static inline void drv_bss_info_changed(struct ieee80211_local *local, return; if (WARN_ON_ONCE(sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE || - sdata->vif.type == NL80211_IFTYPE_MONITOR)) + (sdata->vif.type == NL80211_IFTYPE_MONITOR && + !sdata->vif.mu_mimo_owner))) return; if (!check_sdata_in_driver(sdata)) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 9211cce10d3e..75761686a98b 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -3,7 +3,7 @@ * Copyright 2005, Devicescape Software, Inc. * Copyright 2006-2007 Jiri Benc * Copyright 2007-2010 Johannes Berg - * Copyright 2013-2014 Intel Mobile Communications GmbH + * Copyright 2013-2015 Intel Mobile Communications GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -826,6 +826,7 @@ struct txq_info { struct ieee80211_if_mntr { u32 flags; + u8 mu_follow_addr[ETH_ALEN] __aligned(2); }; struct ieee80211_sub_if_data { diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index c8509d95e09d..b0abddc714ef 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -43,6 +43,8 @@ * by either the RTNL, the iflist_mtx or RCU. */ +static void ieee80211_iface_work(struct work_struct *work); + bool __ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata) { struct ieee80211_chanctx_conf *chanctx_conf; @@ -448,6 +450,9 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local) return ret; } + skb_queue_head_init(&sdata->skb_queue); + INIT_WORK(&sdata->work, ieee80211_iface_work); + return 0; } diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 708c3b1e49a1..6a265aa73a46 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -485,6 +485,9 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, struct net_device *prev_dev = NULL; int present_fcs_len = 0; unsigned int rtap_vendor_space = 0; + struct ieee80211_mgmt *mgmt; + struct ieee80211_sub_if_data *monitor_sdata = + rcu_dereference(local->monitor_sdata); if (unlikely(status->flag & RX_FLAG_RADIOTAP_VENDOR_DATA)) { struct ieee80211_vendor_radiotap *rtap = (void *)origskb->data; @@ -585,6 +588,23 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, ieee80211_rx_stats(sdata->dev, skb->len); } + mgmt = (void *)skb->data; + if (monitor_sdata && + skb->len >= IEEE80211_MIN_ACTION_SIZE + 1 + VHT_MUMIMO_GROUPS_DATA_LEN && + ieee80211_is_action(mgmt->frame_control) && + mgmt->u.action.category == WLAN_CATEGORY_VHT && + mgmt->u.action.u.vht_group_notif.action_code == WLAN_VHT_ACTION_GROUPID_MGMT && + is_valid_ether_addr(monitor_sdata->u.mntr.mu_follow_addr) && + ether_addr_equal(mgmt->da, monitor_sdata->u.mntr.mu_follow_addr)) { + struct sk_buff *mu_skb = skb_copy(skb, GFP_ATOMIC); + + if (mu_skb) { + mu_skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME; + skb_queue_tail(&monitor_sdata->skb_queue, mu_skb); + ieee80211_queue_work(&local->hw, &monitor_sdata->work); + } + } + if (prev_dev) { skb->dev = prev_dev; netif_receive_skb(skb); From 99ee7cae3bf3ce04e90d7b193d9f4f59a7044d91 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Aug 2016 23:25:17 +0300 Subject: [PATCH 0930/1715] mac80211: add support for radiotap timestamp field Use the existing device timestamp from the RX status information to add support for the new radiotap timestamp field. Currently only 32-bit counters are supported, but we also add the radiotap mactime where applicable. This new field allows more flexibility in where the timestamp is taken etc. The non-timestamp data in the field is taken from a new field in the hw struct. Signed-off-by: Johannes Berg --- include/net/ieee80211_radiotap.h | 21 +++++++++++++++++++++ include/net/mac80211.h | 12 ++++++++++++ net/mac80211/main.c | 3 +++ net/mac80211/rx.c | 30 ++++++++++++++++++++++++++++++ 4 files changed, 66 insertions(+) diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h index b0fd9476c538..ba07b9d8ed63 100644 --- a/include/net/ieee80211_radiotap.h +++ b/include/net/ieee80211_radiotap.h @@ -190,6 +190,10 @@ struct ieee80211_radiotap_header { * IEEE80211_RADIOTAP_VHT u16, u8, u8, u8[4], u8, u8, u16 * * Contains VHT information about this frame. + * + * IEEE80211_RADIOTAP_TIMESTAMP u64, u16, u8, u8 variable + * + * Contains timestamp information for this frame. */ enum ieee80211_radiotap_type { IEEE80211_RADIOTAP_TSFT = 0, @@ -214,6 +218,7 @@ enum ieee80211_radiotap_type { IEEE80211_RADIOTAP_MCS = 19, IEEE80211_RADIOTAP_AMPDU_STATUS = 20, IEEE80211_RADIOTAP_VHT = 21, + IEEE80211_RADIOTAP_TIMESTAMP = 22, /* valid in every it_present bitmap, even vendor namespaces */ IEEE80211_RADIOTAP_RADIOTAP_NAMESPACE = 29, @@ -321,6 +326,22 @@ enum ieee80211_radiotap_type { #define IEEE80211_RADIOTAP_CODING_LDPC_USER2 0x04 #define IEEE80211_RADIOTAP_CODING_LDPC_USER3 0x08 +/* For IEEE80211_RADIOTAP_TIMESTAMP */ +#define IEEE80211_RADIOTAP_TIMESTAMP_UNIT_MASK 0x000F +#define IEEE80211_RADIOTAP_TIMESTAMP_UNIT_MS 0x0000 +#define IEEE80211_RADIOTAP_TIMESTAMP_UNIT_US 0x0001 +#define IEEE80211_RADIOTAP_TIMESTAMP_UNIT_NS 0x0003 +#define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_MASK 0x00F0 +#define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_BEGIN_MDPU 0x0000 +#define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_EO_MPDU 0x0010 +#define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_EO_PPDU 0x0020 +#define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_PLCP_SIG_ACQ 0x0030 +#define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_UNKNOWN 0x00F0 + +#define IEEE80211_RADIOTAP_TIMESTAMP_FLAG_64BIT 0x00 +#define IEEE80211_RADIOTAP_TIMESTAMP_FLAG_32BIT 0x01 +#define IEEE80211_RADIOTAP_TIMESTAMP_FLAG_ACCURACY 0x02 + /* helpers */ static inline int ieee80211_get_radiotap_len(unsigned char *data) { diff --git a/include/net/mac80211.h b/include/net/mac80211.h index a1457ca2a30c..08bac23c8de1 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2145,6 +2145,14 @@ enum ieee80211_hw_flags { * the default is _GI | _BANDWIDTH. * Use the %IEEE80211_RADIOTAP_VHT_KNOWN_* values. * + * @radiotap_timestamp: Information for the radiotap timestamp field; if the + * 'units_pos' member is set to a non-negative value it must be set to + * a combination of a IEEE80211_RADIOTAP_TIMESTAMP_UNIT_* and a + * IEEE80211_RADIOTAP_TIMESTAMP_SPOS_* value, and then the timestamp + * field will be added and populated from the &struct ieee80211_rx_status + * device_timestamp. If the 'accuracy' member is non-negative, it's put + * into the accuracy radiotap field and the accuracy known flag is set. + * * @netdev_features: netdev features to be set in each netdev created * from this HW. Note that not all features are usable with mac80211, * other features will be rejected during HW registration. @@ -2188,6 +2196,10 @@ struct ieee80211_hw { u8 offchannel_tx_hw_queue; u8 radiotap_mcs_details; u16 radiotap_vht_details; + struct { + int units_pos; + s16 accuracy; + } radiotap_timestamp; netdev_features_t netdev_features; u8 uapsd_queues; u8 uapsd_max_sp_len; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index d00ea9b13f49..ac053a9df36d 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -660,6 +660,9 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, ieee80211_roc_setup(local); + local->hw.radiotap_timestamp.units_pos = -1; + local->hw.radiotap_timestamp.accuracy = -1; + return &local->hw; err_free: wiphy_free(wiphy); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 6a265aa73a46..284f0f25e22e 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -180,6 +180,11 @@ ieee80211_rx_radiotap_hdrlen(struct ieee80211_local *local, len += 12; } + if (local->hw.radiotap_timestamp.units_pos >= 0) { + len = ALIGN(len, 8); + len += 12; + } + if (status->chains) { /* antenna and antenna signal fields */ len += 2 * hweight8(status->chains); @@ -447,6 +452,31 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, pos += 2; } + if (local->hw.radiotap_timestamp.units_pos >= 0) { + u16 accuracy = 0; + u8 flags = IEEE80211_RADIOTAP_TIMESTAMP_FLAG_32BIT; + + rthdr->it_present |= + cpu_to_le32(1 << IEEE80211_RADIOTAP_TIMESTAMP); + + /* ensure 8 byte alignment */ + while ((pos - (u8 *)rthdr) & 7) + pos++; + + put_unaligned_le64(status->device_timestamp, pos); + pos += sizeof(u64); + + if (local->hw.radiotap_timestamp.accuracy >= 0) { + accuracy = local->hw.radiotap_timestamp.accuracy; + flags |= IEEE80211_RADIOTAP_TIMESTAMP_FLAG_ACCURACY; + } + put_unaligned_le16(accuracy, pos); + pos += sizeof(u16); + + *pos++ = local->hw.radiotap_timestamp.units_pos; + *pos++ = flags; + } + for_each_set_bit(chain, &chains, IEEE80211_MAX_CHAINS) { *pos++ = status->chain_signal[chain]; *pos++ = chain; From bfe40fa395ddc41e45310a4426574703a05e1177 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Aug 2016 23:25:18 +0300 Subject: [PATCH 0931/1715] mac80211: send delBA on unexpected BlockAck data frames When we receive data frames with ACK policy BlockAck, send delBA as requested by the 802.11 spec. Since this would be happening for every frame inside an A-MPDU if it's really received outside a session, limit it to a single attempt. Signed-off-by: Johannes Berg --- net/mac80211/agg-rx.c | 4 +++- net/mac80211/rx.c | 9 ++++++++- net/mac80211/sta_info.h | 3 +++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 282e99bdb301..a5d69dfc8e03 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -409,8 +409,10 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta, } end: - if (status == WLAN_STATUS_SUCCESS) + if (status == WLAN_STATUS_SUCCESS) { __set_bit(tid, sta->ampdu_mlme.agg_session_valid); + __clear_bit(tid, sta->ampdu_mlme.unexpected_agg); + } mutex_unlock(&sta->ampdu_mlme.mtx); end_no_lock: diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 284f0f25e22e..ad636c930f84 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1122,8 +1122,15 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx, tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK; tid_agg_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[tid]); - if (!tid_agg_rx) + if (!tid_agg_rx) { + if (ack_policy == IEEE80211_QOS_CTL_ACK_POLICY_BLOCKACK && + !test_bit(tid, rx->sta->ampdu_mlme.agg_session_valid) && + !test_and_set_bit(tid, rx->sta->ampdu_mlme.unexpected_agg)) + ieee80211_send_delba(rx->sdata, rx->sta->sta.addr, tid, + WLAN_BACK_RECIPIENT, + WLAN_REASON_QSTA_REQUIRE_SETUP); goto dont_reorder; + } /* qos null data frames are excluded */ if (unlikely(hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_NULLFUNC))) diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 0556be3e3628..530231b73278 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -230,6 +230,8 @@ struct tid_ampdu_rx { * @tid_rx_stop_requested: bitmap indicating which BA sessions per TID the * driver requested to close until the work for it runs * @agg_session_valid: bitmap indicating which TID has a rx BA session open on + * @unexpected_agg: bitmap indicating which TID already sent a delBA due to + * unexpected aggregation related frames outside a session * @work: work struct for starting/stopping aggregation * @tid_tx: aggregation info for Tx per TID * @tid_start_tx: sessions where start was requested @@ -244,6 +246,7 @@ struct sta_ampdu_mlme { unsigned long tid_rx_timer_expired[BITS_TO_LONGS(IEEE80211_NUM_TIDS)]; unsigned long tid_rx_stop_requested[BITS_TO_LONGS(IEEE80211_NUM_TIDS)]; unsigned long agg_session_valid[BITS_TO_LONGS(IEEE80211_NUM_TIDS)]; + unsigned long unexpected_agg[BITS_TO_LONGS(IEEE80211_NUM_TIDS)]; /* tx */ struct work_struct work; struct tid_ampdu_tx __rcu *tid_tx[IEEE80211_NUM_TIDS]; From 53f249747d5ec6434415a6895b5690bf4f1d5d7d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Aug 2016 23:25:19 +0300 Subject: [PATCH 0932/1715] mac80211: send delBA on unexpected BlockAck Request If we don't have a BA session, send delBA, as requested by the IEEE 802.11 spec. Apply the same limit of sending such a delBA only once as in the previous patch. Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index ad636c930f84..e796060b7c5e 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2592,6 +2592,12 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx, struct sk_buff_head *frames) tid = le16_to_cpu(bar_data.control) >> 12; + if (!test_bit(tid, rx->sta->ampdu_mlme.agg_session_valid) && + !test_and_set_bit(tid, rx->sta->ampdu_mlme.unexpected_agg)) + ieee80211_send_delba(rx->sdata, rx->sta->sta.addr, tid, + WLAN_BACK_RECIPIENT, + WLAN_REASON_QSTA_REQUIRE_SETUP); + tid_agg_rx = rcu_dereference(rx->sta->ampdu_mlme.tid_rx[tid]); if (!tid_agg_rx) return RX_DROP_MONITOR; From 70ca767ea1b2748f45e96192400e515dddbe517c Mon Sep 17 00:00:00 2001 From: Laura Garcia Liebana Date: Tue, 6 Sep 2016 08:44:19 +0200 Subject: [PATCH 0933/1715] netfilter: nft_hash: Add hash offset value Add support to pass through an offset to the hash value. With this feature, the sysadmin is able to generate a hash with a given offset value. Example: meta mark set jhash ip saddr mod 2 seed 0xabcd offset 100 This option generates marks according to the source address from 100 to 101. Signed-off-by: Laura Garcia Liebana --- include/uapi/linux/netfilter/nf_tables.h | 2 ++ net/netfilter/nft_hash.c | 17 +++++++++++++---- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 24161e25576d..8c653bbd1ead 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -731,6 +731,7 @@ enum nft_meta_keys { * @NFTA_HASH_LEN: source data length (NLA_U32) * @NFTA_HASH_MODULUS: modulus value (NLA_U32) * @NFTA_HASH_SEED: seed value (NLA_U32) + * @NFTA_HASH_OFFSET: add this offset value to hash result (NLA_U32) */ enum nft_hash_attributes { NFTA_HASH_UNSPEC, @@ -739,6 +740,7 @@ enum nft_hash_attributes { NFTA_HASH_LEN, NFTA_HASH_MODULUS, NFTA_HASH_SEED, + NFTA_HASH_OFFSET, __NFTA_HASH_MAX, }; #define NFTA_HASH_MAX (__NFTA_HASH_MAX - 1) diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 764251d31e46..bd12f7a801c2 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -23,6 +23,7 @@ struct nft_hash { u8 len; u32 modulus; u32 seed; + u32 offset; }; static void nft_hash_eval(const struct nft_expr *expr, @@ -31,10 +32,10 @@ static void nft_hash_eval(const struct nft_expr *expr, { struct nft_hash *priv = nft_expr_priv(expr); const void *data = ®s->data[priv->sreg]; + u32 h; - regs->data[priv->dreg] = - reciprocal_scale(jhash(data, priv->len, priv->seed), - priv->modulus); + h = reciprocal_scale(jhash(data, priv->len, priv->seed), priv->modulus); + regs->data[priv->dreg] = h + priv->offset; } static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = { @@ -59,6 +60,9 @@ static int nft_hash_init(const struct nft_ctx *ctx, !tb[NFTA_HASH_MODULUS]) return -EINVAL; + if (tb[NFTA_HASH_OFFSET]) + priv->offset = ntohl(nla_get_be32(tb[NFTA_HASH_OFFSET])); + priv->sreg = nft_parse_register(tb[NFTA_HASH_SREG]); priv->dreg = nft_parse_register(tb[NFTA_HASH_DREG]); @@ -72,6 +76,9 @@ static int nft_hash_init(const struct nft_ctx *ctx, if (priv->modulus <= 1) return -ERANGE; + if (priv->offset + priv->modulus - 1 < U32_MAX) + return -EOVERFLOW; + priv->seed = ntohl(nla_get_be32(tb[NFTA_HASH_SEED])); return nft_validate_register_load(priv->sreg, len) && @@ -94,7 +101,9 @@ static int nft_hash_dump(struct sk_buff *skb, goto nla_put_failure; if (nla_put_be32(skb, NFTA_HASH_SEED, htonl(priv->seed))) goto nla_put_failure; - + if (priv->offset != 0) + if (nla_put_be32(skb, NFTA_HASH_OFFSET, htonl(priv->offset))) + goto nla_put_failure; return 0; nla_put_failure: From dbd2be0646e3239022630c426cbceefa15714bca Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 7 Sep 2016 12:22:18 +0200 Subject: [PATCH 0934/1715] netfilter: nft_dynset: allow to invert match criteria The dynset expression matches if we can fit a new entry into the set. If there is no room for it, then it breaks the rule evaluation. This patch introduces the inversion flag so you can add rules to explicitly drop packets that don't fit into the set. For example: # nft filter input flow table xyz size 4 { ip saddr timeout 120s counter } overflow drop This is useful to provide a replacement for connlimit. For the rule above, every new entry uses the IPv4 address as key in the set, this entry gets a timeout of 120 seconds that gets refresh on every packet seen. If we get new flow and our set already contains 4 entries already, then this packet is dropped. You can already express this in positive logic, assuming default policy to drop: # nft filter input flow table xyz size 4 { ip saddr timeout 10s counter } accept Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 6 ++++++ net/netfilter/nft_dynset.c | 20 +++++++++++++++++++- 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 8c653bbd1ead..bc0eb6a1066d 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -575,6 +575,10 @@ enum nft_dynset_ops { NFT_DYNSET_OP_UPDATE, }; +enum nft_dynset_flags { + NFT_DYNSET_F_INV = (1 << 0), +}; + /** * enum nft_dynset_attributes - dynset expression attributes * @@ -585,6 +589,7 @@ enum nft_dynset_ops { * @NFTA_DYNSET_SREG_DATA: source register of the data (NLA_U32) * @NFTA_DYNSET_TIMEOUT: timeout value for the new element (NLA_U64) * @NFTA_DYNSET_EXPR: expression (NLA_NESTED: nft_expr_attributes) + * @NFTA_DYNSET_FLAGS: flags (NLA_U32) */ enum nft_dynset_attributes { NFTA_DYNSET_UNSPEC, @@ -596,6 +601,7 @@ enum nft_dynset_attributes { NFTA_DYNSET_TIMEOUT, NFTA_DYNSET_EXPR, NFTA_DYNSET_PAD, + NFTA_DYNSET_FLAGS, __NFTA_DYNSET_MAX, }; #define NFTA_DYNSET_MAX (__NFTA_DYNSET_MAX - 1) diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 0af26699bf04..e3b83c31da2e 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -22,6 +22,7 @@ struct nft_dynset { enum nft_dynset_ops op:8; enum nft_registers sreg_key:8; enum nft_registers sreg_data:8; + bool invert; u64 timeout; struct nft_expr *expr; struct nft_set_binding binding; @@ -82,10 +83,14 @@ static void nft_dynset_eval(const struct nft_expr *expr, if (sexpr != NULL) sexpr->ops->eval(sexpr, regs, pkt); + + if (priv->invert) + regs->verdict.code = NFT_BREAK; return; } out: - regs->verdict.code = NFT_BREAK; + if (!priv->invert) + regs->verdict.code = NFT_BREAK; } static const struct nla_policy nft_dynset_policy[NFTA_DYNSET_MAX + 1] = { @@ -96,6 +101,7 @@ static const struct nla_policy nft_dynset_policy[NFTA_DYNSET_MAX + 1] = { [NFTA_DYNSET_SREG_DATA] = { .type = NLA_U32 }, [NFTA_DYNSET_TIMEOUT] = { .type = NLA_U64 }, [NFTA_DYNSET_EXPR] = { .type = NLA_NESTED }, + [NFTA_DYNSET_FLAGS] = { .type = NLA_U32 }, }; static int nft_dynset_init(const struct nft_ctx *ctx, @@ -113,6 +119,15 @@ static int nft_dynset_init(const struct nft_ctx *ctx, tb[NFTA_DYNSET_SREG_KEY] == NULL) return -EINVAL; + if (tb[NFTA_DYNSET_FLAGS]) { + u32 flags = ntohl(nla_get_be32(tb[NFTA_DYNSET_FLAGS])); + + if (flags & ~NFT_DYNSET_F_INV) + return -EINVAL; + if (flags & NFT_DYNSET_F_INV) + priv->invert = true; + } + set = nf_tables_set_lookup(ctx->table, tb[NFTA_DYNSET_SET_NAME], genmask); if (IS_ERR(set)) { @@ -220,6 +235,7 @@ static void nft_dynset_destroy(const struct nft_ctx *ctx, static int nft_dynset_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct nft_dynset *priv = nft_expr_priv(expr); + u32 flags = priv->invert ? NFT_DYNSET_F_INV : 0; if (nft_dump_register(skb, NFTA_DYNSET_SREG_KEY, priv->sreg_key)) goto nla_put_failure; @@ -235,6 +251,8 @@ static int nft_dynset_dump(struct sk_buff *skb, const struct nft_expr *expr) goto nla_put_failure; if (priv->expr && nft_expr_dump(skb, NFTA_DYNSET_EXPR, priv->expr)) goto nla_put_failure; + if (nla_put_be32(skb, NFTA_DYNSET_FLAGS, htonl(flags))) + goto nla_put_failure; return 0; nla_put_failure: From beac5afa2d78605b70f40cf5ab5601ab10659c7f Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 9 Sep 2016 12:42:49 +0200 Subject: [PATCH 0935/1715] netfilter: nf_tables: ensure proper initialization of nft_pktinfo fields This patch introduces nft_set_pktinfo_unspec() that ensures proper initialization all of pktinfo fields for non-IP traffic. This is used by the bridge, netdev and arp families. This new function relies on nft_set_pktinfo_proto_unspec() to set a new tprot_set field that indicates if transport protocol information is available. Remain fields are zeroed. The meta expression has been also updated to check to tprot_set in first place given that zero is a valid tprot value. Even a handcrafted packet may come with the IPPROTO_RAW (255) protocol number so we can't rely on this value as tprot unset. Reported-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 18 ++++++++++++++++++ include/net/netfilter/nf_tables_ipv4.h | 1 + include/net/netfilter/nf_tables_ipv6.h | 1 + net/bridge/netfilter/nf_tables_bridge.c | 6 +++--- net/ipv4/netfilter/nf_tables_arp.c | 2 +- net/netfilter/nf_tables_netdev.c | 4 +++- net/netfilter/nft_meta.c | 2 ++ 7 files changed, 29 insertions(+), 5 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 8972468bc94b..a7a7cebc8d07 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -19,6 +19,7 @@ struct nft_pktinfo { const struct net_device *out; u8 pf; u8 hook; + bool tprot_set; u8 tprot; /* for x_tables compatibility */ struct xt_action_param xt; @@ -36,6 +37,23 @@ static inline void nft_set_pktinfo(struct nft_pktinfo *pkt, pkt->pf = pkt->xt.family = state->pf; } +static inline void nft_set_pktinfo_proto_unspec(struct nft_pktinfo *pkt, + struct sk_buff *skb) +{ + pkt->tprot_set = false; + pkt->tprot = 0; + pkt->xt.thoff = 0; + pkt->xt.fragoff = 0; +} + +static inline void nft_set_pktinfo_unspec(struct nft_pktinfo *pkt, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + nft_set_pktinfo(pkt, skb, state); + nft_set_pktinfo_proto_unspec(pkt, skb); +} + /** * struct nft_verdict - nf_tables verdict * diff --git a/include/net/netfilter/nf_tables_ipv4.h b/include/net/netfilter/nf_tables_ipv4.h index ca6ef6bf775e..af952f7843ee 100644 --- a/include/net/netfilter/nf_tables_ipv4.h +++ b/include/net/netfilter/nf_tables_ipv4.h @@ -14,6 +14,7 @@ nft_set_pktinfo_ipv4(struct nft_pktinfo *pkt, nft_set_pktinfo(pkt, skb, state); ip = ip_hdr(pkt->skb); + pkt->tprot_set = true; pkt->tprot = ip->protocol; pkt->xt.thoff = ip_hdrlen(pkt->skb); pkt->xt.fragoff = ntohs(ip->frag_off) & IP_OFFSET; diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h index 8ad39a6a5fe1..6aeee47b1b5e 100644 --- a/include/net/netfilter/nf_tables_ipv6.h +++ b/include/net/netfilter/nf_tables_ipv6.h @@ -19,6 +19,7 @@ nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt, if (protohdr < 0) return -1; + pkt->tprot_set = true; pkt->tprot = protohdr; pkt->xt.thoff = thoff; pkt->xt.fragoff = frag_off; diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c index a78c4e2826e5..29899887163e 100644 --- a/net/bridge/netfilter/nf_tables_bridge.c +++ b/net/bridge/netfilter/nf_tables_bridge.c @@ -71,7 +71,7 @@ static inline void nft_bridge_set_pktinfo_ipv4(struct nft_pktinfo *pkt, if (nft_bridge_iphdr_validate(skb)) nft_set_pktinfo_ipv4(pkt, skb, state); else - nft_set_pktinfo(pkt, skb, state); + nft_set_pktinfo_unspec(pkt, skb, state); } static inline void nft_bridge_set_pktinfo_ipv6(struct nft_pktinfo *pkt, @@ -83,7 +83,7 @@ static inline void nft_bridge_set_pktinfo_ipv6(struct nft_pktinfo *pkt, nft_set_pktinfo_ipv6(pkt, skb, state) == 0) return; #endif - nft_set_pktinfo(pkt, skb, state); + nft_set_pktinfo_unspec(pkt, skb, state); } static unsigned int @@ -101,7 +101,7 @@ nft_do_chain_bridge(void *priv, nft_bridge_set_pktinfo_ipv6(&pkt, skb, state); break; default: - nft_set_pktinfo(&pkt, skb, state); + nft_set_pktinfo_unspec(&pkt, skb, state); break; } diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c index cd84d4295a20..058c034be376 100644 --- a/net/ipv4/netfilter/nf_tables_arp.c +++ b/net/ipv4/netfilter/nf_tables_arp.c @@ -21,7 +21,7 @@ nft_do_chain_arp(void *priv, { struct nft_pktinfo pkt; - nft_set_pktinfo(&pkt, skb, state); + nft_set_pktinfo_unspec(&pkt, skb, state); return nft_do_chain(&pkt, priv); } diff --git a/net/netfilter/nf_tables_netdev.c b/net/netfilter/nf_tables_netdev.c index 5eefe4a355c6..8de502b0c37b 100644 --- a/net/netfilter/nf_tables_netdev.c +++ b/net/netfilter/nf_tables_netdev.c @@ -41,6 +41,7 @@ nft_netdev_set_pktinfo_ipv4(struct nft_pktinfo *pkt, else if (len < thoff) return; + pkt->tprot_set = true; pkt->tprot = iph->protocol; pkt->xt.thoff = thoff; pkt->xt.fragoff = ntohs(iph->frag_off) & IP_OFFSET; @@ -74,6 +75,7 @@ __nft_netdev_set_pktinfo_ipv6(struct nft_pktinfo *pkt, if (protohdr < 0) return; + pkt->tprot_set = true; pkt->tprot = protohdr; pkt->xt.thoff = thoff; pkt->xt.fragoff = frag_off; @@ -102,7 +104,7 @@ nft_do_chain_netdev(void *priv, struct sk_buff *skb, nft_netdev_set_pktinfo_ipv6(&pkt, skb, state); break; default: - nft_set_pktinfo(&pkt, skb, state); + nft_set_pktinfo_unspec(&pkt, skb, state); break; } diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 2863f3493038..14264edf2d77 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -52,6 +52,8 @@ void nft_meta_get_eval(const struct nft_expr *expr, *dest = pkt->pf; break; case NFT_META_L4PROTO: + if (!pkt->tprot_set) + goto err; *dest = pkt->tprot; break; case NFT_META_PRIORITY: From 8df9e32e7ed2978f90cce780ce6a27513044158a Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 9 Sep 2016 12:42:50 +0200 Subject: [PATCH 0936/1715] netfilter: nf_tables_ipv6: setup pktinfo transport field on failure to parse Make sure the pktinfo protocol fields are initialized if this fails to parse the transport header. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables_ipv6.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h index 6aeee47b1b5e..1e0ffd5aea47 100644 --- a/include/net/netfilter/nf_tables_ipv6.h +++ b/include/net/netfilter/nf_tables_ipv6.h @@ -15,9 +15,10 @@ nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt, nft_set_pktinfo(pkt, skb, state); protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, NULL); - /* If malformed, drop it */ - if (protohdr < 0) + if (protohdr < 0) { + nft_set_pktinfo_proto_unspec(pkt, skb); return -1; + } pkt->tprot_set = true; pkt->tprot = protohdr; From ddc8b6027ad08d145a6d7a6a6abc00e43f315bd1 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 9 Sep 2016 12:42:51 +0200 Subject: [PATCH 0937/1715] netfilter: introduce nft_set_pktinfo_{ipv4, ipv6}_validate() These functions are extracted from the netdev family, they initialize the pktinfo structure and validate that the IPv4 and IPv6 headers are well-formed given that these functions are called from a path where layer 3 sanitization did not happen yet. These functions are placed in include/net/netfilter/nf_tables_ipv{4,6}.h so they can be reused by a follow up patch to use them from the bridge family too. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables_ipv4.h | 42 ++++++++++++++ include/net/netfilter/nf_tables_ipv6.h | 49 ++++++++++++++++ net/netfilter/nf_tables_netdev.c | 79 +------------------------- 3 files changed, 93 insertions(+), 77 deletions(-) diff --git a/include/net/netfilter/nf_tables_ipv4.h b/include/net/netfilter/nf_tables_ipv4.h index af952f7843ee..968f00b82fb5 100644 --- a/include/net/netfilter/nf_tables_ipv4.h +++ b/include/net/netfilter/nf_tables_ipv4.h @@ -20,6 +20,48 @@ nft_set_pktinfo_ipv4(struct nft_pktinfo *pkt, pkt->xt.fragoff = ntohs(ip->frag_off) & IP_OFFSET; } +static inline int +__nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct iphdr *iph, _iph; + u32 len, thoff; + + iph = skb_header_pointer(skb, skb_network_offset(skb), sizeof(*iph), + &_iph); + if (!iph) + return -1; + + iph = ip_hdr(skb); + if (iph->ihl < 5 || iph->version != 4) + return -1; + + len = ntohs(iph->tot_len); + thoff = iph->ihl * 4; + if (skb->len < len) + return -1; + else if (len < thoff) + return -1; + + pkt->tprot_set = true; + pkt->tprot = iph->protocol; + pkt->xt.thoff = thoff; + pkt->xt.fragoff = ntohs(iph->frag_off) & IP_OFFSET; + + return 0; +} + +static inline void +nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + nft_set_pktinfo(pkt, skb, state); + if (__nft_set_pktinfo_ipv4_validate(pkt, skb, state) < 0) + nft_set_pktinfo_proto_unspec(pkt, skb); +} + extern struct nft_af_info nft_af_ipv4; #endif diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h index 1e0ffd5aea47..39b7b717b540 100644 --- a/include/net/netfilter/nf_tables_ipv6.h +++ b/include/net/netfilter/nf_tables_ipv6.h @@ -28,6 +28,55 @@ nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt, return 0; } +static inline int +__nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ +#if IS_ENABLED(CONFIG_IPV6) + struct ipv6hdr *ip6h, _ip6h; + unsigned int thoff = 0; + unsigned short frag_off; + int protohdr; + u32 pkt_len; + + ip6h = skb_header_pointer(skb, skb_network_offset(skb), sizeof(*ip6h), + &_ip6h); + if (!ip6h) + return -1; + + if (ip6h->version != 6) + return -1; + + pkt_len = ntohs(ip6h->payload_len); + if (pkt_len + sizeof(*ip6h) > skb->len) + return -1; + + protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, NULL); + if (protohdr < 0) + return -1; + + pkt->tprot_set = true; + pkt->tprot = protohdr; + pkt->xt.thoff = thoff; + pkt->xt.fragoff = frag_off; + + return 0; +#else + return -1; +#endif +} + +static inline void +nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + nft_set_pktinfo(pkt, skb, state); + if (__nft_set_pktinfo_ipv6_validate(pkt, skb, state) < 0) + nft_set_pktinfo_proto_unspec(pkt, skb); +} + extern struct nft_af_info nft_af_ipv6; #endif diff --git a/net/netfilter/nf_tables_netdev.c b/net/netfilter/nf_tables_netdev.c index 8de502b0c37b..3e5475a833a5 100644 --- a/net/netfilter/nf_tables_netdev.c +++ b/net/netfilter/nf_tables_netdev.c @@ -15,81 +15,6 @@ #include #include -static inline void -nft_netdev_set_pktinfo_ipv4(struct nft_pktinfo *pkt, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - struct iphdr *iph, _iph; - u32 len, thoff; - - nft_set_pktinfo(pkt, skb, state); - - iph = skb_header_pointer(skb, skb_network_offset(skb), sizeof(*iph), - &_iph); - if (!iph) - return; - - iph = ip_hdr(skb); - if (iph->ihl < 5 || iph->version != 4) - return; - - len = ntohs(iph->tot_len); - thoff = iph->ihl * 4; - if (skb->len < len) - return; - else if (len < thoff) - return; - - pkt->tprot_set = true; - pkt->tprot = iph->protocol; - pkt->xt.thoff = thoff; - pkt->xt.fragoff = ntohs(iph->frag_off) & IP_OFFSET; -} - -static inline void -__nft_netdev_set_pktinfo_ipv6(struct nft_pktinfo *pkt, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ -#if IS_ENABLED(CONFIG_IPV6) - struct ipv6hdr *ip6h, _ip6h; - unsigned int thoff = 0; - unsigned short frag_off; - int protohdr; - u32 pkt_len; - - ip6h = skb_header_pointer(skb, skb_network_offset(skb), sizeof(*ip6h), - &_ip6h); - if (!ip6h) - return; - - if (ip6h->version != 6) - return; - - pkt_len = ntohs(ip6h->payload_len); - if (pkt_len + sizeof(*ip6h) > skb->len) - return; - - protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, NULL); - if (protohdr < 0) - return; - - pkt->tprot_set = true; - pkt->tprot = protohdr; - pkt->xt.thoff = thoff; - pkt->xt.fragoff = frag_off; -#endif -} - -static inline void nft_netdev_set_pktinfo_ipv6(struct nft_pktinfo *pkt, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - nft_set_pktinfo(pkt, skb, state); - __nft_netdev_set_pktinfo_ipv6(pkt, skb, state); -} - static unsigned int nft_do_chain_netdev(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) @@ -98,10 +23,10 @@ nft_do_chain_netdev(void *priv, struct sk_buff *skb, switch (skb->protocol) { case htons(ETH_P_IP): - nft_netdev_set_pktinfo_ipv4(&pkt, skb, state); + nft_set_pktinfo_ipv4_validate(&pkt, skb, state); break; case htons(ETH_P_IPV6): - nft_netdev_set_pktinfo_ipv6(&pkt, skb, state); + nft_set_pktinfo_ipv6_validate(&pkt, skb, state); break; default: nft_set_pktinfo_unspec(&pkt, skb, state); From 10151d7b03e23afce76a59f717f2616a10ddef86 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 9 Sep 2016 12:42:52 +0200 Subject: [PATCH 0938/1715] netfilter: nf_tables_bridge: use nft_set_pktinfo_ipv{4, 6}_validate Consolidate pktinfo setup and validation by using the new generic functions so we converge to the netdev family codebase. We only need a linear IPv4 and IPv6 header from the reject expression, so move nft_bridge_iphdr_validate() and nft_bridge_ip6hdr_validate() to net/bridge/netfilter/nft_reject_bridge.c. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables_bridge.h | 7 --- net/bridge/netfilter/nf_tables_bridge.c | 72 +----------------------- net/bridge/netfilter/nft_reject_bridge.c | 44 ++++++++++++++- 3 files changed, 45 insertions(+), 78 deletions(-) delete mode 100644 include/net/netfilter/nf_tables_bridge.h diff --git a/include/net/netfilter/nf_tables_bridge.h b/include/net/netfilter/nf_tables_bridge.h deleted file mode 100644 index 511fb79f6dad..000000000000 --- a/include/net/netfilter/nf_tables_bridge.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef _NET_NF_TABLES_BRIDGE_H -#define _NET_NF_TABLES_BRIDGE_H - -int nft_bridge_iphdr_validate(struct sk_buff *skb); -int nft_bridge_ip6hdr_validate(struct sk_buff *skb); - -#endif /* _NET_NF_TABLES_BRIDGE_H */ diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c index 29899887163e..06f0f81456a0 100644 --- a/net/bridge/netfilter/nf_tables_bridge.c +++ b/net/bridge/netfilter/nf_tables_bridge.c @@ -13,79 +13,11 @@ #include #include #include -#include #include #include #include #include -int nft_bridge_iphdr_validate(struct sk_buff *skb) -{ - struct iphdr *iph; - u32 len; - - if (!pskb_may_pull(skb, sizeof(struct iphdr))) - return 0; - - iph = ip_hdr(skb); - if (iph->ihl < 5 || iph->version != 4) - return 0; - - len = ntohs(iph->tot_len); - if (skb->len < len) - return 0; - else if (len < (iph->ihl*4)) - return 0; - - if (!pskb_may_pull(skb, iph->ihl*4)) - return 0; - - return 1; -} -EXPORT_SYMBOL_GPL(nft_bridge_iphdr_validate); - -int nft_bridge_ip6hdr_validate(struct sk_buff *skb) -{ - struct ipv6hdr *hdr; - u32 pkt_len; - - if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) - return 0; - - hdr = ipv6_hdr(skb); - if (hdr->version != 6) - return 0; - - pkt_len = ntohs(hdr->payload_len); - if (pkt_len + sizeof(struct ipv6hdr) > skb->len) - return 0; - - return 1; -} -EXPORT_SYMBOL_GPL(nft_bridge_ip6hdr_validate); - -static inline void nft_bridge_set_pktinfo_ipv4(struct nft_pktinfo *pkt, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - if (nft_bridge_iphdr_validate(skb)) - nft_set_pktinfo_ipv4(pkt, skb, state); - else - nft_set_pktinfo_unspec(pkt, skb, state); -} - -static inline void nft_bridge_set_pktinfo_ipv6(struct nft_pktinfo *pkt, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ -#if IS_ENABLED(CONFIG_IPV6) - if (nft_bridge_ip6hdr_validate(skb) && - nft_set_pktinfo_ipv6(pkt, skb, state) == 0) - return; -#endif - nft_set_pktinfo_unspec(pkt, skb, state); -} - static unsigned int nft_do_chain_bridge(void *priv, struct sk_buff *skb, @@ -95,10 +27,10 @@ nft_do_chain_bridge(void *priv, switch (eth_hdr(skb)->h_proto) { case htons(ETH_P_IP): - nft_bridge_set_pktinfo_ipv4(&pkt, skb, state); + nft_set_pktinfo_ipv4_validate(&pkt, skb, state); break; case htons(ETH_P_IPV6): - nft_bridge_set_pktinfo_ipv6(&pkt, skb, state); + nft_set_pktinfo_ipv6_validate(&pkt, skb, state); break; default: nft_set_pktinfo_unspec(&pkt, skb, state); diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c index 0b77ffbc27d6..4b3df6b0e3b9 100644 --- a/net/bridge/netfilter/nft_reject_bridge.c +++ b/net/bridge/netfilter/nft_reject_bridge.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include @@ -37,6 +36,30 @@ static void nft_reject_br_push_etherhdr(struct sk_buff *oldskb, skb_pull(nskb, ETH_HLEN); } +static int nft_bridge_iphdr_validate(struct sk_buff *skb) +{ + struct iphdr *iph; + u32 len; + + if (!pskb_may_pull(skb, sizeof(struct iphdr))) + return 0; + + iph = ip_hdr(skb); + if (iph->ihl < 5 || iph->version != 4) + return 0; + + len = ntohs(iph->tot_len); + if (skb->len < len) + return 0; + else if (len < (iph->ihl*4)) + return 0; + + if (!pskb_may_pull(skb, iph->ihl*4)) + return 0; + + return 1; +} + /* We cannot use oldskb->dev, it can be either bridge device (NF_BRIDGE INPUT) * or the bridge port (NF_BRIDGE PREROUTING). */ @@ -143,6 +166,25 @@ static void nft_reject_br_send_v4_unreach(struct net *net, br_forward(br_port_get_rcu(dev), nskb, false, true); } +static int nft_bridge_ip6hdr_validate(struct sk_buff *skb) +{ + struct ipv6hdr *hdr; + u32 pkt_len; + + if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) + return 0; + + hdr = ipv6_hdr(skb); + if (hdr->version != 6) + return 0; + + pkt_len = ntohs(hdr->payload_len); + if (pkt_len + sizeof(struct ipv6hdr) > skb->len) + return 0; + + return 1; +} + static void nft_reject_br_send_v6_tcp_reset(struct net *net, struct sk_buff *oldskb, const struct net_device *dev, From 71212c9b04eba76faa4dca26ccd1552d6bb300c1 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 9 Sep 2016 12:42:53 +0200 Subject: [PATCH 0939/1715] netfilter: nf_tables: don't drop IPv6 packets that cannot parse transport This is overly conservative and not flexible at all, so better let them go through and let the filtering policy decide what to do with them. We use skb_header_pointer() all over the place so we would just fail to match when trying to access fields from malformed traffic. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables_ipv6.h | 6 ++---- net/ipv6/netfilter/nf_tables_ipv6.c | 4 +--- net/ipv6/netfilter/nft_chain_route_ipv6.c | 4 +--- 3 files changed, 4 insertions(+), 10 deletions(-) diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h index 39b7b717b540..d150b5066201 100644 --- a/include/net/netfilter/nf_tables_ipv6.h +++ b/include/net/netfilter/nf_tables_ipv6.h @@ -4,7 +4,7 @@ #include #include -static inline int +static inline void nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt, struct sk_buff *skb, const struct nf_hook_state *state) @@ -17,15 +17,13 @@ nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt, protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, NULL); if (protohdr < 0) { nft_set_pktinfo_proto_unspec(pkt, skb); - return -1; + return; } pkt->tprot_set = true; pkt->tprot = protohdr; pkt->xt.thoff = thoff; pkt->xt.fragoff = frag_off; - - return 0; } static inline int diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c index 30b22f4dff55..05d05926962a 100644 --- a/net/ipv6/netfilter/nf_tables_ipv6.c +++ b/net/ipv6/netfilter/nf_tables_ipv6.c @@ -22,9 +22,7 @@ static unsigned int nft_do_chain_ipv6(void *priv, { struct nft_pktinfo pkt; - /* malformed packet, drop it */ - if (nft_set_pktinfo_ipv6(&pkt, skb, state) < 0) - return NF_DROP; + nft_set_pktinfo_ipv6(&pkt, skb, state); return nft_do_chain(&pkt, priv); } diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c index 71d995ff3108..01eb0f658366 100644 --- a/net/ipv6/netfilter/nft_chain_route_ipv6.c +++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c @@ -32,9 +32,7 @@ static unsigned int nf_route_table_hook(void *priv, u_int8_t hop_limit; u32 mark, flowlabel; - /* malformed packet, drop it */ - if (nft_set_pktinfo_ipv6(&pkt, skb, state) < 0) - return NF_DROP; + nft_set_pktinfo_ipv6(&pkt, skb, state); /* save source/dest address, mark, hoplimit, flowlabel, priority */ memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr)); From cf71c03edf10076f05a0b678fc9c8f8e6c6e24e4 Mon Sep 17 00:00:00 2001 From: Pablo Neira Date: Fri, 9 Sep 2016 14:01:26 +0200 Subject: [PATCH 0940/1715] netfilter: nf_conntrack: simplify __nf_ct_try_assign_helper() return logic Instead of several goto's just to return the result, simply return it. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_helper.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index b989b81ac156..4ffe388a9a1e 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -189,7 +189,6 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, struct nf_conntrack_helper *helper = NULL; struct nf_conn_help *help; struct net *net = nf_ct_net(ct); - int ret = 0; /* We already got a helper explicitly attached. The function * nf_conntrack_alter_reply - in case NAT is in use - asks for looking @@ -223,15 +222,13 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, if (helper == NULL) { if (help) RCU_INIT_POINTER(help->helper, NULL); - goto out; + return 0; } if (help == NULL) { help = nf_ct_helper_ext_add(ct, helper, flags); - if (help == NULL) { - ret = -ENOMEM; - goto out; - } + if (help == NULL) + return -ENOMEM; } else { /* We only allow helper re-assignment of the same sort since * we cannot reallocate the helper extension area. @@ -240,13 +237,13 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, if (tmp && tmp->help != helper->help) { RCU_INIT_POINTER(help->helper, NULL); - goto out; + return 0; } } rcu_assign_pointer(help->helper, helper); -out: - return ret; + + return 0; } EXPORT_SYMBOL_GPL(__nf_ct_try_assign_helper); From 4e6577de71803142d01e374cf15664af0388799a Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Fri, 9 Sep 2016 23:25:09 +0800 Subject: [PATCH 0941/1715] netfilter: Add the missed return value check of register_netdevice_notifier There are some codes of netfilter module which did not check the return value of register_netdevice_notifier. Add the checks now. Signed-off-by: Gao Feng Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_netdev.c | 18 +++++++++++++----- net/netfilter/nfnetlink_queue.c | 9 ++++++++- net/netfilter/xt_TEE.c | 8 +++++++- 3 files changed, 28 insertions(+), 7 deletions(-) diff --git a/net/netfilter/nf_tables_netdev.c b/net/netfilter/nf_tables_netdev.c index 3e5475a833a5..38a3e8385042 100644 --- a/net/netfilter/nf_tables_netdev.c +++ b/net/netfilter/nf_tables_netdev.c @@ -151,12 +151,20 @@ static int __init nf_tables_netdev_init(void) nft_register_chain_type(&nft_filter_chain_netdev); ret = register_pernet_subsys(&nf_tables_netdev_net_ops); - if (ret < 0) { - nft_unregister_chain_type(&nft_filter_chain_netdev); - return ret; - } - register_netdevice_notifier(&nf_tables_netdev_notifier); + if (ret) + goto err1; + + ret = register_netdevice_notifier(&nf_tables_netdev_notifier); + if (ret) + goto err2; + return 0; + +err2: + unregister_pernet_subsys(&nf_tables_netdev_net_ops); +err1: + nft_unregister_chain_type(&nft_filter_chain_netdev); + return ret; } static void __exit nf_tables_netdev_exit(void) diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index f49f45081acb..808da34f94cd 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -1522,9 +1522,16 @@ static int __init nfnetlink_queue_init(void) goto cleanup_netlink_notifier; } - register_netdevice_notifier(&nfqnl_dev_notifier); + status = register_netdevice_notifier(&nfqnl_dev_notifier); + if (status < 0) { + pr_err("nf_queue: failed to register netdevice notifier\n"); + goto cleanup_netlink_subsys; + } + return status; +cleanup_netlink_subsys: + nfnetlink_subsys_unregister(&nfqnl_subsys); cleanup_netlink_notifier: netlink_unregister_notifier(&nfqnl_rtnl_notifier); unregister_pernet_subsys(&nfnl_queue_net_ops); diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c index 6e57a3966dc5..0471db4032c5 100644 --- a/net/netfilter/xt_TEE.c +++ b/net/netfilter/xt_TEE.c @@ -89,6 +89,8 @@ static int tee_tg_check(const struct xt_tgchk_param *par) return -EINVAL; if (info->oif[0]) { + int ret; + if (info->oif[sizeof(info->oif)-1] != '\0') return -EINVAL; @@ -101,7 +103,11 @@ static int tee_tg_check(const struct xt_tgchk_param *par) priv->notifier.notifier_call = tee_netdev_event; info->priv = priv; - register_netdevice_notifier(&priv->notifier); + ret = register_netdevice_notifier(&priv->notifier); + if (ret) { + kfree(priv); + return ret; + } } else info->priv = NULL; From 23d07508d25cea9984ee068538b4e86932b015c2 Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Sat, 10 Sep 2016 10:04:30 +0800 Subject: [PATCH 0942/1715] netfilter: Add the missed return value check of nft_register_chain_type There are some codes of netfilter module which did not check the return value of nft_register_chain_type. Add the checks now. Signed-off-by: Gao Feng Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/nf_tables_bridge.c | 18 +++++++++++++----- net/ipv4/netfilter/nf_tables_arp.c | 5 ++++- net/ipv4/netfilter/nf_tables_ipv4.c | 5 ++++- net/ipv6/netfilter/nf_tables_ipv6.c | 5 ++++- net/netfilter/nf_tables_inet.c | 5 ++++- net/netfilter/nf_tables_netdev.c | 5 ++++- 6 files changed, 33 insertions(+), 10 deletions(-) diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c index 06f0f81456a0..97afdc0744e6 100644 --- a/net/bridge/netfilter/nf_tables_bridge.c +++ b/net/bridge/netfilter/nf_tables_bridge.c @@ -139,12 +139,20 @@ static int __init nf_tables_bridge_init(void) int ret; nf_register_afinfo(&nf_br_afinfo); - nft_register_chain_type(&filter_bridge); + ret = nft_register_chain_type(&filter_bridge); + if (ret < 0) + goto err1; + ret = register_pernet_subsys(&nf_tables_bridge_net_ops); - if (ret < 0) { - nft_unregister_chain_type(&filter_bridge); - nf_unregister_afinfo(&nf_br_afinfo); - } + if (ret < 0) + goto err2; + + return ret; + +err2: + nft_unregister_chain_type(&filter_bridge); +err1: + nf_unregister_afinfo(&nf_br_afinfo); return ret; } diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c index 058c034be376..805c8ddfe860 100644 --- a/net/ipv4/netfilter/nf_tables_arp.c +++ b/net/ipv4/netfilter/nf_tables_arp.c @@ -80,7 +80,10 @@ static int __init nf_tables_arp_init(void) { int ret; - nft_register_chain_type(&filter_arp); + ret = nft_register_chain_type(&filter_arp); + if (ret < 0) + return ret; + ret = register_pernet_subsys(&nf_tables_arp_net_ops); if (ret < 0) nft_unregister_chain_type(&filter_arp); diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c index e44ba3b12fbb..2840a29b2e04 100644 --- a/net/ipv4/netfilter/nf_tables_ipv4.c +++ b/net/ipv4/netfilter/nf_tables_ipv4.c @@ -103,7 +103,10 @@ static int __init nf_tables_ipv4_init(void) { int ret; - nft_register_chain_type(&filter_ipv4); + ret = nft_register_chain_type(&filter_ipv4); + if (ret < 0) + return ret; + ret = register_pernet_subsys(&nf_tables_ipv4_net_ops); if (ret < 0) nft_unregister_chain_type(&filter_ipv4); diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c index 05d05926962a..d6e4ba5de916 100644 --- a/net/ipv6/netfilter/nf_tables_ipv6.c +++ b/net/ipv6/netfilter/nf_tables_ipv6.c @@ -100,7 +100,10 @@ static int __init nf_tables_ipv6_init(void) { int ret; - nft_register_chain_type(&filter_ipv6); + ret = nft_register_chain_type(&filter_ipv6); + if (ret < 0) + return ret; + ret = register_pernet_subsys(&nf_tables_ipv6_net_ops); if (ret < 0) nft_unregister_chain_type(&filter_ipv6); diff --git a/net/netfilter/nf_tables_inet.c b/net/netfilter/nf_tables_inet.c index 6b5f76295d3d..f713cc205669 100644 --- a/net/netfilter/nf_tables_inet.c +++ b/net/netfilter/nf_tables_inet.c @@ -82,7 +82,10 @@ static int __init nf_tables_inet_init(void) { int ret; - nft_register_chain_type(&filter_inet); + ret = nft_register_chain_type(&filter_inet); + if (ret < 0) + return ret; + ret = register_pernet_subsys(&nf_tables_inet_net_ops); if (ret < 0) nft_unregister_chain_type(&filter_inet); diff --git a/net/netfilter/nf_tables_netdev.c b/net/netfilter/nf_tables_netdev.c index 38a3e8385042..9e2ae424b640 100644 --- a/net/netfilter/nf_tables_netdev.c +++ b/net/netfilter/nf_tables_netdev.c @@ -149,7 +149,10 @@ static int __init nf_tables_netdev_init(void) { int ret; - nft_register_chain_type(&nft_filter_chain_netdev); + ret = nft_register_chain_type(&nft_filter_chain_netdev); + if (ret) + return ret; + ret = register_pernet_subsys(&nf_tables_netdev_net_ops); if (ret) goto err1; From 6bd14303a908833e10ac731a3308eba938305269 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sun, 11 Sep 2016 22:05:27 +0800 Subject: [PATCH 0943/1715] netfilter: nf_queue: get rid of dependency on IP6_NF_IPTABLES hash_v6 is used by both nftables and ip6tables, so depend on IP6_NF_IPTABLES is not properly. Actually, it only parses ipv6hdr and computes a hash value, so even if IPV6 is disabled, there's no side effect too, remove it. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_queue.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index 0dbce55437f2..cc8a11f7e306 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -54,7 +54,6 @@ static inline u32 hash_v4(const struct sk_buff *skb, u32 jhash_initval) (__force u32)iph->saddr, iph->protocol, jhash_initval); } -#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) static inline u32 hash_v6(const struct sk_buff *skb, u32 jhash_initval) { const struct ipv6hdr *ip6h = ipv6_hdr(skb); @@ -77,7 +76,6 @@ static inline u32 hash_v6(const struct sk_buff *skb, u32 jhash_initval) return jhash_3words(a, b, c, jhash_initval); } -#endif static inline u32 nfqueue_hash(const struct sk_buff *skb, u16 queue, u16 queues_total, u8 family, @@ -85,10 +83,8 @@ nfqueue_hash(const struct sk_buff *skb, u16 queue, u16 queues_total, u8 family, { if (family == NFPROTO_IPV4) queue += ((u64) hash_v4(skb, jhash_initval) * queues_total) >> 32; -#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) else if (family == NFPROTO_IPV6) queue += ((u64) hash_v6(skb, jhash_initval) * queues_total) >> 32; -#endif return queue; } From 8e8118f893138d4cc3d4dbf4163d7497fca54a9d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 11 Sep 2016 22:55:53 +0200 Subject: [PATCH 0944/1715] netfilter: conntrack: remove packet hotpath stats These counters sit in hot path and do show up in perf, this is especially true for 'found' and 'searched' which get incremented for every packet processed. Information like searched=212030105 new=623431 found=333613 delete=623327 does not seem too helpful nowadays: - on busy systems found and searched will overflow every few hours (these are 32bit integers), other more busy ones every few days. - for debugging there are better methods, such as iptables' trace target, the conntrack log sysctls. Nowadays we also have perf tool. This removes packet path stat counters except those that are expected to be 0 (or close to 0) on a normal system, e.g. 'insert_failed' (race happened) or 'invalid' (proto tracker rejects). The insert stat is retained for the ctnetlink case. The found stat is retained for the tuple-is-taken check when NAT has to determine if it needs to pick a different source address. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nf_conntrack_common.h | 4 ---- include/uapi/linux/netfilter/nfnetlink_conntrack.h | 8 ++++---- net/netfilter/nf_conntrack_core.c | 14 ++------------ net/netfilter/nf_conntrack_netlink.c | 6 +----- net/netfilter/nf_conntrack_standalone.c | 8 ++++---- 5 files changed, 11 insertions(+), 29 deletions(-) diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h index 275505792664..1d1ef4e20512 100644 --- a/include/linux/netfilter/nf_conntrack_common.h +++ b/include/linux/netfilter/nf_conntrack_common.h @@ -4,13 +4,9 @@ #include struct ip_conntrack_stat { - unsigned int searched; unsigned int found; - unsigned int new; unsigned int invalid; unsigned int ignore; - unsigned int delete; - unsigned int delete_list; unsigned int insert; unsigned int insert_failed; unsigned int drop; diff --git a/include/uapi/linux/netfilter/nfnetlink_conntrack.h b/include/uapi/linux/netfilter/nfnetlink_conntrack.h index 9df789709abe..6deb8867c5fc 100644 --- a/include/uapi/linux/netfilter/nfnetlink_conntrack.h +++ b/include/uapi/linux/netfilter/nfnetlink_conntrack.h @@ -231,13 +231,13 @@ enum ctattr_secctx { enum ctattr_stats_cpu { CTA_STATS_UNSPEC, - CTA_STATS_SEARCHED, + CTA_STATS_SEARCHED, /* no longer used */ CTA_STATS_FOUND, - CTA_STATS_NEW, + CTA_STATS_NEW, /* no longer used */ CTA_STATS_INVALID, CTA_STATS_IGNORE, - CTA_STATS_DELETE, - CTA_STATS_DELETE_LIST, + CTA_STATS_DELETE, /* no longer used */ + CTA_STATS_DELETE_LIST, /* no longer used */ CTA_STATS_INSERT, CTA_STATS_INSERT_FAILED, CTA_STATS_DROP, diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index ac1db4019d5c..8d1ddb9b63ed 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -379,7 +379,6 @@ static void destroy_conntrack(struct nf_conntrack *nfct) { struct nf_conn *ct = (struct nf_conn *)nfct; - struct net *net = nf_ct_net(ct); struct nf_conntrack_l4proto *l4proto; pr_debug("destroy_conntrack(%p)\n", ct); @@ -406,7 +405,6 @@ destroy_conntrack(struct nf_conntrack *nfct) nf_ct_del_from_dying_or_unconfirmed_list(ct); - NF_CT_STAT_INC(net, delete); local_bh_enable(); if (ct->master) @@ -438,7 +436,6 @@ static void nf_ct_delete_from_lists(struct nf_conn *ct) nf_ct_add_to_dying_list(ct); - NF_CT_STAT_INC(net, delete_list); local_bh_enable(); } @@ -529,11 +526,8 @@ begin: if (nf_ct_is_dying(ct)) continue; - if (nf_ct_key_equal(h, tuple, zone, net)) { - NF_CT_STAT_INC_ATOMIC(net, found); + if (nf_ct_key_equal(h, tuple, zone, net)) return h; - } - NF_CT_STAT_INC_ATOMIC(net, searched); } /* * if the nulls value we got at the end of this lookup is @@ -798,7 +792,6 @@ __nf_conntrack_confirm(struct sk_buff *skb) */ __nf_conntrack_hash_insert(ct, hash, reply_hash); nf_conntrack_double_unlock(hash, reply_hash); - NF_CT_STAT_INC(net, insert); local_bh_enable(); help = nfct_help(ct); @@ -857,7 +850,6 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, rcu_read_unlock(); return 1; } - NF_CT_STAT_INC_ATOMIC(net, searched); } if (get_nulls_value(n) != hash) { @@ -1177,10 +1169,8 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, } spin_unlock(&nf_conntrack_expect_lock); } - if (!exp) { + if (!exp) __nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC); - NF_CT_STAT_INC(net, new); - } /* Now it is inserted into the unconfirmed list, bump refcount */ nf_conntrack_get(&ct->ct_general); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index c052b712c49f..27540455dc62 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1984,13 +1984,9 @@ ctnetlink_ct_stat_cpu_fill_info(struct sk_buff *skb, u32 portid, u32 seq, nfmsg->version = NFNETLINK_V0; nfmsg->res_id = htons(cpu); - if (nla_put_be32(skb, CTA_STATS_SEARCHED, htonl(st->searched)) || - nla_put_be32(skb, CTA_STATS_FOUND, htonl(st->found)) || - nla_put_be32(skb, CTA_STATS_NEW, htonl(st->new)) || + if (nla_put_be32(skb, CTA_STATS_FOUND, htonl(st->found)) || nla_put_be32(skb, CTA_STATS_INVALID, htonl(st->invalid)) || nla_put_be32(skb, CTA_STATS_IGNORE, htonl(st->ignore)) || - nla_put_be32(skb, CTA_STATS_DELETE, htonl(st->delete)) || - nla_put_be32(skb, CTA_STATS_DELETE_LIST, htonl(st->delete_list)) || nla_put_be32(skb, CTA_STATS_INSERT, htonl(st->insert)) || nla_put_be32(skb, CTA_STATS_INSERT_FAILED, htonl(st->insert_failed)) || diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 3d9a316a3c77..7d52f8401afd 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -352,13 +352,13 @@ static int ct_cpu_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x " "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n", nr_conntracks, - st->searched, + 0, st->found, - st->new, + 0, st->invalid, st->ignore, - st->delete, - st->delete_list, + 0, + 0, st->insert, st->insert_failed, st->drop, From 2e917d602acd9e3e8c6e4c43b213c8929d986503 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Mon, 12 Sep 2016 22:21:36 +0800 Subject: [PATCH 0945/1715] netfilter: nft_numgen: fix race between num generate and store it After we generate a new number, we still use the priv->counter and store it to the dreg. This is not correct, another cpu may already change it to a new number. So we must use the generated number, not the priv->counter itself. Fixes: 91dbc6be0a62 ("netfilter: nf_tables: add number generator expression") Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_numgen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c index f51a3ede3932..f173ebec30a7 100644 --- a/net/netfilter/nft_numgen.c +++ b/net/netfilter/nft_numgen.c @@ -37,7 +37,7 @@ static void nft_ng_inc_eval(const struct nft_expr *expr, nval = (oval + 1 < priv->modulus) ? oval + 1 : 0; } while (atomic_cmpxchg(&priv->counter, oval, nval) != oval); - memcpy(®s->data[priv->dreg], &priv->counter, sizeof(u32)); + regs->data[priv->dreg] = nval; } static const struct nla_policy nft_ng_policy[NFTA_NG_MAX + 1] = { From aa211d2074ec4266b89673a54719421464c943e3 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 12 Sep 2016 10:03:32 -0400 Subject: [PATCH 0946/1715] 3c59x: use IS_ENABLED() instead of checking for built-in or module The IS_ENABLED() macro checks if a Kconfig symbol has been enabled either built-in or as a module, use that macro instead of open coding the same. Using the macro makes the code more readable by helping abstract away some of the Kconfig built-in and module enable details. Signed-off-by: Javier Martinez Canillas Signed-off-by: David S. Miller --- drivers/net/ethernet/3com/3c59x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c index 25c55ab05c7d..9133e7926da5 100644 --- a/drivers/net/ethernet/3com/3c59x.c +++ b/drivers/net/ethernet/3com/3c59x.c @@ -3089,7 +3089,7 @@ static void set_rx_mode(struct net_device *dev) iowrite16(new_mode, ioaddr + EL3_CMD); } -#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) +#if IS_ENABLED(CONFIG_VLAN_8021Q) /* Setup the card so that it can receive frames with an 802.1q VLAN tag. Note that this must be done after each RxReset due to some backwards compatibility logic in the Cyclone and Tornado ASICs */ From 5a5ab1611aa5c17a32b64a4c5069c26e1fd7c960 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 12 Sep 2016 10:03:33 -0400 Subject: [PATCH 0947/1715] starfire: use IS_ENABLED() instead of checking for built-in or module The IS_ENABLED() macro checks if a Kconfig symbol has been enabled either built-in or as a module, use that macro instead of open coding the same. Using the macro makes the code more readable by helping abstract away some of the Kconfig built-in and module enable details. Signed-off-by: Javier Martinez Canillas Signed-off-by: David S. Miller --- drivers/net/ethernet/adaptec/starfire.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/adaptec/starfire.c b/drivers/net/ethernet/adaptec/starfire.c index 1d1069641d81..8af2c88d5b33 100644 --- a/drivers/net/ethernet/adaptec/starfire.c +++ b/drivers/net/ethernet/adaptec/starfire.c @@ -66,7 +66,7 @@ */ #define ZEROCOPY -#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) +#if IS_ENABLED(CONFIG_VLAN_8021Q) #define VLAN_SUPPORT #endif From 941992d2944789641470626e9336d663236b1d28 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 12 Sep 2016 10:03:34 -0400 Subject: [PATCH 0948/1715] ethernet: amd: use IS_ENABLED() instead of checking for built-in or module The IS_ENABLED() macro checks if a Kconfig symbol has been enabled either built-in or as a module, use that macro instead of open coding the same. Using the macro makes the code more readable by helping abstract away some of the Kconfig built-in and module enable details. Signed-off-by: Javier Martinez Canillas Reviewed-by: Geert Uytterhoeven Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/7990.c | 6 +++--- drivers/net/ethernet/amd/amd8111e.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/amd/7990.c b/drivers/net/ethernet/amd/7990.c index dcf2a1f3643d..dc57f2759f44 100644 --- a/drivers/net/ethernet/amd/7990.c +++ b/drivers/net/ethernet/amd/7990.c @@ -45,14 +45,14 @@ #define WRITERDP(lp, x) out_be16(lp->base + LANCE_RDP, (x)) #define READRDP(lp) in_be16(lp->base + LANCE_RDP) -#if defined(CONFIG_HPLANCE) || defined(CONFIG_HPLANCE_MODULE) +#if IS_ENABLED(CONFIG_HPLANCE) #include "hplance.h" #undef WRITERAP #undef WRITERDP #undef READRDP -#if defined(CONFIG_MVME147_NET) || defined(CONFIG_MVME147_NET_MODULE) +#if IS_ENABLED(CONFIG_MVME147_NET) /* Lossage Factor Nine, Mr Sulu. */ #define WRITERAP(lp, x) (lp->writerap(lp, x)) @@ -86,7 +86,7 @@ static inline __u16 READRDP(struct lance_private *lp) } #endif -#endif /* CONFIG_HPLANCE || CONFIG_HPLANCE_MODULE */ +#endif /* IS_ENABLED(CONFIG_HPLANCE) */ /* debugging output macros, various flavours */ /* #define TEST_HITS */ diff --git a/drivers/net/ethernet/amd/amd8111e.c b/drivers/net/ethernet/amd/amd8111e.c index 94960055fa1f..f92cc97151ec 100644 --- a/drivers/net/ethernet/amd/amd8111e.c +++ b/drivers/net/ethernet/amd/amd8111e.c @@ -89,7 +89,7 @@ Revision History: #include #include -#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) +#if IS_ENABLED(CONFIG_VLAN_8021Q) #define AMD8111E_VLAN_TAG_USED 1 #else #define AMD8111E_VLAN_TAG_USED 0 From da556d6a1a386aaf9ead99ef5574497c535bd26e Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 12 Sep 2016 10:03:35 -0400 Subject: [PATCH 0949/1715] bnx2: use IS_ENABLED() instead of checking for built-in or module The IS_ENABLED() macro checks if a Kconfig symbol has been enabled either built-in or as a module, use that macro instead of open coding the same. Using the macro makes the code more readable by helping abstract away some of the Kconfig built-in and module enable details. Signed-off-by: Javier Martinez Canillas Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index 8fc3f3c137f8..ecd357dbb1d4 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -50,7 +50,7 @@ #include #include -#if defined(CONFIG_CNIC) || defined(CONFIG_CNIC_MODULE) +#if IS_ENABLED(CONFIG_CNIC) #define BCM_CNIC 1 #include "cnic_if.h" #endif From 067577868b4fdeaf2f93ec5c4e9b29a0eded6528 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 12 Sep 2016 10:03:36 -0400 Subject: [PATCH 0950/1715] sundance: use IS_ENABLED() instead of checking for built-in or module The IS_ENABLED() macro checks if a Kconfig symbol has been enabled either built-in or as a module, use that macro instead of open coding the same. Using the macro makes the code more readable by helping abstract away some of the Kconfig built-in and module enable details. Signed-off-by: Javier Martinez Canillas Signed-off-by: David S. Miller --- drivers/net/ethernet/dlink/sundance.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/dlink/sundance.c b/drivers/net/ethernet/dlink/sundance.c index 58c6338a839e..79d80090eac8 100644 --- a/drivers/net/ethernet/dlink/sundance.c +++ b/drivers/net/ethernet/dlink/sundance.c @@ -867,7 +867,7 @@ static int netdev_open(struct net_device *dev) /* Initialize other registers. */ __set_mac_addr(dev); -#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) +#if IS_ENABLED(CONFIG_VLAN_8021Q) iowrite16(dev->mtu + 18, ioaddr + MaxFrameSize); #else iowrite16(dev->mtu + 14, ioaddr + MaxFrameSize); From 504e76e5b93d9c6a5fbfaa8ea63ad3f7fe77f601 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 12 Sep 2016 10:03:37 -0400 Subject: [PATCH 0951/1715] net/fsl_pq_mdio: use IS_ENABLED() instead of checking for built-in or module The IS_ENABLED() macro checks if a Kconfig symbol has been enabled either built-in or as a module, use that macro instead of open coding the same. Using the macro makes the code more readable by helping abstract away some of the Kconfig built-in and module enable details. Signed-off-by: Javier Martinez Canillas Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fsl_pq_mdio.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/freescale/fsl_pq_mdio.c b/drivers/net/ethernet/freescale/fsl_pq_mdio.c index f3c63dce1e30..446c7b374ff5 100644 --- a/drivers/net/ethernet/freescale/fsl_pq_mdio.c +++ b/drivers/net/ethernet/freescale/fsl_pq_mdio.c @@ -195,7 +195,7 @@ static int fsl_pq_mdio_reset(struct mii_bus *bus) return 0; } -#if defined(CONFIG_GIANFAR) || defined(CONFIG_GIANFAR_MODULE) +#if IS_ENABLED(CONFIG_GIANFAR) /* * Return the TBIPA address, starting from the address * of the mapped GFAR MDIO registers (struct gfar) @@ -228,7 +228,7 @@ static uint32_t __iomem *get_etsec_tbipa(void __iomem *p) } #endif -#if defined(CONFIG_UCC_GETH) || defined(CONFIG_UCC_GETH_MODULE) +#if IS_ENABLED(CONFIG_UCC_GETH) /* * Return the TBIPAR address for a QE MDIO node, starting from the address * of the mapped MII registers (struct fsl_pq_mii) @@ -306,7 +306,7 @@ static void ucc_configure(phys_addr_t start, phys_addr_t end) #endif static const struct of_device_id fsl_pq_mdio_match[] = { -#if defined(CONFIG_GIANFAR) || defined(CONFIG_GIANFAR_MODULE) +#if IS_ENABLED(CONFIG_GIANFAR) { .compatible = "fsl,gianfar-tbi", .data = &(struct fsl_pq_mdio_data) { @@ -344,7 +344,7 @@ static const struct of_device_id fsl_pq_mdio_match[] = { }, }, #endif -#if defined(CONFIG_UCC_GETH) || defined(CONFIG_UCC_GETH_MODULE) +#if IS_ENABLED(CONFIG_UCC_GETH) { .compatible = "fsl,ucc-mdio", .data = &(struct fsl_pq_mdio_data) { From e2eae5b80f66a9a9b6fd5246fffc86d9ec3f49df Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 12 Sep 2016 10:03:38 -0400 Subject: [PATCH 0952/1715] i825xx: use IS_ENABLED() instead of checking for built-in or module The IS_ENABLED() macro checks if a Kconfig symbol has been enabled either built-in or as a module, use that macro instead of open coding the same. Using the macro makes the code more readable by helping abstract away some of the Kconfig built-in and module enable details. Signed-off-by: Javier Martinez Canillas Signed-off-by: David S. Miller --- drivers/net/ethernet/i825xx/82596.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/i825xx/82596.c b/drivers/net/ethernet/i825xx/82596.c index befb4ac3e2b0..ce235b776793 100644 --- a/drivers/net/ethernet/i825xx/82596.c +++ b/drivers/net/ethernet/i825xx/82596.c @@ -89,10 +89,10 @@ static char version[] __initdata = #define DEB(x,y) if (i596_debug & (x)) y -#if defined(CONFIG_MVME16x_NET) || defined(CONFIG_MVME16x_NET_MODULE) +#if IS_ENABLED(CONFIG_MVME16x_NET) #define ENABLE_MVME16x_NET #endif -#if defined(CONFIG_BVME6000_NET) || defined(CONFIG_BVME6000_NET_MODULE) +#if IS_ENABLED(CONFIG_BVME6000_NET) #define ENABLE_BVME6000_NET #endif From ee58c1149e2bba6043e31f736f6f6136562758ac Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 12 Sep 2016 10:03:39 -0400 Subject: [PATCH 0953/1715] ixgbe: use IS_ENABLED() instead of checking for built-in or module The IS_ENABLED() macro checks if a Kconfig symbol has been enabled either built-in or as a module, use that macro instead of open coding the same. Using the macro makes the code more readable by helping abstract away some of the Kconfig built-in and module enable details. Signed-off-by: Javier Martinez Canillas Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/ixgbe/ixgbe.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index 33c025055011..b06e32d0d22a 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -45,10 +45,10 @@ #include "ixgbe_type.h" #include "ixgbe_common.h" #include "ixgbe_dcb.h" -#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) +#if IS_ENABLED(CONFIG_FCOE) #define IXGBE_FCOE #include "ixgbe_fcoe.h" -#endif /* CONFIG_FCOE or CONFIG_FCOE_MODULE */ +#endif /* IS_ENABLED(CONFIG_FCOE) */ #ifdef CONFIG_IXGBE_DCA #include #endif From bb152934015bb1f3633ead0cd3404227355294ea Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 12 Sep 2016 10:03:40 -0400 Subject: [PATCH 0954/1715] net: mvneta: use IS_ENABLED() instead of checking for built-in or module The IS_ENABLED() macro checks if a Kconfig symbol has been enabled either built-in or as a module, use that macro instead of open coding the same. Using the macro makes the code more readable by helping abstract away some of the Kconfig built-in and module enable details. Signed-off-by: Javier Martinez Canillas Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvneta_bm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvneta_bm.h b/drivers/net/ethernet/marvell/mvneta_bm.h index e74fd44a92f7..a32de432800c 100644 --- a/drivers/net/ethernet/marvell/mvneta_bm.h +++ b/drivers/net/ethernet/marvell/mvneta_bm.h @@ -133,7 +133,7 @@ struct mvneta_bm_pool { void *mvneta_frag_alloc(unsigned int frag_size); void mvneta_frag_free(unsigned int frag_size, void *data); -#if defined(CONFIG_MVNETA_BM) || defined(CONFIG_MVNETA_BM_MODULE) +#if IS_ENABLED(CONFIG_MVNETA_BM) void mvneta_bm_pool_destroy(struct mvneta_bm *priv, struct mvneta_bm_pool *bm_pool, u8 port_map); void mvneta_bm_bufs_free(struct mvneta_bm *priv, struct mvneta_bm_pool *bm_pool, From cbbe6096de425131e94c95bfe017718dc625330e Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 12 Sep 2016 10:03:41 -0400 Subject: [PATCH 0955/1715] natsemi: use IS_ENABLED() instead of checking for built-in or module The IS_ENABLED() macro checks if a Kconfig symbol has been enabled either built-in or as a module, use that macro instead of open coding the same. Using the macro makes the code more readable by helping abstract away some of the Kconfig built-in and module enable details. Signed-off-by: Javier Martinez Canillas Signed-off-by: David S. Miller --- drivers/net/ethernet/natsemi/ns83820.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/natsemi/ns83820.c b/drivers/net/ethernet/natsemi/ns83820.c index eb807b0dc72a..569ade6cf85c 100644 --- a/drivers/net/ethernet/natsemi/ns83820.c +++ b/drivers/net/ethernet/natsemi/ns83820.c @@ -134,7 +134,7 @@ static int lnksts = 0; /* CFG_LNKSTS bit polarity */ /* tunables */ #define RX_BUF_SIZE 1500 /* 8192 */ -#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) +#if IS_ENABLED(CONFIG_VLAN_8021Q) #define NS83820_VLAN_ACCEL_SUPPORT #endif From 795f02344af4c81ddc30a90f0b11b7bba053abd4 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 12 Sep 2016 10:03:42 -0400 Subject: [PATCH 0956/1715] sfc: use IS_ENABLED() instead of checking for built-in or module The IS_ENABLED() macro checks if a Kconfig symbol has been enabled either built-in or as a module, use that macro instead of open coding the same. Using the macro makes the code more readable by helping abstract away some of the Kconfig built-in and module enable details. Signed-off-by: Javier Martinez Canillas Acked-by: Bert Kenward Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/falcon_boards.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/sfc/falcon_boards.c b/drivers/net/ethernet/sfc/falcon_boards.c index 1736f4b806af..f6883b2b5da3 100644 --- a/drivers/net/ethernet/sfc/falcon_boards.c +++ b/drivers/net/ethernet/sfc/falcon_boards.c @@ -64,7 +64,7 @@ #define LM87_ALARM_TEMP_INT 0x10 #define LM87_ALARM_TEMP_EXT1 0x20 -#if defined(CONFIG_SENSORS_LM87) || defined(CONFIG_SENSORS_LM87_MODULE) +#if IS_ENABLED(CONFIG_SENSORS_LM87) static int efx_poke_lm87(struct i2c_client *client, const u8 *reg_values) { @@ -455,7 +455,7 @@ static int sfe4001_init(struct efx_nic *efx) struct falcon_board *board = falcon_board(efx); int rc; -#if defined(CONFIG_SENSORS_LM90) || defined(CONFIG_SENSORS_LM90_MODULE) +#if IS_ENABLED(CONFIG_SENSORS_LM90) board->hwmon_client = i2c_new_device(&board->i2c_adap, &sfe4001_hwmon_info); #else From 547e530a5e10fbc8e78bf2573508e46ca1bf571f Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 12 Sep 2016 10:03:43 -0400 Subject: [PATCH 0957/1715] sis900: use IS_ENABLED() instead of checking for built-in or module The IS_ENABLED() macro checks if a Kconfig symbol has been enabled either built-in or as a module, use that macro instead of open coding the same. Using the macro makes the code more readable by helping abstract away some of the Kconfig built-in and module enable details. Signed-off-by: Javier Martinez Canillas Acked-by: Daniele Venzano Signed-off-by: David S. Miller --- drivers/net/ethernet/sis/sis900.c | 4 ++-- drivers/net/ethernet/sis/sis900.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/sis/sis900.c b/drivers/net/ethernet/sis/sis900.c index 95001ee408ab..6f85276376e8 100644 --- a/drivers/net/ethernet/sis/sis900.c +++ b/drivers/net/ethernet/sis/sis900.c @@ -1426,7 +1426,7 @@ static void sis900_set_mode(struct sis900_private *sp, int speed, int duplex) rx_flags |= RxATX; } -#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) +#if IS_ENABLED(CONFIG_VLAN_8021Q) /* Can accept Jumbo packet */ rx_flags |= RxAJAB; #endif @@ -1750,7 +1750,7 @@ static int sis900_rx(struct net_device *net_dev) data_size = rx_status & DSIZE; rx_size = data_size - CRC_SIZE; -#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) +#if IS_ENABLED(CONFIG_VLAN_8021Q) /* ``TOOLONG'' flag means jumbo packet received. */ if ((rx_status & TOOLONG) && data_size <= MAX_FRAME_SIZE) rx_status &= (~ ((unsigned int)TOOLONG)); diff --git a/drivers/net/ethernet/sis/sis900.h b/drivers/net/ethernet/sis/sis900.h index 7d430d322931..f0da3dc52c01 100644 --- a/drivers/net/ethernet/sis/sis900.h +++ b/drivers/net/ethernet/sis/sis900.h @@ -310,7 +310,7 @@ enum sis630_revision_id { #define CRC_SIZE 4 #define MAC_HEADER_SIZE 14 -#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) +#if IS_ENABLED(CONFIG_VLAN_8021Q) #define MAX_FRAME_SIZE (1518 + 4) #else #define MAX_FRAME_SIZE 1518 From 12c70f30533ebf31e86c070253555149b9bf6ff6 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 12 Sep 2016 10:03:44 -0400 Subject: [PATCH 0958/1715] stmmac: use IS_ENABLED() instead of checking for built-in or module The IS_ENABLED() macro checks if a Kconfig symbol has been enabled either built-in or as a module, use that macro instead of open coding the same. Using the macro makes the code more readable by helping abstract away some of the Kconfig built-in and module enable details. Signed-off-by: Javier Martinez Canillas Reviewed-by: Alexandre TORGUE Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index 2533b91f1421..d3292c4a6eda 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -30,7 +30,7 @@ #include #include #include -#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) +#if IS_ENABLED(CONFIG_VLAN_8021Q) #define STMMAC_VLAN_TAG_USED #include #endif From 5f94bebe72ede10da4b779690894f34b259cfce4 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 12 Sep 2016 10:03:45 -0400 Subject: [PATCH 0959/1715] hamradio: use IS_ENABLED() instead of checking for built-in or module The IS_ENABLED() macro checks if a Kconfig symbol has been enabled either built-in or as a module, use that macro instead of open coding the same. Using the macro makes the code more readable by helping abstract away some of the Kconfig built-in and module enable details. Signed-off-by: Javier Martinez Canillas Signed-off-by: David S. Miller --- drivers/net/hamradio/bpqether.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c index d95a50ae996d..622ab3ab9e93 100644 --- a/drivers/net/hamradio/bpqether.c +++ b/drivers/net/hamradio/bpqether.c @@ -484,7 +484,7 @@ static void bpq_setup(struct net_device *dev) dev->flags = 0; dev->features = NETIF_F_LLTX; /* Allow recursion */ -#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) +#if IS_ENABLED(CONFIG_AX25) dev->header_ops = &ax25_header_ops; #endif From 4c73195edbe3a5d7e14ea549bb261cf35c29f0cc Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 12 Sep 2016 10:03:46 -0400 Subject: [PATCH 0960/1715] iwlegacy: use IS_ENABLED() instead of checking for built-in or module The IS_ENABLED() macro checks if a Kconfig symbol has been enabled either built-in or as a module, use that macro instead of open coding the same. Using the macro makes the code more readable by helping abstract away some of the Kconfig built-in and module enable details. Signed-off-by: Javier Martinez Canillas Signed-off-by: David S. Miller --- drivers/net/wireless/intel/iwlegacy/common.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlegacy/common.h b/drivers/net/wireless/intel/iwlegacy/common.h index 726ede391cb9..3bba521d2cd9 100644 --- a/drivers/net/wireless/intel/iwlegacy/common.h +++ b/drivers/net/wireless/intel/iwlegacy/common.h @@ -1320,7 +1320,7 @@ struct il_priv { u64 timestamp; union { -#if defined(CONFIG_IWL3945) || defined(CONFIG_IWL3945_MODULE) +#if IS_ENABLED(CONFIG_IWL3945) struct { void *shared_virt; dma_addr_t shared_phys; @@ -1351,7 +1351,7 @@ struct il_priv { } _3945; #endif -#if defined(CONFIG_IWL4965) || defined(CONFIG_IWL4965_MODULE) +#if IS_ENABLED(CONFIG_IWL4965) struct { struct il_rx_phy_res last_phy_res; bool last_phy_res_valid; From 8d51dbb8c7fb5412f0935c20f66e27d2c63ef4a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Mon, 12 Sep 2016 15:55:43 +0200 Subject: [PATCH 0961/1715] mac80211: Re-structure aqm debugfs output and keep CoDel stats per txq MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently the 'aqm' stats in mac80211 only keeps overlimit drop stats, not CoDel stats. This moves the CoDel stats into the txqi structure to keep them per txq in order to show them in debugfs. In addition, the aqm debugfs output is restructured by splitting it up into three files: One global per phy, one per netdev and one per station, in the appropriate directories. The files are all called aqm, and are only created if the driver supports the wake_tx_queue op (rather than emitting an error on open as previously). Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Johannes Berg --- net/mac80211/debugfs.c | 161 +++++++--------------------------- net/mac80211/debugfs_netdev.c | 37 +++++++- net/mac80211/debugfs_sta.c | 52 +++++++++++ net/mac80211/ieee80211_i.h | 2 +- net/mac80211/tx.c | 4 +- 5 files changed, 122 insertions(+), 134 deletions(-) diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 2906c1004e1a..5bbb470f335f 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -71,138 +71,39 @@ DEBUGFS_READONLY_FILE(wep_iv, "%#08x", DEBUGFS_READONLY_FILE(rate_ctrl_alg, "%s", local->rate_ctrl ? local->rate_ctrl->ops->name : "hw/driver"); -struct aqm_info { - struct ieee80211_local *local; - size_t size; - size_t len; - unsigned char buf[0]; -}; - -#define AQM_HDR_LEN 200 -#define AQM_HW_ENTRY_LEN 40 -#define AQM_TXQ_ENTRY_LEN 110 - -static int aqm_open(struct inode *inode, struct file *file) -{ - struct ieee80211_local *local = inode->i_private; - struct ieee80211_sub_if_data *sdata; - struct sta_info *sta; - struct txq_info *txqi; - struct fq *fq = &local->fq; - struct aqm_info *info = NULL; - int len = 0; - int i; - - if (!local->ops->wake_tx_queue) - return -EOPNOTSUPP; - - len += AQM_HDR_LEN; - len += 6 * AQM_HW_ENTRY_LEN; - - rcu_read_lock(); - list_for_each_entry_rcu(sdata, &local->interfaces, list) - len += AQM_TXQ_ENTRY_LEN; - list_for_each_entry_rcu(sta, &local->sta_list, list) - len += AQM_TXQ_ENTRY_LEN * ARRAY_SIZE(sta->sta.txq); - rcu_read_unlock(); - - info = vmalloc(len); - if (!info) - return -ENOMEM; - - spin_lock_bh(&local->fq.lock); - rcu_read_lock(); - - file->private_data = info; - info->local = local; - info->size = len; - len = 0; - - len += scnprintf(info->buf + len, info->size - len, - "* hw\n" - "access name value\n" - "R fq_flows_cnt %u\n" - "R fq_backlog %u\n" - "R fq_overlimit %u\n" - "R fq_collisions %u\n" - "RW fq_limit %u\n" - "RW fq_quantum %u\n", - fq->flows_cnt, - fq->backlog, - fq->overlimit, - fq->collisions, - fq->limit, - fq->quantum); - - len += scnprintf(info->buf + len, - info->size - len, - "* vif\n" - "ifname addr ac backlog-bytes backlog-packets flows overlimit collisions tx-bytes tx-packets\n"); - - list_for_each_entry_rcu(sdata, &local->interfaces, list) { - txqi = to_txq_info(sdata->vif.txq); - len += scnprintf(info->buf + len, info->size - len, - "%s %pM %u %u %u %u %u %u %u %u\n", - sdata->name, - sdata->vif.addr, - txqi->txq.ac, - txqi->tin.backlog_bytes, - txqi->tin.backlog_packets, - txqi->tin.flows, - txqi->tin.overlimit, - txqi->tin.collisions, - txqi->tin.tx_bytes, - txqi->tin.tx_packets); - } - - len += scnprintf(info->buf + len, - info->size - len, - "* sta\n" - "ifname addr tid ac backlog-bytes backlog-packets flows overlimit collisions tx-bytes tx-packets\n"); - - list_for_each_entry_rcu(sta, &local->sta_list, list) { - sdata = sta->sdata; - for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) { - txqi = to_txq_info(sta->sta.txq[i]); - len += scnprintf(info->buf + len, info->size - len, - "%s %pM %d %d %u %u %u %u %u %u %u\n", - sdata->name, - sta->sta.addr, - txqi->txq.tid, - txqi->txq.ac, - txqi->tin.backlog_bytes, - txqi->tin.backlog_packets, - txqi->tin.flows, - txqi->tin.overlimit, - txqi->tin.collisions, - txqi->tin.tx_bytes, - txqi->tin.tx_packets); - } - } - - info->len = len; - - rcu_read_unlock(); - spin_unlock_bh(&local->fq.lock); - - return 0; -} - -static int aqm_release(struct inode *inode, struct file *file) -{ - vfree(file->private_data); - return 0; -} - static ssize_t aqm_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { - struct aqm_info *info = file->private_data; + struct ieee80211_local *local = file->private_data; + struct fq *fq = &local->fq; + char buf[200]; + int len = 0; + + spin_lock_bh(&local->fq.lock); + rcu_read_lock(); + + len = scnprintf(buf, sizeof(buf), + "access name value\n" + "R fq_flows_cnt %u\n" + "R fq_backlog %u\n" + "R fq_overlimit %u\n" + "R fq_collisions %u\n" + "RW fq_limit %u\n" + "RW fq_quantum %u\n", + fq->flows_cnt, + fq->backlog, + fq->overlimit, + fq->collisions, + fq->limit, + fq->quantum); + + rcu_read_unlock(); + spin_unlock_bh(&local->fq.lock); return simple_read_from_buffer(user_buf, count, ppos, - info->buf, info->len); + buf, len); } static ssize_t aqm_write(struct file *file, @@ -210,8 +111,7 @@ static ssize_t aqm_write(struct file *file, size_t count, loff_t *ppos) { - struct aqm_info *info = file->private_data; - struct ieee80211_local *local = info->local; + struct ieee80211_local *local = file->private_data; char buf[100]; size_t len; @@ -237,8 +137,7 @@ static ssize_t aqm_write(struct file *file, static const struct file_operations aqm_ops = { .write = aqm_write, .read = aqm_read, - .open = aqm_open, - .release = aqm_release, + .open = simple_open, .llseek = default_llseek, }; @@ -428,7 +327,9 @@ void debugfs_hw_add(struct ieee80211_local *local) DEBUGFS_ADD(hwflags); DEBUGFS_ADD(user_power); DEBUGFS_ADD(power); - DEBUGFS_ADD_MODE(aqm, 0600); + + if (local->ops->wake_tx_queue) + DEBUGFS_ADD_MODE(aqm, 0600); statsd = debugfs_create_dir("statistics", phyd); diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index a5ba739cd2a7..5d35c0f37bb7 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -30,7 +30,7 @@ static ssize_t ieee80211_if_read( size_t count, loff_t *ppos, ssize_t (*format)(const struct ieee80211_sub_if_data *, char *, int)) { - char buf[70]; + char buf[200]; ssize_t ret = -EINVAL; read_lock(&dev_base_lock); @@ -486,6 +486,38 @@ static ssize_t ieee80211_if_fmt_num_buffered_multicast( } IEEE80211_IF_FILE_R(num_buffered_multicast); +static ssize_t ieee80211_if_fmt_aqm( + const struct ieee80211_sub_if_data *sdata, char *buf, int buflen) +{ + struct ieee80211_local *local = sdata->local; + struct txq_info *txqi = to_txq_info(sdata->vif.txq); + int len; + + spin_lock_bh(&local->fq.lock); + rcu_read_lock(); + + len = scnprintf(buf, + buflen, + "ac backlog-bytes backlog-packets new-flows drops marks overlimit collisions tx-bytes tx-packets\n" + "%u %u %u %u %u %u %u %u %u %u\n", + txqi->txq.ac, + txqi->tin.backlog_bytes, + txqi->tin.backlog_packets, + txqi->tin.flows, + txqi->cstats.drop_count, + txqi->cstats.ecn_mark, + txqi->tin.overlimit, + txqi->tin.collisions, + txqi->tin.tx_bytes, + txqi->tin.tx_packets); + + rcu_read_unlock(); + spin_unlock_bh(&local->fq.lock); + + return len; +} +IEEE80211_IF_FILE_R(aqm); + /* IBSS attributes */ static ssize_t ieee80211_if_fmt_tsf( const struct ieee80211_sub_if_data *sdata, char *buf, int buflen) @@ -618,6 +650,9 @@ static void add_common_files(struct ieee80211_sub_if_data *sdata) DEBUGFS_ADD(rc_rateidx_vht_mcs_mask_2ghz); DEBUGFS_ADD(rc_rateidx_vht_mcs_mask_5ghz); DEBUGFS_ADD(hw_queues); + + if (sdata->local->ops->wake_tx_queue) + DEBUGFS_ADD(aqm); } static void add_sta_files(struct ieee80211_sub_if_data *sdata) diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index fd334133ff45..fb2693582e40 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -133,6 +133,55 @@ static ssize_t sta_last_seq_ctrl_read(struct file *file, char __user *userbuf, } STA_OPS(last_seq_ctrl); +#define AQM_TXQ_ENTRY_LEN 130 + +static ssize_t sta_aqm_read(struct file *file, char __user *userbuf, + size_t count, loff_t *ppos) +{ + struct sta_info *sta = file->private_data; + struct ieee80211_local *local = sta->local; + size_t bufsz = AQM_TXQ_ENTRY_LEN*(IEEE80211_NUM_TIDS+1); + char *buf = kzalloc(bufsz, GFP_KERNEL), *p = buf; + struct txq_info *txqi; + ssize_t rv; + int i; + + if (!buf) + return -ENOMEM; + + spin_lock_bh(&local->fq.lock); + rcu_read_lock(); + + p += scnprintf(p, + bufsz+buf-p, + "tid ac backlog-bytes backlog-packets new-flows drops marks overlimit collisions tx-bytes tx-packets\n"); + + for (i = 0; i < IEEE80211_NUM_TIDS; i++) { + txqi = to_txq_info(sta->sta.txq[i]); + p += scnprintf(p, bufsz+buf-p, + "%d %d %u %u %u %u %u %u %u %u %u\n", + txqi->txq.tid, + txqi->txq.ac, + txqi->tin.backlog_bytes, + txqi->tin.backlog_packets, + txqi->tin.flows, + txqi->cstats.drop_count, + txqi->cstats.ecn_mark, + txqi->tin.overlimit, + txqi->tin.collisions, + txqi->tin.tx_bytes, + txqi->tin.tx_packets); + } + + rcu_read_unlock(); + spin_unlock_bh(&local->fq.lock); + + rv = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf); + kfree(buf); + return rv; +} +STA_OPS(aqm); + static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) { @@ -478,6 +527,9 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta) DEBUGFS_ADD_COUNTER(rx_fragments, rx_stats.fragments); DEBUGFS_ADD_COUNTER(tx_filtered, status_stats.filtered); + if (local->ops->wake_tx_queue) + DEBUGFS_ADD(aqm); + if (sizeof(sta->driver_buffered_tids) == sizeof(u32)) debugfs_create_x32("driver_buffered_tids", 0400, sta->debugfs_dir, diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 75761686a98b..c71c73594790 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -818,6 +818,7 @@ struct txq_info { struct fq_tin tin; struct fq_flow def_flow; struct codel_vars def_cvars; + struct codel_stats cstats; unsigned long flags; /* keep last! */ @@ -1117,7 +1118,6 @@ struct ieee80211_local { struct fq fq; struct codel_vars *cvars; struct codel_params cparams; - struct codel_stats cstats; const struct ieee80211_ops *ops; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index efc38e7b90b9..ee9e7d60cb78 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1343,7 +1343,7 @@ static struct sk_buff *fq_tin_dequeue_func(struct fq *fq, local = container_of(fq, struct ieee80211_local, fq); txqi = container_of(tin, struct txq_info, tin); cparams = &local->cparams; - cstats = &local->cstats; + cstats = &txqi->cstats; if (flow == &txqi->def_flow) cvars = &txqi->def_cvars; @@ -1403,6 +1403,7 @@ void ieee80211_txq_init(struct ieee80211_sub_if_data *sdata, fq_tin_init(&txqi->tin); fq_flow_init(&txqi->def_flow); codel_vars_init(&txqi->def_cvars); + codel_stats_init(&txqi->cstats); txqi->txq.vif = &sdata->vif; @@ -1441,7 +1442,6 @@ int ieee80211_txq_setup_flows(struct ieee80211_local *local) return ret; codel_params_init(&local->cparams); - codel_stats_init(&local->cstats); local->cparams.interval = MS2TIME(100); local->cparams.target = MS2TIME(20); local->cparams.ecn = true; From 11d62caf93cf12ce80ff8304849887666ec8880a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 13 Sep 2016 08:28:22 +0200 Subject: [PATCH 0962/1715] mac80211: simplify TDLS RA lookup smatch pointed out that the second check of "tdls_auth" was pointless since if it was true, we returned from the function already. We can further simplify the code by moving the first check (if it's a TDLS peer at all) into the outer if, to only handle that inside. This simplifies the control flow here. Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index ee9e7d60cb78..61d302d97145 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2263,15 +2263,9 @@ static int ieee80211_lookup_ra_sta(struct ieee80211_sub_if_data *sdata, case NL80211_IFTYPE_STATION: if (sdata->wdev.wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS) { sta = sta_info_get(sdata, skb->data); - if (sta) { - bool tdls_peer, tdls_auth; - - tdls_peer = test_sta_flag(sta, - WLAN_STA_TDLS_PEER); - tdls_auth = test_sta_flag(sta, - WLAN_STA_TDLS_PEER_AUTH); - - if (tdls_peer && tdls_auth) { + if (sta && test_sta_flag(sta, WLAN_STA_TDLS_PEER)) { + if (test_sta_flag(sta, + WLAN_STA_TDLS_PEER_AUTH)) { *sta_out = sta; return 0; } @@ -2283,8 +2277,7 @@ static int ieee80211_lookup_ra_sta(struct ieee80211_sub_if_data *sdata, * after a TDLS sta is removed due to being * unreachable. */ - if (tdls_peer && !tdls_auth && - !ieee80211_is_tdls_setup(skb)) + if (!ieee80211_is_tdls_setup(skb)) return -EINVAL; } From 14e2dee0996f51e0ff0d868497c7e1b90f012665 Mon Sep 17 00:00:00 2001 From: Laura Garcia Liebana Date: Tue, 13 Sep 2016 10:21:46 +0200 Subject: [PATCH 0963/1715] netfilter: nft_hash: fix hash overflow validation The overflow validation in the init() function establishes that the maximum value that the hash could reach is less than U32_MAX, which is likely to be true. The fix detects the overflow when the maximum hash value is less than the offset itself. Fixes: 70ca767ea1b2 ("netfilter: nft_hash: Add hash offset value") Reported-by: Liping Zhang Signed-off-by: Laura Garcia Liebana Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_hash.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index bd12f7a801c2..09473b415b95 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -76,7 +76,7 @@ static int nft_hash_init(const struct nft_ctx *ctx, if (priv->modulus <= 1) return -ERANGE; - if (priv->offset + priv->modulus - 1 < U32_MAX) + if (priv->offset + priv->modulus - 1 < priv->offset) return -EOVERFLOW; priv->seed = ntohl(nla_get_be32(tb[NFTA_HASH_SEED])); From 5f4761dda2ba3743ceb5eb5b5e7483172927831a Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sat, 3 Sep 2016 17:38:19 +0100 Subject: [PATCH 0964/1715] ath10k: fix memory leak on caldata on error exit path caldata is not being free'd on the error exit path, causing a memory leak and data definitely should not be freed. Free caldata instead of data. Thanks to Kalle Valo for spotting that data should not be free'd. Signed-off-by: Colin Ian King Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c index 0635995ba165..a6d9c06ed045 100644 --- a/drivers/net/wireless/ath/ath10k/pci.c +++ b/drivers/net/wireless/ath/ath10k/pci.c @@ -2726,7 +2726,7 @@ static int ath10k_pci_hif_fetch_cal_eeprom(struct ath10k *ar, void **data, return 0; err_free: - kfree(data); + kfree(caldata); return -EINVAL; } From 214d553944815245897f6ac71f0b2e1905badcd9 Mon Sep 17 00:00:00 2001 From: Chaehyun Lim Date: Mon, 5 Sep 2016 22:38:02 +0900 Subject: [PATCH 0965/1715] ath10k: remove unused variable ar_pci Trival fix to remove unused variable ar_pci in ath10k_pci_tx_pipe_cleanup when building with W=1: drivers/net/wireless/ath/ath10k/pci.c:1696:21: warning: variable 'ar_pci' set but not used [-Wunused-but-set-variable] Signed-off-by: Chaehyun Lim Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/pci.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c index a6d9c06ed045..0457e315d336 100644 --- a/drivers/net/wireless/ath/ath10k/pci.c +++ b/drivers/net/wireless/ath/ath10k/pci.c @@ -1691,14 +1691,12 @@ static void ath10k_pci_rx_pipe_cleanup(struct ath10k_pci_pipe *pci_pipe) static void ath10k_pci_tx_pipe_cleanup(struct ath10k_pci_pipe *pci_pipe) { struct ath10k *ar; - struct ath10k_pci *ar_pci; struct ath10k_ce_pipe *ce_pipe; struct ath10k_ce_ring *ce_ring; struct sk_buff *skb; int i; ar = pci_pipe->hif_ce_state; - ar_pci = ath10k_pci_priv(ar); ce_pipe = pci_pipe->ce_hdl; ce_ring = ce_pipe->src_ring; From 8c1d7fa53166dd82bcf6be5ffc83bc4066150bf5 Mon Sep 17 00:00:00 2001 From: Thomas Pedersen Date: Tue, 6 Sep 2016 12:05:28 -0700 Subject: [PATCH 0966/1715] ath10k: enable peer stats by default IFTYPE_MESH_POINT need to rely on these for accurate path selection metrics. Other modes will probably also find them useful. Enabling peer stats has the side effect of reducing max number of STAs from 128 to 118. There should be negligible performance impact. If users really need 128 STAs and don't mind losing out on peer stats, they can still disable them: echo 0 > debugfs/ieee80211/phyn/ath10k/peer_stats Signed-off-by: Thomas Pedersen Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c index 3abf8d617d32..e859ca626ca0 100644 --- a/drivers/net/wireless/ath/ath10k/core.c +++ b/drivers/net/wireless/ath/ath10k/core.c @@ -2145,6 +2145,9 @@ static void ath10k_core_register_work(struct work_struct *work) struct ath10k *ar = container_of(work, struct ath10k, register_work); int status; + /* peer stats are enabled by default */ + set_bit(ATH10K_FLAG_PEER_STATS, &ar->dev_flags); + status = ath10k_core_probe_fw(ar); if (status) { ath10k_err(ar, "could not probe fw (%d)\n", status); From 3040420158c139f64776935587bfad2584152f4c Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Thu, 18 Aug 2016 18:26:35 -0700 Subject: [PATCH 0967/1715] ath10k: improve logging message Helps to know the sta pointer. Signed-off-by: Ben Greear [kvalo@qca.qualcomm.com: add %pK and remove the colon] Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/mac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index c4d965fe990e..0a44dab5a287 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -6022,8 +6022,8 @@ static int ath10k_sta_state(struct ieee80211_hw *hw, * Existing station deletion. */ ath10k_dbg(ar, ATH10K_DBG_MAC, - "mac vdev %d peer delete %pM (sta gone)\n", - arvif->vdev_id, sta->addr); + "mac vdev %d peer delete %pM sta %pK (sta gone)\n", + arvif->vdev_id, sta->addr, sta); ret = ath10k_peer_delete(ar, arvif->vdev_id, sta->addr); if (ret) From 43d923e2c192ecef19447dc2b0ca0bab6d8b1f64 Mon Sep 17 00:00:00 2001 From: Vasanthakumar Thiagarajan Date: Fri, 9 Sep 2016 17:25:13 +0300 Subject: [PATCH 0968/1715] ath10k: move ath10k_hw_params definition to hw.h This is to prepare for rx descriptor abstraction where we'll be dereferencing ath10k_hw_params member in hw.h. Moreover hw.h looks more suitable to house ath10k_hw_params definition than core.h Signed-off-by: Vasanthakumar Thiagarajan Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/core.h | 53 +------------------------- drivers/net/wireless/ath/ath10k/hw.h | 53 ++++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 52 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h index c22391394ad1..6ec9495bcc04 100644 --- a/drivers/net/wireless/ath/ath10k/core.h +++ b/drivers/net/wireless/ath/ath10k/core.h @@ -736,58 +736,7 @@ struct ath10k { struct ath10k_htc htc; struct ath10k_htt htt; - struct ath10k_hw_params { - u32 id; - u16 dev_id; - const char *name; - u32 patch_load_addr; - int uart_pin; - u32 otp_exe_param; - - /* Type of hw cycle counter wraparound logic, for more info - * refer enum ath10k_hw_cc_wraparound_type. - */ - enum ath10k_hw_cc_wraparound_type cc_wraparound_type; - - /* Some of chip expects fragment descriptor to be continuous - * memory for any TX operation. Set continuous_frag_desc flag - * for the hardware which have such requirement. - */ - bool continuous_frag_desc; - - /* CCK hardware rate table mapping for the newer chipsets - * like QCA99X0, QCA4019 got revised. The CCK h/w rate values - * are in a proper order with respect to the rate/preamble - */ - bool cck_rate_map_rev2; - - u32 channel_counters_freq_hz; - - /* Mgmt tx descriptors threshold for limiting probe response - * frames. - */ - u32 max_probe_resp_desc_thres; - - /* The padding bytes's location is different on various chips */ - enum ath10k_hw_4addr_pad hw_4addr_pad; - - u32 tx_chain_mask; - u32 rx_chain_mask; - u32 max_spatial_stream; - u32 cal_data_len; - - struct ath10k_hw_params_fw { - const char *dir; - const char *board; - size_t board_size; - size_t board_ext_size; - } fw; - - /* qca99x0 family chips deliver broadcast/multicast management - * frames encrypted and expect software do decryption. - */ - bool sw_decrypt_mcast_mgmt; - } hw_params; + struct ath10k_hw_params hw_params; /* contains the firmware images used with ATH10K_FIRMWARE_MODE_NORMAL */ struct ath10k_fw_components normal_mode_fw; diff --git a/drivers/net/wireless/ath/ath10k/hw.h b/drivers/net/wireless/ath/ath10k/hw.h index e014cd732a0d..af0d5d1e8213 100644 --- a/drivers/net/wireless/ath/ath10k/hw.h +++ b/drivers/net/wireless/ath/ath10k/hw.h @@ -363,6 +363,59 @@ enum ath10k_hw_cc_wraparound_type { ATH10K_HW_CC_WRAP_SHIFTED_EACH = 2, }; +struct ath10k_hw_params { + u32 id; + u16 dev_id; + const char *name; + u32 patch_load_addr; + int uart_pin; + u32 otp_exe_param; + + /* Type of hw cycle counter wraparound logic, for more info + * refer enum ath10k_hw_cc_wraparound_type. + */ + enum ath10k_hw_cc_wraparound_type cc_wraparound_type; + + /* Some of chip expects fragment descriptor to be continuous + * memory for any TX operation. Set continuous_frag_desc flag + * for the hardware which have such requirement. + */ + bool continuous_frag_desc; + + /* CCK hardware rate table mapping for the newer chipsets + * like QCA99X0, QCA4019 got revised. The CCK h/w rate values + * are in a proper order with respect to the rate/preamble + */ + bool cck_rate_map_rev2; + + u32 channel_counters_freq_hz; + + /* Mgmt tx descriptors threshold for limiting probe response + * frames. + */ + u32 max_probe_resp_desc_thres; + + /* The padding bytes's location is different on various chips */ + enum ath10k_hw_4addr_pad hw_4addr_pad; + + u32 tx_chain_mask; + u32 rx_chain_mask; + u32 max_spatial_stream; + u32 cal_data_len; + + struct ath10k_hw_params_fw { + const char *dir; + const char *board; + size_t board_size; + size_t board_ext_size; + } fw; + + /* qca99x0 family chips deliver broadcast/multicast management + * frames encrypted and expect software do decryption. + */ + bool sw_decrypt_mcast_mgmt; +}; + /* Target specific defines for MAIN firmware */ #define TARGET_NUM_VDEVS 8 #define TARGET_NUM_PEER_AST 2 From ae02c8719aab19bf311b6ce2881feb844456297e Mon Sep 17 00:00:00 2001 From: Vasanthakumar Thiagarajan Date: Fri, 9 Sep 2016 17:25:22 +0300 Subject: [PATCH 0969/1715] ath10k: add provision for Rx descriptor abstraction There are slight differences in Rx hw descriptor information among different chips. So far driver does not use those new information for any functionalities, but there is one important information which is available from QCA99X0 onwards to indicate the number of bytes that hw padded at the begining of the rx payload and this information is needed to undecap the rx packet. Add an abstraction for Rx desc to make use of the new desc information available. The callback that this patch defines to retrieve the padding bytes will be used in follow-up patch. Signed-off-by: Vasanthakumar Thiagarajan [Rename operations to hw_ops for other purposes] Signed-off-by: Benjamin Berg Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/core.c | 12 ++++++++++++ drivers/net/wireless/ath/ath10k/hw.c | 13 +++++++++++++ drivers/net/wireless/ath/ath10k/hw.h | 12 ++++++++++++ 3 files changed, 37 insertions(+) diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c index e859ca626ca0..2d405a6c3375 100644 --- a/drivers/net/wireless/ath/ath10k/core.c +++ b/drivers/net/wireless/ath/ath10k/core.c @@ -68,6 +68,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .board_size = QCA988X_BOARD_DATA_SZ, .board_ext_size = QCA988X_BOARD_EXT_DATA_SZ, }, + .hw_ops = &qca988x_ops, }, { .id = QCA9887_HW_1_0_VERSION, @@ -87,6 +88,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .board_size = QCA9887_BOARD_DATA_SZ, .board_ext_size = QCA9887_BOARD_EXT_DATA_SZ, }, + .hw_ops = &qca988x_ops, }, { .id = QCA6174_HW_2_1_VERSION, @@ -104,6 +106,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .board_size = QCA6174_BOARD_DATA_SZ, .board_ext_size = QCA6174_BOARD_EXT_DATA_SZ, }, + .hw_ops = &qca988x_ops, }, { .id = QCA6174_HW_2_1_VERSION, @@ -122,6 +125,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .board_size = QCA6174_BOARD_DATA_SZ, .board_ext_size = QCA6174_BOARD_EXT_DATA_SZ, }, + .hw_ops = &qca988x_ops, }, { .id = QCA6174_HW_3_0_VERSION, @@ -140,6 +144,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .board_size = QCA6174_BOARD_DATA_SZ, .board_ext_size = QCA6174_BOARD_EXT_DATA_SZ, }, + .hw_ops = &qca988x_ops, }, { .id = QCA6174_HW_3_2_VERSION, @@ -159,6 +164,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .board_size = QCA6174_BOARD_DATA_SZ, .board_ext_size = QCA6174_BOARD_EXT_DATA_SZ, }, + .hw_ops = &qca988x_ops, }, { .id = QCA99X0_HW_2_0_DEV_VERSION, @@ -183,6 +189,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .board_ext_size = QCA99X0_BOARD_EXT_DATA_SZ, }, .sw_decrypt_mcast_mgmt = true, + .hw_ops = &qca99x0_ops, }, { .id = QCA9984_HW_1_0_DEV_VERSION, @@ -207,6 +214,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .board_ext_size = QCA99X0_BOARD_EXT_DATA_SZ, }, .sw_decrypt_mcast_mgmt = true, + .hw_ops = &qca99x0_ops, }, { .id = QCA9888_HW_2_0_DEV_VERSION, @@ -230,6 +238,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .board_ext_size = QCA99X0_BOARD_EXT_DATA_SZ, }, .sw_decrypt_mcast_mgmt = true, + .hw_ops = &qca99x0_ops, }, { .id = QCA9377_HW_1_0_DEV_VERSION, @@ -247,6 +256,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .board_size = QCA9377_BOARD_DATA_SZ, .board_ext_size = QCA9377_BOARD_EXT_DATA_SZ, }, + .hw_ops = &qca988x_ops, }, { .id = QCA9377_HW_1_1_DEV_VERSION, @@ -264,6 +274,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .board_size = QCA9377_BOARD_DATA_SZ, .board_ext_size = QCA9377_BOARD_EXT_DATA_SZ, }, + .hw_ops = &qca988x_ops, }, { .id = QCA4019_HW_1_0_DEV_VERSION, @@ -289,6 +300,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .board_ext_size = QCA4019_BOARD_EXT_DATA_SZ, }, .sw_decrypt_mcast_mgmt = true, + .hw_ops = &qca99x0_ops, }, }; diff --git a/drivers/net/wireless/ath/ath10k/hw.c b/drivers/net/wireless/ath/ath10k/hw.c index f903d468dbe6..c2ecb9bd824a 100644 --- a/drivers/net/wireless/ath/ath10k/hw.c +++ b/drivers/net/wireless/ath/ath10k/hw.c @@ -219,3 +219,16 @@ void ath10k_hw_fill_survey_time(struct ath10k *ar, struct survey_info *survey, survey->time = CCNT_TO_MSEC(ar, cc); survey->time_busy = CCNT_TO_MSEC(ar, rcc); } + +const struct ath10k_hw_ops qca988x_ops = { +}; + +static int ath10k_qca99x0_rx_desc_get_l3_pad_bytes(struct htt_rx_desc *rxd) +{ + return MS(__le32_to_cpu(rxd->msdu_end.qca99x0.info1), + RX_MSDU_END_INFO1_L3_HDR_PAD); +} + +const struct ath10k_hw_ops qca99x0_ops = { + .rx_desc_get_l3_pad_bytes = ath10k_qca99x0_rx_desc_get_l3_pad_bytes, +}; diff --git a/drivers/net/wireless/ath/ath10k/hw.h b/drivers/net/wireless/ath/ath10k/hw.h index af0d5d1e8213..1b5ea3147f3a 100644 --- a/drivers/net/wireless/ath/ath10k/hw.h +++ b/drivers/net/wireless/ath/ath10k/hw.h @@ -414,8 +414,20 @@ struct ath10k_hw_params { * frames encrypted and expect software do decryption. */ bool sw_decrypt_mcast_mgmt; + + const struct ath10k_hw_ops *hw_ops; }; +struct htt_rx_desc; + +/* Defines needed for Rx descriptor abstraction */ +struct ath10k_hw_ops { + int (*rx_desc_get_l3_pad_bytes)(struct htt_rx_desc *rxd); +}; + +extern const struct ath10k_hw_ops qca988x_ops; +extern const struct ath10k_hw_ops qca99x0_ops; + /* Target specific defines for MAIN firmware */ #define TARGET_NUM_VDEVS 8 #define TARGET_NUM_PEER_AST 2 From 9e19e13261423eeb4398177001daa874c2128aa4 Mon Sep 17 00:00:00 2001 From: Vasanthakumar Thiagarajan Date: Fri, 9 Sep 2016 17:25:29 +0300 Subject: [PATCH 0970/1715] ath10k: properly remove padding from the start of rx payload In QCA99X0 (QCA99X0, QCA9984, QCA9888 and QCA4019) family chips, hw adds padding at the begining of the rx payload to make L3 header 4-byte aligned. In the chips doing this type of padding, the number of bytes padded will be indicated through msdu_end:info1. Define a hw_rx_desc_ops wrapper to retrieve the number of padded bytes and use this while doing undecap. This should fix padding related issues with ethernt decap format with QCA99X0, QCA9984, QCA9888 and QCA4019 hw. Signed-off-by: Vasanthakumar Thiagarajan [Rename operations to hw_ops for other purposes] Signed-off-by: Benjamin Berg Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/htt_rx.c | 36 +++++++++++++++--------- drivers/net/wireless/ath/ath10k/hw.h | 9 ++++++ 2 files changed, 31 insertions(+), 14 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c index 714b0deded9c..a3785a9aa843 100644 --- a/drivers/net/wireless/ath/ath10k/htt_rx.c +++ b/drivers/net/wireless/ath/ath10k/htt_rx.c @@ -1051,9 +1051,11 @@ static void ath10k_htt_rx_h_undecap_nwifi(struct ath10k *ar, const u8 first_hdr[64]) { struct ieee80211_hdr *hdr; + struct htt_rx_desc *rxd; size_t hdr_len; u8 da[ETH_ALEN]; u8 sa[ETH_ALEN]; + int l3_pad_bytes; /* Delivered decapped frame: * [nwifi 802.11 header] <-- replaced with 802.11 hdr @@ -1067,19 +1069,12 @@ static void ath10k_htt_rx_h_undecap_nwifi(struct ath10k *ar, */ /* pull decapped header and copy SA & DA */ - if ((ar->hw_params.hw_4addr_pad == ATH10K_HW_4ADDR_PAD_BEFORE) && - ieee80211_has_a4(((struct ieee80211_hdr *)first_hdr)->frame_control)) { - /* The QCA99X0 4 address mode pad 2 bytes at the - * beginning of MSDU - */ - hdr = (struct ieee80211_hdr *)(msdu->data + 2); - /* The skb length need be extended 2 as the 2 bytes at the tail - * be excluded due to the padding - */ - skb_put(msdu, 2); - } else { - hdr = (struct ieee80211_hdr *)(msdu->data); - } + rxd = (void *)msdu->data - sizeof(*rxd); + + l3_pad_bytes = ath10k_rx_desc_get_l3_pad_bytes(&ar->hw_params, rxd); + skb_put(msdu, l3_pad_bytes); + + hdr = (struct ieee80211_hdr *)(msdu->data + l3_pad_bytes); hdr_len = ath10k_htt_rx_nwifi_hdrlen(ar, hdr); ether_addr_copy(da, ieee80211_get_DA(hdr)); @@ -1146,6 +1141,8 @@ static void ath10k_htt_rx_h_undecap_eth(struct ath10k *ar, void *rfc1042; u8 da[ETH_ALEN]; u8 sa[ETH_ALEN]; + int l3_pad_bytes; + struct htt_rx_desc *rxd; /* Delivered decapped frame: * [eth header] <-- replaced with 802.11 hdr & rfc1042/llc @@ -1156,6 +1153,11 @@ static void ath10k_htt_rx_h_undecap_eth(struct ath10k *ar, if (WARN_ON_ONCE(!rfc1042)) return; + rxd = (void *)msdu->data - sizeof(*rxd); + l3_pad_bytes = ath10k_rx_desc_get_l3_pad_bytes(&ar->hw_params, rxd); + skb_put(msdu, l3_pad_bytes); + skb_pull(msdu, l3_pad_bytes); + /* pull decapped header and copy SA & DA */ eth = (struct ethhdr *)msdu->data; ether_addr_copy(da, eth->h_dest); @@ -1186,6 +1188,8 @@ static void ath10k_htt_rx_h_undecap_snap(struct ath10k *ar, { struct ieee80211_hdr *hdr; size_t hdr_len; + int l3_pad_bytes; + struct htt_rx_desc *rxd; /* Delivered decapped frame: * [amsdu header] <-- replaced with 802.11 hdr @@ -1193,7 +1197,11 @@ static void ath10k_htt_rx_h_undecap_snap(struct ath10k *ar, * [payload] */ - skb_pull(msdu, sizeof(struct amsdu_subframe_hdr)); + rxd = (void *)msdu->data - sizeof(*rxd); + l3_pad_bytes = ath10k_rx_desc_get_l3_pad_bytes(&ar->hw_params, rxd); + + skb_put(msdu, l3_pad_bytes); + skb_pull(msdu, sizeof(struct amsdu_subframe_hdr) + l3_pad_bytes); hdr = (struct ieee80211_hdr *)first_hdr; hdr_len = ieee80211_hdrlen(hdr->frame_control); diff --git a/drivers/net/wireless/ath/ath10k/hw.h b/drivers/net/wireless/ath/ath10k/hw.h index 1b5ea3147f3a..204f88273572 100644 --- a/drivers/net/wireless/ath/ath10k/hw.h +++ b/drivers/net/wireless/ath/ath10k/hw.h @@ -428,6 +428,15 @@ struct ath10k_hw_ops { extern const struct ath10k_hw_ops qca988x_ops; extern const struct ath10k_hw_ops qca99x0_ops; +static inline int +ath10k_rx_desc_get_l3_pad_bytes(struct ath10k_hw_params *hw, + struct htt_rx_desc *rxd) +{ + if (hw->hw_ops->rx_desc_get_l3_pad_bytes) + return hw->hw_ops->rx_desc_get_l3_pad_bytes(rxd); + return 0; +} + /* Target specific defines for MAIN firmware */ #define TARGET_NUM_VDEVS 8 #define TARGET_NUM_PEER_AST 2 From 95b5bf7ccca0d7ba575a7e26a7ed4146b9190071 Mon Sep 17 00:00:00 2001 From: Vasanthakumar Thiagarajan Date: Fri, 9 Sep 2016 17:25:36 +0300 Subject: [PATCH 0971/1715] ath10k: remove 4-addr padding related hw_param configuration hw_4addr_pad was added to handle different types of padding in 4-address rx frame. But this padding is not very specific to 4-address, it can happen even with three address + ethernet decap mode. Since the padding information can be obtained through Rx desc for QCA99X0 and newer chips, this hw_param is not needed any more. Signed-off-by: Vasanthakumar Thiagarajan Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/core.c | 9 --------- drivers/net/wireless/ath/ath10k/hw.h | 8 -------- 2 files changed, 17 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c index 2d405a6c3375..3a8984ba9f74 100644 --- a/drivers/net/wireless/ath/ath10k/core.c +++ b/drivers/net/wireless/ath/ath10k/core.c @@ -60,7 +60,6 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .otp_exe_param = 0, .channel_counters_freq_hz = 88000, .max_probe_resp_desc_thres = 0, - .hw_4addr_pad = ATH10K_HW_4ADDR_PAD_AFTER, .cal_data_len = 2116, .fw = { .dir = QCA988X_HW_2_0_FW_DIR, @@ -80,7 +79,6 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .otp_exe_param = 0, .channel_counters_freq_hz = 88000, .max_probe_resp_desc_thres = 0, - .hw_4addr_pad = ATH10K_HW_4ADDR_PAD_AFTER, .cal_data_len = 2116, .fw = { .dir = QCA9887_HW_1_0_FW_DIR, @@ -117,7 +115,6 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .otp_exe_param = 0, .channel_counters_freq_hz = 88000, .max_probe_resp_desc_thres = 0, - .hw_4addr_pad = ATH10K_HW_4ADDR_PAD_AFTER, .cal_data_len = 8124, .fw = { .dir = QCA6174_HW_2_1_FW_DIR, @@ -136,7 +133,6 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .otp_exe_param = 0, .channel_counters_freq_hz = 88000, .max_probe_resp_desc_thres = 0, - .hw_4addr_pad = ATH10K_HW_4ADDR_PAD_AFTER, .cal_data_len = 8124, .fw = { .dir = QCA6174_HW_3_0_FW_DIR, @@ -155,7 +151,6 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .otp_exe_param = 0, .channel_counters_freq_hz = 88000, .max_probe_resp_desc_thres = 0, - .hw_4addr_pad = ATH10K_HW_4ADDR_PAD_AFTER, .cal_data_len = 8124, .fw = { /* uses same binaries as hw3.0 */ @@ -177,7 +172,6 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .cck_rate_map_rev2 = true, .channel_counters_freq_hz = 150000, .max_probe_resp_desc_thres = 24, - .hw_4addr_pad = ATH10K_HW_4ADDR_PAD_BEFORE, .tx_chain_mask = 0xf, .rx_chain_mask = 0xf, .max_spatial_stream = 4, @@ -202,7 +196,6 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .cck_rate_map_rev2 = true, .channel_counters_freq_hz = 150000, .max_probe_resp_desc_thres = 24, - .hw_4addr_pad = ATH10K_HW_4ADDR_PAD_BEFORE, .tx_chain_mask = 0xf, .rx_chain_mask = 0xf, .max_spatial_stream = 4, @@ -226,7 +219,6 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .continuous_frag_desc = true, .channel_counters_freq_hz = 150000, .max_probe_resp_desc_thres = 24, - .hw_4addr_pad = ATH10K_HW_4ADDR_PAD_BEFORE, .tx_chain_mask = 3, .rx_chain_mask = 3, .max_spatial_stream = 2, @@ -288,7 +280,6 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .cck_rate_map_rev2 = true, .channel_counters_freq_hz = 125000, .max_probe_resp_desc_thres = 24, - .hw_4addr_pad = ATH10K_HW_4ADDR_PAD_BEFORE, .tx_chain_mask = 0x3, .rx_chain_mask = 0x3, .max_spatial_stream = 2, diff --git a/drivers/net/wireless/ath/ath10k/hw.h b/drivers/net/wireless/ath/ath10k/hw.h index 204f88273572..308e423d4b99 100644 --- a/drivers/net/wireless/ath/ath10k/hw.h +++ b/drivers/net/wireless/ath/ath10k/hw.h @@ -338,11 +338,6 @@ enum ath10k_hw_rate_rev2_cck { ATH10K_HW_RATE_REV2_CCK_SP_11M, }; -enum ath10k_hw_4addr_pad { - ATH10K_HW_4ADDR_PAD_AFTER, - ATH10K_HW_4ADDR_PAD_BEFORE, -}; - enum ath10k_hw_cc_wraparound_type { ATH10K_HW_CC_WRAP_DISABLED = 0, @@ -395,9 +390,6 @@ struct ath10k_hw_params { */ u32 max_probe_resp_desc_thres; - /* The padding bytes's location is different on various chips */ - enum ath10k_hw_4addr_pad hw_4addr_pad; - u32 tx_chain_mask; u32 rx_chain_mask; u32 max_spatial_stream; From 4854f175c3182816d906c4bc34be5f30556346a5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 13 Sep 2016 15:39:29 +0200 Subject: [PATCH 0972/1715] mac80211: remove useless open_count check __ieee80211_suspend() checks early on if there's anything to do by checking open_count, so there's no need to check again later in the function. Remove the useless check. Signed-off-by: Johannes Berg --- net/mac80211/pm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index 00a43a70e1fc..28a3a0957c9e 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -178,8 +178,7 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) WARN_ON(!list_empty(&local->chanctx_list)); /* stop hardware - this must stop RX */ - if (local->open_count) - ieee80211_stop_device(local); + ieee80211_stop_device(local); suspend: local->suspended = true; From 308433155a67cb097142292c8943e0aa8d1a1c79 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Thu, 8 Sep 2016 12:50:43 -0400 Subject: [PATCH 0973/1715] net: bridge: add helper to call /sbin/bridge-stp If /sbin/bridge-stp is available on the system, bridge tries to execute it instead of the kernel implementation when starting/stopping STP. If anything goes wrong with /sbin/bridge-stp, bridge silently falls back to kernel STP, making hard to debug userspace STP. This patch adds a br_stp_call_user helper to start/stop userspace STP and debug errors from the program: abnormal exit status is stored in the lower byte and normal exit status is stored in higher byte. Below is a simple example on a kernel with dynamic debug enabled: # ln -s /bin/false /sbin/bridge-stp # brctl stp br0 on br0: failed to start userspace STP (256) # dmesg br0: /sbin/bridge-stp exited with code 1 br0: failed to start userspace STP (256) br0: using kernel STP Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- net/bridge/br_stp_if.c | 43 ++++++++++++++++++++++++++++++------------ 1 file changed, 31 insertions(+), 12 deletions(-) diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index 341caa0ca63a..d8ad73b38de2 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -134,17 +134,36 @@ void br_stp_disable_port(struct net_bridge_port *p) br_become_root_bridge(br); } +static int br_stp_call_user(struct net_bridge *br, char *arg) +{ + char *argv[] = { BR_STP_PROG, br->dev->name, arg, NULL }; + char *envp[] = { NULL }; + int rc; + + /* call userspace STP and report program errors */ + rc = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC); + if (rc > 0) { + if (rc & 0xff) + br_debug(br, BR_STP_PROG " received signal %d\n", + rc & 0x7f); + else + br_debug(br, BR_STP_PROG " exited with code %d\n", + (rc >> 8) & 0xff); + } + + return rc; +} + static void br_stp_start(struct net_bridge *br) { - int r; - char *argv[] = { BR_STP_PROG, br->dev->name, "start", NULL }; - char *envp[] = { NULL }; struct net_bridge_port *p; + int err = -ENOENT; if (net_eq(dev_net(br->dev), &init_net)) - r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC); - else - r = -ENOENT; + err = br_stp_call_user(br, "start"); + + if (err && err != -ENOENT) + br_err(br, "failed to start userspace STP (%d)\n", err); spin_lock_bh(&br->lock); @@ -153,9 +172,10 @@ static void br_stp_start(struct net_bridge *br) else if (br->bridge_forward_delay > BR_MAX_FORWARD_DELAY) __br_set_forward_delay(br, BR_MAX_FORWARD_DELAY); - if (r == 0) { + if (!err) { br->stp_enabled = BR_USER_STP; br_debug(br, "userspace STP started\n"); + /* Stop hello and hold timers */ del_timer(&br->hello_timer); list_for_each_entry(p, &br->port_list, list) @@ -173,14 +193,13 @@ static void br_stp_start(struct net_bridge *br) static void br_stp_stop(struct net_bridge *br) { - int r; - char *argv[] = { BR_STP_PROG, br->dev->name, "stop", NULL }; - char *envp[] = { NULL }; struct net_bridge_port *p; + int err; if (br->stp_enabled == BR_USER_STP) { - r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC); - br_info(br, "userspace STP stopped, return code %d\n", r); + err = br_stp_call_user(br, "stop"); + if (err) + br_err(br, "failed to stop userspace STP (%d)\n", err); /* To start timers on any ports left in blocking */ mod_timer(&br->hello_timer, jiffies + br->hello_time); From c20cb8119337052a84e40cba94af732d870e22e3 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 10 Sep 2016 00:56:55 +0000 Subject: [PATCH 0974/1715] tipc: fix possible memory leak in tipc_udp_enable() 'ub' is malloced in tipc_udp_enable() and should be freed before leaving from the error handling cases, otherwise it will cause memory leak. Fixes: ba5aa84a2d22 ("tipc: split UDP nl address parsing") Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- net/tipc/udp_media.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index dd274687a53d..d80cd3f7503f 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -665,7 +665,8 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b, if (!opts[TIPC_NLA_UDP_LOCAL] || !opts[TIPC_NLA_UDP_REMOTE]) { pr_err("Invalid UDP bearer configuration"); - return -EINVAL; + err = -EINVAL; + goto err; } err = tipc_parse_udp_addr(opts[TIPC_NLA_UDP_LOCAL], &local, From 99c1790e5bbd31fe2b646bff868a55a13b1eeeb2 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Sat, 10 Sep 2016 19:59:05 +1000 Subject: [PATCH 0975/1715] net: Remove NO_IRQ from powerpc-only network drivers We'd like to eventually remove NO_IRQ on powerpc, so remove usages of it from powerpc-only drivers. Signed-off-by: Michael Ellerman Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fman/fman_mac.h | 2 +- drivers/net/ethernet/freescale/fs_enet/mac-fcc.c | 2 +- drivers/net/ethernet/freescale/fs_enet/mac-fec.c | 2 +- drivers/net/ethernet/freescale/fs_enet/mac-scc.c | 2 +- drivers/net/ethernet/ibm/emac/core.c | 10 +++++----- drivers/net/ethernet/ibm/emac/mal.c | 5 ++--- drivers/net/ethernet/ibm/ibmvnic.c | 4 ++-- drivers/net/ethernet/toshiba/ps3_gelic_net.c | 4 ++-- 8 files changed, 15 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/freescale/fman/fman_mac.h b/drivers/net/ethernet/freescale/fman/fman_mac.h index 8ddeedbcef9c..ddf0260176c9 100644 --- a/drivers/net/ethernet/freescale/fman/fman_mac.h +++ b/drivers/net/ethernet/freescale/fman/fman_mac.h @@ -192,7 +192,7 @@ struct fman_mac_params { /* A handle to the FM object this port related to */ void *fm; /* MDIO exceptions interrupt source - not valid for all - * MACs; MUST be set to 'NO_IRQ' for MACs that don't have + * MACs; MUST be set to 0 for MACs that don't have * mdio-irq, or for polling */ void *dev_id; /* device cookie used by the exception cbs */ diff --git a/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c b/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c index 7919896a9a4e..120c758f5d01 100644 --- a/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c +++ b/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c @@ -90,7 +90,7 @@ static int do_pd_setup(struct fs_enet_private *fep) int ret = -EINVAL; fep->interrupt = irq_of_parse_and_map(ofdev->dev.of_node, 0); - if (fep->interrupt == NO_IRQ) + if (!fep->interrupt) goto out; fep->fcc.fccp = of_iomap(ofdev->dev.of_node, 0); diff --git a/drivers/net/ethernet/freescale/fs_enet/mac-fec.c b/drivers/net/ethernet/freescale/fs_enet/mac-fec.c index 21fbaaf3a5fc..777beffa1e1e 100644 --- a/drivers/net/ethernet/freescale/fs_enet/mac-fec.c +++ b/drivers/net/ethernet/freescale/fs_enet/mac-fec.c @@ -99,7 +99,7 @@ static int do_pd_setup(struct fs_enet_private *fep) struct platform_device *ofdev = to_platform_device(fep->dev); fep->interrupt = irq_of_parse_and_map(ofdev->dev.of_node, 0); - if (fep->interrupt == NO_IRQ) + if (!fep->interrupt) return -EINVAL; fep->fec.fecp = of_iomap(ofdev->dev.of_node, 0); diff --git a/drivers/net/ethernet/freescale/fs_enet/mac-scc.c b/drivers/net/ethernet/freescale/fs_enet/mac-scc.c index 9d52e1ec0052..15abd37d70e3 100644 --- a/drivers/net/ethernet/freescale/fs_enet/mac-scc.c +++ b/drivers/net/ethernet/freescale/fs_enet/mac-scc.c @@ -99,7 +99,7 @@ static int do_pd_setup(struct fs_enet_private *fep) struct platform_device *ofdev = to_platform_device(fep->dev); fep->interrupt = irq_of_parse_and_map(ofdev->dev.of_node, 0); - if (fep->interrupt == NO_IRQ) + if (!fep->interrupt) return -EINVAL; fep->scc.sccp = of_iomap(ofdev->dev.of_node, 0); diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c index 4c9771d57d6e..ec4d0f3b7850 100644 --- a/drivers/net/ethernet/ibm/emac/core.c +++ b/drivers/net/ethernet/ibm/emac/core.c @@ -2750,7 +2750,7 @@ static int emac_probe(struct platform_device *ofdev) /* Get interrupts. EMAC irq is mandatory, WOL irq is optional */ dev->emac_irq = irq_of_parse_and_map(np, 0); dev->wol_irq = irq_of_parse_and_map(np, 1); - if (dev->emac_irq == NO_IRQ) { + if (!dev->emac_irq) { printk(KERN_ERR "%s: Can't map main interrupt\n", np->full_name); goto err_free; } @@ -2913,9 +2913,9 @@ static int emac_probe(struct platform_device *ofdev) err_reg_unmap: iounmap(dev->emacp); err_irq_unmap: - if (dev->wol_irq != NO_IRQ) + if (dev->wol_irq) irq_dispose_mapping(dev->wol_irq); - if (dev->emac_irq != NO_IRQ) + if (dev->emac_irq) irq_dispose_mapping(dev->emac_irq); err_free: free_netdev(ndev); @@ -2957,9 +2957,9 @@ static int emac_remove(struct platform_device *ofdev) emac_dbg_unregister(dev); iounmap(dev->emacp); - if (dev->wol_irq != NO_IRQ) + if (dev->wol_irq) irq_dispose_mapping(dev->wol_irq); - if (dev->emac_irq != NO_IRQ) + if (dev->emac_irq) irq_dispose_mapping(dev->emac_irq); free_netdev(dev->ndev); diff --git a/drivers/net/ethernet/ibm/emac/mal.c b/drivers/net/ethernet/ibm/emac/mal.c index fdb5cdb3cd15..aaf6fec566b5 100644 --- a/drivers/net/ethernet/ibm/emac/mal.c +++ b/drivers/net/ethernet/ibm/emac/mal.c @@ -597,9 +597,8 @@ static int mal_probe(struct platform_device *ofdev) mal->rxde_irq = irq_of_parse_and_map(ofdev->dev.of_node, 4); } - if (mal->txeob_irq == NO_IRQ || mal->rxeob_irq == NO_IRQ || - mal->serr_irq == NO_IRQ || mal->txde_irq == NO_IRQ || - mal->rxde_irq == NO_IRQ) { + if (!mal->txeob_irq || !mal->rxeob_irq || !mal->serr_irq || + !mal->txde_irq || !mal->rxde_irq) { printk(KERN_ERR "mal%d: failed to map interrupts !\n", index); err = -ENODEV; diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 62454d7a062a..bfe17d9c022d 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1424,7 +1424,7 @@ static int init_sub_crq_irqs(struct ibmvnic_adapter *adapter) scrq = adapter->tx_scrq[i]; scrq->irq = irq_create_mapping(NULL, scrq->hw_irq); - if (scrq->irq == NO_IRQ) { + if (!scrq->irq) { rc = -EINVAL; dev_err(dev, "Error mapping irq\n"); goto req_tx_irq_failed; @@ -1444,7 +1444,7 @@ static int init_sub_crq_irqs(struct ibmvnic_adapter *adapter) for (i = 0; i < adapter->req_rx_queues; i++) { scrq = adapter->rx_scrq[i]; scrq->irq = irq_create_mapping(NULL, scrq->hw_irq); - if (scrq->irq == NO_IRQ) { + if (!scrq->irq) { rc = -EINVAL; dev_err(dev, "Error mapping irq\n"); goto req_rx_irq_failed; diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.c b/drivers/net/ethernet/toshiba/ps3_gelic_net.c index 79f0ec4e51ac..bc258d7e41df 100644 --- a/drivers/net/ethernet/toshiba/ps3_gelic_net.c +++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.c @@ -1791,7 +1791,7 @@ fail_alloc_rx: gelic_card_free_chain(card, card->tx_chain.head); fail_alloc_tx: free_irq(card->irq, card); - netdev->irq = NO_IRQ; + netdev->irq = 0; fail_request_irq: ps3_sb_event_receive_port_destroy(dev, card->irq); fail_alloc_irq: @@ -1843,7 +1843,7 @@ static int ps3_gelic_driver_remove(struct ps3_system_bus_device *dev) netdev0 = card->netdev[GELIC_PORT_ETHERNET_0]; /* disconnect event port */ free_irq(card->irq, card); - netdev0->irq = NO_IRQ; + netdev0->irq = 0; ps3_sb_event_receive_port_destroy(card->dev, card->irq); wait_event(card->waitq, From a7c22bda52e24094c7bc96afcd897ddad2b28bc0 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 10 Sep 2016 11:17:57 +0000 Subject: [PATCH 0976/1715] net: macb: fix missing unlock on error in macb_start_xmit() Fix missing unlock before return from function macb_start_xmit() in the error handling case. Fixes: 007e4ba3ee13 ("net: macb: initialize checksum when using checksum offloading") Signed-off-by: Wei Yongjun Acked-by: Nicolas Ferre Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c index 0294b6a87265..63144bb413d1 100644 --- a/drivers/net/ethernet/cadence/macb.c +++ b/drivers/net/ethernet/cadence/macb.c @@ -1398,7 +1398,7 @@ static int macb_start_xmit(struct sk_buff *skb, struct net_device *dev) if (macb_clear_csum(skb)) { dev_kfree_skb_any(skb); - return NETDEV_TX_OK; + goto unlock; } /* Map socket buffer for DMA transfer */ From a5f54fcc8a63f4e93ea48243c3a762aa848299d5 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 10 Sep 2016 12:31:30 +0000 Subject: [PATCH 0977/1715] net: ethernet: dwmac: fix non static symbol warning Fixes the following sparse warning: drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c:172:1: warning: symbol 'stm32_dwmac_pm_ops' was not declared. Should it be static? Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c index 79d8b926f17c..e5a926b8bee7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c @@ -169,7 +169,8 @@ static int stm32_dwmac_resume(struct device *dev) } #endif /* CONFIG_PM_SLEEP */ -SIMPLE_DEV_PM_OPS(stm32_dwmac_pm_ops, stm32_dwmac_suspend, stm32_dwmac_resume); +static SIMPLE_DEV_PM_OPS(stm32_dwmac_pm_ops, + stm32_dwmac_suspend, stm32_dwmac_resume); static const struct of_device_id stm32_dwmac_match[] = { { .compatible = "st,stm32-dwmac"}, From 971d3a44c00dcb27353f929c4c28367956c15527 Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Sun, 11 Sep 2016 17:54:03 +0200 Subject: [PATCH 0978/1715] net: ethernet: apm: xgene: use phydev from struct net_device The private structure contain a pointer to phydev, but the structure net_device already contain such pointer. So we can remove the pointer phy_dev in the private structure, and update the driver to use the one contained in struct net_device. Signed-off-by: Philippe Reynes Signed-off-by: David S. Miller --- .../ethernet/apm/xgene/xgene_enet_ethtool.c | 4 ++-- .../net/ethernet/apm/xgene/xgene_enet_hw.c | 24 +++++++++---------- .../net/ethernet/apm/xgene/xgene_enet_main.c | 8 +++---- .../net/ethernet/apm/xgene/xgene_enet_main.h | 1 - 4 files changed, 18 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_ethtool.c b/drivers/net/ethernet/apm/xgene/xgene_enet_ethtool.c index 22a7b26ca1d6..e1f44aed585d 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_ethtool.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_ethtool.c @@ -57,7 +57,7 @@ static void xgene_get_drvinfo(struct net_device *ndev, static int xgene_get_settings(struct net_device *ndev, struct ethtool_cmd *cmd) { struct xgene_enet_pdata *pdata = netdev_priv(ndev); - struct phy_device *phydev = pdata->phy_dev; + struct phy_device *phydev = ndev->phydev; if (pdata->phy_mode == PHY_INTERFACE_MODE_RGMII) { if (phydev == NULL) @@ -96,7 +96,7 @@ static int xgene_get_settings(struct net_device *ndev, struct ethtool_cmd *cmd) static int xgene_set_settings(struct net_device *ndev, struct ethtool_cmd *cmd) { struct xgene_enet_pdata *pdata = netdev_priv(ndev); - struct phy_device *phydev = pdata->phy_dev; + struct phy_device *phydev = ndev->phydev; if (pdata->phy_mode == PHY_INTERFACE_MODE_RGMII) { if (!phydev) diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c index da413c8d3e19..c481f104a8fe 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c @@ -713,7 +713,7 @@ static void xgene_enet_adjust_link(struct net_device *ndev) { struct xgene_enet_pdata *pdata = netdev_priv(ndev); const struct xgene_mac_ops *mac_ops = pdata->mac_ops; - struct phy_device *phydev = pdata->phy_dev; + struct phy_device *phydev = ndev->phydev; if (phydev->link) { if (pdata->phy_speed != phydev->speed) { @@ -773,15 +773,13 @@ int xgene_enet_phy_connect(struct net_device *ndev) netdev_err(ndev, "Could not connect to PHY\n"); return -ENODEV; } - - pdata->phy_dev = phy_dev; } else { #ifdef CONFIG_ACPI struct acpi_device *adev = acpi_phy_find_device(dev); if (adev) - pdata->phy_dev = adev->driver_data; - - phy_dev = pdata->phy_dev; + phy_dev = adev->driver_data; + else + phy_dev = NULL; if (!phy_dev || phy_connect_direct(ndev, phy_dev, &xgene_enet_adjust_link, @@ -849,8 +847,6 @@ static int xgene_mdiobus_register(struct xgene_enet_pdata *pdata, if (!phy) return -EIO; - pdata->phy_dev = phy; - return ret; } @@ -890,14 +886,18 @@ int xgene_enet_mdio_config(struct xgene_enet_pdata *pdata) void xgene_enet_phy_disconnect(struct xgene_enet_pdata *pdata) { - if (pdata->phy_dev) - phy_disconnect(pdata->phy_dev); + struct net_device *ndev = pdata->ndev; + + if (ndev->phydev) + phy_disconnect(ndev->phydev); } void xgene_enet_mdio_remove(struct xgene_enet_pdata *pdata) { - if (pdata->phy_dev) - phy_disconnect(pdata->phy_dev); + struct net_device *ndev = pdata->ndev; + + if (ndev->phydev) + phy_disconnect(ndev->phydev); mdiobus_unregister(pdata->mdio_bus); mdiobus_free(pdata->mdio_bus); diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index b8b9495e6da6..522ba920bfc1 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -748,8 +748,8 @@ static int xgene_enet_open(struct net_device *ndev) if (ret) return ret; - if (pdata->phy_dev) { - phy_start(pdata->phy_dev); + if (ndev->phydev) { + phy_start(ndev->phydev); } else { schedule_delayed_work(&pdata->link_work, PHY_POLL_LINK_OFF); netif_carrier_off(ndev); @@ -772,8 +772,8 @@ static int xgene_enet_close(struct net_device *ndev) mac_ops->tx_disable(pdata); mac_ops->rx_disable(pdata); - if (pdata->phy_dev) - phy_stop(pdata->phy_dev); + if (ndev->phydev) + phy_stop(ndev->phydev); else cancel_delayed_work_sync(&pdata->link_work); diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h index b339fc1e8841..773537161171 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h @@ -174,7 +174,6 @@ struct xgene_cle_ops { struct xgene_enet_pdata { struct net_device *ndev; struct mii_bus *mdio_bus; - struct phy_device *phy_dev; int phy_speed; struct clk *clk; struct platform_device *pdev; From 36a19b299536746f5c01d7716dac962f831e4d38 Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Sun, 11 Sep 2016 17:54:04 +0200 Subject: [PATCH 0979/1715] net: ethernet: apm: xgene: use new api ethtool_{get|set}_link_ksettings The ethtool api {get|set}_settings is deprecated. We move this driver to new api {get|set}_link_ksettings. Signed-off-by: Philippe Reynes Signed-off-by: David S. Miller --- .../ethernet/apm/xgene/xgene_enet_ethtool.c | 61 +++++++++++-------- 1 file changed, 37 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_ethtool.c b/drivers/net/ethernet/apm/xgene/xgene_enet_ethtool.c index e1f44aed585d..d372d4235c81 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_ethtool.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_ethtool.c @@ -54,46 +54,59 @@ static void xgene_get_drvinfo(struct net_device *ndev, sprintf(info->bus_info, "%s", pdev->name); } -static int xgene_get_settings(struct net_device *ndev, struct ethtool_cmd *cmd) +static int xgene_get_link_ksettings(struct net_device *ndev, + struct ethtool_link_ksettings *cmd) { struct xgene_enet_pdata *pdata = netdev_priv(ndev); struct phy_device *phydev = ndev->phydev; + u32 supported; if (pdata->phy_mode == PHY_INTERFACE_MODE_RGMII) { if (phydev == NULL) return -ENODEV; - return phy_ethtool_gset(phydev, cmd); + return phy_ethtool_ksettings_get(phydev, cmd); } else if (pdata->phy_mode == PHY_INTERFACE_MODE_SGMII) { if (pdata->mdio_driver) { if (!phydev) return -ENODEV; - return phy_ethtool_gset(phydev, cmd); + return phy_ethtool_ksettings_get(phydev, cmd); } - cmd->supported = SUPPORTED_1000baseT_Full | SUPPORTED_Autoneg | - SUPPORTED_MII; - cmd->advertising = cmd->supported; - ethtool_cmd_speed_set(cmd, SPEED_1000); - cmd->duplex = DUPLEX_FULL; - cmd->port = PORT_MII; - cmd->transceiver = XCVR_INTERNAL; - cmd->autoneg = AUTONEG_ENABLE; + supported = SUPPORTED_1000baseT_Full | SUPPORTED_Autoneg | + SUPPORTED_MII; + ethtool_convert_legacy_u32_to_link_mode( + cmd->link_modes.supported, + supported); + ethtool_convert_legacy_u32_to_link_mode( + cmd->link_modes.advertising, + supported); + + cmd->base.speed = SPEED_1000; + cmd->base.duplex = DUPLEX_FULL; + cmd->base.port = PORT_MII; + cmd->base.autoneg = AUTONEG_ENABLE; } else { - cmd->supported = SUPPORTED_10000baseT_Full | SUPPORTED_FIBRE; - cmd->advertising = cmd->supported; - ethtool_cmd_speed_set(cmd, SPEED_10000); - cmd->duplex = DUPLEX_FULL; - cmd->port = PORT_FIBRE; - cmd->transceiver = XCVR_INTERNAL; - cmd->autoneg = AUTONEG_DISABLE; + supported = SUPPORTED_10000baseT_Full | SUPPORTED_FIBRE; + ethtool_convert_legacy_u32_to_link_mode( + cmd->link_modes.supported, + supported); + ethtool_convert_legacy_u32_to_link_mode( + cmd->link_modes.advertising, + supported); + + cmd->base.speed = SPEED_10000; + cmd->base.duplex = DUPLEX_FULL; + cmd->base.port = PORT_FIBRE; + cmd->base.autoneg = AUTONEG_DISABLE; } return 0; } -static int xgene_set_settings(struct net_device *ndev, struct ethtool_cmd *cmd) +static int xgene_set_link_ksettings(struct net_device *ndev, + const struct ethtool_link_ksettings *cmd) { struct xgene_enet_pdata *pdata = netdev_priv(ndev); struct phy_device *phydev = ndev->phydev; @@ -102,7 +115,7 @@ static int xgene_set_settings(struct net_device *ndev, struct ethtool_cmd *cmd) if (!phydev) return -ENODEV; - return phy_ethtool_sset(phydev, cmd); + return phy_ethtool_ksettings_set(phydev, cmd); } if (pdata->phy_mode == PHY_INTERFACE_MODE_SGMII) { @@ -110,7 +123,7 @@ static int xgene_set_settings(struct net_device *ndev, struct ethtool_cmd *cmd) if (!phydev) return -ENODEV; - return phy_ethtool_sset(phydev, cmd); + return phy_ethtool_ksettings_set(phydev, cmd); } } @@ -152,12 +165,12 @@ static void xgene_get_ethtool_stats(struct net_device *ndev, static const struct ethtool_ops xgene_ethtool_ops = { .get_drvinfo = xgene_get_drvinfo, - .get_settings = xgene_get_settings, - .set_settings = xgene_set_settings, .get_link = ethtool_op_get_link, .get_strings = xgene_get_strings, .get_sset_count = xgene_get_sset_count, - .get_ethtool_stats = xgene_get_ethtool_stats + .get_ethtool_stats = xgene_get_ethtool_stats, + .get_link_ksettings = xgene_get_link_ksettings, + .set_link_ksettings = xgene_set_link_ksettings, }; void xgene_enet_set_ethtool_ops(struct net_device *ndev) From 0c83f88c02085a762d52ebcd9cc4ca3df39db797 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 12 Sep 2016 13:26:23 +0200 Subject: [PATCH 0980/1715] mlxsw: spectrum: Correctly report autonegotiation Up until now the device always reported autonegotiation to be off although it was on by default. Allow the user to disable / enable autonegotiation and report its status correctly. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 14 ++++++++++++-- drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 3 ++- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index a7efd2af2373..cbec5f301939 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -1815,7 +1815,12 @@ static int mlxsw_sp_port_get_settings(struct net_device *dev, mlxsw_sp_from_ptys_supported_link(eth_proto_cap) | SUPPORTED_Pause | SUPPORTED_Asym_Pause | SUPPORTED_Autoneg; - cmd->advertising = mlxsw_sp_from_ptys_advert_link(eth_proto_admin); + if (mlxsw_sp_port->link.autoneg) { + cmd->advertising = + mlxsw_sp_from_ptys_advert_link(eth_proto_admin); + cmd->advertising |= ADVERTISED_Autoneg; + cmd->autoneg = AUTONEG_ENABLE; + } mlxsw_sp_from_ptys_speed_duplex(netif_carrier_ok(dev), eth_proto_oper, cmd); @@ -1873,11 +1878,13 @@ static int mlxsw_sp_port_set_settings(struct net_device *dev, u32 eth_proto_new; u32 eth_proto_cap; u32 eth_proto_admin; + bool autoneg; int err; + autoneg = cmd->autoneg == AUTONEG_ENABLE; speed = ethtool_cmd_speed(cmd); - eth_proto_new = cmd->autoneg == AUTONEG_ENABLE ? + eth_proto_new = autoneg ? mlxsw_sp_to_ptys_advert_link(cmd->advertising) : mlxsw_sp_to_ptys_speed(speed); @@ -1907,6 +1914,8 @@ static int mlxsw_sp_port_set_settings(struct net_device *dev, if (!netif_running(dev)) return 0; + mlxsw_sp_port->link.autoneg = autoneg; + err = mlxsw_sp_port_admin_status_set(mlxsw_sp_port, false); if (err) { netdev_err(dev, "Failed to set admin status"); @@ -2082,6 +2091,7 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, mlxsw_sp_port->mapping.module = module; mlxsw_sp_port->mapping.width = width; mlxsw_sp_port->mapping.lane = lane; + mlxsw_sp_port->link.autoneg = 1; bytes = DIV_ROUND_UP(VLAN_N_VID, BITS_PER_BYTE); mlxsw_sp_port->active_vlans = kzalloc(bytes, GFP_KERNEL); if (!mlxsw_sp_port->active_vlans) { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 31a2f3df55a6..969c250b3048 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -341,7 +341,8 @@ struct mlxsw_sp_port { } vport; struct { u8 tx_pause:1, - rx_pause:1; + rx_pause:1, + autoneg:1; } link; struct { struct ieee_ets *ets; From 4149b97f728edc9247939ece42a784c14b4e212f Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 12 Sep 2016 13:26:24 +0200 Subject: [PATCH 0981/1715] mlxsw: spectrum: Report link partner's advertised speeds If autonegotiation was performed successfully, then we should report the link partner's advertised speeds instead of the operational speed of the port. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 19 +++++++++++++++++++ .../net/ethernet/mellanox/mlxsw/spectrum.c | 9 ++++++++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index b83d0a7a0b49..43ce27f8c4a7 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -2138,6 +2138,18 @@ MLXSW_ITEM32(reg, ptys, local_port, 0x00, 16, 8); */ MLXSW_ITEM32(reg, ptys, proto_mask, 0x00, 0, 3); +enum { + MLXSW_REG_PTYS_AN_STATUS_NA, + MLXSW_REG_PTYS_AN_STATUS_OK, + MLXSW_REG_PTYS_AN_STATUS_FAIL, +}; + +/* reg_ptys_an_status + * Autonegotiation status. + * Access: RO + */ +MLXSW_ITEM32(reg, ptys, an_status, 0x04, 28, 4); + #define MLXSW_REG_PTYS_ETH_SPEED_SGMII BIT(0) #define MLXSW_REG_PTYS_ETH_SPEED_1000BASE_KX BIT(1) #define MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CX4 BIT(2) @@ -2184,6 +2196,13 @@ MLXSW_ITEM32(reg, ptys, eth_proto_admin, 0x18, 0, 32); */ MLXSW_ITEM32(reg, ptys, eth_proto_oper, 0x24, 0, 32); +/* reg_ptys_eth_proto_lp_advertise + * The protocols that were advertised by the link partner during + * autonegotiation. + * Access: RO + */ +MLXSW_ITEM32(reg, ptys, eth_proto_lp_advertise, 0x30, 0, 32); + static inline void mlxsw_reg_ptys_pack(char *payload, u8 local_port, u32 proto_admin) { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index cbec5f301939..07930cc1fa60 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -1800,6 +1800,8 @@ static int mlxsw_sp_port_get_settings(struct net_device *dev, u32 eth_proto_cap; u32 eth_proto_admin; u32 eth_proto_oper; + u8 autoneg_status; + u32 eth_proto_lp; int err; mlxsw_reg_ptys_pack(ptys_pl, mlxsw_sp_port->local_port, 0); @@ -1810,6 +1812,8 @@ static int mlxsw_sp_port_get_settings(struct net_device *dev, } mlxsw_reg_ptys_unpack(ptys_pl, ð_proto_cap, ð_proto_admin, ð_proto_oper); + eth_proto_lp = mlxsw_reg_ptys_eth_proto_lp_advertise_get(ptys_pl); + autoneg_status = mlxsw_reg_ptys_an_status_get(ptys_pl); cmd->supported = mlxsw_sp_from_ptys_supported_port(eth_proto_cap) | mlxsw_sp_from_ptys_supported_link(eth_proto_cap) | @@ -1826,7 +1830,10 @@ static int mlxsw_sp_port_get_settings(struct net_device *dev, eth_proto_oper = eth_proto_oper ? eth_proto_oper : eth_proto_cap; cmd->port = mlxsw_sp_port_connector_port(eth_proto_oper); - cmd->lp_advertising = mlxsw_sp_from_ptys_advert_link(eth_proto_oper); + + if (autoneg_status == MLXSW_REG_PTYS_AN_STATUS_OK && eth_proto_lp) + cmd->lp_advertising = + mlxsw_sp_from_ptys_advert_link(eth_proto_lp); cmd->transceiver = XCVR_INTERNAL; return 0; From 0213424adad63bfea08ef19cd6c997648ddcf44e Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 12 Sep 2016 13:26:25 +0200 Subject: [PATCH 0982/1715] mlxsw: spectrum: Report port type according to operational speed In case port isn't operational we shouldn't report the port type, but instead return PORT_OTHER. This is consistent with most other drivers that return PORT_OTHER when media type can't be determined. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 07930cc1fa60..38e4f03f9d9f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -1828,7 +1828,6 @@ static int mlxsw_sp_port_get_settings(struct net_device *dev, mlxsw_sp_from_ptys_speed_duplex(netif_carrier_ok(dev), eth_proto_oper, cmd); - eth_proto_oper = eth_proto_oper ? eth_proto_oper : eth_proto_cap; cmd->port = mlxsw_sp_port_connector_port(eth_proto_oper); if (autoneg_status == MLXSW_REG_PTYS_AN_STATUS_OK && eth_proto_lp) From 91bdc7a43ac7cfe044caa5ab7e74dca114f15904 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 12 Sep 2016 13:26:26 +0200 Subject: [PATCH 0983/1715] mlxsw: spectrum: Indicate support of multiple port types The device can support multiple port types, so don't return on first match. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 38e4f03f9d9f..d49f51846325 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -1706,21 +1706,23 @@ static const struct mlxsw_sp_port_link_mode mlxsw_sp_port_link_mode[] = { static u32 mlxsw_sp_from_ptys_supported_port(u32 ptys_eth_proto) { + u32 modes = 0; + if (ptys_eth_proto & (MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CR | MLXSW_REG_PTYS_ETH_SPEED_10GBASE_SR | MLXSW_REG_PTYS_ETH_SPEED_40GBASE_CR4 | MLXSW_REG_PTYS_ETH_SPEED_40GBASE_SR4 | MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4 | MLXSW_REG_PTYS_ETH_SPEED_SGMII)) - return SUPPORTED_FIBRE; + modes |= SUPPORTED_FIBRE; if (ptys_eth_proto & (MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KR | MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KX4 | MLXSW_REG_PTYS_ETH_SPEED_40GBASE_KR4 | MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4 | MLXSW_REG_PTYS_ETH_SPEED_1000BASE_KX)) - return SUPPORTED_Backplane; - return 0; + modes |= SUPPORTED_Backplane; + return modes; } static u32 mlxsw_sp_from_ptys_supported_link(u32 ptys_eth_proto) From b9d66a36aa7737d0f975d99aabc200b7496e26b8 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 12 Sep 2016 13:26:27 +0200 Subject: [PATCH 0984/1715] mlxsw: spectrum: Add support for new ethtool API Remove the deprecated {get,set}_settings callbacks and instead add {get,set}_link_ksettings along with support for newly available speeds. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 1 + .../net/ethernet/mellanox/mlxsw/spectrum.c | 348 ++++++++++-------- 2 files changed, 187 insertions(+), 162 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 43ce27f8c4a7..4e2354ca0e4a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -2164,6 +2164,7 @@ MLXSW_ITEM32(reg, ptys, an_status, 0x04, 28, 4); #define MLXSW_REG_PTYS_ETH_SPEED_10GBASE_ER_LR BIT(14) #define MLXSW_REG_PTYS_ETH_SPEED_40GBASE_SR4 BIT(15) #define MLXSW_REG_PTYS_ETH_SPEED_40GBASE_LR4_ER4 BIT(16) +#define MLXSW_REG_PTYS_ETH_SPEED_50GBASE_SR2 BIT(18) #define MLXSW_REG_PTYS_ETH_SPEED_50GBASE_KR4 BIT(19) #define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_CR4 BIT(20) #define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4 BIT(21) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index d49f51846325..27bbcaf9cfcd 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -1599,158 +1599,179 @@ static int mlxsw_sp_port_get_sset_count(struct net_device *dev, int sset) } struct mlxsw_sp_port_link_mode { + enum ethtool_link_mode_bit_indices mask_ethtool; u32 mask; - u32 supported; - u32 advertised; u32 speed; }; static const struct mlxsw_sp_port_link_mode mlxsw_sp_port_link_mode[] = { { .mask = MLXSW_REG_PTYS_ETH_SPEED_100BASE_T, - .supported = SUPPORTED_100baseT_Full, - .advertised = ADVERTISED_100baseT_Full, - .speed = 100, - }, - { - .mask = MLXSW_REG_PTYS_ETH_SPEED_100BASE_TX, - .speed = 100, + .mask_ethtool = ETHTOOL_LINK_MODE_100baseT_Full_BIT, + .speed = SPEED_100, }, { .mask = MLXSW_REG_PTYS_ETH_SPEED_SGMII | MLXSW_REG_PTYS_ETH_SPEED_1000BASE_KX, - .supported = SUPPORTED_1000baseKX_Full, - .advertised = ADVERTISED_1000baseKX_Full, - .speed = 1000, + .mask_ethtool = ETHTOOL_LINK_MODE_1000baseKX_Full_BIT, + .speed = SPEED_1000, }, { .mask = MLXSW_REG_PTYS_ETH_SPEED_10GBASE_T, - .supported = SUPPORTED_10000baseT_Full, - .advertised = ADVERTISED_10000baseT_Full, - .speed = 10000, + .mask_ethtool = ETHTOOL_LINK_MODE_10000baseT_Full_BIT, + .speed = SPEED_10000, }, { .mask = MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CX4 | MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KX4, - .supported = SUPPORTED_10000baseKX4_Full, - .advertised = ADVERTISED_10000baseKX4_Full, - .speed = 10000, + .mask_ethtool = ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT, + .speed = SPEED_10000, }, { .mask = MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KR | MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CR | MLXSW_REG_PTYS_ETH_SPEED_10GBASE_SR | MLXSW_REG_PTYS_ETH_SPEED_10GBASE_ER_LR, - .supported = SUPPORTED_10000baseKR_Full, - .advertised = ADVERTISED_10000baseKR_Full, - .speed = 10000, + .mask_ethtool = ETHTOOL_LINK_MODE_10000baseKR_Full_BIT, + .speed = SPEED_10000, }, { .mask = MLXSW_REG_PTYS_ETH_SPEED_20GBASE_KR2, - .supported = SUPPORTED_20000baseKR2_Full, - .advertised = ADVERTISED_20000baseKR2_Full, - .speed = 20000, + .mask_ethtool = ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT, + .speed = SPEED_20000, }, { .mask = MLXSW_REG_PTYS_ETH_SPEED_40GBASE_CR4, - .supported = SUPPORTED_40000baseCR4_Full, - .advertised = ADVERTISED_40000baseCR4_Full, - .speed = 40000, + .mask_ethtool = ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT, + .speed = SPEED_40000, }, { .mask = MLXSW_REG_PTYS_ETH_SPEED_40GBASE_KR4, - .supported = SUPPORTED_40000baseKR4_Full, - .advertised = ADVERTISED_40000baseKR4_Full, - .speed = 40000, + .mask_ethtool = ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT, + .speed = SPEED_40000, }, { .mask = MLXSW_REG_PTYS_ETH_SPEED_40GBASE_SR4, - .supported = SUPPORTED_40000baseSR4_Full, - .advertised = ADVERTISED_40000baseSR4_Full, - .speed = 40000, + .mask_ethtool = ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT, + .speed = SPEED_40000, }, { .mask = MLXSW_REG_PTYS_ETH_SPEED_40GBASE_LR4_ER4, - .supported = SUPPORTED_40000baseLR4_Full, - .advertised = ADVERTISED_40000baseLR4_Full, - .speed = 40000, + .mask_ethtool = ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT, + .speed = SPEED_40000, }, { - .mask = MLXSW_REG_PTYS_ETH_SPEED_25GBASE_CR | - MLXSW_REG_PTYS_ETH_SPEED_25GBASE_KR | - MLXSW_REG_PTYS_ETH_SPEED_25GBASE_SR, - .speed = 25000, + .mask = MLXSW_REG_PTYS_ETH_SPEED_25GBASE_CR, + .mask_ethtool = ETHTOOL_LINK_MODE_25000baseCR_Full_BIT, + .speed = SPEED_25000, }, { - .mask = MLXSW_REG_PTYS_ETH_SPEED_50GBASE_KR4 | - MLXSW_REG_PTYS_ETH_SPEED_50GBASE_CR2 | - MLXSW_REG_PTYS_ETH_SPEED_50GBASE_KR2, - .speed = 50000, + .mask = MLXSW_REG_PTYS_ETH_SPEED_25GBASE_KR, + .mask_ethtool = ETHTOOL_LINK_MODE_25000baseKR_Full_BIT, + .speed = SPEED_25000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_25GBASE_SR, + .mask_ethtool = ETHTOOL_LINK_MODE_25000baseSR_Full_BIT, + .speed = SPEED_25000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_25GBASE_SR, + .mask_ethtool = ETHTOOL_LINK_MODE_25000baseSR_Full_BIT, + .speed = SPEED_25000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_50GBASE_CR2, + .mask_ethtool = ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT, + .speed = SPEED_50000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_50GBASE_KR2, + .mask_ethtool = ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT, + .speed = SPEED_50000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_50GBASE_SR2, + .mask_ethtool = ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT, + .speed = SPEED_50000, }, { .mask = MLXSW_REG_PTYS_ETH_SPEED_56GBASE_R4, - .supported = SUPPORTED_56000baseKR4_Full, - .advertised = ADVERTISED_56000baseKR4_Full, - .speed = 56000, + .mask_ethtool = ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT, + .speed = SPEED_56000, }, { - .mask = MLXSW_REG_PTYS_ETH_SPEED_100GBASE_CR4 | - MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4 | - MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4 | - MLXSW_REG_PTYS_ETH_SPEED_100GBASE_LR4_ER4, - .speed = 100000, + .mask = MLXSW_REG_PTYS_ETH_SPEED_56GBASE_R4, + .mask_ethtool = ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT, + .speed = SPEED_56000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_56GBASE_R4, + .mask_ethtool = ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT, + .speed = SPEED_56000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_56GBASE_R4, + .mask_ethtool = ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT, + .speed = SPEED_56000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_100GBASE_CR4, + .mask_ethtool = ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT, + .speed = SPEED_100000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4, + .mask_ethtool = ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT, + .speed = SPEED_100000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4, + .mask_ethtool = ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT, + .speed = SPEED_100000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_100GBASE_LR4_ER4, + .mask_ethtool = ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT, + .speed = SPEED_100000, }, }; #define MLXSW_SP_PORT_LINK_MODE_LEN ARRAY_SIZE(mlxsw_sp_port_link_mode) -static u32 mlxsw_sp_from_ptys_supported_port(u32 ptys_eth_proto) +static void +mlxsw_sp_from_ptys_supported_port(u32 ptys_eth_proto, + struct ethtool_link_ksettings *cmd) { - u32 modes = 0; - if (ptys_eth_proto & (MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CR | MLXSW_REG_PTYS_ETH_SPEED_10GBASE_SR | MLXSW_REG_PTYS_ETH_SPEED_40GBASE_CR4 | MLXSW_REG_PTYS_ETH_SPEED_40GBASE_SR4 | MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4 | MLXSW_REG_PTYS_ETH_SPEED_SGMII)) - modes |= SUPPORTED_FIBRE; + ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE); if (ptys_eth_proto & (MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KR | MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KX4 | MLXSW_REG_PTYS_ETH_SPEED_40GBASE_KR4 | MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4 | MLXSW_REG_PTYS_ETH_SPEED_1000BASE_KX)) - modes |= SUPPORTED_Backplane; - return modes; + ethtool_link_ksettings_add_link_mode(cmd, supported, Backplane); } -static u32 mlxsw_sp_from_ptys_supported_link(u32 ptys_eth_proto) +static void mlxsw_sp_from_ptys_link(u32 ptys_eth_proto, unsigned long *mode) { - u32 modes = 0; int i; for (i = 0; i < MLXSW_SP_PORT_LINK_MODE_LEN; i++) { if (ptys_eth_proto & mlxsw_sp_port_link_mode[i].mask) - modes |= mlxsw_sp_port_link_mode[i].supported; + __set_bit(mlxsw_sp_port_link_mode[i].mask_ethtool, + mode); } - return modes; -} - -static u32 mlxsw_sp_from_ptys_advert_link(u32 ptys_eth_proto) -{ - u32 modes = 0; - int i; - - for (i = 0; i < MLXSW_SP_PORT_LINK_MODE_LEN; i++) { - if (ptys_eth_proto & mlxsw_sp_port_link_mode[i].mask) - modes |= mlxsw_sp_port_link_mode[i].advertised; - } - return modes; } static void mlxsw_sp_from_ptys_speed_duplex(bool carrier_ok, u32 ptys_eth_proto, - struct ethtool_cmd *cmd) + struct ethtool_link_ksettings *cmd) { u32 speed = SPEED_UNKNOWN; u8 duplex = DUPLEX_UNKNOWN; @@ -1767,8 +1788,8 @@ static void mlxsw_sp_from_ptys_speed_duplex(bool carrier_ok, u32 ptys_eth_proto, } } out: - ethtool_cmd_speed_set(cmd, speed); - cmd->duplex = duplex; + cmd->base.speed = speed; + cmd->base.duplex = duplex; } static u8 mlxsw_sp_port_connector_port(u32 ptys_eth_proto) @@ -1793,60 +1814,15 @@ static u8 mlxsw_sp_port_connector_port(u32 ptys_eth_proto) return PORT_OTHER; } -static int mlxsw_sp_port_get_settings(struct net_device *dev, - struct ethtool_cmd *cmd) -{ - struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - char ptys_pl[MLXSW_REG_PTYS_LEN]; - u32 eth_proto_cap; - u32 eth_proto_admin; - u32 eth_proto_oper; - u8 autoneg_status; - u32 eth_proto_lp; - int err; - - mlxsw_reg_ptys_pack(ptys_pl, mlxsw_sp_port->local_port, 0); - err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl); - if (err) { - netdev_err(dev, "Failed to get proto"); - return err; - } - mlxsw_reg_ptys_unpack(ptys_pl, ð_proto_cap, - ð_proto_admin, ð_proto_oper); - eth_proto_lp = mlxsw_reg_ptys_eth_proto_lp_advertise_get(ptys_pl); - autoneg_status = mlxsw_reg_ptys_an_status_get(ptys_pl); - - cmd->supported = mlxsw_sp_from_ptys_supported_port(eth_proto_cap) | - mlxsw_sp_from_ptys_supported_link(eth_proto_cap) | - SUPPORTED_Pause | SUPPORTED_Asym_Pause | - SUPPORTED_Autoneg; - if (mlxsw_sp_port->link.autoneg) { - cmd->advertising = - mlxsw_sp_from_ptys_advert_link(eth_proto_admin); - cmd->advertising |= ADVERTISED_Autoneg; - cmd->autoneg = AUTONEG_ENABLE; - } - mlxsw_sp_from_ptys_speed_duplex(netif_carrier_ok(dev), - eth_proto_oper, cmd); - - cmd->port = mlxsw_sp_port_connector_port(eth_proto_oper); - - if (autoneg_status == MLXSW_REG_PTYS_AN_STATUS_OK && eth_proto_lp) - cmd->lp_advertising = - mlxsw_sp_from_ptys_advert_link(eth_proto_lp); - - cmd->transceiver = XCVR_INTERNAL; - return 0; -} - -static u32 mlxsw_sp_to_ptys_advert_link(u32 advertising) +static u32 +mlxsw_sp_to_ptys_advert_link(const struct ethtool_link_ksettings *cmd) { u32 ptys_proto = 0; int i; for (i = 0; i < MLXSW_SP_PORT_LINK_MODE_LEN; i++) { - if (advertising & mlxsw_sp_port_link_mode[i].advertised) + if (test_bit(mlxsw_sp_port_link_mode[i].mask_ethtool, + cmd->link_modes.advertising)) ptys_proto |= mlxsw_sp_port_link_mode[i].mask; } return ptys_proto; @@ -1876,65 +1852,113 @@ static u32 mlxsw_sp_to_ptys_upper_speed(u32 upper_speed) return ptys_proto; } -static int mlxsw_sp_port_set_settings(struct net_device *dev, - struct ethtool_cmd *cmd) +static void mlxsw_sp_port_get_link_supported(u32 eth_proto_cap, + struct ethtool_link_ksettings *cmd) +{ + ethtool_link_ksettings_add_link_mode(cmd, supported, Asym_Pause); + ethtool_link_ksettings_add_link_mode(cmd, supported, Autoneg); + ethtool_link_ksettings_add_link_mode(cmd, supported, Pause); + + mlxsw_sp_from_ptys_supported_port(eth_proto_cap, cmd); + mlxsw_sp_from_ptys_link(eth_proto_cap, cmd->link_modes.supported); +} + +static void mlxsw_sp_port_get_link_advertise(u32 eth_proto_admin, bool autoneg, + struct ethtool_link_ksettings *cmd) +{ + if (!autoneg) + return; + + ethtool_link_ksettings_add_link_mode(cmd, advertising, Autoneg); + mlxsw_sp_from_ptys_link(eth_proto_admin, cmd->link_modes.advertising); +} + +static void +mlxsw_sp_port_get_link_lp_advertise(u32 eth_proto_lp, u8 autoneg_status, + struct ethtool_link_ksettings *cmd) +{ + if (autoneg_status != MLXSW_REG_PTYS_AN_STATUS_OK || !eth_proto_lp) + return; + + ethtool_link_ksettings_add_link_mode(cmd, lp_advertising, Autoneg); + mlxsw_sp_from_ptys_link(eth_proto_lp, cmd->link_modes.lp_advertising); +} + +static int mlxsw_sp_port_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) +{ + u32 eth_proto_cap, eth_proto_admin, eth_proto_oper, eth_proto_lp; + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char ptys_pl[MLXSW_REG_PTYS_LEN]; + u8 autoneg_status; + bool autoneg; + int err; + + autoneg = mlxsw_sp_port->link.autoneg; + mlxsw_reg_ptys_pack(ptys_pl, mlxsw_sp_port->local_port, 0); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl); + if (err) + return err; + mlxsw_reg_ptys_unpack(ptys_pl, ð_proto_cap, ð_proto_admin, + ð_proto_oper); + + mlxsw_sp_port_get_link_supported(eth_proto_cap, cmd); + + mlxsw_sp_port_get_link_advertise(eth_proto_admin, autoneg, cmd); + + eth_proto_lp = mlxsw_reg_ptys_eth_proto_lp_advertise_get(ptys_pl); + autoneg_status = mlxsw_reg_ptys_an_status_get(ptys_pl); + mlxsw_sp_port_get_link_lp_advertise(eth_proto_lp, autoneg_status, cmd); + + cmd->base.autoneg = autoneg ? AUTONEG_ENABLE : AUTONEG_DISABLE; + cmd->base.port = mlxsw_sp_port_connector_port(eth_proto_oper); + mlxsw_sp_from_ptys_speed_duplex(netif_carrier_ok(dev), eth_proto_oper, + cmd); + + return 0; +} + +static int +mlxsw_sp_port_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; char ptys_pl[MLXSW_REG_PTYS_LEN]; - u32 speed; - u32 eth_proto_new; - u32 eth_proto_cap; - u32 eth_proto_admin; + u32 eth_proto_cap, eth_proto_new; bool autoneg; int err; - autoneg = cmd->autoneg == AUTONEG_ENABLE; - speed = ethtool_cmd_speed(cmd); - - eth_proto_new = autoneg ? - mlxsw_sp_to_ptys_advert_link(cmd->advertising) : - mlxsw_sp_to_ptys_speed(speed); - mlxsw_reg_ptys_pack(ptys_pl, mlxsw_sp_port->local_port, 0); err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl); - if (err) { - netdev_err(dev, "Failed to get proto"); + if (err) return err; - } - mlxsw_reg_ptys_unpack(ptys_pl, ð_proto_cap, ð_proto_admin, NULL); + mlxsw_reg_ptys_unpack(ptys_pl, ð_proto_cap, NULL, NULL); + + autoneg = cmd->base.autoneg == AUTONEG_ENABLE; + eth_proto_new = autoneg ? + mlxsw_sp_to_ptys_advert_link(cmd) : + mlxsw_sp_to_ptys_speed(cmd->base.speed); eth_proto_new = eth_proto_new & eth_proto_cap; if (!eth_proto_new) { - netdev_err(dev, "Not supported proto admin requested"); + netdev_err(dev, "No supported speed requested\n"); return -EINVAL; } - if (eth_proto_new == eth_proto_admin) - return 0; mlxsw_reg_ptys_pack(ptys_pl, mlxsw_sp_port->local_port, eth_proto_new); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl); - if (err) { - netdev_err(dev, "Failed to set proto admin"); + if (err) return err; - } if (!netif_running(dev)) return 0; mlxsw_sp_port->link.autoneg = autoneg; - err = mlxsw_sp_port_admin_status_set(mlxsw_sp_port, false); - if (err) { - netdev_err(dev, "Failed to set admin status"); - return err; - } - - err = mlxsw_sp_port_admin_status_set(mlxsw_sp_port, true); - if (err) { - netdev_err(dev, "Failed to set admin status"); - return err; - } + mlxsw_sp_port_admin_status_set(mlxsw_sp_port, false); + mlxsw_sp_port_admin_status_set(mlxsw_sp_port, true); return 0; } @@ -1948,8 +1972,8 @@ static const struct ethtool_ops mlxsw_sp_port_ethtool_ops = { .set_phys_id = mlxsw_sp_port_set_phys_id, .get_ethtool_stats = mlxsw_sp_port_get_stats, .get_sset_count = mlxsw_sp_port_get_sset_count, - .get_settings = mlxsw_sp_port_get_settings, - .set_settings = mlxsw_sp_port_set_settings, + .get_link_ksettings = mlxsw_sp_port_get_link_ksettings, + .set_link_ksettings = mlxsw_sp_port_set_link_ksettings, }; static int From b6b5555bc89f52e49244104ca4d7764c7b0f11cd Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 13 Sep 2016 16:25:58 +0200 Subject: [PATCH 0985/1715] cfg80211: disallow shared key authentication with key index 4 Key index 4 can only be used for an IGTK, so the range checks for shared key authentication should treat 4 as an error, fix that in the code. Signed-off-by: Johannes Berg --- net/wireless/mlme.c | 2 +- net/wireless/nl80211.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index c284d883c349..d6abb0704db5 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -222,7 +222,7 @@ int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, ASSERT_WDEV_LOCK(wdev); if (auth_type == NL80211_AUTHTYPE_SHARED_KEY) - if (!key || !key_len || key_idx < 0 || key_idx > 4) + if (!key || !key_len || key_idx < 0 || key_idx > 3) return -EINVAL; if (wdev->current_bss && diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 7ebad350abc1..c11c1ef6daa7 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -7388,7 +7388,7 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info) (key.p.cipher != WLAN_CIPHER_SUITE_WEP104 || key.p.key_len != WLAN_KEY_LEN_WEP104)) return -EINVAL; - if (key.idx > 4) + if (key.idx > 3) return -EINVAL; } else { key.p.key_len = 0; From 42ee231cd12c2e1eb015163d04a65950e895d4b7 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 13 Sep 2016 15:51:03 +0200 Subject: [PATCH 0986/1715] nl80211: fix connect keys range check Only key index 0-3 should be accepted, 4/5 are for IGTKs and cannot be used as connect keys. Fix the range checking to not allow such erroneous configurations. Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index c11c1ef6daa7..524f5d238c14 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -866,7 +866,7 @@ nl80211_parse_connkeys(struct cfg80211_registered_device *rdev, err = -EINVAL; if (!parse.p.key) goto error; - if (parse.idx < 0 || parse.idx > 4) + if (parse.idx < 0 || parse.idx > 3) goto error; if (parse.def) { if (def) From 386b1f273850380a1887044673922843736c6d0a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 13 Sep 2016 16:10:02 +0200 Subject: [PATCH 0987/1715] nl80211: only allow WEP keys during connect command This was already documented that way in nl80211.h, but the parsing code still accepted other key types. Change it to really only accept WEP keys as documented. Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 524f5d238c14..6cb33ae38ae3 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -881,16 +881,19 @@ nl80211_parse_connkeys(struct cfg80211_registered_device *rdev, parse.idx, false, NULL); if (err) goto error; + if (parse.p.cipher != WLAN_CIPHER_SUITE_WEP40 && + parse.p.cipher != WLAN_CIPHER_SUITE_WEP104) { + err = -EINVAL; + goto error; + } result->params[parse.idx].cipher = parse.p.cipher; result->params[parse.idx].key_len = parse.p.key_len; result->params[parse.idx].key = result->data[parse.idx]; memcpy(result->data[parse.idx], parse.p.key, parse.p.key_len); - if (parse.p.cipher == WLAN_CIPHER_SUITE_WEP40 || - parse.p.cipher == WLAN_CIPHER_SUITE_WEP104) { - if (no_ht) - *no_ht = true; - } + /* must be WEP key if we got here */ + if (no_ht) + *no_ht = true; } return result; From 9381e267b69acfea96c8429dc99da3e78835cef1 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 13 Sep 2016 16:11:32 +0200 Subject: [PATCH 0988/1715] cfg80211: wext: only allow WEP keys to be configured before connected When not connected, anything but WEP keys shouldn't be allowed to be configured for later - only static WEP keys make sense at this point. Change wext to reject anything else just like nl80211 does. Signed-off-by: Johannes Berg --- net/wireless/wext-compat.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 9f27221c8913..e45a76449b43 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -487,6 +487,9 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev, err = 0; if (wdev->current_bss) err = rdev_add_key(rdev, dev, idx, pairwise, addr, params); + else if (params->cipher != WLAN_CIPHER_SUITE_WEP40 && + params->cipher != WLAN_CIPHER_SUITE_WEP104) + return -EINVAL; if (err) return err; From e9c8f8d3a4d54106a30f2b981b53d658c9bc0c8e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 13 Sep 2016 16:37:40 +0200 Subject: [PATCH 0989/1715] cfg80211: validate key index better Don't accept it if a key_idx < 0 snuck through, reject WEP keys with key index 4 and 5 (which are used for IGTKs) and don't allow IGTKs with key indices other than 4 and 5. This makes the key data match expectations better. Signed-off-by: Johannes Berg --- net/wireless/util.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/wireless/util.c b/net/wireless/util.c index 0675f513e7b9..12e2d3fae843 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -218,7 +218,7 @@ int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, struct key_params *params, int key_idx, bool pairwise, const u8 *mac_addr) { - if (key_idx > 5) + if (key_idx < 0 || key_idx > 5) return -EINVAL; if (!pairwise && mac_addr && !(rdev->wiphy.flags & WIPHY_FLAG_IBSS_RSN)) @@ -249,7 +249,13 @@ int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, /* Disallow BIP (group-only) cipher as pairwise cipher */ if (pairwise) return -EINVAL; + if (key_idx < 4) + return -EINVAL; break; + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: + if (key_idx > 3) + return -EINVAL; default: break; } From 89b706fb28e431fa7639348536c284fb375eb3c0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 13 Sep 2016 16:39:38 +0200 Subject: [PATCH 0990/1715] cfg80211: reduce connect key caching struct size After the previous patches, connect keys can only (correctly) be used for storing static WEP keys. Therefore, remove all the data for dealing with key index 4/5 and reduce the size of the key material to the maximum for WEP keys. Signed-off-by: Johannes Berg --- net/wireless/core.h | 6 +++--- net/wireless/ibss.c | 6 ++---- net/wireless/nl80211.c | 1 - net/wireless/util.c | 5 +---- net/wireless/wext-compat.c | 6 +++--- net/wireless/wext-sme.c | 3 +-- 6 files changed, 10 insertions(+), 17 deletions(-) diff --git a/net/wireless/core.h b/net/wireless/core.h index eee91443924d..5555e3c13ae9 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -249,9 +249,9 @@ struct cfg80211_event { }; struct cfg80211_cached_keys { - struct key_params params[6]; - u8 data[6][WLAN_MAX_KEY_LEN]; - int def, defmgmt; + struct key_params params[4]; + u8 data[4][WLAN_KEY_LEN_WEP104]; + int def; }; enum cfg80211_chan_mode { diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index 4a4dda53bdf1..896cbb20b6e1 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -284,10 +284,8 @@ int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev, if (!netif_running(wdev->netdev)) return 0; - if (wdev->wext.keys) { + if (wdev->wext.keys) wdev->wext.keys->def = wdev->wext.default_key; - wdev->wext.keys->defmgmt = wdev->wext.default_mgmt_key; - } wdev->wext.ibss.privacy = wdev->wext.default_key != -1; @@ -295,7 +293,7 @@ int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev, ck = kmemdup(wdev->wext.keys, sizeof(*ck), GFP_KERNEL); if (!ck) return -ENOMEM; - for (i = 0; i < 6; i++) + for (i = 0; i < 4; i++) ck->params[i].key = ck->data[i]; } err = __cfg80211_join_ibss(rdev, wdev->netdev, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 6cb33ae38ae3..71af96e8a947 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -854,7 +854,6 @@ nl80211_parse_connkeys(struct cfg80211_registered_device *rdev, return ERR_PTR(-ENOMEM); result->def = -1; - result->defmgmt = -1; nla_for_each_nested(key, keys, rem) { memset(&parse, 0, sizeof(parse)); diff --git a/net/wireless/util.c b/net/wireless/util.c index 12e2d3fae843..9e6e2aaa7766 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -912,7 +912,7 @@ void cfg80211_upload_connect_keys(struct wireless_dev *wdev) if (!wdev->connect_keys) return; - for (i = 0; i < 6; i++) { + for (i = 0; i < 4; i++) { if (!wdev->connect_keys->params[i].cipher) continue; if (rdev_add_key(rdev, dev, i, false, NULL, @@ -925,9 +925,6 @@ void cfg80211_upload_connect_keys(struct wireless_dev *wdev) netdev_err(dev, "failed to set defkey %d\n", i); continue; } - if (wdev->connect_keys->defmgmt == i) - if (rdev_set_default_mgmt_key(rdev, dev, i)) - netdev_err(dev, "failed to set mgtdef %d\n", i); } kzfree(wdev->connect_keys); diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index e45a76449b43..7b97d43b27e1 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -408,10 +408,10 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev, if (!wdev->wext.keys) { wdev->wext.keys = kzalloc(sizeof(*wdev->wext.keys), - GFP_KERNEL); + GFP_KERNEL); if (!wdev->wext.keys) return -ENOMEM; - for (i = 0; i < 6; i++) + for (i = 0; i < 4; i++) wdev->wext.keys->params[i].key = wdev->wext.keys->data[i]; } @@ -460,7 +460,7 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev, if (err == -ENOENT) err = 0; if (!err) { - if (!addr) { + if (!addr && idx < 4) { memset(wdev->wext.keys->data[idx], 0, sizeof(wdev->wext.keys->data[idx])); wdev->wext.keys->params[idx].key_len = 0; diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c index a4e8af3321d2..f6523a4387cc 100644 --- a/net/wireless/wext-sme.c +++ b/net/wireless/wext-sme.c @@ -35,7 +35,6 @@ int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev, if (wdev->wext.keys) { wdev->wext.keys->def = wdev->wext.default_key; - wdev->wext.keys->defmgmt = wdev->wext.default_mgmt_key; if (wdev->wext.default_key != -1) wdev->wext.connect.privacy = true; } @@ -47,7 +46,7 @@ int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev, ck = kmemdup(wdev->wext.keys, sizeof(*ck), GFP_KERNEL); if (!ck) return -ENOMEM; - for (i = 0; i < 6; i++) + for (i = 0; i < 4; i++) ck->params[i].key = ck->data[i]; } From 08a39685a771b4b1108889ea5e4e0a71b51782ba Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 13 Sep 2016 22:36:21 +0100 Subject: [PATCH 0991/1715] rxrpc: Make sure we initialise the peer hash key Peer records created for incoming connections weren't getting their hash key set. This meant that incoming calls wouldn't see more than one DATA packet - which is not a problem for AFS CM calls with small request data blobs. Signed-off-by: David Howells --- net/rxrpc/peer_object.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index 2efe29a4c232..3e6cd174b53d 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -203,6 +203,7 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp) */ static void rxrpc_init_peer(struct rxrpc_peer *peer, unsigned long hash_key) { + peer->hash_key = hash_key; rxrpc_assess_MTU_size(peer); peer->mtu = peer->if_mtu; @@ -238,7 +239,6 @@ static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_local *local, peer = rxrpc_alloc_peer(local, gfp); if (peer) { - peer->hash_key = hash_key; memcpy(&peer->srx, srx, sizeof(*srx)); rxrpc_init_peer(peer, hash_key); } From bc4abfcf51835420d61440b2b7aa18181bc1f273 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 13 Sep 2016 22:36:21 +0100 Subject: [PATCH 0992/1715] rxrpc: Add missing wakeup on Tx window rotation We need to wake up the sender when Tx window rotation due to an incoming ACK makes space in the buffer otherwise the sender is liable to just hang endlessly. This problem isn't noticeable if the Tx phase transfers no more than will fit in a single window or the Tx window rotates fast enough that it doesn't get full. Signed-off-by: David Howells --- net/rxrpc/input.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index afeba98004b1..a707d5952164 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -59,6 +59,8 @@ static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to) spin_unlock(&call->lock); + wake_up(&call->waitq); + while (list) { skb = list; list = skb->next; From 91c2c7b656a80984362dbcb3d326e4a7274d0607 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 13 Sep 2016 22:36:21 +0100 Subject: [PATCH 0993/1715] rxrpc: The IDLE ACK packet should use rxrpc_idle_ack_delay The IDLE ACK packet should use the rxrpc_idle_ack_delay setting when the timer is set for it. Signed-off-by: David Howells --- net/rxrpc/call_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 2b976e789562..61432049869b 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -95,7 +95,7 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, break; case RXRPC_ACK_IDLE: - if (rxrpc_soft_ack_delay < expiry) + if (rxrpc_idle_ack_delay < expiry) expiry = rxrpc_idle_ack_delay; break; From 33b603fda815faf12f66156a49b510126fac984b Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 13 Sep 2016 22:36:21 +0100 Subject: [PATCH 0994/1715] rxrpc: Requeue call for recvmsg if more data rxrpc_recvmsg() needs to make sure that the call it has just been processing gets requeued for further attention if the buffer has been filled and there's more data to be consumed. The softirq producer only queues the call and wakes the socket if it fills the first slot in the window, so userspace might end up sleeping forever otherwise, despite there being data available. This is not a problem provided the userspace buffer is big enough or it empties the buffer completely before more data comes in. Signed-off-by: David Howells --- net/rxrpc/recvmsg.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 20d0b5c6f81b..16ff56f69256 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -463,6 +463,10 @@ try_again: flags, &copied); if (ret == -EAGAIN) ret = 0; + + if (after(call->rx_top, call->rx_hard_ack) && + call->rxtx_buffer[(call->rx_hard_ack + 1) & RXRPC_RXTX_BUFF_MASK]) + rxrpc_notify_socket(call); break; default: ret = 0; From b25de3605339c94a6c27d42efe8f7748ea206a8b Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 13 Sep 2016 22:36:22 +0100 Subject: [PATCH 0995/1715] rxrpc: Add missing unlock in rxrpc_call_accept() Add a missing unlock in rxrpc_call_accept() in the path taken if there's no call to wake up. Signed-off-by: David Howells --- net/rxrpc/call_accept.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index b8acec0d596e..06e328f6b0f0 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -425,9 +425,11 @@ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx, write_lock(&rx->call_lock); - ret = -ENODATA; - if (list_empty(&rx->to_be_accepted)) - goto out; + if (list_empty(&rx->to_be_accepted)) { + write_unlock(&rx->call_lock); + kleave(" = -ENODATA [empty]"); + return ERR_PTR(-ENODATA); + } /* check the user ID isn't already in use */ pp = &rx->calls.rb_node; From 89a80ed4c09afb2aff6abe32f6dd68605f857a7d Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 13 Sep 2016 22:36:22 +0100 Subject: [PATCH 0996/1715] rxrpc: Use skb->len not skb->data_len skb->len should be used rather than skb->data_len when referring to the amount of data in a packet. This will only cause a malfunction in the following cases: (1) We receive a jumbo packet (validation and splitting both are wrong). (2) We see if there's extra ACK info in an ACK packet (we think it's not there and just ignore it). Signed-off-by: David Howells --- net/rxrpc/input.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index a707d5952164..5958ef8ba2a0 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -127,7 +127,7 @@ static bool rxrpc_validate_jumbo(struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); unsigned int offset = sp->offset; - unsigned int len = skb->data_len; + unsigned int len = skb->len; int nr_jumbo = 1; u8 flags = sp->hdr.flags; @@ -196,7 +196,7 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb, u8 ack = 0, flags, annotation = 0; _enter("{%u,%u},{%u,%u}", - call->rx_hard_ack, call->rx_top, skb->data_len, seq); + call->rx_hard_ack, call->rx_top, skb->len, seq); _proto("Rx DATA %%%u { #%u f=%02x }", sp->hdr.serial, seq, sp->hdr.flags); @@ -233,7 +233,7 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb, next_subpacket: queued = false; ix = seq & RXRPC_RXTX_BUFF_MASK; - len = skb->data_len; + len = skb->len; if (flags & RXRPC_JUMBO_PACKET) len = RXRPC_JUMBO_DATALEN; @@ -444,7 +444,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, } offset = sp->offset + nr_acks + 3; - if (skb->data_len >= offset + sizeof(buf.info)) { + if (skb->len >= offset + sizeof(buf.info)) { if (skb_copy_bits(skb, offset, &buf.info, sizeof(buf.info)) < 0) return rxrpc_proto_abort("XAI", call, 0); rxrpc_input_ackinfo(call, skb, &buf.info); From 01fd0742248cfc99b3b0cba1e09e1c0ecb8658fa Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 13 Sep 2016 10:23:01 +0100 Subject: [PATCH 0997/1715] rxrpc: Allow tx_winsize to grow in response to an ACK Allow tx_winsize to grow when the ACK info packet shows a larger receive window at the other end rather than only permitting it to shrink. Signed-off-by: David Howells --- net/rxrpc/input.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 5958ef8ba2a0..8e529afcd6c1 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -333,14 +333,16 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb, struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_peer *peer; unsigned int mtu; + u32 rwind = ntohl(ackinfo->rwind); _proto("Rx ACK %%%u Info { rx=%u max=%u rwin=%u jm=%u }", sp->hdr.serial, ntohl(ackinfo->rxMTU), ntohl(ackinfo->maxMTU), - ntohl(ackinfo->rwind), ntohl(ackinfo->jumbo_max)); + rwind, ntohl(ackinfo->jumbo_max)); - if (call->tx_winsize > ntohl(ackinfo->rwind)) - call->tx_winsize = ntohl(ackinfo->rwind); + if (rwind > RXRPC_RXTX_BUFF_SIZE - 1) + rwind = RXRPC_RXTX_BUFF_SIZE - 1; + call->tx_winsize = rwind; mtu = min(ntohl(ackinfo->rxMTU), ntohl(ackinfo->maxMTU)); From cbd00891de9bb4756bac6f6edfa945d5a6468977 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 13 Sep 2016 09:12:34 +0100 Subject: [PATCH 0998/1715] rxrpc: Adjust the call ref tracepoint to show kernel API refs Adjust the call ref tracepoint to show references held on a call by the kernel API separately as much as possible and add an additional trace to at the allocation point from the preallocation buffer for an incoming call. Note that this doesn't show the allocation of a client call for the kernel separately at the moment. Signed-off-by: David Howells --- net/rxrpc/af_rxrpc.c | 2 +- net/rxrpc/ar-internal.h | 2 ++ net/rxrpc/call_accept.c | 3 ++- net/rxrpc/call_object.c | 2 ++ 4 files changed, 7 insertions(+), 2 deletions(-) diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index caa226dd436e..25d00ded24bc 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -299,7 +299,7 @@ void rxrpc_kernel_end_call(struct socket *sock, struct rxrpc_call *call) { _enter("%d{%d}", call->debug_id, atomic_read(&call->usage)); rxrpc_release_call(rxrpc_sk(sock->sk), call); - rxrpc_put_call(call, rxrpc_call_put); + rxrpc_put_call(call, rxrpc_call_put_kernel); } EXPORT_SYMBOL(rxrpc_kernel_end_call); diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index b1cb79ec4e96..47c74a581a0f 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -540,8 +540,10 @@ enum rxrpc_call_trace { rxrpc_call_seen, rxrpc_call_got, rxrpc_call_got_userid, + rxrpc_call_got_kernel, rxrpc_call_put, rxrpc_call_put_userid, + rxrpc_call_put_kernel, rxrpc_call_put_noqueue, rxrpc_call__nr_trace }; diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 06e328f6b0f0..5fd9d2c89b7f 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -121,7 +121,7 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, call->user_call_ID = user_call_ID; call->notify_rx = notify_rx; - rxrpc_get_call(call, rxrpc_call_got); + rxrpc_get_call(call, rxrpc_call_got_kernel); user_attach_call(call, user_call_ID); rxrpc_get_call(call, rxrpc_call_got_userid); rb_link_node(&call->sock_node, parent, pp); @@ -300,6 +300,7 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, smp_store_release(&b->call_backlog_tail, (call_tail + 1) & (RXRPC_BACKLOG_MAX - 1)); + rxrpc_see_call(call); call->conn = conn; call->peer = rxrpc_get_peer(conn->params.peer); return call; diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 18ab13f82f6e..3f9476508204 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -56,8 +56,10 @@ const char rxrpc_call_traces[rxrpc_call__nr_trace][4] = { [rxrpc_call_seen] = "SEE", [rxrpc_call_got] = "GOT", [rxrpc_call_got_userid] = "Gus", + [rxrpc_call_got_kernel] = "Gke", [rxrpc_call_put] = "PUT", [rxrpc_call_put_userid] = "Pus", + [rxrpc_call_put_kernel] = "Pke", [rxrpc_call_put_noqueue] = "PNQ", }; From 3432a757b1f889f8c0d33cd9fcabdae172ed812b Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 13 Sep 2016 09:05:14 +0100 Subject: [PATCH 0999/1715] rxrpc: Fix prealloc refcounting The preallocated call buffer holds a ref on the calls within that buffer. The ref was being released in the wrong place - it worked okay for incoming calls to the AFS cache manager service, but doesn't work right for incoming calls to a userspace service. Instead of releasing an extra ref service calls in rxrpc_release_call(), the ref needs to be released during the acceptance/rejectance process. To this end: (1) The prealloc ref is now normally released during rxrpc_new_incoming_call(). (2) For preallocated kernel API calls, the kernel API's ref needs to be released when the call is discarded on socket close. (3) We shouldn't take a second ref in rxrpc_accept_call(). (4) rxrpc_recvmsg_new_call() needs to get a ref of its own when it adds the call to the to_be_accepted socket queue. In doing (4) above, we would prefer not to put the call's refcount down to 0 as that entails doing cleanup in softirq context, but it's unlikely as there are several refs held elsewhere, at least one of which must be put by someone in process context calling rxrpc_release_call(). However, it's not a problem if we do have to do that. Signed-off-by: David Howells --- net/rxrpc/call_accept.c | 9 ++++++++- net/rxrpc/call_object.c | 3 --- net/rxrpc/recvmsg.c | 1 + 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 5fd9d2c89b7f..26c293ef98eb 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -221,6 +221,7 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx) if (rx->discard_new_call) { _debug("discard %lx", call->user_call_ID); rx->discard_new_call(call, call->user_call_ID); + rxrpc_put_call(call, rxrpc_call_put_kernel); } rxrpc_call_completed(call); rxrpc_release_call(rx, call); @@ -402,6 +403,13 @@ found_service: if (call->state == RXRPC_CALL_SERVER_ACCEPTING) rxrpc_notify_socket(call); + /* We have to discard the prealloc queue's ref here and rely on a + * combination of the RCU read lock and refs held either by the socket + * (recvmsg queue, to-be-accepted queue or user ID tree) or the kernel + * service to prevent the call from being deallocated too early. + */ + rxrpc_put_call(call, rxrpc_call_put); + _leave(" = %p{%d}", call, call->debug_id); out: spin_unlock(&rx->incoming_lock); @@ -469,7 +477,6 @@ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx, } /* formalise the acceptance */ - rxrpc_get_call(call, rxrpc_call_got); call->notify_rx = notify_rx; call->user_call_ID = user_call_ID; rxrpc_get_call(call, rxrpc_call_got_userid); diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 3f9476508204..9aa1c4b53563 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -464,9 +464,6 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) call->rxtx_buffer[i] = NULL; } - /* We have to release the prealloc backlog ref */ - if (rxrpc_is_service_call(call)) - rxrpc_put_call(call, rxrpc_call_put); _leave(""); } diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 16ff56f69256..a284205b8ecf 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -118,6 +118,7 @@ static int rxrpc_recvmsg_new_call(struct rxrpc_sock *rx, list_del_init(&call->recvmsg_link); write_unlock_bh(&rx->recvmsg_lock); + rxrpc_get_call(call, rxrpc_call_got); write_lock(&rx->call_lock); list_add_tail(&call->accept_link, &rx->to_be_accepted); write_unlock(&rx->call_lock); From 75e42126399220069ada0ca0e93237993c6afccf Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 13 Sep 2016 22:36:22 +0100 Subject: [PATCH 1000/1715] rxrpc: Correctly initialise, limit and transmit call->rx_winsize call->rx_winsize should be initialised to the sysctl setting and the sysctl setting should be limited to the maximum we want to permit. Further, we need to place this in the ACK info instead of the sysctl setting. Furthermore, discard the idea of accepting the subpackets of a jumbo packet that lie beyond the receive window when the first packet of the jumbo is within the window. Just discard the excess subpackets instead. This allows the receive window to be opened up right to the buffer size less one for the dead slot. Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 3 ++- net/rxrpc/call_object.c | 2 +- net/rxrpc/input.c | 23 ++++++++++++++++------- net/rxrpc/misc.c | 5 ++++- net/rxrpc/output.c | 4 ++-- net/rxrpc/sysctl.c | 2 +- 6 files changed, 26 insertions(+), 13 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 47c74a581a0f..e78c40b37db5 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -498,6 +498,7 @@ struct rxrpc_call { */ #define RXRPC_RXTX_BUFF_SIZE 64 #define RXRPC_RXTX_BUFF_MASK (RXRPC_RXTX_BUFF_SIZE - 1) +#define RXRPC_INIT_RX_WINDOW_SIZE 32 struct sk_buff **rxtx_buffer; u8 *rxtx_annotations; #define RXRPC_TX_ANNO_ACK 0 @@ -518,7 +519,7 @@ struct rxrpc_call { rxrpc_seq_t rx_expect_next; /* Expected next packet sequence number */ u8 rx_winsize; /* Size of Rx window */ u8 tx_winsize; /* Maximum size of Tx window */ - u8 nr_jumbo_dup; /* Number of jumbo duplicates */ + u8 nr_jumbo_bad; /* Number of jumbo dups/exceeds-windows */ /* receive-phase ACK management */ u8 ackr_reason; /* reason to ACK */ diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 9aa1c4b53563..22f9b0d1a138 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -152,7 +152,7 @@ struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp) memset(&call->sock_node, 0xed, sizeof(call->sock_node)); /* Leave space in the ring to handle a maxed-out jumbo packet */ - call->rx_winsize = RXRPC_RXTX_BUFF_SIZE - 1 - 46; + call->rx_winsize = rxrpc_rx_window_size; call->tx_winsize = 16; call->rx_expect_next = 1; return call; diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 8e529afcd6c1..75af0bd316c7 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -164,7 +164,7 @@ protocol_error: * (that information is encoded in the ACK packet). */ static void rxrpc_input_dup_data(struct rxrpc_call *call, rxrpc_seq_t seq, - u8 annotation, bool *_jumbo_dup) + u8 annotation, bool *_jumbo_bad) { /* Discard normal packets that are duplicates. */ if (annotation == 0) @@ -174,9 +174,9 @@ static void rxrpc_input_dup_data(struct rxrpc_call *call, rxrpc_seq_t seq, * more partially duplicate jumbo packets, we refuse to take any more * jumbos for this call. */ - if (!*_jumbo_dup) { - call->nr_jumbo_dup++; - *_jumbo_dup = true; + if (!*_jumbo_bad) { + call->nr_jumbo_bad++; + *_jumbo_bad = true; } } @@ -191,7 +191,7 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb, unsigned int ix; rxrpc_serial_t serial = sp->hdr.serial, ack_serial = 0; rxrpc_seq_t seq = sp->hdr.seq, hard_ack; - bool immediate_ack = false, jumbo_dup = false, queued; + bool immediate_ack = false, jumbo_bad = false, queued; u16 len; u8 ack = 0, flags, annotation = 0; @@ -222,7 +222,7 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb, flags = sp->hdr.flags; if (flags & RXRPC_JUMBO_PACKET) { - if (call->nr_jumbo_dup > 3) { + if (call->nr_jumbo_bad > 3) { ack = RXRPC_ACK_NOSPACE; ack_serial = serial; goto ack; @@ -259,7 +259,7 @@ next_subpacket: } if (call->rxtx_buffer[ix]) { - rxrpc_input_dup_data(call, seq, annotation, &jumbo_dup); + rxrpc_input_dup_data(call, seq, annotation, &jumbo_bad); if (ack != RXRPC_ACK_DUPLICATE) { ack = RXRPC_ACK_DUPLICATE; ack_serial = serial; @@ -304,6 +304,15 @@ skip: annotation++; if (flags & RXRPC_JUMBO_PACKET) annotation |= RXRPC_RX_ANNO_JLAST; + if (after(seq, hard_ack + call->rx_winsize)) { + ack = RXRPC_ACK_EXCEEDS_WINDOW; + ack_serial = serial; + if (!jumbo_bad) { + call->nr_jumbo_bad++; + jumbo_bad = true; + } + goto ack; + } _proto("Rx DATA Jumbo %%%u", serial); goto next_subpacket; diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index fd096f742e4b..8b910780f1ac 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -50,7 +50,10 @@ unsigned int rxrpc_idle_ack_delay = 0.5 * HZ; * limit is hit, we should generate an EXCEEDS_WINDOW ACK and discard further * packets. */ -unsigned int rxrpc_rx_window_size = RXRPC_RXTX_BUFF_SIZE - 46; +unsigned int rxrpc_rx_window_size = RXRPC_INIT_RX_WINDOW_SIZE; +#if (RXRPC_RXTX_BUFF_SIZE - 1) < RXRPC_INIT_RX_WINDOW_SIZE +#error Need to reduce RXRPC_INIT_RX_WINDOW_SIZE +#endif /* * Maximum Rx MTU size. This indicates to the sender the size of jumbo packet diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 719a4c23f09d..90c7722d5779 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -71,10 +71,10 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_call *call, mtu = call->conn->params.peer->if_mtu; mtu -= call->conn->params.peer->hdrsize; - jmax = (call->nr_jumbo_dup > 3) ? 1 : rxrpc_rx_jumbo_max; + jmax = (call->nr_jumbo_bad > 3) ? 1 : rxrpc_rx_jumbo_max; pkt->ackinfo.rxMTU = htonl(rxrpc_rx_mtu); pkt->ackinfo.maxMTU = htonl(mtu); - pkt->ackinfo.rwind = htonl(rxrpc_rx_window_size); + pkt->ackinfo.rwind = htonl(call->rx_winsize); pkt->ackinfo.jumbo_max = htonl(jmax); *ackp++ = 0; diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c index b7ca8cf13c84..a03c61c672f5 100644 --- a/net/rxrpc/sysctl.c +++ b/net/rxrpc/sysctl.c @@ -20,7 +20,7 @@ static const unsigned int one = 1; static const unsigned int four = 4; static const unsigned int thirtytwo = 32; static const unsigned int n_65535 = 65535; -static const unsigned int n_max_acks = RXRPC_MAXACKS; +static const unsigned int n_max_acks = RXRPC_RXTX_BUFF_SIZE - 1; /* * RxRPC operating parameters. From cd5892c756f51ed6ff18ff49c837d219bfd9bb5d Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 13 Sep 2016 08:49:05 +0100 Subject: [PATCH 1001/1715] rxrpc: Create an address for sendmsg() to bind unbound socket with Create an address for sendmsg() to bind unbound socket with rather than using a completely blank address otherwise the transport socket creation will fail because it will try to use address family 0. We use the address family specified in the protocol argument when the AF_RXRPC socket was created and SOCK_DGRAM as the default. For anything else, bind() must be used. Signed-off-by: David Howells --- net/rxrpc/af_rxrpc.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 25d00ded24bc..741b0d8d2e8c 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -401,6 +401,18 @@ static int rxrpc_sendmsg(struct socket *sock, struct msghdr *m, size_t len) switch (rx->sk.sk_state) { case RXRPC_UNBOUND: + rx->srx.srx_family = AF_RXRPC; + rx->srx.srx_service = 0; + rx->srx.transport_type = SOCK_DGRAM; + rx->srx.transport.family = rx->family; + switch (rx->family) { + case AF_INET: + rx->srx.transport_len = sizeof(struct sockaddr_in); + break; + default: + ret = -EAFNOSUPPORT; + goto error_unlock; + } local = rxrpc_lookup_local(&rx->srx); if (IS_ERR(local)) { ret = PTR_ERR(local); From aaa31cbc66733386406464ec6c5c0889d9968a95 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 13 Sep 2016 08:49:05 +0100 Subject: [PATCH 1002/1715] rxrpc: Don't specify protocol to when creating transport socket Pass 0 as the protocol argument when creating the transport socket rather than IPPROTO_UDP. Signed-off-by: David Howells --- net/rxrpc/local_object.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index 782b9adf67cb..8720be2a6250 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -103,8 +103,8 @@ static int rxrpc_open_socket(struct rxrpc_local *local) _enter("%p{%d}", local, local->srx.transport_type); /* create a socket to represent the local endpoint */ - ret = sock_create_kern(&init_net, PF_INET, local->srx.transport_type, - IPPROTO_UDP, &local->socket); + ret = sock_create_kern(&init_net, local->srx.transport.family, + local->srx.transport_type, 0, &local->socket); if (ret < 0) { _leave(" = %d [socket]", ret); return ret; From 1c2bc7b948a2adee0d3e070f4ce14645efa0a2d2 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 13 Sep 2016 08:49:05 +0100 Subject: [PATCH 1003/1715] rxrpc: Use rxrpc_extract_addr_from_skb() rather than doing this manually There are two places that want to transmit a packet in response to one just received and manually pick the address to reply to out of the sk_buff. Make them use rxrpc_extract_addr_from_skb() instead so that IPv6 is handled automatically. Signed-off-by: David Howells --- net/rxrpc/local_event.c | 13 +++++-------- net/rxrpc/output.c | 32 ++++++-------------------------- 2 files changed, 11 insertions(+), 34 deletions(-) diff --git a/net/rxrpc/local_event.c b/net/rxrpc/local_event.c index cdd58e6e9fbd..f073e932500e 100644 --- a/net/rxrpc/local_event.c +++ b/net/rxrpc/local_event.c @@ -15,8 +15,6 @@ #include #include #include -#include -#include #include #include #include @@ -33,7 +31,7 @@ static void rxrpc_send_version_request(struct rxrpc_local *local, { struct rxrpc_wire_header whdr; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - struct sockaddr_in sin; + struct sockaddr_rxrpc srx; struct msghdr msg; struct kvec iov[2]; size_t len; @@ -41,12 +39,11 @@ static void rxrpc_send_version_request(struct rxrpc_local *local, _enter(""); - sin.sin_family = AF_INET; - sin.sin_port = udp_hdr(skb)->source; - sin.sin_addr.s_addr = ip_hdr(skb)->saddr; + if (rxrpc_extract_addr_from_skb(&srx, skb) < 0) + return; - msg.msg_name = &sin; - msg.msg_namelen = sizeof(sin); + msg.msg_name = &srx.transport; + msg.msg_namelen = srx.transport_len; msg.msg_control = NULL; msg.msg_controllen = 0; msg.msg_flags = 0; diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 90c7722d5779..ec3621f2c5c8 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -15,8 +15,6 @@ #include #include #include -#include -#include #include #include #include "ar-internal.h" @@ -272,10 +270,7 @@ send_fragmentable: */ void rxrpc_reject_packets(struct rxrpc_local *local) { - union { - struct sockaddr sa; - struct sockaddr_in sin; - } sa; + struct sockaddr_rxrpc srx; struct rxrpc_skb_priv *sp; struct rxrpc_wire_header whdr; struct sk_buff *skb; @@ -292,32 +287,21 @@ void rxrpc_reject_packets(struct rxrpc_local *local) iov[1].iov_len = sizeof(code); size = sizeof(whdr) + sizeof(code); - msg.msg_name = &sa; + msg.msg_name = &srx.transport; msg.msg_control = NULL; msg.msg_controllen = 0; msg.msg_flags = 0; - memset(&sa, 0, sizeof(sa)); - sa.sa.sa_family = local->srx.transport.family; - switch (sa.sa.sa_family) { - case AF_INET: - msg.msg_namelen = sizeof(sa.sin); - break; - default: - msg.msg_namelen = 0; - break; - } - memset(&whdr, 0, sizeof(whdr)); whdr.type = RXRPC_PACKET_TYPE_ABORT; while ((skb = skb_dequeue(&local->reject_queue))) { rxrpc_see_skb(skb); sp = rxrpc_skb(skb); - switch (sa.sa.sa_family) { - case AF_INET: - sa.sin.sin_port = udp_hdr(skb)->source; - sa.sin.sin_addr.s_addr = ip_hdr(skb)->saddr; + + if (rxrpc_extract_addr_from_skb(&srx, skb) == 0) { + msg.msg_namelen = srx.transport_len; + code = htonl(skb->priority); whdr.epoch = htonl(sp->hdr.epoch); @@ -329,10 +313,6 @@ void rxrpc_reject_packets(struct rxrpc_local *local) whdr.flags &= RXRPC_CLIENT_INITIATED; kernel_sendmsg(local->socket, &msg, iov, 2, size); - break; - - default: - break; } rxrpc_free_skb(skb); From 75b54cb57ca34cbe7a87c6ac757c55360a624590 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 13 Sep 2016 08:49:05 +0100 Subject: [PATCH 1004/1715] rxrpc: Add IPv6 support Add IPv6 support to AF_RXRPC. With this, AF_RXRPC sockets can be created: service = socket(AF_RXRPC, SOCK_DGRAM, PF_INET6); instead of: service = socket(AF_RXRPC, SOCK_DGRAM, PF_INET); The AFS filesystem doesn't support IPv6 at the moment, though, since that requires upgrades to some of the RPC calls. Note that a good portion of this patch is replacing "%pI4:%u" in print statements with "%pISpc" which is able to handle both protocols and print the port. Signed-off-by: David Howells --- net/rxrpc/af_rxrpc.c | 15 ++++-- net/rxrpc/conn_object.c | 8 +++ net/rxrpc/local_object.c | 35 ++++++------- net/rxrpc/output.c | 16 ++++++ net/rxrpc/peer_event.c | 24 +++++++++ net/rxrpc/peer_object.c | 109 ++++++++++++++++++++++++--------------- net/rxrpc/proc.c | 30 +++++------ 7 files changed, 154 insertions(+), 83 deletions(-) diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 741b0d8d2e8c..f61f7b2d1ca4 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -106,19 +106,23 @@ static int rxrpc_validate_address(struct rxrpc_sock *rx, case AF_INET: if (srx->transport_len < sizeof(struct sockaddr_in)) return -EINVAL; - _debug("INET: %x @ %pI4", - ntohs(srx->transport.sin.sin_port), - &srx->transport.sin.sin_addr); tail = offsetof(struct sockaddr_rxrpc, transport.sin.__pad); break; case AF_INET6: + if (srx->transport_len < sizeof(struct sockaddr_in6)) + return -EINVAL; + tail = offsetof(struct sockaddr_rxrpc, transport) + + sizeof(struct sockaddr_in6); + break; + default: return -EAFNOSUPPORT; } if (tail < len) memset((void *)srx + tail, 0, len - tail); + _debug("INET: %pISp", &srx->transport); return 0; } @@ -409,6 +413,9 @@ static int rxrpc_sendmsg(struct socket *sock, struct msghdr *m, size_t len) case AF_INET: rx->srx.transport_len = sizeof(struct sockaddr_in); break; + case AF_INET6: + rx->srx.transport_len = sizeof(struct sockaddr_in6); + break; default: ret = -EAFNOSUPPORT; goto error_unlock; @@ -563,7 +570,7 @@ static int rxrpc_create(struct net *net, struct socket *sock, int protocol, return -EAFNOSUPPORT; /* we support transport protocol UDP/UDP6 only */ - if (protocol != PF_INET) + if (protocol != PF_INET && protocol != PF_INET6) return -EPROTONOSUPPORT; if (sock->type != SOCK_DGRAM) diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index ffa9addb97b2..c0ddba787fd4 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -134,6 +134,14 @@ struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *local, srx.transport.sin.sin_addr.s_addr) goto not_found; break; + case AF_INET6: + if (peer->srx.transport.sin6.sin6_port != + srx.transport.sin6.sin6_port || + memcmp(&peer->srx.transport.sin6.sin6_addr, + &srx.transport.sin6.sin6_addr, + sizeof(struct in6_addr)) != 0) + goto not_found; + break; default: BUG(); } diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index 8720be2a6250..f5b9bb0d3f98 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -58,6 +58,15 @@ static long rxrpc_local_cmp_key(const struct rxrpc_local *local, memcmp(&local->srx.transport.sin.sin_addr, &srx->transport.sin.sin_addr, sizeof(struct in_addr)); + case AF_INET6: + /* If the choice of UDP6 port is left up to the transport, then + * the endpoint record doesn't match. + */ + return ((u16 __force)local->srx.transport.sin6.sin6_port - + (u16 __force)srx->transport.sin6.sin6_port) ?: + memcmp(&local->srx.transport.sin6.sin6_addr, + &srx->transport.sin6.sin6_addr, + sizeof(struct in6_addr)); default: BUG(); } @@ -100,7 +109,8 @@ static int rxrpc_open_socket(struct rxrpc_local *local) struct sock *sock; int ret, opt; - _enter("%p{%d}", local, local->srx.transport_type); + _enter("%p{%d,%d}", + local, local->srx.transport_type, local->srx.transport.family); /* create a socket to represent the local endpoint */ ret = sock_create_kern(&init_net, local->srx.transport.family, @@ -169,18 +179,8 @@ struct rxrpc_local *rxrpc_lookup_local(const struct sockaddr_rxrpc *srx) long diff; int ret; - if (srx->transport.family == AF_INET) { - _enter("{%d,%u,%pI4+%hu}", - srx->transport_type, - srx->transport.family, - &srx->transport.sin.sin_addr, - ntohs(srx->transport.sin.sin_port)); - } else { - _enter("{%d,%u}", - srx->transport_type, - srx->transport.family); - return ERR_PTR(-EAFNOSUPPORT); - } + _enter("{%d,%d,%pISp}", + srx->transport_type, srx->transport.family, &srx->transport); mutex_lock(&rxrpc_local_mutex); @@ -233,13 +233,8 @@ struct rxrpc_local *rxrpc_lookup_local(const struct sockaddr_rxrpc *srx) found: mutex_unlock(&rxrpc_local_mutex); - _net("LOCAL %s %d {%d,%u,%pI4+%hu}", - age, - local->debug_id, - local->srx.transport_type, - local->srx.transport.family, - &local->srx.transport.sin.sin_addr, - ntohs(local->srx.transport.sin.sin_port)); + _net("LOCAL %s %d {%pISp}", + age, local->debug_id, &local->srx.transport); _leave(" = %p", local); return local; diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index ec3621f2c5c8..d7cd87f17f0d 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -258,6 +258,22 @@ send_fragmentable: (char *)&opt, sizeof(opt)); } break; + + case AF_INET6: + opt = IPV6_PMTUDISC_DONT; + ret = kernel_setsockopt(conn->params.local->socket, + SOL_IPV6, IPV6_MTU_DISCOVER, + (char *)&opt, sizeof(opt)); + if (ret == 0) { + ret = kernel_sendmsg(conn->params.local->socket, &msg, + iov, 1, iov[0].iov_len); + + opt = IPV6_PMTUDISC_DO; + kernel_setsockopt(conn->params.local->socket, + SOL_IPV6, IPV6_MTU_DISCOVER, + (char *)&opt, sizeof(opt)); + } + break; } up_write(&conn->params.local->defrag_sem); diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index c8948936c6fc..74217589cf44 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -66,6 +66,30 @@ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local, } break; + case AF_INET6: + srx.transport.sin6.sin6_port = serr->port; + srx.transport_len = sizeof(struct sockaddr_in6); + switch (serr->ee.ee_origin) { + case SO_EE_ORIGIN_ICMP6: + _net("Rx ICMP6"); + memcpy(&srx.transport.sin6.sin6_addr, + skb_network_header(skb) + serr->addr_offset, + sizeof(struct in6_addr)); + break; + case SO_EE_ORIGIN_ICMP: + _net("Rx ICMP on v6 sock"); + memcpy(&srx.transport.sin6.sin6_addr.s6_addr + 12, + skb_network_header(skb) + serr->addr_offset, + sizeof(struct in_addr)); + break; + default: + memcpy(&srx.transport.sin6.sin6_addr, + &ipv6_hdr(skb)->saddr, + sizeof(struct in6_addr)); + break; + } + break; + default: BUG(); } diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index 3e6cd174b53d..dfc07b41a472 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -16,12 +16,14 @@ #include #include #include +#include #include #include #include #include #include #include +#include #include "ar-internal.h" static DEFINE_HASHTABLE(rxrpc_peer_hash, 10); @@ -50,6 +52,11 @@ static unsigned long rxrpc_peer_hash_key(struct rxrpc_local *local, size = sizeof(srx->transport.sin.sin_addr); p = (u16 *)&srx->transport.sin.sin_addr; break; + case AF_INET6: + hash_key += (u16 __force)srx->transport.sin.sin_port; + size = sizeof(srx->transport.sin6.sin6_addr); + p = (u16 *)&srx->transport.sin6.sin6_addr; + break; default: WARN(1, "AF_RXRPC: Unsupported transport address family\n"); return 0; @@ -93,6 +100,12 @@ static long rxrpc_peer_cmp_key(const struct rxrpc_peer *peer, memcmp(&peer->srx.transport.sin.sin_addr, &srx->transport.sin.sin_addr, sizeof(struct in_addr)); + case AF_INET6: + return ((u16 __force)peer->srx.transport.sin6.sin6_port - + (u16 __force)srx->transport.sin6.sin6_port) ?: + memcmp(&peer->srx.transport.sin6.sin6_addr, + &srx->transport.sin6.sin6_addr, + sizeof(struct in6_addr)); default: BUG(); } @@ -130,17 +143,7 @@ struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *local, peer = __rxrpc_lookup_peer_rcu(local, srx, hash_key); if (peer) { - switch (srx->transport.family) { - case AF_INET: - _net("PEER %d {%d,%u,%pI4+%hu}", - peer->debug_id, - peer->srx.transport_type, - peer->srx.transport.family, - &peer->srx.transport.sin.sin_addr, - ntohs(peer->srx.transport.sin.sin_port)); - break; - } - + _net("PEER %d {%pISp}", peer->debug_id, &peer->srx.transport); _leave(" = %p {u=%d}", peer, atomic_read(&peer->usage)); } return peer; @@ -152,22 +155,49 @@ struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *local, */ static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer) { + struct dst_entry *dst; struct rtable *rt; - struct flowi4 fl4; + struct flowi fl; + struct flowi4 *fl4 = &fl.u.ip4; + struct flowi6 *fl6 = &fl.u.ip6; peer->if_mtu = 1500; - rt = ip_route_output_ports(&init_net, &fl4, NULL, - peer->srx.transport.sin.sin_addr.s_addr, 0, - htons(7000), htons(7001), - IPPROTO_UDP, 0, 0); - if (IS_ERR(rt)) { - _leave(" [route err %ld]", PTR_ERR(rt)); - return; + memset(&fl, 0, sizeof(fl)); + switch (peer->srx.transport.family) { + case AF_INET: + rt = ip_route_output_ports( + &init_net, fl4, NULL, + peer->srx.transport.sin.sin_addr.s_addr, 0, + htons(7000), htons(7001), IPPROTO_UDP, 0, 0); + if (IS_ERR(rt)) { + _leave(" [route err %ld]", PTR_ERR(rt)); + return; + } + dst = &rt->dst; + break; + + case AF_INET6: + fl6->flowi6_iif = LOOPBACK_IFINDEX; + fl6->flowi6_scope = RT_SCOPE_UNIVERSE; + fl6->flowi6_proto = IPPROTO_UDP; + memcpy(&fl6->daddr, &peer->srx.transport.sin6.sin6_addr, + sizeof(struct in6_addr)); + fl6->fl6_dport = htons(7001); + fl6->fl6_sport = htons(7000); + dst = ip6_route_output(&init_net, NULL, fl6); + if (IS_ERR(dst)) { + _leave(" [route err %ld]", PTR_ERR(dst)); + return; + } + break; + + default: + BUG(); } - peer->if_mtu = dst_mtu(&rt->dst); - dst_release(&rt->dst); + peer->if_mtu = dst_mtu(dst); + dst_release(dst); _leave(" [if_mtu %u]", peer->if_mtu); } @@ -207,17 +237,22 @@ static void rxrpc_init_peer(struct rxrpc_peer *peer, unsigned long hash_key) rxrpc_assess_MTU_size(peer); peer->mtu = peer->if_mtu; - if (peer->srx.transport.family == AF_INET) { + switch (peer->srx.transport.family) { + case AF_INET: peer->hdrsize = sizeof(struct iphdr); - switch (peer->srx.transport_type) { - case SOCK_DGRAM: - peer->hdrsize += sizeof(struct udphdr); - break; - default: - BUG(); - break; - } - } else { + break; + case AF_INET6: + peer->hdrsize = sizeof(struct ipv6hdr); + break; + default: + BUG(); + } + + switch (peer->srx.transport_type) { + case SOCK_DGRAM: + peer->hdrsize += sizeof(struct udphdr); + break; + default: BUG(); } @@ -285,11 +320,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local, struct rxrpc_peer *peer, *candidate; unsigned long hash_key = rxrpc_peer_hash_key(local, srx); - _enter("{%d,%d,%pI4+%hu}", - srx->transport_type, - srx->transport_len, - &srx->transport.sin.sin_addr, - ntohs(srx->transport.sin.sin_port)); + _enter("{%pISp}", &srx->transport); /* search the peer list first */ rcu_read_lock(); @@ -326,11 +357,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local, peer = candidate; } - _net("PEER %d {%d,%pI4+%hu}", - peer->debug_id, - peer->srx.transport_type, - &peer->srx.transport.sin.sin_addr, - ntohs(peer->srx.transport.sin.sin_port)); + _net("PEER %d {%pISp}", peer->debug_id, &peer->srx.transport); _leave(" = %p {u=%d}", peer, atomic_read(&peer->usage)); return peer; diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c index d529d1b4021c..65cd980767fa 100644 --- a/net/rxrpc/proc.c +++ b/net/rxrpc/proc.c @@ -52,11 +52,12 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v) struct rxrpc_sock *rx; struct rxrpc_peer *peer; struct rxrpc_call *call; - char lbuff[4 + 4 + 4 + 4 + 5 + 1], rbuff[4 + 4 + 4 + 4 + 5 + 1]; + char lbuff[50], rbuff[50]; if (v == &rxrpc_calls) { seq_puts(seq, - "Proto Local Remote " + "Proto Local " + " Remote " " SvID ConnID CallID End Use State Abort " " UserID\n"); return 0; @@ -68,9 +69,7 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v) if (rx) { local = READ_ONCE(rx->local); if (local) - sprintf(lbuff, "%pI4:%u", - &local->srx.transport.sin.sin_addr, - ntohs(local->srx.transport.sin.sin_port)); + sprintf(lbuff, "%pISpc", &local->srx.transport); else strcpy(lbuff, "no_local"); } else { @@ -79,14 +78,12 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v) peer = call->peer; if (peer) - sprintf(rbuff, "%pI4:%u", - &peer->srx.transport.sin.sin_addr, - ntohs(peer->srx.transport.sin.sin_port)); + sprintf(rbuff, "%pISpc", &peer->srx.transport); else strcpy(rbuff, "no_connection"); seq_printf(seq, - "UDP %-22.22s %-22.22s %4x %08x %08x %s %3u" + "UDP %-47.47s %-47.47s %4x %08x %08x %s %3u" " %-8.8s %08x %lx\n", lbuff, rbuff, @@ -145,11 +142,12 @@ static void rxrpc_connection_seq_stop(struct seq_file *seq, void *v) static int rxrpc_connection_seq_show(struct seq_file *seq, void *v) { struct rxrpc_connection *conn; - char lbuff[4 + 4 + 4 + 4 + 5 + 1], rbuff[4 + 4 + 4 + 4 + 5 + 1]; + char lbuff[50], rbuff[50]; if (v == &rxrpc_connection_proc_list) { seq_puts(seq, - "Proto Local Remote " + "Proto Local " + " Remote " " SvID ConnID End Use State Key " " Serial ISerial\n" ); @@ -163,16 +161,12 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v) goto print; } - sprintf(lbuff, "%pI4:%u", - &conn->params.local->srx.transport.sin.sin_addr, - ntohs(conn->params.local->srx.transport.sin.sin_port)); + sprintf(lbuff, "%pISpc", &conn->params.local->srx.transport); - sprintf(rbuff, "%pI4:%u", - &conn->params.peer->srx.transport.sin.sin_addr, - ntohs(conn->params.peer->srx.transport.sin.sin_port)); + sprintf(rbuff, "%pISpc", &conn->params.peer->srx.transport); print: seq_printf(seq, - "UDP %-22.22s %-22.22s %4x %08x %s %3u" + "UDP %-47.47s %-47.47s %4x %08x %s %3u" " %s %08x %08x %08x\n", lbuff, rbuff, From 6b03144d93fc7de7ef03334384fea0fab058fa6d Mon Sep 17 00:00:00 2001 From: Amitkumar Karwar Date: Fri, 9 Sep 2016 16:27:58 +0530 Subject: [PATCH 1005/1715] mwifiex: handle error if IRQ request fails in mwifiex_sdio_of() When this failure occurs, we will clear card->plt_wake_cfg so that device would initialize without wake up on external interrupt feature. This feature specific code in suspend and resume handlers will be skipped. Signed-off-by: Amitkumar Karwar Reviewed-by: Javier Martinez Canillas Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/sdio.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/marvell/mwifiex/sdio.c b/drivers/net/wireless/marvell/mwifiex/sdio.c index 6dba40998a66..8718950004f3 100644 --- a/drivers/net/wireless/marvell/mwifiex/sdio.c +++ b/drivers/net/wireless/marvell/mwifiex/sdio.c @@ -122,9 +122,11 @@ static int mwifiex_sdio_probe_of(struct device *dev, struct sdio_mmc_card *card) IRQF_TRIGGER_LOW, "wifi_wake", cfg); if (ret) { - dev_err(dev, + dev_dbg(dev, "Failed to request irq_wifi %d (%d)\n", cfg->irq_wifi, ret); + card->plt_wake_cfg = NULL; + return 0; } disable_irq(cfg->irq_wifi); } From ae1799a1cb130170c3ba3370793cea5b0d9d2aa8 Mon Sep 17 00:00:00 2001 From: Amitkumar Karwar Date: Fri, 9 Sep 2016 20:26:19 +0530 Subject: [PATCH 1006/1715] mwifiex: correction in Rx STBC field of htcapinfo Currently Rx STBC in assoc request frame is advertised as 3. It should be 2, as our chipsets support two spatial streams. Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/cfg80211.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c index 0a03d3f90b56..c7f2faa81921 100644 --- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c +++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c @@ -2732,7 +2732,7 @@ mwifiex_setup_ht_caps(struct ieee80211_sta_ht_cap *ht_info, ht_info->cap &= ~IEEE80211_HT_CAP_SGI_40; if (adapter->user_dev_mcs_support == HT_STREAM_2X2) - ht_info->cap |= 3 << IEEE80211_HT_CAP_RX_STBC_SHIFT; + ht_info->cap |= 2 << IEEE80211_HT_CAP_RX_STBC_SHIFT; else ht_info->cap |= 1 << IEEE80211_HT_CAP_RX_STBC_SHIFT; From 54cdf5c727cb3d3124e61433a13e9724a7a4a952 Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Fri, 9 Sep 2016 14:01:24 -0400 Subject: [PATCH 1007/1715] rtl8xxxu: Reset device on module unload if still attached If the USB dongle is still attached, reset it on module unload to avoid scans failing when reloading the driver. Signed-off-by: Jes Sorensen Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index c3620833db7a..d2611a49a26d 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -6129,6 +6129,11 @@ static void rtl8xxxu_disconnect(struct usb_interface *interface) mutex_destroy(&priv->usb_buf_mutex); mutex_destroy(&priv->h2c_mutex); + if (priv->udev->state != USB_STATE_NOTATTACHED) { + dev_info(&priv->udev->dev, + "Device still attached, trying to reset\n"); + usb_reset_device(priv->udev); + } usb_put_dev(priv->udev); ieee80211_free_hw(hw); } From 0cd7f70399f71fdd87b34d28670248c36e4db455 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 9 Sep 2016 14:01:25 -0400 Subject: [PATCH 1008/1715] rtl8xxxu: fix spelling mistake "firmare" -> "firmware" Trivial fix to spelling mistakes in dev_dbg message. Signed-off-by: Colin Ian King Signed-off-by: Jes Sorensen Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index d2611a49a26d..ca92022f9d50 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -3921,11 +3921,11 @@ static int rtl8xxxu_init_device(struct ieee80211_hw *hw) rtl8xxxu_write16(priv, REG_TRXFF_BNDY + 2, priv->fops->trxff_boundary); ret = rtl8xxxu_download_firmware(priv); - dev_dbg(dev, "%s: download_fiwmare %i\n", __func__, ret); + dev_dbg(dev, "%s: download_firmware %i\n", __func__, ret); if (ret) goto exit; ret = rtl8xxxu_start_firmware(priv); - dev_dbg(dev, "%s: start_fiwmare %i\n", __func__, ret); + dev_dbg(dev, "%s: start_firmware %i\n", __func__, ret); if (ret) goto exit; From 8136fd58ad60e25cf8b99d08bf92c09d02b416ef Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 11 Sep 2016 15:05:48 +0200 Subject: [PATCH 1009/1715] ath: constify local structures For structure types defined in the same file or local header files, find top-level static structure declarations that have the following properties: 1. Never reassigned. 2. Address never taken 3. Not passed to a top-level macro call 4. No pointer or array-typed field passed to a function or stored in a variable. Declare structures having all of these properties as const. Done using Coccinelle. Based on a suggestion by Joe Perches . Signed-off-by: Julia Lawall Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/dfs_pattern_detector.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/dfs_pattern_detector.c b/drivers/net/wireless/ath/dfs_pattern_detector.c index 2f8136d50f78..4100ffd42a43 100644 --- a/drivers/net/wireless/ath/dfs_pattern_detector.c +++ b/drivers/net/wireless/ath/dfs_pattern_detector.c @@ -338,7 +338,7 @@ static bool dpd_set_domain(struct dfs_pattern_detector *dpd, return true; } -static struct dfs_pattern_detector default_dpd = { +static const struct dfs_pattern_detector default_dpd = { .exit = dpd_exit, .set_dfs_domain = dpd_set_domain, .add_pulse = dpd_add_pulse, From 1dc80798a8caab8b5788da96ab220c91a03d7d29 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 11 Sep 2016 15:05:50 +0200 Subject: [PATCH 1010/1715] iwlegacy: constify local structures For structure types defined in the same file or local header files, find top-level static structure declarations that have the following properties: 1. Never reassigned. 2. Address never taken 3. Not passed to a top-level macro call 4. No pointer or array-typed field passed to a function or stored in a variable. Declare structures having all of these properties as const. Done using Coccinelle. Based on a suggestion by Joe Perches . Signed-off-by: Julia Lawall Acked-by: Stanislaw Gruszka Signed-off-by: Kalle Valo --- drivers/net/wireless/intel/iwlegacy/3945.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlegacy/3945.c b/drivers/net/wireless/intel/iwlegacy/3945.c index 209dc9988455..4db327a95414 100644 --- a/drivers/net/wireless/intel/iwlegacy/3945.c +++ b/drivers/net/wireless/intel/iwlegacy/3945.c @@ -2671,7 +2671,7 @@ const struct il_ops il3945_ops = { .send_led_cmd = il3945_send_led_cmd, }; -static struct il_cfg il3945_bg_cfg = { +static const struct il_cfg il3945_bg_cfg = { .name = "3945BG", .fw_name_pre = IL3945_FW_PRE, .ucode_api_max = IL3945_UCODE_API_MAX, @@ -2700,7 +2700,7 @@ static struct il_cfg il3945_bg_cfg = { }, }; -static struct il_cfg il3945_abg_cfg = { +static const struct il_cfg il3945_abg_cfg = { .name = "3945ABG", .fw_name_pre = IL3945_FW_PRE, .ucode_api_max = IL3945_UCODE_API_MAX, From d86e64768859fca82c78e52877ceeba04e25d27a Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 11 Sep 2016 15:06:03 +0200 Subject: [PATCH 1011/1715] rtlwifi: rtl818x: constify local structures For structure types defined in the same file or local header files, find top-level static structure declarations that have the following properties: 1. Never reassigned. 2. Address never taken 3. Not passed to a top-level macro call 4. No pointer or array-typed field passed to a function or stored in a variable. Declare structures having all of these properties as const. Done using Coccinelle. Based on a suggestion by Joe Perches . Signed-off-by: Julia Lawall Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c | 2 +- drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c | 2 +- drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c | 2 +- drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c | 2 +- drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c | 2 +- drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c | 2 +- drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c | 2 +- drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c index 47e32cb0ec1a..e7b11b40e68d 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c @@ -280,7 +280,7 @@ static struct rtl_mod_params rtl88ee_mod_params = { .debug = DBG_EMERG, }; -static struct rtl_hal_cfg rtl88ee_hal_cfg = { +static const struct rtl_hal_cfg rtl88ee_hal_cfg = { .bar_id = 2, .write_readback = true, .name = "rtl88e_pci", diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c index 4780bdc63b2b..87aa209ae325 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c @@ -258,7 +258,7 @@ static struct rtl_mod_params rtl92ce_mod_params = { .debug = DBG_EMERG, }; -static struct rtl_hal_cfg rtl92ce_hal_cfg = { +static const struct rtl_hal_cfg rtl92ce_hal_cfg = { .bar_id = 2, .write_readback = true, .name = "rtl92c_pci", diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c index c6e09a19de1a..0538a4d09568 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c @@ -262,7 +262,7 @@ static struct rtl_mod_params rtl92de_mod_params = { .debug = DBG_EMERG, }; -static struct rtl_hal_cfg rtl92de_hal_cfg = { +static const struct rtl_hal_cfg rtl92de_hal_cfg = { .bar_id = 2, .write_readback = true, .name = "rtl8192de", diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c index c31c6bfb536d..ac299cbe59b0 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c @@ -262,7 +262,7 @@ static struct rtl_mod_params rtl92ee_mod_params = { .debug = DBG_EMERG, }; -static struct rtl_hal_cfg rtl92ee_hal_cfg = { +static const struct rtl_hal_cfg rtl92ee_hal_cfg = { .bar_id = 2, .write_readback = true, .name = "rtl92ee_pci", diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c index 31baca41ac2f..5e8e02d5de8a 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c @@ -306,7 +306,7 @@ static struct rtl_mod_params rtl92se_mod_params = { /* Because memory R/W bursting will cause system hang/crash * for 92se, so we don't read back after every write action */ -static struct rtl_hal_cfg rtl92se_hal_cfg = { +static const struct rtl_hal_cfg rtl92se_hal_cfg = { .bar_id = 1, .write_readback = false, .name = "rtl92s_pci", diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c index ff49a8c0ff61..89c828ad89f4 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c @@ -276,7 +276,7 @@ static struct rtl_mod_params rtl8723e_mod_params = { .disable_watchdog = false, }; -static struct rtl_hal_cfg rtl8723e_hal_cfg = { +static const struct rtl_hal_cfg rtl8723e_hal_cfg = { .bar_id = 2, .write_readback = true, .name = "rtl8723e_pci", diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c index 2101793438ed..20b53f035483 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c @@ -276,7 +276,7 @@ static struct rtl_mod_params rtl8723be_mod_params = { .ant_sel = 0, }; -static struct rtl_hal_cfg rtl8723be_hal_cfg = { +static const struct rtl_hal_cfg rtl8723be_hal_cfg = { .bar_id = 2, .write_readback = true, .name = "rtl8723be_pci", diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c index 4159f9b14db6..22f687b1f133 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c @@ -316,7 +316,7 @@ static struct rtl_mod_params rtl8821ae_mod_params = { .disable_watchdog = 0, }; -static struct rtl_hal_cfg rtl8821ae_hal_cfg = { +static const struct rtl_hal_cfg rtl8821ae_hal_cfg = { .bar_id = 2, .write_readback = true, .name = "rtl8821ae_pci", From 787764676f94114980d17e627b21937f4245c866 Mon Sep 17 00:00:00 2001 From: Ganapathi Bhat Date: Mon, 12 Sep 2016 18:55:27 +0530 Subject: [PATCH 1012/1715] mwifiex: Command 7 handling for USB chipsets Firmware image for newer USB chipsets starts with a command 7 block (special command). It doesn't contain data length field. This patch adds necessary handling. Signed-off-by: Ganapathi Bhat Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/usb.c | 4 ++++ drivers/net/wireless/marvell/mwifiex/usb.h | 1 + 2 files changed, 5 insertions(+) diff --git a/drivers/net/wireless/marvell/mwifiex/usb.c b/drivers/net/wireless/marvell/mwifiex/usb.c index 92135167a822..8a20620fa119 100644 --- a/drivers/net/wireless/marvell/mwifiex/usb.c +++ b/drivers/net/wireless/marvell/mwifiex/usb.c @@ -1026,6 +1026,10 @@ static int mwifiex_prog_fw_w_helper(struct mwifiex_adapter *adapter, dnld_cmd = le32_to_cpu(fwdata->fw_hdr.dnld_cmd); tlen += sizeof(struct fw_header); + /* Command 7 doesn't have data length field */ + if (dnld_cmd == FW_CMD_7) + dlen = 0; + memcpy(fwdata->data, &firmware[tlen], dlen); fwdata->seq_num = cpu_to_le32(fw_seqnum); diff --git a/drivers/net/wireless/marvell/mwifiex/usb.h b/drivers/net/wireless/marvell/mwifiex/usb.h index b4e9246bbcdc..1b49c520782a 100644 --- a/drivers/net/wireless/marvell/mwifiex/usb.h +++ b/drivers/net/wireless/marvell/mwifiex/usb.h @@ -51,6 +51,7 @@ #define FW_DNLD_TX_BUF_SIZE 620 #define FW_DNLD_RX_BUF_SIZE 2048 #define FW_HAS_LAST_BLOCK 0x00000004 +#define FW_CMD_7 0x00000007 #define FW_DATA_XMIT_SIZE \ (sizeof(struct fw_header) + dlen + sizeof(u32)) From b7450e248d71067e0c1a09614cf3d7571f7e10fa Mon Sep 17 00:00:00 2001 From: Ganapathi Bhat Date: Mon, 12 Sep 2016 18:55:28 +0530 Subject: [PATCH 1013/1715] mwifiex: firmware name correction for usb8997 chipset Similar to pcie8997 chipset, first firmware submitted for usb8997 chipset will be usbusb8997_combo_v4.bin. This patch corrects the name used in driver. Signed-off-by: Ganapathi Bhat Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/usb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/marvell/mwifiex/usb.h b/drivers/net/wireless/marvell/mwifiex/usb.h index 1b49c520782a..30e8eb8c259d 100644 --- a/drivers/net/wireless/marvell/mwifiex/usb.h +++ b/drivers/net/wireless/marvell/mwifiex/usb.h @@ -46,7 +46,7 @@ #define USB8766_DEFAULT_FW_NAME "mrvl/usb8766_uapsta.bin" #define USB8797_DEFAULT_FW_NAME "mrvl/usb8797_uapsta.bin" #define USB8801_DEFAULT_FW_NAME "mrvl/usb8801_uapsta.bin" -#define USB8997_DEFAULT_FW_NAME "mrvl/usb8997_uapsta.bin" +#define USB8997_DEFAULT_FW_NAME "mrvl/usbusb8997_combo_v4.bin" #define FW_DNLD_TX_BUF_SIZE 620 #define FW_DNLD_RX_BUF_SIZE 2048 From f1c1f17ac52d22227c0074b3d661d7ed692b707a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 13 Sep 2016 17:08:23 +0200 Subject: [PATCH 1014/1715] cfg80211: allow connect keys only with default (TX) key There's no point in allowing connect keys when one of them isn't also configured as the TX key, it would just confuse drivers and probably cause them to pick something for TX. Disallow this confusing and erroneous configuration. As wpa_supplicant will always send NL80211_ATTR_KEYS, even when there are no keys inside, allow that and treat it as though the attribute isn't present at all. Signed-off-by: Johannes Berg --- net/wireless/ibss.c | 5 ++++- net/wireless/nl80211.c | 14 ++++++++++++++ net/wireless/sme.c | 3 +++ net/wireless/wext-sme.c | 2 +- 4 files changed, 22 insertions(+), 2 deletions(-) diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index 896cbb20b6e1..eafdfa5798ae 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -114,6 +114,9 @@ static int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev, } } + if (WARN_ON(connkeys && connkeys->def < 0)) + return -EINVAL; + if (WARN_ON(wdev->connect_keys)) kzfree(wdev->connect_keys); wdev->connect_keys = connkeys; @@ -289,7 +292,7 @@ int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev, wdev->wext.ibss.privacy = wdev->wext.default_key != -1; - if (wdev->wext.keys) { + if (wdev->wext.keys && wdev->wext.keys->def != -1) { ck = kmemdup(wdev->wext.keys, sizeof(*ck), GFP_KERNEL); if (!ck) return -ENOMEM; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 71af96e8a947..f2a77c3daa59 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -848,6 +848,15 @@ nl80211_parse_connkeys(struct cfg80211_registered_device *rdev, struct nlattr *key; struct cfg80211_cached_keys *result; int rem, err, def = 0; + bool have_key = false; + + nla_for_each_nested(key, keys, rem) { + have_key = true; + break; + } + + if (!have_key) + return NULL; result = kzalloc(sizeof(*result), GFP_KERNEL); if (!result) @@ -895,6 +904,11 @@ nl80211_parse_connkeys(struct cfg80211_registered_device *rdev, *no_ht = true; } + if (result->def < 0) { + err = -EINVAL; + goto error; + } + return result; error: kfree(result); diff --git a/net/wireless/sme.c b/net/wireless/sme.c index add6824c44fd..c08a3b57dca1 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -1043,6 +1043,9 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev, connect->crypto.ciphers_pairwise[0] = cipher; } } + } else { + if (WARN_ON(connkeys)) + return -EINVAL; } wdev->connect_keys = connkeys; diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c index f6523a4387cc..88f1f6931ab8 100644 --- a/net/wireless/wext-sme.c +++ b/net/wireless/wext-sme.c @@ -42,7 +42,7 @@ int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev, if (!wdev->wext.connect.ssid_len) return 0; - if (wdev->wext.keys) { + if (wdev->wext.keys && wdev->wext.keys->def != -1) { ck = kmemdup(wdev->wext.keys, sizeof(*ck), GFP_KERNEL); if (!ck) return -ENOMEM; From 93db1d9e6c96050b74bb2793de8db00cd0afe6ab Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 14 Sep 2016 09:23:51 +0200 Subject: [PATCH 1015/1715] mac80211: fix possible out-of-bounds access In the unlikely situation that the supplicant has negotiated admission for the background AC (which it has no reason to as it's not supposed to be requiring admission control to start with, and we'd ignore such a requirement anyway), the loop here may terminate with non_acm_ac == 4, which leads to an array overrun. Check this explicitly just for completeness. Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 8d426f637f58..7486f2dab4ba 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1672,11 +1672,15 @@ __ieee80211_sta_handle_tspec_ac_params(struct ieee80211_sub_if_data *sdata) non_acm_ac++) if (!(sdata->wmm_acm & BIT(7 - 2 * non_acm_ac))) break; - /* The loop will result in using BK even if it requires - * admission control, such configuration makes no sense - * and we have to transmit somehow - the AC selection - * does the same thing. + /* Usually the loop will result in using BK even if it + * requires admission control, but such a configuration + * makes no sense and we have to transmit somehow - the + * AC selection does the same thing. + * If we started out trying to downgrade from BK, then + * the extra condition here might be needed. */ + if (non_acm_ac >= IEEE80211_NUM_ACS) + non_acm_ac = IEEE80211_AC_BK; if (drv_conf_tx(local, sdata, ac, &sdata->tx_conf[non_acm_ac])) sdata_err(sdata, From 58bd7f1158ac7543ccdcddc7f4ecd7db458e6d0b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 14 Sep 2016 09:37:54 +0200 Subject: [PATCH 1016/1715] mac80211: fix scan completed tracing Passing the 'info' pointer where a 'info->aborted' is expected will always lead to tracing to erroneously record that the scan was aborted, fix that by passing the correct info->aborted. The remaining data will be collected in cfg80211, so I haven't duplicated it here. Signed-off-by: Johannes Berg --- net/mac80211/scan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 070b40f15850..23d8ac829279 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -420,7 +420,7 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw, { struct ieee80211_local *local = hw_to_local(hw); - trace_api_scan_completed(local, info); + trace_api_scan_completed(local, info->aborted); set_bit(SCAN_COMPLETED, &local->scanning); if (info->aborted) From 76e1fb4b5532a9df9eb14cfe002412c7617c4ad0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 14 Sep 2016 09:55:57 +0200 Subject: [PATCH 1017/1715] nl80211: always check nla_nest_start() return value If the message got full during nla_nest_start(), it can return NULL. None of the cases here seem like that can really happen, but check the return value nonetheless. Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index f2a77c3daa59..60c8a7429d33 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -8022,6 +8022,8 @@ __cfg80211_alloc_vendor_skb(struct cfg80211_registered_device *rdev, } data = nla_nest_start(skb, attr); + if (!data) + goto nla_put_failure; ((void **)skb->cb)[0] = rdev; ((void **)skb->cb)[1] = hdr; @@ -9458,8 +9460,14 @@ static int nl80211_send_wowlan_nd(struct sk_buff *msg, if (req->n_match_sets) { matches = nla_nest_start(msg, NL80211_ATTR_SCHED_SCAN_MATCH); + if (!matches) + return -ENOBUFS; + for (i = 0; i < req->n_match_sets; i++) { match = nla_nest_start(msg, i); + if (!match) + return -ENOBUFS; + nla_put(msg, NL80211_SCHED_SCAN_MATCH_ATTR_SSID, req->match_sets[i].ssid.ssid_len, req->match_sets[i].ssid.ssid); @@ -9474,6 +9482,9 @@ static int nl80211_send_wowlan_nd(struct sk_buff *msg, for (i = 0; i < req->n_scan_plans; i++) { scan_plan = nla_nest_start(msg, i + 1); + if (!scan_plan) + return -ENOBUFS; + if (!scan_plan || nla_put_u32(msg, NL80211_SCHED_SCAN_PLAN_INTERVAL, req->scan_plans[i].interval) || From 53b18980fded52e39520661af3528577d36eb279 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 14 Sep 2016 09:59:21 +0200 Subject: [PATCH 1018/1715] nl80211: always check nla_put* return values A few instances were found where we didn't check them, add the missing checks even though they'll probably never trigger as the message should be large enough here. Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 60c8a7429d33..887c4c114206 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -9453,8 +9453,10 @@ static int nl80211_send_wowlan_nd(struct sk_buff *msg, if (!freqs) return -ENOBUFS; - for (i = 0; i < req->n_channels; i++) - nla_put_u32(msg, i, req->channels[i]->center_freq); + for (i = 0; i < req->n_channels; i++) { + if (nla_put_u32(msg, i, req->channels[i]->center_freq)) + return -ENOBUFS; + } nla_nest_end(msg, freqs); @@ -9468,9 +9470,10 @@ static int nl80211_send_wowlan_nd(struct sk_buff *msg, if (!match) return -ENOBUFS; - nla_put(msg, NL80211_SCHED_SCAN_MATCH_ATTR_SSID, - req->match_sets[i].ssid.ssid_len, - req->match_sets[i].ssid.ssid); + if (nla_put(msg, NL80211_SCHED_SCAN_MATCH_ATTR_SSID, + req->match_sets[i].ssid.ssid_len, + req->match_sets[i].ssid.ssid)) + return -ENOBUFS; nla_nest_end(msg, match); } nla_nest_end(msg, matches); From 5140974dca69f0eace465bccd93891ca242a7e61 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 14 Sep 2016 09:58:31 +0200 Subject: [PATCH 1019/1715] mac80211: remove unused assignment The next line overwrites this assignment, so remove it; there's no real value in using it for the next assignment either. Signed-off-by: Johannes Berg --- net/mac80211/util.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/mac80211/util.c b/net/mac80211/util.c index e777c2a6568f..b6865d884487 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2555,7 +2555,6 @@ int ieee80211_add_srates_ie(struct ieee80211_sub_if_data *sdata, if (need_basic && basic_rates & BIT(i)) basic = 0x80; - rate = sband->bitrates[i].bitrate; rate = DIV_ROUND_UP(sband->bitrates[i].bitrate, 5 * (1 << shift)); *pos++ = basic | (u8) rate; From 8826fef95bd5f846f7745d9ce1e3009927ec0cb8 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 14 Sep 2016 10:00:23 +0200 Subject: [PATCH 1020/1715] mac80211: remove pointless chanctx NULL check If chanctx is derived as container_of() from a non-NULL pointer, it can't ever be NULL. Since we checked conf before, that's true here, so remove the useless NULL check. Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 5d4afead804e..e29ff5749944 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2961,10 +2961,6 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, } chanctx = container_of(conf, struct ieee80211_chanctx, conf); - if (!chanctx) { - err = -EBUSY; - goto out; - } ch_switch.timestamp = 0; ch_switch.device_timestamp = 0; From c7e9dbcf09bddd01568113103d62423d8894eabd Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 14 Sep 2016 10:03:00 +0200 Subject: [PATCH 1021/1715] mac80211: remove sta_remove_debugfs driver callback No drivers implement this, relying either on the recursive directory removal to remove their debugfs, or not having any to start with. Remove the dead driver callback. Signed-off-by: Johannes Berg --- include/net/mac80211.h | 11 ++--------- net/mac80211/debugfs_sta.c | 4 ---- net/mac80211/driver-ops.h | 15 --------------- 3 files changed, 2 insertions(+), 28 deletions(-) diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 08bac23c8de1..d9c8ccd6b4e6 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -3101,11 +3101,8 @@ enum ieee80211_reconfig_type { * * @sta_add_debugfs: Drivers can use this callback to add debugfs files * when a station is added to mac80211's station list. This callback - * and @sta_remove_debugfs should be within a CONFIG_MAC80211_DEBUGFS - * conditional. This callback can sleep. - * - * @sta_remove_debugfs: Remove the debugfs files which were added using - * @sta_add_debugfs. This callback can sleep. + * should be within a CONFIG_MAC80211_DEBUGFS conditional. This + * callback can sleep. * * @sta_notify: Notifies low level driver about power state transition of an * associated station, AP, IBSS/WDS/mesh peer etc. For a VIF operating @@ -3501,10 +3498,6 @@ struct ieee80211_ops { struct ieee80211_vif *vif, struct ieee80211_sta *sta, struct dentry *dir); - void (*sta_remove_debugfs)(struct ieee80211_hw *hw, - struct ieee80211_vif *vif, - struct ieee80211_sta *sta, - struct dentry *dir); #endif void (*sta_notify)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, enum sta_notify_cmd, struct ieee80211_sta *sta); diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index fb2693582e40..a2fcdb47a0e6 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -544,10 +544,6 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta) void ieee80211_sta_debugfs_remove(struct sta_info *sta) { - struct ieee80211_local *local = sta->local; - struct ieee80211_sub_if_data *sdata = sta->sdata; - - drv_sta_remove_debugfs(local, sdata, &sta->sta, sta->debugfs_dir); debugfs_remove_recursive(sta->debugfs_dir); sta->debugfs_dir = NULL; } diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index c39f93b48791..fe35a1c0dc86 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -499,21 +499,6 @@ static inline void drv_sta_add_debugfs(struct ieee80211_local *local, local->ops->sta_add_debugfs(&local->hw, &sdata->vif, sta, dir); } - -static inline void drv_sta_remove_debugfs(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata, - struct ieee80211_sta *sta, - struct dentry *dir) -{ - might_sleep(); - - sdata = get_bss_sdata(sdata); - check_sdata_in_driver(sdata); - - if (local->ops->sta_remove_debugfs) - local->ops->sta_remove_debugfs(&local->hw, &sdata->vif, - sta, dir); -} #endif static inline void drv_sta_pre_rcu_remove(struct ieee80211_local *local, From e8a24cd4b87247beedb1addc7b683422092047e5 Mon Sep 17 00:00:00 2001 From: Rajkumar Manoharan Date: Wed, 14 Sep 2016 12:48:32 +0530 Subject: [PATCH 1022/1715] mac80211: allow driver to handle packet-loss mechanism Based on consecutive msdu failures, mac80211 triggers CQM packet-loss mechanism. Drivers like ath10k that have its own connection monitoring algorithm, offloaded to firmware for triggering station kickout. In case of station kickout, driver will report low ack status by mac80211 API (ieee80211_report_low_ack). This flag will enable the driver to completely rely on firmware events for station kickout and bypass mac80211 packet loss mechanism. Signed-off-by: Rajkumar Manoharan Signed-off-by: Johannes Berg --- include/net/mac80211.h | 6 ++++++ net/mac80211/debugfs.c | 1 + net/mac80211/status.c | 6 ++++++ 3 files changed, 13 insertions(+) diff --git a/include/net/mac80211.h b/include/net/mac80211.h index d9c8ccd6b4e6..5296100f3889 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2018,6 +2018,11 @@ struct ieee80211_txq { * @IEEE80211_HW_TX_FRAG_LIST: Hardware (or driver) supports sending frag_list * skbs, needed for zero-copy software A-MSDU. * + * @IEEE80211_HW_REPORTS_LOW_ACK: The driver (or firmware) reports low ack event + * by ieee80211_report_low_ack() based on its own algorithm. For such + * drivers, mac80211 packet loss mechanism will not be triggered and driver + * is completely depending on firmware event for station kickout. + * * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays */ enum ieee80211_hw_flags { @@ -2058,6 +2063,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_USES_RSS, IEEE80211_HW_TX_AMSDU, IEEE80211_HW_TX_FRAG_LIST, + IEEE80211_HW_REPORTS_LOW_ACK, /* keep last, obviously */ NUM_IEEE80211_HW_FLAGS diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 5bbb470f335f..8ca62b6bb02a 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -201,6 +201,7 @@ static const char *hw_flag_names[] = { FLAG(USES_RSS), FLAG(TX_AMSDU), FLAG(TX_FRAG_LIST), + FLAG(REPORTS_LOW_ACK), #undef FLAG }; diff --git a/net/mac80211/status.c b/net/mac80211/status.c index fabd9ff710d9..ea39f8a7baf3 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -557,6 +557,12 @@ static void ieee80211_report_used_skb(struct ieee80211_local *local, static void ieee80211_lost_packet(struct sta_info *sta, struct ieee80211_tx_info *info) { + /* If driver relies on its own algorithm for station kickout, skip + * mac80211 packet loss mechanism. + */ + if (ieee80211_hw_check(&sta->local->hw, REPORTS_LOW_ACK)) + return; + /* This packet was aggregated but doesn't carry status info */ if ((info->flags & IEEE80211_TX_CTL_AMPDU) && !(info->flags & IEEE80211_TX_STAT_AMPDU)) From ec53c832ee90b86414ca243d0e6fdbb9cf5e413b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 15 Sep 2016 10:57:50 +0200 Subject: [PATCH 1023/1715] cfg80211: remove unnecessary pointer-of For an array, there's no need to use &array, so just use the plain wiphy->addresses[i].addr here to silence smatch. Signed-off-by: Johannes Berg --- net/wireless/sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c index e46469bc130f..0082f4b01795 100644 --- a/net/wireless/sysfs.c +++ b/net/wireless/sysfs.c @@ -57,7 +57,7 @@ static ssize_t addresses_show(struct device *dev, return sprintf(buf, "%pM\n", wiphy->perm_addr); for (i = 0; i < wiphy->n_addresses; i++) - buf += sprintf(buf, "%pM\n", &wiphy->addresses[i].addr); + buf += sprintf(buf, "%pM\n", wiphy->addresses[i].addr); return buf - start; } From ca3b9c6b6d4db9a8ba5fc8b95664e75468c59f9f Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Thu, 30 Jun 2016 16:14:02 +0300 Subject: [PATCH 1024/1715] iwlwifi: mvm: call a different txq_enable function Since the SCD_QUEUE_CFG command was introduced the driver calls iwl_trans_txq_enable_cfg() with a NULL for scd_cfg parameter. This makes the transport avoid writing to the SCD pointers, since it can cause races with firmware, which is also accessing the registers. The transport only updates the write pointer in that case. Fix a wrong call to iwl_trans_txq_enable() which caused a scd_cfg parameter to be sent to transport, resulting with an access to SCD registers. Fixes: 58f2cc57dc6a ("iwlwifi: mvm: support dqa-mode scd queue redirection") Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index 30fc3afe5241..a92e12edeb3a 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -588,9 +588,7 @@ int iwl_mvm_scd_queue_redirect(struct iwl_mvm *mvm, int queue, int tid, ret); /* Make sure the SCD wrptr is correctly set before reconfiguring */ - iwl_trans_txq_enable(mvm->trans, queue, iwl_mvm_ac_to_tx_fifo[ac], - cmd.sta_id, tid, LINK_QUAL_AGG_FRAME_LIMIT_DEF, - ssn, wdg_timeout); + iwl_trans_txq_enable_cfg(mvm->trans, queue, ssn, NULL, wdg_timeout); /* Update the TID "owner" of the queue */ spin_lock_bh(&mvm->queue_info_lock); From 15985fba2e93872f3070af63ae9b0d0f42b2c7ea Mon Sep 17 00:00:00 2001 From: Liad Kaufman Date: Sun, 26 Jun 2016 14:45:12 +0300 Subject: [PATCH 1025/1715] iwlwifi: mvm: don't free queue after delba in dqa In DQA mode, a delBA might free the queue although it shouldn't. Fix that. Fixes: cf941e174ee2 ("iwlwifi: mvm: support dqa-mode agg on non-shared queue") Signed-off-by: Liad Kaufman Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index 8b91544e6220..62714709ba8f 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -1100,9 +1100,13 @@ static void iwl_mvm_check_ratid_empty(struct iwl_mvm *mvm, IWL_DEBUG_TX_QUEUES(mvm, "Can continue DELBA flow ssn = next_recl = %d\n", tid_data->next_reclaimed); - iwl_mvm_disable_txq(mvm, tid_data->txq_id, - vif->hw_queue[tid_to_mac80211_ac[tid]], tid, - CMD_ASYNC); + if (!iwl_mvm_is_dqa_supported(mvm)) { + u8 mac80211_ac = tid_to_mac80211_ac[tid]; + + iwl_mvm_disable_txq(mvm, tid_data->txq_id, + vif->hw_queue[mac80211_ac], tid, + CMD_ASYNC); + } tid_data->state = IWL_AGG_OFF; ieee80211_stop_tx_ba_cb_irqsafe(vif, sta->addr, tid); break; From c0ed8aa4d1babfe173d01ce6169c237ad9b0c462 Mon Sep 17 00:00:00 2001 From: kbuild test robot Date: Mon, 11 Jul 2016 05:01:12 +0800 Subject: [PATCH 1026/1715] iwlwifi: fix semicolon.cocci warnings drivers/net/wireless/intel/iwlwifi/iwl-io.c:243:2-3: Unneeded semicolon Remove unneeded semicolon. Generated by: scripts/coccinelle/misc/semicolon.cocci CC: Sara Sharon Signed-off-by: Fengguang Wu Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/iwl-io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-io.c b/drivers/net/wireless/intel/iwlwifi/iwl-io.c index 92c8b5f9a9cb..a9f69fdd170b 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-io.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-io.c @@ -267,7 +267,7 @@ static const char *get_rfh_string(int cmd) IWL_CMD_MQ(cmd, RFH_Q_FRBDCB_WIDX, i); IWL_CMD_MQ(cmd, RFH_Q_FRBDCB_RIDX, i); IWL_CMD_MQ(cmd, RFH_Q_URBD_STTS_WPTR_LSB, i); - }; + } switch (cmd) { IWL_CMD(RFH_RXF_DMA_CFG); From 3cd1980b0cdf66443a610b62e3a630e44eac4e45 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Thu, 23 Jun 2016 16:31:40 +0300 Subject: [PATCH 1027/1715] iwlwifi: pcie: introduce new tfd and tb formats New hardware supports bigger TFDs and TBs. Introduce the new formats and adjust defines and code relying on old format. Changing the actual TFD allocation is trickier and deferred to the next patch. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/iwl-fh.h | 50 +++++++++++++++---- .../net/wireless/intel/iwlwifi/iwl-trans.c | 1 + .../net/wireless/intel/iwlwifi/iwl-trans.h | 4 -- .../wireless/intel/iwlwifi/pcie/internal.h | 4 +- .../net/wireless/intel/iwlwifi/pcie/trans.c | 16 ++++-- drivers/net/wireless/intel/iwlwifi/pcie/tx.c | 20 ++++---- 6 files changed, 64 insertions(+), 31 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-fh.h b/drivers/net/wireless/intel/iwlwifi/iwl-fh.h index dd75ea7c936e..98d2ff240ea5 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-fh.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-fh.h @@ -643,6 +643,7 @@ struct iwl_rb_status { #define TFD_QUEUE_BC_SIZE (TFD_QUEUE_SIZE_MAX + TFD_QUEUE_SIZE_BC_DUP) #define IWL_TX_DMA_MASK DMA_BIT_MASK(36) #define IWL_NUM_OF_TBS 20 +#define IWL_TFH_NUM_TBS 25 static inline u8 iwl_get_dma_hi_addr(dma_addr_t addr) { @@ -664,25 +665,29 @@ struct iwl_tfd_tb { } __packed; /** - * struct iwl_tfd + * struct iwl_tfh_tb transmit buffer descriptor within transmit frame descriptor * - * Transmit Frame Descriptor (TFD) - * - * @ __reserved1[3] reserved - * @ num_tbs 0-4 number of active tbs - * 5 reserved - * 6-7 padding (not used) - * @ tbs[20] transmit frame buffer descriptors - * @ __pad padding + * This structure contains dma address and length of transmission address * + * @tb_len length of the tx buffer + * @addr 64 bits dma address + */ +struct iwl_tfh_tb { + __le16 tb_len; + __le64 addr; +} __packed; + +/** * Each Tx queue uses a circular buffer of 256 TFDs stored in host DRAM. * Both driver and device share these circular buffers, each of which must be - * contiguous 256 TFDs x 128 bytes-per-TFD = 32 KBytes + * contiguous 256 TFDs. + * For pre a000 HW it is 256 x 128 bytes-per-TFD = 32 KBytes + * For a000 HW and on it is 256 x 256 bytes-per-TFD = 65 KBytes * * Driver must indicate the physical address of the base of each * circular buffer via the FH_MEM_CBBC_QUEUE registers. * - * Each TFD contains pointer/size information for up to 20 data buffers + * Each TFD contains pointer/size information for up to 20 / 25 data buffers * in host DRAM. These buffers collectively contain the (one) frame described * by the TFD. Each buffer must be a single contiguous block of memory within * itself, but buffers may be scattered in host DRAM. Each buffer has max size @@ -691,6 +696,16 @@ struct iwl_tfd_tb { * * A maximum of 255 (not 256!) TFDs may be on a queue waiting for Tx. */ + +/** + * struct iwl_tfd - Transmit Frame Descriptor (TFD) + * @ __reserved1[3] reserved + * @ num_tbs 0-4 number of active tbs + * 5 reserved + * 6-7 padding (not used) + * @ tbs[20] transmit frame buffer descriptors + * @ __pad padding + */ struct iwl_tfd { u8 __reserved1[3]; u8 num_tbs; @@ -698,6 +713,19 @@ struct iwl_tfd { __le32 __pad; } __packed; +/** + * struct iwl_tfh_tfd - Transmit Frame Descriptor (TFD) + * @ num_tbs 0-4 number of active tbs + * 5 -15 reserved + * @ tbs[25] transmit frame buffer descriptors + * @ __pad padding + */ +struct iwl_tfh_tfd { + __le16 num_tbs; + struct iwl_tfh_tb tbs[IWL_TFH_NUM_TBS]; + __le32 __pad; +} __packed; + /* Keep Warm Size */ #define IWL_KW_SIZE 0x1000 /* 4k */ diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.c b/drivers/net/wireless/intel/iwlwifi/iwl-trans.c index 6069a9ff53fa..b0bd67c64b5c 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.c @@ -65,6 +65,7 @@ #include "iwl-trans.h" #include "iwl-drv.h" +#include "iwl-fh.h" struct iwl_trans *iwl_trans_alloc(unsigned int priv_size, struct device *dev, diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h index 5535e2238da3..883cb487b652 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h @@ -262,8 +262,6 @@ static inline u32 iwl_rx_packet_payload_len(const struct iwl_rx_packet *pkt) * (i.e. mark it as non-idle). * @CMD_WANT_ASYNC_CALLBACK: the op_mode's async callback function must be * called after this command completes. Valid only with CMD_ASYNC. - * @CMD_TB_BITMAP_POS: Position of the first bit for the TB bitmap. We need to - * check that we leave enough room for the TBs bitmap which needs 20 bits. */ enum CMD_MODE { CMD_ASYNC = BIT(0), @@ -274,8 +272,6 @@ enum CMD_MODE { CMD_MAKE_TRANS_IDLE = BIT(5), CMD_WAKE_UP_TRANS = BIT(6), CMD_WANT_ASYNC_CALLBACK = BIT(7), - - CMD_TB_BITMAP_POS = 11, }; #define DEF_CMD_PAYLOAD_SIZE 320 diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h index 11e347dd44c7..975900a1efa1 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h +++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h @@ -49,7 +49,7 @@ * be needed for potential data in the SKB's head. The remaining ones can * be used for frags. */ -#define IWL_PCIE_MAX_FRAGS (IWL_NUM_OF_TBS - 3) +#define IWL_PCIE_MAX_FRAGS(x) (x->max_tbs - 3) /* * RX related structures and functions @@ -192,6 +192,7 @@ struct iwl_cmd_meta { /* only for SYNC commands, iff the reply skb is wanted */ struct iwl_host_cmd *source; u32 flags; + u32 tbs; }; /* @@ -391,6 +392,7 @@ struct iwl_trans_pcie { unsigned int cmd_q_wdg_timeout; u8 n_no_reclaim_cmds; u8 no_reclaim_cmds[MAX_NO_RECLAIM_CMDS]; + u8 max_tbs; enum iwl_amsdu_size rx_buf_size; bool bc_table_dword; diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index 2f46eedd7c4d..0c2ccbeab167 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -2437,12 +2437,14 @@ err: } #endif /*CONFIG_IWLWIFI_DEBUGFS */ -static u32 iwl_trans_pcie_get_cmdlen(struct iwl_tfd *tfd) +static u32 iwl_trans_pcie_get_cmdlen(struct iwl_trans *trans, + struct iwl_tfd *tfd) { + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); u32 cmdlen = 0; int i; - for (i = 0; i < IWL_NUM_OF_TBS; i++) + for (i = 0; i < trans_pcie->max_tbs; i++) cmdlen += iwl_pcie_tfd_tb_get_len(tfd, i); return cmdlen; @@ -2731,7 +2733,7 @@ static struct iwl_trans_dump_data u8 idx = get_cmd_index(&cmdq->q, ptr); u32 caplen, cmdlen; - cmdlen = iwl_trans_pcie_get_cmdlen(&cmdq->tfds[ptr]); + cmdlen = iwl_trans_pcie_get_cmdlen(trans, &cmdq->tfds[ptr]); caplen = min_t(u32, TFD_MAX_PAYLOAD_SIZE, cmdlen); if (cmdlen) { @@ -2839,8 +2841,6 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, if (!trans) return ERR_PTR(-ENOMEM); - trans->max_skb_frags = IWL_PCIE_MAX_FRAGS; - trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); trans_pcie->trans = trans; @@ -2874,6 +2874,12 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, else addr_size = 36; + if (cfg->use_tfh) + trans_pcie->max_tbs = IWL_TFH_NUM_TBS; + else + trans_pcie->max_tbs = IWL_NUM_OF_TBS; + trans->max_skb_frags = IWL_PCIE_MAX_FRAGS(trans_pcie); + pci_set_master(pdev); ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(addr_size)); diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c index 9636dc89f6bd..1c46f146e168 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c @@ -348,13 +348,13 @@ static void iwl_pcie_tfd_unmap(struct iwl_trans *trans, struct iwl_cmd_meta *meta, struct iwl_tfd *tfd) { - int i; - int num_tbs; + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + int i, num_tbs; /* Sanity check on number of chunks */ num_tbs = iwl_pcie_tfd_get_num_tbs(tfd); - if (num_tbs >= IWL_NUM_OF_TBS) { + if (num_tbs >= trans_pcie->max_tbs) { IWL_ERR(trans, "Too many chunks: %i\n", num_tbs); /* @todo issue fatal error, it is quite serious situation */ return; @@ -363,7 +363,7 @@ static void iwl_pcie_tfd_unmap(struct iwl_trans *trans, /* first TB is never freed - it's the bidirectional DMA data */ for (i = 1; i < num_tbs; i++) { - if (meta->flags & BIT(i + CMD_TB_BITMAP_POS)) + if (meta->tbs & BIT(i)) dma_unmap_page(trans->dev, iwl_pcie_tfd_tb_get_addr(tfd, i), iwl_pcie_tfd_tb_get_len(tfd, i), @@ -423,6 +423,7 @@ static void iwl_pcie_txq_free_tfd(struct iwl_trans *trans, struct iwl_txq *txq) static int iwl_pcie_txq_build_tfd(struct iwl_trans *trans, struct iwl_txq *txq, dma_addr_t addr, u16 len, bool reset) { + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); struct iwl_queue *q; struct iwl_tfd *tfd, *tfd_tmp; u32 num_tbs; @@ -437,9 +438,9 @@ static int iwl_pcie_txq_build_tfd(struct iwl_trans *trans, struct iwl_txq *txq, num_tbs = iwl_pcie_tfd_get_num_tbs(tfd); /* Each TFD can point to a maximum 20 Tx buffers */ - if (num_tbs >= IWL_NUM_OF_TBS) { + if (num_tbs >= trans_pcie->max_tbs) { IWL_ERR(trans, "Error can not send more than %d chunks\n", - IWL_NUM_OF_TBS); + trans_pcie->max_tbs); return -EINVAL; } @@ -1640,8 +1641,7 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans, iwl_pcie_txq_build_tfd(trans, txq, phys_addr, cmdlen[i], false); } - BUILD_BUG_ON(IWL_NUM_OF_TBS + CMD_TB_BITMAP_POS > - sizeof(out_meta->flags) * BITS_PER_BYTE); + BUILD_BUG_ON(IWL_TFH_NUM_TBS > sizeof(out_meta->tbs) * BITS_PER_BYTE); out_meta->flags = cmd->flags; if (WARN_ON_ONCE(txq->entries[idx].free_buf)) kzfree(txq->entries[idx].free_buf); @@ -1953,7 +1953,7 @@ static int iwl_fill_data_tbs(struct iwl_trans *trans, struct sk_buff *skb, tb_idx = iwl_pcie_txq_build_tfd(trans, txq, tb_phys, skb_frag_size(frag), false); - out_meta->flags |= BIT(tb_idx + CMD_TB_BITMAP_POS); + out_meta->tbs |= BIT(tb_idx); } trace_iwlwifi_dev_tx(trans->dev, skb, @@ -2247,7 +2247,7 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb, } if (skb_is_nonlinear(skb) && - skb_shinfo(skb)->nr_frags > IWL_PCIE_MAX_FRAGS && + skb_shinfo(skb)->nr_frags > IWL_PCIE_MAX_FRAGS(trans_pcie) && __skb_linearize(skb)) return -ENOMEM; From 585a26274221e1cd7e0c06e0f96fbf4a7269187b Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Wed, 13 Jul 2016 16:53:36 +0300 Subject: [PATCH 1028/1715] iwlwifi: mvm: remove dump of locked registers Firmware may lock those registers for access. This results in 9000 devices with a bus stall and an endless loop of 0x5a5a5a. Don't dump those registers. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c index 1abcabb9b6cd..bebb148d352b 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c @@ -440,14 +440,12 @@ static const struct iwl_prph_range iwl_prph_dump_addr_comm[] = { { .start = 0x00a04560, .end = 0x00a0457c }, { .start = 0x00a04590, .end = 0x00a04598 }, { .start = 0x00a045c0, .end = 0x00a045f4 }, - { .start = 0x00a44000, .end = 0x00a7bf80 }, }; static const struct iwl_prph_range iwl_prph_dump_addr_9000[] = { { .start = 0x00a05c00, .end = 0x00a05c18 }, { .start = 0x00a05400, .end = 0x00a056e8 }, { .start = 0x00a08000, .end = 0x00a098bc }, - { .start = 0x00adfc00, .end = 0x00adfd1c }, { .start = 0x00a02400, .end = 0x00a02758 }, }; From db06f04dafa1c1d94db1ef162addaff778f6457a Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Tue, 5 Jul 2016 17:37:58 +0300 Subject: [PATCH 1029/1715] iwlwifi: mvm: support new shared memory config API In a000 devices we have 15 fifos, so in the shared memory config the number of tx fifos in the array was changed accordingly. As it is in the middle of the struct, the parsing code needs to be duplicated. To minimize the duplication, do not save variables we never actually use. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- .../net/wireless/intel/iwlwifi/mvm/fw-api.h | 22 ++++- .../net/wireless/intel/iwlwifi/mvm/fw-dbg.c | 2 +- drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 98 +++++++++++-------- drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 9 +- 4 files changed, 78 insertions(+), 53 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api.h b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api.h index 57b574b2a092..2f92994d0e5b 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api.h @@ -1977,8 +1977,9 @@ struct iwl_tdls_config_res { struct iwl_tdls_config_sta_info_res sta_info[IWL_MVM_TDLS_STA_COUNT]; } __packed; /* TDLS_CONFIG_RSP_API_S_VER_1 */ -#define TX_FIFO_MAX_NUM 8 -#define RX_FIFO_MAX_NUM 2 +#define TX_FIFO_MAX_NUM_9000 8 +#define TX_FIFO_MAX_NUM 15 +#define RX_FIFO_MAX_NUM 2 #define TX_FIFO_INTERNAL_MAX_NUM 6 /** @@ -2004,6 +2005,21 @@ struct iwl_tdls_config_res { * NOTE: on firmware that don't have IWL_UCODE_TLV_CAPA_EXTEND_SHARED_MEM_CFG * set, the last 3 members don't exist. */ +struct iwl_shared_mem_cfg_v1 { + __le32 shared_mem_addr; + __le32 shared_mem_size; + __le32 sample_buff_addr; + __le32 sample_buff_size; + __le32 txfifo_addr; + __le32 txfifo_size[TX_FIFO_MAX_NUM_9000]; + __le32 rxfifo_size[RX_FIFO_MAX_NUM]; + __le32 page_buff_addr; + __le32 page_buff_size; + __le32 rxfifo_addr; + __le32 internal_txfifo_addr; + __le32 internal_txfifo_size[TX_FIFO_INTERNAL_MAX_NUM]; +} __packed; /* SHARED_MEM_ALLOC_API_S_VER_2 */ + struct iwl_shared_mem_cfg { __le32 shared_mem_addr; __le32 shared_mem_size; @@ -2017,7 +2033,7 @@ struct iwl_shared_mem_cfg { __le32 rxfifo_addr; __le32 internal_txfifo_addr; __le32 internal_txfifo_size[TX_FIFO_INTERNAL_MAX_NUM]; -} __packed; /* SHARED_MEM_ALLOC_API_S_VER_2 */ +} __packed; /* SHARED_MEM_ALLOC_API_S_VER_3 */ /** * VHT MU-MIMO group configuration diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c index bebb148d352b..42dcefe33104 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c @@ -557,7 +557,7 @@ void iwl_mvm_fw_error_dump(struct iwl_mvm *mvm) sizeof(struct iwl_fw_error_dump_fifo); } - for (i = 0; i < ARRAY_SIZE(mem_cfg->txfifo_size); i++) { + for (i = 0; i < mem_cfg->num_txfifo_entries; i++) { if (!mem_cfg->txfifo_size[i]) continue; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index 47e8e70dcfac..b951a7f06060 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -838,6 +838,59 @@ out: return ret; } +static void iwl_mvm_parse_shared_mem_a000(struct iwl_mvm *mvm, + struct iwl_rx_packet *pkt) +{ + struct iwl_shared_mem_cfg *mem_cfg = (void *)pkt->data; + int i; + + mvm->shared_mem_cfg.num_txfifo_entries = + ARRAY_SIZE(mvm->shared_mem_cfg.txfifo_size); + for (i = 0; i < ARRAY_SIZE(mem_cfg->txfifo_size); i++) + mvm->shared_mem_cfg.txfifo_size[i] = + le32_to_cpu(mem_cfg->txfifo_size[i]); + for (i = 0; i < ARRAY_SIZE(mvm->shared_mem_cfg.rxfifo_size); i++) + mvm->shared_mem_cfg.rxfifo_size[i] = + le32_to_cpu(mem_cfg->rxfifo_size[i]); + + BUILD_BUG_ON(sizeof(mvm->shared_mem_cfg.internal_txfifo_size) != + sizeof(mem_cfg->internal_txfifo_size)); + + for (i = 0; i < ARRAY_SIZE(mvm->shared_mem_cfg.internal_txfifo_size); + i++) + mvm->shared_mem_cfg.internal_txfifo_size[i] = + le32_to_cpu(mem_cfg->internal_txfifo_size[i]); +} + +static void iwl_mvm_parse_shared_mem(struct iwl_mvm *mvm, + struct iwl_rx_packet *pkt) +{ + struct iwl_shared_mem_cfg_v1 *mem_cfg = (void *)pkt->data; + int i; + + mvm->shared_mem_cfg.num_txfifo_entries = + ARRAY_SIZE(mvm->shared_mem_cfg.txfifo_size); + for (i = 0; i < ARRAY_SIZE(mem_cfg->txfifo_size); i++) + mvm->shared_mem_cfg.txfifo_size[i] = + le32_to_cpu(mem_cfg->txfifo_size[i]); + for (i = 0; i < ARRAY_SIZE(mvm->shared_mem_cfg.rxfifo_size); i++) + mvm->shared_mem_cfg.rxfifo_size[i] = + le32_to_cpu(mem_cfg->rxfifo_size[i]); + + /* new API has more data, from rxfifo_addr field and on */ + if (fw_has_capa(&mvm->fw->ucode_capa, + IWL_UCODE_TLV_CAPA_EXTEND_SHARED_MEM_CFG)) { + BUILD_BUG_ON(sizeof(mvm->shared_mem_cfg.internal_txfifo_size) != + sizeof(mem_cfg->internal_txfifo_size)); + + for (i = 0; + i < ARRAY_SIZE(mvm->shared_mem_cfg.internal_txfifo_size); + i++) + mvm->shared_mem_cfg.internal_txfifo_size[i] = + le32_to_cpu(mem_cfg->internal_txfifo_size[i]); + } +} + static void iwl_mvm_get_shared_mem_conf(struct iwl_mvm *mvm) { struct iwl_host_cmd cmd = { @@ -845,9 +898,7 @@ static void iwl_mvm_get_shared_mem_conf(struct iwl_mvm *mvm) .data = { NULL, }, .len = { 0, }, }; - struct iwl_shared_mem_cfg *mem_cfg; struct iwl_rx_packet *pkt; - u32 i; lockdep_assert_held(&mvm->mutex); @@ -861,45 +912,10 @@ static void iwl_mvm_get_shared_mem_conf(struct iwl_mvm *mvm) return; pkt = cmd.resp_pkt; - mem_cfg = (void *)pkt->data; - - mvm->shared_mem_cfg.shared_mem_addr = - le32_to_cpu(mem_cfg->shared_mem_addr); - mvm->shared_mem_cfg.shared_mem_size = - le32_to_cpu(mem_cfg->shared_mem_size); - mvm->shared_mem_cfg.sample_buff_addr = - le32_to_cpu(mem_cfg->sample_buff_addr); - mvm->shared_mem_cfg.sample_buff_size = - le32_to_cpu(mem_cfg->sample_buff_size); - mvm->shared_mem_cfg.txfifo_addr = le32_to_cpu(mem_cfg->txfifo_addr); - for (i = 0; i < ARRAY_SIZE(mvm->shared_mem_cfg.txfifo_size); i++) - mvm->shared_mem_cfg.txfifo_size[i] = - le32_to_cpu(mem_cfg->txfifo_size[i]); - for (i = 0; i < ARRAY_SIZE(mvm->shared_mem_cfg.rxfifo_size); i++) - mvm->shared_mem_cfg.rxfifo_size[i] = - le32_to_cpu(mem_cfg->rxfifo_size[i]); - mvm->shared_mem_cfg.page_buff_addr = - le32_to_cpu(mem_cfg->page_buff_addr); - mvm->shared_mem_cfg.page_buff_size = - le32_to_cpu(mem_cfg->page_buff_size); - - /* new API has more data */ - if (fw_has_capa(&mvm->fw->ucode_capa, - IWL_UCODE_TLV_CAPA_EXTEND_SHARED_MEM_CFG)) { - mvm->shared_mem_cfg.rxfifo_addr = - le32_to_cpu(mem_cfg->rxfifo_addr); - mvm->shared_mem_cfg.internal_txfifo_addr = - le32_to_cpu(mem_cfg->internal_txfifo_addr); - - BUILD_BUG_ON(sizeof(mvm->shared_mem_cfg.internal_txfifo_size) != - sizeof(mem_cfg->internal_txfifo_size)); - - for (i = 0; - i < ARRAY_SIZE(mvm->shared_mem_cfg.internal_txfifo_size); - i++) - mvm->shared_mem_cfg.internal_txfifo_size[i] = - le32_to_cpu(mem_cfg->internal_txfifo_size[i]); - } + if (iwl_mvm_has_new_tx_api(mvm)) + iwl_mvm_parse_shared_mem_a000(mvm, pkt); + else + iwl_mvm_parse_shared_mem(mvm, pkt); IWL_DEBUG_INFO(mvm, "SHARED MEM CFG: got memory offsets/sizes\n"); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index 1806495aab57..f5df7064abe3 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -602,16 +602,9 @@ enum iwl_mvm_tdls_cs_state { }; struct iwl_mvm_shared_mem_cfg { - u32 shared_mem_addr; - u32 shared_mem_size; - u32 sample_buff_addr; - u32 sample_buff_size; - u32 txfifo_addr; + int num_txfifo_entries; u32 txfifo_size[TX_FIFO_MAX_NUM]; u32 rxfifo_size[RX_FIFO_MAX_NUM]; - u32 page_buff_addr; - u32 page_buff_size; - u32 rxfifo_addr; u32 internal_txfifo_addr; u32 internal_txfifo_size[TX_FIFO_INTERNAL_MAX_NUM]; }; From d560846e40fefd7b6e5c29d115f1d8f73db7f5e6 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 12 Sep 2016 13:01:50 +0100 Subject: [PATCH 1030/1715] atm: iphase: fix newline escape and minor tweak to source formatting The newline escape is incorrect and needs fixing. Also adjust source formatting / indentation and add { } to trailing else. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/atm/iphase.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/atm/iphase.c b/drivers/atm/iphase.c index 9d8807e76639..b2756765950e 100644 --- a/drivers/atm/iphase.c +++ b/drivers/atm/iphase.c @@ -1885,9 +1885,9 @@ static int open_tx(struct atm_vcc *vcc) if ((ret = ia_cbr_setup (iadev, vcc)) < 0) { return ret; } - } - else - printk("iadev: Non UBR, ABR and CBR traffic not supportedn"); + } else { + printk("iadev: Non UBR, ABR and CBR traffic not supported\n"); + } iadev->testTable[vcc->vci]->vc_status |= VC_ACTIVE; IF_EVENT(printk("ia open_tx returning \n");) From 5c0ca3f566d7a19e9bf9671dfc2108fad1b7b9b2 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 12 Sep 2016 13:04:57 +0100 Subject: [PATCH 1031/1715] test_bpf: fix the dummy skb after dissector changes Commit d5709f7ab776 ("flow_dissector: For stripped vlan, get vlan info from skb->vlan_tci") made flow dissector look at vlan_proto when vlan is present. Since test_bpf sets skb->vlan_tci to ~0 (including VLAN_TAG_PRESENT) we have to populate skb->vlan_proto. Fixes false negative on test #24: test_bpf: #24 LD_PAYLOAD_OFF jited:0 175 ret 0 != 42 FAIL (1 times) Signed-off-by: Jakub Kicinski Reviewed-by: Dinan Gunawardena Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- lib/test_bpf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 93f45011a59d..94346b4d8984 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -5485,6 +5485,7 @@ static struct sk_buff *populate_skb(char *buf, int size) skb->hash = SKB_HASH; skb->queue_mapping = SKB_QUEUE_MAP; skb->vlan_tci = SKB_VLAN_TCI; + skb->vlan_proto = htons(ETH_P_IP); skb->dev = &dev; skb->dev->ifindex = SKB_DEV_IFINDEX; skb->dev->type = SKB_DEV_TYPE; From 07c0f09e23b47815251ed9e5ce245a58c6391974 Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Mon, 12 Sep 2016 15:19:21 +0300 Subject: [PATCH 1032/1715] net/sched: act_tunnel_key: Remove rcu_read_lock protection Remove rcu_read_lock protection from tunnel_key_dump and use rtnl_dereference, dump operation is protected by rtnl lock. Also, remove rcu_read_lock from tunnel_key_release and use rcu_dereference_protected. Both operations are running exclusively and a writer couldn't modify t->params while those functions are executed. Fixes: 54d94fd89d90 ('net/sched: Introduce act_tunnel_key') Signed-off-by: Hadar Hen Zion Acked-by: John Fastabend Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/act_tunnel_key.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index dceff7412dc3..af47bdf2f483 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -194,15 +194,12 @@ static void tunnel_key_release(struct tc_action *a, int bind) struct tcf_tunnel_key *t = to_tunnel_key(a); struct tcf_tunnel_key_params *params; - rcu_read_lock(); - params = rcu_dereference(t->params); + params = rcu_dereference_protected(t->params, 1); if (params->tcft_action == TCA_TUNNEL_KEY_ACT_SET) dst_release(¶ms->tcft_enc_metadata->dst); kfree_rcu(params, rcu); - - rcu_read_unlock(); } static int tunnel_key_dump_addresses(struct sk_buff *skb, @@ -245,10 +242,8 @@ static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a, .bindcnt = t->tcf_bindcnt - bind, }; struct tcf_t tm; - int ret = -1; - rcu_read_lock(); - params = rcu_dereference(t->params); + params = rtnl_dereference(t->params); opt.t_action = params->tcft_action; opt.action = params->action; @@ -272,15 +267,11 @@ static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a, &tm, TCA_TUNNEL_KEY_PAD)) goto nla_put_failure; - ret = skb->len; - goto out; + return skb->len; nla_put_failure: nlmsg_trim(skb, b); -out: - rcu_read_unlock(); - - return ret; + return -1; } static int tunnel_key_walker(struct net *net, struct sk_buff *skb, From 04b3f8de4b6d90758938a40303c0ee9a86bcb8ab Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 12 Sep 2016 23:38:42 +0200 Subject: [PATCH 1033/1715] bpf: drop unnecessary test in cls_bpf_classify and tcf_bpf The skb_mac_header_was_set() test in cls_bpf's and act_bpf's fast-path is actually unnecessary and can be removed altogether. This was added by commit a166151cbe33 ("bpf: fix bpf helpers to use skb->mac_header relative offsets"), which was later on improved by 3431205e0397 ("bpf: make programs see skb->data == L2 for ingress and egress"). We're always guaranteed to have valid mac header at the time we invoke cls_bpf_classify() or tcf_bpf(). Reason is that since 6d1ccff62780 ("net: reset mac header in dev_start_xmit()") we do skb_reset_mac_header() in __dev_queue_xmit() before we could call into sch_handle_egress() or any subsequent enqueue. sch_handle_ingress() always sees a valid mac header as well (things like skb_reset_mac_len() would badly fail otherwise). Thus, drop the unnecessary test in classifier and action case. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- net/sched/act_bpf.c | 3 --- net/sched/cls_bpf.c | 3 --- 2 files changed, 6 deletions(-) diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index bfa870731e74..78400defa790 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -44,9 +44,6 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act, int action, filter_res; bool at_ingress = G_TC_AT(skb->tc_verd) & AT_INGRESS; - if (unlikely(!skb_mac_header_was_set(skb))) - return TC_ACT_UNSPEC; - tcf_lastuse_update(&prog->tcf_tm); bstats_cpu_update(this_cpu_ptr(prog->common.cpu_bstats), skb); diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 4742f415ee5b..1d92d4d3f222 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -83,9 +83,6 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct cls_bpf_prog *prog; int ret = -1; - if (unlikely(!skb_mac_header_was_set(skb))) - return -1; - /* Needed here for accessing maps. */ rcu_read_lock(); list_for_each_entry_rcu(prog, &head->plist, link) { From f53d8c7b18faf1bd361abe91f3c4bcbb21d0c985 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 12 Sep 2016 23:38:43 +0200 Subject: [PATCH 1034/1715] bpf: use skb_at_tc_ingress helper in tcf_bpf We have a small skb_at_tc_ingress() helper for testing for ingress, so make use of it. cls_bpf already uses it and so should act_bpf. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- net/sched/act_bpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 78400defa790..1d3960033f61 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -39,10 +39,10 @@ static struct tc_action_ops act_bpf_ops; static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act, struct tcf_result *res) { + bool at_ingress = skb_at_tc_ingress(skb); struct tcf_bpf *prog = to_bpf(act); struct bpf_prog *filter; int action, filter_res; - bool at_ingress = G_TC_AT(skb->tc_verd) & AT_INGRESS; tcf_lastuse_update(&prog->tcf_tm); bstats_cpu_update(this_cpu_ptr(prog->common.cpu_bstats), skb); From 86da71b57383d40993cb90baafb3735cffe5d800 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Mon, 12 Sep 2016 20:13:09 -0400 Subject: [PATCH 1035/1715] net_sched: Introduce skbmod action This action is intended to be an upgrade from a usability perspective from pedit (as well as operational debugability). Compare this: sudo tc filter add dev $ETH parent 1: protocol ip prio 10 \ u32 match ip protocol 1 0xff flowid 1:2 \ action pedit munge offset -14 u8 set 0x02 \ munge offset -13 u8 set 0x15 \ munge offset -12 u8 set 0x15 \ munge offset -11 u8 set 0x15 \ munge offset -10 u16 set 0x1515 \ pipe to: sudo tc filter add dev $ETH parent 1: protocol ip prio 10 \ u32 match ip protocol 1 0xff flowid 1:2 \ action skbmod dmac 02:15:15:15:15:15 Also try to do a MAC address swap with pedit or worse try to debug a policy with destination mac, source mac and etherype. Then make few rules out of those and you'll get my point. In the future common use cases on pedit can be migrated to this action (as an example different fields in ip v4/6, transports like tcp/udp/sctp etc). For this first cut, this allows modifying basic ethernet header. The most important ethernet use case at the moment is when redirecting or mirroring packets to a remote machine. The dst mac address needs a re-write so that it doesnt get dropped or confuse an interconnecting (learning) switch or dropped by a target machine (which looks at the dst mac). And at times when flipping back the packet a swap of the MAC addresses is needed. Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/tc_act/tc_skbmod.h | 30 +++ include/uapi/linux/tc_act/tc_skbmod.h | 39 ++++ net/sched/Kconfig | 11 + net/sched/Makefile | 1 + net/sched/act_skbmod.c | 301 ++++++++++++++++++++++++++ 5 files changed, 382 insertions(+) create mode 100644 include/net/tc_act/tc_skbmod.h create mode 100644 include/uapi/linux/tc_act/tc_skbmod.h create mode 100644 net/sched/act_skbmod.c diff --git a/include/net/tc_act/tc_skbmod.h b/include/net/tc_act/tc_skbmod.h new file mode 100644 index 000000000000..644a2116b47b --- /dev/null +++ b/include/net/tc_act/tc_skbmod.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2016, Jamal Hadi Salim + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. +*/ + +#ifndef __NET_TC_SKBMOD_H +#define __NET_TC_SKBMOD_H + +#include +#include + +struct tcf_skbmod_params { + struct rcu_head rcu; + u64 flags; /*up to 64 types of operations; extend if needed */ + u8 eth_dst[ETH_ALEN]; + u16 eth_type; + u8 eth_src[ETH_ALEN]; +}; + +struct tcf_skbmod { + struct tc_action common; + struct tcf_skbmod_params __rcu *skbmod_p; +}; +#define to_skbmod(a) ((struct tcf_skbmod *)a) + +#endif /* __NET_TC_SKBMOD_H */ diff --git a/include/uapi/linux/tc_act/tc_skbmod.h b/include/uapi/linux/tc_act/tc_skbmod.h new file mode 100644 index 000000000000..10fc07da6c69 --- /dev/null +++ b/include/uapi/linux/tc_act/tc_skbmod.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2016, Jamal Hadi Salim + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. +*/ + +#ifndef __LINUX_TC_SKBMOD_H +#define __LINUX_TC_SKBMOD_H + +#include + +#define TCA_ACT_SKBMOD 15 + +#define SKBMOD_F_DMAC 0x1 +#define SKBMOD_F_SMAC 0x2 +#define SKBMOD_F_ETYPE 0x4 +#define SKBMOD_F_SWAPMAC 0x8 + +struct tc_skbmod { + tc_gen; + __u64 flags; +}; + +enum { + TCA_SKBMOD_UNSPEC, + TCA_SKBMOD_TM, + TCA_SKBMOD_PARMS, + TCA_SKBMOD_DMAC, + TCA_SKBMOD_SMAC, + TCA_SKBMOD_ETYPE, + TCA_SKBMOD_PAD, + __TCA_SKBMOD_MAX +}; +#define TCA_SKBMOD_MAX (__TCA_SKBMOD_MAX - 1) + +#endif diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 72e3426fa48f..7795d5a3f79a 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -749,6 +749,17 @@ config NET_ACT_CONNMARK To compile this code as a module, choose M here: the module will be called act_connmark. +config NET_ACT_SKBMOD + tristate "skb data modification action" + depends on NET_CLS_ACT + ---help--- + Say Y here to allow modification of skb data + + If unsure, say N. + + To compile this code as a module, choose M here: the + module will be called act_skbmod. + config NET_ACT_IFE tristate "Inter-FE action based on IETF ForCES InterFE LFB" depends on NET_CLS_ACT diff --git a/net/sched/Makefile b/net/sched/Makefile index b9d046b9535a..148ae0d5ac2c 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -19,6 +19,7 @@ obj-$(CONFIG_NET_ACT_CSUM) += act_csum.o obj-$(CONFIG_NET_ACT_VLAN) += act_vlan.o obj-$(CONFIG_NET_ACT_BPF) += act_bpf.o obj-$(CONFIG_NET_ACT_CONNMARK) += act_connmark.o +obj-$(CONFIG_NET_ACT_SKBMOD) += act_skbmod.o obj-$(CONFIG_NET_ACT_IFE) += act_ife.o obj-$(CONFIG_NET_IFE_SKBMARK) += act_meta_mark.o obj-$(CONFIG_NET_IFE_SKBPRIO) += act_meta_skbprio.o diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c new file mode 100644 index 000000000000..e7d96381c908 --- /dev/null +++ b/net/sched/act_skbmod.c @@ -0,0 +1,301 @@ +/* + * net/sched/act_skbmod.c skb data modifier + * + * Copyright (c) 2016 Jamal Hadi Salim + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. +*/ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define SKBMOD_TAB_MASK 15 + +static int skbmod_net_id; +static struct tc_action_ops act_skbmod_ops; + +#define MAX_EDIT_LEN ETH_HLEN +static int tcf_skbmod_run(struct sk_buff *skb, const struct tc_action *a, + struct tcf_result *res) +{ + struct tcf_skbmod *d = to_skbmod(a); + int action; + struct tcf_skbmod_params *p; + u64 flags; + int err; + + tcf_lastuse_update(&d->tcf_tm); + bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb); + + /* XXX: if you are going to edit more fields beyond ethernet header + * (example when you add IP header replacement or vlan swap) + * then MAX_EDIT_LEN needs to change appropriately + */ + err = skb_ensure_writable(skb, MAX_EDIT_LEN); + if (unlikely(err)) { /* best policy is to drop on the floor */ + qstats_overlimit_inc(this_cpu_ptr(d->common.cpu_qstats)); + return TC_ACT_SHOT; + } + + rcu_read_lock(); + action = READ_ONCE(d->tcf_action); + if (unlikely(action == TC_ACT_SHOT)) { + qstats_overlimit_inc(this_cpu_ptr(d->common.cpu_qstats)); + rcu_read_unlock(); + return action; + } + + p = rcu_dereference(d->skbmod_p); + flags = p->flags; + if (flags & SKBMOD_F_DMAC) + ether_addr_copy(eth_hdr(skb)->h_dest, p->eth_dst); + if (flags & SKBMOD_F_SMAC) + ether_addr_copy(eth_hdr(skb)->h_source, p->eth_src); + if (flags & SKBMOD_F_ETYPE) + eth_hdr(skb)->h_proto = p->eth_type; + rcu_read_unlock(); + + if (flags & SKBMOD_F_SWAPMAC) { + u16 tmpaddr[ETH_ALEN / 2]; /* ether_addr_copy() requirement */ + /*XXX: I am sure we can come up with more efficient swapping*/ + ether_addr_copy((u8 *)tmpaddr, eth_hdr(skb)->h_dest); + ether_addr_copy(eth_hdr(skb)->h_dest, eth_hdr(skb)->h_source); + ether_addr_copy(eth_hdr(skb)->h_source, (u8 *)tmpaddr); + } + + return action; +} + +static const struct nla_policy skbmod_policy[TCA_SKBMOD_MAX + 1] = { + [TCA_SKBMOD_PARMS] = { .len = sizeof(struct tc_skbmod) }, + [TCA_SKBMOD_DMAC] = { .len = ETH_ALEN }, + [TCA_SKBMOD_SMAC] = { .len = ETH_ALEN }, + [TCA_SKBMOD_ETYPE] = { .type = NLA_U16 }, +}; + +static int tcf_skbmod_init(struct net *net, struct nlattr *nla, + struct nlattr *est, struct tc_action **a, + int ovr, int bind) +{ + struct tc_action_net *tn = net_generic(net, skbmod_net_id); + struct nlattr *tb[TCA_SKBMOD_MAX + 1]; + struct tcf_skbmod_params *p, *p_old; + struct tc_skbmod *parm; + struct tcf_skbmod *d; + bool exists = false; + u8 *daddr = NULL; + u8 *saddr = NULL; + u16 eth_type = 0; + u32 lflags = 0; + int ret = 0, err; + + if (!nla) + return -EINVAL; + + err = nla_parse_nested(tb, TCA_SKBMOD_MAX, nla, skbmod_policy); + if (err < 0) + return err; + + if (!tb[TCA_SKBMOD_PARMS]) + return -EINVAL; + + if (tb[TCA_SKBMOD_DMAC]) { + daddr = nla_data(tb[TCA_SKBMOD_DMAC]); + lflags |= SKBMOD_F_DMAC; + } + + if (tb[TCA_SKBMOD_SMAC]) { + saddr = nla_data(tb[TCA_SKBMOD_SMAC]); + lflags |= SKBMOD_F_SMAC; + } + + if (tb[TCA_SKBMOD_ETYPE]) { + eth_type = nla_get_u16(tb[TCA_SKBMOD_ETYPE]); + lflags |= SKBMOD_F_ETYPE; + } + + parm = nla_data(tb[TCA_SKBMOD_PARMS]); + if (parm->flags & SKBMOD_F_SWAPMAC) + lflags = SKBMOD_F_SWAPMAC; + + exists = tcf_hash_check(tn, parm->index, a, bind); + if (exists && bind) + return 0; + + if (!lflags) + return -EINVAL; + + if (!exists) { + ret = tcf_hash_create(tn, parm->index, est, a, + &act_skbmod_ops, bind, true); + if (ret) + return ret; + + ret = ACT_P_CREATED; + } else { + tcf_hash_release(*a, bind); + if (!ovr) + return -EEXIST; + } + + d = to_skbmod(*a); + + ASSERT_RTNL(); + p = kzalloc(sizeof(struct tcf_skbmod_params), GFP_KERNEL); + if (unlikely(!p)) { + if (ovr) + tcf_hash_release(*a, bind); + return -ENOMEM; + } + + p->flags = lflags; + d->tcf_action = parm->action; + + p_old = rtnl_dereference(d->skbmod_p); + + if (ovr) + spin_lock_bh(&d->tcf_lock); + + if (lflags & SKBMOD_F_DMAC) + ether_addr_copy(p->eth_dst, daddr); + if (lflags & SKBMOD_F_SMAC) + ether_addr_copy(p->eth_src, saddr); + if (lflags & SKBMOD_F_ETYPE) + p->eth_type = htons(eth_type); + + rcu_assign_pointer(d->skbmod_p, p); + if (ovr) + spin_unlock_bh(&d->tcf_lock); + + if (p_old) + kfree_rcu(p_old, rcu); + + if (ret == ACT_P_CREATED) + tcf_hash_insert(tn, *a); + return ret; +} + +static void tcf_skbmod_cleanup(struct tc_action *a, int bind) +{ + struct tcf_skbmod *d = to_skbmod(a); + struct tcf_skbmod_params *p; + + p = rcu_dereference_protected(d->skbmod_p, 1); + kfree_rcu(p, rcu); +} + +static int tcf_skbmod_dump(struct sk_buff *skb, struct tc_action *a, + int bind, int ref) +{ + struct tcf_skbmod *d = to_skbmod(a); + unsigned char *b = skb_tail_pointer(skb); + struct tcf_skbmod_params *p = rtnl_dereference(d->skbmod_p); + struct tc_skbmod opt = { + .index = d->tcf_index, + .refcnt = d->tcf_refcnt - ref, + .bindcnt = d->tcf_bindcnt - bind, + .action = d->tcf_action, + }; + struct tcf_t t; + + opt.flags = p->flags; + if (nla_put(skb, TCA_SKBMOD_PARMS, sizeof(opt), &opt)) + goto nla_put_failure; + if ((p->flags & SKBMOD_F_DMAC) && + nla_put(skb, TCA_SKBMOD_DMAC, ETH_ALEN, p->eth_dst)) + goto nla_put_failure; + if ((p->flags & SKBMOD_F_SMAC) && + nla_put(skb, TCA_SKBMOD_SMAC, ETH_ALEN, p->eth_src)) + goto nla_put_failure; + if ((p->flags & SKBMOD_F_ETYPE) && + nla_put_u16(skb, TCA_SKBMOD_ETYPE, ntohs(p->eth_type))) + goto nla_put_failure; + + tcf_tm_dump(&t, &d->tcf_tm); + if (nla_put_64bit(skb, TCA_SKBMOD_TM, sizeof(t), &t, TCA_SKBMOD_PAD)) + goto nla_put_failure; + + return skb->len; +nla_put_failure: + rcu_read_unlock(); + nlmsg_trim(skb, b); + return -1; +} + +static int tcf_skbmod_walker(struct net *net, struct sk_buff *skb, + struct netlink_callback *cb, int type, + const struct tc_action_ops *ops) +{ + struct tc_action_net *tn = net_generic(net, skbmod_net_id); + + return tcf_generic_walker(tn, skb, cb, type, ops); +} + +static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index) +{ + struct tc_action_net *tn = net_generic(net, skbmod_net_id); + + return tcf_hash_search(tn, a, index); +} + +static struct tc_action_ops act_skbmod_ops = { + .kind = "skbmod", + .type = TCA_ACT_SKBMOD, + .owner = THIS_MODULE, + .act = tcf_skbmod_run, + .dump = tcf_skbmod_dump, + .init = tcf_skbmod_init, + .cleanup = tcf_skbmod_cleanup, + .walk = tcf_skbmod_walker, + .lookup = tcf_skbmod_search, + .size = sizeof(struct tcf_skbmod), +}; + +static __net_init int skbmod_init_net(struct net *net) +{ + struct tc_action_net *tn = net_generic(net, skbmod_net_id); + + return tc_action_net_init(tn, &act_skbmod_ops, SKBMOD_TAB_MASK); +} + +static void __net_exit skbmod_exit_net(struct net *net) +{ + struct tc_action_net *tn = net_generic(net, skbmod_net_id); + + tc_action_net_exit(tn); +} + +static struct pernet_operations skbmod_net_ops = { + .init = skbmod_init_net, + .exit = skbmod_exit_net, + .id = &skbmod_net_id, + .size = sizeof(struct tc_action_net), +}; + +MODULE_AUTHOR("Jamal Hadi Salim, "); +MODULE_DESCRIPTION("SKB data mod-ing"); +MODULE_LICENSE("GPL"); + +static int __init skbmod_init_module(void) +{ + return tcf_register_action(&act_skbmod_ops, &skbmod_net_ops); +} + +static void __exit skbmod_cleanup_module(void) +{ + tcf_unregister_action(&act_skbmod_ops, &skbmod_net_ops); +} + +module_init(skbmod_init_module); +module_exit(skbmod_cleanup_module); From 5400e54add3618759ef607a97d711e356eef6e2a Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Tue, 13 Sep 2016 13:39:24 +0530 Subject: [PATCH 1036/1715] cxgb4vf: don't offload Rx checksums for IPv6 fragments The checksum provided by the device doesn't include the L3 headers, as IPv6 expects Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4vf/sge.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c index c8fd4f8fe1fa..f3ed9ce99e5e 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c @@ -1648,14 +1648,15 @@ int t4vf_ethrx_handler(struct sge_rspq *rspq, const __be64 *rsp, if (csum_ok && !pkt->err_vec && (be32_to_cpu(pkt->l2info) & (RXF_UDP_F | RXF_TCP_F))) { - if (!pkt->ip_frag) + if (!pkt->ip_frag) { skb->ip_summed = CHECKSUM_UNNECESSARY; - else { + rxq->stats.rx_cso++; + } else if (pkt->l2info & htonl(RXF_IP_F)) { __sum16 c = (__force __sum16)pkt->csum; skb->csum = csum_unfold(c); skb->ip_summed = CHECKSUM_COMPLETE; + rxq->stats.rx_cso++; } - rxq->stats.rx_cso++; } else skb_checksum_none_assert(skb); From 0ca4e20ba3de1a1077ee36df22176c2055497923 Mon Sep 17 00:00:00 2001 From: Tobias Regnery Date: Tue, 13 Sep 2016 12:06:57 +0200 Subject: [PATCH 1037/1715] alx: fix error handling in __alx_open In commit 9ee7b683ea63 we moved the enablement of msi interrupts earlier in alx_init_intr. If there is an error in alx_alloc_rings, __alx_open returns with an error but msi (or msi-x) interrupts stays enabled. Add a new error label to disable msi (or msi-x) interrupts. Fixes: 9ee7b683ea63 ("alx: refactor msi enablement and disablement") Signed-off-by: Tobias Regnery Signed-off-by: David S. Miller --- drivers/net/ethernet/atheros/alx/main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c index 9887cee434dd..c0f84b73574d 100644 --- a/drivers/net/ethernet/atheros/alx/main.c +++ b/drivers/net/ethernet/atheros/alx/main.c @@ -1028,7 +1028,7 @@ static int __alx_open(struct alx_priv *alx, bool resume) err = alx_alloc_rings(alx); if (err) - return err; + goto out_disable_adv_intr; alx_configure(alx); @@ -1049,6 +1049,8 @@ static int __alx_open(struct alx_priv *alx, bool resume) out_free_rings: alx_free_rings(alx); +out_disable_adv_intr: + alx_disable_advanced_intr(alx); return err; } From aa72d708373dacfa690960b336543b867784b350 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 15 Sep 2016 15:28:22 +0300 Subject: [PATCH 1038/1715] net/sched: cls_flower: Support masking for matching on tcp/udp ports Add the definitions for src/dst udp/tcp port masks and use them when setting && dumping the relevant keys. Signed-off-by: Or Gerlitz Signed-off-by: Paul Blakey Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/pkt_cls.h | 4 ++++ net/sched/cls_flower.c | 20 ++++++++++++-------- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index f9c287c67eae..60ea2a084880 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -442,6 +442,10 @@ enum { TCA_FLOWER_KEY_ENC_IPV6_DST, /* struct in6_addr */ TCA_FLOWER_KEY_ENC_IPV6_DST_MASK,/* struct in6_addr */ + TCA_FLOWER_KEY_TCP_SRC_MASK, /* be16 */ + TCA_FLOWER_KEY_TCP_DST_MASK, /* be16 */ + TCA_FLOWER_KEY_UDP_SRC_MASK, /* be16 */ + TCA_FLOWER_KEY_UDP_DST_MASK, /* be16 */ __TCA_FLOWER_MAX, }; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index b084b2aab2d7..027523c82797 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -335,6 +335,10 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { [TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK] = { .len = sizeof(struct in6_addr) }, [TCA_FLOWER_KEY_ENC_IPV6_DST] = { .len = sizeof(struct in6_addr) }, [TCA_FLOWER_KEY_ENC_IPV6_DST_MASK] = { .len = sizeof(struct in6_addr) }, + [TCA_FLOWER_KEY_TCP_SRC_MASK] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_TCP_DST_MASK] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_UDP_SRC_MASK] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_UDP_DST_MASK] = { .type = NLA_U16 }, }; static void fl_set_key_val(struct nlattr **tb, @@ -432,17 +436,17 @@ static int fl_set_key(struct net *net, struct nlattr **tb, if (key->basic.ip_proto == IPPROTO_TCP) { fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC, - &mask->tp.src, TCA_FLOWER_UNSPEC, + &mask->tp.src, TCA_FLOWER_KEY_TCP_SRC_MASK, sizeof(key->tp.src)); fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST, - &mask->tp.dst, TCA_FLOWER_UNSPEC, + &mask->tp.dst, TCA_FLOWER_KEY_TCP_DST_MASK, sizeof(key->tp.dst)); } else if (key->basic.ip_proto == IPPROTO_UDP) { fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC, - &mask->tp.src, TCA_FLOWER_UNSPEC, + &mask->tp.src, TCA_FLOWER_KEY_UDP_SRC_MASK, sizeof(key->tp.src)); fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST, - &mask->tp.dst, TCA_FLOWER_UNSPEC, + &mask->tp.dst, TCA_FLOWER_KEY_UDP_DST_MASK, sizeof(key->tp.dst)); } @@ -877,18 +881,18 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, if (key->basic.ip_proto == IPPROTO_TCP && (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC, - &mask->tp.src, TCA_FLOWER_UNSPEC, + &mask->tp.src, TCA_FLOWER_KEY_TCP_SRC_MASK, sizeof(key->tp.src)) || fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST, - &mask->tp.dst, TCA_FLOWER_UNSPEC, + &mask->tp.dst, TCA_FLOWER_KEY_TCP_DST_MASK, sizeof(key->tp.dst)))) goto nla_put_failure; else if (key->basic.ip_proto == IPPROTO_UDP && (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC, - &mask->tp.src, TCA_FLOWER_UNSPEC, + &mask->tp.src, TCA_FLOWER_KEY_UDP_SRC_MASK, sizeof(key->tp.src)) || fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST, - &mask->tp.dst, TCA_FLOWER_UNSPEC, + &mask->tp.dst, TCA_FLOWER_KEY_UDP_DST_MASK, sizeof(key->tp.dst)))) goto nla_put_failure; From a53d850a79c39b97a2d954d0db5c481c44e8dc7c Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 15 Sep 2016 15:28:23 +0300 Subject: [PATCH 1039/1715] net/sched: cls_flower: Remove an unused field from the filter key structure Commit c3f8324188fa "net: Add full IPv6 addresses to flow_keys" added an unused instance of struct flow_dissector_key_addrs into struct fl_flow_key, remove it. Signed-off-by: Or Gerlitz Reported-by: Hadar Hen Zion Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/cls_flower.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 027523c82797..a3f4c706dfaa 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -33,7 +33,6 @@ struct fl_flow_key { struct flow_dissector_key_basic basic; struct flow_dissector_key_eth_addrs eth; struct flow_dissector_key_vlan vlan; - struct flow_dissector_key_addrs ipaddrs; union { struct flow_dissector_key_ipv4_addrs ipv4; struct flow_dissector_key_ipv6_addrs ipv6; From 37a6c1512314d2439ef7136d773d5a470e0996b9 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 15 Sep 2016 15:28:24 +0300 Subject: [PATCH 1040/1715] net/sched: cls_flower: Specify vlan attributes format in the UAPI header Specify the format (size and endianess) for the vlan attributes. Signed-off-by: Or Gerlitz Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/pkt_cls.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 60ea2a084880..8915b61bbf83 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -428,9 +428,9 @@ enum { TCA_FLOWER_KEY_UDP_DST, /* be16 */ TCA_FLOWER_FLAGS, - TCA_FLOWER_KEY_VLAN_ID, - TCA_FLOWER_KEY_VLAN_PRIO, - TCA_FLOWER_KEY_VLAN_ETH_TYPE, + TCA_FLOWER_KEY_VLAN_ID, /* be16 */ + TCA_FLOWER_KEY_VLAN_PRIO, /* u8 */ + TCA_FLOWER_KEY_VLAN_ETH_TYPE, /* be16 */ TCA_FLOWER_KEY_ENC_KEY_ID, /* be32 */ TCA_FLOWER_KEY_ENC_IPV4_SRC, /* be32 */ From 2679d040412df847d390a3a8f0f224a7c91f7fae Mon Sep 17 00:00:00 2001 From: Lance Richardson Date: Tue, 13 Sep 2016 10:08:54 -0400 Subject: [PATCH 1041/1715] openvswitch: avoid deferred execution of recirc actions The ovs kernel data path currently defers the execution of all recirc actions until stack utilization is at a minimum. This is too limiting for some packet forwarding scenarios due to the small size of the deferred action FIFO (10 entries). For example, broadcast traffic sent out more than 10 ports with recirculation results in packet drops when the deferred action FIFO becomes full, as reported here: http://openvswitch.org/pipermail/dev/2016-March/067672.html Since the current recursion depth is available (it is already tracked by the exec_actions_level pcpu variable), we can use it to determine whether to execute recirculation actions immediately (safe when recursion depth is low) or defer execution until more stack space is available. With this change, the deferred action fifo size becomes a non-issue for currently failing scenarios because it is no longer used when there are three or fewer recursions through ovs_execute_actions(). Suggested-by: Pravin Shelar Signed-off-by: Lance Richardson Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/actions.c | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 4fe9032b1160..863e992dfbc0 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -71,6 +71,8 @@ struct ovs_frag_data { static DEFINE_PER_CPU(struct ovs_frag_data, ovs_frag_data_storage); #define DEFERRED_ACTION_FIFO_SIZE 10 +#define OVS_RECURSION_LIMIT 5 +#define OVS_DEFERRED_ACTION_THRESHOLD (OVS_RECURSION_LIMIT - 2) struct action_fifo { int head; int tail; @@ -78,7 +80,12 @@ struct action_fifo { struct deferred_action fifo[DEFERRED_ACTION_FIFO_SIZE]; }; +struct recirc_keys { + struct sw_flow_key key[OVS_DEFERRED_ACTION_THRESHOLD]; +}; + static struct action_fifo __percpu *action_fifos; +static struct recirc_keys __percpu *recirc_keys; static DEFINE_PER_CPU(int, exec_actions_level); static void action_fifo_init(struct action_fifo *fifo) @@ -1020,6 +1027,7 @@ static int execute_recirc(struct datapath *dp, struct sk_buff *skb, const struct nlattr *a, int rem) { struct deferred_action *da; + int level; if (!is_flow_key_valid(key)) { int err; @@ -1043,6 +1051,18 @@ static int execute_recirc(struct datapath *dp, struct sk_buff *skb, return 0; } + level = this_cpu_read(exec_actions_level); + if (level <= OVS_DEFERRED_ACTION_THRESHOLD) { + struct recirc_keys *rks = this_cpu_ptr(recirc_keys); + struct sw_flow_key *recirc_key = &rks->key[level - 1]; + + *recirc_key = *key; + recirc_key->recirc_id = nla_get_u32(a); + ovs_dp_process_packet(skb, recirc_key); + + return 0; + } + da = add_deferred_actions(skb, key, NULL); if (da) { da->pkt_key.recirc_id = nla_get_u32(a); @@ -1209,11 +1229,10 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb, const struct sw_flow_actions *acts, struct sw_flow_key *key) { - static const int ovs_recursion_limit = 5; int err, level; level = __this_cpu_inc_return(exec_actions_level); - if (unlikely(level > ovs_recursion_limit)) { + if (unlikely(level > OVS_RECURSION_LIMIT)) { net_crit_ratelimited("ovs: recursion limit reached on datapath %s, probable configuration error\n", ovs_dp_name(dp)); kfree_skb(skb); @@ -1238,10 +1257,17 @@ int action_fifos_init(void) if (!action_fifos) return -ENOMEM; + recirc_keys = alloc_percpu(struct recirc_keys); + if (!recirc_keys) { + free_percpu(action_fifos); + return -ENOMEM; + } + return 0; } void action_fifos_exit(void) { free_percpu(action_fifos); + free_percpu(recirc_keys); } From 85e42b044e8dda0acb60c645d089f4a19e0136d3 Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Tue, 13 Sep 2016 21:23:56 +0530 Subject: [PATCH 1042/1715] libcxgb,iw_cxgb4,cxgbit: add cxgb_get_4tuple() Add cxgb_get_4tuple() in libcxgb_cm.c to remove it's duplicate definitions from cxgb4/cm.c and cxgbit/cxgbit_cm.c. Signed-off-by: Varun Prakash Signed-off-by: David S. Miller --- drivers/infiniband/hw/cxgb4/Kconfig | 1 + drivers/infiniband/hw/cxgb4/Makefile | 1 + drivers/infiniband/hw/cxgb4/cm.c | 41 +---------- drivers/net/ethernet/chelsio/libcxgb/Makefile | 4 +- .../net/ethernet/chelsio/libcxgb/libcxgb_cm.c | 72 +++++++++++++++++++ .../net/ethernet/chelsio/libcxgb/libcxgb_cm.h | 42 +++++++++++ drivers/target/iscsi/cxgbit/cxgbit_cm.c | 41 +---------- 7 files changed, 125 insertions(+), 77 deletions(-) create mode 100644 drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c create mode 100644 drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h diff --git a/drivers/infiniband/hw/cxgb4/Kconfig b/drivers/infiniband/hw/cxgb4/Kconfig index 23f38cf2c5cd..afe8b28e0878 100644 --- a/drivers/infiniband/hw/cxgb4/Kconfig +++ b/drivers/infiniband/hw/cxgb4/Kconfig @@ -1,6 +1,7 @@ config INFINIBAND_CXGB4 tristate "Chelsio T4/T5 RDMA Driver" depends on CHELSIO_T4 && INET && (IPV6 || IPV6=n) + select CHELSIO_LIB select GENERIC_ALLOCATOR ---help--- This is an iWARP/RDMA driver for the Chelsio T4 and T5 diff --git a/drivers/infiniband/hw/cxgb4/Makefile b/drivers/infiniband/hw/cxgb4/Makefile index e11cf7299945..fa40b685831b 100644 --- a/drivers/infiniband/hw/cxgb4/Makefile +++ b/drivers/infiniband/hw/cxgb4/Makefile @@ -1,4 +1,5 @@ ccflags-y := -Idrivers/net/ethernet/chelsio/cxgb4 +ccflags-y += -Idrivers/net/ethernet/chelsio/libcxgb obj-$(CONFIG_INFINIBAND_CXGB4) += iw_cxgb4.o diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index b6a953aed7e8..e591f61c5601 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -49,6 +49,7 @@ #include +#include #include "iw_cxgb4.h" #include "clip_tbl.h" @@ -2518,42 +2519,6 @@ static void reject_cr(struct c4iw_dev *dev, u32 hwtid, struct sk_buff *skb) return; } -static void get_4tuple(struct cpl_pass_accept_req *req, enum chip_type type, - int *iptype, __u8 *local_ip, __u8 *peer_ip, - __be16 *local_port, __be16 *peer_port) -{ - int eth_len = (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) ? - ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len)) : - T6_ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len)); - int ip_len = (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) ? - IP_HDR_LEN_G(be32_to_cpu(req->hdr_len)) : - T6_IP_HDR_LEN_G(be32_to_cpu(req->hdr_len)); - struct iphdr *ip = (struct iphdr *)((u8 *)(req + 1) + eth_len); - struct ipv6hdr *ip6 = (struct ipv6hdr *)((u8 *)(req + 1) + eth_len); - struct tcphdr *tcp = (struct tcphdr *) - ((u8 *)(req + 1) + eth_len + ip_len); - - if (ip->version == 4) { - PDBG("%s saddr 0x%x daddr 0x%x sport %u dport %u\n", __func__, - ntohl(ip->saddr), ntohl(ip->daddr), ntohs(tcp->source), - ntohs(tcp->dest)); - *iptype = 4; - memcpy(peer_ip, &ip->saddr, 4); - memcpy(local_ip, &ip->daddr, 4); - } else { - PDBG("%s saddr %pI6 daddr %pI6 sport %u dport %u\n", __func__, - ip6->saddr.s6_addr, ip6->daddr.s6_addr, ntohs(tcp->source), - ntohs(tcp->dest)); - *iptype = 6; - memcpy(peer_ip, ip6->saddr.s6_addr, 16); - memcpy(local_ip, ip6->daddr.s6_addr, 16); - } - *peer_port = tcp->source; - *local_port = tcp->dest; - - return; -} - static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) { struct c4iw_ep *child_ep = NULL, *parent_ep; @@ -2582,8 +2547,8 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) goto reject; } - get_4tuple(req, parent_ep->com.dev->rdev.lldi.adapter_type, &iptype, - local_ip, peer_ip, &local_port, &peer_port); + cxgb_get_4tuple(req, parent_ep->com.dev->rdev.lldi.adapter_type, + &iptype, local_ip, peer_ip, &local_port, &peer_port); /* Find output route */ if (iptype == 4) { diff --git a/drivers/net/ethernet/chelsio/libcxgb/Makefile b/drivers/net/ethernet/chelsio/libcxgb/Makefile index 2362230ef4fe..2534e30a1560 100644 --- a/drivers/net/ethernet/chelsio/libcxgb/Makefile +++ b/drivers/net/ethernet/chelsio/libcxgb/Makefile @@ -1,3 +1,5 @@ +ccflags-y := -Idrivers/net/ethernet/chelsio/cxgb4 + obj-$(CONFIG_CHELSIO_LIB) += libcxgb.o -libcxgb-y := libcxgb_ppm.o +libcxgb-y := libcxgb_ppm.o libcxgb_cm.o diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c new file mode 100644 index 000000000000..d7342bb7205d --- /dev/null +++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2016 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include + +#include "libcxgb_cm.h" + +void +cxgb_get_4tuple(struct cpl_pass_accept_req *req, enum chip_type type, + int *iptype, __u8 *local_ip, __u8 *peer_ip, + __be16 *local_port, __be16 *peer_port) +{ + int eth_len = (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) ? + ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len)) : + T6_ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len)); + int ip_len = (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) ? + IP_HDR_LEN_G(be32_to_cpu(req->hdr_len)) : + T6_IP_HDR_LEN_G(be32_to_cpu(req->hdr_len)); + struct iphdr *ip = (struct iphdr *)((u8 *)(req + 1) + eth_len); + struct ipv6hdr *ip6 = (struct ipv6hdr *)((u8 *)(req + 1) + eth_len); + struct tcphdr *tcp = (struct tcphdr *) + ((u8 *)(req + 1) + eth_len + ip_len); + + if (ip->version == 4) { + pr_debug("%s saddr 0x%x daddr 0x%x sport %u dport %u\n", + __func__, ntohl(ip->saddr), ntohl(ip->daddr), + ntohs(tcp->source), ntohs(tcp->dest)); + *iptype = 4; + memcpy(peer_ip, &ip->saddr, 4); + memcpy(local_ip, &ip->daddr, 4); + } else { + pr_debug("%s saddr %pI6 daddr %pI6 sport %u dport %u\n", + __func__, ip6->saddr.s6_addr, ip6->daddr.s6_addr, + ntohs(tcp->source), ntohs(tcp->dest)); + *iptype = 6; + memcpy(peer_ip, ip6->saddr.s6_addr, 16); + memcpy(local_ip, ip6->daddr.s6_addr, 16); + } + *peer_port = tcp->source; + *local_port = tcp->dest; +} +EXPORT_SYMBOL(cxgb_get_4tuple); diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h new file mode 100644 index 000000000000..2ab8d9bef2bb --- /dev/null +++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2016 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __LIBCXGB_CM_H__ +#define __LIBCXGB_CM_H__ + +#include +#include + +void +cxgb_get_4tuple(struct cpl_pass_accept_req *, enum chip_type, + int *, __u8 *, __u8 *, __be16 *, __be16 *); +#endif diff --git a/drivers/target/iscsi/cxgbit/cxgbit_cm.c b/drivers/target/iscsi/cxgbit/cxgbit_cm.c index 0ae0b131abfc..8bb5a2562235 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_cm.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_cm.c @@ -24,6 +24,7 @@ #include #include +#include #include "cxgbit.h" #include "clip_tbl.h" @@ -789,42 +790,6 @@ void _cxgbit_free_csk(struct kref *kref) kfree(csk); } -static void -cxgbit_get_tuple_info(struct cpl_pass_accept_req *req, int *iptype, - __u8 *local_ip, __u8 *peer_ip, __be16 *local_port, - __be16 *peer_port) -{ - u32 eth_len = ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len)); - u32 ip_len = IP_HDR_LEN_G(be32_to_cpu(req->hdr_len)); - struct iphdr *ip = (struct iphdr *)((u8 *)(req + 1) + eth_len); - struct ipv6hdr *ip6 = (struct ipv6hdr *)((u8 *)(req + 1) + eth_len); - struct tcphdr *tcp = (struct tcphdr *) - ((u8 *)(req + 1) + eth_len + ip_len); - - if (ip->version == 4) { - pr_debug("%s saddr 0x%x daddr 0x%x sport %u dport %u\n", - __func__, - ntohl(ip->saddr), ntohl(ip->daddr), - ntohs(tcp->source), - ntohs(tcp->dest)); - *iptype = 4; - memcpy(peer_ip, &ip->saddr, 4); - memcpy(local_ip, &ip->daddr, 4); - } else { - pr_debug("%s saddr %pI6 daddr %pI6 sport %u dport %u\n", - __func__, - ip6->saddr.s6_addr, ip6->daddr.s6_addr, - ntohs(tcp->source), - ntohs(tcp->dest)); - *iptype = 6; - memcpy(peer_ip, ip6->saddr.s6_addr, 16); - memcpy(local_ip, ip6->daddr.s6_addr, 16); - } - - *peer_port = tcp->source; - *local_port = tcp->dest; -} - static int cxgbit_our_interface(struct cxgbit_device *cdev, struct net_device *egress_dev) { @@ -1340,8 +1305,8 @@ cxgbit_pass_accept_req(struct cxgbit_device *cdev, struct sk_buff *skb) goto rel_skb; } - cxgbit_get_tuple_info(req, &iptype, local_ip, peer_ip, - &local_port, &peer_port); + cxgb_get_4tuple(req, cdev->lldi.adapter_type, &iptype, local_ip, + peer_ip, &local_port, &peer_port); /* Find output route */ if (iptype == 4) { From 804c2f3e36ef60e6f50e6101ae06b02fbaa14b9a Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Tue, 13 Sep 2016 21:23:57 +0530 Subject: [PATCH 1043/1715] libcxgb,iw_cxgb4,cxgbit: add cxgb_find_route() Add cxgb_find_route() in libcxgb_cm.c to remove it's duplicate definitions from cxgb4/cm.c and cxgbit/cxgbit_cm.c. Signed-off-by: Varun Prakash Signed-off-by: David S. Miller --- drivers/infiniband/hw/cxgb4/cm.c | 53 ++++++------------- .../net/ethernet/chelsio/libcxgb/libcxgb_cm.c | 44 +++++++++++++++ .../net/ethernet/chelsio/libcxgb/libcxgb_cm.h | 4 ++ drivers/target/iscsi/cxgbit/cxgbit_cm.c | 36 ++----------- 4 files changed, 69 insertions(+), 68 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index e591f61c5601..02f5e20cd3a3 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -505,32 +505,6 @@ out: return dst; } -static struct dst_entry *find_route(struct c4iw_dev *dev, __be32 local_ip, - __be32 peer_ip, __be16 local_port, - __be16 peer_port, u8 tos) -{ - struct rtable *rt; - struct flowi4 fl4; - struct neighbour *n; - - rt = ip_route_output_ports(&init_net, &fl4, NULL, peer_ip, local_ip, - peer_port, local_port, IPPROTO_TCP, - tos, 0); - if (IS_ERR(rt)) - return NULL; - n = dst_neigh_lookup(&rt->dst, &peer_ip); - if (!n) - return NULL; - if (!our_interface(dev, n->dev) && - !(n->dev->flags & IFF_LOOPBACK)) { - neigh_release(n); - dst_release(&rt->dst); - return NULL; - } - neigh_release(n); - return &rt->dst; -} - static void arp_failure_discard(void *handle, struct sk_buff *skb) { pr_err(MOD "ARP failure\n"); @@ -2215,9 +2189,11 @@ static int c4iw_reconnect(struct c4iw_ep *ep) /* find a route */ if (ep->com.cm_id->m_local_addr.ss_family == AF_INET) { - ep->dst = find_route(ep->com.dev, laddr->sin_addr.s_addr, - raddr->sin_addr.s_addr, laddr->sin_port, - raddr->sin_port, ep->com.cm_id->tos); + ep->dst = cxgb_find_route(&ep->com.dev->rdev.lldi, get_real_dev, + laddr->sin_addr.s_addr, + raddr->sin_addr.s_addr, + laddr->sin_port, + raddr->sin_port, ep->com.cm_id->tos); iptype = 4; ra = (__u8 *)&raddr->sin_addr; } else { @@ -2556,9 +2532,9 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) , __func__, parent_ep, hwtid, local_ip, peer_ip, ntohs(local_port), ntohs(peer_port), peer_mss); - dst = find_route(dev, *(__be32 *)local_ip, *(__be32 *)peer_ip, - local_port, peer_port, - tos); + dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev, + *(__be32 *)local_ip, *(__be32 *)peer_ip, + local_port, peer_port, tos); } else { PDBG("%s parent ep %p hwtid %u laddr %pI6 raddr %pI6 lport %d rport %d peer_mss %d\n" , __func__, parent_ep, hwtid, @@ -3340,9 +3316,11 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) PDBG("%s saddr %pI4 sport 0x%x raddr %pI4 rport 0x%x\n", __func__, &laddr->sin_addr, ntohs(laddr->sin_port), ra, ntohs(raddr->sin_port)); - ep->dst = find_route(dev, laddr->sin_addr.s_addr, - raddr->sin_addr.s_addr, laddr->sin_port, - raddr->sin_port, cm_id->tos); + ep->dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev, + laddr->sin_addr.s_addr, + raddr->sin_addr.s_addr, + laddr->sin_port, + raddr->sin_port, cm_id->tos); } else { iptype = 6; ra = (__u8 *)&raddr6->sin6_addr; @@ -4006,8 +3984,9 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb) ntohl(iph->daddr), ntohs(tcph->dest), ntohl(iph->saddr), ntohs(tcph->source), iph->tos); - dst = find_route(dev, iph->daddr, iph->saddr, tcph->dest, tcph->source, - iph->tos); + dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev, + iph->daddr, iph->saddr, tcph->dest, + tcph->source, iph->tos); if (!dst) { pr_err("%s - failed to find dst entry!\n", __func__); diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c index d7342bb7205d..a31841247954 100644 --- a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c +++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c @@ -32,6 +32,7 @@ #include #include +#include #include "libcxgb_cm.h" @@ -70,3 +71,46 @@ cxgb_get_4tuple(struct cpl_pass_accept_req *req, enum chip_type type, *local_port = tcp->dest; } EXPORT_SYMBOL(cxgb_get_4tuple); + +static bool +cxgb_our_interface(struct cxgb4_lld_info *lldi, + struct net_device *(*get_real_dev)(struct net_device *), + struct net_device *egress_dev) +{ + int i; + + egress_dev = get_real_dev(egress_dev); + for (i = 0; i < lldi->nports; i++) + if (lldi->ports[i] == egress_dev) + return true; + return false; +} + +struct dst_entry * +cxgb_find_route(struct cxgb4_lld_info *lldi, + struct net_device *(*get_real_dev)(struct net_device *), + __be32 local_ip, __be32 peer_ip, __be16 local_port, + __be16 peer_port, u8 tos) +{ + struct rtable *rt; + struct flowi4 fl4; + struct neighbour *n; + + rt = ip_route_output_ports(&init_net, &fl4, NULL, peer_ip, local_ip, + peer_port, local_port, IPPROTO_TCP, + tos, 0); + if (IS_ERR(rt)) + return NULL; + n = dst_neigh_lookup(&rt->dst, &peer_ip); + if (!n) + return NULL; + if (!cxgb_our_interface(lldi, get_real_dev, n->dev) && + !(n->dev->flags & IFF_LOOPBACK)) { + neigh_release(n); + dst_release(&rt->dst); + return NULL; + } + neigh_release(n); + return &rt->dst; +} +EXPORT_SYMBOL(cxgb_find_route); diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h index 2ab8d9bef2bb..fe69161288e5 100644 --- a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h +++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h @@ -39,4 +39,8 @@ void cxgb_get_4tuple(struct cpl_pass_accept_req *, enum chip_type, int *, __u8 *, __u8 *, __be16 *, __be16 *); +struct dst_entry * +cxgb_find_route(struct cxgb4_lld_info *, + struct net_device *(*)(struct net_device *), + __be32, __be32, __be16, __be16, u8); #endif diff --git a/drivers/target/iscsi/cxgbit/cxgbit_cm.c b/drivers/target/iscsi/cxgbit/cxgbit_cm.c index 8bb5a2562235..49b24b93ab12 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_cm.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_cm.c @@ -830,33 +830,6 @@ out: return dst; } -static struct dst_entry * -cxgbit_find_route(struct cxgbit_device *cdev, __be32 local_ip, __be32 peer_ip, - __be16 local_port, __be16 peer_port, u8 tos) -{ - struct rtable *rt; - struct flowi4 fl4; - struct neighbour *n; - - rt = ip_route_output_ports(&init_net, &fl4, NULL, peer_ip, - local_ip, - peer_port, local_port, IPPROTO_TCP, - tos, 0); - if (IS_ERR(rt)) - return NULL; - n = dst_neigh_lookup(&rt->dst, &peer_ip); - if (!n) - return NULL; - if (!cxgbit_our_interface(cdev, n->dev) && - !(n->dev->flags & IFF_LOOPBACK)) { - neigh_release(n); - dst_release(&rt->dst); - return NULL; - } - neigh_release(n); - return &rt->dst; -} - static void cxgbit_set_tcp_window(struct cxgbit_sock *csk, struct port_info *pi) { unsigned int linkspeed; @@ -1315,10 +1288,11 @@ cxgbit_pass_accept_req(struct cxgbit_device *cdev, struct sk_buff *skb) , __func__, cnp, tid, local_ip, peer_ip, ntohs(local_port), ntohs(peer_port), peer_mss); - dst = cxgbit_find_route(cdev, *(__be32 *)local_ip, - *(__be32 *)peer_ip, - local_port, peer_port, - PASS_OPEN_TOS_G(ntohl(req->tos_stid))); + dst = cxgb_find_route(&cdev->lldi, cxgbit_get_real_dev, + *(__be32 *)local_ip, + *(__be32 *)peer_ip, + local_port, peer_port, + PASS_OPEN_TOS_G(ntohl(req->tos_stid))); } else { pr_debug("%s parent sock %p tid %u laddr %pI6 raddr %pI6 " "lport %d rport %d peer_mss %d\n" From 95554761d1db54f7c058cc0ed389282ce7361999 Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Tue, 13 Sep 2016 21:23:58 +0530 Subject: [PATCH 1044/1715] libcxgb,iw_cxgb4,cxgbit: add cxgb_find_route6() Add cxgb_find_route6() in libcxgb_cm.c to remove it's duplicate definitions from cxgb4/cm.c and cxgbit/cxgbit_cm.c. Signed-off-by: Varun Prakash Signed-off-by: David S. Miller --- drivers/infiniband/hw/cxgb4/cm.c | 70 +++++-------------- .../net/ethernet/chelsio/libcxgb/libcxgb_cm.c | 33 +++++++++ .../net/ethernet/chelsio/libcxgb/libcxgb_cm.h | 4 ++ drivers/target/iscsi/cxgbit/cxgbit_cm.c | 51 ++------------ 4 files changed, 61 insertions(+), 97 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 02f5e20cd3a3..a08a74839c13 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -465,46 +465,6 @@ static struct net_device *get_real_dev(struct net_device *egress_dev) return rdma_vlan_dev_real_dev(egress_dev) ? : egress_dev; } -static int our_interface(struct c4iw_dev *dev, struct net_device *egress_dev) -{ - int i; - - egress_dev = get_real_dev(egress_dev); - for (i = 0; i < dev->rdev.lldi.nports; i++) - if (dev->rdev.lldi.ports[i] == egress_dev) - return 1; - return 0; -} - -static struct dst_entry *find_route6(struct c4iw_dev *dev, __u8 *local_ip, - __u8 *peer_ip, __be16 local_port, - __be16 peer_port, u8 tos, - __u32 sin6_scope_id) -{ - struct dst_entry *dst = NULL; - - if (IS_ENABLED(CONFIG_IPV6)) { - struct flowi6 fl6; - - memset(&fl6, 0, sizeof(fl6)); - memcpy(&fl6.daddr, peer_ip, 16); - memcpy(&fl6.saddr, local_ip, 16); - if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL) - fl6.flowi6_oif = sin6_scope_id; - dst = ip6_route_output(&init_net, NULL, &fl6); - if (!dst) - goto out; - if (!our_interface(dev, ip6_dst_idev(dst)->dev) && - !(ip6_dst_idev(dst)->dev->flags & IFF_LOOPBACK)) { - dst_release(dst); - dst = NULL; - } - } - -out: - return dst; -} - static void arp_failure_discard(void *handle, struct sk_buff *skb) { pr_err(MOD "ARP failure\n"); @@ -2197,10 +2157,13 @@ static int c4iw_reconnect(struct c4iw_ep *ep) iptype = 4; ra = (__u8 *)&raddr->sin_addr; } else { - ep->dst = find_route6(ep->com.dev, laddr6->sin6_addr.s6_addr, - raddr6->sin6_addr.s6_addr, - laddr6->sin6_port, raddr6->sin6_port, 0, - raddr6->sin6_scope_id); + ep->dst = cxgb_find_route6(&ep->com.dev->rdev.lldi, + get_real_dev, + laddr6->sin6_addr.s6_addr, + raddr6->sin6_addr.s6_addr, + laddr6->sin6_port, + raddr6->sin6_port, 0, + raddr6->sin6_scope_id); iptype = 6; ra = (__u8 *)&raddr6->sin6_addr; } @@ -2540,10 +2503,11 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) , __func__, parent_ep, hwtid, local_ip, peer_ip, ntohs(local_port), ntohs(peer_port), peer_mss); - dst = find_route6(dev, local_ip, peer_ip, local_port, peer_port, - PASS_OPEN_TOS_G(ntohl(req->tos_stid)), - ((struct sockaddr_in6 *) - &parent_ep->com.local_addr)->sin6_scope_id); + dst = cxgb_find_route6(&dev->rdev.lldi, get_real_dev, + local_ip, peer_ip, local_port, peer_port, + PASS_OPEN_TOS_G(ntohl(req->tos_stid)), + ((struct sockaddr_in6 *) + &parent_ep->com.local_addr)->sin6_scope_id); } if (!dst) { printk(KERN_ERR MOD "%s - failed to find dst entry!\n", @@ -3339,10 +3303,12 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) __func__, laddr6->sin6_addr.s6_addr, ntohs(laddr6->sin6_port), raddr6->sin6_addr.s6_addr, ntohs(raddr6->sin6_port)); - ep->dst = find_route6(dev, laddr6->sin6_addr.s6_addr, - raddr6->sin6_addr.s6_addr, - laddr6->sin6_port, raddr6->sin6_port, 0, - raddr6->sin6_scope_id); + ep->dst = cxgb_find_route6(&dev->rdev.lldi, get_real_dev, + laddr6->sin6_addr.s6_addr, + raddr6->sin6_addr.s6_addr, + laddr6->sin6_port, + raddr6->sin6_port, 0, + raddr6->sin6_scope_id); } if (!ep->dst) { printk(KERN_ERR MOD "%s - cannot find route.\n", __func__); diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c index a31841247954..0f0de5b63622 100644 --- a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c +++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "libcxgb_cm.h" @@ -114,3 +115,35 @@ cxgb_find_route(struct cxgb4_lld_info *lldi, return &rt->dst; } EXPORT_SYMBOL(cxgb_find_route); + +struct dst_entry * +cxgb_find_route6(struct cxgb4_lld_info *lldi, + struct net_device *(*get_real_dev)(struct net_device *), + __u8 *local_ip, __u8 *peer_ip, __be16 local_port, + __be16 peer_port, u8 tos, __u32 sin6_scope_id) +{ + struct dst_entry *dst = NULL; + + if (IS_ENABLED(CONFIG_IPV6)) { + struct flowi6 fl6; + + memset(&fl6, 0, sizeof(fl6)); + memcpy(&fl6.daddr, peer_ip, 16); + memcpy(&fl6.saddr, local_ip, 16); + if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL) + fl6.flowi6_oif = sin6_scope_id; + dst = ip6_route_output(&init_net, NULL, &fl6); + if (!dst) + goto out; + if (!cxgb_our_interface(lldi, get_real_dev, + ip6_dst_idev(dst)->dev) && + !(ip6_dst_idev(dst)->dev->flags & IFF_LOOPBACK)) { + dst_release(dst); + dst = NULL; + } + } + +out: + return dst; +} +EXPORT_SYMBOL(cxgb_find_route6); diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h index fe69161288e5..c4df04a4a323 100644 --- a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h +++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h @@ -43,4 +43,8 @@ struct dst_entry * cxgb_find_route(struct cxgb4_lld_info *, struct net_device *(*)(struct net_device *), __be32, __be32, __be16, __be16, u8); +struct dst_entry * +cxgb_find_route6(struct cxgb4_lld_info *, + struct net_device *(*)(struct net_device *), + __u8 *, __u8 *, __be16, __be16, u8, __u32); #endif diff --git a/drivers/target/iscsi/cxgbit/cxgbit_cm.c b/drivers/target/iscsi/cxgbit/cxgbit_cm.c index 49b24b93ab12..e961ac4bf5db 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_cm.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_cm.c @@ -790,46 +790,6 @@ void _cxgbit_free_csk(struct kref *kref) kfree(csk); } -static int -cxgbit_our_interface(struct cxgbit_device *cdev, struct net_device *egress_dev) -{ - u8 i; - - egress_dev = cxgbit_get_real_dev(egress_dev); - for (i = 0; i < cdev->lldi.nports; i++) - if (cdev->lldi.ports[i] == egress_dev) - return 1; - return 0; -} - -static struct dst_entry * -cxgbit_find_route6(struct cxgbit_device *cdev, __u8 *local_ip, __u8 *peer_ip, - __be16 local_port, __be16 peer_port, u8 tos, - __u32 sin6_scope_id) -{ - struct dst_entry *dst = NULL; - - if (IS_ENABLED(CONFIG_IPV6)) { - struct flowi6 fl6; - - memset(&fl6, 0, sizeof(fl6)); - memcpy(&fl6.daddr, peer_ip, 16); - memcpy(&fl6.saddr, local_ip, 16); - if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL) - fl6.flowi6_oif = sin6_scope_id; - dst = ip6_route_output(&init_net, NULL, &fl6); - if (!dst) - goto out; - if (!cxgbit_our_interface(cdev, ip6_dst_idev(dst)->dev) && - !(ip6_dst_idev(dst)->dev->flags & IFF_LOOPBACK)) { - dst_release(dst); - dst = NULL; - } - } -out: - return dst; -} - static void cxgbit_set_tcp_window(struct cxgbit_sock *csk, struct port_info *pi) { unsigned int linkspeed; @@ -1299,11 +1259,12 @@ cxgbit_pass_accept_req(struct cxgbit_device *cdev, struct sk_buff *skb) , __func__, cnp, tid, local_ip, peer_ip, ntohs(local_port), ntohs(peer_port), peer_mss); - dst = cxgbit_find_route6(cdev, local_ip, peer_ip, - local_port, peer_port, - PASS_OPEN_TOS_G(ntohl(req->tos_stid)), - ((struct sockaddr_in6 *) - &cnp->com.local_addr)->sin6_scope_id); + dst = cxgb_find_route6(&cdev->lldi, cxgbit_get_real_dev, + local_ip, peer_ip, + local_port, peer_port, + PASS_OPEN_TOS_G(ntohl(req->tos_stid)), + ((struct sockaddr_in6 *) + &cnp->com.local_addr)->sin6_scope_id); } if (!dst) { pr_err("%s - failed to find dst entry!\n", From b65eef0a5b1a635f9b056cf74d3ef778f3794a75 Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Tue, 13 Sep 2016 21:23:59 +0530 Subject: [PATCH 1045/1715] libcxgb,iw_cxgb4,cxgbit: add cxgb_is_neg_adv() Add cxgb_is_neg_adv() in libcxgb_cm.h to remove it's duplicate definitions from cxgb4/cm.c and cxgbit/cxgbit_cm.c. Signed-off-by: Varun Prakash Signed-off-by: David S. Miller --- drivers/infiniband/hw/cxgb4/cm.c | 15 +++------------ drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h | 9 +++++++++ drivers/target/iscsi/cxgbit/cxgbit_cm.c | 11 +---------- 3 files changed, 13 insertions(+), 22 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index a08a74839c13..b35fdc0e0004 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -1987,15 +1987,6 @@ static inline int act_open_has_tid(int status) status != CPL_ERR_CONN_EXIST); } -/* Returns whether a CPL status conveys negative advice. - */ -static int is_neg_adv(unsigned int status) -{ - return status == CPL_ERR_RTX_NEG_ADVICE || - status == CPL_ERR_PERSIST_NEG_ADVICE || - status == CPL_ERR_KEEPALV_NEG_ADVICE; -} - static char *neg_adv_str(unsigned int status) { switch (status) { @@ -2235,7 +2226,7 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) PDBG("%s ep %p atid %u status %u errno %d\n", __func__, ep, atid, status, status2errno(status)); - if (is_neg_adv(status)) { + if (cxgb_is_neg_adv(status)) { PDBG("%s Connection problems for atid %u status %u (%s)\n", __func__, atid, status, neg_adv_str(status)); ep->stats.connect_neg_adv++; @@ -2751,7 +2742,7 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) if (!ep) return 0; - if (is_neg_adv(req->status)) { + if (cxgb_is_neg_adv(req->status)) { PDBG("%s Negative advice on abort- tid %u status %d (%s)\n", __func__, ep->hwtid, req->status, neg_adv_str(req->status)); @@ -4227,7 +4218,7 @@ static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb) kfree_skb(skb); return 0; } - if (is_neg_adv(req->status)) { + if (cxgb_is_neg_adv(req->status)) { PDBG("%s Negative advice on abort- tid %u status %d (%s)\n", __func__, ep->hwtid, req->status, neg_adv_str(req->status)); diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h index c4df04a4a323..57fcc9899234 100644 --- a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h +++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h @@ -47,4 +47,13 @@ struct dst_entry * cxgb_find_route6(struct cxgb4_lld_info *, struct net_device *(*)(struct net_device *), __u8 *, __u8 *, __be16, __be16, u8, __u32); + +/* Returns whether a CPL status conveys negative advice. + */ +static inline bool cxgb_is_neg_adv(unsigned int status) +{ + return status == CPL_ERR_RTX_NEG_ADVICE || + status == CPL_ERR_PERSIST_NEG_ADVICE || + status == CPL_ERR_KEEPALV_NEG_ADVICE; +} #endif diff --git a/drivers/target/iscsi/cxgbit/cxgbit_cm.c b/drivers/target/iscsi/cxgbit/cxgbit_cm.c index e961ac4bf5db..c46bdd5d58ca 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_cm.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_cm.c @@ -73,15 +73,6 @@ out: return wr_waitp->ret; } -/* Returns whether a CPL status conveys negative advice. - */ -static int cxgbit_is_neg_adv(unsigned int status) -{ - return status == CPL_ERR_RTX_NEG_ADVICE || - status == CPL_ERR_PERSIST_NEG_ADVICE || - status == CPL_ERR_KEEPALV_NEG_ADVICE; -} - static int cxgbit_np_hashfn(const struct cxgbit_np *cnp) { return ((unsigned long)cnp >> 10) & (NP_INFO_HASH_SIZE - 1); @@ -1704,7 +1695,7 @@ static void cxgbit_abort_req_rss(struct cxgbit_sock *csk, struct sk_buff *skb) pr_debug("%s: csk %p; tid %u; state %d\n", __func__, csk, tid, csk->com.state); - if (cxgbit_is_neg_adv(hdr->status)) { + if (cxgb_is_neg_adv(hdr->status)) { pr_err("%s: got neg advise %d on tid %u\n", __func__, hdr->status, tid); goto rel_skb; From 44c6d06992ac663e5163bdbe00844cb845ed5703 Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Tue, 13 Sep 2016 21:24:00 +0530 Subject: [PATCH 1046/1715] libcxgb,iw_cxgb4,cxgbit: add cxgb_best_mtu() Add cxgb_best_mtu() in libcxgb_cm.h to remove it's duplicate definitions from cxgb4/cm.c and cxgbit/cxgbit_cm.c Signed-off-by: Varun Prakash Signed-off-by: David S. Miller --- drivers/infiniband/hw/cxgb4/cm.c | 32 ++++++------------- .../net/ethernet/chelsio/libcxgb/libcxgb_cm.h | 18 +++++++++++ drivers/target/iscsi/cxgbit/cxgbit_cm.c | 20 ++---------- 3 files changed, 30 insertions(+), 40 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index b35fdc0e0004..c3c678ff6ebb 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -677,20 +677,6 @@ static int send_abort(struct c4iw_ep *ep) return c4iw_l2t_send(&ep->com.dev->rdev, req_skb, ep->l2t); } -static void best_mtu(const unsigned short *mtus, unsigned short mtu, - unsigned int *idx, int use_ts, int ipv6) -{ - unsigned short hdr_size = (ipv6 ? - sizeof(struct ipv6hdr) : - sizeof(struct iphdr)) + - sizeof(struct tcphdr) + - (use_ts ? - round_up(TCPOLEN_TIMESTAMP, 4) : 0); - unsigned short data_size = mtu - hdr_size; - - cxgb4_best_aligned_mtu(mtus, hdr_size, data_size, 8, idx); -} - static int send_connect(struct c4iw_ep *ep) { struct cpl_act_open_req *req = NULL; @@ -750,9 +736,9 @@ static int send_connect(struct c4iw_ep *ep) } set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx); - best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, - enable_tcp_timestamps, - (AF_INET == ep->com.remote_addr.ss_family) ? 0 : 1); + cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, + enable_tcp_timestamps, + (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1); wscale = compute_wscale(rcv_win); /* @@ -1930,9 +1916,9 @@ static int send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid) htons(FW_OFLD_CONNECTION_WR_CPLRXDATAACK_F); req->tcb.tx_max = (__force __be32) jiffies; req->tcb.rcv_adv = htons(1); - best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, - enable_tcp_timestamps, - (AF_INET == ep->com.remote_addr.ss_family) ? 0 : 1); + cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, + enable_tcp_timestamps, + (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1); wscale = compute_wscale(rcv_win); /* @@ -2374,9 +2360,9 @@ static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb, OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, ep->hwtid)); - best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, - enable_tcp_timestamps && req->tcpopt.tstamp, - (AF_INET == ep->com.remote_addr.ss_family) ? 0 : 1); + cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, + enable_tcp_timestamps && req->tcpopt.tstamp, + (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1); wscale = compute_wscale(rcv_win); /* diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h index 57fcc9899234..7fb4feb3c221 100644 --- a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h +++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h @@ -33,6 +33,9 @@ #ifndef __LIBCXGB_CM_H__ #define __LIBCXGB_CM_H__ + +#include + #include #include @@ -56,4 +59,19 @@ static inline bool cxgb_is_neg_adv(unsigned int status) status == CPL_ERR_PERSIST_NEG_ADVICE || status == CPL_ERR_KEEPALV_NEG_ADVICE; } + +static inline void +cxgb_best_mtu(const unsigned short *mtus, unsigned short mtu, + unsigned int *idx, int use_ts, int ipv6) +{ + unsigned short hdr_size = (ipv6 ? + sizeof(struct ipv6hdr) : + sizeof(struct iphdr)) + + sizeof(struct tcphdr) + + (use_ts ? + round_up(TCPOLEN_TIMESTAMP, 4) : 0); + unsigned short data_size = mtu - hdr_size; + + cxgb4_best_aligned_mtu(mtus, hdr_size, data_size, 8, idx); +} #endif diff --git a/drivers/target/iscsi/cxgbit/cxgbit_cm.c b/drivers/target/iscsi/cxgbit/cxgbit_cm.c index c46bdd5d58ca..b09c09ba9659 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_cm.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_cm.c @@ -997,20 +997,6 @@ cxgbit_l2t_send(struct cxgbit_device *cdev, struct sk_buff *skb, return ret < 0 ? ret : 0; } -static void -cxgbit_best_mtu(const unsigned short *mtus, unsigned short mtu, - unsigned int *idx, int use_ts, int ipv6) -{ - unsigned short hdr_size = (ipv6 ? sizeof(struct ipv6hdr) : - sizeof(struct iphdr)) + - sizeof(struct tcphdr) + - (use_ts ? round_up(TCPOLEN_TIMESTAMP, - 4) : 0); - unsigned short data_size = mtu - hdr_size; - - cxgb4_best_aligned_mtu(mtus, hdr_size, data_size, 8, idx); -} - static void cxgbit_send_rx_credits(struct cxgbit_sock *csk, struct sk_buff *skb) { if (csk->com.state != CSK_STATE_ESTABLISHED) { @@ -1135,9 +1121,9 @@ cxgbit_pass_accept_rpl(struct cxgbit_sock *csk, struct cpl_pass_accept_req *req) INIT_TP_WR(rpl5, csk->tid); OPCODE_TID(rpl5) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, csk->tid)); - cxgbit_best_mtu(csk->com.cdev->lldi.mtus, csk->mtu, &mtu_idx, - req->tcpopt.tstamp, - (csk->com.remote_addr.ss_family == AF_INET) ? 0 : 1); + cxgb_best_mtu(csk->com.cdev->lldi.mtus, csk->mtu, &mtu_idx, + req->tcpopt.tstamp, + (csk->com.remote_addr.ss_family == AF_INET) ? 0 : 1); wscale = cxgbit_compute_wscale(csk->rcv_win); /* * Specify the largest window that will fit in opt0. The From cc516700c7edab4197d08998ac023c3043369391 Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Tue, 13 Sep 2016 21:24:01 +0530 Subject: [PATCH 1047/1715] libcxgb,iw_cxgb4,cxgbit: add cxgb_compute_wscale() Add cxgb_compute_wscale() in libcxgb_cm.h to remove it's duplicate definitions from cxgb4/cm.c and cxgbit/cxgbit_cm.c. Signed-off-by: Varun Prakash Signed-off-by: David S. Miller --- drivers/infiniband/hw/cxgb4/cm.c | 12 ++++++------ drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 9 --------- drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h | 9 +++++++++ drivers/target/iscsi/cxgbit/cxgbit_cm.c | 11 +---------- 4 files changed, 16 insertions(+), 25 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index c3c678ff6ebb..b9d77df0a2f6 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -689,7 +689,7 @@ static int send_connect(struct c4iw_ep *ep) u64 opt0; u32 opt2; unsigned int mtu_idx; - int wscale; + u32 wscale; int win, sizev4, sizev6, wrlen; struct sockaddr_in *la = (struct sockaddr_in *) &ep->com.local_addr; @@ -739,7 +739,7 @@ static int send_connect(struct c4iw_ep *ep) cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, enable_tcp_timestamps, (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1); - wscale = compute_wscale(rcv_win); + wscale = cxgb_compute_wscale(rcv_win); /* * Specify the largest window that will fit in opt0. The @@ -1891,7 +1891,7 @@ static int send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid) struct sk_buff *skb; struct fw_ofld_connection_wr *req; unsigned int mtu_idx; - int wscale; + u32 wscale; struct sockaddr_in *sin; int win; @@ -1919,7 +1919,7 @@ static int send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid) cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, enable_tcp_timestamps, (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1); - wscale = compute_wscale(rcv_win); + wscale = cxgb_compute_wscale(rcv_win); /* * Specify the largest window that will fit in opt0. The @@ -2339,7 +2339,7 @@ static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb, unsigned int mtu_idx; u64 opt0; u32 opt2; - int wscale; + u32 wscale; struct cpl_t5_pass_accept_rpl *rpl5 = NULL; int win; enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type; @@ -2363,7 +2363,7 @@ static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb, cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, enable_tcp_timestamps && req->tcpopt.tstamp, (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1); - wscale = compute_wscale(rcv_win); + wscale = cxgb_compute_wscale(rcv_win); /* * Specify the largest window that will fit in opt0. The diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index aa47e0ae80bc..6a9bef1f09a8 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -881,15 +881,6 @@ static inline struct c4iw_listen_ep *to_listen_ep(struct iw_cm_id *cm_id) return cm_id->provider_data; } -static inline int compute_wscale(int win) -{ - int wscale = 0; - - while (wscale < 14 && (65535<com.cdev->lldi.mtus, csk->mtu, &mtu_idx, req->tcpopt.tstamp, (csk->com.remote_addr.ss_family == AF_INET) ? 0 : 1); - wscale = cxgbit_compute_wscale(csk->rcv_win); + wscale = cxgb_compute_wscale(csk->rcv_win); /* * Specify the largest window that will fit in opt0. The * remainder will be specified in the rx_data_ack. From a1a234542b7817c28770ad4e80be1bf69e6a4f86 Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Tue, 13 Sep 2016 21:24:02 +0530 Subject: [PATCH 1048/1715] libcxgb,iw_cxgb4,cxgbit: add cxgb_mk_tid_release() Add cxgb_mk_tid_release() to remove duplicate code to form CPL_TID_RELEASE hardware command. Signed-off-by: Varun Prakash Signed-off-by: David S. Miller --- drivers/infiniband/hw/cxgb4/cm.c | 10 ++++------ drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h | 13 +++++++++++++ drivers/target/iscsi/cxgbit/cxgbit_cm.c | 11 ++--------- 3 files changed, 19 insertions(+), 15 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index b9d77df0a2f6..b818bd6d1fb5 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -240,15 +240,13 @@ int c4iw_ofld_send(struct c4iw_rdev *rdev, struct sk_buff *skb) static void release_tid(struct c4iw_rdev *rdev, u32 hwtid, struct sk_buff *skb) { - struct cpl_tid_release *req; + u32 len = roundup(sizeof(struct cpl_tid_release), 16); - skb = get_skb(skb, sizeof *req, GFP_KERNEL); + skb = get_skb(skb, len, GFP_KERNEL); if (!skb) return; - req = (struct cpl_tid_release *) skb_put(skb, sizeof(*req)); - INIT_TP_WR(req, hwtid); - OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_TID_RELEASE, hwtid)); - set_wr_txq(skb, CPL_PRIORITY_SETUP, 0); + + cxgb_mk_tid_release(skb, len, hwtid, 0); c4iw_ofld_send(rdev, skb); return; } diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h index ecf3baa940b9..fbb973e9ec29 100644 --- a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h +++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h @@ -83,4 +83,17 @@ static inline u32 cxgb_compute_wscale(u32 win) wscale++; return wscale; } + +static inline void +cxgb_mk_tid_release(struct sk_buff *skb, u32 len, u32 tid, u16 chan) +{ + struct cpl_tid_release *req; + + req = (struct cpl_tid_release *)__skb_put(skb, len); + memset(req, 0, len); + + INIT_TP_WR(req, tid); + OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_TID_RELEASE, tid)); + set_wr_txq(skb, CPL_PRIORITY_SETUP, chan); +} #endif diff --git a/drivers/target/iscsi/cxgbit/cxgbit_cm.c b/drivers/target/iscsi/cxgbit/cxgbit_cm.c index cd29c91d01fc..994058f0c4e0 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_cm.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_cm.c @@ -961,21 +961,14 @@ int cxgbit_ofld_send(struct cxgbit_device *cdev, struct sk_buff *skb) static void cxgbit_release_tid(struct cxgbit_device *cdev, u32 tid) { - struct cpl_tid_release *req; - unsigned int len = roundup(sizeof(*req), 16); + u32 len = roundup(sizeof(struct cpl_tid_release), 16); struct sk_buff *skb; skb = alloc_skb(len, GFP_ATOMIC); if (!skb) return; - req = (struct cpl_tid_release *)__skb_put(skb, len); - memset(req, 0, len); - - INIT_TP_WR(req, tid); - OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID( - CPL_TID_RELEASE, tid)); - set_wr_txq(skb, CPL_PRIORITY_SETUP, 0); + cxgb_mk_tid_release(skb, len, tid, 0); cxgbit_ofld_send(cdev, skb); } From 29fb6f42e7282322672eff8b4ad85918b9dcbae3 Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Tue, 13 Sep 2016 21:24:03 +0530 Subject: [PATCH 1049/1715] libcxgb, iw_cxgb4, cxgbit: add cxgb_mk_close_con_req() Add cxgb_mk_close_con_req() to remove duplicate code to form CPL_CLOSE_CON_REQ hardware command. Signed-off-by: Varun Prakash Signed-off-by: David S. Miller --- drivers/infiniband/hw/cxgb4/cm.c | 13 ++++--------- .../net/ethernet/chelsio/libcxgb/libcxgb_cm.h | 16 ++++++++++++++++ drivers/target/iscsi/cxgbit/cxgbit_cm.c | 13 +++---------- 3 files changed, 23 insertions(+), 19 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index b818bd6d1fb5..22bccd87c5d2 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -637,21 +637,16 @@ static int send_flowc(struct c4iw_ep *ep) static int send_halfclose(struct c4iw_ep *ep) { - struct cpl_close_con_req *req; struct sk_buff *skb = skb_dequeue(&ep->com.ep_skb_list); - int wrlen = roundup(sizeof *req, 16); + u32 wrlen = roundup(sizeof(struct cpl_close_con_req), 16); PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); if (WARN_ON(!skb)) return -ENOMEM; - set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); - t4_set_arp_err_handler(skb, NULL, arp_failure_discard); - req = (struct cpl_close_con_req *) skb_put(skb, wrlen); - memset(req, 0, wrlen); - INIT_TP_WR(req, ep->hwtid); - OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, - ep->hwtid)); + cxgb_mk_close_con_req(skb, wrlen, ep->hwtid, ep->txq_idx, + NULL, arp_failure_discard); + return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); } diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h index fbb973e9ec29..e77661d98738 100644 --- a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h +++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h @@ -38,6 +38,7 @@ #include #include +#include void cxgb_get_4tuple(struct cpl_pass_accept_req *, enum chip_type, @@ -96,4 +97,19 @@ cxgb_mk_tid_release(struct sk_buff *skb, u32 len, u32 tid, u16 chan) OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_TID_RELEASE, tid)); set_wr_txq(skb, CPL_PRIORITY_SETUP, chan); } + +static inline void +cxgb_mk_close_con_req(struct sk_buff *skb, u32 len, u32 tid, u16 chan, + void *handle, arp_err_handler_t handler) +{ + struct cpl_close_con_req *req; + + req = (struct cpl_close_con_req *)__skb_put(skb, len); + memset(req, 0, len); + + INIT_TP_WR(req, tid); + OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, tid)); + set_wr_txq(skb, CPL_PRIORITY_DATA, chan); + t4_set_arp_err_handler(skb, handle, handler); +} #endif diff --git a/drivers/target/iscsi/cxgbit/cxgbit_cm.c b/drivers/target/iscsi/cxgbit/cxgbit_cm.c index 994058f0c4e0..a8f5f360414f 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_cm.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_cm.c @@ -615,21 +615,14 @@ void cxgbit_free_np(struct iscsi_np *np) static void cxgbit_send_halfclose(struct cxgbit_sock *csk) { struct sk_buff *skb; - struct cpl_close_con_req *req; - unsigned int len = roundup(sizeof(struct cpl_close_con_req), 16); + u32 len = roundup(sizeof(struct cpl_close_con_req), 16); skb = alloc_skb(len, GFP_ATOMIC); if (!skb) return; - req = (struct cpl_close_con_req *)__skb_put(skb, len); - memset(req, 0, len); - - set_wr_txq(skb, CPL_PRIORITY_DATA, csk->txq_idx); - INIT_TP_WR(req, csk->tid); - OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, - csk->tid)); - req->rsvd = 0; + cxgb_mk_close_con_req(skb, len, csk->tid, csk->txq_idx, + NULL, NULL); cxgbit_skcb_flags(skb) |= SKCBF_TX_FLAG_COMPL; __skb_queue_tail(&csk->txq, skb); From a7e1a97f88058ed9b6aa054b38167fbe62f59f50 Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Tue, 13 Sep 2016 21:24:04 +0530 Subject: [PATCH 1050/1715] libcxgb,iw_cxgb4,cxgbit: add cxgb_mk_abort_req() Add cxgb_mk_abort_req() to remove duplicate code to form CPL_ABORT_REQ hardware command. Signed-off-by: Varun Prakash Signed-off-by: David S. Miller --- drivers/infiniband/hw/cxgb4/cm.c | 13 ++++--------- .../net/ethernet/chelsio/libcxgb/libcxgb_cm.h | 16 ++++++++++++++++ drivers/target/iscsi/cxgbit/cxgbit_cm.c | 13 +++---------- 3 files changed, 23 insertions(+), 19 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 22bccd87c5d2..484196e8a49c 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -652,21 +652,16 @@ static int send_halfclose(struct c4iw_ep *ep) static int send_abort(struct c4iw_ep *ep) { - struct cpl_abort_req *req; - int wrlen = roundup(sizeof *req, 16); + u32 wrlen = roundup(sizeof(struct cpl_abort_req), 16); struct sk_buff *req_skb = skb_dequeue(&ep->com.ep_skb_list); PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); if (WARN_ON(!req_skb)) return -ENOMEM; - set_wr_txq(req_skb, CPL_PRIORITY_DATA, ep->txq_idx); - t4_set_arp_err_handler(req_skb, ep, abort_arp_failure); - req = (struct cpl_abort_req *)skb_put(req_skb, wrlen); - memset(req, 0, wrlen); - INIT_TP_WR(req, ep->hwtid); - OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_REQ, ep->hwtid)); - req->cmd = CPL_ABORT_SEND_RST; + cxgb_mk_abort_req(req_skb, wrlen, ep->hwtid, ep->txq_idx, + ep, abort_arp_failure); + return c4iw_l2t_send(&ep->com.dev->rdev, req_skb, ep->l2t); } diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h index e77661d98738..2d3a3bfb1919 100644 --- a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h +++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h @@ -112,4 +112,20 @@ cxgb_mk_close_con_req(struct sk_buff *skb, u32 len, u32 tid, u16 chan, set_wr_txq(skb, CPL_PRIORITY_DATA, chan); t4_set_arp_err_handler(skb, handle, handler); } + +static inline void +cxgb_mk_abort_req(struct sk_buff *skb, u32 len, u32 tid, u16 chan, + void *handle, arp_err_handler_t handler) +{ + struct cpl_abort_req *req; + + req = (struct cpl_abort_req *)__skb_put(skb, len); + memset(req, 0, len); + + INIT_TP_WR(req, tid); + OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_REQ, tid)); + req->cmd = CPL_ABORT_SEND_RST; + set_wr_txq(skb, CPL_PRIORITY_DATA, chan); + t4_set_arp_err_handler(skb, handle, handler); +} #endif diff --git a/drivers/target/iscsi/cxgbit/cxgbit_cm.c b/drivers/target/iscsi/cxgbit/cxgbit_cm.c index a8f5f360414f..f2b737e6fa6a 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_cm.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_cm.c @@ -647,9 +647,8 @@ static void cxgbit_abort_arp_failure(void *handle, struct sk_buff *skb) static int cxgbit_send_abort_req(struct cxgbit_sock *csk) { - struct cpl_abort_req *req; - unsigned int len = roundup(sizeof(*req), 16); struct sk_buff *skb; + u32 len = roundup(sizeof(struct cpl_abort_req), 16); pr_debug("%s: csk %p tid %u; state %d\n", __func__, csk, csk->tid, csk->com.state); @@ -660,15 +659,9 @@ static int cxgbit_send_abort_req(struct cxgbit_sock *csk) cxgbit_send_tx_flowc_wr(csk); skb = __skb_dequeue(&csk->skbq); - req = (struct cpl_abort_req *)__skb_put(skb, len); - memset(req, 0, len); + cxgb_mk_abort_req(skb, len, csk->tid, csk->txq_idx, + csk->com.cdev, cxgbit_abort_arp_failure); - set_wr_txq(skb, CPL_PRIORITY_DATA, csk->txq_idx); - t4_set_arp_err_handler(skb, csk->com.cdev, cxgbit_abort_arp_failure); - INIT_TP_WR(req, csk->tid); - OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_REQ, - csk->tid)); - req->cmd = CPL_ABORT_SEND_RST; return cxgbit_l2t_send(csk->com.cdev, skb, csk->l2t); } From 052f4731ed1fd6b132a14c56f49435377a246834 Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Tue, 13 Sep 2016 21:24:05 +0530 Subject: [PATCH 1051/1715] libcxgb,iw_cxgb4,cxgbit: add cxgb_mk_abort_rpl() Add cxgb_mk_abort_rpl() to remove duplicate code to form CPL_ABORT_RPL hardware command. Signed-off-by: Varun Prakash Signed-off-by: David S. Miller --- drivers/infiniband/hw/cxgb4/cm.c | 10 ++++------ drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h | 14 ++++++++++++++ drivers/target/iscsi/cxgbit/cxgbit_cm.c | 11 ++--------- 3 files changed, 20 insertions(+), 15 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 484196e8a49c..a6d5fcb9d8d7 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -2705,12 +2705,12 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) { struct cpl_abort_req_rss *req = cplhdr(skb); struct c4iw_ep *ep; - struct cpl_abort_rpl *rpl; struct sk_buff *rpl_skb; struct c4iw_qp_attributes attrs; int ret; int release = 0; unsigned int tid = GET_TID(req); + u32 len = roundup(sizeof(struct cpl_abort_rpl), 16); ep = get_ep_from_tid(dev, tid); if (!ep) @@ -2809,11 +2809,9 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) release = 1; goto out; } - set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); - rpl = (struct cpl_abort_rpl *) skb_put(rpl_skb, sizeof(*rpl)); - INIT_TP_WR(rpl, ep->hwtid); - OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_RPL, ep->hwtid)); - rpl->cmd = CPL_ABORT_NO_RST; + + cxgb_mk_abort_rpl(rpl_skb, len, ep->hwtid, ep->txq_idx); + c4iw_ofld_send(&ep->com.dev->rdev, rpl_skb); out: if (release) diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h index 2d3a3bfb1919..70999e8cf4b7 100644 --- a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h +++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h @@ -128,4 +128,18 @@ cxgb_mk_abort_req(struct sk_buff *skb, u32 len, u32 tid, u16 chan, set_wr_txq(skb, CPL_PRIORITY_DATA, chan); t4_set_arp_err_handler(skb, handle, handler); } + +static inline void +cxgb_mk_abort_rpl(struct sk_buff *skb, u32 len, u32 tid, u16 chan) +{ + struct cpl_abort_rpl *rpl; + + rpl = (struct cpl_abort_rpl *)__skb_put(skb, len); + memset(rpl, 0, len); + + INIT_TP_WR(rpl, tid); + OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_RPL, tid)); + rpl->cmd = CPL_ABORT_NO_RST; + set_wr_txq(skb, CPL_PRIORITY_DATA, chan); +} #endif diff --git a/drivers/target/iscsi/cxgbit/cxgbit_cm.c b/drivers/target/iscsi/cxgbit/cxgbit_cm.c index f2b737e6fa6a..9bdbe3b84e19 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_cm.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_cm.c @@ -1642,11 +1642,10 @@ static void cxgbit_abort_req_rss(struct cxgbit_sock *csk, struct sk_buff *skb) { struct cpl_abort_req_rss *hdr = cplhdr(skb); unsigned int tid = GET_TID(hdr); - struct cpl_abort_rpl *rpl; struct sk_buff *rpl_skb; bool release = false; bool wakeup_thread = false; - unsigned int len = roundup(sizeof(*rpl), 16); + u32 len = roundup(sizeof(struct cpl_abort_rpl), 16); pr_debug("%s: csk %p; tid %u; state %d\n", __func__, csk, tid, csk->com.state); @@ -1686,14 +1685,8 @@ static void cxgbit_abort_req_rss(struct cxgbit_sock *csk, struct sk_buff *skb) cxgbit_send_tx_flowc_wr(csk); rpl_skb = __skb_dequeue(&csk->skbq); - set_wr_txq(skb, CPL_PRIORITY_DATA, csk->txq_idx); - rpl = (struct cpl_abort_rpl *)__skb_put(rpl_skb, len); - memset(rpl, 0, len); - - INIT_TP_WR(rpl, csk->tid); - OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_RPL, tid)); - rpl->cmd = CPL_ABORT_NO_RST; + cxgb_mk_abort_rpl(rpl_skb, len, csk->tid, csk->txq_idx); cxgbit_ofld_send(csk->com.cdev, rpl_skb); if (wakeup_thread) { From 6e3b6fc201fe16d3944e2b293e7f47a72f4a56c1 Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Tue, 13 Sep 2016 21:24:06 +0530 Subject: [PATCH 1052/1715] libcxgb,iw_cxgb4,cxgbit: add cxgb_mk_rx_data_ack() Add cxgb_mk_rx_data_ack() to remove duplicate code to form CPL_RX_DATA_ACK hardware command. Signed-off-by: Varun Prakash Signed-off-by: David S. Miller --- drivers/infiniband/hw/cxgb4/cm.c | 19 ++++++++----------- .../net/ethernet/chelsio/libcxgb/libcxgb_cm.h | 15 +++++++++++++++ drivers/target/iscsi/cxgbit/cxgbit_cm.c | 16 ++++++---------- 3 files changed, 29 insertions(+), 21 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index a6d5fcb9d8d7..3cbbfbe7da7c 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -1354,9 +1354,9 @@ static void established_upcall(struct c4iw_ep *ep) static int update_rx_credits(struct c4iw_ep *ep, u32 credits) { - struct cpl_rx_data_ack *req; struct sk_buff *skb; - int wrlen = roundup(sizeof *req, 16); + u32 wrlen = roundup(sizeof(struct cpl_rx_data_ack), 16); + u32 credit_dack; PDBG("%s ep %p tid %u credits %u\n", __func__, ep, ep->hwtid, credits); skb = get_skb(NULL, wrlen, GFP_KERNEL); @@ -1373,15 +1373,12 @@ static int update_rx_credits(struct c4iw_ep *ep, u32 credits) if (ep->rcv_win > RCV_BUFSIZ_M * 1024) credits += ep->rcv_win - RCV_BUFSIZ_M * 1024; - req = (struct cpl_rx_data_ack *) skb_put(skb, wrlen); - memset(req, 0, wrlen); - INIT_TP_WR(req, ep->hwtid); - OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_RX_DATA_ACK, - ep->hwtid)); - req->credit_dack = cpu_to_be32(credits | RX_FORCE_ACK_F | - RX_DACK_CHANGE_F | - RX_DACK_MODE_V(dack_mode)); - set_wr_txq(skb, CPL_PRIORITY_ACK, ep->ctrlq_idx); + credit_dack = credits | RX_FORCE_ACK_F | RX_DACK_CHANGE_F | + RX_DACK_MODE_V(dack_mode); + + cxgb_mk_rx_data_ack(skb, wrlen, ep->hwtid, ep->ctrlq_idx, + credit_dack); + c4iw_ofld_send(&ep->com.dev->rdev, skb); return credits; } diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h index 70999e8cf4b7..515b94ff9080 100644 --- a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h +++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h @@ -142,4 +142,19 @@ cxgb_mk_abort_rpl(struct sk_buff *skb, u32 len, u32 tid, u16 chan) rpl->cmd = CPL_ABORT_NO_RST; set_wr_txq(skb, CPL_PRIORITY_DATA, chan); } + +static inline void +cxgb_mk_rx_data_ack(struct sk_buff *skb, u32 len, u32 tid, u16 chan, + u32 credit_dack) +{ + struct cpl_rx_data_ack *req; + + req = (struct cpl_rx_data_ack *)__skb_put(skb, len); + memset(req, 0, len); + + INIT_TP_WR(req, tid); + OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_RX_DATA_ACK, tid)); + req->credit_dack = cpu_to_be32(credit_dack); + set_wr_txq(skb, CPL_PRIORITY_ACK, chan); +} #endif diff --git a/drivers/target/iscsi/cxgbit/cxgbit_cm.c b/drivers/target/iscsi/cxgbit/cxgbit_cm.c index 9bdbe3b84e19..2fb1bf1a26c5 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_cm.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_cm.c @@ -994,22 +994,18 @@ static void cxgbit_send_rx_credits(struct cxgbit_sock *csk, struct sk_buff *skb) int cxgbit_rx_data_ack(struct cxgbit_sock *csk) { struct sk_buff *skb; - struct cpl_rx_data_ack *req; - unsigned int len = roundup(sizeof(*req), 16); + u32 len = roundup(sizeof(struct cpl_rx_data_ack), 16); + u32 credit_dack; skb = alloc_skb(len, GFP_KERNEL); if (!skb) return -1; - req = (struct cpl_rx_data_ack *)__skb_put(skb, len); - memset(req, 0, len); + credit_dack = RX_DACK_CHANGE_F | RX_DACK_MODE_V(1) | + RX_CREDITS_V(csk->rx_credits); - set_wr_txq(skb, CPL_PRIORITY_ACK, csk->ctrlq_idx); - INIT_TP_WR(req, csk->tid); - OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_RX_DATA_ACK, - csk->tid)); - req->credit_dack = cpu_to_be32(RX_DACK_CHANGE_F | RX_DACK_MODE_V(1) | - RX_CREDITS_V(csk->rx_credits)); + cxgb_mk_rx_data_ack(skb, len, csk->tid, csk->ctrlq_idx, + credit_dack); csk->rx_credits = 0; From 8aacf4b73fe87bc8fbe75a83862f411b52b7f272 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Mon, 4 Jul 2016 15:40:11 +0300 Subject: [PATCH 1053/1715] iwlwifi: introduce trans API to get byte count table In future HW the byte count table address will be configured by ucode per queue. Add API to expose the byte count table to the opmode Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/iwl-trans.h | 11 +++++++++++ drivers/net/wireless/intel/iwlwifi/pcie/internal.h | 1 + drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 2 ++ drivers/net/wireless/intel/iwlwifi/pcie/tx.c | 8 ++++++++ 4 files changed, 22 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h index 883cb487b652..04e998d9d5fe 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h @@ -645,6 +645,8 @@ struct iwl_trans_ops { void (*txq_set_shared_mode)(struct iwl_trans *trans, u32 txq_id, bool shared); + dma_addr_t (*get_txq_byte_table)(struct iwl_trans *trans, int txq_id); + int (*wait_tx_queue_empty)(struct iwl_trans *trans, u32 txq_bm); void (*freeze_txq_timer)(struct iwl_trans *trans, unsigned long txqs, bool freeze); @@ -1069,6 +1071,15 @@ static inline void iwl_trans_txq_set_shared_mode(struct iwl_trans *trans, trans->ops->txq_set_shared_mode(trans, queue, shared_mode); } +static inline dma_addr_t iwl_trans_get_txq_byte_table(struct iwl_trans *trans, + int queue) +{ + /* we should never be called if the trans doesn't support it */ + BUG_ON(!trans->ops->get_txq_byte_table); + + return trans->ops->get_txq_byte_table(trans, queue); +} + static inline void iwl_trans_txq_enable(struct iwl_trans *trans, int queue, int fifo, int sta_id, int tid, int frame_limit, u16 ssn, diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h index 975900a1efa1..b9dc82b981e1 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h +++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h @@ -476,6 +476,7 @@ void iwl_trans_pcie_txq_disable(struct iwl_trans *trans, int queue, bool configure_scd); void iwl_trans_pcie_txq_set_shared_mode(struct iwl_trans *trans, u32 txq_id, bool shared_mode); +dma_addr_t iwl_trans_pcie_get_txq_byte_table(struct iwl_trans *trans, int txq); void iwl_trans_pcie_log_scd_error(struct iwl_trans *trans, struct iwl_txq *txq); int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb, diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index 0c2ccbeab167..21b1be11100a 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -2803,6 +2803,8 @@ static const struct iwl_trans_ops trans_ops_pcie = { .txq_disable = iwl_trans_pcie_txq_disable, .txq_enable = iwl_trans_pcie_txq_enable, + .get_txq_byte_table = iwl_trans_pcie_get_txq_byte_table, + .txq_set_shared_mode = iwl_trans_pcie_txq_set_shared_mode, .wait_tx_queue_empty = iwl_trans_pcie_wait_txq_empty, diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c index 1c46f146e168..57a657a17e1e 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c @@ -1352,6 +1352,14 @@ void iwl_trans_pcie_txq_set_shared_mode(struct iwl_trans *trans, u32 txq_id, txq->ampdu = !shared_mode; } +dma_addr_t iwl_trans_pcie_get_txq_byte_table(struct iwl_trans *trans, int txq) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + + return trans_pcie->scd_bc_tbls.dma + + txq * sizeof(struct iwlagn_scd_bc_tbl); +} + void iwl_trans_pcie_txq_disable(struct iwl_trans *trans, int txq_id, bool configure_scd) { From 6983ba6951139c99f0692c94f83d8d75ea559bcc Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Sun, 26 Jun 2016 13:17:56 +0300 Subject: [PATCH 1054/1715] iwlwifi: pcie: assign and access a000 TFD & TBs Previous patch introduced the new formats. This patch allocates the new structures and adjusts code accordingly. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- .../wireless/intel/iwlwifi/pcie/internal.h | 19 ++- .../net/wireless/intel/iwlwifi/pcie/trans.c | 16 +- drivers/net/wireless/intel/iwlwifi/pcie/tx.c | 141 ++++++++++++------ 3 files changed, 120 insertions(+), 56 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h index b9dc82b981e1..d185692676fd 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h +++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h @@ -280,7 +280,7 @@ struct iwl_pcie_first_tb_buf { */ struct iwl_txq { struct iwl_queue q; - struct iwl_tfd *tfds; + void *tfds; struct iwl_pcie_first_tb_buf *first_tb_bufs; dma_addr_t first_tb_dma; struct iwl_pcie_txq_entry *entries; @@ -393,6 +393,7 @@ struct iwl_trans_pcie { u8 n_no_reclaim_cmds; u8 no_reclaim_cmds[MAX_NO_RECLAIM_CMDS]; u8 max_tbs; + u16 tfd_size; enum iwl_amsdu_size rx_buf_size; bool bc_table_dword; @@ -489,9 +490,21 @@ void iwl_trans_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn, struct sk_buff_head *skbs); void iwl_trans_pcie_tx_reset(struct iwl_trans *trans); -static inline u16 iwl_pcie_tfd_tb_get_len(struct iwl_tfd *tfd, u8 idx) +static inline u16 iwl_pcie_tfd_tb_get_len(struct iwl_trans *trans, void *tfd, + u8 idx) { - struct iwl_tfd_tb *tb = &tfd->tbs[idx]; + struct iwl_tfd *tfd_fh; + struct iwl_tfd_tb *tb; + + if (trans->cfg->use_tfh) { + struct iwl_tfh_tfd *tfd_fh = (void *)tfd; + struct iwl_tfh_tb *tb = &tfd_fh->tbs[idx]; + + return le16_to_cpu(tb->tb_len); + } + + tfd_fh = (void *)tfd; + tb = &tfd_fh->tbs[idx]; return le16_to_cpu(tb->hi_n_len) >> 4; } diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index 21b1be11100a..e908bb8e10b6 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -2437,15 +2437,14 @@ err: } #endif /*CONFIG_IWLWIFI_DEBUGFS */ -static u32 iwl_trans_pcie_get_cmdlen(struct iwl_trans *trans, - struct iwl_tfd *tfd) +static u32 iwl_trans_pcie_get_cmdlen(struct iwl_trans *trans, void *tfd) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); u32 cmdlen = 0; int i; for (i = 0; i < trans_pcie->max_tbs; i++) - cmdlen += iwl_pcie_tfd_tb_get_len(tfd, i); + cmdlen += iwl_pcie_tfd_tb_get_len(trans, tfd, i); return cmdlen; } @@ -2733,7 +2732,8 @@ static struct iwl_trans_dump_data u8 idx = get_cmd_index(&cmdq->q, ptr); u32 caplen, cmdlen; - cmdlen = iwl_trans_pcie_get_cmdlen(trans, &cmdq->tfds[ptr]); + cmdlen = iwl_trans_pcie_get_cmdlen(trans, cmdq->tfds + + trans_pcie->tfd_size * ptr); caplen = min_t(u32, TFD_MAX_PAYLOAD_SIZE, cmdlen); if (cmdlen) { @@ -2876,10 +2876,14 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, else addr_size = 36; - if (cfg->use_tfh) + if (cfg->use_tfh) { trans_pcie->max_tbs = IWL_TFH_NUM_TBS; - else + trans_pcie->tfd_size = sizeof(struct iwl_tfh_tb); + + } else { trans_pcie->max_tbs = IWL_NUM_OF_TBS; + trans_pcie->tfd_size = sizeof(struct iwl_tfd); + } trans->max_skb_frags = IWL_PCIE_MAX_FRAGS(trans_pcie); pci_set_master(pdev); diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c index 57a657a17e1e..f893ee111b46 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c @@ -312,11 +312,30 @@ void iwl_pcie_txq_check_wrptrs(struct iwl_trans *trans) } } -static inline dma_addr_t iwl_pcie_tfd_tb_get_addr(struct iwl_tfd *tfd, u8 idx) +static inline void *iwl_pcie_get_tfd(struct iwl_trans_pcie *trans_pcie, + struct iwl_txq *txq, int idx) { - struct iwl_tfd_tb *tb = &tfd->tbs[idx]; + return txq->tfds + trans_pcie->tfd_size * idx; +} + +static inline dma_addr_t iwl_pcie_tfd_tb_get_addr(struct iwl_trans *trans, + void *tfd, u8 idx) +{ + struct iwl_tfd *tfd_fh; + struct iwl_tfd_tb *tb; + dma_addr_t addr; + + if (trans->cfg->use_tfh) { + struct iwl_tfh_tfd *tfd_fh = (void *)tfd; + struct iwl_tfh_tb *tb = &tfd_fh->tbs[idx]; + + return (dma_addr_t)(le64_to_cpu(tb->addr)); + } + + tfd_fh = (void *)tfd; + tb = &tfd_fh->tbs[idx]; + addr = get_unaligned_le32(&tb->lo); - dma_addr_t addr = get_unaligned_le32(&tb->lo); if (sizeof(dma_addr_t) > sizeof(u32)) addr |= ((dma_addr_t)(le16_to_cpu(tb->hi_n_len) & 0xF) << 16) << 16; @@ -324,35 +343,57 @@ static inline dma_addr_t iwl_pcie_tfd_tb_get_addr(struct iwl_tfd *tfd, u8 idx) return addr; } -static inline void iwl_pcie_tfd_set_tb(struct iwl_tfd *tfd, u8 idx, - dma_addr_t addr, u16 len) +static inline void iwl_pcie_tfd_set_tb(struct iwl_trans *trans, void *tfd, + u8 idx, dma_addr_t addr, u16 len) { - struct iwl_tfd_tb *tb = &tfd->tbs[idx]; - u16 hi_n_len = len << 4; + if (trans->cfg->use_tfh) { + struct iwl_tfh_tfd *tfd_fh = (void *)tfd; + struct iwl_tfh_tb *tb = &tfd_fh->tbs[idx]; - put_unaligned_le32(addr, &tb->lo); - if (sizeof(dma_addr_t) > sizeof(u32)) - hi_n_len |= ((addr >> 16) >> 16) & 0xF; + put_unaligned_le64(addr, &tb->addr); + tb->tb_len = cpu_to_le16(len); - tb->hi_n_len = cpu_to_le16(hi_n_len); + tfd_fh->num_tbs = cpu_to_le16(idx + 1); + } else { + struct iwl_tfd *tfd_fh = (void *)tfd; + struct iwl_tfd_tb *tb = &tfd_fh->tbs[idx]; - tfd->num_tbs = idx + 1; + u16 hi_n_len = len << 4; + + put_unaligned_le32(addr, &tb->lo); + if (sizeof(dma_addr_t) > sizeof(u32)) + hi_n_len |= ((addr >> 16) >> 16) & 0xF; + + tb->hi_n_len = cpu_to_le16(hi_n_len); + + tfd_fh->num_tbs = idx + 1; + } } -static inline u8 iwl_pcie_tfd_get_num_tbs(struct iwl_tfd *tfd) +static inline u8 iwl_pcie_tfd_get_num_tbs(struct iwl_trans *trans, void *tfd) { - return tfd->num_tbs & 0x1f; + struct iwl_tfd *tfd_fh; + + if (trans->cfg->use_tfh) { + struct iwl_tfh_tfd *tfd_fh = (void *)tfd; + + return le16_to_cpu(tfd_fh->num_tbs) & 0x1f; + } + + tfd_fh = (void *)tfd; + return tfd_fh->num_tbs & 0x1f; } static void iwl_pcie_tfd_unmap(struct iwl_trans *trans, struct iwl_cmd_meta *meta, - struct iwl_tfd *tfd) + struct iwl_txq *txq, int index) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); int i, num_tbs; + void *tfd = iwl_pcie_get_tfd(trans_pcie, txq, index); /* Sanity check on number of chunks */ - num_tbs = iwl_pcie_tfd_get_num_tbs(tfd); + num_tbs = iwl_pcie_tfd_get_num_tbs(trans, tfd); if (num_tbs >= trans_pcie->max_tbs) { IWL_ERR(trans, "Too many chunks: %i\n", num_tbs); @@ -365,16 +406,28 @@ static void iwl_pcie_tfd_unmap(struct iwl_trans *trans, for (i = 1; i < num_tbs; i++) { if (meta->tbs & BIT(i)) dma_unmap_page(trans->dev, - iwl_pcie_tfd_tb_get_addr(tfd, i), - iwl_pcie_tfd_tb_get_len(tfd, i), + iwl_pcie_tfd_tb_get_addr(trans, tfd, i), + iwl_pcie_tfd_tb_get_len(trans, tfd, i), DMA_TO_DEVICE); else dma_unmap_single(trans->dev, - iwl_pcie_tfd_tb_get_addr(tfd, i), - iwl_pcie_tfd_tb_get_len(tfd, i), + iwl_pcie_tfd_tb_get_addr(trans, tfd, + i), + iwl_pcie_tfd_tb_get_len(trans, tfd, + i), DMA_TO_DEVICE); } - tfd->num_tbs = 0; + + if (trans->cfg->use_tfh) { + struct iwl_tfh_tfd *tfd_fh = (void *)tfd; + + tfd_fh->num_tbs = 0; + } else { + struct iwl_tfd *tfd_fh = (void *)tfd; + + tfd_fh->num_tbs = 0; + } + } /* @@ -388,8 +441,6 @@ static void iwl_pcie_tfd_unmap(struct iwl_trans *trans, */ static void iwl_pcie_txq_free_tfd(struct iwl_trans *trans, struct iwl_txq *txq) { - struct iwl_tfd *tfd_tmp = txq->tfds; - /* rd_ptr is bounded by TFD_QUEUE_SIZE_MAX and * idx is bounded by n_window */ @@ -401,7 +452,7 @@ static void iwl_pcie_txq_free_tfd(struct iwl_trans *trans, struct iwl_txq *txq) /* We have only q->n_window txq->entries, but we use * TFD_QUEUE_SIZE_MAX tfds */ - iwl_pcie_tfd_unmap(trans, &txq->entries[idx].meta, &tfd_tmp[rd_ptr]); + iwl_pcie_tfd_unmap(trans, &txq->entries[idx].meta, txq, rd_ptr); /* free SKB */ if (txq->entries) { @@ -425,19 +476,18 @@ static int iwl_pcie_txq_build_tfd(struct iwl_trans *trans, struct iwl_txq *txq, { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); struct iwl_queue *q; - struct iwl_tfd *tfd, *tfd_tmp; + void *tfd; u32 num_tbs; q = &txq->q; - tfd_tmp = txq->tfds; - tfd = &tfd_tmp[q->write_ptr]; + tfd = txq->tfds + trans_pcie->tfd_size * q->write_ptr; if (reset) - memset(tfd, 0, sizeof(*tfd)); + memset(tfd, 0, trans_pcie->tfd_size); - num_tbs = iwl_pcie_tfd_get_num_tbs(tfd); + num_tbs = iwl_pcie_tfd_get_num_tbs(trans, tfd); - /* Each TFD can point to a maximum 20 Tx buffers */ + /* Each TFD can point to a maximum max_tbs Tx buffers */ if (num_tbs >= trans_pcie->max_tbs) { IWL_ERR(trans, "Error can not send more than %d chunks\n", trans_pcie->max_tbs); @@ -448,7 +498,7 @@ static int iwl_pcie_txq_build_tfd(struct iwl_trans *trans, struct iwl_txq *txq, "Unaligned address = %llx\n", (unsigned long long)addr)) return -EINVAL; - iwl_pcie_tfd_set_tb(tfd, num_tbs, addr, len); + iwl_pcie_tfd_set_tb(trans, tfd, num_tbs, addr, len); return num_tbs; } @@ -458,7 +508,7 @@ static int iwl_pcie_txq_alloc(struct iwl_trans *trans, u32 txq_id) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); - size_t tfd_sz = sizeof(struct iwl_tfd) * TFD_QUEUE_SIZE_MAX; + size_t tfd_sz = trans_pcie->tfd_size * TFD_QUEUE_SIZE_MAX; size_t tb0_buf_sz; int i; @@ -672,7 +722,7 @@ static void iwl_pcie_txq_free(struct iwl_trans *trans, int txq_id) /* De-alloc circular buffer of TFDs */ if (txq->tfds) { dma_free_coherent(dev, - sizeof(struct iwl_tfd) * TFD_QUEUE_SIZE_MAX, + trans_pcie->tfd_size * TFD_QUEUE_SIZE_MAX, txq->tfds, txq->q.dma_addr); txq->q.dma_addr = 0; txq->tfds = NULL; @@ -1616,8 +1666,7 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans, copy_size - tb0_size, DMA_TO_DEVICE); if (dma_mapping_error(trans->dev, phys_addr)) { - iwl_pcie_tfd_unmap(trans, out_meta, - &txq->tfds[q->write_ptr]); + iwl_pcie_tfd_unmap(trans, out_meta, txq, q->write_ptr); idx = -ENOMEM; goto out; } @@ -1640,8 +1689,7 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans, phys_addr = dma_map_single(trans->dev, (void *)data, cmdlen[i], DMA_TO_DEVICE); if (dma_mapping_error(trans->dev, phys_addr)) { - iwl_pcie_tfd_unmap(trans, out_meta, - &txq->tfds[q->write_ptr]); + iwl_pcie_tfd_unmap(trans, out_meta, txq, q->write_ptr); idx = -ENOMEM; goto out; } @@ -1721,7 +1769,7 @@ void iwl_pcie_hcmd_complete(struct iwl_trans *trans, meta = &txq->entries[cmd_index].meta; cmd_id = iwl_cmd_id(cmd->hdr.cmd, group_id, 0); - iwl_pcie_tfd_unmap(trans, meta, &txq->tfds[index]); + iwl_pcie_tfd_unmap(trans, meta, txq, index); /* Input error checking is done when commands are added to queue. */ if (meta->flags & CMD_WANT_SKB) { @@ -1919,6 +1967,7 @@ static int iwl_fill_data_tbs(struct iwl_trans *trans, struct sk_buff *skb, struct iwl_cmd_meta *out_meta, struct iwl_device_cmd *dev_cmd, u16 tb1_len) { + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); struct iwl_queue *q = &txq->q; u16 tb2_len; int i; @@ -1934,8 +1983,7 @@ static int iwl_fill_data_tbs(struct iwl_trans *trans, struct sk_buff *skb, skb->data + hdr_len, tb2_len, DMA_TO_DEVICE); if (unlikely(dma_mapping_error(trans->dev, tb2_phys))) { - iwl_pcie_tfd_unmap(trans, out_meta, - &txq->tfds[q->write_ptr]); + iwl_pcie_tfd_unmap(trans, out_meta, txq, q->write_ptr); return -EINVAL; } iwl_pcie_txq_build_tfd(trans, txq, tb2_phys, tb2_len, false); @@ -1954,8 +2002,7 @@ static int iwl_fill_data_tbs(struct iwl_trans *trans, struct sk_buff *skb, skb_frag_size(frag), DMA_TO_DEVICE); if (unlikely(dma_mapping_error(trans->dev, tb_phys))) { - iwl_pcie_tfd_unmap(trans, out_meta, - &txq->tfds[q->write_ptr]); + iwl_pcie_tfd_unmap(trans, out_meta, txq, q->write_ptr); return -EINVAL; } tb_idx = iwl_pcie_txq_build_tfd(trans, txq, tb_phys, @@ -1965,8 +2012,8 @@ static int iwl_fill_data_tbs(struct iwl_trans *trans, struct sk_buff *skb, } trace_iwlwifi_dev_tx(trans->dev, skb, - &txq->tfds[txq->q.write_ptr], - sizeof(struct iwl_tfd), + iwl_pcie_get_tfd(trans_pcie, txq, q->write_ptr), + trans_pcie->tfd_size, &dev_cmd->hdr, IWL_FIRST_TB_SIZE + tb1_len, skb->data + hdr_len, tb2_len); trace_iwlwifi_dev_tx_data(trans->dev, skb, @@ -2041,8 +2088,8 @@ static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb, IEEE80211_CCMP_HDR_LEN : 0; trace_iwlwifi_dev_tx(trans->dev, skb, - &txq->tfds[txq->q.write_ptr], - sizeof(struct iwl_tfd), + iwl_pcie_get_tfd(trans_pcie, txq, q->write_ptr), + trans_pcie->tfd_size, &dev_cmd->hdr, IWL_FIRST_TB_SIZE + tb1_len, NULL, 0); @@ -2198,7 +2245,7 @@ static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb, return 0; out_unmap: - iwl_pcie_tfd_unmap(trans, out_meta, &txq->tfds[q->write_ptr]); + iwl_pcie_tfd_unmap(trans, out_meta, txq, q->write_ptr); return ret; } #else /* CONFIG_INET */ From 4fe10bc6038a6b3f6a025aca8fc9c6c23e046b1e Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Mon, 4 Jul 2016 14:34:26 +0300 Subject: [PATCH 1055/1715] iwlwifi: change byte count table for a000 devices Since TFD was enlarged to 256 bytes, the fetch of the TFD itself is very expensive. To make DRAM to SRAM more efficient, bits 12-13 will indicate the number of 64 byte chunks that should be transferred to SRAM. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/iwl-fh.h | 7 +++- drivers/net/wireless/intel/iwlwifi/pcie/tx.c | 36 ++++++++++++++++---- 2 files changed, 35 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-fh.h b/drivers/net/wireless/intel/iwlwifi/iwl-fh.h index 98d2ff240ea5..33ef5372d195 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-fh.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-fh.h @@ -734,8 +734,13 @@ struct iwl_tfh_tfd { /** * struct iwlagn_schedq_bc_tbl scheduler byte count table * base physical address provided by SCD_DRAM_BASE_ADDR + * For devices up to a000: * @tfd_offset 0-12 - tx command byte count - * 12-16 - station index + * 12-16 - station index + * For a000 and on: + * @tfd_offset 0-12 - tx command byte count + * 12-13 - number of 64 byte chunks + * 14-16 - reserved */ struct iwlagn_scd_bc_tbl { __le16 tfd_offset[TFD_QUEUE_BC_SIZE]; diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c index f893ee111b46..b4b925ed5267 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c @@ -176,14 +176,14 @@ static void iwl_pcie_txq_stuck_timer(unsigned long data) * iwl_pcie_txq_update_byte_cnt_tbl - Set up entry in Tx byte-count array */ static void iwl_pcie_txq_update_byte_cnt_tbl(struct iwl_trans *trans, - struct iwl_txq *txq, u16 byte_cnt) + struct iwl_txq *txq, u16 byte_cnt, + int num_tbs) { struct iwlagn_scd_bc_tbl *scd_bc_tbl; struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); int write_ptr = txq->q.write_ptr; int txq_id = txq->q.id; u8 sec_ctl = 0; - u8 sta_id = 0; u16 len = byte_cnt + IWL_TX_CRC_SIZE + IWL_TX_DELIMITER_SIZE; __le16 bc_ent; struct iwl_tx_cmd *tx_cmd = @@ -191,7 +191,6 @@ static void iwl_pcie_txq_update_byte_cnt_tbl(struct iwl_trans *trans, scd_bc_tbl = trans_pcie->scd_bc_tbls.addr; - sta_id = tx_cmd->sta_id; sec_ctl = tx_cmd->sec_ctl; switch (sec_ctl & TX_CMD_SEC_MSK) { @@ -205,14 +204,32 @@ static void iwl_pcie_txq_update_byte_cnt_tbl(struct iwl_trans *trans, len += IEEE80211_WEP_IV_LEN + IEEE80211_WEP_ICV_LEN; break; } - if (trans_pcie->bc_table_dword) len = DIV_ROUND_UP(len, 4); if (WARN_ON(len > 0xFFF || write_ptr >= TFD_QUEUE_SIZE_MAX)) return; - bc_ent = cpu_to_le16(len | (sta_id << 12)); + if (trans->cfg->use_tfh) { + u8 filled_tfd_size = offsetof(struct iwl_tfh_tfd, tbs) + + num_tbs * sizeof(struct iwl_tfh_tb); + /* + * filled_tfd_size contains the number of filled bytes in the + * TFD. + * Dividing it by 64 will give the number of chunks to fetch + * to SRAM- 0 for one chunk, 1 for 2 and so on. + * If, for example, TFD contains only 3 TBs then 32 bytes + * of the TFD are used, and only one chunk of 64 bytes should + * be fetched + */ + u8 num_fetch_chunks = DIV_ROUND_UP(filled_tfd_size, 64) - 1; + + bc_ent = cpu_to_le16(len | (num_fetch_chunks << 12)); + } else { + u8 sta_id = tx_cmd->sta_id; + + bc_ent = cpu_to_le16(len | (sta_id << 12)); + } scd_bc_tbl[txq_id].tfd_offset[write_ptr] = bc_ent; @@ -240,6 +257,7 @@ static void iwl_pcie_txq_inval_byte_cnt_tbl(struct iwl_trans *trans, sta_id = tx_cmd->sta_id; bc_ent = cpu_to_le16(1 | (sta_id << 12)); + scd_bc_tbl[txq_id].tfd_offset[read_ptr] = bc_ent; if (read_ptr < TFD_QUEUE_SIZE_BC_DUP) @@ -1126,7 +1144,8 @@ void iwl_trans_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn, txq->entries[txq->q.read_ptr].skb = NULL; - iwl_pcie_txq_inval_byte_cnt_tbl(trans, txq); + if (!trans->cfg->use_tfh) + iwl_pcie_txq_inval_byte_cnt_tbl(trans, txq); iwl_pcie_txq_free_tfd(trans, txq); } @@ -2272,6 +2291,7 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb, struct iwl_queue *q; dma_addr_t tb0_phys, tb1_phys, scratch_phys; void *tb1_addr; + void *tfd; u16 len, tb1_len; bool wait_write_ptr; __le16 fc; @@ -2410,8 +2430,10 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb, goto out_err; } + tfd = iwl_pcie_get_tfd(trans_pcie, txq, q->write_ptr); /* Set up entry for this TFD in Tx byte-count array */ - iwl_pcie_txq_update_byte_cnt_tbl(trans, txq, le16_to_cpu(tx_cmd->len)); + iwl_pcie_txq_update_byte_cnt_tbl(trans, txq, le16_to_cpu(tx_cmd->len), + iwl_pcie_tfd_get_num_tbs(trans, tfd)); wait_write_ptr = ieee80211_has_morefrags(fc); From bb98ecd4d347f052887518293736ab70c9909b2a Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Thu, 7 Jul 2016 18:17:45 +0300 Subject: [PATCH 1056/1715] iwlwifi: pcie: merge iwl_queue and iwl_txq The original intent was to have the general iwl_queue shared between RX and TX queues, but it is not the actual status. Since it is not shared with any struct but iwl_txq, it adds unnecessary complexity. Merge those structs. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- .../wireless/intel/iwlwifi/pcie/internal.h | 80 ++++--- drivers/net/wireless/intel/iwlwifi/pcie/rx.c | 2 +- .../net/wireless/intel/iwlwifi/pcie/trans.c | 30 ++- drivers/net/wireless/intel/iwlwifi/pcie/tx.c | 197 +++++++++--------- 4 files changed, 146 insertions(+), 163 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h index d185692676fd..cad2d1199321 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h +++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h @@ -195,39 +195,6 @@ struct iwl_cmd_meta { u32 tbs; }; -/* - * Generic queue structure - * - * Contains common data for Rx and Tx queues. - * - * Note the difference between TFD_QUEUE_SIZE_MAX and n_window: the hardware - * always assumes 256 descriptors, so TFD_QUEUE_SIZE_MAX is always 256 (unless - * there might be HW changes in the future). For the normal TX - * queues, n_window, which is the size of the software queue data - * is also 256; however, for the command queue, n_window is only - * 32 since we don't need so many commands pending. Since the HW - * still uses 256 BDs for DMA though, TFD_QUEUE_SIZE_MAX stays 256. As a result, - * the software buffers (in the variables @meta, @txb in struct - * iwl_txq) only have 32 entries, while the HW buffers (@tfds in - * the same struct) have 256. - * This means that we end up with the following: - * HW entries: | 0 | ... | N * 32 | ... | N * 32 + 31 | ... | 255 | - * SW entries: | 0 | ... | 31 | - * where N is a number between 0 and 7. This means that the SW - * data is a window overlayed over the HW queue. - */ -struct iwl_queue { - int write_ptr; /* 1-st empty entry (index) host_w*/ - int read_ptr; /* last used entry (index) host_r*/ - /* use for monitoring and recovering the stuck queue */ - dma_addr_t dma_addr; /* physical addr for BD's */ - int n_window; /* safe queue window */ - u32 id; - int low_mark; /* low watermark, resume queue if free - * space more than this */ - int high_mark; /* high watermark, stop queue if free - * space less than this */ -}; #define TFD_TX_CMD_SLOTS 256 #define TFD_CMD_SLOTS 32 @@ -274,12 +241,31 @@ struct iwl_pcie_first_tb_buf { * @wd_timeout: queue watchdog timeout (jiffies) - per queue * @frozen: tx stuck queue timer is frozen * @frozen_expiry_remainder: remember how long until the timer fires + * @write_ptr: 1-st empty entry (index) host_w + * @read_ptr: last used entry (index) host_r + * @dma_addr: physical addr for BD's + * @n_window: safe queue window + * @id: queue id + * @low_mark: low watermark, resume queue if free space more than this + * @high_mark: high watermark, stop queue if free space less than this * * A Tx queue consists of circular buffer of BDs (a.k.a. TFDs, transmit frame * descriptors) and required locking structures. + * + * Note the difference between TFD_QUEUE_SIZE_MAX and n_window: the hardware + * always assumes 256 descriptors, so TFD_QUEUE_SIZE_MAX is always 256 (unless + * there might be HW changes in the future). For the normal TX + * queues, n_window, which is the size of the software queue data + * is also 256; however, for the command queue, n_window is only + * 32 since we don't need so many commands pending. Since the HW + * still uses 256 BDs for DMA though, TFD_QUEUE_SIZE_MAX stays 256. + * This means that we end up with the following: + * HW entries: | 0 | ... | N * 32 | ... | N * 32 + 31 | ... | 255 | + * SW entries: | 0 | ... | 31 | + * where N is a number between 0 and 7. This means that the SW + * data is a window overlayed over the HW queue. */ struct iwl_txq { - struct iwl_queue q; void *tfds; struct iwl_pcie_first_tb_buf *first_tb_bufs; dma_addr_t first_tb_dma; @@ -295,6 +281,14 @@ struct iwl_txq { bool block; unsigned long wd_timeout; struct sk_buff_head overflow_q; + + int write_ptr; + int read_ptr; + dma_addr_t dma_addr; + int n_window; + u32 id; + int low_mark; + int high_mark; }; static inline dma_addr_t @@ -633,9 +627,9 @@ static inline void iwl_wake_queue(struct iwl_trans *trans, { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); - if (test_and_clear_bit(txq->q.id, trans_pcie->queue_stopped)) { - IWL_DEBUG_TX_QUEUES(trans, "Wake hwq %d\n", txq->q.id); - iwl_op_mode_queue_not_full(trans->op_mode, txq->q.id); + if (test_and_clear_bit(txq->id, trans_pcie->queue_stopped)) { + IWL_DEBUG_TX_QUEUES(trans, "Wake hwq %d\n", txq->id); + iwl_op_mode_queue_not_full(trans->op_mode, txq->id); } } @@ -644,22 +638,22 @@ static inline void iwl_stop_queue(struct iwl_trans *trans, { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); - if (!test_and_set_bit(txq->q.id, trans_pcie->queue_stopped)) { - iwl_op_mode_queue_full(trans->op_mode, txq->q.id); - IWL_DEBUG_TX_QUEUES(trans, "Stop hwq %d\n", txq->q.id); + if (!test_and_set_bit(txq->id, trans_pcie->queue_stopped)) { + iwl_op_mode_queue_full(trans->op_mode, txq->id); + IWL_DEBUG_TX_QUEUES(trans, "Stop hwq %d\n", txq->id); } else IWL_DEBUG_TX_QUEUES(trans, "hwq %d already stopped\n", - txq->q.id); + txq->id); } -static inline bool iwl_queue_used(const struct iwl_queue *q, int i) +static inline bool iwl_queue_used(const struct iwl_txq *q, int i) { return q->write_ptr >= q->read_ptr ? (i >= q->read_ptr && i < q->write_ptr) : !(i < q->read_ptr && i >= q->write_ptr); } -static inline u8 get_cmd_index(struct iwl_queue *q, u32 index) +static inline u8 get_cmd_index(struct iwl_txq *q, u32 index) { return index & (q->n_window - 1); } diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c index 5c36e6d00622..452387cd5df4 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c @@ -1142,7 +1142,7 @@ static void iwl_pcie_rx_handle_rb(struct iwl_trans *trans, sequence = le16_to_cpu(pkt->hdr.sequence); index = SEQ_TO_INDEX(sequence); - cmd_index = get_cmd_index(&txq->q, index); + cmd_index = get_cmd_index(txq, index); if (rxq->id == 0) iwl_op_mode_rx(trans->op_mode, &rxq->napi, diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index e908bb8e10b6..361ccf651e9b 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -1899,7 +1899,7 @@ static void iwl_trans_pcie_freeze_txq_timer(struct iwl_trans *trans, txq->frozen = freeze; - if (txq->q.read_ptr == txq->q.write_ptr) + if (txq->read_ptr == txq->write_ptr) goto next_queue; if (freeze) { @@ -1947,7 +1947,7 @@ static void iwl_trans_pcie_block_txq_ptrs(struct iwl_trans *trans, bool block) txq->block--; if (!txq->block) { iwl_write32(trans, HBUS_TARG_WRPTR, - txq->q.write_ptr | (i << 8)); + txq->write_ptr | (i << 8)); } } else if (block) { txq->block++; @@ -1967,14 +1967,14 @@ void iwl_trans_pcie_log_scd_error(struct iwl_trans *trans, struct iwl_txq *txq) int cnt; IWL_ERR(trans, "Current SW read_ptr %d write_ptr %d\n", - txq->q.read_ptr, txq->q.write_ptr); + txq->read_ptr, txq->write_ptr); if (trans->cfg->use_tfh) /* TODO: access new SCD registers and dump them */ return; scd_sram_addr = trans_pcie->scd_base_addr + - SCD_TX_STTS_QUEUE_OFFSET(txq->q.id); + SCD_TX_STTS_QUEUE_OFFSET(txq->id); iwl_trans_read_mem_bytes(trans, scd_sram_addr, buf, sizeof(buf)); iwl_print_hex_error(trans, buf, sizeof(buf)); @@ -2009,7 +2009,6 @@ static int iwl_trans_pcie_wait_txq_empty(struct iwl_trans *trans, u32 txq_bm) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); struct iwl_txq *txq; - struct iwl_queue *q; int cnt; unsigned long now = jiffies; int ret = 0; @@ -2027,13 +2026,12 @@ static int iwl_trans_pcie_wait_txq_empty(struct iwl_trans *trans, u32 txq_bm) IWL_DEBUG_TX_QUEUES(trans, "Emptying queue %d...\n", cnt); txq = &trans_pcie->txq[cnt]; - q = &txq->q; - wr_ptr = ACCESS_ONCE(q->write_ptr); + wr_ptr = ACCESS_ONCE(txq->write_ptr); - while (q->read_ptr != ACCESS_ONCE(q->write_ptr) && + while (txq->read_ptr != ACCESS_ONCE(txq->write_ptr) && !time_after(jiffies, now + msecs_to_jiffies(IWL_FLUSH_WAIT_MS))) { - u8 write_ptr = ACCESS_ONCE(q->write_ptr); + u8 write_ptr = ACCESS_ONCE(txq->write_ptr); if (WARN_ONCE(wr_ptr != write_ptr, "WR pointer moved while flushing %d -> %d\n", @@ -2042,7 +2040,7 @@ static int iwl_trans_pcie_wait_txq_empty(struct iwl_trans *trans, u32 txq_bm) usleep_range(1000, 2000); } - if (q->read_ptr != q->write_ptr) { + if (txq->read_ptr != txq->write_ptr) { IWL_ERR(trans, "fail to flush all tx fifo queues Q %d\n", cnt); ret = -ETIMEDOUT; @@ -2210,7 +2208,6 @@ static ssize_t iwl_dbgfs_tx_queue_read(struct file *file, struct iwl_trans *trans = file->private_data; struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); struct iwl_txq *txq; - struct iwl_queue *q; char *buf; int pos = 0; int cnt; @@ -2228,10 +2225,9 @@ static ssize_t iwl_dbgfs_tx_queue_read(struct file *file, for (cnt = 0; cnt < trans->cfg->base_params->num_of_queues; cnt++) { txq = &trans_pcie->txq[cnt]; - q = &txq->q; pos += scnprintf(buf + pos, bufsz - pos, "hwq %.2d: read=%u write=%u use=%d stop=%d need_update=%d frozen=%d%s\n", - cnt, q->read_ptr, q->write_ptr, + cnt, txq->read_ptr, txq->write_ptr, !!test_bit(cnt, trans_pcie->queue_used), !!test_bit(cnt, trans_pcie->queue_stopped), txq->need_update, txq->frozen, @@ -2659,7 +2655,7 @@ static struct iwl_trans_dump_data /* host commands */ len += sizeof(*data) + - cmdq->q.n_window * (sizeof(*txcmd) + TFD_MAX_PAYLOAD_SIZE); + cmdq->n_window * (sizeof(*txcmd) + TFD_MAX_PAYLOAD_SIZE); /* FW monitor */ if (trans_pcie->fw_mon_page) { @@ -2727,9 +2723,9 @@ static struct iwl_trans_dump_data data->type = cpu_to_le32(IWL_FW_ERROR_DUMP_TXCMD); txcmd = (void *)data->data; spin_lock_bh(&cmdq->lock); - ptr = cmdq->q.write_ptr; - for (i = 0; i < cmdq->q.n_window; i++) { - u8 idx = get_cmd_index(&cmdq->q, ptr); + ptr = cmdq->write_ptr; + for (i = 0; i < cmdq->n_window; i++) { + u8 idx = get_cmd_index(cmdq, ptr); u32 caplen, cmdlen; cmdlen = iwl_trans_pcie_get_cmdlen(trans, cmdq->tfds + diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c index b4b925ed5267..0d156fddbf3d 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c @@ -71,7 +71,7 @@ * ***************************************************/ -static int iwl_queue_space(const struct iwl_queue *q) +static int iwl_queue_space(const struct iwl_txq *q) { unsigned int max; unsigned int used; @@ -102,7 +102,7 @@ static int iwl_queue_space(const struct iwl_queue *q) /* * iwl_queue_init - Initialize queue's high/low-water and read/write indexes */ -static int iwl_queue_init(struct iwl_queue *q, int slots_num, u32 id) +static int iwl_queue_init(struct iwl_txq *q, int slots_num, u32 id) { q->n_window = slots_num; q->id = id; @@ -158,13 +158,13 @@ static void iwl_pcie_txq_stuck_timer(unsigned long data) spin_lock(&txq->lock); /* check if triggered erroneously */ - if (txq->q.read_ptr == txq->q.write_ptr) { + if (txq->read_ptr == txq->write_ptr) { spin_unlock(&txq->lock); return; } spin_unlock(&txq->lock); - IWL_ERR(trans, "Queue %d stuck for %u ms.\n", txq->q.id, + IWL_ERR(trans, "Queue %d stuck for %u ms.\n", txq->id, jiffies_to_msecs(txq->wd_timeout)); iwl_trans_pcie_log_scd_error(trans, txq); @@ -181,13 +181,13 @@ static void iwl_pcie_txq_update_byte_cnt_tbl(struct iwl_trans *trans, { struct iwlagn_scd_bc_tbl *scd_bc_tbl; struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); - int write_ptr = txq->q.write_ptr; - int txq_id = txq->q.id; + int write_ptr = txq->write_ptr; + int txq_id = txq->id; u8 sec_ctl = 0; u16 len = byte_cnt + IWL_TX_CRC_SIZE + IWL_TX_DELIMITER_SIZE; __le16 bc_ent; struct iwl_tx_cmd *tx_cmd = - (void *) txq->entries[txq->q.write_ptr].cmd->payload; + (void *)txq->entries[txq->write_ptr].cmd->payload; scd_bc_tbl = trans_pcie->scd_bc_tbls.addr; @@ -244,12 +244,12 @@ static void iwl_pcie_txq_inval_byte_cnt_tbl(struct iwl_trans *trans, struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); struct iwlagn_scd_bc_tbl *scd_bc_tbl = trans_pcie->scd_bc_tbls.addr; - int txq_id = txq->q.id; - int read_ptr = txq->q.read_ptr; + int txq_id = txq->id; + int read_ptr = txq->read_ptr; u8 sta_id = 0; __le16 bc_ent; struct iwl_tx_cmd *tx_cmd = - (void *)txq->entries[txq->q.read_ptr].cmd->payload; + (void *)txq->entries[read_ptr].cmd->payload; WARN_ON(read_ptr >= TFD_QUEUE_SIZE_MAX); @@ -273,7 +273,7 @@ static void iwl_pcie_txq_inc_wr_ptr(struct iwl_trans *trans, { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); u32 reg = 0; - int txq_id = txq->q.id; + int txq_id = txq->id; lockdep_assert_held(&txq->lock); @@ -307,10 +307,10 @@ static void iwl_pcie_txq_inc_wr_ptr(struct iwl_trans *trans, * if not in power-save mode, uCode will never sleep when we're * trying to tx (during RFKILL, we're not trying to tx). */ - IWL_DEBUG_TX(trans, "Q:%d WR: 0x%x\n", txq_id, txq->q.write_ptr); + IWL_DEBUG_TX(trans, "Q:%d WR: 0x%x\n", txq_id, txq->write_ptr); if (!txq->block) iwl_write32(trans, HBUS_TARG_WRPTR, - txq->q.write_ptr | (txq_id << 8)); + txq->write_ptr | (txq_id << 8)); } void iwl_pcie_txq_check_wrptrs(struct iwl_trans *trans) @@ -462,8 +462,8 @@ static void iwl_pcie_txq_free_tfd(struct iwl_trans *trans, struct iwl_txq *txq) /* rd_ptr is bounded by TFD_QUEUE_SIZE_MAX and * idx is bounded by n_window */ - int rd_ptr = txq->q.read_ptr; - int idx = get_cmd_index(&txq->q, rd_ptr); + int rd_ptr = txq->read_ptr; + int idx = get_cmd_index(txq, rd_ptr); lockdep_assert_held(&txq->lock); @@ -493,12 +493,10 @@ static int iwl_pcie_txq_build_tfd(struct iwl_trans *trans, struct iwl_txq *txq, dma_addr_t addr, u16 len, bool reset) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); - struct iwl_queue *q; void *tfd; u32 num_tbs; - q = &txq->q; - tfd = txq->tfds + trans_pcie->tfd_size * q->write_ptr; + tfd = txq->tfds + trans_pcie->tfd_size * txq->write_ptr; if (reset) memset(tfd, 0, trans_pcie->tfd_size); @@ -537,7 +535,7 @@ static int iwl_pcie_txq_alloc(struct iwl_trans *trans, (unsigned long)txq); txq->trans_pcie = trans_pcie; - txq->q.n_window = slots_num; + txq->n_window = slots_num; txq->entries = kcalloc(slots_num, sizeof(struct iwl_pcie_txq_entry), @@ -558,7 +556,7 @@ static int iwl_pcie_txq_alloc(struct iwl_trans *trans, /* Circular buffer of transmit frame descriptors (TFDs), * shared with device */ txq->tfds = dma_alloc_coherent(trans->dev, tfd_sz, - &txq->q.dma_addr, GFP_KERNEL); + &txq->dma_addr, GFP_KERNEL); if (!txq->tfds) goto error; @@ -572,11 +570,11 @@ static int iwl_pcie_txq_alloc(struct iwl_trans *trans, if (!txq->first_tb_bufs) goto err_free_tfds; - txq->q.id = txq_id; + txq->id = txq_id; return 0; err_free_tfds: - dma_free_coherent(trans->dev, tfd_sz, txq->tfds, txq->q.dma_addr); + dma_free_coherent(trans->dev, tfd_sz, txq->tfds, txq->dma_addr); error: if (txq->entries && txq_id == trans_pcie->cmd_queue) for (i = 0; i < slots_num; i++) @@ -600,7 +598,7 @@ static int iwl_pcie_txq_init(struct iwl_trans *trans, struct iwl_txq *txq, BUILD_BUG_ON(TFD_QUEUE_SIZE_MAX & (TFD_QUEUE_SIZE_MAX - 1)); /* Initialize queue's high/low-water marks, and head/tail indexes */ - ret = iwl_queue_init(&txq->q, slots_num, txq_id); + ret = iwl_queue_init(txq, slots_num, txq_id); if (ret) return ret; @@ -614,10 +612,10 @@ static int iwl_pcie_txq_init(struct iwl_trans *trans, struct iwl_txq *txq, if (trans->cfg->use_tfh) iwl_write_direct64(trans, FH_MEM_CBBC_QUEUE(trans, txq_id), - txq->q.dma_addr); + txq->dma_addr); else iwl_write_direct32(trans, FH_MEM_CBBC_QUEUE(trans, txq_id), - txq->q.dma_addr >> 8); + txq->dma_addr >> 8); return 0; } @@ -664,15 +662,14 @@ static void iwl_pcie_txq_unmap(struct iwl_trans *trans, int txq_id) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); struct iwl_txq *txq = &trans_pcie->txq[txq_id]; - struct iwl_queue *q = &txq->q; spin_lock_bh(&txq->lock); - while (q->write_ptr != q->read_ptr) { + while (txq->write_ptr != txq->read_ptr) { IWL_DEBUG_TX_REPLY(trans, "Q %d Free %d\n", - txq_id, q->read_ptr); + txq_id, txq->read_ptr); if (txq_id != trans_pcie->cmd_queue) { - struct sk_buff *skb = txq->entries[q->read_ptr].skb; + struct sk_buff *skb = txq->entries[txq->read_ptr].skb; if (WARN_ON_ONCE(!skb)) continue; @@ -680,15 +677,15 @@ static void iwl_pcie_txq_unmap(struct iwl_trans *trans, int txq_id) iwl_pcie_free_tso_page(trans_pcie, skb); } iwl_pcie_txq_free_tfd(trans, txq); - q->read_ptr = iwl_queue_inc_wrap(q->read_ptr); + txq->read_ptr = iwl_queue_inc_wrap(txq->read_ptr); - if (q->read_ptr == q->write_ptr) { + if (txq->read_ptr == txq->write_ptr) { unsigned long flags; spin_lock_irqsave(&trans_pcie->reg_lock, flags); if (txq_id != trans_pcie->cmd_queue) { IWL_DEBUG_RPM(trans, "Q %d - last tx freed\n", - q->id); + txq->id); iwl_trans_unref(trans); } else { iwl_pcie_clear_cmd_in_flight(trans); @@ -732,7 +729,7 @@ static void iwl_pcie_txq_free(struct iwl_trans *trans, int txq_id) /* De-alloc array of command/tx buffers */ if (txq_id == trans_pcie->cmd_queue) - for (i = 0; i < txq->q.n_window; i++) { + for (i = 0; i < txq->n_window; i++) { kzfree(txq->entries[i].cmd); kzfree(txq->entries[i].free_buf); } @@ -741,12 +738,12 @@ static void iwl_pcie_txq_free(struct iwl_trans *trans, int txq_id) if (txq->tfds) { dma_free_coherent(dev, trans_pcie->tfd_size * TFD_QUEUE_SIZE_MAX, - txq->tfds, txq->q.dma_addr); - txq->q.dma_addr = 0; + txq->tfds, txq->dma_addr); + txq->dma_addr = 0; txq->tfds = NULL; dma_free_coherent(dev, - sizeof(*txq->first_tb_bufs) * txq->q.n_window, + sizeof(*txq->first_tb_bufs) * txq->n_window, txq->first_tb_bufs, txq->first_tb_dma); } @@ -830,14 +827,14 @@ void iwl_trans_pcie_tx_reset(struct iwl_trans *trans) if (trans->cfg->use_tfh) iwl_write_direct64(trans, FH_MEM_CBBC_QUEUE(trans, txq_id), - txq->q.dma_addr); + txq->dma_addr); else iwl_write_direct32(trans, FH_MEM_CBBC_QUEUE(trans, txq_id), - txq->q.dma_addr >> 8); + txq->dma_addr >> 8); iwl_pcie_txq_unmap(trans, txq_id); - txq->q.read_ptr = 0; - txq->q.write_ptr = 0; + txq->read_ptr = 0; + txq->write_ptr = 0; } /* Tell NIC where to find the "keep warm" buffer */ @@ -1081,7 +1078,7 @@ static inline void iwl_pcie_txq_progress(struct iwl_txq *txq) * if empty delete timer, otherwise move timer forward * since we're making progress on this queue */ - if (txq->q.read_ptr == txq->q.write_ptr) + if (txq->read_ptr == txq->write_ptr) del_timer(&txq->stuck_timer); else mod_timer(&txq->stuck_timer, jiffies + txq->wd_timeout); @@ -1094,7 +1091,6 @@ void iwl_trans_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn, struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); struct iwl_txq *txq = &trans_pcie->txq[txq_id]; int tfd_num = ssn & (TFD_QUEUE_SIZE_MAX - 1); - struct iwl_queue *q = &txq->q; int last_to_free; /* This function is not meant to release cmd queue*/ @@ -1109,21 +1105,21 @@ void iwl_trans_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn, goto out; } - if (txq->q.read_ptr == tfd_num) + if (txq->read_ptr == tfd_num) goto out; IWL_DEBUG_TX_REPLY(trans, "[Q %d] %d -> %d (%d)\n", - txq_id, txq->q.read_ptr, tfd_num, ssn); + txq_id, txq->read_ptr, tfd_num, ssn); /*Since we free until index _not_ inclusive, the one before index is * the last we will free. This one must be used */ last_to_free = iwl_queue_dec_wrap(tfd_num); - if (!iwl_queue_used(q, last_to_free)) { + if (!iwl_queue_used(txq, last_to_free)) { IWL_ERR(trans, "%s: Read index for DMA queue txq id (%d), last_to_free %d is out of range [0-%d] %d %d.\n", __func__, txq_id, last_to_free, TFD_QUEUE_SIZE_MAX, - q->write_ptr, q->read_ptr); + txq->write_ptr, txq->read_ptr); goto out; } @@ -1131,9 +1127,9 @@ void iwl_trans_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn, goto out; for (; - q->read_ptr != tfd_num; - q->read_ptr = iwl_queue_inc_wrap(q->read_ptr)) { - struct sk_buff *skb = txq->entries[txq->q.read_ptr].skb; + txq->read_ptr != tfd_num; + txq->read_ptr = iwl_queue_inc_wrap(txq->read_ptr)) { + struct sk_buff *skb = txq->entries[txq->read_ptr].skb; if (WARN_ON_ONCE(!skb)) continue; @@ -1142,7 +1138,7 @@ void iwl_trans_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn, __skb_queue_tail(skbs, skb); - txq->entries[txq->q.read_ptr].skb = NULL; + txq->entries[txq->read_ptr].skb = NULL; if (!trans->cfg->use_tfh) iwl_pcie_txq_inval_byte_cnt_tbl(trans, txq); @@ -1152,7 +1148,7 @@ void iwl_trans_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn, iwl_pcie_txq_progress(txq); - if (iwl_queue_space(&txq->q) > txq->q.low_mark && + if (iwl_queue_space(txq) > txq->low_mark && test_bit(txq_id, trans_pcie->queue_stopped)) { struct sk_buff_head overflow_skbs; @@ -1184,12 +1180,12 @@ void iwl_trans_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn, } spin_lock_bh(&txq->lock); - if (iwl_queue_space(&txq->q) > txq->q.low_mark) + if (iwl_queue_space(txq) > txq->low_mark) iwl_wake_queue(trans, txq); } - if (q->read_ptr == q->write_ptr) { - IWL_DEBUG_RPM(trans, "Q %d - last tx reclaimed\n", q->id); + if (txq->read_ptr == txq->write_ptr) { + IWL_DEBUG_RPM(trans, "Q %d - last tx reclaimed\n", txq->id); iwl_trans_unref(trans); } @@ -1251,31 +1247,30 @@ static void iwl_pcie_cmdq_reclaim(struct iwl_trans *trans, int txq_id, int idx) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); struct iwl_txq *txq = &trans_pcie->txq[txq_id]; - struct iwl_queue *q = &txq->q; unsigned long flags; int nfreed = 0; lockdep_assert_held(&txq->lock); - if ((idx >= TFD_QUEUE_SIZE_MAX) || (!iwl_queue_used(q, idx))) { + if ((idx >= TFD_QUEUE_SIZE_MAX) || (!iwl_queue_used(txq, idx))) { IWL_ERR(trans, "%s: Read index for DMA queue txq id (%d), index %d is out of range [0-%d] %d %d.\n", __func__, txq_id, idx, TFD_QUEUE_SIZE_MAX, - q->write_ptr, q->read_ptr); + txq->write_ptr, txq->read_ptr); return; } - for (idx = iwl_queue_inc_wrap(idx); q->read_ptr != idx; - q->read_ptr = iwl_queue_inc_wrap(q->read_ptr)) { + for (idx = iwl_queue_inc_wrap(idx); txq->read_ptr != idx; + txq->read_ptr = iwl_queue_inc_wrap(txq->read_ptr)) { if (nfreed++ > 0) { IWL_ERR(trans, "HCMD skipped: index (%d) %d %d\n", - idx, q->write_ptr, q->read_ptr); + idx, txq->write_ptr, txq->read_ptr); iwl_force_nmi(trans); } } - if (q->read_ptr == q->write_ptr) { + if (txq->read_ptr == txq->write_ptr) { spin_lock_irqsave(&trans_pcie->reg_lock, flags); iwl_pcie_clear_cmd_in_flight(trans); spin_unlock_irqrestore(&trans_pcie->reg_lock, flags); @@ -1361,14 +1356,14 @@ void iwl_trans_pcie_txq_enable(struct iwl_trans *trans, int txq_id, u16 ssn, */ iwl_scd_txq_disable_agg(trans, txq_id); - ssn = txq->q.read_ptr; + ssn = txq->read_ptr; } } /* Place first TFD at index corresponding to start sequence number. * Assumes that ssn_idx is valid (!= 0xFFF) */ - txq->q.read_ptr = (ssn & 0xff); - txq->q.write_ptr = (ssn & 0xff); + txq->read_ptr = (ssn & 0xff); + txq->write_ptr = (ssn & 0xff); iwl_write_direct32(trans, HBUS_TARG_WRPTR, (ssn & 0xff) | (txq_id << 8)); @@ -1484,7 +1479,6 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans, { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); struct iwl_txq *txq = &trans_pcie->txq[trans_pcie->cmd_queue]; - struct iwl_queue *q = &txq->q; struct iwl_device_cmd *out_cmd; struct iwl_cmd_meta *out_meta; unsigned long flags; @@ -1583,7 +1577,7 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans, spin_lock_bh(&txq->lock); - if (iwl_queue_space(q) < ((cmd->flags & CMD_ASYNC) ? 2 : 1)) { + if (iwl_queue_space(txq) < ((cmd->flags & CMD_ASYNC) ? 2 : 1)) { spin_unlock_bh(&txq->lock); IWL_ERR(trans, "No space in command queue\n"); @@ -1592,7 +1586,7 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans, goto free_dup_buf; } - idx = get_cmd_index(q, q->write_ptr); + idx = get_cmd_index(txq, txq->write_ptr); out_cmd = txq->entries[idx].cmd; out_meta = &txq->entries[idx].meta; @@ -1611,7 +1605,7 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans, out_cmd->hdr_wide.reserved = 0; out_cmd->hdr_wide.sequence = cpu_to_le16(QUEUE_TO_SEQ(trans_pcie->cmd_queue) | - INDEX_TO_SEQ(q->write_ptr)); + INDEX_TO_SEQ(txq->write_ptr)); cmd_pos = sizeof(struct iwl_cmd_header_wide); copy_size = sizeof(struct iwl_cmd_header_wide); @@ -1619,7 +1613,7 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans, out_cmd->hdr.cmd = iwl_cmd_opcode(cmd->id); out_cmd->hdr.sequence = cpu_to_le16(QUEUE_TO_SEQ(trans_pcie->cmd_queue) | - INDEX_TO_SEQ(q->write_ptr)); + INDEX_TO_SEQ(txq->write_ptr)); out_cmd->hdr.group_id = 0; cmd_pos = sizeof(struct iwl_cmd_header); @@ -1669,7 +1663,7 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans, iwl_get_cmd_string(trans, cmd->id), group_id, out_cmd->hdr.cmd, le16_to_cpu(out_cmd->hdr.sequence), - cmd_size, q->write_ptr, idx, trans_pcie->cmd_queue); + cmd_size, txq->write_ptr, idx, trans_pcie->cmd_queue); /* start the TFD with the minimum copy bytes */ tb0_size = min_t(int, copy_size, IWL_FIRST_TB_SIZE); @@ -1685,7 +1679,8 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans, copy_size - tb0_size, DMA_TO_DEVICE); if (dma_mapping_error(trans->dev, phys_addr)) { - iwl_pcie_tfd_unmap(trans, out_meta, txq, q->write_ptr); + iwl_pcie_tfd_unmap(trans, out_meta, txq, + txq->write_ptr); idx = -ENOMEM; goto out; } @@ -1708,7 +1703,8 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans, phys_addr = dma_map_single(trans->dev, (void *)data, cmdlen[i], DMA_TO_DEVICE); if (dma_mapping_error(trans->dev, phys_addr)) { - iwl_pcie_tfd_unmap(trans, out_meta, txq, q->write_ptr); + iwl_pcie_tfd_unmap(trans, out_meta, txq, + txq->write_ptr); idx = -ENOMEM; goto out; } @@ -1725,7 +1721,7 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans, trace_iwlwifi_dev_hcmd(trans->dev, cmd, cmd_size, &out_cmd->hdr_wide); /* start timer if queue currently empty */ - if (q->read_ptr == q->write_ptr && txq->wd_timeout) + if (txq->read_ptr == txq->write_ptr && txq->wd_timeout) mod_timer(&txq->stuck_timer, jiffies + txq->wd_timeout); spin_lock_irqsave(&trans_pcie->reg_lock, flags); @@ -1737,7 +1733,7 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans, } /* Increment and update queue's write index */ - q->write_ptr = iwl_queue_inc_wrap(q->write_ptr); + txq->write_ptr = iwl_queue_inc_wrap(txq->write_ptr); iwl_pcie_txq_inc_wr_ptr(trans, txq); spin_unlock_irqrestore(&trans_pcie->reg_lock, flags); @@ -1775,15 +1771,15 @@ void iwl_pcie_hcmd_complete(struct iwl_trans *trans, if (WARN(txq_id != trans_pcie->cmd_queue, "wrong command queue %d (should be %d), sequence 0x%X readp=%d writep=%d\n", txq_id, trans_pcie->cmd_queue, sequence, - trans_pcie->txq[trans_pcie->cmd_queue].q.read_ptr, - trans_pcie->txq[trans_pcie->cmd_queue].q.write_ptr)) { + trans_pcie->txq[trans_pcie->cmd_queue].read_ptr, + trans_pcie->txq[trans_pcie->cmd_queue].write_ptr)) { iwl_print_hex_error(trans, pkt, 32); return; } spin_lock_bh(&txq->lock); - cmd_index = get_cmd_index(&txq->q, index); + cmd_index = get_cmd_index(txq, index); cmd = txq->entries[cmd_index].cmd; meta = &txq->entries[cmd_index].meta; cmd_id = iwl_cmd_id(cmd->hdr.cmd, group_id, 0); @@ -1901,14 +1897,13 @@ static int iwl_pcie_send_hcmd_sync(struct iwl_trans *trans, HOST_COMPLETE_TIMEOUT); if (!ret) { struct iwl_txq *txq = &trans_pcie->txq[trans_pcie->cmd_queue]; - struct iwl_queue *q = &txq->q; IWL_ERR(trans, "Error sending %s: time out after %dms.\n", iwl_get_cmd_string(trans, cmd->id), jiffies_to_msecs(HOST_COMPLETE_TIMEOUT)); IWL_ERR(trans, "Current CMD queue read_ptr %d write_ptr %d\n", - q->read_ptr, q->write_ptr); + txq->read_ptr, txq->write_ptr); clear_bit(STATUS_SYNC_HCMD_ACTIVE, &trans->status); IWL_DEBUG_INFO(trans, "Clearing HCMD_ACTIVE for command %s\n", @@ -1987,7 +1982,6 @@ static int iwl_fill_data_tbs(struct iwl_trans *trans, struct sk_buff *skb, struct iwl_device_cmd *dev_cmd, u16 tb1_len) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); - struct iwl_queue *q = &txq->q; u16 tb2_len; int i; @@ -2002,7 +1996,8 @@ static int iwl_fill_data_tbs(struct iwl_trans *trans, struct sk_buff *skb, skb->data + hdr_len, tb2_len, DMA_TO_DEVICE); if (unlikely(dma_mapping_error(trans->dev, tb2_phys))) { - iwl_pcie_tfd_unmap(trans, out_meta, txq, q->write_ptr); + iwl_pcie_tfd_unmap(trans, out_meta, txq, + txq->write_ptr); return -EINVAL; } iwl_pcie_txq_build_tfd(trans, txq, tb2_phys, tb2_len, false); @@ -2021,7 +2016,8 @@ static int iwl_fill_data_tbs(struct iwl_trans *trans, struct sk_buff *skb, skb_frag_size(frag), DMA_TO_DEVICE); if (unlikely(dma_mapping_error(trans->dev, tb_phys))) { - iwl_pcie_tfd_unmap(trans, out_meta, txq, q->write_ptr); + iwl_pcie_tfd_unmap(trans, out_meta, txq, + txq->write_ptr); return -EINVAL; } tb_idx = iwl_pcie_txq_build_tfd(trans, txq, tb_phys, @@ -2031,7 +2027,7 @@ static int iwl_fill_data_tbs(struct iwl_trans *trans, struct sk_buff *skb, } trace_iwlwifi_dev_tx(trans->dev, skb, - iwl_pcie_get_tfd(trans_pcie, txq, q->write_ptr), + iwl_pcie_get_tfd(trans_pcie, txq, txq->write_ptr), trans_pcie->tfd_size, &dev_cmd->hdr, IWL_FIRST_TB_SIZE + tb1_len, skb->data + hdr_len, tb2_len); @@ -2093,7 +2089,6 @@ static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb, struct ieee80211_hdr *hdr = (void *)skb->data; unsigned int snap_ip_tcp_hdrlen, ip_hdrlen, total_len, hdr_room; unsigned int mss = skb_shinfo(skb)->gso_size; - struct iwl_queue *q = &txq->q; u16 length, iv_len, amsdu_pad; u8 *start_hdr; struct iwl_tso_hdr_page *hdr_page; @@ -2107,7 +2102,7 @@ static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb, IEEE80211_CCMP_HDR_LEN : 0; trace_iwlwifi_dev_tx(trans->dev, skb, - iwl_pcie_get_tfd(trans_pcie, txq, q->write_ptr), + iwl_pcie_get_tfd(trans_pcie, txq, txq->write_ptr), trans_pcie->tfd_size, &dev_cmd->hdr, IWL_FIRST_TB_SIZE + tb1_len, NULL, 0); @@ -2264,7 +2259,7 @@ static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb, return 0; out_unmap: - iwl_pcie_tfd_unmap(trans, out_meta, txq, q->write_ptr); + iwl_pcie_tfd_unmap(trans, out_meta, txq, txq->write_ptr); return ret; } #else /* CONFIG_INET */ @@ -2288,7 +2283,6 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb, struct iwl_tx_cmd *tx_cmd = (struct iwl_tx_cmd *)dev_cmd->payload; struct iwl_cmd_meta *out_meta; struct iwl_txq *txq; - struct iwl_queue *q; dma_addr_t tb0_phys, tb1_phys, scratch_phys; void *tb1_addr; void *tfd; @@ -2300,7 +2294,6 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb, bool amsdu; txq = &trans_pcie->txq[txq_id]; - q = &txq->q; if (WARN_ONCE(!test_bit(txq_id, trans_pcie->queue_used), "TX on unused queue %d\n", txq_id)) @@ -2335,11 +2328,11 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb, spin_lock(&txq->lock); - if (iwl_queue_space(q) < q->high_mark) { + if (iwl_queue_space(txq) < txq->high_mark) { iwl_stop_queue(trans, txq); /* don't put the packet on the ring, if there is no room */ - if (unlikely(iwl_queue_space(q) < 3)) { + if (unlikely(iwl_queue_space(txq) < 3)) { struct iwl_device_cmd **dev_cmd_ptr; dev_cmd_ptr = (void *)((u8 *)skb->cb + @@ -2360,19 +2353,19 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb, */ wifi_seq = IEEE80211_SEQ_TO_SN(le16_to_cpu(hdr->seq_ctrl)); WARN_ONCE(txq->ampdu && - (wifi_seq & 0xff) != q->write_ptr, + (wifi_seq & 0xff) != txq->write_ptr, "Q: %d WiFi Seq %d tfdNum %d", - txq_id, wifi_seq, q->write_ptr); + txq_id, wifi_seq, txq->write_ptr); /* Set up driver data for this TFD */ - txq->entries[q->write_ptr].skb = skb; - txq->entries[q->write_ptr].cmd = dev_cmd; + txq->entries[txq->write_ptr].skb = skb; + txq->entries[txq->write_ptr].cmd = dev_cmd; dev_cmd->hdr.sequence = cpu_to_le16((u16)(QUEUE_TO_SEQ(txq_id) | - INDEX_TO_SEQ(q->write_ptr))); + INDEX_TO_SEQ(txq->write_ptr))); - tb0_phys = iwl_pcie_get_first_tb_dma(txq, q->write_ptr); + tb0_phys = iwl_pcie_get_first_tb_dma(txq, txq->write_ptr); scratch_phys = tb0_phys + sizeof(struct iwl_cmd_header) + offsetof(struct iwl_tx_cmd, scratch); @@ -2380,7 +2373,7 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb, tx_cmd->dram_msb_ptr = iwl_get_dma_hi_addr(scratch_phys); /* Set up first empty entry in queue's array of Tx/cmd buffers */ - out_meta = &txq->entries[q->write_ptr].meta; + out_meta = &txq->entries[txq->write_ptr].meta; out_meta->flags = 0; /* @@ -2405,7 +2398,7 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb, } /* The first TB points to bi-directional DMA data */ - memcpy(&txq->first_tb_bufs[q->write_ptr], &dev_cmd->hdr, + memcpy(&txq->first_tb_bufs[txq->write_ptr], &dev_cmd->hdr, IWL_FIRST_TB_SIZE); iwl_pcie_txq_build_tfd(trans, txq, tb0_phys, IWL_FIRST_TB_SIZE, true); @@ -2430,7 +2423,7 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb, goto out_err; } - tfd = iwl_pcie_get_tfd(trans_pcie, txq, q->write_ptr); + tfd = iwl_pcie_get_tfd(trans_pcie, txq, txq->write_ptr); /* Set up entry for this TFD in Tx byte-count array */ iwl_pcie_txq_update_byte_cnt_tbl(trans, txq, le16_to_cpu(tx_cmd->len), iwl_pcie_tfd_get_num_tbs(trans, tfd)); @@ -2438,7 +2431,7 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb, wait_write_ptr = ieee80211_has_morefrags(fc); /* start timer if queue currently empty */ - if (q->read_ptr == q->write_ptr) { + if (txq->read_ptr == txq->write_ptr) { if (txq->wd_timeout) { /* * If the TXQ is active, then set the timer, if not, @@ -2452,12 +2445,12 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb, else txq->frozen_expiry_remainder = txq->wd_timeout; } - IWL_DEBUG_RPM(trans, "Q: %d first tx - take ref\n", q->id); + IWL_DEBUG_RPM(trans, "Q: %d first tx - take ref\n", txq->id); iwl_trans_ref(trans); } /* Tell device the write index *just past* this latest filled TFD */ - q->write_ptr = iwl_queue_inc_wrap(q->write_ptr); + txq->write_ptr = iwl_queue_inc_wrap(txq->write_ptr); if (!wait_write_ptr) iwl_pcie_txq_inc_wr_ptr(trans, txq); From c46e7724bfe9bbff17bc468903a757248b99e4ed Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Sun, 17 Jul 2016 14:24:55 +0300 Subject: [PATCH 1057/1715] iwlwifi: mvm: support new BA notification response Support new format. TX response will not be sent anymore, so all needed data is in the BA response. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- .../wireless/intel/iwlwifi/mvm/fw-api-tx.h | 79 +++++++++ drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 159 ++++++++++++------ 2 files changed, 184 insertions(+), 54 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h index 6b4c63a5e625..0055a960238b 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h @@ -577,6 +577,85 @@ struct iwl_mvm_ba_notif { u8 reserved1; } __packed; +/** + * struct iwl_mvm_compressed_ba_tfd - progress of a TFD queue + * @q_num: TFD queue number + * @tfd_index: Index of first un-acked frame in the TFD queue + */ +struct iwl_mvm_compressed_ba_tfd { + u8 q_num; + u8 reserved; + __le16 tfd_index; +} __packed; /* COMPRESSED_BA_TFD_API_S_VER_1 */ + +/** + * struct iwl_mvm_compressed_ba_ratid - progress of a RA TID queue + * @q_num: RA TID queue number + * @tid: TID of the queue + * @ssn: BA window current SSN + */ +struct iwl_mvm_compressed_ba_ratid { + u8 q_num; + u8 tid; + __le16 ssn; +} __packed; /* COMPRESSED_BA_RATID_API_S_VER_1 */ + +/* + * enum iwl_mvm_ba_resp_flags - TX aggregation status + * @IWL_MVM_BA_RESP_TX_AGG: generated due to BA + * @IWL_MVM_BA_RESP_TX_BAR: generated due to BA after BAR + * @IWL_MVM_BA_RESP_TX_AGG_FAIL: aggregation didn't receive BA + * @IWL_MVM_BA_RESP_TX_UNDERRUN: aggregation got underrun + * @IWL_MVM_BA_RESP_TX_BT_KILL: aggregation got BT-kill + * @IWL_MVM_BA_RESP_TX_DSP_TIMEOUT: aggregation didn't finish within the + * expected time + */ +enum iwl_mvm_ba_resp_flags { + IWL_MVM_BA_RESP_TX_AGG, + IWL_MVM_BA_RESP_TX_BAR, + IWL_MVM_BA_RESP_TX_AGG_FAIL, + IWL_MVM_BA_RESP_TX_UNDERRUN, + IWL_MVM_BA_RESP_TX_BT_KILL, + IWL_MVM_BA_RESP_TX_DSP_TIMEOUT +}; + +/** + * struct iwl_mvm_compressed_ba_notif - notifies about reception of BA + * ( BA_NOTIF = 0xc5 ) + * @flags: status flag, see the &iwl_mvm_ba_resp_flags + * @sta_id: Index of recipient (BA-sending) station in fw's station table + * @reduced_txp: power reduced according to TPC. This is the actual value and + * not a copy from the LQ command. Thus, if not the first rate was used + * for Tx-ing then this value will be set to 0 by FW. + * @initial_rate: TLC rate info, initial rate index, TLC table color + * @retry_cnt: retry count + * @query_byte_cnt: SCD query byte count + * @query_frame_cnt: SCD query frame count + * @txed: number of frames sent in the aggregation (all-TIDs) + * @done: number of frames that were Acked by the BA (all-TIDs) + * @wireless_time: Wireless-media time + * @tx_rate: the rate the aggregation was sent at + * @tfd_cnt: number of TFD-Q elements + * @ra_tid_cnt: number of RATID-Q elements + */ +struct iwl_mvm_compressed_ba_notif { + __le32 flags; + u8 sta_id; + u8 reduced_txp; + u8 initial_rate; + u8 retry_cnt; + __le32 query_byte_cnt; + __le16 query_frame_cnt; + __le16 txed; + __le16 done; + __le32 wireless_time; + __le32 tx_rate; + __le16 tfd_cnt; + __le16 ra_tid_cnt; + struct iwl_mvm_compressed_ba_tfd tfd[1]; + struct iwl_mvm_compressed_ba_ratid ra_tid[0]; +} __packed; /* COMPRESSED_BA_RES_API_S_VER_4 */ + /** * struct iwl_mac_beacon_cmd_v6 - beacon template command * @tx: the tx commands associated with the beacon frame diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index 62714709ba8f..96482a5de38c 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -1584,41 +1584,16 @@ void iwl_mvm_rx_tx_cmd(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb) iwl_mvm_rx_tx_cmd_agg(mvm, pkt); } -static void iwl_mvm_tx_info_from_ba_notif(struct ieee80211_tx_info *info, - struct iwl_mvm_ba_notif *ba_notif, - struct iwl_mvm_tid_data *tid_data) +static void iwl_mvm_tx_reclaim(struct iwl_mvm *mvm, int sta_id, int tid, + int txq, int index, + struct ieee80211_tx_info *ba_info, u32 rate) { - info->flags |= IEEE80211_TX_STAT_AMPDU; - info->status.ampdu_ack_len = ba_notif->txed_2_done; - info->status.ampdu_len = ba_notif->txed; - iwl_mvm_hwrate_to_tx_status(tid_data->rate_n_flags, - info); - /* TODO: not accounted if the whole A-MPDU failed */ - info->status.tx_time = tid_data->tx_time; - info->status.status_driver_data[0] = - (void *)(uintptr_t)ba_notif->reduced_txp; - info->status.status_driver_data[1] = - (void *)(uintptr_t)tid_data->rate_n_flags; -} - -void iwl_mvm_rx_ba_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb) -{ - struct iwl_rx_packet *pkt = rxb_addr(rxb); - struct iwl_mvm_ba_notif *ba_notif = (void *)pkt->data; struct sk_buff_head reclaimed_skbs; struct iwl_mvm_tid_data *tid_data; struct ieee80211_sta *sta; struct iwl_mvm_sta *mvmsta; struct sk_buff *skb; - int sta_id, tid, freed; - /* "flow" corresponds to Tx queue */ - u16 scd_flow = le16_to_cpu(ba_notif->scd_flow); - /* "ssn" is start of block-ack Tx window, corresponds to index - * (in Tx queue's circular buffer) of first TFD/frame in window */ - u16 ba_resp_scd_ssn = le16_to_cpu(ba_notif->scd_ssn); - - sta_id = ba_notif->sta_id; - tid = ba_notif->tid; + int freed; if (WARN_ONCE(sta_id >= IWL_MVM_STATION_COUNT || tid >= IWL_MAX_TID_COUNT, @@ -1638,10 +1613,10 @@ void iwl_mvm_rx_ba_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb) mvmsta = iwl_mvm_sta_from_mac80211(sta); tid_data = &mvmsta->tid_data[tid]; - if (tid_data->txq_id != scd_flow) { + if (tid_data->txq_id != txq) { IWL_ERR(mvm, - "invalid BA notification: Q %d, tid %d, flow %d\n", - tid_data->txq_id, tid, scd_flow); + "invalid BA notification: Q %d, tid %d\n", + tid_data->txq_id, tid); rcu_read_unlock(); return; } @@ -1655,27 +1630,14 @@ void iwl_mvm_rx_ba_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb) * block-ack window (we assume that they've been successfully * transmitted ... if not, it's too late anyway). */ - iwl_trans_reclaim(mvm->trans, scd_flow, ba_resp_scd_ssn, - &reclaimed_skbs); + iwl_trans_reclaim(mvm->trans, txq, index, &reclaimed_skbs); - IWL_DEBUG_TX_REPLY(mvm, - "BA_NOTIFICATION Received from %pM, sta_id = %d\n", - (u8 *)&ba_notif->sta_addr_lo32, - ba_notif->sta_id); - IWL_DEBUG_TX_REPLY(mvm, - "TID = %d, SeqCtl = %d, bitmap = 0x%llx, scd_flow = %d, scd_ssn = %d sent:%d, acked:%d\n", - ba_notif->tid, le16_to_cpu(ba_notif->seq_ctl), - (unsigned long long)le64_to_cpu(ba_notif->bitmap), - scd_flow, ba_resp_scd_ssn, ba_notif->txed, - ba_notif->txed_2_done); - - IWL_DEBUG_TX_REPLY(mvm, "reduced txp from ba notif %d\n", - ba_notif->reduced_txp); - tid_data->next_reclaimed = ba_resp_scd_ssn; + tid_data->next_reclaimed = index; iwl_mvm_check_ratid_empty(mvm, sta, tid); freed = 0; + ba_info->status.status_driver_data[1] = (void *)(uintptr_t)rate; skb_queue_walk(&reclaimed_skbs, skb) { struct ieee80211_hdr *hdr = (void *)skb->data; @@ -1697,8 +1659,12 @@ void iwl_mvm_rx_ba_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb) /* this is the first skb we deliver in this batch */ /* put the rate scaling data there */ - if (freed == 1) - iwl_mvm_tx_info_from_ba_notif(info, ba_notif, tid_data); + if (freed == 1) { + info->flags |= IEEE80211_TX_STAT_AMPDU; + memcpy(&info->status, &ba_info->status, + sizeof(ba_info->status)); + iwl_mvm_hwrate_to_tx_status(rate, info); + } } spin_unlock_bh(&mvmsta->lock); @@ -1708,7 +1674,6 @@ void iwl_mvm_rx_ba_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb) * Still it's important to update RS about sent vs. acked. */ if (skb_queue_empty(&reclaimed_skbs)) { - struct ieee80211_tx_info ba_info = {}; struct ieee80211_chanctx_conf *chanctx_conf = NULL; if (mvmsta->vif) @@ -1718,11 +1683,11 @@ void iwl_mvm_rx_ba_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb) if (WARN_ON_ONCE(!chanctx_conf)) goto out; - ba_info.band = chanctx_conf->def.chan->band; - iwl_mvm_tx_info_from_ba_notif(&ba_info, ba_notif, tid_data); + ba_info->band = chanctx_conf->def.chan->band; + iwl_mvm_hwrate_to_tx_status(rate, ba_info); IWL_DEBUG_TX_REPLY(mvm, "No reclaim. Update rs directly\n"); - iwl_mvm_rs_tx_status(mvm, sta, tid, &ba_info, false); + iwl_mvm_rs_tx_status(mvm, sta, tid, ba_info, false); } out: @@ -1734,6 +1699,92 @@ out: } } +void iwl_mvm_rx_ba_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb) +{ + struct iwl_rx_packet *pkt = rxb_addr(rxb); + int sta_id, tid, txq, index; + struct ieee80211_tx_info ba_info = {}; + struct iwl_mvm_ba_notif *ba_notif; + struct iwl_mvm_tid_data *tid_data; + struct iwl_mvm_sta *mvmsta; + + if (iwl_mvm_has_new_tx_api(mvm)) { + struct iwl_mvm_compressed_ba_notif *ba_res = + (void *)pkt->data; + + sta_id = ba_res->sta_id; + ba_info.status.ampdu_ack_len = (u8)le16_to_cpu(ba_res->done); + ba_info.status.ampdu_len = (u8)le16_to_cpu(ba_res->txed); + ba_info.status.tx_time = + (u16)le32_to_cpu(ba_res->wireless_time); + ba_info.status.status_driver_data[0] = + (void *)(uintptr_t)ba_res->reduced_txp; + + /* + * TODO: + * When supporting multi TID aggregations - we need to move + * next_reclaimed to be per TXQ and not per TID or handle it + * in a different way. + * This will go together with SN and AddBA offload and cannot + * be handled properly for now. + */ + WARN_ON(le16_to_cpu(ba_res->tfd_cnt) != 1); + iwl_mvm_tx_reclaim(mvm, sta_id, ba_res->ra_tid[0].tid, + (int)ba_res->tfd[0].q_num, + le16_to_cpu(ba_res->tfd[0].tfd_index), + &ba_info, le32_to_cpu(ba_res->tx_rate)); + + IWL_DEBUG_TX_REPLY(mvm, + "BA_NOTIFICATION Received from sta_id = %d, flags %x, sent:%d, acked:%d\n", + sta_id, le32_to_cpu(ba_res->flags), + le16_to_cpu(ba_res->txed), + le16_to_cpu(ba_res->done)); + return; + } + + ba_notif = (void *)pkt->data; + sta_id = ba_notif->sta_id; + tid = ba_notif->tid; + /* "flow" corresponds to Tx queue */ + txq = le16_to_cpu(ba_notif->scd_flow); + /* "ssn" is start of block-ack Tx window, corresponds to index + * (in Tx queue's circular buffer) of first TFD/frame in window */ + index = le16_to_cpu(ba_notif->scd_ssn); + + rcu_read_lock(); + mvmsta = iwl_mvm_sta_from_staid_rcu(mvm, sta_id); + if (WARN_ON_ONCE(!mvmsta)) { + rcu_read_unlock(); + return; + } + + tid_data = &mvmsta->tid_data[tid]; + + ba_info.status.ampdu_ack_len = ba_notif->txed_2_done; + ba_info.status.ampdu_len = ba_notif->txed; + ba_info.status.tx_time = tid_data->tx_time; + ba_info.status.status_driver_data[0] = + (void *)(uintptr_t)ba_notif->reduced_txp; + + rcu_read_unlock(); + + iwl_mvm_tx_reclaim(mvm, sta_id, tid, txq, index, &ba_info, + tid_data->rate_n_flags); + + IWL_DEBUG_TX_REPLY(mvm, + "BA_NOTIFICATION Received from %pM, sta_id = %d\n", + (u8 *)&ba_notif->sta_addr_lo32, ba_notif->sta_id); + + IWL_DEBUG_TX_REPLY(mvm, + "TID = %d, SeqCtl = %d, bitmap = 0x%llx, scd_flow = %d, scd_ssn = %d sent:%d, acked:%d\n", + ba_notif->tid, le16_to_cpu(ba_notif->seq_ctl), + le64_to_cpu(ba_notif->bitmap), txq, index, + ba_notif->txed, ba_notif->txed_2_done); + + IWL_DEBUG_TX_REPLY(mvm, "reduced txp from ba notif %d\n", + ba_notif->reduced_txp); +} + /* * Note that there are transports that buffer frames before they reach * the firmware. This means that after flush_tx_path is called, the From 496d83caf354a84a3159c3b7aee9276640982e56 Mon Sep 17 00:00:00 2001 From: Haim Dreyfuss Date: Sun, 20 Mar 2016 17:57:22 +0200 Subject: [PATCH 1058/1715] iwlwifi: pcie: Configure shared interrupt vector in MSIX mode In case the OS provides fewer interrupts than requested, different causes will share the same interrupt vector as follow: 1.One interrupt less: non rx causes shared with FBQ. 2.Two interrupts less: non rx causes shared with FBQ and RSS. 3.More than two interrupts: we will use fewer RSS queues. Also make the request depend on the number of online CPUs instead of possible CPUs. Signed-off-by: Haim Dreyfuss Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/iwl-csr.h | 2 + .../wireless/intel/iwlwifi/pcie/internal.h | 21 ++- drivers/net/wireless/intel/iwlwifi/pcie/rx.c | 14 ++ .../net/wireless/intel/iwlwifi/pcie/trans.c | 154 ++++++++++++------ 4 files changed, 134 insertions(+), 57 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-csr.h b/drivers/net/wireless/intel/iwlwifi/iwl-csr.h index 871ad02fdb17..d73e9d436027 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-csr.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-csr.h @@ -589,6 +589,8 @@ enum dtd_diode_reg { * Causes for the FH register interrupts */ enum msix_fh_int_causes { + MSIX_FH_INT_CAUSES_Q0 = BIT(0), + MSIX_FH_INT_CAUSES_Q1 = BIT(1), MSIX_FH_INT_CAUSES_D2S_CH0_NUM = BIT(16), MSIX_FH_INT_CAUSES_D2S_CH1_NUM = BIT(17), MSIX_FH_INT_CAUSES_S2D = BIT(19), diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h index cad2d1199321..8ad92fac8592 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h +++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h @@ -303,6 +303,16 @@ struct iwl_tso_hdr_page { u8 *pos; }; +/** + * enum iwl_shared_irq_flags - level of sharing for irq + * @IWL_SHARED_IRQ_NON_RX: interrupt vector serves non rx causes. + * @IWL_SHARED_IRQ_FIRST_RSS: interrupt vector serves first RSS queue. + */ +enum iwl_shared_irq_flags { + IWL_SHARED_IRQ_NON_RX = BIT(0), + IWL_SHARED_IRQ_FIRST_RSS = BIT(1), +}; + /** * struct iwl_trans_pcie - PCIe transport specific data * @rxq: all the RX queue data @@ -333,8 +343,10 @@ struct iwl_tso_hdr_page { * @fw_mon_size: size of the buffer for the firmware monitor * @msix_entries: array of MSI-X entries * @msix_enabled: true if managed to enable MSI-X - * @allocated_vector: the number of interrupt vector allocated by the OS - * @default_irq_num: default irq for non rx interrupt + * @shared_vec_mask: the type of causes the shared vector handles + * (see iwl_shared_irq_flags). + * @alloc_vecs: the number of interrupt vectors allocated by the OS + * @def_irq: default irq for non rx causes * @fh_init_mask: initial unmasked fh causes * @hw_init_mask: initial unmasked hw causes * @fh_mask: current unmasked fh causes @@ -407,8 +419,9 @@ struct iwl_trans_pcie { struct msix_entry msix_entries[IWL_MAX_RX_HW_QUEUES]; bool msix_enabled; - u32 allocated_vector; - u32 default_irq_num; + u8 shared_vec_mask; + u32 alloc_vecs; + u32 def_irq; u32 fh_init_mask; u32 hw_init_mask; u32 fh_mask; diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c index 452387cd5df4..0b5b331010fb 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c @@ -1885,6 +1885,20 @@ irqreturn_t iwl_pcie_irq_msix_handler(int irq, void *dev_id) inta_fh, iwl_read32(trans, CSR_MSIX_FH_INT_MASK_AD)); + if ((trans_pcie->shared_vec_mask & IWL_SHARED_IRQ_NON_RX) && + inta_fh & MSIX_FH_INT_CAUSES_Q0) { + local_bh_disable(); + iwl_pcie_rx_handle(trans, 0); + local_bh_enable(); + } + + if ((trans_pcie->shared_vec_mask & IWL_SHARED_IRQ_FIRST_RSS) && + inta_fh & MSIX_FH_INT_CAUSES_Q1) { + local_bh_disable(); + iwl_pcie_rx_handle(trans, 1); + local_bh_enable(); + } + /* This "Tx" DMA channel is used only for loading uCode */ if (inta_fh & MSIX_FH_INT_CAUSES_D2S_CH0_NUM) { IWL_DEBUG_ISR(trans, "uCode load interrupt\n"); diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index 361ccf651e9b..be32fe1683f5 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -1170,7 +1170,7 @@ static void iwl_pcie_synchronize_irqs(struct iwl_trans *trans) if (trans_pcie->msix_enabled) { int i; - for (i = 0; i < trans_pcie->allocated_vector; i++) + for (i = 0; i < trans_pcie->alloc_vecs; i++) synchronize_irq(trans_pcie->msix_entries[i].vector); } else { synchronize_irq(trans_pcie->pci_dev->irq); @@ -1429,13 +1429,58 @@ static struct iwl_causes_list causes_list[] = { {MSIX_HW_INT_CAUSES_REG_HAP, CSR_MSIX_HW_INT_MASK_AD, 0x2E}, }; +static void iwl_pcie_map_non_rx_causes(struct iwl_trans *trans) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + int val = trans_pcie->def_irq | MSIX_NON_AUTO_CLEAR_CAUSE; + int i; + + /* + * Access all non RX causes and map them to the default irq. + * In case we are missing at least one interrupt vector, + * the first interrupt vector will serve non-RX and FBQ causes. + */ + for (i = 0; i < ARRAY_SIZE(causes_list); i++) { + iwl_write8(trans, CSR_MSIX_IVAR(causes_list[i].addr), val); + iwl_clear_bit(trans, causes_list[i].mask_reg, + causes_list[i].cause_num); + } +} + +static void iwl_pcie_map_rx_causes(struct iwl_trans *trans) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + u32 offset = + trans_pcie->shared_vec_mask & IWL_SHARED_IRQ_FIRST_RSS ? 1 : 0; + u32 val, idx; + + /* + * The first RX queue - fallback queue, which is designated for + * management frame, command responses etc, is always mapped to the + * first interrupt vector. The other RX queues are mapped to + * the other (N - 2) interrupt vectors. + */ + val = BIT(MSIX_FH_INT_CAUSES_Q(0)); + for (idx = 1; idx < trans->num_rx_queues; idx++) { + iwl_write8(trans, CSR_MSIX_RX_IVAR(idx), + MSIX_FH_INT_CAUSES_Q(idx - offset)); + val |= BIT(MSIX_FH_INT_CAUSES_Q(idx)); + } + iwl_write32(trans, CSR_MSIX_FH_INT_MASK_AD, ~val); + + val = MSIX_FH_INT_CAUSES_Q(0); + if (trans_pcie->shared_vec_mask & IWL_SHARED_IRQ_NON_RX) + val |= MSIX_NON_AUTO_CLEAR_CAUSE; + iwl_write8(trans, CSR_MSIX_RX_IVAR(0), val); + + if (trans_pcie->shared_vec_mask & IWL_SHARED_IRQ_FIRST_RSS) + iwl_write8(trans, CSR_MSIX_RX_IVAR(1), val); +} + static void iwl_pcie_init_msix(struct iwl_trans_pcie *trans_pcie) { - u32 val, max_rx_vector, i; struct iwl_trans *trans = trans_pcie->trans; - max_rx_vector = trans_pcie->allocated_vector - 1; - if (!trans_pcie->msix_enabled) { if (trans->cfg->mq_rx_supported) iwl_write_prph(trans, UREG_CHICK, @@ -1446,25 +1491,16 @@ static void iwl_pcie_init_msix(struct iwl_trans_pcie *trans_pcie) iwl_write_prph(trans, UREG_CHICK, UREG_CHICK_MSIX_ENABLE); /* - * Each cause from the list above and the RX causes is represented as - * a byte in the IVAR table. We access the first (N - 1) bytes and map - * them to the (N - 1) vectors so these vectors will be used as rx - * vectors. Then access all non rx causes and map them to the - * default queue (N'th queue). + * Each cause from the causes list above and the RX causes is + * represented as a byte in the IVAR table. The first nibble + * represents the bound interrupt vector of the cause, the second + * represents no auto clear for this cause. This will be set if its + * interrupt vector is bound to serve other causes. */ - for (i = 0; i < max_rx_vector; i++) { - iwl_write8(trans, CSR_MSIX_RX_IVAR(i), MSIX_FH_INT_CAUSES_Q(i)); - iwl_clear_bit(trans, CSR_MSIX_FH_INT_MASK_AD, - BIT(MSIX_FH_INT_CAUSES_Q(i))); - } + iwl_pcie_map_rx_causes(trans); + + iwl_pcie_map_non_rx_causes(trans); - for (i = 0; i < ARRAY_SIZE(causes_list); i++) { - val = trans_pcie->default_irq_num | - MSIX_NON_AUTO_CLEAR_CAUSE; - iwl_write8(trans, CSR_MSIX_IVAR(causes_list[i].addr), val); - iwl_clear_bit(trans, causes_list[i].mask_reg, - causes_list[i].cause_num); - } trans_pcie->fh_init_mask = ~iwl_read32(trans, CSR_MSIX_FH_INT_MASK_AD); trans_pcie->fh_mask = trans_pcie->fh_init_mask; @@ -1477,9 +1513,8 @@ static void iwl_pcie_set_interrupt_capa(struct pci_dev *pdev, struct iwl_trans *trans) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + int max_vector, nvec, i; u16 pci_cmd; - int max_vector; - int ret, i; if (trans->cfg->mq_rx_supported) { max_vector = min_t(u32, (num_possible_cpus() + 2), @@ -1487,33 +1522,48 @@ static void iwl_pcie_set_interrupt_capa(struct pci_dev *pdev, for (i = 0; i < max_vector; i++) trans_pcie->msix_entries[i].entry = i; - ret = pci_enable_msix_range(pdev, trans_pcie->msix_entries, - MSIX_MIN_INTERRUPT_VECTORS, - max_vector); - if (ret > 1) { + nvec = pci_enable_msix_range(pdev, trans_pcie->msix_entries, + MSIX_MIN_INTERRUPT_VECTORS, + max_vector); + if (nvec < 0) { IWL_DEBUG_INFO(trans, - "Enable MSI-X allocate %d interrupt vector\n", - ret); - trans_pcie->allocated_vector = ret; - trans_pcie->default_irq_num = - trans_pcie->allocated_vector - 1; - trans_pcie->trans->num_rx_queues = - trans_pcie->allocated_vector - 1; - trans_pcie->msix_enabled = true; - - return; + "ret = %d failed to enable msi-x mode move to msi mode\n", + nvec); + goto msi; } - IWL_DEBUG_INFO(trans, - "ret = %d %s move to msi mode\n", ret, - (ret == 1) ? - "can't allocate more than 1 interrupt vector" : - "failed to enable msi-x mode"); - pci_disable_msix(pdev); - } - ret = pci_enable_msi(pdev); - if (ret) { - dev_err(&pdev->dev, "pci_enable_msi failed - %d\n", ret); + IWL_DEBUG_INFO(trans, + "Enable MSI-X allocate %d interrupt vector\n", + nvec); + trans_pcie->def_irq = (nvec == max_vector) ? nvec - 1 : 0; + /* + * In case the OS provides fewer interrupts than requested, + * different causes will share the same interrupt vector + * as follow: + * One interrupt less: non rx causes shared with FBQ. + * Two interrupts less: non rx causes shared with FBQ and RSS. + * More than two interrupts: we will use fewer RSS queues. + */ + if (nvec <= num_online_cpus()) { + trans_pcie->trans->num_rx_queues = nvec + 1; + trans_pcie->shared_vec_mask = IWL_SHARED_IRQ_NON_RX | + IWL_SHARED_IRQ_FIRST_RSS; + } else if (nvec == num_online_cpus() + 1) { + trans_pcie->trans->num_rx_queues = nvec; + trans_pcie->shared_vec_mask = IWL_SHARED_IRQ_NON_RX; + } else { + trans_pcie->trans->num_rx_queues = nvec - 1; + } + + trans_pcie->alloc_vecs = nvec; + trans_pcie->msix_enabled = true; + return; + } +msi: + + nvec = pci_enable_msi(pdev); + if (nvec) { + dev_err(&pdev->dev, "pci_enable_msi failed - %d\n", nvec); /* enable rfkill interrupt: hw bug w/a */ pci_read_config_word(pdev, PCI_COMMAND, &pci_cmd); if (pci_cmd & PCI_COMMAND_INTX_DISABLE) { @@ -1526,16 +1576,14 @@ static void iwl_pcie_set_interrupt_capa(struct pci_dev *pdev, static int iwl_pcie_init_msix_handler(struct pci_dev *pdev, struct iwl_trans_pcie *trans_pcie) { - int i, last_vector; + int i; - last_vector = trans_pcie->trans->num_rx_queues; - - for (i = 0; i < trans_pcie->allocated_vector; i++) { + for (i = 0; i < trans_pcie->alloc_vecs; i++) { int ret; ret = request_threaded_irq(trans_pcie->msix_entries[i].vector, iwl_pcie_msix_isr, - (i == last_vector) ? + (i == trans_pcie->def_irq) ? iwl_pcie_irq_msix_handler : iwl_pcie_irq_rx_msix_handler, IRQF_SHARED, @@ -1712,7 +1760,7 @@ void iwl_trans_pcie_free(struct iwl_trans *trans) iwl_pcie_rx_free(trans); if (trans_pcie->msix_enabled) { - for (i = 0; i < trans_pcie->allocated_vector; i++) + for (i = 0; i < trans_pcie->alloc_vecs; i++) free_irq(trans_pcie->msix_entries[i].vector, &trans_pcie->msix_entries[i]); From 7585c354637bb003ce612dd22f5047c015545ef4 Mon Sep 17 00:00:00 2001 From: Liad Kaufman Date: Thu, 7 Jul 2016 11:00:26 +0300 Subject: [PATCH 1059/1715] iwlwifi: mvm: fix pending frames tracking on tx resp In iwl_mvm_rx_tx_cmd_single(), when checking if a given TID is aggregated, the driver doesn't check whether or not the queue itself can be aggregated. For example, a management queue might be marked as aggregated if TID 0 is aggregated on a (different) data queue. Make sure that mgmt frames are sent with TID IWL_TID_NON_QOS, and in this way make sure no mixups of this sort happen. Fixes: commit 24afba7690e4 ("iwlwifi: mvm: support bss dynamic alloc/dealloc of queues") Signed-off-by: Liad Kaufman Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index 96482a5de38c..10517778faf4 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -920,9 +920,13 @@ static int iwl_mvm_tx_mpdu(struct iwl_mvm *mvm, struct sk_buff *skb, tid = IWL_MAX_TID_COUNT; } - if (iwl_mvm_is_dqa_supported(mvm)) + if (iwl_mvm_is_dqa_supported(mvm)) { txq_id = mvmsta->tid_data[tid].txq_id; + if (ieee80211_is_mgmt(fc)) + tx_cmd->tid_tspec = IWL_TID_NON_QOS; + } + /* Copy MAC header from skb into command buffer */ memcpy(tx_cmd->hdr, hdr, hdrlen); From a0315dea9091d1ebc1534f6129b3fc9942b8ca99 Mon Sep 17 00:00:00 2001 From: Liad Kaufman Date: Thu, 7 Jul 2016 13:25:59 +0300 Subject: [PATCH 1060/1715] iwlwifi: mvm: free reserved queue on STA removal When a STA is removed in DQA mode, if no traffic went through its reserved queue, the txq continues to be marked as reserved and no STA can use it. Make sure that in such a case the reserved queue is marked as free when the STA is removed. Fixes: commit 24afba7690e4 ("iwlwifi: mvm: support bss dynamic alloc/dealloc of queues") Signed-off-by: Liad Kaufman Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 24 +++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index a92e12edeb3a..11c7e1591b3b 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -1496,9 +1496,31 @@ int iwl_mvm_rm_sta(struct iwl_mvm *mvm, ret = iwl_mvm_drain_sta(mvm, mvm_sta, false); /* If DQA is supported - the queues can be disabled now */ - if (iwl_mvm_is_dqa_supported(mvm)) + if (iwl_mvm_is_dqa_supported(mvm)) { + u8 reserved_txq = mvm_sta->reserved_queue; + enum iwl_mvm_queue_status *status; + iwl_mvm_disable_sta_queues(mvm, vif, mvm_sta); + /* + * If no traffic has gone through the reserved TXQ - it + * is still marked as IWL_MVM_QUEUE_RESERVED, and + * should be manually marked as free again + */ + spin_lock_bh(&mvm->queue_info_lock); + status = &mvm->queue_info[reserved_txq].status; + if (WARN((*status != IWL_MVM_QUEUE_RESERVED) && + (*status != IWL_MVM_QUEUE_FREE), + "sta_id %d reserved txq %d status %d", + mvm_sta->sta_id, reserved_txq, *status)) { + spin_unlock_bh(&mvm->queue_info_lock); + return -EINVAL; + } + + *status = IWL_MVM_QUEUE_FREE; + spin_unlock_bh(&mvm->queue_info_lock); + } + if (vif->type == NL80211_IFTYPE_STATION && mvmvif->ap_sta_id == mvm_sta->sta_id) { /* if associated - we can't remove the AP STA now */ From 7c8d91eb312f30e96db04c710988394242a83565 Mon Sep 17 00:00:00 2001 From: Haim Dreyfuss Date: Sun, 13 Mar 2016 17:51:59 +0200 Subject: [PATCH 1061/1715] iwlwifi: pcie: Set affinity mask for rx interrupt vectors per cpu In order to utilize the host's CPUs in the most efficient way we bind each rx interrupt vector to each CPU on the host. Each rx interrupt is prioritized to execute only on the designated CPU rather than any CPU. Processor affinity takes advantage of the fact that some remnants of a process that was run on a given processor may remain in that processor's memory state for example, data in the CPU cache after another process is run on that CPU. Scheduling that process to execute on the same processor could result in an efficient use of process by reducing performance-degrading situations such as cache misses and parallel processing. Signed-off-by: Haim Dreyfuss Signed-off-by: Luca Coelho --- .../wireless/intel/iwlwifi/pcie/internal.h | 2 ++ .../net/wireless/intel/iwlwifi/pcie/trans.c | 32 ++++++++++++++++++- 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h index 8ad92fac8592..987a0770fb5b 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h +++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h @@ -37,6 +37,7 @@ #include #include #include +#include #include "iwl-fh.h" #include "iwl-csr.h" @@ -426,6 +427,7 @@ struct iwl_trans_pcie { u32 hw_init_mask; u32 fh_mask; u32 hw_mask; + cpumask_t affinity_mask[IWL_MAX_RX_HW_QUEUES]; }; static inline struct iwl_trans_pcie * diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index be32fe1683f5..d7521c1d90ec 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -1573,6 +1573,30 @@ msi: } } +static void iwl_pcie_irq_set_affinity(struct iwl_trans *trans) +{ + int iter_rx_q, i, ret, cpu, offset; + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + + i = trans_pcie->shared_vec_mask & IWL_SHARED_IRQ_FIRST_RSS ? 0 : 1; + iter_rx_q = trans_pcie->trans->num_rx_queues - 1 + i; + offset = 1 + i; + for (; i < iter_rx_q ; i++) { + /* + * Get the cpu prior to the place to search + * (i.e. return will be > i - 1). + */ + cpu = cpumask_next(i - offset, cpu_online_mask); + cpumask_set_cpu(cpu, &trans_pcie->affinity_mask[i]); + ret = irq_set_affinity_hint(trans_pcie->msix_entries[i].vector, + &trans_pcie->affinity_mask[i]); + if (ret) + IWL_ERR(trans_pcie->trans, + "Failed to set affinity mask for IRQ %d\n", + i); + } +} + static int iwl_pcie_init_msix_handler(struct pci_dev *pdev, struct iwl_trans_pcie *trans_pcie) { @@ -1601,6 +1625,7 @@ static int iwl_pcie_init_msix_handler(struct pci_dev *pdev, return ret; } } + iwl_pcie_irq_set_affinity(trans_pcie->trans); return 0; } @@ -1760,9 +1785,14 @@ void iwl_trans_pcie_free(struct iwl_trans *trans) iwl_pcie_rx_free(trans); if (trans_pcie->msix_enabled) { - for (i = 0; i < trans_pcie->alloc_vecs; i++) + for (i = 0; i < trans_pcie->alloc_vecs; i++) { + irq_set_affinity_hint( + trans_pcie->msix_entries[i].vector, + NULL); + free_irq(trans_pcie->msix_entries[i].vector, &trans_pcie->msix_entries[i]); + } pci_disable_msix(trans_pcie->pci_dev); trans_pcie->msix_enabled = false; From 89e4ad53ae53e97a81c0bb1feeb40bc69d87d13c Mon Sep 17 00:00:00 2001 From: Oren Givon Date: Thu, 7 Jul 2016 10:40:27 +0300 Subject: [PATCH 1062/1715] iwlwifi: add the new 9560 series Add a new config struct for the new 9560 series and add the 4 new PCI IDs for it. Signed-off-by: Oren Givon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/iwl-9000.c | 11 +++++++++++ drivers/net/wireless/intel/iwlwifi/iwl-config.h | 1 + drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 4 ++++ 3 files changed, 16 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-9000.c b/drivers/net/wireless/intel/iwlwifi/iwl-9000.c index 1fec6afbe041..4d6898515046 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-9000.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-9000.c @@ -187,6 +187,17 @@ const struct iwl_cfg iwl9460_2ac_cfg = { .integrated = true, }; +const struct iwl_cfg iwl9560_2ac_cfg = { + .name = "Intel(R) Dual Band Wireless AC 9560", + .fw_name_pre = IWL9000_FW_PRE, + IWL_DEVICE_9000, + .ht_params = &iwl9000_ht_params, + .nvm_ver = IWL9000_NVM_VERSION, + .nvm_calib_ver = IWL9000_TX_POWER_VERSION, + .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, + .integrated = true, +}; + /* * TODO the struct below is for internal testing only this should be * removed by EO 2016~ diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h index 7008319532ef..63d3f9b18bcb 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h @@ -454,6 +454,7 @@ extern const struct iwl_cfg iwl9160_2ac_cfg; extern const struct iwl_cfg iwl9260_2ac_cfg; extern const struct iwl_cfg iwl9270_2ac_cfg; extern const struct iwl_cfg iwl9460_2ac_cfg; +extern const struct iwl_cfg iwl9560_2ac_cfg; extern const struct iwl_cfg iwla000_2ac_cfg; #endif /* CONFIG_IWLMVM */ diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index c6e24fb286be..74aa416831bf 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -523,6 +523,10 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x9DF0, 0x0060, iwl9460_2ac_cfg)}, {IWL_PCI_DEVICE(0xA370, 0x0060, iwl9460_2ac_cfg)}, {IWL_PCI_DEVICE(0x31DC, 0x0060, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x0030, iwl9560_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0030, iwl9560_2ac_cfg)}, + {IWL_PCI_DEVICE(0xA370, 0x0030, iwl9560_2ac_cfg)}, + {IWL_PCI_DEVICE(0x31DC, 0x0030, iwl9560_2ac_cfg)}, /* a000 Series */ {IWL_PCI_DEVICE(0x2720, 0x0A10, iwla000_2ac_cfg)}, From a3e939dfe0a1e12a37ecd08ab89bb6115a490909 Mon Sep 17 00:00:00 2001 From: Oren Givon Date: Thu, 7 Jul 2016 09:11:56 +0300 Subject: [PATCH 1063/1715] iwlwifi: add the new 8275 series Add a new config struct for the new 8275 series and add the first PCI ID for it. Signed-off-by: Oren Givon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/iwl-8000.c | 11 +++++++++++ drivers/net/wireless/intel/iwlwifi/iwl-config.h | 1 + drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 1 + 3 files changed, 13 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-8000.c b/drivers/net/wireless/intel/iwlwifi/iwl-8000.c index 6c6725e808d4..cde4f265e4e5 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-8000.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-8000.c @@ -212,6 +212,17 @@ const struct iwl_cfg iwl8265_2ac_cfg = { .vht_mu_mimo_supported = true, }; +const struct iwl_cfg iwl8275_2ac_cfg = { + .name = "Intel(R) Dual Band Wireless AC 8275", + .fw_name_pre = IWL8265_FW_PRE, + IWL_DEVICE_8265, + .ht_params = &iwl8000_ht_params, + .nvm_ver = IWL8000_NVM_VERSION, + .nvm_calib_ver = IWL8000_TX_POWER_VERSION, + .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, + .vht_mu_mimo_supported = true, +}; + const struct iwl_cfg iwl4165_2ac_cfg = { .name = "Intel(R) Dual Band Wireless AC 4165", .fw_name_pre = IWL8000_FW_PRE, diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h index 63d3f9b18bcb..6cecb1adf0c8 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h @@ -445,6 +445,7 @@ extern const struct iwl_cfg iwl7265d_n_cfg; extern const struct iwl_cfg iwl8260_2n_cfg; extern const struct iwl_cfg iwl8260_2ac_cfg; extern const struct iwl_cfg iwl8265_2ac_cfg; +extern const struct iwl_cfg iwl8275_2ac_cfg; extern const struct iwl_cfg iwl4165_2ac_cfg; extern const struct iwl_cfg iwl8260_2ac_sdio_cfg; extern const struct iwl_cfg iwl8265_2ac_sdio_cfg; diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 74aa416831bf..14c6e94ecbf2 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -500,6 +500,7 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x24FD, 0x0930, iwl8265_2ac_cfg)}, {IWL_PCI_DEVICE(0x24FD, 0x0950, iwl8265_2ac_cfg)}, {IWL_PCI_DEVICE(0x24FD, 0x0850, iwl8265_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24FD, 0x0012, iwl8275_2ac_cfg)}, /* 9000 Series */ {IWL_PCI_DEVICE(0x271B, 0x0010, iwl9160_2ac_cfg)}, From 72c240fed0d1334d6de5c22b0ef16bdf5b542447 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 12 Jul 2016 11:40:57 +0000 Subject: [PATCH 1064/1715] iwlwifi: mvm: use setup_timer instead of init_timer and data fields Use setup_timer function instead of initializing timer with the function and data fields Signed-off-by: Wei Yongjun Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index 11c7e1591b3b..216aa54c8679 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -2050,11 +2050,9 @@ int iwl_mvm_sta_rx_agg(struct iwl_mvm *mvm, struct ieee80211_sta *sta, baid_data->baid = baid; baid_data->timeout = timeout; baid_data->last_rx = jiffies; - init_timer(&baid_data->session_timer); - baid_data->session_timer.function = - iwl_mvm_rx_agg_session_expired; - baid_data->session_timer.data = - (unsigned long)&mvm->baid_map[baid]; + setup_timer(&baid_data->session_timer, + iwl_mvm_rx_agg_session_expired, + (unsigned long)&mvm->baid_map[baid]); baid_data->mvm = mvm; baid_data->tid = tid; baid_data->sta_id = mvm_sta->sta_id; From 06f4b08179fa0590bd3a53e9af16b7197460947f Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Thu, 21 Jul 2016 15:39:29 +0300 Subject: [PATCH 1065/1715] iwlwifi: pcie: change indentation of iwl_pcie_set_interrupt_capa() Function is very indented. Go to msi section if needed to avoid it and by that make the code more readable. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- .../net/wireless/intel/iwlwifi/pcie/trans.c | 88 +++++++++---------- 1 file changed, 44 insertions(+), 44 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index d7521c1d90ec..43c3915ed352 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -1513,57 +1513,57 @@ static void iwl_pcie_set_interrupt_capa(struct pci_dev *pdev, struct iwl_trans *trans) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); - int max_vector, nvec, i; + int max_irqs, num_irqs, i, ret; u16 pci_cmd; - if (trans->cfg->mq_rx_supported) { - max_vector = min_t(u32, (num_possible_cpus() + 2), - IWL_MAX_RX_HW_QUEUES); - for (i = 0; i < max_vector; i++) - trans_pcie->msix_entries[i].entry = i; + if (!trans->cfg->mq_rx_supported) + goto enable_msi; - nvec = pci_enable_msix_range(pdev, trans_pcie->msix_entries, - MSIX_MIN_INTERRUPT_VECTORS, - max_vector); - if (nvec < 0) { - IWL_DEBUG_INFO(trans, - "ret = %d failed to enable msi-x mode move to msi mode\n", - nvec); - goto msi; - } + max_irqs = min_t(u32, num_possible_cpus() + 2, IWL_MAX_RX_HW_QUEUES); + for (i = 0; i < max_irqs; i++) + trans_pcie->msix_entries[i].entry = i; + num_irqs = pci_enable_msix_range(pdev, trans_pcie->msix_entries, + MSIX_MIN_INTERRUPT_VECTORS, + max_irqs); + if (num_irqs < 0) { IWL_DEBUG_INFO(trans, - "Enable MSI-X allocate %d interrupt vector\n", - nvec); - trans_pcie->def_irq = (nvec == max_vector) ? nvec - 1 : 0; - /* - * In case the OS provides fewer interrupts than requested, - * different causes will share the same interrupt vector - * as follow: - * One interrupt less: non rx causes shared with FBQ. - * Two interrupts less: non rx causes shared with FBQ and RSS. - * More than two interrupts: we will use fewer RSS queues. - */ - if (nvec <= num_online_cpus()) { - trans_pcie->trans->num_rx_queues = nvec + 1; - trans_pcie->shared_vec_mask = IWL_SHARED_IRQ_NON_RX | - IWL_SHARED_IRQ_FIRST_RSS; - } else if (nvec == num_online_cpus() + 1) { - trans_pcie->trans->num_rx_queues = nvec; - trans_pcie->shared_vec_mask = IWL_SHARED_IRQ_NON_RX; - } else { - trans_pcie->trans->num_rx_queues = nvec - 1; - } - - trans_pcie->alloc_vecs = nvec; - trans_pcie->msix_enabled = true; - return; + "Failed to enable msi-x mode (ret %d). Moving to msi mode.\n", + num_irqs); + goto enable_msi; } -msi: + trans_pcie->def_irq = (num_irqs == max_irqs) ? num_irqs - 1 : 0; - nvec = pci_enable_msi(pdev); - if (nvec) { - dev_err(&pdev->dev, "pci_enable_msi failed - %d\n", nvec); + IWL_DEBUG_INFO(trans, + "MSI-X enabled. %d interrupt vectors were allocated\n", + num_irqs); + + /* + * In case the OS provides fewer interrupts than requested, different + * causes will share the same interrupt vector as follows: + * One interrupt less: non rx causes shared with FBQ. + * Two interrupts less: non rx causes shared with FBQ and RSS. + * More than two interrupts: we will use fewer RSS queues. + */ + if (num_irqs <= num_online_cpus()) { + trans_pcie->trans->num_rx_queues = num_irqs + 1; + trans_pcie->shared_vec_mask = IWL_SHARED_IRQ_NON_RX | + IWL_SHARED_IRQ_FIRST_RSS; + } else if (num_irqs == num_online_cpus() + 1) { + trans_pcie->trans->num_rx_queues = num_irqs; + trans_pcie->shared_vec_mask = IWL_SHARED_IRQ_NON_RX; + } else { + trans_pcie->trans->num_rx_queues = num_irqs - 1; + } + + trans_pcie->alloc_vecs = num_irqs; + trans_pcie->msix_enabled = true; + return; + +enable_msi: + ret = pci_enable_msi(pdev); + if (ret) { + dev_err(&pdev->dev, "pci_enable_msi failed - %d\n", ret); /* enable rfkill interrupt: hw bug w/a */ pci_read_config_word(pdev, PCI_COMMAND, &pci_cmd); if (pci_cmd & PCI_COMMAND_INTX_DISABLE) { From 5fe343978b663e347e2e478e9005ddbf228ca508 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Tue, 26 Jul 2016 23:22:01 +0300 Subject: [PATCH 1066/1715] iwlwifi: mvm: bump max API to 26 The driver now support version 26 of the firmware APIs. Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/iwl-7000.c | 4 ++-- drivers/net/wireless/intel/iwlwifi/iwl-8000.c | 4 ++-- drivers/net/wireless/intel/iwlwifi/iwl-9000.c | 2 +- drivers/net/wireless/intel/iwlwifi/iwl-a000.c | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-7000.c b/drivers/net/wireless/intel/iwlwifi/iwl-7000.c index 64690c14ff4d..aa575fb9dea0 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-7000.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-7000.c @@ -73,8 +73,8 @@ /* Highest firmware API version supported */ #define IWL7260_UCODE_API_MAX 17 #define IWL7265_UCODE_API_MAX 17 -#define IWL7265D_UCODE_API_MAX 24 -#define IWL3168_UCODE_API_MAX 24 +#define IWL7265D_UCODE_API_MAX 26 +#define IWL3168_UCODE_API_MAX 26 /* Lowest firmware API version supported */ #define IWL7260_UCODE_API_MIN 16 diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-8000.c b/drivers/net/wireless/intel/iwlwifi/iwl-8000.c index cde4f265e4e5..990cf2b17517 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-8000.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-8000.c @@ -70,8 +70,8 @@ #include "iwl-agn-hw.h" /* Highest firmware API version supported */ -#define IWL8000_UCODE_API_MAX 24 -#define IWL8265_UCODE_API_MAX 24 +#define IWL8000_UCODE_API_MAX 26 +#define IWL8265_UCODE_API_MAX 26 /* Lowest firmware API version supported */ #define IWL8000_UCODE_API_MIN 16 diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-9000.c b/drivers/net/wireless/intel/iwlwifi/iwl-9000.c index 4d6898515046..a2c7946c12c9 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-9000.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-9000.c @@ -55,7 +55,7 @@ #include "iwl-agn-hw.h" /* Highest firmware API version supported */ -#define IWL9000_UCODE_API_MAX 24 +#define IWL9000_UCODE_API_MAX 26 /* Lowest firmware API version supported */ #define IWL9000_UCODE_API_MIN 16 diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-a000.c b/drivers/net/wireless/intel/iwlwifi/iwl-a000.c index 4d78232c8afe..ea1618525878 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-a000.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-a000.c @@ -55,7 +55,7 @@ #include "iwl-agn-hw.h" /* Highest firmware API version supported */ -#define IWL_A000_UCODE_API_MAX 24 +#define IWL_A000_UCODE_API_MAX 26 /* Lowest firmware API version supported */ #define IWL_A000_UCODE_API_MIN 24 From 9fb064df6d57bf09ed3f8f964c8b0789b55206e7 Mon Sep 17 00:00:00 2001 From: Haim Dreyfuss Date: Tue, 26 Jul 2016 18:03:07 +0300 Subject: [PATCH 1067/1715] iwlwifi: pcie: replace possible_cpus() with online_cpus() in MSIX mode In MSIX mode the number of irq depends on the number of possible cpus existing on the host. This cause to bug in case there are offline cores. Take into account only the online CPUs instead. Also save it in temporary variable. Fixes: commit 2e5d4a8f61dc ("iwlwifi: pcie: Add new configuration to enable MSIX") Signed-off-by: Haim Dreyfuss Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index 43c3915ed352..188b0dee542c 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -1513,13 +1513,14 @@ static void iwl_pcie_set_interrupt_capa(struct pci_dev *pdev, struct iwl_trans *trans) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); - int max_irqs, num_irqs, i, ret; + int max_irqs, num_irqs, i, ret, nr_online_cpus; u16 pci_cmd; if (!trans->cfg->mq_rx_supported) goto enable_msi; - max_irqs = min_t(u32, num_possible_cpus() + 2, IWL_MAX_RX_HW_QUEUES); + nr_online_cpus = num_online_cpus(); + max_irqs = min_t(u32, nr_online_cpus + 2, IWL_MAX_RX_HW_QUEUES); for (i = 0; i < max_irqs; i++) trans_pcie->msix_entries[i].entry = i; @@ -1545,11 +1546,11 @@ static void iwl_pcie_set_interrupt_capa(struct pci_dev *pdev, * Two interrupts less: non rx causes shared with FBQ and RSS. * More than two interrupts: we will use fewer RSS queues. */ - if (num_irqs <= num_online_cpus()) { + if (num_irqs <= nr_online_cpus) { trans_pcie->trans->num_rx_queues = num_irqs + 1; trans_pcie->shared_vec_mask = IWL_SHARED_IRQ_NON_RX | IWL_SHARED_IRQ_FIRST_RSS; - } else if (num_irqs == num_online_cpus() + 1) { + } else if (num_irqs == nr_online_cpus + 1) { trans_pcie->trans->num_rx_queues = num_irqs; trans_pcie->shared_vec_mask = IWL_SHARED_IRQ_NON_RX; } else { From 612da1efc07f28ea9e64402820dd25287ff5233b Mon Sep 17 00:00:00 2001 From: Sharon Dvir Date: Wed, 3 Aug 2016 10:55:45 +0300 Subject: [PATCH 1068/1715] iwlwifi: unify iwl_get_ucode_image() implementations Avoid multiple implementations. Signed-off-by: Sharon Dvir Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/dvm/ucode.c | 11 +---------- drivers/net/wireless/intel/iwlwifi/iwl-fw.h | 9 +++++++++ drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 13 ++----------- 3 files changed, 12 insertions(+), 21 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/ucode.c b/drivers/net/wireless/intel/iwlwifi/dvm/ucode.c index b662cf35b033..c7509c51e9d9 100644 --- a/drivers/net/wireless/intel/iwlwifi/dvm/ucode.c +++ b/drivers/net/wireless/intel/iwlwifi/dvm/ucode.c @@ -46,15 +46,6 @@ * ******************************************************************************/ -static inline const struct fw_img * -iwl_get_ucode_image(struct iwl_priv *priv, enum iwl_ucode_type ucode_type) -{ - if (ucode_type >= IWL_UCODE_TYPE_MAX) - return NULL; - - return &priv->fw->img[ucode_type]; -} - /* * Calibration */ @@ -330,7 +321,7 @@ int iwl_load_ucode_wait_alive(struct iwl_priv *priv, enum iwl_ucode_type old_type; static const u16 alive_cmd[] = { REPLY_ALIVE }; - fw = iwl_get_ucode_image(priv, ucode_type); + fw = iwl_get_ucode_image(priv->fw, ucode_type); if (WARN_ON(!fw)) return -EINVAL; diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-fw.h b/drivers/net/wireless/intel/iwlwifi/iwl-fw.h index 74ea68d1063c..5f229556339a 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-fw.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-fw.h @@ -329,4 +329,13 @@ iwl_fw_dbg_conf_usniffer(const struct iwl_fw *fw, u8 id) return conf_tlv->usniffer; } +static inline const struct fw_img * +iwl_get_ucode_image(const struct iwl_fw *fw, enum iwl_ucode_type ucode_type) +{ + if (ucode_type >= IWL_UCODE_TYPE_MAX) + return NULL; + + return &fw->img[ucode_type]; +} + #endif /* __iwl_fw_h__ */ diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index b951a7f06060..7322c4394a9a 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -90,15 +90,6 @@ struct iwl_mvm_alive_data { u32 scd_base_addr; }; -static inline const struct fw_img * -iwl_get_ucode_image(struct iwl_mvm *mvm, enum iwl_ucode_type ucode_type) -{ - if (ucode_type >= IWL_UCODE_TYPE_MAX) - return NULL; - - return &mvm->fw->img[ucode_type]; -} - static int iwl_send_tx_ant_cfg(struct iwl_mvm *mvm, u8 valid_tx_ant) { struct iwl_tx_ant_cfg_cmd tx_ant_cmd = { @@ -592,9 +583,9 @@ static int iwl_mvm_load_ucode_wait_alive(struct iwl_mvm *mvm, iwl_fw_dbg_conf_usniffer(mvm->fw, FW_DBG_START_FROM_ALIVE) && !(fw_has_capa(&mvm->fw->ucode_capa, IWL_UCODE_TLV_CAPA_USNIFFER_UNIFIED))) - fw = iwl_get_ucode_image(mvm, IWL_UCODE_REGULAR_USNIFFER); + fw = iwl_get_ucode_image(mvm->fw, IWL_UCODE_REGULAR_USNIFFER); else - fw = iwl_get_ucode_image(mvm, ucode_type); + fw = iwl_get_ucode_image(mvm->fw, ucode_type); if (WARN_ON(!fw)) return -EINVAL; mvm->cur_ucode = ucode_type; From 723d11a4cdf63544a01ae7a474c202a914b67a3b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 3 Aug 2016 09:34:39 +0200 Subject: [PATCH 1069/1715] iwlwifi: mvm: make RSS RX more robust If the firmware ever decides to send any new/more notifications to the RSS queues, the driver would currently try to interpret those as REPLY_RX_MPDU_CMD and, if the notification was small, access invalid memory. Prevent that by checking for REPLY_RX_MPDU_CMD explicitly which allows ignoring unexpected notifications. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index 55d9096da68c..43ea1e5fdfc6 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -1672,7 +1672,7 @@ static void iwl_mvm_rx_mq_rss(struct iwl_op_mode *op_mode, else if (unlikely(pkt->hdr.cmd == RX_QUEUES_NOTIFICATION && pkt->hdr.group_id == DATA_PATH_GROUP)) iwl_mvm_rx_queue_notif(mvm, rxb, queue); - else + else if (likely(pkt->hdr.cmd == REPLY_RX_MPDU_CMD)) iwl_mvm_rx_mpdu_mq(mvm, napi, rxb, queue); } From 9b8568360585854c4fe99572e4fbffc736706cfd Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 3 Aug 2016 13:38:59 +0200 Subject: [PATCH 1070/1715] iwlwifi: mvm: remove pointless _bh from spinlock in timer Inside the reorder timer expire function, there's no point in disabling BHs since it is in BH context. Remove that. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index 08d8a8abb918..7845dbefb67a 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -452,10 +452,10 @@ void iwl_mvm_reorder_timer_expired(unsigned long data) u16 sn = 0, index = 0; bool expired = false; - spin_lock_bh(&buf->lock); + spin_lock(&buf->lock); if (!buf->num_stored || buf->removed) { - spin_unlock_bh(&buf->lock); + spin_unlock(&buf->lock); return; } @@ -492,7 +492,7 @@ void iwl_mvm_reorder_timer_expired(unsigned long data) buf->reorder_time[index] + 1 + RX_REORDER_BUF_TIMEOUT_MQ); } - spin_unlock_bh(&buf->lock); + spin_unlock(&buf->lock); } static void iwl_mvm_del_ba(struct iwl_mvm *mvm, int queue, From fd659f8e75b7f8214b4f323d46248205b4566e10 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 3 Aug 2016 13:52:56 +0200 Subject: [PATCH 1071/1715] iwlwifi: mvm: tighten BAID range check As pointed out by smatch, checking the BAID for just >= INVALID is a bad idea since only 32 (IWL_MAX_BAID) actually exist. Check the range for that and print invalid ones in the warning. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index 7845dbefb67a..d6d9ec401b44 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -503,7 +503,7 @@ static void iwl_mvm_del_ba(struct iwl_mvm *mvm, int queue, struct iwl_mvm_reorder_buffer *reorder_buf; u8 baid = data->baid; - if (WARN_ON_ONCE(baid >= IWL_RX_REORDER_DATA_INVALID_BAID)) + if (WARN_ONCE(baid >= IWL_MAX_BAID, "invalid BAID: %x\n", baid)) return; rcu_read_lock(); From 85574dbf9d12b393448334897acf13f567bac8f9 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Wed, 14 Sep 2016 23:13:15 +0800 Subject: [PATCH 1072/1715] net: ethernet: mediatek: refactoring mtk_hw_init to be reused the existing mtk_hw_init includes hardware and software initialization inside so that it is slightly hard to reuse them for the process of the reset recovery, so some splitting is made here for keeping hardware initializing relevant thing and the else such as IRQ registration and MDIO initialization what are all about to the interface of core driver moved to the other proper place because they have no needs to register IRQ and re-initialize structure again during the reset process. Signed-off-by: Sean Wang Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 62 +++++++++++---------- 1 file changed, 34 insertions(+), 28 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 6e01f1fba068..616170137588 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1415,7 +1415,12 @@ static int mtk_stop(struct net_device *dev) static int __init mtk_hw_init(struct mtk_eth *eth) { - int err, i; + int i; + + clk_prepare_enable(eth->clks[MTK_CLK_ETHIF]); + clk_prepare_enable(eth->clks[MTK_CLK_ESW]); + clk_prepare_enable(eth->clks[MTK_CLK_GP1]); + clk_prepare_enable(eth->clks[MTK_CLK_GP2]); /* reset the frame engine */ reset_control_assert(eth->rstc); @@ -1441,19 +1446,6 @@ static int __init mtk_hw_init(struct mtk_eth *eth) /* Enable RX VLan Offloading */ mtk_w32(eth, 1, MTK_CDMP_EG_CTRL); - err = devm_request_irq(eth->dev, eth->irq[1], mtk_handle_irq_tx, 0, - dev_name(eth->dev), eth); - if (err) - return err; - err = devm_request_irq(eth->dev, eth->irq[2], mtk_handle_irq_rx, 0, - dev_name(eth->dev), eth); - if (err) - return err; - - err = mtk_mdio_init(eth); - if (err) - return err; - /* disable delay and normal interrupt */ mtk_w32(eth, 0, MTK_QDMA_DELAY_INT); mtk_w32(eth, 0, MTK_PDMA_DELAY_INT); @@ -1786,16 +1778,7 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np) eth->netdev[id]->features |= MTK_HW_FEATURES; eth->netdev[id]->ethtool_ops = &mtk_ethtool_ops; - err = register_netdev(eth->netdev[id]); - if (err) { - dev_err(eth->dev, "error bringing up device\n"); - goto free_netdev; - } eth->netdev[id]->irq = eth->irq[0]; - netif_info(eth, probe, eth->netdev[id], - "mediatek frame engine at 0x%08lx, irq %d\n", - eth->netdev[id]->base_addr, eth->irq[0]); - return 0; free_netdev: @@ -1865,11 +1848,6 @@ static int mtk_probe(struct platform_device *pdev) } } - clk_prepare_enable(eth->clks[MTK_CLK_ETHIF]); - clk_prepare_enable(eth->clks[MTK_CLK_ESW]); - clk_prepare_enable(eth->clks[MTK_CLK_GP1]); - clk_prepare_enable(eth->clks[MTK_CLK_GP2]); - eth->msg_enable = netif_msg_init(mtk_msg_level, MTK_DEFAULT_MSG_ENABLE); INIT_WORK(ð->pending_work, mtk_pending_work); @@ -1890,6 +1868,34 @@ static int mtk_probe(struct platform_device *pdev) goto err_free_dev; } + err = devm_request_irq(eth->dev, eth->irq[1], mtk_handle_irq_tx, 0, + dev_name(eth->dev), eth); + if (err) + goto err_free_dev; + + err = devm_request_irq(eth->dev, eth->irq[2], mtk_handle_irq_rx, 0, + dev_name(eth->dev), eth); + if (err) + goto err_free_dev; + + err = mtk_mdio_init(eth); + if (err) + goto err_free_dev; + + for (i = 0; i < MTK_MAX_DEVS; i++) { + if (!eth->netdev[i]) + continue; + + err = register_netdev(eth->netdev[i]); + if (err) { + dev_err(eth->dev, "error bringing up device\n"); + goto err_free_dev; + } else + netif_info(eth, probe, eth->netdev[i], + "mediatek frame engine at 0x%08lx, irq %d\n", + eth->netdev[i]->base_addr, eth->irq[0]); + } + /* we run 2 devices on the same DMA ring so we need a dummy device * for NAPI to work */ From bf253fb72221cdd9dc31009056f269a420f7bbd9 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Wed, 14 Sep 2016 23:13:16 +0800 Subject: [PATCH 1073/1715] net: ethernet: mediatek: add mtk_hw_deinit call as the opposite to mtk_hw_init call grouping things related to the deinitialization of what mtk_hw_init call does that help to be reused by the reset process and the error path handling. Signed-off-by: Sean Wang Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 616170137588..1f756bd2d18f 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1477,6 +1477,16 @@ static int __init mtk_hw_init(struct mtk_eth *eth) return 0; } +static int mtk_hw_deinit(struct mtk_eth *eth) +{ + clk_disable_unprepare(eth->clks[MTK_CLK_GP2]); + clk_disable_unprepare(eth->clks[MTK_CLK_GP1]); + clk_disable_unprepare(eth->clks[MTK_CLK_ESW]); + clk_disable_unprepare(eth->clks[MTK_CLK_ETHIF]); + + return 0; +} + static int __init mtk_init(struct net_device *dev) { struct mtk_mac *mac = netdev_priv(dev); @@ -1926,10 +1936,7 @@ static int mtk_remove(struct platform_device *pdev) mtk_stop(eth->netdev[i]); } - clk_disable_unprepare(eth->clks[MTK_CLK_ETHIF]); - clk_disable_unprepare(eth->clks[MTK_CLK_ESW]); - clk_disable_unprepare(eth->clks[MTK_CLK_GP1]); - clk_disable_unprepare(eth->clks[MTK_CLK_GP2]); + mtk_hw_deinit(eth); netif_napi_del(ð->tx_napi); netif_napi_del(ð->rx_napi); From 8a8a9e89f801ce5d3d1d57ac48db678caf072147 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Wed, 14 Sep 2016 23:13:17 +0800 Subject: [PATCH 1074/1715] net: ethernet: mediatek: cleanup error path inside mtk_hw_init This cleans up the error path inside mtk_hw_init call, causing it able to exit appropriately when something fails and also includes refactoring mtk_cleanup call to make the partial logic reusable on the error path. Signed-off-by: Sean Wang Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 36 +++++++++++++++++---- 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 1f756bd2d18f..1272316a7d7b 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1567,17 +1567,36 @@ static void mtk_pending_work(struct work_struct *work) rtnl_unlock(); } -static int mtk_cleanup(struct mtk_eth *eth) +static int mtk_free_dev(struct mtk_eth *eth) { int i; for (i = 0; i < MTK_MAC_COUNT; i++) { if (!eth->netdev[i]) continue; - - unregister_netdev(eth->netdev[i]); free_netdev(eth->netdev[i]); } + + return 0; +} + +static int mtk_unreg_dev(struct mtk_eth *eth) +{ + int i; + + for (i = 0; i < MTK_MAC_COUNT; i++) { + if (!eth->netdev[i]) + continue; + unregister_netdev(eth->netdev[i]); + } + + return 0; +} + +static int mtk_cleanup(struct mtk_eth *eth) +{ + mtk_unreg_dev(eth); + mtk_free_dev(eth); cancel_work_sync(ð->pending_work); return 0; @@ -1875,7 +1894,7 @@ static int mtk_probe(struct platform_device *pdev) err = mtk_add_mac(eth, mac_np); if (err) - goto err_free_dev; + goto err_deinit_hw; } err = devm_request_irq(eth->dev, eth->irq[1], mtk_handle_irq_tx, 0, @@ -1899,7 +1918,7 @@ static int mtk_probe(struct platform_device *pdev) err = register_netdev(eth->netdev[i]); if (err) { dev_err(eth->dev, "error bringing up device\n"); - goto err_free_dev; + goto err_deinit_mdio; } else netif_info(eth, probe, eth->netdev[i], "mediatek frame engine at 0x%08lx, irq %d\n", @@ -1919,8 +1938,13 @@ static int mtk_probe(struct platform_device *pdev) return 0; +err_deinit_mdio: + mtk_mdio_cleanup(eth); err_free_dev: - mtk_cleanup(eth); + mtk_free_dev(eth); +err_deinit_hw: + mtk_hw_deinit(eth); + return err; } From 26a2ad8a5418525d21f06083e65b10c932633209 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Wed, 14 Sep 2016 23:13:18 +0800 Subject: [PATCH 1075/1715] net: ethernet: mediatek: add controlling power domain the ethernet belongs to introduce power domain control which the digital circuit of the ethernet belongs to inside the flow of hardware initialization and deinitialization which helps the entire ethernet hardware block could restart cleanly and completely as being back to the initial state when the whole machine reboot. Signed-off-by: Sean Wang Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 1272316a7d7b..01f59115132c 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -1417,6 +1418,9 @@ static int __init mtk_hw_init(struct mtk_eth *eth) { int i; + pm_runtime_enable(eth->dev); + pm_runtime_get_sync(eth->dev); + clk_prepare_enable(eth->clks[MTK_CLK_ETHIF]); clk_prepare_enable(eth->clks[MTK_CLK_ESW]); clk_prepare_enable(eth->clks[MTK_CLK_GP1]); @@ -1484,6 +1488,9 @@ static int mtk_hw_deinit(struct mtk_eth *eth) clk_disable_unprepare(eth->clks[MTK_CLK_ESW]); clk_disable_unprepare(eth->clks[MTK_CLK_ETHIF]); + pm_runtime_put_sync(eth->dev); + pm_runtime_disable(eth->dev); + return 0; } From 9ea4d311509fc11128a464d86745beeafd575051 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Wed, 14 Sep 2016 23:13:19 +0800 Subject: [PATCH 1076/1715] net: ethernet: mediatek: add the whole ethernet reset into the reset process 1) original driver only resets DMA used by descriptor rings which can't guarantee it can recover all various kinds of fatal errors, so the patch tries to reset the underlying hardware resource from scratch on Mediatek SoC required for ethernet running, including power, pin mux control, clock and internal circuits on the ethernet in order to restore into the initial state which the rebooted machine gives. 2) add state variable inside structure mtk_eth to help distinguish mtk_hw_init is called between the initialization during boot time or re-initialization during the reset process. 3) add ge_mode variable inside structure mtk_mac for restoring the interface mode of the current setup for the target MAC. 4) remove __init attribute from mtk_hw_init definition Signed-off-by: Sean Wang Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 51 +++++++++++++++++---- drivers/net/ethernet/mediatek/mtk_eth_soc.h | 8 ++++ 2 files changed, 51 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 01f59115132c..7b2f5edf274a 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -231,7 +231,7 @@ static int mtk_phy_connect(struct mtk_mac *mac) { struct mtk_eth *eth = mac->hw; struct device_node *np; - u32 val, ge_mode; + u32 val; np = of_parse_phandle(mac->of_node, "phy-handle", 0); if (!np && of_phy_is_fixed_link(mac->of_node)) @@ -245,18 +245,18 @@ static int mtk_phy_connect(struct mtk_mac *mac) case PHY_INTERFACE_MODE_RGMII_RXID: case PHY_INTERFACE_MODE_RGMII_ID: case PHY_INTERFACE_MODE_RGMII: - ge_mode = 0; + mac->ge_mode = 0; break; case PHY_INTERFACE_MODE_MII: - ge_mode = 1; + mac->ge_mode = 1; break; case PHY_INTERFACE_MODE_REVMII: - ge_mode = 2; + mac->ge_mode = 2; break; case PHY_INTERFACE_MODE_RMII: if (!mac->id) goto err_phy; - ge_mode = 3; + mac->ge_mode = 3; break; default: goto err_phy; @@ -265,7 +265,7 @@ static int mtk_phy_connect(struct mtk_mac *mac) /* put the gmac into the right mode */ regmap_read(eth->ethsys, ETHSYS_SYSCFG0, &val); val &= ~SYSCFG0_GE_MODE(SYSCFG0_GE_MASK, mac->id); - val |= SYSCFG0_GE_MODE(ge_mode, mac->id); + val |= SYSCFG0_GE_MODE(mac->ge_mode, mac->id); regmap_write(eth->ethsys, ETHSYS_SYSCFG0, val); mtk_phy_connect_node(eth, mac, np); @@ -1414,9 +1414,12 @@ static int mtk_stop(struct net_device *dev) return 0; } -static int __init mtk_hw_init(struct mtk_eth *eth) +static int mtk_hw_init(struct mtk_eth *eth) { - int i; + int i, val; + + if (test_and_set_bit(MTK_HW_INIT, ð->state)) + return 0; pm_runtime_enable(eth->dev); pm_runtime_get_sync(eth->dev); @@ -1432,6 +1435,15 @@ static int __init mtk_hw_init(struct mtk_eth *eth) reset_control_deassert(eth->rstc); usleep_range(10, 20); + regmap_read(eth->ethsys, ETHSYS_SYSCFG0, &val); + for (i = 0; i < MTK_MAC_COUNT; i++) { + if (!eth->mac[i]) + continue; + val &= ~SYSCFG0_GE_MODE(SYSCFG0_GE_MASK, eth->mac[i]->id); + val |= SYSCFG0_GE_MODE(eth->mac[i]->ge_mode, eth->mac[i]->id); + } + regmap_write(eth->ethsys, ETHSYS_SYSCFG0, val); + /* Set GE2 driving and slew rate */ regmap_write(eth->pctl, GPIO_DRV_SEL10, 0xa00); @@ -1483,6 +1495,9 @@ static int __init mtk_hw_init(struct mtk_eth *eth) static int mtk_hw_deinit(struct mtk_eth *eth) { + if (!test_and_clear_bit(MTK_HW_INIT, ð->state)) + return 0; + clk_disable_unprepare(eth->clks[MTK_CLK_GP2]); clk_disable_unprepare(eth->clks[MTK_CLK_GP1]); clk_disable_unprepare(eth->clks[MTK_CLK_ESW]); @@ -1560,6 +1575,26 @@ static void mtk_pending_work(struct work_struct *work) __set_bit(i, &restart); } + /* restart underlying hardware such as power, clock, pin mux + * and the connected phy + */ + mtk_hw_deinit(eth); + + if (eth->dev->pins) + pinctrl_select_state(eth->dev->pins->p, + eth->dev->pins->default_state); + mtk_hw_init(eth); + + for (i = 0; i < MTK_MAC_COUNT; i++) { + if (!eth->mac[i] || + of_phy_is_fixed_link(eth->mac[i]->of_node)) + continue; + err = phy_init_hw(eth->mac[i]->phy_dev); + if (err) + dev_err(eth->dev, "%s: PHY init failed.\n", + eth->netdev[i]->name); + } + /* restart DMA and enable IRQs */ for (i = 0; i < MTK_MAC_COUNT; i++) { if (!test_bit(i, &restart)) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index 0b984dca462e..388cbe7babdd 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -330,6 +330,10 @@ enum mtk_clks_map { MTK_CLK_MAX }; +enum mtk_dev_state { + MTK_HW_INIT +}; + /* struct mtk_tx_buf - This struct holds the pointers to the memory pointed at * by the TX descriptor s * @skb: The SKB pointer of the packet being sent @@ -413,6 +417,7 @@ struct mtk_rx_ring { * @clks: clock array for all clocks required * @mii_bus: If there is a bus we need to create an instance for it * @pending_work: The workqueue used to reset the dma ring + * @state Initialization and runtime state of the device. */ struct mtk_eth { @@ -441,11 +446,13 @@ struct mtk_eth { struct mii_bus *mii_bus; struct work_struct pending_work; + unsigned long state; }; /* struct mtk_mac - the structure that holds the info about the MACs of the * SoC * @id: The number of the MAC + * @ge_mode: Interface mode kept for setup restoring * @of_node: Our devicetree node * @hw: Backpointer to our main datastruture * @hw_stats: Packet statistics counter @@ -453,6 +460,7 @@ struct mtk_eth { */ struct mtk_mac { int id; + int ge_mode; struct device_node *of_node; struct mtk_eth *hw; struct mtk_hw_stats *hw_stats; From 2a8307aab373684e8c1067695310db4438621868 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Wed, 14 Sep 2016 23:13:20 +0800 Subject: [PATCH 1077/1715] net: ethernet: mediatek: add more resets for internal ethernet circuit block struct mtk_eth has already contained struct regmap ethsys pointer to the address range of the internal circuit reset, so we reuse it to reset more internal blocks on ethernet hardware such as packet processing engine (PPE) and frame engine (FE) instead of rstc which deals with FE only. Signed-off-by: Sean Wang Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 27 ++++++++++++--------- drivers/net/ethernet/mediatek/mtk_eth_soc.h | 6 ++++- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 7b2f5edf274a..4574332de1f6 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1414,6 +1414,19 @@ static int mtk_stop(struct net_device *dev) return 0; } +static void ethsys_reset(struct mtk_eth *eth, u32 reset_bits) +{ + regmap_update_bits(eth->ethsys, ETHSYS_RSTCTRL, + reset_bits, + reset_bits); + + usleep_range(1000, 1100); + regmap_update_bits(eth->ethsys, ETHSYS_RSTCTRL, + reset_bits, + ~reset_bits); + mdelay(10); +} + static int mtk_hw_init(struct mtk_eth *eth) { int i, val; @@ -1428,12 +1441,8 @@ static int mtk_hw_init(struct mtk_eth *eth) clk_prepare_enable(eth->clks[MTK_CLK_ESW]); clk_prepare_enable(eth->clks[MTK_CLK_GP1]); clk_prepare_enable(eth->clks[MTK_CLK_GP2]); - - /* reset the frame engine */ - reset_control_assert(eth->rstc); - usleep_range(10, 20); - reset_control_deassert(eth->rstc); - usleep_range(10, 20); + ethsys_reset(eth, RSTCTRL_FE); + ethsys_reset(eth, RSTCTRL_PPE); regmap_read(eth->ethsys, ETHSYS_SYSCFG0, &val); for (i = 0; i < MTK_MAC_COUNT; i++) { @@ -1896,12 +1905,6 @@ static int mtk_probe(struct platform_device *pdev) return PTR_ERR(eth->pctl); } - eth->rstc = devm_reset_control_get(&pdev->dev, "eth"); - if (IS_ERR(eth->rstc)) { - dev_err(&pdev->dev, "no eth reset found\n"); - return PTR_ERR(eth->rstc); - } - for (i = 0; i < 3; i++) { eth->irq[i] = platform_get_irq(pdev, i); if (eth->irq[i] < 0) { diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index 388cbe7babdd..7efa00fa9c71 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -266,6 +266,11 @@ #define SYSCFG0_GE_MASK 0x3 #define SYSCFG0_GE_MODE(x, y) (x << (12 + (y * 2))) +/*ethernet reset control register*/ +#define ETHSYS_RSTCTRL 0x34 +#define RSTCTRL_FE BIT(6) +#define RSTCTRL_PPE BIT(31) + struct mtk_rx_dma { unsigned int rxd1; unsigned int rxd2; @@ -423,7 +428,6 @@ struct mtk_rx_ring { struct mtk_eth { struct device *dev; void __iomem *base; - struct reset_control *rstc; spinlock_t page_lock; spinlock_t irq_lock; struct net_device dummy_dev; From dce6fa42199d493596315cddc0b4e7ac1d57475b Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Wed, 14 Sep 2016 23:13:21 +0800 Subject: [PATCH 1078/1715] net: ethernet: mediatek: avoid race condition during the reset process add the protection of the race condition between the reset process and hardware access happening on the related callbacks. Signed-off-by: Sean Wang Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 36 +++++++++++++++++++++ drivers/net/ethernet/mediatek/mtk_eth_soc.h | 3 +- 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 4574332de1f6..522fe8dda6c3 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -145,6 +145,9 @@ static void mtk_phy_link_adjust(struct net_device *dev) MAC_MCR_RX_EN | MAC_MCR_BACKOFF_EN | MAC_MCR_BACKPR_EN; + if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state))) + return; + switch (mac->phy_dev->speed) { case SPEED_1000: mcr |= MAC_MCR_SPEED_1000; @@ -370,6 +373,9 @@ static int mtk_set_mac_address(struct net_device *dev, void *p) if (ret) return ret; + if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state))) + return -EBUSY; + spin_lock_bh(&mac->hw->page_lock); mtk_w32(mac->hw, (macaddr[0] << 8) | macaddr[1], MTK_GDMA_MAC_ADRH(mac->id)); @@ -770,6 +776,9 @@ static int mtk_start_xmit(struct sk_buff *skb, struct net_device *dev) */ spin_lock(ð->page_lock); + if (unlikely(test_bit(MTK_RESETTING, ð->state))) + goto drop; + tx_num = mtk_cal_txd_req(skb); if (unlikely(atomic_read(&ring->free_count) <= tx_num)) { mtk_stop_queue(eth); @@ -842,6 +851,9 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget, netdev = eth->netdev[mac]; + if (unlikely(test_bit(MTK_RESETTING, ð->state))) + goto release_desc; + /* alloc new buffer */ new_data = napi_alloc_frag(ring->frag_size); if (unlikely(!new_data)) { @@ -1576,6 +1588,12 @@ static void mtk_pending_work(struct work_struct *work) rtnl_lock(); + dev_dbg(eth->dev, "[%s][%d] reset\n", __func__, __LINE__); + + while (test_and_set_bit_lock(MTK_RESETTING, ð->state)) + cpu_relax(); + + dev_dbg(eth->dev, "[%s][%d] mtk_stop starts\n", __func__, __LINE__); /* stop all devices to make sure that dma is properly shut down */ for (i = 0; i < MTK_MAC_COUNT; i++) { if (!eth->netdev[i]) @@ -1583,6 +1601,7 @@ static void mtk_pending_work(struct work_struct *work) mtk_stop(eth->netdev[i]); __set_bit(i, &restart); } + dev_dbg(eth->dev, "[%s][%d] mtk_stop ends\n", __func__, __LINE__); /* restart underlying hardware such as power, clock, pin mux * and the connected phy @@ -1615,6 +1634,11 @@ static void mtk_pending_work(struct work_struct *work) dev_close(eth->netdev[i]); } } + + dev_dbg(eth->dev, "[%s][%d] reset done\n", __func__, __LINE__); + + clear_bit_unlock(MTK_RESETTING, ð->state); + rtnl_unlock(); } @@ -1659,6 +1683,9 @@ static int mtk_get_settings(struct net_device *dev, struct mtk_mac *mac = netdev_priv(dev); int err; + if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state))) + return -EBUSY; + err = phy_read_status(mac->phy_dev); if (err) return -ENODEV; @@ -1709,6 +1736,9 @@ static int mtk_nway_reset(struct net_device *dev) { struct mtk_mac *mac = netdev_priv(dev); + if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state))) + return -EBUSY; + return genphy_restart_aneg(mac->phy_dev); } @@ -1717,6 +1747,9 @@ static u32 mtk_get_link(struct net_device *dev) struct mtk_mac *mac = netdev_priv(dev); int err; + if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state))) + return -EBUSY; + err = genphy_update_link(mac->phy_dev); if (err) return ethtool_op_get_link(dev); @@ -1757,6 +1790,9 @@ static void mtk_get_ethtool_stats(struct net_device *dev, unsigned int start; int i; + if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state))) + return; + if (netif_running(dev) && netif_device_present(dev)) { if (spin_trylock(&hwstats->stats_lock)) { mtk_stats_update_mac(mac); diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index 7efa00fa9c71..79954b419b53 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -336,7 +336,8 @@ enum mtk_clks_map { }; enum mtk_dev_state { - MTK_HW_INIT + MTK_HW_INIT, + MTK_RESETTING }; /* struct mtk_tx_buf - This struct holds the pointers to the memory pointed at From 76f0dcbb5ae1a7c3dbeec13dd98233b8e6b0b32a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 13 Sep 2016 22:55:05 -0700 Subject: [PATCH 1079/1715] tcp: fix a stale ooo_last_skb after a replace When skb replaces another one in ooo queue, I forgot to also update tp->ooo_last_skb as well, if the replaced skb was the last one in the queue. To fix this, we simply can re-use the code that runs after an insertion, trying to merge skbs at the right of current skb. This not only fixes the bug, but also remove all small skbs that might be a subset of the new one. Example: We receive segments 2001:3001, 4001:5001 Then we receive 2001:8001 : We should replace 2001:3001 with the big skb, but also remove 4001:50001 from the queue to save space. packetdrill test demonstrating the bug 0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 +0 bind(3, ..., ...) = 0 +0 listen(3, 1) = 0 +0 < S 0:0(0) win 32792 +0 > S. 0:0(0) ack 1 +0.100 < . 1:1(0) ack 1 win 1024 +0 accept(3, ..., ...) = 4 +0.01 < . 1001:2001(1000) ack 1 win 1024 +0 > . 1:1(0) ack 1 +0.01 < . 1001:3001(2000) ack 1 win 1024 +0 > . 1:1(0) ack 1 Fixes: 9f5afeae5152 ("tcp: use an RB tree for ooo receive queue") Signed-off-by: Eric Dumazet Reported-by: Yuchung Cheng Cc: Yaogong Wang Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 70b892db9901..dad3e7eeed94 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4502,7 +4502,7 @@ coalesce_done: NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE); __kfree_skb(skb1); - goto add_sack; + goto merge_right; } } else if (tcp_try_coalesce(sk, skb1, skb, &fragstolen)) { goto coalesce_done; @@ -4514,6 +4514,7 @@ insert: rb_link_node(&skb->rbnode, parent, p); rb_insert_color(&skb->rbnode, &tp->out_of_order_queue); +merge_right: /* Remove other segments covered by skb. */ while ((q = rb_next(&skb->rbnode)) != NULL) { skb1 = rb_entry(q, struct sk_buff, rbnode); From 0e26e5bd518f608ee2023b29429ecd4cd8b6969d Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 15 Sep 2016 02:24:13 +0000 Subject: [PATCH 1080/1715] net: dsa: bcm_sf2: Fix non static symbol warning Fixes the following sparse warning: drivers/net/dsa/bcm_sf2.c:963:19: warning: symbol 'bcm_sf2_io_ops' was not declared. Should it be static? Signed-off-by: Wei Yongjun Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/bcm_sf2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 5bf4f3452676..e218887f18b7 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -960,7 +960,7 @@ static int bcm_sf2_core_write64(struct b53_device *dev, u8 page, u8 reg, return 0; } -struct b53_io_ops bcm_sf2_io_ops = { +static struct b53_io_ops bcm_sf2_io_ops = { .read8 = bcm_sf2_core_read8, .read16 = bcm_sf2_core_read16, .read32 = bcm_sf2_core_read32, From 46c21e20128ac156c5b1503bab33ba1659283815 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 15 Sep 2016 02:24:37 +0000 Subject: [PATCH 1081/1715] net: dsa: b53: Remove unused including Remove including that don't need it. Signed-off-by: Wei Yongjun Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_priv.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h index 76672dae412d..f192a673caba 100644 --- a/drivers/net/dsa/b53/b53_priv.h +++ b/drivers/net/dsa/b53/b53_priv.h @@ -372,7 +372,6 @@ static inline void b53_arl_from_entry(u64 *mac_vid, u32 *fwd_entry, #ifdef CONFIG_BCM47XX -#include #include #include static inline int b53_switch_get_reset_gpio(struct b53_device *dev) From 7e5eded5c189abaea77556da41af1195af841b0a Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 15 Sep 2016 02:25:52 +0000 Subject: [PATCH 1082/1715] net: emac: remove unnecessary dev_set_drvdata() The driver core clears the driver data to NULL after device_release or on probe failure. Thus, it is not needed to manually clear the device driver data to NULL. Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/emac/emac.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c index 56e0a9f1c5ec..42d2d233b60f 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac.c @@ -722,7 +722,6 @@ static int emac_remove(struct platform_device *pdev) mdiobus_unregister(adpt->mii_bus); free_netdev(netdev); - dev_set_drvdata(&pdev->dev, NULL); return 0; } From 1d7b47a3c78e0b5391a18246f9637752a4565e5b Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 15 Sep 2016 02:26:10 +0000 Subject: [PATCH 1083/1715] net: emac: remove .owner field for driver Remove .owner field if calls are used which set it automatically. Generated by: scripts/coccinelle/api/platform_no_drv_owner.cocci Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/emac/emac.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c index 42d2d233b60f..e47d38701d6c 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac.c @@ -730,7 +730,6 @@ static struct platform_driver emac_platform_driver = { .probe = emac_probe, .remove = emac_remove, .driver = { - .owner = THIS_MODULE, .name = "qcom-emac", .of_match_table = emac_dt_match, }, From e5dcad290a7c62d1c856269dbd13e470e388b704 Mon Sep 17 00:00:00 2001 From: John Crispin Date: Thu, 15 Sep 2016 16:26:39 +0200 Subject: [PATCH 1084/1715] Documentation: devicetree: add qca8k binding Add device-tree binding for ar8xxx switch families. Cc: devicetree@vger.kernel.org Signed-off-by: John Crispin Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- .../devicetree/bindings/net/dsa/qca8k.txt | 89 +++++++++++++++++++ 1 file changed, 89 insertions(+) create mode 100644 Documentation/devicetree/bindings/net/dsa/qca8k.txt diff --git a/Documentation/devicetree/bindings/net/dsa/qca8k.txt b/Documentation/devicetree/bindings/net/dsa/qca8k.txt new file mode 100644 index 000000000000..9c67ee4890d7 --- /dev/null +++ b/Documentation/devicetree/bindings/net/dsa/qca8k.txt @@ -0,0 +1,89 @@ +* Qualcomm Atheros QCA8xxx switch family + +Required properties: + +- compatible: should be "qca,qca8337" +- #size-cells: must be 0 +- #address-cells: must be 1 + +Subnodes: + +The integrated switch subnode should be specified according to the binding +described in dsa/dsa.txt. As the QCA8K switches do not have a N:N mapping of +port and PHY id, each subnode describing a port needs to have a valid phandle +referencing the internal PHY connected to it. The CPU port of this switch is +always port 0. + +Example: + + + &mdio0 { + phy_port1: phy@0 { + reg = <0>; + }; + + phy_port2: phy@1 { + reg = <1>; + }; + + phy_port3: phy@2 { + reg = <2>; + }; + + phy_port4: phy@3 { + reg = <3>; + }; + + phy_port5: phy@4 { + reg = <4>; + }; + + switch0@0 { + compatible = "qca,qca8337"; + #address-cells = <1>; + #size-cells = <0>; + + reg = <0>; + + ports { + #address-cells = <1>; + #size-cells = <0>; + port@0 { + reg = <0>; + label = "cpu"; + ethernet = <&gmac1>; + phy-mode = "rgmii"; + }; + + port@1 { + reg = <1>; + label = "lan1"; + phy-handle = <&phy_port1>; + }; + + port@2 { + reg = <2>; + label = "lan2"; + phy-handle = <&phy_port2>; + }; + + port@3 { + reg = <3>; + label = "lan3"; + phy-handle = <&phy_port3>; + }; + + port@4 { + reg = <4>; + label = "lan4"; + phy-handle = <&phy_port4>; + }; + + port@5 { + reg = <5>; + label = "wan"; + phy-handle = <&phy_port5>; + }; + }; + }; + }; From cafdc45c949b9963cbfb8fe3a68d0ab16b0208ce Mon Sep 17 00:00:00 2001 From: John Crispin Date: Thu, 15 Sep 2016 16:26:40 +0200 Subject: [PATCH 1085/1715] net-next: dsa: add Qualcomm tag RX/TX handler Add support for the 2-bytes Qualcomm tag that gigabit switches such as the QCA8337/N might insert when receiving packets, or that we need to insert while targeting specific switch ports. The tag is inserted directly behind the ethernet header. Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: John Crispin Signed-off-by: David S. Miller --- include/net/dsa.h | 1 + net/dsa/Kconfig | 3 + net/dsa/Makefile | 1 + net/dsa/dsa.c | 3 + net/dsa/dsa_priv.h | 2 + net/dsa/tag_qca.c | 138 +++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 148 insertions(+) create mode 100644 net/dsa/tag_qca.c diff --git a/include/net/dsa.h b/include/net/dsa.h index 9d97c5214341..7556646db2d3 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -26,6 +26,7 @@ enum dsa_tag_protocol { DSA_TAG_PROTO_TRAILER, DSA_TAG_PROTO_EDSA, DSA_TAG_PROTO_BRCM, + DSA_TAG_PROTO_QCA, DSA_TAG_LAST, /* MUST BE LAST */ }; diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index ff7736f7ff42..96e47c539bee 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -38,4 +38,7 @@ config NET_DSA_TAG_EDSA config NET_DSA_TAG_TRAILER bool +config NET_DSA_TAG_QCA + bool + endif diff --git a/net/dsa/Makefile b/net/dsa/Makefile index 8af4ded70f1c..a3380ed0e0be 100644 --- a/net/dsa/Makefile +++ b/net/dsa/Makefile @@ -7,3 +7,4 @@ dsa_core-$(CONFIG_NET_DSA_TAG_BRCM) += tag_brcm.o dsa_core-$(CONFIG_NET_DSA_TAG_DSA) += tag_dsa.o dsa_core-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o dsa_core-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o +dsa_core-$(CONFIG_NET_DSA_TAG_QCA) += tag_qca.o diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index d8d267e9a872..66e31acfcad8 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -53,6 +53,9 @@ const struct dsa_device_ops *dsa_device_ops[DSA_TAG_LAST] = { #endif #ifdef CONFIG_NET_DSA_TAG_BRCM [DSA_TAG_PROTO_BRCM] = &brcm_netdev_ops, +#endif +#ifdef CONFIG_NET_DSA_TAG_QCA + [DSA_TAG_PROTO_QCA] = &qca_netdev_ops, #endif [DSA_TAG_PROTO_NONE] = &none_ops, }; diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 00077a9c97f4..6cfd7388834e 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -81,5 +81,7 @@ extern const struct dsa_device_ops trailer_netdev_ops; /* tag_brcm.c */ extern const struct dsa_device_ops brcm_netdev_ops; +/* tag_qca.c */ +extern const struct dsa_device_ops qca_netdev_ops; #endif diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c new file mode 100644 index 000000000000..0c90cacee7aa --- /dev/null +++ b/net/dsa/tag_qca.c @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include "dsa_priv.h" + +#define QCA_HDR_LEN 2 +#define QCA_HDR_VERSION 0x2 + +#define QCA_HDR_RECV_VERSION_MASK GENMASK(15, 14) +#define QCA_HDR_RECV_VERSION_S 14 +#define QCA_HDR_RECV_PRIORITY_MASK GENMASK(13, 11) +#define QCA_HDR_RECV_PRIORITY_S 11 +#define QCA_HDR_RECV_TYPE_MASK GENMASK(10, 6) +#define QCA_HDR_RECV_TYPE_S 6 +#define QCA_HDR_RECV_FRAME_IS_TAGGED BIT(3) +#define QCA_HDR_RECV_SOURCE_PORT_MASK GENMASK(2, 0) + +#define QCA_HDR_XMIT_VERSION_MASK GENMASK(15, 14) +#define QCA_HDR_XMIT_VERSION_S 14 +#define QCA_HDR_XMIT_PRIORITY_MASK GENMASK(13, 11) +#define QCA_HDR_XMIT_PRIORITY_S 11 +#define QCA_HDR_XMIT_CONTROL_MASK GENMASK(10, 8) +#define QCA_HDR_XMIT_CONTROL_S 8 +#define QCA_HDR_XMIT_FROM_CPU BIT(7) +#define QCA_HDR_XMIT_DP_BIT_MASK GENMASK(6, 0) + +static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + u16 *phdr, hdr; + + dev->stats.tx_packets++; + dev->stats.tx_bytes += skb->len; + + if (skb_cow_head(skb, 0) < 0) + goto out_free; + + skb_push(skb, QCA_HDR_LEN); + + memmove(skb->data, skb->data + QCA_HDR_LEN, 2 * ETH_ALEN); + phdr = (u16 *)(skb->data + 2 * ETH_ALEN); + + /* Set the version field, and set destination port information */ + hdr = QCA_HDR_VERSION << QCA_HDR_XMIT_VERSION_S | + QCA_HDR_XMIT_FROM_CPU | + BIT(p->port); + + *phdr = htons(hdr); + + return skb; + +out_free: + kfree_skb(skb); + return NULL; +} + +static int qca_tag_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) +{ + struct dsa_switch_tree *dst = dev->dsa_ptr; + struct dsa_switch *ds; + u8 ver; + int port; + __be16 *phdr, hdr; + + if (unlikely(!dst)) + goto out_drop; + + skb = skb_unshare(skb, GFP_ATOMIC); + if (!skb) + goto out; + + if (unlikely(!pskb_may_pull(skb, QCA_HDR_LEN))) + goto out_drop; + + /* The QCA header is added by the switch between src addr and Ethertype + * At this point, skb->data points to ethertype so header should be + * right before + */ + phdr = (__be16 *)(skb->data - 2); + hdr = ntohs(*phdr); + + /* Make sure the version is correct */ + ver = (hdr & QCA_HDR_RECV_VERSION_MASK) >> QCA_HDR_RECV_VERSION_S; + if (unlikely(ver != QCA_HDR_VERSION)) + goto out_drop; + + /* Remove QCA tag and recalculate checksum */ + skb_pull_rcsum(skb, QCA_HDR_LEN); + memmove(skb->data - ETH_HLEN, skb->data - ETH_HLEN - QCA_HDR_LEN, + ETH_HLEN - QCA_HDR_LEN); + + /* This protocol doesn't support cascading multiple switches so it's + * safe to assume the switch is first in the tree + */ + ds = dst->ds[0]; + if (!ds) + goto out_drop; + + /* Get source port information */ + port = (hdr & QCA_HDR_RECV_SOURCE_PORT_MASK); + if (!ds->ports[port].netdev) + goto out_drop; + + /* Update skb & forward the frame accordingly */ + skb_push(skb, ETH_HLEN); + skb->pkt_type = PACKET_HOST; + skb->dev = ds->ports[port].netdev; + skb->protocol = eth_type_trans(skb, skb->dev); + + skb->dev->stats.rx_packets++; + skb->dev->stats.rx_bytes += skb->len; + + netif_receive_skb(skb); + + return 0; + +out_drop: + kfree_skb(skb); +out: + return 0; +} + +const struct dsa_device_ops qca_netdev_ops = { + .xmit = qca_tag_xmit, + .rcv = qca_tag_rcv, +}; From 6b93fb46480a9cfa4afb52a6d19b2591804e5f9e Mon Sep 17 00:00:00 2001 From: John Crispin Date: Thu, 15 Sep 2016 16:26:41 +0200 Subject: [PATCH 1086/1715] net-next: dsa: add new driver for qca8xxx family This patch contains initial support for the QCA8337 switch. It will detect a QCA8337 switch, if present and declared in the DT. Each port will be represented through a standalone net_device interface, as for other DSA switches. CPU can communicate with any of the ports by setting an IP@ on ethN interface. Most of the extra callbacks of the DSA subsystem are already supported, such as bridge offloading, stp, fdb. Signed-off-by: John Crispin Signed-off-by: David S. Miller --- drivers/net/dsa/Kconfig | 9 + drivers/net/dsa/Makefile | 1 + drivers/net/dsa/qca8k.c | 1060 ++++++++++++++++++++++++++++++++++++++ drivers/net/dsa/qca8k.h | 185 +++++++ 4 files changed, 1255 insertions(+) create mode 100644 drivers/net/dsa/qca8k.c create mode 100644 drivers/net/dsa/qca8k.h diff --git a/drivers/net/dsa/Kconfig b/drivers/net/dsa/Kconfig index de6d04429a70..065984670ff1 100644 --- a/drivers/net/dsa/Kconfig +++ b/drivers/net/dsa/Kconfig @@ -25,4 +25,13 @@ source "drivers/net/dsa/b53/Kconfig" source "drivers/net/dsa/mv88e6xxx/Kconfig" +config NET_DSA_QCA8K + tristate "Qualcomm Atheros QCA8K Ethernet switch family support" + depends on NET_DSA + select NET_DSA_TAG_QCA + select REGMAP + ---help--- + This enables support for the Qualcomm Atheros QCA8K Ethernet + switch chips. + endmenu diff --git a/drivers/net/dsa/Makefile b/drivers/net/dsa/Makefile index ca1e71b853a6..8346e4f9737a 100644 --- a/drivers/net/dsa/Makefile +++ b/drivers/net/dsa/Makefile @@ -1,5 +1,6 @@ obj-$(CONFIG_NET_DSA_MV88E6060) += mv88e6060.o obj-$(CONFIG_NET_DSA_BCM_SF2) += bcm_sf2.o +obj-$(CONFIG_NET_DSA_QCA8K) += qca8k.o obj-y += b53/ obj-y += mv88e6xxx/ diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c new file mode 100644 index 000000000000..7f3f1781c202 --- /dev/null +++ b/drivers/net/dsa/qca8k.c @@ -0,0 +1,1060 @@ +/* + * Copyright (C) 2009 Felix Fietkau + * Copyright (C) 2011-2012 Gabor Juhos + * Copyright (c) 2015, The Linux Foundation. All rights reserved. + * Copyright (c) 2016 John Crispin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "qca8k.h" + +#define MIB_DESC(_s, _o, _n) \ + { \ + .size = (_s), \ + .offset = (_o), \ + .name = (_n), \ + } + +static const struct qca8k_mib_desc ar8327_mib[] = { + MIB_DESC(1, 0x00, "RxBroad"), + MIB_DESC(1, 0x04, "RxPause"), + MIB_DESC(1, 0x08, "RxMulti"), + MIB_DESC(1, 0x0c, "RxFcsErr"), + MIB_DESC(1, 0x10, "RxAlignErr"), + MIB_DESC(1, 0x14, "RxRunt"), + MIB_DESC(1, 0x18, "RxFragment"), + MIB_DESC(1, 0x1c, "Rx64Byte"), + MIB_DESC(1, 0x20, "Rx128Byte"), + MIB_DESC(1, 0x24, "Rx256Byte"), + MIB_DESC(1, 0x28, "Rx512Byte"), + MIB_DESC(1, 0x2c, "Rx1024Byte"), + MIB_DESC(1, 0x30, "Rx1518Byte"), + MIB_DESC(1, 0x34, "RxMaxByte"), + MIB_DESC(1, 0x38, "RxTooLong"), + MIB_DESC(2, 0x3c, "RxGoodByte"), + MIB_DESC(2, 0x44, "RxBadByte"), + MIB_DESC(1, 0x4c, "RxOverFlow"), + MIB_DESC(1, 0x50, "Filtered"), + MIB_DESC(1, 0x54, "TxBroad"), + MIB_DESC(1, 0x58, "TxPause"), + MIB_DESC(1, 0x5c, "TxMulti"), + MIB_DESC(1, 0x60, "TxUnderRun"), + MIB_DESC(1, 0x64, "Tx64Byte"), + MIB_DESC(1, 0x68, "Tx128Byte"), + MIB_DESC(1, 0x6c, "Tx256Byte"), + MIB_DESC(1, 0x70, "Tx512Byte"), + MIB_DESC(1, 0x74, "Tx1024Byte"), + MIB_DESC(1, 0x78, "Tx1518Byte"), + MIB_DESC(1, 0x7c, "TxMaxByte"), + MIB_DESC(1, 0x80, "TxOverSize"), + MIB_DESC(2, 0x84, "TxByte"), + MIB_DESC(1, 0x8c, "TxCollision"), + MIB_DESC(1, 0x90, "TxAbortCol"), + MIB_DESC(1, 0x94, "TxMultiCol"), + MIB_DESC(1, 0x98, "TxSingleCol"), + MIB_DESC(1, 0x9c, "TxExcDefer"), + MIB_DESC(1, 0xa0, "TxDefer"), + MIB_DESC(1, 0xa4, "TxLateCol"), +}; + +/* The 32bit switch registers are accessed indirectly. To achieve this we need + * to set the page of the register. Track the last page that was set to reduce + * mdio writes + */ +static u16 qca8k_current_page = 0xffff; + +static void +qca8k_split_addr(u32 regaddr, u16 *r1, u16 *r2, u16 *page) +{ + regaddr >>= 1; + *r1 = regaddr & 0x1e; + + regaddr >>= 5; + *r2 = regaddr & 0x7; + + regaddr >>= 3; + *page = regaddr & 0x3ff; +} + +static u32 +qca8k_mii_read32(struct mii_bus *bus, int phy_id, u32 regnum) +{ + u32 val; + int ret; + + ret = bus->read(bus, phy_id, regnum); + if (ret >= 0) { + val = ret; + ret = bus->read(bus, phy_id, regnum + 1); + val |= ret << 16; + } + + if (ret < 0) { + dev_err_ratelimited(&bus->dev, + "failed to read qca8k 32bit register\n"); + return ret; + } + + return val; +} + +static void +qca8k_mii_write32(struct mii_bus *bus, int phy_id, u32 regnum, u32 val) +{ + u16 lo, hi; + int ret; + + lo = val & 0xffff; + hi = (u16)(val >> 16); + + ret = bus->write(bus, phy_id, regnum, lo); + if (ret >= 0) + ret = bus->write(bus, phy_id, regnum + 1, hi); + if (ret < 0) + dev_err_ratelimited(&bus->dev, + "failed to write qca8k 32bit register\n"); +} + +static void +qca8k_set_page(struct mii_bus *bus, u16 page) +{ + if (page == qca8k_current_page) + return; + + if (bus->write(bus, 0x18, 0, page) < 0) + dev_err_ratelimited(&bus->dev, + "failed to set qca8k page\n"); + qca8k_current_page = page; +} + +static u32 +qca8k_read(struct qca8k_priv *priv, u32 reg) +{ + u16 r1, r2, page; + u32 val; + + qca8k_split_addr(reg, &r1, &r2, &page); + + mutex_lock_nested(&priv->bus->mdio_lock, MDIO_MUTEX_NESTED); + + qca8k_set_page(priv->bus, page); + val = qca8k_mii_read32(priv->bus, 0x10 | r2, r1); + + mutex_unlock(&priv->bus->mdio_lock); + + return val; +} + +static void +qca8k_write(struct qca8k_priv *priv, u32 reg, u32 val) +{ + u16 r1, r2, page; + + qca8k_split_addr(reg, &r1, &r2, &page); + + mutex_lock_nested(&priv->bus->mdio_lock, MDIO_MUTEX_NESTED); + + qca8k_set_page(priv->bus, page); + qca8k_mii_write32(priv->bus, 0x10 | r2, r1, val); + + mutex_unlock(&priv->bus->mdio_lock); +} + +static u32 +qca8k_rmw(struct qca8k_priv *priv, u32 reg, u32 mask, u32 val) +{ + u16 r1, r2, page; + u32 ret; + + qca8k_split_addr(reg, &r1, &r2, &page); + + mutex_lock_nested(&priv->bus->mdio_lock, MDIO_MUTEX_NESTED); + + qca8k_set_page(priv->bus, page); + ret = qca8k_mii_read32(priv->bus, 0x10 | r2, r1); + ret &= ~mask; + ret |= val; + qca8k_mii_write32(priv->bus, 0x10 | r2, r1, ret); + + mutex_unlock(&priv->bus->mdio_lock); + + return ret; +} + +static void +qca8k_reg_set(struct qca8k_priv *priv, u32 reg, u32 val) +{ + qca8k_rmw(priv, reg, 0, val); +} + +static void +qca8k_reg_clear(struct qca8k_priv *priv, u32 reg, u32 val) +{ + qca8k_rmw(priv, reg, val, 0); +} + +static int +qca8k_regmap_read(void *ctx, uint32_t reg, uint32_t *val) +{ + struct qca8k_priv *priv = (struct qca8k_priv *)ctx; + + *val = qca8k_read(priv, reg); + + return 0; +} + +static int +qca8k_regmap_write(void *ctx, uint32_t reg, uint32_t val) +{ + struct qca8k_priv *priv = (struct qca8k_priv *)ctx; + + qca8k_write(priv, reg, val); + + return 0; +} + +static const struct regmap_range qca8k_readable_ranges[] = { + regmap_reg_range(0x0000, 0x00e4), /* Global control */ + regmap_reg_range(0x0100, 0x0168), /* EEE control */ + regmap_reg_range(0x0200, 0x0270), /* Parser control */ + regmap_reg_range(0x0400, 0x0454), /* ACL */ + regmap_reg_range(0x0600, 0x0718), /* Lookup */ + regmap_reg_range(0x0800, 0x0b70), /* QM */ + regmap_reg_range(0x0c00, 0x0c80), /* PKT */ + regmap_reg_range(0x0e00, 0x0e98), /* L3 */ + regmap_reg_range(0x1000, 0x10ac), /* MIB - Port0 */ + regmap_reg_range(0x1100, 0x11ac), /* MIB - Port1 */ + regmap_reg_range(0x1200, 0x12ac), /* MIB - Port2 */ + regmap_reg_range(0x1300, 0x13ac), /* MIB - Port3 */ + regmap_reg_range(0x1400, 0x14ac), /* MIB - Port4 */ + regmap_reg_range(0x1500, 0x15ac), /* MIB - Port5 */ + regmap_reg_range(0x1600, 0x16ac), /* MIB - Port6 */ + +}; + +static struct regmap_access_table qca8k_readable_table = { + .yes_ranges = qca8k_readable_ranges, + .n_yes_ranges = ARRAY_SIZE(qca8k_readable_ranges), +}; + +struct regmap_config qca8k_regmap_config = { + .reg_bits = 16, + .val_bits = 32, + .reg_stride = 4, + .max_register = 0x16ac, /* end MIB - Port6 range */ + .reg_read = qca8k_regmap_read, + .reg_write = qca8k_regmap_write, + .rd_table = &qca8k_readable_table, +}; + +static int +qca8k_busy_wait(struct qca8k_priv *priv, u32 reg, u32 mask) +{ + unsigned long timeout; + + timeout = jiffies + msecs_to_jiffies(20); + + /* loop until the busy flag has cleared */ + do { + u32 val = qca8k_read(priv, reg); + int busy = val & mask; + + if (!busy) + break; + cond_resched(); + } while (!time_after_eq(jiffies, timeout)); + + return time_after_eq(jiffies, timeout); +} + +static void +qca8k_fdb_read(struct qca8k_priv *priv, struct qca8k_fdb *fdb) +{ + u32 reg[4]; + int i; + + /* load the ARL table into an array */ + for (i = 0; i < 4; i++) + reg[i] = qca8k_read(priv, QCA8K_REG_ATU_DATA0 + (i * 4)); + + /* vid - 83:72 */ + fdb->vid = (reg[2] >> QCA8K_ATU_VID_S) & QCA8K_ATU_VID_M; + /* aging - 67:64 */ + fdb->aging = reg[2] & QCA8K_ATU_STATUS_M; + /* portmask - 54:48 */ + fdb->port_mask = (reg[1] >> QCA8K_ATU_PORT_S) & QCA8K_ATU_PORT_M; + /* mac - 47:0 */ + fdb->mac[0] = (reg[1] >> QCA8K_ATU_ADDR0_S) & 0xff; + fdb->mac[1] = reg[1] & 0xff; + fdb->mac[2] = (reg[0] >> QCA8K_ATU_ADDR2_S) & 0xff; + fdb->mac[3] = (reg[0] >> QCA8K_ATU_ADDR3_S) & 0xff; + fdb->mac[4] = (reg[0] >> QCA8K_ATU_ADDR4_S) & 0xff; + fdb->mac[5] = reg[0] & 0xff; +} + +static void +qca8k_fdb_write(struct qca8k_priv *priv, u16 vid, u8 port_mask, const u8 *mac, + u8 aging) +{ + u32 reg[3] = { 0 }; + int i; + + /* vid - 83:72 */ + reg[2] = (vid & QCA8K_ATU_VID_M) << QCA8K_ATU_VID_S; + /* aging - 67:64 */ + reg[2] |= aging & QCA8K_ATU_STATUS_M; + /* portmask - 54:48 */ + reg[1] = (port_mask & QCA8K_ATU_PORT_M) << QCA8K_ATU_PORT_S; + /* mac - 47:0 */ + reg[1] |= mac[0] << QCA8K_ATU_ADDR0_S; + reg[1] |= mac[1]; + reg[0] |= mac[2] << QCA8K_ATU_ADDR2_S; + reg[0] |= mac[3] << QCA8K_ATU_ADDR3_S; + reg[0] |= mac[4] << QCA8K_ATU_ADDR4_S; + reg[0] |= mac[5]; + + /* load the array into the ARL table */ + for (i = 0; i < 3; i++) + qca8k_write(priv, QCA8K_REG_ATU_DATA0 + (i * 4), reg[i]); +} + +static int +qca8k_fdb_access(struct qca8k_priv *priv, enum qca8k_fdb_cmd cmd, int port) +{ + u32 reg; + + /* Set the command and FDB index */ + reg = QCA8K_ATU_FUNC_BUSY; + reg |= cmd; + if (port >= 0) { + reg |= QCA8K_ATU_FUNC_PORT_EN; + reg |= (port & QCA8K_ATU_FUNC_PORT_M) << QCA8K_ATU_FUNC_PORT_S; + } + + /* Write the function register triggering the table access */ + qca8k_write(priv, QCA8K_REG_ATU_FUNC, reg); + + /* wait for completion */ + if (qca8k_busy_wait(priv, QCA8K_REG_ATU_FUNC, QCA8K_ATU_FUNC_BUSY)) + return -1; + + /* Check for table full violation when adding an entry */ + if (cmd == QCA8K_FDB_LOAD) { + reg = qca8k_read(priv, QCA8K_REG_ATU_FUNC); + if (reg & QCA8K_ATU_FUNC_FULL) + return -1; + } + + return 0; +} + +static int +qca8k_fdb_next(struct qca8k_priv *priv, struct qca8k_fdb *fdb, int port) +{ + int ret; + + qca8k_fdb_write(priv, fdb->vid, fdb->port_mask, fdb->mac, fdb->aging); + ret = qca8k_fdb_access(priv, QCA8K_FDB_NEXT, port); + if (ret >= 0) + qca8k_fdb_read(priv, fdb); + + return ret; +} + +static int +qca8k_fdb_add(struct qca8k_priv *priv, const u8 *mac, u16 port_mask, + u16 vid, u8 aging) +{ + int ret; + + mutex_lock(&priv->reg_mutex); + qca8k_fdb_write(priv, vid, port_mask, mac, aging); + ret = qca8k_fdb_access(priv, QCA8K_FDB_LOAD, -1); + mutex_unlock(&priv->reg_mutex); + + return ret; +} + +static int +qca8k_fdb_del(struct qca8k_priv *priv, const u8 *mac, u16 port_mask, u16 vid) +{ + int ret; + + mutex_lock(&priv->reg_mutex); + qca8k_fdb_write(priv, vid, port_mask, mac, 0); + ret = qca8k_fdb_access(priv, QCA8K_FDB_PURGE, -1); + mutex_unlock(&priv->reg_mutex); + + return ret; +} + +static void +qca8k_fdb_flush(struct qca8k_priv *priv) +{ + mutex_lock(&priv->reg_mutex); + qca8k_fdb_access(priv, QCA8K_FDB_FLUSH, -1); + mutex_unlock(&priv->reg_mutex); +} + +static void +qca8k_mib_init(struct qca8k_priv *priv) +{ + mutex_lock(&priv->reg_mutex); + qca8k_reg_set(priv, QCA8K_REG_MIB, QCA8K_MIB_FLUSH | QCA8K_MIB_BUSY); + qca8k_busy_wait(priv, QCA8K_REG_MIB, QCA8K_MIB_BUSY); + qca8k_reg_set(priv, QCA8K_REG_MIB, QCA8K_MIB_CPU_KEEP); + qca8k_write(priv, QCA8K_REG_MODULE_EN, QCA8K_MODULE_EN_MIB); + mutex_unlock(&priv->reg_mutex); +} + +static int +qca8k_set_pad_ctrl(struct qca8k_priv *priv, int port, int mode) +{ + u32 reg; + + switch (port) { + case 0: + reg = QCA8K_REG_PORT0_PAD_CTRL; + break; + case 6: + reg = QCA8K_REG_PORT6_PAD_CTRL; + break; + default: + pr_err("Can't set PAD_CTRL on port %d\n", port); + return -EINVAL; + } + + /* Configure a port to be directly connected to an external + * PHY or MAC. + */ + switch (mode) { + case PHY_INTERFACE_MODE_RGMII: + qca8k_write(priv, reg, + QCA8K_PORT_PAD_RGMII_EN | + QCA8K_PORT_PAD_RGMII_TX_DELAY(3) | + QCA8K_PORT_PAD_RGMII_RX_DELAY(3)); + + /* According to the datasheet, RGMII delay is enabled through + * PORT5_PAD_CTRL for all ports, rather than individual port + * registers + */ + qca8k_write(priv, QCA8K_REG_PORT5_PAD_CTRL, + QCA8K_PORT_PAD_RGMII_RX_DELAY_EN); + break; + case PHY_INTERFACE_MODE_SGMII: + qca8k_write(priv, reg, QCA8K_PORT_PAD_SGMII_EN); + break; + default: + pr_err("xMII mode %d not supported\n", mode); + return -EINVAL; + } + + return 0; +} + +static void +qca8k_port_set_status(struct qca8k_priv *priv, int port, int enable) +{ + u32 mask = QCA8K_PORT_STATUS_TXMAC; + + /* Port 0 and 6 have no internal PHY */ + if ((port > 0) && (port < 6)) + mask |= QCA8K_PORT_STATUS_LINK_AUTO; + + if (enable) + qca8k_reg_set(priv, QCA8K_REG_PORT_STATUS(port), mask); + else + qca8k_reg_clear(priv, QCA8K_REG_PORT_STATUS(port), mask); +} + +static int +qca8k_setup(struct dsa_switch *ds) +{ + struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; + int ret, i, phy_mode = -1; + + /* Make sure that port 0 is the cpu port */ + if (!dsa_is_cpu_port(ds, 0)) { + pr_err("port 0 is not the CPU port\n"); + return -EINVAL; + } + + mutex_init(&priv->reg_mutex); + + /* Start by setting up the register mapping */ + priv->regmap = devm_regmap_init(ds->dev, NULL, priv, + &qca8k_regmap_config); + if (IS_ERR(priv->regmap)) + pr_warn("regmap initialization failed"); + + /* Initialize CPU port pad mode (xMII type, delays...) */ + phy_mode = of_get_phy_mode(ds->ports[ds->dst->cpu_port].dn); + if (phy_mode < 0) { + pr_err("Can't find phy-mode for master device\n"); + return phy_mode; + } + ret = qca8k_set_pad_ctrl(priv, QCA8K_CPU_PORT, phy_mode); + if (ret < 0) + return ret; + + /* Enable CPU Port */ + qca8k_reg_set(priv, QCA8K_REG_GLOBAL_FW_CTRL0, + QCA8K_GLOBAL_FW_CTRL0_CPU_PORT_EN); + qca8k_port_set_status(priv, QCA8K_CPU_PORT, 1); + priv->port_sts[QCA8K_CPU_PORT].enabled = 1; + + /* Enable MIB counters */ + qca8k_mib_init(priv); + + /* Enable QCA header mode on the cpu port */ + qca8k_write(priv, QCA8K_REG_PORT_HDR_CTRL(QCA8K_CPU_PORT), + QCA8K_PORT_HDR_CTRL_ALL << QCA8K_PORT_HDR_CTRL_TX_S | + QCA8K_PORT_HDR_CTRL_ALL << QCA8K_PORT_HDR_CTRL_RX_S); + + /* Disable forwarding by default on all ports */ + for (i = 0; i < QCA8K_NUM_PORTS; i++) + qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(i), + QCA8K_PORT_LOOKUP_MEMBER, 0); + + /* Disable MAC by default on all user ports */ + for (i = 1; i < QCA8K_NUM_PORTS; i++) + if (ds->enabled_port_mask & BIT(i)) + qca8k_port_set_status(priv, i, 0); + + /* Forward all unknown frames to CPU port for Linux processing */ + qca8k_write(priv, QCA8K_REG_GLOBAL_FW_CTRL1, + BIT(0) << QCA8K_GLOBAL_FW_CTRL1_IGMP_DP_S | + BIT(0) << QCA8K_GLOBAL_FW_CTRL1_BC_DP_S | + BIT(0) << QCA8K_GLOBAL_FW_CTRL1_MC_DP_S | + BIT(0) << QCA8K_GLOBAL_FW_CTRL1_UC_DP_S); + + /* Setup connection between CPU port & user ports */ + for (i = 0; i < DSA_MAX_PORTS; i++) { + /* CPU port gets connected to all user ports of the switch */ + if (dsa_is_cpu_port(ds, i)) { + qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(QCA8K_CPU_PORT), + QCA8K_PORT_LOOKUP_MEMBER, + ds->enabled_port_mask); + } + + /* Invividual user ports get connected to CPU port only */ + if (ds->enabled_port_mask & BIT(i)) { + int shift = 16 * (i % 2); + + qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(i), + QCA8K_PORT_LOOKUP_MEMBER, + BIT(QCA8K_CPU_PORT)); + + /* Enable ARP Auto-learning by default */ + qca8k_reg_set(priv, QCA8K_PORT_LOOKUP_CTRL(i), + QCA8K_PORT_LOOKUP_LEARN); + + /* For port based vlans to work we need to set the + * default egress vid + */ + qca8k_rmw(priv, QCA8K_EGRESS_VLAN(i), + 0xffff << shift, 1 << shift); + qca8k_write(priv, QCA8K_REG_PORT_VLAN_CTRL0(i), + QCA8K_PORT_VLAN_CVID(1) | + QCA8K_PORT_VLAN_SVID(1)); + } + } + + /* Flush the FDB table */ + qca8k_fdb_flush(priv); + + return 0; +} + +static int +qca8k_set_addr(struct dsa_switch *ds, u8 *addr) +{ + /* The subsystem always calls this function so add an empty stub */ + return 0; +} + +static int +qca8k_phy_read(struct dsa_switch *ds, int phy, int regnum) +{ + struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; + + return mdiobus_read(priv->bus, phy, regnum); +} + +static int +qca8k_phy_write(struct dsa_switch *ds, int phy, int regnum, u16 val) +{ + struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; + + return mdiobus_write(priv->bus, phy, regnum, val); +} + +static void +qca8k_get_strings(struct dsa_switch *ds, int port, uint8_t *data) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(ar8327_mib); i++) + strncpy(data + i * ETH_GSTRING_LEN, ar8327_mib[i].name, + ETH_GSTRING_LEN); +} + +static void +qca8k_get_ethtool_stats(struct dsa_switch *ds, int port, + uint64_t *data) +{ + struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; + const struct qca8k_mib_desc *mib; + u32 reg, i; + u64 hi; + + for (i = 0; i < ARRAY_SIZE(ar8327_mib); i++) { + mib = &ar8327_mib[i]; + reg = QCA8K_PORT_MIB_COUNTER(port) + mib->offset; + + data[i] = qca8k_read(priv, reg); + if (mib->size == 2) { + hi = qca8k_read(priv, reg + 4); + data[i] |= hi << 32; + } + } +} + +static int +qca8k_get_sset_count(struct dsa_switch *ds) +{ + return ARRAY_SIZE(ar8327_mib); +} + +static void +qca8k_eee_enable_set(struct dsa_switch *ds, int port, bool enable) +{ + struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; + u32 lpi_en = QCA8K_REG_EEE_CTRL_LPI_EN(port); + u32 reg; + + mutex_lock(&priv->reg_mutex); + reg = qca8k_read(priv, QCA8K_REG_EEE_CTRL); + if (enable) + reg |= lpi_en; + else + reg &= ~lpi_en; + qca8k_write(priv, QCA8K_REG_EEE_CTRL, reg); + mutex_unlock(&priv->reg_mutex); +} + +static int +qca8k_eee_init(struct dsa_switch *ds, int port, + struct phy_device *phy) +{ + struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; + struct ethtool_eee *p = &priv->port_sts[port].eee; + int ret; + + p->supported = (SUPPORTED_1000baseT_Full | SUPPORTED_100baseT_Full); + + ret = phy_init_eee(phy, 0); + if (ret) + return ret; + + qca8k_eee_enable_set(ds, port, true); + + return 0; +} + +static int +qca8k_set_eee(struct dsa_switch *ds, int port, + struct phy_device *phydev, + struct ethtool_eee *e) +{ + struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; + struct ethtool_eee *p = &priv->port_sts[port].eee; + int ret = 0; + + p->eee_enabled = e->eee_enabled; + + if (e->eee_enabled) { + p->eee_enabled = qca8k_eee_init(ds, port, phydev); + if (!p->eee_enabled) + ret = -EOPNOTSUPP; + } + qca8k_eee_enable_set(ds, port, p->eee_enabled); + + return ret; +} + +static int +qca8k_get_eee(struct dsa_switch *ds, int port, + struct ethtool_eee *e) +{ + struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; + struct ethtool_eee *p = &priv->port_sts[port].eee; + struct net_device *netdev = ds->ports[port].netdev; + int ret; + + ret = phy_ethtool_get_eee(netdev->phydev, p); + if (!ret) + e->eee_active = + !!(p->supported & p->advertised & p->lp_advertised); + else + e->eee_active = 0; + + e->eee_enabled = p->eee_enabled; + + return ret; +} + +static void +qca8k_port_stp_state_set(struct dsa_switch *ds, int port, u8 state) +{ + struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; + u32 stp_state; + + switch (state) { + case BR_STATE_DISABLED: + stp_state = QCA8K_PORT_LOOKUP_STATE_DISABLED; + break; + case BR_STATE_BLOCKING: + stp_state = QCA8K_PORT_LOOKUP_STATE_BLOCKING; + break; + case BR_STATE_LISTENING: + stp_state = QCA8K_PORT_LOOKUP_STATE_LISTENING; + break; + case BR_STATE_LEARNING: + stp_state = QCA8K_PORT_LOOKUP_STATE_LEARNING; + break; + case BR_STATE_FORWARDING: + default: + stp_state = QCA8K_PORT_LOOKUP_STATE_FORWARD; + break; + } + + qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(port), + QCA8K_PORT_LOOKUP_STATE_MASK, stp_state); +} + +static int +qca8k_port_bridge_join(struct dsa_switch *ds, int port, + struct net_device *bridge) +{ + struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; + int port_mask = BIT(QCA8K_CPU_PORT); + int i; + + priv->port_sts[port].bridge_dev = bridge; + + for (i = 1; i < QCA8K_NUM_PORTS; i++) { + if (priv->port_sts[i].bridge_dev != bridge) + continue; + /* Add this port to the portvlan mask of the other ports + * in the bridge + */ + qca8k_reg_set(priv, + QCA8K_PORT_LOOKUP_CTRL(i), + BIT(port)); + if (i != port) + port_mask |= BIT(i); + } + /* Add all other ports to this ports portvlan mask */ + qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(port), + QCA8K_PORT_LOOKUP_MEMBER, port_mask); + + return 0; +} + +static void +qca8k_port_bridge_leave(struct dsa_switch *ds, int port) +{ + struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; + int i; + + for (i = 1; i < QCA8K_NUM_PORTS; i++) { + if (priv->port_sts[i].bridge_dev != + priv->port_sts[port].bridge_dev) + continue; + /* Remove this port to the portvlan mask of the other ports + * in the bridge + */ + qca8k_reg_clear(priv, + QCA8K_PORT_LOOKUP_CTRL(i), + BIT(port)); + } + priv->port_sts[port].bridge_dev = NULL; + /* Set the cpu port to be the only one in the portvlan mask of + * this port + */ + qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(port), + QCA8K_PORT_LOOKUP_MEMBER, BIT(QCA8K_CPU_PORT)); +} + +static int +qca8k_port_enable(struct dsa_switch *ds, int port, + struct phy_device *phy) +{ + struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; + + qca8k_port_set_status(priv, port, 1); + priv->port_sts[port].enabled = 1; + + return 0; +} + +static void +qca8k_port_disable(struct dsa_switch *ds, int port, + struct phy_device *phy) +{ + struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; + + qca8k_port_set_status(priv, port, 0); + priv->port_sts[port].enabled = 0; +} + +static int +qca8k_port_fdb_insert(struct qca8k_priv *priv, const u8 *addr, + u16 port_mask, u16 vid) +{ + /* Set the vid to the port vlan id if no vid is set */ + if (!vid) + vid = 1; + + return qca8k_fdb_add(priv, addr, port_mask, vid, + QCA8K_ATU_STATUS_STATIC); +} + +static int +qca8k_port_fdb_prepare(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_fdb *fdb, + struct switchdev_trans *trans) +{ + struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; + + /* The FDB table for static and auto learned entries is the same. We + * need to reserve an entry with no port_mask set to make sure that + * when port_fdb_add is called an entry is still available. Otherwise + * the last free entry might have been used up by auto learning + */ + return qca8k_port_fdb_insert(priv, fdb->addr, 0, fdb->vid); +} + +static void +qca8k_port_fdb_add(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_fdb *fdb, + struct switchdev_trans *trans) +{ + struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; + u16 port_mask = BIT(port); + + /* Update the FDB entry adding the port_mask */ + qca8k_port_fdb_insert(priv, fdb->addr, port_mask, fdb->vid); +} + +static int +qca8k_port_fdb_del(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_fdb *fdb) +{ + struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; + u16 port_mask = BIT(port); + u16 vid = fdb->vid; + + if (!vid) + vid = 1; + + return qca8k_fdb_del(priv, fdb->addr, port_mask, vid); +} + +static int +qca8k_port_fdb_dump(struct dsa_switch *ds, int port, + struct switchdev_obj_port_fdb *fdb, + int (*cb)(struct switchdev_obj *obj)) +{ + struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; + struct qca8k_fdb _fdb = { 0 }; + int cnt = QCA8K_NUM_FDB_RECORDS; + int ret = 0; + + mutex_lock(&priv->reg_mutex); + while (cnt-- && !qca8k_fdb_next(priv, &_fdb, port)) { + if (!_fdb.aging) + break; + + ether_addr_copy(fdb->addr, _fdb.mac); + fdb->vid = _fdb.vid; + if (_fdb.aging == QCA8K_ATU_STATUS_STATIC) + fdb->ndm_state = NUD_NOARP; + else + fdb->ndm_state = NUD_REACHABLE; + + ret = cb(&fdb->obj); + if (ret) + break; + } + mutex_unlock(&priv->reg_mutex); + + return 0; +} + +static enum dsa_tag_protocol +qca8k_get_tag_protocol(struct dsa_switch *ds) +{ + return DSA_TAG_PROTO_QCA; +} + +static struct dsa_switch_ops qca8k_switch_ops = { + .get_tag_protocol = qca8k_get_tag_protocol, + .setup = qca8k_setup, + .set_addr = qca8k_set_addr, + .get_strings = qca8k_get_strings, + .phy_read = qca8k_phy_read, + .phy_write = qca8k_phy_write, + .get_ethtool_stats = qca8k_get_ethtool_stats, + .get_sset_count = qca8k_get_sset_count, + .get_eee = qca8k_get_eee, + .set_eee = qca8k_set_eee, + .port_enable = qca8k_port_enable, + .port_disable = qca8k_port_disable, + .port_stp_state_set = qca8k_port_stp_state_set, + .port_bridge_join = qca8k_port_bridge_join, + .port_bridge_leave = qca8k_port_bridge_leave, + .port_fdb_prepare = qca8k_port_fdb_prepare, + .port_fdb_add = qca8k_port_fdb_add, + .port_fdb_del = qca8k_port_fdb_del, + .port_fdb_dump = qca8k_port_fdb_dump, +}; + +static int +qca8k_sw_probe(struct mdio_device *mdiodev) +{ + struct qca8k_priv *priv; + u32 id; + + /* allocate the private data struct so that we can probe the switches + * ID register + */ + priv = devm_kzalloc(&mdiodev->dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv->bus = mdiodev->bus; + + /* read the switches ID register */ + id = qca8k_read(priv, QCA8K_REG_MASK_CTRL); + id >>= QCA8K_MASK_CTRL_ID_S; + id &= QCA8K_MASK_CTRL_ID_M; + if (id != QCA8K_ID_QCA8337) + return -ENODEV; + + priv->ds = devm_kzalloc(&mdiodev->dev, sizeof(*priv->ds), GFP_KERNEL); + if (!priv->ds) + return -ENOMEM; + + priv->ds->priv = priv; + priv->ds->dev = &mdiodev->dev; + priv->ds->ops = &qca8k_switch_ops; + mutex_init(&priv->reg_mutex); + dev_set_drvdata(&mdiodev->dev, priv); + + return dsa_register_switch(priv->ds, priv->ds->dev->of_node); +} + +static void +qca8k_sw_remove(struct mdio_device *mdiodev) +{ + struct qca8k_priv *priv = dev_get_drvdata(&mdiodev->dev); + int i; + + for (i = 0; i < QCA8K_NUM_PORTS; i++) + qca8k_port_set_status(priv, i, 0); + + dsa_unregister_switch(priv->ds); +} + +#ifdef CONFIG_PM_SLEEP +static void +qca8k_set_pm(struct qca8k_priv *priv, int enable) +{ + int i; + + for (i = 0; i < QCA8K_NUM_PORTS; i++) { + if (!priv->port_sts[i].enabled) + continue; + + qca8k_port_set_status(priv, i, enable); + } +} + +static int qca8k_suspend(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct qca8k_priv *priv = platform_get_drvdata(pdev); + + qca8k_set_pm(priv, 0); + + return dsa_switch_suspend(priv->ds); +} + +static int qca8k_resume(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct qca8k_priv *priv = platform_get_drvdata(pdev); + + qca8k_set_pm(priv, 1); + + return dsa_switch_resume(priv->ds); +} +#endif /* CONFIG_PM_SLEEP */ + +static SIMPLE_DEV_PM_OPS(qca8k_pm_ops, + qca8k_suspend, qca8k_resume); + +static const struct of_device_id qca8k_of_match[] = { + { .compatible = "qca,qca8337" }, + { /* sentinel */ }, +}; + +static struct mdio_driver qca8kmdio_driver = { + .probe = qca8k_sw_probe, + .remove = qca8k_sw_remove, + .mdiodrv.driver = { + .name = "qca8k", + .of_match_table = qca8k_of_match, + .pm = &qca8k_pm_ops, + }, +}; + +static int __init +qca8kmdio_driver_register(void) +{ + return mdio_driver_register(&qca8kmdio_driver); +} +module_init(qca8kmdio_driver_register); + +static void __exit +qca8kmdio_driver_unregister(void) +{ + mdio_driver_unregister(&qca8kmdio_driver); +} +module_exit(qca8kmdio_driver_unregister); + +MODULE_AUTHOR("Mathieu Olivari, John Crispin "); +MODULE_DESCRIPTION("Driver for QCA8K ethernet switch family"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("platform:qca8k"); diff --git a/drivers/net/dsa/qca8k.h b/drivers/net/dsa/qca8k.h new file mode 100644 index 000000000000..201464719531 --- /dev/null +++ b/drivers/net/dsa/qca8k.h @@ -0,0 +1,185 @@ +/* + * Copyright (C) 2009 Felix Fietkau + * Copyright (C) 2011-2012 Gabor Juhos + * Copyright (c) 2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef __QCA8K_H +#define __QCA8K_H + +#include +#include + +#define QCA8K_NUM_PORTS 7 + +#define PHY_ID_QCA8337 0x004dd036 +#define QCA8K_ID_QCA8337 0x13 + +#define QCA8K_NUM_FDB_RECORDS 2048 + +#define QCA8K_CPU_PORT 0 + +/* Global control registers */ +#define QCA8K_REG_MASK_CTRL 0x000 +#define QCA8K_MASK_CTRL_ID_M 0xff +#define QCA8K_MASK_CTRL_ID_S 8 +#define QCA8K_REG_PORT0_PAD_CTRL 0x004 +#define QCA8K_REG_PORT5_PAD_CTRL 0x008 +#define QCA8K_REG_PORT6_PAD_CTRL 0x00c +#define QCA8K_PORT_PAD_RGMII_EN BIT(26) +#define QCA8K_PORT_PAD_RGMII_TX_DELAY(x) \ + ((0x8 + (x & 0x3)) << 22) +#define QCA8K_PORT_PAD_RGMII_RX_DELAY(x) \ + ((0x10 + (x & 0x3)) << 20) +#define QCA8K_PORT_PAD_RGMII_RX_DELAY_EN BIT(24) +#define QCA8K_PORT_PAD_SGMII_EN BIT(7) +#define QCA8K_REG_MODULE_EN 0x030 +#define QCA8K_MODULE_EN_MIB BIT(0) +#define QCA8K_REG_MIB 0x034 +#define QCA8K_MIB_FLUSH BIT(24) +#define QCA8K_MIB_CPU_KEEP BIT(20) +#define QCA8K_MIB_BUSY BIT(17) +#define QCA8K_GOL_MAC_ADDR0 0x60 +#define QCA8K_GOL_MAC_ADDR1 0x64 +#define QCA8K_REG_PORT_STATUS(_i) (0x07c + (_i) * 4) +#define QCA8K_PORT_STATUS_SPEED GENMASK(2, 0) +#define QCA8K_PORT_STATUS_SPEED_S 0 +#define QCA8K_PORT_STATUS_TXMAC BIT(2) +#define QCA8K_PORT_STATUS_RXMAC BIT(3) +#define QCA8K_PORT_STATUS_TXFLOW BIT(4) +#define QCA8K_PORT_STATUS_RXFLOW BIT(5) +#define QCA8K_PORT_STATUS_DUPLEX BIT(6) +#define QCA8K_PORT_STATUS_LINK_UP BIT(8) +#define QCA8K_PORT_STATUS_LINK_AUTO BIT(9) +#define QCA8K_PORT_STATUS_LINK_PAUSE BIT(10) +#define QCA8K_REG_PORT_HDR_CTRL(_i) (0x9c + (_i * 4)) +#define QCA8K_PORT_HDR_CTRL_RX_MASK GENMASK(3, 2) +#define QCA8K_PORT_HDR_CTRL_RX_S 2 +#define QCA8K_PORT_HDR_CTRL_TX_MASK GENMASK(1, 0) +#define QCA8K_PORT_HDR_CTRL_TX_S 0 +#define QCA8K_PORT_HDR_CTRL_ALL 2 +#define QCA8K_PORT_HDR_CTRL_MGMT 1 +#define QCA8K_PORT_HDR_CTRL_NONE 0 + +/* EEE control registers */ +#define QCA8K_REG_EEE_CTRL 0x100 +#define QCA8K_REG_EEE_CTRL_LPI_EN(_i) ((_i + 1) * 2) + +/* ACL registers */ +#define QCA8K_REG_PORT_VLAN_CTRL0(_i) (0x420 + (_i * 8)) +#define QCA8K_PORT_VLAN_CVID(x) (x << 16) +#define QCA8K_PORT_VLAN_SVID(x) x +#define QCA8K_REG_PORT_VLAN_CTRL1(_i) (0x424 + (_i * 8)) +#define QCA8K_REG_IPV4_PRI_BASE_ADDR 0x470 +#define QCA8K_REG_IPV4_PRI_ADDR_MASK 0x474 + +/* Lookup registers */ +#define QCA8K_REG_ATU_DATA0 0x600 +#define QCA8K_ATU_ADDR2_S 24 +#define QCA8K_ATU_ADDR3_S 16 +#define QCA8K_ATU_ADDR4_S 8 +#define QCA8K_REG_ATU_DATA1 0x604 +#define QCA8K_ATU_PORT_M 0x7f +#define QCA8K_ATU_PORT_S 16 +#define QCA8K_ATU_ADDR0_S 8 +#define QCA8K_REG_ATU_DATA2 0x608 +#define QCA8K_ATU_VID_M 0xfff +#define QCA8K_ATU_VID_S 8 +#define QCA8K_ATU_STATUS_M 0xf +#define QCA8K_ATU_STATUS_STATIC 0xf +#define QCA8K_REG_ATU_FUNC 0x60c +#define QCA8K_ATU_FUNC_BUSY BIT(31) +#define QCA8K_ATU_FUNC_PORT_EN BIT(14) +#define QCA8K_ATU_FUNC_MULTI_EN BIT(13) +#define QCA8K_ATU_FUNC_FULL BIT(12) +#define QCA8K_ATU_FUNC_PORT_M 0xf +#define QCA8K_ATU_FUNC_PORT_S 8 +#define QCA8K_REG_GLOBAL_FW_CTRL0 0x620 +#define QCA8K_GLOBAL_FW_CTRL0_CPU_PORT_EN BIT(10) +#define QCA8K_REG_GLOBAL_FW_CTRL1 0x624 +#define QCA8K_GLOBAL_FW_CTRL1_IGMP_DP_S 24 +#define QCA8K_GLOBAL_FW_CTRL1_BC_DP_S 16 +#define QCA8K_GLOBAL_FW_CTRL1_MC_DP_S 8 +#define QCA8K_GLOBAL_FW_CTRL1_UC_DP_S 0 +#define QCA8K_PORT_LOOKUP_CTRL(_i) (0x660 + (_i) * 0xc) +#define QCA8K_PORT_LOOKUP_MEMBER GENMASK(6, 0) +#define QCA8K_PORT_LOOKUP_STATE_MASK GENMASK(18, 16) +#define QCA8K_PORT_LOOKUP_STATE_DISABLED (0 << 16) +#define QCA8K_PORT_LOOKUP_STATE_BLOCKING (1 << 16) +#define QCA8K_PORT_LOOKUP_STATE_LISTENING (2 << 16) +#define QCA8K_PORT_LOOKUP_STATE_LEARNING (3 << 16) +#define QCA8K_PORT_LOOKUP_STATE_FORWARD (4 << 16) +#define QCA8K_PORT_LOOKUP_STATE GENMASK(18, 16) +#define QCA8K_PORT_LOOKUP_LEARN BIT(20) + +/* Pkt edit registers */ +#define QCA8K_EGRESS_VLAN(x) (0x0c70 + (4 * (x / 2))) + +/* L3 registers */ +#define QCA8K_HROUTER_CONTROL 0xe00 +#define QCA8K_HROUTER_CONTROL_GLB_LOCKTIME_M GENMASK(17, 16) +#define QCA8K_HROUTER_CONTROL_GLB_LOCKTIME_S 16 +#define QCA8K_HROUTER_CONTROL_ARP_AGE_MODE 1 +#define QCA8K_HROUTER_PBASED_CONTROL1 0xe08 +#define QCA8K_HROUTER_PBASED_CONTROL2 0xe0c +#define QCA8K_HNAT_CONTROL 0xe38 + +/* MIB registers */ +#define QCA8K_PORT_MIB_COUNTER(_i) (0x1000 + (_i) * 0x100) + +/* QCA specific MII registers */ +#define MII_ATH_MMD_ADDR 0x0d +#define MII_ATH_MMD_DATA 0x0e + +enum { + QCA8K_PORT_SPEED_10M = 0, + QCA8K_PORT_SPEED_100M = 1, + QCA8K_PORT_SPEED_1000M = 2, + QCA8K_PORT_SPEED_ERR = 3, +}; + +enum qca8k_fdb_cmd { + QCA8K_FDB_FLUSH = 1, + QCA8K_FDB_LOAD = 2, + QCA8K_FDB_PURGE = 3, + QCA8K_FDB_NEXT = 6, + QCA8K_FDB_SEARCH = 7, +}; + +struct ar8xxx_port_status { + struct ethtool_eee eee; + struct net_device *bridge_dev; + int enabled; +}; + +struct qca8k_priv { + struct regmap *regmap; + struct mii_bus *bus; + struct ar8xxx_port_status port_sts[QCA8K_NUM_PORTS]; + struct dsa_switch *ds; + struct mutex reg_mutex; +}; + +struct qca8k_mib_desc { + unsigned int size; + unsigned int offset; + const char *name; +}; + +struct qca8k_fdb { + u16 vid; + u8 port_mask; + u8 aging; + u8 mac[6]; +}; + +#endif /* __QCA8K_H */ From c68df2e7be0c1238ea3c281fd744a204ef3b15a0 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 15 Sep 2016 16:30:02 +0300 Subject: [PATCH 1087/1715] mac80211: allow using AP_LINK_PS with mac80211-generated TIM IE In 46fa38e84b65 ("mac80211: allow software PS-Poll/U-APSD with AP_LINK_PS"), Johannes allowed to use mac80211's code for handling stations that go to PS or send PS-Poll / uAPSD trigger frames for devices that enable RSS. This means that mac80211 doesn't look at frames anymore but rather relies on a notification that will come from the device when a PS transition occurs or when a PS-Poll / trigger frame is detected by the device. iwlwifi will need this capability but still needs mac80211 to take care of the TIM IE. Today, if a driver sets AP_LINK_PS, mac80211 will not update the TIM IE. Change mac80211 to check existence of the set_tim driver callback rather than using AP_LINK_PS to decide if the driver handles the TIM IE internally or not. Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho [reword commit message a bit] Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 5e70fa52e1ff..1b1b28ff4fdb 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -690,7 +690,7 @@ static void __sta_info_recalc_tim(struct sta_info *sta, bool ignore_pending) } /* No need to do anything if the driver does all */ - if (ieee80211_hw_check(&local->hw, AP_LINK_PS)) + if (!local->ops->set_tim) return; if (sta->dead) From b59abfbed638037f3b51eeb73266892cd2df177f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 15 Sep 2016 16:30:03 +0300 Subject: [PATCH 1088/1715] mac80211_hwsim: statically initialize hwsim_radios list There's no need to initialize at runtime, when the static declaration macro can just be used instead, so do that. Signed-off-by: Johannes Berg --- drivers/net/wireless/mac80211_hwsim.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 8c35ac838fce..431f13b4faf6 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -487,7 +487,7 @@ static const struct ieee80211_iface_combination hwsim_if_comb_p2p_dev[] = { }; static spinlock_t hwsim_radio_lock; -static struct list_head hwsim_radios; +static LIST_HEAD(hwsim_radios); static int hwsim_radio_idx; static struct platform_driver mac80211_hwsim_driver = { @@ -3376,7 +3376,6 @@ static int __init init_mac80211_hwsim(void) mac80211_hwsim_unassign_vif_chanctx; spin_lock_init(&hwsim_radio_lock); - INIT_LIST_HEAD(&hwsim_radios); err = register_pernet_device(&hwsim_net_ops); if (err) From fbd05e4a6e82fd573d3aa79e284e424b8d78c149 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Thu, 15 Sep 2016 18:15:09 +0300 Subject: [PATCH 1089/1715] cfg80211: add helper to find an IE that matches a byte-array There are a few places where an IE that matches not only the EID, but also other bytes inside the element, needs to be found. To simplify that and reduce the amount of similar code, implement a new helper function to match the EID and an extra array of bytes. Additionally, simplify cfg80211_find_vendor_ie() by using the new match function. Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 33 +++++++++++++++++++++++- net/wireless/scan.c | 58 ++++++++++++++++++++---------------------- 2 files changed, 59 insertions(+), 32 deletions(-) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index d5e7f690bad9..533cb6410678 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -3946,6 +3946,34 @@ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list, unsigned int cfg80211_classify8021d(struct sk_buff *skb, struct cfg80211_qos_map *qos_map); +/** + * cfg80211_find_ie_match - match information element and byte array in data + * + * @eid: element ID + * @ies: data consisting of IEs + * @len: length of data + * @match: byte array to match + * @match_len: number of bytes in the match array + * @match_offset: offset in the IE where the byte array should match. + * If match_len is zero, this must also be set to zero. + * Otherwise this must be set to 2 or more, because the first + * byte is the element id, which is already compared to eid, and + * the second byte is the IE length. + * + * Return: %NULL if the element ID could not be found or if + * the element is invalid (claims to be longer than the given + * data) or if the byte array doesn't match, or a pointer to the first + * byte of the requested element, that is the byte containing the + * element ID. + * + * Note: There are no checks on the element length other than + * having to fit into the given data and being large enough for the + * byte array to match. + */ +const u8 *cfg80211_find_ie_match(u8 eid, const u8 *ies, int len, + const u8 *match, int match_len, + int match_offset); + /** * cfg80211_find_ie - find information element in data * @@ -3961,7 +3989,10 @@ unsigned int cfg80211_classify8021d(struct sk_buff *skb, * Note: There are no checks on the element length other than * having to fit into the given data. */ -const u8 *cfg80211_find_ie(u8 eid, const u8 *ies, int len); +static inline const u8 *cfg80211_find_ie(u8 eid, const u8 *ies, int len) +{ + return cfg80211_find_ie_match(eid, ies, len, NULL, 0, 0); +} /** * cfg80211_find_vendor_ie - find vendor specific information element in data diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 0358e12be54b..b5bd58d0f731 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -352,52 +352,48 @@ void cfg80211_bss_expire(struct cfg80211_registered_device *rdev) __cfg80211_bss_expire(rdev, jiffies - IEEE80211_SCAN_RESULT_EXPIRE); } -const u8 *cfg80211_find_ie(u8 eid, const u8 *ies, int len) +const u8 *cfg80211_find_ie_match(u8 eid, const u8 *ies, int len, + const u8 *match, int match_len, + int match_offset) { - while (len > 2 && ies[0] != eid) { + /* match_offset can't be smaller than 2, unless match_len is + * zero, in which case match_offset must be zero as well. + */ + if (WARN_ON((match_len && match_offset < 2) || + (!match_len && match_offset))) + return NULL; + + while (len >= 2 && len >= ies[1] + 2) { + if ((ies[0] == eid) && + (ies[1] + 2 >= match_offset + match_len) && + !memcmp(ies + match_offset, match, match_len)) + return ies; + len -= ies[1] + 2; ies += ies[1] + 2; } - if (len < 2) - return NULL; - if (len < 2 + ies[1]) - return NULL; - return ies; + + return NULL; } -EXPORT_SYMBOL(cfg80211_find_ie); +EXPORT_SYMBOL(cfg80211_find_ie_match); const u8 *cfg80211_find_vendor_ie(unsigned int oui, int oui_type, const u8 *ies, int len) { - struct ieee80211_vendor_ie *ie; - const u8 *pos = ies, *end = ies + len; - int ie_oui; + const u8 *ie; + u8 match[] = { oui >> 16, oui >> 8, oui, oui_type }; + int match_len = (oui_type < 0) ? 3 : sizeof(match); if (WARN_ON(oui_type > 0xff)) return NULL; - while (pos < end) { - pos = cfg80211_find_ie(WLAN_EID_VENDOR_SPECIFIC, pos, - end - pos); - if (!pos) - return NULL; + ie = cfg80211_find_ie_match(WLAN_EID_VENDOR_SPECIFIC, ies, len, + match, match_len, 2); - ie = (struct ieee80211_vendor_ie *)pos; + if (ie && (ie[1] < 4)) + return NULL; - /* make sure we can access ie->len */ - BUILD_BUG_ON(offsetof(struct ieee80211_vendor_ie, len) != 1); - - if (ie->len < sizeof(*ie)) - goto cont; - - ie_oui = ie->oui[0] << 16 | ie->oui[1] << 8 | ie->oui[2]; - if (ie_oui == oui && - (oui_type < 0 || ie->oui_type == oui_type)) - return pos; -cont: - pos += 2 + ie->len; - } - return NULL; + return ie; } EXPORT_SYMBOL(cfg80211_find_vendor_ie); From d19127473a575c629c70974cee0bb8acb6374f08 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 07:26:01 +0100 Subject: [PATCH 1090/1715] rxrpc: Make IPv6 support conditional on CONFIG_IPV6 Add CONFIG_AF_RXRPC_IPV6 and make the IPv6 support code conditional on it. This is then made conditional on CONFIG_IPV6. Without this, the following can be seen: net/built-in.o: In function `rxrpc_init_peer': >> peer_object.c:(.text+0x18c3c8): undefined reference to `ip6_route_output_flags' Reported-by: kbuild test robot Signed-off-by: David Howells Signed-off-by: David S. Miller --- net/rxrpc/Kconfig | 7 +++++++ net/rxrpc/af_rxrpc.c | 7 ++++++- net/rxrpc/conn_object.c | 2 ++ net/rxrpc/local_object.c | 2 ++ net/rxrpc/output.c | 2 ++ net/rxrpc/peer_event.c | 4 +++- net/rxrpc/peer_object.c | 10 ++++++++++ net/rxrpc/utils.c | 2 ++ 8 files changed, 34 insertions(+), 2 deletions(-) diff --git a/net/rxrpc/Kconfig b/net/rxrpc/Kconfig index 784c53163b7b..13396c74b5c1 100644 --- a/net/rxrpc/Kconfig +++ b/net/rxrpc/Kconfig @@ -19,6 +19,13 @@ config AF_RXRPC See Documentation/networking/rxrpc.txt. +config AF_RXRPC_IPV6 + bool "IPv6 support for RxRPC" + depends on (IPV6 = m && AF_RXRPC = m) || (IPV6 = y && AF_RXRPC) + help + Say Y here to allow AF_RXRPC to use IPV6 UDP as well as IPV4 UDP as + its network transport. + config AF_RXRPC_DEBUG bool "RxRPC dynamic debugging" diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index f61f7b2d1ca4..09f81befc705 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -109,12 +109,14 @@ static int rxrpc_validate_address(struct rxrpc_sock *rx, tail = offsetof(struct sockaddr_rxrpc, transport.sin.__pad); break; +#ifdef CONFIG_AF_RXRPC_IPV6 case AF_INET6: if (srx->transport_len < sizeof(struct sockaddr_in6)) return -EINVAL; tail = offsetof(struct sockaddr_rxrpc, transport) + sizeof(struct sockaddr_in6); break; +#endif default: return -EAFNOSUPPORT; @@ -413,9 +415,11 @@ static int rxrpc_sendmsg(struct socket *sock, struct msghdr *m, size_t len) case AF_INET: rx->srx.transport_len = sizeof(struct sockaddr_in); break; +#ifdef CONFIG_AF_RXRPC_IPV6 case AF_INET6: rx->srx.transport_len = sizeof(struct sockaddr_in6); break; +#endif default: ret = -EAFNOSUPPORT; goto error_unlock; @@ -570,7 +574,8 @@ static int rxrpc_create(struct net *net, struct socket *sock, int protocol, return -EAFNOSUPPORT; /* we support transport protocol UDP/UDP6 only */ - if (protocol != PF_INET && protocol != PF_INET6) + if (protocol != PF_INET && + IS_ENABLED(CONFIG_AF_RXRPC_IPV6) && protocol != PF_INET6) return -EPROTONOSUPPORT; if (sock->type != SOCK_DGRAM) diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index c0ddba787fd4..bb1f29280aea 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -134,6 +134,7 @@ struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *local, srx.transport.sin.sin_addr.s_addr) goto not_found; break; +#ifdef CONFIG_AF_RXRPC_IPV6 case AF_INET6: if (peer->srx.transport.sin6.sin6_port != srx.transport.sin6.sin6_port || @@ -142,6 +143,7 @@ struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *local, sizeof(struct in6_addr)) != 0) goto not_found; break; +#endif default: BUG(); } diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index f5b9bb0d3f98..e3fad80b0795 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -58,6 +58,7 @@ static long rxrpc_local_cmp_key(const struct rxrpc_local *local, memcmp(&local->srx.transport.sin.sin_addr, &srx->transport.sin.sin_addr, sizeof(struct in_addr)); +#ifdef CONFIG_AF_RXRPC_IPV6 case AF_INET6: /* If the choice of UDP6 port is left up to the transport, then * the endpoint record doesn't match. @@ -67,6 +68,7 @@ static long rxrpc_local_cmp_key(const struct rxrpc_local *local, memcmp(&local->srx.transport.sin6.sin6_addr, &srx->transport.sin6.sin6_addr, sizeof(struct in6_addr)); +#endif default: BUG(); } diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index d7cd87f17f0d..06a9aca739d1 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -259,6 +259,7 @@ send_fragmentable: } break; +#ifdef CONFIG_AF_RXRPC_IPV6 case AF_INET6: opt = IPV6_PMTUDISC_DONT; ret = kernel_setsockopt(conn->params.local->socket, @@ -274,6 +275,7 @@ send_fragmentable: (char *)&opt, sizeof(opt)); } break; +#endif } up_write(&conn->params.local->defrag_sem); diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index 74217589cf44..9e0725f5652b 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -66,6 +66,7 @@ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local, } break; +#ifdef CONFIG_AF_RXRPC_IPV6 case AF_INET6: srx.transport.sin6.sin6_port = serr->port; srx.transport_len = sizeof(struct sockaddr_in6); @@ -78,7 +79,7 @@ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local, break; case SO_EE_ORIGIN_ICMP: _net("Rx ICMP on v6 sock"); - memcpy(&srx.transport.sin6.sin6_addr.s6_addr + 12, + memcpy(srx.transport.sin6.sin6_addr.s6_addr + 12, skb_network_header(skb) + serr->addr_offset, sizeof(struct in_addr)); break; @@ -89,6 +90,7 @@ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local, break; } break; +#endif default: BUG(); diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index dfc07b41a472..f3e5766910fd 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -52,11 +52,13 @@ static unsigned long rxrpc_peer_hash_key(struct rxrpc_local *local, size = sizeof(srx->transport.sin.sin_addr); p = (u16 *)&srx->transport.sin.sin_addr; break; +#ifdef CONFIG_AF_RXRPC_IPV6 case AF_INET6: hash_key += (u16 __force)srx->transport.sin.sin_port; size = sizeof(srx->transport.sin6.sin6_addr); p = (u16 *)&srx->transport.sin6.sin6_addr; break; +#endif default: WARN(1, "AF_RXRPC: Unsupported transport address family\n"); return 0; @@ -100,12 +102,14 @@ static long rxrpc_peer_cmp_key(const struct rxrpc_peer *peer, memcmp(&peer->srx.transport.sin.sin_addr, &srx->transport.sin.sin_addr, sizeof(struct in_addr)); +#ifdef CONFIG_AF_RXRPC_IPV6 case AF_INET6: return ((u16 __force)peer->srx.transport.sin6.sin6_port - (u16 __force)srx->transport.sin6.sin6_port) ?: memcmp(&peer->srx.transport.sin6.sin6_addr, &srx->transport.sin6.sin6_addr, sizeof(struct in6_addr)); +#endif default: BUG(); } @@ -159,7 +163,9 @@ static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer) struct rtable *rt; struct flowi fl; struct flowi4 *fl4 = &fl.u.ip4; +#ifdef CONFIG_AF_RXRPC_IPV6 struct flowi6 *fl6 = &fl.u.ip6; +#endif peer->if_mtu = 1500; @@ -177,6 +183,7 @@ static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer) dst = &rt->dst; break; +#ifdef CONFIG_AF_RXRPC_IPV6 case AF_INET6: fl6->flowi6_iif = LOOPBACK_IFINDEX; fl6->flowi6_scope = RT_SCOPE_UNIVERSE; @@ -191,6 +198,7 @@ static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer) return; } break; +#endif default: BUG(); @@ -241,9 +249,11 @@ static void rxrpc_init_peer(struct rxrpc_peer *peer, unsigned long hash_key) case AF_INET: peer->hdrsize = sizeof(struct iphdr); break; +#ifdef CONFIG_AF_RXRPC_IPV6 case AF_INET6: peer->hdrsize = sizeof(struct ipv6hdr); break; +#endif default: BUG(); } diff --git a/net/rxrpc/utils.c b/net/rxrpc/utils.c index b88914d53ca5..ff7af71c4b49 100644 --- a/net/rxrpc/utils.c +++ b/net/rxrpc/utils.c @@ -30,6 +30,7 @@ int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *srx, struct sk_buff *skb) srx->transport.sin.sin_addr.s_addr = ip_hdr(skb)->saddr; return 0; +#ifdef CONFIG_AF_RXRPC_IPV6 case ETH_P_IPV6: srx->transport_type = SOCK_DGRAM; srx->transport_len = sizeof(srx->transport.sin6); @@ -37,6 +38,7 @@ int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *srx, struct sk_buff *skb) srx->transport.sin6.sin6_port = udp_hdr(skb)->source; srx->transport.sin6.sin6_addr = ipv6_hdr(skb)->saddr; return 0; +#endif default: pr_warn_ratelimited("AF_RXRPC: Unknown eth protocol %u\n", From fabf9201806255d70386d8bc9f6a2942c0940da2 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:11 +0100 Subject: [PATCH 1091/1715] rxrpc: Remove some whitespace. Remove a tab that's on a line that should otherwise be blank. Signed-off-by: David Howells --- net/rxrpc/call_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 61432049869b..9367c3be31eb 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -31,7 +31,7 @@ static void rxrpc_set_timer(struct rxrpc_call *call) _enter("{%ld,%ld,%ld:%ld}", call->ack_at - now, call->resend_at - now, call->expire_at - now, call->timer.expires - now); - + read_lock_bh(&call->state_lock); if (call->state < RXRPC_CALL_COMPLETE) { From 4b22457c06a3a950e14938c486283ad0f308c13d Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:11 +0100 Subject: [PATCH 1092/1715] rxrpc: Move the check of rx_pkt_offset from rxrpc_locate_data() to caller Move the check of rx_pkt_offset from rxrpc_locate_data() to the caller, rxrpc_recvmsg_data(), so that it's more clear what's going on there. Signed-off-by: David Howells --- net/rxrpc/recvmsg.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index a284205b8ecf..0d085f5cf1bf 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -240,9 +240,6 @@ static int rxrpc_locate_data(struct rxrpc_call *call, struct sk_buff *skb, int ret; u8 annotation = *_annotation; - if (offset > 0) - return 0; - /* Locate the subpacket */ offset = sp->offset; len = skb->len - sp->offset; @@ -303,8 +300,10 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, if (msg) sock_recv_timestamp(msg, sock->sk, skb); - ret = rxrpc_locate_data(call, skb, &call->rxtx_annotations[ix], - &rx_pkt_offset, &rx_pkt_len); + if (rx_pkt_offset == 0) + ret = rxrpc_locate_data(call, skb, + &call->rxtx_annotations[ix], + &rx_pkt_offset, &rx_pkt_len); _debug("recvmsg %x DATA #%u { %d, %d }", sp->hdr.callNumber, seq, rx_pkt_offset, rx_pkt_len); From 2e2ea51dec2ab6a81950d4b436eb66ebf45dd507 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:11 +0100 Subject: [PATCH 1093/1715] rxrpc: Check the return value of rxrpc_locate_data() Check the return value of rxrpc_locate_data() in rxrpc_recvmsg_data(). Signed-off-by: David Howells --- net/rxrpc/recvmsg.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 0d085f5cf1bf..1edf2cf62cc5 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -300,10 +300,13 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, if (msg) sock_recv_timestamp(msg, sock->sk, skb); - if (rx_pkt_offset == 0) + if (rx_pkt_offset == 0) { ret = rxrpc_locate_data(call, skb, &call->rxtx_annotations[ix], &rx_pkt_offset, &rx_pkt_len); + if (ret < 0) + goto out; + } _debug("recvmsg %x DATA #%u { %d, %d }", sp->hdr.callNumber, seq, rx_pkt_offset, rx_pkt_len); From 816c9fce12f3745abc959c0fca8ace1c2c51421c Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:11 +0100 Subject: [PATCH 1094/1715] rxrpc: Fix handling of the last packet in rxrpc_recvmsg_data() The code for determining the last packet in rxrpc_recvmsg_data() has been using the RXRPC_CALL_RX_LAST flag to determine if the rx_top pointer points to the last packet or not. This isn't a good idea, however, as the input code may be running simultaneously on another CPU and that sets the flag *before* updating the top pointer. Fix this by the following means: (1) Restrict the use of RXRPC_CALL_RX_LAST to the input routines only. There's otherwise a synchronisation problem between detecting the flag and checking tx_top. This could probably be dealt with by appropriate application of memory barriers, but there's a simpler way. (2) Set RXRPC_CALL_RX_LAST after setting rx_top. (3) Make rxrpc_rotate_rx_window() consult the flags header field of the DATA packet it's about to discard to see if that was the last packet. Use this as the basis for ending the Rx phase. This shouldn't be a problem because the recvmsg side of things is guaranteed to see the packets in order. (4) Make rxrpc_recvmsg_data() return 1 to indicate the end of the data if: (a) the packet it has just processed is marked as RXRPC_LAST_PACKET (b) the call's Rx phase has been ended. Signed-off-by: David Howells --- net/rxrpc/input.c | 4 +++- net/rxrpc/recvmsg.c | 49 ++++++++++++++++++++++++++++++--------------- 2 files changed, 36 insertions(+), 17 deletions(-) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 75af0bd316c7..f0d9115b9b7e 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -238,7 +238,7 @@ next_subpacket: len = RXRPC_JUMBO_DATALEN; if (flags & RXRPC_LAST_PACKET) { - if (test_and_set_bit(RXRPC_CALL_RX_LAST, &call->flags) && + if (test_bit(RXRPC_CALL_RX_LAST, &call->flags) && seq != call->rx_top) return rxrpc_proto_abort("LSN", call, seq); } else { @@ -282,6 +282,8 @@ next_subpacket: call->rxtx_buffer[ix] = skb; if (after(seq, call->rx_top)) smp_store_release(&call->rx_top, seq); + if (flags & RXRPC_LAST_PACKET) + set_bit(RXRPC_CALL_RX_LAST, &call->flags); queued = true; if (after_eq(seq, call->rx_expect_next)) { diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 1edf2cf62cc5..8b8d7e14f800 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -134,6 +134,8 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call) { _enter("%d,%s", call->debug_id, rxrpc_call_states[call->state]); + ASSERTCMP(call->rx_hard_ack, ==, call->rx_top); + if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY) { rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, 0, true, false); rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK); @@ -163,8 +165,10 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call) */ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) { + struct rxrpc_skb_priv *sp; struct sk_buff *skb; rxrpc_seq_t hard_ack, top; + u8 flags; int ix; _enter("%d", call->debug_id); @@ -177,6 +181,8 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) ix = hard_ack & RXRPC_RXTX_BUFF_MASK; skb = call->rxtx_buffer[ix]; rxrpc_see_skb(skb); + sp = rxrpc_skb(skb); + flags = sp->hdr.flags; call->rxtx_buffer[ix] = NULL; call->rxtx_annotations[ix] = 0; /* Barrier against rxrpc_input_data(). */ @@ -184,8 +190,8 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) rxrpc_free_skb(skb); - _debug("%u,%u,%lx", hard_ack, top, call->flags); - if (hard_ack == top && test_bit(RXRPC_CALL_RX_LAST, &call->flags)) + _debug("%u,%u,%02x", hard_ack, top, flags); + if (flags & RXRPC_LAST_PACKET) rxrpc_end_rx_phase(call); } @@ -278,13 +284,19 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, size_t remain; bool last; unsigned int rx_pkt_offset, rx_pkt_len; - int ix, copy, ret = 0; + int ix, copy, ret = -EAGAIN, ret2; _enter(""); rx_pkt_offset = call->rx_pkt_offset; rx_pkt_len = call->rx_pkt_len; + if (call->state >= RXRPC_CALL_SERVER_ACK_REQUEST) { + seq = call->rx_hard_ack; + ret = 1; + goto done; + } + /* Barriers against rxrpc_input_data(). */ hard_ack = call->rx_hard_ack; top = smp_load_acquire(&call->rx_top); @@ -301,11 +313,13 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, sock_recv_timestamp(msg, sock->sk, skb); if (rx_pkt_offset == 0) { - ret = rxrpc_locate_data(call, skb, - &call->rxtx_annotations[ix], - &rx_pkt_offset, &rx_pkt_len); - if (ret < 0) + ret2 = rxrpc_locate_data(call, skb, + &call->rxtx_annotations[ix], + &rx_pkt_offset, &rx_pkt_len); + if (ret2 < 0) { + ret = ret2; goto out; + } } _debug("recvmsg %x DATA #%u { %d, %d }", sp->hdr.callNumber, seq, rx_pkt_offset, rx_pkt_len); @@ -316,10 +330,12 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, if (copy > remain) copy = remain; if (copy > 0) { - ret = skb_copy_datagram_iter(skb, rx_pkt_offset, iter, - copy); - if (ret < 0) + ret2 = skb_copy_datagram_iter(skb, rx_pkt_offset, iter, + copy); + if (ret2 < 0) { + ret = ret2; goto out; + } /* handle piecemeal consumption of data packets */ _debug("copied %d @%zu", copy, *_offset); @@ -332,6 +348,7 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, if (rx_pkt_len > 0) { _debug("buffer full"); ASSERTCMP(*_offset, ==, len); + ret = 0; break; } @@ -342,19 +359,19 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, rx_pkt_offset = 0; rx_pkt_len = 0; - ASSERTIFCMP(last, seq, ==, top); + if (last) { + ASSERTCMP(seq, ==, READ_ONCE(call->rx_top)); + ret = 1; + goto out; + } } - if (after(seq, top)) { - ret = -EAGAIN; - if (test_bit(RXRPC_CALL_RX_LAST, &call->flags)) - ret = 1; - } out: if (!(flags & MSG_PEEK)) { call->rx_pkt_offset = rx_pkt_offset; call->rx_pkt_len = rx_pkt_len; } +done: _leave(" = %d [%u/%u]", ret, seq, top); return ret; } From e6f3afb3fc058e17b407b6f7cac08058b19e641c Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:11 +0100 Subject: [PATCH 1095/1715] rxrpc: Record calls that need to be accepted Record calls that need to be accepted using sk_acceptq_added() otherwise the backlog counter goes negative because sk_acceptq_removed() is called. This causes the preallocator to malfunction. Calls that are preaccepted by AFS within the kernel aren't affected by this. Signed-off-by: David Howells --- net/rxrpc/call_accept.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 26c293ef98eb..323b8da50163 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -369,6 +369,8 @@ found_service: if (rx->notify_new_call) rx->notify_new_call(&rx->sk, call, call->user_call_ID); + else + sk_acceptq_added(&rx->sk); spin_lock(&conn->state_lock); switch (conn->state) { From 0360da6db7d6390e7bd2f6c93b01af29bcd36ad5 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:11 +0100 Subject: [PATCH 1096/1715] rxrpc: Purge the to_be_accepted queue on socket release Purge the queue of to_be_accepted calls on socket release. Note that purging sock_calls doesn't release the ref owned by to_be_accepted. Probably the sock_calls list is redundant given a purges of the recvmsg_q, the to_be_accepted queue and the calls tree. Signed-off-by: David Howells --- net/rxrpc/call_object.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 22f9b0d1a138..b0ffbd9664e6 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -476,6 +476,16 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx) _enter("%p", rx); + while (!list_empty(&rx->to_be_accepted)) { + call = list_entry(rx->to_be_accepted.next, + struct rxrpc_call, accept_link); + list_del(&call->accept_link); + rxrpc_abort_call("SKR", call, 0, RX_CALL_DEAD, ECONNRESET); + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); + rxrpc_release_call(rx, call); + rxrpc_put_call(call, rxrpc_call_put); + } + while (!list_empty(&rx->sock_calls)) { call = list_entry(rx->sock_calls.next, struct rxrpc_call, sock_link); From 66d58af7f4af53e8318e852efa31a7cb0e31bfb6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:12 +0100 Subject: [PATCH 1097/1715] rxrpc: Fix the putting of client connections In rxrpc_put_one_client_conn(), if a connection has RXRPC_CONN_COUNTED set on it, then it's accounted for in rxrpc_nr_client_conns and may be on various lists - and this is cleaned up correctly. However, if the connection doesn't have RXRPC_CONN_COUNTED set on it, then the put routine returns rather than just skipping the extra bit of cleanup. Fix this by making the extra bit of clean up conditional instead and always killing off the connection. This manifests itself as connections with a zero usage count hanging around in /proc/net/rxrpc_conns because the connection allocated, but discarded, due to a race with another process that set up a parallel connection, which was then shared instead. Signed-off-by: David Howells --- net/rxrpc/conn_client.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 9344a8416ceb..5a675c43cace 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -818,7 +818,7 @@ idle_connection: static struct rxrpc_connection * rxrpc_put_one_client_conn(struct rxrpc_connection *conn) { - struct rxrpc_connection *next; + struct rxrpc_connection *next = NULL; struct rxrpc_local *local = conn->params.local; unsigned int nr_conns; @@ -834,24 +834,22 @@ rxrpc_put_one_client_conn(struct rxrpc_connection *conn) ASSERTCMP(conn->cache_state, ==, RXRPC_CONN_CLIENT_INACTIVE); - if (!test_bit(RXRPC_CONN_COUNTED, &conn->flags)) - return NULL; + if (test_bit(RXRPC_CONN_COUNTED, &conn->flags)) { + spin_lock(&rxrpc_client_conn_cache_lock); + nr_conns = --rxrpc_nr_client_conns; - spin_lock(&rxrpc_client_conn_cache_lock); - nr_conns = --rxrpc_nr_client_conns; + if (nr_conns < rxrpc_max_client_connections && + !list_empty(&rxrpc_waiting_client_conns)) { + next = list_entry(rxrpc_waiting_client_conns.next, + struct rxrpc_connection, cache_link); + rxrpc_get_connection(next); + rxrpc_activate_conn(next); + } - next = NULL; - if (nr_conns < rxrpc_max_client_connections && - !list_empty(&rxrpc_waiting_client_conns)) { - next = list_entry(rxrpc_waiting_client_conns.next, - struct rxrpc_connection, cache_link); - rxrpc_get_connection(next); - rxrpc_activate_conn(next); + spin_unlock(&rxrpc_client_conn_cache_lock); } - spin_unlock(&rxrpc_client_conn_cache_lock); rxrpc_kill_connection(conn); - if (next) rxrpc_activate_channels(next); From 357f5ef64628c2d6c532e7a6bfc0bc3830b4c221 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:12 +0100 Subject: [PATCH 1098/1715] rxrpc: Call rxrpc_release_call() on error in rxrpc_new_client_call() Call rxrpc_release_call() on getting an error in rxrpc_new_client_call() rather than trying to do the cleanup ourselves. This isn't a problem, provided we set RXRPC_CALL_HAS_USERID only if we actually add the call to the calls tree as cleanup code fragments that would otherwise cause problems are conditional. Without this, we miss some of the cleanup. Signed-off-by: David Howells --- net/rxrpc/call_object.c | 36 ++++++++++++------------------------ 1 file changed, 12 insertions(+), 24 deletions(-) diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index b0ffbd9664e6..23f5a5f58282 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -226,9 +226,6 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, (const void *)user_call_ID); /* Publish the call, even though it is incompletely set up as yet */ - call->user_call_ID = user_call_ID; - __set_bit(RXRPC_CALL_HAS_USERID, &call->flags); - write_lock(&rx->call_lock); pp = &rx->calls.rb_node; @@ -242,10 +239,12 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, else if (user_call_ID > xcall->user_call_ID) pp = &(*pp)->rb_right; else - goto found_user_ID_now_present; + goto error_dup_user_ID; } rcu_assign_pointer(call->socket, rx); + call->user_call_ID = user_call_ID; + __set_bit(RXRPC_CALL_HAS_USERID, &call->flags); rxrpc_get_call(call, rxrpc_call_got_userid); rb_link_node(&call->sock_node, parent, pp); rb_insert_color(&call->sock_node, &rx->calls); @@ -276,33 +275,22 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, _leave(" = %p [new]", call); return call; -error: - write_lock(&rx->call_lock); - rb_erase(&call->sock_node, &rx->calls); - write_unlock(&rx->call_lock); - rxrpc_put_call(call, rxrpc_call_put_userid); - - write_lock(&rxrpc_call_lock); - list_del_init(&call->link); - write_unlock(&rxrpc_call_lock); - -error_out: - __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, - RX_CALL_DEAD, ret); - set_bit(RXRPC_CALL_RELEASED, &call->flags); - rxrpc_put_call(call, rxrpc_call_put); - _leave(" = %d", ret); - return ERR_PTR(ret); - /* We unexpectedly found the user ID in the list after taking * the call_lock. This shouldn't happen unless the user races * with itself and tries to add the same user ID twice at the * same time in different threads. */ -found_user_ID_now_present: +error_dup_user_ID: write_unlock(&rx->call_lock); ret = -EEXIST; - goto error_out; + +error: + __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, + RX_CALL_DEAD, ret); + rxrpc_release_call(rx, call); + rxrpc_put_call(call, rxrpc_call_put); + _leave(" = %d", ret); + return ERR_PTR(ret); } /* From 78883793f8ac4bb3f97d48db7a8c71d8476bcf98 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:12 +0100 Subject: [PATCH 1099/1715] rxrpc: Fix unexposed client conn release If the last call on a client connection is release after the connection has had a bunch of calls allocated but before any DATA packets are sent (so that it's not yet marked RXRPC_CONN_EXPOSED), an assertion will happen in rxrpc_disconnect_client_call(). af_rxrpc: Assertion failed - 1(0x1) >= 2(0x2) is false ------------[ cut here ]------------ kernel BUG at ../net/rxrpc/conn_client.c:753! This is because it's expecting the conn to have been exposed and to have 2 or more refs - but this isn't necessarily the case. Simply remove the assertion. This allows the conn to be moved into the inactive state and deleted if it isn't resurrected before the final put is called. Signed-off-by: David Howells --- net/rxrpc/conn_client.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 5a675c43cace..226bc910e556 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -721,7 +721,6 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call) } ASSERTCMP(rcu_access_pointer(chan->call), ==, call); - ASSERTCMP(atomic_read(&conn->usage), >=, 2); /* If a client call was exposed to the world, we save the result for * retransmission. From d01dc4c3c1209e865368d5f8d3b5e08f97326ca9 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:12 +0100 Subject: [PATCH 1100/1715] rxrpc: Fix the parsing of soft-ACKs The soft-ACK parser doesn't increment the pointer into the soft-ACK list, resulting in the first ACK/NACK value being applied to all the relevant packets in the Tx queue. This has the potential to miss retransmissions and cause excessive retransmissions. Fix this by incrementing the pointer. Signed-off-by: David Howells --- net/rxrpc/input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index f0d9115b9b7e..c1f83d22f9b7 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -384,7 +384,7 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks, for (; nr_acks > 0; nr_acks--, seq++) { ix = seq & RXRPC_RXTX_BUFF_MASK; - switch (*acks) { + switch (*acks++) { case RXRPC_ACK_TYPE_ACK: call->rxtx_annotations[ix] = RXRPC_TX_ANNO_ACK; break; From dfa7d9204054b091949d87270e55e0fd5800c3ae Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:12 +0100 Subject: [PATCH 1101/1715] rxrpc: Fix retransmission algorithm Make the retransmission algorithm use for-loops instead of do-loops and move the counter increments into the for-statement increment slots. Though the do-loops are slighly more efficient since there will be at least one pass through the each loop, the counter increments are harder to get right as the continue-statements skip them. Without this, if there are any positive acks within the loop, the do-loop will cycle forever because the counter increment is never done. Signed-off-by: David Howells --- net/rxrpc/call_event.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 9367c3be31eb..f0cabc48a1b7 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -163,8 +163,7 @@ static void rxrpc_resend(struct rxrpc_call *call) */ now = jiffies; resend_at = now + rxrpc_resend_timeout; - seq = cursor + 1; - do { + for (seq = cursor + 1; before_eq(seq, top); seq++) { ix = seq & RXRPC_RXTX_BUFF_MASK; annotation = call->rxtx_annotations[ix]; if (annotation == RXRPC_TX_ANNO_ACK) @@ -184,8 +183,7 @@ static void rxrpc_resend(struct rxrpc_call *call) /* Okay, we need to retransmit a packet. */ call->rxtx_annotations[ix] = RXRPC_TX_ANNO_RETRANS; - seq++; - } while (before_eq(seq, top)); + } call->resend_at = resend_at; @@ -194,8 +192,7 @@ static void rxrpc_resend(struct rxrpc_call *call) * lock is dropped, it may clear some of the retransmission markers for * packets that it soft-ACKs. */ - seq = cursor + 1; - do { + for (seq = cursor + 1; before_eq(seq, top); seq++) { ix = seq & RXRPC_RXTX_BUFF_MASK; annotation = call->rxtx_annotations[ix]; if (annotation != RXRPC_TX_ANNO_RETRANS) @@ -237,8 +234,7 @@ static void rxrpc_resend(struct rxrpc_call *call) if (after(call->tx_hard_ack, seq)) seq = call->tx_hard_ack; - seq++; - } while (before_eq(seq, top)); + } out_unlock: spin_unlock_bh(&call->lock); From 27d0fc431c6b4847231c1490fa541bc3f5a7a351 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:13 +0100 Subject: [PATCH 1102/1715] rxrpc: Don't transmit an ACK if there's no reason set Don't transmit an ACK if call->ackr_reason in unset. There's the possibility of a race between recvmsg() sending an ACK and the background processing thread trying to send the same one. Signed-off-by: David Howells --- net/rxrpc/output.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 06a9aca739d1..aa0507214b31 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -137,6 +137,11 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) switch (type) { case RXRPC_PACKET_TYPE_ACK: spin_lock_bh(&call->lock); + if (!call->ackr_reason) { + spin_unlock_bh(&call->lock); + ret = 0; + goto out; + } n = rxrpc_fill_out_ack(call, pkt); call->ackr_reason = 0; From 2311e327cda015a24a201efc7655a9a983679e55 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:12 +0100 Subject: [PATCH 1103/1715] rxrpc: Be consistent about switch value in rxrpc_send_call_packet() rxrpc_send_call_packet() should use type in both its switch-statements rather than using pkt->whdr.type. This might give the compiler an easier job of uninitialised variable checking. Signed-off-by: David Howells --- net/rxrpc/output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index aa0507214b31..0b21ed859de7 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -182,7 +182,7 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) &msg, iov, ioc, len); if (ret < 0 && call->state < RXRPC_CALL_COMPLETE) { - switch (pkt->whdr.type) { + switch (type) { case RXRPC_PACKET_TYPE_ACK: rxrpc_propose_ACK(call, pkt->ack.reason, ntohs(pkt->ack.maxSkew), From 182f50562490e5861afaa7a2e42dcc0dd9dcfcca Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:12 +0100 Subject: [PATCH 1104/1715] rxrpc: Fix the basic transmit DATA packet content size at 1412 bytes Fix the basic transmit DATA packet content size at 1412 bytes so that they can be arbitrarily assembled into jumbo packets. In the future, I'm thinking of moving to keeping a jumbo packet header at the beginning of each packet in the Tx queue and creating the packet header on the spot when kernel_sendmsg() is invoked. That way, jumbo packets can be assembled on the spur of the moment for (re-)transmission. Signed-off-by: David Howells --- net/rxrpc/sendmsg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index cba236575073..8bfddf4e338c 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -214,7 +214,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, goto maybe_error; } - max = call->conn->params.peer->maxdata; + max = RXRPC_JUMBO_DATALEN; max -= call->conn->security_size; max &= ~(call->conn->size_align - 1UL); From a3868bfc8d5b0f36c784deab644ee1d2b0e6974b Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:13 +0100 Subject: [PATCH 1105/1715] rxrpc: Print the packet type name in the Rx packet trace Print a symbolic packet type name for each valid received packet in the trace output, not just a number. Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 5 +++-- net/rxrpc/ar-internal.h | 6 +++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index ea3b10ed91a8..0a30c673509c 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -93,11 +93,12 @@ TRACE_EVENT(rxrpc_rx_packet, memcpy(&__entry->hdr, &sp->hdr, sizeof(__entry->hdr)); ), - TP_printk("%08x:%08x:%08x:%04x %08x %08x %02x %02x", + TP_printk("%08x:%08x:%08x:%04x %08x %08x %02x %02x %s", __entry->hdr.epoch, __entry->hdr.cid, __entry->hdr.callNumber, __entry->hdr.serviceId, __entry->hdr.serial, __entry->hdr.seq, - __entry->hdr.type, __entry->hdr.flags) + __entry->hdr.type, __entry->hdr.flags, + __entry->hdr.type <= 15 ? rxrpc_pkts[__entry->hdr.type] : "?UNK") ); TRACE_EVENT(rxrpc_rx_done, diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index e78c40b37db5..0f6fafa2c271 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -551,6 +551,9 @@ enum rxrpc_call_trace { extern const char rxrpc_call_traces[rxrpc_call__nr_trace][4]; +extern const char *const rxrpc_pkts[]; +extern const char *rxrpc_acks(u8 reason); + #include /* @@ -851,11 +854,8 @@ extern unsigned int rxrpc_rx_mtu; extern unsigned int rxrpc_rx_jumbo_max; extern unsigned int rxrpc_resend_timeout; -extern const char *const rxrpc_pkts[]; extern const s8 rxrpc_ack_priority[]; -extern const char *rxrpc_acks(u8 reason); - /* * output.c */ From a84a46d73050f70fd8820c74840e2815c78d8690 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:14 +0100 Subject: [PATCH 1106/1715] rxrpc: Add some additional call tracing Add additional call tracepoint points for noting call-connected, call-released and connection-failed events. Also fix one tracepoint that was using an integer instead of the corresponding enum value as the point type. Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 3 +++ net/rxrpc/call_object.c | 18 ++++++++++++++---- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 0f6fafa2c271..4a73c20d9436 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -539,6 +539,8 @@ enum rxrpc_call_trace { rxrpc_call_queued, rxrpc_call_queued_ref, rxrpc_call_seen, + rxrpc_call_connected, + rxrpc_call_release, rxrpc_call_got, rxrpc_call_got_userid, rxrpc_call_got_kernel, @@ -546,6 +548,7 @@ enum rxrpc_call_trace { rxrpc_call_put_userid, rxrpc_call_put_kernel, rxrpc_call_put_noqueue, + rxrpc_call_error, rxrpc_call__nr_trace }; diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 23f5a5f58282..0df9d1af8edb 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -53,6 +53,8 @@ const char rxrpc_call_traces[rxrpc_call__nr_trace][4] = { [rxrpc_call_new_service] = "NWs", [rxrpc_call_queued] = "QUE", [rxrpc_call_queued_ref] = "QUR", + [rxrpc_call_connected] = "CON", + [rxrpc_call_release] = "RLS", [rxrpc_call_seen] = "SEE", [rxrpc_call_got] = "GOT", [rxrpc_call_got_userid] = "Gus", @@ -61,6 +63,7 @@ const char rxrpc_call_traces[rxrpc_call__nr_trace][4] = { [rxrpc_call_put_userid] = "Pus", [rxrpc_call_put_kernel] = "Pke", [rxrpc_call_put_noqueue] = "PNQ", + [rxrpc_call_error] = "*E*", }; struct kmem_cache *rxrpc_call_jar; @@ -222,8 +225,8 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, return call; } - trace_rxrpc_call(call, 0, atomic_read(&call->usage), here, - (const void *)user_call_ID); + trace_rxrpc_call(call, rxrpc_call_new_client, atomic_read(&call->usage), + here, (const void *)user_call_ID); /* Publish the call, even though it is incompletely set up as yet */ write_lock(&rx->call_lock); @@ -263,6 +266,9 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, if (ret < 0) goto error; + trace_rxrpc_call(call, rxrpc_call_connected, atomic_read(&call->usage), + here, ERR_PTR(ret)); + spin_lock_bh(&call->conn->params.peer->lock); hlist_add_head(&call->error_link, &call->conn->params.peer->error_targets); @@ -287,6 +293,8 @@ error_dup_user_ID: error: __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, RX_CALL_DEAD, ret); + trace_rxrpc_call(call, rxrpc_call_error, atomic_read(&call->usage), + here, ERR_PTR(ret)); rxrpc_release_call(rx, call); rxrpc_put_call(call, rxrpc_call_put); _leave(" = %d", ret); @@ -396,15 +404,17 @@ void rxrpc_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op) */ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) { + const void *here = __builtin_return_address(0); struct rxrpc_connection *conn = call->conn; bool put = false; int i; _enter("{%d,%d}", call->debug_id, atomic_read(&call->usage)); - ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); + trace_rxrpc_call(call, rxrpc_call_release, atomic_read(&call->usage), + here, (const void *)call->flags); - rxrpc_see_call(call); + ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); spin_lock_bh(&call->lock); if (test_and_set_bit(RXRPC_CALL_RELEASED, &call->flags)) From 363deeab6d0f308d33d011323661ae9cf5f9f8d6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:14 +0100 Subject: [PATCH 1107/1715] rxrpc: Add connection tracepoint and client conn state tracepoint Add a pair of tracepoints, one to track rxrpc_connection struct ref counting and the other to track the client connection cache state. Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 60 +++++++++++++++++++++++++++ net/rxrpc/ar-internal.h | 76 +++++++++++++++++++++------------- net/rxrpc/call_accept.c | 4 ++ net/rxrpc/call_object.c | 2 - net/rxrpc/conn_client.c | 80 +++++++++++++++++++++++++----------- net/rxrpc/conn_event.c | 2 +- net/rxrpc/conn_object.c | 72 ++++++++++++++++++++++++++++++-- net/rxrpc/conn_service.c | 4 ++ net/rxrpc/misc.c | 31 ++++++++++++++ 9 files changed, 273 insertions(+), 58 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 0a30c673509c..c0c496c83f31 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -16,6 +16,66 @@ #include +TRACE_EVENT(rxrpc_conn, + TP_PROTO(struct rxrpc_connection *conn, enum rxrpc_conn_trace op, + int usage, const void *where), + + TP_ARGS(conn, op, usage, where), + + TP_STRUCT__entry( + __field(struct rxrpc_connection *, conn ) + __field(int, op ) + __field(int, usage ) + __field(const void *, where ) + ), + + TP_fast_assign( + __entry->conn = conn; + __entry->op = op; + __entry->usage = usage; + __entry->where = where; + ), + + TP_printk("C=%p %s u=%d sp=%pSR", + __entry->conn, + rxrpc_conn_traces[__entry->op], + __entry->usage, + __entry->where) + ); + +TRACE_EVENT(rxrpc_client, + TP_PROTO(struct rxrpc_connection *conn, int channel, + enum rxrpc_client_trace op), + + TP_ARGS(conn, channel, op), + + TP_STRUCT__entry( + __field(struct rxrpc_connection *, conn ) + __field(u32, cid ) + __field(int, channel ) + __field(int, usage ) + __field(enum rxrpc_client_trace, op ) + __field(enum rxrpc_conn_cache_state, cs ) + ), + + TP_fast_assign( + __entry->conn = conn; + __entry->channel = channel; + __entry->usage = atomic_read(&conn->usage); + __entry->op = op; + __entry->cid = conn->proto.cid; + __entry->cs = conn->cache_state; + ), + + TP_printk("C=%p h=%2d %s %s i=%08x u=%d", + __entry->conn, + __entry->channel, + rxrpc_client_traces[__entry->op], + rxrpc_conn_cache_states[__entry->cs], + __entry->cid, + __entry->usage) + ); + TRACE_EVENT(rxrpc_call, TP_PROTO(struct rxrpc_call *call, enum rxrpc_call_trace op, int usage, const void *where, const void *aux), diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 4a73c20d9436..6ca40eea3022 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -314,6 +314,7 @@ enum rxrpc_conn_cache_state { RXRPC_CONN_CLIENT_ACTIVE, /* Conn is on active list, doing calls */ RXRPC_CONN_CLIENT_CULLED, /* Conn is culled and delisted, doing calls */ RXRPC_CONN_CLIENT_IDLE, /* Conn is on idle list, doing mostly nothing */ + RXRPC_CONN__NR_CACHE_STATES }; /* @@ -533,6 +534,44 @@ struct rxrpc_call { rxrpc_serial_t acks_latest; /* serial number of latest ACK received */ }; +enum rxrpc_conn_trace { + rxrpc_conn_new_client, + rxrpc_conn_new_service, + rxrpc_conn_queued, + rxrpc_conn_seen, + rxrpc_conn_got, + rxrpc_conn_put_client, + rxrpc_conn_put_service, + rxrpc_conn__nr_trace +}; + +extern const char rxrpc_conn_traces[rxrpc_conn__nr_trace][4]; + +enum rxrpc_client_trace { + rxrpc_client_activate_chans, + rxrpc_client_alloc, + rxrpc_client_chan_activate, + rxrpc_client_chan_disconnect, + rxrpc_client_chan_pass, + rxrpc_client_chan_unstarted, + rxrpc_client_cleanup, + rxrpc_client_count, + rxrpc_client_discard, + rxrpc_client_duplicate, + rxrpc_client_exposed, + rxrpc_client_replace, + rxrpc_client_to_active, + rxrpc_client_to_culled, + rxrpc_client_to_idle, + rxrpc_client_to_inactive, + rxrpc_client_to_waiting, + rxrpc_client_uncount, + rxrpc_client__nr_trace +}; + +extern const char rxrpc_client_traces[rxrpc_client__nr_trace][7]; +extern const char rxrpc_conn_cache_states[RXRPC_CONN__NR_CACHE_STATES][5]; + enum rxrpc_call_trace { rxrpc_call_new_client, rxrpc_call_new_service, @@ -734,7 +773,11 @@ struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *, void __rxrpc_disconnect_call(struct rxrpc_connection *, struct rxrpc_call *); void rxrpc_disconnect_call(struct rxrpc_call *); void rxrpc_kill_connection(struct rxrpc_connection *); -void __rxrpc_put_connection(struct rxrpc_connection *); +bool rxrpc_queue_conn(struct rxrpc_connection *); +void rxrpc_see_connection(struct rxrpc_connection *); +void rxrpc_get_connection(struct rxrpc_connection *); +struct rxrpc_connection *rxrpc_get_connection_maybe(struct rxrpc_connection *); +void rxrpc_put_service_conn(struct rxrpc_connection *); void __exit rxrpc_destroy_all_connections(void); static inline bool rxrpc_conn_is_client(const struct rxrpc_connection *conn) @@ -747,38 +790,15 @@ static inline bool rxrpc_conn_is_service(const struct rxrpc_connection *conn) return !rxrpc_conn_is_client(conn); } -static inline void rxrpc_get_connection(struct rxrpc_connection *conn) -{ - atomic_inc(&conn->usage); -} - -static inline -struct rxrpc_connection *rxrpc_get_connection_maybe(struct rxrpc_connection *conn) -{ - return atomic_inc_not_zero(&conn->usage) ? conn : NULL; -} - static inline void rxrpc_put_connection(struct rxrpc_connection *conn) { if (!conn) return; - if (rxrpc_conn_is_client(conn)) { - if (atomic_dec_and_test(&conn->usage)) - rxrpc_put_client_conn(conn); - } else { - if (atomic_dec_return(&conn->usage) == 1) - __rxrpc_put_connection(conn); - } -} - -static inline bool rxrpc_queue_conn(struct rxrpc_connection *conn) -{ - if (!rxrpc_get_connection_maybe(conn)) - return false; - if (!rxrpc_queue_work(&conn->processor)) - rxrpc_put_connection(conn); - return true; + if (rxrpc_conn_is_client(conn)) + rxrpc_put_client_conn(conn); + else + rxrpc_put_service_conn(conn); } /* diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 323b8da50163..3e474508ba75 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -85,6 +85,9 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, b->conn_backlog[head] = conn; smp_store_release(&b->conn_backlog_head, (head + 1) & (size - 1)); + + trace_rxrpc_conn(conn, rxrpc_conn_new_service, + atomic_read(&conn->usage), here); } /* Now it gets complicated, because calls get registered with the @@ -290,6 +293,7 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, rxrpc_get_local(local); conn->params.local = local; conn->params.peer = peer; + rxrpc_see_connection(conn); rxrpc_new_incoming_connection(conn, skb); } else { rxrpc_get_connection(conn); diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 0df9d1af8edb..54f30482a7fd 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -479,8 +479,6 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx) struct rxrpc_call, accept_link); list_del(&call->accept_link); rxrpc_abort_call("SKR", call, 0, RX_CALL_DEAD, ECONNRESET); - rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); - rxrpc_release_call(rx, call); rxrpc_put_call(call, rxrpc_call_put); } diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 226bc910e556..c76a125df891 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -105,6 +105,14 @@ static void rxrpc_discard_expired_client_conns(struct work_struct *); static DECLARE_DELAYED_WORK(rxrpc_client_conn_reap, rxrpc_discard_expired_client_conns); +const char rxrpc_conn_cache_states[RXRPC_CONN__NR_CACHE_STATES][5] = { + [RXRPC_CONN_CLIENT_INACTIVE] = "Inac", + [RXRPC_CONN_CLIENT_WAITING] = "Wait", + [RXRPC_CONN_CLIENT_ACTIVE] = "Actv", + [RXRPC_CONN_CLIENT_CULLED] = "Cull", + [RXRPC_CONN_CLIENT_IDLE] = "Idle", +}; + /* * Get a connection ID and epoch for a client connection from the global pool. * The connection struct pointer is then recorded in the idr radix tree. The @@ -220,6 +228,9 @@ rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp) rxrpc_get_local(conn->params.local); key_get(conn->params.key); + trace_rxrpc_conn(conn, rxrpc_conn_new_client, atomic_read(&conn->usage), + __builtin_return_address(0)); + trace_rxrpc_client(conn, -1, rxrpc_client_alloc); _leave(" = %p", conn); return conn; @@ -385,6 +396,7 @@ static int rxrpc_get_client_conn(struct rxrpc_call *call, rb_replace_node(&conn->client_node, &candidate->client_node, &local->client_conns); + trace_rxrpc_client(conn, -1, rxrpc_client_replace); goto candidate_published; } } @@ -409,8 +421,11 @@ found_extant_conn: _debug("found conn"); spin_unlock(&local->client_conns_lock); - rxrpc_put_connection(candidate); - candidate = NULL; + if (candidate) { + trace_rxrpc_client(candidate, -1, rxrpc_client_duplicate); + rxrpc_put_connection(candidate); + candidate = NULL; + } spin_lock(&conn->channel_lock); call->conn = conn; @@ -433,6 +448,7 @@ error: */ static void rxrpc_activate_conn(struct rxrpc_connection *conn) { + trace_rxrpc_client(conn, -1, rxrpc_client_to_active); conn->cache_state = RXRPC_CONN_CLIENT_ACTIVE; rxrpc_nr_active_client_conns++; list_move_tail(&conn->cache_link, &rxrpc_active_client_conns); @@ -462,8 +478,10 @@ static void rxrpc_animate_client_conn(struct rxrpc_connection *conn) spin_lock(&rxrpc_client_conn_cache_lock); nr_conns = rxrpc_nr_client_conns; - if (!test_and_set_bit(RXRPC_CONN_COUNTED, &conn->flags)) + if (!test_and_set_bit(RXRPC_CONN_COUNTED, &conn->flags)) { + trace_rxrpc_client(conn, -1, rxrpc_client_count); rxrpc_nr_client_conns = nr_conns + 1; + } switch (conn->cache_state) { case RXRPC_CONN_CLIENT_ACTIVE: @@ -494,6 +512,7 @@ activate_conn: wait_for_capacity: _debug("wait"); + trace_rxrpc_client(conn, -1, rxrpc_client_to_waiting); conn->cache_state = RXRPC_CONN_CLIENT_WAITING; list_move_tail(&conn->cache_link, &rxrpc_waiting_client_conns); goto out_unlock; @@ -524,6 +543,8 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn, struct rxrpc_call, chan_wait_link); u32 call_id = chan->call_counter + 1; + trace_rxrpc_client(conn, channel, rxrpc_client_chan_activate); + write_lock_bh(&call->state_lock); call->state = RXRPC_CALL_CLIENT_SEND_REQUEST; write_unlock_bh(&call->state_lock); @@ -563,6 +584,8 @@ static void rxrpc_activate_channels(struct rxrpc_connection *conn) _enter("%d", conn->debug_id); + trace_rxrpc_client(conn, -1, rxrpc_client_activate_chans); + if (conn->cache_state != RXRPC_CONN_CLIENT_ACTIVE || conn->active_chans == RXRPC_ACTIVE_CHANS_MASK) return; @@ -657,10 +680,13 @@ int rxrpc_connect_call(struct rxrpc_call *call, * had a chance at re-use (the per-connection security negotiation is * expensive). */ -static void rxrpc_expose_client_conn(struct rxrpc_connection *conn) +static void rxrpc_expose_client_conn(struct rxrpc_connection *conn, + unsigned int channel) { - if (!test_and_set_bit(RXRPC_CONN_EXPOSED, &conn->flags)) + if (!test_and_set_bit(RXRPC_CONN_EXPOSED, &conn->flags)) { + trace_rxrpc_client(conn, channel, rxrpc_client_exposed); rxrpc_get_connection(conn); + } } /* @@ -669,9 +695,9 @@ static void rxrpc_expose_client_conn(struct rxrpc_connection *conn) */ void rxrpc_expose_client_call(struct rxrpc_call *call) { + unsigned int channel = call->cid & RXRPC_CHANNELMASK; struct rxrpc_connection *conn = call->conn; - struct rxrpc_channel *chan = - &conn->channels[call->cid & RXRPC_CHANNELMASK]; + struct rxrpc_channel *chan = &conn->channels[channel]; if (!test_and_set_bit(RXRPC_CALL_EXPOSED, &call->flags)) { /* Mark the call ID as being used. If the callNumber counter @@ -682,7 +708,7 @@ void rxrpc_expose_client_call(struct rxrpc_call *call) chan->call_counter++; if (chan->call_counter >= INT_MAX) set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags); - rxrpc_expose_client_conn(conn); + rxrpc_expose_client_conn(conn, channel); } } @@ -695,6 +721,7 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call) struct rxrpc_connection *conn = call->conn; struct rxrpc_channel *chan = &conn->channels[channel]; + trace_rxrpc_client(conn, channel, rxrpc_client_chan_disconnect); call->conn = NULL; spin_lock(&conn->channel_lock); @@ -709,6 +736,8 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call) ASSERT(!test_bit(RXRPC_CALL_EXPOSED, &call->flags)); list_del_init(&call->chan_wait_link); + trace_rxrpc_client(conn, channel, rxrpc_client_chan_unstarted); + /* We must deactivate or idle the connection if it's now * waiting for nothing. */ @@ -739,7 +768,7 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call) /* See if we can pass the channel directly to another call. */ if (conn->cache_state == RXRPC_CONN_CLIENT_ACTIVE && !list_empty(&conn->waiting_calls)) { - _debug("pass chan"); + trace_rxrpc_client(conn, channel, rxrpc_client_chan_pass); rxrpc_activate_one_channel(conn, channel); goto out_2; } @@ -762,7 +791,7 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call) goto out; } - _debug("pass chan 2"); + trace_rxrpc_client(conn, channel, rxrpc_client_chan_pass); rxrpc_activate_one_channel(conn, channel); goto out; @@ -794,7 +823,7 @@ idle_connection: * immediately or moved to the idle list for a short while. */ if (test_bit(RXRPC_CONN_EXPOSED, &conn->flags)) { - _debug("make idle"); + trace_rxrpc_client(conn, channel, rxrpc_client_to_idle); conn->idle_timestamp = jiffies; conn->cache_state = RXRPC_CONN_CLIENT_IDLE; list_move_tail(&conn->cache_link, &rxrpc_idle_client_conns); @@ -804,7 +833,7 @@ idle_connection: &rxrpc_client_conn_reap, rxrpc_conn_idle_client_expiry); } else { - _debug("make inactive"); + trace_rxrpc_client(conn, channel, rxrpc_client_to_inactive); conn->cache_state = RXRPC_CONN_CLIENT_INACTIVE; list_del_init(&conn->cache_link); } @@ -821,6 +850,8 @@ rxrpc_put_one_client_conn(struct rxrpc_connection *conn) struct rxrpc_local *local = conn->params.local; unsigned int nr_conns; + trace_rxrpc_client(conn, -1, rxrpc_client_cleanup); + if (test_bit(RXRPC_CONN_IN_CLIENT_CONNS, &conn->flags)) { spin_lock(&local->client_conns_lock); if (test_and_clear_bit(RXRPC_CONN_IN_CLIENT_CONNS, @@ -834,6 +865,7 @@ rxrpc_put_one_client_conn(struct rxrpc_connection *conn) ASSERTCMP(conn->cache_state, ==, RXRPC_CONN_CLIENT_INACTIVE); if (test_bit(RXRPC_CONN_COUNTED, &conn->flags)) { + trace_rxrpc_client(conn, -1, rxrpc_client_uncount); spin_lock(&rxrpc_client_conn_cache_lock); nr_conns = --rxrpc_nr_client_conns; @@ -863,20 +895,18 @@ rxrpc_put_one_client_conn(struct rxrpc_connection *conn) */ void rxrpc_put_client_conn(struct rxrpc_connection *conn) { - struct rxrpc_connection *next; + const void *here = __builtin_return_address(0); + int n; do { - _enter("%p{u=%d,d=%d}", - conn, atomic_read(&conn->usage), conn->debug_id); + n = atomic_dec_return(&conn->usage); + trace_rxrpc_conn(conn, rxrpc_conn_put_client, n, here); + if (n > 0) + return; + ASSERTCMP(n, >=, 0); - next = rxrpc_put_one_client_conn(conn); - - if (!next) - break; - conn = next; - } while (atomic_dec_and_test(&conn->usage)); - - _leave(""); + conn = rxrpc_put_one_client_conn(conn); + } while (conn); } /* @@ -907,9 +937,11 @@ static void rxrpc_cull_active_client_conns(void) ASSERTCMP(conn->cache_state, ==, RXRPC_CONN_CLIENT_ACTIVE); if (list_empty(&conn->waiting_calls)) { + trace_rxrpc_client(conn, -1, rxrpc_client_to_culled); conn->cache_state = RXRPC_CONN_CLIENT_CULLED; list_del_init(&conn->cache_link); } else { + trace_rxrpc_client(conn, -1, rxrpc_client_to_waiting); conn->cache_state = RXRPC_CONN_CLIENT_WAITING; list_move_tail(&conn->cache_link, &rxrpc_waiting_client_conns); @@ -983,7 +1015,7 @@ next: goto not_yet_expired; } - _debug("discard conn %d", conn->debug_id); + trace_rxrpc_client(conn, -1, rxrpc_client_discard); if (!test_and_clear_bit(RXRPC_CONN_EXPOSED, &conn->flags)) BUG(); conn->cache_state = RXRPC_CONN_CLIENT_INACTIVE; diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 0691007cfc02..a43f4c94a88d 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -377,7 +377,7 @@ void rxrpc_process_connection(struct work_struct *work) u32 abort_code = RX_PROTOCOL_ERROR; int ret; - _enter("{%d}", conn->debug_id); + rxrpc_see_connection(conn); if (test_and_clear_bit(RXRPC_CONN_EV_CHALLENGE, &conn->events)) rxrpc_secure_connection(conn); diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index bb1f29280aea..3b55aee0c436 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -246,11 +246,77 @@ void rxrpc_kill_connection(struct rxrpc_connection *conn) } /* - * release a virtual connection + * Queue a connection's work processor, getting a ref to pass to the work + * queue. */ -void __rxrpc_put_connection(struct rxrpc_connection *conn) +bool rxrpc_queue_conn(struct rxrpc_connection *conn) { - rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0); + const void *here = __builtin_return_address(0); + int n = __atomic_add_unless(&conn->usage, 1, 0); + if (n == 0) + return false; + if (rxrpc_queue_work(&conn->processor)) + trace_rxrpc_conn(conn, rxrpc_conn_queued, n + 1, here); + else + rxrpc_put_connection(conn); + return true; +} + +/* + * Note the re-emergence of a connection. + */ +void rxrpc_see_connection(struct rxrpc_connection *conn) +{ + const void *here = __builtin_return_address(0); + if (conn) { + int n = atomic_read(&conn->usage); + + trace_rxrpc_conn(conn, rxrpc_conn_seen, n, here); + } +} + +/* + * Get a ref on a connection. + */ +void rxrpc_get_connection(struct rxrpc_connection *conn) +{ + const void *here = __builtin_return_address(0); + int n = atomic_inc_return(&conn->usage); + + trace_rxrpc_conn(conn, rxrpc_conn_got, n, here); +} + +/* + * Try to get a ref on a connection. + */ +struct rxrpc_connection * +rxrpc_get_connection_maybe(struct rxrpc_connection *conn) +{ + const void *here = __builtin_return_address(0); + + if (conn) { + int n = __atomic_add_unless(&conn->usage, 1, 0); + if (n > 0) + trace_rxrpc_conn(conn, rxrpc_conn_got, n + 1, here); + else + conn = NULL; + } + return conn; +} + +/* + * Release a service connection + */ +void rxrpc_put_service_conn(struct rxrpc_connection *conn) +{ + const void *here = __builtin_return_address(0); + int n; + + n = atomic_dec_return(&conn->usage); + trace_rxrpc_conn(conn, rxrpc_conn_put_service, n, here); + ASSERTCMP(n, >=, 0); + if (n == 0) + rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0); } /* diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c index 83d54da4ce8b..eef551f40dc2 100644 --- a/net/rxrpc/conn_service.c +++ b/net/rxrpc/conn_service.c @@ -136,6 +136,10 @@ struct rxrpc_connection *rxrpc_prealloc_service_connection(gfp_t gfp) list_add_tail(&conn->link, &rxrpc_connections); list_add_tail(&conn->proc_link, &rxrpc_connection_proc_list); write_unlock(&rxrpc_connection_lock); + + trace_rxrpc_conn(conn, rxrpc_conn_new_service, + atomic_read(&conn->usage), + __builtin_return_address(0)); } return conn; diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index 8b910780f1ac..598064d3bdd2 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -101,3 +101,34 @@ const char *rxrpc_acks(u8 reason) reason = ARRAY_SIZE(str) - 1; return str[reason]; } + +const char rxrpc_conn_traces[rxrpc_conn__nr_trace][4] = { + [rxrpc_conn_new_client] = "NWc", + [rxrpc_conn_new_service] = "NWs", + [rxrpc_conn_queued] = "QUE", + [rxrpc_conn_seen] = "SEE", + [rxrpc_conn_got] = "GOT", + [rxrpc_conn_put_client] = "PTc", + [rxrpc_conn_put_service] = "PTs", +}; + +const char rxrpc_client_traces[rxrpc_client__nr_trace][7] = { + [rxrpc_client_activate_chans] = "Activa", + [rxrpc_client_alloc] = "Alloc ", + [rxrpc_client_chan_activate] = "ChActv", + [rxrpc_client_chan_disconnect] = "ChDisc", + [rxrpc_client_chan_pass] = "ChPass", + [rxrpc_client_chan_unstarted] = "ChUnst", + [rxrpc_client_cleanup] = "Clean ", + [rxrpc_client_count] = "Count ", + [rxrpc_client_discard] = "Discar", + [rxrpc_client_duplicate] = "Duplic", + [rxrpc_client_exposed] = "Expose", + [rxrpc_client_replace] = "Replac", + [rxrpc_client_to_active] = "->Actv", + [rxrpc_client_to_culled] = "->Cull", + [rxrpc_client_to_idle] = "->Idle", + [rxrpc_client_to_inactive] = "->Inac", + [rxrpc_client_to_waiting] = "->Wait", + [rxrpc_client_uncount] = "Uncoun", +}; From a124fe3ee5d82f2c9a9b8818ed5cb9f61685f1d3 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:13 +0100 Subject: [PATCH 1108/1715] rxrpc: Add a tracepoint to follow the life of a packet in the Tx buffer Add a tracepoint to follow the insertion of a packet into the transmit buffer, its transmission and its rotation out of the buffer. Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 26 ++++++++++++++++++++++++++ net/rxrpc/ar-internal.h | 12 ++++++++++++ net/rxrpc/input.c | 2 ++ net/rxrpc/misc.c | 9 +++++++++ net/rxrpc/sendmsg.c | 9 ++++++++- 5 files changed, 57 insertions(+), 1 deletion(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index c0c496c83f31..ffc74b3e5b76 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -208,6 +208,32 @@ TRACE_EVENT(rxrpc_abort, __entry->abort_code, __entry->error, __entry->why) ); +TRACE_EVENT(rxrpc_transmit, + TP_PROTO(struct rxrpc_call *call, enum rxrpc_transmit_trace why), + + TP_ARGS(call, why), + + TP_STRUCT__entry( + __field(struct rxrpc_call *, call ) + __field(enum rxrpc_transmit_trace, why ) + __field(rxrpc_seq_t, tx_hard_ack ) + __field(rxrpc_seq_t, tx_top ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->why = why; + __entry->tx_hard_ack = call->tx_hard_ack; + __entry->tx_top = call->tx_top; + ), + + TP_printk("c=%p %s f=%08x n=%u", + __entry->call, + rxrpc_transmit_traces[__entry->why], + __entry->tx_hard_ack + 1, + __entry->tx_top - __entry->tx_hard_ack) + ); + #endif /* _TRACE_RXRPC_H */ /* This part must be outside protection */ diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 6ca40eea3022..afa5dcc05fe0 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -593,6 +593,18 @@ enum rxrpc_call_trace { extern const char rxrpc_call_traces[rxrpc_call__nr_trace][4]; +enum rxrpc_transmit_trace { + rxrpc_transmit_wait, + rxrpc_transmit_queue, + rxrpc_transmit_queue_reqack, + rxrpc_transmit_queue_last, + rxrpc_transmit_rotate, + rxrpc_transmit_end, + rxrpc_transmit__nr_trace +}; + +extern const char rxrpc_transmit_traces[rxrpc_transmit__nr_trace][4]; + extern const char *const rxrpc_pkts[]; extern const char *rxrpc_acks(u8 reason); diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index c1f83d22f9b7..c7eb5104e91a 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -59,6 +59,7 @@ static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to) spin_unlock(&call->lock); + trace_rxrpc_transmit(call, rxrpc_transmit_rotate); wake_up(&call->waitq); while (list) { @@ -107,6 +108,7 @@ static bool rxrpc_end_tx_phase(struct rxrpc_call *call, const char *abort_why) } write_unlock(&call->state_lock); + trace_rxrpc_transmit(call, rxrpc_transmit_end); _leave(" = ok"); return true; } diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index 598064d3bdd2..dca89995f03e 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -132,3 +132,12 @@ const char rxrpc_client_traces[rxrpc_client__nr_trace][7] = { [rxrpc_client_to_waiting] = "->Wait", [rxrpc_client_uncount] = "Uncoun", }; + +const char rxrpc_transmit_traces[rxrpc_transmit__nr_trace][4] = { + [rxrpc_transmit_wait] = "WAI", + [rxrpc_transmit_queue] = "QUE", + [rxrpc_transmit_queue_reqack] = "QRA", + [rxrpc_transmit_queue_last] = "QLS", + [rxrpc_transmit_rotate] = "ROT", + [rxrpc_transmit_end] = "END", +}; diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 8bfddf4e338c..28d8f73cf11d 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -56,6 +56,7 @@ static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx, break; } + trace_rxrpc_transmit(call, rxrpc_transmit_wait); release_sock(&rx->sk); *timeo = schedule_timeout(*timeo); lock_sock(&rx->sk); @@ -104,8 +105,14 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, smp_wmb(); call->rxtx_buffer[ix] = skb; call->tx_top = seq; - if (last) + if (last) { set_bit(RXRPC_CALL_TX_LAST, &call->flags); + trace_rxrpc_transmit(call, rxrpc_transmit_queue_last); + } else if (sp->hdr.flags & RXRPC_REQUEST_ACK) { + trace_rxrpc_transmit(call, rxrpc_transmit_queue_reqack); + } else { + trace_rxrpc_transmit(call, rxrpc_transmit_queue); + } if (last || call->state == RXRPC_CALL_SERVER_ACK_REQUEST) { _debug("________awaiting reply/ACK__________"); From ec71eb9ada34f8d1a58b7c35d906c59411295445 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:13 +0100 Subject: [PATCH 1109/1715] rxrpc: Add a tracepoint to log received ACK packets Add a tracepoint to log information from received ACK packets. Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 26 ++++++++++++++++++++++++++ net/rxrpc/input.c | 2 ++ 2 files changed, 28 insertions(+) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index ffc74b3e5b76..2b19f3fa5174 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -234,6 +234,32 @@ TRACE_EVENT(rxrpc_transmit, __entry->tx_top - __entry->tx_hard_ack) ); +TRACE_EVENT(rxrpc_rx_ack, + TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t first, u8 reason, u8 n_acks), + + TP_ARGS(call, first, reason, n_acks), + + TP_STRUCT__entry( + __field(struct rxrpc_call *, call ) + __field(rxrpc_seq_t, first ) + __field(u8, reason ) + __field(u8, n_acks ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->first = first; + __entry->reason = reason; + __entry->n_acks = n_acks; + ), + + TP_printk("c=%p %s f=%08x n=%u", + __entry->call, + rxrpc_acks(__entry->reason), + __entry->first, + __entry->n_acks) + ); + #endif /* _TRACE_RXRPC_H */ /* This part must be outside protection */ diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index c7eb5104e91a..7b18ca124978 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -440,6 +440,8 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, hard_ack = first_soft_ack - 1; nr_acks = buf.ack.nAcks; + trace_rxrpc_rx_ack(call, first_soft_ack, buf.ack.reason, nr_acks); + _proto("Rx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }", sp->hdr.serial, ntohs(buf.ack.maxSkew), From f3639df2d90bc919328c459b3c7c49ed5667a52f Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:13 +0100 Subject: [PATCH 1110/1715] rxrpc: Add a tracepoint to log ACK transmission Add a tracepoint to log information about ACK transmission. Signed-off-by: David Howels --- include/trace/events/rxrpc.h | 30 ++++++++++++++++++++++++++++++ net/rxrpc/conn_event.c | 3 +++ net/rxrpc/output.c | 7 ++++++- 3 files changed, 39 insertions(+), 1 deletion(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 2b19f3fa5174..d545d692ae22 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -260,6 +260,36 @@ TRACE_EVENT(rxrpc_rx_ack, __entry->n_acks) ); +TRACE_EVENT(rxrpc_tx_ack, + TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t first, + rxrpc_serial_t serial, u8 reason, u8 n_acks), + + TP_ARGS(call, first, serial, reason, n_acks), + + TP_STRUCT__entry( + __field(struct rxrpc_call *, call ) + __field(rxrpc_seq_t, first ) + __field(rxrpc_serial_t, serial ) + __field(u8, reason ) + __field(u8, n_acks ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->first = first; + __entry->serial = serial; + __entry->reason = reason; + __entry->n_acks = n_acks; + ), + + TP_printk("c=%p %s f=%08x r=%08x n=%u", + __entry->call, + rxrpc_acks(__entry->reason), + __entry->first, + __entry->serial, + __entry->n_acks) + ); + #endif /* _TRACE_RXRPC_H */ /* This part must be outside protection */ diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index a43f4c94a88d..9b19c51831aa 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -98,6 +98,9 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, pkt.info.rwind = htonl(rxrpc_rx_window_size); pkt.info.jumbo_max = htonl(rxrpc_rx_jumbo_max); len += sizeof(pkt.ack) + sizeof(pkt.info); + + trace_rxrpc_tx_ack(NULL, chan->last_seq, 0, + RXRPC_ACK_DUPLICATE, 0); break; } diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 0b21ed859de7..2c9daeadce87 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -38,12 +38,14 @@ struct rxrpc_pkt_buffer { static size_t rxrpc_fill_out_ack(struct rxrpc_call *call, struct rxrpc_pkt_buffer *pkt) { + rxrpc_serial_t serial; rxrpc_seq_t hard_ack, top, seq; int ix; u32 mtu, jmax; u8 *ackp = pkt->acks; /* Barrier against rxrpc_input_data(). */ + serial = call->ackr_serial; hard_ack = READ_ONCE(call->rx_hard_ack); top = smp_load_acquire(&call->rx_top); @@ -51,7 +53,7 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_call *call, pkt->ack.maxSkew = htons(call->ackr_skew); pkt->ack.firstPacket = htonl(hard_ack + 1); pkt->ack.previousPacket = htonl(call->ackr_prev_seq); - pkt->ack.serial = htonl(call->ackr_serial); + pkt->ack.serial = htonl(serial); pkt->ack.reason = call->ackr_reason; pkt->ack.nAcks = top - hard_ack; @@ -75,6 +77,9 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_call *call, pkt->ackinfo.rwind = htonl(call->rx_winsize); pkt->ackinfo.jumbo_max = htonl(jmax); + trace_rxrpc_tx_ack(call, hard_ack + 1, serial, call->ackr_reason, + top - hard_ack); + *ackp++ = 0; *ackp++ = 0; *ackp++ = 0; From 58dc63c998ea3c5a27e2bf9251eddbf0977056a6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:13 +0100 Subject: [PATCH 1111/1715] rxrpc: Add a tracepoint to follow packets in the Rx buffer Add a tracepoint to follow the life of packets that get added to a call's receive buffer. Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 33 +++++++++++++++++++++++++++++++++ net/rxrpc/ar-internal.h | 12 ++++++++++++ net/rxrpc/call_accept.c | 3 +++ net/rxrpc/input.c | 6 +++++- net/rxrpc/misc.c | 9 +++++++++ net/rxrpc/recvmsg.c | 11 +++++++++++ 6 files changed, 73 insertions(+), 1 deletion(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index d545d692ae22..7dd5f0188681 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -290,6 +290,39 @@ TRACE_EVENT(rxrpc_tx_ack, __entry->n_acks) ); +TRACE_EVENT(rxrpc_receive, + TP_PROTO(struct rxrpc_call *call, enum rxrpc_receive_trace why, + rxrpc_serial_t serial, rxrpc_seq_t seq), + + TP_ARGS(call, why, serial, seq), + + TP_STRUCT__entry( + __field(struct rxrpc_call *, call ) + __field(enum rxrpc_receive_trace, why ) + __field(rxrpc_serial_t, serial ) + __field(rxrpc_seq_t, seq ) + __field(rxrpc_seq_t, hard_ack ) + __field(rxrpc_seq_t, top ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->why = why; + __entry->serial = serial; + __entry->seq = seq; + __entry->hard_ack = call->rx_hard_ack; + __entry->top = call->rx_top; + ), + + TP_printk("c=%p %s r=%08x q=%08x w=%08x-%08x", + __entry->call, + rxrpc_receive_traces[__entry->why], + __entry->serial, + __entry->seq, + __entry->hard_ack, + __entry->top) + ); + #endif /* _TRACE_RXRPC_H */ /* This part must be outside protection */ diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index afa5dcc05fe0..e5d2f2fb8e41 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -605,6 +605,18 @@ enum rxrpc_transmit_trace { extern const char rxrpc_transmit_traces[rxrpc_transmit__nr_trace][4]; +enum rxrpc_receive_trace { + rxrpc_receive_incoming, + rxrpc_receive_queue, + rxrpc_receive_queue_last, + rxrpc_receive_front, + rxrpc_receive_rotate, + rxrpc_receive_end, + rxrpc_receive__nr_trace +}; + +extern const char rxrpc_receive_traces[rxrpc_receive__nr_trace][4]; + extern const char *const rxrpc_pkts[]; extern const char *rxrpc_acks(u8 reason); diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 3e474508ba75..a8d39d7cf42c 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -367,6 +367,9 @@ found_service: goto out; } + trace_rxrpc_receive(call, rxrpc_receive_incoming, + sp->hdr.serial, sp->hdr.seq); + /* Make the call live. */ rxrpc_incoming_call(rx, call, skb); conn = call->conn; diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 7b18ca124978..b690220533c6 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -284,8 +284,12 @@ next_subpacket: call->rxtx_buffer[ix] = skb; if (after(seq, call->rx_top)) smp_store_release(&call->rx_top, seq); - if (flags & RXRPC_LAST_PACKET) + if (flags & RXRPC_LAST_PACKET) { set_bit(RXRPC_CALL_RX_LAST, &call->flags); + trace_rxrpc_receive(call, rxrpc_receive_queue_last, serial, seq); + } else { + trace_rxrpc_receive(call, rxrpc_receive_queue, serial, seq); + } queued = true; if (after_eq(seq, call->rx_expect_next)) { diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index dca89995f03e..db5f1d54fc90 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -141,3 +141,12 @@ const char rxrpc_transmit_traces[rxrpc_transmit__nr_trace][4] = { [rxrpc_transmit_rotate] = "ROT", [rxrpc_transmit_end] = "END", }; + +const char rxrpc_receive_traces[rxrpc_receive__nr_trace][4] = { + [rxrpc_receive_incoming] = "INC", + [rxrpc_receive_queue] = "QUE", + [rxrpc_receive_queue_last] = "QLS", + [rxrpc_receive_front] = "FRN", + [rxrpc_receive_rotate] = "ROT", + [rxrpc_receive_end] = "END", +}; diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 8b8d7e14f800..22d51087c580 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -134,6 +134,7 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call) { _enter("%d,%s", call->debug_id, rxrpc_call_states[call->state]); + trace_rxrpc_receive(call, rxrpc_receive_end, 0, call->rx_top); ASSERTCMP(call->rx_hard_ack, ==, call->rx_top); if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY) { @@ -167,6 +168,7 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) { struct rxrpc_skb_priv *sp; struct sk_buff *skb; + rxrpc_serial_t serial; rxrpc_seq_t hard_ack, top; u8 flags; int ix; @@ -183,6 +185,10 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) rxrpc_see_skb(skb); sp = rxrpc_skb(skb); flags = sp->hdr.flags; + serial = sp->hdr.serial; + if (call->rxtx_annotations[ix] & RXRPC_RX_ANNO_JUMBO) + serial += (call->rxtx_annotations[ix] & RXRPC_RX_ANNO_JUMBO) - 1; + call->rxtx_buffer[ix] = NULL; call->rxtx_annotations[ix] = 0; /* Barrier against rxrpc_input_data(). */ @@ -191,6 +197,7 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) rxrpc_free_skb(skb); _debug("%u,%u,%02x", hard_ack, top, flags); + trace_rxrpc_receive(call, rxrpc_receive_rotate, serial, hard_ack); if (flags & RXRPC_LAST_PACKET) rxrpc_end_rx_phase(call); } @@ -309,6 +316,10 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, rxrpc_see_skb(skb); sp = rxrpc_skb(skb); + if (!(flags & MSG_PEEK)) + trace_rxrpc_receive(call, rxrpc_receive_front, + sp->hdr.serial, seq); + if (msg) sock_recv_timestamp(msg, sock->sk, skb); From 849979051cbc9352857d8bb31895ae55afe19d96 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 11:13:31 +0100 Subject: [PATCH 1112/1715] rxrpc: Add a tracepoint to follow what recvmsg does Add a tracepoint to follow what recvmsg does within AF_RXRPC. Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 34 ++++++++++++++++++++++++++++++++++ net/rxrpc/ar-internal.h | 17 +++++++++++++++++ net/rxrpc/misc.c | 14 ++++++++++++++ net/rxrpc/recvmsg.c | 34 ++++++++++++++++++++++++++-------- 4 files changed, 91 insertions(+), 8 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 7dd5f0188681..58732202e9f0 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -323,6 +323,40 @@ TRACE_EVENT(rxrpc_receive, __entry->top) ); +TRACE_EVENT(rxrpc_recvmsg, + TP_PROTO(struct rxrpc_call *call, enum rxrpc_recvmsg_trace why, + rxrpc_seq_t seq, unsigned int offset, unsigned int len, + int ret), + + TP_ARGS(call, why, seq, offset, len, ret), + + TP_STRUCT__entry( + __field(struct rxrpc_call *, call ) + __field(enum rxrpc_recvmsg_trace, why ) + __field(rxrpc_seq_t, seq ) + __field(unsigned int, offset ) + __field(unsigned int, len ) + __field(int, ret ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->why = why; + __entry->seq = seq; + __entry->offset = offset; + __entry->len = len; + __entry->ret = ret; + ), + + TP_printk("c=%p %s q=%08x o=%u l=%u ret=%d", + __entry->call, + rxrpc_recvmsg_traces[__entry->why], + __entry->seq, + __entry->offset, + __entry->len, + __entry->ret) + ); + #endif /* _TRACE_RXRPC_H */ /* This part must be outside protection */ diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index e5d2f2fb8e41..a17341d2df3d 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -617,6 +617,23 @@ enum rxrpc_receive_trace { extern const char rxrpc_receive_traces[rxrpc_receive__nr_trace][4]; +enum rxrpc_recvmsg_trace { + rxrpc_recvmsg_enter, + rxrpc_recvmsg_wait, + rxrpc_recvmsg_dequeue, + rxrpc_recvmsg_hole, + rxrpc_recvmsg_next, + rxrpc_recvmsg_cont, + rxrpc_recvmsg_full, + rxrpc_recvmsg_data_return, + rxrpc_recvmsg_terminal, + rxrpc_recvmsg_to_be_accepted, + rxrpc_recvmsg_return, + rxrpc_recvmsg__nr_trace +}; + +extern const char rxrpc_recvmsg_traces[rxrpc_recvmsg__nr_trace][5]; + extern const char *const rxrpc_pkts[]; extern const char *rxrpc_acks(u8 reason); diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index db5f1d54fc90..c7065d893d1e 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -150,3 +150,17 @@ const char rxrpc_receive_traces[rxrpc_receive__nr_trace][4] = { [rxrpc_receive_rotate] = "ROT", [rxrpc_receive_end] = "END", }; + +const char rxrpc_recvmsg_traces[rxrpc_recvmsg__nr_trace][5] = { + [rxrpc_recvmsg_enter] = "ENTR", + [rxrpc_recvmsg_wait] = "WAIT", + [rxrpc_recvmsg_dequeue] = "DEQU", + [rxrpc_recvmsg_hole] = "HOLE", + [rxrpc_recvmsg_next] = "NEXT", + [rxrpc_recvmsg_cont] = "CONT", + [rxrpc_recvmsg_full] = "FULL", + [rxrpc_recvmsg_data_return] = "DATA", + [rxrpc_recvmsg_terminal] = "TERM", + [rxrpc_recvmsg_to_be_accepted] = "TBAC", + [rxrpc_recvmsg_return] = "RETN", +}; diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 22d51087c580..b62a08151895 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -94,6 +94,8 @@ static int rxrpc_recvmsg_term(struct rxrpc_call *call, struct msghdr *msg) break; } + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_terminal, call->rx_hard_ack, + call->rx_pkt_offset, call->rx_pkt_len, ret); return ret; } @@ -124,6 +126,7 @@ static int rxrpc_recvmsg_new_call(struct rxrpc_sock *rx, write_unlock(&rx->call_lock); } + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_to_be_accepted, 1, 0, 0, ret); return ret; } @@ -310,8 +313,11 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, for (seq = hard_ack + 1; before_eq(seq, top); seq++) { ix = seq & RXRPC_RXTX_BUFF_MASK; skb = call->rxtx_buffer[ix]; - if (!skb) + if (!skb) { + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_hole, seq, + rx_pkt_offset, rx_pkt_len, 0); break; + } smp_rmb(); rxrpc_see_skb(skb); sp = rxrpc_skb(skb); @@ -327,10 +333,15 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, ret2 = rxrpc_locate_data(call, skb, &call->rxtx_annotations[ix], &rx_pkt_offset, &rx_pkt_len); + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_next, seq, + rx_pkt_offset, rx_pkt_len, ret2); if (ret2 < 0) { ret = ret2; goto out; } + } else { + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_cont, seq, + rx_pkt_offset, rx_pkt_len, 0); } _debug("recvmsg %x DATA #%u { %d, %d }", sp->hdr.callNumber, seq, rx_pkt_offset, rx_pkt_len); @@ -357,6 +368,8 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, } if (rx_pkt_len > 0) { + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_full, seq, + rx_pkt_offset, rx_pkt_len, 0); _debug("buffer full"); ASSERTCMP(*_offset, ==, len); ret = 0; @@ -383,6 +396,8 @@ out: call->rx_pkt_len = rx_pkt_len; } done: + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_data_return, seq, + rx_pkt_offset, rx_pkt_len, ret); _leave(" = %d [%u/%u]", ret, seq, top); return ret; } @@ -404,7 +419,7 @@ int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, DEFINE_WAIT(wait); - _enter(",,,%zu,%d", len, flags); + trace_rxrpc_recvmsg(NULL, rxrpc_recvmsg_enter, 0, 0, 0, 0); if (flags & (MSG_OOB | MSG_TRUNC)) return -EOPNOTSUPP; @@ -424,8 +439,10 @@ try_again: if (list_empty(&rx->recvmsg_q)) { ret = -EWOULDBLOCK; - if (timeo == 0) + if (timeo == 0) { + call = NULL; goto error_no_call; + } release_sock(&rx->sk); @@ -439,6 +456,8 @@ try_again: if (list_empty(&rx->recvmsg_q)) { if (signal_pending(current)) goto wait_interrupted; + trace_rxrpc_recvmsg(NULL, rxrpc_recvmsg_wait, + 0, 0, 0, 0); timeo = schedule_timeout(timeo); } finish_wait(sk_sleep(&rx->sk), &wait); @@ -457,7 +476,7 @@ try_again: rxrpc_get_call(call, rxrpc_call_got); write_unlock_bh(&rx->recvmsg_lock); - _debug("recvmsg call %p", call); + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_dequeue, 0, 0, 0, 0); if (test_bit(RXRPC_CALL_RELEASED, &call->flags)) BUG(); @@ -527,16 +546,15 @@ error: rxrpc_put_call(call, rxrpc_call_put); error_no_call: release_sock(&rx->sk); - _leave(" = %d", ret); + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_return, 0, 0, 0, ret); return ret; wait_interrupted: ret = sock_intr_errno(timeo); wait_error: finish_wait(sk_sleep(&rx->sk), &wait); - release_sock(&rx->sk); - _leave(" = %d [wait]", ret); - return ret; + call = NULL; + goto error_no_call; } /** From ba39f3a0ed756ccd882adf4a77916ec863db3ce4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:14 +0100 Subject: [PATCH 1113/1715] rxrpc: Remove printks from rxrpc_recvmsg_data() to fix uninit var Remove _enter/_debug/_leave calls from rxrpc_recvmsg_data() of which one uses an uninitialised variable. Signed-off-by: David Howells --- net/rxrpc/recvmsg.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index b62a08151895..79e65668bc58 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -296,8 +296,6 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, unsigned int rx_pkt_offset, rx_pkt_len; int ix, copy, ret = -EAGAIN, ret2; - _enter(""); - rx_pkt_offset = call->rx_pkt_offset; rx_pkt_len = call->rx_pkt_len; @@ -343,8 +341,6 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, trace_rxrpc_recvmsg(call, rxrpc_recvmsg_cont, seq, rx_pkt_offset, rx_pkt_len, 0); } - _debug("recvmsg %x DATA #%u { %d, %d }", - sp->hdr.callNumber, seq, rx_pkt_offset, rx_pkt_len); /* We have to handle short, empty and used-up DATA packets. */ remain = len - *_offset; @@ -360,8 +356,6 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, } /* handle piecemeal consumption of data packets */ - _debug("copied %d @%zu", copy, *_offset); - rx_pkt_offset += copy; rx_pkt_len -= copy; *_offset += copy; @@ -370,7 +364,6 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, if (rx_pkt_len > 0) { trace_rxrpc_recvmsg(call, rxrpc_recvmsg_full, seq, rx_pkt_offset, rx_pkt_len, 0); - _debug("buffer full"); ASSERTCMP(*_offset, ==, len); ret = 0; break; @@ -398,7 +391,6 @@ out: done: trace_rxrpc_recvmsg(call, rxrpc_recvmsg_data_return, seq, rx_pkt_offset, rx_pkt_len, ret); - _leave(" = %d [%u/%u]", ret, seq, top); return ret; } From 71f3ca408fd43b586c02480768a503af075b247e Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:14 +0100 Subject: [PATCH 1114/1715] rxrpc: Improve skb tracing Improve sk_buff tracing within AF_RXRPC by the following means: (1) Use an enum to note the event type rather than plain integers and use an array of event names rather than a big multi ?: list. (2) Distinguish Rx from Tx packets and account them separately. This requires the call phase to be tracked so that we know what we might find in rxtx_buffer[]. (3) Add a parameter to rxrpc_{new,see,get,free}_skb() to indicate the event type. (4) A pair of 'rotate' events are added to indicate packets that are about to be rotated out of the Rx and Tx windows. (5) A pair of 'lost' events are added, along with rxrpc_lose_skb() for packet loss injection recording. Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 12 +++----- net/rxrpc/af_rxrpc.c | 5 ++-- net/rxrpc/ar-internal.h | 33 ++++++++++++++++++---- net/rxrpc/call_event.c | 8 +++--- net/rxrpc/call_object.c | 11 ++++++-- net/rxrpc/conn_event.c | 6 ++-- net/rxrpc/input.c | 13 +++++---- net/rxrpc/local_event.c | 4 +-- net/rxrpc/misc.c | 18 ++++++++++++ net/rxrpc/output.c | 4 +-- net/rxrpc/peer_event.c | 10 +++---- net/rxrpc/recvmsg.c | 7 +++-- net/rxrpc/sendmsg.c | 10 +++---- net/rxrpc/skbuff.c | 53 ++++++++++++++++++++++++++---------- 14 files changed, 131 insertions(+), 63 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 58732202e9f0..75a5d8bf50e1 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -107,14 +107,14 @@ TRACE_EVENT(rxrpc_call, ); TRACE_EVENT(rxrpc_skb, - TP_PROTO(struct sk_buff *skb, int op, int usage, int mod_count, - const void *where), + TP_PROTO(struct sk_buff *skb, enum rxrpc_skb_trace op, + int usage, int mod_count, const void *where), TP_ARGS(skb, op, usage, mod_count, where), TP_STRUCT__entry( __field(struct sk_buff *, skb ) - __field(int, op ) + __field(enum rxrpc_skb_trace, op ) __field(int, usage ) __field(int, mod_count ) __field(const void *, where ) @@ -130,11 +130,7 @@ TRACE_EVENT(rxrpc_skb, TP_printk("s=%p %s u=%d m=%d p=%pSR", __entry->skb, - (__entry->op == 0 ? "NEW" : - __entry->op == 1 ? "SEE" : - __entry->op == 2 ? "GET" : - __entry->op == 3 ? "FRE" : - "PUR"), + rxrpc_skb_traces[__entry->op], __entry->usage, __entry->mod_count, __entry->where) diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 09f81befc705..8dbf7bed2cc4 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -45,7 +45,7 @@ u32 rxrpc_epoch; atomic_t rxrpc_debug_id; /* count of skbs currently in use */ -atomic_t rxrpc_n_skbs; +atomic_t rxrpc_n_tx_skbs, rxrpc_n_rx_skbs; struct workqueue_struct *rxrpc_workqueue; @@ -867,7 +867,8 @@ static void __exit af_rxrpc_exit(void) proto_unregister(&rxrpc_proto); rxrpc_destroy_all_calls(); rxrpc_destroy_all_connections(); - ASSERTCMP(atomic_read(&rxrpc_n_skbs), ==, 0); + ASSERTCMP(atomic_read(&rxrpc_n_tx_skbs), ==, 0); + ASSERTCMP(atomic_read(&rxrpc_n_rx_skbs), ==, 0); rxrpc_destroy_all_locals(); remove_proc_entry("rxrpc_conns", init_net.proc_net); diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index a17341d2df3d..034f525f2235 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -520,6 +520,7 @@ struct rxrpc_call { rxrpc_seq_t rx_expect_next; /* Expected next packet sequence number */ u8 rx_winsize; /* Size of Rx window */ u8 tx_winsize; /* Maximum size of Tx window */ + bool tx_phase; /* T if transmission phase, F if receive phase */ u8 nr_jumbo_bad; /* Number of jumbo dups/exceeds-windows */ /* receive-phase ACK management */ @@ -534,6 +535,27 @@ struct rxrpc_call { rxrpc_serial_t acks_latest; /* serial number of latest ACK received */ }; +enum rxrpc_skb_trace { + rxrpc_skb_rx_cleaned, + rxrpc_skb_rx_freed, + rxrpc_skb_rx_got, + rxrpc_skb_rx_lost, + rxrpc_skb_rx_received, + rxrpc_skb_rx_rotated, + rxrpc_skb_rx_purged, + rxrpc_skb_rx_seen, + rxrpc_skb_tx_cleaned, + rxrpc_skb_tx_freed, + rxrpc_skb_tx_got, + rxrpc_skb_tx_lost, + rxrpc_skb_tx_new, + rxrpc_skb_tx_rotated, + rxrpc_skb_tx_seen, + rxrpc_skb__nr_trace +}; + +extern const char rxrpc_skb_traces[rxrpc_skb__nr_trace][7]; + enum rxrpc_conn_trace { rxrpc_conn_new_client, rxrpc_conn_new_service, @@ -642,7 +664,7 @@ extern const char *rxrpc_acks(u8 reason); /* * af_rxrpc.c */ -extern atomic_t rxrpc_n_skbs; +extern atomic_t rxrpc_n_tx_skbs, rxrpc_n_rx_skbs; extern u32 rxrpc_epoch; extern atomic_t rxrpc_debug_id; extern struct workqueue_struct *rxrpc_workqueue; @@ -1000,10 +1022,11 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *, struct msghdr *, size_t); */ void rxrpc_kernel_data_consumed(struct rxrpc_call *, struct sk_buff *); void rxrpc_packet_destructor(struct sk_buff *); -void rxrpc_new_skb(struct sk_buff *); -void rxrpc_see_skb(struct sk_buff *); -void rxrpc_get_skb(struct sk_buff *); -void rxrpc_free_skb(struct sk_buff *); +void rxrpc_new_skb(struct sk_buff *, enum rxrpc_skb_trace); +void rxrpc_see_skb(struct sk_buff *, enum rxrpc_skb_trace); +void rxrpc_get_skb(struct sk_buff *, enum rxrpc_skb_trace); +void rxrpc_free_skb(struct sk_buff *, enum rxrpc_skb_trace); +void rxrpc_lose_skb(struct sk_buff *, enum rxrpc_skb_trace); void rxrpc_purge_queue(struct sk_buff_head *); /* diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index f0cabc48a1b7..7d1b99824ed9 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -170,7 +170,7 @@ static void rxrpc_resend(struct rxrpc_call *call) continue; skb = call->rxtx_buffer[ix]; - rxrpc_see_skb(skb); + rxrpc_see_skb(skb, rxrpc_skb_tx_seen); sp = rxrpc_skb(skb); if (annotation == RXRPC_TX_ANNO_UNACK) { @@ -199,7 +199,7 @@ static void rxrpc_resend(struct rxrpc_call *call) continue; skb = call->rxtx_buffer[ix]; - rxrpc_get_skb(skb); + rxrpc_get_skb(skb, rxrpc_skb_tx_got); spin_unlock_bh(&call->lock); sp = rxrpc_skb(skb); @@ -211,7 +211,7 @@ static void rxrpc_resend(struct rxrpc_call *call) if (rxrpc_send_data_packet(call->conn, skb) < 0) { call->resend_at = now + 2; - rxrpc_free_skb(skb); + rxrpc_free_skb(skb, rxrpc_skb_tx_freed); return; } @@ -219,7 +219,7 @@ static void rxrpc_resend(struct rxrpc_call *call) rxrpc_expose_client_call(call); sp->resend_at = now + rxrpc_resend_timeout; - rxrpc_free_skb(skb); + rxrpc_free_skb(skb, rxrpc_skb_tx_freed); spin_lock_bh(&call->lock); /* We need to clear the retransmit state, but there are two diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 54f30482a7fd..f50a6094e198 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -182,6 +182,7 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct sockaddr_rxrpc *srx, return ERR_PTR(-ENOMEM); call->state = RXRPC_CALL_CLIENT_AWAIT_CONN; call->service_id = srx->srx_service; + call->tx_phase = true; _leave(" = %p", call); return call; @@ -458,7 +459,9 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) rxrpc_disconnect_call(call); for (i = 0; i < RXRPC_RXTX_BUFF_SIZE; i++) { - rxrpc_free_skb(call->rxtx_buffer[i]); + rxrpc_free_skb(call->rxtx_buffer[i], + (call->tx_phase ? rxrpc_skb_tx_cleaned : + rxrpc_skb_rx_cleaned)); call->rxtx_buffer[i] = NULL; } @@ -552,9 +555,11 @@ void rxrpc_cleanup_call(struct rxrpc_call *call) /* Clean up the Rx/Tx buffer */ for (i = 0; i < RXRPC_RXTX_BUFF_SIZE; i++) - rxrpc_free_skb(call->rxtx_buffer[i]); + rxrpc_free_skb(call->rxtx_buffer[i], + (call->tx_phase ? rxrpc_skb_tx_cleaned : + rxrpc_skb_rx_cleaned)); - rxrpc_free_skb(call->tx_pending); + rxrpc_free_skb(call->tx_pending, rxrpc_skb_tx_cleaned); call_rcu(&call->rcu, rxrpc_rcu_destroy_call); } diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 9b19c51831aa..75a15a4c74c3 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -388,7 +388,7 @@ void rxrpc_process_connection(struct work_struct *work) /* go through the conn-level event packets, releasing the ref on this * connection that each one has when we've finished with it */ while ((skb = skb_dequeue(&conn->rx_queue))) { - rxrpc_see_skb(skb); + rxrpc_see_skb(skb, rxrpc_skb_rx_seen); ret = rxrpc_process_event(conn, skb, &abort_code); switch (ret) { case -EPROTO: @@ -399,7 +399,7 @@ void rxrpc_process_connection(struct work_struct *work) goto requeue_and_leave; case -ECONNABORTED: default: - rxrpc_free_skb(skb); + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); break; } } @@ -416,7 +416,7 @@ requeue_and_leave: protocol_error: if (rxrpc_abort_connection(conn, -ret, abort_code) < 0) goto requeue_and_leave; - rxrpc_free_skb(skb); + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); _leave(" [EPROTO]"); goto out; } diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index b690220533c6..84bb16d47b85 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -50,7 +50,7 @@ static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to) call->tx_hard_ack++; ix = call->tx_hard_ack & RXRPC_RXTX_BUFF_MASK; skb = call->rxtx_buffer[ix]; - rxrpc_see_skb(skb); + rxrpc_see_skb(skb, rxrpc_skb_tx_rotated); call->rxtx_buffer[ix] = NULL; call->rxtx_annotations[ix] = 0; skb->next = list; @@ -66,7 +66,7 @@ static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to) skb = list; list = skb->next; skb->next = NULL; - rxrpc_free_skb(skb); + rxrpc_free_skb(skb, rxrpc_skb_tx_freed); } } @@ -99,6 +99,7 @@ static bool rxrpc_end_tx_phase(struct rxrpc_call *call, const char *abort_why) default: break; case RXRPC_CALL_CLIENT_AWAIT_REPLY: + call->tx_phase = false; call->state = RXRPC_CALL_CLIENT_RECV_REPLY; break; case RXRPC_CALL_SERVER_AWAIT_ACK: @@ -278,7 +279,7 @@ next_subpacket: * Barriers against rxrpc_recvmsg_data() and rxrpc_rotate_rx_window() * and also rxrpc_fill_out_ack(). */ - rxrpc_get_skb(skb); + rxrpc_get_skb(skb, rxrpc_skb_rx_got); call->rxtx_annotations[ix] = annotation; smp_wmb(); call->rxtx_buffer[ix] = skb; @@ -691,13 +692,13 @@ void rxrpc_data_ready(struct sock *udp_sk) return; } - rxrpc_new_skb(skb); + rxrpc_new_skb(skb, rxrpc_skb_rx_received); _net("recv skb %p", skb); /* we'll probably need to checksum it (didn't call sock_recvmsg) */ if (skb_checksum_complete(skb)) { - rxrpc_free_skb(skb); + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); __UDP_INC_STATS(&init_net, UDP_MIB_INERRORS, 0); _leave(" [CSUM failed]"); return; @@ -821,7 +822,7 @@ void rxrpc_data_ready(struct sock *udp_sk) discard_unlock: rcu_read_unlock(); discard: - rxrpc_free_skb(skb); + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); out: trace_rxrpc_rx_done(0, 0); return; diff --git a/net/rxrpc/local_event.c b/net/rxrpc/local_event.c index f073e932500e..190f68bd9e27 100644 --- a/net/rxrpc/local_event.c +++ b/net/rxrpc/local_event.c @@ -90,7 +90,7 @@ void rxrpc_process_local_events(struct rxrpc_local *local) if (skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - rxrpc_see_skb(skb); + rxrpc_see_skb(skb, rxrpc_skb_rx_seen); _debug("{%d},{%u}", local->debug_id, sp->hdr.type); switch (sp->hdr.type) { @@ -107,7 +107,7 @@ void rxrpc_process_local_events(struct rxrpc_local *local) break; } - rxrpc_free_skb(skb); + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); } _leave(""); diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index c7065d893d1e..026e1f2e83ff 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -102,6 +102,24 @@ const char *rxrpc_acks(u8 reason) return str[reason]; } +const char rxrpc_skb_traces[rxrpc_skb__nr_trace][7] = { + [rxrpc_skb_rx_cleaned] = "Rx CLN", + [rxrpc_skb_rx_freed] = "Rx FRE", + [rxrpc_skb_rx_got] = "Rx GOT", + [rxrpc_skb_rx_lost] = "Rx *L*", + [rxrpc_skb_rx_received] = "Rx RCV", + [rxrpc_skb_rx_purged] = "Rx PUR", + [rxrpc_skb_rx_rotated] = "Rx ROT", + [rxrpc_skb_rx_seen] = "Rx SEE", + [rxrpc_skb_tx_cleaned] = "Tx CLN", + [rxrpc_skb_tx_freed] = "Tx FRE", + [rxrpc_skb_tx_got] = "Tx GOT", + [rxrpc_skb_tx_lost] = "Tx *L*", + [rxrpc_skb_tx_new] = "Tx NEW", + [rxrpc_skb_tx_rotated] = "Tx ROT", + [rxrpc_skb_tx_seen] = "Tx SEE", +}; + const char rxrpc_conn_traces[rxrpc_conn__nr_trace][4] = { [rxrpc_conn_new_client] = "NWc", [rxrpc_conn_new_service] = "NWs", diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 2c9daeadce87..a2cad5ce7416 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -324,7 +324,7 @@ void rxrpc_reject_packets(struct rxrpc_local *local) whdr.type = RXRPC_PACKET_TYPE_ABORT; while ((skb = skb_dequeue(&local->reject_queue))) { - rxrpc_see_skb(skb); + rxrpc_see_skb(skb, rxrpc_skb_rx_seen); sp = rxrpc_skb(skb); if (rxrpc_extract_addr_from_skb(&srx, skb) == 0) { @@ -343,7 +343,7 @@ void rxrpc_reject_packets(struct rxrpc_local *local) kernel_sendmsg(local->socket, &msg, iov, 2, size); } - rxrpc_free_skb(skb); + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); } _leave(""); diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index 9e0725f5652b..18276e7cb9e0 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -155,11 +155,11 @@ void rxrpc_error_report(struct sock *sk) _leave("UDP socket errqueue empty"); return; } - rxrpc_new_skb(skb); + rxrpc_new_skb(skb, rxrpc_skb_rx_received); serr = SKB_EXT_ERR(skb); if (!skb->len && serr->ee.ee_origin == SO_EE_ORIGIN_TIMESTAMPING) { _leave("UDP empty message"); - rxrpc_free_skb(skb); + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); return; } @@ -169,7 +169,7 @@ void rxrpc_error_report(struct sock *sk) peer = NULL; if (!peer) { rcu_read_unlock(); - rxrpc_free_skb(skb); + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); _leave(" [no peer]"); return; } @@ -179,7 +179,7 @@ void rxrpc_error_report(struct sock *sk) serr->ee.ee_code == ICMP_FRAG_NEEDED)) { rxrpc_adjust_mtu(peer, serr); rcu_read_unlock(); - rxrpc_free_skb(skb); + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); rxrpc_put_peer(peer); _leave(" [MTU update]"); return; @@ -187,7 +187,7 @@ void rxrpc_error_report(struct sock *sk) rxrpc_store_error(peer, serr); rcu_read_unlock(); - rxrpc_free_skb(skb); + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); /* The ref we obtained is passed off to the work item */ rxrpc_queue_work(&peer->error_distributor); diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 79e65668bc58..6ba4af5a8d95 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -155,6 +155,7 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call) break; case RXRPC_CALL_SERVER_RECV_REQUEST: + call->tx_phase = true; call->state = RXRPC_CALL_SERVER_ACK_REQUEST; break; default: @@ -185,7 +186,7 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) hard_ack++; ix = hard_ack & RXRPC_RXTX_BUFF_MASK; skb = call->rxtx_buffer[ix]; - rxrpc_see_skb(skb); + rxrpc_see_skb(skb, rxrpc_skb_rx_rotated); sp = rxrpc_skb(skb); flags = sp->hdr.flags; serial = sp->hdr.serial; @@ -197,7 +198,7 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) /* Barrier against rxrpc_input_data(). */ smp_store_release(&call->rx_hard_ack, hard_ack); - rxrpc_free_skb(skb); + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); _debug("%u,%u,%02x", hard_ack, top, flags); trace_rxrpc_receive(call, rxrpc_receive_rotate, serial, hard_ack); @@ -317,7 +318,7 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, break; } smp_rmb(); - rxrpc_see_skb(skb); + rxrpc_see_skb(skb, rxrpc_skb_rx_seen); sp = rxrpc_skb(skb); if (!(flags & MSG_PEEK)) diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 28d8f73cf11d..6a39ee97a0b7 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -100,7 +100,7 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, ASSERTCMP(seq, ==, call->tx_top + 1); ix = seq & RXRPC_RXTX_BUFF_MASK; - rxrpc_get_skb(skb); + rxrpc_get_skb(skb, rxrpc_skb_tx_got); call->rxtx_annotations[ix] = RXRPC_TX_ANNO_UNACK; smp_wmb(); call->rxtx_buffer[ix] = skb; @@ -146,7 +146,7 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, rxrpc_instant_resend(call, ix); } - rxrpc_free_skb(skb); + rxrpc_free_skb(skb, rxrpc_skb_tx_freed); _leave(""); } @@ -201,7 +201,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, skb = call->tx_pending; call->tx_pending = NULL; - rxrpc_see_skb(skb); + rxrpc_see_skb(skb, rxrpc_skb_tx_seen); copied = 0; do { @@ -242,7 +242,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, if (!skb) goto maybe_error; - rxrpc_new_skb(skb); + rxrpc_new_skb(skb, rxrpc_skb_tx_new); _debug("ALLOC SEND %p", skb); @@ -352,7 +352,7 @@ out: return ret; call_terminated: - rxrpc_free_skb(skb); + rxrpc_free_skb(skb, rxrpc_skb_tx_freed); _leave(" = %d", -call->error); return -call->error; diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c index 620d9ccaf3c1..5154cbf7e540 100644 --- a/net/rxrpc/skbuff.c +++ b/net/rxrpc/skbuff.c @@ -18,54 +18,76 @@ #include #include "ar-internal.h" +#define select_skb_count(op) (op >= rxrpc_skb_tx_cleaned ? &rxrpc_n_tx_skbs : &rxrpc_n_rx_skbs) + /* - * Note the existence of a new-to-us socket buffer (allocated or dequeued). + * Note the allocation or reception of a socket buffer. */ -void rxrpc_new_skb(struct sk_buff *skb) +void rxrpc_new_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) { const void *here = __builtin_return_address(0); - int n = atomic_inc_return(&rxrpc_n_skbs); - trace_rxrpc_skb(skb, 0, atomic_read(&skb->users), n, here); + int n = atomic_inc_return(select_skb_count(op)); + trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here); } /* * Note the re-emergence of a socket buffer from a queue or buffer. */ -void rxrpc_see_skb(struct sk_buff *skb) +void rxrpc_see_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) { const void *here = __builtin_return_address(0); if (skb) { - int n = atomic_read(&rxrpc_n_skbs); - trace_rxrpc_skb(skb, 1, atomic_read(&skb->users), n, here); + int n = atomic_read(select_skb_count(op)); + trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here); } } /* * Note the addition of a ref on a socket buffer. */ -void rxrpc_get_skb(struct sk_buff *skb) +void rxrpc_get_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) { const void *here = __builtin_return_address(0); - int n = atomic_inc_return(&rxrpc_n_skbs); - trace_rxrpc_skb(skb, 2, atomic_read(&skb->users), n, here); + int n = atomic_inc_return(select_skb_count(op)); + trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here); skb_get(skb); } /* * Note the destruction of a socket buffer. */ -void rxrpc_free_skb(struct sk_buff *skb) +void rxrpc_free_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) { const void *here = __builtin_return_address(0); if (skb) { int n; CHECK_SLAB_OKAY(&skb->users); - n = atomic_dec_return(&rxrpc_n_skbs); - trace_rxrpc_skb(skb, 3, atomic_read(&skb->users), n, here); + n = atomic_dec_return(select_skb_count(op)); + trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here); kfree_skb(skb); } } +/* + * Note the injected loss of a socket buffer. + */ +void rxrpc_lose_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) +{ + const void *here = __builtin_return_address(0); + if (skb) { + int n; + CHECK_SLAB_OKAY(&skb->users); + if (op == rxrpc_skb_tx_lost) { + n = atomic_read(select_skb_count(op)); + trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here); + } else { + n = atomic_dec_return(select_skb_count(op)); + trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here); + kfree_skb(skb); + } + } +} + /* * Clear a queue of socket buffers. */ @@ -74,8 +96,9 @@ void rxrpc_purge_queue(struct sk_buff_head *list) const void *here = __builtin_return_address(0); struct sk_buff *skb; while ((skb = skb_dequeue((list))) != NULL) { - int n = atomic_dec_return(&rxrpc_n_skbs); - trace_rxrpc_skb(skb, 4, atomic_read(&skb->users), n, here); + int n = atomic_dec_return(select_skb_count(rxrpc_skb_rx_purged)); + trace_rxrpc_skb(skb, rxrpc_skb_rx_purged, + atomic_read(&skb->users), n, here); kfree_skb(skb); } } From 8a681c360559f75a80b37e6a6a9590457361ccb0 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:15 +0100 Subject: [PATCH 1115/1715] rxrpc: Add config to inject packet loss Add a configuration option to inject packet loss by discarding approximately every 8th packet received and approximately every 8th DATA packet transmitted. Note that no locking is used, but it shouldn't really matter. Signed-off-by: David Howells --- net/rxrpc/Kconfig | 7 +++++++ net/rxrpc/input.c | 8 ++++++++ net/rxrpc/output.c | 9 +++++++++ 3 files changed, 24 insertions(+) diff --git a/net/rxrpc/Kconfig b/net/rxrpc/Kconfig index 13396c74b5c1..86f8853a038c 100644 --- a/net/rxrpc/Kconfig +++ b/net/rxrpc/Kconfig @@ -26,6 +26,13 @@ config AF_RXRPC_IPV6 Say Y here to allow AF_RXRPC to use IPV6 UDP as well as IPV4 UDP as its network transport. +config AF_RXRPC_INJECT_LOSS + bool "Inject packet loss into RxRPC packet stream" + depends on AF_RXRPC + help + Say Y here to inject packet loss by discarding some received and some + transmitted packets. + config AF_RXRPC_DEBUG bool "RxRPC dynamic debugging" diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 84bb16d47b85..7ac1edf3aac7 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -712,6 +712,14 @@ void rxrpc_data_ready(struct sock *udp_sk) skb_orphan(skb); sp = rxrpc_skb(skb); + if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) { + static int lose; + if ((lose++ & 7) == 7) { + rxrpc_lose_skb(skb, rxrpc_skb_rx_lost); + return; + } + } + _net("Rx UDP packet from %08x:%04hu", ntohl(ip_hdr(skb)->saddr), ntohs(udp_hdr(skb)->source)); diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index a2cad5ce7416..16e18a94ffa6 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -225,6 +225,15 @@ int rxrpc_send_data_packet(struct rxrpc_connection *conn, struct sk_buff *skb) msg.msg_controllen = 0; msg.msg_flags = 0; + if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) { + static int lose; + if ((lose++ & 7) == 7) { + rxrpc_lose_skb(skb, rxrpc_skb_tx_lost); + _leave(" = 0 [lose]"); + return 0; + } + } + /* send the packet with the don't fragment bit set if we currently * think it's small enough */ if (skb->len - sizeof(struct rxrpc_wire_header) < conn->params.peer->maxdata) { From 7e426671704d2266757dff9c4254b788561aa11e Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Thu, 15 Sep 2016 16:08:36 +0300 Subject: [PATCH 1116/1715] net/mlx5e: Single flow order-0 pages for Striding RQ To improve the memory consumption scheme, we omit the flow that demands and splits high-order pages in Striding RQ, and stay with a single Striding RQ flow that uses order-0 pages. Moving to fragmented memory allows the use of larger MPWQEs, which reduces the number of UMR posts and filler CQEs. Moving to a single flow allows several optimizations that improve performance, especially in production servers where we would anyway fallback to order-0 allocations: - inline functions that were called via function pointers. - improve the UMR post process. This patch alone is expected to give a slight performance reduction. However, the new memory scheme gives the possibility to use a page-cache of a fair size, that doesn't inflate the memory footprint, which will dramatically fix the reduction and even give a performance gain. Performance tests: The following results were measured on a freshly booted system, giving optimal baseline performance, as high-order pages are yet to be fragmented and depleted. We ran pktgen single-stream benchmarks, with iptables-raw-drop: Single stride, 64 bytes: * 4,739,057 - baseline * 4,749,550 - this patch no reduction Larger packets, no page cross, 1024 bytes: * 3,982,361 - baseline * 3,845,682 - this patch 3.5% reduction Larger packets, every 3rd packet crosses a page, 1500 bytes: * 3,731,189 - baseline * 3,579,414 - this patch 4% reduction Fixes: 461017cb006a ("net/mlx5e: Support RX multi-packet WQE (Striding RQ)") Fixes: bc77b240b3c5 ("net/mlx5e: Add fragmented memory support for RX multi packet WQE") Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 54 +--- .../net/ethernet/mellanox/mlx5/core/en_main.c | 136 ++++++-- .../net/ethernet/mellanox/mlx5/core/en_rx.c | 292 +++--------------- .../ethernet/mellanox/mlx5/core/en_stats.h | 4 - .../net/ethernet/mellanox/mlx5/core/en_txrx.c | 2 +- 5 files changed, 184 insertions(+), 304 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index a9358cf7386a..401b2f7b165f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -62,12 +62,12 @@ #define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE 0xd #define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW 0x1 -#define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW 0x4 +#define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW 0x3 #define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW 0x6 #define MLX5_MPWRQ_LOG_STRIDE_SIZE 6 /* >= 6, HW restriction */ #define MLX5_MPWRQ_LOG_STRIDE_SIZE_CQE_COMPRESS 8 /* >= 6, HW restriction */ -#define MLX5_MPWRQ_LOG_WQE_SZ 17 +#define MLX5_MPWRQ_LOG_WQE_SZ 18 #define MLX5_MPWRQ_WQE_PAGE_ORDER (MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT > 0 ? \ MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT : 0) #define MLX5_MPWRQ_PAGES_PER_WQE BIT(MLX5_MPWRQ_WQE_PAGE_ORDER) @@ -293,8 +293,8 @@ struct mlx5e_rq { u32 wqe_sz; struct sk_buff **skb; struct mlx5e_mpw_info *wqe_info; + void *mtt_no_align; __be32 mkey_be; - __be32 umr_mkey_be; struct device *pdev; struct net_device *netdev; @@ -323,32 +323,15 @@ struct mlx5e_rq { struct mlx5e_umr_dma_info { __be64 *mtt; - __be64 *mtt_no_align; dma_addr_t mtt_addr; - struct mlx5e_dma_info *dma_info; + struct mlx5e_dma_info dma_info[MLX5_MPWRQ_PAGES_PER_WQE]; + struct mlx5e_umr_wqe wqe; }; struct mlx5e_mpw_info { - union { - struct mlx5e_dma_info dma_info; - struct mlx5e_umr_dma_info umr; - }; + struct mlx5e_umr_dma_info umr; u16 consumed_strides; u16 skbs_frags[MLX5_MPWRQ_PAGES_PER_WQE]; - - void (*dma_pre_sync)(struct device *pdev, - struct mlx5e_mpw_info *wi, - u32 wqe_offset, u32 len); - void (*add_skb_frag)(struct mlx5e_rq *rq, - struct sk_buff *skb, - struct mlx5e_mpw_info *wi, - u32 page_idx, u32 frag_offset, u32 len); - void (*copy_skb_header)(struct device *pdev, - struct sk_buff *skb, - struct mlx5e_mpw_info *wi, - u32 page_idx, u32 offset, - u32 headlen); - void (*free_wqe)(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi); }; struct mlx5e_tx_wqe_info { @@ -672,24 +655,11 @@ void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq); int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix); -int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix); +int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix); void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix); void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix); -void mlx5e_post_rx_fragmented_mpwqe(struct mlx5e_rq *rq); -void mlx5e_complete_rx_linear_mpwqe(struct mlx5e_rq *rq, - struct mlx5_cqe64 *cqe, - u16 byte_cnt, - struct mlx5e_mpw_info *wi, - struct sk_buff *skb); -void mlx5e_complete_rx_fragmented_mpwqe(struct mlx5e_rq *rq, - struct mlx5_cqe64 *cqe, - u16 byte_cnt, - struct mlx5e_mpw_info *wi, - struct sk_buff *skb); -void mlx5e_free_rx_linear_mpwqe(struct mlx5e_rq *rq, - struct mlx5e_mpw_info *wi); -void mlx5e_free_rx_fragmented_mpwqe(struct mlx5e_rq *rq, - struct mlx5e_mpw_info *wi); +void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq); +void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi); struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq); void mlx5e_rx_am(struct mlx5e_rq *rq); @@ -776,6 +746,12 @@ static inline void mlx5e_cq_arm(struct mlx5e_cq *cq) mlx5_cq_arm(mcq, MLX5_CQ_DB_REQ_NOT, mcq->uar->map, NULL, cq->wq.cc); } +static inline u32 mlx5e_get_wqe_mtt_offset(struct mlx5e_rq *rq, u16 wqe_ix) +{ + return rq->mpwqe_mtt_offset + + wqe_ix * ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8); +} + static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev) { return min_t(int, mdev->priv.eq_table.num_comp_vectors, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index af4c61e6d589..136554b77c3b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -138,7 +138,6 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv) s->rx_csum_unnecessary_inner += rq_stats->csum_unnecessary_inner; s->rx_wqe_err += rq_stats->wqe_err; s->rx_mpwqe_filler += rq_stats->mpwqe_filler; - s->rx_mpwqe_frag += rq_stats->mpwqe_frag; s->rx_buff_alloc_err += rq_stats->buff_alloc_err; s->rx_cqe_compress_blks += rq_stats->cqe_compress_blks; s->rx_cqe_compress_pkts += rq_stats->cqe_compress_pkts; @@ -295,6 +294,107 @@ static void mlx5e_disable_async_events(struct mlx5e_priv *priv) #define MLX5E_HW2SW_MTU(hwmtu) (hwmtu - (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)) #define MLX5E_SW2HW_MTU(swmtu) (swmtu + (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)) +static inline int mlx5e_get_wqe_mtt_sz(void) +{ + /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes. + * To avoid copying garbage after the mtt array, we allocate + * a little more. + */ + return ALIGN(MLX5_MPWRQ_PAGES_PER_WQE * sizeof(__be64), + MLX5_UMR_MTT_ALIGNMENT); +} + +static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, struct mlx5e_sq *sq, + struct mlx5e_umr_wqe *wqe, u16 ix) +{ + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl; + struct mlx5_wqe_data_seg *dseg = &wqe->data; + struct mlx5e_mpw_info *wi = &rq->wqe_info[ix]; + u8 ds_cnt = DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_DS); + u32 umr_wqe_mtt_offset = mlx5e_get_wqe_mtt_offset(rq, ix); + + cseg->qpn_ds = cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT) | + ds_cnt); + cseg->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; + cseg->imm = rq->mkey_be; + + ucseg->flags = MLX5_UMR_TRANSLATION_OFFSET_EN; + ucseg->klm_octowords = + cpu_to_be16(MLX5_MTT_OCTW(MLX5_MPWRQ_PAGES_PER_WQE)); + ucseg->bsf_octowords = + cpu_to_be16(MLX5_MTT_OCTW(umr_wqe_mtt_offset)); + ucseg->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); + + dseg->lkey = sq->mkey_be; + dseg->addr = cpu_to_be64(wi->umr.mtt_addr); +} + +static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, + struct mlx5e_channel *c) +{ + int wq_sz = mlx5_wq_ll_get_size(&rq->wq); + int mtt_sz = mlx5e_get_wqe_mtt_sz(); + int mtt_alloc = mtt_sz + MLX5_UMR_ALIGN - 1; + int i; + + rq->wqe_info = kzalloc_node(wq_sz * sizeof(*rq->wqe_info), + GFP_KERNEL, cpu_to_node(c->cpu)); + if (!rq->wqe_info) + goto err_out; + + /* We allocate more than mtt_sz as we will align the pointer */ + rq->mtt_no_align = kzalloc_node(mtt_alloc * wq_sz, GFP_KERNEL, + cpu_to_node(c->cpu)); + if (unlikely(!rq->mtt_no_align)) + goto err_free_wqe_info; + + for (i = 0; i < wq_sz; i++) { + struct mlx5e_mpw_info *wi = &rq->wqe_info[i]; + + wi->umr.mtt = PTR_ALIGN(rq->mtt_no_align + i * mtt_alloc, + MLX5_UMR_ALIGN); + wi->umr.mtt_addr = dma_map_single(c->pdev, wi->umr.mtt, mtt_sz, + PCI_DMA_TODEVICE); + if (unlikely(dma_mapping_error(c->pdev, wi->umr.mtt_addr))) + goto err_unmap_mtts; + + mlx5e_build_umr_wqe(rq, &c->icosq, &wi->umr.wqe, i); + } + + return 0; + +err_unmap_mtts: + while (--i >= 0) { + struct mlx5e_mpw_info *wi = &rq->wqe_info[i]; + + dma_unmap_single(c->pdev, wi->umr.mtt_addr, mtt_sz, + PCI_DMA_TODEVICE); + } + kfree(rq->mtt_no_align); +err_free_wqe_info: + kfree(rq->wqe_info); + +err_out: + return -ENOMEM; +} + +static void mlx5e_rq_free_mpwqe_info(struct mlx5e_rq *rq) +{ + int wq_sz = mlx5_wq_ll_get_size(&rq->wq); + int mtt_sz = mlx5e_get_wqe_mtt_sz(); + int i; + + for (i = 0; i < wq_sz; i++) { + struct mlx5e_mpw_info *wi = &rq->wqe_info[i]; + + dma_unmap_single(rq->pdev, wi->umr.mtt_addr, mtt_sz, + PCI_DMA_TODEVICE); + } + kfree(rq->mtt_no_align); + kfree(rq->wqe_info); +} + static int mlx5e_create_rq(struct mlx5e_channel *c, struct mlx5e_rq_param *param, struct mlx5e_rq *rq) @@ -319,14 +419,16 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, wq_sz = mlx5_wq_ll_get_size(&rq->wq); + rq->wq_type = priv->params.rq_wq_type; + rq->pdev = c->pdev; + rq->netdev = c->netdev; + rq->tstamp = &priv->tstamp; + rq->channel = c; + rq->ix = c->ix; + rq->priv = c->priv; + switch (priv->params.rq_wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - rq->wqe_info = kzalloc_node(wq_sz * sizeof(*rq->wqe_info), - GFP_KERNEL, cpu_to_node(c->cpu)); - if (!rq->wqe_info) { - err = -ENOMEM; - goto err_rq_wq_destroy; - } rq->handle_rx_cqe = mlx5e_handle_rx_cqe_mpwrq; rq->alloc_wqe = mlx5e_alloc_rx_mpwqe; rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe; @@ -338,6 +440,10 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, rq->mpwqe_num_strides = BIT(priv->params.mpwqe_log_num_strides); rq->wqe_sz = rq->mpwqe_stride_sz * rq->mpwqe_num_strides; byte_count = rq->wqe_sz; + rq->mkey_be = cpu_to_be32(c->priv->umr_mkey.key); + err = mlx5e_rq_alloc_mpwqe_info(rq, c); + if (err) + goto err_rq_wq_destroy; break; default: /* MLX5_WQ_TYPE_LINKED_LIST */ rq->skb = kzalloc_node(wq_sz * sizeof(*rq->skb), GFP_KERNEL, @@ -356,27 +462,19 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, rq->wqe_sz = SKB_DATA_ALIGN(rq->wqe_sz); byte_count = rq->wqe_sz; byte_count |= MLX5_HW_START_PADDING; + rq->mkey_be = c->mkey_be; } for (i = 0; i < wq_sz; i++) { struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i); wqe->data.byte_count = cpu_to_be32(byte_count); + wqe->data.lkey = rq->mkey_be; } INIT_WORK(&rq->am.work, mlx5e_rx_am_work); rq->am.mode = priv->params.rx_cq_period_mode; - rq->wq_type = priv->params.rq_wq_type; - rq->pdev = c->pdev; - rq->netdev = c->netdev; - rq->tstamp = &priv->tstamp; - rq->channel = c; - rq->ix = c->ix; - rq->priv = c->priv; - rq->mkey_be = c->mkey_be; - rq->umr_mkey_be = cpu_to_be32(c->priv->umr_mkey.key); - return 0; err_rq_wq_destroy: @@ -389,7 +487,7 @@ static void mlx5e_destroy_rq(struct mlx5e_rq *rq) { switch (rq->wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - kfree(rq->wqe_info); + mlx5e_rq_free_mpwqe_info(rq); break; default: /* MLX5_WQ_TYPE_LINKED_LIST */ kfree(rq->skb); @@ -528,7 +626,7 @@ static void mlx5e_free_rx_descs(struct mlx5e_rq *rq) /* UMR WQE (if in progress) is always at wq->head */ if (test_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state)) - mlx5e_free_rx_fragmented_mpwqe(rq, &rq->wqe_info[wq->head]); + mlx5e_free_rx_mpwqe(rq, &rq->wqe_info[wq->head]); while (!mlx5_wq_ll_is_empty(wq)) { wqe_ix_be = *wq->tail_next; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index e7c969df3dad..5d1b7b5e4f36 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -200,7 +200,6 @@ int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) *((dma_addr_t *)skb->cb) = dma_addr; wqe->data.addr = cpu_to_be64(dma_addr); - wqe->data.lkey = rq->mkey_be; rq->skb[ix] = skb; @@ -231,44 +230,11 @@ static inline int mlx5e_mpwqe_strides_per_page(struct mlx5e_rq *rq) return rq->mpwqe_num_strides >> MLX5_MPWRQ_WQE_PAGE_ORDER; } -static inline void -mlx5e_dma_pre_sync_linear_mpwqe(struct device *pdev, - struct mlx5e_mpw_info *wi, - u32 wqe_offset, u32 len) -{ - dma_sync_single_for_cpu(pdev, wi->dma_info.addr + wqe_offset, - len, DMA_FROM_DEVICE); -} - -static inline void -mlx5e_dma_pre_sync_fragmented_mpwqe(struct device *pdev, - struct mlx5e_mpw_info *wi, - u32 wqe_offset, u32 len) -{ - /* No dma pre sync for fragmented MPWQE */ -} - -static inline void -mlx5e_add_skb_frag_linear_mpwqe(struct mlx5e_rq *rq, - struct sk_buff *skb, - struct mlx5e_mpw_info *wi, - u32 page_idx, u32 frag_offset, - u32 len) -{ - unsigned int truesize = ALIGN(len, rq->mpwqe_stride_sz); - - wi->skbs_frags[page_idx]++; - skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, - &wi->dma_info.page[page_idx], frag_offset, - len, truesize); -} - -static inline void -mlx5e_add_skb_frag_fragmented_mpwqe(struct mlx5e_rq *rq, - struct sk_buff *skb, - struct mlx5e_mpw_info *wi, - u32 page_idx, u32 frag_offset, - u32 len) +static inline void mlx5e_add_skb_frag_mpwqe(struct mlx5e_rq *rq, + struct sk_buff *skb, + struct mlx5e_mpw_info *wi, + u32 page_idx, u32 frag_offset, + u32 len) { unsigned int truesize = ALIGN(len, rq->mpwqe_stride_sz); @@ -282,24 +248,11 @@ mlx5e_add_skb_frag_fragmented_mpwqe(struct mlx5e_rq *rq, } static inline void -mlx5e_copy_skb_header_linear_mpwqe(struct device *pdev, - struct sk_buff *skb, - struct mlx5e_mpw_info *wi, - u32 page_idx, u32 offset, - u32 headlen) -{ - struct page *page = &wi->dma_info.page[page_idx]; - - skb_copy_to_linear_data(skb, page_address(page) + offset, - ALIGN(headlen, sizeof(long))); -} - -static inline void -mlx5e_copy_skb_header_fragmented_mpwqe(struct device *pdev, - struct sk_buff *skb, - struct mlx5e_mpw_info *wi, - u32 page_idx, u32 offset, - u32 headlen) +mlx5e_copy_skb_header_mpwqe(struct device *pdev, + struct sk_buff *skb, + struct mlx5e_mpw_info *wi, + u32 page_idx, u32 offset, + u32 headlen) { u16 headlen_pg = min_t(u32, headlen, PAGE_SIZE - offset); struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[page_idx]; @@ -324,46 +277,9 @@ mlx5e_copy_skb_header_fragmented_mpwqe(struct device *pdev, } } -static u32 mlx5e_get_wqe_mtt_offset(struct mlx5e_rq *rq, u16 wqe_ix) +static inline void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix) { - return rq->mpwqe_mtt_offset + - wqe_ix * ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8); -} - -static void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, - struct mlx5e_sq *sq, - struct mlx5e_umr_wqe *wqe, - u16 ix) -{ - struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; - struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl; - struct mlx5_wqe_data_seg *dseg = &wqe->data; struct mlx5e_mpw_info *wi = &rq->wqe_info[ix]; - u8 ds_cnt = DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_DS); - u32 umr_wqe_mtt_offset = mlx5e_get_wqe_mtt_offset(rq, ix); - - memset(wqe, 0, sizeof(*wqe)); - cseg->opmod_idx_opcode = - cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | - MLX5_OPCODE_UMR); - cseg->qpn_ds = cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT) | - ds_cnt); - cseg->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; - cseg->imm = rq->umr_mkey_be; - - ucseg->flags = MLX5_UMR_TRANSLATION_OFFSET_EN; - ucseg->klm_octowords = - cpu_to_be16(MLX5_MTT_OCTW(MLX5_MPWRQ_PAGES_PER_WQE)); - ucseg->bsf_octowords = - cpu_to_be16(MLX5_MTT_OCTW(umr_wqe_mtt_offset)); - ucseg->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); - - dseg->lkey = sq->mkey_be; - dseg->addr = cpu_to_be64(wi->umr.mtt_addr); -} - -static void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix) -{ struct mlx5e_sq *sq = &rq->channel->icosq; struct mlx5_wq_cyc *wq = &sq->wq; struct mlx5e_umr_wqe *wqe; @@ -378,30 +294,22 @@ static void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix) } wqe = mlx5_wq_cyc_get_wqe(wq, pi); - mlx5e_build_umr_wqe(rq, sq, wqe, ix); + memcpy(wqe, &wi->umr.wqe, sizeof(*wqe)); + wqe->ctrl.opmod_idx_opcode = + cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | + MLX5_OPCODE_UMR); + sq->ico_wqe_info[pi].opcode = MLX5_OPCODE_UMR; sq->ico_wqe_info[pi].num_wqebbs = num_wqebbs; sq->pc += num_wqebbs; mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0); } -static inline int mlx5e_get_wqe_mtt_sz(void) +static inline int mlx5e_alloc_and_map_page(struct mlx5e_rq *rq, + struct mlx5e_mpw_info *wi, + int i) { - /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes. - * To avoid copying garbage after the mtt array, we allocate - * a little more. - */ - return ALIGN(MLX5_MPWRQ_PAGES_PER_WQE * sizeof(__be64), - MLX5_UMR_MTT_ALIGNMENT); -} - -static int mlx5e_alloc_and_map_page(struct mlx5e_rq *rq, - struct mlx5e_mpw_info *wi, - int i) -{ - struct page *page; - - page = dev_alloc_page(); + struct page *page = dev_alloc_page(); if (unlikely(!page)) return -ENOMEM; @@ -417,47 +325,25 @@ static int mlx5e_alloc_and_map_page(struct mlx5e_rq *rq, return 0; } -static int mlx5e_alloc_rx_fragmented_mpwqe(struct mlx5e_rq *rq, - struct mlx5e_rx_wqe *wqe, - u16 ix) +static int mlx5e_alloc_rx_umr_mpwqe(struct mlx5e_rq *rq, + struct mlx5e_rx_wqe *wqe, + u16 ix) { struct mlx5e_mpw_info *wi = &rq->wqe_info[ix]; - int mtt_sz = mlx5e_get_wqe_mtt_sz(); u64 dma_offset = (u64)mlx5e_get_wqe_mtt_offset(rq, ix) << PAGE_SHIFT; + int pg_strides = mlx5e_mpwqe_strides_per_page(rq); + int err; int i; - wi->umr.dma_info = kmalloc(sizeof(*wi->umr.dma_info) * - MLX5_MPWRQ_PAGES_PER_WQE, - GFP_ATOMIC); - if (unlikely(!wi->umr.dma_info)) - goto err_out; - - /* We allocate more than mtt_sz as we will align the pointer */ - wi->umr.mtt_no_align = kzalloc(mtt_sz + MLX5_UMR_ALIGN - 1, - GFP_ATOMIC); - if (unlikely(!wi->umr.mtt_no_align)) - goto err_free_umr; - - wi->umr.mtt = PTR_ALIGN(wi->umr.mtt_no_align, MLX5_UMR_ALIGN); - wi->umr.mtt_addr = dma_map_single(rq->pdev, wi->umr.mtt, mtt_sz, - PCI_DMA_TODEVICE); - if (unlikely(dma_mapping_error(rq->pdev, wi->umr.mtt_addr))) - goto err_free_mtt; - for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) { - if (unlikely(mlx5e_alloc_and_map_page(rq, wi, i))) + err = mlx5e_alloc_and_map_page(rq, wi, i); + if (unlikely(err)) goto err_unmap; - page_ref_add(wi->umr.dma_info[i].page, - mlx5e_mpwqe_strides_per_page(rq)); + page_ref_add(wi->umr.dma_info[i].page, pg_strides); wi->skbs_frags[i] = 0; } wi->consumed_strides = 0; - wi->dma_pre_sync = mlx5e_dma_pre_sync_fragmented_mpwqe; - wi->add_skb_frag = mlx5e_add_skb_frag_fragmented_mpwqe; - wi->copy_skb_header = mlx5e_copy_skb_header_fragmented_mpwqe; - wi->free_wqe = mlx5e_free_rx_fragmented_mpwqe; - wqe->data.lkey = rq->umr_mkey_be; wqe->data.addr = cpu_to_be64(dma_offset); return 0; @@ -466,41 +352,28 @@ err_unmap: while (--i >= 0) { dma_unmap_page(rq->pdev, wi->umr.dma_info[i].addr, PAGE_SIZE, PCI_DMA_FROMDEVICE); - page_ref_sub(wi->umr.dma_info[i].page, - mlx5e_mpwqe_strides_per_page(rq)); + page_ref_sub(wi->umr.dma_info[i].page, pg_strides); put_page(wi->umr.dma_info[i].page); } - dma_unmap_single(rq->pdev, wi->umr.mtt_addr, mtt_sz, PCI_DMA_TODEVICE); -err_free_mtt: - kfree(wi->umr.mtt_no_align); - -err_free_umr: - kfree(wi->umr.dma_info); - -err_out: - return -ENOMEM; + return err; } -void mlx5e_free_rx_fragmented_mpwqe(struct mlx5e_rq *rq, - struct mlx5e_mpw_info *wi) +void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi) { - int mtt_sz = mlx5e_get_wqe_mtt_sz(); + int pg_strides = mlx5e_mpwqe_strides_per_page(rq); int i; for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) { dma_unmap_page(rq->pdev, wi->umr.dma_info[i].addr, PAGE_SIZE, PCI_DMA_FROMDEVICE); page_ref_sub(wi->umr.dma_info[i].page, - mlx5e_mpwqe_strides_per_page(rq) - wi->skbs_frags[i]); + pg_strides - wi->skbs_frags[i]); put_page(wi->umr.dma_info[i].page); } - dma_unmap_single(rq->pdev, wi->umr.mtt_addr, mtt_sz, PCI_DMA_TODEVICE); - kfree(wi->umr.mtt_no_align); - kfree(wi->umr.dma_info); } -void mlx5e_post_rx_fragmented_mpwqe(struct mlx5e_rq *rq) +void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq) { struct mlx5_wq_ll *wq = &rq->wq; struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(wq, wq->head); @@ -508,12 +381,11 @@ void mlx5e_post_rx_fragmented_mpwqe(struct mlx5e_rq *rq) clear_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state); if (unlikely(test_bit(MLX5E_RQ_STATE_FLUSH, &rq->state))) { - mlx5e_free_rx_fragmented_mpwqe(rq, &rq->wqe_info[wq->head]); + mlx5e_free_rx_mpwqe(rq, &rq->wqe_info[wq->head]); return; } mlx5_wq_ll_push(wq, be16_to_cpu(wqe->next.next_wqe_index)); - rq->stats.mpwqe_frag++; /* ensure wqes are visible to device before updating doorbell record */ dma_wmb(); @@ -521,84 +393,23 @@ void mlx5e_post_rx_fragmented_mpwqe(struct mlx5e_rq *rq) mlx5_wq_ll_update_db_record(wq); } -static int mlx5e_alloc_rx_linear_mpwqe(struct mlx5e_rq *rq, - struct mlx5e_rx_wqe *wqe, - u16 ix) -{ - struct mlx5e_mpw_info *wi = &rq->wqe_info[ix]; - gfp_t gfp_mask; - int i; - - gfp_mask = GFP_ATOMIC | __GFP_COLD | __GFP_MEMALLOC; - wi->dma_info.page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, - MLX5_MPWRQ_WQE_PAGE_ORDER); - if (unlikely(!wi->dma_info.page)) - return -ENOMEM; - - wi->dma_info.addr = dma_map_page(rq->pdev, wi->dma_info.page, 0, - rq->wqe_sz, PCI_DMA_FROMDEVICE); - if (unlikely(dma_mapping_error(rq->pdev, wi->dma_info.addr))) { - put_page(wi->dma_info.page); - return -ENOMEM; - } - - /* We split the high-order page into order-0 ones and manage their - * reference counter to minimize the memory held by small skb fragments - */ - split_page(wi->dma_info.page, MLX5_MPWRQ_WQE_PAGE_ORDER); - for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) { - page_ref_add(&wi->dma_info.page[i], - mlx5e_mpwqe_strides_per_page(rq)); - wi->skbs_frags[i] = 0; - } - - wi->consumed_strides = 0; - wi->dma_pre_sync = mlx5e_dma_pre_sync_linear_mpwqe; - wi->add_skb_frag = mlx5e_add_skb_frag_linear_mpwqe; - wi->copy_skb_header = mlx5e_copy_skb_header_linear_mpwqe; - wi->free_wqe = mlx5e_free_rx_linear_mpwqe; - wqe->data.lkey = rq->mkey_be; - wqe->data.addr = cpu_to_be64(wi->dma_info.addr); - - return 0; -} - -void mlx5e_free_rx_linear_mpwqe(struct mlx5e_rq *rq, - struct mlx5e_mpw_info *wi) -{ - int i; - - dma_unmap_page(rq->pdev, wi->dma_info.addr, rq->wqe_sz, - PCI_DMA_FROMDEVICE); - for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) { - page_ref_sub(&wi->dma_info.page[i], - mlx5e_mpwqe_strides_per_page(rq) - wi->skbs_frags[i]); - put_page(&wi->dma_info.page[i]); - } -} - -int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) +int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) { int err; - err = mlx5e_alloc_rx_linear_mpwqe(rq, wqe, ix); - if (unlikely(err)) { - err = mlx5e_alloc_rx_fragmented_mpwqe(rq, wqe, ix); - if (unlikely(err)) - return err; - set_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state); - mlx5e_post_umr_wqe(rq, ix); - return -EBUSY; - } - - return 0; + err = mlx5e_alloc_rx_umr_mpwqe(rq, wqe, ix); + if (unlikely(err)) + return err; + set_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state); + mlx5e_post_umr_wqe(rq, ix); + return -EBUSY; } void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) { struct mlx5e_mpw_info *wi = &rq->wqe_info[ix]; - wi->free_wqe(rq, wi); + mlx5e_free_rx_mpwqe(rq, wi); } #define RQ_CANNOT_POST(rq) \ @@ -617,9 +428,10 @@ bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq) int err; err = rq->alloc_wqe(rq, wqe, wq->head); + if (err == -EBUSY) + return true; if (unlikely(err)) { - if (err != -EBUSY) - rq->stats.buff_alloc_err++; + rq->stats.buff_alloc_err++; break; } @@ -831,7 +643,6 @@ static inline void mlx5e_mpwqe_fill_rx_skb(struct mlx5e_rq *rq, u32 cqe_bcnt, struct sk_buff *skb) { - u32 consumed_bytes = ALIGN(cqe_bcnt, rq->mpwqe_stride_sz); u16 stride_ix = mpwrq_get_cqe_stride_index(cqe); u32 wqe_offset = stride_ix * rq->mpwqe_stride_sz; u32 head_offset = wqe_offset & (PAGE_SIZE - 1); @@ -845,21 +656,20 @@ static inline void mlx5e_mpwqe_fill_rx_skb(struct mlx5e_rq *rq, page_idx++; frag_offset -= PAGE_SIZE; } - wi->dma_pre_sync(rq->pdev, wi, wqe_offset, consumed_bytes); while (byte_cnt) { u32 pg_consumed_bytes = min_t(u32, PAGE_SIZE - frag_offset, byte_cnt); - wi->add_skb_frag(rq, skb, wi, page_idx, frag_offset, - pg_consumed_bytes); + mlx5e_add_skb_frag_mpwqe(rq, skb, wi, page_idx, frag_offset, + pg_consumed_bytes); byte_cnt -= pg_consumed_bytes; frag_offset = 0; page_idx++; } /* copy header */ - wi->copy_skb_header(rq->pdev, skb, wi, head_page_idx, head_offset, - headlen); + mlx5e_copy_skb_header_mpwqe(rq->pdev, skb, wi, head_page_idx, + head_offset, headlen); /* skb linear part was allocated with headlen and aligned to long */ skb->tail += headlen; skb->len += headlen; @@ -904,7 +714,7 @@ mpwrq_cqe_out: if (likely(wi->consumed_strides < rq->mpwqe_num_strides)) return; - wi->free_wqe(rq, wi); + mlx5e_free_rx_mpwqe(rq, wi); mlx5_wq_ll_pop(&rq->wq, cqe->wqe_id, &wqe->next.next_wqe_index); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 499487ce3b53..1f56543feb63 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -73,7 +73,6 @@ struct mlx5e_sw_stats { u64 tx_xmit_more; u64 rx_wqe_err; u64 rx_mpwqe_filler; - u64 rx_mpwqe_frag; u64 rx_buff_alloc_err; u64 rx_cqe_compress_blks; u64 rx_cqe_compress_pkts; @@ -105,7 +104,6 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xmit_more) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_wqe_err) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_filler) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_frag) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_buff_alloc_err) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_blks) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_pkts) }, @@ -274,7 +272,6 @@ struct mlx5e_rq_stats { u64 lro_bytes; u64 wqe_err; u64 mpwqe_filler; - u64 mpwqe_frag; u64 buff_alloc_err; u64 cqe_compress_blks; u64 cqe_compress_pkts; @@ -290,7 +287,6 @@ static const struct counter_desc rq_stats_desc[] = { { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_bytes) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, wqe_err) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, mpwqe_filler) }, - { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, mpwqe_frag) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, buff_alloc_err) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_blks) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) }, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index 9bf33bb69210..08d8b0c91f07 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -87,7 +87,7 @@ static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq) case MLX5_OPCODE_NOP: break; case MLX5_OPCODE_UMR: - mlx5e_post_rx_fragmented_mpwqe(&sq->channel->rq); + mlx5e_post_rx_mpwqe(&sq->channel->rq); break; default: WARN_ONCE(true, From a5a0c590166e39fa399940775e7bfd8e1a9356da Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Thu, 15 Sep 2016 16:08:37 +0300 Subject: [PATCH 1117/1715] net/mlx5e: Introduce API for RX mapped pages Manage the allocation and deallocation of mapped RX pages only through dedicated API functions. Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/en_rx.c | 46 +++++++++++-------- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 5d1b7b5e4f36..0c34daa04c43 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -305,26 +305,32 @@ static inline void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix) mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0); } -static inline int mlx5e_alloc_and_map_page(struct mlx5e_rq *rq, - struct mlx5e_mpw_info *wi, - int i) +static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info) { struct page *page = dev_alloc_page(); + if (unlikely(!page)) return -ENOMEM; - wi->umr.dma_info[i].page = page; - wi->umr.dma_info[i].addr = dma_map_page(rq->pdev, page, 0, PAGE_SIZE, - PCI_DMA_FROMDEVICE); - if (unlikely(dma_mapping_error(rq->pdev, wi->umr.dma_info[i].addr))) { + dma_info->page = page; + dma_info->addr = dma_map_page(rq->pdev, page, 0, PAGE_SIZE, + DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(rq->pdev, dma_info->addr))) { put_page(page); return -ENOMEM; } - wi->umr.mtt[i] = cpu_to_be64(wi->umr.dma_info[i].addr | MLX5_EN_WR); return 0; } +static inline void mlx5e_page_release(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info) +{ + dma_unmap_page(rq->pdev, dma_info->addr, PAGE_SIZE, DMA_FROM_DEVICE); + put_page(dma_info->page); +} + static int mlx5e_alloc_rx_umr_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) @@ -336,10 +342,13 @@ static int mlx5e_alloc_rx_umr_mpwqe(struct mlx5e_rq *rq, int i; for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) { - err = mlx5e_alloc_and_map_page(rq, wi, i); + struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[i]; + + err = mlx5e_page_alloc_mapped(rq, dma_info); if (unlikely(err)) goto err_unmap; - page_ref_add(wi->umr.dma_info[i].page, pg_strides); + wi->umr.mtt[i] = cpu_to_be64(dma_info->addr | MLX5_EN_WR); + page_ref_add(dma_info->page, pg_strides); wi->skbs_frags[i] = 0; } @@ -350,10 +359,10 @@ static int mlx5e_alloc_rx_umr_mpwqe(struct mlx5e_rq *rq, err_unmap: while (--i >= 0) { - dma_unmap_page(rq->pdev, wi->umr.dma_info[i].addr, PAGE_SIZE, - PCI_DMA_FROMDEVICE); - page_ref_sub(wi->umr.dma_info[i].page, pg_strides); - put_page(wi->umr.dma_info[i].page); + struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[i]; + + page_ref_sub(dma_info->page, pg_strides); + mlx5e_page_release(rq, dma_info); } return err; @@ -365,11 +374,10 @@ void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi) int i; for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) { - dma_unmap_page(rq->pdev, wi->umr.dma_info[i].addr, PAGE_SIZE, - PCI_DMA_FROMDEVICE); - page_ref_sub(wi->umr.dma_info[i].page, - pg_strides - wi->skbs_frags[i]); - put_page(wi->umr.dma_info[i].page); + struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[i]; + + page_ref_sub(dma_info->page, pg_strides - wi->skbs_frags[i]); + mlx5e_page_release(rq, dma_info); } } From 4415a0319f92ea0d624fe11c917faf9114f89187 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Thu, 15 Sep 2016 16:08:38 +0300 Subject: [PATCH 1118/1715] net/mlx5e: Implement RX mapped page cache for page recycle Instead of reallocating and mapping pages for RX data-path, recycle already used pages in a per ring cache. Performance tests: The following results were measured on a freshly booted system, giving optimal baseline performance, as high-order pages are yet to be fragmented and depleted. We ran pktgen single-stream benchmarks, with iptables-raw-drop: Single stride, 64 bytes: * 4,739,057 - baseline * 4,749,550 - order0 no cache * 4,786,899 - order0 with cache 1% gain Larger packets, no page cross, 1024 bytes: * 3,982,361 - baseline * 3,845,682 - order0 no cache * 4,127,852 - order0 with cache 3.7% gain Larger packets, every 3rd packet crosses a page, 1500 bytes: * 3,731,189 - baseline * 3,579,414 - order0 no cache * 3,931,708 - order0 with cache 5.4% gain Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 16 ++++++ .../net/ethernet/mellanox/mlx5/core/en_main.c | 15 +++++ .../net/ethernet/mellanox/mlx5/core/en_rx.c | 57 +++++++++++++++++-- .../ethernet/mellanox/mlx5/core/en_stats.h | 16 ++++++ 4 files changed, 99 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 401b2f7b165f..7dd4763e726e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -287,6 +287,18 @@ struct mlx5e_rx_am { /* Adaptive Moderation */ u8 tired; }; +/* a single cache unit is capable to serve one napi call (for non-striding rq) + * or a MPWQE (for striding rq). + */ +#define MLX5E_CACHE_UNIT (MLX5_MPWRQ_PAGES_PER_WQE > NAPI_POLL_WEIGHT ? \ + MLX5_MPWRQ_PAGES_PER_WQE : NAPI_POLL_WEIGHT) +#define MLX5E_CACHE_SIZE (2 * roundup_pow_of_two(MLX5E_CACHE_UNIT)) +struct mlx5e_page_cache { + u32 head; + u32 tail; + struct mlx5e_dma_info page_cache[MLX5E_CACHE_SIZE]; +}; + struct mlx5e_rq { /* data path */ struct mlx5_wq_ll wq; @@ -301,6 +313,8 @@ struct mlx5e_rq { struct mlx5e_tstamp *tstamp; struct mlx5e_rq_stats stats; struct mlx5e_cq cq; + struct mlx5e_page_cache page_cache; + mlx5e_fp_handle_rx_cqe handle_rx_cqe; mlx5e_fp_alloc_wqe alloc_wqe; mlx5e_fp_dealloc_wqe dealloc_wqe; @@ -651,6 +665,8 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget); int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget); void mlx5e_free_tx_descs(struct mlx5e_sq *sq); +void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info, + bool recycle); void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 136554b77c3b..8595b507e200 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -141,6 +141,10 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv) s->rx_buff_alloc_err += rq_stats->buff_alloc_err; s->rx_cqe_compress_blks += rq_stats->cqe_compress_blks; s->rx_cqe_compress_pkts += rq_stats->cqe_compress_pkts; + s->rx_cache_reuse += rq_stats->cache_reuse; + s->rx_cache_full += rq_stats->cache_full; + s->rx_cache_empty += rq_stats->cache_empty; + s->rx_cache_busy += rq_stats->cache_busy; for (j = 0; j < priv->params.num_tc; j++) { sq_stats = &priv->channel[i]->sq[j].stats; @@ -475,6 +479,9 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, INIT_WORK(&rq->am.work, mlx5e_rx_am_work); rq->am.mode = priv->params.rx_cq_period_mode; + rq->page_cache.head = 0; + rq->page_cache.tail = 0; + return 0; err_rq_wq_destroy: @@ -485,6 +492,8 @@ err_rq_wq_destroy: static void mlx5e_destroy_rq(struct mlx5e_rq *rq) { + int i; + switch (rq->wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: mlx5e_rq_free_mpwqe_info(rq); @@ -493,6 +502,12 @@ static void mlx5e_destroy_rq(struct mlx5e_rq *rq) kfree(rq->skb); } + for (i = rq->page_cache.head; i != rq->page_cache.tail; + i = (i + 1) & (MLX5E_CACHE_SIZE - 1)) { + struct mlx5e_dma_info *dma_info = &rq->page_cache.page_cache[i]; + + mlx5e_page_release(rq, dma_info, false); + } mlx5_wq_destroy(&rq->wq_ctrl); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 0c34daa04c43..dc8677933f76 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -305,11 +305,55 @@ static inline void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix) mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0); } +static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info) +{ + struct mlx5e_page_cache *cache = &rq->page_cache; + u32 tail_next = (cache->tail + 1) & (MLX5E_CACHE_SIZE - 1); + + if (tail_next == cache->head) { + rq->stats.cache_full++; + return false; + } + + cache->page_cache[cache->tail] = *dma_info; + cache->tail = tail_next; + return true; +} + +static inline bool mlx5e_rx_cache_get(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info) +{ + struct mlx5e_page_cache *cache = &rq->page_cache; + + if (unlikely(cache->head == cache->tail)) { + rq->stats.cache_empty++; + return false; + } + + if (page_ref_count(cache->page_cache[cache->head].page) != 1) { + rq->stats.cache_busy++; + return false; + } + + *dma_info = cache->page_cache[cache->head]; + cache->head = (cache->head + 1) & (MLX5E_CACHE_SIZE - 1); + rq->stats.cache_reuse++; + + dma_sync_single_for_device(rq->pdev, dma_info->addr, PAGE_SIZE, + DMA_FROM_DEVICE); + return true; +} + static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info) { - struct page *page = dev_alloc_page(); + struct page *page; + if (mlx5e_rx_cache_get(rq, dma_info)) + return 0; + + page = dev_alloc_page(); if (unlikely(!page)) return -ENOMEM; @@ -324,9 +368,12 @@ static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq, return 0; } -static inline void mlx5e_page_release(struct mlx5e_rq *rq, - struct mlx5e_dma_info *dma_info) +void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info, + bool recycle) { + if (likely(recycle) && mlx5e_rx_cache_put(rq, dma_info)) + return; + dma_unmap_page(rq->pdev, dma_info->addr, PAGE_SIZE, DMA_FROM_DEVICE); put_page(dma_info->page); } @@ -362,7 +409,7 @@ err_unmap: struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[i]; page_ref_sub(dma_info->page, pg_strides); - mlx5e_page_release(rq, dma_info); + mlx5e_page_release(rq, dma_info, true); } return err; @@ -377,7 +424,7 @@ void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi) struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[i]; page_ref_sub(dma_info->page, pg_strides - wi->skbs_frags[i]); - mlx5e_page_release(rq, dma_info); + mlx5e_page_release(rq, dma_info, true); } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 1f56543feb63..6af8d79e8c2a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -76,6 +76,10 @@ struct mlx5e_sw_stats { u64 rx_buff_alloc_err; u64 rx_cqe_compress_blks; u64 rx_cqe_compress_pkts; + u64 rx_cache_reuse; + u64 rx_cache_full; + u64 rx_cache_empty; + u64 rx_cache_busy; /* Special handling counters */ u64 link_down_events_phy; @@ -107,6 +111,10 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_buff_alloc_err) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_blks) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_pkts) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_reuse) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_full) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_empty) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_busy) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, link_down_events_phy) }, }; @@ -275,6 +283,10 @@ struct mlx5e_rq_stats { u64 buff_alloc_err; u64 cqe_compress_blks; u64 cqe_compress_pkts; + u64 cache_reuse; + u64 cache_full; + u64 cache_empty; + u64 cache_busy; }; static const struct counter_desc rq_stats_desc[] = { @@ -290,6 +302,10 @@ static const struct counter_desc rq_stats_desc[] = { { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, buff_alloc_err) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_blks) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_reuse) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_full) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_empty) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_busy) }, }; struct mlx5e_sq_stats { From 3613b3dbd1ade9a6a626dae1f608c57638eb5e8a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 15 Sep 2016 09:33:02 -0700 Subject: [PATCH 1119/1715] tcp: prepare skbs for better sack shifting With large BDP TCP flows and lossy networks, it is very important to keep a low number of skbs in the write queue. RACK and SACK processing can perform a linear scan of it. We should avoid putting any payload in skb->head, so that SACK shifting can be done if needed. With this patch, we allow to pack ~0.5 MB per skb instead of the 64KB initially cooked at tcp_sendmsg() time. This gives a reduction of number of skbs in write queue by eight. tcp_rack_detect_loss() likes this. We still allow payload in skb->head for first skb put in the queue, to not impact RPC workloads. Signed-off-by: Eric Dumazet Cc: Yuchung Cheng Acked-by: Yuchung Cheng Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index a13fcb369f52..7dae800092e6 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1020,17 +1020,31 @@ int tcp_sendpage(struct sock *sk, struct page *page, int offset, } EXPORT_SYMBOL(tcp_sendpage); -static inline int select_size(const struct sock *sk, bool sg) +/* Do not bother using a page frag for very small frames. + * But use this heuristic only for the first skb in write queue. + * + * Having no payload in skb->head allows better SACK shifting + * in tcp_shift_skb_data(), reducing sack/rack overhead, because + * write queue has less skbs. + * Each skb can hold up to MAX_SKB_FRAGS * 32Kbytes, or ~0.5 MB. + * This also speeds up tso_fragment(), since it wont fallback + * to tcp_fragment(). + */ +static int linear_payload_sz(bool first_skb) +{ + if (first_skb) + return SKB_WITH_OVERHEAD(2048 - MAX_TCP_HEADER); + return 0; +} + +static int select_size(const struct sock *sk, bool sg, bool first_skb) { const struct tcp_sock *tp = tcp_sk(sk); int tmp = tp->mss_cache; if (sg) { if (sk_can_gso(sk)) { - /* Small frames wont use a full page: - * Payload will immediately follow tcp header. - */ - tmp = SKB_WITH_OVERHEAD(2048 - MAX_TCP_HEADER); + tmp = linear_payload_sz(first_skb); } else { int pgbreak = SKB_MAX_HEAD(MAX_TCP_HEADER); @@ -1161,6 +1175,8 @@ restart: } if (copy <= 0 || !tcp_skb_can_collapse_to(skb)) { + bool first_skb; + new_segment: /* Allocate new segment. If the interface is SG, * allocate skb fitting to single page. @@ -1172,10 +1188,11 @@ new_segment: process_backlog = false; goto restart; } + first_skb = skb_queue_empty(&sk->sk_write_queue); skb = sk_stream_alloc_skb(sk, - select_size(sk, sg), + select_size(sk, sg, first_skb), sk->sk_allocation, - skb_queue_empty(&sk->sk_write_queue)); + first_skb); if (!skb) goto wait_for_memory; From e1fb9d0389e5386151de32b64624896e2b621e1a Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 15 Sep 2016 10:13:47 -0700 Subject: [PATCH 1120/1715] net: vrf: Remove RT_FL_TOS No longer used after d66f6c0a8f3c0 ("net: ipv4: Remove l3mdev_get_saddr") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/vrf.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 55674b0e65b7..85c271c70d42 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -37,9 +37,6 @@ #include #include -#define RT_FL_TOS(oldflp4) \ - ((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK)) - #define DRV_NAME "vrf" #define DRV_VERSION "1.0" From 19664c6a000956290cce84c6924b13488ab794d6 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 15 Sep 2016 10:18:45 -0700 Subject: [PATCH 1121/1715] net: l3mdev: Remove netif_index_is_l3_master No longer used after e0d56fdd73422 ("net: l3mdev: remove redundant calls") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/l3mdev.h | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index 3832099289c5..b220dabeab45 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -114,25 +114,6 @@ static inline u32 l3mdev_fib_table(const struct net_device *dev) return tb_id; } -static inline bool netif_index_is_l3_master(struct net *net, int ifindex) -{ - struct net_device *dev; - bool rc = false; - - if (ifindex == 0) - return false; - - rcu_read_lock(); - - dev = dev_get_by_index_rcu(net, ifindex); - if (dev) - rc = netif_is_l3_master(dev); - - rcu_read_unlock(); - - return rc; -} - struct dst_entry *l3mdev_link_scope_lookup(struct net *net, struct flowi6 *fl6); static inline @@ -226,11 +207,6 @@ static inline u32 l3mdev_fib_table_by_index(struct net *net, int ifindex) return 0; } -static inline bool netif_index_is_l3_master(struct net *net, int ifindex) -{ - return false; -} - static inline struct dst_entry *l3mdev_link_scope_lookup(struct net *net, struct flowi6 *fl6) { From 5ff904d55da821fd194ff493f2928d134ce5b67a Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Thu, 15 Sep 2016 18:51:25 +0100 Subject: [PATCH 1122/1715] llc: switch type to bool as the timeout is only tested versus 0 (As asked by Dave in Februrary) Signed-off-by: Alan Cox Signed-off-by: David S. Miller --- net/llc/af_llc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 8ae3ed97d95c..db916cf51ffe 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -38,7 +38,7 @@ static u16 llc_ui_sap_link_no_max[256]; static struct sockaddr_llc llc_ui_addrnull; static const struct proto_ops llc_ui_ops; -static long llc_ui_wait_for_conn(struct sock *sk, long timeout); +static bool llc_ui_wait_for_conn(struct sock *sk, long timeout); static int llc_ui_wait_for_disc(struct sock *sk, long timeout); static int llc_ui_wait_for_busy_core(struct sock *sk, long timeout); @@ -551,7 +551,7 @@ static int llc_ui_wait_for_disc(struct sock *sk, long timeout) return rc; } -static long llc_ui_wait_for_conn(struct sock *sk, long timeout) +static bool llc_ui_wait_for_conn(struct sock *sk, long timeout) { DEFINE_WAIT(wait); From 66f58ec486389e8d3f7ebe8cfc3883a72e808eb9 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Thu, 15 Sep 2016 22:23:24 +0200 Subject: [PATCH 1123/1715] hisilicon: constify net_device_ops structures Check for net_device_ops structures that are only stored in the netdev_ops field of a net_device structure. This field is declared const, so net_device_ops structures that have this property can be declared as const also. The semantic patch that makes this change is as follows: (http://coccinelle.lip6.fr/) // @r disable optional_qualifier@ identifier i; position p; @@ static struct net_device_ops i@p = { ... }; @ok@ identifier r.i; struct net_device e; position p; @@ e.netdev_ops = &i@p; @bad@ position p != {r.p,ok.p}; identifier r.i; struct net_device_ops e; @@ e@i@p @depends on !bad disable optional_qualifier@ identifier r.i; @@ static +const struct net_device_ops i = { ... }; // The result of size on this file before the change is: text data bss dec hex filename 7995 848 8 8851 2293 drivers/net/ethernet/hisilicon/hip04_eth.o and after the change it is: text data bss dec hex filename 8571 256 8 8835 2283 drivers/net/ethernet/hisilicon/hip04_eth.o Signed-off-by: Julia Lawall Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hip04_eth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c index a90ab40e55af..415ffa1509d5 100644 --- a/drivers/net/ethernet/hisilicon/hip04_eth.c +++ b/drivers/net/ethernet/hisilicon/hip04_eth.c @@ -761,7 +761,7 @@ static const struct ethtool_ops hip04_ethtool_ops = { .get_drvinfo = hip04_get_drvinfo, }; -static struct net_device_ops hip04_netdev_ops = { +static const struct net_device_ops hip04_netdev_ops = { .ndo_open = hip04_mac_open, .ndo_stop = hip04_mac_stop, .ndo_get_stats = hip04_get_stats, From 373075049c7be28bed875494488c185ef5eeb938 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Thu, 15 Sep 2016 22:23:25 +0200 Subject: [PATCH 1124/1715] dwc_eth_qos: constify net_device_ops structures Check for net_device_ops structures that are only stored in the netdev_ops field of a net_device structure. This field is declared const, so net_device_ops structures that have this property can be declared as const also. The semantic patch that makes this change is as follows: (http://coccinelle.lip6.fr/) // @r disable optional_qualifier@ identifier i; position p; @@ static struct net_device_ops i@p = { ... }; @ok@ identifier r.i; struct net_device e; position p; @@ e.netdev_ops = &i@p; @bad@ position p != {r.p,ok.p}; identifier r.i; struct net_device_ops e; @@ e@i@p @depends on !bad disable optional_qualifier@ identifier r.i; @@ static +const struct net_device_ops i = { ... }; // The result of size on this file before the change is: text data bss dec hex filename 21623 1316 40 22979 59c3 drivers/net/ethernet/synopsys/dwc_eth_qos.o and after the change it is: text data bss dec hex filename 22199 724 40 22963 59b3 drivers/net/ethernet/synopsys/dwc_eth_qos.o Signed-off-by: Julia Lawall Signed-off-by: David S. Miller --- drivers/net/ethernet/synopsys/dwc_eth_qos.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/synopsys/dwc_eth_qos.c b/drivers/net/ethernet/synopsys/dwc_eth_qos.c index ef26f588eeba..0d0053128542 100644 --- a/drivers/net/ethernet/synopsys/dwc_eth_qos.c +++ b/drivers/net/ethernet/synopsys/dwc_eth_qos.c @@ -2761,7 +2761,7 @@ static const struct ethtool_ops dwceqos_ethtool_ops = { .set_link_ksettings = phy_ethtool_set_link_ksettings, }; -static struct net_device_ops netdev_ops = { +static const struct net_device_ops netdev_ops = { .ndo_open = dwceqos_open, .ndo_stop = dwceqos_stop, .ndo_start_xmit = dwceqos_start_xmit, From eb94737d711913a23e466b99c0d9ffdf15651290 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Thu, 15 Sep 2016 22:23:26 +0200 Subject: [PATCH 1125/1715] l2tp: constify net_device_ops structures Check for net_device_ops structures that are only stored in the netdev_ops field of a net_device structure. This field is declared const, so net_device_ops structures that have this property can be declared as const also. The semantic patch that makes this change is as follows: (http://coccinelle.lip6.fr/) // @r disable optional_qualifier@ identifier i; position p; @@ static struct net_device_ops i@p = { ... }; @ok@ identifier r.i; struct net_device e; position p; @@ e.netdev_ops = &i@p; @bad@ position p != {r.p,ok.p}; identifier r.i; struct net_device_ops e; @@ e@i@p @depends on !bad disable optional_qualifier@ identifier r.i; @@ static +const struct net_device_ops i = { ... }; // The result of size on this file before the change is: text data bss dec hex filename 3401 931 44 4376 1118 net/l2tp/l2tp_eth.o and after the change it is: text data bss dec hex filename 3993 347 44 4384 1120 net/l2tp/l2tp_eth.o Signed-off-by: Julia Lawall Signed-off-by: David S. Miller --- net/l2tp/l2tp_eth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index ef2cd30ca06e..965f7e344cef 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -121,7 +121,7 @@ static struct rtnl_link_stats64 *l2tp_eth_get_stats64(struct net_device *dev, } -static struct net_device_ops l2tp_eth_netdev_ops = { +static const struct net_device_ops l2tp_eth_netdev_ops = { .ndo_init = l2tp_eth_dev_init, .ndo_uninit = l2tp_eth_dev_uninit, .ndo_start_xmit = l2tp_eth_dev_xmit, From cfc7381b3002756b1dcada32979e942aa3126e31 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 15 Sep 2016 13:00:29 -0700 Subject: [PATCH 1126/1715] ip_tunnel: add collect_md mode to IPIP tunnel Similar to gre, vxlan, geneve tunnels allow IPIP tunnels to operate in 'collect metadata' mode. bpf_skb_[gs]et_tunnel_key() helpers can make use of it right away. ovs can use it as well in the future (once appropriate ovs-vport abstractions and user apis are added). Note that just like in other tunnels we cannot cache the dst, since tunnel_info metadata can be different for every packet. Signed-off-by: Alexei Starovoitov Acked-by: Thomas Graf Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/net/ip_tunnels.h | 2 + include/uapi/linux/if_tunnel.h | 1 + net/ipv4/ip_tunnel.c | 76 ++++++++++++++++++++++++++++++++++ net/ipv4/ipip.c | 35 +++++++++++++--- 4 files changed, 108 insertions(+), 6 deletions(-) diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index e598c639aa6f..59557c07904b 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -255,6 +255,8 @@ void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops); void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, const struct iphdr *tnl_params, const u8 protocol); +void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, + const u8 proto); int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd); int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict); int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu); diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h index 9865c8caedde..18d5dc13985d 100644 --- a/include/uapi/linux/if_tunnel.h +++ b/include/uapi/linux/if_tunnel.h @@ -73,6 +73,7 @@ enum { IFLA_IPTUN_ENCAP_FLAGS, IFLA_IPTUN_ENCAP_SPORT, IFLA_IPTUN_ENCAP_DPORT, + IFLA_IPTUN_COLLECT_METADATA, __IFLA_IPTUN_MAX, }; #define IFLA_IPTUN_MAX (__IFLA_IPTUN_MAX - 1) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 95649ebd2874..5719d6ba0824 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -55,6 +55,7 @@ #include #include #include +#include #if IS_ENABLED(CONFIG_IPV6) #include @@ -546,6 +547,81 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, return 0; } +void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + u32 headroom = sizeof(struct iphdr); + struct ip_tunnel_info *tun_info; + const struct ip_tunnel_key *key; + const struct iphdr *inner_iph; + struct rtable *rt; + struct flowi4 fl4; + __be16 df = 0; + u8 tos, ttl; + + tun_info = skb_tunnel_info(skb); + if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) || + ip_tunnel_info_af(tun_info) != AF_INET)) + goto tx_error; + key = &tun_info->key; + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + inner_iph = (const struct iphdr *)skb_inner_network_header(skb); + tos = key->tos; + if (tos == 1) { + if (skb->protocol == htons(ETH_P_IP)) + tos = inner_iph->tos; + else if (skb->protocol == htons(ETH_P_IPV6)) + tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph); + } + init_tunnel_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, 0, + RT_TOS(tos), tunnel->parms.link); + if (tunnel->encap.type != TUNNEL_ENCAP_NONE) + goto tx_error; + rt = ip_route_output_key(tunnel->net, &fl4); + if (IS_ERR(rt)) { + dev->stats.tx_carrier_errors++; + goto tx_error; + } + if (rt->dst.dev == dev) { + ip_rt_put(rt); + dev->stats.collisions++; + goto tx_error; + } + tos = ip_tunnel_ecn_encap(tos, inner_iph, skb); + ttl = key->ttl; + if (ttl == 0) { + if (skb->protocol == htons(ETH_P_IP)) + ttl = inner_iph->ttl; + else if (skb->protocol == htons(ETH_P_IPV6)) + ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit; + else + ttl = ip4_dst_hoplimit(&rt->dst); + } + if (key->tun_flags & TUNNEL_DONT_FRAGMENT) + df = htons(IP_DF); + else if (skb->protocol == htons(ETH_P_IP)) + df = inner_iph->frag_off & htons(IP_DF); + headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len; + if (headroom > dev->needed_headroom) + dev->needed_headroom = headroom; + + if (skb_cow_head(skb, dev->needed_headroom)) { + ip_rt_put(rt); + goto tx_dropped; + } + iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, key->tos, + key->ttl, df, !net_eq(tunnel->net, dev_net(dev))); + return; +tx_error: + dev->stats.tx_errors++; + goto kfree; +tx_dropped: + dev->stats.tx_dropped++; +kfree: + kfree_skb(skb); +} +EXPORT_SYMBOL_GPL(ip_md_tunnel_xmit); + void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, const struct iphdr *tnl_params, u8 protocol) { diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 4ae3f8e6c6cc..c9392589c415 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -115,6 +115,7 @@ #include #include #include +#include static bool log_ecn_error = true; module_param(log_ecn_error, bool, 0644); @@ -193,6 +194,7 @@ static int ipip_tunnel_rcv(struct sk_buff *skb, u8 ipproto) { struct net *net = dev_net(skb->dev); struct ip_tunnel_net *itn = net_generic(net, ipip_net_id); + struct metadata_dst *tun_dst = NULL; struct ip_tunnel *tunnel; const struct iphdr *iph; @@ -216,7 +218,12 @@ static int ipip_tunnel_rcv(struct sk_buff *skb, u8 ipproto) tpi = &ipip_tpi; if (iptunnel_pull_header(skb, 0, tpi->proto, false)) goto drop; - return ip_tunnel_rcv(tunnel, skb, tpi, NULL, log_ecn_error); + if (tunnel->collect_md) { + tun_dst = ip_tun_rx_dst(skb, 0, 0, 0); + if (!tun_dst) + return 0; + } + return ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error); } return -1; @@ -270,7 +277,10 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, skb_set_inner_ipproto(skb, ipproto); - ip_tunnel_xmit(skb, dev, tiph, ipproto); + if (tunnel->collect_md) + ip_md_tunnel_xmit(skb, dev, ipproto); + else + ip_tunnel_xmit(skb, dev, tiph, ipproto); return NETDEV_TX_OK; tx_error: @@ -380,13 +390,14 @@ static int ipip_tunnel_validate(struct nlattr *tb[], struct nlattr *data[]) } static void ipip_netlink_parms(struct nlattr *data[], - struct ip_tunnel_parm *parms) + struct ip_tunnel_parm *parms, bool *collect_md) { memset(parms, 0, sizeof(*parms)); parms->iph.version = 4; parms->iph.protocol = IPPROTO_IPIP; parms->iph.ihl = 5; + *collect_md = false; if (!data) return; @@ -414,6 +425,9 @@ static void ipip_netlink_parms(struct nlattr *data[], if (!data[IFLA_IPTUN_PMTUDISC] || nla_get_u8(data[IFLA_IPTUN_PMTUDISC])) parms->iph.frag_off = htons(IP_DF); + + if (data[IFLA_IPTUN_COLLECT_METADATA]) + *collect_md = true; } /* This function returns true when ENCAP attributes are present in the nl msg */ @@ -453,18 +467,18 @@ static bool ipip_netlink_encap_parms(struct nlattr *data[], static int ipip_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { + struct ip_tunnel *t = netdev_priv(dev); struct ip_tunnel_parm p; struct ip_tunnel_encap ipencap; if (ipip_netlink_encap_parms(data, &ipencap)) { - struct ip_tunnel *t = netdev_priv(dev); int err = ip_tunnel_encap_setup(t, &ipencap); if (err < 0) return err; } - ipip_netlink_parms(data, &p); + ipip_netlink_parms(data, &p, &t->collect_md); return ip_tunnel_newlink(dev, tb, &p); } @@ -473,6 +487,7 @@ static int ipip_changelink(struct net_device *dev, struct nlattr *tb[], { struct ip_tunnel_parm p; struct ip_tunnel_encap ipencap; + bool collect_md; if (ipip_netlink_encap_parms(data, &ipencap)) { struct ip_tunnel *t = netdev_priv(dev); @@ -482,7 +497,9 @@ static int ipip_changelink(struct net_device *dev, struct nlattr *tb[], return err; } - ipip_netlink_parms(data, &p); + ipip_netlink_parms(data, &p, &collect_md); + if (collect_md) + return -EINVAL; if (((dev->flags & IFF_POINTOPOINT) && !p.iph.daddr) || (!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr)) @@ -516,6 +533,8 @@ static size_t ipip_get_size(const struct net_device *dev) nla_total_size(2) + /* IFLA_IPTUN_ENCAP_DPORT */ nla_total_size(2) + + /* IFLA_IPTUN_COLLECT_METADATA */ + nla_total_size(0) + 0; } @@ -544,6 +563,9 @@ static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev) tunnel->encap.flags)) goto nla_put_failure; + if (tunnel->collect_md) + if (nla_put_flag(skb, IFLA_IPTUN_COLLECT_METADATA)) + goto nla_put_failure; return 0; nla_put_failure: @@ -562,6 +584,7 @@ static const struct nla_policy ipip_policy[IFLA_IPTUN_MAX + 1] = { [IFLA_IPTUN_ENCAP_FLAGS] = { .type = NLA_U16 }, [IFLA_IPTUN_ENCAP_SPORT] = { .type = NLA_U16 }, [IFLA_IPTUN_ENCAP_DPORT] = { .type = NLA_U16 }, + [IFLA_IPTUN_COLLECT_METADATA] = { .type = NLA_FLAG }, }; static struct rtnl_link_ops ipip_link_ops __read_mostly = { From 8d79266bc48c6ab6477d04e159cabf1e7809cb72 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 15 Sep 2016 13:00:30 -0700 Subject: [PATCH 1127/1715] ip6_tunnel: add collect_md mode to IPv6 tunnels Similar to gre, vxlan, geneve tunnels allow IPIP6 and IP6IP6 tunnels to operate in 'collect metadata' mode. Unlike ipv4 code here it's possible to reuse ip6_tnl_xmit() function for both collect_md and traditional tunnels. bpf_skb_[gs]et_tunnel_key() helpers and ovs (in the future) are the users. Signed-off-by: Alexei Starovoitov Acked-by: Thomas Graf Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/net/ip6_tunnel.h | 1 + net/ipv6/ip6_tunnel.c | 186 ++++++++++++++++++++++++++++----------- 2 files changed, 138 insertions(+), 49 deletions(-) diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index 43a5a0e4524c..20ed9699fcd4 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -23,6 +23,7 @@ struct __ip6_tnl_parm { __u8 proto; /* tunnel protocol */ __u8 encap_limit; /* encapsulation limit for tunnel */ __u8 hop_limit; /* hop limit for tunnel */ + bool collect_md; __be32 flowinfo; /* traffic class and flowlabel for tunnel */ __u32 flags; /* tunnel flags */ struct in6_addr laddr; /* local tunnel end-point address */ diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 5c5779720ef1..6a66adba0c22 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -57,6 +57,7 @@ #include #include #include +#include MODULE_AUTHOR("Ville Nuorvala"); MODULE_DESCRIPTION("IPv6 tunneling device"); @@ -90,6 +91,7 @@ struct ip6_tnl_net { struct ip6_tnl __rcu *tnls_r_l[IP6_TUNNEL_HASH_SIZE]; struct ip6_tnl __rcu *tnls_wc[1]; struct ip6_tnl __rcu **tnls[2]; + struct ip6_tnl __rcu *collect_md_tun; }; static struct net_device_stats *ip6_get_stats(struct net_device *dev) @@ -166,6 +168,10 @@ ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_ return t; } + t = rcu_dereference(ip6n->collect_md_tun); + if (t) + return t; + t = rcu_dereference(ip6n->tnls_wc[0]); if (t && (t->dev->flags & IFF_UP)) return t; @@ -209,6 +215,8 @@ ip6_tnl_link(struct ip6_tnl_net *ip6n, struct ip6_tnl *t) { struct ip6_tnl __rcu **tp = ip6_tnl_bucket(ip6n, &t->parms); + if (t->parms.collect_md) + rcu_assign_pointer(ip6n->collect_md_tun, t); rcu_assign_pointer(t->next , rtnl_dereference(*tp)); rcu_assign_pointer(*tp, t); } @@ -224,6 +232,9 @@ ip6_tnl_unlink(struct ip6_tnl_net *ip6n, struct ip6_tnl *t) struct ip6_tnl __rcu **tp; struct ip6_tnl *iter; + if (t->parms.collect_md) + rcu_assign_pointer(ip6n->collect_md_tun, NULL); + for (tp = ip6_tnl_bucket(ip6n, &t->parms); (iter = rtnl_dereference(*tp)) != NULL; tp = &iter->next) { @@ -829,6 +840,9 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb, skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev))); + if (tun_dst) + skb_dst_set(skb, (struct dst_entry *)tun_dst); + gro_cells_receive(&tunnel->gro_cells, skb); return 0; @@ -865,6 +879,7 @@ static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto, { struct ip6_tnl *t; const struct ipv6hdr *ipv6h = ipv6_hdr(skb); + struct metadata_dst *tun_dst = NULL; int ret = -1; rcu_read_lock(); @@ -881,7 +896,12 @@ static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto, goto drop; if (iptunnel_pull_header(skb, 0, tpi->proto, false)) goto drop; - ret = __ip6_tnl_rcv(t, skb, tpi, NULL, dscp_ecn_decapsulate, + if (t->parms.collect_md) { + tun_dst = ipv6_tun_rx_dst(skb, 0, 0, 0); + if (!tun_dst) + return 0; + } + ret = __ip6_tnl_rcv(t, skb, tpi, tun_dst, dscp_ecn_decapsulate, log_ecn_error); } @@ -1012,8 +1032,16 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, int mtu; unsigned int psh_hlen = sizeof(struct ipv6hdr) + t->encap_hlen; unsigned int max_headroom = psh_hlen; + u8 hop_limit; int err = -1; + if (t->parms.collect_md) { + hop_limit = skb_tunnel_info(skb)->key.ttl; + goto route_lookup; + } else { + hop_limit = t->parms.hop_limit; + } + /* NBMA tunnel */ if (ipv6_addr_any(&t->parms.raddr)) { struct in6_addr *addr6; @@ -1043,6 +1071,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, goto tx_err_link_failure; if (!dst) { +route_lookup: dst = ip6_route_output(net, NULL, fl6); if (dst->error) @@ -1053,6 +1082,10 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, dst = NULL; goto tx_err_link_failure; } + if (t->parms.collect_md && + ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev, + &fl6->daddr, 0, &fl6->saddr)) + goto tx_err_link_failure; ndst = dst; } @@ -1071,7 +1104,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, } if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; - if (skb_dst(skb)) + if (skb_dst(skb) && !t->parms.collect_md) skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); if (skb->len > mtu && !skb_is_gso(skb)) { *pmtu = mtu; @@ -1111,8 +1144,13 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, skb = new_skb; } - if (!fl6->flowi6_mark && ndst) - dst_cache_set_ip6(&t->dst_cache, ndst, &fl6->saddr); + if (t->parms.collect_md) { + if (t->encap.type != TUNNEL_ENCAP_NONE) + goto tx_err_dst_release; + } else { + if (!fl6->flowi6_mark && ndst) + dst_cache_set_ip6(&t->dst_cache, ndst, &fl6->saddr); + } skb_dst_set(skb, dst); if (encap_limit >= 0) { @@ -1137,7 +1175,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, ipv6h = ipv6_hdr(skb); ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), ip6_make_flowlabel(net, skb, fl6->flowlabel, true, fl6)); - ipv6h->hop_limit = t->parms.hop_limit; + ipv6h->hop_limit = hop_limit; ipv6h->nexthdr = proto; ipv6h->saddr = fl6->saddr; ipv6h->daddr = fl6->daddr; @@ -1170,19 +1208,34 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) if (tproto != IPPROTO_IPIP && tproto != 0) return -1; - if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) - encap_limit = t->parms.encap_limit; - - memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); - fl6.flowi6_proto = IPPROTO_IPIP; - dsfield = ipv4_get_dsfield(iph); - if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) - fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT) - & IPV6_TCLASS_MASK; - if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) - fl6.flowi6_mark = skb->mark; + if (t->parms.collect_md) { + struct ip_tunnel_info *tun_info; + const struct ip_tunnel_key *key; + + tun_info = skb_tunnel_info(skb); + if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) || + ip_tunnel_info_af(tun_info) != AF_INET6)) + return -1; + key = &tun_info->key; + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_IPIP; + fl6.daddr = key->u.ipv6.dst; + fl6.flowlabel = key->label; + } else { + if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) + encap_limit = t->parms.encap_limit; + + memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_IPIP; + + if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) + fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT) + & IPV6_TCLASS_MASK; + if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) + fl6.flowi6_mark = skb->mark; + } if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6)) return -1; @@ -1220,29 +1273,47 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) ip6_tnl_addr_conflict(t, ipv6h)) return -1; - offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb)); - if (offset > 0) { - struct ipv6_tlv_tnl_enc_lim *tel; - tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset]; - if (tel->encap_limit == 0) { - icmpv6_send(skb, ICMPV6_PARAMPROB, - ICMPV6_HDR_FIELD, offset + 2); - return -1; - } - encap_limit = tel->encap_limit - 1; - } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) - encap_limit = t->parms.encap_limit; - - memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); - fl6.flowi6_proto = IPPROTO_IPV6; - dsfield = ipv6_get_dsfield(ipv6h); - if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) - fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK); - if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL) - fl6.flowlabel |= ip6_flowlabel(ipv6h); - if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) - fl6.flowi6_mark = skb->mark; + + if (t->parms.collect_md) { + struct ip_tunnel_info *tun_info; + const struct ip_tunnel_key *key; + + tun_info = skb_tunnel_info(skb); + if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) || + ip_tunnel_info_af(tun_info) != AF_INET6)) + return -1; + key = &tun_info->key; + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_IPV6; + fl6.daddr = key->u.ipv6.dst; + fl6.flowlabel = key->label; + } else { + offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb)); + if (offset > 0) { + struct ipv6_tlv_tnl_enc_lim *tel; + + tel = (void *)&skb_network_header(skb)[offset]; + if (tel->encap_limit == 0) { + icmpv6_send(skb, ICMPV6_PARAMPROB, + ICMPV6_HDR_FIELD, offset + 2); + return -1; + } + encap_limit = tel->encap_limit - 1; + } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) { + encap_limit = t->parms.encap_limit; + } + + memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_IPV6; + + if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) + fl6.flowlabel |= (*(__be32 *)ipv6h & IPV6_TCLASS_MASK); + if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL) + fl6.flowlabel |= ip6_flowlabel(ipv6h); + if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) + fl6.flowi6_mark = skb->mark; + } if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6)) return -1; @@ -1741,6 +1812,10 @@ static int ip6_tnl_dev_init(struct net_device *dev) if (err) return err; ip6_tnl_link_config(t); + if (t->parms.collect_md) { + dev->features |= NETIF_F_NETNS_LOCAL; + netif_keep_dst(dev); + } return 0; } @@ -1811,6 +1886,9 @@ static void ip6_tnl_netlink_parms(struct nlattr *data[], if (data[IFLA_IPTUN_PROTO]) parms->proto = nla_get_u8(data[IFLA_IPTUN_PROTO]); + + if (data[IFLA_IPTUN_COLLECT_METADATA]) + parms->collect_md = true; } static bool ip6_tnl_netlink_encap_parms(struct nlattr *data[], @@ -1850,6 +1928,7 @@ static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { struct net *net = dev_net(dev); + struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); struct ip6_tnl *nt, *t; struct ip_tunnel_encap ipencap; @@ -1864,9 +1943,14 @@ static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev, ip6_tnl_netlink_parms(data, &nt->parms); - t = ip6_tnl_locate(net, &nt->parms, 0); - if (!IS_ERR(t)) - return -EEXIST; + if (nt->parms.collect_md) { + if (rtnl_dereference(ip6n->collect_md_tun)) + return -EEXIST; + } else { + t = ip6_tnl_locate(net, &nt->parms, 0); + if (!IS_ERR(t)) + return -EEXIST; + } return ip6_tnl_create2(dev); } @@ -1890,6 +1974,8 @@ static int ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[], return err; } ip6_tnl_netlink_parms(data, &p); + if (p.collect_md) + return -EINVAL; t = ip6_tnl_locate(net, &p, 0); if (!IS_ERR(t)) { @@ -1937,6 +2023,8 @@ static size_t ip6_tnl_get_size(const struct net_device *dev) nla_total_size(2) + /* IFLA_IPTUN_ENCAP_DPORT */ nla_total_size(2) + + /* IFLA_IPTUN_COLLECT_METADATA */ + nla_total_size(0) + 0; } @@ -1955,16 +2043,15 @@ static int ip6_tnl_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->proto)) goto nla_put_failure; - if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE, - tunnel->encap.type) || - nla_put_be16(skb, IFLA_IPTUN_ENCAP_SPORT, - tunnel->encap.sport) || - nla_put_be16(skb, IFLA_IPTUN_ENCAP_DPORT, - tunnel->encap.dport) || - nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS, - tunnel->encap.flags)) + if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE, tunnel->encap.type) || + nla_put_be16(skb, IFLA_IPTUN_ENCAP_SPORT, tunnel->encap.sport) || + nla_put_be16(skb, IFLA_IPTUN_ENCAP_DPORT, tunnel->encap.dport) || + nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS, tunnel->encap.flags)) goto nla_put_failure; + if (parm->collect_md) + if (nla_put_flag(skb, IFLA_IPTUN_COLLECT_METADATA)) + goto nla_put_failure; return 0; nla_put_failure: @@ -1992,6 +2079,7 @@ static const struct nla_policy ip6_tnl_policy[IFLA_IPTUN_MAX + 1] = { [IFLA_IPTUN_ENCAP_FLAGS] = { .type = NLA_U16 }, [IFLA_IPTUN_ENCAP_SPORT] = { .type = NLA_U16 }, [IFLA_IPTUN_ENCAP_DPORT] = { .type = NLA_U16 }, + [IFLA_IPTUN_COLLECT_METADATA] = { .type = NLA_FLAG }, }; static struct rtnl_link_ops ip6_link_ops __read_mostly = { From a1c82704d13fd0d0ab0eb10d33a9bb7af83c90e3 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 15 Sep 2016 13:00:31 -0700 Subject: [PATCH 1128/1715] samples/bpf: extend test_tunnel_bpf.sh with IPIP test extend existing tests for vxlan, geneve, gre to include IPIP tunnel. It tests both traditional tunnel configuration and dynamic via bpf helpers. Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- samples/bpf/tcbpf2_kern.c | 58 ++++++++++++++++++++++++++++++++++ samples/bpf/test_tunnel_bpf.sh | 56 +++++++++++++++++++++++++++----- 2 files changed, 106 insertions(+), 8 deletions(-) diff --git a/samples/bpf/tcbpf2_kern.c b/samples/bpf/tcbpf2_kern.c index 7a15289da6cc..c1917d968fb4 100644 --- a/samples/bpf/tcbpf2_kern.c +++ b/samples/bpf/tcbpf2_kern.c @@ -1,4 +1,5 @@ /* Copyright (c) 2016 VMware + * Copyright (c) 2016 Facebook * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public @@ -188,4 +189,61 @@ int _geneve_get_tunnel(struct __sk_buff *skb) return TC_ACT_OK; } +SEC("ipip_set_tunnel") +int _ipip_set_tunnel(struct __sk_buff *skb) +{ + struct bpf_tunnel_key key = {}; + void *data = (void *)(long)skb->data; + struct iphdr *iph = data; + struct tcphdr *tcp = data + sizeof(*iph); + void *data_end = (void *)(long)skb->data_end; + int ret; + + /* single length check */ + if (data + sizeof(*iph) + sizeof(*tcp) > data_end) { + ERROR(1); + return TC_ACT_SHOT; + } + + key.tunnel_ttl = 64; + if (iph->protocol == IPPROTO_ICMP) { + key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */ + } else { + if (iph->protocol != IPPROTO_TCP || iph->ihl != 5) + return TC_ACT_SHOT; + + if (tcp->dest == htons(5200)) + key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */ + else if (tcp->dest == htons(5201)) + key.remote_ipv4 = 0xac100165; /* 172.16.1.101 */ + else + return TC_ACT_SHOT; + } + + ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + return TC_ACT_OK; +} + +SEC("ipip_get_tunnel") +int _ipip_get_tunnel(struct __sk_buff *skb) +{ + int ret; + struct bpf_tunnel_key key; + char fmt[] = "remote ip 0x%x\n"; + + ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + bpf_trace_printk(fmt, sizeof(fmt), key.remote_ipv4); + return TC_ACT_OK; +} + char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/test_tunnel_bpf.sh b/samples/bpf/test_tunnel_bpf.sh index 4956589a83ae..1ff634f187b7 100755 --- a/samples/bpf/test_tunnel_bpf.sh +++ b/samples/bpf/test_tunnel_bpf.sh @@ -9,15 +9,13 @@ # local 172.16.1.200 remote 172.16.1.100 # veth1 IP: 172.16.1.200, tunnel dev 11 -set -e - function config_device { ip netns add at_ns0 ip link add veth0 type veth peer name veth1 ip link set veth0 netns at_ns0 ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0 ip netns exec at_ns0 ip link set dev veth0 up - ip link set dev veth1 up + ip link set dev veth1 up mtu 1500 ip addr add dev veth1 172.16.1.200/24 } @@ -67,6 +65,19 @@ function add_geneve_tunnel { ip addr add dev $DEV 10.1.1.200/24 } +function add_ipip_tunnel { + # in namespace + ip netns exec at_ns0 \ + ip link add dev $DEV_NS type $TYPE local 172.16.1.100 remote 172.16.1.200 + ip netns exec at_ns0 ip link set dev $DEV_NS up + ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 + + # out of namespace + ip link add dev $DEV type $TYPE external + ip link set dev $DEV up + ip addr add dev $DEV 10.1.1.200/24 +} + function attach_bpf { DEV=$1 SET_TUNNEL=$2 @@ -85,6 +96,7 @@ function test_gre { attach_bpf $DEV gre_set_tunnel gre_get_tunnel ping -c 1 10.1.1.100 ip netns exec at_ns0 ping -c 1 10.1.1.200 + cleanup } function test_vxlan { @@ -96,6 +108,7 @@ function test_vxlan { attach_bpf $DEV vxlan_set_tunnel vxlan_get_tunnel ping -c 1 10.1.1.100 ip netns exec at_ns0 ping -c 1 10.1.1.200 + cleanup } function test_geneve { @@ -107,21 +120,48 @@ function test_geneve { attach_bpf $DEV geneve_set_tunnel geneve_get_tunnel ping -c 1 10.1.1.100 ip netns exec at_ns0 ping -c 1 10.1.1.200 + cleanup +} + +function test_ipip { + TYPE=ipip + DEV_NS=ipip00 + DEV=ipip11 + config_device + tcpdump -nei veth1 & + cat /sys/kernel/debug/tracing/trace_pipe & + add_ipip_tunnel + ethtool -K veth1 gso off gro off rx off tx off + ip link set dev veth1 mtu 1500 + attach_bpf $DEV ipip_set_tunnel ipip_get_tunnel + ping -c 1 10.1.1.100 + ip netns exec at_ns0 ping -c 1 10.1.1.200 + ip netns exec at_ns0 iperf -sD -p 5200 > /dev/null + sleep 0.2 + iperf -c 10.1.1.100 -n 5k -p 5200 + cleanup } function cleanup { + set +ex + pkill iperf ip netns delete at_ns0 ip link del veth1 - ip link del $DEV + ip link del ipip11 + ip link del gretap11 + ip link del geneve11 + pkill tcpdump + pkill cat + set -ex } +cleanup echo "Testing GRE tunnel..." test_gre -cleanup echo "Testing VXLAN tunnel..." test_vxlan -cleanup echo "Testing GENEVE tunnel..." test_geneve -cleanup -echo "Success" +echo "Testing IPIP tunnel..." +test_ipip +echo "*** PASS ***" From 173ca26e9b5136faa82dee37c77cbfb36974d079 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 15 Sep 2016 13:00:32 -0700 Subject: [PATCH 1129/1715] samples/bpf: add comprehensive ipip, ipip6, ip6ip6 test the test creates 3 namespaces with veth connected via bridge. First two namespaces simulate two different hosts with the same IPv4 and IPv6 addresses configured on the tunnel interface and they communicate with outside world via standard tunnels. Third namespace creates collect_md tunnel that is driven by BPF program which selects different remote host (either first or second namespace) based on tcp dest port number while tcp dst ip is the same. This scenario is rough approximation of load balancer use case. The tests check both traditional tunnel configuration and collect_md mode. Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- samples/bpf/tcbpf2_kern.c | 132 ++++++++++++++++++++++++++++ samples/bpf/test_ipip.sh | 178 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 310 insertions(+) create mode 100755 samples/bpf/test_ipip.sh diff --git a/samples/bpf/tcbpf2_kern.c b/samples/bpf/tcbpf2_kern.c index c1917d968fb4..3303bb85593b 100644 --- a/samples/bpf/tcbpf2_kern.c +++ b/samples/bpf/tcbpf2_kern.c @@ -9,12 +9,15 @@ #include #include #include +#include #include #include #include #include +#include #include "bpf_helpers.h" +#define _htonl __builtin_bswap32 #define ERROR(ret) do {\ char fmt[] = "ERROR line:%d ret:%d\n";\ bpf_trace_printk(fmt, sizeof(fmt), __LINE__, ret); \ @@ -246,4 +249,133 @@ int _ipip_get_tunnel(struct __sk_buff *skb) return TC_ACT_OK; } +SEC("ipip6_set_tunnel") +int _ipip6_set_tunnel(struct __sk_buff *skb) +{ + struct bpf_tunnel_key key = {}; + void *data = (void *)(long)skb->data; + struct iphdr *iph = data; + struct tcphdr *tcp = data + sizeof(*iph); + void *data_end = (void *)(long)skb->data_end; + int ret; + + /* single length check */ + if (data + sizeof(*iph) + sizeof(*tcp) > data_end) { + ERROR(1); + return TC_ACT_SHOT; + } + + key.remote_ipv6[0] = _htonl(0x2401db00); + key.tunnel_ttl = 64; + + if (iph->protocol == IPPROTO_ICMP) { + key.remote_ipv6[3] = _htonl(1); + } else { + if (iph->protocol != IPPROTO_TCP || iph->ihl != 5) { + ERROR(iph->protocol); + return TC_ACT_SHOT; + } + + if (tcp->dest == htons(5200)) { + key.remote_ipv6[3] = _htonl(1); + } else if (tcp->dest == htons(5201)) { + key.remote_ipv6[3] = _htonl(2); + } else { + ERROR(tcp->dest); + return TC_ACT_SHOT; + } + } + + ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + return TC_ACT_OK; +} + +SEC("ipip6_get_tunnel") +int _ipip6_get_tunnel(struct __sk_buff *skb) +{ + int ret; + struct bpf_tunnel_key key; + char fmt[] = "remote ip6 %x::%x\n"; + + ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + bpf_trace_printk(fmt, sizeof(fmt), _htonl(key.remote_ipv6[0]), + _htonl(key.remote_ipv6[3])); + return TC_ACT_OK; +} + +SEC("ip6ip6_set_tunnel") +int _ip6ip6_set_tunnel(struct __sk_buff *skb) +{ + struct bpf_tunnel_key key = {}; + void *data = (void *)(long)skb->data; + struct ipv6hdr *iph = data; + struct tcphdr *tcp = data + sizeof(*iph); + void *data_end = (void *)(long)skb->data_end; + int ret; + + /* single length check */ + if (data + sizeof(*iph) + sizeof(*tcp) > data_end) { + ERROR(1); + return TC_ACT_SHOT; + } + + key.remote_ipv6[0] = _htonl(0x2401db00); + key.tunnel_ttl = 64; + + if (iph->nexthdr == NEXTHDR_ICMP) { + key.remote_ipv6[3] = _htonl(1); + } else { + if (iph->nexthdr != NEXTHDR_TCP) { + ERROR(iph->nexthdr); + return TC_ACT_SHOT; + } + + if (tcp->dest == htons(5200)) { + key.remote_ipv6[3] = _htonl(1); + } else if (tcp->dest == htons(5201)) { + key.remote_ipv6[3] = _htonl(2); + } else { + ERROR(tcp->dest); + return TC_ACT_SHOT; + } + } + + ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + return TC_ACT_OK; +} + +SEC("ip6ip6_get_tunnel") +int _ip6ip6_get_tunnel(struct __sk_buff *skb) +{ + int ret; + struct bpf_tunnel_key key; + char fmt[] = "remote ip6 %x::%x\n"; + + ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + bpf_trace_printk(fmt, sizeof(fmt), _htonl(key.remote_ipv6[0]), + _htonl(key.remote_ipv6[3])); + return TC_ACT_OK; +} + + char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/test_ipip.sh b/samples/bpf/test_ipip.sh new file mode 100755 index 000000000000..196925403ab4 --- /dev/null +++ b/samples/bpf/test_ipip.sh @@ -0,0 +1,178 @@ +#!/bin/bash + +function config_device { + ip netns add at_ns0 + ip netns add at_ns1 + ip netns add at_ns2 + ip link add veth0 type veth peer name veth0b + ip link add veth1 type veth peer name veth1b + ip link add veth2 type veth peer name veth2b + ip link set veth0b up + ip link set veth1b up + ip link set veth2b up + ip link set dev veth0b mtu 1500 + ip link set dev veth1b mtu 1500 + ip link set dev veth2b mtu 1500 + ip link set veth0 netns at_ns0 + ip link set veth1 netns at_ns1 + ip link set veth2 netns at_ns2 + ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0 + ip netns exec at_ns0 ip addr add 2401:db00::1/64 dev veth0 nodad + ip netns exec at_ns0 ip link set dev veth0 up + ip netns exec at_ns1 ip addr add 172.16.1.101/24 dev veth1 + ip netns exec at_ns1 ip addr add 2401:db00::2/64 dev veth1 nodad + ip netns exec at_ns1 ip link set dev veth1 up + ip netns exec at_ns2 ip addr add 172.16.1.200/24 dev veth2 + ip netns exec at_ns2 ip addr add 2401:db00::3/64 dev veth2 nodad + ip netns exec at_ns2 ip link set dev veth2 up + ip link add br0 type bridge + ip link set br0 up + ip link set dev br0 mtu 1500 + ip link set veth0b master br0 + ip link set veth1b master br0 + ip link set veth2b master br0 +} + +function add_ipip_tunnel { + ip netns exec at_ns0 \ + ip link add dev $DEV_NS type ipip local 172.16.1.100 remote 172.16.1.200 + ip netns exec at_ns0 ip link set dev $DEV_NS up + ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 + ip netns exec at_ns1 \ + ip link add dev $DEV_NS type ipip local 172.16.1.101 remote 172.16.1.200 + ip netns exec at_ns1 ip link set dev $DEV_NS up + # same inner IP address in at_ns0 and at_ns1 + ip netns exec at_ns1 ip addr add dev $DEV_NS 10.1.1.100/24 + + ip netns exec at_ns2 ip link add dev $DEV type ipip external + ip netns exec at_ns2 ip link set dev $DEV up + ip netns exec at_ns2 ip addr add dev $DEV 10.1.1.200/24 +} + +function add_ipip6_tunnel { + ip netns exec at_ns0 \ + ip link add dev $DEV_NS type ip6tnl mode ipip6 local 2401:db00::1/64 remote 2401:db00::3/64 + ip netns exec at_ns0 ip link set dev $DEV_NS up + ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 + ip netns exec at_ns1 \ + ip link add dev $DEV_NS type ip6tnl mode ipip6 local 2401:db00::2/64 remote 2401:db00::3/64 + ip netns exec at_ns1 ip link set dev $DEV_NS up + # same inner IP address in at_ns0 and at_ns1 + ip netns exec at_ns1 ip addr add dev $DEV_NS 10.1.1.100/24 + + ip netns exec at_ns2 ip link add dev $DEV type ip6tnl mode ipip6 external + ip netns exec at_ns2 ip link set dev $DEV up + ip netns exec at_ns2 ip addr add dev $DEV 10.1.1.200/24 +} + +function add_ip6ip6_tunnel { + ip netns exec at_ns0 \ + ip link add dev $DEV_NS type ip6tnl mode ip6ip6 local 2401:db00::1/64 remote 2401:db00::3/64 + ip netns exec at_ns0 ip link set dev $DEV_NS up + ip netns exec at_ns0 ip addr add dev $DEV_NS 2601:646::1/64 + ip netns exec at_ns1 \ + ip link add dev $DEV_NS type ip6tnl mode ip6ip6 local 2401:db00::2/64 remote 2401:db00::3/64 + ip netns exec at_ns1 ip link set dev $DEV_NS up + # same inner IP address in at_ns0 and at_ns1 + ip netns exec at_ns1 ip addr add dev $DEV_NS 2601:646::1/64 + + ip netns exec at_ns2 ip link add dev $DEV type ip6tnl mode ip6ip6 external + ip netns exec at_ns2 ip link set dev $DEV up + ip netns exec at_ns2 ip addr add dev $DEV 2601:646::2/64 +} + +function attach_bpf { + DEV=$1 + SET_TUNNEL=$2 + GET_TUNNEL=$3 + ip netns exec at_ns2 tc qdisc add dev $DEV clsact + ip netns exec at_ns2 tc filter add dev $DEV egress bpf da obj tcbpf2_kern.o sec $SET_TUNNEL + ip netns exec at_ns2 tc filter add dev $DEV ingress bpf da obj tcbpf2_kern.o sec $GET_TUNNEL +} + +function test_ipip { + DEV_NS=ipip_std + DEV=ipip_bpf + config_device +# tcpdump -nei br0 & + cat /sys/kernel/debug/tracing/trace_pipe & + + add_ipip_tunnel + attach_bpf $DEV ipip_set_tunnel ipip_get_tunnel + + ip netns exec at_ns0 ping -c 1 10.1.1.200 + ip netns exec at_ns2 ping -c 1 10.1.1.100 + ip netns exec at_ns0 iperf -sD -p 5200 > /dev/null + ip netns exec at_ns1 iperf -sD -p 5201 > /dev/null + sleep 0.2 + # tcp check _same_ IP over different tunnels + ip netns exec at_ns2 iperf -c 10.1.1.100 -n 5k -p 5200 + ip netns exec at_ns2 iperf -c 10.1.1.100 -n 5k -p 5201 + cleanup +} + +# IPv4 over IPv6 tunnel +function test_ipip6 { + DEV_NS=ipip_std + DEV=ipip_bpf + config_device +# tcpdump -nei br0 & + cat /sys/kernel/debug/tracing/trace_pipe & + + add_ipip6_tunnel + attach_bpf $DEV ipip6_set_tunnel ipip6_get_tunnel + + ip netns exec at_ns0 ping -c 1 10.1.1.200 + ip netns exec at_ns2 ping -c 1 10.1.1.100 + ip netns exec at_ns0 iperf -sD -p 5200 > /dev/null + ip netns exec at_ns1 iperf -sD -p 5201 > /dev/null + sleep 0.2 + # tcp check _same_ IP over different tunnels + ip netns exec at_ns2 iperf -c 10.1.1.100 -n 5k -p 5200 + ip netns exec at_ns2 iperf -c 10.1.1.100 -n 5k -p 5201 + cleanup +} + +# IPv6 over IPv6 tunnel +function test_ip6ip6 { + DEV_NS=ipip_std + DEV=ipip_bpf + config_device +# tcpdump -nei br0 & + cat /sys/kernel/debug/tracing/trace_pipe & + + add_ip6ip6_tunnel + attach_bpf $DEV ip6ip6_set_tunnel ip6ip6_get_tunnel + + ip netns exec at_ns0 ping -6 -c 1 2601:646::2 + ip netns exec at_ns2 ping -6 -c 1 2601:646::1 + ip netns exec at_ns0 iperf -6sD -p 5200 > /dev/null + ip netns exec at_ns1 iperf -6sD -p 5201 > /dev/null + sleep 0.2 + # tcp check _same_ IP over different tunnels + ip netns exec at_ns2 iperf -6c 2601:646::1 -n 5k -p 5200 + ip netns exec at_ns2 iperf -6c 2601:646::1 -n 5k -p 5201 + cleanup +} + +function cleanup { + set +ex + pkill iperf + ip netns delete at_ns0 + ip netns delete at_ns1 + ip netns delete at_ns2 + ip link del veth0 + ip link del veth1 + ip link del veth2 + ip link del br0 + pkill tcpdump + pkill cat + set -ex +} + +cleanup +echo "Testing IP tunnels..." +test_ipip +test_ipip6 +test_ip6ip6 +echo "*** PASS ***" From f1785fbf7c0bc17211c299a647ebc38968a42181 Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Tue, 13 Sep 2016 15:03:15 -0400 Subject: [PATCH 1130/1715] rtl8xxxu: Implement 8192e specific power down sequence This powers down the 8192e correctly, or at least to the point where the firmware will load again, when reloading the driver module. Signed-off-by: Jes Sorensen Signed-off-by: Kalle Valo --- .../realtek/rtl8xxxu/rtl8xxxu_8192e.c | 144 +++++++++++++++++- .../wireless/realtek/rtl8xxxu/rtl8xxxu_regs.h | 1 + 2 files changed, 144 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c index 841522ee6379..df54d27e7851 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c @@ -1396,6 +1396,114 @@ exit: return ret; } +static int rtl8192eu_active_to_lps(struct rtl8xxxu_priv *priv) +{ + struct device *dev = &priv->udev->dev; + u8 val8; + u16 val16; + u32 val32; + int retry, retval; + + rtl8xxxu_write8(priv, REG_TXPAUSE, 0xff); + + retry = 100; + retval = -EBUSY; + /* + * Poll 32 bit wide 0x05f8 for 0x00000000 to ensure no TX is pending. + */ + do { + val32 = rtl8xxxu_read32(priv, REG_SCH_TX_CMD); + if (!val32) { + retval = 0; + break; + } + } while (retry--); + + if (!retry) { + dev_warn(dev, "Failed to flush TX queue\n"); + retval = -EBUSY; + goto out; + } + + /* Disable CCK and OFDM, clock gated */ + val8 = rtl8xxxu_read8(priv, REG_SYS_FUNC); + val8 &= ~SYS_FUNC_BBRSTB; + rtl8xxxu_write8(priv, REG_SYS_FUNC, val8); + + udelay(2); + + /* Reset whole BB */ + val8 = rtl8xxxu_read8(priv, REG_SYS_FUNC); + val8 &= ~SYS_FUNC_BB_GLB_RSTN; + rtl8xxxu_write8(priv, REG_SYS_FUNC, val8); + + /* Reset MAC TRX */ + val16 = rtl8xxxu_read16(priv, REG_CR); + val16 &= 0xff00; + val16 |= (CR_HCI_TXDMA_ENABLE | CR_HCI_RXDMA_ENABLE); + rtl8xxxu_write16(priv, REG_CR, val16); + + val16 = rtl8xxxu_read16(priv, REG_CR); + val16 &= ~CR_SECURITY_ENABLE; + rtl8xxxu_write16(priv, REG_CR, val16); + + val8 = rtl8xxxu_read8(priv, REG_DUAL_TSF_RST); + val8 |= DUAL_TSF_TX_OK; + rtl8xxxu_write8(priv, REG_DUAL_TSF_RST, val8); + +out: + return retval; +} + +static int rtl8192eu_active_to_emu(struct rtl8xxxu_priv *priv) +{ + u8 val8; + int count, ret = 0; + + /* Turn off RF */ + rtl8xxxu_write8(priv, REG_RF_CTRL, 0); + + /* Switch DPDT_SEL_P output from register 0x65[2] */ + val8 = rtl8xxxu_read8(priv, REG_LEDCFG2); + val8 &= ~LEDCFG2_DPDT_SELECT; + rtl8xxxu_write8(priv, REG_LEDCFG2, val8); + + /* 0x0005[1] = 1 turn off MAC by HW state machine*/ + val8 = rtl8xxxu_read8(priv, REG_APS_FSMCO + 1); + val8 |= BIT(1); + rtl8xxxu_write8(priv, REG_APS_FSMCO + 1, val8); + + for (count = RTL8XXXU_MAX_REG_POLL; count; count--) { + val8 = rtl8xxxu_read8(priv, REG_APS_FSMCO + 1); + if ((val8 & BIT(1)) == 0) + break; + udelay(10); + } + + if (!count) { + dev_warn(&priv->udev->dev, "%s: Disabling MAC timed out\n", + __func__); + ret = -EBUSY; + goto exit; + } + +exit: + return ret; +} + +static int rtl8192eu_emu_to_disabled(struct rtl8xxxu_priv *priv) +{ + u8 val8; + + /* 0x04[12:11] = 01 enable WL suspend */ + val8 = rtl8xxxu_read8(priv, REG_APS_FSMCO + 1); + val8 &= ~(BIT(3) | BIT(4)); + val8 |= BIT(3); + rtl8xxxu_write8(priv, REG_APS_FSMCO + 1, val8); + + return 0; +} + static int rtl8192eu_power_on(struct rtl8xxxu_priv *priv) { u16 val16; @@ -1446,6 +1554,40 @@ exit: return ret; } +void rtl8192eu_power_off(struct rtl8xxxu_priv *priv) +{ + u8 val8; + u16 val16; + + rtl8xxxu_flush_fifo(priv); + + val8 = rtl8xxxu_read8(priv, REG_TX_REPORT_CTRL); + val8 &= ~TX_REPORT_CTRL_TIMER_ENABLE; + rtl8xxxu_write8(priv, REG_TX_REPORT_CTRL, val8); + + /* Turn off RF */ + rtl8xxxu_write8(priv, REG_RF_CTRL, 0x00); + + rtl8192eu_active_to_lps(priv); + + /* Reset Firmware if running in RAM */ + if (rtl8xxxu_read8(priv, REG_MCU_FW_DL) & MCU_FW_RAM_SEL) + rtl8xxxu_firmware_self_reset(priv); + + /* Reset MCU */ + val16 = rtl8xxxu_read16(priv, REG_SYS_FUNC); + val16 &= ~SYS_FUNC_CPU_ENABLE; + rtl8xxxu_write16(priv, REG_SYS_FUNC, val16); + + /* Reset MCU ready status */ + rtl8xxxu_write8(priv, REG_MCU_FW_DL, 0x00); + + rtl8xxxu_reset_8051(priv); + + rtl8192eu_active_to_emu(priv); + rtl8192eu_emu_to_disabled(priv); +} + static void rtl8192e_enable_rf(struct rtl8xxxu_priv *priv) { u32 val32; @@ -1487,7 +1629,7 @@ struct rtl8xxxu_fileops rtl8192eu_fops = { .parse_efuse = rtl8192eu_parse_efuse, .load_firmware = rtl8192eu_load_firmware, .power_on = rtl8192eu_power_on, - .power_off = rtl8xxxu_power_off, + .power_off = rtl8192eu_power_off, .reset_8051 = rtl8xxxu_reset_8051, .llt_init = rtl8xxxu_auto_llt_table, .init_phy_bb = rtl8192eu_init_phy_bb, diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_regs.h b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_regs.h index 3555a2f24285..315ccfb2dff5 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_regs.h +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_regs.h @@ -676,6 +676,7 @@ #define REG_SCH_TXCMD 0x05d0 /* define REG_FW_TSF_SYNC_CNT 0x04a0 */ +#define REG_SCH_TX_CMD 0x05f8 #define REG_FW_RESET_TSF_CNT_1 0x05fc #define REG_FW_RESET_TSF_CNT_0 0x05fd #define REG_FW_BCN_DIS_CNT 0x05fe From 92ca4f92eca7aa362d51f7657d3fea47861600ee Mon Sep 17 00:00:00 2001 From: Bob Copeland Date: Wed, 14 Sep 2016 08:42:36 -0400 Subject: [PATCH 1131/1715] mwifiex: fix error handling in mwifiex_create_custom_regdomain smatch reports: sta_cmdresp.c:1053 mwifiex_create_custom_regdomain() warn: possible memory leak of 'regd' Indeed, mwifiex_create_custom_regdomain() returns NULL in the case that channel is missing in the TLV without freeing regd. Moreover, some other error paths in this function return ERR_PTR values which are assigned without checking to the regd field in the mwifiex_adapter struct. The latter is only null-checked where used. Fix by freeing regd in the error path, and only update priv->adapter->regd if the returned pointer is valid. Cc: Amitkumar Karwar Cc: Nishant Sarmukadam Signed-off-by: Bob Copeland Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c b/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c index 3344a26bed03..8548027abf71 100644 --- a/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c +++ b/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c @@ -1049,8 +1049,10 @@ mwifiex_create_custom_regdomain(struct mwifiex_private *priv, enum nl80211_band band; chan = *buf++; - if (!chan) + if (!chan) { + kfree(regd); return NULL; + } chflags = *buf++; band = (chan <= 14) ? NL80211_BAND_2GHZ : NL80211_BAND_5GHZ; freq = ieee80211_channel_to_frequency(chan, band); @@ -1116,6 +1118,7 @@ static int mwifiex_ret_chan_region_cfg(struct mwifiex_private *priv, u16 action = le16_to_cpu(reg->action); u16 tlv, tlv_buf_len, tlv_buf_left; struct mwifiex_ie_types_header *head; + struct ieee80211_regdomain *regd; u8 *tlv_buf; if (action != HostCmd_ACT_GEN_GET) @@ -1137,10 +1140,10 @@ static int mwifiex_ret_chan_region_cfg(struct mwifiex_private *priv, mwifiex_dbg_dump(priv->adapter, CMD_D, "CHAN:", (u8 *)head + sizeof(*head), tlv_buf_len); - priv->adapter->regd = - mwifiex_create_custom_regdomain(priv, - (u8 *)head + - sizeof(*head), tlv_buf_len); + regd = mwifiex_create_custom_regdomain(priv, + (u8 *)head + sizeof(*head), tlv_buf_len); + if (!IS_ERR(regd)) + priv->adapter->regd = regd; break; } From 80ba4f1d365af206b9e818d17d22fed02fe5def0 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 16 Sep 2016 10:37:31 +0100 Subject: [PATCH 1132/1715] mwifiex: fix null pointer deference when adapter is null If adapter is null the error exit path in mwifiex_shutdown_sw is to down the semaphore sem and print some debug via mwifiex_dbg. However, passing a NULL adapter to mwifiex_dbg causes a null pointer deference when accessing adapter->dev. This fix checks for a null adapter at the start of the function and to exit without the need to up the semaphore and we also skip the debug to avoid the null pointer dereference. Signed-off-by: Colin Ian King Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/main.c b/drivers/net/wireless/marvell/mwifiex/main.c index 9b2e98cfe090..2478ccd6f2d9 100644 --- a/drivers/net/wireless/marvell/mwifiex/main.c +++ b/drivers/net/wireless/marvell/mwifiex/main.c @@ -1369,12 +1369,12 @@ mwifiex_shutdown_sw(struct mwifiex_adapter *adapter, struct semaphore *sem) struct mwifiex_private *priv; int i; + if (!adapter) + goto exit_return; + if (down_interruptible(sem)) goto exit_sem_err; - if (!adapter) - goto exit_remove; - priv = mwifiex_get_priv(adapter, MWIFIEX_BSS_ROLE_ANY); mwifiex_deauthenticate(priv, NULL); @@ -1430,10 +1430,10 @@ mwifiex_shutdown_sw(struct mwifiex_adapter *adapter, struct semaphore *sem) rtnl_unlock(); } -exit_remove: up(sem); exit_sem_err: mwifiex_dbg(adapter, INFO, "%s, successful\n", __func__); +exit_return: return 0; } From 2c89791eeb6f3873349c240345c1879ef6a16f63 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 14 Sep 2016 02:04:18 +0800 Subject: [PATCH 1133/1715] sctp: remove the unnecessary state check in sctp_outq_tail Data Chunks are only sent by sctp_primitive_SEND, in which sctp checks the asoc's state through statetable before calling sctp_outq_tail. So there's no need to check the asoc's state again in sctp_outq_tail. Besides, sctp_do_sm is protected by lock_sock, even if sending msg is interrupted by timer events, the event's processes still need to acquire lock_sock first. It means no others CMDs can be enqueue into side effect list before CMD_SEND_MSG to change asoc->state, so it's safe to remove it. This patch is to remove redundant asoc->state check from sctp_outq_tail. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/outqueue.c | 51 ++++++++++++--------------------------------- 1 file changed, 13 insertions(+), 38 deletions(-) diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 72e54a416af6..da2418b64c86 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -299,50 +299,25 @@ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk, gfp_t gfp) * immediately. */ if (sctp_chunk_is_data(chunk)) { - /* Is it OK to queue data chunks? */ - /* From 9. Termination of Association - * - * When either endpoint performs a shutdown, the - * association on each peer will stop accepting new - * data from its user and only deliver data in queue - * at the time of sending or receiving the SHUTDOWN - * chunk. - */ - switch (q->asoc->state) { - case SCTP_STATE_CLOSED: - case SCTP_STATE_SHUTDOWN_PENDING: - case SCTP_STATE_SHUTDOWN_SENT: - case SCTP_STATE_SHUTDOWN_RECEIVED: - case SCTP_STATE_SHUTDOWN_ACK_SENT: - /* Cannot send after transport endpoint shutdown */ - error = -ESHUTDOWN; - break; + pr_debug("%s: outqueueing: outq:%p, chunk:%p[%s])\n", + __func__, q, chunk, chunk && chunk->chunk_hdr ? + sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)) : + "illegal chunk"); - default: - pr_debug("%s: outqueueing: outq:%p, chunk:%p[%s])\n", - __func__, q, chunk, chunk && chunk->chunk_hdr ? - sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)) : - "illegal chunk"); - - sctp_chunk_hold(chunk); - sctp_outq_tail_data(q, chunk); - if (chunk->asoc->prsctp_enable && - SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags)) - chunk->asoc->sent_cnt_removable++; - if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED) - SCTP_INC_STATS(net, SCTP_MIB_OUTUNORDERCHUNKS); - else - SCTP_INC_STATS(net, SCTP_MIB_OUTORDERCHUNKS); - break; - } + sctp_chunk_hold(chunk); + sctp_outq_tail_data(q, chunk); + if (chunk->asoc->prsctp_enable && + SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags)) + chunk->asoc->sent_cnt_removable++; + if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED) + SCTP_INC_STATS(net, SCTP_MIB_OUTUNORDERCHUNKS); + else + SCTP_INC_STATS(net, SCTP_MIB_OUTORDERCHUNKS); } else { list_add_tail(&chunk->list, &q->control_chunk_list); SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS); } - if (error < 0) - return error; - if (!q->cork) error = sctp_outq_flush(q, 0, gfp); From 66388f2c08dfa38071f9eceae7bb29060d9be9aa Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 14 Sep 2016 02:04:19 +0800 Subject: [PATCH 1134/1715] sctp: do not return the transmit err back to sctp_sendmsg Once a chunk is enqueued successfully, sctp queues can take care of it. Even if it is failed to transmit (like because of nomem), it should be put into retransmit queue. If sctp report this error to users, it confuses them, they may resend that msg, but actually in kernel sctp stack is in charge of retransmit it already. Besides, this error probably is not from the failure of transmitting current msg, but transmitting or retransmitting another msg's chunks, as sctp_outq_flush just tries to send out all transports' chunks. This patch is to make sctp_cmd_send_msg return avoid, and not return the transmit err back to sctp_sendmsg Fixes: 8b570dc9f7b6 ("sctp: only drop the reference on the datamsg after sending a msg") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/sm_sideeffect.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 12d45193357c..cf6e4f0de729 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -1020,19 +1020,13 @@ static void sctp_cmd_t1_timer_update(struct sctp_association *asoc, * This way the whole message is queued up and bundling if * encouraged for small fragments. */ -static int sctp_cmd_send_msg(struct sctp_association *asoc, - struct sctp_datamsg *msg, gfp_t gfp) +static void sctp_cmd_send_msg(struct sctp_association *asoc, + struct sctp_datamsg *msg, gfp_t gfp) { struct sctp_chunk *chunk; - int error = 0; - list_for_each_entry(chunk, &msg->chunks, frag_list) { - error = sctp_outq_tail(&asoc->outqueue, chunk, gfp); - if (error) - break; - } - - return error; + list_for_each_entry(chunk, &msg->chunks, frag_list) + sctp_outq_tail(&asoc->outqueue, chunk, gfp); } @@ -1709,7 +1703,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, sctp_outq_cork(&asoc->outqueue); local_cork = 1; } - error = sctp_cmd_send_msg(asoc, cmd->obj.msg, gfp); + sctp_cmd_send_msg(asoc, cmd->obj.msg, gfp); break; case SCTP_CMD_SEND_NEXT_ASCONF: sctp_cmd_send_asconf(asoc); From b61c654f9b3f1a271217e46c893f80565b1f754d Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 14 Sep 2016 02:04:20 +0800 Subject: [PATCH 1135/1715] sctp: free msg->chunks when sctp_primitive_SEND return err Last patch "sctp: do not return the transmit err back to sctp_sendmsg" made sctp_primitive_SEND return err only when asoc state is unavailable. In this case, chunks are not enqueued, they have no chance to be freed if we don't take care of them later. This Patch is actually to revert commit 1cd4d5c4326a ("sctp: remove the unused sctp_datamsg_free()"), commit 69b5777f2e57 ("sctp: hold the chunks only after the chunk is enqueued in outq") and commit 8b570dc9f7b6 ("sctp: only drop the reference on the datamsg after sending a msg"), to use sctp_datamsg_free to free the chunks of current msg. Fixes: 8b570dc9f7b6 ("sctp: only drop the reference on the datamsg after sending a msg") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 1 + net/sctp/chunk.c | 13 +++++++++++++ net/sctp/outqueue.c | 1 - net/sctp/socket.c | 8 ++++++-- 4 files changed, 20 insertions(+), 3 deletions(-) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index ce93c4b10d26..f61fb7c87e53 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -537,6 +537,7 @@ struct sctp_datamsg { struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *, struct sctp_sndrcvinfo *, struct iov_iter *); +void sctp_datamsg_free(struct sctp_datamsg *); void sctp_datamsg_put(struct sctp_datamsg *); void sctp_chunk_fail(struct sctp_chunk *, int error); int sctp_chunk_abandoned(struct sctp_chunk *); diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index a55e54738b81..af9cc8055465 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -70,6 +70,19 @@ static struct sctp_datamsg *sctp_datamsg_new(gfp_t gfp) return msg; } +void sctp_datamsg_free(struct sctp_datamsg *msg) +{ + struct sctp_chunk *chunk; + + /* This doesn't have to be a _safe vairant because + * sctp_chunk_free() only drops the refs. + */ + list_for_each_entry(chunk, &msg->chunks, frag_list) + sctp_chunk_free(chunk); + + sctp_datamsg_put(msg); +} + /* Final destructruction of datamsg memory. */ static void sctp_datamsg_destroy(struct sctp_datamsg *msg) { diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index da2418b64c86..6c109b0f8495 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -304,7 +304,6 @@ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk, gfp_t gfp) sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)) : "illegal chunk"); - sctp_chunk_hold(chunk); sctp_outq_tail_data(q, chunk); if (chunk->asoc->prsctp_enable && SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags)) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 9fc417a8b476..6cdc61c21438 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1958,6 +1958,8 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) /* Now send the (possibly) fragmented message. */ list_for_each_entry(chunk, &datamsg->chunks, frag_list) { + sctp_chunk_hold(chunk); + /* Do accounting for the write space. */ sctp_set_owner_w(chunk); @@ -1970,13 +1972,15 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) * breaks. */ err = sctp_primitive_SEND(net, asoc, datamsg); - sctp_datamsg_put(datamsg); /* Did the lower layer accept the chunk? */ - if (err) + if (err) { + sctp_datamsg_free(datamsg); goto out_free; + } pr_debug("%s: we sent primitively\n", __func__); + sctp_datamsg_put(datamsg); err = msg_len; if (unlikely(wait_connect)) { From 645194409b0634a43890ec27c491c368b3bffc07 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 14 Sep 2016 02:04:21 +0800 Subject: [PATCH 1136/1715] sctp: save transmit error to sk_err in sctp_outq_flush Every time when sctp calls sctp_outq_flush, it sends out the chunks of control queue, retransmit queue and data queue. Even if some trunks are failed to transmit, it still has to flush all the transports, as it's the only chance to clean that transmit_list. So the latest transmit error here should be returned back. This transmit error is an internal error of sctp stack. I checked all the places where it uses the transmit error (the return value of sctp_outq_flush), most of them are actually just save it to sk_err. Except for sctp_assoc/endpoint_bh_rcv, they will drop the chunk if it's failed to send a REPLY, which is actually incorrect, as we can't be sure the error that sctp_outq_flush returns is from sending that REPLY. So it's meaningless for sctp_outq_flush to return error back. This patch is to save transmit error to sk_err in sctp_outq_flush, the new error can update the old value. Eventually, sctp_wait_for_* would check for it. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/output.c | 3 ++- net/sctp/outqueue.c | 21 ++++++++++++--------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/net/sctp/output.c b/net/sctp/output.c index 31b7bc35895d..f2597a9eff74 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -180,7 +180,6 @@ sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *packet, int one_packet, gfp_t gfp) { sctp_xmit_t retval; - int error = 0; pr_debug("%s: packet:%p size:%Zu chunk:%p size:%d\n", __func__, packet, packet->size, chunk, chunk->skb ? chunk->skb->len : -1); @@ -188,6 +187,8 @@ sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *packet, switch ((retval = (sctp_packet_append_chunk(packet, chunk)))) { case SCTP_XMIT_PMTU_FULL: if (!packet->has_cookie_echo) { + int error = 0; + error = sctp_packet_transmit(packet, gfp); if (error < 0) chunk->skb->sk->sk_err = -error; diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 6c109b0f8495..052a4796a457 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -533,7 +533,6 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport, sctp_retransmit_reason_t reason) { struct net *net = sock_net(q->asoc->base.sk); - int error = 0; switch (reason) { case SCTP_RTXR_T3_RTX: @@ -577,10 +576,7 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport, * will be flushed at the end. */ if (reason != SCTP_RTXR_FAST_RTX) - error = sctp_outq_flush(q, /* rtx_timeout */ 1, GFP_ATOMIC); - - if (error) - q->asoc->base.sk->sk_err = -error; + sctp_outq_flush(q, /* rtx_timeout */ 1, GFP_ATOMIC); } /* @@ -893,8 +889,10 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp) sctp_packet_config(&singleton, vtag, 0); sctp_packet_append_chunk(&singleton, chunk); error = sctp_packet_transmit(&singleton, gfp); - if (error < 0) - return error; + if (error < 0) { + asoc->base.sk->sk_err = -error; + return 0; + } break; case SCTP_CID_ABORT: @@ -992,6 +990,8 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp) retran: error = sctp_outq_flush_rtx(q, packet, rtx_timeout, &start_timer); + if (error < 0) + asoc->base.sk->sk_err = -error; if (start_timer) { sctp_transport_reset_t3_rtx(transport); @@ -1166,14 +1166,17 @@ sctp_flush_out: struct sctp_transport, send_ready); packet = &t->packet; - if (!sctp_packet_empty(packet)) + if (!sctp_packet_empty(packet)) { error = sctp_packet_transmit(packet, gfp); + if (error < 0) + asoc->base.sk->sk_err = -error; + } /* Clear the burst limited state, if any */ sctp_transport_burst_reset(t); } - return error; + return 0; } /* Update unack_data based on the incoming SACK chunk */ From 83dbc3d4a38411ef38f680d7045c8478cc9c5a56 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 14 Sep 2016 02:04:22 +0800 Subject: [PATCH 1137/1715] sctp: make sctp_outq_flush/tail/uncork return void sctp_outq_flush return value is meaningless now, this patch is to make sctp_outq_flush return void, as well as sctp_outq_fail and sctp_outq_uncork. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 4 ++-- net/sctp/outqueue.c | 19 +++++++------------ net/sctp/sm_sideeffect.c | 9 ++++----- 3 files changed, 13 insertions(+), 19 deletions(-) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index f61fb7c87e53..8693dc452a7f 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1077,7 +1077,7 @@ struct sctp_outq { void sctp_outq_init(struct sctp_association *, struct sctp_outq *); void sctp_outq_teardown(struct sctp_outq *); void sctp_outq_free(struct sctp_outq*); -int sctp_outq_tail(struct sctp_outq *, struct sctp_chunk *chunk, gfp_t); +void sctp_outq_tail(struct sctp_outq *, struct sctp_chunk *chunk, gfp_t); int sctp_outq_sack(struct sctp_outq *, struct sctp_chunk *); int sctp_outq_is_empty(const struct sctp_outq *); void sctp_outq_restart(struct sctp_outq *); @@ -1085,7 +1085,7 @@ void sctp_outq_restart(struct sctp_outq *); void sctp_retransmit(struct sctp_outq *, struct sctp_transport *, sctp_retransmit_reason_t); void sctp_retransmit_mark(struct sctp_outq *, struct sctp_transport *, __u8); -int sctp_outq_uncork(struct sctp_outq *, gfp_t gfp); +void sctp_outq_uncork(struct sctp_outq *, gfp_t gfp); void sctp_prsctp_prune(struct sctp_association *asoc, struct sctp_sndrcvinfo *sinfo, int msg_len); /* Uncork and flush an outqueue. */ diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 052a4796a457..8c3f446d965c 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -68,7 +68,7 @@ static void sctp_mark_missing(struct sctp_outq *q, static void sctp_generate_fwdtsn(struct sctp_outq *q, __u32 sack_ctsn); -static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp); +static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp); /* Add data to the front of the queue. */ static inline void sctp_outq_head_data(struct sctp_outq *q, @@ -285,10 +285,9 @@ void sctp_outq_free(struct sctp_outq *q) } /* Put a new chunk in an sctp_outq. */ -int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk, gfp_t gfp) +void sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk, gfp_t gfp) { struct net *net = sock_net(q->asoc->base.sk); - int error = 0; pr_debug("%s: outq:%p, chunk:%p[%s]\n", __func__, q, chunk, chunk && chunk->chunk_hdr ? @@ -318,9 +317,7 @@ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk, gfp_t gfp) } if (!q->cork) - error = sctp_outq_flush(q, 0, gfp); - - return error; + sctp_outq_flush(q, 0, gfp); } /* Insert a chunk into the sorted list based on the TSNs. The retransmit list @@ -748,12 +745,12 @@ redo: } /* Cork the outqueue so queued chunks are really queued. */ -int sctp_outq_uncork(struct sctp_outq *q, gfp_t gfp) +void sctp_outq_uncork(struct sctp_outq *q, gfp_t gfp) { if (q->cork) q->cork = 0; - return sctp_outq_flush(q, 0, gfp); + sctp_outq_flush(q, 0, gfp); } @@ -766,7 +763,7 @@ int sctp_outq_uncork(struct sctp_outq *q, gfp_t gfp) * locking concerns must be made. Today we use the sock lock to protect * this function. */ -static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp) +static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp) { struct sctp_packet *packet; struct sctp_packet singleton; @@ -891,7 +888,7 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp) error = sctp_packet_transmit(&singleton, gfp); if (error < 0) { asoc->base.sk->sk_err = -error; - return 0; + return; } break; @@ -1175,8 +1172,6 @@ sctp_flush_out: /* Clear the burst limited state, if any */ sctp_transport_burst_reset(t); } - - return 0; } /* Update unack_data based on the incoming SACK chunk */ diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index cf6e4f0de729..c345bf153bed 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -1421,8 +1421,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, local_cork = 1; } /* Send a chunk to our peer. */ - error = sctp_outq_tail(&asoc->outqueue, cmd->obj.chunk, - gfp); + sctp_outq_tail(&asoc->outqueue, cmd->obj.chunk, gfp); break; case SCTP_CMD_SEND_PKT: @@ -1676,7 +1675,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, case SCTP_CMD_FORCE_PRIM_RETRAN: t = asoc->peer.retran_path; asoc->peer.retran_path = asoc->peer.primary_path; - error = sctp_outq_uncork(&asoc->outqueue, gfp); + sctp_outq_uncork(&asoc->outqueue, gfp); local_cork = 0; asoc->peer.retran_path = t; break; @@ -1733,9 +1732,9 @@ out: */ if (asoc && SCTP_EVENT_T_CHUNK == event_type && chunk) { if (chunk->end_of_packet || chunk->singleton) - error = sctp_outq_uncork(&asoc->outqueue, gfp); + sctp_outq_uncork(&asoc->outqueue, gfp); } else if (local_cork) - error = sctp_outq_uncork(&asoc->outqueue, gfp); + sctp_outq_uncork(&asoc->outqueue, gfp); if (sp->data_ready_signalled) sp->data_ready_signalled = 0; From 41001faf95faaff7c4f4f93c6bb544ee227ad0cc Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 14 Sep 2016 02:04:23 +0800 Subject: [PATCH 1138/1715] sctp: not return ENOMEM err back in sctp_packet_transmit As David and Marcelo's suggestion, ENOMEM err shouldn't return back to user in transmit path. Instead, sctp's retransmit would take care of the chunks that fail to send because of ENOMEM. This patch is only to do some release job when alloc_skb fails, not to return ENOMEM back any more. Besides, it also cleans up sctp_packet_transmit's err path, and fixes some issues in err path: - It didn't free the head skb in nomem: path. - No need to check nskb in no_route: path. - It should goto err: path if alloc_skb fails for head. - Not all the NOMEMs should free nskb. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/output.c | 47 ++++++++++++++++++++++------------------------- 1 file changed, 22 insertions(+), 25 deletions(-) diff --git a/net/sctp/output.c b/net/sctp/output.c index f2597a9eff74..0c605ec74dc4 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -442,14 +442,14 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) * time. Application may notice this error. */ pr_err_once("Trying to GSO but underlying device doesn't support it."); - goto nomem; + goto err; } } else { pkt_size = packet->size; } head = alloc_skb(pkt_size + MAX_HEADER, gfp); if (!head) - goto nomem; + goto err; if (gso) { NAPI_GRO_CB(head)->last = head; skb_shinfo(head)->gso_type = sk->sk_gso_type; @@ -470,8 +470,12 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) } } dst = dst_clone(tp->dst); - if (!dst) - goto no_route; + if (!dst) { + if (asoc) + IP_INC_STATS(sock_net(asoc->base.sk), + IPSTATS_MIB_OUTNOROUTES); + goto nodst; + } skb_dst_set(head, dst); /* Build the SCTP header. */ @@ -622,8 +626,10 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) if (!gso) break; - if (skb_gro_receive(&head, nskb)) + if (skb_gro_receive(&head, nskb)) { + kfree_skb(nskb); goto nomem; + } nskb = NULL; if (WARN_ON_ONCE(skb_shinfo(head)->gso_segs >= sk->sk_gso_max_segs)) @@ -717,18 +723,13 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) } head->ignore_df = packet->ipfragok; tp->af_specific->sctp_xmit(head, tp); + goto out; -out: - sctp_packet_reset(packet); - return err; -no_route: - kfree_skb(head); - if (nskb != head) - kfree_skb(nskb); - - if (asoc) - IP_INC_STATS(sock_net(asoc->base.sk), IPSTATS_MIB_OUTNOROUTES); +nomem: + if (packet->auth && list_empty(&packet->auth->list)) + sctp_chunk_free(packet->auth); +nodst: /* FIXME: Returning the 'err' will effect all the associations * associated with a socket, although only one of the paths of the * association is unreachable. @@ -737,22 +738,18 @@ no_route: * required. */ /* err = -EHOSTUNREACH; */ -err: - /* Control chunks are unreliable so just drop them. DATA chunks - * will get resent or dropped later. - */ + kfree_skb(head); +err: list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) { list_del_init(&chunk->list); if (!sctp_chunk_is_data(chunk)) sctp_chunk_free(chunk); } - goto out; -nomem: - if (packet->auth && list_empty(&packet->auth->list)) - sctp_chunk_free(packet->auth); - err = -ENOMEM; - goto err; + +out: + sctp_packet_reset(packet); + return err; } /******************************************************************** From 40773966ccf1985a1b2bb570a03cbeaf1cbd4e00 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Thu, 15 Sep 2016 19:11:52 -0300 Subject: [PATCH 1139/1715] openvswitch: fix flow stats accounting when node 0 is not possible On a system with only node 1 as possible, all statistics is going to be accounted on node 0 as it will have a single writer. However, when getting and clearing the statistics, node 0 is not going to be considered, as it's not a possible node. Tested that statistics are not zero on a system with only node 1 possible. Also compile-tested with CONFIG_NUMA off. Signed-off-by: Thadeu Lima de Souza Cascardo Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/flow.c | 6 ++++-- net/openvswitch/flow_table.c | 5 +++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 1240ae3b88d2..5b80612df182 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -142,7 +142,8 @@ void ovs_flow_stats_get(const struct sw_flow *flow, *tcp_flags = 0; memset(ovs_stats, 0, sizeof(*ovs_stats)); - for_each_node(node) { + /* We open code this to make sure node 0 is always considered */ + for (node = 0; node < MAX_NUMNODES; node = next_node(node, node_possible_map)) { struct flow_stats *stats = rcu_dereference_ovsl(flow->stats[node]); if (stats) { @@ -165,7 +166,8 @@ void ovs_flow_stats_clear(struct sw_flow *flow) { int node; - for_each_node(node) { + /* We open code this to make sure node 0 is always considered */ + for (node = 0; node < MAX_NUMNODES; node = next_node(node, node_possible_map)) { struct flow_stats *stats = ovsl_dereference(flow->stats[node]); if (stats) { diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index d073fff82fdb..957a3c31dbb0 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -148,8 +148,9 @@ static void flow_free(struct sw_flow *flow) kfree(flow->id.unmasked_key); if (flow->sf_acts) ovs_nla_free_flow_actions((struct sw_flow_actions __force *)flow->sf_acts); - for_each_node(node) - if (flow->stats[node]) + /* We open code this to make sure node 0 is always considered */ + for (node = 0; node < MAX_NUMNODES; node = next_node(node, node_possible_map)) + if (node != 0 && flow->stats[node]) kmem_cache_free(flow_stats_cache, (struct flow_stats __force *)flow->stats[node]); kmem_cache_free(flow_cache, flow); From db74a3335e0f645e3139c80bcfc90feb01d8e304 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Thu, 15 Sep 2016 19:11:53 -0300 Subject: [PATCH 1140/1715] openvswitch: use percpu flow stats Instead of using flow stats per NUMA node, use it per CPU. When using megaflows, the stats lock can be a bottleneck in scalability. On a E5-2690 12-core system, usual throughput went from ~4Mpps to ~15Mpps when forwarding between two 40GbE ports with a single flow configured on the datapath. This has been tested on a system with possible CPUs 0-7,16-23. After module removal, there were no corruption on the slab cache. Signed-off-by: Thadeu Lima de Souza Cascardo Cc: pravin shelar Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/flow.c | 42 +++++++++++++++++++----------------- net/openvswitch/flow.h | 4 ++-- net/openvswitch/flow_table.c | 26 ++++++++-------------- 3 files changed, 33 insertions(+), 39 deletions(-) diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 5b80612df182..0fa45439def1 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -72,32 +73,33 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags, { struct flow_stats *stats; int node = numa_node_id(); + int cpu = smp_processor_id(); int len = skb->len + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0); - stats = rcu_dereference(flow->stats[node]); + stats = rcu_dereference(flow->stats[cpu]); - /* Check if already have node-specific stats. */ + /* Check if already have CPU-specific stats. */ if (likely(stats)) { spin_lock(&stats->lock); /* Mark if we write on the pre-allocated stats. */ - if (node == 0 && unlikely(flow->stats_last_writer != node)) - flow->stats_last_writer = node; + if (cpu == 0 && unlikely(flow->stats_last_writer != cpu)) + flow->stats_last_writer = cpu; } else { stats = rcu_dereference(flow->stats[0]); /* Pre-allocated. */ spin_lock(&stats->lock); - /* If the current NUMA-node is the only writer on the + /* If the current CPU is the only writer on the * pre-allocated stats keep using them. */ - if (unlikely(flow->stats_last_writer != node)) { + if (unlikely(flow->stats_last_writer != cpu)) { /* A previous locker may have already allocated the - * stats, so we need to check again. If node-specific + * stats, so we need to check again. If CPU-specific * stats were already allocated, we update the pre- * allocated stats as we have already locked them. */ - if (likely(flow->stats_last_writer != NUMA_NO_NODE) - && likely(!rcu_access_pointer(flow->stats[node]))) { - /* Try to allocate node-specific stats. */ + if (likely(flow->stats_last_writer != -1) && + likely(!rcu_access_pointer(flow->stats[cpu]))) { + /* Try to allocate CPU-specific stats. */ struct flow_stats *new_stats; new_stats = @@ -114,12 +116,12 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags, new_stats->tcp_flags = tcp_flags; spin_lock_init(&new_stats->lock); - rcu_assign_pointer(flow->stats[node], + rcu_assign_pointer(flow->stats[cpu], new_stats); goto unlock; } } - flow->stats_last_writer = node; + flow->stats_last_writer = cpu; } } @@ -136,15 +138,15 @@ void ovs_flow_stats_get(const struct sw_flow *flow, struct ovs_flow_stats *ovs_stats, unsigned long *used, __be16 *tcp_flags) { - int node; + int cpu; *used = 0; *tcp_flags = 0; memset(ovs_stats, 0, sizeof(*ovs_stats)); - /* We open code this to make sure node 0 is always considered */ - for (node = 0; node < MAX_NUMNODES; node = next_node(node, node_possible_map)) { - struct flow_stats *stats = rcu_dereference_ovsl(flow->stats[node]); + /* We open code this to make sure cpu 0 is always considered */ + for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, cpu_possible_mask)) { + struct flow_stats *stats = rcu_dereference_ovsl(flow->stats[cpu]); if (stats) { /* Local CPU may write on non-local stats, so we must @@ -164,11 +166,11 @@ void ovs_flow_stats_get(const struct sw_flow *flow, /* Called with ovs_mutex. */ void ovs_flow_stats_clear(struct sw_flow *flow) { - int node; + int cpu; - /* We open code this to make sure node 0 is always considered */ - for (node = 0; node < MAX_NUMNODES; node = next_node(node, node_possible_map)) { - struct flow_stats *stats = ovsl_dereference(flow->stats[node]); + /* We open code this to make sure cpu 0 is always considered */ + for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, cpu_possible_mask)) { + struct flow_stats *stats = ovsl_dereference(flow->stats[cpu]); if (stats) { spin_lock_bh(&stats->lock); diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index 156a3029c17b..ae783f5c6695 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -178,14 +178,14 @@ struct sw_flow { struct hlist_node node[2]; u32 hash; } flow_table, ufid_table; - int stats_last_writer; /* NUMA-node id of the last writer on + int stats_last_writer; /* CPU id of the last writer on * 'stats[0]'. */ struct sw_flow_key key; struct sw_flow_id id; struct sw_flow_mask *mask; struct sw_flow_actions __rcu *sf_acts; - struct flow_stats __rcu *stats[]; /* One for each NUMA node. First one + struct flow_stats __rcu *stats[]; /* One for each CPU. First one * is allocated at flow creation time, * the rest are allocated on demand * while holding the 'stats[0].lock'. diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index 957a3c31dbb0..ea7a8073fa02 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -79,17 +80,12 @@ struct sw_flow *ovs_flow_alloc(void) { struct sw_flow *flow; struct flow_stats *stats; - int node; - flow = kmem_cache_alloc(flow_cache, GFP_KERNEL); + flow = kmem_cache_zalloc(flow_cache, GFP_KERNEL); if (!flow) return ERR_PTR(-ENOMEM); - flow->sf_acts = NULL; - flow->mask = NULL; - flow->id.unmasked_key = NULL; - flow->id.ufid_len = 0; - flow->stats_last_writer = NUMA_NO_NODE; + flow->stats_last_writer = -1; /* Initialize the default stat node. */ stats = kmem_cache_alloc_node(flow_stats_cache, @@ -102,10 +98,6 @@ struct sw_flow *ovs_flow_alloc(void) RCU_INIT_POINTER(flow->stats[0], stats); - for_each_node(node) - if (node != 0) - RCU_INIT_POINTER(flow->stats[node], NULL); - return flow; err: kmem_cache_free(flow_cache, flow); @@ -142,17 +134,17 @@ static struct flex_array *alloc_buckets(unsigned int n_buckets) static void flow_free(struct sw_flow *flow) { - int node; + int cpu; if (ovs_identifier_is_key(&flow->id)) kfree(flow->id.unmasked_key); if (flow->sf_acts) ovs_nla_free_flow_actions((struct sw_flow_actions __force *)flow->sf_acts); - /* We open code this to make sure node 0 is always considered */ - for (node = 0; node < MAX_NUMNODES; node = next_node(node, node_possible_map)) - if (node != 0 && flow->stats[node]) + /* We open code this to make sure cpu 0 is always considered */ + for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, cpu_possible_mask)) + if (flow->stats[cpu]) kmem_cache_free(flow_stats_cache, - (struct flow_stats __force *)flow->stats[node]); + (struct flow_stats __force *)flow->stats[cpu]); kmem_cache_free(flow_cache, flow); } @@ -757,7 +749,7 @@ int ovs_flow_init(void) BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long)); flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow) - + (nr_node_ids + + (nr_cpu_ids * sizeof(struct flow_stats *)), 0, 0, NULL); if (flow_cache == NULL) From 695b4ec0f0a9cf29deabd3ac075911d58b31f42b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 15 Sep 2016 16:20:01 -0700 Subject: [PATCH 1141/1715] pkt_sched: fq: use proper locking in fq_dump_stats() When fq is used on 32bit kernels, we need to lock the qdisc before copying 64bit fields. Otherwise "tc -s qdisc ..." might report bogus values. Fixes: afe4fd062416 ("pkt_sched: fq: Fair Queue packet scheduler") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_fq.c | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index e5458b99e09c..dc52cc10d6ed 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -823,20 +823,24 @@ nla_put_failure: static int fq_dump_stats(struct Qdisc *sch, struct gnet_dump *d) { struct fq_sched_data *q = qdisc_priv(sch); - u64 now = ktime_get_ns(); - struct tc_fq_qd_stats st = { - .gc_flows = q->stat_gc_flows, - .highprio_packets = q->stat_internal_packets, - .tcp_retrans = q->stat_tcp_retrans, - .throttled = q->stat_throttled, - .flows_plimit = q->stat_flows_plimit, - .pkts_too_long = q->stat_pkts_too_long, - .allocation_errors = q->stat_allocation_errors, - .flows = q->flows, - .inactive_flows = q->inactive_flows, - .throttled_flows = q->throttled_flows, - .time_next_delayed_flow = q->time_next_delayed_flow - now, - }; + struct tc_fq_qd_stats st; + + sch_tree_lock(sch); + + st.gc_flows = q->stat_gc_flows; + st.highprio_packets = q->stat_internal_packets; + st.tcp_retrans = q->stat_tcp_retrans; + st.throttled = q->stat_throttled; + st.flows_plimit = q->stat_flows_plimit; + st.pkts_too_long = q->stat_pkts_too_long; + st.allocation_errors = q->stat_allocation_errors; + st.time_next_delayed_flow = q->time_next_delayed_flow - ktime_get_ns(); + st.flows = q->flows; + st.inactive_flows = q->inactive_flows; + st.throttled_flows = q->throttled_flows; + st.pad = 0; + + sch_tree_unlock(sch); return gnet_stats_copy_app(d, &st, sizeof(st)); } From 22da73492541736eff5f6a6634c732e36c52a133 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 16 Sep 2016 10:43:38 +0100 Subject: [PATCH 1142/1715] net: r6040: add in missing white space in error message text A couple of dev_err messages span two lines and the literal string is missing a white space between words. Add the white space and join the two lines into one. Signed-off-by: Colin Ian King Acked-by: FLorian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/rdc/r6040.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/rdc/r6040.c b/drivers/net/ethernet/rdc/r6040.c index cb29ee24cf1b..5ef5d728c250 100644 --- a/drivers/net/ethernet/rdc/r6040.c +++ b/drivers/net/ethernet/rdc/r6040.c @@ -1062,14 +1062,12 @@ static int r6040_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* this should always be supported */ err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); if (err) { - dev_err(&pdev->dev, "32-bit PCI DMA addresses" - "not supported by the card\n"); + dev_err(&pdev->dev, "32-bit PCI DMA addresses not supported by the card\n"); goto err_out_disable_dev; } err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); if (err) { - dev_err(&pdev->dev, "32-bit PCI DMA addresses" - "not supported by the card\n"); + dev_err(&pdev->dev, "32-bit PCI DMA addresses not supported by the card\n"); goto err_out_disable_dev; } From 2c9d85d4d82d9e0a62aad08bf50650804e68ed30 Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Fri, 16 Sep 2016 15:05:36 +0200 Subject: [PATCH 1143/1715] netdevice: Add offload statistics ndo Add a new ndo to return statistics for offloaded operation. Since there can be many different offloaded operation with many stats types, the ndo gets an attribute id by which it knows which stats are wanted. The ndo also gets a void pointer to be cast according to the attribute id. Signed-off-by: Nogah Frankel Signed-off-by: Jiri Pirko Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/linux/netdevice.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2095b6ab3661..a10d8d18ce19 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -924,6 +924,14 @@ struct netdev_xdp { * 3. Update dev->stats asynchronously and atomically, and define * neither operation. * + * bool (*ndo_has_offload_stats)(int attr_id) + * Return true if this device supports offload stats of this attr_id. + * + * int (*ndo_get_offload_stats)(int attr_id, const struct net_device *dev, + * void *attr_data) + * Get statistics for offload operations by attr_id. Write it into the + * attr_data pointer. + * * int (*ndo_vlan_rx_add_vid)(struct net_device *dev, __be16 proto, u16 vid); * If device supports VLAN filtering this function is called when a * VLAN id is registered. @@ -1155,6 +1163,10 @@ struct net_device_ops { struct rtnl_link_stats64* (*ndo_get_stats64)(struct net_device *dev, struct rtnl_link_stats64 *storage); + bool (*ndo_has_offload_stats)(int attr_id); + int (*ndo_get_offload_stats)(int attr_id, + const struct net_device *dev, + void *attr_data); struct net_device_stats* (*ndo_get_stats)(struct net_device *dev); int (*ndo_vlan_rx_add_vid)(struct net_device *dev, From 69ae6ad2ff37911903a90256e216d7e7ae460002 Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Fri, 16 Sep 2016 15:05:37 +0200 Subject: [PATCH 1144/1715] net: core: Add offload stats to if_stats_msg Add a nested attribute of offload stats to if_stats_msg named IFLA_STATS_LINK_OFFLOAD_XSTATS. Under it, add SW stats, meaning stats only per packets that went via slowpath to the cpu, named IFLA_OFFLOAD_XSTATS_CPU_HIT. Signed-off-by: Nogah Frankel Signed-off-by: Jiri Pirko Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 9 +++ net/core/rtnetlink.c | 111 +++++++++++++++++++++++++++++++++-- 2 files changed, 116 insertions(+), 4 deletions(-) diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 9bf3aecfe05b..2351776a724f 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -826,6 +826,7 @@ enum { IFLA_STATS_LINK_64, IFLA_STATS_LINK_XSTATS, IFLA_STATS_LINK_XSTATS_SLAVE, + IFLA_STATS_LINK_OFFLOAD_XSTATS, __IFLA_STATS_MAX, }; @@ -845,6 +846,14 @@ enum { }; #define LINK_XSTATS_TYPE_MAX (__LINK_XSTATS_TYPE_MAX - 1) +/* These are stats embedded into IFLA_STATS_LINK_OFFLOAD_XSTATS */ +enum { + IFLA_OFFLOAD_XSTATS_UNSPEC, + IFLA_OFFLOAD_XSTATS_CPU_HIT, /* struct rtnl_link_stats64 */ + __IFLA_OFFLOAD_XSTATS_MAX +}; +#define IFLA_OFFLOAD_XSTATS_MAX (__IFLA_OFFLOAD_XSTATS_MAX - 1) + /* XDP section */ enum { diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 937e459bdaa9..0dbae4244a89 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3577,6 +3577,91 @@ static bool stats_attr_valid(unsigned int mask, int attrid, int idxattr) (!idxattr || idxattr == attrid); } +#define IFLA_OFFLOAD_XSTATS_FIRST (IFLA_OFFLOAD_XSTATS_UNSPEC + 1) +static int rtnl_get_offload_stats_attr_size(int attr_id) +{ + switch (attr_id) { + case IFLA_OFFLOAD_XSTATS_CPU_HIT: + return sizeof(struct rtnl_link_stats64); + } + + return 0; +} + +static int rtnl_get_offload_stats(struct sk_buff *skb, struct net_device *dev, + int *prividx) +{ + struct nlattr *attr = NULL; + int attr_id, size; + void *attr_data; + int err; + + if (!(dev->netdev_ops && dev->netdev_ops->ndo_has_offload_stats && + dev->netdev_ops->ndo_get_offload_stats)) + return -ENODATA; + + for (attr_id = IFLA_OFFLOAD_XSTATS_FIRST; + attr_id <= IFLA_OFFLOAD_XSTATS_MAX; attr_id++) { + if (attr_id < *prividx) + continue; + + size = rtnl_get_offload_stats_attr_size(attr_id); + if (!size) + continue; + + if (!dev->netdev_ops->ndo_has_offload_stats(attr_id)) + continue; + + attr = nla_reserve_64bit(skb, attr_id, size, + IFLA_OFFLOAD_XSTATS_UNSPEC); + if (!attr) + goto nla_put_failure; + + attr_data = nla_data(attr); + memset(attr_data, 0, size); + err = dev->netdev_ops->ndo_get_offload_stats(attr_id, dev, + attr_data); + if (err) + goto get_offload_stats_failure; + } + + if (!attr) + return -ENODATA; + + *prividx = 0; + return 0; + +nla_put_failure: + err = -EMSGSIZE; +get_offload_stats_failure: + *prividx = attr_id; + return err; +} + +static int rtnl_get_offload_stats_size(const struct net_device *dev) +{ + int nla_size = 0; + int attr_id; + int size; + + if (!(dev->netdev_ops && dev->netdev_ops->ndo_has_offload_stats && + dev->netdev_ops->ndo_get_offload_stats)) + return 0; + + for (attr_id = IFLA_OFFLOAD_XSTATS_FIRST; + attr_id <= IFLA_OFFLOAD_XSTATS_MAX; attr_id++) { + if (!dev->netdev_ops->ndo_has_offload_stats(attr_id)) + continue; + size = rtnl_get_offload_stats_attr_size(attr_id); + nla_size += nla_total_size_64bit(size); + } + + if (nla_size != 0) + nla_size += nla_total_size(0); + + return nla_size; +} + static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev, int type, u32 pid, u32 seq, u32 change, unsigned int flags, unsigned int filter_mask, @@ -3586,6 +3671,7 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev, struct nlmsghdr *nlh; struct nlattr *attr; int s_prividx = *prividx; + int err; ASSERT_RTNL(); @@ -3614,8 +3700,6 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev, const struct rtnl_link_ops *ops = dev->rtnl_link_ops; if (ops && ops->fill_linkxstats) { - int err; - *idxattr = IFLA_STATS_LINK_XSTATS; attr = nla_nest_start(skb, IFLA_STATS_LINK_XSTATS); @@ -3639,8 +3723,6 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev, if (master) ops = master->rtnl_link_ops; if (ops && ops->fill_linkxstats) { - int err; - *idxattr = IFLA_STATS_LINK_XSTATS_SLAVE; attr = nla_nest_start(skb, IFLA_STATS_LINK_XSTATS_SLAVE); @@ -3655,6 +3737,24 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev, } } + if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_OFFLOAD_XSTATS, + *idxattr)) { + *idxattr = IFLA_STATS_LINK_OFFLOAD_XSTATS; + attr = nla_nest_start(skb, IFLA_STATS_LINK_OFFLOAD_XSTATS); + if (!attr) + goto nla_put_failure; + + err = rtnl_get_offload_stats(skb, dev, prividx); + if (err == -ENODATA) + nla_nest_cancel(skb, attr); + else + nla_nest_end(skb, attr); + + if (err && err != -ENODATA) + goto nla_put_failure; + *idxattr = 0; + } + nlmsg_end(skb, nlh); return 0; @@ -3708,6 +3808,9 @@ static size_t if_nlmsg_stats_size(const struct net_device *dev, } } + if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_OFFLOAD_XSTATS, 0)) + size += rtnl_get_offload_stats_size(dev); + return size; } From fc1bbb0f1831cc22326c86fb21d88cca44999b3e Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Fri, 16 Sep 2016 15:05:38 +0200 Subject: [PATCH 1145/1715] mlxsw: spectrum: Implement offload stats ndo and expose HW stats by default Change the default statistics ndo to return HW statistics (like the one returned by ethtool_ops). The HW stats are collected to a cache by delayed work every 1 sec. Implement the offload stat ndo. Add a function to get SW statistics, to be called from this function. Signed-off-by: Nogah Frankel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum.c | 129 +++++++++++++++++- .../net/ethernet/mellanox/mlxsw/spectrum.h | 5 + 2 files changed, 127 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 27bbcaf9cfcd..171f8dd19efa 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -819,9 +819,9 @@ err_span_port_mtu_update: return err; } -static struct rtnl_link_stats64 * -mlxsw_sp_port_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *stats) +int +mlxsw_sp_port_get_sw_stats64(const struct net_device *dev, + struct rtnl_link_stats64 *stats) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); struct mlxsw_sp_port_pcpu_stats *p; @@ -848,6 +848,107 @@ mlxsw_sp_port_get_stats64(struct net_device *dev, tx_dropped += p->tx_dropped; } stats->tx_dropped = tx_dropped; + return 0; +} + +bool mlxsw_sp_port_has_offload_stats(int attr_id) +{ + switch (attr_id) { + case IFLA_OFFLOAD_XSTATS_CPU_HIT: + return true; + } + + return false; +} + +int mlxsw_sp_port_get_offload_stats(int attr_id, const struct net_device *dev, + void *sp) +{ + switch (attr_id) { + case IFLA_OFFLOAD_XSTATS_CPU_HIT: + return mlxsw_sp_port_get_sw_stats64(dev, sp); + } + + return -EINVAL; +} + +static int mlxsw_sp_port_get_stats_raw(struct net_device *dev, int grp, + int prio, char *ppcnt_pl) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + + mlxsw_reg_ppcnt_pack(ppcnt_pl, mlxsw_sp_port->local_port, grp, prio); + return mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ppcnt), ppcnt_pl); +} + +static int mlxsw_sp_port_get_hw_stats(struct net_device *dev, + struct rtnl_link_stats64 *stats) +{ + char ppcnt_pl[MLXSW_REG_PPCNT_LEN]; + int err; + + err = mlxsw_sp_port_get_stats_raw(dev, MLXSW_REG_PPCNT_IEEE_8023_CNT, + 0, ppcnt_pl); + if (err) + goto out; + + stats->tx_packets = + mlxsw_reg_ppcnt_a_frames_transmitted_ok_get(ppcnt_pl); + stats->rx_packets = + mlxsw_reg_ppcnt_a_frames_received_ok_get(ppcnt_pl); + stats->tx_bytes = + mlxsw_reg_ppcnt_a_octets_transmitted_ok_get(ppcnt_pl); + stats->rx_bytes = + mlxsw_reg_ppcnt_a_octets_received_ok_get(ppcnt_pl); + stats->multicast = + mlxsw_reg_ppcnt_a_multicast_frames_received_ok_get(ppcnt_pl); + + stats->rx_crc_errors = + mlxsw_reg_ppcnt_a_frame_check_sequence_errors_get(ppcnt_pl); + stats->rx_frame_errors = + mlxsw_reg_ppcnt_a_alignment_errors_get(ppcnt_pl); + + stats->rx_length_errors = ( + mlxsw_reg_ppcnt_a_in_range_length_errors_get(ppcnt_pl) + + mlxsw_reg_ppcnt_a_out_of_range_length_field_get(ppcnt_pl) + + mlxsw_reg_ppcnt_a_frame_too_long_errors_get(ppcnt_pl)); + + stats->rx_errors = (stats->rx_crc_errors + + stats->rx_frame_errors + stats->rx_length_errors); + +out: + return err; +} + +static void update_stats_cache(struct work_struct *work) +{ + struct mlxsw_sp_port *mlxsw_sp_port = + container_of(work, struct mlxsw_sp_port, + hw_stats.update_dw.work); + + if (!netif_carrier_ok(mlxsw_sp_port->dev)) + goto out; + + mlxsw_sp_port_get_hw_stats(mlxsw_sp_port->dev, + mlxsw_sp_port->hw_stats.cache); + +out: + mlxsw_core_schedule_dw(&mlxsw_sp_port->hw_stats.update_dw, + MLXSW_HW_STATS_UPDATE_TIME); +} + +/* Return the stats from a cache that is updated periodically, + * as this function might get called in an atomic context. + */ +static struct rtnl_link_stats64 * +mlxsw_sp_port_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *stats) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + + memcpy(stats, mlxsw_sp_port->hw_stats.cache, sizeof(*stats)); + return stats; } @@ -1209,6 +1310,8 @@ static const struct net_device_ops mlxsw_sp_port_netdev_ops = { .ndo_set_mac_address = mlxsw_sp_port_set_mac_address, .ndo_change_mtu = mlxsw_sp_port_change_mtu, .ndo_get_stats64 = mlxsw_sp_port_get_stats64, + .ndo_has_offload_stats = mlxsw_sp_port_has_offload_stats, + .ndo_get_offload_stats = mlxsw_sp_port_get_offload_stats, .ndo_vlan_rx_add_vid = mlxsw_sp_port_add_vid, .ndo_vlan_rx_kill_vid = mlxsw_sp_port_kill_vid, .ndo_neigh_construct = mlxsw_sp_router_neigh_construct, @@ -1547,8 +1650,6 @@ static void __mlxsw_sp_port_get_stats(struct net_device *dev, enum mlxsw_reg_ppcnt_grp grp, int prio, u64 *data, int data_index) { - struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; struct mlxsw_sp_port_hw_stats *hw_stats; char ppcnt_pl[MLXSW_REG_PPCNT_LEN]; int i, len; @@ -1557,8 +1658,7 @@ static void __mlxsw_sp_port_get_stats(struct net_device *dev, err = mlxsw_sp_get_hw_stats_by_group(&hw_stats, &len, grp); if (err) return; - mlxsw_reg_ppcnt_pack(ppcnt_pl, mlxsw_sp_port->local_port, grp, prio); - err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ppcnt), ppcnt_pl); + mlxsw_sp_port_get_stats_raw(dev, grp, prio, ppcnt_pl); for (i = 0; i < len; i++) data[data_index + i] = !err ? hw_stats[i].getter(ppcnt_pl) : 0; } @@ -2145,6 +2245,16 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, goto err_alloc_stats; } + mlxsw_sp_port->hw_stats.cache = + kzalloc(sizeof(*mlxsw_sp_port->hw_stats.cache), GFP_KERNEL); + + if (!mlxsw_sp_port->hw_stats.cache) { + err = -ENOMEM; + goto err_alloc_hw_stats; + } + INIT_DELAYED_WORK(&mlxsw_sp_port->hw_stats.update_dw, + &update_stats_cache); + dev->netdev_ops = &mlxsw_sp_port_netdev_ops; dev->ethtool_ops = &mlxsw_sp_port_ethtool_ops; @@ -2245,6 +2355,7 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, goto err_core_port_init; } + mlxsw_core_schedule_dw(&mlxsw_sp_port->hw_stats.update_dw, 0); return 0; err_core_port_init: @@ -2265,6 +2376,8 @@ err_port_system_port_mapping_set: err_dev_addr_init: mlxsw_sp_port_swid_set(mlxsw_sp_port, MLXSW_PORT_SWID_DISABLED_PORT); err_port_swid_set: + kfree(mlxsw_sp_port->hw_stats.cache); +err_alloc_hw_stats: free_percpu(mlxsw_sp_port->pcpu_stats); err_alloc_stats: kfree(mlxsw_sp_port->untagged_vlans); @@ -2281,6 +2394,7 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port) if (!mlxsw_sp_port) return; + cancel_delayed_work_sync(&mlxsw_sp_port->hw_stats.update_dw); mlxsw_core_port_fini(&mlxsw_sp_port->core_port); unregister_netdev(mlxsw_sp_port->dev); /* This calls ndo_stop */ mlxsw_sp->ports[local_port] = NULL; @@ -2290,6 +2404,7 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port) mlxsw_sp_port_swid_set(mlxsw_sp_port, MLXSW_PORT_SWID_DISABLED_PORT); mlxsw_sp_port_module_unmap(mlxsw_sp, mlxsw_sp_port->local_port); free_percpu(mlxsw_sp_port->pcpu_stats); + kfree(mlxsw_sp_port->hw_stats.cache); kfree(mlxsw_sp_port->untagged_vlans); kfree(mlxsw_sp_port->active_vlans); WARN_ON_ONCE(!list_empty(&mlxsw_sp_port->vports_list)); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 969c250b3048..49f4cafce148 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -361,6 +361,11 @@ struct mlxsw_sp_port { struct list_head vports_list; /* TC handles */ struct list_head mall_tc_list; + struct { + #define MLXSW_HW_STATS_UPDATE_TIME HZ + struct rtnl_link_stats64 *cache; + struct delayed_work update_dw; + } hw_stats; }; struct mlxsw_sp_port *mlxsw_sp_port_lower_dev_hold(struct net_device *dev); From d409b84768037ad03d1d73538d99fb902adf7365 Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Fri, 16 Sep 2016 12:59:08 -0700 Subject: [PATCH 1146/1715] ipv6: Export p6_route_input_lookup symbol Make ip6_route_input_lookup available outside of ipv6 the module similar to ip_route_input_noref in the IPv4 world. Signed-off-by: Mahesh Bandewar Signed-off-by: David S. Miller --- include/net/ip6_route.h | 3 +++ net/ipv6/route.c | 7 ++++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index d97305d0e71f..e0cd318d5103 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -64,6 +64,9 @@ static inline bool rt6_need_strict(const struct in6_addr *daddr) } void ip6_route_input(struct sk_buff *skb); +struct dst_entry *ip6_route_input_lookup(struct net *net, + struct net_device *dev, + struct flowi6 *fl6, int flags); struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk, struct flowi6 *fl6, int flags); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index ad4a7ff301fc..4dab585f7642 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1147,15 +1147,16 @@ static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table * return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags); } -static struct dst_entry *ip6_route_input_lookup(struct net *net, - struct net_device *dev, - struct flowi6 *fl6, int flags) +struct dst_entry *ip6_route_input_lookup(struct net *net, + struct net_device *dev, + struct flowi6 *fl6, int flags) { if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG) flags |= RT6_LOOKUP_F_IFACE; return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input); } +EXPORT_SYMBOL_GPL(ip6_route_input_lookup); void ip6_route_input(struct sk_buff *skb) { From e8bffe0cf964f0330595bb376b74921cccdaac88 Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Fri, 16 Sep 2016 12:59:13 -0700 Subject: [PATCH 1147/1715] net: Add _nf_(un)register_hooks symbols Add _nf_register_hooks() and _nf_unregister_hooks() calls which allow caller to hold RTNL mutex. Signed-off-by: Mahesh Bandewar CC: Pablo Neira Ayuso Signed-off-by: David S. Miller --- include/linux/netfilter.h | 2 ++ net/netfilter/core.c | 51 +++++++++++++++++++++++++++++++++++---- 2 files changed, 48 insertions(+), 5 deletions(-) diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 9230f9aee896..e82b76781bf6 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -133,6 +133,8 @@ int nf_register_hook(struct nf_hook_ops *reg); void nf_unregister_hook(struct nf_hook_ops *reg); int nf_register_hooks(struct nf_hook_ops *reg, unsigned int n); void nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n); +int _nf_register_hooks(struct nf_hook_ops *reg, unsigned int n); +void _nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n); /* Functions to register get/setsockopt ranges (non-inclusive). You need to check permissions yourself! */ diff --git a/net/netfilter/core.c b/net/netfilter/core.c index f39276d1c2d7..2c5327e43a88 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -188,19 +188,17 @@ EXPORT_SYMBOL(nf_unregister_net_hooks); static LIST_HEAD(nf_hook_list); -int nf_register_hook(struct nf_hook_ops *reg) +static int _nf_register_hook(struct nf_hook_ops *reg) { struct net *net, *last; int ret; - rtnl_lock(); for_each_net(net) { ret = nf_register_net_hook(net, reg); if (ret && ret != -ENOENT) goto rollback; } list_add_tail(®->list, &nf_hook_list); - rtnl_unlock(); return 0; rollback: @@ -210,19 +208,34 @@ rollback: break; nf_unregister_net_hook(net, reg); } + return ret; +} + +int nf_register_hook(struct nf_hook_ops *reg) +{ + int ret; + + rtnl_lock(); + ret = _nf_register_hook(reg); rtnl_unlock(); + return ret; } EXPORT_SYMBOL(nf_register_hook); -void nf_unregister_hook(struct nf_hook_ops *reg) +static void _nf_unregister_hook(struct nf_hook_ops *reg) { struct net *net; - rtnl_lock(); list_del(®->list); for_each_net(net) nf_unregister_net_hook(net, reg); +} + +void nf_unregister_hook(struct nf_hook_ops *reg) +{ + rtnl_lock(); + _nf_unregister_hook(reg); rtnl_unlock(); } EXPORT_SYMBOL(nf_unregister_hook); @@ -246,6 +259,26 @@ err: } EXPORT_SYMBOL(nf_register_hooks); +/* Caller MUST take rtnl_lock() */ +int _nf_register_hooks(struct nf_hook_ops *reg, unsigned int n) +{ + unsigned int i; + int err = 0; + + for (i = 0; i < n; i++) { + err = _nf_register_hook(®[i]); + if (err) + goto err; + } + return err; + +err: + if (i > 0) + _nf_unregister_hooks(reg, i); + return err; +} +EXPORT_SYMBOL(_nf_register_hooks); + void nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n) { while (n-- > 0) @@ -253,6 +286,14 @@ void nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n) } EXPORT_SYMBOL(nf_unregister_hooks); +/* Caller MUST take rtnl_lock */ +void _nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n) +{ + while (n-- > 0) + _nf_unregister_hook(®[n]); +} +EXPORT_SYMBOL(_nf_unregister_hooks); + unsigned int nf_iterate(struct list_head *head, struct sk_buff *skb, struct nf_hook_state *state, From 4fbae7d83c98c30efcf0a2a2ac55fbb75ef5a1a5 Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Fri, 16 Sep 2016 12:59:19 -0700 Subject: [PATCH 1148/1715] ipvlan: Introduce l3s mode In a typical IPvlan L3 setup where master is in default-ns and each slave is into different (slave) ns. In this setup egress packet processing for traffic originating from slave-ns will hit all NF_HOOKs in slave-ns as well as default-ns. However same is not true for ingress processing. All these NF_HOOKs are hit only in the slave-ns skipping them in the default-ns. IPvlan in L3 mode is restrictive and if admins want to deploy iptables rules in default-ns, this asymmetric data path makes it impossible to do so. This patch makes use of the l3_rcv() (added as part of l3mdev enhancements) to perform input route lookup on RX packets without changing the skb->dev and then uses nf_hook at NF_INET_LOCAL_IN to change the skb->dev just before handing over skb to L4. Signed-off-by: Mahesh Bandewar CC: David Ahern Reviewed-by: David Ahern Signed-off-by: David S. Miller --- Documentation/networking/ipvlan.txt | 7 ++- drivers/net/Kconfig | 1 + drivers/net/ipvlan/ipvlan.h | 6 ++ drivers/net/ipvlan/ipvlan_core.c | 94 +++++++++++++++++++++++++++++ drivers/net/ipvlan/ipvlan_main.c | 87 +++++++++++++++++++++++--- include/uapi/linux/if_link.h | 1 + 6 files changed, 188 insertions(+), 8 deletions(-) diff --git a/Documentation/networking/ipvlan.txt b/Documentation/networking/ipvlan.txt index 14422f8fcdc4..24196cef7c91 100644 --- a/Documentation/networking/ipvlan.txt +++ b/Documentation/networking/ipvlan.txt @@ -22,7 +22,7 @@ The driver can be built into the kernel (CONFIG_IPVLAN=y) or as a module There are no module parameters for this driver and it can be configured using IProute2/ip utility. - ip link add link type ipvlan mode { l2 | L3 } + ip link add link type ipvlan mode { l2 | l3 | l3s } e.g. ip link add link ipvl0 eth0 type ipvlan mode l2 @@ -48,6 +48,11 @@ master device for the L2 processing and routing from that instance will be used before packets are queued on the outbound device. In this mode the slaves will not receive nor can send multicast / broadcast traffic. +4.3 L3S mode: + This is very similar to the L3 mode except that iptables (conn-tracking) +works in this mode and hence it is L3-symmetric (L3s). This will have slightly less +performance but that shouldn't matter since you are choosing this mode over plain-L3 +mode to make conn-tracking work. 5. What to choose (macvlan vs. ipvlan)? These two devices are very similar in many regards and the specific use diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 0c5415b05ea9..8768a625350d 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -149,6 +149,7 @@ config IPVLAN tristate "IP-VLAN support" depends on INET depends on IPV6 + depends on NET_L3_MASTER_DEV ---help--- This allows one to create virtual devices off of a main interface and packets will be delivered based on the dest L3 (IPv6/IPv4 addr) diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h index 695a5dc9ace3..7e0732f5ea07 100644 --- a/drivers/net/ipvlan/ipvlan.h +++ b/drivers/net/ipvlan/ipvlan.h @@ -23,11 +23,13 @@ #include #include #include +#include #include #include #include #include #include +#include #define IPVLAN_DRV "ipvlan" #define IPV_DRV_VER "0.1" @@ -124,4 +126,8 @@ struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan, const void *iaddr, bool is_v6); bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6); void ipvlan_ht_addr_del(struct ipvl_addr *addr); +struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb, + u16 proto); +unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state); #endif /* __IPVLAN_H */ diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index b5f9511d819e..b4e990743e1d 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -560,6 +560,7 @@ int ipvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev) case IPVLAN_MODE_L2: return ipvlan_xmit_mode_l2(skb, dev); case IPVLAN_MODE_L3: + case IPVLAN_MODE_L3S: return ipvlan_xmit_mode_l3(skb, dev); } @@ -664,6 +665,8 @@ rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb) return ipvlan_handle_mode_l2(pskb, port); case IPVLAN_MODE_L3: return ipvlan_handle_mode_l3(pskb, port); + case IPVLAN_MODE_L3S: + return RX_HANDLER_PASS; } /* Should not reach here */ @@ -672,3 +675,94 @@ rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb) kfree_skb(skb); return RX_HANDLER_CONSUMED; } + +static struct ipvl_addr *ipvlan_skb_to_addr(struct sk_buff *skb, + struct net_device *dev) +{ + struct ipvl_addr *addr = NULL; + struct ipvl_port *port; + void *lyr3h; + int addr_type; + + if (!dev || !netif_is_ipvlan_port(dev)) + goto out; + + port = ipvlan_port_get_rcu(dev); + if (!port || port->mode != IPVLAN_MODE_L3S) + goto out; + + lyr3h = ipvlan_get_L3_hdr(skb, &addr_type); + if (!lyr3h) + goto out; + + addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true); +out: + return addr; +} + +struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb, + u16 proto) +{ + struct ipvl_addr *addr; + struct net_device *sdev; + + addr = ipvlan_skb_to_addr(skb, dev); + if (!addr) + goto out; + + sdev = addr->master->dev; + switch (proto) { + case AF_INET: + { + int err; + struct iphdr *ip4h = ip_hdr(skb); + + err = ip_route_input_noref(skb, ip4h->daddr, ip4h->saddr, + ip4h->tos, sdev); + if (unlikely(err)) + goto out; + break; + } + case AF_INET6: + { + struct dst_entry *dst; + struct ipv6hdr *ip6h = ipv6_hdr(skb); + int flags = RT6_LOOKUP_F_HAS_SADDR; + struct flowi6 fl6 = { + .flowi6_iif = sdev->ifindex, + .daddr = ip6h->daddr, + .saddr = ip6h->saddr, + .flowlabel = ip6_flowinfo(ip6h), + .flowi6_mark = skb->mark, + .flowi6_proto = ip6h->nexthdr, + }; + + skb_dst_drop(skb); + dst = ip6_route_input_lookup(dev_net(sdev), sdev, &fl6, flags); + skb_dst_set(skb, dst); + break; + } + default: + break; + } + +out: + return skb; +} + +unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct ipvl_addr *addr; + unsigned int len; + + addr = ipvlan_skb_to_addr(skb, skb->dev); + if (!addr) + goto out; + + skb->dev = addr->master->dev; + len = skb->len + ETH_HLEN; + ipvlan_count_rx(addr->master, len, true, false); +out: + return NF_ACCEPT; +} diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 18b4e8c7f68a..f442eb366863 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -9,24 +9,87 @@ #include "ipvlan.h" +static u32 ipvl_nf_hook_refcnt = 0; + +static struct nf_hook_ops ipvl_nfops[] __read_mostly = { + { + .hook = ipvlan_nf_input, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_LOCAL_IN, + .priority = INT_MAX, + }, + { + .hook = ipvlan_nf_input, + .pf = NFPROTO_IPV6, + .hooknum = NF_INET_LOCAL_IN, + .priority = INT_MAX, + }, +}; + +static struct l3mdev_ops ipvl_l3mdev_ops __read_mostly = { + .l3mdev_l3_rcv = ipvlan_l3_rcv, +}; + static void ipvlan_adjust_mtu(struct ipvl_dev *ipvlan, struct net_device *dev) { ipvlan->dev->mtu = dev->mtu - ipvlan->mtu_adj; } -static void ipvlan_set_port_mode(struct ipvl_port *port, u16 nval) +static int ipvlan_register_nf_hook(void) +{ + int err = 0; + + if (!ipvl_nf_hook_refcnt) { + err = _nf_register_hooks(ipvl_nfops, ARRAY_SIZE(ipvl_nfops)); + if (!err) + ipvl_nf_hook_refcnt = 1; + } else { + ipvl_nf_hook_refcnt++; + } + + return err; +} + +static void ipvlan_unregister_nf_hook(void) +{ + WARN_ON(!ipvl_nf_hook_refcnt); + + ipvl_nf_hook_refcnt--; + if (!ipvl_nf_hook_refcnt) + _nf_unregister_hooks(ipvl_nfops, ARRAY_SIZE(ipvl_nfops)); +} + +static int ipvlan_set_port_mode(struct ipvl_port *port, u16 nval) { struct ipvl_dev *ipvlan; + struct net_device *mdev = port->dev; + int err = 0; + ASSERT_RTNL(); if (port->mode != nval) { + if (nval == IPVLAN_MODE_L3S) { + /* New mode is L3S */ + err = ipvlan_register_nf_hook(); + if (!err) { + mdev->l3mdev_ops = &ipvl_l3mdev_ops; + mdev->priv_flags |= IFF_L3MDEV_MASTER; + } else + return err; + } else if (port->mode == IPVLAN_MODE_L3S) { + /* Old mode was L3S */ + mdev->priv_flags &= ~IFF_L3MDEV_MASTER; + ipvlan_unregister_nf_hook(); + mdev->l3mdev_ops = NULL; + } list_for_each_entry(ipvlan, &port->ipvlans, pnode) { - if (nval == IPVLAN_MODE_L3) + if (nval == IPVLAN_MODE_L3 || nval == IPVLAN_MODE_L3S) ipvlan->dev->flags |= IFF_NOARP; else ipvlan->dev->flags &= ~IFF_NOARP; } port->mode = nval; } + return err; } static int ipvlan_port_create(struct net_device *dev) @@ -74,6 +137,11 @@ static void ipvlan_port_destroy(struct net_device *dev) struct ipvl_port *port = ipvlan_port_get_rtnl(dev); dev->priv_flags &= ~IFF_IPVLAN_MASTER; + if (port->mode == IPVLAN_MODE_L3S) { + dev->priv_flags &= ~IFF_L3MDEV_MASTER; + ipvlan_unregister_nf_hook(); + dev->l3mdev_ops = NULL; + } netdev_rx_handler_unregister(dev); cancel_work_sync(&port->wq); __skb_queue_purge(&port->backlog); @@ -132,7 +200,8 @@ static int ipvlan_open(struct net_device *dev) struct net_device *phy_dev = ipvlan->phy_dev; struct ipvl_addr *addr; - if (ipvlan->port->mode == IPVLAN_MODE_L3) + if (ipvlan->port->mode == IPVLAN_MODE_L3 || + ipvlan->port->mode == IPVLAN_MODE_L3S) dev->flags |= IFF_NOARP; else dev->flags &= ~IFF_NOARP; @@ -372,13 +441,14 @@ static int ipvlan_nl_changelink(struct net_device *dev, { struct ipvl_dev *ipvlan = netdev_priv(dev); struct ipvl_port *port = ipvlan_port_get_rtnl(ipvlan->phy_dev); + int err = 0; if (data && data[IFLA_IPVLAN_MODE]) { u16 nmode = nla_get_u16(data[IFLA_IPVLAN_MODE]); - ipvlan_set_port_mode(port, nmode); + err = ipvlan_set_port_mode(port, nmode); } - return 0; + return err; } static size_t ipvlan_nl_getsize(const struct net_device *dev) @@ -473,10 +543,13 @@ static int ipvlan_link_new(struct net *src_net, struct net_device *dev, unregister_netdevice(dev); return err; } + err = ipvlan_set_port_mode(port, mode); + if (err) { + unregister_netdevice(dev); + return err; + } list_add_tail_rcu(&ipvlan->pnode, &port->ipvlans); - ipvlan_set_port_mode(port, mode); - netif_stacked_transfer_operstate(phy_dev, dev); return 0; } diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 2351776a724f..7ec9e99d5491 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -464,6 +464,7 @@ enum { enum ipvlan_mode { IPVLAN_MODE_L2 = 0, IPVLAN_MODE_L3, + IPVLAN_MODE_L3S, IPVLAN_MODE_MAX }; From 95357907ae73a8039c2106897ee2694f26ac3caf Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 16 Sep 2016 22:36:12 +0200 Subject: [PATCH 1149/1715] mlx4: fix XDP_TX is acting like XDP_PASS on TX ring full The XDP_TX action can fail transmitting the frame in case the TX ring is full or port is down. In case of TX failure it should drop the frame, and not as now call 'break' which is the same as XDP_PASS. Fixes: 9ecc2d86171a ("net/mlx4_en: add xdp forwarding and data write support") Signed-off-by: Jesper Dangaard Brouer Reviewed-by: Brenden Blanco Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 6758292311f4..c80073e4947f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -906,7 +906,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud length, tx_index, &doorbell_pending)) goto consumed; - break; + goto next; /* Drop on xmit failure */ default: bpf_warn_invalid_xdp_action(act); case XDP_ABORTED: From e8bc8f9a670e26e91562e724a2114243898bd616 Mon Sep 17 00:00:00 2001 From: Christophe Jaillet Date: Fri, 16 Sep 2016 23:05:35 +0200 Subject: [PATCH 1150/1715] sctp: Remove some redundant code In commit 311b21774f13 ("sctp: simplify sk_receive_queue locking"), a call to 'skb_queue_splice_tail_init()' has been made explicit. Previously it was hidden in 'sctp_skb_list_tail()' Now, the code around it looks redundant. The '_init()' part of 'skb_queue_splice_tail_init()' should already do the same. Signed-off-by: Christophe JAILLET Acked-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/ulpqueue.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index 877e55066f89..84d0fdaf7de9 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -140,11 +140,8 @@ int sctp_clear_pd(struct sock *sk, struct sctp_association *asoc) * we can go ahead and clear out the lobby in one shot */ if (!skb_queue_empty(&sp->pd_lobby)) { - struct list_head *list; skb_queue_splice_tail_init(&sp->pd_lobby, &sk->sk_receive_queue); - list = (struct list_head *)&sctp_sk(sk)->pd_lobby; - INIT_LIST_HEAD(list); return 1; } } else { From 0fbc81b3ad513fecaaf62b48f42b89fcd57f7682 Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Sat, 17 Sep 2016 08:12:39 +0530 Subject: [PATCH 1151/1715] chcr/cxgb4i/cxgbit/RDMA/cxgb4: Allocate resources dynamically for all cxgb4 ULD's Allocate resources dynamically to cxgb4's Upper layer driver's(ULD) like cxgbit, iw_cxgb4 and cxgb4i. Allocate resources when they register with cxgb4 driver and free them while unregistering. All the queues and the interrupts for them will be allocated during ULD probe only and freed during remove. Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- drivers/crypto/chelsio/chcr_core.c | 10 +- drivers/infiniband/hw/cxgb4/device.c | 4 + drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 47 +- .../ethernet/chelsio/cxgb4/cxgb4_debugfs.c | 127 +--- .../net/ethernet/chelsio/cxgb4/cxgb4_main.c | 611 ++++-------------- .../net/ethernet/chelsio/cxgb4/cxgb4_uld.c | 223 +++++-- .../net/ethernet/chelsio/cxgb4/cxgb4_uld.h | 31 +- drivers/net/ethernet/chelsio/cxgb4/sge.c | 18 +- drivers/scsi/cxgbi/cxgb4i/cxgb4i.c | 3 + drivers/target/iscsi/cxgbit/cxgbit_main.c | 3 + 10 files changed, 384 insertions(+), 693 deletions(-) diff --git a/drivers/crypto/chelsio/chcr_core.c b/drivers/crypto/chelsio/chcr_core.c index 2f6156b672ce..fb5f9bbfa09c 100644 --- a/drivers/crypto/chelsio/chcr_core.c +++ b/drivers/crypto/chelsio/chcr_core.c @@ -39,12 +39,10 @@ static chcr_handler_func work_handlers[NUM_CPL_CMDS] = { [CPL_FW6_PLD] = cpl_fw6_pld_handler, }; -static struct cxgb4_pci_uld_info chcr_uld_info = { +static struct cxgb4_uld_info chcr_uld_info = { .name = DRV_MODULE_NAME, - .nrxq = 4, + .nrxq = MAX_ULD_QSETS, .rxq_size = 1024, - .nciq = 0, - .ciq_size = 0, .add = chcr_uld_add, .state_change = chcr_uld_state_change, .rx_handler = chcr_uld_rx_handler, @@ -205,7 +203,7 @@ static int chcr_uld_state_change(void *handle, enum cxgb4_state state) static int __init chcr_crypto_init(void) { - if (cxgb4_register_pci_uld(CXGB4_PCI_ULD1, &chcr_uld_info)) { + if (cxgb4_register_uld(CXGB4_ULD_CRYPTO, &chcr_uld_info)) { pr_err("ULD register fail: No chcr crypto support in cxgb4"); return -1; } @@ -228,7 +226,7 @@ static void __exit chcr_crypto_exit(void) kfree(u_ctx); } mutex_unlock(&dev_mutex); - cxgb4_unregister_pci_uld(CXGB4_PCI_ULD1); + cxgb4_unregister_uld(CXGB4_ULD_CRYPTO); } module_init(chcr_crypto_init); diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index 071d7332ec06..f170b63e6eb9 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -1475,6 +1475,10 @@ static int c4iw_uld_control(void *handle, enum cxgb4_control control, ...) static struct cxgb4_uld_info c4iw_uld_info = { .name = DRV_NAME, + .nrxq = MAX_ULD_QSETS, + .rxq_size = 511, + .ciq = true, + .lro = false, .add = c4iw_uld_add, .rx_handler = c4iw_uld_rx_handler, .state_change = c4iw_uld_state_change, diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 45955691edc7..1f9867db3b78 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -437,11 +437,6 @@ enum { MAX_ETH_QSETS = 32, /* # of Ethernet Tx/Rx queue sets */ MAX_OFLD_QSETS = 16, /* # of offload Tx, iscsi Rx queue sets */ MAX_CTRL_QUEUES = NCHAN, /* # of control Tx queues */ - MAX_RDMA_QUEUES = NCHAN, /* # of streaming RDMA Rx queues */ - MAX_RDMA_CIQS = 32, /* # of RDMA concentrator IQs */ - - /* # of streaming iSCSIT Rx queues */ - MAX_ISCSIT_QUEUES = MAX_OFLD_QSETS, }; enum { @@ -458,8 +453,7 @@ enum { enum { INGQ_EXTRAS = 2, /* firmware event queue and */ /* forwarded interrupts */ - MAX_INGQ = MAX_ETH_QSETS + MAX_OFLD_QSETS + MAX_RDMA_QUEUES + - MAX_RDMA_CIQS + MAX_ISCSIT_QUEUES + INGQ_EXTRAS, + MAX_INGQ = MAX_ETH_QSETS + INGQ_EXTRAS, }; struct adapter; @@ -704,10 +698,6 @@ struct sge { struct sge_ctrl_txq ctrlq[MAX_CTRL_QUEUES]; struct sge_eth_rxq ethrxq[MAX_ETH_QSETS]; - struct sge_ofld_rxq iscsirxq[MAX_OFLD_QSETS]; - struct sge_ofld_rxq iscsitrxq[MAX_ISCSIT_QUEUES]; - struct sge_ofld_rxq rdmarxq[MAX_RDMA_QUEUES]; - struct sge_ofld_rxq rdmaciq[MAX_RDMA_CIQS]; struct sge_rspq fw_evtq ____cacheline_aligned_in_smp; struct sge_uld_rxq_info **uld_rxq_info; @@ -717,15 +707,8 @@ struct sge { u16 max_ethqsets; /* # of available Ethernet queue sets */ u16 ethqsets; /* # of active Ethernet queue sets */ u16 ethtxq_rover; /* Tx queue to clean up next */ - u16 iscsiqsets; /* # of active iSCSI queue sets */ - u16 niscsitq; /* # of available iSCST Rx queues */ - u16 rdmaqs; /* # of available RDMA Rx queues */ - u16 rdmaciqs; /* # of available RDMA concentrator IQs */ + u16 ofldqsets; /* # of active ofld queue sets */ u16 nqs_per_uld; /* # of Rx queues per ULD */ - u16 iscsi_rxq[MAX_OFLD_QSETS]; - u16 iscsit_rxq[MAX_ISCSIT_QUEUES]; - u16 rdma_rxq[MAX_RDMA_QUEUES]; - u16 rdma_ciq[MAX_RDMA_CIQS]; u16 timer_val[SGE_NTIMERS]; u8 counter_val[SGE_NCOUNTERS]; u32 fl_pg_order; /* large page allocation size */ @@ -749,10 +732,7 @@ struct sge { }; #define for_each_ethrxq(sge, i) for (i = 0; i < (sge)->ethqsets; i++) -#define for_each_iscsirxq(sge, i) for (i = 0; i < (sge)->iscsiqsets; i++) -#define for_each_iscsitrxq(sge, i) for (i = 0; i < (sge)->niscsitq; i++) -#define for_each_rdmarxq(sge, i) for (i = 0; i < (sge)->rdmaqs; i++) -#define for_each_rdmaciq(sge, i) for (i = 0; i < (sge)->rdmaciqs; i++) +#define for_each_ofldtxq(sge, i) for (i = 0; i < (sge)->ofldqsets; i++) struct l2t_data; @@ -786,6 +766,7 @@ struct uld_msix_bmap { struct uld_msix_info { unsigned short vec; char desc[IFNAMSIZ + 10]; + unsigned int idx; }; struct vf_info { @@ -818,7 +799,7 @@ struct adapter { } msix_info[MAX_INGQ + 1]; struct uld_msix_info *msix_info_ulds; /* msix info for uld's */ struct uld_msix_bmap msix_bmap_ulds; /* msix bitmap for all uld */ - unsigned int msi_idx; + int msi_idx; struct doorbell_stats db_stats; struct sge sge; @@ -836,9 +817,10 @@ struct adapter { unsigned int clipt_start; unsigned int clipt_end; struct clip_tbl *clipt; - struct cxgb4_pci_uld_info *uld; + struct cxgb4_uld_info *uld; void *uld_handle[CXGB4_ULD_MAX]; unsigned int num_uld; + unsigned int num_ofld_uld; struct list_head list_node; struct list_head rcu_node; struct list_head mac_hlist; /* list of MAC addresses in MPS Hash */ @@ -858,6 +840,8 @@ struct adapter { #define T4_OS_LOG_MBOX_CMDS 256 struct mbox_cmd_log *mbox_log; + struct mutex uld_mutex; + struct dentry *debugfs_root; bool use_bd; /* Use SGE Back Door intfc for reading SGE Contexts */ bool trace_rss; /* 1 implies that different RSS flit per filter is @@ -1051,6 +1035,11 @@ static inline int is_pci_uld(const struct adapter *adap) return adap->params.crypto; } +static inline int is_uld(const struct adapter *adap) +{ + return (adap->params.offload || adap->params.crypto); +} + static inline u32 t4_read_reg(struct adapter *adap, u32 reg_addr) { return readl(adap->regs + reg_addr); @@ -1277,6 +1266,8 @@ int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq, int t4_sge_alloc_ctrl_txq(struct adapter *adap, struct sge_ctrl_txq *txq, struct net_device *dev, unsigned int iqid, unsigned int cmplqid); +int t4_sge_mod_ctrl_txq(struct adapter *adap, unsigned int eqid, + unsigned int cmplqid); int t4_sge_alloc_ofld_txq(struct adapter *adap, struct sge_ofld_txq *txq, struct net_device *dev, unsigned int iqid); irqreturn_t t4_sge_intr_msix(int irq, void *cookie); @@ -1635,7 +1626,9 @@ void t4_idma_monitor(struct adapter *adapter, int hz, int ticks); int t4_set_vf_mac_acl(struct adapter *adapter, unsigned int vf, unsigned int naddr, u8 *addr); -void uld_mem_free(struct adapter *adap); -int uld_mem_alloc(struct adapter *adap); +void t4_uld_mem_free(struct adapter *adap); +int t4_uld_mem_alloc(struct adapter *adap); +void t4_uld_clean_up(struct adapter *adap); +void t4_register_netevent_notifier(void); void free_rspq_fl(struct adapter *adap, struct sge_rspq *rq, struct sge_fl *fl); #endif /* __CXGB4_H__ */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c index 91fb50850fff..52be9a4ef97a 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c @@ -2432,17 +2432,11 @@ static int sge_qinfo_show(struct seq_file *seq, void *v) { struct adapter *adap = seq->private; int eth_entries = DIV_ROUND_UP(adap->sge.ethqsets, 4); - int iscsi_entries = DIV_ROUND_UP(adap->sge.iscsiqsets, 4); - int iscsit_entries = DIV_ROUND_UP(adap->sge.niscsitq, 4); - int rdma_entries = DIV_ROUND_UP(adap->sge.rdmaqs, 4); - int ciq_entries = DIV_ROUND_UP(adap->sge.rdmaciqs, 4); + int ofld_entries = DIV_ROUND_UP(adap->sge.ofldqsets, 4); int ctrl_entries = DIV_ROUND_UP(MAX_CTRL_QUEUES, 4); int i, r = (uintptr_t)v - 1; - int iscsi_idx = r - eth_entries; - int iscsit_idx = iscsi_idx - iscsi_entries; - int rdma_idx = iscsit_idx - iscsit_entries; - int ciq_idx = rdma_idx - rdma_entries; - int ctrl_idx = ciq_idx - ciq_entries; + int ofld_idx = r - eth_entries; + int ctrl_idx = ofld_idx - ofld_entries; int fq_idx = ctrl_idx - ctrl_entries; if (r) @@ -2518,119 +2512,17 @@ do { \ RL("FLLow:", fl.low); RL("FLStarving:", fl.starving); - } else if (iscsi_idx < iscsi_entries) { - const struct sge_ofld_rxq *rx = - &adap->sge.iscsirxq[iscsi_idx * 4]; + } else if (ofld_idx < ofld_entries) { const struct sge_ofld_txq *tx = - &adap->sge.ofldtxq[iscsi_idx * 4]; - int n = min(4, adap->sge.iscsiqsets - 4 * iscsi_idx); + &adap->sge.ofldtxq[ofld_idx * 4]; + int n = min(4, adap->sge.ofldqsets - 4 * ofld_idx); - S("QType:", "iSCSI"); + S("QType:", "OFLD-Txq"); T("TxQ ID:", q.cntxt_id); T("TxQ size:", q.size); T("TxQ inuse:", q.in_use); T("TxQ CIDX:", q.cidx); T("TxQ PIDX:", q.pidx); - R("RspQ ID:", rspq.abs_id); - R("RspQ size:", rspq.size); - R("RspQE size:", rspq.iqe_len); - R("RspQ CIDX:", rspq.cidx); - R("RspQ Gen:", rspq.gen); - S3("u", "Intr delay:", qtimer_val(adap, &rx[i].rspq)); - S3("u", "Intr pktcnt:", - adap->sge.counter_val[rx[i].rspq.pktcnt_idx]); - R("FL ID:", fl.cntxt_id); - R("FL size:", fl.size - 8); - R("FL pend:", fl.pend_cred); - R("FL avail:", fl.avail); - R("FL PIDX:", fl.pidx); - R("FL CIDX:", fl.cidx); - RL("RxPackets:", stats.pkts); - RL("RxImmPkts:", stats.imm); - RL("RxNoMem:", stats.nomem); - RL("FLAllocErr:", fl.alloc_failed); - RL("FLLrgAlcErr:", fl.large_alloc_failed); - RL("FLMapErr:", fl.mapping_err); - RL("FLLow:", fl.low); - RL("FLStarving:", fl.starving); - - } else if (iscsit_idx < iscsit_entries) { - const struct sge_ofld_rxq *rx = - &adap->sge.iscsitrxq[iscsit_idx * 4]; - int n = min(4, adap->sge.niscsitq - 4 * iscsit_idx); - - S("QType:", "iSCSIT"); - R("RspQ ID:", rspq.abs_id); - R("RspQ size:", rspq.size); - R("RspQE size:", rspq.iqe_len); - R("RspQ CIDX:", rspq.cidx); - R("RspQ Gen:", rspq.gen); - S3("u", "Intr delay:", qtimer_val(adap, &rx[i].rspq)); - S3("u", "Intr pktcnt:", - adap->sge.counter_val[rx[i].rspq.pktcnt_idx]); - R("FL ID:", fl.cntxt_id); - R("FL size:", fl.size - 8); - R("FL pend:", fl.pend_cred); - R("FL avail:", fl.avail); - R("FL PIDX:", fl.pidx); - R("FL CIDX:", fl.cidx); - RL("RxPackets:", stats.pkts); - RL("RxImmPkts:", stats.imm); - RL("RxNoMem:", stats.nomem); - RL("FLAllocErr:", fl.alloc_failed); - RL("FLLrgAlcErr:", fl.large_alloc_failed); - RL("FLMapErr:", fl.mapping_err); - RL("FLLow:", fl.low); - RL("FLStarving:", fl.starving); - - } else if (rdma_idx < rdma_entries) { - const struct sge_ofld_rxq *rx = - &adap->sge.rdmarxq[rdma_idx * 4]; - int n = min(4, adap->sge.rdmaqs - 4 * rdma_idx); - - S("QType:", "RDMA-CPL"); - S("Interface:", - rx[i].rspq.netdev ? rx[i].rspq.netdev->name : "N/A"); - R("RspQ ID:", rspq.abs_id); - R("RspQ size:", rspq.size); - R("RspQE size:", rspq.iqe_len); - R("RspQ CIDX:", rspq.cidx); - R("RspQ Gen:", rspq.gen); - S3("u", "Intr delay:", qtimer_val(adap, &rx[i].rspq)); - S3("u", "Intr pktcnt:", - adap->sge.counter_val[rx[i].rspq.pktcnt_idx]); - R("FL ID:", fl.cntxt_id); - R("FL size:", fl.size - 8); - R("FL pend:", fl.pend_cred); - R("FL avail:", fl.avail); - R("FL PIDX:", fl.pidx); - R("FL CIDX:", fl.cidx); - RL("RxPackets:", stats.pkts); - RL("RxImmPkts:", stats.imm); - RL("RxNoMem:", stats.nomem); - RL("FLAllocErr:", fl.alloc_failed); - RL("FLLrgAlcErr:", fl.large_alloc_failed); - RL("FLMapErr:", fl.mapping_err); - RL("FLLow:", fl.low); - RL("FLStarving:", fl.starving); - - } else if (ciq_idx < ciq_entries) { - const struct sge_ofld_rxq *rx = &adap->sge.rdmaciq[ciq_idx * 4]; - int n = min(4, adap->sge.rdmaciqs - 4 * ciq_idx); - - S("QType:", "RDMA-CIQ"); - S("Interface:", - rx[i].rspq.netdev ? rx[i].rspq.netdev->name : "N/A"); - R("RspQ ID:", rspq.abs_id); - R("RspQ size:", rspq.size); - R("RspQE size:", rspq.iqe_len); - R("RspQ CIDX:", rspq.cidx); - R("RspQ Gen:", rspq.gen); - S3("u", "Intr delay:", qtimer_val(adap, &rx[i].rspq)); - S3("u", "Intr pktcnt:", - adap->sge.counter_val[rx[i].rspq.pktcnt_idx]); - RL("RxAN:", stats.an); - RL("RxNoMem:", stats.nomem); } else if (ctrl_idx < ctrl_entries) { const struct sge_ctrl_txq *tx = &adap->sge.ctrlq[ctrl_idx * 4]; @@ -2672,10 +2564,7 @@ do { \ static int sge_queue_entries(const struct adapter *adap) { return DIV_ROUND_UP(adap->sge.ethqsets, 4) + - DIV_ROUND_UP(adap->sge.iscsiqsets, 4) + - DIV_ROUND_UP(adap->sge.niscsitq, 4) + - DIV_ROUND_UP(adap->sge.rdmaqs, 4) + - DIV_ROUND_UP(adap->sge.rdmaciqs, 4) + + DIV_ROUND_UP(adap->sge.ofldqsets, 4) + DIV_ROUND_UP(MAX_CTRL_QUEUES, 4) + 1; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 44cc9767936f..d1ebb84c073e 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -226,11 +226,6 @@ static struct dentry *cxgb4_debugfs_root; LIST_HEAD(adapter_list); DEFINE_MUTEX(uld_mutex); -/* Adapter list to be accessed from atomic context */ -static LIST_HEAD(adap_rcu_list); -static DEFINE_SPINLOCK(adap_rcu_lock); -static struct cxgb4_uld_info ulds[CXGB4_ULD_MAX]; -static const char *const uld_str[] = { "RDMA", "iSCSI", "iSCSIT" }; static void link_report(struct net_device *dev) { @@ -678,56 +673,6 @@ out: return 0; } -/* Flush the aggregated lro sessions */ -static void uldrx_flush_handler(struct sge_rspq *q) -{ - if (ulds[q->uld].lro_flush) - ulds[q->uld].lro_flush(&q->lro_mgr); -} - -/** - * uldrx_handler - response queue handler for ULD queues - * @q: the response queue that received the packet - * @rsp: the response queue descriptor holding the offload message - * @gl: the gather list of packet fragments - * - * Deliver an ingress offload packet to a ULD. All processing is done by - * the ULD, we just maintain statistics. - */ -static int uldrx_handler(struct sge_rspq *q, const __be64 *rsp, - const struct pkt_gl *gl) -{ - struct sge_ofld_rxq *rxq = container_of(q, struct sge_ofld_rxq, rspq); - int ret; - - /* FW can send CPLs encapsulated in a CPL_FW4_MSG. - */ - if (((const struct rss_header *)rsp)->opcode == CPL_FW4_MSG && - ((const struct cpl_fw4_msg *)(rsp + 1))->type == FW_TYPE_RSSCPL) - rsp += 2; - - if (q->flush_handler) - ret = ulds[q->uld].lro_rx_handler(q->adap->uld_handle[q->uld], - rsp, gl, &q->lro_mgr, - &q->napi); - else - ret = ulds[q->uld].rx_handler(q->adap->uld_handle[q->uld], - rsp, gl); - - if (ret) { - rxq->stats.nomem++; - return -1; - } - - if (gl == NULL) - rxq->stats.imm++; - else if (gl == CXGB4_MSG_AN) - rxq->stats.an++; - else - rxq->stats.pkts++; - return 0; -} - static void disable_msi(struct adapter *adapter) { if (adapter->flags & USING_MSIX) { @@ -779,30 +724,12 @@ static void name_msix_vecs(struct adapter *adap) snprintf(adap->msix_info[msi_idx].desc, n, "%s-Rx%d", d->name, i); } - - /* offload queues */ - for_each_iscsirxq(&adap->sge, i) - snprintf(adap->msix_info[msi_idx++].desc, n, "%s-iscsi%d", - adap->port[0]->name, i); - - for_each_iscsitrxq(&adap->sge, i) - snprintf(adap->msix_info[msi_idx++].desc, n, "%s-iSCSIT%d", - adap->port[0]->name, i); - - for_each_rdmarxq(&adap->sge, i) - snprintf(adap->msix_info[msi_idx++].desc, n, "%s-rdma%d", - adap->port[0]->name, i); - - for_each_rdmaciq(&adap->sge, i) - snprintf(adap->msix_info[msi_idx++].desc, n, "%s-rdma-ciq%d", - adap->port[0]->name, i); } static int request_msix_queue_irqs(struct adapter *adap) { struct sge *s = &adap->sge; - int err, ethqidx, iscsiqidx = 0, rdmaqidx = 0, rdmaciqqidx = 0; - int iscsitqidx = 0; + int err, ethqidx; int msi_index = 2; err = request_irq(adap->msix_info[1].vec, t4_sge_intr_msix, 0, @@ -819,57 +746,9 @@ static int request_msix_queue_irqs(struct adapter *adap) goto unwind; msi_index++; } - for_each_iscsirxq(s, iscsiqidx) { - err = request_irq(adap->msix_info[msi_index].vec, - t4_sge_intr_msix, 0, - adap->msix_info[msi_index].desc, - &s->iscsirxq[iscsiqidx].rspq); - if (err) - goto unwind; - msi_index++; - } - for_each_iscsitrxq(s, iscsitqidx) { - err = request_irq(adap->msix_info[msi_index].vec, - t4_sge_intr_msix, 0, - adap->msix_info[msi_index].desc, - &s->iscsitrxq[iscsitqidx].rspq); - if (err) - goto unwind; - msi_index++; - } - for_each_rdmarxq(s, rdmaqidx) { - err = request_irq(adap->msix_info[msi_index].vec, - t4_sge_intr_msix, 0, - adap->msix_info[msi_index].desc, - &s->rdmarxq[rdmaqidx].rspq); - if (err) - goto unwind; - msi_index++; - } - for_each_rdmaciq(s, rdmaciqqidx) { - err = request_irq(adap->msix_info[msi_index].vec, - t4_sge_intr_msix, 0, - adap->msix_info[msi_index].desc, - &s->rdmaciq[rdmaciqqidx].rspq); - if (err) - goto unwind; - msi_index++; - } return 0; unwind: - while (--rdmaciqqidx >= 0) - free_irq(adap->msix_info[--msi_index].vec, - &s->rdmaciq[rdmaciqqidx].rspq); - while (--rdmaqidx >= 0) - free_irq(adap->msix_info[--msi_index].vec, - &s->rdmarxq[rdmaqidx].rspq); - while (--iscsitqidx >= 0) - free_irq(adap->msix_info[--msi_index].vec, - &s->iscsitrxq[iscsitqidx].rspq); - while (--iscsiqidx >= 0) - free_irq(adap->msix_info[--msi_index].vec, - &s->iscsirxq[iscsiqidx].rspq); while (--ethqidx >= 0) free_irq(adap->msix_info[--msi_index].vec, &s->ethrxq[ethqidx].rspq); @@ -885,16 +764,6 @@ static void free_msix_queue_irqs(struct adapter *adap) free_irq(adap->msix_info[1].vec, &s->fw_evtq); for_each_ethrxq(s, i) free_irq(adap->msix_info[msi_index++].vec, &s->ethrxq[i].rspq); - for_each_iscsirxq(s, i) - free_irq(adap->msix_info[msi_index++].vec, - &s->iscsirxq[i].rspq); - for_each_iscsitrxq(s, i) - free_irq(adap->msix_info[msi_index++].vec, - &s->iscsitrxq[i].rspq); - for_each_rdmarxq(s, i) - free_irq(adap->msix_info[msi_index++].vec, &s->rdmarxq[i].rspq); - for_each_rdmaciq(s, i) - free_irq(adap->msix_info[msi_index++].vec, &s->rdmaciq[i].rspq); } /** @@ -1033,42 +902,11 @@ static void enable_rx(struct adapter *adap) } } -static int alloc_ofld_rxqs(struct adapter *adap, struct sge_ofld_rxq *q, - unsigned int nq, unsigned int per_chan, int msi_idx, - u16 *ids, bool lro) -{ - int i, err; - for (i = 0; i < nq; i++, q++) { - if (msi_idx > 0) - msi_idx++; - err = t4_sge_alloc_rxq(adap, &q->rspq, false, - adap->port[i / per_chan], - msi_idx, q->fl.size ? &q->fl : NULL, - uldrx_handler, - lro ? uldrx_flush_handler : NULL, - 0); - if (err) - return err; - memset(&q->stats, 0, sizeof(q->stats)); - if (ids) - ids[i] = q->rspq.abs_id; - } - return 0; -} - -/** - * setup_sge_queues - configure SGE Tx/Rx/response queues - * @adap: the adapter - * - * Determines how many sets of SGE queues to use and initializes them. - * We support multiple queue sets per port if we have MSI-X, otherwise - * just one queue set per port. - */ -static int setup_sge_queues(struct adapter *adap) +static int setup_fw_sge_queues(struct adapter *adap) { - int err, i, j; struct sge *s = &adap->sge; + int err = 0; bitmap_zero(s->starving_fl, s->egr_sz); bitmap_zero(s->txq_maperr, s->egr_sz); @@ -1083,25 +921,27 @@ static int setup_sge_queues(struct adapter *adap) adap->msi_idx = -((int)s->intrq.abs_id + 1); } - /* NOTE: If you add/delete any Ingress/Egress Queue allocations in here, - * don't forget to update the following which need to be - * synchronized to and changes here. - * - * 1. The calculations of MAX_INGQ in cxgb4.h. - * - * 2. Update enable_msix/name_msix_vecs/request_msix_queue_irqs - * to accommodate any new/deleted Ingress Queues - * which need MSI-X Vectors. - * - * 3. Update sge_qinfo_show() to include information on the - * new/deleted queues. - */ err = t4_sge_alloc_rxq(adap, &s->fw_evtq, true, adap->port[0], adap->msi_idx, NULL, fwevtq_handler, NULL, -1); - if (err) { -freeout: t4_free_sge_resources(adap); - return err; - } + if (err) + t4_free_sge_resources(adap); + return err; +} + +/** + * setup_sge_queues - configure SGE Tx/Rx/response queues + * @adap: the adapter + * + * Determines how many sets of SGE queues to use and initializes them. + * We support multiple queue sets per port if we have MSI-X, otherwise + * just one queue set per port. + */ +static int setup_sge_queues(struct adapter *adap) +{ + int err, i, j; + struct sge *s = &adap->sge; + struct sge_uld_rxq_info *rxq_info = s->uld_rxq_info[CXGB4_ULD_RDMA]; + unsigned int cmplqid = 0; for_each_port(adap, i) { struct net_device *dev = adap->port[i]; @@ -1132,8 +972,8 @@ freeout: t4_free_sge_resources(adap); } } - j = s->iscsiqsets / adap->params.nports; /* iscsi queues per channel */ - for_each_iscsirxq(s, i) { + j = s->ofldqsets / adap->params.nports; /* iscsi queues per channel */ + for_each_ofldtxq(s, i) { err = t4_sge_alloc_ofld_txq(adap, &s->ofldtxq[i], adap->port[i / j], s->fw_evtq.cntxt_id); @@ -1141,30 +981,15 @@ freeout: t4_free_sge_resources(adap); goto freeout; } -#define ALLOC_OFLD_RXQS(firstq, nq, per_chan, ids, lro) do { \ - err = alloc_ofld_rxqs(adap, firstq, nq, per_chan, adap->msi_idx, ids, lro); \ - if (err) \ - goto freeout; \ - if (adap->msi_idx > 0) \ - adap->msi_idx += nq; \ -} while (0) - - ALLOC_OFLD_RXQS(s->iscsirxq, s->iscsiqsets, j, s->iscsi_rxq, false); - ALLOC_OFLD_RXQS(s->iscsitrxq, s->niscsitq, j, s->iscsit_rxq, true); - ALLOC_OFLD_RXQS(s->rdmarxq, s->rdmaqs, 1, s->rdma_rxq, false); - j = s->rdmaciqs / adap->params.nports; /* rdmaq queues per channel */ - ALLOC_OFLD_RXQS(s->rdmaciq, s->rdmaciqs, j, s->rdma_ciq, false); - -#undef ALLOC_OFLD_RXQS - for_each_port(adap, i) { - /* - * Note that ->rdmarxq[i].rspq.cntxt_id below is 0 if we don't + /* Note that cmplqid below is 0 if we don't * have RDMA queues, and that's the right value. */ + if (rxq_info) + cmplqid = rxq_info->uldrxq[i].rspq.cntxt_id; + err = t4_sge_alloc_ctrl_txq(adap, &s->ctrlq[i], adap->port[i], - s->fw_evtq.cntxt_id, - s->rdmarxq[i].rspq.cntxt_id); + s->fw_evtq.cntxt_id, cmplqid); if (err) goto freeout; } @@ -1175,6 +1000,9 @@ freeout: t4_free_sge_resources(adap); RSSCONTROL_V(netdev2pinfo(adap->port[0])->tx_chan) | QUEUENUMBER_V(s->ethrxq[0].rspq.abs_id)); return 0; +freeout: + t4_free_sge_resources(adap); + return err; } /* @@ -2317,7 +2145,7 @@ static void disable_dbs(struct adapter *adap) for_each_ethrxq(&adap->sge, i) disable_txq_db(&adap->sge.ethtxq[i].q); - for_each_iscsirxq(&adap->sge, i) + for_each_ofldtxq(&adap->sge, i) disable_txq_db(&adap->sge.ofldtxq[i].q); for_each_port(adap, i) disable_txq_db(&adap->sge.ctrlq[i].q); @@ -2329,7 +2157,7 @@ static void enable_dbs(struct adapter *adap) for_each_ethrxq(&adap->sge, i) enable_txq_db(adap, &adap->sge.ethtxq[i].q); - for_each_iscsirxq(&adap->sge, i) + for_each_ofldtxq(&adap->sge, i) enable_txq_db(adap, &adap->sge.ofldtxq[i].q); for_each_port(adap, i) enable_txq_db(adap, &adap->sge.ctrlq[i].q); @@ -2337,9 +2165,10 @@ static void enable_dbs(struct adapter *adap) static void notify_rdma_uld(struct adapter *adap, enum cxgb4_control cmd) { - if (adap->uld_handle[CXGB4_ULD_RDMA]) - ulds[CXGB4_ULD_RDMA].control(adap->uld_handle[CXGB4_ULD_RDMA], - cmd); + enum cxgb4_uld type = CXGB4_ULD_RDMA; + + if (adap->uld && adap->uld[type].handle) + adap->uld[type].control(adap->uld[type].handle, cmd); } static void process_db_full(struct work_struct *work) @@ -2393,13 +2222,14 @@ out: if (ret) CH_WARN(adap, "DB drop recovery failed.\n"); } + static void recover_all_queues(struct adapter *adap) { int i; for_each_ethrxq(&adap->sge, i) sync_txq_pidx(adap, &adap->sge.ethtxq[i].q); - for_each_iscsirxq(&adap->sge, i) + for_each_ofldtxq(&adap->sge, i) sync_txq_pidx(adap, &adap->sge.ofldtxq[i].q); for_each_port(adap, i) sync_txq_pidx(adap, &adap->sge.ctrlq[i].q); @@ -2464,94 +2294,12 @@ void t4_db_dropped(struct adapter *adap) queue_work(adap->workq, &adap->db_drop_task); } -static void uld_attach(struct adapter *adap, unsigned int uld) +void t4_register_netevent_notifier(void) { - void *handle; - struct cxgb4_lld_info lli; - unsigned short i; - - lli.pdev = adap->pdev; - lli.pf = adap->pf; - lli.l2t = adap->l2t; - lli.tids = &adap->tids; - lli.ports = adap->port; - lli.vr = &adap->vres; - lli.mtus = adap->params.mtus; - if (uld == CXGB4_ULD_RDMA) { - lli.rxq_ids = adap->sge.rdma_rxq; - lli.ciq_ids = adap->sge.rdma_ciq; - lli.nrxq = adap->sge.rdmaqs; - lli.nciq = adap->sge.rdmaciqs; - } else if (uld == CXGB4_ULD_ISCSI) { - lli.rxq_ids = adap->sge.iscsi_rxq; - lli.nrxq = adap->sge.iscsiqsets; - } else if (uld == CXGB4_ULD_ISCSIT) { - lli.rxq_ids = adap->sge.iscsit_rxq; - lli.nrxq = adap->sge.niscsitq; - } - lli.ntxq = adap->sge.iscsiqsets; - lli.nchan = adap->params.nports; - lli.nports = adap->params.nports; - lli.wr_cred = adap->params.ofldq_wr_cred; - lli.adapter_type = adap->params.chip; - lli.iscsi_iolen = MAXRXDATA_G(t4_read_reg(adap, TP_PARA_REG2_A)); - lli.iscsi_tagmask = t4_read_reg(adap, ULP_RX_ISCSI_TAGMASK_A); - lli.iscsi_pgsz_order = t4_read_reg(adap, ULP_RX_ISCSI_PSZ_A); - lli.iscsi_llimit = t4_read_reg(adap, ULP_RX_ISCSI_LLIMIT_A); - lli.iscsi_ppm = &adap->iscsi_ppm; - lli.cclk_ps = 1000000000 / adap->params.vpd.cclk; - lli.udb_density = 1 << adap->params.sge.eq_qpp; - lli.ucq_density = 1 << adap->params.sge.iq_qpp; - lli.filt_mode = adap->params.tp.vlan_pri_map; - /* MODQ_REQ_MAP sets queues 0-3 to chan 0-3 */ - for (i = 0; i < NCHAN; i++) - lli.tx_modq[i] = i; - lli.gts_reg = adap->regs + MYPF_REG(SGE_PF_GTS_A); - lli.db_reg = adap->regs + MYPF_REG(SGE_PF_KDOORBELL_A); - lli.fw_vers = adap->params.fw_vers; - lli.dbfifo_int_thresh = dbfifo_int_thresh; - lli.sge_ingpadboundary = adap->sge.fl_align; - lli.sge_egrstatuspagesize = adap->sge.stat_len; - lli.sge_pktshift = adap->sge.pktshift; - lli.enable_fw_ofld_conn = adap->flags & FW_OFLD_CONN; - lli.max_ordird_qp = adap->params.max_ordird_qp; - lli.max_ird_adapter = adap->params.max_ird_adapter; - lli.ulptx_memwrite_dsgl = adap->params.ulptx_memwrite_dsgl; - lli.nodeid = dev_to_node(adap->pdev_dev); - - handle = ulds[uld].add(&lli); - if (IS_ERR(handle)) { - dev_warn(adap->pdev_dev, - "could not attach to the %s driver, error %ld\n", - uld_str[uld], PTR_ERR(handle)); - return; - } - - adap->uld_handle[uld] = handle; - if (!netevent_registered) { register_netevent_notifier(&cxgb4_netevent_nb); netevent_registered = true; } - - if (adap->flags & FULL_INIT_DONE) - ulds[uld].state_change(handle, CXGB4_STATE_UP); -} - -static void attach_ulds(struct adapter *adap) -{ - unsigned int i; - - spin_lock(&adap_rcu_lock); - list_add_tail_rcu(&adap->rcu_node, &adap_rcu_list); - spin_unlock(&adap_rcu_lock); - - mutex_lock(&uld_mutex); - list_add_tail(&adap->list_node, &adapter_list); - for (i = 0; i < CXGB4_ULD_MAX; i++) - if (ulds[i].add) - uld_attach(adap, i); - mutex_unlock(&uld_mutex); } static void detach_ulds(struct adapter *adap) @@ -2561,12 +2309,6 @@ static void detach_ulds(struct adapter *adap) mutex_lock(&uld_mutex); list_del(&adap->list_node); for (i = 0; i < CXGB4_ULD_MAX; i++) - if (adap->uld_handle[i]) { - ulds[i].state_change(adap->uld_handle[i], - CXGB4_STATE_DETACH); - adap->uld_handle[i] = NULL; - } - for (i = 0; i < CXGB4_PCI_ULD_MAX; i++) if (adap->uld && adap->uld[i].handle) { adap->uld[i].state_change(adap->uld[i].handle, CXGB4_STATE_DETACH); @@ -2577,10 +2319,6 @@ static void detach_ulds(struct adapter *adap) netevent_registered = false; } mutex_unlock(&uld_mutex); - - spin_lock(&adap_rcu_lock); - list_del_rcu(&adap->rcu_node); - spin_unlock(&adap_rcu_lock); } static void notify_ulds(struct adapter *adap, enum cxgb4_state new_state) @@ -2589,65 +2327,12 @@ static void notify_ulds(struct adapter *adap, enum cxgb4_state new_state) mutex_lock(&uld_mutex); for (i = 0; i < CXGB4_ULD_MAX; i++) - if (adap->uld_handle[i]) - ulds[i].state_change(adap->uld_handle[i], new_state); - for (i = 0; i < CXGB4_PCI_ULD_MAX; i++) if (adap->uld && adap->uld[i].handle) adap->uld[i].state_change(adap->uld[i].handle, new_state); mutex_unlock(&uld_mutex); } -/** - * cxgb4_register_uld - register an upper-layer driver - * @type: the ULD type - * @p: the ULD methods - * - * Registers an upper-layer driver with this driver and notifies the ULD - * about any presently available devices that support its type. Returns - * %-EBUSY if a ULD of the same type is already registered. - */ -int cxgb4_register_uld(enum cxgb4_uld type, const struct cxgb4_uld_info *p) -{ - int ret = 0; - struct adapter *adap; - - if (type >= CXGB4_ULD_MAX) - return -EINVAL; - mutex_lock(&uld_mutex); - if (ulds[type].add) { - ret = -EBUSY; - goto out; - } - ulds[type] = *p; - list_for_each_entry(adap, &adapter_list, list_node) - uld_attach(adap, type); -out: mutex_unlock(&uld_mutex); - return ret; -} -EXPORT_SYMBOL(cxgb4_register_uld); - -/** - * cxgb4_unregister_uld - unregister an upper-layer driver - * @type: the ULD type - * - * Unregisters an existing upper-layer driver. - */ -int cxgb4_unregister_uld(enum cxgb4_uld type) -{ - struct adapter *adap; - - if (type >= CXGB4_ULD_MAX) - return -EINVAL; - mutex_lock(&uld_mutex); - list_for_each_entry(adap, &adapter_list, list_node) - adap->uld_handle[type] = NULL; - ulds[type].add = NULL; - mutex_unlock(&uld_mutex); - return 0; -} -EXPORT_SYMBOL(cxgb4_unregister_uld); - #if IS_ENABLED(CONFIG_IPV6) static int cxgb4_inet6addr_handler(struct notifier_block *this, unsigned long event, void *data) @@ -2752,7 +2437,6 @@ static int cxgb_up(struct adapter *adap) adap->msix_info[0].desc, adap); if (err) goto irq_err; - err = request_msix_queue_irqs(adap); if (err) { free_irq(adap->msix_info[0].vec, adap); @@ -4262,6 +3946,7 @@ static int adap_init0(struct adapter *adap) adap->params.ofldq_wr_cred = val[5]; adap->params.offload = 1; + adap->num_ofld_uld += 1; } if (caps_cmd.rdmacaps) { params[0] = FW_PARAM_PFVF(STAG_START); @@ -4314,6 +3999,7 @@ static int adap_init0(struct adapter *adap) "max_ordird_qp %d max_ird_adapter %d\n", adap->params.max_ordird_qp, adap->params.max_ird_adapter); + adap->num_ofld_uld += 2; } if (caps_cmd.iscsicaps) { params[0] = FW_PARAM_PFVF(ISCSI_START); @@ -4324,6 +4010,8 @@ static int adap_init0(struct adapter *adap) goto bye; adap->vres.iscsi.start = val[0]; adap->vres.iscsi.size = val[1] - val[0] + 1; + /* LIO target and cxgb4i initiaitor */ + adap->num_ofld_uld += 2; } if (caps_cmd.cryptocaps) { /* Should query params here...TODO */ @@ -4523,14 +4211,14 @@ static void cfg_queues(struct adapter *adap) #ifndef CONFIG_CHELSIO_T4_DCB int q10g = 0; #endif - int ciq_size; /* Reduce memory usage in kdump environment, disable all offload. */ if (is_kdump_kernel()) { adap->params.offload = 0; adap->params.crypto = 0; - } else if (adap->num_uld && uld_mem_alloc(adap)) { + } else if (is_uld(adap) && t4_uld_mem_alloc(adap)) { + adap->params.offload = 0; adap->params.crypto = 0; } @@ -4576,33 +4264,18 @@ static void cfg_queues(struct adapter *adap) s->ethqsets = qidx; s->max_ethqsets = qidx; /* MSI-X may lower it later */ - if (is_offload(adap)) { + if (is_uld(adap)) { /* * For offload we use 1 queue/channel if all ports are up to 1G, * otherwise we divide all available queues amongst the channels * capped by the number of available cores. */ if (n10g) { - i = min_t(int, ARRAY_SIZE(s->iscsirxq), - num_online_cpus()); - s->iscsiqsets = roundup(i, adap->params.nports); - } else - s->iscsiqsets = adap->params.nports; - /* For RDMA one Rx queue per channel suffices */ - s->rdmaqs = adap->params.nports; - /* Try and allow at least 1 CIQ per cpu rounding down - * to the number of ports, with a minimum of 1 per port. - * A 2 port card in a 6 cpu system: 6 CIQs, 3 / port. - * A 4 port card in a 6 cpu system: 4 CIQs, 1 / port. - * A 4 port card in a 2 cpu system: 4 CIQs, 1 / port. - */ - s->rdmaciqs = min_t(int, MAX_RDMA_CIQS, num_online_cpus()); - s->rdmaciqs = (s->rdmaciqs / adap->params.nports) * - adap->params.nports; - s->rdmaciqs = max_t(int, s->rdmaciqs, adap->params.nports); - - if (!is_t4(adap->params.chip)) - s->niscsitq = s->iscsiqsets; + i = num_online_cpus(); + s->ofldqsets = roundup(i, adap->params.nports); + } else { + s->ofldqsets = adap->params.nports; + } } for (i = 0; i < ARRAY_SIZE(s->ethrxq); i++) { @@ -4621,47 +4294,8 @@ static void cfg_queues(struct adapter *adap) for (i = 0; i < ARRAY_SIZE(s->ofldtxq); i++) s->ofldtxq[i].q.size = 1024; - for (i = 0; i < ARRAY_SIZE(s->iscsirxq); i++) { - struct sge_ofld_rxq *r = &s->iscsirxq[i]; - - init_rspq(adap, &r->rspq, 5, 1, 1024, 64); - r->rspq.uld = CXGB4_ULD_ISCSI; - r->fl.size = 72; - } - - if (!is_t4(adap->params.chip)) { - for (i = 0; i < ARRAY_SIZE(s->iscsitrxq); i++) { - struct sge_ofld_rxq *r = &s->iscsitrxq[i]; - - init_rspq(adap, &r->rspq, 5, 1, 1024, 64); - r->rspq.uld = CXGB4_ULD_ISCSIT; - r->fl.size = 72; - } - } - - for (i = 0; i < ARRAY_SIZE(s->rdmarxq); i++) { - struct sge_ofld_rxq *r = &s->rdmarxq[i]; - - init_rspq(adap, &r->rspq, 5, 1, 511, 64); - r->rspq.uld = CXGB4_ULD_RDMA; - r->fl.size = 72; - } - - ciq_size = 64 + adap->vres.cq.size + adap->tids.nftids; - if (ciq_size > SGE_MAX_IQ_SIZE) { - CH_WARN(adap, "CIQ size too small for available IQs\n"); - ciq_size = SGE_MAX_IQ_SIZE; - } - - for (i = 0; i < ARRAY_SIZE(s->rdmaciq); i++) { - struct sge_ofld_rxq *r = &s->rdmaciq[i]; - - init_rspq(adap, &r->rspq, 5, 1, ciq_size, 64); - r->rspq.uld = CXGB4_ULD_RDMA; - } - init_rspq(adap, &s->fw_evtq, 0, 1, 1024, 64); - init_rspq(adap, &s->intrq, 0, 1, 2 * MAX_INGQ, 64); + init_rspq(adap, &s->intrq, 0, 1, 512, 64); } /* @@ -4695,7 +4329,15 @@ static void reduce_ethqs(struct adapter *adap, int n) static int get_msix_info(struct adapter *adap) { struct uld_msix_info *msix_info; - int max_ingq = (MAX_OFLD_QSETS * adap->num_uld); + unsigned int max_ingq = 0; + + if (is_offload(adap)) + max_ingq += MAX_OFLD_QSETS * adap->num_ofld_uld; + if (is_pci_uld(adap)) + max_ingq += MAX_OFLD_QSETS * adap->num_uld; + + if (!max_ingq) + goto out; msix_info = kcalloc(max_ingq, sizeof(*msix_info), GFP_KERNEL); if (!msix_info) @@ -4709,12 +4351,13 @@ static int get_msix_info(struct adapter *adap) } spin_lock_init(&adap->msix_bmap_ulds.lock); adap->msix_info_ulds = msix_info; +out: return 0; } static void free_msix_info(struct adapter *adap) { - if (!adap->num_uld) + if (!(adap->num_uld && adap->num_ofld_uld)) return; kfree(adap->msix_info_ulds); @@ -4733,32 +4376,32 @@ static int enable_msix(struct adapter *adap) struct msix_entry *entries; int max_ingq = MAX_INGQ; - max_ingq += (MAX_OFLD_QSETS * adap->num_uld); + if (is_pci_uld(adap)) + max_ingq += (MAX_OFLD_QSETS * adap->num_uld); + if (is_offload(adap)) + max_ingq += (MAX_OFLD_QSETS * adap->num_ofld_uld); entries = kmalloc(sizeof(*entries) * (max_ingq + 1), GFP_KERNEL); if (!entries) return -ENOMEM; /* map for msix */ - if (is_pci_uld(adap) && get_msix_info(adap)) + if (get_msix_info(adap)) { + adap->params.offload = 0; adap->params.crypto = 0; + } for (i = 0; i < max_ingq + 1; ++i) entries[i].entry = i; want = s->max_ethqsets + EXTRA_VECS; if (is_offload(adap)) { - want += s->rdmaqs + s->rdmaciqs + s->iscsiqsets + - s->niscsitq; - /* need nchan for each possible ULD */ - if (is_t4(adap->params.chip)) - ofld_need = 3 * nchan; - else - ofld_need = 4 * nchan; + want += adap->num_ofld_uld * s->ofldqsets; + ofld_need = adap->num_ofld_uld * nchan; } if (is_pci_uld(adap)) { - want += netif_get_num_default_rss_queues() * nchan; - uld_need = nchan; + want += adap->num_uld * s->ofldqsets; + uld_need = adap->num_uld * nchan; } #ifdef CONFIG_CHELSIO_T4_DCB /* For Data Center Bridging we need 8 Ethernet TX Priority Queues for @@ -4786,43 +4429,25 @@ static int enable_msix(struct adapter *adap) if (i < s->ethqsets) reduce_ethqs(adap, i); } - if (is_pci_uld(adap)) { + if (is_uld(adap)) { if (allocated < want) s->nqs_per_uld = nchan; else - s->nqs_per_uld = netif_get_num_default_rss_queues() * - nchan; + s->nqs_per_uld = s->ofldqsets; } - if (is_offload(adap)) { - if (allocated < want) { - s->rdmaqs = nchan; - s->rdmaciqs = nchan; - - if (!is_t4(adap->params.chip)) - s->niscsitq = nchan; - } - - /* leftovers go to OFLD */ - i = allocated - EXTRA_VECS - s->max_ethqsets - - s->rdmaqs - s->rdmaciqs - s->niscsitq; - if (is_pci_uld(adap)) - i -= s->nqs_per_uld * adap->num_uld; - s->iscsiqsets = (i / nchan) * nchan; /* round down */ - - } - - for (i = 0; i < (allocated - (s->nqs_per_uld * adap->num_uld)); ++i) + for (i = 0; i < (s->max_ethqsets + EXTRA_VECS); ++i) adap->msix_info[i].vec = entries[i].vector; - if (is_pci_uld(adap)) { - for (j = 0 ; i < allocated; ++i, j++) + if (is_uld(adap)) { + for (j = 0 ; i < allocated; ++i, j++) { adap->msix_info_ulds[j].vec = entries[i].vector; + adap->msix_info_ulds[j].idx = i; + } adap->msix_bmap_ulds.mapsize = j; } dev_info(adap->pdev_dev, "%d MSI-X vectors allocated, " - "nic %d iscsi %d rdma cpl %d rdma ciq %d uld %d\n", - allocated, s->max_ethqsets, s->iscsiqsets, s->rdmaqs, - s->rdmaciqs, s->nqs_per_uld); + "nic %d per uld %d\n", + allocated, s->max_ethqsets, s->nqs_per_uld); kfree(entries); return 0; @@ -5535,10 +5160,14 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* PCIe EEH recovery on powerpc platforms needs fundamental reset */ pdev->needs_freset = 1; - if (is_offload(adapter)) - attach_ulds(adapter); + if (is_uld(adapter)) { + mutex_lock(&uld_mutex); + list_add_tail(&adapter->list_node, &adapter_list); + mutex_unlock(&uld_mutex); + } print_adapter_info(adapter); + setup_fw_sge_queues(adapter); return 0; sriov: @@ -5593,8 +5222,8 @@ sriov: free_some_resources(adapter); if (adapter->flags & USING_MSIX) free_msix_info(adapter); - if (adapter->num_uld) - uld_mem_free(adapter); + if (adapter->num_uld || adapter->num_ofld_uld) + t4_uld_mem_free(adapter); out_unmap_bar: if (!is_t4(adapter->params.chip)) iounmap(adapter->bar2); @@ -5631,7 +5260,7 @@ static void remove_one(struct pci_dev *pdev) */ destroy_workqueue(adapter->workq); - if (is_offload(adapter)) + if (is_uld(adapter)) detach_ulds(adapter); disable_interrupts(adapter); @@ -5658,8 +5287,8 @@ static void remove_one(struct pci_dev *pdev) if (adapter->flags & USING_MSIX) free_msix_info(adapter); - if (adapter->num_uld) - uld_mem_free(adapter); + if (adapter->num_uld || adapter->num_ofld_uld) + t4_uld_mem_free(adapter); free_some_resources(adapter); #if IS_ENABLED(CONFIG_IPV6) t4_cleanup_clip_tbl(adapter); @@ -5690,12 +5319,58 @@ static void remove_one(struct pci_dev *pdev) #endif } +/* "Shutdown" quiesces the device, stopping Ingress Packet and Interrupt + * delivery. This is essentially a stripped down version of the PCI remove() + * function where we do the minimal amount of work necessary to shutdown any + * further activity. + */ +static void shutdown_one(struct pci_dev *pdev) +{ + struct adapter *adapter = pci_get_drvdata(pdev); + + /* As with remove_one() above (see extended comment), we only want do + * do cleanup on PCI Devices which went all the way through init_one() + * ... + */ + if (!adapter) { + pci_release_regions(pdev); + return; + } + + if (adapter->pf == 4) { + int i; + + for_each_port(adapter, i) + if (adapter->port[i]->reg_state == NETREG_REGISTERED) + cxgb_close(adapter->port[i]); + + t4_uld_clean_up(adapter); + disable_interrupts(adapter); + disable_msi(adapter); + + t4_sge_stop(adapter); + if (adapter->flags & FW_OK) + t4_fw_bye(adapter, adapter->mbox); + } +#ifdef CONFIG_PCI_IOV + else { + if (adapter->port[0]) + unregister_netdev(adapter->port[0]); + iounmap(adapter->regs); + kfree(adapter->vfinfo); + kfree(adapter); + pci_disable_sriov(pdev); + pci_release_regions(pdev); + } +#endif +} + static struct pci_driver cxgb4_driver = { .name = KBUILD_MODNAME, .id_table = cxgb4_pci_tbl, .probe = init_one, .remove = remove_one, - .shutdown = remove_one, + .shutdown = shutdown_one, #ifdef CONFIG_PCI_IOV .sriov_configure = cxgb4_iov_configure, #endif diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c index 5d402bace6c1..fc04e3b878e8 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c @@ -82,6 +82,24 @@ static void free_msix_idx_in_bmap(struct adapter *adap, unsigned int msix_idx) spin_unlock_irqrestore(&bmap->lock, flags); } +/* Flush the aggregated lro sessions */ +static void uldrx_flush_handler(struct sge_rspq *q) +{ + struct adapter *adap = q->adap; + + if (adap->uld[q->uld].lro_flush) + adap->uld[q->uld].lro_flush(&q->lro_mgr); +} + +/** + * uldrx_handler - response queue handler for ULD queues + * @q: the response queue that received the packet + * @rsp: the response queue descriptor holding the offload message + * @gl: the gather list of packet fragments + * + * Deliver an ingress offload packet to a ULD. All processing is done by + * the ULD, we just maintain statistics. + */ static int uldrx_handler(struct sge_rspq *q, const __be64 *rsp, const struct pkt_gl *gl) { @@ -124,8 +142,8 @@ static int alloc_uld_rxqs(struct adapter *adap, struct sge_ofld_rxq *q = rxq_info->uldrxq + offset; unsigned short *ids = rxq_info->rspq_id + offset; unsigned int per_chan = nq / adap->params.nports; - unsigned int msi_idx, bmap_idx; - int i, err; + unsigned int bmap_idx = 0; + int i, err, msi_idx; if (adap->flags & USING_MSIX) msi_idx = 1; @@ -135,14 +153,14 @@ static int alloc_uld_rxqs(struct adapter *adap, for (i = 0; i < nq; i++, q++) { if (msi_idx >= 0) { bmap_idx = get_msix_idx_from_bmap(adap); - adap->msi_idx++; + msi_idx = adap->msix_info_ulds[bmap_idx].idx; } err = t4_sge_alloc_rxq(adap, &q->rspq, false, adap->port[i / per_chan], - adap->msi_idx, + msi_idx, q->fl.size ? &q->fl : NULL, uldrx_handler, - NULL, + lro ? uldrx_flush_handler : NULL, 0); if (err) goto freeout; @@ -159,7 +177,6 @@ freeout: if (q->rspq.desc) free_rspq_fl(adap, &q->rspq, q->fl.size ? &q->fl : NULL); - adap->msi_idx--; } /* We need to free rxq also in case of ciq allocation failure */ @@ -169,7 +186,6 @@ freeout: if (q->rspq.desc) free_rspq_fl(adap, &q->rspq, q->fl.size ? &q->fl : NULL); - adap->msi_idx--; } } return err; @@ -178,17 +194,38 @@ freeout: int setup_sge_queues_uld(struct adapter *adap, unsigned int uld_type, bool lro) { struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type]; + int i, ret = 0; if (adap->flags & USING_MSIX) { - rxq_info->msix_tbl = kzalloc(rxq_info->nrxq + rxq_info->nciq, + rxq_info->msix_tbl = kcalloc((rxq_info->nrxq + rxq_info->nciq), + sizeof(unsigned short), GFP_KERNEL); if (!rxq_info->msix_tbl) return -ENOMEM; } - return !(!alloc_uld_rxqs(adap, rxq_info, rxq_info->nrxq, 0, lro) && + ret = !(!alloc_uld_rxqs(adap, rxq_info, rxq_info->nrxq, 0, lro) && !alloc_uld_rxqs(adap, rxq_info, rxq_info->nciq, rxq_info->nrxq, lro)); + + /* Tell uP to route control queue completions to rdma rspq */ + if (adap->flags & FULL_INIT_DONE && + !ret && uld_type == CXGB4_ULD_RDMA) { + struct sge *s = &adap->sge; + unsigned int cmplqid; + u32 param, cmdop; + + cmdop = FW_PARAMS_PARAM_DMAQ_EQ_CMPLIQID_CTRL; + for_each_port(adap, i) { + cmplqid = rxq_info->uldrxq[i].rspq.cntxt_id; + param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) | + FW_PARAMS_PARAM_X_V(cmdop) | + FW_PARAMS_PARAM_YZ_V(s->ctrlq[i].q.cntxt_id)); + ret = t4_set_params(adap, adap->mbox, adap->pf, + 0, 1, ¶m, &cmplqid); + } + } + return ret; } static void t4_free_uld_rxqs(struct adapter *adap, int n, @@ -198,7 +235,6 @@ static void t4_free_uld_rxqs(struct adapter *adap, int n, if (q->rspq.desc) free_rspq_fl(adap, &q->rspq, q->fl.size ? &q->fl : NULL); - adap->msi_idx--; } } @@ -206,6 +242,21 @@ void free_sge_queues_uld(struct adapter *adap, unsigned int uld_type) { struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type]; + if (adap->flags & FULL_INIT_DONE && uld_type == CXGB4_ULD_RDMA) { + struct sge *s = &adap->sge; + u32 param, cmdop, cmplqid = 0; + int i; + + cmdop = FW_PARAMS_PARAM_DMAQ_EQ_CMPLIQID_CTRL; + for_each_port(adap, i) { + param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) | + FW_PARAMS_PARAM_X_V(cmdop) | + FW_PARAMS_PARAM_YZ_V(s->ctrlq[i].q.cntxt_id)); + t4_set_params(adap, adap->mbox, adap->pf, + 0, 1, ¶m, &cmplqid); + } + } + if (rxq_info->nciq) t4_free_uld_rxqs(adap, rxq_info->nciq, rxq_info->uldrxq + rxq_info->nrxq); @@ -215,26 +266,38 @@ void free_sge_queues_uld(struct adapter *adap, unsigned int uld_type) } int cfg_queues_uld(struct adapter *adap, unsigned int uld_type, - const struct cxgb4_pci_uld_info *uld_info) + const struct cxgb4_uld_info *uld_info) { struct sge *s = &adap->sge; struct sge_uld_rxq_info *rxq_info; - int i, nrxq; + int i, nrxq, ciq_size; rxq_info = kzalloc(sizeof(*rxq_info), GFP_KERNEL); if (!rxq_info) return -ENOMEM; - if (uld_info->nrxq > s->nqs_per_uld) - rxq_info->nrxq = s->nqs_per_uld; - else - rxq_info->nrxq = uld_info->nrxq; - if (!uld_info->nciq) + if (adap->flags & USING_MSIX && uld_info->nrxq > s->nqs_per_uld) { + i = s->nqs_per_uld; + rxq_info->nrxq = roundup(i, adap->params.nports); + } else { + i = min_t(int, uld_info->nrxq, + num_online_cpus()); + rxq_info->nrxq = roundup(i, adap->params.nports); + } + if (!uld_info->ciq) { rxq_info->nciq = 0; - else if (uld_info->nciq && uld_info->nciq > s->nqs_per_uld) - rxq_info->nciq = s->nqs_per_uld; - else - rxq_info->nciq = uld_info->nciq; + } else { + if (adap->flags & USING_MSIX) + rxq_info->nciq = min_t(int, s->nqs_per_uld, + num_online_cpus()); + else + rxq_info->nciq = min_t(int, MAX_OFLD_QSETS, + num_online_cpus()); + rxq_info->nciq = ((rxq_info->nciq / adap->params.nports) * + adap->params.nports); + rxq_info->nciq = max_t(int, rxq_info->nciq, + adap->params.nports); + } nrxq = rxq_info->nrxq + rxq_info->nciq; /* total rxq's */ rxq_info->uldrxq = kcalloc(nrxq, sizeof(struct sge_ofld_rxq), @@ -259,12 +322,17 @@ int cfg_queues_uld(struct adapter *adap, unsigned int uld_type, r->fl.size = 72; } + ciq_size = 64 + adap->vres.cq.size + adap->tids.nftids; + if (ciq_size > SGE_MAX_IQ_SIZE) { + dev_warn(adap->pdev_dev, "CIQ size too small for available IQs\n"); + ciq_size = SGE_MAX_IQ_SIZE; + } + for (i = rxq_info->nrxq; i < nrxq; i++) { struct sge_ofld_rxq *r = &rxq_info->uldrxq[i]; - init_rspq(adap, &r->rspq, 5, 1, uld_info->ciq_size, 64); + init_rspq(adap, &r->rspq, 5, 1, ciq_size, 64); r->rspq.uld = uld_type; - r->fl.size = 72; } memcpy(rxq_info->name, uld_info->name, IFNAMSIZ); @@ -285,7 +353,8 @@ void free_queues_uld(struct adapter *adap, unsigned int uld_type) int request_msix_queue_irqs_uld(struct adapter *adap, unsigned int uld_type) { struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type]; - int idx, bmap_idx, err = 0; + int err = 0; + unsigned int idx, bmap_idx; for_each_uldrxq(rxq_info, idx) { bmap_idx = rxq_info->msix_tbl[idx]; @@ -310,10 +379,10 @@ unwind: void free_msix_queue_irqs_uld(struct adapter *adap, unsigned int uld_type) { struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type]; - int idx; + unsigned int idx, bmap_idx; for_each_uldrxq(rxq_info, idx) { - unsigned int bmap_idx = rxq_info->msix_tbl[idx]; + bmap_idx = rxq_info->msix_tbl[idx]; free_msix_idx_in_bmap(adap, bmap_idx); free_irq(adap->msix_info_ulds[bmap_idx].vec, @@ -325,10 +394,10 @@ void name_msix_vecs_uld(struct adapter *adap, unsigned int uld_type) { struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type]; int n = sizeof(adap->msix_info_ulds[0].desc); - int idx; + unsigned int idx, bmap_idx; for_each_uldrxq(rxq_info, idx) { - unsigned int bmap_idx = rxq_info->msix_tbl[idx]; + bmap_idx = rxq_info->msix_tbl[idx]; snprintf(adap->msix_info_ulds[bmap_idx].desc, n, "%s-%s%d", adap->port[0]->name, rxq_info->name, idx); @@ -390,15 +459,15 @@ static void uld_queue_init(struct adapter *adap, unsigned int uld_type, lli->nciq = rxq_info->nciq; } -int uld_mem_alloc(struct adapter *adap) +int t4_uld_mem_alloc(struct adapter *adap) { struct sge *s = &adap->sge; - adap->uld = kcalloc(adap->num_uld, sizeof(*adap->uld), GFP_KERNEL); + adap->uld = kcalloc(CXGB4_ULD_MAX, sizeof(*adap->uld), GFP_KERNEL); if (!adap->uld) return -ENOMEM; - s->uld_rxq_info = kzalloc(adap->num_uld * + s->uld_rxq_info = kzalloc(CXGB4_ULD_MAX * sizeof(struct sge_uld_rxq_info *), GFP_KERNEL); if (!s->uld_rxq_info) @@ -410,7 +479,7 @@ err_uld: return -ENOMEM; } -void uld_mem_free(struct adapter *adap) +void t4_uld_mem_free(struct adapter *adap) { struct sge *s = &adap->sge; @@ -418,6 +487,26 @@ void uld_mem_free(struct adapter *adap) kfree(adap->uld); } +void t4_uld_clean_up(struct adapter *adap) +{ + struct sge_uld_rxq_info *rxq_info; + unsigned int i; + + if (!adap->uld) + return; + for (i = 0; i < CXGB4_ULD_MAX; i++) { + if (!adap->uld[i].handle) + continue; + rxq_info = adap->sge.uld_rxq_info[i]; + if (adap->flags & FULL_INIT_DONE) + quiesce_rx_uld(adap, i); + if (adap->flags & USING_MSIX) + free_msix_queue_irqs_uld(adap, i); + free_sge_queues_uld(adap, i); + free_queues_uld(adap, i); + } +} + static void uld_init(struct adapter *adap, struct cxgb4_lld_info *lld) { int i; @@ -429,10 +518,15 @@ static void uld_init(struct adapter *adap, struct cxgb4_lld_info *lld) lld->ports = adap->port; lld->vr = &adap->vres; lld->mtus = adap->params.mtus; - lld->ntxq = adap->sge.iscsiqsets; + lld->ntxq = adap->sge.ofldqsets; lld->nchan = adap->params.nports; lld->nports = adap->params.nports; lld->wr_cred = adap->params.ofldq_wr_cred; + lld->iscsi_iolen = MAXRXDATA_G(t4_read_reg(adap, TP_PARA_REG2_A)); + lld->iscsi_tagmask = t4_read_reg(adap, ULP_RX_ISCSI_TAGMASK_A); + lld->iscsi_pgsz_order = t4_read_reg(adap, ULP_RX_ISCSI_PSZ_A); + lld->iscsi_llimit = t4_read_reg(adap, ULP_RX_ISCSI_LLIMIT_A); + lld->iscsi_ppm = &adap->iscsi_ppm; lld->adapter_type = adap->params.chip; lld->cclk_ps = 1000000000 / adap->params.vpd.cclk; lld->udb_density = 1 << adap->params.sge.eq_qpp; @@ -472,23 +566,37 @@ static void uld_attach(struct adapter *adap, unsigned int uld) } adap->uld[uld].handle = handle; + t4_register_netevent_notifier(); if (adap->flags & FULL_INIT_DONE) adap->uld[uld].state_change(handle, CXGB4_STATE_UP); } -int cxgb4_register_pci_uld(enum cxgb4_pci_uld type, - struct cxgb4_pci_uld_info *p) +/** + * cxgb4_register_uld - register an upper-layer driver + * @type: the ULD type + * @p: the ULD methods + * + * Registers an upper-layer driver with this driver and notifies the ULD + * about any presently available devices that support its type. Returns + * %-EBUSY if a ULD of the same type is already registered. + */ +int cxgb4_register_uld(enum cxgb4_uld type, + const struct cxgb4_uld_info *p) { int ret = 0; + unsigned int adap_idx = 0; struct adapter *adap; - if (type >= CXGB4_PCI_ULD_MAX) + if (type >= CXGB4_ULD_MAX) return -EINVAL; mutex_lock(&uld_mutex); list_for_each_entry(adap, &adapter_list, list_node) { - if (!is_pci_uld(adap)) + if ((type == CXGB4_ULD_CRYPTO && !is_pci_uld(adap)) || + (type != CXGB4_ULD_CRYPTO && !is_offload(adap))) + continue; + if (type == CXGB4_ULD_ISCSIT && is_t4(adap->params.chip)) continue; ret = cfg_queues_uld(adap, type, p); if (ret) @@ -510,11 +618,14 @@ int cxgb4_register_pci_uld(enum cxgb4_pci_uld type, } adap->uld[type] = *p; uld_attach(adap, type); + adap_idx++; } mutex_unlock(&uld_mutex); return 0; free_irq: + if (adap->flags & FULL_INIT_DONE) + quiesce_rx_uld(adap, type); if (adap->flags & USING_MSIX) free_msix_queue_irqs_uld(adap, type); free_rxq: @@ -522,21 +633,49 @@ free_rxq: free_queues: free_queues_uld(adap, type); out: + + list_for_each_entry(adap, &adapter_list, list_node) { + if ((type == CXGB4_ULD_CRYPTO && !is_pci_uld(adap)) || + (type != CXGB4_ULD_CRYPTO && !is_offload(adap))) + continue; + if (type == CXGB4_ULD_ISCSIT && is_t4(adap->params.chip)) + continue; + if (!adap_idx) + break; + adap->uld[type].handle = NULL; + adap->uld[type].add = NULL; + if (adap->flags & FULL_INIT_DONE) + quiesce_rx_uld(adap, type); + if (adap->flags & USING_MSIX) + free_msix_queue_irqs_uld(adap, type); + free_sge_queues_uld(adap, type); + free_queues_uld(adap, type); + adap_idx--; + } mutex_unlock(&uld_mutex); return ret; } -EXPORT_SYMBOL(cxgb4_register_pci_uld); +EXPORT_SYMBOL(cxgb4_register_uld); -int cxgb4_unregister_pci_uld(enum cxgb4_pci_uld type) +/** + * cxgb4_unregister_uld - unregister an upper-layer driver + * @type: the ULD type + * + * Unregisters an existing upper-layer driver. + */ +int cxgb4_unregister_uld(enum cxgb4_uld type) { struct adapter *adap; - if (type >= CXGB4_PCI_ULD_MAX) + if (type >= CXGB4_ULD_MAX) return -EINVAL; mutex_lock(&uld_mutex); list_for_each_entry(adap, &adapter_list, list_node) { - if (!is_pci_uld(adap)) + if ((type == CXGB4_ULD_CRYPTO && !is_pci_uld(adap)) || + (type != CXGB4_ULD_CRYPTO && !is_offload(adap))) + continue; + if (type == CXGB4_ULD_ISCSIT && is_t4(adap->params.chip)) continue; adap->uld[type].handle = NULL; adap->uld[type].add = NULL; @@ -551,4 +690,4 @@ int cxgb4_unregister_pci_uld(enum cxgb4_pci_uld type) return 0; } -EXPORT_SYMBOL(cxgb4_unregister_pci_uld); +EXPORT_SYMBOL(cxgb4_unregister_uld); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h index ab4037222f8d..b3544f6b88ca 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h @@ -42,6 +42,8 @@ #include #include "cxgb4.h" +#define MAX_ULD_QSETS 16 + /* CPL message priority levels */ enum { CPL_PRIORITY_DATA = 0, /* data messages */ @@ -189,9 +191,11 @@ static inline void set_wr_txq(struct sk_buff *skb, int prio, int queue) } enum cxgb4_uld { + CXGB4_ULD_INIT, CXGB4_ULD_RDMA, CXGB4_ULD_ISCSI, CXGB4_ULD_ISCSIT, + CXGB4_ULD_CRYPTO, CXGB4_ULD_MAX }; @@ -284,31 +288,11 @@ struct cxgb4_lld_info { struct cxgb4_uld_info { const char *name; - void *(*add)(const struct cxgb4_lld_info *p); - int (*rx_handler)(void *handle, const __be64 *rsp, - const struct pkt_gl *gl); - int (*state_change)(void *handle, enum cxgb4_state new_state); - int (*control)(void *handle, enum cxgb4_control control, ...); - int (*lro_rx_handler)(void *handle, const __be64 *rsp, - const struct pkt_gl *gl, - struct t4_lro_mgr *lro_mgr, - struct napi_struct *napi); - void (*lro_flush)(struct t4_lro_mgr *); -}; - -enum cxgb4_pci_uld { - CXGB4_PCI_ULD1, - CXGB4_PCI_ULD_MAX -}; - -struct cxgb4_pci_uld_info { - const char *name; - bool lro; void *handle; unsigned int nrxq; - unsigned int nciq; unsigned int rxq_size; - unsigned int ciq_size; + bool ciq; + bool lro; void *(*add)(const struct cxgb4_lld_info *p); int (*rx_handler)(void *handle, const __be64 *rsp, const struct pkt_gl *gl); @@ -323,9 +307,6 @@ struct cxgb4_pci_uld_info { int cxgb4_register_uld(enum cxgb4_uld type, const struct cxgb4_uld_info *p); int cxgb4_unregister_uld(enum cxgb4_uld type); -int cxgb4_register_pci_uld(enum cxgb4_pci_uld type, - struct cxgb4_pci_uld_info *p); -int cxgb4_unregister_pci_uld(enum cxgb4_pci_uld type); int cxgb4_ofld_send(struct net_device *dev, struct sk_buff *skb); unsigned int cxgb4_dbfifo_count(const struct net_device *dev, int lpfifo); unsigned int cxgb4_port_chan(const struct net_device *dev); diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index 9a607dbc6ca8..1e74fd6085df 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -2860,6 +2860,18 @@ int t4_sge_alloc_ctrl_txq(struct adapter *adap, struct sge_ctrl_txq *txq, return 0; } +int t4_sge_mod_ctrl_txq(struct adapter *adap, unsigned int eqid, + unsigned int cmplqid) +{ + u32 param, val; + + param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) | + FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DMAQ_EQ_CMPLIQID_CTRL) | + FW_PARAMS_PARAM_YZ_V(eqid)); + val = cmplqid; + return t4_set_params(adap, adap->mbox, adap->pf, 0, 1, ¶m, &val); +} + int t4_sge_alloc_ofld_txq(struct adapter *adap, struct sge_ofld_txq *txq, struct net_device *dev, unsigned int iqid) { @@ -3014,12 +3026,6 @@ void t4_free_sge_resources(struct adapter *adap) } } - /* clean up RDMA and iSCSI Rx queues */ - t4_free_ofld_rxqs(adap, adap->sge.iscsiqsets, adap->sge.iscsirxq); - t4_free_ofld_rxqs(adap, adap->sge.niscsitq, adap->sge.iscsitrxq); - t4_free_ofld_rxqs(adap, adap->sge.rdmaqs, adap->sge.rdmarxq); - t4_free_ofld_rxqs(adap, adap->sge.rdmaciqs, adap->sge.rdmaciq); - /* clean up offload Tx queues */ for (i = 0; i < ARRAY_SIZE(adap->sge.ofldtxq); i++) { struct sge_ofld_txq *q = &adap->sge.ofldtxq[i]; diff --git a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c index e4ba2d2616cd..7c0d7af0d3b7 100644 --- a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c +++ b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c @@ -84,6 +84,9 @@ static inline int send_tx_flowc_wr(struct cxgbi_sock *); static const struct cxgb4_uld_info cxgb4i_uld_info = { .name = DRV_MODULE_NAME, + .nrxq = MAX_ULD_QSETS, + .rxq_size = 1024, + .lro = false, .add = t4_uld_add, .rx_handler = t4_uld_rx_handler, .state_change = t4_uld_state_change, diff --git a/drivers/target/iscsi/cxgbit/cxgbit_main.c b/drivers/target/iscsi/cxgbit/cxgbit_main.c index 27dd11aff934..ad26b9372f10 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_main.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_main.c @@ -652,6 +652,9 @@ static struct iscsit_transport cxgbit_transport = { static struct cxgb4_uld_info cxgbit_uld_info = { .name = DRV_NAME, + .nrxq = MAX_ULD_QSETS, + .rxq_size = 1024, + .lro = true, .add = cxgbit_uld_add, .state_change = cxgbit_uld_state_change, .lro_rx_handler = cxgbit_uld_lro_rx_handler, From ee40681037c0e5fa0058447d7603a4fb77308bce Mon Sep 17 00:00:00 2001 From: Nelson Chang Date: Sat, 17 Sep 2016 23:50:55 +0800 Subject: [PATCH 1152/1715] net: ethernet: mediatek: add HW LRO functions of PDMA RX rings The codes add the large receive offload (LRO) functions by hardware as below: 1) PDMA has total four RX rings that one is the normal ring, and others can be configured as LRO rings. 2) Only TCP/IP RX flows can be offloaded. The hardware can set four IP addresses at most, if the destination IP of the RX flow matches one of them, it has the chance to be offloaded. 3) There three RX flows can be offloaded at most, and one flow is mapped to one RX ring. 4) If there are more than three candidate RX flows, the hardware can choose three of them by throughput comparison results. Signed-off-by: Nelson Chang Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 215 ++++++++++++++++++-- drivers/net/ethernet/mediatek/mtk_eth_soc.h | 75 ++++++- 2 files changed, 265 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 522fe8dda6c3..18600cbbb907 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -820,11 +820,51 @@ drop: return NETDEV_TX_OK; } +static struct mtk_rx_ring *mtk_get_rx_ring(struct mtk_eth *eth) +{ + int i; + struct mtk_rx_ring *ring; + int idx; + + if (!eth->hwlro) + return ð->rx_ring[0]; + + for (i = 0; i < MTK_MAX_RX_RING_NUM; i++) { + ring = ð->rx_ring[i]; + idx = NEXT_RX_DESP_IDX(ring->calc_idx, ring->dma_size); + if (ring->dma[idx].rxd2 & RX_DMA_DONE) { + ring->calc_idx_update = true; + return ring; + } + } + + return NULL; +} + +static void mtk_update_rx_cpu_idx(struct mtk_eth *eth) +{ + struct mtk_rx_ring *ring; + int i; + + if (!eth->hwlro) { + ring = ð->rx_ring[0]; + mtk_w32(eth, ring->calc_idx, ring->crx_idx_reg); + } else { + for (i = 0; i < MTK_MAX_RX_RING_NUM; i++) { + ring = ð->rx_ring[i]; + if (ring->calc_idx_update) { + ring->calc_idx_update = false; + mtk_w32(eth, ring->calc_idx, ring->crx_idx_reg); + } + } + } +} + static int mtk_poll_rx(struct napi_struct *napi, int budget, struct mtk_eth *eth) { - struct mtk_rx_ring *ring = ð->rx_ring; - int idx = ring->calc_idx; + struct mtk_rx_ring *ring; + int idx; struct sk_buff *skb; u8 *data, *new_data; struct mtk_rx_dma *rxd, trxd; @@ -836,7 +876,11 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget, dma_addr_t dma_addr; int mac = 0; - idx = NEXT_RX_DESP_IDX(idx); + ring = mtk_get_rx_ring(eth); + if (unlikely(!ring)) + goto rx_done; + + idx = NEXT_RX_DESP_IDX(ring->calc_idx, ring->dma_size); rxd = &ring->dma[idx]; data = ring->data[idx]; @@ -907,12 +951,13 @@ release_desc: done++; } +rx_done: if (done) { /* make sure that all changes to the dma ring are flushed before * we continue */ wmb(); - mtk_w32(eth, ring->calc_idx, MTK_PRX_CRX_IDX0); + mtk_update_rx_cpu_idx(eth); } return done; @@ -1135,32 +1180,41 @@ static void mtk_tx_clean(struct mtk_eth *eth) } } -static int mtk_rx_alloc(struct mtk_eth *eth) +static int mtk_rx_alloc(struct mtk_eth *eth, int ring_no, int rx_flag) { - struct mtk_rx_ring *ring = ð->rx_ring; + struct mtk_rx_ring *ring = ð->rx_ring[ring_no]; + int rx_data_len, rx_dma_size; int i; - ring->frag_size = mtk_max_frag_size(ETH_DATA_LEN); + if (rx_flag == MTK_RX_FLAGS_HWLRO) { + rx_data_len = MTK_MAX_LRO_RX_LENGTH; + rx_dma_size = MTK_HW_LRO_DMA_SIZE; + } else { + rx_data_len = ETH_DATA_LEN; + rx_dma_size = MTK_DMA_SIZE; + } + + ring->frag_size = mtk_max_frag_size(rx_data_len); ring->buf_size = mtk_max_buf_size(ring->frag_size); - ring->data = kcalloc(MTK_DMA_SIZE, sizeof(*ring->data), + ring->data = kcalloc(rx_dma_size, sizeof(*ring->data), GFP_KERNEL); if (!ring->data) return -ENOMEM; - for (i = 0; i < MTK_DMA_SIZE; i++) { + for (i = 0; i < rx_dma_size; i++) { ring->data[i] = netdev_alloc_frag(ring->frag_size); if (!ring->data[i]) return -ENOMEM; } ring->dma = dma_alloc_coherent(eth->dev, - MTK_DMA_SIZE * sizeof(*ring->dma), + rx_dma_size * sizeof(*ring->dma), &ring->phys, GFP_ATOMIC | __GFP_ZERO); if (!ring->dma) return -ENOMEM; - for (i = 0; i < MTK_DMA_SIZE; i++) { + for (i = 0; i < rx_dma_size; i++) { dma_addr_t dma_addr = dma_map_single(eth->dev, ring->data[i] + NET_SKB_PAD, ring->buf_size, @@ -1171,27 +1225,30 @@ static int mtk_rx_alloc(struct mtk_eth *eth) ring->dma[i].rxd2 = RX_DMA_PLEN0(ring->buf_size); } - ring->calc_idx = MTK_DMA_SIZE - 1; + ring->dma_size = rx_dma_size; + ring->calc_idx_update = false; + ring->calc_idx = rx_dma_size - 1; + ring->crx_idx_reg = MTK_PRX_CRX_IDX_CFG(ring_no); /* make sure that all changes to the dma ring are flushed before we * continue */ wmb(); - mtk_w32(eth, eth->rx_ring.phys, MTK_PRX_BASE_PTR0); - mtk_w32(eth, MTK_DMA_SIZE, MTK_PRX_MAX_CNT0); - mtk_w32(eth, eth->rx_ring.calc_idx, MTK_PRX_CRX_IDX0); - mtk_w32(eth, MTK_PST_DRX_IDX0, MTK_PDMA_RST_IDX); + mtk_w32(eth, ring->phys, MTK_PRX_BASE_PTR_CFG(ring_no)); + mtk_w32(eth, rx_dma_size, MTK_PRX_MAX_CNT_CFG(ring_no)); + mtk_w32(eth, ring->calc_idx, ring->crx_idx_reg); + mtk_w32(eth, MTK_PST_DRX_IDX_CFG(ring_no), MTK_PDMA_RST_IDX); return 0; } -static void mtk_rx_clean(struct mtk_eth *eth) +static void mtk_rx_clean(struct mtk_eth *eth, int ring_no) { - struct mtk_rx_ring *ring = ð->rx_ring; + struct mtk_rx_ring *ring = ð->rx_ring[ring_no]; int i; if (ring->data && ring->dma) { - for (i = 0; i < MTK_DMA_SIZE; i++) { + for (i = 0; i < ring->dma_size; i++) { if (!ring->data[i]) continue; if (!ring->dma[i].rxd1) @@ -1208,13 +1265,98 @@ static void mtk_rx_clean(struct mtk_eth *eth) if (ring->dma) { dma_free_coherent(eth->dev, - MTK_DMA_SIZE * sizeof(*ring->dma), + ring->dma_size * sizeof(*ring->dma), ring->dma, ring->phys); ring->dma = NULL; } } +static int mtk_hwlro_rx_init(struct mtk_eth *eth) +{ + int i; + u32 ring_ctrl_dw1 = 0, ring_ctrl_dw2 = 0, ring_ctrl_dw3 = 0; + u32 lro_ctrl_dw0 = 0, lro_ctrl_dw3 = 0; + + /* set LRO rings to auto-learn modes */ + ring_ctrl_dw2 |= MTK_RING_AUTO_LERAN_MODE; + + /* validate LRO ring */ + ring_ctrl_dw2 |= MTK_RING_VLD; + + /* set AGE timer (unit: 20us) */ + ring_ctrl_dw2 |= MTK_RING_AGE_TIME_H; + ring_ctrl_dw1 |= MTK_RING_AGE_TIME_L; + + /* set max AGG timer (unit: 20us) */ + ring_ctrl_dw2 |= MTK_RING_MAX_AGG_TIME; + + /* set max LRO AGG count */ + ring_ctrl_dw2 |= MTK_RING_MAX_AGG_CNT_L; + ring_ctrl_dw3 |= MTK_RING_MAX_AGG_CNT_H; + + for (i = 1; i < MTK_MAX_RX_RING_NUM; i++) { + mtk_w32(eth, ring_ctrl_dw1, MTK_LRO_CTRL_DW1_CFG(i)); + mtk_w32(eth, ring_ctrl_dw2, MTK_LRO_CTRL_DW2_CFG(i)); + mtk_w32(eth, ring_ctrl_dw3, MTK_LRO_CTRL_DW3_CFG(i)); + } + + /* IPv4 checksum update enable */ + lro_ctrl_dw0 |= MTK_L3_CKS_UPD_EN; + + /* switch priority comparison to packet count mode */ + lro_ctrl_dw0 |= MTK_LRO_ALT_PKT_CNT_MODE; + + /* bandwidth threshold setting */ + mtk_w32(eth, MTK_HW_LRO_BW_THRE, MTK_PDMA_LRO_CTRL_DW2); + + /* auto-learn score delta setting */ + mtk_w32(eth, MTK_HW_LRO_REPLACE_DELTA, MTK_PDMA_LRO_ALT_SCORE_DELTA); + + /* set refresh timer for altering flows to 1 sec. (unit: 20us) */ + mtk_w32(eth, (MTK_HW_LRO_TIMER_UNIT << 16) | MTK_HW_LRO_REFRESH_TIME, + MTK_PDMA_LRO_ALT_REFRESH_TIMER); + + /* set HW LRO mode & the max aggregation count for rx packets */ + lro_ctrl_dw3 |= MTK_ADMA_MODE | (MTK_HW_LRO_MAX_AGG_CNT & 0xff); + + /* the minimal remaining room of SDL0 in RXD for lro aggregation */ + lro_ctrl_dw3 |= MTK_LRO_MIN_RXD_SDL; + + /* enable HW LRO */ + lro_ctrl_dw0 |= MTK_LRO_EN; + + mtk_w32(eth, lro_ctrl_dw3, MTK_PDMA_LRO_CTRL_DW3); + mtk_w32(eth, lro_ctrl_dw0, MTK_PDMA_LRO_CTRL_DW0); + + return 0; +} + +static void mtk_hwlro_rx_uninit(struct mtk_eth *eth) +{ + int i; + u32 val; + + /* relinquish lro rings, flush aggregated packets */ + mtk_w32(eth, MTK_LRO_RING_RELINQUISH_REQ, MTK_PDMA_LRO_CTRL_DW0); + + /* wait for relinquishments done */ + for (i = 0; i < 10; i++) { + val = mtk_r32(eth, MTK_PDMA_LRO_CTRL_DW0); + if (val & MTK_LRO_RING_RELINQUISH_DONE) { + msleep(20); + continue; + } + } + + /* invalidate lro rings */ + for (i = 1; i < MTK_MAX_RX_RING_NUM; i++) + mtk_w32(eth, 0, MTK_LRO_CTRL_DW2_CFG(i)); + + /* disable HW LRO */ + mtk_w32(eth, 0, MTK_PDMA_LRO_CTRL_DW0); +} + /* wait for DMA to finish whatever it is doing before we start using it again */ static int mtk_dma_busy_wait(struct mtk_eth *eth) { @@ -1235,6 +1377,7 @@ static int mtk_dma_busy_wait(struct mtk_eth *eth) static int mtk_dma_init(struct mtk_eth *eth) { int err; + u32 i; if (mtk_dma_busy_wait(eth)) return -EBUSY; @@ -1250,10 +1393,21 @@ static int mtk_dma_init(struct mtk_eth *eth) if (err) return err; - err = mtk_rx_alloc(eth); + err = mtk_rx_alloc(eth, 0, MTK_RX_FLAGS_NORMAL); if (err) return err; + if (eth->hwlro) { + for (i = 1; i < MTK_MAX_RX_RING_NUM; i++) { + err = mtk_rx_alloc(eth, i, MTK_RX_FLAGS_HWLRO); + if (err) + return err; + } + err = mtk_hwlro_rx_init(eth); + if (err) + return err; + } + /* Enable random early drop and set drop threshold automatically */ mtk_w32(eth, FC_THRES_DROP_MODE | FC_THRES_DROP_EN | FC_THRES_MIN, MTK_QDMA_FC_THRES); @@ -1278,7 +1432,14 @@ static void mtk_dma_free(struct mtk_eth *eth) eth->phy_scratch_ring = 0; } mtk_tx_clean(eth); - mtk_rx_clean(eth); + mtk_rx_clean(eth, 0); + + if (eth->hwlro) { + mtk_hwlro_rx_uninit(eth); + for (i = 1; i < MTK_MAX_RX_RING_NUM; i++) + mtk_rx_clean(eth, i); + } + kfree(eth->scratch_head); } @@ -1873,6 +2034,9 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np) mac->hw = eth; mac->of_node = np; + memset(mac->hwlro_ip, 0, sizeof(mac->hwlro_ip)); + mac->hwlro_ip_cnt = 0; + mac->hw_stats = devm_kzalloc(eth->dev, sizeof(*mac->hw_stats), GFP_KERNEL); @@ -1889,6 +2053,11 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np) eth->netdev[id]->watchdog_timeo = 5 * HZ; eth->netdev[id]->netdev_ops = &mtk_netdev_ops; eth->netdev[id]->base_addr = (unsigned long)eth->base; + + eth->netdev[id]->hw_features = MTK_HW_FEATURES; + if (eth->hwlro) + eth->netdev[id]->hw_features |= NETIF_F_LRO; + eth->netdev[id]->vlan_features = MTK_HW_FEATURES & ~(NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX); eth->netdev[id]->features |= MTK_HW_FEATURES; @@ -1941,6 +2110,8 @@ static int mtk_probe(struct platform_device *pdev) return PTR_ERR(eth->pctl); } + eth->hwlro = of_property_read_bool(pdev->dev.of_node, "mediatek,hwlro"); + for (i = 0; i < 3; i++) { eth->irq[i] = platform_get_irq(pdev, i); if (eth->irq[i] < 0) { diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index 79954b419b53..7c5e534d2120 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -39,7 +39,21 @@ NETIF_F_SG | NETIF_F_TSO | \ NETIF_F_TSO6 | \ NETIF_F_IPV6_CSUM) -#define NEXT_RX_DESP_IDX(X) (((X) + 1) & (MTK_DMA_SIZE - 1)) +#define NEXT_RX_DESP_IDX(X, Y) (((X) + 1) & ((Y) - 1)) + +#define MTK_MAX_RX_RING_NUM 4 +#define MTK_HW_LRO_DMA_SIZE 8 + +#define MTK_MAX_LRO_RX_LENGTH (4096 * 3) +#define MTK_MAX_LRO_IP_CNT 2 +#define MTK_HW_LRO_TIMER_UNIT 1 /* 20 us */ +#define MTK_HW_LRO_REFRESH_TIME 50000 /* 1 sec. */ +#define MTK_HW_LRO_AGG_TIME 10 /* 200us */ +#define MTK_HW_LRO_AGE_TIME 50 /* 1ms */ +#define MTK_HW_LRO_MAX_AGG_CNT 64 +#define MTK_HW_LRO_BW_THRE 3000 +#define MTK_HW_LRO_REPLACE_DELTA 1000 +#define MTK_HW_LRO_SDL_REMAIN_ROOM 1522 /* Frame Engine Global Reset Register */ #define MTK_RST_GL 0x04 @@ -50,6 +64,9 @@ #define MTK_GDM1_AF BIT(28) #define MTK_GDM2_AF BIT(29) +/* PDMA HW LRO Alter Flow Timer Register */ +#define MTK_PDMA_LRO_ALT_REFRESH_TIMER 0x1c + /* Frame Engine Interrupt Grouping Register */ #define MTK_FE_INT_GRP 0x20 @@ -70,12 +87,29 @@ /* PDMA RX Base Pointer Register */ #define MTK_PRX_BASE_PTR0 0x900 +#define MTK_PRX_BASE_PTR_CFG(x) (MTK_PRX_BASE_PTR0 + (x * 0x10)) /* PDMA RX Maximum Count Register */ #define MTK_PRX_MAX_CNT0 0x904 +#define MTK_PRX_MAX_CNT_CFG(x) (MTK_PRX_MAX_CNT0 + (x * 0x10)) /* PDMA RX CPU Pointer Register */ #define MTK_PRX_CRX_IDX0 0x908 +#define MTK_PRX_CRX_IDX_CFG(x) (MTK_PRX_CRX_IDX0 + (x * 0x10)) + +/* PDMA HW LRO Control Registers */ +#define MTK_PDMA_LRO_CTRL_DW0 0x980 +#define MTK_LRO_EN BIT(0) +#define MTK_L3_CKS_UPD_EN BIT(7) +#define MTK_LRO_ALT_PKT_CNT_MODE BIT(21) +#define MTK_LRO_RING_RELINQUISH_REQ (0x3 << 26) +#define MTK_LRO_RING_RELINQUISH_DONE (0x3 << 29) + +#define MTK_PDMA_LRO_CTRL_DW1 0x984 +#define MTK_PDMA_LRO_CTRL_DW2 0x988 +#define MTK_PDMA_LRO_CTRL_DW3 0x98c +#define MTK_ADMA_MODE BIT(15) +#define MTK_LRO_MIN_RXD_SDL (MTK_HW_LRO_SDL_REMAIN_ROOM << 16) /* PDMA Global Configuration Register */ #define MTK_PDMA_GLO_CFG 0xa04 @@ -84,6 +118,7 @@ /* PDMA Reset Index Register */ #define MTK_PDMA_RST_IDX 0xa08 #define MTK_PST_DRX_IDX0 BIT(16) +#define MTK_PST_DRX_IDX_CFG(x) (MTK_PST_DRX_IDX0 << (x)) /* PDMA Delay Interrupt Register */ #define MTK_PDMA_DELAY_INT 0xa0c @@ -94,10 +129,33 @@ /* PDMA Interrupt Mask Register */ #define MTK_PDMA_INT_MASK 0xa28 +/* PDMA HW LRO Alter Flow Delta Register */ +#define MTK_PDMA_LRO_ALT_SCORE_DELTA 0xa4c + /* PDMA Interrupt grouping registers */ #define MTK_PDMA_INT_GRP1 0xa50 #define MTK_PDMA_INT_GRP2 0xa54 +/* PDMA HW LRO IP Setting Registers */ +#define MTK_LRO_RX_RING0_DIP_DW0 0xb04 +#define MTK_LRO_DIP_DW0_CFG(x) (MTK_LRO_RX_RING0_DIP_DW0 + (x * 0x40)) +#define MTK_RING_MYIP_VLD BIT(9) + +/* PDMA HW LRO Ring Control Registers */ +#define MTK_LRO_RX_RING0_CTRL_DW1 0xb28 +#define MTK_LRO_RX_RING0_CTRL_DW2 0xb2c +#define MTK_LRO_RX_RING0_CTRL_DW3 0xb30 +#define MTK_LRO_CTRL_DW1_CFG(x) (MTK_LRO_RX_RING0_CTRL_DW1 + (x * 0x40)) +#define MTK_LRO_CTRL_DW2_CFG(x) (MTK_LRO_RX_RING0_CTRL_DW2 + (x * 0x40)) +#define MTK_LRO_CTRL_DW3_CFG(x) (MTK_LRO_RX_RING0_CTRL_DW3 + (x * 0x40)) +#define MTK_RING_AGE_TIME_L ((MTK_HW_LRO_AGE_TIME & 0x3ff) << 22) +#define MTK_RING_AGE_TIME_H ((MTK_HW_LRO_AGE_TIME >> 10) & 0x3f) +#define MTK_RING_AUTO_LERAN_MODE (3 << 6) +#define MTK_RING_VLD BIT(8) +#define MTK_RING_MAX_AGG_TIME ((MTK_HW_LRO_AGG_TIME & 0xffff) << 10) +#define MTK_RING_MAX_AGG_CNT_L ((MTK_HW_LRO_MAX_AGG_CNT & 0x3f) << 26) +#define MTK_RING_MAX_AGG_CNT_H ((MTK_HW_LRO_MAX_AGG_CNT >> 6) & 0x3) + /* QDMA TX Queue Configuration Registers */ #define MTK_QTX_CFG(x) (0x1800 + (x * 0x10)) #define QDMA_RES_THRES 4 @@ -132,7 +190,6 @@ /* QDMA Reset Index Register */ #define MTK_QDMA_RST_IDX 0x1A08 -#define MTK_PST_DRX_IDX0 BIT(16) /* QDMA Delay Interrupt Register */ #define MTK_QDMA_DELAY_INT 0x1A0C @@ -377,6 +434,12 @@ struct mtk_tx_ring { atomic_t free_count; }; +/* PDMA rx ring mode */ +enum mtk_rx_flags { + MTK_RX_FLAGS_NORMAL = 0, + MTK_RX_FLAGS_HWLRO, +}; + /* struct mtk_rx_ring - This struct holds info describing a RX ring * @dma: The descriptor ring * @data: The memory pointed at by the ring @@ -391,7 +454,10 @@ struct mtk_rx_ring { dma_addr_t phys; u16 frag_size; u16 buf_size; + u16 dma_size; + bool calc_idx_update; u16 calc_idx; + u32 crx_idx_reg; }; /* currently no SoC has more than 2 macs */ @@ -439,9 +505,10 @@ struct mtk_eth { unsigned long sysclk; struct regmap *ethsys; struct regmap *pctl; + bool hwlro; atomic_t dma_refcnt; struct mtk_tx_ring tx_ring; - struct mtk_rx_ring rx_ring; + struct mtk_rx_ring rx_ring[MTK_MAX_RX_RING_NUM]; struct napi_struct tx_napi; struct napi_struct rx_napi; struct mtk_tx_dma *scratch_ring; @@ -470,6 +537,8 @@ struct mtk_mac { struct mtk_eth *hw; struct mtk_hw_stats *hw_stats; struct phy_device *phy_dev; + __be32 hwlro_ip[MTK_MAX_LRO_IP_CNT]; + int hwlro_ip_cnt; }; /* the struct describing the SoC. these are declared in the soc_xyz.c files */ From 7aab747e5563ecbc9f3cb64ddea13fe7b9fee2bd Mon Sep 17 00:00:00 2001 From: Nelson Chang Date: Sat, 17 Sep 2016 23:50:56 +0800 Subject: [PATCH 1153/1715] net: ethernet: mediatek: add ethtool functions to configure RX flows of HW LRO The codes add ethtool functions to set RX flows for HW LRO. Because the HW LRO hardware can only recognize the destination IP of TCP/IP RX flows, the ethtool command to add HW LRO flow is as below: ethtool -N [devname] flow-type tcp4 dst-ip [ip_addr] loc [0~1] Otherwise, cause the hardware can set total four destination IPs, each GMAC (GMAC1/GMAC2) can set two IPs separately at most. Signed-off-by: Nelson Chang Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 236 ++++++++++++++++++++ 1 file changed, 236 insertions(+) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 18600cbbb907..481f3609a1ea 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1357,6 +1357,182 @@ static void mtk_hwlro_rx_uninit(struct mtk_eth *eth) mtk_w32(eth, 0, MTK_PDMA_LRO_CTRL_DW0); } +static void mtk_hwlro_val_ipaddr(struct mtk_eth *eth, int idx, __be32 ip) +{ + u32 reg_val; + + reg_val = mtk_r32(eth, MTK_LRO_CTRL_DW2_CFG(idx)); + + /* invalidate the IP setting */ + mtk_w32(eth, (reg_val & ~MTK_RING_MYIP_VLD), MTK_LRO_CTRL_DW2_CFG(idx)); + + mtk_w32(eth, ip, MTK_LRO_DIP_DW0_CFG(idx)); + + /* validate the IP setting */ + mtk_w32(eth, (reg_val | MTK_RING_MYIP_VLD), MTK_LRO_CTRL_DW2_CFG(idx)); +} + +static void mtk_hwlro_inval_ipaddr(struct mtk_eth *eth, int idx) +{ + u32 reg_val; + + reg_val = mtk_r32(eth, MTK_LRO_CTRL_DW2_CFG(idx)); + + /* invalidate the IP setting */ + mtk_w32(eth, (reg_val & ~MTK_RING_MYIP_VLD), MTK_LRO_CTRL_DW2_CFG(idx)); + + mtk_w32(eth, 0, MTK_LRO_DIP_DW0_CFG(idx)); +} + +static int mtk_hwlro_get_ip_cnt(struct mtk_mac *mac) +{ + int cnt = 0; + int i; + + for (i = 0; i < MTK_MAX_LRO_IP_CNT; i++) { + if (mac->hwlro_ip[i]) + cnt++; + } + + return cnt; +} + +static int mtk_hwlro_add_ipaddr(struct net_device *dev, + struct ethtool_rxnfc *cmd) +{ + struct ethtool_rx_flow_spec *fsp = + (struct ethtool_rx_flow_spec *)&cmd->fs; + struct mtk_mac *mac = netdev_priv(dev); + struct mtk_eth *eth = mac->hw; + int hwlro_idx; + + if ((fsp->flow_type != TCP_V4_FLOW) || + (!fsp->h_u.tcp_ip4_spec.ip4dst) || + (fsp->location > 1)) + return -EINVAL; + + mac->hwlro_ip[fsp->location] = htonl(fsp->h_u.tcp_ip4_spec.ip4dst); + hwlro_idx = (mac->id * MTK_MAX_LRO_IP_CNT) + fsp->location; + + mac->hwlro_ip_cnt = mtk_hwlro_get_ip_cnt(mac); + + mtk_hwlro_val_ipaddr(eth, hwlro_idx, mac->hwlro_ip[fsp->location]); + + return 0; +} + +static int mtk_hwlro_del_ipaddr(struct net_device *dev, + struct ethtool_rxnfc *cmd) +{ + struct ethtool_rx_flow_spec *fsp = + (struct ethtool_rx_flow_spec *)&cmd->fs; + struct mtk_mac *mac = netdev_priv(dev); + struct mtk_eth *eth = mac->hw; + int hwlro_idx; + + if (fsp->location > 1) + return -EINVAL; + + mac->hwlro_ip[fsp->location] = 0; + hwlro_idx = (mac->id * MTK_MAX_LRO_IP_CNT) + fsp->location; + + mac->hwlro_ip_cnt = mtk_hwlro_get_ip_cnt(mac); + + mtk_hwlro_inval_ipaddr(eth, hwlro_idx); + + return 0; +} + +static void mtk_hwlro_netdev_disable(struct net_device *dev) +{ + struct mtk_mac *mac = netdev_priv(dev); + struct mtk_eth *eth = mac->hw; + int i, hwlro_idx; + + for (i = 0; i < MTK_MAX_LRO_IP_CNT; i++) { + mac->hwlro_ip[i] = 0; + hwlro_idx = (mac->id * MTK_MAX_LRO_IP_CNT) + i; + + mtk_hwlro_inval_ipaddr(eth, hwlro_idx); + } + + mac->hwlro_ip_cnt = 0; +} + +static int mtk_hwlro_get_fdir_entry(struct net_device *dev, + struct ethtool_rxnfc *cmd) +{ + struct mtk_mac *mac = netdev_priv(dev); + struct ethtool_rx_flow_spec *fsp = + (struct ethtool_rx_flow_spec *)&cmd->fs; + + /* only tcp dst ipv4 is meaningful, others are meaningless */ + fsp->flow_type = TCP_V4_FLOW; + fsp->h_u.tcp_ip4_spec.ip4dst = ntohl(mac->hwlro_ip[fsp->location]); + fsp->m_u.tcp_ip4_spec.ip4dst = 0; + + fsp->h_u.tcp_ip4_spec.ip4src = 0; + fsp->m_u.tcp_ip4_spec.ip4src = 0xffffffff; + fsp->h_u.tcp_ip4_spec.psrc = 0; + fsp->m_u.tcp_ip4_spec.psrc = 0xffff; + fsp->h_u.tcp_ip4_spec.pdst = 0; + fsp->m_u.tcp_ip4_spec.pdst = 0xffff; + fsp->h_u.tcp_ip4_spec.tos = 0; + fsp->m_u.tcp_ip4_spec.tos = 0xff; + + return 0; +} + +static int mtk_hwlro_get_fdir_all(struct net_device *dev, + struct ethtool_rxnfc *cmd, + u32 *rule_locs) +{ + struct mtk_mac *mac = netdev_priv(dev); + int cnt = 0; + int i; + + for (i = 0; i < MTK_MAX_LRO_IP_CNT; i++) { + if (mac->hwlro_ip[i]) { + rule_locs[cnt] = i; + cnt++; + } + } + + cmd->rule_cnt = cnt; + + return 0; +} + +static netdev_features_t mtk_fix_features(struct net_device *dev, + netdev_features_t features) +{ + if (!(features & NETIF_F_LRO)) { + struct mtk_mac *mac = netdev_priv(dev); + int ip_cnt = mtk_hwlro_get_ip_cnt(mac); + + if (ip_cnt) { + netdev_info(dev, "RX flow is programmed, LRO should keep on\n"); + + features |= NETIF_F_LRO; + } + } + + return features; +} + +static int mtk_set_features(struct net_device *dev, netdev_features_t features) +{ + int err = 0; + + if (!((dev->features ^ features) & NETIF_F_LRO)) + return 0; + + if (!(features & NETIF_F_LRO)) + mtk_hwlro_netdev_disable(dev); + + return err; +} + /* wait for DMA to finish whatever it is doing before we start using it again */ static int mtk_dma_busy_wait(struct mtk_eth *eth) { @@ -1971,6 +2147,62 @@ static void mtk_get_ethtool_stats(struct net_device *dev, } while (u64_stats_fetch_retry_irq(&hwstats->syncp, start)); } +static int mtk_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd, + u32 *rule_locs) +{ + int ret = -EOPNOTSUPP; + + switch (cmd->cmd) { + case ETHTOOL_GRXRINGS: + if (dev->features & NETIF_F_LRO) { + cmd->data = MTK_MAX_RX_RING_NUM; + ret = 0; + } + break; + case ETHTOOL_GRXCLSRLCNT: + if (dev->features & NETIF_F_LRO) { + struct mtk_mac *mac = netdev_priv(dev); + + cmd->rule_cnt = mac->hwlro_ip_cnt; + ret = 0; + } + break; + case ETHTOOL_GRXCLSRULE: + if (dev->features & NETIF_F_LRO) + ret = mtk_hwlro_get_fdir_entry(dev, cmd); + break; + case ETHTOOL_GRXCLSRLALL: + if (dev->features & NETIF_F_LRO) + ret = mtk_hwlro_get_fdir_all(dev, cmd, + rule_locs); + break; + default: + break; + } + + return ret; +} + +static int mtk_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) +{ + int ret = -EOPNOTSUPP; + + switch (cmd->cmd) { + case ETHTOOL_SRXCLSRLINS: + if (dev->features & NETIF_F_LRO) + ret = mtk_hwlro_add_ipaddr(dev, cmd); + break; + case ETHTOOL_SRXCLSRLDEL: + if (dev->features & NETIF_F_LRO) + ret = mtk_hwlro_del_ipaddr(dev, cmd); + break; + default: + break; + } + + return ret; +} + static const struct ethtool_ops mtk_ethtool_ops = { .get_settings = mtk_get_settings, .set_settings = mtk_set_settings, @@ -1982,6 +2214,8 @@ static const struct ethtool_ops mtk_ethtool_ops = { .get_strings = mtk_get_strings, .get_sset_count = mtk_get_sset_count, .get_ethtool_stats = mtk_get_ethtool_stats, + .get_rxnfc = mtk_get_rxnfc, + .set_rxnfc = mtk_set_rxnfc, }; static const struct net_device_ops mtk_netdev_ops = { @@ -1996,6 +2230,8 @@ static const struct net_device_ops mtk_netdev_ops = { .ndo_change_mtu = eth_change_mtu, .ndo_tx_timeout = mtk_tx_timeout, .ndo_get_stats64 = mtk_get_stats64, + .ndo_fix_features = mtk_fix_features, + .ndo_set_features = mtk_set_features, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = mtk_poll_controller, #endif From 004e6cc6c181aa109427594fca35eb55d89d780e Mon Sep 17 00:00:00 2001 From: Nelson Chang Date: Sat, 17 Sep 2016 23:50:57 +0800 Subject: [PATCH 1154/1715] net: ethernet: mediatek: add the dts property to set if the HW supports LRO Add the dts property for the capability if the hardware supports LRO. Signed-off-by: Nelson Chang Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/mediatek-net.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/mediatek-net.txt b/Documentation/devicetree/bindings/net/mediatek-net.txt index 32eaaca04d9b..6103e5583070 100644 --- a/Documentation/devicetree/bindings/net/mediatek-net.txt +++ b/Documentation/devicetree/bindings/net/mediatek-net.txt @@ -24,7 +24,7 @@ Required properties: Optional properties: - interrupt-parent: Should be the phandle for the interrupt controller that services interrupts for this device - +- mediatek,hwlro: the capability if the hardware supports LRO functions * Ethernet MAC node @@ -51,6 +51,7 @@ eth: ethernet@1b100000 { reset-names = "eth"; mediatek,ethsys = <ðsys>; mediatek,pctl = <&syscfg_pctl_a>; + mediatek,hwlro; #address-cells = <1>; #size-cells = <0>; From 106323b905a6bcd21ff83dd4e19566282fd5eb52 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 17 Sep 2016 15:52:17 +0000 Subject: [PATCH 1155/1715] cxgb4: Fix return value check in cfg_queues_uld() Fix the retrn value check which testing the wrong variable in cfg_queues_uld(). Fixes: 94cdb8bb993a ("cxgb4: Add support for dynamic allocation of resources for ULD") Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c index fc04e3b878e8..d12a73e03565 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c @@ -308,7 +308,7 @@ int cfg_queues_uld(struct adapter *adap, unsigned int uld_type, } rxq_info->rspq_id = kcalloc(nrxq, sizeof(unsigned short), GFP_KERNEL); - if (!rxq_info->uldrxq) { + if (!rxq_info->rspq_id) { kfree(rxq_info->uldrxq); kfree(rxq_info); return -ENOMEM; From 1486587b2fcda08dee7eab23784d504eed772c45 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 18 Sep 2016 00:57:30 +0200 Subject: [PATCH 1156/1715] pie: use qdisc_dequeue_head wrapper Doesn't change generated code. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/sched/sch_pie.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c index a570b0bb254c..d976d74b22d7 100644 --- a/net/sched/sch_pie.c +++ b/net/sched/sch_pie.c @@ -511,7 +511,7 @@ static int pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d) static struct sk_buff *pie_qdisc_dequeue(struct Qdisc *sch) { struct sk_buff *skb; - skb = __qdisc_dequeue_head(sch, &sch->q); + skb = qdisc_dequeue_head(sch); if (!skb) return NULL; From 97d0678f913369af0dc8b510a682a641654ab743 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 18 Sep 2016 00:57:31 +0200 Subject: [PATCH 1157/1715] sched: don't use skb queue helpers A followup change will replace the sk_buff_head in the qdisc struct with a slightly different list. Use of the sk_buff_head helpers will thus cause compiler warnings. Open-code these accesses in an extra change to ease review. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/sched/sch_fifo.c | 4 ++-- net/sched/sch_generic.c | 2 +- net/sched/sch_netem.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index baeed6a78d28..1e37247656f8 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -31,7 +31,7 @@ static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch, static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { - if (likely(skb_queue_len(&sch->q) < sch->limit)) + if (likely(sch->q.qlen < sch->limit)) return qdisc_enqueue_tail(skb, sch); return qdisc_drop(skb, sch, to_free); @@ -42,7 +42,7 @@ static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch, { unsigned int prev_backlog; - if (likely(skb_queue_len(&sch->q) < sch->limit)) + if (likely(sch->q.qlen < sch->limit)) return qdisc_enqueue_tail(skb, sch); prev_backlog = sch->qstats.backlog; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 0d21b567ff27..5e63bf638350 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -486,7 +486,7 @@ static inline struct sk_buff_head *band2list(struct pfifo_fast_priv *priv, static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc, struct sk_buff **to_free) { - if (skb_queue_len(&qdisc->q) < qdisc_dev(qdisc)->tx_queue_len) { + if (qdisc->q.qlen < qdisc_dev(qdisc)->tx_queue_len) { int band = prio2band[skb->priority & TC_PRIO_MAX]; struct pfifo_fast_priv *priv = qdisc_priv(qdisc); struct sk_buff_head *list = band2list(priv, band); diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index aaaf02175338..1832d7732dbc 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -502,7 +502,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, 1<<(prandom_u32() % 8); } - if (unlikely(skb_queue_len(&sch->q) >= sch->limit)) + if (unlikely(sch->q.qlen >= sch->limit)) return qdisc_drop(skb, sch, to_free); qdisc_qstats_backlog_inc(sch, skb); @@ -522,7 +522,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (q->rate) { struct sk_buff *last; - if (!skb_queue_empty(&sch->q)) + if (sch->q.qlen) last = skb_peek_tail(&sch->q); else last = netem_rb_to_skb(rb_last(&q->t_root)); From ec323368793b8570c02e723127611a8d906a9b3f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 18 Sep 2016 00:57:32 +0200 Subject: [PATCH 1158/1715] sched: remove qdisc arg from __qdisc_dequeue_head Moves qdisc stat accouting to qdisc_dequeue_head. The only direct caller of the __qdisc_dequeue_head version open-codes this now. This allows us to later use __qdisc_dequeue_head as a replacement of __skb_dequeue() (which operates on sk_buff_head list). Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/sch_generic.h | 15 ++++++++------- net/sched/sch_generic.c | 7 ++++++- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 52a2015667b4..0741ed41575b 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -614,11 +614,17 @@ static inline int qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch) return __qdisc_enqueue_tail(skb, sch, &sch->q); } -static inline struct sk_buff *__qdisc_dequeue_head(struct Qdisc *sch, - struct sk_buff_head *list) +static inline struct sk_buff *__qdisc_dequeue_head(struct sk_buff_head *list) { struct sk_buff *skb = __skb_dequeue(list); + return skb; +} + +static inline struct sk_buff *qdisc_dequeue_head(struct Qdisc *sch) +{ + struct sk_buff *skb = __qdisc_dequeue_head(&sch->q); + if (likely(skb != NULL)) { qdisc_qstats_backlog_dec(sch, skb); qdisc_bstats_update(sch, skb); @@ -627,11 +633,6 @@ static inline struct sk_buff *__qdisc_dequeue_head(struct Qdisc *sch, return skb; } -static inline struct sk_buff *qdisc_dequeue_head(struct Qdisc *sch) -{ - return __qdisc_dequeue_head(sch, &sch->q); -} - /* Instead of calling kfree_skb() while root qdisc lock is held, * queue the skb for future freeing at end of __dev_xmit_skb() */ diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 5e63bf638350..73877d9c2bcb 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -506,7 +506,12 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc) if (likely(band >= 0)) { struct sk_buff_head *list = band2list(priv, band); - struct sk_buff *skb = __qdisc_dequeue_head(qdisc, list); + struct sk_buff *skb = __qdisc_dequeue_head(list); + + if (likely(skb != NULL)) { + qdisc_qstats_backlog_dec(qdisc, skb); + qdisc_bstats_update(qdisc, skb); + } qdisc->q.qlen--; if (skb_queue_empty(list)) From ed760cb8aae7c2b84c193d4a7637b0c9e752f07e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 18 Sep 2016 00:57:33 +0200 Subject: [PATCH 1159/1715] sched: replace __skb_dequeue with __qdisc_dequeue_head After previous patch these functions are identical. Replace __skb_dequeue in qdiscs with __qdisc_dequeue_head. Next patch will then make __qdisc_dequeue_head handle single-linked list instead of strcut sk_buff_head argument. Doesn't change generated code. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/sched/sch_codel.c | 4 ++-- net/sched/sch_netem.c | 2 +- net/sched/sch_pie.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c index 4002df3c7d9f..5bfa79ee657c 100644 --- a/net/sched/sch_codel.c +++ b/net/sched/sch_codel.c @@ -69,7 +69,7 @@ struct codel_sched_data { static struct sk_buff *dequeue_func(struct codel_vars *vars, void *ctx) { struct Qdisc *sch = ctx; - struct sk_buff *skb = __skb_dequeue(&sch->q); + struct sk_buff *skb = __qdisc_dequeue_head(&sch->q); if (skb) sch->qstats.backlog -= qdisc_pkt_len(skb); @@ -172,7 +172,7 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt) qlen = sch->q.qlen; while (sch->q.qlen > sch->limit) { - struct sk_buff *skb = __skb_dequeue(&sch->q); + struct sk_buff *skb = __qdisc_dequeue_head(&sch->q); dropped += qdisc_pkt_len(skb); qdisc_qstats_backlog_dec(sch, skb); diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 1832d7732dbc..0a964b35f8c7 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -587,7 +587,7 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) struct rb_node *p; tfifo_dequeue: - skb = __skb_dequeue(&sch->q); + skb = __qdisc_dequeue_head(&sch->q); if (skb) { qdisc_qstats_backlog_dec(sch, skb); deliver: diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c index d976d74b22d7..5c3a99d6aa82 100644 --- a/net/sched/sch_pie.c +++ b/net/sched/sch_pie.c @@ -231,7 +231,7 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt) /* Drop excess packets if new limit is lower */ qlen = sch->q.qlen; while (sch->q.qlen > sch->limit) { - struct sk_buff *skb = __skb_dequeue(&sch->q); + struct sk_buff *skb = __qdisc_dequeue_head(&sch->q); dropped += qdisc_pkt_len(skb); qdisc_qstats_backlog_dec(sch, skb); From 48da34b7a74201f15315cb1fc40bb9a7bd2b4940 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 18 Sep 2016 00:57:34 +0200 Subject: [PATCH 1160/1715] sched: add and use qdisc_skb_head helpers This change replaces sk_buff_head struct in Qdiscs with new qdisc_skb_head. Its similar to the skb_buff_head api, but does not use skb->prev pointers. Qdiscs will commonly enqueue at the tail of a list and dequeue at head. While skb_buff_head works fine for this, enqueue/dequeue needs to also adjust the prev pointer of next element. The ->prev pointer is not required for qdiscs so we can just leave it undefined and avoid one cacheline write access for en/dequeue. Suggested-by: Eric Dumazet Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/sch_generic.h | 65 +++++++++++++++++++++++++++++++-------- net/sched/sch_generic.c | 21 +++++++------ net/sched/sch_htb.c | 24 ++++++++++++--- net/sched/sch_netem.c | 14 +++++++-- 4 files changed, 95 insertions(+), 29 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 0741ed41575b..e6aa0a249672 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -36,6 +36,14 @@ struct qdisc_size_table { u16 data[]; }; +/* similar to sk_buff_head, but skb->prev pointer is undefined. */ +struct qdisc_skb_head { + struct sk_buff *head; + struct sk_buff *tail; + __u32 qlen; + spinlock_t lock; +}; + struct Qdisc { int (*enqueue)(struct sk_buff *skb, struct Qdisc *sch, @@ -76,7 +84,7 @@ struct Qdisc { * For performance sake on SMP, we put highly modified fields at the end */ struct sk_buff *gso_skb ____cacheline_aligned_in_smp; - struct sk_buff_head q; + struct qdisc_skb_head q; struct gnet_stats_basic_packed bstats; seqcount_t running; struct gnet_stats_queue qstats; @@ -600,10 +608,27 @@ static inline void qdisc_qstats_overlimit(struct Qdisc *sch) sch->qstats.overlimits++; } -static inline int __qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch, - struct sk_buff_head *list) +static inline void qdisc_skb_head_init(struct qdisc_skb_head *qh) { - __skb_queue_tail(list, skb); + qh->head = NULL; + qh->tail = NULL; + qh->qlen = 0; +} + +static inline int __qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch, + struct qdisc_skb_head *qh) +{ + struct sk_buff *last = qh->tail; + + if (last) { + skb->next = NULL; + last->next = skb; + qh->tail = skb; + } else { + qh->tail = skb; + qh->head = skb; + } + qh->qlen++; qdisc_qstats_backlog_inc(sch, skb); return NET_XMIT_SUCCESS; @@ -614,9 +639,17 @@ static inline int qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch) return __qdisc_enqueue_tail(skb, sch, &sch->q); } -static inline struct sk_buff *__qdisc_dequeue_head(struct sk_buff_head *list) +static inline struct sk_buff *__qdisc_dequeue_head(struct qdisc_skb_head *qh) { - struct sk_buff *skb = __skb_dequeue(list); + struct sk_buff *skb = qh->head; + + if (likely(skb != NULL)) { + qh->head = skb->next; + qh->qlen--; + if (qh->head == NULL) + qh->tail = NULL; + skb->next = NULL; + } return skb; } @@ -643,10 +676,10 @@ static inline void __qdisc_drop(struct sk_buff *skb, struct sk_buff **to_free) } static inline unsigned int __qdisc_queue_drop_head(struct Qdisc *sch, - struct sk_buff_head *list, + struct qdisc_skb_head *qh, struct sk_buff **to_free) { - struct sk_buff *skb = __skb_dequeue(list); + struct sk_buff *skb = __qdisc_dequeue_head(qh); if (likely(skb != NULL)) { unsigned int len = qdisc_pkt_len(skb); @@ -667,7 +700,9 @@ static inline unsigned int qdisc_queue_drop_head(struct Qdisc *sch, static inline struct sk_buff *qdisc_peek_head(struct Qdisc *sch) { - return skb_peek(&sch->q); + const struct qdisc_skb_head *qh = &sch->q; + + return qh->head; } /* generic pseudo peek method for non-work-conserving qdisc */ @@ -702,15 +737,19 @@ static inline struct sk_buff *qdisc_dequeue_peeked(struct Qdisc *sch) return skb; } -static inline void __qdisc_reset_queue(struct sk_buff_head *list) +static inline void __qdisc_reset_queue(struct qdisc_skb_head *qh) { /* * We do not know the backlog in bytes of this list, it * is up to the caller to correct it */ - if (!skb_queue_empty(list)) { - rtnl_kfree_skbs(list->next, list->prev); - __skb_queue_head_init(list); + ASSERT_RTNL(); + if (qh->qlen) { + rtnl_kfree_skbs(qh->head, qh->tail); + + qh->head = NULL; + qh->tail = NULL; + qh->qlen = 0; } } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 73877d9c2bcb..6cfb6e9038c2 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -466,7 +466,7 @@ static const u8 prio2band[TC_PRIO_MAX + 1] = { */ struct pfifo_fast_priv { u32 bitmap; - struct sk_buff_head q[PFIFO_FAST_BANDS]; + struct qdisc_skb_head q[PFIFO_FAST_BANDS]; }; /* @@ -477,7 +477,7 @@ struct pfifo_fast_priv { */ static const int bitmap2band[] = {-1, 0, 1, 0, 2, 0, 1, 0}; -static inline struct sk_buff_head *band2list(struct pfifo_fast_priv *priv, +static inline struct qdisc_skb_head *band2list(struct pfifo_fast_priv *priv, int band) { return priv->q + band; @@ -489,7 +489,7 @@ static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc, if (qdisc->q.qlen < qdisc_dev(qdisc)->tx_queue_len) { int band = prio2band[skb->priority & TC_PRIO_MAX]; struct pfifo_fast_priv *priv = qdisc_priv(qdisc); - struct sk_buff_head *list = band2list(priv, band); + struct qdisc_skb_head *list = band2list(priv, band); priv->bitmap |= (1 << band); qdisc->q.qlen++; @@ -505,8 +505,8 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc) int band = bitmap2band[priv->bitmap]; if (likely(band >= 0)) { - struct sk_buff_head *list = band2list(priv, band); - struct sk_buff *skb = __qdisc_dequeue_head(list); + struct qdisc_skb_head *qh = band2list(priv, band); + struct sk_buff *skb = __qdisc_dequeue_head(qh); if (likely(skb != NULL)) { qdisc_qstats_backlog_dec(qdisc, skb); @@ -514,7 +514,7 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc) } qdisc->q.qlen--; - if (skb_queue_empty(list)) + if (qh->qlen == 0) priv->bitmap &= ~(1 << band); return skb; @@ -529,9 +529,9 @@ static struct sk_buff *pfifo_fast_peek(struct Qdisc *qdisc) int band = bitmap2band[priv->bitmap]; if (band >= 0) { - struct sk_buff_head *list = band2list(priv, band); + struct qdisc_skb_head *qh = band2list(priv, band); - return skb_peek(list); + return qh->head; } return NULL; @@ -569,7 +569,7 @@ static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt) struct pfifo_fast_priv *priv = qdisc_priv(qdisc); for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) - __skb_queue_head_init(band2list(priv, prio)); + qdisc_skb_head_init(band2list(priv, prio)); /* Can by-pass the queue discipline */ qdisc->flags |= TCQ_F_CAN_BYPASS; @@ -617,7 +617,8 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p); sch->padded = (char *) sch - (char *) p; } - skb_queue_head_init(&sch->q); + qdisc_skb_head_init(&sch->q); + spin_lock_init(&sch->q.lock); spin_lock_init(&sch->busylock); lockdep_set_class(&sch->busylock, diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 53dbfa187870..c798d0de8a9d 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -162,7 +162,7 @@ struct htb_sched { struct work_struct work; /* non shaped skbs; let them go directly thru */ - struct sk_buff_head direct_queue; + struct qdisc_skb_head direct_queue; long direct_pkts; struct qdisc_watchdog watchdog; @@ -570,6 +570,22 @@ static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl) list_del_init(&cl->un.leaf.drop_list); } +static void htb_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch, + struct qdisc_skb_head *qh) +{ + struct sk_buff *last = qh->tail; + + if (last) { + skb->next = NULL; + last->next = skb; + qh->tail = skb; + } else { + qh->tail = skb; + qh->head = skb; + } + qh->qlen++; +} + static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { @@ -580,7 +596,7 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (cl == HTB_DIRECT) { /* enqueue to helper queue */ if (q->direct_queue.qlen < q->direct_qlen) { - __skb_queue_tail(&q->direct_queue, skb); + htb_enqueue_tail(skb, sch, &q->direct_queue); q->direct_pkts++; } else { return qdisc_drop(skb, sch, to_free); @@ -888,7 +904,7 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) unsigned long start_at; /* try to dequeue direct packets as high prio (!) to minimize cpu work */ - skb = __skb_dequeue(&q->direct_queue); + skb = __qdisc_dequeue_head(&q->direct_queue); if (skb != NULL) { ok: qdisc_bstats_update(sch, skb); @@ -1019,7 +1035,7 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt) qdisc_watchdog_init(&q->watchdog, sch); INIT_WORK(&q->work, htb_work_func); - __skb_queue_head_init(&q->direct_queue); + qdisc_skb_head_init(&q->direct_queue); if (tb[TCA_HTB_DIRECT_QLEN]) q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]); diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 0a964b35f8c7..9f7b380cf0a3 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -413,6 +413,16 @@ static struct sk_buff *netem_segment(struct sk_buff *skb, struct Qdisc *sch, return segs; } +static void netem_enqueue_skb_head(struct qdisc_skb_head *qh, struct sk_buff *skb) +{ + skb->next = qh->head; + + if (!qh->head) + qh->tail = skb; + qh->head = skb; + qh->qlen++; +} + /* * Insert one skb into qdisc. * Note: parent depends on return value to account for queue length. @@ -523,7 +533,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff *last; if (sch->q.qlen) - last = skb_peek_tail(&sch->q); + last = sch->q.tail; else last = netem_rb_to_skb(rb_last(&q->t_root)); if (last) { @@ -552,7 +562,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, cb->time_to_send = psched_get_time(); q->counter = 0; - __skb_queue_head(&sch->q, skb); + netem_enqueue_skb_head(&sch->q, skb); sch->qstats.requeues++; } From d55092b4812827b577daf2da7b4e404d87571e0b Mon Sep 17 00:00:00 2001 From: Liad Kaufman Date: Wed, 3 Aug 2016 18:41:27 +0300 Subject: [PATCH 1161/1715] iwlwifi: mvm: remove variable shadowing Variable "ac" defined twice. Fix that. Fixes: commit 93f436e2c7fe ("iwlwifi: mvm: set sta_id in SCD_QUEUE_CONFIG cmd") Signed-off-by: Liad Kaufman Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index 216aa54c8679..d3a0378b547f 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -745,14 +745,14 @@ static int iwl_mvm_sta_alloc_queue(struct iwl_mvm *mvm, .scd_queue = queue, .action = SCD_CFG_DISABLE_QUEUE, }; - u8 ac; + u8 txq_curr_ac; disable_agg_tids = iwl_mvm_remove_sta_queue_marking(mvm, queue); spin_lock_bh(&mvm->queue_info_lock); - ac = mvm->queue_info[queue].mac80211_ac; + txq_curr_ac = mvm->queue_info[queue].mac80211_ac; cmd.sta_id = mvm->queue_info[queue].ra_sta_id; - cmd.tx_fifo = iwl_mvm_ac_to_tx_fifo[ac]; + cmd.tx_fifo = iwl_mvm_ac_to_tx_fifo[txq_curr_ac]; cmd.tid = mvm->queue_info[queue].txq_tid; spin_unlock_bh(&mvm->queue_info_lock); From 8352e62ac2c4ebbfe95c95561f9f16d0ff06f375 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Thu, 4 Aug 2016 10:56:53 +0300 Subject: [PATCH 1162/1715] iwlwifi: pcie: fix typo in struct name for a000 devices commit 3cd1980b0cdf ("iwlwifi: pcie: introduce new tfd and tb formats") Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index 188b0dee542c..74199b174948 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -2953,7 +2953,7 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, if (cfg->use_tfh) { trans_pcie->max_tbs = IWL_TFH_NUM_TBS; - trans_pcie->tfd_size = sizeof(struct iwl_tfh_tb); + trans_pcie->tfd_size = sizeof(struct iwl_tfh_tfd); } else { trans_pcie->max_tbs = IWL_NUM_OF_TBS; From 5a710b8606cc3ec12f60a400248a051743ee35d1 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Wed, 3 Aug 2016 14:08:00 +0300 Subject: [PATCH 1163/1715] iwlwifi: mvm: cleanup skb queue functions use Use skb_queue_empty() and not skb_peek_tail() to check for empty list. Avoid a redundant check as well - loop will take care of it. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index d6d9ec401b44..b3866287d1c0 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -418,10 +418,11 @@ static void iwl_mvm_release_frames(struct iwl_mvm *mvm, ssn = ieee80211_sn_inc(ssn); - /* holes are valid since nssn indicates frames were received. */ - if (skb_queue_empty(skb_list) || !skb_peek_tail(skb_list)) - continue; - /* Empty the list. Will have more than one frame for A-MSDU */ + /* + * Empty the list. Will have more than one frame for A-MSDU. + * Empty list is valid as well since nssn indicates frames were + * received. + */ while ((skb = __skb_dequeue(skb_list))) { iwl_mvm_pass_packet_to_mac80211(mvm, napi, skb, reorder_buf->queue, @@ -434,7 +435,7 @@ static void iwl_mvm_release_frames(struct iwl_mvm *mvm, if (reorder_buf->num_stored && !reorder_buf->removed) { u16 index = reorder_buf->head_sn % reorder_buf->buf_size; - while (!skb_peek_tail(&reorder_buf->entries[index])) + while (skb_queue_empty(&reorder_buf->entries[index])) index = (index + 1) % reorder_buf->buf_size; /* modify timer to match next frame's expiration time */ mod_timer(&reorder_buf->reorder_timer, @@ -462,7 +463,7 @@ void iwl_mvm_reorder_timer_expired(unsigned long data) for (i = 0; i < buf->buf_size ; i++) { index = (buf->head_sn + i) % buf->buf_size; - if (!skb_peek_tail(&buf->entries[index])) + if (skb_queue_empty(&buf->entries[index])) continue; if (!time_after(jiffies, buf->reorder_time[index] + RX_REORDER_BUF_TIMEOUT_MQ)) From 61b0f5d7c41408ff523a5cd3925feafa82b971f5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 4 Aug 2016 08:57:59 +0200 Subject: [PATCH 1164/1715] iwlwifi: mvm: compare full command ID When comparing command IDs, the group should be taken into account so the same command/notification from a different group doesn't trigger anything unexpected. Fix this by comparing to the wide ID. Fixes: commit 1738d60b31d7 ("iwlwifi: mvm: handle RX MPDUs separately") Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/ops.c | 23 +++++++++++--------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index 43ea1e5fdfc6..de34c9f4f0f4 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -966,10 +966,11 @@ static void iwl_mvm_rx(struct iwl_op_mode *op_mode, { struct iwl_rx_packet *pkt = rxb_addr(rxb); struct iwl_mvm *mvm = IWL_OP_MODE_GET_MVM(op_mode); + u16 cmd = WIDE_ID(pkt->hdr.group_id, pkt->hdr.cmd); - if (likely(pkt->hdr.cmd == REPLY_RX_MPDU_CMD)) + if (likely(cmd == WIDE_ID(LEGACY_GROUP, REPLY_RX_MPDU_CMD))) iwl_mvm_rx_rx_mpdu(mvm, napi, rxb); - else if (pkt->hdr.cmd == REPLY_RX_PHY_CMD) + else if (cmd == WIDE_ID(LEGACY_GROUP, REPLY_RX_PHY_CMD)) iwl_mvm_rx_rx_phy_cmd(mvm, rxb); else iwl_mvm_rx_common(mvm, rxb, pkt); @@ -981,13 +982,14 @@ static void iwl_mvm_rx_mq(struct iwl_op_mode *op_mode, { struct iwl_rx_packet *pkt = rxb_addr(rxb); struct iwl_mvm *mvm = IWL_OP_MODE_GET_MVM(op_mode); + u16 cmd = WIDE_ID(pkt->hdr.group_id, pkt->hdr.cmd); - if (likely(pkt->hdr.cmd == REPLY_RX_MPDU_CMD)) + if (likely(cmd == WIDE_ID(LEGACY_GROUP, REPLY_RX_MPDU_CMD))) iwl_mvm_rx_mpdu_mq(mvm, napi, rxb, 0); - else if (unlikely(pkt->hdr.group_id == DATA_PATH_GROUP && - pkt->hdr.cmd == RX_QUEUES_NOTIFICATION)) + else if (unlikely(cmd == WIDE_ID(DATA_PATH_GROUP, + RX_QUEUES_NOTIFICATION))) iwl_mvm_rx_queue_notif(mvm, rxb, 0); - else if (pkt->hdr.cmd == FRAME_RELEASE) + else if (cmd == WIDE_ID(LEGACY_GROUP, FRAME_RELEASE)) iwl_mvm_rx_frame_release(mvm, napi, rxb, 0); else iwl_mvm_rx_common(mvm, rxb, pkt); @@ -1666,13 +1668,14 @@ static void iwl_mvm_rx_mq_rss(struct iwl_op_mode *op_mode, { struct iwl_mvm *mvm = IWL_OP_MODE_GET_MVM(op_mode); struct iwl_rx_packet *pkt = rxb_addr(rxb); + u16 cmd = WIDE_ID(pkt->hdr.group_id, pkt->hdr.cmd); - if (unlikely(pkt->hdr.cmd == FRAME_RELEASE)) + if (unlikely(cmd == WIDE_ID(LEGACY_GROUP, FRAME_RELEASE))) iwl_mvm_rx_frame_release(mvm, napi, rxb, queue); - else if (unlikely(pkt->hdr.cmd == RX_QUEUES_NOTIFICATION && - pkt->hdr.group_id == DATA_PATH_GROUP)) + else if (unlikely(cmd == WIDE_ID(DATA_PATH_GROUP, + RX_QUEUES_NOTIFICATION))) iwl_mvm_rx_queue_notif(mvm, rxb, queue); - else if (likely(pkt->hdr.cmd == REPLY_RX_MPDU_CMD)) + else if (likely(cmd == WIDE_ID(LEGACY_GROUP, REPLY_RX_MPDU_CMD))) iwl_mvm_rx_mpdu_mq(mvm, napi, rxb, queue); } From 91b08c2da4b1b89c265d4c68354b1ef3564a9eeb Mon Sep 17 00:00:00 2001 From: Aviya Erenfeld Date: Sun, 27 Mar 2016 14:53:49 +0300 Subject: [PATCH 1165/1715] iwlwifi: mvm: add support for MU-MIMO air sniffer enable MU-MIMO air sniffer if it's supported by the NIC Signed-off-by: Aviya Erenfeld Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 9506e658abd3..b7d80b5fc0bf 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -720,6 +720,10 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm) if (ret) iwl_mvm_leds_exit(mvm); + if (mvm->cfg->vht_mu_mimo_supported) + wiphy_ext_feature_set(hw->wiphy, + NL80211_EXT_FEATURE_MU_MIMO_AIR_SNIFFER); + return ret; } @@ -2229,6 +2233,10 @@ static void iwl_mvm_bss_info_changed(struct ieee80211_hw *hw, case NL80211_IFTYPE_ADHOC: iwl_mvm_bss_info_changed_ap_ibss(mvm, vif, bss_conf, changes); break; + case NL80211_IFTYPE_MONITOR: + if (changes & BSS_CHANGED_MU_GROUPS) + iwl_mvm_update_mu_groups(mvm, vif); + break; default: /* shouldn't happen */ WARN_ON_ONCE(1); From a6c934b364948cd4de5bd9ab055bb65206ec70f3 Mon Sep 17 00:00:00 2001 From: Haim Dreyfuss Date: Tue, 2 Aug 2016 15:28:23 +0300 Subject: [PATCH 1166/1715] iwlwifi: check for valid ethernet address provided by OEM In 9000 family products we added an option to let the OEM fuse the mac address via registers. If these registers are zeroed we use the OTP address instead. Make sure that the address provided by the OEM is valid and, if not, fall back to the OTP address as well. Fixes: commit 17c867bfe89b ("iwlwifi: add support for getting HW address from CSR") Signed-off-by: Haim Dreyfuss Signed-off-by: Luca Coelho --- .../net/wireless/intel/iwlwifi/iwl-nvm-parse.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c index 43f8f7d45ddb..adba3b003f55 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c @@ -564,11 +564,16 @@ static void iwl_set_hw_address_from_csr(struct iwl_trans *trans, __le32 mac_addr0 = cpu_to_le32(iwl_read32(trans, CSR_MAC_ADDR0_STRAP)); __le32 mac_addr1 = cpu_to_le32(iwl_read32(trans, CSR_MAC_ADDR1_STRAP)); - /* If OEM did not fuse address - get it from OTP */ - if (!mac_addr0 && !mac_addr1) { - mac_addr0 = cpu_to_le32(iwl_read32(trans, CSR_MAC_ADDR0_OTP)); - mac_addr1 = cpu_to_le32(iwl_read32(trans, CSR_MAC_ADDR1_OTP)); - } + iwl_flip_hw_address(mac_addr0, mac_addr1, data->hw_addr); + /* + * If the OEM fused a valid address, use it instead of the one in the + * OTP + */ + if (is_valid_ether_addr(data->hw_addr)) + return; + + mac_addr0 = cpu_to_le32(iwl_read32(trans, CSR_MAC_ADDR0_OTP)); + mac_addr1 = cpu_to_le32(iwl_read32(trans, CSR_MAC_ADDR1_OTP)); iwl_flip_hw_address(mac_addr0, mac_addr1, data->hw_addr); } From 3ee0f0e23e4f290b2ee0a273b34c6e4f95a209a0 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Mon, 8 Aug 2016 11:51:24 +0300 Subject: [PATCH 1167/1715] iwlwifi: mvm: fix DQA AP mode station assumption There was recently a Full AP mode feature added where hostap adds the station at auth stage, and not assoc. However, when running with legacy hostapd, we get the auth response before station was added, and tx_skb_non_sta fails to allocate a queue, resulting in a complete failure of association. Take care of this situation as well. Add a warning when no valid queue is returned at all and make mvm drop the packet instead of passing it on. Refactor the function a bit while at it. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 39 ++++++++++++++++----- 1 file changed, 30 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index 10517778faf4..e0c9065a3ad4 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -490,16 +490,34 @@ iwl_mvm_set_tx_params(struct iwl_mvm *mvm, struct sk_buff *skb, static int iwl_mvm_get_ctrl_vif_queue(struct iwl_mvm *mvm, struct ieee80211_tx_info *info, __le16 fc) { - if (iwl_mvm_is_dqa_supported(mvm)) { - if (info->control.vif->type == NL80211_IFTYPE_AP && - ieee80211_is_probe_resp(fc)) - return IWL_MVM_DQA_AP_PROBE_RESP_QUEUE; - else if (ieee80211_is_mgmt(fc) && - info->control.vif->type == NL80211_IFTYPE_P2P_DEVICE) - return IWL_MVM_DQA_P2P_DEVICE_QUEUE; - } + if (!iwl_mvm_is_dqa_supported(mvm)) + return info->hw_queue; - return info->hw_queue; + switch (info->control.vif->type) { + case NL80211_IFTYPE_AP: + /* + * handle legacy hostapd as well, where station may be added + * only after assoc. + */ + if (ieee80211_is_probe_resp(fc) || ieee80211_is_auth(fc)) + return IWL_MVM_DQA_AP_PROBE_RESP_QUEUE; + if (info->hw_queue == info->control.vif->cab_queue) + return info->hw_queue; + + WARN_ON_ONCE(1); + return IWL_MVM_DQA_AP_PROBE_RESP_QUEUE; + case NL80211_IFTYPE_P2P_DEVICE: + if (ieee80211_is_mgmt(fc)) + return IWL_MVM_DQA_P2P_DEVICE_QUEUE; + if (info->hw_queue == info->control.vif->cab_queue) + return info->hw_queue; + + WARN_ON_ONCE(1); + return IWL_MVM_DQA_P2P_DEVICE_QUEUE; + default: + WARN_ONCE(1, "Not a ctrl vif, no available queue\n"); + return -1; + } } int iwl_mvm_tx_skb_non_sta(struct iwl_mvm *mvm, struct sk_buff *skb) @@ -560,6 +578,9 @@ int iwl_mvm_tx_skb_non_sta(struct iwl_mvm *mvm, struct sk_buff *skb) sta_id = mvmvif->bcast_sta.sta_id; queue = iwl_mvm_get_ctrl_vif_queue(mvm, &info, hdr->frame_control); + if (queue < 0) + return -1; + } else if (info.control.vif->type == NL80211_IFTYPE_STATION && is_multicast_ether_addr(hdr->addr1)) { u8 ap_sta_id = ACCESS_ONCE(mvmvif->ap_sta_id); From 9a73a7d24d51eaf9e43c771c53cf7b594e5b5334 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Mon, 8 Aug 2016 13:07:01 +0300 Subject: [PATCH 1168/1715] iwlwifi: mvm: support BAR in reorder buffer On default queue we will not receive frame release notification, but the BAR itself. Upon receiving the BAR driver should look at the NSSN and adjust window accordingly. Fixes: b915c10174fb ("iwlwifi: mvm: add reorder buffer per queue") Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index b3866287d1c0..0274f14e88e3 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -600,9 +600,10 @@ static bool iwl_mvm_reorder(struct iwl_mvm *mvm, mvm_sta = iwl_mvm_sta_from_mac80211(sta); - /* not a data packet */ - if (!ieee80211_is_data_qos(hdr->frame_control) || - is_multicast_ether_addr(hdr->addr1)) + /* not a data packet or a bar */ + if (!ieee80211_is_back_req(hdr->frame_control) && + (!ieee80211_is_data_qos(hdr->frame_control) || + is_multicast_ether_addr(hdr->addr1))) return false; if (unlikely(!ieee80211_is_data_present(hdr->frame_control))) @@ -626,6 +627,11 @@ static bool iwl_mvm_reorder(struct iwl_mvm *mvm, spin_lock_bh(&buffer->lock); + if (ieee80211_is_back_req(hdr->frame_control)) { + iwl_mvm_release_frames(mvm, sta, napi, buffer, nssn); + goto drop; + } + /* * If there was a significant jump in the nssn - adjust. * If the SN is smaller than the NSSN it might need to first go into From aacf8f189b03b3d3c2e577a6d4ef5872bf4d393f Mon Sep 17 00:00:00 2001 From: Avrahams Stern Date: Tue, 19 Jul 2016 11:15:09 +0300 Subject: [PATCH 1169/1715] iwlwifi: mvm: Add support for RRM by scan Implement support for RRM by adding an option to configure the scan dwell time and reporting scan start time and BSS detection time, and Advertise support for these features. Signed-off-by: David Spinadel Signed-off-by: Avraham Stern Signed-off-by: Luca Coelho --- .../net/wireless/intel/iwlwifi/iwl-fw-file.h | 5 ++ .../wireless/intel/iwlwifi/mvm/fw-api-scan.h | 20 +++--- .../net/wireless/intel/iwlwifi/mvm/mac80211.c | 10 +++ drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 6 ++ drivers/net/wireless/intel/iwlwifi/mvm/scan.c | 61 +++++++++++++++++-- 5 files changed, 88 insertions(+), 14 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-fw-file.h b/drivers/net/wireless/intel/iwlwifi/iwl-fw-file.h index 1b1e045f8907..94423f04320b 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-fw-file.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-fw-file.h @@ -256,6 +256,10 @@ typedef unsigned int __bitwise__ iwl_ucode_tlv_api_t; * instead of 3. * @IWL_UCODE_TLV_API_TX_POWER_CHAIN: TX power API has larger command size * (command version 3) that supports per-chain limits + * @IWL_UCODE_TLV_API_SCAN_TSF_REPORT: Scan start time reported in scan + * iteration complete notification, and the timestamp reported for RX + * received during scan, are reported in TSF of the mac specified in the + * scan request. * * @NUM_IWL_UCODE_TLV_API: number of bits used */ @@ -267,6 +271,7 @@ enum iwl_ucode_tlv_api { IWL_UCODE_TLV_API_NEW_VERSION = (__force iwl_ucode_tlv_api_t)20, IWL_UCODE_TLV_API_EXT_SCAN_PRIORITY = (__force iwl_ucode_tlv_api_t)24, IWL_UCODE_TLV_API_TX_POWER_CHAIN = (__force iwl_ucode_tlv_api_t)27, + IWL_UCODE_TLV_API_SCAN_TSF_REPORT = (__force iwl_ucode_tlv_api_t)28, NUM_IWL_UCODE_TLV_API #ifdef __CHECKER__ diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-scan.h b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-scan.h index f01dab0d0dac..0c294c9f98e9 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-scan.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-scan.h @@ -7,6 +7,7 @@ * * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH + * Copyright(c) 2016 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -603,6 +604,8 @@ struct iwl_scan_req_umac_tail { * @uid: scan id, &enum iwl_umac_scan_uid_offsets * @ooc_priority: out of channel priority - &enum iwl_scan_priority * @general_flags: &enum iwl_umac_scan_general_flags + * @reserved2: for future use and alignment + * @scan_start_mac_id: report the scan start TSF time according to this mac TSF * @extended_dwell: dwell time for channels 1, 6 and 11 * @active_dwell: dwell time for active scan * @passive_dwell: dwell time for passive scan @@ -620,8 +623,10 @@ struct iwl_scan_req_umac { __le32 flags; __le32 uid; __le32 ooc_priority; - /* SCAN_GENERAL_PARAMS_API_S_VER_1 */ - __le32 general_flags; + /* SCAN_GENERAL_PARAMS_API_S_VER_4 */ + __le16 general_flags; + u8 reserved2; + u8 scan_start_mac_id; u8 extended_dwell; u8 active_dwell; u8 passive_dwell; @@ -629,7 +634,7 @@ struct iwl_scan_req_umac { __le32 max_out_time; __le32 suspend_time; __le32 scan_priority; - /* SCAN_CHANNEL_PARAMS_API_S_VER_1 */ + /* SCAN_CHANNEL_PARAMS_API_S_VER_4 */ u8 channel_flags; u8 n_channels; __le16 reserved; @@ -718,8 +723,8 @@ struct iwl_scan_offload_profiles_query { * @status: one of SCAN_COMP_STATUS_* * @bt_status: BT on/off status * @last_channel: last channel that was scanned - * @tsf_low: TSF timer (lower half) in usecs - * @tsf_high: TSF timer (higher half) in usecs + * @start_tsf: TSF timer in usecs of the scan start time for the mac specified + * in &struct iwl_scan_req_umac. * @results: array of scan results, only "scanned_channels" of them are valid */ struct iwl_umac_scan_iter_complete_notif { @@ -728,9 +733,8 @@ struct iwl_umac_scan_iter_complete_notif { u8 status; u8 bt_status; u8 last_channel; - __le32 tsf_low; - __le32 tsf_high; + __le64 start_tsf; struct iwl_scan_results_notif results[]; -} __packed; /* SCAN_ITER_COMPLETE_NTF_UMAC_API_S_VER_1 */ +} __packed; /* SCAN_ITER_COMPLETE_NTF_UMAC_API_S_VER_2 */ #endif diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index b7d80b5fc0bf..a5ede8c9bea2 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -653,6 +653,16 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm) IWL_UCODE_TLV_CAPA_WFA_TPC_REP_IE_SUPPORT)) hw->wiphy->features |= NL80211_FEATURE_WFA_TPC_IE_IN_PROBES; + if (fw_has_api(&mvm->fw->ucode_capa, + IWL_UCODE_TLV_API_SCAN_TSF_REPORT)) { + wiphy_ext_feature_set(hw->wiphy, + NL80211_EXT_FEATURE_SCAN_START_TIME); + wiphy_ext_feature_set(hw->wiphy, + NL80211_EXT_FEATURE_BSS_PARENT_TSF); + wiphy_ext_feature_set(hw->wiphy, + NL80211_EXT_FEATURE_SET_SCAN_DWELL); + } + mvm->rts_threshold = IEEE80211_MAX_RTS_THRESHOLD; #ifdef CONFIG_PM_SLEEP diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index 83f3889e432b..e68a2bdc84ed 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -821,6 +821,12 @@ struct iwl_mvm { /* UMAC scan tracking */ u32 scan_uid_status[IWL_MVM_MAX_UMAC_SCANS]; + /* start time of last scan in TSF of the mac that requested the scan */ + u64 scan_start; + + /* the vif that requested the current scan */ + struct iwl_mvm_vif *scan_vif; + /* rx chain antennas set through debugfs for the scan command */ u8 scan_rx_ant; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c index dac120f8861b..00b03fc5807b 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c @@ -141,6 +141,7 @@ struct iwl_mvm_scan_params { struct cfg80211_match_set *match_sets; int n_scan_plans; struct cfg80211_sched_scan_plan *scan_plans; + u32 measurement_dwell; }; static u8 iwl_mvm_scan_rx_ant(struct iwl_mvm *mvm) @@ -232,6 +233,27 @@ iwl_mvm_scan_type iwl_mvm_get_scan_type(struct iwl_mvm *mvm, bool p2p_device) return IWL_SCAN_TYPE_WILD; } +static int +iwl_mvm_get_measurement_dwell(struct iwl_mvm *mvm, + struct cfg80211_scan_request *req, + struct iwl_mvm_scan_params *params) +{ + if (!req->duration) + return 0; + + if (req->duration_mandatory && + req->duration > scan_timing[params->type].max_out_time) { + IWL_DEBUG_SCAN(mvm, + "Measurement scan - too long dwell %hu (max out time %u)\n", + req->duration, + scan_timing[params->type].max_out_time); + return -EOPNOTSUPP; + } + + return min_t(u32, (u32)req->duration, + scan_timing[params->type].max_out_time); +} + static inline bool iwl_mvm_rrm_scan_needed(struct iwl_mvm *mvm) { /* require rrm scan whenever the fw supports it */ @@ -1033,9 +1055,15 @@ static void iwl_mvm_scan_umac_dwell(struct iwl_mvm *mvm, struct iwl_scan_req_umac *cmd, struct iwl_mvm_scan_params *params) { - cmd->extended_dwell = scan_timing[params->type].dwell_extended; - cmd->active_dwell = scan_timing[params->type].dwell_active; - cmd->passive_dwell = scan_timing[params->type].dwell_passive; + if (params->measurement_dwell) { + cmd->active_dwell = params->measurement_dwell; + cmd->passive_dwell = params->measurement_dwell; + cmd->extended_dwell = params->measurement_dwell; + } else { + cmd->active_dwell = scan_timing[params->type].dwell_active; + cmd->passive_dwell = scan_timing[params->type].dwell_passive; + cmd->extended_dwell = scan_timing[params->type].dwell_extended; + } cmd->fragmented_dwell = scan_timing[params->type].dwell_fragmented; cmd->max_out_time = cpu_to_le32(scan_timing[params->type].max_out_time); cmd->suspend_time = cpu_to_le32(scan_timing[params->type].suspend_time); @@ -1067,11 +1095,11 @@ iwl_mvm_umac_scan_cfg_channels(struct iwl_mvm *mvm, } } -static u32 iwl_mvm_scan_umac_flags(struct iwl_mvm *mvm, +static u16 iwl_mvm_scan_umac_flags(struct iwl_mvm *mvm, struct iwl_mvm_scan_params *params, struct ieee80211_vif *vif) { - int flags = 0; + u16 flags = 0; if (params->n_ssids == 0) flags = IWL_UMAC_SCAN_GEN_FLAGS_PASSIVE; @@ -1093,6 +1121,9 @@ static u32 iwl_mvm_scan_umac_flags(struct iwl_mvm *mvm, if (!iwl_mvm_is_regular_scan(params)) flags |= IWL_UMAC_SCAN_GEN_FLAGS_PERIODIC; + if (params->measurement_dwell) + flags |= IWL_UMAC_SCAN_GEN_FLAGS_ITER_COMPLETE; + #ifdef CONFIG_IWLWIFI_DEBUGFS if (mvm->scan_iter_notif_enabled) flags |= IWL_UMAC_SCAN_GEN_FLAGS_ITER_COMPLETE; @@ -1119,6 +1150,7 @@ static int iwl_mvm_scan_umac(struct iwl_mvm *mvm, struct ieee80211_vif *vif, mvm->fw->ucode_capa.n_scan_channels; int uid, i; u32 ssid_bitmap = 0; + struct iwl_mvm_vif *scan_vif = iwl_mvm_vif_from_mac80211(vif); lockdep_assert_held(&mvm->mutex); @@ -1136,8 +1168,9 @@ static int iwl_mvm_scan_umac(struct iwl_mvm *mvm, struct ieee80211_vif *vif, mvm->scan_uid_status[uid] = type; cmd->uid = cpu_to_le32(uid); - cmd->general_flags = cpu_to_le32(iwl_mvm_scan_umac_flags(mvm, params, + cmd->general_flags = cpu_to_le16(iwl_mvm_scan_umac_flags(mvm, params, vif)); + cmd->scan_start_mac_id = scan_vif->id; if (type == IWL_MVM_SCAN_SCHED || type == IWL_MVM_SCAN_NETDETECT) cmd->flags = cpu_to_le32(IWL_UMAC_SCAN_FLAG_PREEMPTIVE); @@ -1289,6 +1322,12 @@ int iwl_mvm_reg_scan_start(struct iwl_mvm *mvm, struct ieee80211_vif *vif, iwl_mvm_get_scan_type(mvm, vif->type == NL80211_IFTYPE_P2P_DEVICE); + ret = iwl_mvm_get_measurement_dwell(mvm, req, ¶ms); + if (ret < 0) + return ret; + + params.measurement_dwell = ret; + iwl_mvm_build_scan_probe(mvm, vif, ies, ¶ms); if (fw_has_capa(&mvm->fw->ucode_capa, IWL_UCODE_TLV_CAPA_UMAC_SCAN)) { @@ -1315,6 +1354,7 @@ int iwl_mvm_reg_scan_start(struct iwl_mvm *mvm, struct ieee80211_vif *vif, IWL_DEBUG_SCAN(mvm, "Scan request was sent successfully\n"); mvm->scan_status |= IWL_MVM_SCAN_REGULAR; + mvm->scan_vif = iwl_mvm_vif_from_mac80211(vif); iwl_mvm_ref(mvm, IWL_MVM_REF_SCAN); queue_delayed_work(system_wq, &mvm->scan_timeout_dwork, @@ -1437,9 +1477,12 @@ void iwl_mvm_rx_umac_scan_complete_notif(struct iwl_mvm *mvm, if (mvm->scan_uid_status[uid] == IWL_MVM_SCAN_REGULAR) { struct cfg80211_scan_info info = { .aborted = aborted, + .scan_start_tsf = mvm->scan_start, }; + memcpy(info.tsf_bssid, mvm->scan_vif->bssid, ETH_ALEN); ieee80211_scan_completed(mvm->hw, &info); + mvm->scan_vif = NULL; iwl_mvm_unref(mvm, IWL_MVM_REF_SCAN); cancel_delayed_work(&mvm->scan_timeout_dwork); } else if (mvm->scan_uid_status[uid] == IWL_MVM_SCAN_SCHED) { @@ -1473,6 +1516,8 @@ void iwl_mvm_rx_umac_scan_iter_complete_notif(struct iwl_mvm *mvm, struct iwl_umac_scan_iter_complete_notif *notif = (void *)pkt->data; u8 buf[256]; + mvm->scan_start = le64_to_cpu(notif->start_tsf); + IWL_DEBUG_SCAN(mvm, "UMAC Scan iteration complete: status=0x%x scanned_channels=%d channels list: %s\n", notif->status, notif->scanned_channels, @@ -1485,6 +1530,10 @@ void iwl_mvm_rx_umac_scan_iter_complete_notif(struct iwl_mvm *mvm, ieee80211_sched_scan_results(mvm->hw); mvm->sched_scan_pass_all = SCHED_SCAN_PASS_ALL_ENABLED; } + + IWL_DEBUG_SCAN(mvm, + "UMAC Scan iteration complete: scan started at %llu (TSF)\n", + mvm->scan_start); } static int iwl_mvm_umac_scan_abort(struct iwl_mvm *mvm, int type) From 4857d6cbf7ca4e040ac2b24687464c76e0be96ff Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Tue, 9 Aug 2016 20:03:52 +0300 Subject: [PATCH 1170/1715] iwlwifi: mvm: support packet injection For automatic testing packet injection can be useful. Support injection through debugfs. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- .../net/wireless/intel/iwlwifi/mvm/debugfs.c | 55 +++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c index b34489817c70..540b7c9deaef 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c @@ -917,6 +917,59 @@ static ssize_t iwl_dbgfs_indirection_tbl_write(struct iwl_mvm *mvm, return ret ?: count; } +static ssize_t iwl_dbgfs_inject_packet_write(struct iwl_mvm *mvm, + char *buf, size_t count, + loff_t *ppos) +{ + struct iwl_rx_cmd_buffer rxb = { + ._rx_page_order = 0, + .truesize = 0, /* not used */ + ._offset = 0, + }; + struct iwl_rx_packet *pkt; + struct iwl_rx_mpdu_desc *desc; + int bin_len = count / 2; + int ret = -EINVAL; + + /* supporting only 9000 descriptor */ + if (!mvm->trans->cfg->mq_rx_supported) + return -ENOTSUPP; + + rxb._page = alloc_pages(GFP_ATOMIC, 0); + if (!rxb._page) + return -ENOMEM; + pkt = rxb_addr(&rxb); + + ret = hex2bin(page_address(rxb._page), buf, bin_len); + if (ret) + goto out; + + /* avoid invalid memory access */ + if (bin_len < sizeof(*pkt) + sizeof(*desc)) + goto out; + + /* check this is RX packet */ + if (WIDE_ID(pkt->hdr.group_id, pkt->hdr.cmd) != + WIDE_ID(LEGACY_GROUP, REPLY_RX_MPDU_CMD)) + goto out; + + /* check the length in metadata matches actual received length */ + desc = (void *)pkt->data; + if (le16_to_cpu(desc->mpdu_len) != + (bin_len - sizeof(*desc) - sizeof(*pkt))) + goto out; + + local_bh_disable(); + iwl_mvm_rx_mpdu_mq(mvm, NULL, &rxb, 0); + local_bh_enable(); + ret = 0; + +out: + iwl_free_rxb(&rxb); + + return ret ?: count; +} + static ssize_t iwl_dbgfs_fw_dbg_conf_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) @@ -1454,6 +1507,7 @@ MVM_DEBUGFS_WRITE_FILE_OPS(cont_recording, 8); MVM_DEBUGFS_WRITE_FILE_OPS(max_amsdu_len, 8); MVM_DEBUGFS_WRITE_FILE_OPS(indirection_tbl, (IWL_RSS_INDIRECTION_TABLE_SIZE * 2)); +MVM_DEBUGFS_WRITE_FILE_OPS(inject_packet, 512); #ifdef CONFIG_IWLWIFI_BCAST_FILTERING MVM_DEBUGFS_READ_WRITE_FILE_OPS(bcast_filters, 256); @@ -1502,6 +1556,7 @@ int iwl_mvm_dbgfs_register(struct iwl_mvm *mvm, struct dentry *dbgfs_dir) MVM_DEBUGFS_ADD_FILE(send_echo_cmd, mvm->debugfs_dir, S_IWUSR); MVM_DEBUGFS_ADD_FILE(cont_recording, mvm->debugfs_dir, S_IWUSR); MVM_DEBUGFS_ADD_FILE(indirection_tbl, mvm->debugfs_dir, S_IWUSR); + MVM_DEBUGFS_ADD_FILE(inject_packet, mvm->debugfs_dir, S_IWUSR); if (!debugfs_create_bool("enable_scan_iteration_notif", S_IRUSR | S_IWUSR, mvm->debugfs_dir, From 671bed3fbeecb5e8392ed054d2cab78ed66290ff Mon Sep 17 00:00:00 2001 From: Arik Nemtsov Date: Sun, 7 Aug 2016 18:58:29 +0300 Subject: [PATCH 1171/1715] iwlwifi: move BIOS MCC retrieval to common code This will be used by more than MVM, so move it to iwlwifi While at it, rename WRD_METHOD to the more appropriate WRDD_METHOD and add some documentation. Signed-off-by: Arik Nemtsov Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- .../wireless/intel/iwlwifi/iwl-nvm-parse.c | 89 ++++++++++++++++++ .../wireless/intel/iwlwifi/iwl-nvm-parse.h | 20 +++- drivers/net/wireless/intel/iwlwifi/mvm/nvm.c | 93 +------------------ 3 files changed, 109 insertions(+), 93 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c index adba3b003f55..3bd6fc1b76d4 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c @@ -67,6 +67,7 @@ #include #include #include +#include #include "iwl-drv.h" #include "iwl-modparams.h" #include "iwl-nvm-parse.h" @@ -904,3 +905,91 @@ iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg, return regd; } IWL_EXPORT_SYMBOL(iwl_parse_nvm_mcc_info); + +#ifdef CONFIG_ACPI +#define WRDD_METHOD "WRDD" +#define WRDD_WIFI (0x07) +#define WRDD_WIGIG (0x10) + +static u32 iwl_wrdd_get_mcc(struct device *dev, union acpi_object *wrdd) +{ + union acpi_object *mcc_pkg, *domain_type, *mcc_value; + u32 i; + + if (wrdd->type != ACPI_TYPE_PACKAGE || + wrdd->package.count < 2 || + wrdd->package.elements[0].type != ACPI_TYPE_INTEGER || + wrdd->package.elements[0].integer.value != 0) { + IWL_DEBUG_EEPROM(dev, "Unsupported wrdd structure\n"); + return 0; + } + + for (i = 1 ; i < wrdd->package.count ; ++i) { + mcc_pkg = &wrdd->package.elements[i]; + + if (mcc_pkg->type != ACPI_TYPE_PACKAGE || + mcc_pkg->package.count < 2 || + mcc_pkg->package.elements[0].type != ACPI_TYPE_INTEGER || + mcc_pkg->package.elements[1].type != ACPI_TYPE_INTEGER) { + mcc_pkg = NULL; + continue; + } + + domain_type = &mcc_pkg->package.elements[0]; + if (domain_type->integer.value == WRDD_WIFI) + break; + + mcc_pkg = NULL; + } + + if (mcc_pkg) { + mcc_value = &mcc_pkg->package.elements[1]; + return mcc_value->integer.value; + } + + return 0; +} + +int iwl_get_bios_mcc(struct device *dev, char *mcc) +{ + acpi_handle root_handle; + acpi_handle handle; + struct acpi_buffer wrdd = {ACPI_ALLOCATE_BUFFER, NULL}; + acpi_status status; + u32 mcc_val; + + root_handle = ACPI_HANDLE(dev); + if (!root_handle) { + IWL_DEBUG_EEPROM(dev, + "Could not retrieve root port ACPI handle\n"); + return -ENOENT; + } + + /* Get the method's handle */ + status = acpi_get_handle(root_handle, (acpi_string)WRDD_METHOD, + &handle); + if (ACPI_FAILURE(status)) { + IWL_DEBUG_EEPROM(dev, "WRD method not found\n"); + return -ENOENT; + } + + /* Call WRDD with no arguments */ + status = acpi_evaluate_object(handle, NULL, NULL, &wrdd); + if (ACPI_FAILURE(status)) { + IWL_DEBUG_EEPROM(dev, "WRDC invocation failed (0x%x)\n", + status); + return -ENOENT; + } + + mcc_val = iwl_wrdd_get_mcc(dev, wrdd.pointer); + kfree(wrdd.pointer); + if (!mcc_val) + return -ENOENT; + + mcc[0] = (mcc_val >> 8) & 0xff; + mcc[1] = mcc_val & 0xff; + mcc[2] = '\0'; + return 0; +} +IWL_EXPORT_SYMBOL(iwl_get_bios_mcc); +#endif diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h index d704d52aa7ec..7249e5b403f4 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h @@ -5,7 +5,8 @@ * * GPL LICENSE SUMMARY * - * Copyright(c) 2008 - 2014 Intel Corporation. All rights reserved. + * Copyright(c) 2008 - 2015 Intel Corporation. All rights reserved. + * Copyright(c) 2016 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -93,4 +94,21 @@ struct ieee80211_regdomain * iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg, int num_of_ch, __le32 *channels, u16 fw_mcc); +#ifdef CONFIG_ACPI +/** + * iwl_get_bios_mcc - read MCC from BIOS, if available + * + * @dev: the struct device + * @mcc: output buffer (3 bytes) that will get the MCC + * + * This function tries to read the current MCC from ACPI if available. + */ +int iwl_get_bios_mcc(struct device *dev, char *mcc); +#else +static inline int iwl_get_bios_mcc(struct device *dev, char *mcc) +{ + return -ENOENT; +} +#endif + #endif /* __iwl_nvm_parse_h__ */ diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c index 7a686f67f007..eade099b6dbf 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c @@ -66,7 +66,6 @@ *****************************************************************************/ #include #include -#include #include "iwl-trans.h" #include "iwl-csr.h" #include "mvm.h" @@ -751,96 +750,6 @@ exit: return resp_cp; } -#ifdef CONFIG_ACPI -#define WRD_METHOD "WRDD" -#define WRDD_WIFI (0x07) -#define WRDD_WIGIG (0x10) - -static u32 iwl_mvm_wrdd_get_mcc(struct iwl_mvm *mvm, union acpi_object *wrdd) -{ - union acpi_object *mcc_pkg, *domain_type, *mcc_value; - u32 i; - - if (wrdd->type != ACPI_TYPE_PACKAGE || - wrdd->package.count < 2 || - wrdd->package.elements[0].type != ACPI_TYPE_INTEGER || - wrdd->package.elements[0].integer.value != 0) { - IWL_DEBUG_LAR(mvm, "Unsupported wrdd structure\n"); - return 0; - } - - for (i = 1 ; i < wrdd->package.count ; ++i) { - mcc_pkg = &wrdd->package.elements[i]; - - if (mcc_pkg->type != ACPI_TYPE_PACKAGE || - mcc_pkg->package.count < 2 || - mcc_pkg->package.elements[0].type != ACPI_TYPE_INTEGER || - mcc_pkg->package.elements[1].type != ACPI_TYPE_INTEGER) { - mcc_pkg = NULL; - continue; - } - - domain_type = &mcc_pkg->package.elements[0]; - if (domain_type->integer.value == WRDD_WIFI) - break; - - mcc_pkg = NULL; - } - - if (mcc_pkg) { - mcc_value = &mcc_pkg->package.elements[1]; - return mcc_value->integer.value; - } - - return 0; -} - -static int iwl_mvm_get_bios_mcc(struct iwl_mvm *mvm, char *mcc) -{ - acpi_handle root_handle; - acpi_handle handle; - struct acpi_buffer wrdd = {ACPI_ALLOCATE_BUFFER, NULL}; - acpi_status status; - u32 mcc_val; - - root_handle = ACPI_HANDLE(mvm->dev); - if (!root_handle) { - IWL_DEBUG_LAR(mvm, - "Could not retrieve root port ACPI handle\n"); - return -ENOENT; - } - - /* Get the method's handle */ - status = acpi_get_handle(root_handle, (acpi_string)WRD_METHOD, &handle); - if (ACPI_FAILURE(status)) { - IWL_DEBUG_LAR(mvm, "WRD method not found\n"); - return -ENOENT; - } - - /* Call WRDD with no arguments */ - status = acpi_evaluate_object(handle, NULL, NULL, &wrdd); - if (ACPI_FAILURE(status)) { - IWL_DEBUG_LAR(mvm, "WRDC invocation failed (0x%x)\n", status); - return -ENOENT; - } - - mcc_val = iwl_mvm_wrdd_get_mcc(mvm, wrdd.pointer); - kfree(wrdd.pointer); - if (!mcc_val) - return -ENOENT; - - mcc[0] = (mcc_val >> 8) & 0xff; - mcc[1] = mcc_val & 0xff; - mcc[2] = '\0'; - return 0; -} -#else /* CONFIG_ACPI */ -static int iwl_mvm_get_bios_mcc(struct iwl_mvm *mvm, char *mcc) -{ - return -ENOENT; -} -#endif - int iwl_mvm_init_mcc(struct iwl_mvm *mvm) { bool tlv_lar; @@ -884,7 +793,7 @@ int iwl_mvm_init_mcc(struct iwl_mvm *mvm) return -EIO; if (iwl_mvm_is_wifi_mcc_supported(mvm) && - !iwl_mvm_get_bios_mcc(mvm, mcc)) { + !iwl_get_bios_mcc(mvm->dev, mcc)) { kfree(regd); regd = iwl_mvm_get_regdomain(mvm->hw->wiphy, mcc, MCC_SOURCE_BIOS, NULL); From 35177c9931465e06379729d1ef2a22628ac9cbdf Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Mon, 15 Aug 2016 17:13:27 +0300 Subject: [PATCH 1172/1715] iwlwifi: pcie: log full command sequence Log group as well. Remove 0x prefix to match TX logging. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/pcie/rx.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c index 0b5b331010fb..78cb6d2314d9 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c @@ -1108,13 +1108,14 @@ static void iwl_pcie_rx_handle_rb(struct iwl_trans *trans, FH_RSCSR_RXQ_POS != rxq->id); IWL_DEBUG_RX(trans, - "cmd at offset %d: %s (0x%.2x, seq 0x%x)\n", + "cmd at offset %d: %s (%.2x.%2x, seq 0x%x)\n", rxcb._offset, iwl_get_cmd_string(trans, iwl_cmd_id(pkt->hdr.cmd, pkt->hdr.group_id, 0)), - pkt->hdr.cmd, le16_to_cpu(pkt->hdr.sequence)); + pkt->hdr.group_id, pkt->hdr.cmd, + le16_to_cpu(pkt->hdr.sequence)); len = iwl_rx_packet_len(pkt); len += sizeof(u32); /* account for status word */ From 607876f1dac6c644d83964f3f4691e79737b5bb2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 18 Aug 2016 10:32:58 +0200 Subject: [PATCH 1173/1715] iwlwifi: mvm: make iwl_mvm_update_sta() an inline There's no point in making this an out-of-line function since it just calls a single other function with a few changed parameters. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 7 ------- drivers/net/wireless/intel/iwlwifi/mvm/sta.h | 11 ++++++++--- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index d3a0378b547f..258a234feb71 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -1297,13 +1297,6 @@ err: return ret; } -int iwl_mvm_update_sta(struct iwl_mvm *mvm, - struct ieee80211_vif *vif, - struct ieee80211_sta *sta) -{ - return iwl_mvm_sta_send_to_fw(mvm, sta, true, 0); -} - int iwl_mvm_drain_sta(struct iwl_mvm *mvm, struct iwl_mvm_sta *mvmsta, bool drain) { diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h index 709542bbfce5..e068d5355865 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h @@ -473,9 +473,14 @@ int iwl_mvm_sta_send_to_fw(struct iwl_mvm *mvm, struct ieee80211_sta *sta, int iwl_mvm_add_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif, struct ieee80211_sta *sta); -int iwl_mvm_update_sta(struct iwl_mvm *mvm, - struct ieee80211_vif *vif, - struct ieee80211_sta *sta); + +static inline int iwl_mvm_update_sta(struct iwl_mvm *mvm, + struct ieee80211_vif *vif, + struct ieee80211_sta *sta) +{ + return iwl_mvm_sta_send_to_fw(mvm, sta, true, 0); +} + int iwl_mvm_rm_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif, struct ieee80211_sta *sta); From 5b88792cd8505d3804be199c10b3c4159fecb258 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Mon, 15 Aug 2016 17:36:47 +0300 Subject: [PATCH 1174/1715] iwlwifi: move to wide ID for all commands Due to firmware design considerations, move to wide ID for all commands. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/iwl-notif-wait.c | 8 ++++++-- drivers/net/wireless/intel/iwlwifi/iwl-trans.c | 3 +++ drivers/net/wireless/intel/iwlwifi/iwl-trans.h | 5 +++-- drivers/net/wireless/intel/iwlwifi/mvm/ops.c | 4 ++-- drivers/net/wireless/intel/iwlwifi/pcie/internal.h | 2 -- drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 1 - drivers/net/wireless/intel/iwlwifi/pcie/tx.c | 2 +- 7 files changed, 15 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-notif-wait.c b/drivers/net/wireless/intel/iwlwifi/iwl-notif-wait.c index 8aa1f2b7fdfc..88f260db3744 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-notif-wait.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-notif-wait.c @@ -99,8 +99,12 @@ void iwl_notification_wait_notify(struct iwl_notif_wait_data *notif_wait, continue; for (i = 0; i < w->n_cmds; i++) { - if (w->cmds[i] == - WIDE_ID(pkt->hdr.group_id, pkt->hdr.cmd)) { + u16 rec_id = WIDE_ID(pkt->hdr.group_id, + pkt->hdr.cmd); + + if (w->cmds[i] == rec_id || + (!iwl_cmd_groupid(w->cmds[i]) && + DEF_ID(w->cmds[i]) == rec_id)) { found = true; break; } diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.c b/drivers/net/wireless/intel/iwlwifi/iwl-trans.c index b0bd67c64b5c..b42152c697be 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.c @@ -140,6 +140,9 @@ int iwl_trans_send_cmd(struct iwl_trans *trans, struct iwl_host_cmd *cmd) if (!(cmd->flags & CMD_ASYNC)) lock_map_acquire_read(&trans->sync_cmd_lockdep_map); + if (trans->wide_cmd_header && !iwl_cmd_groupid(cmd->id)) + cmd->id = DEF_ID(cmd->id); + ret = trans->ops->send_cmd(trans, cmd); if (!(cmd->flags & CMD_ASYNC)) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h index 04e998d9d5fe..0296124a7f9c 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h @@ -153,6 +153,7 @@ static inline u32 iwl_cmd_id(u8 opcode, u8 groupid, u8 version) /* make u16 wide id out of u8 group and opcode */ #define WIDE_ID(grp, opcode) ((grp << 8) | opcode) +#define DEF_ID(opcode) ((1 << 8) | (opcode)) /* due to the conversion, this group is special; new groups * should be defined in the appropriate fw-api header files @@ -484,7 +485,6 @@ struct iwl_hcmd_arr { * @bc_table_dword: set to true if the BC table expects the byte count to be * in DWORD (as opposed to bytes) * @scd_set_active: should the transport configure the SCD for HCMD queue - * @wide_cmd_header: firmware supports wide host command header * @sw_csum_tx: transport should compute the TCP checksum * @command_groups: array of command groups, each member is an array of the * commands in the group; for debugging only @@ -506,7 +506,6 @@ struct iwl_trans_config { enum iwl_amsdu_size rx_buf_size; bool bc_table_dword; bool scd_set_active; - bool wide_cmd_header; bool sw_csum_tx; const struct iwl_hcmd_arr *command_groups; int command_groups_size; @@ -770,6 +769,7 @@ enum iwl_plat_pm_mode { * @hw_id_str: a string with info about HW ID. Set during transport allocation. * @pm_support: set to true in start_hw if link pm is supported * @ltr_enabled: set to true if the LTR is enabled + * @wide_cmd_header: true when ucode supports wide command header format * @num_rx_queues: number of RX queues allocated by the transport; * the transport must set this before calling iwl_drv_start() * @dev_cmd_pool: pool for Tx cmd allocation - for internal use only. @@ -821,6 +821,7 @@ struct iwl_trans { const struct iwl_hcmd_arr *command_groups; int command_groups_size; + bool wide_cmd_header; u8 num_rx_queues; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index de34c9f4f0f4..b2d8722cbb11 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -652,8 +652,8 @@ iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_cfg *cfg, /* the hardware splits the A-MSDU */ if (mvm->cfg->mq_rx_supported) trans_cfg.rx_buf_size = IWL_AMSDU_4K; - trans_cfg.wide_cmd_header = fw_has_api(&mvm->fw->ucode_capa, - IWL_UCODE_TLV_API_WIDE_CMD_HDR); + trans->wide_cmd_header = fw_has_api(&mvm->fw->ucode_capa, + IWL_UCODE_TLV_API_WIDE_CMD_HDR); if (mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_DW_BC_TABLE) trans_cfg.bc_table_dword = true; diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h index 987a0770fb5b..2d81630fba0f 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h +++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h @@ -332,7 +332,6 @@ enum iwl_shared_irq_flags { * @rx_buf_size: Rx buffer size * @bc_table_dword: true if the BC table expects DWORD (as opposed to bytes) * @scd_set_active: should the transport configure the SCD for HCMD queue - * @wide_cmd_header: true when ucode supports wide command header format * @sw_csum_tx: if true, then the transport will compute the csum of the TXed * frame. * @rx_page_order: page order for receive buffer size @@ -405,7 +404,6 @@ struct iwl_trans_pcie { enum iwl_amsdu_size rx_buf_size; bool bc_table_dword; bool scd_set_active; - bool wide_cmd_header; bool sw_csum_tx; u32 rx_page_order; diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index 74199b174948..68fa84381343 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -1755,7 +1755,6 @@ static void iwl_trans_pcie_configure(struct iwl_trans *trans, trans_pcie->rx_page_order = iwl_trans_get_rb_size_order(trans_pcie->rx_buf_size); - trans_pcie->wide_cmd_header = trans_cfg->wide_cmd_header; trans_pcie->bc_table_dword = trans_cfg->bc_table_dword; trans_pcie->scd_set_active = trans_cfg->scd_set_active; trans_pcie->sw_csum_tx = trans_cfg->sw_csum_tx; diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c index 0d156fddbf3d..e00e7d8f197a 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c @@ -1493,7 +1493,7 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans, const u8 *cmddata[IWL_MAX_CMD_TBS_PER_TFD]; u16 cmdlen[IWL_MAX_CMD_TBS_PER_TFD]; - if (WARN(!trans_pcie->wide_cmd_header && + if (WARN(!trans->wide_cmd_header && group_id > IWL_ALWAYS_LONG_GROUP, "unsupported wide command %#x\n", cmd->id)) return -EINVAL; From 8ec8ed437936ea40427ac2777dd4b65c87009d0a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 18 Aug 2016 14:18:40 +0200 Subject: [PATCH 1175/1715] iwlwifi: mvm: document passing unexpected Block Ack Request frames When we get an unexpected Block Ack Request frame, the BAID from the hardware will be invalid, and we'll pass it to mac80211 for further handling (sending a delBA action frame.) Add a comment explaining that, in case anyone looks in the future. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index 0274f14e88e3..a57c6ef5bc14 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -591,6 +591,11 @@ static bool iwl_mvm_reorder(struct iwl_mvm *mvm, baid = (reorder & IWL_RX_MPDU_REORDER_BAID_MASK) >> IWL_RX_MPDU_REORDER_BAID_SHIFT; + /* + * This also covers the case of receiving a Block Ack Request + * outside a BA session; we'll pass it to mac80211 and that + * then sends a delBA action frame. + */ if (baid == IWL_RX_REORDER_DATA_INVALID_BAID) return false; From de9466499b1ce705808bbc7876a9ee929b13d5bc Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Mon, 22 Aug 2016 23:24:40 +0300 Subject: [PATCH 1176/1715] iwlwifi: don't export trace points that are used in iwlwifi only This can save a few bytes Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/iwl-devtrace.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-devtrace.c b/drivers/net/wireless/intel/iwlwifi/iwl-devtrace.c index 1d9dd153ef1c..50510fb6ab8c 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-devtrace.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-devtrace.c @@ -33,9 +33,6 @@ #define CREATE_TRACE_POINTS #include "iwl-devtrace.h" -EXPORT_TRACEPOINT_SYMBOL(iwlwifi_dev_iowrite8); -EXPORT_TRACEPOINT_SYMBOL(iwlwifi_dev_ioread32); -EXPORT_TRACEPOINT_SYMBOL(iwlwifi_dev_iowrite32); EXPORT_TRACEPOINT_SYMBOL(iwlwifi_dev_ucode_event); EXPORT_TRACEPOINT_SYMBOL(iwlwifi_dev_ucode_error); EXPORT_TRACEPOINT_SYMBOL(iwlwifi_dev_ucode_cont_event); From 341d7eb8223bdd48bdf75729487a2de5e01623b3 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Thu, 18 Aug 2016 20:14:38 +0300 Subject: [PATCH 1177/1715] iwlwifi: mvm: disable P2P queue on mac context release AP queue is properly released, but P2P queue isn't. Fixes: commit 4c965139a3cd ("iwlwifi: mvm: support p2p device frames tx on dqa queue #2") Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c index 69c42ce45b8a..d742d27d8de0 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c @@ -539,6 +539,11 @@ void iwl_mvm_mac_ctxt_release(struct iwl_mvm *mvm, struct ieee80211_vif *vif) iwl_mvm_disable_txq(mvm, IWL_MVM_OFFCHANNEL_QUEUE, IWL_MVM_OFFCHANNEL_QUEUE, IWL_MAX_TID_COUNT, 0); + else + iwl_mvm_disable_txq(mvm, + IWL_MVM_DQA_P2P_DEVICE_QUEUE, + vif->hw_queue[0], IWL_MAX_TID_COUNT, + 0); break; case NL80211_IFTYPE_AP: From 5a41a86c525a53d31a229dd358f70d6b4b5a7bbc Mon Sep 17 00:00:00 2001 From: Sharon Dvir Date: Wed, 10 Aug 2016 09:05:48 +0300 Subject: [PATCH 1178/1715] iwlwifi: migrate to devm_* API Change PCIE and trans resource allocations to managed resources. Signed-off-by: Sharon Dvir Signed-off-by: Luca Coelho --- .../net/wireless/intel/iwlwifi/iwl-trans.c | 8 +- .../net/wireless/intel/iwlwifi/pcie/trans.c | 79 ++++++++----------- 2 files changed, 33 insertions(+), 54 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.c b/drivers/net/wireless/intel/iwlwifi/iwl-trans.c index b42152c697be..d42cab291025 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.c @@ -78,7 +78,7 @@ struct iwl_trans *iwl_trans_alloc(unsigned int priv_size, static struct lock_class_key __key; #endif - trans = kzalloc(sizeof(*trans) + priv_size, GFP_KERNEL); + trans = devm_kzalloc(dev, sizeof(*trans) + priv_size, GFP_KERNEL); if (!trans) return NULL; @@ -103,18 +103,14 @@ struct iwl_trans *iwl_trans_alloc(unsigned int priv_size, SLAB_HWCACHE_ALIGN, NULL); if (!trans->dev_cmd_pool) - goto free; + return NULL; return trans; - free: - kfree(trans); - return NULL; } void iwl_trans_free(struct iwl_trans *trans) { kmem_cache_destroy(trans->dev_cmd_pool); - kfree(trans); } int iwl_trans_send_cmd(struct iwl_trans *trans, struct iwl_host_cmd *cmd) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index 68fa84381343..ae95533e587d 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -1605,24 +1605,22 @@ static int iwl_pcie_init_msix_handler(struct pci_dev *pdev, for (i = 0; i < trans_pcie->alloc_vecs; i++) { int ret; + struct msix_entry *msix_entry; - ret = request_threaded_irq(trans_pcie->msix_entries[i].vector, - iwl_pcie_msix_isr, - (i == trans_pcie->def_irq) ? - iwl_pcie_irq_msix_handler : - iwl_pcie_irq_rx_msix_handler, - IRQF_SHARED, - DRV_NAME, - &trans_pcie->msix_entries[i]); + msix_entry = &trans_pcie->msix_entries[i]; + ret = devm_request_threaded_irq(&pdev->dev, + msix_entry->vector, + iwl_pcie_msix_isr, + (i == trans_pcie->def_irq) ? + iwl_pcie_irq_msix_handler : + iwl_pcie_irq_rx_msix_handler, + IRQF_SHARED, + DRV_NAME, + msix_entry); if (ret) { - int j; - IWL_ERR(trans_pcie->trans, "Error allocating IRQ %d\n", i); - for (j = 0; j < i; j++) - free_irq(trans_pcie->msix_entries[j].vector, - &trans_pcie->msix_entries[j]); - pci_disable_msix(pdev); + return ret; } } @@ -1789,23 +1787,12 @@ void iwl_trans_pcie_free(struct iwl_trans *trans) irq_set_affinity_hint( trans_pcie->msix_entries[i].vector, NULL); - - free_irq(trans_pcie->msix_entries[i].vector, - &trans_pcie->msix_entries[i]); } - pci_disable_msix(trans_pcie->pci_dev); trans_pcie->msix_enabled = false; } else { - free_irq(trans_pcie->pci_dev->irq, trans); - iwl_pcie_free_ict(trans); - - pci_disable_msi(trans_pcie->pci_dev); } - iounmap(trans_pcie->hw_base); - pci_release_regions(trans_pcie->pci_dev); - pci_disable_device(trans_pcie->pci_dev); iwl_pcie_free_fw_monitor(trans); @@ -2912,6 +2899,10 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, struct iwl_trans *trans; int ret, addr_size; + ret = pcim_enable_device(pdev); + if (ret) + return ERR_PTR(ret); + trans = iwl_trans_alloc(sizeof(struct iwl_trans_pcie), &pdev->dev, cfg, &trans_ops_pcie, 0); if (!trans) @@ -2930,9 +2921,6 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, goto out_no_pci; } - ret = pci_enable_device(pdev); - if (ret) - goto out_no_pci; if (!cfg->base_params->pcie_l1_allowed) { /* @@ -2974,21 +2962,21 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, /* both attempts failed: */ if (ret) { dev_err(&pdev->dev, "No suitable DMA available\n"); - goto out_pci_disable_device; + goto out_no_pci; } } - ret = pci_request_regions(pdev, DRV_NAME); + ret = pcim_iomap_regions_request_all(pdev, BIT(0), DRV_NAME); if (ret) { - dev_err(&pdev->dev, "pci_request_regions failed\n"); - goto out_pci_disable_device; + dev_err(&pdev->dev, "pcim_iomap_regions_request_all failed\n"); + goto out_no_pci; } - trans_pcie->hw_base = pci_ioremap_bar(pdev, 0); + trans_pcie->hw_base = pcim_iomap_table(pdev)[0]; if (!trans_pcie->hw_base) { - dev_err(&pdev->dev, "pci_ioremap_bar failed\n"); + dev_err(&pdev->dev, "pcim_iomap_table failed\n"); ret = -ENODEV; - goto out_pci_release_regions; + goto out_no_pci; } /* We disable the RETRY_TIMEOUT register (0x41) to keep @@ -3015,7 +3003,7 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, ret = iwl_pcie_prepare_card_hw(trans); if (ret) { IWL_WARN(trans, "Exit HW not ready\n"); - goto out_pci_disable_msi; + goto out_no_pci; } /* @@ -3032,7 +3020,7 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, 25000); if (ret < 0) { IWL_DEBUG_INFO(trans, "Failed to wake up the nic\n"); - goto out_pci_disable_msi; + goto out_no_pci; } if (iwl_trans_grab_nic_access(trans, &flags)) { @@ -3064,15 +3052,16 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, if (trans_pcie->msix_enabled) { if (iwl_pcie_init_msix_handler(pdev, trans_pcie)) - goto out_pci_release_regions; + goto out_no_pci; } else { ret = iwl_pcie_alloc_ict(trans); if (ret) - goto out_pci_disable_msi; + goto out_no_pci; - ret = request_threaded_irq(pdev->irq, iwl_pcie_isr, - iwl_pcie_irq_handler, - IRQF_SHARED, DRV_NAME, trans); + ret = devm_request_threaded_irq(&pdev->dev, pdev->irq, + iwl_pcie_isr, + iwl_pcie_irq_handler, + IRQF_SHARED, DRV_NAME, trans); if (ret) { IWL_ERR(trans, "Error allocating IRQ %d\n", pdev->irq); goto out_free_ict; @@ -3090,12 +3079,6 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, out_free_ict: iwl_pcie_free_ict(trans); -out_pci_disable_msi: - pci_disable_msi(pdev); -out_pci_release_regions: - pci_release_regions(pdev); -out_pci_disable_device: - pci_disable_device(pdev); out_no_pci: free_percpu(trans_pcie->tso_hdr_page); iwl_trans_free(trans); From b3bee580b1e200e4fc14091e3e118ae8280dc06c Mon Sep 17 00:00:00 2001 From: Roee Zamir Date: Tue, 2 Aug 2016 13:55:13 +0300 Subject: [PATCH 1179/1715] iwlwifi: mvm: Add debugfs function for clocks diff New function, reveals the diff between gp2 and host time. Signed-off-by: Roee Zamir Signed-off-by: Luca Coelho --- .../wireless/intel/iwlwifi/mvm/debugfs-vif.c | 26 +++++++++++++++++++ drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 1 + .../net/wireless/intel/iwlwifi/mvm/utils.c | 22 ++++++++++++++++ 3 files changed, 49 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c index b23271755daf..8ff19210ef1e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c @@ -504,6 +504,28 @@ static inline char *iwl_dbgfs_is_match(char *name, char *buf) return !strncmp(name, buf, len) ? buf + len : NULL; } +static ssize_t iwl_dbgfs_os_device_timediff_read(struct file *file, + char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct ieee80211_vif *vif = file->private_data; + struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); + struct iwl_mvm *mvm = mvmvif->mvm; + u32 curr_gp2; + u64 curr_os; + s64 diff; + char buf[64]; + const size_t bufsz = sizeof(buf); + int pos = 0; + + iwl_mvm_get_sync_time(mvm, &curr_gp2, &curr_os); + do_div(curr_os, NSEC_PER_USEC); + diff = curr_os - curr_gp2; + pos += scnprintf(buf + pos, bufsz - pos, "diff=%lld\n", diff); + + return simple_read_from_buffer(user_buf, count, ppos, buf, pos); +} + static ssize_t iwl_dbgfs_tof_enable_write(struct ieee80211_vif *vif, char *buf, size_t count, loff_t *ppos) @@ -1530,6 +1552,8 @@ MVM_DEBUGFS_READ_FILE_OPS(tof_range_response); MVM_DEBUGFS_READ_WRITE_FILE_OPS(tof_responder_params, 32); MVM_DEBUGFS_READ_WRITE_FILE_OPS(quota_min, 32); MVM_DEBUGFS_WRITE_FILE_OPS(lqm_send_cmd, 64); +MVM_DEBUGFS_READ_FILE_OPS(os_device_timediff); + void iwl_mvm_vif_dbgfs_register(struct iwl_mvm *mvm, struct ieee80211_vif *vif) { @@ -1570,6 +1594,8 @@ void iwl_mvm_vif_dbgfs_register(struct iwl_mvm *mvm, struct ieee80211_vif *vif) MVM_DEBUGFS_ADD_FILE_VIF(quota_min, mvmvif->dbgfs_dir, S_IRUSR | S_IWUSR); MVM_DEBUGFS_ADD_FILE_VIF(lqm_send_cmd, mvmvif->dbgfs_dir, S_IWUSR); + MVM_DEBUGFS_ADD_FILE_VIF(os_device_timediff, + mvmvif->dbgfs_dir, S_IRUSR); if (vif->type == NL80211_IFTYPE_STATION && !vif->p2p && mvmvif == mvm->bf_allowed_vif) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index e68a2bdc84ed..b7cfdcbcf95b 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -1268,6 +1268,7 @@ u8 iwl_mvm_mac80211_idx_to_hwrate(int rate_idx); void iwl_mvm_dump_nic_error_log(struct iwl_mvm *mvm); u8 first_antenna(u8 mask); u8 iwl_mvm_next_antenna(struct iwl_mvm *mvm, u8 valid, u8 last_idx); +void iwl_mvm_get_sync_time(struct iwl_mvm *mvm, u32 *gp2, u64 *boottime); /* Tx / Host Commands */ int __must_check iwl_mvm_send_cmd(struct iwl_mvm *mvm, diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c index 7c138fedcb19..9e366e28c983 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c @@ -1225,6 +1225,28 @@ void iwl_mvm_inactivity_check(struct iwl_mvm *mvm) rcu_read_unlock(); } +void iwl_mvm_get_sync_time(struct iwl_mvm *mvm, u32 *gp2, u64 *boottime) +{ + bool ps_disabled; + + lockdep_assert_held(&mvm->mutex); + + /* Disable power save when reading GP2 */ + ps_disabled = mvm->ps_disabled; + if (!ps_disabled) { + mvm->ps_disabled = true; + iwl_mvm_power_update_device(mvm); + } + + *gp2 = iwl_read_prph(mvm->trans, DEVICE_SYSTEM_TIME_REG); + *boottime = ktime_get_boot_ns(); + + if (!ps_disabled) { + mvm->ps_disabled = ps_disabled; + iwl_mvm_power_update_device(mvm); + } +} + int iwl_mvm_send_lqm_cmd(struct ieee80211_vif *vif, enum iwl_lqm_cmd_operatrions operation, u32 duration, u32 timeout) From 8b6607cc6cdecd932201e63597b3b0c0f7958f33 Mon Sep 17 00:00:00 2001 From: Oren Givon Date: Thu, 25 Aug 2016 13:15:24 +0300 Subject: [PATCH 1180/1715] iwlwifi: add new 8265 series PCI ID Add a new PCI ID for the 8265 series. Signed-off-by: Oren Givon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 14c6e94ecbf2..3d766f250d3f 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -487,6 +487,7 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x24FD, 0x1130, iwl8265_2ac_cfg)}, {IWL_PCI_DEVICE(0x24FD, 0x0130, iwl8265_2ac_cfg)}, {IWL_PCI_DEVICE(0x24FD, 0x1010, iwl8265_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24FD, 0x10D0, iwl8265_2ac_cfg)}, {IWL_PCI_DEVICE(0x24FD, 0x0050, iwl8265_2ac_cfg)}, {IWL_PCI_DEVICE(0x24FD, 0x0150, iwl8265_2ac_cfg)}, {IWL_PCI_DEVICE(0x24FD, 0x9010, iwl8265_2ac_cfg)}, From 186cd49a4d9ad1a0d4a7371c2d02a05a8ac53a03 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 24 Aug 2016 10:05:40 +0200 Subject: [PATCH 1181/1715] iwlwifi: mvm: move AP-specific code to right function There's no need for the common MAC context function to have an if on AP mode, the values can be overridden in the AP-specific function later. Clean that up by adding the full command as a new parameter to the AP-specific function, and doing it there. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- .../net/wireless/intel/iwlwifi/mvm/mac-ctxt.c | 42 +++++++++---------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c index d742d27d8de0..6b962d6b067a 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c @@ -774,26 +774,6 @@ static void iwl_mvm_mac_ctxt_cmd_common(struct iwl_mvm *mvm, cmd->ac[txf].fifos_mask = BIT(txf); } - if (vif->type == NL80211_IFTYPE_AP) { - /* in AP mode, the MCAST FIFO takes the EDCA params from VO */ - cmd->ac[IWL_MVM_TX_FIFO_VO].fifos_mask |= - BIT(IWL_MVM_TX_FIFO_MCAST); - - /* - * in AP mode, pass probe requests and beacons from other APs - * (needed for ht protection); when there're no any associated - * station don't ask FW to pass beacons to prevent unnecessary - * wake-ups. - */ - cmd->filter_flags |= cpu_to_le32(MAC_FILTER_IN_PROBE_REQUEST); - if (mvmvif->ap_assoc_sta_count || !mvm->drop_bcn_ap_mode) { - cmd->filter_flags |= cpu_to_le32(MAC_FILTER_IN_BEACON); - IWL_DEBUG_HC(mvm, "Asking FW to pass beacons\n"); - } else { - IWL_DEBUG_HC(mvm, "No need to receive beacons\n"); - } - } - if (vif->bss_conf.qos) cmd->qos_flags |= cpu_to_le32(MAC_QOS_FLG_UPDATE_EDCA); @@ -1191,6 +1171,7 @@ static void iwl_mvm_mac_ap_iterator(void *_data, u8 *mac, */ static void iwl_mvm_mac_ctxt_cmd_fill_ap(struct iwl_mvm *mvm, struct ieee80211_vif *vif, + struct iwl_mac_ctx_cmd *cmd, struct iwl_mac_data_ap *ctxt_ap, bool add) { @@ -1201,6 +1182,23 @@ static void iwl_mvm_mac_ctxt_cmd_fill_ap(struct iwl_mvm *mvm, .beacon_device_ts = 0 }; + /* in AP mode, the MCAST FIFO takes the EDCA params from VO */ + cmd->ac[IWL_MVM_TX_FIFO_VO].fifos_mask |= BIT(IWL_MVM_TX_FIFO_MCAST); + + /* + * in AP mode, pass probe requests and beacons from other APs + * (needed for ht protection); when there're no any associated + * station don't ask FW to pass beacons to prevent unnecessary + * wake-ups. + */ + cmd->filter_flags |= cpu_to_le32(MAC_FILTER_IN_PROBE_REQUEST); + if (mvmvif->ap_assoc_sta_count || !mvm->drop_bcn_ap_mode) { + cmd->filter_flags |= cpu_to_le32(MAC_FILTER_IN_BEACON); + IWL_DEBUG_HC(mvm, "Asking FW to pass beacons\n"); + } else { + IWL_DEBUG_HC(mvm, "No need to receive beacons\n"); + } + ctxt_ap->bi = cpu_to_le32(vif->bss_conf.beacon_int); ctxt_ap->bi_reciprocal = cpu_to_le32(iwl_mvm_reciprocal(vif->bss_conf.beacon_int)); @@ -1258,7 +1256,7 @@ static int iwl_mvm_mac_ctxt_cmd_ap(struct iwl_mvm *mvm, iwl_mvm_mac_ctxt_cmd_common(mvm, vif, &cmd, NULL, action); /* Fill the data specific for ap mode */ - iwl_mvm_mac_ctxt_cmd_fill_ap(mvm, vif, &cmd.ap, + iwl_mvm_mac_ctxt_cmd_fill_ap(mvm, vif, &cmd, &cmd.ap, action == FW_CTXT_ACTION_ADD); return iwl_mvm_mac_ctxt_send_cmd(mvm, &cmd); @@ -1277,7 +1275,7 @@ static int iwl_mvm_mac_ctxt_cmd_go(struct iwl_mvm *mvm, iwl_mvm_mac_ctxt_cmd_common(mvm, vif, &cmd, NULL, action); /* Fill the data specific for GO mode */ - iwl_mvm_mac_ctxt_cmd_fill_ap(mvm, vif, &cmd.go.ap, + iwl_mvm_mac_ctxt_cmd_fill_ap(mvm, vif, &cmd, &cmd.go.ap, action == FW_CTXT_ACTION_ADD); cmd.go.ctwin = cpu_to_le32(noa->oppps_ctwindow & From 2b55f43f8e477a123bca4ab35351666479bd7b86 Mon Sep 17 00:00:00 2001 From: Ido Yariv Date: Tue, 23 Aug 2016 14:44:59 -0400 Subject: [PATCH 1182/1715] iwlwifi: mvm: Add mem debugfs entry In order to access cached/paged memory, there are a couple of firmware commands (one for UMAC and one for LMAC) that let the host access memory and registers indirectly. Since this is done by the firmware on behalf of the host, even if memory is paged out or cached, the host will retrieve the memory as the firmware sees it (paged out memory will get paged in). Export this mechanism via a debugfs entry for both read and write access. WARNING: This mechanism has no protections at all. Invalid addresses may crash or hang the firmware. Writing to arbitrary memory also comes with no guarantees. Signed-off-by: Ido Yariv Signed-off-by: Luca Coelho --- .../net/wireless/intel/iwlwifi/mvm/debugfs.c | 129 ++++++++++++++++++ .../net/wireless/intel/iwlwifi/mvm/fw-api.h | 50 +++++++ 2 files changed, 179 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c index 540b7c9deaef..539d718df797 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c @@ -1518,6 +1518,132 @@ MVM_DEBUGFS_READ_WRITE_FILE_OPS(bcast_filters_macs, 256); MVM_DEBUGFS_READ_WRITE_FILE_OPS(d3_sram, 8); #endif +static ssize_t iwl_dbgfs_mem_read(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct iwl_mvm *mvm = file->private_data; + struct iwl_dbg_mem_access_cmd cmd = {}; + struct iwl_dbg_mem_access_rsp *rsp; + struct iwl_host_cmd hcmd = { + .flags = CMD_WANT_SKB | CMD_SEND_IN_RFKILL, + .data = { &cmd, }, + .len = { sizeof(cmd) }, + }; + size_t delta, len; + ssize_t ret; + + hcmd.id = iwl_cmd_id(*ppos >> 24 ? UMAC_RD_WR : LMAC_RD_WR, + DEBUG_GROUP, 0); + cmd.op = cpu_to_le32(DEBUG_MEM_OP_READ); + + /* Take care of alignment of both the position and the length */ + delta = *ppos & 0x3; + cmd.addr = cpu_to_le32(*ppos - delta); + cmd.len = cpu_to_le32(min(ALIGN(count + delta, 4) / 4, + (size_t)DEBUG_MEM_MAX_SIZE_DWORDS)); + + mutex_lock(&mvm->mutex); + ret = iwl_mvm_send_cmd(mvm, &hcmd); + mutex_unlock(&mvm->mutex); + + if (ret < 0) + return ret; + + rsp = (void *)hcmd.resp_pkt->data; + if (le32_to_cpu(rsp->status) != DEBUG_MEM_STATUS_SUCCESS) { + ret = -ENXIO; + goto out; + } + + len = min((size_t)le32_to_cpu(rsp->len) << 2, + iwl_rx_packet_payload_len(hcmd.resp_pkt) - sizeof(*rsp)); + len = min(len - delta, count); + if (len < 0) { + ret = -EFAULT; + goto out; + } + + ret = len - copy_to_user(user_buf, (void *)rsp->data + delta, len); + *ppos += ret; + +out: + iwl_free_resp(&hcmd); + return ret; +} + +static ssize_t iwl_dbgfs_mem_write(struct file *file, + const char __user *user_buf, size_t count, + loff_t *ppos) +{ + struct iwl_mvm *mvm = file->private_data; + struct iwl_dbg_mem_access_cmd *cmd; + struct iwl_dbg_mem_access_rsp *rsp; + struct iwl_host_cmd hcmd = {}; + size_t cmd_size; + size_t data_size; + u32 op, len; + ssize_t ret; + + hcmd.id = iwl_cmd_id(*ppos >> 24 ? UMAC_RD_WR : LMAC_RD_WR, + DEBUG_GROUP, 0); + + if (*ppos & 0x3 || count < 4) { + op = DEBUG_MEM_OP_WRITE_BYTES; + len = min(count, (size_t)(4 - (*ppos & 0x3))); + data_size = len; + } else { + op = DEBUG_MEM_OP_WRITE; + len = min(count >> 2, (size_t)DEBUG_MEM_MAX_SIZE_DWORDS); + data_size = len << 2; + } + + cmd_size = sizeof(*cmd) + ALIGN(data_size, 4); + cmd = kzalloc(cmd_size, GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + cmd->op = cpu_to_le32(op); + cmd->len = cpu_to_le32(len); + cmd->addr = cpu_to_le32(*ppos); + if (copy_from_user((void *)cmd->data, user_buf, data_size)) { + kfree(cmd); + return -EFAULT; + } + + hcmd.flags = CMD_WANT_SKB | CMD_SEND_IN_RFKILL, + hcmd.data[0] = (void *)cmd; + hcmd.len[0] = cmd_size; + + mutex_lock(&mvm->mutex); + ret = iwl_mvm_send_cmd(mvm, &hcmd); + mutex_unlock(&mvm->mutex); + + kfree(cmd); + + if (ret < 0) + return ret; + + rsp = (void *)hcmd.resp_pkt->data; + if (rsp->status != DEBUG_MEM_STATUS_SUCCESS) { + ret = -ENXIO; + goto out; + } + + ret = data_size; + *ppos += ret; + +out: + iwl_free_resp(&hcmd); + return ret; +} + +static const struct file_operations iwl_dbgfs_mem_ops = { + .read = iwl_dbgfs_mem_read, + .write = iwl_dbgfs_mem_write, + .open = simple_open, + .llseek = default_llseek, +}; + int iwl_mvm_dbgfs_register(struct iwl_mvm *mvm, struct dentry *dbgfs_dir) { struct dentry *bcast_dir __maybe_unused; @@ -1615,6 +1741,9 @@ int iwl_mvm_dbgfs_register(struct iwl_mvm *mvm, struct dentry *dbgfs_dir) mvm->debugfs_dir, &mvm->nvm_phy_sku_blob)) goto err; + debugfs_create_file("mem", S_IRUSR | S_IWUSR, dbgfs_dir, mvm, + &iwl_dbgfs_mem_ops); + /* * Create a symlink with mac80211. It will be removed when mac80211 * exists (before the opmode exists which removes the target.) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api.h b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api.h index 2f92994d0e5b..fdd9506e10cd 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api.h @@ -340,6 +340,11 @@ enum iwl_prot_offload_subcmd_ids { STORED_BEACON_NTF = 0xFF, }; +enum iwl_fmac_debug_cmds { + LMAC_RD_WR = 0x0, + UMAC_RD_WR = 0x1, +}; + /* command groups */ enum { LEGACY_GROUP = 0x0, @@ -349,6 +354,7 @@ enum { PHY_OPS_GROUP = 0x4, DATA_PATH_GROUP = 0x5, PROT_OFFLOAD_GROUP = 0xb, + DEBUG_GROUP = 0xf, }; /** @@ -2149,4 +2155,48 @@ struct iwl_channel_switch_noa_notif { __le32 id_and_color; } __packed; /* CHANNEL_SWITCH_START_NTFY_API_S_VER_1 */ +/* Operation types for the debug mem access */ +enum { + DEBUG_MEM_OP_READ = 0, + DEBUG_MEM_OP_WRITE = 1, + DEBUG_MEM_OP_WRITE_BYTES = 2, +}; + +#define DEBUG_MEM_MAX_SIZE_DWORDS 32 + +/** + * struct iwl_dbg_mem_access_cmd - Request the device to read/write memory + * @op: DEBUG_MEM_OP_* + * @addr: address to read/write from/to + * @len: in dwords, to read/write + * @data: for write opeations, contains the source buffer + */ +struct iwl_dbg_mem_access_cmd { + __le32 op; + __le32 addr; + __le32 len; + __le32 data[]; +} __packed; /* DEBUG_(U|L)MAC_RD_WR_CMD_API_S_VER_1 */ + +/* Status responses for the debug mem access */ +enum { + DEBUG_MEM_STATUS_SUCCESS = 0x0, + DEBUG_MEM_STATUS_FAILED = 0x1, + DEBUG_MEM_STATUS_LOCKED = 0x2, + DEBUG_MEM_STATUS_HIDDEN = 0x3, + DEBUG_MEM_STATUS_LENGTH = 0x4, +}; + +/** + * struct iwl_dbg_mem_access_rsp - Response to debug mem commands + * @status: DEBUG_MEM_STATUS_* + * @len: read dwords (0 for write operations) + * @data: contains the read DWs + */ +struct iwl_dbg_mem_access_rsp { + __le32 status; + __le32 len; + __le32 data[]; +} __packed; /* DEBUG_(U|L)MAC_RD_WR_RSP_API_S_VER_1 */ + #endif /* __fw_api_h__ */ From 176aa60bf148b5af4209ac323cef941dee76e390 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Wed, 31 Aug 2016 19:03:01 +0300 Subject: [PATCH 1183/1715] iwlwifi: mvm: set HCMD_NAME for PHY_DB as well Currently it is logged as UNKNOWN. Also, 0x6c seems to be the permanent ID for this command, remove incorrect comment and uncomment the command from the commands list. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/iwl-phy-db.c | 2 +- drivers/net/wireless/intel/iwlwifi/mvm/fw-api.h | 2 +- drivers/net/wireless/intel/iwlwifi/mvm/ops.c | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-phy-db.c b/drivers/net/wireless/intel/iwlwifi/iwl-phy-db.c index 7beba9ae5617..2893826d7d2b 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-phy-db.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-phy-db.c @@ -110,7 +110,7 @@ enum iwl_phy_db_section_type { IWL_PHY_DB_MAX }; -#define PHY_DB_CMD 0x6c /* TEMP API - The actual is 0x8c */ +#define PHY_DB_CMD 0x6c /* * phy db - configure operational ucode diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api.h b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api.h index fdd9506e10cd..97633690f3d5 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api.h @@ -205,7 +205,7 @@ enum { /* Phy */ PHY_CONFIGURATION_CMD = 0x6a, CALIB_RES_NOTIF_PHY_DB = 0x6b, - /* PHY_DB_CMD = 0x6c, */ + PHY_DB_CMD = 0x6c, /* ToF - 802.11mc FTM */ TOF_CMD = 0x10, diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index b2d8722cbb11..3eccd89b9570 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -359,6 +359,7 @@ static const struct iwl_hcmd_names iwl_mvm_legacy_names[] = { HCMD_NAME(BT_COEX_CI), HCMD_NAME(PHY_CONFIGURATION_CMD), HCMD_NAME(CALIB_RES_NOTIF_PHY_DB), + HCMD_NAME(PHY_DB_CMD), HCMD_NAME(SCAN_OFFLOAD_COMPLETE), HCMD_NAME(SCAN_OFFLOAD_UPDATE_PROFILES_CMD), HCMD_NAME(SCAN_OFFLOAD_CONFIG_CMD), From 8098203f26b4fd4b4ef5281b2e77d49ab6d9a3b4 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 31 Aug 2016 22:16:11 +0200 Subject: [PATCH 1184/1715] iwlwifi: mvm: use LIST_HEAD() macro There's no need to declare a list and then init it manually, just use the LIST_HEAD() macro. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/ops.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index 3eccd89b9570..75f3ca0504fd 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -858,9 +858,7 @@ static void iwl_mvm_async_handlers_wk(struct work_struct *wk) struct iwl_mvm *mvm = container_of(wk, struct iwl_mvm, async_handlers_wk); struct iwl_async_handler_entry *entry, *tmp; - struct list_head local_list; - - INIT_LIST_HEAD(&local_list); + LIST_HEAD(local_list); /* Ensure that we are not in stop flow (check iwl_mvm_mac_stop) */ From 0979a913f879ea39504200d83fb9f275a555a58d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 31 Aug 2016 22:16:11 +0200 Subject: [PATCH 1185/1715] iwlwifi: pcie: use LIST_HEAD() macro There's no need to declare a list and then init it manually, just use the LIST_HEAD() macro. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/pcie/rx.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c index 78cb6d2314d9..6fe5546dc773 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c @@ -487,15 +487,13 @@ static void iwl_pcie_rx_allocator(struct iwl_trans *trans) while (pending) { int i; - struct list_head local_allocated; + LIST_HEAD(local_allocated); gfp_t gfp_mask = GFP_KERNEL; /* Do not post a warning if there are only a few requests */ if (pending < RX_PENDING_WATERMARK) gfp_mask |= __GFP_NOWARN; - INIT_LIST_HEAD(&local_allocated); - for (i = 0; i < RX_CLAIM_REQ_ALLOC;) { struct iwl_rx_mem_buffer *rxb; struct page *page; From 53f863a66904542b03204f2b115d050b04c11ba5 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Thu, 21 Jul 2016 14:12:40 +0200 Subject: [PATCH 1186/1715] Bluetooth: Put led_trigger field behind CONFIG_BT_LEDS The led_trigger field in hci_dev should be conditional based on if CONFIG_BT_LEDS is set or not. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci_core.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index ee7fc47680a1..b8d43bd9c71b 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -399,7 +399,9 @@ struct hci_dev { struct delayed_work rpa_expired; bdaddr_t rpa; +#if IS_ENABLED(CONFIG_BT_LEDS) struct led_trigger *power_led; +#endif int (*open)(struct hci_dev *hdev); int (*close)(struct hci_dev *hdev); From e64c97b53bc6727aa4385535166aaa047281e02d Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Thu, 21 Jul 2016 14:12:41 +0200 Subject: [PATCH 1187/1715] Bluetooth: Add combined LED trigger for controller power Instead of just having a LED trigger for power on a specific controller, this adds the LED trigger "bluetooth-power" that combines the power states of all controllers into a single trigger. This simplifies the trigger selection and also supports multiple controllers per host system via a single LED. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/af_bluetooth.c | 5 +++++ net/bluetooth/leds.c | 27 +++++++++++++++++++++++++++ net/bluetooth/leds.h | 10 ++++++++++ 3 files changed, 42 insertions(+) diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 0b5f729d08d2..1d96ff3a8d87 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -31,6 +31,7 @@ #include #include +#include "leds.h" #include "selftest.h" /* Bluetooth sockets */ @@ -726,6 +727,8 @@ static int __init bt_init(void) bt_debugfs = debugfs_create_dir("bluetooth", NULL); + bt_leds_init(); + err = bt_sysfs_init(); if (err < 0) return err; @@ -785,6 +788,8 @@ static void __exit bt_exit(void) bt_sysfs_cleanup(); + bt_leds_cleanup(); + debugfs_remove_recursive(bt_debugfs); } diff --git a/net/bluetooth/leds.c b/net/bluetooth/leds.c index 8319c8440c89..cb670b5594eb 100644 --- a/net/bluetooth/leds.c +++ b/net/bluetooth/leds.c @@ -11,6 +11,8 @@ #include "leds.h" +DEFINE_LED_TRIGGER(bt_power_led_trigger); + struct hci_basic_led_trigger { struct led_trigger led_trigger; struct hci_dev *hdev; @@ -24,6 +26,21 @@ void hci_leds_update_powered(struct hci_dev *hdev, bool enabled) if (hdev->power_led) led_trigger_event(hdev->power_led, enabled ? LED_FULL : LED_OFF); + + if (!enabled) { + struct hci_dev *d; + + read_lock(&hci_dev_list_lock); + + list_for_each_entry(d, &hci_dev_list, list) { + if (test_bit(HCI_UP, &d->flags)) + enabled = true; + } + + read_unlock(&hci_dev_list_lock); + } + + led_trigger_event(bt_power_led_trigger, enabled ? LED_FULL : LED_OFF); } static void power_activate(struct led_classdev *led_cdev) @@ -72,3 +89,13 @@ void hci_leds_init(struct hci_dev *hdev) /* initialize power_led */ hdev->power_led = led_allocate_basic(hdev, power_activate, "power"); } + +void bt_leds_init(void) +{ + led_trigger_register_simple("bluetooth-power", &bt_power_led_trigger); +} + +void bt_leds_cleanup(void) +{ + led_trigger_unregister_simple(bt_power_led_trigger); +} diff --git a/net/bluetooth/leds.h b/net/bluetooth/leds.h index a9c4d6ea01cf..08725a2fbd9b 100644 --- a/net/bluetooth/leds.h +++ b/net/bluetooth/leds.h @@ -7,10 +7,20 @@ */ #if IS_ENABLED(CONFIG_BT_LEDS) + void hci_leds_update_powered(struct hci_dev *hdev, bool enabled); void hci_leds_init(struct hci_dev *hdev); + +void bt_leds_init(void); +void bt_leds_cleanup(void); + #else + static inline void hci_leds_update_powered(struct hci_dev *hdev, bool enabled) {} static inline void hci_leds_init(struct hci_dev *hdev) {} + +static inline void bt_leds_init(void) {} +static inline void bt_leds_cleanup(void) {} + #endif From abbcc341adb16f68915cae7ef9a10e0d7b57e3c0 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Sun, 24 Jul 2016 16:12:24 +0200 Subject: [PATCH 1188/1715] mac802154: set phy net namespace for new ifaces This patch sets the net namespace when creating SoftMAC interfaces. This is important if the namespace at phy layer was switched before. Currently we losing interfaces in some namespace and it's not possible to recover that. Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- net/mac802154/iface.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c index 7079cd32a7ad..06019dba4b10 100644 --- a/net/mac802154/iface.c +++ b/net/mac802154/iface.c @@ -663,6 +663,7 @@ ieee802154_if_add(struct ieee802154_local *local, const char *name, /* TODO check this */ SET_NETDEV_DEV(ndev, &local->phy->dev); + dev_net_set(ndev, wpan_phy_net(local->hw.phy)); sdata = netdev_priv(ndev); ndev->ieee802154_ptr = &sdata->wpan_dev; memcpy(sdata->name, ndev->name, IFNAMSIZ); From 5ddedce3b7331959a6da217ed3189d020090873c Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Sun, 24 Jul 2016 16:12:25 +0200 Subject: [PATCH 1189/1715] 6lowpan: ndisc: no overreact if no short address is available This patch removes handling to remove short address for a neigbour entry if RS/RA/NS/NA doesn't contain a short address. If these messages doesn't has any short address option, the existing short address from ndisc cache will be used. The current behaviour will set that the neigbour doesn't has a short address anymore. Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- net/6lowpan/ndisc.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/6lowpan/ndisc.c b/net/6lowpan/ndisc.c index 86450b7e2899..941df2fa4448 100644 --- a/net/6lowpan/ndisc.c +++ b/net/6lowpan/ndisc.c @@ -101,8 +101,6 @@ static void lowpan_ndisc_802154_update(struct neighbour *n, u32 flags, ieee802154_be16_to_le16(&neigh->short_addr, lladdr_short); if (!lowpan_802154_is_valid_src_short_addr(neigh->short_addr)) neigh->short_addr = cpu_to_le16(IEEE802154_ADDR_SHORT_UNSPEC); - } else { - neigh->short_addr = cpu_to_le16(IEEE802154_ADDR_SHORT_UNSPEC); } write_unlock_bh(&n->lock); } From ca1de81aa262dcf48354a7c55f2558205517d06e Mon Sep 17 00:00:00 2001 From: Aristeu Rozanski Date: Mon, 25 Jul 2016 11:46:40 -0400 Subject: [PATCH 1190/1715] mac802154: don't warn on unsupported frames Just because we don't support certain types of frames yet doesn't mean we have to flood the message log with warnings about "invalid" frames. Signed-off-by: Aristeu Rozanski Acked-by: Alexander Aring Signed-off-by: Marcel Holtmann --- net/mac802154/rx.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/mac802154/rx.c b/net/mac802154/rx.c index 446e1300383e..b978da018bf8 100644 --- a/net/mac802154/rx.c +++ b/net/mac802154/rx.c @@ -101,6 +101,11 @@ ieee802154_subif_frame(struct ieee802154_sub_if_data *sdata, sdata->dev->stats.rx_bytes += skb->len; switch (mac_cb(skb)->type) { + case IEEE802154_FC_TYPE_BEACON: + case IEEE802154_FC_TYPE_ACK: + case IEEE802154_FC_TYPE_MAC_CMD: + goto fail; + case IEEE802154_FC_TYPE_DATA: return ieee802154_deliver_skb(skb); default: From bd89bb6daaca3e4a7c509bdacb53a610f432fa2c Mon Sep 17 00:00:00 2001 From: Aristeu Rozanski Date: Mon, 25 Jul 2016 11:46:41 -0400 Subject: [PATCH 1191/1715] mac802154: use rate limited warnings for malformed frames Signed-off-by: Aristeu Rozanski Acked-by: Alexander Aring Signed-off-by: Marcel Holtmann --- net/mac802154/rx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/mac802154/rx.c b/net/mac802154/rx.c index b978da018bf8..4dcf6e18563a 100644 --- a/net/mac802154/rx.c +++ b/net/mac802154/rx.c @@ -109,8 +109,8 @@ ieee802154_subif_frame(struct ieee802154_sub_if_data *sdata, case IEEE802154_FC_TYPE_DATA: return ieee802154_deliver_skb(skb); default: - pr_warn("ieee802154: bad frame received (type = %d)\n", - mac_cb(skb)->type); + pr_warn_ratelimited("ieee802154: bad frame received " + "(type = %d)\n", mac_cb(skb)->type); goto fail; } From 65010e68efbeda4275845240869138c0c4587422 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Fri, 12 Aug 2016 17:01:27 -0700 Subject: [PATCH 1192/1715] Bluetooth: Add HCI device identifier for Qualcomm SMD This patch assigns the next free HCI device identifier to Bluetooth devices based on the Qualcomm Shared Memory channels. Signed-off-by: Bjorn Andersson Signed-off-by: Bjorn Andersson Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 003b25283407..0aac123b5eee 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -63,6 +63,7 @@ #define HCI_SDIO 6 #define HCI_SPI 7 #define HCI_I2C 8 +#define HCI_SMD 9 /* HCI controller types */ #define HCI_PRIMARY 0x00 From 1511cc750c3d9a1c402d71e3522c9cf1fad0ad9c Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Fri, 12 Aug 2016 17:01:28 -0700 Subject: [PATCH 1193/1715] Bluetooth: Introduce Qualcomm WCNSS SMD based HCI driver The Qualcomm WCNSS chip provides two SMD channels to the BT core; one for command and one for event packets. This driver exposes the two channels as a hci device. Signed-off-by: Bjorn Andersson Signed-off-by: Bjorn Andersson Signed-off-by: Marcel Holtmann --- drivers/bluetooth/Kconfig | 12 +++ drivers/bluetooth/Makefile | 1 + drivers/bluetooth/btqcomsmd.c | 182 ++++++++++++++++++++++++++++++++++ 3 files changed, 195 insertions(+) create mode 100644 drivers/bluetooth/btqcomsmd.c diff --git a/drivers/bluetooth/Kconfig b/drivers/bluetooth/Kconfig index cf50fd2e96df..2c481911f1ce 100644 --- a/drivers/bluetooth/Kconfig +++ b/drivers/bluetooth/Kconfig @@ -331,4 +331,16 @@ config BT_WILINK Say Y here to compile support for Texas Instrument's WiLink7 driver into the kernel or say M to compile it as module (btwilink). +config BT_QCOMSMD + tristate "Qualcomm SMD based HCI support" + depends on QCOM_SMD + select BT_QCA + help + Qualcomm SMD based HCI driver. + This driver is used to bridge HCI data onto the shared memory + channels to the WCNSS core. + + Say Y here to compile support for HCI over Qualcomm SMD into the + kernel or say M to compile as a module. + endmenu diff --git a/drivers/bluetooth/Makefile b/drivers/bluetooth/Makefile index 9c18939fc5c9..3e92cfeb9ee2 100644 --- a/drivers/bluetooth/Makefile +++ b/drivers/bluetooth/Makefile @@ -20,6 +20,7 @@ obj-$(CONFIG_BT_ATH3K) += ath3k.o obj-$(CONFIG_BT_MRVL) += btmrvl.o obj-$(CONFIG_BT_MRVL_SDIO) += btmrvl_sdio.o obj-$(CONFIG_BT_WILINK) += btwilink.o +obj-$(CONFIG_BT_QCOMSMD) += btqcomsmd.o obj-$(CONFIG_BT_BCM) += btbcm.o obj-$(CONFIG_BT_RTL) += btrtl.o obj-$(CONFIG_BT_QCA) += btqca.o diff --git a/drivers/bluetooth/btqcomsmd.c b/drivers/bluetooth/btqcomsmd.c new file mode 100644 index 000000000000..08c2c93887c1 --- /dev/null +++ b/drivers/bluetooth/btqcomsmd.c @@ -0,0 +1,182 @@ +/* + * Copyright (c) 2016, Linaro Ltd. + * Copyright (c) 2015, Sony Mobile Communications Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include +#include + +#include +#include + +#include "btqca.h" + +struct btqcomsmd { + struct hci_dev *hdev; + + struct qcom_smd_channel *acl_channel; + struct qcom_smd_channel *cmd_channel; +}; + +static int btqcomsmd_recv(struct hci_dev *hdev, unsigned int type, + const void *data, size_t count) +{ + struct sk_buff *skb; + + /* Use GFP_ATOMIC as we're in IRQ context */ + skb = bt_skb_alloc(count, GFP_ATOMIC); + if (!skb) { + hdev->stat.err_rx++; + return -ENOMEM; + } + + hci_skb_pkt_type(skb) = type; + memcpy(skb_put(skb, count), data, count); + + return hci_recv_frame(hdev, skb); +} + +static int btqcomsmd_acl_callback(struct qcom_smd_channel *channel, + const void *data, size_t count) +{ + struct btqcomsmd *btq = qcom_smd_get_drvdata(channel); + + btq->hdev->stat.byte_rx += count; + return btqcomsmd_recv(btq->hdev, HCI_ACLDATA_PKT, data, count); +} + +static int btqcomsmd_cmd_callback(struct qcom_smd_channel *channel, + const void *data, size_t count) +{ + struct btqcomsmd *btq = qcom_smd_get_drvdata(channel); + + return btqcomsmd_recv(btq->hdev, HCI_EVENT_PKT, data, count); +} + +static int btqcomsmd_send(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct btqcomsmd *btq = hci_get_drvdata(hdev); + int ret; + + switch (hci_skb_pkt_type(skb)) { + case HCI_ACLDATA_PKT: + ret = qcom_smd_send(btq->acl_channel, skb->data, skb->len); + hdev->stat.acl_tx++; + hdev->stat.byte_tx += skb->len; + break; + case HCI_COMMAND_PKT: + ret = qcom_smd_send(btq->cmd_channel, skb->data, skb->len); + hdev->stat.cmd_tx++; + break; + default: + ret = -EILSEQ; + break; + } + + kfree_skb(skb); + + return ret; +} + +static int btqcomsmd_open(struct hci_dev *hdev) +{ + return 0; +} + +static int btqcomsmd_close(struct hci_dev *hdev) +{ + return 0; +} + +static int btqcomsmd_probe(struct platform_device *pdev) +{ + struct btqcomsmd *btq; + struct hci_dev *hdev; + void *wcnss; + int ret; + + btq = devm_kzalloc(&pdev->dev, sizeof(*btq), GFP_KERNEL); + if (!btq) + return -ENOMEM; + + wcnss = dev_get_drvdata(pdev->dev.parent); + + btq->acl_channel = qcom_wcnss_open_channel(wcnss, "APPS_RIVA_BT_ACL", + btqcomsmd_acl_callback); + if (IS_ERR(btq->acl_channel)) + return PTR_ERR(btq->acl_channel); + + btq->cmd_channel = qcom_wcnss_open_channel(wcnss, "APPS_RIVA_BT_CMD", + btqcomsmd_cmd_callback); + if (IS_ERR(btq->cmd_channel)) + return PTR_ERR(btq->cmd_channel); + + qcom_smd_set_drvdata(btq->acl_channel, btq); + qcom_smd_set_drvdata(btq->cmd_channel, btq); + + hdev = hci_alloc_dev(); + if (!hdev) + return -ENOMEM; + + hci_set_drvdata(hdev, btq); + btq->hdev = hdev; + SET_HCIDEV_DEV(hdev, &pdev->dev); + + hdev->bus = HCI_SMD; + hdev->open = btqcomsmd_open; + hdev->close = btqcomsmd_close; + hdev->send = btqcomsmd_send; + hdev->set_bdaddr = qca_set_bdaddr_rome; + + ret = hci_register_dev(hdev); + if (ret < 0) { + hci_free_dev(hdev); + return ret; + } + + platform_set_drvdata(pdev, btq); + + return 0; +} + +static int btqcomsmd_remove(struct platform_device *pdev) +{ + struct btqcomsmd *btq = platform_get_drvdata(pdev); + + hci_unregister_dev(btq->hdev); + hci_free_dev(btq->hdev); + + return 0; +} + +static const struct of_device_id btqcomsmd_of_match[] = { + { .compatible = "qcom,wcnss-bt", }, + { }, +}; + +static struct platform_driver btqcomsmd_driver = { + .probe = btqcomsmd_probe, + .remove = btqcomsmd_remove, + .driver = { + .name = "btqcomsmd", + .of_match_table = btqcomsmd_of_match, + }, +}; + +module_platform_driver(btqcomsmd_driver); + +MODULE_AUTHOR("Bjorn Andersson "); +MODULE_DESCRIPTION("Qualcomm SMD HCI driver"); +MODULE_LICENSE("GPL v2"); From f0a70a04ca10d07a383a89edea142e3cbab1f2ca Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 11 Aug 2016 16:02:44 -0700 Subject: [PATCH 1194/1715] Bluetooth: btusb, hci_intel: Fix wait_on_bit_timeout() return value checks wait_on_bit_timeout() returns one of the following three values: * 0 to indicate success. * -EINTR to indicate that a signal has been received; * -EAGAIN to indicate timeout; Make the wait_on_bit_timeout() callers check for these values. Signed-off-by: Bart Van Assche Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- drivers/bluetooth/btusb.c | 5 ++--- drivers/bluetooth/hci_intel.c | 6 +++--- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 811f9b97e360..c58a00cb5208 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -2221,9 +2221,8 @@ static int btusb_setup_intel_new(struct hci_dev *hdev) err = wait_on_bit_timeout(&data->flags, BTUSB_DOWNLOADING, TASK_INTERRUPTIBLE, msecs_to_jiffies(5000)); - if (err == 1) { + if (err == -EINTR) { BT_ERR("%s: Firmware loading interrupted", hdev->name); - err = -EINTR; goto done; } @@ -2275,7 +2274,7 @@ done: TASK_INTERRUPTIBLE, msecs_to_jiffies(1000)); - if (err == 1) { + if (err == -EINTR) { BT_ERR("%s: Device boot interrupted", hdev->name); return -EINTR; } diff --git a/drivers/bluetooth/hci_intel.c b/drivers/bluetooth/hci_intel.c index ed0a4201b551..9e271286c5e5 100644 --- a/drivers/bluetooth/hci_intel.c +++ b/drivers/bluetooth/hci_intel.c @@ -128,7 +128,7 @@ static int intel_wait_booting(struct hci_uart *hu) TASK_INTERRUPTIBLE, msecs_to_jiffies(1000)); - if (err == 1) { + if (err == -EINTR) { bt_dev_err(hu->hdev, "Device boot interrupted"); return -EINTR; } @@ -151,7 +151,7 @@ static int intel_wait_lpm_transaction(struct hci_uart *hu) TASK_INTERRUPTIBLE, msecs_to_jiffies(1000)); - if (err == 1) { + if (err == -EINTR) { bt_dev_err(hu->hdev, "LPM transaction interrupted"); return -EINTR; } @@ -813,7 +813,7 @@ static int intel_setup(struct hci_uart *hu) err = wait_on_bit_timeout(&intel->flags, STATE_DOWNLOADING, TASK_INTERRUPTIBLE, msecs_to_jiffies(5000)); - if (err == 1) { + if (err == -EINTR) { bt_dev_err(hdev, "Firmware loading interrupted"); err = -EINTR; goto done; From 1aabbbcefe8e62fbffaaa01ca8bdd4cd6ed1625b Mon Sep 17 00:00:00 2001 From: Nicolas Iooss Date: Fri, 29 Jul 2016 13:28:25 +0200 Subject: [PATCH 1195/1715] Bluetooth: add printf format attribute to hci_set_[fh]w_info() Commit 5177a83827cd ("Bluetooth: Add debugfs fields for hardware and firmware info") introduced hci_set_hw_info() and hci_set_fw_info(). These functions use kvasprintf_const() but are not marked with a __printf attribute. Adding such an attribute helps detecting issues related to printf-formatting at build time. Signed-off-by: Nicolas Iooss Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index b8d43bd9c71b..cc349f633570 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1028,8 +1028,8 @@ int hci_resume_dev(struct hci_dev *hdev); int hci_reset_dev(struct hci_dev *hdev); int hci_recv_frame(struct hci_dev *hdev, struct sk_buff *skb); int hci_recv_diag(struct hci_dev *hdev, struct sk_buff *skb); -void hci_set_hw_info(struct hci_dev *hdev, const char *fmt, ...); -void hci_set_fw_info(struct hci_dev *hdev, const char *fmt, ...); +__printf(2, 3) void hci_set_hw_info(struct hci_dev *hdev, const char *fmt, ...); +__printf(2, 3) void hci_set_fw_info(struct hci_dev *hdev, const char *fmt, ...); int hci_dev_open(__u16 dev); int hci_dev_close(__u16 dev); int hci_dev_do_close(struct hci_dev *hdev); From 935199348048902124d0b288788c3a45e78b69ab Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Tue, 16 Aug 2016 12:50:06 +0800 Subject: [PATCH 1196/1715] Bluetooth: btusb: Add support for 0cf3:e009 Device 0cf3:e009 is one of the QCA ROME family. T: Bus=01 Lev=01 Prnt=01 Port=07 Cnt=04 Dev#= 4 Spd=12 MxCh= 0 D: Ver= 2.01 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=0cf3 ProdID=e009 Rev=00.01 C: #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=100mA I: If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb I: If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb Signed-off-by: Kai-Heng Feng Signed-off-by: Marcel Holtmann --- drivers/bluetooth/btusb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index c58a00cb5208..80ae854a0bee 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -248,6 +248,7 @@ static const struct usb_device_id blacklist_table[] = { /* QCA ROME chipset */ { USB_DEVICE(0x0cf3, 0xe007), .driver_info = BTUSB_QCA_ROME }, + { USB_DEVICE(0x0cf3, 0xe009), .driver_info = BTUSB_QCA_ROME }, { USB_DEVICE(0x0cf3, 0xe300), .driver_info = BTUSB_QCA_ROME }, { USB_DEVICE(0x0cf3, 0xe360), .driver_info = BTUSB_QCA_ROME }, { USB_DEVICE(0x0489, 0xe092), .driver_info = BTUSB_QCA_ROME }, From 7e8524591ffffe3536bd363827ff4477a5672c65 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 11 Aug 2016 23:00:31 +0200 Subject: [PATCH 1197/1715] Bluetooth: bcm203x: don't print error when allocating urb fails kmalloc will print enough information in case of failure. Signed-off-by: Wolfram Sang Signed-off-by: Marcel Holtmann --- drivers/bluetooth/bcm203x.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/bluetooth/bcm203x.c b/drivers/bluetooth/bcm203x.c index 5b0ef7bbe8ac..5ce6d4176dc3 100644 --- a/drivers/bluetooth/bcm203x.c +++ b/drivers/bluetooth/bcm203x.c @@ -185,10 +185,8 @@ static int bcm203x_probe(struct usb_interface *intf, const struct usb_device_id data->state = BCM203X_LOAD_MINIDRV; data->urb = usb_alloc_urb(0, GFP_KERNEL); - if (!data->urb) { - BT_ERR("Can't allocate URB"); + if (!data->urb) return -ENOMEM; - } if (request_firmware(&firmware, "BCM2033-MD.hex", &udev->dev) < 0) { BT_ERR("Mini driver request failed"); From 47b0f573f2fa7634860e16ea31f2bc3057a1022a Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sat, 27 Aug 2016 20:23:37 +0200 Subject: [PATCH 1198/1715] Bluetooth: Check SOL_HCI for raw socket options The SOL_HCI level should be enforced when using socket options on the HCI raw socket interface. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_sock.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 96f04b7b9556..99dd1503ef56 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -1440,6 +1440,9 @@ static int hci_sock_setsockopt(struct socket *sock, int level, int optname, BT_DBG("sk %p, opt %d", sk, optname); + if (level != SOL_HCI) + return -ENOPROTOOPT; + lock_sock(sk); if (hci_pi(sk)->channel != HCI_CHANNEL_RAW) { @@ -1523,6 +1526,9 @@ static int hci_sock_getsockopt(struct socket *sock, int level, int optname, BT_DBG("sk %p, opt %d", sk, optname); + if (level != SOL_HCI) + return -ENOPROTOOPT; + if (get_user(len, optlen)) return -EFAULT; From 70ecce91e3a2d7e332fe56fd065c67d404b8fccf Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sat, 27 Aug 2016 20:23:38 +0200 Subject: [PATCH 1199/1715] Bluetooth: Store control socket cookie and comm information To further allow unique identification and tracking of control socket, store cookie and comm information when binding the socket. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/bluetooth.h | 1 + net/bluetooth/hci_sock.c | 31 ++++++++++++++++++++++++++++++- 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index bfd1590821d6..69b5174168b7 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -371,6 +371,7 @@ void hci_sock_set_flag(struct sock *sk, int nr); void hci_sock_clear_flag(struct sock *sk, int nr); int hci_sock_test_flag(struct sock *sk, int nr); unsigned short hci_sock_get_channel(struct sock *sk); +u32 hci_sock_get_cookie(struct sock *sk); int hci_sock_init(void); void hci_sock_cleanup(void); diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 99dd1503ef56..4dce6dfdb0f2 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -26,6 +26,7 @@ #include #include +#include #include #include @@ -38,6 +39,8 @@ static LIST_HEAD(mgmt_chan_list); static DEFINE_MUTEX(mgmt_chan_list_lock); +static DEFINE_IDA(sock_cookie_ida); + static atomic_t monitor_promisc = ATOMIC_INIT(0); /* ----- HCI socket interface ----- */ @@ -52,6 +55,8 @@ struct hci_pinfo { __u32 cmsg_mask; unsigned short channel; unsigned long flags; + __u32 cookie; + char comm[TASK_COMM_LEN]; }; void hci_sock_set_flag(struct sock *sk, int nr) @@ -74,6 +79,11 @@ unsigned short hci_sock_get_channel(struct sock *sk) return hci_pi(sk)->channel; } +u32 hci_sock_get_cookie(struct sock *sk) +{ + return hci_pi(sk)->cookie; +} + static inline int hci_test_bit(int nr, const void *addr) { return *((const __u32 *) addr + (nr >> 5)) & ((__u32) 1 << (nr & 31)); @@ -585,6 +595,7 @@ static int hci_sock_release(struct socket *sock) { struct sock *sk = sock->sk; struct hci_dev *hdev; + int id; BT_DBG("sock %p sk %p", sock, sk); @@ -593,8 +604,17 @@ static int hci_sock_release(struct socket *sock) hdev = hci_pi(sk)->hdev; - if (hci_pi(sk)->channel == HCI_CHANNEL_MONITOR) + switch (hci_pi(sk)->channel) { + case HCI_CHANNEL_MONITOR: atomic_dec(&monitor_promisc); + break; + case HCI_CHANNEL_CONTROL: + id = hci_pi(sk)->cookie; + + hci_pi(sk)->cookie = 0xffffffff; + ida_simple_remove(&sock_cookie_ida, id); + break; + } bt_sock_unlink(&hci_sk_list, sk); @@ -957,6 +977,15 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, * are changes to settings, class of device, name etc. */ if (haddr.hci_channel == HCI_CHANNEL_CONTROL) { + int id; + + id = ida_simple_get(&sock_cookie_ida, 1, 0, GFP_KERNEL); + if (id < 0) + id = 0xffffffff; + + hci_pi(sk)->cookie = id; + get_task_comm(hci_pi(sk)->comm, current); + hci_sock_set_flag(sk, HCI_MGMT_INDEX_EVENTS); hci_sock_set_flag(sk, HCI_MGMT_UNCONF_INDEX_EVENTS); hci_sock_set_flag(sk, HCI_MGMT_GENERIC_EVENTS); From 03c979c4717c7fa0c058fafe76ac4d6acdd1fb0d Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sat, 27 Aug 2016 20:23:39 +0200 Subject: [PATCH 1200/1715] Bluetooth: Introduce helper to pack mgmt version information The mgmt version information will be also needed for the control changell tracing feature. This provides a helper to pack them. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci_core.h | 1 + net/bluetooth/mgmt.c | 11 +++++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index cc349f633570..9f181b583b96 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1451,6 +1451,7 @@ void hci_mgmt_chan_unregister(struct hci_mgmt_chan *c); #define DISCOV_BREDR_INQUIRY_LEN 0x08 #define DISCOV_LE_RESTART_DELAY msecs_to_jiffies(200) /* msec */ +void mgmt_fill_version_info(void *ver); int mgmt_new_settings(struct hci_dev *hdev); void mgmt_index_added(struct hci_dev *hdev); void mgmt_index_removed(struct hci_dev *hdev); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 7639290b6de3..9071886df194 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -278,6 +278,14 @@ static u8 le_addr_type(u8 mgmt_addr_type) return ADDR_LE_DEV_RANDOM; } +void mgmt_fill_version_info(void *ver) +{ + struct mgmt_rp_read_version *rp = ver; + + rp->version = MGMT_VERSION; + rp->revision = cpu_to_le16(MGMT_REVISION); +} + static int read_version(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { @@ -285,8 +293,7 @@ static int read_version(struct sock *sk, struct hci_dev *hdev, void *data, BT_DBG("sock %p", sk); - rp.version = MGMT_VERSION; - rp.revision = cpu_to_le16(MGMT_REVISION); + mgmt_fill_version_info(&rp); return mgmt_cmd_complete(sk, MGMT_INDEX_NONE, MGMT_OP_READ_VERSION, 0, &rp, sizeof(rp)); From 249fa1699f8642c73eb43e61b321969f0549ab2c Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sat, 27 Aug 2016 20:23:40 +0200 Subject: [PATCH 1201/1715] Bluetooth: Add support for sending MGMT open and close to monitor This sends new notifications to the monitor support whenever a management channel has been opened or closed. This allows tracing of control channels really easily. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci_mon.h | 2 + net/bluetooth/hci_sock.c | 95 +++++++++++++++++++++++++++++++++ 2 files changed, 97 insertions(+) diff --git a/include/net/bluetooth/hci_mon.h b/include/net/bluetooth/hci_mon.h index 587d0131b349..9640790cbbcc 100644 --- a/include/net/bluetooth/hci_mon.h +++ b/include/net/bluetooth/hci_mon.h @@ -45,6 +45,8 @@ struct hci_mon_hdr { #define HCI_MON_VENDOR_DIAG 11 #define HCI_MON_SYSTEM_NOTE 12 #define HCI_MON_USER_LOGGING 13 +#define HCI_MON_CTRL_OPEN 14 +#define HCI_MON_CTRL_CLOSE 15 struct hci_mon_new_index { __u8 type; diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 4dce6dfdb0f2..2d8725006838 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -394,6 +394,59 @@ static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event) return skb; } +static struct sk_buff *create_monitor_ctrl_open(struct sock *sk) +{ + struct hci_mon_hdr *hdr; + struct sk_buff *skb; + u16 format = 0x0002; + u8 ver[3]; + u32 flags; + + skb = bt_skb_alloc(14 + TASK_COMM_LEN , GFP_ATOMIC); + if (!skb) + return NULL; + + mgmt_fill_version_info(ver); + flags = hci_sock_test_flag(sk, HCI_SOCK_TRUSTED) ? 0x1 : 0x0; + + put_unaligned_le32(hci_pi(sk)->cookie, skb_put(skb, 4)); + put_unaligned_le16(format, skb_put(skb, 2)); + memcpy(skb_put(skb, sizeof(ver)), ver, sizeof(ver)); + put_unaligned_le32(flags, skb_put(skb, 4)); + *skb_put(skb, 1) = TASK_COMM_LEN; + memcpy(skb_put(skb, TASK_COMM_LEN), hci_pi(sk)->comm, TASK_COMM_LEN); + + __net_timestamp(skb); + + hdr = (void *)skb_push(skb, HCI_MON_HDR_SIZE); + hdr->opcode = cpu_to_le16(HCI_MON_CTRL_OPEN); + hdr->index = cpu_to_le16(HCI_DEV_NONE); + hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE); + + return skb; +} + +static struct sk_buff *create_monitor_ctrl_close(struct sock *sk) +{ + struct hci_mon_hdr *hdr; + struct sk_buff *skb; + + skb = bt_skb_alloc(4, GFP_ATOMIC); + if (!skb) + return NULL; + + put_unaligned_le32(hci_pi(sk)->cookie, skb_put(skb, 4)); + + __net_timestamp(skb); + + hdr = (void *)skb_push(skb, HCI_MON_HDR_SIZE); + hdr->opcode = cpu_to_le16(HCI_MON_CTRL_CLOSE); + hdr->index = cpu_to_le16(HCI_DEV_NONE); + hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE); + + return skb; +} + static void __printf(2, 3) send_monitor_note(struct sock *sk, const char *fmt, ...) { @@ -468,6 +521,29 @@ static void send_monitor_replay(struct sock *sk) read_unlock(&hci_dev_list_lock); } +static void send_monitor_control_replay(struct sock *mon_sk) +{ + struct sock *sk; + + read_lock(&hci_sk_list.lock); + + sk_for_each(sk, &hci_sk_list.head) { + struct sk_buff *skb; + + if (hci_pi(sk)->channel != HCI_CHANNEL_CONTROL) + continue; + + skb = create_monitor_ctrl_open(sk); + if (!skb) + continue; + + if (sock_queue_rcv_skb(mon_sk, skb)) + kfree_skb(skb); + } + + read_unlock(&hci_sk_list.lock); +} + /* Generate internal stack event */ static void hci_si_event(struct hci_dev *hdev, int type, int dlen, void *data) { @@ -595,6 +671,7 @@ static int hci_sock_release(struct socket *sock) { struct sock *sk = sock->sk; struct hci_dev *hdev; + struct sk_buff *skb; int id; BT_DBG("sock %p sk %p", sock, sk); @@ -611,6 +688,14 @@ static int hci_sock_release(struct socket *sock) case HCI_CHANNEL_CONTROL: id = hci_pi(sk)->cookie; + /* Send event to monitor */ + skb = create_monitor_ctrl_close(sk); + if (skb) { + hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, + HCI_SOCK_TRUSTED, NULL); + kfree_skb(skb); + } + hci_pi(sk)->cookie = 0xffffffff; ida_simple_remove(&sock_cookie_ida, id); break; @@ -931,6 +1016,7 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, send_monitor_note(sk, "Bluetooth subsystem version %s", BT_SUBSYS_VERSION); send_monitor_replay(sk); + send_monitor_control_replay(sk); atomic_inc(&monitor_promisc); break; @@ -977,6 +1063,7 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, * are changes to settings, class of device, name etc. */ if (haddr.hci_channel == HCI_CHANNEL_CONTROL) { + struct sk_buff *skb; int id; id = ida_simple_get(&sock_cookie_ida, 1, 0, GFP_KERNEL); @@ -986,6 +1073,14 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, hci_pi(sk)->cookie = id; get_task_comm(hci_pi(sk)->comm, current); + /* Send event to monitor */ + skb = create_monitor_ctrl_open(sk); + if (skb) { + hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, + HCI_SOCK_TRUSTED, NULL); + kfree_skb(skb); + } + hci_sock_set_flag(sk, HCI_MGMT_INDEX_EVENTS); hci_sock_set_flag(sk, HCI_MGMT_UNCONF_INDEX_EVENTS); hci_sock_set_flag(sk, HCI_MGMT_GENERIC_EVENTS); From 38ceaa00d02dceb22c6bdd5268f5a44d5c00e123 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sat, 27 Aug 2016 20:23:41 +0200 Subject: [PATCH 1202/1715] Bluetooth: Add support for sending MGMT commands and events to monitor This adds support for tracing all management commands and events via the monitor interface. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci_core.h | 3 + include/net/bluetooth/hci_mon.h | 2 + net/bluetooth/hci_sock.c | 94 ++++++++++++++++++++++++++++++++ net/bluetooth/mgmt_util.c | 66 +++++++++++++++++++++- 4 files changed, 162 insertions(+), 3 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 9f181b583b96..a48f71d73dc8 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1406,6 +1406,9 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb); void hci_send_to_channel(unsigned short channel, struct sk_buff *skb, int flag, struct sock *skip_sk); void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb); +void hci_send_monitor_ctrl_event(struct hci_dev *hdev, u16 event, + void *data, u16 data_len, ktime_t tstamp, + int flag, struct sock *skip_sk); void hci_sock_dev_event(struct hci_dev *hdev, int event); diff --git a/include/net/bluetooth/hci_mon.h b/include/net/bluetooth/hci_mon.h index 9640790cbbcc..240786b04a46 100644 --- a/include/net/bluetooth/hci_mon.h +++ b/include/net/bluetooth/hci_mon.h @@ -47,6 +47,8 @@ struct hci_mon_hdr { #define HCI_MON_USER_LOGGING 13 #define HCI_MON_CTRL_OPEN 14 #define HCI_MON_CTRL_CLOSE 15 +#define HCI_MON_CTRL_COMMAND 16 +#define HCI_MON_CTRL_EVENT 17 struct hci_mon_new_index { __u8 type; diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 2d8725006838..576ea48631b9 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -315,6 +315,60 @@ void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb) kfree_skb(skb_copy); } +void hci_send_monitor_ctrl_event(struct hci_dev *hdev, u16 event, + void *data, u16 data_len, ktime_t tstamp, + int flag, struct sock *skip_sk) +{ + struct sock *sk; + __le16 index; + + if (hdev) + index = cpu_to_le16(hdev->id); + else + index = cpu_to_le16(MGMT_INDEX_NONE); + + read_lock(&hci_sk_list.lock); + + sk_for_each(sk, &hci_sk_list.head) { + struct hci_mon_hdr *hdr; + struct sk_buff *skb; + + if (hci_pi(sk)->channel != HCI_CHANNEL_CONTROL) + continue; + + /* Ignore socket without the flag set */ + if (!hci_sock_test_flag(sk, flag)) + continue; + + /* Skip the original socket */ + if (sk == skip_sk) + continue; + + skb = bt_skb_alloc(6 + data_len, GFP_ATOMIC); + if (!skb) + continue; + + put_unaligned_le32(hci_pi(sk)->cookie, skb_put(skb, 4)); + put_unaligned_le16(event, skb_put(skb, 2)); + + if (data) + memcpy(skb_put(skb, data_len), data, data_len); + + skb->tstamp = tstamp; + + hdr = (void *)skb_push(skb, HCI_MON_HDR_SIZE); + hdr->opcode = cpu_to_le16(HCI_MON_CTRL_EVENT); + hdr->index = index; + hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE); + + hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, + HCI_SOCK_TRUSTED, NULL); + kfree_skb(skb); + } + + read_unlock(&hci_sk_list.lock); +} + static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event) { struct hci_mon_hdr *hdr; @@ -447,6 +501,33 @@ static struct sk_buff *create_monitor_ctrl_close(struct sock *sk) return skb; } +static struct sk_buff *create_monitor_ctrl_command(struct sock *sk, u16 index, + u16 opcode, u16 len, + const void *buf) +{ + struct hci_mon_hdr *hdr; + struct sk_buff *skb; + + skb = bt_skb_alloc(6 + len, GFP_ATOMIC); + if (!skb) + return NULL; + + put_unaligned_le32(hci_pi(sk)->cookie, skb_put(skb, 4)); + put_unaligned_le16(opcode, skb_put(skb, 2)); + + if (buf) + memcpy(skb_put(skb, len), buf, len); + + __net_timestamp(skb); + + hdr = (void *)skb_push(skb, HCI_MON_HDR_SIZE); + hdr->opcode = cpu_to_le16(HCI_MON_CTRL_COMMAND); + hdr->index = cpu_to_le16(index); + hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE); + + return skb; +} + static void __printf(2, 3) send_monitor_note(struct sock *sk, const char *fmt, ...) { @@ -1257,6 +1338,19 @@ static int hci_mgmt_cmd(struct hci_mgmt_chan *chan, struct sock *sk, goto done; } + if (chan->channel == HCI_CHANNEL_CONTROL) { + struct sk_buff *skb; + + /* Send event to monitor */ + skb = create_monitor_ctrl_command(sk, index, opcode, len, + buf + sizeof(*hdr)); + if (skb) { + hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, + HCI_SOCK_TRUSTED, NULL); + kfree_skb(skb); + } + } + if (opcode >= chan->handler_count || chan->handlers[opcode].func == NULL) { BT_DBG("Unknown op %u", opcode); diff --git a/net/bluetooth/mgmt_util.c b/net/bluetooth/mgmt_util.c index 8c30c7eb8bef..c933bd08c1fe 100644 --- a/net/bluetooth/mgmt_util.c +++ b/net/bluetooth/mgmt_util.c @@ -21,12 +21,41 @@ SOFTWARE IS DISCLAIMED. */ +#include + #include #include +#include #include #include "mgmt_util.h" +static struct sk_buff *create_monitor_ctrl_event(__le16 index, u32 cookie, + u16 opcode, u16 len, void *buf) +{ + struct hci_mon_hdr *hdr; + struct sk_buff *skb; + + skb = bt_skb_alloc(6 + len, GFP_ATOMIC); + if (!skb) + return NULL; + + put_unaligned_le32(cookie, skb_put(skb, 4)); + put_unaligned_le16(opcode, skb_put(skb, 2)); + + if (buf) + memcpy(skb_put(skb, len), buf, len); + + __net_timestamp(skb); + + hdr = (void *)skb_push(skb, HCI_MON_HDR_SIZE); + hdr->opcode = cpu_to_le16(HCI_MON_CTRL_EVENT); + hdr->index = index; + hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE); + + return skb; +} + int mgmt_send_event(u16 event, struct hci_dev *hdev, unsigned short channel, void *data, u16 data_len, int flag, struct sock *skip_sk) { @@ -52,14 +81,18 @@ int mgmt_send_event(u16 event, struct hci_dev *hdev, unsigned short channel, __net_timestamp(skb); hci_send_to_channel(channel, skb, flag, skip_sk); - kfree_skb(skb); + if (channel == HCI_CHANNEL_CONTROL) + hci_send_monitor_ctrl_event(hdev, event, data, data_len, + skb_get_ktime(skb), flag, skip_sk); + + kfree_skb(skb); return 0; } int mgmt_cmd_status(struct sock *sk, u16 index, u16 cmd, u8 status) { - struct sk_buff *skb; + struct sk_buff *skb, *mskb; struct mgmt_hdr *hdr; struct mgmt_ev_cmd_status *ev; int err; @@ -80,17 +113,30 @@ int mgmt_cmd_status(struct sock *sk, u16 index, u16 cmd, u8 status) ev->status = status; ev->opcode = cpu_to_le16(cmd); + mskb = create_monitor_ctrl_event(hdr->index, hci_sock_get_cookie(sk), + MGMT_EV_CMD_STATUS, sizeof(*ev), ev); + if (mskb) + skb->tstamp = mskb->tstamp; + else + __net_timestamp(skb); + err = sock_queue_rcv_skb(sk, skb); if (err < 0) kfree_skb(skb); + if (mskb) { + hci_send_to_channel(HCI_CHANNEL_MONITOR, mskb, + HCI_SOCK_TRUSTED, NULL); + kfree_skb(mskb); + } + return err; } int mgmt_cmd_complete(struct sock *sk, u16 index, u16 cmd, u8 status, void *rp, size_t rp_len) { - struct sk_buff *skb; + struct sk_buff *skb, *mskb; struct mgmt_hdr *hdr; struct mgmt_ev_cmd_complete *ev; int err; @@ -114,10 +160,24 @@ int mgmt_cmd_complete(struct sock *sk, u16 index, u16 cmd, u8 status, if (rp) memcpy(ev->data, rp, rp_len); + mskb = create_monitor_ctrl_event(hdr->index, hci_sock_get_cookie(sk), + MGMT_EV_CMD_COMPLETE, + sizeof(*ev) + rp_len, ev); + if (mskb) + skb->tstamp = mskb->tstamp; + else + __net_timestamp(skb); + err = sock_queue_rcv_skb(sk, skb); if (err < 0) kfree_skb(skb); + if (mskb) { + hci_send_to_channel(HCI_CHANNEL_MONITOR, mskb, + HCI_SOCK_TRUSTED, NULL); + kfree_skb(mskb); + } + return err; } From 37d3a1fab50fa07ac706787646e61c60e7c520e0 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Sun, 28 Aug 2016 20:53:34 +0300 Subject: [PATCH 1203/1715] Bluetooth: mgmt: Fix sending redundant event for Advertising Instance When an Advertising Instance is removed, the Advertising Removed event shouldn't be sent to the same socket that issued the Remove Advertising command (it gets a command complete event instead). The mgmt_advertising_removed() function already has a parameter for skipping a specific socket, but there was no code to propagate the right value to this parameter. This patch fixes the issue by making sure the intermediate hci_req_clear_adv_instance() function gets the socket pointer. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_request.c | 11 ++++++----- net/bluetooth/hci_request.h | 5 +++-- net/bluetooth/mgmt.c | 6 +++--- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index b0e23dfc5c34..9968b1c7c03a 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -1194,7 +1194,7 @@ static void adv_timeout_expire(struct work_struct *work) hci_req_init(&req, hdev); - hci_req_clear_adv_instance(hdev, &req, instance, false); + hci_req_clear_adv_instance(hdev, NULL, &req, instance, false); if (list_empty(&hdev->adv_instances)) __hci_req_disable_advertising(&req); @@ -1284,8 +1284,9 @@ static void cancel_adv_timeout(struct hci_dev *hdev) * setting. * - force == false: Only instances that have a timeout will be removed. */ -void hci_req_clear_adv_instance(struct hci_dev *hdev, struct hci_request *req, - u8 instance, bool force) +void hci_req_clear_adv_instance(struct hci_dev *hdev, struct sock *sk, + struct hci_request *req, u8 instance, + bool force) { struct adv_info *adv_instance, *n, *next_instance = NULL; int err; @@ -1311,7 +1312,7 @@ void hci_req_clear_adv_instance(struct hci_dev *hdev, struct hci_request *req, rem_inst = adv_instance->instance; err = hci_remove_adv_instance(hdev, rem_inst); if (!err) - mgmt_advertising_removed(NULL, hdev, rem_inst); + mgmt_advertising_removed(sk, hdev, rem_inst); } } else { adv_instance = hci_find_adv_instance(hdev, instance); @@ -1325,7 +1326,7 @@ void hci_req_clear_adv_instance(struct hci_dev *hdev, struct hci_request *req, err = hci_remove_adv_instance(hdev, instance); if (!err) - mgmt_advertising_removed(NULL, hdev, instance); + mgmt_advertising_removed(sk, hdev, instance); } } diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index b2d044bdc732..ac1e11006f38 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -73,8 +73,9 @@ void __hci_req_update_scan_rsp_data(struct hci_request *req, u8 instance); int __hci_req_schedule_adv_instance(struct hci_request *req, u8 instance, bool force); -void hci_req_clear_adv_instance(struct hci_dev *hdev, struct hci_request *req, - u8 instance, bool force); +void hci_req_clear_adv_instance(struct hci_dev *hdev, struct sock *sk, + struct hci_request *req, u8 instance, + bool force); void __hci_req_update_class(struct hci_request *req); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 9071886df194..f9af5f7c2ea2 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -929,7 +929,7 @@ static int clean_up_hci_state(struct hci_dev *hdev) hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); } - hci_req_clear_adv_instance(hdev, NULL, 0x00, false); + hci_req_clear_adv_instance(hdev, NULL, NULL, 0x00, false); if (hci_dev_test_flag(hdev, HCI_LE_ADV)) __hci_req_disable_advertising(&req); @@ -1697,7 +1697,7 @@ static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) enabled = lmp_host_le_capable(hdev); if (!val) - hci_req_clear_adv_instance(hdev, NULL, 0x00, true); + hci_req_clear_adv_instance(hdev, NULL, NULL, 0x00, true); if (!hdev_is_powered(hdev) || val == enabled) { bool changed = false; @@ -6182,7 +6182,7 @@ static int remove_advertising(struct sock *sk, struct hci_dev *hdev, hci_req_init(&req, hdev); - hci_req_clear_adv_instance(hdev, &req, cp->instance, true); + hci_req_clear_adv_instance(hdev, sk, &req, cp->instance, true); if (list_empty(&hdev->adv_instances)) __hci_req_disable_advertising(&req); From 5504c3a31061704512707bb23bd7835e8a5281e4 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Mon, 29 Aug 2016 06:19:46 +0200 Subject: [PATCH 1204/1715] Bluetooth: Use individual flags for certain management events Instead of hiding everything behind a general managment events flag, introduce indivdual flags that allow fine control over which events are send to a given management channel. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci.h | 5 ++++- net/bluetooth/hci_sock.c | 5 ++++- net/bluetooth/mgmt.c | 32 +++++++++++++------------------- 3 files changed, 21 insertions(+), 21 deletions(-) diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 0aac123b5eee..ddb9accac3a5 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -208,7 +208,10 @@ enum { HCI_MGMT_INDEX_EVENTS, HCI_MGMT_UNCONF_INDEX_EVENTS, HCI_MGMT_EXT_INDEX_EVENTS, - HCI_MGMT_GENERIC_EVENTS, + HCI_MGMT_OPTION_EVENTS, + HCI_MGMT_SETTING_EVENTS, + HCI_MGMT_DEV_CLASS_EVENTS, + HCI_MGMT_LOCAL_NAME_EVENTS, HCI_MGMT_OOB_DATA_EVENTS, }; diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 576ea48631b9..d37c2243157b 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -1164,7 +1164,10 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, hci_sock_set_flag(sk, HCI_MGMT_INDEX_EVENTS); hci_sock_set_flag(sk, HCI_MGMT_UNCONF_INDEX_EVENTS); - hci_sock_set_flag(sk, HCI_MGMT_GENERIC_EVENTS); + hci_sock_set_flag(sk, HCI_MGMT_OPTION_EVENTS); + hci_sock_set_flag(sk, HCI_MGMT_SETTING_EVENTS); + hci_sock_set_flag(sk, HCI_MGMT_DEV_CLASS_EVENTS); + hci_sock_set_flag(sk, HCI_MGMT_LOCAL_NAME_EVENTS); } break; } diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index f9af5f7c2ea2..469f5cc3109b 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -256,13 +256,6 @@ static int mgmt_limited_event(u16 event, struct hci_dev *hdev, void *data, flag, skip_sk); } -static int mgmt_generic_event(u16 event, struct hci_dev *hdev, void *data, - u16 len, struct sock *skip_sk) -{ - return mgmt_send_event(event, hdev, HCI_CHANNEL_CONTROL, data, len, - HCI_MGMT_GENERIC_EVENTS, skip_sk); -} - static int mgmt_event(u16 event, struct hci_dev *hdev, void *data, u16 len, struct sock *skip_sk) { @@ -579,8 +572,8 @@ static int new_options(struct hci_dev *hdev, struct sock *skip) { __le32 options = get_missing_options(hdev); - return mgmt_generic_event(MGMT_EV_NEW_CONFIG_OPTIONS, hdev, &options, - sizeof(options), skip); + return mgmt_limited_event(MGMT_EV_NEW_CONFIG_OPTIONS, hdev, &options, + sizeof(options), HCI_MGMT_OPTION_EVENTS, skip); } static int send_options_rsp(struct sock *sk, u16 opcode, struct hci_dev *hdev) @@ -1007,8 +1000,8 @@ static int new_settings(struct hci_dev *hdev, struct sock *skip) { __le32 ev = cpu_to_le32(get_current_settings(hdev)); - return mgmt_generic_event(MGMT_EV_NEW_SETTINGS, hdev, &ev, - sizeof(ev), skip); + return mgmt_limited_event(MGMT_EV_NEW_SETTINGS, hdev, &ev, + sizeof(ev), HCI_MGMT_SETTING_EVENTS, skip); } int mgmt_new_settings(struct hci_dev *hdev) @@ -3000,8 +2993,8 @@ static int set_local_name(struct sock *sk, struct hci_dev *hdev, void *data, if (err < 0) goto failed; - err = mgmt_generic_event(MGMT_EV_LOCAL_NAME_CHANGED, hdev, - data, len, sk); + err = mgmt_limited_event(MGMT_EV_LOCAL_NAME_CHANGED, hdev, data, + len, HCI_MGMT_LOCAL_NAME_EVENTS, sk); goto failed; } @@ -6502,8 +6495,9 @@ void __mgmt_power_off(struct hci_dev *hdev) mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &status); if (memcmp(hdev->dev_class, zero_cod, sizeof(zero_cod)) != 0) - mgmt_generic_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, - zero_cod, sizeof(zero_cod), NULL); + mgmt_limited_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, + zero_cod, sizeof(zero_cod), + HCI_MGMT_DEV_CLASS_EVENTS, NULL); new_settings(hdev, match.sk); @@ -7100,8 +7094,8 @@ void mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class, mgmt_pending_foreach(MGMT_OP_REMOVE_UUID, hdev, sk_lookup, &match); if (!status) - mgmt_generic_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, - dev_class, 3, NULL); + mgmt_limited_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, dev_class, + 3, HCI_MGMT_DEV_CLASS_EVENTS, NULL); if (match.sk) sock_put(match.sk); @@ -7130,8 +7124,8 @@ void mgmt_set_local_name_complete(struct hci_dev *hdev, u8 *name, u8 status) return; } - mgmt_generic_event(MGMT_EV_LOCAL_NAME_CHANGED, hdev, &ev, sizeof(ev), - cmd ? cmd->sk : NULL); + mgmt_limited_event(MGMT_EV_LOCAL_NAME_CHANGED, hdev, &ev, sizeof(ev), + HCI_MGMT_LOCAL_NAME_EVENTS, cmd ? cmd->sk : NULL); } static inline bool has_uuid(u8 *uuid, u16 uuid_count, u8 (*uuids)[16]) From 56f787c5024de7829f8cccce7569feb520829baf Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Mon, 29 Aug 2016 06:19:47 +0200 Subject: [PATCH 1205/1715] Bluetooth: Fix wrong Get Clock Information return parameters The address information of the Get Clock Information return parameters is copying from a different memory location. It uses &cmd->param while it actually needs to be cmd->param. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/mgmt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 469f5cc3109b..0c83dd36b7e3 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -4869,7 +4869,7 @@ static int clock_info_cmd_complete(struct mgmt_pending_cmd *cmd, u8 status) int err; memset(&rp, 0, sizeof(rp)); - memcpy(&rp.addr, &cmd->param, sizeof(rp.addr)); + memcpy(&rp.addr, cmd->param, sizeof(rp.addr)); if (status) goto complete; From 9db5c62951871c33e4443fe433e234419cf574d2 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Mon, 29 Aug 2016 06:31:57 +0200 Subject: [PATCH 1206/1715] Bluetooth: Use command status event for Set IO Capability errors In case of failure, the Set IO Capability command is suppose to return command status and not command complete. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/mgmt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 0c83dd36b7e3..47efdb4a669a 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2513,8 +2513,8 @@ static int set_io_capability(struct sock *sk, struct hci_dev *hdev, void *data, BT_DBG(""); if (cp->io_capability > SMP_IO_KEYBOARD_DISPLAY) - return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_SET_IO_CAPABILITY, - MGMT_STATUS_INVALID_PARAMS, NULL, 0); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_IO_CAPABILITY, + MGMT_STATUS_INVALID_PARAMS); hci_dev_lock(hdev); From 3d4e2fb64111ffb5dc737daf25f5434bf39bee5f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 29 Aug 2016 14:36:18 +0200 Subject: [PATCH 1207/1715] Bluetooth: add WCNSS dependency for HCI driver The newly added bluetooth driver is based on the soc-specific support, but lacks the obvious compile-time dependency on that: drivers/bluetooth/btqcomsmd.o: In function `btqcomsmd_probe': btqcomsmd.c:(.text.btqcomsmd_probe+0x40): undefined reference to `qcom_wcnss_open_channel' btqcomsmd.c:(.text.btqcomsmd_probe+0x5c): undefined reference to `qcom_wcnss_open_channel' Makefile:969: recipe for target 'vmlinux' failed Fixes: 90c107dc8b2c ("Bluetooth: Introduce Qualcomm WCNSS SMD based HCI driver") Signed-off-by: Arnd Bergmann Signed-off-by: Marcel Holtmann --- drivers/bluetooth/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bluetooth/Kconfig b/drivers/bluetooth/Kconfig index 2c481911f1ce..43e9f9328df8 100644 --- a/drivers/bluetooth/Kconfig +++ b/drivers/bluetooth/Kconfig @@ -333,7 +333,7 @@ config BT_WILINK config BT_QCOMSMD tristate "Qualcomm SMD based HCI support" - depends on QCOM_SMD + depends on QCOM_SMD && QCOM_WCNSS_CTRL select BT_QCA help Qualcomm SMD based HCI driver. From df1cb87af9f24527a8932e4d195d49ffab1168d2 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Tue, 30 Aug 2016 05:00:34 +0200 Subject: [PATCH 1208/1715] Bluetooth: Introduce helper functions for socket cookie handling Instead of manually allocating cookie information each time, use helper functions for generating and releasing cookies. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_sock.c | 41 ++++++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index d37c2243157b..804208d48368 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -84,6 +84,33 @@ u32 hci_sock_get_cookie(struct sock *sk) return hci_pi(sk)->cookie; } +static bool hci_sock_gen_cookie(struct sock *sk) +{ + int id = hci_pi(sk)->cookie; + + if (!id) { + id = ida_simple_get(&sock_cookie_ida, 1, 0, GFP_KERNEL); + if (id < 0) + id = 0xffffffff; + + hci_pi(sk)->cookie = id; + get_task_comm(hci_pi(sk)->comm, current); + return true; + } + + return false; +} + +static void hci_sock_free_cookie(struct sock *sk) +{ + int id = hci_pi(sk)->cookie; + + if (id) { + hci_pi(sk)->cookie = 0xffffffff; + ida_simple_remove(&sock_cookie_ida, id); + } +} + static inline int hci_test_bit(int nr, const void *addr) { return *((const __u32 *) addr + (nr >> 5)) & ((__u32) 1 << (nr & 31)); @@ -753,7 +780,6 @@ static int hci_sock_release(struct socket *sock) struct sock *sk = sock->sk; struct hci_dev *hdev; struct sk_buff *skb; - int id; BT_DBG("sock %p sk %p", sock, sk); @@ -767,8 +793,6 @@ static int hci_sock_release(struct socket *sock) atomic_dec(&monitor_promisc); break; case HCI_CHANNEL_CONTROL: - id = hci_pi(sk)->cookie; - /* Send event to monitor */ skb = create_monitor_ctrl_close(sk); if (skb) { @@ -777,8 +801,7 @@ static int hci_sock_release(struct socket *sock) kfree_skb(skb); } - hci_pi(sk)->cookie = 0xffffffff; - ida_simple_remove(&sock_cookie_ida, id); + hci_sock_free_cookie(sk); break; } @@ -1145,14 +1168,8 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, */ if (haddr.hci_channel == HCI_CHANNEL_CONTROL) { struct sk_buff *skb; - int id; - id = ida_simple_get(&sock_cookie_ida, 1, 0, GFP_KERNEL); - if (id < 0) - id = 0xffffffff; - - hci_pi(sk)->cookie = id; - get_task_comm(hci_pi(sk)->comm, current); + hci_sock_gen_cookie(sk); /* Send event to monitor */ skb = create_monitor_ctrl_open(sk); From 9e8305b39bfa23a83b932007654097f4676c2ba2 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Tue, 30 Aug 2016 05:00:35 +0200 Subject: [PATCH 1209/1715] Bluetooth: Use numbers for subsystem version string Instead of keeping a version string around, use version and revision numbers and then stringify them for use as module parameter. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/bluetooth.h | 3 ++- net/bluetooth/af_bluetooth.c | 10 +++++++--- net/bluetooth/hci_sock.c | 4 ++-- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 69b5174168b7..d705bcf40710 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -29,7 +29,8 @@ #include #include -#define BT_SUBSYS_VERSION "2.21" +#define BT_SUBSYS_VERSION 2 +#define BT_SUBSYS_REVISION 21 #ifndef AF_BLUETOOTH #define AF_BLUETOOTH 31 diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 1d96ff3a8d87..1aff2da9bc74 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -26,6 +26,7 @@ #include #include +#include #include #include @@ -713,13 +714,16 @@ static struct net_proto_family bt_sock_family_ops = { struct dentry *bt_debugfs; EXPORT_SYMBOL_GPL(bt_debugfs); +#define VERSION __stringify(BT_SUBSYS_VERSION) "." \ + __stringify(BT_SUBSYS_REVISION) + static int __init bt_init(void) { int err; sock_skb_cb_check_size(sizeof(struct bt_skb_cb)); - BT_INFO("Core ver %s", BT_SUBSYS_VERSION); + BT_INFO("Core ver %s", VERSION); err = bt_selftest(); if (err < 0) @@ -797,7 +801,7 @@ subsys_initcall(bt_init); module_exit(bt_exit); MODULE_AUTHOR("Marcel Holtmann "); -MODULE_DESCRIPTION("Bluetooth Core ver " BT_SUBSYS_VERSION); -MODULE_VERSION(BT_SUBSYS_VERSION); +MODULE_DESCRIPTION("Bluetooth Core ver " VERSION); +MODULE_VERSION(VERSION); MODULE_LICENSE("GPL"); MODULE_ALIAS_NETPROTO(PF_BLUETOOTH); diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 804208d48368..a4227c777d16 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -1117,8 +1117,8 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, send_monitor_note(sk, "Linux version %s (%s)", init_utsname()->release, init_utsname()->machine); - send_monitor_note(sk, "Bluetooth subsystem version %s", - BT_SUBSYS_VERSION); + send_monitor_note(sk, "Bluetooth subsystem version %u.%u", + BT_SUBSYS_VERSION, BT_SUBSYS_REVISION); send_monitor_replay(sk); send_monitor_control_replay(sk); From 0ef2c42f8c4e372bad16f67dc0f4b15b9be910f6 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Tue, 30 Aug 2016 05:00:36 +0200 Subject: [PATCH 1210/1715] Bluetooth: Send control open and close only when cookie is present Only when the cookie has been assigned, then send the open and close monitor messages. Also if the socket is bound to a device, then include the index into the message. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_sock.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index a4227c777d16..0deca758fd9e 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -483,6 +483,10 @@ static struct sk_buff *create_monitor_ctrl_open(struct sock *sk) u8 ver[3]; u32 flags; + /* No message needed when cookie is not present */ + if (!hci_pi(sk)->cookie) + return NULL; + skb = bt_skb_alloc(14 + TASK_COMM_LEN , GFP_ATOMIC); if (!skb) return NULL; @@ -501,7 +505,10 @@ static struct sk_buff *create_monitor_ctrl_open(struct sock *sk) hdr = (void *)skb_push(skb, HCI_MON_HDR_SIZE); hdr->opcode = cpu_to_le16(HCI_MON_CTRL_OPEN); - hdr->index = cpu_to_le16(HCI_DEV_NONE); + if (hci_pi(sk)->hdev) + hdr->index = cpu_to_le16(hci_pi(sk)->hdev->id); + else + hdr->index = cpu_to_le16(HCI_DEV_NONE); hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE); return skb; @@ -512,6 +519,10 @@ static struct sk_buff *create_monitor_ctrl_close(struct sock *sk) struct hci_mon_hdr *hdr; struct sk_buff *skb; + /* No message needed when cookie is not present */ + if (!hci_pi(sk)->cookie) + return NULL; + skb = bt_skb_alloc(4, GFP_ATOMIC); if (!skb) return NULL; @@ -522,7 +533,10 @@ static struct sk_buff *create_monitor_ctrl_close(struct sock *sk) hdr = (void *)skb_push(skb, HCI_MON_HDR_SIZE); hdr->opcode = cpu_to_le16(HCI_MON_CTRL_CLOSE); - hdr->index = cpu_to_le16(HCI_DEV_NONE); + if (hci_pi(sk)->hdev) + hdr->index = cpu_to_le16(hci_pi(sk)->hdev->id); + else + hdr->index = cpu_to_le16(HCI_DEV_NONE); hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE); return skb; From 5a6d2cf5f18b5afbae0b1b450070bbba50f1e3e0 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Tue, 30 Aug 2016 05:00:37 +0200 Subject: [PATCH 1211/1715] Bluetooth: Assign the channel early when binding HCI sockets Assignment of the hci_pi(sk)->channel should be done early when binding the HCI socket. This avoids confusion with the RAW channel that is used for legacy access. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_sock.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 0deca758fd9e..ca13fac1c132 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -1045,6 +1045,7 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, atomic_inc(&hdev->promisc); } + hci_pi(sk)->channel = haddr.hci_channel; hci_pi(sk)->hdev = hdev; break; @@ -1107,9 +1108,10 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, } } - atomic_inc(&hdev->promisc); - + hci_pi(sk)->channel = haddr.hci_channel; hci_pi(sk)->hdev = hdev; + + atomic_inc(&hdev->promisc); break; case HCI_CHANNEL_MONITOR: @@ -1123,6 +1125,8 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, goto done; } + hci_pi(sk)->channel = haddr.hci_channel; + /* The monitor interface is restricted to CAP_NET_RAW * capabilities and with that implicitly trusted. */ @@ -1149,6 +1153,8 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, err = -EPERM; goto done; } + + hci_pi(sk)->channel = haddr.hci_channel; break; default: @@ -1170,6 +1176,8 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, if (capable(CAP_NET_ADMIN)) hci_sock_set_flag(sk, HCI_SOCK_TRUSTED); + hci_pi(sk)->channel = haddr.hci_channel; + /* At the moment the index and unconfigured index events * are enabled unconditionally. Setting them on each * socket when binding keeps this functionality. They @@ -1180,7 +1188,7 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, * received by untrusted users. Example for such events * are changes to settings, class of device, name etc. */ - if (haddr.hci_channel == HCI_CHANNEL_CONTROL) { + if (hci_pi(sk)->channel == HCI_CHANNEL_CONTROL) { struct sk_buff *skb; hci_sock_gen_cookie(sk); @@ -1203,8 +1211,6 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, break; } - - hci_pi(sk)->channel = haddr.hci_channel; sk->sk_state = BT_BOUND; done: From d0bef1d26fb6fdad818f3d15a178d51e2a8478ae Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Tue, 30 Aug 2016 05:00:38 +0200 Subject: [PATCH 1212/1715] Bluetooth: Add extra channel checks for control open/close messages The control open and close monitoring events require special channel checks to ensure messages are only send when the right events happen. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_sock.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index ca13fac1c132..b22efe272f7e 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -479,7 +479,7 @@ static struct sk_buff *create_monitor_ctrl_open(struct sock *sk) { struct hci_mon_hdr *hdr; struct sk_buff *skb; - u16 format = 0x0002; + u16 format; u8 ver[3]; u32 flags; @@ -487,11 +487,20 @@ static struct sk_buff *create_monitor_ctrl_open(struct sock *sk) if (!hci_pi(sk)->cookie) return NULL; + switch (hci_pi(sk)->channel) { + case HCI_CHANNEL_CONTROL: + format = 0x0002; + mgmt_fill_version_info(ver); + break; + default: + /* No message for unsupported format */ + return NULL; + } + skb = bt_skb_alloc(14 + TASK_COMM_LEN , GFP_ATOMIC); if (!skb) return NULL; - mgmt_fill_version_info(ver); flags = hci_sock_test_flag(sk, HCI_SOCK_TRUSTED) ? 0x1 : 0x0; put_unaligned_le32(hci_pi(sk)->cookie, skb_put(skb, 4)); @@ -523,6 +532,14 @@ static struct sk_buff *create_monitor_ctrl_close(struct sock *sk) if (!hci_pi(sk)->cookie) return NULL; + switch (hci_pi(sk)->channel) { + case HCI_CHANNEL_CONTROL: + break; + default: + /* No message for unsupported format */ + return NULL; + } + skb = bt_skb_alloc(4, GFP_ATOMIC); if (!skb) return NULL; @@ -652,9 +669,6 @@ static void send_monitor_control_replay(struct sock *mon_sk) sk_for_each(sk, &hci_sk_list.head) { struct sk_buff *skb; - if (hci_pi(sk)->channel != HCI_CHANNEL_CONTROL) - continue; - skb = create_monitor_ctrl_open(sk); if (!skb) continue; From f81f5b2db8692ff1d2d5f4db1fde58e67aa976a3 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Tue, 30 Aug 2016 05:00:39 +0200 Subject: [PATCH 1213/1715] Bluetooth: Send control open and close messages for HCI raw sockets When opening and closing HCI raw sockets their main usage is for legacy userspace. To track interaction with the modern mgmt interface, send open and close monitoring messages for these action. The HCI raw sockets is special since it supports unbound ioctl operation and for that special case delay the notification message until at least one ioctl has been executed. The difference between a bound and unbound socket will be detailed by the fact the HCI index is present or not. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_sock.c | 48 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index b22efe272f7e..c7772436f508 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -488,6 +488,11 @@ static struct sk_buff *create_monitor_ctrl_open(struct sock *sk) return NULL; switch (hci_pi(sk)->channel) { + case HCI_CHANNEL_RAW: + format = 0x0000; + ver[0] = BT_SUBSYS_VERSION; + put_unaligned_le16(BT_SUBSYS_REVISION, ver + 1); + break; case HCI_CHANNEL_CONTROL: format = 0x0002; mgmt_fill_version_info(ver); @@ -533,6 +538,7 @@ static struct sk_buff *create_monitor_ctrl_close(struct sock *sk) return NULL; switch (hci_pi(sk)->channel) { + case HCI_CHANNEL_RAW: case HCI_CHANNEL_CONTROL: break; default: @@ -820,6 +826,7 @@ static int hci_sock_release(struct socket *sock) case HCI_CHANNEL_MONITOR: atomic_dec(&monitor_promisc); break; + case HCI_CHANNEL_RAW: case HCI_CHANNEL_CONTROL: /* Send event to monitor */ skb = create_monitor_ctrl_close(sk); @@ -958,6 +965,27 @@ static int hci_sock_ioctl(struct socket *sock, unsigned int cmd, goto done; } + /* When calling an ioctl on an unbound raw socket, then ensure + * that the monitor gets informed. Ensure that the resulting event + * is only send once by checking if the cookie exists or not. The + * socket cookie will be only ever generated once for the lifetime + * of a given socket. + */ + if (hci_sock_gen_cookie(sk)) { + struct sk_buff *skb; + + if (capable(CAP_NET_ADMIN)) + hci_sock_set_flag(sk, HCI_SOCK_TRUSTED); + + /* Send event to monitor */ + skb = create_monitor_ctrl_open(sk); + if (skb) { + hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, + HCI_SOCK_TRUSTED, NULL); + kfree_skb(skb); + } + } + release_sock(sk); switch (cmd) { @@ -1061,6 +1089,26 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, hci_pi(sk)->channel = haddr.hci_channel; hci_pi(sk)->hdev = hdev; + + /* Only send the event to monitor when a new cookie has + * been generated. An existing cookie means that an unbound + * socket has seen an ioctl and that triggered the cookie + * generation and sending of the monitor event. + */ + if (hci_sock_gen_cookie(sk)) { + struct sk_buff *skb; + + if (capable(CAP_NET_ADMIN)) + hci_sock_set_flag(sk, HCI_SOCK_TRUSTED); + + /* Send event to monitor */ + skb = create_monitor_ctrl_open(sk); + if (skb) { + hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, + HCI_SOCK_TRUSTED, NULL); + kfree_skb(skb); + } + } break; case HCI_CHANNEL_USER: From f4cdbb3f25c15c17a952deae1f2e0db6df8f1948 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Tue, 30 Aug 2016 05:00:40 +0200 Subject: [PATCH 1214/1715] Bluetooth: Handle HCI raw socket transition from unbound to bound In case an unbound HCI raw socket is later on bound, ensure that the monitor notification messages indicate a close and re-open. None of the userspace tools use the socket this, but it is actually possible to use an ioctl on an unbound socket and then later bind it. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_sock.c | 53 +++++++++++++++++++++++++++------------- 1 file changed, 36 insertions(+), 17 deletions(-) diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index c7772436f508..83e9fdb712e5 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -1049,6 +1049,7 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, struct sockaddr_hci haddr; struct sock *sk = sock->sk; struct hci_dev *hdev = NULL; + struct sk_buff *skb; int len, err = 0; BT_DBG("sock %p sk %p", sock, sk); @@ -1088,27 +1089,34 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, } hci_pi(sk)->channel = haddr.hci_channel; - hci_pi(sk)->hdev = hdev; - /* Only send the event to monitor when a new cookie has - * been generated. An existing cookie means that an unbound - * socket has seen an ioctl and that triggered the cookie - * generation and sending of the monitor event. - */ - if (hci_sock_gen_cookie(sk)) { - struct sk_buff *skb; - - if (capable(CAP_NET_ADMIN)) - hci_sock_set_flag(sk, HCI_SOCK_TRUSTED); - - /* Send event to monitor */ - skb = create_monitor_ctrl_open(sk); + if (!hci_sock_gen_cookie(sk)) { + /* In the case when a cookie has already been assigned, + * then there has been already an ioctl issued against + * an unbound socket and with that triggerd an open + * notification. Send a close notification first to + * allow the state transition to bounded. + */ + skb = create_monitor_ctrl_close(sk); if (skb) { hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, HCI_SOCK_TRUSTED, NULL); kfree_skb(skb); } } + + if (capable(CAP_NET_ADMIN)) + hci_sock_set_flag(sk, HCI_SOCK_TRUSTED); + + hci_pi(sk)->hdev = hdev; + + /* Send event to monitor */ + skb = create_monitor_ctrl_open(sk); + if (skb) { + hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, + HCI_SOCK_TRUSTED, NULL); + kfree_skb(skb); + } break; case HCI_CHANNEL_USER: @@ -1251,9 +1259,20 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, * are changes to settings, class of device, name etc. */ if (hci_pi(sk)->channel == HCI_CHANNEL_CONTROL) { - struct sk_buff *skb; - - hci_sock_gen_cookie(sk); + if (!hci_sock_gen_cookie(sk)) { + /* In the case when a cookie has already been + * assigned, this socket will transtion from + * a raw socket into a control socket. To + * allow for a clean transtion, send the + * close notification first. + */ + skb = create_monitor_ctrl_close(sk); + if (skb) { + hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, + HCI_SOCK_TRUSTED, NULL); + kfree_skb(skb); + } + } /* Send event to monitor */ skb = create_monitor_ctrl_open(sk); From fac9a6021b4e8dc8b2112a8e133936c0daf7ff94 Mon Sep 17 00:00:00 2001 From: Bhaktipriya Shridhar Date: Tue, 30 Aug 2016 22:42:53 +0530 Subject: [PATCH 1215/1715] Bluetooth: Remove deprecated create_singlethread_workqueue The workqueue "workqueue" queues multiple work items viz &qca->ws_awake_rx &qca->ws_rx_vote_off, &qca->ws_awake_device, &qca->ws_tx_vote_off which require strict execution ordering. Hence, an ordered dedicated workqueue has been used to replace the deprecated create_singlethread_workqueue instance. WQ_MEM_RECLAIM has not been set since the driver is not being used on a memory reclaim path. Signed-off-by: Bhaktipriya Shridhar Signed-off-by: Marcel Holtmann --- drivers/bluetooth/hci_qca.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 683c2b642057..6c867fbc56a7 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -397,7 +397,7 @@ static int qca_open(struct hci_uart *hu) skb_queue_head_init(&qca->txq); skb_queue_head_init(&qca->tx_wait_q); spin_lock_init(&qca->hci_ibs_lock); - qca->workqueue = create_singlethread_workqueue("qca_wq"); + qca->workqueue = alloc_ordered_workqueue("qca_wq", 0); if (!qca->workqueue) { BT_ERR("QCA Workqueue not initialized properly"); kfree(qca); From 418678b01aca849b4f86224e609610ce87a9bdc4 Mon Sep 17 00:00:00 2001 From: Szymon Janc Date: Thu, 1 Sep 2016 17:22:37 +0200 Subject: [PATCH 1216/1715] Bluetooth: btusb: Mark CW6622 devices to have broken link key commands Conwise CW6622 seems to have a problem with the stored link key commands so just mark it as broken. < HCI Command: Read Local Supported Features (0x04|0x0003) plen 0 > HCI Event: Command Complete (0x0e) plen 12 Read Local Supported Features (0x04|0x0003) ncmd 1 status 0x00 Features: 0xff 0x3e 0x85 0x38 0x18 0x18 0x00 0x00 < HCI Command: Read Local Version Information (0x04|0x0001) plen 0 > HCI Event: Command Complete (0x0e) plen 12 Read Local Version Information (0x04|0x0001) ncmd 1 status 0x00 HCI Version: 2.0 (0x3) HCI Revision: 0x1f4 LMP Version: 2.0 (0x3) LMP Subversion: 0x1f4 Manufacturer: CONWISE Technology Corporation Ltd (66) ... < HCI Command: Read Local Supported Commands (0x04|0x0002) plen 0 > HCI Event: Command Complete (0x0e) plen 68 Read Local Supported Commands (0x04|0x0002) ncmd 1 status 0x00 Commands: 7fffef03cedfffffffffff1ff20ff8ff3f ... < HCI Command: Read Stored Link Key (0x03|0x000d) plen 7 bdaddr 00:00:00:00:00:00 all 1 > HCI Event: Command Complete (0x0e) plen 8 Read Stored Link Key (0x03|0x000d) ncmd 1 status 0x11 max 0 num 0 Error: Unsupported Feature or Parameter Value Signed-off-by: Szymon Janc Signed-off-by: Marcel Holtmann --- drivers/bluetooth/btusb.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 80ae854a0bee..9ebd73dd7915 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -62,6 +62,7 @@ static struct usb_driver btusb_driver; #define BTUSB_REALTEK 0x20000 #define BTUSB_BCM2045 0x40000 #define BTUSB_IFNUM_2 0x80000 +#define BTUSB_CW6622 0x100000 static const struct usb_device_id btusb_table[] = { /* Generic Bluetooth USB device */ @@ -291,7 +292,8 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x0400, 0x080a), .driver_info = BTUSB_BROKEN_ISOC }, /* CONWISE Technology based adapters with buggy SCO support */ - { USB_DEVICE(0x0e5e, 0x6622), .driver_info = BTUSB_BROKEN_ISOC }, + { USB_DEVICE(0x0e5e, 0x6622), + .driver_info = BTUSB_BROKEN_ISOC | BTUSB_CW6622}, /* Roper Class 1 Bluetooth Dongle (Silicon Wave based) */ { USB_DEVICE(0x1310, 0x0001), .driver_info = BTUSB_SWAVE }, @@ -2845,6 +2847,9 @@ static int btusb_probe(struct usb_interface *intf, hdev->send = btusb_send_frame; hdev->notify = btusb_notify; + if (id->driver_info & BTUSB_CW6622) + set_bit(HCI_QUIRK_BROKEN_STORED_LINK_KEY, &hdev->quirks); + if (id->driver_info & BTUSB_BCM2045) set_bit(HCI_QUIRK_BROKEN_STORED_LINK_KEY, &hdev->quirks); From 321c6feed2519a2691f65e41c4d62332d6ee3d52 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Thu, 1 Sep 2016 16:46:23 +0200 Subject: [PATCH 1217/1715] Bluetooth: Add framework for Extended Controller Information MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This command is used to retrieve the current state and basic information of a controller. It is typically used right after getting the response to the Read Controller Index List command or an Index Added event (or its extended counterparts). When any of the values in the EIR_Data field changes, the event Extended Controller Information Changed will be used to inform clients about the updated information. Signed-off-by: Marcel Holtmann Signed-off-by: Michał Narajowski --- include/net/bluetooth/hci.h | 1 + include/net/bluetooth/mgmt.h | 18 +++++++++++ net/bluetooth/mgmt.c | 62 ++++++++++++++++++++++++++++++++++-- 3 files changed, 79 insertions(+), 2 deletions(-) diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index ddb9accac3a5..99aa5e5e3100 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -208,6 +208,7 @@ enum { HCI_MGMT_INDEX_EVENTS, HCI_MGMT_UNCONF_INDEX_EVENTS, HCI_MGMT_EXT_INDEX_EVENTS, + HCI_MGMT_EXT_INFO_EVENTS, HCI_MGMT_OPTION_EVENTS, HCI_MGMT_SETTING_EVENTS, HCI_MGMT_DEV_CLASS_EVENTS, diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index 7647964b1efa..611b243713ea 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -586,6 +586,18 @@ struct mgmt_rp_get_adv_size_info { #define MGMT_OP_START_LIMITED_DISCOVERY 0x0041 +#define MGMT_OP_READ_EXT_INFO 0x0042 +#define MGMT_READ_EXT_INFO_SIZE 0 +struct mgmt_rp_read_ext_info { + bdaddr_t bdaddr; + __u8 version; + __le16 manufacturer; + __le32 supported_settings; + __le32 current_settings; + __le16 eir_len; + __u8 eir[0]; +} __packed; + #define MGMT_EV_CMD_COMPLETE 0x0001 struct mgmt_ev_cmd_complete { __le16 opcode; @@ -800,3 +812,9 @@ struct mgmt_ev_advertising_added { struct mgmt_ev_advertising_removed { __u8 instance; } __packed; + +#define MGMT_EV_EXT_INFO_CHANGED 0x0025 +struct mgmt_ev_ext_info_changed { + __le16 eir_len; + __u8 eir[0]; +} __packed; diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 47efdb4a669a..69001f415efa 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -104,6 +104,7 @@ static const u16 mgmt_commands[] = { MGMT_OP_REMOVE_ADVERTISING, MGMT_OP_GET_ADV_SIZE_INFO, MGMT_OP_START_LIMITED_DISCOVERY, + MGMT_OP_READ_EXT_INFO, }; static const u16 mgmt_events[] = { @@ -141,6 +142,7 @@ static const u16 mgmt_events[] = { MGMT_EV_LOCAL_OOB_DATA_UPDATED, MGMT_EV_ADVERTISING_ADDED, MGMT_EV_ADVERTISING_REMOVED, + MGMT_EV_EXT_INFO_CHANGED, }; static const u16 mgmt_untrusted_commands[] = { @@ -149,6 +151,7 @@ static const u16 mgmt_untrusted_commands[] = { MGMT_OP_READ_UNCONF_INDEX_LIST, MGMT_OP_READ_CONFIG_INFO, MGMT_OP_READ_EXT_INDEX_LIST, + MGMT_OP_READ_EXT_INFO, }; static const u16 mgmt_untrusted_events[] = { @@ -162,6 +165,7 @@ static const u16 mgmt_untrusted_events[] = { MGMT_EV_NEW_CONFIG_OPTIONS, MGMT_EV_EXT_INDEX_ADDED, MGMT_EV_EXT_INDEX_REMOVED, + MGMT_EV_EXT_INFO_CHANGED, }; #define CACHE_TIMEOUT msecs_to_jiffies(2 * 1000) @@ -862,6 +866,52 @@ static int read_controller_info(struct sock *sk, struct hci_dev *hdev, sizeof(rp)); } +static int read_ext_controller_info(struct sock *sk, struct hci_dev *hdev, + void *data, u16 data_len) +{ + struct mgmt_rp_read_ext_info rp; + + BT_DBG("sock %p %s", sk, hdev->name); + + hci_dev_lock(hdev); + + memset(&rp, 0, sizeof(rp)); + + bacpy(&rp.bdaddr, &hdev->bdaddr); + + rp.version = hdev->hci_ver; + rp.manufacturer = cpu_to_le16(hdev->manufacturer); + + rp.supported_settings = cpu_to_le32(get_supported_settings(hdev)); + rp.current_settings = cpu_to_le32(get_current_settings(hdev)); + + rp.eir_len = cpu_to_le16(0); + + hci_dev_unlock(hdev); + + /* If this command is called at least once, then the events + * for class of device and local name changes are disabled + * and only the new extended controller information event + * is used. + */ + hci_sock_set_flag(sk, HCI_MGMT_EXT_INFO_EVENTS); + hci_sock_clear_flag(sk, HCI_MGMT_DEV_CLASS_EVENTS); + hci_sock_clear_flag(sk, HCI_MGMT_LOCAL_NAME_EVENTS); + + return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_READ_EXT_INFO, 0, &rp, + sizeof(rp)); +} + +static int ext_info_changed(struct hci_dev *hdev, struct sock *skip) +{ + struct mgmt_ev_ext_info_changed ev; + + ev.eir_len = cpu_to_le16(0); + + return mgmt_limited_event(MGMT_EV_EXT_INFO_CHANGED, hdev, &ev, + sizeof(ev), HCI_MGMT_EXT_INFO_EVENTS, skip); +} + static int send_settings_rsp(struct sock *sk, u16 opcode, struct hci_dev *hdev) { __le32 settings = cpu_to_le32(get_current_settings(hdev)); @@ -2995,6 +3045,7 @@ static int set_local_name(struct sock *sk, struct hci_dev *hdev, void *data, err = mgmt_limited_event(MGMT_EV_LOCAL_NAME_CHANGED, hdev, data, len, HCI_MGMT_LOCAL_NAME_EVENTS, sk); + ext_info_changed(hdev, sk); goto failed; } @@ -6356,6 +6407,8 @@ static const struct hci_mgmt_handler mgmt_handlers[] = { { remove_advertising, MGMT_REMOVE_ADVERTISING_SIZE }, { get_adv_size_info, MGMT_GET_ADV_SIZE_INFO_SIZE }, { start_limited_discovery, MGMT_START_DISCOVERY_SIZE }, + { read_ext_controller_info,MGMT_READ_EXT_INFO_SIZE, + HCI_MGMT_UNTRUSTED }, }; void mgmt_index_added(struct hci_dev *hdev) @@ -6494,10 +6547,12 @@ void __mgmt_power_off(struct hci_dev *hdev) mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &status); - if (memcmp(hdev->dev_class, zero_cod, sizeof(zero_cod)) != 0) + if (memcmp(hdev->dev_class, zero_cod, sizeof(zero_cod)) != 0) { mgmt_limited_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, zero_cod, sizeof(zero_cod), HCI_MGMT_DEV_CLASS_EVENTS, NULL); + ext_info_changed(hdev, NULL); + } new_settings(hdev, match.sk); @@ -7093,9 +7148,11 @@ void mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class, mgmt_pending_foreach(MGMT_OP_ADD_UUID, hdev, sk_lookup, &match); mgmt_pending_foreach(MGMT_OP_REMOVE_UUID, hdev, sk_lookup, &match); - if (!status) + if (!status) { mgmt_limited_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, dev_class, 3, HCI_MGMT_DEV_CLASS_EVENTS, NULL); + ext_info_changed(hdev, NULL); + } if (match.sk) sock_put(match.sk); @@ -7126,6 +7183,7 @@ void mgmt_set_local_name_complete(struct hci_dev *hdev, u8 *name, u8 status) mgmt_limited_event(MGMT_EV_LOCAL_NAME_CHANGED, hdev, &ev, sizeof(ev), HCI_MGMT_LOCAL_NAME_EVENTS, cmd ? cmd->sk : NULL); + ext_info_changed(hdev, cmd ? cmd->sk : NULL); } static inline bool has_uuid(u8 *uuid, u16 uuid_count, u8 (*uuids)[16]) From 8a0c9f49090fe8ae122fd1bbf7260c8492289386 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Narajowski?= Date: Thu, 1 Sep 2016 16:46:24 +0200 Subject: [PATCH 1218/1715] Bluetooth: Append local name and CoD to Extended Controller Info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds device class, complete local name and short local name to EIR data in Extended Controller Info as specified in docs. Signed-off-by: Michał Narajowski Signed-off-by: Marcel Holtmann --- net/bluetooth/mgmt.c | 63 +++++++++++++++++++++++++++++--------------- 1 file changed, 42 insertions(+), 21 deletions(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 69001f415efa..74179b92ef22 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -866,26 +866,58 @@ static int read_controller_info(struct sock *sk, struct hci_dev *hdev, sizeof(rp)); } +static inline u16 eir_append_data(u8 *eir, u16 eir_len, u8 type, u8 *data, + u8 data_len) +{ + eir[eir_len++] = sizeof(type) + data_len; + eir[eir_len++] = type; + memcpy(&eir[eir_len], data, data_len); + eir_len += data_len; + + return eir_len; +} + static int read_ext_controller_info(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { - struct mgmt_rp_read_ext_info rp; + struct mgmt_rp_read_ext_info *rp; + char buff[512]; + u16 eir_len = 0; + u8 name_len; BT_DBG("sock %p %s", sk, hdev->name); hci_dev_lock(hdev); - memset(&rp, 0, sizeof(rp)); + if (hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) + eir_len = eir_append_data(buff, eir_len, + EIR_CLASS_OF_DEV, + hdev->dev_class, 3); - bacpy(&rp.bdaddr, &hdev->bdaddr); + name_len = strlen(hdev->dev_name); + eir_len = eir_append_data(buff, eir_len, EIR_NAME_COMPLETE, + hdev->dev_name, name_len); - rp.version = hdev->hci_ver; - rp.manufacturer = cpu_to_le16(hdev->manufacturer); + name_len = strlen(hdev->short_name); + eir_len = eir_append_data(buff, eir_len, EIR_NAME_SHORT, + hdev->short_name, name_len); - rp.supported_settings = cpu_to_le32(get_supported_settings(hdev)); - rp.current_settings = cpu_to_le32(get_current_settings(hdev)); + rp = kmalloc(sizeof(*rp) + eir_len, GFP_KERNEL); + if (!rp) + return -ENOMEM; - rp.eir_len = cpu_to_le16(0); + memset(rp, 0, sizeof(*rp) + eir_len); + + rp->eir_len = cpu_to_le16(eir_len); + memcpy(rp->eir, buff, eir_len); + + bacpy(&rp->bdaddr, &hdev->bdaddr); + + rp->version = hdev->hci_ver; + rp->manufacturer = cpu_to_le16(hdev->manufacturer); + + rp->supported_settings = cpu_to_le32(get_supported_settings(hdev)); + rp->current_settings = cpu_to_le32(get_current_settings(hdev)); hci_dev_unlock(hdev); @@ -898,8 +930,8 @@ static int read_ext_controller_info(struct sock *sk, struct hci_dev *hdev, hci_sock_clear_flag(sk, HCI_MGMT_DEV_CLASS_EVENTS); hci_sock_clear_flag(sk, HCI_MGMT_LOCAL_NAME_EVENTS); - return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_READ_EXT_INFO, 0, &rp, - sizeof(rp)); + return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_READ_EXT_INFO, 0, rp, + sizeof(*rp) + eir_len); } static int ext_info_changed(struct hci_dev *hdev, struct sock *skip) @@ -5552,17 +5584,6 @@ unlock: return err; } -static inline u16 eir_append_data(u8 *eir, u16 eir_len, u8 type, u8 *data, - u8 data_len) -{ - eir[eir_len++] = sizeof(type) + data_len; - eir[eir_len++] = type; - memcpy(&eir[eir_len], data, data_len); - eir_len += data_len; - - return eir_len; -} - static void read_local_oob_ext_data_complete(struct hci_dev *hdev, u8 status, u16 opcode, struct sk_buff *skb) { From bdca1fd9a6df745857e23c6056494b7fe062b4e6 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Thu, 1 Sep 2016 11:24:57 +0200 Subject: [PATCH 1219/1715] fakelb: fix schedule while atomic This patch changes the spinlock to mutex for the available fakelb phy list. When holding the spinlock the ieee802154_unregister_hw is called which holding the rtnl_mutex, in that case we get a "BUG: sleeping function called from invalid context" error. We simple change the spinlock to mutex which allows to hold the rtnl lock there. Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- drivers/net/ieee802154/fakelb.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/ieee802154/fakelb.c b/drivers/net/ieee802154/fakelb.c index 0becf0ac3926..ec387efb61d0 100644 --- a/drivers/net/ieee802154/fakelb.c +++ b/drivers/net/ieee802154/fakelb.c @@ -30,7 +30,7 @@ static int numlbs = 2; static LIST_HEAD(fakelb_phys); -static DEFINE_SPINLOCK(fakelb_phys_lock); +static DEFINE_MUTEX(fakelb_phys_lock); static LIST_HEAD(fakelb_ifup_phys); static DEFINE_RWLOCK(fakelb_ifup_phys_lock); @@ -188,9 +188,9 @@ static int fakelb_add_one(struct device *dev) if (err) goto err_reg; - spin_lock(&fakelb_phys_lock); + mutex_lock(&fakelb_phys_lock); list_add_tail(&phy->list, &fakelb_phys); - spin_unlock(&fakelb_phys_lock); + mutex_unlock(&fakelb_phys_lock); return 0; @@ -222,10 +222,10 @@ static int fakelb_probe(struct platform_device *pdev) return 0; err_slave: - spin_lock(&fakelb_phys_lock); + mutex_lock(&fakelb_phys_lock); list_for_each_entry_safe(phy, tmp, &fakelb_phys, list) fakelb_del(phy); - spin_unlock(&fakelb_phys_lock); + mutex_unlock(&fakelb_phys_lock); return err; } @@ -233,10 +233,10 @@ static int fakelb_remove(struct platform_device *pdev) { struct fakelb_phy *phy, *tmp; - spin_lock(&fakelb_phys_lock); + mutex_lock(&fakelb_phys_lock); list_for_each_entry_safe(phy, tmp, &fakelb_phys, list) fakelb_del(phy); - spin_unlock(&fakelb_phys_lock); + mutex_unlock(&fakelb_phys_lock); return 0; } From aa1638dde75d00e4f549902017d0df48b77e86ff Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Thu, 1 Sep 2016 19:48:28 +0200 Subject: [PATCH 1220/1715] Bluetooth: Send control open and close messages for HCI user channels When opening and closing HCI user channel, send monitoring messages to be able to trace its behavior. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_sock.c | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 83e9fdb712e5..48f9471e7c85 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -493,6 +493,11 @@ static struct sk_buff *create_monitor_ctrl_open(struct sock *sk) ver[0] = BT_SUBSYS_VERSION; put_unaligned_le16(BT_SUBSYS_REVISION, ver + 1); break; + case HCI_CHANNEL_USER: + format = 0x0001; + ver[0] = BT_SUBSYS_VERSION; + put_unaligned_le16(BT_SUBSYS_REVISION, ver + 1); + break; case HCI_CHANNEL_CONTROL: format = 0x0002; mgmt_fill_version_info(ver); @@ -539,6 +544,7 @@ static struct sk_buff *create_monitor_ctrl_close(struct sock *sk) switch (hci_pi(sk)->channel) { case HCI_CHANNEL_RAW: + case HCI_CHANNEL_USER: case HCI_CHANNEL_CONTROL: break; default: @@ -827,6 +833,7 @@ static int hci_sock_release(struct socket *sock) atomic_dec(&monitor_promisc); break; case HCI_CHANNEL_RAW: + case HCI_CHANNEL_USER: case HCI_CHANNEL_CONTROL: /* Send event to monitor */ skb = create_monitor_ctrl_close(sk); @@ -1179,8 +1186,36 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, } hci_pi(sk)->channel = haddr.hci_channel; + + if (!hci_sock_gen_cookie(sk)) { + /* In the case when a cookie has already been assigned, + * this socket will transition from a raw socket into + * an user channel socket. For a clean transition, send + * the close notification first. + */ + skb = create_monitor_ctrl_close(sk); + if (skb) { + hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, + HCI_SOCK_TRUSTED, NULL); + kfree_skb(skb); + } + } + + /* The user channel is restricted to CAP_NET_ADMIN + * capabilities and with that implicitly trusted. + */ + hci_sock_set_flag(sk, HCI_SOCK_TRUSTED); + hci_pi(sk)->hdev = hdev; + /* Send event to monitor */ + skb = create_monitor_ctrl_open(sk); + if (skb) { + hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, + HCI_SOCK_TRUSTED, NULL); + kfree_skb(skb); + } + atomic_inc(&hdev->promisc); break; From baab793225c9badf46309f56982eb1012dbaac80 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sun, 4 Sep 2016 05:13:46 +0200 Subject: [PATCH 1221/1715] Bluetooth: Fix wrong New Settings event when closing HCI User Channel When closing HCI User Channel, the New Settings event was send out to inform about changed settings. However such event is wrong since the exclusive HCI User Channel access is active until the Index Added event has been sent. @ USER Close: test @ MGMT Event: New Settings (0x0006) plen 4 Current settings: 0x00000ad0 Bondable Secure Simple Pairing BR/EDR Low Energy Secure Connections = Close Index: 00:14:EF:22:04:12 @ MGMT Event: Index Added (0x0004) plen 0 Calling __mgmt_power_off from hci_dev_do_close requires an extra check for an active HCI User Channel. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index ddf8432fe8fb..3ac89e9ace71 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1562,6 +1562,7 @@ int hci_dev_do_close(struct hci_dev *hdev) auto_off = hci_dev_test_and_clear_flag(hdev, HCI_AUTO_OFF); if (!auto_off && hdev->dev_type == HCI_PRIMARY && + !hci_dev_test_flag(hdev, HCI_USER_CHANNEL) && hci_dev_test_flag(hdev, HCI_MGMT)) __mgmt_power_off(hdev); From 0676cab47ed18c9cfa884f055a3ba0f9e6fb8e96 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 6 Sep 2016 13:15:49 +0100 Subject: [PATCH 1222/1715] Bluetooth: btqca: remove null checks on edl->data as it is an array edl->data is an array of __u8 so the null check is unneccessary, so remove it. Signed-off-by: Colin Ian King Signed-off-by: Marcel Holtmann --- drivers/bluetooth/btqca.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index 4a6208168850..28afd5d585f9 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -55,8 +55,8 @@ static int rome_patch_ver_req(struct hci_dev *hdev, u32 *rome_version) } edl = (struct edl_event_hdr *)(skb->data); - if (!edl || !edl->data) { - BT_ERR("%s: TLV with no header or no data", hdev->name); + if (!edl) { + BT_ERR("%s: TLV with no header", hdev->name); err = -EILSEQ; goto out; } @@ -224,8 +224,8 @@ static int rome_tlv_send_segment(struct hci_dev *hdev, int idx, int seg_size, } edl = (struct edl_event_hdr *)(skb->data); - if (!edl || !edl->data) { - BT_ERR("%s: TLV with no header or no data", hdev->name); + if (!edl) { + BT_ERR("%s: TLV with no header", hdev->name); err = -EILSEQ; goto out; } From 3c0975a7a1087add3bf873601f0270aa695d7616 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Dalleau?= Date: Thu, 8 Sep 2016 12:00:11 +0200 Subject: [PATCH 1223/1715] Bluetooth: Fix reason code used for rejecting SCO connections MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A comment in the code states that SCO connection should be rejected with the proper error value between 0xd-0xf. The code uses HCI_ERROR_REMOTE_LOW_RESOURCES which is 0x14. This led to following error: < HCI Command: Reject Synchronous Co.. (0x01|0x002a) plen 7 Address: 34:51:C9:EF:02:CA (Apple, Inc.) Reason: Remote Device Terminated due to Low Resources (0x14) > HCI Event: Command Status (0x0f) plen 4 Reject Synchronous Connection Request (0x01|0x002a) ncmd 1 Status: Invalid HCI Command Parameters (0x12) Instead make use of HCI_ERROR_REJ_LIMITED_RESOURCES which is 0xd. Signed-off-by: Frédéric Dalleau Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_request.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 9968b1c7c03a..9566ff8e3223 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -1717,7 +1717,7 @@ void __hci_abort_conn(struct hci_request *req, struct hci_conn *conn, * function. To be safe hard-code one of the * values that's suitable for SCO. */ - rej.reason = HCI_ERROR_REMOTE_LOW_RESOURCES; + rej.reason = HCI_ERROR_REJ_LIMITED_RESOURCES; hci_req_add(req, HCI_OP_REJECT_SYNC_CONN_REQ, sizeof(rej), &rej); From 4037a7747d7b5a3e5bb4d10fb9ea6e2fd8a23c3b Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Thu, 8 Sep 2016 18:07:07 +0200 Subject: [PATCH 1224/1715] Bluetooth: Increase the subsystem minor version number While the subsystem version information are purely informational, increase the minor number due to the addition of user channel and management control monitoring suppport. It is helpful for debugging purposes to see the version numbers change. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/bluetooth.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index d705bcf40710..0a1e21d7bce1 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -30,7 +30,7 @@ #include #define BT_SUBSYS_VERSION 2 -#define BT_SUBSYS_REVISION 21 +#define BT_SUBSYS_REVISION 22 #ifndef AF_BLUETOOTH #define AF_BLUETOOTH 31 From 3e36ca483a642f441b8e29b4e98091f2c62bfb38 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 10 Sep 2016 12:21:22 +0000 Subject: [PATCH 1225/1715] Bluetooth: Use kzalloc instead of kmalloc/memset Use kzalloc rather than kmalloc followed by memset with 0. Generated by: scripts/coccinelle/api/alloc/kzalloc-simple.cocci Signed-off-by: Wei Yongjun Signed-off-by: Marcel Holtmann --- net/bluetooth/mgmt.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 74179b92ef22..0ac881cfc646 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -902,12 +902,10 @@ static int read_ext_controller_info(struct sock *sk, struct hci_dev *hdev, eir_len = eir_append_data(buff, eir_len, EIR_NAME_SHORT, hdev->short_name, name_len); - rp = kmalloc(sizeof(*rp) + eir_len, GFP_KERNEL); + rp = kzalloc(sizeof(*rp) + eir_len, GFP_KERNEL); if (!rp) return -ENOMEM; - memset(rp, 0, sizeof(*rp) + eir_len); - rp->eir_len = cpu_to_le16(eir_len); memcpy(rp->eir, buff, eir_len); From 83ebb9ec734e9e768a9fae469e4a7ed1762ef43a Mon Sep 17 00:00:00 2001 From: Szymon Janc Date: Fri, 9 Sep 2016 20:24:40 +0200 Subject: [PATCH 1226/1715] Bluetooth: Fix not registering BR/EDR SMP channel with force_bredr flag If force_bredr is set SMP BR/EDR channel should also be for non-SC capable controllers. Since hcidev flag is persistent wrt power toggle it can be already set when calling smp_register(). This resulted in SMP BR/EDR channel not being registered even if HCI_FORCE_BREDR_SMP flag was set. This also fix NULL pointer dereference when trying to disable force_bredr after power cycle. BUG: unable to handle kernel NULL pointer dereference at 0000000000000388 IP: [] smp_del_chan+0x18/0x80 [bluetooth] Call Trace: [] force_bredr_smp_write+0xba/0x100 [bluetooth] [] full_proxy_write+0x54/0x90 [] __vfs_write+0x37/0x160 [] ? selinux_file_permission+0xd7/0x110 [] ? security_file_permission+0x3d/0xc0 [] ? percpu_down_read+0x12/0x50 [] vfs_write+0xb5/0x1a0 [] SyS_write+0x55/0xc0 [] entry_SYSCALL_64_fastpath+0x1a/0xa4 Code: 48 8b 45 f0 eb c1 0f 1f 00 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 f6 05 c6 3b 02 00 04 55 48 89 e5 41 54 53 49 89 fc 75 4b <49> 8b 9c 24 88 03 00 00 48 85 db 74 31 49 c7 84 24 88 03 00 00 RIP [] smp_del_chan+0x18/0x80 [bluetooth] RSP CR2: 0000000000000388 Signed-off-by: Szymon Janc Signed-off-by: Marcel Holtmann --- net/bluetooth/smp.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 4c1a16a96ae5..43faf2aea2ab 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -3387,7 +3387,10 @@ int smp_register(struct hci_dev *hdev) if (!lmp_sc_capable(hdev)) { debugfs_create_file("force_bredr_smp", 0644, hdev->debugfs, hdev, &force_bredr_smp_fops); - return 0; + + /* Flag can be already set here (due to power toggle) */ + if (!hci_dev_test_flag(hdev, HCI_FORCE_BREDR_SMP)) + return 0; } if (WARN_ON(hdev->smp_bredr_data)) { From 1110a2dbe69831abdcf119c3a9a4c4ef2d0905f8 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Fri, 9 Sep 2016 10:02:05 -0500 Subject: [PATCH 1227/1715] Bluetooth: btrtl: Add RTL8822BE Bluetooth device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The RTL8822BE is a new Realtek wifi and BT device. Support for the BT part is hereby added. As this device is similar to most of the other Realtek BT devices, the changes are minimal. The main difference is that the 8822BE needs a configuration file for enabling and disabling features. Thus code is added to select and load this configuration file. Although not needed at the moment, hooks are added for the other devices that might need such configuration files. One additional change is to the routine that tests that the project ID contained in the firmware matches the hardware. As the project IDs are not sequential, continuing to use the position in the array as the expected value of the ID would require adding extra unused entries in the table, and any subsequant rearrangment of the array would break the code. To fix these problems, the array elements now contain both the hardware ID and the expected value for the project ID. Signed-off-by: 陆朱伟 Signed-off-by: Larry Finger Signed-off-by: Marcel Holtmann --- drivers/bluetooth/btrtl.c | 107 +++++++++++++++++++++++++++++++++----- 1 file changed, 95 insertions(+), 12 deletions(-) diff --git a/drivers/bluetooth/btrtl.c b/drivers/bluetooth/btrtl.c index 84288938f7f2..fc9b25703c67 100644 --- a/drivers/bluetooth/btrtl.c +++ b/drivers/bluetooth/btrtl.c @@ -33,6 +33,7 @@ #define RTL_ROM_LMP_8723B 0x8723 #define RTL_ROM_LMP_8821A 0x8821 #define RTL_ROM_LMP_8761A 0x8761 +#define RTL_ROM_LMP_8822B 0x8822 static int rtl_read_rom_version(struct hci_dev *hdev, u8 *version) { @@ -78,11 +79,15 @@ static int rtl8723b_parse_firmware(struct hci_dev *hdev, u16 lmp_subver, const unsigned char *patch_length_base, *patch_offset_base; u32 patch_offset = 0; u16 patch_length, num_patches; - const u16 project_id_to_lmp_subver[] = { - RTL_ROM_LMP_8723A, - RTL_ROM_LMP_8723B, - RTL_ROM_LMP_8821A, - RTL_ROM_LMP_8761A + static const struct { + __u16 lmp_subver; + __u8 id; + } project_id_to_lmp_subver[] = { + { RTL_ROM_LMP_8723A, 0 }, + { RTL_ROM_LMP_8723B, 1 }, + { RTL_ROM_LMP_8821A, 2 }, + { RTL_ROM_LMP_8761A, 3 }, + { RTL_ROM_LMP_8822B, 8 }, }; ret = rtl_read_rom_version(hdev, &rom_version); @@ -134,14 +139,20 @@ static int rtl8723b_parse_firmware(struct hci_dev *hdev, u16 lmp_subver, return -EINVAL; } - if (project_id >= ARRAY_SIZE(project_id_to_lmp_subver)) { + /* Find project_id in table */ + for (i = 0; i < ARRAY_SIZE(project_id_to_lmp_subver); i++) { + if (project_id == project_id_to_lmp_subver[i].id) + break; + } + + if (i >= ARRAY_SIZE(project_id_to_lmp_subver)) { BT_ERR("%s: unknown project id %d", hdev->name, project_id); return -EINVAL; } - if (lmp_subver != project_id_to_lmp_subver[project_id]) { + if (lmp_subver != project_id_to_lmp_subver[i].lmp_subver) { BT_ERR("%s: firmware is for %x but this is a %x", hdev->name, - project_id_to_lmp_subver[project_id], lmp_subver); + project_id_to_lmp_subver[i].lmp_subver, lmp_subver); return -EINVAL; } @@ -257,6 +268,26 @@ out: return ret; } +static int rtl_load_config(struct hci_dev *hdev, const char *name, u8 **buff) +{ + const struct firmware *fw; + int ret; + + BT_INFO("%s: rtl: loading %s", hdev->name, name); + ret = request_firmware(&fw, name, &hdev->dev); + if (ret < 0) { + BT_ERR("%s: Failed to load %s", hdev->name, name); + return ret; + } + + ret = fw->size; + *buff = kmemdup(fw->data, ret, GFP_KERNEL); + + release_firmware(fw); + + return ret; +} + static int btrtl_setup_rtl8723a(struct hci_dev *hdev) { const struct firmware *fw; @@ -296,25 +327,74 @@ static int btrtl_setup_rtl8723b(struct hci_dev *hdev, u16 lmp_subver, unsigned char *fw_data = NULL; const struct firmware *fw; int ret; + int cfg_sz; + u8 *cfg_buff = NULL; + u8 *tbuff; + char *cfg_name = NULL; + + switch (lmp_subver) { + case RTL_ROM_LMP_8723B: + cfg_name = "rtl_bt/rtl8723b_config.bin"; + break; + case RTL_ROM_LMP_8821A: + cfg_name = "rtl_bt/rtl8821a_config.bin"; + break; + case RTL_ROM_LMP_8761A: + cfg_name = "rtl_bt/rtl8761a_config.bin"; + break; + case RTL_ROM_LMP_8822B: + cfg_name = "rtl_bt/rtl8822b_config.bin"; + break; + default: + BT_ERR("%s: rtl: no config according to lmp_subver %04x", + hdev->name, lmp_subver); + break; + } + + if (cfg_name) { + cfg_sz = rtl_load_config(hdev, cfg_name, &cfg_buff); + if (cfg_sz < 0) + cfg_sz = 0; + } else + cfg_sz = 0; BT_INFO("%s: rtl: loading %s", hdev->name, fw_name); ret = request_firmware(&fw, fw_name, &hdev->dev); if (ret < 0) { BT_ERR("%s: Failed to load %s", hdev->name, fw_name); - return ret; + goto err_req_fw; } ret = rtl8723b_parse_firmware(hdev, lmp_subver, fw, &fw_data); if (ret < 0) goto out; + if (cfg_sz) { + tbuff = kzalloc(ret + cfg_sz, GFP_KERNEL); + if (!tbuff) { + ret = -ENOMEM; + goto out; + } + + memcpy(tbuff, fw_data, ret); + kfree(fw_data); + + memcpy(tbuff + ret, cfg_buff, cfg_sz); + ret += cfg_sz; + + fw_data = tbuff; + } + + BT_INFO("cfg_sz %d, total size %d", cfg_sz, ret); + ret = rtl_download_firmware(hdev, fw_data, ret); - kfree(fw_data); - if (ret < 0) - goto out; out: release_firmware(fw); + kfree(fw_data); +err_req_fw: + if (cfg_sz) + kfree(cfg_buff); return ret; } @@ -377,6 +457,9 @@ int btrtl_setup_realtek(struct hci_dev *hdev) case RTL_ROM_LMP_8761A: return btrtl_setup_rtl8723b(hdev, lmp_subver, "rtl_bt/rtl8761a_fw.bin"); + case RTL_ROM_LMP_8822B: + return btrtl_setup_rtl8723b(hdev, lmp_subver, + "rtl_bt/rtl8822b_fw.bin"); default: BT_INFO("rtl: assuming no firmware upload needed."); return 0; From 7c295c4801b2de24fc25687eb0cb73cf0c99d114 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Narajowski?= Date: Sun, 18 Sep 2016 12:50:02 +0200 Subject: [PATCH 1228/1715] Bluetooth: Add support for local name in scan rsp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch enables appending local name to scan response data. If currently advertised instance has name flag set it is expired immediately. Signed-off-by: Michał Narajowski Signed-off-by: Szymon Janc Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_request.c | 28 +++++++++++++++------- net/bluetooth/mgmt.c | 46 +++++++++++++++++++++++++++++++++++-- 2 files changed, 64 insertions(+), 10 deletions(-) diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 9566ff8e3223..0ce6cdd278b2 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -971,14 +971,14 @@ void __hci_req_enable_advertising(struct hci_request *req) hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable); } -static u8 create_default_scan_rsp_data(struct hci_dev *hdev, u8 *ptr) +static u8 append_local_name(struct hci_dev *hdev, u8 *ptr, u8 ad_len) { - u8 ad_len = 0; size_t name_len; + int max_len; + max_len = HCI_MAX_AD_LENGTH - ad_len - 2; name_len = strlen(hdev->dev_name); - if (name_len > 0) { - size_t max_len = HCI_MAX_AD_LENGTH - ad_len - 2; + if (name_len > 0 && max_len > 0) { if (name_len > max_len) { name_len = max_len; @@ -997,22 +997,34 @@ static u8 create_default_scan_rsp_data(struct hci_dev *hdev, u8 *ptr) return ad_len; } +static u8 create_default_scan_rsp_data(struct hci_dev *hdev, u8 *ptr) +{ + return append_local_name(hdev, ptr, 0); +} + static u8 create_instance_scan_rsp_data(struct hci_dev *hdev, u8 instance, u8 *ptr) { struct adv_info *adv_instance; + u32 instance_flags; + u8 scan_rsp_len = 0; adv_instance = hci_find_adv_instance(hdev, instance); if (!adv_instance) return 0; - /* TODO: Set the appropriate entries based on advertising instance flags - * here once flags other than 0 are supported. - */ + instance_flags = adv_instance->flags; + memcpy(ptr, adv_instance->scan_rsp_data, adv_instance->scan_rsp_len); - return adv_instance->scan_rsp_len; + scan_rsp_len += adv_instance->scan_rsp_len; + ptr += adv_instance->scan_rsp_len; + + if (instance_flags & MGMT_ADV_FLAG_LOCAL_NAME) + scan_rsp_len = append_local_name(hdev, ptr, scan_rsp_len); + + return scan_rsp_len; } void __hci_req_update_scan_rsp_data(struct hci_request *req, u8 instance) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 0ac881cfc646..89954bb19222 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -3012,6 +3012,35 @@ static int user_passkey_neg_reply(struct sock *sk, struct hci_dev *hdev, HCI_OP_USER_PASSKEY_NEG_REPLY, 0); } +static void adv_expire(struct hci_dev *hdev, u32 flags) +{ + struct adv_info *adv_instance; + struct hci_request req; + int err; + + adv_instance = hci_find_adv_instance(hdev, hdev->cur_adv_instance); + if (!adv_instance) + return; + + /* stop if current instance doesn't need to be changed */ + if (!(adv_instance->flags & flags)) + return; + + cancel_adv_timeout(hdev); + + adv_instance = hci_get_next_instance(hdev, adv_instance->instance); + if (!adv_instance) + return; + + hci_req_init(&req, hdev); + err = __hci_req_schedule_adv_instance(&req, adv_instance->instance, + true); + if (err) + return; + + hci_req_run(&req, NULL); +} + static void set_name_complete(struct hci_dev *hdev, u8 status, u16 opcode) { struct mgmt_cp_set_local_name *cp; @@ -3027,13 +3056,17 @@ static void set_name_complete(struct hci_dev *hdev, u8 status, u16 opcode) cp = cmd->param; - if (status) + if (status) { mgmt_cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, mgmt_status(status)); - else + } else { mgmt_cmd_complete(cmd->sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, 0, cp, sizeof(*cp)); + if (hci_dev_test_flag(hdev, HCI_LE_ADV)) + adv_expire(hdev, MGMT_ADV_FLAG_LOCAL_NAME); + } + mgmt_pending_remove(cmd); unlock: @@ -5885,6 +5918,7 @@ static u32 get_supported_adv_flags(struct hci_dev *hdev) flags |= MGMT_ADV_FLAG_DISCOV; flags |= MGMT_ADV_FLAG_LIMITED_DISCOV; flags |= MGMT_ADV_FLAG_MANAGED_FLAGS; + flags |= MGMT_ADV_FLAG_LOCAL_NAME; if (hdev->adv_tx_power != HCI_TX_POWER_INVALID) flags |= MGMT_ADV_FLAG_TX_POWER; @@ -5961,6 +5995,10 @@ static bool tlv_data_is_valid(struct hci_dev *hdev, u32 adv_flags, u8 *data, tx_power_managed = true; max_len -= 3; } + } else { + /* at least 1 byte of name should fit in */ + if (adv_flags & MGMT_ADV_FLAG_LOCAL_NAME) + max_len -= 3; } if (len > max_len) @@ -6293,6 +6331,10 @@ static u8 tlv_data_max_len(u32 adv_flags, bool is_adv_data) if (adv_flags & MGMT_ADV_FLAG_TX_POWER) max_len -= 3; + } else { + /* at least 1 byte of name should fit in */ + if (adv_flags & MGMT_ADV_FLAG_LOCAL_NAME) + max_len -= 3; } return max_len; From c4960ecf2b09210930964ef2c05ce2590802ccf4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Narajowski?= Date: Sun, 18 Sep 2016 12:50:03 +0200 Subject: [PATCH 1229/1715] Bluetooth: Add support for appearance in scan rsp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch enables prepending appearance value to scan response data. It also adds support for setting appearance value through mgmt command. If currently advertised instance has apperance flag set it is expired immediately. Signed-off-by: Michał Narajowski Signed-off-by: Szymon Janc Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 1 + include/net/bluetooth/mgmt.h | 6 ++++++ net/bluetooth/hci_request.c | 8 +++++++ net/bluetooth/mgmt.c | 37 ++++++++++++++++++++++++++++++++ 4 files changed, 52 insertions(+) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index a48f71d73dc8..f00bf667ec33 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -211,6 +211,7 @@ struct hci_dev { __u8 dev_name[HCI_MAX_NAME_LENGTH]; __u8 short_name[HCI_MAX_SHORT_NAME_LENGTH]; __u8 eir[HCI_MAX_EIR_LENGTH]; + __u16 appearance; __u8 dev_class[3]; __u8 major_class; __u8 minor_class; diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index 611b243713ea..72a456bbbcd5 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -598,6 +598,12 @@ struct mgmt_rp_read_ext_info { __u8 eir[0]; } __packed; +#define MGMT_OP_SET_APPEARANCE 0x0043 +struct mgmt_cp_set_appearance { + __u16 appearance; +} __packed; +#define MGMT_SET_APPEARANCE_SIZE 2 + #define MGMT_EV_CMD_COMPLETE 0x0001 struct mgmt_ev_cmd_complete { __le16 opcode; diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 0ce6cdd278b2..c8135680c43e 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -1015,6 +1015,14 @@ static u8 create_instance_scan_rsp_data(struct hci_dev *hdev, u8 instance, instance_flags = adv_instance->flags; + if ((instance_flags & MGMT_ADV_FLAG_APPEARANCE) && hdev->appearance) { + ptr[0] = 3; + ptr[1] = EIR_APPEARANCE; + put_unaligned_le16(hdev->appearance, ptr + 2); + scan_rsp_len += 4; + ptr += 4; + } + memcpy(ptr, adv_instance->scan_rsp_data, adv_instance->scan_rsp_len); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 89954bb19222..78d708851208 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -105,6 +105,7 @@ static const u16 mgmt_commands[] = { MGMT_OP_GET_ADV_SIZE_INFO, MGMT_OP_START_LIMITED_DISCOVERY, MGMT_OP_READ_EXT_INFO, + MGMT_OP_SET_APPEARANCE, }; static const u16 mgmt_events[] = { @@ -3143,6 +3144,34 @@ failed: return err; } +static int set_appearance(struct sock *sk, struct hci_dev *hdev, void *data, + u16 len) +{ + struct mgmt_cp_set_appearance *cp = data; + u16 apperance; + int err; + + BT_DBG(""); + + apperance = le16_to_cpu(cp->appearance); + + hci_dev_lock(hdev); + + if (hdev->appearance != apperance) { + hdev->appearance = apperance; + + if (hci_dev_test_flag(hdev, HCI_LE_ADV)) + adv_expire(hdev, MGMT_ADV_FLAG_APPEARANCE); + } + + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_SET_APPEARANCE, 0, NULL, + 0); + + hci_dev_unlock(hdev); + + return err; +} + static void read_local_oob_data_complete(struct hci_dev *hdev, u8 status, u16 opcode, struct sk_buff *skb) { @@ -5918,6 +5947,7 @@ static u32 get_supported_adv_flags(struct hci_dev *hdev) flags |= MGMT_ADV_FLAG_DISCOV; flags |= MGMT_ADV_FLAG_LIMITED_DISCOV; flags |= MGMT_ADV_FLAG_MANAGED_FLAGS; + flags |= MGMT_ADV_FLAG_APPEARANCE; flags |= MGMT_ADV_FLAG_LOCAL_NAME; if (hdev->adv_tx_power != HCI_TX_POWER_INVALID) @@ -5999,6 +6029,9 @@ static bool tlv_data_is_valid(struct hci_dev *hdev, u32 adv_flags, u8 *data, /* at least 1 byte of name should fit in */ if (adv_flags & MGMT_ADV_FLAG_LOCAL_NAME) max_len -= 3; + + if (adv_flags & MGMT_ADV_FLAG_APPEARANCE) + max_len -= 4; } if (len > max_len) @@ -6335,6 +6368,9 @@ static u8 tlv_data_max_len(u32 adv_flags, bool is_adv_data) /* at least 1 byte of name should fit in */ if (adv_flags & MGMT_ADV_FLAG_LOCAL_NAME) max_len -= 3; + + if (adv_flags & (MGMT_ADV_FLAG_APPEARANCE)) + max_len -= 4; } return max_len; @@ -6470,6 +6506,7 @@ static const struct hci_mgmt_handler mgmt_handlers[] = { { start_limited_discovery, MGMT_START_DISCOVERY_SIZE }, { read_ext_controller_info,MGMT_READ_EXT_INFO_SIZE, HCI_MGMT_UNTRUSTED }, + { set_appearance, MGMT_SET_APPEARANCE_SIZE }, }; void mgmt_index_added(struct hci_dev *hdev) From 5e2c59e84b633e4f7719fdc6a2930f2a311da83a Mon Sep 17 00:00:00 2001 From: Szymon Janc Date: Sun, 18 Sep 2016 12:50:04 +0200 Subject: [PATCH 1230/1715] Bluetooth: Remove unused parameter from tlv_data_is_valid function hdev parameter is not used in function. Signed-off-by: Szymon Janc Signed-off-by: Marcel Holtmann --- net/bluetooth/mgmt.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 78d708851208..97f70b7fb7b1 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -6005,8 +6005,7 @@ static int read_adv_features(struct sock *sk, struct hci_dev *hdev, return err; } -static bool tlv_data_is_valid(struct hci_dev *hdev, u32 adv_flags, u8 *data, - u8 len, bool is_adv_data) +static bool tlv_data_is_valid(u32 adv_flags, u8 *data, u8 len, bool is_adv_data) { u8 max_len = HCI_MAX_AD_LENGTH; int i, cur_len; @@ -6168,8 +6167,8 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev, goto unlock; } - if (!tlv_data_is_valid(hdev, flags, cp->data, cp->adv_data_len, true) || - !tlv_data_is_valid(hdev, flags, cp->data + cp->adv_data_len, + if (!tlv_data_is_valid(flags, cp->data, cp->adv_data_len, true) || + !tlv_data_is_valid(flags, cp->data + cp->adv_data_len, cp->scan_rsp_len, false)) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING, MGMT_STATUS_INVALID_PARAMS); From 2bb36870e8cb29949ef9acec37129cd8e70f1857 Mon Sep 17 00:00:00 2001 From: Szymon Janc Date: Sun, 18 Sep 2016 12:50:05 +0200 Subject: [PATCH 1231/1715] Bluetooth: Unify advertising instance flags check This unifies max length and TLV validity checks. Signed-off-by: Szymon Janc Signed-off-by: Marcel Holtmann --- net/bluetooth/mgmt.c | 85 +++++++++++++++++++++++++------------------- 1 file changed, 48 insertions(+), 37 deletions(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 97f70b7fb7b1..c96b0adc4971 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -6005,34 +6005,59 @@ static int read_adv_features(struct sock *sk, struct hci_dev *hdev, return err; } -static bool tlv_data_is_valid(u32 adv_flags, u8 *data, u8 len, bool is_adv_data) +static u8 tlv_data_max_len(u32 adv_flags, bool is_adv_data) { u8 max_len = HCI_MAX_AD_LENGTH; - int i, cur_len; - bool flags_managed = false; - bool tx_power_managed = false; if (is_adv_data) { if (adv_flags & (MGMT_ADV_FLAG_DISCOV | MGMT_ADV_FLAG_LIMITED_DISCOV | - MGMT_ADV_FLAG_MANAGED_FLAGS)) { - flags_managed = true; + MGMT_ADV_FLAG_MANAGED_FLAGS)) max_len -= 3; - } - if (adv_flags & MGMT_ADV_FLAG_TX_POWER) { - tx_power_managed = true; + if (adv_flags & MGMT_ADV_FLAG_TX_POWER) max_len -= 3; - } } else { /* at least 1 byte of name should fit in */ if (adv_flags & MGMT_ADV_FLAG_LOCAL_NAME) max_len -= 3; - if (adv_flags & MGMT_ADV_FLAG_APPEARANCE) + if (adv_flags & (MGMT_ADV_FLAG_APPEARANCE)) max_len -= 4; } + return max_len; +} + +static bool flags_managed(u32 adv_flags) +{ + return adv_flags & (MGMT_ADV_FLAG_DISCOV | + MGMT_ADV_FLAG_LIMITED_DISCOV | + MGMT_ADV_FLAG_MANAGED_FLAGS); +} + +static bool tx_power_managed(u32 adv_flags) +{ + return adv_flags & MGMT_ADV_FLAG_TX_POWER; +} + +static bool name_managed(u32 adv_flags) +{ + return adv_flags & MGMT_ADV_FLAG_LOCAL_NAME; +} + +static bool appearance_managed(u32 adv_flags) +{ + return adv_flags & MGMT_ADV_FLAG_APPEARANCE; +} + +static bool tlv_data_is_valid(u32 adv_flags, u8 *data, u8 len, bool is_adv_data) +{ + int i, cur_len; + u8 max_len; + + max_len = tlv_data_max_len(adv_flags, is_adv_data); + if (len > max_len) return false; @@ -6040,10 +6065,20 @@ static bool tlv_data_is_valid(u32 adv_flags, u8 *data, u8 len, bool is_adv_data) for (i = 0, cur_len = 0; i < len; i += (cur_len + 1)) { cur_len = data[i]; - if (flags_managed && data[i + 1] == EIR_FLAGS) + if (data[i + 1] == EIR_FLAGS && flags_managed(adv_flags)) return false; - if (tx_power_managed && data[i + 1] == EIR_TX_POWER) + if (data[i + 1] == EIR_TX_POWER && tx_power_managed(adv_flags)) + return false; + + if (data[i + 1] == EIR_NAME_COMPLETE && name_managed(adv_flags)) + return false; + + if (data[i + 1] == EIR_NAME_SHORT && name_managed(adv_flags)) + return false; + + if (data[i + 1] == EIR_APPEARANCE && + appearance_managed(adv_flags)) return false; /* If the current field length would exceed the total data @@ -6351,30 +6386,6 @@ unlock: return err; } -static u8 tlv_data_max_len(u32 adv_flags, bool is_adv_data) -{ - u8 max_len = HCI_MAX_AD_LENGTH; - - if (is_adv_data) { - if (adv_flags & (MGMT_ADV_FLAG_DISCOV | - MGMT_ADV_FLAG_LIMITED_DISCOV | - MGMT_ADV_FLAG_MANAGED_FLAGS)) - max_len -= 3; - - if (adv_flags & MGMT_ADV_FLAG_TX_POWER) - max_len -= 3; - } else { - /* at least 1 byte of name should fit in */ - if (adv_flags & MGMT_ADV_FLAG_LOCAL_NAME) - max_len -= 3; - - if (adv_flags & (MGMT_ADV_FLAG_APPEARANCE)) - max_len -= 4; - } - - return max_len; -} - static int get_adv_size_info(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { From 9c9db78dc0fbbd95177fefdad008e46ffaa777f2 Mon Sep 17 00:00:00 2001 From: Szymon Janc Date: Sun, 18 Sep 2016 12:50:06 +0200 Subject: [PATCH 1232/1715] Bluetooth: Fix advertising instance validity check for flags Flags are not allowed in Scan Response. Signed-off-by: Szymon Janc Signed-off-by: Marcel Holtmann --- net/bluetooth/mgmt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index c96b0adc4971..2758c6a4425c 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -6065,7 +6065,8 @@ static bool tlv_data_is_valid(u32 adv_flags, u8 *data, u8 len, bool is_adv_data) for (i = 0, cur_len = 0; i < len; i += (cur_len + 1)) { cur_len = data[i]; - if (data[i + 1] == EIR_FLAGS && flags_managed(adv_flags)) + if (data[i + 1] == EIR_FLAGS && + (!is_adv_data || flags_managed(adv_flags))) return false; if (data[i + 1] == EIR_TX_POWER && tx_power_managed(adv_flags)) From 3310230c5dddfafe3d1ef87f1257812011681aca Mon Sep 17 00:00:00 2001 From: Szymon Janc Date: Sun, 18 Sep 2016 12:50:07 +0200 Subject: [PATCH 1233/1715] Bluetooth: Increment management interface revision Increment the mgmt revision due to the recently added Read Extended Controller Information and Set Appearance commands. Signed-off-by: Szymon Janc Signed-off-by: Marcel Holtmann --- net/bluetooth/mgmt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 2758c6a4425c..54dd218d06f7 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -38,7 +38,7 @@ #include "mgmt_util.h" #define MGMT_VERSION 1 -#define MGMT_REVISION 13 +#define MGMT_REVISION 14 static const u16 mgmt_commands[] = { MGMT_OP_READ_INDEX_LIST, From 143f0a28ff7ebcc74144ed29bc66da6fbcce0dc7 Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Mon, 19 Sep 2016 12:05:12 +0200 Subject: [PATCH 1234/1715] Bluetooth: hci_bcm: Change protocol name Use full name instead of abbreviation. Signed-off-by: Loic Poulain Signed-off-by: Marcel Holtmann --- drivers/bluetooth/hci_bcm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c index 1c97eda8bae3..5ccb90ef0146 100644 --- a/drivers/bluetooth/hci_bcm.c +++ b/drivers/bluetooth/hci_bcm.c @@ -798,7 +798,7 @@ static int bcm_remove(struct platform_device *pdev) static const struct hci_uart_proto bcm_proto = { .id = HCI_UART_BCM, - .name = "BCM", + .name = "Broadcom", .manufacturer = 15, .init_speed = 115200, .oper_speed = 4000000, From 9e69130c4efc61ce0a8fb3b9eea0188f8d41f779 Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Mon, 19 Sep 2016 11:32:35 +0200 Subject: [PATCH 1235/1715] Bluetooth: hci_uart: Add Nokia Protocol identifier Will be used by hci_nokia extra protocol. Signed-off-by: Loic Poulain Signed-off-by: Marcel Holtmann --- drivers/bluetooth/hci_uart.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/bluetooth/hci_uart.h b/drivers/bluetooth/hci_uart.h index 839bad1d8152..22b7c58ae837 100644 --- a/drivers/bluetooth/hci_uart.h +++ b/drivers/bluetooth/hci_uart.h @@ -35,7 +35,7 @@ #define HCIUARTGETFLAGS _IOR('U', 204, int) /* UART protocols */ -#define HCI_UART_MAX_PROTO 10 +#define HCI_UART_MAX_PROTO 11 #define HCI_UART_H4 0 #define HCI_UART_BCSP 1 @@ -47,6 +47,7 @@ #define HCI_UART_BCM 7 #define HCI_UART_QCA 8 #define HCI_UART_AG6XX 9 +#define HCI_UART_NOKIA 10 #define HCI_UART_RAW_DEVICE 0 #define HCI_UART_RESET_ON_INIT 1 From 162f812f23bab583f5d514ca0e4df67797ac9cdf Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Mon, 19 Sep 2016 16:29:27 +0200 Subject: [PATCH 1236/1715] Bluetooth: hci_uart: Add Marvell support This patch introduces support for Marvell Bluetooth controller over UART (8897 for now). In order to send the final firmware at full speed, a helper firmware is firstly sent. Firmware download is driven by the controller which sends request firmware packets (including expected size). This driver is a global rework of the one proposed by Amitkumar Karwar . Signed-off-by: Loic Poulain Signed-off-by: Marcel Holtmann --- drivers/bluetooth/Kconfig | 11 + drivers/bluetooth/Makefile | 1 + drivers/bluetooth/hci_ldisc.c | 6 + drivers/bluetooth/hci_mrvl.c | 387 ++++++++++++++++++++++++++++++++++ drivers/bluetooth/hci_uart.h | 8 +- 5 files changed, 412 insertions(+), 1 deletion(-) create mode 100644 drivers/bluetooth/hci_mrvl.c diff --git a/drivers/bluetooth/Kconfig b/drivers/bluetooth/Kconfig index 43e9f9328df8..3cc9bff9d99d 100644 --- a/drivers/bluetooth/Kconfig +++ b/drivers/bluetooth/Kconfig @@ -180,6 +180,17 @@ config BT_HCIUART_AG6XX Say Y here to compile support for Intel AG6XX protocol. +config BT_HCIUART_MRVL + bool "Marvell protocol support" + depends on BT_HCIUART + select BT_HCIUART_H4 + help + Marvell is serial protocol for communication between Bluetooth + device and host. This protocol is required for most Marvell Bluetooth + devices with UART interface. + + Say Y here to compile support for HCI MRVL protocol. + config BT_HCIBCM203X tristate "HCI BCM203x USB driver" depends on USB diff --git a/drivers/bluetooth/Makefile b/drivers/bluetooth/Makefile index 3e92cfeb9ee2..b1fc29a697b7 100644 --- a/drivers/bluetooth/Makefile +++ b/drivers/bluetooth/Makefile @@ -38,6 +38,7 @@ hci_uart-$(CONFIG_BT_HCIUART_INTEL) += hci_intel.o hci_uart-$(CONFIG_BT_HCIUART_BCM) += hci_bcm.o hci_uart-$(CONFIG_BT_HCIUART_QCA) += hci_qca.o hci_uart-$(CONFIG_BT_HCIUART_AG6XX) += hci_ag6xx.o +hci_uart-$(CONFIG_BT_HCIUART_MRVL) += hci_mrvl.o hci_uart-objs := $(hci_uart-y) ccflags-y += -D__CHECK_ENDIAN__ diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c index dda97398c59a..9a3aab67b6bb 100644 --- a/drivers/bluetooth/hci_ldisc.c +++ b/drivers/bluetooth/hci_ldisc.c @@ -810,6 +810,9 @@ static int __init hci_uart_init(void) #ifdef CONFIG_BT_HCIUART_AG6XX ag6xx_init(); #endif +#ifdef CONFIG_BT_HCIUART_MRVL + mrvl_init(); +#endif return 0; } @@ -845,6 +848,9 @@ static void __exit hci_uart_exit(void) #ifdef CONFIG_BT_HCIUART_AG6XX ag6xx_deinit(); #endif +#ifdef CONFIG_BT_HCIUART_MRVL + mrvl_deinit(); +#endif /* Release tty registration of line discipline */ err = tty_unregister_ldisc(N_HCI); diff --git a/drivers/bluetooth/hci_mrvl.c b/drivers/bluetooth/hci_mrvl.c new file mode 100644 index 000000000000..bbc4b39b1dbf --- /dev/null +++ b/drivers/bluetooth/hci_mrvl.c @@ -0,0 +1,387 @@ +/* + * + * Bluetooth HCI UART driver for marvell devices + * + * Copyright (C) 2016 Marvell International Ltd. + * Copyright (C) 2016 Intel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "hci_uart.h" + +#define HCI_FW_REQ_PKT 0xA5 +#define HCI_CHIP_VER_PKT 0xAA + +#define MRVL_ACK 0x5A +#define MRVL_NAK 0xBF +#define MRVL_RAW_DATA 0x1F + +enum { + STATE_CHIP_VER_PENDING, + STATE_FW_REQ_PENDING, +}; + +struct mrvl_data { + struct sk_buff *rx_skb; + struct sk_buff_head txq; + struct sk_buff_head rawq; + unsigned long flags; + unsigned int tx_len; + u8 id, rev; +}; + +struct hci_mrvl_pkt { + __le16 lhs; + __le16 rhs; +} __packed; +#define HCI_MRVL_PKT_SIZE 4 + +static int mrvl_open(struct hci_uart *hu) +{ + struct mrvl_data *mrvl; + + BT_DBG("hu %p", hu); + + mrvl = kzalloc(sizeof(*mrvl), GFP_KERNEL); + if (!mrvl) + return -ENOMEM; + + skb_queue_head_init(&mrvl->txq); + skb_queue_head_init(&mrvl->rawq); + + set_bit(STATE_CHIP_VER_PENDING, &mrvl->flags); + + hu->priv = mrvl; + return 0; +} + +static int mrvl_close(struct hci_uart *hu) +{ + struct mrvl_data *mrvl = hu->priv; + + BT_DBG("hu %p", hu); + + skb_queue_purge(&mrvl->txq); + skb_queue_purge(&mrvl->rawq); + kfree_skb(mrvl->rx_skb); + kfree(mrvl); + + hu->priv = NULL; + return 0; +} + +static int mrvl_flush(struct hci_uart *hu) +{ + struct mrvl_data *mrvl = hu->priv; + + BT_DBG("hu %p", hu); + + skb_queue_purge(&mrvl->txq); + skb_queue_purge(&mrvl->rawq); + + return 0; +} + +static struct sk_buff *mrvl_dequeue(struct hci_uart *hu) +{ + struct mrvl_data *mrvl = hu->priv; + struct sk_buff *skb; + + skb = skb_dequeue(&mrvl->txq); + if (!skb) { + /* Any raw data ? */ + skb = skb_dequeue(&mrvl->rawq); + } else { + /* Prepend skb with frame type */ + memcpy(skb_push(skb, 1), &bt_cb(skb)->pkt_type, 1); + } + + return skb; +} + +static int mrvl_enqueue(struct hci_uart *hu, struct sk_buff *skb) +{ + struct mrvl_data *mrvl = hu->priv; + + skb_queue_tail(&mrvl->txq, skb); + return 0; +} + +static void mrvl_send_ack(struct hci_uart *hu, unsigned char type) +{ + struct mrvl_data *mrvl = hu->priv; + struct sk_buff *skb; + + /* No H4 payload, only 1 byte header */ + skb = bt_skb_alloc(0, GFP_ATOMIC); + if (!skb) { + bt_dev_err(hu->hdev, "Unable to alloc ack/nak packet"); + return; + } + hci_skb_pkt_type(skb) = type; + + skb_queue_tail(&mrvl->txq, skb); + hci_uart_tx_wakeup(hu); +} + +static int mrvl_recv_fw_req(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_mrvl_pkt *pkt = (void *)skb->data; + struct hci_uart *hu = hci_get_drvdata(hdev); + struct mrvl_data *mrvl = hu->priv; + int ret = 0; + + if ((pkt->lhs ^ pkt->rhs) != 0xffff) { + bt_dev_err(hdev, "Corrupted mrvl header"); + mrvl_send_ack(hu, MRVL_NAK); + ret = -EINVAL; + goto done; + } + mrvl_send_ack(hu, MRVL_ACK); + + if (!test_bit(STATE_FW_REQ_PENDING, &mrvl->flags)) { + bt_dev_err(hdev, "Received unexpected firmware request"); + ret = -EINVAL; + goto done; + } + + mrvl->tx_len = le16_to_cpu(pkt->lhs); + + clear_bit(STATE_FW_REQ_PENDING, &mrvl->flags); + smp_mb__after_atomic(); + wake_up_bit(&mrvl->flags, STATE_FW_REQ_PENDING); + +done: + kfree_skb(skb); + return ret; +} + +static int mrvl_recv_chip_ver(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_mrvl_pkt *pkt = (void *)skb->data; + struct hci_uart *hu = hci_get_drvdata(hdev); + struct mrvl_data *mrvl = hu->priv; + u16 version = le16_to_cpu(pkt->lhs); + int ret = 0; + + if ((pkt->lhs ^ pkt->rhs) != 0xffff) { + bt_dev_err(hdev, "Corrupted mrvl header"); + mrvl_send_ack(hu, MRVL_NAK); + ret = -EINVAL; + goto done; + } + mrvl_send_ack(hu, MRVL_ACK); + + if (!test_bit(STATE_CHIP_VER_PENDING, &mrvl->flags)) { + bt_dev_err(hdev, "Received unexpected chip version"); + goto done; + } + + mrvl->id = version; + mrvl->rev = version >> 8; + + bt_dev_info(hdev, "Controller id = %x, rev = %x", mrvl->id, mrvl->rev); + + clear_bit(STATE_CHIP_VER_PENDING, &mrvl->flags); + smp_mb__after_atomic(); + wake_up_bit(&mrvl->flags, STATE_CHIP_VER_PENDING); + +done: + kfree_skb(skb); + return ret; +} + +#define HCI_RECV_CHIP_VER \ + .type = HCI_CHIP_VER_PKT, \ + .hlen = HCI_MRVL_PKT_SIZE, \ + .loff = 0, \ + .lsize = 0, \ + .maxlen = HCI_MRVL_PKT_SIZE + +#define HCI_RECV_FW_REQ \ + .type = HCI_FW_REQ_PKT, \ + .hlen = HCI_MRVL_PKT_SIZE, \ + .loff = 0, \ + .lsize = 0, \ + .maxlen = HCI_MRVL_PKT_SIZE + +static const struct h4_recv_pkt mrvl_recv_pkts[] = { + { H4_RECV_ACL, .recv = hci_recv_frame }, + { H4_RECV_SCO, .recv = hci_recv_frame }, + { H4_RECV_EVENT, .recv = hci_recv_frame }, + { HCI_RECV_FW_REQ, .recv = mrvl_recv_fw_req }, + { HCI_RECV_CHIP_VER, .recv = mrvl_recv_chip_ver }, +}; + +static int mrvl_recv(struct hci_uart *hu, const void *data, int count) +{ + struct mrvl_data *mrvl = hu->priv; + + if (!test_bit(HCI_UART_REGISTERED, &hu->flags)) + return -EUNATCH; + + mrvl->rx_skb = h4_recv_buf(hu->hdev, mrvl->rx_skb, data, count, + mrvl_recv_pkts, + ARRAY_SIZE(mrvl_recv_pkts)); + if (IS_ERR(mrvl->rx_skb)) { + int err = PTR_ERR(mrvl->rx_skb); + bt_dev_err(hu->hdev, "Frame reassembly failed (%d)", err); + mrvl->rx_skb = NULL; + return err; + } + + return count; +} + +static int mrvl_load_firmware(struct hci_dev *hdev, const char *name) +{ + struct hci_uart *hu = hci_get_drvdata(hdev); + struct mrvl_data *mrvl = hu->priv; + const struct firmware *fw = NULL; + const u8 *fw_ptr, *fw_max; + int err; + + err = request_firmware(&fw, name, &hdev->dev); + if (err < 0) { + bt_dev_err(hdev, "Failed to load firmware file %s", name); + return err; + } + + fw_ptr = fw->data; + fw_max = fw->data + fw->size; + + bt_dev_info(hdev, "Loading %s", name); + + set_bit(STATE_FW_REQ_PENDING, &mrvl->flags); + + while (fw_ptr <= fw_max) { + struct sk_buff *skb; + + /* Controller drives the firmware load by sending firmware + * request packets containing the expected fragment size. + */ + err = wait_on_bit_timeout(&mrvl->flags, STATE_FW_REQ_PENDING, + TASK_INTERRUPTIBLE, + msecs_to_jiffies(2000)); + if (err == 1) { + bt_dev_err(hdev, "Firmware load interrupted"); + err = -EINTR; + break; + } else if (err) { + bt_dev_err(hdev, "Firmware request timeout"); + err = -ETIMEDOUT; + break; + } + + bt_dev_dbg(hdev, "Firmware request, expecting %d bytes", + mrvl->tx_len); + + if (fw_ptr == fw_max) { + /* Controller requests a null size once firmware is + * fully loaded. If controller expects more data, there + * is an issue. + */ + if (!mrvl->tx_len) { + bt_dev_info(hdev, "Firmware loading complete"); + } else { + bt_dev_err(hdev, "Firmware loading failure"); + err = -EINVAL; + } + break; + } + + if (fw_ptr + mrvl->tx_len > fw_max) { + mrvl->tx_len = fw_max - fw_ptr; + bt_dev_dbg(hdev, "Adjusting tx_len to %d", + mrvl->tx_len); + } + + skb = bt_skb_alloc(mrvl->tx_len, GFP_KERNEL); + if (!skb) { + bt_dev_err(hdev, "Failed to alloc mem for FW packet"); + err = -ENOMEM; + break; + } + bt_cb(skb)->pkt_type = MRVL_RAW_DATA; + + memcpy(skb_put(skb, mrvl->tx_len), fw_ptr, mrvl->tx_len); + fw_ptr += mrvl->tx_len; + + set_bit(STATE_FW_REQ_PENDING, &mrvl->flags); + + skb_queue_tail(&mrvl->rawq, skb); + hci_uart_tx_wakeup(hu); + } + + release_firmware(fw); + return err; +} + +static int mrvl_setup(struct hci_uart *hu) +{ + int err; + + hci_uart_set_flow_control(hu, true); + + err = mrvl_load_firmware(hu->hdev, "mrvl/helper_uart_3000000.bin"); + if (err) { + bt_dev_err(hu->hdev, "Unable to download firmware helper"); + return -EINVAL; + } + + hci_uart_set_baudrate(hu, 3000000); + hci_uart_set_flow_control(hu, false); + + err = mrvl_load_firmware(hu->hdev, "mrvl/uart8897_bt.bin"); + if (err) + return err; + + return 0; +} + +static const struct hci_uart_proto mrvl_proto = { + .id = HCI_UART_MRVL, + .name = "Marvell", + .init_speed = 115200, + .open = mrvl_open, + .close = mrvl_close, + .flush = mrvl_flush, + .setup = mrvl_setup, + .recv = mrvl_recv, + .enqueue = mrvl_enqueue, + .dequeue = mrvl_dequeue, +}; + +int __init mrvl_init(void) +{ + return hci_uart_register_proto(&mrvl_proto); +} + +int __exit mrvl_deinit(void) +{ + return hci_uart_unregister_proto(&mrvl_proto); +} diff --git a/drivers/bluetooth/hci_uart.h b/drivers/bluetooth/hci_uart.h index 22b7c58ae837..070139513e65 100644 --- a/drivers/bluetooth/hci_uart.h +++ b/drivers/bluetooth/hci_uart.h @@ -35,7 +35,7 @@ #define HCIUARTGETFLAGS _IOR('U', 204, int) /* UART protocols */ -#define HCI_UART_MAX_PROTO 11 +#define HCI_UART_MAX_PROTO 12 #define HCI_UART_H4 0 #define HCI_UART_BCSP 1 @@ -48,6 +48,7 @@ #define HCI_UART_QCA 8 #define HCI_UART_AG6XX 9 #define HCI_UART_NOKIA 10 +#define HCI_UART_MRVL 11 #define HCI_UART_RAW_DEVICE 0 #define HCI_UART_RESET_ON_INIT 1 @@ -190,3 +191,8 @@ int qca_deinit(void); int ag6xx_init(void); int ag6xx_deinit(void); #endif + +#ifdef CONFIG_BT_HCIUART_MRVL +int mrvl_init(void); +int mrvl_deinit(void); +#endif From 7d5c11da1ff6389511c42448f59456373edfc103 Mon Sep 17 00:00:00 2001 From: Szymon Janc Date: Mon, 19 Sep 2016 20:25:52 +0200 Subject: [PATCH 1237/1715] Bluetooth: Refactor read_ext_controller_info handler There is no need to allocate heap for reply only to copy stack data to it. This also fix rp memory leak and missing hdev unlock if kmalloc failed. Signed-off-by: Szymon Janc Signed-off-by: Marcel Holtmann --- net/bluetooth/mgmt.c | 42 +++++++++++++++++++----------------------- 1 file changed, 19 insertions(+), 23 deletions(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 54dd218d06f7..604c48142848 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -881,35 +881,17 @@ static inline u16 eir_append_data(u8 *eir, u16 eir_len, u8 type, u8 *data, static int read_ext_controller_info(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { - struct mgmt_rp_read_ext_info *rp; - char buff[512]; + char buf[512]; + struct mgmt_rp_read_ext_info *rp = (void *)buf; u16 eir_len = 0; - u8 name_len; + size_t name_len; BT_DBG("sock %p %s", sk, hdev->name); + memset(&buf, 0, sizeof(buf)); + hci_dev_lock(hdev); - if (hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) - eir_len = eir_append_data(buff, eir_len, - EIR_CLASS_OF_DEV, - hdev->dev_class, 3); - - name_len = strlen(hdev->dev_name); - eir_len = eir_append_data(buff, eir_len, EIR_NAME_COMPLETE, - hdev->dev_name, name_len); - - name_len = strlen(hdev->short_name); - eir_len = eir_append_data(buff, eir_len, EIR_NAME_SHORT, - hdev->short_name, name_len); - - rp = kzalloc(sizeof(*rp) + eir_len, GFP_KERNEL); - if (!rp) - return -ENOMEM; - - rp->eir_len = cpu_to_le16(eir_len); - memcpy(rp->eir, buff, eir_len); - bacpy(&rp->bdaddr, &hdev->bdaddr); rp->version = hdev->hci_ver; @@ -918,6 +900,20 @@ static int read_ext_controller_info(struct sock *sk, struct hci_dev *hdev, rp->supported_settings = cpu_to_le32(get_supported_settings(hdev)); rp->current_settings = cpu_to_le32(get_current_settings(hdev)); + if (hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) + eir_len = eir_append_data(rp->eir, eir_len, EIR_CLASS_OF_DEV, + hdev->dev_class, 3); + + name_len = strlen(hdev->dev_name); + eir_len = eir_append_data(rp->eir, eir_len, EIR_NAME_COMPLETE, + hdev->dev_name, name_len); + + name_len = strlen(hdev->short_name); + eir_len = eir_append_data(rp->eir, eir_len, EIR_NAME_SHORT, + hdev->short_name, name_len); + + rp->eir_len = cpu_to_le16(eir_len); + hci_dev_unlock(hdev); /* If this command is called at least once, then the events From cde7a863d36a4a629c111f37edc2297d6b822a82 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Narajowski?= Date: Mon, 19 Sep 2016 20:25:53 +0200 Subject: [PATCH 1238/1715] Bluetooth: Factor appending EIR to separate helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will also be used for Extended Information Event handling. Signed-off-by: Michał Narajowski Signed-off-by: Szymon Janc Signed-off-by: Marcel Holtmann --- net/bluetooth/mgmt.c | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 604c48142848..2b6fe10256b9 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -878,13 +878,32 @@ static inline u16 eir_append_data(u8 *eir, u16 eir_len, u8 type, u8 *data, return eir_len; } +static u16 append_eir_data_to_buf(struct hci_dev *hdev, u8 *eir) +{ + u16 eir_len = 0; + size_t name_len; + + if (hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) + eir_len = eir_append_data(eir, eir_len, EIR_CLASS_OF_DEV, + hdev->dev_class, 3); + + name_len = strlen(hdev->dev_name); + eir_len = eir_append_data(eir, eir_len, EIR_NAME_COMPLETE, + hdev->dev_name, name_len); + + name_len = strlen(hdev->short_name); + eir_len = eir_append_data(eir, eir_len, EIR_NAME_SHORT, + hdev->short_name, name_len); + + return eir_len; +} + static int read_ext_controller_info(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { char buf[512]; struct mgmt_rp_read_ext_info *rp = (void *)buf; - u16 eir_len = 0; - size_t name_len; + u16 eir_len; BT_DBG("sock %p %s", sk, hdev->name); @@ -900,18 +919,8 @@ static int read_ext_controller_info(struct sock *sk, struct hci_dev *hdev, rp->supported_settings = cpu_to_le32(get_supported_settings(hdev)); rp->current_settings = cpu_to_le32(get_current_settings(hdev)); - if (hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) - eir_len = eir_append_data(rp->eir, eir_len, EIR_CLASS_OF_DEV, - hdev->dev_class, 3); - - name_len = strlen(hdev->dev_name); - eir_len = eir_append_data(rp->eir, eir_len, EIR_NAME_COMPLETE, - hdev->dev_name, name_len); - - name_len = strlen(hdev->short_name); - eir_len = eir_append_data(rp->eir, eir_len, EIR_NAME_SHORT, - hdev->short_name, name_len); + eir_len = append_eir_data_to_buf(hdev, rp->eir); rp->eir_len = cpu_to_le16(eir_len); hci_dev_unlock(hdev); From 6a9e90bff9cfb33d5939c29e5bf2674c9176365d Mon Sep 17 00:00:00 2001 From: Szymon Janc Date: Mon, 19 Sep 2016 20:25:54 +0200 Subject: [PATCH 1239/1715] Bluetooth: Add appearance to Read Ext Controller Info command MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If LE is enabled appearance is added to EIR data. Signed-off-by: Michał Narajowski Signed-off-by: Szymon Janc Signed-off-by: Marcel Holtmann --- net/bluetooth/mgmt.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 2b6fe10256b9..d3837e0633af 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -878,6 +878,16 @@ static inline u16 eir_append_data(u8 *eir, u16 eir_len, u8 type, u8 *data, return eir_len; } +static inline u16 eir_append_le16(u8 *eir, u16 eir_len, u8 type, u16 data) +{ + eir[eir_len++] = sizeof(type) + sizeof(data); + eir[eir_len++] = type; + put_unaligned_le16(data, &eir[eir_len]); + eir_len += sizeof(data); + + return eir_len; +} + static u16 append_eir_data_to_buf(struct hci_dev *hdev, u8 *eir) { u16 eir_len = 0; @@ -887,6 +897,10 @@ static u16 append_eir_data_to_buf(struct hci_dev *hdev, u8 *eir) eir_len = eir_append_data(eir, eir_len, EIR_CLASS_OF_DEV, hdev->dev_class, 3); + if (hci_dev_test_flag(hdev, HCI_LE_ENABLED)) + eir_len = eir_append_le16(eir, eir_len, EIR_APPEARANCE, + hdev->appearance); + name_len = strlen(hdev->dev_name); eir_len = eir_append_data(eir, eir_len, EIR_NAME_COMPLETE, hdev->dev_name, name_len); From 5e9fae48f800b973e45887ce0b8d717d54c0bb11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Narajowski?= Date: Mon, 19 Sep 2016 20:25:55 +0200 Subject: [PATCH 1240/1715] Bluetooth: Add supported data types to ext info changed event MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds EIR data to extended info changed event. Signed-off-by: Michał Narajowski Signed-off-by: Szymon Janc Signed-off-by: Marcel Holtmann --- net/bluetooth/mgmt.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index d3837e0633af..29e5ce95c50c 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -954,12 +954,18 @@ static int read_ext_controller_info(struct sock *sk, struct hci_dev *hdev, static int ext_info_changed(struct hci_dev *hdev, struct sock *skip) { - struct mgmt_ev_ext_info_changed ev; + char buf[512]; + struct mgmt_ev_ext_info_changed *ev = (void *)buf; + u16 eir_len; - ev.eir_len = cpu_to_le16(0); + memset(buf, 0, sizeof(buf)); - return mgmt_limited_event(MGMT_EV_EXT_INFO_CHANGED, hdev, &ev, - sizeof(ev), HCI_MGMT_EXT_INFO_EVENTS, skip); + eir_len = append_eir_data_to_buf(hdev, ev->eir); + ev->eir_len = cpu_to_le16(eir_len); + + return mgmt_limited_event(MGMT_EV_EXT_INFO_CHANGED, hdev, ev, + sizeof(*ev) + eir_len, + HCI_MGMT_EXT_INFO_EVENTS, skip); } static int send_settings_rsp(struct sock *sk, u16 opcode, struct hci_dev *hdev) From e74317f43f5ce2d13cddaab867c59d42934d9585 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Narajowski?= Date: Mon, 19 Sep 2016 20:25:56 +0200 Subject: [PATCH 1241/1715] Bluetooth: Fix missing ext info event when setting appearance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds missing event when setting appearance, just like in the set local name command. Signed-off-by: Michał Narajowski Signed-off-by: Szymon Janc Signed-off-by: Marcel Holtmann --- net/bluetooth/mgmt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 29e5ce95c50c..cd9f345894e0 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -3187,6 +3187,8 @@ static int set_appearance(struct sock *sk, struct hci_dev *hdev, void *data, if (hci_dev_test_flag(hdev, HCI_LE_ADV)) adv_expire(hdev, MGMT_ADV_FLAG_APPEARANCE); + + ext_info_changed(hdev, sk); } err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_SET_APPEARANCE, 0, NULL, From af4168c5a925dc3b11b0246c2b91124327919f47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Narajowski?= Date: Mon, 19 Sep 2016 14:33:33 +0200 Subject: [PATCH 1242/1715] Bluetooth: Set appearance only for LE capable controllers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Setting appearance on controllers without LE support will result in No Supported error. Signed-off-by: Michał Narajowski Signed-off-by: Johan Hedberg --- net/bluetooth/mgmt.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index cd9f345894e0..7b2bac492fb1 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -3178,6 +3178,10 @@ static int set_appearance(struct sock *sk, struct hci_dev *hdev, void *data, BT_DBG(""); + if (!lmp_le_capable(hdev)) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_APPEARANCE, + MGMT_STATUS_NOT_SUPPORTED); + apperance = le16_to_cpu(cp->appearance); hci_dev_lock(hdev); From 07b26c9454a2a19fff86d6fcf2aba6bc801eb8d8 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 19 Sep 2016 12:58:47 +0200 Subject: [PATCH 1243/1715] gso: Support partial splitting at the frag_list pointer Since commit 8a29111c7 ("net: gro: allow to build full sized skb") gro may build buffers with a frag_list. This can hurt forwarding because most NICs can't offload such packets, they need to be segmented in software. This patch splits buffers with a frag_list at the frag_list pointer into buffers that can be TSO offloaded. Signed-off-by: Steffen Klassert Acked-by: Alexander Duyck Signed-off-by: David S. Miller --- net/core/skbuff.c | 51 +++++++++++++++++++++++++++++++++--------- net/ipv4/af_inet.c | 14 ++++++++---- net/ipv4/gre_offload.c | 6 +++-- net/ipv4/tcp_offload.c | 13 ++++++----- net/ipv4/udp_offload.c | 6 +++-- net/ipv6/ip6_offload.c | 5 ++++- 6 files changed, 69 insertions(+), 26 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 1e329d411242..7bf82a28e10a 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3097,11 +3097,31 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, sg = !!(features & NETIF_F_SG); csum = !!can_checksum_protocol(features, proto); - /* GSO partial only requires that we trim off any excess that - * doesn't fit into an MSS sized block, so take care of that - * now. - */ - if (sg && csum && (features & NETIF_F_GSO_PARTIAL)) { + if (sg && csum && (mss != GSO_BY_FRAGS)) { + if (!(features & NETIF_F_GSO_PARTIAL)) { + struct sk_buff *iter; + + if (!list_skb || + !net_gso_ok(features, skb_shinfo(head_skb)->gso_type)) + goto normal; + + /* Split the buffer at the frag_list pointer. + * This is based on the assumption that all + * buffers in the chain excluding the last + * containing the same amount of data. + */ + skb_walk_frags(head_skb, iter) { + if (skb_headlen(iter)) + goto normal; + + len -= iter->len; + } + } + + /* GSO partial only requires that we trim off any excess that + * doesn't fit into an MSS sized block, so take care of that + * now. + */ partial_segs = len / mss; if (partial_segs > 1) mss *= partial_segs; @@ -3109,6 +3129,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, partial_segs = 0; } +normal: headroom = skb_headroom(head_skb); pos = skb_headlen(head_skb); @@ -3300,21 +3321,29 @@ perform_csum_check: */ segs->prev = tail; - /* Update GSO info on first skb in partial sequence. */ if (partial_segs) { + struct sk_buff *iter; int type = skb_shinfo(head_skb)->gso_type; + unsigned short gso_size = skb_shinfo(head_skb)->gso_size; /* Update type to add partial and then remove dodgy if set */ - type |= SKB_GSO_PARTIAL; + type |= (features & NETIF_F_GSO_PARTIAL) / NETIF_F_GSO_PARTIAL * SKB_GSO_PARTIAL; type &= ~SKB_GSO_DODGY; /* Update GSO info and prepare to start updating headers on * our way back down the stack of protocols. */ - skb_shinfo(segs)->gso_size = skb_shinfo(head_skb)->gso_size; - skb_shinfo(segs)->gso_segs = partial_segs; - skb_shinfo(segs)->gso_type = type; - SKB_GSO_CB(segs)->data_offset = skb_headroom(segs) + doffset; + for (iter = segs; iter; iter = iter->next) { + skb_shinfo(iter)->gso_size = gso_size; + skb_shinfo(iter)->gso_segs = partial_segs; + skb_shinfo(iter)->gso_type = type; + SKB_GSO_CB(iter)->data_offset = skb_headroom(iter) + doffset; + } + + if (tail->len - doffset <= gso_size) + skb_shinfo(tail)->gso_size = 0; + else if (tail != segs) + skb_shinfo(tail)->gso_segs = DIV_ROUND_UP(tail->len - doffset, gso_size); } /* Following permits correct backpressure, for protocols diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index e94b47be0019..1effc986739e 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1192,7 +1192,7 @@ EXPORT_SYMBOL(inet_sk_rebuild_header); struct sk_buff *inet_gso_segment(struct sk_buff *skb, netdev_features_t features) { - bool udpfrag = false, fixedid = false, encap; + bool udpfrag = false, fixedid = false, gso_partial, encap; struct sk_buff *segs = ERR_PTR(-EINVAL); const struct net_offload *ops; unsigned int offset = 0; @@ -1245,6 +1245,8 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb, if (IS_ERR_OR_NULL(segs)) goto out; + gso_partial = !!(skb_shinfo(segs)->gso_type & SKB_GSO_PARTIAL); + skb = segs; do { iph = (struct iphdr *)(skb_mac_header(skb) + nhoff); @@ -1259,9 +1261,13 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb, iph->id = htons(id); id += skb_shinfo(skb)->gso_segs; } - tot_len = skb_shinfo(skb)->gso_size + - SKB_GSO_CB(skb)->data_offset + - skb->head - (unsigned char *)iph; + + if (gso_partial) + tot_len = skb_shinfo(skb)->gso_size + + SKB_GSO_CB(skb)->data_offset + + skb->head - (unsigned char *)iph; + else + tot_len = skb->len - nhoff; } else { if (!fixedid) iph->id = htons(id++); diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index ecd1e09dbbf1..96e0efecefa6 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -24,7 +24,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, __be16 protocol = skb->protocol; u16 mac_len = skb->mac_len; int gre_offset, outer_hlen; - bool need_csum, ufo; + bool need_csum, ufo, gso_partial; if (!skb->encapsulation) goto out; @@ -69,6 +69,8 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, goto out; } + gso_partial = !!(skb_shinfo(segs)->gso_type & SKB_GSO_PARTIAL); + outer_hlen = skb_tnl_header_len(skb); gre_offset = outer_hlen - tnl_hlen; skb = segs; @@ -96,7 +98,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, greh = (struct gre_base_hdr *)skb_transport_header(skb); pcsum = (__sum16 *)(greh + 1); - if (skb_is_gso(skb)) { + if (gso_partial) { unsigned int partial_adj; /* Adjust checksum to account for the fact that diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 5c5964962d0c..bc68da38ea86 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -90,12 +90,6 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, goto out; } - /* GSO partial only requires splitting the frame into an MSS - * multiple and possibly a remainder. So update the mss now. - */ - if (features & NETIF_F_GSO_PARTIAL) - mss = skb->len - (skb->len % mss); - copy_destructor = gso_skb->destructor == tcp_wfree; ooo_okay = gso_skb->ooo_okay; /* All segments but the first should have ooo_okay cleared */ @@ -108,6 +102,13 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, /* Only first segment might have ooo_okay set */ segs->ooo_okay = ooo_okay; + /* GSO partial and frag_list segmentation only requires splitting + * the frame into an MSS multiple and possibly a remainder, both + * cases return a GSO skb. So update the mss now. + */ + if (skb_is_gso(segs)) + mss *= skb_shinfo(segs)->gso_segs; + delta = htonl(oldlen + (thlen + mss)); skb = segs; diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 81f253b6ff36..f9333c963607 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -21,7 +21,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, __be16 new_protocol, bool is_ipv6) { int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); - bool remcsum, need_csum, offload_csum, ufo; + bool remcsum, need_csum, offload_csum, ufo, gso_partial; struct sk_buff *segs = ERR_PTR(-EINVAL); struct udphdr *uh = udp_hdr(skb); u16 mac_offset = skb->mac_header; @@ -88,6 +88,8 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, goto out; } + gso_partial = !!(skb_shinfo(segs)->gso_type & SKB_GSO_PARTIAL); + outer_hlen = skb_tnl_header_len(skb); udp_offset = outer_hlen - tnl_hlen; skb = segs; @@ -117,7 +119,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, * will be using a length value equal to only one MSS sized * segment instead of the entire frame. */ - if (skb_is_gso(skb)) { + if (gso_partial) { uh->len = htons(skb_shinfo(skb)->gso_size + SKB_GSO_CB(skb)->data_offset + skb->head - (unsigned char *)uh); diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 22e90e56b5a9..e7bfd55899a3 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -69,6 +69,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, int offset = 0; bool encap, udpfrag; int nhoff; + bool gso_partial; skb_reset_network_header(skb); nhoff = skb_network_header(skb) - skb_mac_header(skb); @@ -101,9 +102,11 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, if (IS_ERR(segs)) goto out; + gso_partial = !!(skb_shinfo(segs)->gso_type & SKB_GSO_PARTIAL); + for (skb = segs; skb; skb = skb->next) { ipv6h = (struct ipv6hdr *)(skb_mac_header(skb) + nhoff); - if (skb_is_gso(skb)) + if (gso_partial) payload_len = skb_shinfo(skb)->gso_size + SKB_GSO_CB(skb)->data_offset + skb->head - (unsigned char *)(ipv6h + 1); From 8d6be8b627389c6dc7e0ea2455a7542c8a2a16a7 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 19 Sep 2016 03:58:00 -0400 Subject: [PATCH 1244/1715] bnxt_en: Use RSS flags defined in the bnxt_hsi.h file. And remove redundant definitions of the same flags. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 8 ++++---- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 5 ----- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 228c964e709a..cee0e8db7cc1 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -3419,10 +3419,10 @@ static int bnxt_hwrm_vnic_set_rss(struct bnxt *bp, u16 vnic_id, bool set_rss) bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_VNIC_RSS_CFG, -1, -1); if (set_rss) { - vnic->hash_type = BNXT_RSS_HASH_TYPE_FLAG_IPV4 | - BNXT_RSS_HASH_TYPE_FLAG_TCP_IPV4 | - BNXT_RSS_HASH_TYPE_FLAG_IPV6 | - BNXT_RSS_HASH_TYPE_FLAG_TCP_IPV6; + vnic->hash_type = VNIC_RSS_CFG_REQ_HASH_TYPE_IPV4 | + VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV4 | + VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6 | + VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV6; req.hash_type = cpu_to_le32(vnic->hash_type); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 23e04a6142fb..db4814e927f7 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -389,11 +389,6 @@ struct rx_tpa_end_cmp_ext { #define INVALID_HW_RING_ID ((u16)-1) -#define BNXT_RSS_HASH_TYPE_FLAG_IPV4 0x01 -#define BNXT_RSS_HASH_TYPE_FLAG_TCP_IPV4 0x02 -#define BNXT_RSS_HASH_TYPE_FLAG_IPV6 0x04 -#define BNXT_RSS_HASH_TYPE_FLAG_TCP_IPV6 0x08 - /* The hardware supports certain page sizes. Use the supported page sizes * to allocate the rings. */ From adbc830545003c4b7494c903654bea22e5a66bb4 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 19 Sep 2016 03:58:01 -0400 Subject: [PATCH 1245/1715] bnxt_en: Simplify PCI device names and add additinal PCI IDs. Remove "Single-port/Dual-port" from the device names. Dual-port devices will appear as 2 separate devices, so no need to call each a dual-port device. Use a more generic name for VF devices belonging to the same chip fanmily. Add some remaining NPAR device IDs. Signed-off-by: David Christensen Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 68 ++++++++++++----------- 1 file changed, 36 insertions(+), 32 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index cee0e8db7cc1..d9b4cd1694ff 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -93,50 +93,49 @@ enum board_idx { BCM57404_NPAR, BCM57406_NPAR, BCM57407_SFP, + BCM57407_NPAR, BCM57414_NPAR, BCM57416_NPAR, - BCM57304_VF, - BCM57404_VF, - BCM57414_VF, - BCM57314_VF, + NETXTREME_E_VF, + NETXTREME_C_VF, }; /* indexed by enum above */ static const struct { char *name; } board_info[] = { - { "Broadcom BCM57301 NetXtreme-C Single-port 10Gb Ethernet" }, - { "Broadcom BCM57302 NetXtreme-C Dual-port 10Gb/25Gb Ethernet" }, - { "Broadcom BCM57304 NetXtreme-C Dual-port 10Gb/25Gb/40Gb/50Gb Ethernet" }, + { "Broadcom BCM57301 NetXtreme-C 10Gb Ethernet" }, + { "Broadcom BCM57302 NetXtreme-C 10Gb/25Gb Ethernet" }, + { "Broadcom BCM57304 NetXtreme-C 10Gb/25Gb/40Gb/50Gb Ethernet" }, { "Broadcom BCM57417 NetXtreme-E Ethernet Partition" }, - { "Broadcom BCM58700 Nitro 4-port 1Gb/2.5Gb/10Gb Ethernet" }, - { "Broadcom BCM57311 NetXtreme-C Single-port 10Gb Ethernet" }, - { "Broadcom BCM57312 NetXtreme-C Dual-port 10Gb/25Gb Ethernet" }, - { "Broadcom BCM57402 NetXtreme-E Dual-port 10Gb Ethernet" }, - { "Broadcom BCM57404 NetXtreme-E Dual-port 10Gb/25Gb Ethernet" }, - { "Broadcom BCM57406 NetXtreme-E Dual-port 10GBase-T Ethernet" }, + { "Broadcom BCM58700 Nitro 1Gb/2.5Gb/10Gb Ethernet" }, + { "Broadcom BCM57311 NetXtreme-C 10Gb Ethernet" }, + { "Broadcom BCM57312 NetXtreme-C 10Gb/25Gb Ethernet" }, + { "Broadcom BCM57402 NetXtreme-E 10Gb Ethernet" }, + { "Broadcom BCM57404 NetXtreme-E 10Gb/25Gb Ethernet" }, + { "Broadcom BCM57406 NetXtreme-E 10GBase-T Ethernet" }, { "Broadcom BCM57402 NetXtreme-E Ethernet Partition" }, - { "Broadcom BCM57407 NetXtreme-E Dual-port 10GBase-T Ethernet" }, - { "Broadcom BCM57412 NetXtreme-E Dual-port 10Gb Ethernet" }, - { "Broadcom BCM57414 NetXtreme-E Dual-port 10Gb/25Gb Ethernet" }, - { "Broadcom BCM57416 NetXtreme-E Dual-port 10GBase-T Ethernet" }, - { "Broadcom BCM57417 NetXtreme-E Dual-port 10GBase-T Ethernet" }, + { "Broadcom BCM57407 NetXtreme-E 10GBase-T Ethernet" }, + { "Broadcom BCM57412 NetXtreme-E 10Gb Ethernet" }, + { "Broadcom BCM57414 NetXtreme-E 10Gb/25Gb Ethernet" }, + { "Broadcom BCM57416 NetXtreme-E 10GBase-T Ethernet" }, + { "Broadcom BCM57417 NetXtreme-E 10GBase-T Ethernet" }, { "Broadcom BCM57412 NetXtreme-E Ethernet Partition" }, - { "Broadcom BCM57314 NetXtreme-C Dual-port 10Gb/25Gb/40Gb/50Gb Ethernet" }, - { "Broadcom BCM57417 NetXtreme-E Dual-port 10Gb/25Gb Ethernet" }, - { "Broadcom BCM57416 NetXtreme-E Dual-port 10Gb Ethernet" }, + { "Broadcom BCM57314 NetXtreme-C 10Gb/25Gb/40Gb/50Gb Ethernet" }, + { "Broadcom BCM57417 NetXtreme-E 10Gb/25Gb Ethernet" }, + { "Broadcom BCM57416 NetXtreme-E 10Gb Ethernet" }, { "Broadcom BCM57404 NetXtreme-E Ethernet Partition" }, { "Broadcom BCM57406 NetXtreme-E Ethernet Partition" }, - { "Broadcom BCM57407 NetXtreme-E Dual-port 25Gb Ethernet" }, + { "Broadcom BCM57407 NetXtreme-E 25Gb Ethernet" }, + { "Broadcom BCM57407 NetXtreme-E Ethernet Partition" }, { "Broadcom BCM57414 NetXtreme-E Ethernet Partition" }, { "Broadcom BCM57416 NetXtreme-E Ethernet Partition" }, - { "Broadcom BCM57304 NetXtreme-C Ethernet Virtual Function" }, - { "Broadcom BCM57404 NetXtreme-E Ethernet Virtual Function" }, - { "Broadcom BCM57414 NetXtreme-E Ethernet Virtual Function" }, - { "Broadcom BCM57314 NetXtreme-E Ethernet Virtual Function" }, + { "Broadcom NetXtreme-E Ethernet Virtual Function" }, + { "Broadcom NetXtreme-C Ethernet Virtual Function" }, }; static const struct pci_device_id bnxt_pci_tbl[] = { + { PCI_VDEVICE(BROADCOM, 0x16c0), .driver_data = BCM57417_NPAR }, { PCI_VDEVICE(BROADCOM, 0x16c8), .driver_data = BCM57301 }, { PCI_VDEVICE(BROADCOM, 0x16c9), .driver_data = BCM57302 }, { PCI_VDEVICE(BROADCOM, 0x16ca), .driver_data = BCM57304 }, @@ -160,13 +159,19 @@ static const struct pci_device_id bnxt_pci_tbl[] = { { PCI_VDEVICE(BROADCOM, 0x16e7), .driver_data = BCM57404_NPAR }, { PCI_VDEVICE(BROADCOM, 0x16e8), .driver_data = BCM57406_NPAR }, { PCI_VDEVICE(BROADCOM, 0x16e9), .driver_data = BCM57407_SFP }, + { PCI_VDEVICE(BROADCOM, 0x16ea), .driver_data = BCM57407_NPAR }, + { PCI_VDEVICE(BROADCOM, 0x16eb), .driver_data = BCM57412_NPAR }, { PCI_VDEVICE(BROADCOM, 0x16ec), .driver_data = BCM57414_NPAR }, + { PCI_VDEVICE(BROADCOM, 0x16ed), .driver_data = BCM57414_NPAR }, { PCI_VDEVICE(BROADCOM, 0x16ee), .driver_data = BCM57416_NPAR }, + { PCI_VDEVICE(BROADCOM, 0x16ef), .driver_data = BCM57416_NPAR }, #ifdef CONFIG_BNXT_SRIOV - { PCI_VDEVICE(BROADCOM, 0x16cb), .driver_data = BCM57304_VF }, - { PCI_VDEVICE(BROADCOM, 0x16d3), .driver_data = BCM57404_VF }, - { PCI_VDEVICE(BROADCOM, 0x16dc), .driver_data = BCM57414_VF }, - { PCI_VDEVICE(BROADCOM, 0x16e1), .driver_data = BCM57314_VF }, + { PCI_VDEVICE(BROADCOM, 0x16c1), .driver_data = NETXTREME_E_VF }, + { PCI_VDEVICE(BROADCOM, 0x16cb), .driver_data = NETXTREME_C_VF }, + { PCI_VDEVICE(BROADCOM, 0x16d3), .driver_data = NETXTREME_E_VF }, + { PCI_VDEVICE(BROADCOM, 0x16dc), .driver_data = NETXTREME_E_VF }, + { PCI_VDEVICE(BROADCOM, 0x16e1), .driver_data = NETXTREME_C_VF }, + { PCI_VDEVICE(BROADCOM, 0x16e5), .driver_data = NETXTREME_C_VF }, #endif { 0 } }; @@ -189,8 +194,7 @@ static const u16 bnxt_async_events_arr[] = { static bool bnxt_vf_pciid(enum board_idx idx) { - return (idx == BCM57304_VF || idx == BCM57404_VF || - idx == BCM57314_VF || idx == BCM57414_VF); + return (idx == NETXTREME_C_VF || idx == NETXTREME_E_VF); } #define DB_CP_REARM_FLAGS (DB_KEY_CP | DB_IDX_VALID) From 441cabbbf1bd0b99e283c9116fe430e53ee67a4a Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 19 Sep 2016 03:58:02 -0400 Subject: [PATCH 1246/1715] bnxt_en: Update to firmware interface spec 1.5.1. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 + drivers/net/ethernet/broadcom/bnxt/bnxt.h | 14 +- drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h | 1251 ++++++++++------- 3 files changed, 760 insertions(+), 508 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index d9b4cd1694ff..f6b4f342d7b4 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4253,6 +4253,9 @@ static int bnxt_hwrm_queue_qportcfg(struct bnxt *bp) if (bp->max_tc > BNXT_MAX_QUEUE) bp->max_tc = BNXT_MAX_QUEUE; + if (resp->queue_cfg_info & QUEUE_QPORTCFG_RESP_QUEUE_CFG_INFO_ASYM_CFG) + bp->max_tc = 1; + qptr = &resp->queue_id0; for (i = 0; i < bp->max_tc; i++) { bp->q_info[i].queue_id = *qptr++; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index db4814e927f7..012cc51440b7 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -11,10 +11,10 @@ #define BNXT_H #define DRV_MODULE_NAME "bnxt_en" -#define DRV_MODULE_VERSION "1.3.0" +#define DRV_MODULE_VERSION "1.5.0" #define DRV_VER_MAJ 1 -#define DRV_VER_MIN 3 +#define DRV_VER_MIN 5 #define DRV_VER_UPD 0 struct tx_bd { @@ -106,11 +106,11 @@ struct tx_cmp { #define CMP_TYPE_REMOTE_DRIVER_REQ 34 #define CMP_TYPE_REMOTE_DRIVER_RESP 36 #define CMP_TYPE_ERROR_STATUS 48 - #define CMPL_BASE_TYPE_STAT_EJECT (0x1aUL << 0) - #define CMPL_BASE_TYPE_HWRM_DONE (0x20UL << 0) - #define CMPL_BASE_TYPE_HWRM_FWD_REQ (0x22UL << 0) - #define CMPL_BASE_TYPE_HWRM_FWD_RESP (0x24UL << 0) - #define CMPL_BASE_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define CMPL_BASE_TYPE_STAT_EJECT 0x1aUL + #define CMPL_BASE_TYPE_HWRM_DONE 0x20UL + #define CMPL_BASE_TYPE_HWRM_FWD_REQ 0x22UL + #define CMPL_BASE_TYPE_HWRM_FWD_RESP 0x24UL + #define CMPL_BASE_TYPE_HWRM_ASYNC_EVENT 0x2eUL #define TX_CMP_FLAGS_ERROR (1 << 6) #define TX_CMP_FLAGS_PUSH (1 << 7) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h index 517567f6d651..04a96cc3498a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h @@ -39,7 +39,7 @@ struct eject_cmpl { __le16 type; #define EJECT_CMPL_TYPE_MASK 0x3fUL #define EJECT_CMPL_TYPE_SFT 0 - #define EJECT_CMPL_TYPE_STAT_EJECT (0x1aUL << 0) + #define EJECT_CMPL_TYPE_STAT_EJECT 0x1aUL __le16 len; __le32 opaque; __le32 v; @@ -52,7 +52,7 @@ struct hwrm_cmpl { __le16 type; #define HWRM_CMPL_TYPE_MASK 0x3fUL #define HWRM_CMPL_TYPE_SFT 0 - #define HWRM_CMPL_TYPE_HWRM_DONE (0x20UL << 0) + #define HWRM_CMPL_TYPE_HWRM_DONE 0x20UL __le16 sequence_id; __le32 unused_1; __le32 v; @@ -65,7 +65,7 @@ struct hwrm_fwd_req_cmpl { __le16 req_len_type; #define HWRM_FWD_REQ_CMPL_TYPE_MASK 0x3fUL #define HWRM_FWD_REQ_CMPL_TYPE_SFT 0 - #define HWRM_FWD_REQ_CMPL_TYPE_HWRM_FWD_REQ (0x22UL << 0) + #define HWRM_FWD_REQ_CMPL_TYPE_HWRM_FWD_REQ 0x22UL #define HWRM_FWD_REQ_CMPL_REQ_LEN_MASK 0xffc0UL #define HWRM_FWD_REQ_CMPL_REQ_LEN_SFT 6 __le16 source_id; @@ -81,7 +81,7 @@ struct hwrm_fwd_resp_cmpl { __le16 type; #define HWRM_FWD_RESP_CMPL_TYPE_MASK 0x3fUL #define HWRM_FWD_RESP_CMPL_TYPE_SFT 0 - #define HWRM_FWD_RESP_CMPL_TYPE_HWRM_FWD_RESP (0x24UL << 0) + #define HWRM_FWD_RESP_CMPL_TYPE_HWRM_FWD_RESP 0x24UL __le16 source_id; __le16 resp_len; __le16 unused_1; @@ -96,25 +96,26 @@ struct hwrm_async_event_cmpl { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_STATUS_CHANGE (0x0UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_MTU_CHANGE (0x1UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CHANGE (0x2UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_DCB_CONFIG_CHANGE (0x3UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PORT_CONN_NOT_ALLOWED (0x4UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_NOT_ALLOWED (0x5UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_CHANGE (0x6UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PORT_PHY_CFG_CHANGE (0x7UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_FUNC_DRVR_UNLOAD (0x10UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_FUNC_DRVR_LOAD (0x11UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PF_DRVR_UNLOAD (0x20UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PF_DRVR_LOAD (0x21UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_VF_FLR (0x30UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_VF_MAC_ADDR_CHANGE (0x31UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PF_VF_COMM_STATUS_CHANGE (0x32UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_VF_CFG_CHANGE (0x33UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR (0xffUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_STATUS_CHANGE 0x0UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_MTU_CHANGE 0x1UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CHANGE 0x2UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_DCB_CONFIG_CHANGE 0x3UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PORT_CONN_NOT_ALLOWED 0x4UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_NOT_ALLOWED 0x5UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_CHANGE 0x6UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PORT_PHY_CFG_CHANGE 0x7UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_FUNC_DRVR_UNLOAD 0x10UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_FUNC_DRVR_LOAD 0x11UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_FUNC_FLR_PROC_CMPLT 0x12UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PF_DRVR_UNLOAD 0x20UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PF_DRVR_LOAD 0x21UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_VF_FLR 0x30UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_VF_MAC_ADDR_CHANGE 0x31UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PF_VF_COMM_STATUS_CHANGE 0x32UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_VF_CFG_CHANGE 0x33UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR 0xffUL __le32 event_data2; u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_V 0x1UL @@ -130,9 +131,9 @@ struct hwrm_async_event_cmpl_link_status_change { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_EVENT_ID_LINK_STATUS_CHANGE (0x0UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_EVENT_ID_LINK_STATUS_CHANGE 0x0UL __le32 event_data2; u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_V 0x1UL @@ -156,9 +157,9 @@ struct hwrm_async_event_cmpl_link_mtu_change { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_LINK_MTU_CHANGE_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_LINK_MTU_CHANGE_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_LINK_MTU_CHANGE_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_LINK_MTU_CHANGE_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_LINK_MTU_CHANGE_EVENT_ID_LINK_MTU_CHANGE (0x1UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_LINK_MTU_CHANGE_EVENT_ID_LINK_MTU_CHANGE 0x1UL __le32 event_data2; u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_LINK_MTU_CHANGE_V 0x1UL @@ -176,9 +177,9 @@ struct hwrm_async_event_cmpl_link_speed_change { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_ID_LINK_SPEED_CHANGE (0x2UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_ID_LINK_SPEED_CHANGE 0x2UL __le32 event_data2; u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_V 0x1UL @@ -200,8 +201,7 @@ struct hwrm_async_event_cmpl_link_speed_change { #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_40GB (0x190UL << 1) #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_50GB (0x1f4UL << 1) #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_100GB (0x3e8UL << 1) - #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_10MB (0xffffUL << 1) - #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_LAST HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_10MB + #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_LAST HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_100GB #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_PORT_ID_MASK 0xffff0000UL #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_PORT_ID_SFT 16 }; @@ -211,9 +211,9 @@ struct hwrm_async_event_cmpl_dcb_config_change { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_EVENT_ID_DCB_CONFIG_CHANGE (0x3UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_EVENT_ID_DCB_CONFIG_CHANGE 0x3UL __le32 event_data2; u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_V 0x1UL @@ -231,9 +231,9 @@ struct hwrm_async_event_cmpl_port_conn_not_allowed { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_EVENT_ID_PORT_CONN_NOT_ALLOWED (0x4UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_EVENT_ID_PORT_CONN_NOT_ALLOWED 0x4UL __le32 event_data2; u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_V 0x1UL @@ -258,9 +258,9 @@ struct hwrm_async_event_cmpl_link_speed_cfg_not_allowed { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_NOT_ALLOWED_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_NOT_ALLOWED_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_NOT_ALLOWED_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_NOT_ALLOWED_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_NOT_ALLOWED_EVENT_ID_LINK_SPEED_CFG_NOT_ALLOWED (0x5UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_NOT_ALLOWED_EVENT_ID_LINK_SPEED_CFG_NOT_ALLOWED 0x5UL __le32 event_data2; u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_NOT_ALLOWED_V 0x1UL @@ -278,9 +278,9 @@ struct hwrm_async_event_cmpl_link_speed_cfg_change { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_EVENT_ID_LINK_SPEED_CFG_CHANGE (0x6UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_EVENT_ID_LINK_SPEED_CFG_CHANGE 0x6UL __le32 event_data2; u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_V 0x1UL @@ -300,9 +300,9 @@ struct hwrm_async_event_cmpl_func_drvr_unload { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_UNLOAD_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_UNLOAD_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_UNLOAD_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_UNLOAD_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_UNLOAD_EVENT_ID_FUNC_DRVR_UNLOAD (0x10UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_UNLOAD_EVENT_ID_FUNC_DRVR_UNLOAD 0x10UL __le32 event_data2; u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_UNLOAD_V 0x1UL @@ -320,9 +320,9 @@ struct hwrm_async_event_cmpl_func_drvr_load { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_LOAD_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_LOAD_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_LOAD_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_LOAD_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_LOAD_EVENT_ID_FUNC_DRVR_LOAD (0x11UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_LOAD_EVENT_ID_FUNC_DRVR_LOAD 0x11UL __le32 event_data2; u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_LOAD_V 0x1UL @@ -340,9 +340,9 @@ struct hwrm_async_event_cmpl_pf_drvr_unload { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_EVENT_ID_PF_DRVR_UNLOAD (0x20UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_EVENT_ID_PF_DRVR_UNLOAD 0x20UL __le32 event_data2; u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_V 0x1UL @@ -362,9 +362,9 @@ struct hwrm_async_event_cmpl_pf_drvr_load { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_LOAD_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_LOAD_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_LOAD_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_LOAD_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_LOAD_EVENT_ID_PF_DRVR_LOAD (0x21UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_LOAD_EVENT_ID_PF_DRVR_LOAD 0x21UL __le32 event_data2; u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_LOAD_V 0x1UL @@ -384,9 +384,9 @@ struct hwrm_async_event_cmpl_vf_flr { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_VF_FLR_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_VF_FLR_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_VF_FLR_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_VF_FLR_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_VF_FLR_EVENT_ID_VF_FLR (0x30UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_VF_FLR_EVENT_ID_VF_FLR 0x30UL __le32 event_data2; u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_VF_FLR_V 0x1UL @@ -404,9 +404,9 @@ struct hwrm_async_event_cmpl_vf_mac_addr_change { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_VF_MAC_ADDR_CHANGE_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_VF_MAC_ADDR_CHANGE_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_VF_MAC_ADDR_CHANGE_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_VF_MAC_ADDR_CHANGE_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_VF_MAC_ADDR_CHANGE_EVENT_ID_VF_MAC_ADDR_CHANGE (0x31UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_VF_MAC_ADDR_CHANGE_EVENT_ID_VF_MAC_ADDR_CHANGE 0x31UL __le32 event_data2; u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_VF_MAC_ADDR_CHANGE_V 0x1UL @@ -424,9 +424,9 @@ struct hwrm_async_event_cmpl_pf_vf_comm_status_change { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_PF_VF_COMM_STATUS_CHANGE_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_PF_VF_COMM_STATUS_CHANGE_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_PF_VF_COMM_STATUS_CHANGE_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_PF_VF_COMM_STATUS_CHANGE_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_PF_VF_COMM_STATUS_CHANGE_EVENT_ID_PF_VF_COMM_STATUS_CHANGE (0x32UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_PF_VF_COMM_STATUS_CHANGE_EVENT_ID_PF_VF_COMM_STATUS_CHANGE 0x32UL __le32 event_data2; u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_PF_VF_COMM_STATUS_CHANGE_V 0x1UL @@ -443,9 +443,9 @@ struct hwrm_async_event_cmpl_vf_cfg_change { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_VF_CFG_CHANGE_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_VF_CFG_CHANGE_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_VF_CFG_CHANGE_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_VF_CFG_CHANGE_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_VF_CFG_CHANGE_EVENT_ID_VF_CFG_CHANGE (0x33UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_VF_CFG_CHANGE_EVENT_ID_VF_CFG_CHANGE 0x33UL __le32 event_data2; u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_VF_CFG_CHANGE_V 0x1UL @@ -465,15 +465,15 @@ struct hwrm_async_event_cmpl_hwrm_error { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_ID_HWRM_ERROR (0xffUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_ID_HWRM_ERROR 0xffUL __le32 event_data2; #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_MASK 0xffUL #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_WARNING (0x0UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_NONFATAL (0x1UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_FATAL (0x2UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_WARNING 0x0UL + #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_NONFATAL 0x1UL + #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_FATAL 0x2UL #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_LAST HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_FATAL u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_V 0x1UL @@ -485,12 +485,12 @@ struct hwrm_async_event_cmpl_hwrm_error { #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA1_TIMESTAMP 0x1UL }; -/* HW Resource Manager Specification 1.3.0 */ +/* HW Resource Manager Specification 1.5.1 */ #define HWRM_VERSION_MAJOR 1 -#define HWRM_VERSION_MINOR 3 -#define HWRM_VERSION_UPDATE 0 +#define HWRM_VERSION_MINOR 5 +#define HWRM_VERSION_UPDATE 1 -#define HWRM_VERSION_STR "1.3.0" +#define HWRM_VERSION_STR "1.5.1" /* * Following is the signature for HWRM message field that indicates not * applicable (All F's). Need to cast it the size of the field if needed. @@ -556,8 +556,8 @@ struct cmd_nums { #define HWRM_QUEUE_QPORTCFG (0x30UL) #define HWRM_QUEUE_QCFG (0x31UL) #define HWRM_QUEUE_CFG (0x32UL) - #define HWRM_QUEUE_BUFFERS_QCFG (0x33UL) - #define HWRM_QUEUE_BUFFERS_CFG (0x34UL) + #define RESERVED2 (0x33UL) + #define RESERVED3 (0x34UL) #define HWRM_QUEUE_PFCENABLE_QCFG (0x35UL) #define HWRM_QUEUE_PFCENABLE_CFG (0x36UL) #define HWRM_QUEUE_PRI2COS_QCFG (0x37UL) @@ -574,6 +574,7 @@ struct cmd_nums { #define HWRM_VNIC_RSS_QCFG (0x47UL) #define HWRM_VNIC_PLCMODES_CFG (0x48UL) #define HWRM_VNIC_PLCMODES_QCFG (0x49UL) + #define HWRM_VNIC_QCAPS (0x4aUL) #define HWRM_RING_ALLOC (0x50UL) #define HWRM_RING_FREE (0x51UL) #define HWRM_RING_CMPL_RING_QAGGINT_PARAMS (0x52UL) @@ -581,13 +582,15 @@ struct cmd_nums { #define HWRM_RING_RESET (0x5eUL) #define HWRM_RING_GRP_ALLOC (0x60UL) #define HWRM_RING_GRP_FREE (0x61UL) + #define RESERVED5 (0x64UL) + #define RESERVED6 (0x65UL) #define HWRM_VNIC_RSS_COS_LB_CTX_ALLOC (0x70UL) #define HWRM_VNIC_RSS_COS_LB_CTX_FREE (0x71UL) #define HWRM_CFA_L2_FILTER_ALLOC (0x90UL) #define HWRM_CFA_L2_FILTER_FREE (0x91UL) #define HWRM_CFA_L2_FILTER_CFG (0x92UL) #define HWRM_CFA_L2_SET_RX_MASK (0x93UL) - #define RESERVED3 (0x94UL) + #define RESERVED4 (0x94UL) #define HWRM_CFA_TUNNEL_FILTER_ALLOC (0x95UL) #define HWRM_CFA_TUNNEL_FILTER_FREE (0x96UL) #define HWRM_CFA_ENCAP_RECORD_ALLOC (0x97UL) @@ -607,6 +610,8 @@ struct cmd_nums { #define HWRM_STAT_CTX_CLR_STATS (0xb3UL) #define HWRM_FW_RESET (0xc0UL) #define HWRM_FW_QSTATUS (0xc1UL) + #define HWRM_FW_SET_TIME (0xc8UL) + #define HWRM_FW_GET_TIME (0xc9UL) #define HWRM_EXEC_FWD_RESP (0xd0UL) #define HWRM_REJECT_FWD_RESP (0xd1UL) #define HWRM_FWD_RESP (0xd2UL) @@ -615,11 +620,13 @@ struct cmd_nums { #define HWRM_WOL_FILTER_ALLOC (0xf0UL) #define HWRM_WOL_FILTER_FREE (0xf1UL) #define HWRM_WOL_FILTER_QCFG (0xf2UL) + #define HWRM_WOL_REASON_QCFG (0xf3UL) #define HWRM_DBG_READ_DIRECT (0xff10UL) #define HWRM_DBG_READ_INDIRECT (0xff11UL) #define HWRM_DBG_WRITE_DIRECT (0xff12UL) #define HWRM_DBG_WRITE_INDIRECT (0xff13UL) #define HWRM_DBG_DUMP (0xff14UL) + #define HWRM_NVM_INSTALL_UPDATE (0xfff3UL) #define HWRM_NVM_MODIFY (0xfff4UL) #define HWRM_NVM_VERIFY_UPDATE (0xfff5UL) #define HWRM_NVM_GET_DEV_INFO (0xfff6UL) @@ -824,7 +831,9 @@ struct hwrm_ver_get_output { u8 netctrl_fw_min; u8 netctrl_fw_bld; u8 netctrl_fw_rsvd; - __le32 reserved1; + __le32 dev_caps_cfg; + #define VER_GET_RESP_DEV_CAPS_CFG_SECURE_FW_UPD_SUPPORTED 0x1UL + #define VER_GET_RESP_DEV_CAPS_CFG_FW_DCBX_AGENT_SUPPORTED 0x2UL u8 roce_fw_maj; u8 roce_fw_min; u8 roce_fw_bld; @@ -839,9 +848,9 @@ struct hwrm_ver_get_output { u8 chip_metal; u8 chip_bond_id; u8 chip_platform_type; - #define VER_GET_RESP_CHIP_PLATFORM_TYPE_ASIC (0x0UL << 0) - #define VER_GET_RESP_CHIP_PLATFORM_TYPE_FPGA (0x1UL << 0) - #define VER_GET_RESP_CHIP_PLATFORM_TYPE_PALLADIUM (0x2UL << 0) + #define VER_GET_RESP_CHIP_PLATFORM_TYPE_ASIC 0x0UL + #define VER_GET_RESP_CHIP_PLATFORM_TYPE_FPGA 0x1UL + #define VER_GET_RESP_CHIP_PLATFORM_TYPE_PALLADIUM 0x2UL __le16 max_req_win_len; __le16 max_resp_len; __le16 def_req_timeout; @@ -863,10 +872,10 @@ struct hwrm_func_reset_input { #define FUNC_RESET_REQ_ENABLES_VF_ID_VALID 0x1UL __le16 vf_id; u8 func_reset_level; - #define FUNC_RESET_REQ_FUNC_RESET_LEVEL_RESETALL (0x0UL << 0) - #define FUNC_RESET_REQ_FUNC_RESET_LEVEL_RESETME (0x1UL << 0) - #define FUNC_RESET_REQ_FUNC_RESET_LEVEL_RESETCHILDREN (0x2UL << 0) - #define FUNC_RESET_REQ_FUNC_RESET_LEVEL_RESETVF (0x3UL << 0) + #define FUNC_RESET_REQ_FUNC_RESET_LEVEL_RESETALL 0x0UL + #define FUNC_RESET_REQ_FUNC_RESET_LEVEL_RESETME 0x1UL + #define FUNC_RESET_REQ_FUNC_RESET_LEVEL_RESETCHILDREN 0x2UL + #define FUNC_RESET_REQ_FUNC_RESET_LEVEL_RESETVF 0x3UL u8 unused_0; }; @@ -1028,6 +1037,10 @@ struct hwrm_func_qcaps_output { #define FUNC_QCAPS_RESP_FLAGS_ROCE_V2_SUPPORTED 0x10UL #define FUNC_QCAPS_RESP_FLAGS_WOL_MAGICPKT_SUPPORTED 0x20UL #define FUNC_QCAPS_RESP_FLAGS_WOL_BMP_SUPPORTED 0x40UL + #define FUNC_QCAPS_RESP_FLAGS_TX_RING_RL_SUPPORTED 0x80UL + #define FUNC_QCAPS_RESP_FLAGS_TX_BW_CFG_SUPPORTED 0x100UL + #define FUNC_QCAPS_RESP_FLAGS_VF_TX_RING_RL_SUPPORTED 0x200UL + #define FUNC_QCAPS_RESP_FLAGS_VF_BW_CFG_SUPPORTED 0x400UL u8 mac_address[6]; __le16 max_rsscos_ctx; __le16 max_cmpl_rings; @@ -1047,9 +1060,8 @@ struct hwrm_func_qcaps_output { __le32 max_mcast_filters; __le32 max_flow_id; __le32 max_hw_ring_grps; + __le16 max_sp_tx_rings; u8 unused_0; - u8 unused_1; - u8 unused_2; u8 valid; }; @@ -1077,6 +1089,7 @@ struct hwrm_func_qcfg_output { __le16 flags; #define FUNC_QCFG_RESP_FLAGS_OOB_WOL_MAGICPKT_ENABLED 0x1UL #define FUNC_QCFG_RESP_FLAGS_OOB_WOL_BMP_ENABLED 0x2UL + #define FUNC_QCFG_RESP_FLAGS_FW_DCBX_AGENT_ENABLED 0x4UL u8 mac_address[6]; __le16 pci_id; __le16 alloc_rsscos_ctx; @@ -1089,29 +1102,46 @@ struct hwrm_func_qcfg_output { __le16 mru; __le16 stat_ctx_id; u8 port_partition_type; - #define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_SPF (0x0UL << 0) - #define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_MPFS (0x1UL << 0) - #define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR1_0 (0x2UL << 0) - #define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR1_5 (0x3UL << 0) - #define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR2_0 (0x4UL << 0) - #define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_UNKNOWN (0xffUL << 0) + #define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_SPF 0x0UL + #define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_MPFS 0x1UL + #define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR1_0 0x2UL + #define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR1_5 0x3UL + #define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR2_0 0x4UL + #define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_UNKNOWN 0xffUL u8 unused_0; __le16 dflt_vnic_id; u8 unused_1; u8 unused_2; __le32 min_bw; + #define FUNC_QCFG_RESP_MIN_BW_BW_VALUE_MASK 0xfffffffUL + #define FUNC_QCFG_RESP_MIN_BW_BW_VALUE_SFT 0 + #define FUNC_QCFG_RESP_MIN_BW_RSVD 0x10000000UL + #define FUNC_QCFG_RESP_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define FUNC_QCFG_RESP_MIN_BW_BW_VALUE_UNIT_SFT 29 + #define FUNC_QCFG_RESP_MIN_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29) + #define FUNC_QCFG_RESP_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define FUNC_QCFG_RESP_MIN_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define FUNC_QCFG_RESP_MIN_BW_BW_VALUE_UNIT_LAST FUNC_QCFG_RESP_MIN_BW_BW_VALUE_UNIT_INVALID __le32 max_bw; + #define FUNC_QCFG_RESP_MAX_BW_BW_VALUE_MASK 0xfffffffUL + #define FUNC_QCFG_RESP_MAX_BW_BW_VALUE_SFT 0 + #define FUNC_QCFG_RESP_MAX_BW_RSVD 0x10000000UL + #define FUNC_QCFG_RESP_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define FUNC_QCFG_RESP_MAX_BW_BW_VALUE_UNIT_SFT 29 + #define FUNC_QCFG_RESP_MAX_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29) + #define FUNC_QCFG_RESP_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define FUNC_QCFG_RESP_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define FUNC_QCFG_RESP_MAX_BW_BW_VALUE_UNIT_LAST FUNC_QCFG_RESP_MAX_BW_BW_VALUE_UNIT_INVALID u8 evb_mode; - #define FUNC_QCFG_RESP_EVB_MODE_NO_EVB (0x0UL << 0) - #define FUNC_QCFG_RESP_EVB_MODE_VEB (0x1UL << 0) - #define FUNC_QCFG_RESP_EVB_MODE_VEPA (0x2UL << 0) + #define FUNC_QCFG_RESP_EVB_MODE_NO_EVB 0x0UL + #define FUNC_QCFG_RESP_EVB_MODE_VEB 0x1UL + #define FUNC_QCFG_RESP_EVB_MODE_VEPA 0x2UL u8 unused_3; - __le16 unused_4; + __le16 alloc_vfs; __le32 alloc_mcast_filters; __le32 alloc_hw_ring_grps; - u8 unused_5; - u8 unused_6; - u8 unused_7; + __le16 alloc_sp_tx_rings; + u8 unused_4; u8 valid; }; @@ -1171,18 +1201,36 @@ struct hwrm_func_cfg_input { __le16 dflt_vlan; __be32 dflt_ip_addr[4]; __le32 min_bw; + #define FUNC_CFG_REQ_MIN_BW_BW_VALUE_MASK 0xfffffffUL + #define FUNC_CFG_REQ_MIN_BW_BW_VALUE_SFT 0 + #define FUNC_CFG_REQ_MIN_BW_RSVD 0x10000000UL + #define FUNC_CFG_REQ_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define FUNC_CFG_REQ_MIN_BW_BW_VALUE_UNIT_SFT 29 + #define FUNC_CFG_REQ_MIN_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29) + #define FUNC_CFG_REQ_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define FUNC_CFG_REQ_MIN_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define FUNC_CFG_REQ_MIN_BW_BW_VALUE_UNIT_LAST FUNC_CFG_REQ_MIN_BW_BW_VALUE_UNIT_INVALID __le32 max_bw; + #define FUNC_CFG_REQ_MAX_BW_BW_VALUE_MASK 0xfffffffUL + #define FUNC_CFG_REQ_MAX_BW_BW_VALUE_SFT 0 + #define FUNC_CFG_REQ_MAX_BW_RSVD 0x10000000UL + #define FUNC_CFG_REQ_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define FUNC_CFG_REQ_MAX_BW_BW_VALUE_UNIT_SFT 29 + #define FUNC_CFG_REQ_MAX_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29) + #define FUNC_CFG_REQ_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define FUNC_CFG_REQ_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define FUNC_CFG_REQ_MAX_BW_BW_VALUE_UNIT_LAST FUNC_CFG_REQ_MAX_BW_BW_VALUE_UNIT_INVALID __le16 async_event_cr; u8 vlan_antispoof_mode; - #define FUNC_CFG_REQ_VLAN_ANTISPOOF_MODE_NOCHECK (0x0UL << 0) - #define FUNC_CFG_REQ_VLAN_ANTISPOOF_MODE_VALIDATE_VLAN (0x1UL << 0) - #define FUNC_CFG_REQ_VLAN_ANTISPOOF_MODE_INSERT_IF_VLANDNE (0x2UL << 0) - #define FUNC_CFG_REQ_VLAN_ANTISPOOF_MODE_INSERT_OR_OVERRIDE_VLAN (0x3UL << 0) + #define FUNC_CFG_REQ_VLAN_ANTISPOOF_MODE_NOCHECK 0x0UL + #define FUNC_CFG_REQ_VLAN_ANTISPOOF_MODE_VALIDATE_VLAN 0x1UL + #define FUNC_CFG_REQ_VLAN_ANTISPOOF_MODE_INSERT_IF_VLANDNE 0x2UL + #define FUNC_CFG_REQ_VLAN_ANTISPOOF_MODE_INSERT_OR_OVERRIDE_VLAN 0x3UL u8 allowed_vlan_pris; u8 evb_mode; - #define FUNC_CFG_REQ_EVB_MODE_NO_EVB (0x0UL << 0) - #define FUNC_CFG_REQ_EVB_MODE_VEB (0x1UL << 0) - #define FUNC_CFG_REQ_EVB_MODE_VEPA (0x2UL << 0) + #define FUNC_CFG_REQ_EVB_MODE_NO_EVB 0x0UL + #define FUNC_CFG_REQ_EVB_MODE_VEB 0x1UL + #define FUNC_CFG_REQ_EVB_MODE_VEPA 0x2UL u8 unused_2; __le16 num_mcast_filters; }; @@ -1341,16 +1389,16 @@ struct hwrm_func_drv_rgtr_input { #define FUNC_DRV_RGTR_REQ_ENABLES_VF_REQ_FWD 0x8UL #define FUNC_DRV_RGTR_REQ_ENABLES_ASYNC_EVENT_FWD 0x10UL __le16 os_type; - #define FUNC_DRV_RGTR_REQ_OS_TYPE_UNKNOWN (0x0UL << 0) - #define FUNC_DRV_RGTR_REQ_OS_TYPE_OTHER (0x1UL << 0) - #define FUNC_DRV_RGTR_REQ_OS_TYPE_MSDOS (0xeUL << 0) - #define FUNC_DRV_RGTR_REQ_OS_TYPE_WINDOWS (0x12UL << 0) - #define FUNC_DRV_RGTR_REQ_OS_TYPE_SOLARIS (0x1dUL << 0) - #define FUNC_DRV_RGTR_REQ_OS_TYPE_LINUX (0x24UL << 0) - #define FUNC_DRV_RGTR_REQ_OS_TYPE_FREEBSD (0x2aUL << 0) - #define FUNC_DRV_RGTR_REQ_OS_TYPE_ESXI (0x68UL << 0) - #define FUNC_DRV_RGTR_REQ_OS_TYPE_WIN864 (0x73UL << 0) - #define FUNC_DRV_RGTR_REQ_OS_TYPE_WIN2012R2 (0x74UL << 0) + #define FUNC_DRV_RGTR_REQ_OS_TYPE_UNKNOWN 0x0UL + #define FUNC_DRV_RGTR_REQ_OS_TYPE_OTHER 0x1UL + #define FUNC_DRV_RGTR_REQ_OS_TYPE_MSDOS 0xeUL + #define FUNC_DRV_RGTR_REQ_OS_TYPE_WINDOWS 0x12UL + #define FUNC_DRV_RGTR_REQ_OS_TYPE_SOLARIS 0x1dUL + #define FUNC_DRV_RGTR_REQ_OS_TYPE_LINUX 0x24UL + #define FUNC_DRV_RGTR_REQ_OS_TYPE_FREEBSD 0x2aUL + #define FUNC_DRV_RGTR_REQ_OS_TYPE_ESXI 0x68UL + #define FUNC_DRV_RGTR_REQ_OS_TYPE_WIN864 0x73UL + #define FUNC_DRV_RGTR_REQ_OS_TYPE_WIN2012R2 0x74UL u8 ver_maj; u8 ver_min; u8 ver_upd; @@ -1415,13 +1463,13 @@ struct hwrm_func_buf_rgtr_input { __le16 vf_id; __le16 req_buf_num_pages; __le16 req_buf_page_size; - #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_16B (0x4UL << 0) - #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_4K (0xcUL << 0) - #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_8K (0xdUL << 0) - #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_64K (0x10UL << 0) - #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_2M (0x15UL << 0) - #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_4M (0x16UL << 0) - #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_1G (0x1eUL << 0) + #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_16B 0x4UL + #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_4K 0xcUL + #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_8K 0xdUL + #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_64K 0x10UL + #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_2M 0x15UL + #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_4M 0x16UL + #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_1G 0x1eUL __le16 req_buf_len; __le16 resp_buf_len; u8 unused_0; @@ -1473,16 +1521,16 @@ struct hwrm_func_drv_qver_output { __le16 seq_id; __le16 resp_len; __le16 os_type; - #define FUNC_DRV_QVER_RESP_OS_TYPE_UNKNOWN (0x0UL << 0) - #define FUNC_DRV_QVER_RESP_OS_TYPE_OTHER (0x1UL << 0) - #define FUNC_DRV_QVER_RESP_OS_TYPE_MSDOS (0xeUL << 0) - #define FUNC_DRV_QVER_RESP_OS_TYPE_WINDOWS (0x12UL << 0) - #define FUNC_DRV_QVER_RESP_OS_TYPE_SOLARIS (0x1dUL << 0) - #define FUNC_DRV_QVER_RESP_OS_TYPE_LINUX (0x24UL << 0) - #define FUNC_DRV_QVER_RESP_OS_TYPE_FREEBSD (0x2aUL << 0) - #define FUNC_DRV_QVER_RESP_OS_TYPE_ESXI (0x68UL << 0) - #define FUNC_DRV_QVER_RESP_OS_TYPE_WIN864 (0x73UL << 0) - #define FUNC_DRV_QVER_RESP_OS_TYPE_WIN2012R2 (0x74UL << 0) + #define FUNC_DRV_QVER_RESP_OS_TYPE_UNKNOWN 0x0UL + #define FUNC_DRV_QVER_RESP_OS_TYPE_OTHER 0x1UL + #define FUNC_DRV_QVER_RESP_OS_TYPE_MSDOS 0xeUL + #define FUNC_DRV_QVER_RESP_OS_TYPE_WINDOWS 0x12UL + #define FUNC_DRV_QVER_RESP_OS_TYPE_SOLARIS 0x1dUL + #define FUNC_DRV_QVER_RESP_OS_TYPE_LINUX 0x24UL + #define FUNC_DRV_QVER_RESP_OS_TYPE_FREEBSD 0x2aUL + #define FUNC_DRV_QVER_RESP_OS_TYPE_ESXI 0x68UL + #define FUNC_DRV_QVER_RESP_OS_TYPE_WIN864 0x73UL + #define FUNC_DRV_QVER_RESP_OS_TYPE_WIN2012R2 0x74UL u8 ver_maj; u8 ver_min; u8 ver_upd; @@ -1528,44 +1576,44 @@ struct hwrm_port_phy_cfg_input { #define PORT_PHY_CFG_REQ_ENABLES_TX_LPI_TIMER 0x400UL __le16 port_id; __le16 force_link_speed; - #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_100MB (0x1UL << 0) - #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_1GB (0xaUL << 0) - #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_2GB (0x14UL << 0) - #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_2_5GB (0x19UL << 0) - #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_10GB (0x64UL << 0) - #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_20GB (0xc8UL << 0) - #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_25GB (0xfaUL << 0) - #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_40GB (0x190UL << 0) - #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_50GB (0x1f4UL << 0) - #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_100GB (0x3e8UL << 0) - #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_10MB (0xffffUL << 0) + #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_100MB 0x1UL + #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_1GB 0xaUL + #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_2GB 0x14UL + #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_2_5GB 0x19UL + #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_10GB 0x64UL + #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_20GB 0xc8UL + #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_25GB 0xfaUL + #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_40GB 0x190UL + #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_50GB 0x1f4UL + #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_100GB 0x3e8UL + #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_10MB 0xffffUL u8 auto_mode; - #define PORT_PHY_CFG_REQ_AUTO_MODE_NONE (0x0UL << 0) - #define PORT_PHY_CFG_REQ_AUTO_MODE_ALL_SPEEDS (0x1UL << 0) - #define PORT_PHY_CFG_REQ_AUTO_MODE_ONE_SPEED (0x2UL << 0) - #define PORT_PHY_CFG_REQ_AUTO_MODE_ONE_OR_BELOW (0x3UL << 0) - #define PORT_PHY_CFG_REQ_AUTO_MODE_SPEED_MASK (0x4UL << 0) + #define PORT_PHY_CFG_REQ_AUTO_MODE_NONE 0x0UL + #define PORT_PHY_CFG_REQ_AUTO_MODE_ALL_SPEEDS 0x1UL + #define PORT_PHY_CFG_REQ_AUTO_MODE_ONE_SPEED 0x2UL + #define PORT_PHY_CFG_REQ_AUTO_MODE_ONE_OR_BELOW 0x3UL + #define PORT_PHY_CFG_REQ_AUTO_MODE_SPEED_MASK 0x4UL u8 auto_duplex; - #define PORT_PHY_CFG_REQ_AUTO_DUPLEX_HALF (0x0UL << 0) - #define PORT_PHY_CFG_REQ_AUTO_DUPLEX_FULL (0x1UL << 0) - #define PORT_PHY_CFG_REQ_AUTO_DUPLEX_BOTH (0x2UL << 0) + #define PORT_PHY_CFG_REQ_AUTO_DUPLEX_HALF 0x0UL + #define PORT_PHY_CFG_REQ_AUTO_DUPLEX_FULL 0x1UL + #define PORT_PHY_CFG_REQ_AUTO_DUPLEX_BOTH 0x2UL u8 auto_pause; #define PORT_PHY_CFG_REQ_AUTO_PAUSE_TX 0x1UL #define PORT_PHY_CFG_REQ_AUTO_PAUSE_RX 0x2UL #define PORT_PHY_CFG_REQ_AUTO_PAUSE_AUTONEG_PAUSE 0x4UL u8 unused_0; __le16 auto_link_speed; - #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_100MB (0x1UL << 0) - #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_1GB (0xaUL << 0) - #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_2GB (0x14UL << 0) - #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_2_5GB (0x19UL << 0) - #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_10GB (0x64UL << 0) - #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_20GB (0xc8UL << 0) - #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_25GB (0xfaUL << 0) - #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_40GB (0x190UL << 0) - #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_50GB (0x1f4UL << 0) - #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_100GB (0x3e8UL << 0) - #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_10MB (0xffffUL << 0) + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_100MB 0x1UL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_1GB 0xaUL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_2GB 0x14UL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_2_5GB 0x19UL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_10GB 0x64UL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_20GB 0xc8UL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_25GB 0xfaUL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_40GB 0x190UL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_50GB 0x1f4UL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_100GB 0x3e8UL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_10MB 0xffffUL __le16 auto_link_speed_mask; #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_MASK_100MBHD 0x1UL #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_MASK_100MB 0x2UL @@ -1582,12 +1630,12 @@ struct hwrm_port_phy_cfg_input { #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_MASK_10MBHD 0x1000UL #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_MASK_10MB 0x2000UL u8 wirespeed; - #define PORT_PHY_CFG_REQ_WIRESPEED_OFF (0x0UL << 0) - #define PORT_PHY_CFG_REQ_WIRESPEED_ON (0x1UL << 0) + #define PORT_PHY_CFG_REQ_WIRESPEED_OFF 0x0UL + #define PORT_PHY_CFG_REQ_WIRESPEED_ON 0x1UL u8 lpbk; - #define PORT_PHY_CFG_REQ_LPBK_NONE (0x0UL << 0) - #define PORT_PHY_CFG_REQ_LPBK_LOCAL (0x1UL << 0) - #define PORT_PHY_CFG_REQ_LPBK_REMOTE (0x2UL << 0) + #define PORT_PHY_CFG_REQ_LPBK_NONE 0x0UL + #define PORT_PHY_CFG_REQ_LPBK_LOCAL 0x1UL + #define PORT_PHY_CFG_REQ_LPBK_REMOTE 0x2UL u8 force_pause; #define PORT_PHY_CFG_REQ_FORCE_PAUSE_TX 0x1UL #define PORT_PHY_CFG_REQ_FORCE_PAUSE_RX 0x2UL @@ -1641,25 +1689,25 @@ struct hwrm_port_phy_qcfg_output { __le16 seq_id; __le16 resp_len; u8 link; - #define PORT_PHY_QCFG_RESP_LINK_NO_LINK (0x0UL << 0) - #define PORT_PHY_QCFG_RESP_LINK_SIGNAL (0x1UL << 0) - #define PORT_PHY_QCFG_RESP_LINK_LINK (0x2UL << 0) + #define PORT_PHY_QCFG_RESP_LINK_NO_LINK 0x0UL + #define PORT_PHY_QCFG_RESP_LINK_SIGNAL 0x1UL + #define PORT_PHY_QCFG_RESP_LINK_LINK 0x2UL u8 unused_0; __le16 link_speed; - #define PORT_PHY_QCFG_RESP_LINK_SPEED_100MB (0x1UL << 0) - #define PORT_PHY_QCFG_RESP_LINK_SPEED_1GB (0xaUL << 0) - #define PORT_PHY_QCFG_RESP_LINK_SPEED_2GB (0x14UL << 0) - #define PORT_PHY_QCFG_RESP_LINK_SPEED_2_5GB (0x19UL << 0) - #define PORT_PHY_QCFG_RESP_LINK_SPEED_10GB (0x64UL << 0) - #define PORT_PHY_QCFG_RESP_LINK_SPEED_20GB (0xc8UL << 0) - #define PORT_PHY_QCFG_RESP_LINK_SPEED_25GB (0xfaUL << 0) - #define PORT_PHY_QCFG_RESP_LINK_SPEED_40GB (0x190UL << 0) - #define PORT_PHY_QCFG_RESP_LINK_SPEED_50GB (0x1f4UL << 0) - #define PORT_PHY_QCFG_RESP_LINK_SPEED_100GB (0x3e8UL << 0) - #define PORT_PHY_QCFG_RESP_LINK_SPEED_10MB (0xffffUL << 0) + #define PORT_PHY_QCFG_RESP_LINK_SPEED_100MB 0x1UL + #define PORT_PHY_QCFG_RESP_LINK_SPEED_1GB 0xaUL + #define PORT_PHY_QCFG_RESP_LINK_SPEED_2GB 0x14UL + #define PORT_PHY_QCFG_RESP_LINK_SPEED_2_5GB 0x19UL + #define PORT_PHY_QCFG_RESP_LINK_SPEED_10GB 0x64UL + #define PORT_PHY_QCFG_RESP_LINK_SPEED_20GB 0xc8UL + #define PORT_PHY_QCFG_RESP_LINK_SPEED_25GB 0xfaUL + #define PORT_PHY_QCFG_RESP_LINK_SPEED_40GB 0x190UL + #define PORT_PHY_QCFG_RESP_LINK_SPEED_50GB 0x1f4UL + #define PORT_PHY_QCFG_RESP_LINK_SPEED_100GB 0x3e8UL + #define PORT_PHY_QCFG_RESP_LINK_SPEED_10MB 0xffffUL u8 duplex; - #define PORT_PHY_QCFG_RESP_DUPLEX_HALF (0x0UL << 0) - #define PORT_PHY_QCFG_RESP_DUPLEX_FULL (0x1UL << 0) + #define PORT_PHY_QCFG_RESP_DUPLEX_HALF 0x0UL + #define PORT_PHY_QCFG_RESP_DUPLEX_FULL 0x1UL u8 pause; #define PORT_PHY_QCFG_RESP_PAUSE_TX 0x1UL #define PORT_PHY_QCFG_RESP_PAUSE_RX 0x2UL @@ -1679,39 +1727,39 @@ struct hwrm_port_phy_qcfg_output { #define PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS_10MBHD 0x1000UL #define PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS_10MB 0x2000UL __le16 force_link_speed; - #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_100MB (0x1UL << 0) - #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_1GB (0xaUL << 0) - #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_2GB (0x14UL << 0) - #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_2_5GB (0x19UL << 0) - #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_10GB (0x64UL << 0) - #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_20GB (0xc8UL << 0) - #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_25GB (0xfaUL << 0) - #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_40GB (0x190UL << 0) - #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_50GB (0x1f4UL << 0) - #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_100GB (0x3e8UL << 0) - #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_10MB (0xffffUL << 0) + #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_100MB 0x1UL + #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_1GB 0xaUL + #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_2GB 0x14UL + #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_2_5GB 0x19UL + #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_10GB 0x64UL + #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_20GB 0xc8UL + #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_25GB 0xfaUL + #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_40GB 0x190UL + #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_50GB 0x1f4UL + #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_100GB 0x3e8UL + #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_10MB 0xffffUL u8 auto_mode; - #define PORT_PHY_QCFG_RESP_AUTO_MODE_NONE (0x0UL << 0) - #define PORT_PHY_QCFG_RESP_AUTO_MODE_ALL_SPEEDS (0x1UL << 0) - #define PORT_PHY_QCFG_RESP_AUTO_MODE_ONE_SPEED (0x2UL << 0) - #define PORT_PHY_QCFG_RESP_AUTO_MODE_ONE_OR_BELOW (0x3UL << 0) - #define PORT_PHY_QCFG_RESP_AUTO_MODE_SPEED_MASK (0x4UL << 0) + #define PORT_PHY_QCFG_RESP_AUTO_MODE_NONE 0x0UL + #define PORT_PHY_QCFG_RESP_AUTO_MODE_ALL_SPEEDS 0x1UL + #define PORT_PHY_QCFG_RESP_AUTO_MODE_ONE_SPEED 0x2UL + #define PORT_PHY_QCFG_RESP_AUTO_MODE_ONE_OR_BELOW 0x3UL + #define PORT_PHY_QCFG_RESP_AUTO_MODE_SPEED_MASK 0x4UL u8 auto_pause; #define PORT_PHY_QCFG_RESP_AUTO_PAUSE_TX 0x1UL #define PORT_PHY_QCFG_RESP_AUTO_PAUSE_RX 0x2UL #define PORT_PHY_QCFG_RESP_AUTO_PAUSE_AUTONEG_PAUSE 0x4UL __le16 auto_link_speed; - #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_100MB (0x1UL << 0) - #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_1GB (0xaUL << 0) - #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_2GB (0x14UL << 0) - #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_2_5GB (0x19UL << 0) - #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_10GB (0x64UL << 0) - #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_20GB (0xc8UL << 0) - #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_25GB (0xfaUL << 0) - #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_40GB (0x190UL << 0) - #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_50GB (0x1f4UL << 0) - #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_100GB (0x3e8UL << 0) - #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_10MB (0xffffUL << 0) + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_100MB 0x1UL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_1GB 0xaUL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_2GB 0x14UL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_2_5GB 0x19UL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_10GB 0x64UL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_20GB 0xc8UL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_25GB 0xfaUL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_40GB 0x190UL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_50GB 0x1f4UL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_100GB 0x3e8UL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_10MB 0xffffUL __le16 auto_link_speed_mask; #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_MASK_100MBHD 0x1UL #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_MASK_100MB 0x2UL @@ -1728,46 +1776,46 @@ struct hwrm_port_phy_qcfg_output { #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_MASK_10MBHD 0x1000UL #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_MASK_10MB 0x2000UL u8 wirespeed; - #define PORT_PHY_QCFG_RESP_WIRESPEED_OFF (0x0UL << 0) - #define PORT_PHY_QCFG_RESP_WIRESPEED_ON (0x1UL << 0) + #define PORT_PHY_QCFG_RESP_WIRESPEED_OFF 0x0UL + #define PORT_PHY_QCFG_RESP_WIRESPEED_ON 0x1UL u8 lpbk; - #define PORT_PHY_QCFG_RESP_LPBK_NONE (0x0UL << 0) - #define PORT_PHY_QCFG_RESP_LPBK_LOCAL (0x1UL << 0) - #define PORT_PHY_QCFG_RESP_LPBK_REMOTE (0x2UL << 0) + #define PORT_PHY_QCFG_RESP_LPBK_NONE 0x0UL + #define PORT_PHY_QCFG_RESP_LPBK_LOCAL 0x1UL + #define PORT_PHY_QCFG_RESP_LPBK_REMOTE 0x2UL u8 force_pause; #define PORT_PHY_QCFG_RESP_FORCE_PAUSE_TX 0x1UL #define PORT_PHY_QCFG_RESP_FORCE_PAUSE_RX 0x2UL u8 module_status; - #define PORT_PHY_QCFG_RESP_MODULE_STATUS_NONE (0x0UL << 0) - #define PORT_PHY_QCFG_RESP_MODULE_STATUS_DISABLETX (0x1UL << 0) - #define PORT_PHY_QCFG_RESP_MODULE_STATUS_WARNINGMSG (0x2UL << 0) - #define PORT_PHY_QCFG_RESP_MODULE_STATUS_PWRDOWN (0x3UL << 0) - #define PORT_PHY_QCFG_RESP_MODULE_STATUS_NOTINSERTED (0x4UL << 0) - #define PORT_PHY_QCFG_RESP_MODULE_STATUS_NOTAPPLICABLE (0xffUL << 0) + #define PORT_PHY_QCFG_RESP_MODULE_STATUS_NONE 0x0UL + #define PORT_PHY_QCFG_RESP_MODULE_STATUS_DISABLETX 0x1UL + #define PORT_PHY_QCFG_RESP_MODULE_STATUS_WARNINGMSG 0x2UL + #define PORT_PHY_QCFG_RESP_MODULE_STATUS_PWRDOWN 0x3UL + #define PORT_PHY_QCFG_RESP_MODULE_STATUS_NOTINSERTED 0x4UL + #define PORT_PHY_QCFG_RESP_MODULE_STATUS_NOTAPPLICABLE 0xffUL __le32 preemphasis; u8 phy_maj; u8 phy_min; u8 phy_bld; u8 phy_type; - #define PORT_PHY_QCFG_RESP_PHY_TYPE_UNKNOWN (0x0UL << 0) - #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASECR (0x1UL << 0) - #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASEKR4 (0x2UL << 0) - #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASELR (0x3UL << 0) - #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASESR (0x4UL << 0) - #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASEKR2 (0x5UL << 0) - #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASEKX (0x6UL << 0) - #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASEKR (0x7UL << 0) - #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASET (0x8UL << 0) - #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASETE (0x9UL << 0) - #define PORT_PHY_QCFG_RESP_PHY_TYPE_SGMIIEXTPHY (0xaUL << 0) + #define PORT_PHY_QCFG_RESP_PHY_TYPE_UNKNOWN 0x0UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASECR 0x1UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASEKR4 0x2UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASELR 0x3UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASESR 0x4UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASEKR2 0x5UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASEKX 0x6UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASEKR 0x7UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASET 0x8UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASETE 0x9UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_SGMIIEXTPHY 0xaUL u8 media_type; - #define PORT_PHY_QCFG_RESP_MEDIA_TYPE_UNKNOWN (0x0UL << 0) - #define PORT_PHY_QCFG_RESP_MEDIA_TYPE_TP (0x1UL << 0) - #define PORT_PHY_QCFG_RESP_MEDIA_TYPE_DAC (0x2UL << 0) - #define PORT_PHY_QCFG_RESP_MEDIA_TYPE_FIBRE (0x3UL << 0) + #define PORT_PHY_QCFG_RESP_MEDIA_TYPE_UNKNOWN 0x0UL + #define PORT_PHY_QCFG_RESP_MEDIA_TYPE_TP 0x1UL + #define PORT_PHY_QCFG_RESP_MEDIA_TYPE_DAC 0x2UL + #define PORT_PHY_QCFG_RESP_MEDIA_TYPE_FIBRE 0x3UL u8 xcvr_pkg_type; - #define PORT_PHY_QCFG_RESP_XCVR_PKG_TYPE_XCVR_INTERNAL (0x1UL << 0) - #define PORT_PHY_QCFG_RESP_XCVR_PKG_TYPE_XCVR_EXTERNAL (0x2UL << 0) + #define PORT_PHY_QCFG_RESP_XCVR_PKG_TYPE_XCVR_INTERNAL 0x1UL + #define PORT_PHY_QCFG_RESP_XCVR_PKG_TYPE_XCVR_EXTERNAL 0x2UL u8 eee_config_phy_addr; #define PORT_PHY_QCFG_RESP_PHY_ADDR_MASK 0x1fUL #define PORT_PHY_QCFG_RESP_PHY_ADDR_SFT 0 @@ -1796,11 +1844,11 @@ struct hwrm_port_phy_qcfg_output { #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_SPEEDS_10MBHD 0x1000UL #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_SPEEDS_10MB 0x2000UL u8 link_partner_adv_auto_mode; - #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_AUTO_MODE_NONE (0x0UL << 0) - #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_AUTO_MODE_ALL_SPEEDS (0x1UL << 0) - #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_AUTO_MODE_ONE_SPEED (0x2UL << 0) - #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_AUTO_MODE_ONE_OR_BELOW (0x3UL << 0) - #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_AUTO_MODE_SPEED_MASK (0x4UL << 0) + #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_AUTO_MODE_NONE 0x0UL + #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_AUTO_MODE_ALL_SPEEDS 0x1UL + #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_AUTO_MODE_ONE_SPEED 0x2UL + #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_AUTO_MODE_ONE_OR_BELOW 0x3UL + #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_AUTO_MODE_SPEED_MASK 0x4UL u8 link_partner_adv_pause; #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_PAUSE_TX 0x1UL #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_PAUSE_RX 0x2UL @@ -1859,7 +1907,7 @@ struct hwrm_port_mac_cfg_input { __le64 resp_addr; __le32 flags; #define PORT_MAC_CFG_REQ_FLAGS_MATCH_LINK 0x1UL - #define PORT_MAC_CFG_REQ_FLAGS_COS_ASSIGNMENT_ENABLE 0x2UL + #define PORT_MAC_CFG_REQ_FLAGS_VLAN_PRI2COS_ENABLE 0x2UL #define PORT_MAC_CFG_REQ_FLAGS_TUNNEL_PRI2COS_ENABLE 0x4UL #define PORT_MAC_CFG_REQ_FLAGS_IP_DSCP2COS_ENABLE 0x8UL #define PORT_MAC_CFG_REQ_FLAGS_PTP_RX_TS_CAPTURE_ENABLE 0x10UL @@ -1868,28 +1916,50 @@ struct hwrm_port_mac_cfg_input { #define PORT_MAC_CFG_REQ_FLAGS_PTP_TX_TS_CAPTURE_DISABLE 0x80UL #define PORT_MAC_CFG_REQ_FLAGS_OOB_WOL_ENABLE 0x100UL #define PORT_MAC_CFG_REQ_FLAGS_OOB_WOL_DISABLE 0x200UL + #define PORT_MAC_CFG_REQ_FLAGS_VLAN_PRI2COS_DISABLE 0x400UL + #define PORT_MAC_CFG_REQ_FLAGS_TUNNEL_PRI2COS_DISABLE 0x800UL + #define PORT_MAC_CFG_REQ_FLAGS_IP_DSCP2COS_DISABLE 0x1000UL __le32 enables; #define PORT_MAC_CFG_REQ_ENABLES_IPG 0x1UL #define PORT_MAC_CFG_REQ_ENABLES_LPBK 0x2UL - #define PORT_MAC_CFG_REQ_ENABLES_IVLAN_PRI2COS_MAP_PRI 0x4UL - #define PORT_MAC_CFG_REQ_ENABLES_LCOS_MAP_PRI 0x8UL + #define PORT_MAC_CFG_REQ_ENABLES_VLAN_PRI2COS_MAP_PRI 0x4UL + #define PORT_MAC_CFG_REQ_ENABLES_RESERVED1 0x8UL #define PORT_MAC_CFG_REQ_ENABLES_TUNNEL_PRI2COS_MAP_PRI 0x10UL #define PORT_MAC_CFG_REQ_ENABLES_DSCP2COS_MAP_PRI 0x20UL #define PORT_MAC_CFG_REQ_ENABLES_RX_TS_CAPTURE_PTP_MSG_TYPE 0x40UL #define PORT_MAC_CFG_REQ_ENABLES_TX_TS_CAPTURE_PTP_MSG_TYPE 0x80UL + #define PORT_MAC_CFG_REQ_ENABLES_COS_FIELD_CFG 0x100UL __le16 port_id; u8 ipg; u8 lpbk; - #define PORT_MAC_CFG_REQ_LPBK_NONE (0x0UL << 0) - #define PORT_MAC_CFG_REQ_LPBK_LOCAL (0x1UL << 0) - #define PORT_MAC_CFG_REQ_LPBK_REMOTE (0x2UL << 0) - u8 ivlan_pri2cos_map_pri; - u8 lcos_map_pri; + #define PORT_MAC_CFG_REQ_LPBK_NONE 0x0UL + #define PORT_MAC_CFG_REQ_LPBK_LOCAL 0x1UL + #define PORT_MAC_CFG_REQ_LPBK_REMOTE 0x2UL + u8 vlan_pri2cos_map_pri; + u8 reserved1; u8 tunnel_pri2cos_map_pri; u8 dscp2pri_map_pri; __le16 rx_ts_capture_ptp_msg_type; __le16 tx_ts_capture_ptp_msg_type; - __le32 unused_0; + u8 cos_field_cfg; + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_RSVD1 0x1UL + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_VLAN_PRI_SEL_MASK 0x6UL + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_VLAN_PRI_SEL_SFT 1 + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_VLAN_PRI_SEL_INNERMOST (0x0UL << 1) + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_VLAN_PRI_SEL_OUTER (0x1UL << 1) + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_VLAN_PRI_SEL_OUTERMOST (0x2UL << 1) + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_VLAN_PRI_SEL_UNSPECIFIED (0x3UL << 1) + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_VLAN_PRI_SEL_LAST PORT_MAC_CFG_REQ_COS_FIELD_CFG_VLAN_PRI_SEL_UNSPECIFIED + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_T_VLAN_PRI_SEL_MASK 0x18UL + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_T_VLAN_PRI_SEL_SFT 3 + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_T_VLAN_PRI_SEL_INNERMOST (0x0UL << 3) + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_T_VLAN_PRI_SEL_OUTER (0x1UL << 3) + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_T_VLAN_PRI_SEL_OUTERMOST (0x2UL << 3) + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_T_VLAN_PRI_SEL_UNSPECIFIED (0x3UL << 3) + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_T_VLAN_PRI_SEL_LAST PORT_MAC_CFG_REQ_COS_FIELD_CFG_T_VLAN_PRI_SEL_UNSPECIFIED + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_DEFAULT_COS_MASK 0xe0UL + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_DEFAULT_COS_SFT 5 + u8 unused_0[3]; }; /* Output (16 bytes) */ @@ -1902,9 +1972,9 @@ struct hwrm_port_mac_cfg_output { __le16 mtu; u8 ipg; u8 lpbk; - #define PORT_MAC_CFG_RESP_LPBK_NONE (0x0UL << 0) - #define PORT_MAC_CFG_RESP_LPBK_LOCAL (0x1UL << 0) - #define PORT_MAC_CFG_RESP_LPBK_REMOTE (0x2UL << 0) + #define PORT_MAC_CFG_RESP_LPBK_NONE 0x0UL + #define PORT_MAC_CFG_RESP_LPBK_LOCAL 0x1UL + #define PORT_MAC_CFG_RESP_LPBK_REMOTE 0x2UL u8 unused_0; u8 valid; }; @@ -2163,8 +2233,8 @@ struct hwrm_queue_qportcfg_input { __le64 resp_addr; __le32 flags; #define QUEUE_QPORTCFG_REQ_FLAGS_PATH 0x1UL - #define QUEUE_QPORTCFG_REQ_FLAGS_PATH_TX (0x0UL << 0) - #define QUEUE_QPORTCFG_REQ_FLAGS_PATH_RX (0x1UL << 0) + #define QUEUE_QPORTCFG_REQ_FLAGS_PATH_TX 0x0UL + #define QUEUE_QPORTCFG_REQ_FLAGS_PATH_RX 0x1UL #define QUEUE_QPORTCFG_REQ_FLAGS_PATH_LAST QUEUE_QPORTCFG_REQ_FLAGS_PATH_RX __le16 port_id; __le16 unused_0; @@ -2179,50 +2249,51 @@ struct hwrm_queue_qportcfg_output { u8 max_configurable_queues; u8 max_configurable_lossless_queues; u8 queue_cfg_allowed; - u8 queue_buffers_cfg_allowed; + u8 queue_cfg_info; + #define QUEUE_QPORTCFG_RESP_QUEUE_CFG_INFO_ASYM_CFG 0x1UL u8 queue_pfcenable_cfg_allowed; u8 queue_pri2cos_cfg_allowed; u8 queue_cos2bw_cfg_allowed; u8 queue_id0; u8 queue_id0_service_profile; - #define QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_LOSSY (0x0UL << 0) - #define QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_LOSSLESS (0x1UL << 0) - #define QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_UNKNOWN (0xffUL << 0) + #define QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_LOSSY 0x0UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_LOSSLESS 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_UNKNOWN 0xffUL u8 queue_id1; u8 queue_id1_service_profile; - #define QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_LOSSY (0x0UL << 0) - #define QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_LOSSLESS (0x1UL << 0) - #define QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_UNKNOWN (0xffUL << 0) + #define QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_LOSSY 0x0UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_LOSSLESS 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_UNKNOWN 0xffUL u8 queue_id2; u8 queue_id2_service_profile; - #define QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_LOSSY (0x0UL << 0) - #define QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_LOSSLESS (0x1UL << 0) - #define QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_UNKNOWN (0xffUL << 0) + #define QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_LOSSY 0x0UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_LOSSLESS 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_UNKNOWN 0xffUL u8 queue_id3; u8 queue_id3_service_profile; - #define QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_LOSSY (0x0UL << 0) - #define QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_LOSSLESS (0x1UL << 0) - #define QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_UNKNOWN (0xffUL << 0) + #define QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_LOSSY 0x0UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_LOSSLESS 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_UNKNOWN 0xffUL u8 queue_id4; u8 queue_id4_service_profile; - #define QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_LOSSY (0x0UL << 0) - #define QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_LOSSLESS (0x1UL << 0) - #define QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_UNKNOWN (0xffUL << 0) + #define QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_LOSSY 0x0UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_LOSSLESS 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_UNKNOWN 0xffUL u8 queue_id5; u8 queue_id5_service_profile; - #define QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_LOSSY (0x0UL << 0) - #define QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_LOSSLESS (0x1UL << 0) - #define QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_UNKNOWN (0xffUL << 0) + #define QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_LOSSY 0x0UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_LOSSLESS 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_UNKNOWN 0xffUL u8 queue_id6; u8 queue_id6_service_profile; - #define QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_LOSSY (0x0UL << 0) - #define QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_LOSSLESS (0x1UL << 0) - #define QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_UNKNOWN (0xffUL << 0) + #define QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_LOSSY 0x0UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_LOSSLESS 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_UNKNOWN 0xffUL u8 queue_id7; u8 queue_id7_service_profile; - #define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_LOSSY (0x0UL << 0) - #define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_LOSSLESS (0x1UL << 0) - #define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_UNKNOWN (0xffUL << 0) + #define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_LOSSY 0x0UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_LOSSLESS 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_UNKNOWN 0xffUL u8 valid; }; @@ -2235,19 +2306,21 @@ struct hwrm_queue_cfg_input { __le16 target_id; __le64 resp_addr; __le32 flags; - #define QUEUE_CFG_REQ_FLAGS_PATH 0x1UL - #define QUEUE_CFG_REQ_FLAGS_PATH_TX (0x0UL << 0) - #define QUEUE_CFG_REQ_FLAGS_PATH_RX (0x1UL << 0) - #define QUEUE_CFG_REQ_FLAGS_PATH_LAST QUEUE_CFG_REQ_FLAGS_PATH_RX + #define QUEUE_CFG_REQ_FLAGS_PATH_MASK 0x3UL + #define QUEUE_CFG_REQ_FLAGS_PATH_SFT 0 + #define QUEUE_CFG_REQ_FLAGS_PATH_TX 0x0UL + #define QUEUE_CFG_REQ_FLAGS_PATH_RX 0x1UL + #define QUEUE_CFG_REQ_FLAGS_PATH_BIDIR 0x2UL + #define QUEUE_CFG_REQ_FLAGS_PATH_LAST QUEUE_CFG_REQ_FLAGS_PATH_BIDIR __le32 enables; #define QUEUE_CFG_REQ_ENABLES_DFLT_LEN 0x1UL #define QUEUE_CFG_REQ_ENABLES_SERVICE_PROFILE 0x2UL __le32 queue_id; __le32 dflt_len; u8 service_profile; - #define QUEUE_CFG_REQ_SERVICE_PROFILE_LOSSY (0x0UL << 0) - #define QUEUE_CFG_REQ_SERVICE_PROFILE_LOSSLESS (0x1UL << 0) - #define QUEUE_CFG_REQ_SERVICE_PROFILE_UNKNOWN (0xffUL << 0) + #define QUEUE_CFG_REQ_SERVICE_PROFILE_LOSSY 0x0UL + #define QUEUE_CFG_REQ_SERVICE_PROFILE_LOSSLESS 0x1UL + #define QUEUE_CFG_REQ_SERVICE_PROFILE_UNKNOWN 0xffUL u8 unused_0[7]; }; @@ -2264,50 +2337,6 @@ struct hwrm_queue_cfg_output { u8 valid; }; -/* hwrm_queue_buffers_cfg */ -/* Input (56 bytes) */ -struct hwrm_queue_buffers_cfg_input { - __le16 req_type; - __le16 cmpl_ring; - __le16 seq_id; - __le16 target_id; - __le64 resp_addr; - __le32 flags; - #define QUEUE_BUFFERS_CFG_REQ_FLAGS_PATH 0x1UL - #define QUEUE_BUFFERS_CFG_REQ_FLAGS_PATH_TX (0x0UL << 0) - #define QUEUE_BUFFERS_CFG_REQ_FLAGS_PATH_RX (0x1UL << 0) - #define QUEUE_BUFFERS_CFG_REQ_FLAGS_PATH_LAST QUEUE_BUFFERS_CFG_REQ_FLAGS_PATH_RX - __le32 enables; - #define QUEUE_BUFFERS_CFG_REQ_ENABLES_RESERVED 0x1UL - #define QUEUE_BUFFERS_CFG_REQ_ENABLES_SHARED 0x2UL - #define QUEUE_BUFFERS_CFG_REQ_ENABLES_XOFF 0x4UL - #define QUEUE_BUFFERS_CFG_REQ_ENABLES_XON 0x8UL - #define QUEUE_BUFFERS_CFG_REQ_ENABLES_FULL 0x10UL - #define QUEUE_BUFFERS_CFG_REQ_ENABLES_NOTFULL 0x20UL - #define QUEUE_BUFFERS_CFG_REQ_ENABLES_MAX 0x40UL - __le32 queue_id; - __le32 reserved; - __le32 shared; - __le32 xoff; - __le32 xon; - __le32 full; - __le32 notfull; - __le32 max; -}; - -/* Output (16 bytes) */ -struct hwrm_queue_buffers_cfg_output { - __le16 error_code; - __le16 req_type; - __le16 seq_id; - __le16 resp_len; - __le32 unused_0; - u8 unused_1; - u8 unused_2; - u8 unused_3; - u8 valid; -}; - /* hwrm_queue_pfcenable_cfg */ /* Input (24 bytes) */ struct hwrm_queue_pfcenable_cfg_input { @@ -2351,12 +2380,22 @@ struct hwrm_queue_pri2cos_cfg_input { __le16 target_id; __le64 resp_addr; __le32 flags; - #define QUEUE_PRI2COS_CFG_REQ_FLAGS_PATH 0x1UL + #define QUEUE_PRI2COS_CFG_REQ_FLAGS_PATH_MASK 0x3UL + #define QUEUE_PRI2COS_CFG_REQ_FLAGS_PATH_SFT 0 #define QUEUE_PRI2COS_CFG_REQ_FLAGS_PATH_TX (0x0UL << 0) #define QUEUE_PRI2COS_CFG_REQ_FLAGS_PATH_RX (0x1UL << 0) - #define QUEUE_PRI2COS_CFG_REQ_FLAGS_PATH_LAST QUEUE_PRI2COS_CFG_REQ_FLAGS_PATH_RX - #define QUEUE_PRI2COS_CFG_REQ_FLAGS_IVLAN 0x2UL + #define QUEUE_PRI2COS_CFG_REQ_FLAGS_PATH_BIDIR (0x2UL << 0) + #define QUEUE_PRI2COS_CFG_REQ_FLAGS_PATH_LAST QUEUE_PRI2COS_CFG_REQ_FLAGS_PATH_BIDIR + #define QUEUE_PRI2COS_CFG_REQ_FLAGS_IVLAN 0x4UL __le32 enables; + #define QUEUE_PRI2COS_CFG_REQ_ENABLES_PRI0_COS_QUEUE_ID 0x1UL + #define QUEUE_PRI2COS_CFG_REQ_ENABLES_PRI1_COS_QUEUE_ID 0x2UL + #define QUEUE_PRI2COS_CFG_REQ_ENABLES_PRI2_COS_QUEUE_ID 0x4UL + #define QUEUE_PRI2COS_CFG_REQ_ENABLES_PRI3_COS_QUEUE_ID 0x8UL + #define QUEUE_PRI2COS_CFG_REQ_ENABLES_PRI4_COS_QUEUE_ID 0x10UL + #define QUEUE_PRI2COS_CFG_REQ_ENABLES_PRI5_COS_QUEUE_ID 0x20UL + #define QUEUE_PRI2COS_CFG_REQ_ENABLES_PRI6_COS_QUEUE_ID 0x40UL + #define QUEUE_PRI2COS_CFG_REQ_ENABLES_PRI7_COS_QUEUE_ID 0x80UL u8 port_id; u8 pri0_cos_queue_id; u8 pri1_cos_queue_id; @@ -2404,82 +2443,226 @@ struct hwrm_queue_cos2bw_cfg_input { u8 queue_id0; u8 unused_0; __le32 queue_id0_min_bw; + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_BW_VALUE_MASK 0xfffffffUL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_BW_VALUE_SFT 0 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_RSVD 0x10000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_SFT 29 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_INVALID __le32 queue_id0_max_bw; + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_BW_VALUE_MASK 0xfffffffUL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_BW_VALUE_SFT 0 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_RSVD 0x10000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_SFT 29 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_INVALID u8 queue_id0_tsa_assign; - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_TSA_ASSIGN_SP (0x0UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_TSA_ASSIGN_ETS (0x1UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_TSA_ASSIGN_RESERVED_FIRST (0x2UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_TSA_ASSIGN_RESERVED_LAST (0xffUL << 0) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_TSA_ASSIGN_SP 0x0UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_TSA_ASSIGN_ETS 0x1UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_TSA_ASSIGN_RESERVED_FIRST 0x2UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_TSA_ASSIGN_RESERVED_LAST 0xffUL u8 queue_id0_pri_lvl; u8 queue_id0_bw_weight; u8 queue_id1; __le32 queue_id1_min_bw; + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MIN_BW_BW_VALUE_MASK 0xfffffffUL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MIN_BW_BW_VALUE_SFT 0 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MIN_BW_RSVD 0x10000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_SFT 29 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_INVALID __le32 queue_id1_max_bw; + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MAX_BW_BW_VALUE_MASK 0xfffffffUL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MAX_BW_BW_VALUE_SFT 0 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MAX_BW_RSVD 0x10000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_SFT 29 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_INVALID u8 queue_id1_tsa_assign; - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_TSA_ASSIGN_SP (0x0UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_TSA_ASSIGN_ETS (0x1UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_TSA_ASSIGN_RESERVED_FIRST (0x2UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_TSA_ASSIGN_RESERVED_LAST (0xffUL << 0) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_TSA_ASSIGN_SP 0x0UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_TSA_ASSIGN_ETS 0x1UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_TSA_ASSIGN_RESERVED_FIRST 0x2UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_TSA_ASSIGN_RESERVED_LAST 0xffUL u8 queue_id1_pri_lvl; u8 queue_id1_bw_weight; u8 queue_id2; __le32 queue_id2_min_bw; + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MIN_BW_BW_VALUE_MASK 0xfffffffUL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MIN_BW_BW_VALUE_SFT 0 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MIN_BW_RSVD 0x10000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_SFT 29 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_INVALID __le32 queue_id2_max_bw; + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MAX_BW_BW_VALUE_MASK 0xfffffffUL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MAX_BW_BW_VALUE_SFT 0 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MAX_BW_RSVD 0x10000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_SFT 29 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_INVALID u8 queue_id2_tsa_assign; - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_TSA_ASSIGN_SP (0x0UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_TSA_ASSIGN_ETS (0x1UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_TSA_ASSIGN_RESERVED_FIRST (0x2UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_TSA_ASSIGN_RESERVED_LAST (0xffUL << 0) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_TSA_ASSIGN_SP 0x0UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_TSA_ASSIGN_ETS 0x1UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_TSA_ASSIGN_RESERVED_FIRST 0x2UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_TSA_ASSIGN_RESERVED_LAST 0xffUL u8 queue_id2_pri_lvl; u8 queue_id2_bw_weight; u8 queue_id3; __le32 queue_id3_min_bw; + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MIN_BW_BW_VALUE_MASK 0xfffffffUL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MIN_BW_BW_VALUE_SFT 0 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MIN_BW_RSVD 0x10000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_SFT 29 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_INVALID __le32 queue_id3_max_bw; + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MAX_BW_BW_VALUE_MASK 0xfffffffUL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MAX_BW_BW_VALUE_SFT 0 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MAX_BW_RSVD 0x10000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_SFT 29 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_INVALID u8 queue_id3_tsa_assign; - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_TSA_ASSIGN_SP (0x0UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_TSA_ASSIGN_ETS (0x1UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_TSA_ASSIGN_RESERVED_FIRST (0x2UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_TSA_ASSIGN_RESERVED_LAST (0xffUL << 0) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_TSA_ASSIGN_SP 0x0UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_TSA_ASSIGN_ETS 0x1UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_TSA_ASSIGN_RESERVED_FIRST 0x2UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_TSA_ASSIGN_RESERVED_LAST 0xffUL u8 queue_id3_pri_lvl; u8 queue_id3_bw_weight; u8 queue_id4; __le32 queue_id4_min_bw; + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MIN_BW_BW_VALUE_MASK 0xfffffffUL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MIN_BW_BW_VALUE_SFT 0 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MIN_BW_RSVD 0x10000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_SFT 29 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_INVALID __le32 queue_id4_max_bw; + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MAX_BW_BW_VALUE_MASK 0xfffffffUL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MAX_BW_BW_VALUE_SFT 0 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MAX_BW_RSVD 0x10000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_SFT 29 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_INVALID u8 queue_id4_tsa_assign; - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_TSA_ASSIGN_SP (0x0UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_TSA_ASSIGN_ETS (0x1UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_TSA_ASSIGN_RESERVED_FIRST (0x2UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_TSA_ASSIGN_RESERVED_LAST (0xffUL << 0) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_TSA_ASSIGN_SP 0x0UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_TSA_ASSIGN_ETS 0x1UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_TSA_ASSIGN_RESERVED_FIRST 0x2UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_TSA_ASSIGN_RESERVED_LAST 0xffUL u8 queue_id4_pri_lvl; u8 queue_id4_bw_weight; u8 queue_id5; __le32 queue_id5_min_bw; + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MIN_BW_BW_VALUE_MASK 0xfffffffUL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MIN_BW_BW_VALUE_SFT 0 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MIN_BW_RSVD 0x10000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_SFT 29 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_INVALID __le32 queue_id5_max_bw; + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MAX_BW_BW_VALUE_MASK 0xfffffffUL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MAX_BW_BW_VALUE_SFT 0 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MAX_BW_RSVD 0x10000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_SFT 29 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_INVALID u8 queue_id5_tsa_assign; - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_TSA_ASSIGN_SP (0x0UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_TSA_ASSIGN_ETS (0x1UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_TSA_ASSIGN_RESERVED_FIRST (0x2UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_TSA_ASSIGN_RESERVED_LAST (0xffUL << 0) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_TSA_ASSIGN_SP 0x0UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_TSA_ASSIGN_ETS 0x1UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_TSA_ASSIGN_RESERVED_FIRST 0x2UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_TSA_ASSIGN_RESERVED_LAST 0xffUL u8 queue_id5_pri_lvl; u8 queue_id5_bw_weight; u8 queue_id6; __le32 queue_id6_min_bw; + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MIN_BW_BW_VALUE_MASK 0xfffffffUL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MIN_BW_BW_VALUE_SFT 0 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MIN_BW_RSVD 0x10000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_SFT 29 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_INVALID __le32 queue_id6_max_bw; + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MAX_BW_BW_VALUE_MASK 0xfffffffUL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MAX_BW_BW_VALUE_SFT 0 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MAX_BW_RSVD 0x10000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_SFT 29 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_INVALID u8 queue_id6_tsa_assign; - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_TSA_ASSIGN_SP (0x0UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_TSA_ASSIGN_ETS (0x1UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_TSA_ASSIGN_RESERVED_FIRST (0x2UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_TSA_ASSIGN_RESERVED_LAST (0xffUL << 0) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_TSA_ASSIGN_SP 0x0UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_TSA_ASSIGN_ETS 0x1UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_TSA_ASSIGN_RESERVED_FIRST 0x2UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_TSA_ASSIGN_RESERVED_LAST 0xffUL u8 queue_id6_pri_lvl; u8 queue_id6_bw_weight; u8 queue_id7; __le32 queue_id7_min_bw; + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MIN_BW_BW_VALUE_MASK 0xfffffffUL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MIN_BW_BW_VALUE_SFT 0 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MIN_BW_RSVD 0x10000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_SFT 29 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_INVALID __le32 queue_id7_max_bw; + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MAX_BW_BW_VALUE_MASK 0xfffffffUL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MAX_BW_BW_VALUE_SFT 0 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MAX_BW_RSVD 0x10000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_SFT 29 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_INVALID u8 queue_id7_tsa_assign; - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_TSA_ASSIGN_SP (0x0UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_TSA_ASSIGN_ETS (0x1UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_TSA_ASSIGN_RESERVED_FIRST (0x2UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_TSA_ASSIGN_RESERVED_LAST (0xffUL << 0) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_TSA_ASSIGN_SP 0x0UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_TSA_ASSIGN_ETS 0x1UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_TSA_ASSIGN_RESERVED_FIRST 0x2UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_TSA_ASSIGN_RESERVED_LAST 0xffUL u8 queue_id7_pri_lvl; u8 queue_id7_bw_weight; u8 unused_1[5]; @@ -2563,6 +2746,7 @@ struct hwrm_vnic_cfg_input { #define VNIC_CFG_REQ_FLAGS_BD_STALL_MODE 0x4UL #define VNIC_CFG_REQ_FLAGS_ROCE_DUAL_VNIC_MODE 0x8UL #define VNIC_CFG_REQ_FLAGS_ROCE_ONLY_VNIC_MODE 0x10UL + #define VNIC_CFG_REQ_FLAGS_RSS_DFLT_CR_MODE 0x20UL __le32 enables; #define VNIC_CFG_REQ_ENABLES_DFLT_RING_GRP 0x1UL #define VNIC_CFG_REQ_ENABLES_RSS_RULE 0x2UL @@ -2615,18 +2799,18 @@ struct hwrm_vnic_tpa_cfg_input { #define VNIC_TPA_CFG_REQ_ENABLES_MIN_AGG_LEN 0x8UL __le16 vnic_id; __le16 max_agg_segs; - #define VNIC_TPA_CFG_REQ_MAX_AGG_SEGS_1 (0x0UL << 0) - #define VNIC_TPA_CFG_REQ_MAX_AGG_SEGS_2 (0x1UL << 0) - #define VNIC_TPA_CFG_REQ_MAX_AGG_SEGS_4 (0x2UL << 0) - #define VNIC_TPA_CFG_REQ_MAX_AGG_SEGS_8 (0x3UL << 0) - #define VNIC_TPA_CFG_REQ_MAX_AGG_SEGS_MAX (0x1fUL << 0) + #define VNIC_TPA_CFG_REQ_MAX_AGG_SEGS_1 0x0UL + #define VNIC_TPA_CFG_REQ_MAX_AGG_SEGS_2 0x1UL + #define VNIC_TPA_CFG_REQ_MAX_AGG_SEGS_4 0x2UL + #define VNIC_TPA_CFG_REQ_MAX_AGG_SEGS_8 0x3UL + #define VNIC_TPA_CFG_REQ_MAX_AGG_SEGS_MAX 0x1fUL __le16 max_aggs; - #define VNIC_TPA_CFG_REQ_MAX_AGGS_1 (0x0UL << 0) - #define VNIC_TPA_CFG_REQ_MAX_AGGS_2 (0x1UL << 0) - #define VNIC_TPA_CFG_REQ_MAX_AGGS_4 (0x2UL << 0) - #define VNIC_TPA_CFG_REQ_MAX_AGGS_8 (0x3UL << 0) - #define VNIC_TPA_CFG_REQ_MAX_AGGS_16 (0x4UL << 0) - #define VNIC_TPA_CFG_REQ_MAX_AGGS_MAX (0x7UL << 0) + #define VNIC_TPA_CFG_REQ_MAX_AGGS_1 0x0UL + #define VNIC_TPA_CFG_REQ_MAX_AGGS_2 0x1UL + #define VNIC_TPA_CFG_REQ_MAX_AGGS_4 0x2UL + #define VNIC_TPA_CFG_REQ_MAX_AGGS_8 0x3UL + #define VNIC_TPA_CFG_REQ_MAX_AGGS_16 0x4UL + #define VNIC_TPA_CFG_REQ_MAX_AGGS_MAX 0x7UL u8 unused_0; u8 unused_1; __le32 max_agg_timer; @@ -2780,15 +2964,15 @@ struct hwrm_ring_alloc_input { __le64 resp_addr; __le32 enables; #define RING_ALLOC_REQ_ENABLES_RESERVED1 0x1UL - #define RING_ALLOC_REQ_ENABLES_RESERVED2 0x2UL + #define RING_ALLOC_REQ_ENABLES_RING_ARB_CFG 0x2UL #define RING_ALLOC_REQ_ENABLES_RESERVED3 0x4UL #define RING_ALLOC_REQ_ENABLES_STAT_CTX_ID_VALID 0x8UL #define RING_ALLOC_REQ_ENABLES_RESERVED4 0x10UL #define RING_ALLOC_REQ_ENABLES_MAX_BW_VALID 0x20UL u8 ring_type; - #define RING_ALLOC_REQ_RING_TYPE_CMPL (0x0UL << 0) - #define RING_ALLOC_REQ_RING_TYPE_TX (0x1UL << 0) - #define RING_ALLOC_REQ_RING_TYPE_RX (0x2UL << 0) + #define RING_ALLOC_REQ_RING_TYPE_CMPL 0x0UL + #define RING_ALLOC_REQ_RING_TYPE_TX 0x1UL + #define RING_ALLOC_REQ_RING_TYPE_RX 0x2UL u8 unused_0; __le16 unused_1; __le64 page_tbl_addr; @@ -2804,18 +2988,36 @@ struct hwrm_ring_alloc_input { u8 unused_4; u8 unused_5; __le32 reserved1; - __le16 reserved2; + __le16 ring_arb_cfg; + #define RING_ALLOC_REQ_RING_ARB_CFG_ARB_POLICY_MASK 0xfUL + #define RING_ALLOC_REQ_RING_ARB_CFG_ARB_POLICY_SFT 0 + #define RING_ALLOC_REQ_RING_ARB_CFG_ARB_POLICY_SP (0x1UL << 0) + #define RING_ALLOC_REQ_RING_ARB_CFG_ARB_POLICY_WFQ (0x2UL << 0) + #define RING_ALLOC_REQ_RING_ARB_CFG_ARB_POLICY_LAST RING_ALLOC_REQ_RING_ARB_CFG_ARB_POLICY_WFQ + #define RING_ALLOC_REQ_RING_ARB_CFG_RSVD_MASK 0xf0UL + #define RING_ALLOC_REQ_RING_ARB_CFG_RSVD_SFT 4 + #define RING_ALLOC_REQ_RING_ARB_CFG_ARB_POLICY_PARAM_MASK 0xff00UL + #define RING_ALLOC_REQ_RING_ARB_CFG_ARB_POLICY_PARAM_SFT 8 u8 unused_6; u8 unused_7; __le32 reserved3; __le32 stat_ctx_id; __le32 reserved4; __le32 max_bw; + #define RING_ALLOC_REQ_MAX_BW_BW_VALUE_MASK 0xfffffffUL + #define RING_ALLOC_REQ_MAX_BW_BW_VALUE_SFT 0 + #define RING_ALLOC_REQ_MAX_BW_RSVD 0x10000000UL + #define RING_ALLOC_REQ_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define RING_ALLOC_REQ_MAX_BW_BW_VALUE_UNIT_SFT 29 + #define RING_ALLOC_REQ_MAX_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29) + #define RING_ALLOC_REQ_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define RING_ALLOC_REQ_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define RING_ALLOC_REQ_MAX_BW_BW_VALUE_UNIT_LAST RING_ALLOC_REQ_MAX_BW_BW_VALUE_UNIT_INVALID u8 int_mode; - #define RING_ALLOC_REQ_INT_MODE_LEGACY (0x0UL << 0) - #define RING_ALLOC_REQ_INT_MODE_RSVD (0x1UL << 0) - #define RING_ALLOC_REQ_INT_MODE_MSIX (0x2UL << 0) - #define RING_ALLOC_REQ_INT_MODE_POLL (0x3UL << 0) + #define RING_ALLOC_REQ_INT_MODE_LEGACY 0x0UL + #define RING_ALLOC_REQ_INT_MODE_RSVD 0x1UL + #define RING_ALLOC_REQ_INT_MODE_MSIX 0x2UL + #define RING_ALLOC_REQ_INT_MODE_POLL 0x3UL u8 unused_8[3]; }; @@ -2842,9 +3044,9 @@ struct hwrm_ring_free_input { __le16 target_id; __le64 resp_addr; u8 ring_type; - #define RING_FREE_REQ_RING_TYPE_CMPL (0x0UL << 0) - #define RING_FREE_REQ_RING_TYPE_TX (0x1UL << 0) - #define RING_FREE_REQ_RING_TYPE_RX (0x2UL << 0) + #define RING_FREE_REQ_RING_TYPE_CMPL 0x0UL + #define RING_FREE_REQ_RING_TYPE_TX 0x1UL + #define RING_FREE_REQ_RING_TYPE_RX 0x2UL u8 unused_0; __le16 ring_id; __le32 unused_1; @@ -2942,9 +3144,9 @@ struct hwrm_ring_reset_input { __le16 target_id; __le64 resp_addr; u8 ring_type; - #define RING_RESET_REQ_RING_TYPE_CMPL (0x0UL << 0) - #define RING_RESET_REQ_RING_TYPE_TX (0x1UL << 0) - #define RING_RESET_REQ_RING_TYPE_RX (0x2UL << 0) + #define RING_RESET_REQ_RING_TYPE_CMPL 0x0UL + #define RING_RESET_REQ_RING_TYPE_TX 0x1UL + #define RING_RESET_REQ_RING_TYPE_RX 0x2UL u8 unused_0; __le16 ring_id; __le32 unused_1; @@ -3068,36 +3270,36 @@ struct hwrm_cfa_l2_filter_alloc_input { __le16 t_l2_ivlan; __le16 t_l2_ivlan_mask; u8 src_type; - #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_NPORT (0x0UL << 0) - #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_PF (0x1UL << 0) - #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_VF (0x2UL << 0) - #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_VNIC (0x3UL << 0) - #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_KONG (0x4UL << 0) - #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_APE (0x5UL << 0) - #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_BONO (0x6UL << 0) - #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_TANG (0x7UL << 0) + #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_NPORT 0x0UL + #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_PF 0x1UL + #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_VF 0x2UL + #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_VNIC 0x3UL + #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_KONG 0x4UL + #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_APE 0x5UL + #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_BONO 0x6UL + #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_TANG 0x7UL u8 unused_6; __le32 src_id; u8 tunnel_type; - #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_NONTUNNEL (0x0UL << 0) - #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN (0x1UL << 0) - #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_NVGRE (0x2UL << 0) - #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_L2GRE (0x3UL << 0) - #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPIP (0x4UL << 0) - #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_GENEVE (0x5UL << 0) - #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_MPLS (0x6UL << 0) - #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_STT (0x7UL << 0) - #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPGRE (0x8UL << 0) - #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL (0xffUL << 0) + #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_NONTUNNEL 0x0UL + #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN 0x1UL + #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_NVGRE 0x2UL + #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_L2GRE 0x3UL + #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPIP 0x4UL + #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_GENEVE 0x5UL + #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_MPLS 0x6UL + #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_STT 0x7UL + #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPGRE 0x8UL + #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL 0xffUL u8 unused_7; __le16 dst_id; __le16 mirror_vnic_id; u8 pri_hint; - #define CFA_L2_FILTER_ALLOC_REQ_PRI_HINT_NO_PREFER (0x0UL << 0) - #define CFA_L2_FILTER_ALLOC_REQ_PRI_HINT_ABOVE_FILTER (0x1UL << 0) - #define CFA_L2_FILTER_ALLOC_REQ_PRI_HINT_BELOW_FILTER (0x2UL << 0) - #define CFA_L2_FILTER_ALLOC_REQ_PRI_HINT_MAX (0x3UL << 0) - #define CFA_L2_FILTER_ALLOC_REQ_PRI_HINT_MIN (0x4UL << 0) + #define CFA_L2_FILTER_ALLOC_REQ_PRI_HINT_NO_PREFER 0x0UL + #define CFA_L2_FILTER_ALLOC_REQ_PRI_HINT_ABOVE_FILTER 0x1UL + #define CFA_L2_FILTER_ALLOC_REQ_PRI_HINT_BELOW_FILTER 0x2UL + #define CFA_L2_FILTER_ALLOC_REQ_PRI_HINT_MAX 0x3UL + #define CFA_L2_FILTER_ALLOC_REQ_PRI_HINT_MIN 0x4UL u8 unused_8; __le32 unused_9; __le64 l2_filter_id_hint; @@ -3246,16 +3448,16 @@ struct hwrm_cfa_tunnel_filter_alloc_input { u8 l3_addr_type; u8 t_l3_addr_type; u8 tunnel_type; - #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_NONTUNNEL (0x0UL << 0) - #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN (0x1UL << 0) - #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_NVGRE (0x2UL << 0) - #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_L2GRE (0x3UL << 0) - #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPIP (0x4UL << 0) - #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_GENEVE (0x5UL << 0) - #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_MPLS (0x6UL << 0) - #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_STT (0x7UL << 0) - #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPGRE (0x8UL << 0) - #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL (0xffUL << 0) + #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_NONTUNNEL 0x0UL + #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN 0x1UL + #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_NVGRE 0x2UL + #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_L2GRE 0x3UL + #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPIP 0x4UL + #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_GENEVE 0x5UL + #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_MPLS 0x6UL + #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_STT 0x7UL + #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPGRE 0x8UL + #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL 0xffUL u8 unused_0; __le32 vni; __le32 dst_vnic_id; @@ -3311,14 +3513,14 @@ struct hwrm_cfa_encap_record_alloc_input { __le32 flags; #define CFA_ENCAP_RECORD_ALLOC_REQ_FLAGS_LOOPBACK 0x1UL u8 encap_type; - #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_VXLAN (0x1UL << 0) - #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_NVGRE (0x2UL << 0) - #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_L2GRE (0x3UL << 0) - #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_IPIP (0x4UL << 0) - #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_GENEVE (0x5UL << 0) - #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_MPLS (0x6UL << 0) - #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_VLAN (0x7UL << 0) - #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_IPGRE (0x8UL << 0) + #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_VXLAN 0x1UL + #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_NVGRE 0x2UL + #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_L2GRE 0x3UL + #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_IPIP 0x4UL + #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_GENEVE 0x5UL + #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_MPLS 0x6UL + #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_VLAN 0x7UL + #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_IPGRE 0x8UL u8 unused_0; __le16 unused_1; __le32 encap_data[16]; @@ -3397,32 +3599,32 @@ struct hwrm_cfa_ntuple_filter_alloc_input { u8 src_macaddr[6]; __be16 ethertype; u8 ip_addr_type; - #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_ADDR_TYPE_UNKNOWN (0x0UL << 0) - #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_ADDR_TYPE_IPV4 (0x4UL << 0) - #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_ADDR_TYPE_IPV6 (0x6UL << 0) + #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_ADDR_TYPE_UNKNOWN 0x0UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_ADDR_TYPE_IPV4 0x4UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_ADDR_TYPE_IPV6 0x6UL u8 ip_protocol; - #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_PROTOCOL_UNKNOWN (0x0UL << 0) - #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_PROTOCOL_UDP (0x6UL << 0) - #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_PROTOCOL_TCP (0x11UL << 0) + #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_PROTOCOL_UNKNOWN 0x0UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_PROTOCOL_UDP 0x6UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_PROTOCOL_TCP 0x11UL __le16 dst_id; __le16 mirror_vnic_id; u8 tunnel_type; - #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_NONTUNNEL (0x0UL << 0) - #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN (0x1UL << 0) - #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_NVGRE (0x2UL << 0) - #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_L2GRE (0x3UL << 0) - #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPIP (0x4UL << 0) - #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_GENEVE (0x5UL << 0) - #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_MPLS (0x6UL << 0) - #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_STT (0x7UL << 0) - #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPGRE (0x8UL << 0) - #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL (0xffUL << 0) + #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_NONTUNNEL 0x0UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN 0x1UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_NVGRE 0x2UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_L2GRE 0x3UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPIP 0x4UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_GENEVE 0x5UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_MPLS 0x6UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_STT 0x7UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPGRE 0x8UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL 0xffUL u8 pri_hint; - #define CFA_NTUPLE_FILTER_ALLOC_REQ_PRI_HINT_NO_PREFER (0x0UL << 0) - #define CFA_NTUPLE_FILTER_ALLOC_REQ_PRI_HINT_ABOVE (0x1UL << 0) - #define CFA_NTUPLE_FILTER_ALLOC_REQ_PRI_HINT_BELOW (0x2UL << 0) - #define CFA_NTUPLE_FILTER_ALLOC_REQ_PRI_HINT_HIGHEST (0x3UL << 0) - #define CFA_NTUPLE_FILTER_ALLOC_REQ_PRI_HINT_LOWEST (0x4UL << 0) + #define CFA_NTUPLE_FILTER_ALLOC_REQ_PRI_HINT_NO_PREFER 0x0UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_PRI_HINT_ABOVE 0x1UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_PRI_HINT_BELOW 0x2UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_PRI_HINT_HIGHEST 0x3UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_PRI_HINT_LOWEST 0x4UL __be32 src_ipaddr[4]; __be32 src_ipaddr_mask[4]; __be32 dst_ipaddr[4]; @@ -3511,8 +3713,8 @@ struct hwrm_tunnel_dst_port_query_input { __le16 target_id; __le64 resp_addr; u8 tunnel_type; - #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_VXLAN (0x1UL << 0) - #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_GENEVE (0x5UL << 0) + #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_VXLAN 0x1UL + #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_GENEVE 0x5UL u8 unused_0[7]; }; @@ -3539,8 +3741,8 @@ struct hwrm_tunnel_dst_port_alloc_input { __le16 target_id; __le64 resp_addr; u8 tunnel_type; - #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_VXLAN (0x1UL << 0) - #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_GENEVE (0x5UL << 0) + #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_VXLAN 0x1UL + #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_GENEVE 0x5UL u8 unused_0; __be16 tunnel_dst_port_val; __le32 unused_1; @@ -3570,8 +3772,8 @@ struct hwrm_tunnel_dst_port_free_input { __le16 target_id; __le64 resp_addr; u8 tunnel_type; - #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN (0x1UL << 0) - #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_GENEVE (0x5UL << 0) + #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN 0x1UL + #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_GENEVE 0x5UL u8 unused_0; __le16 tunnel_dst_port_id; __le32 unused_1; @@ -3720,15 +3922,15 @@ struct hwrm_fw_reset_input { __le16 target_id; __le64 resp_addr; u8 embedded_proc_type; - #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_BOOT (0x0UL << 0) - #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_MGMT (0x1UL << 0) - #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_NETCTRL (0x2UL << 0) - #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_ROCE (0x3UL << 0) - #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_RSVD (0x4UL << 0) + #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_BOOT 0x0UL + #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_MGMT 0x1UL + #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_NETCTRL 0x2UL + #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_ROCE 0x3UL + #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_RSVD 0x4UL u8 selfrst_status; - #define FW_RESET_REQ_SELFRST_STATUS_SELFRSTNONE (0x0UL << 0) - #define FW_RESET_REQ_SELFRST_STATUS_SELFRSTASAP (0x1UL << 0) - #define FW_RESET_REQ_SELFRST_STATUS_SELFRSTPCIERST (0x2UL << 0) + #define FW_RESET_REQ_SELFRST_STATUS_SELFRSTNONE 0x0UL + #define FW_RESET_REQ_SELFRST_STATUS_SELFRSTASAP 0x1UL + #define FW_RESET_REQ_SELFRST_STATUS_SELFRSTPCIERST 0x2UL __le16 unused_0[3]; }; @@ -3739,9 +3941,9 @@ struct hwrm_fw_reset_output { __le16 seq_id; __le16 resp_len; u8 selfrst_status; - #define FW_RESET_RESP_SELFRST_STATUS_SELFRSTNONE (0x0UL << 0) - #define FW_RESET_RESP_SELFRST_STATUS_SELFRSTASAP (0x1UL << 0) - #define FW_RESET_RESP_SELFRST_STATUS_SELFRSTPCIERST (0x2UL << 0) + #define FW_RESET_RESP_SELFRST_STATUS_SELFRSTNONE 0x0UL + #define FW_RESET_RESP_SELFRST_STATUS_SELFRSTASAP 0x1UL + #define FW_RESET_RESP_SELFRST_STATUS_SELFRSTPCIERST 0x2UL u8 unused_0; __le16 unused_1; u8 unused_2; @@ -3759,11 +3961,11 @@ struct hwrm_fw_qstatus_input { __le16 target_id; __le64 resp_addr; u8 embedded_proc_type; - #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_BOOT (0x0UL << 0) - #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_MGMT (0x1UL << 0) - #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_NETCTRL (0x2UL << 0) - #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_ROCE (0x3UL << 0) - #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_RSVD (0x4UL << 0) + #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_BOOT 0x0UL + #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_MGMT 0x1UL + #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_NETCTRL 0x2UL + #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_ROCE 0x3UL + #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_RSVD 0x4UL u8 unused_0[7]; }; @@ -3774,9 +3976,9 @@ struct hwrm_fw_qstatus_output { __le16 seq_id; __le16 resp_len; u8 selfrst_status; - #define FW_QSTATUS_RESP_SELFRST_STATUS_SELFRSTNONE (0x0UL << 0) - #define FW_QSTATUS_RESP_SELFRST_STATUS_SELFRSTASAP (0x1UL << 0) - #define FW_QSTATUS_RESP_SELFRST_STATUS_SELFRSTPCIERST (0x2UL << 0) + #define FW_QSTATUS_RESP_SELFRST_STATUS_SELFRSTNONE 0x0UL + #define FW_QSTATUS_RESP_SELFRST_STATUS_SELFRSTASAP 0x1UL + #define FW_QSTATUS_RESP_SELFRST_STATUS_SELFRSTPCIERST 0x2UL u8 unused_0; __le16 unused_1; u8 unused_2; @@ -3785,6 +3987,42 @@ struct hwrm_fw_qstatus_output { u8 valid; }; +/* hwrm_fw_set_time */ +/* Input (32 bytes) */ +struct hwrm_fw_set_time_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 year; + #define FW_SET_TIME_REQ_YEAR_UNKNOWN 0x0UL + u8 month; + u8 day; + u8 hour; + u8 minute; + u8 second; + u8 unused_0; + __le16 millisecond; + __le16 zone; + #define FW_SET_TIME_REQ_ZONE_UTC 0x0UL + #define FW_SET_TIME_REQ_ZONE_UNKNOWN 0xffffUL + __le32 unused_1; +}; + +/* Output (16 bytes) */ +struct hwrm_fw_set_time_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le32 unused_0; + u8 unused_1; + u8 unused_2; + u8 unused_3; + u8 valid; +}; + /* hwrm_exec_fwd_resp */ /* Input (128 bytes) */ struct hwrm_exec_fwd_resp_input { @@ -3921,32 +4159,6 @@ struct hwrm_temp_monitor_query_output { u8 valid; }; -/* hwrm_nvm_raw_write_blk */ -/* Input (32 bytes) */ -struct hwrm_nvm_raw_write_blk_input { - __le16 req_type; - __le16 cmpl_ring; - __le16 seq_id; - __le16 target_id; - __le64 resp_addr; - __le64 host_src_addr; - __le32 dest_addr; - __le32 len; -}; - -/* Output (16 bytes) */ -struct hwrm_nvm_raw_write_blk_output { - __le16 error_code; - __le16 req_type; - __le16 seq_id; - __le16 resp_len; - __le32 unused_0; - u8 unused_1; - u8 unused_2; - u8 unused_3; - u8 valid; -}; - /* hwrm_nvm_read */ /* Input (40 bytes) */ struct hwrm_nvm_read_input { @@ -4132,9 +4344,9 @@ struct hwrm_nvm_find_dir_entry_input { u8 opt_ordinal; #define NVM_FIND_DIR_ENTRY_REQ_OPT_ORDINAL_MASK 0x3UL #define NVM_FIND_DIR_ENTRY_REQ_OPT_ORDINAL_SFT 0 - #define NVM_FIND_DIR_ENTRY_REQ_OPT_ORDINAL_EQ (0x0UL << 0) - #define NVM_FIND_DIR_ENTRY_REQ_OPT_ORDINAL_GE (0x1UL << 0) - #define NVM_FIND_DIR_ENTRY_REQ_OPT_ORDINAL_GT (0x2UL << 0) + #define NVM_FIND_DIR_ENTRY_REQ_OPT_ORDINAL_EQ 0x0UL + #define NVM_FIND_DIR_ENTRY_REQ_OPT_ORDINAL_GE 0x1UL + #define NVM_FIND_DIR_ENTRY_REQ_OPT_ORDINAL_GT 0x2UL u8 unused_1[3]; }; @@ -4266,4 +4478,41 @@ struct hwrm_nvm_verify_update_output { u8 valid; }; +/* hwrm_nvm_install_update */ +/* Input (24 bytes) */ +struct hwrm_nvm_install_update_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 install_type; + #define NVM_INSTALL_UPDATE_REQ_INSTALL_TYPE_NORMAL 0x0UL + #define NVM_INSTALL_UPDATE_REQ_INSTALL_TYPE_ALL 0xffffffffUL + __le32 unused_0; +}; + +/* Output (24 bytes) */ +struct hwrm_nvm_install_update_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le64 installed_items; + u8 result; + #define NVM_INSTALL_UPDATE_RESP_RESULT_SUCCESS 0x0UL + u8 problem_item; + #define NVM_INSTALL_UPDATE_RESP_PROBLEM_ITEM_NONE 0x0UL + #define NVM_INSTALL_UPDATE_RESP_PROBLEM_ITEM_PACKAGE 0xffUL + u8 reset_required; + #define NVM_INSTALL_UPDATE_RESP_RESET_REQUIRED_NONE 0x0UL + #define NVM_INSTALL_UPDATE_RESP_RESET_REQUIRED_PCI 0x1UL + #define NVM_INSTALL_UPDATE_RESP_RESET_REQUIRED_POWER 0x2UL + u8 unused_0; + u8 unused_1; + u8 unused_2; + u8 unused_3; + u8 valid; +}; + #endif From 5ac67d8bc753b122175e682274599338b3ee7d42 Mon Sep 17 00:00:00 2001 From: Rob Swindell Date: Mon, 19 Sep 2016 03:58:03 -0400 Subject: [PATCH 1247/1715] bnxt_en: Added support for Secure Firmware Update Using Ethtool flashdev command, entire NVM package (*.pkg) files may now be staged into the "update" area of the NVM and subsequently verified and installed by the firmware using the newly introduced command: NVM_INSTALL_UPDATE. We also introduce use of the new firmware command FW_SET_TIME so that the NVM-resident package installation log contains valid time-stamps. Signed-off-by: Rob Swindell Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 24 +++ drivers/net/ethernet/broadcom/bnxt/bnxt.h | 1 + .../net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 155 ++++++++++++++++-- .../net/ethernet/broadcom/bnxt/bnxt_fw_hdr.h | 16 +- 4 files changed, 182 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index f6b4f342d7b4..f0a9d23fda15 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -4314,6 +4315,27 @@ hwrm_ver_get_exit: return rc; } +int bnxt_hwrm_fw_set_time(struct bnxt *bp) +{ + struct hwrm_fw_set_time_input req = {0}; + struct rtc_time tm; + struct timeval tv; + + if (bp->hwrm_spec_code < 0x10400) + return -EOPNOTSUPP; + + do_gettimeofday(&tv); + rtc_time_to_tm(tv.tv_sec, &tm); + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FW_SET_TIME, -1, -1); + req.year = cpu_to_le16(1900 + tm.tm_year); + req.month = 1 + tm.tm_mon; + req.day = tm.tm_mday; + req.hour = tm.tm_hour; + req.minute = tm.tm_min; + req.second = tm.tm_sec; + return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); +} + static int bnxt_hwrm_port_qstats(struct bnxt *bp) { int rc; @@ -6811,6 +6833,8 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (rc) goto init_err; + bnxt_hwrm_fw_set_time(bp); + dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_GRE | diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 012cc51440b7..41033d01fe8d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1220,6 +1220,7 @@ int bnxt_hwrm_set_coal(struct bnxt *); int bnxt_hwrm_func_qcaps(struct bnxt *); int bnxt_hwrm_set_pause(struct bnxt *); int bnxt_hwrm_set_link_setting(struct bnxt *, bool, bool); +int bnxt_hwrm_fw_set_time(struct bnxt *); int bnxt_open_nic(struct bnxt *, bool, bool); int bnxt_close_nic(struct bnxt *, bool, bool); int bnxt_get_max_rings(struct bnxt *, int *, int *, bool); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index b83e17403d6c..4a430b623489 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -21,6 +21,8 @@ #include "bnxt_nvm_defs.h" /* NVRAM content constant and structure defs */ #include "bnxt_fw_hdr.h" /* Firmware hdr constant and structure defs */ #define FLASH_NVRAM_TIMEOUT ((HWRM_CMD_TIMEOUT) * 100) +#define FLASH_PACKAGE_TIMEOUT ((HWRM_CMD_TIMEOUT) * 200) +#define INSTALL_PACKAGE_TIMEOUT ((HWRM_CMD_TIMEOUT) * 200) static char *bnxt_get_pkgver(struct net_device *dev, char *buf, size_t buflen); @@ -1028,6 +1030,10 @@ static u32 bnxt_get_link(struct net_device *dev) return bp->link_info.link_up; } +static int bnxt_find_nvram_item(struct net_device *dev, u16 type, u16 ordinal, + u16 ext, u16 *index, u32 *item_length, + u32 *data_length); + static int bnxt_flash_nvram(struct net_device *dev, u16 dir_type, u16 dir_ordinal, @@ -1179,7 +1185,6 @@ static int bnxt_flash_firmware(struct net_device *dev, (unsigned long)calculated_crc); return -EINVAL; } - /* TODO: Validate digital signature (RSA-encrypted SHA-256 hash) here */ rc = bnxt_flash_nvram(dev, dir_type, BNX_DIR_ORDINAL_FIRST, 0, 0, fw_data, fw_size); if (rc == 0) /* Firmware update successful */ @@ -1188,6 +1193,57 @@ static int bnxt_flash_firmware(struct net_device *dev, return rc; } +static int bnxt_flash_microcode(struct net_device *dev, + u16 dir_type, + const u8 *fw_data, + size_t fw_size) +{ + struct bnxt_ucode_trailer *trailer; + u32 calculated_crc; + u32 stored_crc; + int rc = 0; + + if (fw_size < sizeof(struct bnxt_ucode_trailer)) { + netdev_err(dev, "Invalid microcode file size: %u\n", + (unsigned int)fw_size); + return -EINVAL; + } + trailer = (struct bnxt_ucode_trailer *)(fw_data + (fw_size - + sizeof(*trailer))); + if (trailer->sig != cpu_to_le32(BNXT_UCODE_TRAILER_SIGNATURE)) { + netdev_err(dev, "Invalid microcode trailer signature: %08X\n", + le32_to_cpu(trailer->sig)); + return -EINVAL; + } + if (le16_to_cpu(trailer->dir_type) != dir_type) { + netdev_err(dev, "Expected microcode type: %d, read: %d\n", + dir_type, le16_to_cpu(trailer->dir_type)); + return -EINVAL; + } + if (le16_to_cpu(trailer->trailer_length) < + sizeof(struct bnxt_ucode_trailer)) { + netdev_err(dev, "Invalid microcode trailer length: %d\n", + le16_to_cpu(trailer->trailer_length)); + return -EINVAL; + } + + /* Confirm the CRC32 checksum of the file: */ + stored_crc = le32_to_cpu(*(__le32 *)(fw_data + fw_size - + sizeof(stored_crc))); + calculated_crc = ~crc32(~0, fw_data, fw_size - sizeof(stored_crc)); + if (calculated_crc != stored_crc) { + netdev_err(dev, + "CRC32 (%08lX) does not match calculated: %08lX\n", + (unsigned long)stored_crc, + (unsigned long)calculated_crc); + return -EINVAL; + } + rc = bnxt_flash_nvram(dev, dir_type, BNX_DIR_ORDINAL_FIRST, + 0, 0, fw_data, fw_size); + + return rc; +} + static bool bnxt_dir_type_is_ape_bin_format(u16 dir_type) { switch (dir_type) { @@ -1206,7 +1262,7 @@ static bool bnxt_dir_type_is_ape_bin_format(u16 dir_type) return false; } -static bool bnxt_dir_type_is_unprotected_exec_format(u16 dir_type) +static bool bnxt_dir_type_is_other_exec_format(u16 dir_type) { switch (dir_type) { case BNX_DIR_TYPE_AVS: @@ -1227,7 +1283,7 @@ static bool bnxt_dir_type_is_unprotected_exec_format(u16 dir_type) static bool bnxt_dir_type_is_executable(u16 dir_type) { return bnxt_dir_type_is_ape_bin_format(dir_type) || - bnxt_dir_type_is_unprotected_exec_format(dir_type); + bnxt_dir_type_is_other_exec_format(dir_type); } static int bnxt_flash_firmware_from_file(struct net_device *dev, @@ -1237,10 +1293,6 @@ static int bnxt_flash_firmware_from_file(struct net_device *dev, const struct firmware *fw; int rc; - if (dir_type != BNX_DIR_TYPE_UPDATE && - bnxt_dir_type_is_executable(dir_type) == false) - return -EINVAL; - rc = request_firmware(&fw, filename, &dev->dev); if (rc != 0) { netdev_err(dev, "Error %d requesting firmware file: %s\n", @@ -1249,6 +1301,8 @@ static int bnxt_flash_firmware_from_file(struct net_device *dev, } if (bnxt_dir_type_is_ape_bin_format(dir_type) == true) rc = bnxt_flash_firmware(dev, dir_type, fw->data, fw->size); + else if (bnxt_dir_type_is_other_exec_format(dir_type) == true) + rc = bnxt_flash_microcode(dev, dir_type, fw->data, fw->size); else rc = bnxt_flash_nvram(dev, dir_type, BNX_DIR_ORDINAL_FIRST, 0, 0, fw->data, fw->size); @@ -1257,10 +1311,83 @@ static int bnxt_flash_firmware_from_file(struct net_device *dev, } static int bnxt_flash_package_from_file(struct net_device *dev, - char *filename) + char *filename, u32 install_type) { - netdev_err(dev, "packages are not yet supported\n"); - return -EINVAL; + struct bnxt *bp = netdev_priv(dev); + struct hwrm_nvm_install_update_output *resp = bp->hwrm_cmd_resp_addr; + struct hwrm_nvm_install_update_input install = {0}; + const struct firmware *fw; + u32 item_len; + u16 index; + int rc; + + bnxt_hwrm_fw_set_time(bp); + + if (bnxt_find_nvram_item(dev, BNX_DIR_TYPE_UPDATE, + BNX_DIR_ORDINAL_FIRST, BNX_DIR_EXT_NONE, + &index, &item_len, NULL) != 0) { + netdev_err(dev, "PKG update area not created in nvram\n"); + return -ENOBUFS; + } + + rc = request_firmware(&fw, filename, &dev->dev); + if (rc != 0) { + netdev_err(dev, "PKG error %d requesting file: %s\n", + rc, filename); + return rc; + } + + if (fw->size > item_len) { + netdev_err(dev, "PKG insufficient update area in nvram: %lu", + (unsigned long)fw->size); + rc = -EFBIG; + } else { + dma_addr_t dma_handle; + u8 *kmem; + struct hwrm_nvm_modify_input modify = {0}; + + bnxt_hwrm_cmd_hdr_init(bp, &modify, HWRM_NVM_MODIFY, -1, -1); + + modify.dir_idx = cpu_to_le16(index); + modify.len = cpu_to_le32(fw->size); + + kmem = dma_alloc_coherent(&bp->pdev->dev, fw->size, + &dma_handle, GFP_KERNEL); + if (!kmem) { + netdev_err(dev, + "dma_alloc_coherent failure, length = %u\n", + (unsigned int)fw->size); + rc = -ENOMEM; + } else { + memcpy(kmem, fw->data, fw->size); + modify.host_src_addr = cpu_to_le64(dma_handle); + + rc = hwrm_send_message(bp, &modify, sizeof(modify), + FLASH_PACKAGE_TIMEOUT); + dma_free_coherent(&bp->pdev->dev, fw->size, kmem, + dma_handle); + } + } + release_firmware(fw); + if (rc) + return rc; + + if ((install_type & 0xffff) == 0) + install_type >>= 16; + bnxt_hwrm_cmd_hdr_init(bp, &install, HWRM_NVM_INSTALL_UPDATE, -1, -1); + install.install_type = cpu_to_le32(install_type); + + rc = hwrm_send_message(bp, &install, sizeof(install), + INSTALL_PACKAGE_TIMEOUT); + if (rc) + return -EOPNOTSUPP; + + if (resp->result) { + netdev_err(dev, "PKG install error = %d, problem_item = %d\n", + (s8)resp->result, (int)resp->problem_item); + return -ENOPKG; + } + return 0; } static int bnxt_flash_device(struct net_device *dev, @@ -1271,8 +1398,10 @@ static int bnxt_flash_device(struct net_device *dev, return -EINVAL; } - if (flash->region == ETHTOOL_FLASH_ALL_REGIONS) - return bnxt_flash_package_from_file(dev, flash->data); + if (flash->region == ETHTOOL_FLASH_ALL_REGIONS || + flash->region > 0xffff) + return bnxt_flash_package_from_file(dev, flash->data, + flash->region); return bnxt_flash_firmware_from_file(dev, flash->region, flash->data); } @@ -1516,7 +1645,7 @@ static int bnxt_set_eeprom(struct net_device *dev, /* Create or re-write an NVM item: */ if (bnxt_dir_type_is_executable(type) == true) - return -EINVAL; + return -EOPNOTSUPP; ext = eeprom->magic & 0xffff; ordinal = eeprom->offset >> 16; attr = eeprom->offset & 0xffff; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_fw_hdr.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_fw_hdr.h index 82bf44ab811b..cad30ddc6936 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_fw_hdr.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_fw_hdr.h @@ -11,6 +11,7 @@ #define __BNXT_FW_HDR_H__ #define BNXT_FIRMWARE_BIN_SIGNATURE 0x1a4d4342 /* "BCM"+0x1a */ +#define BNXT_UCODE_TRAILER_SIGNATURE 0x726c7254 /* "Trlr" */ enum SUPPORTED_FAMILY { DEVICE_5702_3_4_FAMILY, /* 0 - Denali, Vinson, K2 */ @@ -85,7 +86,7 @@ enum SUPPORTED_MEDIA { struct bnxt_fw_header { __le32 signature; /* constains the constant value of - * BNXT_Firmware_Bin_Signatures + * BNXT_FIRMWARE_BIN_SIGNATURE */ u8 flags; /* reserved for ChiMP use */ u8 code_type; /* enum SUPPORTED_CODE */ @@ -102,4 +103,17 @@ struct bnxt_fw_header { u8 major_ver; }; +/* Microcode and pre-boot software/firmware trailer: */ +struct bnxt_ucode_trailer { + u8 rsa_sig[256]; + __le16 flags; + u8 version_format; + u8 version_length; + u8 version[16]; + __le16 dir_type; + __le16 trailer_length; + __le32 sig; /* BNXT_UCODE_TRAILER_SIGNATURE */ + __le32 chksum; /* CRC-32 */ +}; + #endif From 47f8e8b9bbbbe00740786bd1da0d5097d45ba46b Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 19 Sep 2016 03:58:04 -0400 Subject: [PATCH 1248/1715] bnxt_en: Fix ethtool -l|-L inconsistent channel counts. The existing code is inconsistent in reporting and accepting the combined channel count. bnxt_get_channels() reports maximum combined as the maximum rx count. bnxt_set_channels() accepts combined count that cannot be bigger than max rx or max tx. For example, if max rx = 2 and max tx = 1, we report max supported combined to be 2. But if the user tries to set combined to 2, it will fail because 2 is bigger than max tx which is 1. Fix the code to be consistent. Max allowed combined = max(max_rx, max_tx). We will accept a combined channel count <= max(max_rx, max_tx). Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 4a430b623489..c74ce698805e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -348,7 +348,7 @@ static void bnxt_get_channels(struct net_device *dev, int max_rx_rings, max_tx_rings, tcs; bnxt_get_max_rings(bp, &max_rx_rings, &max_tx_rings, true); - channel->max_combined = max_rx_rings; + channel->max_combined = max_t(int, max_rx_rings, max_tx_rings); if (bnxt_get_max_rings(bp, &max_rx_rings, &max_tx_rings, false)) { max_rx_rings = 0; @@ -406,8 +406,8 @@ static int bnxt_set_channels(struct net_device *dev, if (tcs > 1) max_tx_rings /= tcs; - if (sh && (channel->combined_count > max_rx_rings || - channel->combined_count > max_tx_rings)) + if (sh && + channel->combined_count > max_t(int, max_rx_rings, max_tx_rings)) return -ENOMEM; if (!sh && (channel->rx_count > max_rx_rings || @@ -430,8 +430,10 @@ static int bnxt_set_channels(struct net_device *dev, if (sh) { bp->flags |= BNXT_FLAG_SHARED_RINGS; - bp->rx_nr_rings = channel->combined_count; - bp->tx_nr_rings_per_tc = channel->combined_count; + bp->rx_nr_rings = min_t(int, channel->combined_count, + max_rx_rings); + bp->tx_nr_rings_per_tc = min_t(int, channel->combined_count, + max_tx_rings); } else { bp->flags &= ~BNXT_FLAG_SHARED_RINGS; bp->rx_nr_rings = channel->rx_count; From 7cc5a20e38fcaf395ac59e7ed6c3decb575a0dc7 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 19 Sep 2016 03:58:05 -0400 Subject: [PATCH 1249/1715] bnxt_en: Re-arrange bnxt_hwrm_func_qcaps(). Re-arrange the code so that the generation of the random MAC address for the VF is at the end of the function. The next patch will add one more step to call bnxt_approve_mac() to get the firmware to approve the random MAC address. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index f0a9d23fda15..b1dcece2cd64 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4161,6 +4161,11 @@ int bnxt_hwrm_func_qcaps(struct bnxt *bp) if (rc) goto hwrm_func_qcaps_exit; + bp->tx_push_thresh = 0; + if (resp->flags & + cpu_to_le32(FUNC_QCAPS_RESP_FLAGS_PUSH_MODE_SUPPORTED)) + bp->tx_push_thresh = BNXT_TX_PUSH_THRESH; + if (BNXT_PF(bp)) { struct bnxt_pf_info *pf = &bp->pf; @@ -4192,12 +4197,6 @@ int bnxt_hwrm_func_qcaps(struct bnxt *bp) struct bnxt_vf_info *vf = &bp->vf; vf->fw_fid = le16_to_cpu(resp->fid); - memcpy(vf->mac_addr, resp->mac_address, ETH_ALEN); - if (is_valid_ether_addr(vf->mac_addr)) - /* overwrite netdev dev_adr with admin VF MAC */ - memcpy(bp->dev->dev_addr, vf->mac_addr, ETH_ALEN); - else - random_ether_addr(bp->dev->dev_addr); vf->max_rsscos_ctxs = le16_to_cpu(resp->max_rsscos_ctx); vf->max_cp_rings = le16_to_cpu(resp->max_cmpl_rings); @@ -4209,14 +4208,16 @@ int bnxt_hwrm_func_qcaps(struct bnxt *bp) vf->max_l2_ctxs = le16_to_cpu(resp->max_l2_ctxs); vf->max_vnics = le16_to_cpu(resp->max_vnics); vf->max_stat_ctxs = le16_to_cpu(resp->max_stat_ctx); + + memcpy(vf->mac_addr, resp->mac_address, ETH_ALEN); + if (is_valid_ether_addr(vf->mac_addr)) + /* overwrite netdev dev_adr with admin VF MAC */ + memcpy(bp->dev->dev_addr, vf->mac_addr, ETH_ALEN); + else + random_ether_addr(bp->dev->dev_addr); #endif } - bp->tx_push_thresh = 0; - if (resp->flags & - cpu_to_le32(FUNC_QCAPS_RESP_FLAGS_PUSH_MODE_SUPPORTED)) - bp->tx_push_thresh = BNXT_TX_PUSH_THRESH; - hwrm_func_qcaps_exit: mutex_unlock(&bp->hwrm_cmd_lock); return rc; From 001154eb242b5a6667b74e5cf20873fb75f1b9d3 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 19 Sep 2016 03:58:06 -0400 Subject: [PATCH 1250/1715] bnxt_en: Call firmware to approve the random VF MAC address. After generating the random MAC address for VF, call the firmware to approve it. This step serves 2 purposes. Some hypervisor (e.g. ESX) wants to approve the MAC address. 2nd, the call will setup the proper forwarding database in the internal switch. We need to unlock the hwrm_cmd_lock mutex before calling bnxt_approve_mac(). We can do that because we are at the end of the function and all the previous firmware response data has been copied. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index b1dcece2cd64..cbc0b8ad916c 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4210,11 +4210,16 @@ int bnxt_hwrm_func_qcaps(struct bnxt *bp) vf->max_stat_ctxs = le16_to_cpu(resp->max_stat_ctx); memcpy(vf->mac_addr, resp->mac_address, ETH_ALEN); - if (is_valid_ether_addr(vf->mac_addr)) + mutex_unlock(&bp->hwrm_cmd_lock); + + if (is_valid_ether_addr(vf->mac_addr)) { /* overwrite netdev dev_adr with admin VF MAC */ memcpy(bp->dev->dev_addr, vf->mac_addr, ETH_ALEN); - else + } else { random_ether_addr(bp->dev->dev_addr); + rc = bnxt_approve_mac(bp, bp->dev->dev_addr); + } + return rc; #endif } From 4ffcd582301bd020b1f9d00c55473af305ec19b5 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 19 Sep 2016 03:58:07 -0400 Subject: [PATCH 1251/1715] bnxt_en: Pad TX packets below 52 bytes. The hardware has a limitation that it won't pass host to BMC loopback packets below 52-bytes. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 41033d01fe8d..51b164a0e844 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -413,7 +413,7 @@ struct rx_tpa_end_cmp_ext { #define BNXT_RX_PAGE_SIZE (1 << BNXT_RX_PAGE_SHIFT) -#define BNXT_MIN_PKT_SIZE 45 +#define BNXT_MIN_PKT_SIZE 52 #define BNXT_NUM_TESTS(bp) 0 From ae8e98a6fa7a73917196c507e43414ea96b6a0fc Mon Sep 17 00:00:00 2001 From: Deepak Khungar Date: Mon, 19 Sep 2016 03:58:08 -0400 Subject: [PATCH 1252/1715] bnxt_en: Support for "ethtool -r" command Restart autoneg if autoneg is enabled. Signed-off-by: Deepak Khungar Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- .../net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index c74ce698805e..a7e04ff4eaed 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -1849,6 +1849,25 @@ static int bnxt_get_module_eeprom(struct net_device *dev, return rc; } +static int bnxt_nway_reset(struct net_device *dev) +{ + int rc = 0; + + struct bnxt *bp = netdev_priv(dev); + struct bnxt_link_info *link_info = &bp->link_info; + + if (!BNXT_SINGLE_PF(bp)) + return -EOPNOTSUPP; + + if (!(link_info->autoneg & BNXT_AUTONEG_SPEED)) + return -EINVAL; + + if (netif_running(dev)) + rc = bnxt_hwrm_set_link_setting(bp, true, false); + + return rc; +} + const struct ethtool_ops bnxt_ethtool_ops = { .get_link_ksettings = bnxt_get_link_ksettings, .set_link_ksettings = bnxt_set_link_ksettings, @@ -1881,4 +1900,5 @@ const struct ethtool_ops bnxt_ethtool_ops = { .set_eee = bnxt_set_eee, .get_module_info = bnxt_get_module_info, .get_module_eeprom = bnxt_get_module_eeprom, + .nway_reset = bnxt_nway_reset }; From 350a714960eb8a980c913c9be5a96bb18b2fe9da Mon Sep 17 00:00:00 2001 From: Eddie Wai Date: Mon, 19 Sep 2016 03:58:09 -0400 Subject: [PATCH 1253/1715] bnxt_en: Fixed the VF link status after a link state change The VF link state can be changed via the 'ip link set' cmd. Currently, the new link state does not take effect immediately. The fix is for the PF to send a link change async event to the designated VF after a VF link state change. This async event will trigger the VF to update the link status. Signed-off-by: Eddie Wai Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- .../net/ethernet/broadcom/bnxt/bnxt_sriov.c | 84 +++++++++---------- 1 file changed, 42 insertions(+), 42 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c index 50d2007a2640..8be718508600 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c @@ -19,6 +19,45 @@ #include "bnxt_ethtool.h" #ifdef CONFIG_BNXT_SRIOV +static int bnxt_hwrm_fwd_async_event_cmpl(struct bnxt *bp, + struct bnxt_vf_info *vf, u16 event_id) +{ + struct hwrm_fwd_async_event_cmpl_output *resp = bp->hwrm_cmd_resp_addr; + struct hwrm_fwd_async_event_cmpl_input req = {0}; + struct hwrm_async_event_cmpl *async_cmpl; + int rc = 0; + + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FWD_ASYNC_EVENT_CMPL, -1, -1); + if (vf) + req.encap_async_event_target_id = cpu_to_le16(vf->fw_fid); + else + /* broadcast this async event to all VFs */ + req.encap_async_event_target_id = cpu_to_le16(0xffff); + async_cmpl = (struct hwrm_async_event_cmpl *)req.encap_async_event_cmpl; + async_cmpl->type = + cpu_to_le16(HWRM_ASYNC_EVENT_CMPL_TYPE_HWRM_ASYNC_EVENT); + async_cmpl->event_id = cpu_to_le16(event_id); + + mutex_lock(&bp->hwrm_cmd_lock); + rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + + if (rc) { + netdev_err(bp->dev, "hwrm_fwd_async_event_cmpl failed. rc:%d\n", + rc); + goto fwd_async_event_cmpl_exit; + } + + if (resp->error_code) { + netdev_err(bp->dev, "hwrm_fwd_async_event_cmpl error %d\n", + resp->error_code); + rc = -1; + } + +fwd_async_event_cmpl_exit: + mutex_unlock(&bp->hwrm_cmd_lock); + return rc; +} + static int bnxt_vf_ndo_prep(struct bnxt *bp, int vf_id) { if (!test_bit(BNXT_STATE_OPEN, &bp->state)) { @@ -243,8 +282,9 @@ int bnxt_set_vf_link_state(struct net_device *dev, int vf_id, int link) rc = -EINVAL; break; } - /* CHIMP TODO: send msg to VF to update new link state */ - + if (vf->flags & (BNXT_VF_LINK_UP | BNXT_VF_LINK_FORCED)) + rc = bnxt_hwrm_fwd_async_event_cmpl(bp, vf, + HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_STATUS_CHANGE); return rc; } @@ -525,46 +565,6 @@ err_out1: return rc; } -static int bnxt_hwrm_fwd_async_event_cmpl(struct bnxt *bp, - struct bnxt_vf_info *vf, - u16 event_id) -{ - int rc = 0; - struct hwrm_fwd_async_event_cmpl_input req = {0}; - struct hwrm_fwd_async_event_cmpl_output *resp = bp->hwrm_cmd_resp_addr; - struct hwrm_async_event_cmpl *async_cmpl; - - bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FWD_ASYNC_EVENT_CMPL, -1, -1); - if (vf) - req.encap_async_event_target_id = cpu_to_le16(vf->fw_fid); - else - /* broadcast this async event to all VFs */ - req.encap_async_event_target_id = cpu_to_le16(0xffff); - async_cmpl = (struct hwrm_async_event_cmpl *)req.encap_async_event_cmpl; - async_cmpl->type = - cpu_to_le16(HWRM_ASYNC_EVENT_CMPL_TYPE_HWRM_ASYNC_EVENT); - async_cmpl->event_id = cpu_to_le16(event_id); - - mutex_lock(&bp->hwrm_cmd_lock); - rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); - - if (rc) { - netdev_err(bp->dev, "hwrm_fwd_async_event_cmpl failed. rc:%d\n", - rc); - goto fwd_async_event_cmpl_exit; - } - - if (resp->error_code) { - netdev_err(bp->dev, "hwrm_fwd_async_event_cmpl error %d\n", - resp->error_code); - rc = -1; - } - -fwd_async_event_cmpl_exit: - mutex_unlock(&bp->hwrm_cmd_lock); - return rc; -} - void bnxt_sriov_disable(struct bnxt *bp) { u16 num_vfs = pci_num_vf(bp->pdev); From 51f141bec15aecb2ee5f0db77761dbf219333b93 Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Sun, 18 Sep 2016 00:11:34 +0200 Subject: [PATCH 1254/1715] net: ethernet: broadcom: b44: use phydev from struct net_device The private structure contain a pointer to phydev, but the structure net_device already contain such pointer. So we can remove the pointer phydev in the private structure, and update the driver to use the one contained in struct net_device. Signed-off-by: Philippe Reynes Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/b44.c | 22 +++++++++++----------- drivers/net/ethernet/broadcom/b44.h | 1 - 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index 74f0a37c4eb6..936f06ffef23 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -1486,7 +1486,7 @@ static int b44_open(struct net_device *dev) b44_enable_ints(bp); if (bp->flags & B44_FLAG_EXTERNAL_PHY) - phy_start(bp->phydev); + phy_start(dev->phydev); netif_start_queue(dev); out: @@ -1651,7 +1651,7 @@ static int b44_close(struct net_device *dev) netif_stop_queue(dev); if (bp->flags & B44_FLAG_EXTERNAL_PHY) - phy_stop(bp->phydev); + phy_stop(dev->phydev); napi_disable(&bp->napi); @@ -1837,8 +1837,8 @@ static int b44_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) struct b44 *bp = netdev_priv(dev); if (bp->flags & B44_FLAG_EXTERNAL_PHY) { - BUG_ON(!bp->phydev); - return phy_ethtool_gset(bp->phydev, cmd); + BUG_ON(!dev->phydev); + return phy_ethtool_gset(dev->phydev, cmd); } cmd->supported = (SUPPORTED_Autoneg); @@ -1886,12 +1886,12 @@ static int b44_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) int ret; if (bp->flags & B44_FLAG_EXTERNAL_PHY) { - BUG_ON(!bp->phydev); + BUG_ON(!dev->phydev); spin_lock_irq(&bp->lock); if (netif_running(dev)) b44_setup_phy(bp); - ret = phy_ethtool_sset(bp->phydev, cmd); + ret = phy_ethtool_sset(dev->phydev, cmd); spin_unlock_irq(&bp->lock); @@ -2137,8 +2137,8 @@ static int b44_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) spin_lock_irq(&bp->lock); if (bp->flags & B44_FLAG_EXTERNAL_PHY) { - BUG_ON(!bp->phydev); - err = phy_mii_ioctl(bp->phydev, ifr, cmd); + BUG_ON(!dev->phydev); + err = phy_mii_ioctl(dev->phydev, ifr, cmd); } else { err = generic_mii_ioctl(&bp->mii_if, if_mii(ifr), cmd, NULL); } @@ -2206,7 +2206,7 @@ static const struct net_device_ops b44_netdev_ops = { static void b44_adjust_link(struct net_device *dev) { struct b44 *bp = netdev_priv(dev); - struct phy_device *phydev = bp->phydev; + struct phy_device *phydev = dev->phydev; bool status_changed = 0; BUG_ON(!phydev); @@ -2303,7 +2303,6 @@ static int b44_register_phy_one(struct b44 *bp) SUPPORTED_MII); phydev->advertising = phydev->supported; - bp->phydev = phydev; bp->old_link = 0; bp->phy_addr = phydev->mdio.addr; @@ -2323,9 +2322,10 @@ err_out: static void b44_unregister_phy_one(struct b44 *bp) { + struct net_device *dev = bp->dev; struct mii_bus *mii_bus = bp->mii_bus; - phy_disconnect(bp->phydev); + phy_disconnect(dev->phydev); mdiobus_unregister(mii_bus); mdiobus_free(mii_bus); } diff --git a/drivers/net/ethernet/broadcom/b44.h b/drivers/net/ethernet/broadcom/b44.h index 65d88d7c5581..89d2cf341163 100644 --- a/drivers/net/ethernet/broadcom/b44.h +++ b/drivers/net/ethernet/broadcom/b44.h @@ -404,7 +404,6 @@ struct b44 { u32 tx_pending; u8 phy_addr; u8 force_copybreak; - struct phy_device *phydev; struct mii_bus *mii_bus; int old_link; struct mii_if_info mii_if; From 2406e5d4c4c48c9402c0629748c001178ea4c149 Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Sun, 18 Sep 2016 00:11:35 +0200 Subject: [PATCH 1255/1715] net: ethernet: broadcom: b44: use new api ethtool_{get|set}_link_ksettings The ethtool api {get|set}_settings is deprecated. We move this driver to new api {get|set}_link_ksettings. Signed-off-by: Philippe Reynes Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/b44.c | 98 ++++++++++++++++------------- 1 file changed, 54 insertions(+), 44 deletions(-) diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index 936f06ffef23..17aa33c5567d 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -1832,58 +1832,65 @@ static int b44_nway_reset(struct net_device *dev) return r; } -static int b44_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) +static int b44_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) { struct b44 *bp = netdev_priv(dev); + u32 supported, advertising; if (bp->flags & B44_FLAG_EXTERNAL_PHY) { BUG_ON(!dev->phydev); - return phy_ethtool_gset(dev->phydev, cmd); + return phy_ethtool_ksettings_get(dev->phydev, cmd); } - cmd->supported = (SUPPORTED_Autoneg); - cmd->supported |= (SUPPORTED_100baseT_Half | - SUPPORTED_100baseT_Full | - SUPPORTED_10baseT_Half | - SUPPORTED_10baseT_Full | - SUPPORTED_MII); + supported = (SUPPORTED_Autoneg); + supported |= (SUPPORTED_100baseT_Half | + SUPPORTED_100baseT_Full | + SUPPORTED_10baseT_Half | + SUPPORTED_10baseT_Full | + SUPPORTED_MII); - cmd->advertising = 0; + advertising = 0; if (bp->flags & B44_FLAG_ADV_10HALF) - cmd->advertising |= ADVERTISED_10baseT_Half; + advertising |= ADVERTISED_10baseT_Half; if (bp->flags & B44_FLAG_ADV_10FULL) - cmd->advertising |= ADVERTISED_10baseT_Full; + advertising |= ADVERTISED_10baseT_Full; if (bp->flags & B44_FLAG_ADV_100HALF) - cmd->advertising |= ADVERTISED_100baseT_Half; + advertising |= ADVERTISED_100baseT_Half; if (bp->flags & B44_FLAG_ADV_100FULL) - cmd->advertising |= ADVERTISED_100baseT_Full; - cmd->advertising |= ADVERTISED_Pause | ADVERTISED_Asym_Pause; - ethtool_cmd_speed_set(cmd, ((bp->flags & B44_FLAG_100_BASE_T) ? - SPEED_100 : SPEED_10)); - cmd->duplex = (bp->flags & B44_FLAG_FULL_DUPLEX) ? + advertising |= ADVERTISED_100baseT_Full; + advertising |= ADVERTISED_Pause | ADVERTISED_Asym_Pause; + cmd->base.speed = (bp->flags & B44_FLAG_100_BASE_T) ? + SPEED_100 : SPEED_10; + cmd->base.duplex = (bp->flags & B44_FLAG_FULL_DUPLEX) ? DUPLEX_FULL : DUPLEX_HALF; - cmd->port = 0; - cmd->phy_address = bp->phy_addr; - cmd->transceiver = (bp->flags & B44_FLAG_EXTERNAL_PHY) ? - XCVR_EXTERNAL : XCVR_INTERNAL; - cmd->autoneg = (bp->flags & B44_FLAG_FORCE_LINK) ? + cmd->base.port = 0; + cmd->base.phy_address = bp->phy_addr; + cmd->base.autoneg = (bp->flags & B44_FLAG_FORCE_LINK) ? AUTONEG_DISABLE : AUTONEG_ENABLE; - if (cmd->autoneg == AUTONEG_ENABLE) - cmd->advertising |= ADVERTISED_Autoneg; + if (cmd->base.autoneg == AUTONEG_ENABLE) + advertising |= ADVERTISED_Autoneg; + + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, + supported); + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, + advertising); + if (!netif_running(dev)){ - ethtool_cmd_speed_set(cmd, 0); - cmd->duplex = 0xff; + cmd->base.speed = 0; + cmd->base.duplex = 0xff; } - cmd->maxtxpkt = 0; - cmd->maxrxpkt = 0; + return 0; } -static int b44_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) +static int b44_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) { struct b44 *bp = netdev_priv(dev); u32 speed; int ret; + u32 advertising; if (bp->flags & B44_FLAG_EXTERNAL_PHY) { BUG_ON(!dev->phydev); @@ -1891,31 +1898,34 @@ static int b44_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) if (netif_running(dev)) b44_setup_phy(bp); - ret = phy_ethtool_sset(dev->phydev, cmd); + ret = phy_ethtool_ksettings_set(dev->phydev, cmd); spin_unlock_irq(&bp->lock); return ret; } - speed = ethtool_cmd_speed(cmd); + speed = cmd->base.speed; + + ethtool_convert_link_mode_to_legacy_u32(&advertising, + cmd->link_modes.advertising); /* We do not support gigabit. */ - if (cmd->autoneg == AUTONEG_ENABLE) { - if (cmd->advertising & + if (cmd->base.autoneg == AUTONEG_ENABLE) { + if (advertising & (ADVERTISED_1000baseT_Half | ADVERTISED_1000baseT_Full)) return -EINVAL; } else if ((speed != SPEED_100 && speed != SPEED_10) || - (cmd->duplex != DUPLEX_HALF && - cmd->duplex != DUPLEX_FULL)) { + (cmd->base.duplex != DUPLEX_HALF && + cmd->base.duplex != DUPLEX_FULL)) { return -EINVAL; } spin_lock_irq(&bp->lock); - if (cmd->autoneg == AUTONEG_ENABLE) { + if (cmd->base.autoneg == AUTONEG_ENABLE) { bp->flags &= ~(B44_FLAG_FORCE_LINK | B44_FLAG_100_BASE_T | B44_FLAG_FULL_DUPLEX | @@ -1923,19 +1933,19 @@ static int b44_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) B44_FLAG_ADV_10FULL | B44_FLAG_ADV_100HALF | B44_FLAG_ADV_100FULL); - if (cmd->advertising == 0) { + if (advertising == 0) { bp->flags |= (B44_FLAG_ADV_10HALF | B44_FLAG_ADV_10FULL | B44_FLAG_ADV_100HALF | B44_FLAG_ADV_100FULL); } else { - if (cmd->advertising & ADVERTISED_10baseT_Half) + if (advertising & ADVERTISED_10baseT_Half) bp->flags |= B44_FLAG_ADV_10HALF; - if (cmd->advertising & ADVERTISED_10baseT_Full) + if (advertising & ADVERTISED_10baseT_Full) bp->flags |= B44_FLAG_ADV_10FULL; - if (cmd->advertising & ADVERTISED_100baseT_Half) + if (advertising & ADVERTISED_100baseT_Half) bp->flags |= B44_FLAG_ADV_100HALF; - if (cmd->advertising & ADVERTISED_100baseT_Full) + if (advertising & ADVERTISED_100baseT_Full) bp->flags |= B44_FLAG_ADV_100FULL; } } else { @@ -1943,7 +1953,7 @@ static int b44_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) bp->flags &= ~(B44_FLAG_100_BASE_T | B44_FLAG_FULL_DUPLEX); if (speed == SPEED_100) bp->flags |= B44_FLAG_100_BASE_T; - if (cmd->duplex == DUPLEX_FULL) + if (cmd->base.duplex == DUPLEX_FULL) bp->flags |= B44_FLAG_FULL_DUPLEX; } @@ -2110,8 +2120,6 @@ static int b44_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) static const struct ethtool_ops b44_ethtool_ops = { .get_drvinfo = b44_get_drvinfo, - .get_settings = b44_get_settings, - .set_settings = b44_set_settings, .nway_reset = b44_nway_reset, .get_link = ethtool_op_get_link, .get_wol = b44_get_wol, @@ -2125,6 +2133,8 @@ static const struct ethtool_ops b44_ethtool_ops = { .get_strings = b44_get_strings, .get_sset_count = b44_get_sset_count, .get_ethtool_stats = b44_get_ethtool_stats, + .get_link_ksettings = b44_get_link_ksettings, + .set_link_ksettings = b44_set_link_ksettings, }; static int b44_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) From 625eb8667d6fcb22e474502133986b2d2838917d Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Sun, 18 Sep 2016 16:59:06 +0200 Subject: [PATCH 1256/1715] net: ethernet: broadcom: bcm63xx: use phydev from struct net_device The private structure contain a pointer to phydev, but the structure net_device already contain such pointer. So we can remove the pointer phydev in the private structure, and update the driver to use the one contained in struct net_device. Signed-off-by: Philippe Reynes Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bcm63xx_enet.c | 31 +++++++++----------- drivers/net/ethernet/broadcom/bcm63xx_enet.h | 1 - 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c index 6c8bc5fadac7..082f3f0cf5a0 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c @@ -791,7 +791,7 @@ static void bcm_enet_adjust_phy_link(struct net_device *dev) int status_changed; priv = netdev_priv(dev); - phydev = priv->phydev; + phydev = dev->phydev; status_changed = 0; if (priv->old_link != phydev->link) { @@ -913,7 +913,6 @@ static int bcm_enet_open(struct net_device *dev) priv->old_link = 0; priv->old_duplex = -1; priv->old_pause = -1; - priv->phydev = phydev; } /* mask all interrupts and request them */ @@ -1085,7 +1084,7 @@ static int bcm_enet_open(struct net_device *dev) ENETDMAC_IRMASK, priv->tx_chan); if (priv->has_phy) - phy_start(priv->phydev); + phy_start(phydev); else bcm_enet_adjust_link(dev); @@ -1127,7 +1126,7 @@ out_freeirq: free_irq(dev->irq, dev); out_phy_disconnect: - phy_disconnect(priv->phydev); + phy_disconnect(phydev); return ret; } @@ -1190,7 +1189,7 @@ static int bcm_enet_stop(struct net_device *dev) netif_stop_queue(dev); napi_disable(&priv->napi); if (priv->has_phy) - phy_stop(priv->phydev); + phy_stop(dev->phydev); del_timer_sync(&priv->rx_timeout); /* mask all interrupts */ @@ -1234,10 +1233,8 @@ static int bcm_enet_stop(struct net_device *dev) free_irq(dev->irq, dev); /* release phy */ - if (priv->has_phy) { - phy_disconnect(priv->phydev); - priv->phydev = NULL; - } + if (priv->has_phy) + phy_disconnect(dev->phydev); return 0; } @@ -1437,9 +1434,9 @@ static int bcm_enet_nway_reset(struct net_device *dev) priv = netdev_priv(dev); if (priv->has_phy) { - if (!priv->phydev) + if (!dev->phydev) return -ENODEV; - return genphy_restart_aneg(priv->phydev); + return genphy_restart_aneg(dev->phydev); } return -EOPNOTSUPP; @@ -1456,9 +1453,9 @@ static int bcm_enet_get_settings(struct net_device *dev, cmd->maxtxpkt = 0; if (priv->has_phy) { - if (!priv->phydev) + if (!dev->phydev) return -ENODEV; - return phy_ethtool_gset(priv->phydev, cmd); + return phy_ethtool_gset(dev->phydev, cmd); } else { cmd->autoneg = 0; ethtool_cmd_speed_set(cmd, ((priv->force_speed_100) @@ -1483,9 +1480,9 @@ static int bcm_enet_set_settings(struct net_device *dev, priv = netdev_priv(dev); if (priv->has_phy) { - if (!priv->phydev) + if (!dev->phydev) return -ENODEV; - return phy_ethtool_sset(priv->phydev, cmd); + return phy_ethtool_sset(dev->phydev, cmd); } else { if (cmd->autoneg || @@ -1604,9 +1601,9 @@ static int bcm_enet_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) priv = netdev_priv(dev); if (priv->has_phy) { - if (!priv->phydev) + if (!dev->phydev) return -ENODEV; - return phy_mii_ioctl(priv->phydev, rq, cmd); + return phy_mii_ioctl(dev->phydev, rq, cmd); } else { struct mii_if_info mii; diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.h b/drivers/net/ethernet/broadcom/bcm63xx_enet.h index f55af4310085..0a1b7b2e55bd 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.h +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.h @@ -290,7 +290,6 @@ struct bcm_enet_priv { /* used when a phy is connected (phylib used) */ struct mii_bus *mii_bus; - struct phy_device *phydev; int old_link; int old_duplex; int old_pause; From 639cfa9e8cdaca5276c9786e22195653a0d4391b Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Sun, 18 Sep 2016 16:59:07 +0200 Subject: [PATCH 1257/1715] net: ethernet: broadcom: bcm63xx: use new api ethtool_{get|set}_link_ksettings The ethtool api {get|set}_settings is deprecated. We move this driver to new api {get|set}_link_ksettings. Signed-off-by: Philippe Reynes Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bcm63xx_enet.c | 52 +++++++++++--------- 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c index 082f3f0cf5a0..ae364c74baf3 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c @@ -1442,39 +1442,40 @@ static int bcm_enet_nway_reset(struct net_device *dev) return -EOPNOTSUPP; } -static int bcm_enet_get_settings(struct net_device *dev, - struct ethtool_cmd *cmd) +static int bcm_enet_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) { struct bcm_enet_priv *priv; + u32 supported, advertising; priv = netdev_priv(dev); - cmd->maxrxpkt = 0; - cmd->maxtxpkt = 0; - if (priv->has_phy) { if (!dev->phydev) return -ENODEV; - return phy_ethtool_gset(dev->phydev, cmd); + return phy_ethtool_ksettings_get(dev->phydev, cmd); } else { - cmd->autoneg = 0; - ethtool_cmd_speed_set(cmd, ((priv->force_speed_100) - ? SPEED_100 : SPEED_10)); - cmd->duplex = (priv->force_duplex_full) ? + cmd->base.autoneg = 0; + cmd->base.speed = (priv->force_speed_100) ? + SPEED_100 : SPEED_10; + cmd->base.duplex = (priv->force_duplex_full) ? DUPLEX_FULL : DUPLEX_HALF; - cmd->supported = ADVERTISED_10baseT_Half | + supported = ADVERTISED_10baseT_Half | ADVERTISED_10baseT_Full | ADVERTISED_100baseT_Half | ADVERTISED_100baseT_Full; - cmd->advertising = 0; - cmd->port = PORT_MII; - cmd->transceiver = XCVR_EXTERNAL; + advertising = 0; + ethtool_convert_legacy_u32_to_link_mode( + cmd->link_modes.supported, supported); + ethtool_convert_legacy_u32_to_link_mode( + cmd->link_modes.advertising, advertising); + cmd->base.port = PORT_MII; } return 0; } -static int bcm_enet_set_settings(struct net_device *dev, - struct ethtool_cmd *cmd) +static int bcm_enet_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) { struct bcm_enet_priv *priv; @@ -1482,16 +1483,19 @@ static int bcm_enet_set_settings(struct net_device *dev, if (priv->has_phy) { if (!dev->phydev) return -ENODEV; - return phy_ethtool_sset(dev->phydev, cmd); + return phy_ethtool_ksettings_set(dev->phydev, cmd); } else { - if (cmd->autoneg || - (cmd->speed != SPEED_100 && cmd->speed != SPEED_10) || - cmd->port != PORT_MII) + if (cmd->base.autoneg || + (cmd->base.speed != SPEED_100 && + cmd->base.speed != SPEED_10) || + cmd->base.port != PORT_MII) return -EINVAL; - priv->force_speed_100 = (cmd->speed == SPEED_100) ? 1 : 0; - priv->force_duplex_full = (cmd->duplex == DUPLEX_FULL) ? 1 : 0; + priv->force_speed_100 = + (cmd->base.speed == SPEED_100) ? 1 : 0; + priv->force_duplex_full = + (cmd->base.duplex == DUPLEX_FULL) ? 1 : 0; if (netif_running(dev)) bcm_enet_adjust_link(dev); @@ -1585,14 +1589,14 @@ static const struct ethtool_ops bcm_enet_ethtool_ops = { .get_sset_count = bcm_enet_get_sset_count, .get_ethtool_stats = bcm_enet_get_ethtool_stats, .nway_reset = bcm_enet_nway_reset, - .get_settings = bcm_enet_get_settings, - .set_settings = bcm_enet_set_settings, .get_drvinfo = bcm_enet_get_drvinfo, .get_link = ethtool_op_get_link, .get_ringparam = bcm_enet_get_ringparam, .set_ringparam = bcm_enet_set_ringparam, .get_pauseparam = bcm_enet_get_pauseparam, .set_pauseparam = bcm_enet_set_pauseparam, + .get_link_ksettings = bcm_enet_get_link_ksettings, + .set_link_ksettings = bcm_enet_set_link_ksettings, }; static int bcm_enet_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) From 6b352ebccbcf68866fa5e2ec98cce5e6b7cdf92e Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Sun, 18 Sep 2016 17:16:45 +0200 Subject: [PATCH 1258/1715] net: ethernet: broadcom: bcmgenet: use new api ethtool_{get|set}_link_ksettings The ethtool api {get|set}_settings is deprecated. We move this driver to new api {get|set}_link_ksettings. Signed-off-by: Philippe Reynes Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 46f904392f88..2013474bfdbf 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -450,8 +450,8 @@ static inline void bcmgenet_rdma_ring_writel(struct bcmgenet_priv *priv, genet_dma_ring_regs[r]); } -static int bcmgenet_get_settings(struct net_device *dev, - struct ethtool_cmd *cmd) +static int bcmgenet_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) { if (!netif_running(dev)) return -EINVAL; @@ -459,11 +459,11 @@ static int bcmgenet_get_settings(struct net_device *dev, if (!dev->phydev) return -ENODEV; - return phy_ethtool_gset(dev->phydev, cmd); + return phy_ethtool_ksettings_get(dev->phydev, cmd); } -static int bcmgenet_set_settings(struct net_device *dev, - struct ethtool_cmd *cmd) +static int bcmgenet_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) { if (!netif_running(dev)) return -EINVAL; @@ -471,7 +471,7 @@ static int bcmgenet_set_settings(struct net_device *dev, if (!dev->phydev) return -ENODEV; - return phy_ethtool_sset(dev->phydev, cmd); + return phy_ethtool_ksettings_set(dev->phydev, cmd); } static int bcmgenet_set_rx_csum(struct net_device *dev, @@ -977,8 +977,6 @@ static const struct ethtool_ops bcmgenet_ethtool_ops = { .get_strings = bcmgenet_get_strings, .get_sset_count = bcmgenet_get_sset_count, .get_ethtool_stats = bcmgenet_get_ethtool_stats, - .get_settings = bcmgenet_get_settings, - .set_settings = bcmgenet_set_settings, .get_drvinfo = bcmgenet_get_drvinfo, .get_link = ethtool_op_get_link, .get_msglevel = bcmgenet_get_msglevel, @@ -990,6 +988,8 @@ static const struct ethtool_ops bcmgenet_ethtool_ops = { .nway_reset = bcmgenet_nway_reset, .get_coalesce = bcmgenet_get_coalesce, .set_coalesce = bcmgenet_set_coalesce, + .get_link_ksettings = bcmgenet_get_link_ksettings, + .set_link_ksettings = bcmgenet_set_link_ksettings, }; /* Power down the unimac, based on mode. */ From 9940803065e3e15df8fd0d6a9e29f7b0617d8935 Mon Sep 17 00:00:00 2001 From: Baoyou Xie Date: Sun, 18 Sep 2016 16:26:34 +0800 Subject: [PATCH 1259/1715] phy: mark lan88xx_suspend() static We get 1 warning when building kernel with W=1: drivers/net/phy/microchip.c:58:5: warning: no previous prototype for 'lan88xx_suspend' [-Wmissing-prototypes] In fact, this function is only used in the file in which it is declared and don't need a declaration, but can be made static. so this patch marks this function with 'static'. Signed-off-by: Baoyou Xie Signed-off-by: David S. Miller --- drivers/net/phy/microchip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/microchip.c b/drivers/net/phy/microchip.c index 15f820648f82..7c00e508a101 100644 --- a/drivers/net/phy/microchip.c +++ b/drivers/net/phy/microchip.c @@ -55,7 +55,7 @@ static int lan88xx_phy_ack_interrupt(struct phy_device *phydev) return rc < 0 ? rc : 0; } -int lan88xx_suspend(struct phy_device *phydev) +static int lan88xx_suspend(struct phy_device *phydev) { struct lan88xx_priv *priv = phydev->priv; From d766e7e6b68d681d46d74e228ad0ba133e730e36 Mon Sep 17 00:00:00 2001 From: Baoyou Xie Date: Sun, 18 Sep 2016 16:35:29 +0800 Subject: [PATCH 1260/1715] be2net: mark symbols static where possible We get 4 warnings when building kernel with W=1: drivers/net/ethernet/emulex/benet/be_main.c:4368:6: warning: no previous prototype for 'be_calculate_pf_pool_rss_tables' [-Wmissing-prototypes] drivers/net/ethernet/emulex/benet/be_cmds.c:4385:5: warning: no previous prototype for 'be_get_nic_pf_num_list' [-Wmissing-prototypes] drivers/net/ethernet/emulex/benet/be_cmds.c:4537:6: warning: no previous prototype for 'be_reset_nic_desc' [-Wmissing-prototypes] drivers/net/ethernet/emulex/benet/be_cmds.c:4910:5: warning: no previous prototype for '__be_cmd_set_logical_link_config' [-Wmissing-prototypes] In fact, these functions are only used in the file in which they are declared and don't need a declaration, but can be made static. so this patch marks these functions with 'static'. Signed-off-by: Baoyou Xie Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_cmds.c | 9 +++++---- drivers/net/ethernet/emulex/benet/be_main.c | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index 15d02da08d8f..9cffe48be156 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -4382,7 +4382,7 @@ err: } /* This routine returns a list of all the NIC PF_nums in the adapter */ -u16 be_get_nic_pf_num_list(u8 *buf, u32 desc_count, u16 *nic_pf_nums) +static u16 be_get_nic_pf_num_list(u8 *buf, u32 desc_count, u16 *nic_pf_nums) { struct be_res_desc_hdr *hdr = (struct be_res_desc_hdr *)buf; struct be_pcie_res_desc *pcie = NULL; @@ -4534,7 +4534,7 @@ static int be_cmd_set_profile_config(struct be_adapter *adapter, void *desc, } /* Mark all fields invalid */ -void be_reset_nic_desc(struct be_nic_res_desc *nic) +static void be_reset_nic_desc(struct be_nic_res_desc *nic) { memset(nic, 0, sizeof(*nic)); nic->unicast_mac_count = 0xFFFF; @@ -4907,8 +4907,9 @@ err: return status; } -int __be_cmd_set_logical_link_config(struct be_adapter *adapter, - int link_state, int version, u8 domain) +static int +__be_cmd_set_logical_link_config(struct be_adapter *adapter, + int link_state, int version, u8 domain) { struct be_mcc_wrb *wrb; struct be_cmd_req_set_ll_link *req; diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 34f63eff6e8a..9a94840c5757 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -4365,7 +4365,7 @@ static void be_setup_init(struct be_adapter *adapter) * for distribution between the VFs. This self-imposed limit will determine the * no: of VFs for which RSS can be enabled. */ -void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter) +static void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter) { struct be_port_resources port_res = {0}; u8 rss_tables_on_port; From 766a0e978fc2ba98d4865b466f8b572402317189 Mon Sep 17 00:00:00 2001 From: Baoyou Xie Date: Sun, 18 Sep 2016 16:44:22 +0800 Subject: [PATCH 1261/1715] net/mlx5: clean function declarations in eswitch.c up We get 2 warnings when building kernel with W=1: drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c:463:5: warning: no previous prototype for 'esw_offloads_init' [-Wmissing-prototypes] drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c:521:6: warning: no previous prototype for 'esw_offloads_cleanup' [-Wmissing-prototypes] In fact, both functions are declared in drivers/net/ethernet/mellanox/mlx5/core/eswitch.c,but should be declared in a header file, thus can be recognized in other file. So this patch moves the declarations into drivers/net/ethernet/mellanox/mlx5/core/eswitch.h Signed-off-by: Baoyou Xie Acked-by: Leon Romanovsky Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 3 --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 3 +++ 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 654b76ff962f..492749473e77 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -81,9 +81,6 @@ enum { MC_ADDR_CHANGE | \ PROMISC_CHANGE) -int esw_offloads_init(struct mlx5_eswitch *esw, int nvports); -void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports); - static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport, u32 events_mask) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 6855783f3bb3..b96e8c93bf9d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -209,6 +209,9 @@ struct mlx5_eswitch { int mode; }; +void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports); +int esw_offloads_init(struct mlx5_eswitch *esw, int nvports); + /* E-Switch API */ int mlx5_eswitch_init(struct mlx5_core_dev *dev); void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw); From 6a5d58b67e205f2ffc62d0a9ee4ef7d237e9a7fb Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Sun, 18 Sep 2016 07:31:42 -0400 Subject: [PATCH 1262/1715] net sched ife action: add 16 bit helpers encoder and checker for 16 bits metadata Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/tc_act/tc_ife.h | 2 ++ net/sched/act_ife.c | 26 ++++++++++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/include/net/tc_act/tc_ife.h b/include/net/tc_act/tc_ife.h index 5164bd7a38fb..9fd2bea0a6e0 100644 --- a/include/net/tc_act/tc_ife.h +++ b/include/net/tc_act/tc_ife.h @@ -50,9 +50,11 @@ int ife_tlv_meta_encode(void *skbdata, u16 attrtype, u16 dlen, int ife_alloc_meta_u32(struct tcf_meta_info *mi, void *metaval, gfp_t gfp); int ife_alloc_meta_u16(struct tcf_meta_info *mi, void *metaval, gfp_t gfp); int ife_check_meta_u32(u32 metaval, struct tcf_meta_info *mi); +int ife_check_meta_u16(u16 metaval, struct tcf_meta_info *mi); int ife_encode_meta_u32(u32 metaval, void *skbdata, struct tcf_meta_info *mi); int ife_validate_meta_u32(void *val, int len); int ife_validate_meta_u16(void *val, int len); +int ife_encode_meta_u16(u16 metaval, void *skbdata, struct tcf_meta_info *mi); void ife_release_meta_gen(struct tcf_meta_info *mi); int register_ife_op(struct tcf_meta_ops *mops); int unregister_ife_op(struct tcf_meta_ops *mops); diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index e87cd81315e1..ccf7b4b655fe 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -63,6 +63,23 @@ int ife_tlv_meta_encode(void *skbdata, u16 attrtype, u16 dlen, const void *dval) } EXPORT_SYMBOL_GPL(ife_tlv_meta_encode); +int ife_encode_meta_u16(u16 metaval, void *skbdata, struct tcf_meta_info *mi) +{ + u16 edata = 0; + + if (mi->metaval) + edata = *(u16 *)mi->metaval; + else if (metaval) + edata = metaval; + + if (!edata) /* will not encode */ + return 0; + + edata = htons(edata); + return ife_tlv_meta_encode(skbdata, mi->metaid, 2, &edata); +} +EXPORT_SYMBOL_GPL(ife_encode_meta_u16); + int ife_get_meta_u32(struct sk_buff *skb, struct tcf_meta_info *mi) { if (mi->metaval) @@ -81,6 +98,15 @@ int ife_check_meta_u32(u32 metaval, struct tcf_meta_info *mi) } EXPORT_SYMBOL_GPL(ife_check_meta_u32); +int ife_check_meta_u16(u16 metaval, struct tcf_meta_info *mi) +{ + if (metaval || mi->metaval) + return 8; /* T+L+(V) == 2+2+(2+2bytepad) */ + + return 0; +} +EXPORT_SYMBOL_GPL(ife_check_meta_u16); + int ife_encode_meta_u32(u32 metaval, void *skbdata, struct tcf_meta_info *mi) { u32 edata = metaval; From 408fbc22ef1efb00dd896acd00e9f7d9b641e047 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Sun, 18 Sep 2016 07:31:43 -0400 Subject: [PATCH 1263/1715] net sched ife action: Introduce skb tcindex metadata encap decap Sample use case of how this is encoded: user space via tuntap (or a connected VM/Machine/container) encodes the tcindex TLV. Sample use case of decoding: IFE action decodes it and the skb->tc_index is then used to classify. So something like this for encoded ICMP packets: .. first decode then reclassify... skb->tcindex will be set sudo $TC filter add dev $ETH parent ffff: prio 2 protocol 0xbeef \ u32 match u32 0 0 flowid 1:1 \ action ife decode reclassify ...next match the decode icmp packet... sudo $TC filter add dev $ETH parent ffff: prio 4 protocol ip \ u32 match ip protocol 1 0xff flowid 1:1 \ action continue ... last classify it using the tcindex classifier and do someaction.. sudo $TC filter add dev $ETH parent ffff: prio 5 protocol ip \ handle 0x11 tcindex classid 1:1 \ action blah.. Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/uapi/linux/tc_act/tc_ife.h | 3 +- net/sched/Kconfig | 5 ++ net/sched/Makefile | 1 + net/sched/act_meta_skbtcindex.c | 79 ++++++++++++++++++++++++++++++ 4 files changed, 87 insertions(+), 1 deletion(-) create mode 100644 net/sched/act_meta_skbtcindex.c diff --git a/include/uapi/linux/tc_act/tc_ife.h b/include/uapi/linux/tc_act/tc_ife.h index 4ece02a77b9a..cd18360eca24 100644 --- a/include/uapi/linux/tc_act/tc_ife.h +++ b/include/uapi/linux/tc_act/tc_ife.h @@ -32,8 +32,9 @@ enum { #define IFE_META_HASHID 2 #define IFE_META_PRIO 3 #define IFE_META_QMAP 4 +#define IFE_META_TCINDEX 5 /*Can be overridden at runtime by module option*/ -#define __IFE_META_MAX 5 +#define __IFE_META_MAX 6 #define IFE_META_MAX (__IFE_META_MAX - 1) #endif diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 7795d5a3f79a..87956a768d1b 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -793,6 +793,11 @@ config NET_IFE_SKBPRIO depends on NET_ACT_IFE ---help--- +config NET_IFE_SKBTCINDEX + tristate "Support to encoding decoding skb tcindex on IFE action" + depends on NET_ACT_IFE + ---help--- + config NET_CLS_IND bool "Incoming device classification" depends on NET_CLS_U32 || NET_CLS_FW diff --git a/net/sched/Makefile b/net/sched/Makefile index 148ae0d5ac2c..4bdda3634e0b 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -23,6 +23,7 @@ obj-$(CONFIG_NET_ACT_SKBMOD) += act_skbmod.o obj-$(CONFIG_NET_ACT_IFE) += act_ife.o obj-$(CONFIG_NET_IFE_SKBMARK) += act_meta_mark.o obj-$(CONFIG_NET_IFE_SKBPRIO) += act_meta_skbprio.o +obj-$(CONFIG_NET_IFE_SKBTCINDEX) += act_meta_skbtcindex.o obj-$(CONFIG_NET_ACT_TUNNEL_KEY)+= act_tunnel_key.o obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o diff --git a/net/sched/act_meta_skbtcindex.c b/net/sched/act_meta_skbtcindex.c new file mode 100644 index 000000000000..3b35774ce890 --- /dev/null +++ b/net/sched/act_meta_skbtcindex.c @@ -0,0 +1,79 @@ +/* + * net/sched/act_meta_tc_index.c IFE skb->tc_index metadata module + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * copyright Jamal Hadi Salim (2016) + * +*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static int skbtcindex_encode(struct sk_buff *skb, void *skbdata, + struct tcf_meta_info *e) +{ + u32 ifetc_index = skb->tc_index; + + return ife_encode_meta_u16(ifetc_index, skbdata, e); +} + +static int skbtcindex_decode(struct sk_buff *skb, void *data, u16 len) +{ + u16 ifetc_index = *(u16 *)data; + + skb->tc_index = ntohs(ifetc_index); + return 0; +} + +static int skbtcindex_check(struct sk_buff *skb, struct tcf_meta_info *e) +{ + return ife_check_meta_u16(skb->tc_index, e); +} + +static struct tcf_meta_ops ife_skbtcindex_ops = { + .metaid = IFE_META_TCINDEX, + .metatype = NLA_U16, + .name = "tc_index", + .synopsis = "skb tc_index 16 bit metadata", + .check_presence = skbtcindex_check, + .encode = skbtcindex_encode, + .decode = skbtcindex_decode, + .get = ife_get_meta_u16, + .alloc = ife_alloc_meta_u16, + .release = ife_release_meta_gen, + .validate = ife_validate_meta_u16, + .owner = THIS_MODULE, +}; + +static int __init ifetc_index_init_module(void) +{ + return register_ife_op(&ife_skbtcindex_ops); +} + +static void __exit ifetc_index_cleanup_module(void) +{ + unregister_ife_op(&ife_skbtcindex_ops); +} + +module_init(ifetc_index_init_module); +module_exit(ifetc_index_cleanup_module); + +MODULE_AUTHOR("Jamal Hadi Salim(2016)"); +MODULE_DESCRIPTION("Inter-FE skb tc_index metadata module"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_IFE_META(IFE_META_SKBTCINDEX); From f71b109f1730902b73f70d78764d8a41265080dd Mon Sep 17 00:00:00 2001 From: Roman Mashak Date: Sun, 18 Sep 2016 07:53:08 -0400 Subject: [PATCH 1264/1715] net sched actions police: peg drop stats for conforming traffic setting conforming action to drop is a valid policy. When it is set we need to at least see the stats indicating it for debugging. Signed-off-by: Roman Mashak Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/act_police.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 8a3be1d99775..ba7074b391ae 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -249,6 +249,8 @@ static int tcf_act_police(struct sk_buff *skb, const struct tc_action *a, police->tcfp_t_c = now; police->tcfp_toks = toks; police->tcfp_ptoks = ptoks; + if (police->tcfp_result == TC_ACT_SHOT) + police->tcf_qstats.drops++; spin_unlock(&police->tcf_lock); return police->tcfp_result; } From 5a7a5555a362f60350668cd124df9a396f546c61 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Sun, 18 Sep 2016 08:45:33 -0400 Subject: [PATCH 1265/1715] net sched: stylistic cleanups Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/act_api.c | 16 ++++++---------- net/sched/act_csum.c | 36 ++++++++++++++++++------------------ net/sched/act_gact.c | 3 ++- net/sched/act_mirred.c | 3 ++- net/sched/act_police.c | 10 ++++------ net/sched/cls_api.c | 18 ++++++++++-------- net/sched/cls_bpf.c | 6 ++++-- net/sched/cls_flow.c | 21 ++++++++++++++------- net/sched/cls_flower.c | 3 ++- net/sched/cls_fw.c | 10 +++++----- net/sched/cls_route.c | 9 +++------ net/sched/cls_tcindex.c | 12 ++++++------ net/sched/cls_u32.c | 30 ++++++++++++------------------ net/sched/sch_api.c | 41 ++++++++++++++++++++++++++--------------- 14 files changed, 114 insertions(+), 104 deletions(-) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index d09d0687594b..d0aceb1740b1 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -592,9 +592,8 @@ err_out: return ERR_PTR(err); } -int tcf_action_init(struct net *net, struct nlattr *nla, - struct nlattr *est, char *name, int ovr, - int bind, struct list_head *actions) +int tcf_action_init(struct net *net, struct nlattr *nla, struct nlattr *est, + char *name, int ovr, int bind, struct list_head *actions) { struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; struct tc_action *act; @@ -923,9 +922,8 @@ tcf_add_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions, return err; } -static int -tcf_action_add(struct net *net, struct nlattr *nla, struct nlmsghdr *n, - u32 portid, int ovr) +static int tcf_action_add(struct net *net, struct nlattr *nla, + struct nlmsghdr *n, u32 portid, int ovr) { int ret = 0; LIST_HEAD(actions); @@ -988,8 +986,7 @@ replay: return ret; } -static struct nlattr * -find_dump_kind(const struct nlmsghdr *n) +static struct nlattr *find_dump_kind(const struct nlmsghdr *n) { struct nlattr *tb1, *tb2[TCA_ACT_MAX + 1]; struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; @@ -1016,8 +1013,7 @@ find_dump_kind(const struct nlmsghdr *n) return kind; } -static int -tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) +static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); struct nlmsghdr *nlh; diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index b5dbf633a863..e0defcef376d 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -116,8 +116,8 @@ static void *tcf_csum_skb_nextlayer(struct sk_buff *skb, return (void *)(skb_network_header(skb) + ihl); } -static int tcf_csum_ipv4_icmp(struct sk_buff *skb, - unsigned int ihl, unsigned int ipl) +static int tcf_csum_ipv4_icmp(struct sk_buff *skb, unsigned int ihl, + unsigned int ipl) { struct icmphdr *icmph; @@ -152,8 +152,8 @@ static int tcf_csum_ipv4_igmp(struct sk_buff *skb, return 1; } -static int tcf_csum_ipv6_icmp(struct sk_buff *skb, - unsigned int ihl, unsigned int ipl) +static int tcf_csum_ipv6_icmp(struct sk_buff *skb, unsigned int ihl, + unsigned int ipl) { struct icmp6hdr *icmp6h; const struct ipv6hdr *ip6h; @@ -174,8 +174,8 @@ static int tcf_csum_ipv6_icmp(struct sk_buff *skb, return 1; } -static int tcf_csum_ipv4_tcp(struct sk_buff *skb, - unsigned int ihl, unsigned int ipl) +static int tcf_csum_ipv4_tcp(struct sk_buff *skb, unsigned int ihl, + unsigned int ipl) { struct tcphdr *tcph; const struct iphdr *iph; @@ -195,8 +195,8 @@ static int tcf_csum_ipv4_tcp(struct sk_buff *skb, return 1; } -static int tcf_csum_ipv6_tcp(struct sk_buff *skb, - unsigned int ihl, unsigned int ipl) +static int tcf_csum_ipv6_tcp(struct sk_buff *skb, unsigned int ihl, + unsigned int ipl) { struct tcphdr *tcph; const struct ipv6hdr *ip6h; @@ -217,8 +217,8 @@ static int tcf_csum_ipv6_tcp(struct sk_buff *skb, return 1; } -static int tcf_csum_ipv4_udp(struct sk_buff *skb, - unsigned int ihl, unsigned int ipl, int udplite) +static int tcf_csum_ipv4_udp(struct sk_buff *skb, unsigned int ihl, + unsigned int ipl, int udplite) { struct udphdr *udph; const struct iphdr *iph; @@ -270,8 +270,8 @@ ignore_obscure_skb: return 1; } -static int tcf_csum_ipv6_udp(struct sk_buff *skb, - unsigned int ihl, unsigned int ipl, int udplite) +static int tcf_csum_ipv6_udp(struct sk_buff *skb, unsigned int ihl, + unsigned int ipl, int udplite) { struct udphdr *udph; const struct ipv6hdr *ip6h; @@ -380,8 +380,8 @@ fail: return 0; } -static int tcf_csum_ipv6_hopopts(struct ipv6_opt_hdr *ip6xh, - unsigned int ixhl, unsigned int *pl) +static int tcf_csum_ipv6_hopopts(struct ipv6_opt_hdr *ip6xh, unsigned int ixhl, + unsigned int *pl) { int off, len, optlen; unsigned char *xh = (void *)ip6xh; @@ -494,8 +494,8 @@ fail: return 0; } -static int tcf_csum(struct sk_buff *skb, - const struct tc_action *a, struct tcf_result *res) +static int tcf_csum(struct sk_buff *skb, const struct tc_action *a, + struct tcf_result *res) { struct tcf_csum *p = to_tcf_csum(a); int action; @@ -531,8 +531,8 @@ drop: return TC_ACT_SHOT; } -static int tcf_csum_dump(struct sk_buff *skb, - struct tc_action *a, int bind, int ref) +static int tcf_csum_dump(struct sk_buff *skb, struct tc_action *a, int bind, + int ref) { unsigned char *b = skb_tail_pointer(skb); struct tcf_csum *p = to_tcf_csum(a); diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index e24a4093d6f6..e0aa30f83c6c 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -156,7 +156,8 @@ static void tcf_gact_stats_update(struct tc_action *a, u64 bytes, u32 packets, int action = READ_ONCE(gact->tcf_action); struct tcf_t *tm = &gact->tcf_tm; - _bstats_cpu_update(this_cpu_ptr(gact->common.cpu_bstats), bytes, packets); + _bstats_cpu_update(this_cpu_ptr(gact->common.cpu_bstats), bytes, + packets); if (action == TC_ACT_SHOT) this_cpu_ptr(gact->common.cpu_qstats)->drops += packets; diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 6038c85d92f5..1c76387c5d9c 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -204,7 +204,8 @@ out: return retval; } -static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) +static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, + int ref) { unsigned char *b = skb_tail_pointer(skb); struct tcf_mirred *m = to_mirred(a); diff --git a/net/sched/act_police.c b/net/sched/act_police.c index ba7074b391ae..d1bd248fe146 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -263,8 +263,8 @@ static int tcf_act_police(struct sk_buff *skb, const struct tc_action *a, return police->tcf_action; } -static int -tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) +static int tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, + int bind, int ref) { unsigned char *b = skb_tail_pointer(skb); struct tcf_police *police = to_police(a); @@ -349,14 +349,12 @@ static struct pernet_operations police_net_ops = { .size = sizeof(struct tc_action_net), }; -static int __init -police_init_module(void) +static int __init police_init_module(void) { return tcf_register_action(&act_police_ops, &police_net_ops); } -static void __exit -police_cleanup_module(void) +static void __exit police_cleanup_module(void) { tcf_unregister_action(&act_police_ops, &police_net_ops); } diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index a7c5645373af..11da7da0b7c4 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -344,13 +344,15 @@ replay: if (err == 0) { struct tcf_proto *next = rtnl_dereference(tp->next); - tfilter_notify(net, skb, n, tp, fh, RTM_DELTFILTER); + tfilter_notify(net, skb, n, tp, fh, + RTM_DELTFILTER); if (tcf_destroy(tp, false)) RCU_INIT_POINTER(*back, next); } goto errout; case RTM_GETTFILTER: - err = tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER); + err = tfilter_notify(net, skb, n, tp, fh, + RTM_NEWTFILTER); goto errout; default: err = -EINVAL; @@ -448,7 +450,8 @@ static int tcf_node_dump(struct tcf_proto *tp, unsigned long n, struct net *net = sock_net(a->skb->sk); return tcf_fill_node(net, a->skb, tp, n, NETLINK_CB(a->cb->skb).portid, - a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTFILTER); + a->cb->nlh->nlmsg_seq, NLM_F_MULTI, + RTM_NEWTFILTER); } /* called with RTNL */ @@ -552,7 +555,7 @@ void tcf_exts_destroy(struct tcf_exts *exts) EXPORT_SYMBOL(tcf_exts_destroy); int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, - struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr) + struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr) { #ifdef CONFIG_NET_CLS_ACT { @@ -560,8 +563,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, if (exts->police && tb[exts->police]) { act = tcf_action_init_1(net, tb[exts->police], rate_tlv, - "police", ovr, - TCA_ACT_BIND); + "police", ovr, TCA_ACT_BIND); if (IS_ERR(act)) return PTR_ERR(act); @@ -573,8 +575,8 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, int err, i = 0; err = tcf_action_init(net, tb[exts->action], rate_tlv, - NULL, ovr, - TCA_ACT_BIND, &actions); + NULL, ovr, TCA_ACT_BIND, + &actions); if (err) return err; list_for_each_entry(act, &actions, list) diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 1d92d4d3f222..c6f7a47541eb 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -55,7 +55,8 @@ static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = { [TCA_BPF_CLASSID] = { .type = NLA_U32 }, [TCA_BPF_FLAGS] = { .type = NLA_U32 }, [TCA_BPF_FD] = { .type = NLA_U32 }, - [TCA_BPF_NAME] = { .type = NLA_NUL_STRING, .len = CLS_BPF_NAME_LEN }, + [TCA_BPF_NAME] = { .type = NLA_NUL_STRING, + .len = CLS_BPF_NAME_LEN }, [TCA_BPF_OPS_LEN] = { .type = NLA_U16 }, [TCA_BPF_OPS] = { .type = NLA_BINARY, .len = sizeof(struct sock_filter) * BPF_MAXINSNS }, @@ -409,7 +410,8 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, goto errout; } - ret = cls_bpf_modify_existing(net, tp, prog, base, tb, tca[TCA_RATE], ovr); + ret = cls_bpf_modify_existing(net, tp, prog, base, tb, tca[TCA_RATE], + ovr); if (ret < 0) goto errout; diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index a379bae1d74e..e39672394c7b 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -87,12 +87,14 @@ static u32 flow_get_dst(const struct sk_buff *skb, const struct flow_keys *flow) return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb); } -static u32 flow_get_proto(const struct sk_buff *skb, const struct flow_keys *flow) +static u32 flow_get_proto(const struct sk_buff *skb, + const struct flow_keys *flow) { return flow->basic.ip_proto; } -static u32 flow_get_proto_src(const struct sk_buff *skb, const struct flow_keys *flow) +static u32 flow_get_proto_src(const struct sk_buff *skb, + const struct flow_keys *flow) { if (flow->ports.ports) return ntohs(flow->ports.src); @@ -100,7 +102,8 @@ static u32 flow_get_proto_src(const struct sk_buff *skb, const struct flow_keys return addr_fold(skb->sk); } -static u32 flow_get_proto_dst(const struct sk_buff *skb, const struct flow_keys *flow) +static u32 flow_get_proto_dst(const struct sk_buff *skb, + const struct flow_keys *flow) { if (flow->ports.ports) return ntohs(flow->ports.dst); @@ -149,7 +152,8 @@ static u32 flow_get_nfct(const struct sk_buff *skb) }) #endif -static u32 flow_get_nfct_src(const struct sk_buff *skb, const struct flow_keys *flow) +static u32 flow_get_nfct_src(const struct sk_buff *skb, + const struct flow_keys *flow) { switch (tc_skb_protocol(skb)) { case htons(ETH_P_IP): @@ -161,7 +165,8 @@ fallback: return flow_get_src(skb, flow); } -static u32 flow_get_nfct_dst(const struct sk_buff *skb, const struct flow_keys *flow) +static u32 flow_get_nfct_dst(const struct sk_buff *skb, + const struct flow_keys *flow) { switch (tc_skb_protocol(skb)) { case htons(ETH_P_IP): @@ -173,14 +178,16 @@ fallback: return flow_get_dst(skb, flow); } -static u32 flow_get_nfct_proto_src(const struct sk_buff *skb, const struct flow_keys *flow) +static u32 flow_get_nfct_proto_src(const struct sk_buff *skb, + const struct flow_keys *flow) { return ntohs(CTTUPLE(skb, src.u.all)); fallback: return flow_get_proto_src(skb, flow); } -static u32 flow_get_nfct_proto_dst(const struct sk_buff *skb, const struct flow_keys *flow) +static u32 flow_get_nfct_proto_dst(const struct sk_buff *skb, + const struct flow_keys *flow) { return ntohs(CTTUPLE(skb, dst.u.all)); fallback: diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index a3f4c706dfaa..2af09c872a1a 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -241,7 +241,8 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, tc.type = TC_SETUP_CLSFLOWER; tc.cls_flower = &offload; - err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &tc); + err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, + &tc); if (tc_skip_sw(flags)) return err; diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index cc0bda945800..9dc63d54e167 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -57,7 +57,7 @@ static u32 fw_hash(u32 handle) } static int fw_classify(struct sk_buff *skb, const struct tcf_proto *tp, - struct tcf_result *res) + struct tcf_result *res) { struct fw_head *head = rcu_dereference_bh(tp->root); struct fw_filter *f; @@ -188,7 +188,8 @@ static const struct nla_policy fw_policy[TCA_FW_MAX + 1] = { static int fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f, - struct nlattr **tb, struct nlattr **tca, unsigned long base, bool ovr) + struct nlattr **tb, struct nlattr **tca, unsigned long base, + bool ovr) { struct fw_head *head = rtnl_dereference(tp->root); struct tcf_exts e; @@ -237,9 +238,8 @@ errout: static int fw_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, - u32 handle, - struct nlattr **tca, - unsigned long *arg, bool ovr) + u32 handle, struct nlattr **tca, unsigned long *arg, + bool ovr) { struct fw_head *head = rtnl_dereference(tp->root); struct fw_filter *f = (struct fw_filter *) *arg; diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index c91e65d81a48..a4ce39b19be0 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -268,8 +268,7 @@ static int route4_init(struct tcf_proto *tp) return 0; } -static void -route4_delete_filter(struct rcu_head *head) +static void route4_delete_filter(struct rcu_head *head) { struct route4_filter *f = container_of(head, struct route4_filter, rcu); @@ -474,10 +473,8 @@ errout: } static int route4_change(struct net *net, struct sk_buff *in_skb, - struct tcf_proto *tp, unsigned long base, - u32 handle, - struct nlattr **tca, - unsigned long *arg, bool ovr) + struct tcf_proto *tp, unsigned long base, u32 handle, + struct nlattr **tca, unsigned long *arg, bool ovr) { struct route4_head *head = rtnl_dereference(tp->root); struct route4_filter __rcu **fp; diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index d9500709831f..96144bdf30db 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -50,14 +50,13 @@ struct tcindex_data { struct rcu_head rcu; }; -static inline int -tcindex_filter_is_set(struct tcindex_filter_result *r) +static inline int tcindex_filter_is_set(struct tcindex_filter_result *r) { return tcf_exts_is_predicative(&r->exts) || r->res.classid; } -static struct tcindex_filter_result * -tcindex_lookup(struct tcindex_data *p, u16 key) +static struct tcindex_filter_result *tcindex_lookup(struct tcindex_data *p, + u16 key) { if (p->perfect) { struct tcindex_filter_result *f = p->perfect + key; @@ -144,7 +143,8 @@ static void tcindex_destroy_rexts(struct rcu_head *head) static void tcindex_destroy_fexts(struct rcu_head *head) { - struct tcindex_filter *f = container_of(head, struct tcindex_filter, rcu); + struct tcindex_filter *f = container_of(head, struct tcindex_filter, + rcu); tcf_exts_destroy(&f->result.exts); kfree(f); @@ -550,7 +550,7 @@ static bool tcindex_destroy(struct tcf_proto *tp, bool force) static int tcindex_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, - struct sk_buff *skb, struct tcmsg *t) + struct sk_buff *skb, struct tcmsg *t) { struct tcindex_data *p = rtnl_dereference(tp->root); struct tcindex_filter_result *r = (struct tcindex_filter_result *) fh; diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index a29263a9d8c1..ae83c3aec308 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -104,7 +104,8 @@ static inline unsigned int u32_hash_fold(__be32 key, return h; } -static int u32_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) +static int u32_classify(struct sk_buff *skb, const struct tcf_proto *tp, + struct tcf_result *res) { struct { struct tc_u_knode *knode; @@ -256,8 +257,7 @@ deadloop: return -1; } -static struct tc_u_hnode * -u32_lookup_ht(struct tc_u_common *tp_c, u32 handle) +static struct tc_u_hnode *u32_lookup_ht(struct tc_u_common *tp_c, u32 handle) { struct tc_u_hnode *ht; @@ -270,8 +270,7 @@ u32_lookup_ht(struct tc_u_common *tp_c, u32 handle) return ht; } -static struct tc_u_knode * -u32_lookup_key(struct tc_u_hnode *ht, u32 handle) +static struct tc_u_knode *u32_lookup_key(struct tc_u_hnode *ht, u32 handle) { unsigned int sel; struct tc_u_knode *n = NULL; @@ -360,8 +359,7 @@ static int u32_init(struct tcf_proto *tp) return 0; } -static int u32_destroy_key(struct tcf_proto *tp, - struct tc_u_knode *n, +static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n, bool free_pf) { tcf_exts_destroy(&n->exts); @@ -448,9 +446,8 @@ static void u32_remove_hw_knode(struct tcf_proto *tp, u32 handle) } } -static int u32_replace_hw_hnode(struct tcf_proto *tp, - struct tc_u_hnode *h, - u32 flags) +static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, + u32 flags) { struct net_device *dev = tp->q->dev_queue->dev; struct tc_cls_u32_offload u32_offload = {0}; @@ -496,9 +493,8 @@ static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h) } } -static int u32_replace_hw_knode(struct tcf_proto *tp, - struct tc_u_knode *n, - u32 flags) +static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, + u32 flags) { struct net_device *dev = tp->q->dev_queue->dev; struct tc_cls_u32_offload u32_offload = {0}; @@ -763,8 +759,7 @@ errout: return err; } -static void u32_replace_knode(struct tcf_proto *tp, - struct tc_u_common *tp_c, +static void u32_replace_knode(struct tcf_proto *tp, struct tc_u_common *tp_c, struct tc_u_knode *n) { struct tc_u_knode __rcu **ins; @@ -845,8 +840,7 @@ static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp, static int u32_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, - struct nlattr **tca, - unsigned long *arg, bool ovr) + struct nlattr **tca, unsigned long *arg, bool ovr) { struct tc_u_common *tp_c = tp->data; struct tc_u_hnode *ht; @@ -1088,7 +1082,7 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg) } static int u32_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, - struct sk_buff *skb, struct tcmsg *t) + struct sk_buff *skb, struct tcmsg *t) { struct tc_u_knode *n = (struct tc_u_knode *)fh; struct tc_u_hnode *ht_up, *ht_down; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index d677b3484d81..206dc24add3a 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -389,7 +389,8 @@ static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab) static struct qdisc_rate_table *qdisc_rtab_list; -struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab) +struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, + struct nlattr *tab) { struct qdisc_rate_table *rtab; @@ -541,7 +542,8 @@ nla_put_failure: return -1; } -void __qdisc_calculate_pkt_len(struct sk_buff *skb, const struct qdisc_size_table *stab) +void __qdisc_calculate_pkt_len(struct sk_buff *skb, + const struct qdisc_size_table *stab) { int pkt_len, slot; @@ -888,10 +890,10 @@ static struct lock_class_key qdisc_rx_lock; Parameters are passed via opt. */ -static struct Qdisc * -qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, - struct Qdisc *p, u32 parent, u32 handle, - struct nlattr **tca, int *errp) +static struct Qdisc *qdisc_create(struct net_device *dev, + struct netdev_queue *dev_queue, + struct Qdisc *p, u32 parent, u32 handle, + struct nlattr **tca, int *errp) { int err; struct nlattr *kind = tca[TCA_KIND]; @@ -1073,7 +1075,8 @@ struct check_loop_arg { int depth; }; -static int check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w); +static int check_loop_fn(struct Qdisc *q, unsigned long cl, + struct qdisc_walker *w); static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth) { @@ -1450,7 +1453,8 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, } else { if (!tc_qdisc_dump_ignore(q) && tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) + cb->nlh->nlmsg_seq, NLM_F_MULTI, + RTM_NEWQDISC) <= 0) goto done; q_idx++; } @@ -1471,7 +1475,8 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, } if (!tc_qdisc_dump_ignore(q) && tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) + cb->nlh->nlmsg_seq, NLM_F_MULTI, + RTM_NEWQDISC) <= 0) goto done; q_idx++; } @@ -1505,7 +1510,8 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) s_q_idx = 0; q_idx = 0; - if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx, true) < 0) + if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx, + true) < 0) goto done; dev_queue = dev_ingress_queue(dev); @@ -1640,7 +1646,8 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n) if (cops->delete) err = cops->delete(q, cl); if (err == 0) - tclass_notify(net, skb, n, q, cl, RTM_DELTCLASS); + tclass_notify(net, skb, n, q, cl, + RTM_DELTCLASS); goto out; case RTM_GETTCLASS: err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS); @@ -1738,12 +1745,14 @@ struct qdisc_dump_args { struct netlink_callback *cb; }; -static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg) +static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, + struct qdisc_walker *arg) { struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg; return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid, - a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS); + a->cb->nlh->nlmsg_seq, NLM_F_MULTI, + RTM_NEWTCLASS); } static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb, @@ -1976,10 +1985,12 @@ static int __init pktsched_init(void) rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, NULL); rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, NULL); - rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc, NULL); + rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc, + NULL); rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, NULL); rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, NULL); - rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass, NULL); + rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass, + NULL); return 0; } From 878786d95e07ce2f5fb6e3cd8a6c2ed320339196 Mon Sep 17 00:00:00 2001 From: Rob Swindell Date: Tue, 20 Sep 2016 03:36:33 -0400 Subject: [PATCH 1266/1715] bnxt_en: Fix build error for kernesl without RTC-LIB bnxt_hwrm_fw_set_time() now returns -EOPNOTSUPP when built for kernel without RTC_LIB. Setting the firmware time is not critical to the successful completion of the firmware update process. Signed-off-by: Rob Swindell Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index cbc0b8ad916c..a9f9f3738022 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4323,6 +4323,7 @@ hwrm_ver_get_exit: int bnxt_hwrm_fw_set_time(struct bnxt *bp) { +#if IS_ENABLED(CONFIG_RTC_LIB) struct hwrm_fw_set_time_input req = {0}; struct rtc_time tm; struct timeval tv; @@ -4340,6 +4341,9 @@ int bnxt_hwrm_fw_set_time(struct bnxt *bp) req.minute = tm.tm_min; req.second = tm.tm_sec; return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); +#else + return -EOPNOTSUPP; +#endif } static int bnxt_hwrm_port_qstats(struct bnxt *bp) From 18c2d2c113eb330d260277350d09aae454e80177 Mon Sep 17 00:00:00 2001 From: Elad Raz Date: Mon, 19 Sep 2016 08:28:24 +0200 Subject: [PATCH 1267/1715] mlxsw: Change the RX LAG hash function from XOR to CRC Change the RX hash function from XOR to CRC in order to have better distribution of the traffic. Signed-off-by: Elad Raz Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 4e2354ca0e4a..6460c7256f2b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -1392,7 +1392,7 @@ static inline void mlxsw_reg_slcr_pack(char *payload, u16 lag_hash) { MLXSW_REG_ZERO(slcr, payload); mlxsw_reg_slcr_pp_set(payload, MLXSW_REG_SLCR_PP_GLOBAL); - mlxsw_reg_slcr_type_set(payload, MLXSW_REG_SLCR_TYPE_XOR); + mlxsw_reg_slcr_type_set(payload, MLXSW_REG_SLCR_TYPE_CRC); mlxsw_reg_slcr_lag_hash_set(payload, lag_hash); } From 1a9234e66eddb0f18447532ab3f12bd136473ed6 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 19 Sep 2016 08:29:26 +0200 Subject: [PATCH 1268/1715] mlxsw: spectrum: Fix sparse warnings drivers/net/ethernet/mellanox/mlxsw//spectrum.c:251:28: warning: symbol 'mlxsw_sp_span_entry_find' was not declared. Should it be static? drivers/net/ethernet/mellanox/mlxsw//spectrum.c:265:28: warning: symbol 'mlxsw_sp_span_entry_get' was not declared. Should it be static? drivers/net/ethernet/mellanox/mlxsw//spectrum.c:367:56: warning: mixing different enum types drivers/net/ethernet/mellanox/mlxsw//spectrum.c:367:56: int enum mlxsw_sp_span_type versus drivers/net/ethernet/mellanox/mlxsw//spectrum.c:367:56: int enum mlxsw_reg_mpar_i_e ... drivers/net/ethernet/mellanox/mlxsw//spectrum_buffers.c:598:32: warning: mixing different enum types drivers/net/ethernet/mellanox/mlxsw//spectrum_buffers.c:598:32: int enum mlxsw_reg_sbxx_dir versus drivers/net/ethernet/mellanox/mlxsw//spectrum_buffers.c:598:32: int enum devlink_sb_pool_type drivers/net/ethernet/mellanox/mlxsw//spectrum_buffers.c:600:39: warning: mixing different enum types drivers/net/ethernet/mellanox/mlxsw//spectrum_buffers.c:600:39: int enum mlxsw_reg_sbpr_mode versus drivers/net/ethernet/mellanox/mlxsw//spectrum_buffers.c:600:39: int enum devlink_sb_threshold_type ... drivers/net/ethernet/mellanox/mlxsw//spectrum_router.c:255:54: warning: mixing different enum types drivers/net/ethernet/mellanox/mlxsw//spectrum_router.c:255:54: int enum mlxsw_sp_l3proto versus drivers/net/ethernet/mellanox/mlxsw//spectrum_router.c:255:54: int enum mlxsw_reg_ralxx_protocol ... drivers/net/ethernet/mellanox/mlxsw//spectrum_router.c:1749:6: warning: symbol 'mlxsw_sp_fib_entry_put' was not declared. Should it be static? Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum.c | 12 ++++-- .../mellanox/mlxsw/spectrum_buffers.c | 15 +++---- .../ethernet/mellanox/mlxsw/spectrum_router.c | 41 ++++++++++++------- 3 files changed, 42 insertions(+), 26 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 171f8dd19efa..fa312610c2d2 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -248,7 +248,8 @@ static void mlxsw_sp_span_entry_destroy(struct mlxsw_sp *mlxsw_sp, span_entry->used = false; } -struct mlxsw_sp_span_entry *mlxsw_sp_span_entry_find(struct mlxsw_sp_port *port) +static struct mlxsw_sp_span_entry * +mlxsw_sp_span_entry_find(struct mlxsw_sp_port *port) { struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp; int i; @@ -262,7 +263,8 @@ struct mlxsw_sp_span_entry *mlxsw_sp_span_entry_find(struct mlxsw_sp_port *port) return NULL; } -struct mlxsw_sp_span_entry *mlxsw_sp_span_entry_get(struct mlxsw_sp_port *port) +static struct mlxsw_sp_span_entry +*mlxsw_sp_span_entry_get(struct mlxsw_sp_port *port) { struct mlxsw_sp_span_entry *span_entry; @@ -364,7 +366,8 @@ mlxsw_sp_span_inspected_port_bind(struct mlxsw_sp_port *port, } /* bind the port to the SPAN entry */ - mlxsw_reg_mpar_pack(mpar_pl, port->local_port, type, true, pa_id); + mlxsw_reg_mpar_pack(mpar_pl, port->local_port, + (enum mlxsw_reg_mpar_i_e) type, true, pa_id); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpar), mpar_pl); if (err) goto err_mpar_reg_write; @@ -405,7 +408,8 @@ mlxsw_sp_span_inspected_port_unbind(struct mlxsw_sp_port *port, return; /* remove the inspected port */ - mlxsw_reg_mpar_pack(mpar_pl, port->local_port, type, false, pa_id); + mlxsw_reg_mpar_pack(mpar_pl, port->local_port, + (enum mlxsw_reg_mpar_i_e) type, false, pa_id); mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpar), mpar_pl); /* remove the SBIB buffer if it was egress SPAN */ diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c index 953b214f38d0..bcaed8a38037 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c @@ -595,9 +595,9 @@ int mlxsw_sp_sb_pool_get(struct mlxsw_core *mlxsw_core, enum mlxsw_reg_sbxx_dir dir = dir_get(pool_index); struct mlxsw_sp_sb_pr *pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool, dir); - pool_info->pool_type = dir; + pool_info->pool_type = (enum devlink_sb_pool_type) dir; pool_info->size = MLXSW_SP_CELLS_TO_BYTES(pr->size); - pool_info->threshold_type = pr->mode; + pool_info->threshold_type = (enum devlink_sb_threshold_type) pr->mode; return 0; } @@ -608,9 +608,10 @@ int mlxsw_sp_sb_pool_set(struct mlxsw_core *mlxsw_core, struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); u8 pool = pool_get(pool_index); enum mlxsw_reg_sbxx_dir dir = dir_get(pool_index); - enum mlxsw_reg_sbpr_mode mode = threshold_type; u32 pool_size = MLXSW_SP_BYTES_TO_CELLS(size); + enum mlxsw_reg_sbpr_mode mode; + mode = (enum mlxsw_reg_sbpr_mode) threshold_type; return mlxsw_sp_sb_pr_write(mlxsw_sp, pool, dir, mode, pool_size); } @@ -696,13 +697,13 @@ int mlxsw_sp_sb_tc_pool_bind_get(struct mlxsw_core_port *mlxsw_core_port, struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; u8 local_port = mlxsw_sp_port->local_port; u8 pg_buff = tc_index; - enum mlxsw_reg_sbxx_dir dir = pool_type; + enum mlxsw_reg_sbxx_dir dir = (enum mlxsw_reg_sbxx_dir) pool_type; struct mlxsw_sp_sb_cm *cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port, pg_buff, dir); *p_threshold = mlxsw_sp_sb_threshold_out(mlxsw_sp, cm->pool, dir, cm->max_buff); - *p_pool_index = pool_index_get(cm->pool, pool_type); + *p_pool_index = pool_index_get(cm->pool, dir); return 0; } @@ -716,7 +717,7 @@ int mlxsw_sp_sb_tc_pool_bind_set(struct mlxsw_core_port *mlxsw_core_port, struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; u8 local_port = mlxsw_sp_port->local_port; u8 pg_buff = tc_index; - enum mlxsw_reg_sbxx_dir dir = pool_type; + enum mlxsw_reg_sbxx_dir dir = (enum mlxsw_reg_sbxx_dir) pool_type; u8 pool = pool_get(pool_index); u32 max_buff; int err; @@ -943,7 +944,7 @@ int mlxsw_sp_sb_occ_tc_port_bind_get(struct mlxsw_core_port *mlxsw_core_port, struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; u8 local_port = mlxsw_sp_port->local_port; u8 pg_buff = tc_index; - enum mlxsw_reg_sbxx_dir dir = pool_type; + enum mlxsw_reg_sbxx_dir dir = (enum mlxsw_reg_sbxx_dir) pool_type; struct mlxsw_sp_sb_cm *cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port, pg_buff, dir); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 3f5c51da6d3e..4afb49854eec 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -252,7 +252,9 @@ static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp, { char ralta_pl[MLXSW_REG_RALTA_LEN]; - mlxsw_reg_ralta_pack(ralta_pl, true, lpm_tree->proto, lpm_tree->id); + mlxsw_reg_ralta_pack(ralta_pl, true, + (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto, + lpm_tree->id); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl); } @@ -261,7 +263,9 @@ static int mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp, { char ralta_pl[MLXSW_REG_RALTA_LEN]; - mlxsw_reg_ralta_pack(ralta_pl, false, lpm_tree->proto, lpm_tree->id); + mlxsw_reg_ralta_pack(ralta_pl, false, + (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto, + lpm_tree->id); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl); } @@ -384,7 +388,9 @@ static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp, { char raltb_pl[MLXSW_REG_RALTB_LEN]; - mlxsw_reg_raltb_pack(raltb_pl, vr->id, vr->proto, vr->lpm_tree->id); + mlxsw_reg_raltb_pack(raltb_pl, vr->id, + (enum mlxsw_reg_ralxx_protocol) vr->proto, + vr->lpm_tree->id); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl); } @@ -394,7 +400,8 @@ static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp, char raltb_pl[MLXSW_REG_RALTB_LEN]; /* Bind to tree 0 which is default */ - mlxsw_reg_raltb_pack(raltb_pl, vr->id, vr->proto, 0); + mlxsw_reg_raltb_pack(raltb_pl, vr->id, + (enum mlxsw_reg_ralxx_protocol) vr->proto, 0); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl); } @@ -1081,9 +1088,10 @@ static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp, { char raleu_pl[MLXSW_REG_RALEU_LEN]; - mlxsw_reg_raleu_pack(raleu_pl, vr->proto, vr->id, - adj_index, ecmp_size, - new_adj_index, new_ecmp_size); + mlxsw_reg_raleu_pack(raleu_pl, + (enum mlxsw_reg_ralxx_protocol) vr->proto, vr->id, + adj_index, ecmp_size, new_adj_index, + new_ecmp_size); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl); } @@ -1558,8 +1566,9 @@ static int mlxsw_sp_fib_entry_op4_remote(struct mlxsw_sp *mlxsw_sp, trap_id = MLXSW_TRAP_ID_RTR_INGRESS0; } - mlxsw_reg_ralue_pack4(ralue_pl, vr->proto, op, vr->id, - fib_entry->key.prefix_len, *p_dip); + mlxsw_reg_ralue_pack4(ralue_pl, + (enum mlxsw_reg_ralxx_protocol) vr->proto, op, + vr->id, fib_entry->key.prefix_len, *p_dip); mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id, adjacency_index, ecmp_size); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); @@ -1573,8 +1582,9 @@ static int mlxsw_sp_fib_entry_op4_local(struct mlxsw_sp *mlxsw_sp, u32 *p_dip = (u32 *) fib_entry->key.addr; struct mlxsw_sp_vr *vr = fib_entry->vr; - mlxsw_reg_ralue_pack4(ralue_pl, vr->proto, op, vr->id, - fib_entry->key.prefix_len, *p_dip); + mlxsw_reg_ralue_pack4(ralue_pl, + (enum mlxsw_reg_ralxx_protocol) vr->proto, op, + vr->id, fib_entry->key.prefix_len, *p_dip); mlxsw_reg_ralue_act_local_pack(ralue_pl, MLXSW_REG_RALUE_TRAP_ACTION_NOP, 0, fib_entry->rif); @@ -1589,8 +1599,9 @@ static int mlxsw_sp_fib_entry_op4_trap(struct mlxsw_sp *mlxsw_sp, u32 *p_dip = (u32 *) fib_entry->key.addr; struct mlxsw_sp_vr *vr = fib_entry->vr; - mlxsw_reg_ralue_pack4(ralue_pl, vr->proto, op, vr->id, - fib_entry->key.prefix_len, *p_dip); + mlxsw_reg_ralue_pack4(ralue_pl, + (enum mlxsw_reg_ralxx_protocol) vr->proto, op, + vr->id, fib_entry->key.prefix_len, *p_dip); mlxsw_reg_ralue_act_ip2me_pack(ralue_pl); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); } @@ -1753,8 +1764,8 @@ mlxsw_sp_fib_entry_find(struct mlxsw_sp *mlxsw_sp, fib4->fi->fib_dev); } -void mlxsw_sp_fib_entry_put(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry) +static void mlxsw_sp_fib_entry_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) { struct mlxsw_sp_vr *vr = fib_entry->vr; From 1a21101d21d7ef056dfda1d7b843289e05ecd034 Mon Sep 17 00:00:00 2001 From: Raju Lakkaraju Date: Mon, 19 Sep 2016 15:33:54 +0530 Subject: [PATCH 1269/1715] net: phy: Add MAC-IF driver for Microsemi PHYs. All the review comments updated and resending for review. This is MAC interface feature. Microsemi PHY can support RGMII, RMII or GMII/MII interface between MAC and PHY. MAC-IF function program the right value based on Device tree configuration. Tested on Beaglebone Black with VSC 8531 PHY. Signed-off-by: Raju Lakkaraju Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/mscc.c | 51 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/drivers/net/phy/mscc.c b/drivers/net/phy/mscc.c index c09cc4a3d166..d350debd174a 100644 --- a/drivers/net/phy/mscc.c +++ b/drivers/net/phy/mscc.c @@ -23,6 +23,16 @@ enum rgmii_rx_clock_delay { RGMII_RX_CLK_DELAY_3_4_NS = 7 }; +/* Microsemi VSC85xx PHY registers */ +/* IEEE 802. Std Registers */ +#define MSCC_PHY_EXT_PHY_CNTL_1 23 +#define MAC_IF_SELECTION_MASK 0x1800 +#define MAC_IF_SELECTION_GMII 0 +#define MAC_IF_SELECTION_RMII 1 +#define MAC_IF_SELECTION_RGMII 2 +#define MAC_IF_SELECTION_POS 11 +#define FAR_END_LOOPBACK_MODE_MASK 0x0008 + #define MII_VSC85XX_INT_MASK 25 #define MII_VSC85XX_INT_MASK_MASK 0xa000 #define MII_VSC85XX_INT_STATUS 26 @@ -48,6 +58,42 @@ static int vsc85xx_phy_page_set(struct phy_device *phydev, u8 page) return rc; } +static int vsc85xx_mac_if_set(struct phy_device *phydev, + phy_interface_t interface) +{ + int rc; + u16 reg_val; + + mutex_lock(&phydev->lock); + reg_val = phy_read(phydev, MSCC_PHY_EXT_PHY_CNTL_1); + reg_val &= ~(MAC_IF_SELECTION_MASK); + switch (interface) { + case PHY_INTERFACE_MODE_RGMII: + reg_val |= (MAC_IF_SELECTION_RGMII << MAC_IF_SELECTION_POS); + break; + case PHY_INTERFACE_MODE_RMII: + reg_val |= (MAC_IF_SELECTION_RMII << MAC_IF_SELECTION_POS); + break; + case PHY_INTERFACE_MODE_MII: + case PHY_INTERFACE_MODE_GMII: + reg_val |= (MAC_IF_SELECTION_GMII << MAC_IF_SELECTION_POS); + break; + default: + rc = -EINVAL; + goto out_unlock; + } + rc = phy_write(phydev, MSCC_PHY_EXT_PHY_CNTL_1, reg_val); + if (rc != 0) + goto out_unlock; + + rc = genphy_soft_reset(phydev); + +out_unlock: + mutex_unlock(&phydev->lock); + + return rc; +} + static int vsc85xx_default_config(struct phy_device *phydev) { int rc; @@ -77,6 +123,11 @@ static int vsc85xx_config_init(struct phy_device *phydev) rc = vsc85xx_default_config(phydev); if (rc) return rc; + + rc = vsc85xx_mac_if_set(phydev, phydev->interface); + if (rc) + return rc; + rc = genphy_config_init(phydev); return rc; From fd07160bb7180cdd0afeb089d8cdfd66002f17e6 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Mon, 19 Sep 2016 12:53:40 +0200 Subject: [PATCH 1270/1715] xen-netfront: avoid packet loss when ethernet header crosses page boundary Small packet loss is reported on complex multi host network configurations including tunnels, NAT, ... My investigation led me to the following check in netback which drops packets: if (unlikely(txreq.size < ETH_HLEN)) { netdev_err(queue->vif->dev, "Bad packet size: %d\n", txreq.size); xenvif_tx_err(queue, &txreq, extra_count, idx); break; } But this check itself is legitimate. SKBs consist of a linear part (which has to have the ethernet header) and (optionally) a number of frags. Netfront transmits the head of the linear part up to the page boundary as the first request and all the rest becomes frags so when we're reconstructing the SKB in netback we can't distinguish between original frags and the 'tail' of the linear part. The first SKB needs to be at least ETH_HLEN size. So in case we have an SKB with its linear part starting too close to the page boundary the packet is lost. I see two ways to fix the issue: - Change the 'wire' protocol between netfront and netback to start keeping the original SKB structure. We'll have to add a flag indicating the fact that the particular request is a part of the original linear part and not a frag. We'll need to know the length of the linear part to pre-allocate memory. - Avoid transmitting SKBs with linear parts starting too close to the page boundary. That seems preferable short-term and shouldn't bring significant performance degradation as such packets are rare. That's what this patch is trying to achieve with skb_copy(). Signed-off-by: Vitaly Kuznetsov Acked-by: David Vrabel Signed-off-by: David S. Miller --- drivers/net/xen-netfront.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 96ccd4e943db..e17879dd5d5a 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -565,6 +565,7 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev) struct netfront_queue *queue = NULL; unsigned int num_queues = dev->real_num_tx_queues; u16 queue_index; + struct sk_buff *nskb; /* Drop the packet if no queues are set up */ if (num_queues < 1) @@ -593,6 +594,20 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev) page = virt_to_page(skb->data); offset = offset_in_page(skb->data); + + /* The first req should be at least ETH_HLEN size or the packet will be + * dropped by netback. + */ + if (unlikely(PAGE_SIZE - offset < ETH_HLEN)) { + nskb = skb_copy(skb, GFP_ATOMIC); + if (!nskb) + goto drop; + dev_kfree_skb_any(skb); + skb = nskb; + page = virt_to_page(skb->data); + offset = offset_in_page(skb->data); + } + len = skb_headlen(skb); spin_lock_irqsave(&queue->tx_lock, flags); From ca26893f05e86497a86732768ec53cd38c0819ca Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 19 Sep 2016 19:00:09 +0800 Subject: [PATCH 1271/1715] rhashtable: Add rhlist interface The insecure_elasticity setting is an ugly wart brought out by users who need to insert duplicate objects (that is, distinct objects with identical keys) into the same table. In fact, those users have a much bigger problem. Once those duplicate objects are inserted, they don't have an interface to find them (unless you count the walker interface which walks over the entire table). Some users have resorted to doing a manual walk over the hash table which is of course broken because they don't handle the potential existence of multiple hash tables. The result is that they will break sporadically when they encounter a hash table resize/rehash. This patch provides a way out for those users, at the expense of an extra pointer per object. Essentially each object is now a list of objects carrying the same key. The hash table will only see the lists so nothing changes as far as rhashtable is concerned. To use this new interface, you need to insert a struct rhlist_head into your objects instead of struct rhash_head. While the hash table is unchanged, for type-safety you'll need to use struct rhltable instead of struct rhashtable. All the existing interfaces have been duplicated for rhlist, including the hash table walker. One missing feature is nulls marking because AFAIK the only potential user of it does not need duplicate objects. Should anyone need this it shouldn't be too hard to add. Signed-off-by: Herbert Xu Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 491 ++++++++++++++++++++++++++++--------- lib/rhashtable.c | 256 +++++++++++++++---- 2 files changed, 582 insertions(+), 165 deletions(-) diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index fd82584acd48..5c132d3188be 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -1,7 +1,7 @@ /* * Resizable, Scalable, Concurrent Hash Table * - * Copyright (c) 2015 Herbert Xu + * Copyright (c) 2015-2016 Herbert Xu * Copyright (c) 2014-2015 Thomas Graf * Copyright (c) 2008-2014 Patrick McHardy * @@ -53,6 +53,11 @@ struct rhash_head { struct rhash_head __rcu *next; }; +struct rhlist_head { + struct rhash_head rhead; + struct rhlist_head __rcu *next; +}; + /** * struct bucket_table - Table of hash buckets * @size: Number of hash buckets @@ -137,6 +142,7 @@ struct rhashtable_params { * @key_len: Key length for hashfn * @elasticity: Maximum chain length before rehash * @p: Configuration parameters + * @rhlist: True if this is an rhltable * @run_work: Deferred worker to expand/shrink asynchronously * @mutex: Mutex to protect current/future table swapping * @lock: Spin lock to protect walker list @@ -147,11 +153,20 @@ struct rhashtable { unsigned int key_len; unsigned int elasticity; struct rhashtable_params p; + bool rhlist; struct work_struct run_work; struct mutex mutex; spinlock_t lock; }; +/** + * struct rhltable - Hash table with duplicate objects in a list + * @ht: Underlying rhtable + */ +struct rhltable { + struct rhashtable ht; +}; + /** * struct rhashtable_walker - Hash table walker * @list: List entry on list of walkers @@ -163,9 +178,10 @@ struct rhashtable_walker { }; /** - * struct rhashtable_iter - Hash table iterator, fits into netlink cb + * struct rhashtable_iter - Hash table iterator * @ht: Table to iterate through * @p: Current pointer + * @list: Current hash list pointer * @walker: Associated rhashtable walker * @slot: Current slot * @skip: Number of entries to skip in slot @@ -173,6 +189,7 @@ struct rhashtable_walker { struct rhashtable_iter { struct rhashtable *ht; struct rhash_head *p; + struct rhlist_head *list; struct rhashtable_walker walker; unsigned int slot; unsigned int skip; @@ -339,13 +356,11 @@ static inline int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, int rhashtable_init(struct rhashtable *ht, const struct rhashtable_params *params); +int rhltable_init(struct rhltable *hlt, + const struct rhashtable_params *params); -struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht, - const void *key, - struct rhash_head *obj, - struct bucket_table *old_tbl, - void **data); -int rhashtable_insert_rehash(struct rhashtable *ht, struct bucket_table *tbl); +void *rhashtable_insert_slow(struct rhashtable *ht, const void *key, + struct rhash_head *obj); void rhashtable_walk_enter(struct rhashtable *ht, struct rhashtable_iter *iter); @@ -507,6 +522,31 @@ void rhashtable_destroy(struct rhashtable *ht); rht_for_each_entry_rcu_continue(tpos, pos, (tbl)->buckets[hash],\ tbl, hash, member) +/** + * rhl_for_each_rcu - iterate over rcu hash table list + * @pos: the &struct rlist_head to use as a loop cursor. + * @list: the head of the list + * + * This hash chain list-traversal primitive should be used on the + * list returned by rhltable_lookup. + */ +#define rhl_for_each_rcu(pos, list) \ + for (pos = list; pos; pos = rcu_dereference_raw(pos->next)) + +/** + * rhl_for_each_entry_rcu - iterate over rcu hash table list of given type + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct rlist_head to use as a loop cursor. + * @list: the head of the list + * @member: name of the &struct rlist_head within the hashable struct. + * + * This hash chain list-traversal primitive should be used on the + * list returned by rhltable_lookup. + */ +#define rhl_for_each_entry_rcu(tpos, pos, list, member) \ + for (pos = list; pos && rht_entry(tpos, pos, member); \ + pos = rcu_dereference_raw(pos->next)) + static inline int rhashtable_compare(struct rhashtable_compare_arg *arg, const void *obj) { @@ -516,18 +556,8 @@ static inline int rhashtable_compare(struct rhashtable_compare_arg *arg, return memcmp(ptr + ht->p.key_offset, arg->key, ht->p.key_len); } -/** - * rhashtable_lookup_fast - search hash table, inlined version - * @ht: hash table - * @key: the pointer to the key - * @params: hash table parameters - * - * Computes the hash value for the key and traverses the bucket chain looking - * for a entry with an identical key. The first matching entry is returned. - * - * Returns the first entry on which the compare function returned true. - */ -static inline void *rhashtable_lookup_fast( +/* Internal function, do not use. */ +static inline struct rhash_head *__rhashtable_lookup( struct rhashtable *ht, const void *key, const struct rhashtable_params params) { @@ -539,8 +569,6 @@ static inline void *rhashtable_lookup_fast( struct rhash_head *he; unsigned int hash; - rcu_read_lock(); - tbl = rht_dereference_rcu(ht->tbl, ht); restart: hash = rht_key_hashfn(ht, tbl, key, params); @@ -549,8 +577,7 @@ restart: params.obj_cmpfn(&arg, rht_obj(ht, he)) : rhashtable_compare(&arg, rht_obj(ht, he))) continue; - rcu_read_unlock(); - return rht_obj(ht, he); + return he; } /* Ensure we see any new tables. */ @@ -559,96 +586,165 @@ restart: tbl = rht_dereference_rcu(tbl->future_tbl, ht); if (unlikely(tbl)) goto restart; - rcu_read_unlock(); return NULL; } +/** + * rhashtable_lookup - search hash table + * @ht: hash table + * @key: the pointer to the key + * @params: hash table parameters + * + * Computes the hash value for the key and traverses the bucket chain looking + * for a entry with an identical key. The first matching entry is returned. + * + * This must only be called under the RCU read lock. + * + * Returns the first entry on which the compare function returned true. + */ +static inline void *rhashtable_lookup( + struct rhashtable *ht, const void *key, + const struct rhashtable_params params) +{ + struct rhash_head *he = __rhashtable_lookup(ht, key, params); + + return he ? rht_obj(ht, he) : NULL; +} + +/** + * rhashtable_lookup_fast - search hash table, without RCU read lock + * @ht: hash table + * @key: the pointer to the key + * @params: hash table parameters + * + * Computes the hash value for the key and traverses the bucket chain looking + * for a entry with an identical key. The first matching entry is returned. + * + * Only use this function when you have other mechanisms guaranteeing + * that the object won't go away after the RCU read lock is released. + * + * Returns the first entry on which the compare function returned true. + */ +static inline void *rhashtable_lookup_fast( + struct rhashtable *ht, const void *key, + const struct rhashtable_params params) +{ + void *obj; + + rcu_read_lock(); + obj = rhashtable_lookup(ht, key, params); + rcu_read_unlock(); + + return obj; +} + +/** + * rhltable_lookup - search hash list table + * @hlt: hash table + * @key: the pointer to the key + * @params: hash table parameters + * + * Computes the hash value for the key and traverses the bucket chain looking + * for a entry with an identical key. All matching entries are returned + * in a list. + * + * This must only be called under the RCU read lock. + * + * Returns the list of entries that match the given key. + */ +static inline struct rhlist_head *rhltable_lookup( + struct rhltable *hlt, const void *key, + const struct rhashtable_params params) +{ + struct rhash_head *he = __rhashtable_lookup(&hlt->ht, key, params); + + return he ? container_of(he, struct rhlist_head, rhead) : NULL; +} + /* Internal function, please use rhashtable_insert_fast() instead. This * function returns the existing element already in hashes in there is a clash, * otherwise it returns an error via ERR_PTR(). */ static inline void *__rhashtable_insert_fast( struct rhashtable *ht, const void *key, struct rhash_head *obj, - const struct rhashtable_params params) + const struct rhashtable_params params, bool rhlist) { struct rhashtable_compare_arg arg = { .ht = ht, .key = key, }; - struct bucket_table *tbl, *new_tbl; + struct rhash_head __rcu **pprev; + struct bucket_table *tbl; struct rhash_head *head; spinlock_t *lock; - unsigned int elasticity; unsigned int hash; - void *data = NULL; - int err; + int elasticity; + void *data; -restart: rcu_read_lock(); tbl = rht_dereference_rcu(ht->tbl, ht); + hash = rht_head_hashfn(ht, tbl, obj, params); + lock = rht_bucket_lock(tbl, hash); + spin_lock_bh(lock); - /* All insertions must grab the oldest table containing - * the hashed bucket that is yet to be rehashed. - */ - for (;;) { - hash = rht_head_hashfn(ht, tbl, obj, params); - lock = rht_bucket_lock(tbl, hash); - spin_lock_bh(lock); - - if (tbl->rehash <= hash) - break; - + if (unlikely(rht_dereference_bucket(tbl->future_tbl, tbl, hash))) { +slow_path: spin_unlock_bh(lock); - tbl = rht_dereference_rcu(tbl->future_tbl, ht); + rcu_read_unlock(); + return rhashtable_insert_slow(ht, key, obj); } - new_tbl = rht_dereference_rcu(tbl->future_tbl, ht); - if (unlikely(new_tbl)) { - tbl = rhashtable_insert_slow(ht, key, obj, new_tbl, &data); - if (!IS_ERR_OR_NULL(tbl)) - goto slow_path; + elasticity = ht->elasticity; + pprev = &tbl->buckets[hash]; + rht_for_each(head, tbl, hash) { + struct rhlist_head *plist; + struct rhlist_head *list; - err = PTR_ERR(tbl); - if (err == -EEXIST) - err = 0; + elasticity--; + if (!key || + (params.obj_cmpfn ? + params.obj_cmpfn(&arg, rht_obj(ht, head)) : + rhashtable_compare(&arg, rht_obj(ht, head)))) + continue; - goto out; + data = rht_obj(ht, head); + + if (!rhlist) + goto out; + + + list = container_of(obj, struct rhlist_head, rhead); + plist = container_of(head, struct rhlist_head, rhead); + + RCU_INIT_POINTER(list->next, plist); + head = rht_dereference_bucket(head->next, tbl, hash); + RCU_INIT_POINTER(list->rhead.next, head); + rcu_assign_pointer(*pprev, obj); + + goto good; } - err = -E2BIG; + if (elasticity <= 0) + goto slow_path; + + data = ERR_PTR(-E2BIG); if (unlikely(rht_grow_above_max(ht, tbl))) goto out; - if (unlikely(rht_grow_above_100(ht, tbl))) { -slow_path: - spin_unlock_bh(lock); - err = rhashtable_insert_rehash(ht, tbl); - rcu_read_unlock(); - if (err) - return ERR_PTR(err); - - goto restart; - } - - err = 0; - elasticity = ht->elasticity; - rht_for_each(head, tbl, hash) { - if (key && - unlikely(!(params.obj_cmpfn ? - params.obj_cmpfn(&arg, rht_obj(ht, head)) : - rhashtable_compare(&arg, rht_obj(ht, head))))) { - data = rht_obj(ht, head); - goto out; - } - if (!--elasticity) - goto slow_path; - } + if (unlikely(rht_grow_above_100(ht, tbl))) + goto slow_path; head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash); RCU_INIT_POINTER(obj->next, head); + if (rhlist) { + struct rhlist_head *list; + + list = container_of(obj, struct rhlist_head, rhead); + RCU_INIT_POINTER(list->next, NULL); + } rcu_assign_pointer(tbl->buckets[hash], obj); @@ -656,11 +752,14 @@ slow_path: if (rht_grow_above_75(ht, tbl)) schedule_work(&ht->run_work); +good: + data = NULL; + out: spin_unlock_bh(lock); rcu_read_unlock(); - return err ? ERR_PTR(err) : data; + return data; } /** @@ -685,13 +784,65 @@ static inline int rhashtable_insert_fast( { void *ret; - ret = __rhashtable_insert_fast(ht, NULL, obj, params); + ret = __rhashtable_insert_fast(ht, NULL, obj, params, false); if (IS_ERR(ret)) return PTR_ERR(ret); return ret == NULL ? 0 : -EEXIST; } +/** + * rhltable_insert_key - insert object into hash list table + * @hlt: hash list table + * @key: the pointer to the key + * @list: pointer to hash list head inside object + * @params: hash table parameters + * + * Will take a per bucket spinlock to protect against mutual mutations + * on the same bucket. Multiple insertions may occur in parallel unless + * they map to the same bucket lock. + * + * It is safe to call this function from atomic context. + * + * Will trigger an automatic deferred table resizing if the size grows + * beyond the watermark indicated by grow_decision() which can be passed + * to rhashtable_init(). + */ +static inline int rhltable_insert_key( + struct rhltable *hlt, const void *key, struct rhlist_head *list, + const struct rhashtable_params params) +{ + return PTR_ERR(__rhashtable_insert_fast(&hlt->ht, key, &list->rhead, + params, true)); +} + +/** + * rhltable_insert - insert object into hash list table + * @hlt: hash list table + * @list: pointer to hash list head inside object + * @params: hash table parameters + * + * Will take a per bucket spinlock to protect against mutual mutations + * on the same bucket. Multiple insertions may occur in parallel unless + * they map to the same bucket lock. + * + * It is safe to call this function from atomic context. + * + * Will trigger an automatic deferred table resizing if the size grows + * beyond the watermark indicated by grow_decision() which can be passed + * to rhashtable_init(). + */ +static inline int rhltable_insert( + struct rhltable *hlt, struct rhlist_head *list, + const struct rhashtable_params params) +{ + const char *key = rht_obj(&hlt->ht, &list->rhead); + + key += params.key_offset; + + return rhltable_insert_key(hlt, key, list, params); +} + /** * rhashtable_lookup_insert_fast - lookup and insert object into hash table * @ht: hash table @@ -722,7 +873,8 @@ static inline int rhashtable_lookup_insert_fast( BUG_ON(ht->p.obj_hashfn); - ret = __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj, params); + ret = __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj, params, + false); if (IS_ERR(ret)) return PTR_ERR(ret); @@ -759,7 +911,7 @@ static inline int rhashtable_lookup_insert_key( BUG_ON(!ht->p.obj_hashfn || !key); - ret = __rhashtable_insert_fast(ht, key, obj, params); + ret = __rhashtable_insert_fast(ht, key, obj, params, false); if (IS_ERR(ret)) return PTR_ERR(ret); @@ -783,13 +935,14 @@ static inline void *rhashtable_lookup_get_insert_key( { BUG_ON(!ht->p.obj_hashfn || !key); - return __rhashtable_insert_fast(ht, key, obj, params); + return __rhashtable_insert_fast(ht, key, obj, params, false); } /* Internal function, please use rhashtable_remove_fast() instead */ -static inline int __rhashtable_remove_fast( +static inline int __rhashtable_remove_fast_one( struct rhashtable *ht, struct bucket_table *tbl, - struct rhash_head *obj, const struct rhashtable_params params) + struct rhash_head *obj, const struct rhashtable_params params, + bool rhlist) { struct rhash_head __rcu **pprev; struct rhash_head *he; @@ -804,18 +957,86 @@ static inline int __rhashtable_remove_fast( pprev = &tbl->buckets[hash]; rht_for_each(he, tbl, hash) { + struct rhlist_head *list; + + list = container_of(he, struct rhlist_head, rhead); + if (he != obj) { + struct rhlist_head __rcu **lpprev; + pprev = &he->next; - continue; + + if (!rhlist) + continue; + + do { + lpprev = &list->next; + list = rht_dereference_bucket(list->next, + tbl, hash); + } while (list && obj != &list->rhead); + + if (!list) + continue; + + list = rht_dereference_bucket(list->next, tbl, hash); + RCU_INIT_POINTER(*lpprev, list); + err = 0; + break; } - rcu_assign_pointer(*pprev, obj->next); - err = 0; + obj = rht_dereference_bucket(obj->next, tbl, hash); + err = 1; + + if (rhlist) { + list = rht_dereference_bucket(list->next, tbl, hash); + if (list) { + RCU_INIT_POINTER(list->rhead.next, obj); + obj = &list->rhead; + err = 0; + } + } + + rcu_assign_pointer(*pprev, obj); break; } spin_unlock_bh(lock); + if (err > 0) { + atomic_dec(&ht->nelems); + if (unlikely(ht->p.automatic_shrinking && + rht_shrink_below_30(ht, tbl))) + schedule_work(&ht->run_work); + err = 0; + } + + return err; +} + +/* Internal function, please use rhashtable_remove_fast() instead */ +static inline int __rhashtable_remove_fast( + struct rhashtable *ht, struct rhash_head *obj, + const struct rhashtable_params params, bool rhlist) +{ + struct bucket_table *tbl; + int err; + + rcu_read_lock(); + + tbl = rht_dereference_rcu(ht->tbl, ht); + + /* Because we have already taken (and released) the bucket + * lock in old_tbl, if we find that future_tbl is not yet + * visible then that guarantees the entry to still be in + * the old tbl if it exists. + */ + while ((err = __rhashtable_remove_fast_one(ht, tbl, obj, params, + rhlist)) && + (tbl = rht_dereference_rcu(tbl->future_tbl, ht))) + ; + + rcu_read_unlock(); + return err; } @@ -838,34 +1059,29 @@ static inline int rhashtable_remove_fast( struct rhashtable *ht, struct rhash_head *obj, const struct rhashtable_params params) { - struct bucket_table *tbl; - int err; + return __rhashtable_remove_fast(ht, obj, params, false); +} - rcu_read_lock(); - - tbl = rht_dereference_rcu(ht->tbl, ht); - - /* Because we have already taken (and released) the bucket - * lock in old_tbl, if we find that future_tbl is not yet - * visible then that guarantees the entry to still be in - * the old tbl if it exists. - */ - while ((err = __rhashtable_remove_fast(ht, tbl, obj, params)) && - (tbl = rht_dereference_rcu(tbl->future_tbl, ht))) - ; - - if (err) - goto out; - - atomic_dec(&ht->nelems); - if (unlikely(ht->p.automatic_shrinking && - rht_shrink_below_30(ht, tbl))) - schedule_work(&ht->run_work); - -out: - rcu_read_unlock(); - - return err; +/** + * rhltable_remove - remove object from hash list table + * @hlt: hash list table + * @list: pointer to hash list head inside object + * @params: hash table parameters + * + * Since the hash chain is single linked, the removal operation needs to + * walk the bucket chain upon removal. The removal operation is thus + * considerable slow if the hash table is not correctly sized. + * + * Will automatically shrink the table via rhashtable_expand() if the + * shrink_decision function specified at rhashtable_init() returns true. + * + * Returns zero on success, -ENOENT if the entry could not be found. + */ +static inline int rhltable_remove( + struct rhltable *hlt, struct rhlist_head *list, + const struct rhashtable_params params) +{ + return __rhashtable_remove_fast(&hlt->ht, &list->rhead, params, true); } /* Internal function, please use rhashtable_replace_fast() instead */ @@ -958,4 +1174,51 @@ static inline int rhashtable_walk_init(struct rhashtable *ht, return 0; } +/** + * rhltable_walk_enter - Initialise an iterator + * @hlt: Table to walk over + * @iter: Hash table Iterator + * + * This function prepares a hash table walk. + * + * Note that if you restart a walk after rhashtable_walk_stop you + * may see the same object twice. Also, you may miss objects if + * there are removals in between rhashtable_walk_stop and the next + * call to rhashtable_walk_start. + * + * For a completely stable walk you should construct your own data + * structure outside the hash table. + * + * This function may sleep so you must not call it from interrupt + * context or with spin locks held. + * + * You must call rhashtable_walk_exit after this function returns. + */ +static inline void rhltable_walk_enter(struct rhltable *hlt, + struct rhashtable_iter *iter) +{ + return rhashtable_walk_enter(&hlt->ht, iter); +} + +/** + * rhltable_free_and_destroy - free elements and destroy hash list table + * @hlt: the hash list table to destroy + * @free_fn: callback to release resources of element + * @arg: pointer passed to free_fn + * + * See documentation for rhashtable_free_and_destroy. + */ +static inline void rhltable_free_and_destroy(struct rhltable *hlt, + void (*free_fn)(void *ptr, + void *arg), + void *arg) +{ + return rhashtable_free_and_destroy(&hlt->ht, free_fn, arg); +} + +static inline void rhltable_destroy(struct rhltable *hlt) +{ + return rhltable_free_and_destroy(hlt, NULL, NULL); +} + #endif /* _LINUX_RHASHTABLE_H */ diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 06c28728bb53..32d0ad058380 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -378,22 +378,8 @@ static void rht_deferred_worker(struct work_struct *work) schedule_work(&ht->run_work); } -static bool rhashtable_check_elasticity(struct rhashtable *ht, - struct bucket_table *tbl, - unsigned int hash) -{ - unsigned int elasticity = ht->elasticity; - struct rhash_head *head; - - rht_for_each(head, tbl, hash) - if (!--elasticity) - return true; - - return false; -} - -int rhashtable_insert_rehash(struct rhashtable *ht, - struct bucket_table *tbl) +static int rhashtable_insert_rehash(struct rhashtable *ht, + struct bucket_table *tbl) { struct bucket_table *old_tbl; struct bucket_table *new_tbl; @@ -439,57 +425,165 @@ fail: return err; } -EXPORT_SYMBOL_GPL(rhashtable_insert_rehash); -struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht, - const void *key, - struct rhash_head *obj, - struct bucket_table *tbl, - void **data) +static void *rhashtable_lookup_one(struct rhashtable *ht, + struct bucket_table *tbl, unsigned int hash, + const void *key, struct rhash_head *obj) { + struct rhashtable_compare_arg arg = { + .ht = ht, + .key = key, + }; + struct rhash_head __rcu **pprev; struct rhash_head *head; - unsigned int hash; - int err; + int elasticity; - tbl = rhashtable_last_table(ht, tbl); - hash = head_hashfn(ht, tbl, obj); - spin_lock_nested(rht_bucket_lock(tbl, hash), SINGLE_DEPTH_NESTING); + elasticity = ht->elasticity; + pprev = &tbl->buckets[hash]; + rht_for_each(head, tbl, hash) { + struct rhlist_head *list; + struct rhlist_head *plist; - err = -EEXIST; - if (key) { - *data = rhashtable_lookup_fast(ht, key, ht->p); - if (*data) - goto exit; + elasticity--; + if (!key || + (ht->p.obj_cmpfn ? + ht->p.obj_cmpfn(&arg, rht_obj(ht, head)) : + rhashtable_compare(&arg, rht_obj(ht, head)))) + continue; + + if (!ht->rhlist) + return rht_obj(ht, head); + + list = container_of(obj, struct rhlist_head, rhead); + plist = container_of(head, struct rhlist_head, rhead); + + RCU_INIT_POINTER(list->next, plist); + head = rht_dereference_bucket(head->next, tbl, hash); + RCU_INIT_POINTER(list->rhead.next, head); + rcu_assign_pointer(*pprev, obj); + + return NULL; } - err = -E2BIG; + if (elasticity <= 0) + return ERR_PTR(-EAGAIN); + + return ERR_PTR(-ENOENT); +} + +static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht, + struct bucket_table *tbl, + unsigned int hash, + struct rhash_head *obj, + void *data) +{ + struct bucket_table *new_tbl; + struct rhash_head *head; + + if (!IS_ERR_OR_NULL(data)) + return ERR_PTR(-EEXIST); + + if (PTR_ERR(data) != -EAGAIN && PTR_ERR(data) != -ENOENT) + return ERR_CAST(data); + + new_tbl = rcu_dereference(tbl->future_tbl); + if (new_tbl) + return new_tbl; + + if (PTR_ERR(data) != -ENOENT) + return ERR_CAST(data); + if (unlikely(rht_grow_above_max(ht, tbl))) - goto exit; + return ERR_PTR(-E2BIG); - err = -EAGAIN; - if (rhashtable_check_elasticity(ht, tbl, hash) || - rht_grow_above_100(ht, tbl)) - goto exit; - - err = 0; + if (unlikely(rht_grow_above_100(ht, tbl))) + return ERR_PTR(-EAGAIN); head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash); RCU_INIT_POINTER(obj->next, head); + if (ht->rhlist) { + struct rhlist_head *list; + + list = container_of(obj, struct rhlist_head, rhead); + RCU_INIT_POINTER(list->next, NULL); + } rcu_assign_pointer(tbl->buckets[hash], obj); atomic_inc(&ht->nelems); + if (rht_grow_above_75(ht, tbl)) + schedule_work(&ht->run_work); -exit: - spin_unlock(rht_bucket_lock(tbl, hash)); + return NULL; +} - if (err == 0) - return NULL; - else if (err == -EAGAIN) - return tbl; - else - return ERR_PTR(err); +static void *rhashtable_try_insert(struct rhashtable *ht, const void *key, + struct rhash_head *obj) +{ + struct bucket_table *new_tbl; + struct bucket_table *tbl; + unsigned int hash; + spinlock_t *lock; + void *data; + + tbl = rcu_dereference(ht->tbl); + + /* All insertions must grab the oldest table containing + * the hashed bucket that is yet to be rehashed. + */ + for (;;) { + hash = rht_head_hashfn(ht, tbl, obj, ht->p); + lock = rht_bucket_lock(tbl, hash); + spin_lock_bh(lock); + + if (tbl->rehash <= hash) + break; + + spin_unlock_bh(lock); + tbl = rcu_dereference(tbl->future_tbl); + } + + data = rhashtable_lookup_one(ht, tbl, hash, key, obj); + new_tbl = rhashtable_insert_one(ht, tbl, hash, obj, data); + if (PTR_ERR(new_tbl) != -EEXIST) + data = ERR_CAST(new_tbl); + + while (!IS_ERR_OR_NULL(new_tbl)) { + tbl = new_tbl; + hash = rht_head_hashfn(ht, tbl, obj, ht->p); + spin_lock_nested(rht_bucket_lock(tbl, hash), + SINGLE_DEPTH_NESTING); + + data = rhashtable_lookup_one(ht, tbl, hash, key, obj); + new_tbl = rhashtable_insert_one(ht, tbl, hash, obj, data); + if (PTR_ERR(new_tbl) != -EEXIST) + data = ERR_CAST(new_tbl); + + spin_unlock(rht_bucket_lock(tbl, hash)); + } + + spin_unlock_bh(lock); + + if (PTR_ERR(data) == -EAGAIN) + data = ERR_PTR(rhashtable_insert_rehash(ht, tbl) ?: + -EAGAIN); + + return data; +} + +void *rhashtable_insert_slow(struct rhashtable *ht, const void *key, + struct rhash_head *obj) +{ + void *data; + + do { + rcu_read_lock(); + data = rhashtable_try_insert(ht, key, obj); + rcu_read_unlock(); + } while (PTR_ERR(data) == -EAGAIN); + + return data; } EXPORT_SYMBOL_GPL(rhashtable_insert_slow); @@ -593,11 +687,16 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_start); void *rhashtable_walk_next(struct rhashtable_iter *iter) { struct bucket_table *tbl = iter->walker.tbl; + struct rhlist_head *list = iter->list; struct rhashtable *ht = iter->ht; struct rhash_head *p = iter->p; + bool rhlist = ht->rhlist; if (p) { - p = rht_dereference_bucket_rcu(p->next, tbl, iter->slot); + if (!rhlist || !(list = rcu_dereference(list->next))) { + p = rcu_dereference(p->next); + list = container_of(p, struct rhlist_head, rhead); + } goto next; } @@ -605,6 +704,18 @@ void *rhashtable_walk_next(struct rhashtable_iter *iter) int skip = iter->skip; rht_for_each_rcu(p, tbl, iter->slot) { + if (rhlist) { + list = container_of(p, struct rhlist_head, + rhead); + do { + if (!skip) + goto next; + skip--; + list = rcu_dereference(list->next); + } while (list); + + continue; + } if (!skip) break; skip--; @@ -614,7 +725,8 @@ next: if (!rht_is_a_nulls(p)) { iter->skip++; iter->p = p; - return rht_obj(ht, p); + iter->list = list; + return rht_obj(ht, rhlist ? &list->rhead : p); } iter->skip = 0; @@ -802,6 +914,48 @@ int rhashtable_init(struct rhashtable *ht, } EXPORT_SYMBOL_GPL(rhashtable_init); +/** + * rhltable_init - initialize a new hash list table + * @hlt: hash list table to be initialized + * @params: configuration parameters + * + * Initializes a new hash list table. + * + * See documentation for rhashtable_init. + */ +int rhltable_init(struct rhltable *hlt, const struct rhashtable_params *params) +{ + int err; + + /* No rhlist NULLs marking for now. */ + if (params->nulls_base) + return -EINVAL; + + err = rhashtable_init(&hlt->ht, params); + hlt->ht.rhlist = true; + return err; +} +EXPORT_SYMBOL_GPL(rhltable_init); + +static void rhashtable_free_one(struct rhashtable *ht, struct rhash_head *obj, + void (*free_fn)(void *ptr, void *arg), + void *arg) +{ + struct rhlist_head *list; + + if (!ht->rhlist) { + free_fn(rht_obj(ht, obj), arg); + return; + } + + list = container_of(obj, struct rhlist_head, rhead); + do { + obj = &list->rhead; + list = rht_dereference(list->next, ht); + free_fn(rht_obj(ht, obj), arg); + } while (list); +} + /** * rhashtable_free_and_destroy - free elements and destroy hash table * @ht: the hash table to destroy @@ -839,7 +993,7 @@ void rhashtable_free_and_destroy(struct rhashtable *ht, pos = next, next = !rht_is_a_nulls(pos) ? rht_dereference(pos->next, ht) : NULL) - free_fn(rht_obj(ht, pos), arg); + rhashtable_free_one(ht, pos, free_fn, arg); } } From 83e7e4ce9e93c3b020497144f4354b62aed5d894 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 19 Sep 2016 19:00:10 +0800 Subject: [PATCH 1272/1715] mac80211: Use rhltable instead of rhashtable mac80211 currently uses rhashtable with insecure_elasticity set to true. The latter is because of duplicate objects. What's more, mac80211 walks the rhashtable chains by hand which is broken as rhashtable may contain multiple tables due to resizing or rehashing. This patch fixes it by converting it to the newly added rhltable interface which is designed for use with duplicate objects. With rhltable a lookup returns a list of objects instead of a single one. This is then fed into the existing for_each_sta_info macro. This patch also deletes the sta_addr_hash function since rhashtable defaults to jhash. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/mac80211/ieee80211_i.h | 2 +- net/mac80211/rx.c | 7 ++--- net/mac80211/sta_info.c | 52 +++++++++++++++----------------------- net/mac80211/sta_info.h | 19 +++++--------- net/mac80211/status.c | 7 ++--- 5 files changed, 33 insertions(+), 54 deletions(-) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index c71c73594790..e496dee5af08 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1213,7 +1213,7 @@ struct ieee80211_local { spinlock_t tim_lock; unsigned long num_sta; struct list_head sta_list; - struct rhashtable sta_hash; + struct rhltable sta_hash; struct timer_list sta_cleanup; int sta_generation; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index e796060b7c5e..f7cf342bab52 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -4003,7 +4003,7 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, __le16 fc; struct ieee80211_rx_data rx; struct ieee80211_sub_if_data *prev; - struct rhash_head *tmp; + struct rhlist_head *tmp; int err = 0; fc = ((struct ieee80211_hdr *)skb->data)->frame_control; @@ -4046,13 +4046,10 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, goto out; } else if (ieee80211_is_data(fc)) { struct sta_info *sta, *prev_sta; - const struct bucket_table *tbl; prev_sta = NULL; - tbl = rht_dereference_rcu(local->sta_hash.tbl, &local->sta_hash); - - for_each_sta_info(local, tbl, hdr->addr2, sta, tmp) { + for_each_sta_info(local, hdr->addr2, sta, tmp) { if (!prev_sta) { prev_sta = sta; continue; diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 1b1b28ff4fdb..c803e2cb58bc 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -67,12 +67,10 @@ static const struct rhashtable_params sta_rht_params = { .nelem_hint = 3, /* start small */ - .insecure_elasticity = true, /* Disable chain-length checks. */ .automatic_shrinking = true, .head_offset = offsetof(struct sta_info, hash_node), .key_offset = offsetof(struct sta_info, addr), .key_len = ETH_ALEN, - .hashfn = sta_addr_hash, .max_size = CONFIG_MAC80211_STA_HASH_MAX_SIZE, }; @@ -80,8 +78,8 @@ static const struct rhashtable_params sta_rht_params = { static int sta_info_hash_del(struct ieee80211_local *local, struct sta_info *sta) { - return rhashtable_remove_fast(&local->sta_hash, &sta->hash_node, - sta_rht_params); + return rhltable_remove(&local->sta_hash, &sta->hash_node, + sta_rht_params); } static void __cleanup_single_sta(struct sta_info *sta) @@ -157,19 +155,22 @@ static void cleanup_single_sta(struct sta_info *sta) sta_info_free(local, sta); } +struct rhlist_head *sta_info_hash_lookup(struct ieee80211_local *local, + const u8 *addr) +{ + return rhltable_lookup(&local->sta_hash, addr, sta_rht_params); +} + /* protected by RCU */ struct sta_info *sta_info_get(struct ieee80211_sub_if_data *sdata, const u8 *addr) { struct ieee80211_local *local = sdata->local; + struct rhlist_head *tmp; struct sta_info *sta; - struct rhash_head *tmp; - const struct bucket_table *tbl; rcu_read_lock(); - tbl = rht_dereference_rcu(local->sta_hash.tbl, &local->sta_hash); - - for_each_sta_info(local, tbl, addr, sta, tmp) { + for_each_sta_info(local, addr, sta, tmp) { if (sta->sdata == sdata) { rcu_read_unlock(); /* this is safe as the caller must already hold @@ -190,14 +191,11 @@ struct sta_info *sta_info_get_bss(struct ieee80211_sub_if_data *sdata, const u8 *addr) { struct ieee80211_local *local = sdata->local; + struct rhlist_head *tmp; struct sta_info *sta; - struct rhash_head *tmp; - const struct bucket_table *tbl; rcu_read_lock(); - tbl = rht_dereference_rcu(local->sta_hash.tbl, &local->sta_hash); - - for_each_sta_info(local, tbl, addr, sta, tmp) { + for_each_sta_info(local, addr, sta, tmp) { if (sta->sdata == sdata || (sta->sdata->bss && sta->sdata->bss == sdata->bss)) { rcu_read_unlock(); @@ -263,8 +261,8 @@ void sta_info_free(struct ieee80211_local *local, struct sta_info *sta) static int sta_info_hash_add(struct ieee80211_local *local, struct sta_info *sta) { - return rhashtable_insert_fast(&local->sta_hash, &sta->hash_node, - sta_rht_params); + return rhltable_insert(&local->sta_hash, &sta->hash_node, + sta_rht_params); } static void sta_deliver_ps_frames(struct work_struct *wk) @@ -453,9 +451,9 @@ static int sta_info_insert_check(struct sta_info *sta) is_multicast_ether_addr(sta->sta.addr))) return -EINVAL; - /* Strictly speaking this isn't necessary as we hold the mutex, but - * the rhashtable code can't really deal with that distinction. We - * do require the mutex for correctness though. + /* The RCU read lock is required by rhashtable due to + * asynchronous resize/rehash. We also require the mutex + * for correctness. */ rcu_read_lock(); lockdep_assert_held(&sdata->local->sta_mtx); @@ -1043,16 +1041,11 @@ static void sta_info_cleanup(unsigned long data) round_jiffies(jiffies + STA_INFO_CLEANUP_INTERVAL)); } -u32 sta_addr_hash(const void *key, u32 length, u32 seed) -{ - return jhash(key, ETH_ALEN, seed); -} - int sta_info_init(struct ieee80211_local *local) { int err; - err = rhashtable_init(&local->sta_hash, &sta_rht_params); + err = rhltable_init(&local->sta_hash, &sta_rht_params); if (err) return err; @@ -1068,7 +1061,7 @@ int sta_info_init(struct ieee80211_local *local) void sta_info_stop(struct ieee80211_local *local) { del_timer_sync(&local->sta_cleanup); - rhashtable_destroy(&local->sta_hash); + rhltable_destroy(&local->sta_hash); } @@ -1138,17 +1131,14 @@ struct ieee80211_sta *ieee80211_find_sta_by_ifaddr(struct ieee80211_hw *hw, const u8 *localaddr) { struct ieee80211_local *local = hw_to_local(hw); + struct rhlist_head *tmp; struct sta_info *sta; - struct rhash_head *tmp; - const struct bucket_table *tbl; - - tbl = rht_dereference_rcu(local->sta_hash.tbl, &local->sta_hash); /* * Just return a random station if localaddr is NULL * ... first in list. */ - for_each_sta_info(local, tbl, addr, sta, tmp) { + for_each_sta_info(local, addr, sta, tmp) { if (localaddr && !ether_addr_equal(sta->sdata->vif.addr, localaddr)) continue; diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 530231b73278..ed5fcb984a01 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -455,7 +455,7 @@ struct sta_info { /* General information, mostly static */ struct list_head list, free_list; struct rcu_head rcu_head; - struct rhash_head hash_node; + struct rhlist_head hash_node; u8 addr[ETH_ALEN]; struct ieee80211_local *local; struct ieee80211_sub_if_data *sdata; @@ -638,6 +638,9 @@ rcu_dereference_protected_tid_tx(struct sta_info *sta, int tid) */ #define STA_INFO_CLEANUP_INTERVAL (10 * HZ) +struct rhlist_head *sta_info_hash_lookup(struct ieee80211_local *local, + const u8 *addr); + /* * Get a STA info, must be under RCU read lock. */ @@ -647,17 +650,9 @@ struct sta_info *sta_info_get(struct ieee80211_sub_if_data *sdata, struct sta_info *sta_info_get_bss(struct ieee80211_sub_if_data *sdata, const u8 *addr); -u32 sta_addr_hash(const void *key, u32 length, u32 seed); - -#define _sta_bucket_idx(_tbl, _a) \ - rht_bucket_index(_tbl, sta_addr_hash(_a, ETH_ALEN, (_tbl)->hash_rnd)) - -#define for_each_sta_info(local, tbl, _addr, _sta, _tmp) \ - rht_for_each_entry_rcu(_sta, _tmp, tbl, \ - _sta_bucket_idx(tbl, _addr), \ - hash_node) \ - /* compare address and run code only if it matches */ \ - if (ether_addr_equal(_sta->addr, (_addr))) +#define for_each_sta_info(local, _addr, _sta, _tmp) \ + rhl_for_each_entry_rcu(_sta, _tmp, \ + sta_info_hash_lookup(local, _addr), hash_node) /* * Get STA info by index, BROKEN! diff --git a/net/mac80211/status.c b/net/mac80211/status.c index ea39f8a7baf3..ddf71c648cab 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -746,8 +746,8 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); __le16 fc; struct ieee80211_supported_band *sband; + struct rhlist_head *tmp; struct sta_info *sta; - struct rhash_head *tmp; int retry_count; int rates_idx; bool send_to_cooked; @@ -755,7 +755,6 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) struct ieee80211_bar *bar; int shift = 0; int tid = IEEE80211_NUM_TIDS; - const struct bucket_table *tbl; rates_idx = ieee80211_tx_get_rates(hw, info, &retry_count); @@ -764,9 +763,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) sband = local->hw.wiphy->bands[info->band]; fc = hdr->frame_control; - tbl = rht_dereference_rcu(local->sta_hash.tbl, &local->sta_hash); - - for_each_sta_info(local, tbl, hdr->addr1, sta, tmp) { + for_each_sta_info(local, hdr->addr1, sta, tmp) { /* skip wrong virtual interface */ if (!ether_addr_equal(hdr->addr2, sta->sdata->vif.addr)) continue; From 06f8ec9041f02d44bb0b75d47668e2fe00d5e0c3 Mon Sep 17 00:00:00 2001 From: John Crispin Date: Mon, 19 Sep 2016 15:28:00 +0200 Subject: [PATCH 1273/1715] net-next: dsa: fix duplicate invocation of set_addr() commit 83c0afaec7b730b ("net: dsa: Add new binding implementation") has a duplicate invocation of the set_addr() operation callback. Remove one of them. Signed-off-by: John Crispin Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- net/dsa/dsa2.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 8278385dcd21..cffc19e972a1 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -308,10 +308,6 @@ static int dsa_ds_apply(struct dsa_switch_tree *dst, struct dsa_switch *ds) if (err < 0) return err; - err = ds->ops->set_addr(ds, dst->master_netdev->dev_addr); - if (err < 0) - return err; - if (!ds->slave_mii_bus && ds->ops->phy_read) { ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev); if (!ds->slave_mii_bus) From 092183df0fa1f4b49baad3a980c55d55de07dfb7 Mon Sep 17 00:00:00 2001 From: John Crispin Date: Mon, 19 Sep 2016 15:28:01 +0200 Subject: [PATCH 1274/1715] net-next: dsa: make the set_addr() operation optional Only 1 of the 3 drivers currently has a set_addr() operation. Make the set_addr() callback optional to reduce the amount of empty stubs inside the drivers. Signed-off-by: John Crispin Signed-off-by: David S. Miller --- net/dsa/dsa.c | 8 +++++--- net/dsa/dsa2.c | 8 +++++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 66e31acfcad8..a6902c1e2f28 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -378,9 +378,11 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) if (ret < 0) goto out; - ret = ops->set_addr(ds, dst->master_netdev->dev_addr); - if (ret < 0) - goto out; + if (ops->set_addr) { + ret = ops->set_addr(ds, dst->master_netdev->dev_addr); + if (ret < 0) + goto out; + } if (!ds->slave_mii_bus && ops->phy_read) { ds->slave_mii_bus = devm_mdiobus_alloc(parent); diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index cffc19e972a1..f8a7d9aab437 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -304,9 +304,11 @@ static int dsa_ds_apply(struct dsa_switch_tree *dst, struct dsa_switch *ds) if (err < 0) return err; - err = ds->ops->set_addr(ds, dst->master_netdev->dev_addr); - if (err < 0) - return err; + if (ds->ops->set_addr) { + err = ds->ops->set_addr(ds, dst->master_netdev->dev_addr); + if (err < 0) + return err; + } if (!ds->slave_mii_bus && ds->ops->phy_read) { ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev); From 1f449736525addd6fcce674d654bd1471748484e Mon Sep 17 00:00:00 2001 From: John Crispin Date: Mon, 19 Sep 2016 15:28:02 +0200 Subject: [PATCH 1275/1715] net-next: dsa: b53: remove empty set_addr() stub The set_addr() callback is now optional. Remove the empty stub that b53 has. Signed-off-by: John Crispin Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_common.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 0afc2e5a04dc..1a492c053e27 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -764,11 +764,6 @@ static int b53_get_sset_count(struct dsa_switch *ds) return b53_get_mib_size(dev); } -static int b53_set_addr(struct dsa_switch *ds, u8 *addr) -{ - return 0; -} - static int b53_setup(struct dsa_switch *ds) { struct b53_device *dev = ds->priv; @@ -1466,7 +1461,6 @@ static enum dsa_tag_protocol b53_get_tag_protocol(struct dsa_switch *ds) static struct dsa_switch_ops b53_switch_ops = { .get_tag_protocol = b53_get_tag_protocol, .setup = b53_setup, - .set_addr = b53_set_addr, .get_strings = b53_get_strings, .get_ethtool_stats = b53_get_ethtool_stats, .get_sset_count = b53_get_sset_count, From 8941ee36e3266a5efca52e32c4dc214f202c751a Mon Sep 17 00:00:00 2001 From: John Crispin Date: Mon, 19 Sep 2016 15:28:03 +0200 Subject: [PATCH 1276/1715] net-next: dsa: qca8k: remove empty set_addr() stub The set_addr() callback is now optional. Remove the empty stub that qca8k has. Signed-off-by: John Crispin Signed-off-by: David S. Miller --- drivers/net/dsa/qca8k.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index 7f3f1781c202..4788a89520dd 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -585,13 +585,6 @@ qca8k_setup(struct dsa_switch *ds) return 0; } -static int -qca8k_set_addr(struct dsa_switch *ds, u8 *addr) -{ - /* The subsystem always calls this function so add an empty stub */ - return 0; -} - static int qca8k_phy_read(struct dsa_switch *ds, int phy, int regnum) { @@ -921,7 +914,6 @@ qca8k_get_tag_protocol(struct dsa_switch *ds) static struct dsa_switch_ops qca8k_switch_ops = { .get_tag_protocol = qca8k_get_tag_protocol, .setup = qca8k_setup, - .set_addr = qca8k_set_addr, .get_strings = qca8k_get_strings, .phy_read = qca8k_phy_read, .phy_write = qca8k_phy_write, From 5737f6c92681939e417579b421f81f035e57c582 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Mon, 19 Sep 2016 17:46:38 +0200 Subject: [PATCH 1277/1715] mlx4: add missed recycle opportunity for XDP_TX on TX failure Correct drop handling for XDP_TX on TX failure, were recently added in commit 95357907ae73 ("mlx4: fix XDP_TX is acting like XDP_PASS on TX ring full"). The change missed an opportunity for recycling the RX page, instead of going through the page allocator, like the regular XDP_DROP action does. This patch cease the opportunity, by going through the XDP_DROP case. Fixes: 95357907ae73 ("mlx4: fix XDP_TX is acting like XDP_PASS on TX ring full") Signed-off-by: Jesper Dangaard Brouer Reviewed-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index c80073e4947f..c46355bce613 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -906,11 +906,12 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud length, tx_index, &doorbell_pending)) goto consumed; - goto next; /* Drop on xmit failure */ + goto xdp_drop; /* Drop on xmit failure */ default: bpf_warn_invalid_xdp_action(act); case XDP_ABORTED: case XDP_DROP: +xdp_drop: if (mlx4_en_rx_recycle(ring, frags)) goto consumed; goto next; From ad9798967dd67f080bf0e8d611b382a5d292aae2 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Mon, 19 Sep 2016 20:15:24 +0100 Subject: [PATCH 1278/1715] 6pack: fix buffer length mishandling Dmitry Vyukov wrote: > different runs). Looking at code, the following looks suspicious -- we > limit copy by 512 bytes, but use the original count which can be > larger than 512: > > static void sixpack_receive_buf(struct tty_struct *tty, > const unsigned char *cp, char *fp, int count) > { > unsigned char buf[512]; > .... > memcpy(buf, cp, count < sizeof(buf) ? count : sizeof(buf)); > .... > sixpack_decode(sp, buf, count1); With the sane tty locking we now have I believe the following is safe as we consume the bytes and move them into the decoded buffer before returning. Signed-off-by: Alan Cox Signed-off-by: David S. Miller --- drivers/net/hamradio/6pack.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c index 5a1e98547031..470b3dcd54e5 100644 --- a/drivers/net/hamradio/6pack.c +++ b/drivers/net/hamradio/6pack.c @@ -127,7 +127,7 @@ struct sixpack { #define AX25_6PACK_HEADER_LEN 0 -static void sixpack_decode(struct sixpack *, unsigned char[], int); +static void sixpack_decode(struct sixpack *, const unsigned char[], int); static int encode_sixpack(unsigned char *, unsigned char *, int, unsigned char); /* @@ -428,7 +428,7 @@ out: /* * Handle the 'receiver data ready' interrupt. - * This function is called by the 'tty_io' module in the kernel when + * This function is called by the tty module in the kernel when * a block of 6pack data has been received, which can now be decapsulated * and sent on to some IP layer for further processing. */ @@ -436,7 +436,6 @@ static void sixpack_receive_buf(struct tty_struct *tty, const unsigned char *cp, char *fp, int count) { struct sixpack *sp; - unsigned char buf[512]; int count1; if (!count) @@ -446,10 +445,7 @@ static void sixpack_receive_buf(struct tty_struct *tty, if (!sp) return; - memcpy(buf, cp, count < sizeof(buf) ? count : sizeof(buf)); - /* Read the characters out of the buffer */ - count1 = count; while (count) { count--; @@ -459,7 +455,7 @@ static void sixpack_receive_buf(struct tty_struct *tty, continue; } } - sixpack_decode(sp, buf, count1); + sixpack_decode(sp, cp, count1); sp_put(sp); tty_unthrottle(tty); @@ -992,7 +988,7 @@ static void decode_std_command(struct sixpack *sp, unsigned char cmd) /* decode a 6pack packet */ static void -sixpack_decode(struct sixpack *sp, unsigned char *pre_rbuff, int count) +sixpack_decode(struct sixpack *sp, const unsigned char *pre_rbuff, int count) { unsigned char inbyte; int count1; From 190aa3e77880a05332ea1ccb382a51285d57adb5 Mon Sep 17 00:00:00 2001 From: pravin shelar Date: Mon, 19 Sep 2016 13:50:59 -0700 Subject: [PATCH 1279/1715] openvswitch: Fix Frame-size larger than 1024 bytes warning. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is no need to declare separate key on stack, we can just use sw_flow->key to store the key directly. This commit fixes following warning: net/openvswitch/datapath.c: In function ‘ovs_flow_cmd_new’: net/openvswitch/datapath.c:1080:1: warning: the frame size of 1040 bytes is larger than 1024 bytes [-Wframe-larger-than=] Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/datapath.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 0536ab3504d5..474e7a6bfeb7 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -928,7 +928,6 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) struct sw_flow_mask mask; struct sk_buff *reply; struct datapath *dp; - struct sw_flow_key key; struct sw_flow_actions *acts; struct sw_flow_match match; u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]); @@ -956,20 +955,24 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) } /* Extract key. */ - ovs_match_init(&match, &key, &mask); + ovs_match_init(&match, &new_flow->key, &mask); error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK], log); if (error) goto err_kfree_flow; - ovs_flow_mask_key(&new_flow->key, &key, true, &mask); - /* Extract flow identifier. */ error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID], - &key, log); + &new_flow->key, log); if (error) goto err_kfree_flow; + /* unmasked key is needed to match when ufid is not used. */ + if (ovs_identifier_is_key(&new_flow->id)) + match.key = new_flow->id.unmasked_key; + + ovs_flow_mask_key(&new_flow->key, &new_flow->key, true, &mask); + /* Validate actions. */ error = ovs_nla_copy_actions(net, a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key, &acts, log); @@ -996,7 +999,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) if (ovs_identifier_is_ufid(&new_flow->id)) flow = ovs_flow_tbl_lookup_ufid(&dp->table, &new_flow->id); if (!flow) - flow = ovs_flow_tbl_lookup(&dp->table, &key); + flow = ovs_flow_tbl_lookup(&dp->table, &new_flow->key); if (likely(!flow)) { rcu_assign_pointer(new_flow->sf_acts, acts); From 2279994d07ab67ff7a1d09bfbd65588332dfb6d8 Mon Sep 17 00:00:00 2001 From: pravin shelar Date: Mon, 19 Sep 2016 13:51:00 -0700 Subject: [PATCH 1280/1715] openvswitch: avoid resetting flow key while installing new flow. since commit commit db74a3335e0f6 ("openvswitch: use percpu flow stats") flow alloc resets flow-key. So there is no need to reset the flow-key again if OVS is using newly allocated flow-key. Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/datapath.c | 8 ++++---- net/openvswitch/flow.c | 2 -- net/openvswitch/flow_netlink.c | 6 ++++-- net/openvswitch/flow_netlink.h | 3 ++- 4 files changed, 10 insertions(+), 9 deletions(-) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 474e7a6bfeb7..4d67ea856067 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -955,7 +955,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) } /* Extract key. */ - ovs_match_init(&match, &new_flow->key, &mask); + ovs_match_init(&match, &new_flow->key, false, &mask); error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK], log); if (error) @@ -1124,7 +1124,7 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log); if (a[OVS_FLOW_ATTR_KEY]) { - ovs_match_init(&match, &key, &mask); + ovs_match_init(&match, &key, true, &mask); error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK], log); } else if (!ufid_present) { @@ -1241,7 +1241,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log); if (a[OVS_FLOW_ATTR_KEY]) { - ovs_match_init(&match, &key, NULL); + ovs_match_init(&match, &key, true, NULL); err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], NULL, log); } else if (!ufid_present) { @@ -1300,7 +1300,7 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log); if (a[OVS_FLOW_ATTR_KEY]) { - ovs_match_init(&match, &key, NULL); + ovs_match_init(&match, &key, true, NULL); err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], NULL, log); if (unlikely(err)) diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 0fa45439def1..634cc10d6dee 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -767,8 +767,6 @@ int ovs_flow_key_extract_userspace(struct net *net, const struct nlattr *attr, { int err; - memset(key, 0, OVS_SW_FLOW_KEY_METADATA_SIZE); - /* Extract metadata from netlink attributes. */ err = ovs_nla_get_flow_metadata(net, attr, key, log); if (err) diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 8efa718ddb5e..ae25ded82b3b 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -1996,13 +1996,15 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr, void ovs_match_init(struct sw_flow_match *match, struct sw_flow_key *key, + bool reset_key, struct sw_flow_mask *mask) { memset(match, 0, sizeof(*match)); match->key = key; match->mask = mask; - memset(key, 0, sizeof(*key)); + if (reset_key) + memset(key, 0, sizeof(*key)); if (mask) { memset(&mask->key, 0, sizeof(mask->key)); @@ -2049,7 +2051,7 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, struct nlattr *a; int err = 0, start, opts_type; - ovs_match_init(&match, &key, NULL); + ovs_match_init(&match, &key, true, NULL); opts_type = ip_tun_from_nlattr(nla_data(attr), &match, false, log); if (opts_type < 0) return opts_type; diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h index 47dd142eca1c..45f9769e5aac 100644 --- a/net/openvswitch/flow_netlink.h +++ b/net/openvswitch/flow_netlink.h @@ -41,7 +41,8 @@ size_t ovs_tun_key_attr_size(void); size_t ovs_key_attr_size(void); void ovs_match_init(struct sw_flow_match *match, - struct sw_flow_key *key, struct sw_flow_mask *mask); + struct sw_flow_key *key, bool reset_key, + struct sw_flow_mask *mask); int ovs_nla_put_key(const struct sw_flow_key *, const struct sw_flow_key *, int attr, bool is_mask, struct sk_buff *); From cf714ac147e08bc13cd6bc79f2b090da905398ef Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Mon, 19 Sep 2016 13:56:29 -0700 Subject: [PATCH 1281/1715] ipvlan: Fix dependency issue kbuild-build-bot reported that if NETFILTER is not selected, the build fails pointing to netfilter symbols. Fixes: 4fbae7d83c98 ("ipvlan: Introduce l3s mode") Signed-off-by: Mahesh Bandewar Signed-off-by: David S. Miller --- drivers/net/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 8768a625350d..95c32f2d7601 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -149,6 +149,7 @@ config IPVLAN tristate "IP-VLAN support" depends on INET depends on IPV6 + depends on NETFILTER depends on NET_L3_MASTER_DEV ---help--- This allows one to create virtual devices off of a main interface From b399cf64e318ac8c5f10d36bb911e61c746b8788 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 20 Sep 2016 00:26:12 +0200 Subject: [PATCH 1282/1715] bpf, verifier: enforce larger zero range for pkt on overloading stack buffs Current contract for the following two helper argument types is: * ARG_CONST_STACK_SIZE: passed argument pair must be (ptr, >0). * ARG_CONST_STACK_SIZE_OR_ZERO: passed argument pair can be either (NULL, 0) or (ptr, >0). With 6841de8b0d03 ("bpf: allow helpers access the packet directly"), we can pass also raw packet data to helpers, so depending on the argument type being PTR_TO_PACKET, we now either assert memory via check_packet_access() or check_stack_boundary(). As a result, the tests in check_packet_access() currently allow more than intended with regards to reg->imm. Back in 969bf05eb3ce ("bpf: direct packet access"), check_packet_access() was fine to ignore size argument since in check_mem_access() size was bpf_size_to_bytes() derived and prior to the call to check_packet_access() guaranteed to be larger than zero. However, for the above two argument types, it currently means, we can have a <= 0 size and thus breaking current guarantees for helpers. Enforce a check for size <= 0 and bail out if so. check_stack_boundary() doesn't have such an issue since it already tests for access_size <= 0 and bails out, resp. access_size == 0 in case of NULL pointer passed when allowed. Fixes: 6841de8b0d03 ("bpf: allow helpers access the packet directly") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 90493a66dddd..bc138f34e38c 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -671,7 +671,7 @@ static int check_packet_access(struct verifier_env *env, u32 regno, int off, struct reg_state *reg = ®s[regno]; off += reg->off; - if (off < 0 || off + size > reg->range) { + if (off < 0 || size <= 0 || off + size > reg->range) { verbose("invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n", off, size, regno, reg->id, reg->off, reg->range); return -EACCES; From 36bbef52c7eb646ed6247055a2acd3851e317857 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 20 Sep 2016 00:26:13 +0200 Subject: [PATCH 1283/1715] bpf: direct packet write and access for helpers for clsact progs This work implements direct packet access for helpers and direct packet write in a similar fashion as already available for XDP types via commits 4acf6c0b84c9 ("bpf: enable direct packet data write for xdp progs") and 6841de8b0d03 ("bpf: allow helpers access the packet directly"), and as a complementary feature to the already available direct packet read for tc (cls/act) programs. For enabling this, we need to introduce two helpers, bpf_skb_pull_data() and bpf_csum_update(). The first is generally needed for both, read and write, because they would otherwise only be limited to the current linear skb head. Usually, when the data_end test fails, programs just bail out, or, in the direct read case, use bpf_skb_load_bytes() as an alternative to overcome this limitation. If such data sits in non-linear parts, we can just pull them in once with the new helper, retest and eventually access them. At the same time, this also makes sure the skb is uncloned, which is, of course, a necessary condition for direct write. As this needs to be an invariant for the write part only, the verifier detects writes and adds a prologue that is calling bpf_skb_pull_data() to effectively unclone the skb from the very beginning in case it is indeed cloned. The heuristic makes use of a similar trick that was done in 233577a22089 ("net: filter: constify detection of pkt_type_offset"). This comes at zero cost for other programs that do not use the direct write feature. Should a program use this feature only sparsely and has read access for the most parts with, for example, drop return codes, then such write action can be delegated to a tail called program for mitigating this cost of potential uncloning to a late point in time where it would have been paid similarly with the bpf_skb_store_bytes() as well. Advantage of direct write is that the writes are inlined whereas the helper cannot make any length assumptions and thus needs to generate a call to memcpy() also for small sizes, as well as cost of helper call itself with sanity checks are avoided. Plus, when direct read is already used, we don't need to cache or perform rechecks on the data boundaries (due to verifier invalidating previous checks for helpers that change skb->data), so more complex programs using rewrites can benefit from switching to direct read plus write. For direct packet access to helpers, we save the otherwise needed copy into a temp struct sitting on stack memory when use-case allows. Both facilities are enabled via may_access_direct_pkt_data() in verifier. For now, we limit this to map helpers and csum_diff, and can successively enable other helpers where we find it makes sense. Helpers that definitely cannot be allowed for this are those part of bpf_helper_changes_skb_data() since they can change underlying data, and those that write into memory as this could happen for packet typed args when still cloned. bpf_csum_update() helper accommodates for the fact that we need to fixup checksum_complete when using direct write instead of bpf_skb_store_bytes(), meaning the programs can use available helpers like bpf_csum_diff(), and implement csum_add(), csum_sub(), csum_block_add(), csum_block_sub() equivalents in eBPF together with the new helper. A usage example will be provided for iproute2's examples/bpf/ directory. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 4 +- include/linux/skbuff.h | 14 +++- include/uapi/linux/bpf.h | 21 ++++++ kernel/bpf/helpers.c | 3 + kernel/bpf/verifier.c | 54 ++++++++++++---- net/core/filter.c | 134 ++++++++++++++++++++++++++++++++++----- 6 files changed, 196 insertions(+), 34 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 9a904f63f8c1..5691fdc83819 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -96,6 +96,7 @@ enum bpf_return_type { struct bpf_func_proto { u64 (*func)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); bool gpl_only; + bool pkt_access; enum bpf_return_type ret_type; enum bpf_arg_type arg1_type; enum bpf_arg_type arg2_type; @@ -151,7 +152,8 @@ struct bpf_verifier_ops { */ bool (*is_valid_access)(int off, int size, enum bpf_access_type type, enum bpf_reg_type *reg_type); - + int (*gen_prologue)(struct bpf_insn *insn, bool direct_write, + const struct bpf_prog *prog); u32 (*convert_ctx_access)(enum bpf_access_type type, int dst_reg, int src_reg, int ctx_off, struct bpf_insn *insn, struct bpf_prog *prog); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 4c5662f05bda..c6dab3f7457c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -676,13 +676,23 @@ struct sk_buff { */ kmemcheck_bitfield_begin(flags1); __u16 queue_mapping; + +/* if you move cloned around you also must adapt those constants */ +#ifdef __BIG_ENDIAN_BITFIELD +#define CLONED_MASK (1 << 7) +#else +#define CLONED_MASK 1 +#endif +#define CLONED_OFFSET() offsetof(struct sk_buff, __cloned_offset) + + __u8 __cloned_offset[0]; __u8 cloned:1, nohdr:1, fclone:2, peeked:1, head_frag:1, - xmit_more:1; - /* one bit hole */ + xmit_more:1, + __unused:1; /* one bit hole */ kmemcheck_bitfield_end(flags1); /* fields enclosed in headers_start/headers_end are copied diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index f896dfac4ac0..e07432b9f8b8 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -398,6 +398,27 @@ enum bpf_func_id { */ BPF_FUNC_skb_change_tail, + /** + * bpf_skb_pull_data(skb, len) + * The helper will pull in non-linear data in case the + * skb is non-linear and not all of len are part of the + * linear section. Only needed for read/write with direct + * packet access. + * @skb: pointer to skb + * @len: len to make read/writeable + * Return: 0 on success or negative error + */ + BPF_FUNC_skb_pull_data, + + /** + * bpf_csum_update(skb, csum) + * Adds csum into skb->csum in case of CHECKSUM_COMPLETE. + * @skb: pointer to skb + * @csum: csum to add + * Return: csum on success or negative error + */ + BPF_FUNC_csum_update, + __BPF_FUNC_MAX_ID, }; diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index a5b8bf8cfcfd..39918402e6e9 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -36,6 +36,7 @@ BPF_CALL_2(bpf_map_lookup_elem, struct bpf_map *, map, void *, key) const struct bpf_func_proto bpf_map_lookup_elem_proto = { .func = bpf_map_lookup_elem, .gpl_only = false, + .pkt_access = true, .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, .arg1_type = ARG_CONST_MAP_PTR, .arg2_type = ARG_PTR_TO_MAP_KEY, @@ -51,6 +52,7 @@ BPF_CALL_4(bpf_map_update_elem, struct bpf_map *, map, void *, key, const struct bpf_func_proto bpf_map_update_elem_proto = { .func = bpf_map_update_elem, .gpl_only = false, + .pkt_access = true, .ret_type = RET_INTEGER, .arg1_type = ARG_CONST_MAP_PTR, .arg2_type = ARG_PTR_TO_MAP_KEY, @@ -67,6 +69,7 @@ BPF_CALL_2(bpf_map_delete_elem, struct bpf_map *, map, void *, key) const struct bpf_func_proto bpf_map_delete_elem_proto = { .func = bpf_map_delete_elem, .gpl_only = false, + .pkt_access = true, .ret_type = RET_INTEGER, .arg1_type = ARG_CONST_MAP_PTR, .arg2_type = ARG_PTR_TO_MAP_KEY, diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index bc138f34e38c..3a75ee3bdcd1 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -196,6 +196,7 @@ struct verifier_env { u32 used_map_cnt; /* number of used maps */ u32 id_gen; /* used to generate unique reg IDs */ bool allow_ptr_leaks; + bool seen_direct_write; }; #define BPF_COMPLEXITY_LIMIT_INSNS 65536 @@ -204,6 +205,7 @@ struct verifier_env { struct bpf_call_arg_meta { struct bpf_map *map_ptr; bool raw_mode; + bool pkt_access; int regno; int access_size; }; @@ -654,10 +656,17 @@ static int check_map_access(struct verifier_env *env, u32 regno, int off, #define MAX_PACKET_OFF 0xffff -static bool may_write_pkt_data(enum bpf_prog_type type) +static bool may_access_direct_pkt_data(struct verifier_env *env, + const struct bpf_call_arg_meta *meta) { - switch (type) { + switch (env->prog->type) { + case BPF_PROG_TYPE_SCHED_CLS: + case BPF_PROG_TYPE_SCHED_ACT: case BPF_PROG_TYPE_XDP: + if (meta) + return meta->pkt_access; + + env->seen_direct_write = true; return true; default: return false; @@ -817,7 +826,7 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off, err = check_stack_read(state, off, size, value_regno); } } else if (state->regs[regno].type == PTR_TO_PACKET) { - if (t == BPF_WRITE && !may_write_pkt_data(env->prog->type)) { + if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL)) { verbose("cannot write into packet\n"); return -EACCES; } @@ -950,8 +959,8 @@ static int check_func_arg(struct verifier_env *env, u32 regno, return 0; } - if (type == PTR_TO_PACKET && !may_write_pkt_data(env->prog->type)) { - verbose("helper access to the packet is not allowed for clsact\n"); + if (type == PTR_TO_PACKET && !may_access_direct_pkt_data(env, meta)) { + verbose("helper access to the packet is not allowed\n"); return -EACCES; } @@ -1191,6 +1200,7 @@ static int check_call(struct verifier_env *env, int func_id) changes_data = bpf_helper_changes_skb_data(fn->func); memset(&meta, 0, sizeof(meta)); + meta.pkt_access = fn->pkt_access; /* We only support one arg being in raw mode at the moment, which * is sufficient for the helper functions we have right now. @@ -2675,18 +2685,35 @@ static void convert_pseudo_ld_imm64(struct verifier_env *env) */ static int convert_ctx_accesses(struct verifier_env *env) { - struct bpf_insn *insn = env->prog->insnsi; - int insn_cnt = env->prog->len; - struct bpf_insn insn_buf[16]; + const struct bpf_verifier_ops *ops = env->prog->aux->ops; + struct bpf_insn insn_buf[16], *insn; struct bpf_prog *new_prog; enum bpf_access_type type; - int i; + int i, insn_cnt, cnt; - if (!env->prog->aux->ops->convert_ctx_access) + if (ops->gen_prologue) { + cnt = ops->gen_prologue(insn_buf, env->seen_direct_write, + env->prog); + if (cnt >= ARRAY_SIZE(insn_buf)) { + verbose("bpf verifier is misconfigured\n"); + return -EINVAL; + } else if (cnt) { + new_prog = bpf_patch_insn_single(env->prog, 0, + insn_buf, cnt); + if (!new_prog) + return -ENOMEM; + env->prog = new_prog; + } + } + + if (!ops->convert_ctx_access) return 0; + insn_cnt = env->prog->len; + insn = env->prog->insnsi; + for (i = 0; i < insn_cnt; i++, insn++) { - u32 insn_delta, cnt; + u32 insn_delta; if (insn->code == (BPF_LDX | BPF_MEM | BPF_W) || insn->code == (BPF_LDX | BPF_MEM | BPF_DW)) @@ -2703,9 +2730,8 @@ static int convert_ctx_accesses(struct verifier_env *env) continue; } - cnt = env->prog->aux->ops-> - convert_ctx_access(type, insn->dst_reg, insn->src_reg, - insn->off, insn_buf, env->prog); + cnt = ops->convert_ctx_access(type, insn->dst_reg, insn->src_reg, + insn->off, insn_buf, env->prog); if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) { verbose("bpf verifier is misconfigured\n"); return -EINVAL; diff --git a/net/core/filter.c b/net/core/filter.c index 298b146b47e7..0920c2ac1d00 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1362,6 +1362,11 @@ static inline int bpf_try_make_writable(struct sk_buff *skb, return err; } +static int bpf_try_make_head_writable(struct sk_buff *skb) +{ + return bpf_try_make_writable(skb, skb_headlen(skb)); +} + static inline void bpf_push_mac_rcsum(struct sk_buff *skb) { if (skb_at_tc_ingress(skb)) @@ -1441,6 +1446,28 @@ static const struct bpf_func_proto bpf_skb_load_bytes_proto = { .arg4_type = ARG_CONST_STACK_SIZE, }; +BPF_CALL_2(bpf_skb_pull_data, struct sk_buff *, skb, u32, len) +{ + /* Idea is the following: should the needed direct read/write + * test fail during runtime, we can pull in more data and redo + * again, since implicitly, we invalidate previous checks here. + * + * Or, since we know how much we need to make read/writeable, + * this can be done once at the program beginning for direct + * access case. By this we overcome limitations of only current + * headroom being accessible. + */ + return bpf_try_make_writable(skb, len ? : skb_headlen(skb)); +} + +static const struct bpf_func_proto bpf_skb_pull_data_proto = { + .func = bpf_skb_pull_data, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, +}; + BPF_CALL_5(bpf_l3_csum_replace, struct sk_buff *, skb, u32, offset, u64, from, u64, to, u64, flags) { @@ -1567,6 +1594,7 @@ BPF_CALL_5(bpf_csum_diff, __be32 *, from, u32, from_size, static const struct bpf_func_proto bpf_csum_diff_proto = { .func = bpf_csum_diff, .gpl_only = false, + .pkt_access = true, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_STACK, .arg2_type = ARG_CONST_STACK_SIZE_OR_ZERO, @@ -1575,6 +1603,26 @@ static const struct bpf_func_proto bpf_csum_diff_proto = { .arg5_type = ARG_ANYTHING, }; +BPF_CALL_2(bpf_csum_update, struct sk_buff *, skb, __wsum, csum) +{ + /* The interface is to be used in combination with bpf_csum_diff() + * for direct packet writes. csum rotation for alignment as well + * as emulating csum_sub() can be done from the eBPF program. + */ + if (skb->ip_summed == CHECKSUM_COMPLETE) + return (skb->csum = csum_add(skb->csum, csum)); + + return -ENOTSUPP; +} + +static const struct bpf_func_proto bpf_csum_update_proto = { + .func = bpf_csum_update, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, +}; + static inline int __bpf_rx_skb(struct net_device *dev, struct sk_buff *skb) { return dev_forward_skb(dev, skb); @@ -1602,6 +1650,8 @@ static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb) BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags) { struct net_device *dev; + struct sk_buff *clone; + int ret; if (unlikely(flags & ~(BPF_F_INGRESS))) return -EINVAL; @@ -1610,14 +1660,25 @@ BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags) if (unlikely(!dev)) return -EINVAL; - skb = skb_clone(skb, GFP_ATOMIC); - if (unlikely(!skb)) + clone = skb_clone(skb, GFP_ATOMIC); + if (unlikely(!clone)) return -ENOMEM; - bpf_push_mac_rcsum(skb); + /* For direct write, we need to keep the invariant that the skbs + * we're dealing with need to be uncloned. Should uncloning fail + * here, we need to free the just generated clone to unclone once + * again. + */ + ret = bpf_try_make_head_writable(skb); + if (unlikely(ret)) { + kfree_skb(clone); + return -ENOMEM; + } + + bpf_push_mac_rcsum(clone); return flags & BPF_F_INGRESS ? - __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb); + __bpf_rx_skb(dev, clone) : __bpf_tx_skb(dev, clone); } static const struct bpf_func_proto bpf_clone_redirect_proto = { @@ -2063,19 +2124,14 @@ static const struct bpf_func_proto bpf_skb_change_tail_proto = { bool bpf_helper_changes_skb_data(void *func) { - if (func == bpf_skb_vlan_push) - return true; - if (func == bpf_skb_vlan_pop) - return true; - if (func == bpf_skb_store_bytes) - return true; - if (func == bpf_skb_change_proto) - return true; - if (func == bpf_skb_change_tail) - return true; - if (func == bpf_l3_csum_replace) - return true; - if (func == bpf_l4_csum_replace) + if (func == bpf_skb_vlan_push || + func == bpf_skb_vlan_pop || + func == bpf_skb_store_bytes || + func == bpf_skb_change_proto || + func == bpf_skb_change_tail || + func == bpf_skb_pull_data || + func == bpf_l3_csum_replace || + func == bpf_l4_csum_replace) return true; return false; @@ -2440,8 +2496,12 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) return &bpf_skb_store_bytes_proto; case BPF_FUNC_skb_load_bytes: return &bpf_skb_load_bytes_proto; + case BPF_FUNC_skb_pull_data: + return &bpf_skb_pull_data_proto; case BPF_FUNC_csum_diff: return &bpf_csum_diff_proto; + case BPF_FUNC_csum_update: + return &bpf_csum_update_proto; case BPF_FUNC_l3_csum_replace: return &bpf_l3_csum_replace_proto; case BPF_FUNC_l4_csum_replace: @@ -2533,6 +2593,45 @@ static bool sk_filter_is_valid_access(int off, int size, return __is_valid_access(off, size, type); } +static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write, + const struct bpf_prog *prog) +{ + struct bpf_insn *insn = insn_buf; + + if (!direct_write) + return 0; + + /* if (!skb->cloned) + * goto start; + * + * (Fast-path, otherwise approximation that we might be + * a clone, do the rest in helper.) + */ + *insn++ = BPF_LDX_MEM(BPF_B, BPF_REG_6, BPF_REG_1, CLONED_OFFSET()); + *insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_6, CLONED_MASK); + *insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 7); + + /* ret = bpf_skb_pull_data(skb, 0); */ + *insn++ = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1); + *insn++ = BPF_ALU64_REG(BPF_XOR, BPF_REG_2, BPF_REG_2); + *insn++ = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_skb_pull_data); + /* if (!ret) + * goto restore; + * return TC_ACT_SHOT; + */ + *insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2); + *insn++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, TC_ACT_SHOT); + *insn++ = BPF_EXIT_INSN(); + + /* restore: */ + *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6); + /* start: */ + *insn++ = prog->insnsi[0]; + + return insn - insn_buf; +} + static bool tc_cls_act_is_valid_access(int off, int size, enum bpf_access_type type, enum bpf_reg_type *reg_type) @@ -2810,6 +2909,7 @@ static const struct bpf_verifier_ops tc_cls_act_ops = { .get_func_proto = tc_cls_act_func_proto, .is_valid_access = tc_cls_act_is_valid_access, .convert_ctx_access = tc_cls_act_convert_ctx_access, + .gen_prologue = tc_cls_act_prologue, }; static const struct bpf_verifier_ops xdp_ops = { From 7d95b0ab5bbe2dc9bf3fd99c27e80ced5bfa8acf Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 20 Sep 2016 00:26:14 +0200 Subject: [PATCH 1284/1715] bpf: add test cases for direct packet access Add couple of test cases for direct write and the negative size issue, and also adjust the direct packet access test4 since it asserts that writes are not possible, but since we've just added support for writes, we need to invert the verdict to ACCEPT, of course. Summary: 133 PASSED, 0 FAILED. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- samples/bpf/test_verifier.c | 433 +++++++++++++++++++++++++++++++++++- 1 file changed, 430 insertions(+), 3 deletions(-) diff --git a/samples/bpf/test_verifier.c b/samples/bpf/test_verifier.c index 1f6cc9b6a23b..ac590d4b7f02 100644 --- a/samples/bpf/test_verifier.c +++ b/samples/bpf/test_verifier.c @@ -290,6 +290,29 @@ static struct bpf_test tests[] = { .errstr = "invalid read from stack", .result = REJECT, }, + { + "invalid argument register", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_cgroup_classid), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_cgroup_classid), + BPF_EXIT_INSN(), + }, + .errstr = "R1 !read_ok", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "non-invalid argument register", + .insns = { + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_cgroup_classid), + BPF_ALU64_REG(BPF_MOV, BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_cgroup_classid), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, { "check valid spill/fill", .insns = { @@ -1209,6 +1232,54 @@ static struct bpf_test tests[] = { .errstr = "invalid read from stack off -8+0 size 8", .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, + { + "raw_stack: skb_load_bytes, negative len", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_4, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid stack type R3", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "raw_stack: skb_load_bytes, negative len 2", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_4, ~0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid stack type R3", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "raw_stack: skb_load_bytes, zero len", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid stack type R3", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, { "raw_stack: skb_load_bytes, no init", .insns = { @@ -1511,7 +1582,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, }, { - "direct packet access: test4", + "direct packet access: test4 (write)", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct __sk_buff, data)), @@ -1524,8 +1595,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr = "cannot write", - .result = REJECT, + .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { @@ -1630,6 +1700,26 @@ static struct bpf_test tests[] = { .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, + { + "direct packet access: test10 (write invalid)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "invalid access to packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, { "helper access to packet: test1, valid packet_ptr range", .insns = { @@ -1736,6 +1826,343 @@ static struct bpf_test tests[] = { .errstr = "invalid access to packet", .prog_type = BPF_PROG_TYPE_XDP, }, + { + "helper access to packet: test6, cls valid packet_ptr range", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 5), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_2), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_update_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup = {5}, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to packet: test7, cls unchecked packet_ptr", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup = {1}, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to packet: test8, cls variable add", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 10), + BPF_LDX_MEM(BPF_B, BPF_REG_5, BPF_REG_2, 0), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_5), + BPF_MOV64_REG(BPF_REG_5, BPF_REG_4), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_3, 4), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_4), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup = {11}, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to packet: test9, cls packet_ptr with bad range", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4), + BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup = {7}, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to packet: test10, cls packet_ptr with too short range", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup = {6}, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to packet: test11, cls unsuitable helper 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_7, 4), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_4, 42), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_store_bytes), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "helper access to the packet", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to packet: test12, cls unsuitable helper 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_7, 3), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_4, 4), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "helper access to the packet", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to packet: test13, cls helper ok", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to packet: test14, cls helper fail sub", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 4), + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "type=inv expected=fp", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to packet: test15, cls helper fail range 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_2, 8), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to packet: test16, cls helper fail range 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_2, -9), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to packet: test17, cls helper fail range 3", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_2, ~0), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to packet: test18, cls helper fail range zero", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to packet: test19, pkt end as input", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "R1 type=pkt_end expected=fp", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to packet: test20, wrong reg", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6), + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, }; static int probe_filter_length(struct bpf_insn *fp) From aecc5cefc389735b5327d234e11d1fe505e1c280 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Mon, 19 Sep 2016 19:02:51 -0400 Subject: [PATCH 1285/1715] net sched actions: fix GETing actions With the batch changes that translated transient actions into a temporary list lost in the translation was the fact that tcf_action_destroy() will eventually delete the action from the permanent location if the refcount is zero. Example of what broke: ...add a gact action to drop sudo $TC actions add action drop index 10 ...now retrieve it, looks good sudo $TC actions get action gact index 10 ...retrieve it again and find it is gone! sudo $TC actions get action gact index 10 Fixes: 22dc13c837c3 ("net_sched: convert tcf_exts from list to pointer array"), Fixes: 824a7e8863b3 ("net_sched: remove an unnecessary list_del()") Fixes: f07fed82ad79 ("net_sched: remove the leftover cleanup_a()") Acked-by: Cong Wang Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/act_api.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index d0aceb1740b1..c9102172ce3b 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -592,6 +592,17 @@ err_out: return ERR_PTR(err); } +static void cleanup_a(struct list_head *actions, int ovr) +{ + struct tc_action *a; + + if (!ovr) + return; + + list_for_each_entry(a, actions, list) + a->tcfa_refcnt--; +} + int tcf_action_init(struct net *net, struct nlattr *nla, struct nlattr *est, char *name, int ovr, int bind, struct list_head *actions) { @@ -611,8 +622,15 @@ int tcf_action_init(struct net *net, struct nlattr *nla, struct nlattr *est, goto err; } act->order = i; + if (ovr) + act->tcfa_refcnt++; list_add_tail(&act->list, actions); } + + /* Remove the temp refcnt which was necessary to protect against + * destroying an existing action which was being replaced + */ + cleanup_a(actions, ovr); return 0; err: @@ -882,6 +900,8 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, goto err; } act->order = i; + if (event == RTM_GETACTION) + act->tcfa_refcnt++; list_add_tail(&act->list, &actions); } From 8847293992606677d5e446d1e712bd128ea7977f Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Mon, 19 Sep 2016 19:56:11 -0400 Subject: [PATCH 1286/1715] net: dsa: mv88e6xxx: handle multiple ports in ATU An address can be loaded in the ATU with multiple ports, for instance when adding multiple ports to a Multicast group with "bridge mdb". The current code doesn't allow that. Add an helper to get a single entry from the ATU, then set or clear the requested port, before loading the entry back in the ATU. Note that the required _mv88e6xxx_atu_getnext function is defined below mv88e6xxx_port_db_load_purge, so forward-declare it for the moment. The ATU code will be isolated in future patches. Fixes: 83dabd1fa84c ("net: dsa: mv88e6xxx: make switchdev DB ops generic") Signed-off-by: Vivien Didelot Reviewed-by: Andrew Lunn Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 56 ++++++++++++++++++++++++++++---- 1 file changed, 49 insertions(+), 7 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 70a812d159c9..1d71802396fb 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -2091,12 +2091,48 @@ static int _mv88e6xxx_atu_load(struct mv88e6xxx_chip *chip, return _mv88e6xxx_atu_cmd(chip, entry->fid, GLOBAL_ATU_OP_LOAD_DB); } +static int _mv88e6xxx_atu_getnext(struct mv88e6xxx_chip *chip, u16 fid, + struct mv88e6xxx_atu_entry *entry); + +static int mv88e6xxx_atu_get(struct mv88e6xxx_chip *chip, int fid, + const u8 *addr, struct mv88e6xxx_atu_entry *entry) +{ + struct mv88e6xxx_atu_entry next; + int err; + + eth_broadcast_addr(next.mac); + + err = _mv88e6xxx_atu_mac_write(chip, next.mac); + if (err) + return err; + + do { + err = _mv88e6xxx_atu_getnext(chip, fid, &next); + if (err) + return err; + + if (next.state == GLOBAL_ATU_DATA_STATE_UNUSED) + break; + + if (ether_addr_equal(next.mac, addr)) { + *entry = next; + return 0; + } + } while (!is_broadcast_ether_addr(next.mac)); + + memset(entry, 0, sizeof(*entry)); + entry->fid = fid; + ether_addr_copy(entry->mac, addr); + + return 0; +} + static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port, const unsigned char *addr, u16 vid, u8 state) { - struct mv88e6xxx_atu_entry entry = { 0 }; struct mv88e6xxx_vtu_stu_entry vlan; + struct mv88e6xxx_atu_entry entry; int err; /* Null VLAN ID corresponds to the port private database */ @@ -2107,12 +2143,18 @@ static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port, if (err) return err; - entry.fid = vlan.fid; - entry.state = state; - ether_addr_copy(entry.mac, addr); - if (state != GLOBAL_ATU_DATA_STATE_UNUSED) { - entry.trunk = false; - entry.portv_trunkid = BIT(port); + err = mv88e6xxx_atu_get(chip, vlan.fid, addr, &entry); + if (err) + return err; + + /* Purge the ATU entry only if no port is using it anymore */ + if (state == GLOBAL_ATU_DATA_STATE_UNUSED) { + entry.portv_trunkid &= ~BIT(port); + if (!entry.portv_trunkid) + entry.state = GLOBAL_ATU_DATA_STATE_UNUSED; + } else { + entry.portv_trunkid |= BIT(port); + entry.state = state; } return _mv88e6xxx_atu_load(chip, &entry); From 94d308d060cd3ee65152b8ebd7a1c24fa86eee82 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Tue, 20 Sep 2016 11:26:48 +0800 Subject: [PATCH 1287/1715] net: ethernet: mediatek: enhance with avoiding superfluous assignment inside mtk_get_ethtool_stats data_src is unchanged inside the loop, so this patch moves the assignment to outside the loop to avoid unnecessarily assignment Signed-off-by: Sean Wang Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 481f3609a1ea..ca6b5013e275 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -2137,8 +2137,9 @@ static void mtk_get_ethtool_stats(struct net_device *dev, } } + data_src = (u64 *)hwstats; + do { - data_src = (u64 *)hwstats; data_dst = data; start = u64_stats_fetch_begin_irq(&hwstats->syncp); From f78e73e27fdeab6f9317667f7e9676b59c1ec1fb Mon Sep 17 00:00:00 2001 From: Soheil Hassas Yeganeh Date: Mon, 19 Sep 2016 23:39:08 -0400 Subject: [PATCH 1288/1715] tcp: cdg: rename struct minmax in tcp_cdg.c to avoid a naming conflict The upcoming change "lib/win_minmax: windowed min or max estimator" introduces a struct called minmax, which is then included in include/linux/tcp.h in the upcoming change "tcp: use windowed min filter library for TCP min_rtt estimation". This would create a compilation error for tcp_cdg.c, which defines its own minmax struct. To avoid this naming conflict (and potentially others in the future), this commit renames the version used in tcp_cdg.c to cdg_minmax. Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Cc: Kenneth Klette Jonassen Acked-by: Kenneth Klette Jonassen Signed-off-by: David S. Miller --- net/ipv4/tcp_cdg.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/ipv4/tcp_cdg.c b/net/ipv4/tcp_cdg.c index 03725b294286..35b280361cb2 100644 --- a/net/ipv4/tcp_cdg.c +++ b/net/ipv4/tcp_cdg.c @@ -56,7 +56,7 @@ MODULE_PARM_DESC(use_shadow, "use shadow window heuristic"); module_param(use_tolerance, bool, 0644); MODULE_PARM_DESC(use_tolerance, "use loss tolerance heuristic"); -struct minmax { +struct cdg_minmax { union { struct { s32 min; @@ -74,10 +74,10 @@ enum cdg_state { }; struct cdg { - struct minmax rtt; - struct minmax rtt_prev; - struct minmax *gradients; - struct minmax gsum; + struct cdg_minmax rtt; + struct cdg_minmax rtt_prev; + struct cdg_minmax *gradients; + struct cdg_minmax gsum; bool gfilled; u8 tail; u8 state; @@ -353,7 +353,7 @@ static void tcp_cdg_cwnd_event(struct sock *sk, const enum tcp_ca_event ev) { struct cdg *ca = inet_csk_ca(sk); struct tcp_sock *tp = tcp_sk(sk); - struct minmax *gradients; + struct cdg_minmax *gradients; switch (ev) { case CA_EVENT_CWND_RESTART: From a4f1f9ac8153e22869b6408832b5a9bb9c762bf6 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Mon, 19 Sep 2016 23:39:09 -0400 Subject: [PATCH 1289/1715] lib/win_minmax: windowed min or max estimator This commit introduces a generic library to estimate either the min or max value of a time-varying variable over a recent time window. This is code originally from Kathleen Nichols. The current form of the code is from Van Jacobson. A single struct minmax_sample will track the estimated windowed-max value of the series if you call minmax_running_max() or the estimated windowed-min value of the series if you call minmax_running_min(). Nearly equivalent code is already in place for minimum RTT estimation in the TCP stack. This commit extracts that code and generalizes it to handle both min and max. Moving the code here reduces the footprint and complexity of the TCP code base and makes the filter generally available for other parts of the codebase, including an upcoming TCP congestion control module. This library works well for time series where the measurements are smoothly increasing or decreasing. Signed-off-by: Van Jacobson Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- include/linux/win_minmax.h | 37 ++++++++++++++ lib/Makefile | 2 +- lib/win_minmax.c | 98 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 136 insertions(+), 1 deletion(-) create mode 100644 include/linux/win_minmax.h create mode 100644 lib/win_minmax.c diff --git a/include/linux/win_minmax.h b/include/linux/win_minmax.h new file mode 100644 index 000000000000..56569604278f --- /dev/null +++ b/include/linux/win_minmax.h @@ -0,0 +1,37 @@ +/** + * lib/minmax.c: windowed min/max tracker by Kathleen Nichols. + * + */ +#ifndef MINMAX_H +#define MINMAX_H + +#include + +/* A single data point for our parameterized min-max tracker */ +struct minmax_sample { + u32 t; /* time measurement was taken */ + u32 v; /* value measured */ +}; + +/* State for the parameterized min-max tracker */ +struct minmax { + struct minmax_sample s[3]; +}; + +static inline u32 minmax_get(const struct minmax *m) +{ + return m->s[0].v; +} + +static inline u32 minmax_reset(struct minmax *m, u32 t, u32 meas) +{ + struct minmax_sample val = { .t = t, .v = meas }; + + m->s[2] = m->s[1] = m->s[0] = val; + return m->s[0].v; +} + +u32 minmax_running_max(struct minmax *m, u32 win, u32 t, u32 meas); +u32 minmax_running_min(struct minmax *m, u32 win, u32 t, u32 meas); + +#endif diff --git a/lib/Makefile b/lib/Makefile index 5dc77a8ec297..df747e5eeb7a 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -22,7 +22,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ sha1.o chacha20.o md5.o irq_regs.o argv_split.o \ flex_proportions.o ratelimit.o show_mem.o \ is_single_threaded.o plist.o decompress.o kobject_uevent.o \ - earlycpio.o seq_buf.o nmi_backtrace.o nodemask.o + earlycpio.o seq_buf.o nmi_backtrace.o nodemask.o win_minmax.o lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o diff --git a/lib/win_minmax.c b/lib/win_minmax.c new file mode 100644 index 000000000000..c8420d404926 --- /dev/null +++ b/lib/win_minmax.c @@ -0,0 +1,98 @@ +/** + * lib/minmax.c: windowed min/max tracker + * + * Kathleen Nichols' algorithm for tracking the minimum (or maximum) + * value of a data stream over some fixed time interval. (E.g., + * the minimum RTT over the past five minutes.) It uses constant + * space and constant time per update yet almost always delivers + * the same minimum as an implementation that has to keep all the + * data in the window. + * + * The algorithm keeps track of the best, 2nd best & 3rd best min + * values, maintaining an invariant that the measurement time of + * the n'th best >= n-1'th best. It also makes sure that the three + * values are widely separated in the time window since that bounds + * the worse case error when that data is monotonically increasing + * over the window. + * + * Upon getting a new min, we can forget everything earlier because + * it has no value - the new min is <= everything else in the window + * by definition and it's the most recent. So we restart fresh on + * every new min and overwrites 2nd & 3rd choices. The same property + * holds for 2nd & 3rd best. + */ +#include +#include + +/* As time advances, update the 1st, 2nd, and 3rd choices. */ +static u32 minmax_subwin_update(struct minmax *m, u32 win, + const struct minmax_sample *val) +{ + u32 dt = val->t - m->s[0].t; + + if (unlikely(dt > win)) { + /* + * Passed entire window without a new val so make 2nd + * choice the new val & 3rd choice the new 2nd choice. + * we may have to iterate this since our 2nd choice + * may also be outside the window (we checked on entry + * that the third choice was in the window). + */ + m->s[0] = m->s[1]; + m->s[1] = m->s[2]; + m->s[2] = *val; + if (unlikely(val->t - m->s[0].t > win)) { + m->s[0] = m->s[1]; + m->s[1] = m->s[2]; + m->s[2] = *val; + } + } else if (unlikely(m->s[1].t == m->s[0].t) && dt > win/4) { + /* + * We've passed a quarter of the window without a new val + * so take a 2nd choice from the 2nd quarter of the window. + */ + m->s[2] = m->s[1] = *val; + } else if (unlikely(m->s[2].t == m->s[1].t) && dt > win/2) { + /* + * We've passed half the window without finding a new val + * so take a 3rd choice from the last half of the window + */ + m->s[2] = *val; + } + return m->s[0].v; +} + +/* Check if new measurement updates the 1st, 2nd or 3rd choice max. */ +u32 minmax_running_max(struct minmax *m, u32 win, u32 t, u32 meas) +{ + struct minmax_sample val = { .t = t, .v = meas }; + + if (unlikely(val.v >= m->s[0].v) || /* found new max? */ + unlikely(val.t - m->s[2].t > win)) /* nothing left in window? */ + return minmax_reset(m, t, meas); /* forget earlier samples */ + + if (unlikely(val.v >= m->s[1].v)) + m->s[2] = m->s[1] = val; + else if (unlikely(val.v >= m->s[2].v)) + m->s[2] = val; + + return minmax_subwin_update(m, win, &val); +} +EXPORT_SYMBOL(minmax_running_max); + +/* Check if new measurement updates the 1st, 2nd or 3rd choice min. */ +u32 minmax_running_min(struct minmax *m, u32 win, u32 t, u32 meas) +{ + struct minmax_sample val = { .t = t, .v = meas }; + + if (unlikely(val.v <= m->s[0].v) || /* found new min? */ + unlikely(val.t - m->s[2].t > win)) /* nothing left in window? */ + return minmax_reset(m, t, meas); /* forget earlier samples */ + + if (unlikely(val.v <= m->s[1].v)) + m->s[2] = m->s[1] = val; + else if (unlikely(val.v <= m->s[2].v)) + m->s[2] = val; + + return minmax_subwin_update(m, win, &val); +} From 6403389211e1f4d40ed963fe47a96fce1a3ba7a9 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Mon, 19 Sep 2016 23:39:10 -0400 Subject: [PATCH 1290/1715] tcp: use windowed min filter library for TCP min_rtt estimation Refactor the TCP min_rtt code to reuse the new win_minmax library in lib/win_minmax.c to simplify the TCP code. This is a pure refactor: the functionality is exactly the same. We just moved the windowed min code to make TCP easier to read and maintain, and to allow other parts of the kernel to use the windowed min/max filter code. Signed-off-by: Van Jacobson Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- include/linux/tcp.h | 5 ++-- include/net/tcp.h | 2 +- net/ipv4/tcp.c | 2 +- net/ipv4/tcp_input.c | 62 +++------------------------------------- net/ipv4/tcp_minisocks.c | 2 +- 5 files changed, 9 insertions(+), 64 deletions(-) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index c723a465125d..6433cc8b4667 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -19,6 +19,7 @@ #include +#include #include #include #include @@ -234,9 +235,7 @@ struct tcp_sock { u32 mdev_max_us; /* maximal mdev for the last rtt period */ u32 rttvar_us; /* smoothed mdev_max */ u32 rtt_seq; /* sequence number to update rttvar */ - struct rtt_meas { - u32 rtt, ts; /* RTT in usec and sampling time in jiffies. */ - } rtt_min[3]; + struct minmax rtt_min; u32 packets_out; /* Packets which are "in flight" */ u32 retrans_out; /* Retransmitted packets out */ diff --git a/include/net/tcp.h b/include/net/tcp.h index fdfbedd61c67..2f1648af4d12 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -671,7 +671,7 @@ static inline bool tcp_ca_dst_locked(const struct dst_entry *dst) /* Minimum RTT in usec. ~0 means not available. */ static inline u32 tcp_min_rtt(const struct tcp_sock *tp) { - return tp->rtt_min[0].rtt; + return minmax_get(&tp->rtt_min); } /* Compute the actual receive window we are currently advertising. diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 7dae800092e6..e79ed17ccfd6 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -387,7 +387,7 @@ void tcp_init_sock(struct sock *sk) icsk->icsk_rto = TCP_TIMEOUT_INIT; tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); - tp->rtt_min[0].rtt = ~0U; + minmax_reset(&tp->rtt_min, tcp_time_stamp, ~0U); /* So many TCP implementations out there (incorrectly) count the * initial SYN frame in their delayed-ACK and congestion control diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index dad3e7eeed94..6886f386464f 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2879,67 +2879,13 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked, *rexmit = REXMIT_LOST; } -/* Kathleen Nichols' algorithm for tracking the minimum value of - * a data stream over some fixed time interval. (E.g., the minimum - * RTT over the past five minutes.) It uses constant space and constant - * time per update yet almost always delivers the same minimum as an - * implementation that has to keep all the data in the window. - * - * The algorithm keeps track of the best, 2nd best & 3rd best min - * values, maintaining an invariant that the measurement time of the - * n'th best >= n-1'th best. It also makes sure that the three values - * are widely separated in the time window since that bounds the worse - * case error when that data is monotonically increasing over the window. - * - * Upon getting a new min, we can forget everything earlier because it - * has no value - the new min is <= everything else in the window by - * definition and it's the most recent. So we restart fresh on every new min - * and overwrites 2nd & 3rd choices. The same property holds for 2nd & 3rd - * best. - */ static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us) { - const u32 now = tcp_time_stamp, wlen = sysctl_tcp_min_rtt_wlen * HZ; - struct rtt_meas *m = tcp_sk(sk)->rtt_min; - struct rtt_meas rttm = { - .rtt = likely(rtt_us) ? rtt_us : jiffies_to_usecs(1), - .ts = now, - }; - u32 elapsed; + struct tcp_sock *tp = tcp_sk(sk); + u32 wlen = sysctl_tcp_min_rtt_wlen * HZ; - /* Check if the new measurement updates the 1st, 2nd, or 3rd choices */ - if (unlikely(rttm.rtt <= m[0].rtt)) - m[0] = m[1] = m[2] = rttm; - else if (rttm.rtt <= m[1].rtt) - m[1] = m[2] = rttm; - else if (rttm.rtt <= m[2].rtt) - m[2] = rttm; - - elapsed = now - m[0].ts; - if (unlikely(elapsed > wlen)) { - /* Passed entire window without a new min so make 2nd choice - * the new min & 3rd choice the new 2nd. So forth and so on. - */ - m[0] = m[1]; - m[1] = m[2]; - m[2] = rttm; - if (now - m[0].ts > wlen) { - m[0] = m[1]; - m[1] = rttm; - if (now - m[0].ts > wlen) - m[0] = rttm; - } - } else if (m[1].ts == m[0].ts && elapsed > wlen / 4) { - /* Passed a quarter of the window without a new min so - * take 2nd choice from the 2nd quarter of the window. - */ - m[2] = m[1] = rttm; - } else if (m[2].ts == m[1].ts && elapsed > wlen / 2) { - /* Passed half the window without a new min so take the 3rd - * choice from the last half of the window. - */ - m[2] = rttm; - } + minmax_running_min(&tp->rtt_min, wlen, tcp_time_stamp, + rtt_us ? : jiffies_to_usecs(1)); } static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag, diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index f63c73dc0acb..568947110b60 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -464,7 +464,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->srtt_us = 0; newtp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); - newtp->rtt_min[0].rtt = ~0U; + minmax_reset(&newtp->rtt_min, tcp_time_stamp, ~0U); newicsk->icsk_rto = TCP_TIMEOUT_INIT; newtp->packets_out = 0; From 77879147a3481babffd7e368d977ab682545a6bd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 19 Sep 2016 23:39:11 -0400 Subject: [PATCH 1291/1715] net_sched: sch_fq: add low_rate_threshold parameter This commit adds to the fq module a low_rate_threshold parameter to insert a delay after all packets if the socket requests a pacing rate below the threshold. This helps achieve more precise control of the sending rate with low-rate paths, especially policers. The basic issue is that if a congestion control module detects a policer at a certain rate, it may want fq to be able to shape to that policed rate. That way the sender can avoid policer drops by having the packets arrive at the policer at or just under the policed rate. The default threshold of 550Kbps was chosen analytically so that for policers or links at 500Kbps or 512Kbps fq would very likely invoke this mechanism, even if the pacing rate was briefly slightly above the available bandwidth. This value was then empirically validated with two years of production testing on YouTube video servers. Signed-off-by: Van Jacobson Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- include/uapi/linux/pkt_sched.h | 2 ++ net/sched/sch_fq.c | 22 +++++++++++++++++++--- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 2382eed50278..f8e39dbaa781 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -792,6 +792,8 @@ enum { TCA_FQ_ORPHAN_MASK, /* mask applied to orphaned skb hashes */ + TCA_FQ_LOW_RATE_THRESHOLD, /* per packet delay under this rate */ + __TCA_FQ_MAX }; diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index dc52cc10d6ed..5dd929cc1423 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -94,6 +94,7 @@ struct fq_sched_data { u32 flow_max_rate; /* optional max rate per flow */ u32 flow_plimit; /* max packets per flow */ u32 orphan_mask; /* mask for orphaned skb */ + u32 low_rate_threshold; struct rb_root *fq_root; u8 rate_enable; u8 fq_trees_log; @@ -433,7 +434,7 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch) struct fq_flow_head *head; struct sk_buff *skb; struct fq_flow *f; - u32 rate; + u32 rate, plen; skb = fq_dequeue_head(sch, &q->internal); if (skb) @@ -482,7 +483,7 @@ begin: prefetch(&skb->end); f->credit -= qdisc_pkt_len(skb); - if (f->credit > 0 || !q->rate_enable) + if (!q->rate_enable) goto out; /* Do not pace locally generated ack packets */ @@ -493,8 +494,15 @@ begin: if (skb->sk) rate = min(skb->sk->sk_pacing_rate, rate); + if (rate <= q->low_rate_threshold) { + f->credit = 0; + plen = qdisc_pkt_len(skb); + } else { + plen = max(qdisc_pkt_len(skb), q->quantum); + if (f->credit > 0) + goto out; + } if (rate != ~0U) { - u32 plen = max(qdisc_pkt_len(skb), q->quantum); u64 len = (u64)plen * NSEC_PER_SEC; if (likely(rate)) @@ -662,6 +670,7 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = { [TCA_FQ_FLOW_MAX_RATE] = { .type = NLA_U32 }, [TCA_FQ_BUCKETS_LOG] = { .type = NLA_U32 }, [TCA_FQ_FLOW_REFILL_DELAY] = { .type = NLA_U32 }, + [TCA_FQ_LOW_RATE_THRESHOLD] = { .type = NLA_U32 }, }; static int fq_change(struct Qdisc *sch, struct nlattr *opt) @@ -716,6 +725,10 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt) if (tb[TCA_FQ_FLOW_MAX_RATE]) q->flow_max_rate = nla_get_u32(tb[TCA_FQ_FLOW_MAX_RATE]); + if (tb[TCA_FQ_LOW_RATE_THRESHOLD]) + q->low_rate_threshold = + nla_get_u32(tb[TCA_FQ_LOW_RATE_THRESHOLD]); + if (tb[TCA_FQ_RATE_ENABLE]) { u32 enable = nla_get_u32(tb[TCA_FQ_RATE_ENABLE]); @@ -781,6 +794,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt) q->fq_root = NULL; q->fq_trees_log = ilog2(1024); q->orphan_mask = 1024 - 1; + q->low_rate_threshold = 550000 / 8; qdisc_watchdog_init(&q->watchdog, sch); if (opt) @@ -811,6 +825,8 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb) nla_put_u32(skb, TCA_FQ_FLOW_REFILL_DELAY, jiffies_to_usecs(q->flow_refill_delay)) || nla_put_u32(skb, TCA_FQ_ORPHAN_MASK, q->orphan_mask) || + nla_put_u32(skb, TCA_FQ_LOW_RATE_THRESHOLD, + q->low_rate_threshold) || nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log)) goto nla_put_failure; From b2d3ea4a730f812b9c0f67a67b6762ce66ddb17c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 19 Sep 2016 23:39:12 -0400 Subject: [PATCH 1292/1715] tcp: switch back to proper tcp_skb_cb size check in tcp_init() Revert to the tcp_skb_cb size check that tcp_init() had before commit b4772ef879a8 ("net: use common macro for assering skb->cb[] available size in protocol families"). As related commit 744d5a3e9fe2 ("net: move skb->dropcount to skb->cb[]") explains, the sock_skb_cb_check_size() mechanism was added to ensure that there is space for dropcount, "for protocol families using it". But TCP is not a protocol using dropcount, so tcp_init() doesn't need to provision space for dropcount in the skb->cb[], and thus we can revert to the older form of the tcp_skb_cb size check. Doing so allows TCP to use 4 more bytes of the skb->cb[] space. Fixes: b4772ef879a8 ("net: use common macro for assering skb->cb[] available size in protocol families") Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index e79ed17ccfd6..de02fb4b1349 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3261,11 +3261,12 @@ static void __init tcp_init_mem(void) void __init tcp_init(void) { - unsigned long limit; int max_rshare, max_wshare, cnt; + unsigned long limit; unsigned int i; - sock_skb_cb_check_size(sizeof(struct tcp_skb_cb)); + BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > + FIELD_SIZEOF(struct sk_buff, cb)); percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL); percpu_counter_init(&tcp_orphan_count, 0, GFP_KERNEL); From 0682e6902a52aca7caf6ad42551b16ea0f87bc31 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Mon, 19 Sep 2016 23:39:13 -0400 Subject: [PATCH 1293/1715] tcp: count packets marked lost for a TCP connection Count the number of packets that a TCP connection marks lost. Congestion control modules can use this loss rate information for more intelligent decisions about how fast to send. Specifically, this is used in TCP BBR policer detection. BBR uses a high packet loss rate as one signal in its policer detection and policer bandwidth estimation algorithm. The BBR policer detection algorithm cannot simply track retransmits, because a retransmit can be (and often is) an indicator of packets lost long, long ago. This is particularly true in a long CA_Loss period that repairs the initial massive losses when a policer kicks in. Signed-off-by: Van Jacobson Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- include/linux/tcp.h | 1 + net/ipv4/tcp_input.c | 25 ++++++++++++++++++++++++- 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 6433cc8b4667..38590fbc0ac5 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -267,6 +267,7 @@ struct tcp_sock { * receiver in Recovery. */ u32 prr_out; /* Total number of pkts sent during Recovery. */ u32 delivered; /* Total data packets delivered incl. rexmits */ + u32 lost; /* Total data packets lost incl. rexmits */ u32 rcv_wnd; /* Current receiver window */ u32 write_seq; /* Tail(+1) of data held in tcp send buffer */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 6886f386464f..9413288c2778 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -899,12 +899,29 @@ static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb) tp->retransmit_high = TCP_SKB_CB(skb)->end_seq; } +/* Sum the number of packets on the wire we have marked as lost. + * There are two cases we care about here: + * a) Packet hasn't been marked lost (nor retransmitted), + * and this is the first loss. + * b) Packet has been marked both lost and retransmitted, + * and this means we think it was lost again. + */ +static void tcp_sum_lost(struct tcp_sock *tp, struct sk_buff *skb) +{ + __u8 sacked = TCP_SKB_CB(skb)->sacked; + + if (!(sacked & TCPCB_LOST) || + ((sacked & TCPCB_LOST) && (sacked & TCPCB_SACKED_RETRANS))) + tp->lost += tcp_skb_pcount(skb); +} + static void tcp_skb_mark_lost(struct tcp_sock *tp, struct sk_buff *skb) { if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) { tcp_verify_retransmit_hint(tp, skb); tp->lost_out += tcp_skb_pcount(skb); + tcp_sum_lost(tp, skb); TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; } } @@ -913,6 +930,7 @@ void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb) { tcp_verify_retransmit_hint(tp, skb); + tcp_sum_lost(tp, skb); if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) { tp->lost_out += tcp_skb_pcount(skb); TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; @@ -1890,6 +1908,7 @@ void tcp_enter_loss(struct sock *sk) struct sk_buff *skb; bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery; bool is_reneg; /* is receiver reneging on SACKs? */ + bool mark_lost; /* Reduce ssthresh if it has not yet been made inside this window. */ if (icsk->icsk_ca_state <= TCP_CA_Disorder || @@ -1923,8 +1942,12 @@ void tcp_enter_loss(struct sock *sk) if (skb == tcp_send_head(sk)) break; + mark_lost = (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) || + is_reneg); + if (mark_lost) + tcp_sum_lost(tp, skb); TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED; - if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || is_reneg) { + if (mark_lost) { TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED; TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; tp->lost_out += tcp_skb_pcount(skb); From b9f64820fb226a4e8ab10591f46cecd91ca56b30 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Mon, 19 Sep 2016 23:39:14 -0400 Subject: [PATCH 1294/1715] tcp: track data delivery rate for a TCP connection This patch generates data delivery rate (throughput) samples on a per-ACK basis. These rate samples can be used by congestion control modules, and specifically will be used by TCP BBR in later patches in this series. Key state: tp->delivered: Tracks the total number of data packets (original or not) delivered so far. This is an already-existing field. tp->delivered_mstamp: the last time tp->delivered was updated. Algorithm: A rate sample is calculated as (d1 - d0)/(t1 - t0) on a per-ACK basis: d1: the current tp->delivered after processing the ACK t1: the current time after processing the ACK d0: the prior tp->delivered when the acked skb was transmitted t0: the prior tp->delivered_mstamp when the acked skb was transmitted When an skb is transmitted, we snapshot d0 and t0 in its control block in tcp_rate_skb_sent(). When an ACK arrives, it may SACK and ACK some skbs. For each SACKed or ACKed skb, tcp_rate_skb_delivered() updates the rate_sample struct to reflect the latest (d0, t0). Finally, tcp_rate_gen() generates a rate sample by storing (d1 - d0) in rs->delivered and (t1 - t0) in rs->interval_us. One caveat: if an skb was sent with no packets in flight, then tp->delivered_mstamp may be either invalid (if the connection is starting) or outdated (if the connection was idle). In that case, we'll re-stamp tp->delivered_mstamp. At first glance it seems t0 should always be the time when an skb was transmitted, but actually this could over-estimate the rate due to phase mismatch between transmit and ACK events. To track the delivery rate, we ensure that if packets are in flight then t0 and and t1 are times at which packets were marked delivered. If the initial and final RTTs are different then one may be corrupted by some sort of noise. The noise we see most often is sending gaps caused by delayed, compressed, or stretched acks. This either affects both RTTs equally or artificially reduces the final RTT. We approach this by recording the info we need to compute the initial RTT (duration of the "send phase" of the window) when we recorded the associated inflight. Then, for a filter to avoid bandwidth overestimates, we generalize the per-sample bandwidth computation from: bw = delivered / ack_phase_rtt to the following: bw = delivered / max(send_phase_rtt, ack_phase_rtt) In large-scale experiments, this filtering approach incorporating send_phase_rtt is effective at avoiding bandwidth overestimates due to ACK compression or stretched ACKs. Signed-off-by: Van Jacobson Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- include/linux/tcp.h | 2 + include/net/tcp.h | 35 +++++++++- net/ipv4/Makefile | 2 +- net/ipv4/tcp_input.c | 46 +++++++++---- net/ipv4/tcp_output.c | 4 ++ net/ipv4/tcp_rate.c | 149 ++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 222 insertions(+), 16 deletions(-) create mode 100644 net/ipv4/tcp_rate.c diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 38590fbc0ac5..c50e6aec005a 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -268,6 +268,8 @@ struct tcp_sock { u32 prr_out; /* Total number of pkts sent during Recovery. */ u32 delivered; /* Total data packets delivered incl. rexmits */ u32 lost; /* Total data packets lost incl. rexmits */ + struct skb_mstamp first_tx_mstamp; /* start of window send phase */ + struct skb_mstamp delivered_mstamp; /* time we reached "delivered" */ u32 rcv_wnd; /* Current receiver window */ u32 write_seq; /* Tail(+1) of data held in tcp send buffer */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 2f1648af4d12..b261c892605a 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -763,8 +763,14 @@ struct tcp_skb_cb { __u32 ack_seq; /* Sequence number ACK'd */ union { struct { - /* There is space for up to 20 bytes */ + /* There is space for up to 24 bytes */ __u32 in_flight;/* Bytes in flight when packet sent */ + /* pkts S/ACKed so far upon tx of skb, incl retrans: */ + __u32 delivered; + /* start of send pipeline phase */ + struct skb_mstamp first_tx_mstamp; + /* when we reached the "delivered" count */ + struct skb_mstamp delivered_mstamp; } tx; /* only used for outgoing skbs */ union { struct inet_skb_parm h4; @@ -860,6 +866,26 @@ struct ack_sample { u32 in_flight; }; +/* A rate sample measures the number of (original/retransmitted) data + * packets delivered "delivered" over an interval of time "interval_us". + * The tcp_rate.c code fills in the rate sample, and congestion + * control modules that define a cong_control function to run at the end + * of ACK processing can optionally chose to consult this sample when + * setting cwnd and pacing rate. + * A sample is invalid if "delivered" or "interval_us" is negative. + */ +struct rate_sample { + struct skb_mstamp prior_mstamp; /* starting timestamp for interval */ + u32 prior_delivered; /* tp->delivered at "prior_mstamp" */ + s32 delivered; /* number of packets delivered over interval */ + long interval_us; /* time for tp->delivered to incr "delivered" */ + long rtt_us; /* RTT of last (S)ACKed packet (or -1) */ + int losses; /* number of packets marked lost upon ACK */ + u32 acked_sacked; /* number of packets newly (S)ACKed upon ACK */ + u32 prior_in_flight; /* in flight before this ACK */ + bool is_retrans; /* is sample from retransmission? */ +}; + struct tcp_congestion_ops { struct list_head list; u32 key; @@ -946,6 +972,13 @@ static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event) icsk->icsk_ca_ops->cwnd_event(sk, event); } +/* From tcp_rate.c */ +void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb); +void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, + struct rate_sample *rs); +void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, + struct skb_mstamp *now, struct rate_sample *rs); + /* These functions determine how the current flow behaves in respect of SACK * handling. SACK is negotiated with the peer, and therefore it can vary * between different flows. diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 24629b6f57cc..9cfff1a0bf71 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -8,7 +8,7 @@ obj-y := route.o inetpeer.o protocol.o \ inet_timewait_sock.o inet_connection_sock.o \ tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \ tcp_minisocks.o tcp_cong.o tcp_metrics.o tcp_fastopen.o \ - tcp_recovery.o \ + tcp_rate.o tcp_recovery.o \ tcp_offload.o datagram.o raw.o udp.o udplite.o \ udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \ fib_frontend.o fib_semantics.o fib_trie.o \ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 9413288c2778..d9ed4bb96f74 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1112,6 +1112,7 @@ struct tcp_sacktag_state { */ struct skb_mstamp first_sackt; struct skb_mstamp last_sackt; + struct rate_sample *rate; int flag; }; @@ -1279,6 +1280,7 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked, start_seq, end_seq, dup_sack, pcount, &skb->skb_mstamp); + tcp_rate_skb_delivered(sk, skb, state->rate); if (skb == tp->lost_skb_hint) tp->lost_cnt_hint += pcount; @@ -1329,6 +1331,9 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, tcp_advance_highest_sack(sk, skb); tcp_skb_collapse_tstamp(prev, skb); + if (unlikely(TCP_SKB_CB(prev)->tx.delivered_mstamp.v64)) + TCP_SKB_CB(prev)->tx.delivered_mstamp.v64 = 0; + tcp_unlink_write_queue(skb, sk); sk_wmem_free_skb(sk, skb); @@ -1558,6 +1563,7 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, dup_sack, tcp_skb_pcount(skb), &skb->skb_mstamp); + tcp_rate_skb_delivered(sk, skb, state->rate); if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp))) @@ -1640,8 +1646,10 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire, num_sacks, prior_snd_una); - if (found_dup_sack) + if (found_dup_sack) { state->flag |= FLAG_DSACKING_ACK; + tp->delivered++; /* A spurious retransmission is delivered */ + } /* Eliminate too old ACKs, but take into * account more or less fresh ones, they can @@ -3071,10 +3079,11 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb, */ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, u32 prior_snd_una, int *acked, - struct tcp_sacktag_state *sack) + struct tcp_sacktag_state *sack, + struct skb_mstamp *now) { const struct inet_connection_sock *icsk = inet_csk(sk); - struct skb_mstamp first_ackt, last_ackt, now; + struct skb_mstamp first_ackt, last_ackt; struct tcp_sock *tp = tcp_sk(sk); u32 prior_sacked = tp->sacked_out; u32 reord = tp->packets_out; @@ -3106,7 +3115,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, acked_pcount = tcp_tso_acked(sk, skb); if (!acked_pcount) break; - fully_acked = false; } else { /* Speedup tcp_unlink_write_queue() and next loop */ @@ -3142,6 +3150,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, tp->packets_out -= acked_pcount; pkts_acked += acked_pcount; + tcp_rate_skb_delivered(sk, skb, sack->rate); /* Initial outgoing SYN's get put onto the write_queue * just like anything else we transmit. It is not @@ -3174,16 +3183,15 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) flag |= FLAG_SACK_RENEGING; - skb_mstamp_get(&now); if (likely(first_ackt.v64) && !(flag & FLAG_RETRANS_DATA_ACKED)) { - seq_rtt_us = skb_mstamp_us_delta(&now, &first_ackt); - ca_rtt_us = skb_mstamp_us_delta(&now, &last_ackt); + seq_rtt_us = skb_mstamp_us_delta(now, &first_ackt); + ca_rtt_us = skb_mstamp_us_delta(now, &last_ackt); } if (sack->first_sackt.v64) { - sack_rtt_us = skb_mstamp_us_delta(&now, &sack->first_sackt); - ca_rtt_us = skb_mstamp_us_delta(&now, &sack->last_sackt); + sack_rtt_us = skb_mstamp_us_delta(now, &sack->first_sackt); + ca_rtt_us = skb_mstamp_us_delta(now, &sack->last_sackt); } - + sack->rate->rtt_us = ca_rtt_us; /* RTT of last (S)ACKed packet, or -1 */ rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us, ca_rtt_us); @@ -3211,7 +3219,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, tp->fackets_out -= min(pkts_acked, tp->fackets_out); } else if (skb && rtt_update && sack_rtt_us >= 0 && - sack_rtt_us > skb_mstamp_us_delta(&now, &skb->skb_mstamp)) { + sack_rtt_us > skb_mstamp_us_delta(now, &skb->skb_mstamp)) { /* Do not re-arm RTO if the sack RTT is measured from data sent * after when the head was last (re)transmitted. Otherwise the * timeout may continue to extend in loss recovery. @@ -3548,17 +3556,21 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct tcp_sacktag_state sack_state; + struct rate_sample rs = { .prior_delivered = 0 }; u32 prior_snd_una = tp->snd_una; u32 ack_seq = TCP_SKB_CB(skb)->seq; u32 ack = TCP_SKB_CB(skb)->ack_seq; bool is_dupack = false; u32 prior_fackets; int prior_packets = tp->packets_out; - u32 prior_delivered = tp->delivered; + u32 delivered = tp->delivered; + u32 lost = tp->lost; int acked = 0; /* Number of packets newly acked */ int rexmit = REXMIT_NONE; /* Flag to (re)transmit to recover losses */ + struct skb_mstamp now; sack_state.first_sackt.v64 = 0; + sack_state.rate = &rs; /* We very likely will need to access write queue head. */ prefetchw(sk->sk_write_queue.next); @@ -3581,6 +3593,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (after(ack, tp->snd_nxt)) goto invalid_ack; + skb_mstamp_get(&now); + if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) tcp_rearm_rto(sk); @@ -3591,6 +3605,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) } prior_fackets = tp->fackets_out; + rs.prior_in_flight = tcp_packets_in_flight(tp); /* ts_recent update must be made after we are sure that the packet * is in window. @@ -3646,7 +3661,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) /* See if we can take anything off of the retransmit queue. */ flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, &acked, - &sack_state); + &sack_state, &now); if (tcp_ack_is_dubious(sk, flag)) { is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); @@ -3663,7 +3678,10 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (icsk->icsk_pending == ICSK_TIME_RETRANS) tcp_schedule_loss_probe(sk); - tcp_cong_control(sk, ack, tp->delivered - prior_delivered, flag); + delivered = tp->delivered - delivered; /* freshly ACKed or SACKed */ + lost = tp->lost - lost; /* freshly marked lost */ + tcp_rate_gen(sk, delivered, lost, &now, &rs); + tcp_cong_control(sk, ack, delivered, flag); tcp_xmit_recovery(sk, rexmit); return 1; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 8b45794eb6b2..e02c8ebf3ed4 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -918,6 +918,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, skb_mstamp_get(&skb->skb_mstamp); TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq - tp->snd_una; + tcp_rate_skb_sent(sk, skb); if (unlikely(skb_cloned(skb))) skb = pskb_copy(skb, gfp_mask); @@ -1213,6 +1214,9 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, tcp_set_skb_tso_segs(skb, mss_now); tcp_set_skb_tso_segs(buff, mss_now); + /* Update delivered info for the new segment */ + TCP_SKB_CB(buff)->tx = TCP_SKB_CB(skb)->tx; + /* If this packet has been sent out already, we must * adjust the various packet counters. */ diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c new file mode 100644 index 000000000000..1daed6af6e80 --- /dev/null +++ b/net/ipv4/tcp_rate.c @@ -0,0 +1,149 @@ +#include + +/* The bandwidth estimator estimates the rate at which the network + * can currently deliver outbound data packets for this flow. At a high + * level, it operates by taking a delivery rate sample for each ACK. + * + * A rate sample records the rate at which the network delivered packets + * for this flow, calculated over the time interval between the transmission + * of a data packet and the acknowledgment of that packet. + * + * Specifically, over the interval between each transmit and corresponding ACK, + * the estimator generates a delivery rate sample. Typically it uses the rate + * at which packets were acknowledged. However, the approach of using only the + * acknowledgment rate faces a challenge under the prevalent ACK decimation or + * compression: packets can temporarily appear to be delivered much quicker + * than the bottleneck rate. Since it is physically impossible to do that in a + * sustained fashion, when the estimator notices that the ACK rate is faster + * than the transmit rate, it uses the latter: + * + * send_rate = #pkts_delivered/(last_snd_time - first_snd_time) + * ack_rate = #pkts_delivered/(last_ack_time - first_ack_time) + * bw = min(send_rate, ack_rate) + * + * Notice the estimator essentially estimates the goodput, not always the + * network bottleneck link rate when the sending or receiving is limited by + * other factors like applications or receiver window limits. The estimator + * deliberately avoids using the inter-packet spacing approach because that + * approach requires a large number of samples and sophisticated filtering. + */ + + +/* Snapshot the current delivery information in the skb, to generate + * a rate sample later when the skb is (s)acked in tcp_rate_skb_delivered(). + */ +void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb) +{ + struct tcp_sock *tp = tcp_sk(sk); + + /* In general we need to start delivery rate samples from the + * time we received the most recent ACK, to ensure we include + * the full time the network needs to deliver all in-flight + * packets. If there are no packets in flight yet, then we + * know that any ACKs after now indicate that the network was + * able to deliver those packets completely in the sampling + * interval between now and the next ACK. + * + * Note that we use packets_out instead of tcp_packets_in_flight(tp) + * because the latter is a guess based on RTO and loss-marking + * heuristics. We don't want spurious RTOs or loss markings to cause + * a spuriously small time interval, causing a spuriously high + * bandwidth estimate. + */ + if (!tp->packets_out) { + tp->first_tx_mstamp = skb->skb_mstamp; + tp->delivered_mstamp = skb->skb_mstamp; + } + + TCP_SKB_CB(skb)->tx.first_tx_mstamp = tp->first_tx_mstamp; + TCP_SKB_CB(skb)->tx.delivered_mstamp = tp->delivered_mstamp; + TCP_SKB_CB(skb)->tx.delivered = tp->delivered; +} + +/* When an skb is sacked or acked, we fill in the rate sample with the (prior) + * delivery information when the skb was last transmitted. + * + * If an ACK (s)acks multiple skbs (e.g., stretched-acks), this function is + * called multiple times. We favor the information from the most recently + * sent skb, i.e., the skb with the highest prior_delivered count. + */ +void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, + struct rate_sample *rs) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct tcp_skb_cb *scb = TCP_SKB_CB(skb); + + if (!scb->tx.delivered_mstamp.v64) + return; + + if (!rs->prior_delivered || + after(scb->tx.delivered, rs->prior_delivered)) { + rs->prior_delivered = scb->tx.delivered; + rs->prior_mstamp = scb->tx.delivered_mstamp; + rs->is_retrans = scb->sacked & TCPCB_RETRANS; + + /* Find the duration of the "send phase" of this window: */ + rs->interval_us = skb_mstamp_us_delta( + &skb->skb_mstamp, + &scb->tx.first_tx_mstamp); + + /* Record send time of most recently ACKed packet: */ + tp->first_tx_mstamp = skb->skb_mstamp; + } + /* Mark off the skb delivered once it's sacked to avoid being + * used again when it's cumulatively acked. For acked packets + * we don't need to reset since it'll be freed soon. + */ + if (scb->sacked & TCPCB_SACKED_ACKED) + scb->tx.delivered_mstamp.v64 = 0; +} + +/* Update the connection delivery information and generate a rate sample. */ +void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, + struct skb_mstamp *now, struct rate_sample *rs) +{ + struct tcp_sock *tp = tcp_sk(sk); + u32 snd_us, ack_us; + + /* TODO: there are multiple places throughout tcp_ack() to get + * current time. Refactor the code using a new "tcp_acktag_state" + * to carry current time, flags, stats like "tcp_sacktag_state". + */ + if (delivered) + tp->delivered_mstamp = *now; + + rs->acked_sacked = delivered; /* freshly ACKed or SACKed */ + rs->losses = lost; /* freshly marked lost */ + /* Return an invalid sample if no timing information is available. */ + if (!rs->prior_mstamp.v64) { + rs->delivered = -1; + rs->interval_us = -1; + return; + } + rs->delivered = tp->delivered - rs->prior_delivered; + + /* Model sending data and receiving ACKs as separate pipeline phases + * for a window. Usually the ACK phase is longer, but with ACK + * compression the send phase can be longer. To be safe we use the + * longer phase. + */ + snd_us = rs->interval_us; /* send phase */ + ack_us = skb_mstamp_us_delta(now, &rs->prior_mstamp); /* ack phase */ + rs->interval_us = max(snd_us, ack_us); + + /* Normally we expect interval_us >= min-rtt. + * Note that rate may still be over-estimated when a spuriously + * retransmistted skb was first (s)acked because "interval_us" + * is under-estimated (up to an RTT). However continuously + * measuring the delivery rate during loss recovery is crucial + * for connections suffer heavy or prolonged losses. + */ + if (unlikely(rs->interval_us < tcp_min_rtt(tp))) { + rs->interval_us = -1; + if (!rs->is_retrans) + pr_debug("tcp rate: %ld %d %u %u %u\n", + rs->interval_us, rs->delivered, + inet_csk(sk)->icsk_ca_state, + tp->rx_opt.sack_ok, tcp_min_rtt(tp)); + } +} From d7722e8570fc0f1e003cee7cf37694041828918b Mon Sep 17 00:00:00 2001 From: Soheil Hassas Yeganeh Date: Mon, 19 Sep 2016 23:39:15 -0400 Subject: [PATCH 1295/1715] tcp: track application-limited rate samples This commit adds code to track whether the delivery rate represented by each rate_sample was limited by the application. Upon each transmit, we store in the is_app_limited field in the skb a boolean bit indicating whether there is a known "bubble in the pipe": a point in the rate sample interval where the sender was application-limited, and did not transmit even though the cwnd and pacing rate allowed it. This logic marks the flow app-limited on a write if *all* of the following are true: 1) There is less than 1 MSS of unsent data in the write queue available to transmit. 2) There is no packet in the sender's queues (e.g. in fq or the NIC tx queue). 3) The connection is not limited by cwnd. 4) There are no lost packets to retransmit. The tcp_rate_check_app_limited() code in tcp_rate.c determines whether the connection is application-limited at the moment. If the flow is application-limited, it sets the tp->app_limited field. If the flow is application-limited then that means there is effectively a "bubble" of silence in the pipe now, and this silence will be reflected in a lower bandwidth sample for any rate samples from now until we get an ACK indicating this bubble has exited the pipe: specifically, until we get an ACK for the next packet we transmit. When we send every skb we record in scb->tx.is_app_limited whether the resulting rate sample will be application-limited. The code in tcp_rate_gen() checks to see when it is safe to mark all known application-limited bubbles of silence as having exited the pipe. It does this by checking to see when the delivered count moves past the tp->app_limited marker. At this point it zeroes the tp->app_limited marker, as all known bubbles are out of the pipe. We make room for the tx.is_app_limited bit in the skb by borrowing a bit from the in_flight field used by NV to record the number of bytes in flight. The receive window in the TCP header is 16 bits, and the max receive window scaling shift factor is 14 (RFC 1323). So the max receive window offered by the TCP protocol is 2^(16+14) = 2^30. So we only need 30 bits for the tx.in_flight used by NV. Signed-off-by: Van Jacobson Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- include/linux/tcp.h | 1 + include/net/tcp.h | 6 +++++- net/ipv4/tcp.c | 8 ++++++++ net/ipv4/tcp_minisocks.c | 3 +++ net/ipv4/tcp_rate.c | 29 ++++++++++++++++++++++++++++- 5 files changed, 45 insertions(+), 2 deletions(-) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index c50e6aec005a..fdcd00ffcb66 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -268,6 +268,7 @@ struct tcp_sock { u32 prr_out; /* Total number of pkts sent during Recovery. */ u32 delivered; /* Total data packets delivered incl. rexmits */ u32 lost; /* Total data packets lost incl. rexmits */ + u32 app_limited; /* limited until "delivered" reaches this val */ struct skb_mstamp first_tx_mstamp; /* start of window send phase */ struct skb_mstamp delivered_mstamp; /* time we reached "delivered" */ diff --git a/include/net/tcp.h b/include/net/tcp.h index b261c892605a..a69ed7f0030c 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -764,7 +764,9 @@ struct tcp_skb_cb { union { struct { /* There is space for up to 24 bytes */ - __u32 in_flight;/* Bytes in flight when packet sent */ + __u32 in_flight:30,/* Bytes in flight at transmit */ + is_app_limited:1, /* cwnd not fully used? */ + unused:1; /* pkts S/ACKed so far upon tx of skb, incl retrans: */ __u32 delivered; /* start of send pipeline phase */ @@ -883,6 +885,7 @@ struct rate_sample { int losses; /* number of packets marked lost upon ACK */ u32 acked_sacked; /* number of packets newly (S)ACKed upon ACK */ u32 prior_in_flight; /* in flight before this ACK */ + bool is_app_limited; /* is sample from packet with bubble in pipe? */ bool is_retrans; /* is sample from retransmission? */ }; @@ -978,6 +981,7 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, struct rate_sample *rs); void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, struct skb_mstamp *now, struct rate_sample *rs); +void tcp_rate_check_app_limited(struct sock *sk); /* These functions determine how the current flow behaves in respect of SACK * handling. SACK is negotiated with the peer, and therefore it can vary diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index de02fb4b1349..2250f891f931 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -396,6 +396,9 @@ void tcp_init_sock(struct sock *sk) */ tp->snd_cwnd = TCP_INIT_CWND; + /* There's a bubble in the pipe until at least the first ACK. */ + tp->app_limited = ~0U; + /* See draft-stevens-tcpca-spec-01 for discussion of the * initialization of these values. */ @@ -1014,6 +1017,9 @@ int tcp_sendpage(struct sock *sk, struct page *page, int offset, flags); lock_sock(sk); + + tcp_rate_check_app_limited(sk); /* is sending application-limited? */ + res = do_tcp_sendpages(sk, page, offset, size, flags); release_sock(sk); return res; @@ -1115,6 +1121,8 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); + tcp_rate_check_app_limited(sk); /* is sending application-limited? */ + /* Wait for a connection to finish. One exception is TCP Fast Open * (passive side) where data is allowed to be sent before a connection * is fully established. diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 568947110b60..6234ebaa7db1 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -487,6 +487,9 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->snd_cwnd = TCP_INIT_CWND; newtp->snd_cwnd_cnt = 0; + /* There's a bubble in the pipe until at least the first ACK. */ + newtp->app_limited = ~0U; + tcp_init_xmit_timers(newsk); newtp->write_seq = newtp->pushed_seq = treq->snt_isn + 1; diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c index 1daed6af6e80..52ff84be59ab 100644 --- a/net/ipv4/tcp_rate.c +++ b/net/ipv4/tcp_rate.c @@ -26,9 +26,13 @@ * other factors like applications or receiver window limits. The estimator * deliberately avoids using the inter-packet spacing approach because that * approach requires a large number of samples and sophisticated filtering. + * + * TCP flows can often be application-limited in request/response workloads. + * The estimator marks a bandwidth sample as application-limited if there + * was some moment during the sampled window of packets when there was no data + * ready to send in the write queue. */ - /* Snapshot the current delivery information in the skb, to generate * a rate sample later when the skb is (s)acked in tcp_rate_skb_delivered(). */ @@ -58,6 +62,7 @@ void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb) TCP_SKB_CB(skb)->tx.first_tx_mstamp = tp->first_tx_mstamp; TCP_SKB_CB(skb)->tx.delivered_mstamp = tp->delivered_mstamp; TCP_SKB_CB(skb)->tx.delivered = tp->delivered; + TCP_SKB_CB(skb)->tx.is_app_limited = tp->app_limited ? 1 : 0; } /* When an skb is sacked or acked, we fill in the rate sample with the (prior) @@ -80,6 +85,7 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, after(scb->tx.delivered, rs->prior_delivered)) { rs->prior_delivered = scb->tx.delivered; rs->prior_mstamp = scb->tx.delivered_mstamp; + rs->is_app_limited = scb->tx.is_app_limited; rs->is_retrans = scb->sacked & TCPCB_RETRANS; /* Find the duration of the "send phase" of this window: */ @@ -105,6 +111,10 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, struct tcp_sock *tp = tcp_sk(sk); u32 snd_us, ack_us; + /* Clear app limited if bubble is acked and gone. */ + if (tp->app_limited && after(tp->delivered, tp->app_limited)) + tp->app_limited = 0; + /* TODO: there are multiple places throughout tcp_ack() to get * current time. Refactor the code using a new "tcp_acktag_state" * to carry current time, flags, stats like "tcp_sacktag_state". @@ -147,3 +157,20 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, tp->rx_opt.sack_ok, tcp_min_rtt(tp)); } } + +/* If a gap is detected between sends, mark the socket application-limited. */ +void tcp_rate_check_app_limited(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (/* We have less than one packet to send. */ + tp->write_seq - tp->snd_nxt < tp->mss_cache && + /* Nothing in sending host's qdisc queues or NIC tx queue. */ + sk_wmem_alloc_get(sk) < SKB_TRUESIZE(1) && + /* We are not limited by CWND. */ + tcp_packets_in_flight(tp) < tp->snd_cwnd && + /* All lost packets have been retransmitted. */ + tp->lost_out <= tp->retrans_out) + tp->app_limited = + (tp->delivered + tcp_packets_in_flight(tp)) ? : 1; +} From eb8329e0a04db0061f714f033b4454326ba147f4 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Mon, 19 Sep 2016 23:39:16 -0400 Subject: [PATCH 1296/1715] tcp: export data delivery rate This commit export two new fields in struct tcp_info: tcpi_delivery_rate: The most recent goodput, as measured by tcp_rate_gen(). If the socket is limited by the sending application (e.g., no data to send), it reports the highest measurement instead of the most recent. The unit is bytes per second (like other rate fields in tcp_info). tcpi_delivery_rate_app_limited: A boolean indicating if the goodput was measured when the socket's throughput was limited by the sending application. This delivery rate information can be useful for applications that want to know the current throughput the TCP connection is seeing, e.g. adaptive bitrate video streaming. It can also be very useful for debugging or troubleshooting. Signed-off-by: Van Jacobson Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- include/linux/tcp.h | 5 ++++- include/uapi/linux/tcp.h | 3 +++ net/ipv4/tcp.c | 11 ++++++++++- net/ipv4/tcp_rate.c | 12 +++++++++++- 4 files changed, 28 insertions(+), 3 deletions(-) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index fdcd00ffcb66..a17ae7b85218 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -213,7 +213,8 @@ struct tcp_sock { u8 reord; /* reordering detected */ } rack; u16 advmss; /* Advertised MSS */ - u8 unused; + u8 rate_app_limited:1, /* rate_{delivered,interval_us} limited? */ + unused:7; u8 nonagle : 4,/* Disable Nagle algorithm? */ thin_lto : 1,/* Use linear timeouts for thin streams */ thin_dupack : 1,/* Fast retransmit on first dupack */ @@ -271,6 +272,8 @@ struct tcp_sock { u32 app_limited; /* limited until "delivered" reaches this val */ struct skb_mstamp first_tx_mstamp; /* start of window send phase */ struct skb_mstamp delivered_mstamp; /* time we reached "delivered" */ + u32 rate_delivered; /* saved rate sample: packets delivered */ + u32 rate_interval_us; /* saved rate sample: time elapsed */ u32 rcv_wnd; /* Current receiver window */ u32 write_seq; /* Tail(+1) of data held in tcp send buffer */ diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 482898fc433a..73ac0db487f8 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -167,6 +167,7 @@ struct tcp_info { __u8 tcpi_backoff; __u8 tcpi_options; __u8 tcpi_snd_wscale : 4, tcpi_rcv_wscale : 4; + __u8 tcpi_delivery_rate_app_limited:1; __u32 tcpi_rto; __u32 tcpi_ato; @@ -211,6 +212,8 @@ struct tcp_info { __u32 tcpi_min_rtt; __u32 tcpi_data_segs_in; /* RFC4898 tcpEStatsDataSegsIn */ __u32 tcpi_data_segs_out; /* RFC4898 tcpEStatsDataSegsOut */ + + __u64 tcpi_delivery_rate; }; /* for TCP_MD5SIG socket option */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 2250f891f931..f253e5019d22 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2712,7 +2712,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) { const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */ const struct inet_connection_sock *icsk = inet_csk(sk); - u32 now = tcp_time_stamp; + u32 now = tcp_time_stamp, intv; unsigned int start; int notsent_bytes; u64 rate64; @@ -2802,6 +2802,15 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_min_rtt = tcp_min_rtt(tp); info->tcpi_data_segs_in = tp->data_segs_in; info->tcpi_data_segs_out = tp->data_segs_out; + + info->tcpi_delivery_rate_app_limited = tp->rate_app_limited ? 1 : 0; + rate = READ_ONCE(tp->rate_delivered); + intv = READ_ONCE(tp->rate_interval_us); + if (rate && intv) { + rate64 = (u64)rate * tp->mss_cache * USEC_PER_SEC; + do_div(rate64, intv); + put_unaligned(rate64, &info->tcpi_delivery_rate); + } } EXPORT_SYMBOL_GPL(tcp_get_info); diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c index 52ff84be59ab..9be1581a5a08 100644 --- a/net/ipv4/tcp_rate.c +++ b/net/ipv4/tcp_rate.c @@ -149,12 +149,22 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, * for connections suffer heavy or prolonged losses. */ if (unlikely(rs->interval_us < tcp_min_rtt(tp))) { - rs->interval_us = -1; if (!rs->is_retrans) pr_debug("tcp rate: %ld %d %u %u %u\n", rs->interval_us, rs->delivered, inet_csk(sk)->icsk_ca_state, tp->rx_opt.sack_ok, tcp_min_rtt(tp)); + rs->interval_us = -1; + return; + } + + /* Record the last non-app-limited or the highest app-limited bw */ + if (!rs->is_app_limited || + ((u64)rs->delivered * tp->rate_interval_us >= + (u64)tp->rate_delivered * rs->interval_us)) { + tp->rate_delivered = rs->delivered; + tp->rate_interval_us = rs->interval_us; + tp->rate_app_limited = rs->is_app_limited; } } From ed6e7268b930e0a9a65d895d368eac79a438d992 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Mon, 19 Sep 2016 23:39:17 -0400 Subject: [PATCH 1297/1715] tcp: allow congestion control module to request TSO skb segment count Add the tso_segs_goal() function in tcp_congestion_ops to allow the congestion control module to specify the number of segments that should be in a TSO skb sent by tcp_write_xmit() and tcp_xmit_retransmit_queue(). The congestion control module can either request a particular number of segments in TSO skb that we transmit, or return 0 if it doesn't care. This allows the upcoming BBR congestion control module to select small TSO skb sizes if the module detects that the bottleneck bandwidth is very low, or that the connection is policed to a low rate. Signed-off-by: Van Jacobson Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- include/net/tcp.h | 2 ++ net/ipv4/tcp_output.c | 15 +++++++++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index a69ed7f0030c..f8f581fd05f5 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -913,6 +913,8 @@ struct tcp_congestion_ops { u32 (*undo_cwnd)(struct sock *sk); /* hook for packet ack accounting (optional) */ void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample); + /* suggest number of segments for each skb to transmit (optional) */ + u32 (*tso_segs_goal)(struct sock *sk); /* get info for inet_diag (optional) */ size_t (*get_info)(struct sock *sk, u32 ext, int *attr, union tcp_cc_info *info); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index e02c8ebf3ed4..01379567a732 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1566,6 +1566,17 @@ static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now) return min_t(u32, segs, sk->sk_gso_max_segs); } +/* Return the number of segments we want in the skb we are transmitting. + * See if congestion control module wants to decide; otherwise, autosize. + */ +static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now) +{ + const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; + u32 tso_segs = ca_ops->tso_segs_goal ? ca_ops->tso_segs_goal(sk) : 0; + + return tso_segs ? : tcp_tso_autosize(sk, mss_now); +} + /* Returns the portion of skb which can be sent right away */ static unsigned int tcp_mss_split_point(const struct sock *sk, const struct sk_buff *skb, @@ -2061,7 +2072,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, } } - max_segs = tcp_tso_autosize(sk, mss_now); + max_segs = tcp_tso_segs(sk, mss_now); while ((skb = tcp_send_head(sk))) { unsigned int limit; @@ -2778,7 +2789,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk) last_lost = tp->snd_una; } - max_segs = tcp_tso_autosize(sk, tcp_current_mss(sk)); + max_segs = tcp_tso_segs(sk, tcp_current_mss(sk)); tcp_for_write_queue_from(skb, sk) { __u8 sacked; int segs; From 1b3878ca1551f3baab2c408d1e703b5ef785a1b2 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Mon, 19 Sep 2016 23:39:18 -0400 Subject: [PATCH 1298/1715] tcp: export tcp_tso_autosize() and parameterize minimum number of TSO segments To allow congestion control modules to use the default TSO auto-sizing algorithm as one of the ingredients in their own decision about TSO sizing: 1) Export tcp_tso_autosize() so that CC modules can use it. 2) Change tcp_tso_autosize() to allow callers to specify a minimum number of segments per TSO skb, in case the congestion control module has a different notion of the best floor for TSO skbs for the connection right now. For very low-rate paths or policed connections it can be appropriate to use smaller TSO skbs. Signed-off-by: Van Jacobson Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- include/net/tcp.h | 2 ++ net/ipv4/tcp_output.c | 9 ++++++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index f8f581fd05f5..349204130d84 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -533,6 +533,8 @@ __u32 cookie_v6_init_sequence(const struct sk_buff *skb, __u16 *mss); #endif /* tcp_output.c */ +u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, + int min_tso_segs); void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, int nonagle); bool tcp_may_send_now(struct sock *sk); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 01379567a732..0bf3d481fa85 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1549,7 +1549,8 @@ static bool tcp_nagle_check(bool partial, const struct tcp_sock *tp, /* Return how many segs we'd like on a TSO packet, * to send one TSO packet per ms */ -static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now) +u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, + int min_tso_segs) { u32 bytes, segs; @@ -1561,10 +1562,11 @@ static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now) * This preserves ACK clocking and is consistent * with tcp_tso_should_defer() heuristic. */ - segs = max_t(u32, bytes / mss_now, sysctl_tcp_min_tso_segs); + segs = max_t(u32, bytes / mss_now, min_tso_segs); return min_t(u32, segs, sk->sk_gso_max_segs); } +EXPORT_SYMBOL(tcp_tso_autosize); /* Return the number of segments we want in the skb we are transmitting. * See if congestion control module wants to decide; otherwise, autosize. @@ -1574,7 +1576,8 @@ static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now) const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; u32 tso_segs = ca_ops->tso_segs_goal ? ca_ops->tso_segs_goal(sk) : 0; - return tso_segs ? : tcp_tso_autosize(sk, mss_now); + return tso_segs ? : + tcp_tso_autosize(sk, mss_now, sysctl_tcp_min_tso_segs); } /* Returns the portion of skb which can be sent right away */ From 556c6b46d194cc0dbb6a5b22f1d2bbc699c86d8e Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Mon, 19 Sep 2016 23:39:19 -0400 Subject: [PATCH 1299/1715] tcp: export tcp_mss_to_mtu() for congestion control modules Export tcp_mss_to_mtu(), so that congestion control modules can use this to help calculate a pacing rate. Signed-off-by: Van Jacobson Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 0bf3d481fa85..7d025a7804b5 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1362,6 +1362,7 @@ int tcp_mss_to_mtu(struct sock *sk, int mss) } return mtu; } +EXPORT_SYMBOL(tcp_mss_to_mtu); /* MTU probing init per socket */ void tcp_mtup_init(struct sock *sk) From 77bfc174c38e558a3425d3b069aa2762b2fedfdd Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Mon, 19 Sep 2016 23:39:20 -0400 Subject: [PATCH 1300/1715] tcp: allow congestion control to expand send buffer differently Currently the TCP send buffer expands to twice cwnd, in order to allow limited transmits in the CA_Recovery state. This assumes that cwnd does not increase in the CA_Recovery. For some congestion control algorithms, like the upcoming BBR module, if the losses in recovery do not indicate congestion then we may continue to raise cwnd multiplicatively in recovery. In such cases the current multiplier will falsely limit the sending rate, much as if it were limited by the application. This commit adds an optional congestion control callback to use a different multiplier to expand the TCP send buffer. For congestion control modules that do not specificy this callback, TCP continues to use the previous default of 2. Signed-off-by: Van Jacobson Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Acked-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/net/tcp.h | 2 ++ net/ipv4/tcp_input.c | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 349204130d84..1aa9628ae608 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -917,6 +917,8 @@ struct tcp_congestion_ops { void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample); /* suggest number of segments for each skb to transmit (optional) */ u32 (*tso_segs_goal)(struct sock *sk); + /* returns the multiplier used in tcp_sndbuf_expand (optional) */ + u32 (*sndbuf_expand)(struct sock *sk); /* get info for inet_diag (optional) */ size_t (*get_info)(struct sock *sk, u32 ext, int *attr, union tcp_cc_info *info); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index d9ed4bb96f74..13a2e70141f5 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -289,6 +289,7 @@ static bool tcp_ecn_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr static void tcp_sndbuf_expand(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; int sndmem, per_mss; u32 nr_segs; @@ -309,7 +310,8 @@ static void tcp_sndbuf_expand(struct sock *sk) * Cubic needs 1.7 factor, rounded to 2 to include * extra cushion (application might react slowly to POLLOUT) */ - sndmem = 2 * nr_segs * per_mss; + sndmem = ca_ops->sndbuf_expand ? ca_ops->sndbuf_expand(sk) : 2; + sndmem *= nr_segs * per_mss; if (sk->sk_sndbuf < sndmem) sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]); From c0402760f565ae066621ebf8720a32fba074d538 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Mon, 19 Sep 2016 23:39:21 -0400 Subject: [PATCH 1301/1715] tcp: new CC hook to set sending rate with rate_sample in any CA state This commit introduces an optional new "omnipotent" hook, cong_control(), for congestion control modules. The cong_control() function is called at the end of processing an ACK (i.e., after updating sequence numbers, the SACK scoreboard, and loss detection). At that moment we have precise delivery rate information the congestion control module can use to control the sending behavior (using cwnd, TSO skb size, and pacing rate) in any CA state. This function can also be used by a congestion control that prefers not to use the default cwnd reduction approach (i.e., the PRR algorithm) during CA_Recovery to control the cwnd and sending rate during loss recovery. We take advantage of the fact that recent changes defer the retransmission or transmission of new data (e.g. by F-RTO) in recovery until the new tcp_cong_control() function is run. With this commit, we only run tcp_update_pacing_rate() if the congestion control is not using this new API. New congestion controls which use the new API do not want the TCP stack to run the default pacing rate calculation and overwrite whatever pacing rate they have chosen at initialization time. Signed-off-by: Van Jacobson Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- include/net/tcp.h | 4 ++++ net/ipv4/tcp_cong.c | 2 +- net/ipv4/tcp_input.c | 17 ++++++++++++++--- 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 1aa9628ae608..f83b7f220a65 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -919,6 +919,10 @@ struct tcp_congestion_ops { u32 (*tso_segs_goal)(struct sock *sk); /* returns the multiplier used in tcp_sndbuf_expand (optional) */ u32 (*sndbuf_expand)(struct sock *sk); + /* call when packets are delivered to update cwnd and pacing rate, + * after all the ca_state processing. (optional) + */ + void (*cong_control)(struct sock *sk, const struct rate_sample *rs); /* get info for inet_diag (optional) */ size_t (*get_info)(struct sock *sk, u32 ext, int *attr, union tcp_cc_info *info); diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 882caa4e72bc..1294af4e0127 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -69,7 +69,7 @@ int tcp_register_congestion_control(struct tcp_congestion_ops *ca) int ret = 0; /* all algorithms must implement ssthresh and cong_avoid ops */ - if (!ca->ssthresh || !ca->cong_avoid) { + if (!ca->ssthresh || !(ca->cong_avoid || ca->cong_control)) { pr_err("%s does not implement required ops\n", ca->name); return -EINVAL; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 13a2e70141f5..980a83edfa63 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2536,6 +2536,9 @@ static inline void tcp_end_cwnd_reduction(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); + if (inet_csk(sk)->icsk_ca_ops->cong_control) + return; + /* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */ if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR || (tp->undo_marker && tp->snd_ssthresh < TCP_INFINITE_SSTHRESH)) { @@ -3312,8 +3315,15 @@ static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag) * information. All transmission or retransmission are delayed afterwards. */ static void tcp_cong_control(struct sock *sk, u32 ack, u32 acked_sacked, - int flag) + int flag, const struct rate_sample *rs) { + const struct inet_connection_sock *icsk = inet_csk(sk); + + if (icsk->icsk_ca_ops->cong_control) { + icsk->icsk_ca_ops->cong_control(sk, rs); + return; + } + if (tcp_in_cwnd_reduction(sk)) { /* Reduce cwnd if state mandates */ tcp_cwnd_reduction(sk, acked_sacked, flag); @@ -3683,7 +3693,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) delivered = tp->delivered - delivered; /* freshly ACKed or SACKed */ lost = tp->lost - lost; /* freshly marked lost */ tcp_rate_gen(sk, delivered, lost, &now, &rs); - tcp_cong_control(sk, ack, delivered, flag); + tcp_cong_control(sk, ack, delivered, flag, &rs); tcp_xmit_recovery(sk, rexmit); return 1; @@ -5982,7 +5992,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) } else tcp_init_metrics(sk); - tcp_update_pacing_rate(sk); + if (!inet_csk(sk)->icsk_ca_ops->cong_control) + tcp_update_pacing_rate(sk); /* Prevent spurious tcp_cwnd_restart() on first data packet */ tp->lsndtime = tcp_time_stamp; From 7e744171386ae6da1248d3d27d10b6dbdc54f0ff Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Mon, 19 Sep 2016 23:39:22 -0400 Subject: [PATCH 1302/1715] tcp: increase ICSK_CA_PRIV_SIZE from 64 bytes to 88 The TCP CUBIC module already uses 64 bytes. The upcoming TCP BBR module uses 88 bytes. Signed-off-by: Van Jacobson Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- include/net/inet_connection_sock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 49dcad4fe99e..197a30d221e9 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -134,8 +134,8 @@ struct inet_connection_sock { } icsk_mtup; u32 icsk_user_timeout; - u64 icsk_ca_priv[64 / sizeof(u64)]; -#define ICSK_CA_PRIV_SIZE (8 * sizeof(u64)) + u64 icsk_ca_priv[88 / sizeof(u64)]; +#define ICSK_CA_PRIV_SIZE (11 * sizeof(u64)) }; #define ICSK_TIME_RETRANS 1 /* Retransmit timer */ From 0f8782ea14974ce992618b55f0c041ef43ed0b78 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Mon, 19 Sep 2016 23:39:23 -0400 Subject: [PATCH 1303/1715] tcp_bbr: add BBR congestion control This commit implements a new TCP congestion control algorithm: BBR (Bottleneck Bandwidth and RTT). A detailed description of BBR will be published in ACM Queue, Vol. 14 No. 5, September-October 2016, as "BBR: Congestion-Based Congestion Control". BBR has significantly increased throughput and reduced latency for connections on Google's internal backbone networks and google.com and YouTube Web servers. BBR requires only changes on the sender side, not in the network or the receiver side. Thus it can be incrementally deployed on today's Internet, or in datacenters. The Internet has predominantly used loss-based congestion control (largely Reno or CUBIC) since the 1980s, relying on packet loss as the signal to slow down. While this worked well for many years, loss-based congestion control is unfortunately out-dated in today's networks. On today's Internet, loss-based congestion control causes the infamous bufferbloat problem, often causing seconds of needless queuing delay, since it fills the bloated buffers in many last-mile links. On today's high-speed long-haul links using commodity switches with shallow buffers, loss-based congestion control has abysmal throughput because it over-reacts to losses caused by transient traffic bursts. In 1981 Kleinrock and Gale showed that the optimal operating point for a network maximizes delivered bandwidth while minimizing delay and loss, not only for single connections but for the network as a whole. Finding that optimal operating point has been elusive, since any single network measurement is ambiguous: network measurements are the result of both bandwidth and propagation delay, and those two cannot be measured simultaneously. While it is impossible to disambiguate any single bandwidth or RTT measurement, a connection's behavior over time tells a clearer story. BBR uses a measurement strategy designed to resolve this ambiguity. It combines these measurements with a robust servo loop using recent control systems advances to implement a distributed congestion control algorithm that reacts to actual congestion, not packet loss or transient queue delay, and is designed to converge with high probability to a point near the optimal operating point. In a nutshell, BBR creates an explicit model of the network pipe by sequentially probing the bottleneck bandwidth and RTT. On the arrival of each ACK, BBR derives the current delivery rate of the last round trip, and feeds it through a windowed max-filter to estimate the bottleneck bandwidth. Conversely it uses a windowed min-filter to estimate the round trip propagation delay. The max-filtered bandwidth and min-filtered RTT estimates form BBR's model of the network pipe. Using its model, BBR sets control parameters to govern sending behavior. The primary control is the pacing rate: BBR applies a gain multiplier to transmit faster or slower than the observed bottleneck bandwidth. The conventional congestion window (cwnd) is now the secondary control; the cwnd is set to a small multiple of the estimated BDP (bandwidth-delay product) in order to allow full utilization and bandwidth probing while bounding the potential amount of queue at the bottleneck. When a BBR connection starts, it enters STARTUP mode and applies a high gain to perform an exponential search to quickly probe the bottleneck bandwidth (doubling its sending rate each round trip, like slow start). However, instead of continuing until it fills up the buffer (i.e. a loss), or until delay or ACK spacing reaches some threshold (like Hystart), it uses its model of the pipe to estimate when that pipe is full: it estimates the pipe is full when it notices the estimated bandwidth has stopped growing. At that point it exits STARTUP and enters DRAIN mode, where it reduces its pacing rate to drain the queue it estimates it has created. Then BBR enters steady state. In steady state, PROBE_BW mode cycles between first pacing faster to probe for more bandwidth, then pacing slower to drain any queue that created if no more bandwidth was available, and then cruising at the estimated bandwidth to utilize the pipe without creating excess queue. Occasionally, on an as-needed basis, it sends significantly slower to probe for RTT (PROBE_RTT mode). BBR has been fully deployed on Google's wide-area backbone networks and we're experimenting with BBR on Google.com and YouTube on a global scale. Replacing CUBIC with BBR has resulted in significant improvements in network latency and application (RPC, browser, and video) metrics. For more details please refer to our upcoming ACM Queue publication. Example performance results, to illustrate the difference between BBR and CUBIC: Resilience to random loss (e.g. from shallow buffers): Consider a netperf TCP_STREAM test lasting 30 secs on an emulated path with a 10Gbps bottleneck, 100ms RTT, and 1% packet loss rate. CUBIC gets 3.27 Mbps, and BBR gets 9150 Mbps (2798x higher). Low latency with the bloated buffers common in today's last-mile links: Consider a netperf TCP_STREAM test lasting 120 secs on an emulated path with a 10Mbps bottleneck, 40ms RTT, and 1000-packet bottleneck buffer. Both fully utilize the bottleneck bandwidth, but BBR achieves this with a median RTT 25x lower (43 ms instead of 1.09 secs). Our long-term goal is to improve the congestion control algorithms used on the Internet. We are hopeful that BBR can help advance the efforts toward this goal, and motivate the community to do further research. Test results, performance evaluations, feedback, and BBR-related discussions are very welcome in the public e-mail list for BBR: https://groups.google.com/forum/#!forum/bbr-dev NOTE: BBR *must* be used with the fq qdisc ("man tc-fq") with pacing enabled, since pacing is integral to the BBR design and implementation. BBR without pacing would not function properly, and may incur unnecessary high packet loss rates. Signed-off-by: Van Jacobson Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- include/uapi/linux/inet_diag.h | 13 + net/ipv4/Kconfig | 18 + net/ipv4/Makefile | 1 + net/ipv4/tcp_bbr.c | 896 +++++++++++++++++++++++++++++++++ 4 files changed, 928 insertions(+) create mode 100644 net/ipv4/tcp_bbr.c diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h index b5c366f87b3e..509cd961068d 100644 --- a/include/uapi/linux/inet_diag.h +++ b/include/uapi/linux/inet_diag.h @@ -124,6 +124,7 @@ enum { INET_DIAG_PEERS, INET_DIAG_PAD, INET_DIAG_MARK, + INET_DIAG_BBRINFO, __INET_DIAG_MAX, }; @@ -157,8 +158,20 @@ struct tcp_dctcp_info { __u32 dctcp_ab_tot; }; +/* INET_DIAG_BBRINFO */ + +struct tcp_bbr_info { + /* u64 bw: max-filtered BW (app throughput) estimate in Byte per sec: */ + __u32 bbr_bw_lo; /* lower 32 bits of bw */ + __u32 bbr_bw_hi; /* upper 32 bits of bw */ + __u32 bbr_min_rtt; /* min-filtered RTT in uSec */ + __u32 bbr_pacing_gain; /* pacing gain shifted left 8 bits */ + __u32 bbr_cwnd_gain; /* cwnd gain shifted left 8 bits */ +}; + union tcp_cc_info { struct tcpvegas_info vegas; struct tcp_dctcp_info dctcp; + struct tcp_bbr_info bbr; }; #endif /* _UAPI_INET_DIAG_H_ */ diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 50d6a9b49f6c..300b06888fdf 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -640,6 +640,21 @@ config TCP_CONG_CDG D.A. Hayes and G. Armitage. "Revisiting TCP congestion control using delay gradients." In Networking 2011. Preprint: http://goo.gl/No3vdg +config TCP_CONG_BBR + tristate "BBR TCP" + default n + ---help--- + + BBR (Bottleneck Bandwidth and RTT) TCP congestion control aims to + maximize network utilization and minimize queues. It builds an explicit + model of the the bottleneck delivery rate and path round-trip + propagation delay. It tolerates packet loss and delay unrelated to + congestion. It can operate over LAN, WAN, cellular, wifi, or cable + modem links. It can coexist with flows that use loss-based congestion + control, and can operate with shallow buffers, deep buffers, + bufferbloat, policers, or AQM schemes that do not provide a delay + signal. It requires the fq ("Fair Queue") pacing packet scheduler. + choice prompt "Default TCP congestion control" default DEFAULT_CUBIC @@ -674,6 +689,9 @@ choice config DEFAULT_CDG bool "CDG" if TCP_CONG_CDG=y + config DEFAULT_BBR + bool "BBR" if TCP_CONG_BBR=y + config DEFAULT_RENO bool "Reno" endchoice diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 9cfff1a0bf71..bc6a6c8b9bcd 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -41,6 +41,7 @@ obj-$(CONFIG_INET_DIAG) += inet_diag.o obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o obj-$(CONFIG_INET_UDP_DIAG) += udp_diag.o obj-$(CONFIG_NET_TCPPROBE) += tcp_probe.o +obj-$(CONFIG_TCP_CONG_BBR) += tcp_bbr.o obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o obj-$(CONFIG_TCP_CONG_CDG) += tcp_cdg.o obj-$(CONFIG_TCP_CONG_CUBIC) += tcp_cubic.o diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c new file mode 100644 index 000000000000..0ea66c2c9344 --- /dev/null +++ b/net/ipv4/tcp_bbr.c @@ -0,0 +1,896 @@ +/* Bottleneck Bandwidth and RTT (BBR) congestion control + * + * BBR congestion control computes the sending rate based on the delivery + * rate (throughput) estimated from ACKs. In a nutshell: + * + * On each ACK, update our model of the network path: + * bottleneck_bandwidth = windowed_max(delivered / elapsed, 10 round trips) + * min_rtt = windowed_min(rtt, 10 seconds) + * pacing_rate = pacing_gain * bottleneck_bandwidth + * cwnd = max(cwnd_gain * bottleneck_bandwidth * min_rtt, 4) + * + * The core algorithm does not react directly to packet losses or delays, + * although BBR may adjust the size of next send per ACK when loss is + * observed, or adjust the sending rate if it estimates there is a + * traffic policer, in order to keep the drop rate reasonable. + * + * BBR is described in detail in: + * "BBR: Congestion-Based Congestion Control", + * Neal Cardwell, Yuchung Cheng, C. Stephen Gunn, Soheil Hassas Yeganeh, + * Van Jacobson. ACM Queue, Vol. 14 No. 5, September-October 2016. + * + * There is a public e-mail list for discussing BBR development and testing: + * https://groups.google.com/forum/#!forum/bbr-dev + * + * NOTE: BBR *must* be used with the fq qdisc ("man tc-fq") with pacing enabled, + * since pacing is integral to the BBR design and implementation. + * BBR without pacing would not function properly, and may incur unnecessary + * high packet loss rates. + */ +#include +#include +#include +#include +#include +#include + +/* Scale factor for rate in pkt/uSec unit to avoid truncation in bandwidth + * estimation. The rate unit ~= (1500 bytes / 1 usec / 2^24) ~= 715 bps. + * This handles bandwidths from 0.06pps (715bps) to 256Mpps (3Tbps) in a u32. + * Since the minimum window is >=4 packets, the lower bound isn't + * an issue. The upper bound isn't an issue with existing technologies. + */ +#define BW_SCALE 24 +#define BW_UNIT (1 << BW_SCALE) + +#define BBR_SCALE 8 /* scaling factor for fractions in BBR (e.g. gains) */ +#define BBR_UNIT (1 << BBR_SCALE) + +/* BBR has the following modes for deciding how fast to send: */ +enum bbr_mode { + BBR_STARTUP, /* ramp up sending rate rapidly to fill pipe */ + BBR_DRAIN, /* drain any queue created during startup */ + BBR_PROBE_BW, /* discover, share bw: pace around estimated bw */ + BBR_PROBE_RTT, /* cut cwnd to min to probe min_rtt */ +}; + +/* BBR congestion control block */ +struct bbr { + u32 min_rtt_us; /* min RTT in min_rtt_win_sec window */ + u32 min_rtt_stamp; /* timestamp of min_rtt_us */ + u32 probe_rtt_done_stamp; /* end time for BBR_PROBE_RTT mode */ + struct minmax bw; /* Max recent delivery rate in pkts/uS << 24 */ + u32 rtt_cnt; /* count of packet-timed rounds elapsed */ + u32 next_rtt_delivered; /* scb->tx.delivered at end of round */ + struct skb_mstamp cycle_mstamp; /* time of this cycle phase start */ + u32 mode:3, /* current bbr_mode in state machine */ + prev_ca_state:3, /* CA state on previous ACK */ + packet_conservation:1, /* use packet conservation? */ + restore_cwnd:1, /* decided to revert cwnd to old value */ + round_start:1, /* start of packet-timed tx->ack round? */ + tso_segs_goal:7, /* segments we want in each skb we send */ + idle_restart:1, /* restarting after idle? */ + probe_rtt_round_done:1, /* a BBR_PROBE_RTT round at 4 pkts? */ + unused:5, + lt_is_sampling:1, /* taking long-term ("LT") samples now? */ + lt_rtt_cnt:7, /* round trips in long-term interval */ + lt_use_bw:1; /* use lt_bw as our bw estimate? */ + u32 lt_bw; /* LT est delivery rate in pkts/uS << 24 */ + u32 lt_last_delivered; /* LT intvl start: tp->delivered */ + u32 lt_last_stamp; /* LT intvl start: tp->delivered_mstamp */ + u32 lt_last_lost; /* LT intvl start: tp->lost */ + u32 pacing_gain:10, /* current gain for setting pacing rate */ + cwnd_gain:10, /* current gain for setting cwnd */ + full_bw_cnt:3, /* number of rounds without large bw gains */ + cycle_idx:3, /* current index in pacing_gain cycle array */ + unused_b:6; + u32 prior_cwnd; /* prior cwnd upon entering loss recovery */ + u32 full_bw; /* recent bw, to estimate if pipe is full */ +}; + +#define CYCLE_LEN 8 /* number of phases in a pacing gain cycle */ + +/* Window length of bw filter (in rounds): */ +static const int bbr_bw_rtts = CYCLE_LEN + 2; +/* Window length of min_rtt filter (in sec): */ +static const u32 bbr_min_rtt_win_sec = 10; +/* Minimum time (in ms) spent at bbr_cwnd_min_target in BBR_PROBE_RTT mode: */ +static const u32 bbr_probe_rtt_mode_ms = 200; +/* Skip TSO below the following bandwidth (bits/sec): */ +static const int bbr_min_tso_rate = 1200000; + +/* We use a high_gain value of 2/ln(2) because it's the smallest pacing gain + * that will allow a smoothly increasing pacing rate that will double each RTT + * and send the same number of packets per RTT that an un-paced, slow-starting + * Reno or CUBIC flow would: + */ +static const int bbr_high_gain = BBR_UNIT * 2885 / 1000 + 1; +/* The pacing gain of 1/high_gain in BBR_DRAIN is calculated to typically drain + * the queue created in BBR_STARTUP in a single round: + */ +static const int bbr_drain_gain = BBR_UNIT * 1000 / 2885; +/* The gain for deriving steady-state cwnd tolerates delayed/stretched ACKs: */ +static const int bbr_cwnd_gain = BBR_UNIT * 2; +/* The pacing_gain values for the PROBE_BW gain cycle, to discover/share bw: */ +static const int bbr_pacing_gain[] = { + BBR_UNIT * 5 / 4, /* probe for more available bw */ + BBR_UNIT * 3 / 4, /* drain queue and/or yield bw to other flows */ + BBR_UNIT, BBR_UNIT, BBR_UNIT, /* cruise at 1.0*bw to utilize pipe, */ + BBR_UNIT, BBR_UNIT, BBR_UNIT /* without creating excess queue... */ +}; +/* Randomize the starting gain cycling phase over N phases: */ +static const u32 bbr_cycle_rand = 7; + +/* Try to keep at least this many packets in flight, if things go smoothly. For + * smooth functioning, a sliding window protocol ACKing every other packet + * needs at least 4 packets in flight: + */ +static const u32 bbr_cwnd_min_target = 4; + +/* To estimate if BBR_STARTUP mode (i.e. high_gain) has filled pipe... */ +/* If bw has increased significantly (1.25x), there may be more bw available: */ +static const u32 bbr_full_bw_thresh = BBR_UNIT * 5 / 4; +/* But after 3 rounds w/o significant bw growth, estimate pipe is full: */ +static const u32 bbr_full_bw_cnt = 3; + +/* "long-term" ("LT") bandwidth estimator parameters... */ +/* The minimum number of rounds in an LT bw sampling interval: */ +static const u32 bbr_lt_intvl_min_rtts = 4; +/* If lost/delivered ratio > 20%, interval is "lossy" and we may be policed: */ +static const u32 bbr_lt_loss_thresh = 50; +/* If 2 intervals have a bw ratio <= 1/8, their bw is "consistent": */ +static const u32 bbr_lt_bw_ratio = BBR_UNIT / 8; +/* If 2 intervals have a bw diff <= 4 Kbit/sec their bw is "consistent": */ +static const u32 bbr_lt_bw_diff = 4000 / 8; +/* If we estimate we're policed, use lt_bw for this many round trips: */ +static const u32 bbr_lt_bw_max_rtts = 48; + +/* Do we estimate that STARTUP filled the pipe? */ +static bool bbr_full_bw_reached(const struct sock *sk) +{ + const struct bbr *bbr = inet_csk_ca(sk); + + return bbr->full_bw_cnt >= bbr_full_bw_cnt; +} + +/* Return the windowed max recent bandwidth sample, in pkts/uS << BW_SCALE. */ +static u32 bbr_max_bw(const struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + return minmax_get(&bbr->bw); +} + +/* Return the estimated bandwidth of the path, in pkts/uS << BW_SCALE. */ +static u32 bbr_bw(const struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + return bbr->lt_use_bw ? bbr->lt_bw : bbr_max_bw(sk); +} + +/* Return rate in bytes per second, optionally with a gain. + * The order here is chosen carefully to avoid overflow of u64. This should + * work for input rates of up to 2.9Tbit/sec and gain of 2.89x. + */ +static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain) +{ + rate *= tcp_mss_to_mtu(sk, tcp_sk(sk)->mss_cache); + rate *= gain; + rate >>= BBR_SCALE; + rate *= USEC_PER_SEC; + return rate >> BW_SCALE; +} + +/* Pace using current bw estimate and a gain factor. In order to help drive the + * network toward lower queues while maintaining high utilization and low + * latency, the average pacing rate aims to be slightly (~1%) lower than the + * estimated bandwidth. This is an important aspect of the design. In this + * implementation this slightly lower pacing rate is achieved implicitly by not + * including link-layer headers in the packet size used for the pacing rate. + */ +static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain) +{ + struct bbr *bbr = inet_csk_ca(sk); + u64 rate = bw; + + rate = bbr_rate_bytes_per_sec(sk, rate, gain); + rate = min_t(u64, rate, sk->sk_max_pacing_rate); + if (bbr->mode != BBR_STARTUP || rate > sk->sk_pacing_rate) + sk->sk_pacing_rate = rate; +} + +/* Return count of segments we want in the skbs we send, or 0 for default. */ +static u32 bbr_tso_segs_goal(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + return bbr->tso_segs_goal; +} + +static void bbr_set_tso_segs_goal(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + u32 min_segs; + + min_segs = sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2; + bbr->tso_segs_goal = min(tcp_tso_autosize(sk, tp->mss_cache, min_segs), + 0x7FU); +} + +/* Save "last known good" cwnd so we can restore it after losses or PROBE_RTT */ +static void bbr_save_cwnd(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + if (bbr->prev_ca_state < TCP_CA_Recovery && bbr->mode != BBR_PROBE_RTT) + bbr->prior_cwnd = tp->snd_cwnd; /* this cwnd is good enough */ + else /* loss recovery or BBR_PROBE_RTT have temporarily cut cwnd */ + bbr->prior_cwnd = max(bbr->prior_cwnd, tp->snd_cwnd); +} + +static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + if (event == CA_EVENT_TX_START && tp->app_limited) { + bbr->idle_restart = 1; + /* Avoid pointless buffer overflows: pace at est. bw if we don't + * need more speed (we're restarting from idle and app-limited). + */ + if (bbr->mode == BBR_PROBE_BW) + bbr_set_pacing_rate(sk, bbr_bw(sk), BBR_UNIT); + } +} + +/* Find target cwnd. Right-size the cwnd based on min RTT and the + * estimated bottleneck bandwidth: + * + * cwnd = bw * min_rtt * gain = BDP * gain + * + * The key factor, gain, controls the amount of queue. While a small gain + * builds a smaller queue, it becomes more vulnerable to noise in RTT + * measurements (e.g., delayed ACKs or other ACK compression effects). This + * noise may cause BBR to under-estimate the rate. + * + * To achieve full performance in high-speed paths, we budget enough cwnd to + * fit full-sized skbs in-flight on both end hosts to fully utilize the path: + * - one skb in sending host Qdisc, + * - one skb in sending host TSO/GSO engine + * - one skb being received by receiver host LRO/GRO/delayed-ACK engine + * Don't worry, at low rates (bbr_min_tso_rate) this won't bloat cwnd because + * in such cases tso_segs_goal is 1. The minimum cwnd is 4 packets, + * which allows 2 outstanding 2-packet sequences, to try to keep pipe + * full even with ACK-every-other-packet delayed ACKs. + */ +static u32 bbr_target_cwnd(struct sock *sk, u32 bw, int gain) +{ + struct bbr *bbr = inet_csk_ca(sk); + u32 cwnd; + u64 w; + + /* If we've never had a valid RTT sample, cap cwnd at the initial + * default. This should only happen when the connection is not using TCP + * timestamps and has retransmitted all of the SYN/SYNACK/data packets + * ACKed so far. In this case, an RTO can cut cwnd to 1, in which + * case we need to slow-start up toward something safe: TCP_INIT_CWND. + */ + if (unlikely(bbr->min_rtt_us == ~0U)) /* no valid RTT samples yet? */ + return TCP_INIT_CWND; /* be safe: cap at default initial cwnd*/ + + w = (u64)bw * bbr->min_rtt_us; + + /* Apply a gain to the given value, then remove the BW_SCALE shift. */ + cwnd = (((w * gain) >> BBR_SCALE) + BW_UNIT - 1) / BW_UNIT; + + /* Allow enough full-sized skbs in flight to utilize end systems. */ + cwnd += 3 * bbr->tso_segs_goal; + + /* Reduce delayed ACKs by rounding up cwnd to the next even number. */ + cwnd = (cwnd + 1) & ~1U; + + return cwnd; +} + +/* An optimization in BBR to reduce losses: On the first round of recovery, we + * follow the packet conservation principle: send P packets per P packets acked. + * After that, we slow-start and send at most 2*P packets per P packets acked. + * After recovery finishes, or upon undo, we restore the cwnd we had when + * recovery started (capped by the target cwnd based on estimated BDP). + * + * TODO(ycheng/ncardwell): implement a rate-based approach. + */ +static bool bbr_set_cwnd_to_recover_or_restore( + struct sock *sk, const struct rate_sample *rs, u32 acked, u32 *new_cwnd) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + u8 prev_state = bbr->prev_ca_state, state = inet_csk(sk)->icsk_ca_state; + u32 cwnd = tp->snd_cwnd; + + /* An ACK for P pkts should release at most 2*P packets. We do this + * in two steps. First, here we deduct the number of lost packets. + * Then, in bbr_set_cwnd() we slow start up toward the target cwnd. + */ + if (rs->losses > 0) + cwnd = max_t(s32, cwnd - rs->losses, 1); + + if (state == TCP_CA_Recovery && prev_state != TCP_CA_Recovery) { + /* Starting 1st round of Recovery, so do packet conservation. */ + bbr->packet_conservation = 1; + bbr->next_rtt_delivered = tp->delivered; /* start round now */ + /* Cut unused cwnd from app behavior, TSQ, or TSO deferral: */ + cwnd = tcp_packets_in_flight(tp) + acked; + } else if (prev_state >= TCP_CA_Recovery && state < TCP_CA_Recovery) { + /* Exiting loss recovery; restore cwnd saved before recovery. */ + bbr->restore_cwnd = 1; + bbr->packet_conservation = 0; + } + bbr->prev_ca_state = state; + + if (bbr->restore_cwnd) { + /* Restore cwnd after exiting loss recovery or PROBE_RTT. */ + cwnd = max(cwnd, bbr->prior_cwnd); + bbr->restore_cwnd = 0; + } + + if (bbr->packet_conservation) { + *new_cwnd = max(cwnd, tcp_packets_in_flight(tp) + acked); + return true; /* yes, using packet conservation */ + } + *new_cwnd = cwnd; + return false; +} + +/* Slow-start up toward target cwnd (if bw estimate is growing, or packet loss + * has drawn us down below target), or snap down to target if we're above it. + */ +static void bbr_set_cwnd(struct sock *sk, const struct rate_sample *rs, + u32 acked, u32 bw, int gain) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + u32 cwnd = 0, target_cwnd = 0; + + if (!acked) + return; + + if (bbr_set_cwnd_to_recover_or_restore(sk, rs, acked, &cwnd)) + goto done; + + /* If we're below target cwnd, slow start cwnd toward target cwnd. */ + target_cwnd = bbr_target_cwnd(sk, bw, gain); + if (bbr_full_bw_reached(sk)) /* only cut cwnd if we filled the pipe */ + cwnd = min(cwnd + acked, target_cwnd); + else if (cwnd < target_cwnd || tp->delivered < TCP_INIT_CWND) + cwnd = cwnd + acked; + cwnd = max(cwnd, bbr_cwnd_min_target); + +done: + tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp); /* apply global cap */ + if (bbr->mode == BBR_PROBE_RTT) /* drain queue, refresh min_rtt */ + tp->snd_cwnd = min(tp->snd_cwnd, bbr_cwnd_min_target); +} + +/* End cycle phase if it's time and/or we hit the phase's in-flight target. */ +static bool bbr_is_next_cycle_phase(struct sock *sk, + const struct rate_sample *rs) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + bool is_full_length = + skb_mstamp_us_delta(&tp->delivered_mstamp, &bbr->cycle_mstamp) > + bbr->min_rtt_us; + u32 inflight, bw; + + /* The pacing_gain of 1.0 paces at the estimated bw to try to fully + * use the pipe without increasing the queue. + */ + if (bbr->pacing_gain == BBR_UNIT) + return is_full_length; /* just use wall clock time */ + + inflight = rs->prior_in_flight; /* what was in-flight before ACK? */ + bw = bbr_max_bw(sk); + + /* A pacing_gain > 1.0 probes for bw by trying to raise inflight to at + * least pacing_gain*BDP; this may take more than min_rtt if min_rtt is + * small (e.g. on a LAN). We do not persist if packets are lost, since + * a path with small buffers may not hold that much. + */ + if (bbr->pacing_gain > BBR_UNIT) + return is_full_length && + (rs->losses || /* perhaps pacing_gain*BDP won't fit */ + inflight >= bbr_target_cwnd(sk, bw, bbr->pacing_gain)); + + /* A pacing_gain < 1.0 tries to drain extra queue we added if bw + * probing didn't find more bw. If inflight falls to match BDP then we + * estimate queue is drained; persisting would underutilize the pipe. + */ + return is_full_length || + inflight <= bbr_target_cwnd(sk, bw, BBR_UNIT); +} + +static void bbr_advance_cycle_phase(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + bbr->cycle_idx = (bbr->cycle_idx + 1) & (CYCLE_LEN - 1); + bbr->cycle_mstamp = tp->delivered_mstamp; + bbr->pacing_gain = bbr_pacing_gain[bbr->cycle_idx]; +} + +/* Gain cycling: cycle pacing gain to converge to fair share of available bw. */ +static void bbr_update_cycle_phase(struct sock *sk, + const struct rate_sample *rs) +{ + struct bbr *bbr = inet_csk_ca(sk); + + if ((bbr->mode == BBR_PROBE_BW) && !bbr->lt_use_bw && + bbr_is_next_cycle_phase(sk, rs)) + bbr_advance_cycle_phase(sk); +} + +static void bbr_reset_startup_mode(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + bbr->mode = BBR_STARTUP; + bbr->pacing_gain = bbr_high_gain; + bbr->cwnd_gain = bbr_high_gain; +} + +static void bbr_reset_probe_bw_mode(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + bbr->mode = BBR_PROBE_BW; + bbr->pacing_gain = BBR_UNIT; + bbr->cwnd_gain = bbr_cwnd_gain; + bbr->cycle_idx = CYCLE_LEN - 1 - prandom_u32_max(bbr_cycle_rand); + bbr_advance_cycle_phase(sk); /* flip to next phase of gain cycle */ +} + +static void bbr_reset_mode(struct sock *sk) +{ + if (!bbr_full_bw_reached(sk)) + bbr_reset_startup_mode(sk); + else + bbr_reset_probe_bw_mode(sk); +} + +/* Start a new long-term sampling interval. */ +static void bbr_reset_lt_bw_sampling_interval(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + bbr->lt_last_stamp = tp->delivered_mstamp.stamp_jiffies; + bbr->lt_last_delivered = tp->delivered; + bbr->lt_last_lost = tp->lost; + bbr->lt_rtt_cnt = 0; +} + +/* Completely reset long-term bandwidth sampling. */ +static void bbr_reset_lt_bw_sampling(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + bbr->lt_bw = 0; + bbr->lt_use_bw = 0; + bbr->lt_is_sampling = false; + bbr_reset_lt_bw_sampling_interval(sk); +} + +/* Long-term bw sampling interval is done. Estimate whether we're policed. */ +static void bbr_lt_bw_interval_done(struct sock *sk, u32 bw) +{ + struct bbr *bbr = inet_csk_ca(sk); + u32 diff; + + if (bbr->lt_bw) { /* do we have bw from a previous interval? */ + /* Is new bw close to the lt_bw from the previous interval? */ + diff = abs(bw - bbr->lt_bw); + if ((diff * BBR_UNIT <= bbr_lt_bw_ratio * bbr->lt_bw) || + (bbr_rate_bytes_per_sec(sk, diff, BBR_UNIT) <= + bbr_lt_bw_diff)) { + /* All criteria are met; estimate we're policed. */ + bbr->lt_bw = (bw + bbr->lt_bw) >> 1; /* avg 2 intvls */ + bbr->lt_use_bw = 1; + bbr->pacing_gain = BBR_UNIT; /* try to avoid drops */ + bbr->lt_rtt_cnt = 0; + return; + } + } + bbr->lt_bw = bw; + bbr_reset_lt_bw_sampling_interval(sk); +} + +/* Token-bucket traffic policers are common (see "An Internet-Wide Analysis of + * Traffic Policing", SIGCOMM 2016). BBR detects token-bucket policers and + * explicitly models their policed rate, to reduce unnecessary losses. We + * estimate that we're policed if we see 2 consecutive sampling intervals with + * consistent throughput and high packet loss. If we think we're being policed, + * set lt_bw to the "long-term" average delivery rate from those 2 intervals. + */ +static void bbr_lt_bw_sampling(struct sock *sk, const struct rate_sample *rs) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + u32 lost, delivered; + u64 bw; + s32 t; + + if (bbr->lt_use_bw) { /* already using long-term rate, lt_bw? */ + if (bbr->mode == BBR_PROBE_BW && bbr->round_start && + ++bbr->lt_rtt_cnt >= bbr_lt_bw_max_rtts) { + bbr_reset_lt_bw_sampling(sk); /* stop using lt_bw */ + bbr_reset_probe_bw_mode(sk); /* restart gain cycling */ + } + return; + } + + /* Wait for the first loss before sampling, to let the policer exhaust + * its tokens and estimate the steady-state rate allowed by the policer. + * Starting samples earlier includes bursts that over-estimate the bw. + */ + if (!bbr->lt_is_sampling) { + if (!rs->losses) + return; + bbr_reset_lt_bw_sampling_interval(sk); + bbr->lt_is_sampling = true; + } + + /* To avoid underestimates, reset sampling if we run out of data. */ + if (rs->is_app_limited) { + bbr_reset_lt_bw_sampling(sk); + return; + } + + if (bbr->round_start) + bbr->lt_rtt_cnt++; /* count round trips in this interval */ + if (bbr->lt_rtt_cnt < bbr_lt_intvl_min_rtts) + return; /* sampling interval needs to be longer */ + if (bbr->lt_rtt_cnt > 4 * bbr_lt_intvl_min_rtts) { + bbr_reset_lt_bw_sampling(sk); /* interval is too long */ + return; + } + + /* End sampling interval when a packet is lost, so we estimate the + * policer tokens were exhausted. Stopping the sampling before the + * tokens are exhausted under-estimates the policed rate. + */ + if (!rs->losses) + return; + + /* Calculate packets lost and delivered in sampling interval. */ + lost = tp->lost - bbr->lt_last_lost; + delivered = tp->delivered - bbr->lt_last_delivered; + /* Is loss rate (lost/delivered) >= lt_loss_thresh? If not, wait. */ + if (!delivered || (lost << BBR_SCALE) < bbr_lt_loss_thresh * delivered) + return; + + /* Find average delivery rate in this sampling interval. */ + t = (s32)(tp->delivered_mstamp.stamp_jiffies - bbr->lt_last_stamp); + if (t < 1) + return; /* interval is less than one jiffy, so wait */ + t = jiffies_to_usecs(t); + /* Interval long enough for jiffies_to_usecs() to return a bogus 0? */ + if (t < 1) { + bbr_reset_lt_bw_sampling(sk); /* interval too long; reset */ + return; + } + bw = (u64)delivered * BW_UNIT; + do_div(bw, t); + bbr_lt_bw_interval_done(sk, bw); +} + +/* Estimate the bandwidth based on how fast packets are delivered */ +static void bbr_update_bw(struct sock *sk, const struct rate_sample *rs) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + u64 bw; + + bbr->round_start = 0; + if (rs->delivered < 0 || rs->interval_us <= 0) + return; /* Not a valid observation */ + + /* See if we've reached the next RTT */ + if (!before(rs->prior_delivered, bbr->next_rtt_delivered)) { + bbr->next_rtt_delivered = tp->delivered; + bbr->rtt_cnt++; + bbr->round_start = 1; + bbr->packet_conservation = 0; + } + + bbr_lt_bw_sampling(sk, rs); + + /* Divide delivered by the interval to find a (lower bound) bottleneck + * bandwidth sample. Delivered is in packets and interval_us in uS and + * ratio will be <<1 for most connections. So delivered is first scaled. + */ + bw = (u64)rs->delivered * BW_UNIT; + do_div(bw, rs->interval_us); + + /* If this sample is application-limited, it is likely to have a very + * low delivered count that represents application behavior rather than + * the available network rate. Such a sample could drag down estimated + * bw, causing needless slow-down. Thus, to continue to send at the + * last measured network rate, we filter out app-limited samples unless + * they describe the path bw at least as well as our bw model. + * + * So the goal during app-limited phase is to proceed with the best + * network rate no matter how long. We automatically leave this + * phase when app writes faster than the network can deliver :) + */ + if (!rs->is_app_limited || bw >= bbr_max_bw(sk)) { + /* Incorporate new sample into our max bw filter. */ + minmax_running_max(&bbr->bw, bbr_bw_rtts, bbr->rtt_cnt, bw); + } +} + +/* Estimate when the pipe is full, using the change in delivery rate: BBR + * estimates that STARTUP filled the pipe if the estimated bw hasn't changed by + * at least bbr_full_bw_thresh (25%) after bbr_full_bw_cnt (3) non-app-limited + * rounds. Why 3 rounds: 1: rwin autotuning grows the rwin, 2: we fill the + * higher rwin, 3: we get higher delivery rate samples. Or transient + * cross-traffic or radio noise can go away. CUBIC Hystart shares a similar + * design goal, but uses delay and inter-ACK spacing instead of bandwidth. + */ +static void bbr_check_full_bw_reached(struct sock *sk, + const struct rate_sample *rs) +{ + struct bbr *bbr = inet_csk_ca(sk); + u32 bw_thresh; + + if (bbr_full_bw_reached(sk) || !bbr->round_start || rs->is_app_limited) + return; + + bw_thresh = (u64)bbr->full_bw * bbr_full_bw_thresh >> BBR_SCALE; + if (bbr_max_bw(sk) >= bw_thresh) { + bbr->full_bw = bbr_max_bw(sk); + bbr->full_bw_cnt = 0; + return; + } + ++bbr->full_bw_cnt; +} + +/* If pipe is probably full, drain the queue and then enter steady-state. */ +static void bbr_check_drain(struct sock *sk, const struct rate_sample *rs) +{ + struct bbr *bbr = inet_csk_ca(sk); + + if (bbr->mode == BBR_STARTUP && bbr_full_bw_reached(sk)) { + bbr->mode = BBR_DRAIN; /* drain queue we created */ + bbr->pacing_gain = bbr_drain_gain; /* pace slow to drain */ + bbr->cwnd_gain = bbr_high_gain; /* maintain cwnd */ + } /* fall through to check if in-flight is already small: */ + if (bbr->mode == BBR_DRAIN && + tcp_packets_in_flight(tcp_sk(sk)) <= + bbr_target_cwnd(sk, bbr_max_bw(sk), BBR_UNIT)) + bbr_reset_probe_bw_mode(sk); /* we estimate queue is drained */ +} + +/* The goal of PROBE_RTT mode is to have BBR flows cooperatively and + * periodically drain the bottleneck queue, to converge to measure the true + * min_rtt (unloaded propagation delay). This allows the flows to keep queues + * small (reducing queuing delay and packet loss) and achieve fairness among + * BBR flows. + * + * The min_rtt filter window is 10 seconds. When the min_rtt estimate expires, + * we enter PROBE_RTT mode and cap the cwnd at bbr_cwnd_min_target=4 packets. + * After at least bbr_probe_rtt_mode_ms=200ms and at least one packet-timed + * round trip elapsed with that flight size <= 4, we leave PROBE_RTT mode and + * re-enter the previous mode. BBR uses 200ms to approximately bound the + * performance penalty of PROBE_RTT's cwnd capping to roughly 2% (200ms/10s). + * + * Note that flows need only pay 2% if they are busy sending over the last 10 + * seconds. Interactive applications (e.g., Web, RPCs, video chunks) often have + * natural silences or low-rate periods within 10 seconds where the rate is low + * enough for long enough to drain its queue in the bottleneck. We pick up + * these min RTT measurements opportunistically with our min_rtt filter. :-) + */ +static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + bool filter_expired; + + /* Track min RTT seen in the min_rtt_win_sec filter window: */ + filter_expired = after(tcp_time_stamp, + bbr->min_rtt_stamp + bbr_min_rtt_win_sec * HZ); + if (rs->rtt_us >= 0 && + (rs->rtt_us <= bbr->min_rtt_us || filter_expired)) { + bbr->min_rtt_us = rs->rtt_us; + bbr->min_rtt_stamp = tcp_time_stamp; + } + + if (bbr_probe_rtt_mode_ms > 0 && filter_expired && + !bbr->idle_restart && bbr->mode != BBR_PROBE_RTT) { + bbr->mode = BBR_PROBE_RTT; /* dip, drain queue */ + bbr->pacing_gain = BBR_UNIT; + bbr->cwnd_gain = BBR_UNIT; + bbr_save_cwnd(sk); /* note cwnd so we can restore it */ + bbr->probe_rtt_done_stamp = 0; + } + + if (bbr->mode == BBR_PROBE_RTT) { + /* Ignore low rate samples during this mode. */ + tp->app_limited = + (tp->delivered + tcp_packets_in_flight(tp)) ? : 1; + /* Maintain min packets in flight for max(200 ms, 1 round). */ + if (!bbr->probe_rtt_done_stamp && + tcp_packets_in_flight(tp) <= bbr_cwnd_min_target) { + bbr->probe_rtt_done_stamp = tcp_time_stamp + + msecs_to_jiffies(bbr_probe_rtt_mode_ms); + bbr->probe_rtt_round_done = 0; + bbr->next_rtt_delivered = tp->delivered; + } else if (bbr->probe_rtt_done_stamp) { + if (bbr->round_start) + bbr->probe_rtt_round_done = 1; + if (bbr->probe_rtt_round_done && + after(tcp_time_stamp, bbr->probe_rtt_done_stamp)) { + bbr->min_rtt_stamp = tcp_time_stamp; + bbr->restore_cwnd = 1; /* snap to prior_cwnd */ + bbr_reset_mode(sk); + } + } + } + bbr->idle_restart = 0; +} + +static void bbr_update_model(struct sock *sk, const struct rate_sample *rs) +{ + bbr_update_bw(sk, rs); + bbr_update_cycle_phase(sk, rs); + bbr_check_full_bw_reached(sk, rs); + bbr_check_drain(sk, rs); + bbr_update_min_rtt(sk, rs); +} + +static void bbr_main(struct sock *sk, const struct rate_sample *rs) +{ + struct bbr *bbr = inet_csk_ca(sk); + u32 bw; + + bbr_update_model(sk, rs); + + bw = bbr_bw(sk); + bbr_set_pacing_rate(sk, bw, bbr->pacing_gain); + bbr_set_tso_segs_goal(sk); + bbr_set_cwnd(sk, rs, rs->acked_sacked, bw, bbr->cwnd_gain); +} + +static void bbr_init(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + u64 bw; + + bbr->prior_cwnd = 0; + bbr->tso_segs_goal = 0; /* default segs per skb until first ACK */ + bbr->rtt_cnt = 0; + bbr->next_rtt_delivered = 0; + bbr->prev_ca_state = TCP_CA_Open; + bbr->packet_conservation = 0; + + bbr->probe_rtt_done_stamp = 0; + bbr->probe_rtt_round_done = 0; + bbr->min_rtt_us = tcp_min_rtt(tp); + bbr->min_rtt_stamp = tcp_time_stamp; + + minmax_reset(&bbr->bw, bbr->rtt_cnt, 0); /* init max bw to 0 */ + + /* Initialize pacing rate to: high_gain * init_cwnd / RTT. */ + bw = (u64)tp->snd_cwnd * BW_UNIT; + do_div(bw, (tp->srtt_us >> 3) ? : USEC_PER_MSEC); + sk->sk_pacing_rate = 0; /* force an update of sk_pacing_rate */ + bbr_set_pacing_rate(sk, bw, bbr_high_gain); + + bbr->restore_cwnd = 0; + bbr->round_start = 0; + bbr->idle_restart = 0; + bbr->full_bw = 0; + bbr->full_bw_cnt = 0; + bbr->cycle_mstamp.v64 = 0; + bbr->cycle_idx = 0; + bbr_reset_lt_bw_sampling(sk); + bbr_reset_startup_mode(sk); +} + +static u32 bbr_sndbuf_expand(struct sock *sk) +{ + /* Provision 3 * cwnd since BBR may slow-start even during recovery. */ + return 3; +} + +/* In theory BBR does not need to undo the cwnd since it does not + * always reduce cwnd on losses (see bbr_main()). Keep it for now. + */ +static u32 bbr_undo_cwnd(struct sock *sk) +{ + return tcp_sk(sk)->snd_cwnd; +} + +/* Entering loss recovery, so save cwnd for when we exit or undo recovery. */ +static u32 bbr_ssthresh(struct sock *sk) +{ + bbr_save_cwnd(sk); + return TCP_INFINITE_SSTHRESH; /* BBR does not use ssthresh */ +} + +static size_t bbr_get_info(struct sock *sk, u32 ext, int *attr, + union tcp_cc_info *info) +{ + if (ext & (1 << (INET_DIAG_BBRINFO - 1)) || + ext & (1 << (INET_DIAG_VEGASINFO - 1))) { + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + u64 bw = bbr_bw(sk); + + bw = bw * tp->mss_cache * USEC_PER_SEC >> BW_SCALE; + memset(&info->bbr, 0, sizeof(info->bbr)); + info->bbr.bbr_bw_lo = (u32)bw; + info->bbr.bbr_bw_hi = (u32)(bw >> 32); + info->bbr.bbr_min_rtt = bbr->min_rtt_us; + info->bbr.bbr_pacing_gain = bbr->pacing_gain; + info->bbr.bbr_cwnd_gain = bbr->cwnd_gain; + *attr = INET_DIAG_BBRINFO; + return sizeof(info->bbr); + } + return 0; +} + +static void bbr_set_state(struct sock *sk, u8 new_state) +{ + struct bbr *bbr = inet_csk_ca(sk); + + if (new_state == TCP_CA_Loss) { + struct rate_sample rs = { .losses = 1 }; + + bbr->prev_ca_state = TCP_CA_Loss; + bbr->full_bw = 0; + bbr->round_start = 1; /* treat RTO like end of a round */ + bbr_lt_bw_sampling(sk, &rs); + } +} + +static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = { + .flags = TCP_CONG_NON_RESTRICTED, + .name = "bbr", + .owner = THIS_MODULE, + .init = bbr_init, + .cong_control = bbr_main, + .sndbuf_expand = bbr_sndbuf_expand, + .undo_cwnd = bbr_undo_cwnd, + .cwnd_event = bbr_cwnd_event, + .ssthresh = bbr_ssthresh, + .tso_segs_goal = bbr_tso_segs_goal, + .get_info = bbr_get_info, + .set_state = bbr_set_state, +}; + +static int __init bbr_register(void) +{ + BUILD_BUG_ON(sizeof(struct bbr) > ICSK_CA_PRIV_SIZE); + return tcp_register_congestion_control(&tcp_bbr_cong_ops); +} + +static void __exit bbr_unregister(void) +{ + tcp_unregister_congestion_control(&tcp_bbr_cong_ops); +} + +module_init(bbr_register); +module_exit(bbr_unregister); + +MODULE_AUTHOR("Van Jacobson "); +MODULE_AUTHOR("Neal Cardwell "); +MODULE_AUTHOR("Yuchung Cheng "); +MODULE_AUTHOR("Soheil Hassas Yeganeh "); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_DESCRIPTION("TCP BBR (Bottleneck Bandwidth and RTT)"); From 4bdcc6ca2158f43b1770e020f9b71ab8a808594f Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Tue, 20 Sep 2016 08:14:08 +0300 Subject: [PATCH 1304/1715] mlxsw: spectrum: Make offloads stats functions static The offloads stats functions are local to this file, make them static. Fixes: fc1bbb0f1831 ('mlxsw: spectrum: Implement offload stats ndo [..]') Signed-off-by: Or Gerlitz Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index fa312610c2d2..43d508111b17 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -823,7 +823,7 @@ err_span_port_mtu_update: return err; } -int +static int mlxsw_sp_port_get_sw_stats64(const struct net_device *dev, struct rtnl_link_stats64 *stats) { @@ -855,7 +855,7 @@ mlxsw_sp_port_get_sw_stats64(const struct net_device *dev, return 0; } -bool mlxsw_sp_port_has_offload_stats(int attr_id) +static bool mlxsw_sp_port_has_offload_stats(int attr_id) { switch (attr_id) { case IFLA_OFFLOAD_XSTATS_CPU_HIT: @@ -865,8 +865,8 @@ bool mlxsw_sp_port_has_offload_stats(int attr_id) return false; } -int mlxsw_sp_port_get_offload_stats(int attr_id, const struct net_device *dev, - void *sp) +static int mlxsw_sp_port_get_offload_stats(int attr_id, const struct net_device *dev, + void *sp) { switch (attr_id) { case IFLA_OFFLOAD_XSTATS_CPU_HIT: From 9f7f797c1df40ee6c0329e9e53ea9ca0d224f55d Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Tue, 20 Sep 2016 11:16:49 +0200 Subject: [PATCH 1305/1715] mlxsw: pci: Add lag related resources to resources query Add max lag and max ports in lag resources to resources query. Signed-off-by: Nogah Frankel Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/core.h | 6 +++++- drivers/net/ethernet/mellanox/mlxsw/pci.c | 10 ++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index d2e32979319c..51f27a35749e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -269,8 +269,12 @@ struct mlxsw_driver { }; struct mlxsw_resources { - u8 max_span_valid:1; + u8 max_span_valid:1, + max_lag_valid:1, + max_ports_in_lag_valid:1; u8 max_span; + u8 max_lag; + u8 max_ports_in_lag; }; struct mlxsw_resources *mlxsw_core_resources_get(struct mlxsw_core *mlxsw_core); diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index 1d1360c178bb..cb284eaeec39 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -1156,6 +1156,8 @@ mlxsw_pci_config_profile_swid_config(struct mlxsw_pci *mlxsw_pci, #define MLXSW_RESOURCES_TABLE_END_ID 0xffff #define MLXSW_MAX_SPAN_ID 0x2420 +#define MLXSW_MAX_LAG_ID 0x2520 +#define MLXSW_MAX_PORTS_IN_LAG_ID 0x2521 #define MLXSW_RESOURCES_QUERY_MAX_QUERIES 100 #define MLXSW_RESOURCES_PER_QUERY 32 @@ -1167,6 +1169,14 @@ static void mlxsw_pci_resources_query_parse(int id, u64 val, resources->max_span = val; resources->max_span_valid = 1; break; + case MLXSW_MAX_LAG_ID: + resources->max_lag = val; + resources->max_lag_valid = 1; + break; + case MLXSW_MAX_PORTS_IN_LAG_ID: + resources->max_ports_in_lag = val; + resources->max_ports_in_lag_valid = 1; + break; default: break; } From ce0bd2b0c57a2d97ea89f87f61b9f5758139bcb8 Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Tue, 20 Sep 2016 11:16:50 +0200 Subject: [PATCH 1306/1715] mlxsw: spectrum: lag resources- use resources data instead of consts Use max lag and max ports in lag resources as the result of resource query instead of using const to save them. Signed-off-by: Nogah Frankel Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/core.c | 26 ++++++------ drivers/net/ethernet/mellanox/mlxsw/core.h | 4 -- drivers/net/ethernet/mellanox/mlxsw/pci.c | 12 ------ .../net/ethernet/mellanox/mlxsw/spectrum.c | 41 +++++++++++++++---- .../net/ethernet/mellanox/mlxsw/spectrum.h | 5 +-- .../mellanox/mlxsw/spectrum_switchdev.c | 4 +- .../net/ethernet/mellanox/mlxsw/switchx2.c | 4 -- 7 files changed, 50 insertions(+), 46 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 068ee65a960b..aa33d58b9f81 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -1100,10 +1100,15 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, goto err_alloc_stats; } - if (mlxsw_driver->profile->used_max_lag && - mlxsw_driver->profile->used_max_port_per_lag) { - alloc_size = sizeof(u8) * mlxsw_driver->profile->max_lag * - mlxsw_driver->profile->max_port_per_lag; + err = mlxsw_bus->init(bus_priv, mlxsw_core, mlxsw_driver->profile, + &mlxsw_core->resources); + if (err) + goto err_bus_init; + + if (mlxsw_core->resources.max_lag_valid && + mlxsw_core->resources.max_ports_in_lag_valid) { + alloc_size = sizeof(u8) * mlxsw_core->resources.max_lag * + mlxsw_core->resources.max_ports_in_lag; mlxsw_core->lag.mapping = kzalloc(alloc_size, GFP_KERNEL); if (!mlxsw_core->lag.mapping) { err = -ENOMEM; @@ -1111,11 +1116,6 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, } } - err = mlxsw_bus->init(bus_priv, mlxsw_core, mlxsw_driver->profile, - &mlxsw_core->resources); - if (err) - goto err_bus_init; - err = mlxsw_emad_init(mlxsw_core); if (err) goto err_emad_init; @@ -1146,10 +1146,10 @@ err_hwmon_init: err_devlink_register: mlxsw_emad_fini(mlxsw_core); err_emad_init: - mlxsw_bus->fini(bus_priv); -err_bus_init: kfree(mlxsw_core->lag.mapping); err_alloc_lag_mapping: + mlxsw_bus->fini(bus_priv); +err_bus_init: free_percpu(mlxsw_core->pcpu_stats); err_alloc_stats: devlink_free(devlink); @@ -1615,7 +1615,7 @@ EXPORT_SYMBOL(mlxsw_core_skb_receive); static int mlxsw_core_lag_mapping_index(struct mlxsw_core *mlxsw_core, u16 lag_id, u8 port_index) { - return mlxsw_core->driver->profile->max_port_per_lag * lag_id + + return mlxsw_core->resources.max_ports_in_lag * lag_id + port_index; } @@ -1644,7 +1644,7 @@ void mlxsw_core_lag_mapping_clear(struct mlxsw_core *mlxsw_core, { int i; - for (i = 0; i < mlxsw_core->driver->profile->max_port_per_lag; i++) { + for (i = 0; i < mlxsw_core->resources.max_ports_in_lag; i++) { int index = mlxsw_core_lag_mapping_index(mlxsw_core, lag_id, i); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index 51f27a35749e..558d1ce89531 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -179,8 +179,6 @@ struct mlxsw_swid_config { struct mlxsw_config_profile { u16 used_max_vepa_channels:1, - used_max_lag:1, - used_max_port_per_lag:1, used_max_mid:1, used_max_pgt:1, used_max_system_port:1, @@ -194,8 +192,6 @@ struct mlxsw_config_profile { used_adaptive_routing_group_cap:1, used_kvd_sizes:1; u8 max_vepa_channels; - u16 max_lag; - u16 max_port_per_lag; u16 max_mid; u16 max_pgt; u16 max_system_port; diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index cb284eaeec39..57c2d3474bb6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -1232,18 +1232,6 @@ static int mlxsw_pci_config_profile(struct mlxsw_pci *mlxsw_pci, char *mbox, mlxsw_cmd_mbox_config_profile_max_vepa_channels_set( mbox, profile->max_vepa_channels); } - if (profile->used_max_lag) { - mlxsw_cmd_mbox_config_profile_set_max_lag_set( - mbox, 1); - mlxsw_cmd_mbox_config_profile_max_lag_set( - mbox, profile->max_lag); - } - if (profile->used_max_port_per_lag) { - mlxsw_cmd_mbox_config_profile_set_max_port_per_lag_set( - mbox, 1); - mlxsw_cmd_mbox_config_profile_max_port_per_lag_set( - mbox, profile->max_port_per_lag); - } if (profile->used_max_mid) { mlxsw_cmd_mbox_config_profile_set_max_mid_set( mbox, 1); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 43d508111b17..0f96d1108568 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -2887,7 +2887,9 @@ static int mlxsw_sp_flood_init(struct mlxsw_sp *mlxsw_sp) static int mlxsw_sp_lag_init(struct mlxsw_sp *mlxsw_sp) { + struct mlxsw_resources *resources; char slcr_pl[MLXSW_REG_SLCR_LEN]; + int err; mlxsw_reg_slcr_pack(slcr_pl, MLXSW_REG_SLCR_LAG_HASH_SMAC | MLXSW_REG_SLCR_LAG_HASH_DMAC | @@ -2898,7 +2900,26 @@ static int mlxsw_sp_lag_init(struct mlxsw_sp *mlxsw_sp) MLXSW_REG_SLCR_LAG_HASH_SPORT | MLXSW_REG_SLCR_LAG_HASH_DPORT | MLXSW_REG_SLCR_LAG_HASH_IPPROTO); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcr), slcr_pl); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcr), slcr_pl); + if (err) + return err; + + resources = mlxsw_core_resources_get(mlxsw_sp->core); + if (!(resources->max_lag_valid && resources->max_ports_in_lag_valid)) + return -EIO; + + mlxsw_sp->lags = kcalloc(resources->max_lag, + sizeof(struct mlxsw_sp_upper), + GFP_KERNEL); + if (!mlxsw_sp->lags) + return -ENOMEM; + + return 0; +} + +static void mlxsw_sp_lag_fini(struct mlxsw_sp *mlxsw_sp) +{ + kfree(mlxsw_sp->lags); } static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, @@ -2982,6 +3003,7 @@ err_span_init: err_router_init: mlxsw_sp_switchdev_fini(mlxsw_sp); err_switchdev_init: + mlxsw_sp_lag_fini(mlxsw_sp); err_lag_init: mlxsw_sp_buffers_fini(mlxsw_sp); err_buffers_init: @@ -3001,6 +3023,7 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core) mlxsw_sp_span_fini(mlxsw_sp); mlxsw_sp_router_fini(mlxsw_sp); mlxsw_sp_switchdev_fini(mlxsw_sp); + mlxsw_sp_lag_fini(mlxsw_sp); mlxsw_sp_buffers_fini(mlxsw_sp); mlxsw_sp_traps_fini(mlxsw_sp); mlxsw_sp_event_unregister(mlxsw_sp, MLXSW_TRAP_ID_PUDE); @@ -3013,10 +3036,6 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core) static struct mlxsw_config_profile mlxsw_sp_config_profile = { .used_max_vepa_channels = 1, .max_vepa_channels = 0, - .used_max_lag = 1, - .max_lag = MLXSW_SP_LAG_MAX, - .used_max_port_per_lag = 1, - .max_port_per_lag = MLXSW_SP_PORT_PER_LAG_MAX, .used_max_mid = 1, .max_mid = MLXSW_SP_MID_MAX, .used_max_pgt = 1, @@ -3683,12 +3702,14 @@ static bool mlxsw_sp_port_fdb_should_flush(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; u8 local_port = mlxsw_sp_port->local_port; u16 lag_id = mlxsw_sp_port->lag_id; + struct mlxsw_resources *resources; int i, count = 0; if (!mlxsw_sp_port->lagged) return true; - for (i = 0; i < MLXSW_SP_PORT_PER_LAG_MAX; i++) { + resources = mlxsw_core_resources_get(mlxsw_sp->core); + for (i = 0; i < resources->max_ports_in_lag; i++) { struct mlxsw_sp_port *lag_port; lag_port = mlxsw_sp_port_lagged_get(mlxsw_sp, lag_id, i); @@ -3894,11 +3915,13 @@ static int mlxsw_sp_lag_index_get(struct mlxsw_sp *mlxsw_sp, struct net_device *lag_dev, u16 *p_lag_id) { + struct mlxsw_resources *resources; struct mlxsw_sp_upper *lag; int free_lag_id = -1; int i; - for (i = 0; i < MLXSW_SP_LAG_MAX; i++) { + resources = mlxsw_core_resources_get(mlxsw_sp->core); + for (i = 0; i < resources->max_lag; i++) { lag = mlxsw_sp_lag_get(mlxsw_sp, i); if (lag->ref_count) { if (lag->dev == lag_dev) { @@ -3932,9 +3955,11 @@ mlxsw_sp_master_lag_check(struct mlxsw_sp *mlxsw_sp, static int mlxsw_sp_port_lag_index_get(struct mlxsw_sp *mlxsw_sp, u16 lag_id, u8 *p_port_index) { + struct mlxsw_resources *resources; int i; - for (i = 0; i < MLXSW_SP_PORT_PER_LAG_MAX; i++) { + resources = mlxsw_core_resources_get(mlxsw_sp->core); + for (i = 0; i < resources->max_ports_in_lag; i++) { if (!mlxsw_sp_port_lagged_get(mlxsw_sp, lag_id, i)) { *p_port_index = i; return 0; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 49f4cafce148..a056aaa6c584 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -56,9 +56,6 @@ #define MLXSW_SP_RFID_BASE 15360 #define MLXSW_SP_RIF_MAX 800 -#define MLXSW_SP_LAG_MAX 64 -#define MLXSW_SP_PORT_PER_LAG_MAX 16 - #define MLXSW_SP_MID_MAX 7000 #define MLXSW_SP_PORTS_PER_CLUSTER_MAX 4 @@ -290,7 +287,7 @@ struct mlxsw_sp { #define MLXSW_SP_DEFAULT_AGEING_TIME 300 u32 ageing_time; struct mlxsw_sp_upper master_bridge; - struct mlxsw_sp_upper lags[MLXSW_SP_LAG_MAX]; + struct mlxsw_sp_upper *lags; u8 port_to_module[MLXSW_PORT_MAX_PORTS]; struct mlxsw_sp_sb sb; struct mlxsw_sp_router router; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 7186c4810785..2b04b76b503e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -1205,9 +1205,11 @@ static struct mlxsw_sp_port *mlxsw_sp_lag_rep_port(struct mlxsw_sp *mlxsw_sp, u16 lag_id) { struct mlxsw_sp_port *mlxsw_sp_port; + struct mlxsw_resources *resources; int i; - for (i = 0; i < MLXSW_SP_PORT_PER_LAG_MAX; i++) { + resources = mlxsw_core_resources_get(mlxsw_sp->core); + for (i = 0; i < resources->max_ports_in_lag; i++) { mlxsw_sp_port = mlxsw_sp_port_lagged_get(mlxsw_sp, lag_id, i); if (mlxsw_sp_port) return mlxsw_sp_port; diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c index 377daa4d509c..8b15bf0c744d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c +++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c @@ -1512,10 +1512,6 @@ static void mlxsw_sx_fini(struct mlxsw_core *mlxsw_core) static struct mlxsw_config_profile mlxsw_sx_config_profile = { .used_max_vepa_channels = 1, .max_vepa_channels = 0, - .used_max_lag = 1, - .max_lag = 64, - .used_max_port_per_lag = 1, - .max_port_per_lag = 16, .used_max_mid = 1, .max_mid = 7000, .used_max_pgt = 1, From 2acd10c51bd2ce3a39c75fa3ff113e32e2413c6f Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Tue, 20 Sep 2016 11:16:51 +0200 Subject: [PATCH 1307/1715] mlxsw: pci: Add KVD size relate resources Add KVD size, and minimum sizes for the single and double sections resources to resources query. Signed-off-by: Nogah Frankel Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/core.h | 8 +++++++- drivers/net/ethernet/mellanox/mlxsw/pci.c | 15 +++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index 558d1ce89531..76ad566a0421 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -267,10 +267,16 @@ struct mlxsw_driver { struct mlxsw_resources { u8 max_span_valid:1, max_lag_valid:1, - max_ports_in_lag_valid:1; + max_ports_in_lag_valid:1, + kvd_size_valid:1, + kvd_single_min_size_valid:1, + kvd_double_min_size_valid:1; u8 max_span; u8 max_lag; u8 max_ports_in_lag; + u32 kvd_size; + u32 kvd_single_min_size; + u32 kvd_double_min_size; }; struct mlxsw_resources *mlxsw_core_resources_get(struct mlxsw_core *mlxsw_core); diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index 57c2d3474bb6..7b2ab1ec1290 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -1158,6 +1158,9 @@ mlxsw_pci_config_profile_swid_config(struct mlxsw_pci *mlxsw_pci, #define MLXSW_MAX_SPAN_ID 0x2420 #define MLXSW_MAX_LAG_ID 0x2520 #define MLXSW_MAX_PORTS_IN_LAG_ID 0x2521 +#define MLXSW_KVD_SIZE_ID 0x1001 +#define MLXSW_KVD_SINGLE_MIN_SIZE_ID 0x1002 +#define MLXSW_KVD_DOUBLE_MIN_SIZE_ID 0x1003 #define MLXSW_RESOURCES_QUERY_MAX_QUERIES 100 #define MLXSW_RESOURCES_PER_QUERY 32 @@ -1177,6 +1180,18 @@ static void mlxsw_pci_resources_query_parse(int id, u64 val, resources->max_ports_in_lag = val; resources->max_ports_in_lag_valid = 1; break; + case MLXSW_KVD_SIZE_ID: + resources->kvd_size = val; + resources->kvd_size_valid = 1; + break; + case MLXSW_KVD_SINGLE_MIN_SIZE_ID: + resources->kvd_single_min_size = val; + resources->kvd_single_min_size_valid = 1; + break; + case MLXSW_KVD_DOUBLE_MIN_SIZE_ID: + resources->kvd_double_min_size = val; + resources->kvd_double_min_size_valid = 1; + break; default: break; } From 403547d38d0b99f589a0d87f3a5f352895e54aae Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Tue, 20 Sep 2016 11:16:52 +0200 Subject: [PATCH 1308/1715] mlxsw: profile: Add KVD resources to profile config Use resources from resource query to determine values for the profile configuration. Add KVD determined section sizes to the resources struct. Change the profile struct and value to match this changes. Signed-off-by: Nogah Frankel Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/core.h | 15 +++- drivers/net/ethernet/mellanox/mlxsw/pci.c | 73 +++++++++++++++---- .../net/ethernet/mellanox/mlxsw/spectrum.c | 7 +- .../net/ethernet/mellanox/mlxsw/spectrum.h | 3 +- 4 files changed, 76 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index 76ad566a0421..1193bdcc4407 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -190,7 +190,8 @@ struct mlxsw_config_profile { used_max_pkey:1, used_ar_sec:1, used_adaptive_routing_group_cap:1, - used_kvd_sizes:1; + used_kvd_split_data:1; /* indicate for the kvd's values */ + u8 max_vepa_channels; u16 max_mid; u16 max_pgt; @@ -210,8 +211,9 @@ struct mlxsw_config_profile { u16 adaptive_routing_group_cap; u8 arn; u32 kvd_linear_size; - u32 kvd_hash_single_size; - u32 kvd_hash_double_size; + u16 kvd_hash_granularity; + u8 kvd_hash_single_parts; + u8 kvd_hash_double_parts; u8 resource_query_enable; struct mlxsw_swid_config swid_config[MLXSW_CONFIG_PROFILE_SWID_COUNT]; }; @@ -277,6 +279,13 @@ struct mlxsw_resources { u32 kvd_size; u32 kvd_single_min_size; u32 kvd_double_min_size; + + /* Internal resources. + * Determined by the SW, not queried from the HW. + */ + u32 kvd_single_size; + u32 kvd_double_size; + u32 kvd_linear_size; }; struct mlxsw_resources *mlxsw_core_resources_get(struct mlxsw_core *mlxsw_core); diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index 7b2ab1ec1290..c2d2fa1a1d3a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -1234,10 +1234,52 @@ static int mlxsw_pci_resources_query(struct mlxsw_pci *mlxsw_pci, char *mbox, return -EIO; } +static int mlxsw_pci_profile_get_kvd_sizes(const struct mlxsw_config_profile *profile, + struct mlxsw_resources *resources) +{ + u32 singles_size, doubles_size, linear_size; + + if (!resources->kvd_single_min_size_valid || + !resources->kvd_double_min_size_valid || + !profile->used_kvd_split_data) + return -EIO; + + linear_size = profile->kvd_linear_size; + + /* The hash part is what left of the kvd without the + * linear part. It is split to the single size and + * double size by the parts ratio from the profile. + * Both sizes must be a multiplications of the + * granularity from the profile. + */ + doubles_size = (resources->kvd_size - linear_size); + doubles_size *= profile->kvd_hash_double_parts; + doubles_size /= (profile->kvd_hash_double_parts + + profile->kvd_hash_single_parts); + doubles_size /= profile->kvd_hash_granularity; + doubles_size *= profile->kvd_hash_granularity; + singles_size = resources->kvd_size - doubles_size - + linear_size; + + /* Check results are legal. */ + if (singles_size < resources->kvd_single_min_size || + doubles_size < resources->kvd_double_min_size || + resources->kvd_size < linear_size) + return -EIO; + + resources->kvd_single_size = singles_size; + resources->kvd_double_size = doubles_size; + resources->kvd_linear_size = linear_size; + + return 0; +} + static int mlxsw_pci_config_profile(struct mlxsw_pci *mlxsw_pci, char *mbox, - const struct mlxsw_config_profile *profile) + const struct mlxsw_config_profile *profile, + struct mlxsw_resources *resources) { int i; + int err; mlxsw_cmd_mbox_zero(mbox); @@ -1323,19 +1365,22 @@ static int mlxsw_pci_config_profile(struct mlxsw_pci *mlxsw_pci, char *mbox, mlxsw_cmd_mbox_config_profile_adaptive_routing_group_cap_set( mbox, profile->adaptive_routing_group_cap); } - if (profile->used_kvd_sizes) { - mlxsw_cmd_mbox_config_profile_set_kvd_linear_size_set( - mbox, 1); - mlxsw_cmd_mbox_config_profile_kvd_linear_size_set( - mbox, profile->kvd_linear_size); - mlxsw_cmd_mbox_config_profile_set_kvd_hash_single_size_set( - mbox, 1); - mlxsw_cmd_mbox_config_profile_kvd_hash_single_size_set( - mbox, profile->kvd_hash_single_size); + if (resources->kvd_size_valid) { + err = mlxsw_pci_profile_get_kvd_sizes(profile, resources); + if (err) + return err; + + mlxsw_cmd_mbox_config_profile_set_kvd_linear_size_set(mbox, 1); + mlxsw_cmd_mbox_config_profile_kvd_linear_size_set(mbox, + resources->kvd_linear_size); + mlxsw_cmd_mbox_config_profile_set_kvd_hash_single_size_set(mbox, + 1); + mlxsw_cmd_mbox_config_profile_kvd_hash_single_size_set(mbox, + resources->kvd_single_size); mlxsw_cmd_mbox_config_profile_set_kvd_hash_double_size_set( - mbox, 1); - mlxsw_cmd_mbox_config_profile_kvd_hash_double_size_set( - mbox, profile->kvd_hash_double_size); + mbox, 1); + mlxsw_cmd_mbox_config_profile_kvd_hash_double_size_set(mbox, + resources->kvd_double_size); } for (i = 0; i < MLXSW_CONFIG_PROFILE_SWID_COUNT; i++) @@ -1537,7 +1582,7 @@ static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core, if (err) goto err_query_resources; - err = mlxsw_pci_config_profile(mlxsw_pci, mbox, profile); + err = mlxsw_pci_config_profile(mlxsw_pci, mbox, profile, resources); if (err) goto err_config_profile; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 0f96d1108568..d8e3da2f0667 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -3057,10 +3057,11 @@ static struct mlxsw_config_profile mlxsw_sp_config_profile = { .max_ib_mc = 0, .used_max_pkey = 1, .max_pkey = 0, - .used_kvd_sizes = 1, + .used_kvd_split_data = 1, + .kvd_hash_granularity = MLXSW_SP_KVD_GRANULARITY, + .kvd_hash_single_parts = 2, + .kvd_hash_double_parts = 1, .kvd_linear_size = MLXSW_SP_KVD_LINEAR_SIZE, - .kvd_hash_single_size = MLXSW_SP_KVD_HASH_SINGLE_SIZE, - .kvd_hash_double_size = MLXSW_SP_KVD_HASH_DOUBLE_SIZE, .swid_config = { { .used_type = 1, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index a056aaa6c584..208dfeedde44 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -74,8 +74,7 @@ #define MLXSW_SP_CELLS_TO_BYTES(c) (c * MLXSW_SP_BYTES_PER_CELL) #define MLXSW_SP_KVD_LINEAR_SIZE 65536 /* entries */ -#define MLXSW_SP_KVD_HASH_SINGLE_SIZE 163840 /* entries */ -#define MLXSW_SP_KVD_HASH_DOUBLE_SIZE 32768 /* entries */ +#define MLXSW_SP_KVD_GRANULARITY 128 /* Maximum delay buffer needed in case of PAUSE frames, in cells. * Assumes 100m cable and maximum MTU. From b8a09f0a0938acee81c386e3c27e7aca78b087eb Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Tue, 20 Sep 2016 11:16:53 +0200 Subject: [PATCH 1309/1715] mlxsw: pci: Add max virtual routers resource Add the max number of virtual routers to resource query. Signed-off-by: Nogah Frankel Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/core.h | 4 +++- drivers/net/ethernet/mellanox/mlxsw/pci.c | 5 +++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index 1193bdcc4407..e936dc9bdb9b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -272,13 +272,15 @@ struct mlxsw_resources { max_ports_in_lag_valid:1, kvd_size_valid:1, kvd_single_min_size_valid:1, - kvd_double_min_size_valid:1; + kvd_double_min_size_valid:1, + max_virtual_routers_valid:1; u8 max_span; u8 max_lag; u8 max_ports_in_lag; u32 kvd_size; u32 kvd_single_min_size; u32 kvd_double_min_size; + u16 max_virtual_routers; /* Internal resources. * Determined by the SW, not queried from the HW. diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index c2d2fa1a1d3a..cb95f9a07200 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -1161,6 +1161,7 @@ mlxsw_pci_config_profile_swid_config(struct mlxsw_pci *mlxsw_pci, #define MLXSW_KVD_SIZE_ID 0x1001 #define MLXSW_KVD_SINGLE_MIN_SIZE_ID 0x1002 #define MLXSW_KVD_DOUBLE_MIN_SIZE_ID 0x1003 +#define MLXSW_MAX_VIRTUAL_ROUTERS_ID 0x2C01 #define MLXSW_RESOURCES_QUERY_MAX_QUERIES 100 #define MLXSW_RESOURCES_PER_QUERY 32 @@ -1192,6 +1193,10 @@ static void mlxsw_pci_resources_query_parse(int id, u64 val, resources->kvd_double_min_size = val; resources->kvd_double_min_size_valid = 1; break; + case MLXSW_MAX_VIRTUAL_ROUTERS_ID: + resources->max_virtual_routers = val; + resources->max_virtual_routers_valid = 1; + break; default: break; } From 9497c042bfa9012cdefaddd8588b031bfae59a7c Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Tue, 20 Sep 2016 11:16:54 +0200 Subject: [PATCH 1310/1715] mlxsw: spectrum: Implement max virtual routers resource Replace max virtual routers const with the result from the resource query. Signed-off-by: Nogah Frankel Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum.h | 4 +- .../ethernet/mellanox/mlxsw/spectrum_router.c | 43 ++++++++++++++++--- 2 files changed, 38 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 208dfeedde44..352079ef96b6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -64,8 +64,6 @@ #define MLXSW_SP_LPM_TREE_MAX 22 #define MLXSW_SP_LPM_TREE_COUNT (MLXSW_SP_LPM_TREE_MAX - MLXSW_SP_LPM_TREE_MIN) -#define MLXSW_SP_VIRTUAL_ROUTER_MAX 256 - #define MLXSW_SP_PORT_BASE_SPEED 25000 /* Mb/s */ #define MLXSW_SP_BYTES_PER_CELL 96 @@ -249,7 +247,7 @@ struct mlxsw_sp_port_mall_tc_entry { struct mlxsw_sp_router { struct mlxsw_sp_lpm_tree lpm_trees[MLXSW_SP_LPM_TREE_COUNT]; - struct mlxsw_sp_vr vrs[MLXSW_SP_VIRTUAL_ROUTER_MAX]; + struct mlxsw_sp_vr *vrs; struct rhashtable neigh_ht; struct { struct delayed_work dw; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 4afb49854eec..ded19f00ee72 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -372,10 +372,12 @@ static void mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp) static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp) { + struct mlxsw_resources *resources; struct mlxsw_sp_vr *vr; int i; - for (i = 0; i < MLXSW_SP_VIRTUAL_ROUTER_MAX; i++) { + resources = mlxsw_core_resources_get(mlxsw_sp->core); + for (i = 0; i < resources->max_virtual_routers; i++) { vr = &mlxsw_sp->router.vrs[i]; if (!vr->used) return vr; @@ -417,11 +419,14 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp, u32 tb_id, enum mlxsw_sp_l3proto proto) { + struct mlxsw_resources *resources; struct mlxsw_sp_vr *vr; int i; tb_id = mlxsw_sp_fix_tb_id(tb_id); - for (i = 0; i < MLXSW_SP_VIRTUAL_ROUTER_MAX; i++) { + + resources = mlxsw_core_resources_get(mlxsw_sp->core); + for (i = 0; i < resources->max_virtual_routers; i++) { vr = &mlxsw_sp->router.vrs[i]; if (vr->used && vr->proto == proto && vr->tb_id == tb_id) return vr; @@ -555,15 +560,33 @@ static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr) &vr->fib->prefix_usage); } -static void mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp) +static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp) { + struct mlxsw_resources *resources; struct mlxsw_sp_vr *vr; int i; - for (i = 0; i < MLXSW_SP_VIRTUAL_ROUTER_MAX; i++) { + resources = mlxsw_core_resources_get(mlxsw_sp->core); + if (!resources->max_virtual_routers_valid) + return -EIO; + + mlxsw_sp->router.vrs = kcalloc(resources->max_virtual_routers, + sizeof(struct mlxsw_sp_vr), + GFP_KERNEL); + if (!mlxsw_sp->router.vrs) + return -ENOMEM; + + for (i = 0; i < resources->max_virtual_routers; i++) { vr = &mlxsw_sp->router.vrs[i]; vr->id = i; } + + return 0; +} + +static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp) +{ + kfree(mlxsw_sp->router.vrs); } struct mlxsw_sp_neigh_key { @@ -1523,14 +1546,21 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) err = __mlxsw_sp_router_init(mlxsw_sp); if (err) return err; + mlxsw_sp_lpm_init(mlxsw_sp); - mlxsw_sp_vrs_init(mlxsw_sp); - err = mlxsw_sp_neigh_init(mlxsw_sp); + err = mlxsw_sp_vrs_init(mlxsw_sp); + if (err) + goto err_vrs_init; + + err = mlxsw_sp_neigh_init(mlxsw_sp); if (err) goto err_neigh_init; + return 0; err_neigh_init: + mlxsw_sp_vrs_fini(mlxsw_sp); +err_vrs_init: __mlxsw_sp_router_fini(mlxsw_sp); return err; } @@ -1538,6 +1568,7 @@ err_neigh_init: void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) { mlxsw_sp_neigh_fini(mlxsw_sp); + mlxsw_sp_vrs_fini(mlxsw_sp); __mlxsw_sp_router_fini(mlxsw_sp); } From e44d49cbbc5fb1a310b71212acc4e1f378a8fd91 Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Tue, 20 Sep 2016 11:16:55 +0200 Subject: [PATCH 1311/1715] mlxsw: pci: Add some miscellaneous resources Add max system ports, max regions and max vlan groups to resource query. Signed-off-by: Nogah Frankel Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/core.h | 10 ++++++++-- drivers/net/ethernet/mellanox/mlxsw/pci.c | 15 +++++++++++++++ drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 6 ------ 3 files changed, 23 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index e936dc9bdb9b..097e56014c0c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -267,13 +267,16 @@ struct mlxsw_driver { }; struct mlxsw_resources { - u8 max_span_valid:1, + u32 max_span_valid:1, max_lag_valid:1, max_ports_in_lag_valid:1, kvd_size_valid:1, kvd_single_min_size_valid:1, kvd_double_min_size_valid:1, - max_virtual_routers_valid:1; + max_virtual_routers_valid:1, + max_system_ports_valid:1, + max_vlan_groups_valid:1, + max_regions_valid:1; u8 max_span; u8 max_lag; u8 max_ports_in_lag; @@ -281,6 +284,9 @@ struct mlxsw_resources { u32 kvd_single_min_size; u32 kvd_double_min_size; u16 max_virtual_routers; + u16 max_system_ports; + u16 max_vlan_groups; + u16 max_regions; /* Internal resources. * Determined by the SW, not queried from the HW. diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index cb95f9a07200..826b5020c01a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -1162,6 +1162,9 @@ mlxsw_pci_config_profile_swid_config(struct mlxsw_pci *mlxsw_pci, #define MLXSW_KVD_SINGLE_MIN_SIZE_ID 0x1002 #define MLXSW_KVD_DOUBLE_MIN_SIZE_ID 0x1003 #define MLXSW_MAX_VIRTUAL_ROUTERS_ID 0x2C01 +#define MLXSW_MAX_SYSTEM_PORT_ID 0x2502 +#define MLXSW_MAX_VLAN_GROUPS_ID 0x2906 +#define MLXSW_MAX_REGIONS_ID 0x2901 #define MLXSW_RESOURCES_QUERY_MAX_QUERIES 100 #define MLXSW_RESOURCES_PER_QUERY 32 @@ -1197,6 +1200,18 @@ static void mlxsw_pci_resources_query_parse(int id, u64 val, resources->max_virtual_routers = val; resources->max_virtual_routers_valid = 1; break; + case MLXSW_MAX_SYSTEM_PORT_ID: + resources->max_system_ports = val; + resources->max_system_ports_valid = 1; + break; + case MLXSW_MAX_VLAN_GROUPS_ID: + resources->max_vlan_groups = val; + resources->max_vlan_groups_valid = 1; + break; + case MLXSW_MAX_REGIONS_ID: + resources->max_regions = val; + resources->max_regions_valid = 1; + break; default: break; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index d8e3da2f0667..9acc3e022543 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -3040,12 +3040,6 @@ static struct mlxsw_config_profile mlxsw_sp_config_profile = { .max_mid = MLXSW_SP_MID_MAX, .used_max_pgt = 1, .max_pgt = 0, - .used_max_system_port = 1, - .max_system_port = 64, - .used_max_vlan_groups = 1, - .max_vlan_groups = 127, - .used_max_regions = 1, - .max_regions = 400, .used_flood_tables = 1, .used_flood_mode = 1, .flood_mode = 3, From 274df7fb77fffd243336c6affa2f9469e8f11122 Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Tue, 20 Sep 2016 11:16:56 +0200 Subject: [PATCH 1312/1715] mlxsw: pci: Add max router interface resource Add the max number of rif (router interfaces) to resource query. Signed-off-by: Nogah Frankel Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/core.h | 4 +++- drivers/net/ethernet/mellanox/mlxsw/pci.c | 5 +++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index 097e56014c0c..c4f550b6f783 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -276,7 +276,8 @@ struct mlxsw_resources { max_virtual_routers_valid:1, max_system_ports_valid:1, max_vlan_groups_valid:1, - max_regions_valid:1; + max_regions_valid:1, + max_rif_valid:1; u8 max_span; u8 max_lag; u8 max_ports_in_lag; @@ -287,6 +288,7 @@ struct mlxsw_resources { u16 max_system_ports; u16 max_vlan_groups; u16 max_regions; + u16 max_rif; /* Internal resources. * Determined by the SW, not queried from the HW. diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index 826b5020c01a..e742bd4e8894 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -1165,6 +1165,7 @@ mlxsw_pci_config_profile_swid_config(struct mlxsw_pci *mlxsw_pci, #define MLXSW_MAX_SYSTEM_PORT_ID 0x2502 #define MLXSW_MAX_VLAN_GROUPS_ID 0x2906 #define MLXSW_MAX_REGIONS_ID 0x2901 +#define MLXSW_MAX_RIF_ID 0x2C02 #define MLXSW_RESOURCES_QUERY_MAX_QUERIES 100 #define MLXSW_RESOURCES_PER_QUERY 32 @@ -1212,6 +1213,10 @@ static void mlxsw_pci_resources_query_parse(int id, u64 val, resources->max_regions = val; resources->max_regions_valid = 1; break; + case MLXSW_MAX_RIF_ID: + resources->max_rif = val; + resources->max_rif_valid = 1; + break; default: break; } From 8f8a62d462492a043349a08abc51e2ad65b1f49a Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Tue, 20 Sep 2016 11:16:57 +0200 Subject: [PATCH 1313/1715] mlxsw: spectrum: Implement max rif resource Replace max rif const with using the result from resource query. Signed-off-by: Nogah Frankel Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum.c | 13 ++++---- .../net/ethernet/mellanox/mlxsw/spectrum.h | 9 ++++-- .../ethernet/mellanox/mlxsw/spectrum_router.c | 31 +++++++++++++++++-- 3 files changed, 41 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 9acc3e022543..80f27b504444 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -3017,7 +3017,6 @@ err_rx_listener_register: static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core) { struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); - int i; mlxsw_sp_ports_remove(mlxsw_sp); mlxsw_sp_span_fini(mlxsw_sp); @@ -3029,8 +3028,6 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core) mlxsw_sp_event_unregister(mlxsw_sp, MLXSW_TRAP_ID_PUDE); WARN_ON(!list_empty(&mlxsw_sp->vfids.list)); WARN_ON(!list_empty(&mlxsw_sp->fids)); - for (i = 0; i < MLXSW_SP_RIF_MAX; i++) - WARN_ON_ONCE(mlxsw_sp->rifs[i]); } static struct mlxsw_config_profile mlxsw_sp_config_profile = { @@ -3172,13 +3169,15 @@ static bool mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *r, static int mlxsw_sp_avail_rif_get(struct mlxsw_sp *mlxsw_sp) { + struct mlxsw_resources *resources; int i; - for (i = 0; i < MLXSW_SP_RIF_MAX; i++) + resources = mlxsw_core_resources_get(mlxsw_sp->core); + for (i = 0; i < resources->max_rif; i++) if (!mlxsw_sp->rifs[i]) return i; - return MLXSW_SP_RIF_MAX; + return MLXSW_SP_INVALID_RIF; } static void mlxsw_sp_vport_rif_sp_attr_get(struct mlxsw_sp_port *mlxsw_sp_vport, @@ -3258,7 +3257,7 @@ mlxsw_sp_vport_rif_sp_create(struct mlxsw_sp_port *mlxsw_sp_vport, int err; rif = mlxsw_sp_avail_rif_get(mlxsw_sp); - if (rif == MLXSW_SP_RIF_MAX) + if (rif == MLXSW_SP_INVALID_RIF) return ERR_PTR(-ERANGE); err = mlxsw_sp_vport_rif_sp_op(mlxsw_sp_vport, l3_dev, rif, true); @@ -3490,7 +3489,7 @@ static int mlxsw_sp_rif_bridge_create(struct mlxsw_sp *mlxsw_sp, int err; rif = mlxsw_sp_avail_rif_get(mlxsw_sp); - if (rif == MLXSW_SP_RIF_MAX) + if (rif == MLXSW_SP_INVALID_RIF) return -ERANGE; err = mlxsw_sp_router_port_flood_set(mlxsw_sp, f->fid, true); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 352079ef96b6..73cae211a5ce 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -54,7 +54,7 @@ #define MLXSW_SP_VFID_MAX 6656 /* Bridged VLAN interfaces */ #define MLXSW_SP_RFID_BASE 15360 -#define MLXSW_SP_RIF_MAX 800 +#define MLXSW_SP_INVALID_RIF 0xffff #define MLXSW_SP_MID_MAX 7000 @@ -269,7 +269,7 @@ struct mlxsw_sp { DECLARE_BITMAP(mapped, MLXSW_SP_MID_MAX); } br_mids; struct list_head fids; /* VLAN-aware bridge FIDs */ - struct mlxsw_sp_rif *rifs[MLXSW_SP_RIF_MAX]; + struct mlxsw_sp_rif **rifs; struct mlxsw_sp_port **ports; struct mlxsw_core *core; const struct mlxsw_bus_info *bus_info; @@ -477,9 +477,12 @@ static inline struct mlxsw_sp_rif * mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp, const struct net_device *dev) { + struct mlxsw_resources *resources; int i; - for (i = 0; i < MLXSW_SP_RIF_MAX; i++) + resources = mlxsw_core_resources_get(mlxsw_sp->core); + + for (i = 0; i < resources->max_rif; i++) if (mlxsw_sp->rifs[i] && mlxsw_sp->rifs[i]->dev == dev) return mlxsw_sp->rifs[i]; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index ded19f00ee72..cc653ac2d7d6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1522,19 +1522,46 @@ static void mlxsw_sp_nexthop_group_put(struct mlxsw_sp *mlxsw_sp, static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) { + struct mlxsw_resources *resources; char rgcr_pl[MLXSW_REG_RGCR_LEN]; + int err; + + resources = mlxsw_core_resources_get(mlxsw_sp->core); + if (!resources->max_rif_valid) + return -EIO; + + mlxsw_sp->rifs = kcalloc(resources->max_rif, + sizeof(struct mlxsw_sp_rif *), GFP_KERNEL); + if (!mlxsw_sp->rifs) + return -ENOMEM; mlxsw_reg_rgcr_pack(rgcr_pl, true); - mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, MLXSW_SP_RIF_MAX); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); + mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, resources->max_rif); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); + if (err) + goto err_rgcr_fail; + + return 0; + +err_rgcr_fail: + kfree(mlxsw_sp->rifs); + return err; } static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) { + struct mlxsw_resources *resources; char rgcr_pl[MLXSW_REG_RGCR_LEN]; + int i; mlxsw_reg_rgcr_pack(rgcr_pl, false); mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); + + resources = mlxsw_core_resources_get(mlxsw_sp->core); + for (i = 0; i < resources->max_rif; i++) + WARN_ON_ONCE(mlxsw_sp->rifs[i]); + + kfree(mlxsw_sp->rifs); } int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) From c2f672fc94642bae96821a393f342edcfa9794a6 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 20 Sep 2016 15:45:26 +0200 Subject: [PATCH 1314/1715] xfrm: state lookup can be lockless This is called from the packet input path, we get lock contention if many cpus handle ipsec in parallel. After recent rcu conversion it is safe to call __xfrm_state_lookup without the spinlock. Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_state.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index ba8bf518ba14..a38fdead38ea 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1431,9 +1431,9 @@ xfrm_state_lookup(struct net *net, u32 mark, const xfrm_address_t *daddr, __be32 { struct xfrm_state *x; - spin_lock_bh(&net->xfrm.xfrm_state_lock); + rcu_read_lock(); x = __xfrm_state_lookup(net, mark, daddr, spi, proto, family); - spin_unlock_bh(&net->xfrm.xfrm_state_lock); + rcu_read_unlock(); return x; } EXPORT_SYMBOL(xfrm_state_lookup); From 332ae8e2f6ecda5e50c5c62ed62894963e3a83f5 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 21 Sep 2016 11:43:53 +0100 Subject: [PATCH 1315/1715] net: cls_bpf: add hardware offload This patch adds hardware offload capability to cls_bpf classifier, similar to what have been done with U32 and flower. Signed-off-by: Jakub Kicinski Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 ++ include/net/pkt_cls.h | 14 ++++++++ net/sched/cls_bpf.c | 70 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 86 insertions(+) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a10d8d18ce19..69f242c71865 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -789,6 +789,7 @@ enum { TC_SETUP_CLSU32, TC_SETUP_CLSFLOWER, TC_SETUP_MATCHALL, + TC_SETUP_CLSBPF, }; struct tc_cls_u32_offload; @@ -800,6 +801,7 @@ struct tc_to_netdev { struct tc_cls_u32_offload *cls_u32; struct tc_cls_flower_offload *cls_flower; struct tc_cls_matchall_offload *cls_mall; + struct tc_cls_bpf_offload *cls_bpf; }; }; diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index a459be5fe1c2..41e8071dff87 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -486,4 +486,18 @@ struct tc_cls_matchall_offload { unsigned long cookie; }; +enum tc_clsbpf_command { + TC_CLSBPF_ADD, + TC_CLSBPF_REPLACE, + TC_CLSBPF_DESTROY, +}; + +struct tc_cls_bpf_offload { + enum tc_clsbpf_command command; + struct tcf_exts *exts; + struct bpf_prog *prog; + const char *name; + bool exts_integrated; +}; + #endif diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index c6f7a47541eb..6523c5b4c0a5 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -39,6 +39,7 @@ struct cls_bpf_prog { struct list_head link; struct tcf_result res; bool exts_integrated; + bool offloaded; struct tcf_exts exts; u32 handle; union { @@ -138,6 +139,71 @@ static bool cls_bpf_is_ebpf(const struct cls_bpf_prog *prog) return !prog->bpf_ops; } +static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog, + enum tc_clsbpf_command cmd) +{ + struct net_device *dev = tp->q->dev_queue->dev; + struct tc_cls_bpf_offload bpf_offload = {}; + struct tc_to_netdev offload; + + offload.type = TC_SETUP_CLSBPF; + offload.cls_bpf = &bpf_offload; + + bpf_offload.command = cmd; + bpf_offload.exts = &prog->exts; + bpf_offload.prog = prog->filter; + bpf_offload.name = prog->bpf_name; + bpf_offload.exts_integrated = prog->exts_integrated; + + return dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, + tp->protocol, &offload); +} + +static void cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog, + struct cls_bpf_prog *oldprog) +{ + struct net_device *dev = tp->q->dev_queue->dev; + struct cls_bpf_prog *obj = prog; + enum tc_clsbpf_command cmd; + + if (oldprog && oldprog->offloaded) { + if (tc_should_offload(dev, tp, 0)) { + cmd = TC_CLSBPF_REPLACE; + } else { + obj = oldprog; + cmd = TC_CLSBPF_DESTROY; + } + } else { + if (!tc_should_offload(dev, tp, 0)) + return; + cmd = TC_CLSBPF_ADD; + } + + if (cls_bpf_offload_cmd(tp, obj, cmd)) + return; + + obj->offloaded = true; + if (oldprog) + oldprog->offloaded = false; +} + +static void cls_bpf_stop_offload(struct tcf_proto *tp, + struct cls_bpf_prog *prog) +{ + int err; + + if (!prog->offloaded) + return; + + err = cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_DESTROY); + if (err) { + pr_err("Stopping hardware offload failed: %d\n", err); + return; + } + + prog->offloaded = false; +} + static int cls_bpf_init(struct tcf_proto *tp) { struct cls_bpf_head *head; @@ -177,6 +243,7 @@ static int cls_bpf_delete(struct tcf_proto *tp, unsigned long arg) { struct cls_bpf_prog *prog = (struct cls_bpf_prog *) arg; + cls_bpf_stop_offload(tp, prog); list_del_rcu(&prog->link); tcf_unbind_filter(tp, &prog->res); call_rcu(&prog->rcu, __cls_bpf_delete_prog); @@ -193,6 +260,7 @@ static bool cls_bpf_destroy(struct tcf_proto *tp, bool force) return false; list_for_each_entry_safe(prog, tmp, &head->plist, link) { + cls_bpf_stop_offload(tp, prog); list_del_rcu(&prog->link); tcf_unbind_filter(tp, &prog->res); call_rcu(&prog->rcu, __cls_bpf_delete_prog); @@ -415,6 +483,8 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, if (ret < 0) goto errout; + cls_bpf_offload(tp, prog, oldprog); + if (oldprog) { list_replace_rcu(&oldprog->link, &prog->link); tcf_unbind_filter(tp, &oldprog->res); From 0d01d45f1b251448590c710baa32f722e43c62c7 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 21 Sep 2016 11:43:54 +0100 Subject: [PATCH 1316/1715] net: cls_bpf: limit hardware offload by software-only flag Add cls_bpf support for the TCA_CLS_FLAGS_SKIP_HW flag. Unlike U32 and flower cls_bpf already has some netlink flags defined. Create a new attribute to be able to use the same flag values as the above. Unlike U32 and flower reject unknown flags. Signed-off-by: Jakub Kicinski Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 1 + include/uapi/linux/pkt_cls.h | 1 + net/sched/cls_bpf.c | 22 ++++++++++++++++++++-- 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 41e8071dff87..57af9f3032ff 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -498,6 +498,7 @@ struct tc_cls_bpf_offload { struct bpf_prog *prog; const char *name; bool exts_integrated; + u32 gen_flags; }; #endif diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 8915b61bbf83..8fd715f806a2 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -396,6 +396,7 @@ enum { TCA_BPF_FD, TCA_BPF_NAME, TCA_BPF_FLAGS, + TCA_BPF_FLAGS_GEN, __TCA_BPF_MAX, }; diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 6523c5b4c0a5..ebf01f7c1470 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -27,6 +27,8 @@ MODULE_AUTHOR("Daniel Borkmann "); MODULE_DESCRIPTION("TC BPF based classifier"); #define CLS_BPF_NAME_LEN 256 +#define CLS_BPF_SUPPORTED_GEN_FLAGS \ + TCA_CLS_FLAGS_SKIP_HW struct cls_bpf_head { struct list_head plist; @@ -40,6 +42,7 @@ struct cls_bpf_prog { struct tcf_result res; bool exts_integrated; bool offloaded; + u32 gen_flags; struct tcf_exts exts; u32 handle; union { @@ -55,6 +58,7 @@ struct cls_bpf_prog { static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = { [TCA_BPF_CLASSID] = { .type = NLA_U32 }, [TCA_BPF_FLAGS] = { .type = NLA_U32 }, + [TCA_BPF_FLAGS_GEN] = { .type = NLA_U32 }, [TCA_BPF_FD] = { .type = NLA_U32 }, [TCA_BPF_NAME] = { .type = NLA_NUL_STRING, .len = CLS_BPF_NAME_LEN }, @@ -154,6 +158,7 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog, bpf_offload.prog = prog->filter; bpf_offload.name = prog->bpf_name; bpf_offload.exts_integrated = prog->exts_integrated; + bpf_offload.gen_flags = prog->gen_flags; return dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &offload); @@ -167,14 +172,14 @@ static void cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog, enum tc_clsbpf_command cmd; if (oldprog && oldprog->offloaded) { - if (tc_should_offload(dev, tp, 0)) { + if (tc_should_offload(dev, tp, prog->gen_flags)) { cmd = TC_CLSBPF_REPLACE; } else { obj = oldprog; cmd = TC_CLSBPF_DESTROY; } } else { - if (!tc_should_offload(dev, tp, 0)) + if (!tc_should_offload(dev, tp, prog->gen_flags)) return; cmd = TC_CLSBPF_ADD; } @@ -370,6 +375,7 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, { bool is_bpf, is_ebpf, have_exts = false; struct tcf_exts exts; + u32 gen_flags = 0; int ret; is_bpf = tb[TCA_BPF_OPS_LEN] && tb[TCA_BPF_OPS]; @@ -394,8 +400,17 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, have_exts = bpf_flags & TCA_BPF_FLAG_ACT_DIRECT; } + if (tb[TCA_BPF_FLAGS_GEN]) { + gen_flags = nla_get_u32(tb[TCA_BPF_FLAGS_GEN]); + if (gen_flags & ~CLS_BPF_SUPPORTED_GEN_FLAGS || + !tc_flags_valid(gen_flags)) { + ret = -EINVAL; + goto errout; + } + } prog->exts_integrated = have_exts; + prog->gen_flags = gen_flags; ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog) : cls_bpf_prog_from_efd(tb, prog, tp); @@ -568,6 +583,9 @@ static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, bpf_flags |= TCA_BPF_FLAG_ACT_DIRECT; if (bpf_flags && nla_put_u32(skb, TCA_BPF_FLAGS, bpf_flags)) goto nla_put_failure; + if (prog->gen_flags && + nla_put_u32(skb, TCA_BPF_FLAGS_GEN, prog->gen_flags)) + goto nla_put_failure; nla_nest_end(skb, nest); From eadb41489fd2249e71fd14b36fb488ed7217ca4b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 21 Sep 2016 11:43:55 +0100 Subject: [PATCH 1317/1715] net: cls_bpf: add support for marking filters as hardware-only Add cls_bpf support for the TCA_CLS_FLAGS_SKIP_SW flag. Signed-off-by: Jakub Kicinski Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/sched/cls_bpf.c | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index ebf01f7c1470..1becc2fe1bc5 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -28,7 +28,7 @@ MODULE_DESCRIPTION("TC BPF based classifier"); #define CLS_BPF_NAME_LEN 256 #define CLS_BPF_SUPPORTED_GEN_FLAGS \ - TCA_CLS_FLAGS_SKIP_HW + (TCA_CLS_FLAGS_SKIP_HW | TCA_CLS_FLAGS_SKIP_SW) struct cls_bpf_head { struct list_head plist; @@ -96,7 +96,9 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp, qdisc_skb_cb(skb)->tc_classid = prog->res.classid; - if (at_ingress) { + if (tc_skip_sw(prog->gen_flags)) { + filter_res = prog->exts_integrated ? TC_ACT_UNSPEC : 0; + } else if (at_ingress) { /* It is safe to push/pull even if skb_shared() */ __skb_push(skb, skb->mac_len); bpf_compute_data_end(skb); @@ -164,32 +166,42 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog, tp->protocol, &offload); } -static void cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog, - struct cls_bpf_prog *oldprog) +static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog, + struct cls_bpf_prog *oldprog) { struct net_device *dev = tp->q->dev_queue->dev; struct cls_bpf_prog *obj = prog; enum tc_clsbpf_command cmd; + bool skip_sw; + int ret; + + skip_sw = tc_skip_sw(prog->gen_flags) || + (oldprog && tc_skip_sw(oldprog->gen_flags)); if (oldprog && oldprog->offloaded) { if (tc_should_offload(dev, tp, prog->gen_flags)) { cmd = TC_CLSBPF_REPLACE; - } else { + } else if (!tc_skip_sw(prog->gen_flags)) { obj = oldprog; cmd = TC_CLSBPF_DESTROY; + } else { + return -EINVAL; } } else { if (!tc_should_offload(dev, tp, prog->gen_flags)) - return; + return skip_sw ? -EINVAL : 0; cmd = TC_CLSBPF_ADD; } - if (cls_bpf_offload_cmd(tp, obj, cmd)) - return; + ret = cls_bpf_offload_cmd(tp, obj, cmd); + if (ret) + return skip_sw ? ret : 0; obj->offloaded = true; if (oldprog) oldprog->offloaded = false; + + return 0; } static void cls_bpf_stop_offload(struct tcf_proto *tp, @@ -498,7 +510,11 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, if (ret < 0) goto errout; - cls_bpf_offload(tp, prog, oldprog); + ret = cls_bpf_offload(tp, prog, oldprog); + if (ret) { + cls_bpf_delete_prog(tp, prog); + return ret; + } if (oldprog) { list_replace_rcu(&oldprog->link, &prog->link); From 3df126f35f88dc76eea33769f85a3c3bb8ce6c6b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 21 Sep 2016 11:43:56 +0100 Subject: [PATCH 1318/1715] bpf: don't (ab)use instructions to store state Storing state in reserved fields of instructions makes it impossible to run verifier on programs already marked as read-only. Allocate and use an array of per-instruction state instead. While touching the error path rename and move existing jump target. Suggested-by: Alexei Starovoitov Signed-off-by: Jakub Kicinski Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 70 ++++++++++++++++++++++++------------------- 1 file changed, 40 insertions(+), 30 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 3a75ee3bdcd1..a9542d89f293 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -181,6 +181,10 @@ struct verifier_stack_elem { struct verifier_stack_elem *next; }; +struct bpf_insn_aux_data { + enum bpf_reg_type ptr_type; /* pointer type for load/store insns */ +}; + #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ /* single container for all structs @@ -197,6 +201,7 @@ struct verifier_env { u32 id_gen; /* used to generate unique reg IDs */ bool allow_ptr_leaks; bool seen_direct_write; + struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */ }; #define BPF_COMPLEXITY_LIMIT_INSNS 65536 @@ -2344,7 +2349,7 @@ static int do_check(struct verifier_env *env) return err; } else if (class == BPF_LDX) { - enum bpf_reg_type src_reg_type; + enum bpf_reg_type *prev_src_type, src_reg_type; /* check for reserved fields is already done */ @@ -2374,16 +2379,18 @@ static int do_check(struct verifier_env *env) continue; } - if (insn->imm == 0) { + prev_src_type = &env->insn_aux_data[insn_idx].ptr_type; + + if (*prev_src_type == NOT_INIT) { /* saw a valid insn * dst_reg = *(u32 *)(src_reg + off) - * use reserved 'imm' field to mark this insn + * save type to validate intersecting paths */ - insn->imm = src_reg_type; + *prev_src_type = src_reg_type; - } else if (src_reg_type != insn->imm && + } else if (src_reg_type != *prev_src_type && (src_reg_type == PTR_TO_CTX || - insn->imm == PTR_TO_CTX)) { + *prev_src_type == PTR_TO_CTX)) { /* ABuser program is trying to use the same insn * dst_reg = *(u32*) (src_reg + off) * with different pointer types: @@ -2396,7 +2403,7 @@ static int do_check(struct verifier_env *env) } } else if (class == BPF_STX) { - enum bpf_reg_type dst_reg_type; + enum bpf_reg_type *prev_dst_type, dst_reg_type; if (BPF_MODE(insn->code) == BPF_XADD) { err = check_xadd(env, insn); @@ -2424,11 +2431,13 @@ static int do_check(struct verifier_env *env) if (err) return err; - if (insn->imm == 0) { - insn->imm = dst_reg_type; - } else if (dst_reg_type != insn->imm && + prev_dst_type = &env->insn_aux_data[insn_idx].ptr_type; + + if (*prev_dst_type == NOT_INIT) { + *prev_dst_type = dst_reg_type; + } else if (dst_reg_type != *prev_dst_type && (dst_reg_type == PTR_TO_CTX || - insn->imm == PTR_TO_CTX)) { + *prev_dst_type == PTR_TO_CTX)) { verbose("same insn cannot be used with different pointers\n"); return -EINVAL; } @@ -2686,10 +2695,11 @@ static void convert_pseudo_ld_imm64(struct verifier_env *env) static int convert_ctx_accesses(struct verifier_env *env) { const struct bpf_verifier_ops *ops = env->prog->aux->ops; + const int insn_cnt = env->prog->len; struct bpf_insn insn_buf[16], *insn; struct bpf_prog *new_prog; enum bpf_access_type type; - int i, insn_cnt, cnt; + int i, cnt, delta = 0; if (ops->gen_prologue) { cnt = ops->gen_prologue(insn_buf, env->seen_direct_write, @@ -2703,18 +2713,16 @@ static int convert_ctx_accesses(struct verifier_env *env) if (!new_prog) return -ENOMEM; env->prog = new_prog; + delta += cnt - 1; } } if (!ops->convert_ctx_access) return 0; - insn_cnt = env->prog->len; - insn = env->prog->insnsi; + insn = env->prog->insnsi + delta; for (i = 0; i < insn_cnt; i++, insn++) { - u32 insn_delta; - if (insn->code == (BPF_LDX | BPF_MEM | BPF_W) || insn->code == (BPF_LDX | BPF_MEM | BPF_DW)) type = BPF_READ; @@ -2724,11 +2732,8 @@ static int convert_ctx_accesses(struct verifier_env *env) else continue; - if (insn->imm != PTR_TO_CTX) { - /* clear internal mark */ - insn->imm = 0; + if (env->insn_aux_data[i].ptr_type != PTR_TO_CTX) continue; - } cnt = ops->convert_ctx_access(type, insn->dst_reg, insn->src_reg, insn->off, insn_buf, env->prog); @@ -2737,18 +2742,16 @@ static int convert_ctx_accesses(struct verifier_env *env) return -EINVAL; } - new_prog = bpf_patch_insn_single(env->prog, i, insn_buf, cnt); + new_prog = bpf_patch_insn_single(env->prog, i + delta, insn_buf, + cnt); if (!new_prog) return -ENOMEM; - insn_delta = cnt - 1; + delta += cnt - 1; /* keep walking new program and skip insns we just inserted */ env->prog = new_prog; - insn = new_prog->insnsi + i + insn_delta; - - insn_cnt += insn_delta; - i += insn_delta; + insn = new_prog->insnsi + i + delta; } return 0; @@ -2792,6 +2795,11 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) if (!env) return -ENOMEM; + env->insn_aux_data = vzalloc(sizeof(struct bpf_insn_aux_data) * + (*prog)->len); + ret = -ENOMEM; + if (!env->insn_aux_data) + goto err_free_env; env->prog = *prog; /* grab the mutex to protect few globals used by verifier */ @@ -2810,12 +2818,12 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) /* log_* values have to be sane */ if (log_size < 128 || log_size > UINT_MAX >> 8 || log_level == 0 || log_ubuf == NULL) - goto free_env; + goto err_unlock; ret = -ENOMEM; log_buf = vmalloc(log_size); if (!log_buf) - goto free_env; + goto err_unlock; } else { log_level = 0; } @@ -2884,14 +2892,16 @@ skip_full_check: free_log_buf: if (log_level) vfree(log_buf); -free_env: if (!env->prog->aux->used_maps) /* if we didn't copy map pointers into bpf_prog_info, release * them now. Otherwise free_bpf_prog_info() will release them. */ release_maps(env); *prog = env->prog; - kfree(env); +err_unlock: mutex_unlock(&bpf_verifier_lock); + vfree(env->insn_aux_data); +err_free_env: + kfree(env); return ret; } From 58e2af8b3a6b587e4ac8414343581da4349d3c0f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 21 Sep 2016 11:43:57 +0100 Subject: [PATCH 1319/1715] bpf: expose internal verfier structures Move verifier's internal structures to a header file and prefix their names with bpf_ to avoid potential namespace conflicts. Those structures will soon be used by external analyzers. Signed-off-by: Jakub Kicinski Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf_verifier.h | 79 +++++++++++ kernel/bpf/verifier.c | 266 ++++++++++++++--------------------- 2 files changed, 182 insertions(+), 163 deletions(-) create mode 100644 include/linux/bpf_verifier.h diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h new file mode 100644 index 000000000000..9457a22fc6e0 --- /dev/null +++ b/include/linux/bpf_verifier.h @@ -0,0 +1,79 @@ +/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#ifndef _LINUX_BPF_VERIFIER_H +#define _LINUX_BPF_VERIFIER_H 1 + +#include /* for enum bpf_reg_type */ +#include /* for MAX_BPF_STACK */ + +struct bpf_reg_state { + enum bpf_reg_type type; + union { + /* valid when type == CONST_IMM | PTR_TO_STACK | UNKNOWN_VALUE */ + s64 imm; + + /* valid when type == PTR_TO_PACKET* */ + struct { + u32 id; + u16 off; + u16 range; + }; + + /* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE | + * PTR_TO_MAP_VALUE_OR_NULL + */ + struct bpf_map *map_ptr; + }; +}; + +enum bpf_stack_slot_type { + STACK_INVALID, /* nothing was stored in this stack slot */ + STACK_SPILL, /* register spilled into stack */ + STACK_MISC /* BPF program wrote some data into this slot */ +}; + +#define BPF_REG_SIZE 8 /* size of eBPF register in bytes */ + +/* state of the program: + * type of all registers and stack info + */ +struct bpf_verifier_state { + struct bpf_reg_state regs[MAX_BPF_REG]; + u8 stack_slot_type[MAX_BPF_STACK]; + struct bpf_reg_state spilled_regs[MAX_BPF_STACK / BPF_REG_SIZE]; +}; + +/* linked list of verifier states used to prune search */ +struct bpf_verifier_state_list { + struct bpf_verifier_state state; + struct bpf_verifier_state_list *next; +}; + +struct bpf_insn_aux_data { + enum bpf_reg_type ptr_type; /* pointer type for load/store insns */ +}; + +#define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ + +/* single container for all structs + * one verifier_env per bpf_check() call + */ +struct bpf_verifier_env { + struct bpf_prog *prog; /* eBPF program being verified */ + struct bpf_verifier_stack_elem *head; /* stack of verifier states to be processed */ + int stack_size; /* number of states to be processed */ + struct bpf_verifier_state cur_state; /* current verifier state */ + struct bpf_verifier_state_list **explored_states; /* search pruning optimization */ + struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */ + u32 used_map_cnt; /* number of used maps */ + u32 id_gen; /* used to generate unique reg IDs */ + bool allow_ptr_leaks; + bool seen_direct_write; + struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */ +}; + +#endif /* _LINUX_BPF_VERIFIER_H */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index a9542d89f293..dca2b9b1d02e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -126,82 +127,16 @@ * are set to NOT_INIT to indicate that they are no longer readable. */ -struct reg_state { - enum bpf_reg_type type; - union { - /* valid when type == CONST_IMM | PTR_TO_STACK | UNKNOWN_VALUE */ - s64 imm; - - /* valid when type == PTR_TO_PACKET* */ - struct { - u32 id; - u16 off; - u16 range; - }; - - /* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE | - * PTR_TO_MAP_VALUE_OR_NULL - */ - struct bpf_map *map_ptr; - }; -}; - -enum bpf_stack_slot_type { - STACK_INVALID, /* nothing was stored in this stack slot */ - STACK_SPILL, /* register spilled into stack */ - STACK_MISC /* BPF program wrote some data into this slot */ -}; - -#define BPF_REG_SIZE 8 /* size of eBPF register in bytes */ - -/* state of the program: - * type of all registers and stack info - */ -struct verifier_state { - struct reg_state regs[MAX_BPF_REG]; - u8 stack_slot_type[MAX_BPF_STACK]; - struct reg_state spilled_regs[MAX_BPF_STACK / BPF_REG_SIZE]; -}; - -/* linked list of verifier states used to prune search */ -struct verifier_state_list { - struct verifier_state state; - struct verifier_state_list *next; -}; - /* verifier_state + insn_idx are pushed to stack when branch is encountered */ -struct verifier_stack_elem { +struct bpf_verifier_stack_elem { /* verifer state is 'st' * before processing instruction 'insn_idx' * and after processing instruction 'prev_insn_idx' */ - struct verifier_state st; + struct bpf_verifier_state st; int insn_idx; int prev_insn_idx; - struct verifier_stack_elem *next; -}; - -struct bpf_insn_aux_data { - enum bpf_reg_type ptr_type; /* pointer type for load/store insns */ -}; - -#define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ - -/* single container for all structs - * one verifier_env per bpf_check() call - */ -struct verifier_env { - struct bpf_prog *prog; /* eBPF program being verified */ - struct verifier_stack_elem *head; /* stack of verifier states to be processed */ - int stack_size; /* number of states to be processed */ - struct verifier_state cur_state; /* current verifier state */ - struct verifier_state_list **explored_states; /* search pruning optimization */ - struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */ - u32 used_map_cnt; /* number of used maps */ - u32 id_gen; /* used to generate unique reg IDs */ - bool allow_ptr_leaks; - bool seen_direct_write; - struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */ + struct bpf_verifier_stack_elem *next; }; #define BPF_COMPLEXITY_LIMIT_INSNS 65536 @@ -254,9 +189,9 @@ static const char * const reg_type_str[] = { [PTR_TO_PACKET_END] = "pkt_end", }; -static void print_verifier_state(struct verifier_state *state) +static void print_verifier_state(struct bpf_verifier_state *state) { - struct reg_state *reg; + struct bpf_reg_state *reg; enum bpf_reg_type t; int i; @@ -432,9 +367,9 @@ static void print_bpf_insn(struct bpf_insn *insn) } } -static int pop_stack(struct verifier_env *env, int *prev_insn_idx) +static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx) { - struct verifier_stack_elem *elem; + struct bpf_verifier_stack_elem *elem; int insn_idx; if (env->head == NULL) @@ -451,12 +386,12 @@ static int pop_stack(struct verifier_env *env, int *prev_insn_idx) return insn_idx; } -static struct verifier_state *push_stack(struct verifier_env *env, int insn_idx, - int prev_insn_idx) +static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env, + int insn_idx, int prev_insn_idx) { - struct verifier_stack_elem *elem; + struct bpf_verifier_stack_elem *elem; - elem = kmalloc(sizeof(struct verifier_stack_elem), GFP_KERNEL); + elem = kmalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL); if (!elem) goto err; @@ -482,7 +417,7 @@ static const int caller_saved[CALLER_SAVED_REGS] = { BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5 }; -static void init_reg_state(struct reg_state *regs) +static void init_reg_state(struct bpf_reg_state *regs) { int i; @@ -498,7 +433,7 @@ static void init_reg_state(struct reg_state *regs) regs[BPF_REG_1].type = PTR_TO_CTX; } -static void mark_reg_unknown_value(struct reg_state *regs, u32 regno) +static void mark_reg_unknown_value(struct bpf_reg_state *regs, u32 regno) { BUG_ON(regno >= MAX_BPF_REG); regs[regno].type = UNKNOWN_VALUE; @@ -511,7 +446,7 @@ enum reg_arg_type { DST_OP_NO_MARK /* same as above, check only, don't mark */ }; -static int check_reg_arg(struct reg_state *regs, u32 regno, +static int check_reg_arg(struct bpf_reg_state *regs, u32 regno, enum reg_arg_type t) { if (regno >= MAX_BPF_REG) { @@ -571,8 +506,8 @@ static bool is_spillable_regtype(enum bpf_reg_type type) /* check_stack_read/write functions track spill/fill of registers, * stack boundary and alignment are checked in check_mem_access() */ -static int check_stack_write(struct verifier_state *state, int off, int size, - int value_regno) +static int check_stack_write(struct bpf_verifier_state *state, int off, + int size, int value_regno) { int i; /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0, @@ -597,7 +532,7 @@ static int check_stack_write(struct verifier_state *state, int off, int size, } else { /* regular write of data into stack */ state->spilled_regs[(MAX_BPF_STACK + off) / BPF_REG_SIZE] = - (struct reg_state) {}; + (struct bpf_reg_state) {}; for (i = 0; i < size; i++) state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_MISC; @@ -605,7 +540,7 @@ static int check_stack_write(struct verifier_state *state, int off, int size, return 0; } -static int check_stack_read(struct verifier_state *state, int off, int size, +static int check_stack_read(struct bpf_verifier_state *state, int off, int size, int value_regno) { u8 *slot_type; @@ -646,7 +581,7 @@ static int check_stack_read(struct verifier_state *state, int off, int size, } /* check read/write into map element returned by bpf_map_lookup_elem() */ -static int check_map_access(struct verifier_env *env, u32 regno, int off, +static int check_map_access(struct bpf_verifier_env *env, u32 regno, int off, int size) { struct bpf_map *map = env->cur_state.regs[regno].map_ptr; @@ -661,7 +596,7 @@ static int check_map_access(struct verifier_env *env, u32 regno, int off, #define MAX_PACKET_OFF 0xffff -static bool may_access_direct_pkt_data(struct verifier_env *env, +static bool may_access_direct_pkt_data(struct bpf_verifier_env *env, const struct bpf_call_arg_meta *meta) { switch (env->prog->type) { @@ -678,11 +613,11 @@ static bool may_access_direct_pkt_data(struct verifier_env *env, } } -static int check_packet_access(struct verifier_env *env, u32 regno, int off, +static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off, int size) { - struct reg_state *regs = env->cur_state.regs; - struct reg_state *reg = ®s[regno]; + struct bpf_reg_state *regs = env->cur_state.regs; + struct bpf_reg_state *reg = ®s[regno]; off += reg->off; if (off < 0 || size <= 0 || off + size > reg->range) { @@ -694,7 +629,7 @@ static int check_packet_access(struct verifier_env *env, u32 regno, int off, } /* check access to 'struct bpf_context' fields */ -static int check_ctx_access(struct verifier_env *env, int off, int size, +static int check_ctx_access(struct bpf_verifier_env *env, int off, int size, enum bpf_access_type t, enum bpf_reg_type *reg_type) { if (env->prog->aux->ops->is_valid_access && @@ -709,7 +644,7 @@ static int check_ctx_access(struct verifier_env *env, int off, int size, return -EACCES; } -static bool is_pointer_value(struct verifier_env *env, int regno) +static bool is_pointer_value(struct bpf_verifier_env *env, int regno) { if (env->allow_ptr_leaks) return false; @@ -723,12 +658,13 @@ static bool is_pointer_value(struct verifier_env *env, int regno) } } -static int check_ptr_alignment(struct verifier_env *env, struct reg_state *reg, - int off, int size) +static int check_ptr_alignment(struct bpf_verifier_env *env, + struct bpf_reg_state *reg, int off, int size) { if (reg->type != PTR_TO_PACKET) { if (off % size != 0) { - verbose("misaligned access off %d size %d\n", off, size); + verbose("misaligned access off %d size %d\n", + off, size); return -EACCES; } else { return 0; @@ -769,12 +705,12 @@ static int check_ptr_alignment(struct verifier_env *env, struct reg_state *reg, * if t==write && value_regno==-1, some unknown value is stored into memory * if t==read && value_regno==-1, don't care what we read from memory */ -static int check_mem_access(struct verifier_env *env, u32 regno, int off, +static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off, int bpf_size, enum bpf_access_type t, int value_regno) { - struct verifier_state *state = &env->cur_state; - struct reg_state *reg = &state->regs[regno]; + struct bpf_verifier_state *state = &env->cur_state; + struct bpf_reg_state *reg = &state->regs[regno]; int size, err = 0; if (reg->type == PTR_TO_STACK) @@ -860,9 +796,9 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off, return err; } -static int check_xadd(struct verifier_env *env, struct bpf_insn *insn) +static int check_xadd(struct bpf_verifier_env *env, struct bpf_insn *insn) { - struct reg_state *regs = env->cur_state.regs; + struct bpf_reg_state *regs = env->cur_state.regs; int err; if ((BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) || @@ -896,12 +832,12 @@ static int check_xadd(struct verifier_env *env, struct bpf_insn *insn) * bytes from that pointer, make sure that it's within stack boundary * and all elements of stack are initialized */ -static int check_stack_boundary(struct verifier_env *env, int regno, +static int check_stack_boundary(struct bpf_verifier_env *env, int regno, int access_size, bool zero_size_allowed, struct bpf_call_arg_meta *meta) { - struct verifier_state *state = &env->cur_state; - struct reg_state *regs = state->regs; + struct bpf_verifier_state *state = &env->cur_state; + struct bpf_reg_state *regs = state->regs; int off, i; if (regs[regno].type != PTR_TO_STACK) { @@ -940,11 +876,11 @@ static int check_stack_boundary(struct verifier_env *env, int regno, return 0; } -static int check_func_arg(struct verifier_env *env, u32 regno, +static int check_func_arg(struct bpf_verifier_env *env, u32 regno, enum bpf_arg_type arg_type, struct bpf_call_arg_meta *meta) { - struct reg_state *regs = env->cur_state.regs, *reg = ®s[regno]; + struct bpf_reg_state *regs = env->cur_state.regs, *reg = ®s[regno]; enum bpf_reg_type expected_type, type = reg->type; int err = 0; @@ -1149,10 +1085,10 @@ static int check_raw_mode(const struct bpf_func_proto *fn) return count > 1 ? -EINVAL : 0; } -static void clear_all_pkt_pointers(struct verifier_env *env) +static void clear_all_pkt_pointers(struct bpf_verifier_env *env) { - struct verifier_state *state = &env->cur_state; - struct reg_state *regs = state->regs, *reg; + struct bpf_verifier_state *state = &env->cur_state; + struct bpf_reg_state *regs = state->regs, *reg; int i; for (i = 0; i < MAX_BPF_REG; i++) @@ -1172,12 +1108,12 @@ static void clear_all_pkt_pointers(struct verifier_env *env) } } -static int check_call(struct verifier_env *env, int func_id) +static int check_call(struct bpf_verifier_env *env, int func_id) { - struct verifier_state *state = &env->cur_state; + struct bpf_verifier_state *state = &env->cur_state; const struct bpf_func_proto *fn = NULL; - struct reg_state *regs = state->regs; - struct reg_state *reg; + struct bpf_reg_state *regs = state->regs; + struct bpf_reg_state *reg; struct bpf_call_arg_meta meta; bool changes_data; int i, err; @@ -1280,12 +1216,13 @@ static int check_call(struct verifier_env *env, int func_id) return 0; } -static int check_packet_ptr_add(struct verifier_env *env, struct bpf_insn *insn) +static int check_packet_ptr_add(struct bpf_verifier_env *env, + struct bpf_insn *insn) { - struct reg_state *regs = env->cur_state.regs; - struct reg_state *dst_reg = ®s[insn->dst_reg]; - struct reg_state *src_reg = ®s[insn->src_reg]; - struct reg_state tmp_reg; + struct bpf_reg_state *regs = env->cur_state.regs; + struct bpf_reg_state *dst_reg = ®s[insn->dst_reg]; + struct bpf_reg_state *src_reg = ®s[insn->src_reg]; + struct bpf_reg_state tmp_reg; s32 imm; if (BPF_SRC(insn->code) == BPF_K) { @@ -1353,10 +1290,10 @@ add_imm: return 0; } -static int evaluate_reg_alu(struct verifier_env *env, struct bpf_insn *insn) +static int evaluate_reg_alu(struct bpf_verifier_env *env, struct bpf_insn *insn) { - struct reg_state *regs = env->cur_state.regs; - struct reg_state *dst_reg = ®s[insn->dst_reg]; + struct bpf_reg_state *regs = env->cur_state.regs; + struct bpf_reg_state *dst_reg = ®s[insn->dst_reg]; u8 opcode = BPF_OP(insn->code); s64 imm_log2; @@ -1366,7 +1303,7 @@ static int evaluate_reg_alu(struct verifier_env *env, struct bpf_insn *insn) */ if (BPF_SRC(insn->code) == BPF_X) { - struct reg_state *src_reg = ®s[insn->src_reg]; + struct bpf_reg_state *src_reg = ®s[insn->src_reg]; if (src_reg->type == UNKNOWN_VALUE && src_reg->imm > 0 && dst_reg->imm && opcode == BPF_ADD) { @@ -1455,11 +1392,12 @@ static int evaluate_reg_alu(struct verifier_env *env, struct bpf_insn *insn) return 0; } -static int evaluate_reg_imm_alu(struct verifier_env *env, struct bpf_insn *insn) +static int evaluate_reg_imm_alu(struct bpf_verifier_env *env, + struct bpf_insn *insn) { - struct reg_state *regs = env->cur_state.regs; - struct reg_state *dst_reg = ®s[insn->dst_reg]; - struct reg_state *src_reg = ®s[insn->src_reg]; + struct bpf_reg_state *regs = env->cur_state.regs; + struct bpf_reg_state *dst_reg = ®s[insn->dst_reg]; + struct bpf_reg_state *src_reg = ®s[insn->src_reg]; u8 opcode = BPF_OP(insn->code); /* dst_reg->type == CONST_IMM here, simulate execution of 'add' insn. @@ -1476,9 +1414,9 @@ static int evaluate_reg_imm_alu(struct verifier_env *env, struct bpf_insn *insn) } /* check validity of 32-bit and 64-bit arithmetic operations */ -static int check_alu_op(struct verifier_env *env, struct bpf_insn *insn) +static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) { - struct reg_state *regs = env->cur_state.regs, *dst_reg; + struct bpf_reg_state *regs = env->cur_state.regs, *dst_reg; u8 opcode = BPF_OP(insn->code); int err; @@ -1652,10 +1590,10 @@ static int check_alu_op(struct verifier_env *env, struct bpf_insn *insn) return 0; } -static void find_good_pkt_pointers(struct verifier_state *state, - const struct reg_state *dst_reg) +static void find_good_pkt_pointers(struct bpf_verifier_state *state, + struct bpf_reg_state *dst_reg) { - struct reg_state *regs = state->regs, *reg; + struct bpf_reg_state *regs = state->regs, *reg; int i; /* LLVM can generate two kind of checks: @@ -1701,11 +1639,11 @@ static void find_good_pkt_pointers(struct verifier_state *state, } } -static int check_cond_jmp_op(struct verifier_env *env, +static int check_cond_jmp_op(struct bpf_verifier_env *env, struct bpf_insn *insn, int *insn_idx) { - struct verifier_state *other_branch, *this_branch = &env->cur_state; - struct reg_state *regs = this_branch->regs, *dst_reg; + struct bpf_verifier_state *other_branch, *this_branch = &env->cur_state; + struct bpf_reg_state *regs = this_branch->regs, *dst_reg; u8 opcode = BPF_OP(insn->code); int err; @@ -1767,7 +1705,7 @@ static int check_cond_jmp_op(struct verifier_env *env, if (!other_branch) return -EFAULT; - /* detect if R == 0 where R is returned value from bpf_map_lookup_elem() */ + /* detect if R == 0 where R is returned from bpf_map_lookup_elem() */ if (BPF_SRC(insn->code) == BPF_K && insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) && dst_reg->type == PTR_TO_MAP_VALUE_OR_NULL) { @@ -1809,9 +1747,9 @@ static struct bpf_map *ld_imm64_to_map_ptr(struct bpf_insn *insn) } /* verify BPF_LD_IMM64 instruction */ -static int check_ld_imm(struct verifier_env *env, struct bpf_insn *insn) +static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn) { - struct reg_state *regs = env->cur_state.regs; + struct bpf_reg_state *regs = env->cur_state.regs; int err; if (BPF_SIZE(insn->code) != BPF_DW) { @@ -1866,11 +1804,11 @@ static bool may_access_skb(enum bpf_prog_type type) * Output: * R0 - 8/16/32-bit skb data converted to cpu endianness */ -static int check_ld_abs(struct verifier_env *env, struct bpf_insn *insn) +static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) { - struct reg_state *regs = env->cur_state.regs; + struct bpf_reg_state *regs = env->cur_state.regs; u8 mode = BPF_MODE(insn->code); - struct reg_state *reg; + struct bpf_reg_state *reg; int i, err; if (!may_access_skb(env->prog->type)) { @@ -1956,7 +1894,7 @@ enum { BRANCH = 2, }; -#define STATE_LIST_MARK ((struct verifier_state_list *) -1L) +#define STATE_LIST_MARK ((struct bpf_verifier_state_list *) -1L) static int *insn_stack; /* stack of insns to process */ static int cur_stack; /* current stack index */ @@ -1967,7 +1905,7 @@ static int *insn_state; * w - next instruction * e - edge */ -static int push_insn(int t, int w, int e, struct verifier_env *env) +static int push_insn(int t, int w, int e, struct bpf_verifier_env *env) { if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH)) return 0; @@ -2008,7 +1946,7 @@ static int push_insn(int t, int w, int e, struct verifier_env *env) /* non-recursive depth-first-search to detect loops in BPF program * loop == back-edge in directed graph */ -static int check_cfg(struct verifier_env *env) +static int check_cfg(struct bpf_verifier_env *env) { struct bpf_insn *insns = env->prog->insnsi; int insn_cnt = env->prog->len; @@ -2117,7 +2055,8 @@ err_free: /* the following conditions reduce the number of explored insns * from ~140k to ~80k for ultra large programs that use a lot of ptr_to_packet */ -static bool compare_ptrs_to_packet(struct reg_state *old, struct reg_state *cur) +static bool compare_ptrs_to_packet(struct bpf_reg_state *old, + struct bpf_reg_state *cur) { if (old->id != cur->id) return false; @@ -2192,9 +2131,10 @@ static bool compare_ptrs_to_packet(struct reg_state *old, struct reg_state *cur) * whereas register type in current state is meaningful, it means that * the current state will reach 'bpf_exit' instruction safely */ -static bool states_equal(struct verifier_state *old, struct verifier_state *cur) +static bool states_equal(struct bpf_verifier_state *old, + struct bpf_verifier_state *cur) { - struct reg_state *rold, *rcur; + struct bpf_reg_state *rold, *rcur; int i; for (i = 0; i < MAX_BPF_REG; i++) { @@ -2234,9 +2174,9 @@ static bool states_equal(struct verifier_state *old, struct verifier_state *cur) * the same, check that stored pointers types * are the same as well. * Ex: explored safe path could have stored - * (struct reg_state) {.type = PTR_TO_STACK, .imm = -8} + * (bpf_reg_state) {.type = PTR_TO_STACK, .imm = -8} * but current path has stored: - * (struct reg_state) {.type = PTR_TO_STACK, .imm = -16} + * (bpf_reg_state) {.type = PTR_TO_STACK, .imm = -16} * such verifier states are not equivalent. * return false to continue verification of this path */ @@ -2247,10 +2187,10 @@ static bool states_equal(struct verifier_state *old, struct verifier_state *cur) return true; } -static int is_state_visited(struct verifier_env *env, int insn_idx) +static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) { - struct verifier_state_list *new_sl; - struct verifier_state_list *sl; + struct bpf_verifier_state_list *new_sl; + struct bpf_verifier_state_list *sl; sl = env->explored_states[insn_idx]; if (!sl) @@ -2274,7 +2214,7 @@ static int is_state_visited(struct verifier_env *env, int insn_idx) * it will be rejected. Since there are no loops, we won't be * seeing this 'insn_idx' instruction again on the way to bpf_exit */ - new_sl = kmalloc(sizeof(struct verifier_state_list), GFP_USER); + new_sl = kmalloc(sizeof(struct bpf_verifier_state_list), GFP_USER); if (!new_sl) return -ENOMEM; @@ -2285,11 +2225,11 @@ static int is_state_visited(struct verifier_env *env, int insn_idx) return 0; } -static int do_check(struct verifier_env *env) +static int do_check(struct bpf_verifier_env *env) { - struct verifier_state *state = &env->cur_state; + struct bpf_verifier_state *state = &env->cur_state; struct bpf_insn *insns = env->prog->insnsi; - struct reg_state *regs = state->regs; + struct bpf_reg_state *regs = state->regs; int insn_cnt = env->prog->len; int insn_idx, prev_insn_idx = 0; int insn_processed = 0; @@ -2572,7 +2512,7 @@ static int check_map_prog_compatibility(struct bpf_map *map, /* look for pseudo eBPF instructions that access map FDs and * replace them with actual map pointers */ -static int replace_map_fd_with_map_ptr(struct verifier_env *env) +static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env) { struct bpf_insn *insn = env->prog->insnsi; int insn_cnt = env->prog->len; @@ -2669,7 +2609,7 @@ next_insn: } /* drop refcnt of maps used by the rejected program */ -static void release_maps(struct verifier_env *env) +static void release_maps(struct bpf_verifier_env *env) { int i; @@ -2678,7 +2618,7 @@ static void release_maps(struct verifier_env *env) } /* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */ -static void convert_pseudo_ld_imm64(struct verifier_env *env) +static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env) { struct bpf_insn *insn = env->prog->insnsi; int insn_cnt = env->prog->len; @@ -2692,7 +2632,7 @@ static void convert_pseudo_ld_imm64(struct verifier_env *env) /* convert load instructions that access fields of 'struct __sk_buff' * into sequence of instructions that access fields of 'struct sk_buff' */ -static int convert_ctx_accesses(struct verifier_env *env) +static int convert_ctx_accesses(struct bpf_verifier_env *env) { const struct bpf_verifier_ops *ops = env->prog->aux->ops; const int insn_cnt = env->prog->len; @@ -2757,9 +2697,9 @@ static int convert_ctx_accesses(struct verifier_env *env) return 0; } -static void free_states(struct verifier_env *env) +static void free_states(struct bpf_verifier_env *env) { - struct verifier_state_list *sl, *sln; + struct bpf_verifier_state_list *sl, *sln; int i; if (!env->explored_states) @@ -2782,16 +2722,16 @@ static void free_states(struct verifier_env *env) int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) { char __user *log_ubuf = NULL; - struct verifier_env *env; + struct bpf_verifier_env *env; int ret = -EINVAL; if ((*prog)->len <= 0 || (*prog)->len > BPF_MAXINSNS) return -E2BIG; - /* 'struct verifier_env' can be global, but since it's not small, + /* 'struct bpf_verifier_env' can be global, but since it's not small, * allocate/free it every time bpf_check() is called */ - env = kzalloc(sizeof(struct verifier_env), GFP_KERNEL); + env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL); if (!env) return -ENOMEM; @@ -2833,7 +2773,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) goto skip_full_check; env->explored_states = kcalloc(env->prog->len, - sizeof(struct verifier_state_list *), + sizeof(struct bpf_verifier_state_list *), GFP_USER); ret = -ENOMEM; if (!env->explored_states) From 13a27dfc669724564aafa2699976ee756029fed2 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 21 Sep 2016 11:43:58 +0100 Subject: [PATCH 1320/1715] bpf: enable non-core use of the verfier Advanced JIT compilers and translators may want to use eBPF verifier as a base for parsers or to perform custom checks and validations. Add ability for external users to invoke the verifier and provide callbacks to be invoked for every intruction checked. For now only add most basic callback for per-instruction pre-interpretation checks is added. More advanced users may also like to have per-instruction post callback and state comparison callback. Signed-off-by: Jakub Kicinski Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf_verifier.h | 11 ++++++ kernel/bpf/verifier.c | 68 ++++++++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 9457a22fc6e0..c5cb661712c9 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -59,6 +59,12 @@ struct bpf_insn_aux_data { #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ +struct bpf_verifier_env; +struct bpf_ext_analyzer_ops { + int (*insn_hook)(struct bpf_verifier_env *env, + int insn_idx, int prev_insn_idx); +}; + /* single container for all structs * one verifier_env per bpf_check() call */ @@ -68,6 +74,8 @@ struct bpf_verifier_env { int stack_size; /* number of states to be processed */ struct bpf_verifier_state cur_state; /* current verifier state */ struct bpf_verifier_state_list **explored_states; /* search pruning optimization */ + const struct bpf_ext_analyzer_ops *analyzer_ops; /* external analyzer ops */ + void *analyzer_priv; /* pointer to external analyzer's private data */ struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */ u32 used_map_cnt; /* number of used maps */ u32 id_gen; /* used to generate unique reg IDs */ @@ -76,4 +84,7 @@ struct bpf_verifier_env { struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */ }; +int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops, + void *priv); + #endif /* _LINUX_BPF_VERIFIER_H */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index dca2b9b1d02e..ee86a77dc40b 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -632,6 +632,10 @@ static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off, static int check_ctx_access(struct bpf_verifier_env *env, int off, int size, enum bpf_access_type t, enum bpf_reg_type *reg_type) { + /* for analyzer ctx accesses are already validated and converted */ + if (env->analyzer_ops) + return 0; + if (env->prog->aux->ops->is_valid_access && env->prog->aux->ops->is_valid_access(off, size, t, reg_type)) { /* remember the offset of last byte accessed in ctx */ @@ -2225,6 +2229,15 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) return 0; } +static int ext_analyzer_insn_hook(struct bpf_verifier_env *env, + int insn_idx, int prev_insn_idx) +{ + if (!env->analyzer_ops || !env->analyzer_ops->insn_hook) + return 0; + + return env->analyzer_ops->insn_hook(env, insn_idx, prev_insn_idx); +} + static int do_check(struct bpf_verifier_env *env) { struct bpf_verifier_state *state = &env->cur_state; @@ -2283,6 +2296,10 @@ static int do_check(struct bpf_verifier_env *env) print_bpf_insn(insn); } + err = ext_analyzer_insn_hook(env, insn_idx, prev_insn_idx); + if (err) + return err; + if (class == BPF_ALU || class == BPF_ALU64) { err = check_alu_op(env, insn); if (err) @@ -2845,3 +2862,54 @@ err_free_env: kfree(env); return ret; } + +int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops, + void *priv) +{ + struct bpf_verifier_env *env; + int ret; + + env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL); + if (!env) + return -ENOMEM; + + env->insn_aux_data = vzalloc(sizeof(struct bpf_insn_aux_data) * + prog->len); + ret = -ENOMEM; + if (!env->insn_aux_data) + goto err_free_env; + env->prog = prog; + env->analyzer_ops = ops; + env->analyzer_priv = priv; + + /* grab the mutex to protect few globals used by verifier */ + mutex_lock(&bpf_verifier_lock); + + log_level = 0; + + env->explored_states = kcalloc(env->prog->len, + sizeof(struct bpf_verifier_state_list *), + GFP_KERNEL); + ret = -ENOMEM; + if (!env->explored_states) + goto skip_full_check; + + ret = check_cfg(env); + if (ret < 0) + goto skip_full_check; + + env->allow_ptr_leaks = capable(CAP_SYS_ADMIN); + + ret = do_check(env); + +skip_full_check: + while (pop_stack(env, NULL) >= 0); + free_states(env); + + mutex_unlock(&bpf_verifier_lock); + vfree(env->insn_aux_data); +err_free_env: + kfree(env); + return ret; +} +EXPORT_SYMBOL_GPL(bpf_analyzer); From 6b17387307bafc71624b9890b9239b6a438e2e89 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 21 Sep 2016 11:43:59 +0100 Subject: [PATCH 1321/1715] bpf: recognize 64bit immediate loads as consts When running as parser interpret BPF_LD | BPF_IMM | BPF_DW instructions as loading CONST_IMM with the value stored in imm. The verifier will continue not recognizing those due to concerns about search space/program complexity increase. Signed-off-by: Jakub Kicinski Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index ee86a77dc40b..8c3f794c7028 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1769,9 +1769,19 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn) if (err) return err; - if (insn->src_reg == 0) - /* generic move 64-bit immediate into a register */ + if (insn->src_reg == 0) { + /* generic move 64-bit immediate into a register, + * only analyzer needs to collect the ld_imm value. + */ + u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm; + + if (!env->analyzer_ops) + return 0; + + regs[insn->dst_reg].type = CONST_IMM; + regs[insn->dst_reg].imm = imm; return 0; + } /* replace_map_fd_with_map_ptr() should have caught bad ld_imm64 */ BUG_ON(insn->src_reg != BPF_PSEUDO_MAP_FD); From cd7df56ed3e60d046ddb3acd987778c00aa9ee33 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 21 Sep 2016 11:44:00 +0100 Subject: [PATCH 1322/1715] nfp: add BPF to NFP code translator Add translator for JITing eBPF to operations which can be executed on NFP's programmable engines. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/Makefile | 6 + drivers/net/ethernet/netronome/nfp/nfp_asm.h | 233 +++ drivers/net/ethernet/netronome/nfp/nfp_bpf.h | 208 ++ .../net/ethernet/netronome/nfp/nfp_bpf_jit.c | 1724 +++++++++++++++++ .../ethernet/netronome/nfp/nfp_bpf_verifier.c | 162 ++ 5 files changed, 2333 insertions(+) create mode 100644 drivers/net/ethernet/netronome/nfp/nfp_asm.h create mode 100644 drivers/net/ethernet/netronome/nfp/nfp_bpf.h create mode 100644 drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c create mode 100644 drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c diff --git a/drivers/net/ethernet/netronome/nfp/Makefile b/drivers/net/ethernet/netronome/nfp/Makefile index 68178819ff12..5f12689bf523 100644 --- a/drivers/net/ethernet/netronome/nfp/Makefile +++ b/drivers/net/ethernet/netronome/nfp/Makefile @@ -5,4 +5,10 @@ nfp_netvf-objs := \ nfp_net_ethtool.o \ nfp_netvf_main.o +ifeq ($(CONFIG_BPF_SYSCALL),y) +nfp_netvf-objs += \ + nfp_bpf_verifier.o \ + nfp_bpf_jit.o +endif + nfp_netvf-$(CONFIG_NFP_NET_DEBUG) += nfp_net_debugfs.o diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.h b/drivers/net/ethernet/netronome/nfp/nfp_asm.h new file mode 100644 index 000000000000..22484b6fd3e8 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.h @@ -0,0 +1,233 @@ +/* + * Copyright (C) 2016 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __NFP_ASM_H__ +#define __NFP_ASM_H__ 1 + +#include "nfp_bpf.h" + +#define REG_NONE 0 + +#define RE_REG_NO_DST 0x020 +#define RE_REG_IMM 0x020 +#define RE_REG_IMM_encode(x) \ + (RE_REG_IMM | ((x) & 0x1f) | (((x) & 0x60) << 1)) +#define RE_REG_IMM_MAX 0x07fULL +#define RE_REG_XFR 0x080 + +#define UR_REG_XFR 0x180 +#define UR_REG_NN 0x280 +#define UR_REG_NO_DST 0x300 +#define UR_REG_IMM UR_REG_NO_DST +#define UR_REG_IMM_encode(x) (UR_REG_IMM | (x)) +#define UR_REG_IMM_MAX 0x0ffULL + +#define OP_BR_BASE 0x0d800000020ULL +#define OP_BR_BASE_MASK 0x0f8000c3ce0ULL +#define OP_BR_MASK 0x0000000001fULL +#define OP_BR_EV_PIP 0x00000000300ULL +#define OP_BR_CSS 0x0000003c000ULL +#define OP_BR_DEFBR 0x00000300000ULL +#define OP_BR_ADDR_LO 0x007ffc00000ULL +#define OP_BR_ADDR_HI 0x10000000000ULL + +#define nfp_is_br(_insn) \ + (((_insn) & OP_BR_BASE_MASK) == OP_BR_BASE) + +enum br_mask { + BR_BEQ = 0x00, + BR_BNE = 0x01, + BR_BHS = 0x04, + BR_BLO = 0x05, + BR_BGE = 0x08, + BR_UNC = 0x18, +}; + +enum br_ev_pip { + BR_EV_PIP_UNCOND = 0, + BR_EV_PIP_COND = 1, +}; + +enum br_ctx_signal_state { + BR_CSS_NONE = 2, +}; + +#define OP_BBYTE_BASE 0x0c800000000ULL +#define OP_BB_A_SRC 0x000000000ffULL +#define OP_BB_BYTE 0x00000000300ULL +#define OP_BB_B_SRC 0x0000003fc00ULL +#define OP_BB_I8 0x00000040000ULL +#define OP_BB_EQ 0x00000080000ULL +#define OP_BB_DEFBR 0x00000300000ULL +#define OP_BB_ADDR_LO 0x007ffc00000ULL +#define OP_BB_ADDR_HI 0x10000000000ULL + +#define OP_BALU_BASE 0x0e800000000ULL +#define OP_BA_A_SRC 0x000000003ffULL +#define OP_BA_B_SRC 0x000000ffc00ULL +#define OP_BA_DEFBR 0x00000300000ULL +#define OP_BA_ADDR_HI 0x0007fc00000ULL + +#define OP_IMMED_A_SRC 0x000000003ffULL +#define OP_IMMED_B_SRC 0x000000ffc00ULL +#define OP_IMMED_IMM 0x0000ff00000ULL +#define OP_IMMED_WIDTH 0x00060000000ULL +#define OP_IMMED_INV 0x00080000000ULL +#define OP_IMMED_SHIFT 0x00600000000ULL +#define OP_IMMED_BASE 0x0f000000000ULL +#define OP_IMMED_WR_AB 0x20000000000ULL + +enum immed_width { + IMMED_WIDTH_ALL = 0, + IMMED_WIDTH_BYTE = 1, + IMMED_WIDTH_WORD = 2, +}; + +enum immed_shift { + IMMED_SHIFT_0B = 0, + IMMED_SHIFT_1B = 1, + IMMED_SHIFT_2B = 2, +}; + +#define OP_SHF_BASE 0x08000000000ULL +#define OP_SHF_A_SRC 0x000000000ffULL +#define OP_SHF_SC 0x00000000300ULL +#define OP_SHF_B_SRC 0x0000003fc00ULL +#define OP_SHF_I8 0x00000040000ULL +#define OP_SHF_SW 0x00000080000ULL +#define OP_SHF_DST 0x0000ff00000ULL +#define OP_SHF_SHIFT 0x001f0000000ULL +#define OP_SHF_OP 0x00e00000000ULL +#define OP_SHF_DST_AB 0x01000000000ULL +#define OP_SHF_WR_AB 0x20000000000ULL + +enum shf_op { + SHF_OP_NONE = 0, + SHF_OP_AND = 2, + SHF_OP_OR = 5, +}; + +enum shf_sc { + SHF_SC_R_ROT = 0, + SHF_SC_R_SHF = 1, + SHF_SC_L_SHF = 2, + SHF_SC_R_DSHF = 3, +}; + +#define OP_ALU_A_SRC 0x000000003ffULL +#define OP_ALU_B_SRC 0x000000ffc00ULL +#define OP_ALU_DST 0x0003ff00000ULL +#define OP_ALU_SW 0x00040000000ULL +#define OP_ALU_OP 0x00f80000000ULL +#define OP_ALU_DST_AB 0x01000000000ULL +#define OP_ALU_BASE 0x0a000000000ULL +#define OP_ALU_WR_AB 0x20000000000ULL + +enum alu_op { + ALU_OP_NONE = 0x00, + ALU_OP_ADD = 0x01, + ALU_OP_NEG = 0x04, + ALU_OP_AND = 0x08, + ALU_OP_SUB_C = 0x0d, + ALU_OP_ADD_C = 0x11, + ALU_OP_OR = 0x14, + ALU_OP_SUB = 0x15, + ALU_OP_XOR = 0x18, +}; + +enum alu_dst_ab { + ALU_DST_A = 0, + ALU_DST_B = 1, +}; + +#define OP_LDF_BASE 0x0c000000000ULL +#define OP_LDF_A_SRC 0x000000000ffULL +#define OP_LDF_SC 0x00000000300ULL +#define OP_LDF_B_SRC 0x0000003fc00ULL +#define OP_LDF_I8 0x00000040000ULL +#define OP_LDF_SW 0x00000080000ULL +#define OP_LDF_ZF 0x00000100000ULL +#define OP_LDF_BMASK 0x0000f000000ULL +#define OP_LDF_SHF 0x001f0000000ULL +#define OP_LDF_WR_AB 0x20000000000ULL + +#define OP_CMD_A_SRC 0x000000000ffULL +#define OP_CMD_CTX 0x00000000300ULL +#define OP_CMD_B_SRC 0x0000003fc00ULL +#define OP_CMD_TOKEN 0x000000c0000ULL +#define OP_CMD_XFER 0x00001f00000ULL +#define OP_CMD_CNT 0x0000e000000ULL +#define OP_CMD_SIG 0x000f0000000ULL +#define OP_CMD_TGT_CMD 0x07f00000000ULL +#define OP_CMD_MODE 0x1c0000000000ULL + +struct cmd_tgt_act { + u8 token; + u8 tgt_cmd; +}; + +enum cmd_tgt_map { + CMD_TGT_READ8, + CMD_TGT_WRITE8, + CMD_TGT_READ_LE, + CMD_TGT_READ_SWAP_LE, + __CMD_TGT_MAP_SIZE, +}; + +enum cmd_mode { + CMD_MODE_40b_AB = 0, + CMD_MODE_40b_BA = 1, + CMD_MODE_32b = 4, +}; + +enum cmd_ctx_swap { + CMD_CTX_SWAP = 0, + CMD_CTX_NO_SWAP = 3, +}; + +#define OP_LCSR_BASE 0x0fc00000000ULL +#define OP_LCSR_A_SRC 0x000000003ffULL +#define OP_LCSR_B_SRC 0x000000ffc00ULL +#define OP_LCSR_WRITE 0x00000200000ULL +#define OP_LCSR_ADDR 0x001ffc00000ULL + +enum lcsr_wr_src { + LCSR_WR_AREG, + LCSR_WR_BREG, + LCSR_WR_IMM, +}; + +#define OP_CARB_BASE 0x0e000000000ULL +#define OP_CARB_OR 0x00000010000ULL + +#endif diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf.h b/drivers/net/ethernet/netronome/nfp/nfp_bpf.h new file mode 100644 index 000000000000..3726421e353f --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf.h @@ -0,0 +1,208 @@ +/* + * Copyright (C) 2016 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __NFP_BPF_H__ +#define __NFP_BPF_H__ 1 + +#include +#include +#include +#include + +#define FIELD_FIT(mask, val) (!((((u64)val) << __bf_shf(mask)) & ~(mask))) + +/* For branch fixup logic use up-most byte of branch instruction as scratch + * area. Remember to clear this before sending instructions to HW! + */ +#define OP_BR_SPECIAL 0xff00000000000000ULL + +enum br_special { + OP_BR_NORMAL = 0, + OP_BR_GO_OUT, + OP_BR_GO_ABORT, +}; + +enum static_regs { + STATIC_REG_PKT = 1, +#define REG_PKT_BANK ALU_DST_A + STATIC_REG_IMM = 2, /* Bank AB */ +}; + +enum nfp_bpf_action_type { + NN_ACT_TC_DROP, +}; + +/* Software register representation, hardware encoding in asm.h */ +#define NN_REG_TYPE GENMASK(31, 24) +#define NN_REG_VAL GENMASK(7, 0) + +enum nfp_bpf_reg_type { + NN_REG_GPR_A = BIT(0), + NN_REG_GPR_B = BIT(1), + NN_REG_NNR = BIT(2), + NN_REG_XFER = BIT(3), + NN_REG_IMM = BIT(4), + NN_REG_NONE = BIT(5), +}; + +#define NN_REG_GPR_BOTH (NN_REG_GPR_A | NN_REG_GPR_B) + +#define reg_both(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_GPR_BOTH)) +#define reg_a(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_GPR_A)) +#define reg_b(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_GPR_B)) +#define reg_nnr(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_NNR)) +#define reg_xfer(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_XFER)) +#define reg_imm(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_IMM)) +#define reg_none() (FIELD_PREP(NN_REG_TYPE, NN_REG_NONE)) + +#define pkt_reg(np) reg_a((np)->regs_per_thread - STATIC_REG_PKT) +#define imm_a(np) reg_a((np)->regs_per_thread - STATIC_REG_IMM) +#define imm_b(np) reg_b((np)->regs_per_thread - STATIC_REG_IMM) +#define imm_both(np) reg_both((np)->regs_per_thread - STATIC_REG_IMM) + +#define NFP_BPF_ABI_FLAGS reg_nnr(0) +#define NFP_BPF_ABI_PKT reg_nnr(2) +#define NFP_BPF_ABI_LEN reg_nnr(3) + +struct nfp_prog; +struct nfp_insn_meta; +typedef int (*instr_cb_t)(struct nfp_prog *, struct nfp_insn_meta *); + +#define nfp_prog_first_meta(nfp_prog) \ + list_first_entry(&(nfp_prog)->insns, struct nfp_insn_meta, l) +#define nfp_prog_last_meta(nfp_prog) \ + list_last_entry(&(nfp_prog)->insns, struct nfp_insn_meta, l) +#define nfp_meta_next(meta) list_next_entry(meta, l) +#define nfp_meta_prev(meta) list_prev_entry(meta, l) + +/** + * struct nfp_insn_meta - BPF instruction wrapper + * @insn: BPF instruction + * @off: index of first generated machine instruction (in nfp_prog.prog) + * @n: eBPF instruction number + * @skip: skip this instruction (optimized out) + * @double_cb: callback for second part of the instruction + * @l: link on nfp_prog->insns list + */ +struct nfp_insn_meta { + struct bpf_insn insn; + unsigned int off; + unsigned short n; + bool skip; + instr_cb_t double_cb; + + struct list_head l; +}; + +#define BPF_SIZE_MASK 0x18 + +static inline u8 mbpf_class(const struct nfp_insn_meta *meta) +{ + return BPF_CLASS(meta->insn.code); +} + +static inline u8 mbpf_src(const struct nfp_insn_meta *meta) +{ + return BPF_SRC(meta->insn.code); +} + +static inline u8 mbpf_op(const struct nfp_insn_meta *meta) +{ + return BPF_OP(meta->insn.code); +} + +static inline u8 mbpf_mode(const struct nfp_insn_meta *meta) +{ + return BPF_MODE(meta->insn.code); +} + +/** + * struct nfp_prog - nfp BPF program + * @prog: machine code + * @prog_len: number of valid instructions in @prog array + * @__prog_alloc_len: alloc size of @prog array + * @act: BPF program/action type (TC DA, TC with action, XDP etc.) + * @num_regs: number of registers used by this program + * @regs_per_thread: number of basic registers allocated per thread + * @start_off: address of the first instruction in the memory + * @tgt_out: jump target for normal exit + * @tgt_abort: jump target for abort (e.g. access outside of packet buffer) + * @tgt_done: jump target to get the next packet + * @n_translated: number of successfully translated instructions (for errors) + * @error: error code if something went wrong + * @insns: list of BPF instruction wrappers (struct nfp_insn_meta) + */ +struct nfp_prog { + u64 *prog; + unsigned int prog_len; + unsigned int __prog_alloc_len; + + enum nfp_bpf_action_type act; + + unsigned int num_regs; + unsigned int regs_per_thread; + + unsigned int start_off; + unsigned int tgt_out; + unsigned int tgt_abort; + unsigned int tgt_done; + + unsigned int n_translated; + int error; + + struct list_head insns; +}; + +struct nfp_bpf_result { + unsigned int n_instr; + bool dense_mode; +}; + +#ifdef CONFIG_BPF_SYSCALL +int +nfp_bpf_jit(struct bpf_prog *filter, void *prog, enum nfp_bpf_action_type act, + unsigned int prog_start, unsigned int prog_done, + unsigned int prog_sz, struct nfp_bpf_result *res); +#else +int +nfp_bpf_jit(struct bpf_prog *filter, void *prog, enum nfp_bpf_action_type act, + unsigned int prog_start, unsigned int prog_done, + unsigned int prog_sz, struct nfp_bpf_result *res) +{ + return -ENOTSUPP; +} +#endif + +int nfp_prog_verify(struct nfp_prog *nfp_prog, struct bpf_prog *prog); + +#endif diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c b/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c new file mode 100644 index 000000000000..cfbf53607fc9 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c @@ -0,0 +1,1724 @@ +/* + * Copyright (C) 2016 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#define pr_fmt(fmt) "NFP net bpf: " fmt + +#include +#include +#include +#include +#include + +#include "nfp_asm.h" +#include "nfp_bpf.h" + +/* --- NFP prog --- */ +/* Foreach "multiple" entries macros provide pos and next pointers. + * It's safe to modify the next pointers (but not pos). + */ +#define nfp_for_each_insn_walk2(nfp_prog, pos, next) \ + for (pos = list_first_entry(&(nfp_prog)->insns, typeof(*pos), l), \ + next = list_next_entry(pos, l); \ + &(nfp_prog)->insns != &pos->l && \ + &(nfp_prog)->insns != &next->l; \ + pos = nfp_meta_next(pos), \ + next = nfp_meta_next(pos)) + +#define nfp_for_each_insn_walk3(nfp_prog, pos, next, next2) \ + for (pos = list_first_entry(&(nfp_prog)->insns, typeof(*pos), l), \ + next = list_next_entry(pos, l), \ + next2 = list_next_entry(next, l); \ + &(nfp_prog)->insns != &pos->l && \ + &(nfp_prog)->insns != &next->l && \ + &(nfp_prog)->insns != &next2->l; \ + pos = nfp_meta_next(pos), \ + next = nfp_meta_next(pos), \ + next2 = nfp_meta_next(next)) + +static bool +nfp_meta_has_next(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return meta->l.next != &nfp_prog->insns; +} + +static bool +nfp_meta_has_prev(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return meta->l.prev != &nfp_prog->insns; +} + +static void nfp_prog_free(struct nfp_prog *nfp_prog) +{ + struct nfp_insn_meta *meta, *tmp; + + list_for_each_entry_safe(meta, tmp, &nfp_prog->insns, l) { + list_del(&meta->l); + kfree(meta); + } + kfree(nfp_prog); +} + +static void nfp_prog_push(struct nfp_prog *nfp_prog, u64 insn) +{ + if (nfp_prog->__prog_alloc_len == nfp_prog->prog_len) { + nfp_prog->error = -ENOSPC; + return; + } + + nfp_prog->prog[nfp_prog->prog_len] = insn; + nfp_prog->prog_len++; +} + +static unsigned int nfp_prog_current_offset(struct nfp_prog *nfp_prog) +{ + return nfp_prog->start_off + nfp_prog->prog_len; +} + +static unsigned int +nfp_prog_offset_to_index(struct nfp_prog *nfp_prog, unsigned int offset) +{ + return offset - nfp_prog->start_off; +} + +/* --- SW reg --- */ +struct nfp_insn_ur_regs { + enum alu_dst_ab dst_ab; + u16 dst; + u16 areg, breg; + bool swap; + bool wr_both; +}; + +struct nfp_insn_re_regs { + enum alu_dst_ab dst_ab; + u8 dst; + u8 areg, breg; + bool swap; + bool wr_both; + bool i8; +}; + +static u16 nfp_swreg_to_unreg(u32 swreg, bool is_dst) +{ + u16 val = FIELD_GET(NN_REG_VAL, swreg); + + switch (FIELD_GET(NN_REG_TYPE, swreg)) { + case NN_REG_GPR_A: + case NN_REG_GPR_B: + case NN_REG_GPR_BOTH: + return val; + case NN_REG_NNR: + return UR_REG_NN | val; + case NN_REG_XFER: + return UR_REG_XFR | val; + case NN_REG_IMM: + if (val & ~0xff) { + pr_err("immediate too large\n"); + return 0; + } + return UR_REG_IMM_encode(val); + case NN_REG_NONE: + return is_dst ? UR_REG_NO_DST : REG_NONE; + default: + pr_err("unrecognized reg encoding %08x\n", swreg); + return 0; + } +} + +static int +swreg_to_unrestricted(u32 dst, u32 lreg, u32 rreg, struct nfp_insn_ur_regs *reg) +{ + memset(reg, 0, sizeof(*reg)); + + /* Decode destination */ + if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_IMM) + return -EFAULT; + + if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_GPR_B) + reg->dst_ab = ALU_DST_B; + if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_GPR_BOTH) + reg->wr_both = true; + reg->dst = nfp_swreg_to_unreg(dst, true); + + /* Decode source operands */ + if (FIELD_GET(NN_REG_TYPE, lreg) == FIELD_GET(NN_REG_TYPE, rreg)) + return -EFAULT; + + if (FIELD_GET(NN_REG_TYPE, lreg) == NN_REG_GPR_B || + FIELD_GET(NN_REG_TYPE, rreg) == NN_REG_GPR_A) { + reg->areg = nfp_swreg_to_unreg(rreg, false); + reg->breg = nfp_swreg_to_unreg(lreg, false); + reg->swap = true; + } else { + reg->areg = nfp_swreg_to_unreg(lreg, false); + reg->breg = nfp_swreg_to_unreg(rreg, false); + } + + return 0; +} + +static u16 nfp_swreg_to_rereg(u32 swreg, bool is_dst, bool has_imm8, bool *i8) +{ + u16 val = FIELD_GET(NN_REG_VAL, swreg); + + switch (FIELD_GET(NN_REG_TYPE, swreg)) { + case NN_REG_GPR_A: + case NN_REG_GPR_B: + case NN_REG_GPR_BOTH: + return val; + case NN_REG_XFER: + return RE_REG_XFR | val; + case NN_REG_IMM: + if (val & ~(0x7f | has_imm8 << 7)) { + pr_err("immediate too large\n"); + return 0; + } + *i8 = val & 0x80; + return RE_REG_IMM_encode(val & 0x7f); + case NN_REG_NONE: + return is_dst ? RE_REG_NO_DST : REG_NONE; + default: + pr_err("unrecognized reg encoding\n"); + return 0; + } +} + +static int +swreg_to_restricted(u32 dst, u32 lreg, u32 rreg, struct nfp_insn_re_regs *reg, + bool has_imm8) +{ + memset(reg, 0, sizeof(*reg)); + + /* Decode destination */ + if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_IMM) + return -EFAULT; + + if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_GPR_B) + reg->dst_ab = ALU_DST_B; + if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_GPR_BOTH) + reg->wr_both = true; + reg->dst = nfp_swreg_to_rereg(dst, true, false, NULL); + + /* Decode source operands */ + if (FIELD_GET(NN_REG_TYPE, lreg) == FIELD_GET(NN_REG_TYPE, rreg)) + return -EFAULT; + + if (FIELD_GET(NN_REG_TYPE, lreg) == NN_REG_GPR_B || + FIELD_GET(NN_REG_TYPE, rreg) == NN_REG_GPR_A) { + reg->areg = nfp_swreg_to_rereg(rreg, false, has_imm8, ®->i8); + reg->breg = nfp_swreg_to_rereg(lreg, false, has_imm8, ®->i8); + reg->swap = true; + } else { + reg->areg = nfp_swreg_to_rereg(lreg, false, has_imm8, ®->i8); + reg->breg = nfp_swreg_to_rereg(rreg, false, has_imm8, ®->i8); + } + + return 0; +} + +/* --- Emitters --- */ +static const struct cmd_tgt_act cmd_tgt_act[__CMD_TGT_MAP_SIZE] = { + [CMD_TGT_WRITE8] = { 0x00, 0x42 }, + [CMD_TGT_READ8] = { 0x01, 0x43 }, + [CMD_TGT_READ_LE] = { 0x01, 0x40 }, + [CMD_TGT_READ_SWAP_LE] = { 0x03, 0x40 }, +}; + +static void +__emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, + u8 mode, u8 xfer, u8 areg, u8 breg, u8 size, bool sync) +{ + enum cmd_ctx_swap ctx; + u64 insn; + + if (sync) + ctx = CMD_CTX_SWAP; + else + ctx = CMD_CTX_NO_SWAP; + + insn = FIELD_PREP(OP_CMD_A_SRC, areg) | + FIELD_PREP(OP_CMD_CTX, ctx) | + FIELD_PREP(OP_CMD_B_SRC, breg) | + FIELD_PREP(OP_CMD_TOKEN, cmd_tgt_act[op].token) | + FIELD_PREP(OP_CMD_XFER, xfer) | + FIELD_PREP(OP_CMD_CNT, size) | + FIELD_PREP(OP_CMD_SIG, sync) | + FIELD_PREP(OP_CMD_TGT_CMD, cmd_tgt_act[op].tgt_cmd) | + FIELD_PREP(OP_CMD_MODE, mode); + + nfp_prog_push(nfp_prog, insn); +} + +static void +emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, + u8 mode, u8 xfer, u32 lreg, u32 rreg, u8 size, bool sync) +{ + struct nfp_insn_re_regs reg; + int err; + + err = swreg_to_restricted(reg_none(), lreg, rreg, ®, false); + if (err) { + nfp_prog->error = err; + return; + } + if (reg.swap) { + pr_err("cmd can't swap arguments\n"); + nfp_prog->error = -EFAULT; + return; + } + + __emit_cmd(nfp_prog, op, mode, xfer, reg.areg, reg.breg, size, sync); +} + +static void +__emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, enum br_ev_pip ev_pip, + enum br_ctx_signal_state css, u16 addr, u8 defer) +{ + u16 addr_lo, addr_hi; + u64 insn; + + addr_lo = addr & (OP_BR_ADDR_LO >> __bf_shf(OP_BR_ADDR_LO)); + addr_hi = addr != addr_lo; + + insn = OP_BR_BASE | + FIELD_PREP(OP_BR_MASK, mask) | + FIELD_PREP(OP_BR_EV_PIP, ev_pip) | + FIELD_PREP(OP_BR_CSS, css) | + FIELD_PREP(OP_BR_DEFBR, defer) | + FIELD_PREP(OP_BR_ADDR_LO, addr_lo) | + FIELD_PREP(OP_BR_ADDR_HI, addr_hi); + + nfp_prog_push(nfp_prog, insn); +} + +static void +emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer) +{ + __emit_br(nfp_prog, mask, + mask != BR_UNC ? BR_EV_PIP_COND : BR_EV_PIP_UNCOND, + BR_CSS_NONE, addr, defer); +} + +static void +__emit_br_byte(struct nfp_prog *nfp_prog, u8 areg, u8 breg, bool imm8, + u8 byte, bool equal, u16 addr, u8 defer) +{ + u16 addr_lo, addr_hi; + u64 insn; + + addr_lo = addr & (OP_BB_ADDR_LO >> __bf_shf(OP_BB_ADDR_LO)); + addr_hi = addr != addr_lo; + + insn = OP_BBYTE_BASE | + FIELD_PREP(OP_BB_A_SRC, areg) | + FIELD_PREP(OP_BB_BYTE, byte) | + FIELD_PREP(OP_BB_B_SRC, breg) | + FIELD_PREP(OP_BB_I8, imm8) | + FIELD_PREP(OP_BB_EQ, equal) | + FIELD_PREP(OP_BB_DEFBR, defer) | + FIELD_PREP(OP_BB_ADDR_LO, addr_lo) | + FIELD_PREP(OP_BB_ADDR_HI, addr_hi); + + nfp_prog_push(nfp_prog, insn); +} + +static void +emit_br_byte_neq(struct nfp_prog *nfp_prog, + u32 dst, u8 imm, u8 byte, u16 addr, u8 defer) +{ + struct nfp_insn_re_regs reg; + int err; + + err = swreg_to_restricted(reg_none(), dst, reg_imm(imm), ®, true); + if (err) { + nfp_prog->error = err; + return; + } + + __emit_br_byte(nfp_prog, reg.areg, reg.breg, reg.i8, byte, false, addr, + defer); +} + +static void +__emit_immed(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 imm_hi, + enum immed_width width, bool invert, + enum immed_shift shift, bool wr_both) +{ + u64 insn; + + insn = OP_IMMED_BASE | + FIELD_PREP(OP_IMMED_A_SRC, areg) | + FIELD_PREP(OP_IMMED_B_SRC, breg) | + FIELD_PREP(OP_IMMED_IMM, imm_hi) | + FIELD_PREP(OP_IMMED_WIDTH, width) | + FIELD_PREP(OP_IMMED_INV, invert) | + FIELD_PREP(OP_IMMED_SHIFT, shift) | + FIELD_PREP(OP_IMMED_WR_AB, wr_both); + + nfp_prog_push(nfp_prog, insn); +} + +static void +emit_immed(struct nfp_prog *nfp_prog, u32 dst, u16 imm, + enum immed_width width, bool invert, enum immed_shift shift) +{ + struct nfp_insn_ur_regs reg; + int err; + + if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_IMM) { + nfp_prog->error = -EFAULT; + return; + } + + err = swreg_to_unrestricted(dst, dst, reg_imm(imm & 0xff), ®); + if (err) { + nfp_prog->error = err; + return; + } + + __emit_immed(nfp_prog, reg.areg, reg.breg, imm >> 8, width, + invert, shift, reg.wr_both); +} + +static void +__emit_shf(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab, + enum shf_sc sc, u8 shift, + u16 areg, enum shf_op op, u16 breg, bool i8, bool sw, bool wr_both) +{ + u64 insn; + + if (!FIELD_FIT(OP_SHF_SHIFT, shift)) { + nfp_prog->error = -EFAULT; + return; + } + + if (sc == SHF_SC_L_SHF) + shift = 32 - shift; + + insn = OP_SHF_BASE | + FIELD_PREP(OP_SHF_A_SRC, areg) | + FIELD_PREP(OP_SHF_SC, sc) | + FIELD_PREP(OP_SHF_B_SRC, breg) | + FIELD_PREP(OP_SHF_I8, i8) | + FIELD_PREP(OP_SHF_SW, sw) | + FIELD_PREP(OP_SHF_DST, dst) | + FIELD_PREP(OP_SHF_SHIFT, shift) | + FIELD_PREP(OP_SHF_OP, op) | + FIELD_PREP(OP_SHF_DST_AB, dst_ab) | + FIELD_PREP(OP_SHF_WR_AB, wr_both); + + nfp_prog_push(nfp_prog, insn); +} + +static void +emit_shf(struct nfp_prog *nfp_prog, u32 dst, u32 lreg, enum shf_op op, u32 rreg, + enum shf_sc sc, u8 shift) +{ + struct nfp_insn_re_regs reg; + int err; + + err = swreg_to_restricted(dst, lreg, rreg, ®, true); + if (err) { + nfp_prog->error = err; + return; + } + + __emit_shf(nfp_prog, reg.dst, reg.dst_ab, sc, shift, + reg.areg, op, reg.breg, reg.i8, reg.swap, reg.wr_both); +} + +static void +__emit_alu(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab, + u16 areg, enum alu_op op, u16 breg, bool swap, bool wr_both) +{ + u64 insn; + + insn = OP_ALU_BASE | + FIELD_PREP(OP_ALU_A_SRC, areg) | + FIELD_PREP(OP_ALU_B_SRC, breg) | + FIELD_PREP(OP_ALU_DST, dst) | + FIELD_PREP(OP_ALU_SW, swap) | + FIELD_PREP(OP_ALU_OP, op) | + FIELD_PREP(OP_ALU_DST_AB, dst_ab) | + FIELD_PREP(OP_ALU_WR_AB, wr_both); + + nfp_prog_push(nfp_prog, insn); +} + +static void +emit_alu(struct nfp_prog *nfp_prog, u32 dst, u32 lreg, enum alu_op op, u32 rreg) +{ + struct nfp_insn_ur_regs reg; + int err; + + err = swreg_to_unrestricted(dst, lreg, rreg, ®); + if (err) { + nfp_prog->error = err; + return; + } + + __emit_alu(nfp_prog, reg.dst, reg.dst_ab, + reg.areg, op, reg.breg, reg.swap, reg.wr_both); +} + +static void +__emit_ld_field(struct nfp_prog *nfp_prog, enum shf_sc sc, + u8 areg, u8 bmask, u8 breg, u8 shift, bool imm8, + bool zero, bool swap, bool wr_both) +{ + u64 insn; + + insn = OP_LDF_BASE | + FIELD_PREP(OP_LDF_A_SRC, areg) | + FIELD_PREP(OP_LDF_SC, sc) | + FIELD_PREP(OP_LDF_B_SRC, breg) | + FIELD_PREP(OP_LDF_I8, imm8) | + FIELD_PREP(OP_LDF_SW, swap) | + FIELD_PREP(OP_LDF_ZF, zero) | + FIELD_PREP(OP_LDF_BMASK, bmask) | + FIELD_PREP(OP_LDF_SHF, shift) | + FIELD_PREP(OP_LDF_WR_AB, wr_both); + + nfp_prog_push(nfp_prog, insn); +} + +static void +emit_ld_field_any(struct nfp_prog *nfp_prog, enum shf_sc sc, u8 shift, + u32 dst, u8 bmask, u32 src, bool zero) +{ + struct nfp_insn_re_regs reg; + int err; + + err = swreg_to_restricted(reg_none(), dst, src, ®, true); + if (err) { + nfp_prog->error = err; + return; + } + + __emit_ld_field(nfp_prog, sc, reg.areg, bmask, reg.breg, shift, + reg.i8, zero, reg.swap, reg.wr_both); +} + +static void +emit_ld_field(struct nfp_prog *nfp_prog, u32 dst, u8 bmask, u32 src, + enum shf_sc sc, u8 shift) +{ + emit_ld_field_any(nfp_prog, sc, shift, dst, bmask, src, false); +} + +/* --- Wrappers --- */ +static bool pack_immed(u32 imm, u16 *val, enum immed_shift *shift) +{ + if (!(imm & 0xffff0000)) { + *val = imm; + *shift = IMMED_SHIFT_0B; + } else if (!(imm & 0xff0000ff)) { + *val = imm >> 8; + *shift = IMMED_SHIFT_1B; + } else if (!(imm & 0x0000ffff)) { + *val = imm >> 16; + *shift = IMMED_SHIFT_2B; + } else { + return false; + } + + return true; +} + +static void wrp_immed(struct nfp_prog *nfp_prog, u32 dst, u32 imm) +{ + enum immed_shift shift; + u16 val; + + if (pack_immed(imm, &val, &shift)) { + emit_immed(nfp_prog, dst, val, IMMED_WIDTH_ALL, false, shift); + } else if (pack_immed(~imm, &val, &shift)) { + emit_immed(nfp_prog, dst, val, IMMED_WIDTH_ALL, true, shift); + } else { + emit_immed(nfp_prog, dst, imm & 0xffff, IMMED_WIDTH_ALL, + false, IMMED_SHIFT_0B); + emit_immed(nfp_prog, dst, imm >> 16, IMMED_WIDTH_WORD, + false, IMMED_SHIFT_2B); + } +} + +/* ur_load_imm_any() - encode immediate or use tmp register (unrestricted) + * If the @imm is small enough encode it directly in operand and return + * otherwise load @imm to a spare register and return its encoding. + */ +static u32 ur_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, u32 tmp_reg) +{ + if (FIELD_FIT(UR_REG_IMM_MAX, imm)) + return reg_imm(imm); + + wrp_immed(nfp_prog, tmp_reg, imm); + return tmp_reg; +} + +/* re_load_imm_any() - encode immediate or use tmp register (restricted) + * If the @imm is small enough encode it directly in operand and return + * otherwise load @imm to a spare register and return its encoding. + */ +static u32 re_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, u32 tmp_reg) +{ + if (FIELD_FIT(RE_REG_IMM_MAX, imm)) + return reg_imm(imm); + + wrp_immed(nfp_prog, tmp_reg, imm); + return tmp_reg; +} + +static void +wrp_br_special(struct nfp_prog *nfp_prog, enum br_mask mask, + enum br_special special) +{ + emit_br(nfp_prog, mask, 0, 0); + + nfp_prog->prog[nfp_prog->prog_len - 1] |= + FIELD_PREP(OP_BR_SPECIAL, special); +} + +static void wrp_reg_mov(struct nfp_prog *nfp_prog, u16 dst, u16 src) +{ + emit_alu(nfp_prog, reg_both(dst), reg_none(), ALU_OP_NONE, reg_b(src)); +} + +static int +construct_data_ind_ld(struct nfp_prog *nfp_prog, u16 offset, + u16 src, bool src_valid, u8 size) +{ + unsigned int i; + u16 shift, sz; + u32 tmp_reg; + + /* We load the value from the address indicated in @offset and then + * shift out the data we don't need. Note: this is big endian! + */ + sz = size < 4 ? 4 : size; + shift = size < 4 ? 4 - size : 0; + + if (src_valid) { + /* Calculate the true offset (src_reg + imm) */ + tmp_reg = ur_load_imm_any(nfp_prog, offset, imm_b(nfp_prog)); + emit_alu(nfp_prog, imm_both(nfp_prog), + reg_a(src), ALU_OP_ADD, tmp_reg); + /* Check packet length (size guaranteed to fit b/c it's u8) */ + emit_alu(nfp_prog, imm_a(nfp_prog), + imm_a(nfp_prog), ALU_OP_ADD, reg_imm(size)); + emit_alu(nfp_prog, reg_none(), + NFP_BPF_ABI_LEN, ALU_OP_SUB, imm_a(nfp_prog)); + wrp_br_special(nfp_prog, BR_BLO, OP_BR_GO_ABORT); + /* Load data */ + emit_cmd(nfp_prog, CMD_TGT_READ8, CMD_MODE_32b, 0, + pkt_reg(nfp_prog), imm_b(nfp_prog), sz - 1, true); + } else { + /* Check packet length */ + tmp_reg = ur_load_imm_any(nfp_prog, offset + size, + imm_a(nfp_prog)); + emit_alu(nfp_prog, reg_none(), + NFP_BPF_ABI_LEN, ALU_OP_SUB, tmp_reg); + wrp_br_special(nfp_prog, BR_BLO, OP_BR_GO_ABORT); + /* Load data */ + tmp_reg = re_load_imm_any(nfp_prog, offset, imm_b(nfp_prog)); + emit_cmd(nfp_prog, CMD_TGT_READ8, CMD_MODE_32b, 0, + pkt_reg(nfp_prog), tmp_reg, sz - 1, true); + } + + i = 0; + if (shift) + emit_shf(nfp_prog, reg_both(0), reg_none(), SHF_OP_NONE, + reg_xfer(0), SHF_SC_R_SHF, shift * 8); + else + for (; i * 4 < size; i++) + emit_alu(nfp_prog, reg_both(i), + reg_none(), ALU_OP_NONE, reg_xfer(i)); + + if (i < 2) + wrp_immed(nfp_prog, reg_both(1), 0); + + return 0; +} + +static int construct_data_ld(struct nfp_prog *nfp_prog, u16 offset, u8 size) +{ + return construct_data_ind_ld(nfp_prog, offset, 0, false, size); +} + +static void +wrp_alu_imm(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u32 imm) +{ + u32 tmp_reg; + + if (alu_op == ALU_OP_AND) { + if (!imm) + wrp_immed(nfp_prog, reg_both(dst), 0); + if (!imm || !~imm) + return; + } + if (alu_op == ALU_OP_OR) { + if (!~imm) + wrp_immed(nfp_prog, reg_both(dst), ~0U); + if (!imm || !~imm) + return; + } + if (alu_op == ALU_OP_XOR) { + if (!~imm) + emit_alu(nfp_prog, reg_both(dst), reg_none(), + ALU_OP_NEG, reg_b(dst)); + if (!imm || !~imm) + return; + } + + tmp_reg = ur_load_imm_any(nfp_prog, imm, imm_b(nfp_prog)); + emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, tmp_reg); +} + +static int +wrp_alu64_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + enum alu_op alu_op, bool skip) +{ + const struct bpf_insn *insn = &meta->insn; + u64 imm = insn->imm; /* sign extend */ + + if (skip) { + meta->skip = true; + return 0; + } + + wrp_alu_imm(nfp_prog, insn->dst_reg * 2, alu_op, imm & ~0U); + wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, alu_op, imm >> 32); + + return 0; +} + +static int +wrp_alu64_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + enum alu_op alu_op) +{ + u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2; + + emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src)); + emit_alu(nfp_prog, reg_both(dst + 1), + reg_a(dst + 1), alu_op, reg_b(src + 1)); + + return 0; +} + +static int +wrp_alu32_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + enum alu_op alu_op, bool skip) +{ + const struct bpf_insn *insn = &meta->insn; + + if (skip) { + meta->skip = true; + return 0; + } + + wrp_alu_imm(nfp_prog, insn->dst_reg * 2, alu_op, insn->imm); + wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0); + + return 0; +} + +static int +wrp_alu32_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + enum alu_op alu_op) +{ + u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2; + + emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src)); + wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0); + + return 0; +} + +static void +wrp_test_reg_one(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u8 src, + enum br_mask br_mask, u16 off) +{ + emit_alu(nfp_prog, reg_none(), reg_a(dst), alu_op, reg_b(src)); + emit_br(nfp_prog, br_mask, off, 0); +} + +static int +wrp_test_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + enum alu_op alu_op, enum br_mask br_mask) +{ + const struct bpf_insn *insn = &meta->insn; + + if (insn->off < 0) /* TODO */ + return -ENOTSUPP; + + wrp_test_reg_one(nfp_prog, insn->dst_reg * 2, alu_op, + insn->src_reg * 2, br_mask, insn->off); + wrp_test_reg_one(nfp_prog, insn->dst_reg * 2 + 1, alu_op, + insn->src_reg * 2 + 1, br_mask, insn->off); + + return 0; +} + +static int +wrp_cmp_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + enum br_mask br_mask, bool swap) +{ + const struct bpf_insn *insn = &meta->insn; + u64 imm = insn->imm; /* sign extend */ + u8 reg = insn->dst_reg * 2; + u32 tmp_reg; + + if (insn->off < 0) /* TODO */ + return -ENOTSUPP; + + tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog)); + if (!swap) + emit_alu(nfp_prog, reg_none(), reg_a(reg), ALU_OP_SUB, tmp_reg); + else + emit_alu(nfp_prog, reg_none(), tmp_reg, ALU_OP_SUB, reg_a(reg)); + + tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog)); + if (!swap) + emit_alu(nfp_prog, reg_none(), + reg_a(reg + 1), ALU_OP_SUB_C, tmp_reg); + else + emit_alu(nfp_prog, reg_none(), + tmp_reg, ALU_OP_SUB_C, reg_a(reg + 1)); + + emit_br(nfp_prog, br_mask, insn->off, 0); + + return 0; +} + +static int +wrp_cmp_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + enum br_mask br_mask, bool swap) +{ + const struct bpf_insn *insn = &meta->insn; + u8 areg = insn->src_reg * 2, breg = insn->dst_reg * 2; + + if (insn->off < 0) /* TODO */ + return -ENOTSUPP; + + if (swap) { + areg ^= breg; + breg ^= areg; + areg ^= breg; + } + + emit_alu(nfp_prog, reg_none(), reg_a(areg), ALU_OP_SUB, reg_b(breg)); + emit_alu(nfp_prog, reg_none(), + reg_a(areg + 1), ALU_OP_SUB_C, reg_b(breg + 1)); + emit_br(nfp_prog, br_mask, insn->off, 0); + + return 0; +} + +/* --- Callbacks --- */ +static int mov_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + wrp_reg_mov(nfp_prog, insn->dst_reg * 2, insn->src_reg * 2); + wrp_reg_mov(nfp_prog, insn->dst_reg * 2 + 1, insn->src_reg * 2 + 1); + + return 0; +} + +static int mov_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + u64 imm = meta->insn.imm; /* sign extend */ + + wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2), imm & ~0U); + wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), imm >> 32); + + return 0; +} + +static int xor_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu64_reg(nfp_prog, meta, ALU_OP_XOR); +} + +static int xor_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu64_imm(nfp_prog, meta, ALU_OP_XOR, !meta->insn.imm); +} + +static int and_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu64_reg(nfp_prog, meta, ALU_OP_AND); +} + +static int and_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu64_imm(nfp_prog, meta, ALU_OP_AND, !~meta->insn.imm); +} + +static int or_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu64_reg(nfp_prog, meta, ALU_OP_OR); +} + +static int or_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu64_imm(nfp_prog, meta, ALU_OP_OR, !meta->insn.imm); +} + +static int add_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + emit_alu(nfp_prog, reg_both(insn->dst_reg * 2), + reg_a(insn->dst_reg * 2), ALU_OP_ADD, + reg_b(insn->src_reg * 2)); + emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1), + reg_a(insn->dst_reg * 2 + 1), ALU_OP_ADD_C, + reg_b(insn->src_reg * 2 + 1)); + + return 0; +} + +static int add_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u64 imm = insn->imm; /* sign extend */ + + wrp_alu_imm(nfp_prog, insn->dst_reg * 2, ALU_OP_ADD, imm & ~0U); + wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, ALU_OP_ADD_C, imm >> 32); + + return 0; +} + +static int sub_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + emit_alu(nfp_prog, reg_both(insn->dst_reg * 2), + reg_a(insn->dst_reg * 2), ALU_OP_SUB, + reg_b(insn->src_reg * 2)); + emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1), + reg_a(insn->dst_reg * 2 + 1), ALU_OP_SUB_C, + reg_b(insn->src_reg * 2 + 1)); + + return 0; +} + +static int sub_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u64 imm = insn->imm; /* sign extend */ + + wrp_alu_imm(nfp_prog, insn->dst_reg * 2, ALU_OP_SUB, imm & ~0U); + wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, ALU_OP_SUB_C, imm >> 32); + + return 0; +} + +static int shl_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + if (insn->imm != 32) + return 1; /* TODO */ + + wrp_reg_mov(nfp_prog, insn->dst_reg * 2 + 1, insn->dst_reg * 2); + wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2), 0); + + return 0; +} + +static int shr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + if (insn->imm != 32) + return 1; /* TODO */ + + wrp_reg_mov(nfp_prog, insn->dst_reg * 2, insn->dst_reg * 2 + 1); + wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0); + + return 0; +} + +static int mov_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + wrp_reg_mov(nfp_prog, insn->dst_reg * 2, insn->src_reg * 2); + wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0); + + return 0; +} + +static int mov_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2), insn->imm); + wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0); + + return 0; +} + +static int xor_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_reg(nfp_prog, meta, ALU_OP_XOR); +} + +static int xor_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_imm(nfp_prog, meta, ALU_OP_XOR, !~meta->insn.imm); +} + +static int and_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_reg(nfp_prog, meta, ALU_OP_AND); +} + +static int and_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_imm(nfp_prog, meta, ALU_OP_AND, !~meta->insn.imm); +} + +static int or_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_reg(nfp_prog, meta, ALU_OP_OR); +} + +static int or_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_imm(nfp_prog, meta, ALU_OP_OR, !meta->insn.imm); +} + +static int add_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_reg(nfp_prog, meta, ALU_OP_ADD); +} + +static int add_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_imm(nfp_prog, meta, ALU_OP_ADD, !meta->insn.imm); +} + +static int sub_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_reg(nfp_prog, meta, ALU_OP_SUB); +} + +static int sub_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_imm(nfp_prog, meta, ALU_OP_SUB, !meta->insn.imm); +} + +static int shl_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + if (!insn->imm) + return 1; /* TODO: zero shift means indirect */ + + emit_shf(nfp_prog, reg_both(insn->dst_reg * 2), + reg_none(), SHF_OP_NONE, reg_b(insn->dst_reg * 2), + SHF_SC_L_SHF, insn->imm); + wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0); + + return 0; +} + +static int imm_ld8_part2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + wrp_immed(nfp_prog, reg_both(nfp_meta_prev(meta)->insn.dst_reg * 2 + 1), + meta->insn.imm); + + return 0; +} + +static int imm_ld8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + meta->double_cb = imm_ld8_part2; + wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2), insn->imm); + + return 0; +} + +static int data_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return construct_data_ld(nfp_prog, meta->insn.imm, 1); +} + +static int data_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return construct_data_ld(nfp_prog, meta->insn.imm, 2); +} + +static int data_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return construct_data_ld(nfp_prog, meta->insn.imm, 4); +} + +static int data_ind_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return construct_data_ind_ld(nfp_prog, meta->insn.imm, + meta->insn.src_reg * 2, true, 1); +} + +static int data_ind_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return construct_data_ind_ld(nfp_prog, meta->insn.imm, + meta->insn.src_reg * 2, true, 2); +} + +static int data_ind_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return construct_data_ind_ld(nfp_prog, meta->insn.imm, + meta->insn.src_reg * 2, true, 4); +} + +static int mem_ldx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + if (meta->insn.off == offsetof(struct sk_buff, len)) + emit_alu(nfp_prog, reg_both(meta->insn.dst_reg * 2), + reg_none(), ALU_OP_NONE, NFP_BPF_ABI_LEN); + else + return -ENOTSUPP; + + return 0; +} + +static int jump(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + if (meta->insn.off < 0) /* TODO */ + return -ENOTSUPP; + emit_br(nfp_prog, BR_UNC, meta->insn.off, 0); + + return 0; +} + +static int jeq_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u64 imm = insn->imm; /* sign extend */ + u32 or1 = reg_a(insn->dst_reg * 2), or2 = reg_b(insn->dst_reg * 2 + 1); + u32 tmp_reg; + + if (insn->off < 0) /* TODO */ + return -ENOTSUPP; + + if (imm & ~0U) { + tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog)); + emit_alu(nfp_prog, imm_a(nfp_prog), + reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg); + or1 = imm_a(nfp_prog); + } + + if (imm >> 32) { + tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog)); + emit_alu(nfp_prog, imm_b(nfp_prog), + reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR, tmp_reg); + or2 = imm_b(nfp_prog); + } + + emit_alu(nfp_prog, reg_none(), or1, ALU_OP_OR, or2); + emit_br(nfp_prog, BR_BEQ, insn->off, 0); + + return 0; +} + +static int jgt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_cmp_imm(nfp_prog, meta, BR_BLO, false); +} + +static int jge_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_cmp_imm(nfp_prog, meta, BR_BHS, true); +} + +static int jset_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u64 imm = insn->imm; /* sign extend */ + u32 tmp_reg; + + if (insn->off < 0) /* TODO */ + return -ENOTSUPP; + + if (!imm) { + meta->skip = true; + return 0; + } + + if (imm & ~0U) { + tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog)); + emit_alu(nfp_prog, reg_none(), + reg_a(insn->dst_reg * 2), ALU_OP_AND, tmp_reg); + emit_br(nfp_prog, BR_BNE, insn->off, 0); + } + + if (imm >> 32) { + tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog)); + emit_alu(nfp_prog, reg_none(), + reg_a(insn->dst_reg * 2 + 1), ALU_OP_AND, tmp_reg); + emit_br(nfp_prog, BR_BNE, insn->off, 0); + } + + return 0; +} + +static int jne_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u64 imm = insn->imm; /* sign extend */ + u32 tmp_reg; + + if (insn->off < 0) /* TODO */ + return -ENOTSUPP; + + if (!imm) { + emit_alu(nfp_prog, reg_none(), reg_a(insn->dst_reg * 2), + ALU_OP_OR, reg_b(insn->dst_reg * 2 + 1)); + emit_br(nfp_prog, BR_BNE, insn->off, 0); + } + + tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog)); + emit_alu(nfp_prog, reg_none(), + reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg); + emit_br(nfp_prog, BR_BNE, insn->off, 0); + + tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog)); + emit_alu(nfp_prog, reg_none(), + reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR, tmp_reg); + emit_br(nfp_prog, BR_BNE, insn->off, 0); + + return 0; +} + +static int jeq_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + if (insn->off < 0) /* TODO */ + return -ENOTSUPP; + + emit_alu(nfp_prog, imm_a(nfp_prog), reg_a(insn->dst_reg * 2), + ALU_OP_XOR, reg_b(insn->src_reg * 2)); + emit_alu(nfp_prog, imm_b(nfp_prog), reg_a(insn->dst_reg * 2 + 1), + ALU_OP_XOR, reg_b(insn->src_reg * 2 + 1)); + emit_alu(nfp_prog, reg_none(), + imm_a(nfp_prog), ALU_OP_OR, imm_b(nfp_prog)); + emit_br(nfp_prog, BR_BEQ, insn->off, 0); + + return 0; +} + +static int jgt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_cmp_reg(nfp_prog, meta, BR_BLO, false); +} + +static int jge_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_cmp_reg(nfp_prog, meta, BR_BHS, true); +} + +static int jset_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_test_reg(nfp_prog, meta, ALU_OP_AND, BR_BNE); +} + +static int jne_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_test_reg(nfp_prog, meta, ALU_OP_XOR, BR_BNE); +} + +static int goto_out(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + wrp_br_special(nfp_prog, BR_UNC, OP_BR_GO_OUT); + + return 0; +} + +static const instr_cb_t instr_cb[256] = { + [BPF_ALU64 | BPF_MOV | BPF_X] = mov_reg64, + [BPF_ALU64 | BPF_MOV | BPF_K] = mov_imm64, + [BPF_ALU64 | BPF_XOR | BPF_X] = xor_reg64, + [BPF_ALU64 | BPF_XOR | BPF_K] = xor_imm64, + [BPF_ALU64 | BPF_AND | BPF_X] = and_reg64, + [BPF_ALU64 | BPF_AND | BPF_K] = and_imm64, + [BPF_ALU64 | BPF_OR | BPF_X] = or_reg64, + [BPF_ALU64 | BPF_OR | BPF_K] = or_imm64, + [BPF_ALU64 | BPF_ADD | BPF_X] = add_reg64, + [BPF_ALU64 | BPF_ADD | BPF_K] = add_imm64, + [BPF_ALU64 | BPF_SUB | BPF_X] = sub_reg64, + [BPF_ALU64 | BPF_SUB | BPF_K] = sub_imm64, + [BPF_ALU64 | BPF_LSH | BPF_K] = shl_imm64, + [BPF_ALU64 | BPF_RSH | BPF_K] = shr_imm64, + [BPF_ALU | BPF_MOV | BPF_X] = mov_reg, + [BPF_ALU | BPF_MOV | BPF_K] = mov_imm, + [BPF_ALU | BPF_XOR | BPF_X] = xor_reg, + [BPF_ALU | BPF_XOR | BPF_K] = xor_imm, + [BPF_ALU | BPF_AND | BPF_X] = and_reg, + [BPF_ALU | BPF_AND | BPF_K] = and_imm, + [BPF_ALU | BPF_OR | BPF_X] = or_reg, + [BPF_ALU | BPF_OR | BPF_K] = or_imm, + [BPF_ALU | BPF_ADD | BPF_X] = add_reg, + [BPF_ALU | BPF_ADD | BPF_K] = add_imm, + [BPF_ALU | BPF_SUB | BPF_X] = sub_reg, + [BPF_ALU | BPF_SUB | BPF_K] = sub_imm, + [BPF_ALU | BPF_LSH | BPF_K] = shl_imm, + [BPF_LD | BPF_IMM | BPF_DW] = imm_ld8, + [BPF_LD | BPF_ABS | BPF_B] = data_ld1, + [BPF_LD | BPF_ABS | BPF_H] = data_ld2, + [BPF_LD | BPF_ABS | BPF_W] = data_ld4, + [BPF_LD | BPF_IND | BPF_B] = data_ind_ld1, + [BPF_LD | BPF_IND | BPF_H] = data_ind_ld2, + [BPF_LD | BPF_IND | BPF_W] = data_ind_ld4, + [BPF_LDX | BPF_MEM | BPF_W] = mem_ldx4, + [BPF_JMP | BPF_JA | BPF_K] = jump, + [BPF_JMP | BPF_JEQ | BPF_K] = jeq_imm, + [BPF_JMP | BPF_JGT | BPF_K] = jgt_imm, + [BPF_JMP | BPF_JGE | BPF_K] = jge_imm, + [BPF_JMP | BPF_JSET | BPF_K] = jset_imm, + [BPF_JMP | BPF_JNE | BPF_K] = jne_imm, + [BPF_JMP | BPF_JEQ | BPF_X] = jeq_reg, + [BPF_JMP | BPF_JGT | BPF_X] = jgt_reg, + [BPF_JMP | BPF_JGE | BPF_X] = jge_reg, + [BPF_JMP | BPF_JSET | BPF_X] = jset_reg, + [BPF_JMP | BPF_JNE | BPF_X] = jne_reg, + [BPF_JMP | BPF_EXIT] = goto_out, +}; + +/* --- Misc code --- */ +static void br_set_offset(u64 *instr, u16 offset) +{ + u16 addr_lo, addr_hi; + + addr_lo = offset & (OP_BR_ADDR_LO >> __bf_shf(OP_BR_ADDR_LO)); + addr_hi = offset != addr_lo; + *instr &= ~(OP_BR_ADDR_HI | OP_BR_ADDR_LO); + *instr |= FIELD_PREP(OP_BR_ADDR_HI, addr_hi); + *instr |= FIELD_PREP(OP_BR_ADDR_LO, addr_lo); +} + +/* --- Assembler logic --- */ +static int nfp_fixup_branches(struct nfp_prog *nfp_prog) +{ + struct nfp_insn_meta *meta, *next; + u32 off, br_idx; + u32 idx; + + nfp_for_each_insn_walk2(nfp_prog, meta, next) { + if (meta->skip) + continue; + if (BPF_CLASS(meta->insn.code) != BPF_JMP) + continue; + + br_idx = nfp_prog_offset_to_index(nfp_prog, next->off) - 1; + if (!nfp_is_br(nfp_prog->prog[br_idx])) { + pr_err("Fixup found block not ending in branch %d %02x %016llx!!\n", + br_idx, meta->insn.code, nfp_prog->prog[br_idx]); + return -ELOOP; + } + /* Leave special branches for later */ + if (FIELD_GET(OP_BR_SPECIAL, nfp_prog->prog[br_idx])) + continue; + + /* Find the target offset in assembler realm */ + off = meta->insn.off; + if (!off) { + pr_err("Fixup found zero offset!!\n"); + return -ELOOP; + } + + while (off && nfp_meta_has_next(nfp_prog, next)) { + next = nfp_meta_next(next); + off--; + } + if (off) { + pr_err("Fixup found too large jump!! %d\n", off); + return -ELOOP; + } + + if (next->skip) { + pr_err("Branch landing on removed instruction!!\n"); + return -ELOOP; + } + + for (idx = nfp_prog_offset_to_index(nfp_prog, meta->off); + idx <= br_idx; idx++) { + if (!nfp_is_br(nfp_prog->prog[idx])) + continue; + br_set_offset(&nfp_prog->prog[idx], next->off); + } + } + + /* Fixup 'goto out's separately, they can be scattered around */ + for (br_idx = 0; br_idx < nfp_prog->prog_len; br_idx++) { + enum br_special special; + + if ((nfp_prog->prog[br_idx] & OP_BR_BASE_MASK) != OP_BR_BASE) + continue; + + special = FIELD_GET(OP_BR_SPECIAL, nfp_prog->prog[br_idx]); + switch (special) { + case OP_BR_NORMAL: + break; + case OP_BR_GO_OUT: + br_set_offset(&nfp_prog->prog[br_idx], + nfp_prog->tgt_out); + break; + case OP_BR_GO_ABORT: + br_set_offset(&nfp_prog->prog[br_idx], + nfp_prog->tgt_abort); + break; + } + + nfp_prog->prog[br_idx] &= ~OP_BR_SPECIAL; + } + + return 0; +} + +static void nfp_intro(struct nfp_prog *nfp_prog) +{ + emit_alu(nfp_prog, pkt_reg(nfp_prog), + reg_none(), ALU_OP_NONE, NFP_BPF_ABI_PKT); +} + +static void nfp_outro_tc_legacy(struct nfp_prog *nfp_prog) +{ + const u8 act2code[] = { + [NN_ACT_TC_DROP] = 0x22, + }; + /* Target for aborts */ + nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog); + wrp_immed(nfp_prog, reg_both(0), 0); + + /* Target for normal exits */ + nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog); + /* Legacy TC mode: + * 0 0x11 -> pass, count as stat0 + * -1 drop 0x22 -> drop, count as stat1 + * redir 0x24 -> redir, count as stat1 + * ife mark 0x21 -> pass, count as stat1 + * ife + tx 0x24 -> redir, count as stat1 + */ + emit_br_byte_neq(nfp_prog, reg_b(0), 0xff, 0, nfp_prog->tgt_done, 2); + emit_alu(nfp_prog, reg_a(0), + reg_none(), ALU_OP_NONE, NFP_BPF_ABI_FLAGS); + emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x11), SHF_SC_L_SHF, 16); + + emit_br(nfp_prog, BR_UNC, nfp_prog->tgt_done, 1); + emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(act2code[nfp_prog->act]), + SHF_SC_L_SHF, 16); +} + +static void nfp_outro(struct nfp_prog *nfp_prog) +{ + switch (nfp_prog->act) { + case NN_ACT_TC_DROP: + nfp_outro_tc_legacy(nfp_prog); + break; + } +} + +static int nfp_translate(struct nfp_prog *nfp_prog) +{ + struct nfp_insn_meta *meta; + int err; + + nfp_intro(nfp_prog); + if (nfp_prog->error) + return nfp_prog->error; + + list_for_each_entry(meta, &nfp_prog->insns, l) { + instr_cb_t cb = instr_cb[meta->insn.code]; + + meta->off = nfp_prog_current_offset(nfp_prog); + + if (meta->skip) { + nfp_prog->n_translated++; + continue; + } + + if (nfp_meta_has_prev(nfp_prog, meta) && + nfp_meta_prev(meta)->double_cb) + cb = nfp_meta_prev(meta)->double_cb; + if (!cb) + return -ENOENT; + err = cb(nfp_prog, meta); + if (err) + return err; + + nfp_prog->n_translated++; + } + + nfp_outro(nfp_prog); + if (nfp_prog->error) + return nfp_prog->error; + + return nfp_fixup_branches(nfp_prog); +} + +static int +nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog, + unsigned int cnt) +{ + unsigned int i; + + for (i = 0; i < cnt; i++) { + struct nfp_insn_meta *meta; + + meta = kzalloc(sizeof(*meta), GFP_KERNEL); + if (!meta) + return -ENOMEM; + + meta->insn = prog[i]; + meta->n = i; + + list_add_tail(&meta->l, &nfp_prog->insns); + } + + return 0; +} + +/* --- Optimizations --- */ +static void nfp_bpf_opt_reg_init(struct nfp_prog *nfp_prog) +{ + struct nfp_insn_meta *meta; + + list_for_each_entry(meta, &nfp_prog->insns, l) { + struct bpf_insn insn = meta->insn; + + /* Programs converted from cBPF start with register xoring */ + if (insn.code == (BPF_ALU64 | BPF_XOR | BPF_X) && + insn.src_reg == insn.dst_reg) + continue; + + /* Programs start with R6 = R1 but we ignore the skb pointer */ + if (insn.code == (BPF_ALU64 | BPF_MOV | BPF_X) && + insn.src_reg == 1 && insn.dst_reg == 6) + meta->skip = true; + + /* Return as soon as something doesn't match */ + if (!meta->skip) + return; + } +} + +/* Try to rename registers so that program uses only low ones */ +static int nfp_bpf_opt_reg_rename(struct nfp_prog *nfp_prog) +{ + bool reg_used[MAX_BPF_REG] = {}; + u8 tgt_reg[MAX_BPF_REG] = {}; + struct nfp_insn_meta *meta; + unsigned int i, j; + + list_for_each_entry(meta, &nfp_prog->insns, l) { + if (meta->skip) + continue; + + reg_used[meta->insn.src_reg] = true; + reg_used[meta->insn.dst_reg] = true; + } + + for (i = 0, j = 0; i < ARRAY_SIZE(tgt_reg); i++) { + if (!reg_used[i]) + continue; + + tgt_reg[i] = j++; + } + nfp_prog->num_regs = j; + + list_for_each_entry(meta, &nfp_prog->insns, l) { + meta->insn.src_reg = tgt_reg[meta->insn.src_reg]; + meta->insn.dst_reg = tgt_reg[meta->insn.dst_reg]; + } + + return 0; +} + +/* Remove masking after load since our load guarantees this is not needed */ +static void nfp_bpf_opt_ld_mask(struct nfp_prog *nfp_prog) +{ + struct nfp_insn_meta *meta1, *meta2; + const s32 exp_mask[] = { + [BPF_B] = 0x000000ffU, + [BPF_H] = 0x0000ffffU, + [BPF_W] = 0xffffffffU, + }; + + nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) { + struct bpf_insn insn, next; + + insn = meta1->insn; + next = meta2->insn; + + if (BPF_CLASS(insn.code) != BPF_LD) + continue; + if (BPF_MODE(insn.code) != BPF_ABS && + BPF_MODE(insn.code) != BPF_IND) + continue; + + if (next.code != (BPF_ALU64 | BPF_AND | BPF_K)) + continue; + + if (!exp_mask[BPF_SIZE(insn.code)]) + continue; + if (exp_mask[BPF_SIZE(insn.code)] != next.imm) + continue; + + if (next.src_reg || next.dst_reg) + continue; + + meta2->skip = true; + } +} + +static void nfp_bpf_opt_ld_shift(struct nfp_prog *nfp_prog) +{ + struct nfp_insn_meta *meta1, *meta2, *meta3; + + nfp_for_each_insn_walk3(nfp_prog, meta1, meta2, meta3) { + struct bpf_insn insn, next1, next2; + + insn = meta1->insn; + next1 = meta2->insn; + next2 = meta3->insn; + + if (BPF_CLASS(insn.code) != BPF_LD) + continue; + if (BPF_MODE(insn.code) != BPF_ABS && + BPF_MODE(insn.code) != BPF_IND) + continue; + if (BPF_SIZE(insn.code) != BPF_W) + continue; + + if (!(next1.code == (BPF_LSH | BPF_K | BPF_ALU64) && + next2.code == (BPF_RSH | BPF_K | BPF_ALU64)) && + !(next1.code == (BPF_RSH | BPF_K | BPF_ALU64) && + next2.code == (BPF_LSH | BPF_K | BPF_ALU64))) + continue; + + if (next1.src_reg || next1.dst_reg || + next2.src_reg || next2.dst_reg) + continue; + + if (next1.imm != 0x20 || next2.imm != 0x20) + continue; + + meta2->skip = true; + meta3->skip = true; + } +} + +static int nfp_bpf_optimize(struct nfp_prog *nfp_prog) +{ + int ret; + + nfp_bpf_opt_reg_init(nfp_prog); + + ret = nfp_bpf_opt_reg_rename(nfp_prog); + if (ret) + return ret; + + nfp_bpf_opt_ld_mask(nfp_prog); + nfp_bpf_opt_ld_shift(nfp_prog); + + return 0; +} + +/** + * nfp_bpf_jit() - translate BPF code into NFP assembly + * @filter: kernel BPF filter struct + * @prog_mem: memory to store assembler instructions + * @act: action attached to this eBPF program + * @prog_start: offset of the first instruction when loaded + * @prog_done: where to jump on exit + * @prog_sz: size of @prog_mem in instructions + * @res: achieved parameters of translation results + */ +int +nfp_bpf_jit(struct bpf_prog *filter, void *prog_mem, + enum nfp_bpf_action_type act, + unsigned int prog_start, unsigned int prog_done, + unsigned int prog_sz, struct nfp_bpf_result *res) +{ + struct nfp_prog *nfp_prog; + int ret; + + nfp_prog = kzalloc(sizeof(*nfp_prog), GFP_KERNEL); + if (!nfp_prog) + return -ENOMEM; + + INIT_LIST_HEAD(&nfp_prog->insns); + nfp_prog->act = act; + nfp_prog->start_off = prog_start; + nfp_prog->tgt_done = prog_done; + + ret = nfp_prog_prepare(nfp_prog, filter->insnsi, filter->len); + if (ret) + goto out; + + ret = nfp_prog_verify(nfp_prog, filter); + if (ret) + goto out; + + ret = nfp_bpf_optimize(nfp_prog); + if (ret) + goto out; + + if (nfp_prog->num_regs <= 7) + nfp_prog->regs_per_thread = 16; + else + nfp_prog->regs_per_thread = 32; + + nfp_prog->prog = prog_mem; + nfp_prog->__prog_alloc_len = prog_sz; + + ret = nfp_translate(nfp_prog); + if (ret) { + pr_err("Translation failed with error %d (translated: %u)\n", + ret, nfp_prog->n_translated); + ret = -EINVAL; + } + + res->n_instr = nfp_prog->prog_len; + res->dense_mode = nfp_prog->num_regs <= 7; +out: + nfp_prog_free(nfp_prog); + + return ret; +} diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c b/drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c new file mode 100644 index 000000000000..ef6775b54168 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c @@ -0,0 +1,162 @@ +/* + * Copyright (C) 2016 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#define pr_fmt(fmt) "NFP net bpf: " fmt + +#include +#include +#include +#include + +#include "nfp_bpf.h" + +/* Analyzer/verifier definitions */ +struct nfp_bpf_analyzer_priv { + struct nfp_prog *prog; + struct nfp_insn_meta *meta; +}; + +static struct nfp_insn_meta * +nfp_bpf_goto_meta(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + unsigned int insn_idx, unsigned int n_insns) +{ + unsigned int forward, backward, i; + + backward = meta->n - insn_idx; + forward = insn_idx - meta->n; + + if (min(forward, backward) > n_insns - insn_idx - 1) { + backward = n_insns - insn_idx - 1; + meta = nfp_prog_last_meta(nfp_prog); + } + if (min(forward, backward) > insn_idx && backward > insn_idx) { + forward = insn_idx; + meta = nfp_prog_first_meta(nfp_prog); + } + + if (forward < backward) + for (i = 0; i < forward; i++) + meta = nfp_meta_next(meta); + else + for (i = 0; i < backward; i++) + meta = nfp_meta_prev(meta); + + return meta; +} + +static int +nfp_bpf_check_exit(struct nfp_prog *nfp_prog, + const struct bpf_verifier_env *env) +{ + const struct bpf_reg_state *reg0 = &env->cur_state.regs[0]; + + if (reg0->type != CONST_IMM) { + pr_info("unsupported exit state: %d, imm: %llx\n", + reg0->type, reg0->imm); + return -EINVAL; + } + + if (reg0->imm != 0 && (reg0->imm & ~0U) != ~0U) { + pr_info("unsupported exit state: %d, imm: %llx\n", + reg0->type, reg0->imm); + return -EINVAL; + } + + return 0; +} + +static int +nfp_bpf_check_ctx_ptr(struct nfp_prog *nfp_prog, + const struct bpf_verifier_env *env, u8 reg) +{ + if (env->cur_state.regs[reg].type != PTR_TO_CTX) + return -EINVAL; + + return 0; +} + +static int +nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx) +{ + struct nfp_bpf_analyzer_priv *priv = env->analyzer_priv; + struct nfp_insn_meta *meta = priv->meta; + + meta = nfp_bpf_goto_meta(priv->prog, meta, insn_idx, env->prog->len); + priv->meta = meta; + + if (meta->insn.src_reg == BPF_REG_10 || + meta->insn.dst_reg == BPF_REG_10) { + pr_err("stack not yet supported\n"); + return -EINVAL; + } + if (meta->insn.src_reg >= MAX_BPF_REG || + meta->insn.dst_reg >= MAX_BPF_REG) { + pr_err("program uses extended registers - jit hardening?\n"); + return -EINVAL; + } + + if (meta->insn.code == (BPF_JMP | BPF_EXIT)) + return nfp_bpf_check_exit(priv->prog, env); + + if ((meta->insn.code & ~BPF_SIZE_MASK) == (BPF_LDX | BPF_MEM)) + return nfp_bpf_check_ctx_ptr(priv->prog, env, + meta->insn.src_reg); + if ((meta->insn.code & ~BPF_SIZE_MASK) == (BPF_STX | BPF_MEM)) + return nfp_bpf_check_ctx_ptr(priv->prog, env, + meta->insn.dst_reg); + + return 0; +} + +static const struct bpf_ext_analyzer_ops nfp_bpf_analyzer_ops = { + .insn_hook = nfp_verify_insn, +}; + +int nfp_prog_verify(struct nfp_prog *nfp_prog, struct bpf_prog *prog) +{ + struct nfp_bpf_analyzer_priv *priv; + int ret; + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv->prog = nfp_prog; + priv->meta = nfp_prog_first_meta(nfp_prog); + + ret = bpf_analyzer(prog, &nfp_bpf_analyzer_ops, priv); + + kfree(priv); + + return ret; +} From 7533fdc0f77f207fcc370b10965f4bcee82dfedf Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 21 Sep 2016 11:44:01 +0100 Subject: [PATCH 1323/1715] nfp: bpf: add hardware bpf offload Add hardware bpf offload on our smart NICs. Detect if capable firmware is loaded and use it to load the code JITed with just added translator onto programmable engines. This commit only supports offloading cls_bpf in legacy mode (non-direct action). Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/Makefile | 1 + drivers/net/ethernet/netronome/nfp/nfp_net.h | 26 ++- .../ethernet/netronome/nfp/nfp_net_common.c | 40 +++- .../net/ethernet/netronome/nfp/nfp_net_ctrl.h | 44 +++- .../ethernet/netronome/nfp/nfp_net_offload.c | 220 ++++++++++++++++++ 5 files changed, 324 insertions(+), 7 deletions(-) create mode 100644 drivers/net/ethernet/netronome/nfp/nfp_net_offload.c diff --git a/drivers/net/ethernet/netronome/nfp/Makefile b/drivers/net/ethernet/netronome/nfp/Makefile index 5f12689bf523..0efb2ba9a558 100644 --- a/drivers/net/ethernet/netronome/nfp/Makefile +++ b/drivers/net/ethernet/netronome/nfp/Makefile @@ -3,6 +3,7 @@ obj-$(CONFIG_NFP_NETVF) += nfp_netvf.o nfp_netvf-objs := \ nfp_net_common.o \ nfp_net_ethtool.o \ + nfp_net_offload.o \ nfp_netvf_main.o ifeq ($(CONFIG_BPF_SYSCALL),y) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h index 690635660195..ea6f5e667f27 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h @@ -220,7 +220,7 @@ struct nfp_net_tx_ring { #define PCIE_DESC_RX_I_TCP_CSUM_OK cpu_to_le16(BIT(11)) #define PCIE_DESC_RX_I_UDP_CSUM cpu_to_le16(BIT(10)) #define PCIE_DESC_RX_I_UDP_CSUM_OK cpu_to_le16(BIT(9)) -#define PCIE_DESC_RX_SPARE cpu_to_le16(BIT(8)) +#define PCIE_DESC_RX_BPF cpu_to_le16(BIT(8)) #define PCIE_DESC_RX_EOP cpu_to_le16(BIT(7)) #define PCIE_DESC_RX_IP4_CSUM cpu_to_le16(BIT(6)) #define PCIE_DESC_RX_IP4_CSUM_OK cpu_to_le16(BIT(5)) @@ -413,6 +413,7 @@ static inline bool nfp_net_fw_ver_eq(struct nfp_net_fw_version *fw_ver, * @is_vf: Is the driver attached to a VF? * @is_nfp3200: Is the driver for a NFP-3200 card? * @fw_loaded: Is the firmware loaded? + * @bpf_offload_skip_sw: Offloaded BPF program will not be rerun by cls_bpf * @ctrl: Local copy of the control register/word. * @fl_bufsz: Currently configured size of the freelist buffers * @rx_offset: Offset in the RX buffers where packet data starts @@ -473,6 +474,7 @@ struct nfp_net { unsigned is_vf:1; unsigned is_nfp3200:1; unsigned fw_loaded:1; + unsigned bpf_offload_skip_sw:1; u32 ctrl; u32 fl_bufsz; @@ -561,12 +563,28 @@ struct nfp_net { /* Functions to read/write from/to a BAR * Performs any endian conversion necessary. */ +static inline u16 nn_readb(struct nfp_net *nn, int off) +{ + return readb(nn->ctrl_bar + off); +} + static inline void nn_writeb(struct nfp_net *nn, int off, u8 val) { writeb(val, nn->ctrl_bar + off); } -/* NFP-3200 can't handle 16-bit accesses too well - hence no readw/writew */ +/* NFP-3200 can't handle 16-bit accesses too well */ +static inline u16 nn_readw(struct nfp_net *nn, int off) +{ + WARN_ON_ONCE(nn->is_nfp3200); + return readw(nn->ctrl_bar + off); +} + +static inline void nn_writew(struct nfp_net *nn, int off, u16 val) +{ + WARN_ON_ONCE(nn->is_nfp3200); + writew(val, nn->ctrl_bar + off); +} static inline u32 nn_readl(struct nfp_net *nn, int off) { @@ -757,4 +775,8 @@ static inline void nfp_net_debugfs_adapter_del(struct nfp_net *nn) } #endif /* CONFIG_NFP_NET_DEBUG */ +int +nfp_net_bpf_offload(struct nfp_net *nn, u32 handle, __be16 proto, + struct tc_cls_bpf_offload *cls_bpf); + #endif /* _NFP_NET_H_ */ diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 252e4924de0f..51978dfe883b 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -60,6 +60,7 @@ #include +#include #include #include "nfp_net_ctrl.h" @@ -2382,6 +2383,31 @@ static struct rtnl_link_stats64 *nfp_net_stat64(struct net_device *netdev, return stats; } +static bool nfp_net_ebpf_capable(struct nfp_net *nn) +{ + if (nn->cap & NFP_NET_CFG_CTRL_BPF && + nn_readb(nn, NFP_NET_CFG_BPF_ABI) == NFP_NET_BPF_ABI) + return true; + return false; +} + +static int +nfp_net_setup_tc(struct net_device *netdev, u32 handle, __be16 proto, + struct tc_to_netdev *tc) +{ + struct nfp_net *nn = netdev_priv(netdev); + + if (TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS)) + return -ENOTSUPP; + if (proto != htons(ETH_P_ALL)) + return -ENOTSUPP; + + if (tc->type == TC_SETUP_CLSBPF && nfp_net_ebpf_capable(nn)) + return nfp_net_bpf_offload(nn, handle, proto, tc->cls_bpf); + + return -EINVAL; +} + static int nfp_net_set_features(struct net_device *netdev, netdev_features_t features) { @@ -2436,6 +2462,11 @@ static int nfp_net_set_features(struct net_device *netdev, new_ctrl &= ~NFP_NET_CFG_CTRL_GATHER; } + if (changed & NETIF_F_HW_TC && nn->ctrl & NFP_NET_CFG_CTRL_BPF) { + nn_err(nn, "Cannot disable HW TC offload while in use\n"); + return -EBUSY; + } + nn_dbg(nn, "Feature change 0x%llx -> 0x%llx (changed=0x%llx)\n", netdev->features, features, changed); @@ -2585,6 +2616,7 @@ static const struct net_device_ops nfp_net_netdev_ops = { .ndo_stop = nfp_net_netdev_close, .ndo_start_xmit = nfp_net_tx, .ndo_get_stats64 = nfp_net_stat64, + .ndo_setup_tc = nfp_net_setup_tc, .ndo_tx_timeout = nfp_net_tx_timeout, .ndo_set_rx_mode = nfp_net_set_rx_mode, .ndo_change_mtu = nfp_net_change_mtu, @@ -2610,7 +2642,7 @@ void nfp_net_info(struct nfp_net *nn) nn->fw_ver.resv, nn->fw_ver.class, nn->fw_ver.major, nn->fw_ver.minor, nn->max_mtu); - nn_info(nn, "CAP: %#x %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", + nn_info(nn, "CAP: %#x %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", nn->cap, nn->cap & NFP_NET_CFG_CTRL_PROMISC ? "PROMISC " : "", nn->cap & NFP_NET_CFG_CTRL_L2BC ? "L2BCFILT " : "", @@ -2627,7 +2659,8 @@ void nfp_net_info(struct nfp_net *nn) nn->cap & NFP_NET_CFG_CTRL_MSIXAUTO ? "AUTOMASK " : "", nn->cap & NFP_NET_CFG_CTRL_IRQMOD ? "IRQMOD " : "", nn->cap & NFP_NET_CFG_CTRL_VXLAN ? "VXLAN " : "", - nn->cap & NFP_NET_CFG_CTRL_NVGRE ? "NVGRE " : ""); + nn->cap & NFP_NET_CFG_CTRL_NVGRE ? "NVGRE " : "", + nfp_net_ebpf_capable(nn) ? "BPF " : ""); } /** @@ -2795,6 +2828,9 @@ int nfp_net_netdev_init(struct net_device *netdev) netdev->features = netdev->hw_features; + if (nfp_net_ebpf_capable(nn)) + netdev->hw_features |= NETIF_F_HW_TC; + /* Advertise but disable TSO by default. */ netdev->features &= ~(NETIF_F_TSO | NETIF_F_TSO6); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h index ad6c4e31cedd..7aa11f3c5ef0 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h @@ -123,6 +123,7 @@ #define NFP_NET_CFG_CTRL_L2SWITCH_LOCAL (0x1 << 23) /* Switch to local */ #define NFP_NET_CFG_CTRL_VXLAN (0x1 << 24) /* VXLAN tunnel support */ #define NFP_NET_CFG_CTRL_NVGRE (0x1 << 25) /* NVGRE tunnel support */ +#define NFP_NET_CFG_CTRL_BPF (0x1 << 27) /* BPF offload capable */ #define NFP_NET_CFG_UPDATE 0x0004 #define NFP_NET_CFG_UPDATE_GEN (0x1 << 0) /* General update */ #define NFP_NET_CFG_UPDATE_RING (0x1 << 1) /* Ring config change */ @@ -134,6 +135,7 @@ #define NFP_NET_CFG_UPDATE_RESET (0x1 << 7) /* Update due to FLR */ #define NFP_NET_CFG_UPDATE_IRQMOD (0x1 << 8) /* IRQ mod change */ #define NFP_NET_CFG_UPDATE_VXLAN (0x1 << 9) /* VXLAN port change */ +#define NFP_NET_CFG_UPDATE_BPF (0x1 << 10) /* BPF program load */ #define NFP_NET_CFG_UPDATE_ERR (0x1 << 31) /* A error occurred */ #define NFP_NET_CFG_TXRS_ENABLE 0x0008 #define NFP_NET_CFG_RXRS_ENABLE 0x0010 @@ -196,10 +198,37 @@ #define NFP_NET_CFG_VXLAN_SZ 0x0008 /** - * 64B reserved for future use (0x0080 - 0x00c0) + * NFP6000 - BPF section + * @NFP_NET_CFG_BPF_ABI: BPF ABI version + * @NFP_NET_CFG_BPF_CAP: BPF capabilities + * @NFP_NET_CFG_BPF_MAX_LEN: Maximum size of JITed BPF code in bytes + * @NFP_NET_CFG_BPF_START: Offset at which BPF will be loaded + * @NFP_NET_CFG_BPF_DONE: Offset to jump to on exit + * @NFP_NET_CFG_BPF_STACK_SZ: Total size of stack area in 64B chunks + * @NFP_NET_CFG_BPF_INL_MTU: Packet data split offset in 64B chunks + * @NFP_NET_CFG_BPF_SIZE: Size of the JITed BPF code in instructions + * @NFP_NET_CFG_BPF_ADDR: DMA address of the buffer with JITed BPF code */ -#define NFP_NET_CFG_RESERVED 0x0080 -#define NFP_NET_CFG_RESERVED_SZ 0x0040 +#define NFP_NET_CFG_BPF_ABI 0x0080 +#define NFP_NET_BPF_ABI 1 +#define NFP_NET_CFG_BPF_CAP 0x0081 +#define NFP_NET_BPF_CAP_RELO (1 << 0) /* seamless reload */ +#define NFP_NET_CFG_BPF_MAX_LEN 0x0082 +#define NFP_NET_CFG_BPF_START 0x0084 +#define NFP_NET_CFG_BPF_DONE 0x0086 +#define NFP_NET_CFG_BPF_STACK_SZ 0x0088 +#define NFP_NET_CFG_BPF_INL_MTU 0x0089 +#define NFP_NET_CFG_BPF_SIZE 0x008e +#define NFP_NET_CFG_BPF_ADDR 0x0090 +#define NFP_NET_CFG_BPF_CFG_8CTX (1 << 0) /* 8ctx mode */ +#define NFP_NET_CFG_BPF_CFG_MASK 7ULL +#define NFP_NET_CFG_BPF_ADDR_MASK (~NFP_NET_CFG_BPF_CFG_MASK) + +/** + * 40B reserved for future use (0x0098 - 0x00c0) + */ +#define NFP_NET_CFG_RESERVED 0x0098 +#define NFP_NET_CFG_RESERVED_SZ 0x0028 /** * RSS configuration (0x0100 - 0x01ac): @@ -303,6 +332,15 @@ #define NFP_NET_CFG_STATS_TX_MC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x80) #define NFP_NET_CFG_STATS_TX_BC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x88) +#define NFP_NET_CFG_STATS_APP0_FRAMES (NFP_NET_CFG_STATS_BASE + 0x90) +#define NFP_NET_CFG_STATS_APP0_BYTES (NFP_NET_CFG_STATS_BASE + 0x98) +#define NFP_NET_CFG_STATS_APP1_FRAMES (NFP_NET_CFG_STATS_BASE + 0xa0) +#define NFP_NET_CFG_STATS_APP1_BYTES (NFP_NET_CFG_STATS_BASE + 0xa8) +#define NFP_NET_CFG_STATS_APP2_FRAMES (NFP_NET_CFG_STATS_BASE + 0xb0) +#define NFP_NET_CFG_STATS_APP2_BYTES (NFP_NET_CFG_STATS_BASE + 0xb8) +#define NFP_NET_CFG_STATS_APP3_FRAMES (NFP_NET_CFG_STATS_BASE + 0xc0) +#define NFP_NET_CFG_STATS_APP3_BYTES (NFP_NET_CFG_STATS_BASE + 0xc8) + /** * Per ring stats (0x1000 - 0x1800) * options, 64bit per entry diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c b/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c new file mode 100644 index 000000000000..313988cd3a43 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c @@ -0,0 +1,220 @@ +/* + * Copyright (C) 2016 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * nfp_net_offload.c + * Netronome network device driver: TC offload functions for PF and VF + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "nfp_bpf.h" +#include "nfp_net_ctrl.h" +#include "nfp_net.h" + +static int +nfp_net_bpf_get_act(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf) +{ + const struct tc_action *a; + LIST_HEAD(actions); + + /* TC direct action */ + if (cls_bpf->exts_integrated) + return -ENOTSUPP; + + /* TC legacy mode */ + if (!tc_single_action(cls_bpf->exts)) + return -ENOTSUPP; + + tcf_exts_to_list(cls_bpf->exts, &actions); + list_for_each_entry(a, &actions, list) { + if (is_tcf_gact_shot(a)) + return NN_ACT_TC_DROP; + } + + return -ENOTSUPP; +} + +static int +nfp_net_bpf_offload_prepare(struct nfp_net *nn, + struct tc_cls_bpf_offload *cls_bpf, + struct nfp_bpf_result *res, + void **code, dma_addr_t *dma_addr, u16 max_instr) +{ + unsigned int code_sz = max_instr * sizeof(u64); + enum nfp_bpf_action_type act; + u16 start_off, done_off; + unsigned int max_mtu; + int ret; + + ret = nfp_net_bpf_get_act(nn, cls_bpf); + if (ret < 0) + return ret; + act = ret; + + max_mtu = nn_readb(nn, NFP_NET_CFG_BPF_INL_MTU) * 64 - 32; + if (max_mtu < nn->netdev->mtu) { + nn_info(nn, "BPF offload not supported with MTU larger than HW packet split boundary\n"); + return -ENOTSUPP; + } + + start_off = nn_readw(nn, NFP_NET_CFG_BPF_START); + done_off = nn_readw(nn, NFP_NET_CFG_BPF_DONE); + + *code = dma_zalloc_coherent(&nn->pdev->dev, code_sz, dma_addr, + GFP_KERNEL); + if (!*code) + return -ENOMEM; + + ret = nfp_bpf_jit(cls_bpf->prog, *code, act, start_off, done_off, + max_instr, res); + if (ret) + goto out; + + return 0; + +out: + dma_free_coherent(&nn->pdev->dev, code_sz, *code, *dma_addr); + return ret; +} + +static void +nfp_net_bpf_load_and_start(struct nfp_net *nn, u32 tc_flags, + void *code, dma_addr_t dma_addr, + unsigned int code_sz, unsigned int n_instr, + bool dense_mode) +{ + u64 bpf_addr = dma_addr; + int err; + + nn->bpf_offload_skip_sw = !!(tc_flags & TCA_CLS_FLAGS_SKIP_SW); + + if (dense_mode) + bpf_addr |= NFP_NET_CFG_BPF_CFG_8CTX; + + nn_writew(nn, NFP_NET_CFG_BPF_SIZE, n_instr); + nn_writeq(nn, NFP_NET_CFG_BPF_ADDR, bpf_addr); + + /* Load up the JITed code */ + err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_BPF); + if (err) + nn_err(nn, "FW command error while loading BPF: %d\n", err); + + /* Enable passing packets through BPF function */ + nn->ctrl |= NFP_NET_CFG_CTRL_BPF; + nn_writel(nn, NFP_NET_CFG_CTRL, nn->ctrl); + err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN); + if (err) + nn_err(nn, "FW command error while enabling BPF: %d\n", err); + + dma_free_coherent(&nn->pdev->dev, code_sz, code, dma_addr); +} + +static int nfp_net_bpf_stop(struct nfp_net *nn) +{ + if (!(nn->ctrl & NFP_NET_CFG_CTRL_BPF)) + return 0; + + nn->ctrl &= ~NFP_NET_CFG_CTRL_BPF; + nn_writel(nn, NFP_NET_CFG_CTRL, nn->ctrl); + + nn->bpf_offload_skip_sw = 0; + + return nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN); +} + +int +nfp_net_bpf_offload(struct nfp_net *nn, u32 handle, __be16 proto, + struct tc_cls_bpf_offload *cls_bpf) +{ + struct nfp_bpf_result res; + dma_addr_t dma_addr; + u16 max_instr; + void *code; + int err; + + max_instr = nn_readw(nn, NFP_NET_CFG_BPF_MAX_LEN); + + switch (cls_bpf->command) { + case TC_CLSBPF_REPLACE: + /* There is nothing stopping us from implementing seamless + * replace but the simple method of loading I adopted in + * the firmware does not handle atomic replace (i.e. we have to + * stop the BPF offload and re-enable it). Leaking-in a few + * frames which didn't have BPF applied in the hardware should + * be fine if software fallback is available, though. + */ + if (nn->bpf_offload_skip_sw) + return -EBUSY; + + err = nfp_net_bpf_offload_prepare(nn, cls_bpf, &res, &code, + &dma_addr, max_instr); + if (err) + return err; + + nfp_net_bpf_stop(nn); + nfp_net_bpf_load_and_start(nn, cls_bpf->gen_flags, code, + dma_addr, max_instr * sizeof(u64), + res.n_instr, res.dense_mode); + return 0; + + case TC_CLSBPF_ADD: + if (nn->ctrl & NFP_NET_CFG_CTRL_BPF) + return -EBUSY; + + err = nfp_net_bpf_offload_prepare(nn, cls_bpf, &res, &code, + &dma_addr, max_instr); + if (err) + return err; + + nfp_net_bpf_load_and_start(nn, cls_bpf->gen_flags, code, + dma_addr, max_instr * sizeof(u64), + res.n_instr, res.dense_mode); + return 0; + + case TC_CLSBPF_DESTROY: + return nfp_net_bpf_stop(nn); + + default: + return -ENOTSUPP; + } +} From 68d640630d4ef2a4bf3f68b5073dec5e4c4f878b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 21 Sep 2016 11:44:02 +0100 Subject: [PATCH 1324/1715] net: cls_bpf: allow offloaded filters to update stats Call into offloaded filters to update stats. Signed-off-by: Jakub Kicinski Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 1 + net/sched/cls_bpf.c | 11 +++++++++++ 2 files changed, 12 insertions(+) diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 57af9f3032ff..5ccaa4be7d96 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -490,6 +490,7 @@ enum tc_clsbpf_command { TC_CLSBPF_ADD, TC_CLSBPF_REPLACE, TC_CLSBPF_DESTROY, + TC_CLSBPF_STATS, }; struct tc_cls_bpf_offload { diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 1becc2fe1bc5..bb1d5a487081 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -221,6 +221,15 @@ static void cls_bpf_stop_offload(struct tcf_proto *tp, prog->offloaded = false; } +static void cls_bpf_offload_update_stats(struct tcf_proto *tp, + struct cls_bpf_prog *prog) +{ + if (!prog->offloaded) + return; + + cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_STATS); +} + static int cls_bpf_init(struct tcf_proto *tp) { struct cls_bpf_head *head; @@ -577,6 +586,8 @@ static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, tm->tcm_handle = prog->handle; + cls_bpf_offload_update_stats(tp, prog); + nest = nla_nest_start(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; From 66860beb7ed5df11433528cb535d5e9f7dad2302 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 21 Sep 2016 11:44:03 +0100 Subject: [PATCH 1325/1715] nfp: bpf: allow offloaded filters to update stats Periodically poll stats and call into offloaded actions to update them. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/nfp_net.h | 19 ++++++ .../ethernet/netronome/nfp/nfp_net_common.c | 3 + .../ethernet/netronome/nfp/nfp_net_ethtool.c | 12 ++++ .../ethernet/netronome/nfp/nfp_net_offload.c | 63 +++++++++++++++++++ 4 files changed, 97 insertions(+) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h index ea6f5e667f27..13c6a9001b4d 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h @@ -62,6 +62,9 @@ /* Max time to wait for NFP to respond on updates (in seconds) */ #define NFP_NET_POLL_TIMEOUT 5 +/* Interval for reading offloaded filter stats */ +#define NFP_NET_STAT_POLL_IVL msecs_to_jiffies(100) + /* Bar allocation */ #define NFP_NET_CTRL_BAR 0 #define NFP_NET_Q0_BAR 2 @@ -405,6 +408,11 @@ static inline bool nfp_net_fw_ver_eq(struct nfp_net_fw_version *fw_ver, fw_ver->minor == minor; } +struct nfp_stat_pair { + u64 pkts; + u64 bytes; +}; + /** * struct nfp_net - NFP network device structure * @pdev: Backpointer to PCI device @@ -428,6 +436,11 @@ static inline bool nfp_net_fw_ver_eq(struct nfp_net_fw_version *fw_ver, * @rss_cfg: RSS configuration * @rss_key: RSS secret key * @rss_itbl: RSS indirection table + * @rx_filter: Filter offload statistics - dropped packets/bytes + * @rx_filter_prev: Filter offload statistics - values from previous update + * @rx_filter_change: Jiffies when statistics last changed + * @rx_filter_stats_timer: Timer for polling filter offload statistics + * @rx_filter_lock: Lock protecting timer state changes (teardown) * @max_tx_rings: Maximum number of TX rings supported by the Firmware * @max_rx_rings: Maximum number of RX rings supported by the Firmware * @num_tx_rings: Currently configured number of TX rings @@ -504,6 +517,11 @@ struct nfp_net { u8 rss_key[NFP_NET_CFG_RSS_KEY_SZ]; u8 rss_itbl[NFP_NET_CFG_RSS_ITBL_SZ]; + struct nfp_stat_pair rx_filter, rx_filter_prev; + unsigned long rx_filter_change; + struct timer_list rx_filter_stats_timer; + spinlock_t rx_filter_lock; + int max_tx_rings; int max_rx_rings; @@ -775,6 +793,7 @@ static inline void nfp_net_debugfs_adapter_del(struct nfp_net *nn) } #endif /* CONFIG_NFP_NET_DEBUG */ +void nfp_net_filter_stats_timer(unsigned long data); int nfp_net_bpf_offload(struct nfp_net *nn, u32 handle, __be16 proto, struct tc_cls_bpf_offload *cls_bpf); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 51978dfe883b..f091eb758ca2 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -2703,10 +2703,13 @@ struct nfp_net *nfp_net_netdev_alloc(struct pci_dev *pdev, nn->rxd_cnt = NFP_NET_RX_DESCS_DEFAULT; spin_lock_init(&nn->reconfig_lock); + spin_lock_init(&nn->rx_filter_lock); spin_lock_init(&nn->link_status_lock); setup_timer(&nn->reconfig_timer, nfp_net_reconfig_timer, (unsigned long)nn); + setup_timer(&nn->rx_filter_stats_timer, + nfp_net_filter_stats_timer, (unsigned long)nn); return nn; } diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index 4c9897220969..3418f2277e9d 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -106,6 +106,18 @@ static const struct _nfp_net_et_stats nfp_net_et_stats[] = { {"dev_tx_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_FRAMES)}, {"dev_tx_mc_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_MC_FRAMES)}, {"dev_tx_bc_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_BC_FRAMES)}, + + {"bpf_pass_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP0_FRAMES)}, + {"bpf_pass_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP0_BYTES)}, + /* see comments in outro functions in nfp_bpf_jit.c to find out + * how different BPF modes use app-specific counters + */ + {"bpf_app1_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP1_FRAMES)}, + {"bpf_app1_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP1_BYTES)}, + {"bpf_app2_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP2_FRAMES)}, + {"bpf_app2_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP2_BYTES)}, + {"bpf_app3_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP3_FRAMES)}, + {"bpf_app3_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP3_BYTES)}, }; #define NN_ET_GLOBAL_STATS_LEN ARRAY_SIZE(nfp_net_et_stats) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c b/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c index 313988cd3a43..0537a53e2174 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c @@ -51,6 +51,60 @@ #include "nfp_net_ctrl.h" #include "nfp_net.h" +void nfp_net_filter_stats_timer(unsigned long data) +{ + struct nfp_net *nn = (void *)data; + struct nfp_stat_pair latest; + + spin_lock_bh(&nn->rx_filter_lock); + + if (nn->ctrl & NFP_NET_CFG_CTRL_BPF) + mod_timer(&nn->rx_filter_stats_timer, + jiffies + NFP_NET_STAT_POLL_IVL); + + spin_unlock_bh(&nn->rx_filter_lock); + + latest.pkts = nn_readq(nn, NFP_NET_CFG_STATS_APP1_FRAMES); + latest.bytes = nn_readq(nn, NFP_NET_CFG_STATS_APP1_BYTES); + + if (latest.pkts != nn->rx_filter.pkts) + nn->rx_filter_change = jiffies; + + nn->rx_filter = latest; +} + +static void nfp_net_bpf_stats_reset(struct nfp_net *nn) +{ + nn->rx_filter.pkts = nn_readq(nn, NFP_NET_CFG_STATS_APP1_FRAMES); + nn->rx_filter.bytes = nn_readq(nn, NFP_NET_CFG_STATS_APP1_BYTES); + nn->rx_filter_prev = nn->rx_filter; + nn->rx_filter_change = jiffies; +} + +static int +nfp_net_bpf_stats_update(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf) +{ + struct tc_action *a; + LIST_HEAD(actions); + u64 bytes, pkts; + + pkts = nn->rx_filter.pkts - nn->rx_filter_prev.pkts; + bytes = nn->rx_filter.bytes - nn->rx_filter_prev.bytes; + bytes -= pkts * ETH_HLEN; + + nn->rx_filter_prev = nn->rx_filter; + + preempt_disable(); + + tcf_exts_to_list(cls_bpf->exts, &actions); + list_for_each_entry(a, &actions, list) + tcf_action_stats_update(a, bytes, pkts, nn->rx_filter_change); + + preempt_enable(); + + return 0; +} + static int nfp_net_bpf_get_act(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf) { @@ -147,6 +201,9 @@ nfp_net_bpf_load_and_start(struct nfp_net *nn, u32 tc_flags, nn_err(nn, "FW command error while enabling BPF: %d\n", err); dma_free_coherent(&nn->pdev->dev, code_sz, code, dma_addr); + + nfp_net_bpf_stats_reset(nn); + mod_timer(&nn->rx_filter_stats_timer, jiffies + NFP_NET_STAT_POLL_IVL); } static int nfp_net_bpf_stop(struct nfp_net *nn) @@ -154,9 +211,12 @@ static int nfp_net_bpf_stop(struct nfp_net *nn) if (!(nn->ctrl & NFP_NET_CFG_CTRL_BPF)) return 0; + spin_lock_bh(&nn->rx_filter_lock); nn->ctrl &= ~NFP_NET_CFG_CTRL_BPF; + spin_unlock_bh(&nn->rx_filter_lock); nn_writel(nn, NFP_NET_CFG_CTRL, nn->ctrl); + del_timer_sync(&nn->rx_filter_stats_timer); nn->bpf_offload_skip_sw = 0; return nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN); @@ -214,6 +274,9 @@ nfp_net_bpf_offload(struct nfp_net *nn, u32 handle, __be16 proto, case TC_CLSBPF_DESTROY: return nfp_net_bpf_stop(nn); + case TC_CLSBPF_STATS: + return nfp_net_bpf_stats_update(nn, cls_bpf); + default: return -ENOTSUPP; } From 19d0f54edab6e77b6b73277ac33717be1f858fa8 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 21 Sep 2016 11:44:04 +0100 Subject: [PATCH 1326/1715] nfp: bpf: add packet marking support Add missing ABI defines and eBPF instructions to allow mark to be passed on and extend prepend parsing on the RX path to pick it up from packet metadata. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/nfp_bpf.h | 2 + .../net/ethernet/netronome/nfp/nfp_bpf_jit.c | 19 ++++ drivers/net/ethernet/netronome/nfp/nfp_net.h | 2 + .../ethernet/netronome/nfp/nfp_net_common.c | 95 ++++++++++++++----- .../net/ethernet/netronome/nfp/nfp_net_ctrl.h | 7 ++ .../ethernet/netronome/nfp/nfp_netvf_main.c | 2 +- 6 files changed, 103 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf.h b/drivers/net/ethernet/netronome/nfp/nfp_bpf.h index 3726421e353f..2adb1d80c7b7 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_bpf.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf.h @@ -91,6 +91,8 @@ enum nfp_bpf_reg_type { #define imm_both(np) reg_both((np)->regs_per_thread - STATIC_REG_IMM) #define NFP_BPF_ABI_FLAGS reg_nnr(0) +#define NFP_BPF_ABI_FLAG_MARK 1 +#define NFP_BPF_ABI_MARK reg_nnr(1) #define NFP_BPF_ABI_PKT reg_nnr(2) #define NFP_BPF_ABI_LEN reg_nnr(3) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c b/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c index cfbf53607fc9..368381f0357f 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c @@ -674,6 +674,16 @@ static int construct_data_ld(struct nfp_prog *nfp_prog, u16 offset, u8 size) return construct_data_ind_ld(nfp_prog, offset, 0, false, size); } +static int wrp_set_mark(struct nfp_prog *nfp_prog, u8 src) +{ + emit_alu(nfp_prog, NFP_BPF_ABI_MARK, + reg_none(), ALU_OP_NONE, reg_b(src)); + emit_alu(nfp_prog, NFP_BPF_ABI_FLAGS, + NFP_BPF_ABI_FLAGS, ALU_OP_OR, reg_imm(NFP_BPF_ABI_FLAG_MARK)); + + return 0; +} + static void wrp_alu_imm(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u32 imm) { @@ -1117,6 +1127,14 @@ static int mem_ldx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) return 0; } +static int mem_stx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + if (meta->insn.off == offsetof(struct sk_buff, mark)) + return wrp_set_mark(nfp_prog, meta->insn.src_reg * 2); + + return -ENOTSUPP; +} + static int jump(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { if (meta->insn.off < 0) /* TODO */ @@ -1306,6 +1324,7 @@ static const instr_cb_t instr_cb[256] = { [BPF_LD | BPF_IND | BPF_H] = data_ind_ld2, [BPF_LD | BPF_IND | BPF_W] = data_ind_ld4, [BPF_LDX | BPF_MEM | BPF_W] = mem_ldx4, + [BPF_STX | BPF_MEM | BPF_W] = mem_stx4, [BPF_JMP | BPF_JA | BPF_K] = jump, [BPF_JMP | BPF_JEQ | BPF_K] = jeq_imm, [BPF_JMP | BPF_JGT | BPF_K] = jgt_imm, diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h index 13c6a9001b4d..ed824e11a1e3 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h @@ -269,6 +269,8 @@ struct nfp_net_rx_desc { }; }; +#define NFP_NET_META_FIELD_MASK GENMASK(NFP_NET_META_FIELD_SIZE - 1, 0) + struct nfp_net_rx_hash { __be32 hash_type; __be32 hash; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index f091eb758ca2..415691edcaa5 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -1293,36 +1293,70 @@ static void nfp_net_rx_csum(struct nfp_net *nn, struct nfp_net_r_vector *r_vec, } } -/** - * nfp_net_set_hash() - Set SKB hash data - * @netdev: adapter's net_device structure - * @skb: SKB to set the hash data on - * @rxd: RX descriptor - * - * The RSS hash and hash-type are pre-pended to the packet data. - * Extract and decode it and set the skb fields. - */ static void nfp_net_set_hash(struct net_device *netdev, struct sk_buff *skb, - struct nfp_net_rx_desc *rxd) + unsigned int type, __be32 *hash) +{ + if (!(netdev->features & NETIF_F_RXHASH)) + return; + + switch (type) { + case NFP_NET_RSS_IPV4: + case NFP_NET_RSS_IPV6: + case NFP_NET_RSS_IPV6_EX: + skb_set_hash(skb, get_unaligned_be32(hash), PKT_HASH_TYPE_L3); + break; + default: + skb_set_hash(skb, get_unaligned_be32(hash), PKT_HASH_TYPE_L4); + break; + } +} + +static void +nfp_net_set_hash_desc(struct net_device *netdev, struct sk_buff *skb, + struct nfp_net_rx_desc *rxd) { struct nfp_net_rx_hash *rx_hash; - if (!(rxd->rxd.flags & PCIE_DESC_RX_RSS) || - !(netdev->features & NETIF_F_RXHASH)) + if (!(rxd->rxd.flags & PCIE_DESC_RX_RSS)) return; rx_hash = (struct nfp_net_rx_hash *)(skb->data - sizeof(*rx_hash)); - switch (be32_to_cpu(rx_hash->hash_type)) { - case NFP_NET_RSS_IPV4: - case NFP_NET_RSS_IPV6: - case NFP_NET_RSS_IPV6_EX: - skb_set_hash(skb, be32_to_cpu(rx_hash->hash), PKT_HASH_TYPE_L3); - break; - default: - skb_set_hash(skb, be32_to_cpu(rx_hash->hash), PKT_HASH_TYPE_L4); - break; + nfp_net_set_hash(netdev, skb, get_unaligned_be32(&rx_hash->hash_type), + &rx_hash->hash); +} + +static void * +nfp_net_parse_meta(struct net_device *netdev, struct sk_buff *skb, + int meta_len) +{ + u8 *data = skb->data - meta_len; + u32 meta_info; + + meta_info = get_unaligned_be32(data); + data += 4; + + while (meta_info) { + switch (meta_info & NFP_NET_META_FIELD_MASK) { + case NFP_NET_META_HASH: + meta_info >>= NFP_NET_META_FIELD_SIZE; + nfp_net_set_hash(netdev, skb, + meta_info & NFP_NET_META_FIELD_MASK, + (__be32 *)data); + data += 4; + break; + case NFP_NET_META_MARK: + skb->mark = get_unaligned_be32(data); + data += 4; + break; + default: + return NULL; + } + + meta_info >>= NFP_NET_META_FIELD_SIZE; } + + return data; } /** @@ -1439,14 +1473,29 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget) skb_reserve(skb, nn->rx_offset); skb_put(skb, data_len - meta_len); - nfp_net_set_hash(nn->netdev, skb, rxd); - /* Stats update */ u64_stats_update_begin(&r_vec->rx_sync); r_vec->rx_pkts++; r_vec->rx_bytes += skb->len; u64_stats_update_end(&r_vec->rx_sync); + if (nn->fw_ver.major <= 3) { + nfp_net_set_hash_desc(nn->netdev, skb, rxd); + } else if (meta_len) { + void *end; + + end = nfp_net_parse_meta(nn->netdev, skb, meta_len); + if (unlikely(end != skb->data)) { + u64_stats_update_begin(&r_vec->rx_sync); + r_vec->rx_drops++; + u64_stats_update_end(&r_vec->rx_sync); + + dev_kfree_skb_any(skb); + nn_warn_ratelimit(nn, "invalid RX packet metadata\n"); + continue; + } + } + skb_record_rx_queue(skb, rx_ring->idx); skb->protocol = eth_type_trans(skb, nn->netdev); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h index 7aa11f3c5ef0..93b10b441acb 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h @@ -65,6 +65,13 @@ */ #define NFP_NET_LSO_MAX_HDR_SZ 255 +/** + * Prepend field types + */ +#define NFP_NET_META_FIELD_SIZE 4 +#define NFP_NET_META_HASH 1 /* next field carries hash type */ +#define NFP_NET_META_MARK 2 + /** * Hash type pre-pended when a RSS hash was computed */ diff --git a/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c b/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c index f7062cb648e1..2800bbf65a89 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c @@ -148,7 +148,7 @@ static int nfp_netvf_pci_probe(struct pci_dev *pdev, dev_warn(&pdev->dev, "OBSOLETE Firmware detected - VF isolation not available\n"); } else { switch (fw_ver.major) { - case 1 ... 3: + case 1 ... 4: if (is_nfp3200) { stride = 2; tx_bar_no = NFP_NET_Q0_BAR; From 9798e6fe4f9b6a2847a40e24b75e68afdc7a01b3 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 21 Sep 2016 11:44:05 +0100 Subject: [PATCH 1327/1715] net: act_mirred: allow statistic updates from offloaded actions Implement .stats_update() callback. The implementation is generic and can be reused by other simple actions if needed. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/sched/act_mirred.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 1c76387c5d9c..667dc382df82 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -204,6 +204,13 @@ out: return retval; } +static void tcf_stats_update(struct tc_action *a, u64 bytes, u32 packets, + u64 lastuse) +{ + tcf_lastuse_update(&a->tcfa_tm); + _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets); +} + static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { @@ -281,6 +288,7 @@ static struct tc_action_ops act_mirred_ops = { .type = TCA_ACT_MIRRED, .owner = THIS_MODULE, .act = tcf_mirred, + .stats_update = tcf_stats_update, .dump = tcf_mirred_dump, .cleanup = tcf_mirred_release, .init = tcf_mirred_init, From 2d18421debc29a338e6783c06fb75ab7b16fc9ba Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 21 Sep 2016 11:44:06 +0100 Subject: [PATCH 1328/1715] nfp: bpf: add support for legacy redirect action Data path has redirect support so expressing redirect to the port frame came from is a trivial matter of setting the right result code. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/nfp_bpf.h | 1 + drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c | 2 ++ drivers/net/ethernet/netronome/nfp/nfp_net_offload.c | 4 ++++ 3 files changed, 7 insertions(+) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf.h b/drivers/net/ethernet/netronome/nfp/nfp_bpf.h index 2adb1d80c7b7..adbe0235d98e 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_bpf.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf.h @@ -60,6 +60,7 @@ enum static_regs { enum nfp_bpf_action_type { NN_ACT_TC_DROP, + NN_ACT_TC_REDIR, }; /* Software register representation, hardware encoding in asm.h */ diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c b/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c index 368381f0357f..434bef975c58 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c @@ -1440,6 +1440,7 @@ static void nfp_outro_tc_legacy(struct nfp_prog *nfp_prog) { const u8 act2code[] = { [NN_ACT_TC_DROP] = 0x22, + [NN_ACT_TC_REDIR] = 0x24 }; /* Target for aborts */ nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog); @@ -1468,6 +1469,7 @@ static void nfp_outro(struct nfp_prog *nfp_prog) { switch (nfp_prog->act) { case NN_ACT_TC_DROP: + case NN_ACT_TC_REDIR: nfp_outro_tc_legacy(nfp_prog); break; } diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c b/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c index 0537a53e2174..1ec8e5b74651 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c @@ -123,6 +123,10 @@ nfp_net_bpf_get_act(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf) list_for_each_entry(a, &actions, list) { if (is_tcf_gact_shot(a)) return NN_ACT_TC_DROP; + + if (is_tcf_mirred_redirect(a) && + tcf_mirred_ifindex(a) == nn->netdev->ifindex) + return NN_ACT_TC_REDIR; } return -ENOTSUPP; From e3b8baf0ca2a69f88846b5446234e5647ecd17eb Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 21 Sep 2016 11:44:07 +0100 Subject: [PATCH 1329/1715] nfp: bpf: add offload of TC direct action mode Add offload of TC in direct action mode. We just need to provide appropriate checks in the verifier and a new outro block to translate the exit codes to what data path expects Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/nfp_bpf.h | 1 + .../net/ethernet/netronome/nfp/nfp_bpf_jit.c | 66 +++++++++++++++++++ .../ethernet/netronome/nfp/nfp_bpf_verifier.c | 11 +++- .../ethernet/netronome/nfp/nfp_net_offload.c | 6 +- 4 files changed, 82 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf.h b/drivers/net/ethernet/netronome/nfp/nfp_bpf.h index adbe0235d98e..fc220cd04115 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_bpf.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf.h @@ -61,6 +61,7 @@ enum static_regs { enum nfp_bpf_action_type { NN_ACT_TC_DROP, NN_ACT_TC_REDIR, + NN_ACT_DIRECT, }; /* Software register representation, hardware encoding in asm.h */ diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c b/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c index 434bef975c58..3de819acd68c 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c @@ -321,6 +321,16 @@ __emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, enum br_ev_pip ev_pip, nfp_prog_push(nfp_prog, insn); } +static void emit_br_def(struct nfp_prog *nfp_prog, u16 addr, u8 defer) +{ + if (defer > 2) { + pr_err("BUG: branch defer out of bounds %d\n", defer); + nfp_prog->error = -EFAULT; + return; + } + __emit_br(nfp_prog, BR_UNC, BR_EV_PIP_UNCOND, BR_CSS_NONE, addr, defer); +} + static void emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer) { @@ -1465,9 +1475,65 @@ static void nfp_outro_tc_legacy(struct nfp_prog *nfp_prog) SHF_SC_L_SHF, 16); } +static void nfp_outro_tc_da(struct nfp_prog *nfp_prog) +{ + /* TC direct-action mode: + * 0,1 ok NOT SUPPORTED[1] + * 2 drop 0x22 -> drop, count as stat1 + * 4,5 nuke 0x02 -> drop + * 7 redir 0x44 -> redir, count as stat2 + * * unspec 0x11 -> pass, count as stat0 + * + * [1] We can't support OK and RECLASSIFY because we can't tell TC + * the exact decision made. We are forced to support UNSPEC + * to handle aborts so that's the only one we handle for passing + * packets up the stack. + */ + /* Target for aborts */ + nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog); + + emit_br_def(nfp_prog, nfp_prog->tgt_done, 2); + + emit_alu(nfp_prog, reg_a(0), + reg_none(), ALU_OP_NONE, NFP_BPF_ABI_FLAGS); + emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x11), SHF_SC_L_SHF, 16); + + /* Target for normal exits */ + nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog); + + /* if R0 > 7 jump to abort */ + emit_alu(nfp_prog, reg_none(), reg_imm(7), ALU_OP_SUB, reg_b(0)); + emit_br(nfp_prog, BR_BLO, nfp_prog->tgt_abort, 0); + emit_alu(nfp_prog, reg_a(0), + reg_none(), ALU_OP_NONE, NFP_BPF_ABI_FLAGS); + + wrp_immed(nfp_prog, reg_b(2), 0x41221211); + wrp_immed(nfp_prog, reg_b(3), 0x41001211); + + emit_shf(nfp_prog, reg_a(1), + reg_none(), SHF_OP_NONE, reg_b(0), SHF_SC_L_SHF, 2); + + emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0)); + emit_shf(nfp_prog, reg_a(2), + reg_imm(0xf), SHF_OP_AND, reg_b(2), SHF_SC_R_SHF, 0); + + emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0)); + emit_shf(nfp_prog, reg_b(2), + reg_imm(0xf), SHF_OP_AND, reg_b(3), SHF_SC_R_SHF, 0); + + emit_br_def(nfp_prog, nfp_prog->tgt_done, 2); + + emit_shf(nfp_prog, reg_b(2), + reg_a(2), SHF_OP_OR, reg_b(2), SHF_SC_L_SHF, 4); + emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16); +} + static void nfp_outro(struct nfp_prog *nfp_prog) { switch (nfp_prog->act) { + case NN_ACT_DIRECT: + nfp_outro_tc_da(nfp_prog); + break; case NN_ACT_TC_DROP: case NN_ACT_TC_REDIR: nfp_outro_tc_legacy(nfp_prog); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c b/drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c index ef6775b54168..144cae87f63a 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c @@ -86,7 +86,16 @@ nfp_bpf_check_exit(struct nfp_prog *nfp_prog, return -EINVAL; } - if (reg0->imm != 0 && (reg0->imm & ~0U) != ~0U) { + if (nfp_prog->act != NN_ACT_DIRECT && + reg0->imm != 0 && (reg0->imm & ~0U) != ~0U) { + pr_info("unsupported exit state: %d, imm: %llx\n", + reg0->type, reg0->imm); + return -EINVAL; + } + + if (nfp_prog->act == NN_ACT_DIRECT && reg0->imm <= TC_ACT_REDIRECT && + reg0->imm != TC_ACT_SHOT && reg0->imm != TC_ACT_STOLEN && + reg0->imm != TC_ACT_QUEUED) { pr_info("unsupported exit state: %d, imm: %llx\n", reg0->type, reg0->imm); return -EINVAL; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c b/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c index 1ec8e5b74651..43f42f842eda 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c @@ -112,8 +112,12 @@ nfp_net_bpf_get_act(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf) LIST_HEAD(actions); /* TC direct action */ - if (cls_bpf->exts_integrated) + if (cls_bpf->exts_integrated) { + if (tc_no_actions(cls_bpf->exts)) + return NN_ACT_DIRECT; + return -ENOTSUPP; + } /* TC legacy mode */ if (!tc_single_action(cls_bpf->exts)) From 5a924b8951f835b5ff8a3d9f434f3b230fc9905f Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 22 Sep 2016 00:29:31 +0100 Subject: [PATCH 1330/1715] rxrpc: Don't store the rxrpc header in the Tx queue sk_buffs Don't store the rxrpc protocol header in sk_buffs on the transmit queue, but rather generate it on the fly and pass it to kernel_sendmsg() as a separate iov. This reduces the amount of storage required. Note that the security header is still stored in the sk_buff as it may get encrypted along with the data (and doesn't change with each transmission). Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 7 ++-- net/rxrpc/call_event.c | 11 ++---- net/rxrpc/conn_object.c | 1 - net/rxrpc/output.c | 79 ++++++++++++++++++++++++++++------------- net/rxrpc/rxkad.c | 8 ++--- net/rxrpc/sendmsg.c | 51 +++++--------------------- 6 files changed, 70 insertions(+), 87 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 034f525f2235..f021df4a6a22 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -385,10 +385,9 @@ struct rxrpc_connection { int debug_id; /* debug ID for printks */ atomic_t serial; /* packet serial number counter */ unsigned int hi_serial; /* highest serial number received */ - u8 size_align; /* data size alignment (for security) */ - u8 header_size; /* rxrpc + security header size */ - u8 security_size; /* security header size */ u32 security_nonce; /* response re-use preventer */ + u8 size_align; /* data size alignment (for security) */ + u8 security_size; /* security header size */ u8 security_ix; /* security type */ u8 out_clientflag; /* RXRPC_CLIENT_INITIATED if we are client */ }; @@ -946,7 +945,7 @@ extern const s8 rxrpc_ack_priority[]; * output.c */ int rxrpc_send_call_packet(struct rxrpc_call *, u8); -int rxrpc_send_data_packet(struct rxrpc_connection *, struct sk_buff *); +int rxrpc_send_data_packet(struct rxrpc_call *, struct sk_buff *); void rxrpc_reject_packets(struct rxrpc_local *); /* diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 7d1b99824ed9..6247ce25eb21 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -139,7 +139,6 @@ void rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, */ static void rxrpc_resend(struct rxrpc_call *call) { - struct rxrpc_wire_header *whdr; struct rxrpc_skb_priv *sp; struct sk_buff *skb; rxrpc_seq_t cursor, seq, top; @@ -201,15 +200,8 @@ static void rxrpc_resend(struct rxrpc_call *call) skb = call->rxtx_buffer[ix]; rxrpc_get_skb(skb, rxrpc_skb_tx_got); spin_unlock_bh(&call->lock); - sp = rxrpc_skb(skb); - /* Each Tx packet needs a new serial number */ - sp->hdr.serial = atomic_inc_return(&call->conn->serial); - - whdr = (struct rxrpc_wire_header *)skb->head; - whdr->serial = htonl(sp->hdr.serial); - - if (rxrpc_send_data_packet(call->conn, skb) < 0) { + if (rxrpc_send_data_packet(call, skb) < 0) { call->resend_at = now + 2; rxrpc_free_skb(skb, rxrpc_skb_tx_freed); return; @@ -217,6 +209,7 @@ static void rxrpc_resend(struct rxrpc_call *call) if (rxrpc_is_client_call(call)) rxrpc_expose_client_call(call); + sp = rxrpc_skb(skb); sp->resend_at = now + rxrpc_resend_timeout; rxrpc_free_skb(skb, rxrpc_skb_tx_freed); diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 3b55aee0c436..e1e83af47866 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -53,7 +53,6 @@ struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp) spin_lock_init(&conn->state_lock); conn->debug_id = atomic_inc_return(&rxrpc_debug_id); conn->size_align = 4; - conn->header_size = sizeof(struct rxrpc_wire_header); conn->idle_timestamp = jiffies; } diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 16e18a94ffa6..817fb0e82d6a 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -208,19 +208,42 @@ out: /* * send a packet through the transport endpoint */ -int rxrpc_send_data_packet(struct rxrpc_connection *conn, struct sk_buff *skb) +int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb) { - struct kvec iov[1]; + struct rxrpc_connection *conn = call->conn; + struct rxrpc_wire_header whdr; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct msghdr msg; + struct kvec iov[2]; + rxrpc_serial_t serial; + size_t len; int ret, opt; _enter(",{%d}", skb->len); - iov[0].iov_base = skb->head; - iov[0].iov_len = skb->len; + /* Each transmission of a Tx packet needs a new serial number */ + serial = atomic_inc_return(&conn->serial); - msg.msg_name = &conn->params.peer->srx.transport; - msg.msg_namelen = conn->params.peer->srx.transport_len; + whdr.epoch = htonl(conn->proto.epoch); + whdr.cid = htonl(call->cid); + whdr.callNumber = htonl(call->call_id); + whdr.seq = htonl(sp->hdr.seq); + whdr.serial = htonl(serial); + whdr.type = RXRPC_PACKET_TYPE_DATA; + whdr.flags = sp->hdr.flags; + whdr.userStatus = 0; + whdr.securityIndex = call->security_ix; + whdr._rsvd = htons(sp->hdr._rsvd); + whdr.serviceId = htons(call->service_id); + + iov[0].iov_base = &whdr; + iov[0].iov_len = sizeof(whdr); + iov[1].iov_base = skb->head; + iov[1].iov_len = skb->len; + len = iov[0].iov_len + iov[1].iov_len; + + msg.msg_name = &call->peer->srx.transport; + msg.msg_namelen = call->peer->srx.transport_len; msg.msg_control = NULL; msg.msg_controllen = 0; msg.msg_flags = 0; @@ -234,26 +257,33 @@ int rxrpc_send_data_packet(struct rxrpc_connection *conn, struct sk_buff *skb) } } + _proto("Tx DATA %%%u { #%u }", serial, sp->hdr.seq); + /* send the packet with the don't fragment bit set if we currently * think it's small enough */ - if (skb->len - sizeof(struct rxrpc_wire_header) < conn->params.peer->maxdata) { - down_read(&conn->params.local->defrag_sem); - /* send the packet by UDP - * - returns -EMSGSIZE if UDP would have to fragment the packet - * to go out of the interface - * - in which case, we'll have processed the ICMP error - * message and update the peer record - */ - ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 1, - iov[0].iov_len); + if (iov[1].iov_len >= call->peer->maxdata) + goto send_fragmentable; - up_read(&conn->params.local->defrag_sem); - if (ret == -EMSGSIZE) - goto send_fragmentable; + down_read(&conn->params.local->defrag_sem); + /* send the packet by UDP + * - returns -EMSGSIZE if UDP would have to fragment the packet + * to go out of the interface + * - in which case, we'll have processed the ICMP error + * message and update the peer record + */ + ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len); - _leave(" = %d [%u]", ret, conn->params.peer->maxdata); - return ret; + up_read(&conn->params.local->defrag_sem); + if (ret == -EMSGSIZE) + goto send_fragmentable; + +done: + if (ret == 0) { + sp->resend_at = jiffies + rxrpc_resend_timeout; + sp->hdr.serial = serial; } + _leave(" = %d [%u]", ret, call->peer->maxdata); + return ret; send_fragmentable: /* attempt to send this message with fragmentation enabled */ @@ -268,8 +298,8 @@ send_fragmentable: SOL_IP, IP_MTU_DISCOVER, (char *)&opt, sizeof(opt)); if (ret == 0) { - ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 1, - iov[0].iov_len); + ret = kernel_sendmsg(conn->params.local->socket, &msg, + iov, 2, len); opt = IP_PMTUDISC_DO; kernel_setsockopt(conn->params.local->socket, SOL_IP, @@ -298,8 +328,7 @@ send_fragmentable: } up_write(&conn->params.local->defrag_sem); - _leave(" = %d [frag %u]", ret, conn->params.peer->maxdata); - return ret; + goto done; } /* diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index ae392558829d..88d080a1a3de 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -80,12 +80,10 @@ static int rxkad_init_connection_security(struct rxrpc_connection *conn) case RXRPC_SECURITY_AUTH: conn->size_align = 8; conn->security_size = sizeof(struct rxkad_level1_hdr); - conn->header_size += sizeof(struct rxkad_level1_hdr); break; case RXRPC_SECURITY_ENCRYPT: conn->size_align = 8; conn->security_size = sizeof(struct rxkad_level2_hdr); - conn->header_size += sizeof(struct rxkad_level2_hdr); break; default: ret = -EKEYREJECTED; @@ -161,7 +159,7 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call, _enter(""); - check = sp->hdr.seq ^ sp->hdr.callNumber; + check = sp->hdr.seq ^ call->call_id; data_size |= (u32)check << 16; hdr.data_size = htonl(data_size); @@ -205,7 +203,7 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call, _enter(""); - check = sp->hdr.seq ^ sp->hdr.callNumber; + check = sp->hdr.seq ^ call->call_id; rxkhdr.data_size = htonl(data_size | (u32)check << 16); rxkhdr.checksum = 0; @@ -277,7 +275,7 @@ static int rxkad_secure_packet(struct rxrpc_call *call, /* calculate the security checksum */ x = (call->cid & RXRPC_CHANNELMASK) << (32 - RXRPC_CIDSHIFT); x |= sp->hdr.seq & 0x3fffffff; - call->crypto_buf[0] = htonl(sp->hdr.callNumber); + call->crypto_buf[0] = htonl(call->call_id); call->crypto_buf[1] = htonl(x); sg_init_one(&sg, call->crypto_buf, 8); diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 6a39ee97a0b7..814b17f23971 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -134,13 +134,11 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, write_unlock_bh(&call->state_lock); } - _proto("Tx DATA %%%u { #%u }", sp->hdr.serial, sp->hdr.seq); - if (seq == 1 && rxrpc_is_client_call(call)) rxrpc_expose_client_call(call); sp->resend_at = jiffies + rxrpc_resend_timeout; - ret = rxrpc_send_data_packet(call->conn, skb); + ret = rxrpc_send_data_packet(call, skb); if (ret < 0) { _debug("need instant resend %d", ret); rxrpc_instant_resend(call, ix); @@ -150,29 +148,6 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, _leave(""); } -/* - * Convert a host-endian header into a network-endian header. - */ -static void rxrpc_insert_header(struct sk_buff *skb) -{ - struct rxrpc_wire_header whdr; - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - - whdr.epoch = htonl(sp->hdr.epoch); - whdr.cid = htonl(sp->hdr.cid); - whdr.callNumber = htonl(sp->hdr.callNumber); - whdr.seq = htonl(sp->hdr.seq); - whdr.serial = htonl(sp->hdr.serial); - whdr.type = sp->hdr.type; - whdr.flags = sp->hdr.flags; - whdr.userStatus = sp->hdr.userStatus; - whdr.securityIndex = sp->hdr.securityIndex; - whdr._rsvd = htons(sp->hdr._rsvd); - whdr.serviceId = htons(sp->hdr.serviceId); - - memcpy(skb->head, &whdr, sizeof(whdr)); -} - /* * send data through a socket * - must be called in process context @@ -232,7 +207,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, space = chunk + call->conn->size_align; space &= ~(call->conn->size_align - 1UL); - size = space + call->conn->header_size; + size = space + call->conn->security_size; _debug("SIZE: %zu/%zu/%zu", chunk, space, size); @@ -248,9 +223,9 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, ASSERTCMP(skb->mark, ==, 0); - _debug("HS: %u", call->conn->header_size); - skb_reserve(skb, call->conn->header_size); - skb->len += call->conn->header_size; + _debug("HS: %u", call->conn->security_size); + skb_reserve(skb, call->conn->security_size); + skb->len += call->conn->security_size; sp = rxrpc_skb(skb); sp->remain = chunk; @@ -312,33 +287,23 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, seq = call->tx_top + 1; - sp->hdr.epoch = conn->proto.epoch; - sp->hdr.cid = call->cid; - sp->hdr.callNumber = call->call_id; sp->hdr.seq = seq; - sp->hdr.serial = atomic_inc_return(&conn->serial); - sp->hdr.type = RXRPC_PACKET_TYPE_DATA; - sp->hdr.userStatus = 0; - sp->hdr.securityIndex = call->security_ix; sp->hdr._rsvd = 0; - sp->hdr.serviceId = call->service_id; + sp->hdr.flags = conn->out_clientflag; - sp->hdr.flags = conn->out_clientflag; if (msg_data_left(msg) == 0 && !more) sp->hdr.flags |= RXRPC_LAST_PACKET; else if (call->tx_top - call->tx_hard_ack < call->tx_winsize) sp->hdr.flags |= RXRPC_MORE_PACKETS; - if (more && seq & 1) + if (seq & 1) sp->hdr.flags |= RXRPC_REQUEST_ACK; ret = conn->security->secure_packet( - call, skb, skb->mark, - skb->head + sizeof(struct rxrpc_wire_header)); + call, skb, skb->mark, skb->head); if (ret < 0) goto out; - rxrpc_insert_header(skb); rxrpc_queue_packet(call, skb, !msg_data_left(msg) && !more); skb = NULL; } From f07373ead455a396e15a431bc08d8ce1dac6f1cf Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 22 Sep 2016 00:29:32 +0100 Subject: [PATCH 1331/1715] rxrpc: Add re-sent Tx annotation Add a Tx-phase annotation for packet buffers to indicate that a buffer has already been retransmitted. This will be used by future congestion management. Re-retransmissions of a packet don't affect the congestion window managment in the same way as initial retransmissions. Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 2 ++ net/rxrpc/call_event.c | 28 +++++++++++++++++++--------- net/rxrpc/input.c | 14 +++++++++++--- 3 files changed, 32 insertions(+), 12 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index f021df4a6a22..dcf54e3fb478 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -505,6 +505,8 @@ struct rxrpc_call { #define RXRPC_TX_ANNO_UNACK 1 #define RXRPC_TX_ANNO_NAK 2 #define RXRPC_TX_ANNO_RETRANS 3 +#define RXRPC_TX_ANNO_MASK 0x03 +#define RXRPC_TX_ANNO_RESENT 0x04 #define RXRPC_RX_ANNO_JUMBO 0x3f /* Jumbo subpacket number + 1 if not zero */ #define RXRPC_RX_ANNO_JLAST 0x40 /* Set if last element of a jumbo packet */ #define RXRPC_RX_ANNO_VERIFIED 0x80 /* Set if verified and decrypted */ diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 6247ce25eb21..34ad967f2d81 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -144,7 +144,7 @@ static void rxrpc_resend(struct rxrpc_call *call) rxrpc_seq_t cursor, seq, top; unsigned long resend_at, now; int ix; - u8 annotation; + u8 annotation, anno_type; _enter("{%d,%d}", call->tx_hard_ack, call->tx_top); @@ -165,14 +165,16 @@ static void rxrpc_resend(struct rxrpc_call *call) for (seq = cursor + 1; before_eq(seq, top); seq++) { ix = seq & RXRPC_RXTX_BUFF_MASK; annotation = call->rxtx_annotations[ix]; - if (annotation == RXRPC_TX_ANNO_ACK) + anno_type = annotation & RXRPC_TX_ANNO_MASK; + annotation &= ~RXRPC_TX_ANNO_MASK; + if (anno_type == RXRPC_TX_ANNO_ACK) continue; skb = call->rxtx_buffer[ix]; rxrpc_see_skb(skb, rxrpc_skb_tx_seen); sp = rxrpc_skb(skb); - if (annotation == RXRPC_TX_ANNO_UNACK) { + if (anno_type == RXRPC_TX_ANNO_UNACK) { if (time_after(sp->resend_at, now)) { if (time_before(sp->resend_at, resend_at)) resend_at = sp->resend_at; @@ -181,7 +183,7 @@ static void rxrpc_resend(struct rxrpc_call *call) } /* Okay, we need to retransmit a packet. */ - call->rxtx_annotations[ix] = RXRPC_TX_ANNO_RETRANS; + call->rxtx_annotations[ix] = RXRPC_TX_ANNO_RETRANS | annotation; } call->resend_at = resend_at; @@ -194,7 +196,8 @@ static void rxrpc_resend(struct rxrpc_call *call) for (seq = cursor + 1; before_eq(seq, top); seq++) { ix = seq & RXRPC_RXTX_BUFF_MASK; annotation = call->rxtx_annotations[ix]; - if (annotation != RXRPC_TX_ANNO_RETRANS) + anno_type = annotation & RXRPC_TX_ANNO_MASK; + if (anno_type != RXRPC_TX_ANNO_RETRANS) continue; skb = call->rxtx_buffer[ix]; @@ -220,10 +223,17 @@ static void rxrpc_resend(struct rxrpc_call *call) * received and the packet might have been hard-ACK'd (in which * case it will no longer be in the buffer). */ - if (after(seq, call->tx_hard_ack) && - (call->rxtx_annotations[ix] == RXRPC_TX_ANNO_RETRANS || - call->rxtx_annotations[ix] == RXRPC_TX_ANNO_NAK)) - call->rxtx_annotations[ix] = RXRPC_TX_ANNO_UNACK; + if (after(seq, call->tx_hard_ack)) { + annotation = call->rxtx_annotations[ix]; + anno_type = annotation & RXRPC_TX_ANNO_MASK; + if (anno_type == RXRPC_TX_ANNO_RETRANS || + anno_type == RXRPC_TX_ANNO_NAK) { + annotation &= ~RXRPC_TX_ANNO_MASK; + annotation |= RXRPC_TX_ANNO_UNACK; + } + annotation |= RXRPC_TX_ANNO_RESENT; + call->rxtx_annotations[ix] = annotation; + } if (after(call->tx_hard_ack, seq)) seq = call->tx_hard_ack; diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 7ac1edf3aac7..aa261df9fc9e 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -388,17 +388,25 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks, { bool resend = false; int ix; + u8 annotation, anno_type; for (; nr_acks > 0; nr_acks--, seq++) { ix = seq & RXRPC_RXTX_BUFF_MASK; + annotation = call->rxtx_annotations[ix]; + anno_type = annotation & RXRPC_TX_ANNO_MASK; + annotation &= ~RXRPC_TX_ANNO_MASK; switch (*acks++) { case RXRPC_ACK_TYPE_ACK: - call->rxtx_annotations[ix] = RXRPC_TX_ANNO_ACK; + if (anno_type == RXRPC_TX_ANNO_ACK) + continue; + call->rxtx_annotations[ix] = + RXRPC_TX_ANNO_ACK | annotation; break; case RXRPC_ACK_TYPE_NACK: - if (call->rxtx_annotations[ix] == RXRPC_TX_ANNO_NAK) + if (anno_type == RXRPC_TX_ANNO_NAK) continue; - call->rxtx_annotations[ix] = RXRPC_TX_ANNO_NAK; + call->rxtx_annotations[ix] = + RXRPC_TX_ANNO_NAK | annotation; resend = true; break; default: From cf1a6474f80735ff4a5d99f3dd68a94dbec8455f Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 22 Sep 2016 00:41:53 +0100 Subject: [PATCH 1332/1715] rxrpc: Add per-peer RTT tracker Add a function to track the average RTT for a peer. Sources of RTT data will be added in subsequent patches. The RTT data will be useful in the future for determining resend timeouts and for handling the slow-start part of the Rx protocol. Also add a pair of tracepoints, one to log transmissions to elicit a response for RTT purposes and one to log responses that contribute RTT data. Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 61 ++++++++++++++++++++++++++++++++++++ net/rxrpc/ar-internal.h | 25 ++++++++++++--- net/rxrpc/misc.c | 8 +++++ net/rxrpc/peer_event.c | 41 ++++++++++++++++++++++++ 4 files changed, 131 insertions(+), 4 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 75a5d8bf50e1..e8f2afbbe0bf 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -353,6 +353,67 @@ TRACE_EVENT(rxrpc_recvmsg, __entry->ret) ); +TRACE_EVENT(rxrpc_rtt_tx, + TP_PROTO(struct rxrpc_call *call, enum rxrpc_rtt_tx_trace why, + rxrpc_serial_t send_serial), + + TP_ARGS(call, why, send_serial), + + TP_STRUCT__entry( + __field(struct rxrpc_call *, call ) + __field(enum rxrpc_rtt_tx_trace, why ) + __field(rxrpc_serial_t, send_serial ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->why = why; + __entry->send_serial = send_serial; + ), + + TP_printk("c=%p %s sr=%08x", + __entry->call, + rxrpc_rtt_tx_traces[__entry->why], + __entry->send_serial) + ); + +TRACE_EVENT(rxrpc_rtt_rx, + TP_PROTO(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why, + rxrpc_serial_t send_serial, rxrpc_serial_t resp_serial, + s64 rtt, u8 nr, s64 avg), + + TP_ARGS(call, why, send_serial, resp_serial, rtt, nr, avg), + + TP_STRUCT__entry( + __field(struct rxrpc_call *, call ) + __field(enum rxrpc_rtt_rx_trace, why ) + __field(u8, nr ) + __field(rxrpc_serial_t, send_serial ) + __field(rxrpc_serial_t, resp_serial ) + __field(s64, rtt ) + __field(u64, avg ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->why = why; + __entry->send_serial = send_serial; + __entry->resp_serial = resp_serial; + __entry->rtt = rtt; + __entry->nr = nr; + __entry->avg = avg; + ), + + TP_printk("c=%p %s sr=%08x rr=%08x rtt=%lld nr=%u avg=%lld", + __entry->call, + rxrpc_rtt_rx_traces[__entry->why], + __entry->send_serial, + __entry->resp_serial, + __entry->rtt, + __entry->nr, + __entry->avg) + ); + #endif /* _TRACE_RXRPC_H */ /* This part must be outside protection */ diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index dcf54e3fb478..79c671e552c3 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -258,10 +258,11 @@ struct rxrpc_peer { /* calculated RTT cache */ #define RXRPC_RTT_CACHE_SIZE 32 - suseconds_t rtt; /* current RTT estimate (in uS) */ - unsigned int rtt_point; /* next entry at which to insert */ - unsigned int rtt_usage; /* amount of cache actually used */ - suseconds_t rtt_cache[RXRPC_RTT_CACHE_SIZE]; /* calculated RTT cache */ + u64 rtt; /* Current RTT estimate (in nS) */ + u64 rtt_sum; /* Sum of cache contents */ + u64 rtt_cache[RXRPC_RTT_CACHE_SIZE]; /* Determined RTT cache */ + u8 rtt_cursor; /* next entry at which to insert */ + u8 rtt_usage; /* amount of cache actually used */ }; /* @@ -657,6 +658,20 @@ enum rxrpc_recvmsg_trace { extern const char rxrpc_recvmsg_traces[rxrpc_recvmsg__nr_trace][5]; +enum rxrpc_rtt_tx_trace { + rxrpc_rtt_tx_ping, + rxrpc_rtt_tx__nr_trace +}; + +extern const char rxrpc_rtt_tx_traces[rxrpc_rtt_tx__nr_trace][5]; + +enum rxrpc_rtt_rx_trace { + rxrpc_rtt_rx_ping_response, + rxrpc_rtt_rx__nr_trace +}; + +extern const char rxrpc_rtt_rx_traces[rxrpc_rtt_rx__nr_trace][5]; + extern const char *const rxrpc_pkts[]; extern const char *rxrpc_acks(u8 reason); @@ -955,6 +970,8 @@ void rxrpc_reject_packets(struct rxrpc_local *); */ void rxrpc_error_report(struct sock *); void rxrpc_peer_error_distributor(struct work_struct *); +void rxrpc_peer_add_rtt(struct rxrpc_call *, enum rxrpc_rtt_rx_trace, + rxrpc_serial_t, rxrpc_serial_t, ktime_t, ktime_t); /* * peer_object.c diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index 026e1f2e83ff..6321c23f9a6e 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -182,3 +182,11 @@ const char rxrpc_recvmsg_traces[rxrpc_recvmsg__nr_trace][5] = { [rxrpc_recvmsg_to_be_accepted] = "TBAC", [rxrpc_recvmsg_return] = "RETN", }; + +const char rxrpc_rtt_tx_traces[rxrpc_rtt_tx__nr_trace][5] = { + [rxrpc_rtt_tx_ping] = "PING", +}; + +const char rxrpc_rtt_rx_traces[rxrpc_rtt_rx__nr_trace][5] = { + [rxrpc_rtt_rx_ping_response] = "PONG", +}; diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index 18276e7cb9e0..bf13b8470c9a 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -305,3 +305,44 @@ void rxrpc_peer_error_distributor(struct work_struct *work) rxrpc_put_peer(peer); _leave(""); } + +/* + * Add RTT information to cache. This is called in softirq mode and has + * exclusive access to the peer RTT data. + */ +void rxrpc_peer_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why, + rxrpc_serial_t send_serial, rxrpc_serial_t resp_serial, + ktime_t send_time, ktime_t resp_time) +{ + struct rxrpc_peer *peer = call->peer; + s64 rtt; + u64 sum = peer->rtt_sum, avg; + u8 cursor = peer->rtt_cursor, usage = peer->rtt_usage; + + rtt = ktime_to_ns(ktime_sub(resp_time, send_time)); + if (rtt < 0) + return; + + /* Replace the oldest datum in the RTT buffer */ + sum -= peer->rtt_cache[cursor]; + sum += rtt; + peer->rtt_cache[cursor] = rtt; + peer->rtt_cursor = (cursor + 1) & (RXRPC_RTT_CACHE_SIZE - 1); + peer->rtt_sum = sum; + if (usage < RXRPC_RTT_CACHE_SIZE) { + usage++; + peer->rtt_usage = usage; + } + + /* Now recalculate the average */ + if (usage == RXRPC_RTT_CACHE_SIZE) { + avg = sum / RXRPC_RTT_CACHE_SIZE; + } else { + avg = sum; + do_div(avg, usage); + } + + peer->rtt = avg; + trace_rxrpc_rtt_rx(call, why, send_serial, resp_serial, rtt, + usage, avg); +} From de3d6fa81e684af5817dc379ffc394235a9666cc Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Tue, 20 Sep 2016 14:39:39 +0300 Subject: [PATCH 1333/1715] net/mlx4_en: Add branch prediction hints in RX data-path Add likely/unlikely hints to improve branch predictions in the RX data-path. Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 26 ++++++++++++---------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index c46355bce613..6e474af3fb1f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -72,7 +72,7 @@ static int mlx4_alloc_pages(struct mlx4_en_priv *priv, } dma = dma_map_page(priv->ddev, page, 0, PAGE_SIZE << order, frag_info->dma_dir); - if (dma_mapping_error(priv->ddev, dma)) { + if (unlikely(dma_mapping_error(priv->ddev, dma))) { put_page(page); return -ENOMEM; } @@ -108,7 +108,8 @@ static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv, ring_alloc[i].page_size) continue; - if (mlx4_alloc_pages(priv, &page_alloc[i], frag_info, gfp)) + if (unlikely(mlx4_alloc_pages(priv, &page_alloc[i], + frag_info, gfp))) goto out; } @@ -585,7 +586,7 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv, frag_info = &priv->frag_info[nr]; if (length <= frag_info->frag_prefix_size) break; - if (!frags[nr].page) + if (unlikely(!frags[nr].page)) goto fail; dma = be64_to_cpu(rx_desc->data[nr].addr); @@ -625,7 +626,7 @@ static struct sk_buff *mlx4_en_rx_skb(struct mlx4_en_priv *priv, dma_addr_t dma; skb = netdev_alloc_skb(priv->dev, SMALL_PACKET_SIZE + NET_IP_ALIGN); - if (!skb) { + if (unlikely(!skb)) { en_dbg(RX_ERR, priv, "Failed allocating skb\n"); return NULL; } @@ -736,7 +737,8 @@ static int get_fixed_ipv6_csum(__wsum hw_checksum, struct sk_buff *skb, { __wsum csum_pseudo_hdr = 0; - if (ipv6h->nexthdr == IPPROTO_FRAGMENT || ipv6h->nexthdr == IPPROTO_HOPOPTS) + if (unlikely(ipv6h->nexthdr == IPPROTO_FRAGMENT || + ipv6h->nexthdr == IPPROTO_HOPOPTS)) return -1; hw_checksum = csum_add(hw_checksum, (__force __wsum)htons(ipv6h->nexthdr)); @@ -769,7 +771,7 @@ static int check_csum(struct mlx4_cqe *cqe, struct sk_buff *skb, void *va, get_fixed_ipv4_csum(hw_checksum, skb, hdr); #if IS_ENABLED(CONFIG_IPV6) else if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPV6)) - if (get_fixed_ipv6_csum(hw_checksum, skb, hdr)) + if (unlikely(get_fixed_ipv6_csum(hw_checksum, skb, hdr))) return -1; #endif return 0; @@ -796,10 +798,10 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud u64 timestamp; bool l2_tunnel; - if (!priv->port_up) + if (unlikely(!priv->port_up)) return 0; - if (budget <= 0) + if (unlikely(budget <= 0)) return polled; /* Protect accesses to: ring->xdp_prog, priv->mac_hash list */ @@ -902,9 +904,9 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud case XDP_PASS: break; case XDP_TX: - if (!mlx4_en_xmit_frame(frags, dev, + if (likely(!mlx4_en_xmit_frame(frags, dev, length, tx_index, - &doorbell_pending)) + &doorbell_pending))) goto consumed; goto xdp_drop; /* Drop on xmit failure */ default: @@ -912,7 +914,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud case XDP_ABORTED: case XDP_DROP: xdp_drop: - if (mlx4_en_rx_recycle(ring, frags)) + if (likely(mlx4_en_rx_recycle(ring, frags))) goto consumed; goto next; } @@ -1016,7 +1018,7 @@ xdp_drop: /* GRO not possible, complete processing here */ skb = mlx4_en_rx_skb(priv, rx_desc, frags, length); - if (!skb) { + if (unlikely(!skb)) { ring->dropped++; goto next; } From 57c970c2e8d8772237294bb8a6a25a205448fd96 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Tue, 20 Sep 2016 14:39:40 +0300 Subject: [PATCH 1334/1715] net/mlx4_en: Fix wrong indentation Use tabs instead of spaces before if statement, no functional change. Fixes: e7c1c2c46201 ("mlx4_en: Added self diagnostics test implementation") Signed-off-by: Kamal Heib Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 6e474af3fb1f..f2e8beddcf44 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -1023,7 +1023,7 @@ xdp_drop: goto next; } - if (unlikely(priv->validate_loopback)) { + if (unlikely(priv->validate_loopback)) { validate_loopback(priv, skb); goto next; } From 30353bfc43a1602c020f31d95cf27182ffd23824 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 20 Sep 2016 14:39:41 +0300 Subject: [PATCH 1335/1715] net/mlx4_core: Use RCU to perform radix tree lookup for SRQ Radix tree lookup can be performed without locking. Fixes: 225c7b1feef1 ("IB/mlx4: Add a driver Mellanox ConnectX InfiniBand adapters") Signed-off-by: Leon Romanovsky Suggested-by: Sagi Grimberg Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/srq.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/srq.c b/drivers/net/ethernet/mellanox/mlx4/srq.c index 67146624eb58..f44d089e2ca6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/srq.c +++ b/drivers/net/ethernet/mellanox/mlx4/srq.c @@ -45,15 +45,12 @@ void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type) struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table; struct mlx4_srq *srq; - spin_lock(&srq_table->lock); - + rcu_read_lock(); srq = radix_tree_lookup(&srq_table->tree, srqn & (dev->caps.num_srqs - 1)); + rcu_read_unlock(); if (srq) atomic_inc(&srq->refcount); - - spin_unlock(&srq_table->lock); - - if (!srq) { + else { mlx4_warn(dev, "Async event for bogus SRQ %08x\n", srqn); return; } @@ -301,12 +298,11 @@ struct mlx4_srq *mlx4_srq_lookup(struct mlx4_dev *dev, u32 srqn) { struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table; struct mlx4_srq *srq; - unsigned long flags; - spin_lock_irqsave(&srq_table->lock, flags); + rcu_read_lock(); srq = radix_tree_lookup(&srq_table->tree, srqn & (dev->caps.num_srqs - 1)); - spin_unlock_irqrestore(&srq_table->lock, flags); + rcu_read_unlock(); return srq; } From a7e1f04905e5b2b90251974dddde781301b6be37 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Tue, 20 Sep 2016 14:39:42 +0300 Subject: [PATCH 1336/1715] net/mlx4_core: Fix deadlock when switching between polling and event fw commands When switching from polling-based fw commands to event-based fw commands, there is a race condition which could cause a fw command in another task to hang: that task will keep waiting for the polling sempahore, but may never be able to acquire it. This is due to mlx4_cmd_use_events, which "down"s the sempahore back to 0. During driver initialization, this is not a problem, since no other tasks which invoke FW commands are active. However, there is a problem if the driver switches to polling mode and then back to event mode during normal operation. The "test_interrupts" feature does exactly that. Running "ethtool -t offline" causes the PF driver to temporarily switch to polling mode, and then back to event mode. (Note that for VF drivers, such switching is not performed). Fix this by adding a read-write semaphore for protection when switching between modes. Fixes: 225c7b1feef1 ("IB/mlx4: Add a driver Mellanox ConnectX InfiniBand adapters") Signed-off-by: Jack Morgenstein Signed-off-by: Matan Barak Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/cmd.c | 23 +++++++++++++++++------ drivers/net/ethernet/mellanox/mlx4/mlx4.h | 2 ++ 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index f04a423ff79d..a58d96cf1ed1 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -785,17 +785,23 @@ int __mlx4_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param, return mlx4_cmd_reset_flow(dev, op, op_modifier, -EIO); if (!mlx4_is_mfunc(dev) || (native && mlx4_is_master(dev))) { + int ret; + if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) return mlx4_internal_err_ret_value(dev, op, op_modifier); + down_read(&mlx4_priv(dev)->cmd.switch_sem); if (mlx4_priv(dev)->cmd.use_events) - return mlx4_cmd_wait(dev, in_param, out_param, - out_is_imm, in_modifier, - op_modifier, op, timeout); + ret = mlx4_cmd_wait(dev, in_param, out_param, + out_is_imm, in_modifier, + op_modifier, op, timeout); else - return mlx4_cmd_poll(dev, in_param, out_param, - out_is_imm, in_modifier, - op_modifier, op, timeout); + ret = mlx4_cmd_poll(dev, in_param, out_param, + out_is_imm, in_modifier, + op_modifier, op, timeout); + + up_read(&mlx4_priv(dev)->cmd.switch_sem); + return ret; } return mlx4_slave_cmd(dev, in_param, out_param, out_is_imm, in_modifier, op_modifier, op, timeout); @@ -2454,6 +2460,7 @@ int mlx4_cmd_init(struct mlx4_dev *dev) int flags = 0; if (!priv->cmd.initialized) { + init_rwsem(&priv->cmd.switch_sem); mutex_init(&priv->cmd.slave_cmd_mutex); sema_init(&priv->cmd.poll_sem, 1); priv->cmd.use_events = 0; @@ -2583,6 +2590,7 @@ int mlx4_cmd_use_events(struct mlx4_dev *dev) if (!priv->cmd.context) return -ENOMEM; + down_write(&priv->cmd.switch_sem); for (i = 0; i < priv->cmd.max_cmds; ++i) { priv->cmd.context[i].token = i; priv->cmd.context[i].next = i + 1; @@ -2606,6 +2614,7 @@ int mlx4_cmd_use_events(struct mlx4_dev *dev) down(&priv->cmd.poll_sem); priv->cmd.use_events = 1; + up_write(&priv->cmd.switch_sem); return err; } @@ -2618,6 +2627,7 @@ void mlx4_cmd_use_polling(struct mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); int i; + down_write(&priv->cmd.switch_sem); priv->cmd.use_events = 0; for (i = 0; i < priv->cmd.max_cmds; ++i) @@ -2626,6 +2636,7 @@ void mlx4_cmd_use_polling(struct mlx4_dev *dev) kfree(priv->cmd.context); up(&priv->cmd.poll_sem); + up_write(&priv->cmd.switch_sem); } struct mlx4_cmd_mailbox *mlx4_alloc_cmd_mailbox(struct mlx4_dev *dev) diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index c9d7fc5159f2..c128ba3ef014 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -46,6 +46,7 @@ #include #include #include +#include #include #include @@ -627,6 +628,7 @@ struct mlx4_cmd { struct mutex slave_cmd_mutex; struct semaphore poll_sem; struct semaphore event_sem; + struct rw_semaphore switch_sem; int max_cmds; spinlock_t context_lock; int free_head; From bfca4c520f7ea78138ddccea2de18dc062b0fefd Mon Sep 17 00:00:00 2001 From: Shmulik Ladkani Date: Mon, 19 Sep 2016 19:11:09 +0300 Subject: [PATCH 1337/1715] net: skbuff: Export __skb_vlan_pop This exports the functionality of extracting the tag from the payload, without moving next vlan tag into hw accel tag. Signed-off-by: Shmulik Ladkani Signed-off-by: David S. Miller --- include/linux/skbuff.h | 1 + net/core/skbuff.c | 7 +++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c6dab3f7457c..9bf60b556bd2 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3085,6 +3085,7 @@ bool skb_gso_validate_mtu(const struct sk_buff *skb, unsigned int mtu); struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features); struct sk_buff *skb_vlan_untag(struct sk_buff *skb); int skb_ensure_writable(struct sk_buff *skb, int write_len); +int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci); int skb_vlan_pop(struct sk_buff *skb); int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci); struct sk_buff *pskb_extract(struct sk_buff *skb, int off, int to_copy, diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 7bf82a28e10a..6c22351bd519 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4522,8 +4522,10 @@ int skb_ensure_writable(struct sk_buff *skb, int write_len) } EXPORT_SYMBOL(skb_ensure_writable); -/* remove VLAN header from packet and update csum accordingly. */ -static int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci) +/* remove VLAN header from packet and update csum accordingly. + * expects a non skb_vlan_tag_present skb with a vlan tag payload + */ +int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci) { struct vlan_hdr *vhdr; unsigned int offset = skb->data - skb_mac_header(skb); @@ -4554,6 +4556,7 @@ pull: return err; } +EXPORT_SYMBOL(__skb_vlan_pop); int skb_vlan_pop(struct sk_buff *skb) { From 45a497f2d149a4a8061c61518a79d59f1f3034b2 Mon Sep 17 00:00:00 2001 From: Shmulik Ladkani Date: Mon, 19 Sep 2016 19:11:10 +0300 Subject: [PATCH 1338/1715] net/sched: act_vlan: Introduce TCA_VLAN_ACT_MODIFY vlan action TCA_VLAN_ACT_MODIFY allows one to change an existing tag. It accepts same attributes as TCA_VLAN_ACT_PUSH (protocol, id, priority). If packet is vlan tagged, then the tag gets overwritten according to user specified attributes. For example, this allows user to replace a tag's vid while preserving its priority bits (as opposed to "action vlan pop pipe action vlan push"). Signed-off-by: Shmulik Ladkani Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/uapi/linux/tc_act/tc_vlan.h | 1 + net/sched/act_vlan.c | 29 ++++++++++++++++++++++++++++- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/tc_act/tc_vlan.h b/include/uapi/linux/tc_act/tc_vlan.h index be72b6e3843b..bddb272b843f 100644 --- a/include/uapi/linux/tc_act/tc_vlan.h +++ b/include/uapi/linux/tc_act/tc_vlan.h @@ -16,6 +16,7 @@ #define TCA_VLAN_ACT_POP 1 #define TCA_VLAN_ACT_PUSH 2 +#define TCA_VLAN_ACT_MODIFY 3 struct tc_vlan { tc_gen; diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index 59a8d3150ae2..a95c00b119da 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -30,6 +30,7 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a, struct tcf_vlan *v = to_vlan(a); int action; int err; + u16 tci; spin_lock(&v->tcf_lock); tcf_lastuse_update(&v->tcf_tm); @@ -48,6 +49,30 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a, if (err) goto drop; break; + case TCA_VLAN_ACT_MODIFY: + /* No-op if no vlan tag (either hw-accel or in-payload) */ + if (!skb_vlan_tagged(skb)) + goto unlock; + /* extract existing tag (and guarantee no hw-accel tag) */ + if (skb_vlan_tag_present(skb)) { + tci = skb_vlan_tag_get(skb); + skb->vlan_tci = 0; + } else { + /* in-payload vlan tag, pop it */ + err = __skb_vlan_pop(skb, &tci); + if (err) + goto drop; + } + /* replace the vid */ + tci = (tci & ~VLAN_VID_MASK) | v->tcfv_push_vid; + /* replace prio bits, if tcfv_push_prio specified */ + if (v->tcfv_push_prio) { + tci &= ~VLAN_PRIO_MASK; + tci |= v->tcfv_push_prio << VLAN_PRIO_SHIFT; + } + /* put updated tci as hwaccel tag */ + __vlan_hwaccel_put_tag(skb, v->tcfv_push_proto, tci); + break; default: BUG(); } @@ -102,6 +127,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, case TCA_VLAN_ACT_POP: break; case TCA_VLAN_ACT_PUSH: + case TCA_VLAN_ACT_MODIFY: if (!tb[TCA_VLAN_PUSH_VLAN_ID]) { if (exists) tcf_hash_release(*a, bind); @@ -185,7 +211,8 @@ static int tcf_vlan_dump(struct sk_buff *skb, struct tc_action *a, if (nla_put(skb, TCA_VLAN_PARMS, sizeof(opt), &opt)) goto nla_put_failure; - if (v->tcfv_action == TCA_VLAN_ACT_PUSH && + if ((v->tcfv_action == TCA_VLAN_ACT_PUSH || + v->tcfv_action == TCA_VLAN_ACT_MODIFY) && (nla_put_u16(skb, TCA_VLAN_PUSH_VLAN_ID, v->tcfv_push_vid) || nla_put_be16(skb, TCA_VLAN_PUSH_VLAN_PROTOCOL, v->tcfv_push_proto) || From 636c2628086e40c86dac7ddc84a1c4b4fcccc6e3 Mon Sep 17 00:00:00 2001 From: Shmulik Ladkani Date: Tue, 20 Sep 2016 12:48:36 +0300 Subject: [PATCH 1339/1715] net: skbuff: Remove errornous length validation in skb_vlan_pop() In 93515d53b1 "net: move vlan pop/push functions into common code" skb_vlan_pop was moved from its private location in openvswitch to skbuff common code. In case skb has non hw-accel vlan tag, the original 'pop_vlan()' assured that skb->len is sufficient (if skb->len < VLAN_ETH_HLEN then pop was considered a no-op). This validation was moved as is into the new common 'skb_vlan_pop'. Alas, in its original location (openvswitch), there was a guarantee that 'data' points to the mac_header, therefore the 'skb->len < VLAN_ETH_HLEN' condition made sense. However there's no such guarantee in the generic 'skb_vlan_pop'. For short packets received in rx path going through 'skb_vlan_pop', this causes 'skb_vlan_pop' to fail pop-ing a valid vlan hdr (in the non hw-accel case) or to fail moving next tag into hw-accel tag. Remove the 'skb->len < VLAN_ETH_HLEN' condition entirely: It is superfluous since inner '__skb_vlan_pop' already verifies there are VLAN_ETH_HLEN writable bytes at the mac_header. Note this presents a slight change to skb_vlan_pop() users: In case total length is smaller than VLAN_ETH_HLEN, skb_vlan_pop() now returns an error, as opposed to previous "no-op" behavior. Existing callers (e.g. tc act vlan, ovs) usually drop the packet if 'skb_vlan_pop' fails. Fixes: 93515d53b1 ("net: move vlan pop/push functions into common code") Signed-off-by: Shmulik Ladkani Cc: Pravin Shelar Reviewed-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/core/skbuff.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 6c22351bd519..b2a51bf1b0f9 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4567,9 +4567,8 @@ int skb_vlan_pop(struct sk_buff *skb) if (likely(skb_vlan_tag_present(skb))) { skb->vlan_tci = 0; } else { - if (unlikely((skb->protocol != htons(ETH_P_8021Q) && - skb->protocol != htons(ETH_P_8021AD)) || - skb->len < VLAN_ETH_HLEN)) + if (unlikely(skb->protocol != htons(ETH_P_8021Q) && + skb->protocol != htons(ETH_P_8021AD))) return 0; err = __skb_vlan_pop(skb, &vlan_tci); @@ -4577,9 +4576,8 @@ int skb_vlan_pop(struct sk_buff *skb) return err; } /* move next vlan tag to hw accel tag */ - if (likely((skb->protocol != htons(ETH_P_8021Q) && - skb->protocol != htons(ETH_P_8021AD)) || - skb->len < VLAN_ETH_HLEN)) + if (likely(skb->protocol != htons(ETH_P_8021Q) && + skb->protocol != htons(ETH_P_8021AD))) return 0; vlan_proto = skb->protocol; From ecf4ee41d25832a6ec52f8b54dfaa46c08b949d5 Mon Sep 17 00:00:00 2001 From: Shmulik Ladkani Date: Tue, 20 Sep 2016 12:48:37 +0300 Subject: [PATCH 1340/1715] net: skbuff: Coding: Use eth_type_vlan() instead of open coding it Fix 'skb_vlan_pop' to use eth_type_vlan instead of directly comparing skb->protocol to ETH_P_8021Q or ETH_P_8021AD. Signed-off-by: Shmulik Ladkani Reviewed-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/core/skbuff.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index b2a51bf1b0f9..d36c7548952f 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4567,8 +4567,7 @@ int skb_vlan_pop(struct sk_buff *skb) if (likely(skb_vlan_tag_present(skb))) { skb->vlan_tci = 0; } else { - if (unlikely(skb->protocol != htons(ETH_P_8021Q) && - skb->protocol != htons(ETH_P_8021AD))) + if (unlikely(!eth_type_vlan(skb->protocol))) return 0; err = __skb_vlan_pop(skb, &vlan_tci); @@ -4576,8 +4575,7 @@ int skb_vlan_pop(struct sk_buff *skb) return err; } /* move next vlan tag to hw accel tag */ - if (likely(skb->protocol != htons(ETH_P_8021Q) && - skb->protocol != htons(ETH_P_8021AD))) + if (likely(!eth_type_vlan(skb->protocol))) return 0; vlan_proto = skb->protocol; From d57fd6cafbad29d0648ed769f6df07b02f10d613 Mon Sep 17 00:00:00 2001 From: Rahul Lakkireddy Date: Tue, 20 Sep 2016 17:13:06 +0530 Subject: [PATCH 1341/1715] cxgb4: move common filter code to separate file Move common filter code to separate files. Also fix the following checkpatch checks. CHECK: Comparison to NULL could be written "!f->l2t" + if (f->l2t == NULL) { CHECK: spaces preferred around that '/' (ctx:VxV) + fwr->len16_pkd = htonl(FW_WR_LEN16_V(sizeof(*fwr)/16)); Signed-off-by: Rahul Lakkireddy Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/Makefile | 2 +- drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 23 ++ .../net/ethernet/chelsio/cxgb4/cxgb4_filter.c | 274 ++++++++++++++++++ .../net/ethernet/chelsio/cxgb4/cxgb4_filter.h | 47 +++ .../net/ethernet/chelsio/cxgb4/cxgb4_main.c | 264 +---------------- 5 files changed, 346 insertions(+), 264 deletions(-) create mode 100644 drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c create mode 100644 drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h diff --git a/drivers/net/ethernet/chelsio/cxgb4/Makefile b/drivers/net/ethernet/chelsio/cxgb4/Makefile index 246129650967..da889817ec27 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/Makefile +++ b/drivers/net/ethernet/chelsio/cxgb4/Makefile @@ -4,7 +4,7 @@ obj-$(CONFIG_CHELSIO_T4) += cxgb4.o -cxgb4-objs := cxgb4_main.o l2t.o t4_hw.o sge.o clip_tbl.o cxgb4_ethtool.o cxgb4_uld.o sched.o +cxgb4-objs := cxgb4_main.o l2t.o t4_hw.o sge.o clip_tbl.o cxgb4_ethtool.o cxgb4_uld.o sched.o cxgb4_filter.o cxgb4-$(CONFIG_CHELSIO_T4_DCB) += cxgb4_dcb.o cxgb4-$(CONFIG_CHELSIO_T4_FCOE) += cxgb4_fcoe.o cxgb4-$(CONFIG_DEBUG_FS) += cxgb4_debugfs.o diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 1f9867db3b78..a844fd25c214 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -1025,6 +1025,29 @@ enum { VLAN_REWRITE }; +/* Host shadow copy of ingress filter entry. This is in host native format + * and doesn't match the ordering or bit order, etc. of the hardware of the + * firmware command. The use of bit-field structure elements is purely to + * remind ourselves of the field size limitations and save memory in the case + * where the filter table is large. + */ +struct filter_entry { + /* Administrative fields for filter. */ + u32 valid:1; /* filter allocated and valid */ + u32 locked:1; /* filter is administratively locked */ + + u32 pending:1; /* filter action is pending firmware reply */ + u32 smtidx:8; /* Source MAC Table index for smac */ + struct l2t_entry *l2t; /* Layer Two Table entry for dmac */ + + /* The filter itself. Most of this is a straight copy of information + * provided by the extended ioctl(). Some fields are translated to + * internal forms -- for instance the Ingress Queue ID passed in from + * the ioctl() is translated into the Absolute Ingress Queue ID. + */ + struct ch_filter_specification fs; +}; + static inline int is_offload(const struct adapter *adap) { return adap->params.offload; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c new file mode 100644 index 000000000000..8a26a54a9c84 --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c @@ -0,0 +1,274 @@ +/* + * This file is part of the Chelsio T4 Ethernet driver for Linux. + * + * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "cxgb4.h" +#include "l2t.h" +#include "t4fw_api.h" +#include "cxgb4_filter.h" + +/* Delete the filter at a specified index. */ +static int del_filter_wr(struct adapter *adapter, int fidx) +{ + struct filter_entry *f = &adapter->tids.ftid_tab[fidx]; + struct fw_filter_wr *fwr; + unsigned int len, ftid; + struct sk_buff *skb; + + len = sizeof(*fwr); + ftid = adapter->tids.ftid_base + fidx; + + skb = alloc_skb(len, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + fwr = (struct fw_filter_wr *)__skb_put(skb, len); + t4_mk_filtdelwr(ftid, fwr, adapter->sge.fw_evtq.abs_id); + + /* Mark the filter as "pending" and ship off the Filter Work Request. + * When we get the Work Request Reply we'll clear the pending status. + */ + f->pending = 1; + t4_mgmt_tx(adapter, skb); + return 0; +} + +/* Send a Work Request to write the filter at a specified index. We construct + * a Firmware Filter Work Request to have the work done and put the indicated + * filter into "pending" mode which will prevent any further actions against + * it till we get a reply from the firmware on the completion status of the + * request. + */ +int set_filter_wr(struct adapter *adapter, int fidx) +{ + struct filter_entry *f = &adapter->tids.ftid_tab[fidx]; + struct fw_filter_wr *fwr; + struct sk_buff *skb; + unsigned int ftid; + + skb = alloc_skb(sizeof(*fwr), GFP_KERNEL); + if (!skb) + return -ENOMEM; + + /* If the new filter requires loopback Destination MAC and/or VLAN + * rewriting then we need to allocate a Layer 2 Table (L2T) entry for + * the filter. + */ + if (f->fs.newdmac || f->fs.newvlan) { + /* allocate L2T entry for new filter */ + f->l2t = t4_l2t_alloc_switching(adapter, f->fs.vlan, + f->fs.eport, f->fs.dmac); + if (!f->l2t) { + kfree_skb(skb); + return -ENOMEM; + } + } + + ftid = adapter->tids.ftid_base + fidx; + + fwr = (struct fw_filter_wr *)__skb_put(skb, sizeof(*fwr)); + memset(fwr, 0, sizeof(*fwr)); + + /* It would be nice to put most of the following in t4_hw.c but most + * of the work is translating the cxgbtool ch_filter_specification + * into the Work Request and the definition of that structure is + * currently in cxgbtool.h which isn't appropriate to pull into the + * common code. We may eventually try to come up with a more neutral + * filter specification structure but for now it's easiest to simply + * put this fairly direct code in line ... + */ + fwr->op_pkd = htonl(FW_WR_OP_V(FW_FILTER_WR)); + fwr->len16_pkd = htonl(FW_WR_LEN16_V(sizeof(*fwr) / 16)); + fwr->tid_to_iq = + htonl(FW_FILTER_WR_TID_V(ftid) | + FW_FILTER_WR_RQTYPE_V(f->fs.type) | + FW_FILTER_WR_NOREPLY_V(0) | + FW_FILTER_WR_IQ_V(f->fs.iq)); + fwr->del_filter_to_l2tix = + htonl(FW_FILTER_WR_RPTTID_V(f->fs.rpttid) | + FW_FILTER_WR_DROP_V(f->fs.action == FILTER_DROP) | + FW_FILTER_WR_DIRSTEER_V(f->fs.dirsteer) | + FW_FILTER_WR_MASKHASH_V(f->fs.maskhash) | + FW_FILTER_WR_DIRSTEERHASH_V(f->fs.dirsteerhash) | + FW_FILTER_WR_LPBK_V(f->fs.action == FILTER_SWITCH) | + FW_FILTER_WR_DMAC_V(f->fs.newdmac) | + FW_FILTER_WR_SMAC_V(f->fs.newsmac) | + FW_FILTER_WR_INSVLAN_V(f->fs.newvlan == VLAN_INSERT || + f->fs.newvlan == VLAN_REWRITE) | + FW_FILTER_WR_RMVLAN_V(f->fs.newvlan == VLAN_REMOVE || + f->fs.newvlan == VLAN_REWRITE) | + FW_FILTER_WR_HITCNTS_V(f->fs.hitcnts) | + FW_FILTER_WR_TXCHAN_V(f->fs.eport) | + FW_FILTER_WR_PRIO_V(f->fs.prio) | + FW_FILTER_WR_L2TIX_V(f->l2t ? f->l2t->idx : 0)); + fwr->ethtype = htons(f->fs.val.ethtype); + fwr->ethtypem = htons(f->fs.mask.ethtype); + fwr->frag_to_ovlan_vldm = + (FW_FILTER_WR_FRAG_V(f->fs.val.frag) | + FW_FILTER_WR_FRAGM_V(f->fs.mask.frag) | + FW_FILTER_WR_IVLAN_VLD_V(f->fs.val.ivlan_vld) | + FW_FILTER_WR_OVLAN_VLD_V(f->fs.val.ovlan_vld) | + FW_FILTER_WR_IVLAN_VLDM_V(f->fs.mask.ivlan_vld) | + FW_FILTER_WR_OVLAN_VLDM_V(f->fs.mask.ovlan_vld)); + fwr->smac_sel = 0; + fwr->rx_chan_rx_rpl_iq = + htons(FW_FILTER_WR_RX_CHAN_V(0) | + FW_FILTER_WR_RX_RPL_IQ_V(adapter->sge.fw_evtq.abs_id)); + fwr->maci_to_matchtypem = + htonl(FW_FILTER_WR_MACI_V(f->fs.val.macidx) | + FW_FILTER_WR_MACIM_V(f->fs.mask.macidx) | + FW_FILTER_WR_FCOE_V(f->fs.val.fcoe) | + FW_FILTER_WR_FCOEM_V(f->fs.mask.fcoe) | + FW_FILTER_WR_PORT_V(f->fs.val.iport) | + FW_FILTER_WR_PORTM_V(f->fs.mask.iport) | + FW_FILTER_WR_MATCHTYPE_V(f->fs.val.matchtype) | + FW_FILTER_WR_MATCHTYPEM_V(f->fs.mask.matchtype)); + fwr->ptcl = f->fs.val.proto; + fwr->ptclm = f->fs.mask.proto; + fwr->ttyp = f->fs.val.tos; + fwr->ttypm = f->fs.mask.tos; + fwr->ivlan = htons(f->fs.val.ivlan); + fwr->ivlanm = htons(f->fs.mask.ivlan); + fwr->ovlan = htons(f->fs.val.ovlan); + fwr->ovlanm = htons(f->fs.mask.ovlan); + memcpy(fwr->lip, f->fs.val.lip, sizeof(fwr->lip)); + memcpy(fwr->lipm, f->fs.mask.lip, sizeof(fwr->lipm)); + memcpy(fwr->fip, f->fs.val.fip, sizeof(fwr->fip)); + memcpy(fwr->fipm, f->fs.mask.fip, sizeof(fwr->fipm)); + fwr->lp = htons(f->fs.val.lport); + fwr->lpm = htons(f->fs.mask.lport); + fwr->fp = htons(f->fs.val.fport); + fwr->fpm = htons(f->fs.mask.fport); + if (f->fs.newsmac) + memcpy(fwr->sma, f->fs.smac, sizeof(fwr->sma)); + + /* Mark the filter as "pending" and ship off the Filter Work Request. + * When we get the Work Request Reply we'll clear the pending status. + */ + f->pending = 1; + set_wr_txq(skb, CPL_PRIORITY_CONTROL, f->fs.val.iport & 0x3); + t4_ofld_send(adapter, skb); + return 0; +} + +/* Return an error number if the indicated filter isn't writable ... */ +int writable_filter(struct filter_entry *f) +{ + if (f->locked) + return -EPERM; + if (f->pending) + return -EBUSY; + + return 0; +} + +/* Delete the filter at the specified index (if valid). The checks for all + * the common problems with doing this like the filter being locked, currently + * pending in another operation, etc. + */ +int delete_filter(struct adapter *adapter, unsigned int fidx) +{ + struct filter_entry *f; + int ret; + + if (fidx >= adapter->tids.nftids + adapter->tids.nsftids) + return -EINVAL; + + f = &adapter->tids.ftid_tab[fidx]; + ret = writable_filter(f); + if (ret) + return ret; + if (f->valid) + return del_filter_wr(adapter, fidx); + + return 0; +} + +/* Clear a filter and release any of its resources that we own. This also + * clears the filter's "pending" status. + */ +void clear_filter(struct adapter *adap, struct filter_entry *f) +{ + /* If the new or old filter have loopback rewriteing rules then we'll + * need to free any existing Layer Two Table (L2T) entries of the old + * filter rule. The firmware will handle freeing up any Source MAC + * Table (SMT) entries used for rewriting Source MAC Addresses in + * loopback rules. + */ + if (f->l2t) + cxgb4_l2t_release(f->l2t); + + /* The zeroing of the filter rule below clears the filter valid, + * pending, locked flags, l2t pointer, etc. so it's all we need for + * this operation. + */ + memset(f, 0, sizeof(*f)); +} + +/* Handle a filter write/deletion reply. */ +void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl) +{ + unsigned int idx = GET_TID(rpl); + unsigned int nidx = idx - adap->tids.ftid_base; + struct filter_entry *f; + unsigned int ret; + + if (idx >= adap->tids.ftid_base && nidx < + (adap->tids.nftids + adap->tids.nsftids)) { + idx = nidx; + ret = TCB_COOKIE_G(rpl->cookie); + f = &adap->tids.ftid_tab[idx]; + + if (ret == FW_FILTER_WR_FLT_DELETED) { + /* Clear the filter when we get confirmation from the + * hardware that the filter has been deleted. + */ + clear_filter(adap, f); + } else if (ret == FW_FILTER_WR_SMT_TBL_FULL) { + dev_err(adap->pdev_dev, "filter %u setup failed due to full SMT\n", + idx); + clear_filter(adap, f); + } else if (ret == FW_FILTER_WR_FLT_ADDED) { + f->smtidx = (be64_to_cpu(rpl->oldval) >> 24) & 0xff; + f->pending = 0; /* asynchronous setup completed */ + f->valid = 1; + } else { + /* Something went wrong. Issue a warning about the + * problem and clear everything out. + */ + dev_err(adap->pdev_dev, "filter %u setup failed with error %u\n", + idx, ret); + clear_filter(adap, f); + } + } +} diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h new file mode 100644 index 000000000000..f6bd0bf65b23 --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h @@ -0,0 +1,47 @@ +/* + * This file is part of the Chelsio T4 Ethernet driver for Linux. + * + * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __CXGB4_FILTER_H +#define __CXGB4_FILTER_H + +#include "t4_msg.h" + +void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl); +void clear_filter(struct adapter *adap, struct filter_entry *f); + +int set_filter_wr(struct adapter *adapter, int fidx); +int delete_filter(struct adapter *adapter, unsigned int fidx); + +int writable_filter(struct filter_entry *f); +#endif /* __CXGB4_FILTER_H */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index d1ebb84c073e..5cdcfe871c12 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -67,6 +67,7 @@ #include #include "cxgb4.h" +#include "cxgb4_filter.h" #include "t4_regs.h" #include "t4_values.h" #include "t4_msg.h" @@ -87,30 +88,6 @@ char cxgb4_driver_name[] = KBUILD_MODNAME; const char cxgb4_driver_version[] = DRV_VERSION; #define DRV_DESC "Chelsio T4/T5/T6 Network Driver" -/* Host shadow copy of ingress filter entry. This is in host native format - * and doesn't match the ordering or bit order, etc. of the hardware of the - * firmware command. The use of bit-field structure elements is purely to - * remind ourselves of the field size limitations and save memory in the case - * where the filter table is large. - */ -struct filter_entry { - /* Administrative fields for filter. - */ - u32 valid:1; /* filter allocated and valid */ - u32 locked:1; /* filter is administratively locked */ - - u32 pending:1; /* filter action is pending firmware reply */ - u32 smtidx:8; /* Source MAC Table index for smac */ - struct l2t_entry *l2t; /* Layer Two Table entry for dmac */ - - /* The filter itself. Most of this is a straight copy of information - * provided by the extended ioctl(). Some fields are translated to - * internal forms -- for instance the Ingress Queue ID passed in from - * the ioctl() is translated into the Absolute Ingress Queue ID. - */ - struct ch_filter_specification fs; -}; - #define DFLT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | \ NETIF_MSG_TIMER | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP |\ NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR) @@ -527,66 +504,6 @@ static void dcb_rpl(struct adapter *adap, const struct fw_port_cmd *pcmd) } #endif /* CONFIG_CHELSIO_T4_DCB */ -/* Clear a filter and release any of its resources that we own. This also - * clears the filter's "pending" status. - */ -static void clear_filter(struct adapter *adap, struct filter_entry *f) -{ - /* If the new or old filter have loopback rewriteing rules then we'll - * need to free any existing Layer Two Table (L2T) entries of the old - * filter rule. The firmware will handle freeing up any Source MAC - * Table (SMT) entries used for rewriting Source MAC Addresses in - * loopback rules. - */ - if (f->l2t) - cxgb4_l2t_release(f->l2t); - - /* The zeroing of the filter rule below clears the filter valid, - * pending, locked flags, l2t pointer, etc. so it's all we need for - * this operation. - */ - memset(f, 0, sizeof(*f)); -} - -/* Handle a filter write/deletion reply. - */ -static void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl) -{ - unsigned int idx = GET_TID(rpl); - unsigned int nidx = idx - adap->tids.ftid_base; - unsigned int ret; - struct filter_entry *f; - - if (idx >= adap->tids.ftid_base && nidx < - (adap->tids.nftids + adap->tids.nsftids)) { - idx = nidx; - ret = TCB_COOKIE_G(rpl->cookie); - f = &adap->tids.ftid_tab[idx]; - - if (ret == FW_FILTER_WR_FLT_DELETED) { - /* Clear the filter when we get confirmation from the - * hardware that the filter has been deleted. - */ - clear_filter(adap, f); - } else if (ret == FW_FILTER_WR_SMT_TBL_FULL) { - dev_err(adap->pdev_dev, "filter %u setup failed due to full SMT\n", - idx); - clear_filter(adap, f); - } else if (ret == FW_FILTER_WR_FLT_ADDED) { - f->smtidx = (be64_to_cpu(rpl->oldval) >> 24) & 0xff; - f->pending = 0; /* asynchronous setup completed */ - f->valid = 1; - } else { - /* Something went wrong. Issue a warning about the - * problem and clear everything out. - */ - dev_err(adap->pdev_dev, "filter %u setup failed with error %u\n", - idx, ret); - clear_filter(adap, f); - } - } -} - /* Response queue handler for the FW event queue. */ static int fwevtq_handler(struct sge_rspq *q, const __be64 *rsp, @@ -1026,151 +943,6 @@ void t4_free_mem(void *addr) kvfree(addr); } -/* Send a Work Request to write the filter at a specified index. We construct - * a Firmware Filter Work Request to have the work done and put the indicated - * filter into "pending" mode which will prevent any further actions against - * it till we get a reply from the firmware on the completion status of the - * request. - */ -static int set_filter_wr(struct adapter *adapter, int fidx) -{ - struct filter_entry *f = &adapter->tids.ftid_tab[fidx]; - struct sk_buff *skb; - struct fw_filter_wr *fwr; - unsigned int ftid; - - skb = alloc_skb(sizeof(*fwr), GFP_KERNEL); - if (!skb) - return -ENOMEM; - - /* If the new filter requires loopback Destination MAC and/or VLAN - * rewriting then we need to allocate a Layer 2 Table (L2T) entry for - * the filter. - */ - if (f->fs.newdmac || f->fs.newvlan) { - /* allocate L2T entry for new filter */ - f->l2t = t4_l2t_alloc_switching(adapter, f->fs.vlan, - f->fs.eport, f->fs.dmac); - if (f->l2t == NULL) { - kfree_skb(skb); - return -ENOMEM; - } - } - - ftid = adapter->tids.ftid_base + fidx; - - fwr = (struct fw_filter_wr *)__skb_put(skb, sizeof(*fwr)); - memset(fwr, 0, sizeof(*fwr)); - - /* It would be nice to put most of the following in t4_hw.c but most - * of the work is translating the cxgbtool ch_filter_specification - * into the Work Request and the definition of that structure is - * currently in cxgbtool.h which isn't appropriate to pull into the - * common code. We may eventually try to come up with a more neutral - * filter specification structure but for now it's easiest to simply - * put this fairly direct code in line ... - */ - fwr->op_pkd = htonl(FW_WR_OP_V(FW_FILTER_WR)); - fwr->len16_pkd = htonl(FW_WR_LEN16_V(sizeof(*fwr)/16)); - fwr->tid_to_iq = - htonl(FW_FILTER_WR_TID_V(ftid) | - FW_FILTER_WR_RQTYPE_V(f->fs.type) | - FW_FILTER_WR_NOREPLY_V(0) | - FW_FILTER_WR_IQ_V(f->fs.iq)); - fwr->del_filter_to_l2tix = - htonl(FW_FILTER_WR_RPTTID_V(f->fs.rpttid) | - FW_FILTER_WR_DROP_V(f->fs.action == FILTER_DROP) | - FW_FILTER_WR_DIRSTEER_V(f->fs.dirsteer) | - FW_FILTER_WR_MASKHASH_V(f->fs.maskhash) | - FW_FILTER_WR_DIRSTEERHASH_V(f->fs.dirsteerhash) | - FW_FILTER_WR_LPBK_V(f->fs.action == FILTER_SWITCH) | - FW_FILTER_WR_DMAC_V(f->fs.newdmac) | - FW_FILTER_WR_SMAC_V(f->fs.newsmac) | - FW_FILTER_WR_INSVLAN_V(f->fs.newvlan == VLAN_INSERT || - f->fs.newvlan == VLAN_REWRITE) | - FW_FILTER_WR_RMVLAN_V(f->fs.newvlan == VLAN_REMOVE || - f->fs.newvlan == VLAN_REWRITE) | - FW_FILTER_WR_HITCNTS_V(f->fs.hitcnts) | - FW_FILTER_WR_TXCHAN_V(f->fs.eport) | - FW_FILTER_WR_PRIO_V(f->fs.prio) | - FW_FILTER_WR_L2TIX_V(f->l2t ? f->l2t->idx : 0)); - fwr->ethtype = htons(f->fs.val.ethtype); - fwr->ethtypem = htons(f->fs.mask.ethtype); - fwr->frag_to_ovlan_vldm = - (FW_FILTER_WR_FRAG_V(f->fs.val.frag) | - FW_FILTER_WR_FRAGM_V(f->fs.mask.frag) | - FW_FILTER_WR_IVLAN_VLD_V(f->fs.val.ivlan_vld) | - FW_FILTER_WR_OVLAN_VLD_V(f->fs.val.ovlan_vld) | - FW_FILTER_WR_IVLAN_VLDM_V(f->fs.mask.ivlan_vld) | - FW_FILTER_WR_OVLAN_VLDM_V(f->fs.mask.ovlan_vld)); - fwr->smac_sel = 0; - fwr->rx_chan_rx_rpl_iq = - htons(FW_FILTER_WR_RX_CHAN_V(0) | - FW_FILTER_WR_RX_RPL_IQ_V(adapter->sge.fw_evtq.abs_id)); - fwr->maci_to_matchtypem = - htonl(FW_FILTER_WR_MACI_V(f->fs.val.macidx) | - FW_FILTER_WR_MACIM_V(f->fs.mask.macidx) | - FW_FILTER_WR_FCOE_V(f->fs.val.fcoe) | - FW_FILTER_WR_FCOEM_V(f->fs.mask.fcoe) | - FW_FILTER_WR_PORT_V(f->fs.val.iport) | - FW_FILTER_WR_PORTM_V(f->fs.mask.iport) | - FW_FILTER_WR_MATCHTYPE_V(f->fs.val.matchtype) | - FW_FILTER_WR_MATCHTYPEM_V(f->fs.mask.matchtype)); - fwr->ptcl = f->fs.val.proto; - fwr->ptclm = f->fs.mask.proto; - fwr->ttyp = f->fs.val.tos; - fwr->ttypm = f->fs.mask.tos; - fwr->ivlan = htons(f->fs.val.ivlan); - fwr->ivlanm = htons(f->fs.mask.ivlan); - fwr->ovlan = htons(f->fs.val.ovlan); - fwr->ovlanm = htons(f->fs.mask.ovlan); - memcpy(fwr->lip, f->fs.val.lip, sizeof(fwr->lip)); - memcpy(fwr->lipm, f->fs.mask.lip, sizeof(fwr->lipm)); - memcpy(fwr->fip, f->fs.val.fip, sizeof(fwr->fip)); - memcpy(fwr->fipm, f->fs.mask.fip, sizeof(fwr->fipm)); - fwr->lp = htons(f->fs.val.lport); - fwr->lpm = htons(f->fs.mask.lport); - fwr->fp = htons(f->fs.val.fport); - fwr->fpm = htons(f->fs.mask.fport); - if (f->fs.newsmac) - memcpy(fwr->sma, f->fs.smac, sizeof(fwr->sma)); - - /* Mark the filter as "pending" and ship off the Filter Work Request. - * When we get the Work Request Reply we'll clear the pending status. - */ - f->pending = 1; - set_wr_txq(skb, CPL_PRIORITY_CONTROL, f->fs.val.iport & 0x3); - t4_ofld_send(adapter, skb); - return 0; -} - -/* Delete the filter at a specified index. - */ -static int del_filter_wr(struct adapter *adapter, int fidx) -{ - struct filter_entry *f = &adapter->tids.ftid_tab[fidx]; - struct sk_buff *skb; - struct fw_filter_wr *fwr; - unsigned int len, ftid; - - len = sizeof(*fwr); - ftid = adapter->tids.ftid_base + fidx; - - skb = alloc_skb(len, GFP_KERNEL); - if (!skb) - return -ENOMEM; - - fwr = (struct fw_filter_wr *)__skb_put(skb, len); - t4_mk_filtdelwr(ftid, fwr, adapter->sge.fw_evtq.abs_id); - - /* Mark the filter as "pending" and ship off the Filter Work Request. - * When we get the Work Request Reply we'll clear the pending status. - */ - f->pending = 1; - t4_mgmt_tx(adapter, skb); - return 0; -} - static u16 cxgb_select_queue(struct net_device *dev, struct sk_buff *skb, void *accel_priv, select_queue_fallback_t fallback) { @@ -2514,40 +2286,6 @@ static int cxgb_close(struct net_device *dev) return t4_enable_vi(adapter, adapter->pf, pi->viid, false, false); } -/* Return an error number if the indicated filter isn't writable ... - */ -static int writable_filter(struct filter_entry *f) -{ - if (f->locked) - return -EPERM; - if (f->pending) - return -EBUSY; - - return 0; -} - -/* Delete the filter at the specified index (if valid). The checks for all - * the common problems with doing this like the filter being locked, currently - * pending in another operation, etc. - */ -static int delete_filter(struct adapter *adapter, unsigned int fidx) -{ - struct filter_entry *f; - int ret; - - if (fidx >= adapter->tids.nftids + adapter->tids.nsftids) - return -EINVAL; - - f = &adapter->tids.ftid_tab[fidx]; - ret = writable_filter(f); - if (ret) - return ret; - if (f->valid) - return del_filter_wr(adapter, fidx); - - return 0; -} - int cxgb4_create_server_filter(const struct net_device *dev, unsigned int stid, __be32 sip, __be16 sport, __be16 vlan, unsigned int queue, unsigned char port, unsigned char mask) From 578b46b9383c3619cc0a6002ff867e732b08b67a Mon Sep 17 00:00:00 2001 From: Rahul Lakkireddy Date: Tue, 20 Sep 2016 17:13:07 +0530 Subject: [PATCH 1342/1715] cxgb4: add common api support for configuring filters Enable filters for non-offload configuration and add common api support for setting and deleting filters in LE-TCAM region of the hardware. IPv4 filters occupy one slot. IPv6 filters occupy 4 slots and must be on a 4-slot boundary. IPv4 filters can not occupy a slot belonging to IPv6 and the vice-versa is also true. Filters are set and deleted asynchronously. Use completion to wait for reply from firmware in order to allow for synchronization if needed. Signed-off-by: Rahul Lakkireddy Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 3 + .../net/ethernet/chelsio/cxgb4/cxgb4_filter.c | 477 +++++++++++++++++- .../net/ethernet/chelsio/cxgb4/cxgb4_filter.h | 1 + .../net/ethernet/chelsio/cxgb4/cxgb4_main.c | 39 +- .../net/ethernet/chelsio/cxgb4/cxgb4_uld.h | 26 +- 5 files changed, 512 insertions(+), 34 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index a844fd25c214..51edb126da65 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -1038,7 +1038,10 @@ struct filter_entry { u32 pending:1; /* filter action is pending firmware reply */ u32 smtidx:8; /* Source MAC Table index for smac */ + struct filter_ctx *ctx; /* Caller's completion hook */ struct l2t_entry *l2t; /* Layer Two Table entry for dmac */ + struct net_device *dev; /* Associated net device */ + u32 tid; /* This will store the actual tid */ /* The filter itself. Most of this is a straight copy of information * provided by the extended ioctl(). Some fields are translated to diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c index 8a26a54a9c84..2a616171cb68 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c @@ -33,27 +33,165 @@ */ #include "cxgb4.h" +#include "t4_regs.h" #include "l2t.h" #include "t4fw_api.h" #include "cxgb4_filter.h" +static inline bool is_field_set(u32 val, u32 mask) +{ + return val || mask; +} + +static inline bool unsupported(u32 conf, u32 conf_mask, u32 val, u32 mask) +{ + return !(conf & conf_mask) && is_field_set(val, mask); +} + +/* Validate filter spec against configuration done on the card. */ +static int validate_filter(struct net_device *dev, + struct ch_filter_specification *fs) +{ + struct adapter *adapter = netdev2adap(dev); + u32 fconf, iconf; + + /* Check for unconfigured fields being used. */ + fconf = adapter->params.tp.vlan_pri_map; + iconf = adapter->params.tp.ingress_config; + + if (unsupported(fconf, FCOE_F, fs->val.fcoe, fs->mask.fcoe) || + unsupported(fconf, PORT_F, fs->val.iport, fs->mask.iport) || + unsupported(fconf, TOS_F, fs->val.tos, fs->mask.tos) || + unsupported(fconf, ETHERTYPE_F, fs->val.ethtype, + fs->mask.ethtype) || + unsupported(fconf, MACMATCH_F, fs->val.macidx, fs->mask.macidx) || + unsupported(fconf, MPSHITTYPE_F, fs->val.matchtype, + fs->mask.matchtype) || + unsupported(fconf, FRAGMENTATION_F, fs->val.frag, fs->mask.frag) || + unsupported(fconf, PROTOCOL_F, fs->val.proto, fs->mask.proto) || + unsupported(fconf, VNIC_ID_F, fs->val.pfvf_vld, + fs->mask.pfvf_vld) || + unsupported(fconf, VNIC_ID_F, fs->val.ovlan_vld, + fs->mask.ovlan_vld) || + unsupported(fconf, VLAN_F, fs->val.ivlan_vld, fs->mask.ivlan_vld)) + return -EOPNOTSUPP; + + /* T4 inconveniently uses the same FT_VNIC_ID_W bits for both the Outer + * VLAN Tag and PF/VF/VFvld fields based on VNIC_F being set + * in TP_INGRESS_CONFIG. Hense the somewhat crazy checks + * below. Additionally, since the T4 firmware interface also + * carries that overlap, we need to translate any PF/VF + * specification into that internal format below. + */ + if (is_field_set(fs->val.pfvf_vld, fs->mask.pfvf_vld) && + is_field_set(fs->val.ovlan_vld, fs->mask.ovlan_vld)) + return -EOPNOTSUPP; + if (unsupported(iconf, VNIC_F, fs->val.pfvf_vld, fs->mask.pfvf_vld) || + (is_field_set(fs->val.ovlan_vld, fs->mask.ovlan_vld) && + (iconf & VNIC_F))) + return -EOPNOTSUPP; + if (fs->val.pf > 0x7 || fs->val.vf > 0x7f) + return -ERANGE; + fs->mask.pf &= 0x7; + fs->mask.vf &= 0x7f; + + /* If the user is requesting that the filter action loop + * matching packets back out one of our ports, make sure that + * the egress port is in range. + */ + if (fs->action == FILTER_SWITCH && + fs->eport >= adapter->params.nports) + return -ERANGE; + + /* Don't allow various trivially obvious bogus out-of-range values... */ + if (fs->val.iport >= adapter->params.nports) + return -ERANGE; + + /* T4 doesn't support removing VLAN Tags for loop back filters. */ + if (is_t4(adapter->params.chip) && + fs->action == FILTER_SWITCH && + (fs->newvlan == VLAN_REMOVE || + fs->newvlan == VLAN_REWRITE)) + return -EOPNOTSUPP; + + return 0; +} + +static unsigned int get_filter_steerq(struct net_device *dev, + struct ch_filter_specification *fs) +{ + struct adapter *adapter = netdev2adap(dev); + unsigned int iq; + + /* If the user has requested steering matching Ingress Packets + * to a specific Queue Set, we need to make sure it's in range + * for the port and map that into the Absolute Queue ID of the + * Queue Set's Response Queue. + */ + if (!fs->dirsteer) { + if (fs->iq) + return -EINVAL; + iq = 0; + } else { + struct port_info *pi = netdev_priv(dev); + + /* If the iq id is greater than the number of qsets, + * then assume it is an absolute qid. + */ + if (fs->iq < pi->nqsets) + iq = adapter->sge.ethrxq[pi->first_qset + + fs->iq].rspq.abs_id; + else + iq = fs->iq; + } + + return iq; +} + +static int cxgb4_set_ftid(struct tid_info *t, int fidx, int family) +{ + spin_lock_bh(&t->ftid_lock); + + if (test_bit(fidx, t->ftid_bmap)) { + spin_unlock_bh(&t->ftid_lock); + return -EBUSY; + } + + if (family == PF_INET) + __set_bit(fidx, t->ftid_bmap); + else + bitmap_allocate_region(t->ftid_bmap, fidx, 2); + + spin_unlock_bh(&t->ftid_lock); + return 0; +} + +static void cxgb4_clear_ftid(struct tid_info *t, int fidx, int family) +{ + spin_lock_bh(&t->ftid_lock); + if (family == PF_INET) + __clear_bit(fidx, t->ftid_bmap); + else + bitmap_release_region(t->ftid_bmap, fidx, 2); + spin_unlock_bh(&t->ftid_lock); +} + /* Delete the filter at a specified index. */ static int del_filter_wr(struct adapter *adapter, int fidx) { struct filter_entry *f = &adapter->tids.ftid_tab[fidx]; struct fw_filter_wr *fwr; - unsigned int len, ftid; struct sk_buff *skb; + unsigned int len; len = sizeof(*fwr); - ftid = adapter->tids.ftid_base + fidx; skb = alloc_skb(len, GFP_KERNEL); if (!skb) return -ENOMEM; fwr = (struct fw_filter_wr *)__skb_put(skb, len); - t4_mk_filtdelwr(ftid, fwr, adapter->sge.fw_evtq.abs_id); + t4_mk_filtdelwr(f->tid, fwr, adapter->sge.fw_evtq.abs_id); /* Mark the filter as "pending" and ship off the Filter Work Request. * When we get the Work Request Reply we'll clear the pending status. @@ -74,7 +212,6 @@ int set_filter_wr(struct adapter *adapter, int fidx) struct filter_entry *f = &adapter->tids.ftid_tab[fidx]; struct fw_filter_wr *fwr; struct sk_buff *skb; - unsigned int ftid; skb = alloc_skb(sizeof(*fwr), GFP_KERNEL); if (!skb) @@ -94,8 +231,6 @@ int set_filter_wr(struct adapter *adapter, int fidx) } } - ftid = adapter->tids.ftid_base + fidx; - fwr = (struct fw_filter_wr *)__skb_put(skb, sizeof(*fwr)); memset(fwr, 0, sizeof(*fwr)); @@ -110,7 +245,7 @@ int set_filter_wr(struct adapter *adapter, int fidx) fwr->op_pkd = htonl(FW_WR_OP_V(FW_FILTER_WR)); fwr->len16_pkd = htonl(FW_WR_LEN16_V(sizeof(*fwr) / 16)); fwr->tid_to_iq = - htonl(FW_FILTER_WR_TID_V(ftid) | + htonl(FW_FILTER_WR_TID_V(f->tid) | FW_FILTER_WR_RQTYPE_V(f->fs.type) | FW_FILTER_WR_NOREPLY_V(0) | FW_FILTER_WR_IQ_V(f->fs.iq)); @@ -235,33 +370,341 @@ void clear_filter(struct adapter *adap, struct filter_entry *f) memset(f, 0, sizeof(*f)); } +void clear_all_filters(struct adapter *adapter) +{ + unsigned int i; + + if (adapter->tids.ftid_tab) { + struct filter_entry *f = &adapter->tids.ftid_tab[0]; + unsigned int max_ftid = adapter->tids.nftids + + adapter->tids.nsftids; + + for (i = 0; i < max_ftid; i++, f++) + if (f->valid || f->pending) + clear_filter(adapter, f); + } +} + +/* Fill up default masks for set match fields. */ +static void fill_default_mask(struct ch_filter_specification *fs) +{ + unsigned int lip = 0, lip_mask = 0; + unsigned int fip = 0, fip_mask = 0; + unsigned int i; + + if (fs->val.iport && !fs->mask.iport) + fs->mask.iport |= ~0; + if (fs->val.fcoe && !fs->mask.fcoe) + fs->mask.fcoe |= ~0; + if (fs->val.matchtype && !fs->mask.matchtype) + fs->mask.matchtype |= ~0; + if (fs->val.macidx && !fs->mask.macidx) + fs->mask.macidx |= ~0; + if (fs->val.ethtype && !fs->mask.ethtype) + fs->mask.ethtype |= ~0; + if (fs->val.ivlan && !fs->mask.ivlan) + fs->mask.ivlan |= ~0; + if (fs->val.ovlan && !fs->mask.ovlan) + fs->mask.ovlan |= ~0; + if (fs->val.frag && !fs->mask.frag) + fs->mask.frag |= ~0; + if (fs->val.tos && !fs->mask.tos) + fs->mask.tos |= ~0; + if (fs->val.proto && !fs->mask.proto) + fs->mask.proto |= ~0; + + for (i = 0; i < ARRAY_SIZE(fs->val.lip); i++) { + lip |= fs->val.lip[i]; + lip_mask |= fs->mask.lip[i]; + fip |= fs->val.fip[i]; + fip_mask |= fs->mask.fip[i]; + } + + if (lip && !lip_mask) + memset(fs->mask.lip, ~0, sizeof(fs->mask.lip)); + + if (fip && !fip_mask) + memset(fs->mask.fip, ~0, sizeof(fs->mask.lip)); + + if (fs->val.lport && !fs->mask.lport) + fs->mask.lport = ~0; + if (fs->val.fport && !fs->mask.fport) + fs->mask.fport = ~0; +} + +/* Check a Chelsio Filter Request for validity, convert it into our internal + * format and send it to the hardware. Return 0 on success, an error number + * otherwise. We attach any provided filter operation context to the internal + * filter specification in order to facilitate signaling completion of the + * operation. + */ +int __cxgb4_set_filter(struct net_device *dev, int filter_id, + struct ch_filter_specification *fs, + struct filter_ctx *ctx) +{ + struct adapter *adapter = netdev2adap(dev); + unsigned int max_fidx, fidx, iq; + struct filter_entry *f; + u32 iconf; + int ret; + + max_fidx = adapter->tids.nftids; + if (filter_id != (max_fidx + adapter->tids.nsftids - 1) && + filter_id >= max_fidx) + return -E2BIG; + + fill_default_mask(fs); + + ret = validate_filter(dev, fs); + if (ret) + return ret; + + iq = get_filter_steerq(dev, fs); + if (iq < 0) + return iq; + + /* IPv6 filters occupy four slots and must be aligned on + * four-slot boundaries. IPv4 filters only occupy a single + * slot and have no alignment requirements but writing a new + * IPv4 filter into the middle of an existing IPv6 filter + * requires clearing the old IPv6 filter and hence we prevent + * insertion. + */ + if (fs->type == 0) { /* IPv4 */ + /* If our IPv4 filter isn't being written to a + * multiple of four filter index and there's an IPv6 + * filter at the multiple of 4 base slot, then we + * prevent insertion. + */ + fidx = filter_id & ~0x3; + if (fidx != filter_id && + adapter->tids.ftid_tab[fidx].fs.type) { + f = &adapter->tids.ftid_tab[fidx]; + if (f->valid) { + dev_err(adapter->pdev_dev, + "Invalid location. IPv6 requires 4 slots and is occupying slots %u to %u\n", + fidx, fidx + 3); + return -EINVAL; + } + } + } else { /* IPv6 */ + /* Ensure that the IPv6 filter is aligned on a + * multiple of 4 boundary. + */ + if (filter_id & 0x3) { + dev_err(adapter->pdev_dev, + "Invalid location. IPv6 must be aligned on a 4-slot boundary\n"); + return -EINVAL; + } + + /* Check all except the base overlapping IPv4 filter slots. */ + for (fidx = filter_id + 1; fidx < filter_id + 4; fidx++) { + f = &adapter->tids.ftid_tab[fidx]; + if (f->valid) { + dev_err(adapter->pdev_dev, + "Invalid location. IPv6 requires 4 slots and an IPv4 filter exists at %u\n", + fidx); + return -EINVAL; + } + } + } + + /* Check to make sure that provided filter index is not + * already in use by someone else + */ + f = &adapter->tids.ftid_tab[filter_id]; + if (f->valid) + return -EBUSY; + + fidx = filter_id + adapter->tids.ftid_base; + ret = cxgb4_set_ftid(&adapter->tids, filter_id, + fs->type ? PF_INET6 : PF_INET); + if (ret) + return ret; + + /* Check to make sure the filter requested is writable ... */ + ret = writable_filter(f); + if (ret) { + /* Clear the bits we have set above */ + cxgb4_clear_ftid(&adapter->tids, filter_id, + fs->type ? PF_INET6 : PF_INET); + return ret; + } + + /* Clear out any old resources being used by the filter before + * we start constructing the new filter. + */ + if (f->valid) + clear_filter(adapter, f); + + /* Convert the filter specification into our internal format. + * We copy the PF/VF specification into the Outer VLAN field + * here so the rest of the code -- including the interface to + * the firmware -- doesn't have to constantly do these checks. + */ + f->fs = *fs; + f->fs.iq = iq; + f->dev = dev; + + iconf = adapter->params.tp.ingress_config; + if (iconf & VNIC_F) { + f->fs.val.ovlan = (fs->val.pf << 13) | fs->val.vf; + f->fs.mask.ovlan = (fs->mask.pf << 13) | fs->mask.vf; + f->fs.val.ovlan_vld = fs->val.pfvf_vld; + f->fs.mask.ovlan_vld = fs->mask.pfvf_vld; + } + + /* Attempt to set the filter. If we don't succeed, we clear + * it and return the failure. + */ + f->ctx = ctx; + f->tid = fidx; /* Save the actual tid */ + ret = set_filter_wr(adapter, filter_id); + if (ret) { + cxgb4_clear_ftid(&adapter->tids, filter_id, + fs->type ? PF_INET6 : PF_INET); + clear_filter(adapter, f); + } + + return ret; +} + +/* Check a delete filter request for validity and send it to the hardware. + * Return 0 on success, an error number otherwise. We attach any provided + * filter operation context to the internal filter specification in order to + * facilitate signaling completion of the operation. + */ +int __cxgb4_del_filter(struct net_device *dev, int filter_id, + struct filter_ctx *ctx) +{ + struct adapter *adapter = netdev2adap(dev); + struct filter_entry *f; + unsigned int max_fidx; + int ret; + + max_fidx = adapter->tids.nftids; + if (filter_id != (max_fidx + adapter->tids.nsftids - 1) && + filter_id >= max_fidx) + return -E2BIG; + + f = &adapter->tids.ftid_tab[filter_id]; + ret = writable_filter(f); + if (ret) + return ret; + + if (f->valid) { + f->ctx = ctx; + cxgb4_clear_ftid(&adapter->tids, filter_id, + f->fs.type ? PF_INET6 : PF_INET); + return del_filter_wr(adapter, filter_id); + } + + /* If the caller has passed in a Completion Context then we need to + * mark it as a successful completion so they don't stall waiting + * for it. + */ + if (ctx) { + ctx->result = 0; + complete(&ctx->completion); + } + return ret; +} + +int cxgb4_set_filter(struct net_device *dev, int filter_id, + struct ch_filter_specification *fs) +{ + struct filter_ctx ctx; + int ret; + + init_completion(&ctx.completion); + + ret = __cxgb4_set_filter(dev, filter_id, fs, &ctx); + if (ret) + goto out; + + /* Wait for reply */ + ret = wait_for_completion_timeout(&ctx.completion, 10 * HZ); + if (!ret) + return -ETIMEDOUT; + + ret = ctx.result; +out: + return ret; +} + +int cxgb4_del_filter(struct net_device *dev, int filter_id) +{ + struct filter_ctx ctx; + int ret; + + init_completion(&ctx.completion); + + ret = __cxgb4_del_filter(dev, filter_id, &ctx); + if (ret) + goto out; + + /* Wait for reply */ + ret = wait_for_completion_timeout(&ctx.completion, 10 * HZ); + if (!ret) + return -ETIMEDOUT; + + ret = ctx.result; +out: + return ret; +} + /* Handle a filter write/deletion reply. */ void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl) { - unsigned int idx = GET_TID(rpl); - unsigned int nidx = idx - adap->tids.ftid_base; - struct filter_entry *f; - unsigned int ret; + unsigned int tid = GET_TID(rpl); + struct filter_entry *f = NULL; + unsigned int max_fidx; + int idx; - if (idx >= adap->tids.ftid_base && nidx < - (adap->tids.nftids + adap->tids.nsftids)) { - idx = nidx; - ret = TCB_COOKIE_G(rpl->cookie); + max_fidx = adap->tids.nftids + adap->tids.nsftids; + /* Get the corresponding filter entry for this tid */ + if (adap->tids.ftid_tab) { + /* Check this in normal filter region */ + idx = tid - adap->tids.ftid_base; + if (idx >= max_fidx) + return; f = &adap->tids.ftid_tab[idx]; + if (f->tid != tid) + return; + } + + /* We found the filter entry for this tid */ + if (f) { + unsigned int ret = TCB_COOKIE_G(rpl->cookie); + struct filter_ctx *ctx; + + /* Pull off any filter operation context attached to the + * filter. + */ + ctx = f->ctx; + f->ctx = NULL; if (ret == FW_FILTER_WR_FLT_DELETED) { /* Clear the filter when we get confirmation from the * hardware that the filter has been deleted. */ clear_filter(adap, f); + if (ctx) + ctx->result = 0; } else if (ret == FW_FILTER_WR_SMT_TBL_FULL) { dev_err(adap->pdev_dev, "filter %u setup failed due to full SMT\n", idx); clear_filter(adap, f); + if (ctx) + ctx->result = -ENOMEM; } else if (ret == FW_FILTER_WR_FLT_ADDED) { f->smtidx = (be64_to_cpu(rpl->oldval) >> 24) & 0xff; f->pending = 0; /* asynchronous setup completed */ f->valid = 1; + if (ctx) { + ctx->result = 0; + ctx->tid = idx; + } } else { /* Something went wrong. Issue a warning about the * problem and clear everything out. @@ -269,6 +712,10 @@ void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl) dev_err(adap->pdev_dev, "filter %u setup failed with error %u\n", idx, ret); clear_filter(adap, f); + if (ctx) + ctx->result = -EINVAL; } + if (ctx) + complete(&ctx->completion); } } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h index f6bd0bf65b23..23742cb1c69f 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h @@ -44,4 +44,5 @@ int set_filter_wr(struct adapter *adapter, int fidx); int delete_filter(struct adapter *adapter, unsigned int fidx); int writable_filter(struct filter_entry *f); +void clear_all_filters(struct adapter *adapter); #endif /* __CXGB4_FILTER_H */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 5cdcfe871c12..e97daa0510e5 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -1324,19 +1324,22 @@ EXPORT_SYMBOL(cxgb4_remove_tid); */ static int tid_init(struct tid_info *t) { - size_t size; - unsigned int stid_bmap_size; - unsigned int natids = t->natids; struct adapter *adap = container_of(t, struct adapter, tids); + unsigned int max_ftids = t->nftids + t->nsftids; + unsigned int natids = t->natids; + unsigned int stid_bmap_size; + unsigned int ftid_bmap_size; + size_t size; stid_bmap_size = BITS_TO_LONGS(t->nstids + t->nsftids); + ftid_bmap_size = BITS_TO_LONGS(t->nftids); size = t->ntids * sizeof(*t->tid_tab) + natids * sizeof(*t->atid_tab) + t->nstids * sizeof(*t->stid_tab) + t->nsftids * sizeof(*t->stid_tab) + stid_bmap_size * sizeof(long) + - t->nftids * sizeof(*t->ftid_tab) + - t->nsftids * sizeof(*t->ftid_tab); + max_ftids * sizeof(*t->ftid_tab) + + ftid_bmap_size * sizeof(long); t->tid_tab = t4_alloc_mem(size); if (!t->tid_tab) @@ -1346,8 +1349,10 @@ static int tid_init(struct tid_info *t) t->stid_tab = (struct serv_entry *)&t->atid_tab[natids]; t->stid_bmap = (unsigned long *)&t->stid_tab[t->nstids + t->nsftids]; t->ftid_tab = (struct filter_entry *)&t->stid_bmap[stid_bmap_size]; + t->ftid_bmap = (unsigned long *)&t->ftid_tab[max_ftids]; spin_lock_init(&t->stid_lock); spin_lock_init(&t->atid_lock); + spin_lock_init(&t->ftid_lock); t->stids_in_use = 0; t->sftids_in_use = 0; @@ -1362,12 +1367,16 @@ static int tid_init(struct tid_info *t) t->atid_tab[natids - 1].next = &t->atid_tab[natids]; t->afree = t->atid_tab; } - bitmap_zero(t->stid_bmap, t->nstids + t->nsftids); - /* Reserve stid 0 for T4/T5 adapters */ - if (!t->stid_base && - (CHELSIO_CHIP_VERSION(adap->params.chip) <= CHELSIO_T5)) - __set_bit(0, t->stid_bmap); + if (is_offload(adap)) { + bitmap_zero(t->stid_bmap, t->nstids + t->nsftids); + /* Reserve stid 0 for T4/T5 adapters */ + if (!t->stid_base && + CHELSIO_CHIP_VERSION(adap->params.chip) <= CHELSIO_T5) + __set_bit(0, t->stid_bmap); + } + + bitmap_zero(t->ftid_bmap, t->nftids); return 0; } @@ -4825,7 +4834,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) i); } - if (is_offload(adapter) && tid_init(&adapter->tids) < 0) { + if (tid_init(&adapter->tids) < 0) { dev_warn(&pdev->dev, "could not allocate TID table, " "continuing\n"); adapter->params.offload = 0; @@ -5012,13 +5021,7 @@ static void remove_one(struct pci_dev *pdev) /* If we allocated filters, free up state associated with any * valid filters ... */ - if (adapter->tids.ftid_tab) { - struct filter_entry *f = &adapter->tids.ftid_tab[0]; - for (i = 0; i < (adapter->tids.nftids + - adapter->tids.nsftids); i++, f++) - if (f->valid) - clear_filter(adapter, f); - } + clear_all_filters(adapter); if (adapter->flags & FULL_INIT_DONE) cxgb_down(adapter); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h index b3544f6b88ca..47bd14f602db 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h @@ -1,7 +1,7 @@ /* * This file is part of the Chelsio T4 Ethernet driver for Linux. * - * Copyright (c) 2003-2014 Chelsio Communications, Inc. All rights reserved. + * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -106,6 +106,7 @@ struct tid_info { unsigned int atid_base; struct filter_entry *ftid_tab; + unsigned long *ftid_bmap; unsigned int nftids; unsigned int ftid_base; unsigned int aftid_base; @@ -126,6 +127,8 @@ struct tid_info { atomic_t tids_in_use; /* TIDs in the HASH */ atomic_t hash_tids_in_use; + /* lock for setting/clearing filter bitmap */ + spinlock_t ftid_lock; }; static inline void *lookup_tid(const struct tid_info *t, unsigned int tid) @@ -185,6 +188,27 @@ int cxgb4_create_server_filter(const struct net_device *dev, unsigned int stid, int cxgb4_remove_server_filter(const struct net_device *dev, unsigned int stid, unsigned int queue, bool ipv6); +/* Filter operation context to allow callers of cxgb4_set_filter() and + * cxgb4_del_filter() to wait for an asynchronous completion. + */ +struct filter_ctx { + struct completion completion; /* completion rendezvous */ + void *closure; /* caller's opaque information */ + int result; /* result of operation */ + u32 tid; /* to store tid */ +}; + +struct ch_filter_specification; + +int __cxgb4_set_filter(struct net_device *dev, int filter_id, + struct ch_filter_specification *fs, + struct filter_ctx *ctx); +int __cxgb4_del_filter(struct net_device *dev, int filter_id, + struct filter_ctx *ctx); +int cxgb4_set_filter(struct net_device *dev, int filter_id, + struct ch_filter_specification *fs); +int cxgb4_del_filter(struct net_device *dev, int filter_id); + static inline void set_wr_txq(struct sk_buff *skb, int prio, int queue) { skb_set_queue_mapping(skb, (queue << 1) | prio); From 2e8aad7bf20323c6ef0beec859a77c94a082c55d Mon Sep 17 00:00:00 2001 From: Rahul Lakkireddy Date: Tue, 20 Sep 2016 17:13:08 +0530 Subject: [PATCH 1343/1715] cxgb4: add parser to translate u32 filters to internal spec Parse information sent by u32 into internal filter specification. Add support for parsing several fields in IPv4, IPv6, TCP, and UDP. Signed-off-by: Rahul Lakkireddy Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- .../chelsio/cxgb4/cxgb4_tc_u32_parse.h | 282 ++++++++++++++++++ 1 file changed, 282 insertions(+) create mode 100644 drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32_parse.h diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32_parse.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32_parse.h new file mode 100644 index 000000000000..65c20ca51baa --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32_parse.h @@ -0,0 +1,282 @@ +/* + * This file is part of the Chelsio T4 Ethernet driver for Linux. + * + * Copyright (c) 2016 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __CXGB4_TC_U32_PARSE_H +#define __CXGB4_TC_U32_PARSE_H + +struct cxgb4_match_field { + int off; /* Offset from the beginning of the header to match */ + /* Fill the value/mask pair in the spec if matched */ + int (*val)(struct ch_filter_specification *f, u32 val, u32 mask); +}; + +/* IPv4 match fields */ +static inline int cxgb4_fill_ipv4_tos(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + f->val.tos = (ntohl(val) >> 16) & 0x000000FF; + f->mask.tos = (ntohl(mask) >> 16) & 0x000000FF; + + return 0; +} + +static inline int cxgb4_fill_ipv4_frag(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + u32 mask_val; + u8 frag_val; + + frag_val = (ntohl(val) >> 13) & 0x00000007; + mask_val = ntohl(mask) & 0x0000FFFF; + + if (frag_val == 0x1 && mask_val != 0x3FFF) { /* MF set */ + f->val.frag = 1; + f->mask.frag = 1; + } else if (frag_val == 0x2 && mask_val != 0x3FFF) { /* DF set */ + f->val.frag = 0; + f->mask.frag = 1; + } else { + return -EINVAL; + } + + return 0; +} + +static inline int cxgb4_fill_ipv4_proto(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + f->val.proto = (ntohl(val) >> 16) & 0x000000FF; + f->mask.proto = (ntohl(mask) >> 16) & 0x000000FF; + + return 0; +} + +static inline int cxgb4_fill_ipv4_src_ip(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + memcpy(&f->val.fip[0], &val, sizeof(u32)); + memcpy(&f->mask.fip[0], &mask, sizeof(u32)); + + return 0; +} + +static inline int cxgb4_fill_ipv4_dst_ip(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + memcpy(&f->val.lip[0], &val, sizeof(u32)); + memcpy(&f->mask.lip[0], &mask, sizeof(u32)); + + return 0; +} + +static const struct cxgb4_match_field cxgb4_ipv4_fields[] = { + { .off = 0, .val = cxgb4_fill_ipv4_tos }, + { .off = 4, .val = cxgb4_fill_ipv4_frag }, + { .off = 8, .val = cxgb4_fill_ipv4_proto }, + { .off = 12, .val = cxgb4_fill_ipv4_src_ip }, + { .off = 16, .val = cxgb4_fill_ipv4_dst_ip }, + { .val = NULL } +}; + +/* IPv6 match fields */ +static inline int cxgb4_fill_ipv6_tos(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + f->val.tos = (ntohl(val) >> 20) & 0x000000FF; + f->mask.tos = (ntohl(mask) >> 20) & 0x000000FF; + + return 0; +} + +static inline int cxgb4_fill_ipv6_proto(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + f->val.proto = (ntohl(val) >> 8) & 0x000000FF; + f->mask.proto = (ntohl(mask) >> 8) & 0x000000FF; + + return 0; +} + +static inline int cxgb4_fill_ipv6_src_ip0(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + memcpy(&f->val.fip[0], &val, sizeof(u32)); + memcpy(&f->mask.fip[0], &mask, sizeof(u32)); + + return 0; +} + +static inline int cxgb4_fill_ipv6_src_ip1(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + memcpy(&f->val.fip[4], &val, sizeof(u32)); + memcpy(&f->mask.fip[4], &mask, sizeof(u32)); + + return 0; +} + +static inline int cxgb4_fill_ipv6_src_ip2(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + memcpy(&f->val.fip[8], &val, sizeof(u32)); + memcpy(&f->mask.fip[8], &mask, sizeof(u32)); + + return 0; +} + +static inline int cxgb4_fill_ipv6_src_ip3(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + memcpy(&f->val.fip[12], &val, sizeof(u32)); + memcpy(&f->mask.fip[12], &mask, sizeof(u32)); + + return 0; +} + +static inline int cxgb4_fill_ipv6_dst_ip0(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + memcpy(&f->val.lip[0], &val, sizeof(u32)); + memcpy(&f->mask.lip[0], &mask, sizeof(u32)); + + return 0; +} + +static inline int cxgb4_fill_ipv6_dst_ip1(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + memcpy(&f->val.lip[4], &val, sizeof(u32)); + memcpy(&f->mask.lip[4], &mask, sizeof(u32)); + + return 0; +} + +static inline int cxgb4_fill_ipv6_dst_ip2(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + memcpy(&f->val.lip[8], &val, sizeof(u32)); + memcpy(&f->mask.lip[8], &mask, sizeof(u32)); + + return 0; +} + +static inline int cxgb4_fill_ipv6_dst_ip3(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + memcpy(&f->val.lip[12], &val, sizeof(u32)); + memcpy(&f->mask.lip[12], &mask, sizeof(u32)); + + return 0; +} + +static const struct cxgb4_match_field cxgb4_ipv6_fields[] = { + { .off = 0, .val = cxgb4_fill_ipv6_tos }, + { .off = 4, .val = cxgb4_fill_ipv6_proto }, + { .off = 8, .val = cxgb4_fill_ipv6_src_ip0 }, + { .off = 12, .val = cxgb4_fill_ipv6_src_ip1 }, + { .off = 16, .val = cxgb4_fill_ipv6_src_ip2 }, + { .off = 20, .val = cxgb4_fill_ipv6_src_ip3 }, + { .off = 24, .val = cxgb4_fill_ipv6_dst_ip0 }, + { .off = 28, .val = cxgb4_fill_ipv6_dst_ip1 }, + { .off = 32, .val = cxgb4_fill_ipv6_dst_ip2 }, + { .off = 36, .val = cxgb4_fill_ipv6_dst_ip3 }, + { .val = NULL } +}; + +/* TCP/UDP match */ +static inline int cxgb4_fill_l4_ports(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + f->val.fport = ntohl(val) >> 16; + f->mask.fport = ntohl(mask) >> 16; + f->val.lport = ntohl(val) & 0x0000FFFF; + f->mask.lport = ntohl(mask) & 0x0000FFFF; + + return 0; +}; + +static const struct cxgb4_match_field cxgb4_tcp_fields[] = { + { .off = 0, .val = cxgb4_fill_l4_ports }, + { .val = NULL } +}; + +static const struct cxgb4_match_field cxgb4_udp_fields[] = { + { .off = 0, .val = cxgb4_fill_l4_ports }, + { .val = NULL } +}; + +struct cxgb4_next_header { + unsigned int offset; /* Offset to next header */ + /* offset, shift, and mask added to offset above + * to get to next header. Useful when using a header + * field's value to jump to next header such as IHL field + * in IPv4 header. + */ + unsigned int offoff; + u32 shift; + u32 mask; + /* match criteria to make this jump */ + unsigned int match_off; + u32 match_val; + u32 match_mask; + /* location of jump to make */ + const struct cxgb4_match_field *jump; +}; + +/* Accept a rule with a jump to transport layer header based on IHL field in + * IPv4 header. + */ +static const struct cxgb4_next_header cxgb4_ipv4_jumps[] = { + { .offset = 0, .offoff = 0, .shift = 6, .mask = 0xF, + .match_off = 8, .match_val = 0x600, .match_mask = 0xFF00, + .jump = cxgb4_tcp_fields }, + { .offset = 0, .offoff = 0, .shift = 6, .mask = 0xF, + .match_off = 8, .match_val = 0x1100, .match_mask = 0xFF00, + .jump = cxgb4_udp_fields }, + { .jump = NULL } +}; + +/* Accept a rule with a jump directly past the 40 Bytes of IPv6 fixed header + * to get to transport layer header. + */ +static const struct cxgb4_next_header cxgb4_ipv6_jumps[] = { + { .offset = 0x28, .offoff = 0, .shift = 0, .mask = 0, + .match_off = 4, .match_val = 0x60000, .match_mask = 0xFF0000, + .jump = cxgb4_tcp_fields }, + { .offset = 0x28, .offoff = 0, .shift = 0, .mask = 0, + .match_off = 4, .match_val = 0x110000, .match_mask = 0xFF0000, + .jump = cxgb4_udp_fields }, + { .jump = NULL } +}; +#endif /* __CXGB4_TC_U32_PARSE_H */ From d8931847488d250e27d8f18ca6b7373e9f981d7a Mon Sep 17 00:00:00 2001 From: Rahul Lakkireddy Date: Tue, 20 Sep 2016 17:13:09 +0530 Subject: [PATCH 1344/1715] cxgb4: add support for offloading u32 filters Add support for offloading u32 filter onto hardware. Links are stored in a jump table to perform necessary jumps to match TCP/UDP header. When inserting rules in the linked bucket, the TCP/UDP match fields in the corresponding entry of the jump table are appended to the filter rule before insertion. If a link is deleted, then all corresponding filters associated with the link are also deleted. Also enable hardware tc offload as a supported feature. Signed-off-by: Rahul Lakkireddy Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/Makefile | 2 +- drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 3 + .../net/ethernet/chelsio/cxgb4/cxgb4_main.c | 41 +- .../net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c | 412 ++++++++++++++++++ .../net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.h | 57 +++ .../chelsio/cxgb4/cxgb4_tc_u32_parse.h | 12 + 6 files changed, 525 insertions(+), 2 deletions(-) create mode 100644 drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c create mode 100644 drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.h diff --git a/drivers/net/ethernet/chelsio/cxgb4/Makefile b/drivers/net/ethernet/chelsio/cxgb4/Makefile index da889817ec27..c6b71f656992 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/Makefile +++ b/drivers/net/ethernet/chelsio/cxgb4/Makefile @@ -4,7 +4,7 @@ obj-$(CONFIG_CHELSIO_T4) += cxgb4.o -cxgb4-objs := cxgb4_main.o l2t.o t4_hw.o sge.o clip_tbl.o cxgb4_ethtool.o cxgb4_uld.o sched.o cxgb4_filter.o +cxgb4-objs := cxgb4_main.o l2t.o t4_hw.o sge.o clip_tbl.o cxgb4_ethtool.o cxgb4_uld.o sched.o cxgb4_filter.o cxgb4_tc_u32.o cxgb4-$(CONFIG_CHELSIO_T4_DCB) += cxgb4_dcb.o cxgb4-$(CONFIG_CHELSIO_T4_FCOE) += cxgb4_fcoe.o cxgb4-$(CONFIG_DEBUG_FS) += cxgb4_debugfs.o diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 51edb126da65..ea0d1f188802 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -851,6 +851,9 @@ struct adapter { spinlock_t stats_lock; spinlock_t win0_lock ____cacheline_aligned_in_smp; + + /* TC u32 offload */ + struct cxgb4_tc_u32_table *tc_u32; }; /* Support for "sched-class" command to allow a TX Scheduling Class to be diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index e97daa0510e5..1be4d235a576 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -78,6 +78,7 @@ #include "clip_tbl.h" #include "l2t.h" #include "sched.h" +#include "cxgb4_tc_u32.h" char cxgb4_driver_name[] = KBUILD_MODNAME; @@ -2711,6 +2712,35 @@ static int cxgb_set_tx_maxrate(struct net_device *dev, int index, u32 rate) return err; } +int cxgb_setup_tc(struct net_device *dev, u32 handle, __be16 proto, + struct tc_to_netdev *tc) +{ + struct port_info *pi = netdev2pinfo(dev); + struct adapter *adap = netdev2adap(dev); + + if (!(adap->flags & FULL_INIT_DONE)) { + dev_err(adap->pdev_dev, + "Failed to setup tc on port %d. Link Down?\n", + pi->port_id); + return -EINVAL; + } + + if (TC_H_MAJ(handle) == TC_H_MAJ(TC_H_INGRESS) && + tc->type == TC_SETUP_CLSU32) { + switch (tc->cls_u32->command) { + case TC_CLSU32_NEW_KNODE: + case TC_CLSU32_REPLACE_KNODE: + return cxgb4_config_knode(dev, proto, tc->cls_u32); + case TC_CLSU32_DELETE_KNODE: + return cxgb4_delete_knode(dev, proto, tc->cls_u32); + default: + return -EOPNOTSUPP; + } + } + + return -EOPNOTSUPP; +} + static const struct net_device_ops cxgb4_netdev_ops = { .ndo_open = cxgb_open, .ndo_stop = cxgb_close, @@ -2734,6 +2764,7 @@ static const struct net_device_ops cxgb4_netdev_ops = { .ndo_busy_poll = cxgb_busy_poll, #endif .ndo_set_tx_maxrate = cxgb_set_tx_maxrate, + .ndo_setup_tc = cxgb_setup_tc, }; #ifdef CONFIG_PCI_IOV @@ -4406,6 +4437,7 @@ static void free_some_resources(struct adapter *adapter) t4_free_mem(adapter->l2t); t4_cleanup_sched(adapter); t4_free_mem(adapter->tids.tid_tab); + cxgb4_cleanup_tc_u32(adapter); kfree(adapter->sge.egr_map); kfree(adapter->sge.ingr_map); kfree(adapter->sge.starving_fl); @@ -4750,7 +4782,8 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->hw_features = NETIF_F_SG | TSO_FLAGS | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM | NETIF_F_RXHASH | - NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; + NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_TC; if (highdma) netdev->hw_features |= NETIF_F_HIGHDMA; netdev->features |= netdev->hw_features; @@ -4838,6 +4871,12 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) dev_warn(&pdev->dev, "could not allocate TID table, " "continuing\n"); adapter->params.offload = 0; + } else { + adapter->tc_u32 = cxgb4_init_tc_u32(adapter, + CXGB4_MAX_LINK_HANDLE); + if (!adapter->tc_u32) + dev_warn(&pdev->dev, + "could not offload tc u32, continuing\n"); } if (is_offload(adapter)) { diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c new file mode 100644 index 000000000000..d63b8955ea05 --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c @@ -0,0 +1,412 @@ +/* + * This file is part of the Chelsio T4 Ethernet driver for Linux. + * + * Copyright (c) 2016 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "cxgb4.h" +#include "cxgb4_tc_u32_parse.h" +#include "cxgb4_tc_u32.h" + +/* Fill ch_filter_specification with parsed match value/mask pair. */ +static int fill_match_fields(struct adapter *adap, + struct ch_filter_specification *fs, + struct tc_cls_u32_offload *cls, + const struct cxgb4_match_field *entry, + bool next_header) +{ + unsigned int i, j; + u32 val, mask; + int off, err; + bool found; + + for (i = 0; i < cls->knode.sel->nkeys; i++) { + off = cls->knode.sel->keys[i].off; + val = cls->knode.sel->keys[i].val; + mask = cls->knode.sel->keys[i].mask; + + if (next_header) { + /* For next headers, parse only keys with offmask */ + if (!cls->knode.sel->keys[i].offmask) + continue; + } else { + /* For the remaining, parse only keys without offmask */ + if (cls->knode.sel->keys[i].offmask) + continue; + } + + found = false; + + for (j = 0; entry[j].val; j++) { + if (off == entry[j].off) { + found = true; + err = entry[j].val(fs, val, mask); + if (err) + return err; + break; + } + } + + if (!found) + return -EINVAL; + } + + return 0; +} + +int cxgb4_config_knode(struct net_device *dev, __be16 protocol, + struct tc_cls_u32_offload *cls) +{ + const struct cxgb4_match_field *start, *link_start = NULL; + struct adapter *adapter = netdev2adap(dev); + struct ch_filter_specification fs; + struct cxgb4_tc_u32_table *t; + struct cxgb4_link *link; + unsigned int filter_id; + u32 uhtid, link_uhtid; + bool is_ipv6 = false; + int ret; + + if (!can_tc_u32_offload(dev)) + return -EOPNOTSUPP; + + if (protocol != htons(ETH_P_IP) && protocol != htons(ETH_P_IPV6)) + return -EOPNOTSUPP; + + /* Fetch the location to insert the filter. */ + filter_id = cls->knode.handle & 0xFFFFF; + + if (filter_id > adapter->tids.nftids) { + dev_err(adapter->pdev_dev, + "Location %d out of range for insertion. Max: %d\n", + filter_id, adapter->tids.nftids); + return -ERANGE; + } + + t = adapter->tc_u32; + uhtid = TC_U32_USERHTID(cls->knode.handle); + link_uhtid = TC_U32_USERHTID(cls->knode.link_handle); + + /* Ensure that uhtid is either root u32 (i.e. 0x800) + * or a a valid linked bucket. + */ + if (uhtid != 0x800 && uhtid >= t->size) + return -EINVAL; + + /* Ensure link handle uhtid is sane, if specified. */ + if (link_uhtid >= t->size) + return -EINVAL; + + memset(&fs, 0, sizeof(fs)); + + if (protocol == htons(ETH_P_IPV6)) { + start = cxgb4_ipv6_fields; + is_ipv6 = true; + } else { + start = cxgb4_ipv4_fields; + is_ipv6 = false; + } + + if (uhtid != 0x800) { + /* Link must exist from root node before insertion. */ + if (!t->table[uhtid - 1].link_handle) + return -EINVAL; + + /* Link must have a valid supported next header. */ + link_start = t->table[uhtid - 1].match_field; + if (!link_start) + return -EINVAL; + } + + /* Parse links and record them for subsequent jumps to valid + * next headers. + */ + if (link_uhtid) { + const struct cxgb4_next_header *next; + bool found = false; + unsigned int i, j; + u32 val, mask; + int off; + + if (t->table[link_uhtid - 1].link_handle) { + dev_err(adapter->pdev_dev, + "Link handle exists for: 0x%x\n", + link_uhtid); + return -EINVAL; + } + + next = is_ipv6 ? cxgb4_ipv6_jumps : cxgb4_ipv4_jumps; + + /* Try to find matches that allow jumps to next header. */ + for (i = 0; next[i].jump; i++) { + if (next[i].offoff != cls->knode.sel->offoff || + next[i].shift != cls->knode.sel->offshift || + next[i].mask != cls->knode.sel->offmask || + next[i].offset != cls->knode.sel->off) + continue; + + /* Found a possible candidate. Find a key that + * matches the corresponding offset, value, and + * mask to jump to next header. + */ + for (j = 0; j < cls->knode.sel->nkeys; j++) { + off = cls->knode.sel->keys[j].off; + val = cls->knode.sel->keys[j].val; + mask = cls->knode.sel->keys[j].mask; + + if (next[i].match_off == off && + next[i].match_val == val && + next[i].match_mask == mask) { + found = true; + break; + } + } + + if (!found) + continue; /* Try next candidate. */ + + /* Candidate to jump to next header found. + * Translate all keys to internal specification + * and store them in jump table. This spec is copied + * later to set the actual filters. + */ + ret = fill_match_fields(adapter, &fs, cls, + start, false); + if (ret) + goto out; + + link = &t->table[link_uhtid - 1]; + link->match_field = next[i].jump; + link->link_handle = cls->knode.handle; + memcpy(&link->fs, &fs, sizeof(fs)); + break; + } + + /* No candidate found to jump to next header. */ + if (!found) + return -EINVAL; + + return 0; + } + + /* Fill ch_filter_specification match fields to be shipped to hardware. + * Copy the linked spec (if any) first. And then update the spec as + * needed. + */ + if (uhtid != 0x800 && t->table[uhtid - 1].link_handle) { + /* Copy linked ch_filter_specification */ + memcpy(&fs, &t->table[uhtid - 1].fs, sizeof(fs)); + ret = fill_match_fields(adapter, &fs, cls, + link_start, true); + if (ret) + goto out; + } + + ret = fill_match_fields(adapter, &fs, cls, start, false); + if (ret) + goto out; + + /* The filter spec has been completely built from the info + * provided from u32. We now set some default fields in the + * spec for sanity. + */ + + /* Match only packets coming from the ingress port where this + * filter will be created. + */ + fs.val.iport = netdev2pinfo(dev)->port_id; + fs.mask.iport = ~0; + + /* Enable filter hit counts. */ + fs.hitcnts = 1; + + /* Set type of filter - IPv6 or IPv4 */ + fs.type = is_ipv6 ? 1 : 0; + + /* Set the filter */ + ret = cxgb4_set_filter(dev, filter_id, &fs); + if (ret) + goto out; + + /* If this is a linked bucket, then set the corresponding + * entry in the bitmap to mark it as belonging to this linked + * bucket. + */ + if (uhtid != 0x800 && t->table[uhtid - 1].link_handle) + set_bit(filter_id, t->table[uhtid - 1].tid_map); + +out: + return ret; +} + +int cxgb4_delete_knode(struct net_device *dev, __be16 protocol, + struct tc_cls_u32_offload *cls) +{ + struct adapter *adapter = netdev2adap(dev); + unsigned int filter_id, max_tids, i, j; + struct cxgb4_link *link = NULL; + struct cxgb4_tc_u32_table *t; + u32 handle, uhtid; + int ret; + + if (!can_tc_u32_offload(dev)) + return -EOPNOTSUPP; + + /* Fetch the location to delete the filter. */ + filter_id = cls->knode.handle & 0xFFFFF; + + if (filter_id > adapter->tids.nftids) { + dev_err(adapter->pdev_dev, + "Location %d out of range for deletion. Max: %d\n", + filter_id, adapter->tids.nftids); + return -ERANGE; + } + + t = adapter->tc_u32; + handle = cls->knode.handle; + uhtid = TC_U32_USERHTID(cls->knode.handle); + + /* Ensure that uhtid is either root u32 (i.e. 0x800) + * or a a valid linked bucket. + */ + if (uhtid != 0x800 && uhtid >= t->size) + return -EINVAL; + + /* Delete the specified filter */ + if (uhtid != 0x800) { + link = &t->table[uhtid - 1]; + if (!link->link_handle) + return -EINVAL; + + if (!test_bit(filter_id, link->tid_map)) + return -EINVAL; + } + + ret = cxgb4_del_filter(dev, filter_id); + if (ret) + goto out; + + if (link) + clear_bit(filter_id, link->tid_map); + + /* If a link is being deleted, then delete all filters + * associated with the link. + */ + max_tids = adapter->tids.nftids; + for (i = 0; i < t->size; i++) { + link = &t->table[i]; + + if (link->link_handle == handle) { + for (j = 0; j < max_tids; j++) { + if (!test_bit(j, link->tid_map)) + continue; + + ret = __cxgb4_del_filter(dev, j, NULL); + if (ret) + goto out; + + clear_bit(j, link->tid_map); + } + + /* Clear the link state */ + link->match_field = NULL; + link->link_handle = 0; + memset(&link->fs, 0, sizeof(link->fs)); + break; + } + } + +out: + return ret; +} + +void cxgb4_cleanup_tc_u32(struct adapter *adap) +{ + struct cxgb4_tc_u32_table *t; + unsigned int i; + + if (!adap->tc_u32) + return; + + /* Free up all allocated memory. */ + t = adap->tc_u32; + for (i = 0; i < t->size; i++) { + struct cxgb4_link *link = &t->table[i]; + + t4_free_mem(link->tid_map); + } + t4_free_mem(adap->tc_u32); +} + +struct cxgb4_tc_u32_table *cxgb4_init_tc_u32(struct adapter *adap, + unsigned int size) +{ + struct cxgb4_tc_u32_table *t; + unsigned int i; + + if (!size) + return NULL; + + t = t4_alloc_mem(sizeof(*t) + + (size * sizeof(struct cxgb4_link))); + if (!t) + return NULL; + + t->size = size; + + for (i = 0; i < t->size; i++) { + struct cxgb4_link *link = &t->table[i]; + unsigned int bmap_size; + unsigned int max_tids; + + max_tids = adap->tids.nftids; + bmap_size = BITS_TO_LONGS(max_tids); + link->tid_map = t4_alloc_mem(sizeof(unsigned long) * bmap_size); + if (!link->tid_map) + goto out_no_mem; + bitmap_zero(link->tid_map, max_tids); + } + + return t; + +out_no_mem: + for (i = 0; i < t->size; i++) { + struct cxgb4_link *link = &t->table[i]; + + if (link->tid_map) + t4_free_mem(link->tid_map); + } + + if (t) + t4_free_mem(t); + + return NULL; +} diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.h new file mode 100644 index 000000000000..6bdc885eff22 --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.h @@ -0,0 +1,57 @@ +/* + * This file is part of the Chelsio T4 Ethernet driver for Linux. + * + * Copyright (c) 2016 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __CXGB4_TC_U32_H +#define __CXGB4_TC_U32_H + +#include + +#define CXGB4_MAX_LINK_HANDLE 32 + +static inline bool can_tc_u32_offload(struct net_device *dev) +{ + struct adapter *adap = netdev2adap(dev); + + return (dev->features & NETIF_F_HW_TC) && adap->tc_u32 ? true : false; +} + +int cxgb4_config_knode(struct net_device *dev, __be16 protocol, + struct tc_cls_u32_offload *cls); +int cxgb4_delete_knode(struct net_device *dev, __be16 protocol, + struct tc_cls_u32_offload *cls); + +void cxgb4_cleanup_tc_u32(struct adapter *adapter); +struct cxgb4_tc_u32_table *cxgb4_init_tc_u32(struct adapter *adap, + unsigned int size); +#endif /* __CXGB4_TC_U32_H */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32_parse.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32_parse.h index 65c20ca51baa..a4b99edcc339 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32_parse.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32_parse.h @@ -279,4 +279,16 @@ static const struct cxgb4_next_header cxgb4_ipv6_jumps[] = { .jump = cxgb4_udp_fields }, { .jump = NULL } }; + +struct cxgb4_link { + const struct cxgb4_match_field *match_field; /* Next header */ + struct ch_filter_specification fs; /* Match spec associated with link */ + u32 link_handle; /* Knode handle associated with the link */ + unsigned long *tid_map; /* Bitmap for filter tids */ +}; + +struct cxgb4_tc_u32_table { + unsigned int size; /* number of entries in table */ + struct cxgb4_link table[0]; /* Jump table */ +}; #endif /* __CXGB4_TC_U32_PARSE_H */ From b20ff726fa8360a0508d2d79ecdee5a45d854e99 Mon Sep 17 00:00:00 2001 From: Rahul Lakkireddy Date: Tue, 20 Sep 2016 17:13:10 +0530 Subject: [PATCH 1345/1715] cxgb4: add support for drop and redirect actions Add support for dropping matched packets in hardware. Also add support for re-directing matched packets to a specified port in hardware. Signed-off-by: Rahul Lakkireddy Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- .../net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c | 71 +++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c index d63b8955ea05..49d2debb334e 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c @@ -32,6 +32,9 @@ * SOFTWARE. */ +#include +#include + #include "cxgb4.h" #include "cxgb4_tc_u32_parse.h" #include "cxgb4_tc_u32.h" @@ -82,6 +85,67 @@ static int fill_match_fields(struct adapter *adap, return 0; } +/* Fill ch_filter_specification with parsed action. */ +static int fill_action_fields(struct adapter *adap, + struct ch_filter_specification *fs, + struct tc_cls_u32_offload *cls) +{ + unsigned int num_actions = 0; + const struct tc_action *a; + struct tcf_exts *exts; + LIST_HEAD(actions); + + exts = cls->knode.exts; + if (tc_no_actions(exts)) + return -EINVAL; + + tcf_exts_to_list(exts, &actions); + list_for_each_entry(a, &actions, list) { + /* Don't allow more than one action per rule. */ + if (num_actions) + return -EINVAL; + + /* Drop in hardware. */ + if (is_tcf_gact_shot(a)) { + fs->action = FILTER_DROP; + num_actions++; + continue; + } + + /* Re-direct to specified port in hardware. */ + if (is_tcf_mirred_redirect(a)) { + struct net_device *n_dev; + unsigned int i, index; + bool found = false; + + index = tcf_mirred_ifindex(a); + for_each_port(adap, i) { + n_dev = adap->port[i]; + if (index == n_dev->ifindex) { + fs->action = FILTER_SWITCH; + fs->eport = i; + found = true; + break; + } + } + + /* Interface doesn't belong to any port of + * the underlying hardware. + */ + if (!found) + return -EINVAL; + + num_actions++; + continue; + } + + /* Un-supported action. */ + return -EINVAL; + } + + return 0; +} + int cxgb4_config_knode(struct net_device *dev, __be16 protocol, struct tc_cls_u32_offload *cls) { @@ -234,6 +298,13 @@ int cxgb4_config_knode(struct net_device *dev, __be16 protocol, if (ret) goto out; + /* Fill ch_filter_specification action fields to be shipped to + * hardware. + */ + ret = fill_action_fields(adapter, &fs, cls); + if (ret) + goto out; + /* The filter spec has been completely built from the info * provided from u32. We now set some default fields in the * spec for sanity. From e82f71489ffb325a9b7bca367b06a39315452dfe Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Tue, 20 Sep 2016 23:53:24 +0800 Subject: [PATCH 1346/1715] net: ethernet: mediatek: fix missing changes merged for conflicts overlapping commits add the missing commits about 1) Commit d3bd1ce4db8e843dce421e2f8f123e5251a9c7d3 ("remove redundant free_irq for devm_request_ir allocated irq") 2) Commit 7c6b0d76fa02213393815e3b6d5e4a415bf3f0e2 ("fix logic unbalance between probe and remove") during merge for conflicts overlapping commits by Commit b20b378d49926b82c0a131492fa8842156e0e8a9 ("Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net") Signed-off-by: Sean Wang Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index ca6b5013e275..2909372c4da0 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1894,11 +1894,8 @@ static void mtk_uninit(struct net_device *dev) struct mtk_eth *eth = mac->hw; phy_disconnect(mac->phy_dev); - mtk_mdio_cleanup(eth); mtk_irq_disable(eth, MTK_QDMA_INT_MASK, ~0); mtk_irq_disable(eth, MTK_PDMA_INT_MASK, ~0); - free_irq(eth->irq[1], dev); - free_irq(eth->irq[2], dev); } static int mtk_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) @@ -2454,6 +2451,7 @@ static int mtk_remove(struct platform_device *pdev) netif_napi_del(ð->tx_napi); netif_napi_del(ð->rx_napi); mtk_cleanup(eth); + mtk_mdio_cleanup(eth); return 0; } From 262b38cdb3e47d402f4fdf76fcf3e8c4c8380a52 Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Tue, 20 Sep 2016 22:30:11 +0200 Subject: [PATCH 1347/1715] net: ethernet: hisilicon: hns: use phydev from struct net_device The private structure contain a pointer to phydev, but the structure net_device already contain such pointer. So we can remove the pointer phydev in the private structure, and update the driver to use the one contained in struct net_device. Signed-off-by: Philippe Reynes Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hns_enet.c | 23 +++++-------- drivers/net/ethernet/hisilicon/hns/hns_enet.h | 1 - .../net/ethernet/hisilicon/hns/hns_ethtool.c | 33 +++++++++---------- 3 files changed, 24 insertions(+), 33 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index d7e1f8c7ae92..059aaeda46b1 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -994,10 +994,10 @@ static void hns_nic_adjust_link(struct net_device *ndev) struct hnae_handle *h = priv->ae_handle; int state = 1; - if (priv->phy) { + if (ndev->phydev) { h->dev->ops->adjust_link(h, ndev->phydev->speed, ndev->phydev->duplex); - state = priv->phy->link; + state = ndev->phydev->link; } state = state && h->dev->ops->get_status(h); @@ -1022,7 +1022,6 @@ static void hns_nic_adjust_link(struct net_device *ndev) */ int hns_nic_init_phy(struct net_device *ndev, struct hnae_handle *h) { - struct hns_nic_priv *priv = netdev_priv(ndev); struct phy_device *phy_dev = h->phy_dev; int ret; @@ -1046,8 +1045,6 @@ int hns_nic_init_phy(struct net_device *ndev, struct hnae_handle *h) if (h->phy_if == PHY_INTERFACE_MODE_XGMII) phy_dev->autoneg = false; - priv->phy = phy_dev; - return 0; } @@ -1224,8 +1221,8 @@ static int hns_nic_net_up(struct net_device *ndev) if (ret) goto out_start_err; - if (priv->phy) - phy_start(priv->phy); + if (ndev->phydev) + phy_start(ndev->phydev); clear_bit(NIC_STATE_DOWN, &priv->state); (void)mod_timer(&priv->service_timer, jiffies + SERVICE_TIMER_HZ); @@ -1259,8 +1256,8 @@ static void hns_nic_net_down(struct net_device *ndev) netif_tx_disable(ndev); priv->link = 0; - if (priv->phy) - phy_stop(priv->phy); + if (ndev->phydev) + phy_stop(ndev->phydev); ops = priv->ae_handle->dev->ops; @@ -1359,8 +1356,7 @@ static void hns_nic_net_timeout(struct net_device *ndev) static int hns_nic_do_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) { - struct hns_nic_priv *priv = netdev_priv(netdev); - struct phy_device *phy_dev = priv->phy; + struct phy_device *phy_dev = netdev->phydev; if (!netif_running(netdev)) return -EINVAL; @@ -2017,9 +2013,8 @@ static int hns_nic_dev_remove(struct platform_device *pdev) hns_nic_uninit_ring_data(priv); priv->ring_data = NULL; - if (priv->phy) - phy_disconnect(priv->phy); - priv->phy = NULL; + if (ndev->phydev) + phy_disconnect(ndev->phydev); if (!IS_ERR_OR_NULL(priv->ae_handle)) hnae_put_handle(priv->ae_handle); diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.h b/drivers/net/ethernet/hisilicon/hns/hns_enet.h index 44bb3015eed3..5b412de350aa 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.h @@ -59,7 +59,6 @@ struct hns_nic_priv { u32 port_id; int phy_mode; int phy_led_val; - struct phy_device *phy; struct net_device *netdev; struct device *dev; struct hnae_handle *ae_handle; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c index 5eb3245bdf86..0e2c17423a34 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c @@ -48,9 +48,9 @@ static u32 hns_nic_get_link(struct net_device *net_dev) h = priv->ae_handle; - if (priv->phy) { - if (!genphy_read_status(priv->phy)) - link_stat = priv->phy->link; + if (net_dev->phydev) { + if (!genphy_read_status(net_dev->phydev)) + link_stat = net_dev->phydev->link; else link_stat = 0; } @@ -67,8 +67,7 @@ static void hns_get_mdix_mode(struct net_device *net_dev, struct ethtool_cmd *cmd) { int mdix_ctrl, mdix, retval, is_resolved; - struct hns_nic_priv *priv = netdev_priv(net_dev); - struct phy_device *phy_dev = priv->phy; + struct phy_device *phy_dev = net_dev->phydev; if (!phy_dev || !phy_dev->mdio.bus) { cmd->eth_tp_mdix_ctrl = ETH_TP_MDI_INVALID; @@ -144,8 +143,8 @@ static int hns_nic_get_settings(struct net_device *net_dev, ethtool_cmd_speed_set(cmd, speed); cmd->duplex = duplex; - if (priv->phy) - (void)phy_ethtool_gset(priv->phy, cmd); + if (net_dev->phydev) + (void)phy_ethtool_gset(net_dev->phydev, cmd); link_stat = hns_nic_get_link(net_dev); if (!link_stat) { @@ -215,13 +214,13 @@ static int hns_nic_set_settings(struct net_device *net_dev, cmd->duplex != DUPLEX_FULL) return -EINVAL; } else if (h->phy_if == PHY_INTERFACE_MODE_SGMII) { - if (!priv->phy && cmd->autoneg == AUTONEG_ENABLE) + if (!net_dev->phydev && cmd->autoneg == AUTONEG_ENABLE) return -EINVAL; if (speed == SPEED_1000 && cmd->duplex == DUPLEX_HALF) return -EINVAL; - if (priv->phy) - return phy_ethtool_sset(priv->phy, cmd); + if (net_dev->phydev) + return phy_ethtool_sset(net_dev->phydev, cmd); if ((speed != SPEED_10 && speed != SPEED_100 && speed != SPEED_1000) || (cmd->duplex != DUPLEX_HALF && @@ -305,7 +304,7 @@ static int __lb_setup(struct net_device *ndev, { int ret = 0; struct hns_nic_priv *priv = netdev_priv(ndev); - struct phy_device *phy_dev = priv->phy; + struct phy_device *phy_dev = ndev->phydev; struct hnae_handle *h = priv->ae_handle; switch (loop) { @@ -910,7 +909,7 @@ void hns_get_strings(struct net_device *netdev, u32 stringset, u8 *data) memcpy(buff, hns_nic_test_strs[MAC_INTERNALLOOP_SERDES], ETH_GSTRING_LEN); buff += ETH_GSTRING_LEN; - if ((priv->phy) && (!priv->phy->is_c45)) + if ((netdev->phydev) && (!netdev->phydev->is_c45)) memcpy(buff, hns_nic_test_strs[MAC_INTERNALLOOP_PHY], ETH_GSTRING_LEN); @@ -996,7 +995,7 @@ int hns_get_sset_count(struct net_device *netdev, int stringset) if (priv->ae_handle->phy_if == PHY_INTERFACE_MODE_XGMII) cnt--; - if ((!priv->phy) || (priv->phy->is_c45)) + if ((!netdev->phydev) || (netdev->phydev->is_c45)) cnt--; return cnt; @@ -1015,8 +1014,7 @@ int hns_get_sset_count(struct net_device *netdev, int stringset) int hns_phy_led_set(struct net_device *netdev, int value) { int retval; - struct hns_nic_priv *priv = netdev_priv(netdev); - struct phy_device *phy_dev = priv->phy; + struct phy_device *phy_dev = netdev->phydev; retval = phy_write(phy_dev, HNS_PHY_PAGE_REG, HNS_PHY_PAGE_LED); retval |= phy_write(phy_dev, HNS_LED_FC_REG, value); @@ -1039,7 +1037,7 @@ int hns_set_phys_id(struct net_device *netdev, enum ethtool_phys_id_state state) { struct hns_nic_priv *priv = netdev_priv(netdev); struct hnae_handle *h = priv->ae_handle; - struct phy_device *phy_dev = priv->phy; + struct phy_device *phy_dev = netdev->phydev; int ret; if (phy_dev) @@ -1159,8 +1157,7 @@ static int hns_get_regs_len(struct net_device *net_dev) static int hns_nic_nway_reset(struct net_device *netdev) { int ret = 0; - struct hns_nic_priv *priv = netdev_priv(netdev); - struct phy_device *phy = priv->phy; + struct phy_device *phy = netdev->phydev; if (netif_running(netdev)) { if (phy) From d270f76c2d6ee3d96cfb1affb78a3d536e0b8fd6 Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Tue, 20 Sep 2016 22:30:12 +0200 Subject: [PATCH 1348/1715] net: ethernet: hisilicon: hns: use new api ethtool_{get|set}_link_ksettings The ethtool api {get|set}_settings is deprecated. We move this driver to new api {get|set}_link_ksettings. Signed-off-by: Philippe Reynes Signed-off-by: David S. Miller --- .../net/ethernet/hisilicon/hns/hns_ethtool.c | 105 ++++++++++-------- 1 file changed, 58 insertions(+), 47 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c index 0e2c17423a34..47e59bbfd061 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c @@ -64,14 +64,14 @@ static u32 hns_nic_get_link(struct net_device *net_dev) } static void hns_get_mdix_mode(struct net_device *net_dev, - struct ethtool_cmd *cmd) + struct ethtool_link_ksettings *cmd) { int mdix_ctrl, mdix, retval, is_resolved; struct phy_device *phy_dev = net_dev->phydev; if (!phy_dev || !phy_dev->mdio.bus) { - cmd->eth_tp_mdix_ctrl = ETH_TP_MDI_INVALID; - cmd->eth_tp_mdix = ETH_TP_MDI_INVALID; + cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI_INVALID; + cmd->base.eth_tp_mdix = ETH_TP_MDI_INVALID; return; } @@ -88,35 +88,35 @@ static void hns_get_mdix_mode(struct net_device *net_dev, switch (mdix_ctrl) { case 0x0: - cmd->eth_tp_mdix_ctrl = ETH_TP_MDI; + cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI; break; case 0x1: - cmd->eth_tp_mdix_ctrl = ETH_TP_MDI_X; + cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI_X; break; case 0x3: - cmd->eth_tp_mdix_ctrl = ETH_TP_MDI_AUTO; + cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI_AUTO; break; default: - cmd->eth_tp_mdix_ctrl = ETH_TP_MDI_INVALID; + cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI_INVALID; break; } if (!is_resolved) - cmd->eth_tp_mdix = ETH_TP_MDI_INVALID; + cmd->base.eth_tp_mdix = ETH_TP_MDI_INVALID; else if (mdix) - cmd->eth_tp_mdix = ETH_TP_MDI_X; + cmd->base.eth_tp_mdix = ETH_TP_MDI_X; else - cmd->eth_tp_mdix = ETH_TP_MDI; + cmd->base.eth_tp_mdix = ETH_TP_MDI; } /** - *hns_nic_get_settings - implement ethtool get settings + *hns_nic_get_link_ksettings - implement ethtool get link ksettings *@net_dev: net_device - *@cmd: ethtool_cmd + *@cmd: ethtool_link_ksettings *retuen 0 - success , negative --fail */ -static int hns_nic_get_settings(struct net_device *net_dev, - struct ethtool_cmd *cmd) +static int hns_nic_get_link_ksettings(struct net_device *net_dev, + struct ethtool_link_ksettings *cmd) { struct hns_nic_priv *priv = netdev_priv(net_dev); struct hnae_handle *h; @@ -124,6 +124,7 @@ static int hns_nic_get_settings(struct net_device *net_dev, int ret; u8 duplex; u16 speed; + u32 supported, advertising; if (!priv || !priv->ae_handle) return -ESRCH; @@ -138,38 +139,43 @@ static int hns_nic_get_settings(struct net_device *net_dev, return -EINVAL; } + ethtool_convert_link_mode_to_legacy_u32(&supported, + cmd->link_modes.supported); + ethtool_convert_link_mode_to_legacy_u32(&advertising, + cmd->link_modes.advertising); + /* When there is no phy, autoneg is off. */ - cmd->autoneg = false; - ethtool_cmd_speed_set(cmd, speed); - cmd->duplex = duplex; + cmd->base.autoneg = false; + cmd->base.cmd = speed; + cmd->base.duplex = duplex; if (net_dev->phydev) - (void)phy_ethtool_gset(net_dev->phydev, cmd); + (void)phy_ethtool_ksettings_get(net_dev->phydev, cmd); link_stat = hns_nic_get_link(net_dev); if (!link_stat) { - ethtool_cmd_speed_set(cmd, (u32)SPEED_UNKNOWN); - cmd->duplex = DUPLEX_UNKNOWN; + cmd->base.speed = (u32)SPEED_UNKNOWN; + cmd->base.duplex = DUPLEX_UNKNOWN; } - if (cmd->autoneg) - cmd->advertising |= ADVERTISED_Autoneg; + if (cmd->base.autoneg) + advertising |= ADVERTISED_Autoneg; - cmd->supported |= h->if_support; + supported |= h->if_support; if (h->phy_if == PHY_INTERFACE_MODE_SGMII) { - cmd->supported |= SUPPORTED_TP; - cmd->advertising |= ADVERTISED_1000baseT_Full; + supported |= SUPPORTED_TP; + advertising |= ADVERTISED_1000baseT_Full; } else if (h->phy_if == PHY_INTERFACE_MODE_XGMII) { - cmd->supported |= SUPPORTED_FIBRE; - cmd->advertising |= ADVERTISED_10000baseKR_Full; + supported |= SUPPORTED_FIBRE; + advertising |= ADVERTISED_10000baseKR_Full; } switch (h->media_type) { case HNAE_MEDIA_TYPE_FIBER: - cmd->port = PORT_FIBRE; + cmd->base.port = PORT_FIBRE; break; case HNAE_MEDIA_TYPE_COPPER: - cmd->port = PORT_TP; + cmd->base.port = PORT_TP; break; case HNAE_MEDIA_TYPE_UNKNOWN: default: @@ -177,23 +183,27 @@ static int hns_nic_get_settings(struct net_device *net_dev, } if (!(AE_IS_VER1(priv->enet_ver) && h->port_type == HNAE_PORT_DEBUG)) - cmd->supported |= SUPPORTED_Pause; + supported |= SUPPORTED_Pause; - cmd->transceiver = XCVR_EXTERNAL; - cmd->mdio_support = (ETH_MDIO_SUPPORTS_C45 | ETH_MDIO_SUPPORTS_C22); + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, + supported); + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, + advertising); + + cmd->base.mdio_support = ETH_MDIO_SUPPORTS_C45 | ETH_MDIO_SUPPORTS_C22; hns_get_mdix_mode(net_dev, cmd); return 0; } /** - *hns_nic_set_settings - implement ethtool set settings + *hns_nic_set_link_settings - implement ethtool set link ksettings *@net_dev: net_device - *@cmd: ethtool_cmd + *@cmd: ethtool_link_ksettings *retuen 0 - success , negative --fail */ -static int hns_nic_set_settings(struct net_device *net_dev, - struct ethtool_cmd *cmd) +static int hns_nic_set_link_ksettings(struct net_device *net_dev, + const struct ethtool_link_ksettings *cmd) { struct hns_nic_priv *priv = netdev_priv(net_dev); struct hnae_handle *h; @@ -207,24 +217,25 @@ static int hns_nic_set_settings(struct net_device *net_dev, return -ENODEV; h = priv->ae_handle; - speed = ethtool_cmd_speed(cmd); + speed = cmd->base.speed; if (h->phy_if == PHY_INTERFACE_MODE_XGMII) { - if (cmd->autoneg == AUTONEG_ENABLE || speed != SPEED_10000 || - cmd->duplex != DUPLEX_FULL) + if (cmd->base.autoneg == AUTONEG_ENABLE || + speed != SPEED_10000 || + cmd->base.duplex != DUPLEX_FULL) return -EINVAL; } else if (h->phy_if == PHY_INTERFACE_MODE_SGMII) { - if (!net_dev->phydev && cmd->autoneg == AUTONEG_ENABLE) + if (!net_dev->phydev && cmd->base.autoneg == AUTONEG_ENABLE) return -EINVAL; - if (speed == SPEED_1000 && cmd->duplex == DUPLEX_HALF) + if (speed == SPEED_1000 && cmd->base.duplex == DUPLEX_HALF) return -EINVAL; if (net_dev->phydev) - return phy_ethtool_sset(net_dev->phydev, cmd); + return phy_ethtool_ksettings_set(net_dev->phydev, cmd); if ((speed != SPEED_10 && speed != SPEED_100 && - speed != SPEED_1000) || (cmd->duplex != DUPLEX_HALF && - cmd->duplex != DUPLEX_FULL)) + speed != SPEED_1000) || (cmd->base.duplex != DUPLEX_HALF && + cmd->base.duplex != DUPLEX_FULL)) return -EINVAL; } else { netdev_err(net_dev, "Not supported!"); @@ -232,7 +243,7 @@ static int hns_nic_set_settings(struct net_device *net_dev, } if (h->dev->ops->adjust_link) { - h->dev->ops->adjust_link(h, (int)speed, cmd->duplex); + h->dev->ops->adjust_link(h, (int)speed, cmd->base.duplex); return 0; } @@ -1264,8 +1275,6 @@ static int hns_get_rxnfc(struct net_device *netdev, static const struct ethtool_ops hns_ethtool_ops = { .get_drvinfo = hns_nic_get_drvinfo, .get_link = hns_nic_get_link, - .get_settings = hns_nic_get_settings, - .set_settings = hns_nic_set_settings, .get_ringparam = hns_get_ringparam, .get_pauseparam = hns_get_pauseparam, .set_pauseparam = hns_set_pauseparam, @@ -1285,6 +1294,8 @@ static const struct ethtool_ops hns_ethtool_ops = { .get_rxfh = hns_get_rss, .set_rxfh = hns_set_rss, .get_rxnfc = hns_get_rxnfc, + .get_link_ksettings = hns_nic_get_link_ksettings, + .set_link_ksettings = hns_nic_set_link_ksettings, }; void hns_ethtool_set_ops(struct net_device *ndev) From efee95f42b5dddedcaff0a0eaa44e170fc7522e8 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Tue, 20 Sep 2016 19:25:58 -0400 Subject: [PATCH 1349/1715] ptp_clock: future-proofing drivers against PTP subsystem becoming optional Drivers must be ready to accept NULL from ptp_clock_register() if the PTP clock subsystem is configured out. This patch documents that and ensures that all drivers cope well with a NULL return. Signed-off-by: Nicolas Pitre Reviewed-by: Eugenia Emantayev Acked-by: Richard Cochran Acked-by: Edward Cree Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/e1000e/ptp.c | 2 +- drivers/net/ethernet/intel/i40e/i40e_ptp.c | 2 +- drivers/net/ethernet/intel/igb/igb_ptp.c | 2 +- drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c | 2 +- drivers/net/ethernet/mellanox/mlx4/en_clock.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_clock.c | 2 +- drivers/net/ethernet/sfc/ptp.c | 14 +++++++------- drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c | 2 +- include/linux/ptp_clock_kernel.h | 5 +++++ 9 files changed, 19 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/ptp.c b/drivers/net/ethernet/intel/e1000e/ptp.c index 2e1b17ad52a3..ad03763e009a 100644 --- a/drivers/net/ethernet/intel/e1000e/ptp.c +++ b/drivers/net/ethernet/intel/e1000e/ptp.c @@ -334,7 +334,7 @@ void e1000e_ptp_init(struct e1000_adapter *adapter) if (IS_ERR(adapter->ptp_clock)) { adapter->ptp_clock = NULL; e_err("ptp_clock_register failed\n"); - } else { + } else if (adapter->ptp_clock) { e_info("registered PHC clock\n"); } } diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c index ed39cbad24bd..f1feceab758a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c @@ -669,7 +669,7 @@ void i40e_ptp_init(struct i40e_pf *pf) pf->ptp_clock = NULL; dev_err(&pf->pdev->dev, "%s: ptp_clock_register failed\n", __func__); - } else { + } else if (pf->ptp_clock) { struct timespec64 ts; u32 regval; diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c index 66dfa2085cc7..1dd14e166dc8 100644 --- a/drivers/net/ethernet/intel/igb/igb_ptp.c +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c @@ -1159,7 +1159,7 @@ void igb_ptp_init(struct igb_adapter *adapter) if (IS_ERR(adapter->ptp_clock)) { adapter->ptp_clock = NULL; dev_err(&adapter->pdev->dev, "ptp_clock_register failed\n"); - } else { + } else if (adapter->ptp_clock) { dev_info(&adapter->pdev->dev, "added PHC on %s\n", adapter->netdev->name); adapter->ptp_flags |= IGB_PTP_ENABLED; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c index e5431bfe3339..a92277683a64 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c @@ -1254,7 +1254,7 @@ static long ixgbe_ptp_create_clock(struct ixgbe_adapter *adapter) adapter->ptp_clock = NULL; e_dev_err("ptp_clock_register failed\n"); return err; - } else + } else if (adapter->ptp_clock) e_dev_info("registered PHC device on %s\n", netdev->name); /* set default timestamp mode to disabled here. We do this in diff --git a/drivers/net/ethernet/mellanox/mlx4/en_clock.c b/drivers/net/ethernet/mellanox/mlx4/en_clock.c index 1494997c4f7e..08fc5fc56d43 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_clock.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_clock.c @@ -298,7 +298,7 @@ void mlx4_en_init_timestamp(struct mlx4_en_dev *mdev) if (IS_ERR(mdev->ptp_clock)) { mdev->ptp_clock = NULL; mlx4_err(mdev, "ptp_clock_register failed\n"); - } else { + } else if (mdev->ptp_clock) { mlx4_info(mdev, "registered PHC clock\n"); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c index 847a8f3ac2b2..13dc388667b6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c @@ -273,7 +273,7 @@ void mlx5e_timestamp_init(struct mlx5e_priv *priv) tstamp->ptp = ptp_clock_register(&tstamp->ptp_info, &priv->mdev->pdev->dev); - if (IS_ERR_OR_NULL(tstamp->ptp)) { + if (IS_ERR(tstamp->ptp)) { mlx5_core_warn(priv->mdev, "ptp_clock_register failed %ld\n", PTR_ERR(tstamp->ptp)); tstamp->ptp = NULL; diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c index dd204d9704c6..77a5364f7a10 100644 --- a/drivers/net/ethernet/sfc/ptp.c +++ b/drivers/net/ethernet/sfc/ptp.c @@ -1269,13 +1269,13 @@ int efx_ptp_probe(struct efx_nic *efx, struct efx_channel *channel) if (IS_ERR(ptp->phc_clock)) { rc = PTR_ERR(ptp->phc_clock); goto fail3; - } - - INIT_WORK(&ptp->pps_work, efx_ptp_pps_worker); - ptp->pps_workwq = create_singlethread_workqueue("sfc_pps"); - if (!ptp->pps_workwq) { - rc = -ENOMEM; - goto fail4; + } else if (ptp->phc_clock) { + INIT_WORK(&ptp->pps_work, efx_ptp_pps_worker); + ptp->pps_workwq = create_singlethread_workqueue("sfc_pps"); + if (!ptp->pps_workwq) { + rc = -ENOMEM; + goto fail4; + } } } ptp->nic_ts_enabled = false; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c index 170a18b61281..6e3b82972ce8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c @@ -187,7 +187,7 @@ int stmmac_ptp_register(struct stmmac_priv *priv) if (IS_ERR(priv->ptp_clock)) { priv->ptp_clock = NULL; pr_err("ptp_clock_register() failed on %s\n", priv->dev->name); - } else + } else if (priv->ptp_clock) pr_debug("Added PTP HW clock successfully on %s\n", priv->dev->name); diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index 6b15e168148a..5ad54fc66cf0 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -127,6 +127,11 @@ struct ptp_clock; * * @info: Structure describing the new clock. * @parent: Pointer to the parent device of the new clock. + * + * Returns a valid pointer on success or PTR_ERR on failure. If PHC + * support is missing at the configuration level, this function + * returns NULL, and drivers are expected to gracefully handle that + * case separately. */ extern struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, From 0e7b99257be4b596d9fdd435698c0bfdb0b38d91 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 21 Sep 2016 01:40:31 +0200 Subject: [PATCH 1350/1715] net: dsa: mv88e6xxx: Add helper for accessing port registers There is a device coming soon which places its port registers somewhere different to all other Marvell switches supported so far. Add helper functions for reading/writing port registers, making it easier to handle this new device. Signed-off-by: Andrew Lunn Reviewed-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 370 +++++++++++++------------- drivers/net/dsa/mv88e6xxx/mv88e6xxx.h | 1 - 2 files changed, 182 insertions(+), 189 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 1d71802396fb..25bd3fa744d9 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -216,6 +216,22 @@ int mv88e6xxx_write(struct mv88e6xxx_chip *chip, int addr, int reg, u16 val) return 0; } +int mv88e6xxx_port_read(struct mv88e6xxx_chip *chip, int port, int reg, + u16 *val) +{ + int addr = chip->info->port_base_addr + port; + + return mv88e6xxx_read(chip, addr, reg, val); +} + +int mv88e6xxx_port_write(struct mv88e6xxx_chip *chip, int port, int reg, + u16 val) +{ + int addr = chip->info->port_base_addr + port; + + return mv88e6xxx_write(chip, addr, reg, val); +} + static int mv88e6xxx_phy_read(struct mv88e6xxx_chip *chip, int phy, int reg, u16 *val) { @@ -585,23 +601,23 @@ static void mv88e6xxx_adjust_link(struct dsa_switch *ds, int port, struct phy_device *phydev) { struct mv88e6xxx_chip *chip = ds->priv; - u32 reg; - int ret; + u16 reg; + int err; if (!phy_is_pseudo_fixed_link(phydev)) return; mutex_lock(&chip->reg_lock); - ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_PCS_CTRL); - if (ret < 0) + err = mv88e6xxx_port_read(chip, port, PORT_PCS_CTRL, ®); + if (err) goto out; - reg = ret & ~(PORT_PCS_CTRL_LINK_UP | - PORT_PCS_CTRL_FORCE_LINK | - PORT_PCS_CTRL_DUPLEX_FULL | - PORT_PCS_CTRL_FORCE_DUPLEX | - PORT_PCS_CTRL_UNFORCED); + reg &= ~(PORT_PCS_CTRL_LINK_UP | + PORT_PCS_CTRL_FORCE_LINK | + PORT_PCS_CTRL_DUPLEX_FULL | + PORT_PCS_CTRL_FORCE_DUPLEX | + PORT_PCS_CTRL_UNFORCED); reg |= PORT_PCS_CTRL_FORCE_LINK; if (phydev->link) @@ -639,7 +655,7 @@ static void mv88e6xxx_adjust_link(struct dsa_switch *ds, int port, reg |= (PORT_PCS_CTRL_RGMII_DELAY_RXCLK | PORT_PCS_CTRL_RGMII_DELAY_TXCLK); } - _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_PCS_CTRL, reg); + mv88e6xxx_port_write(chip, port, PORT_PCS_CTRL, reg); out: mutex_unlock(&chip->reg_lock); @@ -799,22 +815,22 @@ static uint64_t _mv88e6xxx_get_ethtool_stat(struct mv88e6xxx_chip *chip, { u32 low; u32 high = 0; - int ret; + int err; + u16 reg; u64 value; switch (s->type) { case PORT: - ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), s->reg); - if (ret < 0) + err = mv88e6xxx_port_read(chip, port, s->reg, ®); + if (err) return UINT64_MAX; - low = ret; + low = reg; if (s->sizeof_stat == 4) { - ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), - s->reg + 1); - if (ret < 0) + err = mv88e6xxx_port_read(chip, port, s->reg + 1, ®); + if (err) return UINT64_MAX; - high = ret; + high = reg; } break; case BANK0: @@ -893,6 +909,8 @@ static void mv88e6xxx_get_regs(struct dsa_switch *ds, int port, struct ethtool_regs *regs, void *_p) { struct mv88e6xxx_chip *chip = ds->priv; + int err; + u16 reg; u16 *p = _p; int i; @@ -903,11 +921,10 @@ static void mv88e6xxx_get_regs(struct dsa_switch *ds, int port, mutex_lock(&chip->reg_lock); for (i = 0; i < 32; i++) { - int ret; - ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), i); - if (ret >= 0) - p[i] = ret; + err = mv88e6xxx_port_read(chip, port, i, ®); + if (!err) + p[i] = reg; } mutex_unlock(&chip->reg_lock); @@ -938,7 +955,7 @@ static int mv88e6xxx_get_eee(struct dsa_switch *ds, int port, e->eee_enabled = !!(reg & 0x0200); e->tx_lpi_enabled = !!(reg & 0x0100); - err = mv88e6xxx_read(chip, REG_PORT(port), PORT_STATUS, ®); + err = mv88e6xxx_port_read(chip, port, PORT_STATUS, ®); if (err) goto out; @@ -1106,12 +1123,13 @@ static int _mv88e6xxx_port_state(struct mv88e6xxx_chip *chip, int port, u8 state) { struct dsa_switch *ds = chip->ds; - int reg, ret = 0; + u16 reg; + int err; u8 oldstate; - reg = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_CONTROL); - if (reg < 0) - return reg; + err = mv88e6xxx_port_read(chip, port, PORT_CONTROL, ®); + if (err) + return err; oldstate = reg & PORT_CONTROL_STATE_MASK; @@ -1124,23 +1142,22 @@ static int _mv88e6xxx_port_state(struct mv88e6xxx_chip *chip, int port, oldstate == PORT_CONTROL_STATE_FORWARDING) && (state == PORT_CONTROL_STATE_DISABLED || state == PORT_CONTROL_STATE_BLOCKING)) { - ret = _mv88e6xxx_atu_remove(chip, 0, port, false); - if (ret) - return ret; + err = _mv88e6xxx_atu_remove(chip, 0, port, false); + if (err) + return err; } reg = (reg & ~PORT_CONTROL_STATE_MASK) | state; - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_CONTROL, - reg); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_CONTROL, reg); + if (err) + return err; netdev_dbg(ds->ports[port].netdev, "PortState %s (was %s)\n", mv88e6xxx_port_state_names[state], mv88e6xxx_port_state_names[oldstate]); } - return ret; + return err; } static int _mv88e6xxx_port_based_vlan_map(struct mv88e6xxx_chip *chip, int port) @@ -1149,7 +1166,8 @@ static int _mv88e6xxx_port_based_vlan_map(struct mv88e6xxx_chip *chip, int port) const u16 mask = (1 << chip->info->num_ports) - 1; struct dsa_switch *ds = chip->ds; u16 output_ports = 0; - int reg; + u16 reg; + int err; int i; /* allow CPU port or DSA link(s) to send frames to every port */ @@ -1170,14 +1188,14 @@ static int _mv88e6xxx_port_based_vlan_map(struct mv88e6xxx_chip *chip, int port) /* prevent frames from going back out of the port they came in on */ output_ports &= ~BIT(port); - reg = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_BASE_VLAN); - if (reg < 0) - return reg; + err = mv88e6xxx_port_read(chip, port, PORT_BASE_VLAN, ®); + if (err) + return err; reg &= ~mask; reg |= output_ports & mask; - return _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_BASE_VLAN, reg); + return mv88e6xxx_port_write(chip, port, PORT_BASE_VLAN, reg); } static void mv88e6xxx_port_stp_state_set(struct dsa_switch *ds, int port, @@ -1218,23 +1236,22 @@ static int _mv88e6xxx_port_pvid(struct mv88e6xxx_chip *chip, int port, u16 *new, u16 *old) { struct dsa_switch *ds = chip->ds; - u16 pvid; - int ret; + u16 pvid, reg; + int err; - ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_DEFAULT_VLAN); - if (ret < 0) - return ret; + err = mv88e6xxx_port_read(chip, port, PORT_DEFAULT_VLAN, ®); + if (err) + return err; - pvid = ret & PORT_DEFAULT_VLAN_MASK; + pvid = reg & PORT_DEFAULT_VLAN_MASK; if (new) { - ret &= ~PORT_DEFAULT_VLAN_MASK; - ret |= *new & PORT_DEFAULT_VLAN_MASK; + reg &= ~PORT_DEFAULT_VLAN_MASK; + reg |= *new & PORT_DEFAULT_VLAN_MASK; - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), - PORT_DEFAULT_VLAN, ret); - if (ret < 0) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_DEFAULT_VLAN, reg); + if (err) + return err; netdev_dbg(ds->ports[port].netdev, "DefaultVID %d (was %d)\n", *new, pvid); @@ -1613,7 +1630,8 @@ static int _mv88e6xxx_port_fid(struct mv88e6xxx_chip *chip, int port, struct dsa_switch *ds = chip->ds; u16 upper_mask; u16 fid; - int ret; + u16 reg; + int err; if (mv88e6xxx_num_databases(chip) == 4096) upper_mask = 0xff; @@ -1623,37 +1641,35 @@ static int _mv88e6xxx_port_fid(struct mv88e6xxx_chip *chip, int port, return -EOPNOTSUPP; /* Port's default FID bits 3:0 are located in reg 0x06, offset 12 */ - ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_BASE_VLAN); - if (ret < 0) - return ret; + err = mv88e6xxx_port_read(chip, port, PORT_BASE_VLAN, ®); + if (err) + return err; - fid = (ret & PORT_BASE_VLAN_FID_3_0_MASK) >> 12; + fid = (reg & PORT_BASE_VLAN_FID_3_0_MASK) >> 12; if (new) { - ret &= ~PORT_BASE_VLAN_FID_3_0_MASK; - ret |= (*new << 12) & PORT_BASE_VLAN_FID_3_0_MASK; + reg &= ~PORT_BASE_VLAN_FID_3_0_MASK; + reg |= (*new << 12) & PORT_BASE_VLAN_FID_3_0_MASK; - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_BASE_VLAN, - ret); - if (ret < 0) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_BASE_VLAN, reg); + if (err) + return err; } /* Port's default FID bits 11:4 are located in reg 0x05, offset 0 */ - ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_CONTROL_1); - if (ret < 0) - return ret; + err = mv88e6xxx_port_read(chip, port, PORT_CONTROL_1, ®); + if (err) + return err; - fid |= (ret & upper_mask) << 4; + fid |= (reg & upper_mask) << 4; if (new) { - ret &= ~upper_mask; - ret |= (*new >> 4) & upper_mask; + reg &= ~upper_mask; + reg |= (*new >> 4) & upper_mask; - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_CONTROL_1, - ret); - if (ret < 0) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_CONTROL_1, reg); + if (err) + return err; netdev_dbg(ds->ports[port].netdev, "FID %d (was %d)\n", *new, fid); @@ -1865,26 +1881,26 @@ static int mv88e6xxx_port_vlan_filtering(struct dsa_switch *ds, int port, struct mv88e6xxx_chip *chip = ds->priv; u16 old, new = vlan_filtering ? PORT_CONTROL_2_8021Q_SECURE : PORT_CONTROL_2_8021Q_DISABLED; - int ret; + u16 reg; + int err; if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_VTU)) return -EOPNOTSUPP; mutex_lock(&chip->reg_lock); - ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_CONTROL_2); - if (ret < 0) + err = mv88e6xxx_port_read(chip, port, PORT_CONTROL_2, ®); + if (err) goto unlock; - old = ret & PORT_CONTROL_2_8021Q_MASK; + old = reg & PORT_CONTROL_2_8021Q_MASK; if (new != old) { - ret &= ~PORT_CONTROL_2_8021Q_MASK; - ret |= new & PORT_CONTROL_2_8021Q_MASK; + reg &= ~PORT_CONTROL_2_8021Q_MASK; + reg |= new & PORT_CONTROL_2_8021Q_MASK; - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_CONTROL_2, - ret); - if (ret < 0) + err = mv88e6xxx_port_write(chip, port, PORT_CONTROL_2, reg); + if (err) goto unlock; netdev_dbg(ds->ports[port].netdev, "802.1Q Mode %s (was %s)\n", @@ -1892,11 +1908,11 @@ static int mv88e6xxx_port_vlan_filtering(struct dsa_switch *ds, int port, mv88e6xxx_port_8021q_mode_names[old]); } - ret = 0; + err = 0; unlock: mutex_unlock(&chip->reg_lock); - return ret; + return err; } static int @@ -2406,19 +2422,20 @@ static int mv88e6xxx_switch_reset(struct mv88e6xxx_chip *chip) u16 is_reset = (ppu_active ? 0x8800 : 0xc800); struct gpio_desc *gpiod = chip->reset; unsigned long timeout; - int ret; + int err, ret; + u16 reg; int i; /* Set all ports to the disabled state. */ for (i = 0; i < chip->info->num_ports; i++) { - ret = _mv88e6xxx_reg_read(chip, REG_PORT(i), PORT_CONTROL); - if (ret < 0) - return ret; + err = mv88e6xxx_port_read(chip, i, PORT_CONTROL, ®); + if (err) + return err; - ret = _mv88e6xxx_reg_write(chip, REG_PORT(i), PORT_CONTROL, - ret & 0xfffc); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, i, PORT_CONTROL, + reg & 0xfffc); + if (err) + return err; } /* Wait for transmit queues to drain. */ @@ -2437,11 +2454,11 @@ static int mv88e6xxx_switch_reset(struct mv88e6xxx_chip *chip) * through global registers 0x18 and 0x19. */ if (ppu_active) - ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, 0x04, 0xc000); + err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, 0x04, 0xc000); else - ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, 0x04, 0xc400); - if (ret) - return ret; + err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, 0x04, 0xc400); + if (err) + return err; /* Wait up to one second for reset to complete. */ timeout = jiffies + 1 * HZ; @@ -2455,11 +2472,11 @@ static int mv88e6xxx_switch_reset(struct mv88e6xxx_chip *chip) usleep_range(1000, 2000); } if (time_after(jiffies, timeout)) - ret = -ETIMEDOUT; + err = -ETIMEDOUT; else - ret = 0; + err = 0; - return ret; + return err; } static int mv88e6xxx_serdes_power_on(struct mv88e6xxx_chip *chip) @@ -2480,21 +2497,10 @@ static int mv88e6xxx_serdes_power_on(struct mv88e6xxx_chip *chip) return err; } -static int mv88e6xxx_port_read(struct mv88e6xxx_chip *chip, int port, - int reg, u16 *val) -{ - int addr = chip->info->port_base_addr + port; - - if (port >= chip->info->num_ports) - return -EINVAL; - - return mv88e6xxx_read(chip, addr, reg, val); -} - static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) { struct dsa_switch *ds = chip->ds; - int ret; + int err; u16 reg; if (mv88e6xxx_6352_family(chip) || mv88e6xxx_6351_family(chip) || @@ -2507,7 +2513,7 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) * and all DSA ports to their maximum bandwidth and * full duplex. */ - reg = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_PCS_CTRL); + err = mv88e6xxx_port_read(chip, port, PORT_PCS_CTRL, ®); if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) { reg &= ~PORT_PCS_CTRL_UNFORCED; reg |= PORT_PCS_CTRL_FORCE_LINK | @@ -2522,10 +2528,9 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) reg |= PORT_PCS_CTRL_UNFORCED; } - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), - PORT_PCS_CTRL, reg); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_PCS_CTRL, reg); + if (err) + return err; } /* Port Control: disable Drop-on-Unlock, disable Drop-on-Lock, @@ -2576,26 +2581,25 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) PORT_CONTROL_FORWARD_UNKNOWN_MC; } if (reg) { - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), - PORT_CONTROL, reg); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_CONTROL, reg); + if (err) + return err; } /* If this port is connected to a SerDes, make sure the SerDes is not * powered down. */ if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_SERDES)) { - ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_STATUS); - if (ret < 0) - return ret; - ret &= PORT_STATUS_CMODE_MASK; - if ((ret == PORT_STATUS_CMODE_100BASE_X) || - (ret == PORT_STATUS_CMODE_1000BASE_X) || - (ret == PORT_STATUS_CMODE_SGMII)) { - ret = mv88e6xxx_serdes_power_on(chip); - if (ret < 0) - return ret; + err = mv88e6xxx_port_read(chip, port, PORT_STATUS, ®); + if (err) + return err; + reg &= PORT_STATUS_CMODE_MASK; + if ((reg == PORT_STATUS_CMODE_100BASE_X) || + (reg == PORT_STATUS_CMODE_1000BASE_X) || + (reg == PORT_STATUS_CMODE_SGMII)) { + err = mv88e6xxx_serdes_power_on(chip); + if (err < 0) + return err; } } @@ -2629,10 +2633,9 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) reg |= PORT_CONTROL_2_8021Q_DISABLED; if (reg) { - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), - PORT_CONTROL_2, reg); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_CONTROL_2, reg); + if (err) + return err; } /* Port Association Vector: when learning source addresses @@ -2645,16 +2648,14 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) if (dsa_is_cpu_port(ds, port)) reg = 0; - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_ASSOC_VECTOR, - reg); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_ASSOC_VECTOR, reg); + if (err) + return err; /* Egress rate control 2: disable egress rate control. */ - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_RATE_CONTROL_2, - 0x0000); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_RATE_CONTROL_2, 0x0000); + if (err) + return err; if (mv88e6xxx_6352_family(chip) || mv88e6xxx_6351_family(chip) || mv88e6xxx_6165_family(chip) || mv88e6xxx_6097_family(chip) || @@ -2663,96 +2664,89 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) * be paused for by the remote end or the period of * time that this port can pause the remote end. */ - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), - PORT_PAUSE_CTRL, 0x0000); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_PAUSE_CTRL, 0x0000); + if (err) + return err; /* Port ATU control: disable limiting the number of * address database entries that this port is allowed * to use. */ - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), - PORT_ATU_CONTROL, 0x0000); + err = mv88e6xxx_port_write(chip, port, PORT_ATU_CONTROL, + 0x0000); /* Priority Override: disable DA, SA and VTU priority * override. */ - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), - PORT_PRI_OVERRIDE, 0x0000); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_PRI_OVERRIDE, + 0x0000); + if (err) + return err; /* Port Ethertype: use the Ethertype DSA Ethertype * value. */ if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_EDSA)) { - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), - PORT_ETH_TYPE, ETH_P_EDSA); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_ETH_TYPE, + ETH_P_EDSA); + if (err) + return err; } /* Tag Remap: use an identity 802.1p prio -> switch * prio mapping. */ - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), - PORT_TAG_REGMAP_0123, 0x3210); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_TAG_REGMAP_0123, + 0x3210); + if (err) + return err; /* Tag Remap 2: use an identity 802.1p prio -> switch * prio mapping. */ - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), - PORT_TAG_REGMAP_4567, 0x7654); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_TAG_REGMAP_4567, + 0x7654); + if (err) + return err; } /* Rate Control: disable ingress rate limiting. */ if (mv88e6xxx_6352_family(chip) || mv88e6xxx_6351_family(chip) || mv88e6xxx_6165_family(chip) || mv88e6xxx_6097_family(chip) || mv88e6xxx_6320_family(chip)) { - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), - PORT_RATE_CONTROL, 0x0001); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_RATE_CONTROL, + 0x0001); + if (err) + return err; } else if (mv88e6xxx_6185_family(chip) || mv88e6xxx_6095_family(chip)) { - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), - PORT_RATE_CONTROL, 0x0000); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_RATE_CONTROL, + 0x0000); + if (err) + return err; } /* Port Control 1: disable trunking, disable sending * learning messages to this port. */ - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_CONTROL_1, - 0x0000); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_CONTROL_1, 0x0000); + if (err) + return err; /* Port based VLAN map: give each port the same default address * database, and allow bidirectional communication between the * CPU and DSA port(s), and the other ports. */ - ret = _mv88e6xxx_port_fid_set(chip, port, 0); - if (ret) - return ret; + err = _mv88e6xxx_port_fid_set(chip, port, 0); + if (err) + return err; - ret = _mv88e6xxx_port_based_vlan_map(chip, port); - if (ret) - return ret; + err = _mv88e6xxx_port_based_vlan_map(chip, port); + if (err) + return err; /* Default VLAN ID and priority: don't set a default VLAN * ID, and set the default packet priority to zero. */ - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_DEFAULT_VLAN, - 0x0000); - if (ret) - return ret; - - return 0; + return mv88e6xxx_port_write(chip, port, PORT_DEFAULT_VLAN, 0x0000); } static int mv88e6xxx_g1_set_switch_mac(struct mv88e6xxx_chip *chip, u8 *addr) diff --git a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h index 52f3f5231f51..e349d0d64645 100644 --- a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h +++ b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h @@ -37,7 +37,6 @@ #define ADDR_SERDES 0x0f #define SERDES_PAGE_FIBER 0x01 -#define REG_PORT(p) (0x10 + (p)) #define PORT_STATUS 0x00 #define PORT_STATUS_PAUSE_EN BIT(15) #define PORT_STATUS_MY_PAUSE BIT(14) From d6b1023a83e85943d1f2fa3baafbb4d5cfee7179 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 21 Sep 2016 01:40:32 +0200 Subject: [PATCH 1351/1715] net: dsa: mv88e6xxx: Convert flag bits to unsigned long long We are soon going to run out of flag bits on 32bit systems. Convert to unsigned long long. Signed-off-by: Andrew Lunn Reviewed-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/mv88e6xxx.h | 52 +++++++++++++-------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h index e349d0d64645..827988397fd8 100644 --- a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h +++ b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h @@ -452,36 +452,36 @@ enum mv88e6xxx_cap { }; /* Bitmask of capabilities */ -#define MV88E6XXX_FLAG_EDSA BIT(MV88E6XXX_CAP_EDSA) -#define MV88E6XXX_FLAG_EEE BIT(MV88E6XXX_CAP_EEE) +#define MV88E6XXX_FLAG_EDSA BIT_ULL(MV88E6XXX_CAP_EDSA) +#define MV88E6XXX_FLAG_EEE BIT_ULL(MV88E6XXX_CAP_EEE) -#define MV88E6XXX_FLAG_SMI_CMD BIT(MV88E6XXX_CAP_SMI_CMD) -#define MV88E6XXX_FLAG_SMI_DATA BIT(MV88E6XXX_CAP_SMI_DATA) +#define MV88E6XXX_FLAG_SMI_CMD BIT_ULL(MV88E6XXX_CAP_SMI_CMD) +#define MV88E6XXX_FLAG_SMI_DATA BIT_ULL(MV88E6XXX_CAP_SMI_DATA) -#define MV88E6XXX_FLAG_PHY_PAGE BIT(MV88E6XXX_CAP_PHY_PAGE) +#define MV88E6XXX_FLAG_PHY_PAGE BIT_ULL(MV88E6XXX_CAP_PHY_PAGE) -#define MV88E6XXX_FLAG_SERDES BIT(MV88E6XXX_CAP_SERDES) +#define MV88E6XXX_FLAG_SERDES BIT_ULL(MV88E6XXX_CAP_SERDES) -#define MV88E6XXX_FLAG_GLOBAL2 BIT(MV88E6XXX_CAP_GLOBAL2) -#define MV88E6XXX_FLAG_G2_MGMT_EN_2X BIT(MV88E6XXX_CAP_G2_MGMT_EN_2X) -#define MV88E6XXX_FLAG_G2_MGMT_EN_0X BIT(MV88E6XXX_CAP_G2_MGMT_EN_0X) -#define MV88E6XXX_FLAG_G2_IRL_CMD BIT(MV88E6XXX_CAP_G2_IRL_CMD) -#define MV88E6XXX_FLAG_G2_IRL_DATA BIT(MV88E6XXX_CAP_G2_IRL_DATA) -#define MV88E6XXX_FLAG_G2_PVT_ADDR BIT(MV88E6XXX_CAP_G2_PVT_ADDR) -#define MV88E6XXX_FLAG_G2_PVT_DATA BIT(MV88E6XXX_CAP_G2_PVT_DATA) -#define MV88E6XXX_FLAG_G2_SWITCH_MAC BIT(MV88E6XXX_CAP_G2_SWITCH_MAC) -#define MV88E6XXX_FLAG_G2_POT BIT(MV88E6XXX_CAP_G2_POT) -#define MV88E6XXX_FLAG_G2_EEPROM_CMD BIT(MV88E6XXX_CAP_G2_EEPROM_CMD) -#define MV88E6XXX_FLAG_G2_EEPROM_DATA BIT(MV88E6XXX_CAP_G2_EEPROM_DATA) -#define MV88E6XXX_FLAG_G2_SMI_PHY_CMD BIT(MV88E6XXX_CAP_G2_SMI_PHY_CMD) -#define MV88E6XXX_FLAG_G2_SMI_PHY_DATA BIT(MV88E6XXX_CAP_G2_SMI_PHY_DATA) +#define MV88E6XXX_FLAG_GLOBAL2 BIT_ULL(MV88E6XXX_CAP_GLOBAL2) +#define MV88E6XXX_FLAG_G2_MGMT_EN_2X BIT_ULL(MV88E6XXX_CAP_G2_MGMT_EN_2X) +#define MV88E6XXX_FLAG_G2_MGMT_EN_0X BIT_ULL(MV88E6XXX_CAP_G2_MGMT_EN_0X) +#define MV88E6XXX_FLAG_G2_IRL_CMD BIT_ULL(MV88E6XXX_CAP_G2_IRL_CMD) +#define MV88E6XXX_FLAG_G2_IRL_DATA BIT_ULL(MV88E6XXX_CAP_G2_IRL_DATA) +#define MV88E6XXX_FLAG_G2_PVT_ADDR BIT_ULL(MV88E6XXX_CAP_G2_PVT_ADDR) +#define MV88E6XXX_FLAG_G2_PVT_DATA BIT_ULL(MV88E6XXX_CAP_G2_PVT_DATA) +#define MV88E6XXX_FLAG_G2_SWITCH_MAC BIT_ULL(MV88E6XXX_CAP_G2_SWITCH_MAC) +#define MV88E6XXX_FLAG_G2_POT BIT_ULL(MV88E6XXX_CAP_G2_POT) +#define MV88E6XXX_FLAG_G2_EEPROM_CMD BIT_ULL(MV88E6XXX_CAP_G2_EEPROM_CMD) +#define MV88E6XXX_FLAG_G2_EEPROM_DATA BIT_ULL(MV88E6XXX_CAP_G2_EEPROM_DATA) +#define MV88E6XXX_FLAG_G2_SMI_PHY_CMD BIT_ULL(MV88E6XXX_CAP_G2_SMI_PHY_CMD) +#define MV88E6XXX_FLAG_G2_SMI_PHY_DATA BIT_ULL(MV88E6XXX_CAP_G2_SMI_PHY_DATA) -#define MV88E6XXX_FLAG_PPU BIT(MV88E6XXX_CAP_PPU) -#define MV88E6XXX_FLAG_PPU_ACTIVE BIT(MV88E6XXX_CAP_PPU_ACTIVE) -#define MV88E6XXX_FLAG_STU BIT(MV88E6XXX_CAP_STU) -#define MV88E6XXX_FLAG_TEMP BIT(MV88E6XXX_CAP_TEMP) -#define MV88E6XXX_FLAG_TEMP_LIMIT BIT(MV88E6XXX_CAP_TEMP_LIMIT) -#define MV88E6XXX_FLAG_VTU BIT(MV88E6XXX_CAP_VTU) +#define MV88E6XXX_FLAG_PPU BIT_ULL(MV88E6XXX_CAP_PPU) +#define MV88E6XXX_FLAG_PPU_ACTIVE BIT_ULL(MV88E6XXX_CAP_PPU_ACTIVE) +#define MV88E6XXX_FLAG_STU BIT_ULL(MV88E6XXX_CAP_STU) +#define MV88E6XXX_FLAG_TEMP BIT_ULL(MV88E6XXX_CAP_TEMP) +#define MV88E6XXX_FLAG_TEMP_LIMIT BIT_ULL(MV88E6XXX_CAP_TEMP_LIMIT) +#define MV88E6XXX_FLAG_VTU BIT_ULL(MV88E6XXX_CAP_VTU) /* EEPROM Programming via Global2 with 16-bit data */ #define MV88E6XXX_FLAGS_EEPROM16 \ @@ -614,7 +614,7 @@ struct mv88e6xxx_info { unsigned int num_ports; unsigned int port_base_addr; unsigned int age_time_coeff; - unsigned long flags; + unsigned long long flags; }; struct mv88e6xxx_atu_entry { From f9616c35a0d786bc64fff4bf819d1e4984873367 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 20 Sep 2016 22:45:58 -0700 Subject: [PATCH 1352/1715] tcp: implement TSQ for retransmits We saw sch_fq drops caused by the per flow limit of 100 packets and TCP when dealing with large cwnd and bursts of retransmits. Even after increasing the limit to 1000, and even after commit 10d3be569243 ("tcp-tso: do not split TSO packets at retransmit time"), we can still have these drops. Under certain conditions, TCP can spend a considerable amount of time queuing thousands of skbs in a single tcp_xmit_retransmit_queue() invocation, incurring latency spikes and stalls of other softirq handlers. This patch implements TSQ for retransmits, limiting number of packets and giving more chance for scheduling packets in both ways. Signed-off-by: Eric Dumazet Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 72 ++++++++++++++++++++++++++++--------------- 1 file changed, 47 insertions(+), 25 deletions(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 7d025a7804b5..478dfc539178 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -734,9 +734,16 @@ static void tcp_tsq_handler(struct sock *sk) { if ((1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_CLOSING | - TCPF_CLOSE_WAIT | TCPF_LAST_ACK)) - tcp_write_xmit(sk, tcp_current_mss(sk), tcp_sk(sk)->nonagle, + TCPF_CLOSE_WAIT | TCPF_LAST_ACK)) { + struct tcp_sock *tp = tcp_sk(sk); + + if (tp->lost_out > tp->retrans_out && + tp->snd_cwnd > tcp_packets_in_flight(tp)) + tcp_xmit_retransmit_queue(sk); + + tcp_write_xmit(sk, tcp_current_mss(sk), tp->nonagle, 0, GFP_ATOMIC); + } } /* * One tasklet per cpu tries to send more skbs. @@ -2039,6 +2046,39 @@ static int tcp_mtu_probe(struct sock *sk) return -1; } +/* TCP Small Queues : + * Control number of packets in qdisc/devices to two packets / or ~1 ms. + * (These limits are doubled for retransmits) + * This allows for : + * - better RTT estimation and ACK scheduling + * - faster recovery + * - high rates + * Alas, some drivers / subsystems require a fair amount + * of queued bytes to ensure line rate. + * One example is wifi aggregation (802.11 AMPDU) + */ +static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb, + unsigned int factor) +{ + unsigned int limit; + + limit = max(2 * skb->truesize, sk->sk_pacing_rate >> 10); + limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes); + limit <<= factor; + + if (atomic_read(&sk->sk_wmem_alloc) > limit) { + set_bit(TSQ_THROTTLED, &tcp_sk(sk)->tsq_flags); + /* It is possible TX completion already happened + * before we set TSQ_THROTTLED, so we must + * test again the condition. + */ + smp_mb__after_atomic(); + if (atomic_read(&sk->sk_wmem_alloc) > limit) + return true; + } + return false; +} + /* This routine writes packets to the network. It advances the * send_head. This happens as incoming acks open up the remote * window for us. @@ -2125,29 +2165,8 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, unlikely(tso_fragment(sk, skb, limit, mss_now, gfp))) break; - /* TCP Small Queues : - * Control number of packets in qdisc/devices to two packets / or ~1 ms. - * This allows for : - * - better RTT estimation and ACK scheduling - * - faster recovery - * - high rates - * Alas, some drivers / subsystems require a fair amount - * of queued bytes to ensure line rate. - * One example is wifi aggregation (802.11 AMPDU) - */ - limit = max(2 * skb->truesize, sk->sk_pacing_rate >> 10); - limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes); - - if (atomic_read(&sk->sk_wmem_alloc) > limit) { - set_bit(TSQ_THROTTLED, &tp->tsq_flags); - /* It is possible TX completion already happened - * before we set TSQ_THROTTLED, so we must - * test again the condition. - */ - smp_mb__after_atomic(); - if (atomic_read(&sk->sk_wmem_alloc) > limit) - break; - } + if (tcp_small_queue_check(sk, skb, 0)) + break; if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp))) break; @@ -2847,6 +2866,9 @@ begin_fwd: if (sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS)) continue; + if (tcp_small_queue_check(sk, skb, 1)) + return; + if (tcp_retransmit_skb(sk, skb, segs)) return; From 1bfecfca565c0505d04dbf5fdd3d2fbb951827c0 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Wed, 21 Sep 2016 12:19:42 +0300 Subject: [PATCH 1353/1715] net/mlx5e: Build RX SKB on demand For non-striding RQ configuration before this patch we had a ring with pre-allocated SKBs and mapped the SKB->data buffers for device. For robustness and better RX data buffers management, we allocate a page per packet and build_skb around it. This patch (which is a prerequisite for XDP) will actually reduce performance for normal stack usage, because we are now hitting a bottleneck in the page allocator. We use the page-cache to restore or even improve performance in comparison to the old RX scheme. Packet rate performance testing was done with pktgen 64B packets on xmit side and TC ingress dropping action on RX side. CPU: Intel(R) Xeon(R) CPU E5-2680 v3 @ 2.50GHz Comparison is done between: 1.Baseline, before 'net/mlx5e: Build RX SKB on demand' 2.Build SKB with RX page cache (This patch) RX Cores Baseline Build SKB+page-cache Improvement ----------------------------------------------------------- 1 4.16Mpps 5.33Mpps 28% 2 7.16Mpps 10.24Mpps 43% 4 13.61Mpps 20.51Mpps 51% 8 25.32Mpps 32.00Mpps 26% All respective cores were 100% utilized. Signed-off-by: Saeed Mahameed Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 10 +- .../net/ethernet/mellanox/mlx5/core/en_main.c | 31 ++- .../net/ethernet/mellanox/mlx5/core/en_rx.c | 221 +++++++++--------- 3 files changed, 136 insertions(+), 126 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 7dd4763e726e..4d06c1b9348b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -65,6 +65,8 @@ #define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW 0x3 #define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW 0x6 +#define MLX5_RX_HEADROOM NET_SKB_PAD + #define MLX5_MPWRQ_LOG_STRIDE_SIZE 6 /* >= 6, HW restriction */ #define MLX5_MPWRQ_LOG_STRIDE_SIZE_CQE_COMPRESS 8 /* >= 6, HW restriction */ #define MLX5_MPWRQ_LOG_WQE_SZ 18 @@ -302,10 +304,14 @@ struct mlx5e_page_cache { struct mlx5e_rq { /* data path */ struct mlx5_wq_ll wq; - u32 wqe_sz; - struct sk_buff **skb; + + struct mlx5e_dma_info *dma_info; struct mlx5e_mpw_info *wqe_info; void *mtt_no_align; + struct { + u8 page_order; + u32 wqe_sz; /* wqe data buffer size */ + } buff; __be32 mkey_be; struct device *pdev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 8595b507e200..d09588b54c38 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -408,6 +408,8 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, void *rqc = param->rqc; void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq); u32 byte_count; + u32 frag_sz; + int npages; int wq_sz; int err; int i; @@ -442,29 +444,40 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, rq->mpwqe_stride_sz = BIT(priv->params.mpwqe_log_stride_sz); rq->mpwqe_num_strides = BIT(priv->params.mpwqe_log_num_strides); - rq->wqe_sz = rq->mpwqe_stride_sz * rq->mpwqe_num_strides; - byte_count = rq->wqe_sz; + + rq->buff.wqe_sz = rq->mpwqe_stride_sz * rq->mpwqe_num_strides; + byte_count = rq->buff.wqe_sz; rq->mkey_be = cpu_to_be32(c->priv->umr_mkey.key); err = mlx5e_rq_alloc_mpwqe_info(rq, c); if (err) goto err_rq_wq_destroy; break; default: /* MLX5_WQ_TYPE_LINKED_LIST */ - rq->skb = kzalloc_node(wq_sz * sizeof(*rq->skb), GFP_KERNEL, - cpu_to_node(c->cpu)); - if (!rq->skb) { + rq->dma_info = kzalloc_node(wq_sz * sizeof(*rq->dma_info), + GFP_KERNEL, cpu_to_node(c->cpu)); + if (!rq->dma_info) { err = -ENOMEM; goto err_rq_wq_destroy; } + rq->handle_rx_cqe = mlx5e_handle_rx_cqe; rq->alloc_wqe = mlx5e_alloc_rx_wqe; rq->dealloc_wqe = mlx5e_dealloc_rx_wqe; - rq->wqe_sz = (priv->params.lro_en) ? + rq->buff.wqe_sz = (priv->params.lro_en) ? priv->params.lro_wqe_sz : MLX5E_SW2HW_MTU(priv->netdev->mtu); - rq->wqe_sz = SKB_DATA_ALIGN(rq->wqe_sz); - byte_count = rq->wqe_sz; + byte_count = rq->buff.wqe_sz; + + /* calc the required page order */ + frag_sz = MLX5_RX_HEADROOM + + byte_count /* packet data */ + + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + frag_sz = SKB_DATA_ALIGN(frag_sz); + + npages = DIV_ROUND_UP(frag_sz, PAGE_SIZE); + rq->buff.page_order = order_base_2(npages); + byte_count |= MLX5_HW_START_PADDING; rq->mkey_be = c->mkey_be; } @@ -499,7 +512,7 @@ static void mlx5e_destroy_rq(struct mlx5e_rq *rq) mlx5e_rq_free_mpwqe_info(rq); break; default: /* MLX5_WQ_TYPE_LINKED_LIST */ - kfree(rq->skb); + kfree(rq->dma_info); } for (i = rq->page_cache.head; i != rq->page_cache.tail; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index dc8677933f76..d017829b77eb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -179,50 +179,99 @@ unlock: mutex_unlock(&priv->state_lock); } -int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) -{ - struct sk_buff *skb; - dma_addr_t dma_addr; +#define RQ_PAGE_SIZE(rq) ((1 << rq->buff.page_order) << PAGE_SHIFT) - skb = napi_alloc_skb(rq->cq.napi, rq->wqe_sz); - if (unlikely(!skb)) +static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info) +{ + struct mlx5e_page_cache *cache = &rq->page_cache; + u32 tail_next = (cache->tail + 1) & (MLX5E_CACHE_SIZE - 1); + + if (tail_next == cache->head) { + rq->stats.cache_full++; + return false; + } + + cache->page_cache[cache->tail] = *dma_info; + cache->tail = tail_next; + return true; +} + +static inline bool mlx5e_rx_cache_get(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info) +{ + struct mlx5e_page_cache *cache = &rq->page_cache; + + if (unlikely(cache->head == cache->tail)) { + rq->stats.cache_empty++; + return false; + } + + if (page_ref_count(cache->page_cache[cache->head].page) != 1) { + rq->stats.cache_busy++; + return false; + } + + *dma_info = cache->page_cache[cache->head]; + cache->head = (cache->head + 1) & (MLX5E_CACHE_SIZE - 1); + rq->stats.cache_reuse++; + + dma_sync_single_for_device(rq->pdev, dma_info->addr, + RQ_PAGE_SIZE(rq), + DMA_FROM_DEVICE); + return true; +} + +static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info) +{ + struct page *page; + + if (mlx5e_rx_cache_get(rq, dma_info)) + return 0; + + page = dev_alloc_pages(rq->buff.page_order); + if (unlikely(!page)) return -ENOMEM; - dma_addr = dma_map_single(rq->pdev, - /* hw start padding */ - skb->data, - /* hw end padding */ - rq->wqe_sz, - DMA_FROM_DEVICE); - - if (unlikely(dma_mapping_error(rq->pdev, dma_addr))) - goto err_free_skb; - - *((dma_addr_t *)skb->cb) = dma_addr; - wqe->data.addr = cpu_to_be64(dma_addr); - - rq->skb[ix] = skb; + dma_info->page = page; + dma_info->addr = dma_map_page(rq->pdev, page, 0, + RQ_PAGE_SIZE(rq), DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(rq->pdev, dma_info->addr))) { + put_page(page); + return -ENOMEM; + } return 0; +} -err_free_skb: - dev_kfree_skb(skb); +void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info, + bool recycle) +{ + if (likely(recycle) && mlx5e_rx_cache_put(rq, dma_info)) + return; - return -ENOMEM; + dma_unmap_page(rq->pdev, dma_info->addr, RQ_PAGE_SIZE(rq), + DMA_FROM_DEVICE); + put_page(dma_info->page); +} + +int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) +{ + struct mlx5e_dma_info *di = &rq->dma_info[ix]; + + if (unlikely(mlx5e_page_alloc_mapped(rq, di))) + return -ENOMEM; + + wqe->data.addr = cpu_to_be64(di->addr + MLX5_RX_HEADROOM); + return 0; } void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix) { - struct sk_buff *skb = rq->skb[ix]; + struct mlx5e_dma_info *di = &rq->dma_info[ix]; - if (skb) { - rq->skb[ix] = NULL; - dma_unmap_single(rq->pdev, - *((dma_addr_t *)skb->cb), - rq->wqe_sz, - DMA_FROM_DEVICE); - dev_kfree_skb(skb); - } + mlx5e_page_release(rq, di, true); } static inline int mlx5e_mpwqe_strides_per_page(struct mlx5e_rq *rq) @@ -305,79 +354,6 @@ static inline void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix) mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0); } -static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq, - struct mlx5e_dma_info *dma_info) -{ - struct mlx5e_page_cache *cache = &rq->page_cache; - u32 tail_next = (cache->tail + 1) & (MLX5E_CACHE_SIZE - 1); - - if (tail_next == cache->head) { - rq->stats.cache_full++; - return false; - } - - cache->page_cache[cache->tail] = *dma_info; - cache->tail = tail_next; - return true; -} - -static inline bool mlx5e_rx_cache_get(struct mlx5e_rq *rq, - struct mlx5e_dma_info *dma_info) -{ - struct mlx5e_page_cache *cache = &rq->page_cache; - - if (unlikely(cache->head == cache->tail)) { - rq->stats.cache_empty++; - return false; - } - - if (page_ref_count(cache->page_cache[cache->head].page) != 1) { - rq->stats.cache_busy++; - return false; - } - - *dma_info = cache->page_cache[cache->head]; - cache->head = (cache->head + 1) & (MLX5E_CACHE_SIZE - 1); - rq->stats.cache_reuse++; - - dma_sync_single_for_device(rq->pdev, dma_info->addr, PAGE_SIZE, - DMA_FROM_DEVICE); - return true; -} - -static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq, - struct mlx5e_dma_info *dma_info) -{ - struct page *page; - - if (mlx5e_rx_cache_get(rq, dma_info)) - return 0; - - page = dev_alloc_page(); - if (unlikely(!page)) - return -ENOMEM; - - dma_info->page = page; - dma_info->addr = dma_map_page(rq->pdev, page, 0, PAGE_SIZE, - DMA_FROM_DEVICE); - if (unlikely(dma_mapping_error(rq->pdev, dma_info->addr))) { - put_page(page); - return -ENOMEM; - } - - return 0; -} - -void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info, - bool recycle) -{ - if (likely(recycle) && mlx5e_rx_cache_put(rq, dma_info)) - return; - - dma_unmap_page(rq->pdev, dma_info->addr, PAGE_SIZE, DMA_FROM_DEVICE); - put_page(dma_info->page); -} - static int mlx5e_alloc_rx_umr_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) @@ -448,7 +424,7 @@ void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq) mlx5_wq_ll_update_db_record(wq); } -int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) +int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) { int err; @@ -658,31 +634,46 @@ static inline void mlx5e_complete_rx_cqe(struct mlx5e_rq *rq, void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) { + struct mlx5e_dma_info *di; struct mlx5e_rx_wqe *wqe; - struct sk_buff *skb; __be16 wqe_counter_be; + struct sk_buff *skb; u16 wqe_counter; u32 cqe_bcnt; + void *va; wqe_counter_be = cqe->wqe_counter; wqe_counter = be16_to_cpu(wqe_counter_be); wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter); - skb = rq->skb[wqe_counter]; - prefetch(skb->data); - rq->skb[wqe_counter] = NULL; + di = &rq->dma_info[wqe_counter]; + va = page_address(di->page); - dma_unmap_single(rq->pdev, - *((dma_addr_t *)skb->cb), - rq->wqe_sz, - DMA_FROM_DEVICE); + dma_sync_single_range_for_cpu(rq->pdev, + di->addr, + MLX5_RX_HEADROOM, + rq->buff.wqe_sz, + DMA_FROM_DEVICE); + prefetch(va + MLX5_RX_HEADROOM); if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) { rq->stats.wqe_err++; - dev_kfree_skb(skb); + mlx5e_page_release(rq, di, true); goto wq_ll_pop; } + skb = build_skb(va, RQ_PAGE_SIZE(rq)); + if (unlikely(!skb)) { + rq->stats.buff_alloc_err++; + mlx5e_page_release(rq, di, true); + goto wq_ll_pop; + } + + /* queue up for recycling ..*/ + page_ref_inc(di->page); + mlx5e_page_release(rq, di, true); + cqe_bcnt = be32_to_cpu(cqe->byte_cnt); + skb_reserve(skb, MLX5_RX_HEADROOM); skb_put(skb, cqe_bcnt); mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); From 21c59685dd176dd6b2c4fc5e18dc65730cfd546a Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Wed, 21 Sep 2016 12:19:43 +0300 Subject: [PATCH 1354/1715] net/mlx5e: Union RQ RX info per RQ type We have two types of RX RQs, and they use two separate sets of info arrays and structures in RX data path function. Today those structures are mutually exclusive per RQ type, hence one kind is allocated on RQ creation according to the RQ type. For better cache locality and to minimalize the sizeof(struct mlx5e_rq), in this patch we define them as a union. Signed-off-by: Saeed Mahameed Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 14 +++++--- .../net/ethernet/mellanox/mlx5/core/en_main.c | 32 +++++++++---------- .../net/ethernet/mellanox/mlx5/core/en_rx.c | 10 +++--- 3 files changed, 30 insertions(+), 26 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 4d06c1b9348b..e3331237ce0e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -305,9 +305,14 @@ struct mlx5e_rq { /* data path */ struct mlx5_wq_ll wq; - struct mlx5e_dma_info *dma_info; - struct mlx5e_mpw_info *wqe_info; - void *mtt_no_align; + union { + struct mlx5e_dma_info *dma_info; + struct { + struct mlx5e_mpw_info *info; + void *mtt_no_align; + u32 mtt_offset; + } mpwqe; + }; struct { u8 page_order; u32 wqe_sz; /* wqe data buffer size */ @@ -327,7 +332,6 @@ struct mlx5e_rq { unsigned long state; int ix; - u32 mpwqe_mtt_offset; struct mlx5e_rx_am am; /* Adaptive Moderation */ @@ -770,7 +774,7 @@ static inline void mlx5e_cq_arm(struct mlx5e_cq *cq) static inline u32 mlx5e_get_wqe_mtt_offset(struct mlx5e_rq *rq, u16 wqe_ix) { - return rq->mpwqe_mtt_offset + + return rq->mpwqe.mtt_offset + wqe_ix * ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index d09588b54c38..f2efa532f453 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -314,7 +314,7 @@ static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, struct mlx5e_sq *sq, struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl; struct mlx5_wqe_data_seg *dseg = &wqe->data; - struct mlx5e_mpw_info *wi = &rq->wqe_info[ix]; + struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix]; u8 ds_cnt = DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_DS); u32 umr_wqe_mtt_offset = mlx5e_get_wqe_mtt_offset(rq, ix); @@ -342,21 +342,21 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, int mtt_alloc = mtt_sz + MLX5_UMR_ALIGN - 1; int i; - rq->wqe_info = kzalloc_node(wq_sz * sizeof(*rq->wqe_info), - GFP_KERNEL, cpu_to_node(c->cpu)); - if (!rq->wqe_info) + rq->mpwqe.info = kzalloc_node(wq_sz * sizeof(*rq->mpwqe.info), + GFP_KERNEL, cpu_to_node(c->cpu)); + if (!rq->mpwqe.info) goto err_out; /* We allocate more than mtt_sz as we will align the pointer */ - rq->mtt_no_align = kzalloc_node(mtt_alloc * wq_sz, GFP_KERNEL, + rq->mpwqe.mtt_no_align = kzalloc_node(mtt_alloc * wq_sz, GFP_KERNEL, cpu_to_node(c->cpu)); - if (unlikely(!rq->mtt_no_align)) + if (unlikely(!rq->mpwqe.mtt_no_align)) goto err_free_wqe_info; for (i = 0; i < wq_sz; i++) { - struct mlx5e_mpw_info *wi = &rq->wqe_info[i]; + struct mlx5e_mpw_info *wi = &rq->mpwqe.info[i]; - wi->umr.mtt = PTR_ALIGN(rq->mtt_no_align + i * mtt_alloc, + wi->umr.mtt = PTR_ALIGN(rq->mpwqe.mtt_no_align + i * mtt_alloc, MLX5_UMR_ALIGN); wi->umr.mtt_addr = dma_map_single(c->pdev, wi->umr.mtt, mtt_sz, PCI_DMA_TODEVICE); @@ -370,14 +370,14 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, err_unmap_mtts: while (--i >= 0) { - struct mlx5e_mpw_info *wi = &rq->wqe_info[i]; + struct mlx5e_mpw_info *wi = &rq->mpwqe.info[i]; dma_unmap_single(c->pdev, wi->umr.mtt_addr, mtt_sz, PCI_DMA_TODEVICE); } - kfree(rq->mtt_no_align); + kfree(rq->mpwqe.mtt_no_align); err_free_wqe_info: - kfree(rq->wqe_info); + kfree(rq->mpwqe.info); err_out: return -ENOMEM; @@ -390,13 +390,13 @@ static void mlx5e_rq_free_mpwqe_info(struct mlx5e_rq *rq) int i; for (i = 0; i < wq_sz; i++) { - struct mlx5e_mpw_info *wi = &rq->wqe_info[i]; + struct mlx5e_mpw_info *wi = &rq->mpwqe.info[i]; dma_unmap_single(rq->pdev, wi->umr.mtt_addr, mtt_sz, PCI_DMA_TODEVICE); } - kfree(rq->mtt_no_align); - kfree(rq->wqe_info); + kfree(rq->mpwqe.mtt_no_align); + kfree(rq->mpwqe.info); } static int mlx5e_create_rq(struct mlx5e_channel *c, @@ -439,7 +439,7 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, rq->alloc_wqe = mlx5e_alloc_rx_mpwqe; rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe; - rq->mpwqe_mtt_offset = c->ix * + rq->mpwqe.mtt_offset = c->ix * MLX5E_REQUIRED_MTTS(1, BIT(priv->params.log_rq_size)); rq->mpwqe_stride_sz = BIT(priv->params.mpwqe_log_stride_sz); @@ -654,7 +654,7 @@ static void mlx5e_free_rx_descs(struct mlx5e_rq *rq) /* UMR WQE (if in progress) is always at wq->head */ if (test_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state)) - mlx5e_free_rx_mpwqe(rq, &rq->wqe_info[wq->head]); + mlx5e_free_rx_mpwqe(rq, &rq->mpwqe.info[wq->head]); while (!mlx5_wq_ll_is_empty(wq)) { wqe_ix_be = *wq->tail_next; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index d017829b77eb..a403a797ebef 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -328,7 +328,7 @@ mlx5e_copy_skb_header_mpwqe(struct device *pdev, static inline void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix) { - struct mlx5e_mpw_info *wi = &rq->wqe_info[ix]; + struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix]; struct mlx5e_sq *sq = &rq->channel->icosq; struct mlx5_wq_cyc *wq = &sq->wq; struct mlx5e_umr_wqe *wqe; @@ -358,7 +358,7 @@ static int mlx5e_alloc_rx_umr_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) { - struct mlx5e_mpw_info *wi = &rq->wqe_info[ix]; + struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix]; u64 dma_offset = (u64)mlx5e_get_wqe_mtt_offset(rq, ix) << PAGE_SHIFT; int pg_strides = mlx5e_mpwqe_strides_per_page(rq); int err; @@ -412,7 +412,7 @@ void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq) clear_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state); if (unlikely(test_bit(MLX5E_RQ_STATE_FLUSH, &rq->state))) { - mlx5e_free_rx_mpwqe(rq, &rq->wqe_info[wq->head]); + mlx5e_free_rx_mpwqe(rq, &rq->mpwqe.info[wq->head]); return; } @@ -438,7 +438,7 @@ int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) { - struct mlx5e_mpw_info *wi = &rq->wqe_info[ix]; + struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix]; mlx5e_free_rx_mpwqe(rq, wi); } @@ -725,7 +725,7 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) { u16 cstrides = mpwrq_get_cqe_consumed_strides(cqe); u16 wqe_id = be16_to_cpu(cqe->wqe_id); - struct mlx5e_mpw_info *wi = &rq->wqe_info[wqe_id]; + struct mlx5e_mpw_info *wi = &rq->mpwqe.info[wqe_id]; struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_id); struct sk_buff *skb; u16 cqe_bcnt; From e4b85508072b32682ba84df32cac5cf6a4f6178e Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Wed, 21 Sep 2016 12:19:44 +0300 Subject: [PATCH 1355/1715] net/mlx5e: Slightly reduce hardware LRO size Before this patch LRO size was 64K, now with build_skb requires extra room, headroom + sizeof(skb_shared_info) added to the data buffer will make wqe size or page_frag_size slightly larger than 64K which will demand order 5 page instead of order 4 in 4K page systems. We take those extra bytes from hardware LRO data size in order to not increase the required page order for when hardware LRO is enabled. Signed-off-by: Saeed Mahameed Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index f2efa532f453..8734240865e1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3187,8 +3187,11 @@ static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev, mlx5e_build_default_indir_rqt(mdev, priv->params.indirection_rqt, MLX5E_INDIR_RQT_SIZE, profile->max_nch(mdev)); - priv->params.lro_wqe_sz = - MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ; + priv->params.lro_wqe_sz = + MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ - + /* Extra room needed for build_skb */ + MLX5_RX_HEADROOM - + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); /* Initialize pflags */ MLX5E_SET_PRIV_FLAG(priv, MLX5E_PFLAG_RX_CQE_BASED_MODER, From 2fc4bfb7250d79ee4e58c1d5bca257687e9f5e53 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Wed, 21 Sep 2016 12:19:45 +0300 Subject: [PATCH 1356/1715] net/mlx5e: Dynamic RQ type infrastructure Add two helper functions to allow dynamic changes of RQ type. mlx5e_set_rq_priv_params and mlx5e_set_rq_type_params will be used on netdev creation to determine the default RQ type. This will be needed later for downstream patches of XDP support. When enabling XDP we will dynamically move from striding RQ to linked list RQ type. Signed-off-by: Saeed Mahameed Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/en_main.c | 92 ++++++++++--------- 1 file changed, 50 insertions(+), 42 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 8734240865e1..ff520ad27f5e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -69,6 +69,47 @@ struct mlx5e_channel_param { struct mlx5e_cq_param icosq_cq; }; +static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) +{ + return MLX5_CAP_GEN(mdev, striding_rq) && + MLX5_CAP_GEN(mdev, umr_ptr_rlky) && + MLX5_CAP_ETH(mdev, reg_umr_sq); +} + +static void mlx5e_set_rq_type_params(struct mlx5e_priv *priv, u8 rq_type) +{ + priv->params.rq_wq_type = rq_type; + switch (priv->params.rq_wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW; + priv->params.mpwqe_log_stride_sz = priv->params.rx_cqe_compress ? + MLX5_MPWRQ_LOG_STRIDE_SIZE_CQE_COMPRESS : + MLX5_MPWRQ_LOG_STRIDE_SIZE; + priv->params.mpwqe_log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - + priv->params.mpwqe_log_stride_sz; + break; + default: /* MLX5_WQ_TYPE_LINKED_LIST */ + priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE; + } + priv->params.min_rx_wqes = mlx5_min_rx_wqes(priv->params.rq_wq_type, + BIT(priv->params.log_rq_size)); + + mlx5_core_info(priv->mdev, + "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n", + priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ, + BIT(priv->params.log_rq_size), + BIT(priv->params.mpwqe_log_stride_sz), + priv->params.rx_cqe_compress_admin); +} + +static void mlx5e_set_rq_priv_params(struct mlx5e_priv *priv) +{ + u8 rq_type = mlx5e_check_fragmented_striding_rq_cap(priv->mdev) ? + MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ : + MLX5_WQ_TYPE_LINKED_LIST; + mlx5e_set_rq_type_params(priv, rq_type); +} + static void mlx5e_update_carrier(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; @@ -3038,13 +3079,6 @@ void mlx5e_build_default_indir_rqt(struct mlx5_core_dev *mdev, indirection_rqt[i] = i % num_channels; } -static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) -{ - return MLX5_CAP_GEN(mdev, striding_rq) && - MLX5_CAP_GEN(mdev, umr_ptr_rlky) && - MLX5_CAP_ETH(mdev, reg_umr_sq); -} - static int mlx5e_get_pci_bw(struct mlx5_core_dev *mdev, u32 *pci_bw) { enum pcie_link_width width; @@ -3124,11 +3158,13 @@ static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev, MLX5_CQ_PERIOD_MODE_START_FROM_CQE : MLX5_CQ_PERIOD_MODE_START_FROM_EQE; - priv->params.log_sq_size = - MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; - priv->params.rq_wq_type = mlx5e_check_fragmented_striding_rq_cap(mdev) ? - MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ : - MLX5_WQ_TYPE_LINKED_LIST; + priv->mdev = mdev; + priv->netdev = netdev; + priv->params.num_channels = profile->max_nch(mdev); + priv->profile = profile; + priv->ppriv = ppriv; + + priv->params.log_sq_size = MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; /* set CQE compression */ priv->params.rx_cqe_compress_admin = false; @@ -3141,33 +3177,11 @@ static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev, priv->params.rx_cqe_compress_admin = cqe_compress_heuristic(link_speed, pci_bw); } - priv->params.rx_cqe_compress = priv->params.rx_cqe_compress_admin; - switch (priv->params.rq_wq_type) { - case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW; - priv->params.mpwqe_log_stride_sz = - priv->params.rx_cqe_compress ? - MLX5_MPWRQ_LOG_STRIDE_SIZE_CQE_COMPRESS : - MLX5_MPWRQ_LOG_STRIDE_SIZE; - priv->params.mpwqe_log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - - priv->params.mpwqe_log_stride_sz; + mlx5e_set_rq_priv_params(priv); + if (priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) priv->params.lro_en = true; - break; - default: /* MLX5_WQ_TYPE_LINKED_LIST */ - priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE; - } - - mlx5_core_info(mdev, - "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n", - priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ, - BIT(priv->params.log_rq_size), - BIT(priv->params.mpwqe_log_stride_sz), - priv->params.rx_cqe_compress_admin); - - priv->params.min_rx_wqes = mlx5_min_rx_wqes(priv->params.rq_wq_type, - BIT(priv->params.log_rq_size)); priv->params.rx_am_enabled = MLX5_CAP_GEN(mdev, cq_moderation); mlx5e_set_rx_cq_mode_params(&priv->params, cq_period_mode); @@ -3197,12 +3211,6 @@ static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev, MLX5E_SET_PRIV_FLAG(priv, MLX5E_PFLAG_RX_CQE_BASED_MODER, priv->params.rx_cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE); - priv->mdev = mdev; - priv->netdev = netdev; - priv->params.num_channels = profile->max_nch(mdev); - priv->profile = profile; - priv->ppriv = ppriv; - #ifdef CONFIG_MLX5_CORE_EN_DCB mlx5e_ets_init(priv); #endif From 86994156c736978d113e7927455d4eeeb2128b9f Mon Sep 17 00:00:00 2001 From: Rana Shahout Date: Wed, 21 Sep 2016 12:19:46 +0300 Subject: [PATCH 1357/1715] net/mlx5e: XDP fast RX drop bpf programs support Add support for the BPF_PROG_TYPE_PHYS_DEV hook in mlx5e driver. When XDP is on we make sure to change channels RQs type to MLX5_WQ_TYPE_LINKED_LIST rather than "striding RQ" type to ensure "page per packet". On XDP set, we fail if HW LRO is set and request from user to turn it off. Since on ConnectX4-LX HW LRO is always on by default, this will be annoying, but we prefer not to enforce LRO off from XDP set function. Full channels reset (close/open) is required only when setting XDP on/off. When XDP set is called just to exchange programs, we will update each RQ xdp program on the fly and for synchronization with current data path RX activity of that RQ, we temporally disable that RQ and ensure RX path is not running, quickly update and re-enable that RQ, for that we do: - rq.state = disabled - napi_synnchronize - xchg(rq->xdp_prg) - rq.state = enabled - napi_schedule // Just in case we've missed an IRQ Packet rate performance testing was done with pktgen 64B packets and on TX side and, TC drop action on RX side compared to XDP fast drop. CPU: Intel(R) Xeon(R) CPU E5-2680 v3 @ 2.50GHz Comparison is done between: 1. Baseline, Before this patch with TC drop action 2. This patch with TC drop action 3. This patch with XDP RX fast drop RX Cores Baseline(TC drop) TC drop XDP fast Drop -------------------------------------------------------------- 1 5.3Mpps 5.3Mpps 16.5Mpps 2 10.2Mpps 10.2Mpps 31.3Mpps 4 20.5Mpps 19.9Mpps 36.3Mpps* *My xmitter was limited to 36.3Mpps, so it is the bottleneck. It seems that receive side can handle more. Signed-off-by: Rana Shahout Signed-off-by: Saeed Mahameed Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 + .../net/ethernet/mellanox/mlx5/core/en_main.c | 100 +++++++++++++++++- .../net/ethernet/mellanox/mlx5/core/en_rx.c | 26 ++++- .../ethernet/mellanox/mlx5/core/en_stats.h | 4 + 4 files changed, 130 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index e3331237ce0e..5e8e669f69c0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -334,6 +334,7 @@ struct mlx5e_rq { int ix; struct mlx5e_rx_am am; /* Adaptive Moderation */ + struct bpf_prog *xdp_prog; /* control */ struct mlx5_wq_ctrl wq_ctrl; @@ -627,6 +628,7 @@ struct mlx5e_priv { /* priv data path fields - start */ struct mlx5e_sq **txq_to_sq_map; int channeltc_to_txq_map[MLX5E_MAX_NUM_CHANNELS][MLX5E_MAX_NUM_TC]; + struct bpf_prog *xdp_prog; /* priv data path fields - end */ unsigned long state; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index ff520ad27f5e..6e95a16226f8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -34,6 +34,7 @@ #include #include #include +#include #include "en.h" #include "en_tc.h" #include "eswitch.h" @@ -104,7 +105,8 @@ static void mlx5e_set_rq_type_params(struct mlx5e_priv *priv, u8 rq_type) static void mlx5e_set_rq_priv_params(struct mlx5e_priv *priv) { - u8 rq_type = mlx5e_check_fragmented_striding_rq_cap(priv->mdev) ? + u8 rq_type = mlx5e_check_fragmented_striding_rq_cap(priv->mdev) && + !priv->xdp_prog ? MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ : MLX5_WQ_TYPE_LINKED_LIST; mlx5e_set_rq_type_params(priv, rq_type); @@ -177,6 +179,7 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv) s->rx_csum_none += rq_stats->csum_none; s->rx_csum_complete += rq_stats->csum_complete; s->rx_csum_unnecessary_inner += rq_stats->csum_unnecessary_inner; + s->rx_xdp_drop += rq_stats->xdp_drop; s->rx_wqe_err += rq_stats->wqe_err; s->rx_mpwqe_filler += rq_stats->mpwqe_filler; s->rx_buff_alloc_err += rq_stats->buff_alloc_err; @@ -473,6 +476,7 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, rq->channel = c; rq->ix = c->ix; rq->priv = c->priv; + rq->xdp_prog = priv->xdp_prog; switch (priv->params.rq_wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: @@ -536,6 +540,9 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, rq->page_cache.head = 0; rq->page_cache.tail = 0; + if (rq->xdp_prog) + bpf_prog_add(rq->xdp_prog, 1); + return 0; err_rq_wq_destroy: @@ -548,6 +555,9 @@ static void mlx5e_destroy_rq(struct mlx5e_rq *rq) { int i; + if (rq->xdp_prog) + bpf_prog_put(rq->xdp_prog); + switch (rq->wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: mlx5e_rq_free_mpwqe_info(rq); @@ -2955,6 +2965,92 @@ static void mlx5e_tx_timeout(struct net_device *dev) schedule_work(&priv->tx_timeout_work); } +static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct bpf_prog *old_prog; + int err = 0; + bool reset, was_opened; + int i; + + mutex_lock(&priv->state_lock); + + if ((netdev->features & NETIF_F_LRO) && prog) { + netdev_warn(netdev, "can't set XDP while LRO is on, disable LRO first\n"); + err = -EINVAL; + goto unlock; + } + + was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); + /* no need for full reset when exchanging programs */ + reset = (!priv->xdp_prog || !prog); + + if (was_opened && reset) + mlx5e_close_locked(netdev); + + /* exchange programs */ + old_prog = xchg(&priv->xdp_prog, prog); + if (prog) + bpf_prog_add(prog, 1); + if (old_prog) + bpf_prog_put(old_prog); + + if (reset) /* change RQ type according to priv->xdp_prog */ + mlx5e_set_rq_priv_params(priv); + + if (was_opened && reset) + mlx5e_open_locked(netdev); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state) || reset) + goto unlock; + + /* exchanging programs w/o reset, we update ref counts on behalf + * of the channels RQs here. + */ + bpf_prog_add(prog, priv->params.num_channels); + for (i = 0; i < priv->params.num_channels; i++) { + struct mlx5e_channel *c = priv->channel[i]; + + set_bit(MLX5E_RQ_STATE_FLUSH, &c->rq.state); + napi_synchronize(&c->napi); + /* prevent mlx5e_poll_rx_cq from accessing rq->xdp_prog */ + + old_prog = xchg(&c->rq.xdp_prog, prog); + + clear_bit(MLX5E_RQ_STATE_FLUSH, &c->rq.state); + /* napi_schedule in case we have missed anything */ + set_bit(MLX5E_CHANNEL_NAPI_SCHED, &c->flags); + napi_schedule(&c->napi); + + if (old_prog) + bpf_prog_put(old_prog); + } + +unlock: + mutex_unlock(&priv->state_lock); + return err; +} + +static bool mlx5e_xdp_attached(struct net_device *dev) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + return !!priv->xdp_prog; +} + +static int mlx5e_xdp(struct net_device *dev, struct netdev_xdp *xdp) +{ + switch (xdp->command) { + case XDP_SETUP_PROG: + return mlx5e_xdp_set(dev, xdp->prog); + case XDP_QUERY_PROG: + xdp->prog_attached = mlx5e_xdp_attached(dev); + return 0; + default: + return -EINVAL; + } +} + static const struct net_device_ops mlx5e_netdev_ops_basic = { .ndo_open = mlx5e_open, .ndo_stop = mlx5e_close, @@ -2974,6 +3070,7 @@ static const struct net_device_ops mlx5e_netdev_ops_basic = { .ndo_rx_flow_steer = mlx5e_rx_flow_steer, #endif .ndo_tx_timeout = mlx5e_tx_timeout, + .ndo_xdp = mlx5e_xdp, }; static const struct net_device_ops mlx5e_netdev_ops_sriov = { @@ -3005,6 +3102,7 @@ static const struct net_device_ops mlx5e_netdev_ops_sriov = { .ndo_set_vf_link_state = mlx5e_set_vf_link_state, .ndo_get_vf_stats = mlx5e_get_vf_stats, .ndo_tx_timeout = mlx5e_tx_timeout, + .ndo_xdp = mlx5e_xdp, }; static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index a403a797ebef..96f6317a95ea 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -632,8 +632,20 @@ static inline void mlx5e_complete_rx_cqe(struct mlx5e_rq *rq, napi_gro_receive(rq->cq.napi, skb); } +static inline enum xdp_action mlx5e_xdp_handle(struct mlx5e_rq *rq, + const struct bpf_prog *prog, + void *data, u32 len) +{ + struct xdp_buff xdp; + + xdp.data = data; + xdp.data_end = xdp.data + len; + return bpf_prog_run_xdp(prog, &xdp); +} + void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) { + struct bpf_prog *xdp_prog = READ_ONCE(rq->xdp_prog); struct mlx5e_dma_info *di; struct mlx5e_rx_wqe *wqe; __be16 wqe_counter_be; @@ -654,6 +666,7 @@ void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) rq->buff.wqe_sz, DMA_FROM_DEVICE); prefetch(va + MLX5_RX_HEADROOM); + cqe_bcnt = be32_to_cpu(cqe->byte_cnt); if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) { rq->stats.wqe_err++; @@ -661,6 +674,18 @@ void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) goto wq_ll_pop; } + if (xdp_prog) { + enum xdp_action act = + mlx5e_xdp_handle(rq, xdp_prog, va + MLX5_RX_HEADROOM, + cqe_bcnt); + + if (act != XDP_PASS) { + rq->stats.xdp_drop++; + mlx5e_page_release(rq, di, true); + goto wq_ll_pop; + } + } + skb = build_skb(va, RQ_PAGE_SIZE(rq)); if (unlikely(!skb)) { rq->stats.buff_alloc_err++; @@ -672,7 +697,6 @@ void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) page_ref_inc(di->page); mlx5e_page_release(rq, di, true); - cqe_bcnt = be32_to_cpu(cqe->byte_cnt); skb_reserve(skb, MLX5_RX_HEADROOM); skb_put(skb, cqe_bcnt); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 6af8d79e8c2a..084d6c893a6e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -65,6 +65,7 @@ struct mlx5e_sw_stats { u64 rx_csum_none; u64 rx_csum_complete; u64 rx_csum_unnecessary_inner; + u64 rx_xdp_drop; u64 tx_csum_partial; u64 tx_csum_partial_inner; u64 tx_queue_stopped; @@ -100,6 +101,7 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_none) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary_inner) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_drop) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial_inner) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_stopped) }, @@ -278,6 +280,7 @@ struct mlx5e_rq_stats { u64 csum_none; u64 lro_packets; u64 lro_bytes; + u64 xdp_drop; u64 wqe_err; u64 mpwqe_filler; u64 buff_alloc_err; @@ -295,6 +298,7 @@ static const struct counter_desc rq_stats_desc[] = { { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_none) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_drop) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_packets) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_bytes) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, wqe_err) }, From f10b7cc7707f7d598e3ddacd848080b18ba4cbff Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Wed, 21 Sep 2016 12:19:47 +0300 Subject: [PATCH 1358/1715] net/mlx5e: Have a clear separation between different SQ types Make a clear separate between Regular SQ (TXQ) and ICO SQ creation, destruction and union their mutual information structures. Don't allocate redundant TXQ skb/wqe_info/dma_fifo arrays for ICO SQ. And have a different SQ edge for ICO SQ than TXQ SQ, to be more accurate. In preparation for XDP TX support. Signed-off-by: Saeed Mahameed Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 23 +++- .../net/ethernet/mellanox/mlx5/core/en_main.c | 121 ++++++++++++------ .../net/ethernet/mellanox/mlx5/core/en_rx.c | 8 +- .../net/ethernet/mellanox/mlx5/core/en_tx.c | 30 +++-- .../net/ethernet/mellanox/mlx5/core/en_txrx.c | 2 +- 5 files changed, 120 insertions(+), 64 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 5e8e669f69c0..5917f5e609ae 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -101,6 +101,9 @@ #define MLX5E_UPDATE_STATS_INTERVAL 200 /* msecs */ #define MLX5E_SQ_BF_BUDGET 16 +#define MLX5E_ICOSQ_MAX_WQEBBS \ + (DIV_ROUND_UP(sizeof(struct mlx5e_umr_wqe), MLX5_SEND_WQE_BB)) + #define MLX5E_NUM_MAIN_GROUPS 9 static inline u16 mlx5_min_rx_wqes(int wq_type, u32 wq_size) @@ -386,6 +389,11 @@ struct mlx5e_ico_wqe_info { u8 num_wqebbs; }; +enum mlx5e_sq_type { + MLX5E_SQ_TXQ, + MLX5E_SQ_ICO +}; + struct mlx5e_sq { /* data path */ @@ -403,10 +411,15 @@ struct mlx5e_sq { struct mlx5e_cq cq; - /* pointers to per packet info: write@xmit, read@completion */ - struct sk_buff **skb; - struct mlx5e_sq_dma *dma_fifo; - struct mlx5e_tx_wqe_info *wqe_info; + /* pointers to per tx element info: write@xmit, read@completion */ + union { + struct { + struct sk_buff **skb; + struct mlx5e_sq_dma *dma_fifo; + struct mlx5e_tx_wqe_info *wqe_info; + } txq; + struct mlx5e_ico_wqe_info *ico_wqe; + } db; /* read only */ struct mlx5_wq_cyc wq; @@ -428,8 +441,8 @@ struct mlx5e_sq { struct mlx5_uar uar; struct mlx5e_channel *channel; int tc; - struct mlx5e_ico_wqe_info *ico_wqe_info; u32 rate_limit; + u8 type; } ____cacheline_aligned_in_smp; static inline bool mlx5e_sq_has_room_for(struct mlx5e_sq *sq, u16 n) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 6e95a16226f8..632de09e69cf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -51,7 +51,7 @@ struct mlx5e_sq_param { struct mlx5_wq_param wq; u16 max_inline; u8 min_inline_mode; - bool icosq; + enum mlx5e_sq_type type; }; struct mlx5e_cq_param { @@ -740,8 +740,8 @@ static int mlx5e_open_rq(struct mlx5e_channel *c, if (param->am_enabled) set_bit(MLX5E_RQ_STATE_AM, &c->rq.state); - sq->ico_wqe_info[pi].opcode = MLX5_OPCODE_NOP; - sq->ico_wqe_info[pi].num_wqebbs = 1; + sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_NOP; + sq->db.ico_wqe[pi].num_wqebbs = 1; mlx5e_send_nop(sq, true); /* trigger mlx5e_post_rx_wqes() */ return 0; @@ -765,26 +765,43 @@ static void mlx5e_close_rq(struct mlx5e_rq *rq) mlx5e_destroy_rq(rq); } -static void mlx5e_free_sq_db(struct mlx5e_sq *sq) +static void mlx5e_free_sq_ico_db(struct mlx5e_sq *sq) { - kfree(sq->wqe_info); - kfree(sq->dma_fifo); - kfree(sq->skb); + kfree(sq->db.ico_wqe); } -static int mlx5e_alloc_sq_db(struct mlx5e_sq *sq, int numa) +static int mlx5e_alloc_sq_ico_db(struct mlx5e_sq *sq, int numa) +{ + u8 wq_sz = mlx5_wq_cyc_get_size(&sq->wq); + + sq->db.ico_wqe = kzalloc_node(sizeof(*sq->db.ico_wqe) * wq_sz, + GFP_KERNEL, numa); + if (!sq->db.ico_wqe) + return -ENOMEM; + + return 0; +} + +static void mlx5e_free_sq_txq_db(struct mlx5e_sq *sq) +{ + kfree(sq->db.txq.wqe_info); + kfree(sq->db.txq.dma_fifo); + kfree(sq->db.txq.skb); +} + +static int mlx5e_alloc_sq_txq_db(struct mlx5e_sq *sq, int numa) { int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); int df_sz = wq_sz * MLX5_SEND_WQEBB_NUM_DS; - sq->skb = kzalloc_node(wq_sz * sizeof(*sq->skb), GFP_KERNEL, numa); - sq->dma_fifo = kzalloc_node(df_sz * sizeof(*sq->dma_fifo), GFP_KERNEL, - numa); - sq->wqe_info = kzalloc_node(wq_sz * sizeof(*sq->wqe_info), GFP_KERNEL, - numa); - - if (!sq->skb || !sq->dma_fifo || !sq->wqe_info) { - mlx5e_free_sq_db(sq); + sq->db.txq.skb = kzalloc_node(wq_sz * sizeof(*sq->db.txq.skb), + GFP_KERNEL, numa); + sq->db.txq.dma_fifo = kzalloc_node(df_sz * sizeof(*sq->db.txq.dma_fifo), + GFP_KERNEL, numa); + sq->db.txq.wqe_info = kzalloc_node(wq_sz * sizeof(*sq->db.txq.wqe_info), + GFP_KERNEL, numa); + if (!sq->db.txq.skb || !sq->db.txq.dma_fifo || !sq->db.txq.wqe_info) { + mlx5e_free_sq_txq_db(sq); return -ENOMEM; } @@ -793,6 +810,30 @@ static int mlx5e_alloc_sq_db(struct mlx5e_sq *sq, int numa) return 0; } +static void mlx5e_free_sq_db(struct mlx5e_sq *sq) +{ + switch (sq->type) { + case MLX5E_SQ_TXQ: + mlx5e_free_sq_txq_db(sq); + break; + case MLX5E_SQ_ICO: + mlx5e_free_sq_ico_db(sq); + break; + } +} + +static int mlx5e_alloc_sq_db(struct mlx5e_sq *sq, int numa) +{ + switch (sq->type) { + case MLX5E_SQ_TXQ: + return mlx5e_alloc_sq_txq_db(sq, numa); + case MLX5E_SQ_ICO: + return mlx5e_alloc_sq_ico_db(sq, numa); + } + + return 0; +} + static int mlx5e_create_sq(struct mlx5e_channel *c, int tc, struct mlx5e_sq_param *param, @@ -803,8 +844,16 @@ static int mlx5e_create_sq(struct mlx5e_channel *c, void *sqc = param->sqc; void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq); + u16 sq_max_wqebbs; int err; + sq->type = param->type; + sq->pdev = c->pdev; + sq->tstamp = &priv->tstamp; + sq->mkey_be = c->mkey_be; + sq->channel = c; + sq->tc = tc; + err = mlx5_alloc_map_uar(mdev, &sq->uar, !!MLX5_CAP_GEN(mdev, bf)); if (err) return err; @@ -833,18 +882,8 @@ static int mlx5e_create_sq(struct mlx5e_channel *c, if (err) goto err_sq_wq_destroy; - if (param->icosq) { - u8 wq_sz = mlx5_wq_cyc_get_size(&sq->wq); - - sq->ico_wqe_info = kzalloc_node(sizeof(*sq->ico_wqe_info) * - wq_sz, - GFP_KERNEL, - cpu_to_node(c->cpu)); - if (!sq->ico_wqe_info) { - err = -ENOMEM; - goto err_free_sq_db; - } - } else { + sq_max_wqebbs = MLX5_SEND_WQE_MAX_WQEBBS; + if (sq->type == MLX5E_SQ_TXQ) { int txq_ix; txq_ix = c->ix + tc * priv->params.num_channels; @@ -852,19 +891,14 @@ static int mlx5e_create_sq(struct mlx5e_channel *c, priv->txq_to_sq_map[txq_ix] = sq; } - sq->pdev = c->pdev; - sq->tstamp = &priv->tstamp; - sq->mkey_be = c->mkey_be; - sq->channel = c; - sq->tc = tc; - sq->edge = (sq->wq.sz_m1 + 1) - MLX5_SEND_WQE_MAX_WQEBBS; + if (sq->type == MLX5E_SQ_ICO) + sq_max_wqebbs = MLX5E_ICOSQ_MAX_WQEBBS; + + sq->edge = (sq->wq.sz_m1 + 1) - sq_max_wqebbs; sq->bf_budget = MLX5E_SQ_BF_BUDGET; return 0; -err_free_sq_db: - mlx5e_free_sq_db(sq); - err_sq_wq_destroy: mlx5_wq_destroy(&sq->wq_ctrl); @@ -879,7 +913,6 @@ static void mlx5e_destroy_sq(struct mlx5e_sq *sq) struct mlx5e_channel *c = sq->channel; struct mlx5e_priv *priv = c->priv; - kfree(sq->ico_wqe_info); mlx5e_free_sq_db(sq); mlx5_wq_destroy(&sq->wq_ctrl); mlx5_unmap_free_uar(priv->mdev, &sq->uar); @@ -908,11 +941,12 @@ static int mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param) memcpy(sqc, param->sqc, sizeof(param->sqc)); - MLX5_SET(sqc, sqc, tis_num_0, param->icosq ? 0 : priv->tisn[sq->tc]); + MLX5_SET(sqc, sqc, tis_num_0, param->type == MLX5E_SQ_ICO ? + 0 : priv->tisn[sq->tc]); MLX5_SET(sqc, sqc, cqn, sq->cq.mcq.cqn); MLX5_SET(sqc, sqc, min_wqe_inline_mode, sq->min_inline_mode); MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); - MLX5_SET(sqc, sqc, tis_lst_sz, param->icosq ? 0 : 1); + MLX5_SET(sqc, sqc, tis_lst_sz, param->type == MLX5E_SQ_ICO ? 0 : 1); MLX5_SET(sqc, sqc, flush_in_error_en, 1); MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); @@ -1027,8 +1061,10 @@ static void mlx5e_close_sq(struct mlx5e_sq *sq) netif_tx_disable_queue(sq->txq); /* last doorbell out, godspeed .. */ - if (mlx5e_sq_has_room_for(sq, 1)) + if (mlx5e_sq_has_room_for(sq, 1)) { + sq->db.txq.skb[(sq->pc & sq->wq.sz_m1)] = NULL; mlx5e_send_nop(sq, true); + } } mlx5e_disable_sq(sq); @@ -1505,6 +1541,7 @@ static void mlx5e_build_sq_param(struct mlx5e_priv *priv, param->max_inline = priv->params.tx_max_inline; param->min_inline_mode = priv->params.tx_min_inline_mode; + param->type = MLX5E_SQ_TXQ; } static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv, @@ -1578,7 +1615,7 @@ static void mlx5e_build_icosq_param(struct mlx5e_priv *priv, MLX5_SET(wq, wq, log_wq_sz, log_wq_size); MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(priv->mdev, reg_umr_sq)); - param->icosq = true; + param->type = MLX5E_SQ_ICO; } static void mlx5e_build_channel_param(struct mlx5e_priv *priv, struct mlx5e_channel_param *cparam) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 96f6317a95ea..941e53169838 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -337,8 +337,8 @@ static inline void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix) /* fill sq edge with nops to avoid wqe wrap around */ while ((pi = (sq->pc & wq->sz_m1)) > sq->edge) { - sq->ico_wqe_info[pi].opcode = MLX5_OPCODE_NOP; - sq->ico_wqe_info[pi].num_wqebbs = 1; + sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_NOP; + sq->db.ico_wqe[pi].num_wqebbs = 1; mlx5e_send_nop(sq, true); } @@ -348,8 +348,8 @@ static inline void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix) cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | MLX5_OPCODE_UMR); - sq->ico_wqe_info[pi].opcode = MLX5_OPCODE_UMR; - sq->ico_wqe_info[pi].num_wqebbs = num_wqebbs; + sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_UMR; + sq->db.ico_wqe[pi].num_wqebbs = num_wqebbs; sq->pc += num_wqebbs; mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index eb0e72537f10..f02f24cfcb7a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -52,7 +52,6 @@ void mlx5e_send_nop(struct mlx5e_sq *sq, bool notify_hw) cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_NOP); cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | 0x01); - sq->skb[pi] = NULL; sq->pc++; sq->stats.nop++; @@ -82,15 +81,17 @@ static inline void mlx5e_dma_push(struct mlx5e_sq *sq, u32 size, enum mlx5e_dma_map_type map_type) { - sq->dma_fifo[sq->dma_fifo_pc & sq->dma_fifo_mask].addr = addr; - sq->dma_fifo[sq->dma_fifo_pc & sq->dma_fifo_mask].size = size; - sq->dma_fifo[sq->dma_fifo_pc & sq->dma_fifo_mask].type = map_type; + u32 i = sq->dma_fifo_pc & sq->dma_fifo_mask; + + sq->db.txq.dma_fifo[i].addr = addr; + sq->db.txq.dma_fifo[i].size = size; + sq->db.txq.dma_fifo[i].type = map_type; sq->dma_fifo_pc++; } static inline struct mlx5e_sq_dma *mlx5e_dma_get(struct mlx5e_sq *sq, u32 i) { - return &sq->dma_fifo[i & sq->dma_fifo_mask]; + return &sq->db.txq.dma_fifo[i & sq->dma_fifo_mask]; } static void mlx5e_dma_unmap_wqe_err(struct mlx5e_sq *sq, u8 num_dma) @@ -221,7 +222,7 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) u16 pi = sq->pc & wq->sz_m1; struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); - struct mlx5e_tx_wqe_info *wi = &sq->wqe_info[pi]; + struct mlx5e_tx_wqe_info *wi = &sq->db.txq.wqe_info[pi]; struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; struct mlx5_wqe_eth_seg *eseg = &wqe->eth; @@ -341,7 +342,7 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode); cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); - sq->skb[pi] = skb; + sq->db.txq.skb[pi] = skb; wi->num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); sq->pc += wi->num_wqebbs; @@ -368,8 +369,10 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) } /* fill sq edge with nops to avoid wqe wrap around */ - while ((sq->pc & wq->sz_m1) > sq->edge) + while ((pi = (sq->pc & wq->sz_m1)) > sq->edge) { + sq->db.txq.skb[pi] = NULL; mlx5e_send_nop(sq, false); + } if (bf) sq->bf_budget--; @@ -442,8 +445,8 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) last_wqe = (sqcc == wqe_counter); ci = sqcc & sq->wq.sz_m1; - skb = sq->skb[ci]; - wi = &sq->wqe_info[ci]; + skb = sq->db.txq.skb[ci]; + wi = &sq->db.txq.wqe_info[ci]; if (unlikely(!skb)) { /* nop */ sqcc++; @@ -499,10 +502,13 @@ void mlx5e_free_tx_descs(struct mlx5e_sq *sq) u16 ci; int i; + if (sq->type != MLX5E_SQ_TXQ) + return; + while (sq->cc != sq->pc) { ci = sq->cc & sq->wq.sz_m1; - skb = sq->skb[ci]; - wi = &sq->wqe_info[ci]; + skb = sq->db.txq.skb[ci]; + wi = &sq->db.txq.wqe_info[ci]; if (!skb) { /* nop */ sq->cc++; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index 08d8b0c91f07..47cd5619ae02 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -72,7 +72,7 @@ static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq) do { u16 ci = be16_to_cpu(cqe->wqe_counter) & wq->sz_m1; - struct mlx5e_ico_wqe_info *icowi = &sq->ico_wqe_info[ci]; + struct mlx5e_ico_wqe_info *icowi = &sq->db.ico_wqe[ci]; mlx5_cqwq_pop(&cq->wq); sqcc += icowi->num_wqebbs; From b5503b994ed5ed8dbfe821317e7b5b38acb065c5 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Wed, 21 Sep 2016 12:19:48 +0300 Subject: [PATCH 1359/1715] net/mlx5e: XDP TX forwarding support Adding support for XDP_TX forwarding from xdp program. Using XDP, now user can loop packets out of the same port. We create a dedicated TX SQ for each channel that will serve XDP programs that return XDP_TX action to loop packets back to the wire directly from the channel RQ RX path. For that RX pages will now need to be mapped bi-directionally, and on XDP_TX action we will sync the page back to device then queue it into SQ for transmission. The XDP xmit frame function will report back to the RX path if the page was consumed (transmitted), if so, RX path will forget about that page as if it were released to the stack. Later on, on XDP TX completion, the page will be released back to the page cache. For simplicity this patch will hit a doorbell on every XDP TX packet. Next patch will introduce a xmit more like mechanism that will queue up more than one packet into SQ w/o notifying the hardware, once RX napi loop is done we will hit doorbell once for all XDP TX packets form the previous loop. This should drastically improve XDP TX performance. Signed-off-by: Saeed Mahameed Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 25 +++- .../net/ethernet/mellanox/mlx5/core/en_main.c | 93 +++++++++++++-- .../net/ethernet/mellanox/mlx5/core/en_rx.c | 110 +++++++++++++++--- .../ethernet/mellanox/mlx5/core/en_stats.h | 8 ++ .../net/ethernet/mellanox/mlx5/core/en_tx.c | 39 ++++++- .../net/ethernet/mellanox/mlx5/core/en_txrx.c | 65 ++++++++++- 6 files changed, 304 insertions(+), 36 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 5917f5e609ae..82eededfc92a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -104,6 +104,15 @@ #define MLX5E_ICOSQ_MAX_WQEBBS \ (DIV_ROUND_UP(sizeof(struct mlx5e_umr_wqe), MLX5_SEND_WQE_BB)) +#define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN) +#define MLX5E_XDP_IHS_DS_COUNT \ + DIV_ROUND_UP(MLX5E_XDP_MIN_INLINE - 2, MLX5_SEND_WQE_DS) +#define MLX5E_XDP_TX_DS_COUNT \ + (MLX5E_XDP_IHS_DS_COUNT + \ + (sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) + 1 /* SG DS */) +#define MLX5E_XDP_TX_WQEBBS \ + DIV_ROUND_UP(MLX5E_XDP_TX_DS_COUNT, MLX5_SEND_WQEBB_NUM_DS) + #define MLX5E_NUM_MAIN_GROUPS 9 static inline u16 mlx5_min_rx_wqes(int wq_type, u32 wq_size) @@ -319,6 +328,7 @@ struct mlx5e_rq { struct { u8 page_order; u32 wqe_sz; /* wqe data buffer size */ + u8 map_dir; /* dma map direction */ } buff; __be32 mkey_be; @@ -384,14 +394,15 @@ enum { MLX5E_SQ_STATE_BF_ENABLE, }; -struct mlx5e_ico_wqe_info { +struct mlx5e_sq_wqe_info { u8 opcode; u8 num_wqebbs; }; enum mlx5e_sq_type { MLX5E_SQ_TXQ, - MLX5E_SQ_ICO + MLX5E_SQ_ICO, + MLX5E_SQ_XDP }; struct mlx5e_sq { @@ -418,7 +429,11 @@ struct mlx5e_sq { struct mlx5e_sq_dma *dma_fifo; struct mlx5e_tx_wqe_info *wqe_info; } txq; - struct mlx5e_ico_wqe_info *ico_wqe; + struct mlx5e_sq_wqe_info *ico_wqe; + struct { + struct mlx5e_sq_wqe_info *wqe_info; + struct mlx5e_dma_info *di; + } xdp; } db; /* read only */ @@ -458,8 +473,10 @@ enum channel_flags { struct mlx5e_channel { /* data path */ struct mlx5e_rq rq; + struct mlx5e_sq xdp_sq; struct mlx5e_sq sq[MLX5E_MAX_NUM_TC]; struct mlx5e_sq icosq; /* internal control operations */ + bool xdp; struct napi_struct napi; struct device *pdev; struct net_device *netdev; @@ -688,7 +705,7 @@ void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event); int mlx5e_napi_poll(struct napi_struct *napi, int budget); bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget); int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget); -void mlx5e_free_tx_descs(struct mlx5e_sq *sq); +void mlx5e_free_sq_descs(struct mlx5e_sq *sq); void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info, bool recycle); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 632de09e69cf..a9fc9d4c6af4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -64,6 +64,7 @@ struct mlx5e_cq_param { struct mlx5e_channel_param { struct mlx5e_rq_param rq; struct mlx5e_sq_param sq; + struct mlx5e_sq_param xdp_sq; struct mlx5e_sq_param icosq; struct mlx5e_cq_param rx_cq; struct mlx5e_cq_param tx_cq; @@ -180,6 +181,8 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv) s->rx_csum_complete += rq_stats->csum_complete; s->rx_csum_unnecessary_inner += rq_stats->csum_unnecessary_inner; s->rx_xdp_drop += rq_stats->xdp_drop; + s->rx_xdp_tx += rq_stats->xdp_tx; + s->rx_xdp_tx_full += rq_stats->xdp_tx_full; s->rx_wqe_err += rq_stats->wqe_err; s->rx_mpwqe_filler += rq_stats->mpwqe_filler; s->rx_buff_alloc_err += rq_stats->buff_alloc_err; @@ -478,6 +481,10 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, rq->priv = c->priv; rq->xdp_prog = priv->xdp_prog; + rq->buff.map_dir = DMA_FROM_DEVICE; + if (rq->xdp_prog) + rq->buff.map_dir = DMA_BIDIRECTIONAL; + switch (priv->params.rq_wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: rq->handle_rx_cqe = mlx5e_handle_rx_cqe_mpwrq; @@ -765,6 +772,28 @@ static void mlx5e_close_rq(struct mlx5e_rq *rq) mlx5e_destroy_rq(rq); } +static void mlx5e_free_sq_xdp_db(struct mlx5e_sq *sq) +{ + kfree(sq->db.xdp.di); + kfree(sq->db.xdp.wqe_info); +} + +static int mlx5e_alloc_sq_xdp_db(struct mlx5e_sq *sq, int numa) +{ + int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); + + sq->db.xdp.di = kzalloc_node(sizeof(*sq->db.xdp.di) * wq_sz, + GFP_KERNEL, numa); + sq->db.xdp.wqe_info = kzalloc_node(sizeof(*sq->db.xdp.wqe_info) * wq_sz, + GFP_KERNEL, numa); + if (!sq->db.xdp.di || !sq->db.xdp.wqe_info) { + mlx5e_free_sq_xdp_db(sq); + return -ENOMEM; + } + + return 0; +} + static void mlx5e_free_sq_ico_db(struct mlx5e_sq *sq) { kfree(sq->db.ico_wqe); @@ -819,6 +848,9 @@ static void mlx5e_free_sq_db(struct mlx5e_sq *sq) case MLX5E_SQ_ICO: mlx5e_free_sq_ico_db(sq); break; + case MLX5E_SQ_XDP: + mlx5e_free_sq_xdp_db(sq); + break; } } @@ -829,11 +861,24 @@ static int mlx5e_alloc_sq_db(struct mlx5e_sq *sq, int numa) return mlx5e_alloc_sq_txq_db(sq, numa); case MLX5E_SQ_ICO: return mlx5e_alloc_sq_ico_db(sq, numa); + case MLX5E_SQ_XDP: + return mlx5e_alloc_sq_xdp_db(sq, numa); } return 0; } +static int mlx5e_sq_get_max_wqebbs(u8 sq_type) +{ + switch (sq_type) { + case MLX5E_SQ_ICO: + return MLX5E_ICOSQ_MAX_WQEBBS; + case MLX5E_SQ_XDP: + return MLX5E_XDP_TX_WQEBBS; + } + return MLX5_SEND_WQE_MAX_WQEBBS; +} + static int mlx5e_create_sq(struct mlx5e_channel *c, int tc, struct mlx5e_sq_param *param, @@ -844,7 +889,6 @@ static int mlx5e_create_sq(struct mlx5e_channel *c, void *sqc = param->sqc; void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq); - u16 sq_max_wqebbs; int err; sq->type = param->type; @@ -882,7 +926,6 @@ static int mlx5e_create_sq(struct mlx5e_channel *c, if (err) goto err_sq_wq_destroy; - sq_max_wqebbs = MLX5_SEND_WQE_MAX_WQEBBS; if (sq->type == MLX5E_SQ_TXQ) { int txq_ix; @@ -891,10 +934,7 @@ static int mlx5e_create_sq(struct mlx5e_channel *c, priv->txq_to_sq_map[txq_ix] = sq; } - if (sq->type == MLX5E_SQ_ICO) - sq_max_wqebbs = MLX5E_ICOSQ_MAX_WQEBBS; - - sq->edge = (sq->wq.sz_m1 + 1) - sq_max_wqebbs; + sq->edge = (sq->wq.sz_m1 + 1) - mlx5e_sq_get_max_wqebbs(sq->type); sq->bf_budget = MLX5E_SQ_BF_BUDGET; return 0; @@ -1068,7 +1108,7 @@ static void mlx5e_close_sq(struct mlx5e_sq *sq) } mlx5e_disable_sq(sq); - mlx5e_free_tx_descs(sq); + mlx5e_free_sq_descs(sq); mlx5e_destroy_sq(sq); } @@ -1429,14 +1469,31 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, } } + if (priv->xdp_prog) { + /* XDP SQ CQ params are same as normal TXQ sq CQ params */ + err = mlx5e_open_cq(c, &cparam->tx_cq, &c->xdp_sq.cq, + priv->params.tx_cq_moderation); + if (err) + goto err_close_sqs; + + err = mlx5e_open_sq(c, 0, &cparam->xdp_sq, &c->xdp_sq); + if (err) { + mlx5e_close_cq(&c->xdp_sq.cq); + goto err_close_sqs; + } + } + + c->xdp = !!priv->xdp_prog; err = mlx5e_open_rq(c, &cparam->rq, &c->rq); if (err) - goto err_close_sqs; + goto err_close_xdp_sq; netif_set_xps_queue(netdev, get_cpu_mask(c->cpu), ix); *cp = c; return 0; +err_close_xdp_sq: + mlx5e_close_sq(&c->xdp_sq); err_close_sqs: mlx5e_close_sqs(c); @@ -1465,9 +1522,13 @@ err_napi_del: static void mlx5e_close_channel(struct mlx5e_channel *c) { mlx5e_close_rq(&c->rq); + if (c->xdp) + mlx5e_close_sq(&c->xdp_sq); mlx5e_close_sqs(c); mlx5e_close_sq(&c->icosq); napi_disable(&c->napi); + if (c->xdp) + mlx5e_close_cq(&c->xdp_sq.cq); mlx5e_close_cq(&c->rq.cq); mlx5e_close_tx_cqs(c); mlx5e_close_cq(&c->icosq.cq); @@ -1618,12 +1679,28 @@ static void mlx5e_build_icosq_param(struct mlx5e_priv *priv, param->type = MLX5E_SQ_ICO; } +static void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv, + struct mlx5e_sq_param *param) +{ + void *sqc = param->sqc; + void *wq = MLX5_ADDR_OF(sqc, sqc, wq); + + mlx5e_build_sq_param_common(priv, param); + MLX5_SET(wq, wq, log_wq_sz, priv->params.log_sq_size); + + param->max_inline = priv->params.tx_max_inline; + /* FOR XDP SQs will support only L2 inline mode */ + param->min_inline_mode = MLX5_INLINE_MODE_NONE; + param->type = MLX5E_SQ_XDP; +} + static void mlx5e_build_channel_param(struct mlx5e_priv *priv, struct mlx5e_channel_param *cparam) { u8 icosq_log_wq_sz = MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE; mlx5e_build_rq_param(priv, &cparam->rq); mlx5e_build_sq_param(priv, &cparam->sq); + mlx5e_build_xdpsq_param(priv, &cparam->xdp_sq); mlx5e_build_icosq_param(priv, &cparam->icosq, icosq_log_wq_sz); mlx5e_build_rx_cq_param(priv, &cparam->rx_cq); mlx5e_build_tx_cq_param(priv, &cparam->tx_cq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 941e53169838..57d49513a87a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -236,7 +236,7 @@ static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq, dma_info->page = page; dma_info->addr = dma_map_page(rq->pdev, page, 0, - RQ_PAGE_SIZE(rq), DMA_FROM_DEVICE); + RQ_PAGE_SIZE(rq), rq->buff.map_dir); if (unlikely(dma_mapping_error(rq->pdev, dma_info->addr))) { put_page(page); return -ENOMEM; @@ -252,7 +252,7 @@ void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info, return; dma_unmap_page(rq->pdev, dma_info->addr, RQ_PAGE_SIZE(rq), - DMA_FROM_DEVICE); + rq->buff.map_dir); put_page(dma_info->page); } @@ -632,15 +632,95 @@ static inline void mlx5e_complete_rx_cqe(struct mlx5e_rq *rq, napi_gro_receive(rq->cq.napi, skb); } -static inline enum xdp_action mlx5e_xdp_handle(struct mlx5e_rq *rq, - const struct bpf_prog *prog, - void *data, u32 len) +static inline void mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq, + struct mlx5e_dma_info *di, + unsigned int data_offset, + int len) +{ + struct mlx5e_sq *sq = &rq->channel->xdp_sq; + struct mlx5_wq_cyc *wq = &sq->wq; + u16 pi = sq->pc & wq->sz_m1; + struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); + struct mlx5e_sq_wqe_info *wi = &sq->db.xdp.wqe_info[pi]; + + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + struct mlx5_wqe_eth_seg *eseg = &wqe->eth; + struct mlx5_wqe_data_seg *dseg; + + dma_addr_t dma_addr = di->addr + data_offset + MLX5E_XDP_MIN_INLINE; + unsigned int dma_len = len - MLX5E_XDP_MIN_INLINE; + void *data = page_address(di->page) + data_offset; + + if (unlikely(!mlx5e_sq_has_room_for(sq, MLX5E_XDP_TX_WQEBBS))) { + rq->stats.xdp_tx_full++; + mlx5e_page_release(rq, di, true); + return; + } + + dma_sync_single_for_device(sq->pdev, dma_addr, dma_len, + PCI_DMA_TODEVICE); + + memset(wqe, 0, sizeof(*wqe)); + + /* copy the inline part */ + memcpy(eseg->inline_hdr_start, data, MLX5E_XDP_MIN_INLINE); + eseg->inline_hdr_sz = cpu_to_be16(MLX5E_XDP_MIN_INLINE); + + dseg = (struct mlx5_wqe_data_seg *)cseg + (MLX5E_XDP_TX_DS_COUNT - 1); + + /* write the dma part */ + dseg->addr = cpu_to_be64(dma_addr); + dseg->byte_count = cpu_to_be32(dma_len); + dseg->lkey = sq->mkey_be; + + cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND); + cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | MLX5E_XDP_TX_DS_COUNT); + + sq->db.xdp.di[pi] = *di; + wi->opcode = MLX5_OPCODE_SEND; + wi->num_wqebbs = MLX5E_XDP_TX_WQEBBS; + sq->pc += MLX5E_XDP_TX_WQEBBS; + + wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; + mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0); + + /* fill sq edge with nops to avoid wqe wrap around */ + while ((pi = (sq->pc & wq->sz_m1)) > sq->edge) { + sq->db.xdp.wqe_info[pi].opcode = MLX5_OPCODE_NOP; + mlx5e_send_nop(sq, false); + } + rq->stats.xdp_tx++; +} + +/* returns true if packet was consumed by xdp */ +static inline bool mlx5e_xdp_handle(struct mlx5e_rq *rq, + const struct bpf_prog *prog, + struct mlx5e_dma_info *di, + void *data, u16 len) { struct xdp_buff xdp; + u32 act; + + if (!prog) + return false; xdp.data = data; xdp.data_end = xdp.data + len; - return bpf_prog_run_xdp(prog, &xdp); + act = bpf_prog_run_xdp(prog, &xdp); + switch (act) { + case XDP_PASS: + return false; + case XDP_TX: + mlx5e_xmit_xdp_frame(rq, di, MLX5_RX_HEADROOM, len); + return true; + default: + bpf_warn_invalid_xdp_action(act); + case XDP_ABORTED: + case XDP_DROP: + rq->stats.xdp_drop++; + mlx5e_page_release(rq, di, true); + return true; + } } void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) @@ -651,21 +731,22 @@ void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) __be16 wqe_counter_be; struct sk_buff *skb; u16 wqe_counter; + void *va, *data; u32 cqe_bcnt; - void *va; wqe_counter_be = cqe->wqe_counter; wqe_counter = be16_to_cpu(wqe_counter_be); wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter); di = &rq->dma_info[wqe_counter]; va = page_address(di->page); + data = va + MLX5_RX_HEADROOM; dma_sync_single_range_for_cpu(rq->pdev, di->addr, MLX5_RX_HEADROOM, rq->buff.wqe_sz, DMA_FROM_DEVICE); - prefetch(va + MLX5_RX_HEADROOM); + prefetch(data); cqe_bcnt = be32_to_cpu(cqe->byte_cnt); if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) { @@ -674,17 +755,8 @@ void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) goto wq_ll_pop; } - if (xdp_prog) { - enum xdp_action act = - mlx5e_xdp_handle(rq, xdp_prog, va + MLX5_RX_HEADROOM, - cqe_bcnt); - - if (act != XDP_PASS) { - rq->stats.xdp_drop++; - mlx5e_page_release(rq, di, true); - goto wq_ll_pop; - } - } + if (mlx5e_xdp_handle(rq, xdp_prog, di, data, cqe_bcnt)) + goto wq_ll_pop; /* page/packet was consumed by XDP */ skb = build_skb(va, RQ_PAGE_SIZE(rq)); if (unlikely(!skb)) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 084d6c893a6e..57452fdc5154 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -66,6 +66,8 @@ struct mlx5e_sw_stats { u64 rx_csum_complete; u64 rx_csum_unnecessary_inner; u64 rx_xdp_drop; + u64 rx_xdp_tx; + u64 rx_xdp_tx_full; u64 tx_csum_partial; u64 tx_csum_partial_inner; u64 tx_queue_stopped; @@ -102,6 +104,8 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary_inner) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_drop) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_full) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial_inner) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_stopped) }, @@ -281,6 +285,8 @@ struct mlx5e_rq_stats { u64 lro_packets; u64 lro_bytes; u64 xdp_drop; + u64 xdp_tx; + u64 xdp_tx_full; u64 wqe_err; u64 mpwqe_filler; u64 buff_alloc_err; @@ -299,6 +305,8 @@ static const struct counter_desc rq_stats_desc[] = { { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_none) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_drop) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_tx) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_tx_full) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_packets) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_bytes) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, wqe_err) }, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index f02f24cfcb7a..70a717382357 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -495,16 +495,13 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) return (i == MLX5E_TX_CQ_POLL_BUDGET); } -void mlx5e_free_tx_descs(struct mlx5e_sq *sq) +static void mlx5e_free_txq_sq_descs(struct mlx5e_sq *sq) { struct mlx5e_tx_wqe_info *wi; struct sk_buff *skb; u16 ci; int i; - if (sq->type != MLX5E_SQ_TXQ) - return; - while (sq->cc != sq->pc) { ci = sq->cc & sq->wq.sz_m1; skb = sq->db.txq.skb[ci]; @@ -526,3 +523,37 @@ void mlx5e_free_tx_descs(struct mlx5e_sq *sq) sq->cc += wi->num_wqebbs; } } + +static void mlx5e_free_xdp_sq_descs(struct mlx5e_sq *sq) +{ + struct mlx5e_sq_wqe_info *wi; + struct mlx5e_dma_info *di; + u16 ci; + + while (sq->cc != sq->pc) { + ci = sq->cc & sq->wq.sz_m1; + di = &sq->db.xdp.di[ci]; + wi = &sq->db.xdp.wqe_info[ci]; + + if (wi->opcode == MLX5_OPCODE_NOP) { + sq->cc++; + continue; + } + + sq->cc += wi->num_wqebbs; + + mlx5e_page_release(&sq->channel->rq, di, false); + } +} + +void mlx5e_free_sq_descs(struct mlx5e_sq *sq) +{ + switch (sq->type) { + case MLX5E_SQ_TXQ: + mlx5e_free_txq_sq_descs(sq); + break; + case MLX5E_SQ_XDP: + mlx5e_free_xdp_sq_descs(sq); + break; + } +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index 47cd5619ae02..5703f19a6a24 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -72,7 +72,7 @@ static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq) do { u16 ci = be16_to_cpu(cqe->wqe_counter) & wq->sz_m1; - struct mlx5e_ico_wqe_info *icowi = &sq->db.ico_wqe[ci]; + struct mlx5e_sq_wqe_info *icowi = &sq->db.ico_wqe[ci]; mlx5_cqwq_pop(&cq->wq); sqcc += icowi->num_wqebbs; @@ -105,6 +105,66 @@ static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq) sq->cc = sqcc; } +static inline bool mlx5e_poll_xdp_tx_cq(struct mlx5e_cq *cq) +{ + struct mlx5e_sq *sq; + u16 sqcc; + int i; + + sq = container_of(cq, struct mlx5e_sq, cq); + + if (unlikely(test_bit(MLX5E_SQ_STATE_FLUSH, &sq->state))) + return false; + + /* sq->cc must be updated only after mlx5_cqwq_update_db_record(), + * otherwise a cq overrun may occur + */ + sqcc = sq->cc; + + for (i = 0; i < MLX5E_TX_CQ_POLL_BUDGET; i++) { + struct mlx5_cqe64 *cqe; + u16 wqe_counter; + bool last_wqe; + + cqe = mlx5e_get_cqe(cq); + if (!cqe) + break; + + mlx5_cqwq_pop(&cq->wq); + + wqe_counter = be16_to_cpu(cqe->wqe_counter); + + do { + struct mlx5e_sq_wqe_info *wi; + struct mlx5e_dma_info *di; + u16 ci; + + last_wqe = (sqcc == wqe_counter); + + ci = sqcc & sq->wq.sz_m1; + di = &sq->db.xdp.di[ci]; + wi = &sq->db.xdp.wqe_info[ci]; + + if (unlikely(wi->opcode == MLX5_OPCODE_NOP)) { + sqcc++; + continue; + } + + sqcc += wi->num_wqebbs; + /* Recycle RX page */ + mlx5e_page_release(&sq->channel->rq, di, true); + } while (!last_wqe); + } + + mlx5_cqwq_update_db_record(&cq->wq); + + /* ensure cq space is freed before enabling more cqes */ + wmb(); + + sq->cc = sqcc; + return (i == MLX5E_TX_CQ_POLL_BUDGET); +} + int mlx5e_napi_poll(struct napi_struct *napi, int budget) { struct mlx5e_channel *c = container_of(napi, struct mlx5e_channel, @@ -121,6 +181,9 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) work_done = mlx5e_poll_rx_cq(&c->rq.cq, budget); busy |= work_done == budget; + if (c->xdp) + busy |= mlx5e_poll_xdp_tx_cq(&c->xdp_sq.cq); + mlx5e_poll_ico_cq(&c->icosq.cq); busy |= mlx5e_post_rx_wqes(&c->rq); From 35b510e257f7516546a0a3f725f71dfbccc3f733 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Wed, 21 Sep 2016 12:19:49 +0300 Subject: [PATCH 1360/1715] net/mlx5e: XDP TX xmit more Previously we rang XDP SQ doorbell on every forwarded XDP packet. Here we introduce a xmit more like mechanism that will queue up more than one packet into SQ (up to RX napi budget) w/o notifying the hardware. Once RX napi budget is consumed and we exit napi RX loop, we will flush (doorbell) all XDP looped packets in case there are such. XDP forward packet rate: Comparing XDP with and w/o xmit more (bulk transmit): RX Cores XDP TX XDP TX (xmit more) --------------------------------------------------- 1 6.5Mpps 12.4Mpps 2 13.2Mpps 24.2Mpps 4 25.2Mpps 36.3Mpps* 8 36.3Mpps* 36.3Mpps* *My xmitter was limited to 36.3Mpps, so it is the bottleneck. It seems that receive side can handle more. Signed-off-by: Saeed Mahameed Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 + .../net/ethernet/mellanox/mlx5/core/en_rx.c | 32 ++++++++++++++----- 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 82eededfc92a..346015407b70 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -433,6 +433,7 @@ struct mlx5e_sq { struct { struct mlx5e_sq_wqe_info *wqe_info; struct mlx5e_dma_info *di; + bool doorbell; } xdp; } db; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 57d49513a87a..0a81bd34abbe 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -632,6 +632,18 @@ static inline void mlx5e_complete_rx_cqe(struct mlx5e_rq *rq, napi_gro_receive(rq->cq.napi, skb); } +static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_sq *sq) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + struct mlx5e_tx_wqe *wqe; + u16 pi = (sq->pc - MLX5E_XDP_TX_WQEBBS) & wq->sz_m1; /* last pi */ + + wqe = mlx5_wq_cyc_get_wqe(wq, pi); + + wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; + mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0); +} + static inline void mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq, struct mlx5e_dma_info *di, unsigned int data_offset, @@ -652,6 +664,11 @@ static inline void mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq, void *data = page_address(di->page) + data_offset; if (unlikely(!mlx5e_sq_has_room_for(sq, MLX5E_XDP_TX_WQEBBS))) { + if (sq->db.xdp.doorbell) { + /* SQ is full, ring doorbell */ + mlx5e_xmit_xdp_doorbell(sq); + sq->db.xdp.doorbell = false; + } rq->stats.xdp_tx_full++; mlx5e_page_release(rq, di, true); return; @@ -681,14 +698,7 @@ static inline void mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq, wi->num_wqebbs = MLX5E_XDP_TX_WQEBBS; sq->pc += MLX5E_XDP_TX_WQEBBS; - wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; - mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0); - - /* fill sq edge with nops to avoid wqe wrap around */ - while ((pi = (sq->pc & wq->sz_m1)) > sq->edge) { - sq->db.xdp.wqe_info[pi].opcode = MLX5_OPCODE_NOP; - mlx5e_send_nop(sq, false); - } + sq->db.xdp.doorbell = true; rq->stats.xdp_tx++; } @@ -863,6 +873,7 @@ mpwrq_cqe_out: int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) { struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq); + struct mlx5e_sq *xdp_sq = &rq->channel->xdp_sq; int work_done = 0; if (unlikely(test_bit(MLX5E_RQ_STATE_FLUSH, &rq->state))) @@ -889,6 +900,11 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) rq->handle_rx_cqe(rq, cqe); } + if (xdp_sq->db.xdp.doorbell) { + mlx5e_xmit_xdp_doorbell(xdp_sq); + xdp_sq->db.xdp.doorbell = false; + } + mlx5_cqwq_update_db_record(&cq->wq); /* ensure cq space is freed before enabling more cqes */ From e2f036a97271cf5811ee754bf321a29a814577f9 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Wed, 21 Sep 2016 08:45:55 -0300 Subject: [PATCH 1361/1715] sctp: rename WORD_TRUNC/ROUND macros To something more meaningful these days, specially because this is working on packet headers or lengths and which are not tied to any CPU arch but to the protocol itself. So, WORD_TRUNC becomes SCTP_TRUNC4 and WORD_ROUND becomes SCTP_PAD4. Reported-by: David Laight Reported-by: David Miller Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 10 +++++----- net/netfilter/xt_sctp.c | 2 +- net/sctp/associola.c | 2 +- net/sctp/chunk.c | 6 +++--- net/sctp/input.c | 8 ++++---- net/sctp/inqueue.c | 2 +- net/sctp/output.c | 12 ++++++------ net/sctp/sm_make_chunk.c | 28 ++++++++++++++-------------- net/sctp/sm_statefuns.c | 6 +++--- net/sctp/transport.c | 4 ++-- net/sctp/ulpevent.c | 4 ++-- 11 files changed, 42 insertions(+), 42 deletions(-) diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 632e205ca54b..87a7f42e7639 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -83,9 +83,9 @@ #endif /* Round an int up to the next multiple of 4. */ -#define WORD_ROUND(s) (((s)+3)&~3) +#define SCTP_PAD4(s) (((s)+3)&~3) /* Truncate to the previous multiple of 4. */ -#define WORD_TRUNC(s) ((s)&~3) +#define SCTP_TRUNC4(s) ((s)&~3) /* * Function declarations. @@ -433,7 +433,7 @@ static inline int sctp_frag_point(const struct sctp_association *asoc, int pmtu) if (asoc->user_frag) frag = min_t(int, frag, asoc->user_frag); - frag = WORD_TRUNC(min_t(int, frag, SCTP_MAX_CHUNK_LEN)); + frag = SCTP_TRUNC4(min_t(int, frag, SCTP_MAX_CHUNK_LEN)); return frag; } @@ -462,7 +462,7 @@ _sctp_walk_params((pos), (chunk), ntohs((chunk)->chunk_hdr.length), member) for (pos.v = chunk->member;\ pos.v <= (void *)chunk + end - ntohs(pos.p->length) &&\ ntohs(pos.p->length) >= sizeof(sctp_paramhdr_t);\ - pos.v += WORD_ROUND(ntohs(pos.p->length))) + pos.v += SCTP_PAD4(ntohs(pos.p->length))) #define sctp_walk_errors(err, chunk_hdr)\ _sctp_walk_errors((err), (chunk_hdr), ntohs((chunk_hdr)->length)) @@ -472,7 +472,7 @@ for (err = (sctp_errhdr_t *)((void *)chunk_hdr + \ sizeof(sctp_chunkhdr_t));\ (void *)err <= (void *)chunk_hdr + end - ntohs(err->length) &&\ ntohs(err->length) >= sizeof(sctp_errhdr_t); \ - err = (sctp_errhdr_t *)((void *)err + WORD_ROUND(ntohs(err->length)))) + err = (sctp_errhdr_t *)((void *)err + SCTP_PAD4(ntohs(err->length)))) #define sctp_walk_fwdtsn(pos, chunk)\ _sctp_walk_fwdtsn((pos), (chunk), ntohs((chunk)->chunk_hdr->length) - sizeof(struct sctp_fwdtsn_chunk)) diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c index ef36a56a02c6..4dedb96d1a06 100644 --- a/net/netfilter/xt_sctp.c +++ b/net/netfilter/xt_sctp.c @@ -68,7 +68,7 @@ match_packet(const struct sk_buff *skb, ++i, offset, sch->type, htons(sch->length), sch->flags); #endif - offset += WORD_ROUND(ntohs(sch->length)); + offset += SCTP_PAD4(ntohs(sch->length)); pr_debug("skb->len: %d\toffset: %d\n", skb->len, offset); diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 1c23060c41a6..f10d3397f917 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1408,7 +1408,7 @@ void sctp_assoc_sync_pmtu(struct sock *sk, struct sctp_association *asoc) transports) { if (t->pmtu_pending && t->dst) { sctp_transport_update_pmtu(sk, t, - WORD_TRUNC(dst_mtu(t->dst))); + SCTP_TRUNC4(dst_mtu(t->dst))); t->pmtu_pending = 0; } if (!pmtu || (t->pathmtu < pmtu)) diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index af9cc8055465..76eae828ec89 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -208,8 +208,8 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, struct sctp_hmac *hmac_desc = sctp_auth_asoc_get_hmac(asoc); if (hmac_desc) - max_data -= WORD_ROUND(sizeof(sctp_auth_chunk_t) + - hmac_desc->hmac_len); + max_data -= SCTP_PAD4(sizeof(sctp_auth_chunk_t) + + hmac_desc->hmac_len); } /* Now, check if we need to reduce our max */ @@ -229,7 +229,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, asoc->outqueue.out_qlen == 0 && list_empty(&asoc->outqueue.retransmit) && msg_len > max) - max_data -= WORD_ROUND(sizeof(sctp_sack_chunk_t)); + max_data -= SCTP_PAD4(sizeof(sctp_sack_chunk_t)); /* Encourage Cookie-ECHO bundling. */ if (asoc->state < SCTP_STATE_COOKIE_ECHOED) diff --git a/net/sctp/input.c b/net/sctp/input.c index 69444d32ecda..a1d85065bfc0 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -605,7 +605,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info) /* PMTU discovery (RFC1191) */ if (ICMP_FRAG_NEEDED == code) { sctp_icmp_frag_needed(sk, asoc, transport, - WORD_TRUNC(info)); + SCTP_TRUNC4(info)); goto out_unlock; } else { if (ICMP_PROT_UNREACH == code) { @@ -673,7 +673,7 @@ static int sctp_rcv_ootb(struct sk_buff *skb) if (ntohs(ch->length) < sizeof(sctp_chunkhdr_t)) break; - ch_end = offset + WORD_ROUND(ntohs(ch->length)); + ch_end = offset + SCTP_PAD4(ntohs(ch->length)); if (ch_end > skb->len) break; @@ -1121,7 +1121,7 @@ static struct sctp_association *__sctp_rcv_walk_lookup(struct net *net, if (ntohs(ch->length) < sizeof(sctp_chunkhdr_t)) break; - ch_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length)); + ch_end = ((__u8 *)ch) + SCTP_PAD4(ntohs(ch->length)); if (ch_end > skb_tail_pointer(skb)) break; @@ -1190,7 +1190,7 @@ static struct sctp_association *__sctp_rcv_lookup_harder(struct net *net, * that the chunk length doesn't cause overflow. Otherwise, we'll * walk off the end. */ - if (WORD_ROUND(ntohs(ch->length)) > skb->len) + if (SCTP_PAD4(ntohs(ch->length)) > skb->len) return NULL; /* If this is INIT/INIT-ACK look inside the chunk too. */ diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c index 6437aa97cfd7..f731de3e8428 100644 --- a/net/sctp/inqueue.c +++ b/net/sctp/inqueue.c @@ -213,7 +213,7 @@ new_skb: } chunk->chunk_hdr = ch; - chunk->chunk_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length)); + chunk->chunk_end = ((__u8 *)ch) + SCTP_PAD4(ntohs(ch->length)); skb_pull(chunk->skb, sizeof(sctp_chunkhdr_t)); chunk->subh.v = NULL; /* Subheader is no longer valid. */ diff --git a/net/sctp/output.c b/net/sctp/output.c index 0c605ec74dc4..2a5c1896d18f 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -297,7 +297,7 @@ static sctp_xmit_t __sctp_packet_append_chunk(struct sctp_packet *packet, struct sctp_chunk *chunk) { sctp_xmit_t retval = SCTP_XMIT_OK; - __u16 chunk_len = WORD_ROUND(ntohs(chunk->chunk_hdr->length)); + __u16 chunk_len = SCTP_PAD4(ntohs(chunk->chunk_hdr->length)); /* Check to see if this chunk will fit into the packet */ retval = sctp_packet_will_fit(packet, chunk, chunk_len); @@ -508,7 +508,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) if (gso) { pkt_size = packet->overhead; list_for_each_entry(chunk, &packet->chunk_list, list) { - int padded = WORD_ROUND(chunk->skb->len); + int padded = SCTP_PAD4(chunk->skb->len); if (pkt_size + padded > tp->pathmtu) break; @@ -538,7 +538,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) * included in the chunk length field. The sender should * never pad with more than 3 bytes. * - * [This whole comment explains WORD_ROUND() below.] + * [This whole comment explains SCTP_PAD4() below.] */ pkt_size -= packet->overhead; @@ -560,7 +560,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) has_data = 1; } - padding = WORD_ROUND(chunk->skb->len) - chunk->skb->len; + padding = SCTP_PAD4(chunk->skb->len) - chunk->skb->len; if (padding) memset(skb_put(chunk->skb, padding), 0, padding); @@ -587,7 +587,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) * acknowledged or have failed. * Re-queue auth chunks if needed. */ - pkt_size -= WORD_ROUND(chunk->skb->len); + pkt_size -= SCTP_PAD4(chunk->skb->len); if (!sctp_chunk_is_data(chunk) && chunk != packet->auth) sctp_chunk_free(chunk); @@ -911,7 +911,7 @@ static sctp_xmit_t sctp_packet_will_fit(struct sctp_packet *packet, */ maxsize = pmtu - packet->overhead; if (packet->auth) - maxsize -= WORD_ROUND(packet->auth->skb->len); + maxsize -= SCTP_PAD4(packet->auth->skb->len); if (chunk_len > maxsize) retval = SCTP_XMIT_PMTU_FULL; diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 8c77b87a8565..79dd66079dd7 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -253,7 +253,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, num_types = sp->pf->supported_addrs(sp, types); chunksize = sizeof(init) + addrs_len; - chunksize += WORD_ROUND(SCTP_SAT_LEN(num_types)); + chunksize += SCTP_PAD4(SCTP_SAT_LEN(num_types)); chunksize += sizeof(ecap_param); if (asoc->prsctp_enable) @@ -283,14 +283,14 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, /* Add HMACS parameter length if any were defined */ auth_hmacs = (sctp_paramhdr_t *)asoc->c.auth_hmacs; if (auth_hmacs->length) - chunksize += WORD_ROUND(ntohs(auth_hmacs->length)); + chunksize += SCTP_PAD4(ntohs(auth_hmacs->length)); else auth_hmacs = NULL; /* Add CHUNKS parameter length */ auth_chunks = (sctp_paramhdr_t *)asoc->c.auth_chunks; if (auth_chunks->length) - chunksize += WORD_ROUND(ntohs(auth_chunks->length)); + chunksize += SCTP_PAD4(ntohs(auth_chunks->length)); else auth_chunks = NULL; @@ -300,8 +300,8 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, /* If we have any extensions to report, account for that */ if (num_ext) - chunksize += WORD_ROUND(sizeof(sctp_supported_ext_param_t) + - num_ext); + chunksize += SCTP_PAD4(sizeof(sctp_supported_ext_param_t) + + num_ext); /* RFC 2960 3.3.2 Initiation (INIT) (1) * @@ -443,13 +443,13 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, auth_hmacs = (sctp_paramhdr_t *)asoc->c.auth_hmacs; if (auth_hmacs->length) - chunksize += WORD_ROUND(ntohs(auth_hmacs->length)); + chunksize += SCTP_PAD4(ntohs(auth_hmacs->length)); else auth_hmacs = NULL; auth_chunks = (sctp_paramhdr_t *)asoc->c.auth_chunks; if (auth_chunks->length) - chunksize += WORD_ROUND(ntohs(auth_chunks->length)); + chunksize += SCTP_PAD4(ntohs(auth_chunks->length)); else auth_chunks = NULL; @@ -458,8 +458,8 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, } if (num_ext) - chunksize += WORD_ROUND(sizeof(sctp_supported_ext_param_t) + - num_ext); + chunksize += SCTP_PAD4(sizeof(sctp_supported_ext_param_t) + + num_ext); /* Now allocate and fill out the chunk. */ retval = sctp_make_control(asoc, SCTP_CID_INIT_ACK, 0, chunksize, gfp); @@ -1390,7 +1390,7 @@ static struct sctp_chunk *_sctp_make_chunk(const struct sctp_association *asoc, struct sock *sk; /* No need to allocate LL here, as this is only a chunk. */ - skb = alloc_skb(WORD_ROUND(sizeof(sctp_chunkhdr_t) + paylen), gfp); + skb = alloc_skb(SCTP_PAD4(sizeof(sctp_chunkhdr_t) + paylen), gfp); if (!skb) goto nodata; @@ -1482,7 +1482,7 @@ void *sctp_addto_chunk(struct sctp_chunk *chunk, int len, const void *data) void *target; void *padding; int chunklen = ntohs(chunk->chunk_hdr->length); - int padlen = WORD_ROUND(chunklen) - chunklen; + int padlen = SCTP_PAD4(chunklen) - chunklen; padding = skb_put(chunk->skb, padlen); target = skb_put(chunk->skb, len); @@ -1900,7 +1900,7 @@ static int sctp_process_missing_param(const struct sctp_association *asoc, struct __sctp_missing report; __u16 len; - len = WORD_ROUND(sizeof(report)); + len = SCTP_PAD4(sizeof(report)); /* Make an ERROR chunk, preparing enough room for * returning multiple unknown parameters. @@ -2098,9 +2098,9 @@ static sctp_ierror_t sctp_process_unk_param(const struct sctp_association *asoc, if (*errp) { if (!sctp_init_cause_fixed(*errp, SCTP_ERROR_UNKNOWN_PARAM, - WORD_ROUND(ntohs(param.p->length)))) + SCTP_PAD4(ntohs(param.p->length)))) sctp_addto_chunk_fixed(*errp, - WORD_ROUND(ntohs(param.p->length)), + SCTP_PAD4(ntohs(param.p->length)), param.v); } else { /* If there is no memory for generating the ERROR diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index d88bb2b0b699..026e3bca4a94 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -3454,7 +3454,7 @@ sctp_disposition_t sctp_sf_ootb(struct net *net, } /* Report violation if chunk len overflows */ - ch_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length)); + ch_end = ((__u8 *)ch) + SCTP_PAD4(ntohs(ch->length)); if (ch_end > skb_tail_pointer(skb)) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); @@ -4185,7 +4185,7 @@ sctp_disposition_t sctp_sf_unk_chunk(struct net *net, hdr = unk_chunk->chunk_hdr; err_chunk = sctp_make_op_error(asoc, unk_chunk, SCTP_ERROR_UNKNOWN_CHUNK, hdr, - WORD_ROUND(ntohs(hdr->length)), + SCTP_PAD4(ntohs(hdr->length)), 0); if (err_chunk) { sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, @@ -4203,7 +4203,7 @@ sctp_disposition_t sctp_sf_unk_chunk(struct net *net, hdr = unk_chunk->chunk_hdr; err_chunk = sctp_make_op_error(asoc, unk_chunk, SCTP_ERROR_UNKNOWN_CHUNK, hdr, - WORD_ROUND(ntohs(hdr->length)), + SCTP_PAD4(ntohs(hdr->length)), 0); if (err_chunk) { sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 81b86678be4d..ce54dce13ddb 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -233,7 +233,7 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk) } if (transport->dst) { - transport->pathmtu = WORD_TRUNC(dst_mtu(transport->dst)); + transport->pathmtu = SCTP_TRUNC4(dst_mtu(transport->dst)); } else transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT; } @@ -287,7 +287,7 @@ void sctp_transport_route(struct sctp_transport *transport, return; } if (transport->dst) { - transport->pathmtu = WORD_TRUNC(dst_mtu(transport->dst)); + transport->pathmtu = SCTP_TRUNC4(dst_mtu(transport->dst)); /* Initialize sk->sk_rcv_saddr, if the transport is the * association's active path for getsockname(). diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index d85b803da11d..bea00058ce35 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -383,7 +383,7 @@ sctp_ulpevent_make_remote_error(const struct sctp_association *asoc, ch = (sctp_errhdr_t *)(chunk->skb->data); cause = ch->cause; - elen = WORD_ROUND(ntohs(ch->length)) - sizeof(sctp_errhdr_t); + elen = SCTP_PAD4(ntohs(ch->length)) - sizeof(sctp_errhdr_t); /* Pull off the ERROR header. */ skb_pull(chunk->skb, sizeof(sctp_errhdr_t)); @@ -688,7 +688,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc, * MUST ignore the padding bytes. */ len = ntohs(chunk->chunk_hdr->length); - padding = WORD_ROUND(len) - len; + padding = SCTP_PAD4(len) - len; /* Fixup cloned skb with just this chunks data. */ skb_trim(skb, chunk->chunk_end - padding - skb->data); From 4a225ce3950879a5426c56f306f5d1c9d6330292 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Wed, 21 Sep 2016 08:45:56 -0300 Subject: [PATCH 1362/1715] sctp: make use of SCTP_TRUNC4 macro And avoid the usage of '&~3'. This is the last place still not using the macro. Also break the line to make it easier to read. Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/chunk.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 76eae828ec89..8afe2e90d003 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -195,9 +195,10 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, /* This is the biggest possible DATA chunk that can fit into * the packet */ - max_data = (asoc->pathmtu - - sctp_sk(asoc->base.sk)->pf->af->net_header_len - - sizeof(struct sctphdr) - sizeof(struct sctp_data_chunk)) & ~3; + max_data = asoc->pathmtu - + sctp_sk(asoc->base.sk)->pf->af->net_header_len - + sizeof(struct sctphdr) - sizeof(struct sctp_data_chunk); + max_data = SCTP_TRUNC4(max_data); max = asoc->frag_point; /* If the the peer requested that we authenticate DATA chunks From fcfbfd68b36b2bf4e0133dffa316831a5e180199 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 21 Sep 2016 15:04:43 +0000 Subject: [PATCH 1363/1715] net: dsa: qca8k: fix non static symbol warning Fixes the following sparse warning: drivers/net/dsa/qca8k.c:259:22: warning: symbol 'qca8k_regmap_config' was not declared. Should it be static? Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- drivers/net/dsa/qca8k.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index 4788a89520dd..6fc379cf12e9 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -256,7 +256,7 @@ static struct regmap_access_table qca8k_readable_table = { .n_yes_ranges = ARRAY_SIZE(qca8k_readable_ranges), }; -struct regmap_config qca8k_regmap_config = { +static struct regmap_config qca8k_regmap_config = { .reg_bits = 16, .val_bits = 32, .reg_stride = 4, From a084ab33543cfa07033f91161978b626a9d9bd57 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 21 Sep 2016 15:05:05 +0000 Subject: [PATCH 1364/1715] net: dsa: qca8k: use mdio_module_driver to simplify the code mdio_module_driver() makes the code simpler by eliminating boilerplate code. Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- drivers/net/dsa/qca8k.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index 6fc379cf12e9..b3df70d07ff6 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -1032,19 +1032,7 @@ static struct mdio_driver qca8kmdio_driver = { }, }; -static int __init -qca8kmdio_driver_register(void) -{ - return mdio_driver_register(&qca8kmdio_driver); -} -module_init(qca8kmdio_driver_register); - -static void __exit -qca8kmdio_driver_unregister(void) -{ - mdio_driver_unregister(&qca8kmdio_driver); -} -module_exit(qca8kmdio_driver_unregister); +mdio_module_driver(qca8kmdio_driver); MODULE_AUTHOR("Mathieu Olivari, John Crispin "); MODULE_DESCRIPTION("Driver for QCA8K ethernet switch family"); From 524605e5ba5a897fb0a8c29398ed049100fe80aa Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 21 Sep 2016 15:09:16 +0000 Subject: [PATCH 1365/1715] cxgb4: Convert to use simple_open() Remove an open coded simple_open() function and replace file operations references to the function with simple_open() instead. Generated by: scripts/coccinelle/api/simple_open.cocci Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c index 52be9a4ef97a..20455d082cb8 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c @@ -2748,12 +2748,6 @@ static void add_debugfs_mem(struct adapter *adap, const char *name, size_mb << 20); } -static int blocked_fl_open(struct inode *inode, struct file *file) -{ - file->private_data = inode->i_private; - return 0; -} - static ssize_t blocked_fl_read(struct file *filp, char __user *ubuf, size_t count, loff_t *ppos) { @@ -2797,7 +2791,7 @@ static ssize_t blocked_fl_write(struct file *filp, const char __user *ubuf, static const struct file_operations blocked_fl_fops = { .owner = THIS_MODULE, - .open = blocked_fl_open, + .open = simple_open, .read = blocked_fl_read, .write = blocked_fl_write, .llseek = generic_file_llseek, From 8e83134db4ecb77a1dc3390b60ddeea840a5afbc Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 22 Sep 2016 00:29:31 +0100 Subject: [PATCH 1366/1715] rxrpc: Send pings to get RTT data Send a PING ACK packet to the peer when we get a new incoming call from a peer we don't have a record for. The PING RESPONSE ACK packet will tell us the following about the peer: (1) its receive window size (2) its MTU sizes (3) its support for jumbo DATA packets (4) if it supports slow start (similar to RFC 5681) (5) an estimate of the RTT This is necessary because the peer won't normally send us an ACK until it gets to the Rx phase and we send it a packet, but we would like to know some of this information before we start sending packets. A pair of tracepoints are added so that RTT determination can be observed. Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 7 ++++-- net/rxrpc/input.c | 48 ++++++++++++++++++++++++++++++++++++++++- net/rxrpc/misc.c | 11 +++++----- net/rxrpc/output.c | 22 +++++++++++++++++++ 4 files changed, 80 insertions(+), 8 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 79c671e552c3..8b47f468eb9d 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -403,6 +403,7 @@ enum rxrpc_call_flag { RXRPC_CALL_EXPOSED, /* The call was exposed to the world */ RXRPC_CALL_RX_LAST, /* Received the last packet (at rxtx_top) */ RXRPC_CALL_TX_LAST, /* Last packet in Tx buffer (at rxtx_top) */ + RXRPC_CALL_PINGING, /* Ping in process */ }; /* @@ -487,6 +488,8 @@ struct rxrpc_call { u32 call_id; /* call ID on connection */ u32 cid; /* connection ID plus channel index */ int debug_id; /* debug ID for printks */ + unsigned short rx_pkt_offset; /* Current recvmsg packet offset */ + unsigned short rx_pkt_len; /* Current recvmsg packet len */ /* Rx/Tx circular buffer, depending on phase. * @@ -530,8 +533,8 @@ struct rxrpc_call { u16 ackr_skew; /* skew on packet being ACK'd */ rxrpc_serial_t ackr_serial; /* serial of packet being ACK'd */ rxrpc_seq_t ackr_prev_seq; /* previous sequence number received */ - unsigned short rx_pkt_offset; /* Current recvmsg packet offset */ - unsigned short rx_pkt_len; /* Current recvmsg packet len */ + rxrpc_serial_t ackr_ping; /* Last ping sent */ + ktime_t ackr_ping_time; /* Time last ping sent */ /* transmission-phase ACK management */ rxrpc_serial_t acks_latest; /* serial number of latest ACK received */ diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index aa261df9fc9e..a0a5bd108c9e 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -36,6 +36,19 @@ static void rxrpc_proto_abort(const char *why, } } +/* + * Ping the other end to fill our RTT cache and to retrieve the rwind + * and MTU parameters. + */ +static void rxrpc_send_ping(struct rxrpc_call *call, struct sk_buff *skb, + int skew) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + + rxrpc_propose_ACK(call, RXRPC_ACK_PING, skew, sp->hdr.serial, + true, true); +} + /* * Apply a hard ACK by advancing the Tx window. */ @@ -342,6 +355,32 @@ ack: _leave(" [queued]"); } +/* + * Process a ping response. + */ +static void rxrpc_input_ping_response(struct rxrpc_call *call, + ktime_t resp_time, + rxrpc_serial_t orig_serial, + rxrpc_serial_t ack_serial) +{ + rxrpc_serial_t ping_serial; + ktime_t ping_time; + + ping_time = call->ackr_ping_time; + smp_rmb(); + ping_serial = call->ackr_ping; + + if (!test_bit(RXRPC_CALL_PINGING, &call->flags) || + before(orig_serial, ping_serial)) + return; + clear_bit(RXRPC_CALL_PINGING, &call->flags); + if (after(orig_serial, ping_serial)) + return; + + rxrpc_peer_add_rtt(call, rxrpc_rtt_rx_ping_response, + orig_serial, ack_serial, ping_time, resp_time); +} + /* * Process the extra information that may be appended to an ACK packet */ @@ -438,6 +477,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, struct rxrpc_ackinfo info; u8 acks[RXRPC_MAXACKS]; } buf; + rxrpc_serial_t acked_serial; rxrpc_seq_t first_soft_ack, hard_ack; int nr_acks, offset; @@ -449,6 +489,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, } sp->offset += sizeof(buf.ack); + acked_serial = ntohl(buf.ack.serial); first_soft_ack = ntohl(buf.ack.firstPacket); hard_ack = first_soft_ack - 1; nr_acks = buf.ack.nAcks; @@ -460,10 +501,14 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, ntohs(buf.ack.maxSkew), first_soft_ack, ntohl(buf.ack.previousPacket), - ntohl(buf.ack.serial), + acked_serial, rxrpc_acks(buf.ack.reason), buf.ack.nAcks); + if (buf.ack.reason == RXRPC_ACK_PING_RESPONSE) + rxrpc_input_ping_response(call, skb->tstamp, acked_serial, + sp->hdr.serial); + if (buf.ack.reason == RXRPC_ACK_PING) { _proto("Rx ACK %%%u PING Request", sp->hdr.serial); rxrpc_propose_ACK(call, RXRPC_ACK_PING_RESPONSE, @@ -830,6 +875,7 @@ void rxrpc_data_ready(struct sock *udp_sk) rcu_read_unlock(); goto reject_packet; } + rxrpc_send_ping(call, skb, skew); } rxrpc_input_call_packet(call, skb, skew); diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index 6321c23f9a6e..56e668352fc7 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -83,11 +83,12 @@ const s8 rxrpc_ack_priority[] = { [RXRPC_ACK_DELAY] = 1, [RXRPC_ACK_REQUESTED] = 2, [RXRPC_ACK_IDLE] = 3, - [RXRPC_ACK_PING_RESPONSE] = 4, - [RXRPC_ACK_DUPLICATE] = 5, - [RXRPC_ACK_OUT_OF_SEQUENCE] = 6, - [RXRPC_ACK_EXCEEDS_WINDOW] = 7, - [RXRPC_ACK_NOSPACE] = 8, + [RXRPC_ACK_DUPLICATE] = 4, + [RXRPC_ACK_OUT_OF_SEQUENCE] = 5, + [RXRPC_ACK_EXCEEDS_WINDOW] = 6, + [RXRPC_ACK_NOSPACE] = 7, + [RXRPC_ACK_PING_RESPONSE] = 8, + [RXRPC_ACK_PING] = 9, }; const char *rxrpc_acks(u8 reason) diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 817fb0e82d6a..0d89cd3f2c01 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -57,6 +57,9 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_call *call, pkt->ack.reason = call->ackr_reason; pkt->ack.nAcks = top - hard_ack; + if (pkt->ack.reason == RXRPC_ACK_PING) + pkt->whdr.flags |= RXRPC_REQUEST_ACK; + if (after(top, hard_ack)) { seq = hard_ack + 1; do { @@ -97,6 +100,7 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) struct kvec iov[2]; rxrpc_serial_t serial; size_t len, n; + bool ping = false; int ioc, ret; u32 abort_code; @@ -147,6 +151,7 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) ret = 0; goto out; } + ping = (call->ackr_reason == RXRPC_ACK_PING); n = rxrpc_fill_out_ack(call, pkt); call->ackr_reason = 0; @@ -183,12 +188,29 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) goto out; } + if (ping) { + call->ackr_ping = serial; + smp_wmb(); + /* We need to stick a time in before we send the packet in case + * the reply gets back before kernel_sendmsg() completes - but + * asking UDP to send the packet can take a relatively long + * time, so we update the time after, on the assumption that + * the packet transmission is more likely to happen towards the + * end of the kernel_sendmsg() call. + */ + call->ackr_ping_time = ktime_get_real(); + set_bit(RXRPC_CALL_PINGING, &call->flags); + trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_ping, serial); + } ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, ioc, len); + if (ping) + call->ackr_ping_time = ktime_get_real(); if (ret < 0 && call->state < RXRPC_CALL_COMPLETE) { switch (type) { case RXRPC_PACKET_TYPE_ACK: + clear_bit(RXRPC_CALL_PINGING, &call->flags); rxrpc_propose_ACK(call, pkt->ack.reason, ntohs(pkt->ack.maxSkew), ntohl(pkt->ack.serial), From 7aa51da7c88d42cc0bb85ab7d01429fbd4e51282 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 22 Sep 2016 00:29:31 +0100 Subject: [PATCH 1367/1715] rxrpc: Expedite ping response transmission Expedite the transmission of a response to a PING ACK by sending it from sendmsg if one is pending. We're most likely to see a PING ACK during the client call Tx phase as the other side may use it to determine a number of parameters, such as the client's receive window size, the RTT and whether the client is doing slow start (similar to RFC5681). If we don't expedite it, it's left to the background processing thread to transmit. Signed-off-by: David Howells --- net/rxrpc/sendmsg.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 814b17f23971..3c969de3ef05 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -180,6 +180,10 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, copied = 0; do { + /* Check to see if there's a ping ACK to reply to. */ + if (call->ackr_reason == RXRPC_ACK_PING_RESPONSE) + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK); + if (!skb) { size_t size, chunk, max, space; From 77f2efcbdd7133466060198e02c6e8a170c3cd14 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 22 Sep 2016 00:29:01 +0100 Subject: [PATCH 1368/1715] rxrpc: Add ktime_sub_ms() Add a ktime_sub_ms() to go with ktime_add_ms() and co. for use in AF_RXRPC RTT determination. Signed-off-by: David Howells --- include/linux/ktime.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/linux/ktime.h b/include/linux/ktime.h index 2b6a204bd8d4..aa118bad1407 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h @@ -231,6 +231,11 @@ static inline ktime_t ktime_sub_us(const ktime_t kt, const u64 usec) return ktime_sub_ns(kt, usec * NSEC_PER_USEC); } +static inline ktime_t ktime_sub_ms(const ktime_t kt, const u64 msec) +{ + return ktime_sub_ns(kt, msec * NSEC_PER_MSEC); +} + extern ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs); /** From 50235c4b5a2fb9a9690f02cd1dea6ca047d7f79e Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 22 Sep 2016 00:29:31 +0100 Subject: [PATCH 1369/1715] rxrpc: Obtain RTT data by requesting ACKs on DATA packets In addition to sending a PING ACK to gain RTT data, we can set the RXRPC_REQUEST_ACK flag on a DATA packet and get a REQUESTED-ACK ACK. The ACK packet contains the serial number of the packet it is in response to, so we can look through the Tx buffer for a matching DATA packet. This requires that the data packets be stamped with the time of transmission as a ktime rather than having the resend_at time in jiffies. This further requires the resend code to do the resend determination in ktimes and convert to jiffies to set the timer. Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 7 +++---- net/rxrpc/call_event.c | 19 +++++++++---------- net/rxrpc/input.c | 35 +++++++++++++++++++++++++++++++++++ net/rxrpc/misc.c | 6 ++++-- net/rxrpc/output.c | 7 +++++-- net/rxrpc/sendmsg.c | 1 - net/rxrpc/sysctl.c | 2 +- 7 files changed, 57 insertions(+), 20 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 8b47f468eb9d..1c4597b2c6cd 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -142,10 +142,7 @@ struct rxrpc_host_header { */ struct rxrpc_skb_priv { union { - unsigned long resend_at; /* time in jiffies at which to resend */ - struct { - u8 nr_jumbo; /* Number of jumbo subpackets */ - }; + u8 nr_jumbo; /* Number of jumbo subpackets */ }; union { unsigned int offset; /* offset into buffer of next read */ @@ -663,6 +660,7 @@ extern const char rxrpc_recvmsg_traces[rxrpc_recvmsg__nr_trace][5]; enum rxrpc_rtt_tx_trace { rxrpc_rtt_tx_ping, + rxrpc_rtt_tx_data, rxrpc_rtt_tx__nr_trace }; @@ -670,6 +668,7 @@ extern const char rxrpc_rtt_tx_traces[rxrpc_rtt_tx__nr_trace][5]; enum rxrpc_rtt_rx_trace { rxrpc_rtt_rx_ping_response, + rxrpc_rtt_rx_requested_ack, rxrpc_rtt_rx__nr_trace }; diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 34ad967f2d81..adb2ec61e21f 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -142,12 +142,14 @@ static void rxrpc_resend(struct rxrpc_call *call) struct rxrpc_skb_priv *sp; struct sk_buff *skb; rxrpc_seq_t cursor, seq, top; - unsigned long resend_at, now; + ktime_t now = ktime_get_real(), max_age, oldest, resend_at; int ix; u8 annotation, anno_type; _enter("{%d,%d}", call->tx_hard_ack, call->tx_top); + max_age = ktime_sub_ms(now, rxrpc_resend_timeout); + spin_lock_bh(&call->lock); cursor = call->tx_hard_ack; @@ -160,8 +162,7 @@ static void rxrpc_resend(struct rxrpc_call *call) * the packets in the Tx buffer we're going to resend and what the new * resend timeout will be. */ - now = jiffies; - resend_at = now + rxrpc_resend_timeout; + oldest = now; for (seq = cursor + 1; before_eq(seq, top); seq++) { ix = seq & RXRPC_RXTX_BUFF_MASK; annotation = call->rxtx_annotations[ix]; @@ -175,9 +176,9 @@ static void rxrpc_resend(struct rxrpc_call *call) sp = rxrpc_skb(skb); if (anno_type == RXRPC_TX_ANNO_UNACK) { - if (time_after(sp->resend_at, now)) { - if (time_before(sp->resend_at, resend_at)) - resend_at = sp->resend_at; + if (ktime_after(skb->tstamp, max_age)) { + if (ktime_before(skb->tstamp, oldest)) + oldest = skb->tstamp; continue; } } @@ -186,7 +187,8 @@ static void rxrpc_resend(struct rxrpc_call *call) call->rxtx_annotations[ix] = RXRPC_TX_ANNO_RETRANS | annotation; } - call->resend_at = resend_at; + resend_at = ktime_sub(ktime_add_ns(oldest, rxrpc_resend_timeout), now); + call->resend_at = jiffies + nsecs_to_jiffies(ktime_to_ns(resend_at)); /* Now go through the Tx window and perform the retransmissions. We * have to drop the lock for each send. If an ACK comes in whilst the @@ -205,15 +207,12 @@ static void rxrpc_resend(struct rxrpc_call *call) spin_unlock_bh(&call->lock); if (rxrpc_send_data_packet(call, skb) < 0) { - call->resend_at = now + 2; rxrpc_free_skb(skb, rxrpc_skb_tx_freed); return; } if (rxrpc_is_client_call(call)) rxrpc_expose_client_call(call); - sp = rxrpc_skb(skb); - sp->resend_at = now + rxrpc_resend_timeout; rxrpc_free_skb(skb, rxrpc_skb_tx_freed); spin_lock_bh(&call->lock); diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index a0a5bd108c9e..c121949de3c8 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -355,6 +355,38 @@ ack: _leave(" [queued]"); } +/* + * Process a requested ACK. + */ +static void rxrpc_input_requested_ack(struct rxrpc_call *call, + ktime_t resp_time, + rxrpc_serial_t orig_serial, + rxrpc_serial_t ack_serial) +{ + struct rxrpc_skb_priv *sp; + struct sk_buff *skb; + ktime_t sent_at; + int ix; + + for (ix = 0; ix < RXRPC_RXTX_BUFF_SIZE; ix++) { + skb = call->rxtx_buffer[ix]; + if (!skb) + continue; + + sp = rxrpc_skb(skb); + if (sp->hdr.serial != orig_serial) + continue; + smp_rmb(); + sent_at = skb->tstamp; + goto found; + } + return; + +found: + rxrpc_peer_add_rtt(call, rxrpc_rtt_rx_requested_ack, + orig_serial, ack_serial, sent_at, resp_time); +} + /* * Process a ping response. */ @@ -508,6 +540,9 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, if (buf.ack.reason == RXRPC_ACK_PING_RESPONSE) rxrpc_input_ping_response(call, skb->tstamp, acked_serial, sp->hdr.serial); + if (buf.ack.reason == RXRPC_ACK_REQUESTED) + rxrpc_input_requested_ack(call, skb->tstamp, acked_serial, + sp->hdr.serial); if (buf.ack.reason == RXRPC_ACK_PING) { _proto("Rx ACK %%%u PING Request", sp->hdr.serial); diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index 56e668352fc7..0d425e707f22 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -68,9 +68,9 @@ unsigned int rxrpc_rx_mtu = 5692; unsigned int rxrpc_rx_jumbo_max = 4; /* - * Time till packet resend (in jiffies). + * Time till packet resend (in milliseconds). */ -unsigned int rxrpc_resend_timeout = 4 * HZ; +unsigned int rxrpc_resend_timeout = 4 * 1000; const char *const rxrpc_pkts[] = { "?00", @@ -186,8 +186,10 @@ const char rxrpc_recvmsg_traces[rxrpc_recvmsg__nr_trace][5] = { const char rxrpc_rtt_tx_traces[rxrpc_rtt_tx__nr_trace][5] = { [rxrpc_rtt_tx_ping] = "PING", + [rxrpc_rtt_tx_data] = "DATA", }; const char rxrpc_rtt_rx_traces[rxrpc_rtt_rx__nr_trace][5] = { [rxrpc_rtt_rx_ping_response] = "PONG", + [rxrpc_rtt_rx_requested_ack] = "RACK", }; diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 0d89cd3f2c01..db01fbb70d23 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -300,9 +300,12 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb) goto send_fragmentable; done: - if (ret == 0) { - sp->resend_at = jiffies + rxrpc_resend_timeout; + if (ret >= 0) { + skb->tstamp = ktime_get_real(); + smp_wmb(); sp->hdr.serial = serial; + if (whdr.flags & RXRPC_REQUEST_ACK) + trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_data, serial); } _leave(" = %d [%u]", ret, call->peer->maxdata); return ret; diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 3c969de3ef05..607223f4f871 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -137,7 +137,6 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, if (seq == 1 && rxrpc_is_client_call(call)) rxrpc_expose_client_call(call); - sp->resend_at = jiffies + rxrpc_resend_timeout; ret = rxrpc_send_data_packet(call, skb); if (ret < 0) { _debug("need instant resend %d", ret); diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c index a03c61c672f5..13d1df03ebac 100644 --- a/net/rxrpc/sysctl.c +++ b/net/rxrpc/sysctl.c @@ -59,7 +59,7 @@ static struct ctl_table rxrpc_sysctl_table[] = { .data = &rxrpc_resend_timeout, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec_ms_jiffies, + .proc_handler = proc_dointvec, .extra1 = (void *)&one, }, { From ada66b54c44add8702612940a08d077b4d6ecd0e Mon Sep 17 00:00:00 2001 From: Andrew Jeffery Date: Thu, 22 Sep 2016 08:34:58 +0930 Subject: [PATCH 1370/1715] net/faraday: Separate rx page storage from rxdesc The ftgmac100 hardware revision in e.g. the Aspeed AST2500 no longer reserves all bits in RXDES#2 but instead uses the bottom 16 bits to store MAC frame metadata. Avoid corruption by shifting struct page pointers out to their own member in struct ftgmac100. Signed-off-by: Andrew Jeffery Signed-off-by: Joel Stanley Signed-off-by: David S. Miller --- drivers/net/ethernet/faraday/ftgmac100.c | 25 +++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 36361f8bf894..40622567159a 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -60,6 +60,8 @@ struct ftgmac100 { struct ftgmac100_descs *descs; dma_addr_t descs_dma_addr; + struct page *rx_pages[RX_QUEUE_ENTRIES]; + unsigned int rx_pointer; unsigned int tx_clean_pointer; unsigned int tx_pointer; @@ -341,18 +343,27 @@ static bool ftgmac100_rxdes_ipcs_err(struct ftgmac100_rxdes *rxdes) return rxdes->rxdes1 & cpu_to_le32(FTGMAC100_RXDES1_IP_CHKSUM_ERR); } +static inline struct page **ftgmac100_rxdes_page_slot(struct ftgmac100 *priv, + struct ftgmac100_rxdes *rxdes) +{ + return &priv->rx_pages[rxdes - priv->descs->rxdes]; +} + /* * rxdes2 is not used by hardware. We use it to keep track of page. * Since hardware does not touch it, we can skip cpu_to_le32()/le32_to_cpu(). */ -static void ftgmac100_rxdes_set_page(struct ftgmac100_rxdes *rxdes, struct page *page) +static void ftgmac100_rxdes_set_page(struct ftgmac100 *priv, + struct ftgmac100_rxdes *rxdes, + struct page *page) { - rxdes->rxdes2 = (unsigned int)page; + *ftgmac100_rxdes_page_slot(priv, rxdes) = page; } -static struct page *ftgmac100_rxdes_get_page(struct ftgmac100_rxdes *rxdes) +static struct page *ftgmac100_rxdes_get_page(struct ftgmac100 *priv, + struct ftgmac100_rxdes *rxdes) { - return (struct page *)rxdes->rxdes2; + return *ftgmac100_rxdes_page_slot(priv, rxdes); } /****************************************************************************** @@ -501,7 +512,7 @@ static bool ftgmac100_rx_packet(struct ftgmac100 *priv, int *processed) do { dma_addr_t map = ftgmac100_rxdes_get_dma_addr(rxdes); - struct page *page = ftgmac100_rxdes_get_page(rxdes); + struct page *page = ftgmac100_rxdes_get_page(priv, rxdes); unsigned int size; dma_unmap_page(priv->dev, map, RX_BUF_SIZE, DMA_FROM_DEVICE); @@ -779,7 +790,7 @@ static int ftgmac100_alloc_rx_page(struct ftgmac100 *priv, return -ENOMEM; } - ftgmac100_rxdes_set_page(rxdes, page); + ftgmac100_rxdes_set_page(priv, rxdes, page); ftgmac100_rxdes_set_dma_addr(rxdes, map); ftgmac100_rxdes_set_dma_own(rxdes); return 0; @@ -791,7 +802,7 @@ static void ftgmac100_free_buffers(struct ftgmac100 *priv) for (i = 0; i < RX_QUEUE_ENTRIES; i++) { struct ftgmac100_rxdes *rxdes = &priv->descs->rxdes[i]; - struct page *page = ftgmac100_rxdes_get_page(rxdes); + struct page *page = ftgmac100_rxdes_get_page(priv, rxdes); dma_addr_t map = ftgmac100_rxdes_get_dma_addr(rxdes); if (!page) From 7906a4da0ef845d01e76f2187c23cc71ae00fa1d Mon Sep 17 00:00:00 2001 From: Andrew Jeffery Date: Thu, 22 Sep 2016 08:34:59 +0930 Subject: [PATCH 1371/1715] net/faraday: Make EDO{R,T}R bits configurable These bits are #defined at a fixed location. In order to support future hardware that has chosen to move these bits around move the bits into a member of the struct ftgmac100. Signed-off-by: Andrew Jeffery Signed-off-by: Joel Stanley Signed-off-by: David S. Miller --- drivers/net/ethernet/faraday/ftgmac100.c | 40 +++++++++++++++--------- drivers/net/ethernet/faraday/ftgmac100.h | 2 -- 2 files changed, 26 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 40622567159a..62a88d1a1f99 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -79,6 +79,9 @@ struct ftgmac100 { int int_mask_all; bool use_ncsi; bool enabled; + + u32 rxdes0_edorr_mask; + u32 txdes0_edotr_mask; }; static int ftgmac100_alloc_rx_page(struct ftgmac100 *priv, @@ -259,10 +262,11 @@ static bool ftgmac100_rxdes_packet_ready(struct ftgmac100_rxdes *rxdes) return rxdes->rxdes0 & cpu_to_le32(FTGMAC100_RXDES0_RXPKT_RDY); } -static void ftgmac100_rxdes_set_dma_own(struct ftgmac100_rxdes *rxdes) +static void ftgmac100_rxdes_set_dma_own(const struct ftgmac100 *priv, + struct ftgmac100_rxdes *rxdes) { /* clear status bits */ - rxdes->rxdes0 &= cpu_to_le32(FTGMAC100_RXDES0_EDORR); + rxdes->rxdes0 &= cpu_to_le32(priv->rxdes0_edorr_mask); } static bool ftgmac100_rxdes_rx_error(struct ftgmac100_rxdes *rxdes) @@ -300,9 +304,10 @@ static bool ftgmac100_rxdes_multicast(struct ftgmac100_rxdes *rxdes) return rxdes->rxdes0 & cpu_to_le32(FTGMAC100_RXDES0_MULTICAST); } -static void ftgmac100_rxdes_set_end_of_ring(struct ftgmac100_rxdes *rxdes) +static void ftgmac100_rxdes_set_end_of_ring(const struct ftgmac100 *priv, + struct ftgmac100_rxdes *rxdes) { - rxdes->rxdes0 |= cpu_to_le32(FTGMAC100_RXDES0_EDORR); + rxdes->rxdes0 |= cpu_to_le32(priv->rxdes0_edorr_mask); } static void ftgmac100_rxdes_set_dma_addr(struct ftgmac100_rxdes *rxdes, @@ -393,7 +398,7 @@ ftgmac100_rx_locate_first_segment(struct ftgmac100 *priv) if (ftgmac100_rxdes_first_segment(rxdes)) return rxdes; - ftgmac100_rxdes_set_dma_own(rxdes); + ftgmac100_rxdes_set_dma_own(priv, rxdes); ftgmac100_rx_pointer_advance(priv); rxdes = ftgmac100_current_rxdes(priv); } @@ -464,7 +469,7 @@ static void ftgmac100_rx_drop_packet(struct ftgmac100 *priv) if (ftgmac100_rxdes_last_segment(rxdes)) done = true; - ftgmac100_rxdes_set_dma_own(rxdes); + ftgmac100_rxdes_set_dma_own(priv, rxdes); ftgmac100_rx_pointer_advance(priv); rxdes = ftgmac100_current_rxdes(priv); } while (!done && ftgmac100_rxdes_packet_ready(rxdes)); @@ -556,10 +561,11 @@ static bool ftgmac100_rx_packet(struct ftgmac100 *priv, int *processed) /****************************************************************************** * internal functions (transmit descriptor) *****************************************************************************/ -static void ftgmac100_txdes_reset(struct ftgmac100_txdes *txdes) +static void ftgmac100_txdes_reset(const struct ftgmac100 *priv, + struct ftgmac100_txdes *txdes) { /* clear all except end of ring bit */ - txdes->txdes0 &= cpu_to_le32(FTGMAC100_TXDES0_EDOTR); + txdes->txdes0 &= cpu_to_le32(priv->txdes0_edotr_mask); txdes->txdes1 = 0; txdes->txdes2 = 0; txdes->txdes3 = 0; @@ -580,9 +586,10 @@ static void ftgmac100_txdes_set_dma_own(struct ftgmac100_txdes *txdes) txdes->txdes0 |= cpu_to_le32(FTGMAC100_TXDES0_TXDMA_OWN); } -static void ftgmac100_txdes_set_end_of_ring(struct ftgmac100_txdes *txdes) +static void ftgmac100_txdes_set_end_of_ring(const struct ftgmac100 *priv, + struct ftgmac100_txdes *txdes) { - txdes->txdes0 |= cpu_to_le32(FTGMAC100_TXDES0_EDOTR); + txdes->txdes0 |= cpu_to_le32(priv->txdes0_edotr_mask); } static void ftgmac100_txdes_set_first_segment(struct ftgmac100_txdes *txdes) @@ -701,7 +708,7 @@ static bool ftgmac100_tx_complete_packet(struct ftgmac100 *priv) dev_kfree_skb(skb); - ftgmac100_txdes_reset(txdes); + ftgmac100_txdes_reset(priv, txdes); ftgmac100_tx_clean_pointer_advance(priv); @@ -792,7 +799,7 @@ static int ftgmac100_alloc_rx_page(struct ftgmac100 *priv, ftgmac100_rxdes_set_page(priv, rxdes, page); ftgmac100_rxdes_set_dma_addr(rxdes, map); - ftgmac100_rxdes_set_dma_own(rxdes); + ftgmac100_rxdes_set_dma_own(priv, rxdes); return 0; } @@ -839,7 +846,8 @@ static int ftgmac100_alloc_buffers(struct ftgmac100 *priv) return -ENOMEM; /* initialize RX ring */ - ftgmac100_rxdes_set_end_of_ring(&priv->descs->rxdes[RX_QUEUE_ENTRIES - 1]); + ftgmac100_rxdes_set_end_of_ring(priv, + &priv->descs->rxdes[RX_QUEUE_ENTRIES - 1]); for (i = 0; i < RX_QUEUE_ENTRIES; i++) { struct ftgmac100_rxdes *rxdes = &priv->descs->rxdes[i]; @@ -849,7 +857,8 @@ static int ftgmac100_alloc_buffers(struct ftgmac100 *priv) } /* initialize TX ring */ - ftgmac100_txdes_set_end_of_ring(&priv->descs->txdes[TX_QUEUE_ENTRIES - 1]); + ftgmac100_txdes_set_end_of_ring(priv, + &priv->descs->txdes[TX_QUEUE_ENTRIES - 1]); return 0; err: @@ -1336,6 +1345,9 @@ static int ftgmac100_probe(struct platform_device *pdev) priv->netdev = netdev; priv->dev = &pdev->dev; + priv->rxdes0_edorr_mask = BIT(15); + priv->txdes0_edotr_mask = BIT(15); + spin_lock_init(&priv->tx_lock); /* initialize NAPI */ diff --git a/drivers/net/ethernet/faraday/ftgmac100.h b/drivers/net/ethernet/faraday/ftgmac100.h index 13408d448b05..c258586ce4a4 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.h +++ b/drivers/net/ethernet/faraday/ftgmac100.h @@ -189,7 +189,6 @@ struct ftgmac100_txdes { } __attribute__ ((aligned(16))); #define FTGMAC100_TXDES0_TXBUF_SIZE(x) ((x) & 0x3fff) -#define FTGMAC100_TXDES0_EDOTR (1 << 15) #define FTGMAC100_TXDES0_CRC_ERR (1 << 19) #define FTGMAC100_TXDES0_LTS (1 << 28) #define FTGMAC100_TXDES0_FTS (1 << 29) @@ -215,7 +214,6 @@ struct ftgmac100_rxdes { } __attribute__ ((aligned(16))); #define FTGMAC100_RXDES0_VDBC 0x3fff -#define FTGMAC100_RXDES0_EDORR (1 << 15) #define FTGMAC100_RXDES0_MULTICAST (1 << 16) #define FTGMAC100_RXDES0_BROADCAST (1 << 17) #define FTGMAC100_RXDES0_RX_ERR (1 << 18) From 2a0ab8ebbec634127987fc8dbbd09a7fd7274e3d Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Thu, 22 Sep 2016 08:35:00 +0930 Subject: [PATCH 1372/1715] net/faraday: Adapt for Aspeed SoCs The RXDES and TXDES registers bits in the ftgmac100 indicates EDO{R,T}R at bit position 15 for the Faraday Tech IP. However, the version of this IP present in the Aspeed SoCs has these bits at position 30 in the registers. It appers that ast2400 SoCs support both positions, with the 15th bit marked as reserved but still functional. In the ast2500 this bit is reused for another function, so we need a work around. This was confirmed with engineers from Aspeed that using bit 30 is correct for both the ast2400 and ast2500 SoCs. Signed-off-by: Joel Stanley Signed-off-by: David S. Miller --- drivers/net/ethernet/faraday/ftgmac100.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 62a88d1a1f99..47f512224b57 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -1345,9 +1345,6 @@ static int ftgmac100_probe(struct platform_device *pdev) priv->netdev = netdev; priv->dev = &pdev->dev; - priv->rxdes0_edorr_mask = BIT(15); - priv->txdes0_edotr_mask = BIT(15); - spin_lock_init(&priv->tx_lock); /* initialize NAPI */ @@ -1381,6 +1378,16 @@ static int ftgmac100_probe(struct platform_device *pdev) FTGMAC100_INT_PHYSTS_CHG | FTGMAC100_INT_RPKT_BUF | FTGMAC100_INT_NO_RXBUF); + + if (of_machine_is_compatible("aspeed,ast2400") || + of_machine_is_compatible("aspeed,ast2500")) { + priv->rxdes0_edorr_mask = BIT(30); + priv->txdes0_edotr_mask = BIT(30); + } else { + priv->rxdes0_edorr_mask = BIT(15); + priv->txdes0_edotr_mask = BIT(15); + } + if (pdev->dev.of_node && of_get_property(pdev->dev.of_node, "use-ncsi", NULL)) { if (!IS_ENABLED(CONFIG_NET_NCSI)) { From 08c9c126004e999f0c05b369d1e0cc757e6040cc Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 22 Sep 2016 08:35:01 +0930 Subject: [PATCH 1373/1715] net/faraday: Clear stale interrupts There is stale interrupt (PHYSTS_CHG in ISR, bit#6 in 0x0) from the bootloader (uboot) when enabling the MAC. The stale interrupts aren't part of kernel and should be cleared. This clears the stale interrupts in ISR (0x0) when enabling the MAC. Signed-off-by: Gavin Shan Signed-off-by: Joel Stanley Signed-off-by: David S. Miller --- drivers/net/ethernet/faraday/ftgmac100.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 47f512224b57..189373743ddf 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -1112,6 +1112,7 @@ static int ftgmac100_poll(struct napi_struct *napi, int budget) static int ftgmac100_open(struct net_device *netdev) { struct ftgmac100 *priv = netdev_priv(netdev); + unsigned int status; int err; err = ftgmac100_alloc_buffers(priv); @@ -1137,6 +1138,11 @@ static int ftgmac100_open(struct net_device *netdev) ftgmac100_init_hw(priv); ftgmac100_start_hw(priv, priv->use_ncsi ? 100 : 10); + + /* Clear stale interrupts */ + status = ioread32(priv->base + FTGMAC100_OFFSET_ISR); + iowrite32(status, priv->base + FTGMAC100_OFFSET_ISR); + if (netdev->phydev) phy_start(netdev->phydev); else if (priv->use_ncsi) From e07dc63ba22df2d8bc931821b22938d4ce37a934 Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Thu, 22 Sep 2016 08:35:02 +0930 Subject: [PATCH 1374/1715] net/faraday: Configure old MDIO interface on Aspeed SoCs The Aspeed SoCs have a new MDIO interface as an option in the G4 and G5 SoCs. The old one is still available, so select it in order to remain compatible with the ftgmac100 driver. Signed-off-by: Joel Stanley Signed-off-by: David S. Miller --- drivers/net/ethernet/faraday/ftgmac100.c | 9 +++++++++ drivers/net/ethernet/faraday/ftgmac100.h | 5 +++++ 2 files changed, 14 insertions(+) diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 189373743ddf..e3653b14008a 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -1252,12 +1252,21 @@ static int ftgmac100_setup_mdio(struct net_device *netdev) struct ftgmac100 *priv = netdev_priv(netdev); struct platform_device *pdev = to_platform_device(priv->dev); int i, err = 0; + u32 reg; /* initialize mdio bus */ priv->mii_bus = mdiobus_alloc(); if (!priv->mii_bus) return -EIO; + if (of_machine_is_compatible("aspeed,ast2400") || + of_machine_is_compatible("aspeed,ast2500")) { + /* This driver supports the old MDIO interface */ + reg = ioread32(priv->base + FTGMAC100_OFFSET_REVR); + reg &= ~FTGMAC100_REVR_NEW_MDIO_INTERFACE; + iowrite32(reg, priv->base + FTGMAC100_OFFSET_REVR); + }; + priv->mii_bus->name = "ftgmac100_mdio"; snprintf(priv->mii_bus->id, MII_BUS_ID_SIZE, "%s-%d", pdev->name, pdev->id); diff --git a/drivers/net/ethernet/faraday/ftgmac100.h b/drivers/net/ethernet/faraday/ftgmac100.h index c258586ce4a4..8a377ab1d127 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.h +++ b/drivers/net/ethernet/faraday/ftgmac100.h @@ -133,6 +133,11 @@ #define FTGMAC100_DMAFIFOS_RXDMA_REQ (1 << 30) #define FTGMAC100_DMAFIFOS_TXDMA_REQ (1 << 31) +/* + * Feature Register + */ +#define FTGMAC100_REVR_NEW_MDIO_INTERFACE BIT(31) + /* * Receive buffer size register */ From edcd692fe4816ba6cb240b7a1d8b984ee7082763 Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Thu, 22 Sep 2016 08:35:03 +0930 Subject: [PATCH 1375/1715] net/faraday: Mask out PHYSTS_CHG interrupt The PHYSTS_CHG (the ftgmac100's PHY IRQ) is telling the system to go look at the PHY registers for a link status change. The interrupt was causing issues on Aspeed SoC where some board designs had an active high configuration, some active low, and in some cases repurposed for other functions. When misconfigured Linux would chew 100% of CPU cycles servicing interrupts: [ 20.280000] ftgmac100 1e660000.ethernet eth0: [ISR] = 0x200: PHYSTS_CHG [ 20.280000] ftgmac100 1e660000.ethernet eth0: [ISR] = 0x200: PHYSTS_CHG [ 20.280000] ftgmac100 1e660000.ethernet eth0: [ISR] = 0x200: PHYSTS_CHG [ 20.300000] ftgmac100 1e660000.ethernet eth0: [ISR] = 0x200: PHYSTS_CHG While in the ftgmac100 IP can be configured for high, low and edge sensitivity the current driver always polls the PHY, so we chose to mask out the interrupt. See https://patchwork.ozlabs.org/patch/672099/ for more discussion. Signed-off-by: Joel Stanley Signed-off-by: David S. Miller --- drivers/net/ethernet/faraday/ftgmac100.c | 10 +++------- drivers/net/ethernet/faraday/ftgmac100.h | 1 + 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index e3653b14008a..90f9c5481290 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -1075,14 +1075,12 @@ static int ftgmac100_poll(struct napi_struct *napi, int budget) } if (status & priv->int_mask_all & (FTGMAC100_INT_NO_RXBUF | - FTGMAC100_INT_RPKT_LOST | FTGMAC100_INT_AHB_ERR | - FTGMAC100_INT_PHYSTS_CHG)) { + FTGMAC100_INT_RPKT_LOST | FTGMAC100_INT_AHB_ERR)) { if (net_ratelimit()) - netdev_info(netdev, "[ISR] = 0x%x: %s%s%s%s\n", status, + netdev_info(netdev, "[ISR] = 0x%x: %s%s%s\n", status, status & FTGMAC100_INT_NO_RXBUF ? "NO_RXBUF " : "", status & FTGMAC100_INT_RPKT_LOST ? "RPKT_LOST " : "", - status & FTGMAC100_INT_AHB_ERR ? "AHB_ERR " : "", - status & FTGMAC100_INT_PHYSTS_CHG ? "PHYSTS_CHG" : ""); + status & FTGMAC100_INT_AHB_ERR ? "AHB_ERR " : ""); if (status & FTGMAC100_INT_NO_RXBUF) { /* RX buffer unavailable */ @@ -1390,7 +1388,6 @@ static int ftgmac100_probe(struct platform_device *pdev) FTGMAC100_INT_XPKT_ETH | FTGMAC100_INT_XPKT_LOST | FTGMAC100_INT_AHB_ERR | - FTGMAC100_INT_PHYSTS_CHG | FTGMAC100_INT_RPKT_BUF | FTGMAC100_INT_NO_RXBUF); @@ -1412,7 +1409,6 @@ static int ftgmac100_probe(struct platform_device *pdev) dev_info(&pdev->dev, "Using NCSI interface\n"); priv->use_ncsi = true; - priv->int_mask_all &= ~FTGMAC100_INT_PHYSTS_CHG; priv->ndev = ncsi_register_dev(netdev, ftgmac100_ncsi_handler); if (!priv->ndev) goto err_ncsi_dev; diff --git a/drivers/net/ethernet/faraday/ftgmac100.h b/drivers/net/ethernet/faraday/ftgmac100.h index 8a377ab1d127..a7ce0ac8858a 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.h +++ b/drivers/net/ethernet/faraday/ftgmac100.h @@ -157,6 +157,7 @@ #define FTGMAC100_MACCR_FULLDUP (1 << 8) #define FTGMAC100_MACCR_GIGA_MODE (1 << 9) #define FTGMAC100_MACCR_CRC_APD (1 << 10) +#define FTGMAC100_MACCR_PHY_LINK_LEVEL (1 << 11) #define FTGMAC100_MACCR_RX_RUNT (1 << 12) #define FTGMAC100_MACCR_JUMBO_LF (1 << 13) #define FTGMAC100_MACCR_RX_ALL (1 << 14) From 0d4b103c008ac9f6f438d2618c155f6e868e5a67 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 22 Sep 2016 00:29:31 +0100 Subject: [PATCH 1376/1715] rxrpc: Reduce the number of ACK-Requests sent Reduce the number of ACK-Requests we set on DATA packets that we're sending to reduce network traffic. We set the flag on odd-numbered DATA packets to start off the RTT cache until we have at least three entries in it and then probe once per second thereafter to keep it topped up. This could be made tunable in future. Note that from this point, the RXRPC_REQUEST_ACK flag is set on DATA packets as we transmit them and not stored statically in the sk_buff. Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 1 + net/rxrpc/output.c | 13 +++++++++++-- net/rxrpc/peer_object.c | 1 + net/rxrpc/sendmsg.c | 2 -- 4 files changed, 13 insertions(+), 4 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 1c4597b2c6cd..b13754a6dd7a 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -255,6 +255,7 @@ struct rxrpc_peer { /* calculated RTT cache */ #define RXRPC_RTT_CACHE_SIZE 32 + ktime_t rtt_last_req; /* Time of last RTT request */ u64 rtt; /* Current RTT estimate (in nS) */ u64 rtt_sum; /* Sum of cache contents */ u64 rtt_cache[RXRPC_RTT_CACHE_SIZE]; /* Determined RTT cache */ diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index db01fbb70d23..282cb1e36d06 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -270,6 +270,12 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb) msg.msg_controllen = 0; msg.msg_flags = 0; + /* If our RTT cache needs working on, request an ACK. */ + if ((call->peer->rtt_usage < 3 && sp->hdr.seq & 1) || + ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), + ktime_get_real())) + whdr.flags |= RXRPC_REQUEST_ACK; + if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) { static int lose; if ((lose++ & 7) == 7) { @@ -301,11 +307,14 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb) done: if (ret >= 0) { - skb->tstamp = ktime_get_real(); + ktime_t now = ktime_get_real(); + skb->tstamp = now; smp_wmb(); sp->hdr.serial = serial; - if (whdr.flags & RXRPC_REQUEST_ACK) + if (whdr.flags & RXRPC_REQUEST_ACK) { + call->peer->rtt_last_req = now; trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_data, serial); + } } _leave(" = %d [%u]", ret, call->peer->maxdata); return ret; diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index f3e5766910fd..941b724d523b 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -244,6 +244,7 @@ static void rxrpc_init_peer(struct rxrpc_peer *peer, unsigned long hash_key) peer->hash_key = hash_key; rxrpc_assess_MTU_size(peer); peer->mtu = peer->if_mtu; + peer->rtt_last_req = ktime_get_real(); switch (peer->srx.transport.family) { case AF_INET: diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 607223f4f871..ca7c3be60ad2 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -299,8 +299,6 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, else if (call->tx_top - call->tx_hard_ack < call->tx_winsize) sp->hdr.flags |= RXRPC_MORE_PACKETS; - if (seq & 1) - sp->hdr.flags |= RXRPC_REQUEST_ACK; ret = conn->security->secure_packet( call, skb, skb->mark, skb->head); From fc943f67773487bb85131273f39b5f183caafe95 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 22 Sep 2016 00:29:32 +0100 Subject: [PATCH 1377/1715] rxrpc: Reduce the number of PING ACKs sent We don't want to send a PING ACK for every new incoming call as that just adds to the network traffic. Instead, we send a PING ACK to the first three that we receive and then once per second thereafter. This could probably be made adjustable in future. Signed-off-by: David Howells --- net/rxrpc/call_event.c | 2 +- net/rxrpc/input.c | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index adb2ec61e21f..6e2ea8f4ae75 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -142,7 +142,7 @@ static void rxrpc_resend(struct rxrpc_call *call) struct rxrpc_skb_priv *sp; struct sk_buff *skb; rxrpc_seq_t cursor, seq, top; - ktime_t now = ktime_get_real(), max_age, oldest, resend_at; + ktime_t now = ktime_get_real(), max_age, oldest, resend_at; int ix; u8 annotation, anno_type; diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index c121949de3c8..cbb5d53f09d7 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -44,9 +44,12 @@ static void rxrpc_send_ping(struct rxrpc_call *call, struct sk_buff *skb, int skew) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + ktime_t now = skb->tstamp; - rxrpc_propose_ACK(call, RXRPC_ACK_PING, skew, sp->hdr.serial, - true, true); + if (call->peer->rtt_usage < 3 || + ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), now)) + rxrpc_propose_ACK(call, RXRPC_ACK_PING, skew, sp->hdr.serial, + true, true); } /* From 572de608e36279f249c9a6350f142e69f23dacab Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Thu, 22 Sep 2016 10:33:54 +0800 Subject: [PATCH 1378/1715] net: ethernet: mediatek: add extension of phy-mode for TRGMII adds PHY-mode "trgmii" as an extension for the operation mode of the PHY interface for PHY_INTERFACE_MODE_TRGMII. and adds a variable trgmii inside mtk_mac as the indication to make the difference between the MAC connected to internal switch or connected to external PHY by the given configuration on the board and then to perform the corresponding setup on TRGMII hardware module. Signed-off-by: Sean Wang Cc: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 2 ++ drivers/net/ethernet/mediatek/mtk_eth_soc.h | 3 +++ include/linux/phy.h | 3 +++ 3 files changed, 8 insertions(+) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 2909372c4da0..e873e21fd20e 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -244,6 +244,8 @@ static int mtk_phy_connect(struct mtk_mac *mac) return -ENODEV; switch (of_get_phy_mode(np)) { + case PHY_INTERFACE_MODE_TRGMII: + mac->trgmii = true; case PHY_INTERFACE_MODE_RGMII_TXID: case PHY_INTERFACE_MODE_RGMII_RXID: case PHY_INTERFACE_MODE_RGMII_ID: diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index 7c5e534d2120..e3b9525f94c6 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -529,6 +529,8 @@ struct mtk_eth { * @hw: Backpointer to our main datastruture * @hw_stats: Packet statistics counter * @phy_dev: The attached PHY if available + * @trgmii Indicate if the MAC uses TRGMII connected to internal + switch */ struct mtk_mac { int id; @@ -539,6 +541,7 @@ struct mtk_mac { struct phy_device *phy_dev; __be32 hwlro_ip[MTK_MAX_LRO_IP_CNT]; int hwlro_ip_cnt; + bool trgmii; }; /* the struct describing the SoC. these are declared in the soc_xyz.c files */ diff --git a/include/linux/phy.h b/include/linux/phy.h index 2d24b283aa2d..e25f1830fbcf 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -80,6 +80,7 @@ typedef enum { PHY_INTERFACE_MODE_XGMII, PHY_INTERFACE_MODE_MOCA, PHY_INTERFACE_MODE_QSGMII, + PHY_INTERFACE_MODE_TRGMII, PHY_INTERFACE_MODE_MAX, } phy_interface_t; @@ -123,6 +124,8 @@ static inline const char *phy_modes(phy_interface_t interface) return "moca"; case PHY_INTERFACE_MODE_QSGMII: return "qsgmii"; + case PHY_INTERFACE_MODE_TRGMII: + return "trgmii"; default: return "unknown"; } From f430dea7c150dab2c103d28fa32efb59b5ae80b4 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Thu, 22 Sep 2016 10:33:55 +0800 Subject: [PATCH 1379/1715] net: ethernet: mediatek: add support for GMAC0 connecting with external PHY through TRGMII Changing dynamically source clock, TX/RX delay and interface mode used by TRGMII hardware module inside PHY capability polling routine for adapting to the various speed of RGMII used by external PHY for GMAC0. Signed-off-by: Sean Wang Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 32 ++++++++++++++++++++- drivers/net/ethernet/mediatek/mtk_eth_soc.h | 31 +++++++++++++++++++- 2 files changed, 61 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index e873e21fd20e..ec60794f802c 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -52,7 +52,7 @@ static const struct mtk_ethtool_stats { }; static const char * const mtk_clks_source_name[] = { - "ethif", "esw", "gp1", "gp2" + "ethif", "esw", "gp1", "gp2", "trgpll" }; void mtk_w32(struct mtk_eth *eth, u32 val, unsigned reg) @@ -135,6 +135,33 @@ static int mtk_mdio_read(struct mii_bus *bus, int phy_addr, int phy_reg) return _mtk_mdio_read(eth, phy_addr, phy_reg); } +static void mtk_gmac0_rgmii_adjust(struct mtk_eth *eth, int speed) +{ + u32 val; + int ret; + + val = (speed == SPEED_1000) ? + INTF_MODE_RGMII_1000 : INTF_MODE_RGMII_10_100; + mtk_w32(eth, val, INTF_MODE); + + regmap_update_bits(eth->ethsys, ETHSYS_CLKCFG0, + ETHSYS_TRGMII_CLK_SEL362_5, + ETHSYS_TRGMII_CLK_SEL362_5); + + val = (speed == SPEED_1000) ? 250000000 : 500000000; + ret = clk_set_rate(eth->clks[MTK_CLK_TRGPLL], val); + if (ret) + dev_err(eth->dev, "Failed to set trgmii pll: %d\n", ret); + + val = (speed == SPEED_1000) ? + RCK_CTRL_RGMII_1000 : RCK_CTRL_RGMII_10_100; + mtk_w32(eth, val, TRGMII_RCK_CTRL); + + val = (speed == SPEED_1000) ? + TCK_CTRL_RGMII_1000 : TCK_CTRL_RGMII_10_100; + mtk_w32(eth, val, TRGMII_TCK_CTRL); +} + static void mtk_phy_link_adjust(struct net_device *dev) { struct mtk_mac *mac = netdev_priv(dev); @@ -157,6 +184,9 @@ static void mtk_phy_link_adjust(struct net_device *dev) break; }; + if (mac->id == 0 && !mac->trgmii) + mtk_gmac0_rgmii_adjust(mac->hw, mac->phy_dev->speed); + if (mac->phy_dev->link) mcr |= MAC_MCR_FORCE_LINK; diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index e3b9525f94c6..e52115633726 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -313,6 +313,30 @@ MAC_MCR_FORCE_TX_FC | MAC_MCR_SPEED_1000 | \ MAC_MCR_FORCE_DPX | MAC_MCR_FORCE_LINK) +/* TRGMII RXC control register */ +#define TRGMII_RCK_CTRL 0x10300 +#define DQSI0(x) ((x << 0) & GENMASK(6, 0)) +#define DQSI1(x) ((x << 8) & GENMASK(14, 8)) +#define RXCTL_DMWTLAT(x) ((x << 16) & GENMASK(18, 16)) +#define RXC_DQSISEL BIT(30) +#define RCK_CTRL_RGMII_1000 (RXC_DQSISEL | RXCTL_DMWTLAT(2) | DQSI1(16)) +#define RCK_CTRL_RGMII_10_100 RXCTL_DMWTLAT(2) + +/* TRGMII RXC control register */ +#define TRGMII_TCK_CTRL 0x10340 +#define TXCTL_DMWTLAT(x) ((x << 16) & GENMASK(18, 16)) +#define TXC_INV BIT(30) +#define TCK_CTRL_RGMII_1000 TXCTL_DMWTLAT(2) +#define TCK_CTRL_RGMII_10_100 (TXC_INV | TXCTL_DMWTLAT(2)) + +/* TRGMII Interface mode register */ +#define INTF_MODE 0x10390 +#define TRGMII_INTF_DIS BIT(0) +#define TRGMII_MODE BIT(1) +#define TRGMII_CENTRAL_ALIGNED BIT(2) +#define INTF_MODE_RGMII_1000 (TRGMII_MODE | TRGMII_CENTRAL_ALIGNED) +#define INTF_MODE_RGMII_10_100 0 + /* GPIO port control registers for GMAC 2*/ #define GPIO_OD33_CTRL8 0x4c0 #define GPIO_BIAS_CTRL 0xed0 @@ -323,7 +347,11 @@ #define SYSCFG0_GE_MASK 0x3 #define SYSCFG0_GE_MODE(x, y) (x << (12 + (y * 2))) -/*ethernet reset control register*/ +/* ethernet subsystem clock register */ +#define ETHSYS_CLKCFG0 0x2c +#define ETHSYS_TRGMII_CLK_SEL362_5 BIT(11) + +/* ethernet reset control register */ #define ETHSYS_RSTCTRL 0x34 #define RSTCTRL_FE BIT(6) #define RSTCTRL_PPE BIT(31) @@ -389,6 +417,7 @@ enum mtk_clks_map { MTK_CLK_ESW, MTK_CLK_GP1, MTK_CLK_GP2, + MTK_CLK_TRGPLL, MTK_CLK_MAX }; From b88539658a597e649627c4cc2d446456fc01e104 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Thu, 22 Sep 2016 10:33:56 +0800 Subject: [PATCH 1380/1715] net: ethernet: mediatek: add the dts property to set if TRGMII supported on GMAC0 Add the dts property for the capability if TRGMII supported on GAMC0 Signed-off-by: Sean Wang Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/mediatek-net.txt | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/mediatek-net.txt b/Documentation/devicetree/bindings/net/mediatek-net.txt index 6103e5583070..7111278adc7e 100644 --- a/Documentation/devicetree/bindings/net/mediatek-net.txt +++ b/Documentation/devicetree/bindings/net/mediatek-net.txt @@ -31,7 +31,10 @@ Optional properties: Required properties: - compatible: Should be "mediatek,eth-mac" - reg: The number of the MAC -- phy-handle: see ethernet.txt file in the same directory. +- phy-handle: see ethernet.txt file in the same directory and + the phy-mode "trgmii" required being provided when reg + is equal to 0 and the MAC uses fixed-link to connect + with inernal switch such as MT7530. Example: From 2364c5c5ec14e936826eb10af56a337ccec01ffa Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Thu, 22 Sep 2016 16:33:35 +0800 Subject: [PATCH 1381/1715] net: ethernet: mediatek: use phydev from struct net_device reuse phydev already in struct net_device instead of creating another new one in private structure. Signed-off-by: Sean Wang Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 73 ++++++++++----------- drivers/net/ethernet/mediatek/mtk_eth_soc.h | 2 - 2 files changed, 36 insertions(+), 39 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index ec60794f802c..6b7acf4463fa 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -175,7 +175,7 @@ static void mtk_phy_link_adjust(struct net_device *dev) if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state))) return; - switch (mac->phy_dev->speed) { + switch (dev->phydev->speed) { case SPEED_1000: mcr |= MAC_MCR_SPEED_1000; break; @@ -185,22 +185,22 @@ static void mtk_phy_link_adjust(struct net_device *dev) }; if (mac->id == 0 && !mac->trgmii) - mtk_gmac0_rgmii_adjust(mac->hw, mac->phy_dev->speed); + mtk_gmac0_rgmii_adjust(mac->hw, dev->phydev->speed); - if (mac->phy_dev->link) + if (dev->phydev->link) mcr |= MAC_MCR_FORCE_LINK; - if (mac->phy_dev->duplex) { + if (dev->phydev->duplex) { mcr |= MAC_MCR_FORCE_DPX; - if (mac->phy_dev->pause) + if (dev->phydev->pause) rmt_adv = LPA_PAUSE_CAP; - if (mac->phy_dev->asym_pause) + if (dev->phydev->asym_pause) rmt_adv |= LPA_PAUSE_ASYM; - if (mac->phy_dev->advertising & ADVERTISED_Pause) + if (dev->phydev->advertising & ADVERTISED_Pause) lcl_adv |= ADVERTISE_PAUSE_CAP; - if (mac->phy_dev->advertising & ADVERTISED_Asym_Pause) + if (dev->phydev->advertising & ADVERTISED_Asym_Pause) lcl_adv |= ADVERTISE_PAUSE_ASYM; flowctrl = mii_resolve_flowctrl_fdx(lcl_adv, rmt_adv); @@ -217,7 +217,7 @@ static void mtk_phy_link_adjust(struct net_device *dev) mtk_w32(mac->hw, mcr, MTK_MAC_MCR(mac->id)); - if (mac->phy_dev->link) + if (dev->phydev->link) netif_carrier_on(dev); else netif_carrier_off(dev); @@ -255,17 +255,17 @@ static int mtk_phy_connect_node(struct mtk_eth *eth, struct mtk_mac *mac, mac->id, phydev_name(phydev), phydev->phy_id, phydev->drv->name); - mac->phy_dev = phydev; - return 0; } -static int mtk_phy_connect(struct mtk_mac *mac) +static int mtk_phy_connect(struct net_device *dev) { - struct mtk_eth *eth = mac->hw; + struct mtk_mac *mac = netdev_priv(dev); + struct mtk_eth *eth; struct device_node *np; u32 val; + eth = mac->hw; np = of_parse_phandle(mac->of_node, "phy-handle", 0); if (!np && of_phy_is_fixed_link(mac->of_node)) if (!of_phy_register_fixed_link(mac->of_node)) @@ -303,20 +303,21 @@ static int mtk_phy_connect(struct mtk_mac *mac) val |= SYSCFG0_GE_MODE(mac->ge_mode, mac->id); regmap_write(eth->ethsys, ETHSYS_SYSCFG0, val); + /* couple phydev to net_device */ mtk_phy_connect_node(eth, mac, np); - mac->phy_dev->autoneg = AUTONEG_ENABLE; - mac->phy_dev->speed = 0; - mac->phy_dev->duplex = 0; + dev->phydev->autoneg = AUTONEG_ENABLE; + dev->phydev->speed = 0; + dev->phydev->duplex = 0; if (of_phy_is_fixed_link(mac->of_node)) - mac->phy_dev->supported |= + dev->phydev->supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause; - mac->phy_dev->supported &= PHY_GBIT_FEATURES | SUPPORTED_Pause | + dev->phydev->supported &= PHY_GBIT_FEATURES | SUPPORTED_Pause | SUPPORTED_Asym_Pause; - mac->phy_dev->advertising = mac->phy_dev->supported | + dev->phydev->advertising = dev->phydev->supported | ADVERTISED_Autoneg; - phy_start_aneg(mac->phy_dev); + phy_start_aneg(dev->phydev); of_node_put(np); @@ -1742,7 +1743,7 @@ static int mtk_open(struct net_device *dev) } atomic_inc(ð->dma_refcnt); - phy_start(mac->phy_dev); + phy_start(dev->phydev); netif_start_queue(dev); return 0; @@ -1777,7 +1778,7 @@ static int mtk_stop(struct net_device *dev) struct mtk_eth *eth = mac->hw; netif_tx_disable(dev); - phy_stop(mac->phy_dev); + phy_stop(dev->phydev); /* only shutdown DMA if this is the last user */ if (!atomic_dec_and_test(ð->dma_refcnt)) @@ -1917,7 +1918,7 @@ static int __init mtk_init(struct net_device *dev) dev->addr_assign_type = NET_ADDR_RANDOM; } - return mtk_phy_connect(mac); + return mtk_phy_connect(dev); } static void mtk_uninit(struct net_device *dev) @@ -1925,20 +1926,18 @@ static void mtk_uninit(struct net_device *dev) struct mtk_mac *mac = netdev_priv(dev); struct mtk_eth *eth = mac->hw; - phy_disconnect(mac->phy_dev); + phy_disconnect(dev->phydev); mtk_irq_disable(eth, MTK_QDMA_INT_MASK, ~0); mtk_irq_disable(eth, MTK_PDMA_INT_MASK, ~0); } static int mtk_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { - struct mtk_mac *mac = netdev_priv(dev); - switch (cmd) { case SIOCGMIIPHY: case SIOCGMIIREG: case SIOCSMIIREG: - return phy_mii_ioctl(mac->phy_dev, ifr, cmd); + return phy_mii_ioctl(dev->phydev, ifr, cmd); default: break; } @@ -1983,7 +1982,7 @@ static void mtk_pending_work(struct work_struct *work) if (!eth->mac[i] || of_phy_is_fixed_link(eth->mac[i]->of_node)) continue; - err = phy_init_hw(eth->mac[i]->phy_dev); + err = phy_init_hw(eth->netdev[i]->phydev); if (err) dev_err(eth->dev, "%s: PHY init failed.\n", eth->netdev[i]->name); @@ -2052,11 +2051,11 @@ static int mtk_get_settings(struct net_device *dev, if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state))) return -EBUSY; - err = phy_read_status(mac->phy_dev); + err = phy_read_status(dev->phydev); if (err) return -ENODEV; - return phy_ethtool_gset(mac->phy_dev, cmd); + return phy_ethtool_gset(dev->phydev, cmd); } static int mtk_set_settings(struct net_device *dev, @@ -2064,14 +2063,14 @@ static int mtk_set_settings(struct net_device *dev, { struct mtk_mac *mac = netdev_priv(dev); - if (cmd->phy_address != mac->phy_dev->mdio.addr) { - mac->phy_dev = mdiobus_get_phy(mac->hw->mii_bus, + if (cmd->phy_address != dev->phydev->mdio.addr) { + dev->phydev = mdiobus_get_phy(mac->hw->mii_bus, cmd->phy_address); - if (!mac->phy_dev) + if (!dev->phydev) return -ENODEV; } - return phy_ethtool_sset(mac->phy_dev, cmd); + return phy_ethtool_sset(dev->phydev, cmd); } static void mtk_get_drvinfo(struct net_device *dev, @@ -2105,7 +2104,7 @@ static int mtk_nway_reset(struct net_device *dev) if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state))) return -EBUSY; - return genphy_restart_aneg(mac->phy_dev); + return genphy_restart_aneg(dev->phydev); } static u32 mtk_get_link(struct net_device *dev) @@ -2116,11 +2115,11 @@ static u32 mtk_get_link(struct net_device *dev) if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state))) return -EBUSY; - err = genphy_update_link(mac->phy_dev); + err = genphy_update_link(dev->phydev); if (err) return ethtool_op_get_link(dev); - return mac->phy_dev->link; + return dev->phydev->link; } static void mtk_get_strings(struct net_device *dev, u32 stringset, u8 *data) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index e52115633726..7e194f795c9a 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -557,7 +557,6 @@ struct mtk_eth { * @of_node: Our devicetree node * @hw: Backpointer to our main datastruture * @hw_stats: Packet statistics counter - * @phy_dev: The attached PHY if available * @trgmii Indicate if the MAC uses TRGMII connected to internal switch */ @@ -567,7 +566,6 @@ struct mtk_mac { struct device_node *of_node; struct mtk_eth *hw; struct mtk_hw_stats *hw_stats; - struct phy_device *phy_dev; __be32 hwlro_ip[MTK_MAX_LRO_IP_CNT]; int hwlro_ip_cnt; bool trgmii; From a2b2a19f0fbc674478a6806ea9e4f6aff06763f8 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Thu, 22 Sep 2016 16:36:15 +0800 Subject: [PATCH 1382/1715] net: ethernet: mediatek: remove superfluous local variable for phy address remove the unused variable for parsing PHY address and the related logic for sanity test which would be all already handled done when of_mdiobus_register was called Reported-by: Nelson Chang Signed-off-by: Sean Wang Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 6b7acf4463fa..1918c39bbd40 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -226,17 +226,9 @@ static void mtk_phy_link_adjust(struct net_device *dev) static int mtk_phy_connect_node(struct mtk_eth *eth, struct mtk_mac *mac, struct device_node *phy_node) { - const __be32 *_addr = NULL; struct phy_device *phydev; - int phy_mode, addr; + int phy_mode; - _addr = of_get_property(phy_node, "reg", NULL); - - if (!_addr || (be32_to_cpu(*_addr) >= 0x20)) { - pr_err("%s: invalid phy address\n", phy_node->name); - return -EINVAL; - } - addr = be32_to_cpu(*_addr); phy_mode = of_get_phy_mode(phy_node); if (phy_mode < 0) { dev_err(eth->dev, "incorrect phy-mode %d\n", phy_mode); From 3e60b748fd2f50bbdb6a61fdd48222cd492c77e3 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Thu, 22 Sep 2016 16:42:03 +0800 Subject: [PATCH 1383/1715] net: ethernet: mediatek: use [get|set]_link_ksettings 1) use new api [get|set]_link_ksettings instead of [get|set]_settings old ones. 2) dev->phydev is sure being ready before calling these callbacks, so removing all the sanity check if it is existing. Signed-off-by: Sean Wang Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 33 ++++++++------------- 1 file changed, 12 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 1918c39bbd40..a65801a5e2ee 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -2034,35 +2034,26 @@ static int mtk_cleanup(struct mtk_eth *eth) return 0; } -static int mtk_get_settings(struct net_device *dev, - struct ethtool_cmd *cmd) +int mtk_get_link_ksettings(struct net_device *ndev, + struct ethtool_link_ksettings *cmd) { - struct mtk_mac *mac = netdev_priv(dev); - int err; + struct mtk_mac *mac = netdev_priv(ndev); if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state))) return -EBUSY; - err = phy_read_status(dev->phydev); - if (err) - return -ENODEV; - - return phy_ethtool_gset(dev->phydev, cmd); + return phy_ethtool_ksettings_get(ndev->phydev, cmd); } -static int mtk_set_settings(struct net_device *dev, - struct ethtool_cmd *cmd) +int mtk_set_link_ksettings(struct net_device *ndev, + const struct ethtool_link_ksettings *cmd) { - struct mtk_mac *mac = netdev_priv(dev); + struct mtk_mac *mac = netdev_priv(ndev); - if (cmd->phy_address != dev->phydev->mdio.addr) { - dev->phydev = mdiobus_get_phy(mac->hw->mii_bus, - cmd->phy_address); - if (!dev->phydev) - return -ENODEV; - } + if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state))) + return -EBUSY; - return phy_ethtool_sset(dev->phydev, cmd); + return phy_ethtool_ksettings_set(ndev->phydev, cmd); } static void mtk_get_drvinfo(struct net_device *dev, @@ -2225,8 +2216,8 @@ static int mtk_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) } static const struct ethtool_ops mtk_ethtool_ops = { - .get_settings = mtk_get_settings, - .set_settings = mtk_set_settings, + .get_link_ksettings = mtk_get_link_ksettings, + .set_link_ksettings = mtk_set_link_ksettings, .get_drvinfo = mtk_get_drvinfo, .get_msglevel = mtk_get_msglevel, .set_msglevel = mtk_set_msglevel, From f6f7d9c03f5daae04449ad19de5e2f0c20c5eaac Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Thu, 22 Sep 2016 16:44:16 +0800 Subject: [PATCH 1384/1715] net: ethernet: mediatek: get out of potential invalid pointer access Potential dangerous invalid pointer might be accessed if the error happens when couple phy_device to net_device so cleanup the error path. Signed-off-by: Sean Wang Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index a65801a5e2ee..3d7e0cb601b6 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -296,7 +296,9 @@ static int mtk_phy_connect(struct net_device *dev) regmap_write(eth->ethsys, ETHSYS_SYSCFG0, val); /* couple phydev to net_device */ - mtk_phy_connect_node(eth, mac, np); + if (mtk_phy_connect_node(eth, mac, np)) + goto err_phy; + dev->phydev->autoneg = AUTONEG_ENABLE; dev->phydev->speed = 0; dev->phydev->duplex = 0; @@ -317,7 +319,7 @@ static int mtk_phy_connect(struct net_device *dev) err_phy: of_node_put(np); - dev_err(eth->dev, "invalid phy_mode\n"); + dev_err(eth->dev, "%s: invalid phy\n", __func__); return -EINVAL; } From 0364a8824c020f12e2d5e9fad963685b58f7574e Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 22 Sep 2016 11:06:25 +0200 Subject: [PATCH 1385/1715] xen-netback: switch to threaded irq for control ring Instead of open coding it use the threaded irq mechanism in xen-netback. Signed-off-by: Juergen Gross Acked-by: Wei Liu Signed-off-by: David S. Miller --- drivers/net/xen-netback/common.h | 4 +-- drivers/net/xen-netback/interface.c | 38 +++++------------------------ drivers/net/xen-netback/netback.c | 18 +++----------- 3 files changed, 11 insertions(+), 49 deletions(-) diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index 3a562683603c..ff94c513fe20 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -292,8 +292,6 @@ struct xenvif { #endif struct xen_netif_ctrl_back_ring ctrl; - struct task_struct *ctrl_task; - wait_queue_head_t ctrl_wq; unsigned int ctrl_irq; /* Miscellaneous private stuff. */ @@ -359,7 +357,7 @@ void xenvif_kick_thread(struct xenvif_queue *queue); int xenvif_dealloc_kthread(void *data); -int xenvif_ctrl_kthread(void *data); +irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data); void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb); diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index 83deeebfc2d1..fb50c6d5f6c3 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -128,15 +128,6 @@ irqreturn_t xenvif_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -irqreturn_t xenvif_ctrl_interrupt(int irq, void *dev_id) -{ - struct xenvif *vif = dev_id; - - wake_up(&vif->ctrl_wq); - - return IRQ_HANDLED; -} - int xenvif_queue_stopped(struct xenvif_queue *queue) { struct net_device *dev = queue->vif->dev; @@ -570,8 +561,7 @@ int xenvif_connect_ctrl(struct xenvif *vif, grant_ref_t ring_ref, struct net_device *dev = vif->dev; void *addr; struct xen_netif_ctrl_sring *shared; - struct task_struct *task; - int err = -ENOMEM; + int err; err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif), &ring_ref, 1, &addr); @@ -581,11 +571,7 @@ int xenvif_connect_ctrl(struct xenvif *vif, grant_ref_t ring_ref, shared = (struct xen_netif_ctrl_sring *)addr; BACK_RING_INIT(&vif->ctrl, shared, XEN_PAGE_SIZE); - init_waitqueue_head(&vif->ctrl_wq); - - err = bind_interdomain_evtchn_to_irqhandler(vif->domid, evtchn, - xenvif_ctrl_interrupt, - 0, dev->name, vif); + err = bind_interdomain_evtchn_to_irq(vif->domid, evtchn); if (err < 0) goto err_unmap; @@ -593,19 +579,13 @@ int xenvif_connect_ctrl(struct xenvif *vif, grant_ref_t ring_ref, xenvif_init_hash(vif); - task = kthread_create(xenvif_ctrl_kthread, (void *)vif, - "%s-control", dev->name); - if (IS_ERR(task)) { - pr_warn("Could not allocate kthread for %s\n", dev->name); - err = PTR_ERR(task); + err = request_threaded_irq(vif->ctrl_irq, NULL, xenvif_ctrl_irq_fn, + IRQF_ONESHOT, "xen-netback-ctrl", vif); + if (err) { + pr_warn("Could not setup irq handler for %s\n", dev->name); goto err_deinit; } - get_task_struct(task); - vif->ctrl_task = task; - - wake_up_process(vif->ctrl_task); - return 0; err_deinit: @@ -774,12 +754,6 @@ void xenvif_disconnect_data(struct xenvif *vif) void xenvif_disconnect_ctrl(struct xenvif *vif) { - if (vif->ctrl_task) { - kthread_stop(vif->ctrl_task); - put_task_struct(vif->ctrl_task); - vif->ctrl_task = NULL; - } - if (vif->ctrl_irq) { xenvif_deinit_hash(vif); unbind_from_irqhandler(vif->ctrl_irq, vif); diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index edbae0b1e8f0..3d0c989384b5 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -2359,24 +2359,14 @@ static bool xenvif_ctrl_work_todo(struct xenvif *vif) return 0; } -int xenvif_ctrl_kthread(void *data) +irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data) { struct xenvif *vif = data; - for (;;) { - wait_event_interruptible(vif->ctrl_wq, - xenvif_ctrl_work_todo(vif) || - kthread_should_stop()); - if (kthread_should_stop()) - break; + while (xenvif_ctrl_work_todo(vif)) + xenvif_ctrl_action(vif); - while (xenvif_ctrl_work_todo(vif)) - xenvif_ctrl_action(vif); - - cond_resched(); - } - - return 0; + return IRQ_HANDLED; } static int __init netback_init(void) From 2b03bf732488a3c2e920afe22c03b82cb8477e28 Mon Sep 17 00:00:00 2001 From: Laura Garcia Liebana Date: Tue, 13 Sep 2016 13:49:53 +0200 Subject: [PATCH 1386/1715] netfilter: nft_numgen: add number generation offset Add support of an offset value for incremental counter and random. With this option the sysadmin is able to start the counter to a certain value and then apply the generated number. Example: meta mark set numgen inc mod 2 offset 100 This will generate marks with the serie 100, 101, 100, 101, ... Suggested-by: Pablo Neira Ayuso Signed-off-by: Laura Garcia Liebana Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 ++ net/netfilter/nft_numgen.c | 32 +++++++++++++++++++----- 2 files changed, 28 insertions(+), 6 deletions(-) diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index bc0eb6a1066d..bcfb892ff148 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -1136,12 +1136,14 @@ enum nft_trace_types { * @NFTA_NG_DREG: destination register (NLA_U32) * @NFTA_NG_MODULUS: maximum counter value (NLA_U32) * @NFTA_NG_TYPE: operation type (NLA_U32) + * @NFTA_NG_OFFSET: offset to be added to the counter (NLA_U32) */ enum nft_ng_attributes { NFTA_NG_UNSPEC, NFTA_NG_DREG, NFTA_NG_MODULUS, NFTA_NG_TYPE, + NFTA_NG_OFFSET, __NFTA_NG_MAX }; #define NFTA_NG_MAX (__NFTA_NG_MAX - 1) diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c index f173ebec30a7..55bc5ab78d4a 100644 --- a/net/netfilter/nft_numgen.c +++ b/net/netfilter/nft_numgen.c @@ -23,6 +23,7 @@ struct nft_ng_inc { enum nft_registers dreg:8; u32 modulus; atomic_t counter; + u32 offset; }; static void nft_ng_inc_eval(const struct nft_expr *expr, @@ -37,13 +38,14 @@ static void nft_ng_inc_eval(const struct nft_expr *expr, nval = (oval + 1 < priv->modulus) ? oval + 1 : 0; } while (atomic_cmpxchg(&priv->counter, oval, nval) != oval); - regs->data[priv->dreg] = nval; + regs->data[priv->dreg] = nval + priv->offset; } static const struct nla_policy nft_ng_policy[NFTA_NG_MAX + 1] = { [NFTA_NG_DREG] = { .type = NLA_U32 }, [NFTA_NG_MODULUS] = { .type = NLA_U32 }, [NFTA_NG_TYPE] = { .type = NLA_U32 }, + [NFTA_NG_OFFSET] = { .type = NLA_U32 }, }; static int nft_ng_inc_init(const struct nft_ctx *ctx, @@ -52,10 +54,16 @@ static int nft_ng_inc_init(const struct nft_ctx *ctx, { struct nft_ng_inc *priv = nft_expr_priv(expr); + if (tb[NFTA_NG_OFFSET]) + priv->offset = ntohl(nla_get_be32(tb[NFTA_NG_OFFSET])); + priv->modulus = ntohl(nla_get_be32(tb[NFTA_NG_MODULUS])); if (priv->modulus == 0) return -ERANGE; + if (priv->offset + priv->modulus - 1 < priv->offset) + return -EOVERFLOW; + priv->dreg = nft_parse_register(tb[NFTA_NG_DREG]); atomic_set(&priv->counter, 0); @@ -64,7 +72,7 @@ static int nft_ng_inc_init(const struct nft_ctx *ctx, } static int nft_ng_dump(struct sk_buff *skb, enum nft_registers dreg, - u32 modulus, enum nft_ng_types type) + u32 modulus, enum nft_ng_types type, u32 offset) { if (nft_dump_register(skb, NFTA_NG_DREG, dreg)) goto nla_put_failure; @@ -72,6 +80,8 @@ static int nft_ng_dump(struct sk_buff *skb, enum nft_registers dreg, goto nla_put_failure; if (nla_put_be32(skb, NFTA_NG_TYPE, htonl(type))) goto nla_put_failure; + if (nla_put_be32(skb, NFTA_NG_OFFSET, htonl(offset))) + goto nla_put_failure; return 0; @@ -83,12 +93,14 @@ static int nft_ng_inc_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct nft_ng_inc *priv = nft_expr_priv(expr); - return nft_ng_dump(skb, priv->dreg, priv->modulus, NFT_NG_INCREMENTAL); + return nft_ng_dump(skb, priv->dreg, priv->modulus, NFT_NG_INCREMENTAL, + priv->offset); } struct nft_ng_random { enum nft_registers dreg:8; u32 modulus; + u32 offset; }; static void nft_ng_random_eval(const struct nft_expr *expr, @@ -97,9 +109,10 @@ static void nft_ng_random_eval(const struct nft_expr *expr, { struct nft_ng_random *priv = nft_expr_priv(expr); struct rnd_state *state = this_cpu_ptr(&nft_numgen_prandom_state); + u32 val; - regs->data[priv->dreg] = reciprocal_scale(prandom_u32_state(state), - priv->modulus); + val = reciprocal_scale(prandom_u32_state(state), priv->modulus); + regs->data[priv->dreg] = val + priv->offset; } static int nft_ng_random_init(const struct nft_ctx *ctx, @@ -108,10 +121,16 @@ static int nft_ng_random_init(const struct nft_ctx *ctx, { struct nft_ng_random *priv = nft_expr_priv(expr); + if (tb[NFTA_NG_OFFSET]) + priv->offset = ntohl(nla_get_be32(tb[NFTA_NG_OFFSET])); + priv->modulus = ntohl(nla_get_be32(tb[NFTA_NG_MODULUS])); if (priv->modulus == 0) return -ERANGE; + if (priv->offset + priv->modulus - 1 < priv->offset) + return -EOVERFLOW; + prandom_init_once(&nft_numgen_prandom_state); priv->dreg = nft_parse_register(tb[NFTA_NG_DREG]); @@ -124,7 +143,8 @@ static int nft_ng_random_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct nft_ng_random *priv = nft_expr_priv(expr); - return nft_ng_dump(skb, priv->dreg, priv->modulus, NFT_NG_RANDOM); + return nft_ng_dump(skb, priv->dreg, priv->modulus, NFT_NG_RANDOM, + priv->offset); } static struct nft_expr_type nft_ng_type; From dd7e39bbfce1fa6de8315d790d1fe01e92cba44d Mon Sep 17 00:00:00 2001 From: Arek Lichwa Date: Thu, 22 Sep 2016 14:08:05 +0200 Subject: [PATCH 1387/1715] Bluetooth: Fix NULL pointer dereference in mgmt context Adds missing callback assignment to cmd_complete in pending management command context. Dump path involves security procedure performed on legacy (pre-SSP) devices with service security requirements set to HIGH (16digits PIN). It fails when shorter PIN is delivered by user. [ 1.517950] Bluetooth: PIN code is not 16 bytes long [ 1.518491] BUG: unable to handle kernel NULL pointer dereference at (null) [ 1.518584] IP: [< (null)>] (null) [ 1.518584] PGD 9e08067 PUD 9fdf067 PMD 0 [ 1.518584] Oops: 0010 [#1] SMP [ 1.518584] Modules linked in: [ 1.518584] CPU: 0 PID: 1002 Comm: kworker/u3:2 Not tainted 4.8.0-rc6-354649-gaf4168c #16 [ 1.518584] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.9.3-20160701_074356-anatol 04/01/2014 [ 1.518584] Workqueue: hci0 hci_rx_work [ 1.518584] task: ffff880009ce14c0 task.stack: ffff880009e10000 [ 1.518584] RIP: 0010:[<0000000000000000>] [< (null)>] (null) [ 1.518584] RSP: 0018:ffff880009e13bc8 EFLAGS: 00010293 [ 1.518584] RAX: 0000000000000000 RBX: ffff880009eed100 RCX: 0000000000000006 [ 1.518584] RDX: ffff880009ddc000 RSI: 0000000000000000 RDI: ffff880009eed100 [ 1.518584] RBP: ffff880009e13be0 R08: 0000000000000000 R09: 0000000000000001 [ 1.518584] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 [ 1.518584] R13: ffff880009e13ccd R14: ffff880009ddc000 R15: ffff880009ddc010 [ 1.518584] FS: 0000000000000000(0000) GS:ffff88000bc00000(0000) knlGS:0000000000000000 [ 1.518584] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1.518584] CR2: 0000000000000000 CR3: 0000000009fdd000 CR4: 00000000000006f0 [ 1.518584] Stack: [ 1.518584] ffffffff81909808 ffff880009e13cce ffff880009e0d40b ffff880009e13c68 [ 1.518584] ffffffff818f428d 00000000024000c0 ffff880009e13c08 ffffffff810ca903 [ 1.518584] ffff880009e13c48 ffffffff811ade34 ffffffff8178c31f ffff880009ee6200 [ 1.518584] Call Trace: [ 1.518584] [] ? mgmt_pin_code_neg_reply_complete+0x38/0x60 [ 1.518584] [] hci_cmd_complete_evt+0x69d/0x3200 [ 1.518584] [] ? rcu_read_lock_sched_held+0x53/0x60 [ 1.518584] [] ? kmem_cache_alloc+0x1a4/0x200 [ 1.518584] [] ? skb_clone+0x4f/0xa0 [ 1.518584] [] hci_event_packet+0x8e1/0x28e0 [ 1.518584] [] ? _raw_spin_unlock_irqrestore+0x31/0x50 [ 1.518584] [] ? trace_hardirqs_on_caller+0xee/0x1b0 [ 1.518584] [] hci_rx_work+0x1e1/0x5b0 [ 1.518584] [] ? process_one_work+0x1ed/0x6b0 [ 1.518584] [] process_one_work+0x268/0x6b0 [ 1.518584] [] ? process_one_work+0x1ed/0x6b0 [ 1.518584] [] worker_thread+0x43/0x4e0 [ 1.518584] [] ? process_one_work+0x6b0/0x6b0 [ 1.518584] [] ? process_one_work+0x6b0/0x6b0 [ 1.518584] [] kthread+0xdf/0x100 [ 1.518584] [] ret_from_fork+0x1f/0x40 [ 1.518584] [] ? kthread_create_on_node+0x210/0x210 Signed-off-by: Arek Lichwa Signed-off-by: Marcel Holtmann --- net/bluetooth/mgmt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 7b2bac492fb1..63f42f45a96a 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2541,6 +2541,8 @@ static int send_pin_code_neg_reply(struct sock *sk, struct hci_dev *hdev, if (!cmd) return -ENOMEM; + cmd->cmd_complete = addr_cmd_complete; + err = hci_send_cmd(hdev, HCI_OP_PIN_CODE_NEG_REPLY, sizeof(cp->addr.bdaddr), &cp->addr.bdaddr); if (err < 0) From 1144a4eed04b2c3e7d20146d1b76f7669b55971d Mon Sep 17 00:00:00 2001 From: Dmitry Tunin Date: Wed, 21 Sep 2016 19:13:08 +0300 Subject: [PATCH 1388/1715] Bluetooth: Add a new 04ca:3011 QCA_ROME device BugLink: https://bugs.launchpad.net/bugs/1535802 T: Bus=01 Lev=02 Prnt=02 Port=04 Cnt=01 Dev#= 3 Spd=12 MxCh= 0 D: Ver= 1.10 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=04ca ProdID=3011 Rev=00.01 C: #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=100mA I: If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb I: If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb Signed-off-by: Dmitry Tunin Signed-off-by: Marcel Holtmann Cc: stable@vger.kernel.org --- drivers/bluetooth/btusb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 9ebd73dd7915..6bd63b84abd0 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -253,6 +253,7 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x0cf3, 0xe300), .driver_info = BTUSB_QCA_ROME }, { USB_DEVICE(0x0cf3, 0xe360), .driver_info = BTUSB_QCA_ROME }, { USB_DEVICE(0x0489, 0xe092), .driver_info = BTUSB_QCA_ROME }, + { USB_DEVICE(0x04ca, 0x3011), .driver_info = BTUSB_QCA_ROME }, /* Broadcom BCM2035 */ { USB_DEVICE(0x0a5c, 0x2009), .driver_info = BTUSB_BCM92035 }, From 7dc6f16c68757548a332a0c5fbe661987c2189a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Narajowski?= Date: Thu, 22 Sep 2016 16:01:39 +0200 Subject: [PATCH 1389/1715] Bluetooth: Fix not updating scan rsp when adv off MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Scan response data should not be updated unless there is an advertising instance. Signed-off-by: Michał Narajowski Signed-off-by: Marcel Holtmann --- net/bluetooth/mgmt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 63f42f45a96a..19b8a5e9420d 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -3159,7 +3159,7 @@ static int set_local_name(struct sock *sk, struct hci_dev *hdev, void *data, /* The name is stored in the scan response data and so * no need to udpate the advertising data here. */ - if (lmp_le_capable(hdev)) + if (lmp_le_capable(hdev) && hci_dev_test_flag(hdev, HCI_ADVERTISING)) __hci_req_update_scan_rsp_data(&req, hdev->cur_adv_instance); err = hci_req_run(&req, set_name_complete); From 56f2929b0f913163ba48c22ea4ae558ac61d3d1c Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Wed, 31 Aug 2016 12:37:55 +0300 Subject: [PATCH 1390/1715] iwlwifi: mvm: cleanup redundant no_power_up_nic_in_init config This is never really used anymore. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- .../net/wireless/intel/iwlwifi/iwl-config.h | 1 - drivers/net/wireless/intel/iwlwifi/mvm/ops.c | 46 ++++++------------- 2 files changed, 15 insertions(+), 32 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h index 6cecb1adf0c8..2660cc4b9f8c 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h @@ -359,7 +359,6 @@ struct iwl_cfg { high_temp:1, mac_addr_from_csr:1, lp_xtal_workaround:1, - no_power_up_nic_in_init:1, disable_dummy_notification:1, apmg_not_supported:1, mq_rx_supported:1, diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index 75f3ca0504fd..6c08aa3fd2df 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -712,37 +712,21 @@ iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_cfg *cfg, IWL_DEBUG_EEPROM(mvm->trans->dev, "working without external nvm file\n"); - if (WARN(cfg->no_power_up_nic_in_init && !mvm->nvm_file_name, - "not allowing power-up and not having nvm_file\n")) + err = iwl_trans_start_hw(mvm->trans); + if (err) goto out_free; - /* - * Even if nvm exists in the nvm_file driver should read again the nvm - * from the nic because there might be entries that exist in the OTP - * and not in the file. - * for nics with no_power_up_nic_in_init: rely completley on nvm_file - */ - if (cfg->no_power_up_nic_in_init && mvm->nvm_file_name) { - err = iwl_nvm_init(mvm, false); - if (err) - goto out_free; - } else { - err = iwl_trans_start_hw(mvm->trans); - if (err) - goto out_free; - - mutex_lock(&mvm->mutex); - iwl_mvm_ref(mvm, IWL_MVM_REF_INIT_UCODE); - err = iwl_run_init_mvm_ucode(mvm, true); - if (!err || !iwlmvm_mod_params.init_dbg) - iwl_mvm_stop_device(mvm); - iwl_mvm_unref(mvm, IWL_MVM_REF_INIT_UCODE); - mutex_unlock(&mvm->mutex); - /* returns 0 if successful, 1 if success but in rfkill */ - if (err < 0 && !iwlmvm_mod_params.init_dbg) { - IWL_ERR(mvm, "Failed to run INIT ucode: %d\n", err); - goto out_free; - } + mutex_lock(&mvm->mutex); + iwl_mvm_ref(mvm, IWL_MVM_REF_INIT_UCODE); + err = iwl_run_init_mvm_ucode(mvm, true); + if (!err || !iwlmvm_mod_params.init_dbg) + iwl_mvm_stop_device(mvm); + iwl_mvm_unref(mvm, IWL_MVM_REF_INIT_UCODE); + mutex_unlock(&mvm->mutex); + /* returns 0 if successful, 1 if success but in rfkill */ + if (err < 0 && !iwlmvm_mod_params.init_dbg) { + IWL_ERR(mvm, "Failed to run INIT ucode: %d\n", err); + goto out_free; } scan_size = iwl_mvm_scan_size(mvm); @@ -784,8 +768,8 @@ iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_cfg *cfg, flush_delayed_work(&mvm->fw_dump_wk); iwl_phy_db_free(mvm->phy_db); kfree(mvm->scan_cmd); - if (!cfg->no_power_up_nic_in_init || !mvm->nvm_file_name) - iwl_trans_op_mode_leave(trans); + iwl_trans_op_mode_leave(trans); + ieee80211_free_hw(mvm->hw); return NULL; } From acf91ec384dd4c7c2c88cbaa2e0374e537123c1f Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Thu, 22 Sep 2016 11:20:42 -0700 Subject: [PATCH 1391/1715] Bluetooth: btwilink: Save the packet type before sending Running with KASAN produces some messages: BUG: KASAN: use-after-free in ti_st_send_frame+0x9c/0x16c at addr ffffffc064868fe8 Read of size 1 by task kworker/u17:1/1266 Hardware name: HiKey Development Board (DT) Workqueue: hci0 hci_cmd_work Call trace: [] dump_backtrace+0x0/0x178 [] show_stack+0x20/0x28 [] dump_stack+0xa8/0xe0 [] print_trailer+0x110/0x174 [] object_err+0x4c/0x5c [] kasan_report_error+0x254/0x54c [] kasan_report+0x64/0x70 [] __asan_load1+0x4c/0x54 [] ti_st_send_frame+0x9c/0x16c [] hci_send_frame+0xb4/0x118 [] hci_cmd_work+0xcc/0x154 [] process_one_work+0x26c/0x7a4 [] worker_thread+0x9c/0x73c [] kthread+0x138/0x154 [] ret_from_fork+0x10/0x40 The packet is being accessed for statistics after it has been freed. Save the packet type before sending for statistics afterwards. Signed-off-by: Laura Abbott Signed-off-by: Marcel Holtmann --- drivers/bluetooth/btwilink.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/bluetooth/btwilink.c b/drivers/bluetooth/btwilink.c index 485281b3f167..ef51c9c864c5 100644 --- a/drivers/bluetooth/btwilink.c +++ b/drivers/bluetooth/btwilink.c @@ -245,6 +245,7 @@ static int ti_st_send_frame(struct hci_dev *hdev, struct sk_buff *skb) { struct ti_st *hst; long len; + int pkt_type; hst = hci_get_drvdata(hdev); @@ -258,6 +259,7 @@ static int ti_st_send_frame(struct hci_dev *hdev, struct sk_buff *skb) * Freeing skb memory is taken care in shared transport layer, * so don't free skb memory here. */ + pkt_type = hci_skb_pkt_type(skb); len = hst->st_write(skb); if (len < 0) { kfree_skb(skb); @@ -268,7 +270,7 @@ static int ti_st_send_frame(struct hci_dev *hdev, struct sk_buff *skb) /* ST accepted our skb. So, Go ahead and do rest */ hdev->stat.byte_tx += len; - ti_st_tx_complete(hst, hci_skb_pkt_type(skb)); + ti_st_tx_complete(hst, pkt_type); return 0; } From f2082a53eea9839d9e3762405315c2a31d24d427 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Wed, 31 Aug 2016 14:13:53 +0300 Subject: [PATCH 1392/1715] iwlwifi: mvm: cleanup usage of init_dbg parameter Move the init_dbg check to earlier in the function to simplify the code. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 28 ++++++++++----------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index 7322c4394a9a..8f10780808f6 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -1115,27 +1115,27 @@ int iwl_mvm_up(struct iwl_mvm *mvm) * (for example, if we were in RFKILL) */ ret = iwl_run_init_mvm_ucode(mvm, false); - if (ret && !iwlmvm_mod_params.init_dbg) { + + if (iwlmvm_mod_params.init_dbg) + return 0; + + if (ret) { IWL_ERR(mvm, "Failed to run INIT ucode: %d\n", ret); /* this can't happen */ if (WARN_ON(ret > 0)) ret = -ERFKILL; goto error; } - if (!iwlmvm_mod_params.init_dbg) { - /* - * Stop and start the transport without entering low power - * mode. This will save the state of other components on the - * device that are triggered by the INIT firwmare (MFUART). - */ - _iwl_trans_stop_device(mvm->trans, false); - ret = _iwl_trans_start_hw(mvm->trans, false); - if (ret) - goto error; - } - if (iwlmvm_mod_params.init_dbg) - return 0; + /* + * Stop and start the transport without entering low power + * mode. This will save the state of other components on the + * device that are triggered by the INIT firwmare (MFUART). + */ + _iwl_trans_stop_device(mvm->trans, false); + ret = _iwl_trans_start_hw(mvm->trans, false); + if (ret) + goto error; ret = iwl_mvm_load_ucode_wait_alive(mvm, IWL_UCODE_REGULAR); if (ret) { From 191167160c1380e59156dff0c2d7e74aa0ba5770 Mon Sep 17 00:00:00 2001 From: Oren Givon Date: Thu, 8 Sep 2016 13:45:54 +0300 Subject: [PATCH 1393/1715] iwlwifi: add two new 9560 series PCI IDs Add two new PCI IDs for the 9560 series. Signed-off-by: Oren Givon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 3d766f250d3f..001be406a3d3 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -529,6 +529,8 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x9DF0, 0x0030, iwl9560_2ac_cfg)}, {IWL_PCI_DEVICE(0xA370, 0x0030, iwl9560_2ac_cfg)}, {IWL_PCI_DEVICE(0x31DC, 0x0030, iwl9560_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x1030, iwl9560_2ac_cfg)}, + {IWL_PCI_DEVICE(0xA370, 0x1030, iwl9560_2ac_cfg)}, /* a000 Series */ {IWL_PCI_DEVICE(0x2720, 0x0A10, iwla000_2ac_cfg)}, From cc2f41f84fdc68074e228397247acd32ad06440a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 9 Sep 2016 09:34:46 +0200 Subject: [PATCH 1394/1715] iwlwifi: pcie: avoid variable shadowing in TFD helpers The various TFD/TB helpers have two code paths depending on the type of TFD supported, with variable shadowing due to the new if branches. Move the fall-through code into else branches to avoid variable shadowing. While doing so, rename some of the variables and do some other cleanups (like removing void * casts of void * pointers.) Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- .../wireless/intel/iwlwifi/pcie/internal.h | 19 +++---- drivers/net/wireless/intel/iwlwifi/pcie/tx.c | 51 ++++++++++--------- 2 files changed, 35 insertions(+), 35 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h index 2d81630fba0f..cac6d99012b3 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h +++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h @@ -497,23 +497,20 @@ void iwl_trans_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn, struct sk_buff_head *skbs); void iwl_trans_pcie_tx_reset(struct iwl_trans *trans); -static inline u16 iwl_pcie_tfd_tb_get_len(struct iwl_trans *trans, void *tfd, +static inline u16 iwl_pcie_tfd_tb_get_len(struct iwl_trans *trans, void *_tfd, u8 idx) { - struct iwl_tfd *tfd_fh; - struct iwl_tfd_tb *tb; - if (trans->cfg->use_tfh) { - struct iwl_tfh_tfd *tfd_fh = (void *)tfd; - struct iwl_tfh_tb *tb = &tfd_fh->tbs[idx]; + struct iwl_tfh_tfd *tfd = _tfd; + struct iwl_tfh_tb *tb = &tfd->tbs[idx]; return le16_to_cpu(tb->tb_len); + } else { + struct iwl_tfd *tfd = _tfd; + struct iwl_tfd_tb *tb = &tfd->tbs[idx]; + + return le16_to_cpu(tb->hi_n_len) >> 4; } - - tfd_fh = (void *)tfd; - tb = &tfd_fh->tbs[idx]; - - return le16_to_cpu(tb->hi_n_len) >> 4; } /***************************************************** diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c index e00e7d8f197a..e9a278b60dfd 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c @@ -337,28 +337,32 @@ static inline void *iwl_pcie_get_tfd(struct iwl_trans_pcie *trans_pcie, } static inline dma_addr_t iwl_pcie_tfd_tb_get_addr(struct iwl_trans *trans, - void *tfd, u8 idx) + void *_tfd, u8 idx) { - struct iwl_tfd *tfd_fh; - struct iwl_tfd_tb *tb; - dma_addr_t addr; if (trans->cfg->use_tfh) { - struct iwl_tfh_tfd *tfd_fh = (void *)tfd; - struct iwl_tfh_tb *tb = &tfd_fh->tbs[idx]; + struct iwl_tfh_tfd *tfd = _tfd; + struct iwl_tfh_tb *tb = &tfd->tbs[idx]; return (dma_addr_t)(le64_to_cpu(tb->addr)); + } else { + struct iwl_tfd *tfd = _tfd; + struct iwl_tfd_tb *tb = &tfd->tbs[idx]; + dma_addr_t addr = get_unaligned_le32(&tb->lo); + dma_addr_t hi_len; + + if (sizeof(dma_addr_t) <= sizeof(u32)) + return addr; + + hi_len = le16_to_cpu(tb->hi_n_len) & 0xF; + + /* + * shift by 16 twice to avoid warnings on 32-bit + * (where this code never runs anyway due to the + * if statement above) + */ + return addr | ((hi_len << 16) << 16); } - - tfd_fh = (void *)tfd; - tb = &tfd_fh->tbs[idx]; - addr = get_unaligned_le32(&tb->lo); - - if (sizeof(dma_addr_t) > sizeof(u32)) - addr |= - ((dma_addr_t)(le16_to_cpu(tb->hi_n_len) & 0xF) << 16) << 16; - - return addr; } static inline void iwl_pcie_tfd_set_tb(struct iwl_trans *trans, void *tfd, @@ -388,18 +392,17 @@ static inline void iwl_pcie_tfd_set_tb(struct iwl_trans *trans, void *tfd, } } -static inline u8 iwl_pcie_tfd_get_num_tbs(struct iwl_trans *trans, void *tfd) +static inline u8 iwl_pcie_tfd_get_num_tbs(struct iwl_trans *trans, void *_tfd) { - struct iwl_tfd *tfd_fh; - if (trans->cfg->use_tfh) { - struct iwl_tfh_tfd *tfd_fh = (void *)tfd; + struct iwl_tfh_tfd *tfd = _tfd; - return le16_to_cpu(tfd_fh->num_tbs) & 0x1f; + return le16_to_cpu(tfd->num_tbs) & 0x1f; + } else { + struct iwl_tfd *tfd = _tfd; + + return tfd->num_tbs & 0x1f; } - - tfd_fh = (void *)tfd; - return tfd_fh->num_tbs & 0x1f; } static void iwl_pcie_tfd_unmap(struct iwl_trans *trans, From 75cfe338b8a6fadaa28879a969047554701a7589 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Wed, 14 Sep 2016 11:54:36 +0300 Subject: [PATCH 1395/1715] iwlwifi: mvm: bail out if CTDP start operation fails We were assigning the return value of iwl_mvm_ctdp_command() to a variable, but never checking it. If this command fails, we should not allow the interface up process to proceed, since it is potentially dangerous to ignore thermal management requirements. Fixes: commit 5c89e7bc557e ("iwlwifi: mvm: add registration to cooling device") Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index 8f10780808f6..897412057d1f 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -1233,9 +1233,12 @@ int iwl_mvm_up(struct iwl_mvm *mvm) } /* TODO: read the budget from BIOS / Platform NVM */ - if (iwl_mvm_is_ctdp_supported(mvm) && mvm->cooling_dev.cur_state > 0) + if (iwl_mvm_is_ctdp_supported(mvm) && mvm->cooling_dev.cur_state > 0) { ret = iwl_mvm_ctdp_command(mvm, CTDP_CMD_OPERATION_START, mvm->cooling_dev.cur_state); + if (ret) + goto error; + } #else /* Initialize tx backoffs to the minimal possible */ iwl_mvm_tt_tx_backoff(mvm, 0); From 7f66ea03644e197a29af5a00a6e26ff120d8edd0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 14 Sep 2016 10:20:10 +0200 Subject: [PATCH 1396/1715] iwlwifi: mvm: correct rate_idx bounds-check The upper bound IWL_RATE_COUNT_LEGACY should be used with a >= check, rejecting the value itself; fix that. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index e0c9065a3ad4..ef4bdfc9e7e2 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -346,7 +346,7 @@ void iwl_mvm_set_tx_cmd_rate(struct iwl_mvm *mvm, struct iwl_tx_cmd *tx_cmd, rate_idx = info->control.rates[0].idx; /* if the rate isn't a well known legacy rate, take the lowest one */ - if (rate_idx < 0 || rate_idx > IWL_RATE_COUNT_LEGACY) + if (rate_idx < 0 || rate_idx >= IWL_RATE_COUNT_LEGACY) rate_idx = rate_lowest_index( &mvm->nvm_data->bands[info->band], sta); From f15823510246444052b35f148c7ae627842b0e05 Mon Sep 17 00:00:00 2001 From: Alan Brady Date: Wed, 24 Aug 2016 11:33:46 -0700 Subject: [PATCH 1397/1715] i40e: fix setting user defined RSS hash key Previously, when using ethtool to change the RSS hash key, ethtool would report back saying the old key was still being used and no error was reported. It was unclear whether it was being reported incorrectly or being set incorrectly. Debugging revealed 'i40e_set_rxfh()' returned zero immediately instead of setting the key because a user defined indirection table is not supplied when changing the hash key. This fix instead changes it such that if an indirection table is not supplied, then a default one is created and the hash key is now correctly set. Change-ID: Iddb621897ecf208650272b7ee46702cad7b69a71 Signed-off-by: Alan Brady Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e.h | 2 ++ drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 12 +++++++----- drivers/net/ethernet/intel/i40e/i40e_main.c | 6 ++---- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 19103a6a7dcc..30aaee42f648 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -701,6 +701,8 @@ void i40e_do_reset_safe(struct i40e_pf *pf, u32 reset_flags); void i40e_do_reset(struct i40e_pf *pf, u32 reset_flags); int i40e_config_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size); int i40e_get_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size); +void i40e_fill_rss_lut(struct i40e_pf *pf, u8 *lut, + u16 rss_table_size, u16 rss_size); struct i40e_vsi *i40e_find_vsi_from_id(struct i40e_pf *pf, u16 id); void i40e_update_stats(struct i40e_vsi *vsi); void i40e_update_eth_stats(struct i40e_vsi *vsi); diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 1835186b62c9..af28a8c2166b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -2922,15 +2922,13 @@ static int i40e_set_rxfh(struct net_device *netdev, const u32 *indir, { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; + struct i40e_pf *pf = vsi->back; u8 *seed = NULL; u16 i; if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP) return -EOPNOTSUPP; - if (!indir) - return 0; - if (key) { if (!vsi->rss_hkey_user) { vsi->rss_hkey_user = kzalloc(I40E_HKEY_ARRAY_SIZE, @@ -2948,8 +2946,12 @@ static int i40e_set_rxfh(struct net_device *netdev, const u32 *indir, } /* Each 32 bits pointed by 'indir' is stored with a lut entry */ - for (i = 0; i < I40E_HLUT_ARRAY_SIZE; i++) - vsi->rss_lut_user[i] = (u8)(indir[i]); + if (indir) + for (i = 0; i < I40E_HLUT_ARRAY_SIZE; i++) + vsi->rss_lut_user[i] = (u8)(indir[i]); + else + i40e_fill_rss_lut(pf, vsi->rss_lut_user, I40E_HLUT_ARRAY_SIZE, + vsi->rss_size); return i40e_config_rss(vsi, seed, vsi->rss_lut_user, I40E_HLUT_ARRAY_SIZE); diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 61b0fc433d37..69b9e30af3fa 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -57,8 +57,6 @@ static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit); static int i40e_setup_misc_vector(struct i40e_pf *pf); static void i40e_determine_queue_usage(struct i40e_pf *pf); static int i40e_setup_pf_filter_control(struct i40e_pf *pf); -static void i40e_fill_rss_lut(struct i40e_pf *pf, u8 *lut, - u16 rss_table_size, u16 rss_size); static void i40e_fdir_sb_setup(struct i40e_pf *pf); static int i40e_veb_get_bw_info(struct i40e_veb *veb); @@ -8244,8 +8242,8 @@ int i40e_get_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size) * @rss_table_size: Lookup table size * @rss_size: Range of queue number for hashing */ -static void i40e_fill_rss_lut(struct i40e_pf *pf, u8 *lut, - u16 rss_table_size, u16 rss_size) +void i40e_fill_rss_lut(struct i40e_pf *pf, u8 *lut, + u16 rss_table_size, u16 rss_size) { u16 i; From a01c7f6709925919e2e3c6c190a92692f63f74e4 Mon Sep 17 00:00:00 2001 From: Alan Brady Date: Wed, 24 Aug 2016 11:33:47 -0700 Subject: [PATCH 1398/1715] i40e: fix "dump port" command when NPAR enabled When using the debugfs to issue the "dump port" command with NPAR enabled, the firmware reports back with invalid argument. The issue occurs because the pf->mac_seid was used to perform the query. This is fine when NPAR is disabled because the switch ID == pf->mac_seid, however this is not the case when NPAR is enabled. This fix instead goes through the VSI to determine the correct ID to use in either case. Change-ID: I0cd67913a7f2c4a2962e06d39e32e7447cc55b6a Signed-off-by: Alan Brady Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_debugfs.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index 05cf9a719bab..8555f04002da 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -1054,6 +1054,7 @@ static ssize_t i40e_dbg_command_write(struct file *filp, struct i40e_dcbx_config *r_cfg = &pf->hw.remote_dcbx_config; int i, ret; + u32 switch_id; bw_data = kzalloc(sizeof( struct i40e_aqc_query_port_ets_config_resp), @@ -1063,8 +1064,12 @@ static ssize_t i40e_dbg_command_write(struct file *filp, goto command_write_done; } + vsi = pf->vsi[pf->lan_vsi]; + switch_id = + vsi->info.switch_id & I40E_AQ_VSI_SW_ID_MASK; + ret = i40e_aq_query_port_ets_config(&pf->hw, - pf->mac_seid, + switch_id, bw_data, NULL); if (ret) { dev_info(&pf->pdev->dev, From 8d9d927f4ab8d87fee91d9aa8bdcdf19a1787ce0 Mon Sep 17 00:00:00 2001 From: Mitch Williams Date: Wed, 24 Aug 2016 11:33:49 -0700 Subject: [PATCH 1399/1715] i40e: return correct opcode to VF This conditional is backward, so the driver responds back to the VF with the wrong opcode. Do the old switcheroo to fix this. Change-ID: I384035b0fef8a3881c176de4b4672009b3400b25 Signed-off-by: Mitch Williams Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index da3423561b3a..611fc87a5f0a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -2217,8 +2217,8 @@ static int i40e_vc_iwarp_qvmap_msg(struct i40e_vf *vf, u8 *msg, u16 msglen, error_param: /* send the response to the VF */ return i40e_vc_send_resp_to_vf(vf, - config ? I40E_VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP : - I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP, + config ? I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP : + I40E_VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP, aq_ret); } From d4a0658d813ec72965a52f04f07258a4018ccb17 Mon Sep 17 00:00:00 2001 From: Carolyn Wyborny Date: Wed, 24 Aug 2016 11:33:50 -0700 Subject: [PATCH 1400/1715] i40e: Fix to check for NULL This patch fixes an issue in the virt channel code, where a return from i40e_find_vsi_from_id was not checked for NULL when applicable. Without this patch, there is a risk for panic and static analysis tools complain. This patch fixes the problem by adding the check and adding an additional input check for similar reasons. Change-ID: I7e9be88eb7a3addb50eadc451c8336d9e06f5394 Signed-off-by: Carolyn Wyborny Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 611fc87a5f0a..2ab53555f463 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -502,8 +502,16 @@ static int i40e_config_vsi_tx_queue(struct i40e_vf *vf, u16 vsi_id, u32 qtx_ctl; int ret = 0; + if (!i40e_vc_isvalid_vsi_id(vf, info->vsi_id)) { + ret = -ENOENT; + goto error_context; + } pf_queue_id = i40e_vc_get_pf_queue_id(vf, vsi_id, vsi_queue_id); vsi = i40e_find_vsi_from_id(pf, vsi_id); + if (!vsi) { + ret = -ENOENT; + goto error_context; + } /* clear the context structure first */ memset(&tx_ctx, 0, sizeof(struct i40e_hmc_obj_txq)); @@ -1476,7 +1484,8 @@ static int i40e_vc_config_promiscuous_mode_msg(struct i40e_vf *vf, vsi = i40e_find_vsi_from_id(pf, info->vsi_id); if (!test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states) || - !i40e_vc_isvalid_vsi_id(vf, info->vsi_id)) { + !i40e_vc_isvalid_vsi_id(vf, info->vsi_id) || + !vsi) { aq_ret = I40E_ERR_PARAM; goto error_param; } From b3f5c7bc88bab134e9649e42d30be15e3775f00d Mon Sep 17 00:00:00 2001 From: Carolyn Wyborny Date: Wed, 24 Aug 2016 11:33:51 -0700 Subject: [PATCH 1401/1715] i40e: Fix for extra byte swap in tunnel setup This patch fixes an issue where we were byte swapping the port parameter, then byte swapping it again in function execution. Obviously, that's unnecessary, so take it out of the function calls. Without this patch, the udp based tunnel configuration would not be correct. Change-ID: I788d83c5bd5732170f1a81dbfa0b1ac3ca8ea5b7 Signed-off-by: Carolyn Wyborny Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 69b9e30af3fa..53cde5b44346 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -7154,9 +7154,9 @@ static void i40e_sync_udp_filters_subtask(struct i40e_pf *pf) pf->pending_udp_bitmap &= ~BIT_ULL(i); port = pf->udp_ports[i].index; if (port) - ret = i40e_aq_add_udp_tunnel(hw, ntohs(port), - pf->udp_ports[i].type, - NULL, NULL); + ret = i40e_aq_add_udp_tunnel(hw, port, + pf->udp_ports[i].type, + NULL, NULL); else ret = i40e_aq_del_udp_tunnel(hw, i, NULL); From ff918912e1b8ba4e743d1f0b06ced1d01969e17c Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 28 Aug 2016 18:41:01 +0100 Subject: [PATCH 1402/1715] i40e: avoid potential null pointer dereference when assigning len There is a sanitcy check for desc being null in the first line of function i40evf_debug_aq. However, before that, aq_desc is cast from desc, and aq_desc is being dereferenced on the assignment of len, so this could be a potential null pointer deference. Fix this by moving the initialization of len to the code block where len is being used and hence at this point we know it is OK to dereference aq_desc. Signed-off-by: Colin Ian King Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40evf/i40e_common.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40evf/i40e_common.c b/drivers/net/ethernet/intel/i40evf/i40e_common.c index 4db0c0326185..7953c13451b9 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_common.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_common.c @@ -302,7 +302,6 @@ void i40evf_debug_aq(struct i40e_hw *hw, enum i40e_debug_mask mask, void *desc, void *buffer, u16 buf_len) { struct i40e_aq_desc *aq_desc = (struct i40e_aq_desc *)desc; - u16 len = le16_to_cpu(aq_desc->datalen); u8 *buf = (u8 *)buffer; u16 i = 0; @@ -326,6 +325,8 @@ void i40evf_debug_aq(struct i40e_hw *hw, enum i40e_debug_mask mask, void *desc, le32_to_cpu(aq_desc->params.external.addr_low)); if ((buffer != NULL) && (aq_desc->datalen != 0)) { + u16 len = le16_to_cpu(aq_desc->datalen); + i40e_debug(hw, mask, "AQ CMD Buffer:\n"); if (buf_len < len) len = buf_len; From 3f341acc1c65b800ced567174c683cda12dfb17d Mon Sep 17 00:00:00 2001 From: Sridhar Samudrala Date: Thu, 1 Sep 2016 22:27:27 +0200 Subject: [PATCH 1403/1715] i40evf: Fix link state event handling Currently disabling the link state from PF via ip link set enp5s0f0 vf 0 state disable doesn't disable the CARRIER on the VF. This patch updates the carrier and starts/stops the tx queues based on the link state notification from PF. PF: enp5s0f0, VF: enp5s2 #modprobe i40e #echo 2 > /sys/class/net/enp5s0f0/device/sriov_numvfs #ip link set enp5s2 up #ip -d link show enp5s2 175: enp5s2: mtu 1500 qdisc mq state UP mode DEFAULT group default qlen 1000 link/ether ea:4d:60:bc:6f:85 brd ff:ff:ff:ff:ff:ff promiscuity 0 addrgenmode eui64 #ip link set enp5s0f0 vf 0 state disable #ip -d link show enp5s0f0 171: enp5s0f0: mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000 link/ether 68:05:ca:2e:72:68 brd ff:ff:ff:ff:ff:ff promiscuity 0 addrgenmode eui64 numtxqueues 72 numrxqueues 72 portid 6805ca2e7268 vf 0 MAC 00:00:00:00:00:00, spoof checking on, link-state disable, trust off vf 1 MAC 00:00:00:00:00:00, spoof checking on, link-state auto, trust off #ip -d link show enp5s2 175: enp5s2: mtu 1500 qdisc mq state DOWN mode DEFAULT group default qlen 1000 link/ether ea:4d:60:bc:6f:85 brd ff:ff:ff:ff:ff:ff promiscuity 0 addrgenmode eui64 numtxqueues 16 numrxqueues 16 Signed-off-by: Sridhar Samudrala Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40evf/i40evf_main.c | 4 ++++ drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c | 10 +++++++--- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index f751f7bc0d81..e0a8cd82255c 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -1037,6 +1037,7 @@ void i40evf_down(struct i40evf_adapter *adapter) netif_carrier_off(netdev); netif_tx_disable(netdev); + adapter->link_up = false; i40evf_napi_disable_all(adapter); i40evf_irq_disable(adapter); @@ -1731,6 +1732,7 @@ static void i40evf_reset_task(struct work_struct *work) set_bit(__I40E_DOWN, &adapter->vsi.state); netif_carrier_off(netdev); netif_tx_disable(netdev); + adapter->link_up = false; i40evf_napi_disable_all(adapter); i40evf_irq_disable(adapter); i40evf_free_traffic_irqs(adapter); @@ -1769,6 +1771,7 @@ continue_reset: if (netif_running(adapter->netdev)) { netif_carrier_off(netdev); netif_tx_stop_all_queues(netdev); + adapter->link_up = false; i40evf_napi_disable_all(adapter); } i40evf_irq_disable(adapter); @@ -2457,6 +2460,7 @@ static void i40evf_init_task(struct work_struct *work) goto err_sw_init; netif_carrier_off(netdev); + adapter->link_up = false; if (!adapter->netdev_registered) { err = register_netdev(netdev); diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c index cc6cb30c1667..ddf478d6322b 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c @@ -898,8 +898,14 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter, vpe->event_data.link_event.link_status) { adapter->link_up = vpe->event_data.link_event.link_status; + if (adapter->link_up) { + netif_tx_start_all_queues(netdev); + netif_carrier_on(netdev); + } else { + netif_tx_stop_all_queues(netdev); + netif_carrier_off(netdev); + } i40evf_print_link_message(adapter); - netif_tx_stop_all_queues(netdev); } break; case I40E_VIRTCHNL_EVENT_RESET_IMPENDING: @@ -974,8 +980,6 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter, case I40E_VIRTCHNL_OP_ENABLE_QUEUES: /* enable transmits */ i40evf_irq_enable(adapter, true); - netif_tx_start_all_queues(adapter->netdev); - netif_carrier_on(adapter->netdev); break; case I40E_VIRTCHNL_OP_DISABLE_QUEUES: i40evf_free_all_tx_resources(adapter); From cb130a0b41d2a825fa48d7dfc964f08da9ccbb96 Mon Sep 17 00:00:00 2001 From: Bimmy Pujari Date: Tue, 6 Sep 2016 18:05:03 -0700 Subject: [PATCH 1404/1715] i40evf: remove unnecessary error checking against i40evf_up_complete Function i40evf_up_complete() always returns success. Changed this to a void type and removed the code that checks the return status and prints an error message. Change-ID: I8c400f174786b9c855f679e470f35af292fb50ad Signed-off-by: Bimmy Pujari Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40evf/i40evf_main.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index e0a8cd82255c..99067757feb3 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -1007,7 +1007,7 @@ static void i40evf_configure(struct i40evf_adapter *adapter) * i40evf_up_complete - Finish the last steps of bringing up a connection * @adapter: board private structure **/ -static int i40evf_up_complete(struct i40evf_adapter *adapter) +static void i40evf_up_complete(struct i40evf_adapter *adapter) { adapter->state = __I40EVF_RUNNING; clear_bit(__I40E_DOWN, &adapter->vsi.state); @@ -1016,7 +1016,6 @@ static int i40evf_up_complete(struct i40evf_adapter *adapter) adapter->aq_required |= I40EVF_FLAG_AQ_ENABLE_QUEUES; mod_timer_pending(&adapter->watchdog_timer, jiffies + 1); - return 0; } /** @@ -1827,9 +1826,7 @@ continue_reset: i40evf_configure(adapter); - err = i40evf_up_complete(adapter); - if (err) - goto reset_err; + i40evf_up_complete(adapter); i40evf_irq_enable(adapter, true); } else { @@ -2059,9 +2056,7 @@ static int i40evf_open(struct net_device *netdev) i40evf_add_filter(adapter, adapter->hw.mac.addr); i40evf_configure(adapter); - err = i40evf_up_complete(adapter); - if (err) - goto err_req_irq; + i40evf_up_complete(adapter); i40evf_irq_enable(adapter, true); From 841493a3f64395b60554afbcaa17f4350f90e764 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 6 Sep 2016 18:05:04 -0700 Subject: [PATCH 1405/1715] i40e: Limit TX descriptor count in cases where frag size is greater than 16K The i40e driver was incorrectly assuming that we would always be pulling no more than 1 descriptor from each fragment. It is in fact possible for us to end up with the case where 2 descriptors worth of data may be pulled when a frame is larger than one of the pieces generated when aligning the payload to either 4K or pieces smaller than 16K. To adjust for this we just need to make certain to test all the way to the end of the fragments as it is possible for us to span 2 descriptors in the block before us so we need to guarantee that even the last 6 descriptors have enough data to fill a full frame. Change-ID: Ic2ecb4d6b745f447d334e66c14002152f50e2f99 Signed-off-by: Alexander Duyck Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 7 ++----- drivers/net/ethernet/intel/i40evf/i40e_txrx.c | 7 ++----- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index f8d66236fcbf..bf7bb7c3f227 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -2621,9 +2621,7 @@ bool __i40e_chk_linearize(struct sk_buff *skb) return false; /* We need to walk through the list and validate that each group - * of 6 fragments totals at least gso_size. However we don't need - * to perform such validation on the last 6 since the last 6 cannot - * inherit any data from a descriptor after them. + * of 6 fragments totals at least gso_size. */ nr_frags -= I40E_MAX_BUFFER_TXD - 2; frag = &skb_shinfo(skb)->frags[0]; @@ -2654,8 +2652,7 @@ bool __i40e_chk_linearize(struct sk_buff *skb) if (sum < 0) return true; - /* use pre-decrement to avoid processing last fragment */ - if (!--nr_frags) + if (!nr_frags--) break; sum -= skb_frag_size(stale++); diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index 0130458264e5..e3427ebd5b84 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -1832,9 +1832,7 @@ bool __i40evf_chk_linearize(struct sk_buff *skb) return false; /* We need to walk through the list and validate that each group - * of 6 fragments totals at least gso_size. However we don't need - * to perform such validation on the last 6 since the last 6 cannot - * inherit any data from a descriptor after them. + * of 6 fragments totals at least gso_size. */ nr_frags -= I40E_MAX_BUFFER_TXD - 2; frag = &skb_shinfo(skb)->frags[0]; @@ -1865,8 +1863,7 @@ bool __i40evf_chk_linearize(struct sk_buff *skb) if (sum < 0) return true; - /* use pre-decrement to avoid processing last fragment */ - if (!--nr_frags) + if (!nr_frags--) break; sum -= skb_frag_size(stale++); From 903e68323bb62cc8ca30c5a7a41d962f92c27b97 Mon Sep 17 00:00:00 2001 From: Lihong Yang Date: Tue, 6 Sep 2016 18:05:05 -0700 Subject: [PATCH 1406/1715] i40evf: remove unnecessary error checking against i40e_shutdown_adminq The i40e_shutdown_adminq function never returns failure. There is no need to check the non-0 return value. Clean up the unnecessary error checking and warning against it. Change-ID: Ibb616f09cfb93bd1a872ebf3241a15fb8354b31b Signed-off-by: Lihong Yang Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40evf/i40evf_main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index 99067757feb3..99833f3ea34e 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -1785,8 +1785,7 @@ continue_reset: i40evf_free_all_tx_resources(adapter); /* kill and reinit the admin queue */ - if (i40evf_shutdown_adminq(hw)) - dev_warn(&adapter->pdev->dev, "Failed to shut down adminq\n"); + i40evf_shutdown_adminq(hw); adapter->current_op = I40E_VIRTCHNL_OP_UNKNOWN; err = i40evf_init_adminq(hw); if (err) From 691e412132f07bd566934b7cbc430e87c5656de1 Mon Sep 17 00:00:00 2001 From: Emil Tantilov Date: Mon, 22 Aug 2016 16:17:46 -0700 Subject: [PATCH 1407/1715] ixgbe: simplify the logic for setting VLAN filtering Simplify the logic for setting VLNCTRL.VFE by checking the VMDQ flag and 82598 MAC instead of having to maintain a list of MAC types. Signed-off-by: Emil Tantilov Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 39 ++++++------------- 1 file changed, 11 insertions(+), 28 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index d76bc1a313ea..1c888588cecd 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -4105,23 +4105,20 @@ static void ixgbe_vlan_promisc_enable(struct ixgbe_adapter *adapter) vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL); - switch (hw->mac.type) { - case ixgbe_mac_82599EB: - case ixgbe_mac_X540: - case ixgbe_mac_X550: - case ixgbe_mac_X550EM_x: - case ixgbe_mac_x550em_a: - default: - if (adapter->flags & IXGBE_FLAG_VMDQ_ENABLED) - break; - /* fall through */ - case ixgbe_mac_82598EB: - /* legacy case, we can just disable VLAN filtering */ + if (adapter->flags & IXGBE_FLAG_VMDQ_ENABLED) { + /* For VMDq and SR-IOV we must leave VLAN filtering enabled */ + vlnctrl |= IXGBE_VLNCTRL_VFE; + IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl); + } else { vlnctrl &= ~IXGBE_VLNCTRL_VFE; IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl); return; } + /* Nothing to do for 82598 */ + if (hw->mac.type == ixgbe_mac_82598EB) + return; + /* We are already in VLAN promisc, nothing to do */ if (adapter->flags2 & IXGBE_FLAG2_VLAN_PROMISC) return; @@ -4129,10 +4126,6 @@ static void ixgbe_vlan_promisc_enable(struct ixgbe_adapter *adapter) /* Set flag so we don't redo unnecessary work */ adapter->flags2 |= IXGBE_FLAG2_VLAN_PROMISC; - /* For VMDq and SR-IOV we must leave VLAN filtering enabled */ - vlnctrl |= IXGBE_VLNCTRL_VFE; - IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl); - /* Add PF to all active pools */ for (i = IXGBE_VLVF_ENTRIES; --i;) { u32 reg_offset = IXGBE_VLVFB(i * 2 + VMDQ_P(0) / 32); @@ -4204,19 +4197,9 @@ static void ixgbe_vlan_promisc_disable(struct ixgbe_adapter *adapter) vlnctrl |= IXGBE_VLNCTRL_VFE; IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl); - switch (hw->mac.type) { - case ixgbe_mac_82599EB: - case ixgbe_mac_X540: - case ixgbe_mac_X550: - case ixgbe_mac_X550EM_x: - case ixgbe_mac_x550em_a: - default: - if (adapter->flags & IXGBE_FLAG_VMDQ_ENABLED) - break; - /* fall through */ - case ixgbe_mac_82598EB: + if (!(adapter->flags & IXGBE_FLAG_VMDQ_ENABLED) || + hw->mac.type == ixgbe_mac_82598EB) return; - } /* We are not in VLAN promisc, nothing to do */ if (!(adapter->flags2 & IXGBE_FLAG2_VLAN_PROMISC)) From d2d43e5b9fce2c30182dd9b6c63f436ea923a4d9 Mon Sep 17 00:00:00 2001 From: Emil Tantilov Date: Mon, 22 Aug 2016 16:28:34 -0700 Subject: [PATCH 1408/1715] ixgbe: make ixgbe_led_on/off_t_x550em static These functions are only used in ixgbe_x550.c. Fixes a warning when compiling with -Wmissing-prototypes Reported-by: Krishneil Singh Signed-off-by: Emil Tantilov Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index e092a8929413..dec8b116e6a2 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -2125,7 +2125,7 @@ static s32 ixgbe_reset_phy_t_X550em(struct ixgbe_hw *hw) * @hw: pointer to hardware structure * @led_idx: led number to turn on **/ -s32 ixgbe_led_on_t_x550em(struct ixgbe_hw *hw, u32 led_idx) +static s32 ixgbe_led_on_t_x550em(struct ixgbe_hw *hw, u32 led_idx) { u16 phy_data; @@ -2147,7 +2147,7 @@ s32 ixgbe_led_on_t_x550em(struct ixgbe_hw *hw, u32 led_idx) * @hw: pointer to hardware structure * @led_idx: led number to turn off **/ -s32 ixgbe_led_off_t_x550em(struct ixgbe_hw *hw, u32 led_idx) +static s32 ixgbe_led_off_t_x550em(struct ixgbe_hw *hw, u32 led_idx) { u16 phy_data; From 8fe293aaaa7abd192633cf612065b355a66ed6ad Mon Sep 17 00:00:00 2001 From: Mark Rustad Date: Fri, 26 Aug 2016 14:48:28 -0700 Subject: [PATCH 1409/1715] ixgbe: Resolve NULL reference by setting {read, write}_reg_mdi Set the read_reg_mdi and write_reg_mdi method pointers for X550EM_A_10G_T devices to resolve jumping to NULL. Signed-off-by: Mark Rustad Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index dec8b116e6a2..cd22efb940ad 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -3049,6 +3049,8 @@ static const struct ixgbe_phy_operations phy_ops_x550em_a = { .identify = &ixgbe_identify_phy_x550em, .read_reg = &ixgbe_read_phy_reg_x550a, .write_reg = &ixgbe_write_phy_reg_x550a, + .read_reg_mdi = &ixgbe_read_phy_reg_mdi, + .write_reg_mdi = &ixgbe_write_phy_reg_mdi, }; static const u32 ixgbe_mvals_X550[IXGBE_MVALS_IDX_LIMIT] = { From ade3ccf9dc75c94c1557108572d445f0300adead Mon Sep 17 00:00:00 2001 From: Mark Rustad Date: Fri, 26 Aug 2016 14:48:33 -0700 Subject: [PATCH 1410/1715] ixgbe: Indicate support for pause frames in all cases All the MACs supported by ixgbe support pause frames, so indicate that support in ethtool. Also set advertising according to requested mode. Signed-off-by: Mark Rustad Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- .../net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index 9547191e26c9..730a99f0f002 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -313,6 +313,25 @@ static int ixgbe_get_settings(struct net_device *netdev, break; } + /* Indicate pause support */ + ecmd->supported |= SUPPORTED_Pause; + + switch (hw->fc.requested_mode) { + case ixgbe_fc_full: + ecmd->advertising |= ADVERTISED_Pause; + break; + case ixgbe_fc_rx_pause: + ecmd->advertising |= ADVERTISED_Pause | + ADVERTISED_Asym_Pause; + break; + case ixgbe_fc_tx_pause: + ecmd->advertising |= ADVERTISED_Asym_Pause; + break; + default: + ecmd->advertising &= ~(ADVERTISED_Pause | + ADVERTISED_Asym_Pause); + } + if (netif_carrier_ok(netdev)) { switch (adapter->link_speed) { case IXGBE_LINK_SPEED_10GB_FULL: From 14b22cd9827ad6765a00ca0b267c3cb0353d9c10 Mon Sep 17 00:00:00 2001 From: Emil Tantilov Date: Mon, 29 Aug 2016 16:39:28 -0700 Subject: [PATCH 1411/1715] ixgbevf: add spinlocks for MTU change calls Protect set_rlpml with mailbox lock to make sure the MTU configuration is handled properly. This change resolves an issue where set_rlpml can fail when the VF interface is brought up: ixgbevf 0000:03:1d.6: Failed to set MTU at 1500 Signed-off-by: Emil Tantilov Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 4044608083cd..7eaac3234049 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -1810,8 +1810,10 @@ static void ixgbevf_configure_rx(struct ixgbevf_adapter *adapter) if (hw->mac.type >= ixgbe_mac_X550_vf) ixgbevf_setup_vfmrqc(adapter); + spin_lock_bh(&adapter->mbx_lock); /* notify the PF of our intent to use this size of frame */ ret = hw->mac.ops.set_rlpml(hw, netdev->mtu + ETH_HLEN + ETH_FCS_LEN); + spin_unlock_bh(&adapter->mbx_lock); if (ret) dev_err(&adapter->pdev->dev, "Failed to set MTU at %d\n", netdev->mtu); @@ -3758,8 +3760,10 @@ static int ixgbevf_change_mtu(struct net_device *netdev, int new_mtu) if ((new_mtu < 68) || (max_frame > max_possible_frame)) return -EINVAL; + spin_lock_bh(&adapter->mbx_lock); /* notify the PF of our intent to use this size of frame */ ret = hw->mac.ops.set_rlpml(hw, max_frame); + spin_unlock_bh(&adapter->mbx_lock); if (ret) return -EINVAL; From 7564a8880a3cf831078a67bffb05c51f34d133eb Mon Sep 17 00:00:00 2001 From: Mark Rustad Date: Thu, 1 Sep 2016 13:58:51 -0700 Subject: [PATCH 1412/1715] ixgbe: Use MDIO_PRTAD_NONE consistently The value MDIO_PRTAD_NONE should be used to indicate no PHY address. Not 0, not 0xFFFF. Use the MDIO_PRTAD_NONE value consistently. Signed-off-by: Mark Rustad Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c | 4 ++-- drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c index db0731e05401..021ab9b89c71 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c @@ -346,8 +346,8 @@ s32 ixgbe_identify_phy_generic(struct ixgbe_hw *hw) return 0; } } - /* clear value if nothing found */ - hw->phy.mdio.prtad = 0; + /* indicate no PHY found */ + hw->phy.mdio.prtad = MDIO_PRTAD_NONE; return IXGBE_ERR_PHY_ADDR_INVALID; } return 0; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index cd22efb940ad..7e6b9267ca9d 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -1459,7 +1459,7 @@ ixgbe_setup_mac_link_sfp_x550a(struct ixgbe_hw *hw, ixgbe_link_speed speed, /* Configure internal PHY for KR/KX. */ ixgbe_setup_kr_speed_x550em(hw, speed); - if (!hw->phy.mdio.prtad || hw->phy.mdio.prtad == 0xFFFF) + if (hw->phy.mdio.prtad == MDIO_PRTAD_NONE) return IXGBE_ERR_PHY_ADDR_INVALID; /* Get external PHY device id */ From 3b00da03ae303a3bdfa3bdfbb078e0eadb749375 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 7 Sep 2016 20:28:11 -0700 Subject: [PATCH 1413/1715] ixgbe: Allow setting multiple queues when SR-IOV is enabled The maximum queue count reported was 1, however support for multiple queues with SR-IOV was added some time ago so we should report support for it to the user so that they can select multiple queues if they so desire. Signed-off-by: Alexander Duyck Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index 730a99f0f002..2d872be336bb 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -3060,8 +3060,8 @@ static unsigned int ixgbe_max_channels(struct ixgbe_adapter *adapter) /* We only support one q_vector without MSI-X */ max_combined = 1; } else if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) { - /* SR-IOV currently only allows one queue on the PF */ - max_combined = 1; + /* Limit value based on the queue mask */ + max_combined = adapter->ring_feature[RING_F_RSS].mask + 1; } else if (tcs > 1) { /* For DCB report channels per traffic class */ if (adapter->hw.mac.type == ixgbe_mac_82598EB) { From fa81da7e5b261cf8010f65253661522d3ff71714 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 7 Sep 2016 20:28:17 -0700 Subject: [PATCH 1414/1715] ixgbe: Limit reporting of redirection table if SR-IOV is enabled The hardware redirection table can support more queues then the PF currently has when SR-IOV is enabled. In order to account for this use the RSS mask to trim of the bits that are not used. Signed-off-by: Alexander Duyck Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index 2d872be336bb..f49f80380aa5 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -2947,9 +2947,13 @@ static u32 ixgbe_rss_indir_size(struct net_device *netdev) static void ixgbe_get_reta(struct ixgbe_adapter *adapter, u32 *indir) { int i, reta_size = ixgbe_rss_indir_tbl_entries(adapter); + u16 rss_m = adapter->ring_feature[RING_F_RSS].mask; + + if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) + rss_m = adapter->ring_feature[RING_F_RSS].indices - 1; for (i = 0; i < reta_size; i++) - indir[i] = adapter->rss_indir_tbl[i]; + indir[i] = adapter->rss_indir_tbl[i] & rss_m; } static int ixgbe_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, From e24fcf28959298e07cae9ee19eb9a4b2b399b4fb Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 7 Sep 2016 20:28:24 -0700 Subject: [PATCH 1415/1715] ixgbe: Support 4 queue RSS on VFs with 1 or 2 queue RSS on PF Instead of limiting the VFs if we don't use 4 queues for RSS in the PF we can instead just limit the RSS queues used to a power of 2. By doing this we can support use cases where VFs are using more queues than the PF is currently using and can support RSS if so desired. The only limitation on this is that we cannot support 3 queues of RSS in the PF or VF. In either of these cases we should fall back to 2 queues in order to be able to use the power of 2 masking provided by the psrtype register. Signed-off-by: Alexander Duyck Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c | 7 ++++--- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 12 +++++++----- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c index bcdc88444ceb..15ab337fd7ad 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c @@ -515,15 +515,16 @@ static bool ixgbe_set_sriov_queues(struct ixgbe_adapter *adapter) vmdq_i = min_t(u16, IXGBE_MAX_VMDQ_INDICES, vmdq_i); /* 64 pool mode with 2 queues per pool */ - if ((vmdq_i > 32) || (rss_i < 4) || (vmdq_i > 16 && pools)) { + if ((vmdq_i > 32) || (vmdq_i > 16 && pools)) { vmdq_m = IXGBE_82599_VMDQ_2Q_MASK; rss_m = IXGBE_RSS_2Q_MASK; rss_i = min_t(u16, rss_i, 2); - /* 32 pool mode with 4 queues per pool */ + /* 32 pool mode with up to 4 queues per pool */ } else { vmdq_m = IXGBE_82599_VMDQ_4Q_MASK; rss_m = IXGBE_RSS_4Q_MASK; - rss_i = 4; + /* We can support 4, 2, or 1 queues */ + rss_i = (rss_i > 3) ? 4 : (rss_i > 1) ? 2 : 1; } #ifdef IXGBE_FCOE diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 1c888588cecd..a244d9a67264 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -3248,7 +3248,8 @@ static void ixgbe_setup_mtqc(struct ixgbe_adapter *adapter) mtqc |= IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ; else if (tcs > 1) mtqc |= IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ; - else if (adapter->ring_feature[RING_F_RSS].indices == 4) + else if (adapter->ring_feature[RING_F_VMDQ].mask == + IXGBE_82599_VMDQ_4Q_MASK) mtqc |= IXGBE_MTQC_32VF; else mtqc |= IXGBE_MTQC_64VF; @@ -3475,12 +3476,12 @@ static void ixgbe_setup_reta(struct ixgbe_adapter *adapter) u32 reta_entries = ixgbe_rss_indir_tbl_entries(adapter); u16 rss_i = adapter->ring_feature[RING_F_RSS].indices; - /* Program table for at least 2 queues w/ SR-IOV so that VFs can + /* Program table for at least 4 queues w/ SR-IOV so that VFs can * make full use of any rings they may have. We will use the * PSRTYPE register to control how many rings we use within the PF. */ - if ((adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) && (rss_i < 2)) - rss_i = 2; + if ((adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) && (rss_i < 4)) + rss_i = 4; /* Fill out hash function seeds */ for (i = 0; i < 10; i++) @@ -3544,7 +3545,8 @@ static void ixgbe_setup_mrqc(struct ixgbe_adapter *adapter) mrqc = IXGBE_MRQC_VMDQRT8TCEN; /* 8 TCs */ else if (tcs > 1) mrqc = IXGBE_MRQC_VMDQRT4TCEN; /* 4 TCs */ - else if (adapter->ring_feature[RING_F_RSS].indices == 4) + else if (adapter->ring_feature[RING_F_VMDQ].mask == + IXGBE_82599_VMDQ_4Q_MASK) mrqc = IXGBE_MRQC_VMDQRSS32EN; else mrqc = IXGBE_MRQC_VMDQRSS64EN; From 0c339bf9ac2eed861d34a9dd40aee2a2d490ec36 Mon Sep 17 00:00:00 2001 From: Emil Tantilov Date: Fri, 9 Sep 2016 12:59:10 -0700 Subject: [PATCH 1416/1715] ixgbe: reset before SRIOV init to avoid mailbox issues Enabling SRIOV while the ixgbevf driver is loaded will result in all mailbox requests from ixgbevf_open() being rejected by ixgbe because adapter->clear_to_send is set to false on reset. Call ixgbe_sriov_reinit() before pci_enable_sriov() to make sure that mailbox requests are handled from the time ixgbevf is loaded. Reported-by: Andrew Bowers Signed-off-by: Emil Tantilov Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index 8618599dfd6f..343a18241323 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -329,13 +329,15 @@ static int ixgbe_pci_sriov_enable(struct pci_dev *dev, int num_vfs) for (i = 0; i < adapter->num_vfs; i++) ixgbe_vf_configuration(dev, (i | 0x10000000)); + /* reset before enabling SRIOV to avoid mailbox issues */ + ixgbe_sriov_reinit(adapter); + err = pci_enable_sriov(dev, num_vfs); if (err) { e_dev_warn("Failed to enable PCI sriov: %d\n", err); return err; } ixgbe_get_vfs(adapter); - ixgbe_sriov_reinit(adapter); return num_vfs; #else From 36b701fae12ac763a568037e4e7c96b5727a8b3e Mon Sep 17 00:00:00 2001 From: Laura Garcia Liebana Date: Wed, 14 Sep 2016 15:00:02 +0200 Subject: [PATCH 1417/1715] netfilter: nf_tables: validate maximum value of u32 netlink attributes Fetch value and validate u32 netlink attribute. This validation is usually required when the u32 netlink attributes are being stored in a field whose size is smaller. This patch revisits 4da449ae1df9 ("netfilter: nft_exthdr: Add size check on u8 nft_exthdr attributes"). Fixes: 96518518cc41 ("netfilter: add nftables") Suggested-by: Pablo Neira Ayuso Signed-off-by: Laura Garcia Liebana Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 1 + net/netfilter/nf_tables_api.c | 25 +++++++++++++++++++++++++ net/netfilter/nft_bitwise.c | 8 +++++++- net/netfilter/nft_byteorder.c | 15 +++++++++++++-- net/netfilter/nft_cmp.c | 3 +++ net/netfilter/nft_exthdr.c | 12 +++++++----- net/netfilter/nft_immediate.c | 4 ++++ 7 files changed, 60 insertions(+), 8 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index a7a7cebc8d07..5031e072567b 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -145,6 +145,7 @@ static inline enum nft_registers nft_type_to_reg(enum nft_data_types type) return type == NFT_DATA_VERDICT ? NFT_REG_VERDICT : NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE; } +unsigned int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest); unsigned int nft_parse_register(const struct nlattr *attr); int nft_dump_register(struct sk_buff *skb, unsigned int attr, unsigned int reg); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index bd9715e5ff26..b70d3ea1430e 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4409,6 +4409,31 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx, return 0; } +/** + * nft_parse_u32_check - fetch u32 attribute and check for maximum value + * + * @attr: netlink attribute to fetch value from + * @max: maximum value to be stored in dest + * @dest: pointer to the variable + * + * Parse, check and store a given u32 netlink attribute into variable. + * This function returns -ERANGE if the value goes over maximum value. + * Otherwise a 0 is returned and the attribute value is stored in the + * destination variable. + */ +unsigned int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest) +{ + int val; + + val = ntohl(nla_get_be32(attr)); + if (val > max) + return -ERANGE; + + *dest = val; + return 0; +} +EXPORT_SYMBOL_GPL(nft_parse_u32_check); + /** * nft_parse_register - parse a register value from a netlink attribute * diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c index d71cc18fa35d..31c15ed2e5fc 100644 --- a/net/netfilter/nft_bitwise.c +++ b/net/netfilter/nft_bitwise.c @@ -52,6 +52,7 @@ static int nft_bitwise_init(const struct nft_ctx *ctx, { struct nft_bitwise *priv = nft_expr_priv(expr); struct nft_data_desc d1, d2; + u32 len; int err; if (tb[NFTA_BITWISE_SREG] == NULL || @@ -61,7 +62,12 @@ static int nft_bitwise_init(const struct nft_ctx *ctx, tb[NFTA_BITWISE_XOR] == NULL) return -EINVAL; - priv->len = ntohl(nla_get_be32(tb[NFTA_BITWISE_LEN])); + err = nft_parse_u32_check(tb[NFTA_BITWISE_LEN], U8_MAX, &len); + if (err < 0) + return err; + + priv->len = len; + priv->sreg = nft_parse_register(tb[NFTA_BITWISE_SREG]); err = nft_validate_register_load(priv->sreg, priv->len); if (err < 0) diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c index b78c28ba465f..ee63d981268d 100644 --- a/net/netfilter/nft_byteorder.c +++ b/net/netfilter/nft_byteorder.c @@ -99,6 +99,7 @@ static int nft_byteorder_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_byteorder *priv = nft_expr_priv(expr); + u32 size, len; int err; if (tb[NFTA_BYTEORDER_SREG] == NULL || @@ -117,7 +118,12 @@ static int nft_byteorder_init(const struct nft_ctx *ctx, return -EINVAL; } - priv->size = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_SIZE])); + err = nft_parse_u32_check(tb[NFTA_BYTEORDER_SIZE], U8_MAX, &size); + if (err < 0) + return err; + + priv->size = size; + switch (priv->size) { case 2: case 4: @@ -128,7 +134,12 @@ static int nft_byteorder_init(const struct nft_ctx *ctx, } priv->sreg = nft_parse_register(tb[NFTA_BYTEORDER_SREG]); - priv->len = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_LEN])); + err = nft_parse_u32_check(tb[NFTA_BYTEORDER_LEN], U8_MAX, &len); + if (err < 0) + return err; + + priv->len = len; + err = nft_validate_register_load(priv->sreg, priv->len); if (err < 0) return err; diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c index e25b35d70e4d..2e53739812b1 100644 --- a/net/netfilter/nft_cmp.c +++ b/net/netfilter/nft_cmp.c @@ -84,6 +84,9 @@ static int nft_cmp_init(const struct nft_ctx *ctx, const struct nft_expr *expr, if (err < 0) return err; + if (desc.len > U8_MAX) + return -ERANGE; + priv->op = ntohl(nla_get_be32(tb[NFTA_CMP_OP])); priv->len = desc.len; return 0; diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index 82c264e40278..a84cf3d66056 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -59,7 +59,7 @@ static int nft_exthdr_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_exthdr *priv = nft_expr_priv(expr); - u32 offset, len; + u32 offset, len, err; if (tb[NFTA_EXTHDR_DREG] == NULL || tb[NFTA_EXTHDR_TYPE] == NULL || @@ -67,11 +67,13 @@ static int nft_exthdr_init(const struct nft_ctx *ctx, tb[NFTA_EXTHDR_LEN] == NULL) return -EINVAL; - offset = ntohl(nla_get_be32(tb[NFTA_EXTHDR_OFFSET])); - len = ntohl(nla_get_be32(tb[NFTA_EXTHDR_LEN])); + err = nft_parse_u32_check(tb[NFTA_EXTHDR_OFFSET], U8_MAX, &offset); + if (err < 0) + return err; - if (offset > U8_MAX || len > U8_MAX) - return -ERANGE; + err = nft_parse_u32_check(tb[NFTA_EXTHDR_LEN], U8_MAX, &len); + if (err < 0) + return err; priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]); priv->offset = offset; diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index db3b746858e3..d17018ff54e6 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -53,6 +53,10 @@ static int nft_immediate_init(const struct nft_ctx *ctx, tb[NFTA_IMMEDIATE_DATA]); if (err < 0) return err; + + if (desc.len > U8_MAX) + return -ERANGE; + priv->dlen = desc.len; priv->dreg = nft_parse_register(tb[NFTA_IMMEDIATE_DREG]); From 8061bb54436c19fd16b7c734a69ff60bac26e3e9 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Wed, 14 Sep 2016 23:41:46 +0800 Subject: [PATCH 1418/1715] netfilter: nft_queue: add _SREG_QNUM attr to select the queue number Currently, the user can specify the queue numbers by _QUEUE_NUM and _QUEUE_TOTAL attributes, this is enough in most situations. But acctually, it is not very flexible, for example: tcp dport 80 mapped to queue0 tcp dport 81 mapped to queue1 tcp dport 82 mapped to queue2 In order to do this thing, we must add 3 nft rules, and more mapping meant more rules ... So take one register to select the queue number, then we can add one simple rule to mapping queues, maybe like this: queue num tcp dport map { 80:0, 81:1, 82:2 ... } Florian Westphal also proposed wider usage scenarios: queue num jhash ip saddr . ip daddr mod ... queue num meta cpu ... queue num meta mark ... The last point is how to load a queue number from sreg, although we can use *(u16*)®s->data[reg] to load the queue number, just like nat expr to load its l4port do. But we will cooperate with hash expr, meta cpu, meta mark expr and so on. They all store the result to u32 type, so cast it to u16 pointer and dereference it will generate wrong result in the big endian system. So just keep it simple, we treat queue number as u32 type, although u16 type is already enough. Suggested-by: Pablo Neira Ayuso Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 + net/netfilter/nft_queue.c | 102 ++++++++++++++++++++--- 2 files changed, 92 insertions(+), 12 deletions(-) diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index bcfb892ff148..1cf41dd838b2 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -894,12 +894,14 @@ enum nft_log_attributes { * @NFTA_QUEUE_NUM: netlink queue to send messages to (NLA_U16) * @NFTA_QUEUE_TOTAL: number of queues to load balance packets on (NLA_U16) * @NFTA_QUEUE_FLAGS: various flags (NLA_U16) + * @NFTA_QUEUE_SREG_QNUM: source register of queue number (NLA_U32: nft_registers) */ enum nft_queue_attributes { NFTA_QUEUE_UNSPEC, NFTA_QUEUE_NUM, NFTA_QUEUE_TOTAL, NFTA_QUEUE_FLAGS, + NFTA_QUEUE_SREG_QNUM, __NFTA_QUEUE_MAX }; #define NFTA_QUEUE_MAX (__NFTA_QUEUE_MAX - 1) diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c index d16d59959ff6..393d359a1889 100644 --- a/net/netfilter/nft_queue.c +++ b/net/netfilter/nft_queue.c @@ -22,9 +22,10 @@ static u32 jhash_initval __read_mostly; struct nft_queue { - u16 queuenum; - u16 queues_total; - u16 flags; + enum nft_registers sreg_qnum:8; + u16 queuenum; + u16 queues_total; + u16 flags; }; static void nft_queue_eval(const struct nft_expr *expr, @@ -54,26 +55,39 @@ static void nft_queue_eval(const struct nft_expr *expr, regs->verdict.code = ret; } +static void nft_queue_sreg_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_queue *priv = nft_expr_priv(expr); + u32 queue, ret; + + queue = regs->data[priv->sreg_qnum]; + + ret = NF_QUEUE_NR(queue); + if (priv->flags & NFT_QUEUE_FLAG_BYPASS) + ret |= NF_VERDICT_FLAG_QUEUE_BYPASS; + + regs->verdict.code = ret; +} + static const struct nla_policy nft_queue_policy[NFTA_QUEUE_MAX + 1] = { [NFTA_QUEUE_NUM] = { .type = NLA_U16 }, [NFTA_QUEUE_TOTAL] = { .type = NLA_U16 }, [NFTA_QUEUE_FLAGS] = { .type = NLA_U16 }, + [NFTA_QUEUE_SREG_QNUM] = { .type = NLA_U32 }, }; static int nft_queue_init(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nlattr * const tb[]) + const struct nft_expr *expr, + const struct nlattr * const tb[]) { struct nft_queue *priv = nft_expr_priv(expr); u32 maxid; - if (tb[NFTA_QUEUE_NUM] == NULL) - return -EINVAL; - - init_hashrandom(&jhash_initval); priv->queuenum = ntohs(nla_get_be16(tb[NFTA_QUEUE_NUM])); - if (tb[NFTA_QUEUE_TOTAL] != NULL) + if (tb[NFTA_QUEUE_TOTAL]) priv->queues_total = ntohs(nla_get_be16(tb[NFTA_QUEUE_TOTAL])); else priv->queues_total = 1; @@ -85,7 +99,7 @@ static int nft_queue_init(const struct nft_ctx *ctx, if (maxid > U16_MAX) return -ERANGE; - if (tb[NFTA_QUEUE_FLAGS] != NULL) { + if (tb[NFTA_QUEUE_FLAGS]) { priv->flags = ntohs(nla_get_be16(tb[NFTA_QUEUE_FLAGS])); if (priv->flags & ~NFT_QUEUE_FLAG_MASK) return -EINVAL; @@ -93,6 +107,29 @@ static int nft_queue_init(const struct nft_ctx *ctx, return 0; } +static int nft_queue_sreg_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_queue *priv = nft_expr_priv(expr); + int err; + + priv->sreg_qnum = nft_parse_register(tb[NFTA_QUEUE_SREG_QNUM]); + err = nft_validate_register_load(priv->sreg_qnum, sizeof(u32)); + if (err < 0) + return err; + + if (tb[NFTA_QUEUE_FLAGS]) { + priv->flags = ntohs(nla_get_be16(tb[NFTA_QUEUE_FLAGS])); + if (priv->flags & ~NFT_QUEUE_FLAG_MASK) + return -EINVAL; + if (priv->flags & NFT_QUEUE_FLAG_CPU_FANOUT) + return -EOPNOTSUPP; + } + + return 0; +} + static int nft_queue_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct nft_queue *priv = nft_expr_priv(expr); @@ -108,6 +145,21 @@ nla_put_failure: return -1; } +static int +nft_queue_sreg_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_queue *priv = nft_expr_priv(expr); + + if (nft_dump_register(skb, NFTA_QUEUE_SREG_QNUM, priv->sreg_qnum) || + nla_put_be16(skb, NFTA_QUEUE_FLAGS, htons(priv->flags))) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -1; +} + static struct nft_expr_type nft_queue_type; static const struct nft_expr_ops nft_queue_ops = { .type = &nft_queue_type, @@ -117,9 +169,35 @@ static const struct nft_expr_ops nft_queue_ops = { .dump = nft_queue_dump, }; +static const struct nft_expr_ops nft_queue_sreg_ops = { + .type = &nft_queue_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_queue)), + .eval = nft_queue_sreg_eval, + .init = nft_queue_sreg_init, + .dump = nft_queue_sreg_dump, +}; + +static const struct nft_expr_ops * +nft_queue_select_ops(const struct nft_ctx *ctx, + const struct nlattr * const tb[]) +{ + if (tb[NFTA_QUEUE_NUM] && tb[NFTA_QUEUE_SREG_QNUM]) + return ERR_PTR(-EINVAL); + + init_hashrandom(&jhash_initval); + + if (tb[NFTA_QUEUE_NUM]) + return &nft_queue_ops; + + if (tb[NFTA_QUEUE_SREG_QNUM]) + return &nft_queue_sreg_ops; + + return ERR_PTR(-EINVAL); +} + static struct nft_expr_type nft_queue_type __read_mostly = { .name = "queue", - .ops = &nft_queue_ops, + .select_ops = &nft_queue_select_ops, .policy = nft_queue_policy, .maxattr = NFTA_QUEUE_MAX, .owner = THIS_MODULE, From 2462f3f4a7e079192b78f36900c34f18dad824a7 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Thu, 15 Sep 2016 20:50:16 +0800 Subject: [PATCH 1419/1715] netfilter: nf_queue: improve queue range support for bridge family After commit ac2863445686 ("netfilter: bridge: add nf_afinfo to enable queuing to userspace"), we can queue packets to the user space in bridge family. But when the user specify the queue range, packets will be only delivered to the first queue num. Because in nfqueue_hash, we only support ipv4 and ipv6 family. Now add support for bridge family too. Suggested-by: Pablo Neira Ayuso Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_queue.h | 56 ++++++++++++++++++++++++-------- 1 file changed, 43 insertions(+), 13 deletions(-) diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index cc8a11f7e306..903dca050fb6 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -41,22 +41,19 @@ static inline void init_hashrandom(u32 *jhash_initval) *jhash_initval = prandom_u32(); } -static inline u32 hash_v4(const struct sk_buff *skb, u32 jhash_initval) +static inline u32 hash_v4(const struct iphdr *iph, u32 initval) { - const struct iphdr *iph = ip_hdr(skb); - /* packets in either direction go into same queue */ if ((__force u32)iph->saddr < (__force u32)iph->daddr) return jhash_3words((__force u32)iph->saddr, - (__force u32)iph->daddr, iph->protocol, jhash_initval); + (__force u32)iph->daddr, iph->protocol, initval); return jhash_3words((__force u32)iph->daddr, - (__force u32)iph->saddr, iph->protocol, jhash_initval); + (__force u32)iph->saddr, iph->protocol, initval); } -static inline u32 hash_v6(const struct sk_buff *skb, u32 jhash_initval) +static inline u32 hash_v6(const struct ipv6hdr *ip6h, u32 initval) { - const struct ipv6hdr *ip6h = ipv6_hdr(skb); u32 a, b, c; if ((__force u32)ip6h->saddr.s6_addr32[3] < @@ -74,17 +71,50 @@ static inline u32 hash_v6(const struct sk_buff *skb, u32 jhash_initval) else c = (__force u32) ip6h->daddr.s6_addr32[1]; - return jhash_3words(a, b, c, jhash_initval); + return jhash_3words(a, b, c, initval); +} + +static inline u32 hash_bridge(const struct sk_buff *skb, u32 initval) +{ + struct ipv6hdr *ip6h, _ip6h; + struct iphdr *iph, _iph; + + switch (eth_hdr(skb)->h_proto) { + case htons(ETH_P_IP): + iph = skb_header_pointer(skb, skb_network_offset(skb), + sizeof(*iph), &_iph); + if (iph) + return hash_v4(iph, initval); + break; + case htons(ETH_P_IPV6): + ip6h = skb_header_pointer(skb, skb_network_offset(skb), + sizeof(*ip6h), &_ip6h); + if (ip6h) + return hash_v6(ip6h, initval); + break; + } + + return 0; } static inline u32 nfqueue_hash(const struct sk_buff *skb, u16 queue, u16 queues_total, u8 family, - u32 jhash_initval) + u32 initval) { - if (family == NFPROTO_IPV4) - queue += ((u64) hash_v4(skb, jhash_initval) * queues_total) >> 32; - else if (family == NFPROTO_IPV6) - queue += ((u64) hash_v6(skb, jhash_initval) * queues_total) >> 32; + switch (family) { + case NFPROTO_IPV4: + queue += reciprocal_scale(hash_v4(ip_hdr(skb), initval), + queues_total); + break; + case NFPROTO_IPV6: + queue += reciprocal_scale(hash_v6(ipv6_hdr(skb), initval), + queues_total); + break; + case NFPROTO_BRIDGE: + queue += reciprocal_scale(hash_bridge(skb, initval), + queues_total); + break; + } return queue; } From 8dc3c2b86bb16e8f345b80a8af69696e9a7edb65 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Thu, 15 Sep 2016 21:29:08 +0800 Subject: [PATCH 1420/1715] netfilter: nf_tables: improve nft payload fast eval There's an off-by-one issue in nft_payload_fast_eval, skb_tail_pointer and ptr + priv->len all point to the last valid address plus 1. So if they are equal, we can still fetch the valid data. It's unnecessary to fall back to nft_payload_eval. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index fb8b5892b5ff..36ba4e55d84e 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -98,7 +98,7 @@ static bool nft_payload_fast_eval(const struct nft_expr *expr, ptr += priv->offset; - if (unlikely(ptr + priv->len >= skb_tail_pointer(skb))) + if (unlikely(ptr + priv->len > skb_tail_pointer(skb))) return false; *dest = 0; From a20877b5edec4d2b62560b5245199af04846476c Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sat, 17 Sep 2016 14:31:20 +0800 Subject: [PATCH 1421/1715] netfilter: nf_tables: check tprot_set first when we use xt.thoff pkt->xt.thoff is not always set properly, but we use it without any check. For payload expr, it will cause wrong results. For nftrace, we may notify the wrong network or transport header to the user space, furthermore, input the following nft rules, warning message will be printed out: # nft add rule arp filter output meta nftrace set 1 WARNING: CPU: 0 PID: 13428 at net/netfilter/nf_tables_trace.c:263 nft_trace_notify+0x4a3/0x5e0 [nf_tables] Call Trace: [] dump_stack+0x63/0x85 [] __warn+0xcb/0xf0 [] warn_slowpath_null+0x1d/0x20 [] nft_trace_notify+0x4a3/0x5e0 [nf_tables] [ ... ] [] nft_do_chain_arp+0x78/0x90 [nf_tables_arp] [] nf_iterate+0x62/0x80 [] nf_hook_slow+0x73/0xd0 [] arp_xmit+0x8f/0xb0 [ ... ] [] arp_solicit+0x106/0x2c0 So before we use pkt->xt.thoff, check the tprot_set first. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_core.c | 5 ++++- net/netfilter/nf_tables_trace.c | 20 +++++++++++--------- net/netfilter/nft_payload.c | 4 ++++ 3 files changed, 19 insertions(+), 10 deletions(-) diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 36ba4e55d84e..67259cefef06 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -93,8 +93,11 @@ static bool nft_payload_fast_eval(const struct nft_expr *expr, if (priv->base == NFT_PAYLOAD_NETWORK_HEADER) ptr = skb_network_header(skb); - else + else { + if (!pkt->tprot_set) + return false; ptr = skb_network_header(skb) + pkt->xt.thoff; + } ptr += priv->offset; diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c index 39eb1cc62e91..696fe8f6f2f2 100644 --- a/net/netfilter/nf_tables_trace.c +++ b/net/netfilter/nf_tables_trace.c @@ -113,20 +113,22 @@ static int nf_trace_fill_pkt_info(struct sk_buff *nlskb, const struct nft_pktinfo *pkt) { const struct sk_buff *skb = pkt->skb; - unsigned int len = min_t(unsigned int, - pkt->xt.thoff - skb_network_offset(skb), - NFT_TRACETYPE_NETWORK_HSIZE); int off = skb_network_offset(skb); + unsigned int len, nh_end; + nh_end = pkt->tprot_set ? pkt->xt.thoff : skb->len; + len = min_t(unsigned int, nh_end - skb_network_offset(skb), + NFT_TRACETYPE_NETWORK_HSIZE); if (trace_fill_header(nlskb, NFTA_TRACE_NETWORK_HEADER, skb, off, len)) return -1; - len = min_t(unsigned int, skb->len - pkt->xt.thoff, - NFT_TRACETYPE_TRANSPORT_HSIZE); - - if (trace_fill_header(nlskb, NFTA_TRACE_TRANSPORT_HEADER, skb, - pkt->xt.thoff, len)) - return -1; + if (pkt->tprot_set) { + len = min_t(unsigned int, skb->len - pkt->xt.thoff, + NFT_TRACETYPE_TRANSPORT_HSIZE); + if (trace_fill_header(nlskb, NFTA_TRACE_TRANSPORT_HEADER, skb, + pkt->xt.thoff, len)) + return -1; + } if (!skb_mac_header_was_set(skb)) return 0; diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 12cd4bf16d17..b2f88617611a 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -92,6 +92,8 @@ static void nft_payload_eval(const struct nft_expr *expr, offset = skb_network_offset(skb); break; case NFT_PAYLOAD_TRANSPORT_HEADER: + if (!pkt->tprot_set) + goto err; offset = pkt->xt.thoff; break; default: @@ -184,6 +186,8 @@ static void nft_payload_set_eval(const struct nft_expr *expr, offset = skb_network_offset(skb); break; case NFT_PAYLOAD_TRANSPORT_HEADER: + if (!pkt->tprot_set) + goto err; offset = pkt->xt.thoff; break; default: From 7bdc66242de7f9cbe8dbb01757042dd18744d800 Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Sun, 18 Sep 2016 10:52:25 +0800 Subject: [PATCH 1422/1715] netfilter: Enhance the codes used to get random once There are some codes which are used to get one random once in netfilter. We could use net_get_random_once to simplify these codes. Signed-off-by: Gao Feng Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_RATEEST.c | 6 +----- net/netfilter/xt_connlimit.c | 8 +------- net/netfilter/xt_recent.c | 7 ++----- 3 files changed, 4 insertions(+), 17 deletions(-) diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c index 515131f9e021..dbd6c4a12b97 100644 --- a/net/netfilter/xt_RATEEST.c +++ b/net/netfilter/xt_RATEEST.c @@ -24,7 +24,6 @@ static DEFINE_MUTEX(xt_rateest_mutex); #define RATEEST_HSIZE 16 static struct hlist_head rateest_hash[RATEEST_HSIZE] __read_mostly; static unsigned int jhash_rnd __read_mostly; -static bool rnd_inited __read_mostly; static unsigned int xt_rateest_hash(const char *name) { @@ -99,10 +98,7 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par) } cfg; int ret; - if (unlikely(!rnd_inited)) { - get_random_bytes(&jhash_rnd, sizeof(jhash_rnd)); - rnd_inited = true; - } + net_get_random_once(&jhash_rnd, sizeof(jhash_rnd)); est = xt_rateest_lookup(info->name); if (est) { diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index 99bbc829868d..b6dc322593a3 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -366,14 +366,8 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par) unsigned int i; int ret; - if (unlikely(!connlimit_rnd)) { - u_int32_t rand; + net_get_random_once(&connlimit_rnd, sizeof(connlimit_rnd)); - do { - get_random_bytes(&rand, sizeof(rand)); - } while (!rand); - cmpxchg(&connlimit_rnd, 0, rand); - } ret = nf_ct_l3proto_try_module_get(par->family); if (ret < 0) { pr_info("cannot load conntrack support for " diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index d725a27743a1..e3b7a09b103e 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -110,7 +110,6 @@ static const struct file_operations recent_old_fops, recent_mt_fops; #endif static u_int32_t hash_rnd __read_mostly; -static bool hash_rnd_inited __read_mostly; static inline unsigned int recent_entry_hash4(const union nf_inet_addr *addr) { @@ -340,10 +339,8 @@ static int recent_mt_check(const struct xt_mtchk_param *par, int ret = -EINVAL; size_t sz; - if (unlikely(!hash_rnd_inited)) { - get_random_bytes(&hash_rnd, sizeof(hash_rnd)); - hash_rnd_inited = true; - } + net_get_random_once(&hash_rnd, sizeof(hash_rnd)); + if (info->check_set & ~XT_RECENT_VALID_FLAGS) { pr_info("Unsupported user space flags (%08x)\n", info->check_set); From b9d80f83bf8c3485ae53a4f3a715363d764bb0e4 Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Tue, 20 Sep 2016 10:31:04 +0800 Subject: [PATCH 1423/1715] netfilter: xt_helper: Use sizeof(variable) instead of literal number It's better to use sizeof(info->name)-1 as index to force set the string tail instead of literal number '29'. Signed-off-by: Gao Feng Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_helper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c index 9f4ab00c8050..805c9f64a04c 100644 --- a/net/netfilter/xt_helper.c +++ b/net/netfilter/xt_helper.c @@ -65,7 +65,7 @@ static int helper_mt_check(const struct xt_mtchk_param *par) par->family); return ret; } - info->name[29] = '\0'; + info->name[sizeof(info->name) - 1] = '\0'; return 0; } From 4004d5c374dabcbce201e16442e4596b764cc60b Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 20 Sep 2016 18:22:46 +0200 Subject: [PATCH 1424/1715] netfilter: nft_lookup: remove superfluous element found check We already checked for !found just a bit before: if (!found) { regs->verdict.code = NFT_BREAK; return; } if (found && set->flags & NFT_SET_MAP) ^^^^^ So this redundant check can just go away. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_lookup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index e164325d1bc0..8166b6994cc7 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -43,7 +43,7 @@ static void nft_lookup_eval(const struct nft_expr *expr, return; } - if (found && set->flags & NFT_SET_MAP) + if (set->flags & NFT_SET_MAP) nft_data_copy(®s->data[priv->dreg], nft_set_ext_data(ext), set->dlen); From 21641c2e1ffd0b504610a33beaeab8fcc5140677 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Sun, 18 Sep 2016 15:52:20 -0700 Subject: [PATCH 1425/1715] net_sched: check NULL on error path in route4_change() On error path in route4_change(), 'f' could be NULL, so we should check NULL before calling tcf_exts_destroy(). Fixes: b9a24bb76bf6 ("net_sched: properly handle failure case of tcf_exts_init()") Reported-by: kbuild test robot Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/cls_route.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index a4ce39b19be0..455fc8f83d0a 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -559,7 +559,8 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, return 0; errout: - tcf_exts_destroy(&f->exts); + if (f) + tcf_exts_destroy(&f->exts); kfree(f); return err; } From a3007446e53af07c53bdb4cabad7b3ea60859da4 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Tue, 20 Sep 2016 18:19:13 -0300 Subject: [PATCH 1426/1715] sctp: fix the handling of SACK Gap Ack blocks sctp_acked() is using 32bit arithmetics on 16bits vars, via TSN_lte() macros, which is weird and confusing. Once the offset to ctsn is calculated, all wrapping is already handled and thus to verify the Gap Ack blocks we can just use pure less/big-or-equal than checks. Also, rename gap variable to tsn_offset, so it's more meaningful, as it doesn't point to any gap at all. Even so, I don't think this discrepancy resulted in any practical bug. This patch is a preparation for the next one, which will introduce typecheck() for TSN_lte() macros and would cause a compile error here. Suggested-by: David Laight Reported-by: David Laight Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/outqueue.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 8c3f446d965c..3ec6da8bbb53 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -1719,7 +1719,7 @@ static int sctp_acked(struct sctp_sackhdr *sack, __u32 tsn) { int i; sctp_sack_variable_t *frags; - __u16 gap; + __u16 tsn_offset, blocks; __u32 ctsn = ntohl(sack->cum_tsn_ack); if (TSN_lte(tsn, ctsn)) @@ -1738,10 +1738,11 @@ static int sctp_acked(struct sctp_sackhdr *sack, __u32 tsn) */ frags = sack->variable; - gap = tsn - ctsn; - for (i = 0; i < ntohs(sack->num_gap_ack_blocks); ++i) { - if (TSN_lte(ntohs(frags[i].gab.start), gap) && - TSN_lte(gap, ntohs(frags[i].gab.end))) + blocks = ntohs(sack->num_gap_ack_blocks); + tsn_offset = tsn - ctsn; + for (i = 0; i < blocks; ++i) { + if (tsn_offset >= ntohs(frags[i].gab.start) && + tsn_offset <= ntohs(frags[i].gab.end)) goto pass; } From 182691d0998400f35ad304718024e60feaa864aa Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Tue, 20 Sep 2016 18:19:14 -0300 Subject: [PATCH 1427/1715] sctp: improve how SSN, TSN and ASCONF serial are compared Make it similar to time_before() macros: - easier to understand - make use of typecheck() to avoid working on unexpected variable types (made the issue on previous patch visible) - for _[lg]te versions, slighly faster, as the compiler used to generate a sequence of cmp/je/cmp/js instructions and now it's sub/test/jle (for _lte): Before, for sctp_outq_sack: if (primary->cacc.changeover_active) { 1f01: 80 b9 84 02 00 00 00 cmpb $0x0,0x284(%rcx) 1f08: 74 6e je 1f78 u8 clear_cycling = 0; if (TSN_lte(primary->cacc.next_tsn_at_change, sack_ctsn)) { 1f0a: 8b 81 80 02 00 00 mov 0x280(%rcx),%eax return ((s) - (t)) & TSN_SIGN_BIT; } static inline int TSN_lte(__u32 s, __u32 t) { return ((s) == (t)) || (((s) - (t)) & TSN_SIGN_BIT); 1f10: 8b 7d bc mov -0x44(%rbp),%edi 1f13: 39 c7 cmp %eax,%edi 1f15: 74 25 je 1f3c 1f17: 39 f8 cmp %edi,%eax 1f19: 78 21 js 1f3c primary->cacc.changeover_active = 0; After: if (primary->cacc.changeover_active) { 1ee7: 80 b9 84 02 00 00 00 cmpb $0x0,0x284(%rcx) 1eee: 74 73 je 1f63 u8 clear_cycling = 0; if (TSN_lte(primary->cacc.next_tsn_at_change, sack_ctsn)) { 1ef0: 8b 81 80 02 00 00 mov 0x280(%rcx),%eax 1ef6: 2b 45 b4 sub -0x4c(%rbp),%eax 1ef9: 85 c0 test %eax,%eax 1efb: 7e 26 jle 1f23 primary->cacc.changeover_active = 0; *_lt() generated pretty much the same code. Tested with gcc (GCC) 6.1.1 20160621. This patch also removes SSN_lte as it is not used and cleanups some comments. Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/sm.h | 92 ++++++++----------------------------------- 1 file changed, 17 insertions(+), 75 deletions(-) diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index bafe2a0ab908..ca6c971dd74a 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -307,85 +307,27 @@ static inline __u16 sctp_data_size(struct sctp_chunk *chunk) } /* Compare two TSNs */ +#define TSN_lt(a,b) \ + (typecheck(__u32, a) && \ + typecheck(__u32, b) && \ + ((__s32)((a) - (b)) < 0)) -/* RFC 1982 - Serial Number Arithmetic - * - * 2. Comparison - * Then, s1 is said to be equal to s2 if and only if i1 is equal to i2, - * in all other cases, s1 is not equal to s2. - * - * s1 is said to be less than s2 if, and only if, s1 is not equal to s2, - * and - * - * (i1 < i2 and i2 - i1 < 2^(SERIAL_BITS - 1)) or - * (i1 > i2 and i1 - i2 > 2^(SERIAL_BITS - 1)) - * - * s1 is said to be greater than s2 if, and only if, s1 is not equal to - * s2, and - * - * (i1 < i2 and i2 - i1 > 2^(SERIAL_BITS - 1)) or - * (i1 > i2 and i1 - i2 < 2^(SERIAL_BITS - 1)) - */ - -/* - * RFC 2960 - * 1.6 Serial Number Arithmetic - * - * Comparisons and arithmetic on TSNs in this document SHOULD use Serial - * Number Arithmetic as defined in [RFC1982] where SERIAL_BITS = 32. - */ - -enum { - TSN_SIGN_BIT = (1<<31) -}; - -static inline int TSN_lt(__u32 s, __u32 t) -{ - return ((s) - (t)) & TSN_SIGN_BIT; -} - -static inline int TSN_lte(__u32 s, __u32 t) -{ - return ((s) == (t)) || (((s) - (t)) & TSN_SIGN_BIT); -} +#define TSN_lte(a,b) \ + (typecheck(__u32, a) && \ + typecheck(__u32, b) && \ + ((__s32)((a) - (b)) <= 0)) /* Compare two SSNs */ +#define SSN_lt(a,b) \ + (typecheck(__u16, a) && \ + typecheck(__u16, b) && \ + ((__s16)((a) - (b)) < 0)) -/* - * RFC 2960 - * 1.6 Serial Number Arithmetic - * - * Comparisons and arithmetic on Stream Sequence Numbers in this document - * SHOULD use Serial Number Arithmetic as defined in [RFC1982] where - * SERIAL_BITS = 16. - */ -enum { - SSN_SIGN_BIT = (1<<15) -}; - -static inline int SSN_lt(__u16 s, __u16 t) -{ - return ((s) - (t)) & SSN_SIGN_BIT; -} - -static inline int SSN_lte(__u16 s, __u16 t) -{ - return ((s) == (t)) || (((s) - (t)) & SSN_SIGN_BIT); -} - -/* - * ADDIP 3.1.1 - * The valid range of Serial Number is from 0 to 4294967295 (2**32 - 1). Serial - * Numbers wrap back to 0 after reaching 4294967295. - */ -enum { - ADDIP_SERIAL_SIGN_BIT = (1<<31) -}; - -static inline int ADDIP_SERIAL_gte(__u32 s, __u32 t) -{ - return ((s) == (t)) || (((t) - (s)) & ADDIP_SERIAL_SIGN_BIT); -} +/* ADDIP 3.1.1 */ +#define ADDIP_SERIAL_gte(a,b) \ + (typecheck(__u32, a) && \ + typecheck(__u32, b) && \ + ((__s32)((b) - (a)) <= 0)) /* Check VTAG of the packet matches the sender's own tag. */ static inline int From 429baa6f0e1b9237a3667c3a5e8ca76051e6d0b7 Mon Sep 17 00:00:00 2001 From: Bert Kenward Date: Thu, 22 Sep 2016 15:47:45 +0100 Subject: [PATCH 1428/1715] sfc: check async completer is !NULL before calling Add a NULL check before calling asynchronous MCDI completion functions during device removal. Fixes: 7014d7f6 ("sfc: allow asynchronous MCDI without completion function") Signed-off-by: Bert Kenward Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/mcdi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sfc/mcdi.c b/drivers/net/ethernet/sfc/mcdi.c index 9fbc12a8f80c..241520943ada 100644 --- a/drivers/net/ethernet/sfc/mcdi.c +++ b/drivers/net/ethernet/sfc/mcdi.c @@ -1156,7 +1156,8 @@ void efx_mcdi_flush_async(struct efx_nic *efx) * acquired locks in the wrong order. */ list_for_each_entry_safe(async, next, &mcdi->async_list, list) { - async->complete(efx, async->cookie, -ENETDOWN, NULL, 0); + if (async->complete) + async->complete(efx, async->cookie, -ENETDOWN, NULL, 0); list_del(&async->list); kfree(async); } From fefa569a9d4bc4b7758c0fddd75bb0382c95da77 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 22 Sep 2016 08:58:55 -0700 Subject: [PATCH 1429/1715] net_sched: sch_fq: account for schedule/timers drifts It looks like the following patch can make FQ very precise, even in VM or stressed hosts. It matters at high pacing rates. We take into account the difference between the time that was programmed when last packet was sent, and current time (a drift of tens of usecs is often observed) Add an EWMA of the unthrottle latency to help diagnostics. This latency is the difference between current time and oldest packet in delayed RB-tree. This accounts for the high resolution timer latency, but can be different under stress, as fq_check_throttled() can be opportunistically be called from a dequeue() called after an enqueue() for a different flow. Tested: // Start a 10Gbit flow $ netperf --google-pacing-rate 1250000000 -H lpaa24 -l 10000 -- -K bbr & Before patch : $ sar -n DEV 10 5 | grep eth0 | grep Average Average: eth0 17106.04 756876.84 1102.75 1119049.02 0.00 0.00 0.52 After patch : $ sar -n DEV 10 5 | grep eth0 | grep Average Average: eth0 17867.00 800245.90 1151.77 1183172.12 0.00 0.00 0.52 A new iproute2 tc can output the 'unthrottle latency' : $ tc -s qd sh dev eth0 | grep latency 0 gc, 0 highprio, 32490767 throttled, 2382 ns latency Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/uapi/linux/pkt_sched.h | 2 +- net/sched/sch_fq.c | 21 ++++++++++++++++++--- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index f8e39dbaa781..df7451d35131 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -811,7 +811,7 @@ struct tc_fq_qd_stats { __u32 flows; __u32 inactive_flows; __u32 throttled_flows; - __u32 pad; + __u32 unthrottle_latency_ns; }; /* Heavy-Hitter Filter */ diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 5dd929cc1423..18e752439f6f 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -86,6 +86,7 @@ struct fq_sched_data { struct rb_root delayed; /* for rate limited flows */ u64 time_next_delayed_flow; + unsigned long unthrottle_latency_ns; struct fq_flow internal; /* for non classified or high prio packets */ u32 quantum; @@ -408,11 +409,19 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, static void fq_check_throttled(struct fq_sched_data *q, u64 now) { + unsigned long sample; struct rb_node *p; if (q->time_next_delayed_flow > now) return; + /* Update unthrottle latency EWMA. + * This is cheap and can help diagnosing timer/latency problems. + */ + sample = (unsigned long)(now - q->time_next_delayed_flow); + q->unthrottle_latency_ns -= q->unthrottle_latency_ns >> 3; + q->unthrottle_latency_ns += sample >> 3; + q->time_next_delayed_flow = ~0ULL; while ((p = rb_first(&q->delayed)) != NULL) { struct fq_flow *f = container_of(p, struct fq_flow, rate_node); @@ -515,7 +524,12 @@ begin: len = NSEC_PER_SEC; q->stat_pkts_too_long++; } - + /* Account for schedule/timers drifts. + * f->time_next_packet was set when prior packet was sent, + * and current time (@now) can be too late by tens of us. + */ + if (f->time_next_packet) + len -= min(len/2, now - f->time_next_packet); f->time_next_packet = now + len; } out: @@ -787,6 +801,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt) q->initial_quantum = 10 * psched_mtu(qdisc_dev(sch)); q->flow_refill_delay = msecs_to_jiffies(40); q->flow_max_rate = ~0U; + q->time_next_delayed_flow = ~0ULL; q->rate_enable = 1; q->new_flows.first = NULL; q->old_flows.first = NULL; @@ -854,8 +869,8 @@ static int fq_dump_stats(struct Qdisc *sch, struct gnet_dump *d) st.flows = q->flows; st.inactive_flows = q->inactive_flows; st.throttled_flows = q->throttled_flows; - st.pad = 0; - + st.unthrottle_latency_ns = min_t(unsigned long, + q->unthrottle_latency_ns, ~0U); sch_tree_unlock(sch); return gnet_stats_copy_app(d, &st, sizeof(st)); From 53e89941ba2a969c483aa29b907de9a823179297 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 22 Sep 2016 20:01:41 +0300 Subject: [PATCH 1430/1715] net_sched: act_vlan: add helper inlines to access tcf_vlan info Needed e.g for offloading drivers to pick the relevant attributes. Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/net/tc_act/tc_vlan.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/include/net/tc_act/tc_vlan.h b/include/net/tc_act/tc_vlan.h index 6b835889ea30..48cca321ee6c 100644 --- a/include/net/tc_act/tc_vlan.h +++ b/include/net/tc_act/tc_vlan.h @@ -11,6 +11,7 @@ #define __NET_TC_VLAN_H #include +#include #define VLAN_F_POP 0x1 #define VLAN_F_PUSH 0x2 @@ -24,4 +25,28 @@ struct tcf_vlan { }; #define to_vlan(a) ((struct tcf_vlan *)a) +static inline bool is_tcf_vlan(const struct tc_action *a) +{ +#ifdef CONFIG_NET_CLS_ACT + if (a->ops && a->ops->type == TCA_ACT_VLAN) + return true; +#endif + return false; +} + +static inline u32 tcf_vlan_action(const struct tc_action *a) +{ + return to_vlan(a)->tcfv_action; +} + +static inline u16 tcf_vlan_push_vid(const struct tc_action *a) +{ + return to_vlan(a)->tcfv_push_vid; +} + +static inline __be16 tcf_vlan_push_proto(const struct tc_action *a) +{ + return to_vlan(a)->tcfv_push_proto; +} + #endif /* __NET_TC_VLAN_H */ From 9deb2241f19f26800e3b4c6bf49c4db992192bf0 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 22 Sep 2016 20:01:42 +0300 Subject: [PATCH 1431/1715] net/mlx5: E-Switch, Set the vport when registering the uplink rep Set the vport value in the PF entry to be that of the uplink so we can use it blindly over the tc / eswitch offload code without translating it each time we deal with the uplink representor. Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/en_main.c | 6 +-- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 10 +---- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 3 +- .../mellanox/mlx5/core/eswitch_offloads.c | 41 +++++++++---------- 4 files changed, 27 insertions(+), 33 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index a9fc9d4c6af4..b309e7cbe1bf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3726,9 +3726,9 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) mlx5_query_nic_vport_mac_address(mdev, 0, rep.hw_id); rep.load = mlx5e_nic_rep_load; rep.unload = mlx5e_nic_rep_unload; - rep.vport = 0; + rep.vport = FDB_UPLINK_VPORT; rep.priv_data = priv; - mlx5_eswitch_register_vport_rep(esw, &rep); + mlx5_eswitch_register_vport_rep(esw, 0, &rep); } } @@ -3867,7 +3867,7 @@ static void mlx5e_register_vport_rep(struct mlx5_core_dev *mdev) rep.unload = mlx5e_vport_rep_unload; rep.vport = vport; ether_addr_copy(rep.hw_id, mac); - mlx5_eswitch_register_vport_rep(esw, &rep); + mlx5_eswitch_register_vport_rep(esw, vport, &rep); } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 22cfc4ac1837..783e1222e71e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -120,10 +120,7 @@ static struct mlx5_flow_rule *mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, struct mlx5_eswitch_rep *rep = priv->ppriv; u32 src_vport; - if (rep->vport) /* set source vport for the flow */ - src_vport = rep->vport; - else - src_vport = FDB_UPLINK_VPORT; + src_vport = rep->vport; return mlx5_eswitch_add_offloaded_rule(esw, spec, action, src_vport, dst_vport); } @@ -399,10 +396,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, out_priv = netdev_priv(out_dev); out_rep = out_priv->ppriv; - if (out_rep->vport == 0) - *dest_vport = FDB_UPLINK_VPORT; - else - *dest_vport = out_rep->vport; + *dest_vport = out_rep->vport; *action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; continue; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index b96e8c93bf9d..6d8c5a2fbb63 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -254,9 +254,10 @@ void mlx5_eswitch_sqs2vport_stop(struct mlx5_eswitch *esw, int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode); int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode); void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw, + int vport_index, struct mlx5_eswitch_rep *rep); void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw, - int vport); + int vport_index); #define MLX5_DEBUG_ESWITCH_MASK BIT(3) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 7de40e6b0c25..516ac9920b75 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -144,16 +144,12 @@ int mlx5_eswitch_sqs2vport_start(struct mlx5_eswitch *esw, { struct mlx5_flow_rule *flow_rule; struct mlx5_esw_sq *esw_sq; - int vport; int err; int i; if (esw->mode != SRIOV_OFFLOADS) return 0; - vport = rep->vport == 0 ? - FDB_UPLINK_VPORT : rep->vport; - for (i = 0; i < sqns_num; i++) { esw_sq = kzalloc(sizeof(*esw_sq), GFP_KERNEL); if (!esw_sq) { @@ -163,7 +159,7 @@ int mlx5_eswitch_sqs2vport_start(struct mlx5_eswitch *esw, /* Add re-inject rule to the PF/representor sqs */ flow_rule = mlx5_eswitch_add_send_to_vport_rule(esw, - vport, + rep->vport, sqns_array[i]); if (IS_ERR(flow_rule)) { err = PTR_ERR(flow_rule); @@ -620,27 +616,30 @@ int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode) } void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw, - struct mlx5_eswitch_rep *rep) -{ - struct mlx5_esw_offload *offloads = &esw->offloads; - - memcpy(&offloads->vport_reps[rep->vport], rep, - sizeof(struct mlx5_eswitch_rep)); - - INIT_LIST_HEAD(&offloads->vport_reps[rep->vport].vport_sqs_list); - offloads->vport_reps[rep->vport].valid = true; -} - -void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw, - int vport) + int vport_index, + struct mlx5_eswitch_rep *__rep) { struct mlx5_esw_offload *offloads = &esw->offloads; struct mlx5_eswitch_rep *rep; - rep = &offloads->vport_reps[vport]; + rep = &offloads->vport_reps[vport_index]; - if (esw->mode == SRIOV_OFFLOADS && esw->vports[vport].enabled) + memcpy(rep, __rep, sizeof(struct mlx5_eswitch_rep)); + + INIT_LIST_HEAD(&rep->vport_sqs_list); + rep->valid = true; +} + +void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw, + int vport_index) +{ + struct mlx5_esw_offload *offloads = &esw->offloads; + struct mlx5_eswitch_rep *rep; + + rep = &offloads->vport_reps[vport_index]; + + if (esw->mode == SRIOV_OFFLOADS && esw->vports[vport_index].enabled) rep->unload(esw, rep); - offloads->vport_reps[vport].valid = false; + rep->valid = false; } From bac9b6aa1df7d584d72558cdd12df186e91245b3 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 22 Sep 2016 20:01:43 +0300 Subject: [PATCH 1432/1715] net/mlx5: E-Switch, Set vport representor fields explicitly on registration The structure we use for the eswitch vport representor (mlx5_eswitch_rep) has some fields which are set from upper layers in the driver when they register the rep. Use explicit setting on registration time for them and avoid global memcpy. This patch doesn't add new functionality. Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 5 +++-- .../net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 8 +++++++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 6d8c5a2fbb63..ebfcde02a5db 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -178,11 +178,12 @@ struct mlx5_eswitch_rep { void (*unload)(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep); u16 vport; - struct mlx5_flow_rule *vport_rx_rule; + u8 hw_id[ETH_ALEN]; void *priv_data; + + struct mlx5_flow_rule *vport_rx_rule; struct list_head vport_sqs_list; bool valid; - u8 hw_id[ETH_ALEN]; }; struct mlx5_esw_offload { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 516ac9920b75..80c6f4f7487b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -624,7 +624,13 @@ void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw, rep = &offloads->vport_reps[vport_index]; - memcpy(rep, __rep, sizeof(struct mlx5_eswitch_rep)); + memset(rep, 0, sizeof(*rep)); + + rep->load = __rep->load; + rep->unload = __rep->unload; + rep->vport = __rep->vport; + rep->priv_data = __rep->priv_data; + ether_addr_copy(rep->hw_id, __rep->hw_id); INIT_LIST_HEAD(&rep->vport_sqs_list); rep->valid = true; From e33dfe316cf3b408e63bf0c21be0842412eb7981 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 22 Sep 2016 20:01:44 +0300 Subject: [PATCH 1433/1715] net/mlx5: E-Switch, Allow fine tuning of eswitch vport push/pop vlan The HW can be programmed to push vlan, pop vlan or both. A factorization step towards using the push/pop capabilties in the eswitch offloads mode. This patch doesn't add new functionality. Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 33 ++++++++++++------- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 5 +++ 2 files changed, 27 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index f75f864fa490..abbf2c369923 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -127,7 +127,7 @@ static int modify_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport, } static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport, - u16 vlan, u8 qos, bool set) + u16 vlan, u8 qos, u8 set_flags) { u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {0}; @@ -135,14 +135,18 @@ static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport, !MLX5_CAP_ESW(dev, vport_cvlan_insert_if_not_exist)) return -ENOTSUPP; - esw_debug(dev, "Set Vport[%d] VLAN %d qos %d set=%d\n", - vport, vlan, qos, set); - if (set) { + esw_debug(dev, "Set Vport[%d] VLAN %d qos %d set=%x\n", + vport, vlan, qos, set_flags); + + if (set_flags & SET_VLAN_STRIP) MLX5_SET(modify_esw_vport_context_in, in, esw_vport_context.vport_cvlan_strip, 1); + + if (set_flags & SET_VLAN_INSERT) { /* insert only if no vlan in packet */ MLX5_SET(modify_esw_vport_context_in, in, esw_vport_context.vport_cvlan_insert, 1); + MLX5_SET(modify_esw_vport_context_in, in, esw_vport_context.cvlan_pcp, qos); MLX5_SET(modify_esw_vport_context_in, in, @@ -1778,25 +1782,21 @@ int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, return 0; } -int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, - int vport, u16 vlan, u8 qos) +int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, + int vport, u16 vlan, u8 qos, u8 set_flags) { struct mlx5_vport *evport; int err = 0; - int set = 0; if (!ESW_ALLOWED(esw)) return -EPERM; if (!LEGAL_VPORT(esw, vport) || (vlan > 4095) || (qos > 7)) return -EINVAL; - if (vlan || qos) - set = 1; - mutex_lock(&esw->state_lock); evport = &esw->vports[vport]; - err = modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set); + err = modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set_flags); if (err) goto unlock; @@ -1814,6 +1814,17 @@ unlock: return err; } +int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, + int vport, u16 vlan, u8 qos) +{ + u8 set_flags = 0; + + if (vlan || qos) + set_flags = SET_VLAN_STRIP | SET_VLAN_INSERT; + + return __mlx5_eswitch_set_vport_vlan(esw, vport, vlan, qos, set_flags); +} + int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw, int vport, bool spoofchk) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index ebfcde02a5db..4f5391a7965a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -246,6 +246,11 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, struct mlx5_flow_rule * mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn); +enum { + SET_VLAN_STRIP = BIT(0), + SET_VLAN_INSERT = BIT(1) +}; + int mlx5_eswitch_sqs2vport_start(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep, u16 *sqns_array, int sqns_num); From 776b12b674db53012a7ce8c379a0bbdec0a5ffa5 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 22 Sep 2016 20:01:45 +0300 Subject: [PATCH 1434/1715] net/mlx5: Put elements related to offloaded TC rule in one struct Put the representors related to the source and dest vports and the action in struct mlx5_esw_flow_attr which is used while setting the FDB rule. This patch doesn't change any functionality. Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 51 +++++++++++-------- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 10 +++- .../mellanox/mlx5/core/eswitch_offloads.c | 9 ++-- 3 files changed, 44 insertions(+), 26 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 783e1222e71e..3eb319b12663 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -39,6 +39,7 @@ #include #include #include +#include #include "en.h" #include "en_tc.h" #include "eswitch.h" @@ -47,6 +48,7 @@ struct mlx5e_tc_flow { struct rhash_head node; u64 cookie; struct mlx5_flow_rule *rule; + struct mlx5_esw_flow_attr *attr; }; #define MLX5E_TC_TABLE_NUM_ENTRIES 1024 @@ -114,15 +116,11 @@ err_create_ft: static struct mlx5_flow_rule *mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, - u32 action, u32 dst_vport) + struct mlx5_esw_flow_attr *attr) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5_eswitch_rep *rep = priv->ppriv; - u32 src_vport; - src_vport = rep->vport; - - return mlx5_eswitch_add_offloaded_rule(esw, spec, action, src_vport, dst_vport); + return mlx5_eswitch_add_offloaded_rule(esw, spec, attr); } static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, @@ -358,7 +356,7 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, } static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, - u32 *action, u32 *dest_vport) + struct mlx5_esw_flow_attr *attr) { const struct tc_action *a; LIST_HEAD(actions); @@ -366,17 +364,18 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, if (tc_no_actions(exts)) return -EINVAL; - *action = 0; + memset(attr, 0, sizeof(*attr)); + attr->in_rep = priv->ppriv; tcf_exts_to_list(exts, &actions); list_for_each_entry(a, &actions, list) { /* Only support a single action per rule */ - if (*action) + if (attr->action) return -EINVAL; if (is_tcf_gact_shot(a)) { - *action = MLX5_FLOW_CONTEXT_ACTION_DROP | - MLX5_FLOW_CONTEXT_ACTION_COUNT; + attr->action = MLX5_FLOW_CONTEXT_ACTION_DROP | + MLX5_FLOW_CONTEXT_ACTION_COUNT; continue; } @@ -384,7 +383,6 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, int ifindex = tcf_mirred_ifindex(a); struct net_device *out_dev; struct mlx5e_priv *out_priv; - struct mlx5_eswitch_rep *out_rep; out_dev = __dev_get_by_index(dev_net(priv->netdev), ifindex); @@ -394,10 +392,9 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, return -EINVAL; } + attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; out_priv = netdev_priv(out_dev); - out_rep = out_priv->ppriv; - *dest_vport = out_rep->vport; - *action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + attr->out_rep = out_priv->ppriv; continue; } @@ -411,18 +408,27 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, { struct mlx5e_tc_table *tc = &priv->fs.tc; int err = 0; - u32 flow_tag, action, dest_vport = 0; + bool fdb_flow = false; + u32 flow_tag, action; struct mlx5e_tc_flow *flow; struct mlx5_flow_spec *spec; struct mlx5_flow_rule *old = NULL; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + if (esw && esw->mode == SRIOV_OFFLOADS) + fdb_flow = true; + flow = rhashtable_lookup_fast(&tc->ht, &f->cookie, tc->ht_params); - if (flow) + if (flow) { old = flow->rule; - else - flow = kzalloc(sizeof(*flow), GFP_KERNEL); + } else { + if (fdb_flow) + flow = kzalloc(sizeof(*flow) + sizeof(struct mlx5_esw_flow_attr), + GFP_KERNEL); + else + flow = kzalloc(sizeof(*flow), GFP_KERNEL); + } spec = mlx5_vzalloc(sizeof(*spec)); if (!spec || !flow) { @@ -436,11 +442,12 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, if (err < 0) goto err_free; - if (esw && esw->mode == SRIOV_OFFLOADS) { - err = parse_tc_fdb_actions(priv, f->exts, &action, &dest_vport); + if (fdb_flow) { + flow->attr = (struct mlx5_esw_flow_attr *)(flow + 1); + err = parse_tc_fdb_actions(priv, f->exts, flow->attr); if (err < 0) goto err_free; - flow->rule = mlx5e_tc_add_fdb_flow(priv, spec, action, dest_vport); + flow->rule = mlx5e_tc_add_fdb_flow(priv, spec, flow->attr); } else { err = parse_tc_nic_actions(priv, f->exts, &action, &flow_tag); if (err < 0) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 4f5391a7965a..eeeeadc51558 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -238,11 +238,12 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, struct ifla_vf_stats *vf_stats); struct mlx5_flow_spec; +struct mlx5_esw_flow_attr; struct mlx5_flow_rule * mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec, - u32 action, u32 src_vport, u32 dst_vport); + struct mlx5_esw_flow_attr *attr); struct mlx5_flow_rule * mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn); @@ -251,6 +252,13 @@ enum { SET_VLAN_INSERT = BIT(1) }; +struct mlx5_esw_flow_attr { + struct mlx5_eswitch_rep *in_rep; + struct mlx5_eswitch_rep *out_rep; + + int action; +}; + int mlx5_eswitch_sqs2vport_start(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep, u16 *sqns_array, int sqns_num); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 80c6f4f7487b..781debb1acf8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -46,19 +46,22 @@ enum { struct mlx5_flow_rule * mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec, - u32 action, u32 src_vport, u32 dst_vport) + struct mlx5_esw_flow_attr *attr) { struct mlx5_flow_destination dest = { 0 }; struct mlx5_fc *counter = NULL; struct mlx5_flow_rule *rule; void *misc; + int action; if (esw->mode != SRIOV_OFFLOADS) return ERR_PTR(-EOPNOTSUPP); + action = attr->action; + if (action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; - dest.vport_num = dst_vport; + dest.vport_num = attr->out_rep->vport; action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; } else if (action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { counter = mlx5_fc_create(esw->dev, true); @@ -69,7 +72,7 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, } misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); - MLX5_SET(fte_match_set_misc, misc, source_port, src_vport); + MLX5_SET(fte_match_set_misc, misc, source_port, attr->in_rep->vport); misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); From 8515c581dfa574420559d8cef24c2ba24e8eb8dd Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 22 Sep 2016 20:01:46 +0300 Subject: [PATCH 1435/1715] net/mlx5e: Refactor retrival of skb from rx completion element (cqe) Factor the relevant code into a static inline helper (skb_from_cqe) doing that. Move the call to napi_gro_receive to be carried out just after mlx5e_complete_rx_cqe returns. Both changes are to be used for the VF representor as well in the next commit. This patch doesn't change any functionality. Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/en_rx.c | 41 +++++++++++++------ 1 file changed, 28 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 0a81bd34abbe..e836e477f8b7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -629,7 +629,6 @@ static inline void mlx5e_complete_rx_cqe(struct mlx5e_rq *rq, rq->stats.packets++; rq->stats.bytes += cqe_bcnt; mlx5e_build_rx_skb(cqe, cqe_bcnt, rq, skb); - napi_gro_receive(rq->cq.napi, skb); } static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_sq *sq) @@ -733,20 +732,15 @@ static inline bool mlx5e_xdp_handle(struct mlx5e_rq *rq, } } -void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) +static inline +struct sk_buff *skb_from_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, + u16 wqe_counter, u32 cqe_bcnt) { struct bpf_prog *xdp_prog = READ_ONCE(rq->xdp_prog); struct mlx5e_dma_info *di; - struct mlx5e_rx_wqe *wqe; - __be16 wqe_counter_be; struct sk_buff *skb; - u16 wqe_counter; void *va, *data; - u32 cqe_bcnt; - wqe_counter_be = cqe->wqe_counter; - wqe_counter = be16_to_cpu(wqe_counter_be); - wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter); di = &rq->dma_info[wqe_counter]; va = page_address(di->page); data = va + MLX5_RX_HEADROOM; @@ -757,22 +751,21 @@ void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) rq->buff.wqe_sz, DMA_FROM_DEVICE); prefetch(data); - cqe_bcnt = be32_to_cpu(cqe->byte_cnt); if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) { rq->stats.wqe_err++; mlx5e_page_release(rq, di, true); - goto wq_ll_pop; + return NULL; } if (mlx5e_xdp_handle(rq, xdp_prog, di, data, cqe_bcnt)) - goto wq_ll_pop; /* page/packet was consumed by XDP */ + return NULL; /* page/packet was consumed by XDP */ skb = build_skb(va, RQ_PAGE_SIZE(rq)); if (unlikely(!skb)) { rq->stats.buff_alloc_err++; mlx5e_page_release(rq, di, true); - goto wq_ll_pop; + return NULL; } /* queue up for recycling ..*/ @@ -782,7 +775,28 @@ void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) skb_reserve(skb, MLX5_RX_HEADROOM); skb_put(skb, cqe_bcnt); + return skb; +} + +void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) +{ + struct mlx5e_rx_wqe *wqe; + __be16 wqe_counter_be; + struct sk_buff *skb; + u16 wqe_counter; + u32 cqe_bcnt; + + wqe_counter_be = cqe->wqe_counter; + wqe_counter = be16_to_cpu(wqe_counter_be); + wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter); + cqe_bcnt = be32_to_cpu(cqe->byte_cnt); + + skb = skb_from_cqe(rq, cqe, wqe_counter, cqe_bcnt); + if (!skb) + goto wq_ll_pop; + mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); + napi_gro_receive(rq->cq.napi, skb); wq_ll_pop: mlx5_wq_ll_pop(&rq->wq, wqe_counter_be, @@ -861,6 +875,7 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) mlx5e_mpwqe_fill_rx_skb(rq, cqe, wi, cqe_bcnt, skb); mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); + napi_gro_receive(rq->cq.napi, skb); mpwrq_cqe_out: if (likely(wi->consumed_strides < rq->mpwqe_num_strides)) From f5f82476090fd2c6fc4fde03ba61aef984900009 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 22 Sep 2016 20:01:47 +0300 Subject: [PATCH 1436/1715] net/mlx5: E-Switch, Support VLAN actions in the offloads mode Many virtualization systems use a policy under which a vlan tag is pushed to packets sent by guests, and popped before the packet is forwarded to the VM. The current generation of the mlx5 HW doesn't fully support that on a per flow level. As such, we are addressing the above common use case with the SRIOV e-Switch abilities to push vlan into packets sent by VFs and pop vlan from packets forwarded to VFs. The HW can match on the correct vlan being present in packets forwarded to VFs (eSwitch steering is done before stripping the tag), so this part is offloaded as is. A common practice for vlans is to avoid both push vlan and pop vlan for inter-host VM/VM (east-west) communication because in this case, push on egress cancels out with pop on ingress. For supporting that, we use a global eswitch vlan pop policy, hence allowing guest A to communicate with both remote VM B and local VM C. This works since the HW pops the vlan only if it exists (e.g for C --> A packets but not for B --> A packets). On the slow path, when a VF vport has an offloaded flow which involves pushing vlans, wheres another flow is not currently offloaded, the packets from the 2nd flow seen by the VF representor on the host have vlan. The VF rep driver removes such vlan before calling into the host networking stack. Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 + .../net/ethernet/mellanox/mlx5/core/en_main.c | 21 +- .../net/ethernet/mellanox/mlx5/core/en_rx.c | 33 ++++ .../net/ethernet/mellanox/mlx5/core/eswitch.h | 15 ++ .../mellanox/mlx5/core/eswitch_offloads.c | 180 ++++++++++++++++++ 5 files changed, 249 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 346015407b70..460363b66cb1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -869,6 +869,7 @@ void mlx5e_nic_rep_unload(struct mlx5_eswitch *esw, int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv); void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv); int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr); +void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); int mlx5e_create_direct_rqts(struct mlx5e_priv *priv); void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index b309e7cbe1bf..c12792314be7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -446,6 +446,16 @@ static void mlx5e_rq_free_mpwqe_info(struct mlx5e_rq *rq) kfree(rq->mpwqe.info); } +static bool mlx5e_is_vf_vport_rep(struct mlx5e_priv *priv) +{ + struct mlx5_eswitch_rep *rep = (struct mlx5_eswitch_rep *)priv->ppriv; + + if (rep && rep->vport != FDB_UPLINK_VPORT) + return true; + + return false; +} + static int mlx5e_create_rq(struct mlx5e_channel *c, struct mlx5e_rq_param *param, struct mlx5e_rq *rq) @@ -487,6 +497,11 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, switch (priv->params.rq_wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + if (mlx5e_is_vf_vport_rep(priv)) { + err = -EINVAL; + goto err_rq_wq_destroy; + } + rq->handle_rx_cqe = mlx5e_handle_rx_cqe_mpwrq; rq->alloc_wqe = mlx5e_alloc_rx_mpwqe; rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe; @@ -512,7 +527,11 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, goto err_rq_wq_destroy; } - rq->handle_rx_cqe = mlx5e_handle_rx_cqe; + if (mlx5e_is_vf_vport_rep(priv)) + rq->handle_rx_cqe = mlx5e_handle_rx_cqe_rep; + else + rq->handle_rx_cqe = mlx5e_handle_rx_cqe; + rq->alloc_wqe = mlx5e_alloc_rx_wqe; rq->dealloc_wqe = mlx5e_dealloc_rx_wqe; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index e836e477f8b7..c6de6fba5843 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -36,6 +36,7 @@ #include #include "en.h" #include "en_tc.h" +#include "eswitch.h" static inline bool mlx5e_rx_hw_stamp(struct mlx5e_tstamp *tstamp) { @@ -803,6 +804,38 @@ wq_ll_pop: &wqe->next.next_wqe_index); } +void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) +{ + struct net_device *netdev = rq->netdev; + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_eswitch_rep *rep = priv->ppriv; + struct mlx5e_rx_wqe *wqe; + struct sk_buff *skb; + __be16 wqe_counter_be; + u16 wqe_counter; + u32 cqe_bcnt; + + wqe_counter_be = cqe->wqe_counter; + wqe_counter = be16_to_cpu(wqe_counter_be); + wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter); + cqe_bcnt = be32_to_cpu(cqe->byte_cnt); + + skb = skb_from_cqe(rq, cqe, wqe_counter, cqe_bcnt); + if (!skb) + goto wq_ll_pop; + + mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); + + if (rep->vlan && skb_vlan_tag_present(skb)) + skb_vlan_pop(skb); + + napi_gro_receive(rq->cq.napi, skb); + +wq_ll_pop: + mlx5_wq_ll_pop(&rq->wq, wqe_counter_be, + &wqe->next.next_wqe_index); +} + static inline void mlx5e_mpwqe_fill_rx_skb(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, struct mlx5e_mpw_info *wi, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index eeeeadc51558..2e2938e08cda 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -157,6 +157,7 @@ struct mlx5_eswitch_fdb { struct mlx5_flow_group *send_to_vport_grp; struct mlx5_flow_group *miss_grp; struct mlx5_flow_rule *miss_rule; + int vlan_push_pop_refcount; } offloads; }; }; @@ -183,6 +184,8 @@ struct mlx5_eswitch_rep { struct mlx5_flow_rule *vport_rx_rule; struct list_head vport_sqs_list; + u16 vlan; + u32 vlan_refcount; bool valid; }; @@ -252,11 +255,16 @@ enum { SET_VLAN_INSERT = BIT(1) }; +#define MLX5_FLOW_CONTEXT_ACTION_VLAN_POP 0x40 +#define MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH 0x80 + struct mlx5_esw_flow_attr { struct mlx5_eswitch_rep *in_rep; struct mlx5_eswitch_rep *out_rep; int action; + u16 vlan; + bool vlan_handled; }; int mlx5_eswitch_sqs2vport_start(struct mlx5_eswitch *esw, @@ -273,6 +281,13 @@ void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw, void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw, int vport_index); +int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, + struct mlx5_esw_flow_attr *attr); +int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw, + struct mlx5_esw_flow_attr *attr); +int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, + int vport, u16 vlan, u8 qos, u8 set_flags); + #define MLX5_DEBUG_ESWITCH_MASK BIT(3) #define esw_info(dev, format, ...) \ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 781debb1acf8..c55ad8d00c05 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -89,6 +89,186 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, return rule; } +static int esw_set_global_vlan_pop(struct mlx5_eswitch *esw, u8 val) +{ + struct mlx5_eswitch_rep *rep; + int vf_vport, err = 0; + + esw_debug(esw->dev, "%s applying global %s policy\n", __func__, val ? "pop" : "none"); + for (vf_vport = 1; vf_vport < esw->enabled_vports; vf_vport++) { + rep = &esw->offloads.vport_reps[vf_vport]; + if (!rep->valid) + continue; + + err = __mlx5_eswitch_set_vport_vlan(esw, rep->vport, 0, 0, val); + if (err) + goto out; + } + +out: + return err; +} + +static struct mlx5_eswitch_rep * +esw_vlan_action_get_vport(struct mlx5_esw_flow_attr *attr, bool push, bool pop) +{ + struct mlx5_eswitch_rep *in_rep, *out_rep, *vport = NULL; + + in_rep = attr->in_rep; + out_rep = attr->out_rep; + + if (push) + vport = in_rep; + else if (pop) + vport = out_rep; + else + vport = in_rep; + + return vport; +} + +static int esw_add_vlan_action_check(struct mlx5_esw_flow_attr *attr, + bool push, bool pop, bool fwd) +{ + struct mlx5_eswitch_rep *in_rep, *out_rep; + + if ((push || pop) && !fwd) + goto out_notsupp; + + in_rep = attr->in_rep; + out_rep = attr->out_rep; + + if (push && in_rep->vport == FDB_UPLINK_VPORT) + goto out_notsupp; + + if (pop && out_rep->vport == FDB_UPLINK_VPORT) + goto out_notsupp; + + /* vport has vlan push configured, can't offload VF --> wire rules w.o it */ + if (!push && !pop && fwd) + if (in_rep->vlan && out_rep->vport == FDB_UPLINK_VPORT) + goto out_notsupp; + + /* protects against (1) setting rules with different vlans to push and + * (2) setting rules w.o vlans (attr->vlan = 0) && w. vlans to push (!= 0) + */ + if (push && in_rep->vlan_refcount && (in_rep->vlan != attr->vlan)) + goto out_notsupp; + + return 0; + +out_notsupp: + return -ENOTSUPP; +} + +int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, + struct mlx5_esw_flow_attr *attr) +{ + struct offloads_fdb *offloads = &esw->fdb_table.offloads; + struct mlx5_eswitch_rep *vport = NULL; + bool push, pop, fwd; + int err = 0; + + push = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH); + pop = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP); + fwd = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST); + + err = esw_add_vlan_action_check(attr, push, pop, fwd); + if (err) + return err; + + attr->vlan_handled = false; + + vport = esw_vlan_action_get_vport(attr, push, pop); + + if (!push && !pop && fwd) { + /* tracks VF --> wire rules without vlan push action */ + if (attr->out_rep->vport == FDB_UPLINK_VPORT) { + vport->vlan_refcount++; + attr->vlan_handled = true; + } + + return 0; + } + + if (!push && !pop) + return 0; + + if (!(offloads->vlan_push_pop_refcount)) { + /* it's the 1st vlan rule, apply global vlan pop policy */ + err = esw_set_global_vlan_pop(esw, SET_VLAN_STRIP); + if (err) + goto out; + } + offloads->vlan_push_pop_refcount++; + + if (push) { + if (vport->vlan_refcount) + goto skip_set_push; + + err = __mlx5_eswitch_set_vport_vlan(esw, vport->vport, attr->vlan, 0, + SET_VLAN_INSERT | SET_VLAN_STRIP); + if (err) + goto out; + vport->vlan = attr->vlan; +skip_set_push: + vport->vlan_refcount++; + } +out: + if (!err) + attr->vlan_handled = true; + return err; +} + +int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw, + struct mlx5_esw_flow_attr *attr) +{ + struct offloads_fdb *offloads = &esw->fdb_table.offloads; + struct mlx5_eswitch_rep *vport = NULL; + bool push, pop, fwd; + int err = 0; + + if (!attr->vlan_handled) + return 0; + + push = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH); + pop = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP); + fwd = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST); + + vport = esw_vlan_action_get_vport(attr, push, pop); + + if (!push && !pop && fwd) { + /* tracks VF --> wire rules without vlan push action */ + if (attr->out_rep->vport == FDB_UPLINK_VPORT) + vport->vlan_refcount--; + + return 0; + } + + if (push) { + vport->vlan_refcount--; + if (vport->vlan_refcount) + goto skip_unset_push; + + vport->vlan = 0; + err = __mlx5_eswitch_set_vport_vlan(esw, vport->vport, + 0, 0, SET_VLAN_STRIP); + if (err) + goto out; + } + +skip_unset_push: + offloads->vlan_push_pop_refcount--; + if (offloads->vlan_push_pop_refcount) + return 0; + + /* no more vlan rules, stop global vlan pop policy */ + err = esw_set_global_vlan_pop(esw, 0); + +out: + return err; +} + static struct mlx5_flow_rule * mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn) { From 8b32580df1cb4dc9cccb2d369d20317f7f74d9ce Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 22 Sep 2016 20:01:48 +0300 Subject: [PATCH 1437/1715] net/mlx5e: Add TC vlan action for SRIOV offloads Parse TC vlan actions and set the required elements to allow offloading. Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 43 ++++++++++++++----- 1 file changed, 32 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 3eb319b12663..e61bd525f60e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -119,17 +119,27 @@ static struct mlx5_flow_rule *mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, struct mlx5_esw_flow_attr *attr) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + int err; + + err = mlx5_eswitch_add_vlan_action(esw, attr); + if (err) + return ERR_PTR(err); return mlx5_eswitch_add_offloaded_rule(esw, spec, attr); } static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, - struct mlx5_flow_rule *rule) + struct mlx5_flow_rule *rule, + struct mlx5_esw_flow_attr *attr) { + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_fc *counter = NULL; counter = mlx5_flow_rule_counter(rule); + if (esw && esw->mode == SRIOV_OFFLOADS) + mlx5_eswitch_del_vlan_action(esw, attr); + mlx5_del_flow_rule(rule); mlx5_fc_destroy(priv->mdev, counter); @@ -369,13 +379,9 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, tcf_exts_to_list(exts, &actions); list_for_each_entry(a, &actions, list) { - /* Only support a single action per rule */ - if (attr->action) - return -EINVAL; - if (is_tcf_gact_shot(a)) { - attr->action = MLX5_FLOW_CONTEXT_ACTION_DROP | - MLX5_FLOW_CONTEXT_ACTION_COUNT; + attr->action |= MLX5_FLOW_CONTEXT_ACTION_DROP | + MLX5_FLOW_CONTEXT_ACTION_COUNT; continue; } @@ -392,12 +398,25 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, return -EINVAL; } - attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; out_priv = netdev_priv(out_dev); attr->out_rep = out_priv->ppriv; continue; } + if (is_tcf_vlan(a)) { + if (tcf_vlan_action(a) == VLAN_F_POP) { + attr->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP; + } else if (tcf_vlan_action(a) == VLAN_F_PUSH) { + if (tcf_vlan_push_proto(a) != htons(ETH_P_8021Q)) + return -EOPNOTSUPP; + + attr->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH; + attr->vlan = tcf_vlan_push_vid(a); + } + continue; + } + return -EINVAL; } return 0; @@ -413,6 +432,7 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, struct mlx5e_tc_flow *flow; struct mlx5_flow_spec *spec; struct mlx5_flow_rule *old = NULL; + struct mlx5_esw_flow_attr *old_attr; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; if (esw && esw->mode == SRIOV_OFFLOADS) @@ -422,6 +442,7 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, tc->ht_params); if (flow) { old = flow->rule; + old_attr = flow->attr; } else { if (fdb_flow) flow = kzalloc(sizeof(*flow) + sizeof(struct mlx5_esw_flow_attr), @@ -466,7 +487,7 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, goto err_del_rule; if (old) - mlx5e_tc_del_flow(priv, old); + mlx5e_tc_del_flow(priv, old, old_attr); goto out; @@ -494,7 +515,7 @@ int mlx5e_delete_flower(struct mlx5e_priv *priv, rhashtable_remove_fast(&tc->ht, &flow->node, tc->ht_params); - mlx5e_tc_del_flow(priv, flow->rule); + mlx5e_tc_del_flow(priv, flow->rule, flow->attr); kfree(flow); @@ -551,7 +572,7 @@ static void _mlx5e_tc_del_flow(void *ptr, void *arg) struct mlx5e_tc_flow *flow = ptr; struct mlx5e_priv *priv = arg; - mlx5e_tc_del_flow(priv, flow->rule); + mlx5e_tc_del_flow(priv, flow->rule, flow->attr); kfree(flow); } From 095b6cfd69cedc8050b69535af8bf718ce0e9aad Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 22 Sep 2016 20:01:49 +0300 Subject: [PATCH 1438/1715] net/mlx5e: Add TC vlan match parsing Enhance the parsing of offloaded TC rules matches to handle vlans. Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index e61bd525f60e..a350b7171e3d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -164,6 +164,7 @@ static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) | BIT(FLOW_DISSECTOR_KEY_BASIC) | BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_VLAN) | BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | BIT(FLOW_DISSECTOR_KEY_PORTS))) { @@ -227,6 +228,24 @@ static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec key->src); } + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) { + struct flow_dissector_key_vlan *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_VLAN, + f->key); + struct flow_dissector_key_vlan *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_VLAN, + f->mask); + if (mask->vlan_id) { + MLX5_SET(fte_match_set_lyr_2_4, headers_c, vlan_tag, 1); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, vlan_tag, 1); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, mask->vlan_id); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, key->vlan_id); + } + } + if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { struct flow_dissector_key_ipv4_addrs *key = skb_flow_dissector_target(f->dissector, From e12934d9806e61d2727069cd56757987f3da76aa Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 22 Sep 2016 18:48:58 +0100 Subject: [PATCH 1439/1715] cxgb4: fix signed wrap around when decrementing index idx Change predecrement compare to post decrement compare to avoid an unsigned integer wrap-around comparison when decrementing idx in the while loop. For example, when idx is zero, the current situation will predecrement idx in the while loop, wrapping idx to the maximum signed integer and cause out of bounds reads on rxq_info->msix_tbl[idx]. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c index d12a73e03565..f13b593d7c8b 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c @@ -367,7 +367,7 @@ int request_msix_queue_irqs_uld(struct adapter *adap, unsigned int uld_type) } return 0; unwind: - while (--idx >= 0) { + while (idx-- > 0) { bmap_idx = rxq_info->msix_tbl[idx]; free_msix_idx_in_bmap(adap, bmap_idx); free_irq(adap->msix_info_ulds[bmap_idx].vec, From b24d2891cfb0a7975b0039743439c98fe7b7dea7 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 Sep 2016 13:17:33 +0100 Subject: [PATCH 1440/1715] rxrpc: Preset timestamp on Tx sk_buffs Set the timestamp on sk_buffs holding packets to be transmitted before queueing them because the moment the packet is on the queue it can be seen by the retransmission algorithm - which may see a completely random timestamp. If the retransmission algorithm sees such a timestamp, it may retransmit the packet and, in future, tell the congestion management algorithm that the retransmit timer expired. Signed-off-by: David Howells --- net/rxrpc/sendmsg.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index ca7c3be60ad2..ca3811bfbd17 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -99,6 +99,11 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, ASSERTCMP(seq, ==, call->tx_top + 1); + /* We have to set the timestamp before queueing as the retransmit + * algorithm can see the packet as soon as we queue it. + */ + skb->tstamp = ktime_get_real(); + ix = seq & RXRPC_RXTX_BUFF_MASK; rxrpc_get_skb(skb, rxrpc_skb_tx_got); call->rxtx_annotations[ix] = RXRPC_TX_ANNO_UNACK; From 9aff212bd677829189fae2e2e408cefc196ae5ae Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 Sep 2016 12:39:23 +0100 Subject: [PATCH 1441/1715] rxrpc: Don't send an ACK at the end of service call response transmission Don't send an IDLE ACK at the end of the transmission of the response to a service call. The service end resends DATA packets until the client sends an ACK that hard-acks all the send data. At that point, the call is complete. Signed-off-by: David Howells --- net/rxrpc/recvmsg.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 6ba4af5a8d95..99e4c0ae30f1 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -143,8 +143,6 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call) if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY) { rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, 0, true, false); rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK); - } else { - rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, 0, false, false); } write_lock_bh(&call->state_lock); From c0d058c21c69b3685c3f1bb008aa11f1a5eaee7e Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 Sep 2016 12:39:23 +0100 Subject: [PATCH 1442/1715] rxrpc: Make sure sendmsg() is woken on call completion Make sure that sendmsg() gets woken up if the call it is waiting for completes abnormally. Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 1 + 1 file changed, 1 insertion(+) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index b13754a6dd7a..808ab750dc6b 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -758,6 +758,7 @@ static inline bool __rxrpc_set_call_completion(struct rxrpc_call *call, call->error = error; call->completion = compl, call->state = RXRPC_CALL_COMPLETE; + wake_up(&call->waitq); return true; } return false; From 90bd684ded900673d86f64f4b4197704a38f04bc Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 Sep 2016 12:39:23 +0100 Subject: [PATCH 1443/1715] rxrpc: Should be using ktime_add_ms() not ktime_add_ns() ktime_add_ms() should be used to add the resend time (in ms) rather than ktime_add_ns(). Signed-off-by: David Howells --- net/rxrpc/call_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 6e2ea8f4ae75..a2909da5d581 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -187,7 +187,7 @@ static void rxrpc_resend(struct rxrpc_call *call) call->rxtx_annotations[ix] = RXRPC_TX_ANNO_RETRANS | annotation; } - resend_at = ktime_sub(ktime_add_ns(oldest, rxrpc_resend_timeout), now); + resend_at = ktime_sub(ktime_add_ms(oldest, rxrpc_resend_timeout), now); call->resend_at = jiffies + nsecs_to_jiffies(ktime_to_ns(resend_at)); /* Now go through the Tx window and perform the retransmissions. We From e3978673f514fa4999f04dfad9bbd5bb70d0edc6 Mon Sep 17 00:00:00 2001 From: Iyappan Subramanian Date: Thu, 22 Sep 2016 15:47:33 -0700 Subject: [PATCH 1444/1715] drivers: net: xgene: Fix MSS programming Current driver programs static value of MSS in hardware register for TSO offload engine to segment the TCP payload regardless the MSS value provided by network stack. This patch fixes this by programming hardware registers with the stack provided MSS value. Since the hardware has the limitation of having only 4 MSS registers, this patch uses reference count of mss values being used. Signed-off-by: Iyappan Subramanian Signed-off-by: Toan Le Signed-off-by: David S. Miller --- .../net/ethernet/apm/xgene/xgene_enet_hw.h | 7 ++ .../net/ethernet/apm/xgene/xgene_enet_main.c | 88 +++++++++++++++---- .../net/ethernet/apm/xgene/xgene_enet_main.h | 8 +- .../net/ethernet/apm/xgene/xgene_enet_xgmac.c | 18 +++- 4 files changed, 99 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h index 8a8d05500894..8456337a237d 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h @@ -237,6 +237,8 @@ enum xgene_enet_rm { #define TCPHDR_LEN 6 #define IPHDR_POS 6 #define IPHDR_LEN 6 +#define MSS_POS 20 +#define MSS_LEN 2 #define EC_POS 22 /* Enable checksum */ #define EC_LEN 1 #define ET_POS 23 /* Enable TSO */ @@ -253,6 +255,11 @@ enum xgene_enet_rm { #define LAST_BUFFER (0x7800ULL << BUFDATALEN_POS) +#define TSO_MSS0_POS 0 +#define TSO_MSS0_LEN 14 +#define TSO_MSS1_POS 16 +#define TSO_MSS1_LEN 14 + struct xgene_enet_raw_desc { __le64 m0; __le64 m1; diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index 522ba920bfc1..429f18fc5503 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -137,6 +137,7 @@ static irqreturn_t xgene_enet_rx_irq(const int irq, void *data) static int xgene_enet_tx_completion(struct xgene_enet_desc_ring *cp_ring, struct xgene_enet_raw_desc *raw_desc) { + struct xgene_enet_pdata *pdata = netdev_priv(cp_ring->ndev); struct sk_buff *skb; struct device *dev; skb_frag_t *frag; @@ -144,6 +145,7 @@ static int xgene_enet_tx_completion(struct xgene_enet_desc_ring *cp_ring, u16 skb_index; u8 status; int i, ret = 0; + u8 mss_index; skb_index = GET_VAL(USERINFO, le64_to_cpu(raw_desc->m0)); skb = cp_ring->cp_skb[skb_index]; @@ -160,6 +162,13 @@ static int xgene_enet_tx_completion(struct xgene_enet_desc_ring *cp_ring, DMA_TO_DEVICE); } + if (GET_BIT(ET, le64_to_cpu(raw_desc->m3))) { + mss_index = GET_VAL(MSS, le64_to_cpu(raw_desc->m3)); + spin_lock(&pdata->mss_lock); + pdata->mss_refcnt[mss_index]--; + spin_unlock(&pdata->mss_lock); + } + /* Checking for error */ status = GET_VAL(LERR, le64_to_cpu(raw_desc->m0)); if (unlikely(status > 2)) { @@ -178,15 +187,53 @@ static int xgene_enet_tx_completion(struct xgene_enet_desc_ring *cp_ring, return ret; } -static u64 xgene_enet_work_msg(struct sk_buff *skb) +static int xgene_enet_setup_mss(struct net_device *ndev, u32 mss) +{ + struct xgene_enet_pdata *pdata = netdev_priv(ndev); + bool mss_index_found = false; + int mss_index; + int i; + + spin_lock(&pdata->mss_lock); + + /* Reuse the slot if MSS matches */ + for (i = 0; !mss_index_found && i < NUM_MSS_REG; i++) { + if (pdata->mss[i] == mss) { + pdata->mss_refcnt[i]++; + mss_index = i; + mss_index_found = true; + } + } + + /* Overwrite the slot with ref_count = 0 */ + for (i = 0; !mss_index_found && i < NUM_MSS_REG; i++) { + if (!pdata->mss_refcnt[i]) { + pdata->mss_refcnt[i]++; + pdata->mac_ops->set_mss(pdata, mss, i); + pdata->mss[i] = mss; + mss_index = i; + mss_index_found = true; + } + } + + spin_unlock(&pdata->mss_lock); + + /* No slots with ref_count = 0 available, return busy */ + if (!mss_index_found) + return -EBUSY; + + return mss_index; +} + +static int xgene_enet_work_msg(struct sk_buff *skb, u64 *hopinfo) { struct net_device *ndev = skb->dev; struct iphdr *iph; u8 l3hlen = 0, l4hlen = 0; u8 ethhdr, proto = 0, csum_enable = 0; - u64 hopinfo = 0; u32 hdr_len, mss = 0; u32 i, len, nr_frags; + int mss_index; ethhdr = xgene_enet_hdr_len(skb->data); @@ -226,7 +273,11 @@ static u64 xgene_enet_work_msg(struct sk_buff *skb) if (!mss || ((skb->len - hdr_len) <= mss)) goto out; - hopinfo |= SET_BIT(ET); + mss_index = xgene_enet_setup_mss(ndev, mss); + if (unlikely(mss_index < 0)) + return -EBUSY; + + *hopinfo |= SET_BIT(ET) | SET_VAL(MSS, mss_index); } } else if (iph->protocol == IPPROTO_UDP) { l4hlen = UDP_HDR_SIZE; @@ -234,15 +285,15 @@ static u64 xgene_enet_work_msg(struct sk_buff *skb) } out: l3hlen = ip_hdrlen(skb) >> 2; - hopinfo |= SET_VAL(TCPHDR, l4hlen) | - SET_VAL(IPHDR, l3hlen) | - SET_VAL(ETHHDR, ethhdr) | - SET_VAL(EC, csum_enable) | - SET_VAL(IS, proto) | - SET_BIT(IC) | - SET_BIT(TYPE_ETH_WORK_MESSAGE); + *hopinfo |= SET_VAL(TCPHDR, l4hlen) | + SET_VAL(IPHDR, l3hlen) | + SET_VAL(ETHHDR, ethhdr) | + SET_VAL(EC, csum_enable) | + SET_VAL(IS, proto) | + SET_BIT(IC) | + SET_BIT(TYPE_ETH_WORK_MESSAGE); - return hopinfo; + return 0; } static u16 xgene_enet_encode_len(u16 len) @@ -282,20 +333,22 @@ static int xgene_enet_setup_tx_desc(struct xgene_enet_desc_ring *tx_ring, dma_addr_t dma_addr, pbuf_addr, *frag_dma_addr; skb_frag_t *frag; u16 tail = tx_ring->tail; - u64 hopinfo; + u64 hopinfo = 0; u32 len, hw_len; u8 ll = 0, nv = 0, idx = 0; bool split = false; u32 size, offset, ell_bytes = 0; u32 i, fidx, nr_frags, count = 1; + int ret; raw_desc = &tx_ring->raw_desc[tail]; tail = (tail + 1) & (tx_ring->slots - 1); memset(raw_desc, 0, sizeof(struct xgene_enet_raw_desc)); - hopinfo = xgene_enet_work_msg(skb); - if (!hopinfo) - return -EINVAL; + ret = xgene_enet_work_msg(skb, &hopinfo); + if (ret) + return ret; + raw_desc->m3 = cpu_to_le64(SET_VAL(HENQNUM, tx_ring->dst_ring_num) | hopinfo); @@ -435,6 +488,9 @@ static netdev_tx_t xgene_enet_start_xmit(struct sk_buff *skb, return NETDEV_TX_OK; count = xgene_enet_setup_tx_desc(tx_ring, skb); + if (count == -EBUSY) + return NETDEV_TX_BUSY; + if (count <= 0) { dev_kfree_skb_any(skb); return NETDEV_TX_OK; @@ -1669,7 +1725,7 @@ static int xgene_enet_probe(struct platform_device *pdev) if (pdata->phy_mode == PHY_INTERFACE_MODE_XGMII) { ndev->features |= NETIF_F_TSO; - pdata->mss = XGENE_ENET_MSS; + spin_lock_init(&pdata->mss_lock); } ndev->hw_features = ndev->features; diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h index 773537161171..0cda58f5a840 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h @@ -47,7 +47,7 @@ #define NUM_PKT_BUF 64 #define NUM_BUFPOOL 32 #define MAX_EXP_BUFFS 256 -#define XGENE_ENET_MSS 1448 +#define NUM_MSS_REG 4 #define XGENE_MIN_ENET_FRAME_SIZE 60 #define XGENE_MAX_ENET_IRQ 16 @@ -143,7 +143,7 @@ struct xgene_mac_ops { void (*rx_disable)(struct xgene_enet_pdata *pdata); void (*set_speed)(struct xgene_enet_pdata *pdata); void (*set_mac_addr)(struct xgene_enet_pdata *pdata); - void (*set_mss)(struct xgene_enet_pdata *pdata); + void (*set_mss)(struct xgene_enet_pdata *pdata, u16 mss, u8 index); void (*link_state)(struct work_struct *work); }; @@ -212,7 +212,9 @@ struct xgene_enet_pdata { u8 eth_bufnum; u8 bp_bufnum; u16 ring_num; - u32 mss; + u32 mss[NUM_MSS_REG]; + u32 mss_refcnt[NUM_MSS_REG]; + spinlock_t mss_lock; /* mss lock */ u8 tx_delay; u8 rx_delay; bool mdio_driver; diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c index 279ee27004f7..6475f383ba83 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c @@ -232,9 +232,22 @@ static void xgene_xgmac_set_mac_addr(struct xgene_enet_pdata *pdata) xgene_enet_wr_mac(pdata, HSTMACADR_MSW_ADDR, addr1); } -static void xgene_xgmac_set_mss(struct xgene_enet_pdata *pdata) +static void xgene_xgmac_set_mss(struct xgene_enet_pdata *pdata, + u16 mss, u8 index) { - xgene_enet_wr_csr(pdata, XG_TSIF_MSS_REG0_ADDR, pdata->mss); + u8 offset; + u32 data; + + offset = (index < 2) ? 0 : 4; + xgene_enet_rd_csr(pdata, XG_TSIF_MSS_REG0_ADDR + offset, &data); + + if (!(index & 0x1)) + data = SET_VAL(TSO_MSS1, data >> TSO_MSS1_POS) | + SET_VAL(TSO_MSS0, mss); + else + data = SET_VAL(TSO_MSS1, mss) | SET_VAL(TSO_MSS0, data); + + xgene_enet_wr_csr(pdata, XG_TSIF_MSS_REG0_ADDR + offset, data); } static u32 xgene_enet_link_status(struct xgene_enet_pdata *pdata) @@ -258,7 +271,6 @@ static void xgene_xgmac_init(struct xgene_enet_pdata *pdata) xgene_enet_wr_mac(pdata, AXGMAC_CONFIG_1, data); xgene_xgmac_set_mac_addr(pdata); - xgene_xgmac_set_mss(pdata); xgene_enet_rd_csr(pdata, XG_RSIF_CONFIG_REG_ADDR, &data); data |= CFG_RSIF_FPBUFF_TIMEOUT_EN; From 4acfee8143b33efa8bda6f03fe1462d545ff8170 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Thu, 22 Sep 2016 16:49:21 -0400 Subject: [PATCH 1445/1715] net: dsa: add port STP state helper Add a void helper to set the STP state of a port, checking first if the required routine is provided by the driver. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- net/dsa/slave.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 9ecbe787f102..fd78d4c9b197 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -69,6 +69,12 @@ static inline bool dsa_port_is_bridged(struct dsa_slave_priv *p) return !!p->bridge_dev; } +static void dsa_port_set_stp_state(struct dsa_switch *ds, int port, u8 state) +{ + if (ds->ops->port_stp_state_set) + ds->ops->port_stp_state_set(ds, port, state); +} + static int dsa_slave_open(struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); @@ -104,8 +110,7 @@ static int dsa_slave_open(struct net_device *dev) goto clear_promisc; } - if (ds->ops->port_stp_state_set) - ds->ops->port_stp_state_set(ds, p->port, stp_state); + dsa_port_set_stp_state(ds, p->port, stp_state); if (p->phy) phy_start(p->phy); @@ -147,8 +152,7 @@ static int dsa_slave_close(struct net_device *dev) if (ds->ops->port_disable) ds->ops->port_disable(ds, p->port, p->phy); - if (ds->ops->port_stp_state_set) - ds->ops->port_stp_state_set(ds, p->port, BR_STATE_DISABLED); + dsa_port_set_stp_state(ds, p->port, BR_STATE_DISABLED); return 0; } @@ -354,7 +358,7 @@ static int dsa_slave_stp_state_set(struct net_device *dev, if (switchdev_trans_ph_prepare(trans)) return ds->ops->port_stp_state_set ? 0 : -EOPNOTSUPP; - ds->ops->port_stp_state_set(ds, p->port, attr->u.stp_state); + dsa_port_set_stp_state(ds, p->port, attr->u.stp_state); return 0; } @@ -556,8 +560,7 @@ static void dsa_slave_bridge_port_leave(struct net_device *dev) /* Port left the bridge, put in BR_STATE_DISABLED by the bridge layer, * so allow it to be in BR_STATE_FORWARDING to be kept functional */ - if (ds->ops->port_stp_state_set) - ds->ops->port_stp_state_set(ds, p->port, BR_STATE_FORWARDING); + dsa_port_set_stp_state(ds, p->port, BR_STATE_FORWARDING); } static int dsa_slave_port_attr_get(struct net_device *dev, From 732f794c1baf58e1eb2be4431635829c3da655bd Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Thu, 22 Sep 2016 16:49:22 -0400 Subject: [PATCH 1446/1715] net: dsa: add port fast ageing Today the DSA drivers are in charge of flushing the MAC addresses associated to a port when its STP state changes from Learning or Forwarding, to Disabled or Blocking or Listening. This makes the drivers more complex and hides the generic switch logic. Introduce a new optional port_fast_age operation to dsa_switch_ops, to move this logic to the DSA layer and keep drivers simple. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- include/net/dsa.h | 2 ++ net/dsa/slave.c | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/include/net/dsa.h b/include/net/dsa.h index 7556646db2d3..b122196d5a1f 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -143,6 +143,7 @@ struct dsa_port { struct net_device *netdev; struct device_node *dn; unsigned int ageing_time; + u8 stp_state; }; struct dsa_switch { @@ -339,6 +340,7 @@ struct dsa_switch_ops { void (*port_bridge_leave)(struct dsa_switch *ds, int port); void (*port_stp_state_set)(struct dsa_switch *ds, int port, u8 state); + void (*port_fast_age)(struct dsa_switch *ds, int port); /* * VLAN support diff --git a/net/dsa/slave.c b/net/dsa/slave.c index fd78d4c9b197..6b1282c006b1 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -71,8 +71,26 @@ static inline bool dsa_port_is_bridged(struct dsa_slave_priv *p) static void dsa_port_set_stp_state(struct dsa_switch *ds, int port, u8 state) { + struct dsa_port *dp = &ds->ports[port]; + if (ds->ops->port_stp_state_set) ds->ops->port_stp_state_set(ds, port, state); + + if (ds->ops->port_fast_age) { + /* Fast age FDB entries or flush appropriate forwarding database + * for the given port, if we are moving it from Learning or + * Forwarding state, to Disabled or Blocking or Listening state. + */ + + if ((dp->stp_state == BR_STATE_LEARNING || + dp->stp_state == BR_STATE_FORWARDING) && + (state == BR_STATE_DISABLED || + state == BR_STATE_BLOCKING || + state == BR_STATE_LISTENING)) + ds->ops->port_fast_age(ds, port); + } + + dp->stp_state = state; } static int dsa_slave_open(struct net_device *dev) From 597698f1e00d37d40f83770ea166f3ca0dc1d68c Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Thu, 22 Sep 2016 16:49:23 -0400 Subject: [PATCH 1447/1715] net: dsa: b53: implement DSA port fast ageing Remove the fast ageing logic from b53_br_set_stp_state and implement the new DSA switch port_fast_age operation instead. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_common.c | 31 +++++++++++-------------------- 1 file changed, 11 insertions(+), 20 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 1a492c053e27..7717b19dc806 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1402,16 +1402,12 @@ static void b53_br_leave(struct dsa_switch *ds, int port) } } -static void b53_br_set_stp_state(struct dsa_switch *ds, int port, - u8 state) +static void b53_br_set_stp_state(struct dsa_switch *ds, int port, u8 state) { struct b53_device *dev = ds->priv; - u8 hw_state, cur_hw_state; + u8 hw_state; u8 reg; - b53_read8(dev, B53_CTRL_PAGE, B53_PORT_CTRL(port), ®); - cur_hw_state = reg & PORT_CTRL_STP_STATE_MASK; - switch (state) { case BR_STATE_DISABLED: hw_state = PORT_CTRL_DIS_STATE; @@ -1433,26 +1429,20 @@ static void b53_br_set_stp_state(struct dsa_switch *ds, int port, return; } - /* Fast-age ARL entries if we are moving a port from Learning or - * Forwarding (cur_hw_state) state to Disabled, Blocking or Listening - * state (hw_state) - */ - if (cur_hw_state != hw_state) { - if (cur_hw_state >= PORT_CTRL_LEARN_STATE && - hw_state <= PORT_CTRL_LISTEN_STATE) { - if (b53_fast_age_port(dev, port)) { - dev_err(ds->dev, "fast ageing failed\n"); - return; - } - } - } - b53_read8(dev, B53_CTRL_PAGE, B53_PORT_CTRL(port), ®); reg &= ~PORT_CTRL_STP_STATE_MASK; reg |= hw_state; b53_write8(dev, B53_CTRL_PAGE, B53_PORT_CTRL(port), reg); } +static void b53_br_fast_age(struct dsa_switch *ds, int port) +{ + struct b53_device *dev = ds->priv; + + if (b53_fast_age_port(dev, port)) + dev_err(ds->dev, "fast ageing failed\n"); +} + static enum dsa_tag_protocol b53_get_tag_protocol(struct dsa_switch *ds) { return DSA_TAG_PROTO_NONE; @@ -1472,6 +1462,7 @@ static struct dsa_switch_ops b53_switch_ops = { .port_bridge_join = b53_br_join, .port_bridge_leave = b53_br_leave, .port_stp_state_set = b53_br_set_stp_state, + .port_fast_age = b53_br_fast_age, .port_vlan_filtering = b53_vlan_filtering, .port_vlan_prepare = b53_vlan_prepare, .port_vlan_add = b53_vlan_add, From 749efcb8140e608dc2a63b6d61063b7cd3e556a5 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Thu, 22 Sep 2016 16:49:24 -0400 Subject: [PATCH 1448/1715] net: dsa: mv88e6xxx: implement DSA port fast ageing Now that the DSA layer handles port fast ageing on correct STP change, simplify _mv88e6xxx_port_state and implement mv88e6xxx_port_fast_age. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 45 ++++++++++++++++---------------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 25bd3fa744d9..122876cb926c 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -1133,31 +1133,18 @@ static int _mv88e6xxx_port_state(struct mv88e6xxx_chip *chip, int port, oldstate = reg & PORT_CONTROL_STATE_MASK; - if (oldstate != state) { - /* Flush forwarding database if we're moving a port - * from Learning or Forwarding state to Disabled or - * Blocking or Listening state. - */ - if ((oldstate == PORT_CONTROL_STATE_LEARNING || - oldstate == PORT_CONTROL_STATE_FORWARDING) && - (state == PORT_CONTROL_STATE_DISABLED || - state == PORT_CONTROL_STATE_BLOCKING)) { - err = _mv88e6xxx_atu_remove(chip, 0, port, false); - if (err) - return err; - } + reg &= ~PORT_CONTROL_STATE_MASK; + reg |= state; - reg = (reg & ~PORT_CONTROL_STATE_MASK) | state; - err = mv88e6xxx_port_write(chip, port, PORT_CONTROL, reg); - if (err) - return err; + err = mv88e6xxx_port_write(chip, port, PORT_CONTROL, reg); + if (err) + return err; - netdev_dbg(ds->ports[port].netdev, "PortState %s (was %s)\n", - mv88e6xxx_port_state_names[state], - mv88e6xxx_port_state_names[oldstate]); - } + netdev_dbg(ds->ports[port].netdev, "PortState %s (was %s)\n", + mv88e6xxx_port_state_names[state], + mv88e6xxx_port_state_names[oldstate]); - return err; + return 0; } static int _mv88e6xxx_port_based_vlan_map(struct mv88e6xxx_chip *chip, int port) @@ -1232,6 +1219,19 @@ static void mv88e6xxx_port_stp_state_set(struct dsa_switch *ds, int port, mv88e6xxx_port_state_names[stp_state]); } +static void mv88e6xxx_port_fast_age(struct dsa_switch *ds, int port) +{ + struct mv88e6xxx_chip *chip = ds->priv; + int err; + + mutex_lock(&chip->reg_lock); + err = _mv88e6xxx_atu_remove(chip, 0, port, false); + mutex_unlock(&chip->reg_lock); + + if (err) + netdev_err(ds->ports[port].netdev, "failed to flush ATU\n"); +} + static int _mv88e6xxx_port_pvid(struct mv88e6xxx_chip *chip, int port, u16 *new, u16 *old) { @@ -3684,6 +3684,7 @@ static struct dsa_switch_ops mv88e6xxx_switch_ops = { .port_bridge_join = mv88e6xxx_port_bridge_join, .port_bridge_leave = mv88e6xxx_port_bridge_leave, .port_stp_state_set = mv88e6xxx_port_stp_state_set, + .port_fast_age = mv88e6xxx_port_fast_age, .port_vlan_filtering = mv88e6xxx_port_vlan_filtering, .port_vlan_prepare = mv88e6xxx_port_vlan_prepare, .port_vlan_add = mv88e6xxx_port_vlan_add, From 17db4bcef3c3c45b95b3b3d8577f725df1b2c0a0 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 22 Sep 2016 16:56:29 -0700 Subject: [PATCH 1449/1715] hv_netvsc: use consume_skb Packets that are transmitted in normal path should use consume_skb instead of kfree_skb. This allows for better tracing of packet drops. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index ff05b9b0837f..720b5fa9e625 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -635,7 +635,7 @@ static void netvsc_send_tx_complete(struct netvsc_device *net_device, q_idx = nvsc_packet->q_idx; channel = incoming_channel; - dev_kfree_skb_any(skb); + dev_consume_skb_any(skb); } num_outstanding_sends = @@ -944,7 +944,7 @@ int netvsc_send(struct hv_device *device, } if (msdp->skb) - dev_kfree_skb_any(msdp->skb); + dev_consume_skb_any(msdp->skb); if (xmit_more && !packet->cp_partial) { msdp->skb = skb; From 07d0f0008c783d2a2fce8497000938db15fd7aa1 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 22 Sep 2016 16:56:30 -0700 Subject: [PATCH 1450/1715] hv_netvsc: dev hold/put reference to VF The netvsc driver holds a pointer to the virtual function network device if managing SR-IOV association. In order to ensure that the VF network device does not disappear, it should be using dev_hold/dev_put to get a reference count. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc_drv.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 2360e704e271..e74dbcc2916d 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -1262,6 +1262,8 @@ static int netvsc_register_vf(struct net_device *vf_netdev) * Take a reference on the module. */ try_module_get(THIS_MODULE); + + dev_hold(vf_netdev); net_device_ctx->vf_netdev = vf_netdev; return NOTIFY_OK; } @@ -1376,6 +1378,7 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev) netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name); netvsc_inject_disable(net_device_ctx); net_device_ctx->vf_netdev = NULL; + dev_put(vf_netdev); module_put(THIS_MODULE); return NOTIFY_OK; } From ee837a137304290a1ae26980c73a367f7afef54f Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 22 Sep 2016 16:56:31 -0700 Subject: [PATCH 1451/1715] hv_netvsc: simplify callback event code The callback handler for netlink events can be simplified: * Consolidate check for netlink callback events about this driver itself. * Ignore non-Ethernet devices. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc_drv.c | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index e74dbcc2916d..849b566ad7bf 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -1238,10 +1238,6 @@ static int netvsc_register_vf(struct net_device *vf_netdev) struct net_device *ndev; struct net_device_context *net_device_ctx; struct netvsc_device *netvsc_dev; - const struct ethtool_ops *eth_ops = vf_netdev->ethtool_ops; - - if (eth_ops == NULL || eth_ops == ðtool_ops) - return NOTIFY_DONE; /* * We will use the MAC address to locate the synthetic interface to @@ -1286,12 +1282,8 @@ static int netvsc_vf_up(struct net_device *vf_netdev) { struct net_device *ndev; struct netvsc_device *netvsc_dev; - const struct ethtool_ops *eth_ops = vf_netdev->ethtool_ops; struct net_device_context *net_device_ctx; - if (eth_ops == ðtool_ops) - return NOTIFY_DONE; - ndev = get_netvsc_net_device(vf_netdev->dev_addr); if (!ndev) return NOTIFY_DONE; @@ -1329,10 +1321,6 @@ static int netvsc_vf_down(struct net_device *vf_netdev) struct net_device *ndev; struct netvsc_device *netvsc_dev; struct net_device_context *net_device_ctx; - const struct ethtool_ops *eth_ops = vf_netdev->ethtool_ops; - - if (eth_ops == ðtool_ops) - return NOTIFY_DONE; ndev = get_netvsc_net_device(vf_netdev->dev_addr); if (!ndev) @@ -1361,12 +1349,8 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev) { struct net_device *ndev; struct netvsc_device *netvsc_dev; - const struct ethtool_ops *eth_ops = vf_netdev->ethtool_ops; struct net_device_context *net_device_ctx; - if (eth_ops == ðtool_ops) - return NOTIFY_DONE; - ndev = get_netvsc_net_device(vf_netdev->dev_addr); if (!ndev) return NOTIFY_DONE; @@ -1542,13 +1526,21 @@ static int netvsc_netdev_event(struct notifier_block *this, { struct net_device *event_dev = netdev_notifier_info_to_dev(ptr); + /* Skip our own events */ + if (event_dev->netdev_ops == &device_ops) + return NOTIFY_DONE; + + /* Avoid non-Ethernet type devices */ + if (event_dev->type != ARPHRD_ETHER) + return NOTIFY_DONE; + /* Avoid Vlan dev with same MAC registering as VF */ if (event_dev->priv_flags & IFF_802_1Q_VLAN) return NOTIFY_DONE; /* Avoid Bonding master dev with same MAC registering as VF */ - if (event_dev->priv_flags & IFF_BONDING && - event_dev->flags & IFF_MASTER) + if ((event_dev->priv_flags & IFF_BONDING) && + (event_dev->flags & IFF_MASTER)) return NOTIFY_DONE; switch (event) { From e8ff40d4bff1f3b6a588e29ed1fbdfd943642856 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 22 Sep 2016 16:56:32 -0700 Subject: [PATCH 1452/1715] hv_netvsc: improve VF device matching The code to associate netvsc and VF devices can be made less error prone by using a better matching algorithms. On registration, use the permanent address which avoids any possible issues caused by device MAC address being changed. For all other callbacks, search by the netdevice pointer value to ensure getting the correct network device. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc_drv.c | 60 +++++++++++++++++++++------------ 1 file changed, 39 insertions(+), 21 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 849b566ad7bf..87682198ff73 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -1215,22 +1215,44 @@ static void netvsc_free_netdev(struct net_device *netdev) free_netdev(netdev); } -static struct net_device *get_netvsc_net_device(char *mac) +static struct net_device *get_netvsc_bymac(const u8 *mac) { - struct net_device *dev, *found = NULL; + struct net_device *dev; ASSERT_RTNL(); for_each_netdev(&init_net, dev) { - if (memcmp(dev->dev_addr, mac, ETH_ALEN) == 0) { - if (dev->netdev_ops != &device_ops) - continue; - found = dev; - break; - } + if (dev->netdev_ops != &device_ops) + continue; /* not a netvsc device */ + + if (ether_addr_equal(mac, dev->perm_addr)) + return dev; } - return found; + return NULL; +} + +static struct net_device *get_netvsc_byref(const struct net_device *vf_netdev) +{ + struct net_device *dev; + + ASSERT_RTNL(); + + for_each_netdev(&init_net, dev) { + struct net_device_context *net_device_ctx; + + if (dev->netdev_ops != &device_ops) + continue; /* not a netvsc device */ + + net_device_ctx = netdev_priv(dev); + if (net_device_ctx->nvdev == NULL) + continue; /* device is removed */ + + if (net_device_ctx->vf_netdev == vf_netdev) + return dev; /* a match */ + } + + return NULL; } static int netvsc_register_vf(struct net_device *vf_netdev) @@ -1239,12 +1261,15 @@ static int netvsc_register_vf(struct net_device *vf_netdev) struct net_device_context *net_device_ctx; struct netvsc_device *netvsc_dev; + if (vf_netdev->addr_len != ETH_ALEN) + return NOTIFY_DONE; + /* * We will use the MAC address to locate the synthetic interface to * associate with the VF interface. If we don't find a matching * synthetic interface, move on. */ - ndev = get_netvsc_net_device(vf_netdev->dev_addr); + ndev = get_netvsc_bymac(vf_netdev->perm_addr); if (!ndev) return NOTIFY_DONE; @@ -1284,16 +1309,13 @@ static int netvsc_vf_up(struct net_device *vf_netdev) struct netvsc_device *netvsc_dev; struct net_device_context *net_device_ctx; - ndev = get_netvsc_net_device(vf_netdev->dev_addr); + ndev = get_netvsc_byref(vf_netdev); if (!ndev) return NOTIFY_DONE; net_device_ctx = netdev_priv(ndev); netvsc_dev = net_device_ctx->nvdev; - if (!netvsc_dev || !net_device_ctx->vf_netdev) - return NOTIFY_DONE; - netdev_info(ndev, "VF up: %s\n", vf_netdev->name); netvsc_inject_enable(net_device_ctx); @@ -1322,16 +1344,13 @@ static int netvsc_vf_down(struct net_device *vf_netdev) struct netvsc_device *netvsc_dev; struct net_device_context *net_device_ctx; - ndev = get_netvsc_net_device(vf_netdev->dev_addr); + ndev = get_netvsc_byref(vf_netdev); if (!ndev) return NOTIFY_DONE; net_device_ctx = netdev_priv(ndev); netvsc_dev = net_device_ctx->nvdev; - if (!netvsc_dev || !net_device_ctx->vf_netdev) - return NOTIFY_DONE; - netdev_info(ndev, "VF down: %s\n", vf_netdev->name); netvsc_inject_disable(net_device_ctx); netvsc_switch_datapath(ndev, false); @@ -1351,14 +1370,13 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev) struct netvsc_device *netvsc_dev; struct net_device_context *net_device_ctx; - ndev = get_netvsc_net_device(vf_netdev->dev_addr); + ndev = get_netvsc_byref(vf_netdev); if (!ndev) return NOTIFY_DONE; net_device_ctx = netdev_priv(ndev); netvsc_dev = net_device_ctx->nvdev; - if (!netvsc_dev || !net_device_ctx->vf_netdev) - return NOTIFY_DONE; + netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name); netvsc_inject_disable(net_device_ctx); net_device_ctx->vf_netdev = NULL; From f207c10d982388fa42710922ad1c0c9d3ba9a87b Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 22 Sep 2016 16:56:33 -0700 Subject: [PATCH 1453/1715] hv_netvsc: use RCU to protect vf_netdev The vf_netdev pointer in the netvsc device context can simply be protected by RCU because network device destruction is already RCU synchronized. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 2 +- drivers/net/hyperv/netvsc_drv.c | 29 +++++++++++++++-------------- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 284b97b6b258..6b7948764443 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -695,7 +695,7 @@ struct net_device_context { bool start_remove; /* State to manage the associated VF interface. */ - struct net_device *vf_netdev; + struct net_device __rcu *vf_netdev; bool vf_inject; atomic_t vf_use_cnt; /* 1: allocated, serial number is valid. 0: not allocated */ diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 87682198ff73..dde17c0faf9f 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -667,8 +667,8 @@ int netvsc_recv_callback(struct hv_device *device_obj, { struct net_device *net = hv_get_drvdata(device_obj); struct net_device_context *net_device_ctx = netdev_priv(net); + struct net_device *vf_netdev; struct sk_buff *skb; - struct sk_buff *vf_skb; struct netvsc_stats *rx_stats; u32 bytes_recvd = packet->total_data_buflen; int ret = 0; @@ -676,9 +676,12 @@ int netvsc_recv_callback(struct hv_device *device_obj, if (!net || net->reg_state != NETREG_REGISTERED) return NVSP_STAT_FAIL; - if (READ_ONCE(net_device_ctx->vf_inject)) { + vf_netdev = rcu_dereference(net_device_ctx->vf_netdev); + if (vf_netdev) { + struct sk_buff *vf_skb; + atomic_inc(&net_device_ctx->vf_use_cnt); - if (!READ_ONCE(net_device_ctx->vf_inject)) { + if (!net_device_ctx->vf_inject) { /* * We raced; just move on. */ @@ -694,13 +697,12 @@ int netvsc_recv_callback(struct hv_device *device_obj, * the host). Deliver these via the VF interface * in the guest. */ - vf_skb = netvsc_alloc_recv_skb(net_device_ctx->vf_netdev, + vf_skb = netvsc_alloc_recv_skb(vf_netdev, packet, csum_info, *data, vlan_tci); if (vf_skb != NULL) { - ++net_device_ctx->vf_netdev->stats.rx_packets; - net_device_ctx->vf_netdev->stats.rx_bytes += - bytes_recvd; + ++vf_netdev->stats.rx_packets; + vf_netdev->stats.rx_bytes += bytes_recvd; netif_receive_skb(vf_skb); } else { ++net->stats.rx_dropped; @@ -1232,7 +1234,7 @@ static struct net_device *get_netvsc_bymac(const u8 *mac) return NULL; } -static struct net_device *get_netvsc_byref(const struct net_device *vf_netdev) +static struct net_device *get_netvsc_byref(struct net_device *vf_netdev) { struct net_device *dev; @@ -1248,7 +1250,7 @@ static struct net_device *get_netvsc_byref(const struct net_device *vf_netdev) if (net_device_ctx->nvdev == NULL) continue; /* device is removed */ - if (net_device_ctx->vf_netdev == vf_netdev) + if (rtnl_dereference(net_device_ctx->vf_netdev) == vf_netdev) return dev; /* a match */ } @@ -1275,7 +1277,7 @@ static int netvsc_register_vf(struct net_device *vf_netdev) net_device_ctx = netdev_priv(ndev); netvsc_dev = net_device_ctx->nvdev; - if (!netvsc_dev || net_device_ctx->vf_netdev) + if (!netvsc_dev || rtnl_dereference(net_device_ctx->vf_netdev)) return NOTIFY_DONE; netdev_info(ndev, "VF registering: %s\n", vf_netdev->name); @@ -1285,7 +1287,7 @@ static int netvsc_register_vf(struct net_device *vf_netdev) try_module_get(THIS_MODULE); dev_hold(vf_netdev); - net_device_ctx->vf_netdev = vf_netdev; + rcu_assign_pointer(net_device_ctx->vf_netdev, vf_netdev); return NOTIFY_OK; } @@ -1379,7 +1381,8 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev) netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name); netvsc_inject_disable(net_device_ctx); - net_device_ctx->vf_netdev = NULL; + + RCU_INIT_POINTER(net_device_ctx->vf_netdev, NULL); dev_put(vf_netdev); module_put(THIS_MODULE); return NOTIFY_OK; @@ -1433,8 +1436,6 @@ static int netvsc_probe(struct hv_device *dev, INIT_LIST_HEAD(&net_device_ctx->reconfig_events); atomic_set(&net_device_ctx->vf_use_cnt, 0); - net_device_ctx->vf_netdev = NULL; - net_device_ctx->vf_inject = false; net->netdev_ops = &device_ops; From 9cbcc4280645f0e7e19e6a0da443ec7e69cecf40 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 22 Sep 2016 16:56:34 -0700 Subject: [PATCH 1454/1715] hv_netvsc: remove VF in flight counters Since VF reference is now protected by RCU, no longer need the VF usage counter and can use device flags to see whether to inject or not. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 3 +- drivers/net/hyperv/netvsc_drv.c | 81 ++++++++------------------------- 2 files changed, 21 insertions(+), 63 deletions(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 6b7948764443..1d4974026eff 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -696,8 +696,7 @@ struct net_device_context { /* State to manage the associated VF interface. */ struct net_device __rcu *vf_netdev; - bool vf_inject; - atomic_t vf_use_cnt; + /* 1: allocated, serial number is valid. 0: not allocated */ u32 vf_alloc; /* Serial number of the VF to team with */ diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index dde17c0faf9f..9375d82702ce 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -670,50 +670,20 @@ int netvsc_recv_callback(struct hv_device *device_obj, struct net_device *vf_netdev; struct sk_buff *skb; struct netvsc_stats *rx_stats; - u32 bytes_recvd = packet->total_data_buflen; - int ret = 0; - if (!net || net->reg_state != NETREG_REGISTERED) + if (net->reg_state != NETREG_REGISTERED) return NVSP_STAT_FAIL; + /* + * If necessary, inject this packet into the VF interface. + * On Hyper-V, multicast and brodcast packets are only delivered + * to the synthetic interface (after subjecting these to + * policy filters on the host). Deliver these via the VF + * interface in the guest. + */ vf_netdev = rcu_dereference(net_device_ctx->vf_netdev); - if (vf_netdev) { - struct sk_buff *vf_skb; - - atomic_inc(&net_device_ctx->vf_use_cnt); - if (!net_device_ctx->vf_inject) { - /* - * We raced; just move on. - */ - atomic_dec(&net_device_ctx->vf_use_cnt); - goto vf_injection_done; - } - - /* - * Inject this packet into the VF inerface. - * On Hyper-V, multicast and brodcast packets - * are only delivered on the synthetic interface - * (after subjecting these to policy filters on - * the host). Deliver these via the VF interface - * in the guest. - */ - vf_skb = netvsc_alloc_recv_skb(vf_netdev, - packet, csum_info, *data, - vlan_tci); - if (vf_skb != NULL) { - ++vf_netdev->stats.rx_packets; - vf_netdev->stats.rx_bytes += bytes_recvd; - netif_receive_skb(vf_skb); - } else { - ++net->stats.rx_dropped; - ret = NVSP_STAT_FAIL; - } - atomic_dec(&net_device_ctx->vf_use_cnt); - return ret; - } - -vf_injection_done: - rx_stats = this_cpu_ptr(net_device_ctx->rx_stats); + if (vf_netdev && (vf_netdev->flags & IFF_UP)) + net = vf_netdev; /* Allocate a skb - TODO direct I/O to pages? */ skb = netvsc_alloc_recv_skb(net, packet, csum_info, *data, vlan_tci); @@ -721,9 +691,17 @@ vf_injection_done: ++net->stats.rx_dropped; return NVSP_STAT_FAIL; } - skb_record_rx_queue(skb, channel-> - offermsg.offer.sub_channel_index); + if (net != vf_netdev) + skb_record_rx_queue(skb, + channel->offermsg.offer.sub_channel_index); + + /* + * Even if injecting the packet, record the statistics + * on the synthetic device because modifying the VF device + * statistics will not work correctly. + */ + rx_stats = this_cpu_ptr(net_device_ctx->rx_stats); u64_stats_update_begin(&rx_stats->syncp); rx_stats->packets++; rx_stats->bytes += packet->total_data_buflen; @@ -1291,20 +1269,6 @@ static int netvsc_register_vf(struct net_device *vf_netdev) return NOTIFY_OK; } -static void netvsc_inject_enable(struct net_device_context *net_device_ctx) -{ - net_device_ctx->vf_inject = true; -} - -static void netvsc_inject_disable(struct net_device_context *net_device_ctx) -{ - net_device_ctx->vf_inject = false; - - /* Wait for currently active users to drain out. */ - while (atomic_read(&net_device_ctx->vf_use_cnt) != 0) - udelay(50); -} - static int netvsc_vf_up(struct net_device *vf_netdev) { struct net_device *ndev; @@ -1319,7 +1283,6 @@ static int netvsc_vf_up(struct net_device *vf_netdev) netvsc_dev = net_device_ctx->nvdev; netdev_info(ndev, "VF up: %s\n", vf_netdev->name); - netvsc_inject_enable(net_device_ctx); /* * Open the device before switching data path. @@ -1354,7 +1317,6 @@ static int netvsc_vf_down(struct net_device *vf_netdev) netvsc_dev = net_device_ctx->nvdev; netdev_info(ndev, "VF down: %s\n", vf_netdev->name); - netvsc_inject_disable(net_device_ctx); netvsc_switch_datapath(ndev, false); netdev_info(ndev, "Data path switched from VF: %s\n", vf_netdev->name); rndis_filter_close(netvsc_dev); @@ -1380,7 +1342,6 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev) netvsc_dev = net_device_ctx->nvdev; netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name); - netvsc_inject_disable(net_device_ctx); RCU_INIT_POINTER(net_device_ctx->vf_netdev, NULL); dev_put(vf_netdev); @@ -1435,8 +1396,6 @@ static int netvsc_probe(struct hv_device *dev, spin_lock_init(&net_device_ctx->lock); INIT_LIST_HEAD(&net_device_ctx->reconfig_events); - atomic_set(&net_device_ctx->vf_use_cnt, 0); - net->netdev_ops = &device_ops; net->hw_features = NETVSC_HW_FEATURES; From f7ad75b753f386454f50044fd69edad767b69ce8 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 22 Sep 2016 16:56:35 -0700 Subject: [PATCH 1455/1715] hv_netvsc: count multicast packets received Useful for debugging issues with multicast and SR-IOV to keep track of number of received multicast packets. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 2 ++ drivers/net/hyperv/netvsc_drv.c | 9 ++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 1d4974026eff..7130bf910f52 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -649,6 +649,8 @@ struct multi_recv_comp { struct netvsc_stats { u64 packets; u64 bytes; + u64 broadcast; + u64 multicast; struct u64_stats_sync syncp; }; diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 9375d82702ce..52eeb2f67276 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -705,6 +705,11 @@ int netvsc_recv_callback(struct hv_device *device_obj, u64_stats_update_begin(&rx_stats->syncp); rx_stats->packets++; rx_stats->bytes += packet->total_data_buflen; + + if (skb->pkt_type == PACKET_BROADCAST) + ++rx_stats->broadcast; + else if (skb->pkt_type == PACKET_MULTICAST) + ++rx_stats->multicast; u64_stats_update_end(&rx_stats->syncp); /* @@ -947,7 +952,7 @@ static struct rtnl_link_stats64 *netvsc_get_stats64(struct net_device *net, cpu); struct netvsc_stats *rx_stats = per_cpu_ptr(ndev_ctx->rx_stats, cpu); - u64 tx_packets, tx_bytes, rx_packets, rx_bytes; + u64 tx_packets, tx_bytes, rx_packets, rx_bytes, rx_multicast; unsigned int start; do { @@ -960,12 +965,14 @@ static struct rtnl_link_stats64 *netvsc_get_stats64(struct net_device *net, start = u64_stats_fetch_begin_irq(&rx_stats->syncp); rx_packets = rx_stats->packets; rx_bytes = rx_stats->bytes; + rx_multicast = rx_stats->multicast + rx_stats->broadcast; } while (u64_stats_fetch_retry_irq(&rx_stats->syncp, start)); t->tx_bytes += tx_bytes; t->tx_packets += tx_packets; t->rx_bytes += rx_bytes; t->rx_packets += rx_packets; + t->multicast += rx_multicast; } t->tx_dropped = net->stats.tx_dropped; From 2d48c5f9335e48ddac7a52db10bf3bfd01986b9c Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 23 Sep 2016 01:28:35 +0200 Subject: [PATCH 1456/1715] bpf: use skb_to_full_sk helper in bpf_skb_under_cgroup We need to use skb_to_full_sk() helper introduced in commit bd5eb35f16a9 ("xfrm: take care of request sockets") as otherwise we miss tcp synack messages, since ownership is on request socket and therefore it would miss the sk_fullsock() check. Use skb_to_full_sk() as also done similarly in the bpf_get_cgroup_classid() helper via 2309236c13fe ("cls_cgroup: get sk_classid only from full sockets") fix to not let this fall through. Fixes: 4a482f34afcc ("cgroup: bpf: Add bpf_skb_in_cgroup_proto") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- net/core/filter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/filter.c b/net/core/filter.c index 0920c2ac1d00..e5d997759d5e 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2408,7 +2408,7 @@ BPF_CALL_3(bpf_skb_under_cgroup, struct sk_buff *, skb, struct bpf_map *, map, struct cgroup *cgrp; struct sock *sk; - sk = skb->sk; + sk = skb_to_full_sk(skb); if (!sk || !sk_fullsock(sk)) return -ENOENT; if (unlikely(idx >= array->map.max_entries)) From 669dc4d76d0ecc2d795df735839f43cfddf9f617 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 23 Sep 2016 01:28:36 +0200 Subject: [PATCH 1457/1715] bpf: use bpf_get_smp_processor_id_proto instead of raw one Same motivation as in commit 80b48c445797 ("bpf: don't use raw processor id in generic helper"), but this time for XDP typed programs. Thus, allow for preemption checks when we have DEBUG_PREEMPT enabled, and otherwise use the raw variant. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- net/core/filter.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/core/filter.c b/net/core/filter.c index e5d997759d5e..acf84fbfb043 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2551,6 +2551,8 @@ xdp_func_proto(enum bpf_func_id func_id) switch (func_id) { case BPF_FUNC_perf_event_output: return &bpf_xdp_event_output_proto; + case BPF_FUNC_get_smp_processor_id: + return &bpf_get_smp_processor_id_proto; default: return sk_filter_func_proto(func_id); } From 7a4b28c6cc9ffac50f791b99cc7e46106436e5d8 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 23 Sep 2016 01:28:37 +0200 Subject: [PATCH 1458/1715] bpf: add helper to invalidate hash Add a small helper that complements 36bbef52c7eb ("bpf: direct packet write and access for helpers for clsact progs") for invalidating the current skb->hash after mangling on headers via direct packet write. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 7 +++++++ net/core/filter.c | 18 ++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e07432b9f8b8..f09c70b97eca 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -419,6 +419,13 @@ enum bpf_func_id { */ BPF_FUNC_csum_update, + /** + * bpf_set_hash_invalid(skb) + * Invalidate current skb>hash. + * @skb: pointer to skb + */ + BPF_FUNC_set_hash_invalid, + __BPF_FUNC_MAX_ID, }; diff --git a/net/core/filter.c b/net/core/filter.c index acf84fbfb043..00351cdf7d0c 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1777,6 +1777,22 @@ static const struct bpf_func_proto bpf_get_hash_recalc_proto = { .arg1_type = ARG_PTR_TO_CTX, }; +BPF_CALL_1(bpf_set_hash_invalid, struct sk_buff *, skb) +{ + /* After all direct packet write, this can be used once for + * triggering a lazy recalc on next skb_get_hash() invocation. + */ + skb_clear_hash(skb); + return 0; +} + +static const struct bpf_func_proto bpf_set_hash_invalid_proto = { + .func = bpf_set_hash_invalid, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, +}; + BPF_CALL_3(bpf_skb_vlan_push, struct sk_buff *, skb, __be16, vlan_proto, u16, vlan_tci) { @@ -2534,6 +2550,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) return &bpf_get_route_realm_proto; case BPF_FUNC_get_hash_recalc: return &bpf_get_hash_recalc_proto; + case BPF_FUNC_set_hash_invalid: + return &bpf_set_hash_invalid_proto; case BPF_FUNC_perf_event_output: return &bpf_skb_event_output_proto; case BPF_FUNC_get_smp_processor_id: From 98dafac5697fbe1fb4bef9e3204baf9051641b00 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 Sep 2016 14:04:38 +0100 Subject: [PATCH 1459/1715] rxrpc: Use before_eq() and friends to compare serial numbers before_eq() and friends should be used to compare serial numbers (when not checking for (non)equality) rather than casting to int, subtracting and checking the result. Signed-off-by: David Howells --- net/rxrpc/input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index cbb5d53f09d7..06027b6d9c19 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -578,7 +578,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, } /* Discard any out-of-order or duplicate ACKs. */ - if ((int)sp->hdr.serial - (int)call->acks_latest <= 0) { + if (before_eq(sp->hdr.serial, call->acks_latest)) { _debug("discard ACK %d <= %d", sp->hdr.serial, call->acks_latest); return; From dfc3da4404ad1ec42a0a649a4ffa2b0f37e80352 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 Sep 2016 12:39:23 +0100 Subject: [PATCH 1460/1715] rxrpc: Need to start the resend timer on initial transmission When a DATA packet has its initial transmission, we may need to start or adjust the resend timer. Without this we end up relying on being sent a NACK to initiate the resend. Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 1 + net/rxrpc/call_event.c | 2 +- net/rxrpc/sendmsg.c | 9 +++++++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 808ab750dc6b..9e3ba4dc9578 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -704,6 +704,7 @@ int rxrpc_reject_call(struct rxrpc_sock *); /* * call_event.c */ +void rxrpc_set_timer(struct rxrpc_call *); void rxrpc_propose_ACK(struct rxrpc_call *, u8, u16, u32, bool, bool); void rxrpc_process_call(struct work_struct *); diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index a2909da5d581..3a7f90a2659c 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -24,7 +24,7 @@ /* * Set the timer */ -static void rxrpc_set_timer(struct rxrpc_call *call) +void rxrpc_set_timer(struct rxrpc_call *call) { unsigned long t, now = jiffies; diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index ca3811bfbd17..7cb34b2dfba9 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -146,6 +146,15 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, if (ret < 0) { _debug("need instant resend %d", ret); rxrpc_instant_resend(call, ix); + } else { + unsigned long resend_at; + + resend_at = jiffies + msecs_to_jiffies(rxrpc_resend_timeout); + + if (time_before(resend_at, call->resend_at)) { + call->resend_at = resend_at; + rxrpc_set_timer(call); + } } rxrpc_free_skb(skb, rxrpc_skb_tx_freed); From fb2a3d5c7c85cb6e8bc88192be919b4ef8d6e630 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 23 Sep 2016 09:09:31 -0400 Subject: [PATCH 1461/1715] Revert "xen-netback: create a debugfs node for hash information" This reverts commit c0c64c152389ad73306b9b0796357210ec6d32ee. There is no vif->ctrl_task member, so this change broke the build. Signed-off-by: David S. Miller --- drivers/net/xen-netback/common.h | 4 -- drivers/net/xen-netback/hash.c | 68 -------------------------------- drivers/net/xen-netback/xenbus.c | 37 +---------------- 3 files changed, 2 insertions(+), 107 deletions(-) diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index ff94c513fe20..b38fb2cf3364 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -410,8 +410,4 @@ u32 xenvif_set_hash_mapping(struct xenvif *vif, u32 gref, u32 len, void xenvif_set_skb_hash(struct xenvif *vif, struct sk_buff *skb); -#ifdef CONFIG_DEBUG_FS -void xenvif_dump_hash_info(struct xenvif *vif, struct seq_file *m); -#endif - #endif /* __XEN_NETBACK__COMMON_H__ */ diff --git a/drivers/net/xen-netback/hash.c b/drivers/net/xen-netback/hash.c index e8c5dddc54ba..613bac057650 100644 --- a/drivers/net/xen-netback/hash.c +++ b/drivers/net/xen-netback/hash.c @@ -360,74 +360,6 @@ u32 xenvif_set_hash_mapping(struct xenvif *vif, u32 gref, u32 len, return XEN_NETIF_CTRL_STATUS_SUCCESS; } -#ifdef CONFIG_DEBUG_FS -void xenvif_dump_hash_info(struct xenvif *vif, struct seq_file *m) -{ - unsigned int i; - - switch (vif->hash.alg) { - case XEN_NETIF_CTRL_HASH_ALGORITHM_TOEPLITZ: - seq_puts(m, "Hash Algorithm: TOEPLITZ\n"); - break; - - case XEN_NETIF_CTRL_HASH_ALGORITHM_NONE: - seq_puts(m, "Hash Algorithm: NONE\n"); - /* FALLTHRU */ - default: - return; - } - - if (vif->hash.flags) { - seq_puts(m, "\nHash Flags:\n"); - - if (vif->hash.flags & XEN_NETIF_CTRL_HASH_TYPE_IPV4) - seq_puts(m, "- IPv4\n"); - if (vif->hash.flags & XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP) - seq_puts(m, "- IPv4 + TCP\n"); - if (vif->hash.flags & XEN_NETIF_CTRL_HASH_TYPE_IPV6) - seq_puts(m, "- IPv6\n"); - if (vif->hash.flags & XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP) - seq_puts(m, "- IPv6 + TCP\n"); - } - - seq_puts(m, "\nHash Key:\n"); - - for (i = 0; i < XEN_NETBK_MAX_HASH_KEY_SIZE; ) { - unsigned int j, n; - - n = 8; - if (i + n >= XEN_NETBK_MAX_HASH_KEY_SIZE) - n = XEN_NETBK_MAX_HASH_KEY_SIZE - i; - - seq_printf(m, "[%2u - %2u]: ", i, i + n - 1); - - for (j = 0; j < n; j++, i++) - seq_printf(m, "%02x ", vif->hash.key[i]); - - seq_puts(m, "\n"); - } - - if (vif->hash.size != 0) { - seq_puts(m, "\nHash Mapping:\n"); - - for (i = 0; i < vif->hash.size; ) { - unsigned int j, n; - - n = 8; - if (i + n >= vif->hash.size) - n = vif->hash.size - i; - - seq_printf(m, "[%4u - %4u]: ", i, i + n - 1); - - for (j = 0; j < n; j++, i++) - seq_printf(m, "%4u ", vif->hash.mapping[i]); - - seq_puts(m, "\n"); - } - } -} -#endif /* CONFIG_DEBUG_FS */ - void xenvif_init_hash(struct xenvif *vif) { if (xenvif_hash_cache_size == 0) diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index 9911b4e61c0f..daf4c7867102 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -165,7 +165,7 @@ xenvif_write_io_ring(struct file *filp, const char __user *buf, size_t count, return count; } -static int xenvif_io_ring_open(struct inode *inode, struct file *filp) +static int xenvif_dump_open(struct inode *inode, struct file *filp) { int ret; void *queue = NULL; @@ -179,35 +179,13 @@ static int xenvif_io_ring_open(struct inode *inode, struct file *filp) static const struct file_operations xenvif_dbg_io_ring_ops_fops = { .owner = THIS_MODULE, - .open = xenvif_io_ring_open, + .open = xenvif_dump_open, .read = seq_read, .llseek = seq_lseek, .release = single_release, .write = xenvif_write_io_ring, }; -static int xenvif_read_ctrl(struct seq_file *m, void *v) -{ - struct xenvif *vif = m->private; - - xenvif_dump_hash_info(vif, m); - - return 0; -} - -static int xenvif_ctrl_open(struct inode *inode, struct file *filp) -{ - return single_open(filp, xenvif_read_ctrl, inode->i_private); -} - -static const struct file_operations xenvif_dbg_ctrl_ops_fops = { - .owner = THIS_MODULE, - .open = xenvif_ctrl_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static void xenvif_debugfs_addif(struct xenvif *vif) { struct dentry *pfile; @@ -232,17 +210,6 @@ static void xenvif_debugfs_addif(struct xenvif *vif) pr_warn("Creation of io_ring file returned %ld!\n", PTR_ERR(pfile)); } - - if (vif->ctrl_task) { - pfile = debugfs_create_file("ctrl", - S_IRUSR, - vif->xenvif_dbg_root, - vif, - &xenvif_dbg_ctrl_ops_fops); - if (IS_ERR_OR_NULL(pfile)) - pr_warn("Creation of ctrl file returned %ld!\n", - PTR_ERR(pfile)); - } } else netdev_warn(vif->dev, "Creation of vif debugfs dir returned %ld!\n", From be8aa3380678183821bd7d7b5dec845f10d776ce Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 Sep 2016 12:39:23 +0100 Subject: [PATCH 1462/1715] rxrpc: Fix accidental cancellation of scheduled resend by ACK parser When rxrpc_input_soft_acks() is parsing the soft-ACKs from an ACK packet, it updates the Tx packet annotations in the annotation buffer. If a soft-ACK is an ACK, then we overwrite unack'd, nak'd or to-be-retransmitted states and that is fine; but if the soft-ACK is an NACK, we overwrite the to-be-retransmitted with a nak - which isn't. Instead, we need to let any scheduled retransmission stand if the packet was NAK'd. Note that we don't reissue a resend if the annotation is in the to-be-retransmitted state because someone else must've scheduled the resend already. Signed-off-by: David Howells --- net/rxrpc/input.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 06027b6d9c19..d3d69ab1f0a1 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -479,6 +479,8 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks, case RXRPC_ACK_TYPE_NACK: if (anno_type == RXRPC_TX_ANNO_NAK) continue; + if (anno_type == RXRPC_TX_ANNO_RETRANS) + continue; call->rxtx_annotations[ix] = RXRPC_TX_ANNO_NAK | annotation; resend = true; From 01a88f7f6bd4514de9551c3fc9a6fd9e65cbf79d Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 Sep 2016 12:39:22 +0100 Subject: [PATCH 1463/1715] rxrpc: Fix call timer Fix the call timer in the following ways: (1) If call->resend_at or call->ack_at are before or equal to the current time, then ignore that timeout. (2) If call->expire_at is before or equal to the current time, then don't set the timer at all (possibly we should queue the call). (3) Don't skip modifying the timer if timer_pending() is true. This indicates that the timer is working, not that it has expired and is running/waiting to run its expiry handler. Also call rxrpc_set_timer() to start the call timer going rather than calling add_timer(). Signed-off-by: David Howells --- net/rxrpc/call_event.c | 25 ++++++++++++++----------- net/rxrpc/call_object.c | 4 ++-- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 3a7f90a2659c..8bc5c8e37ab4 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -28,24 +28,27 @@ void rxrpc_set_timer(struct rxrpc_call *call) { unsigned long t, now = jiffies; - _enter("{%ld,%ld,%ld:%ld}", - call->ack_at - now, call->resend_at - now, call->expire_at - now, - call->timer.expires - now); - read_lock_bh(&call->state_lock); if (call->state < RXRPC_CALL_COMPLETE) { - t = call->ack_at; - if (time_before(call->resend_at, t)) + t = call->expire_at; + if (time_before_eq(t, now)) + goto out; + + if (time_after(call->resend_at, now) && + time_before(call->resend_at, t)) t = call->resend_at; - if (time_before(call->expire_at, t)) - t = call->expire_at; - if (!timer_pending(&call->timer) || - time_before(t, call->timer.expires)) { - _debug("set timer %ld", t - now); + + if (time_after(call->ack_at, now) && + time_before(call->ack_at, t)) + t = call->ack_at; + + if (call->timer.expires != t || !timer_pending(&call->timer)) { mod_timer(&call->timer, t); } } + +out: read_unlock_bh(&call->state_lock); } diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index f50a6094e198..f2fadf667e19 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -199,8 +199,8 @@ static void rxrpc_start_call_timer(struct rxrpc_call *call) call->expire_at = expire_at; call->ack_at = expire_at; call->resend_at = expire_at; - call->timer.expires = expire_at; - add_timer(&call->timer); + call->timer.expires = expire_at + 1; + rxrpc_set_timer(call); } /* From 70790dbe3f6651fb66ad38da0a1e24368778bc16 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 Sep 2016 12:39:22 +0100 Subject: [PATCH 1464/1715] rxrpc: Pass the last Tx packet marker in the annotation buffer When the last packet of data to be transmitted on a call is queued, tx_top is set and then the RXRPC_CALL_TX_LAST flag is set. Unfortunately, this leaves a race in the ACK processing side of things because the flag affects the interpretation of tx_top and also allows us to start receiving reply data before we've finished transmitting. To fix this, make the following changes: (1) rxrpc_queue_packet() now sets a marker in the annotation buffer instead of setting the RXRPC_CALL_TX_LAST flag. (2) rxrpc_rotate_tx_window() detects the marker and sets the flag in the same context as the routines that use it. (3) rxrpc_end_tx_phase() is simplified to just shift the call state. The Tx window must have been rotated before calling to discard the last packet. (4) rxrpc_receiving_reply() is added to handle the arrival of the first DATA packet of a reply to a client call (which is an implicit ACK of the Tx phase). (5) The last part of rxrpc_input_ack() is reordered to perform Tx rotation, then soft-ACK application and then to end the phase if we've rotated the last packet. In the event of a terminal ACK, the soft-ACK application will be skipped as nAcks should be 0. (6) rxrpc_input_ackall() now has to rotate as well as ending the phase. In addition: (7) Alter the transmit tracepoint to log the rotation of the last packet. (8) Remove the no-longer relevant queue_reqack tracepoint note. The ACK-REQUESTED packet header flag is now set as needed when we actually transmit the packet and may vary by retransmission. Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 7 ++- net/rxrpc/input.c | 104 ++++++++++++++++++++++++++-------------- net/rxrpc/misc.c | 3 +- net/rxrpc/sendmsg.c | 14 +++--- 4 files changed, 82 insertions(+), 46 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 9e3ba4dc9578..a494d56eb236 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -508,7 +508,9 @@ struct rxrpc_call { #define RXRPC_TX_ANNO_NAK 2 #define RXRPC_TX_ANNO_RETRANS 3 #define RXRPC_TX_ANNO_MASK 0x03 -#define RXRPC_TX_ANNO_RESENT 0x04 +#define RXRPC_TX_ANNO_LAST 0x04 +#define RXRPC_TX_ANNO_RESENT 0x08 + #define RXRPC_RX_ANNO_JUMBO 0x3f /* Jumbo subpacket number + 1 if not zero */ #define RXRPC_RX_ANNO_JLAST 0x40 /* Set if last element of a jumbo packet */ #define RXRPC_RX_ANNO_VERIFIED 0x80 /* Set if verified and decrypted */ @@ -621,9 +623,10 @@ extern const char rxrpc_call_traces[rxrpc_call__nr_trace][4]; enum rxrpc_transmit_trace { rxrpc_transmit_wait, rxrpc_transmit_queue, - rxrpc_transmit_queue_reqack, rxrpc_transmit_queue_last, rxrpc_transmit_rotate, + rxrpc_transmit_rotate_last, + rxrpc_transmit_await_reply, rxrpc_transmit_end, rxrpc_transmit__nr_trace }; diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index d3d69ab1f0a1..fb3e2f6afa3b 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -59,6 +59,7 @@ static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to) { struct sk_buff *skb, *list = NULL; int ix; + u8 annotation; spin_lock(&call->lock); @@ -66,16 +67,22 @@ static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to) call->tx_hard_ack++; ix = call->tx_hard_ack & RXRPC_RXTX_BUFF_MASK; skb = call->rxtx_buffer[ix]; + annotation = call->rxtx_annotations[ix]; rxrpc_see_skb(skb, rxrpc_skb_tx_rotated); call->rxtx_buffer[ix] = NULL; call->rxtx_annotations[ix] = 0; skb->next = list; list = skb; + + if (annotation & RXRPC_TX_ANNO_LAST) + set_bit(RXRPC_CALL_TX_LAST, &call->flags); } spin_unlock(&call->lock); - trace_rxrpc_transmit(call, rxrpc_transmit_rotate); + trace_rxrpc_transmit(call, (test_bit(RXRPC_CALL_TX_LAST, &call->flags) ? + rxrpc_transmit_rotate_last : + rxrpc_transmit_rotate)); wake_up(&call->waitq); while (list) { @@ -92,42 +99,65 @@ static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to) * This occurs when we get an ACKALL packet, the first DATA packet of a reply, * or a final ACK packet. */ -static bool rxrpc_end_tx_phase(struct rxrpc_call *call, const char *abort_why) +static bool rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun, + const char *abort_why) { - _enter(""); - switch (call->state) { - case RXRPC_CALL_CLIENT_RECV_REPLY: - return true; - case RXRPC_CALL_CLIENT_AWAIT_REPLY: - case RXRPC_CALL_SERVER_AWAIT_ACK: - break; - default: - rxrpc_proto_abort(abort_why, call, call->tx_top); - return false; - } - - rxrpc_rotate_tx_window(call, call->tx_top); + ASSERT(test_bit(RXRPC_CALL_TX_LAST, &call->flags)); write_lock(&call->state_lock); switch (call->state) { - default: - break; + case RXRPC_CALL_CLIENT_SEND_REQUEST: case RXRPC_CALL_CLIENT_AWAIT_REPLY: - call->tx_phase = false; - call->state = RXRPC_CALL_CLIENT_RECV_REPLY; + if (reply_begun) + call->state = RXRPC_CALL_CLIENT_RECV_REPLY; + else + call->state = RXRPC_CALL_CLIENT_AWAIT_REPLY; break; + case RXRPC_CALL_SERVER_AWAIT_ACK: __rxrpc_call_completed(call); rxrpc_notify_socket(call); break; + + default: + goto bad_state; } write_unlock(&call->state_lock); - trace_rxrpc_transmit(call, rxrpc_transmit_end); + if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY) { + trace_rxrpc_transmit(call, rxrpc_transmit_await_reply); + } else { + trace_rxrpc_transmit(call, rxrpc_transmit_end); + } _leave(" = ok"); return true; + +bad_state: + write_unlock(&call->state_lock); + kdebug("end_tx %s", rxrpc_call_states[call->state]); + rxrpc_proto_abort(abort_why, call, call->tx_top); + return false; +} + +/* + * Begin the reply reception phase of a call. + */ +static bool rxrpc_receiving_reply(struct rxrpc_call *call) +{ + rxrpc_seq_t top = READ_ONCE(call->tx_top); + + if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) + rxrpc_rotate_tx_window(call, top); + if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) { + rxrpc_proto_abort("TXL", call, top); + return false; + } + if (!rxrpc_end_tx_phase(call, true, "ETD")) + return false; + call->tx_phase = false; + return true; } /* @@ -226,8 +256,9 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb, /* Received data implicitly ACKs all of the request packets we sent * when we're acting as a client. */ - if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY && - !rxrpc_end_tx_phase(call, "ETD")) + if ((call->state == RXRPC_CALL_CLIENT_SEND_REQUEST || + call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY) && + !rxrpc_receiving_reply(call)) return; call->ackr_prev_seq = seq; @@ -587,27 +618,26 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, } call->acks_latest = sp->hdr.serial; - if (test_bit(RXRPC_CALL_TX_LAST, &call->flags) && - hard_ack == call->tx_top) { - rxrpc_end_tx_phase(call, "ETA"); - return; - } - if (before(hard_ack, call->tx_hard_ack) || after(hard_ack, call->tx_top)) return rxrpc_proto_abort("AKW", call, 0); + if (nr_acks > call->tx_top - hard_ack) + return rxrpc_proto_abort("AKN", call, 0); if (after(hard_ack, call->tx_hard_ack)) rxrpc_rotate_tx_window(call, hard_ack); - if (after(first_soft_ack, call->tx_top)) - return; + if (nr_acks > 0) { + if (skb_copy_bits(skb, sp->offset, buf.acks, nr_acks) < 0) + return rxrpc_proto_abort("XSA", call, 0); + rxrpc_input_soft_acks(call, buf.acks, first_soft_ack, nr_acks); + } + + if (test_bit(RXRPC_CALL_TX_LAST, &call->flags)) { + rxrpc_end_tx_phase(call, false, "ETA"); + return; + } - if (nr_acks > call->tx_top - first_soft_ack + 1) - nr_acks = first_soft_ack - call->tx_top + 1; - if (skb_copy_bits(skb, sp->offset, buf.acks, nr_acks) < 0) - return rxrpc_proto_abort("XSA", call, 0); - rxrpc_input_soft_acks(call, buf.acks, first_soft_ack, nr_acks); } /* @@ -619,7 +649,9 @@ static void rxrpc_input_ackall(struct rxrpc_call *call, struct sk_buff *skb) _proto("Rx ACKALL %%%u", sp->hdr.serial); - rxrpc_end_tx_phase(call, "ETL"); + rxrpc_rotate_tx_window(call, call->tx_top); + if (test_bit(RXRPC_CALL_TX_LAST, &call->flags)) + rxrpc_end_tx_phase(call, false, "ETL"); } /* diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index 0d425e707f22..fe648711c2f7 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -155,9 +155,10 @@ const char rxrpc_client_traces[rxrpc_client__nr_trace][7] = { const char rxrpc_transmit_traces[rxrpc_transmit__nr_trace][4] = { [rxrpc_transmit_wait] = "WAI", [rxrpc_transmit_queue] = "QUE", - [rxrpc_transmit_queue_reqack] = "QRA", [rxrpc_transmit_queue_last] = "QLS", [rxrpc_transmit_rotate] = "ROT", + [rxrpc_transmit_rotate_last] = "RLS", + [rxrpc_transmit_await_reply] = "AWR", [rxrpc_transmit_end] = "END", }; diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 7cb34b2dfba9..93e6584cd751 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -94,11 +94,15 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, struct rxrpc_skb_priv *sp = rxrpc_skb(skb); rxrpc_seq_t seq = sp->hdr.seq; int ret, ix; + u8 annotation = RXRPC_TX_ANNO_UNACK; _net("queue skb %p [%d]", skb, seq); ASSERTCMP(seq, ==, call->tx_top + 1); + if (last) + annotation |= RXRPC_TX_ANNO_LAST; + /* We have to set the timestamp before queueing as the retransmit * algorithm can see the packet as soon as we queue it. */ @@ -106,18 +110,14 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, ix = seq & RXRPC_RXTX_BUFF_MASK; rxrpc_get_skb(skb, rxrpc_skb_tx_got); - call->rxtx_annotations[ix] = RXRPC_TX_ANNO_UNACK; + call->rxtx_annotations[ix] = annotation; smp_wmb(); call->rxtx_buffer[ix] = skb; call->tx_top = seq; - if (last) { - set_bit(RXRPC_CALL_TX_LAST, &call->flags); + if (last) trace_rxrpc_transmit(call, rxrpc_transmit_queue_last); - } else if (sp->hdr.flags & RXRPC_REQUEST_ACK) { - trace_rxrpc_transmit(call, rxrpc_transmit_queue_reqack); - } else { + else trace_rxrpc_transmit(call, rxrpc_transmit_queue); - } if (last || call->state == RXRPC_CALL_SERVER_ACK_REQUEST) { _debug("________awaiting reply/ACK__________"); From b86e218e0d422488e0febb07620fa97ae9713779 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 Sep 2016 15:08:48 +0100 Subject: [PATCH 1465/1715] rxrpc: Don't call the tx_ack tracepoint if don't generate an ACK rxrpc_send_call_packet() is invoking the tx_ack tracepoint before it checks whether there's an ACK to transmit (another thread may jump in and transmit it). Fix this by only invoking the tracepoint if we get a valid ACK to transmit. Further, only allocate a serial number if we're going to actually transmit something. Signed-off-by: David Howells --- net/rxrpc/output.c | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 282cb1e36d06..5c1e008a5323 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -80,9 +80,6 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_call *call, pkt->ackinfo.rwind = htonl(call->rx_winsize); pkt->ackinfo.jumbo_max = htonl(jmax); - trace_rxrpc_tx_ack(call, hard_ack + 1, serial, call->ackr_reason, - top - hard_ack); - *ackp++ = 0; *ackp++ = 0; *ackp++ = 0; @@ -119,8 +116,6 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) return -ENOMEM; } - serial = atomic_inc_return(&conn->serial); - msg.msg_name = &call->peer->srx.transport; msg.msg_namelen = call->peer->srx.transport_len; msg.msg_control = NULL; @@ -131,7 +126,6 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) pkt->whdr.cid = htonl(call->cid); pkt->whdr.callNumber = htonl(call->call_id); pkt->whdr.seq = 0; - pkt->whdr.serial = htonl(serial); pkt->whdr.type = type; pkt->whdr.flags = conn->out_clientflag; pkt->whdr.userStatus = 0; @@ -157,14 +151,6 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) spin_unlock_bh(&call->lock); - _proto("Tx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }", - serial, - ntohs(pkt->ack.maxSkew), - ntohl(pkt->ack.firstPacket), - ntohl(pkt->ack.previousPacket), - ntohl(pkt->ack.serial), - rxrpc_acks(pkt->ack.reason), - pkt->ack.nAcks); iov[0].iov_len += sizeof(pkt->ack) + n; iov[1].iov_base = &pkt->ackinfo; @@ -176,7 +162,6 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) case RXRPC_PACKET_TYPE_ABORT: abort_code = call->abort_code; pkt->abort_code = htonl(abort_code); - _proto("Tx ABORT %%%u { %d }", serial, abort_code); iov[0].iov_len += sizeof(pkt->abort_code); len += sizeof(pkt->abort_code); ioc = 1; @@ -188,6 +173,17 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) goto out; } + serial = atomic_inc_return(&conn->serial); + pkt->whdr.serial = htonl(serial); + switch (type) { + case RXRPC_PACKET_TYPE_ACK: + trace_rxrpc_tx_ack(call, + ntohl(pkt->ack.firstPacket), + ntohl(pkt->ack.serial), + pkt->ack.reason, pkt->ack.nAcks); + break; + } + if (ping) { call->ackr_ping = serial; smp_wmb(); From fc7ab6d29a3af0b7f6df7c095509378c8caf85b5 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 Sep 2016 15:22:36 +0100 Subject: [PATCH 1466/1715] rxrpc: Add a tracepoint for the call timer Add a tracepoint to log call timer initiation, setting and expiry. Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 36 ++++++++++++++++++++++++++++++++++++ net/rxrpc/ar-internal.h | 13 ++++++++++++- net/rxrpc/call_event.c | 7 ++++--- net/rxrpc/call_object.c | 6 ++++-- net/rxrpc/misc.c | 8 ++++++++ net/rxrpc/sendmsg.c | 2 +- 6 files changed, 65 insertions(+), 7 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index e8f2afbbe0bf..57322897d745 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -414,6 +414,42 @@ TRACE_EVENT(rxrpc_rtt_rx, __entry->avg) ); +TRACE_EVENT(rxrpc_timer, + TP_PROTO(struct rxrpc_call *call, enum rxrpc_timer_trace why, + unsigned long now), + + TP_ARGS(call, why, now), + + TP_STRUCT__entry( + __field(struct rxrpc_call *, call ) + __field(enum rxrpc_timer_trace, why ) + __field(unsigned long, now ) + __field(unsigned long, expire_at ) + __field(unsigned long, ack_at ) + __field(unsigned long, resend_at ) + __field(unsigned long, timer ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->why = why; + __entry->now = now; + __entry->expire_at = call->expire_at; + __entry->ack_at = call->ack_at; + __entry->resend_at = call->resend_at; + __entry->timer = call->timer.expires; + ), + + TP_printk("c=%p %s now=%lx x=%ld a=%ld r=%ld t=%ld", + __entry->call, + rxrpc_timer_traces[__entry->why], + __entry->now, + __entry->expire_at - __entry->now, + __entry->ack_at - __entry->now, + __entry->resend_at - __entry->now, + __entry->timer - __entry->now) + ); + #endif /* _TRACE_RXRPC_H */ /* This part must be outside protection */ diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index a494d56eb236..e564eca75985 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -678,6 +678,17 @@ enum rxrpc_rtt_rx_trace { extern const char rxrpc_rtt_rx_traces[rxrpc_rtt_rx__nr_trace][5]; +enum rxrpc_timer_trace { + rxrpc_timer_begin, + rxrpc_timer_expired, + rxrpc_timer_set_for_ack, + rxrpc_timer_set_for_resend, + rxrpc_timer_set_for_send, + rxrpc_timer__nr_trace +}; + +extern const char rxrpc_timer_traces[rxrpc_timer__nr_trace][8]; + extern const char *const rxrpc_pkts[]; extern const char *rxrpc_acks(u8 reason); @@ -707,7 +718,7 @@ int rxrpc_reject_call(struct rxrpc_sock *); /* * call_event.c */ -void rxrpc_set_timer(struct rxrpc_call *); +void rxrpc_set_timer(struct rxrpc_call *, enum rxrpc_timer_trace); void rxrpc_propose_ACK(struct rxrpc_call *, u8, u16, u32, bool, bool); void rxrpc_process_call(struct work_struct *); diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 8bc5c8e37ab4..90e970ba048a 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -24,7 +24,7 @@ /* * Set the timer */ -void rxrpc_set_timer(struct rxrpc_call *call) +void rxrpc_set_timer(struct rxrpc_call *call, enum rxrpc_timer_trace why) { unsigned long t, now = jiffies; @@ -45,6 +45,7 @@ void rxrpc_set_timer(struct rxrpc_call *call) if (call->timer.expires != t || !timer_pending(&call->timer)) { mod_timer(&call->timer, t); + trace_rxrpc_timer(call, why, now); } } @@ -120,7 +121,7 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, _debug("deferred ACK %ld < %ld", expiry, call->ack_at - now); if (time_before(ack_at, call->ack_at)) { call->ack_at = ack_at; - rxrpc_set_timer(call); + rxrpc_set_timer(call, rxrpc_timer_set_for_ack); } } } @@ -293,7 +294,7 @@ recheck_state: goto recheck_state; } - rxrpc_set_timer(call); + rxrpc_set_timer(call, rxrpc_timer_set_for_resend); /* other events may have been raised since we started checking */ if (call->events && call->state < RXRPC_CALL_COMPLETE) { diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index f2fadf667e19..a53f4c2c0025 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -76,8 +76,10 @@ static void rxrpc_call_timer_expired(unsigned long _call) _enter("%d", call->debug_id); - if (call->state < RXRPC_CALL_COMPLETE) + if (call->state < RXRPC_CALL_COMPLETE) { + trace_rxrpc_timer(call, rxrpc_timer_expired, jiffies); rxrpc_queue_call(call); + } } /* @@ -200,7 +202,7 @@ static void rxrpc_start_call_timer(struct rxrpc_call *call) call->ack_at = expire_at; call->resend_at = expire_at; call->timer.expires = expire_at + 1; - rxrpc_set_timer(call); + rxrpc_set_timer(call, rxrpc_timer_begin); } /* diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index fe648711c2f7..fa9942fabdf2 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -194,3 +194,11 @@ const char rxrpc_rtt_rx_traces[rxrpc_rtt_rx__nr_trace][5] = { [rxrpc_rtt_rx_ping_response] = "PONG", [rxrpc_rtt_rx_requested_ack] = "RACK", }; + +const char rxrpc_timer_traces[rxrpc_timer__nr_trace][8] = { + [rxrpc_timer_begin] = "Begin ", + [rxrpc_timer_expired] = "*EXPR*", + [rxrpc_timer_set_for_ack] = "SetAck", + [rxrpc_timer_set_for_send] = "SetTx ", + [rxrpc_timer_set_for_resend] = "SetRTx", +}; diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 93e6584cd751..99939372b5a4 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -153,7 +153,7 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, if (time_before(resend_at, call->resend_at)) { call->resend_at = resend_at; - rxrpc_set_timer(call); + rxrpc_set_timer(call, rxrpc_timer_set_for_send); } } From be832aecc5ba811728e15a10f675f4a2187f25dd Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 Sep 2016 12:39:22 +0100 Subject: [PATCH 1467/1715] rxrpc: Add data Tx tracepoint and adjust Tx ACK tracepoint Add a tracepoint to log transmission of DATA packets (including loss injection). Adjust the ACK transmission tracepoint to include the packet serial number and to line this up with the DATA transmission display. Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 52 +++++++++++++++++++++++++++++------- net/rxrpc/conn_event.c | 5 ++-- net/rxrpc/output.c | 5 +++- 3 files changed, 49 insertions(+), 13 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 57322897d745..6001bf93dc79 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -256,33 +256,67 @@ TRACE_EVENT(rxrpc_rx_ack, __entry->n_acks) ); -TRACE_EVENT(rxrpc_tx_ack, - TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t first, - rxrpc_serial_t serial, u8 reason, u8 n_acks), +TRACE_EVENT(rxrpc_tx_data, + TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t seq, + rxrpc_serial_t serial, u8 flags, bool lose), - TP_ARGS(call, first, serial, reason, n_acks), + TP_ARGS(call, seq, serial, flags, lose), TP_STRUCT__entry( __field(struct rxrpc_call *, call ) - __field(rxrpc_seq_t, first ) + __field(rxrpc_seq_t, seq ) __field(rxrpc_serial_t, serial ) + __field(u8, flags ) + __field(bool, lose ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->seq = seq; + __entry->serial = serial; + __entry->flags = flags; + __entry->lose = lose; + ), + + TP_printk("c=%p DATA %08x q=%08x fl=%02x%s", + __entry->call, + __entry->serial, + __entry->seq, + __entry->flags, + __entry->lose ? " *LOSE*" : "") + ); + +TRACE_EVENT(rxrpc_tx_ack, + TP_PROTO(struct rxrpc_call *call, rxrpc_serial_t serial, + rxrpc_seq_t ack_first, rxrpc_serial_t ack_serial, + u8 reason, u8 n_acks), + + TP_ARGS(call, serial, ack_first, ack_serial, reason, n_acks), + + TP_STRUCT__entry( + __field(struct rxrpc_call *, call ) + __field(rxrpc_serial_t, serial ) + __field(rxrpc_seq_t, ack_first ) + __field(rxrpc_serial_t, ack_serial ) __field(u8, reason ) __field(u8, n_acks ) ), TP_fast_assign( __entry->call = call; - __entry->first = first; __entry->serial = serial; + __entry->ack_first = ack_first; + __entry->ack_serial = ack_serial; __entry->reason = reason; __entry->n_acks = n_acks; ), - TP_printk("c=%p %s f=%08x r=%08x n=%u", + TP_printk(" c=%p ACK %08x %s f=%08x r=%08x n=%u", __entry->call, - rxrpc_acks(__entry->reason), - __entry->first, __entry->serial, + rxrpc_acks(__entry->reason), + __entry->ack_first, + __entry->ack_serial, __entry->n_acks) ); diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 75a15a4c74c3..a1cf1ec5f29e 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -98,9 +98,6 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, pkt.info.rwind = htonl(rxrpc_rx_window_size); pkt.info.jumbo_max = htonl(rxrpc_rx_jumbo_max); len += sizeof(pkt.ack) + sizeof(pkt.info); - - trace_rxrpc_tx_ack(NULL, chan->last_seq, 0, - RXRPC_ACK_DUPLICATE, 0); break; } @@ -122,6 +119,8 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, _proto("Tx ABORT %%%u { %d } [re]", serial, conn->local_abort); break; case RXRPC_PACKET_TYPE_ACK: + trace_rxrpc_tx_ack(NULL, serial, chan->last_seq, 0, + RXRPC_ACK_DUPLICATE, 0); _proto("Tx ACK %%%u [re]", serial); break; } diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 5c1e008a5323..e47fbd1c836d 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -177,7 +177,7 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) pkt->whdr.serial = htonl(serial); switch (type) { case RXRPC_PACKET_TYPE_ACK: - trace_rxrpc_tx_ack(call, + trace_rxrpc_tx_ack(call, serial, ntohl(pkt->ack.firstPacket), ntohl(pkt->ack.serial), pkt->ack.reason, pkt->ack.nAcks); @@ -275,6 +275,8 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb) if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) { static int lose; if ((lose++ & 7) == 7) { + trace_rxrpc_tx_data(call, sp->hdr.seq, serial, + whdr.flags, true); rxrpc_lose_skb(skb, rxrpc_skb_tx_lost); _leave(" = 0 [lose]"); return 0; @@ -302,6 +304,7 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb) goto send_fragmentable; done: + trace_rxrpc_tx_data(call, sp->hdr.seq, serial, whdr.flags, false); if (ret >= 0) { ktime_t now = ktime_get_real(); skb->tstamp = now; From 89b475abdb107a74f57497b65becaf837a0e5b6b Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 Sep 2016 12:39:22 +0100 Subject: [PATCH 1468/1715] rxrpc: Add a tracepoint to log injected Rx packet loss Add a tracepoint to log received packets that get discarded due to Rx packet loss. Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 21 +++++++++++++++++++++ net/rxrpc/input.c | 11 +++++------ 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 6001bf93dc79..9413b17ba04b 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -484,6 +484,27 @@ TRACE_EVENT(rxrpc_timer, __entry->timer - __entry->now) ); +TRACE_EVENT(rxrpc_rx_lose, + TP_PROTO(struct rxrpc_skb_priv *sp), + + TP_ARGS(sp), + + TP_STRUCT__entry( + __field_struct(struct rxrpc_host_header, hdr ) + ), + + TP_fast_assign( + memcpy(&__entry->hdr, &sp->hdr, sizeof(__entry->hdr)); + ), + + TP_printk("%08x:%08x:%08x:%04x %08x %08x %02x %02x %s *LOSE*", + __entry->hdr.epoch, __entry->hdr.cid, + __entry->hdr.callNumber, __entry->hdr.serviceId, + __entry->hdr.serial, __entry->hdr.seq, + __entry->hdr.type, __entry->hdr.flags, + __entry->hdr.type <= 15 ? rxrpc_pkts[__entry->hdr.type] : "?UNK") + ); + #endif /* _TRACE_RXRPC_H */ /* This part must be outside protection */ diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index fb3e2f6afa3b..19b1e189f5dc 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -837,20 +837,19 @@ void rxrpc_data_ready(struct sock *udp_sk) skb_orphan(skb); sp = rxrpc_skb(skb); + /* dig out the RxRPC connection details */ + if (rxrpc_extract_header(sp, skb) < 0) + goto bad_message; + if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) { static int lose; if ((lose++ & 7) == 7) { + trace_rxrpc_rx_lose(sp); rxrpc_lose_skb(skb, rxrpc_skb_rx_lost); return; } } - _net("Rx UDP packet from %08x:%04hu", - ntohl(ip_hdr(skb)->saddr), ntohs(udp_hdr(skb)->source)); - - /* dig out the RxRPC connection details */ - if (rxrpc_extract_header(sp, skb) < 0) - goto bad_message; trace_rxrpc_rx_packet(sp); _net("Rx RxRPC %s ep=%x call=%x:%x", From 9c7ad434441da6b5d4ac878cac368fbdaec99b56 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 Sep 2016 13:50:40 +0100 Subject: [PATCH 1469/1715] rxrpc: Add tracepoint for ACK proposal Add a tracepoint to log proposed ACKs, including whether the proposal is used to update a pending ACK or is discarded in favour of an easlier, higher priority ACK. Whilst we're at it, get rid of the rxrpc_acks() function and access the name array directly. We do, however, need to validate the ACK reason number given to trace_rxrpc_rx_ack() to make sure we don't overrun the array. Signed-off-by: David Howells --- include/rxrpc/packet.h | 1 + include/trace/events/rxrpc.h | 42 ++++++++++++++++++++++++++++++++++-- net/rxrpc/ar-internal.h | 25 +++++++++++++++++++-- net/rxrpc/call_event.c | 21 ++++++++++++------ net/rxrpc/input.c | 19 ++++++++++------ net/rxrpc/misc.c | 30 ++++++++++++++++---------- net/rxrpc/output.c | 3 ++- net/rxrpc/recvmsg.c | 3 ++- 8 files changed, 114 insertions(+), 30 deletions(-) diff --git a/include/rxrpc/packet.h b/include/rxrpc/packet.h index fd6eb3a60a8c..703a64b4681a 100644 --- a/include/rxrpc/packet.h +++ b/include/rxrpc/packet.h @@ -123,6 +123,7 @@ struct rxrpc_ackpacket { #define RXRPC_ACK_PING_RESPONSE 7 /* response to RXRPC_ACK_PING */ #define RXRPC_ACK_DELAY 8 /* nothing happened since received packet */ #define RXRPC_ACK_IDLE 9 /* ACK due to fully received ACK window */ +#define RXRPC_ACK__INVALID 10 /* Representation of invalid ACK reason */ uint8_t nAcks; /* number of ACKs */ #define RXRPC_MAXACKS 255 diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 9413b17ba04b..d67a8c6b085a 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -251,7 +251,7 @@ TRACE_EVENT(rxrpc_rx_ack, TP_printk("c=%p %s f=%08x n=%u", __entry->call, - rxrpc_acks(__entry->reason), + rxrpc_ack_names[__entry->reason], __entry->first, __entry->n_acks) ); @@ -314,7 +314,7 @@ TRACE_EVENT(rxrpc_tx_ack, TP_printk(" c=%p ACK %08x %s f=%08x r=%08x n=%u", __entry->call, __entry->serial, - rxrpc_acks(__entry->reason), + rxrpc_ack_names[__entry->reason], __entry->ack_first, __entry->ack_serial, __entry->n_acks) @@ -505,6 +505,44 @@ TRACE_EVENT(rxrpc_rx_lose, __entry->hdr.type <= 15 ? rxrpc_pkts[__entry->hdr.type] : "?UNK") ); +TRACE_EVENT(rxrpc_propose_ack, + TP_PROTO(struct rxrpc_call *call, enum rxrpc_propose_ack_trace why, + u8 ack_reason, rxrpc_serial_t serial, bool immediate, + bool background, enum rxrpc_propose_ack_outcome outcome), + + TP_ARGS(call, why, ack_reason, serial, immediate, background, + outcome), + + TP_STRUCT__entry( + __field(struct rxrpc_call *, call ) + __field(enum rxrpc_propose_ack_trace, why ) + __field(rxrpc_serial_t, serial ) + __field(u8, ack_reason ) + __field(bool, immediate ) + __field(bool, background ) + __field(enum rxrpc_propose_ack_outcome, outcome ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->why = why; + __entry->serial = serial; + __entry->ack_reason = ack_reason; + __entry->immediate = immediate; + __entry->background = background; + __entry->outcome = outcome; + ), + + TP_printk("c=%p %s %s r=%08x i=%u b=%u%s", + __entry->call, + rxrpc_propose_ack_traces[__entry->why], + rxrpc_ack_names[__entry->ack_reason], + __entry->serial, + __entry->immediate, + __entry->background, + rxrpc_propose_ack_outcomes[__entry->outcome]) + ); + #endif /* _TRACE_RXRPC_H */ /* This part must be outside protection */ diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index e564eca75985..042dbcc52654 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -689,8 +689,28 @@ enum rxrpc_timer_trace { extern const char rxrpc_timer_traces[rxrpc_timer__nr_trace][8]; +enum rxrpc_propose_ack_trace { + rxrpc_propose_ack_input_data, + rxrpc_propose_ack_ping_for_params, + rxrpc_propose_ack_respond_to_ack, + rxrpc_propose_ack_respond_to_ping, + rxrpc_propose_ack_retry_tx, + rxrpc_propose_ack_terminal_ack, + rxrpc_propose_ack__nr_trace +}; + +enum rxrpc_propose_ack_outcome { + rxrpc_propose_ack_use, + rxrpc_propose_ack_update, + rxrpc_propose_ack_subsume, + rxrpc_propose_ack__nr_outcomes +}; + +extern const char rxrpc_propose_ack_traces[rxrpc_propose_ack__nr_trace][8]; +extern const char *const rxrpc_propose_ack_outcomes[rxrpc_propose_ack__nr_outcomes]; + extern const char *const rxrpc_pkts[]; -extern const char *rxrpc_acks(u8 reason); +extern const char const rxrpc_ack_names[RXRPC_ACK__INVALID + 1][4]; #include @@ -719,7 +739,8 @@ int rxrpc_reject_call(struct rxrpc_sock *); * call_event.c */ void rxrpc_set_timer(struct rxrpc_call *, enum rxrpc_timer_trace); -void rxrpc_propose_ACK(struct rxrpc_call *, u8, u16, u32, bool, bool); +void rxrpc_propose_ACK(struct rxrpc_call *, u8, u16, u32, bool, bool, + enum rxrpc_propose_ack_trace); void rxrpc_process_call(struct work_struct *); /* diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 90e970ba048a..fd5b11339ffb 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -58,14 +58,13 @@ out: */ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, u16 skew, u32 serial, bool immediate, - bool background) + bool background, + enum rxrpc_propose_ack_trace why) { + enum rxrpc_propose_ack_outcome outcome = rxrpc_propose_ack_use; unsigned long now, ack_at, expiry = rxrpc_soft_ack_delay; s8 prior = rxrpc_ack_priority[ack_reason]; - _enter("{%d},%s,%%%x,%u", - call->debug_id, rxrpc_acks(ack_reason), serial, immediate); - /* Update DELAY, IDLE, REQUESTED and PING_RESPONSE ACK serial * numbers, but we don't alter the timeout. */ @@ -74,15 +73,18 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, call->ackr_reason, rxrpc_ack_priority[call->ackr_reason]); if (ack_reason == call->ackr_reason) { if (RXRPC_ACK_UPDATEABLE & (1 << ack_reason)) { + outcome = rxrpc_propose_ack_update; call->ackr_serial = serial; call->ackr_skew = skew; } if (!immediate) - return; + goto trace; } else if (prior > rxrpc_ack_priority[call->ackr_reason]) { call->ackr_reason = ack_reason; call->ackr_serial = serial; call->ackr_skew = skew; + } else { + outcome = rxrpc_propose_ack_subsume; } switch (ack_reason) { @@ -124,17 +126,22 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, rxrpc_set_timer(call, rxrpc_timer_set_for_ack); } } + +trace: + trace_rxrpc_propose_ack(call, why, ack_reason, serial, immediate, + background, outcome); } /* * propose an ACK be sent, locking the call structure */ void rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, - u16 skew, u32 serial, bool immediate, bool background) + u16 skew, u32 serial, bool immediate, bool background, + enum rxrpc_propose_ack_trace why) { spin_lock_bh(&call->lock); __rxrpc_propose_ACK(call, ack_reason, skew, serial, - immediate, background); + immediate, background, why); spin_unlock_bh(&call->lock); } diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 19b1e189f5dc..349698d87ad1 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -49,7 +49,8 @@ static void rxrpc_send_ping(struct rxrpc_call *call, struct sk_buff *skb, if (call->peer->rtt_usage < 3 || ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), now)) rxrpc_propose_ACK(call, RXRPC_ACK_PING, skew, sp->hdr.serial, - true, true); + true, true, + rxrpc_propose_ack_ping_for_params); } /* @@ -382,7 +383,8 @@ skip: ack: if (ack) rxrpc_propose_ACK(call, ack, skew, ack_serial, - immediate_ack, true); + immediate_ack, true, + rxrpc_propose_ack_input_data); if (sp->hdr.seq == READ_ONCE(call->rx_hard_ack) + 1) rxrpc_notify_socket(call); @@ -539,6 +541,7 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks, static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, u16 skew) { + u8 ack_reason; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); union { struct rxrpc_ackpacket ack; @@ -561,8 +564,10 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, first_soft_ack = ntohl(buf.ack.firstPacket); hard_ack = first_soft_ack - 1; nr_acks = buf.ack.nAcks; + ack_reason = (buf.ack.reason < RXRPC_ACK__INVALID ? + buf.ack.reason : RXRPC_ACK__INVALID); - trace_rxrpc_rx_ack(call, first_soft_ack, buf.ack.reason, nr_acks); + trace_rxrpc_rx_ack(call, first_soft_ack, ack_reason, nr_acks); _proto("Rx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }", sp->hdr.serial, @@ -570,7 +575,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, first_soft_ack, ntohl(buf.ack.previousPacket), acked_serial, - rxrpc_acks(buf.ack.reason), + rxrpc_ack_names[ack_reason], buf.ack.nAcks); if (buf.ack.reason == RXRPC_ACK_PING_RESPONSE) @@ -583,10 +588,12 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, if (buf.ack.reason == RXRPC_ACK_PING) { _proto("Rx ACK %%%u PING Request", sp->hdr.serial); rxrpc_propose_ACK(call, RXRPC_ACK_PING_RESPONSE, - skew, sp->hdr.serial, true, true); + skew, sp->hdr.serial, true, true, + rxrpc_propose_ack_respond_to_ping); } else if (sp->hdr.flags & RXRPC_REQUEST_ACK) { rxrpc_propose_ACK(call, RXRPC_ACK_REQUESTED, - skew, sp->hdr.serial, true, true); + skew, sp->hdr.serial, true, true, + rxrpc_propose_ack_respond_to_ack); } offset = sp->offset + nr_acks + 3; diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index fa9942fabdf2..1ca14835d87f 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -91,17 +91,10 @@ const s8 rxrpc_ack_priority[] = { [RXRPC_ACK_PING] = 9, }; -const char *rxrpc_acks(u8 reason) -{ - static const char *const str[] = { - "---", "REQ", "DUP", "OOS", "WIN", "MEM", "PNG", "PNR", "DLY", - "IDL", "-?-" - }; - - if (reason >= ARRAY_SIZE(str)) - reason = ARRAY_SIZE(str) - 1; - return str[reason]; -} +const char const rxrpc_ack_names[RXRPC_ACK__INVALID + 1][4] = { + "---", "REQ", "DUP", "OOS", "WIN", "MEM", "PNG", "PNR", "DLY", + "IDL", "-?-" +}; const char rxrpc_skb_traces[rxrpc_skb__nr_trace][7] = { [rxrpc_skb_rx_cleaned] = "Rx CLN", @@ -202,3 +195,18 @@ const char rxrpc_timer_traces[rxrpc_timer__nr_trace][8] = { [rxrpc_timer_set_for_send] = "SetTx ", [rxrpc_timer_set_for_resend] = "SetRTx", }; + +const char rxrpc_propose_ack_traces[rxrpc_propose_ack__nr_trace][8] = { + [rxrpc_propose_ack_input_data] = "DataIn ", + [rxrpc_propose_ack_ping_for_params] = "Params ", + [rxrpc_propose_ack_respond_to_ack] = "Rsp2Ack", + [rxrpc_propose_ack_respond_to_ping] = "Rsp2Png", + [rxrpc_propose_ack_retry_tx] = "RetryTx", + [rxrpc_propose_ack_terminal_ack] = "ClTerm ", +}; + +const char *const rxrpc_propose_ack_outcomes[rxrpc_propose_ack__nr_outcomes] = { + [rxrpc_propose_ack_use] = "", + [rxrpc_propose_ack_update] = " Update", + [rxrpc_propose_ack_subsume] = " Subsume", +}; diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index e47fbd1c836d..0c563e325c9d 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -210,7 +210,8 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) rxrpc_propose_ACK(call, pkt->ack.reason, ntohs(pkt->ack.maxSkew), ntohl(pkt->ack.serial), - true, true); + true, true, + rxrpc_propose_ack_retry_tx); break; case RXRPC_PACKET_TYPE_ABORT: break; diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 99e4c0ae30f1..8c7f3de45bac 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -141,7 +141,8 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call) ASSERTCMP(call->rx_hard_ack, ==, call->rx_top); if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY) { - rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, 0, true, false); + rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, 0, true, false, + rxrpc_propose_ack_terminal_ack); rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK); } From c6672e3fe4a641bf302d6309ab4d5ee55648e758 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 Sep 2016 13:58:55 +0100 Subject: [PATCH 1470/1715] rxrpc: Add a tracepoint to log which packets will be retransmitted Add a tracepoint to log in rxrpc_resend() which packets will be retransmitted. Note that if a positive ACK comes in whilst we have dropped the lock to retransmit another packet, the actual retransmission may not happen, though some of the effects will (such as altering the congestion management). Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 27 +++++++++++++++++++++++++++ net/rxrpc/call_event.c | 2 ++ 2 files changed, 29 insertions(+) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index d67a8c6b085a..56475497043d 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -543,6 +543,33 @@ TRACE_EVENT(rxrpc_propose_ack, rxrpc_propose_ack_outcomes[__entry->outcome]) ); +TRACE_EVENT(rxrpc_retransmit, + TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t seq, u8 annotation, + s64 expiry), + + TP_ARGS(call, seq, annotation, expiry), + + TP_STRUCT__entry( + __field(struct rxrpc_call *, call ) + __field(rxrpc_seq_t, seq ) + __field(u8, annotation ) + __field(s64, expiry ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->seq = seq; + __entry->annotation = annotation; + __entry->expiry = expiry; + ), + + TP_printk("c=%p q=%x a=%02x xp=%lld", + __entry->call, + __entry->seq, + __entry->annotation, + __entry->expiry) + ); + #endif /* _TRACE_RXRPC_H */ /* This part must be outside protection */ diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index fd5b11339ffb..a78a92fe5d77 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -196,6 +196,8 @@ static void rxrpc_resend(struct rxrpc_call *call) /* Okay, we need to retransmit a packet. */ call->rxtx_annotations[ix] = RXRPC_TX_ANNO_RETRANS | annotation; + trace_rxrpc_retransmit(call, seq, annotation | anno_type, + ktime_to_ns(ktime_sub(skb->tstamp, max_age))); } resend_at = ktime_sub(ktime_add_ms(oldest, rxrpc_resend_timeout), now); From 8083ad1cf910dc22d4300213583d9d540853898a Mon Sep 17 00:00:00 2001 From: Dean Jenkins Date: Fri, 23 Sep 2016 18:56:26 +0100 Subject: [PATCH 1471/1715] Bluetooth: Tidy-up coding style in hci_bcsp.c drivers/bluetooth/hci_bcsp.c contains some style issues as highlighted by ./scripts/checkpatch.pl --strict -f drivers/bluetooth/hci_bcsp.c a) comments - maintainer prefers network style comments b) positioning of lines in multi-line statements c) spaces after casts d) missing blank lines after declarations Therefore, tidy-up the above to make it easier to apply future code changes that have conforming style. Signed-off-by: Dean Jenkins Signed-off-by: Marcel Holtmann --- drivers/bluetooth/hci_bcsp.c | 55 +++++++++++++++++++----------------- 1 file changed, 29 insertions(+), 26 deletions(-) diff --git a/drivers/bluetooth/hci_bcsp.c b/drivers/bluetooth/hci_bcsp.c index d7d23ceba4d1..2628b3683b81 100644 --- a/drivers/bluetooth/hci_bcsp.c +++ b/drivers/bluetooth/hci_bcsp.c @@ -90,7 +90,8 @@ struct bcsp_struct { /* ---- BCSP CRC calculation ---- */ /* Table for calculating CRC for polynomial 0x1021, LSB processed first, -initial value 0xffff, bits shifted in reverse order. */ + * initial value 0xffff, bits shifted in reverse order. + */ static const u16 crc_table[] = { 0x0000, 0x1081, 0x2102, 0x3183, @@ -174,7 +175,7 @@ static int bcsp_enqueue(struct hci_uart *hu, struct sk_buff *skb) } static struct sk_buff *bcsp_prepare_pkt(struct bcsp_struct *bcsp, u8 *data, - int len, int pkt_type) + int len, int pkt_type) { struct sk_buff *nskb; u8 hdr[4], chan; @@ -213,6 +214,7 @@ static struct sk_buff *bcsp_prepare_pkt(struct bcsp_struct *bcsp, u8 *data, /* Vendor specific commands */ if (hci_opcode_ogf(__le16_to_cpu(opcode)) == 0x3f) { u8 desc = *(data + HCI_COMMAND_HDR_SIZE); + if ((desc & 0xf0) == 0xc0) { data += HCI_COMMAND_HDR_SIZE + 1; len -= HCI_COMMAND_HDR_SIZE + 1; @@ -271,8 +273,8 @@ static struct sk_buff *bcsp_prepare_pkt(struct bcsp_struct *bcsp, u8 *data, /* Put CRC */ if (bcsp->use_crc) { bcsp_txmsg_crc = bitrev16(bcsp_txmsg_crc); - bcsp_slip_one_byte(nskb, (u8) ((bcsp_txmsg_crc >> 8) & 0x00ff)); - bcsp_slip_one_byte(nskb, (u8) (bcsp_txmsg_crc & 0x00ff)); + bcsp_slip_one_byte(nskb, (u8)((bcsp_txmsg_crc >> 8) & 0x00ff)); + bcsp_slip_one_byte(nskb, (u8)(bcsp_txmsg_crc & 0x00ff)); } bcsp_slip_msgdelim(nskb); @@ -287,7 +289,8 @@ static struct sk_buff *bcsp_dequeue(struct hci_uart *hu) struct sk_buff *skb; /* First of all, check for unreliable messages in the queue, - since they have priority */ + * since they have priority + */ skb = skb_dequeue(&bcsp->unrel); if (skb != NULL) { @@ -414,7 +417,7 @@ static void bcsp_handle_le_pkt(struct hci_uart *hu) /* spot "conf" pkts and reply with a "conf rsp" pkt */ if (bcsp->rx_skb->data[1] >> 4 == 4 && bcsp->rx_skb->data[2] == 0 && - !memcmp(&bcsp->rx_skb->data[4], conf_pkt, 4)) { + !memcmp(&bcsp->rx_skb->data[4], conf_pkt, 4)) { struct sk_buff *nskb = alloc_skb(4, GFP_ATOMIC); BT_DBG("Found a LE conf pkt"); @@ -428,7 +431,7 @@ static void bcsp_handle_le_pkt(struct hci_uart *hu) } /* Spot "sync" pkts. If we find one...disaster! */ else if (bcsp->rx_skb->data[1] >> 4 == 4 && bcsp->rx_skb->data[2] == 0 && - !memcmp(&bcsp->rx_skb->data[4], sync_pkt, 4)) { + !memcmp(&bcsp->rx_skb->data[4], sync_pkt, 4)) { BT_ERR("Found a LE sync pkt, card has reset"); } } @@ -446,7 +449,7 @@ static inline void bcsp_unslip_one_byte(struct bcsp_struct *bcsp, unsigned char default: memcpy(skb_put(bcsp->rx_skb, 1), &byte, 1); if ((bcsp->rx_skb->data[0] & 0x40) != 0 && - bcsp->rx_state != BCSP_W4_CRC) + bcsp->rx_state != BCSP_W4_CRC) bcsp_crc_update(&bcsp->message_crc, byte); bcsp->rx_count--; } @@ -457,7 +460,7 @@ static inline void bcsp_unslip_one_byte(struct bcsp_struct *bcsp, unsigned char case 0xdc: memcpy(skb_put(bcsp->rx_skb, 1), &c0, 1); if ((bcsp->rx_skb->data[0] & 0x40) != 0 && - bcsp->rx_state != BCSP_W4_CRC) + bcsp->rx_state != BCSP_W4_CRC) bcsp_crc_update(&bcsp->message_crc, 0xc0); bcsp->rx_esc_state = BCSP_ESCSTATE_NOESC; bcsp->rx_count--; @@ -466,7 +469,7 @@ static inline void bcsp_unslip_one_byte(struct bcsp_struct *bcsp, unsigned char case 0xdd: memcpy(skb_put(bcsp->rx_skb, 1), &db, 1); if ((bcsp->rx_skb->data[0] & 0x40) != 0 && - bcsp->rx_state != BCSP_W4_CRC) + bcsp->rx_state != BCSP_W4_CRC) bcsp_crc_update(&bcsp->message_crc, 0xdb); bcsp->rx_esc_state = BCSP_ESCSTATE_NOESC; bcsp->rx_count--; @@ -502,18 +505,18 @@ static void bcsp_complete_rx_pkt(struct hci_uart *hu) bcsp_pkt_cull(bcsp); if ((bcsp->rx_skb->data[1] & 0x0f) == 6 && - bcsp->rx_skb->data[0] & 0x80) { + bcsp->rx_skb->data[0] & 0x80) { hci_skb_pkt_type(bcsp->rx_skb) = HCI_ACLDATA_PKT; pass_up = 1; } else if ((bcsp->rx_skb->data[1] & 0x0f) == 5 && - bcsp->rx_skb->data[0] & 0x80) { + bcsp->rx_skb->data[0] & 0x80) { hci_skb_pkt_type(bcsp->rx_skb) = HCI_EVENT_PKT; pass_up = 1; } else if ((bcsp->rx_skb->data[1] & 0x0f) == 7) { hci_skb_pkt_type(bcsp->rx_skb) = HCI_SCODATA_PKT; pass_up = 1; } else if ((bcsp->rx_skb->data[1] & 0x0f) == 1 && - !(bcsp->rx_skb->data[0] & 0x80)) { + !(bcsp->rx_skb->data[0] & 0x80)) { bcsp_handle_le_pkt(hu); pass_up = 0; } else @@ -537,9 +540,9 @@ static void bcsp_complete_rx_pkt(struct hci_uart *hu) hci_recv_frame(hu->hdev, bcsp->rx_skb); } else { BT_ERR("Packet for unknown channel (%u %s)", - bcsp->rx_skb->data[1] & 0x0f, - bcsp->rx_skb->data[0] & 0x80 ? - "reliable" : "unreliable"); + bcsp->rx_skb->data[1] & 0x0f, + bcsp->rx_skb->data[0] & 0x80 ? + "reliable" : "unreliable"); kfree_skb(bcsp->rx_skb); } } else @@ -567,7 +570,7 @@ static int bcsp_recv(struct hci_uart *hu, const void *data, int count) const unsigned char *ptr; BT_DBG("hu %p count %d rx_state %d rx_count %ld", - hu, count, bcsp->rx_state, bcsp->rx_count); + hu, count, bcsp->rx_state, bcsp->rx_count); ptr = data; while (count) { @@ -586,18 +589,18 @@ static int bcsp_recv(struct hci_uart *hu, const void *data, int count) switch (bcsp->rx_state) { case BCSP_W4_BCSP_HDR: - if ((0xff & (u8) ~ (bcsp->rx_skb->data[0] + bcsp->rx_skb->data[1] + - bcsp->rx_skb->data[2])) != bcsp->rx_skb->data[3]) { + if ((0xff & (u8)~(bcsp->rx_skb->data[0] + bcsp->rx_skb->data[1] + + bcsp->rx_skb->data[2])) != bcsp->rx_skb->data[3]) { BT_ERR("Error in BCSP hdr checksum"); kfree_skb(bcsp->rx_skb); bcsp->rx_state = BCSP_W4_PKT_DELIMITER; bcsp->rx_count = 0; continue; } - if (bcsp->rx_skb->data[0] & 0x80 /* reliable pkt */ - && (bcsp->rx_skb->data[0] & 0x07) != bcsp->rxseq_txack) { + if (bcsp->rx_skb->data[0] & 0x80 && /* reliable pkt */ + (bcsp->rx_skb->data[0] & 0x07) != bcsp->rxseq_txack) { BT_ERR("Out-of-order packet arrived, got %u expected %u", - bcsp->rx_skb->data[0] & 0x07, bcsp->rxseq_txack); + bcsp->rx_skb->data[0] & 0x07, bcsp->rxseq_txack); kfree_skb(bcsp->rx_skb); bcsp->rx_state = BCSP_W4_PKT_DELIMITER; @@ -620,8 +623,8 @@ static int bcsp_recv(struct hci_uart *hu, const void *data, int count) case BCSP_W4_CRC: if (bitrev16(bcsp->message_crc) != bscp_get_crc(bcsp)) { BT_ERR("Checksum failed: computed %04x received %04x", - bitrev16(bcsp->message_crc), - bscp_get_crc(bcsp)); + bitrev16(bcsp->message_crc), + bscp_get_crc(bcsp)); kfree_skb(bcsp->rx_skb); bcsp->rx_state = BCSP_W4_PKT_DELIMITER; @@ -679,7 +682,7 @@ static int bcsp_recv(struct hci_uart *hu, const void *data, int count) /* Arrange to retransmit all messages in the relq. */ static void bcsp_timed_event(unsigned long arg) { - struct hci_uart *hu = (struct hci_uart *) arg; + struct hci_uart *hu = (struct hci_uart *)arg; struct bcsp_struct *bcsp = hu->priv; struct sk_buff *skb; unsigned long flags; @@ -715,7 +718,7 @@ static int bcsp_open(struct hci_uart *hu) init_timer(&bcsp->tbcsp); bcsp->tbcsp.function = bcsp_timed_event; - bcsp->tbcsp.data = (u_long) hu; + bcsp->tbcsp.data = (u_long)hu; bcsp->rx_state = BCSP_W4_PKT_DELIMITER; From 37332ddc1470b820bd9d1eb36a2ca6ce1efb209b Mon Sep 17 00:00:00 2001 From: Dean Jenkins Date: Fri, 23 Sep 2016 18:56:27 +0100 Subject: [PATCH 1472/1715] Bluetooth: BCSP fails to ACK re-transmitted frames from the peer Send an ACK frame with the current txack value in response to every received reliable frame unless a TX reliable frame is being sent. This modification allows re-transmitted frames from the remote peer to be acknowledged rather than ignored. It means that the remote peer knows which frame number to start re-transmitting from. Without this modification, the recovery time to a missing frame from the remote peer was unnecessarily being extended because the headers of the out of order reliable frames were being discarded rather than being processed. The frame headers of received frames will indicate whether the local peer's transmissions have been acknowledged by the remote peer. Therefore, the local peer may unnecessarily re-transmit despite the remote peer already indicating that the frame had been acknowledged in out of order reliable frame. Signed-off-by: Dean Jenkins Signed-off-by: Jiada Wang Signed-off-by: Rajeev Kumar Signed-off-by: Marcel Holtmann --- drivers/bluetooth/hci_bcsp.c | 83 ++++++++++++++++++++++-------------- 1 file changed, 50 insertions(+), 33 deletions(-) diff --git a/drivers/bluetooth/hci_bcsp.c b/drivers/bluetooth/hci_bcsp.c index 2628b3683b81..a2c921faaa12 100644 --- a/drivers/bluetooth/hci_bcsp.c +++ b/drivers/bluetooth/hci_bcsp.c @@ -488,13 +488,28 @@ static inline void bcsp_unslip_one_byte(struct bcsp_struct *bcsp, unsigned char static void bcsp_complete_rx_pkt(struct hci_uart *hu) { struct bcsp_struct *bcsp = hu->priv; - int pass_up; + int pass_up = 0; if (bcsp->rx_skb->data[0] & 0x80) { /* reliable pkt */ BT_DBG("Received seqno %u from card", bcsp->rxseq_txack); - bcsp->rxseq_txack++; - bcsp->rxseq_txack %= 0x8; - bcsp->txack_req = 1; + + /* check the rx sequence number is as expected */ + if ((bcsp->rx_skb->data[0] & 0x07) == bcsp->rxseq_txack) { + bcsp->rxseq_txack++; + bcsp->rxseq_txack %= 0x8; + } else { + /* handle re-transmitted packet or + * when packet was missed + */ + BT_ERR("Out-of-order packet arrived, got %u expected %u", + bcsp->rx_skb->data[0] & 0x07, bcsp->rxseq_txack); + + /* do not process out-of-order packet payload */ + pass_up = 2; + } + + /* send current txack value to all received reliable packets */ + bcsp->txack_req = 1; /* If needed, transmit an ack pkt */ hci_uart_tx_wakeup(hu); @@ -503,26 +518,33 @@ static void bcsp_complete_rx_pkt(struct hci_uart *hu) bcsp->rxack = (bcsp->rx_skb->data[0] >> 3) & 0x07; BT_DBG("Request for pkt %u from card", bcsp->rxack); + /* handle received ACK indications, + * including those from out-of-order packets + */ bcsp_pkt_cull(bcsp); - if ((bcsp->rx_skb->data[1] & 0x0f) == 6 && - bcsp->rx_skb->data[0] & 0x80) { - hci_skb_pkt_type(bcsp->rx_skb) = HCI_ACLDATA_PKT; - pass_up = 1; - } else if ((bcsp->rx_skb->data[1] & 0x0f) == 5 && - bcsp->rx_skb->data[0] & 0x80) { - hci_skb_pkt_type(bcsp->rx_skb) = HCI_EVENT_PKT; - pass_up = 1; - } else if ((bcsp->rx_skb->data[1] & 0x0f) == 7) { - hci_skb_pkt_type(bcsp->rx_skb) = HCI_SCODATA_PKT; - pass_up = 1; - } else if ((bcsp->rx_skb->data[1] & 0x0f) == 1 && - !(bcsp->rx_skb->data[0] & 0x80)) { - bcsp_handle_le_pkt(hu); - pass_up = 0; - } else - pass_up = 0; - if (!pass_up) { + if (pass_up != 2) { + if ((bcsp->rx_skb->data[1] & 0x0f) == 6 && + (bcsp->rx_skb->data[0] & 0x80)) { + hci_skb_pkt_type(bcsp->rx_skb) = HCI_ACLDATA_PKT; + pass_up = 1; + } else if ((bcsp->rx_skb->data[1] & 0x0f) == 5 && + (bcsp->rx_skb->data[0] & 0x80)) { + hci_skb_pkt_type(bcsp->rx_skb) = HCI_EVENT_PKT; + pass_up = 1; + } else if ((bcsp->rx_skb->data[1] & 0x0f) == 7) { + hci_skb_pkt_type(bcsp->rx_skb) = HCI_SCODATA_PKT; + pass_up = 1; + } else if ((bcsp->rx_skb->data[1] & 0x0f) == 1 && + !(bcsp->rx_skb->data[0] & 0x80)) { + bcsp_handle_le_pkt(hu); + pass_up = 0; + } else { + pass_up = 0; + } + } + + if (pass_up == 0) { struct hci_event_hdr hdr; u8 desc = (bcsp->rx_skb->data[1] & 0x0f); @@ -547,11 +569,16 @@ static void bcsp_complete_rx_pkt(struct hci_uart *hu) } } else kfree_skb(bcsp->rx_skb); - } else { + } else if (pass_up == 1) { /* Pull out BCSP hdr */ skb_pull(bcsp->rx_skb, 4); hci_recv_frame(hu->hdev, bcsp->rx_skb); + } else { + /* ignore packet payload of already ACKed re-transmitted + * packets or when a packet was missed in the BCSP window + */ + kfree_skb(bcsp->rx_skb); } bcsp->rx_state = BCSP_W4_PKT_DELIMITER; @@ -597,16 +624,6 @@ static int bcsp_recv(struct hci_uart *hu, const void *data, int count) bcsp->rx_count = 0; continue; } - if (bcsp->rx_skb->data[0] & 0x80 && /* reliable pkt */ - (bcsp->rx_skb->data[0] & 0x07) != bcsp->rxseq_txack) { - BT_ERR("Out-of-order packet arrived, got %u expected %u", - bcsp->rx_skb->data[0] & 0x07, bcsp->rxseq_txack); - - kfree_skb(bcsp->rx_skb); - bcsp->rx_state = BCSP_W4_PKT_DELIMITER; - bcsp->rx_count = 0; - continue; - } bcsp->rx_state = BCSP_W4_DATA; bcsp->rx_count = (bcsp->rx_skb->data[1] >> 4) + (bcsp->rx_skb->data[2] << 4); /* May be 0 */ From 056506944a58814e5767d55ef6389aa2ab06908c Mon Sep 17 00:00:00 2001 From: Vignesh Raman Date: Fri, 23 Sep 2016 18:56:28 +0100 Subject: [PATCH 1473/1715] Bluetooth: Use single return in hci_uart_tty_ioctl() call Remove multiple return statements in hci_uart_tty_ioctl() call and added a single return statement. This code re-organisation allows subsequent locking to be easily added. Signed-off-by: Vignesh Raman Signed-off-by: Dean Jenkins Signed-off-by: Rajeev Kumar Signed-off-by: Marcel Holtmann --- drivers/bluetooth/hci_ldisc.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c index 9a3aab67b6bb..9497c469efd2 100644 --- a/drivers/bluetooth/hci_ldisc.c +++ b/drivers/bluetooth/hci_ldisc.c @@ -697,34 +697,36 @@ static int hci_uart_tty_ioctl(struct tty_struct *tty, struct file *file, case HCIUARTSETPROTO: if (!test_and_set_bit(HCI_UART_PROTO_SET, &hu->flags)) { err = hci_uart_set_proto(hu, arg); - if (err) { + if (err) clear_bit(HCI_UART_PROTO_SET, &hu->flags); - return err; - } } else - return -EBUSY; + err = -EBUSY; break; case HCIUARTGETPROTO: if (test_bit(HCI_UART_PROTO_SET, &hu->flags)) - return hu->proto->id; - return -EUNATCH; + err = hu->proto->id; + else + err = -EUNATCH; + break; case HCIUARTGETDEVICE: if (test_bit(HCI_UART_REGISTERED, &hu->flags)) - return hu->hdev->id; - return -EUNATCH; + err = hu->hdev->id; + else + err = -EUNATCH; + break; case HCIUARTSETFLAGS: if (test_bit(HCI_UART_PROTO_SET, &hu->flags)) - return -EBUSY; - err = hci_uart_set_flags(hu, arg); - if (err) - return err; + err = -EBUSY; + else + err = hci_uart_set_flags(hu, arg); break; case HCIUARTGETFLAGS: - return hu->hdev_flags; + err = hu->hdev_flags; + break; default: err = n_tty_ioctl_helper(tty, file, cmd, arg); From c9cc599a96a6822c52cd72ed31dd7f813d792b4f Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Thu, 22 Sep 2016 12:11:12 +0300 Subject: [PATCH 1474/1715] net/mlx4_core: Fix QUERY FUNC CAP flags Separate QUERY_FUNC_CAP flags0 from QUERY_FUNC_CAP flags, as 'flags' is already used for another set of flags in FUNC CAP, while phv bit should be part of a different set of flags. Remove QUERY_FUNC_CAP port_flags field, as it is not in use. Fixes: 77fc29c4bbbb ('net/mlx4_core: Preparations for 802.1ad VLAN support') Fixes: 5cc914f10851 ('mlx4_core: Added FW commands and their wrappers for supporting SRIOV') Signed-off-by: Moshe Shemesh Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/fw.c | 5 ++--- drivers/net/ethernet/mellanox/mlx4/fw.h | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index d728704d0c7b..c7523307b41a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -612,8 +612,7 @@ int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u8 gen_or_port, MLX4_GET(func_cap->phys_port_id, outbox, QUERY_FUNC_CAP_PHYS_PORT_ID); - MLX4_GET(field, outbox, QUERY_FUNC_CAP_FLAGS0_OFFSET); - func_cap->flags |= (field & QUERY_FUNC_CAP_PHV_BIT); + MLX4_GET(func_cap->flags0, outbox, QUERY_FUNC_CAP_FLAGS0_OFFSET); /* All other resources are allocated by the master, but we still report * 'num' and 'reserved' capabilities as follows: @@ -2914,7 +2913,7 @@ int get_phv_bit(struct mlx4_dev *dev, u8 port, int *phv) memset(&func_cap, 0, sizeof(func_cap)); err = mlx4_QUERY_FUNC_CAP(dev, port, &func_cap); if (!err) - *phv = func_cap.flags & QUERY_FUNC_CAP_PHV_BIT; + *phv = func_cap.flags0 & QUERY_FUNC_CAP_PHV_BIT; return err; } EXPORT_SYMBOL(get_phv_bit); diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h index cdbd76f10ced..f11614f12517 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.h +++ b/drivers/net/ethernet/mellanox/mlx4/fw.h @@ -152,7 +152,7 @@ struct mlx4_func_cap { u32 qp1_proxy_qpn; u32 reserved_lkey; u8 physical_port; - u8 port_flags; + u8 flags0; u8 flags1; u64 phys_port_id; u32 extra_flags; From 7c3d21c8153c6bfb5690e35e086b0522c42442d9 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Thu, 22 Sep 2016 12:11:13 +0300 Subject: [PATCH 1475/1715] net/mlx4_core: Preparation for VF vlan protocol 802.1ad Check device capability to support VF vlan protocol 802.1ad mode. Add vport attribute vlan protocol. Init vport vlan protocol by default to 802.1Q. Add update QP support for VF vlan protocol 802.1ad. Add func capability vlan_offload_disable to disable all vlan HW acceleration on VF while the VF is set to VF vlan protocol 802.1ad mode. No change in VF vlan protocol 802.1Q (VST) mode. Signed-off-by: Moshe Shemesh Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/cmd.c | 7 ++++ drivers/net/ethernet/mellanox/mlx4/fw.c | 37 ++++++++++++++--- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 2 + .../ethernet/mellanox/mlx4/resource_tracker.c | 40 +++++++++++++++---- include/linux/mlx4/device.h | 3 ++ include/linux/mlx4/qp.h | 2 + 6 files changed, 78 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index a58d96cf1ed1..09c969420eac 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -1851,6 +1851,7 @@ static int mlx4_master_immediate_activate_vlan_qos(struct mlx4_priv *priv, if (vp_oper->state.default_vlan == vp_admin->default_vlan && vp_oper->state.default_qos == vp_admin->default_qos && + vp_oper->state.vlan_proto == vp_admin->vlan_proto && vp_oper->state.link_state == vp_admin->link_state && vp_oper->state.qos_vport == vp_admin->qos_vport) return 0; @@ -1909,6 +1910,7 @@ static int mlx4_master_immediate_activate_vlan_qos(struct mlx4_priv *priv, vp_oper->state.default_vlan = vp_admin->default_vlan; vp_oper->state.default_qos = vp_admin->default_qos; + vp_oper->state.vlan_proto = vp_admin->vlan_proto; vp_oper->state.link_state = vp_admin->link_state; vp_oper->state.qos_vport = vp_admin->qos_vport; @@ -1922,6 +1924,7 @@ static int mlx4_master_immediate_activate_vlan_qos(struct mlx4_priv *priv, work->qos_vport = vp_oper->state.qos_vport; work->vlan_id = vp_oper->state.default_vlan; work->vlan_ix = vp_oper->vlan_idx; + work->vlan_proto = vp_oper->state.vlan_proto; work->priv = priv; INIT_WORK(&work->work, mlx4_vf_immed_vlan_work_handler); queue_work(priv->mfunc.master.comm_wq, &work->work); @@ -2012,6 +2015,8 @@ static int mlx4_master_activate_admin_state(struct mlx4_priv *priv, int slave) vp_admin->default_vlan, &(vp_oper->vlan_idx)); if (err) { vp_oper->vlan_idx = NO_INDX; + vp_oper->state.default_vlan = MLX4_VGT; + vp_oper->state.vlan_proto = htons(ETH_P_8021Q); mlx4_warn(&priv->dev, "No vlan resources slave %d, port %d\n", slave, port); @@ -2388,6 +2393,8 @@ int mlx4_multi_func_init(struct mlx4_dev *dev) admin_vport->qos_vport = MLX4_VPP_DEFAULT_VPORT; oper_vport->qos_vport = MLX4_VPP_DEFAULT_VPORT; + admin_vport->vlan_proto = htons(ETH_P_8021Q); + oper_vport->vlan_proto = htons(ETH_P_8021Q); vf_oper->vport[port].vlan_idx = NO_INDX; vf_oper->vport[port].mac_idx = NO_INDX; mlx4_set_random_admin_guid(dev, i, port); diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index c7523307b41a..7dc9d38a51f8 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -158,7 +158,8 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags) [31] = "Modifying loopback source checks using UPDATE_QP support", [32] = "Loopback source checks support", [33] = "RoCEv2 support", - [34] = "DMFS Sniffer support (UC & MC)" + [34] = "DMFS Sniffer support (UC & MC)", + [35] = "QinQ VST mode support", }; int i; @@ -313,12 +314,15 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, #define QUERY_FUNC_CAP_FLAGS0_FORCE_PHY_WQE_GID 0x80 #define QUERY_FUNC_CAP_SUPPORTS_NON_POWER_OF_2_NUM_EQS (1 << 31) #define QUERY_FUNC_CAP_PHV_BIT 0x40 +#define QUERY_FUNC_CAP_VLAN_OFFLOAD_DISABLE 0x20 if (vhcr->op_modifier == 1) { struct mlx4_active_ports actv_ports = mlx4_get_active_ports(dev, slave); int converted_port = mlx4_slave_convert_port( dev, slave, vhcr->in_modifier); + struct mlx4_vport_oper_state *vp_oper = + &priv->mfunc.master.vf_oper[slave].vport[vhcr->in_modifier]; if (converted_port < 0) return -EINVAL; @@ -357,11 +361,12 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, MLX4_PUT(outbox->buf, dev->caps.phys_port_id[vhcr->in_modifier], QUERY_FUNC_CAP_PHYS_PORT_ID); - if (dev->caps.phv_bit[port]) { - field = QUERY_FUNC_CAP_PHV_BIT; - MLX4_PUT(outbox->buf, field, - QUERY_FUNC_CAP_FLAGS0_OFFSET); - } + field = 0; + if (dev->caps.phv_bit[port]) + field |= QUERY_FUNC_CAP_PHV_BIT; + if (vp_oper->state.vlan_proto == htons(ETH_P_8021AD)) + field |= QUERY_FUNC_CAP_VLAN_OFFLOAD_DISABLE; + MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FLAGS0_OFFSET); } else if (vhcr->op_modifier == 0) { struct mlx4_active_ports actv_ports = @@ -689,6 +694,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) #define QUERY_DEV_CAP_MAX_DESC_SZ_SQ_OFFSET 0x52 #define QUERY_DEV_CAP_MAX_SG_RQ_OFFSET 0x55 #define QUERY_DEV_CAP_MAX_DESC_SZ_RQ_OFFSET 0x56 +#define QUERY_DEV_CAP_SVLAN_BY_QP_OFFSET 0x5D #define QUERY_DEV_CAP_MAX_QP_MCG_OFFSET 0x61 #define QUERY_DEV_CAP_RSVD_MCG_OFFSET 0x62 #define QUERY_DEV_CAP_MAX_MCG_OFFSET 0x63 @@ -856,6 +862,9 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) MLX4_GET(size, outbox, QUERY_DEV_CAP_MAX_DESC_SZ_SQ_OFFSET); dev_cap->max_sq_desc_sz = size; + MLX4_GET(field, outbox, QUERY_DEV_CAP_SVLAN_BY_QP_OFFSET); + if (field & 0x1) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_SVLAN_BY_QP; MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_QP_MCG_OFFSET); dev_cap->max_qp_per_mcg = 1 << field; MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_MCG_OFFSET); @@ -2937,6 +2946,22 @@ int set_phv_bit(struct mlx4_dev *dev, u8 port, int new_val) } EXPORT_SYMBOL(set_phv_bit); +int mlx4_get_is_vlan_offload_disabled(struct mlx4_dev *dev, u8 port, + bool *vlan_offload_disabled) +{ + struct mlx4_func_cap func_cap; + int err; + + memset(&func_cap, 0, sizeof(func_cap)); + err = mlx4_QUERY_FUNC_CAP(dev, port, &func_cap); + if (!err) + *vlan_offload_disabled = + !!(func_cap.flags0 & + QUERY_FUNC_CAP_VLAN_OFFLOAD_DISABLE); + return err; +} +EXPORT_SYMBOL(mlx4_get_is_vlan_offload_disabled); + void mlx4_replace_zero_macs(struct mlx4_dev *dev) { int i; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index c128ba3ef014..fdfe1ace07d7 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -508,6 +508,7 @@ struct mlx4_vport_state { u64 mac; u16 default_vlan; u8 default_qos; + __be16 vlan_proto; u32 tx_rate; bool spoofchk; u32 link_state; @@ -657,6 +658,7 @@ struct mlx4_vf_immed_vlan_work { u8 qos_vport; u16 vlan_id; u16 orig_vlan_id; + __be16 vlan_proto; }; diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 8b81114bdc72..84d7857ccc27 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -790,10 +790,22 @@ static int update_vport_qp_param(struct mlx4_dev *dev, MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED | MLX4_VLAN_CTRL_ETH_RX_BLOCK_TAGGED; } else if (0 != vp_oper->state.default_vlan) { - qpc->pri_path.vlan_control |= - MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED | - MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED | - MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED; + if (vp_oper->state.vlan_proto == htons(ETH_P_8021AD)) { + /* vst QinQ should block untagged on TX, + * but cvlan is in payload and phv is set so + * hw see it as untagged. Block tagged instead. + */ + qpc->pri_path.vlan_control |= + MLX4_VLAN_CTRL_ETH_TX_BLOCK_PRIO_TAGGED | + MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED | + MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED | + MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED; + } else { /* vst 802.1Q */ + qpc->pri_path.vlan_control |= + MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED | + MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED | + MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED; + } } else { /* priority tagged */ qpc->pri_path.vlan_control |= MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED | @@ -802,7 +814,11 @@ static int update_vport_qp_param(struct mlx4_dev *dev, qpc->pri_path.fvl_rx |= MLX4_FVL_RX_FORCE_ETH_VLAN; qpc->pri_path.vlan_index = vp_oper->vlan_idx; - qpc->pri_path.fl |= MLX4_FL_CV | MLX4_FL_ETH_HIDE_CQE_VLAN; + qpc->pri_path.fl |= MLX4_FL_ETH_HIDE_CQE_VLAN; + if (vp_oper->state.vlan_proto == htons(ETH_P_8021AD)) + qpc->pri_path.fl |= MLX4_FL_SV; + else + qpc->pri_path.fl |= MLX4_FL_CV; qpc->pri_path.feup |= MLX4_FEUP_FORCE_ETH_UP | MLX4_FVL_FORCE_ETH_VLAN; qpc->pri_path.sched_queue &= 0xC7; qpc->pri_path.sched_queue |= (vp_oper->state.default_qos) << 3; @@ -5238,6 +5254,7 @@ void mlx4_vf_immed_vlan_work_handler(struct work_struct *_work) u64 qp_path_mask = ((1ULL << MLX4_UPD_QP_PATH_MASK_VLAN_INDEX) | (1ULL << MLX4_UPD_QP_PATH_MASK_FVL) | (1ULL << MLX4_UPD_QP_PATH_MASK_CV) | + (1ULL << MLX4_UPD_QP_PATH_MASK_SV) | (1ULL << MLX4_UPD_QP_PATH_MASK_ETH_HIDE_CQE_VLAN) | (1ULL << MLX4_UPD_QP_PATH_MASK_FEUP) | (1ULL << MLX4_UPD_QP_PATH_MASK_FVL_RX) | @@ -5266,7 +5283,12 @@ void mlx4_vf_immed_vlan_work_handler(struct work_struct *_work) else if (!work->vlan_id) vlan_control = MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED | MLX4_VLAN_CTRL_ETH_RX_BLOCK_TAGGED; - else + else if (work->vlan_proto == htons(ETH_P_8021AD)) + vlan_control = MLX4_VLAN_CTRL_ETH_TX_BLOCK_PRIO_TAGGED | + MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED | + MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED | + MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED; + else /* vst 802.1Q */ vlan_control = MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED | MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED | MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED; @@ -5311,7 +5333,11 @@ void mlx4_vf_immed_vlan_work_handler(struct work_struct *_work) upd_context->qp_context.pri_path.fvl_rx = qp->fvl_rx | MLX4_FVL_RX_FORCE_ETH_VLAN; upd_context->qp_context.pri_path.fl = - qp->pri_path_fl | MLX4_FL_CV | MLX4_FL_ETH_HIDE_CQE_VLAN; + qp->pri_path_fl | MLX4_FL_ETH_HIDE_CQE_VLAN; + if (work->vlan_proto == htons(ETH_P_8021AD)) + upd_context->qp_context.pri_path.fl |= MLX4_FL_SV; + else + upd_context->qp_context.pri_path.fl |= MLX4_FL_CV; upd_context->qp_context.pri_path.feup = qp->feup | MLX4_FEUP_FORCE_ETH_UP | MLX4_FVL_FORCE_ETH_VLAN; upd_context->qp_context.pri_path.sched_queue = diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 42da3552f7cb..59b50d3eedb4 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -221,6 +221,7 @@ enum { MLX4_DEV_CAP_FLAG2_ROCE_V1_V2 = 1ULL << 33, MLX4_DEV_CAP_FLAG2_DMFS_UC_MC_SNIFFER = 1ULL << 34, MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT = 1ULL << 35, + MLX4_DEV_CAP_FLAG2_SVLAN_BY_QP = 1ULL << 36, }; enum { @@ -1371,6 +1372,8 @@ int mlx4_SET_PORT_fcs_check(struct mlx4_dev *dev, u8 port, int mlx4_SET_PORT_VXLAN(struct mlx4_dev *dev, u8 port, u8 steering, int enable); int set_phv_bit(struct mlx4_dev *dev, u8 port, int new_val); int get_phv_bit(struct mlx4_dev *dev, u8 port, int *phv); +int mlx4_get_is_vlan_offload_disabled(struct mlx4_dev *dev, u8 port, + bool *vlan_offload_disabled); int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *idx); int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx); int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index); diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index deaa2217214d..b4ee8f62ce8d 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -160,6 +160,7 @@ struct mlx4_qp_path { enum { /* fl */ MLX4_FL_CV = 1 << 6, + MLX4_FL_SV = 1 << 5, MLX4_FL_ETH_HIDE_CQE_VLAN = 1 << 2, MLX4_FL_ETH_SRC_CHECK_MC_LB = 1 << 1, MLX4_FL_ETH_SRC_CHECK_UC_LB = 1 << 0, @@ -267,6 +268,7 @@ enum { MLX4_UPD_QP_PATH_MASK_FVL_RX = 16 + 32, MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_UC_LB = 18 + 32, MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_MC_LB = 19 + 32, + MLX4_UPD_QP_PATH_MASK_SV = 22 + 32, }; enum { /* param3 */ From 0815fe3a86a01cdf81361459c465761be7138665 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Thu, 22 Sep 2016 12:11:14 +0300 Subject: [PATCH 1476/1715] net/mlx4_en: Disable vlan HW acceleration when in VF vlan protocol 802.1ad mode In Ethernet VF, disable vlan HW acceleration on VF while it is set to VF vlan protocol 802.1ad mode. Signed-off-by: Moshe Shemesh Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 62516f8369ba..a94f8a3f026c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -3224,6 +3224,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, } if (mlx4_is_slave(mdev->dev)) { + bool vlan_offload_disabled; int phv; err = get_phv_bit(mdev->dev, port, &phv); @@ -3231,6 +3232,18 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, dev->hw_features |= NETIF_F_HW_VLAN_STAG_TX; priv->pflags |= MLX4_EN_PRIV_FLAGS_PHV; } + err = mlx4_get_is_vlan_offload_disabled(mdev->dev, port, + &vlan_offload_disabled); + if (!err && vlan_offload_disabled) { + dev->hw_features &= ~(NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_STAG_TX | + NETIF_F_HW_VLAN_STAG_RX); + dev->features &= ~(NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_STAG_TX | + NETIF_F_HW_VLAN_STAG_RX); + } } else { if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PHV_EN && !(mdev->dev->caps.flags2 & From 79aab093a0b5370d7fc4e99df75996f4744dc03f Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Thu, 22 Sep 2016 12:11:15 +0300 Subject: [PATCH 1477/1715] net: Update API for VF vlan protocol 802.1ad support Introduce new rtnl UAPI that exposes a list of vlans per VF, giving the ability for user-space application to specify it for the VF, as an option to support 802.1ad. We adjusted IP Link tool to support this option. For future use cases, the new UAPI supports multiple vlans. For now we limit the list size to a single vlan in kernel. Add IFLA_VF_VLAN_LIST in addition to IFLA_VF_VLAN to keep backward compatibility with older versions of IP Link tool. Add a vlan protocol parameter to the ndo_set_vf_vlan callback. We kept 802.1Q as the drivers' default vlan protocol. Suitable ip link tool command examples: Set vf vlan protocol 802.1ad: ip link set eth0 vf 1 vlan 100 proto 802.1ad Set vf to VST (802.1Q) mode: ip link set eth0 vf 1 vlan 100 proto 802.1Q Or by omitting the new parameter ip link set eth0 vf 1 vlan 100 Signed-off-by: Moshe Shemesh Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- .../net/ethernet/broadcom/bnx2x/bnx2x_cmn.h | 3 +- .../net/ethernet/broadcom/bnx2x/bnx2x_sriov.c | 9 ++- .../net/ethernet/broadcom/bnxt/bnxt_sriov.c | 6 +- .../net/ethernet/broadcom/bnxt/bnxt_sriov.h | 2 +- drivers/net/ethernet/emulex/benet/be_main.c | 6 +- drivers/net/ethernet/intel/fm10k/fm10k.h | 2 +- drivers/net/ethernet/intel/fm10k/fm10k_iov.c | 6 +- .../ethernet/intel/i40e/i40e_virtchnl_pf.c | 11 ++- .../ethernet/intel/i40e/i40e_virtchnl_pf.h | 4 +- drivers/net/ethernet/intel/igb/igb_main.c | 9 ++- .../net/ethernet/intel/ixgbe/ixgbe_sriov.c | 5 +- .../net/ethernet/intel/ixgbe/ixgbe_sriov.h | 2 +- .../net/ethernet/mellanox/mlx4/en_netdev.c | 6 +- .../net/ethernet/mellanox/mlx5/core/en_main.c | 6 +- drivers/net/ethernet/qlogic/qede/qede_main.c | 6 +- .../net/ethernet/qlogic/qlcnic/qlcnic_sriov.h | 2 +- .../ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c | 5 +- drivers/net/ethernet/sfc/sriov.c | 5 +- drivers/net/ethernet/sfc/sriov.h | 2 +- include/linux/if_link.h | 1 + include/linux/netdevice.h | 6 +- include/uapi/linux/if_link.h | 19 ++++- net/core/rtnetlink.c | 80 +++++++++++++++---- 23 files changed, 161 insertions(+), 42 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h index 0e68fadecfdb..243cb9748d35 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h @@ -492,7 +492,8 @@ int __bnx2x_setup_tc(struct net_device *dev, u32 handle, __be16 proto, int bnx2x_get_vf_config(struct net_device *dev, int vf, struct ifla_vf_info *ivi); int bnx2x_set_vf_mac(struct net_device *dev, int queue, u8 *mac); -int bnx2x_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos); +int bnx2x_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos, + __be16 vlan_proto); /* select_queue callback */ u16 bnx2x_select_queue(struct net_device *dev, struct sk_buff *skb, diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c index 6c586b045d1d..3f77d0863543 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c @@ -2521,7 +2521,8 @@ void bnx2x_pf_set_vfs_vlan(struct bnx2x *bp) for_each_vf(bp, vfidx) { bulletin = BP_VF_BULLETIN(bp, vfidx); if (bulletin->valid_bitmap & (1 << VLAN_VALID)) - bnx2x_set_vf_vlan(bp->dev, vfidx, bulletin->vlan, 0); + bnx2x_set_vf_vlan(bp->dev, vfidx, bulletin->vlan, 0, + htons(ETH_P_8021Q)); } } @@ -2781,7 +2782,8 @@ static int bnx2x_set_vf_vlan_filter(struct bnx2x *bp, struct bnx2x_virtf *vf, return 0; } -int bnx2x_set_vf_vlan(struct net_device *dev, int vfidx, u16 vlan, u8 qos) +int bnx2x_set_vf_vlan(struct net_device *dev, int vfidx, u16 vlan, u8 qos, + __be16 vlan_proto) { struct pf_vf_bulletin_content *bulletin = NULL; struct bnx2x *bp = netdev_priv(dev); @@ -2796,6 +2798,9 @@ int bnx2x_set_vf_vlan(struct net_device *dev, int vfidx, u16 vlan, u8 qos) return -EINVAL; } + if (vlan_proto != htons(ETH_P_8021Q)) + return -EPROTONOSUPPORT; + DP(BNX2X_MSG_IOV, "configuring VF %d with VLAN %d qos %d\n", vfidx, vlan, 0); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c index 8be718508600..ec6cd18842c3 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c @@ -174,7 +174,8 @@ int bnxt_set_vf_mac(struct net_device *dev, int vf_id, u8 *mac) return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); } -int bnxt_set_vf_vlan(struct net_device *dev, int vf_id, u16 vlan_id, u8 qos) +int bnxt_set_vf_vlan(struct net_device *dev, int vf_id, u16 vlan_id, u8 qos, + __be16 vlan_proto) { struct hwrm_func_cfg_input req = {0}; struct bnxt *bp = netdev_priv(dev); @@ -185,6 +186,9 @@ int bnxt_set_vf_vlan(struct net_device *dev, int vf_id, u16 vlan_id, u8 qos) if (bp->hwrm_spec_code < 0x10201) return -ENOTSUPP; + if (vlan_proto != htons(ETH_P_8021Q)) + return -EPROTONOSUPPORT; + rc = bnxt_vf_ndo_prep(bp, vf_id); if (rc) return rc; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h index 0392670ab49c..1ab72e4820af 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h @@ -12,7 +12,7 @@ int bnxt_get_vf_config(struct net_device *, int, struct ifla_vf_info *); int bnxt_set_vf_mac(struct net_device *, int, u8 *); -int bnxt_set_vf_vlan(struct net_device *, int, u16, u8); +int bnxt_set_vf_vlan(struct net_device *, int, u16, u8, __be16); int bnxt_set_vf_bw(struct net_device *, int, int, int); int bnxt_set_vf_link_state(struct net_device *, int, int); int bnxt_set_vf_spoofchk(struct net_device *, int, bool); diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 9a94840c5757..ac513e6627d1 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -1895,7 +1895,8 @@ static int be_clear_vf_tvt(struct be_adapter *adapter, int vf) return 0; } -static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos) +static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos, + __be16 vlan_proto) { struct be_adapter *adapter = netdev_priv(netdev); struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf]; @@ -1907,6 +1908,9 @@ static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos) if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7) return -EINVAL; + if (vlan_proto != htons(ETH_P_8021Q)) + return -EPROTONOSUPPORT; + if (vlan || qos) { vlan |= qos << VLAN_PRIO_SHIFT; status = be_set_vf_tvt(adapter, vf, vlan); diff --git a/drivers/net/ethernet/intel/fm10k/fm10k.h b/drivers/net/ethernet/intel/fm10k/fm10k.h index 67ff01aeb11a..4d19e46f7c55 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k.h +++ b/drivers/net/ethernet/intel/fm10k/fm10k.h @@ -507,7 +507,7 @@ int fm10k_iov_configure(struct pci_dev *pdev, int num_vfs); s32 fm10k_iov_update_pvid(struct fm10k_intfc *interface, u16 glort, u16 pvid); int fm10k_ndo_set_vf_mac(struct net_device *netdev, int vf_idx, u8 *mac); int fm10k_ndo_set_vf_vlan(struct net_device *netdev, - int vf_idx, u16 vid, u8 qos); + int vf_idx, u16 vid, u8 qos, __be16 vlan_proto); int fm10k_ndo_set_vf_bw(struct net_device *netdev, int vf_idx, int rate, int unused); int fm10k_ndo_get_vf_config(struct net_device *netdev, diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c index d9dec81f6b6d..5f4dac0d36ef 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c @@ -445,7 +445,7 @@ int fm10k_ndo_set_vf_mac(struct net_device *netdev, int vf_idx, u8 *mac) } int fm10k_ndo_set_vf_vlan(struct net_device *netdev, int vf_idx, u16 vid, - u8 qos) + u8 qos, __be16 vlan_proto) { struct fm10k_intfc *interface = netdev_priv(netdev); struct fm10k_iov_data *iov_data = interface->iov_data; @@ -460,6 +460,10 @@ int fm10k_ndo_set_vf_vlan(struct net_device *netdev, int vf_idx, u16 vid, if (qos || (vid > (VLAN_VID_MASK - 1))) return -EINVAL; + /* VF VLAN Protocol part to default is unsupported */ + if (vlan_proto != htons(ETH_P_8021Q)) + return -EPROTONOSUPPORT; + vf_info = &iov_data->vf_info[vf_idx]; /* exit if there is nothing to do */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index da3423561b3a..724d8740d4cc 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -2747,11 +2747,12 @@ error_param: * @vf_id: VF identifier * @vlan_id: mac address * @qos: priority setting + * @vlan_proto: vlan protocol * * program VF vlan id and/or qos **/ -int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, - int vf_id, u16 vlan_id, u8 qos) +int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id, + u16 vlan_id, u8 qos, __be16 vlan_proto) { u16 vlanprio = vlan_id | (qos << I40E_VLAN_PRIORITY_SHIFT); struct i40e_netdev_priv *np = netdev_priv(netdev); @@ -2774,6 +2775,12 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, goto error_pvid; } + if (vlan_proto != htons(ETH_P_8021Q)) { + dev_err(&pf->pdev->dev, "VF VLAN protocol is not supported\n"); + ret = -EPROTONOSUPPORT; + goto error_pvid; + } + vf = &(pf->vf[vf_id]); vsi = pf->vsi[vf->lan_vsi_idx]; if (!test_bit(I40E_VF_STAT_INIT, &vf->vf_states)) { diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h index 875174141451..4012d069939a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h @@ -129,8 +129,8 @@ void i40e_vc_notify_vf_reset(struct i40e_vf *vf); /* VF configuration related iplink handlers */ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac); -int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, - int vf_id, u16 vlan_id, u8 qos); +int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id, + u16 vlan_id, u8 qos, __be16 vlan_proto); int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate, int max_tx_rate); int i40e_ndo_set_vf_trust(struct net_device *netdev, int vf_id, bool setting); diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index af75eac5fa16..a83aa13a5bf4 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -169,7 +169,7 @@ static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *); static void igb_restore_vf_multicasts(struct igb_adapter *adapter); static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac); static int igb_ndo_set_vf_vlan(struct net_device *netdev, - int vf, u16 vlan, u8 qos); + int vf, u16 vlan, u8 qos, __be16 vlan_proto); static int igb_ndo_set_vf_bw(struct net_device *, int, int, int); static int igb_ndo_set_vf_spoofchk(struct net_device *netdev, int vf, bool setting); @@ -6222,14 +6222,17 @@ static int igb_disable_port_vlan(struct igb_adapter *adapter, int vf) return 0; } -static int igb_ndo_set_vf_vlan(struct net_device *netdev, - int vf, u16 vlan, u8 qos) +static int igb_ndo_set_vf_vlan(struct net_device *netdev, int vf, + u16 vlan, u8 qos, __be16 vlan_proto) { struct igb_adapter *adapter = netdev_priv(netdev); if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7)) return -EINVAL; + if (vlan_proto != htons(ETH_P_8021Q)) + return -EPROTONOSUPPORT; + return (vlan || qos) ? igb_enable_port_vlan(adapter, vf, vlan, qos) : igb_disable_port_vlan(adapter, vf); } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index 8618599dfd6f..b18590a995db 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -1354,13 +1354,16 @@ static int ixgbe_disable_port_vlan(struct ixgbe_adapter *adapter, int vf) return err; } -int ixgbe_ndo_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos) +int ixgbe_ndo_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, + u8 qos, __be16 vlan_proto) { int err = 0; struct ixgbe_adapter *adapter = netdev_priv(netdev); if ((vf >= adapter->num_vfs) || (vlan > 4095) || (qos > 7)) return -EINVAL; + if (vlan_proto != htons(ETH_P_8021Q)) + return -EPROTONOSUPPORT; if (vlan || qos) { /* Check if there is already a port VLAN set, if so * we have to delete the old one first before we diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h index 47e65e2f886a..0c7977d27b71 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h @@ -43,7 +43,7 @@ void ixgbe_disable_tx_rx(struct ixgbe_adapter *adapter); void ixgbe_ping_all_vfs(struct ixgbe_adapter *adapter); int ixgbe_ndo_set_vf_mac(struct net_device *netdev, int queue, u8 *mac); int ixgbe_ndo_set_vf_vlan(struct net_device *netdev, int queue, u16 vlan, - u8 qos); + u8 qos, __be16 vlan_proto); int ixgbe_link_mbps(struct ixgbe_adapter *adapter); int ixgbe_ndo_set_vf_bw(struct net_device *netdev, int vf, int min_tx_rate, int max_tx_rate); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index a94f8a3f026c..132eeeafcdc4 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2400,11 +2400,15 @@ static int mlx4_en_set_vf_mac(struct net_device *dev, int queue, u8 *mac) return mlx4_set_vf_mac(mdev->dev, en_priv->port, queue, mac_u64); } -static int mlx4_en_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos) +static int mlx4_en_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos, + __be16 vlan_proto) { struct mlx4_en_priv *en_priv = netdev_priv(dev); struct mlx4_en_dev *mdev = en_priv->mdev; + if (vlan_proto != htons(ETH_P_8021Q)) + return -EPROTONOSUPPORT; + return mlx4_set_vf_vlan(mdev->dev, en_priv->port, vf, vlan, qos); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index c12792314be7..b58cfe37dead 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2917,11 +2917,15 @@ static int mlx5e_set_vf_mac(struct net_device *dev, int vf, u8 *mac) return mlx5_eswitch_set_vport_mac(mdev->priv.eswitch, vf + 1, mac); } -static int mlx5e_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos) +static int mlx5e_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos, + __be16 vlan_proto) { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5_core_dev *mdev = priv->mdev; + if (vlan_proto != htons(ETH_P_8021Q)) + return -EPROTONOSUPPORT; + return mlx5_eswitch_set_vport_vlan(mdev->priv.eswitch, vf + 1, vlan, qos); } diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index cd23a2946db7..0e198fe89d1a 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -100,7 +100,8 @@ static int qede_alloc_rx_buffer(struct qede_dev *edev, static void qede_link_update(void *dev, struct qed_link_output *link); #ifdef CONFIG_QED_SRIOV -static int qede_set_vf_vlan(struct net_device *ndev, int vf, u16 vlan, u8 qos) +static int qede_set_vf_vlan(struct net_device *ndev, int vf, u16 vlan, u8 qos, + __be16 vlan_proto) { struct qede_dev *edev = netdev_priv(ndev); @@ -109,6 +110,9 @@ static int qede_set_vf_vlan(struct net_device *ndev, int vf, u16 vlan, u8 qos) return -EINVAL; } + if (vlan_proto != htons(ETH_P_8021Q)) + return -EPROTONOSUPPORT; + DP_VERBOSE(edev, QED_MSG_IOV, "Setting Vlan 0x%04x to VF [%d]\n", vlan, vf); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h index 24061b9b92e8..5f327659efa7 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h @@ -238,7 +238,7 @@ int qlcnic_sriov_set_vf_mac(struct net_device *, int, u8 *); int qlcnic_sriov_set_vf_tx_rate(struct net_device *, int, int, int); int qlcnic_sriov_get_vf_config(struct net_device *, int , struct ifla_vf_info *); -int qlcnic_sriov_set_vf_vlan(struct net_device *, int, u16, u8); +int qlcnic_sriov_set_vf_vlan(struct net_device *, int, u16, u8, __be16); int qlcnic_sriov_set_vf_spoofchk(struct net_device *, int, bool); #else static inline void qlcnic_sriov_pf_disable(struct qlcnic_adapter *adapter) {} diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c index afd687e5e779..50eaafa3eaba 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c @@ -1915,7 +1915,7 @@ int qlcnic_sriov_set_vf_tx_rate(struct net_device *netdev, int vf, } int qlcnic_sriov_set_vf_vlan(struct net_device *netdev, int vf, - u16 vlan, u8 qos) + u16 vlan, u8 qos, __be16 vlan_proto) { struct qlcnic_adapter *adapter = netdev_priv(netdev); struct qlcnic_sriov *sriov = adapter->ahw->sriov; @@ -1928,6 +1928,9 @@ int qlcnic_sriov_set_vf_vlan(struct net_device *netdev, int vf, if (vf >= sriov->num_vfs || qos > 7) return -EINVAL; + if (vlan_proto != htons(ETH_P_8021Q)) + return -EPROTONOSUPPORT; + if (vlan > MAX_VLAN_ID) { netdev_err(netdev, "Invalid VLAN ID, allowed range is [0 - %d]\n", diff --git a/drivers/net/ethernet/sfc/sriov.c b/drivers/net/ethernet/sfc/sriov.c index 816c44689e67..9abcf4aded30 100644 --- a/drivers/net/ethernet/sfc/sriov.c +++ b/drivers/net/ethernet/sfc/sriov.c @@ -22,7 +22,7 @@ int efx_sriov_set_vf_mac(struct net_device *net_dev, int vf_i, u8 *mac) } int efx_sriov_set_vf_vlan(struct net_device *net_dev, int vf_i, u16 vlan, - u8 qos) + u8 qos, __be16 vlan_proto) { struct efx_nic *efx = netdev_priv(net_dev); @@ -31,6 +31,9 @@ int efx_sriov_set_vf_vlan(struct net_device *net_dev, int vf_i, u16 vlan, (qos & ~(VLAN_PRIO_MASK >> VLAN_PRIO_SHIFT))) return -EINVAL; + if (vlan_proto != htons(ETH_P_8021Q)) + return -EPROTONOSUPPORT; + return efx->type->sriov_set_vf_vlan(efx, vf_i, vlan, qos); } else { return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/sfc/sriov.h b/drivers/net/ethernet/sfc/sriov.h index 400df526586d..ba1762e7f216 100644 --- a/drivers/net/ethernet/sfc/sriov.h +++ b/drivers/net/ethernet/sfc/sriov.h @@ -16,7 +16,7 @@ int efx_sriov_set_vf_mac(struct net_device *net_dev, int vf_i, u8 *mac); int efx_sriov_set_vf_vlan(struct net_device *net_dev, int vf_i, u16 vlan, - u8 qos); + u8 qos, __be16 vlan_proto); int efx_sriov_set_vf_spoofchk(struct net_device *net_dev, int vf_i, bool spoofchk); int efx_sriov_get_vf_config(struct net_device *net_dev, int vf_i, diff --git a/include/linux/if_link.h b/include/linux/if_link.h index f923d15b432c..0b17c585b5cd 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -25,5 +25,6 @@ struct ifla_vf_info { __u32 max_tx_rate; __u32 rss_query_en; __u32 trusted; + __be16 vlan_proto; }; #endif /* _LINUX_IF_LINK_H */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 69f242c71865..1e8a5c734d72 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -946,7 +946,8 @@ struct netdev_xdp { * * SR-IOV management functions. * int (*ndo_set_vf_mac)(struct net_device *dev, int vf, u8* mac); - * int (*ndo_set_vf_vlan)(struct net_device *dev, int vf, u16 vlan, u8 qos); + * int (*ndo_set_vf_vlan)(struct net_device *dev, int vf, u16 vlan, + * u8 qos, __be16 proto); * int (*ndo_set_vf_rate)(struct net_device *dev, int vf, int min_tx_rate, * int max_tx_rate); * int (*ndo_set_vf_spoofchk)(struct net_device *dev, int vf, bool setting); @@ -1187,7 +1188,8 @@ struct net_device_ops { int (*ndo_set_vf_mac)(struct net_device *dev, int queue, u8 *mac); int (*ndo_set_vf_vlan)(struct net_device *dev, - int queue, u16 vlan, u8 qos); + int queue, u16 vlan, + u8 qos, __be16 proto); int (*ndo_set_vf_rate)(struct net_device *dev, int vf, int min_tx_rate, int max_tx_rate); diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 7ec9e99d5491..b4fba662cd32 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -619,7 +619,7 @@ enum { enum { IFLA_VF_UNSPEC, IFLA_VF_MAC, /* Hardware queue specific attributes */ - IFLA_VF_VLAN, + IFLA_VF_VLAN, /* VLAN ID and QoS */ IFLA_VF_TX_RATE, /* Max TX Bandwidth Allocation */ IFLA_VF_SPOOFCHK, /* Spoof Checking on/off switch */ IFLA_VF_LINK_STATE, /* link state enable/disable/auto switch */ @@ -631,6 +631,7 @@ enum { IFLA_VF_TRUST, /* Trust VF */ IFLA_VF_IB_NODE_GUID, /* VF Infiniband node GUID */ IFLA_VF_IB_PORT_GUID, /* VF Infiniband port GUID */ + IFLA_VF_VLAN_LIST, /* nested list of vlans, option for QinQ */ __IFLA_VF_MAX, }; @@ -647,6 +648,22 @@ struct ifla_vf_vlan { __u32 qos; }; +enum { + IFLA_VF_VLAN_INFO_UNSPEC, + IFLA_VF_VLAN_INFO, /* VLAN ID, QoS and VLAN protocol */ + __IFLA_VF_VLAN_INFO_MAX, +}; + +#define IFLA_VF_VLAN_INFO_MAX (__IFLA_VF_VLAN_INFO_MAX - 1) +#define MAX_VLAN_LIST_LEN 1 + +struct ifla_vf_vlan_info { + __u32 vf; + __u32 vlan; /* 0 - 4095, 0 disables VLAN filter */ + __u32 qos; + __be16 vlan_proto; /* VLAN protocol either 802.1Q or 802.1ad */ +}; + struct ifla_vf_tx_rate { __u32 vf; __u32 rate; /* Max TX bandwidth in Mbps, 0 disables throttling */ diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 0dbae4244a89..3ac8946bf244 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -843,7 +843,10 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev, size += nla_total_size(num_vfs * sizeof(struct nlattr)); size += num_vfs * (nla_total_size(sizeof(struct ifla_vf_mac)) + - nla_total_size(sizeof(struct ifla_vf_vlan)) + + nla_total_size(MAX_VLAN_LIST_LEN * + sizeof(struct nlattr)) + + nla_total_size(MAX_VLAN_LIST_LEN * + sizeof(struct ifla_vf_vlan_info)) + nla_total_size(sizeof(struct ifla_vf_spoofchk)) + nla_total_size(sizeof(struct ifla_vf_rate)) + nla_total_size(sizeof(struct ifla_vf_link_state)) + @@ -1111,14 +1114,15 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, struct nlattr *vfinfo) { struct ifla_vf_rss_query_en vf_rss_query_en; + struct nlattr *vf, *vfstats, *vfvlanlist; struct ifla_vf_link_state vf_linkstate; + struct ifla_vf_vlan_info vf_vlan_info; struct ifla_vf_spoofchk vf_spoofchk; struct ifla_vf_tx_rate vf_tx_rate; struct ifla_vf_stats vf_stats; struct ifla_vf_trust vf_trust; struct ifla_vf_vlan vf_vlan; struct ifla_vf_rate vf_rate; - struct nlattr *vf, *vfstats; struct ifla_vf_mac vf_mac; struct ifla_vf_info ivi; @@ -1135,11 +1139,14 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, * IFLA_VF_LINK_STATE_AUTO which equals zero */ ivi.linkstate = 0; + /* VLAN Protocol by default is 802.1Q */ + ivi.vlan_proto = htons(ETH_P_8021Q); if (dev->netdev_ops->ndo_get_vf_config(dev, vfs_num, &ivi)) return 0; vf_mac.vf = vf_vlan.vf = + vf_vlan_info.vf = vf_rate.vf = vf_tx_rate.vf = vf_spoofchk.vf = @@ -1150,6 +1157,9 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac)); vf_vlan.vlan = ivi.vlan; vf_vlan.qos = ivi.qos; + vf_vlan_info.vlan = ivi.vlan; + vf_vlan_info.qos = ivi.qos; + vf_vlan_info.vlan_proto = ivi.vlan_proto; vf_tx_rate.rate = ivi.max_tx_rate; vf_rate.min_tx_rate = ivi.min_tx_rate; vf_rate.max_tx_rate = ivi.max_tx_rate; @@ -1158,10 +1168,8 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, vf_rss_query_en.setting = ivi.rss_query_en; vf_trust.setting = ivi.trusted; vf = nla_nest_start(skb, IFLA_VF_INFO); - if (!vf) { - nla_nest_cancel(skb, vfinfo); - return -EMSGSIZE; - } + if (!vf) + goto nla_put_vfinfo_failure; if (nla_put(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac) || nla_put(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan) || nla_put(skb, IFLA_VF_RATE, sizeof(vf_rate), @@ -1177,17 +1185,23 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, &vf_rss_query_en) || nla_put(skb, IFLA_VF_TRUST, sizeof(vf_trust), &vf_trust)) - return -EMSGSIZE; + goto nla_put_vf_failure; + vfvlanlist = nla_nest_start(skb, IFLA_VF_VLAN_LIST); + if (!vfvlanlist) + goto nla_put_vf_failure; + if (nla_put(skb, IFLA_VF_VLAN_INFO, sizeof(vf_vlan_info), + &vf_vlan_info)) { + nla_nest_cancel(skb, vfvlanlist); + goto nla_put_vf_failure; + } + nla_nest_end(skb, vfvlanlist); memset(&vf_stats, 0, sizeof(vf_stats)); if (dev->netdev_ops->ndo_get_vf_stats) dev->netdev_ops->ndo_get_vf_stats(dev, vfs_num, &vf_stats); vfstats = nla_nest_start(skb, IFLA_VF_STATS); - if (!vfstats) { - nla_nest_cancel(skb, vf); - nla_nest_cancel(skb, vfinfo); - return -EMSGSIZE; - } + if (!vfstats) + goto nla_put_vf_failure; if (nla_put_u64_64bit(skb, IFLA_VF_STATS_RX_PACKETS, vf_stats.rx_packets, IFLA_VF_STATS_PAD) || nla_put_u64_64bit(skb, IFLA_VF_STATS_TX_PACKETS, @@ -1199,11 +1213,19 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, nla_put_u64_64bit(skb, IFLA_VF_STATS_BROADCAST, vf_stats.broadcast, IFLA_VF_STATS_PAD) || nla_put_u64_64bit(skb, IFLA_VF_STATS_MULTICAST, - vf_stats.multicast, IFLA_VF_STATS_PAD)) - return -EMSGSIZE; + vf_stats.multicast, IFLA_VF_STATS_PAD)) { + nla_nest_cancel(skb, vfstats); + goto nla_put_vf_failure; + } nla_nest_end(skb, vfstats); nla_nest_end(skb, vf); return 0; + +nla_put_vf_failure: + nla_nest_cancel(skb, vf); +nla_put_vfinfo_failure: + nla_nest_cancel(skb, vfinfo); + return -EMSGSIZE; } static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev) @@ -1448,6 +1470,7 @@ static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = { [IFLA_VF_MAC] = { .len = sizeof(struct ifla_vf_mac) }, [IFLA_VF_VLAN] = { .len = sizeof(struct ifla_vf_vlan) }, + [IFLA_VF_VLAN_LIST] = { .type = NLA_NESTED }, [IFLA_VF_TX_RATE] = { .len = sizeof(struct ifla_vf_tx_rate) }, [IFLA_VF_SPOOFCHK] = { .len = sizeof(struct ifla_vf_spoofchk) }, [IFLA_VF_RATE] = { .len = sizeof(struct ifla_vf_rate) }, @@ -1704,7 +1727,34 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) err = -EOPNOTSUPP; if (ops->ndo_set_vf_vlan) err = ops->ndo_set_vf_vlan(dev, ivv->vf, ivv->vlan, - ivv->qos); + ivv->qos, + htons(ETH_P_8021Q)); + if (err < 0) + return err; + } + + if (tb[IFLA_VF_VLAN_LIST]) { + struct ifla_vf_vlan_info *ivvl[MAX_VLAN_LIST_LEN]; + struct nlattr *attr; + int rem, len = 0; + + err = -EOPNOTSUPP; + if (!ops->ndo_set_vf_vlan) + return err; + + nla_for_each_nested(attr, tb[IFLA_VF_VLAN_LIST], rem) { + if (nla_type(attr) != IFLA_VF_VLAN_INFO || + nla_len(attr) < NLA_HDRLEN) { + return -EINVAL; + } + if (len >= MAX_VLAN_LIST_LEN) + return -EOPNOTSUPP; + ivvl[len] = nla_data(attr); + + len++; + } + err = ops->ndo_set_vf_vlan(dev, ivvl[0]->vf, ivvl[0]->vlan, + ivvl[0]->qos, ivvl[0]->vlan_proto); if (err < 0) return err; } From b42959dc35a533a531dd698b581193a65a5da831 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Thu, 22 Sep 2016 12:11:16 +0300 Subject: [PATCH 1478/1715] net/mlx4: Add VF vlan protocol 802.1ad support Move the vf to VST 802.1ad mode (mlx4 VST QinQ mode) by setting vf vlan protocol to 802.1ad. VST 802.1ad mode in mlx4, is used for STAG strip/insertion by PF, while the CTAG is set by the VF. Read current vlan protocol as part of the vf configuration state. Upon setting vf vlan protocol to 802.1ad, we use a mechanism of handshake to verify that both the vf and the pf driver version support it. The handshake uses the command QUERY_FUNC_CAP: - The vf sets a pre-defined support bit in input modifier. - A pf that supports the feature sends the request to the vf through a pre-defined field in the output mailbox. - In case vf does not support the feature, the pf will fail the control command (in this case, IP link tool command to set the vf vlan protocol to 802.1ad). No change in VST 802.1Q mode. Signed-off-by: Moshe Shemesh Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/cmd.c | 51 ++++++++++- .../net/ethernet/mellanox/mlx4/en_netdev.c | 6 +- drivers/net/ethernet/mellanox/mlx4/fw.c | 89 ++++++++++++++++++- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 1 + include/linux/mlx4/cmd.h | 3 +- 5 files changed, 138 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 09c969420eac..b1cef7a0f7ca 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -1995,6 +1995,8 @@ static int mlx4_master_activate_admin_state(struct mlx4_priv *priv, int slave) int port, err; struct mlx4_vport_state *vp_admin; struct mlx4_vport_oper_state *vp_oper; + struct mlx4_slave_state *slave_state = + &priv->mfunc.master.slave_state[slave]; struct mlx4_active_ports actv_ports = mlx4_get_active_ports( &priv->dev, slave); int min_port = find_first_bit(actv_ports.ports, @@ -2009,7 +2011,19 @@ static int mlx4_master_activate_admin_state(struct mlx4_priv *priv, int slave) priv->mfunc.master.vf_admin[slave].enable_smi[port]; vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; vp_admin = &priv->mfunc.master.vf_admin[slave].vport[port]; - vp_oper->state = *vp_admin; + if (vp_admin->vlan_proto != htons(ETH_P_8021AD) || + slave_state->vst_qinq_supported) { + vp_oper->state.vlan_proto = vp_admin->vlan_proto; + vp_oper->state.default_vlan = vp_admin->default_vlan; + vp_oper->state.default_qos = vp_admin->default_qos; + } + vp_oper->state.link_state = vp_admin->link_state; + vp_oper->state.mac = vp_admin->mac; + vp_oper->state.spoofchk = vp_admin->spoofchk; + vp_oper->state.tx_rate = vp_admin->tx_rate; + vp_oper->state.qos_vport = vp_admin->qos_vport; + vp_oper->state.guid = vp_admin->guid; + if (MLX4_VGT != vp_admin->default_vlan) { err = __mlx4_register_vlan(&priv->dev, port, vp_admin->default_vlan, &(vp_oper->vlan_idx)); @@ -2097,6 +2111,7 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd, mlx4_warn(dev, "Received reset from slave:%d\n", slave); slave_state[slave].active = false; slave_state[slave].old_vlan_api = false; + slave_state[slave].vst_qinq_supported = false; mlx4_master_deactivate_admin_state(priv, slave); for (i = 0; i < MLX4_EVENT_TYPES_NUM; ++i) { slave_state[slave].event_eq[i].eqn = -1; @@ -2364,6 +2379,7 @@ int mlx4_multi_func_init(struct mlx4_dev *dev) vf_oper = &priv->mfunc.master.vf_oper[i]; s_state = &priv->mfunc.master.slave_state[i]; s_state->last_cmd = MLX4_COMM_CMD_RESET; + s_state->vst_qinq_supported = false; mutex_init(&priv->mfunc.master.gen_eqe_mutex[i]); for (j = 0; j < MLX4_EVENT_TYPES_NUM; ++j) s_state->event_eq[j].eqn = -1; @@ -2955,10 +2971,13 @@ int mlx4_set_vf_mac(struct mlx4_dev *dev, int port, int vf, u64 mac) EXPORT_SYMBOL_GPL(mlx4_set_vf_mac); -int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos) +int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos, + __be16 proto) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_vport_state *vf_admin; + struct mlx4_slave_state *slave_state; + struct mlx4_vport_oper_state *vf_oper; int slave; if ((!mlx4_is_master(dev)) || @@ -2968,12 +2987,31 @@ int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos) if ((vlan > 4095) || (qos > 7)) return -EINVAL; + if (proto == htons(ETH_P_8021AD) && + !(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SVLAN_BY_QP)) + return -EPROTONOSUPPORT; + + if (proto != htons(ETH_P_8021Q) && + proto != htons(ETH_P_8021AD)) + return -EINVAL; + + if ((proto == htons(ETH_P_8021AD)) && + ((vlan == 0) || (vlan == MLX4_VGT))) + return -EINVAL; + slave = mlx4_get_slave_indx(dev, vf); if (slave < 0) return -EINVAL; + slave_state = &priv->mfunc.master.slave_state[slave]; + if ((proto == htons(ETH_P_8021AD)) && (slave_state->active) && + (!slave_state->vst_qinq_supported)) { + mlx4_err(dev, "vf %d does not support VST QinQ mode\n", vf); + return -EPROTONOSUPPORT; + } port = mlx4_slaves_closest_port(dev, slave, port); vf_admin = &priv->mfunc.master.vf_admin[slave].vport[port]; + vf_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; if (!mlx4_valid_vf_state_change(dev, port, vf_admin, vlan, qos)) return -EPERM; @@ -2983,6 +3021,7 @@ int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos) else vf_admin->default_vlan = vlan; vf_admin->default_qos = qos; + vf_admin->vlan_proto = proto; /* If rate was configured prior to VST, we saved the configured rate * in vf_admin->rate and now, if priority supported we enforce the QoS @@ -2991,7 +3030,12 @@ int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos) vf_admin->tx_rate) vf_admin->qos_vport = slave; - if (mlx4_master_immediate_activate_vlan_qos(priv, slave, port)) + /* Try to activate new vf state without restart, + * this option is not supported while moving to VST QinQ mode. + */ + if ((proto == htons(ETH_P_8021AD) && + vf_oper->state.vlan_proto != proto) || + mlx4_master_immediate_activate_vlan_qos(priv, slave, port)) mlx4_info(dev, "updating vf %d port %d config will take effect on next VF restart\n", vf, port); @@ -3135,6 +3179,7 @@ int mlx4_get_vf_config(struct mlx4_dev *dev, int port, int vf, struct ifla_vf_in ivf->vlan = s_info->default_vlan; ivf->qos = s_info->default_qos; + ivf->vlan_proto = s_info->vlan_proto; if (mlx4_is_vf_vst_and_prio_qos(dev, port, s_info)) ivf->max_tx_rate = s_info->tx_rate; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 132eeeafcdc4..7e703bed7b82 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2406,10 +2406,8 @@ static int mlx4_en_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos, struct mlx4_en_priv *en_priv = netdev_priv(dev); struct mlx4_en_dev *mdev = en_priv->mdev; - if (vlan_proto != htons(ETH_P_8021Q)) - return -EPROTONOSUPPORT; - - return mlx4_set_vf_vlan(mdev->dev, en_priv->port, vf, vlan, qos); + return mlx4_set_vf_vlan(mdev->dev, en_priv->port, vf, vlan, qos, + vlan_proto); } static int mlx4_en_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate, diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 7dc9d38a51f8..090bf81076e8 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -249,6 +249,72 @@ out: return err; } +static int mlx4_activate_vst_qinq(struct mlx4_priv *priv, int slave, int port) +{ + struct mlx4_vport_oper_state *vp_oper; + struct mlx4_vport_state *vp_admin; + int err; + + vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; + vp_admin = &priv->mfunc.master.vf_admin[slave].vport[port]; + + if (vp_admin->default_vlan != vp_oper->state.default_vlan) { + err = __mlx4_register_vlan(&priv->dev, port, + vp_admin->default_vlan, + &vp_oper->vlan_idx); + if (err) { + vp_oper->vlan_idx = NO_INDX; + mlx4_warn(&priv->dev, + "No vlan resources slave %d, port %d\n", + slave, port); + return err; + } + mlx4_dbg(&priv->dev, "alloc vlan %d idx %d slave %d port %d\n", + (int)(vp_oper->state.default_vlan), + vp_oper->vlan_idx, slave, port); + } + vp_oper->state.vlan_proto = vp_admin->vlan_proto; + vp_oper->state.default_vlan = vp_admin->default_vlan; + vp_oper->state.default_qos = vp_admin->default_qos; + + return 0; +} + +static int mlx4_handle_vst_qinq(struct mlx4_priv *priv, int slave, int port) +{ + struct mlx4_vport_oper_state *vp_oper; + struct mlx4_slave_state *slave_state; + struct mlx4_vport_state *vp_admin; + int err; + + vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; + vp_admin = &priv->mfunc.master.vf_admin[slave].vport[port]; + slave_state = &priv->mfunc.master.slave_state[slave]; + + if ((vp_admin->vlan_proto != htons(ETH_P_8021AD)) || + (!slave_state->active)) + return 0; + + if (vp_oper->state.vlan_proto == vp_admin->vlan_proto && + vp_oper->state.default_vlan == vp_admin->default_vlan && + vp_oper->state.default_qos == vp_admin->default_qos) + return 0; + + if (!slave_state->vst_qinq_supported) { + /* Warn and revert the request to set vst QinQ mode */ + vp_admin->vlan_proto = vp_oper->state.vlan_proto; + vp_admin->default_vlan = vp_oper->state.default_vlan; + vp_admin->default_qos = vp_oper->state.default_qos; + + mlx4_warn(&priv->dev, + "Slave %d does not support VST QinQ mode\n", slave); + return 0; + } + + err = mlx4_activate_vst_qinq(priv, slave, port); + return err; +} + int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, @@ -312,17 +378,18 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, #define QUERY_FUNC_CAP_VF_ENABLE_QP0 0x08 #define QUERY_FUNC_CAP_FLAGS0_FORCE_PHY_WQE_GID 0x80 -#define QUERY_FUNC_CAP_SUPPORTS_NON_POWER_OF_2_NUM_EQS (1 << 31) #define QUERY_FUNC_CAP_PHV_BIT 0x40 #define QUERY_FUNC_CAP_VLAN_OFFLOAD_DISABLE 0x20 +#define QUERY_FUNC_CAP_SUPPORTS_VST_QINQ BIT(30) +#define QUERY_FUNC_CAP_SUPPORTS_NON_POWER_OF_2_NUM_EQS BIT(31) + if (vhcr->op_modifier == 1) { struct mlx4_active_ports actv_ports = mlx4_get_active_ports(dev, slave); int converted_port = mlx4_slave_convert_port( dev, slave, vhcr->in_modifier); - struct mlx4_vport_oper_state *vp_oper = - &priv->mfunc.master.vf_oper[slave].vport[vhcr->in_modifier]; + struct mlx4_vport_oper_state *vp_oper; if (converted_port < 0) return -EINVAL; @@ -361,6 +428,11 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, MLX4_PUT(outbox->buf, dev->caps.phys_port_id[vhcr->in_modifier], QUERY_FUNC_CAP_PHYS_PORT_ID); + vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; + err = mlx4_handle_vst_qinq(priv, slave, port); + if (err) + return err; + field = 0; if (dev->caps.phv_bit[port]) field |= QUERY_FUNC_CAP_PHV_BIT; @@ -371,6 +443,9 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, } else if (vhcr->op_modifier == 0) { struct mlx4_active_ports actv_ports = mlx4_get_active_ports(dev, slave); + struct mlx4_slave_state *slave_state = + &priv->mfunc.master.slave_state[slave]; + /* enable rdma and ethernet interfaces, new quota locations, * and reserved lkey */ @@ -444,6 +519,10 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, size = dev->caps.reserved_lkey + ((slave << 8) & 0xFF00); MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP_RESD_LKEY_OFFSET); + + if (vhcr->in_modifier & QUERY_FUNC_CAP_SUPPORTS_VST_QINQ) + slave_state->vst_qinq_supported = true; + } else err = -EINVAL; @@ -459,10 +538,12 @@ int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u8 gen_or_port, u32 size, qkey; int err = 0, quotas = 0; u32 in_modifier; + u32 slave_caps; op_modifier = !!gen_or_port; /* 0 = general, 1 = logical port */ - in_modifier = op_modifier ? gen_or_port : + slave_caps = QUERY_FUNC_CAP_SUPPORTS_VST_QINQ | QUERY_FUNC_CAP_SUPPORTS_NON_POWER_OF_2_NUM_EQS; + in_modifier = op_modifier ? gen_or_port : slave_caps; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index fdfe1ace07d7..e4878f31e45d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -483,6 +483,7 @@ struct mlx4_slave_state { u8 init_port_mask; bool active; bool old_vlan_api; + bool vst_qinq_supported; u8 function; dma_addr_t vhcr_dma; u16 mtu[MLX4_MAX_PORTS + 1]; diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index 116b284bc4ce..1f3568694a57 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -309,7 +309,8 @@ int mlx4_get_vf_stats(struct mlx4_dev *dev, int port, int vf_idx, struct ifla_vf_stats *vf_stats); u32 mlx4_comm_get_version(void); int mlx4_set_vf_mac(struct mlx4_dev *dev, int port, int vf, u64 mac); -int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos); +int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, + u8 qos, __be16 proto); int mlx4_set_vf_rate(struct mlx4_dev *dev, int port, int vf, int min_tx_rate, int max_tx_rate); int mlx4_set_vf_spoofchk(struct mlx4_dev *dev, int port, int vf, bool setting); From 4ce4862a815e3cee8040c9d91e2148aecbbf056e Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Fri, 23 Sep 2016 14:04:09 +0800 Subject: [PATCH 1479/1715] Documentation: devicetree: revise ethernet device-tree binding about TRGMII add phy-mode "trgmii" to Documentation/devicetree/bindings/net/ethernet.txt Cc: devicetree@vger.kernel.org Reported-by: Sergei Shtylyov Signed-off-by: Sean Wang Acked-by: Rob Herring Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/ethernet.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/net/ethernet.txt b/Documentation/devicetree/bindings/net/ethernet.txt index 5d88f37480b6..e1d76812419c 100644 --- a/Documentation/devicetree/bindings/net/ethernet.txt +++ b/Documentation/devicetree/bindings/net/ethernet.txt @@ -11,8 +11,8 @@ The following properties are common to the Ethernet controllers: the maximum frame size (there's contradiction in ePAPR). - phy-mode: string, operation mode of the PHY interface; supported values are "mii", "gmii", "sgmii", "qsgmii", "tbi", "rev-mii", "rmii", "rgmii", "rgmii-id", - "rgmii-rxid", "rgmii-txid", "rtbi", "smii", "xgmii"; this is now a de-facto - standard property; + "rgmii-rxid", "rgmii-txid", "rtbi", "smii", "xgmii", "trgmii"; this is now a + de-facto standard property; - phy-connection-type: the same as "phy-mode" property but described in ePAPR; - phy-handle: phandle, specifies a reference to a node representing a PHY device; this property is described in ePAPR and so preferred; From 7f8c2865a94a73308386627cd7556c17f03efb63 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Fri, 23 Sep 2016 14:09:32 +0800 Subject: [PATCH 1480/1715] Documentation: devicetree: fix typo in MediaTek ethernet device-tree binding fix typo in Documentation/devicetree/bindings/net/mediatek-net.txt Cc: devicetree@vger.kernel.org Reported-by: Sergei Shtylyov Signed-off-by: Sean Wang Acked-by: Rob Herring Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/mediatek-net.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/mediatek-net.txt b/Documentation/devicetree/bindings/net/mediatek-net.txt index 7111278adc7e..f09525772369 100644 --- a/Documentation/devicetree/bindings/net/mediatek-net.txt +++ b/Documentation/devicetree/bindings/net/mediatek-net.txt @@ -34,7 +34,7 @@ Required properties: - phy-handle: see ethernet.txt file in the same directory and the phy-mode "trgmii" required being provided when reg is equal to 0 and the MAC uses fixed-link to connect - with inernal switch such as MT7530. + with internal switch such as MT7530. Example: From faac0ff0a544eed6b8c9375c1104d692e4979540 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 23 Sep 2016 12:02:45 +0100 Subject: [PATCH 1481/1715] mlxsw: spectrum: remove redundant check if err is zero There is an earlier check and return if err is non-zero, so the check to see if it is zero is redundant in every iteration of the loop and hence the check can be removed. Signed-off-by: Colin Ian King Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 80f27b504444..fd74d1064ff3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -1664,7 +1664,7 @@ static void __mlxsw_sp_port_get_stats(struct net_device *dev, return; mlxsw_sp_port_get_stats_raw(dev, grp, prio, ppcnt_pl); for (i = 0; i < len; i++) - data[data_index + i] = !err ? hw_stats[i].getter(ppcnt_pl) : 0; + data[data_index + i] = hw_stats[i].getter(ppcnt_pl); } static void mlxsw_sp_port_get_stats(struct net_device *dev, From b4e28c1fc9c7f3b7508b9a27d7c59a0da7b1f824 Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Fri, 23 Sep 2016 14:42:27 +0530 Subject: [PATCH 1482/1715] net: thunderx: Fix issue with IRQ namimg This patch fixes a regression caused by previous commit when irq name exceeds 20 byte array if interface's name size is large. Fixes: e412621394fa ("net: thunderx: Use netdev's name for naming VF's interrupts") Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h index 18d12d35039a..30426109711c 100644 --- a/drivers/net/ethernet/cavium/thunder/nic.h +++ b/drivers/net/ethernet/cavium/thunder/nic.h @@ -305,7 +305,7 @@ struct nicvf { bool msix_enabled; u8 num_vec; struct msix_entry msix_entries[NIC_VF_MSIX_VECTORS]; - char irq_name[NIC_VF_MSIX_VECTORS][20]; + char irq_name[NIC_VF_MSIX_VECTORS][IFNAMSIZ + 15]; bool irq_allocated[NIC_VF_MSIX_VECTORS]; cpumask_var_t affinity_mask[NIC_VF_MSIX_VECTORS]; From 2c204c2b9fca36aa24f7abe2e8bfd83fe3a8db8d Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Fri, 23 Sep 2016 14:42:28 +0530 Subject: [PATCH 1483/1715] net: thunderx: Support for byte queue limits This patch adds support for byte queue limits Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- .../net/ethernet/cavium/thunder/nicvf_main.c | 14 +++++++-- .../ethernet/cavium/thunder/nicvf_queues.c | 30 ++++++++++++------- 2 files changed, 32 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index 7d00162a2f89..45a13f718863 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -516,7 +516,8 @@ static int nicvf_init_resources(struct nicvf *nic) static void nicvf_snd_pkt_handler(struct net_device *netdev, struct cmp_queue *cq, struct cqe_send_t *cqe_tx, - int cqe_type, int budget) + int cqe_type, int budget, + unsigned int *tx_pkts, unsigned int *tx_bytes) { struct sk_buff *skb = NULL; struct nicvf *nic = netdev_priv(netdev); @@ -547,6 +548,8 @@ static void nicvf_snd_pkt_handler(struct net_device *netdev, } nicvf_put_sq_desc(sq, hdr->subdesc_cnt + 1); prefetch(skb); + (*tx_pkts)++; + *tx_bytes += skb->len; napi_consume_skb(skb, budget); sq->skbuff[cqe_tx->sqe_ptr] = (u64)NULL; } else { @@ -662,6 +665,7 @@ static int nicvf_cq_intr_handler(struct net_device *netdev, u8 cq_idx, struct cmp_queue *cq = &qs->cq[cq_idx]; struct cqe_rx_t *cq_desc; struct netdev_queue *txq; + unsigned int tx_pkts = 0, tx_bytes = 0; spin_lock_bh(&cq->lock); loop: @@ -701,7 +705,7 @@ loop: case CQE_TYPE_SEND: nicvf_snd_pkt_handler(netdev, cq, (void *)cq_desc, CQE_TYPE_SEND, - budget); + budget, &tx_pkts, &tx_bytes); tx_done++; break; case CQE_TYPE_INVALID: @@ -730,6 +734,9 @@ done: netdev = nic->pnicvf->netdev; txq = netdev_get_tx_queue(netdev, nicvf_netdev_qidx(nic, cq_idx)); + if (tx_pkts) + netdev_tx_completed_queue(txq, tx_pkts, tx_bytes); + nic = nic->pnicvf; if (netif_tx_queue_stopped(txq) && netif_carrier_ok(netdev)) { netif_tx_start_queue(txq); @@ -1160,6 +1167,9 @@ int nicvf_stop(struct net_device *netdev) netif_tx_disable(netdev); + for (qidx = 0; qidx < netdev->num_tx_queues; qidx++) + netdev_tx_reset_queue(netdev_get_tx_queue(netdev, qidx)); + /* Free resources */ nicvf_config_data_transfer(nic, false); diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c index 178c5c7b0994..a4fc50155881 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -1082,6 +1082,24 @@ static inline void nicvf_sq_add_cqe_subdesc(struct snd_queue *sq, int qentry, imm->len = 1; } +static inline void nicvf_sq_doorbell(struct nicvf *nic, struct sk_buff *skb, + int sq_num, int desc_cnt) +{ + struct netdev_queue *txq; + + txq = netdev_get_tx_queue(nic->pnicvf->netdev, + skb_get_queue_mapping(skb)); + + netdev_tx_sent_queue(txq, skb->len); + + /* make sure all memory stores are done before ringing doorbell */ + smp_wmb(); + + /* Inform HW to xmit all TSO segments */ + nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_DOOR, + sq_num, desc_cnt); +} + /* Segment a TSO packet into 'gso_size' segments and append * them to SQ for transfer */ @@ -1141,12 +1159,8 @@ static int nicvf_sq_append_tso(struct nicvf *nic, struct snd_queue *sq, /* Save SKB in the last segment for freeing */ sq->skbuff[hdr_qentry] = (u64)skb; - /* make sure all memory stores are done before ringing doorbell */ - smp_wmb(); + nicvf_sq_doorbell(nic, skb, sq_num, desc_cnt); - /* Inform HW to xmit all TSO segments */ - nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_DOOR, - sq_num, desc_cnt); nic->drv_stats.tx_tso++; return 1; } @@ -1219,12 +1233,8 @@ doorbell: nicvf_sq_add_cqe_subdesc(sq, qentry, tso_sqe, skb); } - /* make sure all memory stores are done before ringing doorbell */ - smp_wmb(); + nicvf_sq_doorbell(nic, skb, sq_num, subdesc_cnt); - /* Inform HW to xmit new packet */ - nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_DOOR, - sq_num, subdesc_cnt); return 1; append_fail: From c6a77ff82fb849534748719f37f3f9086d78ed39 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 23 Sep 2016 17:08:17 -0700 Subject: [PATCH 1484/1715] hv_netvsc: fix comments Typo's and spelling errors. Also remove old comment from staging era. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 7130bf910f52..f4fbcb5aa24a 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -433,7 +433,7 @@ struct nvsp_1_message_revoke_send_buffer { */ struct nvsp_1_message_send_rndis_packet { /* - * This field is specified by RNIDS. They assume there's two different + * This field is specified by RNDIS. They assume there's two different * channels of communication. However, the Network VSP only has one. * Therefore, the channel travels with the RNDIS packet. */ @@ -578,7 +578,7 @@ struct nvsp_5_send_indirect_table { /* The number of entries in the send indirection table */ u32 count; - /* The offset of the send indireciton table from top of this struct. + /* The offset of the send indirection table from top of this struct. * The send indirection table tells which channel to put the send * traffic on. Each entry is a channel number. */ @@ -734,7 +734,6 @@ struct netvsc_device { struct nvsp_message channel_init_pkt; struct nvsp_message revoke_packet; - /* unsigned char HwMacAddr[HW_MACADDR_LEN]; */ struct vmbus_channel *chn_table[VRSS_CHANNEL_MAX]; u32 send_table[VRSS_SEND_TAB_SIZE]; @@ -1239,7 +1238,7 @@ struct rndis_message { u32 ndis_msg_type; /* Total length of this message, from the beginning */ - /* of the sruct rndis_message, in bytes. */ + /* of the struct rndis_message, in bytes. */ u32 msg_len; /* Actual message */ From 805b21b929e29192fb5de16154f616bfc1116e3e Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 24 Sep 2016 18:05:26 +0100 Subject: [PATCH 1485/1715] rxrpc: Send an ACK after every few DATA packets we receive Send an ACK if we haven't sent one for the last two packets we've received. This keeps the other end apprised of where we've got to - which is important if they're doing slow-start. We do this in recvmsg so that we can dispatch a packet directly without the need to wake up the background thread. This should possibly be made configurable in future. Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 3 +++ net/rxrpc/misc.c | 1 + net/rxrpc/output.c | 25 +++++++++++++++++-------- net/rxrpc/recvmsg.c | 13 ++++++++++++- 4 files changed, 33 insertions(+), 9 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 042dbcc52654..e3bf9c0e3ad1 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -533,6 +533,8 @@ struct rxrpc_call { u16 ackr_skew; /* skew on packet being ACK'd */ rxrpc_serial_t ackr_serial; /* serial of packet being ACK'd */ rxrpc_seq_t ackr_prev_seq; /* previous sequence number received */ + rxrpc_seq_t ackr_consumed; /* Highest packet shown consumed */ + rxrpc_seq_t ackr_seen; /* Highest packet shown seen */ rxrpc_serial_t ackr_ping; /* Last ping sent */ ktime_t ackr_ping_time; /* Time last ping sent */ @@ -695,6 +697,7 @@ enum rxrpc_propose_ack_trace { rxrpc_propose_ack_respond_to_ack, rxrpc_propose_ack_respond_to_ping, rxrpc_propose_ack_retry_tx, + rxrpc_propose_ack_rotate_rx, rxrpc_propose_ack_terminal_ack, rxrpc_propose_ack__nr_trace }; diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index 1ca14835d87f..a473fd7dabaa 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -202,6 +202,7 @@ const char rxrpc_propose_ack_traces[rxrpc_propose_ack__nr_trace][8] = { [rxrpc_propose_ack_respond_to_ack] = "Rsp2Ack", [rxrpc_propose_ack_respond_to_ping] = "Rsp2Png", [rxrpc_propose_ack_retry_tx] = "RetryTx", + [rxrpc_propose_ack_rotate_rx] = "RxAck ", [rxrpc_propose_ack_terminal_ack] = "ClTerm ", }; diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 0c563e325c9d..3eb01445e814 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -36,7 +36,9 @@ struct rxrpc_pkt_buffer { * Fill out an ACK packet. */ static size_t rxrpc_fill_out_ack(struct rxrpc_call *call, - struct rxrpc_pkt_buffer *pkt) + struct rxrpc_pkt_buffer *pkt, + rxrpc_seq_t *_hard_ack, + rxrpc_seq_t *_top) { rxrpc_serial_t serial; rxrpc_seq_t hard_ack, top, seq; @@ -48,6 +50,8 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_call *call, serial = call->ackr_serial; hard_ack = READ_ONCE(call->rx_hard_ack); top = smp_load_acquire(&call->rx_top); + *_hard_ack = hard_ack; + *_top = top; pkt->ack.bufferSpace = htons(8); pkt->ack.maxSkew = htons(call->ackr_skew); @@ -96,6 +100,7 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) struct msghdr msg; struct kvec iov[2]; rxrpc_serial_t serial; + rxrpc_seq_t hard_ack, top; size_t len, n; bool ping = false; int ioc, ret; @@ -146,7 +151,7 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) goto out; } ping = (call->ackr_reason == RXRPC_ACK_PING); - n = rxrpc_fill_out_ack(call, pkt); + n = rxrpc_fill_out_ack(call, pkt, &hard_ack, &top); call->ackr_reason = 0; spin_unlock_bh(&call->lock); @@ -203,18 +208,22 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) if (ping) call->ackr_ping_time = ktime_get_real(); - if (ret < 0 && call->state < RXRPC_CALL_COMPLETE) { - switch (type) { - case RXRPC_PACKET_TYPE_ACK: + if (type == RXRPC_PACKET_TYPE_ACK && + call->state < RXRPC_CALL_COMPLETE) { + if (ret < 0) { clear_bit(RXRPC_CALL_PINGING, &call->flags); rxrpc_propose_ACK(call, pkt->ack.reason, ntohs(pkt->ack.maxSkew), ntohl(pkt->ack.serial), true, true, rxrpc_propose_ack_retry_tx); - break; - case RXRPC_PACKET_TYPE_ABORT: - break; + } else { + spin_lock_bh(&call->lock); + if (after(hard_ack, call->ackr_consumed)) + call->ackr_consumed = hard_ack; + if (after(top, call->ackr_seen)) + call->ackr_seen = top; + spin_unlock_bh(&call->lock); } } diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 8c7f3de45bac..a7458c398b9e 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -201,8 +201,19 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) _debug("%u,%u,%02x", hard_ack, top, flags); trace_rxrpc_receive(call, rxrpc_receive_rotate, serial, hard_ack); - if (flags & RXRPC_LAST_PACKET) + if (flags & RXRPC_LAST_PACKET) { rxrpc_end_rx_phase(call); + } else { + /* Check to see if there's an ACK that needs sending. */ + if (after_eq(hard_ack, call->ackr_consumed + 2) || + after_eq(top, call->ackr_seen + 2) || + (hard_ack == top && after(hard_ack, call->ackr_consumed))) + rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, 0, serial, + true, false, + rxrpc_propose_ack_rotate_rx); + if (call->ackr_reason) + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK); + } } /* From 50f4c7b73f831a53fa9ddeb9bdf4cfb5b23d3aa7 Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Wed, 7 Sep 2016 10:40:24 +0800 Subject: [PATCH 1486/1715] netfilter: xt_TCPMSS: Refactor the codes to decrease one condition check and more readable The origin codes perform two condition checks with dst_mtu(skb_dst(skb)) and in_mtu. And the last statement is "min(dst_mtu(skb_dst(skb)), in_mtu) - minlen". It may let reader think about how about the result. Would it be negative. Now assign the result of min(dst_mtu(skb_dst(skb)), in_mtu) to a new variable, then only perform one condition check, and it is more readable. Signed-off-by: Gao Feng Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_TCPMSS.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index e118397254af..872db2d0e2a9 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -110,18 +110,14 @@ tcpmss_mangle_packet(struct sk_buff *skb, if (info->mss == XT_TCPMSS_CLAMP_PMTU) { struct net *net = par->net; unsigned int in_mtu = tcpmss_reverse_mtu(net, skb, family); + unsigned int min_mtu = min(dst_mtu(skb_dst(skb)), in_mtu); - if (dst_mtu(skb_dst(skb)) <= minlen) { + if (min_mtu <= minlen) { net_err_ratelimited("unknown or invalid path-MTU (%u)\n", - dst_mtu(skb_dst(skb))); + min_mtu); return -1; } - if (in_mtu <= minlen) { - net_err_ratelimited("unknown or invalid path-MTU (%u)\n", - in_mtu); - return -1; - } - newmss = min(dst_mtu(skb_dst(skb)), in_mtu) - minlen; + newmss = min_mtu - minlen; } else newmss = info->mss; From c5136b15ea364124299c8a9ba96b300e96061e3a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 21 Sep 2016 11:35:01 -0400 Subject: [PATCH 1487/1715] netfilter: bridge: add and use br_nf_hook_thresh This replaces the last uses of NF_HOOK_THRESH(). Followup patch will remove it and rename nf_hook_thresh. The reason is that inet (non-bridge) netfilter no longer invokes the hooks from hooks, so we do no longer need the thresh value to skip hooks with a lower priority. The bridge netfilter however may need to do this. br_nf_hook_thresh is a wrapper that is supposed to do this, i.e. only call hooks with a priority that exceeds NF_BR_PRI_BRNF. It's used only in the recursion cases of br_netfilter. It invokes nf_hook_slow while holding an rcu read-side critical section to make a future cleanup simpler. Signed-off-by: Florian Westphal Signed-off-by: Aaron Conole Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/br_netfilter.h | 6 +++ net/bridge/br_netfilter_hooks.c | 60 +++++++++++++++++++++++----- net/bridge/br_netfilter_ipv6.c | 12 +++--- 3 files changed, 62 insertions(+), 16 deletions(-) diff --git a/include/net/netfilter/br_netfilter.h b/include/net/netfilter/br_netfilter.h index e8d1448425a7..0b0c35c37125 100644 --- a/include/net/netfilter/br_netfilter.h +++ b/include/net/netfilter/br_netfilter.h @@ -15,6 +15,12 @@ static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb) void nf_bridge_update_protocol(struct sk_buff *skb); +int br_nf_hook_thresh(unsigned int hook, struct net *net, struct sock *sk, + struct sk_buff *skb, struct net_device *indev, + struct net_device *outdev, + int (*okfn)(struct net *, struct sock *, + struct sk_buff *)); + static inline struct nf_bridge_info * nf_bridge_info_get(const struct sk_buff *skb) { diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 77e7f69bf80d..6029af47377d 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -395,11 +396,10 @@ bridged_dnat: skb->dev = nf_bridge->physindev; nf_bridge_update_protocol(skb); nf_bridge_push_encap_header(skb); - NF_HOOK_THRESH(NFPROTO_BRIDGE, - NF_BR_PRE_ROUTING, - net, sk, skb, skb->dev, NULL, - br_nf_pre_routing_finish_bridge, - 1); + br_nf_hook_thresh(NF_BR_PRE_ROUTING, + net, sk, skb, skb->dev, + NULL, + br_nf_pre_routing_finish); return 0; } ether_addr_copy(eth_hdr(skb)->h_dest, dev->dev_addr); @@ -417,10 +417,8 @@ bridged_dnat: skb->dev = nf_bridge->physindev; nf_bridge_update_protocol(skb); nf_bridge_push_encap_header(skb); - NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, net, sk, skb, - skb->dev, NULL, - br_handle_frame_finish, 1); - + br_nf_hook_thresh(NF_BR_PRE_ROUTING, net, sk, skb, skb->dev, NULL, + br_handle_frame_finish); return 0; } @@ -992,6 +990,50 @@ static struct notifier_block brnf_notifier __read_mostly = { .notifier_call = brnf_device_event, }; +/* recursively invokes nf_hook_slow (again), skipping already-called + * hooks (< NF_BR_PRI_BRNF). + * + * Called with rcu read lock held. + */ +int br_nf_hook_thresh(unsigned int hook, struct net *net, + struct sock *sk, struct sk_buff *skb, + struct net_device *indev, + struct net_device *outdev, + int (*okfn)(struct net *, struct sock *, + struct sk_buff *)) +{ + struct nf_hook_ops *elem; + struct nf_hook_state state; + struct list_head *head; + int ret; + + head = &net->nf.hooks[NFPROTO_BRIDGE][hook]; + + list_for_each_entry_rcu(elem, head, list) { + struct nf_hook_ops *next; + + next = list_entry_rcu(list_next_rcu(&elem->list), + struct nf_hook_ops, list); + if (next->priority <= NF_BR_PRI_BRNF) + continue; + } + + if (&elem->list == head) + return okfn(net, sk, skb); + + /* We may already have this, but read-locks nest anyway */ + rcu_read_lock(); + nf_hook_state_init(&state, head, hook, NF_BR_PRI_BRNF + 1, + NFPROTO_BRIDGE, indev, outdev, sk, net, okfn); + + ret = nf_hook_slow(skb, &state); + rcu_read_unlock(); + if (ret == 1) + ret = okfn(net, sk, skb); + + return ret; +} + #ifdef CONFIG_SYSCTL static int brnf_sysctl_call_tables(struct ctl_table *ctl, int write, diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c index 5e59a8457e7b..5989661c659f 100644 --- a/net/bridge/br_netfilter_ipv6.c +++ b/net/bridge/br_netfilter_ipv6.c @@ -187,10 +187,9 @@ static int br_nf_pre_routing_finish_ipv6(struct net *net, struct sock *sk, struc skb->dev = nf_bridge->physindev; nf_bridge_update_protocol(skb); nf_bridge_push_encap_header(skb); - NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, - net, sk, skb, skb->dev, NULL, - br_nf_pre_routing_finish_bridge, - 1); + br_nf_hook_thresh(NF_BR_PRE_ROUTING, + net, sk, skb, skb->dev, NULL, + br_nf_pre_routing_finish_bridge); return 0; } ether_addr_copy(eth_hdr(skb)->h_dest, dev->dev_addr); @@ -207,9 +206,8 @@ static int br_nf_pre_routing_finish_ipv6(struct net *net, struct sock *sk, struc skb->dev = nf_bridge->physindev; nf_bridge_update_protocol(skb); nf_bridge_push_encap_header(skb); - NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, net, sk, skb, - skb->dev, NULL, - br_handle_frame_finish, 1); + br_nf_hook_thresh(NF_BR_PRE_ROUTING, net, sk, skb, + skb->dev, NULL, br_handle_frame_finish); return 0; } From fe72926b792e52ab00abfa81a201805bfb2247d6 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 21 Sep 2016 11:35:02 -0400 Subject: [PATCH 1488/1715] netfilter: call nf_hook_state_init with rcu_read_lock held This makes things simpler because we can store the head of the list in the nf_state structure without worrying about concurrent add/delete of hook elements from the list. A future commit will make use of this to implement a simpler linked-list. Signed-off-by: Florian Westphal Signed-off-by: Aaron Conole Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 8 +++++++- include/linux/netfilter_ingress.h | 1 + 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 9230f9aee896..ad444f0b4ed0 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -174,10 +174,16 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, if (!list_empty(hook_list)) { struct nf_hook_state state; + int ret; + /* We may already have this, but read-locks nest anyway */ + rcu_read_lock(); nf_hook_state_init(&state, hook_list, hook, thresh, pf, indev, outdev, sk, net, okfn); - return nf_hook_slow(skb, &state); + + ret = nf_hook_slow(skb, &state); + rcu_read_unlock(); + return ret; } return 1; } diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h index 5fcd375ef175..6965ba09eba7 100644 --- a/include/linux/netfilter_ingress.h +++ b/include/linux/netfilter_ingress.h @@ -14,6 +14,7 @@ static inline bool nf_hook_ingress_active(const struct sk_buff *skb) return !list_empty(&skb->dev->nf_hooks_ingress); } +/* caller must hold rcu_read_lock */ static inline int nf_hook_ingress(struct sk_buff *skb) { struct nf_hook_state state; From 2c1e2703ff812ccaa42a4bc8a25803955e342b85 Mon Sep 17 00:00:00 2001 From: Aaron Conole Date: Wed, 21 Sep 2016 11:35:03 -0400 Subject: [PATCH 1489/1715] netfilter: call nf_hook_ingress with rcu_read_lock This commit ensures that the rcu read-side lock is held while the ingress hook is called. This ensures that a call to nf_hook_slow (and ultimately nf_ingress) will be read protected. Signed-off-by: Aaron Conole Signed-off-by: Pablo Neira Ayuso --- net/core/dev.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index 34b5322bc081..064919425b7d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4040,12 +4040,17 @@ static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev, { #ifdef CONFIG_NETFILTER_INGRESS if (nf_hook_ingress_active(skb)) { + int ingress_retval; + if (*pt_prev) { *ret = deliver_skb(skb, *pt_prev, orig_dev); *pt_prev = NULL; } - return nf_hook_ingress(skb); + rcu_read_lock(); + ingress_retval = nf_hook_ingress(skb); + rcu_read_unlock(); + return ingress_retval; } #endif /* CONFIG_NETFILTER_INGRESS */ return 0; From e2361cb90a0327bdab34d01d1a7b9dbd67c31e60 Mon Sep 17 00:00:00 2001 From: Aaron Conole Date: Wed, 21 Sep 2016 11:35:04 -0400 Subject: [PATCH 1490/1715] netfilter: Remove explicit rcu_read_lock in nf_hook_slow All of the callers of nf_hook_slow already hold the rcu_read_lock, so this cleanup removes the recursive call. This is just a cleanup, as the locking code gracefully handles this situation. Signed-off-by: Aaron Conole Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebt_redirect.c | 2 +- net/bridge/netfilter/ebtables.c | 2 +- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 2 +- net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 2 +- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 2 +- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 2 +- net/netfilter/core.c | 6 +----- net/netfilter/nf_conntrack_core.c | 2 +- net/netfilter/nf_conntrack_h323_main.c | 2 +- net/netfilter/nf_conntrack_helper.c | 2 +- net/netfilter/nfnetlink_cthelper.c | 2 +- net/netfilter/nfnetlink_log.c | 8 ++++++-- net/netfilter/nfnetlink_queue.c | 2 +- net/netfilter/xt_helper.c | 2 +- 14 files changed, 19 insertions(+), 19 deletions(-) diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c index 203964997a51..2e7c4f974340 100644 --- a/net/bridge/netfilter/ebt_redirect.c +++ b/net/bridge/netfilter/ebt_redirect.c @@ -24,7 +24,7 @@ ebt_redirect_tg(struct sk_buff *skb, const struct xt_action_param *par) return EBT_DROP; if (par->hooknum != NF_BR_BROUTING) - /* rcu_read_lock()ed by nf_hook_slow */ + /* rcu_read_lock()ed by nf_hook_thresh */ ether_addr_copy(eth_hdr(skb)->h_dest, br_port_get_rcu(par->in)->br->dev->dev_addr); else diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index cceac5bb658f..dd7133216c9c 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -146,7 +146,7 @@ ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb, return 1; if (NF_INVF(e, EBT_IOUT, ebt_dev_check(e->out, out))) return 1; - /* rcu_read_lock()ed by nf_hook_slow */ + /* rcu_read_lock()ed by nf_hook_thresh */ if (in && (p = br_port_get_rcu(in)) != NULL && NF_INVF(e, EBT_ILOGICALIN, ebt_dev_check(e->logical_in, p->br->dev))) diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 870aebda2932..713c09a74b90 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -110,7 +110,7 @@ static unsigned int ipv4_helper(void *priv, if (!help) return NF_ACCEPT; - /* rcu_read_lock()ed by nf_hook_slow */ + /* rcu_read_lock()ed by nf_hook_thresh */ helper = rcu_dereference(help->helper); if (!helper) return NF_ACCEPT; diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 4b5904bc2614..d075b3cf2400 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -149,7 +149,7 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, return -NF_ACCEPT; } - /* rcu_read_lock()ed by nf_hook_slow */ + /* rcu_read_lock()ed by nf_hook_thresh */ innerproto = __nf_ct_l4proto_find(PF_INET, origtuple.dst.protonum); /* Ordinarily, we'd expect the inverted tupleproto, but it's diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 1aa5848764a7..963ee3848675 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -115,7 +115,7 @@ static unsigned int ipv6_helper(void *priv, help = nfct_help(ct); if (!help) return NF_ACCEPT; - /* rcu_read_lock()ed by nf_hook_slow */ + /* rcu_read_lock()ed by nf_hook_thresh */ helper = rcu_dereference(help->helper); if (!helper) return NF_ACCEPT; diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 660bc10c7a9c..f5a61bc3ec2b 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -165,7 +165,7 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl, return -NF_ACCEPT; } - /* rcu_read_lock()ed by nf_hook_slow */ + /* rcu_read_lock()ed by nf_hook_thresh */ inproto = __nf_ct_l4proto_find(PF_INET6, origtuple.dst.protonum); /* Ordinarily, we'd expect the inverted tupleproto, but it's diff --git a/net/netfilter/core.c b/net/netfilter/core.c index f39276d1c2d7..c8faf8102394 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -291,16 +291,13 @@ repeat: /* Returns 1 if okfn() needs to be executed by the caller, - * -EPERM for NF_DROP, 0 otherwise. */ + * -EPERM for NF_DROP, 0 otherwise. Caller must hold rcu_read_lock. */ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state) { struct nf_hook_ops *elem; unsigned int verdict; int ret = 0; - /* We may already have this, but read-locks nest anyway */ - rcu_read_lock(); - elem = list_entry_rcu(state->hook_list, struct nf_hook_ops, list); next_hook: verdict = nf_iterate(state->hook_list, skb, state, &elem); @@ -321,7 +318,6 @@ next_hook: kfree_skb(skb); } } - rcu_read_unlock(); return ret; } EXPORT_SYMBOL(nf_hook_slow); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 8d1ddb9b63ed..c94ec197845c 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1275,7 +1275,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, skb->nfct = NULL; } - /* rcu_read_lock()ed by nf_hook_slow */ + /* rcu_read_lock()ed by nf_hook_thresh */ l3proto = __nf_ct_l3proto_find(pf); ret = l3proto->get_l4proto(skb, skb_network_offset(skb), &dataoff, &protonum); diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 5c0db5c64734..f65d93639d12 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -736,7 +736,7 @@ static int callforward_do_filter(struct net *net, const struct nf_afinfo *afinfo; int ret = 0; - /* rcu_read_lock()ed by nf_hook_slow() */ + /* rcu_read_lock()ed by nf_hook_thresh */ afinfo = nf_get_afinfo(family); if (!afinfo) return 0; diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 4ffe388a9a1e..336e21559e01 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -346,7 +346,7 @@ void nf_ct_helper_log(struct sk_buff *skb, const struct nf_conn *ct, /* Called from the helper function, this call never fails */ help = nfct_help(ct); - /* rcu_read_lock()ed by nf_hook_slow */ + /* rcu_read_lock()ed by nf_hook_thresh */ helper = rcu_dereference(help->helper); nf_log_packet(nf_ct_net(ct), nf_ct_l3num(ct), 0, skb, NULL, NULL, NULL, diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index e924e95fcc7f..3b79f34b5095 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -43,7 +43,7 @@ nfnl_userspace_cthelper(struct sk_buff *skb, unsigned int protoff, if (help == NULL) return NF_DROP; - /* rcu_read_lock()ed by nf_hook_slow */ + /* rcu_read_lock()ed by nf_hook_thresh */ helper = rcu_dereference(help->helper); if (helper == NULL) return NF_DROP; diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 6577db524ef6..eb086a192c5a 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -442,7 +442,9 @@ __build_packet_message(struct nfnl_log_net *log, if (nla_put_be32(inst->skb, NFULA_IFINDEX_PHYSINDEV, htonl(indev->ifindex)) || /* this is the bridge group "brX" */ - /* rcu_read_lock()ed by nf_hook_slow or nf_log_packet */ + /* rcu_read_lock()ed by nf_hook_thresh or + * nf_log_packet. + */ nla_put_be32(inst->skb, NFULA_IFINDEX_INDEV, htonl(br_port_get_rcu(indev)->br->dev->ifindex))) goto nla_put_failure; @@ -477,7 +479,9 @@ __build_packet_message(struct nfnl_log_net *log, if (nla_put_be32(inst->skb, NFULA_IFINDEX_PHYSOUTDEV, htonl(outdev->ifindex)) || /* this is the bridge group "brX" */ - /* rcu_read_lock()ed by nf_hook_slow or nf_log_packet */ + /* rcu_read_lock()ed by nf_hook_thresh or + * nf_log_packet. + */ nla_put_be32(inst->skb, NFULA_IFINDEX_OUTDEV, htonl(br_port_get_rcu(outdev)->br->dev->ifindex))) goto nla_put_failure; diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 808da34f94cd..7caa8b082c41 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -740,7 +740,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) struct net *net = entry->state.net; struct nfnl_queue_net *q = nfnl_queue_pernet(net); - /* rcu_read_lock()ed by nf_hook_slow() */ + /* rcu_read_lock()ed by nf_hook_thresh */ queue = instance_lookup(q, queuenum); if (!queue) return -ESRCH; diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c index 805c9f64a04c..f679dd4c272a 100644 --- a/net/netfilter/xt_helper.c +++ b/net/netfilter/xt_helper.c @@ -41,7 +41,7 @@ helper_mt(const struct sk_buff *skb, struct xt_action_param *par) if (!master_help) return ret; - /* rcu_read_lock()ed by nf_hook_slow */ + /* rcu_read_lock()ed by nf_hook_thresh */ helper = rcu_dereference(master_help->helper); if (!helper) return ret; From d4bb5caa9cc1a802ba25f605b24b5640c025806b Mon Sep 17 00:00:00 2001 From: Aaron Conole Date: Wed, 21 Sep 2016 11:35:05 -0400 Subject: [PATCH 1491/1715] netfilter: Only allow sane values in nf_register_net_hook This commit adds an upfront check for sane values to be passed when registering a netfilter hook. This will be used in a future patch for a simplified hook list traversal. Signed-off-by: Aaron Conole Signed-off-by: Pablo Neira Ayuso --- net/netfilter/core.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/netfilter/core.c b/net/netfilter/core.c index c8faf8102394..67b74287535d 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -89,6 +89,11 @@ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg) struct nf_hook_entry *entry; struct nf_hook_ops *elem; + if (reg->pf == NFPROTO_NETDEV && + (reg->hooknum != NF_NETDEV_INGRESS || + !reg->dev || dev_net(reg->dev) != net)) + return -EINVAL; + entry = kmalloc(sizeof(*entry), GFP_KERNEL); if (!entry) return -ENOMEM; From a7056c5ba67ee6a956b42cf9ff9ba3a6a0bd9794 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 24 Sep 2016 18:05:27 +0100 Subject: [PATCH 1492/1715] rxrpc: Send an immediate ACK if we fill in a hole Send an immediate ACK if we fill in a hole in the buffer left by an out-of-sequence packet. This may allow the congestion management in the peer to avoid a retransmission if packets got reordered on the wire. Signed-off-by: David Howells --- net/rxrpc/input.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 349698d87ad1..757c16f033a0 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -331,8 +331,16 @@ next_subpacket: call->rxtx_annotations[ix] = annotation; smp_wmb(); call->rxtx_buffer[ix] = skb; - if (after(seq, call->rx_top)) + if (after(seq, call->rx_top)) { smp_store_release(&call->rx_top, seq); + } else if (before(seq, call->rx_top)) { + /* Send an immediate ACK if we fill in a hole */ + if (!ack) { + ack = RXRPC_ACK_DELAY; + ack_serial = serial; + } + immediate_ack = true; + } if (flags & RXRPC_LAST_PACKET) { set_bit(RXRPC_CALL_RX_LAST, &call->flags); trace_rxrpc_receive(call, rxrpc_receive_queue_last, serial, seq); From b69d94d7991f83928d3ea18fe12ab011fa852bb0 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 24 Sep 2016 18:05:27 +0100 Subject: [PATCH 1493/1715] rxrpc: Include the last reply DATA serial number in the final ACK In a client call, include the serial number of the last DATA packet of the reply in the final ACK. Signed-off-by: David Howells --- net/rxrpc/recvmsg.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index a7458c398b9e..038ae62ddb4d 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -133,7 +133,7 @@ static int rxrpc_recvmsg_new_call(struct rxrpc_sock *rx, /* * End the packet reception phase. */ -static void rxrpc_end_rx_phase(struct rxrpc_call *call) +static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial) { _enter("%d,%s", call->debug_id, rxrpc_call_states[call->state]); @@ -141,7 +141,7 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call) ASSERTCMP(call->rx_hard_ack, ==, call->rx_top); if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY) { - rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, 0, true, false, + rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, serial, true, false, rxrpc_propose_ack_terminal_ack); rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK); } @@ -202,7 +202,7 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) _debug("%u,%u,%02x", hard_ack, top, flags); trace_rxrpc_receive(call, rxrpc_receive_rotate, serial, hard_ack); if (flags & RXRPC_LAST_PACKET) { - rxrpc_end_rx_phase(call); + rxrpc_end_rx_phase(call, serial); } else { /* Check to see if there's an ACK that needs sending. */ if (after_eq(hard_ack, call->ackr_consumed + 2) || From dd7c1ee59a90ca8a75bce72c721851d5550f3c59 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 24 Sep 2016 18:05:27 +0100 Subject: [PATCH 1494/1715] rxrpc: Reinitialise the call ACK and timer state for client reply phase Clear the ACK reason, ACK timer and resend timer when entering the client reply phase when the first DATA packet is received. New ACKs will be proposed once the data is queued. The resend timer is no longer relevant and we need to cancel ACKs scheduled to probe for a lost reply. Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 1 + net/rxrpc/input.c | 9 +++++++++ net/rxrpc/misc.c | 1 + 3 files changed, 11 insertions(+) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index e3bf9c0e3ad1..cdd35e2b40ba 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -682,6 +682,7 @@ extern const char rxrpc_rtt_rx_traces[rxrpc_rtt_rx__nr_trace][5]; enum rxrpc_timer_trace { rxrpc_timer_begin, + rxrpc_timer_init_for_reply, rxrpc_timer_expired, rxrpc_timer_set_for_ack, rxrpc_timer_set_for_resend, diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 757c16f033a0..bda11eb2ab2a 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -149,6 +149,15 @@ static bool rxrpc_receiving_reply(struct rxrpc_call *call) { rxrpc_seq_t top = READ_ONCE(call->tx_top); + if (call->ackr_reason) { + spin_lock_bh(&call->lock); + call->ackr_reason = 0; + call->resend_at = call->expire_at; + call->ack_at = call->expire_at; + spin_unlock_bh(&call->lock); + rxrpc_set_timer(call, rxrpc_timer_init_for_reply); + } + if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) rxrpc_rotate_tx_window(call, top); if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) { diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index a473fd7dabaa..901c012a2700 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -191,6 +191,7 @@ const char rxrpc_rtt_rx_traces[rxrpc_rtt_rx__nr_trace][5] = { const char rxrpc_timer_traces[rxrpc_timer__nr_trace][8] = { [rxrpc_timer_begin] = "Begin ", [rxrpc_timer_expired] = "*EXPR*", + [rxrpc_timer_init_for_reply] = "IniRpl", [rxrpc_timer_set_for_ack] = "SetAck", [rxrpc_timer_set_for_send] = "SetTx ", [rxrpc_timer_set_for_resend] = "SetRTx", From df0562a72dba13ab49c7dd7cb15170697b9848ee Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 24 Sep 2016 23:00:54 +0100 Subject: [PATCH 1495/1715] rxrpc: Delay the resend timer to allow for nsec->jiffies conv error When determining the resend timer value, we have a value in nsec but the timer is in jiffies which may be a million or more times more coarse. nsecs_to_jiffies() rounds down - which means that the resend timeout expressed as jiffies is very likely earlier than the one expressed as nanoseconds from which it was derived. The problem is that rxrpc_resend() gets triggered by the timer, but can't then find anything to resend yet. It sets the timer again - but gets kicked off immediately again and again until the nanosecond-based expiry time is reached and we actually retransmit. Fix this by adding 1 to the jiffies-based resend_at value to counteract the rounding and make sure that the timer happens after the nanosecond-based expiry is passed. Alternatives would be to adjust the timestamp on the packets to align with the jiffie scale or to switch back to using jiffie-timestamps. Signed-off-by: David Howells --- net/rxrpc/call_event.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index a78a92fe5d77..d5bf9ce7ec6f 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -200,8 +200,14 @@ static void rxrpc_resend(struct rxrpc_call *call) ktime_to_ns(ktime_sub(skb->tstamp, max_age))); } - resend_at = ktime_sub(ktime_add_ms(oldest, rxrpc_resend_timeout), now); - call->resend_at = jiffies + nsecs_to_jiffies(ktime_to_ns(resend_at)); + resend_at = ktime_add_ms(oldest, rxrpc_resend_timeout); + call->resend_at = jiffies + + nsecs_to_jiffies(ktime_to_ns(ktime_sub(resend_at, now))) + + 1; /* We have to make sure that the calculated jiffies value + * falls at or after the nsec value, or we shall loop + * ceaselessly because the timer times out, but we haven't + * reached the nsec timeout yet. + */ /* Now go through the Tx window and perform the retransmissions. We * have to drop the lock for each send. If an ACK comes in whilst the From 31a1b989508ce64e8ead504884ced01e61870852 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 24 Sep 2016 18:05:26 +0100 Subject: [PATCH 1496/1715] rxrpc: Generate a summary of the ACK state for later use Generate a summary of the Tx buffer packet state when an ACK is received for use in a later patch that does congestion management. Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 14 +++++++++++++ net/rxrpc/input.c | 45 +++++++++++++++++++++++++++++++---------- 2 files changed, 48 insertions(+), 11 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index cdd35e2b40ba..1a700b6a998b 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -540,6 +540,20 @@ struct rxrpc_call { /* transmission-phase ACK management */ rxrpc_serial_t acks_latest; /* serial number of latest ACK received */ + rxrpc_seq_t acks_lowest_nak; /* Lowest NACK in the buffer (or ==tx_hard_ack) */ +}; + +/* + * Summary of a new ACK and the changes it made. + */ +struct rxrpc_ack_summary { + u8 ack_reason; + u8 nr_acks; /* Number of ACKs in packet */ + u8 nr_nacks; /* Number of NACKs in packet */ + u8 nr_new_acks; /* Number of new ACKs in packet */ + u8 nr_new_nacks; /* Number of new NACKs in packet */ + u8 nr_rot_new_acks; /* Number of rotated new ACKs */ + bool new_low_nack; /* T if new low NACK found */ }; enum rxrpc_skb_trace { diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index bda11eb2ab2a..dd699667eeef 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -56,12 +56,20 @@ static void rxrpc_send_ping(struct rxrpc_call *call, struct sk_buff *skb, /* * Apply a hard ACK by advancing the Tx window. */ -static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to) +static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, + struct rxrpc_ack_summary *summary) { struct sk_buff *skb, *list = NULL; int ix; u8 annotation; + if (call->acks_lowest_nak == call->tx_hard_ack) { + call->acks_lowest_nak = to; + } else if (before_eq(call->acks_lowest_nak, to)) { + summary->new_low_nack = true; + call->acks_lowest_nak = to; + } + spin_lock(&call->lock); while (before(call->tx_hard_ack, to)) { @@ -77,6 +85,8 @@ static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to) if (annotation & RXRPC_TX_ANNO_LAST) set_bit(RXRPC_CALL_TX_LAST, &call->flags); + if ((annotation & RXRPC_TX_ANNO_MASK) != RXRPC_TX_ANNO_ACK) + summary->nr_rot_new_acks++; } spin_unlock(&call->lock); @@ -147,6 +157,7 @@ bad_state: */ static bool rxrpc_receiving_reply(struct rxrpc_call *call) { + struct rxrpc_ack_summary summary = { 0 }; rxrpc_seq_t top = READ_ONCE(call->tx_top); if (call->ackr_reason) { @@ -159,7 +170,7 @@ static bool rxrpc_receiving_reply(struct rxrpc_call *call) } if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) - rxrpc_rotate_tx_window(call, top); + rxrpc_rotate_tx_window(call, top, &summary); if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) { rxrpc_proto_abort("TXL", call, top); return false; @@ -508,7 +519,8 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb, * the time the ACK was sent. */ static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks, - rxrpc_seq_t seq, int nr_acks) + rxrpc_seq_t seq, int nr_acks, + struct rxrpc_ack_summary *summary) { bool resend = false; int ix; @@ -521,14 +533,23 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks, annotation &= ~RXRPC_TX_ANNO_MASK; switch (*acks++) { case RXRPC_ACK_TYPE_ACK: + summary->nr_acks++; if (anno_type == RXRPC_TX_ANNO_ACK) continue; + summary->nr_new_acks++; call->rxtx_annotations[ix] = RXRPC_TX_ANNO_ACK | annotation; break; case RXRPC_ACK_TYPE_NACK: + if (!summary->nr_nacks && + call->acks_lowest_nak != seq) { + call->acks_lowest_nak = seq; + summary->new_low_nack = true; + } + summary->nr_nacks++; if (anno_type == RXRPC_TX_ANNO_NAK) continue; + summary->nr_new_nacks++; if (anno_type == RXRPC_TX_ANNO_RETRANS) continue; call->rxtx_annotations[ix] = @@ -558,7 +579,7 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks, static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, u16 skew) { - u8 ack_reason; + struct rxrpc_ack_summary summary = { 0 }; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); union { struct rxrpc_ackpacket ack; @@ -581,10 +602,10 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, first_soft_ack = ntohl(buf.ack.firstPacket); hard_ack = first_soft_ack - 1; nr_acks = buf.ack.nAcks; - ack_reason = (buf.ack.reason < RXRPC_ACK__INVALID ? - buf.ack.reason : RXRPC_ACK__INVALID); + summary.ack_reason = (buf.ack.reason < RXRPC_ACK__INVALID ? + buf.ack.reason : RXRPC_ACK__INVALID); - trace_rxrpc_rx_ack(call, first_soft_ack, ack_reason, nr_acks); + trace_rxrpc_rx_ack(call, first_soft_ack, summary.ack_reason, nr_acks); _proto("Rx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }", sp->hdr.serial, @@ -592,7 +613,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, first_soft_ack, ntohl(buf.ack.previousPacket), acked_serial, - rxrpc_ack_names[ack_reason], + rxrpc_ack_names[summary.ack_reason], buf.ack.nAcks); if (buf.ack.reason == RXRPC_ACK_PING_RESPONSE) @@ -649,12 +670,13 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, return rxrpc_proto_abort("AKN", call, 0); if (after(hard_ack, call->tx_hard_ack)) - rxrpc_rotate_tx_window(call, hard_ack); + rxrpc_rotate_tx_window(call, hard_ack, &summary); if (nr_acks > 0) { if (skb_copy_bits(skb, sp->offset, buf.acks, nr_acks) < 0) return rxrpc_proto_abort("XSA", call, 0); - rxrpc_input_soft_acks(call, buf.acks, first_soft_ack, nr_acks); + rxrpc_input_soft_acks(call, buf.acks, first_soft_ack, nr_acks, + &summary); } if (test_bit(RXRPC_CALL_TX_LAST, &call->flags)) { @@ -669,11 +691,12 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, */ static void rxrpc_input_ackall(struct rxrpc_call *call, struct sk_buff *skb) { + struct rxrpc_ack_summary summary = { 0 }; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); _proto("Rx ACKALL %%%u", sp->hdr.serial); - rxrpc_rotate_tx_window(call, call->tx_top); + rxrpc_rotate_tx_window(call, call->tx_top, &summary); if (test_bit(RXRPC_CALL_TX_LAST, &call->flags)) rxrpc_end_tx_phase(call, false, "ETL"); } From 0d967960d39ee89f9e0289692e9f7232f490e55c Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 24 Sep 2016 18:05:27 +0100 Subject: [PATCH 1497/1715] rxrpc: Schedule an ACK if the reply to a client call appears overdue If we've sent all the request data in a client call but haven't seen any sign of the reply data yet, schedule an ACK to be sent to the server to find out if the reply data got lost. If the server hasn't yet hard-ACK'd the request data, we send a PING ACK to demand a response to find out whether we need to retransmit. If the server says it has received all of the data, we send an IDLE ACK to tell the server that we haven't received anything in the receive phase as yet. To make this work, a non-immediate PING ACK must carry a delay. I've chosen the same as the IDLE ACK for the moment. Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 2 ++ net/rxrpc/call_event.c | 1 + net/rxrpc/input.c | 8 ++++++++ net/rxrpc/misc.c | 2 ++ 4 files changed, 13 insertions(+) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 1a700b6a998b..b1e697fc9ffb 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -707,7 +707,9 @@ enum rxrpc_timer_trace { extern const char rxrpc_timer_traces[rxrpc_timer__nr_trace][8]; enum rxrpc_propose_ack_trace { + rxrpc_propose_ack_client_tx_end, rxrpc_propose_ack_input_data, + rxrpc_propose_ack_ping_for_lost_reply, rxrpc_propose_ack_ping_for_params, rxrpc_propose_ack_respond_to_ack, rxrpc_propose_ack_respond_to_ping, diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index d5bf9ce7ec6f..05b94d1acf52 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -100,6 +100,7 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, expiry = rxrpc_soft_ack_delay; break; + case RXRPC_ACK_PING: case RXRPC_ACK_IDLE: if (rxrpc_idle_ack_delay < expiry) expiry = rxrpc_idle_ack_delay; diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index dd699667eeef..0344f4494eb7 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -138,6 +138,8 @@ static bool rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun, write_unlock(&call->state_lock); if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY) { + rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, 0, false, true, + rxrpc_propose_ack_client_tx_end); trace_rxrpc_transmit(call, rxrpc_transmit_await_reply); } else { trace_rxrpc_transmit(call, rxrpc_transmit_end); @@ -684,6 +686,12 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, return; } + if (call->rxtx_annotations[call->tx_top & RXRPC_RXTX_BUFF_MASK] & + RXRPC_TX_ANNO_LAST && + summary.nr_acks == call->tx_top - hard_ack) + rxrpc_propose_ACK(call, RXRPC_ACK_PING, skew, sp->hdr.serial, + false, true, + rxrpc_propose_ack_ping_for_lost_reply); } /* diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index 901c012a2700..a608769343e6 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -198,7 +198,9 @@ const char rxrpc_timer_traces[rxrpc_timer__nr_trace][8] = { }; const char rxrpc_propose_ack_traces[rxrpc_propose_ack__nr_trace][8] = { + [rxrpc_propose_ack_client_tx_end] = "ClTxEnd", [rxrpc_propose_ack_input_data] = "DataIn ", + [rxrpc_propose_ack_ping_for_lost_reply] = "LostRpl", [rxrpc_propose_ack_ping_for_params] = "Params ", [rxrpc_propose_ack_respond_to_ack] = "Rsp2Ack", [rxrpc_propose_ack_respond_to_ping] = "Rsp2Png", From 57494343cb5d66962bb197878fb1cc576177db31 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 24 Sep 2016 18:05:27 +0100 Subject: [PATCH 1498/1715] rxrpc: Implement slow-start Implement RxRPC slow-start, which is similar to RFC 5681 for TCP. A tracepoint is added to log the state of the congestion management algorithm and the decisions it makes. Notes: (1) Since we send fixed-size DATA packets (apart from the final packet in each phase), counters and calculations are in terms of packets rather than bytes. (2) The ACK packet carries the equivalent of TCP SACK. (3) The FLIGHT_SIZE calculation in RFC 5681 doesn't seem particularly suited to SACK of a small number of packets. It seems that, almost inevitably, by the time three 'duplicate' ACKs have been seen, we have narrowed the loss down to one or two missing packets, and the FLIGHT_SIZE calculation ends up as 2. (4) In rxrpc_resend(), if there was no data that apparently needed retransmission, we transmit a PING ACK to ask the peer to tell us what its Rx window state is. Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 45 ++++++++++ net/rxrpc/ar-internal.h | 53 ++++++++++- net/rxrpc/call_event.c | 36 +++++++- net/rxrpc/call_object.c | 13 +++ net/rxrpc/conn_event.c | 1 + net/rxrpc/input.c | 169 +++++++++++++++++++++++++++++++++-- net/rxrpc/misc.c | 19 ++++ net/rxrpc/output.c | 9 +- net/rxrpc/sendmsg.c | 7 +- 9 files changed, 339 insertions(+), 13 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 56475497043d..ada12d00118c 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -570,6 +570,51 @@ TRACE_EVENT(rxrpc_retransmit, __entry->expiry) ); +TRACE_EVENT(rxrpc_congest, + TP_PROTO(struct rxrpc_call *call, struct rxrpc_ack_summary *summary, + rxrpc_serial_t ack_serial, enum rxrpc_congest_change change), + + TP_ARGS(call, summary, ack_serial, change), + + TP_STRUCT__entry( + __field(struct rxrpc_call *, call ) + __field(enum rxrpc_congest_change, change ) + __field(rxrpc_seq_t, hard_ack ) + __field(rxrpc_seq_t, top ) + __field(rxrpc_seq_t, lowest_nak ) + __field(rxrpc_serial_t, ack_serial ) + __field_struct(struct rxrpc_ack_summary, sum ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->change = change; + __entry->hard_ack = call->tx_hard_ack; + __entry->top = call->tx_top; + __entry->lowest_nak = call->acks_lowest_nak; + __entry->ack_serial = ack_serial; + memcpy(&__entry->sum, summary, sizeof(__entry->sum)); + ), + + TP_printk("c=%p %08x %s %08x %s cw=%u ss=%u nr=%u,%u nw=%u,%u r=%u b=%u u=%u d=%u l=%x%s%s%s", + __entry->call, + __entry->ack_serial, + rxrpc_ack_names[__entry->sum.ack_reason], + __entry->hard_ack, + rxrpc_congest_modes[__entry->sum.mode], + __entry->sum.cwnd, + __entry->sum.ssthresh, + __entry->sum.nr_acks, __entry->sum.nr_nacks, + __entry->sum.nr_new_acks, __entry->sum.nr_new_nacks, + __entry->sum.nr_rot_new_acks, + __entry->top - __entry->hard_ack, + __entry->sum.cumulative_acks, + __entry->sum.dup_acks, + __entry->lowest_nak, __entry->sum.new_low_nack ? "!" : "", + rxrpc_congest_changes[__entry->change], + __entry->sum.retrans_timeo ? " rTxTo" : "") + ); + #endif /* _TRACE_RXRPC_H */ /* This part must be outside protection */ diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index b1e697fc9ffb..ca96e547cb9a 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -402,6 +402,7 @@ enum rxrpc_call_flag { RXRPC_CALL_RX_LAST, /* Received the last packet (at rxtx_top) */ RXRPC_CALL_TX_LAST, /* Last packet in Tx buffer (at rxtx_top) */ RXRPC_CALL_PINGING, /* Ping in process */ + RXRPC_CALL_RETRANS_TIMEOUT, /* Retransmission due to timeout occurred */ }; /* @@ -446,6 +447,17 @@ enum rxrpc_call_completion { NR__RXRPC_CALL_COMPLETIONS }; +/* + * Call Tx congestion management modes. + */ +enum rxrpc_congest_mode { + RXRPC_CALL_SLOW_START, + RXRPC_CALL_CONGEST_AVOIDANCE, + RXRPC_CALL_PACKET_LOSS, + RXRPC_CALL_FAST_RETRANSMIT, + NR__RXRPC_CONGEST_MODES +}; + /* * RxRPC call definition * - matched by { connection, call_id } @@ -518,6 +530,20 @@ struct rxrpc_call { * not hard-ACK'd packet follows this. */ rxrpc_seq_t tx_top; /* Highest Tx slot allocated. */ + + /* TCP-style slow-start congestion control [RFC5681]. Since the SMSS + * is fixed, we keep these numbers in terms of segments (ie. DATA + * packets) rather than bytes. + */ +#define RXRPC_TX_SMSS RXRPC_JUMBO_DATALEN + u8 cong_cwnd; /* Congestion window size */ + u8 cong_extra; /* Extra to send for congestion management */ + u8 cong_ssthresh; /* Slow-start threshold */ + enum rxrpc_congest_mode cong_mode:8; /* Congestion management mode */ + u8 cong_dup_acks; /* Count of ACKs showing missing packets */ + u8 cong_cumul_acks; /* Cumulative ACK count */ + ktime_t cong_tstamp; /* Last time cwnd was changed */ + rxrpc_seq_t rx_hard_ack; /* Dead slot in buffer; the first received but not * consumed packet follows this. */ @@ -539,12 +565,13 @@ struct rxrpc_call { ktime_t ackr_ping_time; /* Time last ping sent */ /* transmission-phase ACK management */ + ktime_t acks_latest_ts; /* Timestamp of latest ACK received */ rxrpc_serial_t acks_latest; /* serial number of latest ACK received */ rxrpc_seq_t acks_lowest_nak; /* Lowest NACK in the buffer (or ==tx_hard_ack) */ }; /* - * Summary of a new ACK and the changes it made. + * Summary of a new ACK and the changes it made to the Tx buffer packet states. */ struct rxrpc_ack_summary { u8 ack_reason; @@ -554,6 +581,14 @@ struct rxrpc_ack_summary { u8 nr_new_nacks; /* Number of new NACKs in packet */ u8 nr_rot_new_acks; /* Number of rotated new ACKs */ bool new_low_nack; /* T if new low NACK found */ + bool retrans_timeo; /* T if reTx due to timeout happened */ + u8 flight_size; /* Number of unreceived transmissions */ + /* Place to stash values for tracing */ + enum rxrpc_congest_mode mode:8; + u8 cwnd; + u8 ssthresh; + u8 dup_acks; + u8 cumulative_acks; }; enum rxrpc_skb_trace { @@ -709,6 +744,7 @@ extern const char rxrpc_timer_traces[rxrpc_timer__nr_trace][8]; enum rxrpc_propose_ack_trace { rxrpc_propose_ack_client_tx_end, rxrpc_propose_ack_input_data, + rxrpc_propose_ack_ping_for_lost_ack, rxrpc_propose_ack_ping_for_lost_reply, rxrpc_propose_ack_ping_for_params, rxrpc_propose_ack_respond_to_ack, @@ -729,6 +765,21 @@ enum rxrpc_propose_ack_outcome { extern const char rxrpc_propose_ack_traces[rxrpc_propose_ack__nr_trace][8]; extern const char *const rxrpc_propose_ack_outcomes[rxrpc_propose_ack__nr_outcomes]; +enum rxrpc_congest_change { + rxrpc_cong_begin_retransmission, + rxrpc_cong_cleared_nacks, + rxrpc_cong_new_low_nack, + rxrpc_cong_no_change, + rxrpc_cong_progress, + rxrpc_cong_retransmit_again, + rxrpc_cong_rtt_window_end, + rxrpc_cong_saw_nack, + rxrpc_congest__nr_change +}; + +extern const char rxrpc_congest_modes[NR__RXRPC_CONGEST_MODES][10]; +extern const char rxrpc_congest_changes[rxrpc_congest__nr_change][9]; + extern const char *const rxrpc_pkts[]; extern const char const rxrpc_ack_names[RXRPC_ACK__INVALID + 1][4]; diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 05b94d1acf52..0e8478012212 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -146,6 +146,14 @@ void rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, spin_unlock_bh(&call->lock); } +/* + * Handle congestion being detected by the retransmit timeout. + */ +static void rxrpc_congestion_timeout(struct rxrpc_call *call) +{ + set_bit(RXRPC_CALL_RETRANS_TIMEOUT, &call->flags); +} + /* * Perform retransmission of NAK'd and unack'd packets. */ @@ -154,9 +162,9 @@ static void rxrpc_resend(struct rxrpc_call *call) struct rxrpc_skb_priv *sp; struct sk_buff *skb; rxrpc_seq_t cursor, seq, top; - ktime_t now = ktime_get_real(), max_age, oldest, resend_at; + ktime_t now = ktime_get_real(), max_age, oldest, resend_at, ack_ts; int ix; - u8 annotation, anno_type; + u8 annotation, anno_type, retrans = 0, unacked = 0; _enter("{%d,%d}", call->tx_hard_ack, call->tx_top); @@ -193,10 +201,13 @@ static void rxrpc_resend(struct rxrpc_call *call) oldest = skb->tstamp; continue; } + if (!(annotation & RXRPC_TX_ANNO_RESENT)) + unacked++; } /* Okay, we need to retransmit a packet. */ call->rxtx_annotations[ix] = RXRPC_TX_ANNO_RETRANS | annotation; + retrans++; trace_rxrpc_retransmit(call, seq, annotation | anno_type, ktime_to_ns(ktime_sub(skb->tstamp, max_age))); } @@ -210,6 +221,25 @@ static void rxrpc_resend(struct rxrpc_call *call) * reached the nsec timeout yet. */ + if (unacked) + rxrpc_congestion_timeout(call); + + /* If there was nothing that needed retransmission then it's likely + * that an ACK got lost somewhere. Send a ping to find out instead of + * retransmitting data. + */ + if (!retrans) { + rxrpc_set_timer(call, rxrpc_timer_set_for_resend); + spin_unlock_bh(&call->lock); + ack_ts = ktime_sub(now, call->acks_latest_ts); + if (ktime_to_ns(ack_ts) < call->peer->rtt) + goto out; + rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, false, + rxrpc_propose_ack_ping_for_lost_ack); + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK); + goto out; + } + /* Now go through the Tx window and perform the retransmissions. We * have to drop the lock for each send. If an ACK comes in whilst the * lock is dropped, it may clear some of the retransmission markers for @@ -260,6 +290,7 @@ static void rxrpc_resend(struct rxrpc_call *call) out_unlock: spin_unlock_bh(&call->lock); +out: _leave(""); } @@ -293,6 +324,7 @@ recheck_state: if (time_after_eq(now, call->expire_at)) { rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, ETIME); set_bit(RXRPC_CALL_EV_ABORT, &call->events); + goto recheck_state; } if (test_and_clear_bit(RXRPC_CALL_EV_ACK, &call->events) || diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index a53f4c2c0025..d4b3293b78fa 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -160,6 +160,14 @@ struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp) call->rx_winsize = rxrpc_rx_window_size; call->tx_winsize = 16; call->rx_expect_next = 1; + + if (RXRPC_TX_SMSS > 2190) + call->cong_cwnd = 2; + else if (RXRPC_TX_SMSS > 1095) + call->cong_cwnd = 3; + else + call->cong_cwnd = 4; + call->cong_ssthresh = RXRPC_RXTX_BUFF_SIZE - 1; return call; nomem_2: @@ -176,6 +184,7 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct sockaddr_rxrpc *srx, gfp_t gfp) { struct rxrpc_call *call; + ktime_t now; _enter(""); @@ -185,6 +194,9 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct sockaddr_rxrpc *srx, call->state = RXRPC_CALL_CLIENT_AWAIT_CONN; call->service_id = srx->srx_service; call->tx_phase = true; + now = ktime_get_real(); + call->acks_latest_ts = now; + call->cong_tstamp = now; _leave(" = %p", call); return call; @@ -325,6 +337,7 @@ void rxrpc_incoming_call(struct rxrpc_sock *rx, call->state = RXRPC_CALL_SERVER_ACCEPTING; if (sp->hdr.securityIndex > 0) call->state = RXRPC_CALL_SERVER_SECURING; + call->cong_tstamp = skb->tstamp; /* Set the channel for this call. We don't get channel_lock as we're * only defending against the data_ready handler (which we're called diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index a1cf1ec5f29e..37609ce89f52 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -97,6 +97,7 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, pkt.info.maxMTU = htonl(mtu); pkt.info.rwind = htonl(rxrpc_rx_window_size); pkt.info.jumbo_max = htonl(rxrpc_rx_jumbo_max); + pkt.whdr.flags |= RXRPC_SLOW_START_OK; len += sizeof(pkt.ack) + sizeof(pkt.info); break; } diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 0344f4494eb7..094720dd1eaf 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -36,6 +36,166 @@ static void rxrpc_proto_abort(const char *why, } } +/* + * Do TCP-style congestion management [RFC 5681]. + */ +static void rxrpc_congestion_management(struct rxrpc_call *call, + struct sk_buff *skb, + struct rxrpc_ack_summary *summary) +{ + enum rxrpc_congest_change change = rxrpc_cong_no_change; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + unsigned int cumulative_acks = call->cong_cumul_acks; + unsigned int cwnd = call->cong_cwnd; + bool resend = false; + + summary->flight_size = + (call->tx_top - call->tx_hard_ack) - summary->nr_acks; + + if (test_and_clear_bit(RXRPC_CALL_RETRANS_TIMEOUT, &call->flags)) { + summary->retrans_timeo = true; + call->cong_ssthresh = max_t(unsigned int, + summary->flight_size / 2, 2); + cwnd = 1; + if (cwnd > call->cong_ssthresh && + call->cong_mode == RXRPC_CALL_SLOW_START) { + call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE; + call->cong_tstamp = skb->tstamp; + cumulative_acks = 0; + } + } + + cumulative_acks += summary->nr_new_acks; + cumulative_acks += summary->nr_rot_new_acks; + if (cumulative_acks > 255) + cumulative_acks = 255; + + summary->mode = call->cong_mode; + summary->cwnd = call->cong_cwnd; + summary->ssthresh = call->cong_ssthresh; + summary->cumulative_acks = cumulative_acks; + summary->dup_acks = call->cong_dup_acks; + + switch (call->cong_mode) { + case RXRPC_CALL_SLOW_START: + if (summary->nr_nacks > 0) + goto packet_loss_detected; + if (summary->cumulative_acks > 0) + cwnd += 1; + if (cwnd > call->cong_ssthresh) { + call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE; + call->cong_tstamp = skb->tstamp; + } + goto out; + + case RXRPC_CALL_CONGEST_AVOIDANCE: + if (summary->nr_nacks > 0) + goto packet_loss_detected; + + /* We analyse the number of packets that get ACK'd per RTT + * period and increase the window if we managed to fill it. + */ + if (call->peer->rtt_usage == 0) + goto out; + if (ktime_before(skb->tstamp, + ktime_add_ns(call->cong_tstamp, + call->peer->rtt))) + goto out_no_clear_ca; + change = rxrpc_cong_rtt_window_end; + call->cong_tstamp = skb->tstamp; + if (cumulative_acks >= cwnd) + cwnd++; + goto out; + + case RXRPC_CALL_PACKET_LOSS: + if (summary->nr_nacks == 0) + goto resume_normality; + + if (summary->new_low_nack) { + change = rxrpc_cong_new_low_nack; + call->cong_dup_acks = 1; + if (call->cong_extra > 1) + call->cong_extra = 1; + goto send_extra_data; + } + + call->cong_dup_acks++; + if (call->cong_dup_acks < 3) + goto send_extra_data; + + change = rxrpc_cong_begin_retransmission; + call->cong_mode = RXRPC_CALL_FAST_RETRANSMIT; + call->cong_ssthresh = max_t(unsigned int, + summary->flight_size / 2, 2); + cwnd = call->cong_ssthresh + 3; + call->cong_extra = 0; + call->cong_dup_acks = 0; + resend = true; + goto out; + + case RXRPC_CALL_FAST_RETRANSMIT: + if (!summary->new_low_nack) { + if (summary->nr_new_acks == 0) + cwnd += 1; + call->cong_dup_acks++; + if (call->cong_dup_acks == 2) { + change = rxrpc_cong_retransmit_again; + call->cong_dup_acks = 0; + resend = true; + } + } else { + change = rxrpc_cong_progress; + cwnd = call->cong_ssthresh; + if (summary->nr_nacks == 0) + goto resume_normality; + } + goto out; + + default: + BUG(); + goto out; + } + +resume_normality: + change = rxrpc_cong_cleared_nacks; + call->cong_dup_acks = 0; + call->cong_extra = 0; + call->cong_tstamp = skb->tstamp; + if (cwnd <= call->cong_ssthresh) + call->cong_mode = RXRPC_CALL_SLOW_START; + else + call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE; +out: + cumulative_acks = 0; +out_no_clear_ca: + if (cwnd >= RXRPC_RXTX_BUFF_SIZE - 1) + cwnd = RXRPC_RXTX_BUFF_SIZE - 1; + call->cong_cwnd = cwnd; + call->cong_cumul_acks = cumulative_acks; + trace_rxrpc_congest(call, summary, sp->hdr.serial, change); + if (resend && !test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events)) + rxrpc_queue_call(call); + return; + +packet_loss_detected: + change = rxrpc_cong_saw_nack; + call->cong_mode = RXRPC_CALL_PACKET_LOSS; + call->cong_dup_acks = 0; + goto send_extra_data; + +send_extra_data: + /* Send some previously unsent DATA if we have some to advance the ACK + * state. + */ + if (call->rxtx_annotations[call->tx_top & RXRPC_RXTX_BUFF_MASK] & + RXRPC_TX_ANNO_LAST || + summary->nr_acks != call->tx_top - call->tx_hard_ack) { + call->cong_extra++; + wake_up(&call->waitq); + } + goto out_no_clear_ca; +} + /* * Ping the other end to fill our RTT cache and to retrieve the rwind * and MTU parameters. @@ -524,7 +684,6 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks, rxrpc_seq_t seq, int nr_acks, struct rxrpc_ack_summary *summary) { - bool resend = false; int ix; u8 annotation, anno_type; @@ -556,16 +715,11 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks, continue; call->rxtx_annotations[ix] = RXRPC_TX_ANNO_NAK | annotation; - resend = true; break; default: return rxrpc_proto_abort("SFT", call, 0); } } - - if (resend && - !test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events)) - rxrpc_queue_call(call); } /* @@ -663,6 +817,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, sp->hdr.serial, call->acks_latest); return; } + call->acks_latest_ts = skb->tstamp; call->acks_latest = sp->hdr.serial; if (before(hard_ack, call->tx_hard_ack) || @@ -692,6 +847,8 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, rxrpc_propose_ACK(call, RXRPC_ACK_PING, skew, sp->hdr.serial, false, true, rxrpc_propose_ack_ping_for_lost_reply); + + return rxrpc_congestion_management(call, skb, &summary); } /* diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index a608769343e6..aedb8978226d 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -200,6 +200,7 @@ const char rxrpc_timer_traces[rxrpc_timer__nr_trace][8] = { const char rxrpc_propose_ack_traces[rxrpc_propose_ack__nr_trace][8] = { [rxrpc_propose_ack_client_tx_end] = "ClTxEnd", [rxrpc_propose_ack_input_data] = "DataIn ", + [rxrpc_propose_ack_ping_for_lost_ack] = "LostAck", [rxrpc_propose_ack_ping_for_lost_reply] = "LostRpl", [rxrpc_propose_ack_ping_for_params] = "Params ", [rxrpc_propose_ack_respond_to_ack] = "Rsp2Ack", @@ -214,3 +215,21 @@ const char *const rxrpc_propose_ack_outcomes[rxrpc_propose_ack__nr_outcomes] = { [rxrpc_propose_ack_update] = " Update", [rxrpc_propose_ack_subsume] = " Subsume", }; + +const char rxrpc_congest_modes[NR__RXRPC_CONGEST_MODES][10] = { + [RXRPC_CALL_SLOW_START] = "SlowStart", + [RXRPC_CALL_CONGEST_AVOIDANCE] = "CongAvoid", + [RXRPC_CALL_PACKET_LOSS] = "PktLoss ", + [RXRPC_CALL_FAST_RETRANSMIT] = "FastReTx ", +}; + +const char rxrpc_congest_changes[rxrpc_congest__nr_change][9] = { + [rxrpc_cong_begin_retransmission] = " Retrans", + [rxrpc_cong_cleared_nacks] = " Cleared", + [rxrpc_cong_new_low_nack] = " NewLowN", + [rxrpc_cong_no_change] = "", + [rxrpc_cong_progress] = " Progres", + [rxrpc_cong_retransmit_again] = " ReTxAgn", + [rxrpc_cong_rtt_window_end] = " RttWinE", + [rxrpc_cong_saw_nack] = " SawNack", +}; diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 3eb01445e814..cf43a715685e 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -157,6 +157,8 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) spin_unlock_bh(&call->lock); + pkt->whdr.flags |= RXRPC_SLOW_START_OK; + iov[0].iov_len += sizeof(pkt->ack) + n; iov[1].iov_base = &pkt->ackinfo; iov[1].iov_len = sizeof(pkt->ackinfo); @@ -276,8 +278,11 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb) msg.msg_controllen = 0; msg.msg_flags = 0; - /* If our RTT cache needs working on, request an ACK. */ - if ((call->peer->rtt_usage < 3 && sp->hdr.seq & 1) || + /* If our RTT cache needs working on, request an ACK. Also request + * ACKs if a DATA packet appears to have been lost. + */ + if (call->cong_mode == RXRPC_CALL_FAST_RETRANSMIT || + (call->peer->rtt_usage < 3 && sp->hdr.seq & 1) || ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), ktime_get_real())) whdr.flags |= RXRPC_REQUEST_ACK; diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 99939372b5a4..1f8040d82395 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -45,7 +45,9 @@ static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx, for (;;) { set_current_state(TASK_INTERRUPTIBLE); ret = 0; - if (call->tx_top - call->tx_hard_ack < call->tx_winsize) + if (call->tx_top - call->tx_hard_ack < + min_t(unsigned int, call->tx_winsize, + call->cong_cwnd + call->cong_extra)) break; if (call->state >= RXRPC_CALL_COMPLETE) { ret = -call->error; @@ -203,7 +205,8 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, _debug("alloc"); if (call->tx_top - call->tx_hard_ack >= - call->tx_winsize) { + min_t(unsigned int, call->tx_winsize, + call->cong_cwnd + call->cong_extra)) { ret = -EAGAIN; if (msg->msg_flags & MSG_DONTWAIT) goto maybe_error; From 54f17bbc52f71e2d313721046627c383d6c5c7da Mon Sep 17 00:00:00 2001 From: Aaron Conole Date: Wed, 21 Sep 2016 11:35:06 -0400 Subject: [PATCH 1499/1715] netfilter: nf_queue: whitespace cleanup A future patch will modify the hook drop and outfn functions. This will cause the line lengths to take up too much space. This is simply a readability change. Signed-off-by: Aaron Conole Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_queue.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index 903dca050fb6..8fe85b98b5c8 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -22,10 +22,10 @@ struct nf_queue_entry { /* Packet queuing */ struct nf_queue_handler { - int (*outfn)(struct nf_queue_entry *entry, - unsigned int queuenum); - void (*nf_hook_drop)(struct net *net, - struct nf_hook_ops *ops); + int (*outfn)(struct nf_queue_entry *entry, + unsigned int queuenum); + void (*nf_hook_drop)(struct net *net, + struct nf_hook_ops *ops); }; void nf_register_queue_handler(struct net *net, const struct nf_queue_handler *qh); From c2675de447f8238e7e2e7eced78fa671d42a9a7e Mon Sep 17 00:00:00 2001 From: Lance Richardson Date: Sat, 24 Sep 2016 14:01:04 -0400 Subject: [PATCH 1500/1715] gre: use nla_get_be32() to extract flowinfo Eliminate a sparse endianness mismatch warning, use nla_get_be32() to extract a __be32 value instead of nla_get_u32(). Signed-off-by: Lance Richardson Signed-off-by: David S. Miller --- net/ipv6/ip6_gre.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 397e1ed3daa3..4ce74f86291b 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1239,7 +1239,7 @@ static void ip6gre_netlink_parms(struct nlattr *data[], parms->encap_limit = nla_get_u8(data[IFLA_GRE_ENCAP_LIMIT]); if (data[IFLA_GRE_FLOWINFO]) - parms->flowinfo = nla_get_u32(data[IFLA_GRE_FLOWINFO]); + parms->flowinfo = nla_get_be32(data[IFLA_GRE_FLOWINFO]); if (data[IFLA_GRE_FLAGS]) parms->flags = nla_get_u32(data[IFLA_GRE_FLAGS]); From f2c7c1d09832ef0d6499a9e1a958c3ddc686f723 Mon Sep 17 00:00:00 2001 From: Harshitha Ramamurthy Date: Tue, 6 Sep 2016 18:05:06 -0700 Subject: [PATCH 1501/1715] i40e: Remove 100 Mbps SGMII support for X722 This patch fixes the problem where driver shows 100 Mbps as a supported speed, and allows it to be configured for advertising on X722 devices. This patch fixes the problem by not setting the 100 Mbps SGMII flag for X722 devices. Without this patch, the user incorrectly thinks that 100 Mbps is supported and hence might try to advertise it on X722 devices when it is actually not a supported speed. Change-ID: I8c3d7c4251a9402d98994ed29749b7b895a0f205 Signed-off-by: Harshitha Ramamurthy Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 53cde5b44346..60f082e10c3e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -8608,7 +8608,6 @@ static int i40e_sw_init(struct i40e_pf *pf) I40E_FLAG_WB_ON_ITR_CAPABLE | I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE | I40E_FLAG_NO_PCI_LINK_CHECK | - I40E_FLAG_100M_SGMII_CAPABLE | I40E_FLAG_USE_SET_LLDP_MIB | I40E_FLAG_GENEVE_OFFLOAD_CAPABLE; } else if ((pf->hw.aq.api_maj_ver > 1) || From a6cb91464b5b09c0ff749e4b01048f93ce450275 Mon Sep 17 00:00:00 2001 From: Alan Brady Date: Tue, 6 Sep 2016 18:05:07 -0700 Subject: [PATCH 1502/1715] i40e: fix deleting mac filters There exists a bug in which deleting a mac filter does not actually occur. The driver reports that the filter has been deleted with no error. The problem occurs because the wrong cmd_flag is passed to the firmware when deleting the filter. The firmware reports an error back to the driver but it is expressly ignored. This fixes the bug by using the correct flag when deleting a filter. Without this patch, deleted filters remain in firmware and function as if they had not been deleted. Change-ID: I5f22b874f3b83f457702f18f0d5602ca21ac40c3 Signed-off-by: Alan Brady Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 60f082e10c3e..0841379a7517 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -1315,7 +1315,7 @@ static void i40e_rm_default_mac_filter(struct i40e_vsi *vsi, u8 *macaddr) element.vlan_tag = 0; /* ...and some firmware does it this way. */ element.flags = I40E_AQC_MACVLAN_DEL_PERFECT_MATCH | - I40E_AQC_MACVLAN_ADD_IGNORE_VLAN; + I40E_AQC_MACVLAN_DEL_IGNORE_VLAN; i40e_aq_remove_macvlan(&pf->hw, vsi->seid, &element, 1, NULL); } @@ -1908,7 +1908,7 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) ether_addr_copy(del_list[num_del].mac_addr, f->macaddr); if (f->vlan == I40E_VLAN_ANY) { del_list[num_del].vlan_tag = 0; - cmd_flags |= I40E_AQC_MACVLAN_ADD_IGNORE_VLAN; + cmd_flags |= I40E_AQC_MACVLAN_DEL_IGNORE_VLAN; } else { del_list[num_del].vlan_tag = cpu_to_le16((u16)(f->vlan)); From 2199254cb50a650934ef2d1c531ec6fdc0a826c5 Mon Sep 17 00:00:00 2001 From: Preethi Banala Date: Tue, 6 Sep 2016 18:05:08 -0700 Subject: [PATCH 1503/1715] i40e: add encap csum VF offload flag Add ENCAP_CSUM offload negotiation flag. Currently VF assumes checksum offload for encapsulated packets is supported by default. Going forward, this feature needs to be negotiated with PF before advertising to the stack. Hence, we need a flag to control it. This is in regards to prepping up for VF base mode functionality support. Change-ID: Iaab1f25cc0abda5f2fbe3309092640f0e77d163e Signed-off-by: Preethi Banala Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_virtchnl.h | 1 + drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h index c92a3bdee229..f861d3109d1a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h @@ -163,6 +163,7 @@ struct i40e_virtchnl_vsi_resource { #define I40E_VIRTCHNL_VF_OFFLOAD_RX_POLLING 0x00020000 #define I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2 0x00040000 #define I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF 0X00080000 +#define I40E_VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM 0X00100000 struct i40e_virtchnl_vf_resource { u16 num_vsis; diff --git a/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h b/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h index f04ce6cb70dc..bd691ad86673 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h @@ -160,6 +160,7 @@ struct i40e_virtchnl_vsi_resource { #define I40E_VIRTCHNL_VF_OFFLOAD_RX_POLLING 0x00020000 #define I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2 0x00040000 #define I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF 0X00080000 +#define I40E_VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM 0X00100000 struct i40e_virtchnl_vf_resource { u16 num_vsis; From 234dc4e67611c11bb3990abced26cb75b8ef262a Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Tue, 6 Sep 2016 18:05:09 -0700 Subject: [PATCH 1504/1715] i40e: cleanup ATR auto_disable_flags use Some locations that disable ATR accidentally used the "full" disable by disabling the flag in the standard flags field. This incorrectly forces ATR off permanently instead of temporarily disabling it. In addition, some code locations accidentally set the ATR flag enabled when they only meant to clear the auto_disable_flags. This results in ignoring the user's ethtool private flag settings. Additionally, when disabling ATR via ethtool, we did not perform a flush of the FD table. This results in the previously assigned ATR rules still functioning which was not expected. Cleanup all these areas so that automatic disable uses only the auto_disable_flag. Fix the flush code so that we can trigger a flush even when we've disabled ATR and SB support, as otherwise the flush doesn't work. Fix ethtool setting to actually request a flush. Fix NETIF_F_NTUPLE flag to only clear the auto_disable setting and not enable the full feature. Change-ID: Ib2486111f8031bd16943e9308757b276305c03b5 Signed-off-by: Jacob Keller Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- .../net/ethernet/intel/i40e/i40e_ethtool.c | 3 +++ drivers/net/ethernet/intel/i40e/i40e_main.c | 20 +++++++------------ drivers/net/ethernet/intel/i40e/i40e_txrx.c | 14 ++++++------- 3 files changed, 17 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index af28a8c2166b..5cad80f157fb 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -3021,6 +3021,9 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags) } else { pf->flags &= ~I40E_FLAG_FD_ATR_ENABLED; pf->auto_disable_flags |= I40E_FLAG_FD_ATR_ENABLED; + + /* flush current ATR settings */ + set_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state); } if ((flags & I40E_PRIV_FLAGS_VEB_STATS) && diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 0841379a7517..e1a2c9a77a63 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -5242,7 +5242,7 @@ static int i40e_up_complete(struct i40e_vsi *vsi) /* reset fd counters */ pf->fd_add_err = pf->fd_atr_cnt = 0; if (pf->fd_tcp_rule > 0) { - pf->flags &= ~I40E_FLAG_FD_ATR_ENABLED; + pf->auto_disable_flags |= I40E_FLAG_FD_ATR_ENABLED; if (I40E_DEBUG_FD & pf->hw.debug_mask) dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 exist\n"); pf->fd_tcp_rule = 0; @@ -5976,9 +5976,6 @@ static void i40e_fdir_flush_and_replay(struct i40e_pf *pf) int fd_room; int reg; - if (!(pf->flags & (I40E_FLAG_FD_SB_ENABLED | I40E_FLAG_FD_ATR_ENABLED))) - return; - if (!time_after(jiffies, pf->fd_flush_timestamp + (I40E_MIN_FD_FLUSH_INTERVAL * HZ))) return; @@ -5998,7 +5995,7 @@ static void i40e_fdir_flush_and_replay(struct i40e_pf *pf) } pf->fd_flush_timestamp = jiffies; - pf->flags &= ~I40E_FLAG_FD_ATR_ENABLED; + pf->auto_disable_flags |= I40E_FLAG_FD_ATR_ENABLED; /* flush all filters */ wr32(&pf->hw, I40E_PFQF_CTL_1, I40E_PFQF_CTL_1_CLEARFDTABLE_MASK); @@ -6018,7 +6015,7 @@ static void i40e_fdir_flush_and_replay(struct i40e_pf *pf) /* replay sideband filters */ i40e_fdir_filter_restore(pf->vsi[pf->lan_vsi]); if (!disable_atr) - pf->flags |= I40E_FLAG_FD_ATR_ENABLED; + pf->auto_disable_flags &= ~I40E_FLAG_FD_ATR_ENABLED; clear_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state); if (I40E_DEBUG_FD & pf->hw.debug_mask) dev_info(&pf->pdev->dev, "FD Filter table flushed and FD-SB replayed.\n"); @@ -6052,9 +6049,6 @@ static void i40e_fdir_reinit_subtask(struct i40e_pf *pf) if (test_bit(__I40E_DOWN, &pf->state)) return; - if (!(pf->flags & (I40E_FLAG_FD_SB_ENABLED | I40E_FLAG_FD_ATR_ENABLED))) - return; - if (test_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state)) i40e_fdir_flush_and_replay(pf); @@ -8682,13 +8676,13 @@ bool i40e_set_ntuple(struct i40e_pf *pf, netdev_features_t features) /* reset fd counters */ pf->fd_add_err = pf->fd_atr_cnt = pf->fd_tcp_rule = 0; pf->fdir_pf_active_filters = 0; - pf->flags |= I40E_FLAG_FD_ATR_ENABLED; - if (I40E_DEBUG_FD & pf->hw.debug_mask) - dev_info(&pf->pdev->dev, "ATR re-enabled.\n"); /* if ATR was auto disabled it can be re-enabled. */ if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) && - (pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED)) + (pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED)) { pf->auto_disable_flags &= ~I40E_FLAG_FD_ATR_ENABLED; + if (I40E_DEBUG_FD & pf->hw.debug_mask) + dev_info(&pf->pdev->dev, "ATR re-enabled.\n"); + } } return need_reset; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index bf7bb7c3f227..7ada05e7776f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -282,18 +282,18 @@ static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi, if (add) { pf->fd_tcp_rule++; - if (pf->flags & I40E_FLAG_FD_ATR_ENABLED) { - if (I40E_DEBUG_FD & pf->hw.debug_mask) - dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 flow being applied\n"); - pf->flags &= ~I40E_FLAG_FD_ATR_ENABLED; - } + if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) && + I40E_DEBUG_FD & pf->hw.debug_mask) + dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 flow being applied\n"); + pf->auto_disable_flags |= I40E_FLAG_FD_ATR_ENABLED; } else { pf->fd_tcp_rule = (pf->fd_tcp_rule > 0) ? (pf->fd_tcp_rule - 1) : 0; if (pf->fd_tcp_rule == 0) { - pf->flags |= I40E_FLAG_FD_ATR_ENABLED; - if (I40E_DEBUG_FD & pf->hw.debug_mask) + if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) && + I40E_DEBUG_FD & pf->hw.debug_mask) dev_info(&pf->pdev->dev, "ATR re-enabled due to no sideband TCP/IPv4 rules\n"); + pf->auto_disable_flags &= ~I40E_FLAG_FD_ATR_ENABLED; } } From a3417d287fb02e7bd24c6b1068fe6f9b52a259a6 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Tue, 6 Sep 2016 18:05:10 -0700 Subject: [PATCH 1505/1715] i40e: check conflicting ntuple/sideband rules when re-enabling ATR In i40e_fdir_check_and_reenable(), the driver performs some checks to determine whether it is safe to re-enable FD Sideband and FD ATR support. The current check will only determine if there is available space in the flow director table. However, this ignores the fact that ATR should be disabled when there are TCP/IPv4 sideband rules in effect. Add the missing check, and update the info message printed when I40E_DEBUG_FD is enabled. Change-ID: Ibb9c63e5be95d63c53a498fdd5dbf69f54a00e08 Signed-off-by: Jacob Keller Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index e1a2c9a77a63..89b0418a6c2e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -5939,13 +5939,17 @@ void i40e_fdir_check_and_reenable(struct i40e_pf *pf) dev_info(&pf->pdev->dev, "FD Sideband/ntuple is being enabled since we have space in the table now\n"); } } - /* Wait for some more space to be available to turn on ATR */ + + /* Wait for some more space to be available to turn on ATR. We also + * must check that no existing ntuple rules for TCP are in effect + */ if (fcnt_prog < (fcnt_avail - I40E_FDIR_BUFFER_HEAD_ROOM * 2)) { if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) && - (pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED)) { + (pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED) && + (pf->fd_tcp_rule == 0)) { pf->auto_disable_flags &= ~I40E_FLAG_FD_ATR_ENABLED; if (I40E_DEBUG_FD & pf->hw.debug_mask) - dev_info(&pf->pdev->dev, "ATR is being enabled since we have space in the table now\n"); + dev_info(&pf->pdev->dev, "ATR is being enabled since we have space in the table and there are no conflicting ntuple rules\n"); } } From ac9c5c6d8c17fa105878442ac663f0e9abe3cff5 Mon Sep 17 00:00:00 2001 From: Henry Tieman Date: Tue, 6 Sep 2016 18:05:11 -0700 Subject: [PATCH 1506/1715] i40e: removing unreachable code The return value from i40e_shutdown_adminq() is always 0 (I40E_SUCCESS). So, the test for non-0 will never be true. Cleanup by removing the test and debug print statement. Change-ID: Ie51e8e37515c3e3a6a9ff26fa951d0e5e24343c1 Signed-off-by: Henry Tieman Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 89b0418a6c2e..e626761b8c94 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -11333,11 +11333,7 @@ static void i40e_remove(struct pci_dev *pdev) } /* shutdown the adminq */ - ret_code = i40e_shutdown_adminq(hw); - if (ret_code) - dev_warn(&pdev->dev, - "Failed to destroy the Admin Queue resources: %d\n", - ret_code); + i40e_shutdown_adminq(hw); /* destroy the locks only once, here */ mutex_destroy(&hw->aq.arq_mutex); From fa90efa59dabbaac24f1ad2e6535e6daa2845257 Mon Sep 17 00:00:00 2001 From: Bimmy Pujari Date: Tue, 6 Sep 2016 18:05:12 -0700 Subject: [PATCH 1507/1715] i40e/i40evf: Changed version to 1.6.16 Signed-off-by: Bimmy Pujari Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 2 +- drivers/net/ethernet/intel/i40evf/i40evf_main.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index e626761b8c94..b434d07d98cd 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -41,7 +41,7 @@ static const char i40e_driver_string[] = #define DRV_VERSION_MAJOR 1 #define DRV_VERSION_MINOR 6 -#define DRV_VERSION_BUILD 12 +#define DRV_VERSION_BUILD 16 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \ __stringify(DRV_VERSION_MINOR) "." \ __stringify(DRV_VERSION_BUILD) DRV_KERN diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index 99833f3ea34e..064419e48f21 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -38,7 +38,7 @@ static const char i40evf_driver_string[] = #define DRV_VERSION_MAJOR 1 #define DRV_VERSION_MINOR 6 -#define DRV_VERSION_BUILD 12 +#define DRV_VERSION_BUILD 16 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \ __stringify(DRV_VERSION_MINOR) "." \ __stringify(DRV_VERSION_BUILD) \ From 7ac4b5c6fd351be8f849f687e290ca9724acfd33 Mon Sep 17 00:00:00 2001 From: Akeem Abodunrin Date: Mon, 12 Sep 2016 14:18:37 -0700 Subject: [PATCH 1508/1715] i40e: Increase minimum number of allocated VSI This patch increases minimum number of allocated VSIs, so as to resolve failure adding VSI for VF when 64-VFs assigned to a PF. The driver supports up to 128 VFs per device, users can decide to enable up to 64-VFs on a single PF, especially 2 X 40 devices. In that scenario, with VMDq co-existence, there would be starvation of VSIs - with this patch, supported features would have enough VSIs for configuration now. Change-ID: If084f4cd823667af8fe7fdc11489c705b32039d5 Signed-off-by: Akeem Abodunrin Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 30aaee42f648..6b22df62f018 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -74,7 +74,7 @@ #define I40E_MIN_NUM_DESCRIPTORS 64 #define I40E_MIN_MSIX 2 #define I40E_DEFAULT_NUM_VMDQ_VSI 8 /* max 256 VSIs */ -#define I40E_MIN_VSI_ALLOC 51 /* LAN, ATR, FCOE, 32 VF, 16 VMDQ */ +#define I40E_MIN_VSI_ALLOC 83 /* LAN, ATR, FCOE, 64 VF */ /* max 16 qps */ #define i40e_default_queues_per_vmdq(pf) \ (((pf)->flags & I40E_FLAG_RSS_AQ_CAPABLE) ? 4 : 1) From f19a973f46e85d4394cadb90fa7717f7ec98197a Mon Sep 17 00:00:00 2001 From: Mitch Williams Date: Mon, 12 Sep 2016 14:18:38 -0700 Subject: [PATCH 1509/1715] i40evf: enable adaptive interrupt throttling All of the code to support adaptive interrupt throttling is already in the interrupt handler, it just needs to be enabled. Fill out the data structures properly to make it happen. Single-flow traffic tests may show slightly lower throughput, but interrupts per second will drop by about 75%. Change-ID: I9cd7d42c025b906bf1bb85c6aeb6112684aa6471 Signed-off-by: Mitch Williams Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40evf/i40evf_main.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index 064419e48f21..302c9743ef44 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -370,6 +370,8 @@ i40evf_map_vector_to_rxq(struct i40evf_adapter *adapter, int v_idx, int r_idx) { struct i40e_q_vector *q_vector = &adapter->q_vectors[v_idx]; struct i40e_ring *rx_ring = &adapter->rx_rings[r_idx]; + struct i40e_vsi *vsi = &adapter->vsi; + struct i40e_hw *hw = &adapter->hw; rx_ring->q_vector = q_vector; rx_ring->next = q_vector->rx.ring; @@ -377,7 +379,10 @@ i40evf_map_vector_to_rxq(struct i40evf_adapter *adapter, int v_idx, int r_idx) q_vector->rx.ring = rx_ring; q_vector->rx.count++; q_vector->rx.latency_range = I40E_LOW_LATENCY; + q_vector->rx.itr = ITR_TO_REG(vsi->rx_itr_setting); + q_vector->ring_mask |= BIT(r_idx); q_vector->itr_countdown = ITR_COUNTDOWN_START; + wr32(hw, I40E_VFINT_ITRN1(I40E_RX_ITR, v_idx - 1), q_vector->rx.itr); } /** @@ -391,6 +396,8 @@ i40evf_map_vector_to_txq(struct i40evf_adapter *adapter, int v_idx, int t_idx) { struct i40e_q_vector *q_vector = &adapter->q_vectors[v_idx]; struct i40e_ring *tx_ring = &adapter->tx_rings[t_idx]; + struct i40e_vsi *vsi = &adapter->vsi; + struct i40e_hw *hw = &adapter->hw; tx_ring->q_vector = q_vector; tx_ring->next = q_vector->tx.ring; @@ -398,9 +405,10 @@ i40evf_map_vector_to_txq(struct i40evf_adapter *adapter, int v_idx, int t_idx) q_vector->tx.ring = tx_ring; q_vector->tx.count++; q_vector->tx.latency_range = I40E_LOW_LATENCY; + q_vector->tx.itr = ITR_TO_REG(vsi->tx_itr_setting); q_vector->itr_countdown = ITR_COUNTDOWN_START; q_vector->num_ringpairs++; - q_vector->ring_mask |= BIT(t_idx); + wr32(hw, I40E_VFINT_ITRN1(I40E_TX_ITR, v_idx - 1), q_vector->tx.itr); } /** @@ -2269,10 +2277,8 @@ int i40evf_process_config(struct i40evf_adapter *adapter) adapter->vsi.back = adapter; adapter->vsi.base_vector = 1; adapter->vsi.work_limit = I40E_DEFAULT_IRQ_WORK; - adapter->vsi.rx_itr_setting = (I40E_ITR_DYNAMIC | - ITR_REG_TO_USEC(I40E_ITR_RX_DEF)); - adapter->vsi.tx_itr_setting = (I40E_ITR_DYNAMIC | - ITR_REG_TO_USEC(I40E_ITR_TX_DEF)); + adapter->vsi.rx_itr_setting = (I40E_ITR_DYNAMIC | I40E_ITR_RX_DEF); + adapter->vsi.tx_itr_setting = (I40E_ITR_DYNAMIC | I40E_ITR_TX_DEF); vsi->netdev = adapter->netdev; vsi->qs_handle = adapter->vsi_res->qset_handle; if (vfres->vf_offload_flags & I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF) { From 64bfd68eaecdce7b86e179fe39662340c8aed20d Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 12 Sep 2016 14:18:39 -0700 Subject: [PATCH 1510/1715] i40e: Fix Flow Director raw_buf cleanup The Tx cleanup flow was incorrectly assuming it could check for the flow director bits after it had unmapped the buffer. However in this case it results in us trying to free a raw_buf as though it is an sk_buff. To fix this I am moving up the flag test for the FD_SB bit so that when find a non-NULL skb or raw_buf value we then check the flag and use the appropriate call to free the buffer. Change-ID: I6284034ba1ea87c9922e56f6eb3181f7f09bddde Signed-off-by: Alexander Duyck Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 8 ++++---- drivers/net/ethernet/intel/i40evf/i40e_txrx.c | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 7ada05e7776f..a2077bedac93 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -532,7 +532,10 @@ static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring, struct i40e_tx_buffer *tx_buffer) { if (tx_buffer->skb) { - dev_kfree_skb_any(tx_buffer->skb); + if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB) + kfree(tx_buffer->raw_buf); + else + dev_kfree_skb_any(tx_buffer->skb); if (dma_unmap_len(tx_buffer, len)) dma_unmap_single(ring->dev, dma_unmap_addr(tx_buffer, dma), @@ -545,9 +548,6 @@ static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring, DMA_TO_DEVICE); } - if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB) - kfree(tx_buffer->raw_buf); - tx_buffer->next_to_watch = NULL; tx_buffer->skb = NULL; dma_unmap_len_set(tx_buffer, len, 0); diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index e3427ebd5b84..cb6b13098699 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -51,7 +51,10 @@ static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring, struct i40e_tx_buffer *tx_buffer) { if (tx_buffer->skb) { - dev_kfree_skb_any(tx_buffer->skb); + if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB) + kfree(tx_buffer->raw_buf); + else + dev_kfree_skb_any(tx_buffer->skb); if (dma_unmap_len(tx_buffer, len)) dma_unmap_single(ring->dev, dma_unmap_addr(tx_buffer, dma), @@ -64,9 +67,6 @@ static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring, DMA_TO_DEVICE); } - if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB) - kfree(tx_buffer->raw_buf); - tx_buffer->next_to_watch = NULL; tx_buffer->skb = NULL; dma_unmap_len_set(tx_buffer, len, 0); From e486bdfd7c491e997f29fcdf6a4216861ab1d06a Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 12 Sep 2016 14:18:40 -0700 Subject: [PATCH 1511/1715] i40e/i40evf: Add txring_txq function to match fm10k and ixgbe This patch adds a txring_txq function which allows us to convert a i40e_ring/i40evf_ring to a netdev_tx_queue structure. This way we can avoid having to make a multi-line function call for all the spots that need access to this. Change-ID: Ic063b71d8b92ea406d2c32e798c8e2b02809d65b Signed-off-by: Alexander Duyck Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 17 ++++++----------- drivers/net/ethernet/intel/i40e/i40e_txrx.h | 9 +++++++++ drivers/net/ethernet/intel/i40evf/i40e_txrx.c | 17 ++++++----------- drivers/net/ethernet/intel/i40evf/i40e_txrx.h | 9 +++++++++ 4 files changed, 30 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index a2077bedac93..6e0a7acf17a2 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -584,8 +584,7 @@ void i40e_clean_tx_ring(struct i40e_ring *tx_ring) return; /* cleanup Tx queue statistics */ - netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev, - tx_ring->queue_index)); + netdev_tx_reset_queue(txring_txq(tx_ring)); } /** @@ -754,8 +753,8 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi, tx_ring->arm_wb = true; } - netdev_tx_completed_queue(netdev_get_tx_queue(tx_ring->netdev, - tx_ring->queue_index), + /* notify netdev of completed buffers */ + netdev_tx_completed_queue(txring_txq(tx_ring), total_packets, total_bytes); #define TX_WAKE_THRESHOLD (DESC_NEEDED * 2) @@ -2784,9 +2783,7 @@ static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, tx_ring->next_to_use = i; - netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev, - tx_ring->queue_index), - first->bytecount); + netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount); i40e_maybe_stop_tx(tx_ring, DESC_NEEDED); /* Algorithm to optimize tail and RS bit setting: @@ -2811,13 +2808,11 @@ static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, * trigger a force WB. */ if (skb->xmit_more && - !netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev, - tx_ring->queue_index))) { + !netif_xmit_stopped(txring_txq(tx_ring))) { tx_ring->flags |= I40E_TXR_FLAGS_LAST_XMIT_MORE_SET; tail_bump = false; } else if (!skb->xmit_more && - !netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev, - tx_ring->queue_index)) && + !netif_xmit_stopped(txring_txq(tx_ring)) && (!(tx_ring->flags & I40E_TXR_FLAGS_LAST_XMIT_MORE_SET)) && (tx_ring->packet_stride < WB_STRIDE) && (desc_count < WB_STRIDE)) { diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index b78c810d1835..508840585645 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -463,4 +463,13 @@ static inline bool i40e_rx_is_fcoe(u16 ptype) return (ptype >= I40E_RX_PTYPE_L2_FCOE_PAY3) && (ptype <= I40E_RX_PTYPE_L2_FCOE_VFT_FCOTHER); } + +/** + * txring_txq - Find the netdev Tx ring based on the i40e Tx ring + * @ring: Tx ring to find the netdev equivalent of + **/ +static inline struct netdev_queue *txring_txq(const struct i40e_ring *ring) +{ + return netdev_get_tx_queue(ring->netdev, ring->queue_index); +} #endif /* _I40E_TXRX_H_ */ diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index cb6b13098699..d4c6a767be94 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -103,8 +103,7 @@ void i40evf_clean_tx_ring(struct i40e_ring *tx_ring) return; /* cleanup Tx queue statistics */ - netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev, - tx_ring->queue_index)); + netdev_tx_reset_queue(txring_txq(tx_ring)); } /** @@ -273,8 +272,8 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi, tx_ring->arm_wb = true; } - netdev_tx_completed_queue(netdev_get_tx_queue(tx_ring->netdev, - tx_ring->queue_index), + /* notify netdev of completed buffers */ + netdev_tx_completed_queue(txring_txq(tx_ring), total_packets, total_bytes); #define TX_WAKE_THRESHOLD (DESC_NEEDED * 2) @@ -2012,9 +2011,7 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, tx_ring->next_to_use = i; - netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev, - tx_ring->queue_index), - first->bytecount); + netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount); i40e_maybe_stop_tx(tx_ring, DESC_NEEDED); /* Algorithm to optimize tail and RS bit setting: @@ -2039,13 +2036,11 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, * trigger a force WB. */ if (skb->xmit_more && - !netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev, - tx_ring->queue_index))) { + !netif_xmit_stopped(txring_txq(tx_ring))) { tx_ring->flags |= I40E_TXR_FLAGS_LAST_XMIT_MORE_SET; tail_bump = false; } else if (!skb->xmit_more && - !netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev, - tx_ring->queue_index)) && + !netif_xmit_stopped(txring_txq(tx_ring)) && (!(tx_ring->flags & I40E_TXR_FLAGS_LAST_XMIT_MORE_SET)) && (tx_ring->packet_stride < WB_STRIDE) && (desc_count < WB_STRIDE)) { diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h index 0112277e5882..84e561cc0166 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h @@ -445,4 +445,13 @@ static inline bool i40e_rx_is_fcoe(u16 ptype) return (ptype >= I40E_RX_PTYPE_L2_FCOE_PAY3) && (ptype <= I40E_RX_PTYPE_L2_FCOE_VFT_FCOTHER); } + +/** + * txring_txq - Find the netdev Tx ring based on the i40e Tx ring + * @ring: Tx ring to find the netdev equivalent of + **/ +static inline struct netdev_queue *txring_txq(const struct i40e_ring *ring) +{ + return netdev_get_tx_queue(ring->netdev, ring->queue_index); +} #endif /* _I40E_TXRX_H_ */ From 5e02f2837349b399e48fd2a5e5149c9ee9c27cdd Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 12 Sep 2016 14:18:41 -0700 Subject: [PATCH 1512/1715] i40e: Split Flow Director descriptor config into separate function In an effort to improve code readability I am splitting the Flow Director filter configuration out into a separate function like we have done for the standard xmit path. The general idea is to provide a single block of code that translates the flow specification into a proper Flow Director descriptor. Change-ID: Id355ad8030c4e6c72c57504fa09de60c976a8ffe Signed-off-by: Alexander Duyck Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 115 +++++++++++--------- 1 file changed, 64 insertions(+), 51 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 6e0a7acf17a2..ef9b8d7f85d4 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -40,6 +40,69 @@ static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size, } #define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS) +/** + * i40e_fdir - Generate a Flow Director descriptor based on fdata + * @tx_ring: Tx ring to send buffer on + * @fdata: Flow director filter data + * @add: Indicate if we are adding a rule or deleting one + * + **/ +static void i40e_fdir(struct i40e_ring *tx_ring, + struct i40e_fdir_filter *fdata, bool add) +{ + struct i40e_filter_program_desc *fdir_desc; + struct i40e_pf *pf = tx_ring->vsi->back; + u32 flex_ptype, dtype_cmd; + u16 i; + + /* grab the next descriptor */ + i = tx_ring->next_to_use; + fdir_desc = I40E_TX_FDIRDESC(tx_ring, i); + + i++; + tx_ring->next_to_use = (i < tx_ring->count) ? i : 0; + + flex_ptype = I40E_TXD_FLTR_QW0_QINDEX_MASK & + (fdata->q_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT); + + flex_ptype |= I40E_TXD_FLTR_QW0_FLEXOFF_MASK & + (fdata->flex_off << I40E_TXD_FLTR_QW0_FLEXOFF_SHIFT); + + flex_ptype |= I40E_TXD_FLTR_QW0_PCTYPE_MASK & + (fdata->pctype << I40E_TXD_FLTR_QW0_PCTYPE_SHIFT); + + /* Use LAN VSI Id if not programmed by user */ + flex_ptype |= I40E_TXD_FLTR_QW0_DEST_VSI_MASK & + ((u32)(fdata->dest_vsi ? : pf->vsi[pf->lan_vsi]->id) << + I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT); + + dtype_cmd = I40E_TX_DESC_DTYPE_FILTER_PROG; + + dtype_cmd |= add ? + I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE << + I40E_TXD_FLTR_QW1_PCMD_SHIFT : + I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE << + I40E_TXD_FLTR_QW1_PCMD_SHIFT; + + dtype_cmd |= I40E_TXD_FLTR_QW1_DEST_MASK & + (fdata->dest_ctl << I40E_TXD_FLTR_QW1_DEST_SHIFT); + + dtype_cmd |= I40E_TXD_FLTR_QW1_FD_STATUS_MASK & + (fdata->fd_status << I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT); + + if (fdata->cnt_index) { + dtype_cmd |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK; + dtype_cmd |= I40E_TXD_FLTR_QW1_CNTINDEX_MASK & + ((u32)fdata->cnt_index << + I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT); + } + + fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(flex_ptype); + fdir_desc->rsvd = cpu_to_le32(0); + fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dtype_cmd); + fdir_desc->fd_id = cpu_to_le32(fdata->fd_id); +} + #define I40E_FD_CLEAN_DELAY 10 /** * i40e_program_fdir_filter - Program a Flow Director filter @@ -51,11 +114,9 @@ static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size, int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data, u8 *raw_packet, struct i40e_pf *pf, bool add) { - struct i40e_filter_program_desc *fdir_desc; struct i40e_tx_buffer *tx_buf, *first; struct i40e_tx_desc *tx_desc; struct i40e_ring *tx_ring; - unsigned int fpt, dcc; struct i40e_vsi *vsi; struct device *dev; dma_addr_t dma; @@ -92,56 +153,8 @@ int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data, u8 *raw_packet, /* grab the next descriptor */ i = tx_ring->next_to_use; - fdir_desc = I40E_TX_FDIRDESC(tx_ring, i); first = &tx_ring->tx_bi[i]; - memset(first, 0, sizeof(struct i40e_tx_buffer)); - - tx_ring->next_to_use = ((i + 1) < tx_ring->count) ? i + 1 : 0; - - fpt = (fdir_data->q_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) & - I40E_TXD_FLTR_QW0_QINDEX_MASK; - - fpt |= (fdir_data->flex_off << I40E_TXD_FLTR_QW0_FLEXOFF_SHIFT) & - I40E_TXD_FLTR_QW0_FLEXOFF_MASK; - - fpt |= (fdir_data->pctype << I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) & - I40E_TXD_FLTR_QW0_PCTYPE_MASK; - - /* Use LAN VSI Id if not programmed by user */ - if (fdir_data->dest_vsi == 0) - fpt |= (pf->vsi[pf->lan_vsi]->id) << - I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT; - else - fpt |= ((u32)fdir_data->dest_vsi << - I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT) & - I40E_TXD_FLTR_QW0_DEST_VSI_MASK; - - dcc = I40E_TX_DESC_DTYPE_FILTER_PROG; - - if (add) - dcc |= I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE << - I40E_TXD_FLTR_QW1_PCMD_SHIFT; - else - dcc |= I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE << - I40E_TXD_FLTR_QW1_PCMD_SHIFT; - - dcc |= (fdir_data->dest_ctl << I40E_TXD_FLTR_QW1_DEST_SHIFT) & - I40E_TXD_FLTR_QW1_DEST_MASK; - - dcc |= (fdir_data->fd_status << I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT) & - I40E_TXD_FLTR_QW1_FD_STATUS_MASK; - - if (fdir_data->cnt_index != 0) { - dcc |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK; - dcc |= ((u32)fdir_data->cnt_index << - I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) & - I40E_TXD_FLTR_QW1_CNTINDEX_MASK; - } - - fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(fpt); - fdir_desc->rsvd = cpu_to_le32(0); - fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dcc); - fdir_desc->fd_id = cpu_to_le32(fdir_data->fd_id); + i40e_fdir(tx_ring, fdir_data, add); /* Now program a dummy descriptor */ i = tx_ring->next_to_use; From 1eb846ac90b956e52f4269d80f13cfbe1df6850b Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 12 Sep 2016 14:18:42 -0700 Subject: [PATCH 1513/1715] i40e: Strip out debugfs hook for Flow Director filter programming This interface was only ever meant for debug only. Since it is not supposed to be here we are removing it. Change-ID: Id771a1e5e7d3e2b4b7f56591b61fb48c921e1d04 Signed-off-by: Alexander Duyck Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e.h | 2 - .../net/ethernet/intel/i40e/i40e_debugfs.c | 80 ------------------- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 5 +- 3 files changed, 3 insertions(+), 84 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 6b22df62f018..2030d7c1dc94 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -710,8 +710,6 @@ struct rtnl_link_stats64 *i40e_get_vsi_stats_struct(struct i40e_vsi *vsi); int i40e_fetch_switch_configuration(struct i40e_pf *pf, bool printconfig); -int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data, u8 *raw_packet, - struct i40e_pf *pf, bool add); int i40e_add_del_fdir(struct i40e_vsi *vsi, struct i40e_fdir_filter *input, bool add); void i40e_fdir_check_and_reenable(struct i40e_pf *pf); diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index 8555f04002da..0c1875b5b16d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -1430,84 +1430,6 @@ static ssize_t i40e_dbg_command_write(struct file *filp, buff = NULL; kfree(desc); desc = NULL; - } else if ((strncmp(cmd_buf, "add fd_filter", 13) == 0) || - (strncmp(cmd_buf, "rem fd_filter", 13) == 0)) { - struct i40e_fdir_filter fd_data; - u16 packet_len, i, j = 0; - char *asc_packet; - u8 *raw_packet; - bool add = false; - int ret; - - if (!(pf->flags & I40E_FLAG_FD_SB_ENABLED)) - goto command_write_done; - - if (strncmp(cmd_buf, "add", 3) == 0) - add = true; - - if (add && (pf->auto_disable_flags & I40E_FLAG_FD_SB_ENABLED)) - goto command_write_done; - - asc_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, - GFP_KERNEL); - if (!asc_packet) - goto command_write_done; - - raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, - GFP_KERNEL); - - if (!raw_packet) { - kfree(asc_packet); - asc_packet = NULL; - goto command_write_done; - } - - cnt = sscanf(&cmd_buf[13], - "%hx %2hhx %2hhx %hx %2hhx %2hhx %hx %x %hd %511s", - &fd_data.q_index, - &fd_data.flex_off, &fd_data.pctype, - &fd_data.dest_vsi, &fd_data.dest_ctl, - &fd_data.fd_status, &fd_data.cnt_index, - &fd_data.fd_id, &packet_len, asc_packet); - if (cnt != 10) { - dev_info(&pf->pdev->dev, - "program fd_filter: bad command string, cnt=%d\n", - cnt); - kfree(asc_packet); - asc_packet = NULL; - kfree(raw_packet); - goto command_write_done; - } - - /* fix packet length if user entered 0 */ - if (packet_len == 0) - packet_len = I40E_FDIR_MAX_RAW_PACKET_SIZE; - - /* make sure to check the max as well */ - packet_len = min_t(u16, - packet_len, I40E_FDIR_MAX_RAW_PACKET_SIZE); - - for (i = 0; i < packet_len; i++) { - cnt = sscanf(&asc_packet[j], "%2hhx ", &raw_packet[i]); - if (!cnt) - break; - j += 3; - } - dev_info(&pf->pdev->dev, "FD raw packet dump\n"); - print_hex_dump(KERN_INFO, "FD raw packet: ", - DUMP_PREFIX_OFFSET, 16, 1, - raw_packet, packet_len, true); - ret = i40e_program_fdir_filter(&fd_data, raw_packet, pf, add); - if (!ret) { - dev_info(&pf->pdev->dev, "Filter command send Status : Success\n"); - } else { - dev_info(&pf->pdev->dev, - "Filter command send failed %d\n", ret); - } - kfree(raw_packet); - raw_packet = NULL; - kfree(asc_packet); - asc_packet = NULL; } else if (strncmp(cmd_buf, "fd current cnt", 14) == 0) { dev_info(&pf->pdev->dev, "FD current total filter count for this interface: %d\n", i40e_get_current_fd_count(pf)); @@ -1732,8 +1654,6 @@ static ssize_t i40e_dbg_command_write(struct file *filp, dev_info(&pf->pdev->dev, " globr\n"); dev_info(&pf->pdev->dev, " send aq_cmd \n"); dev_info(&pf->pdev->dev, " send indirect aq_cmd \n"); - dev_info(&pf->pdev->dev, " add fd_filter \n"); - dev_info(&pf->pdev->dev, " rem fd_filter \n"); dev_info(&pf->pdev->dev, " fd current cnt"); dev_info(&pf->pdev->dev, " lldp start\n"); dev_info(&pf->pdev->dev, " lldp stop\n"); diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index ef9b8d7f85d4..5237c491af89 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -111,8 +111,9 @@ static void i40e_fdir(struct i40e_ring *tx_ring, * @pf: The PF pointer * @add: True for add/update, False for remove **/ -int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data, u8 *raw_packet, - struct i40e_pf *pf, bool add) +static int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data, + u8 *raw_packet, struct i40e_pf *pf, + bool add) { struct i40e_tx_buffer *tx_buf, *first; struct i40e_tx_desc *tx_desc; From a4fa59cc5bb028ebb8048e8dcb6f92b2a1ea07f6 Mon Sep 17 00:00:00 2001 From: Mitch Williams Date: Mon, 12 Sep 2016 14:18:43 -0700 Subject: [PATCH 1514/1715] i40e: don't configure zero-size RSS table In some rare cases, we might get a VSI with no queues. In this case, we cannot configure RSS on this VSI as it will try to divide by zero when configuring the lookup table. Change-ID: I6ae173a7dd3481a081e079eb10eb80275de2adb0 Signed-off-by: Mitch Williams Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index b434d07d98cd..8176596932be 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -8282,6 +8282,8 @@ static int i40e_pf_config_rss(struct i40e_pf *pf) if (!vsi->rss_size) vsi->rss_size = min_t(int, pf->alloc_rss_size, vsi->num_queue_pairs); + if (!vsi->rss_size) + return -EINVAL; lut = kzalloc(vsi->rss_table_size, GFP_KERNEL); if (!lut) From 65e87c0398f542d5bd51cfd8a29b9dfd246b6a1c Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 12 Sep 2016 14:18:44 -0700 Subject: [PATCH 1515/1715] i40evf: support queue-specific settings for interrupt moderation In commit a75e8005d506f3 ("i40e: queue-specific settings for interrupt moderation") the i40e driver gained support for setting interrupt moderation values per queue. This patch adds support for this feature to the i40evf driver as well. In addition, a few changes are made to the i40e implementation to add function header documentation comments, as well. This behaves in a similar fashion to the implementation in i40e. Thus, requesting the moderation value when no queue is provided will report queue 0 value, while setting the value without a queue will set all queues at once. Change-ID: I1f310a57c8e6c84a8524c178d44d1b7a6d3a848e Signed-off-by: Jacob Keller Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- .../net/ethernet/intel/i40e/i40e_ethtool.c | 72 +++++- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 21 +- drivers/net/ethernet/intel/i40evf/i40e_txrx.c | 26 +- drivers/net/ethernet/intel/i40evf/i40e_txrx.h | 8 + drivers/net/ethernet/intel/i40evf/i40evf.h | 7 - .../ethernet/intel/i40evf/i40evf_ethtool.c | 224 +++++++++++++----- .../net/ethernet/intel/i40evf/i40evf_main.c | 10 +- 7 files changed, 289 insertions(+), 79 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 5cad80f157fb..92bc8846f1ba 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -1970,11 +1970,22 @@ static int i40e_set_phys_id(struct net_device *netdev, * 125us (8000 interrupts per second) == ITR(62) */ +/** + * __i40e_get_coalesce - get per-queue coalesce settings + * @netdev: the netdev to check + * @ec: ethtool coalesce data structure + * @queue: which queue to pick + * + * Gets the per-queue settings for coalescence. Specifically Rx and Tx usecs + * are per queue. If queue is <0 then we default to queue 0 as the + * representative value. + **/ static int __i40e_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec, int queue) { struct i40e_netdev_priv *np = netdev_priv(netdev); + struct i40e_ring *rx_ring, *tx_ring; struct i40e_vsi *vsi = np->vsi; ec->tx_max_coalesced_frames_irq = vsi->work_limit; @@ -1989,14 +2000,18 @@ static int __i40e_get_coalesce(struct net_device *netdev, return -EINVAL; } - if (ITR_IS_DYNAMIC(vsi->rx_rings[queue]->rx_itr_setting)) + rx_ring = vsi->rx_rings[queue]; + tx_ring = vsi->tx_rings[queue]; + + if (ITR_IS_DYNAMIC(rx_ring->rx_itr_setting)) ec->use_adaptive_rx_coalesce = 1; - if (ITR_IS_DYNAMIC(vsi->tx_rings[queue]->tx_itr_setting)) + if (ITR_IS_DYNAMIC(tx_ring->tx_itr_setting)) ec->use_adaptive_tx_coalesce = 1; - ec->rx_coalesce_usecs = vsi->rx_rings[queue]->rx_itr_setting & ~I40E_ITR_DYNAMIC; - ec->tx_coalesce_usecs = vsi->tx_rings[queue]->tx_itr_setting & ~I40E_ITR_DYNAMIC; + ec->rx_coalesce_usecs = rx_ring->rx_itr_setting & ~I40E_ITR_DYNAMIC; + ec->tx_coalesce_usecs = tx_ring->tx_itr_setting & ~I40E_ITR_DYNAMIC; + /* we use the _usecs_high to store/set the interrupt rate limit * that the hardware supports, that almost but not quite @@ -2010,18 +2025,44 @@ static int __i40e_get_coalesce(struct net_device *netdev, return 0; } +/** + * i40e_get_coalesce - get a netdev's coalesce settings + * @netdev: the netdev to check + * @ec: ethtool coalesce data structure + * + * Gets the coalesce settings for a particular netdev. Note that if user has + * modified per-queue settings, this only guarantees to represent queue 0. See + * __i40e_get_coalesce for more details. + **/ static int i40e_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec) { return __i40e_get_coalesce(netdev, ec, -1); } +/** + * i40e_get_per_queue_coalesce - gets coalesce settings for particular queue + * @netdev: netdev structure + * @ec: ethtool's coalesce settings + * @queue: the particular queue to read + * + * Will read a specific queue's coalesce settings + **/ static int i40e_get_per_queue_coalesce(struct net_device *netdev, u32 queue, struct ethtool_coalesce *ec) { return __i40e_get_coalesce(netdev, ec, queue); } +/** + * i40e_set_itr_per_queue - set ITR values for specific queue + * @vsi: the VSI to set values for + * @ec: coalesce settings from ethtool + * @queue: the queue to modify + * + * Change the ITR settings for a specific queue. + **/ + static void i40e_set_itr_per_queue(struct i40e_vsi *vsi, struct ethtool_coalesce *ec, int queue) @@ -2060,6 +2101,14 @@ static void i40e_set_itr_per_queue(struct i40e_vsi *vsi, i40e_flush(hw); } +/** + * __i40e_set_coalesce - set coalesce settings for particular queue + * @netdev: the netdev to change + * @ec: ethtool coalesce settings + * @queue: the queue to change + * + * Sets the coalesce settings for a particular queue. + **/ static int __i40e_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec, int queue) @@ -2120,12 +2169,27 @@ static int __i40e_set_coalesce(struct net_device *netdev, return 0; } +/** + * i40e_set_coalesce - set coalesce settings for every queue on the netdev + * @netdev: the netdev to change + * @ec: ethtool coalesce settings + * + * This will set each queue to the same coalesce settings. + **/ static int i40e_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec) { return __i40e_set_coalesce(netdev, ec, -1); } +/** + * i40e_set_per_queue_coalesce - set specific queue's coalesce settings + * @netdev: the netdev to change + * @ec: ethtool's coalesce settings + * @queue: the queue to change + * + * Sets the specified queue's coalesce settings. + **/ static int i40e_set_per_queue_coalesce(struct net_device *netdev, u32 queue, struct ethtool_coalesce *ec) { diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 5237c491af89..6287bf63c43c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -1877,6 +1877,15 @@ static u32 i40e_buildreg_itr(const int type, const u16 itr) /* a small macro to shorten up some long lines */ #define INTREG I40E_PFINT_DYN_CTLN +static inline int get_rx_itr_enabled(struct i40e_vsi *vsi, int idx) +{ + return !!(vsi->rx_rings[idx]->rx_itr_setting); +} + +static inline int get_tx_itr_enabled(struct i40e_vsi *vsi, int idx) +{ + return !!(vsi->tx_rings[idx]->tx_itr_setting); +} /** * i40e_update_enable_itr - Update itr and re-enable MSIX interrupt @@ -1892,6 +1901,7 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, u32 rxval, txval; int vector; int idx = q_vector->v_idx; + int rx_itr_setting, tx_itr_setting; vector = (q_vector->v_idx + vsi->base_vector); @@ -1900,18 +1910,21 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, */ rxval = txval = i40e_buildreg_itr(I40E_ITR_NONE, 0); + rx_itr_setting = get_rx_itr_enabled(vsi, idx); + tx_itr_setting = get_tx_itr_enabled(vsi, idx); + if (q_vector->itr_countdown > 0 || - (!ITR_IS_DYNAMIC(vsi->rx_rings[idx]->rx_itr_setting) && - !ITR_IS_DYNAMIC(vsi->tx_rings[idx]->tx_itr_setting))) { + (!ITR_IS_DYNAMIC(rx_itr_setting) && + !ITR_IS_DYNAMIC(tx_itr_setting))) { goto enable_int; } - if (ITR_IS_DYNAMIC(vsi->rx_rings[idx]->rx_itr_setting)) { + if (ITR_IS_DYNAMIC(tx_itr_setting)) { rx = i40e_set_new_dynamic_itr(&q_vector->rx); rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr); } - if (ITR_IS_DYNAMIC(vsi->tx_rings[idx]->tx_itr_setting)) { + if (ITR_IS_DYNAMIC(tx_itr_setting)) { tx = i40e_set_new_dynamic_itr(&q_vector->tx); txval = i40e_buildreg_itr(I40E_TX_ITR, q_vector->tx.itr); } diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index d4c6a767be94..75f2a2cdd738 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -1311,6 +1311,19 @@ static u32 i40e_buildreg_itr(const int type, const u16 itr) /* a small macro to shorten up some long lines */ #define INTREG I40E_VFINT_DYN_CTLN1 +static inline int get_rx_itr_enabled(struct i40e_vsi *vsi, int idx) +{ + struct i40evf_adapter *adapter = vsi->back; + + return !!(adapter->rx_rings[idx].rx_itr_setting); +} + +static inline int get_tx_itr_enabled(struct i40e_vsi *vsi, int idx) +{ + struct i40evf_adapter *adapter = vsi->back; + + return !!(adapter->tx_rings[idx].tx_itr_setting); +} /** * i40e_update_enable_itr - Update itr and re-enable MSIX interrupt @@ -1325,6 +1338,8 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, bool rx = false, tx = false; u32 rxval, txval; int vector; + int idx = q_vector->v_idx; + int rx_itr_setting, tx_itr_setting; vector = (q_vector->v_idx + vsi->base_vector); @@ -1333,18 +1348,21 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, */ rxval = txval = i40e_buildreg_itr(I40E_ITR_NONE, 0); + rx_itr_setting = get_rx_itr_enabled(vsi, idx); + tx_itr_setting = get_tx_itr_enabled(vsi, idx); + if (q_vector->itr_countdown > 0 || - (!ITR_IS_DYNAMIC(vsi->rx_itr_setting) && - !ITR_IS_DYNAMIC(vsi->tx_itr_setting))) { + (!ITR_IS_DYNAMIC(rx_itr_setting) && + !ITR_IS_DYNAMIC(tx_itr_setting))) { goto enable_int; } - if (ITR_IS_DYNAMIC(vsi->rx_itr_setting)) { + if (ITR_IS_DYNAMIC(rx_itr_setting)) { rx = i40e_set_new_dynamic_itr(&q_vector->rx); rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr); } - if (ITR_IS_DYNAMIC(vsi->tx_itr_setting)) { + if (ITR_IS_DYNAMIC(tx_itr_setting)) { tx = i40e_set_new_dynamic_itr(&q_vector->tx); txval = i40e_buildreg_itr(I40E_TX_ITR, q_vector->tx.itr); } diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h index 84e561cc0166..abcdecabbc56 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h @@ -287,6 +287,14 @@ struct i40e_ring { u8 dcb_tc; /* Traffic class of ring */ u8 __iomem *tail; + /* high bit set means dynamic, use accessors routines to read/write. + * hardware only supports 2us resolution for the ITR registers. + * these values always store the USER setting, and must be converted + * before programming to a register. + */ + u16 rx_itr_setting; + u16 tx_itr_setting; + u16 count; /* Number of descriptors */ u16 reg_idx; /* HW register index of the ring */ u16 rx_buf_len; diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h index dc00aaf94687..c5fd724313c7 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf.h +++ b/drivers/net/ethernet/intel/i40evf/i40evf.h @@ -59,13 +59,6 @@ struct i40e_vsi { unsigned long state; int base_vector; u16 work_limit; - /* high bit set means dynamic, use accessor routines to read/write. - * hardware only supports 2us resolution for the ITR registers. - * these values always store the USER setting, and must be converted - * before programming to a register. - */ - u16 rx_itr_setting; - u16 tx_itr_setting; u16 qs_handle; }; diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c index e17a15456266..a9940154eead 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c @@ -295,6 +295,50 @@ static int i40evf_set_ringparam(struct net_device *netdev, return 0; } +/** + * __i40evf_get_coalesce - get per-queue coalesce settings + * @netdev: the netdev to check + * @ec: ethtool coalesce data structure + * @queue: which queue to pick + * + * Gets the per-queue settings for coalescence. Specifically Rx and Tx usecs + * are per queue. If queue is <0 then we default to queue 0 as the + * representative value. + **/ +static int __i40evf_get_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec, + int queue) +{ + struct i40evf_adapter *adapter = netdev_priv(netdev); + struct i40e_vsi *vsi = &adapter->vsi; + struct i40e_ring *rx_ring, *tx_ring; + + ec->tx_max_coalesced_frames = vsi->work_limit; + ec->rx_max_coalesced_frames = vsi->work_limit; + + /* Rx and Tx usecs per queue value. If user doesn't specify the + * queue, return queue 0's value to represent. + */ + if (queue < 0) + queue = 0; + else if (queue >= adapter->num_active_queues) + return -EINVAL; + + rx_ring = &adapter->rx_rings[queue]; + tx_ring = &adapter->tx_rings[queue]; + + if (ITR_IS_DYNAMIC(rx_ring->rx_itr_setting)) + ec->use_adaptive_rx_coalesce = 1; + + if (ITR_IS_DYNAMIC(tx_ring->tx_itr_setting)) + ec->use_adaptive_tx_coalesce = 1; + + ec->rx_coalesce_usecs = rx_ring->rx_itr_setting & ~I40E_ITR_DYNAMIC; + ec->tx_coalesce_usecs = tx_ring->tx_itr_setting & ~I40E_ITR_DYNAMIC; + + return 0; +} + /** * i40evf_get_coalesce - Get interrupt coalescing settings * @netdev: network interface device structure @@ -302,25 +346,124 @@ static int i40evf_set_ringparam(struct net_device *netdev, * * Returns current coalescing settings. This is referred to elsewhere in the * driver as Interrupt Throttle Rate, as this is how the hardware describes - * this functionality. + * this functionality. Note that if per-queue settings have been modified this + * only represents the settings of queue 0. **/ static int i40evf_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec) +{ + return __i40evf_get_coalesce(netdev, ec, -1); +} + +/** + * i40evf_get_per_queue_coalesce - get coalesce values for specific queue + * @netdev: netdev to read + * @ec: coalesce settings from ethtool + * @queue: the queue to read + * + * Read specific queue's coalesce settings. + **/ +static int i40evf_get_per_queue_coalesce(struct net_device *netdev, + u32 queue, + struct ethtool_coalesce *ec) +{ + return __i40evf_get_coalesce(netdev, ec, queue); +} + +/** + * i40evf_set_itr_per_queue - set ITR values for specific queue + * @vsi: the VSI to set values for + * @ec: coalesce settings from ethtool + * @queue: the queue to modify + * + * Change the ITR settings for a specific queue. + **/ +static void i40evf_set_itr_per_queue(struct i40evf_adapter *adapter, + struct ethtool_coalesce *ec, + int queue) +{ + struct i40e_vsi *vsi = &adapter->vsi; + struct i40e_hw *hw = &adapter->hw; + struct i40e_q_vector *q_vector; + u16 vector; + + adapter->rx_rings[queue].rx_itr_setting = ec->rx_coalesce_usecs; + adapter->tx_rings[queue].tx_itr_setting = ec->tx_coalesce_usecs; + + if (ec->use_adaptive_rx_coalesce) + adapter->rx_rings[queue].rx_itr_setting |= I40E_ITR_DYNAMIC; + else + adapter->rx_rings[queue].rx_itr_setting &= ~I40E_ITR_DYNAMIC; + + if (ec->use_adaptive_tx_coalesce) + adapter->tx_rings[queue].tx_itr_setting |= I40E_ITR_DYNAMIC; + else + adapter->tx_rings[queue].tx_itr_setting &= ~I40E_ITR_DYNAMIC; + + q_vector = adapter->rx_rings[queue].q_vector; + q_vector->rx.itr = ITR_TO_REG(adapter->rx_rings[queue].rx_itr_setting); + vector = vsi->base_vector + q_vector->v_idx; + wr32(hw, I40E_VFINT_ITRN1(I40E_RX_ITR, vector - 1), q_vector->rx.itr); + + q_vector = adapter->tx_rings[queue].q_vector; + q_vector->tx.itr = ITR_TO_REG(adapter->tx_rings[queue].tx_itr_setting); + vector = vsi->base_vector + q_vector->v_idx; + wr32(hw, I40E_VFINT_ITRN1(I40E_TX_ITR, vector - 1), q_vector->tx.itr); + + i40e_flush(hw); +} + +/** + * __i40evf_set_coalesce - set coalesce settings for particular queue + * @netdev: the netdev to change + * @ec: ethtool coalesce settings + * @queue: the queue to change + * + * Sets the coalesce settings for a particular queue. + **/ +static int __i40evf_set_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec, + int queue) { struct i40evf_adapter *adapter = netdev_priv(netdev); struct i40e_vsi *vsi = &adapter->vsi; + int i; - ec->tx_max_coalesced_frames = vsi->work_limit; - ec->rx_max_coalesced_frames = vsi->work_limit; + if (ec->tx_max_coalesced_frames_irq || ec->rx_max_coalesced_frames_irq) + vsi->work_limit = ec->tx_max_coalesced_frames_irq; - if (ITR_IS_DYNAMIC(vsi->rx_itr_setting)) - ec->use_adaptive_rx_coalesce = 1; + if (ec->rx_coalesce_usecs == 0) { + if (ec->use_adaptive_rx_coalesce) + netif_info(adapter, drv, netdev, "rx-usecs=0, need to disable adaptive-rx for a complete disable\n"); + } else if ((ec->rx_coalesce_usecs < (I40E_MIN_ITR << 1)) || + (ec->rx_coalesce_usecs > (I40E_MAX_ITR << 1))) { + netif_info(adapter, drv, netdev, "Invalid value, rx-usecs range is 0-8160\n"); + return -EINVAL; + } - if (ITR_IS_DYNAMIC(vsi->tx_itr_setting)) - ec->use_adaptive_tx_coalesce = 1; + else + if (ec->tx_coalesce_usecs == 0) { + if (ec->use_adaptive_tx_coalesce) + netif_info(adapter, drv, netdev, "tx-usecs=0, need to disable adaptive-tx for a complete disable\n"); + } else if ((ec->tx_coalesce_usecs < (I40E_MIN_ITR << 1)) || + (ec->tx_coalesce_usecs > (I40E_MAX_ITR << 1))) { + netif_info(adapter, drv, netdev, "Invalid value, tx-usecs range is 0-8160\n"); + return -EINVAL; + } - ec->rx_coalesce_usecs = vsi->rx_itr_setting & ~I40E_ITR_DYNAMIC; - ec->tx_coalesce_usecs = vsi->tx_itr_setting & ~I40E_ITR_DYNAMIC; + /* Rx and Tx usecs has per queue value. If user doesn't specify the + * queue, apply to all queues. + */ + if (queue < 0) { + for (i = 0; i < adapter->num_active_queues; i++) + i40evf_set_itr_per_queue(adapter, ec, i); + } else if (queue < adapter->num_active_queues) { + i40evf_set_itr_per_queue(adapter, ec, queue); + } else { + netif_info(adapter, drv, netdev, "Invalid queue value, queue range is 0 - %d\n", + adapter->num_active_queues - 1); + return -EINVAL; + } return 0; } @@ -330,56 +473,27 @@ static int i40evf_get_coalesce(struct net_device *netdev, * @netdev: network interface device structure * @ec: ethtool coalesce structure * - * Change current coalescing settings. + * Change current coalescing settings for every queue. **/ static int i40evf_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec) { - struct i40evf_adapter *adapter = netdev_priv(netdev); - struct i40e_hw *hw = &adapter->hw; - struct i40e_vsi *vsi = &adapter->vsi; - struct i40e_q_vector *q_vector; - int i; + return __i40evf_set_coalesce(netdev, ec, -1); +} - if (ec->tx_max_coalesced_frames_irq || ec->rx_max_coalesced_frames_irq) - vsi->work_limit = ec->tx_max_coalesced_frames_irq; - - if ((ec->rx_coalesce_usecs >= (I40E_MIN_ITR << 1)) && - (ec->rx_coalesce_usecs <= (I40E_MAX_ITR << 1))) - vsi->rx_itr_setting = ec->rx_coalesce_usecs; - - else - return -EINVAL; - - if ((ec->tx_coalesce_usecs >= (I40E_MIN_ITR << 1)) && - (ec->tx_coalesce_usecs <= (I40E_MAX_ITR << 1))) - vsi->tx_itr_setting = ec->tx_coalesce_usecs; - else if (ec->use_adaptive_tx_coalesce) - vsi->tx_itr_setting = (I40E_ITR_DYNAMIC | - ITR_REG_TO_USEC(I40E_ITR_RX_DEF)); - else - return -EINVAL; - - if (ec->use_adaptive_rx_coalesce) - vsi->rx_itr_setting |= I40E_ITR_DYNAMIC; - else - vsi->rx_itr_setting &= ~I40E_ITR_DYNAMIC; - - if (ec->use_adaptive_tx_coalesce) - vsi->tx_itr_setting |= I40E_ITR_DYNAMIC; - else - vsi->tx_itr_setting &= ~I40E_ITR_DYNAMIC; - - for (i = 0; i < adapter->num_msix_vectors - NONQ_VECS; i++) { - q_vector = &adapter->q_vectors[i]; - q_vector->rx.itr = ITR_TO_REG(vsi->rx_itr_setting); - wr32(hw, I40E_VFINT_ITRN1(0, i), q_vector->rx.itr); - q_vector->tx.itr = ITR_TO_REG(vsi->tx_itr_setting); - wr32(hw, I40E_VFINT_ITRN1(1, i), q_vector->tx.itr); - i40e_flush(hw); - } - - return 0; +/** + * i40evf_set_per_queue_coalesce - set specific queue's coalesce settings + * @netdev: the netdev to change + * @ec: ethtool's coalesce settings + * @queue: the queue to modify + * + * Modifies a specific queue's coalesce settings. + */ +static int i40evf_set_per_queue_coalesce(struct net_device *netdev, + u32 queue, + struct ethtool_coalesce *ec) +{ + return __i40evf_set_coalesce(netdev, ec, queue); } /** @@ -533,6 +647,8 @@ static const struct ethtool_ops i40evf_ethtool_ops = { .set_msglevel = i40evf_set_msglevel, .get_coalesce = i40evf_get_coalesce, .set_coalesce = i40evf_set_coalesce, + .get_per_queue_coalesce = i40evf_get_per_queue_coalesce, + .set_per_queue_coalesce = i40evf_set_per_queue_coalesce, .get_rxnfc = i40evf_get_rxnfc, .get_rxfh_indir_size = i40evf_get_rxfh_indir_size, .get_rxfh = i40evf_get_rxfh, diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index 302c9743ef44..14372810fc27 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -370,7 +370,6 @@ i40evf_map_vector_to_rxq(struct i40evf_adapter *adapter, int v_idx, int r_idx) { struct i40e_q_vector *q_vector = &adapter->q_vectors[v_idx]; struct i40e_ring *rx_ring = &adapter->rx_rings[r_idx]; - struct i40e_vsi *vsi = &adapter->vsi; struct i40e_hw *hw = &adapter->hw; rx_ring->q_vector = q_vector; @@ -379,7 +378,7 @@ i40evf_map_vector_to_rxq(struct i40evf_adapter *adapter, int v_idx, int r_idx) q_vector->rx.ring = rx_ring; q_vector->rx.count++; q_vector->rx.latency_range = I40E_LOW_LATENCY; - q_vector->rx.itr = ITR_TO_REG(vsi->rx_itr_setting); + q_vector->rx.itr = ITR_TO_REG(rx_ring->rx_itr_setting); q_vector->ring_mask |= BIT(r_idx); q_vector->itr_countdown = ITR_COUNTDOWN_START; wr32(hw, I40E_VFINT_ITRN1(I40E_RX_ITR, v_idx - 1), q_vector->rx.itr); @@ -396,7 +395,6 @@ i40evf_map_vector_to_txq(struct i40evf_adapter *adapter, int v_idx, int t_idx) { struct i40e_q_vector *q_vector = &adapter->q_vectors[v_idx]; struct i40e_ring *tx_ring = &adapter->tx_rings[t_idx]; - struct i40e_vsi *vsi = &adapter->vsi; struct i40e_hw *hw = &adapter->hw; tx_ring->q_vector = q_vector; @@ -405,7 +403,7 @@ i40evf_map_vector_to_txq(struct i40evf_adapter *adapter, int v_idx, int t_idx) q_vector->tx.ring = tx_ring; q_vector->tx.count++; q_vector->tx.latency_range = I40E_LOW_LATENCY; - q_vector->tx.itr = ITR_TO_REG(vsi->tx_itr_setting); + q_vector->tx.itr = ITR_TO_REG(tx_ring->tx_itr_setting); q_vector->itr_countdown = ITR_COUNTDOWN_START; q_vector->num_ringpairs++; wr32(hw, I40E_VFINT_ITRN1(I40E_TX_ITR, v_idx - 1), q_vector->tx.itr); @@ -1162,6 +1160,7 @@ static int i40evf_alloc_queues(struct i40evf_adapter *adapter) tx_ring->netdev = adapter->netdev; tx_ring->dev = &adapter->pdev->dev; tx_ring->count = adapter->tx_desc_count; + tx_ring->tx_itr_setting = (I40E_ITR_DYNAMIC | I40E_ITR_TX_DEF); if (adapter->flags & I40E_FLAG_WB_ON_ITR_CAPABLE) tx_ring->flags |= I40E_TXR_FLAGS_WB_ON_ITR; @@ -1170,6 +1169,7 @@ static int i40evf_alloc_queues(struct i40evf_adapter *adapter) rx_ring->netdev = adapter->netdev; rx_ring->dev = &adapter->pdev->dev; rx_ring->count = adapter->rx_desc_count; + rx_ring->rx_itr_setting = (I40E_ITR_DYNAMIC | I40E_ITR_RX_DEF); } return 0; @@ -2277,8 +2277,6 @@ int i40evf_process_config(struct i40evf_adapter *adapter) adapter->vsi.back = adapter; adapter->vsi.base_vector = 1; adapter->vsi.work_limit = I40E_DEFAULT_IRQ_WORK; - adapter->vsi.rx_itr_setting = (I40E_ITR_DYNAMIC | I40E_ITR_RX_DEF); - adapter->vsi.tx_itr_setting = (I40E_ITR_DYNAMIC | I40E_ITR_TX_DEF); vsi->netdev = adapter->netdev; vsi->qs_handle = adapter->vsi_res->qset_handle; if (vfres->vf_offload_flags & I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF) { From e3b37f11e6e4e6b6f02cc762f182ce233d2c1c9d Mon Sep 17 00:00:00 2001 From: Aaron Conole Date: Wed, 21 Sep 2016 11:35:07 -0400 Subject: [PATCH 1516/1715] netfilter: replace list_head with single linked list The netfilter hook list never uses the prev pointer, and so can be trimmed to be a simple singly-linked list. In addition to having a more light weight structure for hook traversal, struct net becomes 5568 bytes (down from 6400) and struct net_device becomes 2176 bytes (down from 2240). Signed-off-by: Aaron Conole Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netdevice.h | 2 +- include/linux/netfilter.h | 63 +++++++------ include/linux/netfilter_ingress.h | 17 ++-- include/net/netfilter/nf_queue.h | 3 +- include/net/netns/netfilter.h | 2 +- net/bridge/br_netfilter_hooks.c | 19 ++-- net/netfilter/core.c | 145 +++++++++++++++++++----------- net/netfilter/nf_internals.h | 10 +-- net/netfilter/nf_queue.c | 18 ++-- net/netfilter/nfnetlink_queue.c | 8 +- 10 files changed, 169 insertions(+), 118 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 67bb978470dc..41f49f5ab62a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1783,7 +1783,7 @@ struct net_device { #endif struct netdev_queue __rcu *ingress_queue; #ifdef CONFIG_NETFILTER_INGRESS - struct list_head nf_hooks_ingress; + struct nf_hook_entry __rcu *nf_hooks_ingress; #endif unsigned char broadcast[MAX_ADDR_LEN]; diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index ad444f0b4ed0..44e20dac98a9 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -55,12 +55,34 @@ struct nf_hook_state { struct net_device *out; struct sock *sk; struct net *net; - struct list_head *hook_list; + struct nf_hook_entry __rcu *hook_entries; int (*okfn)(struct net *, struct sock *, struct sk_buff *); }; +typedef unsigned int nf_hookfn(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state); +struct nf_hook_ops { + struct list_head list; + + /* User fills in from here down. */ + nf_hookfn *hook; + struct net_device *dev; + void *priv; + u_int8_t pf; + unsigned int hooknum; + /* Hooks are ordered in ascending priority. */ + int priority; +}; + +struct nf_hook_entry { + struct nf_hook_entry __rcu *next; + struct nf_hook_ops ops; + const struct nf_hook_ops *orig_ops; +}; + static inline void nf_hook_state_init(struct nf_hook_state *p, - struct list_head *hook_list, + struct nf_hook_entry *hook_entry, unsigned int hook, int thresh, u_int8_t pf, struct net_device *indev, @@ -76,26 +98,11 @@ static inline void nf_hook_state_init(struct nf_hook_state *p, p->out = outdev; p->sk = sk; p->net = net; - p->hook_list = hook_list; + RCU_INIT_POINTER(p->hook_entries, hook_entry); p->okfn = okfn; } -typedef unsigned int nf_hookfn(void *priv, - struct sk_buff *skb, - const struct nf_hook_state *state); -struct nf_hook_ops { - struct list_head list; - - /* User fills in from here down. */ - nf_hookfn *hook; - struct net_device *dev; - void *priv; - u_int8_t pf; - unsigned int hooknum; - /* Hooks are ordered in ascending priority. */ - int priority; -}; struct nf_sockopt_ops { struct list_head list; @@ -161,7 +168,8 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, int (*okfn)(struct net *, struct sock *, struct sk_buff *), int thresh) { - struct list_head *hook_list; + struct nf_hook_entry *hook_head; + int ret = 1; #ifdef HAVE_JUMP_LABEL if (__builtin_constant_p(pf) && @@ -170,22 +178,19 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, return 1; #endif - hook_list = &net->nf.hooks[pf][hook]; - - if (!list_empty(hook_list)) { + rcu_read_lock(); + hook_head = rcu_dereference(net->nf.hooks[pf][hook]); + if (hook_head) { struct nf_hook_state state; - int ret; - /* We may already have this, but read-locks nest anyway */ - rcu_read_lock(); - nf_hook_state_init(&state, hook_list, hook, thresh, + nf_hook_state_init(&state, hook_head, hook, thresh, pf, indev, outdev, sk, net, okfn); ret = nf_hook_slow(skb, &state); - rcu_read_unlock(); - return ret; } - return 1; + rcu_read_unlock(); + + return ret; } static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net, diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h index 6965ba09eba7..33e37fb41d5d 100644 --- a/include/linux/netfilter_ingress.h +++ b/include/linux/netfilter_ingress.h @@ -11,23 +11,30 @@ static inline bool nf_hook_ingress_active(const struct sk_buff *skb) if (!static_key_false(&nf_hooks_needed[NFPROTO_NETDEV][NF_NETDEV_INGRESS])) return false; #endif - return !list_empty(&skb->dev->nf_hooks_ingress); + return rcu_access_pointer(skb->dev->nf_hooks_ingress); } /* caller must hold rcu_read_lock */ static inline int nf_hook_ingress(struct sk_buff *skb) { + struct nf_hook_entry *e = rcu_dereference(skb->dev->nf_hooks_ingress); struct nf_hook_state state; - nf_hook_state_init(&state, &skb->dev->nf_hooks_ingress, - NF_NETDEV_INGRESS, INT_MIN, NFPROTO_NETDEV, - skb->dev, NULL, NULL, dev_net(skb->dev), NULL); + /* Must recheck the ingress hook head, in the event it became NULL + * after the check in nf_hook_ingress_active evaluated to true. + */ + if (unlikely(!e)) + return 0; + + nf_hook_state_init(&state, e, NF_NETDEV_INGRESS, INT_MIN, + NFPROTO_NETDEV, skb->dev, NULL, NULL, + dev_net(skb->dev), NULL); return nf_hook_slow(skb, &state); } static inline void nf_hook_ingress_init(struct net_device *dev) { - INIT_LIST_HEAD(&dev->nf_hooks_ingress); + RCU_INIT_POINTER(dev->nf_hooks_ingress, NULL); } #else /* CONFIG_NETFILTER_INGRESS */ static inline int nf_hook_ingress_active(struct sk_buff *skb) diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index 8fe85b98b5c8..2280cfe86c56 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -11,7 +11,6 @@ struct nf_queue_entry { struct sk_buff *skb; unsigned int id; - struct nf_hook_ops *elem; struct nf_hook_state state; u16 size; /* sizeof(entry) + saved route keys */ @@ -25,7 +24,7 @@ struct nf_queue_handler { int (*outfn)(struct nf_queue_entry *entry, unsigned int queuenum); void (*nf_hook_drop)(struct net *net, - struct nf_hook_ops *ops); + const struct nf_hook_entry *hooks); }; void nf_register_queue_handler(struct net *net, const struct nf_queue_handler *qh); diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h index 36d723579af2..58487b1cc99a 100644 --- a/include/net/netns/netfilter.h +++ b/include/net/netns/netfilter.h @@ -16,6 +16,6 @@ struct netns_nf { #ifdef CONFIG_SYSCTL struct ctl_table_header *nf_log_dir_header; #endif - struct list_head hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; + struct nf_hook_entry __rcu *hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; }; #endif diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 6029af47377d..2fe9345c1407 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -1002,28 +1002,21 @@ int br_nf_hook_thresh(unsigned int hook, struct net *net, int (*okfn)(struct net *, struct sock *, struct sk_buff *)) { - struct nf_hook_ops *elem; + struct nf_hook_entry *elem; struct nf_hook_state state; - struct list_head *head; int ret; - head = &net->nf.hooks[NFPROTO_BRIDGE][hook]; + elem = rcu_dereference(net->nf.hooks[NFPROTO_BRIDGE][hook]); - list_for_each_entry_rcu(elem, head, list) { - struct nf_hook_ops *next; + while (elem && (elem->ops.priority <= NF_BR_PRI_BRNF)) + elem = rcu_dereference(elem->next); - next = list_entry_rcu(list_next_rcu(&elem->list), - struct nf_hook_ops, list); - if (next->priority <= NF_BR_PRI_BRNF) - continue; - } - - if (&elem->list == head) + if (!elem) return okfn(net, sk, skb); /* We may already have this, but read-locks nest anyway */ rcu_read_lock(); - nf_hook_state_init(&state, head, hook, NF_BR_PRI_BRNF + 1, + nf_hook_state_init(&state, elem, hook, NF_BR_PRI_BRNF + 1, NFPROTO_BRIDGE, indev, outdev, sk, net, okfn); ret = nf_hook_slow(skb, &state); diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 67b74287535d..72fc514ec676 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -61,33 +62,50 @@ EXPORT_SYMBOL(nf_hooks_needed); #endif static DEFINE_MUTEX(nf_hook_mutex); +#define nf_entry_dereference(e) \ + rcu_dereference_protected(e, lockdep_is_held(&nf_hook_mutex)) -static struct list_head *nf_find_hook_list(struct net *net, - const struct nf_hook_ops *reg) +static struct nf_hook_entry *nf_hook_entry_head(struct net *net, + const struct nf_hook_ops *reg) { - struct list_head *hook_list = NULL; + struct nf_hook_entry *hook_head = NULL; if (reg->pf != NFPROTO_NETDEV) - hook_list = &net->nf.hooks[reg->pf][reg->hooknum]; + hook_head = nf_entry_dereference(net->nf.hooks[reg->pf] + [reg->hooknum]); else if (reg->hooknum == NF_NETDEV_INGRESS) { #ifdef CONFIG_NETFILTER_INGRESS if (reg->dev && dev_net(reg->dev) == net) - hook_list = ®->dev->nf_hooks_ingress; + hook_head = + nf_entry_dereference( + reg->dev->nf_hooks_ingress); #endif } - return hook_list; + return hook_head; } -struct nf_hook_entry { - const struct nf_hook_ops *orig_ops; - struct nf_hook_ops ops; -}; +/* must hold nf_hook_mutex */ +static void nf_set_hooks_head(struct net *net, const struct nf_hook_ops *reg, + struct nf_hook_entry *entry) +{ + switch (reg->pf) { + case NFPROTO_NETDEV: + /* We already checked in nf_register_net_hook() that this is + * used from ingress. + */ + rcu_assign_pointer(reg->dev->nf_hooks_ingress, entry); + break; + default: + rcu_assign_pointer(net->nf.hooks[reg->pf][reg->hooknum], + entry); + break; + } +} int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg) { - struct list_head *hook_list; + struct nf_hook_entry *hooks_entry; struct nf_hook_entry *entry; - struct nf_hook_ops *elem; if (reg->pf == NFPROTO_NETDEV && (reg->hooknum != NF_NETDEV_INGRESS || @@ -100,19 +118,30 @@ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg) entry->orig_ops = reg; entry->ops = *reg; - - hook_list = nf_find_hook_list(net, reg); - if (!hook_list) { - kfree(entry); - return -ENOENT; - } + entry->next = NULL; mutex_lock(&nf_hook_mutex); - list_for_each_entry(elem, hook_list, list) { - if (reg->priority < elem->priority) - break; + hooks_entry = nf_hook_entry_head(net, reg); + + if (hooks_entry && hooks_entry->orig_ops->priority > reg->priority) { + /* This is the case where we need to insert at the head */ + entry->next = hooks_entry; + hooks_entry = NULL; } - list_add_rcu(&entry->ops.list, elem->list.prev); + + while (hooks_entry && + reg->priority >= hooks_entry->orig_ops->priority && + nf_entry_dereference(hooks_entry->next)) { + hooks_entry = nf_entry_dereference(hooks_entry->next); + } + + if (hooks_entry) { + entry->next = nf_entry_dereference(hooks_entry->next); + rcu_assign_pointer(hooks_entry->next, entry); + } else { + nf_set_hooks_head(net, reg, entry); + } + mutex_unlock(&nf_hook_mutex); #ifdef CONFIG_NETFILTER_INGRESS if (reg->pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS) @@ -127,24 +156,33 @@ EXPORT_SYMBOL(nf_register_net_hook); void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg) { - struct list_head *hook_list; - struct nf_hook_entry *entry; - struct nf_hook_ops *elem; - - hook_list = nf_find_hook_list(net, reg); - if (!hook_list) - return; + struct nf_hook_entry *hooks_entry; mutex_lock(&nf_hook_mutex); - list_for_each_entry(elem, hook_list, list) { - entry = container_of(elem, struct nf_hook_entry, ops); - if (entry->orig_ops == reg) { - list_del_rcu(&entry->ops.list); - break; - } + hooks_entry = nf_hook_entry_head(net, reg); + if (hooks_entry->orig_ops == reg) { + nf_set_hooks_head(net, reg, + nf_entry_dereference(hooks_entry->next)); + goto unlock; } + while (hooks_entry && nf_entry_dereference(hooks_entry->next)) { + struct nf_hook_entry *next = + nf_entry_dereference(hooks_entry->next); + struct nf_hook_entry *nnext; + + if (next->orig_ops != reg) { + hooks_entry = next; + continue; + } + nnext = nf_entry_dereference(next->next); + rcu_assign_pointer(hooks_entry->next, nnext); + hooks_entry = next; + break; + } + +unlock: mutex_unlock(&nf_hook_mutex); - if (&elem->list == hook_list) { + if (!hooks_entry) { WARN(1, "nf_unregister_net_hook: hook not found!\n"); return; } @@ -156,10 +194,10 @@ void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg) static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]); #endif synchronize_net(); - nf_queue_nf_hook_drop(net, &entry->ops); + nf_queue_nf_hook_drop(net, hooks_entry); /* other cpu might still process nfqueue verdict that used reg */ synchronize_net(); - kfree(entry); + kfree(hooks_entry); } EXPORT_SYMBOL(nf_unregister_net_hook); @@ -258,10 +296,9 @@ void nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n) } EXPORT_SYMBOL(nf_unregister_hooks); -unsigned int nf_iterate(struct list_head *head, - struct sk_buff *skb, +unsigned int nf_iterate(struct sk_buff *skb, struct nf_hook_state *state, - struct nf_hook_ops **elemp) + struct nf_hook_entry **entryp) { unsigned int verdict; @@ -269,20 +306,23 @@ unsigned int nf_iterate(struct list_head *head, * The caller must not block between calls to this * function because of risk of continuing from deleted element. */ - list_for_each_entry_continue_rcu((*elemp), head, list) { - if (state->thresh > (*elemp)->priority) + while (*entryp) { + if (state->thresh > (*entryp)->ops.priority) { + *entryp = rcu_dereference((*entryp)->next); continue; + } /* Optimization: we don't need to hold module reference here, since function can't sleep. --RR */ repeat: - verdict = (*elemp)->hook((*elemp)->priv, skb, state); + verdict = (*entryp)->ops.hook((*entryp)->ops.priv, skb, state); if (verdict != NF_ACCEPT) { #ifdef CONFIG_NETFILTER_DEBUG if (unlikely((verdict & NF_VERDICT_MASK) > NF_MAX_VERDICT)) { NFDEBUG("Evil return from %p(%u).\n", - (*elemp)->hook, state->hook); + (*entryp)->ops.hook, state->hook); + *entryp = rcu_dereference((*entryp)->next); continue; } #endif @@ -290,6 +330,7 @@ repeat: return verdict; goto repeat; } + *entryp = rcu_dereference((*entryp)->next); } return NF_ACCEPT; } @@ -299,13 +340,13 @@ repeat: * -EPERM for NF_DROP, 0 otherwise. Caller must hold rcu_read_lock. */ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state) { - struct nf_hook_ops *elem; + struct nf_hook_entry *entry; unsigned int verdict; int ret = 0; - elem = list_entry_rcu(state->hook_list, struct nf_hook_ops, list); + entry = rcu_dereference(state->hook_entries); next_hook: - verdict = nf_iterate(state->hook_list, skb, state, &elem); + verdict = nf_iterate(skb, state, &entry); if (verdict == NF_ACCEPT || verdict == NF_STOP) { ret = 1; } else if ((verdict & NF_VERDICT_MASK) == NF_DROP) { @@ -314,8 +355,10 @@ next_hook: if (ret == 0) ret = -EPERM; } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) { - int err = nf_queue(skb, elem, state, - verdict >> NF_VERDICT_QBITS); + int err; + + RCU_INIT_POINTER(state->hook_entries, entry); + err = nf_queue(skb, state, verdict >> NF_VERDICT_QBITS); if (err < 0) { if (err == -ESRCH && (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS)) @@ -442,7 +485,7 @@ static int __net_init netfilter_net_init(struct net *net) for (i = 0; i < ARRAY_SIZE(net->nf.hooks); i++) { for (h = 0; h < NF_MAX_HOOKS; h++) - INIT_LIST_HEAD(&net->nf.hooks[i][h]); + RCU_INIT_POINTER(net->nf.hooks[i][h], NULL); } #ifdef CONFIG_PROC_FS diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h index 065522564ac6..e0adb5959342 100644 --- a/net/netfilter/nf_internals.h +++ b/net/netfilter/nf_internals.h @@ -13,13 +13,13 @@ /* core.c */ -unsigned int nf_iterate(struct list_head *head, struct sk_buff *skb, - struct nf_hook_state *state, struct nf_hook_ops **elemp); +unsigned int nf_iterate(struct sk_buff *skb, struct nf_hook_state *state, + struct nf_hook_entry **entryp); /* nf_queue.c */ -int nf_queue(struct sk_buff *skb, struct nf_hook_ops *elem, - struct nf_hook_state *state, unsigned int queuenum); -void nf_queue_nf_hook_drop(struct net *net, struct nf_hook_ops *ops); +int nf_queue(struct sk_buff *skb, struct nf_hook_state *state, + unsigned int queuenum); +void nf_queue_nf_hook_drop(struct net *net, const struct nf_hook_entry *entry); int __init netfilter_queue_init(void); /* nf_log.c */ diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index b19ad20a705c..96964a0070e1 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -96,14 +96,14 @@ void nf_queue_entry_get_refs(struct nf_queue_entry *entry) } EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs); -void nf_queue_nf_hook_drop(struct net *net, struct nf_hook_ops *ops) +void nf_queue_nf_hook_drop(struct net *net, const struct nf_hook_entry *entry) { const struct nf_queue_handler *qh; rcu_read_lock(); qh = rcu_dereference(net->nf.queue_handler); if (qh) - qh->nf_hook_drop(net, ops); + qh->nf_hook_drop(net, entry); rcu_read_unlock(); } @@ -112,7 +112,6 @@ void nf_queue_nf_hook_drop(struct net *net, struct nf_hook_ops *ops) * through nf_reinject(). */ int nf_queue(struct sk_buff *skb, - struct nf_hook_ops *elem, struct nf_hook_state *state, unsigned int queuenum) { @@ -141,7 +140,6 @@ int nf_queue(struct sk_buff *skb, *entry = (struct nf_queue_entry) { .skb = skb, - .elem = elem, .state = *state, .size = sizeof(*entry) + afinfo->route_key_size, }; @@ -165,11 +163,15 @@ err: void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) { + struct nf_hook_entry *hook_entry; struct sk_buff *skb = entry->skb; - struct nf_hook_ops *elem = entry->elem; const struct nf_afinfo *afinfo; + struct nf_hook_ops *elem; int err; + hook_entry = rcu_dereference(entry->state.hook_entries); + elem = &hook_entry->ops; + nf_queue_entry_release_refs(entry); /* Continue traversal iff userspace said ok... */ @@ -186,8 +188,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) if (verdict == NF_ACCEPT) { next_hook: - verdict = nf_iterate(entry->state.hook_list, - skb, &entry->state, &elem); + verdict = nf_iterate(skb, &entry->state, &hook_entry); } switch (verdict & NF_VERDICT_MASK) { @@ -198,7 +199,8 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) local_bh_enable(); break; case NF_QUEUE: - err = nf_queue(skb, elem, &entry->state, + RCU_INIT_POINTER(entry->state.hook_entries, hook_entry); + err = nf_queue(skb, &entry->state, verdict >> NF_VERDICT_QBITS); if (err < 0) { if (err == -ESRCH && diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 7caa8b082c41..af832c526048 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -917,12 +917,14 @@ static struct notifier_block nfqnl_dev_notifier = { .notifier_call = nfqnl_rcv_dev_event, }; -static int nf_hook_cmp(struct nf_queue_entry *entry, unsigned long ops_ptr) +static int nf_hook_cmp(struct nf_queue_entry *entry, unsigned long entry_ptr) { - return entry->elem == (struct nf_hook_ops *)ops_ptr; + return rcu_access_pointer(entry->state.hook_entries) == + (struct nf_hook_entry *)entry_ptr; } -static void nfqnl_nf_hook_drop(struct net *net, struct nf_hook_ops *hook) +static void nfqnl_nf_hook_drop(struct net *net, + const struct nf_hook_entry *hook) { struct nfnl_queue_net *q = nfnl_queue_pernet(net); int i; From 8d11350f5f33378efc5f905bee325f3e76d6bcca Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Thu, 22 Sep 2016 14:53:53 +0800 Subject: [PATCH 1517/1715] netfilter: seqadj: Fix the wrong ack adjust for the RST packet without ack It is valid that the TCP RST packet which does not set ack flag, and bytes of ack number are zero. But current seqadj codes would adjust the "0" ack to invalid ack number. Actually seqadj need to check the ack flag before adjust it for these RST packets. The following is my test case client is 10.26.98.245, and add one iptable rule: iptables -I INPUT -p tcp --sport 12345 -m connbytes --connbytes 2: --connbytes-dir reply --connbytes-mode packets -j REJECT --reject-with tcp-reset This iptables rule could generate on TCP RST without ack flag. server:10.172.135.55 Enable the synproxy with seqadjust by the following iptables rules iptables -t raw -A PREROUTING -i eth0 -p tcp -d 10.172.135.55 --dport 12345 -m tcp --syn -j CT --notrack iptables -A INPUT -i eth0 -p tcp -d 10.172.135.55 --dport 12345 -m conntrack --ctstate INVALID,UNTRACKED -j SYNPROXY --sack-perm --timestamp --wscale 7 --mss 1460 iptables -A OUTPUT -o eth0 -p tcp -s 10.172.135.55 --sport 12345 -m conntrack --ctstate INVALID,UNTRACKED -m tcp --tcp-flags SYN,RST,ACK SYN,ACK -j ACCEPT The following is my test result. 1. packet trace on client root@routers:/tmp# tcpdump -i eth0 tcp port 12345 -n tcpdump: verbose output suppressed, use -v or -vv for full protocol decode listening on eth0, link-type EN10MB (Ethernet), capture size 65535 bytes IP 10.26.98.245.45154 > 10.172.135.55.12345: Flags [S], seq 3695959829, win 29200, options [mss 1460,sackOK,TS val 452367884 ecr 0,nop,wscale 7], length 0 IP 10.172.135.55.12345 > 10.26.98.245.45154: Flags [S.], seq 546723266, ack 3695959830, win 0, options [mss 1460,sackOK,TS val 15643479 ecr 452367884, nop,wscale 7], length 0 IP 10.26.98.245.45154 > 10.172.135.55.12345: Flags [.], ack 1, win 229, options [nop,nop,TS val 452367885 ecr 15643479], length 0 IP 10.172.135.55.12345 > 10.26.98.245.45154: Flags [.], ack 1, win 226, options [nop,nop,TS val 15643479 ecr 452367885], length 0 IP 10.26.98.245.45154 > 10.172.135.55.12345: Flags [R], seq 3695959830, win 0, length 0 2. seqadj log on server [62873.867319] Adjusting sequence number from 602341895->546723267, ack from 3695959830->3695959830 [62873.867644] Adjusting sequence number from 602341895->546723267, ack from 3695959830->3695959830 [62873.869040] Adjusting sequence number from 3695959830->3695959830, ack from 0->55618628 To summarize, it is clear that the seqadj codes adjust the 0 ack when receive one TCP RST packet without ack. Signed-off-by: Gao Feng Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_seqadj.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/net/netfilter/nf_conntrack_seqadj.c b/net/netfilter/nf_conntrack_seqadj.c index dff0f0cc59e4..ef7063eced7c 100644 --- a/net/netfilter/nf_conntrack_seqadj.c +++ b/net/netfilter/nf_conntrack_seqadj.c @@ -169,7 +169,7 @@ int nf_ct_seq_adjust(struct sk_buff *skb, s32 seqoff, ackoff; struct nf_conn_seqadj *seqadj = nfct_seqadj(ct); struct nf_ct_seqadj *this_way, *other_way; - int res; + int res = 1; this_way = &seqadj->seq[dir]; other_way = &seqadj->seq[!dir]; @@ -184,27 +184,31 @@ int nf_ct_seq_adjust(struct sk_buff *skb, else seqoff = this_way->offset_before; + newseq = htonl(ntohl(tcph->seq) + seqoff); + inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, false); + pr_debug("Adjusting sequence number from %u->%u\n", + ntohl(tcph->seq), ntohl(newseq)); + tcph->seq = newseq; + + if (!tcph->ack) + goto out; + if (after(ntohl(tcph->ack_seq) - other_way->offset_before, other_way->correction_pos)) ackoff = other_way->offset_after; else ackoff = other_way->offset_before; - newseq = htonl(ntohl(tcph->seq) + seqoff); newack = htonl(ntohl(tcph->ack_seq) - ackoff); - - inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, false); inet_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, false); - - pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n", + pr_debug("Adjusting ack number from %u->%u, ack from %u->%u\n", ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq), ntohl(newack)); - - tcph->seq = newseq; tcph->ack_seq = newack; res = nf_ct_sack_adjust(skb, protoff, tcph, ct, ctinfo); +out: spin_unlock_bh(&ct->lock); return res; From d767ff2c84f19be1aa403762f34eebbb403caf6d Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Thu, 22 Sep 2016 22:28:51 +0800 Subject: [PATCH 1518/1715] netfilter: nft_ct: unnecessary to require dir when use ct l3proto/protocol Currently, if the user want to match ct l3proto, we must specify the direction, for example: # nft add rule filter input ct original l3proto ipv4 ^^^^^^^^ Otherwise, error message will be reported: # nft add rule filter input ct l3proto ipv4 nft add rule filter input ct l3proto ipv4 :1:1-38: Error: Could not process rule: Invalid argument add rule filter input ct l3proto ipv4 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Actually, there's no need to require NFTA_CT_DIRECTION attr, because ct l3proto and protocol are unrelated to direction. And for compatibility, even if the user specify the NFTA_CT_DIRECTION attr, do not report error, just skip it. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_ct.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 51e180f2a003..825fbbc62f48 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -128,15 +128,18 @@ static void nft_ct_get_eval(const struct nft_expr *expr, memcpy(dest, &count, sizeof(count)); return; } + case NFT_CT_L3PROTOCOL: + *dest = nf_ct_l3num(ct); + return; + case NFT_CT_PROTOCOL: + *dest = nf_ct_protonum(ct); + return; default: break; } tuple = &ct->tuplehash[priv->dir].tuple; switch (priv->key) { - case NFT_CT_L3PROTOCOL: - *dest = nf_ct_l3num(ct); - return; case NFT_CT_SRC: memcpy(dest, tuple->src.u3.all, nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16); @@ -145,9 +148,6 @@ static void nft_ct_get_eval(const struct nft_expr *expr, memcpy(dest, tuple->dst.u3.all, nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16); return; - case NFT_CT_PROTOCOL: - *dest = nf_ct_protonum(ct); - return; case NFT_CT_PROTO_SRC: *dest = (__force __u16)tuple->src.u.all; return; @@ -283,8 +283,9 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, case NFT_CT_L3PROTOCOL: case NFT_CT_PROTOCOL: - if (tb[NFTA_CT_DIRECTION] == NULL) - return -EINVAL; + /* For compatibility, do not report error if NFTA_CT_DIRECTION + * attribute is specified. + */ len = sizeof(u8); break; case NFT_CT_SRC: @@ -432,8 +433,6 @@ static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr) goto nla_put_failure; switch (priv->key) { - case NFT_CT_L3PROTOCOL: - case NFT_CT_PROTOCOL: case NFT_CT_SRC: case NFT_CT_DST: case NFT_CT_PROTO_SRC: From 7bfdde7045ad54d9fdccac70baffd094d9de73f8 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Thu, 22 Sep 2016 22:28:52 +0800 Subject: [PATCH 1519/1715] netfilter: nft_ct: report error if mark and dir specified simultaneously NFT_CT_MARK is unrelated to direction, so if NFTA_CT_DIRECTION attr is specified, report EINVAL to the userspace. This validation check was already done at nft_ct_get_init, but we missed it in nft_ct_set_init. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_ct.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 825fbbc62f48..d7b0d171172a 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -364,6 +364,8 @@ static int nft_ct_set_init(const struct nft_ctx *ctx, switch (priv->key) { #ifdef CONFIG_NF_CONNTRACK_MARK case NFT_CT_MARK: + if (tb[NFTA_CT_DIRECTION]) + return -EINVAL; len = FIELD_SIZEOF(struct nf_conn, mark); break; #endif From 0dc60a4546fefc6dc9f54abf60beeeb3501726fa Mon Sep 17 00:00:00 2001 From: Vishwanath Pai Date: Thu, 22 Sep 2016 12:42:46 -0400 Subject: [PATCH 1520/1715] netfilter: xt_hashlimit: Prepare for revision 2 I am planning to add a revision 2 for the hashlimit xtables module to support higher packets per second rates. This patch renames all the functions and variables related to revision 1 by adding _v1 at the end of the names. Signed-off-by: Vishwanath Pai Signed-off-by: Joshua Hunt Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_hashlimit.c | 61 ++++++++++++++++++------------------ 1 file changed, 31 insertions(+), 30 deletions(-) diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 178696852bde..e93d9e0a3f35 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -56,7 +56,7 @@ static inline struct hashlimit_net *hashlimit_pernet(struct net *net) } /* need to declare this at the top */ -static const struct file_operations dl_file_ops; +static const struct file_operations dl_file_ops_v1; /* hash table crap */ struct dsthash_dst { @@ -215,8 +215,8 @@ dsthash_free(struct xt_hashlimit_htable *ht, struct dsthash_ent *ent) } static void htable_gc(struct work_struct *work); -static int htable_create(struct net *net, struct xt_hashlimit_mtinfo1 *minfo, - u_int8_t family) +static int htable_create_v1(struct net *net, struct xt_hashlimit_mtinfo1 *minfo, + u_int8_t family) { struct hashlimit_net *hashlimit_net = hashlimit_pernet(net); struct xt_hashlimit_htable *hinfo; @@ -265,7 +265,7 @@ static int htable_create(struct net *net, struct xt_hashlimit_mtinfo1 *minfo, hinfo->pde = proc_create_data(minfo->name, 0, (family == NFPROTO_IPV4) ? hashlimit_net->ipt_hashlimit : hashlimit_net->ip6t_hashlimit, - &dl_file_ops, hinfo); + &dl_file_ops_v1, hinfo); if (hinfo->pde == NULL) { kfree(hinfo->name); vfree(hinfo); @@ -398,7 +398,7 @@ static void htable_put(struct xt_hashlimit_htable *hinfo) (slowest userspace tool allows), which means CREDITS_PER_JIFFY*HZ*60*60*24 < 2^32 ie. */ -#define MAX_CPJ (0xFFFFFFFF / (HZ*60*60*24)) +#define MAX_CPJ_v1 (0xFFFFFFFF / (HZ*60*60*24)) /* Repeated shift and or gives us all 1s, final shift and add 1 gives * us the power of 2 below the theoretical max, so GCC simply does a @@ -410,7 +410,7 @@ static void htable_put(struct xt_hashlimit_htable *hinfo) #define _POW2_BELOW32(x) (_POW2_BELOW16(x)|_POW2_BELOW16((x)>>16)) #define POW2_BELOW32(x) ((_POW2_BELOW32(x)>>1) + 1) -#define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ) +#define CREDITS_PER_JIFFY_v1 POW2_BELOW32(MAX_CPJ_v1) /* in byte mode, the lowest possible rate is one packet/second. * credit_cap is used as a counter that tells us how many times we can @@ -428,11 +428,12 @@ static u32 xt_hashlimit_len_to_chunks(u32 len) static u32 user2credits(u32 user) { /* If multiplying would overflow... */ - if (user > 0xFFFFFFFF / (HZ*CREDITS_PER_JIFFY)) + if (user > 0xFFFFFFFF / (HZ*CREDITS_PER_JIFFY_v1)) /* Divide first. */ - return (user / XT_HASHLIMIT_SCALE) * HZ * CREDITS_PER_JIFFY; + return (user / XT_HASHLIMIT_SCALE) *\ + HZ * CREDITS_PER_JIFFY_v1; - return (user * HZ * CREDITS_PER_JIFFY) / XT_HASHLIMIT_SCALE; + return (user * HZ * CREDITS_PER_JIFFY_v1) / XT_HASHLIMIT_SCALE; } static u32 user2credits_byte(u32 user) @@ -461,7 +462,7 @@ static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now, u32 mode) return; } } else { - dh->rateinfo.credit += delta * CREDITS_PER_JIFFY; + dh->rateinfo.credit += delta * CREDITS_PER_JIFFY_v1; cap = dh->rateinfo.credit_cap; } if (dh->rateinfo.credit > cap) @@ -603,7 +604,7 @@ static u32 hashlimit_byte_cost(unsigned int len, struct dsthash_ent *dh) } static bool -hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) +hashlimit_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_hashlimit_mtinfo1 *info = par->matchinfo; struct xt_hashlimit_htable *hinfo = info->hinfo; @@ -660,7 +661,7 @@ hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) return false; } -static int hashlimit_mt_check(const struct xt_mtchk_param *par) +static int hashlimit_mt_check_v1(const struct xt_mtchk_param *par) { struct net *net = par->net; struct xt_hashlimit_mtinfo1 *info = par->matchinfo; @@ -701,7 +702,7 @@ static int hashlimit_mt_check(const struct xt_mtchk_param *par) mutex_lock(&hashlimit_mutex); info->hinfo = htable_find_get(net, info->name, par->family); if (info->hinfo == NULL) { - ret = htable_create(net, info, par->family); + ret = htable_create_v1(net, info, par->family); if (ret < 0) { mutex_unlock(&hashlimit_mutex); return ret; @@ -711,7 +712,7 @@ static int hashlimit_mt_check(const struct xt_mtchk_param *par) return 0; } -static void hashlimit_mt_destroy(const struct xt_mtdtor_param *par) +static void hashlimit_mt_destroy_v1(const struct xt_mtdtor_param *par) { const struct xt_hashlimit_mtinfo1 *info = par->matchinfo; @@ -723,10 +724,10 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = { .name = "hashlimit", .revision = 1, .family = NFPROTO_IPV4, - .match = hashlimit_mt, + .match = hashlimit_mt_v1, .matchsize = sizeof(struct xt_hashlimit_mtinfo1), - .checkentry = hashlimit_mt_check, - .destroy = hashlimit_mt_destroy, + .checkentry = hashlimit_mt_check_v1, + .destroy = hashlimit_mt_destroy_v1, .me = THIS_MODULE, }, #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) @@ -734,10 +735,10 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = { .name = "hashlimit", .revision = 1, .family = NFPROTO_IPV6, - .match = hashlimit_mt, + .match = hashlimit_mt_v1, .matchsize = sizeof(struct xt_hashlimit_mtinfo1), - .checkentry = hashlimit_mt_check, - .destroy = hashlimit_mt_destroy, + .checkentry = hashlimit_mt_check_v1, + .destroy = hashlimit_mt_destroy_v1, .me = THIS_MODULE, }, #endif @@ -786,8 +787,8 @@ static void dl_seq_stop(struct seq_file *s, void *v) spin_unlock_bh(&htable->lock); } -static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family, - struct seq_file *s) +static int dl_seq_real_show_v1(struct dsthash_ent *ent, u_int8_t family, + struct seq_file *s) { const struct xt_hashlimit_htable *ht = s->private; @@ -825,7 +826,7 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family, return seq_has_overflowed(s); } -static int dl_seq_show(struct seq_file *s, void *v) +static int dl_seq_show_v1(struct seq_file *s, void *v) { struct xt_hashlimit_htable *htable = s->private; unsigned int *bucket = (unsigned int *)v; @@ -833,22 +834,22 @@ static int dl_seq_show(struct seq_file *s, void *v) if (!hlist_empty(&htable->hash[*bucket])) { hlist_for_each_entry(ent, &htable->hash[*bucket], node) - if (dl_seq_real_show(ent, htable->family, s)) + if (dl_seq_real_show_v1(ent, htable->family, s)) return -1; } return 0; } -static const struct seq_operations dl_seq_ops = { +static const struct seq_operations dl_seq_ops_v1 = { .start = dl_seq_start, .next = dl_seq_next, .stop = dl_seq_stop, - .show = dl_seq_show + .show = dl_seq_show_v1 }; -static int dl_proc_open(struct inode *inode, struct file *file) +static int dl_proc_open_v1(struct inode *inode, struct file *file) { - int ret = seq_open(file, &dl_seq_ops); + int ret = seq_open(file, &dl_seq_ops_v1); if (!ret) { struct seq_file *sf = file->private_data; @@ -857,9 +858,9 @@ static int dl_proc_open(struct inode *inode, struct file *file) return ret; } -static const struct file_operations dl_file_ops = { +static const struct file_operations dl_file_ops_v1 = { .owner = THIS_MODULE, - .open = dl_proc_open, + .open = dl_proc_open_v1, .read = seq_read, .llseek = seq_lseek, .release = seq_release From 11d5f15723c9f39d7c131d0149d024c17dbef676 Mon Sep 17 00:00:00 2001 From: Vishwanath Pai Date: Thu, 22 Sep 2016 12:43:44 -0400 Subject: [PATCH 1521/1715] netfilter: xt_hashlimit: Create revision 2 to support higher pps rates Create a new revision for the hashlimit iptables extension module. Rev 2 will support higher pps of upto 1 million, Version 1 supports only 10k. To support this we have to increase the size of the variables avg and burst in hashlimit_cfg to 64-bit. Create two new structs hashlimit_cfg2 and xt_hashlimit_mtinfo2 and also create newer versions of all the functions for match, checkentry and destroy. Some of the functions like hashlimit_mt, hashlimit_mt_check etc are very similar in both rev1 and rev2 with only minor changes, so I have split those functions and moved all the common code to a *_common function. Signed-off-by: Vishwanath Pai Signed-off-by: Joshua Hunt Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/xt_hashlimit.h | 23 ++ net/netfilter/xt_hashlimit.c | 332 ++++++++++++++++---- 2 files changed, 286 insertions(+), 69 deletions(-) diff --git a/include/uapi/linux/netfilter/xt_hashlimit.h b/include/uapi/linux/netfilter/xt_hashlimit.h index 6db90372f09c..3efc0ca18345 100644 --- a/include/uapi/linux/netfilter/xt_hashlimit.h +++ b/include/uapi/linux/netfilter/xt_hashlimit.h @@ -6,6 +6,7 @@ /* timings are in milliseconds. */ #define XT_HASHLIMIT_SCALE 10000 +#define XT_HASHLIMIT_SCALE_v2 1000000llu /* 1/10,000 sec period => max of 10,000/sec. Min rate is then 429490 * seconds, or one packet every 59 hours. */ @@ -63,6 +64,20 @@ struct hashlimit_cfg1 { __u8 srcmask, dstmask; }; +struct hashlimit_cfg2 { + __u64 avg; /* Average secs between packets * scale */ + __u64 burst; /* Period multiplier for upper limit. */ + __u32 mode; /* bitmask of XT_HASHLIMIT_HASH_* */ + + /* user specified */ + __u32 size; /* how many buckets */ + __u32 max; /* max number of entries */ + __u32 gc_interval; /* gc interval */ + __u32 expire; /* when do entries expire? */ + + __u8 srcmask, dstmask; +}; + struct xt_hashlimit_mtinfo1 { char name[IFNAMSIZ]; struct hashlimit_cfg1 cfg; @@ -71,4 +86,12 @@ struct xt_hashlimit_mtinfo1 { struct xt_hashlimit_htable *hinfo __attribute__((aligned(8))); }; +struct xt_hashlimit_mtinfo2 { + char name[NAME_MAX]; + struct hashlimit_cfg2 cfg; + + /* Used internally by the kernel */ + struct xt_hashlimit_htable *hinfo __attribute__((aligned(8))); +}; + #endif /* _UAPI_XT_HASHLIMIT_H */ diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index e93d9e0a3f35..44a095ecc7b7 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -57,6 +57,7 @@ static inline struct hashlimit_net *hashlimit_pernet(struct net *net) /* need to declare this at the top */ static const struct file_operations dl_file_ops_v1; +static const struct file_operations dl_file_ops; /* hash table crap */ struct dsthash_dst { @@ -86,8 +87,8 @@ struct dsthash_ent { unsigned long expires; /* precalculated expiry time */ struct { unsigned long prev; /* last modification */ - u_int32_t credit; - u_int32_t credit_cap, cost; + u_int64_t credit; + u_int64_t credit_cap, cost; } rateinfo; struct rcu_head rcu; }; @@ -98,7 +99,7 @@ struct xt_hashlimit_htable { u_int8_t family; bool rnd_initialized; - struct hashlimit_cfg1 cfg; /* config */ + struct hashlimit_cfg2 cfg; /* config */ /* used internally */ spinlock_t lock; /* lock for list_head */ @@ -114,6 +115,30 @@ struct xt_hashlimit_htable { struct hlist_head hash[0]; /* hashtable itself */ }; +static int +cfg_copy(struct hashlimit_cfg2 *to, void *from, int revision) +{ + if (revision == 1) { + struct hashlimit_cfg1 *cfg = (struct hashlimit_cfg1 *)from; + + to->mode = cfg->mode; + to->avg = cfg->avg; + to->burst = cfg->burst; + to->size = cfg->size; + to->max = cfg->max; + to->gc_interval = cfg->gc_interval; + to->expire = cfg->expire; + to->srcmask = cfg->srcmask; + to->dstmask = cfg->dstmask; + } else if (revision == 2) { + memcpy(to, from, sizeof(struct hashlimit_cfg2)); + } else { + return -EINVAL; + } + + return 0; +} + static DEFINE_MUTEX(hashlimit_mutex); /* protects htables list */ static struct kmem_cache *hashlimit_cachep __read_mostly; @@ -215,16 +240,18 @@ dsthash_free(struct xt_hashlimit_htable *ht, struct dsthash_ent *ent) } static void htable_gc(struct work_struct *work); -static int htable_create_v1(struct net *net, struct xt_hashlimit_mtinfo1 *minfo, - u_int8_t family) +static int htable_create(struct net *net, struct hashlimit_cfg2 *cfg, + const char *name, u_int8_t family, + struct xt_hashlimit_htable **out_hinfo, + int revision) { struct hashlimit_net *hashlimit_net = hashlimit_pernet(net); struct xt_hashlimit_htable *hinfo; - unsigned int size; - unsigned int i; + unsigned int size, i; + int ret; - if (minfo->cfg.size) { - size = minfo->cfg.size; + if (cfg->size) { + size = cfg->size; } else { size = (totalram_pages << PAGE_SHIFT) / 16384 / sizeof(struct list_head); @@ -238,10 +265,14 @@ static int htable_create_v1(struct net *net, struct xt_hashlimit_mtinfo1 *minfo, sizeof(struct list_head) * size); if (hinfo == NULL) return -ENOMEM; - minfo->hinfo = hinfo; + *out_hinfo = hinfo; /* copy match config into hashtable config */ - memcpy(&hinfo->cfg, &minfo->cfg, sizeof(hinfo->cfg)); + ret = cfg_copy(&hinfo->cfg, (void *)cfg, 2); + + if (ret) + return ret; + hinfo->cfg.size = size; if (hinfo->cfg.max == 0) hinfo->cfg.max = 8 * hinfo->cfg.size; @@ -255,17 +286,18 @@ static int htable_create_v1(struct net *net, struct xt_hashlimit_mtinfo1 *minfo, hinfo->count = 0; hinfo->family = family; hinfo->rnd_initialized = false; - hinfo->name = kstrdup(minfo->name, GFP_KERNEL); + hinfo->name = kstrdup(name, GFP_KERNEL); if (!hinfo->name) { vfree(hinfo); return -ENOMEM; } spin_lock_init(&hinfo->lock); - hinfo->pde = proc_create_data(minfo->name, 0, + hinfo->pde = proc_create_data(name, 0, (family == NFPROTO_IPV4) ? hashlimit_net->ipt_hashlimit : hashlimit_net->ip6t_hashlimit, - &dl_file_ops_v1, hinfo); + (revision == 1) ? &dl_file_ops_v1 : &dl_file_ops, + hinfo); if (hinfo->pde == NULL) { kfree(hinfo->name); vfree(hinfo); @@ -399,6 +431,7 @@ static void htable_put(struct xt_hashlimit_htable *hinfo) CREDITS_PER_JIFFY*HZ*60*60*24 < 2^32 ie. */ #define MAX_CPJ_v1 (0xFFFFFFFF / (HZ*60*60*24)) +#define MAX_CPJ (0xFFFFFFFFFFFFFFFF / (HZ*60*60*24)) /* Repeated shift and or gives us all 1s, final shift and add 1 gives * us the power of 2 below the theoretical max, so GCC simply does a @@ -408,8 +441,11 @@ static void htable_put(struct xt_hashlimit_htable *hinfo) #define _POW2_BELOW8(x) (_POW2_BELOW4(x)|_POW2_BELOW4((x)>>4)) #define _POW2_BELOW16(x) (_POW2_BELOW8(x)|_POW2_BELOW8((x)>>8)) #define _POW2_BELOW32(x) (_POW2_BELOW16(x)|_POW2_BELOW16((x)>>16)) +#define _POW2_BELOW64(x) (_POW2_BELOW32(x)|_POW2_BELOW32((x)>>32)) #define POW2_BELOW32(x) ((_POW2_BELOW32(x)>>1) + 1) +#define POW2_BELOW64(x) ((_POW2_BELOW64(x)>>1) + 1) +#define CREDITS_PER_JIFFY POW2_BELOW64(MAX_CPJ) #define CREDITS_PER_JIFFY_v1 POW2_BELOW32(MAX_CPJ_v1) /* in byte mode, the lowest possible rate is one packet/second. @@ -425,15 +461,24 @@ static u32 xt_hashlimit_len_to_chunks(u32 len) } /* Precision saver. */ -static u32 user2credits(u32 user) +static u64 user2credits(u64 user, int revision) { - /* If multiplying would overflow... */ - if (user > 0xFFFFFFFF / (HZ*CREDITS_PER_JIFFY_v1)) - /* Divide first. */ - return (user / XT_HASHLIMIT_SCALE) *\ - HZ * CREDITS_PER_JIFFY_v1; + if (revision == 1) { + /* If multiplying would overflow... */ + if (user > 0xFFFFFFFF / (HZ*CREDITS_PER_JIFFY_v1)) + /* Divide first. */ + return (user / XT_HASHLIMIT_SCALE) *\ + HZ * CREDITS_PER_JIFFY_v1; - return (user * HZ * CREDITS_PER_JIFFY_v1) / XT_HASHLIMIT_SCALE; + return (user * HZ * CREDITS_PER_JIFFY_v1) \ + / XT_HASHLIMIT_SCALE; + } else { + if (user > 0xFFFFFFFFFFFFFFFF / (HZ*CREDITS_PER_JIFFY)) + return (user / XT_HASHLIMIT_SCALE_v2) *\ + HZ * CREDITS_PER_JIFFY; + + return (user * HZ * CREDITS_PER_JIFFY) / XT_HASHLIMIT_SCALE_v2; + } } static u32 user2credits_byte(u32 user) @@ -443,10 +488,11 @@ static u32 user2credits_byte(u32 user) return (u32) (us >> 32); } -static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now, u32 mode) +static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now, + u32 mode, int revision) { unsigned long delta = now - dh->rateinfo.prev; - u32 cap; + u64 cap, cpj; if (delta == 0) return; @@ -454,7 +500,7 @@ static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now, u32 mode) dh->rateinfo.prev = now; if (mode & XT_HASHLIMIT_BYTES) { - u32 tmp = dh->rateinfo.credit; + u64 tmp = dh->rateinfo.credit; dh->rateinfo.credit += CREDITS_PER_JIFFY_BYTES * delta; cap = CREDITS_PER_JIFFY_BYTES * HZ; if (tmp >= dh->rateinfo.credit) {/* overflow */ @@ -462,7 +508,9 @@ static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now, u32 mode) return; } } else { - dh->rateinfo.credit += delta * CREDITS_PER_JIFFY_v1; + cpj = (revision == 1) ? + CREDITS_PER_JIFFY_v1 : CREDITS_PER_JIFFY; + dh->rateinfo.credit += delta * cpj; cap = dh->rateinfo.credit_cap; } if (dh->rateinfo.credit > cap) @@ -470,7 +518,7 @@ static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now, u32 mode) } static void rateinfo_init(struct dsthash_ent *dh, - struct xt_hashlimit_htable *hinfo) + struct xt_hashlimit_htable *hinfo, int revision) { dh->rateinfo.prev = jiffies; if (hinfo->cfg.mode & XT_HASHLIMIT_BYTES) { @@ -479,8 +527,8 @@ static void rateinfo_init(struct dsthash_ent *dh, dh->rateinfo.credit_cap = hinfo->cfg.burst; } else { dh->rateinfo.credit = user2credits(hinfo->cfg.avg * - hinfo->cfg.burst); - dh->rateinfo.cost = user2credits(hinfo->cfg.avg); + hinfo->cfg.burst, revision); + dh->rateinfo.cost = user2credits(hinfo->cfg.avg, revision); dh->rateinfo.credit_cap = dh->rateinfo.credit; } } @@ -604,15 +652,15 @@ static u32 hashlimit_byte_cost(unsigned int len, struct dsthash_ent *dh) } static bool -hashlimit_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) +hashlimit_mt_common(const struct sk_buff *skb, struct xt_action_param *par, + struct xt_hashlimit_htable *hinfo, + const struct hashlimit_cfg2 *cfg, int revision) { - const struct xt_hashlimit_mtinfo1 *info = par->matchinfo; - struct xt_hashlimit_htable *hinfo = info->hinfo; unsigned long now = jiffies; struct dsthash_ent *dh; struct dsthash_dst dst; bool race = false; - u32 cost; + u64 cost; if (hashlimit_init_dst(hinfo, &dst, skb, par->thoff) < 0) goto hotdrop; @@ -627,18 +675,18 @@ hashlimit_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) } else if (race) { /* Already got an entry, update expiration timeout */ dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire); - rateinfo_recalc(dh, now, hinfo->cfg.mode); + rateinfo_recalc(dh, now, hinfo->cfg.mode, revision); } else { dh->expires = jiffies + msecs_to_jiffies(hinfo->cfg.expire); - rateinfo_init(dh, hinfo); + rateinfo_init(dh, hinfo, revision); } } else { /* update expiration timeout */ dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire); - rateinfo_recalc(dh, now, hinfo->cfg.mode); + rateinfo_recalc(dh, now, hinfo->cfg.mode, revision); } - if (info->cfg.mode & XT_HASHLIMIT_BYTES) + if (cfg->mode & XT_HASHLIMIT_BYTES) cost = hashlimit_byte_cost(skb->len, dh); else cost = dh->rateinfo.cost; @@ -648,70 +696,126 @@ hashlimit_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) dh->rateinfo.credit -= cost; spin_unlock(&dh->lock); rcu_read_unlock_bh(); - return !(info->cfg.mode & XT_HASHLIMIT_INVERT); + return !(cfg->mode & XT_HASHLIMIT_INVERT); } spin_unlock(&dh->lock); rcu_read_unlock_bh(); /* default match is underlimit - so over the limit, we need to invert */ - return info->cfg.mode & XT_HASHLIMIT_INVERT; + return cfg->mode & XT_HASHLIMIT_INVERT; hotdrop: par->hotdrop = true; return false; } -static int hashlimit_mt_check_v1(const struct xt_mtchk_param *par) +static bool +hashlimit_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) { - struct net *net = par->net; - struct xt_hashlimit_mtinfo1 *info = par->matchinfo; + const struct xt_hashlimit_mtinfo1 *info = par->matchinfo; + struct xt_hashlimit_htable *hinfo = info->hinfo; + struct hashlimit_cfg2 cfg = {}; int ret; - if (info->cfg.gc_interval == 0 || info->cfg.expire == 0) - return -EINVAL; - if (info->name[sizeof(info->name)-1] != '\0') + ret = cfg_copy(&cfg, (void *)&info->cfg, 1); + + if (ret) + return ret; + + return hashlimit_mt_common(skb, par, hinfo, &cfg, 1); +} + +static bool +hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_hashlimit_mtinfo2 *info = par->matchinfo; + struct xt_hashlimit_htable *hinfo = info->hinfo; + + return hashlimit_mt_common(skb, par, hinfo, &info->cfg, 2); +} + +static int hashlimit_mt_check_common(const struct xt_mtchk_param *par, + struct xt_hashlimit_htable **hinfo, + struct hashlimit_cfg2 *cfg, + const char *name, int revision) +{ + struct net *net = par->net; + int ret; + + if (cfg->gc_interval == 0 || cfg->expire == 0) return -EINVAL; if (par->family == NFPROTO_IPV4) { - if (info->cfg.srcmask > 32 || info->cfg.dstmask > 32) + if (cfg->srcmask > 32 || cfg->dstmask > 32) return -EINVAL; } else { - if (info->cfg.srcmask > 128 || info->cfg.dstmask > 128) + if (cfg->srcmask > 128 || cfg->dstmask > 128) return -EINVAL; } - if (info->cfg.mode & ~XT_HASHLIMIT_ALL) { + if (cfg->mode & ~XT_HASHLIMIT_ALL) { pr_info("Unknown mode mask %X, kernel too old?\n", - info->cfg.mode); + cfg->mode); return -EINVAL; } /* Check for overflow. */ - if (info->cfg.mode & XT_HASHLIMIT_BYTES) { - if (user2credits_byte(info->cfg.avg) == 0) { - pr_info("overflow, rate too high: %u\n", info->cfg.avg); + if (cfg->mode & XT_HASHLIMIT_BYTES) { + if (user2credits_byte(cfg->avg) == 0) { + pr_info("overflow, rate too high: %llu\n", cfg->avg); return -EINVAL; } - } else if (info->cfg.burst == 0 || - user2credits(info->cfg.avg * info->cfg.burst) < - user2credits(info->cfg.avg)) { - pr_info("overflow, try lower: %u/%u\n", - info->cfg.avg, info->cfg.burst); + } else if (cfg->burst == 0 || + user2credits(cfg->avg * cfg->burst, revision) < + user2credits(cfg->avg, revision)) { + pr_info("overflow, try lower: %llu/%llu\n", + cfg->avg, cfg->burst); return -ERANGE; } mutex_lock(&hashlimit_mutex); - info->hinfo = htable_find_get(net, info->name, par->family); - if (info->hinfo == NULL) { - ret = htable_create_v1(net, info, par->family); + *hinfo = htable_find_get(net, name, par->family); + if (*hinfo == NULL) { + ret = htable_create(net, cfg, name, par->family, + hinfo, revision); if (ret < 0) { mutex_unlock(&hashlimit_mutex); return ret; } } mutex_unlock(&hashlimit_mutex); + return 0; } +static int hashlimit_mt_check_v1(const struct xt_mtchk_param *par) +{ + struct xt_hashlimit_mtinfo1 *info = par->matchinfo; + struct hashlimit_cfg2 cfg = {}; + int ret; + + if (info->name[sizeof(info->name) - 1] != '\0') + return -EINVAL; + + ret = cfg_copy(&cfg, (void *)&info->cfg, 1); + + if (ret) + return ret; + + return hashlimit_mt_check_common(par, &info->hinfo, + &cfg, info->name, 1); +} + +static int hashlimit_mt_check(const struct xt_mtchk_param *par) +{ + struct xt_hashlimit_mtinfo2 *info = par->matchinfo; + + if (info->name[sizeof(info->name) - 1] != '\0') + return -EINVAL; + + return hashlimit_mt_check_common(par, &info->hinfo, &info->cfg, + info->name, 2); +} + static void hashlimit_mt_destroy_v1(const struct xt_mtdtor_param *par) { const struct xt_hashlimit_mtinfo1 *info = par->matchinfo; @@ -719,6 +823,13 @@ static void hashlimit_mt_destroy_v1(const struct xt_mtdtor_param *par) htable_put(info->hinfo); } +static void hashlimit_mt_destroy(const struct xt_mtdtor_param *par) +{ + const struct xt_hashlimit_mtinfo2 *info = par->matchinfo; + + htable_put(info->hinfo); +} + static struct xt_match hashlimit_mt_reg[] __read_mostly = { { .name = "hashlimit", @@ -730,6 +841,16 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = { .destroy = hashlimit_mt_destroy_v1, .me = THIS_MODULE, }, + { + .name = "hashlimit", + .revision = 2, + .family = NFPROTO_IPV4, + .match = hashlimit_mt, + .matchsize = sizeof(struct xt_hashlimit_mtinfo2), + .checkentry = hashlimit_mt_check, + .destroy = hashlimit_mt_destroy, + .me = THIS_MODULE, + }, #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) { .name = "hashlimit", @@ -741,6 +862,16 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = { .destroy = hashlimit_mt_destroy_v1, .me = THIS_MODULE, }, + { + .name = "hashlimit", + .revision = 2, + .family = NFPROTO_IPV6, + .match = hashlimit_mt, + .matchsize = sizeof(struct xt_hashlimit_mtinfo2), + .checkentry = hashlimit_mt_check, + .destroy = hashlimit_mt_destroy, + .me = THIS_MODULE, + }, #endif }; @@ -787,18 +918,12 @@ static void dl_seq_stop(struct seq_file *s, void *v) spin_unlock_bh(&htable->lock); } -static int dl_seq_real_show_v1(struct dsthash_ent *ent, u_int8_t family, - struct seq_file *s) +static void dl_seq_print(struct dsthash_ent *ent, u_int8_t family, + struct seq_file *s) { - const struct xt_hashlimit_htable *ht = s->private; - - spin_lock(&ent->lock); - /* recalculate to show accurate numbers */ - rateinfo_recalc(ent, jiffies, ht->cfg.mode); - switch (family) { case NFPROTO_IPV4: - seq_printf(s, "%ld %pI4:%u->%pI4:%u %u %u %u\n", + seq_printf(s, "%ld %pI4:%u->%pI4:%u %llu %llu %llu\n", (long)(ent->expires - jiffies)/HZ, &ent->dst.ip.src, ntohs(ent->dst.src_port), @@ -809,7 +934,7 @@ static int dl_seq_real_show_v1(struct dsthash_ent *ent, u_int8_t family, break; #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) case NFPROTO_IPV6: - seq_printf(s, "%ld %pI6:%u->%pI6:%u %u %u %u\n", + seq_printf(s, "%ld %pI6:%u->%pI6:%u %llu %llu %llu\n", (long)(ent->expires - jiffies)/HZ, &ent->dst.ip6.src, ntohs(ent->dst.src_port), @@ -822,6 +947,34 @@ static int dl_seq_real_show_v1(struct dsthash_ent *ent, u_int8_t family, default: BUG(); } +} + +static int dl_seq_real_show_v1(struct dsthash_ent *ent, u_int8_t family, + struct seq_file *s) +{ + const struct xt_hashlimit_htable *ht = s->private; + + spin_lock(&ent->lock); + /* recalculate to show accurate numbers */ + rateinfo_recalc(ent, jiffies, ht->cfg.mode, 1); + + dl_seq_print(ent, family, s); + + spin_unlock(&ent->lock); + return seq_has_overflowed(s); +} + +static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family, + struct seq_file *s) +{ + const struct xt_hashlimit_htable *ht = s->private; + + spin_lock(&ent->lock); + /* recalculate to show accurate numbers */ + rateinfo_recalc(ent, jiffies, ht->cfg.mode, 2); + + dl_seq_print(ent, family, s); + spin_unlock(&ent->lock); return seq_has_overflowed(s); } @@ -840,6 +993,20 @@ static int dl_seq_show_v1(struct seq_file *s, void *v) return 0; } +static int dl_seq_show(struct seq_file *s, void *v) +{ + struct xt_hashlimit_htable *htable = s->private; + unsigned int *bucket = (unsigned int *)v; + struct dsthash_ent *ent; + + if (!hlist_empty(&htable->hash[*bucket])) { + hlist_for_each_entry(ent, &htable->hash[*bucket], node) + if (dl_seq_real_show(ent, htable->family, s)) + return -1; + } + return 0; +} + static const struct seq_operations dl_seq_ops_v1 = { .start = dl_seq_start, .next = dl_seq_next, @@ -847,6 +1014,13 @@ static const struct seq_operations dl_seq_ops_v1 = { .show = dl_seq_show_v1 }; +static const struct seq_operations dl_seq_ops = { + .start = dl_seq_start, + .next = dl_seq_next, + .stop = dl_seq_stop, + .show = dl_seq_show +}; + static int dl_proc_open_v1(struct inode *inode, struct file *file) { int ret = seq_open(file, &dl_seq_ops_v1); @@ -858,6 +1032,18 @@ static int dl_proc_open_v1(struct inode *inode, struct file *file) return ret; } +static int dl_proc_open(struct inode *inode, struct file *file) +{ + int ret = seq_open(file, &dl_seq_ops); + + if (!ret) { + struct seq_file *sf = file->private_data; + + sf->private = PDE_DATA(inode); + } + return ret; +} + static const struct file_operations dl_file_ops_v1 = { .owner = THIS_MODULE, .open = dl_proc_open_v1, @@ -866,6 +1052,14 @@ static const struct file_operations dl_file_ops_v1 = { .release = seq_release }; +static const struct file_operations dl_file_ops = { + .owner = THIS_MODULE, + .open = dl_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release +}; + static int __net_init hashlimit_proc_net_init(struct net *net) { struct hashlimit_net *hashlimit_net = hashlimit_pernet(net); From 58e207e4983d7acea39b7fbec9343d8a6d218a18 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 22 Sep 2016 23:49:17 +0200 Subject: [PATCH 1522/1715] netfilter: evict stale entries when user reads /proc/net/nf_conntrack Fabian reports a possible conntrack memory leak (could not reproduce so far), however, one minor issue can be easily resolved: > cat /proc/net/nf_conntrack | wc -l = 5 > 4 minutes required to clean up the table. We should not report those timed-out entries to the user in first place. And instead of just skipping those timed-out entries while iterating over the table we can also zap them (we already do this during ctnetlink walks, but I forgot about the /proc interface). Fixes: f330a7fdbe16 ("netfilter: conntrack: get rid of conntrack timer") Reported-by: Fabian Frederick Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_standalone.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 7d52f8401afd..5f446cd9f3fd 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -212,6 +212,11 @@ static int ct_seq_show(struct seq_file *s, void *v) if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use))) return 0; + if (nf_ct_should_gc(ct)) { + nf_ct_kill(ct); + goto release; + } + /* we only want to print DIR_ORIGINAL */ if (NF_CT_DIRECTION(hash)) goto release; From 7a682575ad4829b4de3e672a6ad5f73a05826b82 Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Fri, 23 Sep 2016 11:27:42 +0200 Subject: [PATCH 1523/1715] netfilter: xt_socket: fix transparent match for IPv6 request sockets The introduction of TCP_NEW_SYN_RECV state, and the addition of request sockets to the ehash table seems to have broken the --transparent option of the socket match for IPv6 (around commit a9407000). Now that the socket lookup finds the TCP_NEW_SYN_RECV socket instead of the listener, the --transparent option tries to match on the no_srccheck flag of the request socket. Unfortunately, that flag was only set for IPv4 sockets in tcp_v4_init_req() by copying the transparent flag of the listener socket. This effectively causes '-m socket --transparent' not match on the ACK packet sent by the client in a TCP handshake. Based on the suggestion from Eric Dumazet, this change moves the code initializing no_srccheck to tcp_conn_request(), rendering the above scenario working again. Fixes: a940700003 ("netfilter: xt_socket: prepare for TCP_NEW_SYN_RECV support") Signed-off-by: Alex Badics Signed-off-by: KOVACS Krisztian Signed-off-by: Pablo Neira Ayuso --- net/ipv4/tcp_input.c | 1 + net/ipv4/tcp_ipv4.c | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 8cd02c0b056c..f3a9f3c2c8d8 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6269,6 +6269,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; tcp_openreq_init(req, &tmp_opt, skb, sk); + inet_rsk(req)->no_srccheck = inet_sk(sk)->transparent; /* Note: tcp_v6_init_req() might override ir_iif for link locals */ inet_rsk(req)->ir_iif = inet_request_bound_dev_if(sk, skb); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a75bf48d7950..13b05adf9d3e 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1196,7 +1196,6 @@ static void tcp_v4_init_req(struct request_sock *req, sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr); sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr); - ireq->no_srccheck = inet_sk(sk_listener)->transparent; ireq->opt = tcp_v4_save_options(skb); } From 0f3cd9b3697708c86a825ae3cedabf7be6fd3e72 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 23 Sep 2016 15:23:33 +0200 Subject: [PATCH 1524/1715] netfilter: nf_tables: add range expression Inverse ranges != [a,b] are not currently possible because rules are composites of && operations, and we need to express this: data < a || data > b This patch adds a new range expression. Positive ranges can be already through two cmp expressions: cmp(sreg, data, >=) cmp(sreg, data, <=) This new range expression provides an alternative way to express this. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables_core.h | 3 + include/uapi/linux/netfilter/nf_tables.h | 29 +++++ net/netfilter/Makefile | 3 +- net/netfilter/nf_tables_core.c | 7 +- net/netfilter/nft_range.c | 138 +++++++++++++++++++++++ 5 files changed, 178 insertions(+), 2 deletions(-) create mode 100644 net/netfilter/nft_range.c diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index a9060dd99db7..00f4f6b1b1ba 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -28,6 +28,9 @@ extern const struct nft_expr_ops nft_cmp_fast_ops; int nft_cmp_module_init(void); void nft_cmp_module_exit(void); +int nft_range_module_init(void); +void nft_range_module_exit(void); + int nft_lookup_module_init(void); void nft_lookup_module_exit(void); diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 1cf41dd838b2..c6c4477c136b 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -546,6 +546,35 @@ enum nft_cmp_attributes { }; #define NFTA_CMP_MAX (__NFTA_CMP_MAX - 1) +/** + * enum nft_range_ops - nf_tables range operator + * + * @NFT_RANGE_EQ: equal + * @NFT_RANGE_NEQ: not equal + */ +enum nft_range_ops { + NFT_RANGE_EQ, + NFT_RANGE_NEQ, +}; + +/** + * enum nft_range_attributes - nf_tables range expression netlink attributes + * + * @NFTA_RANGE_SREG: source register of data to compare (NLA_U32: nft_registers) + * @NFTA_RANGE_OP: cmp operation (NLA_U32: nft_cmp_ops) + * @NFTA_RANGE_FROM_DATA: data range from (NLA_NESTED: nft_data_attributes) + * @NFTA_RANGE_TO_DATA: data range to (NLA_NESTED: nft_data_attributes) + */ +enum nft_range_attributes { + NFTA_RANGE_UNSPEC, + NFTA_RANGE_SREG, + NFTA_RANGE_OP, + NFTA_RANGE_FROM_DATA, + NFTA_RANGE_TO_DATA, + __NFTA_RANGE_MAX +}; +#define NFTA_RANGE_MAX (__NFTA_RANGE_MAX - 1) + enum nft_lookup_flags { NFT_LOOKUP_F_INV = (1 << 0), }; diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 0c8581100ac6..c23c3c84416f 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -71,8 +71,9 @@ obj-$(CONFIG_NF_DUP_NETDEV) += nf_dup_netdev.o # nf_tables nf_tables-objs += nf_tables_core.o nf_tables_api.o nf_tables_trace.o -nf_tables-objs += nft_immediate.o nft_cmp.o nft_lookup.o nft_dynset.o +nf_tables-objs += nft_immediate.o nft_cmp.o nft_range.o nf_tables-objs += nft_bitwise.o nft_byteorder.o nft_payload.o +nf_tables-objs += nft_lookup.o nft_dynset.o obj-$(CONFIG_NF_TABLES) += nf_tables.o obj-$(CONFIG_NF_TABLES_INET) += nf_tables_inet.o diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 67259cefef06..7c94ce0080d5 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -263,8 +263,13 @@ int __init nf_tables_core_module_init(void) if (err < 0) goto err7; - return 0; + err = nft_range_module_init(); + if (err < 0) + goto err8; + return 0; +err8: + nft_dynset_module_exit(); err7: nft_payload_module_exit(); err6: diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c new file mode 100644 index 000000000000..c6d5358482d1 --- /dev/null +++ b/net/netfilter/nft_range.c @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2016 Pablo Neira Ayuso + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_range_expr { + struct nft_data data_from; + struct nft_data data_to; + enum nft_registers sreg:8; + u8 len; + enum nft_range_ops op:8; +}; + +static void nft_range_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + const struct nft_range_expr *priv = nft_expr_priv(expr); + bool mismatch; + int d1, d2; + + d1 = memcmp(®s->data[priv->sreg], &priv->data_from, priv->len); + d2 = memcmp(®s->data[priv->sreg], &priv->data_to, priv->len); + switch (priv->op) { + case NFT_RANGE_EQ: + mismatch = (d1 < 0 || d2 > 0); + break; + case NFT_RANGE_NEQ: + mismatch = (d1 >= 0 && d2 <= 0); + break; + } + + if (mismatch) + regs->verdict.code = NFT_BREAK; +} + +static const struct nla_policy nft_range_policy[NFTA_RANGE_MAX + 1] = { + [NFTA_RANGE_SREG] = { .type = NLA_U32 }, + [NFTA_RANGE_OP] = { .type = NLA_U32 }, + [NFTA_RANGE_FROM_DATA] = { .type = NLA_NESTED }, + [NFTA_RANGE_TO_DATA] = { .type = NLA_NESTED }, +}; + +static int nft_range_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_range_expr *priv = nft_expr_priv(expr); + struct nft_data_desc desc_from, desc_to; + int err; + + err = nft_data_init(NULL, &priv->data_from, sizeof(priv->data_from), + &desc_from, tb[NFTA_RANGE_FROM_DATA]); + if (err < 0) + return err; + + err = nft_data_init(NULL, &priv->data_to, sizeof(priv->data_to), + &desc_to, tb[NFTA_RANGE_TO_DATA]); + if (err < 0) + goto err1; + + if (desc_from.len != desc_to.len) { + err = -EINVAL; + goto err2; + } + + priv->sreg = nft_parse_register(tb[NFTA_RANGE_SREG]); + err = nft_validate_register_load(priv->sreg, desc_from.len); + if (err < 0) + goto err2; + + priv->op = ntohl(nla_get_be32(tb[NFTA_RANGE_OP])); + priv->len = desc_from.len; + return 0; +err2: + nft_data_uninit(&priv->data_to, desc_to.type); +err1: + nft_data_uninit(&priv->data_from, desc_from.type); + return err; +} + +static int nft_range_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_range_expr *priv = nft_expr_priv(expr); + + if (nft_dump_register(skb, NFTA_RANGE_SREG, priv->sreg)) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_RANGE_OP, htonl(priv->op))) + goto nla_put_failure; + + if (nft_data_dump(skb, NFTA_RANGE_FROM_DATA, &priv->data_from, + NFT_DATA_VALUE, priv->len) < 0 || + nft_data_dump(skb, NFTA_RANGE_TO_DATA, &priv->data_to, + NFT_DATA_VALUE, priv->len) < 0) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_type nft_range_type; +static const struct nft_expr_ops nft_range_ops = { + .type = &nft_range_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_range_expr)), + .eval = nft_range_eval, + .init = nft_range_init, + .dump = nft_range_dump, +}; + +static struct nft_expr_type nft_range_type __read_mostly = { + .name = "range", + .ops = &nft_range_ops, + .policy = nft_range_policy, + .maxattr = NFTA_RANGE_MAX, + .owner = THIS_MODULE, +}; + +int __init nft_range_module_init(void) +{ + return nft_register_expr(&nft_range_type); +} + +void nft_range_module_exit(void) +{ + nft_unregister_expr(&nft_range_type); +} From ff107d27761ff4b644c82c209e004ec9c8fbbc22 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sun, 25 Sep 2016 16:35:56 +0800 Subject: [PATCH 1525/1715] netfilter: nft_log: complete NFTA_LOG_FLAGS attr support NFTA_LOG_FLAGS attribute is already supported, but the related NF_LOG_XXX flags are not exposed to the userspace. So we cannot explicitly enable log flags to log uid, tcp sequence, ip options and so on, i.e. such rule "nft add rule filter output log uid" is not supported yet. So move NF_LOG_XXX macro definitions to the uapi/../nf_log.h. In order to keep consistent with other modules, change NF_LOG_MASK to refer to all supported log flags. On the other hand, add a new NF_LOG_DEFAULT_MASK to refer to the original default log flags. Finally, if user specify the unsupported log flags or NFTA_LOG_GROUP and NFTA_LOG_FLAGS are set at the same time, report EINVAL to the userspace. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_log.h | 11 +++-------- include/uapi/linux/netfilter/nf_log.h | 12 ++++++++++++ net/bridge/netfilter/ebt_log.c | 2 +- net/ipv4/netfilter/ip_tables.c | 2 +- net/ipv4/netfilter/nf_log_arp.c | 2 +- net/ipv4/netfilter/nf_log_ipv4.c | 4 ++-- net/ipv6/netfilter/ip6_tables.c | 2 +- net/ipv6/netfilter/nf_log_ipv6.c | 4 ++-- net/netfilter/nf_tables_core.c | 2 +- net/netfilter/nft_log.c | 9 ++++++++- 10 files changed, 32 insertions(+), 18 deletions(-) create mode 100644 include/uapi/linux/netfilter/nf_log.h diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h index ee07dc8b0a7b..309cd267be4f 100644 --- a/include/net/netfilter/nf_log.h +++ b/include/net/netfilter/nf_log.h @@ -2,15 +2,10 @@ #define _NF_LOG_H #include +#include -/* those NF_LOG_* defines and struct nf_loginfo are legacy definitios that will - * disappear once iptables is replaced with pkttables. Please DO NOT use them - * for any new code! */ -#define NF_LOG_TCPSEQ 0x01 /* Log TCP sequence numbers */ -#define NF_LOG_TCPOPT 0x02 /* Log TCP options */ -#define NF_LOG_IPOPT 0x04 /* Log IP options */ -#define NF_LOG_UID 0x08 /* Log UID owning local socket */ -#define NF_LOG_MASK 0x0f +/* Log tcp sequence, tcp options, ip options and uid owning local socket */ +#define NF_LOG_DEFAULT_MASK 0x0f /* This flag indicates that copy_len field in nf_loginfo is set */ #define NF_LOG_F_COPY_LEN 0x1 diff --git a/include/uapi/linux/netfilter/nf_log.h b/include/uapi/linux/netfilter/nf_log.h new file mode 100644 index 000000000000..8be21e02387d --- /dev/null +++ b/include/uapi/linux/netfilter/nf_log.h @@ -0,0 +1,12 @@ +#ifndef _NETFILTER_NF_LOG_H +#define _NETFILTER_NF_LOG_H + +#define NF_LOG_TCPSEQ 0x01 /* Log TCP sequence numbers */ +#define NF_LOG_TCPOPT 0x02 /* Log TCP options */ +#define NF_LOG_IPOPT 0x04 /* Log IP options */ +#define NF_LOG_UID 0x08 /* Log UID owning local socket */ +#define NF_LOG_NFLOG 0x10 /* Unsupported, don't reuse */ +#define NF_LOG_MACDECODE 0x20 /* Decode MAC header */ +#define NF_LOG_MASK 0x2f + +#endif /* _NETFILTER_NF_LOG_H */ diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index 152300d164ac..9a11086ba6ff 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -91,7 +91,7 @@ ebt_log_packet(struct net *net, u_int8_t pf, unsigned int hooknum, if (loginfo->type == NF_LOG_TYPE_LOG) bitmask = loginfo->u.log.logflags; else - bitmask = NF_LOG_MASK; + bitmask = NF_LOG_DEFAULT_MASK; if ((bitmask & EBT_LOG_IP) && eth_hdr(skb)->h_proto == htons(ETH_P_IP)) { diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index f993545a3373..7c00ce90adb8 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -156,7 +156,7 @@ static struct nf_loginfo trace_loginfo = { .u = { .log = { .level = 4, - .logflags = NF_LOG_MASK, + .logflags = NF_LOG_DEFAULT_MASK, }, }, }; diff --git a/net/ipv4/netfilter/nf_log_arp.c b/net/ipv4/netfilter/nf_log_arp.c index 8945c2653814..b24795e2ee6d 100644 --- a/net/ipv4/netfilter/nf_log_arp.c +++ b/net/ipv4/netfilter/nf_log_arp.c @@ -30,7 +30,7 @@ static struct nf_loginfo default_loginfo = { .u = { .log = { .level = LOGLEVEL_NOTICE, - .logflags = NF_LOG_MASK, + .logflags = NF_LOG_DEFAULT_MASK, }, }, }; diff --git a/net/ipv4/netfilter/nf_log_ipv4.c b/net/ipv4/netfilter/nf_log_ipv4.c index 20f225593a8b..5b571e1b5f15 100644 --- a/net/ipv4/netfilter/nf_log_ipv4.c +++ b/net/ipv4/netfilter/nf_log_ipv4.c @@ -29,7 +29,7 @@ static struct nf_loginfo default_loginfo = { .u = { .log = { .level = LOGLEVEL_NOTICE, - .logflags = NF_LOG_MASK, + .logflags = NF_LOG_DEFAULT_MASK, }, }, }; @@ -46,7 +46,7 @@ static void dump_ipv4_packet(struct nf_log_buf *m, if (info->type == NF_LOG_TYPE_LOG) logflags = info->u.log.logflags; else - logflags = NF_LOG_MASK; + logflags = NF_LOG_DEFAULT_MASK; ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph); if (ih == NULL) { diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 552fac2f390a..55aacea24396 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -190,7 +190,7 @@ static struct nf_loginfo trace_loginfo = { .u = { .log = { .level = LOGLEVEL_WARNING, - .logflags = NF_LOG_MASK, + .logflags = NF_LOG_DEFAULT_MASK, }, }, }; diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c index c1bcf699a23d..f6aee2895fee 100644 --- a/net/ipv6/netfilter/nf_log_ipv6.c +++ b/net/ipv6/netfilter/nf_log_ipv6.c @@ -30,7 +30,7 @@ static struct nf_loginfo default_loginfo = { .u = { .log = { .level = LOGLEVEL_NOTICE, - .logflags = NF_LOG_MASK, + .logflags = NF_LOG_DEFAULT_MASK, }, }, }; @@ -52,7 +52,7 @@ static void dump_ipv6_packet(struct nf_log_buf *m, if (info->type == NF_LOG_TYPE_LOG) logflags = info->u.log.logflags; else - logflags = NF_LOG_MASK; + logflags = NF_LOG_DEFAULT_MASK; ih = skb_header_pointer(skb, ip6hoff, sizeof(_ip6h), &_ip6h); if (ih == NULL) { diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 7c94ce0080d5..0dd5c695482f 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -34,7 +34,7 @@ static struct nf_loginfo trace_loginfo = { .u = { .log = { .level = LOGLEVEL_WARNING, - .logflags = NF_LOG_MASK, + .logflags = NF_LOG_DEFAULT_MASK, }, }, }; diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c index 24a73bb26e94..1b01404bb33f 100644 --- a/net/netfilter/nft_log.c +++ b/net/netfilter/nft_log.c @@ -58,8 +58,11 @@ static int nft_log_init(const struct nft_ctx *ctx, if (tb[NFTA_LOG_LEVEL] != NULL && tb[NFTA_LOG_GROUP] != NULL) return -EINVAL; - if (tb[NFTA_LOG_GROUP] != NULL) + if (tb[NFTA_LOG_GROUP] != NULL) { li->type = NF_LOG_TYPE_ULOG; + if (tb[NFTA_LOG_FLAGS] != NULL) + return -EINVAL; + } nla = tb[NFTA_LOG_PREFIX]; if (nla != NULL) { @@ -87,6 +90,10 @@ static int nft_log_init(const struct nft_ctx *ctx, if (tb[NFTA_LOG_FLAGS] != NULL) { li->u.log.logflags = ntohl(nla_get_be32(tb[NFTA_LOG_FLAGS])); + if (li->u.log.logflags & ~NF_LOG_MASK) { + err = -EINVAL; + goto err1; + } } break; case NF_LOG_TYPE_ULOG: From 8cb2a7d5667ab9a9c2fdd356357b85b63b320901 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sun, 25 Sep 2016 16:47:05 +0800 Subject: [PATCH 1526/1715] netfilter: nf_log: get rid of XT_LOG_* macros nf_log is used by both nftables and iptables, so use XT_LOG_XXX macros here is not appropriate. Replace them with NF_LOG_XXX. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nf_log_ipv4.c | 6 +++--- net/ipv6/netfilter/nf_log_ipv6.c | 14 +++++++------- net/netfilter/nf_log_common.c | 4 ++-- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/net/ipv4/netfilter/nf_log_ipv4.c b/net/ipv4/netfilter/nf_log_ipv4.c index 5b571e1b5f15..856648966f4c 100644 --- a/net/ipv4/netfilter/nf_log_ipv4.c +++ b/net/ipv4/netfilter/nf_log_ipv4.c @@ -76,7 +76,7 @@ static void dump_ipv4_packet(struct nf_log_buf *m, if (ntohs(ih->frag_off) & IP_OFFSET) nf_log_buf_add(m, "FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET); - if ((logflags & XT_LOG_IPOPT) && + if ((logflags & NF_LOG_IPOPT) && ih->ihl * 4 > sizeof(struct iphdr)) { const unsigned char *op; unsigned char _opt[4 * 15 - sizeof(struct iphdr)]; @@ -250,7 +250,7 @@ static void dump_ipv4_packet(struct nf_log_buf *m, } /* Max length: 15 "UID=4294967295 " */ - if ((logflags & XT_LOG_UID) && !iphoff) + if ((logflags & NF_LOG_UID) && !iphoff) nf_log_dump_sk_uid_gid(m, skb->sk); /* Max length: 16 "MARK=0xFFFFFFFF " */ @@ -282,7 +282,7 @@ static void dump_ipv4_mac_header(struct nf_log_buf *m, if (info->type == NF_LOG_TYPE_LOG) logflags = info->u.log.logflags; - if (!(logflags & XT_LOG_MACDECODE)) + if (!(logflags & NF_LOG_MACDECODE)) goto fallback; switch (dev->type) { diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c index f6aee2895fee..57d86066a13b 100644 --- a/net/ipv6/netfilter/nf_log_ipv6.c +++ b/net/ipv6/netfilter/nf_log_ipv6.c @@ -84,7 +84,7 @@ static void dump_ipv6_packet(struct nf_log_buf *m, } /* Max length: 48 "OPT (...) " */ - if (logflags & XT_LOG_IPOPT) + if (logflags & NF_LOG_IPOPT) nf_log_buf_add(m, "OPT ( "); switch (currenthdr) { @@ -121,7 +121,7 @@ static void dump_ipv6_packet(struct nf_log_buf *m, case IPPROTO_ROUTING: case IPPROTO_HOPOPTS: if (fragment) { - if (logflags & XT_LOG_IPOPT) + if (logflags & NF_LOG_IPOPT) nf_log_buf_add(m, ")"); return; } @@ -129,7 +129,7 @@ static void dump_ipv6_packet(struct nf_log_buf *m, break; /* Max Length */ case IPPROTO_AH: - if (logflags & XT_LOG_IPOPT) { + if (logflags & NF_LOG_IPOPT) { struct ip_auth_hdr _ahdr; const struct ip_auth_hdr *ah; @@ -161,7 +161,7 @@ static void dump_ipv6_packet(struct nf_log_buf *m, hdrlen = (hp->hdrlen+2)<<2; break; case IPPROTO_ESP: - if (logflags & XT_LOG_IPOPT) { + if (logflags & NF_LOG_IPOPT) { struct ip_esp_hdr _esph; const struct ip_esp_hdr *eh; @@ -194,7 +194,7 @@ static void dump_ipv6_packet(struct nf_log_buf *m, nf_log_buf_add(m, "Unknown Ext Hdr %u", currenthdr); return; } - if (logflags & XT_LOG_IPOPT) + if (logflags & NF_LOG_IPOPT) nf_log_buf_add(m, ") "); currenthdr = hp->nexthdr; @@ -277,7 +277,7 @@ static void dump_ipv6_packet(struct nf_log_buf *m, } /* Max length: 15 "UID=4294967295 " */ - if ((logflags & XT_LOG_UID) && recurse) + if ((logflags & NF_LOG_UID) && recurse) nf_log_dump_sk_uid_gid(m, skb->sk); /* Max length: 16 "MARK=0xFFFFFFFF " */ @@ -295,7 +295,7 @@ static void dump_ipv6_mac_header(struct nf_log_buf *m, if (info->type == NF_LOG_TYPE_LOG) logflags = info->u.log.logflags; - if (!(logflags & XT_LOG_MACDECODE)) + if (!(logflags & NF_LOG_MACDECODE)) goto fallback; switch (dev->type) { diff --git a/net/netfilter/nf_log_common.c b/net/netfilter/nf_log_common.c index a5aa5967b8e1..119fe1cb1ea9 100644 --- a/net/netfilter/nf_log_common.c +++ b/net/netfilter/nf_log_common.c @@ -77,7 +77,7 @@ int nf_log_dump_tcp_header(struct nf_log_buf *m, const struct sk_buff *skb, nf_log_buf_add(m, "SPT=%u DPT=%u ", ntohs(th->source), ntohs(th->dest)); /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */ - if (logflags & XT_LOG_TCPSEQ) { + if (logflags & NF_LOG_TCPSEQ) { nf_log_buf_add(m, "SEQ=%u ACK=%u ", ntohl(th->seq), ntohl(th->ack_seq)); } @@ -107,7 +107,7 @@ int nf_log_dump_tcp_header(struct nf_log_buf *m, const struct sk_buff *skb, /* Max length: 11 "URGP=65535 " */ nf_log_buf_add(m, "URGP=%u ", ntohs(th->urg_ptr)); - if ((logflags & XT_LOG_TCPOPT) && th->doff*4 > sizeof(struct tcphdr)) { + if ((logflags & NF_LOG_TCPOPT) && th->doff*4 > sizeof(struct tcphdr)) { u_int8_t _opt[60 - sizeof(struct tcphdr)]; const u_int8_t *op; unsigned int i; From 1cb1860d9133ff795cac640b9af4569a2668b45e Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 25 Sep 2016 14:14:45 -0700 Subject: [PATCH 1527/1715] cxgb4: fix -ve error check on a signed iq iq is unsigned, so the error check for iq < 0 has no effect so errors can slip past this check. Fix this by making iq signed and also get_filter_steerq return a signed int so a -ve error can be returned. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c index 2a616171cb68..10736738ff30 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c @@ -117,11 +117,11 @@ static int validate_filter(struct net_device *dev, return 0; } -static unsigned int get_filter_steerq(struct net_device *dev, - struct ch_filter_specification *fs) +static int get_filter_steerq(struct net_device *dev, + struct ch_filter_specification *fs) { struct adapter *adapter = netdev2adap(dev); - unsigned int iq; + int iq; /* If the user has requested steering matching Ingress Packets * to a specific Queue Set, we need to make sure it's in range @@ -443,10 +443,10 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id, struct filter_ctx *ctx) { struct adapter *adapter = netdev2adap(dev); - unsigned int max_fidx, fidx, iq; + unsigned int max_fidx, fidx; struct filter_entry *f; u32 iconf; - int ret; + int iq, ret; max_fidx = adapter->tids.nftids; if (filter_id != (max_fidx + adapter->tids.nsftids - 1) && From 876a55b8e255702d2c406872fd791817a3c98bd1 Mon Sep 17 00:00:00 2001 From: Robert Jarzmik Date: Sun, 25 Sep 2016 23:00:45 +0200 Subject: [PATCH 1528/1715] net: smc91x: take into account register shift This aligns smc91x with its cousin, namely smc911x.c. This also allows the driver to run also in a device-tree based lubbock board build, on which it was tested. Signed-off-by: Robert Jarzmik Signed-off-by: David S. Miller --- drivers/net/ethernet/smsc/smc91x.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c index 503a3b6dce91..73212590d04a 100644 --- a/drivers/net/ethernet/smsc/smc91x.c +++ b/drivers/net/ethernet/smsc/smc91x.c @@ -2323,6 +2323,9 @@ static int smc_drv_probe(struct platform_device *pdev) } else { lp->cfg.flags |= SMC91X_USE_16BIT; } + if (!device_property_read_u32(&pdev->dev, "reg-shift", + &val)) + lp->io_shift = val; } #endif From e6053dd56a022d2ef754e46b4a9836c65f1dc434 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sun, 25 Sep 2016 15:40:36 +0000 Subject: [PATCH 1529/1715] be2net: fix non static symbol warnings Fixes the following sparse warnings: drivers/net/ethernet/emulex/benet/be_main.c:47:25: warning: symbol 'be_err_recovery_workq' was not declared. Should it be static? drivers/net/ethernet/emulex/benet/be_main.c:63:25: warning: symbol 'be_wq' was not declared. Should it be static? Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index ac513e6627d1..dcb930a52613 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -44,7 +44,7 @@ MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data."); /* Per-module error detection/recovery workq shared across all functions. * Each function schedules its own work request on this shared workq. */ -struct workqueue_struct *be_err_recovery_workq; +static struct workqueue_struct *be_err_recovery_workq; static const struct pci_device_id be_dev_ids[] = { { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) }, @@ -60,7 +60,7 @@ static const struct pci_device_id be_dev_ids[] = { MODULE_DEVICE_TABLE(pci, be_dev_ids); /* Workqueue used by all functions for defering cmd calls to the adapter */ -struct workqueue_struct *be_wq; +static struct workqueue_struct *be_wq; /* UE Status Low CSR */ static const char * const ue_status_low_desc[] = { From b3f5bf64d8dbbb08685b91ee4576d2e82e7bd60f Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sun, 25 Sep 2016 15:43:02 +0000 Subject: [PATCH 1530/1715] net: dsa: mv88e6xxx: fix non static symbol warnings Fixes the following sparse warnings: drivers/net/dsa/mv88e6xxx/chip.c:219:5: warning: symbol 'mv88e6xxx_port_read' was not declared. Should it be static? drivers/net/dsa/mv88e6xxx/chip.c:227:5: warning: symbol 'mv88e6xxx_port_write' was not declared. Should it be static? Signed-off-by: Wei Yongjun Reviewed-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 122876cb926c..b2c25daef294 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -216,16 +216,16 @@ int mv88e6xxx_write(struct mv88e6xxx_chip *chip, int addr, int reg, u16 val) return 0; } -int mv88e6xxx_port_read(struct mv88e6xxx_chip *chip, int port, int reg, - u16 *val) +static int mv88e6xxx_port_read(struct mv88e6xxx_chip *chip, int port, int reg, + u16 *val) { int addr = chip->info->port_base_addr + port; return mv88e6xxx_read(chip, addr, reg, val); } -int mv88e6xxx_port_write(struct mv88e6xxx_chip *chip, int port, int reg, - u16 val) +static int mv88e6xxx_port_write(struct mv88e6xxx_chip *chip, int port, int reg, + u16 val) { int addr = chip->info->port_base_addr + port; From ecdad234755368e1beaa317e930387ea37eff881 Mon Sep 17 00:00:00 2001 From: Baoyou Xie Date: Sun, 25 Sep 2016 17:16:44 +0800 Subject: [PATCH 1531/1715] net: hisilicon: mark symbols static where possible We get 2 warnings when building kernel with W=1: drivers/net/ethernet/hisilicon/hisi_femac.c:943:5: warning: no previous prototype for 'hisi_femac_drv_suspend' [-Wmissing-prototypes] drivers/net/ethernet/hisilicon/hisi_femac.c:960:5: warning: no previous prototype for 'hisi_femac_drv_resume' [-Wmissing-prototypes] In fact, these two functions are only used in the file in which they are declared and don't need a declaration, but can be made static. so this patch marks these functions with 'static'. Signed-off-by: Baoyou Xie Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hisi_femac.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hisi_femac.c b/drivers/net/ethernet/hisilicon/hisi_femac.c index ca68e2208b5b..ced185962ef8 100644 --- a/drivers/net/ethernet/hisilicon/hisi_femac.c +++ b/drivers/net/ethernet/hisilicon/hisi_femac.c @@ -940,8 +940,8 @@ static int hisi_femac_drv_remove(struct platform_device *pdev) } #ifdef CONFIG_PM -int hisi_femac_drv_suspend(struct platform_device *pdev, - pm_message_t state) +static int hisi_femac_drv_suspend(struct platform_device *pdev, + pm_message_t state) { struct net_device *ndev = platform_get_drvdata(pdev); struct hisi_femac_priv *priv = netdev_priv(ndev); @@ -957,7 +957,7 @@ int hisi_femac_drv_suspend(struct platform_device *pdev, return 0; } -int hisi_femac_drv_resume(struct platform_device *pdev) +static int hisi_femac_drv_resume(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); struct hisi_femac_priv *priv = netdev_priv(ndev); From 49e3e6f34d6a00c7725eb42a2b78ee1612bdcac9 Mon Sep 17 00:00:00 2001 From: Baoyou Xie Date: Sun, 25 Sep 2016 17:19:04 +0800 Subject: [PATCH 1532/1715] net: hip04: mark tx_done() static We get 1 warning when building kernel with W=1: drivers/net/ethernet/hisilicon/hip04_eth.c:603:22: warning: no previous prototype for 'tx_done' [-Wmissing-prototypes] In fact, this function is only used in the file in which it is declared and don't need a declaration, but can be made static. so this patch marks this function with 'static'. Signed-off-by: Baoyou Xie Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hip04_eth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c index 415ffa1509d5..39778892b3b3 100644 --- a/drivers/net/ethernet/hisilicon/hip04_eth.c +++ b/drivers/net/ethernet/hisilicon/hip04_eth.c @@ -600,7 +600,7 @@ static irqreturn_t hip04_mac_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -enum hrtimer_restart tx_done(struct hrtimer *hrtimer) +static enum hrtimer_restart tx_done(struct hrtimer *hrtimer) { struct hip04_priv *priv; From 2dc0d2b45289c880130fe0c54ec684947d407786 Mon Sep 17 00:00:00 2001 From: Baoyou Xie Date: Sun, 25 Sep 2016 17:20:41 +0800 Subject: [PATCH 1533/1715] net: mvneta: mark symbols static where possible We get 2 warnings when building kernel with W=1: drivers/net/ethernet/marvell/mvneta.c:639:27: warning: no previous prototype for 'mvneta_get_stats64' [-Wmissing-prototypes] drivers/net/ethernet/marvell/mvneta.c:3529:5: warning: no previous prototype for 'mvneta_ethtool_set_link_ksettings' [-Wmissing-prototypes] In fact, these two functions are only used in the file in which they are declared and don't need a declaration, but can be made static. so this patch marks these functions with 'static'. Signed-off-by: Baoyou Xie Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvneta.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 8e4252dd9a9d..bfada880e5eb 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -634,8 +634,9 @@ static void mvneta_mib_counters_clear(struct mvneta_port *pp) } /* Get System Network Statistics */ -struct rtnl_link_stats64 *mvneta_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *stats) +static struct rtnl_link_stats64 * +mvneta_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *stats) { struct mvneta_port *pp = netdev_priv(dev); unsigned int start; @@ -3506,8 +3507,9 @@ static int mvneta_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) /* Ethtool methods */ /* Set link ksettings (phy address, speed) for ethtools */ -int mvneta_ethtool_set_link_ksettings(struct net_device *ndev, - const struct ethtool_link_ksettings *cmd) +static int +mvneta_ethtool_set_link_ksettings(struct net_device *ndev, + const struct ethtool_link_ksettings *cmd) { struct mvneta_port *pp = netdev_priv(ndev); struct phy_device *phydev = ndev->phydev; From 50935857f878c014d92be49cbf651bcfbfdacdc0 Mon Sep 17 00:00:00 2001 From: Baoyou Xie Date: Sun, 25 Sep 2016 14:10:09 +0800 Subject: [PATCH 1534/1715] cxgb4: mark symbols static where possible We get 10 warnings when building kernel with W=1: drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c:304:5: warning: no previous prototype for 'cxgb4_dcb_enabled' [-Wmissing-prototypes] drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c:194:5: warning: no previous prototype for 'setup_sge_queues_uld' [-Wmissing-prototypes] drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c:241:6: warning: no previous prototype for 'free_sge_queues_uld' [-Wmissing-prototypes] drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c:268:5: warning: no previous prototype for 'cfg_queues_uld' [-Wmissing-prototypes] drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c:344:6: warning: no previous prototype for 'free_queues_uld' [-Wmissing-prototypes] drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c:353:5: warning: no previous prototype for 'request_msix_queue_irqs_uld' [-Wmissing-prototypes] drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c:379:6: warning: no previous prototype for 'free_msix_queue_irqs_uld' [-Wmissing-prototypes] drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c:393:6: warning: no previous prototype for 'name_msix_vecs_uld' [-Wmissing-prototypes] drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c:433:6: warning: no previous prototype for 'enable_rx_uld' [-Wmissing-prototypes] drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c:442:6: warning: no previous prototype for 'quiesce_rx_uld' [-Wmissing-prototypes] In fact, these functions are only used in the file in which they are declared and don't need a declaration, but can be made static. so this patch marks these functions with 'static'. Signed-off-by: Baoyou Xie Signed-off-by: David S. Miller --- .../net/ethernet/chelsio/cxgb4/cxgb4_main.c | 2 +- .../net/ethernet/chelsio/cxgb4/cxgb4_uld.c | 23 +++++++++++-------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index eaa7fa98a205..c8a5c434ad2c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -279,7 +279,7 @@ static void dcb_tx_queue_prio_enable(struct net_device *dev, int enable) } #endif /* CONFIG_CHELSIO_T4_DCB */ -int cxgb4_dcb_enabled(const struct net_device *dev) +static int cxgb4_dcb_enabled(const struct net_device *dev) { #ifdef CONFIG_CHELSIO_T4_DCB struct port_info *pi = netdev_priv(dev); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c index f13b593d7c8b..b4b2d20aab3c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c @@ -191,7 +191,8 @@ freeout: return err; } -int setup_sge_queues_uld(struct adapter *adap, unsigned int uld_type, bool lro) +static int +setup_sge_queues_uld(struct adapter *adap, unsigned int uld_type, bool lro) { struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type]; int i, ret = 0; @@ -238,7 +239,7 @@ static void t4_free_uld_rxqs(struct adapter *adap, int n, } } -void free_sge_queues_uld(struct adapter *adap, unsigned int uld_type) +static void free_sge_queues_uld(struct adapter *adap, unsigned int uld_type) { struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type]; @@ -265,8 +266,8 @@ void free_sge_queues_uld(struct adapter *adap, unsigned int uld_type) kfree(rxq_info->msix_tbl); } -int cfg_queues_uld(struct adapter *adap, unsigned int uld_type, - const struct cxgb4_uld_info *uld_info) +static int cfg_queues_uld(struct adapter *adap, unsigned int uld_type, + const struct cxgb4_uld_info *uld_info) { struct sge *s = &adap->sge; struct sge_uld_rxq_info *rxq_info; @@ -341,7 +342,7 @@ int cfg_queues_uld(struct adapter *adap, unsigned int uld_type, return 0; } -void free_queues_uld(struct adapter *adap, unsigned int uld_type) +static void free_queues_uld(struct adapter *adap, unsigned int uld_type) { struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type]; @@ -350,7 +351,8 @@ void free_queues_uld(struct adapter *adap, unsigned int uld_type) kfree(rxq_info); } -int request_msix_queue_irqs_uld(struct adapter *adap, unsigned int uld_type) +static int +request_msix_queue_irqs_uld(struct adapter *adap, unsigned int uld_type) { struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type]; int err = 0; @@ -376,7 +378,8 @@ unwind: return err; } -void free_msix_queue_irqs_uld(struct adapter *adap, unsigned int uld_type) +static void +free_msix_queue_irqs_uld(struct adapter *adap, unsigned int uld_type) { struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type]; unsigned int idx, bmap_idx; @@ -390,7 +393,7 @@ void free_msix_queue_irqs_uld(struct adapter *adap, unsigned int uld_type) } } -void name_msix_vecs_uld(struct adapter *adap, unsigned int uld_type) +static void name_msix_vecs_uld(struct adapter *adap, unsigned int uld_type) { struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type]; int n = sizeof(adap->msix_info_ulds[0].desc); @@ -430,7 +433,7 @@ static void quiesce_rx(struct adapter *adap, struct sge_rspq *q) } } -void enable_rx_uld(struct adapter *adap, unsigned int uld_type) +static void enable_rx_uld(struct adapter *adap, unsigned int uld_type) { struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type]; int idx; @@ -439,7 +442,7 @@ void enable_rx_uld(struct adapter *adap, unsigned int uld_type) enable_rx(adap, &rxq_info->uldrxq[idx].rspq); } -void quiesce_rx_uld(struct adapter *adap, unsigned int uld_type) +static void quiesce_rx_uld(struct adapter *adap, unsigned int uld_type) { struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type]; int idx; From e2072600a24161b7ddcfb26814f69f5fbc8ef85a Mon Sep 17 00:00:00 2001 From: Baoyou Xie Date: Sun, 25 Sep 2016 14:23:15 +0800 Subject: [PATCH 1535/1715] net: bcmgenet: remove unused function in bcmgenet.c We get 1 warning when building kernel with W=1: drivers/net/ethernet/broadcom/genet/bcmgenet.c:2763:5: warning: no previous prototype for 'bcmgenet_hfb_add_filter' [-Wmissing-prototypes] In fact, this function is implemented in drivers/net/ethernet/broadcom/genet/bcmgenet.c, but be called by no one, thus can be removed. So this patch removes the unused functions. Signed-off-by: Baoyou Xie Signed-off-by: David S. Miller --- .../net/ethernet/broadcom/genet/bcmgenet.c | 122 ------------------ 1 file changed, 122 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 2013474bfdbf..7f478499b649 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -2664,128 +2664,6 @@ static void bcmgenet_enable_dma(struct bcmgenet_priv *priv, u32 dma_ctrl) bcmgenet_tdma_writel(priv, reg, DMA_CTRL); } -static bool bcmgenet_hfb_is_filter_enabled(struct bcmgenet_priv *priv, - u32 f_index) -{ - u32 offset; - u32 reg; - - offset = HFB_FLT_ENABLE_V3PLUS + (f_index < 32) * sizeof(u32); - reg = bcmgenet_hfb_reg_readl(priv, offset); - return !!(reg & (1 << (f_index % 32))); -} - -static void bcmgenet_hfb_enable_filter(struct bcmgenet_priv *priv, u32 f_index) -{ - u32 offset; - u32 reg; - - offset = HFB_FLT_ENABLE_V3PLUS + (f_index < 32) * sizeof(u32); - reg = bcmgenet_hfb_reg_readl(priv, offset); - reg |= (1 << (f_index % 32)); - bcmgenet_hfb_reg_writel(priv, reg, offset); -} - -static void bcmgenet_hfb_set_filter_rx_queue_mapping(struct bcmgenet_priv *priv, - u32 f_index, u32 rx_queue) -{ - u32 offset; - u32 reg; - - offset = f_index / 8; - reg = bcmgenet_rdma_readl(priv, DMA_INDEX2RING_0 + offset); - reg &= ~(0xF << (4 * (f_index % 8))); - reg |= ((rx_queue & 0xF) << (4 * (f_index % 8))); - bcmgenet_rdma_writel(priv, reg, DMA_INDEX2RING_0 + offset); -} - -static void bcmgenet_hfb_set_filter_length(struct bcmgenet_priv *priv, - u32 f_index, u32 f_length) -{ - u32 offset; - u32 reg; - - offset = HFB_FLT_LEN_V3PLUS + - ((priv->hw_params->hfb_filter_cnt - 1 - f_index) / 4) * - sizeof(u32); - reg = bcmgenet_hfb_reg_readl(priv, offset); - reg &= ~(0xFF << (8 * (f_index % 4))); - reg |= ((f_length & 0xFF) << (8 * (f_index % 4))); - bcmgenet_hfb_reg_writel(priv, reg, offset); -} - -static int bcmgenet_hfb_find_unused_filter(struct bcmgenet_priv *priv) -{ - u32 f_index; - - for (f_index = 0; f_index < priv->hw_params->hfb_filter_cnt; f_index++) - if (!bcmgenet_hfb_is_filter_enabled(priv, f_index)) - return f_index; - - return -ENOMEM; -} - -/* bcmgenet_hfb_add_filter - * - * Add new filter to Hardware Filter Block to match and direct Rx traffic to - * desired Rx queue. - * - * f_data is an array of unsigned 32-bit integers where each 32-bit integer - * provides filter data for 2 bytes (4 nibbles) of Rx frame: - * - * bits 31:20 - unused - * bit 19 - nibble 0 match enable - * bit 18 - nibble 1 match enable - * bit 17 - nibble 2 match enable - * bit 16 - nibble 3 match enable - * bits 15:12 - nibble 0 data - * bits 11:8 - nibble 1 data - * bits 7:4 - nibble 2 data - * bits 3:0 - nibble 3 data - * - * Example: - * In order to match: - * - Ethernet frame type = 0x0800 (IP) - * - IP version field = 4 - * - IP protocol field = 0x11 (UDP) - * - * The following filter is needed: - * u32 hfb_filter_ipv4_udp[] = { - * Rx frame offset 0x00: 0x00000000, 0x00000000, 0x00000000, 0x00000000, - * Rx frame offset 0x08: 0x00000000, 0x00000000, 0x000F0800, 0x00084000, - * Rx frame offset 0x10: 0x00000000, 0x00000000, 0x00000000, 0x00030011, - * }; - * - * To add the filter to HFB and direct the traffic to Rx queue 0, call: - * bcmgenet_hfb_add_filter(priv, hfb_filter_ipv4_udp, - * ARRAY_SIZE(hfb_filter_ipv4_udp), 0); - */ -int bcmgenet_hfb_add_filter(struct bcmgenet_priv *priv, u32 *f_data, - u32 f_length, u32 rx_queue) -{ - int f_index; - u32 i; - - f_index = bcmgenet_hfb_find_unused_filter(priv); - if (f_index < 0) - return -ENOMEM; - - if (f_length > priv->hw_params->hfb_filter_size) - return -EINVAL; - - for (i = 0; i < f_length; i++) - bcmgenet_hfb_writel(priv, f_data[i], - (f_index * priv->hw_params->hfb_filter_size + i) * - sizeof(u32)); - - bcmgenet_hfb_set_filter_length(priv, f_index, 2 * f_length); - bcmgenet_hfb_set_filter_rx_queue_mapping(priv, f_index, rx_queue); - bcmgenet_hfb_enable_filter(priv, f_index); - bcmgenet_hfb_reg_writel(priv, 0x1, HFB_CTRL); - - return 0; -} - /* bcmgenet_hfb_clear * * Clear Hardware Filter Block and disable all filtering. From b47c62c5de2bc43a26bcaca8d7a93bf9dee66ffe Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 23 Sep 2016 22:23:59 +0200 Subject: [PATCH 1536/1715] nfp: bpf: improve handling for disabled BPF syscall I stumbled over a new warning during randconfig testing, with CONFIG_BPF_SYSCALL disabled: drivers/net/ethernet/netronome/nfp/nfp_net_offload.c: In function 'nfp_net_bpf_offload': drivers/net/ethernet/netronome/nfp/nfp_net_offload.c:263:3: error: '*((void *)&res+4)' may be used uninitialized in this function [-Werror=maybe-uninitialized] drivers/net/ethernet/netronome/nfp/nfp_net_offload.c:263:3: error: 'res.n_instr' may be used uninitialized in this function [-Werror=maybe-uninitialized] As far as I can tell, this is a false positive caused by the compiler getting confused about a function that is partially inlined, but it's easy to avoid while improving the code: The nfp_bpf_jit() stub helper for that configuration is unusual as it is defined in a header file but not marked 'static inline'. By moving the compile-time check into the caller using the IS_ENABLED() macro, we can remove that stub and simplify the nfp_net_bpf_offload_prepare() function enough to unconfuse the compiler. Fixes: 7533fdc0f77f ("nfp: bpf: add hardware bpf offload") Signed-off-by: Arnd Bergmann Acked-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/nfp_bpf.h | 10 ---------- drivers/net/ethernet/netronome/nfp/nfp_net_offload.c | 3 +++ 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf.h b/drivers/net/ethernet/netronome/nfp/nfp_bpf.h index fc220cd04115..87aa8a3e9112 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_bpf.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf.h @@ -192,20 +192,10 @@ struct nfp_bpf_result { bool dense_mode; }; -#ifdef CONFIG_BPF_SYSCALL int nfp_bpf_jit(struct bpf_prog *filter, void *prog, enum nfp_bpf_action_type act, unsigned int prog_start, unsigned int prog_done, unsigned int prog_sz, struct nfp_bpf_result *res); -#else -int -nfp_bpf_jit(struct bpf_prog *filter, void *prog, enum nfp_bpf_action_type act, - unsigned int prog_start, unsigned int prog_done, - unsigned int prog_sz, struct nfp_bpf_result *res) -{ - return -ENOTSUPP; -} -#endif int nfp_prog_verify(struct nfp_prog *nfp_prog, struct bpf_prog *prog); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c b/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c index 43f42f842eda..8acfb631a0ea 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c @@ -148,6 +148,9 @@ nfp_net_bpf_offload_prepare(struct nfp_net *nn, unsigned int max_mtu; int ret; + if (!IS_ENABLED(CONFIG_BPF_SYSCALL)) + return -ENOTSUPP; + ret = nfp_net_bpf_get_act(nn, cls_bpf); if (ret < 0) return ret; From a7c7fbff6a408d00431c705bbe3dfc5f51e3f1c4 Mon Sep 17 00:00:00 2001 From: Purushottam Kushwaha Date: Wed, 14 Sep 2016 17:38:44 +0530 Subject: [PATCH 1537/1715] cfg80211: Add support to configure a beacon data rate This allows an option to configure a single beacon tx rate for an AP. Signed-off-by: Purushottam Kushwaha Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 25 +- net/wireless/nl80211.c | 510 +++++++++++++++++++++++------------------ 2 files changed, 302 insertions(+), 233 deletions(-) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index bd26cc6e2d79..e0949c8bc2d1 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -676,6 +676,18 @@ struct cfg80211_acl_data { struct mac_address mac_addrs[]; }; +/* + * cfg80211_bitrate_mask - masks for bitrate control + */ +struct cfg80211_bitrate_mask { + struct { + u32 legacy; + u8 ht_mcs[IEEE80211_HT_MCS_MASK_LEN]; + u16 vht_mcs[NL80211_VHT_NSS_MAX]; + enum nl80211_txrate_gi gi; + } control[NUM_NL80211_BANDS]; +}; + /** * struct cfg80211_ap_settings - AP configuration * @@ -700,6 +712,7 @@ struct cfg80211_acl_data { * MAC address based access control * @pbss: If set, start as a PCP instead of AP. Relevant for DMG * networks. + * @beacon_rate: masks for setting user configured beacon tx rate. */ struct cfg80211_ap_settings { struct cfg80211_chan_def chandef; @@ -719,6 +732,7 @@ struct cfg80211_ap_settings { bool p2p_opp_ps; const struct cfg80211_acl_data *acl; bool pbss; + struct cfg80211_bitrate_mask beacon_rate; }; /** @@ -2010,17 +2024,6 @@ enum wiphy_params_flags { WIPHY_PARAM_DYN_ACK = 1 << 5, }; -/* - * cfg80211_bitrate_mask - masks for bitrate control - */ -struct cfg80211_bitrate_mask { - struct { - u32 legacy; - u8 ht_mcs[IEEE80211_HT_MCS_MASK_LEN]; - u16 vht_mcs[NL80211_VHT_NSS_MAX]; - enum nl80211_txrate_gi gi; - } control[NUM_NL80211_BANDS]; -}; /** * struct cfg80211_pmksa - PMK Security Association * diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 887c4c114206..a10484da60c0 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3340,6 +3340,279 @@ static int nl80211_set_mac_acl(struct sk_buff *skb, struct genl_info *info) return err; } +static u32 rateset_to_mask(struct ieee80211_supported_band *sband, + u8 *rates, u8 rates_len) +{ + u8 i; + u32 mask = 0; + + for (i = 0; i < rates_len; i++) { + int rate = (rates[i] & 0x7f) * 5; + int ridx; + + for (ridx = 0; ridx < sband->n_bitrates; ridx++) { + struct ieee80211_rate *srate = + &sband->bitrates[ridx]; + if (rate == srate->bitrate) { + mask |= 1 << ridx; + break; + } + } + if (ridx == sband->n_bitrates) + return 0; /* rate not found */ + } + + return mask; +} + +static bool ht_rateset_to_mask(struct ieee80211_supported_band *sband, + u8 *rates, u8 rates_len, + u8 mcs[IEEE80211_HT_MCS_MASK_LEN]) +{ + u8 i; + + memset(mcs, 0, IEEE80211_HT_MCS_MASK_LEN); + + for (i = 0; i < rates_len; i++) { + int ridx, rbit; + + ridx = rates[i] / 8; + rbit = BIT(rates[i] % 8); + + /* check validity */ + if ((ridx < 0) || (ridx >= IEEE80211_HT_MCS_MASK_LEN)) + return false; + + /* check availability */ + if (sband->ht_cap.mcs.rx_mask[ridx] & rbit) + mcs[ridx] |= rbit; + else + return false; + } + + return true; +} + +static u16 vht_mcs_map_to_mcs_mask(u8 vht_mcs_map) +{ + u16 mcs_mask = 0; + + switch (vht_mcs_map) { + case IEEE80211_VHT_MCS_NOT_SUPPORTED: + break; + case IEEE80211_VHT_MCS_SUPPORT_0_7: + mcs_mask = 0x00FF; + break; + case IEEE80211_VHT_MCS_SUPPORT_0_8: + mcs_mask = 0x01FF; + break; + case IEEE80211_VHT_MCS_SUPPORT_0_9: + mcs_mask = 0x03FF; + break; + default: + break; + } + + return mcs_mask; +} + +static void vht_build_mcs_mask(u16 vht_mcs_map, + u16 vht_mcs_mask[NL80211_VHT_NSS_MAX]) +{ + u8 nss; + + for (nss = 0; nss < NL80211_VHT_NSS_MAX; nss++) { + vht_mcs_mask[nss] = vht_mcs_map_to_mcs_mask(vht_mcs_map & 0x03); + vht_mcs_map >>= 2; + } +} + +static bool vht_set_mcs_mask(struct ieee80211_supported_band *sband, + struct nl80211_txrate_vht *txrate, + u16 mcs[NL80211_VHT_NSS_MAX]) +{ + u16 tx_mcs_map = le16_to_cpu(sband->vht_cap.vht_mcs.tx_mcs_map); + u16 tx_mcs_mask[NL80211_VHT_NSS_MAX] = {}; + u8 i; + + if (!sband->vht_cap.vht_supported) + return false; + + memset(mcs, 0, sizeof(u16) * NL80211_VHT_NSS_MAX); + + /* Build vht_mcs_mask from VHT capabilities */ + vht_build_mcs_mask(tx_mcs_map, tx_mcs_mask); + + for (i = 0; i < NL80211_VHT_NSS_MAX; i++) { + if ((tx_mcs_mask[i] & txrate->mcs[i]) == txrate->mcs[i]) + mcs[i] = txrate->mcs[i]; + else + return false; + } + + return true; +} + +static const struct nla_policy nl80211_txattr_policy[NL80211_TXRATE_MAX + 1] = { + [NL80211_TXRATE_LEGACY] = { .type = NLA_BINARY, + .len = NL80211_MAX_SUPP_RATES }, + [NL80211_TXRATE_HT] = { .type = NLA_BINARY, + .len = NL80211_MAX_SUPP_HT_RATES }, + [NL80211_TXRATE_VHT] = { .len = sizeof(struct nl80211_txrate_vht)}, + [NL80211_TXRATE_GI] = { .type = NLA_U8 }, +}; + +static int nl80211_parse_tx_bitrate_mask(struct genl_info *info, + struct cfg80211_bitrate_mask *mask) +{ + struct nlattr *tb[NL80211_TXRATE_MAX + 1]; + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + int rem, i; + struct nlattr *tx_rates; + struct ieee80211_supported_band *sband; + u16 vht_tx_mcs_map; + + memset(mask, 0, sizeof(*mask)); + /* Default to all rates enabled */ + for (i = 0; i < NUM_NL80211_BANDS; i++) { + sband = rdev->wiphy.bands[i]; + + if (!sband) + continue; + + mask->control[i].legacy = (1 << sband->n_bitrates) - 1; + memcpy(mask->control[i].ht_mcs, + sband->ht_cap.mcs.rx_mask, + sizeof(mask->control[i].ht_mcs)); + + if (!sband->vht_cap.vht_supported) + continue; + + vht_tx_mcs_map = le16_to_cpu(sband->vht_cap.vht_mcs.tx_mcs_map); + vht_build_mcs_mask(vht_tx_mcs_map, mask->control[i].vht_mcs); + } + + /* if no rates are given set it back to the defaults */ + if (!info->attrs[NL80211_ATTR_TX_RATES]) + goto out; + + /* The nested attribute uses enum nl80211_band as the index. This maps + * directly to the enum nl80211_band values used in cfg80211. + */ + BUILD_BUG_ON(NL80211_MAX_SUPP_HT_RATES > IEEE80211_HT_MCS_MASK_LEN * 8); + nla_for_each_nested(tx_rates, info->attrs[NL80211_ATTR_TX_RATES], rem) { + enum nl80211_band band = nla_type(tx_rates); + int err; + + if (band < 0 || band >= NUM_NL80211_BANDS) + return -EINVAL; + sband = rdev->wiphy.bands[band]; + if (sband == NULL) + return -EINVAL; + err = nla_parse(tb, NL80211_TXRATE_MAX, nla_data(tx_rates), + nla_len(tx_rates), nl80211_txattr_policy); + if (err) + return err; + if (tb[NL80211_TXRATE_LEGACY]) { + mask->control[band].legacy = rateset_to_mask( + sband, + nla_data(tb[NL80211_TXRATE_LEGACY]), + nla_len(tb[NL80211_TXRATE_LEGACY])); + if ((mask->control[band].legacy == 0) && + nla_len(tb[NL80211_TXRATE_LEGACY])) + return -EINVAL; + } + if (tb[NL80211_TXRATE_HT]) { + if (!ht_rateset_to_mask( + sband, + nla_data(tb[NL80211_TXRATE_HT]), + nla_len(tb[NL80211_TXRATE_HT]), + mask->control[band].ht_mcs)) + return -EINVAL; + } + if (tb[NL80211_TXRATE_VHT]) { + if (!vht_set_mcs_mask( + sband, + nla_data(tb[NL80211_TXRATE_VHT]), + mask->control[band].vht_mcs)) + return -EINVAL; + } + if (tb[NL80211_TXRATE_GI]) { + mask->control[band].gi = + nla_get_u8(tb[NL80211_TXRATE_GI]); + if (mask->control[band].gi > NL80211_TXRATE_FORCE_LGI) + return -EINVAL; + } + + if (mask->control[band].legacy == 0) { + /* don't allow empty legacy rates if HT or VHT + * are not even supported. + */ + if (!(rdev->wiphy.bands[band]->ht_cap.ht_supported || + rdev->wiphy.bands[band]->vht_cap.vht_supported)) + return -EINVAL; + + for (i = 0; i < IEEE80211_HT_MCS_MASK_LEN; i++) + if (mask->control[band].ht_mcs[i]) + goto out; + + for (i = 0; i < NL80211_VHT_NSS_MAX; i++) + if (mask->control[band].vht_mcs[i]) + goto out; + + /* legacy and mcs rates may not be both empty */ + return -EINVAL; + } + } + +out: + return 0; +} + +static int validate_beacon_tx_rate(struct cfg80211_ap_settings *params) +{ + u32 rate, count_ht, count_vht, i; + enum nl80211_band band; + + band = params->chandef.chan->band; + rate = params->beacon_rate.control[band].legacy; + + /* Allow only one rate */ + if (hweight32(rate) > 1) + return -EINVAL; + + count_ht = 0; + for (i = 0; i < IEEE80211_HT_MCS_MASK_LEN; i++) { + if (hweight8(params->beacon_rate.control[band].ht_mcs[i]) > 1) { + return -EINVAL; + } else if (params->beacon_rate.control[band].ht_mcs[i]) { + count_ht++; + if (count_ht > 1) + return -EINVAL; + } + if (count_ht && rate) + return -EINVAL; + } + + count_vht = 0; + for (i = 0; i < NL80211_VHT_NSS_MAX; i++) { + if (hweight16(params->beacon_rate.control[band].vht_mcs[i]) > 1) { + return -EINVAL; + } else if (params->beacon_rate.control[band].vht_mcs[i]) { + count_vht++; + if (count_vht > 1) + return -EINVAL; + } + if (count_vht && rate) + return -EINVAL; + } + + if ((count_ht && count_vht) || (!rate && !count_ht && !count_vht)) + return -EINVAL; + + return 0; +} + static int nl80211_parse_beacon(struct nlattr *attrs[], struct cfg80211_beacon_data *bcn) { @@ -3569,6 +3842,16 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) wdev->iftype)) return -EINVAL; + if (info->attrs[NL80211_ATTR_TX_RATES]) { + err = nl80211_parse_tx_bitrate_mask(info, ¶ms.beacon_rate); + if (err) + return err; + + err = validate_beacon_tx_rate(¶ms); + if (err) + return err; + } + if (info->attrs[NL80211_ATTR_SMPS_MODE]) { params.smps_mode = nla_get_u8(info->attrs[NL80211_ATTR_SMPS_MODE]); @@ -8641,238 +8924,21 @@ static int nl80211_cancel_remain_on_channel(struct sk_buff *skb, return rdev_cancel_remain_on_channel(rdev, wdev, cookie); } -static u32 rateset_to_mask(struct ieee80211_supported_band *sband, - u8 *rates, u8 rates_len) -{ - u8 i; - u32 mask = 0; - - for (i = 0; i < rates_len; i++) { - int rate = (rates[i] & 0x7f) * 5; - int ridx; - - for (ridx = 0; ridx < sband->n_bitrates; ridx++) { - struct ieee80211_rate *srate = - &sband->bitrates[ridx]; - if (rate == srate->bitrate) { - mask |= 1 << ridx; - break; - } - } - if (ridx == sband->n_bitrates) - return 0; /* rate not found */ - } - - return mask; -} - -static bool ht_rateset_to_mask(struct ieee80211_supported_band *sband, - u8 *rates, u8 rates_len, - u8 mcs[IEEE80211_HT_MCS_MASK_LEN]) -{ - u8 i; - - memset(mcs, 0, IEEE80211_HT_MCS_MASK_LEN); - - for (i = 0; i < rates_len; i++) { - int ridx, rbit; - - ridx = rates[i] / 8; - rbit = BIT(rates[i] % 8); - - /* check validity */ - if ((ridx < 0) || (ridx >= IEEE80211_HT_MCS_MASK_LEN)) - return false; - - /* check availability */ - if (sband->ht_cap.mcs.rx_mask[ridx] & rbit) - mcs[ridx] |= rbit; - else - return false; - } - - return true; -} - -static u16 vht_mcs_map_to_mcs_mask(u8 vht_mcs_map) -{ - u16 mcs_mask = 0; - - switch (vht_mcs_map) { - case IEEE80211_VHT_MCS_NOT_SUPPORTED: - break; - case IEEE80211_VHT_MCS_SUPPORT_0_7: - mcs_mask = 0x00FF; - break; - case IEEE80211_VHT_MCS_SUPPORT_0_8: - mcs_mask = 0x01FF; - break; - case IEEE80211_VHT_MCS_SUPPORT_0_9: - mcs_mask = 0x03FF; - break; - default: - break; - } - - return mcs_mask; -} - -static void vht_build_mcs_mask(u16 vht_mcs_map, - u16 vht_mcs_mask[NL80211_VHT_NSS_MAX]) -{ - u8 nss; - - for (nss = 0; nss < NL80211_VHT_NSS_MAX; nss++) { - vht_mcs_mask[nss] = vht_mcs_map_to_mcs_mask(vht_mcs_map & 0x03); - vht_mcs_map >>= 2; - } -} - -static bool vht_set_mcs_mask(struct ieee80211_supported_band *sband, - struct nl80211_txrate_vht *txrate, - u16 mcs[NL80211_VHT_NSS_MAX]) -{ - u16 tx_mcs_map = le16_to_cpu(sband->vht_cap.vht_mcs.tx_mcs_map); - u16 tx_mcs_mask[NL80211_VHT_NSS_MAX] = {}; - u8 i; - - if (!sband->vht_cap.vht_supported) - return false; - - memset(mcs, 0, sizeof(u16) * NL80211_VHT_NSS_MAX); - - /* Build vht_mcs_mask from VHT capabilities */ - vht_build_mcs_mask(tx_mcs_map, tx_mcs_mask); - - for (i = 0; i < NL80211_VHT_NSS_MAX; i++) { - if ((tx_mcs_mask[i] & txrate->mcs[i]) == txrate->mcs[i]) - mcs[i] = txrate->mcs[i]; - else - return false; - } - - return true; -} - -static const struct nla_policy nl80211_txattr_policy[NL80211_TXRATE_MAX + 1] = { - [NL80211_TXRATE_LEGACY] = { .type = NLA_BINARY, - .len = NL80211_MAX_SUPP_RATES }, - [NL80211_TXRATE_HT] = { .type = NLA_BINARY, - .len = NL80211_MAX_SUPP_HT_RATES }, - [NL80211_TXRATE_VHT] = { .len = sizeof(struct nl80211_txrate_vht)}, - [NL80211_TXRATE_GI] = { .type = NLA_U8 }, -}; - static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb, struct genl_info *info) { - struct nlattr *tb[NL80211_TXRATE_MAX + 1]; - struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct cfg80211_bitrate_mask mask; - int rem, i; + struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; - struct nlattr *tx_rates; - struct ieee80211_supported_band *sband; - u16 vht_tx_mcs_map; + int err; if (!rdev->ops->set_bitrate_mask) return -EOPNOTSUPP; - memset(&mask, 0, sizeof(mask)); - /* Default to all rates enabled */ - for (i = 0; i < NUM_NL80211_BANDS; i++) { - sband = rdev->wiphy.bands[i]; + err = nl80211_parse_tx_bitrate_mask(info, &mask); + if (err) + return err; - if (!sband) - continue; - - mask.control[i].legacy = (1 << sband->n_bitrates) - 1; - memcpy(mask.control[i].ht_mcs, - sband->ht_cap.mcs.rx_mask, - sizeof(mask.control[i].ht_mcs)); - - if (!sband->vht_cap.vht_supported) - continue; - - vht_tx_mcs_map = le16_to_cpu(sband->vht_cap.vht_mcs.tx_mcs_map); - vht_build_mcs_mask(vht_tx_mcs_map, mask.control[i].vht_mcs); - } - - /* if no rates are given set it back to the defaults */ - if (!info->attrs[NL80211_ATTR_TX_RATES]) - goto out; - - /* - * The nested attribute uses enum nl80211_band as the index. This maps - * directly to the enum nl80211_band values used in cfg80211. - */ - BUILD_BUG_ON(NL80211_MAX_SUPP_HT_RATES > IEEE80211_HT_MCS_MASK_LEN * 8); - nla_for_each_nested(tx_rates, info->attrs[NL80211_ATTR_TX_RATES], rem) { - enum nl80211_band band = nla_type(tx_rates); - int err; - - if (band < 0 || band >= NUM_NL80211_BANDS) - return -EINVAL; - sband = rdev->wiphy.bands[band]; - if (sband == NULL) - return -EINVAL; - err = nla_parse(tb, NL80211_TXRATE_MAX, nla_data(tx_rates), - nla_len(tx_rates), nl80211_txattr_policy); - if (err) - return err; - if (tb[NL80211_TXRATE_LEGACY]) { - mask.control[band].legacy = rateset_to_mask( - sband, - nla_data(tb[NL80211_TXRATE_LEGACY]), - nla_len(tb[NL80211_TXRATE_LEGACY])); - if ((mask.control[band].legacy == 0) && - nla_len(tb[NL80211_TXRATE_LEGACY])) - return -EINVAL; - } - if (tb[NL80211_TXRATE_HT]) { - if (!ht_rateset_to_mask( - sband, - nla_data(tb[NL80211_TXRATE_HT]), - nla_len(tb[NL80211_TXRATE_HT]), - mask.control[band].ht_mcs)) - return -EINVAL; - } - if (tb[NL80211_TXRATE_VHT]) { - if (!vht_set_mcs_mask( - sband, - nla_data(tb[NL80211_TXRATE_VHT]), - mask.control[band].vht_mcs)) - return -EINVAL; - } - if (tb[NL80211_TXRATE_GI]) { - mask.control[band].gi = - nla_get_u8(tb[NL80211_TXRATE_GI]); - if (mask.control[band].gi > NL80211_TXRATE_FORCE_LGI) - return -EINVAL; - } - - if (mask.control[band].legacy == 0) { - /* don't allow empty legacy rates if HT or VHT - * are not even supported. - */ - if (!(rdev->wiphy.bands[band]->ht_cap.ht_supported || - rdev->wiphy.bands[band]->vht_cap.vht_supported)) - return -EINVAL; - - for (i = 0; i < IEEE80211_HT_MCS_MASK_LEN; i++) - if (mask.control[band].ht_mcs[i]) - goto out; - - for (i = 0; i < NL80211_VHT_NSS_MAX; i++) - if (mask.control[band].vht_mcs[i]) - goto out; - - /* legacy and mcs rates may not be both empty */ - return -EINVAL; - } - } - -out: return rdev_set_bitrate_mask(rdev, dev, NULL, &mask); } From 8564e38206de2ff005a27c8d7c2ce3869a44f0dd Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 19 Sep 2016 09:44:44 +0200 Subject: [PATCH 1538/1715] cfg80211: add checks for beacon rate, extend to mesh The previous commit added support for specifying the beacon rate for AP mode. Add features checks to this, and extend it to also support the rate configuration for mesh networks. For IBSS it's not as simple due to joining etc., so that's not yet supported. Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 4 +++- include/uapi/linux/nl80211.h | 17 ++++++++++++- net/wireless/nl80211.c | 46 +++++++++++++++++++++++++++--------- 3 files changed, 54 insertions(+), 13 deletions(-) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index e0949c8bc2d1..ed37304fa09d 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -712,7 +712,7 @@ struct cfg80211_bitrate_mask { * MAC address based access control * @pbss: If set, start as a PCP instead of AP. Relevant for DMG * networks. - * @beacon_rate: masks for setting user configured beacon tx rate. + * @beacon_rate: bitrate to be used for beacons */ struct cfg80211_ap_settings { struct cfg80211_chan_def chandef; @@ -1365,6 +1365,7 @@ struct mesh_config { * @beacon_interval: beacon interval to use * @mcast_rate: multicat rate for Mesh Node [6Mbps is the default for 802.11a] * @basic_rates: basic rates to use when creating the mesh + * @beacon_rate: bitrate to be used for beacons * * These parameters are fixed when the mesh is created. */ @@ -1385,6 +1386,7 @@ struct mesh_setup { u16 beacon_interval; int mcast_rate[NUM_NL80211_BANDS]; u32 basic_rates; + struct cfg80211_bitrate_mask beacon_rate; }; /** diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 220694151434..ec10d1b2838f 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1343,7 +1343,13 @@ enum nl80211_commands { * enum nl80211_band value is used as the index (nla_type() of the nested * data. If a band is not included, it will be configured to allow all * rates based on negotiated supported rates information. This attribute - * is used with %NL80211_CMD_SET_TX_BITRATE_MASK. + * is used with %NL80211_CMD_SET_TX_BITRATE_MASK and with starting AP, + * and joining mesh networks (not IBSS yet). In the later case, it must + * specify just a single bitrate, which is to be used for the beacon. + * The driver must also specify support for this with the extended + * features NL80211_EXT_FEATURE_BEACON_RATE_LEGACY, + * NL80211_EXT_FEATURE_BEACON_RATE_HT and + * NL80211_EXT_FEATURE_BEACON_RATE_VHT. * * @NL80211_ATTR_FRAME_MATCH: A binary attribute which typically must contain * at least one byte, currently used with @NL80211_CMD_REGISTER_FRAME. @@ -4551,6 +4557,12 @@ enum nl80211_feature_flags { * (if available). * @NL80211_EXT_FEATURE_SET_SCAN_DWELL: This driver supports configuration of * channel dwell time. + * @NL80211_EXT_FEATURE_BEACON_RATE_LEGACY: Driver supports beacon rate + * configuration (AP/mesh), supporting a legacy (non HT/VHT) rate. + * @NL80211_EXT_FEATURE_BEACON_RATE_HT: Driver supports beacon rate + * configuration (AP/mesh) with HT rates. + * @NL80211_EXT_FEATURE_BEACON_RATE_VHT: Driver supports beacon rate + * configuration (AP/mesh) with VHT rates. * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. @@ -4562,6 +4574,9 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_SCAN_START_TIME, NL80211_EXT_FEATURE_BSS_PARENT_TSF, NL80211_EXT_FEATURE_SET_SCAN_DWELL, + NL80211_EXT_FEATURE_BEACON_RATE_LEGACY, + NL80211_EXT_FEATURE_BEACON_RATE_HT, + NL80211_EXT_FEATURE_BEACON_RATE_VHT, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index a10484da60c0..b8441e60b0f6 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3569,13 +3569,12 @@ out: return 0; } -static int validate_beacon_tx_rate(struct cfg80211_ap_settings *params) +static int validate_beacon_tx_rate(struct cfg80211_registered_device *rdev, + enum nl80211_band band, + struct cfg80211_bitrate_mask *beacon_rate) { - u32 rate, count_ht, count_vht, i; - enum nl80211_band band; - - band = params->chandef.chan->band; - rate = params->beacon_rate.control[band].legacy; + u32 count_ht, count_vht, i; + u32 rate = beacon_rate->control[band].legacy; /* Allow only one rate */ if (hweight32(rate) > 1) @@ -3583,9 +3582,9 @@ static int validate_beacon_tx_rate(struct cfg80211_ap_settings *params) count_ht = 0; for (i = 0; i < IEEE80211_HT_MCS_MASK_LEN; i++) { - if (hweight8(params->beacon_rate.control[band].ht_mcs[i]) > 1) { + if (hweight8(beacon_rate->control[band].ht_mcs[i]) > 1) { return -EINVAL; - } else if (params->beacon_rate.control[band].ht_mcs[i]) { + } else if (beacon_rate->control[band].ht_mcs[i]) { count_ht++; if (count_ht > 1) return -EINVAL; @@ -3596,9 +3595,9 @@ static int validate_beacon_tx_rate(struct cfg80211_ap_settings *params) count_vht = 0; for (i = 0; i < NL80211_VHT_NSS_MAX; i++) { - if (hweight16(params->beacon_rate.control[band].vht_mcs[i]) > 1) { + if (hweight16(beacon_rate->control[band].vht_mcs[i]) > 1) { return -EINVAL; - } else if (params->beacon_rate.control[band].vht_mcs[i]) { + } else if (beacon_rate->control[band].vht_mcs[i]) { count_vht++; if (count_vht > 1) return -EINVAL; @@ -3610,6 +3609,19 @@ static int validate_beacon_tx_rate(struct cfg80211_ap_settings *params) if ((count_ht && count_vht) || (!rate && !count_ht && !count_vht)) return -EINVAL; + if (rate && + !wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_BEACON_RATE_LEGACY)) + return -EINVAL; + if (count_ht && + !wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_BEACON_RATE_HT)) + return -EINVAL; + if (count_vht && + !wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_BEACON_RATE_VHT)) + return -EINVAL; + return 0; } @@ -3847,7 +3859,8 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) if (err) return err; - err = validate_beacon_tx_rate(¶ms); + err = validate_beacon_tx_rate(rdev, params.chandef.chan->band, + ¶ms.beacon_rate); if (err) return err; } @@ -9406,6 +9419,17 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info) return err; } + if (info->attrs[NL80211_ATTR_TX_RATES]) { + err = nl80211_parse_tx_bitrate_mask(info, &setup.beacon_rate); + if (err) + return err; + + err = validate_beacon_tx_rate(rdev, setup.chandef.chan->band, + &setup.beacon_rate); + if (err) + return err; + } + return cfg80211_join_mesh(rdev, dev, &setup, &cfg); } From e8c6ae9fbf8ca70ef0c2de0d2f3995acb0dc8968 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Thu, 22 Sep 2016 17:12:25 -0400 Subject: [PATCH 1539/1715] bnx2x: allocate mac filtering 'mcast_list' in PAGE_SIZE increments Currently, we can have high order page allocations that specify GFP_ATOMIC when configuring multicast MAC address filters. For example, we have seen order 2 page allocation failures with ~500 multicast addresses configured. Convert the allocation for 'mcast_list' to be done in PAGE_SIZE increments. Signed-off-by: Jason Baron Cc: Yuval Mintz Cc: Ariel Elior Signed-off-by: David S. Miller --- .../net/ethernet/broadcom/bnx2x/bnx2x_main.c | 87 ++++++++++++------- 1 file changed, 55 insertions(+), 32 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index dab61a81a3ba..20fe6a8c35c1 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -12563,41 +12563,62 @@ static int bnx2x_close(struct net_device *dev) return 0; } -static int bnx2x_init_mcast_macs_list(struct bnx2x *bp, - struct bnx2x_mcast_ramrod_params *p) +struct bnx2x_mcast_list_elem_group { - int mc_count = netdev_mc_count(bp->dev); - struct bnx2x_mcast_list_elem *mc_mac = - kcalloc(mc_count, sizeof(*mc_mac), GFP_ATOMIC); - struct netdev_hw_addr *ha; + struct list_head mcast_group_link; + struct bnx2x_mcast_list_elem mcast_elems[]; +}; - if (!mc_mac) { - BNX2X_ERR("Failed to allocate mc MAC list\n"); - return -ENOMEM; +#define MCAST_ELEMS_PER_PG \ + ((PAGE_SIZE - sizeof(struct bnx2x_mcast_list_elem_group)) / \ + sizeof(struct bnx2x_mcast_list_elem)) + +static void bnx2x_free_mcast_macs_list(struct list_head *mcast_group_list) +{ + struct bnx2x_mcast_list_elem_group *current_mcast_group; + + while (!list_empty(mcast_group_list)) { + current_mcast_group = list_first_entry(mcast_group_list, + struct bnx2x_mcast_list_elem_group, + mcast_group_link); + list_del(¤t_mcast_group->mcast_group_link); + free_page((unsigned long)current_mcast_group); } - - INIT_LIST_HEAD(&p->mcast_list); - - netdev_for_each_mc_addr(ha, bp->dev) { - mc_mac->mac = bnx2x_mc_addr(ha); - list_add_tail(&mc_mac->link, &p->mcast_list); - mc_mac++; - } - - p->mcast_list_len = mc_count; - - return 0; } -static void bnx2x_free_mcast_macs_list( - struct bnx2x_mcast_ramrod_params *p) +static int bnx2x_init_mcast_macs_list(struct bnx2x *bp, + struct bnx2x_mcast_ramrod_params *p, + struct list_head *mcast_group_list) { - struct bnx2x_mcast_list_elem *mc_mac = - list_first_entry(&p->mcast_list, struct bnx2x_mcast_list_elem, - link); + struct bnx2x_mcast_list_elem *mc_mac; + struct netdev_hw_addr *ha; + struct bnx2x_mcast_list_elem_group *current_mcast_group = NULL; + int mc_count = netdev_mc_count(bp->dev); + int offset = 0; - WARN_ON(!mc_mac); - kfree(mc_mac); + INIT_LIST_HEAD(&p->mcast_list); + netdev_for_each_mc_addr(ha, bp->dev) { + if (!offset) { + current_mcast_group = + (struct bnx2x_mcast_list_elem_group *) + __get_free_page(GFP_ATOMIC); + if (!current_mcast_group) { + bnx2x_free_mcast_macs_list(mcast_group_list); + BNX2X_ERR("Failed to allocate mc MAC list\n"); + return -ENOMEM; + } + list_add(¤t_mcast_group->mcast_group_link, + mcast_group_list); + } + mc_mac = ¤t_mcast_group->mcast_elems[offset]; + mc_mac->mac = bnx2x_mc_addr(ha); + list_add_tail(&mc_mac->link, &p->mcast_list); + offset++; + if (offset == MCAST_ELEMS_PER_PG) + offset = 0; + } + p->mcast_list_len = mc_count; + return 0; } /** @@ -12647,6 +12668,7 @@ static int bnx2x_set_uc_list(struct bnx2x *bp) static int bnx2x_set_mc_list_e1x(struct bnx2x *bp) { + LIST_HEAD(mcast_group_list); struct net_device *dev = bp->dev; struct bnx2x_mcast_ramrod_params rparam = {NULL}; int rc = 0; @@ -12662,7 +12684,7 @@ static int bnx2x_set_mc_list_e1x(struct bnx2x *bp) /* then, configure a new MACs list */ if (netdev_mc_count(dev)) { - rc = bnx2x_init_mcast_macs_list(bp, &rparam); + rc = bnx2x_init_mcast_macs_list(bp, &rparam, &mcast_group_list); if (rc) return rc; @@ -12673,7 +12695,7 @@ static int bnx2x_set_mc_list_e1x(struct bnx2x *bp) BNX2X_ERR("Failed to set a new multicast configuration: %d\n", rc); - bnx2x_free_mcast_macs_list(&rparam); + bnx2x_free_mcast_macs_list(&mcast_group_list); } return rc; @@ -12681,6 +12703,7 @@ static int bnx2x_set_mc_list_e1x(struct bnx2x *bp) static int bnx2x_set_mc_list(struct bnx2x *bp) { + LIST_HEAD(mcast_group_list); struct bnx2x_mcast_ramrod_params rparam = {NULL}; struct net_device *dev = bp->dev; int rc = 0; @@ -12692,7 +12715,7 @@ static int bnx2x_set_mc_list(struct bnx2x *bp) rparam.mcast_obj = &bp->mcast_obj; if (netdev_mc_count(dev)) { - rc = bnx2x_init_mcast_macs_list(bp, &rparam); + rc = bnx2x_init_mcast_macs_list(bp, &rparam, &mcast_group_list); if (rc) return rc; @@ -12703,7 +12726,7 @@ static int bnx2x_set_mc_list(struct bnx2x *bp) BNX2X_ERR("Failed to set a new multicast configuration: %d\n", rc); - bnx2x_free_mcast_macs_list(&rparam); + bnx2x_free_mcast_macs_list(&mcast_group_list); } else { /* If no mc addresses are required, flush the configuration */ rc = bnx2x_config_mcast(bp, &rparam, BNX2X_MCAST_CMD_DEL); From 3129e1599c6edfafc2a0a8be9f2eb344c7feb920 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Thu, 22 Sep 2016 17:12:26 -0400 Subject: [PATCH 1540/1715] bnx2x: allocate mac filtering pending list in PAGE_SIZE increments Currently, we can have high order page allocations that specify GFP_ATOMIC when configuring multicast MAC address filters. For example, we have seen order 2 page allocation failures with ~500 multicast addresses configured. Convert the allocation for the pending list to be done in PAGE_SIZE increments. Signed-off-by: Jason Baron Cc: Yuval Mintz Cc: Ariel Elior Acked-by: Yuval Mintz Signed-off-by: David S. Miller --- .../net/ethernet/broadcom/bnx2x/bnx2x_sp.c | 127 ++++++++++++------ 1 file changed, 88 insertions(+), 39 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c index d468380c2a23..4947a9cbf0c1 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c @@ -2606,8 +2606,23 @@ struct bnx2x_mcast_bin_elem { int type; /* BNX2X_MCAST_CMD_SET_{ADD, DEL} */ }; +union bnx2x_mcast_elem { + struct bnx2x_mcast_bin_elem bin_elem; + struct bnx2x_mcast_mac_elem mac_elem; +}; + +struct bnx2x_mcast_elem_group { + struct list_head mcast_group_link; + union bnx2x_mcast_elem mcast_elems[]; +}; + +#define MCAST_MAC_ELEMS_PER_PG \ + ((PAGE_SIZE - sizeof(struct bnx2x_mcast_elem_group)) / \ + sizeof(union bnx2x_mcast_elem)) + struct bnx2x_pending_mcast_cmd { struct list_head link; + struct list_head group_head; int type; /* BNX2X_MCAST_CMD_X */ union { struct list_head macs_head; @@ -2638,16 +2653,29 @@ static int bnx2x_mcast_wait(struct bnx2x *bp, return 0; } +static void bnx2x_free_groups(struct list_head *mcast_group_list) +{ + struct bnx2x_mcast_elem_group *current_mcast_group; + + while (!list_empty(mcast_group_list)) { + current_mcast_group = list_first_entry(mcast_group_list, + struct bnx2x_mcast_elem_group, + mcast_group_link); + list_del(¤t_mcast_group->mcast_group_link); + free_page((unsigned long)current_mcast_group); + } +} + static int bnx2x_mcast_enqueue_cmd(struct bnx2x *bp, struct bnx2x_mcast_obj *o, struct bnx2x_mcast_ramrod_params *p, enum bnx2x_mcast_cmd cmd) { - int total_sz; struct bnx2x_pending_mcast_cmd *new_cmd; - struct bnx2x_mcast_mac_elem *cur_mac = NULL; struct bnx2x_mcast_list_elem *pos; - int macs_list_len = 0, macs_list_len_size; + struct bnx2x_mcast_elem_group *elem_group; + struct bnx2x_mcast_mac_elem *mac_elem; + int total_elems = 0, macs_list_len = 0, offset = 0; /* When adding MACs we'll need to store their values */ if (cmd == BNX2X_MCAST_CMD_ADD || cmd == BNX2X_MCAST_CMD_SET) @@ -2657,50 +2685,61 @@ static int bnx2x_mcast_enqueue_cmd(struct bnx2x *bp, if (!p->mcast_list_len) return 0; - /* For a set command, we need to allocate sufficient memory for all - * the bins, since we can't analyze at this point how much memory would - * be required. - */ - macs_list_len_size = macs_list_len * - sizeof(struct bnx2x_mcast_mac_elem); - if (cmd == BNX2X_MCAST_CMD_SET) { - int bin_size = BNX2X_MCAST_BINS_NUM * - sizeof(struct bnx2x_mcast_bin_elem); - - if (bin_size > macs_list_len_size) - macs_list_len_size = bin_size; - } - total_sz = sizeof(*new_cmd) + macs_list_len_size; - /* Add mcast is called under spin_lock, thus calling with GFP_ATOMIC */ - new_cmd = kzalloc(total_sz, GFP_ATOMIC); - + new_cmd = kzalloc(sizeof(*new_cmd), GFP_ATOMIC); if (!new_cmd) return -ENOMEM; + INIT_LIST_HEAD(&new_cmd->data.macs_head); + INIT_LIST_HEAD(&new_cmd->group_head); + new_cmd->type = cmd; + new_cmd->done = false; + DP(BNX2X_MSG_SP, "About to enqueue a new %d command. macs_list_len=%d\n", cmd, macs_list_len); - INIT_LIST_HEAD(&new_cmd->data.macs_head); - - new_cmd->type = cmd; - new_cmd->done = false; - switch (cmd) { case BNX2X_MCAST_CMD_ADD: case BNX2X_MCAST_CMD_SET: - cur_mac = (struct bnx2x_mcast_mac_elem *) - ((u8 *)new_cmd + sizeof(*new_cmd)); - - /* Push the MACs of the current command into the pending command - * MACs list: FIFO + /* For a set command, we need to allocate sufficient memory for + * all the bins, since we can't analyze at this point how much + * memory would be required. */ - list_for_each_entry(pos, &p->mcast_list, link) { - memcpy(cur_mac->mac, pos->mac, ETH_ALEN); - list_add_tail(&cur_mac->link, &new_cmd->data.macs_head); - cur_mac++; + total_elems = macs_list_len; + if (cmd == BNX2X_MCAST_CMD_SET) { + if (total_elems < BNX2X_MCAST_BINS_NUM) + total_elems = BNX2X_MCAST_BINS_NUM; + } + while (total_elems > 0) { + elem_group = (struct bnx2x_mcast_elem_group *) + __get_free_page(GFP_ATOMIC | __GFP_ZERO); + if (!elem_group) { + kfree(new_cmd); + bnx2x_free_groups(&new_cmd->group_head); + return -ENOMEM; + } + total_elems -= MCAST_MAC_ELEMS_PER_PG; + list_add_tail(&elem_group->mcast_group_link, + &new_cmd->group_head); + } + elem_group = list_first_entry(&new_cmd->group_head, + struct bnx2x_mcast_elem_group, + mcast_group_link); + list_for_each_entry(pos, &p->mcast_list, link) { + mac_elem = &elem_group->mcast_elems[offset].mac_elem; + memcpy(mac_elem->mac, pos->mac, ETH_ALEN); + /* Push the MACs of the current command into the pending + * command MACs list: FIFO + */ + list_add_tail(&mac_elem->link, + &new_cmd->data.macs_head); + offset++; + if (offset == MCAST_MAC_ELEMS_PER_PG) { + offset = 0; + elem_group = list_next_entry(elem_group, + mcast_group_link); + } } - break; case BNX2X_MCAST_CMD_DEL: @@ -2978,7 +3017,8 @@ bnx2x_mcast_hdl_pending_set_e2_convert(struct bnx2x *bp, u64 cur[BNX2X_MCAST_VEC_SZ], req[BNX2X_MCAST_VEC_SZ]; struct bnx2x_mcast_mac_elem *pmac_pos, *pmac_pos_n; struct bnx2x_mcast_bin_elem *p_item; - int i, cnt = 0, mac_cnt = 0; + struct bnx2x_mcast_elem_group *elem_group; + int cnt = 0, mac_cnt = 0, offset = 0, i; memset(req, 0, sizeof(u64) * BNX2X_MCAST_VEC_SZ); memcpy(cur, o->registry.aprox_match.vec, @@ -3001,9 +3041,10 @@ bnx2x_mcast_hdl_pending_set_e2_convert(struct bnx2x *bp, * a list that will be used to configure bins. */ cmd_pos->set_convert = true; - p_item = (struct bnx2x_mcast_bin_elem *)(cmd_pos + 1); INIT_LIST_HEAD(&cmd_pos->data.macs_head); - + elem_group = list_first_entry(&cmd_pos->group_head, + struct bnx2x_mcast_elem_group, + mcast_group_link); for (i = 0; i < BNX2X_MCAST_BINS_NUM; i++) { bool b_current = !!BIT_VEC64_TEST_BIT(cur, i); bool b_required = !!BIT_VEC64_TEST_BIT(req, i); @@ -3011,12 +3052,18 @@ bnx2x_mcast_hdl_pending_set_e2_convert(struct bnx2x *bp, if (b_current == b_required) continue; + p_item = &elem_group->mcast_elems[offset].bin_elem; p_item->bin = i; p_item->type = b_required ? BNX2X_MCAST_CMD_SET_ADD : BNX2X_MCAST_CMD_SET_DEL; list_add_tail(&p_item->link , &cmd_pos->data.macs_head); - p_item++; cnt++; + offset++; + if (offset == MCAST_MAC_ELEMS_PER_PG) { + offset = 0; + elem_group = list_next_entry(elem_group, + mcast_group_link); + } } /* We now definitely know how many commands are hiding here. @@ -3103,6 +3150,7 @@ static inline int bnx2x_mcast_handle_pending_cmds_e2(struct bnx2x *bp, */ if (cmd_pos->done) { list_del(&cmd_pos->link); + bnx2x_free_groups(&cmd_pos->group_head); kfree(cmd_pos); } @@ -3741,6 +3789,7 @@ static inline int bnx2x_mcast_handle_pending_cmds_e1( } list_del(&cmd_pos->link); + bnx2x_free_groups(&cmd_pos->group_head); kfree(cmd_pos); return cnt; From a762bb8ecb899d1e058b51f3daba4e1ed42b3479 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Sat, 17 Sep 2016 09:06:29 -0700 Subject: [PATCH 1541/1715] wlcore: Prepare family to fix nvs file handling Move struct wilink_family_data to be available for all TI WLAN variants. And fix familiy typo, it should be just family. Looks like wl12xx use two different nvs.bin files and wl18xx uses a different conf.bin file. Signed-off-by: Tony Lindgren Signed-off-by: Kalle Valo --- drivers/net/wireless/ti/wlcore/spi.c | 12 ++++-------- drivers/net/wireless/ti/wlcore/wlcore_i.h | 7 +++++++ 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/ti/wlcore/spi.c b/drivers/net/wireless/ti/wlcore/spi.c index 0ed526e4c0df..a336493dab9b 100644 --- a/drivers/net/wireless/ti/wlcore/spi.c +++ b/drivers/net/wireless/ti/wlcore/spi.c @@ -80,17 +80,13 @@ ((SPI_AGGR_BUFFER_SIZE / WSPI_MAX_CHUNK_SIZE) + 1) -struct wilink_familiy_data { - char name[8]; -}; +static const struct wilink_family_data *wilink_data; -static const struct wilink_familiy_data *wilink_data; - -static const struct wilink_familiy_data wl18xx_data = { +static const struct wilink_family_data wl18xx_data = { .name = "wl18xx", }; -static const struct wilink_familiy_data wl12xx_data = { +static const struct wilink_family_data wl12xx_data = { .name = "wl12xx", }; @@ -461,7 +457,7 @@ static int wlcore_probe_of(struct spi_device *spi, struct wl12xx_spi_glue *glue, return -ENODEV; wilink_data = of_id->data; - dev_info(&spi->dev, "selected chip familiy is %s\n", + dev_info(&spi->dev, "selected chip family is %s\n", wilink_data->name); if (of_find_property(dt_node, "clock-xtal", NULL)) diff --git a/drivers/net/wireless/ti/wlcore/wlcore_i.h b/drivers/net/wireless/ti/wlcore/wlcore_i.h index 0277ae508b8a..f68280db6e5e 100644 --- a/drivers/net/wireless/ti/wlcore/wlcore_i.h +++ b/drivers/net/wireless/ti/wlcore/wlcore_i.h @@ -42,6 +42,12 @@ */ #define WL12XX_NVS_NAME "ti-connectivity/wl1271-nvs.bin" +struct wilink_family_data { + const char *name; + const char *nvs_name; /* wl12xx nvs file */ + const char *cfg_name; /* wl18xx cfg file */ +}; + #define WL1271_TX_SECURITY_LO16(s) ((u16)((s) & 0xffff)) #define WL1271_TX_SECURITY_HI32(s) ((u32)(((s) >> 16) & 0xffffffff)) #define WL1271_TX_SQN_POST_RECOVERY_PADDING 0xff @@ -208,6 +214,7 @@ struct wl1271_if_operations { struct wlcore_platdev_data { struct wl1271_if_operations *if_ops; + const struct wilink_family_data *family; bool ref_clock_xtal; /* specify whether the clock is XTAL or not */ u32 ref_clock_freq; /* in Hertz */ From d776fc86b82ffd7cfe9eb4182cc398cb8ab4199c Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Sat, 17 Sep 2016 09:06:30 -0700 Subject: [PATCH 1542/1715] wlcore: sdio: Populate config firmware data Configure the config firmware names and make it available in platform data. Signed-off-by: Tony Lindgren Signed-off-by: Kalle Valo --- drivers/net/wireless/ti/wlcore/sdio.c | 76 +++++++++++++++++---------- 1 file changed, 47 insertions(+), 29 deletions(-) diff --git a/drivers/net/wireless/ti/wlcore/sdio.c b/drivers/net/wireless/ti/wlcore/sdio.c index 5839acbbc782..a6e94b1a12cb 100644 --- a/drivers/net/wireless/ti/wlcore/sdio.c +++ b/drivers/net/wireless/ti/wlcore/sdio.c @@ -216,17 +216,33 @@ static struct wl1271_if_operations sdio_ops = { }; #ifdef CONFIG_OF + +static const struct wilink_family_data wl127x_data = { + .name = "wl127x", + .nvs_name = "ti-connectivity/wl127x-nvs.bin", +}; + +static const struct wilink_family_data wl128x_data = { + .name = "wl128x", + .nvs_name = "ti-connectivity/wl128x-nvs.bin", +}; + +static const struct wilink_family_data wl18xx_data = { + .name = "wl18xx", + .cfg_name = "ti-connectivity/wl18xx-conf.bin", +}; + static const struct of_device_id wlcore_sdio_of_match_table[] = { - { .compatible = "ti,wl1271" }, - { .compatible = "ti,wl1273" }, - { .compatible = "ti,wl1281" }, - { .compatible = "ti,wl1283" }, - { .compatible = "ti,wl1801" }, - { .compatible = "ti,wl1805" }, - { .compatible = "ti,wl1807" }, - { .compatible = "ti,wl1831" }, - { .compatible = "ti,wl1835" }, - { .compatible = "ti,wl1837" }, + { .compatible = "ti,wl1271", .data = &wl127x_data }, + { .compatible = "ti,wl1273", .data = &wl127x_data }, + { .compatible = "ti,wl1281", .data = &wl128x_data }, + { .compatible = "ti,wl1283", .data = &wl128x_data }, + { .compatible = "ti,wl1801", .data = &wl18xx_data }, + { .compatible = "ti,wl1805", .data = &wl18xx_data }, + { .compatible = "ti,wl1807", .data = &wl18xx_data }, + { .compatible = "ti,wl1831", .data = &wl18xx_data }, + { .compatible = "ti,wl1835", .data = &wl18xx_data }, + { .compatible = "ti,wl1837", .data = &wl18xx_data }, { } }; @@ -234,9 +250,13 @@ static int wlcore_probe_of(struct device *dev, int *irq, struct wlcore_platdev_data *pdev_data) { struct device_node *np = dev->of_node; + const struct of_device_id *of_id; - if (!np || !of_match_node(wlcore_sdio_of_match_table, np)) - return -ENODATA; + of_id = of_match_node(wlcore_sdio_of_match_table, np); + if (!of_id) + return -ENODEV; + + pdev_data->family = of_id->data; *irq = irq_of_parse_and_map(np, 0); if (!*irq) { @@ -263,7 +283,7 @@ static int wlcore_probe_of(struct device *dev, int *irq, static int wl1271_probe(struct sdio_func *func, const struct sdio_device_id *id) { - struct wlcore_platdev_data pdev_data; + struct wlcore_platdev_data *pdev_data; struct wl12xx_sdio_glue *glue; struct resource res[1]; mmc_pm_flag_t mmcflags; @@ -275,14 +295,15 @@ static int wl1271_probe(struct sdio_func *func, if (func->num != 0x02) return -ENODEV; - memset(&pdev_data, 0x00, sizeof(pdev_data)); - pdev_data.if_ops = &sdio_ops; + pdev_data = devm_kzalloc(&func->dev, sizeof(*pdev_data), GFP_KERNEL); + if (!pdev_data) + return -ENOMEM; - glue = kzalloc(sizeof(*glue), GFP_KERNEL); - if (!glue) { - dev_err(&func->dev, "can't allocate glue\n"); - goto out; - } + pdev_data->if_ops = &sdio_ops; + + glue = devm_kzalloc(&func->dev, sizeof(*glue), GFP_KERNEL); + if (!glue) + return -ENOMEM; glue->dev = &func->dev; @@ -292,16 +313,16 @@ static int wl1271_probe(struct sdio_func *func, /* Use block mode for transferring over one block size of data */ func->card->quirks |= MMC_QUIRK_BLKSZ_FOR_BYTE_MODE; - ret = wlcore_probe_of(&func->dev, &irq, &pdev_data); + ret = wlcore_probe_of(&func->dev, &irq, pdev_data); if (ret) - goto out_free_glue; + goto out; /* if sdio can keep power while host is suspended, enable wow */ mmcflags = sdio_get_host_pm_caps(func); dev_dbg(glue->dev, "sdio PM caps = 0x%x\n", mmcflags); if (mmcflags & MMC_PM_KEEP_POWER) - pdev_data.pwr_in_suspend = true; + pdev_data->pwr_in_suspend = true; sdio_set_drvdata(func, glue); @@ -323,7 +344,7 @@ static int wl1271_probe(struct sdio_func *func, if (!glue->core) { dev_err(glue->dev, "can't allocate platform_device"); ret = -ENOMEM; - goto out_free_glue; + goto out; } glue->core->dev.parent = &func->dev; @@ -341,8 +362,8 @@ static int wl1271_probe(struct sdio_func *func, goto out_dev_put; } - ret = platform_device_add_data(glue->core, &pdev_data, - sizeof(pdev_data)); + ret = platform_device_add_data(glue->core, pdev_data, + sizeof(*pdev_data)); if (ret) { dev_err(glue->dev, "can't add platform data\n"); goto out_dev_put; @@ -358,9 +379,6 @@ static int wl1271_probe(struct sdio_func *func, out_dev_put: platform_device_put(glue->core); -out_free_glue: - kfree(glue); - out: return ret; } From c815fdebef444d591f57a1e90dcfa0e2f8112e10 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Sat, 17 Sep 2016 09:06:31 -0700 Subject: [PATCH 1543/1715] wlcore: spi: Populate config firmware data Configure the config firmware names and make it available in platform data. Let's also fix the order of the struct wilink_family_data while at it. Signed-off-by: Tony Lindgren Signed-off-by: Kalle Valo --- drivers/net/wireless/ti/wlcore/spi.c | 42 ++++++++++++++++------------ 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/drivers/net/wireless/ti/wlcore/spi.c b/drivers/net/wireless/ti/wlcore/spi.c index a336493dab9b..f949ad2bd898 100644 --- a/drivers/net/wireless/ti/wlcore/spi.c +++ b/drivers/net/wireless/ti/wlcore/spi.c @@ -79,15 +79,19 @@ #define WSPI_MAX_NUM_OF_CHUNKS \ ((SPI_AGGR_BUFFER_SIZE / WSPI_MAX_CHUNK_SIZE) + 1) +static const struct wilink_family_data wl127x_data = { + .name = "wl127x", + .nvs_name = "ti-connectivity/wl127x-nvs.bin", +}; -static const struct wilink_family_data *wilink_data; +static const struct wilink_family_data wl128x_data = { + .name = "wl128x", + .nvs_name = "ti-connectivity/wl128x-nvs.bin", +}; static const struct wilink_family_data wl18xx_data = { .name = "wl18xx", -}; - -static const struct wilink_family_data wl12xx_data = { - .name = "wl12xx", + .cfg_name = "ti-connectivity/wl18xx-conf.bin", }; struct wl12xx_spi_glue { @@ -425,10 +429,10 @@ static struct wl1271_if_operations spi_ops = { }; static const struct of_device_id wlcore_spi_of_match_table[] = { - { .compatible = "ti,wl1271", .data = &wl12xx_data}, - { .compatible = "ti,wl1273", .data = &wl12xx_data}, - { .compatible = "ti,wl1281", .data = &wl12xx_data}, - { .compatible = "ti,wl1283", .data = &wl12xx_data}, + { .compatible = "ti,wl1271", .data = &wl127x_data}, + { .compatible = "ti,wl1273", .data = &wl127x_data}, + { .compatible = "ti,wl1281", .data = &wl128x_data}, + { .compatible = "ti,wl1283", .data = &wl128x_data}, { .compatible = "ti,wl1801", .data = &wl18xx_data}, { .compatible = "ti,wl1805", .data = &wl18xx_data}, { .compatible = "ti,wl1807", .data = &wl18xx_data}, @@ -456,9 +460,9 @@ static int wlcore_probe_of(struct spi_device *spi, struct wl12xx_spi_glue *glue, if (!of_id) return -ENODEV; - wilink_data = of_id->data; + pdev_data->family = of_id->data; dev_info(&spi->dev, "selected chip family is %s\n", - wilink_data->name); + pdev_data->family->name); if (of_find_property(dt_node, "clock-xtal", NULL)) pdev_data->ref_clock_xtal = true; @@ -475,13 +479,15 @@ static int wlcore_probe_of(struct spi_device *spi, struct wl12xx_spi_glue *glue, static int wl1271_probe(struct spi_device *spi) { struct wl12xx_spi_glue *glue; - struct wlcore_platdev_data pdev_data; + struct wlcore_platdev_data *pdev_data; struct resource res[1]; int ret; - memset(&pdev_data, 0x00, sizeof(pdev_data)); + pdev_data = devm_kzalloc(&spi->dev, sizeof(*pdev_data), GFP_KERNEL); + if (!pdev_data) + return -ENOMEM; - pdev_data.if_ops = &spi_ops; + pdev_data->if_ops = &spi_ops; glue = devm_kzalloc(&spi->dev, sizeof(*glue), GFP_KERNEL); if (!glue) { @@ -505,7 +511,7 @@ static int wl1271_probe(struct spi_device *spi) return PTR_ERR(glue->reg); } - ret = wlcore_probe_of(spi, glue, &pdev_data); + ret = wlcore_probe_of(spi, glue, pdev_data); if (ret) { dev_err(glue->dev, "can't get device tree parameters (%d)\n", ret); @@ -518,7 +524,7 @@ static int wl1271_probe(struct spi_device *spi) return ret; } - glue->core = platform_device_alloc(wilink_data->name, + glue->core = platform_device_alloc(pdev_data->family->name, PLATFORM_DEVID_AUTO); if (!glue->core) { dev_err(glue->dev, "can't allocate platform_device\n"); @@ -539,8 +545,8 @@ static int wl1271_probe(struct spi_device *spi) goto out_dev_put; } - ret = platform_device_add_data(glue->core, &pdev_data, - sizeof(pdev_data)); + ret = platform_device_add_data(glue->core, pdev_data, + sizeof(*pdev_data)); if (ret) { dev_err(glue->dev, "can't add platform data\n"); goto out_dev_put; From 3e1ac932682b1377ae1c9d6283f0b88e28fb227d Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Sat, 17 Sep 2016 09:06:32 -0700 Subject: [PATCH 1544/1715] wlcore: Fix config firmware loading issues Booting multiple wl12xx and wl18xx devices using the same rootfs is a pain. You currently have to symlink the right nvs file depending on the wl12xx type. For example, with wl1271-nvs.bin being a symlink to wl127x-nvs.bin by default and trying to bring up a wl128x based device: wlcore: ERROR nvs size is not as expected: 1113 != 912 wlcore: ERROR NVS file is needed during boot wlcore: ERROR NVS file is needed during boot wlcore: ERROR firmware boot failed despite 3 retries Note that wl18xx uses a separate config firmware wl18xx-conf.bin that can be generated with tools using the following two git repos: git.ti.com/wilink8-wlan/18xx-ti-utils git.ti.com/wilink8-wlan/wl18xx_fw So let's not configure the nvs file for wl18xx as it's not needed AFAIK. If it turns out that we also need the nvs file for wl18xx, we can just add it to the config firmware data for wl18xx. Let's fix the issue by using the chip specific config firmware data, and make sure we produce understandable warnings if something is missing. Signed-off-by: Tony Lindgren Signed-off-by: Kalle Valo --- drivers/net/wireless/ti/wlcore/boot.c | 15 +++++++--- drivers/net/wireless/ti/wlcore/main.c | 36 +++++++++++++++-------- drivers/net/wireless/ti/wlcore/wlcore_i.h | 7 ----- 3 files changed, 35 insertions(+), 23 deletions(-) diff --git a/drivers/net/wireless/ti/wlcore/boot.c b/drivers/net/wireless/ti/wlcore/boot.c index f75d30444117..f00509ea8aca 100644 --- a/drivers/net/wireless/ti/wlcore/boot.c +++ b/drivers/net/wireless/ti/wlcore/boot.c @@ -282,6 +282,9 @@ EXPORT_SYMBOL_GPL(wlcore_boot_upload_firmware); int wlcore_boot_upload_nvs(struct wl1271 *wl) { + struct platform_device *pdev = wl->pdev; + struct wlcore_platdev_data *pdev_data = dev_get_platdata(&pdev->dev); + const char *nvs_name = "unknown"; size_t nvs_len, burst_len; int i; u32 dest_addr, val; @@ -293,6 +296,9 @@ int wlcore_boot_upload_nvs(struct wl1271 *wl) return -ENODEV; } + if (pdev_data && pdev_data->family) + nvs_name = pdev_data->family->nvs_name; + if (wl->quirks & WLCORE_QUIRK_LEGACY_NVS) { struct wl1271_nvs_file *nvs = (struct wl1271_nvs_file *)wl->nvs; @@ -310,8 +316,9 @@ int wlcore_boot_upload_nvs(struct wl1271 *wl) if (wl->nvs_len != sizeof(struct wl1271_nvs_file) && (wl->nvs_len != WL1271_INI_LEGACY_NVS_FILE_SIZE || wl->enable_11a)) { - wl1271_error("nvs size is not as expected: %zu != %zu", - wl->nvs_len, sizeof(struct wl1271_nvs_file)); + wl1271_error("%s size is not as expected: %zu != %zu", + nvs_name, wl->nvs_len, + sizeof(struct wl1271_nvs_file)); kfree(wl->nvs); wl->nvs = NULL; wl->nvs_len = 0; @@ -328,8 +335,8 @@ int wlcore_boot_upload_nvs(struct wl1271 *wl) if (nvs->general_params.dual_mode_select) wl->enable_11a = true; } else { - wl1271_error("nvs size is not as expected: %zu != %zu", - wl->nvs_len, + wl1271_error("%s size is not as expected: %zu != %zu", + nvs_name, wl->nvs_len, sizeof(struct wl128x_nvs_file)); kfree(wl->nvs); wl->nvs = NULL; diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c index ef6c15b952cc..471521a0db7b 100644 --- a/drivers/net/wireless/ti/wlcore/main.c +++ b/drivers/net/wireless/ti/wlcore/main.c @@ -6413,9 +6413,12 @@ static void wlcore_nvs_cb(const struct firmware *fw, void *context) goto out; } wl->nvs_len = fw->size; - } else { + } else if (pdev_data->family->nvs_name) { wl1271_debug(DEBUG_BOOT, "Could not get nvs file %s", - WL12XX_NVS_NAME); + pdev_data->family->nvs_name); + wl->nvs = NULL; + wl->nvs_len = 0; + } else { wl->nvs = NULL; wl->nvs_len = 0; } @@ -6510,21 +6513,29 @@ out: int wlcore_probe(struct wl1271 *wl, struct platform_device *pdev) { - int ret; + struct wlcore_platdev_data *pdev_data = dev_get_platdata(&pdev->dev); + const char *nvs_name; + int ret = 0; - if (!wl->ops || !wl->ptable) + if (!wl->ops || !wl->ptable || !pdev_data) return -EINVAL; wl->dev = &pdev->dev; wl->pdev = pdev; platform_set_drvdata(pdev, wl); - ret = request_firmware_nowait(THIS_MODULE, FW_ACTION_HOTPLUG, - WL12XX_NVS_NAME, &pdev->dev, GFP_KERNEL, - wl, wlcore_nvs_cb); - if (ret < 0) { - wl1271_error("request_firmware_nowait failed: %d", ret); - complete_all(&wl->nvs_loading_complete); + if (pdev_data->family && pdev_data->family->nvs_name) { + nvs_name = pdev_data->family->nvs_name; + ret = request_firmware_nowait(THIS_MODULE, FW_ACTION_HOTPLUG, + nvs_name, &pdev->dev, GFP_KERNEL, + wl, wlcore_nvs_cb); + if (ret < 0) { + wl1271_error("request_firmware_nowait failed for %s: %d", + nvs_name, ret); + complete_all(&wl->nvs_loading_complete); + } + } else { + wlcore_nvs_cb(NULL, wl); } return ret; @@ -6533,9 +6544,11 @@ EXPORT_SYMBOL_GPL(wlcore_probe); int wlcore_remove(struct platform_device *pdev) { + struct wlcore_platdev_data *pdev_data = dev_get_platdata(&pdev->dev); struct wl1271 *wl = platform_get_drvdata(pdev); - wait_for_completion(&wl->nvs_loading_complete); + if (pdev_data->family && pdev_data->family->nvs_name) + wait_for_completion(&wl->nvs_loading_complete); if (!wl->initialized) return 0; @@ -6572,4 +6585,3 @@ MODULE_PARM_DESC(no_recovery, "Prevent HW recovery. FW will remain stuck."); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Luciano Coelho "); MODULE_AUTHOR("Juuso Oikarinen "); -MODULE_FIRMWARE(WL12XX_NVS_NAME); diff --git a/drivers/net/wireless/ti/wlcore/wlcore_i.h b/drivers/net/wireless/ti/wlcore/wlcore_i.h index f68280db6e5e..e840985385fc 100644 --- a/drivers/net/wireless/ti/wlcore/wlcore_i.h +++ b/drivers/net/wireless/ti/wlcore/wlcore_i.h @@ -35,13 +35,6 @@ #include "conf.h" #include "ini.h" -/* - * wl127x and wl128x are using the same NVS file name. However, the - * ini parameters between them are different. The driver validates - * the correct NVS size in wl1271_boot_upload_nvs(). - */ -#define WL12XX_NVS_NAME "ti-connectivity/wl1271-nvs.bin" - struct wilink_family_data { const char *name; const char *nvs_name; /* wl12xx nvs file */ From 33e40d025ed82fcab102035db38a727c914399a4 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Sat, 17 Sep 2016 09:06:33 -0700 Subject: [PATCH 1545/1715] wlcore: wl18xx: Use chip specific configuration firmware Use the wl18xx specific config firmware we now have available. Signed-off-by: Tony Lindgren Signed-off-by: Kalle Valo --- drivers/net/wireless/ti/wl18xx/main.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireless/ti/wl18xx/main.c b/drivers/net/wireless/ti/wl18xx/main.c index 00a04dfc03d1..06d6943b257c 100644 --- a/drivers/net/wireless/ti/wl18xx/main.c +++ b/drivers/net/wireless/ti/wl18xx/main.c @@ -1397,25 +1397,24 @@ out: return ret; } -#define WL18XX_CONF_FILE_NAME "ti-connectivity/wl18xx-conf.bin" - static int wl18xx_load_conf_file(struct device *dev, struct wlcore_conf *conf, - struct wl18xx_priv_conf *priv_conf) + struct wl18xx_priv_conf *priv_conf, + const char *file) { struct wlcore_conf_file *conf_file; const struct firmware *fw; int ret; - ret = request_firmware(&fw, WL18XX_CONF_FILE_NAME, dev); + ret = request_firmware(&fw, file, dev); if (ret < 0) { wl1271_error("could not get configuration binary %s: %d", - WL18XX_CONF_FILE_NAME, ret); + file, ret); return ret; } if (fw->size != WL18XX_CONF_SIZE) { - wl1271_error("configuration binary file size is wrong, expected %zu got %zu", - WL18XX_CONF_SIZE, fw->size); + wl1271_error("%s configuration binary size is wrong, expected %zu got %zu", + file, WL18XX_CONF_SIZE, fw->size); ret = -EINVAL; goto out_release; } @@ -1448,9 +1447,12 @@ out_release: static int wl18xx_conf_init(struct wl1271 *wl, struct device *dev) { + struct platform_device *pdev = wl->pdev; + struct wlcore_platdev_data *pdata = dev_get_platdata(&pdev->dev); struct wl18xx_priv *priv = wl->priv; - if (wl18xx_load_conf_file(dev, &wl->conf, &priv->conf) < 0) { + if (wl18xx_load_conf_file(dev, &wl->conf, &priv->conf, + pdata->family->cfg_name) < 0) { wl1271_warning("falling back to default config"); /* apply driver default configuration */ @@ -2141,4 +2143,3 @@ MODULE_PARM_DESC(num_rx_desc_param, MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Luciano Coelho "); MODULE_FIRMWARE(WL18XX_FW_NAME); -MODULE_FIRMWARE(WL18XX_CONF_FILE_NAME); From 89951db2be53106edbe0d24b3b5f9a787326daf6 Mon Sep 17 00:00:00 2001 From: Ganapathi Bhat Date: Tue, 20 Sep 2016 18:46:23 +0530 Subject: [PATCH 1546/1715] mwifiex: cfg80211 set_default_mgmt_key handler Previously device used to start using IGTK key as Tx key as soon as it gets downloaded in add_key(). This patch implements set_default_mgmt_key handler. We will update Tx key ID in set_default_mgmt_key(). Signed-off-by: Ganapathi Bhat Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- .../net/wireless/marvell/mwifiex/cfg80211.c | 24 +++++++++++++++++++ drivers/net/wireless/marvell/mwifiex/fw.h | 1 + drivers/net/wireless/marvell/mwifiex/ioctl.h | 1 + .../net/wireless/marvell/mwifiex/sta_cmd.c | 5 ++++ 4 files changed, 31 insertions(+) diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c index c7f2faa81921..39ce76ad00bc 100644 --- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c +++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c @@ -483,6 +483,29 @@ mwifiex_cfg80211_add_key(struct wiphy *wiphy, struct net_device *netdev, return 0; } +/* + * CFG802.11 operation handler to set default mgmt key. + */ +static int +mwifiex_cfg80211_set_default_mgmt_key(struct wiphy *wiphy, + struct net_device *netdev, + u8 key_index) +{ + struct mwifiex_private *priv = mwifiex_netdev_get_priv(netdev); + struct mwifiex_ds_encrypt_key encrypt_key; + + wiphy_dbg(wiphy, "set default mgmt key, key index=%d\n", key_index); + + memset(&encrypt_key, 0, sizeof(struct mwifiex_ds_encrypt_key)); + encrypt_key.key_len = WLAN_KEY_LEN_CCMP; + encrypt_key.key_index = key_index; + encrypt_key.is_igtk_def_key = true; + eth_broadcast_addr(encrypt_key.mac_addr); + + return mwifiex_send_cmd(priv, HostCmd_CMD_802_11_KEY_MATERIAL, + HostCmd_ACT_GEN_SET, true, &encrypt_key, true); +} + /* * This function sends domain information to the firmware. * @@ -4082,6 +4105,7 @@ static struct cfg80211_ops mwifiex_cfg80211_ops = { .leave_ibss = mwifiex_cfg80211_leave_ibss, .add_key = mwifiex_cfg80211_add_key, .del_key = mwifiex_cfg80211_del_key, + .set_default_mgmt_key = mwifiex_cfg80211_set_default_mgmt_key, .mgmt_tx = mwifiex_cfg80211_mgmt_tx, .mgmt_frame_register = mwifiex_cfg80211_mgmt_frame_register, .remain_on_channel = mwifiex_cfg80211_remain_on_channel, diff --git a/drivers/net/wireless/marvell/mwifiex/fw.h b/drivers/net/wireless/marvell/mwifiex/fw.h index 18aa5256e88b..4b1894b4757f 100644 --- a/drivers/net/wireless/marvell/mwifiex/fw.h +++ b/drivers/net/wireless/marvell/mwifiex/fw.h @@ -78,6 +78,7 @@ enum KEY_TYPE_ID { KEY_TYPE_ID_AES, KEY_TYPE_ID_WAPI, KEY_TYPE_ID_AES_CMAC, + KEY_TYPE_ID_AES_CMAC_DEF, }; #define WPA_PN_SIZE 8 diff --git a/drivers/net/wireless/marvell/mwifiex/ioctl.h b/drivers/net/wireless/marvell/mwifiex/ioctl.h index 70429815ff53..536ab834b126 100644 --- a/drivers/net/wireless/marvell/mwifiex/ioctl.h +++ b/drivers/net/wireless/marvell/mwifiex/ioctl.h @@ -260,6 +260,7 @@ struct mwifiex_ds_encrypt_key { u8 is_igtk_key; u8 is_current_wep_key; u8 is_rx_seq_valid; + u8 is_igtk_def_key; }; struct mwifiex_power_cfg { diff --git a/drivers/net/wireless/marvell/mwifiex/sta_cmd.c b/drivers/net/wireless/marvell/mwifiex/sta_cmd.c index 49048b41fd36..2a162c33d271 100644 --- a/drivers/net/wireless/marvell/mwifiex/sta_cmd.c +++ b/drivers/net/wireless/marvell/mwifiex/sta_cmd.c @@ -598,6 +598,11 @@ static int mwifiex_set_aes_key_v2(struct mwifiex_private *priv, memcpy(km->key_param_set.key_params.cmac_aes.key, enc_key->key_material, enc_key->key_len); len += sizeof(struct mwifiex_cmac_aes_param); + } else if (enc_key->is_igtk_def_key) { + mwifiex_dbg(adapter, INFO, + "%s: Set CMAC default Key index\n", __func__); + km->key_param_set.key_type = KEY_TYPE_ID_AES_CMAC_DEF; + km->key_param_set.key_idx = enc_key->key_index & KEY_INDEX_MASK; } else { mwifiex_dbg(adapter, INFO, "%s: Set AES Key\n", __func__); From b3589dfe02123a0d0ea82076a9f8ef84a46852c0 Mon Sep 17 00:00:00 2001 From: Hante Meuleman Date: Mon, 19 Sep 2016 12:09:51 +0100 Subject: [PATCH 1547/1715] brcmfmac: ignore 11d configuration errors 802.11d is not always supported by firmware anymore. Currently the AP configuration of 11d will cause an abort if the ioctl set is failing. This behavior is not correct and the error should be ignored. Reviewed-by: Arend Van Spriel Reviewed-by: Franky Lin Reviewed-by: Pieter-Paul Giesberts Signed-off-by: Hante Meuleman Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- .../broadcom/brcm80211/brcmfmac/cfg80211.c | 27 ++++++++++--------- 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index 748eaa68cf07..0f2667e95e81 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -4502,6 +4502,7 @@ brcmf_cfg80211_start_ap(struct wiphy *wiphy, struct net_device *ndev, u16 chanspec = chandef_to_chanspec(&cfg->d11inf, &settings->chandef); bool mbss; int is_11d; + bool supports_11d; brcmf_dbg(TRACE, "ctrlchn=%d, center=%d, bw=%d, beacon_interval=%d, dtim_period=%d,\n", settings->chandef.chan->hw_value, @@ -4514,11 +4515,16 @@ brcmf_cfg80211_start_ap(struct wiphy *wiphy, struct net_device *ndev, mbss = ifp->vif->mbss; /* store current 11d setting */ - brcmf_fil_cmd_int_get(ifp, BRCMF_C_GET_REGULATORY, &ifp->vif->is_11d); - country_ie = brcmf_parse_tlvs((u8 *)settings->beacon.tail, - settings->beacon.tail_len, - WLAN_EID_COUNTRY); - is_11d = country_ie ? 1 : 0; + if (brcmf_fil_cmd_int_get(ifp, BRCMF_C_GET_REGULATORY, + &ifp->vif->is_11d)) { + supports_11d = false; + } else { + country_ie = brcmf_parse_tlvs((u8 *)settings->beacon.tail, + settings->beacon.tail_len, + WLAN_EID_COUNTRY); + is_11d = country_ie ? 1 : 0; + supports_11d = true; + } memset(&ssid_le, 0, sizeof(ssid_le)); if (settings->ssid == NULL || settings->ssid_len == 0) { @@ -4577,7 +4583,7 @@ brcmf_cfg80211_start_ap(struct wiphy *wiphy, struct net_device *ndev, /* Parameters shared by all radio interfaces */ if (!mbss) { - if (is_11d != ifp->vif->is_11d) { + if ((supports_11d) && (is_11d != ifp->vif->is_11d)) { err = brcmf_fil_cmd_int_set(ifp, BRCMF_C_SET_REGULATORY, is_11d); if (err < 0) { @@ -4619,7 +4625,7 @@ brcmf_cfg80211_start_ap(struct wiphy *wiphy, struct net_device *ndev, brcmf_err("SET INFRA error %d\n", err); goto exit; } - } else if (WARN_ON(is_11d != ifp->vif->is_11d)) { + } else if (WARN_ON(supports_11d && (is_11d != ifp->vif->is_11d))) { /* Multiple-BSS should use same 11d configuration */ err = -EINVAL; goto exit; @@ -4753,11 +4759,8 @@ static int brcmf_cfg80211_stop_ap(struct wiphy *wiphy, struct net_device *ndev) brcmf_err("setting INFRA mode failed %d\n", err); if (brcmf_feat_is_enabled(ifp, BRCMF_FEAT_MBSS)) brcmf_fil_iovar_int_set(ifp, "mbss", 0); - err = brcmf_fil_cmd_int_set(ifp, BRCMF_C_SET_REGULATORY, - ifp->vif->is_11d); - if (err < 0) - brcmf_err("restoring REGULATORY setting failed %d\n", - err); + brcmf_fil_cmd_int_set(ifp, BRCMF_C_SET_REGULATORY, + ifp->vif->is_11d); /* Bring device back up so it can be used again */ err = brcmf_fil_cmd_int_set(ifp, BRCMF_C_UP, 1); if (err < 0) From 704d1c6b56f4ee2ad6a5f012a72a278d17c1a223 Mon Sep 17 00:00:00 2001 From: Arend Van Spriel Date: Mon, 19 Sep 2016 12:09:52 +0100 Subject: [PATCH 1548/1715] brcmfmac: rework pointer trickery in brcmf_proto_bcdc_query_dcmd() The variable info is assigned to point to bcdc->msg[1], which is the same as pointing to bcdc->buf. As that is what we want to access make it clear by fixing the assignment. This also avoid out-of-bounds errors from static analyzers are bcdc->msg[1] is not in the structure definition. Reviewed-by: Hante Meuleman Reviewed-by: Pieter-Paul Giesberts Reviewed-by: Franky Lin Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcdc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcdc.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcdc.c index d1bc51f92686..038a960c5104 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcdc.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcdc.c @@ -194,7 +194,7 @@ retry: } /* Check info buffer */ - info = (void *)&msg[1]; + info = (void *)&bcdc->buf[0]; /* Copy info buffer */ if (buf) { From bc981641360183990de59da17f9f560f9150b801 Mon Sep 17 00:00:00 2001 From: Arend Van Spriel Date: Mon, 19 Sep 2016 12:09:53 +0100 Subject: [PATCH 1549/1715] brcmfmac: fix memory leak in brcmf_flowring_add_tdls_peer() In the error paths in brcmf_flowring_add_tdls_peer() the allocated resource should be freed. Reviewed-by: Hante Meuleman Reviewed-by: Pieter-Paul Giesberts Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- .../net/wireless/broadcom/brcm80211/brcmfmac/flowring.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/flowring.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/flowring.c index 7e269f9aa607..b16b367b0569 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/flowring.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/flowring.c @@ -495,14 +495,18 @@ void brcmf_flowring_add_tdls_peer(struct brcmf_flowring *flow, int ifidx, } else { search = flow->tdls_entry; if (memcmp(search->mac, peer, ETH_ALEN) == 0) - return; + goto free_entry; while (search->next) { search = search->next; if (memcmp(search->mac, peer, ETH_ALEN) == 0) - return; + goto free_entry; } search->next = tdls_entry; } flow->tdls_active = true; + return; + +free_entry: + kfree(tdls_entry); } From 26305d3d7298d1ddf8fd4ce95a382aa90534f0a3 Mon Sep 17 00:00:00 2001 From: Arend Van Spriel Date: Mon, 19 Sep 2016 12:09:54 +0100 Subject: [PATCH 1550/1715] brcmfmac: initialize variable in brcmf_sdiod_regrl() In case of an error the variable returned is uninitialized. The caller will probably check the error code before using it, but better assure it is set to zero. Reviewed-by: Hante Meuleman Reviewed-by: Pieter-Paul Giesberts Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c index 03404cbe9237..72139b579b18 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c @@ -420,7 +420,7 @@ u8 brcmf_sdiod_regrb(struct brcmf_sdio_dev *sdiodev, u32 addr, int *ret) u32 brcmf_sdiod_regrl(struct brcmf_sdio_dev *sdiodev, u32 addr, int *ret) { - u32 data; + u32 data = 0; int retval; brcmf_dbg(SDIO, "addr:0x%08x\n", addr); From 8fa5fdec09cd379c9ecb8972f344f8f308e0ccf3 Mon Sep 17 00:00:00 2001 From: Arend Van Spriel Date: Mon, 19 Sep 2016 12:09:55 +0100 Subject: [PATCH 1551/1715] brcmfmac: remove worker from .ndo_set_mac_address() callback As it turns out there is no need to use a worker for the callback because it is not called from atomic context. Reported-by: Dan Williams Reviewed-by: Hante Meuleman Reviewed-by: Pieter-Paul Giesberts Reviewed-by: Franky Lin Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- .../broadcom/brcm80211/brcmfmac/core.c | 39 +++++++------------ .../broadcom/brcm80211/brcmfmac/core.h | 2 - 2 files changed, 13 insertions(+), 28 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c index 65e8c8766441..3f6a58035077 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c @@ -136,27 +136,6 @@ static void _brcmf_set_multicast_list(struct work_struct *work) err); } -static void -_brcmf_set_mac_address(struct work_struct *work) -{ - struct brcmf_if *ifp; - s32 err; - - ifp = container_of(work, struct brcmf_if, setmacaddr_work); - - brcmf_dbg(TRACE, "Enter, bsscfgidx=%d\n", ifp->bsscfgidx); - - err = brcmf_fil_iovar_data_set(ifp, "cur_etheraddr", ifp->mac_addr, - ETH_ALEN); - if (err < 0) { - brcmf_err("Setting cur_etheraddr failed, %d\n", err); - } else { - brcmf_dbg(TRACE, "MAC address updated to %pM\n", - ifp->mac_addr); - memcpy(ifp->ndev->dev_addr, ifp->mac_addr, ETH_ALEN); - } -} - #if IS_ENABLED(CONFIG_IPV6) static void _brcmf_update_ndtable(struct work_struct *work) { @@ -190,10 +169,20 @@ static int brcmf_netdev_set_mac_address(struct net_device *ndev, void *addr) { struct brcmf_if *ifp = netdev_priv(ndev); struct sockaddr *sa = (struct sockaddr *)addr; + int err; - memcpy(&ifp->mac_addr, sa->sa_data, ETH_ALEN); - schedule_work(&ifp->setmacaddr_work); - return 0; + brcmf_dbg(TRACE, "Enter, bsscfgidx=%d\n", ifp->bsscfgidx); + + err = brcmf_fil_iovar_data_set(ifp, "cur_etheraddr", sa->sa_data, + ETH_ALEN); + if (err < 0) { + brcmf_err("Setting cur_etheraddr failed, %d\n", err); + } else { + brcmf_dbg(TRACE, "updated to %pM\n", sa->sa_data); + memcpy(ifp->mac_addr, sa->sa_data, ETH_ALEN); + memcpy(ifp->ndev->dev_addr, ifp->mac_addr, ETH_ALEN); + } + return err; } static void brcmf_netdev_set_multicast_list(struct net_device *ndev) @@ -525,7 +514,6 @@ int brcmf_net_attach(struct brcmf_if *ifp, bool rtnl_locked) /* set the mac address */ memcpy(ndev->dev_addr, ifp->mac_addr, ETH_ALEN); - INIT_WORK(&ifp->setmacaddr_work, _brcmf_set_mac_address); INIT_WORK(&ifp->multicast_work, _brcmf_set_multicast_list); INIT_WORK(&ifp->ndoffload_work, _brcmf_update_ndtable); @@ -730,7 +718,6 @@ static void brcmf_del_if(struct brcmf_pub *drvr, s32 bsscfgidx, } if (ifp->ndev->netdev_ops == &brcmf_netdev_ops_pri) { - cancel_work_sync(&ifp->setmacaddr_work); cancel_work_sync(&ifp->multicast_work); cancel_work_sync(&ifp->ndoffload_work); } diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h index 8fa34cad5a96..8a810bb24a11 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h @@ -176,7 +176,6 @@ enum brcmf_netif_stop_reason { * @vif: points to cfg80211 specific interface information. * @ndev: associated network device. * @stats: interface specific network statistics. - * @setmacaddr_work: worker object for setting mac address. * @multicast_work: worker object for multicast provisioning. * @ndoffload_work: worker object for neighbor discovery offload configuration. * @fws_desc: interface specific firmware-signalling descriptor. @@ -193,7 +192,6 @@ struct brcmf_if { struct brcmf_cfg80211_vif *vif; struct net_device *ndev; struct net_device_stats stats; - struct work_struct setmacaddr_work; struct work_struct multicast_work; struct work_struct ndoffload_work; struct brcmf_fws_mac_descriptor *fws_desc; From 835680b82f029818c813324aed3073cdcf63241f Mon Sep 17 00:00:00 2001 From: Hante Meuleman Date: Mon, 19 Sep 2016 12:09:56 +0100 Subject: [PATCH 1552/1715] brcmfmac: remove unnecessary null pointer check in the function brcmf_bus_start() in the exception handling a check is made to dermine whether ifp is null, though this is not possible. Removing the unnessary check. Reviewed-by: Arend Van Spriel Reviewed-by: Franky Lin Reviewed-by: Pieter-Paul Giesberts Signed-off-by: Hante Meuleman Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c index 3f6a58035077..9a05371453ce 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c @@ -1048,8 +1048,7 @@ fail: brcmf_fws_del_interface(ifp); brcmf_fws_deinit(drvr); } - if (ifp) - brcmf_net_detach(ifp->ndev, false); + brcmf_net_detach(ifp->ndev, false); if (p2p_ifp) brcmf_net_detach(p2p_ifp->ndev, false); drvr->iflist[0] = NULL; From 2b7425f3629b38c438f890c20c5faeca64b144ff Mon Sep 17 00:00:00 2001 From: Hante Meuleman Date: Mon, 19 Sep 2016 12:09:57 +0100 Subject: [PATCH 1553/1715] brcmfmac: fix clearing entry IPv6 address When IPv6 address is to be cleared there is a possible out of bound access. But also the clearing of the last entry and the adjustment of total number of stored IPv6 addresses is not updated. This patch fixes that bug. Bug was found using coverity. Reviewed-by: Arend Van Spriel Reviewed-by: Franky Lin Reviewed-by: Pieter-Paul Giesberts Signed-off-by: Hante Meuleman Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c index 9a05371453ce..7a65f9da048a 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c @@ -873,9 +873,12 @@ static int brcmf_inet6addr_changed(struct notifier_block *nb, } break; case NETDEV_DOWN: - if (i < NDOL_MAX_ENTRIES) - for (; i < ifp->ipv6addr_idx; i++) + if (i < NDOL_MAX_ENTRIES) { + for (; i < ifp->ipv6addr_idx - 1; i++) table[i] = table[i + 1]; + memset(&table[i], 0, sizeof(table[i])); + ifp->ipv6addr_idx--; + } break; default: break; From a7ed7828ecda0c2b5e0d7f55dedd4230afd4b583 Mon Sep 17 00:00:00 2001 From: Hante Meuleman Date: Mon, 19 Sep 2016 12:09:58 +0100 Subject: [PATCH 1554/1715] brcmfmac: fix out of bound access on clearing wowl wake indicator Clearing the wowl wakeindicator happens with a rather odd construction where the string "clear" is used to set the iovar wowl_wakeind. This was implemented incorrectly as it caused an out of bound access. Use an intermediate variable of correct length and copy string in that. Problem was found using coverity. Reviewed-by: Arend Van Spriel Reviewed-by: Franky Lin Reviewed-by: Pieter-Paul Giesberts Signed-off-by: Hante Meuleman Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index 0f2667e95e81..d97d6b153d6a 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -3703,6 +3703,7 @@ static void brcmf_configure_wowl(struct brcmf_cfg80211_info *cfg, struct cfg80211_wowlan *wowl) { u32 wowl_config; + struct brcmf_wowl_wakeind_le wowl_wakeind; u32 i; brcmf_dbg(TRACE, "Suspend, wowl config.\n"); @@ -3744,8 +3745,9 @@ static void brcmf_configure_wowl(struct brcmf_cfg80211_info *cfg, if (!test_bit(BRCMF_VIF_STATUS_CONNECTED, &ifp->vif->sme_state)) wowl_config |= BRCMF_WOWL_UNASSOC; - brcmf_fil_iovar_data_set(ifp, "wowl_wakeind", "clear", - sizeof(struct brcmf_wowl_wakeind_le)); + memcpy(&wowl_wakeind, "clear", 6); + brcmf_fil_iovar_data_set(ifp, "wowl_wakeind", &wowl_wakeind, + sizeof(wowl_wakeind)); brcmf_fil_iovar_int_set(ifp, "wowl", wowl_config); brcmf_fil_iovar_int_set(ifp, "wowl_activate", 1); brcmf_bus_wowl_config(cfg->pub->bus_if, true); From 92c313604711a0976def79dabb9e8da3cc2cc780 Mon Sep 17 00:00:00 2001 From: Hante Meuleman Date: Mon, 19 Sep 2016 12:09:59 +0100 Subject: [PATCH 1555/1715] brcmfmac: simplify mapping of auth type The 802.11 standard only has four valid auth type configurations of which our firmware only supports two, ie. Open System and Shared Key. Simplify the mapping falling back to automatic for other types specified by user-space. Reviewed-by: Arend Van Spriel Reviewed-by: Franky Lin Reviewed-by: Pieter-Paul Giesberts Signed-off-by: Hante Meuleman Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- .../net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index d97d6b153d6a..6aeb69cb29b6 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -1595,15 +1595,9 @@ static s32 brcmf_set_auth_type(struct net_device *ndev, val = 1; brcmf_dbg(CONN, "shared key\n"); break; - case NL80211_AUTHTYPE_AUTOMATIC: - val = 2; - brcmf_dbg(CONN, "automatic\n"); - break; - case NL80211_AUTHTYPE_NETWORK_EAP: - brcmf_dbg(CONN, "network eap\n"); default: val = 2; - brcmf_err("invalid auth type (%d)\n", sme->auth_type); + brcmf_dbg(CONN, "automatic, auth type (%d)\n", sme->auth_type); break; } From 1afac196c16753f93d482eedb9aeb802e740e67e Mon Sep 17 00:00:00 2001 From: Cathy Luo Date: Tue, 20 Sep 2016 20:49:02 +0530 Subject: [PATCH 1556/1715] mwifiex: fix kernel crash for USB chipsets Following crash issue is observed during TCP traffic stress test [ 2253.625439] NMI watchdog: BUG: soft lockup - CPU#3 stuck for 22s! [kworker/u17:1:5191] [ 2253.625520] Call Trace: [ 2253.625527] [] ? moal_spin_lock+0x30/0x30 [usb8xxx] [ 2253.625533] [] ? wlan_wmm_lists_empty+0xb/0xf0 [mlan] [ 2253.625537] [] mlan_main_process+0x1b3/0x720 [mlan] [ 2253.625540] [] woal_main_work_queue+0x45/0x80 [usb8xxx] [ 2253.625543] [] process_one_work+0x150/0x3f0 [ 2253.625545] [] worker_thread+0x121/0x520 [ 2253.625547] [] ? rescuer_thread+0x330/0x330 [ 2253.625549] [] kthread+0xd2/0xf0 [ 2253.625551] [] ? kthread_create_on_node+0x1c0/0x1c0 [ 2253.625553] [] ret_from_fork+0x7c/0xb0 [ 2253.625555] [] ? kthread_create_on_node+0x1c0/0x1c0 In mwifiex_usb_tx_complete(), we are updating port->block_status first and then freeing the skb attached to that URB. We may end up attaching new skb to URB in a corner case and same will be freed. This results in the kernel crash. The problem is solved by changing the sequence. Signed-off-by: Cathy Luo Signed-off-by: Shengzhen Li Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/usb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/usb.c b/drivers/net/wireless/marvell/mwifiex/usb.c index 8a20620fa119..e8283dc1f088 100644 --- a/drivers/net/wireless/marvell/mwifiex/usb.c +++ b/drivers/net/wireless/marvell/mwifiex/usb.c @@ -273,6 +273,8 @@ static void mwifiex_usb_tx_complete(struct urb *urb) } else { mwifiex_dbg(adapter, DATA, "%s: DATA\n", __func__); + mwifiex_write_data_complete(adapter, context->skb, 0, + urb->status ? -1 : 0); for (i = 0; i < MWIFIEX_TX_DATA_PORT; i++) { port = &card->port[i]; if (context->ep == port->tx_data_ep) { @@ -282,8 +284,6 @@ static void mwifiex_usb_tx_complete(struct urb *urb) } } adapter->data_sent = false; - mwifiex_write_data_complete(adapter, context->skb, 0, - urb->status ? -1 : 0); } if (card->mc_resync_flag) From 5476f8030d9a9f7082ba5a4d4f0a1bfbf6936800 Mon Sep 17 00:00:00 2001 From: Cathy Luo Date: Tue, 20 Sep 2016 20:49:03 +0530 Subject: [PATCH 1557/1715] mwifiex: fix race condition causing tx timeout It's been observed that in a corner case mwifiex_usb_tx_complete() gets called before we exit from mwifiex_usb_host_to_card() after submitting the urb. 'data_sent' flag remains set in this case. It blocks further Tx packets and triggers watchdog timeout. The problem is fixed by setting data_sent and port_block flag at correct place. Signed-off-by: Cathy Luo Signed-off-by: Shengzhen Li Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/usb.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/usb.c b/drivers/net/wireless/marvell/mwifiex/usb.c index e8283dc1f088..8fd51e42ea5e 100644 --- a/drivers/net/wireless/marvell/mwifiex/usb.c +++ b/drivers/net/wireless/marvell/mwifiex/usb.c @@ -897,6 +897,13 @@ static int mwifiex_usb_host_to_card(struct mwifiex_adapter *adapter, u8 ep, else atomic_inc(&port->tx_data_urb_pending); + if (ep != card->tx_cmd_ep && + atomic_read(&port->tx_data_urb_pending) == + MWIFIEX_TX_DATA_URB) { + port->block_status = true; + adapter->data_sent = mwifiex_usb_data_sent(adapter); + } + if (usb_submit_urb(tx_urb, GFP_ATOMIC)) { mwifiex_dbg(adapter, ERROR, "%s: usb_submit_urb failed\n", __func__); @@ -905,6 +912,7 @@ static int mwifiex_usb_host_to_card(struct mwifiex_adapter *adapter, u8 ep, } else { atomic_dec(&port->tx_data_urb_pending); port->block_status = false; + adapter->data_sent = false; if (port->tx_data_ix) port->tx_data_ix--; else @@ -916,9 +924,7 @@ static int mwifiex_usb_host_to_card(struct mwifiex_adapter *adapter, u8 ep, if (ep != card->tx_cmd_ep && atomic_read(&port->tx_data_urb_pending) == MWIFIEX_TX_DATA_URB) { - port->block_status = true; - ret = -ENOSR; - goto done; + return -ENOSR; } } From ac3b561721e946047eebbca73d8dcaee1cc9b302 Mon Sep 17 00:00:00 2001 From: Amitkumar Karwar Date: Tue, 20 Sep 2016 20:49:04 +0530 Subject: [PATCH 1558/1715] mwifiex: code rearrangement in mwifiex_usb_host_to_card() This patch helps get rid of goto statement and improves readability. Signed-off-by: Amitkumar Karwar Signed-off-by: Cathy Luo Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/usb.c | 23 ++++++---------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/usb.c b/drivers/net/wireless/marvell/mwifiex/usb.c index 8fd51e42ea5e..73eb0846db21 100644 --- a/drivers/net/wireless/marvell/mwifiex/usb.c +++ b/drivers/net/wireless/marvell/mwifiex/usb.c @@ -841,7 +841,7 @@ static int mwifiex_usb_host_to_card(struct mwifiex_adapter *adapter, u8 ep, struct usb_tx_data_port *port = NULL; u8 *data = (u8 *)skb->data; struct urb *tx_urb; - int idx, ret; + int idx, ret = -EINPROGRESS; if (adapter->is_suspended) { mwifiex_dbg(adapter, ERROR, @@ -865,8 +865,9 @@ static int mwifiex_usb_host_to_card(struct mwifiex_adapter *adapter, u8 ep, if (atomic_read(&port->tx_data_urb_pending) >= MWIFIEX_TX_DATA_URB) { port->block_status = true; - ret = -EBUSY; - goto done; + adapter->data_sent = + mwifiex_usb_data_sent(adapter); + return -EBUSY; } if (port->tx_data_ix >= MWIFIEX_TX_DATA_URB) port->tx_data_ix = 0; @@ -902,6 +903,7 @@ static int mwifiex_usb_host_to_card(struct mwifiex_adapter *adapter, u8 ep, MWIFIEX_TX_DATA_URB) { port->block_status = true; adapter->data_sent = mwifiex_usb_data_sent(adapter); + ret = -ENOSR; } if (usb_submit_urb(tx_urb, GFP_ATOMIC)) { @@ -918,22 +920,9 @@ static int mwifiex_usb_host_to_card(struct mwifiex_adapter *adapter, u8 ep, else port->tx_data_ix = MWIFIEX_TX_DATA_URB; } - - return -1; - } else { - if (ep != card->tx_cmd_ep && - atomic_read(&port->tx_data_urb_pending) == - MWIFIEX_TX_DATA_URB) { - return -ENOSR; - } + ret = -1; } - return -EINPROGRESS; - -done: - if (ep != card->tx_cmd_ep) - adapter->data_sent = mwifiex_usb_data_sent(adapter); - return ret; } From 3a589fae4a2cff317c3cabd4f76578ffd2761808 Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Tue, 20 Sep 2016 21:19:26 -0400 Subject: [PATCH 1559/1715] rtl8xxxu: Fix off by one error calculating pubq This was detected tracing the 8188eu driver, but doesn't seem to make any difference when using it. Signed-off-by: Jes Sorensen Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index ca92022f9d50..98fcd7ba6826 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -3869,7 +3869,7 @@ static void rtl8xxxu_init_queue_reserved_page(struct rtl8xxxu_priv *priv) val32 = (nq << RQPN_NPQ_SHIFT) | (eq << RQPN_EPQ_SHIFT); rtl8xxxu_write32(priv, REG_RQPN_NPQ, val32); - pubq = fops->total_page_num - hq - lq - nq; + pubq = fops->total_page_num - hq - lq - nq - 1; val32 = RQPN_LOAD; val32 |= (hq << RQPN_HI_PQ_SHIFT); From c0a99bbb1b7a11605a53f84f5c444be3ef25a8ab Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Tue, 20 Sep 2016 21:19:27 -0400 Subject: [PATCH 1560/1715] rtl8xxxu: Clean up llt_init() API Remove last_tx_page argument from the llt_init() function. The rtl8xxxu_fileops structure contains the correct TX_TOTAL_PAGE_NUM value for the device, and rtl8xxxu_auto_llt_table() doesn't need to know the value in the first place. Signed-off-by: Jes Sorensen Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h | 6 +++--- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 9 ++++++--- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h index 1f54b8928d3c..a10a57ca16db 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h @@ -1318,7 +1318,7 @@ struct rtl8xxxu_fileops { int (*power_on) (struct rtl8xxxu_priv *priv); void (*power_off) (struct rtl8xxxu_priv *priv); void (*reset_8051) (struct rtl8xxxu_priv *priv); - int (*llt_init) (struct rtl8xxxu_priv *priv, u8 last_tx_page); + int (*llt_init) (struct rtl8xxxu_priv *priv); void (*init_phy_bb) (struct rtl8xxxu_priv *priv); int (*init_phy_rf) (struct rtl8xxxu_priv *priv); void (*phy_init_antenna_selection) (struct rtl8xxxu_priv *priv); @@ -1400,14 +1400,14 @@ int rtl8xxxu_load_firmware(struct rtl8xxxu_priv *priv, char *fw_name); void rtl8xxxu_firmware_self_reset(struct rtl8xxxu_priv *priv); void rtl8xxxu_power_off(struct rtl8xxxu_priv *priv); void rtl8xxxu_reset_8051(struct rtl8xxxu_priv *priv); -int rtl8xxxu_auto_llt_table(struct rtl8xxxu_priv *priv, u8 last_tx_page); +int rtl8xxxu_auto_llt_table(struct rtl8xxxu_priv *priv); void rtl8xxxu_gen2_prepare_calibrate(struct rtl8xxxu_priv *priv, u8 start); int rtl8xxxu_flush_fifo(struct rtl8xxxu_priv *priv); int rtl8xxxu_gen2_h2c_cmd(struct rtl8xxxu_priv *priv, struct h2c_cmd *h2c, int len); int rtl8xxxu_active_to_lps(struct rtl8xxxu_priv *priv); void rtl8xxxu_disabled_to_emu(struct rtl8xxxu_priv *priv); -int rtl8xxxu_init_llt_table(struct rtl8xxxu_priv *priv, u8 last_tx_page); +int rtl8xxxu_init_llt_table(struct rtl8xxxu_priv *priv); void rtl8xxxu_gen1_phy_iq_calibrate(struct rtl8xxxu_priv *priv); void rtl8xxxu_gen1_init_phy_bb(struct rtl8xxxu_priv *priv); void rtl8xxxu_gen1_set_tx_power(struct rtl8xxxu_priv *priv, diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index 98fcd7ba6826..c628b908efc2 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -2472,10 +2472,13 @@ static int rtl8xxxu_llt_write(struct rtl8xxxu_priv *priv, u8 address, u8 data) return ret; } -int rtl8xxxu_init_llt_table(struct rtl8xxxu_priv *priv, u8 last_tx_page) +int rtl8xxxu_init_llt_table(struct rtl8xxxu_priv *priv) { int ret; int i; + u8 last_tx_page; + + last_tx_page = priv->fops->total_page_num; for (i = 0; i < last_tx_page; i++) { ret = rtl8xxxu_llt_write(priv, i, i + 1); @@ -2503,7 +2506,7 @@ exit: return ret; } -int rtl8xxxu_auto_llt_table(struct rtl8xxxu_priv *priv, u8 last_tx_page) +int rtl8xxxu_auto_llt_table(struct rtl8xxxu_priv *priv) { u32 val32; int ret = 0; @@ -3988,7 +3991,7 @@ static int rtl8xxxu_init_device(struct ieee80211_hw *hw) dev_dbg(dev, "%s: macpower %i\n", __func__, macpower); if (!macpower) { - ret = priv->fops->llt_init(priv, TX_TOTAL_PAGE_NUM); + ret = priv->fops->llt_init(priv); if (ret) { dev_warn(dev, "%s: LLT table init failed\n", __func__); goto exit; From 2fc5dd27bf9b75d83a7071d13cca044bc39748fb Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Tue, 20 Sep 2016 21:19:28 -0400 Subject: [PATCH 1561/1715] rtl8xxxu: Use a struct rtl8xxxu_fileops * in rtl8xxxu_init_device() This saves some 217, or about, derefences of priv->fops. Signed-off-by: Jes Sorensen Signed-off-by: Kalle Valo --- .../wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 37 ++++++++++--------- 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index c628b908efc2..71145ebb3564 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -3886,6 +3886,7 @@ static int rtl8xxxu_init_device(struct ieee80211_hw *hw) { struct rtl8xxxu_priv *priv = hw->priv; struct device *dev = &priv->udev->dev; + struct rtl8xxxu_fileops *fops = priv->fops; bool macpower; int ret; u8 val8; @@ -3904,7 +3905,7 @@ static int rtl8xxxu_init_device(struct ieee80211_hw *hw) else macpower = true; - ret = priv->fops->power_on(priv); + ret = fops->power_on(priv); if (ret < 0) { dev_warn(dev, "%s: Failed power on\n", __func__); goto exit; @@ -3921,7 +3922,7 @@ static int rtl8xxxu_init_device(struct ieee80211_hw *hw) /* * Set RX page boundary */ - rtl8xxxu_write16(priv, REG_TRXFF_BNDY + 2, priv->fops->trxff_boundary); + rtl8xxxu_write16(priv, REG_TRXFF_BNDY + 2, fops->trxff_boundary); ret = rtl8xxxu_download_firmware(priv); dev_dbg(dev, "%s: download_firmware %i\n", __func__, ret); @@ -3932,8 +3933,8 @@ static int rtl8xxxu_init_device(struct ieee80211_hw *hw) if (ret) goto exit; - if (priv->fops->phy_init_antenna_selection) - priv->fops->phy_init_antenna_selection(priv); + if (fops->phy_init_antenna_selection) + fops->phy_init_antenna_selection(priv); ret = rtl8xxxu_init_mac(priv); @@ -3946,7 +3947,7 @@ static int rtl8xxxu_init_device(struct ieee80211_hw *hw) if (ret) goto exit; - ret = priv->fops->init_phy_rf(priv); + ret = fops->init_phy_rf(priv); if (ret) goto exit; @@ -3971,7 +3972,7 @@ static int rtl8xxxu_init_device(struct ieee80211_hw *hw) /* * Set TX buffer boundary */ - val8 = priv->fops->total_page_num + 1; + val8 = fops->total_page_num + 1; rtl8xxxu_write8(priv, REG_TXPKTBUF_BCNQ_BDNY, val8); rtl8xxxu_write8(priv, REG_TXPKTBUF_MGQ_BDNY, val8); @@ -3984,14 +3985,14 @@ static int rtl8xxxu_init_device(struct ieee80211_hw *hw) * The vendor drivers set PBP for all devices, except 8192e. * There is no explanation for this in any of the sources. */ - val8 = (priv->fops->pbp_rx << PBP_PAGE_SIZE_RX_SHIFT) | - (priv->fops->pbp_tx << PBP_PAGE_SIZE_TX_SHIFT); + val8 = (fops->pbp_rx << PBP_PAGE_SIZE_RX_SHIFT) | + (fops->pbp_tx << PBP_PAGE_SIZE_TX_SHIFT); if (priv->rtl_chip != RTL8192E) rtl8xxxu_write8(priv, REG_PBP, val8); dev_dbg(dev, "%s: macpower %i\n", __func__, macpower); if (!macpower) { - ret = priv->fops->llt_init(priv); + ret = fops->llt_init(priv); if (ret) { dev_warn(dev, "%s: LLT table init failed\n", __func__); goto exit; @@ -4000,12 +4001,12 @@ static int rtl8xxxu_init_device(struct ieee80211_hw *hw) /* * Chip specific quirks */ - priv->fops->usb_quirks(priv); + fops->usb_quirks(priv); /* * Enable TX report and TX report timer for 8723bu/8188eu/... */ - if (priv->fops->has_tx_report) { + if (fops->has_tx_report) { val8 = rtl8xxxu_read8(priv, REG_TX_REPORT_CTRL); val8 |= TX_REPORT_CTRL_TIMER_ENABLE; rtl8xxxu_write8(priv, REG_TX_REPORT_CTRL, val8); @@ -4140,8 +4141,8 @@ static int rtl8xxxu_init_device(struct ieee80211_hw *hw) rtl8xxxu_write8(priv, REG_RSV_CTRL, val8); } - if (priv->fops->init_aggregation) - priv->fops->init_aggregation(priv); + if (fops->init_aggregation) + fops->init_aggregation(priv); /* * Enable CCK and OFDM block @@ -4158,7 +4159,7 @@ static int rtl8xxxu_init_device(struct ieee80211_hw *hw) /* * Start out with default power levels for channel 6, 20MHz */ - priv->fops->set_tx_power(priv, 1, false); + fops->set_tx_power(priv, 1, false); /* Let the 8051 take control of antenna setting */ if (priv->rtl_chip != RTL8192E) { @@ -4174,8 +4175,8 @@ static int rtl8xxxu_init_device(struct ieee80211_hw *hw) rtl8xxxu_write16(priv, REG_FAST_EDCA_CTRL, 0); - if (priv->fops->init_statistics) - priv->fops->init_statistics(priv); + if (fops->init_statistics) + fops->init_statistics(priv); if (priv->rtl_chip == RTL8192E) { /* @@ -4193,12 +4194,12 @@ static int rtl8xxxu_init_device(struct ieee80211_hw *hw) rtl8723a_phy_lc_calibrate(priv); - priv->fops->phy_iq_calibrate(priv); + fops->phy_iq_calibrate(priv); /* * This should enable thermal meter */ - if (priv->fops->gen2_thermal_meter) + if (fops->gen2_thermal_meter) rtl8xxxu_write_rfreg(priv, RF_A, RF6052_REG_T_METER_8723B, 0x37cf8); else From b42fbed6b8a5942e9f76ec8c7f9c9fd798a2d3af Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Tue, 20 Sep 2016 21:19:29 -0400 Subject: [PATCH 1562/1715] rtl8xxxu: Stop log spam from each successful interrupt As soon as debugging is turned on, the logs are filled with messages reporting the interrupt status. As this quantity is usually zero, this output is not needed. In fact, there will be a report if the status is not zero, thus the debug line in question could probably be deleted. Rather than taking that action, I have changed it to only be printed when the newly added RTL8XXXU_DEBUG_INTERRUPT bit is set in the debug mask. Signed-off-by: Larry Finger Signed-off-by: Jes Sorensen Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h | 1 + drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h index a10a57ca16db..1016628926d2 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h @@ -29,6 +29,7 @@ #define RTL8XXXU_DEBUG_H2C 0x800 #define RTL8XXXU_DEBUG_ACTION 0x1000 #define RTL8XXXU_DEBUG_EFUSE 0x2000 +#define RTL8XXXU_DEBUG_INTERRUPT 0x4000 #define RTW_USB_CONTROL_MSG_TIMEOUT 500 #define RTL8XXXU_MAX_REG_POLL 500 diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index 71145ebb3564..b2d7f6e69667 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -5375,7 +5375,8 @@ static void rtl8xxxu_int_complete(struct urb *urb) struct device *dev = &priv->udev->dev; int ret; - dev_dbg(dev, "%s: status %i\n", __func__, urb->status); + if (rtl8xxxu_debug & RTL8XXXU_DEBUG_INTERRUPT) + dev_dbg(dev, "%s: status %i\n", __func__, urb->status); if (urb->status == 0) { usb_anchor_urb(urb, &priv->int_anchor); ret = usb_submit_urb(urb, GFP_ATOMIC); From 23e9c128adb2038c27a424a5f91136e7fa3e0dc6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Wed, 21 Sep 2016 08:23:24 +0200 Subject: [PATCH 1563/1715] brcmfmac: fix memory leak in brcmf_fill_bss_param MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This function is called from get_station callback which means that every time user space was getting/dumping station(s) we were leaking 2 KiB. Signed-off-by: Rafał Miłecki Fixes: 1f0dc59a6de ("brcmfmac: rework .get_station() callback") Cc: stable@vger.kernel.org # 4.2+ Acked-by: Arend van Spriel Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index 6aeb69cb29b6..b777e1b2f87a 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -2527,7 +2527,7 @@ static void brcmf_fill_bss_param(struct brcmf_if *ifp, struct station_info *si) WL_BSS_INFO_MAX); if (err) { brcmf_err("Failed to get bss info (%d)\n", err); - return; + goto out_kfree; } si->filled |= BIT(NL80211_STA_INFO_BSS_PARAM); si->bss_param.beacon_interval = le16_to_cpu(buf->bss_le.beacon_period); @@ -2539,6 +2539,9 @@ static void brcmf_fill_bss_param(struct brcmf_if *ifp, struct station_info *si) si->bss_param.flags |= BSS_PARAM_FLAGS_SHORT_PREAMBLE; if (capability & WLAN_CAPABILITY_SHORT_SLOT_TIME) si->bss_param.flags |= BSS_PARAM_FLAGS_SHORT_SLOT_TIME; + +out_kfree: + kfree(buf); } static s32 From 2df86ad959c9d1cdbeb2f23a0801857731156692 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Fri, 23 Sep 2016 15:27:46 +0200 Subject: [PATCH 1564/1715] brcmfmac: drop unused fields from struct brcmf_pub MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit They seem to be there from the first day. We calculate these values but never use them. Signed-off-by: Rafał Miłecki Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c | 3 --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h | 4 ---- drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwsignal.c | 2 -- 3 files changed, 9 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c index 7a65f9da048a..17152805d96f 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c @@ -508,9 +508,6 @@ int brcmf_net_attach(struct brcmf_if *ifp, bool rtnl_locked) ndev->needed_headroom += drvr->hdrlen; ndev->ethtool_ops = &brcmf_ethtool_ops; - drvr->rxsz = ndev->mtu + ndev->hard_header_len + - drvr->hdrlen; - /* set the mac address */ memcpy(ndev->dev_addr, ifp->mac_addr, ETH_ALEN); diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h index 8a810bb24a11..c94dcab260d0 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h @@ -112,15 +112,11 @@ struct brcmf_pub { /* Internal brcmf items */ uint hdrlen; /* Total BRCMF header length (proto + bus) */ - uint rxsz; /* Rx buffer size bus module should use */ /* Dongle media info */ char fwver[BRCMF_DRIVER_FIRMWARE_VERSION_LEN]; u8 mac[ETH_ALEN]; /* MAC address obtained from dongle */ - /* Multicast data packets sent to dongle */ - unsigned long tx_multicast; - struct mac_address addresses[BRCMF_MAX_IFS]; struct brcmf_if *iflist[BRCMF_MAX_IFS]; diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwsignal.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwsignal.c index 9f9024a7bd64..a190f535efc9 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwsignal.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwsignal.c @@ -2104,8 +2104,6 @@ int brcmf_fws_process_skb(struct brcmf_if *ifp, struct sk_buff *skb) if ((skb->priority == 0) || (skb->priority > 7)) skb->priority = cfg80211_classify8021d(skb, NULL); - drvr->tx_multicast += !!multicast; - if (fws->avoid_queueing) { rc = brcmf_proto_txdata(drvr, ifp->ifidx, 0, skb); if (rc < 0) From 4b87e5af638b6056bd6c20b0954d09a5a58633be Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Mon, 12 Sep 2016 16:03:30 +0300 Subject: [PATCH 1565/1715] iwlwifi: remove support for fw older than -17 and -22 FW versions older than -17 for 3160 and 7260 and older than -22 for newer NICs are not supported anymore. Don't load these versions and remove code that handles them. Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/iwl-7000.c | 6 ++--- drivers/net/wireless/intel/iwlwifi/iwl-8000.c | 2 +- drivers/net/wireless/intel/iwlwifi/iwl-9000.c | 2 +- .../net/wireless/intel/iwlwifi/iwl-fw-file.h | 24 +---------------- .../wireless/intel/iwlwifi/mvm/debugfs-vif.c | 3 +-- .../wireless/intel/iwlwifi/mvm/fw-api-power.h | 21 +++++---------- .../wireless/intel/iwlwifi/mvm/fw-api-tx.h | 2 -- drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 10 +------ .../net/wireless/intel/iwlwifi/mvm/mac80211.c | 24 +++++++---------- drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 2 -- drivers/net/wireless/intel/iwlwifi/mvm/ops.c | 6 ++--- .../net/wireless/intel/iwlwifi/mvm/power.c | 15 ++++------- drivers/net/wireless/intel/iwlwifi/mvm/scan.c | 27 +++---------------- drivers/net/wireless/intel/iwlwifi/mvm/tt.c | 10 ++----- 14 files changed, 36 insertions(+), 118 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-7000.c b/drivers/net/wireless/intel/iwlwifi/iwl-7000.c index aa575fb9dea0..d4b73dedf89b 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-7000.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-7000.c @@ -77,9 +77,9 @@ #define IWL3168_UCODE_API_MAX 26 /* Lowest firmware API version supported */ -#define IWL7260_UCODE_API_MIN 16 -#define IWL7265_UCODE_API_MIN 16 -#define IWL7265D_UCODE_API_MIN 16 +#define IWL7260_UCODE_API_MIN 17 +#define IWL7265_UCODE_API_MIN 17 +#define IWL7265D_UCODE_API_MIN 17 #define IWL3168_UCODE_API_MIN 20 /* NVM versions */ diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-8000.c b/drivers/net/wireless/intel/iwlwifi/iwl-8000.c index 990cf2b17517..d02ca1491d16 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-8000.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-8000.c @@ -74,7 +74,7 @@ #define IWL8265_UCODE_API_MAX 26 /* Lowest firmware API version supported */ -#define IWL8000_UCODE_API_MIN 16 +#define IWL8000_UCODE_API_MIN 17 #define IWL8265_UCODE_API_MIN 20 /* NVM versions */ diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-9000.c b/drivers/net/wireless/intel/iwlwifi/iwl-9000.c index a2c7946c12c9..ff850410d897 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-9000.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-9000.c @@ -58,7 +58,7 @@ #define IWL9000_UCODE_API_MAX 26 /* Lowest firmware API version supported */ -#define IWL9000_UCODE_API_MIN 16 +#define IWL9000_UCODE_API_MIN 17 /* NVM versions */ #define IWL9000_NVM_VERSION 0x0a1d diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-fw-file.h b/drivers/net/wireless/intel/iwlwifi/iwl-fw-file.h index 94423f04320b..ceec5ca2b1ab 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-fw-file.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-fw-file.h @@ -199,8 +199,6 @@ struct iwl_ucode_capa { * @IWL_UCODE_TLV_FLAGS_NEWSCAN: new uCode scan behavior on hidden SSID, * treats good CRC threshold as a boolean * @IWL_UCODE_TLV_FLAGS_MFP: This uCode image supports MFP (802.11w). - * @IWL_UCODE_TLV_FLAGS_P2P: This uCode image supports P2P. - * @IWL_UCODE_TLV_FLAGS_DW_BC_TABLE: The SCD byte count table is in DWORDS * @IWL_UCODE_TLV_FLAGS_UAPSD_SUPPORT: This uCode image supports uAPSD * @IWL_UCODE_TLV_FLAGS_SHORT_BL: 16 entries of black list instead of 64 in scan * offload profile config command. @@ -210,36 +208,24 @@ struct iwl_ucode_capa { * from the probe request template. * @IWL_UCODE_TLV_FLAGS_NEW_NSOFFL_SMALL: new NS offload (small version) * @IWL_UCODE_TLV_FLAGS_NEW_NSOFFL_LARGE: new NS offload (large version) - * @IWL_UCODE_TLV_FLAGS_P2P_PM: P2P client supports PM as a stand alone MAC - * @IWL_UCODE_TLV_FLAGS_P2P_BSS_PS_DCM: support power save on BSS station and - * P2P client interfaces simultaneously if they are in different bindings. - * @IWL_UCODE_TLV_FLAGS_P2P_BSS_PS_SCM: support power save on BSS station and - * P2P client interfaces simultaneously if they are in same bindings. * @IWL_UCODE_TLV_FLAGS_UAPSD_SUPPORT: General support for uAPSD * @IWL_UCODE_TLV_FLAGS_P2P_PS_UAPSD: P2P client supports uAPSD power save * @IWL_UCODE_TLV_FLAGS_BCAST_FILTERING: uCode supports broadcast filtering. - * @IWL_UCODE_TLV_FLAGS_GO_UAPSD: AP/GO interfaces support uAPSD clients * @IWL_UCODE_TLV_FLAGS_EBS_SUPPORT: this uCode image supports EBS. */ enum iwl_ucode_tlv_flag { IWL_UCODE_TLV_FLAGS_PAN = BIT(0), IWL_UCODE_TLV_FLAGS_NEWSCAN = BIT(1), IWL_UCODE_TLV_FLAGS_MFP = BIT(2), - IWL_UCODE_TLV_FLAGS_P2P = BIT(3), - IWL_UCODE_TLV_FLAGS_DW_BC_TABLE = BIT(4), IWL_UCODE_TLV_FLAGS_SHORT_BL = BIT(7), IWL_UCODE_TLV_FLAGS_D3_6_IPV6_ADDRS = BIT(10), IWL_UCODE_TLV_FLAGS_NO_BASIC_SSID = BIT(12), IWL_UCODE_TLV_FLAGS_NEW_NSOFFL_SMALL = BIT(15), IWL_UCODE_TLV_FLAGS_NEW_NSOFFL_LARGE = BIT(16), - IWL_UCODE_TLV_FLAGS_P2P_PM = BIT(21), - IWL_UCODE_TLV_FLAGS_BSS_P2P_PS_DCM = BIT(22), - IWL_UCODE_TLV_FLAGS_BSS_P2P_PS_SCM = BIT(23), IWL_UCODE_TLV_FLAGS_UAPSD_SUPPORT = BIT(24), IWL_UCODE_TLV_FLAGS_EBS_SUPPORT = BIT(25), IWL_UCODE_TLV_FLAGS_P2P_PS_UAPSD = BIT(26), IWL_UCODE_TLV_FLAGS_BCAST_FILTERING = BIT(29), - IWL_UCODE_TLV_FLAGS_GO_UAPSD = BIT(30), }; typedef unsigned int __bitwise__ iwl_ucode_tlv_api_t; @@ -249,13 +235,8 @@ typedef unsigned int __bitwise__ iwl_ucode_tlv_api_t; * @IWL_UCODE_TLV_API_FRAGMENTED_SCAN: This ucode supports active dwell time * longer than the passive one, which is essential for fragmented scan. * @IWL_UCODE_TLV_API_WIFI_MCC_UPDATE: ucode supports MCC updates with source. - * @IWL_UCODE_TLV_API_WIDE_CMD_HDR: ucode supports wide command header * @IWL_UCODE_TLV_API_LQ_SS_PARAMS: Configure STBC/BFER via LQ CMD ss_params * @IWL_UCODE_TLV_API_NEW_VERSION: new versioning format - * @IWL_UCODE_TLV_API_EXT_SCAN_PRIORITY: scan APIs use 8-level priority - * instead of 3. - * @IWL_UCODE_TLV_API_TX_POWER_CHAIN: TX power API has larger command size - * (command version 3) that supports per-chain limits * @IWL_UCODE_TLV_API_SCAN_TSF_REPORT: Scan start time reported in scan * iteration complete notification, and the timestamp reported for RX * received during scan, are reported in TSF of the mac specified in the @@ -266,11 +247,8 @@ typedef unsigned int __bitwise__ iwl_ucode_tlv_api_t; enum iwl_ucode_tlv_api { IWL_UCODE_TLV_API_FRAGMENTED_SCAN = (__force iwl_ucode_tlv_api_t)8, IWL_UCODE_TLV_API_WIFI_MCC_UPDATE = (__force iwl_ucode_tlv_api_t)9, - IWL_UCODE_TLV_API_WIDE_CMD_HDR = (__force iwl_ucode_tlv_api_t)14, IWL_UCODE_TLV_API_LQ_SS_PARAMS = (__force iwl_ucode_tlv_api_t)18, - IWL_UCODE_TLV_API_NEW_VERSION = (__force iwl_ucode_tlv_api_t)20, - IWL_UCODE_TLV_API_EXT_SCAN_PRIORITY = (__force iwl_ucode_tlv_api_t)24, - IWL_UCODE_TLV_API_TX_POWER_CHAIN = (__force iwl_ucode_tlv_api_t)27, + IWL_UCODE_TLV_API_NEW_VERSION = (__force iwl_ucode_tlv_api_t)20, IWL_UCODE_TLV_API_SCAN_TSF_REPORT = (__force iwl_ucode_tlv_api_t)28, NUM_IWL_UCODE_TLV_API diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c index 8ff19210ef1e..2d6f44fbaf62 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c @@ -1578,8 +1578,7 @@ void iwl_mvm_vif_dbgfs_register(struct iwl_mvm *mvm, struct ieee80211_vif *vif) if (iwlmvm_mod_params.power_scheme != IWL_POWER_SCHEME_CAM && ((vif->type == NL80211_IFTYPE_STATION && !vif->p2p) || - (vif->type == NL80211_IFTYPE_STATION && vif->p2p && - mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_BSS_P2P_PS_DCM))) + (vif->type == NL80211_IFTYPE_STATION && vif->p2p))) MVM_DEBUGFS_ADD_FILE_VIF(pm_params, mvmvif->dbgfs_dir, S_IWUSR | S_IRUSR); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-power.h b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-power.h index 404b0de9e2dc..3fa43d1348a2 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-power.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-power.h @@ -313,35 +313,26 @@ enum iwl_dev_tx_power_cmd_mode { IWL_TX_POWER_MODE_SET_ACK = 3, }; /* TX_POWER_REDUCED_FLAGS_TYPE_API_E_VER_4 */; +#define IWL_NUM_CHAIN_LIMITS 2 +#define IWL_NUM_SUB_BANDS 5 + /** - * struct iwl_dev_tx_power_cmd_v2 - TX power reduction command + * struct iwl_dev_tx_power_cmd - TX power reduction command * @set_mode: see &enum iwl_dev_tx_power_cmd_mode * @mac_context_id: id of the mac ctx for which we are reducing TX power. * @pwr_restriction: TX power restriction in 1/8 dBms. * @dev_24: device TX power restriction in 1/8 dBms * @dev_52_low: device TX power restriction upper band - low * @dev_52_high: device TX power restriction upper band - high + * @per_chain_restriction: per chain restrictions */ -struct iwl_dev_tx_power_cmd_v2 { +struct iwl_dev_tx_power_cmd_v3 { __le32 set_mode; __le32 mac_context_id; __le16 pwr_restriction; __le16 dev_24; __le16 dev_52_low; __le16 dev_52_high; -} __packed; /* TX_REDUCED_POWER_API_S_VER_2 */ - -#define IWL_NUM_CHAIN_LIMITS 2 -#define IWL_NUM_SUB_BANDS 5 - -/** - * struct iwl_dev_tx_power_cmd - TX power reduction command - * @v2: version 2 of the command, embedded here for easier software handling - * @per_chain_restriction: per chain restrictions - */ -struct iwl_dev_tx_power_cmd_v3 { - /* v3 is just an extension of v2 - keep this here */ - struct iwl_dev_tx_power_cmd_v2 v2; __le16 per_chain_restriction[IWL_NUM_CHAIN_LIMITS][IWL_NUM_SUB_BANDS]; } __packed; /* TX_REDUCED_POWER_API_S_VER_3 */ diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h index 0055a960238b..57d29a1b7b8d 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h @@ -89,7 +89,6 @@ * @TX_CMD_FLG_MH_PAD: driver inserted 2 byte padding after MAC header. * Should be set for 26/30 length MAC headers * @TX_CMD_FLG_RESP_TO_DRV: zero this if the response should go only to FW - * @TX_CMD_FLG_CCMP_AGG: this frame uses CCMP for aggregation acceleration * @TX_CMD_FLG_TKIP_MIC_DONE: FW already performed TKIP MIC calculation * @TX_CMD_FLG_DUR: disable duration overwriting used in PS-Poll Assoc-id * @TX_CMD_FLG_FW_DROP: FW should mark frame to be dropped @@ -116,7 +115,6 @@ enum iwl_tx_flags { TX_CMD_FLG_KEEP_SEQ_CTL = BIT(18), TX_CMD_FLG_MH_PAD = BIT(20), TX_CMD_FLG_RESP_TO_DRV = BIT(21), - TX_CMD_FLG_CCMP_AGG = BIT(22), TX_CMD_FLG_TKIP_MIC_DONE = BIT(23), TX_CMD_FLG_DUR = BIT(25), TX_CMD_FLG_FW_DROP = BIT(26), diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index 897412057d1f..872066317fa5 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -1046,19 +1046,11 @@ static int iwl_mvm_sar_init(struct iwl_mvm *mvm) { struct iwl_mvm_sar_table sar_table; struct iwl_dev_tx_power_cmd cmd = { - .v3.v2.set_mode = cpu_to_le32(IWL_TX_POWER_MODE_SET_CHAINS), + .v3.set_mode = cpu_to_le32(IWL_TX_POWER_MODE_SET_CHAINS), }; int ret, i, j, idx; int len = sizeof(cmd); - /* we can't do anything with the table if the FW doesn't support it */ - if (!fw_has_api(&mvm->fw->ucode_capa, - IWL_UCODE_TLV_API_TX_POWER_CHAIN)) { - IWL_DEBUG_RADIO(mvm, - "FW doesn't support per-chain TX power settings.\n"); - return 0; - } - if (!fw_has_capa(&mvm->fw->ucode_capa, IWL_UCODE_TLV_CAPA_TX_POWER_ACK)) len = sizeof(cmd.v3); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index a5ede8c9bea2..318efd814037 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -479,13 +479,11 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm) hw->wiphy->n_cipher_suites++; } - /* - * Enable 11w if advertised by firmware and software crypto - * is not enabled (as the firmware will interpret some mgmt - * packets, so enabling it with software crypto isn't safe) + /* Enable 11w if software crypto is not enabled (as the + * firmware will interpret some mgmt packets, so enabling it + * with software crypto isn't safe). */ - if (mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_MFP && - !iwlwifi_mod_params.sw_crypto) { + if (!iwlwifi_mod_params.sw_crypto) { ieee80211_hw_set(hw, MFP_CAPABLE); mvm->ciphers[hw->wiphy->n_cipher_suites] = WLAN_CIPHER_SUITE_AES_CMAC; @@ -547,9 +545,7 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm) hw->wiphy->regulatory_flags |= REGULATORY_CUSTOM_REG | REGULATORY_DISABLE_BEACON_HINTS; - if (mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_GO_UAPSD) - hw->wiphy->flags |= WIPHY_FLAG_AP_UAPSD; - + hw->wiphy->flags |= WIPHY_FLAG_AP_UAPSD; hw->wiphy->flags |= WIPHY_FLAG_HAS_CHANNEL_SWITCH; hw->wiphy->iface_combinations = iwl_mvm_iface_combinations; @@ -1273,20 +1269,18 @@ static int iwl_mvm_set_tx_power(struct iwl_mvm *mvm, struct ieee80211_vif *vif, s16 tx_power) { struct iwl_dev_tx_power_cmd cmd = { - .v3.v2.set_mode = cpu_to_le32(IWL_TX_POWER_MODE_SET_MAC), - .v3.v2.mac_context_id = + .v3.set_mode = cpu_to_le32(IWL_TX_POWER_MODE_SET_MAC), + .v3.mac_context_id = cpu_to_le32(iwl_mvm_vif_from_mac80211(vif)->id), - .v3.v2.pwr_restriction = cpu_to_le16(8 * tx_power), + .v3.pwr_restriction = cpu_to_le16(8 * tx_power), }; int len = sizeof(cmd); if (tx_power == IWL_DEFAULT_MAX_TX_POWER) - cmd.v3.v2.pwr_restriction = cpu_to_le16(IWL_DEV_MAX_TX_POWER); + cmd.v3.pwr_restriction = cpu_to_le16(IWL_DEV_MAX_TX_POWER); if (!fw_has_capa(&mvm->fw->ucode_capa, IWL_UCODE_TLV_CAPA_TX_POWER_ACK)) len = sizeof(cmd.v3); - if (!fw_has_api(&mvm->fw->ucode_capa, IWL_UCODE_TLV_API_TX_POWER_CHAIN)) - len = sizeof(cmd.v3.v2); return iwl_mvm_send_cmd_pdu(mvm, REDUCE_TX_POWER_CMD, 0, len, &cmd); } diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index b7cfdcbcf95b..d17cbf603f7c 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -1305,8 +1305,6 @@ static inline void iwl_mvm_set_tx_cmd_ccmp(struct ieee80211_tx_info *info, tx_cmd->sec_ctl = TX_CMD_SEC_CCM; memcpy(tx_cmd->key, keyconf->key, keyconf->keylen); - if (info->flags & IEEE80211_TX_CTL_AMPDU) - tx_cmd->tx_flags |= cpu_to_le32(TX_CMD_FLG_CCMP_AGG); } static inline void iwl_mvm_wait_for_async_handlers(struct iwl_mvm *mvm) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index 6c08aa3fd2df..05fe6dd1a2c8 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -653,11 +653,9 @@ iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_cfg *cfg, /* the hardware splits the A-MSDU */ if (mvm->cfg->mq_rx_supported) trans_cfg.rx_buf_size = IWL_AMSDU_4K; - trans->wide_cmd_header = fw_has_api(&mvm->fw->ucode_capa, - IWL_UCODE_TLV_API_WIDE_CMD_HDR); - if (mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_DW_BC_TABLE) - trans_cfg.bc_table_dword = true; + trans->wide_cmd_header = true; + trans_cfg.bc_table_dword = true; trans_cfg.command_groups = iwl_mvm_groups; trans_cfg.command_groups_size = ARRAY_SIZE(iwl_mvm_groups); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/power.c b/drivers/net/wireless/intel/iwlwifi/mvm/power.c index ff85865b1dda..af6d10c23e5a 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/power.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/power.c @@ -694,8 +694,7 @@ static void iwl_mvm_power_set_pm(struct iwl_mvm *mvm, /* enable PM on p2p if p2p stand alone */ if (vifs->p2p_active && !vifs->bss_active && !vifs->ap_active) { - if (mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_P2P_PM) - p2p_mvmvif->pm_enabled = true; + p2p_mvmvif->pm_enabled = true; return; } @@ -707,12 +706,10 @@ static void iwl_mvm_power_set_pm(struct iwl_mvm *mvm, ap_mvmvif->phy_ctxt->id); /* clients are not stand alone: enable PM if DCM */ - if (!(client_same_channel || ap_same_channel) && - (mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_BSS_P2P_PS_DCM)) { + if (!(client_same_channel || ap_same_channel)) { if (vifs->bss_active) bss_mvmvif->pm_enabled = true; - if (vifs->p2p_active && - (mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_P2P_PM)) + if (vifs->p2p_active) p2p_mvmvif->pm_enabled = true; return; } @@ -721,12 +718,10 @@ static void iwl_mvm_power_set_pm(struct iwl_mvm *mvm, * There is only one channel in the system and there are only * bss and p2p clients that share it */ - if (client_same_channel && !vifs->ap_active && - (mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_BSS_P2P_PS_SCM)) { + if (client_same_channel && !vifs->ap_active) { /* share same channel*/ bss_mvmvif->pm_enabled = true; - if (mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_P2P_PM) - p2p_mvmvif->pm_enabled = true; + p2p_mvmvif->pm_enabled = true; } } diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c index 00b03fc5807b..f279fdd6eb44 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c @@ -739,22 +739,6 @@ iwl_mvm_build_scan_probe(struct iwl_mvm *mvm, struct ieee80211_vif *vif, params->preq.common_data.len = cpu_to_le16(ies->common_ie_len); } -static __le32 iwl_mvm_scan_priority(struct iwl_mvm *mvm, - enum iwl_scan_priority_ext prio) -{ - if (fw_has_api(&mvm->fw->ucode_capa, - IWL_UCODE_TLV_API_EXT_SCAN_PRIORITY)) - return cpu_to_le32(prio); - - if (prio <= IWL_SCAN_PRIORITY_EXT_2) - return cpu_to_le32(IWL_SCAN_PRIORITY_LOW); - - if (prio <= IWL_SCAN_PRIORITY_EXT_4) - return cpu_to_le32(IWL_SCAN_PRIORITY_MEDIUM); - - return cpu_to_le32(IWL_SCAN_PRIORITY_HIGH); -} - static void iwl_mvm_scan_lmac_dwell(struct iwl_mvm *mvm, struct iwl_scan_req_lmac *cmd, struct iwl_mvm_scan_params *params) @@ -765,7 +749,7 @@ static void iwl_mvm_scan_lmac_dwell(struct iwl_mvm *mvm, cmd->extended_dwell = scan_timing[params->type].dwell_extended; cmd->max_out_time = cpu_to_le32(scan_timing[params->type].max_out_time); cmd->suspend_time = cpu_to_le32(scan_timing[params->type].suspend_time); - cmd->scan_prio = iwl_mvm_scan_priority(mvm, IWL_SCAN_PRIORITY_EXT_6); + cmd->scan_prio = cpu_to_le32(IWL_SCAN_PRIORITY_EXT_6); } static inline bool iwl_mvm_scan_fits(struct iwl_mvm *mvm, int n_ssids, @@ -1067,15 +1051,12 @@ static void iwl_mvm_scan_umac_dwell(struct iwl_mvm *mvm, cmd->fragmented_dwell = scan_timing[params->type].dwell_fragmented; cmd->max_out_time = cpu_to_le32(scan_timing[params->type].max_out_time); cmd->suspend_time = cpu_to_le32(scan_timing[params->type].suspend_time); - cmd->scan_priority = - iwl_mvm_scan_priority(mvm, IWL_SCAN_PRIORITY_EXT_6); + cmd->scan_priority = cpu_to_le32(IWL_SCAN_PRIORITY_EXT_6); if (iwl_mvm_is_regular_scan(params)) - cmd->ooc_priority = - iwl_mvm_scan_priority(mvm, IWL_SCAN_PRIORITY_EXT_6); + cmd->ooc_priority = cpu_to_le32(IWL_SCAN_PRIORITY_EXT_6); else - cmd->ooc_priority = - iwl_mvm_scan_priority(mvm, IWL_SCAN_PRIORITY_EXT_2); + cmd->ooc_priority = cpu_to_le32(IWL_SCAN_PRIORITY_EXT_2); } static void diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c index 58fc7b3c711c..63a051be832e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c @@ -241,11 +241,8 @@ static int iwl_mvm_get_temp_cmd(struct iwl_mvm *mvm) }; u32 cmdid; - if (fw_has_api(&mvm->fw->ucode_capa, IWL_UCODE_TLV_API_WIDE_CMD_HDR)) - cmdid = iwl_cmd_id(CMD_DTS_MEASUREMENT_TRIGGER_WIDE, - PHY_OPS_GROUP, 0); - else - cmdid = CMD_DTS_MEASUREMENT_TRIGGER; + cmdid = iwl_cmd_id(CMD_DTS_MEASUREMENT_TRIGGER_WIDE, + PHY_OPS_GROUP, 0); if (!fw_has_capa(&mvm->fw->ucode_capa, IWL_UCODE_TLV_CAPA_EXTENDED_DTS_MEASURE)) @@ -261,9 +258,6 @@ int iwl_mvm_get_temp(struct iwl_mvm *mvm, s32 *temp) DTS_MEASUREMENT_NOTIF_WIDE) }; int ret; - if (!fw_has_api(&mvm->fw->ucode_capa, IWL_UCODE_TLV_API_WIDE_CMD_HDR)) - temp_notif[0] = DTS_MEASUREMENT_NOTIFICATION; - lockdep_assert_held(&mvm->mutex); iwl_init_notification_wait(&mvm->notif_wait, &wait_temp_notif, From 4dc6511f86b5f3edfd289d4714488d56562f9095 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Tue, 13 Sep 2016 22:59:27 +0300 Subject: [PATCH 1566/1715] iwlwifi: mvm: fix typo in TC_CMD_SEC_KEY_FROM_TABLE This define should really be TX_CMD_SEC_KEY_FROM_TABLE Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h | 4 ++-- drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h index 57d29a1b7b8d..59ca97a11b2b 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h @@ -147,7 +147,7 @@ enum iwl_tx_pm_timeouts { * @TX_CMD_SEC_EXT: extended cipher algorithm. * @TX_CMD_SEC_GCMP: GCMP encryption algorithm. * @TX_CMD_SEC_KEY128: set for 104 bits WEP key. - * @TC_CMD_SEC_KEY_FROM_TABLE: for a non-WEP key, set if the key should be taken + * @TX_CMD_SEC_KEY_FROM_TABLE: for a non-WEP key, set if the key should be taken * from the table instead of from the TX command. * If the key is taken from the key table its index should be given by the * first byte of the TX command key field. @@ -159,7 +159,7 @@ enum iwl_tx_cmd_sec_ctrl { TX_CMD_SEC_EXT = 0x04, TX_CMD_SEC_GCMP = 0x05, TX_CMD_SEC_KEY128 = 0x08, - TC_CMD_SEC_KEY_FROM_TABLE = 0x08, + TX_CMD_SEC_KEY_FROM_TABLE = 0x08, }; /* TODO: how does these values are OK with only 16 bit variable??? */ diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index ef4bdfc9e7e2..a3fe73b963c3 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -441,7 +441,7 @@ static void iwl_mvm_set_tx_cmd_crypto(struct iwl_mvm *mvm, * one. * Need to handle this. */ - tx_cmd->sec_ctl |= TX_CMD_SEC_GCMP | TC_CMD_SEC_KEY_FROM_TABLE; + tx_cmd->sec_ctl |= TX_CMD_SEC_GCMP | TX_CMD_SEC_KEY_FROM_TABLE; tx_cmd->key[0] = keyconf->hw_key_idx; iwl_mvm_set_tx_cmd_pn(info, crypto_hdr); break; From 3ac37d0109a32f684d38c9a47c49dde8d27bf1b8 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Thu, 8 Sep 2016 10:44:38 +0300 Subject: [PATCH 1567/1715] iwlwifi: allow error table address new range The firmware has a new smart linker, and this table can now be in ICCM or in SMEM. It is not hardcoded, but depends on code size. Allow the full range. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/utils.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c index 9e366e28c983..d04babd99b53 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c @@ -512,7 +512,7 @@ void iwl_mvm_dump_nic_error_log(struct iwl_mvm *mvm) base = mvm->fw->inst_errlog_ptr; } - if (base < 0x800000) { + if (base < 0x400000) { IWL_ERR(mvm, "Not valid error log pointer 0x%08X for %s uCode\n", base, From 9cd70e80f7f0df1d6d13d8aeb50a16bf40e2962c Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Tue, 20 Sep 2016 13:40:33 +0300 Subject: [PATCH 1568/1715] iwlwifi: mvm: initialise ADD_STA before sending it to the firmware When we unshare a queue, the ADD_STA was not properly initialised. Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index 258a234feb71..fc771885e383 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -933,7 +933,7 @@ static void iwl_mvm_unshare_queue(struct iwl_mvm *mvm, int queue) /* If aggs should be turned back on - do it */ if (mvmsta->tid_data[tid].state == IWL_AGG_ON) { - struct iwl_mvm_add_sta_cmd cmd; + struct iwl_mvm_add_sta_cmd cmd = {0}; mvmsta->tid_disable_agg &= ~BIT(tid); From b6f5be287344433100201c4d6f3f8dcd1755a6b3 Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Sun, 25 Sep 2016 23:31:24 +0200 Subject: [PATCH 1569/1715] net: tg3: use new api ethtool_{get|set}_link_ksettings The ethtool api {get|set}_settings is deprecated. We move this driver to new api {get|set}_link_ksettings. Signed-off-by: Philippe Reynes Acked-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/tg3.c | 110 +++++++++++++++------------- 1 file changed, 61 insertions(+), 49 deletions(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index a2551bcd1027..2726f032f2d4 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -12079,95 +12079,107 @@ static int tg3_set_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom, return ret; } -static int tg3_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) +static int tg3_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) { struct tg3 *tp = netdev_priv(dev); + u32 supported, advertising; if (tg3_flag(tp, USE_PHYLIB)) { struct phy_device *phydev; if (!(tp->phy_flags & TG3_PHYFLG_IS_CONNECTED)) return -EAGAIN; phydev = mdiobus_get_phy(tp->mdio_bus, tp->phy_addr); - return phy_ethtool_gset(phydev, cmd); + return phy_ethtool_ksettings_get(phydev, cmd); } - cmd->supported = (SUPPORTED_Autoneg); + supported = (SUPPORTED_Autoneg); if (!(tp->phy_flags & TG3_PHYFLG_10_100_ONLY)) - cmd->supported |= (SUPPORTED_1000baseT_Half | - SUPPORTED_1000baseT_Full); + supported |= (SUPPORTED_1000baseT_Half | + SUPPORTED_1000baseT_Full); if (!(tp->phy_flags & TG3_PHYFLG_ANY_SERDES)) { - cmd->supported |= (SUPPORTED_100baseT_Half | - SUPPORTED_100baseT_Full | - SUPPORTED_10baseT_Half | - SUPPORTED_10baseT_Full | - SUPPORTED_TP); - cmd->port = PORT_TP; + supported |= (SUPPORTED_100baseT_Half | + SUPPORTED_100baseT_Full | + SUPPORTED_10baseT_Half | + SUPPORTED_10baseT_Full | + SUPPORTED_TP); + cmd->base.port = PORT_TP; } else { - cmd->supported |= SUPPORTED_FIBRE; - cmd->port = PORT_FIBRE; + supported |= SUPPORTED_FIBRE; + cmd->base.port = PORT_FIBRE; } + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, + supported); - cmd->advertising = tp->link_config.advertising; + advertising = tp->link_config.advertising; if (tg3_flag(tp, PAUSE_AUTONEG)) { if (tp->link_config.flowctrl & FLOW_CTRL_RX) { if (tp->link_config.flowctrl & FLOW_CTRL_TX) { - cmd->advertising |= ADVERTISED_Pause; + advertising |= ADVERTISED_Pause; } else { - cmd->advertising |= ADVERTISED_Pause | - ADVERTISED_Asym_Pause; + advertising |= ADVERTISED_Pause | + ADVERTISED_Asym_Pause; } } else if (tp->link_config.flowctrl & FLOW_CTRL_TX) { - cmd->advertising |= ADVERTISED_Asym_Pause; + advertising |= ADVERTISED_Asym_Pause; } } + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, + advertising); + if (netif_running(dev) && tp->link_up) { - ethtool_cmd_speed_set(cmd, tp->link_config.active_speed); - cmd->duplex = tp->link_config.active_duplex; - cmd->lp_advertising = tp->link_config.rmt_adv; + cmd->base.speed = tp->link_config.active_speed; + cmd->base.duplex = tp->link_config.active_duplex; + ethtool_convert_legacy_u32_to_link_mode( + cmd->link_modes.lp_advertising, + tp->link_config.rmt_adv); + if (!(tp->phy_flags & TG3_PHYFLG_ANY_SERDES)) { if (tp->phy_flags & TG3_PHYFLG_MDIX_STATE) - cmd->eth_tp_mdix = ETH_TP_MDI_X; + cmd->base.eth_tp_mdix = ETH_TP_MDI_X; else - cmd->eth_tp_mdix = ETH_TP_MDI; + cmd->base.eth_tp_mdix = ETH_TP_MDI; } } else { - ethtool_cmd_speed_set(cmd, SPEED_UNKNOWN); - cmd->duplex = DUPLEX_UNKNOWN; - cmd->eth_tp_mdix = ETH_TP_MDI_INVALID; + cmd->base.speed = SPEED_UNKNOWN; + cmd->base.duplex = DUPLEX_UNKNOWN; + cmd->base.eth_tp_mdix = ETH_TP_MDI_INVALID; } - cmd->phy_address = tp->phy_addr; - cmd->transceiver = XCVR_INTERNAL; - cmd->autoneg = tp->link_config.autoneg; - cmd->maxtxpkt = 0; - cmd->maxrxpkt = 0; + cmd->base.phy_address = tp->phy_addr; + cmd->base.autoneg = tp->link_config.autoneg; return 0; } -static int tg3_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) +static int tg3_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) { struct tg3 *tp = netdev_priv(dev); - u32 speed = ethtool_cmd_speed(cmd); + u32 speed = cmd->base.speed; + u32 advertising; if (tg3_flag(tp, USE_PHYLIB)) { struct phy_device *phydev; if (!(tp->phy_flags & TG3_PHYFLG_IS_CONNECTED)) return -EAGAIN; phydev = mdiobus_get_phy(tp->mdio_bus, tp->phy_addr); - return phy_ethtool_sset(phydev, cmd); + return phy_ethtool_ksettings_set(phydev, cmd); } - if (cmd->autoneg != AUTONEG_ENABLE && - cmd->autoneg != AUTONEG_DISABLE) + if (cmd->base.autoneg != AUTONEG_ENABLE && + cmd->base.autoneg != AUTONEG_DISABLE) return -EINVAL; - if (cmd->autoneg == AUTONEG_DISABLE && - cmd->duplex != DUPLEX_FULL && - cmd->duplex != DUPLEX_HALF) + if (cmd->base.autoneg == AUTONEG_DISABLE && + cmd->base.duplex != DUPLEX_FULL && + cmd->base.duplex != DUPLEX_HALF) return -EINVAL; - if (cmd->autoneg == AUTONEG_ENABLE) { + ethtool_convert_link_mode_to_legacy_u32(&advertising, + cmd->link_modes.advertising); + + if (cmd->base.autoneg == AUTONEG_ENABLE) { u32 mask = ADVERTISED_Autoneg | ADVERTISED_Pause | ADVERTISED_Asym_Pause; @@ -12185,7 +12197,7 @@ static int tg3_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) else mask |= ADVERTISED_FIBRE; - if (cmd->advertising & ~mask) + if (advertising & ~mask) return -EINVAL; mask &= (ADVERTISED_1000baseT_Half | @@ -12195,13 +12207,13 @@ static int tg3_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) ADVERTISED_10baseT_Half | ADVERTISED_10baseT_Full); - cmd->advertising &= mask; + advertising &= mask; } else { if (tp->phy_flags & TG3_PHYFLG_ANY_SERDES) { if (speed != SPEED_1000) return -EINVAL; - if (cmd->duplex != DUPLEX_FULL) + if (cmd->base.duplex != DUPLEX_FULL) return -EINVAL; } else { if (speed != SPEED_100 && @@ -12212,16 +12224,16 @@ static int tg3_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) tg3_full_lock(tp, 0); - tp->link_config.autoneg = cmd->autoneg; - if (cmd->autoneg == AUTONEG_ENABLE) { - tp->link_config.advertising = (cmd->advertising | + tp->link_config.autoneg = cmd->base.autoneg; + if (cmd->base.autoneg == AUTONEG_ENABLE) { + tp->link_config.advertising = (advertising | ADVERTISED_Autoneg); tp->link_config.speed = SPEED_UNKNOWN; tp->link_config.duplex = DUPLEX_UNKNOWN; } else { tp->link_config.advertising = 0; tp->link_config.speed = speed; - tp->link_config.duplex = cmd->duplex; + tp->link_config.duplex = cmd->base.duplex; } tp->phy_flags |= TG3_PHYFLG_USER_CONFIGURED; @@ -14094,8 +14106,6 @@ static int tg3_get_eee(struct net_device *dev, struct ethtool_eee *edata) } static const struct ethtool_ops tg3_ethtool_ops = { - .get_settings = tg3_get_settings, - .set_settings = tg3_set_settings, .get_drvinfo = tg3_get_drvinfo, .get_regs_len = tg3_get_regs_len, .get_regs = tg3_get_regs, @@ -14128,6 +14138,8 @@ static const struct ethtool_ops tg3_ethtool_ops = { .get_ts_info = tg3_get_ts_info, .get_eee = tg3_get_eee, .set_eee = tg3_set_eee, + .get_link_ksettings = tg3_get_link_ksettings, + .set_link_ksettings = tg3_set_link_ksettings, }; static struct rtnl_link_stats64 *tg3_get_stats64(struct net_device *dev, From 2b064fff8527a1052c7060c65c22cae80a9343b9 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Sat, 24 Sep 2016 02:10:04 +0530 Subject: [PATCH 1570/1715] bpf samples: fix compiler errors with sockex2 and sockex3 These samples fail to compile as 'struct flow_keys' conflicts with definition in net/flow_dissector.h. Fix the same by renaming the structure used in the sample. Signed-off-by: Naveen N. Rao Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- samples/bpf/sockex2_kern.c | 10 +++++----- samples/bpf/sockex3_kern.c | 8 ++++---- samples/bpf/sockex3_user.c | 4 ++-- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/samples/bpf/sockex2_kern.c b/samples/bpf/sockex2_kern.c index ba0e177ff561..44e5846c988f 100644 --- a/samples/bpf/sockex2_kern.c +++ b/samples/bpf/sockex2_kern.c @@ -14,7 +14,7 @@ struct vlan_hdr { __be16 h_vlan_encapsulated_proto; }; -struct flow_keys { +struct bpf_flow_keys { __be32 src; __be32 dst; union { @@ -59,7 +59,7 @@ static inline __u32 ipv6_addr_hash(struct __sk_buff *ctx, __u64 off) } static inline __u64 parse_ip(struct __sk_buff *skb, __u64 nhoff, __u64 *ip_proto, - struct flow_keys *flow) + struct bpf_flow_keys *flow) { __u64 verlen; @@ -83,7 +83,7 @@ static inline __u64 parse_ip(struct __sk_buff *skb, __u64 nhoff, __u64 *ip_proto } static inline __u64 parse_ipv6(struct __sk_buff *skb, __u64 nhoff, __u64 *ip_proto, - struct flow_keys *flow) + struct bpf_flow_keys *flow) { *ip_proto = load_byte(skb, nhoff + offsetof(struct ipv6hdr, nexthdr)); @@ -96,7 +96,7 @@ static inline __u64 parse_ipv6(struct __sk_buff *skb, __u64 nhoff, __u64 *ip_pro return nhoff; } -static inline bool flow_dissector(struct __sk_buff *skb, struct flow_keys *flow) +static inline bool flow_dissector(struct __sk_buff *skb, struct bpf_flow_keys *flow) { __u64 nhoff = ETH_HLEN; __u64 ip_proto; @@ -198,7 +198,7 @@ struct bpf_map_def SEC("maps") hash_map = { SEC("socket2") int bpf_prog2(struct __sk_buff *skb) { - struct flow_keys flow; + struct bpf_flow_keys flow; struct pair *value; u32 key; diff --git a/samples/bpf/sockex3_kern.c b/samples/bpf/sockex3_kern.c index 41ae2fd21b13..95907f8d2b17 100644 --- a/samples/bpf/sockex3_kern.c +++ b/samples/bpf/sockex3_kern.c @@ -61,7 +61,7 @@ struct vlan_hdr { __be16 h_vlan_encapsulated_proto; }; -struct flow_keys { +struct bpf_flow_keys { __be32 src; __be32 dst; union { @@ -88,7 +88,7 @@ static inline __u32 ipv6_addr_hash(struct __sk_buff *ctx, __u64 off) } struct globals { - struct flow_keys flow; + struct bpf_flow_keys flow; }; struct bpf_map_def SEC("maps") percpu_map = { @@ -114,14 +114,14 @@ struct pair { struct bpf_map_def SEC("maps") hash_map = { .type = BPF_MAP_TYPE_HASH, - .key_size = sizeof(struct flow_keys), + .key_size = sizeof(struct bpf_flow_keys), .value_size = sizeof(struct pair), .max_entries = 1024, }; static void update_stats(struct __sk_buff *skb, struct globals *g) { - struct flow_keys key = g->flow; + struct bpf_flow_keys key = g->flow; struct pair *value; value = bpf_map_lookup_elem(&hash_map, &key); diff --git a/samples/bpf/sockex3_user.c b/samples/bpf/sockex3_user.c index d4184ab5f3ac..3fcfd8c4b2a3 100644 --- a/samples/bpf/sockex3_user.c +++ b/samples/bpf/sockex3_user.c @@ -7,7 +7,7 @@ #include #include -struct flow_keys { +struct bpf_flow_keys { __be32 src; __be32 dst; union { @@ -49,7 +49,7 @@ int main(int argc, char **argv) (void) f; for (i = 0; i < 5; i++) { - struct flow_keys key = {}, next_key; + struct bpf_flow_keys key = {}, next_key; struct pair value; sleep(1); From 973d94d8a87c32661f1308a118074972ac5d483a Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Sat, 24 Sep 2016 02:10:05 +0530 Subject: [PATCH 1571/1715] bpf samples: update tracex5 sample to use __seccomp_filter seccomp_phase1() does not exist anymore. Instead, update sample to use __seccomp_filter(). While at it, set max locked memory to unlimited. Signed-off-by: Naveen N. Rao Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- samples/bpf/tracex5_kern.c | 16 +++++++--------- samples/bpf/tracex5_user.c | 3 +++ 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/samples/bpf/tracex5_kern.c b/samples/bpf/tracex5_kern.c index f95f232cbab9..fd12d7154d42 100644 --- a/samples/bpf/tracex5_kern.c +++ b/samples/bpf/tracex5_kern.c @@ -19,20 +19,18 @@ struct bpf_map_def SEC("maps") progs = { .max_entries = 1024, }; -SEC("kprobe/seccomp_phase1") +SEC("kprobe/__seccomp_filter") int bpf_prog1(struct pt_regs *ctx) { - struct seccomp_data sd; - - bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM1(ctx)); + int sc_nr = (int)PT_REGS_PARM1(ctx); /* dispatch into next BPF program depending on syscall number */ - bpf_tail_call(ctx, &progs, sd.nr); + bpf_tail_call(ctx, &progs, sc_nr); /* fall through -> unknown syscall */ - if (sd.nr >= __NR_getuid && sd.nr <= __NR_getsid) { + if (sc_nr >= __NR_getuid && sc_nr <= __NR_getsid) { char fmt[] = "syscall=%d (one of get/set uid/pid/gid)\n"; - bpf_trace_printk(fmt, sizeof(fmt), sd.nr); + bpf_trace_printk(fmt, sizeof(fmt), sc_nr); } return 0; } @@ -42,7 +40,7 @@ PROG(__NR_write)(struct pt_regs *ctx) { struct seccomp_data sd; - bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM1(ctx)); + bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx)); if (sd.args[2] == 512) { char fmt[] = "write(fd=%d, buf=%p, size=%d)\n"; bpf_trace_printk(fmt, sizeof(fmt), @@ -55,7 +53,7 @@ PROG(__NR_read)(struct pt_regs *ctx) { struct seccomp_data sd; - bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM1(ctx)); + bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx)); if (sd.args[2] > 128 && sd.args[2] <= 1024) { char fmt[] = "read(fd=%d, buf=%p, size=%d)\n"; bpf_trace_printk(fmt, sizeof(fmt), diff --git a/samples/bpf/tracex5_user.c b/samples/bpf/tracex5_user.c index a04dd3cd4358..36b5925bb137 100644 --- a/samples/bpf/tracex5_user.c +++ b/samples/bpf/tracex5_user.c @@ -6,6 +6,7 @@ #include #include "libbpf.h" #include "bpf_load.h" +#include /* install fake seccomp program to enable seccomp code path inside the kernel, * so that our kprobe attached to seccomp_phase1() can be triggered @@ -27,8 +28,10 @@ int main(int ac, char **argv) { FILE *f; char filename[256]; + struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + setrlimit(RLIMIT_MEMLOCK, &r); if (load_bpf_file(filename)) { printf("%s", bpf_log_buf); From 1955351da41caa1dbf4139191358fed84909d64b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Sat, 24 Sep 2016 20:01:50 +0200 Subject: [PATCH 1572/1715] bpf: Set register type according to is_valid_access() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This prevent future potential pointer leaks when an unprivileged eBPF program will read a pointer value from its context. Even if is_valid_access() returns a pointer type, the eBPF verifier replace it with UNKNOWN_VALUE. The register value that contains a kernel address is then allowed to leak. Moreover, this fix allows unprivileged eBPF programs to use functions with (legitimate) pointer arguments. Not an issue currently since reg_type is only set for PTR_TO_PACKET or PTR_TO_PACKET_END in XDP and TC programs that can only be loaded as privileged. For now, the only unprivileged eBPF program allowed is for socket filtering and all the types from its context are UNKNOWN_VALUE. However, this fix is important for future unprivileged eBPF programs which could use pointers in their context. Signed-off-by: Mickaël Salaün Cc: Alexei Starovoitov Cc: Daniel Borkmann Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 8c3f794c7028..7ada3152a556 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -749,9 +749,8 @@ static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off, err = check_ctx_access(env, off, size, t, ®_type); if (!err && t == BPF_READ && value_regno >= 0) { mark_reg_unknown_value(state->regs, value_regno); - if (env->allow_ptr_leaks) - /* note that reg.[id|off|range] == 0 */ - state->regs[value_regno].type = reg_type; + /* note that reg.[id|off|range] == 0 */ + state->regs[value_regno].type = reg_type; } } else if (reg->type == FRAME_PTR || reg->type == PTR_TO_STACK) { From 62c8d3daada97dab481a8235f39f2283daf5c8ba Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Mon, 26 Sep 2016 22:31:55 +0200 Subject: [PATCH 1573/1715] Revert "net: ethernet: bcmgenet: use new api ethtool_{get|set}_link_ksettings" This reverts commit 6b352ebccbcf ("net: ethernet: broadcom: bcmgenet: use new api ethtool_{get|set}_link_ksettings"). We needs to revert the commit 62469c76007e ("net: ethernet: bcmgenet: use phydev from struct net_device"), because this commit add a regression. As the commit 6b352ebccbcf ("net: ethernet: broadcom: bcmgenet: use new api ethtool_{get|set}_link_ksettings") depend on the first one, we also need to revert it first. Signed-off-by: Philippe Reynes Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 7f478499b649..6d6f83b20654 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -450,8 +450,8 @@ static inline void bcmgenet_rdma_ring_writel(struct bcmgenet_priv *priv, genet_dma_ring_regs[r]); } -static int bcmgenet_get_link_ksettings(struct net_device *dev, - struct ethtool_link_ksettings *cmd) +static int bcmgenet_get_settings(struct net_device *dev, + struct ethtool_cmd *cmd) { if (!netif_running(dev)) return -EINVAL; @@ -459,11 +459,11 @@ static int bcmgenet_get_link_ksettings(struct net_device *dev, if (!dev->phydev) return -ENODEV; - return phy_ethtool_ksettings_get(dev->phydev, cmd); + return phy_ethtool_gset(dev->phydev, cmd); } -static int bcmgenet_set_link_ksettings(struct net_device *dev, - const struct ethtool_link_ksettings *cmd) +static int bcmgenet_set_settings(struct net_device *dev, + struct ethtool_cmd *cmd) { if (!netif_running(dev)) return -EINVAL; @@ -471,7 +471,7 @@ static int bcmgenet_set_link_ksettings(struct net_device *dev, if (!dev->phydev) return -ENODEV; - return phy_ethtool_ksettings_set(dev->phydev, cmd); + return phy_ethtool_sset(dev->phydev, cmd); } static int bcmgenet_set_rx_csum(struct net_device *dev, @@ -977,6 +977,8 @@ static const struct ethtool_ops bcmgenet_ethtool_ops = { .get_strings = bcmgenet_get_strings, .get_sset_count = bcmgenet_get_sset_count, .get_ethtool_stats = bcmgenet_get_ethtool_stats, + .get_settings = bcmgenet_get_settings, + .set_settings = bcmgenet_set_settings, .get_drvinfo = bcmgenet_get_drvinfo, .get_link = ethtool_op_get_link, .get_msglevel = bcmgenet_get_msglevel, @@ -988,8 +990,6 @@ static const struct ethtool_ops bcmgenet_ethtool_ops = { .nway_reset = bcmgenet_nway_reset, .get_coalesce = bcmgenet_get_coalesce, .set_coalesce = bcmgenet_set_coalesce, - .get_link_ksettings = bcmgenet_get_link_ksettings, - .set_link_ksettings = bcmgenet_set_link_ksettings, }; /* Power down the unimac, based on mode. */ From 0299b6acf977f2c5ce9feea8faed0e264d3f01d3 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 26 Sep 2016 22:31:56 +0200 Subject: [PATCH 1574/1715] Revert "net: ethernet: bcmgenet: use phydev from struct net_device" This reverts commit 62469c76007e ("net: ethernet: bcmgenet: use phydev from struct net_device") because it causes GENETv1/2/3 adapters to expose the following behavior after an ifconfig down/up sequence: PING fainelli-linux (10.112.156.244): 56 data bytes 64 bytes from 10.112.156.244: seq=1 ttl=61 time=1.352 ms 64 bytes from 10.112.156.244: seq=1 ttl=61 time=1.472 ms (DUP!) 64 bytes from 10.112.156.244: seq=1 ttl=61 time=1.496 ms (DUP!) 64 bytes from 10.112.156.244: seq=1 ttl=61 time=1.517 ms (DUP!) 64 bytes from 10.112.156.244: seq=1 ttl=61 time=1.536 ms (DUP!) 64 bytes from 10.112.156.244: seq=1 ttl=61 time=1.557 ms (DUP!) 64 bytes from 10.112.156.244: seq=1 ttl=61 time=752.448 ms (DUP!) This was previously fixed by commit 5dbebbb44a6a ("net: bcmgenet: Software reset EPHY after power on") but the commit we are reverting was essentially making this previous commit void, here is why. Without commit 62469c76007e we would have the following scenario after an ifconfig down then up sequence: - bcmgenet_open() calls bcmgenet_power_up() to make sure the PHY is initialized *before* we get to initialize the UniMAC, this is critical to ensure the PHY is in a correct state, priv->phydev is valid, this code executes fine - second time from bcmgenet_mii_probe(), through the normal phy_init_hw() call (which arguably could be optimized out) Everything is fine in that case. With commit 62469c76007e, we would have the following scenario to happen after an ifconfig down then up sequence: - bcmgenet_close() calls phy_disonnect() which makes dev->phydev become NULL - when bcmgenet_open() executes again and calls bcmgenet_mii_reset() from bcmgenet_power_up() to initialize the internal PHY, the NULL check becomes true, so we do not reset the PHY, yet we keep going on and initialize the UniMAC, causing MAC activity to occur - we call bcmgenet_mii_reset() from bcmgenet_mii_probe(), but this is too late, the PHY is botched, and causes the above bogus pings/packets transmission/reception to occur Reported-by: Jaedon Shin Signed-off-by: Florian Fainelli Signed-off-by: Philippe Reynes Signed-off-by: David S. Miller --- .../net/ethernet/broadcom/genet/bcmgenet.c | 45 ++++++++++--------- .../net/ethernet/broadcom/genet/bcmgenet.h | 1 + drivers/net/ethernet/broadcom/genet/bcmmii.c | 24 +++++----- 3 files changed, 39 insertions(+), 31 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 6d6f83b20654..55cbe2ed2929 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -453,25 +453,29 @@ static inline void bcmgenet_rdma_ring_writel(struct bcmgenet_priv *priv, static int bcmgenet_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) { + struct bcmgenet_priv *priv = netdev_priv(dev); + if (!netif_running(dev)) return -EINVAL; - if (!dev->phydev) + if (!priv->phydev) return -ENODEV; - return phy_ethtool_gset(dev->phydev, cmd); + return phy_ethtool_gset(priv->phydev, cmd); } static int bcmgenet_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) { + struct bcmgenet_priv *priv = netdev_priv(dev); + if (!netif_running(dev)) return -EINVAL; - if (!dev->phydev) + if (!priv->phydev) return -ENODEV; - return phy_ethtool_sset(dev->phydev, cmd); + return phy_ethtool_sset(priv->phydev, cmd); } static int bcmgenet_set_rx_csum(struct net_device *dev, @@ -937,7 +941,7 @@ static int bcmgenet_get_eee(struct net_device *dev, struct ethtool_eee *e) e->eee_active = p->eee_active; e->tx_lpi_timer = bcmgenet_umac_readl(priv, UMAC_EEE_LPI_TIMER); - return phy_ethtool_get_eee(dev->phydev, e); + return phy_ethtool_get_eee(priv->phydev, e); } static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_eee *e) @@ -954,7 +958,7 @@ static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_eee *e) if (!p->eee_enabled) { bcmgenet_eee_enable_set(dev, false); } else { - ret = phy_init_eee(dev->phydev, 0); + ret = phy_init_eee(priv->phydev, 0); if (ret) { netif_err(priv, hw, dev, "EEE initialization failed\n"); return ret; @@ -964,12 +968,14 @@ static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_eee *e) bcmgenet_eee_enable_set(dev, true); } - return phy_ethtool_set_eee(dev->phydev, e); + return phy_ethtool_set_eee(priv->phydev, e); } static int bcmgenet_nway_reset(struct net_device *dev) { - return genphy_restart_aneg(dev->phydev); + struct bcmgenet_priv *priv = netdev_priv(dev); + + return genphy_restart_aneg(priv->phydev); } /* standard ethtool support functions. */ @@ -996,13 +1002,12 @@ static const struct ethtool_ops bcmgenet_ethtool_ops = { static int bcmgenet_power_down(struct bcmgenet_priv *priv, enum bcmgenet_power_mode mode) { - struct net_device *ndev = priv->dev; int ret = 0; u32 reg; switch (mode) { case GENET_POWER_CABLE_SENSE: - phy_detach(ndev->phydev); + phy_detach(priv->phydev); break; case GENET_POWER_WOL_MAGIC: @@ -1063,6 +1068,7 @@ static void bcmgenet_power_up(struct bcmgenet_priv *priv, /* ioctl handle special commands that are not present in ethtool. */ static int bcmgenet_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) { + struct bcmgenet_priv *priv = netdev_priv(dev); int val = 0; if (!netif_running(dev)) @@ -1072,10 +1078,10 @@ static int bcmgenet_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) case SIOCGMIIPHY: case SIOCGMIIREG: case SIOCSMIIREG: - if (!dev->phydev) + if (!priv->phydev) val = -ENODEV; else - val = phy_mii_ioctl(dev->phydev, rq, cmd); + val = phy_mii_ioctl(priv->phydev, rq, cmd); break; default: @@ -2458,7 +2464,6 @@ static void bcmgenet_irq_task(struct work_struct *work) { struct bcmgenet_priv *priv = container_of( work, struct bcmgenet_priv, bcmgenet_irq_work); - struct net_device *ndev = priv->dev; netif_dbg(priv, intr, priv->dev, "%s\n", __func__); @@ -2471,7 +2476,7 @@ static void bcmgenet_irq_task(struct work_struct *work) /* Link UP/DOWN event */ if (priv->irq0_stat & UMAC_IRQ_LINK_EVENT) { - phy_mac_interrupt(ndev->phydev, + phy_mac_interrupt(priv->phydev, !!(priv->irq0_stat & UMAC_IRQ_LINK_UP)); priv->irq0_stat &= ~UMAC_IRQ_LINK_EVENT; } @@ -2711,7 +2716,7 @@ static void bcmgenet_netif_start(struct net_device *dev) /* Monitor link interrupts now */ bcmgenet_link_intr_enable(priv); - phy_start(dev->phydev); + phy_start(priv->phydev); } static int bcmgenet_open(struct net_device *dev) @@ -2810,7 +2815,7 @@ static void bcmgenet_netif_stop(struct net_device *dev) struct bcmgenet_priv *priv = netdev_priv(dev); netif_tx_stop_all_queues(dev); - phy_stop(dev->phydev); + phy_stop(priv->phydev); bcmgenet_intr_disable(priv); bcmgenet_disable_rx_napi(priv); bcmgenet_disable_tx_napi(priv); @@ -2836,7 +2841,7 @@ static int bcmgenet_close(struct net_device *dev) bcmgenet_netif_stop(dev); /* Really kill the PHY state machine and disconnect from it */ - phy_disconnect(dev->phydev); + phy_disconnect(priv->phydev); /* Disable MAC receive */ umac_enable_set(priv, CMD_RX_EN, false); @@ -3395,7 +3400,7 @@ static int bcmgenet_suspend(struct device *d) bcmgenet_netif_stop(dev); - phy_suspend(dev->phydev); + phy_suspend(priv->phydev); netif_device_detach(dev); @@ -3459,7 +3464,7 @@ static int bcmgenet_resume(struct device *d) if (priv->wolopts) clk_disable_unprepare(priv->clk_wol); - phy_init_hw(dev->phydev); + phy_init_hw(priv->phydev); /* Speed settings must be restored */ bcmgenet_mii_config(priv->dev); @@ -3492,7 +3497,7 @@ static int bcmgenet_resume(struct device *d) netif_device_attach(dev); - phy_resume(dev->phydev); + phy_resume(priv->phydev); if (priv->eee.eee_enabled) bcmgenet_eee_enable_set(dev, true); diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h index 0f0868c56f05..1e2dc34d331a 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h @@ -597,6 +597,7 @@ struct bcmgenet_priv { /* MDIO bus variables */ wait_queue_head_t wq; + struct phy_device *phydev; bool internal_phy; struct device_node *phy_dn; struct device_node *mdio_dn; diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index e907acd81da9..457c3bc8cfff 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -86,7 +86,7 @@ static int bcmgenet_mii_write(struct mii_bus *bus, int phy_id, void bcmgenet_mii_setup(struct net_device *dev) { struct bcmgenet_priv *priv = netdev_priv(dev); - struct phy_device *phydev = dev->phydev; + struct phy_device *phydev = priv->phydev; u32 reg, cmd_bits = 0; bool status_changed = false; @@ -183,9 +183,9 @@ void bcmgenet_mii_reset(struct net_device *dev) if (GENET_IS_V4(priv)) return; - if (dev->phydev) { - phy_init_hw(dev->phydev); - phy_start_aneg(dev->phydev); + if (priv->phydev) { + phy_init_hw(priv->phydev); + phy_start_aneg(priv->phydev); } } @@ -236,7 +236,6 @@ static void bcmgenet_internal_phy_setup(struct net_device *dev) static void bcmgenet_moca_phy_setup(struct bcmgenet_priv *priv) { - struct net_device *ndev = priv->dev; u32 reg; /* Speed settings are set in bcmgenet_mii_setup() */ @@ -245,14 +244,14 @@ static void bcmgenet_moca_phy_setup(struct bcmgenet_priv *priv) bcmgenet_sys_writel(priv, reg, SYS_PORT_CTRL); if (priv->hw_params->flags & GENET_HAS_MOCA_LINK_DET) - fixed_phy_set_link_update(ndev->phydev, + fixed_phy_set_link_update(priv->phydev, bcmgenet_fixed_phy_link_update); } int bcmgenet_mii_config(struct net_device *dev) { struct bcmgenet_priv *priv = netdev_priv(dev); - struct phy_device *phydev = dev->phydev; + struct phy_device *phydev = priv->phydev; struct device *kdev = &priv->pdev->dev; const char *phy_name = NULL; u32 id_mode_dis = 0; @@ -303,7 +302,7 @@ int bcmgenet_mii_config(struct net_device *dev) * capabilities, use that knowledge to also configure the * Reverse MII interface correctly. */ - if ((phydev->supported & PHY_BASIC_FEATURES) == + if ((priv->phydev->supported & PHY_BASIC_FEATURES) == PHY_BASIC_FEATURES) port_ctrl = PORT_MODE_EXT_RVMII_25; else @@ -372,7 +371,7 @@ int bcmgenet_mii_probe(struct net_device *dev) return -ENODEV; } } else { - phydev = dev->phydev; + phydev = priv->phydev; phydev->dev_flags = phy_flags; ret = phy_connect_direct(dev, phydev, bcmgenet_mii_setup, @@ -383,6 +382,8 @@ int bcmgenet_mii_probe(struct net_device *dev) } } + priv->phydev = phydev; + /* Configure port multiplexer based on what the probed PHY device since * reading the 'max-speed' property determines the maximum supported * PHY speed which is needed for bcmgenet_mii_config() to configure @@ -390,7 +391,7 @@ int bcmgenet_mii_probe(struct net_device *dev) */ ret = bcmgenet_mii_config(dev); if (ret) { - phy_disconnect(phydev); + phy_disconnect(priv->phydev); return ret; } @@ -400,7 +401,7 @@ int bcmgenet_mii_probe(struct net_device *dev) * Ethernet MAC ISRs */ if (priv->internal_phy) - phydev->irq = PHY_IGNORE_INTERRUPT; + priv->phydev->irq = PHY_IGNORE_INTERRUPT; return 0; } @@ -605,6 +606,7 @@ static int bcmgenet_mii_pd_init(struct bcmgenet_priv *priv) } + priv->phydev = phydev; priv->phy_interface = pd->phy_interface; return 0; From fa92bf04f583dddf275845838e686fd052f5b49a Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Mon, 26 Sep 2016 22:31:57 +0200 Subject: [PATCH 1575/1715] net: bcmgenet: use new api ethtool_{get|set}_link_ksettings The ethtool api {get|set}_settings is deprecated. We move this driver to new api {get|set}_link_ksettings. Signed-off-by: Philippe Reynes Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 55cbe2ed2929..4464bc5db934 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -450,8 +450,8 @@ static inline void bcmgenet_rdma_ring_writel(struct bcmgenet_priv *priv, genet_dma_ring_regs[r]); } -static int bcmgenet_get_settings(struct net_device *dev, - struct ethtool_cmd *cmd) +static int bcmgenet_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) { struct bcmgenet_priv *priv = netdev_priv(dev); @@ -461,11 +461,11 @@ static int bcmgenet_get_settings(struct net_device *dev, if (!priv->phydev) return -ENODEV; - return phy_ethtool_gset(priv->phydev, cmd); + return phy_ethtool_ksettings_get(priv->phydev, cmd); } -static int bcmgenet_set_settings(struct net_device *dev, - struct ethtool_cmd *cmd) +static int bcmgenet_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) { struct bcmgenet_priv *priv = netdev_priv(dev); @@ -475,7 +475,7 @@ static int bcmgenet_set_settings(struct net_device *dev, if (!priv->phydev) return -ENODEV; - return phy_ethtool_sset(priv->phydev, cmd); + return phy_ethtool_ksettings_set(priv->phydev, cmd); } static int bcmgenet_set_rx_csum(struct net_device *dev, @@ -983,8 +983,6 @@ static const struct ethtool_ops bcmgenet_ethtool_ops = { .get_strings = bcmgenet_get_strings, .get_sset_count = bcmgenet_get_sset_count, .get_ethtool_stats = bcmgenet_get_ethtool_stats, - .get_settings = bcmgenet_get_settings, - .set_settings = bcmgenet_set_settings, .get_drvinfo = bcmgenet_get_drvinfo, .get_link = ethtool_op_get_link, .get_msglevel = bcmgenet_get_msglevel, @@ -996,6 +994,8 @@ static const struct ethtool_ops bcmgenet_ethtool_ops = { .nway_reset = bcmgenet_nway_reset, .get_coalesce = bcmgenet_get_coalesce, .set_coalesce = bcmgenet_set_coalesce, + .get_link_ksettings = bcmgenet_get_link_ksettings, + .set_link_ksettings = bcmgenet_set_link_ksettings, }; /* Power down the unimac, based on mode. */ From e13dbead976d79968bd616b924f300cdaf15f852 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 26 Sep 2016 21:56:21 +0300 Subject: [PATCH 1576/1715] ath10k: spelling and miscellaneous neatening Correct some trivial comment typos. Remove unnecessary parentheses in a long line. Signed-off-by: Joe Perches [kvalo@qca.qualcomm.com: drop the change for return] Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/ce.c | 2 +- drivers/net/wireless/ath/ath10k/htt.h | 8 +++---- drivers/net/wireless/ath/ath10k/hw.c | 2 +- drivers/net/wireless/ath/ath10k/hw.h | 2 +- drivers/net/wireless/ath/ath10k/targaddrs.h | 2 +- drivers/net/wireless/ath/ath10k/wmi.h | 24 ++++++++++----------- 6 files changed, 20 insertions(+), 20 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/ce.c b/drivers/net/wireless/ath/ath10k/ce.c index 65d8d714e917..e7205546fa6b 100644 --- a/drivers/net/wireless/ath/ath10k/ce.c +++ b/drivers/net/wireless/ath/ath10k/ce.c @@ -39,7 +39,7 @@ * chooses what to send (buffer address, length). The destination * side keeps a supply of "anonymous receive buffers" available and * it handles incoming data as it arrives (when the destination - * recieves an interrupt). + * receives an interrupt). * * The sender may send a simple buffer (address/length) or it may * send a small list of buffers. When a small list is sent, hardware diff --git a/drivers/net/wireless/ath/ath10k/htt.h b/drivers/net/wireless/ath/ath10k/htt.h index 98c14247021b..0d2ed09f202b 100644 --- a/drivers/net/wireless/ath/ath10k/htt.h +++ b/drivers/net/wireless/ath/ath10k/htt.h @@ -595,7 +595,7 @@ enum htt_rx_mpdu_status { /* only accept EAPOL frames */ HTT_RX_IND_MPDU_STATUS_UNAUTH_PEER, HTT_RX_IND_MPDU_STATUS_OUT_OF_SYNC, - /* Non-data in promiscous mode */ + /* Non-data in promiscuous mode */ HTT_RX_IND_MPDU_STATUS_MGMT_CTRL, HTT_RX_IND_MPDU_STATUS_TKIP_MIC_ERR, HTT_RX_IND_MPDU_STATUS_DECRYPT_ERR, @@ -900,7 +900,7 @@ struct htt_rx_in_ord_ind { * Purpose: indicate how many 32-bit integers follow the message header * - NUM_CHARS * Bits 31:16 - * Purpose: indicate how many 8-bit charaters follow the series of integers + * Purpose: indicate how many 8-bit characters follow the series of integers */ struct htt_rx_test { u8 num_ints; @@ -1042,10 +1042,10 @@ struct htt_dbg_stats_wal_tx_stats { /* illegal rate phy errors */ __le32 illgl_rate_phy_err; - /* wal pdev continous xretry */ + /* wal pdev continuous xretry */ __le32 pdev_cont_xretry; - /* wal pdev continous xretry */ + /* wal pdev continuous xretry */ __le32 pdev_tx_timeout; /* wal pdev resets */ diff --git a/drivers/net/wireless/ath/ath10k/hw.c b/drivers/net/wireless/ath/ath10k/hw.c index c2ecb9bd824a..675e75d66db2 100644 --- a/drivers/net/wireless/ath/ath10k/hw.c +++ b/drivers/net/wireless/ath/ath10k/hw.c @@ -85,7 +85,7 @@ const struct ath10k_hw_regs qca99x0_regs = { .ce7_base_address = 0x0004bc00, /* Note: qca99x0 supports upto 12 Copy Engines. Other than address of * CE0 and CE1 no other copy engine is directly referred in the code. - * It is not really neccessary to assign address for newly supported + * It is not really necessary to assign address for newly supported * CEs in this address table. * Copy Engine Address * CE8 0x0004c000 diff --git a/drivers/net/wireless/ath/ath10k/hw.h b/drivers/net/wireless/ath/ath10k/hw.h index 308e423d4b99..4a01bcff49a7 100644 --- a/drivers/net/wireless/ath/ath10k/hw.h +++ b/drivers/net/wireless/ath/ath10k/hw.h @@ -284,7 +284,7 @@ void ath10k_hw_fill_survey_time(struct ath10k *ar, struct survey_info *survey, #define QCA_REV_9377(ar) ((ar)->hw_rev == ATH10K_HW_QCA9377) #define QCA_REV_40XX(ar) ((ar)->hw_rev == ATH10K_HW_QCA4019) -/* Known pecularities: +/* Known peculiarities: * - raw appears in nwifi decap, raw and nwifi appear in ethernet decap * - raw have FCS, nwifi doesn't * - ethernet frames have 802.11 header decapped and parts (base hdr, cipher diff --git a/drivers/net/wireless/ath/ath10k/targaddrs.h b/drivers/net/wireless/ath/ath10k/targaddrs.h index aaf53a81e78b..a47cab44d9c8 100644 --- a/drivers/net/wireless/ath/ath10k/targaddrs.h +++ b/drivers/net/wireless/ath/ath10k/targaddrs.h @@ -405,7 +405,7 @@ Fw Mode/SubMode Mask * 1. target firmware would check magic number and if it's a match, firmware * would consider the bits[0:15] are valid and base on that to calculate * the end of DRAM. Early allocation would be located at that area and - * may be reclaimed when necesary + * may be reclaimed when necessary * 2. if no magic number is found, early allocation would happen at "_end" * symbol of ROM which is located before the app-data and might NOT be * re-claimable. If this is adopted, link script should keep this in diff --git a/drivers/net/wireless/ath/ath10k/wmi.h b/drivers/net/wireless/ath/ath10k/wmi.h index 48e04b92e231..6a68817b90b8 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.h +++ b/drivers/net/wireless/ath/ath10k/wmi.h @@ -55,7 +55,7 @@ * type. * * 6. Comment each parameter part of the WMI command/event structure by - * using the 2 stars at the begining of C comment instead of one star to + * using the 2 stars at the beginning of C comment instead of one star to * enable HTML document generation using Doxygen. * */ @@ -2087,7 +2087,7 @@ struct wmi_resource_config { * In offload mode target supports features like WOW, chatter and * other protocol offloads. In order to support them some * functionalities like reorder buffering, PN checking need to be - * done in target. This determines maximum number of peers suported + * done in target. This determines maximum number of peers supported * by target in offload mode */ __le32 num_offload_peers; @@ -2268,7 +2268,7 @@ struct wmi_resource_config { * Max. number of Tx fragments per MSDU * This parameter controls the max number of Tx fragments per MSDU. * This is sent by the target as part of the WMI_SERVICE_READY event - * and is overriden by the OS shim as required. + * and is overridden by the OS shim as required. */ __le32 max_frag_entries; } __packed; @@ -2450,7 +2450,7 @@ struct wmi_resource_config_10x { * Max. number of Tx fragments per MSDU * This parameter controls the max number of Tx fragments per MSDU. * This is sent by the target as part of the WMI_SERVICE_READY event - * and is overriden by the OS shim as required. + * and is overridden by the OS shim as required. */ __le32 max_frag_entries; } __packed; @@ -2744,7 +2744,7 @@ struct wmi_init_cmd { struct wmi_host_mem_chunks mem_chunks; } __packed; -/* _10x stucture is from 10.X FW API */ +/* _10x structure is from 10.X FW API */ struct wmi_init_cmd_10x { struct wmi_resource_config_10x resource_config; struct wmi_host_mem_chunks mem_chunks; @@ -3967,7 +3967,7 @@ struct wmi_pdev_stats_tx { /* illegal rate phy errors */ __le32 illgl_rate_phy_err; - /* wal pdev continous xretry */ + /* wal pdev continuous xretry */ __le32 pdev_cont_xretry; /* wal pdev continous xretry */ @@ -4461,9 +4461,9 @@ struct wmi_vdev_start_request_cmd { __le32 flags; /* ssid field. Only valid for AP/GO/IBSS/BTAmp VDEV type. */ struct wmi_ssid ssid; - /* beacon/probe reponse xmit rate. Applicable for SoftAP. */ + /* beacon/probe response xmit rate. Applicable for SoftAP. */ __le32 bcn_tx_rate; - /* beacon/probe reponse xmit power. Applicable for SoftAP. */ + /* beacon/probe response xmit power. Applicable for SoftAP. */ __le32 bcn_tx_power; /* number of p2p NOA descriptor(s) from scan entry */ __le32 num_noa_descriptors; @@ -4691,7 +4691,7 @@ enum wmi_vdev_param { WMI_VDEV_PARAM_BEACON_INTERVAL, /* Listen interval in TUs */ WMI_VDEV_PARAM_LISTEN_INTERVAL, - /* muticast rate in Mbps */ + /* multicast rate in Mbps */ WMI_VDEV_PARAM_MULTICAST_RATE, /* management frame rate in Mbps */ WMI_VDEV_PARAM_MGMT_TX_RATE, @@ -4822,7 +4822,7 @@ enum wmi_10x_vdev_param { WMI_10X_VDEV_PARAM_BEACON_INTERVAL, /* Listen interval in TUs */ WMI_10X_VDEV_PARAM_LISTEN_INTERVAL, - /* muticast rate in Mbps */ + /* multicast rate in Mbps */ WMI_10X_VDEV_PARAM_MULTICAST_RATE, /* management frame rate in Mbps */ WMI_10X_VDEV_PARAM_MGMT_TX_RATE, @@ -5067,7 +5067,7 @@ struct wmi_vdev_simple_event { } __packed; /* VDEV start response status codes */ -/* VDEV succesfully started */ +/* VDEV successfully started */ #define WMI_INIFIED_VDEV_START_RESPONSE_STATUS_SUCCESS 0x0 /* requested VDEV not found */ @@ -5383,7 +5383,7 @@ enum wmi_sta_ps_param_pspoll_count { #define WMI_UAPSD_AC_TYPE_TRIG 1 #define WMI_UAPSD_AC_BIT_MASK(ac, type) \ - ((type == WMI_UAPSD_AC_TYPE_DELI) ? (1 << (ac << 1)) : (1 << ((ac << 1) + 1))) + (type == WMI_UAPSD_AC_TYPE_DELI ? 1 << (ac << 1) : 1 << ((ac << 1) + 1)) enum wmi_sta_ps_param_uapsd { WMI_STA_PS_UAPSD_AC0_DELIVERY_EN = (1 << 0), From 828662753d60e5f95d082dd50dfc6ce1abe82095 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 26 Sep 2016 21:56:22 +0300 Subject: [PATCH 1577/1715] ath10k: use devm_clk_get() instead of clk_get() Use the managed variant of clk_get() to simplify the failure path and the .remove callback. Signed-off-by: Masahiro Yamada Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/ahb.c | 34 +++++---------------------- 1 file changed, 6 insertions(+), 28 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/ahb.c b/drivers/net/wireless/ath/ath10k/ahb.c index b99ad5df383d..2de481b48e5d 100644 --- a/drivers/net/wireless/ath/ath10k/ahb.c +++ b/drivers/net/wireless/ath/ath10k/ahb.c @@ -91,59 +91,37 @@ static int ath10k_ahb_clock_init(struct ath10k *ar) { struct ath10k_ahb *ar_ahb = ath10k_ahb_priv(ar); struct device *dev; - int ret; dev = &ar_ahb->pdev->dev; - ar_ahb->cmd_clk = clk_get(dev, "wifi_wcss_cmd"); + ar_ahb->cmd_clk = devm_clk_get(dev, "wifi_wcss_cmd"); if (IS_ERR_OR_NULL(ar_ahb->cmd_clk)) { ath10k_err(ar, "failed to get cmd clk: %ld\n", PTR_ERR(ar_ahb->cmd_clk)); - ret = ar_ahb->cmd_clk ? PTR_ERR(ar_ahb->cmd_clk) : -ENODEV; - goto out; + return ar_ahb->cmd_clk ? PTR_ERR(ar_ahb->cmd_clk) : -ENODEV; } - ar_ahb->ref_clk = clk_get(dev, "wifi_wcss_ref"); + ar_ahb->ref_clk = devm_clk_get(dev, "wifi_wcss_ref"); if (IS_ERR_OR_NULL(ar_ahb->ref_clk)) { ath10k_err(ar, "failed to get ref clk: %ld\n", PTR_ERR(ar_ahb->ref_clk)); - ret = ar_ahb->ref_clk ? PTR_ERR(ar_ahb->ref_clk) : -ENODEV; - goto err_cmd_clk_put; + return ar_ahb->ref_clk ? PTR_ERR(ar_ahb->ref_clk) : -ENODEV; } - ar_ahb->rtc_clk = clk_get(dev, "wifi_wcss_rtc"); + ar_ahb->rtc_clk = devm_clk_get(dev, "wifi_wcss_rtc"); if (IS_ERR_OR_NULL(ar_ahb->rtc_clk)) { ath10k_err(ar, "failed to get rtc clk: %ld\n", PTR_ERR(ar_ahb->rtc_clk)); - ret = ar_ahb->rtc_clk ? PTR_ERR(ar_ahb->rtc_clk) : -ENODEV; - goto err_ref_clk_put; + return ar_ahb->rtc_clk ? PTR_ERR(ar_ahb->rtc_clk) : -ENODEV; } return 0; - -err_ref_clk_put: - clk_put(ar_ahb->ref_clk); - -err_cmd_clk_put: - clk_put(ar_ahb->cmd_clk); - -out: - return ret; } static void ath10k_ahb_clock_deinit(struct ath10k *ar) { struct ath10k_ahb *ar_ahb = ath10k_ahb_priv(ar); - if (!IS_ERR_OR_NULL(ar_ahb->cmd_clk)) - clk_put(ar_ahb->cmd_clk); - - if (!IS_ERR_OR_NULL(ar_ahb->ref_clk)) - clk_put(ar_ahb->ref_clk); - - if (!IS_ERR_OR_NULL(ar_ahb->rtc_clk)) - clk_put(ar_ahb->rtc_clk); - ar_ahb->cmd_clk = NULL; ar_ahb->ref_clk = NULL; ar_ahb->rtc_clk = NULL; From c5d8a34675d9160493a990007ae85ced02241d29 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 26 Sep 2016 21:56:23 +0300 Subject: [PATCH 1578/1715] ath10k: use devm_reset_control_get() instead of reset_control_get() Use the managed variant of reset_control_get() to simplify the failure path and the .remove callback. Signed-off-by: Masahiro Yamada Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/ahb.c | 56 +++++---------------------- 1 file changed, 10 insertions(+), 46 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/ahb.c b/drivers/net/wireless/ath/ath10k/ahb.c index 2de481b48e5d..751b7fed536d 100644 --- a/drivers/net/wireless/ath/ath10k/ahb.c +++ b/drivers/net/wireless/ath/ath10k/ahb.c @@ -191,92 +191,56 @@ static int ath10k_ahb_rst_ctrl_init(struct ath10k *ar) { struct ath10k_ahb *ar_ahb = ath10k_ahb_priv(ar); struct device *dev; - int ret; dev = &ar_ahb->pdev->dev; - ar_ahb->core_cold_rst = reset_control_get(dev, "wifi_core_cold"); + ar_ahb->core_cold_rst = devm_reset_control_get(dev, "wifi_core_cold"); if (IS_ERR_OR_NULL(ar_ahb->core_cold_rst)) { ath10k_err(ar, "failed to get core cold rst ctrl: %ld\n", PTR_ERR(ar_ahb->core_cold_rst)); - ret = ar_ahb->core_cold_rst ? + return ar_ahb->core_cold_rst ? PTR_ERR(ar_ahb->core_cold_rst) : -ENODEV; - goto out; } - ar_ahb->radio_cold_rst = reset_control_get(dev, "wifi_radio_cold"); + ar_ahb->radio_cold_rst = devm_reset_control_get(dev, "wifi_radio_cold"); if (IS_ERR_OR_NULL(ar_ahb->radio_cold_rst)) { ath10k_err(ar, "failed to get radio cold rst ctrl: %ld\n", PTR_ERR(ar_ahb->radio_cold_rst)); - ret = ar_ahb->radio_cold_rst ? + return ar_ahb->radio_cold_rst ? PTR_ERR(ar_ahb->radio_cold_rst) : -ENODEV; - goto err_core_cold_rst_put; } - ar_ahb->radio_warm_rst = reset_control_get(dev, "wifi_radio_warm"); + ar_ahb->radio_warm_rst = devm_reset_control_get(dev, "wifi_radio_warm"); if (IS_ERR_OR_NULL(ar_ahb->radio_warm_rst)) { ath10k_err(ar, "failed to get radio warm rst ctrl: %ld\n", PTR_ERR(ar_ahb->radio_warm_rst)); - ret = ar_ahb->radio_warm_rst ? + return ar_ahb->radio_warm_rst ? PTR_ERR(ar_ahb->radio_warm_rst) : -ENODEV; - goto err_radio_cold_rst_put; } - ar_ahb->radio_srif_rst = reset_control_get(dev, "wifi_radio_srif"); + ar_ahb->radio_srif_rst = devm_reset_control_get(dev, "wifi_radio_srif"); if (IS_ERR_OR_NULL(ar_ahb->radio_srif_rst)) { ath10k_err(ar, "failed to get radio srif rst ctrl: %ld\n", PTR_ERR(ar_ahb->radio_srif_rst)); - ret = ar_ahb->radio_srif_rst ? + return ar_ahb->radio_srif_rst ? PTR_ERR(ar_ahb->radio_srif_rst) : -ENODEV; - goto err_radio_warm_rst_put; } - ar_ahb->cpu_init_rst = reset_control_get(dev, "wifi_cpu_init"); + ar_ahb->cpu_init_rst = devm_reset_control_get(dev, "wifi_cpu_init"); if (IS_ERR_OR_NULL(ar_ahb->cpu_init_rst)) { ath10k_err(ar, "failed to get cpu init rst ctrl: %ld\n", PTR_ERR(ar_ahb->cpu_init_rst)); - ret = ar_ahb->cpu_init_rst ? + return ar_ahb->cpu_init_rst ? PTR_ERR(ar_ahb->cpu_init_rst) : -ENODEV; - goto err_radio_srif_rst_put; } return 0; - -err_radio_srif_rst_put: - reset_control_put(ar_ahb->radio_srif_rst); - -err_radio_warm_rst_put: - reset_control_put(ar_ahb->radio_warm_rst); - -err_radio_cold_rst_put: - reset_control_put(ar_ahb->radio_cold_rst); - -err_core_cold_rst_put: - reset_control_put(ar_ahb->core_cold_rst); - -out: - return ret; } static void ath10k_ahb_rst_ctrl_deinit(struct ath10k *ar) { struct ath10k_ahb *ar_ahb = ath10k_ahb_priv(ar); - if (!IS_ERR_OR_NULL(ar_ahb->core_cold_rst)) - reset_control_put(ar_ahb->core_cold_rst); - - if (!IS_ERR_OR_NULL(ar_ahb->radio_cold_rst)) - reset_control_put(ar_ahb->radio_cold_rst); - - if (!IS_ERR_OR_NULL(ar_ahb->radio_warm_rst)) - reset_control_put(ar_ahb->radio_warm_rst); - - if (!IS_ERR_OR_NULL(ar_ahb->radio_srif_rst)) - reset_control_put(ar_ahb->radio_srif_rst); - - if (!IS_ERR_OR_NULL(ar_ahb->cpu_init_rst)) - reset_control_put(ar_ahb->cpu_init_rst); - ar_ahb->core_cold_rst = NULL; ar_ahb->radio_cold_rst = NULL; ar_ahb->radio_warm_rst = NULL; From 65901a9e70584afb840b1c013d582d07b7f61696 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 26 Sep 2016 21:56:24 +0300 Subject: [PATCH 1579/1715] ath10k: do not check if reset is NULL Since reset_control_get() never returns NULL, we can use IS_ERR() instead of IS_ERR_OR_NULL(). The return statements can be simpler as well. Signed-off-by: Masahiro Yamada Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/ahb.c | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/ahb.c b/drivers/net/wireless/ath/ath10k/ahb.c index 751b7fed536d..fe38b459544d 100644 --- a/drivers/net/wireless/ath/ath10k/ahb.c +++ b/drivers/net/wireless/ath/ath10k/ahb.c @@ -195,43 +195,38 @@ static int ath10k_ahb_rst_ctrl_init(struct ath10k *ar) dev = &ar_ahb->pdev->dev; ar_ahb->core_cold_rst = devm_reset_control_get(dev, "wifi_core_cold"); - if (IS_ERR_OR_NULL(ar_ahb->core_cold_rst)) { + if (IS_ERR(ar_ahb->core_cold_rst)) { ath10k_err(ar, "failed to get core cold rst ctrl: %ld\n", PTR_ERR(ar_ahb->core_cold_rst)); - return ar_ahb->core_cold_rst ? - PTR_ERR(ar_ahb->core_cold_rst) : -ENODEV; + return PTR_ERR(ar_ahb->core_cold_rst); } ar_ahb->radio_cold_rst = devm_reset_control_get(dev, "wifi_radio_cold"); - if (IS_ERR_OR_NULL(ar_ahb->radio_cold_rst)) { + if (IS_ERR(ar_ahb->radio_cold_rst)) { ath10k_err(ar, "failed to get radio cold rst ctrl: %ld\n", PTR_ERR(ar_ahb->radio_cold_rst)); - return ar_ahb->radio_cold_rst ? - PTR_ERR(ar_ahb->radio_cold_rst) : -ENODEV; + return PTR_ERR(ar_ahb->radio_cold_rst); } ar_ahb->radio_warm_rst = devm_reset_control_get(dev, "wifi_radio_warm"); - if (IS_ERR_OR_NULL(ar_ahb->radio_warm_rst)) { + if (IS_ERR(ar_ahb->radio_warm_rst)) { ath10k_err(ar, "failed to get radio warm rst ctrl: %ld\n", PTR_ERR(ar_ahb->radio_warm_rst)); - return ar_ahb->radio_warm_rst ? - PTR_ERR(ar_ahb->radio_warm_rst) : -ENODEV; + return PTR_ERR(ar_ahb->radio_warm_rst); } ar_ahb->radio_srif_rst = devm_reset_control_get(dev, "wifi_radio_srif"); - if (IS_ERR_OR_NULL(ar_ahb->radio_srif_rst)) { + if (IS_ERR(ar_ahb->radio_srif_rst)) { ath10k_err(ar, "failed to get radio srif rst ctrl: %ld\n", PTR_ERR(ar_ahb->radio_srif_rst)); - return ar_ahb->radio_srif_rst ? - PTR_ERR(ar_ahb->radio_srif_rst) : -ENODEV; + return PTR_ERR(ar_ahb->radio_srif_rst); } ar_ahb->cpu_init_rst = devm_reset_control_get(dev, "wifi_cpu_init"); - if (IS_ERR_OR_NULL(ar_ahb->cpu_init_rst)) { + if (IS_ERR(ar_ahb->cpu_init_rst)) { ath10k_err(ar, "failed to get cpu init rst ctrl: %ld\n", PTR_ERR(ar_ahb->cpu_init_rst)); - return ar_ahb->cpu_init_rst ? - PTR_ERR(ar_ahb->cpu_init_rst) : -ENODEV; + return PTR_ERR(ar_ahb->cpu_init_rst); } return 0; From 2f38c3c01de945234d23dd163e3528ccb413066d Mon Sep 17 00:00:00 2001 From: Vasanthakumar Thiagarajan Date: Mon, 26 Sep 2016 21:56:24 +0300 Subject: [PATCH 1580/1715] ath10k: fix rfc1042 header retrieval in QCA4019 with eth decap mode Chipset from QCA99X0 onwards (QCA99X0, QCA9984, QCA4019 & future) rx_hdr_status is not padded to align in 4-byte boundary. Define a new hw_params field to handle different alignment behaviour between different hw. This patch fixes improper retrieval of rfc1042 header with QCA4019. This patch along with "ath10k: Properly remove padding from the start of rx payload" will fix traffic failure in ethernet decap mode for QCA4019. Signed-off-by: Vasanthakumar Thiagarajan Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/core.c | 12 ++++++++++++ drivers/net/wireless/ath/ath10k/htt_rx.c | 5 +++-- drivers/net/wireless/ath/ath10k/hw.h | 3 +++ 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c index 3a8984ba9f74..98af0053d30d 100644 --- a/drivers/net/wireless/ath/ath10k/core.c +++ b/drivers/net/wireless/ath/ath10k/core.c @@ -68,6 +68,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .board_ext_size = QCA988X_BOARD_EXT_DATA_SZ, }, .hw_ops = &qca988x_ops, + .decap_align_bytes = 4, }, { .id = QCA9887_HW_1_0_VERSION, @@ -87,6 +88,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .board_ext_size = QCA9887_BOARD_EXT_DATA_SZ, }, .hw_ops = &qca988x_ops, + .decap_align_bytes = 4, }, { .id = QCA6174_HW_2_1_VERSION, @@ -105,6 +107,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .board_ext_size = QCA6174_BOARD_EXT_DATA_SZ, }, .hw_ops = &qca988x_ops, + .decap_align_bytes = 4, }, { .id = QCA6174_HW_2_1_VERSION, @@ -123,6 +126,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .board_ext_size = QCA6174_BOARD_EXT_DATA_SZ, }, .hw_ops = &qca988x_ops, + .decap_align_bytes = 4, }, { .id = QCA6174_HW_3_0_VERSION, @@ -141,6 +145,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .board_ext_size = QCA6174_BOARD_EXT_DATA_SZ, }, .hw_ops = &qca988x_ops, + .decap_align_bytes = 4, }, { .id = QCA6174_HW_3_2_VERSION, @@ -160,6 +165,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .board_ext_size = QCA6174_BOARD_EXT_DATA_SZ, }, .hw_ops = &qca988x_ops, + .decap_align_bytes = 4, }, { .id = QCA99X0_HW_2_0_DEV_VERSION, @@ -184,6 +190,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { }, .sw_decrypt_mcast_mgmt = true, .hw_ops = &qca99x0_ops, + .decap_align_bytes = 1, }, { .id = QCA9984_HW_1_0_DEV_VERSION, @@ -208,6 +215,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { }, .sw_decrypt_mcast_mgmt = true, .hw_ops = &qca99x0_ops, + .decap_align_bytes = 1, }, { .id = QCA9888_HW_2_0_DEV_VERSION, @@ -231,6 +239,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { }, .sw_decrypt_mcast_mgmt = true, .hw_ops = &qca99x0_ops, + .decap_align_bytes = 1, }, { .id = QCA9377_HW_1_0_DEV_VERSION, @@ -249,6 +258,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .board_ext_size = QCA9377_BOARD_EXT_DATA_SZ, }, .hw_ops = &qca988x_ops, + .decap_align_bytes = 4, }, { .id = QCA9377_HW_1_1_DEV_VERSION, @@ -267,6 +277,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .board_ext_size = QCA9377_BOARD_EXT_DATA_SZ, }, .hw_ops = &qca988x_ops, + .decap_align_bytes = 4, }, { .id = QCA4019_HW_1_0_DEV_VERSION, @@ -292,6 +303,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { }, .sw_decrypt_mcast_mgmt = true, .hw_ops = &qca99x0_ops, + .decap_align_bytes = 1, }, }; diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c index a3785a9aa843..0b4c1562420f 100644 --- a/drivers/net/wireless/ath/ath10k/htt_rx.c +++ b/drivers/net/wireless/ath/ath10k/htt_rx.c @@ -1103,6 +1103,7 @@ static void *ath10k_htt_rx_h_find_rfc1042(struct ath10k *ar, size_t hdr_len, crypto_len; void *rfc1042; bool is_first, is_last, is_amsdu; + int bytes_aligned = ar->hw_params.decap_align_bytes; rxd = (void *)msdu->data - sizeof(*rxd); hdr = (void *)rxd->rx_hdr_status; @@ -1119,8 +1120,8 @@ static void *ath10k_htt_rx_h_find_rfc1042(struct ath10k *ar, hdr_len = ieee80211_hdrlen(hdr->frame_control); crypto_len = ath10k_htt_rx_crypto_param_len(ar, enctype); - rfc1042 += round_up(hdr_len, 4) + - round_up(crypto_len, 4); + rfc1042 += round_up(hdr_len, bytes_aligned) + + round_up(crypto_len, bytes_aligned); } if (is_amsdu) diff --git a/drivers/net/wireless/ath/ath10k/hw.h b/drivers/net/wireless/ath/ath10k/hw.h index 4a01bcff49a7..6038b7486f1d 100644 --- a/drivers/net/wireless/ath/ath10k/hw.h +++ b/drivers/net/wireless/ath/ath10k/hw.h @@ -408,6 +408,9 @@ struct ath10k_hw_params { bool sw_decrypt_mcast_mgmt; const struct ath10k_hw_ops *hw_ops; + + /* Number of bytes used for alignment in rx_hdr_status of rx desc. */ + int decap_align_bytes; }; struct htt_rx_desc; From aa66ba0c31c69d13e5a59096a67775b776dccbec Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Mon, 26 Sep 2016 21:56:25 +0300 Subject: [PATCH 1581/1715] ath10k: fix typo in logging message Signed-off-by: Ben Greear Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/mac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index 0a44dab5a287..76297d69f1ed 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -2793,7 +2793,7 @@ static void ath10k_bss_disassoc(struct ieee80211_hw *hw, ret = ath10k_wmi_vdev_down(ar, arvif->vdev_id); if (ret) - ath10k_warn(ar, "faield to down vdev %i: %d\n", + ath10k_warn(ar, "failed to down vdev %i: %d\n", arvif->vdev_id, ret); arvif->def_wep_key_idx = -1; From 15138fdf327da6132b6e00e3d8c083476eb9aea2 Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Mon, 26 Sep 2016 21:56:26 +0300 Subject: [PATCH 1582/1715] ath10k: document cycle count related counters They are not necessarily named in an intuitive manner, so at least add some comments to help the next person. Signed-off-by: Ben Greear Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/core.h | 8 ++++---- drivers/net/wireless/ath/ath10k/wmi.h | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h index 6ec9495bcc04..dda49af1eb74 100644 --- a/drivers/net/wireless/ath/ath10k/core.h +++ b/drivers/net/wireless/ath/ath10k/core.h @@ -201,10 +201,10 @@ struct ath10k_fw_stats_pdev { /* PDEV stats */ s32 ch_noise_floor; - u32 tx_frame_count; - u32 rx_frame_count; - u32 rx_clear_count; - u32 cycle_count; + u32 tx_frame_count; /* Cycles spent transmitting frames */ + u32 rx_frame_count; /* Cycles spent receiving frames */ + u32 rx_clear_count; /* Total channel busy time, evidently */ + u32 cycle_count; /* Total on-channel time */ u32 phy_err_count; u32 chan_tx_power; u32 ack_rx_bad; diff --git a/drivers/net/wireless/ath/ath10k/wmi.h b/drivers/net/wireless/ath/ath10k/wmi.h index 6a68817b90b8..1b243c899bef 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.h +++ b/drivers/net/wireless/ath/ath10k/wmi.h @@ -4222,10 +4222,10 @@ struct wmi_10_2_stats_event { */ struct wmi_pdev_stats_base { __le32 chan_nf; - __le32 tx_frame_count; - __le32 rx_frame_count; - __le32 rx_clear_count; - __le32 cycle_count; + __le32 tx_frame_count; /* Cycles spent transmitting frames */ + __le32 rx_frame_count; /* Cycles spent receiving frames */ + __le32 rx_clear_count; /* Total channel busy time, evidently */ + __le32 cycle_count; /* Total on-channel time */ __le32 phy_err_count; __le32 chan_tx_pwr; } __packed; From 30d2049b3277cdf6964996f86626add08cf160df Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Mon, 26 Sep 2016 21:56:26 +0300 Subject: [PATCH 1583/1715] ath10k: support up to 64 vdevs The (1 << x) - 1 trick won't work when you are trying to fill up all 64 bits, so add special case for that. Signed-off-by: Ben Greear [kvalo@qca.qualcomm.com: remove the sentence about moving limits] Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c index 98af0053d30d..21ae8d663e67 100644 --- a/drivers/net/wireless/ath/ath10k/core.c +++ b/drivers/net/wireless/ath/ath10k/core.c @@ -1972,7 +1972,10 @@ int ath10k_core_start(struct ath10k *ar, enum ath10k_firmware_mode mode, goto err_hif_stop; } - ar->free_vdev_map = (1LL << ar->max_num_vdevs) - 1; + if (ar->max_num_vdevs >= 64) + ar->free_vdev_map = 0xFFFFFFFFFFFFFFFFLL; + else + ar->free_vdev_map = (1LL << ar->max_num_vdevs) - 1; INIT_LIST_HEAD(&ar->arvifs); From 6bf563d50af2de34024bde0de1733d4028f9400c Mon Sep 17 00:00:00 2001 From: Nelson Chang Date: Mon, 26 Sep 2016 14:33:49 +0800 Subject: [PATCH 1584/1715] net: ethernet: mediatek: add to stop PDMA while stopping the frame engine Stop PDMA while the frame engine is going to stop. Signed-off-by: Nelson Chang Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 4cc50c03c12b..62de68dac036 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1784,6 +1784,7 @@ static int mtk_stop(struct net_device *dev) napi_disable(ð->rx_napi); mtk_stop_dma(eth, MTK_QDMA_GLO_CFG); + mtk_stop_dma(eth, MTK_PDMA_GLO_CFG); mtk_dma_free(eth); From ca3ba106a9979dd21988cec296462b4f49b37ace Mon Sep 17 00:00:00 2001 From: Nelson Chang Date: Mon, 26 Sep 2016 14:33:50 +0800 Subject: [PATCH 1585/1715] net: ethernet: mediatek: bug fix to disable HW LRO (1) Modify the register settings for LRO relinquishments (2) Jump out from the waiting loop while LRO relinquishments are done Signed-off-by: Nelson Chang Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 1 + drivers/net/ethernet/mediatek/mtk_eth_soc.h | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 62de68dac036..ddf20a00654e 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1374,6 +1374,7 @@ static void mtk_hwlro_rx_uninit(struct mtk_eth *eth) msleep(20); continue; } + break; } /* invalidate lro rings */ diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index 7e194f795c9a..30031959d6de 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -102,8 +102,8 @@ #define MTK_LRO_EN BIT(0) #define MTK_L3_CKS_UPD_EN BIT(7) #define MTK_LRO_ALT_PKT_CNT_MODE BIT(21) -#define MTK_LRO_RING_RELINQUISH_REQ (0x3 << 26) -#define MTK_LRO_RING_RELINQUISH_DONE (0x3 << 29) +#define MTK_LRO_RING_RELINQUISH_REQ (0x7 << 26) +#define MTK_LRO_RING_RELINQUISH_DONE (0x7 << 29) #define MTK_PDMA_LRO_CTRL_DW1 0x984 #define MTK_PDMA_LRO_CTRL_DW2 0x988 From e96e0eded1335b9cfac71fcdd989d682eb3f8412 Mon Sep 17 00:00:00 2001 From: "jbaron@akamai.com" Date: Mon, 26 Sep 2016 11:00:44 -0400 Subject: [PATCH 1586/1715] bnx2x: free the mac filter group list before freeing the cmd The group list must be freed prior to freeing the command otherwise we have a use-after-free. Signed-off-by: Jason Baron Cc: Yuval Mintz Cc: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c index 4947a9cbf0c1..cea6bdcde33f 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c @@ -2714,8 +2714,8 @@ static int bnx2x_mcast_enqueue_cmd(struct bnx2x *bp, elem_group = (struct bnx2x_mcast_elem_group *) __get_free_page(GFP_ATOMIC | __GFP_ZERO); if (!elem_group) { - kfree(new_cmd); bnx2x_free_groups(&new_cmd->group_head); + kfree(new_cmd); return -ENOMEM; } total_elems -= MCAST_MAC_ELEMS_PER_PG; From ad5748893b27b9b27b2deb597443ad6702719c20 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Fri, 23 Sep 2016 11:27:19 -0700 Subject: [PATCH 1587/1715] rtlwifi: Add switch variable to 'switch case not processed' messages Help along debugging by showing what switch/case variable is not being processed in these messages. Signed-off-by: Joe Perches Acked-by: Larry Finger Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/core.c | 3 ++- drivers/net/wireless/realtek/rtlwifi/pci.c | 3 ++- drivers/net/wireless/realtek/rtlwifi/ps.c | 2 +- .../wireless/realtek/rtlwifi/rtl8188ee/fw.c | 4 ++-- .../wireless/realtek/rtlwifi/rtl8188ee/hw.c | 9 +++++---- .../wireless/realtek/rtlwifi/rtl8188ee/led.c | 4 ++-- .../wireless/realtek/rtlwifi/rtl8188ee/phy.c | 10 ++++++---- .../realtek/rtlwifi/rtl8192c/fw_common.c | 4 ++-- .../realtek/rtlwifi/rtl8192c/phy_common.c | 8 +++++--- .../wireless/realtek/rtlwifi/rtl8192ce/hw.c | 7 ++++--- .../wireless/realtek/rtlwifi/rtl8192ce/led.c | 4 ++-- .../wireless/realtek/rtlwifi/rtl8192ce/phy.c | 7 ++----- .../wireless/realtek/rtlwifi/rtl8192cu/hw.c | 4 ++-- .../wireless/realtek/rtlwifi/rtl8192cu/led.c | 4 ++-- .../wireless/realtek/rtlwifi/rtl8192cu/phy.c | 7 ++----- .../wireless/realtek/rtlwifi/rtl8192de/fw.c | 4 ++-- .../wireless/realtek/rtlwifi/rtl8192de/hw.c | 9 +++++---- .../wireless/realtek/rtlwifi/rtl8192de/led.c | 4 ++-- .../wireless/realtek/rtlwifi/rtl8192de/phy.c | 15 +++++++------- .../wireless/realtek/rtlwifi/rtl8192ee/fw.c | 4 ++-- .../wireless/realtek/rtlwifi/rtl8192ee/hw.c | 9 +++++---- .../wireless/realtek/rtlwifi/rtl8192ee/led.c | 4 ++-- .../wireless/realtek/rtlwifi/rtl8192ee/phy.c | 10 ++++++---- .../wireless/realtek/rtlwifi/rtl8192se/hw.c | 9 +++++---- .../wireless/realtek/rtlwifi/rtl8192se/led.c | 4 ++-- .../wireless/realtek/rtlwifi/rtl8192se/phy.c | 5 +++-- .../wireless/realtek/rtlwifi/rtl8723ae/fw.c | 4 ++-- .../wireless/realtek/rtlwifi/rtl8723ae/hw.c | 9 +++++---- .../wireless/realtek/rtlwifi/rtl8723ae/led.c | 4 ++-- .../wireless/realtek/rtlwifi/rtl8723ae/phy.c | 10 ++++++---- .../wireless/realtek/rtlwifi/rtl8723be/fw.c | 4 ++-- .../wireless/realtek/rtlwifi/rtl8723be/hw.c | 10 +++++----- .../wireless/realtek/rtlwifi/rtl8723be/led.c | 4 ++-- .../wireless/realtek/rtlwifi/rtl8723be/phy.c | 12 ++++++----- .../wireless/realtek/rtlwifi/rtl8821ae/fw.c | 4 ++-- .../wireless/realtek/rtlwifi/rtl8821ae/hw.c | 9 +++++---- .../wireless/realtek/rtlwifi/rtl8821ae/led.c | 4 ++-- .../wireless/realtek/rtlwifi/rtl8821ae/phy.c | 20 ++++++------------- 38 files changed, 128 insertions(+), 123 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/core.c b/drivers/net/wireless/realtek/rtlwifi/core.c index 7aee5ebb147d..f95760c13c56 100644 --- a/drivers/net/wireless/realtek/rtlwifi/core.c +++ b/drivers/net/wireless/realtek/rtlwifi/core.c @@ -765,7 +765,8 @@ static int rtl_op_config(struct ieee80211_hw *hw, u32 changed) mac->bw_40 = false; mac->bw_80 = false; RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", + channel_type); break; } } diff --git a/drivers/net/wireless/realtek/rtlwifi/pci.c b/drivers/net/wireless/realtek/rtlwifi/pci.c index d12586d4f845..0dfa9eac3926 100644 --- a/drivers/net/wireless/realtek/rtlwifi/pci.c +++ b/drivers/net/wireless/realtek/rtlwifi/pci.c @@ -179,7 +179,8 @@ static void _rtl_pci_update_default_setting(struct ieee80211_hw *hw) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", + rtlpci->const_support_pciaspm); break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/ps.c b/drivers/net/wireless/realtek/rtlwifi/ps.c index 9a64f9b703e5..18d979affc18 100644 --- a/drivers/net/wireless/realtek/rtlwifi/ps.c +++ b/drivers/net/wireless/realtek/rtlwifi/ps.c @@ -151,7 +151,7 @@ static bool rtl_ps_set_rf_state(struct ieee80211_hw *hw, default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", state_toset); break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/fw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/fw.c index 629125658b87..5360d5332359 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/fw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/fw.c @@ -334,7 +334,7 @@ static void _rtl88e_fill_h2c_command(struct ieee80211_hw *hw, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", boxnum); break; } isfw_read = _rtl88e_check_fw_read_last_h2c(hw, boxnum); @@ -405,7 +405,7 @@ static void _rtl88e_fill_h2c_command(struct ieee80211_hw *hw, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", cmd_len); break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c index 4ab6201daf1a..3285117845f5 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c @@ -357,7 +357,7 @@ void rtl88ee_get_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) break; } default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not process %x\n", variable); + "switch case %#x not processed\n", variable); break; } } @@ -571,7 +571,8 @@ void rtl88ee_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not process\n"); + "switch case %#x not processed\n", + e_aci); break; } } @@ -735,7 +736,7 @@ void rtl88ee_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) break; } default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not process %x\n", variable); + "switch case %#x not processed\n", variable); break; } } @@ -2352,7 +2353,7 @@ void rtl88ee_set_key(struct ieee80211_hw *hw, u32 key_index, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not process\n"); + "switch case %#x not processed\n", enc_algo); enc_algo = CAM_TKIP; break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/led.c b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/led.c index b504bd092fc4..f05c2c674165 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/led.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/led.c @@ -62,7 +62,7 @@ void rtl88ee_sw_led_on(struct ieee80211_hw *hw, struct rtl_led *pled) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", pled->ledpin); break; } pled->ledon = true; @@ -100,7 +100,7 @@ void rtl88ee_sw_led_off(struct ieee80211_hw *hw, struct rtl_led *pled) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", pled->ledpin); break; } pled->ledon = false; diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/phy.c b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/phy.c index 7498a1218cba..fffaa92eda81 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/phy.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/phy.c @@ -1346,7 +1346,8 @@ static bool _rtl88e_phy_sw_chnl_step_by_step(struct ieee80211_hw *hw, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", + currentcmd->cmdid); break; } @@ -2128,7 +2129,7 @@ bool rtl88e_phy_set_io_cmd(struct ieee80211_hw *hw, enum io_type iotype) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", iotype); break; } } while (false); @@ -2166,7 +2167,8 @@ static void rtl88e_phy_set_io(struct ieee80211_hw *hw) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", + rtlphy->current_io_type); break; } rtlphy->set_io_inprogress = false; @@ -2319,7 +2321,7 @@ static bool _rtl88ee_phy_set_rf_power_state(struct ieee80211_hw *hw, } default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", rfpwr_state); bresult = false; break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192c/fw_common.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192c/fw_common.c index 43fcb25c885f..7d152466152b 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192c/fw_common.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192c/fw_common.c @@ -352,7 +352,7 @@ static void _rtl92c_fill_h2c_command(struct ieee80211_hw *hw, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", boxnum); break; } @@ -456,7 +456,7 @@ static void _rtl92c_fill_h2c_command(struct ieee80211_hw *hw, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", cmd_len); break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192c/phy_common.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192c/phy_common.c index 60ab2ec4f4ef..27e3d5f9ca34 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192c/phy_common.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192c/phy_common.c @@ -910,7 +910,8 @@ bool _rtl92c_phy_sw_chnl_step_by_step(struct ieee80211_hw *hw, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", + currentcmd->cmdid); break; } @@ -1567,7 +1568,7 @@ bool rtl92c_phy_set_io_cmd(struct ieee80211_hw *hw, enum io_type iotype) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", iotype); break; } } while (false); @@ -1605,7 +1606,8 @@ void rtl92c_phy_set_io(struct ieee80211_hw *hw) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", + rtlphy->current_io_type); break; } rtlphy->set_io_inprogress = false; diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.c index 244607951e28..6d308f9b7ff9 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.c @@ -143,7 +143,7 @@ void rtl92ce_get_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) } default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", variable); break; } } @@ -367,7 +367,8 @@ void rtl92ce_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", + e_aci); break; } } @@ -2154,7 +2155,7 @@ void rtl92ce_set_key(struct ieee80211_hw *hw, u32 key_index, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", enc_algo); enc_algo = CAM_TKIP; break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/led.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/led.c index 8283e9b27639..24e483ba3fa4 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/led.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/led.c @@ -62,7 +62,7 @@ void rtl92ce_sw_led_on(struct ieee80211_hw *hw, struct rtl_led *pled) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", pled->ledpin); break; } pled->ledon = true; @@ -97,7 +97,7 @@ void rtl92ce_sw_led_off(struct ieee80211_hw *hw, struct rtl_led *pled) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", pled->ledpin); break; } pled->ledon = false; diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/phy.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/phy.c index 1ee5a6ae9960..46d0d945f283 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/phy.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/phy.c @@ -300,12 +300,9 @@ bool rtl92c_phy_config_rf_with_headerfile(struct ieee80211_hw *hw, } break; case RF90_PATH_C: - RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); - break; case RF90_PATH_D: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", rfpath); break; default: break; @@ -554,7 +551,7 @@ static bool _rtl92ce_phy_set_rf_power_state(struct ieee80211_hw *hw, } default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", rfpwr_state); bresult = false; break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.c index 8789752f8143..ae8f055483fa 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.c @@ -1560,7 +1560,7 @@ void rtl92cu_get_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", variable); break; } } @@ -1931,7 +1931,7 @@ void rtl92cu_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) } default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", variable); break; } } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/led.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/led.c index 75a2deb23af1..8514ab652520 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/led.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/led.c @@ -62,7 +62,7 @@ void rtl92cu_sw_led_on(struct ieee80211_hw *hw, struct rtl_led *pled) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", pled->ledpin); break; } pled->ledon = true; @@ -95,7 +95,7 @@ void rtl92cu_sw_led_off(struct ieee80211_hw *hw, struct rtl_led *pled) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", pled->ledpin); break; } pled->ledon = false; diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/phy.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/phy.c index c972fa50926d..4b2976465905 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/phy.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/phy.c @@ -277,12 +277,9 @@ bool rtl92cu_phy_config_rf_with_headerfile(struct ieee80211_hw *hw, } break; case RF90_PATH_C: - RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); - break; case RF90_PATH_D: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", rfpath); break; default: break; @@ -517,7 +514,7 @@ static bool _rtl92cu_phy_set_rf_power_state(struct ieee80211_hw *hw, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", rfpwr_state); bresult = false; break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/fw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/fw.c index 62ef8209718f..8de29cc3ced0 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/fw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/fw.c @@ -435,7 +435,7 @@ static void _rtl92d_fill_h2c_command(struct ieee80211_hw *hw, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", boxnum); break; } isfw_read = _rtl92d_check_fw_read_last_h2c(hw, boxnum); @@ -512,7 +512,7 @@ static void _rtl92d_fill_h2c_command(struct ieee80211_hw *hw, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", cmd_len); break; } bwrite_success = true; diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c index 57205514801c..5369011914bb 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c @@ -166,7 +166,7 @@ void rtl92de_get_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", variable); break; } } @@ -361,7 +361,8 @@ void rtl92de_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", + e_aci); break; } } @@ -502,7 +503,7 @@ void rtl92de_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) } default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", variable); break; } } @@ -2171,7 +2172,7 @@ void rtl92de_set_key(struct ieee80211_hw *hw, u32 key_index, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", enc_algo); enc_algo = CAM_TKIP; break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/led.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/led.c index 76a57ae4af3e..811ba57eb9bb 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/led.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/led.c @@ -71,7 +71,7 @@ void rtl92de_sw_led_on(struct ieee80211_hw *hw, struct rtl_led *pled) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", pled->ledpin); break; } pled->ledon = true; @@ -106,7 +106,7 @@ void rtl92de_sw_led_off(struct ieee80211_hw *hw, struct rtl_led *pled) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", pled->ledpin); break; } pled->ledon = false; diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/phy.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/phy.c index 2a4810d32182..2a1edfd21b96 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/phy.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/phy.c @@ -836,12 +836,9 @@ bool rtl92d_phy_config_rf_with_headerfile(struct ieee80211_hw *hw, } break; case RF90_PATH_C: - RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); - break; case RF90_PATH_D: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", rfpath); break; } return true; @@ -2850,7 +2847,8 @@ static bool _rtl92d_phy_sw_chnl_step_by_step(struct ieee80211_hw *hw, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", + currentcmd->cmdid); break; } break; @@ -2963,7 +2961,8 @@ static void rtl92d_phy_set_io(struct ieee80211_hw *hw) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", + rtlphy->current_io_type); break; } rtlphy->set_io_inprogress = false; @@ -2994,7 +2993,7 @@ bool rtl92d_phy_set_io_cmd(struct ieee80211_hw *hw, enum io_type iotype) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", iotype); break; } } while (false); @@ -3182,7 +3181,7 @@ bool rtl92d_phy_set_rf_power_state(struct ieee80211_hw *hw, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", rfpwr_state); bresult = false; break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/fw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/fw.c index 0708eedd9671..b3f6a9ed15d4 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/fw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/fw.c @@ -344,7 +344,7 @@ static void _rtl92ee_fill_h2c_command(struct ieee80211_hw *hw, u8 element_id, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", boxnum); break; } @@ -433,7 +433,7 @@ static void _rtl92ee_fill_h2c_command(struct ieee80211_hw *hw, u8 element_id, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", cmd_len); break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.c index b07af8d15273..47bb6d8c8912 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.c @@ -340,7 +340,7 @@ void rtl92ee_get_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_DMESG, - "switch case not process %x\n", variable); + "switch case %#x not processed\n", variable); break; } } @@ -566,7 +566,8 @@ void rtl92ee_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_DMESG, - "switch case not process\n"); + "switch case %#x not processed\n", + e_aci); break; } } @@ -685,7 +686,7 @@ void rtl92ee_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_DMESG, - "switch case not process %x\n", variable); + "switch case %#x not processed\n", variable); break; } } @@ -2463,7 +2464,7 @@ void rtl92ee_set_key(struct ieee80211_hw *hw, u32 key_index, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_DMESG, - "switch case not process\n"); + "switch case %#x not processed\n", enc_algo); enc_algo = CAM_TKIP; break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/led.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/led.c index 8388e371c8e2..47da05dd3076 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/led.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/led.c @@ -61,7 +61,7 @@ void rtl92ee_sw_led_on(struct ieee80211_hw *hw, struct rtl_led *pled) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", pled->ledpin); break; } pled->ledon = true; @@ -91,7 +91,7 @@ void rtl92ee_sw_led_off(struct ieee80211_hw *hw, struct rtl_led *pled) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", pled->ledpin); break; } pled->ledon = false; diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/phy.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/phy.c index beafc9a10ad8..5ad7e753c357 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/phy.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/phy.c @@ -1927,7 +1927,8 @@ static bool _rtl92ee_phy_sw_chnl_step_by_step(struct ieee80211_hw *hw, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", + currentcmd->cmdid); break; } @@ -3001,7 +3002,7 @@ bool rtl92ee_phy_set_io_cmd(struct ieee80211_hw *hw, enum io_type iotype) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", iotype); break; } } while (false); @@ -3041,7 +3042,8 @@ static void rtl92ee_phy_set_io(struct ieee80211_hw *hw) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", + rtlphy->current_io_type); break; } rtlphy->set_io_inprogress = false; @@ -3187,7 +3189,7 @@ static bool _rtl92ee_phy_set_rf_power_state(struct ieee80211_hw *hw, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", rfpwr_state); bresult = false; break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.c index ddfa0aee5bf8..5bad9c9ef609 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.c @@ -79,7 +79,7 @@ void rtl92se_get_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) } default: { RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", variable); break; } } @@ -297,7 +297,8 @@ void rtl92se_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", + e_aci); break; } } @@ -433,7 +434,7 @@ void rtl92se_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) break; } default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", variable); break; } @@ -2465,7 +2466,7 @@ void rtl92se_set_key(struct ieee80211_hw *hw, u32 key_index, u8 *p_macaddr, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", enc_algo); enc_algo = CAM_TKIP; break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/led.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/led.c index 44949b5cbb87..9849cb988186 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/led.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/led.c @@ -68,7 +68,7 @@ void rtl92se_sw_led_on(struct ieee80211_hw *hw, struct rtl_led *pled) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", pled->ledpin); break; } pled->ledon = true; @@ -104,7 +104,7 @@ void rtl92se_sw_led_off(struct ieee80211_hw *hw, struct rtl_led *pled) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", pled->ledpin); break; } pled->ledon = false; diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/phy.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/phy.c index 881821f4e243..4bb75581ab38 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/phy.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/phy.c @@ -442,7 +442,8 @@ static bool _rtl92s_phy_sw_chnl_step_by_step(struct ieee80211_hw *hw, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", + currentcmd->cmdid); break; } @@ -648,7 +649,7 @@ bool rtl92s_phy_set_rf_power_state(struct ieee80211_hw *hw, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", rfpwr_state); bresult = false; break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/fw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/fw.c index b7c0d38ee5b5..1186755e55b8 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/fw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/fw.c @@ -124,7 +124,7 @@ static void _rtl8723e_fill_h2c_command(struct ieee80211_hw *hw, u8 element_id, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not process\n"); + "switch case %#x not processed\n", boxnum); break; } @@ -230,7 +230,7 @@ static void _rtl8723e_fill_h2c_command(struct ieee80211_hw *hw, u8 element_id, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not process\n"); + "switch case %#x not processed\n", cmd_len); break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c index ba30efc2d195..2bf603b4e12c 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c @@ -143,7 +143,7 @@ void rtl8723e_get_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) } default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", variable); break; } } @@ -366,7 +366,8 @@ void rtl8723e_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", + e_aci); break; } } @@ -546,7 +547,7 @@ void rtl8723e_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) } default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", variable); break; } } @@ -2225,7 +2226,7 @@ void rtl8723e_set_key(struct ieee80211_hw *hw, u32 key_index, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", enc_algo); enc_algo = CAM_TKIP; break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/led.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/led.c index 13173351cbfd..c7be9342136c 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/led.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/led.c @@ -63,7 +63,7 @@ void rtl8723e_sw_led_on(struct ieee80211_hw *hw, struct rtl_led *pled) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not process\n"); + "switch case %#x not processed\n", pled->ledpin); break; } pled->ledon = true; @@ -105,7 +105,7 @@ void rtl8723e_sw_led_off(struct ieee80211_hw *hw, struct rtl_led *pled) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not process\n"); + "switch case %#x not processed\n", pled->ledpin); break; } pled->ledon = false; diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/phy.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/phy.c index 601b78efedfb..17b58cb32d55 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/phy.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/phy.c @@ -1023,7 +1023,8 @@ static bool _rtl8723e_phy_sw_chnl_step_by_step(struct ieee80211_hw *hw, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", + currentcmd->cmdid); break; } @@ -1499,7 +1500,7 @@ bool rtl8723e_phy_set_io_cmd(struct ieee80211_hw *hw, enum io_type iotype) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", iotype); break; } } while (false); @@ -1536,7 +1537,8 @@ static void rtl8723e_phy_set_io(struct ieee80211_hw *hw) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", + rtlphy->current_io_type); break; } rtlphy->set_io_inprogress = false; @@ -1682,7 +1684,7 @@ static bool _rtl8723e_phy_set_rf_power_state(struct ieee80211_hw *hw, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", rfpwr_state); bresult = false; break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/fw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/fw.c index d5da0f3c1217..8c5c27ce8e05 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/fw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/fw.c @@ -122,7 +122,7 @@ static void _rtl8723be_fill_h2c_command(struct ieee80211_hw *hw, u8 element_id, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not process\n"); + "switch case %#x not processed\n", boxnum); break; } @@ -195,7 +195,7 @@ static void _rtl8723be_fill_h2c_command(struct ieee80211_hw *hw, u8 element_id, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not process\n"); + "switch case %#x not processed\n", cmd_len); break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c index 82e4476cab23..999c1ac3ee57 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c @@ -350,7 +350,7 @@ void rtl8723be_get_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process %x\n", variable); + "switch case %#x not processed\n", variable); break; } } @@ -607,7 +607,8 @@ void rtl8723be_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", + e_aci); break; } } @@ -723,8 +724,7 @@ void rtl8723be_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process %x\n", - variable); + "switch case %#x not processed\n", variable); break; } } @@ -2565,7 +2565,7 @@ void rtl8723be_set_key(struct ieee80211_hw *hw, u32 key_index, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", enc_algo); enc_algo = CAM_TKIP; break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/led.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/led.c index 4196efb723a2..497913eb3b37 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/led.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/led.c @@ -58,7 +58,7 @@ void rtl8723be_sw_led_on(struct ieee80211_hw *hw, struct rtl_led *pled) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not process\n"); + "switch case %#x not processed\n", pled->ledpin); break; } pled->ledon = true; @@ -100,7 +100,7 @@ void rtl8723be_sw_led_off(struct ieee80211_hw *hw, struct rtl_led *pled) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not process\n"); + "switch case %#x not processed\n", pled->ledpin); break; } pled->ledon = false; diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/phy.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/phy.c index 285818df149b..3cc2232f25ca 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/phy.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/phy.c @@ -837,7 +837,7 @@ bool rtl8723be_phy_config_rf_with_headerfile(struct ieee80211_hw *hw, break; case RF90_PATH_D: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", rfpath); break; } return true; @@ -1507,7 +1507,8 @@ static bool _rtl8723be_phy_sw_chnl_step_by_step(struct ieee80211_hw *hw, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", + currentcmd->cmdid); break; } @@ -2515,7 +2516,7 @@ bool rtl8723be_phy_set_io_cmd(struct ieee80211_hw *hw, enum io_type iotype) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", iotype); break; } } while (false); @@ -2553,7 +2554,8 @@ static void rtl8723be_phy_set_io(struct ieee80211_hw *hw) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", + rtlphy->current_io_type); break; } rtlphy->set_io_inprogress = false; @@ -2705,7 +2707,7 @@ static bool _rtl8723be_phy_set_rf_power_state(struct ieee80211_hw *hw, default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", rfpwr_state); bresult = false; break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/fw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/fw.c index a4fc70e8c9c0..b665446351a4 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/fw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/fw.c @@ -392,7 +392,7 @@ static void _rtl8821ae_fill_h2c_command(struct ieee80211_hw *hw, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", boxnum); break; } @@ -481,7 +481,7 @@ static void _rtl8821ae_fill_h2c_command(struct ieee80211_hw *hw, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", cmd_len); break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c index 0cddf1ad0fff..1281ebe0c30a 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c @@ -480,7 +480,7 @@ void rtl8821ae_get_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process %x\n", variable); + "switch case %#x not processed\n", variable); break; } } @@ -671,7 +671,8 @@ void rtl8821ae_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", + e_aci); break; } } @@ -800,7 +801,7 @@ void rtl8821ae_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) break; } default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process %x\n", variable); + "switch case %#x not processed\n", variable); break; } } @@ -3934,7 +3935,7 @@ void rtl8821ae_set_key(struct ieee80211_hw *hw, u32 key_index, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", enc_algo); enc_algo = CAM_TKIP; break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/led.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/led.c index ba1946a0280e..fcb3b28c6b8f 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/led.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/led.c @@ -60,7 +60,7 @@ void rtl8821ae_sw_led_on(struct ieee80211_hw *hw, struct rtl_led *pled) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", pled->ledpin); break; } pled->ledon = true; @@ -133,7 +133,7 @@ void rtl8821ae_sw_led_off(struct ieee80211_hw *hw, struct rtl_led *pled) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", pled->ledpin); break; } pled->ledon = false; diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/phy.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/phy.c index a71bfe38e7e1..5dad402171c2 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/phy.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/phy.c @@ -2063,12 +2063,9 @@ bool rtl8812ae_phy_config_rf_with_headerfile(struct ieee80211_hw *hw, } break; case RF90_PATH_C: - RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not process\n"); - break; case RF90_PATH_D: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not process\n"); + "switch case %#x not processed\n", rfpath); break; } return true; @@ -2133,16 +2130,10 @@ bool rtl8821ae_phy_config_rf_with_headerfile(struct ieee80211_hw *hw, break; case RF90_PATH_B: - RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not process\n"); - break; case RF90_PATH_C: - RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not process\n"); - break; case RF90_PATH_D: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not process\n"); + "switch case %#x not processed\n", rfpath); break; } return true; @@ -4670,7 +4661,7 @@ bool rtl8821ae_phy_set_io_cmd(struct ieee80211_hw *hw, enum io_type iotype) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not process\n"); + "switch case %#x not processed\n", iotype); break; } } while (false); @@ -4714,7 +4705,8 @@ static void rtl8821ae_phy_set_io(struct ieee80211_hw *hw) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not process\n"); + "switch case %#x not processed\n", + rtlphy->current_io_type); break; } rtlphy->set_io_inprogress = false; @@ -4820,7 +4812,7 @@ static bool _rtl8821ae_phy_set_rf_power_state(struct ieee80211_hw *hw, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not process\n"); + "switch case %#x not processed\n", rfpwr_state); bresult = false; break; } From 1cc49a5b5466e30a26700af3ac9d85c8ebb1b936 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Sat, 24 Sep 2016 11:57:18 -0500 Subject: [PATCH 1588/1715] rtlwifi: Add HAL_DEF_WOWLAN case to *_get_hw() routines Only rtl8821ae implements WOWLAN; however, the other drivers may receive a call requesting information about this mode. The other drivers need to ignore the request rather than logging that the default branch of the switch statement has been reached. Reported by: Jean Delvare Signed-off-by: Larry Finger Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c | 2 ++ drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.c | 2 ++ drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c | 2 ++ drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.c | 2 ++ drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.c | 2 ++ drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c | 2 ++ drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c | 2 ++ 7 files changed, 14 insertions(+) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c index 3285117845f5..37d6efc3d240 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c @@ -355,6 +355,8 @@ void rtl88ee_get_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) *((u64 *)(val)) = tsf; break; } + case HAL_DEF_WOWLAN: + break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, "switch case %#x not processed\n", variable); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.c index 6d308f9b7ff9..a47be73a0980 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.c @@ -141,6 +141,8 @@ void rtl92ce_get_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) break; } + case HAL_DEF_WOWLAN: + break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, "switch case %#x not processed\n", variable); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c index 5369011914bb..d91f8bbfe7a0 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c @@ -164,6 +164,8 @@ void rtl92de_get_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) case HW_VAR_INT_AC: *((bool *)(val)) = rtlpriv->dm.disable_tx_int; break; + case HAL_DEF_WOWLAN: + break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, "switch case %#x not processed\n", variable); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.c index 47bb6d8c8912..ebf663e1a81a 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.c @@ -338,6 +338,8 @@ void rtl92ee_get_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) *((u64 *)(val)) = tsf; } break; + case HAL_DEF_WOWLAN: + break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_DMESG, "switch case %#x not processed\n", variable); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.c index 5bad9c9ef609..52e4430edb54 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.c @@ -77,6 +77,8 @@ void rtl92se_get_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) *((bool *)(val)) = rtlpriv->dm.current_mrc_switch; break; } + case HAL_DEF_WOWLAN: + break; default: { RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, "switch case %#x not processed\n", variable); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c index 2bf603b4e12c..f8be0bd7e326 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c @@ -141,6 +141,8 @@ void rtl8723e_get_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) break; } + case HAL_DEF_WOWLAN: + break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, "switch case %#x not processed\n", variable); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c index 999c1ac3ee57..aba60c3145c5 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c @@ -348,6 +348,8 @@ void rtl8723be_get_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) *((u64 *)(val)) = tsf; } break; + case HAL_DEF_WOWLAN: + break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, "switch case %#x not processed\n", variable); From 8334ffdc8290949bd838e0d83e200a87f2c0efc4 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Sat, 24 Sep 2016 11:57:19 -0500 Subject: [PATCH 1589/1715] rtlwifi: Add explicit values to hw_variables enum The entries in this enum may be referenced in debug output. Adding explicit values simplifies the lookup. Signed-off-by: Larry Finger Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/wifi.h | 198 ++++++++++---------- 1 file changed, 99 insertions(+), 99 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/wifi.h b/drivers/net/wireless/realtek/rtlwifi/wifi.h index c5086c2229aa..595f7d5d091a 100644 --- a/drivers/net/wireless/realtek/rtlwifi/wifi.h +++ b/drivers/net/wireless/realtek/rtlwifi/wifi.h @@ -394,110 +394,110 @@ enum io_type { }; enum hw_variables { - HW_VAR_ETHER_ADDR, - HW_VAR_MULTICAST_REG, - HW_VAR_BASIC_RATE, - HW_VAR_BSSID, - HW_VAR_MEDIA_STATUS, - HW_VAR_SECURITY_CONF, - HW_VAR_BEACON_INTERVAL, - HW_VAR_ATIM_WINDOW, - HW_VAR_LISTEN_INTERVAL, - HW_VAR_CS_COUNTER, - HW_VAR_DEFAULTKEY0, - HW_VAR_DEFAULTKEY1, - HW_VAR_DEFAULTKEY2, - HW_VAR_DEFAULTKEY3, - HW_VAR_SIFS, - HW_VAR_R2T_SIFS, - HW_VAR_DIFS, - HW_VAR_EIFS, - HW_VAR_SLOT_TIME, - HW_VAR_ACK_PREAMBLE, - HW_VAR_CW_CONFIG, - HW_VAR_CW_VALUES, - HW_VAR_RATE_FALLBACK_CONTROL, - HW_VAR_CONTENTION_WINDOW, - HW_VAR_RETRY_COUNT, - HW_VAR_TR_SWITCH, - HW_VAR_COMMAND, - HW_VAR_WPA_CONFIG, - HW_VAR_AMPDU_MIN_SPACE, - HW_VAR_SHORTGI_DENSITY, - HW_VAR_AMPDU_FACTOR, - HW_VAR_MCS_RATE_AVAILABLE, - HW_VAR_AC_PARAM, - HW_VAR_ACM_CTRL, - HW_VAR_DIS_Req_Qsize, - HW_VAR_CCX_CHNL_LOAD, - HW_VAR_CCX_NOISE_HISTOGRAM, - HW_VAR_CCX_CLM_NHM, - HW_VAR_TxOPLimit, - HW_VAR_TURBO_MODE, - HW_VAR_RF_STATE, - HW_VAR_RF_OFF_BY_HW, - HW_VAR_BUS_SPEED, - HW_VAR_SET_DEV_POWER, + HW_VAR_ETHER_ADDR = 0x0, + HW_VAR_MULTICAST_REG = 0x1, + HW_VAR_BASIC_RATE = 0x2, + HW_VAR_BSSID = 0x3, + HW_VAR_MEDIA_STATUS= 0x4, + HW_VAR_SECURITY_CONF= 0x5, + HW_VAR_BEACON_INTERVAL = 0x6, + HW_VAR_ATIM_WINDOW = 0x7, + HW_VAR_LISTEN_INTERVAL = 0x8, + HW_VAR_CS_COUNTER = 0x9, + HW_VAR_DEFAULTKEY0 = 0xa, + HW_VAR_DEFAULTKEY1 = 0xb, + HW_VAR_DEFAULTKEY2 = 0xc, + HW_VAR_DEFAULTKEY3 = 0xd, + HW_VAR_SIFS = 0xe, + HW_VAR_R2T_SIFS = 0xf, + HW_VAR_DIFS = 0x10, + HW_VAR_EIFS = 0x11, + HW_VAR_SLOT_TIME = 0x12, + HW_VAR_ACK_PREAMBLE = 0x13, + HW_VAR_CW_CONFIG = 0x14, + HW_VAR_CW_VALUES = 0x15, + HW_VAR_RATE_FALLBACK_CONTROL= 0x16, + HW_VAR_CONTENTION_WINDOW = 0x17, + HW_VAR_RETRY_COUNT = 0x18, + HW_VAR_TR_SWITCH = 0x19, + HW_VAR_COMMAND = 0x1a, + HW_VAR_WPA_CONFIG = 0x1b, + HW_VAR_AMPDU_MIN_SPACE = 0x1c, + HW_VAR_SHORTGI_DENSITY = 0x1d, + HW_VAR_AMPDU_FACTOR = 0x1e, + HW_VAR_MCS_RATE_AVAILABLE = 0x1f, + HW_VAR_AC_PARAM = 0x20, + HW_VAR_ACM_CTRL = 0x21, + HW_VAR_DIS_Req_Qsize = 0x22, + HW_VAR_CCX_CHNL_LOAD = 0x23, + HW_VAR_CCX_NOISE_HISTOGRAM = 0x24, + HW_VAR_CCX_CLM_NHM = 0x25, + HW_VAR_TxOPLimit = 0x26, + HW_VAR_TURBO_MODE = 0x27, + HW_VAR_RF_STATE = 0x28, + HW_VAR_RF_OFF_BY_HW = 0x29, + HW_VAR_BUS_SPEED = 0x2a, + HW_VAR_SET_DEV_POWER = 0x2b, - HW_VAR_RCR, - HW_VAR_RATR_0, - HW_VAR_RRSR, - HW_VAR_CPU_RST, - HW_VAR_CHECK_BSSID, - HW_VAR_LBK_MODE, - HW_VAR_AES_11N_FIX, - HW_VAR_USB_RX_AGGR, - HW_VAR_USER_CONTROL_TURBO_MODE, - HW_VAR_RETRY_LIMIT, - HW_VAR_INIT_TX_RATE, - HW_VAR_TX_RATE_REG, - HW_VAR_EFUSE_USAGE, - HW_VAR_EFUSE_BYTES, - HW_VAR_AUTOLOAD_STATUS, - HW_VAR_RF_2R_DISABLE, - HW_VAR_SET_RPWM, - HW_VAR_H2C_FW_PWRMODE, - HW_VAR_H2C_FW_JOINBSSRPT, - HW_VAR_H2C_FW_MEDIASTATUSRPT, - HW_VAR_H2C_FW_P2P_PS_OFFLOAD, - HW_VAR_FW_PSMODE_STATUS, - HW_VAR_INIT_RTS_RATE, - HW_VAR_RESUME_CLK_ON, - HW_VAR_FW_LPS_ACTION, - HW_VAR_1X1_RECV_COMBINE, - HW_VAR_STOP_SEND_BEACON, - HW_VAR_TSF_TIMER, - HW_VAR_IO_CMD, + HW_VAR_RCR = 0x2c, + HW_VAR_RATR_0 = 0x2d, + HW_VAR_RRSR = 0x2e, + HW_VAR_CPU_RST = 0x2f, + HW_VAR_CHECK_BSSID = 0x30, + HW_VAR_LBK_MODE = 0x31, + HW_VAR_AES_11N_FIX = 0x32, + HW_VAR_USB_RX_AGGR = 0x33, + HW_VAR_USER_CONTROL_TURBO_MODE = 0x34, + HW_VAR_RETRY_LIMIT = 0x35, + HW_VAR_INIT_TX_RATE = 0x36, + HW_VAR_TX_RATE_REG = 0x37, + HW_VAR_EFUSE_USAGE = 0x38, + HW_VAR_EFUSE_BYTES = 0x39, + HW_VAR_AUTOLOAD_STATUS = 0x3a, + HW_VAR_RF_2R_DISABLE = 0x3b, + HW_VAR_SET_RPWM = 0x3c, + HW_VAR_H2C_FW_PWRMODE = 0x3d, + HW_VAR_H2C_FW_JOINBSSRPT = 0x3e, + HW_VAR_H2C_FW_MEDIASTATUSRPT = 0x3f, + HW_VAR_H2C_FW_P2P_PS_OFFLOAD = 0x40, + HW_VAR_FW_PSMODE_STATUS = 0x41, + HW_VAR_INIT_RTS_RATE = 0x42, + HW_VAR_RESUME_CLK_ON = 0x43, + HW_VAR_FW_LPS_ACTION = 0x44, + HW_VAR_1X1_RECV_COMBINE = 0x45, + HW_VAR_STOP_SEND_BEACON = 0x46, + HW_VAR_TSF_TIMER = 0x47, + HW_VAR_IO_CMD = 0x48, - HW_VAR_RF_RECOVERY, - HW_VAR_H2C_FW_UPDATE_GTK, - HW_VAR_WF_MASK, - HW_VAR_WF_CRC, - HW_VAR_WF_IS_MAC_ADDR, - HW_VAR_H2C_FW_OFFLOAD, - HW_VAR_RESET_WFCRC, + HW_VAR_RF_RECOVERY = 0x49, + HW_VAR_H2C_FW_UPDATE_GTK = 0x4a, + HW_VAR_WF_MASK = 0x4b, + HW_VAR_WF_CRC = 0x4c, + HW_VAR_WF_IS_MAC_ADDR = 0x4d, + HW_VAR_H2C_FW_OFFLOAD = 0x4e, + HW_VAR_RESET_WFCRC = 0x4f, - HW_VAR_HANDLE_FW_C2H, - HW_VAR_DL_FW_RSVD_PAGE, - HW_VAR_AID, - HW_VAR_HW_SEQ_ENABLE, - HW_VAR_CORRECT_TSF, - HW_VAR_BCN_VALID, - HW_VAR_FWLPS_RF_ON, - HW_VAR_DUAL_TSF_RST, - HW_VAR_SWITCH_EPHY_WoWLAN, - HW_VAR_INT_MIGRATION, - HW_VAR_INT_AC, - HW_VAR_RF_TIMING, + HW_VAR_HANDLE_FW_C2H = 0x50, + HW_VAR_DL_FW_RSVD_PAGE = 0x51, + HW_VAR_AID = 0x52, + HW_VAR_HW_SEQ_ENABLE = 0x53, + HW_VAR_CORRECT_TSF = 0x54, + HW_VAR_BCN_VALID = 0x55, + HW_VAR_FWLPS_RF_ON = 0x56, + HW_VAR_DUAL_TSF_RST = 0x57, + HW_VAR_SWITCH_EPHY_WoWLAN = 0x58, + HW_VAR_INT_MIGRATION = 0x59, + HW_VAR_INT_AC = 0x5a, + HW_VAR_RF_TIMING = 0x5b, - HAL_DEF_WOWLAN, - HW_VAR_MRC, - HW_VAR_KEEP_ALIVE, - HW_VAR_NAV_UPPER, + HAL_DEF_WOWLAN = 0x5c, + HW_VAR_MRC = 0x5d, + HW_VAR_KEEP_ALIVE = 0x5e, + HW_VAR_NAV_UPPER = 0x5f, - HW_VAR_MGT_FILTER, - HW_VAR_CTRL_FILTER, - HW_VAR_DATA_FILTER, + HW_VAR_MGT_FILTER = 0x60, + HW_VAR_CTRL_FILTER = 0x61, + HW_VAR_DATA_FILTER = 0x62, }; enum rt_media_status { From 2f0e56fa37cce60a5ac5d451bcadec51cd711436 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Tue, 27 Sep 2016 12:12:24 +0200 Subject: [PATCH 1590/1715] brcmfmac: replace WARNING on timeout with a simple error message MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Even with timeout increased to 950 ms we get WARNINGs from time to time. It mostly happens on A-MPDU stalls (e.g. when station goes out of range). It may take up to 5-10 secods for the firmware to recover and for that time it doesn't process packets. It's still useful to have a message on time out as it may indicate some firmware problem and incorrect key update. Raising a WARNING however wasn't really that necessary, it doesn't point to any driver bug anymore and backtrace wasn't much useful. Signed-off-by: Rafał Miłecki Acked-by: Arend van Spriel Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c index 17152805d96f..5eaac13e2317 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c @@ -1155,7 +1155,8 @@ int brcmf_netdev_wait_pend8021x(struct brcmf_if *ifp) !brcmf_get_pend_8021x_cnt(ifp), MAX_WAIT_FOR_8021X_TX); - WARN_ON(!err); + if (!err) + brcmf_err("Timed out waiting for no pending 802.1x packets\n"); return !err; } From 7f00ee2bbc630900ba16fc2690473f3e2db0e264 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Tue, 27 Sep 2016 14:11:04 +0200 Subject: [PATCH 1591/1715] brcmfmac: use correct skb freeing helper when deleting flowring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Flowrings contain skbs waiting for transmission that were passed to us by netif. It means we checked every one of them looking for 802.1x Ethernet type. When deleting flowring we have to use freeing function that will check for 802.1x type as well. Freeing skbs without a proper check was leading to counter not being properly decreased. This was triggering a WARNING every time brcmf_netdev_wait_pend8021x was called. Signed-off-by: Rafał Miłecki Acked-by: Arend van Spriel Cc: stable@vger.kernel.org # 4.5+ Signed-off-by: Kalle Valo --- .../net/wireless/broadcom/brcm80211/brcmfmac/flowring.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/flowring.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/flowring.c index b16b367b0569..d0b738da2458 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/flowring.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/flowring.c @@ -234,13 +234,20 @@ static void brcmf_flowring_block(struct brcmf_flowring *flow, u16 flowid, void brcmf_flowring_delete(struct brcmf_flowring *flow, u16 flowid) { + struct brcmf_bus *bus_if = dev_get_drvdata(flow->dev); struct brcmf_flowring_ring *ring; + struct brcmf_if *ifp; u16 hash_idx; + u8 ifidx; struct sk_buff *skb; ring = flow->rings[flowid]; if (!ring) return; + + ifidx = brcmf_flowring_ifidx_get(flow, flowid); + ifp = brcmf_get_ifp(bus_if->drvr, ifidx); + brcmf_flowring_block(flow, flowid, false); hash_idx = ring->hash_id; flow->hash[hash_idx].ifidx = BRCMF_FLOWRING_INVALID_IFIDX; @@ -249,7 +256,7 @@ void brcmf_flowring_delete(struct brcmf_flowring *flow, u16 flowid) skb = skb_dequeue(&ring->skblist); while (skb) { - brcmu_pkt_buf_free_skb(skb); + brcmf_txfinalize(ifp, skb, false); skb = skb_dequeue(&ring->skblist); } From 7a823471ad42cba6c3b658494d8437ca5c166292 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 23 Aug 2016 15:08:09 +0000 Subject: [PATCH 1592/1715] igb: fix non static symbol warning Fixes the following sparse warning: drivers/net/ethernet/intel/igb/igb_ethtool.c:2707:5: warning: symbol 'igb_rxnfc_write_vlan_prio_filter' was not declared. Should it be static? Signed-off-by: Wei Yongjun Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/igb_ethtool.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index 0c33eca7c832..737b664d004c 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -2704,8 +2704,8 @@ static int igb_rxnfc_write_etype_filter(struct igb_adapter *adapter, return 0; } -int igb_rxnfc_write_vlan_prio_filter(struct igb_adapter *adapter, - struct igb_nfc_filter *input) +static int igb_rxnfc_write_vlan_prio_filter(struct igb_adapter *adapter, + struct igb_nfc_filter *input) { struct e1000_hw *hw = &adapter->hw; u8 vlan_priority; From 3d05fd0a99a4a8b49d486116c01c7ac089b64670 Mon Sep 17 00:00:00 2001 From: Todd Fujinaka Date: Wed, 24 Aug 2016 08:38:46 -0700 Subject: [PATCH 1593/1715] igbvf: bump version to igbvf-2.4.0 Bump version to match other igbvf drivers. Signed-off-by: Todd Fujinaka Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igbvf/netdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c index b0778ba65083..12bb877df860 100644 --- a/drivers/net/ethernet/intel/igbvf/netdev.c +++ b/drivers/net/ethernet/intel/igbvf/netdev.c @@ -47,7 +47,7 @@ #include "igbvf.h" -#define DRV_VERSION "2.0.2-k" +#define DRV_VERSION "2.4.0-k" char igbvf_driver_name[] = "igbvf"; const char igbvf_driver_version[] = DRV_VERSION; static const char igbvf_driver_string[] = From 0742337c1984c7cdbac9465cdda004cbdc69d41c Mon Sep 17 00:00:00 2001 From: Todd Fujinaka Date: Wed, 24 Aug 2016 08:40:23 -0700 Subject: [PATCH 1594/1715] igb: bump version to igb-5.4.0 Bump igb version to match other igb drivers. Signed-off-by: Todd Fujinaka Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/igb_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index a83aa13a5bf4..edc9a6ac5169 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -58,7 +58,7 @@ #include "igb.h" #define MAJ 5 -#define MIN 3 +#define MIN 4 #define BUILD 0 #define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \ __stringify(BUILD) "-k" From ac28b41aac35e1000712aaa3aee19bf30fd9a312 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Fri, 9 Sep 2016 09:10:51 -0700 Subject: [PATCH 1595/1715] igb: restore PPS signal on igb_ptp_reset When a reset occurs, the PPS SYS_WRAP interrupt was not re-enabled which resulted in disabling of the PPS signaling. Fix this by recording when the interrupt is on and ensuring that we re-enable it every time we reset. Signed-off-by: Jacob Keller Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/igb.h | 1 + drivers/net/ethernet/intel/igb/igb_ptp.c | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h index 03fbe4b7663b..d11093dce1b9 100644 --- a/drivers/net/ethernet/intel/igb/igb.h +++ b/drivers/net/ethernet/intel/igb/igb.h @@ -489,6 +489,7 @@ struct igb_adapter { struct timecounter tc; u32 tx_hwtstamp_timeouts; u32 rx_hwtstamp_cleared; + bool pps_sys_wrap_on; struct ptp_pin_desc sdp_config[IGB_N_SDP]; struct { diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c index 1dd14e166dc8..a7895c4cbcc3 100644 --- a/drivers/net/ethernet/intel/igb/igb_ptp.c +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c @@ -591,6 +591,7 @@ static int igb_ptp_feature_enable_i210(struct ptp_clock_info *ptp, tsim |= TSINTR_SYS_WRAP; else tsim &= ~TSINTR_SYS_WRAP; + igb->pps_sys_wrap_on = !!on; wr32(E1000_TSIM, tsim); spin_unlock_irqrestore(&igb->tmreg_lock, flags); return 0; @@ -1235,7 +1236,9 @@ void igb_ptp_reset(struct igb_adapter *adapter) case e1000_i211: wr32(E1000_TSAUXC, 0x0); wr32(E1000_TSSDP, 0x0); - wr32(E1000_TSIM, TSYNC_INTERRUPTS); + wr32(E1000_TSIM, + TSYNC_INTERRUPTS | + (adapter->pps_sys_wrap_on ? TSINTR_SYS_WRAP : 0)); wr32(E1000_IMS, E1000_IMS_TS); break; default: From b761fe226be61eba6ae20a424b288559b8a606b5 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Tue, 27 Sep 2016 08:42:41 -0700 Subject: [PATCH 1596/1715] bpf: clean up put_cpu_var usage put_cpu_var takes the percpu data, not the data returned from get_cpu_var. This doesn't change the behavior. Cc: Tejun Heo Cc: Alexei Starovoitov Signed-off-by: Shaohua Li Acked-by: Alexei Starovoitov Acked-by: Tejun Heo Signed-off-by: David S. Miller --- kernel/bpf/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 7b7baaed9ed4..aa6d98154106 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1031,7 +1031,7 @@ BPF_CALL_0(bpf_user_rnd_u32) state = &get_cpu_var(bpf_user_rnd_state); res = prandom_u32_state(state); - put_cpu_var(state); + put_cpu_var(bpf_user_rnd_state); return res; } From 3796c3cbfb91ad1e3269aa0d15060f888063924b Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Tue, 27 Sep 2016 08:42:42 -0700 Subject: [PATCH 1597/1715] lib: clean up put_cpu_var usage put_cpu_var takes the percpu data, not the data returned from get_cpu_var. This doesn't change the behavior. Cc: Tejun Heo Signed-off-by: Shaohua Li Acked-by: Tejun Heo Signed-off-by: David S. Miller --- lib/random32.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/random32.c b/lib/random32.c index 69ed593aab07..915982b304bb 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -81,7 +81,7 @@ u32 prandom_u32(void) u32 res; res = prandom_u32_state(state); - put_cpu_var(state); + put_cpu_var(net_rand_state); return res; } @@ -128,7 +128,7 @@ void prandom_bytes(void *buf, size_t bytes) struct rnd_state *state = &get_cpu_var(net_rand_state); prandom_bytes_state(state, buf, bytes); - put_cpu_var(state); + put_cpu_var(net_rand_state); } EXPORT_SYMBOL(prandom_bytes); From eb523f42d77a43f80bb9c57a34fbdc8406c7b075 Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Tue, 27 Sep 2016 11:21:18 +0300 Subject: [PATCH 1598/1715] net/sched: cls_flower: Use a proper mask value for enc key id parameter The current code use the encapsulation key id value as the mask of that parameter which is wrong. Fix that by using a full mask. Fixes: bc3103f1ed40 ('net/sched: cls_flower: Classify packet in ip tunnels') Signed-off-by: Hadar Hen Zion Acked-by: Amir Vadai Signed-off-by: David S. Miller --- net/sched/cls_flower.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 2af09c872a1a..f6f40fba599b 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -481,7 +481,7 @@ static int fl_set_key(struct net *net, struct nlattr **tb, } fl_set_key_val(tb, &key->enc_key_id.keyid, TCA_FLOWER_KEY_ENC_KEY_ID, - &mask->enc_key_id.keyid, TCA_FLOWER_KEY_ENC_KEY_ID, + &mask->enc_key_id.keyid, TCA_FLOWER_UNSPEC, sizeof(key->enc_key_id.keyid)); return 0; @@ -919,7 +919,7 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, goto nla_put_failure; if (fl_dump_key_val(skb, &key->enc_key_id, TCA_FLOWER_KEY_ENC_KEY_ID, - &mask->enc_key_id, TCA_FLOWER_KEY_ENC_KEY_ID, + &mask->enc_key_id, TCA_FLOWER_UNSPEC, sizeof(key->enc_key_id))) goto nla_put_failure; From b90eb754949931b2e4481b1df9a03f84d4be66ba Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 26 Sep 2016 12:52:29 +0200 Subject: [PATCH 1599/1715] fib: introduce FIB notification infrastructure This allows to pass information about added/deleted FIB entries/rules to whoever is interested. This is done in a very similar way as devinet notifies address additions/removals. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/ip_fib.h | 34 ++++++++++++++++++++-- net/ipv4/fib_frontend.c | 16 +++++------ net/ipv4/fib_rules.c | 10 +++++++ net/ipv4/fib_trie.c | 62 +++++++++++++++++++++++++++++++++++++++-- 4 files changed, 108 insertions(+), 14 deletions(-) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 7d4a72e75f33..116a9c0eb455 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -22,6 +22,7 @@ #include #include #include +#include struct fib_config { u8 fc_dst_len; @@ -185,6 +186,33 @@ __be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh); #define FIB_RES_PREFSRC(net, res) ((res).fi->fib_prefsrc ? : \ FIB_RES_SADDR(net, res)) +struct fib_notifier_info { + struct net *net; +}; + +struct fib_entry_notifier_info { + struct fib_notifier_info info; /* must be first */ + u32 dst; + int dst_len; + struct fib_info *fi; + u8 tos; + u8 type; + u32 tb_id; + u32 nlflags; +}; + +enum fib_event_type { + FIB_EVENT_ENTRY_ADD, + FIB_EVENT_ENTRY_DEL, + FIB_EVENT_RULE_ADD, + FIB_EVENT_RULE_DEL, +}; + +int register_fib_notifier(struct notifier_block *nb); +int unregister_fib_notifier(struct notifier_block *nb); +int call_fib_notifiers(struct net *net, enum fib_event_type event_type, + struct fib_notifier_info *info); + struct fib_table { struct hlist_node tb_hlist; u32 tb_id; @@ -196,11 +224,11 @@ struct fib_table { int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, struct fib_result *res, int fib_flags); -int fib_table_insert(struct fib_table *, struct fib_config *); -int fib_table_delete(struct fib_table *, struct fib_config *); +int fib_table_insert(struct net *, struct fib_table *, struct fib_config *); +int fib_table_delete(struct net *, struct fib_table *, struct fib_config *); int fib_table_dump(struct fib_table *table, struct sk_buff *skb, struct netlink_callback *cb); -int fib_table_flush(struct fib_table *table); +int fib_table_flush(struct net *net, struct fib_table *table); struct fib_table *fib_trie_unmerge(struct fib_table *main_tb); void fib_table_flush_external(struct fib_table *table); void fib_free_table(struct fib_table *tb); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 4e56a4c20a3c..86c43dc9a60e 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -182,7 +182,7 @@ static void fib_flush(struct net *net) struct fib_table *tb; hlist_for_each_entry_safe(tb, tmp, head, tb_hlist) - flushed += fib_table_flush(tb); + flushed += fib_table_flush(net, tb); } if (flushed) @@ -590,13 +590,13 @@ int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg) if (cmd == SIOCDELRT) { tb = fib_get_table(net, cfg.fc_table); if (tb) - err = fib_table_delete(tb, &cfg); + err = fib_table_delete(net, tb, &cfg); else err = -ESRCH; } else { tb = fib_new_table(net, cfg.fc_table); if (tb) - err = fib_table_insert(tb, &cfg); + err = fib_table_insert(net, tb, &cfg); else err = -ENOBUFS; } @@ -719,7 +719,7 @@ static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh) goto errout; } - err = fib_table_delete(tb, &cfg); + err = fib_table_delete(net, tb, &cfg); errout: return err; } @@ -741,7 +741,7 @@ static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh) goto errout; } - err = fib_table_insert(tb, &cfg); + err = fib_table_insert(net, tb, &cfg); errout: return err; } @@ -828,9 +828,9 @@ static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifad cfg.fc_scope = RT_SCOPE_HOST; if (cmd == RTM_NEWROUTE) - fib_table_insert(tb, &cfg); + fib_table_insert(net, tb, &cfg); else - fib_table_delete(tb, &cfg); + fib_table_delete(net, tb, &cfg); } void fib_add_ifaddr(struct in_ifaddr *ifa) @@ -1254,7 +1254,7 @@ static void ip_fib_net_exit(struct net *net) hlist_for_each_entry_safe(tb, tmp, head, tb_hlist) { hlist_del(&tb->tb_hlist); - fib_table_flush(tb); + fib_table_flush(net, tb); fib_free_table(tb); } } diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 770bebed6b28..ebadf6b99499 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -164,6 +164,14 @@ static struct fib_table *fib_empty_table(struct net *net) return NULL; } +static int call_fib_rule_notifiers(struct net *net, + enum fib_event_type event_type) +{ + struct fib_notifier_info info; + + return call_fib_notifiers(net, event_type, &info); +} + static const struct nla_policy fib4_rule_policy[FRA_MAX+1] = { FRA_GENERIC_POLICY, [FRA_FLOW] = { .type = NLA_U32 }, @@ -221,6 +229,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, net->ipv4.fib_has_custom_rules = true; fib_flush_external(rule->fr_net); + call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD); err = 0; errout: @@ -243,6 +252,7 @@ static int fib4_rule_delete(struct fib_rule *rule) #endif net->ipv4.fib_has_custom_rules = true; fib_flush_external(rule->fr_net); + call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL); errout: return err; } diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 241f27bbd7ad..51a4537eb145 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -73,6 +73,7 @@ #include #include #include +#include #include #include #include @@ -84,6 +85,44 @@ #include #include "fib_lookup.h" +static BLOCKING_NOTIFIER_HEAD(fib_chain); + +int register_fib_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&fib_chain, nb); +} +EXPORT_SYMBOL(register_fib_notifier); + +int unregister_fib_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_unregister(&fib_chain, nb); +} +EXPORT_SYMBOL(unregister_fib_notifier); + +int call_fib_notifiers(struct net *net, enum fib_event_type event_type, + struct fib_notifier_info *info) +{ + info->net = net; + return blocking_notifier_call_chain(&fib_chain, event_type, info); +} + +static int call_fib_entry_notifiers(struct net *net, + enum fib_event_type event_type, u32 dst, + int dst_len, struct fib_info *fi, + u8 tos, u8 type, u32 tb_id, u32 nlflags) +{ + struct fib_entry_notifier_info info = { + .dst = dst, + .dst_len = dst_len, + .fi = fi, + .tos = tos, + .type = type, + .tb_id = tb_id, + .nlflags = nlflags, + }; + return call_fib_notifiers(net, event_type, &info.info); +} + #define MAX_STAT_DEPTH 32 #define KEYLENGTH (8*sizeof(t_key)) @@ -1076,7 +1115,8 @@ static int fib_insert_alias(struct trie *t, struct key_vector *tp, } /* Caller must hold RTNL. */ -int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) +int fib_table_insert(struct net *net, struct fib_table *tb, + struct fib_config *cfg) { struct trie *t = (struct trie *)tb->tb_data; struct fib_alias *fa, *new_fa; @@ -1193,6 +1233,11 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) fib_release_info(fi_drop); if (state & FA_S_ACCESSED) rt_cache_flush(cfg->fc_nlinfo.nl_net); + + call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_ADD, + key, plen, fi, + new_fa->fa_tos, cfg->fc_type, + tb->tb_id, cfg->fc_nlflags); rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, &cfg->fc_nlinfo, nlflags); @@ -1245,6 +1290,8 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) tb->tb_num_default++; rt_cache_flush(cfg->fc_nlinfo.nl_net); + call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_ADD, key, plen, fi, tos, + cfg->fc_type, tb->tb_id, cfg->fc_nlflags); rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, new_fa->tb_id, &cfg->fc_nlinfo, nlflags); succeeded: @@ -1490,7 +1537,8 @@ static void fib_remove_alias(struct trie *t, struct key_vector *tp, } /* Caller must hold RTNL. */ -int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) +int fib_table_delete(struct net *net, struct fib_table *tb, + struct fib_config *cfg) { struct trie *t = (struct trie *) tb->tb_data; struct fib_alias *fa, *fa_to_delete; @@ -1546,6 +1594,9 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) switchdev_fib_ipv4_del(key, plen, fa_to_delete->fa_info, tos, cfg->fc_type, tb->tb_id); + call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, key, plen, + fa_to_delete->fa_info, tos, cfg->fc_type, + tb->tb_id, 0); rtmsg_fib(RTM_DELROUTE, htonl(key), fa_to_delete, plen, tb->tb_id, &cfg->fc_nlinfo, 0); @@ -1809,7 +1860,7 @@ void fib_table_flush_external(struct fib_table *tb) } /* Caller must hold RTNL. */ -int fib_table_flush(struct fib_table *tb) +int fib_table_flush(struct net *net, struct fib_table *tb) { struct trie *t = (struct trie *)tb->tb_data; struct key_vector *pn = t->kv; @@ -1861,6 +1912,11 @@ int fib_table_flush(struct fib_table *tb) switchdev_fib_ipv4_del(n->key, KEYLENGTH - fa->fa_slen, fi, fa->fa_tos, fa->fa_type, tb->tb_id); + call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, + n->key, + KEYLENGTH - fa->fa_slen, + fi, fa->fa_tos, fa->fa_type, + tb->tb_id, 0); hlist_del_rcu(&fa->fa_list); fib_release_info(fa->fa_info); alias_free_mem_rcu(fa); From c98501879b1b1af90c7325574f2672e9efca592c Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 26 Sep 2016 12:52:30 +0200 Subject: [PATCH 1600/1715] fib: introduce FIB info offload flag helpers These helpers are to be used in case someone offloads the FIB entry. The result is that if the entry is offloaded to at least one device, the offload flag is set. Signed-off-by: Jiri Pirko Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- include/net/ip_fib.h | 13 +++++++++++++ net/switchdev/switchdev.c | 4 ++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 116a9c0eb455..ffccf1787914 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -123,6 +123,7 @@ struct fib_info { #ifdef CONFIG_IP_ROUTE_MULTIPATH int fib_weight; #endif + unsigned int fib_offload_cnt; struct rcu_head rcu; struct fib_nh fib_nh[0]; #define fib_dev fib_nh[0].nh_dev @@ -174,6 +175,18 @@ struct fib_result_nl { __be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh); +static inline void fib_info_offload_inc(struct fib_info *fi) +{ + fi->fib_offload_cnt++; + fi->fib_flags |= RTNH_F_OFFLOAD; +} + +static inline void fib_info_offload_dec(struct fib_info *fi) +{ + if (--fi->fib_offload_cnt == 0) + fi->fib_flags &= ~RTNH_F_OFFLOAD; +} + #define FIB_RES_SADDR(net, res) \ ((FIB_RES_NH(res).nh_saddr_genid == \ atomic_read(&(net)->ipv4.dev_addr_genid)) ? \ diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 10b819308439..abd8d2a38a7d 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -1216,7 +1216,7 @@ int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, ipv4_fib.obj.orig_dev = dev; err = switchdev_port_obj_add(dev, &ipv4_fib.obj); if (!err) - fi->fib_flags |= RTNH_F_OFFLOAD; + fib_info_offload_inc(fi); return err == -EOPNOTSUPP ? 0 : err; } @@ -1260,7 +1260,7 @@ int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, ipv4_fib.obj.orig_dev = dev; err = switchdev_port_obj_del(dev, &ipv4_fib.obj); if (!err) - fi->fib_flags &= ~RTNH_F_OFFLOAD; + fib_info_offload_dec(fi); return err == -EOPNOTSUPP ? 0 : err; } From b45f64d16d456fde027a220cc62d6b4cc315f97b Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 26 Sep 2016 12:52:31 +0200 Subject: [PATCH 1601/1715] mlxsw: spectrum_router: Use FIB notifications instead of switchdev calls Until now, in order to offload a FIB entry to HW we use switchdev op. However that has limits. Mainly in case we need to make the HW aware of all route prefixes configured in kernel. HW needs to know those in order to properly trap appropriate packets and pass the to kernel to do the forwarding. Abort mechanism is now handled within the mlxsw driver. Signed-off-by: Jiri Pirko Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum.h | 9 +- .../ethernet/mellanox/mlxsw/spectrum_router.c | 441 ++++++++++-------- .../mellanox/mlxsw/spectrum_switchdev.c | 9 - 3 files changed, 262 insertions(+), 197 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 73cae211a5ce..9b22863a924b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -45,7 +45,7 @@ #include #include #include -#include +#include #include "port.h" #include "core.h" @@ -257,6 +257,7 @@ struct mlxsw_sp_router { #define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */ struct list_head nexthop_group_list; struct list_head nexthop_neighs_list; + bool aborted; }; struct mlxsw_sp { @@ -296,6 +297,7 @@ struct mlxsw_sp { struct mlxsw_sp_span_entry *entries; int entries_count; } span; + struct notifier_block fib_nb; }; static inline struct mlxsw_sp_upper * @@ -584,11 +586,6 @@ static inline void mlxsw_sp_port_dcb_fini(struct mlxsw_sp_port *mlxsw_sp_port) int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp); void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp); -int mlxsw_sp_router_fib4_add(struct mlxsw_sp_port *mlxsw_sp_port, - const struct switchdev_obj_ipv4_fib *fib4, - struct switchdev_trans *trans); -int mlxsw_sp_router_fib4_del(struct mlxsw_sp_port *mlxsw_sp_port, - const struct switchdev_obj_ipv4_fib *fib4); int mlxsw_sp_router_neigh_construct(struct net_device *dev, struct neighbour *n); void mlxsw_sp_router_neigh_destroy(struct net_device *dev, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index cc653ac2d7d6..48d50efec5e2 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -43,6 +43,7 @@ #include #include #include +#include #include "spectrum.h" #include "core.h" @@ -122,17 +123,20 @@ struct mlxsw_sp_nexthop_group; struct mlxsw_sp_fib_entry { struct rhash_head ht_node; + struct list_head list; struct mlxsw_sp_fib_key key; enum mlxsw_sp_fib_entry_type type; unsigned int ref_count; u16 rif; /* used for action local */ struct mlxsw_sp_vr *vr; + struct fib_info *fi; struct list_head nexthop_group_node; struct mlxsw_sp_nexthop_group *nh_group; }; struct mlxsw_sp_fib { struct rhashtable ht; + struct list_head entry_list; unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT]; struct mlxsw_sp_prefix_usage prefix_usage; }; @@ -154,6 +158,7 @@ static int mlxsw_sp_fib_entry_insert(struct mlxsw_sp_fib *fib, mlxsw_sp_fib_ht_params); if (err) return err; + list_add_tail(&fib_entry->list, &fib->entry_list); if (fib->prefix_ref_count[prefix_len]++ == 0) mlxsw_sp_prefix_usage_set(&fib->prefix_usage, prefix_len); return 0; @@ -166,6 +171,7 @@ static void mlxsw_sp_fib_entry_remove(struct mlxsw_sp_fib *fib, if (--fib->prefix_ref_count[prefix_len] == 0) mlxsw_sp_prefix_usage_clear(&fib->prefix_usage, prefix_len); + list_del(&fib_entry->list); rhashtable_remove_fast(&fib->ht, &fib_entry->ht_node, mlxsw_sp_fib_ht_params); } @@ -216,6 +222,7 @@ static struct mlxsw_sp_fib *mlxsw_sp_fib_create(void) err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params); if (err) goto err_rhashtable_init; + INIT_LIST_HEAD(&fib->entry_list); return fib; err_rhashtable_init: @@ -1520,85 +1527,6 @@ static void mlxsw_sp_nexthop_group_put(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_nexthop_group_destroy(mlxsw_sp, nh_grp); } -static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) -{ - struct mlxsw_resources *resources; - char rgcr_pl[MLXSW_REG_RGCR_LEN]; - int err; - - resources = mlxsw_core_resources_get(mlxsw_sp->core); - if (!resources->max_rif_valid) - return -EIO; - - mlxsw_sp->rifs = kcalloc(resources->max_rif, - sizeof(struct mlxsw_sp_rif *), GFP_KERNEL); - if (!mlxsw_sp->rifs) - return -ENOMEM; - - mlxsw_reg_rgcr_pack(rgcr_pl, true); - mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, resources->max_rif); - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); - if (err) - goto err_rgcr_fail; - - return 0; - -err_rgcr_fail: - kfree(mlxsw_sp->rifs); - return err; -} - -static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) -{ - struct mlxsw_resources *resources; - char rgcr_pl[MLXSW_REG_RGCR_LEN]; - int i; - - mlxsw_reg_rgcr_pack(rgcr_pl, false); - mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); - - resources = mlxsw_core_resources_get(mlxsw_sp->core); - for (i = 0; i < resources->max_rif; i++) - WARN_ON_ONCE(mlxsw_sp->rifs[i]); - - kfree(mlxsw_sp->rifs); -} - -int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) -{ - int err; - - INIT_LIST_HEAD(&mlxsw_sp->router.nexthop_neighs_list); - INIT_LIST_HEAD(&mlxsw_sp->router.nexthop_group_list); - err = __mlxsw_sp_router_init(mlxsw_sp); - if (err) - return err; - - mlxsw_sp_lpm_init(mlxsw_sp); - err = mlxsw_sp_vrs_init(mlxsw_sp); - if (err) - goto err_vrs_init; - - err = mlxsw_sp_neigh_init(mlxsw_sp); - if (err) - goto err_neigh_init; - - return 0; - -err_neigh_init: - mlxsw_sp_vrs_fini(mlxsw_sp); -err_vrs_init: - __mlxsw_sp_router_fini(mlxsw_sp); - return err; -} - -void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) -{ - mlxsw_sp_neigh_fini(mlxsw_sp); - mlxsw_sp_vrs_fini(mlxsw_sp); - __mlxsw_sp_router_fini(mlxsw_sp); -} - static int mlxsw_sp_fib_entry_op4_remote(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry, enum mlxsw_reg_ralue_op op) @@ -1706,94 +1634,98 @@ static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp, MLXSW_REG_RALUE_OP_WRITE_DELETE); } -struct mlxsw_sp_router_fib4_add_info { - struct switchdev_trans_item tritem; - struct mlxsw_sp *mlxsw_sp; - struct mlxsw_sp_fib_entry *fib_entry; -}; - -static void mlxsw_sp_router_fib4_add_info_destroy(void const *data) -{ - const struct mlxsw_sp_router_fib4_add_info *info = data; - struct mlxsw_sp_fib_entry *fib_entry = info->fib_entry; - struct mlxsw_sp *mlxsw_sp = info->mlxsw_sp; - struct mlxsw_sp_vr *vr = fib_entry->vr; - - mlxsw_sp_fib_entry_destroy(fib_entry); - mlxsw_sp_vr_put(mlxsw_sp, vr); - kfree(info); -} - static int mlxsw_sp_router_fib4_entry_init(struct mlxsw_sp *mlxsw_sp, - const struct switchdev_obj_ipv4_fib *fib4, + const struct fib_entry_notifier_info *fen_info, struct mlxsw_sp_fib_entry *fib_entry) { - struct fib_info *fi = fib4->fi; + struct fib_info *fi = fen_info->fi; + struct mlxsw_sp_rif *r; + int nhsel; + int err; - if (fib4->type == RTN_LOCAL || fib4->type == RTN_BROADCAST) { + if (fen_info->type == RTN_LOCAL || fen_info->type == RTN_BROADCAST) { fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; return 0; } - if (fib4->type != RTN_UNICAST) + if (fen_info->type != RTN_UNICAST) return -EINVAL; - if (fi->fib_scope != RT_SCOPE_UNIVERSE) { - struct mlxsw_sp_rif *r; + for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) { + const struct fib_nh *nh = &fi->fib_nh[nhsel]; - fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL; - r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, fi->fib_dev); - if (!r) - return -EINVAL; - fib_entry->rif = r->rif; - return 0; + if (!nh->nh_dev) + continue; + r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, nh->nh_dev); + if (!r) { + /* In case router interface is not found for + * at least one of the nexthops, that means + * the nexthop points to some device unrelated + * to us. Set trap and pass the packets for + * this prefix to kernel. + */ + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; + return 0; + } } - fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE; - return mlxsw_sp_nexthop_group_get(mlxsw_sp, fib_entry, fi); + + if (fi->fib_scope != RT_SCOPE_UNIVERSE) { + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL; + fib_entry->rif = r->rif; + } else { + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE; + err = mlxsw_sp_nexthop_group_get(mlxsw_sp, fib_entry, fi); + if (err) + return err; + } + fib_info_offload_inc(fen_info->fi); + return 0; } static void mlxsw_sp_router_fib4_entry_fini(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry) { - if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_REMOTE) - return; - mlxsw_sp_nexthop_group_put(mlxsw_sp, fib_entry); + if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP) + fib_info_offload_dec(fib_entry->fi); + if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_REMOTE) + mlxsw_sp_nexthop_group_put(mlxsw_sp, fib_entry); } static struct mlxsw_sp_fib_entry * mlxsw_sp_fib_entry_get(struct mlxsw_sp *mlxsw_sp, - const struct switchdev_obj_ipv4_fib *fib4) + const struct fib_entry_notifier_info *fen_info) { struct mlxsw_sp_fib_entry *fib_entry; - struct fib_info *fi = fib4->fi; + struct fib_info *fi = fen_info->fi; struct mlxsw_sp_vr *vr; int err; - vr = mlxsw_sp_vr_get(mlxsw_sp, fib4->dst_len, fib4->tb_id, + vr = mlxsw_sp_vr_get(mlxsw_sp, fen_info->dst_len, fen_info->tb_id, MLXSW_SP_L3_PROTO_IPV4); if (IS_ERR(vr)) return ERR_CAST(vr); - fib_entry = mlxsw_sp_fib_entry_lookup(vr->fib, &fib4->dst, - sizeof(fib4->dst), - fib4->dst_len, fi->fib_dev); + fib_entry = mlxsw_sp_fib_entry_lookup(vr->fib, &fen_info->dst, + sizeof(fen_info->dst), + fen_info->dst_len, fi->fib_dev); if (fib_entry) { /* Already exists, just take a reference */ fib_entry->ref_count++; return fib_entry; } - fib_entry = mlxsw_sp_fib_entry_create(vr->fib, &fib4->dst, - sizeof(fib4->dst), - fib4->dst_len, fi->fib_dev); + fib_entry = mlxsw_sp_fib_entry_create(vr->fib, &fen_info->dst, + sizeof(fen_info->dst), + fen_info->dst_len, fi->fib_dev); if (!fib_entry) { err = -ENOMEM; goto err_fib_entry_create; } fib_entry->vr = vr; + fib_entry->fi = fi; fib_entry->ref_count = 1; - err = mlxsw_sp_router_fib4_entry_init(mlxsw_sp, fib4, fib_entry); + err = mlxsw_sp_router_fib4_entry_init(mlxsw_sp, fen_info, fib_entry); if (err) goto err_fib4_entry_init; @@ -1809,17 +1741,19 @@ err_fib_entry_create: static struct mlxsw_sp_fib_entry * mlxsw_sp_fib_entry_find(struct mlxsw_sp *mlxsw_sp, - const struct switchdev_obj_ipv4_fib *fib4) + const struct fib_entry_notifier_info *fen_info) { struct mlxsw_sp_vr *vr; - vr = mlxsw_sp_vr_find(mlxsw_sp, fib4->tb_id, MLXSW_SP_L3_PROTO_IPV4); + vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id, + MLXSW_SP_L3_PROTO_IPV4); if (!vr) return NULL; - return mlxsw_sp_fib_entry_lookup(vr->fib, &fib4->dst, - sizeof(fib4->dst), fib4->dst_len, - fib4->fi->fib_dev); + return mlxsw_sp_fib_entry_lookup(vr->fib, &fen_info->dst, + sizeof(fen_info->dst), + fen_info->dst_len, + fen_info->fi->fib_dev); } static void mlxsw_sp_fib_entry_put(struct mlxsw_sp *mlxsw_sp, @@ -1834,60 +1768,43 @@ static void mlxsw_sp_fib_entry_put(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_vr_put(mlxsw_sp, vr); } -static int -mlxsw_sp_router_fib4_add_prepare(struct mlxsw_sp_port *mlxsw_sp_port, - const struct switchdev_obj_ipv4_fib *fib4, - struct switchdev_trans *trans) +static void mlxsw_sp_fib_entry_put_all(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) { - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - struct mlxsw_sp_router_fib4_add_info *info; - struct mlxsw_sp_fib_entry *fib_entry; - int err; + unsigned int last_ref_count; - fib_entry = mlxsw_sp_fib_entry_get(mlxsw_sp, fib4); - if (IS_ERR(fib_entry)) - return PTR_ERR(fib_entry); - - info = kmalloc(sizeof(*info), GFP_KERNEL); - if (!info) { - err = -ENOMEM; - goto err_alloc_info; - } - info->mlxsw_sp = mlxsw_sp; - info->fib_entry = fib_entry; - switchdev_trans_item_enqueue(trans, info, - mlxsw_sp_router_fib4_add_info_destroy, - &info->tritem); - return 0; - -err_alloc_info: - mlxsw_sp_fib_entry_put(mlxsw_sp, fib_entry); - return err; + do { + last_ref_count = fib_entry->ref_count; + mlxsw_sp_fib_entry_put(mlxsw_sp, fib_entry); + } while (last_ref_count != 1); } -static int -mlxsw_sp_router_fib4_add_commit(struct mlxsw_sp_port *mlxsw_sp_port, - const struct switchdev_obj_ipv4_fib *fib4, - struct switchdev_trans *trans) +static int mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp, + struct fib_entry_notifier_info *fen_info) { - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - struct mlxsw_sp_router_fib4_add_info *info; struct mlxsw_sp_fib_entry *fib_entry; struct mlxsw_sp_vr *vr; int err; - info = switchdev_trans_item_dequeue(trans); - fib_entry = info->fib_entry; - kfree(info); + if (mlxsw_sp->router.aborted) + return 0; + + fib_entry = mlxsw_sp_fib_entry_get(mlxsw_sp, fen_info); + if (IS_ERR(fib_entry)) { + dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB4 entry being added.\n"); + return PTR_ERR(fib_entry); + } if (fib_entry->ref_count != 1) return 0; vr = fib_entry->vr; err = mlxsw_sp_fib_entry_insert(vr->fib, fib_entry); - if (err) + if (err) { + dev_warn(mlxsw_sp->bus_info->dev, "Failed to insert FIB4 entry being added.\n"); goto err_fib_entry_insert; - err = mlxsw_sp_fib_entry_update(mlxsw_sp_port->mlxsw_sp, fib_entry); + } + err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry); if (err) goto err_fib_entry_add; return 0; @@ -1899,24 +1816,15 @@ err_fib_entry_insert: return err; } -int mlxsw_sp_router_fib4_add(struct mlxsw_sp_port *mlxsw_sp_port, - const struct switchdev_obj_ipv4_fib *fib4, - struct switchdev_trans *trans) +static int mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp, + struct fib_entry_notifier_info *fen_info) { - if (switchdev_trans_ph_prepare(trans)) - return mlxsw_sp_router_fib4_add_prepare(mlxsw_sp_port, - fib4, trans); - return mlxsw_sp_router_fib4_add_commit(mlxsw_sp_port, - fib4, trans); -} - -int mlxsw_sp_router_fib4_del(struct mlxsw_sp_port *mlxsw_sp_port, - const struct switchdev_obj_ipv4_fib *fib4) -{ - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; struct mlxsw_sp_fib_entry *fib_entry; - fib_entry = mlxsw_sp_fib_entry_find(mlxsw_sp, fib4); + if (mlxsw_sp->router.aborted) + return 0; + + fib_entry = mlxsw_sp_fib_entry_find(mlxsw_sp, fen_info); if (!fib_entry) { dev_warn(mlxsw_sp->bus_info->dev, "Failed to find FIB4 entry being removed.\n"); return -ENOENT; @@ -1930,3 +1838,172 @@ int mlxsw_sp_router_fib4_del(struct mlxsw_sp_port *mlxsw_sp_port, mlxsw_sp_fib_entry_put(mlxsw_sp, fib_entry); return 0; } + +static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp) +{ + char ralta_pl[MLXSW_REG_RALTA_LEN]; + char ralst_pl[MLXSW_REG_RALST_LEN]; + char raltb_pl[MLXSW_REG_RALTB_LEN]; + char ralue_pl[MLXSW_REG_RALUE_LEN]; + int err; + + mlxsw_reg_ralta_pack(ralta_pl, true, MLXSW_REG_RALXX_PROTOCOL_IPV4, + MLXSW_SP_LPM_TREE_MIN); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl); + if (err) + return err; + + mlxsw_reg_ralst_pack(ralst_pl, 0xff, MLXSW_SP_LPM_TREE_MIN); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl); + if (err) + return err; + + mlxsw_reg_raltb_pack(raltb_pl, 0, MLXSW_REG_RALXX_PROTOCOL_IPV4, 0); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl); + if (err) + return err; + + mlxsw_reg_ralue_pack4(ralue_pl, MLXSW_SP_L3_PROTO_IPV4, + MLXSW_REG_RALUE_OP_WRITE_WRITE, 0, 0, 0); + mlxsw_reg_ralue_act_ip2me_pack(ralue_pl); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); +} + +static void mlxsw_sp_router_fib4_abort(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_resources *resources; + struct mlxsw_sp_fib_entry *fib_entry; + struct mlxsw_sp_fib_entry *tmp; + struct mlxsw_sp_vr *vr; + int i; + int err; + + resources = mlxsw_core_resources_get(mlxsw_sp->core); + for (i = 0; i < resources->max_virtual_routers; i++) { + vr = &mlxsw_sp->router.vrs[i]; + if (!vr->used) + continue; + + list_for_each_entry_safe(fib_entry, tmp, + &vr->fib->entry_list, list) { + bool do_break = &tmp->list == &vr->fib->entry_list; + + mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry); + mlxsw_sp_fib_entry_remove(fib_entry->vr->fib, + fib_entry); + mlxsw_sp_fib_entry_put_all(mlxsw_sp, fib_entry); + if (do_break) + break; + } + } + mlxsw_sp->router.aborted = true; + err = mlxsw_sp_router_set_abort_trap(mlxsw_sp); + if (err) + dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n"); +} + +static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_resources *resources; + char rgcr_pl[MLXSW_REG_RGCR_LEN]; + int err; + + resources = mlxsw_core_resources_get(mlxsw_sp->core); + if (!resources->max_rif_valid) + return -EIO; + + mlxsw_sp->rifs = kcalloc(resources->max_rif, + sizeof(struct mlxsw_sp_rif *), GFP_KERNEL); + if (!mlxsw_sp->rifs) + return -ENOMEM; + + mlxsw_reg_rgcr_pack(rgcr_pl, true); + mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, resources->max_rif); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); + if (err) + goto err_rgcr_fail; + + return 0; + +err_rgcr_fail: + kfree(mlxsw_sp->rifs); + return err; +} + +static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_resources *resources; + char rgcr_pl[MLXSW_REG_RGCR_LEN]; + int i; + + mlxsw_reg_rgcr_pack(rgcr_pl, false); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); + + resources = mlxsw_core_resources_get(mlxsw_sp->core); + for (i = 0; i < resources->max_rif; i++) + WARN_ON_ONCE(mlxsw_sp->rifs[i]); + + kfree(mlxsw_sp->rifs); +} + +static int mlxsw_sp_router_fib_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct mlxsw_sp *mlxsw_sp = container_of(nb, struct mlxsw_sp, fib_nb); + struct fib_entry_notifier_info *fen_info = ptr; + int err; + + switch (event) { + case FIB_EVENT_ENTRY_ADD: + err = mlxsw_sp_router_fib4_add(mlxsw_sp, fen_info); + if (err) + mlxsw_sp_router_fib4_abort(mlxsw_sp); + break; + case FIB_EVENT_ENTRY_DEL: + mlxsw_sp_router_fib4_del(mlxsw_sp, fen_info); + break; + case FIB_EVENT_RULE_ADD: /* fall through */ + case FIB_EVENT_RULE_DEL: + mlxsw_sp_router_fib4_abort(mlxsw_sp); + break; + } + return NOTIFY_DONE; +} + +int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) +{ + int err; + + INIT_LIST_HEAD(&mlxsw_sp->router.nexthop_neighs_list); + INIT_LIST_HEAD(&mlxsw_sp->router.nexthop_group_list); + err = __mlxsw_sp_router_init(mlxsw_sp); + if (err) + return err; + + mlxsw_sp_lpm_init(mlxsw_sp); + err = mlxsw_sp_vrs_init(mlxsw_sp); + if (err) + goto err_vrs_init; + + err = mlxsw_sp_neigh_init(mlxsw_sp); + if (err) + goto err_neigh_init; + + mlxsw_sp->fib_nb.notifier_call = mlxsw_sp_router_fib_event; + register_fib_notifier(&mlxsw_sp->fib_nb); + return 0; + +err_neigh_init: + mlxsw_sp_vrs_fini(mlxsw_sp); +err_vrs_init: + __mlxsw_sp_router_fini(mlxsw_sp); + return err; +} + +void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) +{ + unregister_fib_notifier(&mlxsw_sp->fib_nb); + mlxsw_sp_neigh_fini(mlxsw_sp); + mlxsw_sp_vrs_fini(mlxsw_sp); + __mlxsw_sp_router_fini(mlxsw_sp); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 2b04b76b503e..5e00c79e8133 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -1044,11 +1044,6 @@ static int mlxsw_sp_port_obj_add(struct net_device *dev, SWITCHDEV_OBJ_PORT_VLAN(obj), trans); break; - case SWITCHDEV_OBJ_ID_IPV4_FIB: - err = mlxsw_sp_router_fib4_add(mlxsw_sp_port, - SWITCHDEV_OBJ_IPV4_FIB(obj), - trans); - break; case SWITCHDEV_OBJ_ID_PORT_FDB: err = mlxsw_sp_port_fdb_static_add(mlxsw_sp_port, SWITCHDEV_OBJ_PORT_FDB(obj), @@ -1181,10 +1176,6 @@ static int mlxsw_sp_port_obj_del(struct net_device *dev, err = mlxsw_sp_port_vlans_del(mlxsw_sp_port, SWITCHDEV_OBJ_PORT_VLAN(obj)); break; - case SWITCHDEV_OBJ_ID_IPV4_FIB: - err = mlxsw_sp_router_fib4_del(mlxsw_sp_port, - SWITCHDEV_OBJ_IPV4_FIB(obj)); - break; case SWITCHDEV_OBJ_ID_PORT_FDB: err = mlxsw_sp_port_fdb_static_del(mlxsw_sp_port, SWITCHDEV_OBJ_PORT_FDB(obj)); From 936bd486564aa3edb52313aa7c2e7381e0bcaba3 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 26 Sep 2016 12:52:32 +0200 Subject: [PATCH 1602/1715] rocker: use FIB notifications instead of switchdev calls Until now, in order to offload a FIB entry to HW we use switchdev op. Use the newly introduced FIB notifier infrasturucture to process FIB entries to offload the in HW. Abort mechanism is now handled within the rocker driver. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/rocker/rocker.h | 15 ++- drivers/net/ethernet/rocker/rocker_main.c | 120 +++++++++++++++------ drivers/net/ethernet/rocker/rocker_ofdpa.c | 115 +++++++++++++++----- 3 files changed, 185 insertions(+), 65 deletions(-) diff --git a/drivers/net/ethernet/rocker/rocker.h b/drivers/net/ethernet/rocker/rocker.h index 1ab995f7146b..2eb9b49569d5 100644 --- a/drivers/net/ethernet/rocker/rocker.h +++ b/drivers/net/ethernet/rocker/rocker.h @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -52,6 +53,9 @@ struct rocker_port { struct rocker_dma_ring_info rx_ring; }; +struct rocker_port *rocker_port_dev_lower_find(struct net_device *dev, + struct rocker *rocker); + struct rocker_world_ops; struct rocker { @@ -66,6 +70,7 @@ struct rocker { spinlock_t cmd_ring_lock; /* for cmd ring accesses */ struct rocker_dma_ring_info cmd_ring; struct rocker_dma_ring_info event_ring; + struct notifier_block fib_nb; struct rocker_world_ops *wops; void *wpriv; }; @@ -117,11 +122,6 @@ struct rocker_world_ops { int (*port_obj_vlan_dump)(const struct rocker_port *rocker_port, struct switchdev_obj_port_vlan *vlan, switchdev_obj_dump_cb_t *cb); - int (*port_obj_fib4_add)(struct rocker_port *rocker_port, - const struct switchdev_obj_ipv4_fib *fib4, - struct switchdev_trans *trans); - int (*port_obj_fib4_del)(struct rocker_port *rocker_port, - const struct switchdev_obj_ipv4_fib *fib4); int (*port_obj_fdb_add)(struct rocker_port *rocker_port, const struct switchdev_obj_port_fdb *fdb, struct switchdev_trans *trans); @@ -141,6 +141,11 @@ struct rocker_world_ops { int (*port_ev_mac_vlan_seen)(struct rocker_port *rocker_port, const unsigned char *addr, __be16 vlan_id); + int (*fib4_add)(struct rocker *rocker, + const struct fib_entry_notifier_info *fen_info); + int (*fib4_del)(struct rocker *rocker, + const struct fib_entry_notifier_info *fen_info); + void (*fib4_abort)(struct rocker *rocker); }; extern struct rocker_world_ops rocker_ofdpa_ops; diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c index 1f0c08602eba..5424fb341613 100644 --- a/drivers/net/ethernet/rocker/rocker_main.c +++ b/drivers/net/ethernet/rocker/rocker_main.c @@ -1624,29 +1624,6 @@ rocker_world_port_obj_vlan_dump(const struct rocker_port *rocker_port, return wops->port_obj_vlan_dump(rocker_port, vlan, cb); } -static int -rocker_world_port_obj_fib4_add(struct rocker_port *rocker_port, - const struct switchdev_obj_ipv4_fib *fib4, - struct switchdev_trans *trans) -{ - struct rocker_world_ops *wops = rocker_port->rocker->wops; - - if (!wops->port_obj_fib4_add) - return -EOPNOTSUPP; - return wops->port_obj_fib4_add(rocker_port, fib4, trans); -} - -static int -rocker_world_port_obj_fib4_del(struct rocker_port *rocker_port, - const struct switchdev_obj_ipv4_fib *fib4) -{ - struct rocker_world_ops *wops = rocker_port->rocker->wops; - - if (!wops->port_obj_fib4_del) - return -EOPNOTSUPP; - return wops->port_obj_fib4_del(rocker_port, fib4); -} - static int rocker_world_port_obj_fdb_add(struct rocker_port *rocker_port, const struct switchdev_obj_port_fdb *fdb, @@ -1733,6 +1710,34 @@ static int rocker_world_port_ev_mac_vlan_seen(struct rocker_port *rocker_port, return wops->port_ev_mac_vlan_seen(rocker_port, addr, vlan_id); } +static int rocker_world_fib4_add(struct rocker *rocker, + const struct fib_entry_notifier_info *fen_info) +{ + struct rocker_world_ops *wops = rocker->wops; + + if (!wops->fib4_add) + return 0; + return wops->fib4_add(rocker, fen_info); +} + +static int rocker_world_fib4_del(struct rocker *rocker, + const struct fib_entry_notifier_info *fen_info) +{ + struct rocker_world_ops *wops = rocker->wops; + + if (!wops->fib4_del) + return 0; + return wops->fib4_del(rocker, fen_info); +} + +static void rocker_world_fib4_abort(struct rocker *rocker) +{ + struct rocker_world_ops *wops = rocker->wops; + + if (wops->fib4_abort) + wops->fib4_abort(rocker); +} + /***************** * Net device ops *****************/ @@ -2096,11 +2101,6 @@ static int rocker_port_obj_add(struct net_device *dev, SWITCHDEV_OBJ_PORT_VLAN(obj), trans); break; - case SWITCHDEV_OBJ_ID_IPV4_FIB: - err = rocker_world_port_obj_fib4_add(rocker_port, - SWITCHDEV_OBJ_IPV4_FIB(obj), - trans); - break; case SWITCHDEV_OBJ_ID_PORT_FDB: err = rocker_world_port_obj_fdb_add(rocker_port, SWITCHDEV_OBJ_PORT_FDB(obj), @@ -2125,10 +2125,6 @@ static int rocker_port_obj_del(struct net_device *dev, err = rocker_world_port_obj_vlan_del(rocker_port, SWITCHDEV_OBJ_PORT_VLAN(obj)); break; - case SWITCHDEV_OBJ_ID_IPV4_FIB: - err = rocker_world_port_obj_fib4_del(rocker_port, - SWITCHDEV_OBJ_IPV4_FIB(obj)); - break; case SWITCHDEV_OBJ_ID_PORT_FDB: err = rocker_world_port_obj_fdb_del(rocker_port, SWITCHDEV_OBJ_PORT_FDB(obj)); @@ -2175,6 +2171,31 @@ static const struct switchdev_ops rocker_port_switchdev_ops = { .switchdev_port_obj_dump = rocker_port_obj_dump, }; +static int rocker_router_fib_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct rocker *rocker = container_of(nb, struct rocker, fib_nb); + struct fib_entry_notifier_info *fen_info = ptr; + int err; + + switch (event) { + case FIB_EVENT_ENTRY_ADD: + err = rocker_world_fib4_add(rocker, fen_info); + if (err) + rocker_world_fib4_abort(rocker); + else + break; + case FIB_EVENT_ENTRY_DEL: + rocker_world_fib4_del(rocker, fen_info); + break; + case FIB_EVENT_RULE_ADD: /* fall through */ + case FIB_EVENT_RULE_DEL: + rocker_world_fib4_abort(rocker); + break; + } + return NOTIFY_DONE; +} + /******************** * ethtool interface ********************/ @@ -2740,6 +2761,9 @@ static int rocker_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto err_probe_ports; } + rocker->fib_nb.notifier_call = rocker_router_fib_event; + register_fib_notifier(&rocker->fib_nb); + dev_info(&pdev->dev, "Rocker switch with id %*phN\n", (int)sizeof(rocker->hw.id), &rocker->hw.id); @@ -2771,6 +2795,7 @@ static void rocker_remove(struct pci_dev *pdev) { struct rocker *rocker = pci_get_drvdata(pdev); + unregister_fib_notifier(&rocker->fib_nb); rocker_write32(rocker, CONTROL, ROCKER_CONTROL_RESET); rocker_remove_ports(rocker); free_irq(rocker_msix_vector(rocker, ROCKER_MSIX_VEC_EVENT), rocker); @@ -2799,6 +2824,37 @@ static bool rocker_port_dev_check(const struct net_device *dev) return dev->netdev_ops == &rocker_port_netdev_ops; } +static bool rocker_port_dev_check_under(const struct net_device *dev, + struct rocker *rocker) +{ + struct rocker_port *rocker_port; + + if (!rocker_port_dev_check(dev)) + return false; + + rocker_port = netdev_priv(dev); + if (rocker_port->rocker != rocker) + return false; + + return true; +} + +struct rocker_port *rocker_port_dev_lower_find(struct net_device *dev, + struct rocker *rocker) +{ + struct net_device *lower_dev; + struct list_head *iter; + + if (rocker_port_dev_check_under(dev, rocker)) + return netdev_priv(dev); + + netdev_for_each_all_lower_dev(dev, lower_dev, iter) { + if (rocker_port_dev_check_under(lower_dev, rocker)) + return netdev_priv(lower_dev); + } + return NULL; +} + static int rocker_netdevice_event(struct notifier_block *unused, unsigned long event, void *ptr) { diff --git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c index fcad907baecf..431a60804272 100644 --- a/drivers/net/ethernet/rocker/rocker_ofdpa.c +++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c @@ -99,6 +99,7 @@ struct ofdpa_flow_tbl_entry { struct ofdpa_flow_tbl_key key; size_t key_len; u32 key_crc32; /* key */ + struct fib_info *fi; }; struct ofdpa_group_tbl_entry { @@ -189,6 +190,7 @@ struct ofdpa { spinlock_t neigh_tbl_lock; /* for neigh tbl accesses */ u32 neigh_tbl_next_index; unsigned long ageing_time; + bool fib_aborted; }; struct ofdpa_port { @@ -1043,7 +1045,8 @@ static int ofdpa_flow_tbl_ucast4_routing(struct ofdpa_port *ofdpa_port, __be16 eth_type, __be32 dst, __be32 dst_mask, u32 priority, enum rocker_of_dpa_table_id goto_tbl, - u32 group_id, int flags) + u32 group_id, struct fib_info *fi, + int flags) { struct ofdpa_flow_tbl_entry *entry; @@ -1060,6 +1063,7 @@ static int ofdpa_flow_tbl_ucast4_routing(struct ofdpa_port *ofdpa_port, entry->key.ucast_routing.group_id = group_id; entry->key_len = offsetof(struct ofdpa_flow_tbl_key, ucast_routing.group_id); + entry->fi = fi; return ofdpa_flow_tbl_do(ofdpa_port, trans, flags, entry); } @@ -1425,7 +1429,7 @@ static int ofdpa_port_ipv4_neigh(struct ofdpa_port *ofdpa_port, eth_type, ip_addr, inet_make_mask(32), priority, goto_tbl, - group_id, flags); + group_id, NULL, flags); if (err) netdev_err(ofdpa_port->dev, "Error (%d) /32 unicast route %pI4 group 0x%08x\n", @@ -2390,7 +2394,7 @@ found: static int ofdpa_port_fib_ipv4(struct ofdpa_port *ofdpa_port, struct switchdev_trans *trans, __be32 dst, - int dst_len, const struct fib_info *fi, + int dst_len, struct fib_info *fi, u32 tb_id, int flags) { const struct fib_nh *nh; @@ -2426,7 +2430,7 @@ static int ofdpa_port_fib_ipv4(struct ofdpa_port *ofdpa_port, err = ofdpa_flow_tbl_ucast4_routing(ofdpa_port, trans, eth_type, dst, dst_mask, priority, goto_tbl, - group_id, flags); + group_id, fi, flags); if (err) netdev_err(ofdpa_port->dev, "Error (%d) IPv4 route %pI4\n", err, &dst); @@ -2718,28 +2722,6 @@ static int ofdpa_port_obj_vlan_dump(const struct rocker_port *rocker_port, return err; } -static int ofdpa_port_obj_fib4_add(struct rocker_port *rocker_port, - const struct switchdev_obj_ipv4_fib *fib4, - struct switchdev_trans *trans) -{ - struct ofdpa_port *ofdpa_port = rocker_port->wpriv; - - return ofdpa_port_fib_ipv4(ofdpa_port, trans, - htonl(fib4->dst), fib4->dst_len, - fib4->fi, fib4->tb_id, 0); -} - -static int ofdpa_port_obj_fib4_del(struct rocker_port *rocker_port, - const struct switchdev_obj_ipv4_fib *fib4) -{ - struct ofdpa_port *ofdpa_port = rocker_port->wpriv; - - return ofdpa_port_fib_ipv4(ofdpa_port, NULL, - htonl(fib4->dst), fib4->dst_len, - fib4->fi, fib4->tb_id, - OFDPA_OP_FLAG_REMOVE); -} - static int ofdpa_port_obj_fdb_add(struct rocker_port *rocker_port, const struct switchdev_obj_port_fdb *fdb, struct switchdev_trans *trans) @@ -2922,6 +2904,82 @@ static int ofdpa_port_ev_mac_vlan_seen(struct rocker_port *rocker_port, return ofdpa_port_fdb(ofdpa_port, NULL, addr, vlan_id, flags); } +static struct ofdpa_port *ofdpa_port_dev_lower_find(struct net_device *dev, + struct rocker *rocker) +{ + struct rocker_port *rocker_port; + + rocker_port = rocker_port_dev_lower_find(dev, rocker); + return rocker_port ? rocker_port->wpriv : NULL; +} + +static int ofdpa_fib4_add(struct rocker *rocker, + const struct fib_entry_notifier_info *fen_info) +{ + struct ofdpa *ofdpa = rocker->wpriv; + struct ofdpa_port *ofdpa_port; + int err; + + if (ofdpa->fib_aborted) + return 0; + ofdpa_port = ofdpa_port_dev_lower_find(fen_info->fi->fib_dev, rocker); + if (!ofdpa_port) + return 0; + err = ofdpa_port_fib_ipv4(ofdpa_port, NULL, htonl(fen_info->dst), + fen_info->dst_len, fen_info->fi, + fen_info->tb_id, 0); + if (err) + return err; + fib_info_offload_inc(fen_info->fi); + return 0; +} + +static int ofdpa_fib4_del(struct rocker *rocker, + const struct fib_entry_notifier_info *fen_info) +{ + struct ofdpa *ofdpa = rocker->wpriv; + struct ofdpa_port *ofdpa_port; + + if (ofdpa->fib_aborted) + return 0; + ofdpa_port = ofdpa_port_dev_lower_find(fen_info->fi->fib_dev, rocker); + if (!ofdpa_port) + return 0; + fib_info_offload_dec(fen_info->fi); + return ofdpa_port_fib_ipv4(ofdpa_port, NULL, htonl(fen_info->dst), + fen_info->dst_len, fen_info->fi, + fen_info->tb_id, OFDPA_OP_FLAG_REMOVE); +} + +static void ofdpa_fib4_abort(struct rocker *rocker) +{ + struct ofdpa *ofdpa = rocker->wpriv; + struct ofdpa_port *ofdpa_port; + struct ofdpa_flow_tbl_entry *flow_entry; + struct hlist_node *tmp; + unsigned long flags; + int bkt; + + if (ofdpa->fib_aborted) + return; + + spin_lock_irqsave(&ofdpa->flow_tbl_lock, flags); + hash_for_each_safe(ofdpa->flow_tbl, bkt, tmp, flow_entry, entry) { + if (flow_entry->key.tbl_id != + ROCKER_OF_DPA_TABLE_ID_UNICAST_ROUTING) + continue; + ofdpa_port = ofdpa_port_dev_lower_find(flow_entry->fi->fib_dev, + rocker); + if (!ofdpa_port) + continue; + fib_info_offload_dec(flow_entry->fi); + ofdpa_flow_tbl_del(ofdpa_port, NULL, OFDPA_OP_FLAG_REMOVE, + flow_entry); + } + spin_unlock_irqrestore(&ofdpa->flow_tbl_lock, flags); + ofdpa->fib_aborted = true; +} + struct rocker_world_ops rocker_ofdpa_ops = { .kind = "ofdpa", .priv_size = sizeof(struct ofdpa), @@ -2941,8 +2999,6 @@ struct rocker_world_ops rocker_ofdpa_ops = { .port_obj_vlan_add = ofdpa_port_obj_vlan_add, .port_obj_vlan_del = ofdpa_port_obj_vlan_del, .port_obj_vlan_dump = ofdpa_port_obj_vlan_dump, - .port_obj_fib4_add = ofdpa_port_obj_fib4_add, - .port_obj_fib4_del = ofdpa_port_obj_fib4_del, .port_obj_fdb_add = ofdpa_port_obj_fdb_add, .port_obj_fdb_del = ofdpa_port_obj_fdb_del, .port_obj_fdb_dump = ofdpa_port_obj_fdb_dump, @@ -2951,4 +3007,7 @@ struct rocker_world_ops rocker_ofdpa_ops = { .port_neigh_update = ofdpa_port_neigh_update, .port_neigh_destroy = ofdpa_port_neigh_destroy, .port_ev_mac_vlan_seen = ofdpa_port_ev_mac_vlan_seen, + .fib4_add = ofdpa_fib4_add, + .fib4_del = ofdpa_fib4_del, + .fib4_abort = ofdpa_fib4_abort, }; From 347e3b28c1ba24c1ae2f30290d8247480ab9ce14 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 26 Sep 2016 12:52:33 +0200 Subject: [PATCH 1603/1715] switchdev: remove FIB offload infrastructure Since this is now taken care of by FIB notifier, remove the code, with all unused dependencies. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/ip_fib.h | 2 - include/net/switchdev.h | 40 --------- net/ipv4/fib_frontend.c | 13 --- net/ipv4/fib_rules.c | 2 - net/ipv4/fib_trie.c | 104 +--------------------- net/switchdev/switchdev.c | 181 -------------------------------------- 6 files changed, 1 insertion(+), 341 deletions(-) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index ffccf1787914..b9314b48e39f 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -243,7 +243,6 @@ int fib_table_dump(struct fib_table *table, struct sk_buff *skb, struct netlink_callback *cb); int fib_table_flush(struct net *net, struct fib_table *table); struct fib_table *fib_trie_unmerge(struct fib_table *main_tb); -void fib_table_flush_external(struct fib_table *table); void fib_free_table(struct fib_table *tb); #ifndef CONFIG_IP_MULTIPLE_TABLES @@ -356,7 +355,6 @@ static inline int fib_num_tclassid_users(struct net *net) } #endif int fib_unmerge(struct net *net); -void fib_flush_external(struct net *net); /* Exported by fib_semantics.c */ int ip_fib_check_default(__be32 gw, struct net_device *dev); diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 729fe1534160..eba80c4fc56f 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -68,7 +68,6 @@ struct switchdev_attr { enum switchdev_obj_id { SWITCHDEV_OBJ_ID_UNDEFINED, SWITCHDEV_OBJ_ID_PORT_VLAN, - SWITCHDEV_OBJ_ID_IPV4_FIB, SWITCHDEV_OBJ_ID_PORT_FDB, SWITCHDEV_OBJ_ID_PORT_MDB, }; @@ -92,21 +91,6 @@ struct switchdev_obj_port_vlan { #define SWITCHDEV_OBJ_PORT_VLAN(obj) \ container_of(obj, struct switchdev_obj_port_vlan, obj) -/* SWITCHDEV_OBJ_ID_IPV4_FIB */ -struct switchdev_obj_ipv4_fib { - struct switchdev_obj obj; - u32 dst; - int dst_len; - struct fib_info *fi; - u8 tos; - u8 type; - u32 nlflags; - u32 tb_id; -}; - -#define SWITCHDEV_OBJ_IPV4_FIB(obj) \ - container_of(obj, struct switchdev_obj_ipv4_fib, obj) - /* SWITCHDEV_OBJ_ID_PORT_FDB */ struct switchdev_obj_port_fdb { struct switchdev_obj obj; @@ -209,11 +193,6 @@ int switchdev_port_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags); int switchdev_port_bridge_dellink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags); -int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, - u8 tos, u8 type, u32 nlflags, u32 tb_id); -int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, - u8 tos, u8 type, u32 tb_id); -void switchdev_fib_ipv4_abort(struct fib_info *fi); int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, u16 nlm_flags); @@ -304,25 +283,6 @@ static inline int switchdev_port_bridge_dellink(struct net_device *dev, return -EOPNOTSUPP; } -static inline int switchdev_fib_ipv4_add(u32 dst, int dst_len, - struct fib_info *fi, - u8 tos, u8 type, - u32 nlflags, u32 tb_id) -{ - return 0; -} - -static inline int switchdev_fib_ipv4_del(u32 dst, int dst_len, - struct fib_info *fi, - u8 tos, u8 type, u32 tb_id) -{ - return 0; -} - -static inline void switchdev_fib_ipv4_abort(struct fib_info *fi) -{ -} - static inline int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 86c43dc9a60e..c3b80478226e 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -189,19 +189,6 @@ static void fib_flush(struct net *net) rt_cache_flush(net); } -void fib_flush_external(struct net *net) -{ - struct fib_table *tb; - struct hlist_head *head; - unsigned int h; - - for (h = 0; h < FIB_TABLE_HASHSZ; h++) { - head = &net->ipv4.fib_table_hash[h]; - hlist_for_each_entry(tb, head, tb_hlist) - fib_table_flush_external(tb); - } -} - /* * Find address type as if only "dev" was present in the system. If * on_dev is NULL then all interfaces are taken into consideration. diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index ebadf6b99499..2e50062f642d 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -228,7 +228,6 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, rule4->tos = frh->tos; net->ipv4.fib_has_custom_rules = true; - fib_flush_external(rule->fr_net); call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD); err = 0; @@ -251,7 +250,6 @@ static int fib4_rule_delete(struct fib_rule *rule) net->ipv4.fib_num_tclassid_users--; #endif net->ipv4.fib_has_custom_rules = true; - fib_flush_external(rule->fr_net); call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL); errout: return err; diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 51a4537eb145..31cef3602585 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -81,7 +81,6 @@ #include #include #include -#include #include #include "fib_lookup.h" @@ -1215,17 +1214,6 @@ int fib_table_insert(struct net *net, struct fib_table *tb, new_fa->tb_id = tb->tb_id; new_fa->fa_default = -1; - err = switchdev_fib_ipv4_add(key, plen, fi, - new_fa->fa_tos, - cfg->fc_type, - cfg->fc_nlflags, - tb->tb_id); - if (err) { - switchdev_fib_ipv4_abort(fi); - kmem_cache_free(fn_alias_kmem, new_fa); - goto out; - } - hlist_replace_rcu(&fa->fa_list, &new_fa->fa_list); alias_free_mem_rcu(fa); @@ -1273,18 +1261,10 @@ int fib_table_insert(struct net *net, struct fib_table *tb, new_fa->tb_id = tb->tb_id; new_fa->fa_default = -1; - /* (Optionally) offload fib entry to switch hardware. */ - err = switchdev_fib_ipv4_add(key, plen, fi, tos, cfg->fc_type, - cfg->fc_nlflags, tb->tb_id); - if (err) { - switchdev_fib_ipv4_abort(fi); - goto out_free_new_fa; - } - /* Insert new entry to the list. */ err = fib_insert_alias(t, tp, l, new_fa, fa, key); if (err) - goto out_sw_fib_del; + goto out_free_new_fa; if (!plen) tb->tb_num_default++; @@ -1297,8 +1277,6 @@ int fib_table_insert(struct net *net, struct fib_table *tb, succeeded: return 0; -out_sw_fib_del: - switchdev_fib_ipv4_del(key, plen, fi, tos, cfg->fc_type, tb->tb_id); out_free_new_fa: kmem_cache_free(fn_alias_kmem, new_fa); out: @@ -1591,9 +1569,6 @@ int fib_table_delete(struct net *net, struct fib_table *tb, if (!fa_to_delete) return -ESRCH; - switchdev_fib_ipv4_del(key, plen, fa_to_delete->fa_info, tos, - cfg->fc_type, tb->tb_id); - call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, key, plen, fa_to_delete->fa_info, tos, cfg->fc_type, tb->tb_id, 0); @@ -1785,80 +1760,6 @@ out: return NULL; } -/* Caller must hold RTNL */ -void fib_table_flush_external(struct fib_table *tb) -{ - struct trie *t = (struct trie *)tb->tb_data; - struct key_vector *pn = t->kv; - unsigned long cindex = 1; - struct hlist_node *tmp; - struct fib_alias *fa; - - /* walk trie in reverse order */ - for (;;) { - unsigned char slen = 0; - struct key_vector *n; - - if (!(cindex--)) { - t_key pkey = pn->key; - - /* cannot resize the trie vector */ - if (IS_TRIE(pn)) - break; - - /* resize completed node */ - pn = resize(t, pn); - cindex = get_index(pkey, pn); - - continue; - } - - /* grab the next available node */ - n = get_child(pn, cindex); - if (!n) - continue; - - if (IS_TNODE(n)) { - /* record pn and cindex for leaf walking */ - pn = n; - cindex = 1ul << n->bits; - - continue; - } - - hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) { - struct fib_info *fi = fa->fa_info; - - /* if alias was cloned to local then we just - * need to remove the local copy from main - */ - if (tb->tb_id != fa->tb_id) { - hlist_del_rcu(&fa->fa_list); - alias_free_mem_rcu(fa); - continue; - } - - /* record local slen */ - slen = fa->fa_slen; - - if (!fi || !(fi->fib_flags & RTNH_F_OFFLOAD)) - continue; - - switchdev_fib_ipv4_del(n->key, KEYLENGTH - fa->fa_slen, - fi, fa->fa_tos, fa->fa_type, - tb->tb_id); - } - - /* update leaf slen */ - n->slen = slen; - - if (hlist_empty(&n->leaf)) { - put_child_root(pn, n->key, NULL); - node_free(n); - } - } -} - /* Caller must hold RTNL. */ int fib_table_flush(struct net *net, struct fib_table *tb) { @@ -1909,9 +1810,6 @@ int fib_table_flush(struct net *net, struct fib_table *tb) continue; } - switchdev_fib_ipv4_del(n->key, KEYLENGTH - fa->fa_slen, - fi, fa->fa_tos, fa->fa_type, - tb->tb_id); call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, n->key, KEYLENGTH - fa->fa_slen, diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index abd8d2a38a7d..02beb35f577f 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -21,7 +21,6 @@ #include #include #include -#include #include /** @@ -344,8 +343,6 @@ static size_t switchdev_obj_size(const struct switchdev_obj *obj) switch (obj->id) { case SWITCHDEV_OBJ_ID_PORT_VLAN: return sizeof(struct switchdev_obj_port_vlan); - case SWITCHDEV_OBJ_ID_IPV4_FIB: - return sizeof(struct switchdev_obj_ipv4_fib); case SWITCHDEV_OBJ_ID_PORT_FDB: return sizeof(struct switchdev_obj_port_fdb); case SWITCHDEV_OBJ_ID_PORT_MDB: @@ -1108,184 +1105,6 @@ int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, } EXPORT_SYMBOL_GPL(switchdev_port_fdb_dump); -static struct net_device *switchdev_get_lowest_dev(struct net_device *dev) -{ - const struct switchdev_ops *ops = dev->switchdev_ops; - struct net_device *lower_dev; - struct net_device *port_dev; - struct list_head *iter; - - /* Recusively search down until we find a sw port dev. - * (A sw port dev supports switchdev_port_attr_get). - */ - - if (ops && ops->switchdev_port_attr_get) - return dev; - - netdev_for_each_lower_dev(dev, lower_dev, iter) { - port_dev = switchdev_get_lowest_dev(lower_dev); - if (port_dev) - return port_dev; - } - - return NULL; -} - -static struct net_device *switchdev_get_dev_by_nhs(struct fib_info *fi) -{ - struct switchdev_attr attr = { - .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, - }; - struct switchdev_attr prev_attr; - struct net_device *dev = NULL; - int nhsel; - - ASSERT_RTNL(); - - /* For this route, all nexthop devs must be on the same switch. */ - - for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) { - const struct fib_nh *nh = &fi->fib_nh[nhsel]; - - if (!nh->nh_dev) - return NULL; - - dev = switchdev_get_lowest_dev(nh->nh_dev); - if (!dev) - return NULL; - - attr.orig_dev = dev; - if (switchdev_port_attr_get(dev, &attr)) - return NULL; - - if (nhsel > 0 && - !netdev_phys_item_id_same(&prev_attr.u.ppid, &attr.u.ppid)) - return NULL; - - prev_attr = attr; - } - - return dev; -} - -/** - * switchdev_fib_ipv4_add - Add/modify switch IPv4 route entry - * - * @dst: route's IPv4 destination address - * @dst_len: destination address length (prefix length) - * @fi: route FIB info structure - * @tos: route TOS - * @type: route type - * @nlflags: netlink flags passed in (NLM_F_*) - * @tb_id: route table ID - * - * Add/modify switch IPv4 route entry. - */ -int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, - u8 tos, u8 type, u32 nlflags, u32 tb_id) -{ - struct switchdev_obj_ipv4_fib ipv4_fib = { - .obj.id = SWITCHDEV_OBJ_ID_IPV4_FIB, - .dst = dst, - .dst_len = dst_len, - .fi = fi, - .tos = tos, - .type = type, - .nlflags = nlflags, - .tb_id = tb_id, - }; - struct net_device *dev; - int err = 0; - - /* Don't offload route if using custom ip rules or if - * IPv4 FIB offloading has been disabled completely. - */ - -#ifdef CONFIG_IP_MULTIPLE_TABLES - if (fi->fib_net->ipv4.fib_has_custom_rules) - return 0; -#endif - - if (fi->fib_net->ipv4.fib_offload_disabled) - return 0; - - dev = switchdev_get_dev_by_nhs(fi); - if (!dev) - return 0; - - ipv4_fib.obj.orig_dev = dev; - err = switchdev_port_obj_add(dev, &ipv4_fib.obj); - if (!err) - fib_info_offload_inc(fi); - - return err == -EOPNOTSUPP ? 0 : err; -} -EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_add); - -/** - * switchdev_fib_ipv4_del - Delete IPv4 route entry from switch - * - * @dst: route's IPv4 destination address - * @dst_len: destination address length (prefix length) - * @fi: route FIB info structure - * @tos: route TOS - * @type: route type - * @tb_id: route table ID - * - * Delete IPv4 route entry from switch device. - */ -int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, - u8 tos, u8 type, u32 tb_id) -{ - struct switchdev_obj_ipv4_fib ipv4_fib = { - .obj.id = SWITCHDEV_OBJ_ID_IPV4_FIB, - .dst = dst, - .dst_len = dst_len, - .fi = fi, - .tos = tos, - .type = type, - .nlflags = 0, - .tb_id = tb_id, - }; - struct net_device *dev; - int err = 0; - - if (!(fi->fib_flags & RTNH_F_OFFLOAD)) - return 0; - - dev = switchdev_get_dev_by_nhs(fi); - if (!dev) - return 0; - - ipv4_fib.obj.orig_dev = dev; - err = switchdev_port_obj_del(dev, &ipv4_fib.obj); - if (!err) - fib_info_offload_dec(fi); - - return err == -EOPNOTSUPP ? 0 : err; -} -EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_del); - -/** - * switchdev_fib_ipv4_abort - Abort an IPv4 FIB operation - * - * @fi: route FIB info structure - */ -void switchdev_fib_ipv4_abort(struct fib_info *fi) -{ - /* There was a problem installing this route to the offload - * device. For now, until we come up with more refined - * policy handling, abruptly end IPv4 fib offloading for - * for entire net by flushing offload device(s) of all - * IPv4 routes, and mark IPv4 fib offloading broken from - * this point forward. - */ - - fib_flush_external(fi->fib_net); - fi->fib_net->ipv4.fib_offload_disabled = true; -} -EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_abort); - bool switchdev_port_same_parent_id(struct net_device *a, struct net_device *b) { From fd41b0eaa06a8a0516f9e0b0a5889035bf423784 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 26 Sep 2016 12:52:34 +0200 Subject: [PATCH 1604/1715] doc: update switchdev L3 section This is to reflect the change of FIB offload infrastructure from switchdev objects to FIB notifier. Signed-off-by: Jiri Pirko Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- Documentation/networking/switchdev.txt | 27 +++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/Documentation/networking/switchdev.txt b/Documentation/networking/switchdev.txt index 44235e83799b..2bbac05ab9e2 100644 --- a/Documentation/networking/switchdev.txt +++ b/Documentation/networking/switchdev.txt @@ -314,30 +314,29 @@ the kernel, with the device doing the FIB lookup and forwarding. The device does a longest prefix match (LPM) on FIB entries matching route prefix and forwards the packet to the matching FIB entry's nexthop(s) egress ports. -To program the device, the driver implements support for -SWITCHDEV_OBJ_IPV[4|6]_FIB object using switchdev_port_obj_xxx ops. -switchdev_port_obj_add is used for both adding a new FIB entry to the device, -or modifying an existing entry on the device. +To program the device, the driver has to register a FIB notifier handler +using register_fib_notifier. The following events are available: +FIB_EVENT_ENTRY_ADD: used for both adding a new FIB entry to the device, + or modifying an existing entry on the device. +FIB_EVENT_ENTRY_DEL: used for removing a FIB entry +FIB_EVENT_RULE_ADD, FIB_EVENT_RULE_DEL: used to propagate FIB rule changes -XXX: Currently, only SWITCHDEV_OBJ_ID_IPV4_FIB objects are supported. +FIB_EVENT_ENTRY_ADD and FIB_EVENT_ENTRY_DEL events pass: -SWITCHDEV_OBJ_ID_IPV4_FIB object passes: - - struct switchdev_obj_ipv4_fib { /* IPV4_FIB */ + struct fib_entry_notifier_info { + struct fib_notifier_info info; /* must be first */ u32 dst; int dst_len; struct fib_info *fi; u8 tos; u8 type; - u32 nlflags; u32 tb_id; - } ipv4_fib; + u32 nlflags; + }; to add/modify/delete IPv4 dst/dest_len prefix on table tb_id. The *fi structure holds details on the route and route's nexthops. *dev is one of the -port netdevs mentioned in the routes next hop list. If the output port netdevs -referenced in the route's nexthop list don't all have the same switch ID, the -driver is not called to add/modify/delete the FIB entry. +port netdevs mentioned in the route's next hop list. Routes offloaded to the device are labeled with "offload" in the ip route listing: @@ -355,6 +354,8 @@ listing: 12.0.0.4 via 11.0.0.9 dev sw1p2 proto zebra metric 20 offload 192.168.0.0/24 dev eth0 proto kernel scope link src 192.168.0.15 +The "offload" flag is set in case at least one device offloads the FIB entry. + XXX: add/mod/del IPv6 FIB API Nexthop Resolution From c099ff3cdb9dd2281dee3dc4ef89fec1c516df22 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Tue, 27 Sep 2016 01:23:26 +0300 Subject: [PATCH 1605/1715] sh_eth: add R8A7743/5 support Add support for the first two members of the Renesas RZ/G family, RZ/G1M/E (also known as R8A7743/5). The Ether core is the same as in the R-Car gen2 SoCs, so will share the code/data with them... Signed-off-by: Sergei Shtylyov Acked-by: Geert Uytterhoeven Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/sh_eth.txt | 2 ++ drivers/net/ethernet/renesas/Kconfig | 2 +- drivers/net/ethernet/renesas/sh_eth.c | 2 ++ 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/sh_eth.txt b/Documentation/devicetree/bindings/net/sh_eth.txt index 2f6ec85fda8e..0115c85a2425 100644 --- a/Documentation/devicetree/bindings/net/sh_eth.txt +++ b/Documentation/devicetree/bindings/net/sh_eth.txt @@ -5,6 +5,8 @@ interface contains. Required properties: - compatible: "renesas,gether-r8a7740" if the device is a part of R8A7740 SoC. + "renesas,ether-r8a7743" if the device is a part of R8A7743 SoC. + "renesas,ether-r8a7745" if the device is a part of R8A7745 SoC. "renesas,ether-r8a7778" if the device is a part of R8A7778 SoC. "renesas,ether-r8a7779" if the device is a part of R8A7779 SoC. "renesas,ether-r8a7790" if the device is a part of R8A7790 SoC. diff --git a/drivers/net/ethernet/renesas/Kconfig b/drivers/net/ethernet/renesas/Kconfig index 4f132cf177cd..85ec447c2d18 100644 --- a/drivers/net/ethernet/renesas/Kconfig +++ b/drivers/net/ethernet/renesas/Kconfig @@ -27,7 +27,7 @@ config SH_ETH Renesas SuperH Ethernet device driver. This driver supporting CPUs are: - SH7619, SH7710, SH7712, SH7724, SH7734, SH7763, SH7757, - R8A7740, R8A777x and R8A779x. + R8A7740, R8A774x, R8A777x and R8A779x. config RAVB tristate "Renesas Ethernet AVB support" diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 440ae272161c..05b0dc55de77 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -2959,6 +2959,8 @@ static struct sh_eth_plat_data *sh_eth_parse_dt(struct device *dev) static const struct of_device_id sh_eth_match_table[] = { { .compatible = "renesas,gether-r8a7740", .data = &r8a7740_data }, + { .compatible = "renesas,ether-r8a7743", .data = &r8a779x_data }, + { .compatible = "renesas,ether-r8a7745", .data = &r8a779x_data }, { .compatible = "renesas,ether-r8a7778", .data = &r8a777x_data }, { .compatible = "renesas,ether-r8a7779", .data = &r8a777x_data }, { .compatible = "renesas,ether-r8a7790", .data = &r8a779x_data }, From fa5effe7664b1606dfd639dff58686f6dbb119d7 Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Tue, 27 Sep 2016 11:09:51 +0300 Subject: [PATCH 1606/1715] net/sched: pkt_cls: change tc actions order to be as the user sets Currently the created tc actions list is reversed against the order set by the user. Change the actions list order to be the same as was set by the user. This patch doesn't affect dump actions behavior. For dumping, action->order parameter is used so the list order doesn't matter. Signed-off-by: Hadar Hen Zion Acked-by: Jamal Hadi Salim Acked-by: Cong Wang Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 5ccaa4be7d96..767b03a3fe67 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -123,7 +123,7 @@ static inline void tcf_exts_to_list(const struct tcf_exts *exts, for (i = 0; i < exts->nr_actions; i++) { struct tc_action *a = exts->actions[i]; - list_add(&a->list, actions); + list_add_tail(&a->list, actions); } #endif } From 931eb6b7fee31f63d33fd5e1e62a60375fa6fb32 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 16 Sep 2016 13:05:35 +0000 Subject: [PATCH 1607/1715] ath10k: fix error return code in ahb Fix to return a negative error code from the error handling case instead of 0, as done elsewhere in function ath10k_ahb_probe() or ath10k_ahb_resource_init(). Signed-off-by: Wei Yongjun Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/ahb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/ath/ath10k/ahb.c b/drivers/net/wireless/ath/ath10k/ahb.c index fe38b459544d..766c63bf05c4 100644 --- a/drivers/net/wireless/ath/ath10k/ahb.c +++ b/drivers/net/wireless/ath/ath10k/ahb.c @@ -509,6 +509,7 @@ static int ath10k_ahb_resource_init(struct ath10k *ar) ar_ahb->irq = platform_get_irq_byname(pdev, "legacy"); if (ar_ahb->irq < 0) { ath10k_err(ar, "failed to get irq number: %d\n", ar_ahb->irq); + ret = ar_ahb->irq; goto err_clock_deinit; } @@ -787,6 +788,7 @@ static int ath10k_ahb_probe(struct platform_device *pdev) chip_id = ath10k_ahb_soc_read32(ar, SOC_CHIP_ID_ADDRESS); if (chip_id == 0xffffffff) { ath10k_err(ar, "failed to get chip id\n"); + ret = -ENODEV; goto err_halt_device; } From b63b33ecafa5d00edbbb438c7e208392d5f4de23 Mon Sep 17 00:00:00 2001 From: Mohammed Shafi Shajakhan Date: Tue, 20 Sep 2016 13:52:07 +0530 Subject: [PATCH 1608/1715] ath10k: Ignore SWBA event for a vif if its marked for no beacon Ignore processing further in SWBA event scheduled for a vif, if mac80211 has marked the particular vif for stop beaconing and brought the vdev down in 'ath10k_control_beaconing'. This should potentially avoid ath10k warning/error messages while running continuous wifi down/up with max number of vaps configured. Found this change during code walk through and going through other beacon configuration related functions in ath10k Signed-off-by: Mohammed Shafi Shajakhan Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/wmi.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c index 38993d72f5e6..54df425bb0fc 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.c +++ b/drivers/net/wireless/ath/ath10k/wmi.c @@ -3514,6 +3514,12 @@ void ath10k_wmi_event_host_swba(struct ath10k *ar, struct sk_buff *skb) continue; } + /* mac80211 would have already asked us to stop beaconing and + * bring the vdev down, so continue in that case + */ + if (!arvif->is_up) + continue; + /* There are no completions for beacons so wait for next SWBA * before telling mac80211 to decrement CSA counter * From 0628467f97b5227755428bac10a68257322f7e34 Mon Sep 17 00:00:00 2001 From: Rajkumar Manoharan Date: Wed, 21 Sep 2016 16:28:06 +0530 Subject: [PATCH 1609/1715] ath10k: fix copy engine 5 destination ring stuck Firmware is running watchdog timer for tracking copy engine ring index and write index. Whenever both indices are stuck at same location for given duration, watchdog will be trigger to assert target. While updating copy engine destination ring write index, driver ensures that write index will not be same as read index by finding delta between these two indices (CE_RING_DELTA). HTT target to host copy engine (CE5) is special case where ring buffers will be reused and delta check is not applied while updating write index. In rare scenario, whenever CE5 ring is full, both indices will be referring same location and this is causing CE ring stuck issue as explained above. This issue is originally reported on IPQ4019 during long hour stress testing and during veriwave max clients testsuites. The same issue is also observed in other chips as well. Fix this by ensuring that write index is one less than read index which means that full ring is available for receiving data. Cc: stable@vger.kernel.org Tested-by: Tamizh chelvam Signed-off-by: Rajkumar Manoharan Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/ce.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/wireless/ath/ath10k/ce.c b/drivers/net/wireless/ath/ath10k/ce.c index e7205546fa6b..0b4d79659884 100644 --- a/drivers/net/wireless/ath/ath10k/ce.c +++ b/drivers/net/wireless/ath/ath10k/ce.c @@ -433,6 +433,13 @@ void ath10k_ce_rx_update_write_idx(struct ath10k_ce_pipe *pipe, u32 nentries) unsigned int nentries_mask = dest_ring->nentries_mask; unsigned int write_index = dest_ring->write_index; u32 ctrl_addr = pipe->ctrl_addr; + u32 cur_write_idx = ath10k_ce_dest_ring_write_index_get(ar, ctrl_addr); + + /* Prevent CE ring stuck issue that will occur when ring is full. + * Make sure that write index is 1 less than read index. + */ + if ((cur_write_idx + nentries) == dest_ring->sw_index) + nentries -= 1; write_index = CE_RING_IDX_ADD(nentries_mask, write_index, nentries); ath10k_ce_dest_ring_write_index_set(ar, ctrl_addr, write_index); From 739ccd76b40ed7f2e851b55ae16bdb655cc2ba1d Mon Sep 17 00:00:00 2001 From: Miaoqing Pan Date: Tue, 9 Aug 2016 15:02:27 +0800 Subject: [PATCH 1610/1715] ath9k: disable RNG by default ath9k RNG will dominates all the noise sources from the real HW RNG, disable it by default. But we strongly recommand to enable it if the system without HW RNG, especially on embedded systems. Signed-off-by: Miaoqing Pan Acked-by: Stephan Mueller Acked-by: Stephan Mueller Reviewed-by: Jason Cooper Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath9k/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath9k/Kconfig b/drivers/net/wireless/ath/ath9k/Kconfig index f68cb00450e0..8f231c67dd51 100644 --- a/drivers/net/wireless/ath/ath9k/Kconfig +++ b/drivers/net/wireless/ath/ath9k/Kconfig @@ -180,7 +180,7 @@ config ATH9K_HTC_DEBUGFS config ATH9K_HWRNG bool "Random number generator support" depends on ATH9K && (HW_RANDOM = y || HW_RANDOM = ATH9K) - default y + default n ---help--- This option incorporates the ADC register output as a source of randomness into Linux entropy pool (/dev/urandom and /dev/random) From b93015057e31933e1ab600290e014779efe5b5a3 Mon Sep 17 00:00:00 2001 From: Chaehyun Lim Date: Sun, 18 Sep 2016 15:30:24 +0900 Subject: [PATCH 1611/1715] ath6kl: fix return value in ath6kl_wmi_set_pvb_cmd MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When building with W=1, we got one warning as belows: drivers/net/wireless/ath/ath6kl/wmi.c:3509:6: warning: variable ‘ret’ set but not used [-Wunused-but-set-variable] At the end of ath6kl_wmi_set_pvb_cmd, it is returned by 0 regardless of return value of ath6kl_wmi_cmd_send. This patch fixes return value from 0 to ret that has result of ath6kl_wmi_cmd_send execution. Signed-off-by: Chaehyun Lim Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath6kl/wmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath6kl/wmi.c b/drivers/net/wireless/ath/ath6kl/wmi.c index b8cf04d11975..3fd1cc98fd2f 100644 --- a/drivers/net/wireless/ath/ath6kl/wmi.c +++ b/drivers/net/wireless/ath/ath6kl/wmi.c @@ -3520,7 +3520,7 @@ int ath6kl_wmi_set_pvb_cmd(struct wmi *wmi, u8 if_idx, u16 aid, ret = ath6kl_wmi_cmd_send(wmi, if_idx, skb, WMI_AP_SET_PVB_CMDID, NO_SYNC_WMIFLAG); - return 0; + return ret; } int ath6kl_wmi_set_rx_frame_format_cmd(struct wmi *wmi, u8 if_idx, From ffd74aca44b70a2564da86e8878c1dd296009ffb Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Tue, 27 Sep 2016 18:42:58 +0300 Subject: [PATCH 1612/1715] MAINTAINERS: hostap: Mark the Host AP driver obsolete This is old code for old hardware and it is not really accurate to claim this to be maintained anymore. Change the status to "Obsolete" to make it clearer that minor cleanup and other unnecessary changes from automated tools is not necessarily beneficial and has larger risk of breaking something without being quickly noticed due to lack of testing. In addition, remove the old mailing list that does not work anymore and point the web-page to a more accurate URL. Signed-off-by: Jouni Malinen Signed-off-by: Kalle Valo --- MAINTAINERS | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index ce80b36aab69..463daa52b73a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5592,10 +5592,9 @@ F: Documentation/devicetree/bindings/scsi/hisilicon-sas.txt HOST AP DRIVER M: Jouni Malinen -L: hostap@shmoo.com (subscribers-only) L: linux-wireless@vger.kernel.org -W: http://hostap.epitest.fi/ -S: Maintained +W: http://w1.fi/hostap-driver.html +S: Obsolete F: drivers/net/wireless/intersil/hostap/ HP COMPAQ TC1100 TABLET WMI EXTRAS DRIVER From 3acf3ec3f4b0fd4263989f2e4227bbd1c42b5fe1 Mon Sep 17 00:00:00 2001 From: Lawrence Brakmo Date: Tue, 27 Sep 2016 19:03:37 -0700 Subject: [PATCH 1613/1715] tcp: Change txhash on every SYN and RTO retransmit The current code changes txhash (flowlables) on every retransmitted SYN/ACK, but only after the 2nd retransmitted SYN and only after tcp_retries1 RTO retransmits. With this patch: 1) txhash is changed with every SYN retransmits 2) txhash is changed with every RTO. The result is that we can start re-routing around failed (or very congested paths) as soon as possible. Otherwise application health checks may fail and the connection may be terminated before we start to change txhash. v4: Removed sysctl, txhash is changed for all RTOs v3: Removed text saying default value of sysctl is 0 (it is 100) v2: Added sysctl documentation and cleaned code Tested with packetdrill tests Signed-off-by: Lawrence Brakmo Signed-off-by: David S. Miller --- net/ipv4/tcp_timer.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index f712b411f6ed..3ea1cf804748 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -192,6 +192,8 @@ static int tcp_write_timeout(struct sock *sk) if (tp->syn_data && icsk->icsk_retransmits == 1) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVEFAIL); + } else if (!tp->syn_data && !tp->syn_fastopen) { + sk_rethink_txhash(sk); } retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries; syn_set = true; @@ -213,6 +215,8 @@ static int tcp_write_timeout(struct sock *sk) tcp_mtu_probing(icsk, sk); dst_negative_advice(sk); + } else { + sk_rethink_txhash(sk); } retry_until = net->ipv4.sysctl_tcp_retries2; From 8f7d99ba85d4d7118a6cf2d0ed9c2ff8e6528679 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 28 Sep 2016 23:44:57 +0200 Subject: [PATCH 1614/1715] cfg80211: wext: really don't store non-WEP keys Jouni reported that during (repeated) wext_pmf test runs (from the wpa_supplicant hwsim test suite) the kernel crashes. The reason is that after the key is set, the wext code still unnecessarily stores it into the key cache. Despite smatch pointing out an overflow, I failed to identify the possibility for this in the code and missed it during development of the earlier patch series. In order to fix this, simply check that we never store anything but WEP keys into the cache, adding a comment as to why that's enough. Also, since the cache is still allocated early even if it won't be used in many cases, add a comment explaining why - otherwise we'd have to roll back key settings to the driver in case of allocation failures, which is far more difficult. Fixes: 89b706fb28e4 ("cfg80211: reduce connect key caching struct size") Reported-by: Jouni Malinen Bisected-by: Jouni Malinen Signed-off-by: Johannes Berg --- net/wireless/wext-compat.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 7b97d43b27e1..2b096c02eb85 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -406,6 +406,10 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev, if (pairwise && !addr) return -EINVAL; + /* + * In many cases we won't actually need this, but it's better + * to do it first in case the allocation fails. Don't use wext. + */ if (!wdev->wext.keys) { wdev->wext.keys = kzalloc(sizeof(*wdev->wext.keys), GFP_KERNEL); @@ -493,7 +497,13 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev, if (err) return err; - if (!addr) { + /* + * We only need to store WEP keys, since they're the only keys that + * can be be set before a connection is established and persist after + * disconnecting. + */ + if (!addr && (params->cipher == WLAN_CIPHER_SUITE_WEP40 || + params->cipher == WLAN_CIPHER_SUITE_WEP104)) { wdev->wext.keys->params[idx] = *params; memcpy(wdev->wext.keys->data[idx], params->key, params->key_len); From 7836667cec5e02ed2ae3eb09b88047b5b5f2343a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Sep 2016 08:41:16 -0700 Subject: [PATCH 1615/1715] net: do not export sk_stream_write_space Since commit 900f65d361d3 ("tcp: move duplicate code from tcp_v4_init_sock()/tcp_v6_init_sock()") we no longer need to export sk_stream_write_space() From: Eric Dumazet Cc: Neal Cardwell Signed-off-by: David S. Miller --- net/core/stream.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/core/stream.c b/net/core/stream.c index 159516a11b7e..1086c8b280a8 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -43,7 +43,6 @@ void sk_stream_write_space(struct sock *sk) rcu_read_unlock(); } } -EXPORT_SYMBOL(sk_stream_write_space); /** * sk_stream_wait_connect - Wait for a socket to get into the connected state From 484611357c19f9e19ef742ebef4505a07d243cc9 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 28 Sep 2016 10:54:32 -0400 Subject: [PATCH 1616/1715] bpf: allow access into map value arrays Suppose you have a map array value that is something like this struct foo { unsigned iter; int array[SOME_CONSTANT]; }; You can easily insert this into an array, but you cannot modify the contents of foo->array[] after the fact. This is because we have no way to verify we won't go off the end of the array at verification time. This patch provides a start for this work. We accomplish this by keeping track of a minimum and maximum value a register could be while we're checking the code. Then at the time we try to do an access into a MAP_VALUE we verify that the maximum offset into that region is a valid access into that memory region. So in practice, code such as this unsigned index = 0; if (foo->iter >= SOME_CONSTANT) foo->iter = index; else index = foo->iter++; foo->array[index] = bar; would be allowed, as we can verify that index will always be between 0 and SOME_CONSTANT-1. If you wish to use signed values you'll have to have an extra check to make sure the index isn't less than 0, or do something like index %= SOME_CONSTANT. Signed-off-by: Josef Bacik Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 7 + include/linux/bpf_verifier.h | 12 ++ kernel/bpf/verifier.c | 329 +++++++++++++++++++++++++++++++++-- samples/bpf/libbpf.h | 8 + samples/bpf/test_verifier.c | 243 +++++++++++++++++++++++++- 5 files changed, 577 insertions(+), 22 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5691fdc83819..c201017b5730 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -139,6 +139,13 @@ enum bpf_reg_type { */ PTR_TO_PACKET, PTR_TO_PACKET_END, /* skb->data + headlen */ + + /* PTR_TO_MAP_VALUE_ADJ is used for doing pointer math inside of a map + * elem value. We only allow this if we can statically verify that + * access from this register are going to fall within the size of the + * map element. + */ + PTR_TO_MAP_VALUE_ADJ, }; struct bpf_prog; diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index c5cb661712c9..7035b997aaa5 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -10,8 +10,19 @@ #include /* for enum bpf_reg_type */ #include /* for MAX_BPF_STACK */ + /* Just some arbitrary values so we can safely do math without overflowing and + * are obviously wrong for any sort of memory access. + */ +#define BPF_REGISTER_MAX_RANGE (1024 * 1024 * 1024) +#define BPF_REGISTER_MIN_RANGE -(1024 * 1024 * 1024) + struct bpf_reg_state { enum bpf_reg_type type; + /* + * Used to determine if any memory access using this register will + * result in a bad access. + */ + u64 min_value, max_value; union { /* valid when type == CONST_IMM | PTR_TO_STACK | UNKNOWN_VALUE */ s64 imm; @@ -81,6 +92,7 @@ struct bpf_verifier_env { u32 id_gen; /* used to generate unique reg IDs */ bool allow_ptr_leaks; bool seen_direct_write; + bool varlen_map_value_access; struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */ }; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 7ada3152a556..99a7e5b388f2 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -182,6 +182,7 @@ static const char * const reg_type_str[] = { [CONST_PTR_TO_MAP] = "map_ptr", [PTR_TO_MAP_VALUE] = "map_value", [PTR_TO_MAP_VALUE_OR_NULL] = "map_value_or_null", + [PTR_TO_MAP_VALUE_ADJ] = "map_value_adj", [FRAME_PTR] = "fp", [PTR_TO_STACK] = "fp", [CONST_IMM] = "imm", @@ -209,10 +210,17 @@ static void print_verifier_state(struct bpf_verifier_state *state) else if (t == UNKNOWN_VALUE && reg->imm) verbose("%lld", reg->imm); else if (t == CONST_PTR_TO_MAP || t == PTR_TO_MAP_VALUE || - t == PTR_TO_MAP_VALUE_OR_NULL) + t == PTR_TO_MAP_VALUE_OR_NULL || + t == PTR_TO_MAP_VALUE_ADJ) verbose("(ks=%d,vs=%d)", reg->map_ptr->key_size, reg->map_ptr->value_size); + if (reg->min_value != BPF_REGISTER_MIN_RANGE) + verbose(",min_value=%llu", + (unsigned long long)reg->min_value); + if (reg->max_value != BPF_REGISTER_MAX_RANGE) + verbose(",max_value=%llu", + (unsigned long long)reg->max_value); } for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) { if (state->stack_slot_type[i] == STACK_SPILL) @@ -424,6 +432,8 @@ static void init_reg_state(struct bpf_reg_state *regs) for (i = 0; i < MAX_BPF_REG; i++) { regs[i].type = NOT_INIT; regs[i].imm = 0; + regs[i].min_value = BPF_REGISTER_MIN_RANGE; + regs[i].max_value = BPF_REGISTER_MAX_RANGE; } /* frame pointer */ @@ -440,6 +450,12 @@ static void mark_reg_unknown_value(struct bpf_reg_state *regs, u32 regno) regs[regno].imm = 0; } +static void reset_reg_range_values(struct bpf_reg_state *regs, u32 regno) +{ + regs[regno].min_value = BPF_REGISTER_MIN_RANGE; + regs[regno].max_value = BPF_REGISTER_MAX_RANGE; +} + enum reg_arg_type { SRC_OP, /* register is used as source operand */ DST_OP, /* register is used as destination operand */ @@ -665,7 +681,7 @@ static bool is_pointer_value(struct bpf_verifier_env *env, int regno) static int check_ptr_alignment(struct bpf_verifier_env *env, struct bpf_reg_state *reg, int off, int size) { - if (reg->type != PTR_TO_PACKET) { + if (reg->type != PTR_TO_PACKET && reg->type != PTR_TO_MAP_VALUE_ADJ) { if (off % size != 0) { verbose("misaligned access off %d size %d\n", off, size); @@ -675,16 +691,6 @@ static int check_ptr_alignment(struct bpf_verifier_env *env, } } - switch (env->prog->type) { - case BPF_PROG_TYPE_SCHED_CLS: - case BPF_PROG_TYPE_SCHED_ACT: - case BPF_PROG_TYPE_XDP: - break; - default: - verbose("verifier is misconfigured\n"); - return -EACCES; - } - if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) /* misaligned access to packet is ok on x86,arm,arm64 */ return 0; @@ -695,7 +701,8 @@ static int check_ptr_alignment(struct bpf_verifier_env *env, } /* skb->data is NET_IP_ALIGN-ed */ - if ((NET_IP_ALIGN + reg->off + off) % size != 0) { + if (reg->type == PTR_TO_PACKET && + (NET_IP_ALIGN + reg->off + off) % size != 0) { verbose("misaligned packet access off %d+%d+%d size %d\n", NET_IP_ALIGN, reg->off, off, size); return -EACCES; @@ -728,12 +735,52 @@ static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off, if (err) return err; - if (reg->type == PTR_TO_MAP_VALUE) { + if (reg->type == PTR_TO_MAP_VALUE || + reg->type == PTR_TO_MAP_VALUE_ADJ) { if (t == BPF_WRITE && value_regno >= 0 && is_pointer_value(env, value_regno)) { verbose("R%d leaks addr into map\n", value_regno); return -EACCES; } + + /* If we adjusted the register to this map value at all then we + * need to change off and size to min_value and max_value + * respectively to make sure our theoretical access will be + * safe. + */ + if (reg->type == PTR_TO_MAP_VALUE_ADJ) { + if (log_level) + print_verifier_state(state); + env->varlen_map_value_access = true; + /* The minimum value is only important with signed + * comparisons where we can't assume the floor of a + * value is 0. If we are using signed variables for our + * index'es we need to make sure that whatever we use + * will have a set floor within our range. + */ + if ((s64)reg->min_value < 0) { + verbose("R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n", + regno); + return -EACCES; + } + err = check_map_access(env, regno, reg->min_value + off, + size); + if (err) { + verbose("R%d min value is outside of the array range\n", + regno); + return err; + } + + /* If we haven't set a max value then we need to bail + * since we can't be sure we won't do bad things. + */ + if (reg->max_value == BPF_REGISTER_MAX_RANGE) { + verbose("R%d unbounded memory access, make sure to bounds check any array access into a map\n", + regno); + return -EACCES; + } + off += reg->max_value; + } err = check_map_access(env, regno, off, size); if (!err && t == BPF_READ && value_regno >= 0) mark_reg_unknown_value(state->regs, value_regno); @@ -1195,6 +1242,7 @@ static int check_call(struct bpf_verifier_env *env, int func_id) regs[BPF_REG_0].type = NOT_INIT; } else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL) { regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL; + regs[BPF_REG_0].max_value = regs[BPF_REG_0].min_value = 0; /* remember map_ptr, so that check_map_access() * can check 'value_size' boundary of memory access * to map element returned from bpf_map_lookup_elem() @@ -1416,6 +1464,106 @@ static int evaluate_reg_imm_alu(struct bpf_verifier_env *env, return 0; } +static void check_reg_overflow(struct bpf_reg_state *reg) +{ + if (reg->max_value > BPF_REGISTER_MAX_RANGE) + reg->max_value = BPF_REGISTER_MAX_RANGE; + if ((s64)reg->min_value < BPF_REGISTER_MIN_RANGE) + reg->min_value = BPF_REGISTER_MIN_RANGE; +} + +static void adjust_reg_min_max_vals(struct bpf_verifier_env *env, + struct bpf_insn *insn) +{ + struct bpf_reg_state *regs = env->cur_state.regs, *dst_reg; + u64 min_val = BPF_REGISTER_MIN_RANGE, max_val = BPF_REGISTER_MAX_RANGE; + bool min_set = false, max_set = false; + u8 opcode = BPF_OP(insn->code); + + dst_reg = ®s[insn->dst_reg]; + if (BPF_SRC(insn->code) == BPF_X) { + check_reg_overflow(®s[insn->src_reg]); + min_val = regs[insn->src_reg].min_value; + max_val = regs[insn->src_reg].max_value; + + /* If the source register is a random pointer then the + * min_value/max_value values represent the range of the known + * accesses into that value, not the actual min/max value of the + * register itself. In this case we have to reset the reg range + * values so we know it is not safe to look at. + */ + if (regs[insn->src_reg].type != CONST_IMM && + regs[insn->src_reg].type != UNKNOWN_VALUE) { + min_val = BPF_REGISTER_MIN_RANGE; + max_val = BPF_REGISTER_MAX_RANGE; + } + } else if (insn->imm < BPF_REGISTER_MAX_RANGE && + (s64)insn->imm > BPF_REGISTER_MIN_RANGE) { + min_val = max_val = insn->imm; + min_set = max_set = true; + } + + /* We don't know anything about what was done to this register, mark it + * as unknown. + */ + if (min_val == BPF_REGISTER_MIN_RANGE && + max_val == BPF_REGISTER_MAX_RANGE) { + reset_reg_range_values(regs, insn->dst_reg); + return; + } + + switch (opcode) { + case BPF_ADD: + dst_reg->min_value += min_val; + dst_reg->max_value += max_val; + break; + case BPF_SUB: + dst_reg->min_value -= min_val; + dst_reg->max_value -= max_val; + break; + case BPF_MUL: + dst_reg->min_value *= min_val; + dst_reg->max_value *= max_val; + break; + case BPF_AND: + /* & is special since it could end up with 0 bits set. */ + dst_reg->min_value &= min_val; + dst_reg->max_value = max_val; + break; + case BPF_LSH: + /* Gotta have special overflow logic here, if we're shifting + * more than MAX_RANGE then just assume we have an invalid + * range. + */ + if (min_val > ilog2(BPF_REGISTER_MAX_RANGE)) + dst_reg->min_value = BPF_REGISTER_MIN_RANGE; + else + dst_reg->min_value <<= min_val; + + if (max_val > ilog2(BPF_REGISTER_MAX_RANGE)) + dst_reg->max_value = BPF_REGISTER_MAX_RANGE; + else + dst_reg->max_value <<= max_val; + break; + case BPF_RSH: + dst_reg->min_value >>= min_val; + dst_reg->max_value >>= max_val; + break; + case BPF_MOD: + /* % is special since it is an unsigned modulus, so the floor + * will always be 0. + */ + dst_reg->min_value = 0; + dst_reg->max_value = max_val - 1; + break; + default: + reset_reg_range_values(regs, insn->dst_reg); + break; + } + + check_reg_overflow(dst_reg); +} + /* check validity of 32-bit and 64-bit arithmetic operations */ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) { @@ -1479,6 +1627,11 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) if (err) return err; + /* we are setting our register to something new, we need to + * reset its range values. + */ + reset_reg_range_values(regs, insn->dst_reg); + if (BPF_SRC(insn->code) == BPF_X) { if (BPF_CLASS(insn->code) == BPF_ALU64) { /* case: R1 = R2 @@ -1500,6 +1653,8 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) */ regs[insn->dst_reg].type = CONST_IMM; regs[insn->dst_reg].imm = insn->imm; + regs[insn->dst_reg].max_value = insn->imm; + regs[insn->dst_reg].min_value = insn->imm; } } else if (opcode > BPF_END) { @@ -1552,6 +1707,9 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) dst_reg = ®s[insn->dst_reg]; + /* first we want to adjust our ranges. */ + adjust_reg_min_max_vals(env, insn); + /* pattern match 'bpf_add Rx, imm' instruction */ if (opcode == BPF_ADD && BPF_CLASS(insn->code) == BPF_ALU64 && dst_reg->type == FRAME_PTR && BPF_SRC(insn->code) == BPF_K) { @@ -1586,8 +1744,17 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) return -EACCES; } - /* mark dest operand */ - mark_reg_unknown_value(regs, insn->dst_reg); + /* If we did pointer math on a map value then just set it to our + * PTR_TO_MAP_VALUE_ADJ type so we can deal with any stores or + * loads to this register appropriately, otherwise just mark the + * register as unknown. + */ + if (env->allow_ptr_leaks && + (dst_reg->type == PTR_TO_MAP_VALUE || + dst_reg->type == PTR_TO_MAP_VALUE_ADJ)) + dst_reg->type = PTR_TO_MAP_VALUE_ADJ; + else + mark_reg_unknown_value(regs, insn->dst_reg); } return 0; @@ -1642,6 +1809,104 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state, } } +/* Adjusts the register min/max values in the case that the dst_reg is the + * variable register that we are working on, and src_reg is a constant or we're + * simply doing a BPF_K check. + */ +static void reg_set_min_max(struct bpf_reg_state *true_reg, + struct bpf_reg_state *false_reg, u64 val, + u8 opcode) +{ + switch (opcode) { + case BPF_JEQ: + /* If this is false then we know nothing Jon Snow, but if it is + * true then we know for sure. + */ + true_reg->max_value = true_reg->min_value = val; + break; + case BPF_JNE: + /* If this is true we know nothing Jon Snow, but if it is false + * we know the value for sure; + */ + false_reg->max_value = false_reg->min_value = val; + break; + case BPF_JGT: + /* Unsigned comparison, the minimum value is 0. */ + false_reg->min_value = 0; + case BPF_JSGT: + /* If this is false then we know the maximum val is val, + * otherwise we know the min val is val+1. + */ + false_reg->max_value = val; + true_reg->min_value = val + 1; + break; + case BPF_JGE: + /* Unsigned comparison, the minimum value is 0. */ + false_reg->min_value = 0; + case BPF_JSGE: + /* If this is false then we know the maximum value is val - 1, + * otherwise we know the mimimum value is val. + */ + false_reg->max_value = val - 1; + true_reg->min_value = val; + break; + default: + break; + } + + check_reg_overflow(false_reg); + check_reg_overflow(true_reg); +} + +/* Same as above, but for the case that dst_reg is a CONST_IMM reg and src_reg + * is the variable reg. + */ +static void reg_set_min_max_inv(struct bpf_reg_state *true_reg, + struct bpf_reg_state *false_reg, u64 val, + u8 opcode) +{ + switch (opcode) { + case BPF_JEQ: + /* If this is false then we know nothing Jon Snow, but if it is + * true then we know for sure. + */ + true_reg->max_value = true_reg->min_value = val; + break; + case BPF_JNE: + /* If this is true we know nothing Jon Snow, but if it is false + * we know the value for sure; + */ + false_reg->max_value = false_reg->min_value = val; + break; + case BPF_JGT: + /* Unsigned comparison, the minimum value is 0. */ + true_reg->min_value = 0; + case BPF_JSGT: + /* + * If this is false, then the val is <= the register, if it is + * true the register <= to the val. + */ + false_reg->min_value = val; + true_reg->max_value = val - 1; + break; + case BPF_JGE: + /* Unsigned comparison, the minimum value is 0. */ + true_reg->min_value = 0; + case BPF_JSGE: + /* If this is false then constant < register, if it is true then + * the register < constant. + */ + false_reg->min_value = val + 1; + true_reg->max_value = val; + break; + default: + break; + } + + check_reg_overflow(false_reg); + check_reg_overflow(true_reg); +} + static int check_cond_jmp_op(struct bpf_verifier_env *env, struct bpf_insn *insn, int *insn_idx) { @@ -1708,6 +1973,23 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, if (!other_branch) return -EFAULT; + /* detect if we are comparing against a constant value so we can adjust + * our min/max values for our dst register. + */ + if (BPF_SRC(insn->code) == BPF_X) { + if (regs[insn->src_reg].type == CONST_IMM) + reg_set_min_max(&other_branch->regs[insn->dst_reg], + dst_reg, regs[insn->src_reg].imm, + opcode); + else if (dst_reg->type == CONST_IMM) + reg_set_min_max_inv(&other_branch->regs[insn->src_reg], + ®s[insn->src_reg], dst_reg->imm, + opcode); + } else { + reg_set_min_max(&other_branch->regs[insn->dst_reg], + dst_reg, insn->imm, opcode); + } + /* detect if R == 0 where R is returned from bpf_map_lookup_elem() */ if (BPF_SRC(insn->code) == BPF_K && insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) && @@ -2144,7 +2426,8 @@ static bool compare_ptrs_to_packet(struct bpf_reg_state *old, * whereas register type in current state is meaningful, it means that * the current state will reach 'bpf_exit' instruction safely */ -static bool states_equal(struct bpf_verifier_state *old, +static bool states_equal(struct bpf_verifier_env *env, + struct bpf_verifier_state *old, struct bpf_verifier_state *cur) { struct bpf_reg_state *rold, *rcur; @@ -2157,6 +2440,13 @@ static bool states_equal(struct bpf_verifier_state *old, if (memcmp(rold, rcur, sizeof(*rold)) == 0) continue; + /* If the ranges were not the same, but everything else was and + * we didn't do a variable access into a map then we are a-ok. + */ + if (!env->varlen_map_value_access && + rold->type == rcur->type && rold->imm == rcur->imm) + continue; + if (rold->type == NOT_INIT || (rold->type == UNKNOWN_VALUE && rcur->type != NOT_INIT)) continue; @@ -2213,7 +2503,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) return 0; while (sl != STATE_LIST_MARK) { - if (states_equal(&sl->state, &env->cur_state)) + if (states_equal(env, &sl->state, &env->cur_state)) /* reached equivalent register/stack state, * prune the search */ @@ -2259,6 +2549,7 @@ static int do_check(struct bpf_verifier_env *env) init_reg_state(regs); insn_idx = 0; + env->varlen_map_value_access = false; for (;;) { struct bpf_insn *insn; u8 class; @@ -2339,6 +2630,7 @@ static int do_check(struct bpf_verifier_env *env) if (err) return err; + reset_reg_range_values(regs, insn->dst_reg); if (BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) { insn_idx++; @@ -2509,6 +2801,7 @@ process_bpf_exit: verbose("invalid BPF_LD mode\n"); return -EINVAL; } + reset_reg_range_values(regs, insn->dst_reg); } else { verbose("unknown insn class %d\n", class); return -EINVAL; diff --git a/samples/bpf/libbpf.h b/samples/bpf/libbpf.h index 364582b77888..ac6edb61b64a 100644 --- a/samples/bpf/libbpf.h +++ b/samples/bpf/libbpf.h @@ -85,6 +85,14 @@ extern char bpf_log_buf[LOG_BUF_SIZE]; .off = 0, \ .imm = IMM }) +#define BPF_MOV32_IMM(DST, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_ALU | BPF_MOV | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = 0, \ + .imm = IMM }) + /* BPF_LD_IMM64 macro encodes single 'load 64-bit immediate' insn */ #define BPF_LD_IMM64(DST, IMM) \ BPF_LD_IMM64_RAW(DST, 0, IMM) diff --git a/samples/bpf/test_verifier.c b/samples/bpf/test_verifier.c index ac590d4b7f02..369ffaad3799 100644 --- a/samples/bpf/test_verifier.c +++ b/samples/bpf/test_verifier.c @@ -29,6 +29,7 @@ struct bpf_test { struct bpf_insn insns[MAX_INSNS]; int fixup[MAX_FIXUPS]; int prog_array_fixup[MAX_FIXUPS]; + int test_val_map_fixup[MAX_FIXUPS]; const char *errstr; const char *errstr_unpriv; enum { @@ -39,6 +40,19 @@ struct bpf_test { enum bpf_prog_type prog_type; }; +/* Note we want this to be 64 bit aligned so that the end of our array is + * actually the end of the structure. + */ +#define MAX_ENTRIES 11 +struct test_val { + unsigned index; + int foo[MAX_ENTRIES]; +}; + +struct other_val { + unsigned int action[32]; +}; + static struct bpf_test tests[] = { { "add+sub+mul", @@ -2163,6 +2177,212 @@ static struct bpf_test tests[] = { .errstr = "invalid access to packet", .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, + { + "valid map access into an array with a constant", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)), + BPF_EXIT_INSN(), + }, + .test_val_map_fixup = {3}, + .errstr_unpriv = "R0 leaks addr", + .result_unpriv = REJECT, + .result = ACCEPT, + }, + { + "valid map access into an array with a register", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_IMM(BPF_REG_1, 4), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)), + BPF_EXIT_INSN(), + }, + .test_val_map_fixup = {3}, + .errstr_unpriv = "R0 leaks addr", + .result_unpriv = REJECT, + .result = ACCEPT, + }, + { + "valid map access into an array with a variable", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JGE, BPF_REG_1, MAX_ENTRIES, 3), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)), + BPF_EXIT_INSN(), + }, + .test_val_map_fixup = {3}, + .errstr_unpriv = "R0 leaks addr", + .result_unpriv = REJECT, + .result = ACCEPT, + }, + { + "valid map access into an array with a signed variable", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 0xffffffff, 1), + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES), + BPF_JMP_REG(BPF_JSGT, BPF_REG_2, BPF_REG_1, 1), + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)), + BPF_EXIT_INSN(), + }, + .test_val_map_fixup = {3}, + .errstr_unpriv = "R0 leaks addr", + .result_unpriv = REJECT, + .result = ACCEPT, + }, + { + "invalid map access into an array with a constant", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_ST_MEM(BPF_DW, BPF_REG_0, (MAX_ENTRIES + 1) << 2, + offsetof(struct test_val, foo)), + BPF_EXIT_INSN(), + }, + .test_val_map_fixup = {3}, + .errstr = "invalid access to map value, value_size=48 off=48 size=8", + .result = REJECT, + }, + { + "invalid map access into an array with a register", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_IMM(BPF_REG_1, MAX_ENTRIES + 1), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)), + BPF_EXIT_INSN(), + }, + .test_val_map_fixup = {3}, + .errstr = "R0 min value is outside of the array range", + .result = REJECT, + }, + { + "invalid map access into an array with a variable", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)), + BPF_EXIT_INSN(), + }, + .test_val_map_fixup = {3}, + .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.", + .result = REJECT, + }, + { + "invalid map access into an array with no floor check", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES), + BPF_JMP_REG(BPF_JSGT, BPF_REG_2, BPF_REG_1, 1), + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)), + BPF_EXIT_INSN(), + }, + .test_val_map_fixup = {3}, + .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.", + .result = REJECT, + }, + { + "invalid map access into an array with a invalid max check", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES + 1), + BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 1), + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)), + BPF_EXIT_INSN(), + }, + .test_val_map_fixup = {3}, + .errstr = "invalid access to map value, value_size=48 off=44 size=8", + .result = REJECT, + }, + { + "invalid map access into an array with a invalid max check", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_8), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct test_val, foo)), + BPF_EXIT_INSN(), + }, + .test_val_map_fixup = {3, 11}, + .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.", + .result = REJECT, + }, }; static int probe_filter_length(struct bpf_insn *fp) @@ -2176,12 +2396,12 @@ static int probe_filter_length(struct bpf_insn *fp) return len + 1; } -static int create_map(void) +static int create_map(size_t val_size, int num) { int map_fd; map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, - sizeof(long long), sizeof(long long), 1024, 0); + sizeof(long long), val_size, num, 0); if (map_fd < 0) printf("failed to create map '%s'\n", strerror(errno)); @@ -2211,12 +2431,13 @@ static int test(void) int prog_len = probe_filter_length(prog); int *fixup = tests[i].fixup; int *prog_array_fixup = tests[i].prog_array_fixup; + int *test_val_map_fixup = tests[i].test_val_map_fixup; int expected_result; const char *expected_errstr; - int map_fd = -1, prog_array_fd = -1; + int map_fd = -1, prog_array_fd = -1, test_val_map_fd = -1; if (*fixup) { - map_fd = create_map(); + map_fd = create_map(sizeof(long long), 1024); do { prog[*fixup].imm = map_fd; @@ -2231,6 +2452,18 @@ static int test(void) prog_array_fixup++; } while (*prog_array_fixup); } + if (*test_val_map_fixup) { + /* Unprivileged can't create a hash map.*/ + if (unpriv) + continue; + test_val_map_fd = create_map(sizeof(struct test_val), + 256); + do { + prog[*test_val_map_fixup].imm = test_val_map_fd; + test_val_map_fixup++; + } while (*test_val_map_fixup); + } + printf("#%d %s ", i, tests[i].descr); prog_fd = bpf_prog_load(prog_type ?: BPF_PROG_TYPE_SOCKET_FILTER, @@ -2277,6 +2510,8 @@ fail: close(map_fd); if (prog_array_fd >= 0) close(prog_array_fd); + if (test_val_map_fd >= 0) + close(test_val_map_fd); close(prog_fd); } From 54e19bc74f3380d414681762ceed9f7245bc6a6e Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Wed, 28 Sep 2016 11:58:42 -0500 Subject: [PATCH 1617/1715] net: qcom/emac: do not use devm on internal phy pdev The platform_device returned by of_find_device_by_node() is not automatically released when the driver unprobes. Therefore, managed calls like devm_ioremap_resource() should not be used. Instead, we manually allocate the resources and then free them on driver release. Signed-off-by: Timur Tabi Signed-off-by: David S. Miller --- .../net/ethernet/qualcomm/emac/emac-sgmii.c | 42 +++++++++++++++---- drivers/net/ethernet/qualcomm/emac/emac.c | 4 ++ 2 files changed, 37 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c index 6ab0a3c96431..ad0e4209d948 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c +++ b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c @@ -681,6 +681,7 @@ int emac_sgmii_config(struct platform_device *pdev, struct emac_adapter *adpt) struct resource *res; const struct of_device_id *match; struct device_node *np; + int ret; np = of_parse_phandle(pdev->dev.of_node, "internal-phy", 0); if (!np) { @@ -697,25 +698,48 @@ int emac_sgmii_config(struct platform_device *pdev, struct emac_adapter *adpt) match = of_match_device(emac_sgmii_dt_match, &sgmii_pdev->dev); if (!match) { dev_err(&pdev->dev, "unrecognized internal phy node\n"); - return -ENODEV; + ret = -ENODEV; + goto error_put_device; } phy->initialize = (emac_sgmii_initialize)match->data; /* Base address is the first address */ res = platform_get_resource(sgmii_pdev, IORESOURCE_MEM, 0); - phy->base = devm_ioremap_resource(&sgmii_pdev->dev, res); - if (IS_ERR(phy->base)) - return PTR_ERR(phy->base); + phy->base = ioremap(res->start, resource_size(res)); + if (IS_ERR(phy->base)) { + ret = PTR_ERR(phy->base); + goto error_put_device; + } /* v2 SGMII has a per-lane digital digital, so parse it if it exists */ res = platform_get_resource(sgmii_pdev, IORESOURCE_MEM, 1); if (res) { - phy->digital = devm_ioremap_resource(&sgmii_pdev->dev, res); - if (IS_ERR(phy->base)) - return PTR_ERR(phy->base); - + phy->digital = ioremap(res->start, resource_size(res)); + if (IS_ERR(phy->digital)) { + ret = PTR_ERR(phy->digital); + goto error_unmap_base; + } } - return phy->initialize(adpt); + ret = phy->initialize(adpt); + if (ret) + goto error; + + /* We've remapped the addresses, so we don't need the device any + * more. of_find_device_by_node() says we should release it. + */ + put_device(&sgmii_pdev->dev); + + return 0; + +error: + if (phy->digital) + iounmap(phy->digital); +error_unmap_base: + iounmap(phy->base); +error_put_device: + put_device(&sgmii_pdev->dev); + + return ret; } diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c index e47d38701d6c..429b4cb59ac4 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac.c @@ -723,6 +723,10 @@ static int emac_remove(struct platform_device *pdev) mdiobus_unregister(adpt->mii_bus); free_netdev(netdev); + if (adpt->phy.digital) + iounmap(adpt->phy.digital); + iounmap(adpt->phy.base); + return 0; } From 0de709acbc0d0001dc4b0953aebcfb0f030b9b0e Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Wed, 28 Sep 2016 11:58:43 -0500 Subject: [PATCH 1618/1715] net: qcom/emac: use device_get_mac_address Replace the DT-specific of_get_mac_address() function with device_get_mac_address, which works on both DT and ACPI platforms. This change makes it easier to add ACPI support. Signed-off-by: Timur Tabi Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/emac/emac.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c index 429b4cb59ac4..551df1c52620 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac.c @@ -531,18 +531,16 @@ static void emac_clks_teardown(struct emac_adapter *adpt) static int emac_probe_resources(struct platform_device *pdev, struct emac_adapter *adpt) { - struct device_node *node = pdev->dev.of_node; struct net_device *netdev = adpt->netdev; struct resource *res; - const void *maddr; + char maddr[ETH_ALEN]; int ret = 0; /* get mac address */ - maddr = of_get_mac_address(node); - if (!maddr) - eth_hw_addr_random(netdev); - else + if (device_get_mac_address(&pdev->dev, maddr, ETH_ALEN)) ether_addr_copy(netdev->dev_addr, maddr); + else + eth_hw_addr_random(netdev); /* Core 0 interrupt */ ret = platform_get_irq(pdev, 0); From 5f3d38078c84f7bc12739a3b79fc59797cf0262d Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Wed, 28 Sep 2016 11:58:44 -0500 Subject: [PATCH 1619/1715] net: qcom/emac: initial ACPI support Add support for reading addresses, interrupts, and _DSD properties from ACPI tables, just like with device tree. The HID for the EMAC device itself is QCOM8070. The internal PHY is represented by a child node with a HID of QCOM8071. The EMAC also has some complex clock initialization requirements that are not represented by this patch. This will be addressed in a future patch. Signed-off-by: Timur Tabi Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/emac/emac-phy.c | 37 ++++++++-- .../net/ethernet/qualcomm/emac/emac-sgmii.c | 72 ++++++++++++++----- drivers/net/ethernet/qualcomm/emac/emac.c | 12 ++++ 3 files changed, 95 insertions(+), 26 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/emac/emac-phy.c b/drivers/net/ethernet/qualcomm/emac/emac-phy.c index c412ba9a27e7..da4e90db4d98 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac-phy.c +++ b/drivers/net/ethernet/qualcomm/emac/emac-phy.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "emac.h" #include "emac-mac.h" #include "emac-phy.h" @@ -167,7 +168,6 @@ static int emac_mdio_write(struct mii_bus *bus, int addr, int regnum, u16 val) int emac_phy_config(struct platform_device *pdev, struct emac_adapter *adpt) { struct device_node *np = pdev->dev.of_node; - struct device_node *phy_np; struct mii_bus *mii_bus; int ret; @@ -183,14 +183,37 @@ int emac_phy_config(struct platform_device *pdev, struct emac_adapter *adpt) mii_bus->parent = &pdev->dev; mii_bus->priv = adpt; - ret = of_mdiobus_register(mii_bus, np); - if (ret) { - dev_err(&pdev->dev, "could not register mdio bus\n"); - return ret; + if (has_acpi_companion(&pdev->dev)) { + u32 phy_addr; + + ret = mdiobus_register(mii_bus); + if (ret) { + dev_err(&pdev->dev, "could not register mdio bus\n"); + return ret; + } + ret = device_property_read_u32(&pdev->dev, "phy-channel", + &phy_addr); + if (ret) + /* If we can't read a valid phy address, then assume + * that there is only one phy on this mdio bus. + */ + adpt->phydev = phy_find_first(mii_bus); + else + adpt->phydev = mdiobus_get_phy(mii_bus, phy_addr); + + } else { + struct device_node *phy_np; + + ret = of_mdiobus_register(mii_bus, np); + if (ret) { + dev_err(&pdev->dev, "could not register mdio bus\n"); + return ret; + } + + phy_np = of_parse_phandle(np, "phy-handle", 0); + adpt->phydev = of_phy_find_device(phy_np); } - phy_np = of_parse_phandle(np, "phy-handle", 0); - adpt->phydev = of_phy_find_device(phy_np); if (!adpt->phydev) { dev_err(&pdev->dev, "could not find external phy\n"); mdiobus_unregister(mii_bus); diff --git a/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c index ad0e4209d948..3d2c05a40d2c 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c +++ b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c @@ -14,6 +14,7 @@ */ #include +#include #include #include "emac.h" #include "emac-mac.h" @@ -662,6 +663,24 @@ void emac_sgmii_reset(struct emac_adapter *adpt) clk_set_rate(adpt->clk[EMAC_CLK_HIGH_SPEED], 125000000); } +static int emac_sgmii_acpi_match(struct device *dev, void *data) +{ + static const struct acpi_device_id match_table[] = { + { + .id = "QCOM8071", + .driver_data = (kernel_ulong_t)emac_sgmii_init_v2, + }, + {} + }; + const struct acpi_device_id *id = acpi_match_device(match_table, dev); + emac_sgmii_initialize *initialize = data; + + if (id) + *initialize = (emac_sgmii_initialize)id->driver_data; + + return !!id; +} + static const struct of_device_id emac_sgmii_dt_match[] = { { .compatible = "qcom,fsm9900-emac-sgmii", @@ -679,30 +698,45 @@ int emac_sgmii_config(struct platform_device *pdev, struct emac_adapter *adpt) struct platform_device *sgmii_pdev = NULL; struct emac_phy *phy = &adpt->phy; struct resource *res; - const struct of_device_id *match; - struct device_node *np; int ret; - np = of_parse_phandle(pdev->dev.of_node, "internal-phy", 0); - if (!np) { - dev_err(&pdev->dev, "missing internal-phy property\n"); - return -ENODEV; - } + if (has_acpi_companion(&pdev->dev)) { + struct device *dev; - sgmii_pdev = of_find_device_by_node(np); - if (!sgmii_pdev) { - dev_err(&pdev->dev, "invalid internal-phy property\n"); - return -ENODEV; - } + dev = device_find_child(&pdev->dev, &phy->initialize, + emac_sgmii_acpi_match); - match = of_match_device(emac_sgmii_dt_match, &sgmii_pdev->dev); - if (!match) { - dev_err(&pdev->dev, "unrecognized internal phy node\n"); - ret = -ENODEV; - goto error_put_device; - } + if (!dev) { + dev_err(&pdev->dev, "cannot find internal phy node\n"); + return -ENODEV; + } - phy->initialize = (emac_sgmii_initialize)match->data; + sgmii_pdev = to_platform_device(dev); + } else { + const struct of_device_id *match; + struct device_node *np; + + np = of_parse_phandle(pdev->dev.of_node, "internal-phy", 0); + if (!np) { + dev_err(&pdev->dev, "missing internal-phy property\n"); + return -ENODEV; + } + + sgmii_pdev = of_find_device_by_node(np); + if (!sgmii_pdev) { + dev_err(&pdev->dev, "invalid internal-phy property\n"); + return -ENODEV; + } + + match = of_match_device(emac_sgmii_dt_match, &sgmii_pdev->dev); + if (!match) { + dev_err(&pdev->dev, "unrecognized internal phy node\n"); + ret = -ENODEV; + goto error_put_device; + } + + phy->initialize = (emac_sgmii_initialize)match->data; + } /* Base address is the first address */ res = platform_get_resource(sgmii_pdev, IORESOURCE_MEM, 0); diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c index 551df1c52620..9bf3b2b82e95 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "emac.h" #include "emac-mac.h" #include "emac-phy.h" @@ -575,6 +576,16 @@ static const struct of_device_id emac_dt_match[] = { {} }; +#if IS_ENABLED(CONFIG_ACPI) +static const struct acpi_device_id emac_acpi_match[] = { + { + .id = "QCOM8070", + }, + {} +}; +MODULE_DEVICE_TABLE(acpi, emac_acpi_match); +#endif + static int emac_probe(struct platform_device *pdev) { struct net_device *netdev; @@ -734,6 +745,7 @@ static struct platform_driver emac_platform_driver = { .driver = { .name = "qcom-emac", .of_match_table = emac_dt_match, + .acpi_match_table = ACPI_PTR(emac_acpi_match), }, }; From 8732db67c6b6dcdb455b73773ea2fc1e1d5024b1 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 29 Sep 2016 22:37:15 +0100 Subject: [PATCH 1620/1715] rxrpc: Fix exclusive client connections Exclusive connections are currently reusable (which they shouldn't be) because rxrpc_alloc_client_connection() checks the exclusive flag in the rxrpc_connection struct before it's initialised from the function parameters. This means that the DONT_REUSE flag doesn't get set. Fix this by checking the function parameters for the exclusive flag. Signed-off-by: David Howells --- net/rxrpc/conn_client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index c76a125df891..f5ee8bfa5bef 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -200,7 +200,7 @@ rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp) } atomic_set(&conn->usage, 1); - if (conn->params.exclusive) + if (cp->exclusive) __set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags); conn->params = *cp; From a1767077b0176de17fa40ec743a20cbdac7a0d56 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 29 Sep 2016 22:37:15 +0100 Subject: [PATCH 1621/1715] rxrpc: Make Tx loss-injection go through normal return and adjust tracing In rxrpc_send_data_packet() make the loss-injection path return through the same code as the transmission path so that the RTT determination is initiated and any future timer shuffling will be done, despite the packet having been binned. Whilst we're at it: (1) Add to the tx_data tracepoint an indication of whether or not we're retransmitting a data packet. (2) When we're deciding whether or not to request an ACK, rather than checking if we're in fast-retransmit mode check instead if we're retransmitting. (3) Don't invoke the lose_skb tracepoint when losing a Tx packet as we're not altering the sk_buff refcount nor are we just seeing it after getting it off the Tx list. (4) The rxrpc_skb_tx_lost note is then no longer used so remove it. (5) rxrpc_lose_skb() no longer needs to deal with rxrpc_skb_tx_lost. Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 6 ++++-- net/rxrpc/ar-internal.h | 3 +-- net/rxrpc/call_event.c | 2 +- net/rxrpc/misc.c | 1 - net/rxrpc/output.c | 17 +++++++++-------- net/rxrpc/sendmsg.c | 2 +- net/rxrpc/skbuff.c | 11 +++-------- 7 files changed, 19 insertions(+), 23 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index ada12d00118c..8ba8d76e856a 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -258,15 +258,16 @@ TRACE_EVENT(rxrpc_rx_ack, TRACE_EVENT(rxrpc_tx_data, TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t seq, - rxrpc_serial_t serial, u8 flags, bool lose), + rxrpc_serial_t serial, u8 flags, bool retrans, bool lose), - TP_ARGS(call, seq, serial, flags, lose), + TP_ARGS(call, seq, serial, flags, retrans, lose), TP_STRUCT__entry( __field(struct rxrpc_call *, call ) __field(rxrpc_seq_t, seq ) __field(rxrpc_serial_t, serial ) __field(u8, flags ) + __field(bool, retrans ) __field(bool, lose ) ), @@ -275,6 +276,7 @@ TRACE_EVENT(rxrpc_tx_data, __entry->seq = seq; __entry->serial = serial; __entry->flags = flags; + __entry->retrans = retrans; __entry->lose = lose; ), diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index ca96e547cb9a..6aadaa7d8b43 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -603,7 +603,6 @@ enum rxrpc_skb_trace { rxrpc_skb_tx_cleaned, rxrpc_skb_tx_freed, rxrpc_skb_tx_got, - rxrpc_skb_tx_lost, rxrpc_skb_tx_new, rxrpc_skb_tx_rotated, rxrpc_skb_tx_seen, @@ -1073,7 +1072,7 @@ extern const s8 rxrpc_ack_priority[]; * output.c */ int rxrpc_send_call_packet(struct rxrpc_call *, u8); -int rxrpc_send_data_packet(struct rxrpc_call *, struct sk_buff *); +int rxrpc_send_data_packet(struct rxrpc_call *, struct sk_buff *, bool); void rxrpc_reject_packets(struct rxrpc_local *); /* diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 0e8478012212..1f6c7633b964 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -256,7 +256,7 @@ static void rxrpc_resend(struct rxrpc_call *call) rxrpc_get_skb(skb, rxrpc_skb_tx_got); spin_unlock_bh(&call->lock); - if (rxrpc_send_data_packet(call, skb) < 0) { + if (rxrpc_send_data_packet(call, skb, true) < 0) { rxrpc_free_skb(skb, rxrpc_skb_tx_freed); return; } diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index aedb8978226d..47dddacdbb91 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -108,7 +108,6 @@ const char rxrpc_skb_traces[rxrpc_skb__nr_trace][7] = { [rxrpc_skb_tx_cleaned] = "Tx CLN", [rxrpc_skb_tx_freed] = "Tx FRE", [rxrpc_skb_tx_got] = "Tx GOT", - [rxrpc_skb_tx_lost] = "Tx *L*", [rxrpc_skb_tx_new] = "Tx NEW", [rxrpc_skb_tx_rotated] = "Tx ROT", [rxrpc_skb_tx_seen] = "Tx SEE", diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index cf43a715685e..ac9a58b619a6 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -238,7 +238,8 @@ out: /* * send a packet through the transport endpoint */ -int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb) +int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, + bool retrans) { struct rxrpc_connection *conn = call->conn; struct rxrpc_wire_header whdr; @@ -247,6 +248,7 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb) struct kvec iov[2]; rxrpc_serial_t serial; size_t len; + bool lost = false; int ret, opt; _enter(",{%d}", skb->len); @@ -281,7 +283,7 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb) /* If our RTT cache needs working on, request an ACK. Also request * ACKs if a DATA packet appears to have been lost. */ - if (call->cong_mode == RXRPC_CALL_FAST_RETRANSMIT || + if (retrans || (call->peer->rtt_usage < 3 && sp->hdr.seq & 1) || ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), ktime_get_real())) @@ -290,11 +292,9 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb) if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) { static int lose; if ((lose++ & 7) == 7) { - trace_rxrpc_tx_data(call, sp->hdr.seq, serial, - whdr.flags, true); - rxrpc_lose_skb(skb, rxrpc_skb_tx_lost); - _leave(" = 0 [lose]"); - return 0; + ret = 0; + lost = true; + goto done; } } @@ -319,7 +319,8 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb) goto send_fragmentable; done: - trace_rxrpc_tx_data(call, sp->hdr.seq, serial, whdr.flags, false); + trace_rxrpc_tx_data(call, sp->hdr.seq, serial, whdr.flags, + retrans, lost); if (ret >= 0) { ktime_t now = ktime_get_real(); skb->tstamp = now; diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 1f8040d82395..d8dfdce874d8 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -144,7 +144,7 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, if (seq == 1 && rxrpc_is_client_call(call)) rxrpc_expose_client_call(call); - ret = rxrpc_send_data_packet(call, skb); + ret = rxrpc_send_data_packet(call, skb, false); if (ret < 0) { _debug("need instant resend %d", ret); rxrpc_instant_resend(call, ix); diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c index 5154cbf7e540..67b02c45271b 100644 --- a/net/rxrpc/skbuff.c +++ b/net/rxrpc/skbuff.c @@ -77,14 +77,9 @@ void rxrpc_lose_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) if (skb) { int n; CHECK_SLAB_OKAY(&skb->users); - if (op == rxrpc_skb_tx_lost) { - n = atomic_read(select_skb_count(op)); - trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here); - } else { - n = atomic_dec_return(select_skb_count(op)); - trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here); - kfree_skb(skb); - } + n = atomic_dec_return(select_skb_count(op)); + trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here); + kfree_skb(skb); } } From 2629c7fa7c0adfdf023051b404cd538951bd0354 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 29 Sep 2016 22:37:15 +0100 Subject: [PATCH 1622/1715] rxrpc: When activating client conn channels, do state check inside lock In rxrpc_activate_channels(), the connection cache state is checked outside of the lock, which means it can change whilst we're waking calls up, thereby changing whether or not we're allowed to wake calls up. Fix this by moving the check inside the locked region. The check to see if all the channels are currently busy can stay outside of the locked region. Whilst we're at it: (1) Split the locked section out into its own function so that we can call it from other places in a later patch. (2) Determine the mask of channels dependent on the state as we're going to add another state in a later patch that will restrict the number of simultaneous calls to 1 on a connection. Signed-off-by: David Howells --- net/rxrpc/conn_client.c | 36 +++++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index f5ee8bfa5bef..60ef9605167e 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -575,29 +575,43 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn, wake_up(&call->waitq); } +/* + * Assign channels and callNumbers to waiting calls with channel_lock + * held by caller. + */ +static void rxrpc_activate_channels_locked(struct rxrpc_connection *conn) +{ + u8 avail, mask; + + switch (conn->cache_state) { + case RXRPC_CONN_CLIENT_ACTIVE: + mask = RXRPC_ACTIVE_CHANS_MASK; + break; + default: + return; + } + + while (!list_empty(&conn->waiting_calls) && + (avail = ~conn->active_chans, + avail &= mask, + avail != 0)) + rxrpc_activate_one_channel(conn, __ffs(avail)); +} + /* * Assign channels and callNumbers to waiting calls. */ static void rxrpc_activate_channels(struct rxrpc_connection *conn) { - unsigned char mask; - _enter("%d", conn->debug_id); trace_rxrpc_client(conn, -1, rxrpc_client_activate_chans); - if (conn->cache_state != RXRPC_CONN_CLIENT_ACTIVE || - conn->active_chans == RXRPC_ACTIVE_CHANS_MASK) + if (conn->active_chans == RXRPC_ACTIVE_CHANS_MASK) return; spin_lock(&conn->channel_lock); - - while (!list_empty(&conn->waiting_calls) && - (mask = ~conn->active_chans, - mask &= RXRPC_ACTIVE_CHANS_MASK, - mask != 0)) - rxrpc_activate_one_channel(conn, __ffs(mask)); - + rxrpc_activate_channels_locked(conn); spin_unlock(&conn->channel_lock); _leave(""); } From 1e9e5c9521d3667664a6e3c97075f71afec23720 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 29 Sep 2016 22:37:15 +0100 Subject: [PATCH 1623/1715] rxrpc: Reduce the rxrpc_local::services list to a pointer Reduce the rxrpc_local::services list to just a pointer as we don't permit multiple service endpoints to bind to a single transport endpoints (this is excluded by rxrpc_lookup_local()). The reason we don't allow this is that if you send a request to an AFS filesystem service, it will try to talk back to your cache manager on the port you sent from (this is how file change notifications are handled). To prevent someone from stealing your CM callbacks, we don't let AF_RXRPC sockets share a UDP socket if at least one of them has a service bound. Signed-off-by: David Howells --- net/rxrpc/af_rxrpc.c | 21 ++++++++------------- net/rxrpc/ar-internal.h | 3 +-- net/rxrpc/call_accept.c | 8 ++++---- net/rxrpc/local_object.c | 3 +-- net/rxrpc/security.c | 8 ++++---- 5 files changed, 18 insertions(+), 25 deletions(-) diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 8dbf7bed2cc4..44c9c2b0b190 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -136,7 +136,8 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len) struct sockaddr_rxrpc *srx = (struct sockaddr_rxrpc *)saddr; struct sock *sk = sock->sk; struct rxrpc_local *local; - struct rxrpc_sock *rx = rxrpc_sk(sk), *prx; + struct rxrpc_sock *rx = rxrpc_sk(sk); + u16 service_id = srx->srx_service; int ret; _enter("%p,%p,%d", rx, saddr, len); @@ -160,15 +161,12 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len) goto error_unlock; } - if (rx->srx.srx_service) { + if (service_id) { write_lock(&local->services_lock); - hlist_for_each_entry(prx, &local->services, listen_link) { - if (prx->srx.srx_service == rx->srx.srx_service) - goto service_in_use; - } - + if (rcu_access_pointer(local->service)) + goto service_in_use; rx->local = local; - hlist_add_head_rcu(&rx->listen_link, &local->services); + rcu_assign_pointer(local->service, rx); write_unlock(&local->services_lock); rx->sk.sk_state = RXRPC_SERVER_BOUND; @@ -599,7 +597,6 @@ static int rxrpc_create(struct net *net, struct socket *sock, int protocol, rx->family = protocol; rx->calls = RB_ROOT; - INIT_HLIST_NODE(&rx->listen_link); spin_lock_init(&rx->incoming_lock); INIT_LIST_HEAD(&rx->sock_calls); INIT_LIST_HEAD(&rx->to_be_accepted); @@ -681,11 +678,9 @@ static int rxrpc_release_sock(struct sock *sk) sk->sk_state = RXRPC_CLOSE; spin_unlock_bh(&sk->sk_receive_queue.lock); - ASSERTCMP(rx->listen_link.next, !=, LIST_POISON1); - - if (!hlist_unhashed(&rx->listen_link)) { + if (rx->local && rx->local->service == rx) { write_lock(&rx->local->services_lock); - hlist_del_rcu(&rx->listen_link); + rx->local->service = NULL; write_unlock(&rx->local->services_lock); } diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 6aadaa7d8b43..539db54697f9 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -93,7 +93,6 @@ struct rxrpc_sock { rxrpc_notify_new_call_t notify_new_call; /* Func to notify of new call */ rxrpc_discard_new_call_t discard_new_call; /* Func to discard a new call */ struct rxrpc_local *local; /* local endpoint */ - struct hlist_node listen_link; /* link in the local endpoint's listen list */ struct rxrpc_backlog *backlog; /* Preallocation for services */ spinlock_t incoming_lock; /* Incoming call vs service shutdown lock */ struct list_head sock_calls; /* List of calls owned by this socket */ @@ -216,7 +215,7 @@ struct rxrpc_local { struct list_head link; struct socket *socket; /* my UDP socket */ struct work_struct processor; - struct hlist_head services; /* services listening on this endpoint */ + struct rxrpc_sock __rcu *service; /* Service(s) listening on this endpoint */ struct rw_semaphore defrag_sem; /* control re-enablement of IP DF bit */ struct sk_buff_head reject_queue; /* packets awaiting rejection */ struct sk_buff_head event_queue; /* endpoint event packets awaiting processing */ diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index a8d39d7cf42c..3cac231d8405 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -331,14 +331,14 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_sock *rx; struct rxrpc_call *call; + u16 service_id = sp->hdr.serviceId; _enter(""); /* Get the socket providing the service */ - hlist_for_each_entry_rcu_bh(rx, &local->services, listen_link) { - if (rx->srx.srx_service == sp->hdr.serviceId) - goto found_service; - } + rx = rcu_dereference(local->service); + if (service_id == rx->srx.srx_service) + goto found_service; trace_rxrpc_abort("INV", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, RX_INVALID_OPERATION, EOPNOTSUPP); diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index e3fad80b0795..ff4864d550b8 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -86,7 +86,6 @@ static struct rxrpc_local *rxrpc_alloc_local(const struct sockaddr_rxrpc *srx) atomic_set(&local->usage, 1); INIT_LIST_HEAD(&local->link); INIT_WORK(&local->processor, rxrpc_local_processor); - INIT_HLIST_HEAD(&local->services); init_rwsem(&local->defrag_sem); skb_queue_head_init(&local->reject_queue); skb_queue_head_init(&local->event_queue); @@ -292,7 +291,7 @@ static void rxrpc_local_destroyer(struct rxrpc_local *local) mutex_unlock(&rxrpc_local_mutex); ASSERT(RB_EMPTY_ROOT(&local->client_conns)); - ASSERT(hlist_empty(&local->services)); + ASSERT(!local->service); if (socket) { local->socket = NULL; diff --git a/net/rxrpc/security.c b/net/rxrpc/security.c index 82d8134e9287..7d921e56e715 100644 --- a/net/rxrpc/security.c +++ b/net/rxrpc/security.c @@ -131,10 +131,10 @@ int rxrpc_init_server_conn_security(struct rxrpc_connection *conn) /* find the service */ read_lock(&local->services_lock); - hlist_for_each_entry(rx, &local->services, listen_link) { - if (rx->srx.srx_service == conn->params.service_id) - goto found_service; - } + rx = rcu_dereference_protected(local->service, + lockdep_is_held(&local->services_lock)); + if (rx && rx->srx.srx_service == conn->params.service_id) + goto found_service; /* the service appears to have died */ read_unlock(&local->services_lock); From b112a67081e4b06652ecde588bf1d5778fe43d75 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 29 Sep 2016 22:37:16 +0100 Subject: [PATCH 1624/1715] rxrpc: Request more ACKs in slow-start mode Set the request-ACK on more DATA packets whilst we're in slow start mode so that we get sufficient ACKs back to supply information to configure the window. Signed-off-by: David Howells --- net/rxrpc/output.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index ac9a58b619a6..0d47db886f6e 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -284,6 +284,7 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, * ACKs if a DATA packet appears to have been lost. */ if (retrans || + call->cong_mode == RXRPC_CALL_SLOW_START || (call->peer->rtt_usage < 3 && sp->hdr.seq & 1) || ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), ktime_get_real())) From ed1e8679d8bc6537077d1f24bc83b396f6062f09 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 29 Sep 2016 22:37:16 +0100 Subject: [PATCH 1625/1715] rxrpc: Note serial number being ACK'd in the congestion management trace Note the serial number of the packet being ACK'd in the congestion management trace rather than the serial number of the ACK packet. Whilst the serial number of the ACK packet is useful for matching ACK packet in the output of wireshark, the serial number that the ACK is in response to is of more use in working out how different trace lines relate. Signed-off-by: David Howells --- net/rxrpc/input.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 094720dd1eaf..1461d30583c9 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -41,10 +41,10 @@ static void rxrpc_proto_abort(const char *why, */ static void rxrpc_congestion_management(struct rxrpc_call *call, struct sk_buff *skb, - struct rxrpc_ack_summary *summary) + struct rxrpc_ack_summary *summary, + rxrpc_serial_t acked_serial) { enum rxrpc_congest_change change = rxrpc_cong_no_change; - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); unsigned int cumulative_acks = call->cong_cumul_acks; unsigned int cwnd = call->cong_cwnd; bool resend = false; @@ -172,7 +172,7 @@ out_no_clear_ca: cwnd = RXRPC_RXTX_BUFF_SIZE - 1; call->cong_cwnd = cwnd; call->cong_cumul_acks = cumulative_acks; - trace_rxrpc_congest(call, summary, sp->hdr.serial, change); + trace_rxrpc_congest(call, summary, acked_serial, change); if (resend && !test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events)) rxrpc_queue_call(call); return; @@ -848,7 +848,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, false, true, rxrpc_propose_ack_ping_for_lost_reply); - return rxrpc_congestion_management(call, skb, &summary); + return rxrpc_congestion_management(call, skb, &summary, acked_serial); } /* From a935c0523c852feb619a050597bb545e7c818d81 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Thu, 29 Sep 2016 12:21:53 -0400 Subject: [PATCH 1626/1715] net: dsa: mv88e6xxx: add global1 helpers The Global (1) internal SMI device is an extended set of registers containing ATU, PPU, VTU, STU, etc. It is present on every switches, usually at SMI address 0x1B. But old models such as 88E6060 access it at address 0xF, thus using REG_GLOBAL is erroneous. Add a global1_addr info member used by mv88e6xxx_g1_{read,write} and mv88e6xxx_g1_wait helpers in a new global1.c file. This patch finally removes _mv88e6xxx_reg_{read,write}, in favor on the appropriate helpers. No functional changes here. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/Makefile | 1 + drivers/net/dsa/mv88e6xxx/chip.c | 505 +++++++++++++------------- drivers/net/dsa/mv88e6xxx/global1.c | 34 ++ drivers/net/dsa/mv88e6xxx/global1.h | 23 ++ drivers/net/dsa/mv88e6xxx/mv88e6xxx.h | 2 +- 5 files changed, 306 insertions(+), 259 deletions(-) create mode 100644 drivers/net/dsa/mv88e6xxx/global1.c create mode 100644 drivers/net/dsa/mv88e6xxx/global1.h diff --git a/drivers/net/dsa/mv88e6xxx/Makefile b/drivers/net/dsa/mv88e6xxx/Makefile index 697103934317..10ce820daa48 100644 --- a/drivers/net/dsa/mv88e6xxx/Makefile +++ b/drivers/net/dsa/mv88e6xxx/Makefile @@ -1,3 +1,4 @@ obj-$(CONFIG_NET_DSA_MV88E6XXX) += mv88e6xxx.o mv88e6xxx-objs := chip.o +mv88e6xxx-objs += global1.o mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_GLOBAL2) += global2.o diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index b2c25daef294..98dee2c63163 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -31,6 +31,7 @@ #include #include "mv88e6xxx.h" +#include "global1.h" #include "global2.h" static void assert_reg_lock(struct mv88e6xxx_chip *chip) @@ -361,46 +362,27 @@ int mv88e6xxx_update(struct mv88e6xxx_chip *chip, int addr, int reg, u16 update) return mv88e6xxx_write(chip, addr, reg, val); } -static int _mv88e6xxx_reg_read(struct mv88e6xxx_chip *chip, int addr, int reg) +static int mv88e6xxx_ppu_disable(struct mv88e6xxx_chip *chip) { u16 val; - int err; + int i, err; - err = mv88e6xxx_read(chip, addr, reg, &val); + err = mv88e6xxx_g1_read(chip, GLOBAL_CONTROL, &val); if (err) return err; - return val; -} - -static int _mv88e6xxx_reg_write(struct mv88e6xxx_chip *chip, int addr, - int reg, u16 val) -{ - return mv88e6xxx_write(chip, addr, reg, val); -} - -static int mv88e6xxx_ppu_disable(struct mv88e6xxx_chip *chip) -{ - int ret; - int i; - - ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, GLOBAL_CONTROL); - if (ret < 0) - return ret; - - ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_CONTROL, - ret & ~GLOBAL_CONTROL_PPU_ENABLE); - if (ret) - return ret; + err = mv88e6xxx_g1_write(chip, GLOBAL_CONTROL, + val & ~GLOBAL_CONTROL_PPU_ENABLE); + if (err) + return err; for (i = 0; i < 16; i++) { - ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, GLOBAL_STATUS); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_read(chip, GLOBAL_STATUS, &val); + if (err) + return err; usleep_range(1000, 2000); - if ((ret & GLOBAL_STATUS_PPU_MASK) != - GLOBAL_STATUS_PPU_POLLING) + if ((val & GLOBAL_STATUS_PPU_MASK) != GLOBAL_STATUS_PPU_POLLING) return 0; } @@ -409,25 +391,25 @@ static int mv88e6xxx_ppu_disable(struct mv88e6xxx_chip *chip) static int mv88e6xxx_ppu_enable(struct mv88e6xxx_chip *chip) { - int ret, err, i; + u16 val; + int i, err; - ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, GLOBAL_CONTROL); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_read(chip, GLOBAL_CONTROL, &val); + if (err) + return err; - err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_CONTROL, - ret | GLOBAL_CONTROL_PPU_ENABLE); + err = mv88e6xxx_g1_write(chip, GLOBAL_CONTROL, + val | GLOBAL_CONTROL_PPU_ENABLE); if (err) return err; for (i = 0; i < 16; i++) { - ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, GLOBAL_STATUS); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_read(chip, GLOBAL_STATUS, &val); + if (err) + return err; usleep_range(1000, 2000); - if ((ret & GLOBAL_STATUS_PPU_MASK) == - GLOBAL_STATUS_PPU_POLLING) + if ((val & GLOBAL_STATUS_PPU_MASK) == GLOBAL_STATUS_PPU_POLLING) return 0; } @@ -663,12 +645,12 @@ out: static int _mv88e6xxx_stats_wait(struct mv88e6xxx_chip *chip) { - int ret; - int i; + u16 val; + int i, err; for (i = 0; i < 10; i++) { - ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, GLOBAL_STATS_OP); - if ((ret & GLOBAL_STATS_OP_BUSY) == 0) + err = mv88e6xxx_g1_read(chip, GLOBAL_STATS_OP, &val); + if ((val & GLOBAL_STATS_OP_BUSY) == 0) return 0; } @@ -677,55 +659,52 @@ static int _mv88e6xxx_stats_wait(struct mv88e6xxx_chip *chip) static int _mv88e6xxx_stats_snapshot(struct mv88e6xxx_chip *chip, int port) { - int ret; + int err; if (mv88e6xxx_6320_family(chip) || mv88e6xxx_6352_family(chip)) port = (port + 1) << 5; /* Snapshot the hardware statistics counters for this port. */ - ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_STATS_OP, - GLOBAL_STATS_OP_CAPTURE_PORT | - GLOBAL_STATS_OP_HIST_RX_TX | port); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_write(chip, GLOBAL_STATS_OP, + GLOBAL_STATS_OP_CAPTURE_PORT | + GLOBAL_STATS_OP_HIST_RX_TX | port); + if (err) + return err; /* Wait for the snapshotting to complete. */ - ret = _mv88e6xxx_stats_wait(chip); - if (ret < 0) - return ret; - - return 0; + return _mv88e6xxx_stats_wait(chip); } static void _mv88e6xxx_stats_read(struct mv88e6xxx_chip *chip, int stat, u32 *val) { - u32 _val; - int ret; + u32 value; + u16 reg; + int err; *val = 0; - ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_STATS_OP, - GLOBAL_STATS_OP_READ_CAPTURED | - GLOBAL_STATS_OP_HIST_RX_TX | stat); - if (ret < 0) + err = mv88e6xxx_g1_write(chip, GLOBAL_STATS_OP, + GLOBAL_STATS_OP_READ_CAPTURED | + GLOBAL_STATS_OP_HIST_RX_TX | stat); + if (err) return; - ret = _mv88e6xxx_stats_wait(chip); - if (ret < 0) + err = _mv88e6xxx_stats_wait(chip); + if (err) return; - ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, GLOBAL_STATS_COUNTER_32); - if (ret < 0) + err = mv88e6xxx_g1_read(chip, GLOBAL_STATS_COUNTER_32, ®); + if (err) return; - _val = ret << 16; + value = reg << 16; - ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, GLOBAL_STATS_COUNTER_01); - if (ret < 0) + err = mv88e6xxx_g1_read(chip, GLOBAL_STATS_COUNTER_01, ®); + if (err) return; - *val = _val | ret; + *val = value | reg; } static struct mv88e6xxx_hw_stat mv88e6xxx_hw_stats[] = { @@ -932,8 +911,7 @@ static void mv88e6xxx_get_regs(struct dsa_switch *ds, int port, static int _mv88e6xxx_atu_wait(struct mv88e6xxx_chip *chip) { - return mv88e6xxx_wait(chip, REG_GLOBAL, GLOBAL_ATU_OP, - GLOBAL_ATU_OP_BUSY); + return mv88e6xxx_g1_wait(chip, GLOBAL_ATU_OP, GLOBAL_ATU_OP_BUSY); } static int mv88e6xxx_get_eee(struct dsa_switch *ds, int port, @@ -997,32 +975,31 @@ out: static int _mv88e6xxx_atu_cmd(struct mv88e6xxx_chip *chip, u16 fid, u16 cmd) { - int ret; + u16 val; + int err; if (mv88e6xxx_has_fid_reg(chip)) { - ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_ATU_FID, - fid); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_write(chip, GLOBAL_ATU_FID, fid); + if (err) + return err; } else if (mv88e6xxx_num_databases(chip) == 256) { /* ATU DBNum[7:4] are located in ATU Control 15:12 */ - ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, GLOBAL_ATU_CONTROL); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_read(chip, GLOBAL_ATU_CONTROL, &val); + if (err) + return err; - ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_ATU_CONTROL, - (ret & 0xfff) | - ((fid << 8) & 0xf000)); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_write(chip, GLOBAL_ATU_CONTROL, + (val & 0xfff) | ((fid << 8) & 0xf000)); + if (err) + return err; /* ATU DBNum[3:0] are located in ATU Operation 3:0 */ cmd |= fid & 0xf; } - ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_ATU_OP, cmd); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_write(chip, GLOBAL_ATU_OP, cmd); + if (err) + return err; return _mv88e6xxx_atu_wait(chip); } @@ -1047,7 +1024,7 @@ static int _mv88e6xxx_atu_data_write(struct mv88e6xxx_chip *chip, data |= (entry->portv_trunkid << shift) & mask; } - return _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_ATU_DATA, data); + return mv88e6xxx_g1_write(chip, GLOBAL_ATU_DATA, data); } static int _mv88e6xxx_atu_flush_move(struct mv88e6xxx_chip *chip, @@ -1277,17 +1254,16 @@ static int _mv88e6xxx_port_pvid_set(struct mv88e6xxx_chip *chip, static int _mv88e6xxx_vtu_wait(struct mv88e6xxx_chip *chip) { - return mv88e6xxx_wait(chip, REG_GLOBAL, GLOBAL_VTU_OP, - GLOBAL_VTU_OP_BUSY); + return mv88e6xxx_g1_wait(chip, GLOBAL_VTU_OP, GLOBAL_VTU_OP_BUSY); } static int _mv88e6xxx_vtu_cmd(struct mv88e6xxx_chip *chip, u16 op) { - int ret; + int err; - ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_VTU_OP, op); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_write(chip, GLOBAL_VTU_OP, op); + if (err) + return err; return _mv88e6xxx_vtu_wait(chip); } @@ -1308,16 +1284,14 @@ static int _mv88e6xxx_vtu_stu_data_read(struct mv88e6xxx_chip *chip, unsigned int nibble_offset) { u16 regs[3]; - int i; - int ret; + int i, err; for (i = 0; i < 3; ++i) { - ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, - GLOBAL_VTU_DATA_0_3 + i); - if (ret < 0) - return ret; + u16 *reg = ®s[i]; - regs[i] = ret; + err = mv88e6xxx_g1_read(chip, GLOBAL_VTU_DATA_0_3 + i, reg); + if (err) + return err; } for (i = 0; i < chip->info->num_ports; ++i) { @@ -1347,8 +1321,7 @@ static int _mv88e6xxx_vtu_stu_data_write(struct mv88e6xxx_chip *chip, unsigned int nibble_offset) { u16 regs[3] = { 0 }; - int i; - int ret; + int i, err; for (i = 0; i < chip->info->num_ports; ++i) { unsigned int shift = (i % 4) * 4 + nibble_offset; @@ -1358,10 +1331,11 @@ static int _mv88e6xxx_vtu_stu_data_write(struct mv88e6xxx_chip *chip, } for (i = 0; i < 3; ++i) { - ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, - GLOBAL_VTU_DATA_0_3 + i, regs[i]); - if (ret < 0) - return ret; + u16 reg = regs[i]; + + err = mv88e6xxx_g1_write(chip, GLOBAL_VTU_DATA_0_3 + i, reg); + if (err) + return err; } return 0; @@ -1381,63 +1355,61 @@ static int mv88e6xxx_stu_data_write(struct mv88e6xxx_chip *chip, static int _mv88e6xxx_vtu_vid_write(struct mv88e6xxx_chip *chip, u16 vid) { - return _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_VTU_VID, - vid & GLOBAL_VTU_VID_MASK); + return mv88e6xxx_g1_write(chip, GLOBAL_VTU_VID, + vid & GLOBAL_VTU_VID_MASK); } static int _mv88e6xxx_vtu_getnext(struct mv88e6xxx_chip *chip, struct mv88e6xxx_vtu_stu_entry *entry) { struct mv88e6xxx_vtu_stu_entry next = { 0 }; - int ret; + u16 val; + int err; - ret = _mv88e6xxx_vtu_wait(chip); - if (ret < 0) - return ret; + err = _mv88e6xxx_vtu_wait(chip); + if (err) + return err; - ret = _mv88e6xxx_vtu_cmd(chip, GLOBAL_VTU_OP_VTU_GET_NEXT); - if (ret < 0) - return ret; + err = _mv88e6xxx_vtu_cmd(chip, GLOBAL_VTU_OP_VTU_GET_NEXT); + if (err) + return err; - ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, GLOBAL_VTU_VID); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_read(chip, GLOBAL_VTU_VID, &val); + if (err) + return err; - next.vid = ret & GLOBAL_VTU_VID_MASK; - next.valid = !!(ret & GLOBAL_VTU_VID_VALID); + next.vid = val & GLOBAL_VTU_VID_MASK; + next.valid = !!(val & GLOBAL_VTU_VID_VALID); if (next.valid) { - ret = mv88e6xxx_vtu_data_read(chip, &next); - if (ret < 0) - return ret; + err = mv88e6xxx_vtu_data_read(chip, &next); + if (err) + return err; if (mv88e6xxx_has_fid_reg(chip)) { - ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, - GLOBAL_VTU_FID); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_read(chip, GLOBAL_VTU_FID, &val); + if (err) + return err; - next.fid = ret & GLOBAL_VTU_FID_MASK; + next.fid = val & GLOBAL_VTU_FID_MASK; } else if (mv88e6xxx_num_databases(chip) == 256) { /* VTU DBNum[7:4] are located in VTU Operation 11:8, and * VTU DBNum[3:0] are located in VTU Operation 3:0 */ - ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, - GLOBAL_VTU_OP); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_read(chip, GLOBAL_VTU_OP, &val); + if (err) + return err; - next.fid = (ret & 0xf00) >> 4; - next.fid |= ret & 0xf; + next.fid = (val & 0xf00) >> 4; + next.fid |= val & 0xf; } if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_STU)) { - ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, - GLOBAL_VTU_SID); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_read(chip, GLOBAL_VTU_SID, &val); + if (err) + return err; - next.sid = ret & GLOBAL_VTU_SID_MASK; + next.sid = val & GLOBAL_VTU_SID_MASK; } } @@ -1505,34 +1477,32 @@ static int _mv88e6xxx_vtu_loadpurge(struct mv88e6xxx_chip *chip, { u16 op = GLOBAL_VTU_OP_VTU_LOAD_PURGE; u16 reg = 0; - int ret; + int err; - ret = _mv88e6xxx_vtu_wait(chip); - if (ret < 0) - return ret; + err = _mv88e6xxx_vtu_wait(chip); + if (err) + return err; if (!entry->valid) goto loadpurge; /* Write port member tags */ - ret = mv88e6xxx_vtu_data_write(chip, entry); - if (ret < 0) - return ret; + err = mv88e6xxx_vtu_data_write(chip, entry); + if (err) + return err; if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_STU)) { reg = entry->sid & GLOBAL_VTU_SID_MASK; - ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_VTU_SID, - reg); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_write(chip, GLOBAL_VTU_SID, reg); + if (err) + return err; } if (mv88e6xxx_has_fid_reg(chip)) { reg = entry->fid & GLOBAL_VTU_FID_MASK; - ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_VTU_FID, - reg); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_write(chip, GLOBAL_VTU_FID, reg); + if (err) + return err; } else if (mv88e6xxx_num_databases(chip) == 256) { /* VTU DBNum[7:4] are located in VTU Operation 11:8, and * VTU DBNum[3:0] are located in VTU Operation 3:0 @@ -1544,9 +1514,9 @@ static int _mv88e6xxx_vtu_loadpurge(struct mv88e6xxx_chip *chip, reg = GLOBAL_VTU_VID_VALID; loadpurge: reg |= entry->vid & GLOBAL_VTU_VID_MASK; - ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_VTU_VID, reg); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_write(chip, GLOBAL_VTU_VID, reg); + if (err) + return err; return _mv88e6xxx_vtu_cmd(chip, op); } @@ -1555,37 +1525,38 @@ static int _mv88e6xxx_stu_getnext(struct mv88e6xxx_chip *chip, u8 sid, struct mv88e6xxx_vtu_stu_entry *entry) { struct mv88e6xxx_vtu_stu_entry next = { 0 }; - int ret; + u16 val; + int err; - ret = _mv88e6xxx_vtu_wait(chip); - if (ret < 0) - return ret; + err = _mv88e6xxx_vtu_wait(chip); + if (err) + return err; - ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_VTU_SID, - sid & GLOBAL_VTU_SID_MASK); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_write(chip, GLOBAL_VTU_SID, + sid & GLOBAL_VTU_SID_MASK); + if (err) + return err; - ret = _mv88e6xxx_vtu_cmd(chip, GLOBAL_VTU_OP_STU_GET_NEXT); - if (ret < 0) - return ret; + err = _mv88e6xxx_vtu_cmd(chip, GLOBAL_VTU_OP_STU_GET_NEXT); + if (err) + return err; - ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, GLOBAL_VTU_SID); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_read(chip, GLOBAL_VTU_SID, &val); + if (err) + return err; - next.sid = ret & GLOBAL_VTU_SID_MASK; + next.sid = val & GLOBAL_VTU_SID_MASK; - ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, GLOBAL_VTU_VID); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_read(chip, GLOBAL_VTU_VID, &val); + if (err) + return err; - next.valid = !!(ret & GLOBAL_VTU_VID_VALID); + next.valid = !!(val & GLOBAL_VTU_VID_VALID); if (next.valid) { - ret = mv88e6xxx_stu_data_read(chip, &next); - if (ret < 0) - return ret; + err = mv88e6xxx_stu_data_read(chip, &next); + if (err) + return err; } *entry = next; @@ -1596,30 +1567,30 @@ static int _mv88e6xxx_stu_loadpurge(struct mv88e6xxx_chip *chip, struct mv88e6xxx_vtu_stu_entry *entry) { u16 reg = 0; - int ret; + int err; - ret = _mv88e6xxx_vtu_wait(chip); - if (ret < 0) - return ret; + err = _mv88e6xxx_vtu_wait(chip); + if (err) + return err; if (!entry->valid) goto loadpurge; /* Write port states */ - ret = mv88e6xxx_stu_data_write(chip, entry); - if (ret < 0) - return ret; + err = mv88e6xxx_stu_data_write(chip, entry); + if (err) + return err; reg = GLOBAL_VTU_VID_VALID; loadpurge: - ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_VTU_VID, reg); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_write(chip, GLOBAL_VTU_VID, reg); + if (err) + return err; reg = entry->sid & GLOBAL_VTU_SID_MASK; - ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_VTU_SID, reg); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_write(chip, GLOBAL_VTU_SID, reg); + if (err) + return err; return _mv88e6xxx_vtu_cmd(chip, GLOBAL_VTU_OP_STU_LOAD_PURGE); } @@ -2057,14 +2028,13 @@ unlock: static int _mv88e6xxx_atu_mac_write(struct mv88e6xxx_chip *chip, const unsigned char *addr) { - int i, ret; + int i, err; for (i = 0; i < 3; i++) { - ret = _mv88e6xxx_reg_write( - chip, REG_GLOBAL, GLOBAL_ATU_MAC_01 + i, - (addr[i * 2] << 8) | addr[i * 2 + 1]); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_write(chip, GLOBAL_ATU_MAC_01 + i, + (addr[i * 2] << 8) | addr[i * 2 + 1]); + if (err) + return err; } return 0; @@ -2073,15 +2043,16 @@ static int _mv88e6xxx_atu_mac_write(struct mv88e6xxx_chip *chip, static int _mv88e6xxx_atu_mac_read(struct mv88e6xxx_chip *chip, unsigned char *addr) { - int i, ret; + u16 val; + int i, err; for (i = 0; i < 3; i++) { - ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, - GLOBAL_ATU_MAC_01 + i); - if (ret < 0) - return ret; - addr[i * 2] = ret >> 8; - addr[i * 2 + 1] = ret & 0xff; + err = mv88e6xxx_g1_read(chip, GLOBAL_ATU_MAC_01 + i, &val); + if (err) + return err; + + addr[i * 2] = val >> 8; + addr[i * 2 + 1] = val & 0xff; } return 0; @@ -2217,31 +2188,32 @@ static int _mv88e6xxx_atu_getnext(struct mv88e6xxx_chip *chip, u16 fid, struct mv88e6xxx_atu_entry *entry) { struct mv88e6xxx_atu_entry next = { 0 }; - int ret; + u16 val; + int err; next.fid = fid; - ret = _mv88e6xxx_atu_wait(chip); - if (ret < 0) - return ret; + err = _mv88e6xxx_atu_wait(chip); + if (err) + return err; - ret = _mv88e6xxx_atu_cmd(chip, fid, GLOBAL_ATU_OP_GET_NEXT_DB); - if (ret < 0) - return ret; + err = _mv88e6xxx_atu_cmd(chip, fid, GLOBAL_ATU_OP_GET_NEXT_DB); + if (err) + return err; - ret = _mv88e6xxx_atu_mac_read(chip, next.mac); - if (ret < 0) - return ret; + err = _mv88e6xxx_atu_mac_read(chip, next.mac); + if (err) + return err; - ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, GLOBAL_ATU_DATA); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_read(chip, GLOBAL_ATU_DATA, &val); + if (err) + return err; - next.state = ret & GLOBAL_ATU_DATA_STATE_MASK; + next.state = val & GLOBAL_ATU_DATA_STATE_MASK; if (next.state != GLOBAL_ATU_DATA_STATE_UNUSED) { unsigned int mask, shift; - if (ret & GLOBAL_ATU_DATA_TRUNK) { + if (val & GLOBAL_ATU_DATA_TRUNK) { next.trunk = true; mask = GLOBAL_ATU_DATA_TRUNK_ID_MASK; shift = GLOBAL_ATU_DATA_TRUNK_ID_SHIFT; @@ -2251,7 +2223,7 @@ static int _mv88e6xxx_atu_getnext(struct mv88e6xxx_chip *chip, u16 fid, shift = GLOBAL_ATU_DATA_PORT_VECTOR_SHIFT; } - next.portv_trunkid = (ret & mask) >> shift; + next.portv_trunkid = (val & mask) >> shift; } *entry = next; @@ -2422,8 +2394,8 @@ static int mv88e6xxx_switch_reset(struct mv88e6xxx_chip *chip) u16 is_reset = (ppu_active ? 0x8800 : 0xc800); struct gpio_desc *gpiod = chip->reset; unsigned long timeout; - int err, ret; u16 reg; + int err; int i; /* Set all ports to the disabled state. */ @@ -2454,20 +2426,20 @@ static int mv88e6xxx_switch_reset(struct mv88e6xxx_chip *chip) * through global registers 0x18 and 0x19. */ if (ppu_active) - err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, 0x04, 0xc000); + err = mv88e6xxx_g1_write(chip, 0x04, 0xc000); else - err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, 0x04, 0xc400); + err = mv88e6xxx_g1_write(chip, 0x04, 0xc400); if (err) return err; /* Wait up to one second for reset to complete. */ timeout = jiffies + 1 * HZ; while (time_before(jiffies, timeout)) { - ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, 0x00); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_read(chip, 0x00, ®); + if (err) + return err; - if ((ret & is_reset) == is_reset) + if ((reg & is_reset) == is_reset) break; usleep_range(1000, 2000); } @@ -2749,22 +2721,23 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) return mv88e6xxx_port_write(chip, port, PORT_DEFAULT_VLAN, 0x0000); } -static int mv88e6xxx_g1_set_switch_mac(struct mv88e6xxx_chip *chip, u8 *addr) +int mv88e6xxx_g1_set_switch_mac(struct mv88e6xxx_chip *chip, u8 *addr) { int err; - err = mv88e6xxx_write(chip, REG_GLOBAL, GLOBAL_MAC_01, - (addr[0] << 8) | addr[1]); + err = mv88e6xxx_g1_write(chip, GLOBAL_MAC_01, (addr[0] << 8) | addr[1]); if (err) return err; - err = mv88e6xxx_write(chip, REG_GLOBAL, GLOBAL_MAC_23, - (addr[2] << 8) | addr[3]); + err = mv88e6xxx_g1_write(chip, GLOBAL_MAC_23, (addr[2] << 8) | addr[3]); if (err) return err; - return mv88e6xxx_write(chip, REG_GLOBAL, GLOBAL_MAC_45, - (addr[4] << 8) | addr[5]); + err = mv88e6xxx_g1_write(chip, GLOBAL_MAC_45, (addr[4] << 8) | addr[5]); + if (err) + return err; + + return 0; } static int mv88e6xxx_g1_set_age_time(struct mv88e6xxx_chip *chip, @@ -2783,7 +2756,7 @@ static int mv88e6xxx_g1_set_age_time(struct mv88e6xxx_chip *chip, /* Round to nearest multiple of coeff */ age_time = (msecs + coeff / 2) / coeff; - err = mv88e6xxx_read(chip, REG_GLOBAL, GLOBAL_ATU_CONTROL, &val); + err = mv88e6xxx_g1_read(chip, GLOBAL_ATU_CONTROL, &val); if (err) return err; @@ -2791,7 +2764,7 @@ static int mv88e6xxx_g1_set_age_time(struct mv88e6xxx_chip *chip, val &= ~0xff0; val |= age_time << 4; - return mv88e6xxx_write(chip, REG_GLOBAL, GLOBAL_ATU_CONTROL, val); + return mv88e6xxx_g1_write(chip, GLOBAL_ATU_CONTROL, val); } static int mv88e6xxx_set_ageing_time(struct dsa_switch *ds, @@ -2822,7 +2795,7 @@ static int mv88e6xxx_g1_setup(struct mv88e6xxx_chip *chip) mv88e6xxx_has(chip, MV88E6XXX_FLAG_PPU_ACTIVE)) reg |= GLOBAL_CONTROL_PPU_ENABLE; - err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_CONTROL, reg); + err = mv88e6xxx_g1_write(chip, GLOBAL_CONTROL, reg); if (err) return err; @@ -2832,15 +2805,14 @@ static int mv88e6xxx_g1_setup(struct mv88e6xxx_chip *chip) reg = upstream_port << GLOBAL_MONITOR_CONTROL_INGRESS_SHIFT | upstream_port << GLOBAL_MONITOR_CONTROL_EGRESS_SHIFT | upstream_port << GLOBAL_MONITOR_CONTROL_ARP_SHIFT; - err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_MONITOR_CONTROL, - reg); + err = mv88e6xxx_g1_write(chip, GLOBAL_MONITOR_CONTROL, reg); if (err) return err; /* Disable remote management, and set the switch's DSA device number. */ - err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_CONTROL_2, - GLOBAL_CONTROL_2_MULTIPLE_CASCADE | - (ds->index & 0x1f)); + err = mv88e6xxx_g1_write(chip, GLOBAL_CONTROL_2, + GLOBAL_CONTROL_2_MULTIPLE_CASCADE | + (ds->index & 0x1f)); if (err) return err; @@ -2853,8 +2825,8 @@ static int mv88e6xxx_g1_setup(struct mv88e6xxx_chip *chip) * enable address learn messages to be sent to all message * ports. */ - err = mv88e6xxx_write(chip, REG_GLOBAL, GLOBAL_ATU_CONTROL, - GLOBAL_ATU_CONTROL_LEARN2ALL); + err = mv88e6xxx_g1_write(chip, GLOBAL_ATU_CONTROL, + GLOBAL_ATU_CONTROL_LEARN2ALL); if (err) return err; @@ -2868,39 +2840,39 @@ static int mv88e6xxx_g1_setup(struct mv88e6xxx_chip *chip) return err; /* Configure the IP ToS mapping registers. */ - err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_IP_PRI_0, 0x0000); + err = mv88e6xxx_g1_write(chip, GLOBAL_IP_PRI_0, 0x0000); if (err) return err; - err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_IP_PRI_1, 0x0000); + err = mv88e6xxx_g1_write(chip, GLOBAL_IP_PRI_1, 0x0000); if (err) return err; - err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_IP_PRI_2, 0x5555); + err = mv88e6xxx_g1_write(chip, GLOBAL_IP_PRI_2, 0x5555); if (err) return err; - err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_IP_PRI_3, 0x5555); + err = mv88e6xxx_g1_write(chip, GLOBAL_IP_PRI_3, 0x5555); if (err) return err; - err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_IP_PRI_4, 0xaaaa); + err = mv88e6xxx_g1_write(chip, GLOBAL_IP_PRI_4, 0xaaaa); if (err) return err; - err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_IP_PRI_5, 0xaaaa); + err = mv88e6xxx_g1_write(chip, GLOBAL_IP_PRI_5, 0xaaaa); if (err) return err; - err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_IP_PRI_6, 0xffff); + err = mv88e6xxx_g1_write(chip, GLOBAL_IP_PRI_6, 0xffff); if (err) return err; - err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_IP_PRI_7, 0xffff); + err = mv88e6xxx_g1_write(chip, GLOBAL_IP_PRI_7, 0xffff); if (err) return err; /* Configure the IEEE 802.1p priority mapping register. */ - err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_IEEE_PRI, 0xfa41); + err = mv88e6xxx_g1_write(chip, GLOBAL_IEEE_PRI, 0xfa41); if (err) return err; /* Clear the statistics counters for all ports */ - err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_STATS_OP, - GLOBAL_STATS_OP_FLUSH_ALL); + err = mv88e6xxx_g1_write(chip, GLOBAL_STATS_OP, + GLOBAL_STATS_OP_FLUSH_ALL); if (err) return err; @@ -3265,6 +3237,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 4096, .num_ports = 10, .port_base_addr = 0x10, + .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6097, }, @@ -3276,6 +3249,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 256, .num_ports = 11, .port_base_addr = 0x10, + .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6095, }, @@ -3287,6 +3261,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 4096, .num_ports = 3, .port_base_addr = 0x10, + .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6165, }, @@ -3298,6 +3273,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 256, .num_ports = 8, .port_base_addr = 0x10, + .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6185, }, @@ -3309,6 +3285,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 4096, .num_ports = 6, .port_base_addr = 0x10, + .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6165, }, @@ -3320,6 +3297,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 4096, .num_ports = 6, .port_base_addr = 0x10, + .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6165, }, @@ -3331,6 +3309,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 4096, .num_ports = 7, .port_base_addr = 0x10, + .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6351, }, @@ -3342,6 +3321,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 4096, .num_ports = 7, .port_base_addr = 0x10, + .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6352, }, @@ -3353,6 +3333,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 4096, .num_ports = 7, .port_base_addr = 0x10, + .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6351, }, @@ -3364,6 +3345,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 4096, .num_ports = 7, .port_base_addr = 0x10, + .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6352, }, @@ -3375,6 +3357,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 256, .num_ports = 10, .port_base_addr = 0x10, + .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6185, }, @@ -3386,6 +3369,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 4096, .num_ports = 7, .port_base_addr = 0x10, + .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6352, }, @@ -3397,6 +3381,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 4096, .num_ports = 7, .port_base_addr = 0x10, + .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6320, }, @@ -3408,6 +3393,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 4096, .num_ports = 7, .port_base_addr = 0x10, + .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6320, }, @@ -3419,6 +3405,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 4096, .num_ports = 7, .port_base_addr = 0x10, + .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6351, }, @@ -3430,6 +3417,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 4096, .num_ports = 7, .port_base_addr = 0x10, + .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6351, }, @@ -3441,6 +3429,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 4096, .num_ports = 7, .port_base_addr = 0x10, + .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6352, }, diff --git a/drivers/net/dsa/mv88e6xxx/global1.c b/drivers/net/dsa/mv88e6xxx/global1.c new file mode 100644 index 000000000000..d358720b6c2d --- /dev/null +++ b/drivers/net/dsa/mv88e6xxx/global1.c @@ -0,0 +1,34 @@ +/* + * Marvell 88E6xxx Switch Global (1) Registers support + * + * Copyright (c) 2008 Marvell Semiconductor + * + * Copyright (c) 2016 Vivien Didelot + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include "mv88e6xxx.h" +#include "global1.h" + +int mv88e6xxx_g1_read(struct mv88e6xxx_chip *chip, int reg, u16 *val) +{ + int addr = chip->info->global1_addr; + + return mv88e6xxx_read(chip, addr, reg, val); +} + +int mv88e6xxx_g1_write(struct mv88e6xxx_chip *chip, int reg, u16 val) +{ + int addr = chip->info->global1_addr; + + return mv88e6xxx_write(chip, addr, reg, val); +} + +int mv88e6xxx_g1_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask) +{ + return mv88e6xxx_wait(chip, chip->info->global1_addr, reg, mask); +} diff --git a/drivers/net/dsa/mv88e6xxx/global1.h b/drivers/net/dsa/mv88e6xxx/global1.h new file mode 100644 index 000000000000..62291e6fe3a3 --- /dev/null +++ b/drivers/net/dsa/mv88e6xxx/global1.h @@ -0,0 +1,23 @@ +/* + * Marvell 88E6xxx Switch Global (1) Registers support + * + * Copyright (c) 2008 Marvell Semiconductor + * + * Copyright (c) 2016 Vivien Didelot + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef _MV88E6XXX_GLOBAL1_H +#define _MV88E6XXX_GLOBAL1_H + +#include "mv88e6xxx.h" + +int mv88e6xxx_g1_read(struct mv88e6xxx_chip *chip, int reg, u16 *val); +int mv88e6xxx_g1_write(struct mv88e6xxx_chip *chip, int reg, u16 val); +int mv88e6xxx_g1_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask); + +#endif /* _MV88E6XXX_GLOBAL1_H */ diff --git a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h index 827988397fd8..bf78f6dc18cf 100644 --- a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h +++ b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h @@ -159,7 +159,6 @@ #define PORT_TAG_REGMAP_0123 0x18 #define PORT_TAG_REGMAP_4567 0x19 -#define REG_GLOBAL 0x1b #define GLOBAL_STATUS 0x00 #define GLOBAL_STATUS_PPU_STATE BIT(15) /* 6351 and 6171 */ /* Two bits for 6165, 6185 etc */ @@ -613,6 +612,7 @@ struct mv88e6xxx_info { unsigned int num_databases; unsigned int num_ports; unsigned int port_base_addr; + unsigned int global1_addr; unsigned int age_time_coeff; unsigned long long flags; }; From 9fe850fb219e3fb729277b11229c2943bc5096a9 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Thu, 29 Sep 2016 12:21:54 -0400 Subject: [PATCH 1627/1715] net: dsa: mv88e6xxx: abstract REG_GLOBAL2 Similarly to the ports, phys, and Global SMI devices, abstract the SMI device address of the Global 2 registers in a few g2 static helpers. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/global2.c | 78 +++++++++++++++++---------- drivers/net/dsa/mv88e6xxx/mv88e6xxx.h | 1 - 2 files changed, 49 insertions(+), 30 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/global2.c b/drivers/net/dsa/mv88e6xxx/global2.c index 99ed028298ac..f31d553c7448 100644 --- a/drivers/net/dsa/mv88e6xxx/global2.c +++ b/drivers/net/dsa/mv88e6xxx/global2.c @@ -14,6 +14,28 @@ #include "mv88e6xxx.h" #include "global2.h" +#define ADDR_GLOBAL2 0x1c + +static int mv88e6xxx_g2_read(struct mv88e6xxx_chip *chip, int reg, u16 *val) +{ + return mv88e6xxx_read(chip, ADDR_GLOBAL2, reg, val); +} + +static int mv88e6xxx_g2_write(struct mv88e6xxx_chip *chip, int reg, u16 val) +{ + return mv88e6xxx_write(chip, ADDR_GLOBAL2, reg, val); +} + +static int mv88e6xxx_g2_update(struct mv88e6xxx_chip *chip, int reg, u16 update) +{ + return mv88e6xxx_update(chip, ADDR_GLOBAL2, reg, update); +} + +static int mv88e6xxx_g2_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask) +{ + return mv88e6xxx_wait(chip, ADDR_GLOBAL2, reg, mask); +} + /* Offset 0x06: Device Mapping Table register */ static int mv88e6xxx_g2_device_mapping_write(struct mv88e6xxx_chip *chip, @@ -21,7 +43,7 @@ static int mv88e6xxx_g2_device_mapping_write(struct mv88e6xxx_chip *chip, { u16 val = (target << 8) | (port & 0xf); - return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_DEVICE_MAPPING, val); + return mv88e6xxx_g2_update(chip, GLOBAL2_DEVICE_MAPPING, val); } static int mv88e6xxx_g2_set_device_mapping(struct mv88e6xxx_chip *chip) @@ -58,7 +80,7 @@ static int mv88e6xxx_g2_trunk_mask_write(struct mv88e6xxx_chip *chip, int num, if (hask) val |= GLOBAL2_TRUNK_MASK_HASK; - return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_TRUNK_MASK, val); + return mv88e6xxx_g2_update(chip, GLOBAL2_TRUNK_MASK, val); } /* Offset 0x08: Trunk Mapping Table register */ @@ -69,7 +91,7 @@ static int mv88e6xxx_g2_trunk_mapping_write(struct mv88e6xxx_chip *chip, int id, const u16 port_mask = BIT(chip->info->num_ports) - 1; u16 val = (id << 11) | (map & port_mask); - return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_TRUNK_MAPPING, val); + return mv88e6xxx_g2_update(chip, GLOBAL2_TRUNK_MAPPING, val); } static int mv88e6xxx_g2_clear_trunk(struct mv88e6xxx_chip *chip) @@ -105,15 +127,15 @@ static int mv88e6xxx_g2_clear_irl(struct mv88e6xxx_chip *chip) /* Init all Ingress Rate Limit resources of all ports */ for (port = 0; port < chip->info->num_ports; ++port) { /* XXX newer chips (like 88E6390) have different 2-bit ops */ - err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_IRL_CMD, - GLOBAL2_IRL_CMD_OP_INIT_ALL | - (port << 8)); + err = mv88e6xxx_g2_write(chip, GLOBAL2_IRL_CMD, + GLOBAL2_IRL_CMD_OP_INIT_ALL | + (port << 8)); if (err) break; /* Wait for the operation to complete */ - err = mv88e6xxx_wait(chip, REG_GLOBAL2, GLOBAL2_IRL_CMD, - GLOBAL2_IRL_CMD_BUSY); + err = mv88e6xxx_g2_wait(chip, GLOBAL2_IRL_CMD, + GLOBAL2_IRL_CMD_BUSY); if (err) break; } @@ -128,7 +150,7 @@ static int mv88e6xxx_g2_switch_mac_write(struct mv88e6xxx_chip *chip, { u16 val = (pointer << 8) | data; - return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_SWITCH_MAC, val); + return mv88e6xxx_g2_update(chip, GLOBAL2_SWITCH_MAC, val); } int mv88e6xxx_g2_set_switch_mac(struct mv88e6xxx_chip *chip, u8 *addr) @@ -151,7 +173,7 @@ static int mv88e6xxx_g2_pot_write(struct mv88e6xxx_chip *chip, int pointer, { u16 val = (pointer << 8) | (data & 0x7); - return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_PRIO_OVERRIDE, val); + return mv88e6xxx_g2_update(chip, GLOBAL2_PRIO_OVERRIDE, val); } static int mv88e6xxx_g2_clear_pot(struct mv88e6xxx_chip *chip) @@ -174,16 +196,16 @@ static int mv88e6xxx_g2_clear_pot(struct mv88e6xxx_chip *chip) static int mv88e6xxx_g2_eeprom_wait(struct mv88e6xxx_chip *chip) { - return mv88e6xxx_wait(chip, REG_GLOBAL2, GLOBAL2_EEPROM_CMD, - GLOBAL2_EEPROM_CMD_BUSY | - GLOBAL2_EEPROM_CMD_RUNNING); + return mv88e6xxx_g2_wait(chip, GLOBAL2_EEPROM_CMD, + GLOBAL2_EEPROM_CMD_BUSY | + GLOBAL2_EEPROM_CMD_RUNNING); } static int mv88e6xxx_g2_eeprom_cmd(struct mv88e6xxx_chip *chip, u16 cmd) { int err; - err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_EEPROM_CMD, cmd); + err = mv88e6xxx_g2_write(chip, GLOBAL2_EEPROM_CMD, cmd); if (err) return err; @@ -204,7 +226,7 @@ static int mv88e6xxx_g2_eeprom_read16(struct mv88e6xxx_chip *chip, if (err) return err; - return mv88e6xxx_read(chip, REG_GLOBAL2, GLOBAL2_EEPROM_DATA, data); + return mv88e6xxx_g2_read(chip, GLOBAL2_EEPROM_DATA, data); } static int mv88e6xxx_g2_eeprom_write16(struct mv88e6xxx_chip *chip, @@ -217,7 +239,7 @@ static int mv88e6xxx_g2_eeprom_write16(struct mv88e6xxx_chip *chip, if (err) return err; - err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_EEPROM_DATA, data); + err = mv88e6xxx_g2_write(chip, GLOBAL2_EEPROM_DATA, data); if (err) return err; @@ -283,7 +305,7 @@ int mv88e6xxx_g2_set_eeprom16(struct mv88e6xxx_chip *chip, int err; /* Ensure the RO WriteEn bit is set */ - err = mv88e6xxx_read(chip, REG_GLOBAL2, GLOBAL2_EEPROM_CMD, &val); + err = mv88e6xxx_g2_read(chip, GLOBAL2_EEPROM_CMD, &val); if (err) return err; @@ -346,15 +368,15 @@ int mv88e6xxx_g2_set_eeprom16(struct mv88e6xxx_chip *chip, static int mv88e6xxx_g2_smi_phy_wait(struct mv88e6xxx_chip *chip) { - return mv88e6xxx_wait(chip, REG_GLOBAL2, GLOBAL2_SMI_PHY_CMD, - GLOBAL2_SMI_PHY_CMD_BUSY); + return mv88e6xxx_g2_wait(chip, GLOBAL2_SMI_PHY_CMD, + GLOBAL2_SMI_PHY_CMD_BUSY); } static int mv88e6xxx_g2_smi_phy_cmd(struct mv88e6xxx_chip *chip, u16 cmd) { int err; - err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_SMI_PHY_CMD, cmd); + err = mv88e6xxx_g2_write(chip, GLOBAL2_SMI_PHY_CMD, cmd); if (err) return err; @@ -375,7 +397,7 @@ int mv88e6xxx_g2_smi_phy_read(struct mv88e6xxx_chip *chip, int addr, int reg, if (err) return err; - return mv88e6xxx_read(chip, REG_GLOBAL2, GLOBAL2_SMI_PHY_DATA, val); + return mv88e6xxx_g2_read(chip, GLOBAL2_SMI_PHY_DATA, val); } int mv88e6xxx_g2_smi_phy_write(struct mv88e6xxx_chip *chip, int addr, int reg, @@ -388,7 +410,7 @@ int mv88e6xxx_g2_smi_phy_write(struct mv88e6xxx_chip *chip, int addr, int reg, if (err) return err; - err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_SMI_PHY_DATA, val); + err = mv88e6xxx_g2_write(chip, GLOBAL2_SMI_PHY_DATA, val); if (err) return err; @@ -404,8 +426,7 @@ int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip) /* Consider the frames with reserved multicast destination * addresses matching 01:80:c2:00:00:2x as MGMT. */ - err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_MGMT_EN_2X, - 0xffff); + err = mv88e6xxx_g2_write(chip, GLOBAL2_MGMT_EN_2X, 0xffff); if (err) return err; } @@ -414,8 +435,7 @@ int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip) /* Consider the frames with reserved multicast destination * addresses matching 01:80:c2:00:00:0x as MGMT. */ - err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_MGMT_EN_0X, - 0xffff); + err = mv88e6xxx_g2_write(chip, GLOBAL2_MGMT_EN_0X, 0xffff); if (err) return err; } @@ -429,7 +449,7 @@ int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip) if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_0X) || mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_2X)) reg |= GLOBAL2_SWITCH_MGMT_RSVD2CPU | 0x7; - err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_SWITCH_MGMT, reg); + err = mv88e6xxx_g2_write(chip, GLOBAL2_SWITCH_MGMT, reg); if (err) return err; @@ -454,8 +474,8 @@ int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip) if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_PVT)) { /* Initialize Cross-chip Port VLAN Table to reset defaults */ - err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_PVT_ADDR, - GLOBAL2_PVT_ADDR_OP_INIT_ONES); + err = mv88e6xxx_g2_write(chip, GLOBAL2_PVT_ADDR, + GLOBAL2_PVT_ADDR_OP_INIT_ONES); if (err) return err; } diff --git a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h index bf78f6dc18cf..2f1010818a92 100644 --- a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h +++ b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h @@ -276,7 +276,6 @@ #define GLOBAL_STATS_COUNTER_32 0x1e #define GLOBAL_STATS_COUNTER_01 0x1f -#define REG_GLOBAL2 0x1c #define GLOBAL2_INT_SOURCE 0x00 #define GLOBAL2_INT_MASK 0x01 #define GLOBAL2_MGMT_EN_2X 0x02 From 6dc10bbc467d6f76e2665b865d0d8f9e0049b3e6 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Thu, 29 Sep 2016 12:21:55 -0400 Subject: [PATCH 1628/1715] net: dsa: mv88e6xxx: add flags for FID registers Add flags to describe the presence of Global 1 ATU FID register (0x01) and VTU FID register (0x02), instead of checking families. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 16 +++------------- drivers/net/dsa/mv88e6xxx/mv88e6xxx.h | 24 ++++++++++++++++++++---- 2 files changed, 23 insertions(+), 17 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 98dee2c63163..b7eecc957bcc 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -565,16 +565,6 @@ static unsigned int mv88e6xxx_num_databases(struct mv88e6xxx_chip *chip) return chip->info->num_databases; } -static bool mv88e6xxx_has_fid_reg(struct mv88e6xxx_chip *chip) -{ - /* Does the device have dedicated FID registers for ATU and VTU ops? */ - if (mv88e6xxx_6097_family(chip) || mv88e6xxx_6165_family(chip) || - mv88e6xxx_6351_family(chip) || mv88e6xxx_6352_family(chip)) - return true; - - return false; -} - /* We expect the switch to perform auto negotiation if there is a real * phy. However, in the case of a fixed link phy, we force the port * settings from the fixed link settings. @@ -978,7 +968,7 @@ static int _mv88e6xxx_atu_cmd(struct mv88e6xxx_chip *chip, u16 fid, u16 cmd) u16 val; int err; - if (mv88e6xxx_has_fid_reg(chip)) { + if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G1_ATU_FID)) { err = mv88e6xxx_g1_write(chip, GLOBAL_ATU_FID, fid); if (err) return err; @@ -1386,7 +1376,7 @@ static int _mv88e6xxx_vtu_getnext(struct mv88e6xxx_chip *chip, if (err) return err; - if (mv88e6xxx_has_fid_reg(chip)) { + if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G1_VTU_FID)) { err = mv88e6xxx_g1_read(chip, GLOBAL_VTU_FID, &val); if (err) return err; @@ -1498,7 +1488,7 @@ static int _mv88e6xxx_vtu_loadpurge(struct mv88e6xxx_chip *chip, return err; } - if (mv88e6xxx_has_fid_reg(chip)) { + if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G1_VTU_FID)) { reg = entry->fid & GLOBAL_VTU_FID_MASK; err = mv88e6xxx_g1_write(chip, GLOBAL_VTU_FID, reg); if (err) diff --git a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h index 2f1010818a92..6c8584f14388 100644 --- a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h +++ b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h @@ -170,8 +170,8 @@ #define GLOBAL_MAC_01 0x01 #define GLOBAL_MAC_23 0x02 #define GLOBAL_MAC_45 0x03 -#define GLOBAL_ATU_FID 0x01 /* 6097 6165 6351 6352 */ -#define GLOBAL_VTU_FID 0x02 /* 6097 6165 6351 6352 */ +#define GLOBAL_ATU_FID 0x01 +#define GLOBAL_VTU_FID 0x02 #define GLOBAL_VTU_FID_MASK 0xfff #define GLOBAL_VTU_SID 0x03 /* 6097 6165 6351 6352 */ #define GLOBAL_VTU_SID_MASK 0x3f @@ -408,6 +408,11 @@ enum mv88e6xxx_cap { */ MV88E6XXX_CAP_SERDES, + /* Switch Global (1) Registers. + */ + MV88E6XXX_CAP_G1_ATU_FID, /* (0x01) ATU FID Register */ + MV88E6XXX_CAP_G1_VTU_FID, /* (0x02) VTU FID Register */ + /* Switch Global 2 Registers. * The device contains a second set of global 16-bit registers. */ @@ -460,6 +465,9 @@ enum mv88e6xxx_cap { #define MV88E6XXX_FLAG_SERDES BIT_ULL(MV88E6XXX_CAP_SERDES) +#define MV88E6XXX_FLAG_G1_ATU_FID BIT_ULL(MV88E6XXX_CAP_G1_ATU_FID) +#define MV88E6XXX_FLAG_G1_VTU_FID BIT_ULL(MV88E6XXX_CAP_G1_VTU_FID) + #define MV88E6XXX_FLAG_GLOBAL2 BIT_ULL(MV88E6XXX_CAP_GLOBAL2) #define MV88E6XXX_FLAG_G2_MGMT_EN_2X BIT_ULL(MV88E6XXX_CAP_G2_MGMT_EN_2X) #define MV88E6XXX_FLAG_G2_MGMT_EN_0X BIT_ULL(MV88E6XXX_CAP_G2_MGMT_EN_0X) @@ -519,7 +527,9 @@ enum mv88e6xxx_cap { MV88E6XXX_FLAGS_MULTI_CHIP) #define MV88E6XXX_FLAGS_FAMILY_6097 \ - (MV88E6XXX_FLAG_GLOBAL2 | \ + (MV88E6XXX_FLAG_G1_ATU_FID | \ + MV88E6XXX_FLAG_G1_VTU_FID | \ + MV88E6XXX_FLAG_GLOBAL2 | \ MV88E6XXX_FLAG_G2_MGMT_EN_2X | \ MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ MV88E6XXX_FLAG_G2_POT | \ @@ -531,7 +541,9 @@ enum mv88e6xxx_cap { MV88E6XXX_FLAGS_PVT) #define MV88E6XXX_FLAGS_FAMILY_6165 \ - (MV88E6XXX_FLAG_GLOBAL2 | \ + (MV88E6XXX_FLAG_G1_ATU_FID | \ + MV88E6XXX_FLAG_G1_VTU_FID | \ + MV88E6XXX_FLAG_GLOBAL2 | \ MV88E6XXX_FLAG_G2_MGMT_EN_2X | \ MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ MV88E6XXX_FLAG_G2_SWITCH_MAC | \ @@ -570,6 +582,8 @@ enum mv88e6xxx_cap { #define MV88E6XXX_FLAGS_FAMILY_6351 \ (MV88E6XXX_FLAG_EDSA | \ + MV88E6XXX_FLAG_G1_ATU_FID | \ + MV88E6XXX_FLAG_G1_VTU_FID | \ MV88E6XXX_FLAG_GLOBAL2 | \ MV88E6XXX_FLAG_G2_MGMT_EN_2X | \ MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ @@ -587,6 +601,8 @@ enum mv88e6xxx_cap { #define MV88E6XXX_FLAGS_FAMILY_6352 \ (MV88E6XXX_FLAG_EDSA | \ MV88E6XXX_FLAG_EEE | \ + MV88E6XXX_FLAG_G1_ATU_FID | \ + MV88E6XXX_FLAG_G1_VTU_FID | \ MV88E6XXX_FLAG_GLOBAL2 | \ MV88E6XXX_FLAG_G2_MGMT_EN_2X | \ MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ From de33376b39b6ea939d53ea44c8c6595b80826501 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Thu, 29 Sep 2016 12:21:56 -0400 Subject: [PATCH 1629/1715] net: dsa: mv88e6xxx: expose mv88e6xxx_num_databases The mv88e6xxx_num_databases will be used by shared code, so move it inline to the header file. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 5 ----- drivers/net/dsa/mv88e6xxx/mv88e6xxx.h | 5 +++++ 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index b7eecc957bcc..6a55bba943c3 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -560,11 +560,6 @@ static bool mv88e6xxx_6352_family(struct mv88e6xxx_chip *chip) return chip->info->family == MV88E6XXX_FAMILY_6352; } -static unsigned int mv88e6xxx_num_databases(struct mv88e6xxx_chip *chip) -{ - return chip->info->num_databases; -} - /* We expect the switch to perform auto negotiation if there is a real * phy. However, in the case of a fixed link phy, we force the port * settings from the fixed link settings. diff --git a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h index 6c8584f14388..20fe6c61d5a7 100644 --- a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h +++ b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h @@ -732,6 +732,11 @@ static inline bool mv88e6xxx_has(struct mv88e6xxx_chip *chip, return (chip->info->flags & flags) == flags; } +static inline unsigned int mv88e6xxx_num_databases(struct mv88e6xxx_chip *chip) +{ + return chip->info->num_databases; +} + int mv88e6xxx_read(struct mv88e6xxx_chip *chip, int addr, int reg, u16 *val); int mv88e6xxx_write(struct mv88e6xxx_chip *chip, int addr, int reg, u16 val); int mv88e6xxx_update(struct mv88e6xxx_chip *chip, int addr, int reg, From 370b4ffbd8dda2b1a61dc63a5ac3088d8e715d53 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Thu, 29 Sep 2016 12:21:57 -0400 Subject: [PATCH 1630/1715] net: dsa: mv88e6xxx: add mv88e6xxx_num_ports helper Add an mv88e6xxx_num_ports helper instead of digging in the chip info structure. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 30 +++++++++++++-------------- drivers/net/dsa/mv88e6xxx/global2.c | 8 +++---- drivers/net/dsa/mv88e6xxx/mv88e6xxx.h | 5 +++++ 3 files changed, 24 insertions(+), 19 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 6a55bba943c3..9056d9eb3c13 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -613,7 +613,7 @@ static void mv88e6xxx_adjust_link(struct dsa_switch *ds, int port, reg |= PORT_PCS_CTRL_DUPLEX_FULL; if ((mv88e6xxx_6352_family(chip) || mv88e6xxx_6351_family(chip)) && - (port >= chip->info->num_ports - 2)) { + (port >= mv88e6xxx_num_ports(chip) - 2)) { if (phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID) reg |= PORT_PCS_CTRL_RGMII_DELAY_RXCLK; if (phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID) @@ -1112,7 +1112,7 @@ static int _mv88e6xxx_port_state(struct mv88e6xxx_chip *chip, int port, static int _mv88e6xxx_port_based_vlan_map(struct mv88e6xxx_chip *chip, int port) { struct net_device *bridge = chip->ports[port].bridge_dev; - const u16 mask = (1 << chip->info->num_ports) - 1; + const u16 mask = (1 << mv88e6xxx_num_ports(chip)) - 1; struct dsa_switch *ds = chip->ds; u16 output_ports = 0; u16 reg; @@ -1123,7 +1123,7 @@ static int _mv88e6xxx_port_based_vlan_map(struct mv88e6xxx_chip *chip, int port) if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) { output_ports = mask; } else { - for (i = 0; i < chip->info->num_ports; ++i) { + for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) { /* allow sending frames to every group member */ if (bridge && chip->ports[i].bridge_dev == bridge) output_ports |= BIT(i); @@ -1279,7 +1279,7 @@ static int _mv88e6xxx_vtu_stu_data_read(struct mv88e6xxx_chip *chip, return err; } - for (i = 0; i < chip->info->num_ports; ++i) { + for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) { unsigned int shift = (i % 4) * 4 + nibble_offset; u16 reg = regs[i / 4]; @@ -1308,7 +1308,7 @@ static int _mv88e6xxx_vtu_stu_data_write(struct mv88e6xxx_chip *chip, u16 regs[3] = { 0 }; int i, err; - for (i = 0; i < chip->info->num_ports; ++i) { + for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) { unsigned int shift = (i % 4) * 4 + nibble_offset; u8 data = entry->data[i]; @@ -1658,7 +1658,7 @@ static int _mv88e6xxx_fid_new(struct mv88e6xxx_chip *chip, u16 *fid) bitmap_zero(fid_bitmap, MV88E6XXX_N_FID); /* Set every FID bit used by the (un)bridged ports */ - for (i = 0; i < chip->info->num_ports; ++i) { + for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) { err = _mv88e6xxx_port_fid_get(chip, i, fid); if (err) return err; @@ -1708,7 +1708,7 @@ static int _mv88e6xxx_vtu_new(struct mv88e6xxx_chip *chip, u16 vid, return err; /* exclude all ports except the CPU and DSA ports */ - for (i = 0; i < chip->info->num_ports; ++i) + for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) vlan.data[i] = dsa_is_cpu_port(ds, i) || dsa_is_dsa_port(ds, i) ? GLOBAL_VTU_DATA_MEMBER_TAG_UNMODIFIED : GLOBAL_VTU_DATA_MEMBER_TAG_NON_MEMBER; @@ -1797,7 +1797,7 @@ static int mv88e6xxx_port_check_hw_vlan(struct dsa_switch *ds, int port, if (vlan.vid > vid_end) break; - for (i = 0; i < chip->info->num_ports; ++i) { + for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) { if (dsa_is_dsa_port(ds, i) || dsa_is_cpu_port(ds, i)) continue; @@ -1959,7 +1959,7 @@ static int _mv88e6xxx_port_vlan_del(struct mv88e6xxx_chip *chip, /* keep the VLAN unless all ports are excluded */ vlan.valid = false; - for (i = 0; i < chip->info->num_ports; ++i) { + for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) { if (dsa_is_cpu_port(ds, i) || dsa_is_dsa_port(ds, i)) continue; @@ -2340,7 +2340,7 @@ static int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, int port, /* Assign the bridge and remap each port's VLANTable */ chip->ports[port].bridge_dev = bridge; - for (i = 0; i < chip->info->num_ports; ++i) { + for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) { if (chip->ports[i].bridge_dev == bridge) { err = _mv88e6xxx_port_based_vlan_map(chip, i); if (err) @@ -2364,7 +2364,7 @@ static void mv88e6xxx_port_bridge_leave(struct dsa_switch *ds, int port) /* Unassign the bridge and remap each port's VLANTable */ chip->ports[port].bridge_dev = NULL; - for (i = 0; i < chip->info->num_ports; ++i) + for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) if (i == port || chip->ports[i].bridge_dev == bridge) if (_mv88e6xxx_port_based_vlan_map(chip, i)) netdev_warn(ds->ports[i].netdev, @@ -2384,7 +2384,7 @@ static int mv88e6xxx_switch_reset(struct mv88e6xxx_chip *chip) int i; /* Set all ports to the disabled state. */ - for (i = 0; i < chip->info->num_ports; i++) { + for (i = 0; i < mv88e6xxx_num_ports(chip); i++) { err = mv88e6xxx_port_read(chip, i, PORT_CONTROL, ®); if (err) return err; @@ -2885,7 +2885,7 @@ static int mv88e6xxx_setup(struct dsa_switch *ds) goto unlock; /* Setup Switch Port Registers */ - for (i = 0; i < chip->info->num_ports; i++) { + for (i = 0; i < mv88e6xxx_num_ports(chip); i++) { err = mv88e6xxx_setup_port(chip, i); if (err) goto unlock; @@ -2933,7 +2933,7 @@ static int mv88e6xxx_mdio_read(struct mii_bus *bus, int phy, int reg) u16 val; int err; - if (phy >= chip->info->num_ports) + if (phy >= mv88e6xxx_num_ports(chip)) return 0xffff; mutex_lock(&chip->reg_lock); @@ -2948,7 +2948,7 @@ static int mv88e6xxx_mdio_write(struct mii_bus *bus, int phy, int reg, u16 val) struct mv88e6xxx_chip *chip = bus->priv; int err; - if (phy >= chip->info->num_ports) + if (phy >= mv88e6xxx_num_ports(chip)) return 0xffff; mutex_lock(&chip->reg_lock); diff --git a/drivers/net/dsa/mv88e6xxx/global2.c b/drivers/net/dsa/mv88e6xxx/global2.c index f31d553c7448..cf686e7506a9 100644 --- a/drivers/net/dsa/mv88e6xxx/global2.c +++ b/drivers/net/dsa/mv88e6xxx/global2.c @@ -74,7 +74,7 @@ static int mv88e6xxx_g2_set_device_mapping(struct mv88e6xxx_chip *chip) static int mv88e6xxx_g2_trunk_mask_write(struct mv88e6xxx_chip *chip, int num, bool hask, u16 mask) { - const u16 port_mask = BIT(chip->info->num_ports) - 1; + const u16 port_mask = BIT(mv88e6xxx_num_ports(chip)) - 1; u16 val = (num << 12) | (mask & port_mask); if (hask) @@ -88,7 +88,7 @@ static int mv88e6xxx_g2_trunk_mask_write(struct mv88e6xxx_chip *chip, int num, static int mv88e6xxx_g2_trunk_mapping_write(struct mv88e6xxx_chip *chip, int id, u16 map) { - const u16 port_mask = BIT(chip->info->num_ports) - 1; + const u16 port_mask = BIT(mv88e6xxx_num_ports(chip)) - 1; u16 val = (id << 11) | (map & port_mask); return mv88e6xxx_g2_update(chip, GLOBAL2_TRUNK_MAPPING, val); @@ -96,7 +96,7 @@ static int mv88e6xxx_g2_trunk_mapping_write(struct mv88e6xxx_chip *chip, int id, static int mv88e6xxx_g2_clear_trunk(struct mv88e6xxx_chip *chip) { - const u16 port_mask = BIT(chip->info->num_ports) - 1; + const u16 port_mask = BIT(mv88e6xxx_num_ports(chip)) - 1; int i, err; /* Clear all eight possible Trunk Mask vectors */ @@ -125,7 +125,7 @@ static int mv88e6xxx_g2_clear_irl(struct mv88e6xxx_chip *chip) int port, err; /* Init all Ingress Rate Limit resources of all ports */ - for (port = 0; port < chip->info->num_ports; ++port) { + for (port = 0; port < mv88e6xxx_num_ports(chip); ++port) { /* XXX newer chips (like 88E6390) have different 2-bit ops */ err = mv88e6xxx_g2_write(chip, GLOBAL2_IRL_CMD, GLOBAL2_IRL_CMD_OP_INIT_ALL | diff --git a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h index 20fe6c61d5a7..cf639ed91d03 100644 --- a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h +++ b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h @@ -737,6 +737,11 @@ static inline unsigned int mv88e6xxx_num_databases(struct mv88e6xxx_chip *chip) return chip->info->num_databases; } +static inline unsigned int mv88e6xxx_num_ports(struct mv88e6xxx_chip *chip) +{ + return chip->info->num_ports; +} + int mv88e6xxx_read(struct mv88e6xxx_chip *chip, int addr, int reg, u16 *val); int mv88e6xxx_write(struct mv88e6xxx_chip *chip, int addr, int reg, u16 val); int mv88e6xxx_update(struct mv88e6xxx_chip *chip, int addr, int reg, From b4e47c0fb94923781addbb4616fa82fd825ac7ec Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Thu, 29 Sep 2016 12:21:58 -0400 Subject: [PATCH 1631/1715] net: dsa: mv88e6xxx: rename mv88e6xxx_vtu_stu_entry The STU (if the switch has one) is abstracted and accessed through the VTU operations and data registers. Thus rename the mv88e6xxx_vtu_stu_entry struct to mv88e6xxx_vtu_entry. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 46 +++++++++++++-------------- drivers/net/dsa/mv88e6xxx/mv88e6xxx.h | 5 +-- 2 files changed, 24 insertions(+), 27 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 9056d9eb3c13..d805661af3cc 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -1265,7 +1265,7 @@ static int _mv88e6xxx_vtu_stu_flush(struct mv88e6xxx_chip *chip) } static int _mv88e6xxx_vtu_stu_data_read(struct mv88e6xxx_chip *chip, - struct mv88e6xxx_vtu_stu_entry *entry, + struct mv88e6xxx_vtu_entry *entry, unsigned int nibble_offset) { u16 regs[3]; @@ -1290,19 +1290,19 @@ static int _mv88e6xxx_vtu_stu_data_read(struct mv88e6xxx_chip *chip, } static int mv88e6xxx_vtu_data_read(struct mv88e6xxx_chip *chip, - struct mv88e6xxx_vtu_stu_entry *entry) + struct mv88e6xxx_vtu_entry *entry) { return _mv88e6xxx_vtu_stu_data_read(chip, entry, 0); } static int mv88e6xxx_stu_data_read(struct mv88e6xxx_chip *chip, - struct mv88e6xxx_vtu_stu_entry *entry) + struct mv88e6xxx_vtu_entry *entry) { return _mv88e6xxx_vtu_stu_data_read(chip, entry, 2); } static int _mv88e6xxx_vtu_stu_data_write(struct mv88e6xxx_chip *chip, - struct mv88e6xxx_vtu_stu_entry *entry, + struct mv88e6xxx_vtu_entry *entry, unsigned int nibble_offset) { u16 regs[3] = { 0 }; @@ -1327,13 +1327,13 @@ static int _mv88e6xxx_vtu_stu_data_write(struct mv88e6xxx_chip *chip, } static int mv88e6xxx_vtu_data_write(struct mv88e6xxx_chip *chip, - struct mv88e6xxx_vtu_stu_entry *entry) + struct mv88e6xxx_vtu_entry *entry) { return _mv88e6xxx_vtu_stu_data_write(chip, entry, 0); } static int mv88e6xxx_stu_data_write(struct mv88e6xxx_chip *chip, - struct mv88e6xxx_vtu_stu_entry *entry) + struct mv88e6xxx_vtu_entry *entry) { return _mv88e6xxx_vtu_stu_data_write(chip, entry, 2); } @@ -1345,9 +1345,9 @@ static int _mv88e6xxx_vtu_vid_write(struct mv88e6xxx_chip *chip, u16 vid) } static int _mv88e6xxx_vtu_getnext(struct mv88e6xxx_chip *chip, - struct mv88e6xxx_vtu_stu_entry *entry) + struct mv88e6xxx_vtu_entry *entry) { - struct mv88e6xxx_vtu_stu_entry next = { 0 }; + struct mv88e6xxx_vtu_entry next = { 0 }; u16 val; int err; @@ -1407,7 +1407,7 @@ static int mv88e6xxx_port_vlan_dump(struct dsa_switch *ds, int port, int (*cb)(struct switchdev_obj *obj)) { struct mv88e6xxx_chip *chip = ds->priv; - struct mv88e6xxx_vtu_stu_entry next; + struct mv88e6xxx_vtu_entry next; u16 pvid; int err; @@ -1458,7 +1458,7 @@ unlock: } static int _mv88e6xxx_vtu_loadpurge(struct mv88e6xxx_chip *chip, - struct mv88e6xxx_vtu_stu_entry *entry) + struct mv88e6xxx_vtu_entry *entry) { u16 op = GLOBAL_VTU_OP_VTU_LOAD_PURGE; u16 reg = 0; @@ -1507,9 +1507,9 @@ loadpurge: } static int _mv88e6xxx_stu_getnext(struct mv88e6xxx_chip *chip, u8 sid, - struct mv88e6xxx_vtu_stu_entry *entry) + struct mv88e6xxx_vtu_entry *entry) { - struct mv88e6xxx_vtu_stu_entry next = { 0 }; + struct mv88e6xxx_vtu_entry next = { 0 }; u16 val; int err; @@ -1549,7 +1549,7 @@ static int _mv88e6xxx_stu_getnext(struct mv88e6xxx_chip *chip, u8 sid, } static int _mv88e6xxx_stu_loadpurge(struct mv88e6xxx_chip *chip, - struct mv88e6xxx_vtu_stu_entry *entry) + struct mv88e6xxx_vtu_entry *entry) { u16 reg = 0; int err; @@ -1652,7 +1652,7 @@ static int _mv88e6xxx_port_fid_set(struct mv88e6xxx_chip *chip, static int _mv88e6xxx_fid_new(struct mv88e6xxx_chip *chip, u16 *fid) { DECLARE_BITMAP(fid_bitmap, MV88E6XXX_N_FID); - struct mv88e6xxx_vtu_stu_entry vlan; + struct mv88e6xxx_vtu_entry vlan; int i, err; bitmap_zero(fid_bitmap, MV88E6XXX_N_FID); @@ -1694,10 +1694,10 @@ static int _mv88e6xxx_fid_new(struct mv88e6xxx_chip *chip, u16 *fid) } static int _mv88e6xxx_vtu_new(struct mv88e6xxx_chip *chip, u16 vid, - struct mv88e6xxx_vtu_stu_entry *entry) + struct mv88e6xxx_vtu_entry *entry) { struct dsa_switch *ds = chip->ds; - struct mv88e6xxx_vtu_stu_entry vlan = { + struct mv88e6xxx_vtu_entry vlan = { .valid = true, .vid = vid, }; @@ -1715,7 +1715,7 @@ static int _mv88e6xxx_vtu_new(struct mv88e6xxx_chip *chip, u16 vid, if (mv88e6xxx_6097_family(chip) || mv88e6xxx_6165_family(chip) || mv88e6xxx_6351_family(chip) || mv88e6xxx_6352_family(chip)) { - struct mv88e6xxx_vtu_stu_entry vstp; + struct mv88e6xxx_vtu_entry vstp; /* Adding a VTU entry requires a valid STU entry. As VSTP is not * implemented, only one STU entry is needed to cover all VTU @@ -1742,7 +1742,7 @@ static int _mv88e6xxx_vtu_new(struct mv88e6xxx_chip *chip, u16 vid, } static int _mv88e6xxx_vtu_get(struct mv88e6xxx_chip *chip, u16 vid, - struct mv88e6xxx_vtu_stu_entry *entry, bool creat) + struct mv88e6xxx_vtu_entry *entry, bool creat) { int err; @@ -1774,7 +1774,7 @@ static int mv88e6xxx_port_check_hw_vlan(struct dsa_switch *ds, int port, u16 vid_begin, u16 vid_end) { struct mv88e6xxx_chip *chip = ds->priv; - struct mv88e6xxx_vtu_stu_entry vlan; + struct mv88e6xxx_vtu_entry vlan; int i, err; if (!vid_begin) @@ -1899,7 +1899,7 @@ mv88e6xxx_port_vlan_prepare(struct dsa_switch *ds, int port, static int _mv88e6xxx_port_vlan_add(struct mv88e6xxx_chip *chip, int port, u16 vid, bool untagged) { - struct mv88e6xxx_vtu_stu_entry vlan; + struct mv88e6xxx_vtu_entry vlan; int err; err = _mv88e6xxx_vtu_get(chip, vid, &vlan, true); @@ -1944,7 +1944,7 @@ static int _mv88e6xxx_port_vlan_del(struct mv88e6xxx_chip *chip, int port, u16 vid) { struct dsa_switch *ds = chip->ds; - struct mv88e6xxx_vtu_stu_entry vlan; + struct mv88e6xxx_vtu_entry vlan; int i, err; err = _mv88e6xxx_vtu_get(chip, vid, &vlan, false); @@ -2103,7 +2103,7 @@ static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port, const unsigned char *addr, u16 vid, u8 state) { - struct mv88e6xxx_vtu_stu_entry vlan; + struct mv88e6xxx_vtu_entry vlan; struct mv88e6xxx_atu_entry entry; int err; @@ -2278,7 +2278,7 @@ static int mv88e6xxx_port_db_dump(struct mv88e6xxx_chip *chip, int port, struct switchdev_obj *obj, int (*cb)(struct switchdev_obj *obj)) { - struct mv88e6xxx_vtu_stu_entry vlan = { + struct mv88e6xxx_vtu_entry vlan = { .vid = GLOBAL_VTU_VID_MASK, /* all ones */ }; u16 fid; diff --git a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h index cf639ed91d03..ee7873aaf127 100644 --- a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h +++ b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h @@ -640,12 +640,9 @@ struct mv88e6xxx_atu_entry { u8 mac[ETH_ALEN]; }; -struct mv88e6xxx_vtu_stu_entry { - /* VTU only */ +struct mv88e6xxx_vtu_entry { u16 vid; u16 fid; - - /* VTU and STU */ u8 sid; bool valid; u8 data[DSA_MAX_PORTS]; From c08026aba70a97925512266d29429dbd62df497d Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Thu, 29 Sep 2016 12:21:59 -0400 Subject: [PATCH 1632/1715] net: dsa: mv88e6xxx: rename mv88e6xxx_ops The mv88e6xxx_ops is used to describe how to access the chip registers. It can be through SMI (via an MDIO bus), or via another interface such as crafted remote management frames. The correct BUS operations structure is chosen at runtime, depending on the chip address and connectivity. We will need the mv88e6xxx_ops name for future chip-wide operation structure, thus rename mv88e6xxx_ops to more explicit mv88e6xxx_bus_ops. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 10 +++++----- drivers/net/dsa/mv88e6xxx/mv88e6xxx.h | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index d805661af3cc..ad31d3ed3aca 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -98,7 +98,7 @@ static int mv88e6xxx_smi_single_chip_write(struct mv88e6xxx_chip *chip, return 0; } -static const struct mv88e6xxx_ops mv88e6xxx_smi_single_chip_ops = { +static const struct mv88e6xxx_bus_ops mv88e6xxx_smi_single_chip_ops = { .read = mv88e6xxx_smi_single_chip_read, .write = mv88e6xxx_smi_single_chip_write, }; @@ -180,7 +180,7 @@ static int mv88e6xxx_smi_multi_chip_write(struct mv88e6xxx_chip *chip, return 0; } -static const struct mv88e6xxx_ops mv88e6xxx_smi_multi_chip_ops = { +static const struct mv88e6xxx_bus_ops mv88e6xxx_smi_multi_chip_ops = { .read = mv88e6xxx_smi_multi_chip_read, .write = mv88e6xxx_smi_multi_chip_write, }; @@ -515,7 +515,7 @@ static int mv88e6xxx_phy_ppu_write(struct mv88e6xxx_chip *chip, int addr, return err; } -static const struct mv88e6xxx_ops mv88e6xxx_phy_ppu_ops = { +static const struct mv88e6xxx_bus_ops mv88e6xxx_phy_ppu_ops = { .read = mv88e6xxx_phy_ppu_read, .write = mv88e6xxx_phy_ppu_write, }; @@ -3479,12 +3479,12 @@ static struct mv88e6xxx_chip *mv88e6xxx_alloc_chip(struct device *dev) return chip; } -static const struct mv88e6xxx_ops mv88e6xxx_g2_smi_phy_ops = { +static const struct mv88e6xxx_bus_ops mv88e6xxx_g2_smi_phy_ops = { .read = mv88e6xxx_g2_smi_phy_read, .write = mv88e6xxx_g2_smi_phy_write, }; -static const struct mv88e6xxx_ops mv88e6xxx_phy_ops = { +static const struct mv88e6xxx_bus_ops mv88e6xxx_phy_ops = { .read = mv88e6xxx_read, .write = mv88e6xxx_write, }; diff --git a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h index ee7873aaf127..b9ef769311d9 100644 --- a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h +++ b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h @@ -648,7 +648,7 @@ struct mv88e6xxx_vtu_entry { u8 data[DSA_MAX_PORTS]; }; -struct mv88e6xxx_ops; +struct mv88e6xxx_bus_ops; struct mv88e6xxx_priv_port { struct net_device *bridge_dev; @@ -669,14 +669,14 @@ struct mv88e6xxx_chip { /* The MII bus and the address on the bus that is used to * communication with the switch */ - const struct mv88e6xxx_ops *smi_ops; + const struct mv88e6xxx_bus_ops *smi_ops; struct mii_bus *bus; int sw_addr; /* Handles automatic disabling and re-enabling of the PHY * polling unit. */ - const struct mv88e6xxx_ops *phy_ops; + const struct mv88e6xxx_bus_ops *phy_ops; struct mutex ppu_mutex; int ppu_disabled; struct work_struct ppu_work; @@ -705,7 +705,7 @@ struct mv88e6xxx_chip { struct mii_bus *mdio_bus; }; -struct mv88e6xxx_ops { +struct mv88e6xxx_bus_ops { int (*read)(struct mv88e6xxx_chip *chip, int addr, int reg, u16 *val); int (*write)(struct mv88e6xxx_chip *chip, int addr, int reg, u16 val); }; From b3469dd8adade11e8234854d79b43daf8ce478c9 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Thu, 29 Sep 2016 12:22:00 -0400 Subject: [PATCH 1633/1715] net: dsa: mv88e6xxx: add chip-wide ops Introduce a mv88e6xxx_ops structure to describe supported chip-wide functions and assign the correct variant to the chip models. For the moment, add only PHY access routines. This allows to get rid of the PHY ops structures and the usage of PHY flags. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 136 ++++++++++++++++++++------ drivers/net/dsa/mv88e6xxx/mv88e6xxx.h | 28 +++--- 2 files changed, 121 insertions(+), 43 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index ad31d3ed3aca..83a37693a8db 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -238,10 +238,10 @@ static int mv88e6xxx_phy_read(struct mv88e6xxx_chip *chip, int phy, { int addr = phy; /* PHY devices addresses start at 0x0 */ - if (!chip->phy_ops) + if (!chip->info->ops->phy_read) return -EOPNOTSUPP; - return chip->phy_ops->read(chip, addr, reg, val); + return chip->info->ops->phy_read(chip, addr, reg, val); } static int mv88e6xxx_phy_write(struct mv88e6xxx_chip *chip, int phy, @@ -249,10 +249,10 @@ static int mv88e6xxx_phy_write(struct mv88e6xxx_chip *chip, int phy, { int addr = phy; /* PHY devices addresses start at 0x0 */ - if (!chip->phy_ops) + if (!chip->info->ops->phy_write) return -EOPNOTSUPP; - return chip->phy_ops->write(chip, addr, reg, val); + return chip->info->ops->phy_write(chip, addr, reg, val); } static int mv88e6xxx_phy_page_get(struct mv88e6xxx_chip *chip, int phy, u8 page) @@ -515,11 +515,6 @@ static int mv88e6xxx_phy_ppu_write(struct mv88e6xxx_chip *chip, int addr, return err; } -static const struct mv88e6xxx_bus_ops mv88e6xxx_phy_ppu_ops = { - .read = mv88e6xxx_phy_ppu_read, - .write = mv88e6xxx_phy_ppu_write, -}; - static bool mv88e6xxx_6065_family(struct mv88e6xxx_chip *chip) { return chip->info->family == MV88E6XXX_FAMILY_6065; @@ -3214,6 +3209,91 @@ static int mv88e6xxx_set_eeprom(struct dsa_switch *ds, return err; } +static const struct mv88e6xxx_ops mv88e6085_ops = { + .phy_read = mv88e6xxx_phy_ppu_read, + .phy_write = mv88e6xxx_phy_ppu_write, +}; + +static const struct mv88e6xxx_ops mv88e6095_ops = { + .phy_read = mv88e6xxx_phy_ppu_read, + .phy_write = mv88e6xxx_phy_ppu_write, +}; + +static const struct mv88e6xxx_ops mv88e6123_ops = { + .phy_read = mv88e6xxx_read, + .phy_write = mv88e6xxx_write, +}; + +static const struct mv88e6xxx_ops mv88e6131_ops = { + .phy_read = mv88e6xxx_phy_ppu_read, + .phy_write = mv88e6xxx_phy_ppu_write, +}; + +static const struct mv88e6xxx_ops mv88e6161_ops = { + .phy_read = mv88e6xxx_read, + .phy_write = mv88e6xxx_write, +}; + +static const struct mv88e6xxx_ops mv88e6165_ops = { + .phy_read = mv88e6xxx_read, + .phy_write = mv88e6xxx_write, +}; + +static const struct mv88e6xxx_ops mv88e6171_ops = { + .phy_read = mv88e6xxx_g2_smi_phy_read, + .phy_write = mv88e6xxx_g2_smi_phy_write, +}; + +static const struct mv88e6xxx_ops mv88e6172_ops = { + .phy_read = mv88e6xxx_g2_smi_phy_read, + .phy_write = mv88e6xxx_g2_smi_phy_write, +}; + +static const struct mv88e6xxx_ops mv88e6175_ops = { + .phy_read = mv88e6xxx_g2_smi_phy_read, + .phy_write = mv88e6xxx_g2_smi_phy_write, +}; + +static const struct mv88e6xxx_ops mv88e6176_ops = { + .phy_read = mv88e6xxx_g2_smi_phy_read, + .phy_write = mv88e6xxx_g2_smi_phy_write, +}; + +static const struct mv88e6xxx_ops mv88e6185_ops = { + .phy_read = mv88e6xxx_phy_ppu_read, + .phy_write = mv88e6xxx_phy_ppu_write, +}; + +static const struct mv88e6xxx_ops mv88e6240_ops = { + .phy_read = mv88e6xxx_g2_smi_phy_read, + .phy_write = mv88e6xxx_g2_smi_phy_write, +}; + +static const struct mv88e6xxx_ops mv88e6320_ops = { + .phy_read = mv88e6xxx_g2_smi_phy_read, + .phy_write = mv88e6xxx_g2_smi_phy_write, +}; + +static const struct mv88e6xxx_ops mv88e6321_ops = { + .phy_read = mv88e6xxx_g2_smi_phy_read, + .phy_write = mv88e6xxx_g2_smi_phy_write, +}; + +static const struct mv88e6xxx_ops mv88e6350_ops = { + .phy_read = mv88e6xxx_g2_smi_phy_read, + .phy_write = mv88e6xxx_g2_smi_phy_write, +}; + +static const struct mv88e6xxx_ops mv88e6351_ops = { + .phy_read = mv88e6xxx_g2_smi_phy_read, + .phy_write = mv88e6xxx_g2_smi_phy_write, +}; + +static const struct mv88e6xxx_ops mv88e6352_ops = { + .phy_read = mv88e6xxx_g2_smi_phy_read, + .phy_write = mv88e6xxx_g2_smi_phy_write, +}; + static const struct mv88e6xxx_info mv88e6xxx_table[] = { [MV88E6085] = { .prod_num = PORT_SWITCH_ID_PROD_NUM_6085, @@ -3225,6 +3305,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6097, + .ops = &mv88e6085_ops, }, [MV88E6095] = { @@ -3237,6 +3318,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6095, + .ops = &mv88e6095_ops, }, [MV88E6123] = { @@ -3249,6 +3331,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6165, + .ops = &mv88e6123_ops, }, [MV88E6131] = { @@ -3261,6 +3344,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6185, + .ops = &mv88e6131_ops, }, [MV88E6161] = { @@ -3273,6 +3357,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6165, + .ops = &mv88e6161_ops, }, [MV88E6165] = { @@ -3285,6 +3370,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6165, + .ops = &mv88e6165_ops, }, [MV88E6171] = { @@ -3297,6 +3383,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6351, + .ops = &mv88e6171_ops, }, [MV88E6172] = { @@ -3309,6 +3396,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6352, + .ops = &mv88e6172_ops, }, [MV88E6175] = { @@ -3321,6 +3409,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6351, + .ops = &mv88e6175_ops, }, [MV88E6176] = { @@ -3333,6 +3422,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6352, + .ops = &mv88e6176_ops, }, [MV88E6185] = { @@ -3345,6 +3435,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6185, + .ops = &mv88e6185_ops, }, [MV88E6240] = { @@ -3357,6 +3448,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6352, + .ops = &mv88e6240_ops, }, [MV88E6320] = { @@ -3369,6 +3461,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6320, + .ops = &mv88e6320_ops, }, [MV88E6321] = { @@ -3381,6 +3474,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6320, + .ops = &mv88e6321_ops, }, [MV88E6350] = { @@ -3393,6 +3487,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6351, + .ops = &mv88e6350_ops, }, [MV88E6351] = { @@ -3405,6 +3500,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6351, + .ops = &mv88e6351_ops, }, [MV88E6352] = { @@ -3417,6 +3513,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6352, + .ops = &mv88e6352_ops, }, }; @@ -3479,33 +3576,16 @@ static struct mv88e6xxx_chip *mv88e6xxx_alloc_chip(struct device *dev) return chip; } -static const struct mv88e6xxx_bus_ops mv88e6xxx_g2_smi_phy_ops = { - .read = mv88e6xxx_g2_smi_phy_read, - .write = mv88e6xxx_g2_smi_phy_write, -}; - -static const struct mv88e6xxx_bus_ops mv88e6xxx_phy_ops = { - .read = mv88e6xxx_read, - .write = mv88e6xxx_write, -}; - static void mv88e6xxx_phy_init(struct mv88e6xxx_chip *chip) { - if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_SMI_PHY)) { - chip->phy_ops = &mv88e6xxx_g2_smi_phy_ops; - } else if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_PPU)) { - chip->phy_ops = &mv88e6xxx_phy_ppu_ops; + if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_PPU)) mv88e6xxx_ppu_state_init(chip); - } else { - chip->phy_ops = &mv88e6xxx_phy_ops; - } } static void mv88e6xxx_phy_destroy(struct mv88e6xxx_chip *chip) { - if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_PPU)) { + if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_PPU)) mv88e6xxx_ppu_state_destroy(chip); - } } static int mv88e6xxx_smi_init(struct mv88e6xxx_chip *chip, diff --git a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h index b9ef769311d9..8e1290278ef6 100644 --- a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h +++ b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h @@ -427,8 +427,6 @@ enum mv88e6xxx_cap { MV88E6XXX_CAP_G2_POT, /* (0x0f) Priority Override Table */ MV88E6XXX_CAP_G2_EEPROM_CMD, /* (0x14) EEPROM Command */ MV88E6XXX_CAP_G2_EEPROM_DATA, /* (0x15) EEPROM Data */ - MV88E6XXX_CAP_G2_SMI_PHY_CMD, /* (0x18) SMI PHY Command */ - MV88E6XXX_CAP_G2_SMI_PHY_DATA, /* (0x19) SMI PHY Data */ /* PHY Polling Unit. * See GLOBAL_CONTROL_PPU_ENABLE and GLOBAL_STATUS_PPU_POLLING. @@ -479,8 +477,6 @@ enum mv88e6xxx_cap { #define MV88E6XXX_FLAG_G2_POT BIT_ULL(MV88E6XXX_CAP_G2_POT) #define MV88E6XXX_FLAG_G2_EEPROM_CMD BIT_ULL(MV88E6XXX_CAP_G2_EEPROM_CMD) #define MV88E6XXX_FLAG_G2_EEPROM_DATA BIT_ULL(MV88E6XXX_CAP_G2_EEPROM_DATA) -#define MV88E6XXX_FLAG_G2_SMI_PHY_CMD BIT_ULL(MV88E6XXX_CAP_G2_SMI_PHY_CMD) -#define MV88E6XXX_FLAG_G2_SMI_PHY_DATA BIT_ULL(MV88E6XXX_CAP_G2_SMI_PHY_DATA) #define MV88E6XXX_FLAG_PPU BIT_ULL(MV88E6XXX_CAP_PPU) #define MV88E6XXX_FLAG_PPU_ACTIVE BIT_ULL(MV88E6XXX_CAP_PPU_ACTIVE) @@ -514,11 +510,6 @@ enum mv88e6xxx_cap { (MV88E6XXX_FLAG_PHY_PAGE | \ MV88E6XXX_FLAG_SERDES) -/* Indirect PHY access via Global2 SMI PHY registers */ -#define MV88E6XXX_FLAGS_SMI_PHY \ - (MV88E6XXX_FLAG_G2_SMI_PHY_CMD |\ - MV88E6XXX_FLAG_G2_SMI_PHY_DATA) - #define MV88E6XXX_FLAGS_FAMILY_6095 \ (MV88E6XXX_FLAG_GLOBAL2 | \ MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ @@ -577,8 +568,7 @@ enum mv88e6xxx_cap { MV88E6XXX_FLAGS_EEPROM16 | \ MV88E6XXX_FLAGS_IRL | \ MV88E6XXX_FLAGS_MULTI_CHIP | \ - MV88E6XXX_FLAGS_PVT | \ - MV88E6XXX_FLAGS_SMI_PHY) + MV88E6XXX_FLAGS_PVT) #define MV88E6XXX_FLAGS_FAMILY_6351 \ (MV88E6XXX_FLAG_EDSA | \ @@ -595,8 +585,7 @@ enum mv88e6xxx_cap { MV88E6XXX_FLAG_VTU | \ MV88E6XXX_FLAGS_IRL | \ MV88E6XXX_FLAGS_MULTI_CHIP | \ - MV88E6XXX_FLAGS_PVT | \ - MV88E6XXX_FLAGS_SMI_PHY) + MV88E6XXX_FLAGS_PVT) #define MV88E6XXX_FLAGS_FAMILY_6352 \ (MV88E6XXX_FLAG_EDSA | \ @@ -617,8 +606,9 @@ enum mv88e6xxx_cap { MV88E6XXX_FLAGS_IRL | \ MV88E6XXX_FLAGS_MULTI_CHIP | \ MV88E6XXX_FLAGS_PVT | \ - MV88E6XXX_FLAGS_SERDES | \ - MV88E6XXX_FLAGS_SMI_PHY) + MV88E6XXX_FLAGS_SERDES) + +struct mv88e6xxx_ops; struct mv88e6xxx_info { enum mv88e6xxx_family family; @@ -630,6 +620,7 @@ struct mv88e6xxx_info { unsigned int global1_addr; unsigned int age_time_coeff; unsigned long long flags; + const struct mv88e6xxx_ops *ops; }; struct mv88e6xxx_atu_entry { @@ -710,6 +701,13 @@ struct mv88e6xxx_bus_ops { int (*write)(struct mv88e6xxx_chip *chip, int addr, int reg, u16 val); }; +struct mv88e6xxx_ops { + int (*phy_read)(struct mv88e6xxx_chip *chip, int addr, int reg, + u16 *val); + int (*phy_write)(struct mv88e6xxx_chip *chip, int addr, int reg, + u16 val); +}; + enum stat_type { BANK0, BANK1, From b073d4e2b16a42f7d5b01814307ff41012c2e1ea Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Thu, 29 Sep 2016 12:22:01 -0400 Subject: [PATCH 1634/1715] net: dsa: mv88e6xxx: add set_switch_mac to ops Add a set_switch_mac chip-wide function to mv88e6xxx_ops and remove MV88E6XXX_FLAG_G2_SWITCH_MAC flags. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 28 ++++++++++++++++++++------- drivers/net/dsa/mv88e6xxx/mv88e6xxx.h | 8 ++------ 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 83a37693a8db..e40b71ba871c 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -2909,14 +2909,11 @@ static int mv88e6xxx_set_addr(struct dsa_switch *ds, u8 *addr) struct mv88e6xxx_chip *chip = ds->priv; int err; + if (!chip->info->ops->set_switch_mac) + return -EOPNOTSUPP; + mutex_lock(&chip->reg_lock); - - /* Has an indirect Switch MAC/WoL/WoF register in Global 2? */ - if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_SWITCH_MAC)) - err = mv88e6xxx_g2_set_switch_mac(chip, addr); - else - err = mv88e6xxx_g1_set_switch_mac(chip, addr); - + err = chip->info->ops->set_switch_mac(chip, addr); mutex_unlock(&chip->reg_lock); return err; @@ -3210,86 +3207,103 @@ static int mv88e6xxx_set_eeprom(struct dsa_switch *ds, } static const struct mv88e6xxx_ops mv88e6085_ops = { + .set_switch_mac = mv88e6xxx_g1_set_switch_mac, .phy_read = mv88e6xxx_phy_ppu_read, .phy_write = mv88e6xxx_phy_ppu_write, }; static const struct mv88e6xxx_ops mv88e6095_ops = { + .set_switch_mac = mv88e6xxx_g1_set_switch_mac, .phy_read = mv88e6xxx_phy_ppu_read, .phy_write = mv88e6xxx_phy_ppu_write, }; static const struct mv88e6xxx_ops mv88e6123_ops = { + .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_read, .phy_write = mv88e6xxx_write, }; static const struct mv88e6xxx_ops mv88e6131_ops = { + .set_switch_mac = mv88e6xxx_g1_set_switch_mac, .phy_read = mv88e6xxx_phy_ppu_read, .phy_write = mv88e6xxx_phy_ppu_write, }; static const struct mv88e6xxx_ops mv88e6161_ops = { + .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_read, .phy_write = mv88e6xxx_write, }; static const struct mv88e6xxx_ops mv88e6165_ops = { + .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_read, .phy_write = mv88e6xxx_write, }; static const struct mv88e6xxx_ops mv88e6171_ops = { + .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, }; static const struct mv88e6xxx_ops mv88e6172_ops = { + .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, }; static const struct mv88e6xxx_ops mv88e6175_ops = { + .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, }; static const struct mv88e6xxx_ops mv88e6176_ops = { + .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, }; static const struct mv88e6xxx_ops mv88e6185_ops = { + .set_switch_mac = mv88e6xxx_g1_set_switch_mac, .phy_read = mv88e6xxx_phy_ppu_read, .phy_write = mv88e6xxx_phy_ppu_write, }; static const struct mv88e6xxx_ops mv88e6240_ops = { + .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, }; static const struct mv88e6xxx_ops mv88e6320_ops = { + .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, }; static const struct mv88e6xxx_ops mv88e6321_ops = { + .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, }; static const struct mv88e6xxx_ops mv88e6350_ops = { + .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, }; static const struct mv88e6xxx_ops mv88e6351_ops = { + .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, }; static const struct mv88e6xxx_ops mv88e6352_ops = { + .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, }; diff --git a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h index 8e1290278ef6..d04184c7e068 100644 --- a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h +++ b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h @@ -423,7 +423,6 @@ enum mv88e6xxx_cap { MV88E6XXX_CAP_G2_IRL_DATA, /* (0x0a) Ingress Rate Data */ MV88E6XXX_CAP_G2_PVT_ADDR, /* (0x0b) Cross Chip Port VLAN Addr */ MV88E6XXX_CAP_G2_PVT_DATA, /* (0x0c) Cross Chip Port VLAN Data */ - MV88E6XXX_CAP_G2_SWITCH_MAC, /* (0x0d) Switch MAC/WoL/WoF */ MV88E6XXX_CAP_G2_POT, /* (0x0f) Priority Override Table */ MV88E6XXX_CAP_G2_EEPROM_CMD, /* (0x14) EEPROM Command */ MV88E6XXX_CAP_G2_EEPROM_DATA, /* (0x15) EEPROM Data */ @@ -473,7 +472,6 @@ enum mv88e6xxx_cap { #define MV88E6XXX_FLAG_G2_IRL_DATA BIT_ULL(MV88E6XXX_CAP_G2_IRL_DATA) #define MV88E6XXX_FLAG_G2_PVT_ADDR BIT_ULL(MV88E6XXX_CAP_G2_PVT_ADDR) #define MV88E6XXX_FLAG_G2_PVT_DATA BIT_ULL(MV88E6XXX_CAP_G2_PVT_DATA) -#define MV88E6XXX_FLAG_G2_SWITCH_MAC BIT_ULL(MV88E6XXX_CAP_G2_SWITCH_MAC) #define MV88E6XXX_FLAG_G2_POT BIT_ULL(MV88E6XXX_CAP_G2_POT) #define MV88E6XXX_FLAG_G2_EEPROM_CMD BIT_ULL(MV88E6XXX_CAP_G2_EEPROM_CMD) #define MV88E6XXX_FLAG_G2_EEPROM_DATA BIT_ULL(MV88E6XXX_CAP_G2_EEPROM_DATA) @@ -537,7 +535,6 @@ enum mv88e6xxx_cap { MV88E6XXX_FLAG_GLOBAL2 | \ MV88E6XXX_FLAG_G2_MGMT_EN_2X | \ MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ - MV88E6XXX_FLAG_G2_SWITCH_MAC | \ MV88E6XXX_FLAG_G2_POT | \ MV88E6XXX_FLAG_STU | \ MV88E6XXX_FLAG_TEMP | \ @@ -559,7 +556,6 @@ enum mv88e6xxx_cap { MV88E6XXX_FLAG_GLOBAL2 | \ MV88E6XXX_FLAG_G2_MGMT_EN_2X | \ MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ - MV88E6XXX_FLAG_G2_SWITCH_MAC | \ MV88E6XXX_FLAG_G2_POT | \ MV88E6XXX_FLAG_PPU_ACTIVE | \ MV88E6XXX_FLAG_TEMP | \ @@ -577,7 +573,6 @@ enum mv88e6xxx_cap { MV88E6XXX_FLAG_GLOBAL2 | \ MV88E6XXX_FLAG_G2_MGMT_EN_2X | \ MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ - MV88E6XXX_FLAG_G2_SWITCH_MAC | \ MV88E6XXX_FLAG_G2_POT | \ MV88E6XXX_FLAG_PPU_ACTIVE | \ MV88E6XXX_FLAG_STU | \ @@ -595,7 +590,6 @@ enum mv88e6xxx_cap { MV88E6XXX_FLAG_GLOBAL2 | \ MV88E6XXX_FLAG_G2_MGMT_EN_2X | \ MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ - MV88E6XXX_FLAG_G2_SWITCH_MAC | \ MV88E6XXX_FLAG_G2_POT | \ MV88E6XXX_FLAG_PPU_ACTIVE | \ MV88E6XXX_FLAG_STU | \ @@ -702,6 +696,8 @@ struct mv88e6xxx_bus_ops { }; struct mv88e6xxx_ops { + int (*set_switch_mac)(struct mv88e6xxx_chip *chip, u8 *addr); + int (*phy_read)(struct mv88e6xxx_chip *chip, int addr, int reg, u16 *val); int (*phy_write)(struct mv88e6xxx_chip *chip, int addr, int reg, From ee4dc2e75337e5925e9434f28ec48374a65ffcd9 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Thu, 29 Sep 2016 12:22:02 -0400 Subject: [PATCH 1635/1715] net: dsa: mv88e6xxx: add eeprom ops Remove EEPROM flags in favor of new {get,set}_eeprom chip-wide functions in the mv88e6xxx_ops structure. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 34 +++++++++++++++++---------- drivers/net/dsa/mv88e6xxx/mv88e6xxx.h | 16 ++++--------- 2 files changed, 26 insertions(+), 24 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index e40b71ba871c..883fd9809dd2 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -3168,13 +3168,11 @@ static int mv88e6xxx_get_eeprom(struct dsa_switch *ds, struct mv88e6xxx_chip *chip = ds->priv; int err; + if (!chip->info->ops->get_eeprom) + return -EOPNOTSUPP; + mutex_lock(&chip->reg_lock); - - if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_EEPROM16)) - err = mv88e6xxx_g2_get_eeprom16(chip, eeprom, data); - else - err = -EOPNOTSUPP; - + err = chip->info->ops->get_eeprom(chip, eeprom, data); mutex_unlock(&chip->reg_lock); if (err) @@ -3191,16 +3189,14 @@ static int mv88e6xxx_set_eeprom(struct dsa_switch *ds, struct mv88e6xxx_chip *chip = ds->priv; int err; + if (!chip->info->ops->set_eeprom) + return -EOPNOTSUPP; + if (eeprom->magic != 0xc3ec4951) return -EINVAL; mutex_lock(&chip->reg_lock); - - if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_EEPROM16)) - err = mv88e6xxx_g2_set_eeprom16(chip, eeprom, data); - else - err = -EOPNOTSUPP; - + err = chip->info->ops->set_eeprom(chip, eeprom, data); mutex_unlock(&chip->reg_lock); return err; @@ -3249,6 +3245,8 @@ static const struct mv88e6xxx_ops mv88e6171_ops = { }; static const struct mv88e6xxx_ops mv88e6172_ops = { + .get_eeprom = mv88e6xxx_g2_get_eeprom16, + .set_eeprom = mv88e6xxx_g2_set_eeprom16, .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, @@ -3261,6 +3259,8 @@ static const struct mv88e6xxx_ops mv88e6175_ops = { }; static const struct mv88e6xxx_ops mv88e6176_ops = { + .get_eeprom = mv88e6xxx_g2_get_eeprom16, + .set_eeprom = mv88e6xxx_g2_set_eeprom16, .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, @@ -3273,18 +3273,24 @@ static const struct mv88e6xxx_ops mv88e6185_ops = { }; static const struct mv88e6xxx_ops mv88e6240_ops = { + .get_eeprom = mv88e6xxx_g2_get_eeprom16, + .set_eeprom = mv88e6xxx_g2_set_eeprom16, .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, }; static const struct mv88e6xxx_ops mv88e6320_ops = { + .get_eeprom = mv88e6xxx_g2_get_eeprom16, + .set_eeprom = mv88e6xxx_g2_set_eeprom16, .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, }; static const struct mv88e6xxx_ops mv88e6321_ops = { + .get_eeprom = mv88e6xxx_g2_get_eeprom16, + .set_eeprom = mv88e6xxx_g2_set_eeprom16, .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, @@ -3303,6 +3309,8 @@ static const struct mv88e6xxx_ops mv88e6351_ops = { }; static const struct mv88e6xxx_ops mv88e6352_ops = { + .get_eeprom = mv88e6xxx_g2_get_eeprom16, + .set_eeprom = mv88e6xxx_g2_set_eeprom16, .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, @@ -3825,7 +3833,7 @@ static int mv88e6xxx_probe(struct mdio_device *mdiodev) if (IS_ERR(chip->reset)) return PTR_ERR(chip->reset); - if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_EEPROM16) && + if (chip->info->ops->get_eeprom && !of_property_read_u32(np, "eeprom-length", &eeprom_len)) chip->eeprom_len = eeprom_len; diff --git a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h index d04184c7e068..e572121c196e 100644 --- a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h +++ b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h @@ -424,8 +424,6 @@ enum mv88e6xxx_cap { MV88E6XXX_CAP_G2_PVT_ADDR, /* (0x0b) Cross Chip Port VLAN Addr */ MV88E6XXX_CAP_G2_PVT_DATA, /* (0x0c) Cross Chip Port VLAN Data */ MV88E6XXX_CAP_G2_POT, /* (0x0f) Priority Override Table */ - MV88E6XXX_CAP_G2_EEPROM_CMD, /* (0x14) EEPROM Command */ - MV88E6XXX_CAP_G2_EEPROM_DATA, /* (0x15) EEPROM Data */ /* PHY Polling Unit. * See GLOBAL_CONTROL_PPU_ENABLE and GLOBAL_STATUS_PPU_POLLING. @@ -473,8 +471,6 @@ enum mv88e6xxx_cap { #define MV88E6XXX_FLAG_G2_PVT_ADDR BIT_ULL(MV88E6XXX_CAP_G2_PVT_ADDR) #define MV88E6XXX_FLAG_G2_PVT_DATA BIT_ULL(MV88E6XXX_CAP_G2_PVT_DATA) #define MV88E6XXX_FLAG_G2_POT BIT_ULL(MV88E6XXX_CAP_G2_POT) -#define MV88E6XXX_FLAG_G2_EEPROM_CMD BIT_ULL(MV88E6XXX_CAP_G2_EEPROM_CMD) -#define MV88E6XXX_FLAG_G2_EEPROM_DATA BIT_ULL(MV88E6XXX_CAP_G2_EEPROM_DATA) #define MV88E6XXX_FLAG_PPU BIT_ULL(MV88E6XXX_CAP_PPU) #define MV88E6XXX_FLAG_PPU_ACTIVE BIT_ULL(MV88E6XXX_CAP_PPU_ACTIVE) @@ -483,11 +479,6 @@ enum mv88e6xxx_cap { #define MV88E6XXX_FLAG_TEMP_LIMIT BIT_ULL(MV88E6XXX_CAP_TEMP_LIMIT) #define MV88E6XXX_FLAG_VTU BIT_ULL(MV88E6XXX_CAP_VTU) -/* EEPROM Programming via Global2 with 16-bit data */ -#define MV88E6XXX_FLAGS_EEPROM16 \ - (MV88E6XXX_FLAG_G2_EEPROM_CMD | \ - MV88E6XXX_FLAG_G2_EEPROM_DATA) - /* Ingress Rate Limit unit */ #define MV88E6XXX_FLAGS_IRL \ (MV88E6XXX_FLAG_G2_IRL_CMD | \ @@ -561,7 +552,6 @@ enum mv88e6xxx_cap { MV88E6XXX_FLAG_TEMP | \ MV88E6XXX_FLAG_TEMP_LIMIT | \ MV88E6XXX_FLAG_VTU | \ - MV88E6XXX_FLAGS_EEPROM16 | \ MV88E6XXX_FLAGS_IRL | \ MV88E6XXX_FLAGS_MULTI_CHIP | \ MV88E6XXX_FLAGS_PVT) @@ -596,7 +586,6 @@ enum mv88e6xxx_cap { MV88E6XXX_FLAG_TEMP | \ MV88E6XXX_FLAG_TEMP_LIMIT | \ MV88E6XXX_FLAG_VTU | \ - MV88E6XXX_FLAGS_EEPROM16 | \ MV88E6XXX_FLAGS_IRL | \ MV88E6XXX_FLAGS_MULTI_CHIP | \ MV88E6XXX_FLAGS_PVT | \ @@ -696,6 +685,11 @@ struct mv88e6xxx_bus_ops { }; struct mv88e6xxx_ops { + int (*get_eeprom)(struct mv88e6xxx_chip *chip, + struct ethtool_eeprom *eeprom, u8 *data); + int (*set_eeprom)(struct mv88e6xxx_chip *chip, + struct ethtool_eeprom *eeprom, u8 *data); + int (*set_switch_mac)(struct mv88e6xxx_chip *chip, u8 *addr); int (*phy_read)(struct mv88e6xxx_chip *chip, int addr, int reg, From 6348ef2dbbd96c4488a1ee83cc0f0f3d9a314a2f Mon Sep 17 00:00:00 2001 From: Jia He Date: Fri, 30 Sep 2016 11:28:58 +0800 Subject: [PATCH 1636/1715] net:snmp: Introduce generic interfaces for snmp_get_cpu_field{, 64} This is to introduce the generic interfaces for snmp_get_cpu_field{,64}. It exchanges the two for-loops for collecting the percpu statistics data. This can aggregate the data by going through all the items of each cpu sequentially. Signed-off-by: Jia He Suggested-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/ip.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/include/net/ip.h b/include/net/ip.h index 9742b92dc933..bc43c0fcae12 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -219,6 +219,29 @@ static inline u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_o } #endif +#define snmp_get_cpu_field64_batch(buff64, stats_list, mib_statistic, offset) \ +{ \ + int i, c; \ + for_each_possible_cpu(c) { \ + for (i = 0; stats_list[i].name; i++) \ + buff64[i] += snmp_get_cpu_field64( \ + mib_statistic, \ + c, stats_list[i].entry, \ + offset); \ + } \ +} + +#define snmp_get_cpu_field_batch(buff, stats_list, mib_statistic) \ +{ \ + int i, c; \ + for_each_possible_cpu(c) { \ + for (i = 0; stats_list[i].name; i++) \ + buff[i] += snmp_get_cpu_field( \ + mib_statistic, \ + c, stats_list[i].entry); \ + } \ +} + void inet_get_local_port_range(struct net *net, int *low, int *high); #ifdef CONFIG_SYSCTL From f22d5c490990ecb6f4eb70c4ed478fc8cea78fe1 Mon Sep 17 00:00:00 2001 From: Jia He Date: Fri, 30 Sep 2016 11:28:59 +0800 Subject: [PATCH 1637/1715] proc: Reduce cache miss in snmp_seq_show This is to use the generic interfaces snmp_get_cpu_field{,64}_batch to aggregate the data by going through all the items of each cpu sequentially. Then snmp_seq_show is split into 2 parts to avoid build warning "the frame size" larger than 1024. Signed-off-by: Jia He Signed-off-by: David S. Miller --- net/ipv4/proc.c | 70 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 47 insertions(+), 23 deletions(-) diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 1ed015e4bc79..f51fc8803154 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -46,6 +46,8 @@ #include #include +#define TCPUDP_MIB_MAX max_t(u32, UDP_MIB_MAX, TCP_MIB_MAX) + /* * Report socket allocation statistics [mea@utu.fi] */ @@ -379,13 +381,15 @@ static void icmp_put(struct seq_file *seq) /* * Called from the PROCfs module. This outputs /proc/net/snmp. */ -static int snmp_seq_show(struct seq_file *seq, void *v) +static int snmp_seq_show_ipstats(struct seq_file *seq, void *v) { - int i; struct net *net = seq->private; + u64 buff64[IPSTATS_MIB_MAX]; + int i; + + memset(buff64, 0, IPSTATS_MIB_MAX * sizeof(u64)); seq_puts(seq, "Ip: Forwarding DefaultTTL"); - for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) seq_printf(seq, " %s", snmp4_ipstats_list[i].name); @@ -394,57 +398,77 @@ static int snmp_seq_show(struct seq_file *seq, void *v) net->ipv4.sysctl_ip_default_ttl); BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0); + snmp_get_cpu_field64_batch(buff64, snmp4_ipstats_list, + net->mib.ip_statistics, + offsetof(struct ipstats_mib, syncp)); for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) - seq_printf(seq, " %llu", - snmp_fold_field64(net->mib.ip_statistics, - snmp4_ipstats_list[i].entry, - offsetof(struct ipstats_mib, syncp))); + seq_printf(seq, " %llu", buff64[i]); - icmp_put(seq); /* RFC 2011 compatibility */ - icmpmsg_put(seq); + return 0; +} + +static int snmp_seq_show_tcp_udp(struct seq_file *seq, void *v) +{ + unsigned long buff[TCPUDP_MIB_MAX]; + struct net *net = seq->private; + int i; + + memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long)); seq_puts(seq, "\nTcp:"); for (i = 0; snmp4_tcp_list[i].name != NULL; i++) seq_printf(seq, " %s", snmp4_tcp_list[i].name); seq_puts(seq, "\nTcp:"); + snmp_get_cpu_field_batch(buff, snmp4_tcp_list, + net->mib.tcp_statistics); for (i = 0; snmp4_tcp_list[i].name != NULL; i++) { /* MaxConn field is signed, RFC 2012 */ if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN) - seq_printf(seq, " %ld", - snmp_fold_field(net->mib.tcp_statistics, - snmp4_tcp_list[i].entry)); + seq_printf(seq, " %ld", buff[i]); else - seq_printf(seq, " %lu", - snmp_fold_field(net->mib.tcp_statistics, - snmp4_tcp_list[i].entry)); + seq_printf(seq, " %lu", buff[i]); } + memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long)); + + snmp_get_cpu_field_batch(buff, snmp4_udp_list, + net->mib.udp_statistics); seq_puts(seq, "\nUdp:"); for (i = 0; snmp4_udp_list[i].name != NULL; i++) seq_printf(seq, " %s", snmp4_udp_list[i].name); - seq_puts(seq, "\nUdp:"); for (i = 0; snmp4_udp_list[i].name != NULL; i++) - seq_printf(seq, " %lu", - snmp_fold_field(net->mib.udp_statistics, - snmp4_udp_list[i].entry)); + seq_printf(seq, " %lu", buff[i]); + + memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long)); /* the UDP and UDP-Lite MIBs are the same */ seq_puts(seq, "\nUdpLite:"); + snmp_get_cpu_field_batch(buff, snmp4_udp_list, + net->mib.udplite_statistics); for (i = 0; snmp4_udp_list[i].name != NULL; i++) seq_printf(seq, " %s", snmp4_udp_list[i].name); - seq_puts(seq, "\nUdpLite:"); for (i = 0; snmp4_udp_list[i].name != NULL; i++) - seq_printf(seq, " %lu", - snmp_fold_field(net->mib.udplite_statistics, - snmp4_udp_list[i].entry)); + seq_printf(seq, " %lu", buff[i]); seq_putc(seq, '\n'); return 0; } +static int snmp_seq_show(struct seq_file *seq, void *v) +{ + snmp_seq_show_ipstats(seq, v); + + icmp_put(seq); /* RFC 2011 compatibility */ + icmpmsg_put(seq); + + snmp_seq_show_tcp_udp(seq, v); + + return 0; +} + static int snmp_seq_open(struct inode *inode, struct file *file) { return single_open_net(inode, file, snmp_seq_show); From 4a4857b1c81ef39a9dc719af6b498cd39d1c1eb0 Mon Sep 17 00:00:00 2001 From: Jia He Date: Fri, 30 Sep 2016 11:29:00 +0800 Subject: [PATCH 1638/1715] proc: Reduce cache miss in snmp6_seq_show This is to use the generic interfaces snmp_get_cpu_field{,64}_batch to aggregate the data by going through all the items of each cpu sequentially. Signed-off-by: Jia He Signed-off-by: David S. Miller --- net/ipv6/proc.c | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 679253d0af84..cc8e3ae9ca73 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -30,6 +30,11 @@ #include #include +#define MAX4(a, b, c, d) \ + max_t(u32, max_t(u32, a, b), max_t(u32, c, d)) +#define SNMP_MIB_MAX MAX4(UDP_MIB_MAX, TCP_MIB_MAX, \ + IPSTATS_MIB_MAX, ICMP_MIB_MAX) + static int sockstat6_seq_show(struct seq_file *seq, void *v) { struct net *net = seq->private; @@ -191,25 +196,34 @@ static void snmp6_seq_show_item(struct seq_file *seq, void __percpu *pcpumib, atomic_long_t *smib, const struct snmp_mib *itemlist) { + unsigned long buff[SNMP_MIB_MAX]; int i; - unsigned long val; - for (i = 0; itemlist[i].name; i++) { - val = pcpumib ? - snmp_fold_field(pcpumib, itemlist[i].entry) : - atomic_long_read(smib + itemlist[i].entry); - seq_printf(seq, "%-32s\t%lu\n", itemlist[i].name, val); + if (pcpumib) { + memset(buff, 0, sizeof(unsigned long) * SNMP_MIB_MAX); + + snmp_get_cpu_field_batch(buff, itemlist, pcpumib); + for (i = 0; itemlist[i].name; i++) + seq_printf(seq, "%-32s\t%lu\n", + itemlist[i].name, buff[i]); + } else { + for (i = 0; itemlist[i].name; i++) + seq_printf(seq, "%-32s\t%lu\n", itemlist[i].name, + atomic_long_read(smib + itemlist[i].entry)); } } static void snmp6_seq_show_item64(struct seq_file *seq, void __percpu *mib, const struct snmp_mib *itemlist, size_t syncpoff) { + u64 buff64[SNMP_MIB_MAX]; int i; + memset(buff64, 0, sizeof(unsigned long) * SNMP_MIB_MAX); + + snmp_get_cpu_field64_batch(buff64, itemlist, mib, syncpoff); for (i = 0; itemlist[i].name; i++) - seq_printf(seq, "%-32s\t%llu\n", itemlist[i].name, - snmp_fold_field64(mib, itemlist[i].entry, syncpoff)); + seq_printf(seq, "%-32s\t%llu\n", itemlist[i].name, buff64[i]); } static int snmp6_seq_show(struct seq_file *seq, void *v) From 7d64a94be2f9fadb1dd95742650f1fdbac69f25b Mon Sep 17 00:00:00 2001 From: Jia He Date: Fri, 30 Sep 2016 11:29:01 +0800 Subject: [PATCH 1639/1715] proc: Reduce cache miss in sctp_snmp_seq_show This is to use the generic interfaces snmp_get_cpu_field{,64}_batch to aggregate the data by going through all the items of each cpu sequentially. Signed-off-by: Jia He Signed-off-by: David S. Miller --- net/sctp/proc.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/sctp/proc.c b/net/sctp/proc.c index ef8ba77a5bea..09e16c2b5c17 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -73,13 +73,17 @@ static const struct snmp_mib sctp_snmp_list[] = { /* Display sctp snmp mib statistics(/proc/net/sctp/snmp). */ static int sctp_snmp_seq_show(struct seq_file *seq, void *v) { + unsigned long buff[SCTP_MIB_MAX]; struct net *net = seq->private; int i; + memset(buff, 0, sizeof(unsigned long) * SCTP_MIB_MAX); + + snmp_get_cpu_field_batch(buff, sctp_snmp_list, + net->sctp.sctp_statistics); for (i = 0; sctp_snmp_list[i].name != NULL; i++) seq_printf(seq, "%-32s\t%ld\n", sctp_snmp_list[i].name, - snmp_fold_field(net->sctp.sctp_statistics, - sctp_snmp_list[i].entry)); + buff[i]); return 0; } From 07613873f1731aa47fdf06a1cbd2e3cd1974c026 Mon Sep 17 00:00:00 2001 From: Jia He Date: Fri, 30 Sep 2016 11:29:02 +0800 Subject: [PATCH 1640/1715] proc: Reduce cache miss in xfrm_statistics_seq_show This is to use the generic interfaces snmp_get_cpu_field{,64}_batch to aggregate the data by going through all the items of each cpu sequentially. Signed-off-by: Jia He Signed-off-by: David S. Miller --- net/xfrm/xfrm_proc.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c index 9c4fbd8935f4..ba2b539879bc 100644 --- a/net/xfrm/xfrm_proc.c +++ b/net/xfrm/xfrm_proc.c @@ -50,12 +50,18 @@ static const struct snmp_mib xfrm_mib_list[] = { static int xfrm_statistics_seq_show(struct seq_file *seq, void *v) { + unsigned long buff[LINUX_MIB_XFRMMAX]; struct net *net = seq->private; int i; + + memset(buff, 0, sizeof(unsigned long) * LINUX_MIB_XFRMMAX); + + snmp_get_cpu_field_batch(buff, xfrm_mib_list, + net->mib.xfrm_statistics); for (i = 0; xfrm_mib_list[i].name; i++) seq_printf(seq, "%-24s\t%lu\n", xfrm_mib_list[i].name, - snmp_fold_field(net->mib.xfrm_statistics, - xfrm_mib_list[i].entry)); + buff[i]); + return 0; } From aca05671d58cea06dc60bbe554b8b399af7da409 Mon Sep 17 00:00:00 2001 From: Jia He Date: Fri, 30 Sep 2016 11:29:03 +0800 Subject: [PATCH 1641/1715] ipv6: Remove useless parameter in __snmp6_fill_statsdev The parameter items(is always ICMP6_MIB_MAX) is useless for __snmp6_fill_statsdev Signed-off-by: Jia He Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 2f1f5d439788..35d4baa55c9d 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4961,18 +4961,18 @@ static inline size_t inet6_if_nlmsg_size(void) } static inline void __snmp6_fill_statsdev(u64 *stats, atomic_long_t *mib, - int items, int bytes) + int bytes) { int i; - int pad = bytes - sizeof(u64) * items; + int pad = bytes - sizeof(u64) * ICMP6_MIB_MAX; BUG_ON(pad < 0); /* Use put_unaligned() because stats may not be aligned for u64. */ - put_unaligned(items, &stats[0]); - for (i = 1; i < items; i++) + put_unaligned(ICMP6_MIB_MAX, &stats[0]); + for (i = 1; i < ICMP6_MIB_MAX; i++) put_unaligned(atomic_long_read(&mib[i]), &stats[i]); - memset(&stats[items], 0, pad); + memset(&stats[ICMP6_MIB_MAX], 0, pad); } static inline void __snmp6_fill_stats64(u64 *stats, void __percpu *mib, @@ -5005,7 +5005,7 @@ static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype, offsetof(struct ipstats_mib, syncp)); break; case IFLA_INET6_ICMP6STATS: - __snmp6_fill_statsdev(stats, idev->stats.icmpv6dev->mibs, ICMP6_MIB_MAX, bytes); + __snmp6_fill_statsdev(stats, idev->stats.icmpv6dev->mibs, bytes); break; } } From 6d4a741cbbfa6612a479656654ca5edf7becc72c Mon Sep 17 00:00:00 2001 From: Jia He Date: Fri, 30 Sep 2016 11:29:04 +0800 Subject: [PATCH 1642/1715] net: Suppress the "Comparison to NULL could be written" warnings This is to suppress the checkpatch.pl warning "Comparison to NULL could be written". No functional changes here. Signed-off-by: Jia He Signed-off-by: David S. Miller --- net/ipv4/proc.c | 32 ++++++++++++++++---------------- net/sctp/proc.c | 2 +- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index f51fc8803154..7143ca1a6af9 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -358,22 +358,22 @@ static void icmp_put(struct seq_file *seq) atomic_long_t *ptr = net->mib.icmpmsg_statistics->mibs; seq_puts(seq, "\nIcmp: InMsgs InErrors InCsumErrors"); - for (i = 0; icmpmibmap[i].name != NULL; i++) + for (i = 0; icmpmibmap[i].name; i++) seq_printf(seq, " In%s", icmpmibmap[i].name); seq_puts(seq, " OutMsgs OutErrors"); - for (i = 0; icmpmibmap[i].name != NULL; i++) + for (i = 0; icmpmibmap[i].name; i++) seq_printf(seq, " Out%s", icmpmibmap[i].name); seq_printf(seq, "\nIcmp: %lu %lu %lu", snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_INMSGS), snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_INERRORS), snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_CSUMERRORS)); - for (i = 0; icmpmibmap[i].name != NULL; i++) + for (i = 0; icmpmibmap[i].name; i++) seq_printf(seq, " %lu", atomic_long_read(ptr + icmpmibmap[i].index)); seq_printf(seq, " %lu %lu", snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_OUTMSGS), snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_OUTERRORS)); - for (i = 0; icmpmibmap[i].name != NULL; i++) + for (i = 0; icmpmibmap[i].name; i++) seq_printf(seq, " %lu", atomic_long_read(ptr + (icmpmibmap[i].index | 0x100))); } @@ -390,7 +390,7 @@ static int snmp_seq_show_ipstats(struct seq_file *seq, void *v) memset(buff64, 0, IPSTATS_MIB_MAX * sizeof(u64)); seq_puts(seq, "Ip: Forwarding DefaultTTL"); - for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) + for (i = 0; snmp4_ipstats_list[i].name; i++) seq_printf(seq, " %s", snmp4_ipstats_list[i].name); seq_printf(seq, "\nIp: %d %d", @@ -401,7 +401,7 @@ static int snmp_seq_show_ipstats(struct seq_file *seq, void *v) snmp_get_cpu_field64_batch(buff64, snmp4_ipstats_list, net->mib.ip_statistics, offsetof(struct ipstats_mib, syncp)); - for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) + for (i = 0; snmp4_ipstats_list[i].name; i++) seq_printf(seq, " %llu", buff64[i]); return 0; @@ -416,13 +416,13 @@ static int snmp_seq_show_tcp_udp(struct seq_file *seq, void *v) memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long)); seq_puts(seq, "\nTcp:"); - for (i = 0; snmp4_tcp_list[i].name != NULL; i++) + for (i = 0; snmp4_tcp_list[i].name; i++) seq_printf(seq, " %s", snmp4_tcp_list[i].name); seq_puts(seq, "\nTcp:"); snmp_get_cpu_field_batch(buff, snmp4_tcp_list, net->mib.tcp_statistics); - for (i = 0; snmp4_tcp_list[i].name != NULL; i++) { + for (i = 0; snmp4_tcp_list[i].name; i++) { /* MaxConn field is signed, RFC 2012 */ if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN) seq_printf(seq, " %ld", buff[i]); @@ -435,10 +435,10 @@ static int snmp_seq_show_tcp_udp(struct seq_file *seq, void *v) snmp_get_cpu_field_batch(buff, snmp4_udp_list, net->mib.udp_statistics); seq_puts(seq, "\nUdp:"); - for (i = 0; snmp4_udp_list[i].name != NULL; i++) + for (i = 0; snmp4_udp_list[i].name; i++) seq_printf(seq, " %s", snmp4_udp_list[i].name); seq_puts(seq, "\nUdp:"); - for (i = 0; snmp4_udp_list[i].name != NULL; i++) + for (i = 0; snmp4_udp_list[i].name; i++) seq_printf(seq, " %lu", buff[i]); memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long)); @@ -447,10 +447,10 @@ static int snmp_seq_show_tcp_udp(struct seq_file *seq, void *v) seq_puts(seq, "\nUdpLite:"); snmp_get_cpu_field_batch(buff, snmp4_udp_list, net->mib.udplite_statistics); - for (i = 0; snmp4_udp_list[i].name != NULL; i++) + for (i = 0; snmp4_udp_list[i].name; i++) seq_printf(seq, " %s", snmp4_udp_list[i].name); seq_puts(seq, "\nUdpLite:"); - for (i = 0; snmp4_udp_list[i].name != NULL; i++) + for (i = 0; snmp4_udp_list[i].name; i++) seq_printf(seq, " %lu", buff[i]); seq_putc(seq, '\n'); @@ -493,21 +493,21 @@ static int netstat_seq_show(struct seq_file *seq, void *v) struct net *net = seq->private; seq_puts(seq, "TcpExt:"); - for (i = 0; snmp4_net_list[i].name != NULL; i++) + for (i = 0; snmp4_net_list[i].name; i++) seq_printf(seq, " %s", snmp4_net_list[i].name); seq_puts(seq, "\nTcpExt:"); - for (i = 0; snmp4_net_list[i].name != NULL; i++) + for (i = 0; snmp4_net_list[i].name; i++) seq_printf(seq, " %lu", snmp_fold_field(net->mib.net_statistics, snmp4_net_list[i].entry)); seq_puts(seq, "\nIpExt:"); - for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++) + for (i = 0; snmp4_ipextstats_list[i].name; i++) seq_printf(seq, " %s", snmp4_ipextstats_list[i].name); seq_puts(seq, "\nIpExt:"); - for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++) + for (i = 0; snmp4_ipextstats_list[i].name; i++) seq_printf(seq, " %llu", snmp_fold_field64(net->mib.ip_statistics, snmp4_ipextstats_list[i].entry, diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 09e16c2b5c17..206377fe91ec 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -81,7 +81,7 @@ static int sctp_snmp_seq_show(struct seq_file *seq, void *v) snmp_get_cpu_field_batch(buff, sctp_snmp_list, net->sctp.sctp_statistics); - for (i = 0; sctp_snmp_list[i].name != NULL; i++) + for (i = 0; sctp_snmp_list[i].name; i++) seq_printf(seq, "%-32s\t%ld\n", sctp_snmp_list[i].name, buff[i]); From bd11f0741fa5a2c296629898ad07759dd12b35bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= Date: Tue, 27 Sep 2016 23:57:58 -0700 Subject: [PATCH 1643/1715] ipv6 addrconf: implement RFC7559 router solicitation backoff MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This implements: https://tools.ietf.org/html/rfc7559 Backoff is performed according to RFC3315 section 14: https://tools.ietf.org/html/rfc3315#section-14 We allow setting /proc/sys/net/ipv6/conf/*/router_solicitations to a negative value meaning an unlimited number of retransmits, and we make this the new default (inline with the RFC). We also add a new setting: /proc/sys/net/ipv6/conf/*/router_solicitation_max_interval defaulting to 1 hour (per RFC recommendation). Signed-off-by: Maciej Żenczykowski Acked-by: Erik Kline Signed-off-by: David S. Miller --- include/linux/ipv6.h | 1 + include/net/addrconf.h | 3 ++- include/net/if_inet6.h | 1 + include/uapi/linux/ipv6.h | 1 + net/ipv6/addrconf.c | 51 +++++++++++++++++++++++++++++++++------ 5 files changed, 49 insertions(+), 8 deletions(-) diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index c6dbcd84a2c7..7e9a789be5e0 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -18,6 +18,7 @@ struct ipv6_devconf { __s32 dad_transmits; __s32 rtr_solicits; __s32 rtr_solicit_interval; + __s32 rtr_solicit_max_interval; __s32 rtr_solicit_delay; __s32 force_mld_version; __s32 mldv1_unsolicited_report_interval; diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 9826d3a9464c..f2d072787947 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -1,8 +1,9 @@ #ifndef _ADDRCONF_H #define _ADDRCONF_H -#define MAX_RTR_SOLICITATIONS 3 +#define MAX_RTR_SOLICITATIONS -1 /* unlimited */ #define RTR_SOLICITATION_INTERVAL (4*HZ) +#define RTR_SOLICITATION_MAX_INTERVAL (3600*HZ) /* 1 hour */ #define MIN_VALID_LIFETIME (2*3600) /* 2 hours */ diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index 1c8b6820b694..515352c6280a 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -201,6 +201,7 @@ struct inet6_dev { struct ipv6_devstat stats; struct timer_list rs_timer; + __s32 rs_interval; /* in jiffies */ __u8 rs_probes; __u8 addr_gen_mode; diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 395876060f50..8c2772340c3f 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -177,6 +177,7 @@ enum { DEVCONF_DROP_UNICAST_IN_L2_MULTICAST, DEVCONF_DROP_UNSOLICITED_NA, DEVCONF_KEEP_ADDR_ON_DOWN, + DEVCONF_RTR_SOLICIT_MAX_INTERVAL, DEVCONF_MAX }; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 35d4baa55c9d..87183983724d 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -112,6 +112,27 @@ static inline u32 cstamp_delta(unsigned long cstamp) return (cstamp - INITIAL_JIFFIES) * 100UL / HZ; } +static inline s32 rfc3315_s14_backoff_init(s32 irt) +{ + /* multiply 'initial retransmission time' by 0.9 .. 1.1 */ + u64 tmp = (900000 + prandom_u32() % 200001) * (u64)irt; + do_div(tmp, 1000000); + return (s32)tmp; +} + +static inline s32 rfc3315_s14_backoff_update(s32 rt, s32 mrt) +{ + /* multiply 'retransmission timeout' by 1.9 .. 2.1 */ + u64 tmp = (1900000 + prandom_u32() % 200001) * (u64)rt; + do_div(tmp, 1000000); + if ((s32)tmp > mrt) { + /* multiply 'maximum retransmission time' by 0.9 .. 1.1 */ + tmp = (900000 + prandom_u32() % 200001) * (u64)mrt; + do_div(tmp, 1000000); + } + return (s32)tmp; +} + #ifdef CONFIG_SYSCTL static int addrconf_sysctl_register(struct inet6_dev *idev); static void addrconf_sysctl_unregister(struct inet6_dev *idev); @@ -187,6 +208,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .dad_transmits = 1, .rtr_solicits = MAX_RTR_SOLICITATIONS, .rtr_solicit_interval = RTR_SOLICITATION_INTERVAL, + .rtr_solicit_max_interval = RTR_SOLICITATION_MAX_INTERVAL, .rtr_solicit_delay = MAX_RTR_SOLICITATION_DELAY, .use_tempaddr = 0, .temp_valid_lft = TEMP_VALID_LIFETIME, @@ -232,6 +254,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .dad_transmits = 1, .rtr_solicits = MAX_RTR_SOLICITATIONS, .rtr_solicit_interval = RTR_SOLICITATION_INTERVAL, + .rtr_solicit_max_interval = RTR_SOLICITATION_MAX_INTERVAL, .rtr_solicit_delay = MAX_RTR_SOLICITATION_DELAY, .use_tempaddr = 0, .temp_valid_lft = TEMP_VALID_LIFETIME, @@ -3687,7 +3710,7 @@ static void addrconf_rs_timer(unsigned long data) if (idev->if_flags & IF_RA_RCVD) goto out; - if (idev->rs_probes++ < idev->cnf.rtr_solicits) { + if (idev->rs_probes++ < idev->cnf.rtr_solicits || idev->cnf.rtr_solicits < 0) { write_unlock(&idev->lock); if (!ipv6_get_lladdr(dev, &lladdr, IFA_F_TENTATIVE)) ndisc_send_rs(dev, &lladdr, @@ -3696,11 +3719,13 @@ static void addrconf_rs_timer(unsigned long data) goto put; write_lock(&idev->lock); + idev->rs_interval = rfc3315_s14_backoff_update( + idev->rs_interval, idev->cnf.rtr_solicit_max_interval); /* The wait after the last probe can be shorter */ addrconf_mod_rs_timer(idev, (idev->rs_probes == idev->cnf.rtr_solicits) ? idev->cnf.rtr_solicit_delay : - idev->cnf.rtr_solicit_interval); + idev->rs_interval); } else { /* * Note: we do not support deprecated "all on-link" @@ -3949,7 +3974,7 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp) send_mld = ifp->scope == IFA_LINK && ipv6_lonely_lladdr(ifp); send_rs = send_mld && ipv6_accept_ra(ifp->idev) && - ifp->idev->cnf.rtr_solicits > 0 && + ifp->idev->cnf.rtr_solicits != 0 && (dev->flags&IFF_LOOPBACK) == 0; read_unlock_bh(&ifp->idev->lock); @@ -3971,10 +3996,11 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp) write_lock_bh(&ifp->idev->lock); spin_lock(&ifp->lock); + ifp->idev->rs_interval = rfc3315_s14_backoff_init( + ifp->idev->cnf.rtr_solicit_interval); ifp->idev->rs_probes = 1; ifp->idev->if_flags |= IF_RS_SENT; - addrconf_mod_rs_timer(ifp->idev, - ifp->idev->cnf.rtr_solicit_interval); + addrconf_mod_rs_timer(ifp->idev, ifp->idev->rs_interval); spin_unlock(&ifp->lock); write_unlock_bh(&ifp->idev->lock); } @@ -4891,6 +4917,8 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_RTR_SOLICITS] = cnf->rtr_solicits; array[DEVCONF_RTR_SOLICIT_INTERVAL] = jiffies_to_msecs(cnf->rtr_solicit_interval); + array[DEVCONF_RTR_SOLICIT_MAX_INTERVAL] = + jiffies_to_msecs(cnf->rtr_solicit_max_interval); array[DEVCONF_RTR_SOLICIT_DELAY] = jiffies_to_msecs(cnf->rtr_solicit_delay); array[DEVCONF_FORCE_MLD_VERSION] = cnf->force_mld_version; @@ -5099,7 +5127,7 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token) return -EINVAL; if (!ipv6_accept_ra(idev)) return -EINVAL; - if (idev->cnf.rtr_solicits <= 0) + if (idev->cnf.rtr_solicits == 0) return -EINVAL; write_lock_bh(&idev->lock); @@ -5128,8 +5156,10 @@ update_lft: if (update_rs) { idev->if_flags |= IF_RS_SENT; + idev->rs_interval = rfc3315_s14_backoff_init( + idev->cnf.rtr_solicit_interval); idev->rs_probes = 1; - addrconf_mod_rs_timer(idev, idev->cnf.rtr_solicit_interval); + addrconf_mod_rs_timer(idev, idev->rs_interval); } /* Well, that's kinda nasty ... */ @@ -5777,6 +5807,13 @@ static const struct ctl_table addrconf_sysctl[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, + { + .procname = "router_solicitation_max_interval", + .data = &ipv6_devconf.rtr_solicit_max_interval, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, { .procname = "router_solicitation_delay", .data = &ipv6_devconf.rtr_solicit_delay, From 5038056e6bd45788235e97e3bcfc43f96c52ca84 Mon Sep 17 00:00:00 2001 From: David Decotigny Date: Wed, 28 Sep 2016 11:00:04 -0700 Subject: [PATCH 1644/1715] mlx4: remove unused fields This also can address following UBSAN warnings: [ 36.640343] ================================================================================ [ 36.648772] UBSAN: Undefined behaviour in drivers/net/ethernet/mellanox/mlx4/fw.c:857:26 [ 36.656853] shift exponent 64 is too large for 32-bit type 'int' [ 36.663348] ================================================================================ [ 36.671783] ================================================================================ [ 36.680213] UBSAN: Undefined behaviour in drivers/net/ethernet/mellanox/mlx4/fw.c:861:27 [ 36.688297] shift exponent 35 is too large for 32-bit type 'int' [ 36.694702] ================================================================================ Tested: reboot with UBSAN, no warning. Signed-off-by: David Decotigny Acked-by: Eric Dumazet Reviewed-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/fw.c | 4 ---- drivers/net/ethernet/mellanox/mlx4/fw.h | 2 -- 2 files changed, 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 090bf81076e8..f9cbc67f1694 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -853,12 +853,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev_cap->max_eqs = 1 << (field & 0xf); MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_MTT_OFFSET); dev_cap->reserved_mtts = 1 << (field >> 4); - MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MRW_SZ_OFFSET); - dev_cap->max_mrw_sz = 1 << field; MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_MRW_OFFSET); dev_cap->reserved_mrws = 1 << (field & 0xf); - MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MTT_SEG_OFFSET); - dev_cap->max_mtt_seg = 1 << (field & 0x3f); MLX4_GET(size, outbox, QUERY_DEV_CAP_NUM_SYS_EQ_OFFSET); dev_cap->num_sys_eqs = size & 0xfff; MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_REQ_QP_OFFSET); diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h index f11614f12517..5343a0599253 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.h +++ b/drivers/net/ethernet/mellanox/mlx4/fw.h @@ -80,9 +80,7 @@ struct mlx4_dev_cap { int max_eqs; int num_sys_eqs; int reserved_mtts; - int max_mrw_sz; int reserved_mrws; - int max_mtt_seg; int max_requester_per_qp; int max_responder_per_qp; int max_rdma_global; From 6cd80b55471515e0102760b59034ded000c35a09 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 28 Sep 2016 23:47:37 +0100 Subject: [PATCH 1645/1715] nfp: bpf: zero extend 4 byte context loads Set upper 32 bits of destination register to zeros after load from the context structure. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c b/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c index 3de819acd68c..f8df5300f49c 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c @@ -1134,6 +1134,8 @@ static int mem_ldx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) else return -ENOTSUPP; + wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0); + return 0; } From 803783849fed11e38a30f31932c02c815520da70 Mon Sep 17 00:00:00 2001 From: Calvin Owens Date: Wed, 28 Sep 2016 21:46:39 -0700 Subject: [PATCH 1646/1715] mlx5: Add ndo_poll_controller() implementation This implements ndo_poll_controller in net_device_ops callbacks for mlx5, which is necessary to use netconsole with this driver. Acked-By: Saeed Mahameed Signed-off-by: Calvin Owens Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/en_main.c | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index b58cfe37dead..7eaf38020a8f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3188,6 +3188,20 @@ static int mlx5e_xdp(struct net_device *dev, struct netdev_xdp *xdp) } } +#ifdef CONFIG_NET_POLL_CONTROLLER +/* Fake "interrupt" called by netpoll (eg netconsole) to send skbs without + * reenabling interrupts. + */ +static void mlx5e_netpoll(struct net_device *dev) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + int i; + + for (i = 0; i < priv->params.num_channels; i++) + napi_schedule(&priv->channel[i]->napi); +} +#endif + static const struct net_device_ops mlx5e_netdev_ops_basic = { .ndo_open = mlx5e_open, .ndo_stop = mlx5e_close, @@ -3208,6 +3222,9 @@ static const struct net_device_ops mlx5e_netdev_ops_basic = { #endif .ndo_tx_timeout = mlx5e_tx_timeout, .ndo_xdp = mlx5e_xdp, +#ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_poll_controller = mlx5e_netpoll, +#endif }; static const struct net_device_ops mlx5e_netdev_ops_sriov = { @@ -3240,6 +3257,9 @@ static const struct net_device_ops mlx5e_netdev_ops_sriov = { .ndo_get_vf_stats = mlx5e_get_vf_stats, .ndo_tx_timeout = mlx5e_tx_timeout, .ndo_xdp = mlx5e_xdp, +#ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_poll_controller = mlx5e_netpoll, +#endif }; static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev) From 265a44bbf23e4ed1c76409f07595ea88351ba4b3 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 30 Sep 2016 08:50:42 +0100 Subject: [PATCH 1647/1715] rxrpc: Actually display the tx_data trace retransmission note Actually display in the tx_data trace the retransmission note added in a previous patch. Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 8ba8d76e856a..67f03946ea4a 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -280,11 +280,12 @@ TRACE_EVENT(rxrpc_tx_data, __entry->lose = lose; ), - TP_printk("c=%p DATA %08x q=%08x fl=%02x%s", + TP_printk("c=%p DATA %08x q=%08x fl=%02x%s%s", __entry->call, __entry->serial, __entry->seq, __entry->flags, + __entry->retrans ? " *RETRANS*" : "", __entry->lose ? " *LOSE*" : "") ); From e0e2effff5e19eba07de1ee1c95ba0588a7b3330 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Thu, 22 Sep 2016 19:04:19 +0200 Subject: [PATCH 1648/1715] mac80211: Move ieee802111_tx_dequeue() to later in tx.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The TXQ path restructure requires ieee80211_tx_dequeue() to call TX handlers and parts of the xmit_fast path. Move the function to later in tx.c in preparation for this. Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 90 +++++++++++++++++++++++------------------------ 1 file changed, 45 insertions(+), 45 deletions(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 61d302d97145..e8c996463b11 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1476,51 +1476,6 @@ void ieee80211_txq_teardown_flows(struct ieee80211_local *local) spin_unlock_bh(&fq->lock); } -struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw, - struct ieee80211_txq *txq) -{ - struct ieee80211_local *local = hw_to_local(hw); - struct txq_info *txqi = container_of(txq, struct txq_info, txq); - struct ieee80211_hdr *hdr; - struct sk_buff *skb = NULL; - struct fq *fq = &local->fq; - struct fq_tin *tin = &txqi->tin; - - spin_lock_bh(&fq->lock); - - if (test_bit(IEEE80211_TXQ_STOP, &txqi->flags)) - goto out; - - skb = fq_tin_dequeue(fq, tin, fq_tin_dequeue_func); - if (!skb) - goto out; - - ieee80211_set_skb_vif(skb, txqi); - - hdr = (struct ieee80211_hdr *)skb->data; - if (txq->sta && ieee80211_is_data_qos(hdr->frame_control)) { - struct sta_info *sta = container_of(txq->sta, struct sta_info, - sta); - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - - hdr->seq_ctrl = ieee80211_tx_next_seq(sta, txq->tid); - if (test_bit(IEEE80211_TXQ_AMPDU, &txqi->flags)) - info->flags |= IEEE80211_TX_CTL_AMPDU; - else - info->flags &= ~IEEE80211_TX_CTL_AMPDU; - } - -out: - spin_unlock_bh(&fq->lock); - - if (skb && skb_has_frag_list(skb) && - !ieee80211_hw_check(&local->hw, TX_FRAG_LIST)) - skb_linearize(skb); - - return skb; -} -EXPORT_SYMBOL(ieee80211_tx_dequeue); - static bool ieee80211_tx_frags(struct ieee80211_local *local, struct ieee80211_vif *vif, struct ieee80211_sta *sta, @@ -3311,6 +3266,51 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, return true; } +struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw, + struct ieee80211_txq *txq) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct txq_info *txqi = container_of(txq, struct txq_info, txq); + struct ieee80211_hdr *hdr; + struct sk_buff *skb = NULL; + struct fq *fq = &local->fq; + struct fq_tin *tin = &txqi->tin; + + spin_lock_bh(&fq->lock); + + if (test_bit(IEEE80211_TXQ_STOP, &txqi->flags)) + goto out; + + skb = fq_tin_dequeue(fq, tin, fq_tin_dequeue_func); + if (!skb) + goto out; + + ieee80211_set_skb_vif(skb, txqi); + + hdr = (struct ieee80211_hdr *)skb->data; + if (txq->sta && ieee80211_is_data_qos(hdr->frame_control)) { + struct sta_info *sta = container_of(txq->sta, struct sta_info, + sta); + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + + hdr->seq_ctrl = ieee80211_tx_next_seq(sta, txq->tid); + if (test_bit(IEEE80211_TXQ_AMPDU, &txqi->flags)) + info->flags |= IEEE80211_TX_CTL_AMPDU; + else + info->flags &= ~IEEE80211_TX_CTL_AMPDU; + } + +out: + spin_unlock_bh(&fq->lock); + + if (skb && skb_has_frag_list(skb) && + !ieee80211_hw_check(&local->hw, TX_FRAG_LIST)) + skb_linearize(skb); + + return skb; +} +EXPORT_SYMBOL(ieee80211_tx_dequeue); + void __ieee80211_subif_start_xmit(struct sk_buff *skb, struct net_device *dev, u32 info_flags) From b8676221f00dd5b6018f0fd88cd278f93e11143a Mon Sep 17 00:00:00 2001 From: David Spinadel Date: Thu, 22 Sep 2016 23:16:50 +0300 Subject: [PATCH 1649/1715] cfg80211: Add support for static WEP in the driver Add support for drivers that implement static WEP internally, i.e. expose connection keys to the driver in connect flow and don't upload the keys after the connection. Signed-off-by: David Spinadel Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 12 +++++++++++- net/wireless/core.h | 4 ++-- net/wireless/ibss.c | 5 +++-- net/wireless/sme.c | 6 +++++- net/wireless/util.c | 2 +- net/wireless/wext-compat.c | 2 +- net/wireless/wext-sme.c | 2 +- 7 files changed, 24 insertions(+), 9 deletions(-) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index ed37304fa09d..68dca3d93b85 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -5,7 +5,7 @@ * * Copyright 2006-2010 Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH - * Copyright 2015 Intel Deutschland GmbH + * Copyright 2015-2016 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -593,6 +593,8 @@ struct survey_info { s8 noise; }; +#define CFG80211_MAX_WEP_KEYS 4 + /** * struct cfg80211_crypto_settings - Crypto settings * @wpa_versions: indicates which, if any, WPA versions are enabled @@ -610,6 +612,9 @@ struct survey_info { * allowed through even on unauthorized ports * @control_port_no_encrypt: TRUE to prevent encryption of control port * protocol frames. + * @wep_keys: static WEP keys, if not NULL points to an array of + * CFG80211_MAX_WEP_KEYS WEP keys + * @wep_tx_key: key index (0..3) of the default TX static WEP key */ struct cfg80211_crypto_settings { u32 wpa_versions; @@ -621,6 +626,8 @@ struct cfg80211_crypto_settings { bool control_port; __be16 control_port_ethertype; bool control_port_no_encrypt; + struct key_params *wep_keys; + int wep_tx_key; }; /** @@ -2905,6 +2912,8 @@ struct cfg80211_ops { * @WIPHY_FLAG_SUPPORTS_5_10_MHZ: Device supports 5 MHz and 10 MHz channels. * @WIPHY_FLAG_HAS_CHANNEL_SWITCH: Device supports channel switch in * beaconing mode (AP, IBSS, Mesh, ...). + * @WIPHY_FLAG_HAS_STATIC_WEP: The device supports static WEP key installation + * before connection. */ enum wiphy_flags { /* use hole at 0 */ @@ -2930,6 +2939,7 @@ enum wiphy_flags { WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL = BIT(21), WIPHY_FLAG_SUPPORTS_5_10_MHZ = BIT(22), WIPHY_FLAG_HAS_CHANNEL_SWITCH = BIT(23), + WIPHY_FLAG_HAS_STATIC_WEP = BIT(24), }; /** diff --git a/net/wireless/core.h b/net/wireless/core.h index 5555e3c13ae9..554f87d0f991 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -249,8 +249,8 @@ struct cfg80211_event { }; struct cfg80211_cached_keys { - struct key_params params[4]; - u8 data[4][WLAN_KEY_LEN_WEP104]; + struct key_params params[CFG80211_MAX_WEP_KEYS]; + u8 data[CFG80211_MAX_WEP_KEYS][WLAN_KEY_LEN_WEP104]; int def; }; diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index eafdfa5798ae..364f900a3dc4 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -43,7 +43,8 @@ void __cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid, cfg80211_hold_bss(bss_from_pub(bss)); wdev->current_bss = bss_from_pub(bss); - cfg80211_upload_connect_keys(wdev); + if (!(wdev->wiphy->flags & WIPHY_FLAG_HAS_STATIC_WEP)) + cfg80211_upload_connect_keys(wdev); nl80211_send_ibss_bssid(wiphy_to_rdev(wdev->wiphy), dev, bssid, GFP_KERNEL); @@ -296,7 +297,7 @@ int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev, ck = kmemdup(wdev->wext.keys, sizeof(*ck), GFP_KERNEL); if (!ck) return -ENOMEM; - for (i = 0; i < 4; i++) + for (i = 0; i < CFG80211_MAX_WEP_KEYS; i++) ck->params[i].key = ck->data[i]; } err = __cfg80211_join_ibss(rdev, wdev->netdev, diff --git a/net/wireless/sme.c b/net/wireless/sme.c index c08a3b57dca1..a77db333927e 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -726,7 +726,8 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, wdev->current_bss = bss_from_pub(bss); - cfg80211_upload_connect_keys(wdev); + if (!(wdev->wiphy->flags & WIPHY_FLAG_HAS_STATIC_WEP)) + cfg80211_upload_connect_keys(wdev); rcu_read_lock(); country_ie = ieee80211_bss_get_ie(bss, WLAN_EID_COUNTRY); @@ -1043,6 +1044,9 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev, connect->crypto.ciphers_pairwise[0] = cipher; } } + + connect->crypto.wep_keys = connkeys->params; + connect->crypto.wep_tx_key = connkeys->def; } else { if (WARN_ON(connkeys)) return -EINVAL; diff --git a/net/wireless/util.c b/net/wireless/util.c index 9e6e2aaa7766..e02141d66b69 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -912,7 +912,7 @@ void cfg80211_upload_connect_keys(struct wireless_dev *wdev) if (!wdev->connect_keys) return; - for (i = 0; i < 4; i++) { + for (i = 0; i < CFG80211_MAX_WEP_KEYS; i++) { if (!wdev->connect_keys->params[i].cipher) continue; if (rdev_add_key(rdev, dev, i, false, NULL, diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 2b096c02eb85..a220156cf217 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -415,7 +415,7 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev, GFP_KERNEL); if (!wdev->wext.keys) return -ENOMEM; - for (i = 0; i < 4; i++) + for (i = 0; i < CFG80211_MAX_WEP_KEYS; i++) wdev->wext.keys->params[i].key = wdev->wext.keys->data[i]; } diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c index 88f1f6931ab8..995163830a61 100644 --- a/net/wireless/wext-sme.c +++ b/net/wireless/wext-sme.c @@ -46,7 +46,7 @@ int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev, ck = kmemdup(wdev->wext.keys, sizeof(*ck), GFP_KERNEL); if (!ck) return -ENOMEM; - for (i = 0; i < 4; i++) + for (i = 0; i < CFG80211_MAX_WEP_KEYS; i++) ck->params[i].key = ck->data[i]; } From cb3b7d87652aeb37cfb5295a6157a3280dae10cb Mon Sep 17 00:00:00 2001 From: Ayala Beker Date: Tue, 20 Sep 2016 17:31:13 +0300 Subject: [PATCH 1650/1715] cfg80211: add start / stop NAN commands This allows user space to start/stop NAN interface. A NAN interface is like P2P device in a few aspects: it doesn't have a netdev associated to it. Add the new interface type and prevent operations that can't be executed on NAN interface like scan. Define several attributes that may be configured by user space when starting NAN functionality (master preference and dual band operation) Signed-off-by: Andrei Otcheretianski Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 21 ++++++++- include/uapi/linux/nl80211.h | 47 +++++++++++++++++++ net/mac80211/cfg.c | 2 + net/mac80211/chan.c | 3 ++ net/mac80211/iface.c | 4 ++ net/mac80211/offchannel.c | 1 + net/mac80211/rx.c | 3 ++ net/mac80211/util.c | 1 + net/wireless/chan.c | 2 + net/wireless/core.c | 34 ++++++++++++++ net/wireless/core.h | 3 ++ net/wireless/mlme.c | 1 + net/wireless/nl80211.c | 91 ++++++++++++++++++++++++++++++++++-- net/wireless/rdev-ops.h | 20 ++++++++ net/wireless/trace.h | 27 +++++++++++ net/wireless/util.c | 6 ++- 16 files changed, 260 insertions(+), 6 deletions(-) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 68dca3d93b85..9898e1f883e2 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2313,6 +2313,19 @@ struct cfg80211_qos_map { struct cfg80211_dscp_range up[8]; }; +/** + * struct cfg80211_nan_conf - NAN configuration + * + * This struct defines NAN configuration parameters + * + * @master_pref: master preference (1 - 255) + * @dual: dual band operation mode, see &enum nl80211_nan_dual_band_conf + */ +struct cfg80211_nan_conf { + u8 master_pref; + u8 dual; +}; + /** * struct cfg80211_ops - backend description for wireless configuration * @@ -2601,6 +2614,8 @@ struct cfg80211_qos_map { * and returning to the base channel for communication with the AP. * @tdls_cancel_channel_switch: Stop channel-switching with a TDLS peer. Both * peers must be on the base channel when the call completes. + * @start_nan: Start the NAN interface. + * @stop_nan: Stop the NAN interface. */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); @@ -2866,6 +2881,9 @@ struct cfg80211_ops { void (*tdls_cancel_channel_switch)(struct wiphy *wiphy, struct net_device *dev, const u8 *addr); + int (*start_nan)(struct wiphy *wiphy, struct wireless_dev *wdev, + struct cfg80211_nan_conf *conf); + void (*stop_nan)(struct wiphy *wiphy, struct wireless_dev *wdev); }; /* @@ -3626,6 +3644,7 @@ struct cfg80211_cached_keys; * beacons, 0 when not valid * @address: The address for this device, valid only if @netdev is %NULL * @p2p_started: true if this is a P2P Device that has been started + * @nan_started: true if this is a NAN interface that has been started * @cac_started: true if DFS channel availability check has been started * @cac_start_time: timestamp (jiffies) when the dfs state was entered. * @cac_time_ms: CAC time in ms @@ -3657,7 +3676,7 @@ struct wireless_dev { struct mutex mtx; - bool use_4addr, p2p_started; + bool use_4addr, p2p_started, nan_started; u8 address[ETH_ALEN] __aligned(sizeof(u16)); diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index ec10d1b2838f..98fd3ec8598d 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -838,6 +838,16 @@ * not running. The driver indicates the status of the scan through * cfg80211_scan_done(). * + * @NL80211_CMD_START_NAN: Start NAN operation, identified by its + * %NL80211_ATTR_WDEV interface. This interface must have been previously + * created with %NL80211_CMD_NEW_INTERFACE. After it has been started, the + * NAN interface will create or join a cluster. This command must have a + * valid %NL80211_ATTR_NAN_MASTER_PREF attribute and optional + * %NL80211_ATTR_NAN_DUAL attributes. + * After this command NAN functions can be added. + * @NL80211_CMD_STOP_NAN: Stop the NAN operation, identified by + * its %NL80211_ATTR_WDEV interface. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -1026,6 +1036,9 @@ enum nl80211_commands { NL80211_CMD_ABORT_SCAN, + NL80211_CMD_START_NAN, + NL80211_CMD_STOP_NAN, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -1739,6 +1752,12 @@ enum nl80211_commands { * regulatory indoor configuration would be owned by the netlink socket * that configured the indoor setting, and the indoor operation would be * cleared when the socket is closed. + * If set during NAN interface creation, the interface will be destroyed + * if the socket is closed just like any other interface. Moreover, only + * the netlink socket that created the interface will be allowed to add + * and remove functions. NAN notifications will be sent in unicast to that + * socket. Without this attribute, any socket can add functions and the + * notifications will be sent to the %NL80211_MCGRP_NAN multicast group. * * @NL80211_ATTR_TDLS_INITIATOR: flag attribute indicating the current end is * the TDLS link initiator. @@ -1873,6 +1892,14 @@ enum nl80211_commands { * @NL80211_ATTR_MESH_PEER_AID: Association ID for the mesh peer (u16). This is * used to pull the stored data for mesh peer in power save state. * + * @NL80211_ATTR_NAN_MASTER_PREF: the master preference to be used by + * %NL80211_CMD_START_NAN. Its type is u8 and it can't be 0. + * Also, values 1 and 255 are reserved for certification purposes and + * should not be used during a normal device operation. + * @NL80211_ATTR_NAN_DUAL: NAN dual band operation config (see + * &enum nl80211_nan_dual_band_conf). This attribute is used with + * %NL80211_CMD_START_NAN. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -2267,6 +2294,9 @@ enum nl80211_attrs { NL80211_ATTR_MESH_PEER_AID, + NL80211_ATTR_NAN_MASTER_PREF, + NL80211_ATTR_NAN_DUAL, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -2345,6 +2375,7 @@ enum nl80211_attrs { * commands to create and destroy one * @NL80211_IF_TYPE_OCB: Outside Context of a BSS * This mode corresponds to the MIB variable dot11OCBActivated=true + * @NL80211_IFTYPE_NAN: NAN device interface type (not a netdev) * @NL80211_IFTYPE_MAX: highest interface type number currently defined * @NUM_NL80211_IFTYPES: number of defined interface types * @@ -2365,6 +2396,7 @@ enum nl80211_iftype { NL80211_IFTYPE_P2P_GO, NL80211_IFTYPE_P2P_DEVICE, NL80211_IFTYPE_OCB, + NL80211_IFTYPE_NAN, /* keep last */ NUM_NL80211_IFTYPES, @@ -4870,4 +4902,19 @@ enum nl80211_bss_select_attr { NL80211_BSS_SELECT_ATTR_MAX = __NL80211_BSS_SELECT_ATTR_AFTER_LAST - 1 }; +/** + * enum nl80211_nan_dual_band_conf - NAN dual band configuration + * + * Defines the NAN dual band mode of operation + * + * @NL80211_NAN_BAND_DEFAULT: device default mode + * @NL80211_NAN_BAND_2GHZ: 2.4GHz mode + * @NL80211_NAN_BAND_5GHZ: 5GHz mode + */ +enum nl80211_nan_dual_band_conf { + NL80211_NAN_BAND_DEFAULT = 1 << 0, + NL80211_NAN_BAND_2GHZ = 1 << 1, + NL80211_NAN_BAND_5GHZ = 1 << 2, +}; + #endif /* __LINUX_NL80211_H */ diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index e29ff5749944..a74027f887bc 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -257,6 +257,7 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, case NL80211_IFTYPE_WDS: case NL80211_IFTYPE_MONITOR: case NL80211_IFTYPE_P2P_DEVICE: + case NL80211_IFTYPE_NAN: case NL80211_IFTYPE_UNSPECIFIED: case NUM_NL80211_IFTYPES: case NL80211_IFTYPE_P2P_CLIENT: @@ -2036,6 +2037,7 @@ static int ieee80211_scan(struct wiphy *wiphy, !(req->flags & NL80211_SCAN_FLAG_AP))) return -EOPNOTSUPP; break; + case NL80211_IFTYPE_NAN: default: return -EOPNOTSUPP; } diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 74142d07ad31..d035801569eb 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -274,6 +274,7 @@ ieee80211_get_chanctx_max_required_bw(struct ieee80211_local *local, ieee80211_get_max_required_bw(sdata)); break; case NL80211_IFTYPE_P2P_DEVICE: + case NL80211_IFTYPE_NAN: continue; case NL80211_IFTYPE_ADHOC: case NL80211_IFTYPE_WDS: @@ -718,6 +719,7 @@ void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local, switch (sdata->vif.type) { case NL80211_IFTYPE_P2P_DEVICE: + case NL80211_IFTYPE_NAN: continue; case NL80211_IFTYPE_STATION: if (!sdata->u.mgd.associated) @@ -980,6 +982,7 @@ ieee80211_vif_chanctx_reservation_complete(struct ieee80211_sub_if_data *sdata) case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_P2P_GO: case NL80211_IFTYPE_P2P_DEVICE: + case NL80211_IFTYPE_NAN: case NUM_NL80211_IFTYPES: WARN_ON(1); break; diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index b0abddc714ef..e694ca2baad0 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -545,6 +545,7 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) case NL80211_IFTYPE_ADHOC: case NL80211_IFTYPE_P2P_DEVICE: case NL80211_IFTYPE_OCB: + case NL80211_IFTYPE_NAN: /* no special treatment */ break; case NL80211_IFTYPE_UNSPECIFIED: @@ -660,6 +661,7 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) break; case NL80211_IFTYPE_WDS: case NL80211_IFTYPE_P2P_DEVICE: + case NL80211_IFTYPE_NAN: break; default: /* not reached */ @@ -948,6 +950,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, /* relies on synchronize_rcu() below */ RCU_INIT_POINTER(local->p2p_sdata, NULL); /* fall through */ + case NL80211_IFTYPE_NAN: default: cancel_work_sync(&sdata->work); /* @@ -1457,6 +1460,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, break; case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_P2P_DEVICE: + case NL80211_IFTYPE_NAN: sdata->vif.bss_conf.bssid = sdata->vif.addr; break; case NL80211_IFTYPE_UNSPECIFIED: diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index 55a9c5b94ce1..75d5c960ce67 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -838,6 +838,7 @@ int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, case NL80211_IFTYPE_P2P_DEVICE: need_offchan = true; break; + case NL80211_IFTYPE_NAN: default: return -EOPNOTSUPP; } diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index e796060b7c5e..c9489a86e6d6 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -3586,6 +3586,9 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx) ieee80211_is_probe_req(hdr->frame_control) || ieee80211_is_probe_resp(hdr->frame_control) || ieee80211_is_beacon(hdr->frame_control); + case NL80211_IFTYPE_NAN: + /* Currently no frames on NAN interface are allowed */ + return false; default: break; } diff --git a/net/mac80211/util.c b/net/mac80211/util.c index b6865d884487..2c78541f695c 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1975,6 +1975,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_MONITOR: case NL80211_IFTYPE_P2P_DEVICE: + case NL80211_IFTYPE_NAN: /* nothing to do */ break; case NL80211_IFTYPE_UNSPECIFIED: diff --git a/net/wireless/chan.c b/net/wireless/chan.c index 0f506220a3bd..5497d022fada 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -372,6 +372,7 @@ int cfg80211_chandef_dfs_required(struct wiphy *wiphy, case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_WDS: case NL80211_IFTYPE_P2P_DEVICE: + case NL80211_IFTYPE_NAN: break; case NL80211_IFTYPE_UNSPECIFIED: case NUM_NL80211_IFTYPES: @@ -946,6 +947,7 @@ cfg80211_get_chan_state(struct wireless_dev *wdev, case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_WDS: case NL80211_IFTYPE_P2P_DEVICE: + case NL80211_IFTYPE_NAN: /* these interface types don't really have a channel */ return; case NL80211_IFTYPE_UNSPECIFIED: diff --git a/net/wireless/core.c b/net/wireless/core.c index 4911cd997b9a..013987243c0b 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -225,6 +225,23 @@ void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev, } } +void cfg80211_stop_nan(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev) +{ + ASSERT_RTNL(); + + if (WARN_ON(wdev->iftype != NL80211_IFTYPE_NAN)) + return; + + if (!wdev->nan_started) + return; + + rdev_stop_nan(rdev, wdev); + wdev->nan_started = false; + + rdev->opencount--; +} + void cfg80211_shutdown_all_interfaces(struct wiphy *wiphy) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); @@ -242,6 +259,9 @@ void cfg80211_shutdown_all_interfaces(struct wiphy *wiphy) case NL80211_IFTYPE_P2P_DEVICE: cfg80211_stop_p2p_device(rdev, wdev); break; + case NL80211_IFTYPE_NAN: + cfg80211_stop_nan(rdev, wdev); + break; default: break; } @@ -537,6 +557,11 @@ static int wiphy_verify_combinations(struct wiphy *wiphy) c->limits[j].max > 1)) return -EINVAL; + /* Only a single NAN can be allowed */ + if (WARN_ON(types & BIT(NL80211_IFTYPE_NAN) && + c->limits[j].max > 1)) + return -EINVAL; + cnt += c->limits[j].max; /* * Don't advertise an unsupported type @@ -579,6 +604,10 @@ int wiphy_register(struct wiphy *wiphy) !rdev->ops->tdls_cancel_channel_switch))) return -EINVAL; + if (WARN_ON((wiphy->interface_modes & BIT(NL80211_IFTYPE_NAN)) && + (!rdev->ops->start_nan || !rdev->ops->stop_nan))) + return -EINVAL; + /* * if a wiphy has unsupported modes for regulatory channel enforcement, * opt-out of enforcement checking @@ -589,6 +618,7 @@ int wiphy_register(struct wiphy *wiphy) BIT(NL80211_IFTYPE_P2P_GO) | BIT(NL80211_IFTYPE_ADHOC) | BIT(NL80211_IFTYPE_P2P_DEVICE) | + BIT(NL80211_IFTYPE_NAN) | BIT(NL80211_IFTYPE_AP_VLAN) | BIT(NL80211_IFTYPE_MONITOR))) wiphy->regulatory_flags |= REGULATORY_IGNORE_STALE_KICKOFF; @@ -916,6 +946,9 @@ void cfg80211_unregister_wdev(struct wireless_dev *wdev) cfg80211_mlme_purge_registrations(wdev); cfg80211_stop_p2p_device(rdev, wdev); break; + case NL80211_IFTYPE_NAN: + cfg80211_stop_nan(rdev, wdev); + break; default: WARN_ON_ONCE(1); break; @@ -979,6 +1012,7 @@ void __cfg80211_leave(struct cfg80211_registered_device *rdev, /* must be handled by mac80211/driver, has no APIs */ break; case NL80211_IFTYPE_P2P_DEVICE: + case NL80211_IFTYPE_NAN: /* cannot happen, has no netdev */ break; case NL80211_IFTYPE_AP_VLAN: diff --git a/net/wireless/core.h b/net/wireless/core.h index 554f87d0f991..08d2e948c9ad 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -488,6 +488,9 @@ void cfg80211_leave(struct cfg80211_registered_device *rdev, void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev); +void cfg80211_stop_nan(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev); + #define CFG80211_MAX_NUM_DIFFERENT_CHANNELS 10 #ifdef CONFIG_CFG80211_DEVELOPER_WARNINGS diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index d6abb0704db5..cbb48e26a871 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -634,6 +634,7 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, * fall through, P2P device only supports * public action frames */ + case NL80211_IFTYPE_NAN: default: err = -EOPNOTSUPP; break; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index b8441e60b0f6..9e9fb37087fc 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -409,6 +409,8 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { .len = VHT_MUMIMO_GROUPS_DATA_LEN }, [NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR] = { .len = ETH_ALEN }, + [NL80211_ATTR_NAN_MASTER_PREF] = { .type = NLA_U8 }, + [NL80211_ATTR_NAN_DUAL] = { .type = NLA_U8 }, }; /* policy for the key attributes */ @@ -934,6 +936,7 @@ static int nl80211_key_allowed(struct wireless_dev *wdev) case NL80211_IFTYPE_UNSPECIFIED: case NL80211_IFTYPE_OCB: case NL80211_IFTYPE_MONITOR: + case NL80211_IFTYPE_NAN: case NL80211_IFTYPE_P2P_DEVICE: case NL80211_IFTYPE_WDS: case NUM_NL80211_IFTYPES: @@ -2819,7 +2822,7 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) !(rdev->wiphy.interface_modes & (1 << type))) return -EOPNOTSUPP; - if ((type == NL80211_IFTYPE_P2P_DEVICE || + if ((type == NL80211_IFTYPE_P2P_DEVICE || type == NL80211_IFTYPE_NAN || rdev->wiphy.features & NL80211_FEATURE_MAC_ON_CREATE) && info->attrs[NL80211_ATTR_MAC]) { nla_memcpy(params.macaddr, info->attrs[NL80211_ATTR_MAC], @@ -2875,9 +2878,10 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) wdev->mesh_id_up_len); wdev_unlock(wdev); break; + case NL80211_IFTYPE_NAN: case NL80211_IFTYPE_P2P_DEVICE: /* - * P2P Device doesn't have a netdev, so doesn't go + * P2P Device and NAN do not have a netdev, so don't go * through the netdev notifier and must be added here */ mutex_init(&wdev->mtx); @@ -6434,6 +6438,9 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) wiphy = &rdev->wiphy; + if (wdev->iftype == NL80211_IFTYPE_NAN) + return -EOPNOTSUPP; + if (!rdev->ops->scan) return -EOPNOTSUPP; @@ -8977,6 +8984,7 @@ static int nl80211_register_mgmt(struct sk_buff *skb, struct genl_info *info) case NL80211_IFTYPE_P2P_GO: case NL80211_IFTYPE_P2P_DEVICE: break; + case NL80211_IFTYPE_NAN: default: return -EOPNOTSUPP; } @@ -9022,6 +9030,7 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) case NL80211_IFTYPE_MESH_POINT: case NL80211_IFTYPE_P2P_GO: break; + case NL80211_IFTYPE_NAN: default: return -EOPNOTSUPP; } @@ -9138,6 +9147,7 @@ static int nl80211_tx_mgmt_cancel_wait(struct sk_buff *skb, struct genl_info *in case NL80211_IFTYPE_P2P_GO: case NL80211_IFTYPE_P2P_DEVICE: break; + case NL80211_IFTYPE_NAN: default: return -EOPNOTSUPP; } @@ -10504,6 +10514,58 @@ static int nl80211_stop_p2p_device(struct sk_buff *skb, struct genl_info *info) return 0; } +static int nl80211_start_nan(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct wireless_dev *wdev = info->user_ptr[1]; + struct cfg80211_nan_conf conf = {}; + int err; + + if (wdev->iftype != NL80211_IFTYPE_NAN) + return -EOPNOTSUPP; + + if (wdev->nan_started) + return -EEXIST; + + if (rfkill_blocked(rdev->rfkill)) + return -ERFKILL; + + if (!info->attrs[NL80211_ATTR_NAN_MASTER_PREF]) + return -EINVAL; + + if (!info->attrs[NL80211_ATTR_NAN_DUAL]) + return -EINVAL; + + conf.master_pref = + nla_get_u8(info->attrs[NL80211_ATTR_NAN_MASTER_PREF]); + if (!conf.master_pref) + return -EINVAL; + + conf.dual = nla_get_u8(info->attrs[NL80211_ATTR_NAN_DUAL]); + + err = rdev_start_nan(rdev, wdev, &conf); + if (err) + return err; + + wdev->nan_started = true; + rdev->opencount++; + + return 0; +} + +static int nl80211_stop_nan(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct wireless_dev *wdev = info->user_ptr[1]; + + if (wdev->iftype != NL80211_IFTYPE_NAN) + return -EOPNOTSUPP; + + cfg80211_stop_nan(rdev, wdev); + + return 0; +} + static int nl80211_get_protocol_features(struct sk_buff *skb, struct genl_info *info) { @@ -11205,7 +11267,14 @@ static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, dev_hold(dev); } else if (ops->internal_flags & NL80211_FLAG_CHECK_NETDEV_UP) { - if (!wdev->p2p_started) { + if (wdev->iftype == NL80211_IFTYPE_P2P_DEVICE && + !wdev->p2p_started) { + if (rtnl) + rtnl_unlock(); + return -ENETDOWN; + } + if (wdev->iftype == NL80211_IFTYPE_NAN && + !wdev->nan_started) { if (rtnl) rtnl_unlock(); return -ENETDOWN; @@ -11838,6 +11907,22 @@ static const struct genl_ops nl80211_ops[] = { .internal_flags = NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL, }, + { + .cmd = NL80211_CMD_START_NAN, + .doit = nl80211_start_nan, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_WDEV | + NL80211_FLAG_NEED_RTNL, + }, + { + .cmd = NL80211_CMD_STOP_NAN, + .doit = nl80211_stop_nan, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_WDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, { .cmd = NL80211_CMD_SET_MCAST_RATE, .doit = nl80211_set_mcast_rate, diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 85ff30bee2b9..afb68a8428b9 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -887,6 +887,26 @@ static inline void rdev_stop_p2p_device(struct cfg80211_registered_device *rdev, trace_rdev_return_void(&rdev->wiphy); } +static inline int rdev_start_nan(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, + struct cfg80211_nan_conf *conf) +{ + int ret; + + trace_rdev_start_nan(&rdev->wiphy, wdev, conf); + ret = rdev->ops->start_nan(&rdev->wiphy, wdev, conf); + trace_rdev_return_int(&rdev->wiphy, ret); + return ret; +} + +static inline void rdev_stop_nan(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev) +{ + trace_rdev_stop_nan(&rdev->wiphy, wdev); + rdev->ops->stop_nan(&rdev->wiphy, wdev); + trace_rdev_return_void(&rdev->wiphy); +} + static inline int rdev_set_mac_acl(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_acl_data *params) diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 72b5255cefe2..5f3370f4c6a2 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1889,6 +1889,33 @@ DEFINE_EVENT(wiphy_wdev_evt, rdev_stop_p2p_device, TP_ARGS(wiphy, wdev) ); +TRACE_EVENT(rdev_start_nan, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, + struct cfg80211_nan_conf *conf), + TP_ARGS(wiphy, wdev, conf), + TP_STRUCT__entry( + WIPHY_ENTRY + WDEV_ENTRY + __field(u8, master_pref) + __field(u8, dual); + ), + TP_fast_assign( + WIPHY_ASSIGN; + WDEV_ASSIGN; + __entry->master_pref = conf->master_pref; + __entry->dual = conf->dual; + ), + TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT + ", master preference: %u, dual: %d", + WIPHY_PR_ARG, WDEV_PR_ARG, __entry->master_pref, + __entry->dual) +); + +DEFINE_EVENT(wiphy_wdev_evt, rdev_stop_nan, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev), + TP_ARGS(wiphy, wdev) +); + TRACE_EVENT(rdev_set_mac_acl, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_acl_data *params), diff --git a/net/wireless/util.c b/net/wireless/util.c index e02141d66b69..7a2d46b0058a 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1008,8 +1008,9 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, if (otype == NL80211_IFTYPE_AP_VLAN) return -EOPNOTSUPP; - /* cannot change into P2P device type */ - if (ntype == NL80211_IFTYPE_P2P_DEVICE) + /* cannot change into P2P device or NAN */ + if (ntype == NL80211_IFTYPE_P2P_DEVICE || + ntype == NL80211_IFTYPE_NAN) return -EOPNOTSUPP; if (!rdev->ops->change_virtual_intf || @@ -1088,6 +1089,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, /* not happening */ break; case NL80211_IFTYPE_P2P_DEVICE: + case NL80211_IFTYPE_NAN: WARN_ON(1); break; } From 708d50edb149fe488c7c96f59ba9a89a64985cf2 Mon Sep 17 00:00:00 2001 From: Ayala Beker Date: Tue, 20 Sep 2016 17:31:14 +0300 Subject: [PATCH 1651/1715] mac80211: add boilerplate code for start / stop NAN This code doesn't do much besides allowing to start and stop the vif. Signed-off-by: Andrei Otcheretianski Signed-off-by: Emmanuel Grumbach Signed-off-by: Ayala Beker Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/net/mac80211.h | 9 +++++++ net/mac80211/cfg.c | 36 ++++++++++++++++++++++++++++ net/mac80211/chan.c | 3 +++ net/mac80211/driver-ops.h | 27 +++++++++++++++++++++ net/mac80211/iface.c | 8 +++++-- net/mac80211/main.c | 5 ++++ net/mac80211/offchannel.c | 3 ++- net/mac80211/trace.h | 50 +++++++++++++++++++++++++++++++++++++++ net/mac80211/util.c | 3 ++- 9 files changed, 140 insertions(+), 4 deletions(-) diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 5296100f3889..df9b5cff300c 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -3420,6 +3420,9 @@ enum ieee80211_reconfig_type { * synchronization which is needed in case driver has in its RSS queues * pending frames that were received prior to the control path action * currently taken (e.g. disassociation) but are not processed yet. + * + * @start_nan: join an existing NAN cluster, or create a new one. + * @stop_nan: leave the NAN cluster. */ struct ieee80211_ops { void (*tx)(struct ieee80211_hw *hw, @@ -3655,6 +3658,12 @@ struct ieee80211_ops { void (*wake_tx_queue)(struct ieee80211_hw *hw, struct ieee80211_txq *txq); void (*sync_rx_queues)(struct ieee80211_hw *hw); + + int (*start_nan)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct cfg80211_nan_conf *conf); + int (*stop_nan)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif); }; /** diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index a74027f887bc..9aabb0932d24 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -3,6 +3,7 @@ * * Copyright 2006-2010 Johannes Berg * Copyright 2013-2015 Intel Mobile Communications GmbH + * Copyright (C) 2015-2016 Intel Deutschland GmbH * * This file is GPLv2 as found in COPYING. */ @@ -152,6 +153,39 @@ static void ieee80211_stop_p2p_device(struct wiphy *wiphy, ieee80211_sdata_stop(IEEE80211_WDEV_TO_SUB_IF(wdev)); } +static int ieee80211_start_nan(struct wiphy *wiphy, + struct wireless_dev *wdev, + struct cfg80211_nan_conf *conf) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); + int ret; + + mutex_lock(&sdata->local->chanctx_mtx); + ret = ieee80211_check_combinations(sdata, NULL, 0, 0); + mutex_unlock(&sdata->local->chanctx_mtx); + if (ret < 0) + return ret; + + ret = ieee80211_do_open(wdev, true); + if (ret) + return ret; + + ret = drv_start_nan(sdata->local, sdata, conf); + if (ret) + ieee80211_sdata_stop(sdata); + + return ret; +} + +static void ieee80211_stop_nan(struct wiphy *wiphy, + struct wireless_dev *wdev) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); + + drv_stop_nan(sdata->local, sdata); + ieee80211_sdata_stop(sdata); +} + static int ieee80211_set_noack_map(struct wiphy *wiphy, struct net_device *dev, u16 noack_map) @@ -3464,4 +3498,6 @@ const struct cfg80211_ops mac80211_config_ops = { .set_ap_chanwidth = ieee80211_set_ap_chanwidth, .add_tx_ts = ieee80211_add_tx_ts, .del_tx_ts = ieee80211_del_tx_ts, + .start_nan = ieee80211_start_nan, + .stop_nan = ieee80211_stop_nan, }; diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index d035801569eb..e75cbf6ecc26 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -647,6 +647,9 @@ static int ieee80211_assign_vif_chanctx(struct ieee80211_sub_if_data *sdata, struct ieee80211_chanctx *curr_ctx = NULL; int ret = 0; + if (WARN_ON(sdata->vif.type == NL80211_IFTYPE_NAN)) + return -ENOTSUPP; + conf = rcu_dereference_protected(sdata->vif.chanctx_conf, lockdep_is_held(&local->chanctx_mtx)); diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index fe35a1c0dc86..e52cfb855bd9 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -162,6 +162,7 @@ static inline void drv_bss_info_changed(struct ieee80211_local *local, return; if (WARN_ON_ONCE(sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE || + sdata->vif.type == NL80211_IFTYPE_NAN || (sdata->vif.type == NL80211_IFTYPE_MONITOR && !sdata->vif.mu_mimo_owner))) return; @@ -1165,4 +1166,30 @@ static inline void drv_wake_tx_queue(struct ieee80211_local *local, local->ops->wake_tx_queue(&local->hw, &txq->txq); } +static inline int drv_start_nan(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct cfg80211_nan_conf *conf) +{ + int ret; + + might_sleep(); + check_sdata_in_driver(sdata); + + trace_drv_start_nan(local, sdata, conf); + ret = local->ops->start_nan(&local->hw, &sdata->vif, conf); + trace_drv_return_int(local, ret); + return ret; +} + +static inline void drv_stop_nan(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata) +{ + might_sleep(); + check_sdata_in_driver(sdata); + + trace_drv_stop_nan(local, sdata); + local->ops->stop_nan(&local->hw, &sdata->vif); + trace_drv_return_void(local); +} + #endif /* __MAC80211_DRIVER_OPS */ diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index e694ca2baad0..507f46a8eb1c 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -327,6 +327,9 @@ static int ieee80211_check_queues(struct ieee80211_sub_if_data *sdata, int n_queues = sdata->local->hw.queues; int i; + if (iftype == NL80211_IFTYPE_NAN) + return 0; + if (iftype != NL80211_IFTYPE_P2P_DEVICE) { for (i = 0; i < IEEE80211_NUM_ACS; i++) { if (WARN_ON_ONCE(sdata->vif.hw_queue[i] == @@ -647,7 +650,8 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) local->fif_probe_req++; } - if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE) + if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE && + sdata->vif.type != NL80211_IFTYPE_NAN) changed |= ieee80211_reset_erp_info(sdata); ieee80211_bss_info_change_notify(sdata, changed); @@ -1726,7 +1730,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, ASSERT_RTNL(); - if (type == NL80211_IFTYPE_P2P_DEVICE) { + if (type == NL80211_IFTYPE_P2P_DEVICE || type == NL80211_IFTYPE_NAN) { struct wireless_dev *wdev; sdata = kzalloc(sizeof(*sdata) + local->hw.vif_data_size, diff --git a/net/mac80211/main.c b/net/mac80211/main.c index ac053a9df36d..b5cf2c5cc166 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -821,6 +821,11 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) !local->ops->tdls_recv_channel_switch)) return -EOPNOTSUPP; + if (WARN_ON(local->hw.wiphy->interface_modes & + BIT(NL80211_IFTYPE_NAN) && + (!local->ops->start_nan || !local->ops->stop_nan))) + return -EINVAL; + #ifdef CONFIG_PM if (hw->wiphy->wowlan && (!local->ops->suspend || !local->ops->resume)) return -EINVAL; diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index 75d5c960ce67..c3f610bba3fe 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -128,7 +128,8 @@ void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local) if (!ieee80211_sdata_running(sdata)) continue; - if (sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE) + if (sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE || + sdata->vif.type == NL80211_IFTYPE_NAN) continue; if (sdata->vif.type != NL80211_IFTYPE_MONITOR) diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 77e4c53baefb..deefbfb9f6fb 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -1700,6 +1700,56 @@ TRACE_EVENT(drv_get_expected_throughput, ) ); +TRACE_EVENT(drv_start_nan, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct cfg80211_nan_conf *conf), + + TP_ARGS(local, sdata, conf), + TP_STRUCT__entry( + LOCAL_ENTRY + VIF_ENTRY + __field(u8, master_pref) + __field(u8, dual) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + VIF_ASSIGN; + __entry->master_pref = conf->master_pref; + __entry->dual = conf->dual; + ), + + TP_printk( + LOCAL_PR_FMT VIF_PR_FMT + ", master preference: %u, dual: %d", + LOCAL_PR_ARG, VIF_PR_ARG, __entry->master_pref, + __entry->dual + ) +); + +TRACE_EVENT(drv_stop_nan, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata), + + TP_ARGS(local, sdata), + + TP_STRUCT__entry( + LOCAL_ENTRY + VIF_ENTRY + ), + + TP_fast_assign( + LOCAL_ASSIGN; + VIF_ASSIGN; + ), + + TP_printk( + LOCAL_PR_FMT VIF_PR_FMT, + LOCAL_PR_ARG, VIF_PR_ARG + ) +); + /* * Tracing for API calls that drivers call. */ diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 2c78541f695c..5b57fcaaec9b 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1209,7 +1209,8 @@ void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata, } if (sdata->vif.type != NL80211_IFTYPE_MONITOR && - sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE) { + sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE && + sdata->vif.type != NL80211_IFTYPE_NAN) { sdata->vif.bss_conf.qos = enable_qos; if (bss_notify) ieee80211_bss_info_change_notify(sdata, From a442b761b24b6886f9a4e2ff5f8cb4824c96526b Mon Sep 17 00:00:00 2001 From: Ayala Beker Date: Tue, 20 Sep 2016 17:31:15 +0300 Subject: [PATCH 1652/1715] cfg80211: add add_nan_func / del_nan_func A NAN function can be either publish, subscribe or follow up. Make all the necessary verifications and just pass the request to the driver. Allow the user space application that starts NAN to forbid any other socket to add or remove functions. Signed-off-by: Andrei Otcheretianski Signed-off-by: Emmanuel Grumbach Signed-off-by: Ayala Beker Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 91 +++++++++ include/uapi/linux/nl80211.h | 150 ++++++++++++++ net/wireless/core.c | 3 +- net/wireless/nl80211.c | 369 +++++++++++++++++++++++++++++++++++ net/wireless/rdev-ops.h | 21 ++ net/wireless/trace.h | 39 ++++ net/wireless/util.c | 22 +++ 7 files changed, 694 insertions(+), 1 deletion(-) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 9898e1f883e2..2f35ccf6da83 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2326,6 +2326,73 @@ struct cfg80211_nan_conf { u8 dual; }; +/** + * struct cfg80211_nan_func_filter - a NAN function Rx / Tx filter + * + * @filter: the content of the filter + * @len: the length of the filter + */ +struct cfg80211_nan_func_filter { + const u8 *filter; + u8 len; +}; + +/** + * struct cfg80211_nan_func - a NAN function + * + * @type: &enum nl80211_nan_function_type + * @service_id: the service ID of the function + * @publish_type: &nl80211_nan_publish_type + * @close_range: if true, the range should be limited. Threshold is + * implementation specific. + * @publish_bcast: if true, the solicited publish should be broadcasted + * @subscribe_active: if true, the subscribe is active + * @followup_id: the instance ID for follow up + * @followup_reqid: the requestor instance ID for follow up + * @followup_dest: MAC address of the recipient of the follow up + * @ttl: time to live counter in DW. + * @serv_spec_info: Service Specific Info + * @serv_spec_info_len: Service Specific Info length + * @srf_include: if true, SRF is inclusive + * @srf_bf: Bloom Filter + * @srf_bf_len: Bloom Filter length + * @srf_bf_idx: Bloom Filter index + * @srf_macs: SRF MAC addresses + * @srf_num_macs: number of MAC addresses in SRF + * @rx_filters: rx filters that are matched with corresponding peer's tx_filter + * @tx_filters: filters that should be transmitted in the SDF. + * @num_rx_filters: length of &rx_filters. + * @num_tx_filters: length of &tx_filters. + * @instance_id: driver allocated id of the function. + * @cookie: unique NAN function identifier. + */ +struct cfg80211_nan_func { + enum nl80211_nan_function_type type; + u8 service_id[NL80211_NAN_FUNC_SERVICE_ID_LEN]; + u8 publish_type; + bool close_range; + bool publish_bcast; + bool subscribe_active; + u8 followup_id; + u8 followup_reqid; + struct mac_address followup_dest; + u32 ttl; + const u8 *serv_spec_info; + u8 serv_spec_info_len; + bool srf_include; + const u8 *srf_bf; + u8 srf_bf_len; + u8 srf_bf_idx; + struct mac_address *srf_macs; + int srf_num_macs; + struct cfg80211_nan_func_filter *rx_filters; + struct cfg80211_nan_func_filter *tx_filters; + u8 num_tx_filters; + u8 num_rx_filters; + u8 instance_id; + u64 cookie; +}; + /** * struct cfg80211_ops - backend description for wireless configuration * @@ -2616,6 +2683,14 @@ struct cfg80211_nan_conf { * peers must be on the base channel when the call completes. * @start_nan: Start the NAN interface. * @stop_nan: Stop the NAN interface. + * @add_nan_func: Add a NAN function. Returns negative value on failure. + * On success @nan_func ownership is transferred to the driver and + * it may access it outside of the scope of this function. The driver + * should free the @nan_func when no longer needed by calling + * cfg80211_free_nan_func(). + * On success the driver should assign an instance_id in the + * provided @nan_func. + * @del_nan_func: Delete a NAN function. */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); @@ -2884,6 +2959,10 @@ struct cfg80211_ops { int (*start_nan)(struct wiphy *wiphy, struct wireless_dev *wdev, struct cfg80211_nan_conf *conf); void (*stop_nan)(struct wiphy *wiphy, struct wireless_dev *wdev); + int (*add_nan_func)(struct wiphy *wiphy, struct wireless_dev *wdev, + struct cfg80211_nan_func *nan_func); + void (*del_nan_func)(struct wiphy *wiphy, struct wireless_dev *wdev, + u64 cookie); }; /* @@ -3335,6 +3414,8 @@ struct wiphy_iftype_ext_capab { * @bss_select_support: bitmask indicating the BSS selection criteria supported * by the driver in the .connect() callback. The bit position maps to the * attribute indices defined in &enum nl80211_bss_select_attr. + * + * @cookie_counter: unique generic cookie counter, used to identify objects. */ struct wiphy { /* assign these fields before you register the wiphy */ @@ -3464,6 +3545,8 @@ struct wiphy { u32 bss_select_support; + u64 cookie_counter; + char priv[0] __aligned(NETDEV_ALIGN); }; @@ -5584,6 +5667,14 @@ wiphy_ext_feature_isset(struct wiphy *wiphy, return (ft_byte & BIT(ftidx % 8)) != 0; } +/** + * cfg80211_free_nan_func - free NAN function + * @f: NAN function that should be freed + * + * Frees all the NAN function and all it's allocated members. + */ +void cfg80211_free_nan_func(struct cfg80211_nan_func *f); + /* ethtool helper */ void cfg80211_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info); diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 98fd3ec8598d..e4935d963061 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -847,6 +847,21 @@ * After this command NAN functions can be added. * @NL80211_CMD_STOP_NAN: Stop the NAN operation, identified by * its %NL80211_ATTR_WDEV interface. + * @NL80211_CMD_ADD_NAN_FUNCTION: Add a NAN function. The function is defined + * with %NL80211_ATTR_NAN_FUNC nested attribute. When called, this + * operation returns the strictly positive and unique instance id + * (%NL80211_ATTR_NAN_FUNC_INST_ID) and a cookie (%NL80211_ATTR_COOKIE) + * of the function upon success. + * Since instance ID's can be re-used, this cookie is the right + * way to identify the function. This will avoid races when a termination + * event is handled by the user space after it has already added a new + * function that got the same instance id from the kernel as the one + * which just terminated. + * This cookie may be used in NAN events even before the command + * returns, so userspace shouldn't process NAN events until it processes + * the response to this command. + * Look at %NL80211_ATTR_SOCKET_OWNER as well. + * @NL80211_CMD_DEL_NAN_FUNCTION: Delete a NAN function by cookie. * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use @@ -1038,6 +1053,8 @@ enum nl80211_commands { NL80211_CMD_START_NAN, NL80211_CMD_STOP_NAN, + NL80211_CMD_ADD_NAN_FUNCTION, + NL80211_CMD_DEL_NAN_FUNCTION, /* add new commands above here */ @@ -1899,6 +1916,9 @@ enum nl80211_commands { * @NL80211_ATTR_NAN_DUAL: NAN dual band operation config (see * &enum nl80211_nan_dual_band_conf). This attribute is used with * %NL80211_CMD_START_NAN. + * @NL80211_ATTR_NAN_FUNC: a function that can be added to NAN. See + * &enum nl80211_nan_func_attributes for description of this nested + * attribute. * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined @@ -2296,6 +2316,7 @@ enum nl80211_attrs { NL80211_ATTR_NAN_MASTER_PREF, NL80211_ATTR_NAN_DUAL, + NL80211_ATTR_NAN_FUNC, /* add attributes here, update the policy in nl80211.c */ @@ -4917,4 +4938,133 @@ enum nl80211_nan_dual_band_conf { NL80211_NAN_BAND_5GHZ = 1 << 2, }; +/** + * enum nl80211_nan_function_type - NAN function type + * + * Defines the function type of a NAN function + * + * @NL80211_NAN_FUNC_PUBLISH: function is publish + * @NL80211_NAN_FUNC_SUBSCRIBE: function is subscribe + * @NL80211_NAN_FUNC_FOLLOW_UP: function is follow-up + */ +enum nl80211_nan_function_type { + NL80211_NAN_FUNC_PUBLISH, + NL80211_NAN_FUNC_SUBSCRIBE, + NL80211_NAN_FUNC_FOLLOW_UP, + + /* keep last */ + __NL80211_NAN_FUNC_TYPE_AFTER_LAST, + NL80211_NAN_FUNC_MAX_TYPE = __NL80211_NAN_FUNC_TYPE_AFTER_LAST - 1, +}; + +/** + * enum nl80211_nan_publish_type - NAN publish tx type + * + * Defines how to send publish Service Discovery Frames + * + * @NL80211_NAN_SOLICITED_PUBLISH: publish function is solicited + * @NL80211_NAN_UNSOLICITED_PUBLISH: publish function is unsolicited + */ +enum nl80211_nan_publish_type { + NL80211_NAN_SOLICITED_PUBLISH = 1 << 0, + NL80211_NAN_UNSOLICITED_PUBLISH = 1 << 1, +}; + +#define NL80211_NAN_FUNC_SERVICE_ID_LEN 6 +#define NL80211_NAN_FUNC_SERVICE_SPEC_INFO_MAX_LEN 0xff +#define NL80211_NAN_FUNC_SRF_MAX_LEN 0xff + +/** + * enum nl80211_nan_func_attributes - NAN function attributes + * @__NL80211_NAN_FUNC_INVALID: invalid + * @NL80211_NAN_FUNC_TYPE: &enum nl80211_nan_function_type (u8). + * @NL80211_NAN_FUNC_SERVICE_ID: 6 bytes of the service ID hash as + * specified in NAN spec. This is a binary attribute. + * @NL80211_NAN_FUNC_PUBLISH_TYPE: relevant if the function's type is + * publish. Defines the transmission type for the publish Service Discovery + * Frame, see &enum nl80211_nan_publish_type. Its type is u8. + * @NL80211_NAN_FUNC_PUBLISH_BCAST: relevant if the function is a solicited + * publish. Should the solicited publish Service Discovery Frame be sent to + * the NAN Broadcast address. This is a flag. + * @NL80211_NAN_FUNC_SUBSCRIBE_ACTIVE: relevant if the function's type is + * subscribe. Is the subscribe active. This is a flag. + * @NL80211_NAN_FUNC_FOLLOW_UP_ID: relevant if the function's type is follow up. + * The instance ID for the follow up Service Discovery Frame. This is u8. + * @NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID: relevant if the function's type + * is follow up. This is a u8. + * The requestor instance ID for the follow up Service Discovery Frame. + * @NL80211_NAN_FUNC_FOLLOW_UP_DEST: the MAC address of the recipient of the + * follow up Service Discovery Frame. This is a binary attribute. + * @NL80211_NAN_FUNC_CLOSE_RANGE: is this function limited for devices in a + * close range. The range itself (RSSI) is defined by the device. + * This is a flag. + * @NL80211_NAN_FUNC_TTL: strictly positive number of DWs this function should + * stay active. If not present infinite TTL is assumed. This is a u32. + * @NL80211_NAN_FUNC_SERVICE_INFO: array of bytes describing the service + * specific info. This is a binary attribute. + * @NL80211_NAN_FUNC_SRF: Service Receive Filter. This is a nested attribute. + * See &enum nl80211_nan_srf_attributes. + * @NL80211_NAN_FUNC_RX_MATCH_FILTER: Receive Matching filter. This is a nested + * attribute. It is a list of binary values. + * @NL80211_NAN_FUNC_TX_MATCH_FILTER: Transmit Matching filter. This is a + * nested attribute. It is a list of binary values. + * @NL80211_NAN_FUNC_INSTANCE_ID: The instance ID of the function. + * Its type is u8 and it cannot be 0. + * @NL80211_NAN_FUNC_TERM_REASON: NAN function termination reason. + * See &enum nl80211_nan_func_term_reason. + * + * @NUM_NL80211_NAN_FUNC_ATTR: internal + * @NL80211_NAN_FUNC_ATTR_MAX: highest NAN function attribute + */ +enum nl80211_nan_func_attributes { + __NL80211_NAN_FUNC_INVALID, + NL80211_NAN_FUNC_TYPE, + NL80211_NAN_FUNC_SERVICE_ID, + NL80211_NAN_FUNC_PUBLISH_TYPE, + NL80211_NAN_FUNC_PUBLISH_BCAST, + NL80211_NAN_FUNC_SUBSCRIBE_ACTIVE, + NL80211_NAN_FUNC_FOLLOW_UP_ID, + NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID, + NL80211_NAN_FUNC_FOLLOW_UP_DEST, + NL80211_NAN_FUNC_CLOSE_RANGE, + NL80211_NAN_FUNC_TTL, + NL80211_NAN_FUNC_SERVICE_INFO, + NL80211_NAN_FUNC_SRF, + NL80211_NAN_FUNC_RX_MATCH_FILTER, + NL80211_NAN_FUNC_TX_MATCH_FILTER, + NL80211_NAN_FUNC_INSTANCE_ID, + NL80211_NAN_FUNC_TERM_REASON, + + /* keep last */ + NUM_NL80211_NAN_FUNC_ATTR, + NL80211_NAN_FUNC_ATTR_MAX = NUM_NL80211_NAN_FUNC_ATTR - 1 +}; + +/** + * enum nl80211_nan_srf_attributes - NAN Service Response filter attributes + * @__NL80211_NAN_SRF_INVALID: invalid + * @NL80211_NAN_SRF_INCLUDE: present if the include bit of the SRF set. + * This is a flag. + * @NL80211_NAN_SRF_BF: Bloom Filter. Present if and only if + * &NL80211_NAN_SRF_MAC_ADDRS isn't present. This attribute is binary. + * @NL80211_NAN_SRF_BF_IDX: index of the Bloom Filter. Mandatory if + * &NL80211_NAN_SRF_BF is present. This is a u8. + * @NL80211_NAN_SRF_MAC_ADDRS: list of MAC addresses for the SRF. Present if + * and only if &NL80211_NAN_SRF_BF isn't present. This is a nested + * attribute. Each nested attribute is a MAC address. + * @NUM_NL80211_NAN_SRF_ATTR: internal + * @NL80211_NAN_SRF_ATTR_MAX: highest NAN SRF attribute + */ +enum nl80211_nan_srf_attributes { + __NL80211_NAN_SRF_INVALID, + NL80211_NAN_SRF_INCLUDE, + NL80211_NAN_SRF_BF, + NL80211_NAN_SRF_BF_IDX, + NL80211_NAN_SRF_MAC_ADDRS, + + /* keep last */ + NUM_NL80211_NAN_SRF_ATTR, + NL80211_NAN_SRF_ATTR_MAX = NUM_NL80211_NAN_SRF_ATTR - 1, +}; + #endif /* __LINUX_NL80211_H */ diff --git a/net/wireless/core.c b/net/wireless/core.c index 013987243c0b..8201e6d7449e 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -605,7 +605,8 @@ int wiphy_register(struct wiphy *wiphy) return -EINVAL; if (WARN_ON((wiphy->interface_modes & BIT(NL80211_IFTYPE_NAN)) && - (!rdev->ops->start_nan || !rdev->ops->stop_nan))) + (!rdev->ops->start_nan || !rdev->ops->stop_nan || + !rdev->ops->add_nan_func || !rdev->ops->del_nan_func))) return -EINVAL; /* diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 9e9fb37087fc..0eca59ccd685 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -411,6 +411,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR] = { .len = ETH_ALEN }, [NL80211_ATTR_NAN_MASTER_PREF] = { .type = NLA_U8 }, [NL80211_ATTR_NAN_DUAL] = { .type = NLA_U8 }, + [NL80211_ATTR_NAN_FUNC] = { .type = NLA_NESTED }, }; /* policy for the key attributes */ @@ -504,6 +505,39 @@ nl80211_bss_select_policy[NL80211_BSS_SELECT_ATTR_MAX + 1] = { }, }; +/* policy for NAN function attributes */ +static const struct nla_policy +nl80211_nan_func_policy[NL80211_NAN_FUNC_ATTR_MAX + 1] = { + [NL80211_NAN_FUNC_TYPE] = { .type = NLA_U8 }, + [NL80211_NAN_FUNC_SERVICE_ID] = { .type = NLA_BINARY, + .len = NL80211_NAN_FUNC_SERVICE_ID_LEN }, + [NL80211_NAN_FUNC_PUBLISH_TYPE] = { .type = NLA_U8 }, + [NL80211_NAN_FUNC_PUBLISH_BCAST] = { .type = NLA_FLAG }, + [NL80211_NAN_FUNC_SUBSCRIBE_ACTIVE] = { .type = NLA_FLAG }, + [NL80211_NAN_FUNC_FOLLOW_UP_ID] = { .type = NLA_U8 }, + [NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID] = { .type = NLA_U8 }, + [NL80211_NAN_FUNC_FOLLOW_UP_DEST] = { .len = ETH_ALEN }, + [NL80211_NAN_FUNC_CLOSE_RANGE] = { .type = NLA_FLAG }, + [NL80211_NAN_FUNC_TTL] = { .type = NLA_U32 }, + [NL80211_NAN_FUNC_SERVICE_INFO] = { .type = NLA_BINARY, + .len = NL80211_NAN_FUNC_SERVICE_SPEC_INFO_MAX_LEN }, + [NL80211_NAN_FUNC_SRF] = { .type = NLA_NESTED }, + [NL80211_NAN_FUNC_RX_MATCH_FILTER] = { .type = NLA_NESTED }, + [NL80211_NAN_FUNC_TX_MATCH_FILTER] = { .type = NLA_NESTED }, + [NL80211_NAN_FUNC_INSTANCE_ID] = { .type = NLA_U8 }, + [NL80211_NAN_FUNC_TERM_REASON] = { .type = NLA_U8 }, +}; + +/* policy for Service Response Filter attributes */ +static const struct nla_policy +nl80211_nan_srf_policy[NL80211_NAN_SRF_ATTR_MAX + 1] = { + [NL80211_NAN_SRF_INCLUDE] = { .type = NLA_FLAG }, + [NL80211_NAN_SRF_BF] = { .type = NLA_BINARY, + .len = NL80211_NAN_FUNC_SRF_MAX_LEN }, + [NL80211_NAN_SRF_BF_IDX] = { .type = NLA_U8 }, + [NL80211_NAN_SRF_MAC_ADDRS] = { .type = NLA_NESTED }, +}; + static int nl80211_prepare_wdev_dump(struct sk_buff *skb, struct netlink_callback *cb, struct cfg80211_registered_device **rdev, @@ -10566,6 +10600,325 @@ static int nl80211_stop_nan(struct sk_buff *skb, struct genl_info *info) return 0; } +static int validate_nan_filter(struct nlattr *filter_attr) +{ + struct nlattr *attr; + int len = 0, n_entries = 0, rem; + + nla_for_each_nested(attr, filter_attr, rem) { + len += nla_len(attr); + n_entries++; + } + + if (len >= U8_MAX) + return -EINVAL; + + return n_entries; +} + +static int handle_nan_filter(struct nlattr *attr_filter, + struct cfg80211_nan_func *func, + bool tx) +{ + struct nlattr *attr; + int n_entries, rem, i; + struct cfg80211_nan_func_filter *filter; + + n_entries = validate_nan_filter(attr_filter); + if (n_entries < 0) + return n_entries; + + BUILD_BUG_ON(sizeof(*func->rx_filters) != sizeof(*func->tx_filters)); + + filter = kcalloc(n_entries, sizeof(*func->rx_filters), GFP_KERNEL); + if (!filter) + return -ENOMEM; + + i = 0; + nla_for_each_nested(attr, attr_filter, rem) { + filter[i].filter = kmemdup(nla_data(attr), nla_len(attr), + GFP_KERNEL); + filter[i].len = nla_len(attr); + i++; + } + if (tx) { + func->num_tx_filters = n_entries; + func->tx_filters = filter; + } else { + func->num_rx_filters = n_entries; + func->rx_filters = filter; + } + + return 0; +} + +static int nl80211_nan_add_func(struct sk_buff *skb, + struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct wireless_dev *wdev = info->user_ptr[1]; + struct nlattr *tb[NUM_NL80211_NAN_FUNC_ATTR], *func_attr; + struct cfg80211_nan_func *func; + struct sk_buff *msg = NULL; + void *hdr = NULL; + int err = 0; + + if (wdev->iftype != NL80211_IFTYPE_NAN) + return -EOPNOTSUPP; + + if (!wdev->nan_started) + return -ENOTCONN; + + if (!info->attrs[NL80211_ATTR_NAN_FUNC]) + return -EINVAL; + + if (wdev->owner_nlportid && + wdev->owner_nlportid != info->snd_portid) + return -ENOTCONN; + + err = nla_parse(tb, NL80211_NAN_FUNC_ATTR_MAX, + nla_data(info->attrs[NL80211_ATTR_NAN_FUNC]), + nla_len(info->attrs[NL80211_ATTR_NAN_FUNC]), + nl80211_nan_func_policy); + if (err) + return err; + + func = kzalloc(sizeof(*func), GFP_KERNEL); + if (!func) + return -ENOMEM; + + func->cookie = wdev->wiphy->cookie_counter++; + + if (!tb[NL80211_NAN_FUNC_TYPE] || + nla_get_u8(tb[NL80211_NAN_FUNC_TYPE]) > NL80211_NAN_FUNC_MAX_TYPE) { + err = -EINVAL; + goto out; + } + + + func->type = nla_get_u8(tb[NL80211_NAN_FUNC_TYPE]); + + if (!tb[NL80211_NAN_FUNC_SERVICE_ID]) { + err = -EINVAL; + goto out; + } + + memcpy(func->service_id, nla_data(tb[NL80211_NAN_FUNC_SERVICE_ID]), + sizeof(func->service_id)); + + func->close_range = + nla_get_flag(tb[NL80211_NAN_FUNC_CLOSE_RANGE]); + + if (tb[NL80211_NAN_FUNC_SERVICE_INFO]) { + func->serv_spec_info_len = + nla_len(tb[NL80211_NAN_FUNC_SERVICE_INFO]); + func->serv_spec_info = + kmemdup(nla_data(tb[NL80211_NAN_FUNC_SERVICE_INFO]), + func->serv_spec_info_len, + GFP_KERNEL); + if (!func->serv_spec_info) { + err = -ENOMEM; + goto out; + } + } + + if (tb[NL80211_NAN_FUNC_TTL]) + func->ttl = nla_get_u32(tb[NL80211_NAN_FUNC_TTL]); + + switch (func->type) { + case NL80211_NAN_FUNC_PUBLISH: + if (!tb[NL80211_NAN_FUNC_PUBLISH_TYPE]) { + err = -EINVAL; + goto out; + } + + func->publish_type = + nla_get_u8(tb[NL80211_NAN_FUNC_PUBLISH_TYPE]); + func->publish_bcast = + nla_get_flag(tb[NL80211_NAN_FUNC_PUBLISH_BCAST]); + + if ((!(func->publish_type & NL80211_NAN_SOLICITED_PUBLISH)) && + func->publish_bcast) { + err = -EINVAL; + goto out; + } + break; + case NL80211_NAN_FUNC_SUBSCRIBE: + func->subscribe_active = + nla_get_flag(tb[NL80211_NAN_FUNC_SUBSCRIBE_ACTIVE]); + break; + case NL80211_NAN_FUNC_FOLLOW_UP: + if (!tb[NL80211_NAN_FUNC_FOLLOW_UP_ID] || + !tb[NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID]) { + err = -EINVAL; + goto out; + } + + func->followup_id = + nla_get_u8(tb[NL80211_NAN_FUNC_FOLLOW_UP_ID]); + func->followup_reqid = + nla_get_u8(tb[NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID]); + memcpy(func->followup_dest.addr, + nla_data(tb[NL80211_NAN_FUNC_FOLLOW_UP_DEST]), + sizeof(func->followup_dest.addr)); + if (func->ttl) { + err = -EINVAL; + goto out; + } + break; + default: + err = -EINVAL; + goto out; + } + + if (tb[NL80211_NAN_FUNC_SRF]) { + struct nlattr *srf_tb[NUM_NL80211_NAN_SRF_ATTR]; + + err = nla_parse(srf_tb, NL80211_NAN_SRF_ATTR_MAX, + nla_data(tb[NL80211_NAN_FUNC_SRF]), + nla_len(tb[NL80211_NAN_FUNC_SRF]), NULL); + if (err) + goto out; + + func->srf_include = + nla_get_flag(srf_tb[NL80211_NAN_SRF_INCLUDE]); + + if (srf_tb[NL80211_NAN_SRF_BF]) { + if (srf_tb[NL80211_NAN_SRF_MAC_ADDRS] || + !srf_tb[NL80211_NAN_SRF_BF_IDX]) { + err = -EINVAL; + goto out; + } + + func->srf_bf_len = + nla_len(srf_tb[NL80211_NAN_SRF_BF]); + func->srf_bf = + kmemdup(nla_data(srf_tb[NL80211_NAN_SRF_BF]), + func->srf_bf_len, GFP_KERNEL); + if (!func->srf_bf) { + err = -ENOMEM; + goto out; + } + + func->srf_bf_idx = + nla_get_u8(srf_tb[NL80211_NAN_SRF_BF_IDX]); + } else { + struct nlattr *attr, *mac_attr = + srf_tb[NL80211_NAN_SRF_MAC_ADDRS]; + int n_entries, rem, i = 0; + + if (!mac_attr) { + err = -EINVAL; + goto out; + } + + n_entries = validate_acl_mac_addrs(mac_attr); + if (n_entries <= 0) { + err = -EINVAL; + goto out; + } + + func->srf_num_macs = n_entries; + func->srf_macs = + kzalloc(sizeof(*func->srf_macs) * n_entries, + GFP_KERNEL); + if (!func->srf_macs) { + err = -ENOMEM; + goto out; + } + + nla_for_each_nested(attr, mac_attr, rem) + memcpy(func->srf_macs[i++].addr, nla_data(attr), + sizeof(*func->srf_macs)); + } + } + + if (tb[NL80211_NAN_FUNC_TX_MATCH_FILTER]) { + err = handle_nan_filter(tb[NL80211_NAN_FUNC_TX_MATCH_FILTER], + func, true); + if (err) + goto out; + } + + if (tb[NL80211_NAN_FUNC_RX_MATCH_FILTER]) { + err = handle_nan_filter(tb[NL80211_NAN_FUNC_RX_MATCH_FILTER], + func, false); + if (err) + goto out; + } + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) { + err = -ENOMEM; + goto out; + } + + hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, + NL80211_CMD_ADD_NAN_FUNCTION); + /* This can't really happen - we just allocated 4KB */ + if (WARN_ON(!hdr)) { + err = -ENOMEM; + goto out; + } + + err = rdev_add_nan_func(rdev, wdev, func); +out: + if (err < 0) { + cfg80211_free_nan_func(func); + nlmsg_free(msg); + return err; + } + + /* propagate the instance id and cookie to userspace */ + if (nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, func->cookie, + NL80211_ATTR_PAD)) + goto nla_put_failure; + + func_attr = nla_nest_start(msg, NL80211_ATTR_NAN_FUNC); + if (!func_attr) + goto nla_put_failure; + + if (nla_put_u8(msg, NL80211_NAN_FUNC_INSTANCE_ID, + func->instance_id)) + goto nla_put_failure; + + nla_nest_end(msg, func_attr); + + genlmsg_end(msg, hdr); + return genlmsg_reply(msg, info); + +nla_put_failure: + nlmsg_free(msg); + return -ENOBUFS; +} + +static int nl80211_nan_del_func(struct sk_buff *skb, + struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct wireless_dev *wdev = info->user_ptr[1]; + u64 cookie; + + if (wdev->iftype != NL80211_IFTYPE_NAN) + return -EOPNOTSUPP; + + if (!wdev->nan_started) + return -ENOTCONN; + + if (!info->attrs[NL80211_ATTR_COOKIE]) + return -EINVAL; + + if (wdev->owner_nlportid && + wdev->owner_nlportid != info->snd_portid) + return -ENOTCONN; + + cookie = nla_get_u64(info->attrs[NL80211_ATTR_COOKIE]); + + rdev_del_nan_func(rdev, wdev, cookie); + + return 0; +} + static int nl80211_get_protocol_features(struct sk_buff *skb, struct genl_info *info) { @@ -11923,6 +12276,22 @@ static const struct genl_ops nl80211_ops[] = { .internal_flags = NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL, }, + { + .cmd = NL80211_CMD_ADD_NAN_FUNCTION, + .doit = nl80211_nan_add_func, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_WDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, + { + .cmd = NL80211_CMD_DEL_NAN_FUNCTION, + .doit = nl80211_nan_del_func, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_WDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, { .cmd = NL80211_CMD_SET_MCAST_RATE, .doit = nl80211_set_mcast_rate, diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index afb68a8428b9..98c4c3bdcb11 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -907,6 +907,27 @@ static inline void rdev_stop_nan(struct cfg80211_registered_device *rdev, trace_rdev_return_void(&rdev->wiphy); } +static inline int +rdev_add_nan_func(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, + struct cfg80211_nan_func *nan_func) +{ + int ret; + + trace_rdev_add_nan_func(&rdev->wiphy, wdev, nan_func); + ret = rdev->ops->add_nan_func(&rdev->wiphy, wdev, nan_func); + trace_rdev_return_int(&rdev->wiphy, ret); + return ret; +} + +static inline void rdev_del_nan_func(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, u64 cookie) +{ + trace_rdev_del_nan_func(&rdev->wiphy, wdev, cookie); + rdev->ops->del_nan_func(&rdev->wiphy, wdev, cookie); + trace_rdev_return_void(&rdev->wiphy); +} + static inline int rdev_set_mac_acl(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_acl_data *params) diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 5f3370f4c6a2..56089843d619 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1916,6 +1916,45 @@ DEFINE_EVENT(wiphy_wdev_evt, rdev_stop_nan, TP_ARGS(wiphy, wdev) ); +TRACE_EVENT(rdev_add_nan_func, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, + const struct cfg80211_nan_func *func), + TP_ARGS(wiphy, wdev, func), + TP_STRUCT__entry( + WIPHY_ENTRY + WDEV_ENTRY + __field(u8, func_type) + __field(u64, cookie) + ), + TP_fast_assign( + WIPHY_ASSIGN; + WDEV_ASSIGN; + __entry->func_type = func->type; + __entry->cookie = func->cookie + ), + TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", type=%u, cookie=%llu", + WIPHY_PR_ARG, WDEV_PR_ARG, __entry->func_type, + __entry->cookie) +); + +TRACE_EVENT(rdev_del_nan_func, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, + u64 cookie), + TP_ARGS(wiphy, wdev, cookie), + TP_STRUCT__entry( + WIPHY_ENTRY + WDEV_ENTRY + __field(u64, cookie) + ), + TP_fast_assign( + WIPHY_ASSIGN; + WDEV_ASSIGN; + __entry->cookie = cookie; + ), + TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", cookie=%llu", + WIPHY_PR_ARG, WDEV_PR_ARG, __entry->cookie) +); + TRACE_EVENT(rdev_set_mac_acl, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_acl_data *params), diff --git a/net/wireless/util.c b/net/wireless/util.c index 7a2d46b0058a..8edce22d1b93 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1762,6 +1762,28 @@ int cfg80211_get_station(struct net_device *dev, const u8 *mac_addr, } EXPORT_SYMBOL(cfg80211_get_station); +void cfg80211_free_nan_func(struct cfg80211_nan_func *f) +{ + int i; + + if (!f) + return; + + kfree(f->serv_spec_info); + kfree(f->srf_bf); + kfree(f->srf_macs); + for (i = 0; i < f->num_rx_filters; i++) + kfree(f->rx_filters[i].filter); + + for (i = 0; i < f->num_tx_filters; i++) + kfree(f->tx_filters[i].filter); + + kfree(f->rx_filters); + kfree(f->tx_filters); + kfree(f); +} +EXPORT_SYMBOL(cfg80211_free_nan_func); + /* See IEEE 802.1H for LLC/SNAP encapsulation/decapsulation */ /* Ethernet-II snap header (RFC1042 for most EtherTypes) */ const unsigned char rfc1042_header[] __aligned(2) = From a5a9dcf291e1e541243878eed2d73a74006fa1f1 Mon Sep 17 00:00:00 2001 From: Ayala Beker Date: Tue, 20 Sep 2016 17:31:16 +0300 Subject: [PATCH 1653/1715] cfg80211: allow the user space to change current NAN configuration Some NAN configuration paramaters may change during the operation of the NAN device. For example, a user may want to update master preference value when the device gets plugged/unplugged to the power. Add API that allows to do so. Signed-off-by: Andrei Otcheretianski Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 19 ++++++++++++++++ include/uapi/linux/nl80211.h | 11 ++++++++-- net/wireless/nl80211.c | 42 ++++++++++++++++++++++++++++++++++++ net/wireless/rdev-ops.h | 17 +++++++++++++++ net/wireless/trace.h | 24 +++++++++++++++++++++ 5 files changed, 111 insertions(+), 2 deletions(-) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 2f35ccf6da83..8574a57e19ba 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2326,6 +2326,18 @@ struct cfg80211_nan_conf { u8 dual; }; +/** + * enum cfg80211_nan_conf_changes - indicates changed fields in NAN + * configuration + * + * @CFG80211_NAN_CONF_CHANGED_PREF: master preference + * @CFG80211_NAN_CONF_CHANGED_DUAL: dual band operation + */ +enum cfg80211_nan_conf_changes { + CFG80211_NAN_CONF_CHANGED_PREF = BIT(0), + CFG80211_NAN_CONF_CHANGED_DUAL = BIT(1), +}; + /** * struct cfg80211_nan_func_filter - a NAN function Rx / Tx filter * @@ -2691,6 +2703,9 @@ struct cfg80211_nan_func { * On success the driver should assign an instance_id in the * provided @nan_func. * @del_nan_func: Delete a NAN function. + * @nan_change_conf: changes NAN configuration. The changed parameters must + * be specified in @changes (using &enum cfg80211_nan_conf_changes); + * All other parameters must be ignored. */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); @@ -2963,6 +2978,10 @@ struct cfg80211_ops { struct cfg80211_nan_func *nan_func); void (*del_nan_func)(struct wiphy *wiphy, struct wireless_dev *wdev, u64 cookie); + int (*nan_change_conf)(struct wiphy *wiphy, + struct wireless_dev *wdev, + struct cfg80211_nan_conf *conf, + u32 changes); }; /* diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index e4935d963061..9c9c0c352873 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -862,6 +862,10 @@ * the response to this command. * Look at %NL80211_ATTR_SOCKET_OWNER as well. * @NL80211_CMD_DEL_NAN_FUNCTION: Delete a NAN function by cookie. + * @NL80211_CMD_CHANGE_NAN_CONFIG: Change current NAN configuration. NAN + * must be operational (%NL80211_CMD_START_NAN was executed). + * It must contain at least one of the following attributes: + * %NL80211_ATTR_NAN_MASTER_PREF, %NL80211_ATTR_NAN_DUAL. * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use @@ -1055,6 +1059,7 @@ enum nl80211_commands { NL80211_CMD_STOP_NAN, NL80211_CMD_ADD_NAN_FUNCTION, NL80211_CMD_DEL_NAN_FUNCTION, + NL80211_CMD_CHANGE_NAN_CONFIG, /* add new commands above here */ @@ -1910,12 +1915,14 @@ enum nl80211_commands { * used to pull the stored data for mesh peer in power save state. * * @NL80211_ATTR_NAN_MASTER_PREF: the master preference to be used by - * %NL80211_CMD_START_NAN. Its type is u8 and it can't be 0. + * %NL80211_CMD_START_NAN and optionally with + * %NL80211_CMD_CHANGE_NAN_CONFIG. Its type is u8 and it can't be 0. * Also, values 1 and 255 are reserved for certification purposes and * should not be used during a normal device operation. * @NL80211_ATTR_NAN_DUAL: NAN dual band operation config (see * &enum nl80211_nan_dual_band_conf). This attribute is used with - * %NL80211_CMD_START_NAN. + * %NL80211_CMD_START_NAN and optionally with + * %NL80211_CMD_CHANGE_NAN_CONFIG. * @NL80211_ATTR_NAN_FUNC: a function that can be added to NAN. See * &enum nl80211_nan_func_attributes for description of this nested * attribute. diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 0eca59ccd685..c0b5ae4af2d8 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -10919,6 +10919,40 @@ static int nl80211_nan_del_func(struct sk_buff *skb, return 0; } +static int nl80211_nan_change_config(struct sk_buff *skb, + struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct wireless_dev *wdev = info->user_ptr[1]; + struct cfg80211_nan_conf conf = {}; + u32 changed = 0; + + if (wdev->iftype != NL80211_IFTYPE_NAN) + return -EOPNOTSUPP; + + if (!wdev->nan_started) + return -ENOTCONN; + + if (info->attrs[NL80211_ATTR_NAN_MASTER_PREF]) { + conf.master_pref = + nla_get_u8(info->attrs[NL80211_ATTR_NAN_MASTER_PREF]); + if (conf.master_pref <= 1 || conf.master_pref == 255) + return -EINVAL; + + changed |= CFG80211_NAN_CONF_CHANGED_PREF; + } + + if (info->attrs[NL80211_ATTR_NAN_DUAL]) { + conf.dual = nla_get_u8(info->attrs[NL80211_ATTR_NAN_DUAL]); + changed |= CFG80211_NAN_CONF_CHANGED_DUAL; + } + + if (!changed) + return -EINVAL; + + return rdev_nan_change_conf(rdev, wdev, &conf, changed); +} + static int nl80211_get_protocol_features(struct sk_buff *skb, struct genl_info *info) { @@ -12292,6 +12326,14 @@ static const struct genl_ops nl80211_ops[] = { .internal_flags = NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL, }, + { + .cmd = NL80211_CMD_CHANGE_NAN_CONFIG, + .doit = nl80211_nan_change_config, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_WDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, { .cmd = NL80211_CMD_SET_MCAST_RATE, .doit = nl80211_set_mcast_rate, diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 98c4c3bdcb11..11cf83c8ad4f 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -928,6 +928,23 @@ static inline void rdev_del_nan_func(struct cfg80211_registered_device *rdev, trace_rdev_return_void(&rdev->wiphy); } +static inline int +rdev_nan_change_conf(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, + struct cfg80211_nan_conf *conf, u32 changes) +{ + int ret; + + trace_rdev_nan_change_conf(&rdev->wiphy, wdev, conf, changes); + if (rdev->ops->nan_change_conf) + ret = rdev->ops->nan_change_conf(&rdev->wiphy, wdev, conf, + changes); + else + ret = -ENOTSUPP; + trace_rdev_return_int(&rdev->wiphy, ret); + return ret; +} + static inline int rdev_set_mac_acl(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_acl_data *params) diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 56089843d619..a3d0a91b1e09 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1911,6 +1911,30 @@ TRACE_EVENT(rdev_start_nan, __entry->dual) ); +TRACE_EVENT(rdev_nan_change_conf, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, + struct cfg80211_nan_conf *conf, u32 changes), + TP_ARGS(wiphy, wdev, conf, changes), + TP_STRUCT__entry( + WIPHY_ENTRY + WDEV_ENTRY + __field(u8, master_pref) + __field(u8, dual); + __field(u32, changes); + ), + TP_fast_assign( + WIPHY_ASSIGN; + WDEV_ASSIGN; + __entry->master_pref = conf->master_pref; + __entry->dual = conf->dual; + __entry->changes = changes; + ), + TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT + ", master preference: %u, dual: %d, changes: %x", + WIPHY_PR_ARG, WDEV_PR_ARG, __entry->master_pref, + __entry->dual, __entry->changes) +); + DEFINE_EVENT(wiphy_wdev_evt, rdev_stop_nan, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev), TP_ARGS(wiphy, wdev) From 50bcd31d9992e99c231820f5276e70346cbfbc51 Mon Sep 17 00:00:00 2001 From: Ayala Beker Date: Tue, 20 Sep 2016 17:31:17 +0300 Subject: [PATCH 1654/1715] cfg80211: provide a function to report a match for NAN Provide a function the driver can call to report a match. This will send the event to the user space. If the NAN instance is tied to the owner, the notifications will be sent to the socket that started the NAN interface only. Signed-off-by: Andrei Otcheretianski Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 37 +++++++++++++++++ include/uapi/linux/nl80211.h | 31 ++++++++++++++ net/wireless/nl80211.c | 80 ++++++++++++++++++++++++++++++++++++ 3 files changed, 148 insertions(+) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 8574a57e19ba..5481664b5389 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -5694,6 +5694,43 @@ wiphy_ext_feature_isset(struct wiphy *wiphy, */ void cfg80211_free_nan_func(struct cfg80211_nan_func *f); +/** + * struct cfg80211_nan_match_params - NAN match parameters + * @type: the type of the function that triggered a match. If it is + * %NL80211_NAN_FUNC_SUBSCRIBE it means that we replied to a subscriber. + * If it is %NL80211_NAN_FUNC_PUBLISH, it means that we got a discovery + * result. + * If it is %NL80211_NAN_FUNC_FOLLOW_UP, we received a follow up. + * @inst_id: the local instance id + * @peer_inst_id: the instance id of the peer's function + * @addr: the MAC address of the peer + * @info_len: the length of the &info + * @info: the Service Specific Info from the peer (if any) + * @cookie: unique identifier of the corresponding function + */ +struct cfg80211_nan_match_params { + enum nl80211_nan_function_type type; + u8 inst_id; + u8 peer_inst_id; + const u8 *addr; + u8 info_len; + const u8 *info; + u64 cookie; +}; + +/** + * cfg80211_nan_match - report a match for a NAN function. + * @wdev: the wireless device reporting the match + * @match: match notification parameters + * @gfp: allocation flags + * + * This function reports that the a NAN function had a match. This + * can be a subscribe that had a match or a solicited publish that + * was sent. It can also be a follow up that was received. + */ +void cfg80211_nan_match(struct wireless_dev *wdev, + struct cfg80211_nan_match_params *match, gfp_t gfp); + /* ethtool helper */ void cfg80211_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info); diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 9c9c0c352873..995bf802d604 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -48,6 +48,7 @@ #define NL80211_MULTICAST_GROUP_REG "regulatory" #define NL80211_MULTICAST_GROUP_MLME "mlme" #define NL80211_MULTICAST_GROUP_VENDOR "vendor" +#define NL80211_MULTICAST_GROUP_NAN "nan" #define NL80211_MULTICAST_GROUP_TESTMODE "testmode" /** @@ -866,6 +867,9 @@ * must be operational (%NL80211_CMD_START_NAN was executed). * It must contain at least one of the following attributes: * %NL80211_ATTR_NAN_MASTER_PREF, %NL80211_ATTR_NAN_DUAL. + * @NL80211_CMD_NAN_FUNC_MATCH: Notification sent when a match is reported. + * This will contain a %NL80211_ATTR_NAN_MATCH nested attribute and + * %NL80211_ATTR_COOKIE. * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use @@ -1060,6 +1064,7 @@ enum nl80211_commands { NL80211_CMD_ADD_NAN_FUNCTION, NL80211_CMD_DEL_NAN_FUNCTION, NL80211_CMD_CHANGE_NAN_CONFIG, + NL80211_CMD_NAN_MATCH, /* add new commands above here */ @@ -1926,6 +1931,8 @@ enum nl80211_commands { * @NL80211_ATTR_NAN_FUNC: a function that can be added to NAN. See * &enum nl80211_nan_func_attributes for description of this nested * attribute. + * @NL80211_ATTR_NAN_MATCH: used to report a match. This is a nested attribute. + * See &enum nl80211_nan_match_attributes. * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined @@ -2324,6 +2331,7 @@ enum nl80211_attrs { NL80211_ATTR_NAN_MASTER_PREF, NL80211_ATTR_NAN_DUAL, NL80211_ATTR_NAN_FUNC, + NL80211_ATTR_NAN_MATCH, /* add attributes here, update the policy in nl80211.c */ @@ -5074,4 +5082,27 @@ enum nl80211_nan_srf_attributes { NL80211_NAN_SRF_ATTR_MAX = NUM_NL80211_NAN_SRF_ATTR - 1, }; +/** + * enum nl80211_nan_match_attributes - NAN match attributes + * @__NL80211_NAN_MATCH_INVALID: invalid + * @NL80211_NAN_MATCH_FUNC_LOCAL: the local function that had the + * match. This is a nested attribute. + * See &enum nl80211_nan_func_attributes. + * @NL80211_NAN_MATCH_FUNC_PEER: the peer function + * that caused the match. This is a nested attribute. + * See &enum nl80211_nan_func_attributes. + * + * @NUM_NL80211_NAN_MATCH_ATTR: internal + * @NL80211_NAN_MATCH_ATTR_MAX: highest NAN match attribute + */ +enum nl80211_nan_match_attributes { + __NL80211_NAN_MATCH_INVALID, + NL80211_NAN_MATCH_FUNC_LOCAL, + NL80211_NAN_MATCH_FUNC_PEER, + + /* keep last */ + NUM_NL80211_NAN_MATCH_ATTR, + NL80211_NAN_MATCH_ATTR_MAX = NUM_NL80211_NAN_MATCH_ATTR - 1 +}; + #endif /* __LINUX_NL80211_H */ diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index c0b5ae4af2d8..0bbd9ed28318 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -56,6 +56,7 @@ enum nl80211_multicast_groups { NL80211_MCGRP_REGULATORY, NL80211_MCGRP_MLME, NL80211_MCGRP_VENDOR, + NL80211_MCGRP_NAN, NL80211_MCGRP_TESTMODE /* keep last - ifdef! */ }; @@ -65,6 +66,7 @@ static const struct genl_multicast_group nl80211_mcgrps[] = { [NL80211_MCGRP_REGULATORY] = { .name = NL80211_MULTICAST_GROUP_REG }, [NL80211_MCGRP_MLME] = { .name = NL80211_MULTICAST_GROUP_MLME }, [NL80211_MCGRP_VENDOR] = { .name = NL80211_MULTICAST_GROUP_VENDOR }, + [NL80211_MCGRP_NAN] = { .name = NL80211_MULTICAST_GROUP_NAN }, #ifdef CONFIG_NL80211_TESTMODE [NL80211_MCGRP_TESTMODE] = { .name = NL80211_MULTICAST_GROUP_TESTMODE } #endif @@ -10953,6 +10955,84 @@ static int nl80211_nan_change_config(struct sk_buff *skb, return rdev_nan_change_conf(rdev, wdev, &conf, changed); } +void cfg80211_nan_match(struct wireless_dev *wdev, + struct cfg80211_nan_match_params *match, gfp_t gfp) +{ + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); + struct nlattr *match_attr, *local_func_attr, *peer_func_attr; + struct sk_buff *msg; + void *hdr; + + if (WARN_ON(!match->inst_id || !match->peer_inst_id || !match->addr)) + return; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_NAN_MATCH); + if (!hdr) { + nlmsg_free(msg); + return; + } + + if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || + (wdev->netdev && nla_put_u32(msg, NL80211_ATTR_IFINDEX, + wdev->netdev->ifindex)) || + nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev), + NL80211_ATTR_PAD)) + goto nla_put_failure; + + if (nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, match->cookie, + NL80211_ATTR_PAD) || + nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, match->addr)) + goto nla_put_failure; + + match_attr = nla_nest_start(msg, NL80211_ATTR_NAN_MATCH); + if (!match_attr) + goto nla_put_failure; + + local_func_attr = nla_nest_start(msg, NL80211_NAN_MATCH_FUNC_LOCAL); + if (!local_func_attr) + goto nla_put_failure; + + if (nla_put_u8(msg, NL80211_NAN_FUNC_INSTANCE_ID, match->inst_id)) + goto nla_put_failure; + + nla_nest_end(msg, local_func_attr); + + peer_func_attr = nla_nest_start(msg, NL80211_NAN_MATCH_FUNC_PEER); + if (!peer_func_attr) + goto nla_put_failure; + + if (nla_put_u8(msg, NL80211_NAN_FUNC_TYPE, match->type) || + nla_put_u8(msg, NL80211_NAN_FUNC_INSTANCE_ID, match->peer_inst_id)) + goto nla_put_failure; + + if (match->info && match->info_len && + nla_put(msg, NL80211_NAN_FUNC_SERVICE_INFO, match->info_len, + match->info)) + goto nla_put_failure; + + nla_nest_end(msg, peer_func_attr); + nla_nest_end(msg, match_attr); + genlmsg_end(msg, hdr); + + if (!wdev->owner_nlportid) + genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), + msg, 0, NL80211_MCGRP_NAN, gfp); + else + genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, + wdev->owner_nlportid); + + return; + +nla_put_failure: + nlmsg_free(msg); +} +EXPORT_SYMBOL(cfg80211_nan_match); + static int nl80211_get_protocol_features(struct sk_buff *skb, struct genl_info *info) { From 368e5a7b4ecb71b3d347799cb9351b0dce5dec70 Mon Sep 17 00:00:00 2001 From: Ayala Beker Date: Tue, 20 Sep 2016 17:31:18 +0300 Subject: [PATCH 1655/1715] cfg80211: Provide an API to report NAN function termination Provide a function that reports NAN DE function termination. The function may be terminated due to one of the following reasons: user request, ttl expiration or failure. If the NAN instance is tied to the owner, the notification will be sent to the socket that started the NAN interface only Signed-off-by: Andrei Otcheretianski Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 16 ++++++++++ include/uapi/linux/nl80211.h | 18 +++++++++++ net/wireless/nl80211.c | 60 ++++++++++++++++++++++++++++++++++++ 3 files changed, 94 insertions(+) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 5481664b5389..fe78f02a242e 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -5731,6 +5731,22 @@ struct cfg80211_nan_match_params { void cfg80211_nan_match(struct wireless_dev *wdev, struct cfg80211_nan_match_params *match, gfp_t gfp); +/** + * cfg80211_nan_func_terminated - notify about NAN function termination. + * + * @wdev: the wireless device reporting the match + * @inst_id: the local instance id + * @reason: termination reason (one of the NL80211_NAN_FUNC_TERM_REASON_*) + * @cookie: unique NAN function identifier + * @gfp: allocation flags + * + * This function reports that the a NAN function is terminated. + */ +void cfg80211_nan_func_terminated(struct wireless_dev *wdev, + u8 inst_id, + enum nl80211_nan_func_term_reason reason, + u64 cookie, gfp_t gfp); + /* ethtool helper */ void cfg80211_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info); diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 995bf802d604..56368e9b4622 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -863,6 +863,9 @@ * the response to this command. * Look at %NL80211_ATTR_SOCKET_OWNER as well. * @NL80211_CMD_DEL_NAN_FUNCTION: Delete a NAN function by cookie. + * This command is also used as a notification sent when a NAN function is + * terminated. This will contain a %NL80211_ATTR_NAN_FUNC_INST_ID + * and %NL80211_ATTR_COOKIE attributes. * @NL80211_CMD_CHANGE_NAN_CONFIG: Change current NAN configuration. NAN * must be operational (%NL80211_CMD_START_NAN was executed). * It must contain at least one of the following attributes: @@ -4985,6 +4988,21 @@ enum nl80211_nan_publish_type { NL80211_NAN_UNSOLICITED_PUBLISH = 1 << 1, }; +/** + * enum nl80211_nan_func_term_reason - NAN functions termination reason + * + * Defines termination reasons of a NAN function + * + * @NL80211_NAN_FUNC_TERM_REASON_USER_REQUEST: requested by user + * @NL80211_NAN_FUNC_TERM_REASON_TTL_EXPIRED: timeout + * @NL80211_NAN_FUNC_TERM_REASON_ERROR: errored + */ +enum nl80211_nan_func_term_reason { + NL80211_NAN_FUNC_TERM_REASON_USER_REQUEST, + NL80211_NAN_FUNC_TERM_REASON_TTL_EXPIRED, + NL80211_NAN_FUNC_TERM_REASON_ERROR, +}; + #define NL80211_NAN_FUNC_SERVICE_ID_LEN 6 #define NL80211_NAN_FUNC_SERVICE_SPEC_INFO_MAX_LEN 0xff #define NL80211_NAN_FUNC_SRF_MAX_LEN 0xff diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 0bbd9ed28318..92eb6f0b9f3d 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -11033,6 +11033,66 @@ nla_put_failure: } EXPORT_SYMBOL(cfg80211_nan_match); +void cfg80211_nan_func_terminated(struct wireless_dev *wdev, + u8 inst_id, + enum nl80211_nan_func_term_reason reason, + u64 cookie, gfp_t gfp) +{ + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); + struct sk_buff *msg; + struct nlattr *func_attr; + void *hdr; + + if (WARN_ON(!inst_id)) + return; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_DEL_NAN_FUNCTION); + if (!hdr) { + nlmsg_free(msg); + return; + } + + if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || + (wdev->netdev && nla_put_u32(msg, NL80211_ATTR_IFINDEX, + wdev->netdev->ifindex)) || + nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev), + NL80211_ATTR_PAD)) + goto nla_put_failure; + + if (nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, cookie, + NL80211_ATTR_PAD)) + goto nla_put_failure; + + func_attr = nla_nest_start(msg, NL80211_ATTR_NAN_FUNC); + if (!func_attr) + goto nla_put_failure; + + if (nla_put_u8(msg, NL80211_NAN_FUNC_INSTANCE_ID, inst_id) || + nla_put_u8(msg, NL80211_NAN_FUNC_TERM_REASON, reason)) + goto nla_put_failure; + + nla_nest_end(msg, func_attr); + genlmsg_end(msg, hdr); + + if (!wdev->owner_nlportid) + genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), + msg, 0, NL80211_MCGRP_NAN, gfp); + else + genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, + wdev->owner_nlportid); + + return; + +nla_put_failure: + nlmsg_free(msg); +} +EXPORT_SYMBOL(cfg80211_nan_func_terminated); + static int nl80211_get_protocol_features(struct sk_buff *skb, struct genl_info *info) { From 5953ff6d6a3e92dd4f8d9d8e8a9359d7e180ae93 Mon Sep 17 00:00:00 2001 From: Ayala Beker Date: Tue, 20 Sep 2016 17:31:19 +0300 Subject: [PATCH 1656/1715] mac80211: implement nan_change_conf Implement nan_change_conf callback which allows to change current NAN configuration (master preference and dual band operation). Store the current NAN configuration in sdata, so it can be used both to provide the driver the updated configuration with changes and also it will be used in hw reconfig flows in next patches. Signed-off-by: Andrei Otcheretianski Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/net/mac80211.h | 9 +++++++++ net/mac80211/cfg.c | 31 +++++++++++++++++++++++++++++++ net/mac80211/driver-ops.h | 21 +++++++++++++++++++++ net/mac80211/ieee80211_i.h | 10 ++++++++++ net/mac80211/trace.h | 31 +++++++++++++++++++++++++++++++ 5 files changed, 102 insertions(+) diff --git a/include/net/mac80211.h b/include/net/mac80211.h index df9b5cff300c..ef8d02a2ce1a 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -3423,6 +3423,12 @@ enum ieee80211_reconfig_type { * * @start_nan: join an existing NAN cluster, or create a new one. * @stop_nan: leave the NAN cluster. + * @nan_change_conf: change NAN configuration. The data in cfg80211_nan_conf + * contains full new configuration and changes specify which parameters + * are changed with respect to the last NAN config. + * The driver gets both full configuration and the changed parameters since + * some devices may need the full configuration while others need only the + * changed parameters. */ struct ieee80211_ops { void (*tx)(struct ieee80211_hw *hw, @@ -3664,6 +3670,9 @@ struct ieee80211_ops { struct cfg80211_nan_conf *conf); int (*stop_nan)(struct ieee80211_hw *hw, struct ieee80211_vif *vif); + int (*nan_change_conf)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct cfg80211_nan_conf *conf, u32 changes); }; /** diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 9aabb0932d24..38fdb539cab3 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -186,6 +186,36 @@ static void ieee80211_stop_nan(struct wiphy *wiphy, ieee80211_sdata_stop(sdata); } +static int ieee80211_nan_change_conf(struct wiphy *wiphy, + struct wireless_dev *wdev, + struct cfg80211_nan_conf *conf, + u32 changes) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); + struct cfg80211_nan_conf new_conf; + int ret = 0; + + if (sdata->vif.type != NL80211_IFTYPE_NAN) + return -EOPNOTSUPP; + + if (!ieee80211_sdata_running(sdata)) + return -ENETDOWN; + + new_conf = sdata->u.nan.conf; + + if (changes & CFG80211_NAN_CONF_CHANGED_PREF) + new_conf.master_pref = conf->master_pref; + + if (changes & CFG80211_NAN_CONF_CHANGED_DUAL) + new_conf.dual = conf->dual; + + ret = drv_nan_change_conf(sdata->local, sdata, &new_conf, changes); + if (!ret) + sdata->u.nan.conf = new_conf; + + return ret; +} + static int ieee80211_set_noack_map(struct wiphy *wiphy, struct net_device *dev, u16 noack_map) @@ -3500,4 +3530,5 @@ const struct cfg80211_ops mac80211_config_ops = { .del_tx_ts = ieee80211_del_tx_ts, .start_nan = ieee80211_start_nan, .stop_nan = ieee80211_stop_nan, + .nan_change_conf = ieee80211_nan_change_conf, }; diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index e52cfb855bd9..daaa409bec6f 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -1192,4 +1192,25 @@ static inline void drv_stop_nan(struct ieee80211_local *local, trace_drv_return_void(local); } +static inline int drv_nan_change_conf(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct cfg80211_nan_conf *conf, + u32 changes) +{ + int ret; + + might_sleep(); + check_sdata_in_driver(sdata); + + if (!local->ops->nan_change_conf) + return -EOPNOTSUPP; + + trace_drv_nan_change_conf(local, sdata, conf, changes); + ret = local->ops->nan_change_conf(&local->hw, &sdata->vif, conf, + changes); + trace_drv_return_int(local, ret); + + return ret; +} + #endif /* __MAC80211_DRIVER_OPS */ diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index c71c73594790..712b20b05660 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -830,6 +830,15 @@ struct ieee80211_if_mntr { u8 mu_follow_addr[ETH_ALEN] __aligned(2); }; +/** + * struct ieee80211_if_nan - NAN state + * + * @conf: current NAN configuration + */ +struct ieee80211_if_nan { + struct cfg80211_nan_conf conf; +}; + struct ieee80211_sub_if_data { struct list_head list; @@ -929,6 +938,7 @@ struct ieee80211_sub_if_data { struct ieee80211_if_mesh mesh; struct ieee80211_if_ocb ocb; struct ieee80211_if_mntr mntr; + struct ieee80211_if_nan nan; } u; #ifdef CONFIG_MAC80211_DEBUGFS diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index deefbfb9f6fb..0bafe1159d01 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -1750,6 +1750,37 @@ TRACE_EVENT(drv_stop_nan, ) ); +TRACE_EVENT(drv_nan_change_conf, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct cfg80211_nan_conf *conf, + u32 changes), + + TP_ARGS(local, sdata, conf, changes), + TP_STRUCT__entry( + LOCAL_ENTRY + VIF_ENTRY + __field(u8, master_pref) + __field(u8, dual) + __field(u32, changes) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + VIF_ASSIGN; + __entry->master_pref = conf->master_pref; + __entry->dual = conf->dual; + __entry->changes = changes; + ), + + TP_printk( + LOCAL_PR_FMT VIF_PR_FMT + ", master preference: %u, dual: %d, changes: 0x%x", + LOCAL_PR_ARG, VIF_PR_ARG, __entry->master_pref, + __entry->dual, __entry->changes + ) +); + /* * Tracing for API calls that drivers call. */ From 167e33f4f68cc8e4e3bdaf6d43641176c51f2d79 Mon Sep 17 00:00:00 2001 From: Ayala Beker Date: Tue, 20 Sep 2016 17:31:20 +0300 Subject: [PATCH 1657/1715] mac80211: Implement add_nan_func and rm_nan_func Implement add/rm_nan_func functions and handle NAN function termination notifications. Handle instance_id allocation for NAN functions and implement the reconfig flow. Signed-off-by: Andrei Otcheretianski Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/net/mac80211.h | 31 ++++++++++ net/mac80211/cfg.c | 114 +++++++++++++++++++++++++++++++++++++ net/mac80211/driver-ops.h | 32 +++++++++++ net/mac80211/ieee80211_i.h | 7 +++ net/mac80211/iface.c | 20 ++++++- net/mac80211/main.c | 3 + net/mac80211/trace.h | 52 +++++++++++++++++ net/mac80211/util.c | 48 +++++++++++++++- 8 files changed, 304 insertions(+), 3 deletions(-) diff --git a/include/net/mac80211.h b/include/net/mac80211.h index ef8d02a2ce1a..d4ddf476dc76 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2177,6 +2177,8 @@ enum ieee80211_hw_flags { * @n_cipher_schemes: a size of an array of cipher schemes definitions. * @cipher_schemes: a pointer to an array of cipher scheme definitions * supported by HW. + * @max_nan_de_entries: maximum number of NAN DE functions supported by the + * device. */ struct ieee80211_hw { struct ieee80211_conf conf; @@ -2211,6 +2213,7 @@ struct ieee80211_hw { u8 uapsd_max_sp_len; u8 n_cipher_schemes; const struct ieee80211_cipher_scheme *cipher_schemes; + u8 max_nan_de_entries; }; static inline bool _ieee80211_hw_check(struct ieee80211_hw *hw, @@ -3429,6 +3432,12 @@ enum ieee80211_reconfig_type { * The driver gets both full configuration and the changed parameters since * some devices may need the full configuration while others need only the * changed parameters. + * @add_nan_func: Add a NAN function. Returns 0 on success. The data in + * cfg80211_nan_func must not be referenced outside the scope of + * this call. + * @del_nan_func: Remove a NAN function. The driver must call + * ieee80211_nan_func_terminated() with + * NL80211_NAN_FUNC_TERM_REASON_USER_REQUEST reason code upon removal. */ struct ieee80211_ops { void (*tx)(struct ieee80211_hw *hw, @@ -3673,6 +3682,12 @@ struct ieee80211_ops { int (*nan_change_conf)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct cfg80211_nan_conf *conf, u32 changes); + int (*add_nan_func)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + const struct cfg80211_nan_func *nan_func); + void (*del_nan_func)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + u8 instance_id); }; /** @@ -5746,4 +5761,20 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw, void ieee80211_txq_get_depth(struct ieee80211_txq *txq, unsigned long *frame_cnt, unsigned long *byte_cnt); + +/** + * ieee80211_nan_func_terminated - notify about NAN function termination. + * + * This function is used to notify mac80211 about NAN function termination. + * Note that this function can't be called from hard irq. + * + * @vif: &struct ieee80211_vif pointer from the add_interface callback. + * @inst_id: the local instance id + * @reason: termination reason (one of the NL80211_NAN_FUNC_TERM_REASON_*) + * @gfp: allocation flags + */ +void ieee80211_nan_func_terminated(struct ieee80211_vif *vif, + u8 inst_id, + enum nl80211_nan_func_term_reason reason, + gfp_t gfp); #endif /* MAC80211_H */ diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 38fdb539cab3..72ddb4379319 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -174,6 +174,8 @@ static int ieee80211_start_nan(struct wiphy *wiphy, if (ret) ieee80211_sdata_stop(sdata); + sdata->u.nan.conf = *conf; + return ret; } @@ -216,6 +218,84 @@ static int ieee80211_nan_change_conf(struct wiphy *wiphy, return ret; } +static int ieee80211_add_nan_func(struct wiphy *wiphy, + struct wireless_dev *wdev, + struct cfg80211_nan_func *nan_func) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); + int ret; + + if (sdata->vif.type != NL80211_IFTYPE_NAN) + return -EOPNOTSUPP; + + if (!ieee80211_sdata_running(sdata)) + return -ENETDOWN; + + spin_lock_bh(&sdata->u.nan.func_lock); + + ret = idr_alloc(&sdata->u.nan.function_inst_ids, + nan_func, 1, sdata->local->hw.max_nan_de_entries + 1, + GFP_ATOMIC); + spin_unlock_bh(&sdata->u.nan.func_lock); + + if (ret < 0) + return ret; + + nan_func->instance_id = ret; + + WARN_ON(nan_func->instance_id == 0); + + ret = drv_add_nan_func(sdata->local, sdata, nan_func); + if (ret) { + spin_lock_bh(&sdata->u.nan.func_lock); + idr_remove(&sdata->u.nan.function_inst_ids, + nan_func->instance_id); + spin_unlock_bh(&sdata->u.nan.func_lock); + } + + return ret; +} + +static struct cfg80211_nan_func * +ieee80211_find_nan_func_by_cookie(struct ieee80211_sub_if_data *sdata, + u64 cookie) +{ + struct cfg80211_nan_func *func; + int id; + + lockdep_assert_held(&sdata->u.nan.func_lock); + + idr_for_each_entry(&sdata->u.nan.function_inst_ids, func, id) { + if (func->cookie == cookie) + return func; + } + + return NULL; +} + +static void ieee80211_del_nan_func(struct wiphy *wiphy, + struct wireless_dev *wdev, u64 cookie) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); + struct cfg80211_nan_func *func; + u8 instance_id = 0; + + if (sdata->vif.type != NL80211_IFTYPE_NAN || + !ieee80211_sdata_running(sdata)) + return; + + spin_lock_bh(&sdata->u.nan.func_lock); + + func = ieee80211_find_nan_func_by_cookie(sdata, cookie); + if (func) + instance_id = func->instance_id; + + spin_unlock_bh(&sdata->u.nan.func_lock); + + if (instance_id) + drv_del_nan_func(sdata->local, sdata, instance_id); +} + static int ieee80211_set_noack_map(struct wiphy *wiphy, struct net_device *dev, u16 noack_map) @@ -3443,6 +3523,38 @@ static int ieee80211_del_tx_ts(struct wiphy *wiphy, struct net_device *dev, return -ENOENT; } +void ieee80211_nan_func_terminated(struct ieee80211_vif *vif, + u8 inst_id, + enum nl80211_nan_func_term_reason reason, + gfp_t gfp) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct cfg80211_nan_func *func; + u64 cookie; + + if (WARN_ON(vif->type != NL80211_IFTYPE_NAN)) + return; + + spin_lock_bh(&sdata->u.nan.func_lock); + + func = idr_find(&sdata->u.nan.function_inst_ids, inst_id); + if (WARN_ON(!func)) { + spin_unlock_bh(&sdata->u.nan.func_lock); + return; + } + + cookie = func->cookie; + idr_remove(&sdata->u.nan.function_inst_ids, inst_id); + + spin_unlock_bh(&sdata->u.nan.func_lock); + + cfg80211_free_nan_func(func); + + cfg80211_nan_func_terminated(ieee80211_vif_to_wdev(vif), inst_id, + reason, cookie, gfp); +} +EXPORT_SYMBOL(ieee80211_nan_func_terminated); + const struct cfg80211_ops mac80211_config_ops = { .add_virtual_intf = ieee80211_add_iface, .del_virtual_intf = ieee80211_del_iface, @@ -3531,4 +3643,6 @@ const struct cfg80211_ops mac80211_config_ops = { .start_nan = ieee80211_start_nan, .stop_nan = ieee80211_stop_nan, .nan_change_conf = ieee80211_nan_change_conf, + .add_nan_func = ieee80211_add_nan_func, + .del_nan_func = ieee80211_del_nan_func, }; diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index daaa409bec6f..dea92c33b2ca 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -1213,4 +1213,36 @@ static inline int drv_nan_change_conf(struct ieee80211_local *local, return ret; } +static inline int drv_add_nan_func(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + const struct cfg80211_nan_func *nan_func) +{ + int ret; + + might_sleep(); + check_sdata_in_driver(sdata); + + if (!local->ops->add_nan_func) + return -EOPNOTSUPP; + + trace_drv_add_nan_func(local, sdata, nan_func); + ret = local->ops->add_nan_func(&local->hw, &sdata->vif, nan_func); + trace_drv_return_int(local, ret); + + return ret; +} + +static inline void drv_del_nan_func(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + u8 instance_id) +{ + might_sleep(); + check_sdata_in_driver(sdata); + + trace_drv_del_nan_func(local, sdata, instance_id); + if (local->ops->del_nan_func) + local->ops->del_nan_func(&local->hw, &sdata->vif, instance_id); + trace_drv_return_void(local); +} + #endif /* __MAC80211_DRIVER_OPS */ diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 712b20b05660..2b391f242e58 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -86,6 +86,8 @@ struct ieee80211_local; #define IEEE80211_DEAUTH_FRAME_LEN (24 /* hdr */ + 2 /* reason */) +#define IEEE80211_MAX_NAN_INSTANCE_ID 255 + struct ieee80211_fragment_entry { struct sk_buff_head skb_list; unsigned long first_frag_time; @@ -834,9 +836,14 @@ struct ieee80211_if_mntr { * struct ieee80211_if_nan - NAN state * * @conf: current NAN configuration + * @func_ids: a bitmap of available instance_id's */ struct ieee80211_if_nan { struct cfg80211_nan_conf conf; + + /* protects function_inst_ids */ + spinlock_t func_lock; + struct idr function_inst_ids; }; struct ieee80211_sub_if_data { diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 507f46a8eb1c..638ec0759078 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -798,6 +798,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, struct ps_data *ps; struct cfg80211_chan_def chandef; bool cancel_scan; + struct cfg80211_nan_func *func; clear_bit(SDATA_STATE_RUNNING, &sdata->state); @@ -950,11 +951,22 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, ieee80211_adjust_monitor_flags(sdata, -1); break; + case NL80211_IFTYPE_NAN: + /* clean all the functions */ + spin_lock_bh(&sdata->u.nan.func_lock); + + idr_for_each_entry(&sdata->u.nan.function_inst_ids, func, i) { + idr_remove(&sdata->u.nan.function_inst_ids, i); + cfg80211_free_nan_func(func); + } + idr_destroy(&sdata->u.nan.function_inst_ids); + + spin_unlock_bh(&sdata->u.nan.func_lock); + break; case NL80211_IFTYPE_P2P_DEVICE: /* relies on synchronize_rcu() below */ RCU_INIT_POINTER(local->p2p_sdata, NULL); /* fall through */ - case NL80211_IFTYPE_NAN: default: cancel_work_sync(&sdata->work); /* @@ -1462,9 +1474,13 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, case NL80211_IFTYPE_WDS: sdata->vif.bss_conf.bssid = NULL; break; + case NL80211_IFTYPE_NAN: + idr_init(&sdata->u.nan.function_inst_ids); + spin_lock_init(&sdata->u.nan.func_lock); + sdata->vif.bss_conf.bssid = sdata->vif.addr; + break; case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_P2P_DEVICE: - case NL80211_IFTYPE_NAN: sdata->vif.bss_conf.bssid = sdata->vif.addr; break; case NL80211_IFTYPE_UNSPECIFIED: diff --git a/net/mac80211/main.c b/net/mac80211/main.c index b5cf2c5cc166..1075ac24c8c5 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -1063,6 +1063,9 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) local->dynamic_ps_forced_timeout = -1; + if (!local->hw.max_nan_de_entries) + local->hw.max_nan_de_entries = IEEE80211_MAX_NAN_INSTANCE_ID; + result = ieee80211_wep_init(local); if (result < 0) wiphy_debug(local->hw.wiphy, "Failed to initialize wep: %d\n", diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 0bafe1159d01..37891fa67e9a 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -1781,6 +1781,58 @@ TRACE_EVENT(drv_nan_change_conf, ) ); +TRACE_EVENT(drv_add_nan_func, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + const struct cfg80211_nan_func *func), + + TP_ARGS(local, sdata, func), + TP_STRUCT__entry( + LOCAL_ENTRY + VIF_ENTRY + __field(u8, type) + __field(u8, inst_id) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + VIF_ASSIGN; + __entry->type = func->type; + __entry->inst_id = func->instance_id; + ), + + TP_printk( + LOCAL_PR_FMT VIF_PR_FMT + ", type: %u, inst_id: %u", + LOCAL_PR_ARG, VIF_PR_ARG, __entry->type, __entry->inst_id + ) +); + +TRACE_EVENT(drv_del_nan_func, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + u8 instance_id), + + TP_ARGS(local, sdata, instance_id), + TP_STRUCT__entry( + LOCAL_ENTRY + VIF_ENTRY + __field(u8, instance_id) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + VIF_ASSIGN; + __entry->instance_id = instance_id; + ), + + TP_printk( + LOCAL_PR_FMT VIF_PR_FMT + ", instance_id: %u", + LOCAL_PR_ARG, VIF_PR_ARG, __entry->instance_id + ) +); + /* * Tracing for API calls that drivers call. */ diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 5b57fcaaec9b..91754c8dafb2 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1749,6 +1749,46 @@ static void ieee80211_reconfig_stations(struct ieee80211_sub_if_data *sdata) mutex_unlock(&local->sta_mtx); } +static int ieee80211_reconfig_nan(struct ieee80211_sub_if_data *sdata) +{ + struct cfg80211_nan_func *func, **funcs; + int res, id, i = 0; + + res = drv_start_nan(sdata->local, sdata, + &sdata->u.nan.conf); + if (WARN_ON(res)) + return res; + + funcs = kzalloc((sdata->local->hw.max_nan_de_entries + 1) * + sizeof(*funcs), GFP_KERNEL); + if (!funcs) + return -ENOMEM; + + /* Add all the functions: + * This is a little bit ugly. We need to call a potentially sleeping + * callback for each NAN function, so we can't hold the spinlock. + */ + spin_lock_bh(&sdata->u.nan.func_lock); + + idr_for_each_entry(&sdata->u.nan.function_inst_ids, func, id) + funcs[i++] = func; + + spin_unlock_bh(&sdata->u.nan.func_lock); + + for (i = 0; funcs[i]; i++) { + res = drv_add_nan_func(sdata->local, sdata, funcs[i]); + if (WARN_ON(res)) + ieee80211_nan_func_terminated(&sdata->vif, + funcs[i]->instance_id, + NL80211_NAN_FUNC_TERM_REASON_ERROR, + GFP_KERNEL); + } + + kfree(funcs); + + return 0; +} + int ieee80211_reconfig(struct ieee80211_local *local) { struct ieee80211_hw *hw = &local->hw; @@ -1972,11 +2012,17 @@ int ieee80211_reconfig(struct ieee80211_local *local) ieee80211_bss_info_change_notify(sdata, changed); } break; + case NL80211_IFTYPE_NAN: + res = ieee80211_reconfig_nan(sdata); + if (res < 0) { + ieee80211_handle_reconfig_failure(local); + return res; + } + break; case NL80211_IFTYPE_WDS: case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_MONITOR: case NL80211_IFTYPE_P2P_DEVICE: - case NL80211_IFTYPE_NAN: /* nothing to do */ break; case NL80211_IFTYPE_UNSPECIFIED: From 92bc43bce2849c814cece258694f167d28524fbd Mon Sep 17 00:00:00 2001 From: Ayala Beker Date: Tue, 20 Sep 2016 17:31:21 +0300 Subject: [PATCH 1658/1715] mac80211: Add API to report NAN function match Provide an API to report NAN function match. Mac80211 will lookup the corresponding cookie and report the match to cfg80211. Signed-off-by: Andrei Otcheretianski Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/net/mac80211.h | 16 ++++++++++++++++ net/mac80211/cfg.c | 25 +++++++++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/include/net/mac80211.h b/include/net/mac80211.h index d4ddf476dc76..fc589ba90a48 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -5777,4 +5777,20 @@ void ieee80211_nan_func_terminated(struct ieee80211_vif *vif, u8 inst_id, enum nl80211_nan_func_term_reason reason, gfp_t gfp); + +/** + * ieee80211_nan_func_match - notify about NAN function match event. + * + * This function is used to notify mac80211 about NAN function match. The + * cookie inside the match struct will be assigned by mac80211. + * Note that this function can't be called from hard irq. + * + * @vif: &struct ieee80211_vif pointer from the add_interface callback. + * @match: match event information + * @gfp: allocation flags + */ +void ieee80211_nan_func_match(struct ieee80211_vif *vif, + struct cfg80211_nan_match_params *match, + gfp_t gfp); + #endif /* MAC80211_H */ diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 72ddb4379319..fd6541f3ade3 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -3555,6 +3555,31 @@ void ieee80211_nan_func_terminated(struct ieee80211_vif *vif, } EXPORT_SYMBOL(ieee80211_nan_func_terminated); +void ieee80211_nan_func_match(struct ieee80211_vif *vif, + struct cfg80211_nan_match_params *match, + gfp_t gfp) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct cfg80211_nan_func *func; + + if (WARN_ON(vif->type != NL80211_IFTYPE_NAN)) + return; + + spin_lock_bh(&sdata->u.nan.func_lock); + + func = idr_find(&sdata->u.nan.function_inst_ids, match->inst_id); + if (WARN_ON(!func)) { + spin_unlock_bh(&sdata->u.nan.func_lock); + return; + } + match->cookie = func->cookie; + + spin_unlock_bh(&sdata->u.nan.func_lock); + + cfg80211_nan_match(ieee80211_vif_to_wdev(vif), match, gfp); +} +EXPORT_SYMBOL(ieee80211_nan_func_match); + const struct cfg80211_ops mac80211_config_ops = { .add_virtual_intf = ieee80211_add_iface, .del_virtual_intf = ieee80211_del_iface, From 097b065b5cbfa3fd57b47f3c86d6baa96c30bf31 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Fri, 23 Sep 2016 21:59:09 +0200 Subject: [PATCH 1659/1715] fq.h: Port memory limit mechanism from fq_codel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The reusable fairness queueing implementation (fq.h) lacks the memory usage limit that the fq_codel qdisc has. This means that small devices (e.g. WiFi routers) can run out of memory when flooded with a large number of packets. This ports the memory limit feature from fq_codel to fq.h. Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Johannes Berg --- include/net/fq.h | 3 +++ include/net/fq_impl.h | 7 ++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/include/net/fq.h b/include/net/fq.h index 268b49049c37..6d8521a30c5c 100644 --- a/include/net/fq.h +++ b/include/net/fq.h @@ -72,9 +72,12 @@ struct fq { u32 flows_cnt; u32 perturbation; u32 limit; + u32 memory_limit; + u32 memory_usage; u32 quantum; u32 backlog; u32 overlimit; + u32 overmemory; u32 collisions; }; diff --git a/include/net/fq_impl.h b/include/net/fq_impl.h index 163f3ed0f05a..4e6131cd3f43 100644 --- a/include/net/fq_impl.h +++ b/include/net/fq_impl.h @@ -29,6 +29,7 @@ static struct sk_buff *fq_flow_dequeue(struct fq *fq, tin->backlog_packets--; flow->backlog -= skb->len; fq->backlog--; + fq->memory_usage -= skb->truesize; if (flow->backlog == 0) { list_del_init(&flow->backlogchain); @@ -154,6 +155,7 @@ static void fq_tin_enqueue(struct fq *fq, flow->backlog += skb->len; tin->backlog_bytes += skb->len; tin->backlog_packets++; + fq->memory_usage += skb->truesize; fq->backlog++; fq_recalc_backlog(fq, tin, flow); @@ -166,7 +168,7 @@ static void fq_tin_enqueue(struct fq *fq, __skb_queue_tail(&flow->queue, skb); - if (fq->backlog > fq->limit) { + if (fq->backlog > fq->limit || fq->memory_usage > fq->memory_limit) { flow = list_first_entry_or_null(&fq->backlogs, struct fq_flow, backlogchain); @@ -181,6 +183,8 @@ static void fq_tin_enqueue(struct fq *fq, flow->tin->overlimit++; fq->overlimit++; + if (fq->memory_usage > fq->memory_limit) + fq->overmemory++; } } @@ -251,6 +255,7 @@ static int fq_init(struct fq *fq, int flows_cnt) fq->perturbation = prandom_u32(); fq->quantum = 300; fq->limit = 8192; + fq->memory_limit = 16 << 20; /* 16 MBytes */ fq->flows = kcalloc(fq->flows_cnt, sizeof(fq->flows[0]), GFP_KERNEL); if (!fq->flows) From 2a4e675d887bb3130354561a70f05127de8b9926 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Fri, 23 Sep 2016 21:59:10 +0200 Subject: [PATCH 1660/1715] mac80211: Export fq memory limit information in debugfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add memory limit, usage and overlimit counter to per-PHY 'aqm' debugfs file. Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Johannes Berg --- net/mac80211/debugfs.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 8ca62b6bb02a..f56e2f487d09 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -89,13 +89,19 @@ static ssize_t aqm_read(struct file *file, "R fq_flows_cnt %u\n" "R fq_backlog %u\n" "R fq_overlimit %u\n" + "R fq_overmemory %u\n" "R fq_collisions %u\n" + "R fq_memory_usage %u\n" + "RW fq_memory_limit %u\n" "RW fq_limit %u\n" "RW fq_quantum %u\n", fq->flows_cnt, fq->backlog, + fq->overmemory, fq->overlimit, fq->collisions, + fq->memory_usage, + fq->memory_limit, fq->limit, fq->quantum); @@ -128,6 +134,8 @@ static ssize_t aqm_write(struct file *file, if (sscanf(buf, "fq_limit %u", &local->fq.limit) == 1) return count; + else if (sscanf(buf, "fq_memory_limit %u", &local->fq.memory_limit) == 1) + return count; else if (sscanf(buf, "fq_quantum %u", &local->fq.quantum) == 1) return count; From 3ff23cd5654b9c8f4d567caa73439b4c39fbeaae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Fri, 23 Sep 2016 21:59:11 +0200 Subject: [PATCH 1661/1715] mac80211: Set lower memory limit for non-VHT devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Small devices can run out of memory from queueing too many packets. If VHT is not supported by the PHY, having more than 4 MBytes of total queue in the TXQ intermediate queues is not needed, and so we can safely limit the memory usage in these cases and avoid OOM. Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index e8c996463b11..378a7a6b6dbe 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1433,6 +1433,8 @@ int ieee80211_txq_setup_flows(struct ieee80211_local *local) struct fq *fq = &local->fq; int ret; int i; + bool supp_vht = false; + enum nl80211_band band; if (!local->ops->wake_tx_queue) return 0; @@ -1441,6 +1443,23 @@ int ieee80211_txq_setup_flows(struct ieee80211_local *local) if (ret) return ret; + /* + * If the hardware doesn't support VHT, it is safe to limit the maximum + * queue size. 4 Mbytes is 64 max-size aggregates in 802.11n. + */ + for (band = 0; band < NUM_NL80211_BANDS; band++) { + struct ieee80211_supported_band *sband; + + sband = local->hw.wiphy->bands[band]; + if (!sband) + continue; + + supp_vht = supp_vht || sband->vht_cap.vht_supported; + } + + if (!supp_vht) + fq->memory_limit = 4 << 20; /* 4 Mbytes */ + codel_params_init(&local->cparams); local->cparams.interval = MS2TIME(100); local->cparams.target = MS2TIME(20); From 354d381baf1126c45d03b5c0d87d22caf938b86b Mon Sep 17 00:00:00 2001 From: "Pedersen, Thomas" Date: Wed, 28 Sep 2016 16:56:28 -0700 Subject: [PATCH 1662/1715] mac80211: add offset_tsf driver op and use it for mesh This allows the mesh sync (and debugfs) code to make incremental TSF adjustments, avoiding any uncertainty introduced by delay in programming absolute TSF. Signed-off-by: Thomas Pedersen Signed-off-by: Johannes Berg --- include/net/mac80211.h | 8 ++++++++ net/mac80211/debugfs_netdev.c | 12 +++++++++--- net/mac80211/driver-ops.c | 15 +++++++++++++++ net/mac80211/driver-ops.h | 3 +++ net/mac80211/mesh_sync.c | 10 +++++++--- net/mac80211/trace.h | 26 ++++++++++++++++++++++++++ 6 files changed, 68 insertions(+), 6 deletions(-) diff --git a/include/net/mac80211.h b/include/net/mac80211.h index fc589ba90a48..c9f39538ac17 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -3169,6 +3169,12 @@ enum ieee80211_reconfig_type { * required function. * The callback can sleep. * + * @offset_tsf: Offset the TSF timer by the specified value in the + * firmware/hardware. Preferred to set_tsf as it avoids delay between + * calling set_tsf() and hardware getting programmed, which will show up + * as TSF delay. Is not a required function. + * The callback can sleep. + * * @reset_tsf: Reset the TSF timer and allow firmware/hardware to synchronize * with other STAs in the IBSS. This is only used in IBSS mode. This * function is optional if the firmware/hardware takes full care of @@ -3549,6 +3555,8 @@ struct ieee80211_ops { u64 (*get_tsf)(struct ieee80211_hw *hw, struct ieee80211_vif *vif); void (*set_tsf)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, u64 tsf); + void (*offset_tsf)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, + s64 offset); void (*reset_tsf)(struct ieee80211_hw *hw, struct ieee80211_vif *vif); int (*tx_last_beacon)(struct ieee80211_hw *hw); int (*ampdu_action)(struct ieee80211_hw *hw, diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 5d35c0f37bb7..bcec1240f41d 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -556,9 +556,15 @@ static ssize_t ieee80211_if_parse_tsf( ret = kstrtoull(buf, 10, &tsf); if (ret < 0) return ret; - if (tsf_is_delta) - tsf = drv_get_tsf(local, sdata) + tsf_is_delta * tsf; - if (local->ops->set_tsf) { + if (tsf_is_delta && local->ops->offset_tsf) { + drv_offset_tsf(local, sdata, tsf_is_delta * tsf); + wiphy_info(local->hw.wiphy, + "debugfs offset TSF by %018lld\n", + tsf_is_delta * tsf); + } else if (local->ops->set_tsf) { + if (tsf_is_delta) + tsf = drv_get_tsf(local, sdata) + + tsf_is_delta * tsf; drv_set_tsf(local, sdata, tsf); wiphy_info(local->hw.wiphy, "debugfs set TSF to %#018llx\n", tsf); diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c index c701b6438bd9..bb886e7db47f 100644 --- a/net/mac80211/driver-ops.c +++ b/net/mac80211/driver-ops.c @@ -215,6 +215,21 @@ void drv_set_tsf(struct ieee80211_local *local, trace_drv_return_void(local); } +void drv_offset_tsf(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + s64 offset) +{ + might_sleep(); + + if (!check_sdata_in_driver(sdata)) + return; + + trace_drv_offset_tsf(local, sdata, offset); + if (local->ops->offset_tsf) + local->ops->offset_tsf(&local->hw, &sdata->vif, offset); + trace_drv_return_void(local); +} + void drv_reset_tsf(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata) { diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index dea92c33b2ca..09f77e4a8a79 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -569,6 +569,9 @@ u64 drv_get_tsf(struct ieee80211_local *local, void drv_set_tsf(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, u64 tsf); +void drv_offset_tsf(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + s64 offset); void drv_reset_tsf(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata); diff --git a/net/mac80211/mesh_sync.c b/net/mac80211/mesh_sync.c index 64bc22ad9496..22ca43c500e4 100644 --- a/net/mac80211/mesh_sync.c +++ b/net/mac80211/mesh_sync.c @@ -70,9 +70,13 @@ void mesh_sync_adjust_tbtt(struct ieee80211_sub_if_data *sdata) } spin_unlock_bh(&ifmsh->sync_offset_lock); - tsf = drv_get_tsf(local, sdata); - if (tsf != -1ULL) - drv_set_tsf(local, sdata, tsf + tsfdelta); + if (local->ops->offset_tsf) { + drv_offset_tsf(local, sdata, tsfdelta); + } else { + tsf = drv_get_tsf(local, sdata); + if (tsf != -1ULL) + drv_set_tsf(local, sdata, tsf + tsfdelta); + } } static void mesh_sync_offset_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 37891fa67e9a..92a47afaa989 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -984,6 +984,32 @@ TRACE_EVENT(drv_set_tsf, ) ); +TRACE_EVENT(drv_offset_tsf, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + s64 offset), + + TP_ARGS(local, sdata, offset), + + TP_STRUCT__entry( + LOCAL_ENTRY + VIF_ENTRY + __field(s64, tsf_offset) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + VIF_ASSIGN; + __entry->tsf_offset = offset; + ), + + TP_printk( + LOCAL_PR_FMT VIF_PR_FMT " tsf offset:%lld", + LOCAL_PR_ARG, VIF_PR_ARG, + (unsigned long long)__entry->tsf_offset + ) +); + DEFINE_EVENT(local_sdata_evt, drv_reset_tsf, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata), From 3a53731df7e2a6e238274d13aa8d4826f78030f9 Mon Sep 17 00:00:00 2001 From: "Pedersen, Thomas" Date: Wed, 28 Sep 2016 16:56:31 -0700 Subject: [PATCH 1663/1715] mac80211: mesh: decrease max drift The old value was 30ms, which means mesh sync will treat any value below as merely TSF drift. This isn't really reasonable (typical drift is < 10us/s) since people probably want to adjust TSF in smaller increments (for ie. beacon collision avoidance) without mesh sync fighting back. Change max drift adjustment to 0.8ms, so manual TSF adjustments can be made in 1ms increments, with some margin. Signed-off-by: Thomas Pedersen Signed-off-by: Johannes Berg --- net/mac80211/mesh_sync.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/mesh_sync.c b/net/mac80211/mesh_sync.c index 22ca43c500e4..faca22cd02b5 100644 --- a/net/mac80211/mesh_sync.c +++ b/net/mac80211/mesh_sync.c @@ -28,7 +28,7 @@ * could be, for instance, in case a neighbor is restarted and its TSF counter * reset. */ -#define TOFFSET_MAXIMUM_ADJUSTMENT 30000 /* 30 ms */ +#define TOFFSET_MAXIMUM_ADJUSTMENT 800 /* 0.8 ms */ struct sync_method { u8 method; From bb42f2d13ffcd0baed7547b37d05add51fcd50e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Thu, 22 Sep 2016 19:04:20 +0200 Subject: [PATCH 1664/1715] mac80211: Move reorder-sensitive TX handlers to after TXQ dequeue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The TXQ intermediate queues can cause packet reordering when more than one flow is active to a single station. Since some of the wifi-specific packet handling (notably sequence number and encryption handling) is sensitive to re-ordering, things break if they are applied before the TXQ. This splits up the TX handlers and fast_xmit logic into two parts: An early part and a late part. The former is applied before TXQ enqueue, and the latter after dequeue. The non-TXQ path just applies both parts at once. Because fragments shouldn't be split up or reordered, the fragmentation handler is run after dequeue. Any fragments are then kept in the TXQ and on subsequent dequeues they take precedence over dequeueing from the FQ structure. This approach avoids having to scatter special cases all over the place for when TXQ is enabled, at the cost of making the fast_xmit and TX handler code slightly more complex. Signed-off-by: Toke Høiland-Jørgensen [fix a few code-style nits, make ieee80211_xmit_fast_finish void, remove a useless txq->sta check] Signed-off-by: Johannes Berg --- include/net/mac80211.h | 2 + net/mac80211/ieee80211_i.h | 9 ++ net/mac80211/rx.c | 4 +- net/mac80211/sta_info.c | 10 +- net/mac80211/tx.c | 284 +++++++++++++++++++++++++++---------- net/mac80211/util.c | 11 +- 6 files changed, 230 insertions(+), 90 deletions(-) diff --git a/include/net/mac80211.h b/include/net/mac80211.h index c9f39538ac17..a810dfcb83c2 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -715,6 +715,7 @@ enum mac80211_tx_info_flags { * frame (PS-Poll or uAPSD). * @IEEE80211_TX_CTRL_RATE_INJECT: This frame is injected with rate information * @IEEE80211_TX_CTRL_AMSDU: This frame is an A-MSDU frame + * @IEEE80211_TX_CTRL_FAST_XMIT: This frame is going through the fast_xmit path * * These flags are used in tx_info->control.flags. */ @@ -723,6 +724,7 @@ enum mac80211_tx_control_flags { IEEE80211_TX_CTRL_PS_RESPONSE = BIT(1), IEEE80211_TX_CTRL_RATE_INJECT = BIT(2), IEEE80211_TX_CTRL_AMSDU = BIT(3), + IEEE80211_TX_CTRL_FAST_XMIT = BIT(4), }; /* diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 2b391f242e58..8f8bddd5c8d8 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -815,12 +815,14 @@ enum txq_info_flags { * @def_flow: used as a fallback flow when a packet destined to @tin hashes to * a fq_flow which is already owned by a different tin * @def_cvars: codel vars for @def_flow + * @frags: used to keep fragments created after dequeue */ struct txq_info { struct fq_tin tin; struct fq_flow def_flow; struct codel_vars def_cvars; struct codel_stats cstats; + struct sk_buff_head frags; unsigned long flags; /* keep last! */ @@ -1498,6 +1500,13 @@ static inline struct txq_info *to_txq_info(struct ieee80211_txq *txq) return container_of(txq, struct txq_info, txq); } +static inline bool txq_has_queue(struct ieee80211_txq *txq) +{ + struct txq_info *txqi = to_txq_info(txq); + + return !(skb_queue_empty(&txqi->frags) && !txqi->tin.backlog_packets); +} + static inline int ieee80211_bssid_match(const u8 *raddr, const u8 *addr) { return ether_addr_equal(raddr, addr) || diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index c9489a86e6d6..b2fe725881dc 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1323,9 +1323,7 @@ static void sta_ps_start(struct sta_info *sta) return; for (tid = 0; tid < ARRAY_SIZE(sta->sta.txq); tid++) { - struct txq_info *txqi = to_txq_info(sta->sta.txq[tid]); - - if (txqi->tin.backlog_packets) + if (txq_has_queue(sta->sta.txq[tid])) set_bit(tid, &sta->txq_buffered_tids); else clear_bit(tid, &sta->txq_buffered_tids); diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 1b1b28ff4fdb..167bff078bdd 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -1212,12 +1212,10 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) if (sta->sta.txq[0]) { for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) { - struct txq_info *txqi = to_txq_info(sta->sta.txq[i]); - - if (!txqi->tin.backlog_packets) + if (!txq_has_queue(sta->sta.txq[i])) continue; - drv_wake_tx_queue(local, txqi); + drv_wake_tx_queue(local, to_txq_info(sta->sta.txq[i])); } } @@ -1649,9 +1647,7 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta, return; for (tid = 0; tid < ARRAY_SIZE(sta->sta.txq); tid++) { - struct txq_info *txqi = to_txq_info(sta->sta.txq[tid]); - - if (!(tids & BIT(tid)) || txqi->tin.backlog_packets) + if (!(tids & BIT(tid)) || txq_has_queue(sta->sta.txq[tid])) continue; sta_info_recalc_tim(sta); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 378a7a6b6dbe..0ea1b0d02186 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -853,8 +853,7 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx) tid = *qc & IEEE80211_QOS_CTL_TID_MASK; tx->sta->tx_stats.msdu[tid]++; - if (!tx->sta->sta.txq[0]) - hdr->seq_ctrl = ieee80211_tx_next_seq(tx->sta, tid); + hdr->seq_ctrl = ieee80211_tx_next_seq(tx->sta, tid); return TX_CONTINUE; } @@ -1404,6 +1403,7 @@ void ieee80211_txq_init(struct ieee80211_sub_if_data *sdata, fq_flow_init(&txqi->def_flow); codel_vars_init(&txqi->def_cvars); codel_stats_init(&txqi->cstats); + __skb_queue_head_init(&txqi->frags); txqi->txq.vif = &sdata->vif; @@ -1426,6 +1426,7 @@ void ieee80211_txq_purge(struct ieee80211_local *local, struct fq_tin *tin = &txqi->tin; fq_tin_reset(fq, tin, fq_skb_free_func); + ieee80211_purge_tx_queue(&local->hw, &txqi->frags); } int ieee80211_txq_setup_flows(struct ieee80211_local *local) @@ -1495,6 +1496,47 @@ void ieee80211_txq_teardown_flows(struct ieee80211_local *local) spin_unlock_bh(&fq->lock); } +static bool ieee80211_queue_skb(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, + struct sk_buff *skb) +{ + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct fq *fq = &local->fq; + struct ieee80211_vif *vif; + struct txq_info *txqi; + struct ieee80211_sta *pubsta; + + if (!local->ops->wake_tx_queue || + sdata->vif.type == NL80211_IFTYPE_MONITOR) + return false; + + if (sta && sta->uploaded) + pubsta = &sta->sta; + else + pubsta = NULL; + + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + sdata = container_of(sdata->bss, + struct ieee80211_sub_if_data, u.ap); + + vif = &sdata->vif; + txqi = ieee80211_get_txq(local, vif, pubsta, skb); + + if (!txqi) + return false; + + info->control.vif = vif; + + spin_lock_bh(&fq->lock); + ieee80211_txq_enqueue(local, txqi, skb); + spin_unlock_bh(&fq->lock); + + drv_wake_tx_queue(local, txqi); + + return true; +} + static bool ieee80211_tx_frags(struct ieee80211_local *local, struct ieee80211_vif *vif, struct ieee80211_sta *sta, @@ -1502,9 +1544,7 @@ static bool ieee80211_tx_frags(struct ieee80211_local *local, bool txpending) { struct ieee80211_tx_control control = {}; - struct fq *fq = &local->fq; struct sk_buff *skb, *tmp; - struct txq_info *txqi; unsigned long flags; skb_queue_walk_safe(skbs, skb, tmp) { @@ -1519,21 +1559,6 @@ static bool ieee80211_tx_frags(struct ieee80211_local *local, } #endif - txqi = ieee80211_get_txq(local, vif, sta, skb); - if (txqi) { - info->control.vif = vif; - - __skb_unlink(skb, skbs); - - spin_lock_bh(&fq->lock); - ieee80211_txq_enqueue(local, txqi, skb); - spin_unlock_bh(&fq->lock); - - drv_wake_tx_queue(local, txqi); - - continue; - } - spin_lock_irqsave(&local->queue_stop_reason_lock, flags); if (local->queue_stop_reasons[q] || (!txpending && !skb_queue_empty(&local->pending[q]))) { @@ -1654,10 +1679,13 @@ static bool __ieee80211_tx(struct ieee80211_local *local, /* * Invoke TX handlers, return 0 on success and non-zero if the * frame was dropped or queued. + * + * The handlers are split into an early and late part. The latter is everything + * that can be sensitive to reordering, and will be deferred to after packets + * are dequeued from the intermediate queues (when they are enabled). */ -static int invoke_tx_handlers(struct ieee80211_tx_data *tx) +static int invoke_tx_handlers_early(struct ieee80211_tx_data *tx) { - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); ieee80211_tx_result res = TX_DROP; #define CALL_TXH(txh) \ @@ -1675,6 +1703,31 @@ static int invoke_tx_handlers(struct ieee80211_tx_data *tx) if (!ieee80211_hw_check(&tx->local->hw, HAS_RATE_CONTROL)) CALL_TXH(ieee80211_tx_h_rate_ctrl); + txh_done: + if (unlikely(res == TX_DROP)) { + I802_DEBUG_INC(tx->local->tx_handlers_drop); + if (tx->skb) + ieee80211_free_txskb(&tx->local->hw, tx->skb); + else + ieee80211_purge_tx_queue(&tx->local->hw, &tx->skbs); + return -1; + } else if (unlikely(res == TX_QUEUED)) { + I802_DEBUG_INC(tx->local->tx_handlers_queued); + return -1; + } + + return 0; +} + +/* + * Late handlers can be called while the sta lock is held. Handlers that can + * cause packets to be generated will cause deadlock! + */ +static int invoke_tx_handlers_late(struct ieee80211_tx_data *tx) +{ + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); + ieee80211_tx_result res = TX_CONTINUE; + if (unlikely(info->flags & IEEE80211_TX_INTFL_RETRANSMISSION)) { __skb_queue_tail(&tx->skbs, tx->skb); tx->skb = NULL; @@ -1707,6 +1760,15 @@ static int invoke_tx_handlers(struct ieee80211_tx_data *tx) return 0; } +static int invoke_tx_handlers(struct ieee80211_tx_data *tx) +{ + int r = invoke_tx_handlers_early(tx); + + if (r) + return r; + return invoke_tx_handlers_late(tx); +} + bool ieee80211_tx_prepare_skb(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct sk_buff *skb, int band, struct ieee80211_sta **sta) @@ -1781,7 +1843,13 @@ static bool ieee80211_tx(struct ieee80211_sub_if_data *sdata, info->hw_queue = sdata->vif.hw_queue[skb_get_queue_mapping(skb)]; - if (!invoke_tx_handlers(&tx)) + if (invoke_tx_handlers_early(&tx)) + return false; + + if (ieee80211_queue_skb(local, sdata, tx.sta, tx.skb)) + return true; + + if (!invoke_tx_handlers_late(&tx)) result = __ieee80211_tx(local, &tx.skbs, led_len, tx.sta, txpending); @@ -3125,8 +3193,71 @@ out: return ret; } +/* + * Can be called while the sta lock is held. Anything that can cause packets to + * be generated will cause deadlock! + */ +static void ieee80211_xmit_fast_finish(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, u8 pn_offs, + struct ieee80211_key *key, + struct sk_buff *skb) +{ + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_hdr *hdr = (void *)skb->data; + u8 tid = IEEE80211_NUM_TIDS; + + if (key) + info->control.hw_key = &key->conf; + + ieee80211_tx_stats(skb->dev, skb->len); + + if (hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) { + tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK; + *ieee80211_get_qos_ctl(hdr) = tid; + hdr->seq_ctrl = ieee80211_tx_next_seq(sta, tid); + } else { + info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ; + hdr->seq_ctrl = cpu_to_le16(sdata->sequence_number); + sdata->sequence_number += 0x10; + } + + if (skb_shinfo(skb)->gso_size) + sta->tx_stats.msdu[tid] += + DIV_ROUND_UP(skb->len, skb_shinfo(skb)->gso_size); + else + sta->tx_stats.msdu[tid]++; + + info->hw_queue = sdata->vif.hw_queue[skb_get_queue_mapping(skb)]; + + /* statistics normally done by ieee80211_tx_h_stats (but that + * has to consider fragmentation, so is more complex) + */ + sta->tx_stats.bytes[skb_get_queue_mapping(skb)] += skb->len; + sta->tx_stats.packets[skb_get_queue_mapping(skb)]++; + + if (pn_offs) { + u64 pn; + u8 *crypto_hdr = skb->data + pn_offs; + + switch (key->conf.cipher) { + case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + pn = atomic64_inc_return(&key->conf.tx_pn); + crypto_hdr[0] = pn; + crypto_hdr[1] = pn >> 8; + crypto_hdr[4] = pn >> 16; + crypto_hdr[5] = pn >> 24; + crypto_hdr[6] = pn >> 32; + crypto_hdr[7] = pn >> 40; + break; + } + } +} + static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, - struct net_device *dev, struct sta_info *sta, + struct sta_info *sta, struct ieee80211_fast_tx *fast_tx, struct sk_buff *skb) { @@ -3177,8 +3308,6 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, return true; } - ieee80211_tx_stats(dev, skb->len + extra_head); - if ((hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) && ieee80211_amsdu_aggregate(sdata, sta, fast_tx, skb)) return true; @@ -3207,24 +3336,7 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, info->flags = IEEE80211_TX_CTL_FIRST_FRAGMENT | IEEE80211_TX_CTL_DONTFRAG | (tid_tx ? IEEE80211_TX_CTL_AMPDU : 0); - - if (hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) { - *ieee80211_get_qos_ctl(hdr) = tid; - if (!sta->sta.txq[0]) - hdr->seq_ctrl = ieee80211_tx_next_seq(sta, tid); - } else { - info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ; - hdr->seq_ctrl = cpu_to_le16(sdata->sequence_number); - sdata->sequence_number += 0x10; - } - - if (skb_shinfo(skb)->gso_size) - sta->tx_stats.msdu[tid] += - DIV_ROUND_UP(skb->len, skb_shinfo(skb)->gso_size); - else - sta->tx_stats.msdu[tid]++; - - info->hw_queue = sdata->vif.hw_queue[skb_get_queue_mapping(skb)]; + info->control.flags = IEEE80211_TX_CTRL_FAST_XMIT; __skb_queue_head_init(&tx.skbs); @@ -3234,9 +3346,6 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, tx.sta = sta; tx.key = fast_tx->key; - if (fast_tx->key) - info->control.hw_key = &fast_tx->key->conf; - if (!ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) { tx.skb = skb; r = ieee80211_tx_h_rate_ctrl(&tx); @@ -3250,31 +3359,11 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, } } - /* statistics normally done by ieee80211_tx_h_stats (but that - * has to consider fragmentation, so is more complex) - */ - sta->tx_stats.bytes[skb_get_queue_mapping(skb)] += skb->len; - sta->tx_stats.packets[skb_get_queue_mapping(skb)]++; + if (ieee80211_queue_skb(local, sdata, sta, skb)) + return true; - if (fast_tx->pn_offs) { - u64 pn; - u8 *crypto_hdr = skb->data + fast_tx->pn_offs; - - switch (fast_tx->key->conf.cipher) { - case WLAN_CIPHER_SUITE_CCMP: - case WLAN_CIPHER_SUITE_CCMP_256: - case WLAN_CIPHER_SUITE_GCMP: - case WLAN_CIPHER_SUITE_GCMP_256: - pn = atomic64_inc_return(&fast_tx->key->conf.tx_pn); - crypto_hdr[0] = pn; - crypto_hdr[1] = pn >> 8; - crypto_hdr[4] = pn >> 16; - crypto_hdr[5] = pn >> 24; - crypto_hdr[6] = pn >> 32; - crypto_hdr[7] = pn >> 40; - break; - } - } + ieee80211_xmit_fast_finish(sdata, sta, fast_tx->pn_offs, + fast_tx->key, skb); if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) sdata = container_of(sdata->bss, @@ -3294,12 +3383,21 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw, struct sk_buff *skb = NULL; struct fq *fq = &local->fq; struct fq_tin *tin = &txqi->tin; + struct ieee80211_tx_info *info; + struct ieee80211_tx_data tx; + ieee80211_tx_result r; spin_lock_bh(&fq->lock); if (test_bit(IEEE80211_TXQ_STOP, &txqi->flags)) goto out; + /* Make sure fragments stay together. */ + skb = __skb_dequeue(&txqi->frags); + if (skb) + goto out; + +begin: skb = fq_tin_dequeue(fq, tin, fq_tin_dequeue_func); if (!skb) goto out; @@ -3307,16 +3405,46 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw, ieee80211_set_skb_vif(skb, txqi); hdr = (struct ieee80211_hdr *)skb->data; - if (txq->sta && ieee80211_is_data_qos(hdr->frame_control)) { + info = IEEE80211_SKB_CB(skb); + + memset(&tx, 0, sizeof(tx)); + __skb_queue_head_init(&tx.skbs); + tx.local = local; + tx.skb = skb; + tx.sdata = vif_to_sdata(info->control.vif); + + if (txq->sta) + tx.sta = container_of(txq->sta, struct sta_info, sta); + + /* + * The key can be removed while the packet was queued, so need to call + * this here to get the current key. + */ + r = ieee80211_tx_h_select_key(&tx); + if (r != TX_CONTINUE) { + ieee80211_free_txskb(&local->hw, skb); + goto begin; + } + + if (info->control.flags & IEEE80211_TX_CTRL_FAST_XMIT) { struct sta_info *sta = container_of(txq->sta, struct sta_info, sta); - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + u8 pn_offs = 0; - hdr->seq_ctrl = ieee80211_tx_next_seq(sta, txq->tid); - if (test_bit(IEEE80211_TXQ_AMPDU, &txqi->flags)) - info->flags |= IEEE80211_TX_CTL_AMPDU; - else - info->flags &= ~IEEE80211_TX_CTL_AMPDU; + if (tx.key && + (tx.key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) + pn_offs = ieee80211_hdrlen(hdr->frame_control); + + ieee80211_xmit_fast_finish(sta->sdata, sta, pn_offs, + tx.key, skb); + } else { + if (invoke_tx_handlers_late(&tx)) + goto begin; + + skb = __skb_dequeue(&tx.skbs); + + if (!skb_queue_empty(&tx.skbs)) + skb_queue_splice_tail(&tx.skbs, &txqi->frags); } out: @@ -3354,7 +3482,7 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb, fast_tx = rcu_dereference(sta->fast_tx); if (fast_tx && - ieee80211_xmit_fast(sdata, dev, sta, fast_tx, skb)) + ieee80211_xmit_fast(sdata, sta, fast_tx, skb)) goto out; } diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 91754c8dafb2..545c79a42a77 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -3441,11 +3441,18 @@ void ieee80211_txq_get_depth(struct ieee80211_txq *txq, unsigned long *byte_cnt) { struct txq_info *txqi = to_txq_info(txq); + u32 frag_cnt = 0, frag_bytes = 0; + struct sk_buff *skb; + + skb_queue_walk(&txqi->frags, skb) { + frag_cnt++; + frag_bytes += skb->len; + } if (frame_cnt) - *frame_cnt = txqi->tin.backlog_packets; + *frame_cnt = txqi->tin.backlog_packets + frag_cnt; if (byte_cnt) - *byte_cnt = txqi->tin.backlog_bytes; + *byte_cnt = txqi->tin.backlog_bytes + frag_bytes; } EXPORT_SYMBOL(ieee80211_txq_get_depth); From 8782def204e57f6a507ff425e4944df4e010751a Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 30 Sep 2016 09:26:12 +0100 Subject: [PATCH 1665/1715] rxrpc: Switch to Congestion Avoidance mode at cwnd==ssthresh Switch to Congestion Avoidance mode at cwnd == ssthresh rather than relying on cwnd getting incremented beyond ssthresh and the window size, the mode being shifted and then cwnd being corrected. We need to make sure we switch into CA mode so that we stop marking every packet for ACK. Signed-off-by: David Howells --- net/rxrpc/input.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 1461d30583c9..21746f0f7ae0 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -57,7 +57,7 @@ static void rxrpc_congestion_management(struct rxrpc_call *call, call->cong_ssthresh = max_t(unsigned int, summary->flight_size / 2, 2); cwnd = 1; - if (cwnd > call->cong_ssthresh && + if (cwnd >= call->cong_ssthresh && call->cong_mode == RXRPC_CALL_SLOW_START) { call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE; call->cong_tstamp = skb->tstamp; @@ -82,7 +82,7 @@ static void rxrpc_congestion_management(struct rxrpc_call *call, goto packet_loss_detected; if (summary->cumulative_acks > 0) cwnd += 1; - if (cwnd > call->cong_ssthresh) { + if (cwnd >= call->cong_ssthresh) { call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE; call->cong_tstamp = skb->tstamp; } @@ -161,7 +161,7 @@ resume_normality: call->cong_dup_acks = 0; call->cong_extra = 0; call->cong_tstamp = skb->tstamp; - if (cwnd <= call->cong_ssthresh) + if (cwnd < call->cong_ssthresh) call->cong_mode = RXRPC_CALL_SLOW_START; else call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE; From 0851115090a3eb9585d6a804a61e47f3d89ac2a8 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 30 Sep 2016 09:33:27 +0100 Subject: [PATCH 1666/1715] rxrpc: Reduce ssthresh to peer's receive window When we receive an ACK from the peer that tells us what the peer's receive window (rwind) is, we should reduce ssthresh to rwind if rwind is smaller than ssthresh. Signed-off-by: David Howells --- net/rxrpc/input.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 21746f0f7ae0..7993473e56bb 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -658,6 +658,8 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb, if (rwind > RXRPC_RXTX_BUFF_SIZE - 1) rwind = RXRPC_RXTX_BUFF_SIZE - 1; call->tx_winsize = rwind; + if (call->cong_ssthresh > rwind) + call->cong_ssthresh = rwind; mtu = min(ntohl(ackinfo->rxMTU), ntohl(ackinfo->maxMTU)); From 775e5b71db6aca47d49d43d08751f2e8ebad7f60 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 30 Sep 2016 13:26:03 +0100 Subject: [PATCH 1667/1715] rxrpc: The offset field in struct rxrpc_skb_priv is unnecessary The offset field in struct rxrpc_skb_priv is unnecessary as the value can always be calculated. Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 1 - net/rxrpc/conn_event.c | 3 ++- net/rxrpc/input.c | 23 ++++++++++++----------- net/rxrpc/local_event.c | 3 ++- net/rxrpc/recvmsg.c | 6 ++---- net/rxrpc/rxkad.c | 9 ++++++--- 6 files changed, 24 insertions(+), 21 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 539db54697f9..fd64a2bd1072 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -144,7 +144,6 @@ struct rxrpc_skb_priv { u8 nr_jumbo; /* Number of jumbo subpackets */ }; union { - unsigned int offset; /* offset into buffer of next read */ int remain; /* amount of space remaining for next write */ u32 error; /* network error code */ }; diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 37609ce89f52..3f9d8d7ec632 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -276,7 +276,8 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, return 0; case RXRPC_PACKET_TYPE_ABORT: - if (skb_copy_bits(skb, sp->offset, &wtmp, sizeof(wtmp)) < 0) + if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), + &wtmp, sizeof(wtmp)) < 0) return -EPROTO; abort_code = ntohl(wtmp); _proto("Rx ABORT %%%u { ac=%d }", sp->hdr.serial, abort_code); diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 7993473e56bb..5ba35b4a907b 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -358,7 +358,7 @@ static bool rxrpc_receiving_reply(struct rxrpc_call *call) static bool rxrpc_validate_jumbo(struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - unsigned int offset = sp->offset; + unsigned int offset = sizeof(struct rxrpc_wire_header); unsigned int len = skb->len; int nr_jumbo = 1; u8 flags = sp->hdr.flags; @@ -419,7 +419,7 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb, u16 skew) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - unsigned int offset = sp->offset; + unsigned int offset = sizeof(struct rxrpc_wire_header); unsigned int ix; rxrpc_serial_t serial = sp->hdr.serial, ack_serial = 0; rxrpc_seq_t seq = sp->hdr.seq, hard_ack; @@ -746,15 +746,16 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, } buf; rxrpc_serial_t acked_serial; rxrpc_seq_t first_soft_ack, hard_ack; - int nr_acks, offset; + int nr_acks, offset, ioffset; _enter(""); - if (skb_copy_bits(skb, sp->offset, &buf.ack, sizeof(buf.ack)) < 0) { + offset = sizeof(struct rxrpc_wire_header); + if (skb_copy_bits(skb, offset, &buf.ack, sizeof(buf.ack)) < 0) { _debug("extraction failure"); return rxrpc_proto_abort("XAK", call, 0); } - sp->offset += sizeof(buf.ack); + offset += sizeof(buf.ack); acked_serial = ntohl(buf.ack.serial); first_soft_ack = ntohl(buf.ack.firstPacket); @@ -792,9 +793,9 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, rxrpc_propose_ack_respond_to_ack); } - offset = sp->offset + nr_acks + 3; - if (skb->len >= offset + sizeof(buf.info)) { - if (skb_copy_bits(skb, offset, &buf.info, sizeof(buf.info)) < 0) + ioffset = offset + nr_acks + 3; + if (skb->len >= ioffset + sizeof(buf.info)) { + if (skb_copy_bits(skb, ioffset, &buf.info, sizeof(buf.info)) < 0) return rxrpc_proto_abort("XAI", call, 0); rxrpc_input_ackinfo(call, skb, &buf.info); } @@ -832,7 +833,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, rxrpc_rotate_tx_window(call, hard_ack, &summary); if (nr_acks > 0) { - if (skb_copy_bits(skb, sp->offset, buf.acks, nr_acks) < 0) + if (skb_copy_bits(skb, offset, buf.acks, nr_acks) < 0) return rxrpc_proto_abort("XSA", call, 0); rxrpc_input_soft_acks(call, buf.acks, first_soft_ack, nr_acks, &summary); @@ -880,7 +881,8 @@ static void rxrpc_input_abort(struct rxrpc_call *call, struct sk_buff *skb) _enter(""); if (skb->len >= 4 && - skb_copy_bits(skb, sp->offset, &wtmp, sizeof(wtmp)) >= 0) + skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), + &wtmp, sizeof(wtmp)) >= 0) abort_code = ntohl(wtmp); _proto("Rx ABORT %%%u { %x }", sp->hdr.serial, abort_code); @@ -996,7 +998,6 @@ int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb) sp->hdr.securityIndex = whdr.securityIndex; sp->hdr._rsvd = ntohs(whdr._rsvd); sp->hdr.serviceId = ntohs(whdr.serviceId); - sp->offset = sizeof(whdr); return 0; } diff --git a/net/rxrpc/local_event.c b/net/rxrpc/local_event.c index 190f68bd9e27..540d3955c1bc 100644 --- a/net/rxrpc/local_event.c +++ b/net/rxrpc/local_event.c @@ -95,7 +95,8 @@ void rxrpc_process_local_events(struct rxrpc_local *local) switch (sp->hdr.type) { case RXRPC_PACKET_TYPE_VERSION: - if (skb_copy_bits(skb, sp->offset, &v, 1) < 0) + if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), + &v, 1) < 0) return; _proto("Rx VERSION { %02x }", v); if (v == 0) diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 038ae62ddb4d..f05ea0a88076 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -261,15 +261,13 @@ static int rxrpc_locate_data(struct rxrpc_call *call, struct sk_buff *skb, u8 *_annotation, unsigned int *_offset, unsigned int *_len) { - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - unsigned int offset = *_offset; + unsigned int offset = sizeof(struct rxrpc_wire_header); unsigned int len = *_len; int ret; u8 annotation = *_annotation; /* Locate the subpacket */ - offset = sp->offset; - len = skb->len - sp->offset; + len = skb->len - offset; if ((annotation & RXRPC_RX_ANNO_JUMBO) > 0) { offset += (((annotation & RXRPC_RX_ANNO_JUMBO) - 1) * RXRPC_JUMBO_SUBPKTLEN); diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index 88d080a1a3de..627abed5f999 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -771,7 +771,8 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn, } abort_code = RXKADPACKETSHORT; - if (skb_copy_bits(skb, sp->offset, &challenge, sizeof(challenge)) < 0) + if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), + &challenge, sizeof(challenge)) < 0) goto protocol_error; version = ntohl(challenge.version); @@ -1028,7 +1029,8 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, _enter("{%d,%x}", conn->debug_id, key_serial(conn->server_key)); abort_code = RXKADPACKETSHORT; - if (skb_copy_bits(skb, sp->offset, &response, sizeof(response)) < 0) + if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), + &response, sizeof(response)) < 0) goto protocol_error; if (!pskb_pull(skb, sizeof(response))) BUG(); @@ -1057,7 +1059,8 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, return -ENOMEM; abort_code = RXKADPACKETSHORT; - if (skb_copy_bits(skb, sp->offset, ticket, ticket_len) < 0) + if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), + ticket, ticket_len) < 0) goto protocol_error_free; ret = rxkad_decrypt_ticket(conn, ticket, ticket_len, &session_key, From c31410ea009d10501ea90f64cdda0083c8cf0161 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 30 Sep 2016 13:42:31 +0100 Subject: [PATCH 1668/1715] rxrpc: Remove error from struct rxrpc_skb_priv as it is unused Remove error from struct rxrpc_skb_priv as it is no longer used. Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 1 - 1 file changed, 1 deletion(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index fd64a2bd1072..141c1458e719 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -145,7 +145,6 @@ struct rxrpc_skb_priv { }; union { int remain; /* amount of space remaining for next write */ - u32 error; /* network error code */ }; struct rxrpc_host_header hdr; /* RxRPC packet header from this packet */ From df0adc788ae74e35ab1a79f3db878df7fdc7db55 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 26 Sep 2016 22:12:49 +0100 Subject: [PATCH 1669/1715] rxrpc: Keep the call timeouts as ktimes rather than jiffies Keep that call timeouts as ktimes rather than jiffies so that they can be expressed as functions of RTT. Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 25 ++++++------ net/rxrpc/ar-internal.h | 8 ++-- net/rxrpc/call_event.c | 73 +++++++++++++++++++----------------- net/rxrpc/call_object.c | 16 +++----- net/rxrpc/input.c | 3 +- net/rxrpc/misc.c | 15 +++++--- net/rxrpc/sendmsg.c | 8 ++-- net/rxrpc/sysctl.c | 8 ++-- 8 files changed, 82 insertions(+), 74 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 67f03946ea4a..0383e5e9a0f3 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -453,17 +453,18 @@ TRACE_EVENT(rxrpc_rtt_rx, TRACE_EVENT(rxrpc_timer, TP_PROTO(struct rxrpc_call *call, enum rxrpc_timer_trace why, - unsigned long now), + ktime_t now, unsigned long now_j), - TP_ARGS(call, why, now), + TP_ARGS(call, why, now, now_j), TP_STRUCT__entry( __field(struct rxrpc_call *, call ) __field(enum rxrpc_timer_trace, why ) - __field(unsigned long, now ) - __field(unsigned long, expire_at ) - __field(unsigned long, ack_at ) - __field(unsigned long, resend_at ) + __field_struct(ktime_t, now ) + __field_struct(ktime_t, expire_at ) + __field_struct(ktime_t, ack_at ) + __field_struct(ktime_t, resend_at ) + __field(unsigned long, now_j ) __field(unsigned long, timer ) ), @@ -474,17 +475,17 @@ TRACE_EVENT(rxrpc_timer, __entry->expire_at = call->expire_at; __entry->ack_at = call->ack_at; __entry->resend_at = call->resend_at; + __entry->now_j = now_j; __entry->timer = call->timer.expires; ), - TP_printk("c=%p %s now=%lx x=%ld a=%ld r=%ld t=%ld", + TP_printk("c=%p %s x=%lld a=%lld r=%lld t=%ld", __entry->call, rxrpc_timer_traces[__entry->why], - __entry->now, - __entry->expire_at - __entry->now, - __entry->ack_at - __entry->now, - __entry->resend_at - __entry->now, - __entry->timer - __entry->now) + ktime_to_ns(ktime_sub(__entry->expire_at, __entry->now)), + ktime_to_ns(ktime_sub(__entry->ack_at, __entry->now)), + ktime_to_ns(ktime_sub(__entry->resend_at, __entry->now)), + __entry->timer - __entry->now_j) ); TRACE_EVENT(rxrpc_rx_lose, diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 141c1458e719..d38dffd78085 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -464,9 +464,9 @@ struct rxrpc_call { struct rxrpc_connection *conn; /* connection carrying call */ struct rxrpc_peer *peer; /* Peer record for remote address */ struct rxrpc_sock __rcu *socket; /* socket responsible */ - unsigned long ack_at; /* When deferred ACK needs to happen */ - unsigned long resend_at; /* When next resend needs to happen */ - unsigned long expire_at; /* When the call times out */ + ktime_t ack_at; /* When deferred ACK needs to happen */ + ktime_t resend_at; /* When next resend needs to happen */ + ktime_t expire_at; /* When the call times out */ struct timer_list timer; /* Combined event timer */ struct work_struct processor; /* Event processor */ rxrpc_notify_rx_t notify_rx; /* kernel service Rx notification function */ @@ -805,7 +805,7 @@ int rxrpc_reject_call(struct rxrpc_sock *); /* * call_event.c */ -void rxrpc_set_timer(struct rxrpc_call *, enum rxrpc_timer_trace); +void rxrpc_set_timer(struct rxrpc_call *, enum rxrpc_timer_trace, ktime_t); void rxrpc_propose_ACK(struct rxrpc_call *, u8, u16, u32, bool, bool, enum rxrpc_propose_ack_trace); void rxrpc_process_call(struct work_struct *); diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 1f6c7633b964..9ff3bb3ffb41 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -24,28 +24,40 @@ /* * Set the timer */ -void rxrpc_set_timer(struct rxrpc_call *call, enum rxrpc_timer_trace why) +void rxrpc_set_timer(struct rxrpc_call *call, enum rxrpc_timer_trace why, + ktime_t now) { - unsigned long t, now = jiffies; + unsigned long t_j, now_j = jiffies; + ktime_t t; read_lock_bh(&call->state_lock); if (call->state < RXRPC_CALL_COMPLETE) { t = call->expire_at; - if (time_before_eq(t, now)) + if (!ktime_after(t, now)) goto out; - if (time_after(call->resend_at, now) && - time_before(call->resend_at, t)) + if (ktime_after(call->resend_at, now) && + ktime_before(call->resend_at, t)) t = call->resend_at; - if (time_after(call->ack_at, now) && - time_before(call->ack_at, t)) + if (ktime_after(call->ack_at, now) && + ktime_before(call->ack_at, t)) t = call->ack_at; - if (call->timer.expires != t || !timer_pending(&call->timer)) { - mod_timer(&call->timer, t); - trace_rxrpc_timer(call, why, now); + t_j = nsecs_to_jiffies(ktime_to_ns(ktime_sub(t, now))); + t_j += jiffies; + + /* We have to make sure that the calculated jiffies value falls + * at or after the nsec value, or we may loop ceaselessly + * because the timer times out, but we haven't reached the nsec + * timeout yet. + */ + t_j++; + + if (call->timer.expires != t_j || !timer_pending(&call->timer)) { + mod_timer(&call->timer, t_j); + trace_rxrpc_timer(call, why, now, now_j); } } @@ -62,7 +74,8 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, enum rxrpc_propose_ack_trace why) { enum rxrpc_propose_ack_outcome outcome = rxrpc_propose_ack_use; - unsigned long now, ack_at, expiry = rxrpc_soft_ack_delay; + unsigned int expiry = rxrpc_soft_ack_delay; + ktime_t now, ack_at; s8 prior = rxrpc_ack_priority[ack_reason]; /* Update DELAY, IDLE, REQUESTED and PING_RESPONSE ACK serial @@ -111,7 +124,6 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, break; } - now = jiffies; if (test_bit(RXRPC_CALL_EV_ACK, &call->events)) { _debug("already scheduled"); } else if (immediate || expiry == 0) { @@ -120,11 +132,11 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, background) rxrpc_queue_call(call); } else { - ack_at = now + expiry; - _debug("deferred ACK %ld < %ld", expiry, call->ack_at - now); - if (time_before(ack_at, call->ack_at)) { + now = ktime_get_real(); + ack_at = ktime_add_ms(now, expiry); + if (ktime_before(ack_at, call->ack_at)) { call->ack_at = ack_at; - rxrpc_set_timer(call, rxrpc_timer_set_for_ack); + rxrpc_set_timer(call, rxrpc_timer_set_for_ack, now); } } @@ -157,12 +169,12 @@ static void rxrpc_congestion_timeout(struct rxrpc_call *call) /* * Perform retransmission of NAK'd and unack'd packets. */ -static void rxrpc_resend(struct rxrpc_call *call) +static void rxrpc_resend(struct rxrpc_call *call, ktime_t now) { struct rxrpc_skb_priv *sp; struct sk_buff *skb; rxrpc_seq_t cursor, seq, top; - ktime_t now = ktime_get_real(), max_age, oldest, resend_at, ack_ts; + ktime_t max_age, oldest, ack_ts; int ix; u8 annotation, anno_type, retrans = 0, unacked = 0; @@ -212,14 +224,7 @@ static void rxrpc_resend(struct rxrpc_call *call) ktime_to_ns(ktime_sub(skb->tstamp, max_age))); } - resend_at = ktime_add_ms(oldest, rxrpc_resend_timeout); - call->resend_at = jiffies + - nsecs_to_jiffies(ktime_to_ns(ktime_sub(resend_at, now))) + - 1; /* We have to make sure that the calculated jiffies value - * falls at or after the nsec value, or we shall loop - * ceaselessly because the timer times out, but we haven't - * reached the nsec timeout yet. - */ + call->resend_at = ktime_add_ms(oldest, rxrpc_resend_timeout); if (unacked) rxrpc_congestion_timeout(call); @@ -229,7 +234,7 @@ static void rxrpc_resend(struct rxrpc_call *call) * retransmitting data. */ if (!retrans) { - rxrpc_set_timer(call, rxrpc_timer_set_for_resend); + rxrpc_set_timer(call, rxrpc_timer_set_for_resend, now); spin_unlock_bh(&call->lock); ack_ts = ktime_sub(now, call->acks_latest_ts); if (ktime_to_ns(ack_ts) < call->peer->rtt) @@ -301,7 +306,7 @@ void rxrpc_process_call(struct work_struct *work) { struct rxrpc_call *call = container_of(work, struct rxrpc_call, processor); - unsigned long now; + ktime_t now; rxrpc_see_call(call); @@ -320,15 +325,15 @@ recheck_state: goto out_put; } - now = jiffies; - if (time_after_eq(now, call->expire_at)) { + now = ktime_get_real(); + if (ktime_before(call->expire_at, now)) { rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, ETIME); set_bit(RXRPC_CALL_EV_ABORT, &call->events); goto recheck_state; } if (test_and_clear_bit(RXRPC_CALL_EV_ACK, &call->events) || - time_after_eq(now, call->ack_at)) { + ktime_before(call->ack_at, now)) { call->ack_at = call->expire_at; if (call->ackr_reason) { rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK); @@ -337,12 +342,12 @@ recheck_state: } if (test_and_clear_bit(RXRPC_CALL_EV_RESEND, &call->events) || - time_after_eq(now, call->resend_at)) { - rxrpc_resend(call); + ktime_before(call->resend_at, now)) { + rxrpc_resend(call, now); goto recheck_state; } - rxrpc_set_timer(call, rxrpc_timer_set_for_resend); + rxrpc_set_timer(call, rxrpc_timer_set_for_resend, now); /* other events may have been raised since we started checking */ if (call->events && call->state < RXRPC_CALL_COMPLETE) { diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index d4b3293b78fa..456ab752d473 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -19,11 +19,6 @@ #include #include "ar-internal.h" -/* - * Maximum lifetime of a call (in jiffies). - */ -unsigned int rxrpc_max_call_lifetime = 60 * HZ; - const char *const rxrpc_call_states[NR__RXRPC_CALL_STATES] = { [RXRPC_CALL_UNINITIALISED] = "Uninit ", [RXRPC_CALL_CLIENT_AWAIT_CONN] = "ClWtConn", @@ -77,7 +72,8 @@ static void rxrpc_call_timer_expired(unsigned long _call) _enter("%d", call->debug_id); if (call->state < RXRPC_CALL_COMPLETE) { - trace_rxrpc_timer(call, rxrpc_timer_expired, jiffies); + trace_rxrpc_timer(call, rxrpc_timer_expired, + ktime_get_real(), jiffies); rxrpc_queue_call(call); } } @@ -207,14 +203,14 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct sockaddr_rxrpc *srx, */ static void rxrpc_start_call_timer(struct rxrpc_call *call) { - unsigned long expire_at; + ktime_t now = ktime_get_real(), expire_at; - expire_at = jiffies + rxrpc_max_call_lifetime; + expire_at = ktime_add_ms(now, rxrpc_max_call_lifetime); call->expire_at = expire_at; call->ack_at = expire_at; call->resend_at = expire_at; - call->timer.expires = expire_at + 1; - rxrpc_set_timer(call, rxrpc_timer_begin); + call->timer.expires = jiffies + LONG_MAX / 2; + rxrpc_set_timer(call, rxrpc_timer_begin, now); } /* diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 5ba35b4a907b..3ad9f75031e3 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -328,7 +328,8 @@ static bool rxrpc_receiving_reply(struct rxrpc_call *call) call->resend_at = call->expire_at; call->ack_at = call->expire_at; spin_unlock_bh(&call->lock); - rxrpc_set_timer(call, rxrpc_timer_init_for_reply); + rxrpc_set_timer(call, rxrpc_timer_init_for_reply, + ktime_get_real()); } if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index 47dddacdbb91..9d1c721bc4e8 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -20,29 +20,34 @@ */ unsigned int rxrpc_max_backlog __read_mostly = 10; +/* + * Maximum lifetime of a call (in mx). + */ +unsigned int rxrpc_max_call_lifetime = 60 * 1000; + /* * How long to wait before scheduling ACK generation after seeing a - * packet with RXRPC_REQUEST_ACK set (in jiffies). + * packet with RXRPC_REQUEST_ACK set (in ms). */ unsigned int rxrpc_requested_ack_delay = 1; /* - * How long to wait before scheduling an ACK with subtype DELAY (in jiffies). + * How long to wait before scheduling an ACK with subtype DELAY (in ms). * * We use this when we've received new data packets. If those packets aren't * all consumed within this time we will send a DELAY ACK if an ACK was not * requested to let the sender know it doesn't need to resend. */ -unsigned int rxrpc_soft_ack_delay = 1 * HZ; +unsigned int rxrpc_soft_ack_delay = 1 * 1000; /* - * How long to wait before scheduling an ACK with subtype IDLE (in jiffies). + * How long to wait before scheduling an ACK with subtype IDLE (in ms). * * We use this when we've consumed some previously soft-ACK'd packets when * further packets aren't immediately received to decide when to send an IDLE * ACK let the other end know that it can free up its Tx buffer space. */ -unsigned int rxrpc_idle_ack_delay = 0.5 * HZ; +unsigned int rxrpc_idle_ack_delay = 0.5 * 1000; /* * Receive window size in packets. This indicates the maximum number of diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index d8dfdce874d8..3322543d460a 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -149,13 +149,13 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, _debug("need instant resend %d", ret); rxrpc_instant_resend(call, ix); } else { - unsigned long resend_at; + ktime_t now = ktime_get_real(), resend_at; - resend_at = jiffies + msecs_to_jiffies(rxrpc_resend_timeout); + resend_at = ktime_add_ms(now, rxrpc_resend_timeout); - if (time_before(resend_at, call->resend_at)) { + if (ktime_before(resend_at, call->resend_at)) { call->resend_at = resend_at; - rxrpc_set_timer(call, rxrpc_timer_set_for_send); + rxrpc_set_timer(call, rxrpc_timer_set_for_send, now); } } diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c index 13d1df03ebac..34c706d2f79c 100644 --- a/net/rxrpc/sysctl.c +++ b/net/rxrpc/sysctl.c @@ -35,7 +35,7 @@ static struct ctl_table rxrpc_sysctl_table[] = { .data = &rxrpc_requested_ack_delay, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec_ms_jiffies, + .proc_handler = proc_dointvec, .extra1 = (void *)&zero, }, { @@ -43,7 +43,7 @@ static struct ctl_table rxrpc_sysctl_table[] = { .data = &rxrpc_soft_ack_delay, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec_ms_jiffies, + .proc_handler = proc_dointvec, .extra1 = (void *)&one, }, { @@ -51,7 +51,7 @@ static struct ctl_table rxrpc_sysctl_table[] = { .data = &rxrpc_idle_ack_delay, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec_ms_jiffies, + .proc_handler = proc_dointvec, .extra1 = (void *)&one, }, { @@ -85,7 +85,7 @@ static struct ctl_table rxrpc_sysctl_table[] = { .data = &rxrpc_max_call_lifetime, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec_jiffies, + .proc_handler = proc_dointvec, .extra1 = (void *)&one, }, From 405dea1debeb9956684de342903bba9ddd52f1cb Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 30 Sep 2016 09:13:50 +0100 Subject: [PATCH 1670/1715] rxrpc: Fix the call timer handling The call timer's concept of a call timeout (of which there are three) that is inactive is that it is the timeout has the same expiration time as the call expiration timeout (the expiration timer is never inactive). However, I'm not resetting the timeouts when they expire, leading to repeated processing of expired timeouts when other timeout events occur. Fix this by: (1) Move the timer expiry detection into rxrpc_set_timer() inside the locked section. This means that if a timeout is set that will expire immediately, we deal with it immediately. (2) If a timeout is at or before now then it has expired. When an expiry is detected, an event is raised, the timeout is automatically inactivated and the event processor is queued. (3) If a timeout is at or after the expiry timeout then it is inactive. Inactive timeouts do not contribute to the timer setting. (4) The call timer callback can now just call rxrpc_set_timer() to handle things. (5) The call processor work function now checks the event flags rather than checking the timeouts directly. Signed-off-by: David Howells --- net/rxrpc/call_event.c | 26 ++++++++++++++++++-------- net/rxrpc/call_object.c | 7 ++----- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 9ff3bb3ffb41..4f00476630b9 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -29,6 +29,7 @@ void rxrpc_set_timer(struct rxrpc_call *call, enum rxrpc_timer_trace why, { unsigned long t_j, now_j = jiffies; ktime_t t; + bool queue = false; read_lock_bh(&call->state_lock); @@ -37,13 +38,21 @@ void rxrpc_set_timer(struct rxrpc_call *call, enum rxrpc_timer_trace why, if (!ktime_after(t, now)) goto out; - if (ktime_after(call->resend_at, now) && - ktime_before(call->resend_at, t)) + if (!ktime_after(call->resend_at, now)) { + call->resend_at = call->expire_at; + if (!test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events)) + queue = true; + } else if (ktime_before(call->resend_at, t)) { t = call->resend_at; + } - if (ktime_after(call->ack_at, now) && - ktime_before(call->ack_at, t)) + if (!ktime_after(call->ack_at, now)) { + call->ack_at = call->expire_at; + if (!test_and_set_bit(RXRPC_CALL_EV_ACK, &call->events)) + queue = true; + } else if (ktime_before(call->ack_at, t)) { t = call->ack_at; + } t_j = nsecs_to_jiffies(ktime_to_ns(ktime_sub(t, now))); t_j += jiffies; @@ -59,6 +68,9 @@ void rxrpc_set_timer(struct rxrpc_call *call, enum rxrpc_timer_trace why, mod_timer(&call->timer, t_j); trace_rxrpc_timer(call, why, now, now_j); } + + if (queue) + rxrpc_queue_call(call); } out: @@ -332,8 +344,7 @@ recheck_state: goto recheck_state; } - if (test_and_clear_bit(RXRPC_CALL_EV_ACK, &call->events) || - ktime_before(call->ack_at, now)) { + if (test_and_clear_bit(RXRPC_CALL_EV_ACK, &call->events)) { call->ack_at = call->expire_at; if (call->ackr_reason) { rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK); @@ -341,8 +352,7 @@ recheck_state: } } - if (test_and_clear_bit(RXRPC_CALL_EV_RESEND, &call->events) || - ktime_before(call->resend_at, now)) { + if (test_and_clear_bit(RXRPC_CALL_EV_RESEND, &call->events)) { rxrpc_resend(call, now); goto recheck_state; } diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 456ab752d473..364b42dc3dce 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -71,11 +71,8 @@ static void rxrpc_call_timer_expired(unsigned long _call) _enter("%d", call->debug_id); - if (call->state < RXRPC_CALL_COMPLETE) { - trace_rxrpc_timer(call, rxrpc_timer_expired, - ktime_get_real(), jiffies); - rxrpc_queue_call(call); - } + if (call->state < RXRPC_CALL_COMPLETE) + rxrpc_set_timer(call, rxrpc_timer_expired, ktime_get_real()); } /* From d6169b0206db1c8c8d0e4c6b79fdf4b2fc6455f1 Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Fri, 30 Sep 2016 15:24:31 -0700 Subject: [PATCH 1671/1715] net: Use ns_capable_noaudit() when determining net sysctl permissions The capability check should not be audited since it is only being used to determine the inode permissions. A failed check does not indicate a violation of security policy but, when an LSM is enabled, a denial audit message was being generated. The denial audit message caused confusion for some application authors because root-running Go applications always triggered the denial. To prevent this confusion, the capability check in net_ctl_permissions() is switched to the noaudit variant. BugLink: https://launchpad.net/bugs/1465724 Signed-off-by: Tyler Hicks Acked-by: Serge E. Hallyn Signed-off-by: James Morris [dtor: reapplied after e79c6a4fc923 ("net: make net namespace sysctls belong to container's owner") accidentally reverted the change.] Signed-off-by: Dmitry Torokhov Signed-off-by: David S. Miller --- net/sysctl_net.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sysctl_net.c b/net/sysctl_net.c index 5bc1a3d57401..e0c71bd8f7cf 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -44,7 +44,7 @@ static int net_ctl_permissions(struct ctl_table_header *head, struct net *net = container_of(head->set, struct net, sysctls); /* Allow network administrator to have same access as root. */ - if (ns_capable(net->user_ns, CAP_NET_ADMIN)) { + if (ns_capable_noaudit(net->user_ns, CAP_NET_ADMIN)) { int mode = (table->mode >> 6) & 7; return (mode << 6) | (mode << 3) | mode; } From d4ef9f72128d414ad83b27b49312faa971d77382 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Wed, 28 Sep 2016 15:05:28 -0700 Subject: [PATCH 1672/1715] netfilter: bridge: clarify bridge/netfilter message When using bridge without bridge netfilter enabled the message displayed is rather confusing and leads to belive that a deprecated feature is in use. Use IS_MODULE to be explicit that the message only affects users which use bridge netfilter as module and reword the message. Signed-off-by: Stefan Agner Acked-by: Florian Westphal Signed-off-by: David S. Miller --- net/bridge/br.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/bridge/br.c b/net/bridge/br.c index 3addc05b9a16..889e5640455f 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -227,9 +227,11 @@ static int __init br_init(void) br_fdb_test_addr_hook = br_fdb_test_addr; #endif - pr_info("bridge: automatic filtering via arp/ip/ip6tables has been " - "deprecated. Update your scripts to load br_netfilter if you " +#if IS_MODULE(CONFIG_BRIDGE_NETFILTER) + pr_info("bridge: filtering via arp/ip/ip6tables is no longer available " + "by default. Update your scripts to load br_netfilter if you " "need this.\n"); +#endif return 0; From cb9e684e89e69894cb6697a3fa1274a284d1d3bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= Date: Thu, 29 Sep 2016 00:33:43 -0700 Subject: [PATCH 1673/1715] ipv6 addrconf: remove addrconf_sysctl_hop_limit() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is an effective no-op in terms of user observable behaviour. By preventing the overwrite of non-null extra1/extra2 fields in addrconf_sysctl() we can enable the use of proc_dointvec_minmax(). This allows us to eliminate the constant min/max (1..255) trampoline function that is addrconf_sysctl_hop_limit(). This is nice because it simplifies the code, and allows future sysctls with constant min/max limits to also not require trampolines. We still can't eliminate the trampoline for mtu because it isn't actually a constant (it depends on other tunables of the device) and thus requires at-write-time logic to enforce range. Signed-off-by: Maciej Żenczykowski Acked-by: Erik Kline Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 87183983724d..cbd9343751a2 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5496,20 +5496,6 @@ int addrconf_sysctl_forward(struct ctl_table *ctl, int write, return ret; } -static -int addrconf_sysctl_hop_limit(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - struct ctl_table lctl; - int min_hl = 1, max_hl = 255; - - lctl = *ctl; - lctl.extra1 = &min_hl; - lctl.extra2 = &max_hl; - - return proc_dointvec_minmax(&lctl, write, buffer, lenp, ppos); -} - static int addrconf_sysctl_mtu(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -5743,6 +5729,9 @@ int addrconf_sysctl_ignore_routes_with_linkdown(struct ctl_table *ctl, return ret; } +static const int one = 1; +static const int two_five_five = 255; + static const struct ctl_table addrconf_sysctl[] = { { .procname = "forwarding", @@ -5756,7 +5745,9 @@ static const struct ctl_table addrconf_sysctl[] = { .data = &ipv6_devconf.hop_limit, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = addrconf_sysctl_hop_limit, + .proc_handler = proc_dointvec_minmax, + .extra1 = (void *)&one, + .extra2 = (void *)&two_five_five, }, { .procname = "mtu", @@ -6081,8 +6072,14 @@ static int __addrconf_sysctl_register(struct net *net, char *dev_name, for (i = 0; table[i].data; i++) { table[i].data += (char *)p - (char *)&ipv6_devconf; - table[i].extra1 = idev; /* embedded; no ref */ - table[i].extra2 = net; + /* If one of these is already set, then it is not safe to + * overwrite either of them: this makes proc_dointvec_minmax + * usable. + */ + if (!table[i].extra1 && !table[i].extra2) { + table[i].extra1 = idev; /* embedded; no ref */ + table[i].extra2 = net; + } } snprintf(path, sizeof(path), "net/ipv6/conf/%s", dev_name); From 8efebd6e5e93283a72d7a014d6dd8130e6601352 Mon Sep 17 00:00:00 2001 From: Baoyou Xie Date: Fri, 30 Sep 2016 15:34:25 +0800 Subject: [PATCH 1674/1715] cxgb4: mark cxgb_setup_tc() static We get 1 warning when building kernel with W=1: drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c:2715:5: warning: no previous prototype for 'cxgb_setup_tc' [-Wmissing-prototypes] In fact, this function is only used in the file in which it is declared and don't need a declaration, but can be made static. so this patch marks this function with 'static'. Signed-off-by: Baoyou Xie Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index c8a5c434ad2c..08e69d31dbaa 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -2712,8 +2712,8 @@ static int cxgb_set_tx_maxrate(struct net_device *dev, int index, u32 rate) return err; } -int cxgb_setup_tc(struct net_device *dev, u32 handle, __be16 proto, - struct tc_to_netdev *tc) +static int cxgb_setup_tc(struct net_device *dev, u32 handle, __be16 proto, + struct tc_to_netdev *tc) { struct port_info *pi = netdev2pinfo(dev); struct adapter *adap = netdev2adap(dev); From 3a82e78c131a8199d38cf653b523c8fa2909df65 Mon Sep 17 00:00:00 2001 From: Baoyou Xie Date: Fri, 30 Sep 2016 15:48:50 +0800 Subject: [PATCH 1675/1715] net: ethernet: mediatek: mark symbols static where possible We get 2 warnings when building kernel with W=1: drivers/net/ethernet/mediatek/mtk_eth_soc.c:2041:5: warning: no previous prototype for 'mtk_get_link_ksettings' [-Wmissing-prototypes] drivers/net/ethernet/mediatek/mtk_eth_soc.c:2052:5: warning: no previous prototype for 'mtk_set_link_ksettings' [-Wmissing-prototypes] In fact, these functions are only used in the file in which they are declared and don't need a declaration, but can be made static. So this patch marks these functions with 'static'. Signed-off-by: Baoyou Xie Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index ddf20a00654e..ad4ab979507b 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -2038,8 +2038,8 @@ static int mtk_cleanup(struct mtk_eth *eth) return 0; } -int mtk_get_link_ksettings(struct net_device *ndev, - struct ethtool_link_ksettings *cmd) +static int mtk_get_link_ksettings(struct net_device *ndev, + struct ethtool_link_ksettings *cmd) { struct mtk_mac *mac = netdev_priv(ndev); @@ -2049,8 +2049,8 @@ int mtk_get_link_ksettings(struct net_device *ndev, return phy_ethtool_ksettings_get(ndev->phydev, cmd); } -int mtk_set_link_ksettings(struct net_device *ndev, - const struct ethtool_link_ksettings *cmd) +static int mtk_set_link_ksettings(struct net_device *ndev, + const struct ethtool_link_ksettings *cmd) { struct mtk_mac *mac = netdev_priv(ndev); From b82d44d78480faff7456e9e0999acb9d38666057 Mon Sep 17 00:00:00 2001 From: Gavin Schenk Date: Fri, 30 Sep 2016 11:46:10 +0200 Subject: [PATCH 1676/1715] net: fec: set mac address unconditionally MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the mac address origin is not dt, you can only safely assign a mac address after "link up" of the device. If the link is off the clocks are disabled and because of issues assigning registers when clocks are off the new mac address cannot be written in .ndo_set_mac_address() on some soc's. This fix sets the mac address unconditionally in fec_restart(...) and ensures consistency between fec registers and the network layer. Signed-off-by: Gavin Schenk Acked-by: Fugang Duan Acked-by: Uwe Kleine-König Fixes: 9638d19e4816 ("net: fec: add netif status check before set mac address") Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fec_main.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 1fa2d87c2fc9..48a033e64423 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -913,13 +913,11 @@ fec_restart(struct net_device *ndev) * enet-mac reset will reset mac address registers too, * so need to reconfigure it. */ - if (fep->quirks & FEC_QUIRK_ENET_MAC) { - memcpy(&temp_mac, ndev->dev_addr, ETH_ALEN); - writel((__force u32)cpu_to_be32(temp_mac[0]), - fep->hwp + FEC_ADDR_LOW); - writel((__force u32)cpu_to_be32(temp_mac[1]), - fep->hwp + FEC_ADDR_HIGH); - } + memcpy(&temp_mac, ndev->dev_addr, ETH_ALEN); + writel((__force u32)cpu_to_be32(temp_mac[0]), + fep->hwp + FEC_ADDR_LOW); + writel((__force u32)cpu_to_be32(temp_mac[1]), + fep->hwp + FEC_ADDR_HIGH); /* Clear any outstanding interrupt. */ writel(0xffffffff, fep->hwp + FEC_IEVENT); From 63d75463c91a5b5be7c0aca11ceb45ea5a0ae81d Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 30 Sep 2016 16:56:45 +0200 Subject: [PATCH 1677/1715] net: pktgen: fix pkt_size The commit 879c7220e828 ("net: pktgen: Observe needed_headroom of the device") increased the 'pkt_overhead' field value by LL_RESERVED_SPACE. As a side effect the generated packet size, computed as: /* Eth + IPh + UDPh + mpls */ datalen = pkt_dev->cur_pkt_size - 14 - 20 - 8 - pkt_dev->pkt_overhead; is decreased by the same value. The above changed slightly the behavior of existing pktgen users, and made the procfs interface somewhat inconsistent. Fix it by restoring the previous pkt_overhead value and using LL_RESERVED_SPACE as extralen in skb allocation. Also, change pktgen_alloc_skb() to only partially reserve the headroom to allow the caller to prefetch from ll header start. v1 -> v2: - fixed some typos in the comments Fixes: 879c7220e828 ("net: pktgen: Observe needed_headroom of the device") Suggested-by: Ben Greear Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- net/core/pktgen.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/net/core/pktgen.c b/net/core/pktgen.c index bbd118b19aef..5219a9e2127a 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2286,7 +2286,7 @@ out: static inline void set_pkt_overhead(struct pktgen_dev *pkt_dev) { - pkt_dev->pkt_overhead = LL_RESERVED_SPACE(pkt_dev->odev); + pkt_dev->pkt_overhead = 0; pkt_dev->pkt_overhead += pkt_dev->nr_labels*sizeof(u32); pkt_dev->pkt_overhead += VLAN_TAG_SIZE(pkt_dev); pkt_dev->pkt_overhead += SVLAN_TAG_SIZE(pkt_dev); @@ -2777,13 +2777,13 @@ static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb, } static struct sk_buff *pktgen_alloc_skb(struct net_device *dev, - struct pktgen_dev *pkt_dev, - unsigned int extralen) + struct pktgen_dev *pkt_dev) { + unsigned int extralen = LL_RESERVED_SPACE(dev); struct sk_buff *skb = NULL; - unsigned int size = pkt_dev->cur_pkt_size + 64 + extralen + - pkt_dev->pkt_overhead; + unsigned int size; + size = pkt_dev->cur_pkt_size + 64 + extralen + pkt_dev->pkt_overhead; if (pkt_dev->flags & F_NODE) { int node = pkt_dev->node >= 0 ? pkt_dev->node : numa_node_id(); @@ -2796,8 +2796,9 @@ static struct sk_buff *pktgen_alloc_skb(struct net_device *dev, skb = __netdev_alloc_skb(dev, size, GFP_NOWAIT); } + /* the caller pre-fetches from skb->data and reserves for the mac hdr */ if (likely(skb)) - skb_reserve(skb, LL_RESERVED_SPACE(dev)); + skb_reserve(skb, extralen - 16); return skb; } @@ -2830,16 +2831,14 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, mod_cur_headers(pkt_dev); queue_map = pkt_dev->cur_queue_map; - datalen = (odev->hard_header_len + 16) & ~0xf; - - skb = pktgen_alloc_skb(odev, pkt_dev, datalen); + skb = pktgen_alloc_skb(odev, pkt_dev); if (!skb) { sprintf(pkt_dev->result, "No memory"); return NULL; } prefetchw(skb->data); - skb_reserve(skb, datalen); + skb_reserve(skb, 16); /* Reserve for ethernet and IP header */ eth = (__u8 *) skb_push(skb, 14); @@ -2959,7 +2958,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, mod_cur_headers(pkt_dev); queue_map = pkt_dev->cur_queue_map; - skb = pktgen_alloc_skb(odev, pkt_dev, 16); + skb = pktgen_alloc_skb(odev, pkt_dev); if (!skb) { sprintf(pkt_dev->result, "No memory"); return NULL; From fa34cd94fb01fcb8d79d91e009451b37692e94e5 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 30 Sep 2016 18:13:49 +0200 Subject: [PATCH 1678/1715] net: rtnl: avoid uninitialized data in IFLA_VF_VLAN_LIST handling With the newly added support for IFLA_VF_VLAN_LIST netlink messages, we get a warning about potential uninitialized variable use in the parsing of the user input when enabling the -Wmaybe-uninitialized warning: net/core/rtnetlink.c: In function 'do_setvfinfo': net/core/rtnetlink.c:1756:9: error: 'ivvl$' may be used uninitialized in this function [-Werror=maybe-uninitialized] I have not been able to prove whether it is possible to arrive in this code with an empty IFLA_VF_VLAN_LIST block, but if we do, then ndo_set_vf_vlan gets called with uninitialized arguments. This adds an explicit check for an empty list, making it obvious to the reader and the compiler that this cannot happen. Fixes: 79aab093a0b5 ("net: Update API for VF vlan protocol 802.1ad support") Signed-off-by: Arnd Bergmann Reviewed-by: Moshe Shemesh Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 3ac8946bf244..b06d2f46b83e 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1753,6 +1753,9 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) len++; } + if (len == 0) + return -EINVAL; + err = ops->ndo_set_vf_vlan(dev, ivvl[0]->vf, ivvl[0]->vlan, ivvl[0]->qos, ivvl[0]->vlan_proto); if (err < 0) From 7c70c4f8b2bf5ed777120f3d70efe35e64930c10 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 30 Sep 2016 18:15:33 +0200 Subject: [PATCH 1679/1715] cxgb4: unexport cxgb4_dcb_enabled A recent cleanup marked cxgb4_dcb_enabled as 'static', which is correct, but this ignored how the symbol is also exported. In addition, the export can be compiled out when modules are disabled, causing a harmless compiler warning in configurations for which it is not used at all: drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c:282:12: error: 'cxgb4_dcb_enabled' defined but not used [-Werror=unused-function] This removes the export and moves the function into the correct #ifdef so we only build it when there are users. Fixes: 50935857f878 ("cxgb4: mark symbols static where possible") Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 08e69d31dbaa..cf147ca419a8 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -277,11 +277,9 @@ static void dcb_tx_queue_prio_enable(struct net_device *dev, int enable) txq->dcb_prio = value; } } -#endif /* CONFIG_CHELSIO_T4_DCB */ static int cxgb4_dcb_enabled(const struct net_device *dev) { -#ifdef CONFIG_CHELSIO_T4_DCB struct port_info *pi = netdev_priv(dev); if (!pi->dcb.enabled) @@ -289,11 +287,8 @@ static int cxgb4_dcb_enabled(const struct net_device *dev) return ((pi->dcb.state == CXGB4_DCB_STATE_FW_ALLSYNCED) || (pi->dcb.state == CXGB4_DCB_STATE_HOST)); -#else - return 0; -#endif } -EXPORT_SYMBOL(cxgb4_dcb_enabled); +#endif /* CONFIG_CHELSIO_T4_DCB */ void t4_os_link_changed(struct adapter *adapter, int port_id, int link_stat) { From bab02a69296682881cae280587618978c357e26e Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 25 Aug 2016 14:06:54 -0700 Subject: [PATCH 1680/1715] fm10k: use generic ethtool_op_get_ts_info callback This generic callback is for drivers which have software Tx timestamp support enabled. Without this, PTP applications requesting software timestamps may complain that the requested mode is not supported. Signed-off-by: Jacob Keller Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c b/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c index adb7cb4311ba..5241e0873397 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c @@ -1182,6 +1182,7 @@ static const struct ethtool_ops fm10k_ethtool_ops = { .set_rxfh = fm10k_set_rssh, .get_channels = fm10k_get_channels, .set_channels = fm10k_set_channels, + .get_ts_info = ethtool_op_get_ts_info, }; void fm10k_set_ethtool_ops(struct net_device *dev) From d0debb76df18f05ecc65579d37203703ffdec44d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 30 Sep 2016 18:17:09 +0200 Subject: [PATCH 1681/1715] net/mlx5e: shut up maybe-uninitialized warning Build-testing this driver with -Wmaybe-uninitialized gives a new false-positive warning that I can't really explain: drivers/net/ethernet/mellanox/mlx5/core/en_tc.c: In function 'mlx5e_configure_flower': drivers/net/ethernet/mellanox/mlx5/core/en_tc.c:509:3: error: 'old_attr' may be used uninitialized in this function [-Werror=maybe-uninitialized] It's obvious from the code that 'old_attr' is initialized whenever 'old' is non-NULL here. The warning appears with all versions I tested from gcc-4.7 through gcc-6.1, and I could not come up with a way to rewrite the function in a more readable way that avoids the warning, so I'm adding another initialization to shut it up. Fixes: 8b32580df1cb ("net/mlx5e: Add TC vlan action for SRIOV offloads") Signed-off-by: Arnd Bergmann Acked-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index a350b7171e3d..ce8c54d18906 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -451,7 +451,7 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, struct mlx5e_tc_flow *flow; struct mlx5_flow_spec *spec; struct mlx5_flow_rule *old = NULL; - struct mlx5_esw_flow_attr *old_attr; + struct mlx5_esw_flow_attr *old_attr = NULL; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; if (esw && esw->mode == SRIOV_OFFLOADS) From ab580705693d5af79663efa504a72248700766fc Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 30 Sep 2016 18:17:10 +0200 Subject: [PATCH 1682/1715] mlxsw: spectrum_router: avoid potential uninitialized data usage If fi->fib_nhs is zero, the router interface pointer is uninitialized, as shown by this warning: drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c: In function 'mlxsw_sp_router_fib_event': drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c:1674:21: error: 'r' may be used uninitialized in this function [-Werror=maybe-uninitialized] drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c:1643:23: note: 'r' was declared here This changes the loop so we handle the case the same way as finding no router interface pointer attached to one of the nexthops to ensure we always trap here instead of using uninitialized data. Fixes: b45f64d16d45 ("mlxsw: spectrum_router: Use FIB notifications instead of switchdev calls") Signed-off-by: Arnd Bergmann Acked-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 48d50efec5e2..78fc557d6dd7 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1640,7 +1640,7 @@ mlxsw_sp_router_fib4_entry_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry) { struct fib_info *fi = fen_info->fi; - struct mlxsw_sp_rif *r; + struct mlxsw_sp_rif *r = NULL; int nhsel; int err; @@ -1664,11 +1664,15 @@ mlxsw_sp_router_fib4_entry_init(struct mlxsw_sp *mlxsw_sp, * to us. Set trap and pass the packets for * this prefix to kernel. */ - fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; - return 0; + break; } } + if (!r) { + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; + return 0; + } + if (fi->fib_scope != RT_SCOPE_UNIVERSE) { fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL; fib_entry->rif = r->rif; From f7d49bce8e741e1e6aa14ce4db1b6cea7e4be4e8 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Fri, 30 Sep 2016 19:08:05 +0200 Subject: [PATCH 1683/1715] openvswitch: mpls: set network header correctly on key extract After the 48d2ab609b6b ("net: mpls: Fixups for GSO"), MPLS handling in openvswitch was changed to have network header pointing to the start of the MPLS headers and inner_network_header pointing after the MPLS headers. However, key_extract was missed by the mentioned commit, causing incorrect headers to be set when a MPLS packet just enters the bridge or after it is recirculated. Fixes: 48d2ab609b6b ("net: mpls: Fixups for GSO") Signed-off-by: Jiri Benc Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/flow.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 634cc10d6dee..c8c82e109c68 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -633,12 +633,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) } else if (eth_p_mpls(key->eth.type)) { size_t stack_len = MPLS_HLEN; - /* In the presence of an MPLS label stack the end of the L2 - * header and the beginning of the L3 header differ. - * - * Advance network_header to the beginning of the L3 - * header. mac_len corresponds to the end of the L2 header. - */ + skb_set_inner_network_header(skb, skb->mac_len); while (1) { __be32 lse; @@ -646,12 +641,12 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) if (unlikely(error)) return 0; - memcpy(&lse, skb_network_header(skb), MPLS_HLEN); + memcpy(&lse, skb_inner_network_header(skb), MPLS_HLEN); if (stack_len == MPLS_HLEN) memcpy(&key->mpls.top_lse, &lse, MPLS_HLEN); - skb_set_network_header(skb, skb->mac_len + stack_len); + skb_set_inner_network_header(skb, skb->mac_len + stack_len); if (lse & htonl(MPLS_LS_S_MASK)) break; From 9095e10edd28e1e4a10ba5ca61fb54d9f74f8968 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Fri, 30 Sep 2016 19:08:06 +0200 Subject: [PATCH 1684/1715] mpls: move mpls_hdr to a common location This will be also used by openvswitch. Signed-off-by: Jiri Benc Acked-by: David Ahern Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/net/mpls.h | 9 +++++++++ net/mpls/internal.h | 10 +--------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/include/net/mpls.h b/include/net/mpls.h index 5b3b5addfb08..3ebbc0bb57ff 100644 --- a/include/net/mpls.h +++ b/include/net/mpls.h @@ -19,12 +19,21 @@ #define MPLS_HLEN 4 +struct mpls_shim_hdr { + __be32 label_stack_entry; +}; + static inline bool eth_p_mpls(__be16 eth_type) { return eth_type == htons(ETH_P_MPLS_UC) || eth_type == htons(ETH_P_MPLS_MC); } +static inline struct mpls_shim_hdr *mpls_hdr(const struct sk_buff *skb) +{ + return (struct mpls_shim_hdr *)skb_network_header(skb); +} + /* * For non-MPLS skbs this will correspond to the network header. * For MPLS skbs it will be before the network_header as the MPLS diff --git a/net/mpls/internal.h b/net/mpls/internal.h index 732a5c17e986..bdfef6c3271a 100644 --- a/net/mpls/internal.h +++ b/net/mpls/internal.h @@ -1,9 +1,6 @@ #ifndef MPLS_INTERNAL_H #define MPLS_INTERNAL_H - -struct mpls_shim_hdr { - __be32 label_stack_entry; -}; +#include struct mpls_entry_decoded { u32 label; @@ -93,11 +90,6 @@ struct mpls_route { /* next hop label forwarding entry */ #define endfor_nexthops(rt) } -static inline struct mpls_shim_hdr *mpls_hdr(const struct sk_buff *skb) -{ - return (struct mpls_shim_hdr *)skb_network_header(skb); -} - static inline struct mpls_shim_hdr mpls_entry_encode(u32 label, unsigned ttl, unsigned tc, bool bos) { struct mpls_shim_hdr result; From 85de4a2101acb85c3b1dde465e84596ccca99f2c Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Fri, 30 Sep 2016 19:08:07 +0200 Subject: [PATCH 1685/1715] openvswitch: use mpls_hdr skb_mpls_header is equivalent to mpls_hdr now. Use the existing helper instead. Signed-off-by: Jiri Benc Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/net/mpls.h | 12 ------------ net/openvswitch/actions.c | 24 ++++++++++++------------ 2 files changed, 12 insertions(+), 24 deletions(-) diff --git a/include/net/mpls.h b/include/net/mpls.h index 3ebbc0bb57ff..1dbc669b770e 100644 --- a/include/net/mpls.h +++ b/include/net/mpls.h @@ -33,16 +33,4 @@ static inline struct mpls_shim_hdr *mpls_hdr(const struct sk_buff *skb) { return (struct mpls_shim_hdr *)skb_network_header(skb); } - -/* - * For non-MPLS skbs this will correspond to the network header. - * For MPLS skbs it will be before the network_header as the MPLS - * label stack lies between the end of the mac header and the network - * header. That is, for MPLS skbs the end of the mac header - * is the top of the MPLS label stack. - */ -static inline unsigned char *skb_mpls_header(struct sk_buff *skb) -{ - return skb_mac_header(skb) + skb->mac_len; -} #endif diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 863e992dfbc0..4e03f64709bc 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -160,7 +160,7 @@ static void update_ethertype(struct sk_buff *skb, struct ethhdr *hdr, static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key, const struct ovs_action_push_mpls *mpls) { - __be32 *new_mpls_lse; + struct mpls_shim_hdr *new_mpls_lse; /* Networking stack do not allow simultaneous Tunnel and MPLS GSO. */ if (skb->encapsulation) @@ -180,8 +180,8 @@ static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key, skb_reset_mac_header(skb); skb_set_network_header(skb, skb->mac_len); - new_mpls_lse = (__be32 *)skb_mpls_header(skb); - *new_mpls_lse = mpls->mpls_lse; + new_mpls_lse = mpls_hdr(skb); + new_mpls_lse->label_stack_entry = mpls->mpls_lse; skb_postpush_rcsum(skb, new_mpls_lse, MPLS_HLEN); @@ -202,7 +202,7 @@ static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key, if (unlikely(err)) return err; - skb_postpull_rcsum(skb, skb_mpls_header(skb), MPLS_HLEN); + skb_postpull_rcsum(skb, mpls_hdr(skb), MPLS_HLEN); memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb), skb->mac_len); @@ -211,10 +211,10 @@ static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key, skb_reset_mac_header(skb); skb_set_network_header(skb, skb->mac_len); - /* skb_mpls_header() is used to locate the ethertype - * field correctly in the presence of VLAN tags. + /* mpls_hdr() is used to locate the ethertype field correctly in the + * presence of VLAN tags. */ - hdr = (struct ethhdr *)(skb_mpls_header(skb) - ETH_HLEN); + hdr = (struct ethhdr *)((void *)mpls_hdr(skb) - ETH_HLEN); update_ethertype(skb, hdr, ethertype); if (eth_p_mpls(skb->protocol)) skb->protocol = ethertype; @@ -226,7 +226,7 @@ static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key, static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key, const __be32 *mpls_lse, const __be32 *mask) { - __be32 *stack; + struct mpls_shim_hdr *stack; __be32 lse; int err; @@ -234,16 +234,16 @@ static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key, if (unlikely(err)) return err; - stack = (__be32 *)skb_mpls_header(skb); - lse = OVS_MASKED(*stack, *mpls_lse, *mask); + stack = mpls_hdr(skb); + lse = OVS_MASKED(stack->label_stack_entry, *mpls_lse, *mask); if (skb->ip_summed == CHECKSUM_COMPLETE) { - __be32 diff[] = { ~(*stack), lse }; + __be32 diff[] = { ~(stack->label_stack_entry), lse }; skb->csum = ~csum_partial((char *)diff, sizeof(diff), ~skb->csum); } - *stack = lse; + stack->label_stack_entry = lse; flow_key->mpls.top_lse = lse; return 0; } From f814bfd765218908b23e21ca7f0b6f403fb88972 Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Sat, 1 Oct 2016 00:56:37 +0300 Subject: [PATCH 1686/1715] net: mvmdio: do not clk_disable_unprepare() NULL clock There is no need to clk_disable_unprepare(dev->clk) before it was initialized. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvmdio.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvmdio.c b/drivers/net/ethernet/marvell/mvmdio.c index 8982c882af1b..a0d1b084ecec 100644 --- a/drivers/net/ethernet/marvell/mvmdio.c +++ b/drivers/net/ethernet/marvell/mvmdio.c @@ -211,8 +211,7 @@ static int orion_mdio_probe(struct platform_device *pdev) dev->regs = devm_ioremap(&pdev->dev, r->start, resource_size(r)); if (!dev->regs) { dev_err(&pdev->dev, "Unable to remap SMI register\n"); - ret = -ENODEV; - goto out_mdio; + return -ENODEV; } init_waitqueue_head(&dev->smi_busy_wait); From 5e3d033ec175b3d0a8da2b1fc9aee8bc8acf3cfc Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 25 Aug 2016 14:15:39 -0700 Subject: [PATCH 1687/1715] fm10k: wrap long line for alloc_workqueue Trivial change here to cleanup a checkpatch.pl warning that got introduced when changing to alloc_workqueue. Signed-off-by: Jacob Keller Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c index 0d39103124bd..5de937852436 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c @@ -56,7 +56,8 @@ static int __init fm10k_init_module(void) pr_info("%s\n", fm10k_copyright); /* create driver workqueue */ - fm10k_workqueue = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0, fm10k_driver_name); + fm10k_workqueue = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0, + fm10k_driver_name); fm10k_dbg_init(); From f39acc84aad10710e89835c60d3b6694c43a8dd9 Mon Sep 17 00:00:00 2001 From: Shmulik Ladkani Date: Thu, 29 Sep 2016 12:10:40 +0300 Subject: [PATCH 1688/1715] net/sched: act_vlan: Push skb->data to mac_header prior calling skb_vlan_*() functions Generic skb_vlan_push/skb_vlan_pop functions don't properly handle the case where the input skb data pointer does not point at the mac header: - They're doing push/pop, but fail to properly unwind data back to its original location. For example, in the skb_vlan_push case, any subsequent 'skb_push(skb, skb->mac_len)' calls make the skb->data point 4 bytes BEFORE start of frame, leading to bogus frames that may be transmitted. - They update rcsum per the added/removed 4 bytes tag. Alas if data is originally after the vlan/eth headers, then these bytes were already pulled out of the csum. OTOH calling skb_vlan_push/skb_vlan_pop with skb->data at mac_header present no issues. act_vlan is the only caller to skb_vlan_*() that has skb->data pointing at network header (upon ingress). Other calles (ovs, bpf) already adjust skb->data at mac_header. This patch fixes act_vlan to point to the mac_header prior calling skb_vlan_*() functions, as other callers do. Signed-off-by: Shmulik Ladkani Cc: Daniel Borkmann Cc: Pravin Shelar Cc: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/act_vlan.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index a95c00b119da..b57fcbcefea1 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -37,6 +37,12 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a, bstats_update(&v->tcf_bstats, skb); action = v->tcf_action; + /* Ensure 'data' points at mac_header prior calling vlan manipulating + * functions. + */ + if (skb_at_tc_ingress(skb)) + skb_push_rcsum(skb, skb->mac_len); + switch (v->tcfv_action) { case TCA_VLAN_ACT_POP: err = skb_vlan_pop(skb); @@ -83,6 +89,9 @@ drop: action = TC_ACT_SHOT; v->tcf_qstats.drops++; unlock: + if (skb_at_tc_ingress(skb)) + skb_pull_rcsum(skb, skb->mac_len); + spin_unlock(&v->tcf_lock); return action; } From b6a7920848cab619b5e434fdc0338778c63ef3f3 Mon Sep 17 00:00:00 2001 From: Shmulik Ladkani Date: Thu, 29 Sep 2016 12:10:41 +0300 Subject: [PATCH 1689/1715] net: skbuff: Limit skb_vlan_pop/push() to expect skb->data at mac header skb_vlan_pop/push were too generic, trying to support the cases where skb->data is at mac header, and cases where skb->data is arbitrarily elsewhere. Supporting an arbitrary skb->data was complex and bogus: - It failed to unwind skb->data to its original location post actual pop/push. (Also, semantic is not well defined for unwinding: If data was into the eth header, need to use same offset from start; But if data was at network header or beyond, need to adjust the original offset according to the push/pull) - It mangled the rcsum post actual push/pop, without taking into account that the eth bytes might already have been pulled out of the csum. Most callers (ovs, bpf) already had their skb->data at mac_header upon invoking skb_vlan_pop/push. Last caller that failed to do so (act_vlan) has been recently fixed. Therefore, to simplify things, no longer support arbitrary skb->data inputs for skb_vlan_pop/push(). skb->data is expected to be exactly at mac_header; WARN otherwise. Signed-off-by: Shmulik Ladkani Cc: Daniel Borkmann Cc: Pravin Shelar Cc: Jiri Pirko Signed-off-by: David S. Miller --- net/core/skbuff.c | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index d36c7548952f..cbd19d250947 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4528,13 +4528,18 @@ EXPORT_SYMBOL(skb_ensure_writable); int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci) { struct vlan_hdr *vhdr; - unsigned int offset = skb->data - skb_mac_header(skb); + int offset = skb->data - skb_mac_header(skb); int err; - __skb_push(skb, offset); + if (WARN_ONCE(offset, + "__skb_vlan_pop got skb with skb->data not at mac header (offset %d)\n", + offset)) { + return -EINVAL; + } + err = skb_ensure_writable(skb, VLAN_ETH_HLEN); if (unlikely(err)) - goto pull; + return err; skb_postpull_rcsum(skb, skb->data + (2 * ETH_ALEN), VLAN_HLEN); @@ -4551,13 +4556,14 @@ int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci) skb_set_network_header(skb, ETH_HLEN); skb_reset_mac_len(skb); -pull: - __skb_pull(skb, offset); return err; } EXPORT_SYMBOL(__skb_vlan_pop); +/* Pop a vlan tag either from hwaccel or from payload. + * Expects skb->data at mac header. + */ int skb_vlan_pop(struct sk_buff *skb) { u16 vlan_tci; @@ -4588,29 +4594,30 @@ int skb_vlan_pop(struct sk_buff *skb) } EXPORT_SYMBOL(skb_vlan_pop); +/* Push a vlan tag either into hwaccel or into payload (if hwaccel tag present). + * Expects skb->data at mac header. + */ int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) { if (skb_vlan_tag_present(skb)) { - unsigned int offset = skb->data - skb_mac_header(skb); + int offset = skb->data - skb_mac_header(skb); int err; - /* __vlan_insert_tag expect skb->data pointing to mac header. - * So change skb->data before calling it and change back to - * original position later - */ - __skb_push(skb, offset); + if (WARN_ONCE(offset, + "skb_vlan_push got skb with skb->data not at mac header (offset %d)\n", + offset)) { + return -EINVAL; + } + err = __vlan_insert_tag(skb, skb->vlan_proto, skb_vlan_tag_get(skb)); - if (err) { - __skb_pull(skb, offset); + if (err) return err; - } skb->protocol = skb->vlan_proto; skb->mac_len += VLAN_HLEN; skb_postpush_rcsum(skb, skb->data + (2 * ETH_ALEN), VLAN_HLEN); - __skb_pull(skb, offset); } __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci); return 0; From 0fd7d43fbc4aaf358005231a6bed27eb1c2f60c3 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 1 Oct 2016 09:12:29 +0000 Subject: [PATCH 1690/1715] net: qcom/emac: fix return value check in emac_sgmii_config() In case of error, the function ioremap() returns NULL pointer not ERR_PTR(). The IS_ERR() test in the return value check should be replaced with NULL test. Also add check for return value of platform_get_resource(). Fixes: 54e19bc74f33 ("net: qcom/emac: do not use devm on internal phy pdev") Signed-off-by: Wei Yongjun Acked-by: Timur Tabi Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/emac/emac-sgmii.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c index 3d2c05a40d2c..75c1b530e39e 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c +++ b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c @@ -740,9 +740,14 @@ int emac_sgmii_config(struct platform_device *pdev, struct emac_adapter *adpt) /* Base address is the first address */ res = platform_get_resource(sgmii_pdev, IORESOURCE_MEM, 0); + if (!res) { + ret = -EINVAL; + goto error_put_device; + } + phy->base = ioremap(res->start, resource_size(res)); - if (IS_ERR(phy->base)) { - ret = PTR_ERR(phy->base); + if (!phy->base) { + ret = -ENOMEM; goto error_put_device; } @@ -750,8 +755,8 @@ int emac_sgmii_config(struct platform_device *pdev, struct emac_adapter *adpt) res = platform_get_resource(sgmii_pdev, IORESOURCE_MEM, 1); if (res) { phy->digital = ioremap(res->start, resource_size(res)); - if (IS_ERR(phy->digital)) { - ret = PTR_ERR(phy->digital); + if (!phy->digital) { + ret = -ENOMEM; goto error_unmap_base; } } From b9118b7221ebb12156d2b08d4d5647bc6076d6bb Mon Sep 17 00:00:00 2001 From: Christophe Jaillet Date: Sun, 2 Oct 2016 09:04:16 +0200 Subject: [PATCH 1691/1715] ptp: Fix resource leak in case of error A call to 'ida_simple_remove()' is missing in the error handling path. This as been spotted with the following coccinelle script which tries to detect missing 'ida_simple_remove()' call in error handling paths. /////////////// @@ expression x; identifier l; @@ * x = ida_simple_get(...); ... if (...) { ... } ... if (...) { ... goto l; } ... * l: ... when != ida_simple_remove(...); Signed-off-by: Christophe JAILLET Acked-by: Richard Cochran Signed-off-by: David S. Miller --- drivers/ptp/ptp_clock.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c index 2e481b9e8ea5..86280b7e41f3 100644 --- a/drivers/ptp/ptp_clock.c +++ b/drivers/ptp/ptp_clock.c @@ -263,6 +263,7 @@ no_sysfs: no_device: mutex_destroy(&ptp->tsevq_mux); mutex_destroy(&ptp->pincfg_mux); + ida_simple_remove(&ptp_clocks_map, index); no_slot: kfree(ptp); no_memory: From 9ca57e97a75780a7f9b8e93e83ab1e36dbfd9846 Mon Sep 17 00:00:00 2001 From: Stefan Assmann Date: Mon, 19 Sep 2016 13:37:49 +0200 Subject: [PATCH 1692/1715] i40e: check if vectors are already depleted when doing VMDq allocation During MSI-X vector allocation for VMDq, a check for "no vectors left" was missing, add it. This prevents more vectors to be allocated than available. Signed-off-by: Stefan Assmann Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 27 ++++++++++++--------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 8176596932be..66d2ca0194f6 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -7674,18 +7674,23 @@ static int i40e_init_msix(struct i40e_pf *pf) int vmdq_vecs_wanted = pf->num_vmdq_vsis * pf->num_vmdq_qps; int vmdq_vecs = min_t(int, vectors_left, vmdq_vecs_wanted); - /* if we're short on vectors for what's desired, we limit - * the queues per vmdq. If this is still more than are - * available, the user will need to change the number of - * queues/vectors used by the PF later with the ethtool - * channels command - */ - if (vmdq_vecs < vmdq_vecs_wanted) - pf->num_vmdq_qps = 1; - pf->num_vmdq_msix = pf->num_vmdq_qps; + if (!vectors_left) { + pf->num_vmdq_msix = 0; + pf->num_vmdq_qps = 0; + } else { + /* if we're short on vectors for what's desired, we limit + * the queues per vmdq. If this is still more than are + * available, the user will need to change the number of + * queues/vectors used by the PF later with the ethtool + * channels command + */ + if (vmdq_vecs < vmdq_vecs_wanted) + pf->num_vmdq_qps = 1; + pf->num_vmdq_msix = pf->num_vmdq_qps; - v_budget += vmdq_vecs; - vectors_left -= vmdq_vecs; + v_budget += vmdq_vecs; + vectors_left -= vmdq_vecs; + } } pf->msix_entries = kcalloc(v_budget, sizeof(struct msix_entry), From 4ce20abc645fc1822e86d9845a8562347e877b36 Mon Sep 17 00:00:00 2001 From: Stefan Assmann Date: Mon, 19 Sep 2016 13:37:50 +0200 Subject: [PATCH 1693/1715] i40e: fix MSI-X vector redistribution if hw limit is reached The driver allocates 1 vector per CPU thread and the current hardware limit for vectors is 129 per PF. On systems with 128 or more threads this currently means all vectors are used by the PF leaving no room for additional features like VMDq, iWARP, etc... The code that should redistribute the vectors in this case is broken and never triggers. Fixed the code so that it actually triggers if the hardware limit is reached and adjust the number of queue pairs accordingly. Also the number of initially requested iWARP vectors was not properly saved when the vector limit was reached, and therefore always zero. Comparison with debug statement. Before: i40e 0000:2d:00.0: VMDq disabled, not enough MSI-X vectors i40e 0000:2d:00.0: IWARP disabled, not enough MSI-X vectors i40e 00.0 MSI-X vector distribution: PF 128, VMDq 0, FDSB 0, iWARP 0 After: i40e 0000:2d:00.0: MSI-X vector limit reached, attempting to redistribute vectors i40e 00.0 MSI-X vector distribution: PF 78, VMDq 8, FDSB 0, iWARP 42 Signed-off-by: Stefan Assmann Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 38 ++++++++++++--------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 66d2ca0194f6..ca7dd43abf5a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -7661,6 +7661,8 @@ static int i40e_init_msix(struct i40e_pf *pf) #endif /* can we reserve enough for iWARP? */ if (pf->flags & I40E_FLAG_IWARP_ENABLED) { + iwarp_requested = pf->num_iwarp_msix; + if (!vectors_left) pf->num_iwarp_msix = 0; else if (vectors_left < pf->num_iwarp_msix) @@ -7702,21 +7704,6 @@ static int i40e_init_msix(struct i40e_pf *pf) pf->msix_entries[i].entry = i; v_actual = i40e_reserve_msix_vectors(pf, v_budget); - if (v_actual != v_budget) { - /* If we have limited resources, we will start with no vectors - * for the special features and then allocate vectors to some - * of these features based on the policy and at the end disable - * the features that did not get any vectors. - */ - iwarp_requested = pf->num_iwarp_msix; - pf->num_iwarp_msix = 0; -#ifdef I40E_FCOE - pf->num_fcoe_qps = 0; - pf->num_fcoe_msix = 0; -#endif - pf->num_vmdq_msix = 0; - } - if (v_actual < I40E_MIN_MSIX) { pf->flags &= ~I40E_FLAG_MSIX_ENABLED; kfree(pf->msix_entries); @@ -7730,9 +7717,16 @@ static int i40e_init_msix(struct i40e_pf *pf) pf->num_lan_qps = 1; pf->num_lan_msix = 1; - } else if (v_actual != v_budget) { + } else if (!vectors_left) { + /* If we have limited resources, we will start with no vectors + * for the special features and then allocate vectors to some + * of these features based on the policy and at the end disable + * the features that did not get any vectors. + */ int vec; + dev_info(&pf->pdev->dev, + "MSI-X vector limit reached, attempting to redistribute vectors\n"); /* reserve the misc vector */ vec = v_actual - 1; @@ -7740,6 +7734,10 @@ static int i40e_init_msix(struct i40e_pf *pf) pf->num_vmdq_msix = 1; /* force VMDqs to only one vector */ pf->num_vmdq_vsis = 1; pf->num_vmdq_qps = 1; +#ifdef I40E_FCOE + pf->num_fcoe_qps = 0; + pf->num_fcoe_msix = 0; +#endif pf->flags &= ~I40E_FLAG_FD_SB_ENABLED; /* partition out the remaining vectors */ @@ -7775,6 +7773,7 @@ static int i40e_init_msix(struct i40e_pf *pf) pf->num_lan_msix = min_t(int, (vec - (pf->num_iwarp_msix + pf->num_vmdq_vsis)), pf->num_lan_msix); + pf->num_lan_qps = pf->num_lan_msix; #ifdef I40E_FCOE /* give one vector to FCoE */ if (pf->flags & I40E_FLAG_FCOE_ENABLED) { @@ -7804,6 +7803,13 @@ static int i40e_init_msix(struct i40e_pf *pf) pf->flags &= ~I40E_FLAG_FCOE_ENABLED; } #endif + i40e_debug(&pf->hw, I40E_DEBUG_INIT, + "MSI-X vector distribution: PF %d, VMDq %d, FDSB %d, iWARP %d\n", + pf->num_lan_msix, + pf->num_vmdq_msix * pf->num_vmdq_vsis, + pf->num_fdsb_msix, + pf->num_iwarp_msix); + return v_actual; } From abd97a94ba913d121a920d5541aba1e568be6972 Mon Sep 17 00:00:00 2001 From: Stefan Assmann Date: Mon, 19 Sep 2016 13:37:51 +0200 Subject: [PATCH 1694/1715] i40e: fix sideband flow director vector allocation Currently if the MSI-X vector limit is reached the sideband flow director gets disabled. A bit too early to make that decision, as vectors may get re-distributed. So move the check further back. Signed-off-by: Stefan Assmann Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index ca7dd43abf5a..0044c29ca31e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -7641,7 +7641,6 @@ static int i40e_init_msix(struct i40e_pf *pf) vectors_left--; } else { pf->num_fdsb_msix = 0; - pf->flags &= ~I40E_FLAG_FD_SB_ENABLED; } } @@ -7738,7 +7737,6 @@ static int i40e_init_msix(struct i40e_pf *pf) pf->num_fcoe_qps = 0; pf->num_fcoe_msix = 0; #endif - pf->flags &= ~I40E_FLAG_FD_SB_ENABLED; /* partition out the remaining vectors */ switch (vec) { @@ -7770,6 +7768,10 @@ static int i40e_init_msix(struct i40e_pf *pf) pf->num_vmdq_vsis = min_t(int, (vec / 2), I40E_DEFAULT_NUM_VMDQ_VSI); } + if (pf->flags & I40E_FLAG_FD_SB_ENABLED) { + pf->num_fdsb_msix = 1; + vec--; + } pf->num_lan_msix = min_t(int, (vec - (pf->num_iwarp_msix + pf->num_vmdq_vsis)), pf->num_lan_msix); @@ -7785,6 +7787,11 @@ static int i40e_init_msix(struct i40e_pf *pf) } } + if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) && + (pf->num_fdsb_msix == 0)) { + dev_info(&pf->pdev->dev, "Sideband Flowdir disabled, not enough MSI-X vectors\n"); + pf->flags &= ~I40E_FLAG_FD_SB_ENABLED; + } if ((pf->flags & I40E_FLAG_VMDQ_ENABLED) && (pf->num_vmdq_msix == 0)) { dev_info(&pf->pdev->dev, "VMDq disabled, not enough MSI-X vectors\n"); From 0a7fb11c23c0fb8f5ad37f285f40348f1ab9ccbd Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Sat, 1 Oct 2016 21:59:55 +0300 Subject: [PATCH 1695/1715] qed: Add Light L2 support Other protocols beside the networking driver need the ability of passing some L2 traffic, usually [although not limited] for the purpose of some management traffic. Signed-off-by: Yuval Mintz Signed-off-by: Ram Amrani Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/Kconfig | 8 + drivers/net/ethernet/qlogic/qed/Makefile | 1 + drivers/net/ethernet/qlogic/qed/qed.h | 9 + drivers/net/ethernet/qlogic/qed/qed_cxt.c | 2 + drivers/net/ethernet/qlogic/qed/qed_dev.c | 120 +- drivers/net/ethernet/qlogic/qed/qed_dev_api.h | 20 + drivers/net/ethernet/qlogic/qed/qed_ll2.c | 1699 +++++++++++++++++ drivers/net/ethernet/qlogic/qed/qed_ll2.h | 289 +++ drivers/net/ethernet/qlogic/qed/qed_main.c | 23 +- .../net/ethernet/qlogic/qed/qed_reg_addr.h | 22 + drivers/net/ethernet/qlogic/qed/qed_sp.h | 4 + include/linux/qed/qed_if.h | 1 + include/linux/qed/qed_ll2_if.h | 139 ++ 13 files changed, 2334 insertions(+), 3 deletions(-) create mode 100644 drivers/net/ethernet/qlogic/qed/qed_ll2.c create mode 100644 drivers/net/ethernet/qlogic/qed/qed_ll2.h create mode 100644 include/linux/qed/qed_ll2_if.h diff --git a/drivers/net/ethernet/qlogic/Kconfig b/drivers/net/ethernet/qlogic/Kconfig index 6ba48406899e..9eb3b1914cf5 100644 --- a/drivers/net/ethernet/qlogic/Kconfig +++ b/drivers/net/ethernet/qlogic/Kconfig @@ -88,6 +88,14 @@ config QED ---help--- This enables the support for ... +config QED_LL2 + bool "Qlogic QED Light L2 interface" + default n + depends on QED + ---help--- + This enables support for Light L2 interface which is required + by all qed protocol drivers other than qede. + config QED_SRIOV bool "QLogic QED 25/40/100Gb SR-IOV support" depends on QED && PCI_IOV diff --git a/drivers/net/ethernet/qlogic/qed/Makefile b/drivers/net/ethernet/qlogic/qed/Makefile index 86a5b4f5f870..e067098f10a9 100644 --- a/drivers/net/ethernet/qlogic/qed/Makefile +++ b/drivers/net/ethernet/qlogic/qed/Makefile @@ -4,3 +4,4 @@ qed-y := qed_cxt.o qed_dev.o qed_hw.o qed_init_fw_funcs.o qed_init_ops.o \ qed_int.o qed_main.o qed_mcp.o qed_sp_commands.o qed_spq.o qed_l2.o \ qed_selftest.o qed_dcbx.o qed_debug.o qed-$(CONFIG_QED_SRIOV) += qed_sriov.o qed_vf.o +qed-$(CONFIG_QED_LL2) += qed_ll2.o diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index 0929582fc82b..91b571a3670b 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -72,6 +72,7 @@ struct qed_sb_info; struct qed_sb_attn_info; struct qed_cxt_mngr; struct qed_sb_sp_info; +struct qed_ll2_info; struct qed_mcp_info; struct qed_rt_data { @@ -152,6 +153,7 @@ enum QED_RESOURCES { QED_MAC, QED_VLAN, QED_ILT, + QED_LL2_QUEUE, QED_MAX_RESC, }; @@ -360,6 +362,8 @@ struct qed_hwfn { struct qed_sb_attn_info *p_sb_attn; /* Protocol related */ + bool using_ll2; + struct qed_ll2_info *p_ll2_info; struct qed_pf_params pf_params; bool b_rdma_enabled_in_prs; @@ -564,6 +568,11 @@ struct qed_dev { struct qed_dbg_params dbg_params; +#ifdef CONFIG_QED_LL2 + struct qed_cb_ll2_info *ll2; + u8 ll2_mac_address[ETH_ALEN]; +#endif + const struct firmware *firmware; }; diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c index dd579b2ef224..d9bea2a9c9f7 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c +++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c @@ -1839,6 +1839,8 @@ int qed_cxt_set_pf_params(struct qed_hwfn *p_hwfn) /* Set the number of required CORE connections */ u32 core_cids = 1; /* SPQ */ + if (p_hwfn->using_ll2) + core_cids += 4; qed_cxt_set_proto_cid_count(p_hwfn, PROTOCOLID_CORE, core_cids, 0); switch (p_hwfn->hw_info.personality) { diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index 13d8b4075b01..9a8e153df841 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -29,6 +29,7 @@ #include "qed_hw.h" #include "qed_init_ops.h" #include "qed_int.h" +#include "qed_ll2.h" #include "qed_mcp.h" #include "qed_reg_addr.h" #include "qed_sp.h" @@ -147,6 +148,9 @@ void qed_resc_free(struct qed_dev *cdev) qed_eq_free(p_hwfn, p_hwfn->p_eq); qed_consq_free(p_hwfn, p_hwfn->p_consq); qed_int_free(p_hwfn); +#ifdef CONFIG_QED_LL2 + qed_ll2_free(p_hwfn, p_hwfn->p_ll2_info); +#endif qed_iov_free(p_hwfn); qed_dmae_info_free(p_hwfn); qed_dcbx_info_free(p_hwfn, p_hwfn->p_dcbx_info); @@ -403,6 +407,9 @@ int qed_qm_reconf(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) int qed_resc_alloc(struct qed_dev *cdev) { +#ifdef CONFIG_QED_LL2 + struct qed_ll2_info *p_ll2_info; +#endif struct qed_consq *p_consq; struct qed_eq *p_eq; int i, rc = 0; @@ -513,6 +520,15 @@ int qed_resc_alloc(struct qed_dev *cdev) goto alloc_no_mem; p_hwfn->p_consq = p_consq; +#ifdef CONFIG_QED_LL2 + if (p_hwfn->using_ll2) { + p_ll2_info = qed_ll2_alloc(p_hwfn); + if (!p_ll2_info) + goto alloc_no_mem; + p_hwfn->p_ll2_info = p_ll2_info; + } +#endif + /* DMA info initialization */ rc = qed_dmae_info_alloc(p_hwfn); if (rc) @@ -561,6 +577,10 @@ void qed_resc_setup(struct qed_dev *cdev) qed_int_setup(p_hwfn, p_hwfn->p_main_ptt); qed_iov_setup(p_hwfn, p_hwfn->p_main_ptt); +#ifdef CONFIG_QED_LL2 + if (p_hwfn->using_ll2) + qed_ll2_setup(p_hwfn, p_hwfn->p_ll2_info); +#endif } } @@ -1304,6 +1324,7 @@ static int qed_hw_get_resc(struct qed_hwfn *p_hwfn) resc_num[QED_VLAN] = (ETH_NUM_VLAN_FILTERS - 1 /*For vlan0*/) / num_funcs; resc_num[QED_ILT] = PXP_NUM_ILT_RECORDS_BB / num_funcs; + resc_num[QED_LL2_QUEUE] = MAX_NUM_LL2_RX_QUEUES / num_funcs; for (i = 0; i < QED_MAX_RESC; i++) resc_start[i] = resc_num[i] * enabled_func_idx; @@ -1327,7 +1348,8 @@ static int qed_hw_get_resc(struct qed_hwfn *p_hwfn) "RL = %d start = %d\n" "MAC = %d start = %d\n" "VLAN = %d start = %d\n" - "ILT = %d start = %d\n", + "ILT = %d start = %d\n" + "LL2_QUEUE = %d start = %d\n", p_hwfn->hw_info.resc_num[QED_SB], p_hwfn->hw_info.resc_start[QED_SB], p_hwfn->hw_info.resc_num[QED_L2_QUEUE], @@ -1343,7 +1365,9 @@ static int qed_hw_get_resc(struct qed_hwfn *p_hwfn) p_hwfn->hw_info.resc_num[QED_VLAN], p_hwfn->hw_info.resc_start[QED_VLAN], p_hwfn->hw_info.resc_num[QED_ILT], - p_hwfn->hw_info.resc_start[QED_ILT]); + p_hwfn->hw_info.resc_start[QED_ILT], + RESC_NUM(p_hwfn, QED_LL2_QUEUE), + RESC_START(p_hwfn, QED_LL2_QUEUE)); return 0; } @@ -2133,6 +2157,98 @@ int qed_fw_rss_eng(struct qed_hwfn *p_hwfn, u8 src_id, u8 *dst_id) return 0; } +static void qed_llh_mac_to_filter(u32 *p_high, u32 *p_low, + u8 *p_filter) +{ + *p_high = p_filter[1] | (p_filter[0] << 8); + *p_low = p_filter[5] | (p_filter[4] << 8) | + (p_filter[3] << 16) | (p_filter[2] << 24); +} + +int qed_llh_add_mac_filter(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u8 *p_filter) +{ + u32 high = 0, low = 0, en; + int i; + + if (!(IS_MF_SI(p_hwfn) || IS_MF_DEFAULT(p_hwfn))) + return 0; + + qed_llh_mac_to_filter(&high, &low, p_filter); + + /* Find a free entry and utilize it */ + for (i = 0; i < NIG_REG_LLH_FUNC_FILTER_EN_SIZE; i++) { + en = qed_rd(p_hwfn, p_ptt, + NIG_REG_LLH_FUNC_FILTER_EN + i * sizeof(u32)); + if (en) + continue; + qed_wr(p_hwfn, p_ptt, + NIG_REG_LLH_FUNC_FILTER_VALUE + + 2 * i * sizeof(u32), low); + qed_wr(p_hwfn, p_ptt, + NIG_REG_LLH_FUNC_FILTER_VALUE + + (2 * i + 1) * sizeof(u32), high); + qed_wr(p_hwfn, p_ptt, + NIG_REG_LLH_FUNC_FILTER_MODE + i * sizeof(u32), 0); + qed_wr(p_hwfn, p_ptt, + NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE + + i * sizeof(u32), 0); + qed_wr(p_hwfn, p_ptt, + NIG_REG_LLH_FUNC_FILTER_EN + i * sizeof(u32), 1); + break; + } + if (i >= NIG_REG_LLH_FUNC_FILTER_EN_SIZE) { + DP_NOTICE(p_hwfn, + "Failed to find an empty LLH filter to utilize\n"); + return -EINVAL; + } + + DP_VERBOSE(p_hwfn, NETIF_MSG_HW, + "mac: %pM is added at %d\n", + p_filter, i); + + return 0; +} + +void qed_llh_remove_mac_filter(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u8 *p_filter) +{ + u32 high = 0, low = 0; + int i; + + if (!(IS_MF_SI(p_hwfn) || IS_MF_DEFAULT(p_hwfn))) + return; + + qed_llh_mac_to_filter(&high, &low, p_filter); + + /* Find the entry and clean it */ + for (i = 0; i < NIG_REG_LLH_FUNC_FILTER_EN_SIZE; i++) { + if (qed_rd(p_hwfn, p_ptt, + NIG_REG_LLH_FUNC_FILTER_VALUE + + 2 * i * sizeof(u32)) != low) + continue; + if (qed_rd(p_hwfn, p_ptt, + NIG_REG_LLH_FUNC_FILTER_VALUE + + (2 * i + 1) * sizeof(u32)) != high) + continue; + + qed_wr(p_hwfn, p_ptt, + NIG_REG_LLH_FUNC_FILTER_EN + i * sizeof(u32), 0); + qed_wr(p_hwfn, p_ptt, + NIG_REG_LLH_FUNC_FILTER_VALUE + 2 * i * sizeof(u32), 0); + qed_wr(p_hwfn, p_ptt, + NIG_REG_LLH_FUNC_FILTER_VALUE + + (2 * i + 1) * sizeof(u32), 0); + + DP_VERBOSE(p_hwfn, NETIF_MSG_HW, + "mac: %pM is removed from %d\n", + p_filter, i); + break; + } + if (i >= NIG_REG_LLH_FUNC_FILTER_EN_SIZE) + DP_NOTICE(p_hwfn, "Tried to remove a non-configured filter\n"); +} + static int qed_set_coalesce(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u32 hw_addr, void *p_eth_qzone, size_t eth_qzone_size, u8 timeset) diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h index 343bb0344f62..b6711c106597 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h +++ b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h @@ -309,6 +309,26 @@ int qed_fw_rss_eng(struct qed_hwfn *p_hwfn, u8 src_id, u8 *dst_id); +/** + * @brief qed_llh_add_mac_filter - configures a MAC filter in llh + * + * @param p_hwfn + * @param p_ptt + * @param p_filter - MAC to add + */ +int qed_llh_add_mac_filter(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u8 *p_filter); + +/** + * @brief qed_llh_remove_mac_filter - removes a MAC filter from llh + * + * @param p_hwfn + * @param p_ptt + * @param p_filter - MAC to remove + */ +void qed_llh_remove_mac_filter(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u8 *p_filter); + /** * *@brief Cleanup of previous driver remains prior to load * diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c new file mode 100644 index 000000000000..e0ec8ed2f92c --- /dev/null +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -0,0 +1,1699 @@ +/* QLogic qed NIC Driver + * + * Copyright (c) 2015 QLogic Corporation + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "qed.h" +#include "qed_cxt.h" +#include "qed_dev_api.h" +#include "qed_hsi.h" +#include "qed_hw.h" +#include "qed_int.h" +#include "qed_ll2.h" +#include "qed_mcp.h" +#include "qed_reg_addr.h" +#include "qed_sp.h" + +#define QED_LL2_RX_REGISTERED(ll2) ((ll2)->rx_queue.b_cb_registred) +#define QED_LL2_TX_REGISTERED(ll2) ((ll2)->tx_queue.b_cb_registred) + +#define QED_LL2_TX_SIZE (256) +#define QED_LL2_RX_SIZE (4096) + +struct qed_cb_ll2_info { + int rx_cnt; + u32 rx_size; + u8 handle; + bool frags_mapped; + + /* Lock protecting LL2 buffer lists in sleepless context */ + spinlock_t lock; + struct list_head list; + + const struct qed_ll2_cb_ops *cbs; + void *cb_cookie; +}; + +struct qed_ll2_buffer { + struct list_head list; + void *data; + dma_addr_t phys_addr; +}; + +static void qed_ll2b_complete_tx_packet(struct qed_hwfn *p_hwfn, + u8 connection_handle, + void *cookie, + dma_addr_t first_frag_addr, + bool b_last_fragment, + bool b_last_packet) +{ + struct qed_dev *cdev = p_hwfn->cdev; + struct sk_buff *skb = cookie; + + /* All we need to do is release the mapping */ + dma_unmap_single(&p_hwfn->cdev->pdev->dev, first_frag_addr, + skb_headlen(skb), DMA_TO_DEVICE); + + if (cdev->ll2->cbs && cdev->ll2->cbs->tx_cb) + cdev->ll2->cbs->tx_cb(cdev->ll2->cb_cookie, skb, + b_last_fragment); + + if (cdev->ll2->frags_mapped) + /* Case where mapped frags were received, need to + * free skb with nr_frags marked as 0 + */ + skb_shinfo(skb)->nr_frags = 0; + + dev_kfree_skb_any(skb); +} + +static int qed_ll2_alloc_buffer(struct qed_dev *cdev, + u8 **data, dma_addr_t *phys_addr) +{ + *data = kmalloc(cdev->ll2->rx_size, GFP_ATOMIC); + if (!(*data)) { + DP_INFO(cdev, "Failed to allocate LL2 buffer data\n"); + return -ENOMEM; + } + + *phys_addr = dma_map_single(&cdev->pdev->dev, + ((*data) + NET_SKB_PAD), + cdev->ll2->rx_size, DMA_FROM_DEVICE); + if (dma_mapping_error(&cdev->pdev->dev, *phys_addr)) { + DP_INFO(cdev, "Failed to map LL2 buffer data\n"); + kfree((*data)); + return -ENOMEM; + } + + return 0; +} + +static int qed_ll2_dealloc_buffer(struct qed_dev *cdev, + struct qed_ll2_buffer *buffer) +{ + spin_lock_bh(&cdev->ll2->lock); + + dma_unmap_single(&cdev->pdev->dev, buffer->phys_addr, + cdev->ll2->rx_size, DMA_FROM_DEVICE); + kfree(buffer->data); + list_del(&buffer->list); + + cdev->ll2->rx_cnt--; + if (!cdev->ll2->rx_cnt) + DP_INFO(cdev, "All LL2 entries were removed\n"); + + spin_unlock_bh(&cdev->ll2->lock); + + return 0; +} + +static void qed_ll2_kill_buffers(struct qed_dev *cdev) +{ + struct qed_ll2_buffer *buffer, *tmp_buffer; + + list_for_each_entry_safe(buffer, tmp_buffer, &cdev->ll2->list, list) + qed_ll2_dealloc_buffer(cdev, buffer); +} + +void qed_ll2b_complete_rx_packet(struct qed_hwfn *p_hwfn, + u8 connection_handle, + struct qed_ll2_rx_packet *p_pkt, + struct core_rx_fast_path_cqe *p_cqe, + bool b_last_packet) +{ + u16 packet_length = le16_to_cpu(p_cqe->packet_length); + struct qed_ll2_buffer *buffer = p_pkt->cookie; + struct qed_dev *cdev = p_hwfn->cdev; + u16 vlan = le16_to_cpu(p_cqe->vlan); + u32 opaque_data_0, opaque_data_1; + u8 pad = p_cqe->placement_offset; + dma_addr_t new_phys_addr; + struct sk_buff *skb; + bool reuse = false; + int rc = -EINVAL; + u8 *new_data; + + opaque_data_0 = le32_to_cpu(p_cqe->opaque_data.data[0]); + opaque_data_1 = le32_to_cpu(p_cqe->opaque_data.data[1]); + + DP_VERBOSE(p_hwfn, + (NETIF_MSG_RX_STATUS | QED_MSG_STORAGE | NETIF_MSG_PKTDATA), + "Got an LL2 Rx completion: [Buffer at phys 0x%llx, offset 0x%02x] Length 0x%04x Parse_flags 0x%04x vlan 0x%04x Opaque data [0x%08x:0x%08x]\n", + (u64)p_pkt->rx_buf_addr, pad, packet_length, + le16_to_cpu(p_cqe->parse_flags.flags), vlan, + opaque_data_0, opaque_data_1); + + if ((cdev->dp_module & NETIF_MSG_PKTDATA) && buffer->data) { + print_hex_dump(KERN_INFO, "", + DUMP_PREFIX_OFFSET, 16, 1, + buffer->data, packet_length, false); + } + + /* Determine if data is valid */ + if (packet_length < ETH_HLEN) + reuse = true; + + /* Allocate a replacement for buffer; Reuse upon failure */ + if (!reuse) + rc = qed_ll2_alloc_buffer(p_hwfn->cdev, &new_data, + &new_phys_addr); + + /* If need to reuse or there's no replacement buffer, repost this */ + if (rc) + goto out_post; + + skb = build_skb(buffer->data, 0); + if (!skb) { + rc = -ENOMEM; + goto out_post; + } + + pad += NET_SKB_PAD; + skb_reserve(skb, pad); + skb_put(skb, packet_length); + skb_checksum_none_assert(skb); + + /* Get parital ethernet information instead of eth_type_trans(), + * Since we don't have an associated net_device. + */ + skb_reset_mac_header(skb); + skb->protocol = eth_hdr(skb)->h_proto; + + /* Pass SKB onward */ + if (cdev->ll2->cbs && cdev->ll2->cbs->rx_cb) { + if (vlan) + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan); + cdev->ll2->cbs->rx_cb(cdev->ll2->cb_cookie, skb, + opaque_data_0, opaque_data_1); + } + + /* Update Buffer information and update FW producer */ + buffer->data = new_data; + buffer->phys_addr = new_phys_addr; + +out_post: + rc = qed_ll2_post_rx_buffer(QED_LEADING_HWFN(cdev), cdev->ll2->handle, + buffer->phys_addr, 0, buffer, 1); + + if (rc) + qed_ll2_dealloc_buffer(cdev, buffer); +} + +static struct qed_ll2_info *__qed_ll2_handle_sanity(struct qed_hwfn *p_hwfn, + u8 connection_handle, + bool b_lock, + bool b_only_active) +{ + struct qed_ll2_info *p_ll2_conn, *p_ret = NULL; + + if (connection_handle >= QED_MAX_NUM_OF_LL2_CONNECTIONS) + return NULL; + + if (!p_hwfn->p_ll2_info) + return NULL; + + p_ll2_conn = &p_hwfn->p_ll2_info[connection_handle]; + + if (b_only_active) { + if (b_lock) + mutex_lock(&p_ll2_conn->mutex); + if (p_ll2_conn->b_active) + p_ret = p_ll2_conn; + if (b_lock) + mutex_unlock(&p_ll2_conn->mutex); + } else { + p_ret = p_ll2_conn; + } + + return p_ret; +} + +static struct qed_ll2_info *qed_ll2_handle_sanity(struct qed_hwfn *p_hwfn, + u8 connection_handle) +{ + return __qed_ll2_handle_sanity(p_hwfn, connection_handle, false, true); +} + +static struct qed_ll2_info *qed_ll2_handle_sanity_lock(struct qed_hwfn *p_hwfn, + u8 connection_handle) +{ + return __qed_ll2_handle_sanity(p_hwfn, connection_handle, true, true); +} + +static struct qed_ll2_info *qed_ll2_handle_sanity_inactive(struct qed_hwfn + *p_hwfn, + u8 connection_handle) +{ + return __qed_ll2_handle_sanity(p_hwfn, connection_handle, false, false); +} + +static void qed_ll2_txq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle) +{ + bool b_last_packet = false, b_last_frag = false; + struct qed_ll2_tx_packet *p_pkt = NULL; + struct qed_ll2_info *p_ll2_conn; + struct qed_ll2_tx_queue *p_tx; + + p_ll2_conn = qed_ll2_handle_sanity_inactive(p_hwfn, connection_handle); + if (!p_ll2_conn) + return; + + p_tx = &p_ll2_conn->tx_queue; + + while (!list_empty(&p_tx->active_descq)) { + p_pkt = list_first_entry(&p_tx->active_descq, + struct qed_ll2_tx_packet, list_entry); + if (!p_pkt) + break; + + list_del(&p_pkt->list_entry); + b_last_packet = list_empty(&p_tx->active_descq); + list_add_tail(&p_pkt->list_entry, &p_tx->free_descq); + p_tx->cur_completing_packet = *p_pkt; + p_tx->cur_completing_bd_idx = 1; + b_last_frag = p_tx->cur_completing_bd_idx == p_pkt->bd_used; + + qed_ll2b_complete_tx_packet(p_hwfn, p_ll2_conn->my_id, + p_pkt->cookie, + p_pkt->bds_set[0].tx_frag, + b_last_frag, b_last_packet); + } +} + +static int qed_ll2_txq_completion(struct qed_hwfn *p_hwfn, void *p_cookie) +{ + struct qed_ll2_info *p_ll2_conn = p_cookie; + struct qed_ll2_tx_queue *p_tx = &p_ll2_conn->tx_queue; + u16 new_idx = 0, num_bds = 0, num_bds_in_packet = 0; + struct qed_ll2_tx_packet *p_pkt; + bool b_last_frag = false; + unsigned long flags; + int rc = -EINVAL; + + spin_lock_irqsave(&p_tx->lock, flags); + if (p_tx->b_completing_packet) { + rc = -EBUSY; + goto out; + } + + new_idx = le16_to_cpu(*p_tx->p_fw_cons); + num_bds = ((s16)new_idx - (s16)p_tx->bds_idx); + while (num_bds) { + if (list_empty(&p_tx->active_descq)) + goto out; + + p_pkt = list_first_entry(&p_tx->active_descq, + struct qed_ll2_tx_packet, list_entry); + if (!p_pkt) + goto out; + + p_tx->b_completing_packet = true; + p_tx->cur_completing_packet = *p_pkt; + num_bds_in_packet = p_pkt->bd_used; + list_del(&p_pkt->list_entry); + + if (num_bds < num_bds_in_packet) { + DP_NOTICE(p_hwfn, + "Rest of BDs does not cover whole packet\n"); + goto out; + } + + num_bds -= num_bds_in_packet; + p_tx->bds_idx += num_bds_in_packet; + while (num_bds_in_packet--) + qed_chain_consume(&p_tx->txq_chain); + + p_tx->cur_completing_bd_idx = 1; + b_last_frag = p_tx->cur_completing_bd_idx == p_pkt->bd_used; + list_add_tail(&p_pkt->list_entry, &p_tx->free_descq); + + spin_unlock_irqrestore(&p_tx->lock, flags); + qed_ll2b_complete_tx_packet(p_hwfn, + p_ll2_conn->my_id, + p_pkt->cookie, + p_pkt->bds_set[0].tx_frag, + b_last_frag, !num_bds); + spin_lock_irqsave(&p_tx->lock, flags); + } + + p_tx->b_completing_packet = false; + rc = 0; +out: + spin_unlock_irqrestore(&p_tx->lock, flags); + return rc; +} + +static int qed_ll2_rxq_completion_reg(struct qed_hwfn *p_hwfn, + struct qed_ll2_info *p_ll2_conn, + union core_rx_cqe_union *p_cqe, + unsigned long lock_flags, + bool b_last_cqe) +{ + struct qed_ll2_rx_queue *p_rx = &p_ll2_conn->rx_queue; + struct qed_ll2_rx_packet *p_pkt = NULL; + + if (!list_empty(&p_rx->active_descq)) + p_pkt = list_first_entry(&p_rx->active_descq, + struct qed_ll2_rx_packet, list_entry); + if (!p_pkt) { + DP_NOTICE(p_hwfn, + "LL2 Rx completion but active_descq is empty\n"); + return -EIO; + } + list_del(&p_pkt->list_entry); + + if (qed_chain_consume(&p_rx->rxq_chain) != p_pkt->rxq_bd) + DP_NOTICE(p_hwfn, + "Mismatch between active_descq and the LL2 Rx chain\n"); + list_add_tail(&p_pkt->list_entry, &p_rx->free_descq); + + spin_unlock_irqrestore(&p_rx->lock, lock_flags); + qed_ll2b_complete_rx_packet(p_hwfn, p_ll2_conn->my_id, + p_pkt, &p_cqe->rx_cqe_fp, b_last_cqe); + spin_lock_irqsave(&p_rx->lock, lock_flags); + + return 0; +} + +static int qed_ll2_rxq_completion(struct qed_hwfn *p_hwfn, void *cookie) +{ + struct qed_ll2_info *p_ll2_conn = cookie; + struct qed_ll2_rx_queue *p_rx = &p_ll2_conn->rx_queue; + union core_rx_cqe_union *cqe = NULL; + u16 cq_new_idx = 0, cq_old_idx = 0; + unsigned long flags = 0; + int rc = 0; + + spin_lock_irqsave(&p_rx->lock, flags); + cq_new_idx = le16_to_cpu(*p_rx->p_fw_cons); + cq_old_idx = qed_chain_get_cons_idx(&p_rx->rcq_chain); + + while (cq_new_idx != cq_old_idx) { + bool b_last_cqe = (cq_new_idx == cq_old_idx); + + cqe = qed_chain_consume(&p_rx->rcq_chain); + cq_old_idx = qed_chain_get_cons_idx(&p_rx->rcq_chain); + + DP_VERBOSE(p_hwfn, + QED_MSG_LL2, + "LL2 [sw. cons %04x, fw. at %04x] - Got Packet of type %02x\n", + cq_old_idx, cq_new_idx, cqe->rx_cqe_sp.type); + + switch (cqe->rx_cqe_sp.type) { + case CORE_RX_CQE_TYPE_SLOW_PATH: + DP_NOTICE(p_hwfn, "LL2 - unexpected Rx CQE slowpath\n"); + rc = -EINVAL; + break; + case CORE_RX_CQE_TYPE_REGULAR: + rc = qed_ll2_rxq_completion_reg(p_hwfn, p_ll2_conn, + cqe, flags, b_last_cqe); + break; + default: + rc = -EIO; + } + } + + spin_unlock_irqrestore(&p_rx->lock, flags); + return rc; +} + +void qed_ll2_rxq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle) +{ + struct qed_ll2_info *p_ll2_conn = NULL; + struct qed_ll2_rx_packet *p_pkt = NULL; + struct qed_ll2_rx_queue *p_rx; + + p_ll2_conn = qed_ll2_handle_sanity_inactive(p_hwfn, connection_handle); + if (!p_ll2_conn) + return; + + p_rx = &p_ll2_conn->rx_queue; + + while (!list_empty(&p_rx->active_descq)) { + dma_addr_t rx_buf_addr; + void *cookie; + bool b_last; + + p_pkt = list_first_entry(&p_rx->active_descq, + struct qed_ll2_rx_packet, list_entry); + if (!p_pkt) + break; + + list_del(&p_pkt->list_entry); + list_add_tail(&p_pkt->list_entry, &p_rx->free_descq); + + rx_buf_addr = p_pkt->rx_buf_addr; + cookie = p_pkt->cookie; + + b_last = list_empty(&p_rx->active_descq); + } +} + +static int qed_sp_ll2_rx_queue_start(struct qed_hwfn *p_hwfn, + struct qed_ll2_info *p_ll2_conn, + u8 action_on_error) +{ + enum qed_ll2_conn_type conn_type = p_ll2_conn->conn_type; + struct qed_ll2_rx_queue *p_rx = &p_ll2_conn->rx_queue; + struct core_rx_start_ramrod_data *p_ramrod = NULL; + struct qed_spq_entry *p_ent = NULL; + struct qed_sp_init_data init_data; + u16 cqe_pbl_size; + int rc = 0; + + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.cid = p_ll2_conn->cid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; + + rc = qed_sp_init_request(p_hwfn, &p_ent, + CORE_RAMROD_RX_QUEUE_START, + PROTOCOLID_CORE, &init_data); + if (rc) + return rc; + + p_ramrod = &p_ent->ramrod.core_rx_queue_start; + + p_ramrod->sb_id = cpu_to_le16(qed_int_get_sp_sb_id(p_hwfn)); + p_ramrod->sb_index = p_rx->rx_sb_index; + p_ramrod->complete_event_flg = 1; + + p_ramrod->mtu = cpu_to_le16(p_ll2_conn->mtu); + DMA_REGPAIR_LE(p_ramrod->bd_base, + p_rx->rxq_chain.p_phys_addr); + cqe_pbl_size = (u16)qed_chain_get_page_cnt(&p_rx->rcq_chain); + p_ramrod->num_of_pbl_pages = cpu_to_le16(cqe_pbl_size); + DMA_REGPAIR_LE(p_ramrod->cqe_pbl_addr, + qed_chain_get_pbl_phys(&p_rx->rcq_chain)); + + p_ramrod->drop_ttl0_flg = p_ll2_conn->rx_drop_ttl0_flg; + p_ramrod->inner_vlan_removal_en = p_ll2_conn->rx_vlan_removal_en; + p_ramrod->queue_id = p_ll2_conn->queue_id; + p_ramrod->main_func_queue = 1; + + if ((IS_MF_DEFAULT(p_hwfn) || IS_MF_SI(p_hwfn)) && + p_ramrod->main_func_queue && (conn_type != QED_LL2_TYPE_ROCE)) { + p_ramrod->mf_si_bcast_accept_all = 1; + p_ramrod->mf_si_mcast_accept_all = 1; + } else { + p_ramrod->mf_si_bcast_accept_all = 0; + p_ramrod->mf_si_mcast_accept_all = 0; + } + + p_ramrod->action_on_error.error_type = action_on_error; + return qed_spq_post(p_hwfn, p_ent, NULL); +} + +static int qed_sp_ll2_tx_queue_start(struct qed_hwfn *p_hwfn, + struct qed_ll2_info *p_ll2_conn) +{ + enum qed_ll2_conn_type conn_type = p_ll2_conn->conn_type; + struct qed_ll2_tx_queue *p_tx = &p_ll2_conn->tx_queue; + struct core_tx_start_ramrod_data *p_ramrod = NULL; + struct qed_spq_entry *p_ent = NULL; + struct qed_sp_init_data init_data; + union qed_qm_pq_params pq_params; + u16 pq_id = 0, pbl_size; + int rc = -EINVAL; + + if (!QED_LL2_TX_REGISTERED(p_ll2_conn)) + return 0; + + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.cid = p_ll2_conn->cid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; + + rc = qed_sp_init_request(p_hwfn, &p_ent, + CORE_RAMROD_TX_QUEUE_START, + PROTOCOLID_CORE, &init_data); + if (rc) + return rc; + + p_ramrod = &p_ent->ramrod.core_tx_queue_start; + + p_ramrod->sb_id = cpu_to_le16(qed_int_get_sp_sb_id(p_hwfn)); + p_ramrod->sb_index = p_tx->tx_sb_index; + p_ramrod->mtu = cpu_to_le16(p_ll2_conn->mtu); + p_ll2_conn->tx_stats_en = 1; + p_ramrod->stats_en = p_ll2_conn->tx_stats_en; + p_ramrod->stats_id = p_ll2_conn->tx_stats_id; + + DMA_REGPAIR_LE(p_ramrod->pbl_base_addr, + qed_chain_get_pbl_phys(&p_tx->txq_chain)); + pbl_size = qed_chain_get_page_cnt(&p_tx->txq_chain); + p_ramrod->pbl_size = cpu_to_le16(pbl_size); + + memset(&pq_params, 0, sizeof(pq_params)); + pq_params.core.tc = p_ll2_conn->tx_tc; + pq_id = qed_get_qm_pq(p_hwfn, PROTOCOLID_CORE, &pq_params); + p_ramrod->qm_pq_id = cpu_to_le16(pq_id); + + switch (conn_type) { + case QED_LL2_TYPE_ISCSI: + case QED_LL2_TYPE_ISCSI_OOO: + p_ramrod->conn_type = PROTOCOLID_ISCSI; + break; + case QED_LL2_TYPE_ROCE: + p_ramrod->conn_type = PROTOCOLID_ROCE; + break; + default: + p_ramrod->conn_type = PROTOCOLID_ETH; + DP_NOTICE(p_hwfn, "Unknown connection type: %d\n", conn_type); + } + + return qed_spq_post(p_hwfn, p_ent, NULL); +} + +static int qed_sp_ll2_rx_queue_stop(struct qed_hwfn *p_hwfn, + struct qed_ll2_info *p_ll2_conn) +{ + struct core_rx_stop_ramrod_data *p_ramrod = NULL; + struct qed_spq_entry *p_ent = NULL; + struct qed_sp_init_data init_data; + int rc = -EINVAL; + + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.cid = p_ll2_conn->cid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; + + rc = qed_sp_init_request(p_hwfn, &p_ent, + CORE_RAMROD_RX_QUEUE_STOP, + PROTOCOLID_CORE, &init_data); + if (rc) + return rc; + + p_ramrod = &p_ent->ramrod.core_rx_queue_stop; + + p_ramrod->complete_event_flg = 1; + p_ramrod->queue_id = p_ll2_conn->queue_id; + + return qed_spq_post(p_hwfn, p_ent, NULL); +} + +static int qed_sp_ll2_tx_queue_stop(struct qed_hwfn *p_hwfn, + struct qed_ll2_info *p_ll2_conn) +{ + struct qed_spq_entry *p_ent = NULL; + struct qed_sp_init_data init_data; + int rc = -EINVAL; + + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.cid = p_ll2_conn->cid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; + + rc = qed_sp_init_request(p_hwfn, &p_ent, + CORE_RAMROD_TX_QUEUE_STOP, + PROTOCOLID_CORE, &init_data); + if (rc) + return rc; + + return qed_spq_post(p_hwfn, p_ent, NULL); +} + +static int +qed_ll2_acquire_connection_rx(struct qed_hwfn *p_hwfn, + struct qed_ll2_info *p_ll2_info, u16 rx_num_desc) +{ + struct qed_ll2_rx_packet *p_descq; + u32 capacity; + int rc = 0; + + if (!rx_num_desc) + goto out; + + rc = qed_chain_alloc(p_hwfn->cdev, + QED_CHAIN_USE_TO_CONSUME_PRODUCE, + QED_CHAIN_MODE_NEXT_PTR, + QED_CHAIN_CNT_TYPE_U16, + rx_num_desc, + sizeof(struct core_rx_bd), + &p_ll2_info->rx_queue.rxq_chain); + if (rc) { + DP_NOTICE(p_hwfn, "Failed to allocate ll2 rxq chain\n"); + goto out; + } + + capacity = qed_chain_get_capacity(&p_ll2_info->rx_queue.rxq_chain); + p_descq = kcalloc(capacity, sizeof(struct qed_ll2_rx_packet), + GFP_KERNEL); + if (!p_descq) { + rc = -ENOMEM; + DP_NOTICE(p_hwfn, "Failed to allocate ll2 Rx desc\n"); + goto out; + } + p_ll2_info->rx_queue.descq_array = p_descq; + + rc = qed_chain_alloc(p_hwfn->cdev, + QED_CHAIN_USE_TO_CONSUME_PRODUCE, + QED_CHAIN_MODE_PBL, + QED_CHAIN_CNT_TYPE_U16, + rx_num_desc, + sizeof(struct core_rx_fast_path_cqe), + &p_ll2_info->rx_queue.rcq_chain); + if (rc) { + DP_NOTICE(p_hwfn, "Failed to allocate ll2 rcq chain\n"); + goto out; + } + + DP_VERBOSE(p_hwfn, QED_MSG_LL2, + "Allocated LL2 Rxq [Type %08x] with 0x%08x buffers\n", + p_ll2_info->conn_type, rx_num_desc); + +out: + return rc; +} + +static int qed_ll2_acquire_connection_tx(struct qed_hwfn *p_hwfn, + struct qed_ll2_info *p_ll2_info, + u16 tx_num_desc) +{ + struct qed_ll2_tx_packet *p_descq; + u32 capacity; + int rc = 0; + + if (!tx_num_desc) + goto out; + + rc = qed_chain_alloc(p_hwfn->cdev, + QED_CHAIN_USE_TO_CONSUME_PRODUCE, + QED_CHAIN_MODE_PBL, + QED_CHAIN_CNT_TYPE_U16, + tx_num_desc, + sizeof(struct core_tx_bd), + &p_ll2_info->tx_queue.txq_chain); + if (rc) + goto out; + + capacity = qed_chain_get_capacity(&p_ll2_info->tx_queue.txq_chain); + p_descq = kcalloc(capacity, sizeof(struct qed_ll2_tx_packet), + GFP_KERNEL); + if (!p_descq) { + rc = -ENOMEM; + goto out; + } + p_ll2_info->tx_queue.descq_array = p_descq; + + DP_VERBOSE(p_hwfn, QED_MSG_LL2, + "Allocated LL2 Txq [Type %08x] with 0x%08x buffers\n", + p_ll2_info->conn_type, tx_num_desc); + +out: + if (rc) + DP_NOTICE(p_hwfn, + "Can't allocate memory for Tx LL2 with 0x%08x buffers\n", + tx_num_desc); + return rc; +} + +int qed_ll2_acquire_connection(struct qed_hwfn *p_hwfn, + struct qed_ll2_info *p_params, + u16 rx_num_desc, + u16 tx_num_desc, + u8 *p_connection_handle) +{ + qed_int_comp_cb_t comp_rx_cb, comp_tx_cb; + struct qed_ll2_info *p_ll2_info = NULL; + int rc; + u8 i; + + if (!p_connection_handle || !p_hwfn->p_ll2_info) + return -EINVAL; + + /* Find a free connection to be used */ + for (i = 0; (i < QED_MAX_NUM_OF_LL2_CONNECTIONS); i++) { + mutex_lock(&p_hwfn->p_ll2_info[i].mutex); + if (p_hwfn->p_ll2_info[i].b_active) { + mutex_unlock(&p_hwfn->p_ll2_info[i].mutex); + continue; + } + + p_hwfn->p_ll2_info[i].b_active = true; + p_ll2_info = &p_hwfn->p_ll2_info[i]; + mutex_unlock(&p_hwfn->p_ll2_info[i].mutex); + break; + } + if (!p_ll2_info) + return -EBUSY; + + p_ll2_info->conn_type = p_params->conn_type; + p_ll2_info->mtu = p_params->mtu; + p_ll2_info->rx_drop_ttl0_flg = p_params->rx_drop_ttl0_flg; + p_ll2_info->rx_vlan_removal_en = p_params->rx_vlan_removal_en; + p_ll2_info->tx_tc = p_params->tx_tc; + p_ll2_info->tx_dest = p_params->tx_dest; + p_ll2_info->ai_err_packet_too_big = p_params->ai_err_packet_too_big; + p_ll2_info->ai_err_no_buf = p_params->ai_err_no_buf; + + rc = qed_ll2_acquire_connection_rx(p_hwfn, p_ll2_info, rx_num_desc); + if (rc) + goto q_allocate_fail; + + rc = qed_ll2_acquire_connection_tx(p_hwfn, p_ll2_info, tx_num_desc); + if (rc) + goto q_allocate_fail; + + /* Register callbacks for the Rx/Tx queues */ + comp_rx_cb = qed_ll2_rxq_completion; + comp_tx_cb = qed_ll2_txq_completion; + + if (rx_num_desc) { + qed_int_register_cb(p_hwfn, comp_rx_cb, + &p_hwfn->p_ll2_info[i], + &p_ll2_info->rx_queue.rx_sb_index, + &p_ll2_info->rx_queue.p_fw_cons); + p_ll2_info->rx_queue.b_cb_registred = true; + } + + if (tx_num_desc) { + qed_int_register_cb(p_hwfn, + comp_tx_cb, + &p_hwfn->p_ll2_info[i], + &p_ll2_info->tx_queue.tx_sb_index, + &p_ll2_info->tx_queue.p_fw_cons); + p_ll2_info->tx_queue.b_cb_registred = true; + } + + *p_connection_handle = i; + return rc; + +q_allocate_fail: + qed_ll2_release_connection(p_hwfn, i); + return -ENOMEM; +} + +static int qed_ll2_establish_connection_rx(struct qed_hwfn *p_hwfn, + struct qed_ll2_info *p_ll2_conn) +{ + u8 action_on_error = 0; + + if (!QED_LL2_RX_REGISTERED(p_ll2_conn)) + return 0; + + DIRECT_REG_WR(p_ll2_conn->rx_queue.set_prod_addr, 0x0); + + SET_FIELD(action_on_error, + CORE_RX_ACTION_ON_ERROR_PACKET_TOO_BIG, + p_ll2_conn->ai_err_packet_too_big); + SET_FIELD(action_on_error, + CORE_RX_ACTION_ON_ERROR_NO_BUFF, p_ll2_conn->ai_err_no_buf); + + return qed_sp_ll2_rx_queue_start(p_hwfn, p_ll2_conn, action_on_error); +} + +int qed_ll2_establish_connection(struct qed_hwfn *p_hwfn, u8 connection_handle) +{ + struct qed_ll2_info *p_ll2_conn; + struct qed_ll2_rx_queue *p_rx; + struct qed_ll2_tx_queue *p_tx; + int rc = -EINVAL; + u32 i, capacity; + u8 qid; + + p_ll2_conn = qed_ll2_handle_sanity_lock(p_hwfn, connection_handle); + if (!p_ll2_conn) + return -EINVAL; + p_rx = &p_ll2_conn->rx_queue; + p_tx = &p_ll2_conn->tx_queue; + + qed_chain_reset(&p_rx->rxq_chain); + qed_chain_reset(&p_rx->rcq_chain); + INIT_LIST_HEAD(&p_rx->active_descq); + INIT_LIST_HEAD(&p_rx->free_descq); + INIT_LIST_HEAD(&p_rx->posting_descq); + spin_lock_init(&p_rx->lock); + capacity = qed_chain_get_capacity(&p_rx->rxq_chain); + for (i = 0; i < capacity; i++) + list_add_tail(&p_rx->descq_array[i].list_entry, + &p_rx->free_descq); + *p_rx->p_fw_cons = 0; + + qed_chain_reset(&p_tx->txq_chain); + INIT_LIST_HEAD(&p_tx->active_descq); + INIT_LIST_HEAD(&p_tx->free_descq); + INIT_LIST_HEAD(&p_tx->sending_descq); + spin_lock_init(&p_tx->lock); + capacity = qed_chain_get_capacity(&p_tx->txq_chain); + for (i = 0; i < capacity; i++) + list_add_tail(&p_tx->descq_array[i].list_entry, + &p_tx->free_descq); + p_tx->cur_completing_bd_idx = 0; + p_tx->bds_idx = 0; + p_tx->b_completing_packet = false; + p_tx->cur_send_packet = NULL; + p_tx->cur_send_frag_num = 0; + p_tx->cur_completing_frag_num = 0; + *p_tx->p_fw_cons = 0; + + qed_cxt_acquire_cid(p_hwfn, PROTOCOLID_CORE, &p_ll2_conn->cid); + + qid = p_hwfn->hw_info.resc_start[QED_LL2_QUEUE] + connection_handle; + p_ll2_conn->queue_id = qid; + p_ll2_conn->tx_stats_id = qid; + p_rx->set_prod_addr = (u8 __iomem *)p_hwfn->regview + + GTT_BAR0_MAP_REG_TSDM_RAM + + TSTORM_LL2_RX_PRODS_OFFSET(qid); + p_tx->doorbell_addr = (u8 __iomem *)p_hwfn->doorbells + + qed_db_addr(p_ll2_conn->cid, + DQ_DEMS_LEGACY); + + rc = qed_ll2_establish_connection_rx(p_hwfn, p_ll2_conn); + if (rc) + return rc; + + rc = qed_sp_ll2_tx_queue_start(p_hwfn, p_ll2_conn); + if (rc) + return rc; + + if (p_hwfn->hw_info.personality != QED_PCI_ETH_ROCE) + qed_wr(p_hwfn, p_hwfn->p_main_ptt, PRS_REG_USE_LIGHT_L2, 1); + + return rc; +} + +static void qed_ll2_post_rx_buffer_notify_fw(struct qed_hwfn *p_hwfn, + struct qed_ll2_rx_queue *p_rx, + struct qed_ll2_rx_packet *p_curp) +{ + struct qed_ll2_rx_packet *p_posting_packet = NULL; + struct core_ll2_rx_prod rx_prod = { 0, 0, 0 }; + bool b_notify_fw = false; + u16 bd_prod, cq_prod; + + /* This handles the flushing of already posted buffers */ + while (!list_empty(&p_rx->posting_descq)) { + p_posting_packet = list_first_entry(&p_rx->posting_descq, + struct qed_ll2_rx_packet, + list_entry); + list_del(&p_posting_packet->list_entry); + list_add_tail(&p_posting_packet->list_entry, + &p_rx->active_descq); + b_notify_fw = true; + } + + /* This handles the supplied packet [if there is one] */ + if (p_curp) { + list_add_tail(&p_curp->list_entry, &p_rx->active_descq); + b_notify_fw = true; + } + + if (!b_notify_fw) + return; + + bd_prod = qed_chain_get_prod_idx(&p_rx->rxq_chain); + cq_prod = qed_chain_get_prod_idx(&p_rx->rcq_chain); + rx_prod.bd_prod = cpu_to_le16(bd_prod); + rx_prod.cqe_prod = cpu_to_le16(cq_prod); + DIRECT_REG_WR(p_rx->set_prod_addr, *((u32 *)&rx_prod)); +} + +int qed_ll2_post_rx_buffer(struct qed_hwfn *p_hwfn, + u8 connection_handle, + dma_addr_t addr, + u16 buf_len, void *cookie, u8 notify_fw) +{ + struct core_rx_bd_with_buff_len *p_curb = NULL; + struct qed_ll2_rx_packet *p_curp = NULL; + struct qed_ll2_info *p_ll2_conn; + struct qed_ll2_rx_queue *p_rx; + unsigned long flags; + void *p_data; + int rc = 0; + + p_ll2_conn = qed_ll2_handle_sanity(p_hwfn, connection_handle); + if (!p_ll2_conn) + return -EINVAL; + p_rx = &p_ll2_conn->rx_queue; + + spin_lock_irqsave(&p_rx->lock, flags); + if (!list_empty(&p_rx->free_descq)) + p_curp = list_first_entry(&p_rx->free_descq, + struct qed_ll2_rx_packet, list_entry); + if (p_curp) { + if (qed_chain_get_elem_left(&p_rx->rxq_chain) && + qed_chain_get_elem_left(&p_rx->rcq_chain)) { + p_data = qed_chain_produce(&p_rx->rxq_chain); + p_curb = (struct core_rx_bd_with_buff_len *)p_data; + qed_chain_produce(&p_rx->rcq_chain); + } + } + + /* If we're lacking entires, let's try to flush buffers to FW */ + if (!p_curp || !p_curb) { + rc = -EBUSY; + p_curp = NULL; + goto out_notify; + } + + /* We have an Rx packet we can fill */ + DMA_REGPAIR_LE(p_curb->addr, addr); + p_curb->buff_length = cpu_to_le16(buf_len); + p_curp->rx_buf_addr = addr; + p_curp->cookie = cookie; + p_curp->rxq_bd = p_curb; + p_curp->buf_length = buf_len; + list_del(&p_curp->list_entry); + + /* Check if we only want to enqueue this packet without informing FW */ + if (!notify_fw) { + list_add_tail(&p_curp->list_entry, &p_rx->posting_descq); + goto out; + } + +out_notify: + qed_ll2_post_rx_buffer_notify_fw(p_hwfn, p_rx, p_curp); +out: + spin_unlock_irqrestore(&p_rx->lock, flags); + return rc; +} + +static void qed_ll2_prepare_tx_packet_set(struct qed_hwfn *p_hwfn, + struct qed_ll2_tx_queue *p_tx, + struct qed_ll2_tx_packet *p_curp, + u8 num_of_bds, + dma_addr_t first_frag, + u16 first_frag_len, void *p_cookie, + u8 notify_fw) +{ + list_del(&p_curp->list_entry); + p_curp->cookie = p_cookie; + p_curp->bd_used = num_of_bds; + p_curp->notify_fw = notify_fw; + p_tx->cur_send_packet = p_curp; + p_tx->cur_send_frag_num = 0; + + p_curp->bds_set[p_tx->cur_send_frag_num].tx_frag = first_frag; + p_curp->bds_set[p_tx->cur_send_frag_num].frag_len = first_frag_len; + p_tx->cur_send_frag_num++; +} + +static void qed_ll2_prepare_tx_packet_set_bd(struct qed_hwfn *p_hwfn, + struct qed_ll2_info *p_ll2, + struct qed_ll2_tx_packet *p_curp, + u8 num_of_bds, + enum core_tx_dest tx_dest, + u16 vlan, + u8 bd_flags, + u16 l4_hdr_offset_w, + dma_addr_t first_frag, + u16 first_frag_len) +{ + struct qed_chain *p_tx_chain = &p_ll2->tx_queue.txq_chain; + u16 prod_idx = qed_chain_get_prod_idx(p_tx_chain); + struct core_tx_bd *start_bd = NULL; + u16 frag_idx; + + start_bd = (struct core_tx_bd *)qed_chain_produce(p_tx_chain); + start_bd->nw_vlan_or_lb_echo = cpu_to_le16(vlan); + SET_FIELD(start_bd->bitfield1, CORE_TX_BD_L4_HDR_OFFSET_W, + cpu_to_le16(l4_hdr_offset_w)); + SET_FIELD(start_bd->bitfield1, CORE_TX_BD_TX_DST, tx_dest); + start_bd->bd_flags.as_bitfield = bd_flags; + start_bd->bd_flags.as_bitfield |= CORE_TX_BD_FLAGS_START_BD_MASK << + CORE_TX_BD_FLAGS_START_BD_SHIFT; + SET_FIELD(start_bd->bitfield0, CORE_TX_BD_NBDS, num_of_bds); + DMA_REGPAIR_LE(start_bd->addr, first_frag); + start_bd->nbytes = cpu_to_le16(first_frag_len); + + DP_VERBOSE(p_hwfn, + (NETIF_MSG_TX_QUEUED | QED_MSG_LL2), + "LL2 [q 0x%02x cid 0x%08x type 0x%08x] Tx Producer at [0x%04x] - set with a %04x bytes %02x BDs buffer at %08x:%08x\n", + p_ll2->queue_id, + p_ll2->cid, + p_ll2->conn_type, + prod_idx, + first_frag_len, + num_of_bds, + le32_to_cpu(start_bd->addr.hi), + le32_to_cpu(start_bd->addr.lo)); + + if (p_ll2->tx_queue.cur_send_frag_num == num_of_bds) + return; + + /* Need to provide the packet with additional BDs for frags */ + for (frag_idx = p_ll2->tx_queue.cur_send_frag_num; + frag_idx < num_of_bds; frag_idx++) { + struct core_tx_bd **p_bd = &p_curp->bds_set[frag_idx].txq_bd; + + *p_bd = (struct core_tx_bd *)qed_chain_produce(p_tx_chain); + (*p_bd)->bd_flags.as_bitfield = 0; + (*p_bd)->bitfield1 = 0; + (*p_bd)->bitfield0 = 0; + p_curp->bds_set[frag_idx].tx_frag = 0; + p_curp->bds_set[frag_idx].frag_len = 0; + } +} + +/* This should be called while the Txq spinlock is being held */ +static void qed_ll2_tx_packet_notify(struct qed_hwfn *p_hwfn, + struct qed_ll2_info *p_ll2_conn) +{ + bool b_notify = p_ll2_conn->tx_queue.cur_send_packet->notify_fw; + struct qed_ll2_tx_queue *p_tx = &p_ll2_conn->tx_queue; + struct qed_ll2_tx_packet *p_pkt = NULL; + struct core_db_data db_msg = { 0, 0, 0 }; + u16 bd_prod; + + /* If there are missing BDs, don't do anything now */ + if (p_ll2_conn->tx_queue.cur_send_frag_num != + p_ll2_conn->tx_queue.cur_send_packet->bd_used) + return; + + /* Push the current packet to the list and clean after it */ + list_add_tail(&p_ll2_conn->tx_queue.cur_send_packet->list_entry, + &p_ll2_conn->tx_queue.sending_descq); + p_ll2_conn->tx_queue.cur_send_packet = NULL; + p_ll2_conn->tx_queue.cur_send_frag_num = 0; + + /* Notify FW of packet only if requested to */ + if (!b_notify) + return; + + bd_prod = qed_chain_get_prod_idx(&p_ll2_conn->tx_queue.txq_chain); + + while (!list_empty(&p_tx->sending_descq)) { + p_pkt = list_first_entry(&p_tx->sending_descq, + struct qed_ll2_tx_packet, list_entry); + if (!p_pkt) + break; + + list_del(&p_pkt->list_entry); + list_add_tail(&p_pkt->list_entry, &p_tx->active_descq); + } + + SET_FIELD(db_msg.params, CORE_DB_DATA_DEST, DB_DEST_XCM); + SET_FIELD(db_msg.params, CORE_DB_DATA_AGG_CMD, DB_AGG_CMD_SET); + SET_FIELD(db_msg.params, CORE_DB_DATA_AGG_VAL_SEL, + DQ_XCM_CORE_TX_BD_PROD_CMD); + db_msg.agg_flags = DQ_XCM_CORE_DQ_CF_CMD; + db_msg.spq_prod = cpu_to_le16(bd_prod); + + /* Make sure the BDs data is updated before ringing the doorbell */ + wmb(); + + DIRECT_REG_WR(p_tx->doorbell_addr, *((u32 *)&db_msg)); + + DP_VERBOSE(p_hwfn, + (NETIF_MSG_TX_QUEUED | QED_MSG_LL2), + "LL2 [q 0x%02x cid 0x%08x type 0x%08x] Doorbelled [producer 0x%04x]\n", + p_ll2_conn->queue_id, + p_ll2_conn->cid, p_ll2_conn->conn_type, db_msg.spq_prod); +} + +int qed_ll2_prepare_tx_packet(struct qed_hwfn *p_hwfn, + u8 connection_handle, + u8 num_of_bds, + u16 vlan, + u8 bd_flags, + u16 l4_hdr_offset_w, + dma_addr_t first_frag, + u16 first_frag_len, void *cookie, u8 notify_fw) +{ + struct qed_ll2_tx_packet *p_curp = NULL; + struct qed_ll2_info *p_ll2_conn = NULL; + struct qed_ll2_tx_queue *p_tx; + struct qed_chain *p_tx_chain; + unsigned long flags; + int rc = 0; + + p_ll2_conn = qed_ll2_handle_sanity(p_hwfn, connection_handle); + if (!p_ll2_conn) + return -EINVAL; + p_tx = &p_ll2_conn->tx_queue; + p_tx_chain = &p_tx->txq_chain; + + if (num_of_bds > CORE_LL2_TX_MAX_BDS_PER_PACKET) + return -EIO; + + spin_lock_irqsave(&p_tx->lock, flags); + if (p_tx->cur_send_packet) { + rc = -EEXIST; + goto out; + } + + /* Get entry, but only if we have tx elements for it */ + if (!list_empty(&p_tx->free_descq)) + p_curp = list_first_entry(&p_tx->free_descq, + struct qed_ll2_tx_packet, list_entry); + if (p_curp && qed_chain_get_elem_left(p_tx_chain) < num_of_bds) + p_curp = NULL; + + if (!p_curp) { + rc = -EBUSY; + goto out; + } + + /* Prepare packet and BD, and perhaps send a doorbell to FW */ + qed_ll2_prepare_tx_packet_set(p_hwfn, p_tx, p_curp, + num_of_bds, first_frag, + first_frag_len, cookie, notify_fw); + qed_ll2_prepare_tx_packet_set_bd(p_hwfn, p_ll2_conn, p_curp, + num_of_bds, CORE_TX_DEST_NW, + vlan, bd_flags, l4_hdr_offset_w, + first_frag, first_frag_len); + + qed_ll2_tx_packet_notify(p_hwfn, p_ll2_conn); + +out: + spin_unlock_irqrestore(&p_tx->lock, flags); + return rc; +} + +int qed_ll2_set_fragment_of_tx_packet(struct qed_hwfn *p_hwfn, + u8 connection_handle, + dma_addr_t addr, u16 nbytes) +{ + struct qed_ll2_tx_packet *p_cur_send_packet = NULL; + struct qed_ll2_info *p_ll2_conn = NULL; + u16 cur_send_frag_num = 0; + struct core_tx_bd *p_bd; + unsigned long flags; + + p_ll2_conn = qed_ll2_handle_sanity(p_hwfn, connection_handle); + if (!p_ll2_conn) + return -EINVAL; + + if (!p_ll2_conn->tx_queue.cur_send_packet) + return -EINVAL; + + p_cur_send_packet = p_ll2_conn->tx_queue.cur_send_packet; + cur_send_frag_num = p_ll2_conn->tx_queue.cur_send_frag_num; + + if (cur_send_frag_num >= p_cur_send_packet->bd_used) + return -EINVAL; + + /* Fill the BD information, and possibly notify FW */ + p_bd = p_cur_send_packet->bds_set[cur_send_frag_num].txq_bd; + DMA_REGPAIR_LE(p_bd->addr, addr); + p_bd->nbytes = cpu_to_le16(nbytes); + p_cur_send_packet->bds_set[cur_send_frag_num].tx_frag = addr; + p_cur_send_packet->bds_set[cur_send_frag_num].frag_len = nbytes; + + p_ll2_conn->tx_queue.cur_send_frag_num++; + + spin_lock_irqsave(&p_ll2_conn->tx_queue.lock, flags); + qed_ll2_tx_packet_notify(p_hwfn, p_ll2_conn); + spin_unlock_irqrestore(&p_ll2_conn->tx_queue.lock, flags); + + return 0; +} + +int qed_ll2_terminate_connection(struct qed_hwfn *p_hwfn, u8 connection_handle) +{ + struct qed_ll2_info *p_ll2_conn = NULL; + int rc = -EINVAL; + + p_ll2_conn = qed_ll2_handle_sanity_lock(p_hwfn, connection_handle); + if (!p_ll2_conn) + return -EINVAL; + + /* Stop Tx & Rx of connection, if needed */ + if (QED_LL2_TX_REGISTERED(p_ll2_conn)) { + rc = qed_sp_ll2_tx_queue_stop(p_hwfn, p_ll2_conn); + if (rc) + return rc; + qed_ll2_txq_flush(p_hwfn, connection_handle); + } + + if (QED_LL2_RX_REGISTERED(p_ll2_conn)) { + rc = qed_sp_ll2_rx_queue_stop(p_hwfn, p_ll2_conn); + if (rc) + return rc; + qed_ll2_rxq_flush(p_hwfn, connection_handle); + } + + return rc; +} + +void qed_ll2_release_connection(struct qed_hwfn *p_hwfn, u8 connection_handle) +{ + struct qed_ll2_info *p_ll2_conn = NULL; + + p_ll2_conn = qed_ll2_handle_sanity(p_hwfn, connection_handle); + if (!p_ll2_conn) + return; + + if (QED_LL2_RX_REGISTERED(p_ll2_conn)) { + p_ll2_conn->rx_queue.b_cb_registred = false; + qed_int_unregister_cb(p_hwfn, p_ll2_conn->rx_queue.rx_sb_index); + } + + if (QED_LL2_TX_REGISTERED(p_ll2_conn)) { + p_ll2_conn->tx_queue.b_cb_registred = false; + qed_int_unregister_cb(p_hwfn, p_ll2_conn->tx_queue.tx_sb_index); + } + + kfree(p_ll2_conn->tx_queue.descq_array); + qed_chain_free(p_hwfn->cdev, &p_ll2_conn->tx_queue.txq_chain); + + kfree(p_ll2_conn->rx_queue.descq_array); + qed_chain_free(p_hwfn->cdev, &p_ll2_conn->rx_queue.rxq_chain); + qed_chain_free(p_hwfn->cdev, &p_ll2_conn->rx_queue.rcq_chain); + + qed_cxt_release_cid(p_hwfn, p_ll2_conn->cid); + + mutex_lock(&p_ll2_conn->mutex); + p_ll2_conn->b_active = false; + mutex_unlock(&p_ll2_conn->mutex); +} + +struct qed_ll2_info *qed_ll2_alloc(struct qed_hwfn *p_hwfn) +{ + struct qed_ll2_info *p_ll2_connections; + u8 i; + + /* Allocate LL2's set struct */ + p_ll2_connections = kcalloc(QED_MAX_NUM_OF_LL2_CONNECTIONS, + sizeof(struct qed_ll2_info), GFP_KERNEL); + if (!p_ll2_connections) { + DP_NOTICE(p_hwfn, "Failed to allocate `struct qed_ll2'\n"); + return NULL; + } + + for (i = 0; i < QED_MAX_NUM_OF_LL2_CONNECTIONS; i++) + p_ll2_connections[i].my_id = i; + + return p_ll2_connections; +} + +void qed_ll2_setup(struct qed_hwfn *p_hwfn, + struct qed_ll2_info *p_ll2_connections) +{ + int i; + + for (i = 0; i < QED_MAX_NUM_OF_LL2_CONNECTIONS; i++) + mutex_init(&p_ll2_connections[i].mutex); +} + +void qed_ll2_free(struct qed_hwfn *p_hwfn, + struct qed_ll2_info *p_ll2_connections) +{ + kfree(p_ll2_connections); +} + +static void _qed_ll2_get_tstats(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + struct qed_ll2_info *p_ll2_conn, + struct qed_ll2_stats *p_stats) +{ + struct core_ll2_tstorm_per_queue_stat tstats; + u8 qid = p_ll2_conn->queue_id; + u32 tstats_addr; + + memset(&tstats, 0, sizeof(tstats)); + tstats_addr = BAR0_MAP_REG_TSDM_RAM + + CORE_LL2_TSTORM_PER_QUEUE_STAT_OFFSET(qid); + qed_memcpy_from(p_hwfn, p_ptt, &tstats, tstats_addr, sizeof(tstats)); + + p_stats->packet_too_big_discard = + HILO_64_REGPAIR(tstats.packet_too_big_discard); + p_stats->no_buff_discard = HILO_64_REGPAIR(tstats.no_buff_discard); +} + +static void _qed_ll2_get_ustats(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + struct qed_ll2_info *p_ll2_conn, + struct qed_ll2_stats *p_stats) +{ + struct core_ll2_ustorm_per_queue_stat ustats; + u8 qid = p_ll2_conn->queue_id; + u32 ustats_addr; + + memset(&ustats, 0, sizeof(ustats)); + ustats_addr = BAR0_MAP_REG_USDM_RAM + + CORE_LL2_USTORM_PER_QUEUE_STAT_OFFSET(qid); + qed_memcpy_from(p_hwfn, p_ptt, &ustats, ustats_addr, sizeof(ustats)); + + p_stats->rcv_ucast_bytes = HILO_64_REGPAIR(ustats.rcv_ucast_bytes); + p_stats->rcv_mcast_bytes = HILO_64_REGPAIR(ustats.rcv_mcast_bytes); + p_stats->rcv_bcast_bytes = HILO_64_REGPAIR(ustats.rcv_bcast_bytes); + p_stats->rcv_ucast_pkts = HILO_64_REGPAIR(ustats.rcv_ucast_pkts); + p_stats->rcv_mcast_pkts = HILO_64_REGPAIR(ustats.rcv_mcast_pkts); + p_stats->rcv_bcast_pkts = HILO_64_REGPAIR(ustats.rcv_bcast_pkts); +} + +static void _qed_ll2_get_pstats(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + struct qed_ll2_info *p_ll2_conn, + struct qed_ll2_stats *p_stats) +{ + struct core_ll2_pstorm_per_queue_stat pstats; + u8 stats_id = p_ll2_conn->tx_stats_id; + u32 pstats_addr; + + memset(&pstats, 0, sizeof(pstats)); + pstats_addr = BAR0_MAP_REG_PSDM_RAM + + CORE_LL2_PSTORM_PER_QUEUE_STAT_OFFSET(stats_id); + qed_memcpy_from(p_hwfn, p_ptt, &pstats, pstats_addr, sizeof(pstats)); + + p_stats->sent_ucast_bytes = HILO_64_REGPAIR(pstats.sent_ucast_bytes); + p_stats->sent_mcast_bytes = HILO_64_REGPAIR(pstats.sent_mcast_bytes); + p_stats->sent_bcast_bytes = HILO_64_REGPAIR(pstats.sent_bcast_bytes); + p_stats->sent_ucast_pkts = HILO_64_REGPAIR(pstats.sent_ucast_pkts); + p_stats->sent_mcast_pkts = HILO_64_REGPAIR(pstats.sent_mcast_pkts); + p_stats->sent_bcast_pkts = HILO_64_REGPAIR(pstats.sent_bcast_pkts); +} + +int qed_ll2_get_stats(struct qed_hwfn *p_hwfn, + u8 connection_handle, struct qed_ll2_stats *p_stats) +{ + struct qed_ll2_info *p_ll2_conn = NULL; + struct qed_ptt *p_ptt; + + memset(p_stats, 0, sizeof(*p_stats)); + + if ((connection_handle >= QED_MAX_NUM_OF_LL2_CONNECTIONS) || + !p_hwfn->p_ll2_info) + return -EINVAL; + + p_ll2_conn = &p_hwfn->p_ll2_info[connection_handle]; + + p_ptt = qed_ptt_acquire(p_hwfn); + if (!p_ptt) { + DP_ERR(p_hwfn, "Failed to acquire ptt\n"); + return -EINVAL; + } + + _qed_ll2_get_tstats(p_hwfn, p_ptt, p_ll2_conn, p_stats); + _qed_ll2_get_ustats(p_hwfn, p_ptt, p_ll2_conn, p_stats); + if (p_ll2_conn->tx_stats_en) + _qed_ll2_get_pstats(p_hwfn, p_ptt, p_ll2_conn, p_stats); + + qed_ptt_release(p_hwfn, p_ptt); + return 0; +} + +static void qed_ll2_register_cb_ops(struct qed_dev *cdev, + const struct qed_ll2_cb_ops *ops, + void *cookie) +{ + cdev->ll2->cbs = ops; + cdev->ll2->cb_cookie = cookie; +} + +static int qed_ll2_start(struct qed_dev *cdev, struct qed_ll2_params *params) +{ + struct qed_ll2_info ll2_info; + struct qed_ll2_buffer *buffer; + enum qed_ll2_conn_type conn_type; + struct qed_ptt *p_ptt; + int rc, i; + + /* Initialize LL2 locks & lists */ + INIT_LIST_HEAD(&cdev->ll2->list); + spin_lock_init(&cdev->ll2->lock); + cdev->ll2->rx_size = NET_SKB_PAD + ETH_HLEN + + L1_CACHE_BYTES + params->mtu; + cdev->ll2->frags_mapped = params->frags_mapped; + + /*Allocate memory for LL2 */ + DP_INFO(cdev, "Allocating LL2 buffers of size %08x bytes\n", + cdev->ll2->rx_size); + for (i = 0; i < QED_LL2_RX_SIZE; i++) { + buffer = kzalloc(sizeof(*buffer), GFP_KERNEL); + if (!buffer) { + DP_INFO(cdev, "Failed to allocate LL2 buffers\n"); + goto fail; + } + + rc = qed_ll2_alloc_buffer(cdev, (u8 **)&buffer->data, + &buffer->phys_addr); + if (rc) { + kfree(buffer); + goto fail; + } + + list_add_tail(&buffer->list, &cdev->ll2->list); + } + + switch (QED_LEADING_HWFN(cdev)->hw_info.personality) { + case QED_PCI_ISCSI: + conn_type = QED_LL2_TYPE_ISCSI; + break; + case QED_PCI_ETH_ROCE: + conn_type = QED_LL2_TYPE_ROCE; + break; + default: + conn_type = QED_LL2_TYPE_TEST; + } + + /* Prepare the temporary ll2 information */ + memset(&ll2_info, 0, sizeof(ll2_info)); + ll2_info.conn_type = conn_type; + ll2_info.mtu = params->mtu; + ll2_info.rx_drop_ttl0_flg = params->drop_ttl0_packets; + ll2_info.rx_vlan_removal_en = params->rx_vlan_stripping; + ll2_info.tx_tc = 0; + ll2_info.tx_dest = CORE_TX_DEST_NW; + + rc = qed_ll2_acquire_connection(QED_LEADING_HWFN(cdev), &ll2_info, + QED_LL2_RX_SIZE, QED_LL2_TX_SIZE, + &cdev->ll2->handle); + if (rc) { + DP_INFO(cdev, "Failed to acquire LL2 connection\n"); + goto fail; + } + + rc = qed_ll2_establish_connection(QED_LEADING_HWFN(cdev), + cdev->ll2->handle); + if (rc) { + DP_INFO(cdev, "Failed to establish LL2 connection\n"); + goto release_fail; + } + + /* Post all Rx buffers to FW */ + spin_lock_bh(&cdev->ll2->lock); + list_for_each_entry(buffer, &cdev->ll2->list, list) { + rc = qed_ll2_post_rx_buffer(QED_LEADING_HWFN(cdev), + cdev->ll2->handle, + buffer->phys_addr, 0, buffer, 1); + if (rc) { + DP_INFO(cdev, + "Failed to post an Rx buffer; Deleting it\n"); + dma_unmap_single(&cdev->pdev->dev, buffer->phys_addr, + cdev->ll2->rx_size, DMA_FROM_DEVICE); + kfree(buffer->data); + list_del(&buffer->list); + kfree(buffer); + } else { + cdev->ll2->rx_cnt++; + } + } + spin_unlock_bh(&cdev->ll2->lock); + + if (!cdev->ll2->rx_cnt) { + DP_INFO(cdev, "Failed passing even a single Rx buffer\n"); + goto release_terminate; + } + + if (!is_valid_ether_addr(params->ll2_mac_address)) { + DP_INFO(cdev, "Invalid Ethernet address\n"); + goto release_terminate; + } + + p_ptt = qed_ptt_acquire(QED_LEADING_HWFN(cdev)); + if (!p_ptt) { + DP_INFO(cdev, "Failed to acquire PTT\n"); + goto release_terminate; + } + + rc = qed_llh_add_mac_filter(QED_LEADING_HWFN(cdev), p_ptt, + params->ll2_mac_address); + qed_ptt_release(QED_LEADING_HWFN(cdev), p_ptt); + if (rc) { + DP_ERR(cdev, "Failed to allocate LLH filter\n"); + goto release_terminate_all; + } + + ether_addr_copy(cdev->ll2_mac_address, params->ll2_mac_address); + + return 0; + +release_terminate_all: + +release_terminate: + qed_ll2_terminate_connection(QED_LEADING_HWFN(cdev), cdev->ll2->handle); +release_fail: + qed_ll2_release_connection(QED_LEADING_HWFN(cdev), cdev->ll2->handle); +fail: + qed_ll2_kill_buffers(cdev); + cdev->ll2->handle = QED_LL2_UNUSED_HANDLE; + return -EINVAL; +} + +static int qed_ll2_stop(struct qed_dev *cdev) +{ + struct qed_ptt *p_ptt; + int rc; + + if (cdev->ll2->handle == QED_LL2_UNUSED_HANDLE) + return 0; + + p_ptt = qed_ptt_acquire(QED_LEADING_HWFN(cdev)); + if (!p_ptt) { + DP_INFO(cdev, "Failed to acquire PTT\n"); + goto fail; + } + + qed_llh_remove_mac_filter(QED_LEADING_HWFN(cdev), p_ptt, + cdev->ll2_mac_address); + qed_ptt_release(QED_LEADING_HWFN(cdev), p_ptt); + eth_zero_addr(cdev->ll2_mac_address); + + rc = qed_ll2_terminate_connection(QED_LEADING_HWFN(cdev), + cdev->ll2->handle); + if (rc) + DP_INFO(cdev, "Failed to terminate LL2 connection\n"); + + qed_ll2_kill_buffers(cdev); + + qed_ll2_release_connection(QED_LEADING_HWFN(cdev), cdev->ll2->handle); + cdev->ll2->handle = QED_LL2_UNUSED_HANDLE; + + return rc; +fail: + return -EINVAL; +} + +static int qed_ll2_start_xmit(struct qed_dev *cdev, struct sk_buff *skb) +{ + const skb_frag_t *frag; + int rc = -EINVAL, i; + dma_addr_t mapping; + u16 vlan = 0; + u8 flags = 0; + + if (unlikely(skb->ip_summed != CHECKSUM_NONE)) { + DP_INFO(cdev, "Cannot transmit a checksumed packet\n"); + return -EINVAL; + } + + if (1 + skb_shinfo(skb)->nr_frags > CORE_LL2_TX_MAX_BDS_PER_PACKET) { + DP_ERR(cdev, "Cannot transmit a packet with %d fragments\n", + 1 + skb_shinfo(skb)->nr_frags); + return -EINVAL; + } + + mapping = dma_map_single(&cdev->pdev->dev, skb->data, + skb->len, DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(&cdev->pdev->dev, mapping))) { + DP_NOTICE(cdev, "SKB mapping failed\n"); + return -EINVAL; + } + + /* Request HW to calculate IP csum */ + if (!((vlan_get_protocol(skb) == htons(ETH_P_IPV6)) && + ipv6_hdr(skb)->nexthdr == NEXTHDR_IPV6)) + flags |= BIT(CORE_TX_BD_FLAGS_IP_CSUM_SHIFT); + + if (skb_vlan_tag_present(skb)) { + vlan = skb_vlan_tag_get(skb); + flags |= BIT(CORE_TX_BD_FLAGS_VLAN_INSERTION_SHIFT); + } + + rc = qed_ll2_prepare_tx_packet(QED_LEADING_HWFN(cdev), + cdev->ll2->handle, + 1 + skb_shinfo(skb)->nr_frags, + vlan, flags, 0, mapping, + skb->len, skb, 1); + if (rc) + goto err; + + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + frag = &skb_shinfo(skb)->frags[i]; + if (!cdev->ll2->frags_mapped) { + mapping = skb_frag_dma_map(&cdev->pdev->dev, frag, 0, + skb_frag_size(frag), + DMA_TO_DEVICE); + + if (unlikely(dma_mapping_error(&cdev->pdev->dev, + mapping))) { + DP_NOTICE(cdev, + "Unable to map frag - dropping packet\n"); + goto err; + } + } else { + mapping = page_to_phys(skb_frag_page(frag)) | + frag->page_offset; + } + + rc = qed_ll2_set_fragment_of_tx_packet(QED_LEADING_HWFN(cdev), + cdev->ll2->handle, + mapping, + skb_frag_size(frag)); + + /* if failed not much to do here, partial packet has been posted + * we can't free memory, will need to wait for completion. + */ + if (rc) + goto err2; + } + + return 0; + +err: + dma_unmap_single(&cdev->pdev->dev, mapping, skb->len, DMA_TO_DEVICE); + +err2: + return rc; +} + +static int qed_ll2_stats(struct qed_dev *cdev, struct qed_ll2_stats *stats) +{ + if (!cdev->ll2) + return -EINVAL; + + return qed_ll2_get_stats(QED_LEADING_HWFN(cdev), + cdev->ll2->handle, stats); +} + +const struct qed_ll2_ops qed_ll2_ops_pass = { + .start = &qed_ll2_start, + .stop = &qed_ll2_stop, + .start_xmit = &qed_ll2_start_xmit, + .register_cb_ops = &qed_ll2_register_cb_ops, + .get_stats = &qed_ll2_stats, +}; + +int qed_ll2_alloc_if(struct qed_dev *cdev) +{ + cdev->ll2 = kzalloc(sizeof(*cdev->ll2), GFP_KERNEL); + return cdev->ll2 ? 0 : -ENOMEM; +} + +void qed_ll2_dealloc_if(struct qed_dev *cdev) +{ + kfree(cdev->ll2); + cdev->ll2 = NULL; +} diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.h b/drivers/net/ethernet/qlogic/qed/qed_ll2.h new file mode 100644 index 000000000000..a037c4845928 --- /dev/null +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.h @@ -0,0 +1,289 @@ +/* QLogic qed NIC Driver + * + * Copyright (c) 2015 QLogic Corporation + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. + */ + +#ifndef _QED_LL2_H +#define _QED_LL2_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include "qed.h" +#include "qed_hsi.h" +#include "qed_sp.h" + +#define QED_MAX_NUM_OF_LL2_CONNECTIONS (4) + +enum qed_ll2_conn_type { + QED_LL2_TYPE_RESERVED, + QED_LL2_TYPE_ISCSI, + QED_LL2_TYPE_TEST, + QED_LL2_TYPE_ISCSI_OOO, + QED_LL2_TYPE_RESERVED2, + QED_LL2_TYPE_ROCE, + QED_LL2_TYPE_RESERVED3, + MAX_QED_LL2_RX_CONN_TYPE +}; + +struct qed_ll2_rx_packet { + struct list_head list_entry; + struct core_rx_bd_with_buff_len *rxq_bd; + dma_addr_t rx_buf_addr; + u16 buf_length; + void *cookie; + u8 placement_offset; + u16 parse_flags; + u16 packet_length; + u16 vlan; + u32 opaque_data[2]; +}; + +struct qed_ll2_tx_packet { + struct list_head list_entry; + u16 bd_used; + u16 vlan; + u16 l4_hdr_offset_w; + u8 bd_flags; + bool notify_fw; + void *cookie; + + struct { + struct core_tx_bd *txq_bd; + dma_addr_t tx_frag; + u16 frag_len; + } bds_set[ETH_TX_MAX_BDS_PER_NON_LSO_PACKET]; +}; + +struct qed_ll2_rx_queue { + /* Lock protecting the Rx queue manipulation */ + spinlock_t lock; + struct qed_chain rxq_chain; + struct qed_chain rcq_chain; + u8 rx_sb_index; + bool b_cb_registred; + __le16 *p_fw_cons; + struct list_head active_descq; + struct list_head free_descq; + struct list_head posting_descq; + struct qed_ll2_rx_packet *descq_array; + void __iomem *set_prod_addr; +}; + +struct qed_ll2_tx_queue { + /* Lock protecting the Tx queue manipulation */ + spinlock_t lock; + struct qed_chain txq_chain; + u8 tx_sb_index; + bool b_cb_registred; + __le16 *p_fw_cons; + struct list_head active_descq; + struct list_head free_descq; + struct list_head sending_descq; + struct qed_ll2_tx_packet *descq_array; + struct qed_ll2_tx_packet *cur_send_packet; + struct qed_ll2_tx_packet cur_completing_packet; + u16 cur_completing_bd_idx; + void __iomem *doorbell_addr; + u16 bds_idx; + u16 cur_send_frag_num; + u16 cur_completing_frag_num; + bool b_completing_packet; +}; + +struct qed_ll2_info { + /* Lock protecting the state of LL2 */ + struct mutex mutex; + enum qed_ll2_conn_type conn_type; + u32 cid; + u8 my_id; + u8 queue_id; + u8 tx_stats_id; + bool b_active; + u16 mtu; + u8 rx_drop_ttl0_flg; + u8 rx_vlan_removal_en; + u8 tx_tc; + enum core_tx_dest tx_dest; + enum core_error_handle ai_err_packet_too_big; + enum core_error_handle ai_err_no_buf; + u8 tx_stats_en; + struct qed_ll2_rx_queue rx_queue; + struct qed_ll2_tx_queue tx_queue; +}; + +/** + * @brief qed_ll2_acquire_connection - allocate resources, + * starts rx & tx (if relevant) queues pair. Provides + * connecion handler as output parameter. + * + * @param p_hwfn + * @param p_params Contain various configuration properties + * @param rx_num_desc + * @param tx_num_desc + * + * @param p_connection_handle Output container for LL2 connection's handle + * + * @return 0 on success, failure otherwise + */ +int qed_ll2_acquire_connection(struct qed_hwfn *p_hwfn, + struct qed_ll2_info *p_params, + u16 rx_num_desc, + u16 tx_num_desc, + u8 *p_connection_handle); + +/** + * @brief qed_ll2_establish_connection - start previously + * allocated LL2 queues pair + * + * @param p_hwfn + * @param p_ptt + * @param connection_handle LL2 connection's handle obtained from + * qed_ll2_require_connection + * + * @return 0 on success, failure otherwise + */ +int qed_ll2_establish_connection(struct qed_hwfn *p_hwfn, u8 connection_handle); + +/** + * @brief qed_ll2_post_rx_buffers - submit buffers to LL2 Rx queue. + * + * @param p_hwfn + * @param connection_handle LL2 connection's handle obtained from + * qed_ll2_require_connection + * @param addr rx (physical address) buffers to submit + * @param cookie + * @param notify_fw produce corresponding Rx BD immediately + * + * @return 0 on success, failure otherwise + */ +int qed_ll2_post_rx_buffer(struct qed_hwfn *p_hwfn, + u8 connection_handle, + dma_addr_t addr, + u16 buf_len, void *cookie, u8 notify_fw); + +/** + * @brief qed_ll2_prepare_tx_packet - request for start Tx BD + * to prepare Tx packet submission to FW. + * + * @param p_hwfn + * @param connection_handle LL2 connection's handle obtained from + * qed_ll2_require_connection + * @param num_of_bds a number of requested BD equals a number of + * fragments in Tx packet + * @param vlan VLAN to insert to packet (if insertion set) + * @param bd_flags + * @param l4_hdr_offset_w L4 Header Offset from start of packet + * (in words). This is needed if both l4_csum + * and ipv6_ext are set + * @param first_frag + * @param first_frag_len + * @param cookie + * + * @param notify_fw + * + * @return 0 on success, failure otherwise + */ +int qed_ll2_prepare_tx_packet(struct qed_hwfn *p_hwfn, + u8 connection_handle, + u8 num_of_bds, + u16 vlan, + u8 bd_flags, + u16 l4_hdr_offset_w, + dma_addr_t first_frag, + u16 first_frag_len, void *cookie, u8 notify_fw); + +/** + * @brief qed_ll2_release_connection - releases resources + * allocated for LL2 connection + * + * @param p_hwfn + * @param connection_handle LL2 connection's handle obtained from + * qed_ll2_require_connection + */ +void qed_ll2_release_connection(struct qed_hwfn *p_hwfn, u8 connection_handle); + +/** + * @brief qed_ll2_set_fragment_of_tx_packet - provides fragments to fill + * Tx BD of BDs requested by + * qed_ll2_prepare_tx_packet + * + * @param p_hwfn + * @param connection_handle LL2 connection's handle + * obtained from + * qed_ll2_require_connection + * @param addr + * @param nbytes + * + * @return 0 on success, failure otherwise + */ +int qed_ll2_set_fragment_of_tx_packet(struct qed_hwfn *p_hwfn, + u8 connection_handle, + dma_addr_t addr, u16 nbytes); + +/** + * @brief qed_ll2_terminate_connection - stops Tx/Rx queues + * + * + * @param p_hwfn + * @param connection_handle LL2 connection's handle + * obtained from + * qed_ll2_require_connection + * + * @return 0 on success, failure otherwise + */ +int qed_ll2_terminate_connection(struct qed_hwfn *p_hwfn, u8 connection_handle); + +/** + * @brief qed_ll2_get_stats - get LL2 queue's statistics + * + * + * @param p_hwfn + * @param connection_handle LL2 connection's handle obtained from + * qed_ll2_require_connection + * @param p_stats + * + * @return 0 on success, failure otherwise + */ +int qed_ll2_get_stats(struct qed_hwfn *p_hwfn, + u8 connection_handle, struct qed_ll2_stats *p_stats); + +/** + * @brief qed_ll2_alloc - Allocates LL2 connections set + * + * @param p_hwfn + * + * @return pointer to alocated qed_ll2_info or NULL + */ +struct qed_ll2_info *qed_ll2_alloc(struct qed_hwfn *p_hwfn); + +/** + * @brief qed_ll2_setup - Inits LL2 connections set + * + * @param p_hwfn + * @param p_ll2_connections + * + */ +void qed_ll2_setup(struct qed_hwfn *p_hwfn, + struct qed_ll2_info *p_ll2_connections); + +/** + * @brief qed_ll2_free - Releases LL2 connections set + * + * @param p_hwfn + * @param p_ll2_connections + * + */ +void qed_ll2_free(struct qed_hwfn *p_hwfn, + struct qed_ll2_info *p_ll2_connections); + +#endif diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index b730a632c383..48cdf62c025b 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -22,11 +22,13 @@ #include #include #include +#include #include "qed.h" #include "qed_sriov.h" #include "qed_sp.h" #include "qed_dev_api.h" +#include "qed_ll2.h" #include "qed_mcp.h" #include "qed_hw.h" #include "qed_selftest.h" @@ -608,7 +610,16 @@ static int qed_nic_reset(struct qed_dev *cdev) static int qed_nic_setup(struct qed_dev *cdev) { - int rc; + int rc, i; + + /* Determine if interface is going to require LL2 */ + if (QED_LEADING_HWFN(cdev)->hw_info.personality != QED_PCI_ETH) { + for (i = 0; i < cdev->num_hwfns; i++) { + struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; + + p_hwfn->using_ll2 = true; + } + } rc = qed_resc_alloc(cdev); if (rc) @@ -873,6 +884,12 @@ static int qed_slowpath_start(struct qed_dev *cdev, DP_INFO(cdev, "HW initialization and function start completed successfully\n"); + /* Allocate LL2 interface if needed */ + if (QED_LEADING_HWFN(cdev)->using_ll2) { + rc = qed_ll2_alloc_if(cdev); + if (rc) + goto err3; + } if (IS_PF(cdev)) { hwfn = QED_LEADING_HWFN(cdev); drv_version.version = (params->drv_major << 24) | @@ -893,6 +910,8 @@ static int qed_slowpath_start(struct qed_dev *cdev, return 0; +err3: + qed_hw_stop(cdev); err2: qed_hw_timers_stop_all(cdev); if (IS_PF(cdev)) @@ -915,6 +934,8 @@ static int qed_slowpath_stop(struct qed_dev *cdev) if (!cdev) return -ENODEV; + qed_ll2_dealloc_if(cdev); + if (IS_PF(cdev)) { qed_free_stream_mem(cdev); if (IS_QED_ETH_IF(cdev)) diff --git a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h index 759cb04e02b0..e75738d21783 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h +++ b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h @@ -208,6 +208,26 @@ 0x50196cUL #define NIG_REG_LLH_CLS_TYPE_DUALMODE \ 0x501964UL +#define NIG_REG_LLH_FUNC_FILTER_VALUE \ + 0x501a00UL +#define NIG_REG_LLH_FUNC_FILTER_VALUE_SIZE \ + 32 +#define NIG_REG_LLH_FUNC_FILTER_EN \ + 0x501a80UL +#define NIG_REG_LLH_FUNC_FILTER_EN_SIZE \ + 16 +#define NIG_REG_LLH_FUNC_FILTER_MODE \ + 0x501ac0UL +#define NIG_REG_LLH_FUNC_FILTER_MODE_SIZE \ + 16 +#define NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE \ + 0x501b00UL +#define NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE_SIZE \ + 16 +#define NIG_REG_LLH_FUNC_FILTER_HDR_SEL \ + 0x501b40UL +#define NIG_REG_LLH_FUNC_FILTER_HDR_SEL_SIZE \ + 16 #define NCSI_REG_CONFIG \ 0x040200UL #define PBF_REG_INIT \ @@ -264,6 +284,8 @@ 0x1f0a1cUL #define PRS_REG_ROCE_DEST_QP_MAX_PF \ 0x1f0430UL +#define PRS_REG_USE_LIGHT_L2 \ + 0x1f096cUL #define PSDM_REG_ENABLE_IN1 \ 0xfa0004UL #define PSEM_REG_ENABLE_IN \ diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp.h b/drivers/net/ethernet/qlogic/qed/qed_sp.h index a548504c3420..a3c539f1c2ac 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_sp.h @@ -61,6 +61,10 @@ union ramrod_data { struct vport_start_ramrod_data vport_start; struct vport_stop_ramrod_data vport_stop; struct vport_update_ramrod_data vport_update; + struct core_rx_start_ramrod_data core_rx_queue_start; + struct core_rx_stop_ramrod_data core_rx_queue_stop; + struct core_tx_start_ramrod_data core_tx_queue_start; + struct core_tx_stop_ramrod_data core_tx_queue_stop; struct vport_filter_update_ramrod_data vport_filter_update; struct rdma_init_func_ramrod_data rdma_init_func; diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index e4546abcea08..c2d74e8785cf 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -627,6 +627,7 @@ enum DP_MODULE { QED_MSG_SP = 0x100000, QED_MSG_STORAGE = 0x200000, QED_MSG_CXT = 0x800000, + QED_MSG_LL2 = 0x1000000, QED_MSG_ILT = 0x2000000, QED_MSG_ROCE = 0x4000000, QED_MSG_DEBUG = 0x8000000, diff --git a/include/linux/qed/qed_ll2_if.h b/include/linux/qed/qed_ll2_if.h new file mode 100644 index 000000000000..fd75c265dba3 --- /dev/null +++ b/include/linux/qed/qed_ll2_if.h @@ -0,0 +1,139 @@ +/* QLogic qed NIC Driver + * + * Copyright (c) 2015 QLogic Corporation + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. + */ + +#ifndef _QED_LL2_IF_H +#define _QED_LL2_IF_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct qed_ll2_stats { + u64 gsi_invalid_hdr; + u64 gsi_invalid_pkt_length; + u64 gsi_unsupported_pkt_typ; + u64 gsi_crcchksm_error; + + u64 packet_too_big_discard; + u64 no_buff_discard; + + u64 rcv_ucast_bytes; + u64 rcv_mcast_bytes; + u64 rcv_bcast_bytes; + u64 rcv_ucast_pkts; + u64 rcv_mcast_pkts; + u64 rcv_bcast_pkts; + + u64 sent_ucast_bytes; + u64 sent_mcast_bytes; + u64 sent_bcast_bytes; + u64 sent_ucast_pkts; + u64 sent_mcast_pkts; + u64 sent_bcast_pkts; +}; + +#define QED_LL2_UNUSED_HANDLE (0xff) + +struct qed_ll2_cb_ops { + int (*rx_cb)(void *, struct sk_buff *, u32, u32); + int (*tx_cb)(void *, struct sk_buff *, bool); +}; + +struct qed_ll2_params { + u16 mtu; + bool drop_ttl0_packets; + bool rx_vlan_stripping; + u8 tx_tc; + bool frags_mapped; + u8 ll2_mac_address[ETH_ALEN]; +}; + +struct qed_ll2_ops { +/** + * @brief start - initializes ll2 + * + * @param cdev + * @param params - protocol driver configuration for the ll2. + * + * @return 0 on success, otherwise error value. + */ + int (*start)(struct qed_dev *cdev, struct qed_ll2_params *params); + +/** + * @brief stop - stops the ll2 + * + * @param cdev + * + * @return 0 on success, otherwise error value. + */ + int (*stop)(struct qed_dev *cdev); + +/** + * @brief start_xmit - transmits an skb over the ll2 interface + * + * @param cdev + * @param skb + * + * @return 0 on success, otherwise error value. + */ + int (*start_xmit)(struct qed_dev *cdev, struct sk_buff *skb); + +/** + * @brief register_cb_ops - protocol driver register the callback for Rx/Tx + * packets. Should be called before `start'. + * + * @param cdev + * @param cookie - to be passed to the callback functions. + * @param ops - the callback functions to register for Rx / Tx. + * + * @return 0 on success, otherwise error value. + */ + void (*register_cb_ops)(struct qed_dev *cdev, + const struct qed_ll2_cb_ops *ops, + void *cookie); + +/** + * @brief get LL2 related statistics + * + * @param cdev + * @param stats - pointer to struct that would be filled with stats + * + * @return 0 on success, error otherwise. + */ + int (*get_stats)(struct qed_dev *cdev, struct qed_ll2_stats *stats); +}; + +#ifdef CONFIG_QED_LL2 +int qed_ll2_alloc_if(struct qed_dev *); +void qed_ll2_dealloc_if(struct qed_dev *); +#else +static const struct qed_ll2_ops qed_ll2_ops_pass = { + .start = NULL, + .stop = NULL, + .start_xmit = NULL, + .register_cb_ops = NULL, + .get_stats = NULL, +}; + +static inline int qed_ll2_alloc_if(struct qed_dev *cdev) +{ + return 0; +} + +static inline void qed_ll2_dealloc_if(struct qed_dev *cdev) +{ +} +#endif +#endif From cee9fbd8e2e9e713cd8bf227c6492fd8854de74b Mon Sep 17 00:00:00 2001 From: Ram Amrani Date: Sat, 1 Oct 2016 21:59:56 +0300 Subject: [PATCH 1696/1715] qede: Add qedr framework Adds a skeletal implementation of the qede RoCE driver - The qedr has some dependencies of the state of the underlying base interface. This adds some logic required with mutual registrations and the ability to pass updates on 'intresting' events. Signed-off-by: Ram Amrani Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/Kconfig | 18 +- drivers/net/ethernet/qlogic/qede/Makefile | 1 + drivers/net/ethernet/qlogic/qede/qede.h | 9 + drivers/net/ethernet/qlogic/qede/qede_main.c | 35 ++- drivers/net/ethernet/qlogic/qede/qede_roce.c | 314 +++++++++++++++++++ include/linux/qed/qed_if.h | 3 +- include/linux/qed/qede_roce.h | 88 ++++++ 7 files changed, 451 insertions(+), 17 deletions(-) create mode 100644 drivers/net/ethernet/qlogic/qede/qede_roce.c create mode 100644 include/linux/qed/qede_roce.h diff --git a/drivers/net/ethernet/qlogic/Kconfig b/drivers/net/ethernet/qlogic/Kconfig index 9eb3b1914cf5..0df1391f9663 100644 --- a/drivers/net/ethernet/qlogic/Kconfig +++ b/drivers/net/ethernet/qlogic/Kconfig @@ -89,12 +89,7 @@ config QED This enables the support for ... config QED_LL2 - bool "Qlogic QED Light L2 interface" - default n - depends on QED - ---help--- - This enables support for Light L2 interface which is required - by all qed protocol drivers other than qede. + bool config QED_SRIOV bool "QLogic QED 25/40/100Gb SR-IOV support" @@ -112,4 +107,15 @@ config QEDE ---help--- This enables the support for ... +config INFINIBAND_QEDR + tristate "QLogic qede RoCE sources [debug]" + depends on QEDE && 64BIT + select QED_LL2 + default n + ---help--- + This provides a temporary node that allows the compilation + and logical testing of the InfiniBand over Ethernet support + for QLogic QED. This would be replaced by the 'real' option + once the QEDR driver is added [+relocated]. + endif # NET_VENDOR_QLOGIC diff --git a/drivers/net/ethernet/qlogic/qede/Makefile b/drivers/net/ethernet/qlogic/qede/Makefile index 74a49850d74d..28dc58919c85 100644 --- a/drivers/net/ethernet/qlogic/qede/Makefile +++ b/drivers/net/ethernet/qlogic/qede/Makefile @@ -2,3 +2,4 @@ obj-$(CONFIG_QEDE) := qede.o qede-y := qede_main.o qede_ethtool.o qede-$(CONFIG_DCB) += qede_dcbnl.o +qede-$(CONFIG_INFINIBAND_QEDR) += qede_roce.o diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h index e01adce4a966..28c0e9f42c9e 100644 --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@ -106,6 +106,13 @@ struct qede_vlan { bool configured; }; +struct qede_rdma_dev { + struct qedr_dev *qedr_dev; + struct list_head entry; + struct list_head roce_event_list; + struct workqueue_struct *roce_wq; +}; + struct qede_dev { struct qed_dev *cdev; struct net_device *ndev; @@ -185,6 +192,8 @@ struct qede_dev { unsigned long sp_flags; u16 vxlan_dst_port; u16 geneve_dst_port; + + struct qede_rdma_dev rdma_info; }; enum QEDE_STATE { diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 0e198fe89d1a..343038ca047d 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -36,7 +36,7 @@ #include #include #include - +#include #include "qede.h" static char version[] = @@ -193,8 +193,7 @@ static int qede_netdev_event(struct notifier_block *this, unsigned long event, struct ethtool_drvinfo drvinfo; struct qede_dev *edev; - /* Currently only support name change */ - if (event != NETDEV_CHANGENAME) + if (event != NETDEV_CHANGENAME && event != NETDEV_CHANGEADDR) goto done; /* Check whether this is a qede device */ @@ -207,11 +206,18 @@ static int qede_netdev_event(struct notifier_block *this, unsigned long event, goto done; edev = netdev_priv(ndev); - /* Notify qed of the name change */ - if (!edev->ops || !edev->ops->common) - goto done; - edev->ops->common->set_id(edev->cdev, edev->ndev->name, - "qede"); + switch (event) { + case NETDEV_CHANGENAME: + /* Notify qed of the name change */ + if (!edev->ops || !edev->ops->common) + goto done; + edev->ops->common->set_id(edev->cdev, edev->ndev->name, "qede"); + break; + case NETDEV_CHANGEADDR: + edev = netdev_priv(ndev); + qede_roce_event_changeaddr(edev); + break; + } done: return NOTIFY_DONE; @@ -2545,10 +2551,14 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level, qede_init_ndev(edev); + rc = qede_roce_dev_add(edev); + if (rc) + goto err3; + rc = register_netdev(edev->ndev); if (rc) { DP_NOTICE(edev, "Cannot register net-device\n"); - goto err3; + goto err4; } edev->ops->common->set_id(cdev, edev->ndev->name, DRV_MODULE_VERSION); @@ -2568,6 +2578,8 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level, return 0; +err4: + qede_roce_dev_remove(edev); err3: free_netdev(edev->ndev); err2: @@ -2614,8 +2626,11 @@ static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode) DP_INFO(edev, "Starting qede_remove\n"); cancel_delayed_work_sync(&edev->sp_task); + unregister_netdev(ndev); + qede_roce_dev_remove(edev); + edev->ops->common->set_power_state(cdev, PCI_D0); pci_set_drvdata(pdev, NULL); @@ -3512,6 +3527,7 @@ static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode) DP_INFO(edev, "Starting qede unload\n"); + qede_roce_dev_event_close(edev); mutex_lock(&edev->qede_lock); edev->state = QEDE_STATE_CLOSED; @@ -3612,6 +3628,7 @@ static int qede_load(struct qede_dev *edev, enum qede_load_mode mode) /* Query whether link is already-up */ memset(&link_output, 0, sizeof(link_output)); edev->ops->common->get_link(edev->cdev, &link_output); + qede_roce_dev_event_open(edev); qede_link_update(edev, &link_output); DP_INFO(edev, "Ending successfully qede load\n"); diff --git a/drivers/net/ethernet/qlogic/qede/qede_roce.c b/drivers/net/ethernet/qlogic/qede/qede_roce.c new file mode 100644 index 000000000000..9867f960b063 --- /dev/null +++ b/drivers/net/ethernet/qlogic/qede/qede_roce.c @@ -0,0 +1,314 @@ +/* QLogic qedr NIC Driver + * Copyright (c) 2015-2016 QLogic Corporation + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and /or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include +#include +#include +#include +#include +#include "qede.h" + +static struct qedr_driver *qedr_drv; +static LIST_HEAD(qedr_dev_list); +static DEFINE_MUTEX(qedr_dev_list_lock); + +bool qede_roce_supported(struct qede_dev *dev) +{ + return dev->dev_info.common.rdma_supported; +} + +static void _qede_roce_dev_add(struct qede_dev *edev) +{ + if (!qedr_drv) + return; + + edev->rdma_info.qedr_dev = qedr_drv->add(edev->cdev, edev->pdev, + edev->ndev); +} + +static int qede_roce_create_wq(struct qede_dev *edev) +{ + INIT_LIST_HEAD(&edev->rdma_info.roce_event_list); + edev->rdma_info.roce_wq = create_singlethread_workqueue("roce_wq"); + if (!edev->rdma_info.roce_wq) { + DP_NOTICE(edev, "qedr: Could not create workqueue\n"); + return -ENOMEM; + } + + return 0; +} + +static void qede_roce_cleanup_event(struct qede_dev *edev) +{ + struct list_head *head = &edev->rdma_info.roce_event_list; + struct qede_roce_event_work *event_node; + + flush_workqueue(edev->rdma_info.roce_wq); + while (!list_empty(head)) { + event_node = list_entry(head->next, struct qede_roce_event_work, + list); + cancel_work_sync(&event_node->work); + list_del(&event_node->list); + kfree(event_node); + } +} + +static void qede_roce_destroy_wq(struct qede_dev *edev) +{ + qede_roce_cleanup_event(edev); + destroy_workqueue(edev->rdma_info.roce_wq); +} + +int qede_roce_dev_add(struct qede_dev *edev) +{ + int rc = 0; + + if (qede_roce_supported(edev)) { + rc = qede_roce_create_wq(edev); + if (rc) + return rc; + + INIT_LIST_HEAD(&edev->rdma_info.entry); + mutex_lock(&qedr_dev_list_lock); + list_add_tail(&edev->rdma_info.entry, &qedr_dev_list); + _qede_roce_dev_add(edev); + mutex_unlock(&qedr_dev_list_lock); + } + + return rc; +} + +static void _qede_roce_dev_remove(struct qede_dev *edev) +{ + if (qedr_drv && qedr_drv->remove && edev->rdma_info.qedr_dev) + qedr_drv->remove(edev->rdma_info.qedr_dev); + edev->rdma_info.qedr_dev = NULL; +} + +void qede_roce_dev_remove(struct qede_dev *edev) +{ + if (!qede_roce_supported(edev)) + return; + + qede_roce_destroy_wq(edev); + mutex_lock(&qedr_dev_list_lock); + _qede_roce_dev_remove(edev); + list_del(&edev->rdma_info.entry); + mutex_unlock(&qedr_dev_list_lock); +} + +static void _qede_roce_dev_open(struct qede_dev *edev) +{ + if (qedr_drv && edev->rdma_info.qedr_dev && qedr_drv->notify) + qedr_drv->notify(edev->rdma_info.qedr_dev, QEDE_UP); +} + +static void qede_roce_dev_open(struct qede_dev *edev) +{ + if (!qede_roce_supported(edev)) + return; + + mutex_lock(&qedr_dev_list_lock); + _qede_roce_dev_open(edev); + mutex_unlock(&qedr_dev_list_lock); +} + +static void _qede_roce_dev_close(struct qede_dev *edev) +{ + if (qedr_drv && edev->rdma_info.qedr_dev && qedr_drv->notify) + qedr_drv->notify(edev->rdma_info.qedr_dev, QEDE_DOWN); +} + +static void qede_roce_dev_close(struct qede_dev *edev) +{ + if (!qede_roce_supported(edev)) + return; + + mutex_lock(&qedr_dev_list_lock); + _qede_roce_dev_close(edev); + mutex_unlock(&qedr_dev_list_lock); +} + +static void qede_roce_dev_shutdown(struct qede_dev *edev) +{ + if (!qede_roce_supported(edev)) + return; + + mutex_lock(&qedr_dev_list_lock); + if (qedr_drv && edev->rdma_info.qedr_dev && qedr_drv->notify) + qedr_drv->notify(edev->rdma_info.qedr_dev, QEDE_CLOSE); + mutex_unlock(&qedr_dev_list_lock); +} + +int qede_roce_register_driver(struct qedr_driver *drv) +{ + struct qede_dev *edev; + u8 qedr_counter = 0; + + mutex_lock(&qedr_dev_list_lock); + if (qedr_drv) { + mutex_unlock(&qedr_dev_list_lock); + return -EINVAL; + } + qedr_drv = drv; + + list_for_each_entry(edev, &qedr_dev_list, rdma_info.entry) { + struct net_device *ndev; + + qedr_counter++; + _qede_roce_dev_add(edev); + ndev = edev->ndev; + if (netif_running(ndev) && netif_oper_up(ndev)) + _qede_roce_dev_open(edev); + } + mutex_unlock(&qedr_dev_list_lock); + + DP_INFO(edev, "qedr: discovered and registered %d RoCE funcs\n", + qedr_counter); + + return 0; +} +EXPORT_SYMBOL(qede_roce_register_driver); + +void qede_roce_unregister_driver(struct qedr_driver *drv) +{ + struct qede_dev *edev; + + mutex_lock(&qedr_dev_list_lock); + list_for_each_entry(edev, &qedr_dev_list, rdma_info.entry) { + if (edev->rdma_info.qedr_dev) + _qede_roce_dev_remove(edev); + } + qedr_drv = NULL; + mutex_unlock(&qedr_dev_list_lock); +} +EXPORT_SYMBOL(qede_roce_unregister_driver); + +static void qede_roce_changeaddr(struct qede_dev *edev) +{ + if (!qede_roce_supported(edev)) + return; + + if (qedr_drv && edev->rdma_info.qedr_dev && qedr_drv->notify) + qedr_drv->notify(edev->rdma_info.qedr_dev, QEDE_CHANGE_ADDR); +} + +struct qede_roce_event_work *qede_roce_get_free_event_node(struct qede_dev + *edev) +{ + struct qede_roce_event_work *event_node = NULL; + struct list_head *list_node = NULL; + bool found = false; + + list_for_each(list_node, &edev->rdma_info.roce_event_list) { + event_node = list_entry(list_node, struct qede_roce_event_work, + list); + if (!work_pending(&event_node->work)) { + found = true; + break; + } + } + + if (!found) { + event_node = kzalloc(sizeof(*event_node), GFP_KERNEL); + if (!event_node) { + DP_NOTICE(edev, + "qedr: Could not allocate memory for roce work\n"); + return NULL; + } + list_add_tail(&event_node->list, + &edev->rdma_info.roce_event_list); + } + + return event_node; +} + +static void qede_roce_handle_event(struct work_struct *work) +{ + struct qede_roce_event_work *event_node; + enum qede_roce_event event; + struct qede_dev *edev; + + event_node = container_of(work, struct qede_roce_event_work, work); + event = event_node->event; + edev = event_node->ptr; + + switch (event) { + case QEDE_UP: + qede_roce_dev_open(edev); + break; + case QEDE_DOWN: + qede_roce_dev_close(edev); + break; + case QEDE_CLOSE: + qede_roce_dev_shutdown(edev); + break; + case QEDE_CHANGE_ADDR: + qede_roce_changeaddr(edev); + break; + default: + DP_NOTICE(edev, "Invalid roce event %d", event); + } +} + +static void qede_roce_add_event(struct qede_dev *edev, + enum qede_roce_event event) +{ + struct qede_roce_event_work *event_node; + + if (!edev->rdma_info.qedr_dev) + return; + + event_node = qede_roce_get_free_event_node(edev); + if (!event_node) + return; + + event_node->event = event; + event_node->ptr = edev; + + INIT_WORK(&event_node->work, qede_roce_handle_event); + queue_work(edev->rdma_info.roce_wq, &event_node->work); +} + +void qede_roce_dev_event_open(struct qede_dev *edev) +{ + qede_roce_add_event(edev, QEDE_UP); +} + +void qede_roce_dev_event_close(struct qede_dev *edev) +{ + qede_roce_add_event(edev, QEDE_DOWN); +} + +void qede_roce_event_changeaddr(struct qede_dev *edev) +{ + qede_roce_add_event(edev, QEDE_CHANGE_ADDR); +} diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index c2d74e8785cf..e313742b571d 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -260,11 +260,10 @@ struct qed_dev_info { /* MFW version */ u32 mfw_rev; - bool rdma_supported; - u32 flash_size; u8 mf_mode; bool tx_switching; + bool rdma_supported; }; enum qed_sb_type { diff --git a/include/linux/qed/qede_roce.h b/include/linux/qed/qede_roce.h new file mode 100644 index 000000000000..99fbe6d55acb --- /dev/null +++ b/include/linux/qed/qede_roce.h @@ -0,0 +1,88 @@ +/* QLogic qedr NIC Driver + * Copyright (c) 2015-2016 QLogic Corporation + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and /or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef QEDE_ROCE_H +#define QEDE_ROCE_H + +struct qedr_dev; +struct qed_dev; +struct qede_dev; + +enum qede_roce_event { + QEDE_UP, + QEDE_DOWN, + QEDE_CHANGE_ADDR, + QEDE_CLOSE +}; + +struct qede_roce_event_work { + struct list_head list; + struct work_struct work; + void *ptr; + enum qede_roce_event event; +}; + +struct qedr_driver { + unsigned char name[32]; + + struct qedr_dev* (*add)(struct qed_dev *, struct pci_dev *, + struct net_device *); + + void (*remove)(struct qedr_dev *); + void (*notify)(struct qedr_dev *, enum qede_roce_event); +}; + +/* APIs for RoCE driver to register callback handlers, + * which will be invoked when device is added, removed, ifup, ifdown + */ +int qede_roce_register_driver(struct qedr_driver *drv); +void qede_roce_unregister_driver(struct qedr_driver *drv); + +bool qede_roce_supported(struct qede_dev *dev); + +#if IS_ENABLED(CONFIG_INFINIBAND_QEDR) +int qede_roce_dev_add(struct qede_dev *dev); +void qede_roce_dev_event_open(struct qede_dev *dev); +void qede_roce_dev_event_close(struct qede_dev *dev); +void qede_roce_dev_remove(struct qede_dev *dev); +void qede_roce_event_changeaddr(struct qede_dev *qedr); +#else +static inline int qede_roce_dev_add(struct qede_dev *dev) +{ + return 0; +} + +static inline void qede_roce_dev_event_open(struct qede_dev *dev) {} +static inline void qede_roce_dev_event_close(struct qede_dev *dev) {} +static inline void qede_roce_dev_remove(struct qede_dev *dev) {} +static inline void qede_roce_event_changeaddr(struct qede_dev *qedr) {} +#endif +#endif From 51ff17251c9c2c2e71974149d22bc73ea09c27cc Mon Sep 17 00:00:00 2001 From: Ram Amrani Date: Sat, 1 Oct 2016 21:59:57 +0300 Subject: [PATCH 1697/1715] qed: Add support for RoCE hw init This adds the backbone required for the various HW initalizations which are necessary for the qedr driver - FW notification, resource initializations, etc. Signed-off-by: Ram Amrani Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/Makefile | 1 + drivers/net/ethernet/qlogic/qed/qed.h | 33 +- drivers/net/ethernet/qlogic/qed/qed_cxt.c | 6 + drivers/net/ethernet/qlogic/qed/qed_cxt.h | 6 + drivers/net/ethernet/qlogic/qed/qed_dev.c | 154 +++ drivers/net/ethernet/qlogic/qed/qed_main.c | 44 +- .../net/ethernet/qlogic/qed/qed_reg_addr.h | 8 +- drivers/net/ethernet/qlogic/qed/qed_roce.c | 886 ++++++++++++++++++ drivers/net/ethernet/qlogic/qed/qed_roce.h | 123 +++ drivers/net/ethernet/qlogic/qed/qed_sp.h | 1 + drivers/net/ethernet/qlogic/qed/qed_spq.c | 8 + drivers/net/ethernet/qlogic/qed/qed_sriov.c | 4 +- drivers/net/ethernet/qlogic/qed/qed_vf.c | 2 +- include/linux/qed/common_hsi.h | 1 + include/linux/qed/qed_if.h | 5 +- include/linux/qed/qed_roce_if.h | 345 +++++++ include/linux/qed/rdma_common.h | 1 + 17 files changed, 1620 insertions(+), 8 deletions(-) create mode 100644 drivers/net/ethernet/qlogic/qed/qed_roce.c create mode 100644 drivers/net/ethernet/qlogic/qed/qed_roce.h create mode 100644 include/linux/qed/qed_roce_if.h diff --git a/drivers/net/ethernet/qlogic/qed/Makefile b/drivers/net/ethernet/qlogic/qed/Makefile index e067098f10a9..cda0af7fbc20 100644 --- a/drivers/net/ethernet/qlogic/qed/Makefile +++ b/drivers/net/ethernet/qlogic/qed/Makefile @@ -5,3 +5,4 @@ qed-y := qed_cxt.o qed_dev.o qed_hw.o qed_init_fw_funcs.o qed_init_ops.o \ qed_selftest.o qed_dcbx.o qed_debug.o qed-$(CONFIG_QED_SRIOV) += qed_sriov.o qed_vf.o qed-$(CONFIG_QED_LL2) += qed_ll2.o +qed-$(CONFIG_INFINIBAND_QEDR) += qed_roce.o diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index 91b571a3670b..c5a098a2ca2c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -35,6 +35,9 @@ extern const struct qed_common_ops qed_common_ops_pass; #define QED_WFQ_UNIT 100 +#define QED_WID_SIZE (1024) +#define QED_PF_DEMS_SIZE (4) + /* cau states */ enum qed_coalescing_mode { QED_COAL_MODE_DISABLE, @@ -48,6 +51,14 @@ enum qed_mcp_protocol_type; /* helpers */ static inline u32 qed_db_addr(u32 cid, u32 DEMS) +{ + u32 db_addr = FIELD_VALUE(DB_LEGACY_ADDR_DEMS, DEMS) | + (cid * QED_PF_DEMS_SIZE); + + return db_addr; +} + +static inline u32 qed_db_addr_vf(u32 cid, u32 DEMS) { u32 db_addr = FIELD_VALUE(DB_LEGACY_ADDR_DEMS, DEMS) | FIELD_VALUE(DB_LEGACY_ADDR_ICID, cid); @@ -152,14 +163,17 @@ enum QED_RESOURCES { QED_RL, QED_MAC, QED_VLAN, + QED_RDMA_CNQ_RAM, QED_ILT, QED_LL2_QUEUE, + QED_RDMA_STATS_QUEUE, QED_MAX_RESC, }; enum QED_FEATURE { QED_PF_L2_QUE, QED_VF, + QED_RDMA_CNQ, QED_MAX_FEATURES, }; @@ -364,6 +378,7 @@ struct qed_hwfn { /* Protocol related */ bool using_ll2; struct qed_ll2_info *p_ll2_info; + struct qed_rdma_info *p_rdma_info; struct qed_pf_params pf_params; bool b_rdma_enabled_in_prs; @@ -402,6 +417,17 @@ struct qed_hwfn { struct dbg_tools_data dbg_info; + /* PWM region specific data */ + u32 dpi_size; + u32 dpi_count; + + /* This is used to calculate the doorbell address */ + u32 dpi_start_offset; + + /* If one of the following is set then EDPM shouldn't be used */ + u8 dcbx_no_edpm; + u8 db_bar_no_edpm; + struct qed_simd_fp_handler simd_proto_handler[64]; #ifdef CONFIG_QED_SRIOV @@ -435,6 +461,8 @@ struct qed_int_params { bool fp_initialized; u8 fp_msix_base; u8 fp_msix_cnt; + u8 rdma_msix_base; + u8 rdma_msix_cnt; }; struct qed_dbg_feature { @@ -541,7 +569,6 @@ struct qed_dev { bool b_is_vf; u32 drv_type; - struct qed_eth_stats *reset_stats; struct qed_fw_data *fw_data; @@ -574,6 +601,10 @@ struct qed_dev { #endif const struct firmware *firmware; + + u32 rdma_max_sge; + u32 rdma_max_inline; + u32 rdma_max_srq_sge; }; #define NUM_OF_VFS(dev) MAX_NUM_VFS_BB diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c index d9bea2a9c9f7..82370a1a59ad 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c +++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c @@ -48,7 +48,13 @@ #define TM_ELEM_SIZE 4 /* ILT constants */ +#if IS_ENABLED(CONFIG_INFINIBAND_QEDR) +/* For RoCE we configure to 64K to cover for RoCE max tasks 256K purpose. */ +#define ILT_DEFAULT_HW_P_SIZE 4 +#else #define ILT_DEFAULT_HW_P_SIZE 3 +#endif + #define ILT_PAGE_IN_BYTES(hw_p_size) (1U << ((hw_p_size) + 12)) #define ILT_CFG_REG(cli, reg) PSWRQ2_REG_ ## cli ## _ ## reg ## _RT_OFFSET diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.h b/drivers/net/ethernet/qlogic/qed/qed_cxt.h index c6f6f2e8192d..d00ad055802b 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_cxt.h +++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.h @@ -170,6 +170,12 @@ int qed_qm_reconf(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt); */ void qed_cxt_release_cid(struct qed_hwfn *p_hwfn, u32 cid); +int qed_cxt_dynamic_ilt_alloc(struct qed_hwfn *p_hwfn, + enum qed_cxt_elem_type elem_type, u32 iid); +u32 qed_cxt_get_proto_tid_count(struct qed_hwfn *p_hwfn, + enum protocol_type type); +u32 qed_cxt_get_proto_cid_start(struct qed_hwfn *p_hwfn, + enum protocol_type type); #define QED_CTX_WORKING_MEM 0 #define QED_CTX_FL_MEM 1 diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index 9a8e153df841..754f6a908858 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -35,9 +35,13 @@ #include "qed_sp.h" #include "qed_sriov.h" #include "qed_vf.h" +#include "qed_roce.h" static DEFINE_SPINLOCK(qm_lock); +#define QED_MIN_DPIS (4) +#define QED_MIN_PWM_REGION (QED_WID_SIZE * QED_MIN_DPIS) + /* API common to all protocols */ enum BAR_ID { BAR_ID_0, /* used for GRC */ @@ -787,6 +791,136 @@ static int qed_hw_init_common(struct qed_hwfn *p_hwfn, return rc; } +static int +qed_hw_init_dpi_size(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u32 pwm_region_size, u32 n_cpus) +{ + u32 dpi_page_size_1, dpi_page_size_2, dpi_page_size; + u32 dpi_bit_shift, dpi_count; + u32 min_dpis; + + /* Calculate DPI size */ + dpi_page_size_1 = QED_WID_SIZE * n_cpus; + dpi_page_size_2 = max_t(u32, QED_WID_SIZE, PAGE_SIZE); + dpi_page_size = max_t(u32, dpi_page_size_1, dpi_page_size_2); + dpi_page_size = roundup_pow_of_two(dpi_page_size); + dpi_bit_shift = ilog2(dpi_page_size / 4096); + + dpi_count = pwm_region_size / dpi_page_size; + + min_dpis = p_hwfn->pf_params.rdma_pf_params.min_dpis; + min_dpis = max_t(u32, QED_MIN_DPIS, min_dpis); + + p_hwfn->dpi_size = dpi_page_size; + p_hwfn->dpi_count = dpi_count; + + qed_wr(p_hwfn, p_ptt, DORQ_REG_PF_DPI_BIT_SHIFT, dpi_bit_shift); + + if (dpi_count < min_dpis) + return -EINVAL; + + return 0; +} + +enum QED_ROCE_EDPM_MODE { + QED_ROCE_EDPM_MODE_ENABLE = 0, + QED_ROCE_EDPM_MODE_FORCE_ON = 1, + QED_ROCE_EDPM_MODE_DISABLE = 2, +}; + +static int +qed_hw_init_pf_doorbell_bar(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) +{ + u32 pwm_regsize, norm_regsize; + u32 non_pwm_conn, min_addr_reg1; + u32 db_bar_size, n_cpus; + u32 roce_edpm_mode; + u32 pf_dems_shift; + int rc = 0; + u8 cond; + + db_bar_size = qed_hw_bar_size(p_hwfn, BAR_ID_1); + if (p_hwfn->cdev->num_hwfns > 1) + db_bar_size /= 2; + + /* Calculate doorbell regions */ + non_pwm_conn = qed_cxt_get_proto_cid_start(p_hwfn, PROTOCOLID_CORE) + + qed_cxt_get_proto_cid_count(p_hwfn, PROTOCOLID_CORE, + NULL) + + qed_cxt_get_proto_cid_count(p_hwfn, PROTOCOLID_ETH, + NULL); + norm_regsize = roundup(QED_PF_DEMS_SIZE * non_pwm_conn, 4096); + min_addr_reg1 = norm_regsize / 4096; + pwm_regsize = db_bar_size - norm_regsize; + + /* Check that the normal and PWM sizes are valid */ + if (db_bar_size < norm_regsize) { + DP_ERR(p_hwfn->cdev, + "Doorbell BAR size 0x%x is too small (normal region is 0x%0x )\n", + db_bar_size, norm_regsize); + return -EINVAL; + } + + if (pwm_regsize < QED_MIN_PWM_REGION) { + DP_ERR(p_hwfn->cdev, + "PWM region size 0x%0x is too small. Should be at least 0x%0x (Doorbell BAR size is 0x%x and normal region size is 0x%0x)\n", + pwm_regsize, + QED_MIN_PWM_REGION, db_bar_size, norm_regsize); + return -EINVAL; + } + + /* Calculate number of DPIs */ + roce_edpm_mode = p_hwfn->pf_params.rdma_pf_params.roce_edpm_mode; + if ((roce_edpm_mode == QED_ROCE_EDPM_MODE_ENABLE) || + ((roce_edpm_mode == QED_ROCE_EDPM_MODE_FORCE_ON))) { + /* Either EDPM is mandatory, or we are attempting to allocate a + * WID per CPU. + */ + n_cpus = num_active_cpus(); + rc = qed_hw_init_dpi_size(p_hwfn, p_ptt, pwm_regsize, n_cpus); + } + + cond = (rc && (roce_edpm_mode == QED_ROCE_EDPM_MODE_ENABLE)) || + (roce_edpm_mode == QED_ROCE_EDPM_MODE_DISABLE); + if (cond || p_hwfn->dcbx_no_edpm) { + /* Either EDPM is disabled from user configuration, or it is + * disabled via DCBx, or it is not mandatory and we failed to + * allocated a WID per CPU. + */ + n_cpus = 1; + rc = qed_hw_init_dpi_size(p_hwfn, p_ptt, pwm_regsize, n_cpus); + + if (cond) + qed_rdma_dpm_bar(p_hwfn, p_ptt); + } + + DP_INFO(p_hwfn, + "doorbell bar: normal_region_size=%d, pwm_region_size=%d, dpi_size=%d, dpi_count=%d, roce_edpm=%s\n", + norm_regsize, + pwm_regsize, + p_hwfn->dpi_size, + p_hwfn->dpi_count, + ((p_hwfn->dcbx_no_edpm) || (p_hwfn->db_bar_no_edpm)) ? + "disabled" : "enabled"); + + if (rc) { + DP_ERR(p_hwfn, + "Failed to allocate enough DPIs. Allocated %d but the current minimum is %d.\n", + p_hwfn->dpi_count, + p_hwfn->pf_params.rdma_pf_params.min_dpis); + return -EINVAL; + } + + p_hwfn->dpi_start_offset = norm_regsize; + + /* DEMS size is configured log2 of DWORDs, hence the division by 4 */ + pf_dems_shift = ilog2(QED_PF_DEMS_SIZE / 4); + qed_wr(p_hwfn, p_ptt, DORQ_REG_PF_ICID_BIT_SHIFT_NORM, pf_dems_shift); + qed_wr(p_hwfn, p_ptt, DORQ_REG_PF_MIN_ADDR_REG1, min_addr_reg1); + + return 0; +} + static int qed_hw_init_port(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, int hw_mode) { @@ -860,6 +994,10 @@ static int qed_hw_init_pf(struct qed_hwfn *p_hwfn, /* Pure runtime initializations - directly to the HW */ qed_int_igu_init_pure_rt(p_hwfn, p_ptt, true, true); + rc = qed_hw_init_pf_doorbell_bar(p_hwfn, p_ptt); + if (rc) + return rc; + if (b_hw_start) { /* enable interrupts */ qed_int_igu_enable(p_hwfn, p_ptt, int_mode); @@ -1284,6 +1422,19 @@ static void qed_hw_set_feat(struct qed_hwfn *p_hwfn) u32 *feat_num = p_hwfn->hw_info.feat_num; int num_features = 1; +#if IS_ENABLED(CONFIG_INFINIBAND_QEDR) + /* Roce CNQ each requires: 1 status block + 1 CNQ. We divide the + * status blocks equally between L2 / RoCE but with consideration as + * to how many l2 queues / cnqs we have + */ + if (p_hwfn->hw_info.personality == QED_PCI_ETH_ROCE) { + num_features++; + + feat_num[QED_RDMA_CNQ] = + min_t(u32, RESC_NUM(p_hwfn, QED_SB) / num_features, + RESC_NUM(p_hwfn, QED_RDMA_CNQ_RAM)); + } +#endif feat_num[QED_PF_L2_QUE] = min_t(u32, RESC_NUM(p_hwfn, QED_SB) / num_features, RESC_NUM(p_hwfn, QED_L2_QUEUE)); @@ -1325,6 +1476,9 @@ static int qed_hw_get_resc(struct qed_hwfn *p_hwfn) num_funcs; resc_num[QED_ILT] = PXP_NUM_ILT_RECORDS_BB / num_funcs; resc_num[QED_LL2_QUEUE] = MAX_NUM_LL2_RX_QUEUES / num_funcs; + resc_num[QED_RDMA_CNQ_RAM] = NUM_OF_CMDQS_CQS / num_funcs; + resc_num[QED_RDMA_STATS_QUEUE] = RDMA_NUM_STATISTIC_COUNTERS_BB / + num_funcs; for (i = 0; i < QED_MAX_RESC; i++) resc_start[i] = resc_num[i] * enabled_func_idx; diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 48cdf62c025b..4ee3151e80c2 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -33,6 +33,11 @@ #include "qed_hw.h" #include "qed_selftest.h" +#if IS_ENABLED(CONFIG_INFINIBAND_QEDR) +#define QED_ROCE_QPS (8192) +#define QED_ROCE_DPIS (8) +#endif + static char version[] = "QLogic FastLinQ 4xxxx Core Module qed " DRV_MODULE_VERSION "\n"; @@ -206,8 +211,8 @@ int qed_fill_dev_info(struct qed_dev *cdev, dev_info->pci_mem_start = cdev->pci_params.mem_start; dev_info->pci_mem_end = cdev->pci_params.mem_end; dev_info->pci_irq = cdev->pci_params.irq; - dev_info->rdma_supported = - (cdev->hwfns[0].hw_info.personality == QED_PCI_ETH_ROCE); + dev_info->rdma_supported = (cdev->hwfns[0].hw_info.personality == + QED_PCI_ETH_ROCE); dev_info->is_mf_default = IS_MF_DEFAULT(&cdev->hwfns[0]); ether_addr_copy(dev_info->hw_mac, cdev->hwfns[0].hw_info.hw_mac_addr); @@ -677,6 +682,9 @@ static int qed_slowpath_setup_int(struct qed_dev *cdev, enum qed_int_mode int_mode) { struct qed_sb_cnt_info sb_cnt_info; +#if IS_ENABLED(CONFIG_INFINIBAND_QEDR) + int num_l2_queues; +#endif int rc; int i; @@ -707,6 +715,31 @@ static int qed_slowpath_setup_int(struct qed_dev *cdev, cdev->int_params.fp_msix_cnt = cdev->int_params.out.num_vectors - cdev->num_hwfns; +#if IS_ENABLED(CONFIG_INFINIBAND_QEDR) + num_l2_queues = 0; + for_each_hwfn(cdev, i) + num_l2_queues += FEAT_NUM(&cdev->hwfns[i], QED_PF_L2_QUE); + + DP_VERBOSE(cdev, QED_MSG_RDMA, + "cdev->int_params.fp_msix_cnt=%d num_l2_queues=%d\n", + cdev->int_params.fp_msix_cnt, num_l2_queues); + + if (cdev->int_params.fp_msix_cnt > num_l2_queues) { + cdev->int_params.rdma_msix_cnt = + (cdev->int_params.fp_msix_cnt - num_l2_queues) + / cdev->num_hwfns; + cdev->int_params.rdma_msix_base = + cdev->int_params.fp_msix_base + num_l2_queues; + cdev->int_params.fp_msix_cnt = num_l2_queues; + } else { + cdev->int_params.rdma_msix_cnt = 0; + } + + DP_VERBOSE(cdev, QED_MSG_RDMA, "roce_msix_cnt=%d roce_msix_base=%d\n", + cdev->int_params.rdma_msix_cnt, + cdev->int_params.rdma_msix_base); +#endif + return 0; } @@ -810,6 +843,13 @@ static void qed_update_pf_params(struct qed_dev *cdev, { int i; +#if IS_ENABLED(CONFIG_INFINIBAND_QEDR) + params->rdma_pf_params.num_qps = QED_ROCE_QPS; + params->rdma_pf_params.min_dpis = QED_ROCE_DPIS; + /* divide by 3 the MRs to avoid MF ILT overflow */ + params->rdma_pf_params.num_mrs = RDMA_MAX_TIDS; + params->rdma_pf_params.gl_pi = QED_ROCE_PROTOCOL_INDEX; +#endif for (i = 0; i < cdev->num_hwfns; i++) { struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; diff --git a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h index e75738d21783..b414a0542177 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h +++ b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h @@ -1448,5 +1448,11 @@ 0x620000UL #define PHY_PCIE_REG_PHY1 \ 0x624000UL - +#define NIG_REG_ROCE_DUPLICATE_TO_HOST 0x5088f0UL +#define PRS_REG_LIGHT_L2_ETHERTYPE_EN 0x1f0968UL +#define NIG_REG_LLH_ENG_CLS_ENG_ID_TBL 0x501b90UL +#define DORQ_REG_PF_DPM_ENABLE 0x100510UL +#define DORQ_REG_PF_ICID_BIT_SHIFT_NORM 0x100448UL +#define DORQ_REG_PF_MIN_ADDR_REG1 0x100400UL +#define DORQ_REG_PF_DPI_BIT_SHIFT 0x100450UL #endif diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c new file mode 100644 index 000000000000..4c53b857cc1c --- /dev/null +++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c @@ -0,0 +1,886 @@ +/* QLogic qed NIC Driver + * Copyright (c) 2015-2016 QLogic Corporation + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and /or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "qed.h" +#include "qed_cxt.h" +#include "qed_hsi.h" +#include "qed_hw.h" +#include "qed_init_ops.h" +#include "qed_int.h" +#include "qed_ll2.h" +#include "qed_mcp.h" +#include "qed_reg_addr.h" +#include "qed_sp.h" +#include "qed_roce.h" + +void qed_async_roce_event(struct qed_hwfn *p_hwfn, + struct event_ring_entry *p_eqe) +{ + struct qed_rdma_info *p_rdma_info = p_hwfn->p_rdma_info; + + p_rdma_info->events.affiliated_event(p_rdma_info->events.context, + p_eqe->opcode, &p_eqe->data); +} + +static int qed_rdma_bmap_alloc(struct qed_hwfn *p_hwfn, + struct qed_bmap *bmap, u32 max_count) +{ + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "max_count = %08x\n", max_count); + + bmap->max_count = max_count; + + bmap->bitmap = kzalloc(BITS_TO_LONGS(max_count) * sizeof(long), + GFP_KERNEL); + if (!bmap->bitmap) { + DP_NOTICE(p_hwfn, + "qed bmap alloc failed: cannot allocate memory (bitmap)\n"); + return -ENOMEM; + } + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Allocated bitmap %p\n", + bmap->bitmap); + return 0; +} + +static int qed_rdma_bmap_alloc_id(struct qed_hwfn *p_hwfn, + struct qed_bmap *bmap, u32 *id_num) +{ + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "bmap = %p\n", bmap); + + *id_num = find_first_zero_bit(bmap->bitmap, bmap->max_count); + + if (*id_num >= bmap->max_count) { + DP_NOTICE(p_hwfn, "no id available max_count=%d\n", + bmap->max_count); + return -EINVAL; + } + + __set_bit(*id_num, bmap->bitmap); + + return 0; +} + +static void qed_bmap_release_id(struct qed_hwfn *p_hwfn, + struct qed_bmap *bmap, u32 id_num) +{ + bool b_acquired; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "id_num = %08x", id_num); + if (id_num >= bmap->max_count) + return; + + b_acquired = test_and_clear_bit(id_num, bmap->bitmap); + if (!b_acquired) { + DP_NOTICE(p_hwfn, "ID %d already released\n", id_num); + return; + } +} + +u32 qed_rdma_get_sb_id(void *p_hwfn, u32 rel_sb_id) +{ + /* First sb id for RoCE is after all the l2 sb */ + return FEAT_NUM((struct qed_hwfn *)p_hwfn, QED_PF_L2_QUE) + rel_sb_id; +} + +u32 qed_rdma_query_cau_timer_res(void *rdma_cxt) +{ + return QED_CAU_DEF_RX_TIMER_RES; +} + +static int qed_rdma_alloc(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + struct qed_rdma_start_in_params *params) +{ + struct qed_rdma_info *p_rdma_info; + u32 num_cons, num_tasks; + int rc = -ENOMEM; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Allocating RDMA\n"); + + /* Allocate a struct with current pf rdma info */ + p_rdma_info = kzalloc(sizeof(*p_rdma_info), GFP_KERNEL); + if (!p_rdma_info) { + DP_NOTICE(p_hwfn, + "qed rdma alloc failed: cannot allocate memory (rdma info). rc = %d\n", + rc); + return rc; + } + + p_hwfn->p_rdma_info = p_rdma_info; + p_rdma_info->proto = PROTOCOLID_ROCE; + + num_cons = qed_cxt_get_proto_cid_count(p_hwfn, p_rdma_info->proto, 0); + + p_rdma_info->num_qps = num_cons / 2; + + num_tasks = qed_cxt_get_proto_tid_count(p_hwfn, PROTOCOLID_ROCE); + + /* Each MR uses a single task */ + p_rdma_info->num_mrs = num_tasks; + + /* Queue zone lines are shared between RoCE and L2 in such a way that + * they can be used by each without obstructing the other. + */ + p_rdma_info->queue_zone_base = (u16)FEAT_NUM(p_hwfn, QED_L2_QUEUE); + + /* Allocate a struct with device params and fill it */ + p_rdma_info->dev = kzalloc(sizeof(*p_rdma_info->dev), GFP_KERNEL); + if (!p_rdma_info->dev) { + DP_NOTICE(p_hwfn, + "qed rdma alloc failed: cannot allocate memory (rdma info dev). rc = %d\n", + rc); + goto free_rdma_info; + } + + /* Allocate a struct with port params and fill it */ + p_rdma_info->port = kzalloc(sizeof(*p_rdma_info->port), GFP_KERNEL); + if (!p_rdma_info->port) { + DP_NOTICE(p_hwfn, + "qed rdma alloc failed: cannot allocate memory (rdma info port). rc = %d\n", + rc); + goto free_rdma_dev; + } + + /* Allocate bit map for pd's */ + rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->pd_map, RDMA_MAX_PDS); + if (rc) { + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "Failed to allocate pd_map, rc = %d\n", + rc); + goto free_rdma_port; + } + + /* Allocate DPI bitmap */ + rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->dpi_map, + p_hwfn->dpi_count); + if (rc) { + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "Failed to allocate DPI bitmap, rc = %d\n", rc); + goto free_pd_map; + } + + /* Allocate bitmap for cq's. The maximum number of CQs is bounded to + * twice the number of QPs. + */ + rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->cq_map, + p_rdma_info->num_qps * 2); + if (rc) { + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "Failed to allocate cq bitmap, rc = %d\n", rc); + goto free_dpi_map; + } + + /* Allocate bitmap for toggle bit for cq icids + * We toggle the bit every time we create or resize cq for a given icid. + * The maximum number of CQs is bounded to twice the number of QPs. + */ + rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->toggle_bits, + p_rdma_info->num_qps * 2); + if (rc) { + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "Failed to allocate toogle bits, rc = %d\n", rc); + goto free_cq_map; + } + + /* Allocate bitmap for itids */ + rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->tid_map, + p_rdma_info->num_mrs); + if (rc) { + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "Failed to allocate itids bitmaps, rc = %d\n", rc); + goto free_toggle_map; + } + + /* Allocate bitmap for cids used for qps. */ + rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->cid_map, num_cons); + if (rc) { + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "Failed to allocate cid bitmap, rc = %d\n", rc); + goto free_tid_map; + } + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Allocation successful\n"); + return 0; + +free_tid_map: + kfree(p_rdma_info->tid_map.bitmap); +free_toggle_map: + kfree(p_rdma_info->toggle_bits.bitmap); +free_cq_map: + kfree(p_rdma_info->cq_map.bitmap); +free_dpi_map: + kfree(p_rdma_info->dpi_map.bitmap); +free_pd_map: + kfree(p_rdma_info->pd_map.bitmap); +free_rdma_port: + kfree(p_rdma_info->port); +free_rdma_dev: + kfree(p_rdma_info->dev); +free_rdma_info: + kfree(p_rdma_info); + + return rc; +} + +void qed_rdma_resc_free(struct qed_hwfn *p_hwfn) +{ + struct qed_rdma_info *p_rdma_info = p_hwfn->p_rdma_info; + + kfree(p_rdma_info->cid_map.bitmap); + kfree(p_rdma_info->tid_map.bitmap); + kfree(p_rdma_info->toggle_bits.bitmap); + kfree(p_rdma_info->cq_map.bitmap); + kfree(p_rdma_info->dpi_map.bitmap); + kfree(p_rdma_info->pd_map.bitmap); + + kfree(p_rdma_info->port); + kfree(p_rdma_info->dev); + + kfree(p_rdma_info); +} + +static void qed_rdma_free(struct qed_hwfn *p_hwfn) +{ + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Freeing RDMA\n"); + + qed_rdma_resc_free(p_hwfn); +} + +static void qed_rdma_get_guid(struct qed_hwfn *p_hwfn, u8 *guid) +{ + guid[0] = p_hwfn->hw_info.hw_mac_addr[0] ^ 2; + guid[1] = p_hwfn->hw_info.hw_mac_addr[1]; + guid[2] = p_hwfn->hw_info.hw_mac_addr[2]; + guid[3] = 0xff; + guid[4] = 0xfe; + guid[5] = p_hwfn->hw_info.hw_mac_addr[3]; + guid[6] = p_hwfn->hw_info.hw_mac_addr[4]; + guid[7] = p_hwfn->hw_info.hw_mac_addr[5]; +} + +static void qed_rdma_init_events(struct qed_hwfn *p_hwfn, + struct qed_rdma_start_in_params *params) +{ + struct qed_rdma_events *events; + + events = &p_hwfn->p_rdma_info->events; + + events->unaffiliated_event = params->events->unaffiliated_event; + events->affiliated_event = params->events->affiliated_event; + events->context = params->events->context; +} + +static void qed_rdma_init_devinfo(struct qed_hwfn *p_hwfn, + struct qed_rdma_start_in_params *params) +{ + struct qed_rdma_device *dev = p_hwfn->p_rdma_info->dev; + struct qed_dev *cdev = p_hwfn->cdev; + u32 pci_status_control; + u32 num_qps; + + /* Vendor specific information */ + dev->vendor_id = cdev->vendor_id; + dev->vendor_part_id = cdev->device_id; + dev->hw_ver = 0; + dev->fw_ver = (FW_MAJOR_VERSION << 24) | (FW_MINOR_VERSION << 16) | + (FW_REVISION_VERSION << 8) | (FW_ENGINEERING_VERSION); + + qed_rdma_get_guid(p_hwfn, (u8 *)&dev->sys_image_guid); + dev->node_guid = dev->sys_image_guid; + + dev->max_sge = min_t(u32, RDMA_MAX_SGE_PER_SQ_WQE, + RDMA_MAX_SGE_PER_RQ_WQE); + + if (cdev->rdma_max_sge) + dev->max_sge = min_t(u32, cdev->rdma_max_sge, dev->max_sge); + + dev->max_inline = ROCE_REQ_MAX_INLINE_DATA_SIZE; + + dev->max_inline = (cdev->rdma_max_inline) ? + min_t(u32, cdev->rdma_max_inline, dev->max_inline) : + dev->max_inline; + + dev->max_wqe = QED_RDMA_MAX_WQE; + dev->max_cnq = (u8)FEAT_NUM(p_hwfn, QED_RDMA_CNQ); + + /* The number of QPs may be higher than QED_ROCE_MAX_QPS, because + * it is up-aligned to 16 and then to ILT page size within qed cxt. + * This is OK in terms of ILT but we don't want to configure the FW + * above its abilities + */ + num_qps = ROCE_MAX_QPS; + num_qps = min_t(u64, num_qps, p_hwfn->p_rdma_info->num_qps); + dev->max_qp = num_qps; + + /* CQs uses the same icids that QPs use hence they are limited by the + * number of icids. There are two icids per QP. + */ + dev->max_cq = num_qps * 2; + + /* The number of mrs is smaller by 1 since the first is reserved */ + dev->max_mr = p_hwfn->p_rdma_info->num_mrs - 1; + dev->max_mr_size = QED_RDMA_MAX_MR_SIZE; + + /* The maximum CQE capacity per CQ supported. + * max number of cqes will be in two layer pbl, + * 8 is the pointer size in bytes + * 32 is the size of cq element in bytes + */ + if (params->cq_mode == QED_RDMA_CQ_MODE_32_BITS) + dev->max_cqe = QED_RDMA_MAX_CQE_32_BIT; + else + dev->max_cqe = QED_RDMA_MAX_CQE_16_BIT; + + dev->max_mw = 0; + dev->max_fmr = QED_RDMA_MAX_FMR; + dev->max_mr_mw_fmr_pbl = (PAGE_SIZE / 8) * (PAGE_SIZE / 8); + dev->max_mr_mw_fmr_size = dev->max_mr_mw_fmr_pbl * PAGE_SIZE; + dev->max_pkey = QED_RDMA_MAX_P_KEY; + + dev->max_qp_resp_rd_atomic_resc = RDMA_RING_PAGE_SIZE / + (RDMA_RESP_RD_ATOMIC_ELM_SIZE * 2); + dev->max_qp_req_rd_atomic_resc = RDMA_RING_PAGE_SIZE / + RDMA_REQ_RD_ATOMIC_ELM_SIZE; + dev->max_dev_resp_rd_atomic_resc = dev->max_qp_resp_rd_atomic_resc * + p_hwfn->p_rdma_info->num_qps; + dev->page_size_caps = QED_RDMA_PAGE_SIZE_CAPS; + dev->dev_ack_delay = QED_RDMA_ACK_DELAY; + dev->max_pd = RDMA_MAX_PDS; + dev->max_ah = p_hwfn->p_rdma_info->num_qps; + dev->max_stats_queues = (u8)RESC_NUM(p_hwfn, QED_RDMA_STATS_QUEUE); + + /* Set capablities */ + dev->dev_caps = 0; + SET_FIELD(dev->dev_caps, QED_RDMA_DEV_CAP_RNR_NAK, 1); + SET_FIELD(dev->dev_caps, QED_RDMA_DEV_CAP_PORT_ACTIVE_EVENT, 1); + SET_FIELD(dev->dev_caps, QED_RDMA_DEV_CAP_PORT_CHANGE_EVENT, 1); + SET_FIELD(dev->dev_caps, QED_RDMA_DEV_CAP_RESIZE_CQ, 1); + SET_FIELD(dev->dev_caps, QED_RDMA_DEV_CAP_BASE_MEMORY_EXT, 1); + SET_FIELD(dev->dev_caps, QED_RDMA_DEV_CAP_BASE_QUEUE_EXT, 1); + SET_FIELD(dev->dev_caps, QED_RDMA_DEV_CAP_ZBVA, 1); + SET_FIELD(dev->dev_caps, QED_RDMA_DEV_CAP_LOCAL_INV_FENCE, 1); + + /* Check atomic operations support in PCI configuration space. */ + pci_read_config_dword(cdev->pdev, + cdev->pdev->pcie_cap + PCI_EXP_DEVCTL2, + &pci_status_control); + + if (pci_status_control & PCI_EXP_DEVCTL2_LTR_EN) + SET_FIELD(dev->dev_caps, QED_RDMA_DEV_CAP_ATOMIC_OP, 1); +} + +static void qed_rdma_init_port(struct qed_hwfn *p_hwfn) +{ + struct qed_rdma_port *port = p_hwfn->p_rdma_info->port; + struct qed_rdma_device *dev = p_hwfn->p_rdma_info->dev; + + port->port_state = p_hwfn->mcp_info->link_output.link_up ? + QED_RDMA_PORT_UP : QED_RDMA_PORT_DOWN; + + port->max_msg_size = min_t(u64, + (dev->max_mr_mw_fmr_size * + p_hwfn->cdev->rdma_max_sge), + BIT(31)); + + port->pkey_bad_counter = 0; +} + +static int qed_rdma_init_hw(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) +{ + u32 ll2_ethertype_en; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Initializing HW\n"); + p_hwfn->b_rdma_enabled_in_prs = false; + + qed_wr(p_hwfn, p_ptt, PRS_REG_ROCE_DEST_QP_MAX_PF, 0); + + p_hwfn->rdma_prs_search_reg = PRS_REG_SEARCH_ROCE; + + /* We delay writing to this reg until first cid is allocated. See + * qed_cxt_dynamic_ilt_alloc function for more details + */ + ll2_ethertype_en = qed_rd(p_hwfn, p_ptt, PRS_REG_LIGHT_L2_ETHERTYPE_EN); + qed_wr(p_hwfn, p_ptt, PRS_REG_LIGHT_L2_ETHERTYPE_EN, + (ll2_ethertype_en | 0x01)); + + if (qed_cxt_get_proto_cid_start(p_hwfn, PROTOCOLID_ROCE) % 2) { + DP_NOTICE(p_hwfn, "The first RoCE's cid should be even\n"); + return -EINVAL; + } + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Initializing HW - Done\n"); + return 0; +} + +static int qed_rdma_start_fw(struct qed_hwfn *p_hwfn, + struct qed_rdma_start_in_params *params, + struct qed_ptt *p_ptt) +{ + struct rdma_init_func_ramrod_data *p_ramrod; + struct qed_rdma_cnq_params *p_cnq_pbl_list; + struct rdma_init_func_hdr *p_params_header; + struct rdma_cnq_params *p_cnq_params; + struct qed_sp_init_data init_data; + struct qed_spq_entry *p_ent; + u32 cnq_id, sb_id; + int rc; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Starting FW\n"); + + /* Save the number of cnqs for the function close ramrod */ + p_hwfn->p_rdma_info->num_cnqs = params->desired_cnq; + + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; + + rc = qed_sp_init_request(p_hwfn, &p_ent, RDMA_RAMROD_FUNC_INIT, + p_hwfn->p_rdma_info->proto, &init_data); + if (rc) + return rc; + + p_ramrod = &p_ent->ramrod.roce_init_func.rdma; + + p_params_header = &p_ramrod->params_header; + p_params_header->cnq_start_offset = (u8)RESC_START(p_hwfn, + QED_RDMA_CNQ_RAM); + p_params_header->num_cnqs = params->desired_cnq; + + if (params->cq_mode == QED_RDMA_CQ_MODE_16_BITS) + p_params_header->cq_ring_mode = 1; + else + p_params_header->cq_ring_mode = 0; + + for (cnq_id = 0; cnq_id < params->desired_cnq; cnq_id++) { + sb_id = qed_rdma_get_sb_id(p_hwfn, cnq_id); + p_cnq_params = &p_ramrod->cnq_params[cnq_id]; + p_cnq_pbl_list = ¶ms->cnq_pbl_list[cnq_id]; + p_cnq_params->sb_num = + cpu_to_le16(p_hwfn->sbs_info[sb_id]->igu_sb_id); + + p_cnq_params->sb_index = p_hwfn->pf_params.rdma_pf_params.gl_pi; + p_cnq_params->num_pbl_pages = p_cnq_pbl_list->num_pbl_pages; + + DMA_REGPAIR_LE(p_cnq_params->pbl_base_addr, + p_cnq_pbl_list->pbl_ptr); + + /* we assume here that cnq_id and qz_offset are the same */ + p_cnq_params->queue_zone_num = + cpu_to_le16(p_hwfn->p_rdma_info->queue_zone_base + + cnq_id); + } + + return qed_spq_post(p_hwfn, p_ent, NULL); +} + +static int qed_rdma_reserve_lkey(struct qed_hwfn *p_hwfn) +{ + struct qed_rdma_device *dev = p_hwfn->p_rdma_info->dev; + + /* The first DPI is reserved for the Kernel */ + __set_bit(0, p_hwfn->p_rdma_info->dpi_map.bitmap); + + /* Tid 0 will be used as the key for "reserved MR". + * The driver should allocate memory for it so it can be loaded but no + * ramrod should be passed on it. + */ + qed_rdma_alloc_tid(p_hwfn, &dev->reserved_lkey); + if (dev->reserved_lkey != RDMA_RESERVED_LKEY) { + DP_NOTICE(p_hwfn, + "Reserved lkey should be equal to RDMA_RESERVED_LKEY\n"); + return -EINVAL; + } + + return 0; +} + +static int qed_rdma_setup(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + struct qed_rdma_start_in_params *params) +{ + int rc; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "RDMA setup\n"); + + spin_lock_init(&p_hwfn->p_rdma_info->lock); + + qed_rdma_init_devinfo(p_hwfn, params); + qed_rdma_init_port(p_hwfn); + qed_rdma_init_events(p_hwfn, params); + + rc = qed_rdma_reserve_lkey(p_hwfn); + if (rc) + return rc; + + rc = qed_rdma_init_hw(p_hwfn, p_ptt); + if (rc) + return rc; + + return qed_rdma_start_fw(p_hwfn, params, p_ptt); +} + +int qed_rdma_stop(void *rdma_cxt) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; + struct rdma_close_func_ramrod_data *p_ramrod; + struct qed_sp_init_data init_data; + struct qed_spq_entry *p_ent; + struct qed_ptt *p_ptt; + u32 ll2_ethertype_en; + int rc = -EBUSY; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "RDMA stop\n"); + + p_ptt = qed_ptt_acquire(p_hwfn); + if (!p_ptt) { + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Failed to acquire PTT\n"); + return rc; + } + + /* Disable RoCE search */ + qed_wr(p_hwfn, p_ptt, p_hwfn->rdma_prs_search_reg, 0); + p_hwfn->b_rdma_enabled_in_prs = false; + + qed_wr(p_hwfn, p_ptt, PRS_REG_ROCE_DEST_QP_MAX_PF, 0); + + ll2_ethertype_en = qed_rd(p_hwfn, p_ptt, PRS_REG_LIGHT_L2_ETHERTYPE_EN); + + qed_wr(p_hwfn, p_ptt, PRS_REG_LIGHT_L2_ETHERTYPE_EN, + (ll2_ethertype_en & 0xFFFE)); + + qed_ptt_release(p_hwfn, p_ptt); + + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; + + /* Stop RoCE */ + rc = qed_sp_init_request(p_hwfn, &p_ent, RDMA_RAMROD_FUNC_CLOSE, + p_hwfn->p_rdma_info->proto, &init_data); + if (rc) + goto out; + + p_ramrod = &p_ent->ramrod.rdma_close_func; + + p_ramrod->num_cnqs = p_hwfn->p_rdma_info->num_cnqs; + p_ramrod->cnq_start_offset = (u8)RESC_START(p_hwfn, QED_RDMA_CNQ_RAM); + + rc = qed_spq_post(p_hwfn, p_ent, NULL); + +out: + qed_rdma_free(p_hwfn); + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "RDMA stop done, rc = %d\n", rc); + return rc; +} + +int qed_rdma_add_user(void *rdma_cxt, + struct qed_rdma_add_user_out_params *out_params) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; + u32 dpi_start_offset; + u32 returned_id = 0; + int rc; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Adding User\n"); + + /* Allocate DPI */ + spin_lock_bh(&p_hwfn->p_rdma_info->lock); + rc = qed_rdma_bmap_alloc_id(p_hwfn, &p_hwfn->p_rdma_info->dpi_map, + &returned_id); + spin_unlock_bh(&p_hwfn->p_rdma_info->lock); + + out_params->dpi = (u16)returned_id; + + /* Calculate the corresponding DPI address */ + dpi_start_offset = p_hwfn->dpi_start_offset; + + out_params->dpi_addr = (u64)((u8 __iomem *)p_hwfn->doorbells + + dpi_start_offset + + ((out_params->dpi) * p_hwfn->dpi_size)); + + out_params->dpi_phys_addr = p_hwfn->cdev->db_phys_addr + + dpi_start_offset + + ((out_params->dpi) * p_hwfn->dpi_size); + + out_params->dpi_size = p_hwfn->dpi_size; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Adding user - done, rc = %d\n", rc); + return rc; +} + +struct qed_rdma_device *qed_rdma_query_device(void *rdma_cxt) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Query device\n"); + + /* Return struct with device parameters */ + return p_hwfn->p_rdma_info->dev; +} + +int qed_rdma_alloc_tid(void *rdma_cxt, u32 *itid) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; + int rc; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Allocate TID\n"); + + spin_lock_bh(&p_hwfn->p_rdma_info->lock); + rc = qed_rdma_bmap_alloc_id(p_hwfn, + &p_hwfn->p_rdma_info->tid_map, itid); + spin_unlock_bh(&p_hwfn->p_rdma_info->lock); + if (rc) + goto out; + + rc = qed_cxt_dynamic_ilt_alloc(p_hwfn, QED_ELEM_TASK, *itid); +out: + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Allocate TID - done, rc = %d\n", rc); + return rc; +} + +void qed_rdma_cnq_prod_update(void *rdma_cxt, u8 qz_offset, u16 prod) +{ + struct qed_hwfn *p_hwfn; + u16 qz_num; + u32 addr; + + p_hwfn = (struct qed_hwfn *)rdma_cxt; + qz_num = p_hwfn->p_rdma_info->queue_zone_base + qz_offset; + addr = GTT_BAR0_MAP_REG_USDM_RAM + + USTORM_COMMON_QUEUE_CONS_OFFSET(qz_num); + + REG_WR16(p_hwfn, addr, prod); + + /* keep prod updates ordered */ + wmb(); +} + +static int qed_fill_rdma_dev_info(struct qed_dev *cdev, + struct qed_dev_rdma_info *info) +{ + memset(info, 0, sizeof(*info)); + + info->rdma_type = QED_RDMA_TYPE_ROCE; + + qed_fill_dev_info(cdev, &info->common); + + return 0; +} + +static int qed_rdma_get_sb_start(struct qed_dev *cdev) +{ + int feat_num; + + if (cdev->num_hwfns > 1) + feat_num = FEAT_NUM(QED_LEADING_HWFN(cdev), QED_PF_L2_QUE); + else + feat_num = FEAT_NUM(QED_LEADING_HWFN(cdev), QED_PF_L2_QUE) * + cdev->num_hwfns; + + return feat_num; +} + +static int qed_rdma_get_min_cnq_msix(struct qed_dev *cdev) +{ + int n_cnq = FEAT_NUM(QED_LEADING_HWFN(cdev), QED_RDMA_CNQ); + int n_msix = cdev->int_params.rdma_msix_cnt; + + return min_t(int, n_cnq, n_msix); +} + +static int qed_rdma_set_int(struct qed_dev *cdev, u16 cnt) +{ + int limit = 0; + + /* Mark the fastpath as free/used */ + cdev->int_params.fp_initialized = cnt ? true : false; + + if (cdev->int_params.out.int_mode != QED_INT_MODE_MSIX) { + DP_ERR(cdev, + "qed roce supports only MSI-X interrupts (detected %d).\n", + cdev->int_params.out.int_mode); + return -EINVAL; + } else if (cdev->int_params.fp_msix_cnt) { + limit = cdev->int_params.rdma_msix_cnt; + } + + if (!limit) + return -ENOMEM; + + return min_t(int, cnt, limit); +} + +static int qed_rdma_get_int(struct qed_dev *cdev, struct qed_int_info *info) +{ + memset(info, 0, sizeof(*info)); + + if (!cdev->int_params.fp_initialized) { + DP_INFO(cdev, + "Protocol driver requested interrupt information, but its support is not yet configured\n"); + return -EINVAL; + } + + if (cdev->int_params.out.int_mode == QED_INT_MODE_MSIX) { + int msix_base = cdev->int_params.rdma_msix_base; + + info->msix_cnt = cdev->int_params.rdma_msix_cnt; + info->msix = &cdev->int_params.msix_table[msix_base]; + + DP_VERBOSE(cdev, QED_MSG_RDMA, "msix_cnt = %d msix_base=%d\n", + info->msix_cnt, msix_base); + } + + return 0; +} + +static void *qed_rdma_get_rdma_ctx(struct qed_dev *cdev) +{ + return QED_LEADING_HWFN(cdev); +} + +static void qed_rdma_dpm_conf(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) +{ + u32 val; + + val = (p_hwfn->dcbx_no_edpm || p_hwfn->db_bar_no_edpm) ? 0 : 1; + + qed_wr(p_hwfn, p_ptt, DORQ_REG_PF_DPM_ENABLE, val); + DP_VERBOSE(p_hwfn, (QED_MSG_DCB | QED_MSG_RDMA), + "Changing DPM_EN state to %d (DCBX=%d, DB_BAR=%d)\n", + val, p_hwfn->dcbx_no_edpm, p_hwfn->db_bar_no_edpm); +} + +void qed_rdma_dpm_bar(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) +{ + p_hwfn->db_bar_no_edpm = true; + + qed_rdma_dpm_conf(p_hwfn, p_ptt); +} + +int qed_rdma_start(void *rdma_cxt, struct qed_rdma_start_in_params *params) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; + struct qed_ptt *p_ptt; + int rc = -EBUSY; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "desired_cnq = %08x\n", params->desired_cnq); + + p_ptt = qed_ptt_acquire(p_hwfn); + if (!p_ptt) + goto err; + + rc = qed_rdma_alloc(p_hwfn, p_ptt, params); + if (rc) + goto err1; + + rc = qed_rdma_setup(p_hwfn, p_ptt, params); + if (rc) + goto err2; + + qed_ptt_release(p_hwfn, p_ptt); + + return rc; + +err2: + qed_rdma_free(p_hwfn); +err1: + qed_ptt_release(p_hwfn, p_ptt); +err: + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "RDMA start - error, rc = %d\n", rc); + return rc; +} + +static int qed_rdma_init(struct qed_dev *cdev, + struct qed_rdma_start_in_params *params) +{ + return qed_rdma_start(QED_LEADING_HWFN(cdev), params); +} + +void qed_rdma_remove_user(void *rdma_cxt, u16 dpi) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "dpi = %08x\n", dpi); + + spin_lock_bh(&p_hwfn->p_rdma_info->lock); + qed_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->dpi_map, dpi); + spin_unlock_bh(&p_hwfn->p_rdma_info->lock); +} + +static const struct qed_rdma_ops qed_rdma_ops_pass = { + .common = &qed_common_ops_pass, + .fill_dev_info = &qed_fill_rdma_dev_info, + .rdma_get_rdma_ctx = &qed_rdma_get_rdma_ctx, + .rdma_init = &qed_rdma_init, + .rdma_add_user = &qed_rdma_add_user, + .rdma_remove_user = &qed_rdma_remove_user, + .rdma_stop = &qed_rdma_stop, + .rdma_query_device = &qed_rdma_query_device, + .rdma_get_start_sb = &qed_rdma_get_sb_start, + .rdma_get_rdma_int = &qed_rdma_get_int, + .rdma_set_rdma_int = &qed_rdma_set_int, + .rdma_get_min_cnq_msix = &qed_rdma_get_min_cnq_msix, + .rdma_cnq_prod_update = &qed_rdma_cnq_prod_update, +}; + +const struct qed_rdma_ops *qed_get_rdma_ops() +{ + return &qed_rdma_ops_pass; +} +EXPORT_SYMBOL(qed_get_rdma_ops); diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.h b/drivers/net/ethernet/qlogic/qed/qed_roce.h new file mode 100644 index 000000000000..e55048106a83 --- /dev/null +++ b/drivers/net/ethernet/qlogic/qed/qed_roce.h @@ -0,0 +1,123 @@ +/* QLogic qed NIC Driver + * Copyright (c) 2015-2016 QLogic Corporation + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and /or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _QED_ROCE_H +#define _QED_ROCE_H +#include +#include +#include +#include +#include +#include +#include +#include +#include "qed.h" +#include "qed_dev_api.h" +#include "qed_hsi.h" + +#define QED_RDMA_MAX_FMR (RDMA_MAX_TIDS) +#define QED_RDMA_MAX_P_KEY (1) +#define QED_RDMA_MAX_WQE (0x7FFF) +#define QED_RDMA_MAX_SRQ_WQE_ELEM (0x7FFF) +#define QED_RDMA_PAGE_SIZE_CAPS (0xFFFFF000) +#define QED_RDMA_ACK_DELAY (15) +#define QED_RDMA_MAX_MR_SIZE (0x10000000000ULL) +#define QED_RDMA_MAX_CQS (RDMA_MAX_CQS) +#define QED_RDMA_MAX_MRS (RDMA_MAX_TIDS) +/* Add 1 for header element */ +#define QED_RDMA_MAX_SRQ_ELEM_PER_WQE (RDMA_MAX_SGE_PER_RQ_WQE + 1) +#define QED_RDMA_MAX_SGE_PER_SRQ_WQE (RDMA_MAX_SGE_PER_RQ_WQE) +#define QED_RDMA_SRQ_WQE_ELEM_SIZE (16) +#define QED_RDMA_MAX_SRQS (32 * 1024) + +#define QED_RDMA_MAX_CQE_32_BIT (0x7FFFFFFF - 1) +#define QED_RDMA_MAX_CQE_16_BIT (0x7FFF - 1) + +enum qed_rdma_toggle_bit { + QED_RDMA_TOGGLE_BIT_CLEAR = 0, + QED_RDMA_TOGGLE_BIT_SET = 1 +}; + +struct qed_bmap { + unsigned long *bitmap; + u32 max_count; +}; + +struct qed_rdma_info { + /* spin lock to protect bitmaps */ + spinlock_t lock; + + struct qed_bmap cq_map; + struct qed_bmap pd_map; + struct qed_bmap tid_map; + struct qed_bmap qp_map; + struct qed_bmap srq_map; + struct qed_bmap cid_map; + struct qed_bmap dpi_map; + struct qed_bmap toggle_bits; + struct qed_rdma_events events; + struct qed_rdma_device *dev; + struct qed_rdma_port *port; + u32 last_tid; + u8 num_cnqs; + u32 num_qps; + u32 num_mrs; + u16 queue_zone_base; + enum protocol_type proto; +}; + +int +qed_rdma_add_user(void *rdma_cxt, + struct qed_rdma_add_user_out_params *out_params); +int qed_rdma_alloc_pd(void *rdma_cxt, u16 *pd); +int qed_rdma_alloc_tid(void *rdma_cxt, u32 *tid); +int qed_rdma_deregister_tid(void *rdma_cxt, u32 tid); +void qed_rdma_free_tid(void *rdma_cxt, u32 tid); +struct qed_rdma_device *qed_rdma_query_device(void *rdma_cxt); +int +qed_rdma_register_tid(void *rdma_cxt, + struct qed_rdma_register_tid_in_params *params); +void qed_rdma_remove_user(void *rdma_cxt, u16 dpi); +int qed_rdma_start(void *p_hwfn, struct qed_rdma_start_in_params *params); +int qed_rdma_stop(void *rdma_cxt); +u32 qed_rdma_get_sb_id(void *p_hwfn, u32 rel_sb_id); +u32 qed_rdma_query_cau_timer_res(void *p_hwfn); +void qed_rdma_cnq_prod_update(void *rdma_cxt, u8 cnq_index, u16 prod); +void qed_rdma_resc_free(struct qed_hwfn *p_hwfn); +void qed_async_roce_event(struct qed_hwfn *p_hwfn, + struct event_ring_entry *p_eqe); + +#if IS_ENABLED(CONFIG_INFINIBAND_QEDR) +void qed_rdma_dpm_bar(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt); +#else +void qed_rdma_dpm_bar(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) {} +#endif +#endif diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp.h b/drivers/net/ethernet/qlogic/qed/qed_sp.h index a3c539f1c2ac..652c90819758 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_sp.h @@ -85,6 +85,7 @@ union ramrod_data { struct rdma_srq_create_ramrod_data rdma_create_srq; struct rdma_srq_destroy_ramrod_data rdma_destroy_srq; struct rdma_srq_modify_ramrod_data rdma_modify_srq; + struct roce_init_func_ramrod_data roce_init_func; struct iscsi_slow_path_hdr iscsi_empty; struct iscsi_init_ramrod_params iscsi_init; diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c index 349af182d085..caff41544898 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_spq.c +++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c @@ -28,6 +28,9 @@ #include "qed_reg_addr.h" #include "qed_sp.h" #include "qed_sriov.h" +#if IS_ENABLED(CONFIG_INFINIBAND_QEDR) +#include "qed_roce.h" +#endif /*************************************************************************** * Structures & Definitions @@ -237,6 +240,11 @@ qed_async_event_completion(struct qed_hwfn *p_hwfn, struct event_ring_entry *p_eqe) { switch (p_eqe->protocol_id) { +#if IS_ENABLED(CONFIG_INFINIBAND_QEDR) + case PROTOCOLID_ROCE: + qed_async_roce_event(p_hwfn, p_eqe); + return 0; +#endif case PROTOCOLID_COMMON: return qed_sriov_eqe_event(p_hwfn, p_eqe->opcode, diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c index a4a3cead15bb..d2d6621fe0e5 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c @@ -1851,8 +1851,8 @@ static void qed_iov_vf_mbx_start_txq_resp(struct qed_hwfn *p_hwfn, if ((status == PFVF_STATUS_SUCCESS) && !b_legacy) { u16 qid = mbx->req_virt->start_txq.tx_qid; - p_tlv->offset = qed_db_addr(p_vf->vf_queues[qid].fw_cid, - DQ_DEMS_LEGACY); + p_tlv->offset = qed_db_addr_vf(p_vf->vf_queues[qid].fw_cid, + DQ_DEMS_LEGACY); } qed_iov_send_response(p_hwfn, p_ptt, p_vf, length, status); diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.c b/drivers/net/ethernet/qlogic/qed/qed_vf.c index 85334ceaf69c..abf5bf11f865 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_vf.c +++ b/drivers/net/ethernet/qlogic/qed/qed_vf.c @@ -544,7 +544,7 @@ int qed_vf_pf_txq_start(struct qed_hwfn *p_hwfn, u8 cid = p_iov->acquire_resp.resc.cid[tx_queue_id]; u32 db_addr; - db_addr = qed_db_addr(cid, DQ_DEMS_LEGACY); + db_addr = qed_db_addr_vf(cid, DQ_DEMS_LEGACY); *pp_doorbell = (u8 __iomem *)p_hwfn->doorbells + db_addr; } diff --git a/include/linux/qed/common_hsi.h b/include/linux/qed/common_hsi.h index 19027635df0d..734deb094618 100644 --- a/include/linux/qed/common_hsi.h +++ b/include/linux/qed/common_hsi.h @@ -674,6 +674,7 @@ union event_ring_data { struct iscsi_eqe_data iscsi_info; struct malicious_vf_eqe_data malicious_vf; struct initial_cleanup_eqe_data vf_init_cleanup; + struct regpair roce_handle; }; /* Event Ring Entry */ diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index e313742b571d..f9ae903bbb84 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -34,6 +34,8 @@ enum dcbx_protocol_type { DCBX_MAX_PROTOCOL_TYPE }; +#define QED_ROCE_PROTOCOL_INDEX (3) + #ifdef CONFIG_DCB #define QED_LLDP_CHASSIS_ID_STAT_LEN 4 #define QED_LLDP_PORT_ID_STAT_LEN 4 @@ -268,6 +270,7 @@ struct qed_dev_info { enum qed_sb_type { QED_SB_TYPE_L2_QUEUE, + QED_SB_TYPE_CNQ, }; enum qed_protocol { @@ -628,7 +631,7 @@ enum DP_MODULE { QED_MSG_CXT = 0x800000, QED_MSG_LL2 = 0x1000000, QED_MSG_ILT = 0x2000000, - QED_MSG_ROCE = 0x4000000, + QED_MSG_RDMA = 0x4000000, QED_MSG_DEBUG = 0x8000000, /* to be added...up to 0x8000000 */ }; diff --git a/include/linux/qed/qed_roce_if.h b/include/linux/qed/qed_roce_if.h new file mode 100644 index 000000000000..0f7d5275e515 --- /dev/null +++ b/include/linux/qed/qed_roce_if.h @@ -0,0 +1,345 @@ +/* QLogic qed NIC Driver + * Copyright (c) 2015-2016 QLogic Corporation + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and /or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _QED_ROCE_IF_H +#define _QED_ROCE_IF_H +#include +#include +#include +#include +#include +#include +#include +#include + +#define QED_RDMA_MAX_CNQ_SIZE (0xFFFF) + +/* rdma interface */ +enum qed_rdma_tid_type { + QED_RDMA_TID_REGISTERED_MR, + QED_RDMA_TID_FMR, + QED_RDMA_TID_MW_TYPE1, + QED_RDMA_TID_MW_TYPE2A +}; + +struct qed_rdma_events { + void *context; + void (*affiliated_event)(void *context, u8 fw_event_code, + void *fw_handle); + void (*unaffiliated_event)(void *context, u8 event_code); +}; + +struct qed_rdma_device { + u32 vendor_id; + u32 vendor_part_id; + u32 hw_ver; + u64 fw_ver; + + u64 node_guid; + u64 sys_image_guid; + + u8 max_cnq; + u8 max_sge; + u8 max_srq_sge; + u16 max_inline; + u32 max_wqe; + u32 max_srq_wqe; + u8 max_qp_resp_rd_atomic_resc; + u8 max_qp_req_rd_atomic_resc; + u64 max_dev_resp_rd_atomic_resc; + u32 max_cq; + u32 max_qp; + u32 max_srq; + u32 max_mr; + u64 max_mr_size; + u32 max_cqe; + u32 max_mw; + u32 max_fmr; + u32 max_mr_mw_fmr_pbl; + u64 max_mr_mw_fmr_size; + u32 max_pd; + u32 max_ah; + u8 max_pkey; + u16 max_srq_wr; + u8 max_stats_queues; + u32 dev_caps; + + /* Abilty to support RNR-NAK generation */ + +#define QED_RDMA_DEV_CAP_RNR_NAK_MASK 0x1 +#define QED_RDMA_DEV_CAP_RNR_NAK_SHIFT 0 + /* Abilty to support shutdown port */ +#define QED_RDMA_DEV_CAP_SHUTDOWN_PORT_MASK 0x1 +#define QED_RDMA_DEV_CAP_SHUTDOWN_PORT_SHIFT 1 + /* Abilty to support port active event */ +#define QED_RDMA_DEV_CAP_PORT_ACTIVE_EVENT_MASK 0x1 +#define QED_RDMA_DEV_CAP_PORT_ACTIVE_EVENT_SHIFT 2 + /* Abilty to support port change event */ +#define QED_RDMA_DEV_CAP_PORT_CHANGE_EVENT_MASK 0x1 +#define QED_RDMA_DEV_CAP_PORT_CHANGE_EVENT_SHIFT 3 + /* Abilty to support system image GUID */ +#define QED_RDMA_DEV_CAP_SYS_IMAGE_MASK 0x1 +#define QED_RDMA_DEV_CAP_SYS_IMAGE_SHIFT 4 + /* Abilty to support bad P_Key counter support */ +#define QED_RDMA_DEV_CAP_BAD_PKEY_CNT_MASK 0x1 +#define QED_RDMA_DEV_CAP_BAD_PKEY_CNT_SHIFT 5 + /* Abilty to support atomic operations */ +#define QED_RDMA_DEV_CAP_ATOMIC_OP_MASK 0x1 +#define QED_RDMA_DEV_CAP_ATOMIC_OP_SHIFT 6 +#define QED_RDMA_DEV_CAP_RESIZE_CQ_MASK 0x1 +#define QED_RDMA_DEV_CAP_RESIZE_CQ_SHIFT 7 + /* Abilty to support modifying the maximum number of + * outstanding work requests per QP + */ +#define QED_RDMA_DEV_CAP_RESIZE_MAX_WR_MASK 0x1 +#define QED_RDMA_DEV_CAP_RESIZE_MAX_WR_SHIFT 8 + /* Abilty to support automatic path migration */ +#define QED_RDMA_DEV_CAP_AUTO_PATH_MIG_MASK 0x1 +#define QED_RDMA_DEV_CAP_AUTO_PATH_MIG_SHIFT 9 + /* Abilty to support the base memory management extensions */ +#define QED_RDMA_DEV_CAP_BASE_MEMORY_EXT_MASK 0x1 +#define QED_RDMA_DEV_CAP_BASE_MEMORY_EXT_SHIFT 10 +#define QED_RDMA_DEV_CAP_BASE_QUEUE_EXT_MASK 0x1 +#define QED_RDMA_DEV_CAP_BASE_QUEUE_EXT_SHIFT 11 + /* Abilty to support multipile page sizes per memory region */ +#define QED_RDMA_DEV_CAP_MULTI_PAGE_PER_MR_EXT_MASK 0x1 +#define QED_RDMA_DEV_CAP_MULTI_PAGE_PER_MR_EXT_SHIFT 12 + /* Abilty to support block list physical buffer list */ +#define QED_RDMA_DEV_CAP_BLOCK_MODE_MASK 0x1 +#define QED_RDMA_DEV_CAP_BLOCK_MODE_SHIFT 13 + /* Abilty to support zero based virtual addresses */ +#define QED_RDMA_DEV_CAP_ZBVA_MASK 0x1 +#define QED_RDMA_DEV_CAP_ZBVA_SHIFT 14 + /* Abilty to support local invalidate fencing */ +#define QED_RDMA_DEV_CAP_LOCAL_INV_FENCE_MASK 0x1 +#define QED_RDMA_DEV_CAP_LOCAL_INV_FENCE_SHIFT 15 + /* Abilty to support Loopback on QP */ +#define QED_RDMA_DEV_CAP_LB_INDICATOR_MASK 0x1 +#define QED_RDMA_DEV_CAP_LB_INDICATOR_SHIFT 16 + u64 page_size_caps; + u8 dev_ack_delay; + u32 reserved_lkey; + u32 bad_pkey_counter; + struct qed_rdma_events events; +}; + +enum qed_port_state { + QED_RDMA_PORT_UP, + QED_RDMA_PORT_DOWN, +}; + +enum qed_roce_capability { + QED_ROCE_V1 = 1 << 0, + QED_ROCE_V2 = 1 << 1, +}; + +struct qed_rdma_port { + enum qed_port_state port_state; + int link_speed; + u64 max_msg_size; + u8 source_gid_table_len; + void *source_gid_table_ptr; + u8 pkey_table_len; + void *pkey_table_ptr; + u32 pkey_bad_counter; + enum qed_roce_capability capability; +}; + +struct qed_rdma_cnq_params { + u8 num_pbl_pages; + u64 pbl_ptr; +}; + +/* The CQ Mode affects the CQ doorbell transaction size. + * 64/32 bit machines should configure to 32/16 bits respectively. + */ +enum qed_rdma_cq_mode { + QED_RDMA_CQ_MODE_16_BITS, + QED_RDMA_CQ_MODE_32_BITS, +}; + +struct qed_roce_dcqcn_params { + u8 notification_point; + u8 reaction_point; + + /* fields for notification point */ + u32 cnp_send_timeout; + + /* fields for reaction point */ + u32 rl_bc_rate; + u16 rl_max_rate; + u16 rl_r_ai; + u16 rl_r_hai; + u16 dcqcn_g; + u32 dcqcn_k_us; + u32 dcqcn_timeout_us; +}; + +struct qed_rdma_start_in_params { + struct qed_rdma_events *events; + struct qed_rdma_cnq_params cnq_pbl_list[128]; + u8 desired_cnq; + enum qed_rdma_cq_mode cq_mode; + struct qed_roce_dcqcn_params dcqcn_params; + u16 max_mtu; + u8 mac_addr[ETH_ALEN]; + u8 iwarp_flags; +}; + +struct qed_rdma_add_user_out_params { + u16 dpi; + u64 dpi_addr; + u64 dpi_phys_addr; + u32 dpi_size; +}; + +enum roce_mode { + ROCE_V1, + ROCE_V2_IPV4, + ROCE_V2_IPV6, + MAX_ROCE_MODE +}; + +union qed_gid { + u8 bytes[16]; + u16 words[8]; + u32 dwords[4]; + u64 qwords[2]; + u32 ipv4_addr; +}; + +struct qed_rdma_register_tid_in_params { + u32 itid; + enum qed_rdma_tid_type tid_type; + u8 key; + u16 pd; + bool local_read; + bool local_write; + bool remote_read; + bool remote_write; + bool remote_atomic; + bool mw_bind; + u64 pbl_ptr; + bool pbl_two_level; + u8 pbl_page_size_log; + u8 page_size_log; + u32 fbo; + u64 length; + u64 vaddr; + bool zbva; + bool phy_mr; + bool dma_mr; + + bool dif_enabled; + u64 dif_error_addr; + u64 dif_runt_addr; +}; + +struct qed_rdma_create_srq_in_params { + u64 pbl_base_addr; + u64 prod_pair_addr; + u16 num_pages; + u16 pd_id; + u16 page_size; +}; + +struct qed_rdma_create_srq_out_params { + u16 srq_id; +}; + +struct qed_rdma_destroy_srq_in_params { + u16 srq_id; +}; + +struct qed_rdma_modify_srq_in_params { + u32 wqe_limit; + u16 srq_id; +}; + +struct qed_rdma_stats_out_params { + u64 sent_bytes; + u64 sent_pkts; + u64 rcv_bytes; + u64 rcv_pkts; +}; + +struct qed_rdma_counters_out_params { + u64 pd_count; + u64 max_pd; + u64 dpi_count; + u64 max_dpi; + u64 cq_count; + u64 max_cq; + u64 qp_count; + u64 max_qp; + u64 tid_count; + u64 max_tid; +}; + +#define QED_ROCE_TX_HEAD_FAILURE (1) +#define QED_ROCE_TX_FRAG_FAILURE (2) + +enum qed_rdma_type { + QED_RDMA_TYPE_ROCE, +}; + +struct qed_dev_rdma_info { + struct qed_dev_info common; + enum qed_rdma_type rdma_type; +}; + +struct qed_rdma_ops { + const struct qed_common_ops *common; + + int (*fill_dev_info)(struct qed_dev *cdev, + struct qed_dev_rdma_info *info); + void *(*rdma_get_rdma_ctx)(struct qed_dev *cdev); + + int (*rdma_init)(struct qed_dev *dev, + struct qed_rdma_start_in_params *iparams); + + int (*rdma_add_user)(void *rdma_cxt, + struct qed_rdma_add_user_out_params *oparams); + + void (*rdma_remove_user)(void *rdma_cxt, u16 dpi); + int (*rdma_stop)(void *rdma_cxt); + struct qed_rdma_device* (*rdma_query_device)(void *rdma_cxt); + int (*rdma_get_start_sb)(struct qed_dev *cdev); + int (*rdma_get_min_cnq_msix)(struct qed_dev *cdev); + void (*rdma_cnq_prod_update)(void *rdma_cxt, u8 cnq_index, u16 prod); + int (*rdma_get_rdma_int)(struct qed_dev *cdev, + struct qed_int_info *info); + int (*rdma_set_rdma_int)(struct qed_dev *cdev, u16 cnt); +}; + +const struct qed_rdma_ops *qed_get_rdma_ops(void); + +#endif diff --git a/include/linux/qed/rdma_common.h b/include/linux/qed/rdma_common.h index 187991c1f439..7663725faa94 100644 --- a/include/linux/qed/rdma_common.h +++ b/include/linux/qed/rdma_common.h @@ -28,6 +28,7 @@ #define RDMA_MAX_PDS (64 * 1024) #define RDMA_NUM_STATISTIC_COUNTERS MAX_NUM_VPORTS +#define RDMA_NUM_STATISTIC_COUNTERS_BB MAX_NUM_VPORTS_BB #define RDMA_TASK_TYPE (PROTOCOLID_ROCE) From c295f86e60f5ba67f0f4bba2bb2c22b3cbf01ec1 Mon Sep 17 00:00:00 2001 From: Ram Amrani Date: Sat, 1 Oct 2016 21:59:58 +0300 Subject: [PATCH 1698/1715] qed: PD,PKEY and CQ verb support Add support for the configurations of the protection domain and completion queues. Signed-off-by: Ram Amrani Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_roce.c | 324 +++++++++++++++++++++ drivers/net/ethernet/qlogic/qed/qed_roce.h | 21 ++ include/linux/qed/qed_roce_if.h | 30 ++ 3 files changed, 375 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c index 4c53b857cc1c..f9551643428f 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_roce.c +++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c @@ -663,6 +663,22 @@ int qed_rdma_add_user(void *rdma_cxt, return rc; } +struct qed_rdma_port *qed_rdma_query_port(void *rdma_cxt) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; + struct qed_rdma_port *p_port = p_hwfn->p_rdma_info->port; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "RDMA Query port\n"); + + /* Link may have changed */ + p_port->port_state = p_hwfn->mcp_info->link_output.link_up ? + QED_RDMA_PORT_UP : QED_RDMA_PORT_DOWN; + + p_port->link_speed = p_hwfn->mcp_info->link_output.speed; + + return p_port; +} + struct qed_rdma_device *qed_rdma_query_device(void *rdma_cxt) { struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; @@ -788,6 +804,309 @@ static int qed_rdma_get_int(struct qed_dev *cdev, struct qed_int_info *info) return 0; } +int qed_rdma_alloc_pd(void *rdma_cxt, u16 *pd) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; + u32 returned_id; + int rc; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Alloc PD\n"); + + /* Allocates an unused protection domain */ + spin_lock_bh(&p_hwfn->p_rdma_info->lock); + rc = qed_rdma_bmap_alloc_id(p_hwfn, + &p_hwfn->p_rdma_info->pd_map, &returned_id); + spin_unlock_bh(&p_hwfn->p_rdma_info->lock); + + *pd = (u16)returned_id; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Alloc PD - done, rc = %d\n", rc); + return rc; +} + +void qed_rdma_free_pd(void *rdma_cxt, u16 pd) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "pd = %08x\n", pd); + + /* Returns a previously allocated protection domain for reuse */ + spin_lock_bh(&p_hwfn->p_rdma_info->lock); + qed_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->pd_map, pd); + spin_unlock_bh(&p_hwfn->p_rdma_info->lock); +} + +static enum qed_rdma_toggle_bit +qed_rdma_toggle_bit_create_resize_cq(struct qed_hwfn *p_hwfn, u16 icid) +{ + struct qed_rdma_info *p_info = p_hwfn->p_rdma_info; + enum qed_rdma_toggle_bit toggle_bit; + u32 bmap_id; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", icid); + + /* the function toggle the bit that is related to a given icid + * and returns the new toggle bit's value + */ + bmap_id = icid - qed_cxt_get_proto_cid_start(p_hwfn, p_info->proto); + + spin_lock_bh(&p_info->lock); + toggle_bit = !test_and_change_bit(bmap_id, + p_info->toggle_bits.bitmap); + spin_unlock_bh(&p_info->lock); + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "QED_RDMA_TOGGLE_BIT_= %d\n", + toggle_bit); + + return toggle_bit; +} + +int qed_rdma_create_cq(void *rdma_cxt, + struct qed_rdma_create_cq_in_params *params, u16 *icid) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; + struct qed_rdma_info *p_info = p_hwfn->p_rdma_info; + struct rdma_create_cq_ramrod_data *p_ramrod; + enum qed_rdma_toggle_bit toggle_bit; + struct qed_sp_init_data init_data; + struct qed_spq_entry *p_ent; + u32 returned_id, start_cid; + int rc; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "cq_handle = %08x%08x\n", + params->cq_handle_hi, params->cq_handle_lo); + + /* Allocate icid */ + spin_lock_bh(&p_info->lock); + rc = qed_rdma_bmap_alloc_id(p_hwfn, + &p_info->cq_map, &returned_id); + spin_unlock_bh(&p_info->lock); + + if (rc) { + DP_NOTICE(p_hwfn, "Can't create CQ, rc = %d\n", rc); + return rc; + } + + start_cid = qed_cxt_get_proto_cid_start(p_hwfn, + p_info->proto); + *icid = returned_id + start_cid; + + /* Check if icid requires a page allocation */ + rc = qed_cxt_dynamic_ilt_alloc(p_hwfn, QED_ELEM_CXT, *icid); + if (rc) + goto err; + + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.cid = *icid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; + + /* Send create CQ ramrod */ + rc = qed_sp_init_request(p_hwfn, &p_ent, + RDMA_RAMROD_CREATE_CQ, + p_info->proto, &init_data); + if (rc) + goto err; + + p_ramrod = &p_ent->ramrod.rdma_create_cq; + + p_ramrod->cq_handle.hi = cpu_to_le32(params->cq_handle_hi); + p_ramrod->cq_handle.lo = cpu_to_le32(params->cq_handle_lo); + p_ramrod->dpi = cpu_to_le16(params->dpi); + p_ramrod->is_two_level_pbl = params->pbl_two_level; + p_ramrod->max_cqes = cpu_to_le32(params->cq_size); + DMA_REGPAIR_LE(p_ramrod->pbl_addr, params->pbl_ptr); + p_ramrod->pbl_num_pages = cpu_to_le16(params->pbl_num_pages); + p_ramrod->cnq_id = (u8)RESC_START(p_hwfn, QED_RDMA_CNQ_RAM) + + params->cnq_id; + p_ramrod->int_timeout = params->int_timeout; + + /* toggle the bit for every resize or create cq for a given icid */ + toggle_bit = qed_rdma_toggle_bit_create_resize_cq(p_hwfn, *icid); + + p_ramrod->toggle_bit = toggle_bit; + + rc = qed_spq_post(p_hwfn, p_ent, NULL); + if (rc) { + /* restore toggle bit */ + qed_rdma_toggle_bit_create_resize_cq(p_hwfn, *icid); + goto err; + } + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Created CQ, rc = %d\n", rc); + return rc; + +err: + /* release allocated icid */ + qed_bmap_release_id(p_hwfn, &p_info->cq_map, returned_id); + DP_NOTICE(p_hwfn, "Create CQ failed, rc = %d\n", rc); + + return rc; +} + +int qed_rdma_resize_cq(void *rdma_cxt, + struct qed_rdma_resize_cq_in_params *in_params, + struct qed_rdma_resize_cq_out_params *out_params) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; + struct rdma_resize_cq_output_params *p_ramrod_res; + struct rdma_resize_cq_ramrod_data *p_ramrod; + enum qed_rdma_toggle_bit toggle_bit; + struct qed_sp_init_data init_data; + struct qed_spq_entry *p_ent; + dma_addr_t ramrod_res_phys; + u8 fw_return_code; + int rc = -ENOMEM; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", in_params->icid); + + p_ramrod_res = + (struct rdma_resize_cq_output_params *) + dma_alloc_coherent(&p_hwfn->cdev->pdev->dev, + sizeof(struct rdma_resize_cq_output_params), + &ramrod_res_phys, GFP_KERNEL); + if (!p_ramrod_res) { + DP_NOTICE(p_hwfn, + "qed resize cq failed: cannot allocate memory (ramrod)\n"); + return rc; + } + + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.cid = in_params->icid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; + + rc = qed_sp_init_request(p_hwfn, &p_ent, + RDMA_RAMROD_RESIZE_CQ, + p_hwfn->p_rdma_info->proto, &init_data); + if (rc) + goto err; + + p_ramrod = &p_ent->ramrod.rdma_resize_cq; + + p_ramrod->flags = 0; + + /* toggle the bit for every resize or create cq for a given icid */ + toggle_bit = qed_rdma_toggle_bit_create_resize_cq(p_hwfn, + in_params->icid); + + SET_FIELD(p_ramrod->flags, + RDMA_RESIZE_CQ_RAMROD_DATA_TOGGLE_BIT, toggle_bit); + + SET_FIELD(p_ramrod->flags, + RDMA_RESIZE_CQ_RAMROD_DATA_IS_TWO_LEVEL_PBL, + in_params->pbl_two_level); + + p_ramrod->pbl_log_page_size = in_params->pbl_page_size_log - 12; + p_ramrod->pbl_num_pages = cpu_to_le16(in_params->pbl_num_pages); + p_ramrod->max_cqes = cpu_to_le32(in_params->cq_size); + DMA_REGPAIR_LE(p_ramrod->pbl_addr, in_params->pbl_ptr); + DMA_REGPAIR_LE(p_ramrod->output_params_addr, ramrod_res_phys); + + rc = qed_spq_post(p_hwfn, p_ent, &fw_return_code); + if (rc) + goto err; + + if (fw_return_code != RDMA_RETURN_OK) { + DP_NOTICE(p_hwfn, "fw_return_code = %d\n", fw_return_code); + rc = -EINVAL; + goto err; + } + + out_params->prod = le32_to_cpu(p_ramrod_res->old_cq_prod); + out_params->cons = le32_to_cpu(p_ramrod_res->old_cq_cons); + + dma_free_coherent(&p_hwfn->cdev->pdev->dev, + sizeof(struct rdma_resize_cq_output_params), + p_ramrod_res, ramrod_res_phys); + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Resized CQ, rc = %d\n", rc); + + return rc; + +err: dma_free_coherent(&p_hwfn->cdev->pdev->dev, + sizeof(struct rdma_resize_cq_output_params), + p_ramrod_res, ramrod_res_phys); + DP_NOTICE(p_hwfn, "Resized CQ, Failed - rc = %d\n", rc); + + return rc; +} + +int qed_rdma_destroy_cq(void *rdma_cxt, + struct qed_rdma_destroy_cq_in_params *in_params, + struct qed_rdma_destroy_cq_out_params *out_params) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; + struct rdma_destroy_cq_output_params *p_ramrod_res; + struct rdma_destroy_cq_ramrod_data *p_ramrod; + struct qed_sp_init_data init_data; + struct qed_spq_entry *p_ent; + dma_addr_t ramrod_res_phys; + int rc = -ENOMEM; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", in_params->icid); + + p_ramrod_res = + (struct rdma_destroy_cq_output_params *) + dma_alloc_coherent(&p_hwfn->cdev->pdev->dev, + sizeof(struct rdma_destroy_cq_output_params), + &ramrod_res_phys, GFP_KERNEL); + if (!p_ramrod_res) { + DP_NOTICE(p_hwfn, + "qed destroy cq failed: cannot allocate memory (ramrod)\n"); + return rc; + } + + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.cid = in_params->icid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; + + /* Send destroy CQ ramrod */ + rc = qed_sp_init_request(p_hwfn, &p_ent, + RDMA_RAMROD_DESTROY_CQ, + p_hwfn->p_rdma_info->proto, &init_data); + if (rc) + goto err; + + p_ramrod = &p_ent->ramrod.rdma_destroy_cq; + DMA_REGPAIR_LE(p_ramrod->output_params_addr, ramrod_res_phys); + + rc = qed_spq_post(p_hwfn, p_ent, NULL); + if (rc) + goto err; + + out_params->num_cq_notif = le16_to_cpu(p_ramrod_res->cnq_num); + + dma_free_coherent(&p_hwfn->cdev->pdev->dev, + sizeof(struct rdma_destroy_cq_output_params), + p_ramrod_res, ramrod_res_phys); + + /* Free icid */ + spin_lock_bh(&p_hwfn->p_rdma_info->lock); + + qed_bmap_release_id(p_hwfn, + &p_hwfn->p_rdma_info->cq_map, + (in_params->icid - + qed_cxt_get_proto_cid_start(p_hwfn, + p_hwfn-> + p_rdma_info->proto))); + + spin_unlock_bh(&p_hwfn->p_rdma_info->lock); + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Destroyed CQ, rc = %d\n", rc); + return rc; + +err: dma_free_coherent(&p_hwfn->cdev->pdev->dev, + sizeof(struct rdma_destroy_cq_output_params), + p_ramrod_res, ramrod_res_phys); + + return rc; +} + static void *qed_rdma_get_rdma_ctx(struct qed_dev *cdev) { return QED_LEADING_HWFN(cdev); @@ -871,12 +1190,17 @@ static const struct qed_rdma_ops qed_rdma_ops_pass = { .rdma_add_user = &qed_rdma_add_user, .rdma_remove_user = &qed_rdma_remove_user, .rdma_stop = &qed_rdma_stop, + .rdma_query_port = &qed_rdma_query_port, .rdma_query_device = &qed_rdma_query_device, .rdma_get_start_sb = &qed_rdma_get_sb_start, .rdma_get_rdma_int = &qed_rdma_get_int, .rdma_set_rdma_int = &qed_rdma_set_int, .rdma_get_min_cnq_msix = &qed_rdma_get_min_cnq_msix, .rdma_cnq_prod_update = &qed_rdma_cnq_prod_update, + .rdma_alloc_pd = &qed_rdma_alloc_pd, + .rdma_dealloc_pd = &qed_rdma_free_pd, + .rdma_create_cq = &qed_rdma_create_cq, + .rdma_destroy_cq = &qed_rdma_destroy_cq, }; const struct qed_rdma_ops *qed_get_rdma_ops() diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.h b/drivers/net/ethernet/qlogic/qed/qed_roce.h index e55048106a83..1fe73707e0b5 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_roce.h +++ b/drivers/net/ethernet/qlogic/qed/qed_roce.h @@ -94,6 +94,26 @@ struct qed_rdma_info { enum protocol_type proto; }; +struct qed_rdma_resize_cq_in_params { + u16 icid; + u32 cq_size; + bool pbl_two_level; + u64 pbl_ptr; + u16 pbl_num_pages; + u8 pbl_page_size_log; +}; + +struct qed_rdma_resize_cq_out_params { + u32 prod; + u32 cons; +}; + +struct qed_rdma_resize_cnq_in_params { + u32 cnq_id; + u32 pbl_page_size_log; + u64 pbl_ptr; +}; + int qed_rdma_add_user(void *rdma_cxt, struct qed_rdma_add_user_out_params *out_params); @@ -102,6 +122,7 @@ int qed_rdma_alloc_tid(void *rdma_cxt, u32 *tid); int qed_rdma_deregister_tid(void *rdma_cxt, u32 tid); void qed_rdma_free_tid(void *rdma_cxt, u32 tid); struct qed_rdma_device *qed_rdma_query_device(void *rdma_cxt); +struct qed_rdma_port *qed_rdma_query_port(void *rdma_cxt); int qed_rdma_register_tid(void *rdma_cxt, struct qed_rdma_register_tid_in_params *params); diff --git a/include/linux/qed/qed_roce_if.h b/include/linux/qed/qed_roce_if.h index 0f7d5275e515..b559b1c9e76d 100644 --- a/include/linux/qed/qed_roce_if.h +++ b/include/linux/qed/qed_roce_if.h @@ -263,6 +263,19 @@ struct qed_rdma_register_tid_in_params { u64 dif_runt_addr; }; +struct qed_rdma_create_cq_in_params { + u32 cq_handle_lo; + u32 cq_handle_hi; + u32 cq_size; + u16 dpi; + bool pbl_two_level; + u64 pbl_ptr; + u16 pbl_num_pages; + u8 pbl_page_size_log; + u8 cnq_id; + u16 int_timeout; +}; + struct qed_rdma_create_srq_in_params { u64 pbl_base_addr; u64 prod_pair_addr; @@ -271,6 +284,14 @@ struct qed_rdma_create_srq_in_params { u16 page_size; }; +struct qed_rdma_destroy_cq_in_params { + u16 icid; +}; + +struct qed_rdma_destroy_cq_out_params { + u16 num_cq_notif; +}; + struct qed_rdma_create_srq_out_params { u16 srq_id; }; @@ -332,12 +353,21 @@ struct qed_rdma_ops { void (*rdma_remove_user)(void *rdma_cxt, u16 dpi); int (*rdma_stop)(void *rdma_cxt); struct qed_rdma_device* (*rdma_query_device)(void *rdma_cxt); + struct qed_rdma_port* (*rdma_query_port)(void *rdma_cxt); int (*rdma_get_start_sb)(struct qed_dev *cdev); int (*rdma_get_min_cnq_msix)(struct qed_dev *cdev); void (*rdma_cnq_prod_update)(void *rdma_cxt, u8 cnq_index, u16 prod); int (*rdma_get_rdma_int)(struct qed_dev *cdev, struct qed_int_info *info); int (*rdma_set_rdma_int)(struct qed_dev *cdev, u16 cnt); + int (*rdma_alloc_pd)(void *rdma_cxt, u16 *pd); + void (*rdma_dealloc_pd)(void *rdma_cxt, u16 pd); + int (*rdma_create_cq)(void *rdma_cxt, + struct qed_rdma_create_cq_in_params *params, + u16 *icid); + int (*rdma_destroy_cq)(void *rdma_cxt, + struct qed_rdma_destroy_cq_in_params *iparams, + struct qed_rdma_destroy_cq_out_params *oparams); }; const struct qed_rdma_ops *qed_get_rdma_ops(void); From f109394033521862f2558df93d9afc4dfa829c6a Mon Sep 17 00:00:00 2001 From: Ram Amrani Date: Sat, 1 Oct 2016 21:59:59 +0300 Subject: [PATCH 1699/1715] qed: Add support for QP verbs Add support for the slowpath configurations of Queue Pair verbs which adds, deletes, modifies and queries Queue Pairs. Signed-off-by: Ram Amrani Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_cxt.h | 1 + drivers/net/ethernet/qlogic/qed/qed_roce.c | 1197 ++++++++++++++++++++ drivers/net/ethernet/qlogic/qed/qed_roce.h | 71 ++ include/linux/qed/qed_roce_if.h | 144 +++ 4 files changed, 1413 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.h b/drivers/net/ethernet/qlogic/qed/qed_cxt.h index d00ad055802b..2b8bdaa77800 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_cxt.h +++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.h @@ -176,6 +176,7 @@ u32 qed_cxt_get_proto_tid_count(struct qed_hwfn *p_hwfn, enum protocol_type type); u32 qed_cxt_get_proto_cid_start(struct qed_hwfn *p_hwfn, enum protocol_type type); +int qed_cxt_free_proto_ilt(struct qed_hwfn *p_hwfn, enum protocol_type proto); #define QED_CTX_WORKING_MEM 0 #define QED_CTX_FL_MEM 1 diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c index f9551643428f..438a0badc4e1 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_roce.c +++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c @@ -1107,6 +1107,1199 @@ err: dma_free_coherent(&p_hwfn->cdev->pdev->dev, return rc; } +static void qed_rdma_set_fw_mac(u16 *p_fw_mac, u8 *p_qed_mac) +{ + p_fw_mac[0] = cpu_to_le16((p_qed_mac[0] << 8) + p_qed_mac[1]); + p_fw_mac[1] = cpu_to_le16((p_qed_mac[2] << 8) + p_qed_mac[3]); + p_fw_mac[2] = cpu_to_le16((p_qed_mac[4] << 8) + p_qed_mac[5]); +} + +static void qed_rdma_copy_gids(struct qed_rdma_qp *qp, __le32 *src_gid, + __le32 *dst_gid) +{ + u32 i; + + if (qp->roce_mode == ROCE_V2_IPV4) { + /* The IPv4 addresses shall be aligned to the highest word. + * The lower words must be zero. + */ + memset(src_gid, 0, sizeof(union qed_gid)); + memset(dst_gid, 0, sizeof(union qed_gid)); + src_gid[3] = cpu_to_le32(qp->sgid.ipv4_addr); + dst_gid[3] = cpu_to_le32(qp->dgid.ipv4_addr); + } else { + /* GIDs and IPv6 addresses coincide in location and size */ + for (i = 0; i < ARRAY_SIZE(qp->sgid.dwords); i++) { + src_gid[i] = cpu_to_le32(qp->sgid.dwords[i]); + dst_gid[i] = cpu_to_le32(qp->dgid.dwords[i]); + } + } +} + +static enum roce_flavor qed_roce_mode_to_flavor(enum roce_mode roce_mode) +{ + enum roce_flavor flavor; + + switch (roce_mode) { + case ROCE_V1: + flavor = PLAIN_ROCE; + break; + case ROCE_V2_IPV4: + flavor = RROCE_IPV4; + break; + case ROCE_V2_IPV6: + flavor = ROCE_V2_IPV6; + break; + default: + flavor = MAX_ROCE_MODE; + break; + } + return flavor; +} + +int qed_roce_alloc_cid(struct qed_hwfn *p_hwfn, u16 *cid) +{ + struct qed_rdma_info *p_rdma_info = p_hwfn->p_rdma_info; + u32 responder_icid; + u32 requester_icid; + int rc; + + spin_lock_bh(&p_hwfn->p_rdma_info->lock); + rc = qed_rdma_bmap_alloc_id(p_hwfn, &p_rdma_info->cid_map, + &responder_icid); + if (rc) { + spin_unlock_bh(&p_rdma_info->lock); + return rc; + } + + rc = qed_rdma_bmap_alloc_id(p_hwfn, &p_rdma_info->cid_map, + &requester_icid); + + spin_unlock_bh(&p_rdma_info->lock); + if (rc) + goto err; + + /* the two icid's should be adjacent */ + if ((requester_icid - responder_icid) != 1) { + DP_NOTICE(p_hwfn, "Failed to allocate two adjacent qp's'\n"); + rc = -EINVAL; + goto err; + } + + responder_icid += qed_cxt_get_proto_cid_start(p_hwfn, + p_rdma_info->proto); + requester_icid += qed_cxt_get_proto_cid_start(p_hwfn, + p_rdma_info->proto); + + /* If these icids require a new ILT line allocate DMA-able context for + * an ILT page + */ + rc = qed_cxt_dynamic_ilt_alloc(p_hwfn, QED_ELEM_CXT, responder_icid); + if (rc) + goto err; + + rc = qed_cxt_dynamic_ilt_alloc(p_hwfn, QED_ELEM_CXT, requester_icid); + if (rc) + goto err; + + *cid = (u16)responder_icid; + return rc; + +err: + spin_lock_bh(&p_rdma_info->lock); + qed_bmap_release_id(p_hwfn, &p_rdma_info->cid_map, responder_icid); + qed_bmap_release_id(p_hwfn, &p_rdma_info->cid_map, requester_icid); + + spin_unlock_bh(&p_rdma_info->lock); + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "Allocate CID - failed, rc = %d\n", rc); + return rc; +} + +static int qed_roce_sp_create_responder(struct qed_hwfn *p_hwfn, + struct qed_rdma_qp *qp) +{ + struct roce_create_qp_resp_ramrod_data *p_ramrod; + struct qed_sp_init_data init_data; + union qed_qm_pq_params qm_params; + enum roce_flavor roce_flavor; + struct qed_spq_entry *p_ent; + u16 physical_queue0 = 0; + int rc; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", qp->icid); + + /* Allocate DMA-able memory for IRQ */ + qp->irq_num_pages = 1; + qp->irq = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev, + RDMA_RING_PAGE_SIZE, + &qp->irq_phys_addr, GFP_KERNEL); + if (!qp->irq) { + rc = -ENOMEM; + DP_NOTICE(p_hwfn, + "qed create responder failed: cannot allocate memory (irq). rc = %d\n", + rc); + return rc; + } + + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.cid = qp->icid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; + + rc = qed_sp_init_request(p_hwfn, &p_ent, ROCE_RAMROD_CREATE_QP, + PROTOCOLID_ROCE, &init_data); + if (rc) + goto err; + + p_ramrod = &p_ent->ramrod.roce_create_qp_resp; + + p_ramrod->flags = 0; + + roce_flavor = qed_roce_mode_to_flavor(qp->roce_mode); + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_RESP_RAMROD_DATA_ROCE_FLAVOR, roce_flavor); + + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_RESP_RAMROD_DATA_RDMA_RD_EN, + qp->incoming_rdma_read_en); + + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_RESP_RAMROD_DATA_RDMA_WR_EN, + qp->incoming_rdma_write_en); + + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_RESP_RAMROD_DATA_ATOMIC_EN, + qp->incoming_atomic_en); + + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_RESP_RAMROD_DATA_E2E_FLOW_CONTROL_EN, + qp->e2e_flow_control_en); + + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_RESP_RAMROD_DATA_SRQ_FLG, qp->use_srq); + + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_RESP_RAMROD_DATA_RESERVED_KEY_EN, + qp->fmr_and_reserved_lkey); + + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_RESP_RAMROD_DATA_MIN_RNR_NAK_TIMER, + qp->min_rnr_nak_timer); + + p_ramrod->max_ird = qp->max_rd_atomic_resp; + p_ramrod->traffic_class = qp->traffic_class_tos; + p_ramrod->hop_limit = qp->hop_limit_ttl; + p_ramrod->irq_num_pages = qp->irq_num_pages; + p_ramrod->p_key = cpu_to_le16(qp->pkey); + p_ramrod->flow_label = cpu_to_le32(qp->flow_label); + p_ramrod->dst_qp_id = cpu_to_le32(qp->dest_qp); + p_ramrod->mtu = cpu_to_le16(qp->mtu); + p_ramrod->initial_psn = cpu_to_le32(qp->rq_psn); + p_ramrod->pd = cpu_to_le16(qp->pd); + p_ramrod->rq_num_pages = cpu_to_le16(qp->rq_num_pages); + DMA_REGPAIR_LE(p_ramrod->rq_pbl_addr, qp->rq_pbl_ptr); + DMA_REGPAIR_LE(p_ramrod->irq_pbl_addr, qp->irq_phys_addr); + qed_rdma_copy_gids(qp, p_ramrod->src_gid, p_ramrod->dst_gid); + p_ramrod->qp_handle_for_async.hi = cpu_to_le32(qp->qp_handle_async.hi); + p_ramrod->qp_handle_for_async.lo = cpu_to_le32(qp->qp_handle_async.lo); + p_ramrod->qp_handle_for_cqe.hi = cpu_to_le32(qp->qp_handle.hi); + p_ramrod->qp_handle_for_cqe.lo = cpu_to_le32(qp->qp_handle.lo); + p_ramrod->stats_counter_id = p_hwfn->rel_pf_id; + p_ramrod->cq_cid = cpu_to_le32((p_hwfn->hw_info.opaque_fid << 16) | + qp->rq_cq_id); + + memset(&qm_params, 0, sizeof(qm_params)); + qm_params.roce.qpid = qp->icid >> 1; + physical_queue0 = qed_get_qm_pq(p_hwfn, PROTOCOLID_ROCE, &qm_params); + + p_ramrod->physical_queue0 = cpu_to_le16(physical_queue0); + p_ramrod->dpi = cpu_to_le16(qp->dpi); + + qed_rdma_set_fw_mac(p_ramrod->remote_mac_addr, qp->remote_mac_addr); + qed_rdma_set_fw_mac(p_ramrod->local_mac_addr, qp->local_mac_addr); + + p_ramrod->udp_src_port = qp->udp_src_port; + p_ramrod->vlan_id = cpu_to_le16(qp->vlan_id); + p_ramrod->srq_id.srq_idx = cpu_to_le16(qp->srq_id); + p_ramrod->srq_id.opaque_fid = cpu_to_le16(p_hwfn->hw_info.opaque_fid); + + p_ramrod->stats_counter_id = RESC_START(p_hwfn, QED_RDMA_STATS_QUEUE) + + qp->stats_queue; + + rc = qed_spq_post(p_hwfn, p_ent, NULL); + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "rc = %d physical_queue0 = 0x%x\n", + rc, physical_queue0); + + if (rc) + goto err; + + qp->resp_offloaded = true; + + return rc; + +err: + DP_NOTICE(p_hwfn, "create responder - failed, rc = %d\n", rc); + dma_free_coherent(&p_hwfn->cdev->pdev->dev, + qp->irq_num_pages * RDMA_RING_PAGE_SIZE, + qp->irq, qp->irq_phys_addr); + + return rc; +} + +static int qed_roce_sp_create_requester(struct qed_hwfn *p_hwfn, + struct qed_rdma_qp *qp) +{ + struct roce_create_qp_req_ramrod_data *p_ramrod; + struct qed_sp_init_data init_data; + union qed_qm_pq_params qm_params; + enum roce_flavor roce_flavor; + struct qed_spq_entry *p_ent; + u16 physical_queue0 = 0; + int rc; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", qp->icid); + + /* Allocate DMA-able memory for ORQ */ + qp->orq_num_pages = 1; + qp->orq = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev, + RDMA_RING_PAGE_SIZE, + &qp->orq_phys_addr, GFP_KERNEL); + if (!qp->orq) { + rc = -ENOMEM; + DP_NOTICE(p_hwfn, + "qed create requester failed: cannot allocate memory (orq). rc = %d\n", + rc); + return rc; + } + + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.cid = qp->icid + 1; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; + + rc = qed_sp_init_request(p_hwfn, &p_ent, + ROCE_RAMROD_CREATE_QP, + PROTOCOLID_ROCE, &init_data); + if (rc) + goto err; + + p_ramrod = &p_ent->ramrod.roce_create_qp_req; + + p_ramrod->flags = 0; + + roce_flavor = qed_roce_mode_to_flavor(qp->roce_mode); + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_REQ_RAMROD_DATA_ROCE_FLAVOR, roce_flavor); + + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_REQ_RAMROD_DATA_FMR_AND_RESERVED_EN, + qp->fmr_and_reserved_lkey); + + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_REQ_RAMROD_DATA_SIGNALED_COMP, qp->signal_all); + + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_REQ_RAMROD_DATA_ERR_RETRY_CNT, qp->retry_cnt); + + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_REQ_RAMROD_DATA_RNR_NAK_CNT, + qp->rnr_retry_cnt); + + p_ramrod->max_ord = qp->max_rd_atomic_req; + p_ramrod->traffic_class = qp->traffic_class_tos; + p_ramrod->hop_limit = qp->hop_limit_ttl; + p_ramrod->orq_num_pages = qp->orq_num_pages; + p_ramrod->p_key = cpu_to_le16(qp->pkey); + p_ramrod->flow_label = cpu_to_le32(qp->flow_label); + p_ramrod->dst_qp_id = cpu_to_le32(qp->dest_qp); + p_ramrod->ack_timeout_val = cpu_to_le32(qp->ack_timeout); + p_ramrod->mtu = cpu_to_le16(qp->mtu); + p_ramrod->initial_psn = cpu_to_le32(qp->sq_psn); + p_ramrod->pd = cpu_to_le16(qp->pd); + p_ramrod->sq_num_pages = cpu_to_le16(qp->sq_num_pages); + DMA_REGPAIR_LE(p_ramrod->sq_pbl_addr, qp->sq_pbl_ptr); + DMA_REGPAIR_LE(p_ramrod->orq_pbl_addr, qp->orq_phys_addr); + qed_rdma_copy_gids(qp, p_ramrod->src_gid, p_ramrod->dst_gid); + p_ramrod->qp_handle_for_async.hi = cpu_to_le32(qp->qp_handle_async.hi); + p_ramrod->qp_handle_for_async.lo = cpu_to_le32(qp->qp_handle_async.lo); + p_ramrod->qp_handle_for_cqe.hi = cpu_to_le32(qp->qp_handle.hi); + p_ramrod->qp_handle_for_cqe.lo = cpu_to_le32(qp->qp_handle.lo); + p_ramrod->stats_counter_id = p_hwfn->rel_pf_id; + p_ramrod->cq_cid = cpu_to_le32((p_hwfn->hw_info.opaque_fid << 16) | + qp->sq_cq_id); + + memset(&qm_params, 0, sizeof(qm_params)); + qm_params.roce.qpid = qp->icid >> 1; + physical_queue0 = qed_get_qm_pq(p_hwfn, PROTOCOLID_ROCE, &qm_params); + + p_ramrod->physical_queue0 = cpu_to_le16(physical_queue0); + p_ramrod->dpi = cpu_to_le16(qp->dpi); + + qed_rdma_set_fw_mac(p_ramrod->remote_mac_addr, qp->remote_mac_addr); + qed_rdma_set_fw_mac(p_ramrod->local_mac_addr, qp->local_mac_addr); + + p_ramrod->udp_src_port = qp->udp_src_port; + p_ramrod->vlan_id = cpu_to_le16(qp->vlan_id); + p_ramrod->stats_counter_id = RESC_START(p_hwfn, QED_RDMA_STATS_QUEUE) + + qp->stats_queue; + + rc = qed_spq_post(p_hwfn, p_ent, NULL); + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "rc = %d\n", rc); + + if (rc) + goto err; + + qp->req_offloaded = true; + + return rc; + +err: + DP_NOTICE(p_hwfn, "Create requested - failed, rc = %d\n", rc); + dma_free_coherent(&p_hwfn->cdev->pdev->dev, + qp->orq_num_pages * RDMA_RING_PAGE_SIZE, + qp->orq, qp->orq_phys_addr); + return rc; +} + +static int qed_roce_sp_modify_responder(struct qed_hwfn *p_hwfn, + struct qed_rdma_qp *qp, + bool move_to_err, u32 modify_flags) +{ + struct roce_modify_qp_resp_ramrod_data *p_ramrod; + struct qed_sp_init_data init_data; + struct qed_spq_entry *p_ent; + int rc; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", qp->icid); + + if (move_to_err && !qp->resp_offloaded) + return 0; + + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.cid = qp->icid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; + + rc = qed_sp_init_request(p_hwfn, &p_ent, + ROCE_EVENT_MODIFY_QP, + PROTOCOLID_ROCE, &init_data); + if (rc) { + DP_NOTICE(p_hwfn, "rc = %d\n", rc); + return rc; + } + + p_ramrod = &p_ent->ramrod.roce_modify_qp_resp; + + p_ramrod->flags = 0; + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_RESP_RAMROD_DATA_MOVE_TO_ERR_FLG, move_to_err); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_RESP_RAMROD_DATA_RDMA_RD_EN, + qp->incoming_rdma_read_en); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_RESP_RAMROD_DATA_RDMA_WR_EN, + qp->incoming_rdma_write_en); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_RESP_RAMROD_DATA_ATOMIC_EN, + qp->incoming_atomic_en); + + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_RESP_RAMROD_DATA_E2E_FLOW_CONTROL_EN, + qp->e2e_flow_control_en); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_RESP_RAMROD_DATA_RDMA_OPS_EN_FLG, + GET_FIELD(modify_flags, + QED_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN)); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_RESP_RAMROD_DATA_P_KEY_FLG, + GET_FIELD(modify_flags, QED_ROCE_MODIFY_QP_VALID_PKEY)); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_RESP_RAMROD_DATA_ADDRESS_VECTOR_FLG, + GET_FIELD(modify_flags, + QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR)); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_RESP_RAMROD_DATA_MAX_IRD_FLG, + GET_FIELD(modify_flags, + QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP)); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_RESP_RAMROD_DATA_MIN_RNR_NAK_TIMER_FLG, + GET_FIELD(modify_flags, + QED_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER)); + + p_ramrod->fields = 0; + SET_FIELD(p_ramrod->fields, + ROCE_MODIFY_QP_RESP_RAMROD_DATA_MIN_RNR_NAK_TIMER, + qp->min_rnr_nak_timer); + + p_ramrod->max_ird = qp->max_rd_atomic_resp; + p_ramrod->traffic_class = qp->traffic_class_tos; + p_ramrod->hop_limit = qp->hop_limit_ttl; + p_ramrod->p_key = cpu_to_le16(qp->pkey); + p_ramrod->flow_label = cpu_to_le32(qp->flow_label); + p_ramrod->mtu = cpu_to_le16(qp->mtu); + qed_rdma_copy_gids(qp, p_ramrod->src_gid, p_ramrod->dst_gid); + rc = qed_spq_post(p_hwfn, p_ent, NULL); + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Modify responder, rc = %d\n", rc); + return rc; +} + +static int qed_roce_sp_modify_requester(struct qed_hwfn *p_hwfn, + struct qed_rdma_qp *qp, + bool move_to_sqd, + bool move_to_err, u32 modify_flags) +{ + struct roce_modify_qp_req_ramrod_data *p_ramrod; + struct qed_sp_init_data init_data; + struct qed_spq_entry *p_ent; + int rc; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", qp->icid); + + if (move_to_err && !(qp->req_offloaded)) + return 0; + + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.cid = qp->icid + 1; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; + + rc = qed_sp_init_request(p_hwfn, &p_ent, + ROCE_EVENT_MODIFY_QP, + PROTOCOLID_ROCE, &init_data); + if (rc) { + DP_NOTICE(p_hwfn, "rc = %d\n", rc); + return rc; + } + + p_ramrod = &p_ent->ramrod.roce_modify_qp_req; + + p_ramrod->flags = 0; + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_REQ_RAMROD_DATA_MOVE_TO_ERR_FLG, move_to_err); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_REQ_RAMROD_DATA_MOVE_TO_SQD_FLG, move_to_sqd); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_REQ_RAMROD_DATA_EN_SQD_ASYNC_NOTIFY, + qp->sqd_async); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_REQ_RAMROD_DATA_P_KEY_FLG, + GET_FIELD(modify_flags, QED_ROCE_MODIFY_QP_VALID_PKEY)); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_REQ_RAMROD_DATA_ADDRESS_VECTOR_FLG, + GET_FIELD(modify_flags, + QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR)); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_REQ_RAMROD_DATA_MAX_ORD_FLG, + GET_FIELD(modify_flags, + QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ)); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_REQ_RAMROD_DATA_RNR_NAK_CNT_FLG, + GET_FIELD(modify_flags, + QED_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT)); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_REQ_RAMROD_DATA_ERR_RETRY_CNT_FLG, + GET_FIELD(modify_flags, QED_ROCE_MODIFY_QP_VALID_RETRY_CNT)); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_REQ_RAMROD_DATA_ACK_TIMEOUT_FLG, + GET_FIELD(modify_flags, + QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT)); + + p_ramrod->fields = 0; + SET_FIELD(p_ramrod->fields, + ROCE_MODIFY_QP_REQ_RAMROD_DATA_ERR_RETRY_CNT, qp->retry_cnt); + + SET_FIELD(p_ramrod->fields, + ROCE_MODIFY_QP_REQ_RAMROD_DATA_RNR_NAK_CNT, + qp->rnr_retry_cnt); + + p_ramrod->max_ord = qp->max_rd_atomic_req; + p_ramrod->traffic_class = qp->traffic_class_tos; + p_ramrod->hop_limit = qp->hop_limit_ttl; + p_ramrod->p_key = cpu_to_le16(qp->pkey); + p_ramrod->flow_label = cpu_to_le32(qp->flow_label); + p_ramrod->ack_timeout_val = cpu_to_le32(qp->ack_timeout); + p_ramrod->mtu = cpu_to_le16(qp->mtu); + qed_rdma_copy_gids(qp, p_ramrod->src_gid, p_ramrod->dst_gid); + rc = qed_spq_post(p_hwfn, p_ent, NULL); + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Modify requester, rc = %d\n", rc); + return rc; +} + +static int qed_roce_sp_destroy_qp_responder(struct qed_hwfn *p_hwfn, + struct qed_rdma_qp *qp, + u32 *num_invalidated_mw) +{ + struct roce_destroy_qp_resp_output_params *p_ramrod_res; + struct roce_destroy_qp_resp_ramrod_data *p_ramrod; + struct qed_sp_init_data init_data; + struct qed_spq_entry *p_ent; + dma_addr_t ramrod_res_phys; + int rc; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", qp->icid); + + if (!qp->resp_offloaded) + return 0; + + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.cid = qp->icid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; + + rc = qed_sp_init_request(p_hwfn, &p_ent, + ROCE_RAMROD_DESTROY_QP, + PROTOCOLID_ROCE, &init_data); + if (rc) + return rc; + + p_ramrod = &p_ent->ramrod.roce_destroy_qp_resp; + + p_ramrod_res = (struct roce_destroy_qp_resp_output_params *) + dma_alloc_coherent(&p_hwfn->cdev->pdev->dev, sizeof(*p_ramrod_res), + &ramrod_res_phys, GFP_KERNEL); + + if (!p_ramrod_res) { + rc = -ENOMEM; + DP_NOTICE(p_hwfn, + "qed destroy responder failed: cannot allocate memory (ramrod). rc = %d\n", + rc); + return rc; + } + + DMA_REGPAIR_LE(p_ramrod->output_params_addr, ramrod_res_phys); + + rc = qed_spq_post(p_hwfn, p_ent, NULL); + if (rc) + goto err; + + *num_invalidated_mw = le32_to_cpu(p_ramrod_res->num_invalidated_mw); + + /* Free IRQ - only if ramrod succeeded, in case FW is still using it */ + dma_free_coherent(&p_hwfn->cdev->pdev->dev, + qp->irq_num_pages * RDMA_RING_PAGE_SIZE, + qp->irq, qp->irq_phys_addr); + + qp->resp_offloaded = false; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Destroy responder, rc = %d\n", rc); + +err: + dma_free_coherent(&p_hwfn->cdev->pdev->dev, + sizeof(struct roce_destroy_qp_resp_output_params), + p_ramrod_res, ramrod_res_phys); + + return rc; +} + +static int qed_roce_sp_destroy_qp_requester(struct qed_hwfn *p_hwfn, + struct qed_rdma_qp *qp, + u32 *num_bound_mw) +{ + struct roce_destroy_qp_req_output_params *p_ramrod_res; + struct roce_destroy_qp_req_ramrod_data *p_ramrod; + struct qed_sp_init_data init_data; + struct qed_spq_entry *p_ent; + dma_addr_t ramrod_res_phys; + int rc = -ENOMEM; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", qp->icid); + + if (!qp->req_offloaded) + return 0; + + p_ramrod_res = (struct roce_destroy_qp_req_output_params *) + dma_alloc_coherent(&p_hwfn->cdev->pdev->dev, + sizeof(*p_ramrod_res), + &ramrod_res_phys, GFP_KERNEL); + if (!p_ramrod_res) { + DP_NOTICE(p_hwfn, + "qed destroy requester failed: cannot allocate memory (ramrod)\n"); + return rc; + } + + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.cid = qp->icid + 1; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; + + rc = qed_sp_init_request(p_hwfn, &p_ent, ROCE_RAMROD_DESTROY_QP, + PROTOCOLID_ROCE, &init_data); + if (rc) + goto err; + + p_ramrod = &p_ent->ramrod.roce_destroy_qp_req; + DMA_REGPAIR_LE(p_ramrod->output_params_addr, ramrod_res_phys); + + rc = qed_spq_post(p_hwfn, p_ent, NULL); + if (rc) + goto err; + + *num_bound_mw = le32_to_cpu(p_ramrod_res->num_bound_mw); + + /* Free ORQ - only if ramrod succeeded, in case FW is still using it */ + dma_free_coherent(&p_hwfn->cdev->pdev->dev, + qp->orq_num_pages * RDMA_RING_PAGE_SIZE, + qp->orq, qp->orq_phys_addr); + + qp->req_offloaded = false; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Destroy requester, rc = %d\n", rc); + +err: + dma_free_coherent(&p_hwfn->cdev->pdev->dev, sizeof(*p_ramrod_res), + p_ramrod_res, ramrod_res_phys); + + return rc; +} + +int qed_roce_query_qp(struct qed_hwfn *p_hwfn, + struct qed_rdma_qp *qp, + struct qed_rdma_query_qp_out_params *out_params) +{ + struct roce_query_qp_resp_output_params *p_resp_ramrod_res; + struct roce_query_qp_req_output_params *p_req_ramrod_res; + struct roce_query_qp_resp_ramrod_data *p_resp_ramrod; + struct roce_query_qp_req_ramrod_data *p_req_ramrod; + struct qed_sp_init_data init_data; + dma_addr_t resp_ramrod_res_phys; + dma_addr_t req_ramrod_res_phys; + struct qed_spq_entry *p_ent; + bool rq_err_state; + bool sq_err_state; + bool sq_draining; + int rc = -ENOMEM; + + if ((!(qp->resp_offloaded)) && (!(qp->req_offloaded))) { + /* We can't send ramrod to the fw since this qp wasn't offloaded + * to the fw yet + */ + out_params->draining = false; + out_params->rq_psn = qp->rq_psn; + out_params->sq_psn = qp->sq_psn; + out_params->state = qp->cur_state; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "No QPs as no offload\n"); + return 0; + } + + if (!(qp->resp_offloaded)) { + DP_NOTICE(p_hwfn, + "The responder's qp should be offloded before requester's\n"); + return -EINVAL; + } + + /* Send a query responder ramrod to FW to get RQ-PSN and state */ + p_resp_ramrod_res = (struct roce_query_qp_resp_output_params *) + dma_alloc_coherent(&p_hwfn->cdev->pdev->dev, + sizeof(*p_resp_ramrod_res), + &resp_ramrod_res_phys, GFP_KERNEL); + if (!p_resp_ramrod_res) { + DP_NOTICE(p_hwfn, + "qed query qp failed: cannot allocate memory (ramrod)\n"); + return rc; + } + + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.cid = qp->icid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; + rc = qed_sp_init_request(p_hwfn, &p_ent, ROCE_RAMROD_QUERY_QP, + PROTOCOLID_ROCE, &init_data); + if (rc) + goto err_resp; + + p_resp_ramrod = &p_ent->ramrod.roce_query_qp_resp; + DMA_REGPAIR_LE(p_resp_ramrod->output_params_addr, resp_ramrod_res_phys); + + rc = qed_spq_post(p_hwfn, p_ent, NULL); + if (rc) + goto err_resp; + + dma_free_coherent(&p_hwfn->cdev->pdev->dev, sizeof(*p_resp_ramrod_res), + p_resp_ramrod_res, resp_ramrod_res_phys); + + out_params->rq_psn = le32_to_cpu(p_resp_ramrod_res->psn); + rq_err_state = GET_FIELD(le32_to_cpu(p_resp_ramrod_res->err_flag), + ROCE_QUERY_QP_RESP_OUTPUT_PARAMS_ERROR_FLG); + + if (!(qp->req_offloaded)) { + /* Don't send query qp for the requester */ + out_params->sq_psn = qp->sq_psn; + out_params->draining = false; + + if (rq_err_state) + qp->cur_state = QED_ROCE_QP_STATE_ERR; + + out_params->state = qp->cur_state; + + return 0; + } + + /* Send a query requester ramrod to FW to get SQ-PSN and state */ + p_req_ramrod_res = (struct roce_query_qp_req_output_params *) + dma_alloc_coherent(&p_hwfn->cdev->pdev->dev, + sizeof(*p_req_ramrod_res), + &req_ramrod_res_phys, + GFP_KERNEL); + if (!p_req_ramrod_res) { + rc = -ENOMEM; + DP_NOTICE(p_hwfn, + "qed query qp failed: cannot allocate memory (ramrod)\n"); + return rc; + } + + /* Get SPQ entry */ + init_data.cid = qp->icid + 1; + rc = qed_sp_init_request(p_hwfn, &p_ent, ROCE_RAMROD_QUERY_QP, + PROTOCOLID_ROCE, &init_data); + if (rc) + goto err_req; + + p_req_ramrod = &p_ent->ramrod.roce_query_qp_req; + DMA_REGPAIR_LE(p_req_ramrod->output_params_addr, req_ramrod_res_phys); + + rc = qed_spq_post(p_hwfn, p_ent, NULL); + if (rc) + goto err_req; + + dma_free_coherent(&p_hwfn->cdev->pdev->dev, sizeof(*p_req_ramrod_res), + p_req_ramrod_res, req_ramrod_res_phys); + + out_params->sq_psn = le32_to_cpu(p_req_ramrod_res->psn); + sq_err_state = GET_FIELD(le32_to_cpu(p_req_ramrod_res->flags), + ROCE_QUERY_QP_REQ_OUTPUT_PARAMS_ERR_FLG); + sq_draining = + GET_FIELD(le32_to_cpu(p_req_ramrod_res->flags), + ROCE_QUERY_QP_REQ_OUTPUT_PARAMS_SQ_DRAINING_FLG); + + out_params->draining = false; + + if (rq_err_state) + qp->cur_state = QED_ROCE_QP_STATE_ERR; + else if (sq_err_state) + qp->cur_state = QED_ROCE_QP_STATE_SQE; + else if (sq_draining) + out_params->draining = true; + out_params->state = qp->cur_state; + + return 0; + +err_req: + dma_free_coherent(&p_hwfn->cdev->pdev->dev, sizeof(*p_req_ramrod_res), + p_req_ramrod_res, req_ramrod_res_phys); + return rc; +err_resp: + dma_free_coherent(&p_hwfn->cdev->pdev->dev, sizeof(*p_resp_ramrod_res), + p_resp_ramrod_res, resp_ramrod_res_phys); + return rc; +} + +int qed_roce_destroy_qp(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp) +{ + u32 num_invalidated_mw = 0; + u32 num_bound_mw = 0; + u32 start_cid; + int rc; + + /* Destroys the specified QP */ + if ((qp->cur_state != QED_ROCE_QP_STATE_RESET) && + (qp->cur_state != QED_ROCE_QP_STATE_ERR) && + (qp->cur_state != QED_ROCE_QP_STATE_INIT)) { + DP_NOTICE(p_hwfn, + "QP must be in error, reset or init state before destroying it\n"); + return -EINVAL; + } + + rc = qed_roce_sp_destroy_qp_responder(p_hwfn, qp, &num_invalidated_mw); + if (rc) + return rc; + + /* Send destroy requester ramrod */ + rc = qed_roce_sp_destroy_qp_requester(p_hwfn, qp, &num_bound_mw); + if (rc) + return rc; + + if (num_invalidated_mw != num_bound_mw) { + DP_NOTICE(p_hwfn, + "number of invalidate memory windows is different from bounded ones\n"); + return -EINVAL; + } + + spin_lock_bh(&p_hwfn->p_rdma_info->lock); + + start_cid = qed_cxt_get_proto_cid_start(p_hwfn, + p_hwfn->p_rdma_info->proto); + + /* Release responder's icid */ + qed_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->cid_map, + qp->icid - start_cid); + + /* Release requester's icid */ + qed_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->cid_map, + qp->icid + 1 - start_cid); + + spin_unlock_bh(&p_hwfn->p_rdma_info->lock); + + return 0; +} + +int qed_rdma_query_qp(void *rdma_cxt, + struct qed_rdma_qp *qp, + struct qed_rdma_query_qp_out_params *out_params) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; + int rc; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", qp->icid); + + /* The following fields are filled in from qp and not FW as they can't + * be modified by FW + */ + out_params->mtu = qp->mtu; + out_params->dest_qp = qp->dest_qp; + out_params->incoming_atomic_en = qp->incoming_atomic_en; + out_params->e2e_flow_control_en = qp->e2e_flow_control_en; + out_params->incoming_rdma_read_en = qp->incoming_rdma_read_en; + out_params->incoming_rdma_write_en = qp->incoming_rdma_write_en; + out_params->dgid = qp->dgid; + out_params->flow_label = qp->flow_label; + out_params->hop_limit_ttl = qp->hop_limit_ttl; + out_params->traffic_class_tos = qp->traffic_class_tos; + out_params->timeout = qp->ack_timeout; + out_params->rnr_retry = qp->rnr_retry_cnt; + out_params->retry_cnt = qp->retry_cnt; + out_params->min_rnr_nak_timer = qp->min_rnr_nak_timer; + out_params->pkey_index = 0; + out_params->max_rd_atomic = qp->max_rd_atomic_req; + out_params->max_dest_rd_atomic = qp->max_rd_atomic_resp; + out_params->sqd_async = qp->sqd_async; + + rc = qed_roce_query_qp(p_hwfn, qp, out_params); + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Query QP, rc = %d\n", rc); + return rc; +} + +int qed_rdma_destroy_qp(void *rdma_cxt, struct qed_rdma_qp *qp) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; + int rc = 0; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", qp->icid); + + rc = qed_roce_destroy_qp(p_hwfn, qp); + + /* free qp params struct */ + kfree(qp); + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "QP destroyed\n"); + return rc; +} + +struct qed_rdma_qp * +qed_rdma_create_qp(void *rdma_cxt, + struct qed_rdma_create_qp_in_params *in_params, + struct qed_rdma_create_qp_out_params *out_params) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; + struct qed_rdma_qp *qp; + u8 max_stats_queues; + int rc; + + if (!rdma_cxt || !in_params || !out_params || !p_hwfn->p_rdma_info) { + DP_ERR(p_hwfn->cdev, + "qed roce create qp failed due to NULL entry (rdma_cxt=%p, in=%p, out=%p, roce_info=?\n", + rdma_cxt, in_params, out_params); + return NULL; + } + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "qed rdma create qp called with qp_handle = %08x%08x\n", + in_params->qp_handle_hi, in_params->qp_handle_lo); + + /* Some sanity checks... */ + max_stats_queues = p_hwfn->p_rdma_info->dev->max_stats_queues; + if (in_params->stats_queue >= max_stats_queues) { + DP_ERR(p_hwfn->cdev, + "qed rdma create qp failed due to invalid statistics queue %d. maximum is %d\n", + in_params->stats_queue, max_stats_queues); + return NULL; + } + + qp = kzalloc(sizeof(*qp), GFP_KERNEL); + if (!qp) { + DP_NOTICE(p_hwfn, "Failed to allocate qed_rdma_qp\n"); + return NULL; + } + + rc = qed_roce_alloc_cid(p_hwfn, &qp->icid); + qp->qpid = ((0xFF << 16) | qp->icid); + + DP_INFO(p_hwfn, "ROCE qpid=%x\n", qp->qpid); + + if (rc) { + kfree(qp); + return NULL; + } + + qp->cur_state = QED_ROCE_QP_STATE_RESET; + qp->qp_handle.hi = cpu_to_le32(in_params->qp_handle_hi); + qp->qp_handle.lo = cpu_to_le32(in_params->qp_handle_lo); + qp->qp_handle_async.hi = cpu_to_le32(in_params->qp_handle_async_hi); + qp->qp_handle_async.lo = cpu_to_le32(in_params->qp_handle_async_lo); + qp->use_srq = in_params->use_srq; + qp->signal_all = in_params->signal_all; + qp->fmr_and_reserved_lkey = in_params->fmr_and_reserved_lkey; + qp->pd = in_params->pd; + qp->dpi = in_params->dpi; + qp->sq_cq_id = in_params->sq_cq_id; + qp->sq_num_pages = in_params->sq_num_pages; + qp->sq_pbl_ptr = in_params->sq_pbl_ptr; + qp->rq_cq_id = in_params->rq_cq_id; + qp->rq_num_pages = in_params->rq_num_pages; + qp->rq_pbl_ptr = in_params->rq_pbl_ptr; + qp->srq_id = in_params->srq_id; + qp->req_offloaded = false; + qp->resp_offloaded = false; + qp->e2e_flow_control_en = qp->use_srq ? false : true; + qp->stats_queue = in_params->stats_queue; + + out_params->icid = qp->icid; + out_params->qp_id = qp->qpid; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Create QP, rc = %d\n", rc); + return qp; +} + +static int qed_roce_modify_qp(struct qed_hwfn *p_hwfn, + struct qed_rdma_qp *qp, + enum qed_roce_qp_state prev_state, + struct qed_rdma_modify_qp_in_params *params) +{ + u32 num_invalidated_mw = 0, num_bound_mw = 0; + int rc = 0; + + /* Perform additional operations according to the current state and the + * next state + */ + if (((prev_state == QED_ROCE_QP_STATE_INIT) || + (prev_state == QED_ROCE_QP_STATE_RESET)) && + (qp->cur_state == QED_ROCE_QP_STATE_RTR)) { + /* Init->RTR or Reset->RTR */ + rc = qed_roce_sp_create_responder(p_hwfn, qp); + return rc; + } else if ((prev_state == QED_ROCE_QP_STATE_RTR) && + (qp->cur_state == QED_ROCE_QP_STATE_RTS)) { + /* RTR-> RTS */ + rc = qed_roce_sp_create_requester(p_hwfn, qp); + if (rc) + return rc; + + /* Send modify responder ramrod */ + rc = qed_roce_sp_modify_responder(p_hwfn, qp, false, + params->modify_flags); + return rc; + } else if ((prev_state == QED_ROCE_QP_STATE_RTS) && + (qp->cur_state == QED_ROCE_QP_STATE_RTS)) { + /* RTS->RTS */ + rc = qed_roce_sp_modify_responder(p_hwfn, qp, false, + params->modify_flags); + if (rc) + return rc; + + rc = qed_roce_sp_modify_requester(p_hwfn, qp, false, false, + params->modify_flags); + return rc; + } else if ((prev_state == QED_ROCE_QP_STATE_RTS) && + (qp->cur_state == QED_ROCE_QP_STATE_SQD)) { + /* RTS->SQD */ + rc = qed_roce_sp_modify_requester(p_hwfn, qp, true, false, + params->modify_flags); + return rc; + } else if ((prev_state == QED_ROCE_QP_STATE_SQD) && + (qp->cur_state == QED_ROCE_QP_STATE_SQD)) { + /* SQD->SQD */ + rc = qed_roce_sp_modify_responder(p_hwfn, qp, false, + params->modify_flags); + if (rc) + return rc; + + rc = qed_roce_sp_modify_requester(p_hwfn, qp, false, false, + params->modify_flags); + return rc; + } else if ((prev_state == QED_ROCE_QP_STATE_SQD) && + (qp->cur_state == QED_ROCE_QP_STATE_RTS)) { + /* SQD->RTS */ + rc = qed_roce_sp_modify_responder(p_hwfn, qp, false, + params->modify_flags); + if (rc) + return rc; + + rc = qed_roce_sp_modify_requester(p_hwfn, qp, false, false, + params->modify_flags); + + return rc; + } else if (qp->cur_state == QED_ROCE_QP_STATE_ERR || + qp->cur_state == QED_ROCE_QP_STATE_SQE) { + /* ->ERR */ + rc = qed_roce_sp_modify_responder(p_hwfn, qp, true, + params->modify_flags); + if (rc) + return rc; + + rc = qed_roce_sp_modify_requester(p_hwfn, qp, false, true, + params->modify_flags); + return rc; + } else if (qp->cur_state == QED_ROCE_QP_STATE_RESET) { + /* Any state -> RESET */ + + rc = qed_roce_sp_destroy_qp_responder(p_hwfn, qp, + &num_invalidated_mw); + if (rc) + return rc; + + rc = qed_roce_sp_destroy_qp_requester(p_hwfn, qp, + &num_bound_mw); + + if (num_invalidated_mw != num_bound_mw) { + DP_NOTICE(p_hwfn, + "number of invalidate memory windows is different from bounded ones\n"); + return -EINVAL; + } + } else { + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "0\n"); + } + + return rc; +} + +int qed_rdma_modify_qp(void *rdma_cxt, + struct qed_rdma_qp *qp, + struct qed_rdma_modify_qp_in_params *params) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; + enum qed_roce_qp_state prev_state; + int rc = 0; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x params->new_state=%d\n", + qp->icid, params->new_state); + + if (rc) { + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "rc = %d\n", rc); + return rc; + } + + if (GET_FIELD(params->modify_flags, + QED_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN)) { + qp->incoming_rdma_read_en = params->incoming_rdma_read_en; + qp->incoming_rdma_write_en = params->incoming_rdma_write_en; + qp->incoming_atomic_en = params->incoming_atomic_en; + } + + /* Update QP structure with the updated values */ + if (GET_FIELD(params->modify_flags, QED_ROCE_MODIFY_QP_VALID_ROCE_MODE)) + qp->roce_mode = params->roce_mode; + if (GET_FIELD(params->modify_flags, QED_ROCE_MODIFY_QP_VALID_PKEY)) + qp->pkey = params->pkey; + if (GET_FIELD(params->modify_flags, + QED_ROCE_MODIFY_QP_VALID_E2E_FLOW_CONTROL_EN)) + qp->e2e_flow_control_en = params->e2e_flow_control_en; + if (GET_FIELD(params->modify_flags, QED_ROCE_MODIFY_QP_VALID_DEST_QP)) + qp->dest_qp = params->dest_qp; + if (GET_FIELD(params->modify_flags, + QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR)) { + /* Indicates that the following parameters have changed: + * Traffic class, flow label, hop limit, source GID, + * destination GID, loopback indicator + */ + qp->traffic_class_tos = params->traffic_class_tos; + qp->flow_label = params->flow_label; + qp->hop_limit_ttl = params->hop_limit_ttl; + + qp->sgid = params->sgid; + qp->dgid = params->dgid; + qp->udp_src_port = 0; + qp->vlan_id = params->vlan_id; + qp->mtu = params->mtu; + qp->lb_indication = params->lb_indication; + memcpy((u8 *)&qp->remote_mac_addr[0], + (u8 *)¶ms->remote_mac_addr[0], ETH_ALEN); + if (params->use_local_mac) { + memcpy((u8 *)&qp->local_mac_addr[0], + (u8 *)¶ms->local_mac_addr[0], ETH_ALEN); + } else { + memcpy((u8 *)&qp->local_mac_addr[0], + (u8 *)&p_hwfn->hw_info.hw_mac_addr, ETH_ALEN); + } + } + if (GET_FIELD(params->modify_flags, QED_ROCE_MODIFY_QP_VALID_RQ_PSN)) + qp->rq_psn = params->rq_psn; + if (GET_FIELD(params->modify_flags, QED_ROCE_MODIFY_QP_VALID_SQ_PSN)) + qp->sq_psn = params->sq_psn; + if (GET_FIELD(params->modify_flags, + QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ)) + qp->max_rd_atomic_req = params->max_rd_atomic_req; + if (GET_FIELD(params->modify_flags, + QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP)) + qp->max_rd_atomic_resp = params->max_rd_atomic_resp; + if (GET_FIELD(params->modify_flags, + QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT)) + qp->ack_timeout = params->ack_timeout; + if (GET_FIELD(params->modify_flags, QED_ROCE_MODIFY_QP_VALID_RETRY_CNT)) + qp->retry_cnt = params->retry_cnt; + if (GET_FIELD(params->modify_flags, + QED_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT)) + qp->rnr_retry_cnt = params->rnr_retry_cnt; + if (GET_FIELD(params->modify_flags, + QED_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER)) + qp->min_rnr_nak_timer = params->min_rnr_nak_timer; + + qp->sqd_async = params->sqd_async; + + prev_state = qp->cur_state; + if (GET_FIELD(params->modify_flags, + QED_RDMA_MODIFY_QP_VALID_NEW_STATE)) { + qp->cur_state = params->new_state; + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "qp->cur_state=%d\n", + qp->cur_state); + } + + rc = qed_roce_modify_qp(p_hwfn, qp, prev_state, params); + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Modify QP, rc = %d\n", rc); + return rc; +} + static void *qed_rdma_get_rdma_ctx(struct qed_dev *cdev) { return QED_LEADING_HWFN(cdev); @@ -1201,6 +2394,10 @@ static const struct qed_rdma_ops qed_rdma_ops_pass = { .rdma_dealloc_pd = &qed_rdma_free_pd, .rdma_create_cq = &qed_rdma_create_cq, .rdma_destroy_cq = &qed_rdma_destroy_cq, + .rdma_create_qp = &qed_rdma_create_qp, + .rdma_modify_qp = &qed_rdma_modify_qp, + .rdma_query_qp = &qed_rdma_query_qp, + .rdma_destroy_qp = &qed_rdma_destroy_qp, }; const struct qed_rdma_ops *qed_get_rdma_ops() diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.h b/drivers/net/ethernet/qlogic/qed/qed_roce.h index 1fe73707e0b5..b8ddda456101 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_roce.h +++ b/drivers/net/ethernet/qlogic/qed/qed_roce.h @@ -114,6 +114,72 @@ struct qed_rdma_resize_cnq_in_params { u64 pbl_ptr; }; +struct qed_rdma_qp { + struct regpair qp_handle; + struct regpair qp_handle_async; + u32 qpid; + u16 icid; + enum qed_roce_qp_state cur_state; + bool use_srq; + bool signal_all; + bool fmr_and_reserved_lkey; + + bool incoming_rdma_read_en; + bool incoming_rdma_write_en; + bool incoming_atomic_en; + bool e2e_flow_control_en; + + u16 pd; + u16 pkey; + u32 dest_qp; + u16 mtu; + u16 srq_id; + u8 traffic_class_tos; + u8 hop_limit_ttl; + u16 dpi; + u32 flow_label; + bool lb_indication; + u16 vlan_id; + u32 ack_timeout; + u8 retry_cnt; + u8 rnr_retry_cnt; + u8 min_rnr_nak_timer; + bool sqd_async; + union qed_gid sgid; + union qed_gid dgid; + enum roce_mode roce_mode; + u16 udp_src_port; + u8 stats_queue; + + /* requeseter */ + u8 max_rd_atomic_req; + u32 sq_psn; + u16 sq_cq_id; + u16 sq_num_pages; + dma_addr_t sq_pbl_ptr; + void *orq; + dma_addr_t orq_phys_addr; + u8 orq_num_pages; + bool req_offloaded; + + /* responder */ + u8 max_rd_atomic_resp; + u32 rq_psn; + u16 rq_cq_id; + u16 rq_num_pages; + dma_addr_t rq_pbl_ptr; + void *irq; + dma_addr_t irq_phys_addr; + u8 irq_num_pages; + bool resp_offloaded; + + u8 remote_mac_addr[6]; + u8 local_mac_addr[6]; + + void *shared_queue; + dma_addr_t shared_queue_phys_addr; +}; + int qed_rdma_add_user(void *rdma_cxt, struct qed_rdma_add_user_out_params *out_params); @@ -135,6 +201,11 @@ void qed_rdma_cnq_prod_update(void *rdma_cxt, u8 cnq_index, u16 prod); void qed_rdma_resc_free(struct qed_hwfn *p_hwfn); void qed_async_roce_event(struct qed_hwfn *p_hwfn, struct event_ring_entry *p_eqe); +int qed_rdma_destroy_qp(void *rdma_cxt, struct qed_rdma_qp *qp); +int qed_rdma_modify_qp(void *rdma_cxt, struct qed_rdma_qp *qp, + struct qed_rdma_modify_qp_in_params *params); +int qed_rdma_query_qp(void *rdma_cxt, struct qed_rdma_qp *qp, + struct qed_rdma_query_qp_out_params *out_params); #if IS_ENABLED(CONFIG_INFINIBAND_QEDR) void qed_rdma_dpm_bar(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt); diff --git a/include/linux/qed/qed_roce_if.h b/include/linux/qed/qed_roce_if.h index b559b1c9e76d..02321e3b1716 100644 --- a/include/linux/qed/qed_roce_if.h +++ b/include/linux/qed/qed_roce_if.h @@ -43,6 +43,17 @@ #define QED_RDMA_MAX_CNQ_SIZE (0xFFFF) /* rdma interface */ + +enum qed_roce_qp_state { + QED_ROCE_QP_STATE_RESET, + QED_ROCE_QP_STATE_INIT, + QED_ROCE_QP_STATE_RTR, + QED_ROCE_QP_STATE_RTS, + QED_ROCE_QP_STATE_SQD, + QED_ROCE_QP_STATE_ERR, + QED_ROCE_QP_STATE_SQE +}; + enum qed_rdma_tid_type { QED_RDMA_TID_REGISTERED_MR, QED_RDMA_TID_FMR, @@ -292,6 +303,128 @@ struct qed_rdma_destroy_cq_out_params { u16 num_cq_notif; }; +struct qed_rdma_create_qp_in_params { + u32 qp_handle_lo; + u32 qp_handle_hi; + u32 qp_handle_async_lo; + u32 qp_handle_async_hi; + bool use_srq; + bool signal_all; + bool fmr_and_reserved_lkey; + u16 pd; + u16 dpi; + u16 sq_cq_id; + u16 sq_num_pages; + u64 sq_pbl_ptr; + u8 max_sq_sges; + u16 rq_cq_id; + u16 rq_num_pages; + u64 rq_pbl_ptr; + u16 srq_id; + u8 stats_queue; +}; + +struct qed_rdma_create_qp_out_params { + u32 qp_id; + u16 icid; + void *rq_pbl_virt; + dma_addr_t rq_pbl_phys; + void *sq_pbl_virt; + dma_addr_t sq_pbl_phys; +}; + +struct qed_rdma_modify_qp_in_params { + u32 modify_flags; +#define QED_RDMA_MODIFY_QP_VALID_NEW_STATE_MASK 0x1 +#define QED_RDMA_MODIFY_QP_VALID_NEW_STATE_SHIFT 0 +#define QED_ROCE_MODIFY_QP_VALID_PKEY_MASK 0x1 +#define QED_ROCE_MODIFY_QP_VALID_PKEY_SHIFT 1 +#define QED_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN_MASK 0x1 +#define QED_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN_SHIFT 2 +#define QED_ROCE_MODIFY_QP_VALID_DEST_QP_MASK 0x1 +#define QED_ROCE_MODIFY_QP_VALID_DEST_QP_SHIFT 3 +#define QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR_MASK 0x1 +#define QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR_SHIFT 4 +#define QED_ROCE_MODIFY_QP_VALID_RQ_PSN_MASK 0x1 +#define QED_ROCE_MODIFY_QP_VALID_RQ_PSN_SHIFT 5 +#define QED_ROCE_MODIFY_QP_VALID_SQ_PSN_MASK 0x1 +#define QED_ROCE_MODIFY_QP_VALID_SQ_PSN_SHIFT 6 +#define QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ_MASK 0x1 +#define QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ_SHIFT 7 +#define QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP_MASK 0x1 +#define QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP_SHIFT 8 +#define QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT_MASK 0x1 +#define QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT_SHIFT 9 +#define QED_ROCE_MODIFY_QP_VALID_RETRY_CNT_MASK 0x1 +#define QED_ROCE_MODIFY_QP_VALID_RETRY_CNT_SHIFT 10 +#define QED_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT_MASK 0x1 +#define QED_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT_SHIFT 11 +#define QED_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER_MASK 0x1 +#define QED_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER_SHIFT 12 +#define QED_ROCE_MODIFY_QP_VALID_E2E_FLOW_CONTROL_EN_MASK 0x1 +#define QED_ROCE_MODIFY_QP_VALID_E2E_FLOW_CONTROL_EN_SHIFT 13 +#define QED_ROCE_MODIFY_QP_VALID_ROCE_MODE_MASK 0x1 +#define QED_ROCE_MODIFY_QP_VALID_ROCE_MODE_SHIFT 14 + + enum qed_roce_qp_state new_state; + u16 pkey; + bool incoming_rdma_read_en; + bool incoming_rdma_write_en; + bool incoming_atomic_en; + bool e2e_flow_control_en; + u32 dest_qp; + bool lb_indication; + u16 mtu; + u8 traffic_class_tos; + u8 hop_limit_ttl; + u32 flow_label; + union qed_gid sgid; + union qed_gid dgid; + u16 udp_src_port; + + u16 vlan_id; + + u32 rq_psn; + u32 sq_psn; + u8 max_rd_atomic_resp; + u8 max_rd_atomic_req; + u32 ack_timeout; + u8 retry_cnt; + u8 rnr_retry_cnt; + u8 min_rnr_nak_timer; + bool sqd_async; + u8 remote_mac_addr[6]; + u8 local_mac_addr[6]; + bool use_local_mac; + enum roce_mode roce_mode; +}; + +struct qed_rdma_query_qp_out_params { + enum qed_roce_qp_state state; + u32 rq_psn; + u32 sq_psn; + bool draining; + u16 mtu; + u32 dest_qp; + bool incoming_rdma_read_en; + bool incoming_rdma_write_en; + bool incoming_atomic_en; + bool e2e_flow_control_en; + union qed_gid sgid; + union qed_gid dgid; + u32 flow_label; + u8 hop_limit_ttl; + u8 traffic_class_tos; + u32 timeout; + u8 rnr_retry; + u8 retry_cnt; + u8 min_rnr_nak_timer; + u16 pkey_index; + u8 max_rd_atomic; + u8 max_dest_rd_atomic; + bool sqd_async; +}; + struct qed_rdma_create_srq_out_params { u16 srq_id; }; @@ -368,6 +501,17 @@ struct qed_rdma_ops { int (*rdma_destroy_cq)(void *rdma_cxt, struct qed_rdma_destroy_cq_in_params *iparams, struct qed_rdma_destroy_cq_out_params *oparams); + struct qed_rdma_qp * + (*rdma_create_qp)(void *rdma_cxt, + struct qed_rdma_create_qp_in_params *iparams, + struct qed_rdma_create_qp_out_params *oparams); + + int (*rdma_modify_qp)(void *roce_cxt, struct qed_rdma_qp *qp, + struct qed_rdma_modify_qp_in_params *iparams); + + int (*rdma_query_qp)(void *rdma_cxt, struct qed_rdma_qp *qp, + struct qed_rdma_query_qp_out_params *oparams); + int (*rdma_destroy_qp)(void *rdma_cxt, struct qed_rdma_qp *qp); }; const struct qed_rdma_ops *qed_get_rdma_ops(void); From ee8eaea30b1368680f4d2f873bc14e1d7b57d021 Mon Sep 17 00:00:00 2001 From: Ram Amrani Date: Sat, 1 Oct 2016 22:00:00 +0300 Subject: [PATCH 1700/1715] qed: Add support for memory registeration verbs Add slowpath configuration support for user, dma and memory regions registration. Signed-off-by: Ram Amrani Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_roce.c | 240 +++++++++++++++++++++ include/linux/qed/qed_roce_if.h | 6 + 2 files changed, 246 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c index 438a0badc4e1..8b4854d3b395 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_roce.c +++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c @@ -689,6 +689,17 @@ struct qed_rdma_device *qed_rdma_query_device(void *rdma_cxt) return p_hwfn->p_rdma_info->dev; } +void qed_rdma_free_tid(void *rdma_cxt, u32 itid) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "itid = %08x\n", itid); + + spin_lock_bh(&p_hwfn->p_rdma_info->lock); + qed_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->tid_map, itid); + spin_unlock_bh(&p_hwfn->p_rdma_info->lock); +} + int qed_rdma_alloc_tid(void *rdma_cxt, u32 *itid) { struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; @@ -2300,6 +2311,231 @@ int qed_rdma_modify_qp(void *rdma_cxt, return rc; } +int qed_rdma_register_tid(void *rdma_cxt, + struct qed_rdma_register_tid_in_params *params) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; + struct rdma_register_tid_ramrod_data *p_ramrod; + struct qed_sp_init_data init_data; + struct qed_spq_entry *p_ent; + enum rdma_tid_type tid_type; + u8 fw_return_code; + int rc; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "itid = %08x\n", params->itid); + + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; + + rc = qed_sp_init_request(p_hwfn, &p_ent, RDMA_RAMROD_REGISTER_MR, + p_hwfn->p_rdma_info->proto, &init_data); + if (rc) { + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "rc = %d\n", rc); + return rc; + } + + if (p_hwfn->p_rdma_info->last_tid < params->itid) + p_hwfn->p_rdma_info->last_tid = params->itid; + + p_ramrod = &p_ent->ramrod.rdma_register_tid; + + p_ramrod->flags = 0; + SET_FIELD(p_ramrod->flags, + RDMA_REGISTER_TID_RAMROD_DATA_TWO_LEVEL_PBL, + params->pbl_two_level); + + SET_FIELD(p_ramrod->flags, + RDMA_REGISTER_TID_RAMROD_DATA_ZERO_BASED, params->zbva); + + SET_FIELD(p_ramrod->flags, + RDMA_REGISTER_TID_RAMROD_DATA_PHY_MR, params->phy_mr); + + /* Don't initialize D/C field, as it may override other bits. */ + if (!(params->tid_type == QED_RDMA_TID_FMR) && !(params->dma_mr)) + SET_FIELD(p_ramrod->flags, + RDMA_REGISTER_TID_RAMROD_DATA_PAGE_SIZE_LOG, + params->page_size_log - 12); + + SET_FIELD(p_ramrod->flags, + RDMA_REGISTER_TID_RAMROD_DATA_MAX_ID, + p_hwfn->p_rdma_info->last_tid); + + SET_FIELD(p_ramrod->flags, + RDMA_REGISTER_TID_RAMROD_DATA_REMOTE_READ, + params->remote_read); + + SET_FIELD(p_ramrod->flags, + RDMA_REGISTER_TID_RAMROD_DATA_REMOTE_WRITE, + params->remote_write); + + SET_FIELD(p_ramrod->flags, + RDMA_REGISTER_TID_RAMROD_DATA_REMOTE_ATOMIC, + params->remote_atomic); + + SET_FIELD(p_ramrod->flags, + RDMA_REGISTER_TID_RAMROD_DATA_LOCAL_WRITE, + params->local_write); + + SET_FIELD(p_ramrod->flags, + RDMA_REGISTER_TID_RAMROD_DATA_LOCAL_READ, params->local_read); + + SET_FIELD(p_ramrod->flags, + RDMA_REGISTER_TID_RAMROD_DATA_ENABLE_MW_BIND, + params->mw_bind); + + SET_FIELD(p_ramrod->flags1, + RDMA_REGISTER_TID_RAMROD_DATA_PBL_PAGE_SIZE_LOG, + params->pbl_page_size_log - 12); + + SET_FIELD(p_ramrod->flags2, + RDMA_REGISTER_TID_RAMROD_DATA_DMA_MR, params->dma_mr); + + switch (params->tid_type) { + case QED_RDMA_TID_REGISTERED_MR: + tid_type = RDMA_TID_REGISTERED_MR; + break; + case QED_RDMA_TID_FMR: + tid_type = RDMA_TID_FMR; + break; + case QED_RDMA_TID_MW_TYPE1: + tid_type = RDMA_TID_MW_TYPE1; + break; + case QED_RDMA_TID_MW_TYPE2A: + tid_type = RDMA_TID_MW_TYPE2A; + break; + default: + rc = -EINVAL; + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "rc = %d\n", rc); + return rc; + } + SET_FIELD(p_ramrod->flags1, + RDMA_REGISTER_TID_RAMROD_DATA_TID_TYPE, tid_type); + + p_ramrod->itid = cpu_to_le32(params->itid); + p_ramrod->key = params->key; + p_ramrod->pd = cpu_to_le16(params->pd); + p_ramrod->length_hi = (u8)(params->length >> 32); + p_ramrod->length_lo = DMA_LO_LE(params->length); + if (params->zbva) { + /* Lower 32 bits of the registered MR address. + * In case of zero based MR, will hold FBO + */ + p_ramrod->va.hi = 0; + p_ramrod->va.lo = cpu_to_le32(params->fbo); + } else { + DMA_REGPAIR_LE(p_ramrod->va, params->vaddr); + } + DMA_REGPAIR_LE(p_ramrod->pbl_base, params->pbl_ptr); + + /* DIF */ + if (params->dif_enabled) { + SET_FIELD(p_ramrod->flags2, + RDMA_REGISTER_TID_RAMROD_DATA_DIF_ON_HOST_FLG, 1); + DMA_REGPAIR_LE(p_ramrod->dif_error_addr, + params->dif_error_addr); + DMA_REGPAIR_LE(p_ramrod->dif_runt_addr, params->dif_runt_addr); + } + + rc = qed_spq_post(p_hwfn, p_ent, &fw_return_code); + + if (fw_return_code != RDMA_RETURN_OK) { + DP_NOTICE(p_hwfn, "fw_return_code = %d\n", fw_return_code); + return -EINVAL; + } + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Register TID, rc = %d\n", rc); + return rc; +} + +int qed_rdma_deregister_tid(void *rdma_cxt, u32 itid) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; + struct rdma_deregister_tid_ramrod_data *p_ramrod; + struct qed_sp_init_data init_data; + struct qed_spq_entry *p_ent; + struct qed_ptt *p_ptt; + u8 fw_return_code; + int rc; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "itid = %08x\n", itid); + + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; + + rc = qed_sp_init_request(p_hwfn, &p_ent, RDMA_RAMROD_DEREGISTER_MR, + p_hwfn->p_rdma_info->proto, &init_data); + if (rc) { + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "rc = %d\n", rc); + return rc; + } + + p_ramrod = &p_ent->ramrod.rdma_deregister_tid; + p_ramrod->itid = cpu_to_le32(itid); + + rc = qed_spq_post(p_hwfn, p_ent, &fw_return_code); + if (rc) { + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "rc = %d\n", rc); + return rc; + } + + if (fw_return_code == RDMA_RETURN_DEREGISTER_MR_BAD_STATE_ERR) { + DP_NOTICE(p_hwfn, "fw_return_code = %d\n", fw_return_code); + return -EINVAL; + } else if (fw_return_code == RDMA_RETURN_NIG_DRAIN_REQ) { + /* Bit indicating that the TID is in use and a nig drain is + * required before sending the ramrod again + */ + p_ptt = qed_ptt_acquire(p_hwfn); + if (!p_ptt) { + rc = -EBUSY; + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "Failed to acquire PTT\n"); + return rc; + } + + rc = qed_mcp_drain(p_hwfn, p_ptt); + if (rc) { + qed_ptt_release(p_hwfn, p_ptt); + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "Drain failed\n"); + return rc; + } + + qed_ptt_release(p_hwfn, p_ptt); + + /* Resend the ramrod */ + rc = qed_sp_init_request(p_hwfn, &p_ent, + RDMA_RAMROD_DEREGISTER_MR, + p_hwfn->p_rdma_info->proto, + &init_data); + if (rc) { + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "Failed to init sp-element\n"); + return rc; + } + + rc = qed_spq_post(p_hwfn, p_ent, &fw_return_code); + if (rc) { + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "Ramrod failed\n"); + return rc; + } + + if (fw_return_code != RDMA_RETURN_OK) { + DP_NOTICE(p_hwfn, "fw_return_code = %d\n", + fw_return_code); + return rc; + } + } + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "De-registered TID, rc = %d\n", rc); + return rc; +} + static void *qed_rdma_get_rdma_ctx(struct qed_dev *cdev) { return QED_LEADING_HWFN(cdev); @@ -2398,6 +2634,10 @@ static const struct qed_rdma_ops qed_rdma_ops_pass = { .rdma_modify_qp = &qed_rdma_modify_qp, .rdma_query_qp = &qed_rdma_query_qp, .rdma_destroy_qp = &qed_rdma_destroy_qp, + .rdma_alloc_tid = &qed_rdma_alloc_tid, + .rdma_free_tid = &qed_rdma_free_tid, + .rdma_register_tid = &qed_rdma_register_tid, + .rdma_deregister_tid = &qed_rdma_deregister_tid, }; const struct qed_rdma_ops *qed_get_rdma_ops() diff --git a/include/linux/qed/qed_roce_if.h b/include/linux/qed/qed_roce_if.h index 02321e3b1716..0b6df6eedcf1 100644 --- a/include/linux/qed/qed_roce_if.h +++ b/include/linux/qed/qed_roce_if.h @@ -512,6 +512,12 @@ struct qed_rdma_ops { int (*rdma_query_qp)(void *rdma_cxt, struct qed_rdma_qp *qp, struct qed_rdma_query_qp_out_params *oparams); int (*rdma_destroy_qp)(void *rdma_cxt, struct qed_rdma_qp *qp); + int + (*rdma_register_tid)(void *rdma_cxt, + struct qed_rdma_register_tid_in_params *iparams); + int (*rdma_deregister_tid)(void *rdma_cxt, u32 itid); + int (*rdma_alloc_tid)(void *rdma_cxt, u32 *itid); + void (*rdma_free_tid)(void *rdma_cxt, u32 itid); }; const struct qed_rdma_ops *qed_get_rdma_ops(void); From abd49676c70793ee0a251bc3d8fe1604f9303210 Mon Sep 17 00:00:00 2001 From: Ram Amrani Date: Sat, 1 Oct 2016 22:00:01 +0300 Subject: [PATCH 1701/1715] qed: Add RoCE ll2 & GSI support Add the RoCE-specific LL2 logic [as well as GSI support] over the 'generic' LL2 interface. Signed-off-by: Ram Amrani Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed.h | 1 + drivers/net/ethernet/qlogic/qed/qed_hsi.h | 3 + drivers/net/ethernet/qlogic/qed/qed_ll2.c | 115 +++++++- drivers/net/ethernet/qlogic/qed/qed_ll2.h | 29 +- drivers/net/ethernet/qlogic/qed/qed_roce.c | 307 +++++++++++++++++++++ drivers/net/ethernet/qlogic/qed/qed_roce.h | 1 + include/linux/qed/qed_roce_if.h | 79 ++++++ 7 files changed, 523 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index c5a098a2ca2c..653bb5735f0c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -437,6 +437,7 @@ struct qed_hwfn { #endif struct z_stream_s *stream; + struct qed_roce_ll2_info *ll2; }; struct pci_params { diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h index 2777d5bb4380..72eee29c677f 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h +++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h @@ -727,6 +727,9 @@ struct core_tx_bd_flags { #define CORE_TX_BD_FLAGS_L4_PROTOCOL_SHIFT 6 #define CORE_TX_BD_FLAGS_L4_PSEUDO_CSUM_MODE_MASK 0x1 #define CORE_TX_BD_FLAGS_L4_PSEUDO_CSUM_MODE_SHIFT 7 +#define CORE_TX_BD_FLAGS_ROCE_FLAV_MASK 0x1 +#define CORE_TX_BD_FLAGS_ROCE_FLAV_SHIFT 12 + }; struct core_tx_bd { diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index e0ec8ed2f92c..a6db10717d5c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -278,6 +278,7 @@ static void qed_ll2_txq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle) struct qed_ll2_tx_packet *p_pkt = NULL; struct qed_ll2_info *p_ll2_conn; struct qed_ll2_tx_queue *p_tx; + dma_addr_t tx_frag; p_ll2_conn = qed_ll2_handle_sanity_inactive(p_hwfn, connection_handle); if (!p_ll2_conn) @@ -297,11 +298,22 @@ static void qed_ll2_txq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle) p_tx->cur_completing_packet = *p_pkt; p_tx->cur_completing_bd_idx = 1; b_last_frag = p_tx->cur_completing_bd_idx == p_pkt->bd_used; + tx_frag = p_pkt->bds_set[0].tx_frag; + if (p_ll2_conn->gsi_enable) + qed_ll2b_release_tx_gsi_packet(p_hwfn, + p_ll2_conn->my_id, + p_pkt->cookie, + tx_frag, + b_last_frag, + b_last_packet); + else + qed_ll2b_complete_tx_packet(p_hwfn, + p_ll2_conn->my_id, + p_pkt->cookie, + tx_frag, + b_last_frag, + b_last_packet); - qed_ll2b_complete_tx_packet(p_hwfn, p_ll2_conn->my_id, - p_pkt->cookie, - p_pkt->bds_set[0].tx_frag, - b_last_frag, b_last_packet); } } @@ -313,6 +325,7 @@ static int qed_ll2_txq_completion(struct qed_hwfn *p_hwfn, void *p_cookie) struct qed_ll2_tx_packet *p_pkt; bool b_last_frag = false; unsigned long flags; + dma_addr_t tx_frag; int rc = -EINVAL; spin_lock_irqsave(&p_tx->lock, flags); @@ -353,11 +366,19 @@ static int qed_ll2_txq_completion(struct qed_hwfn *p_hwfn, void *p_cookie) list_add_tail(&p_pkt->list_entry, &p_tx->free_descq); spin_unlock_irqrestore(&p_tx->lock, flags); - qed_ll2b_complete_tx_packet(p_hwfn, - p_ll2_conn->my_id, - p_pkt->cookie, - p_pkt->bds_set[0].tx_frag, - b_last_frag, !num_bds); + tx_frag = p_pkt->bds_set[0].tx_frag; + if (p_ll2_conn->gsi_enable) + qed_ll2b_complete_tx_gsi_packet(p_hwfn, + p_ll2_conn->my_id, + p_pkt->cookie, + tx_frag, + b_last_frag, !num_bds); + else + qed_ll2b_complete_tx_packet(p_hwfn, + p_ll2_conn->my_id, + p_pkt->cookie, + tx_frag, + b_last_frag, !num_bds); spin_lock_irqsave(&p_tx->lock, flags); } @@ -368,6 +389,54 @@ out: return rc; } +static int +qed_ll2_rxq_completion_gsi(struct qed_hwfn *p_hwfn, + struct qed_ll2_info *p_ll2_info, + union core_rx_cqe_union *p_cqe, + unsigned long lock_flags, bool b_last_cqe) +{ + struct qed_ll2_rx_queue *p_rx = &p_ll2_info->rx_queue; + struct qed_ll2_rx_packet *p_pkt = NULL; + u16 packet_length, parse_flags, vlan; + u32 src_mac_addrhi; + u16 src_mac_addrlo; + + if (!list_empty(&p_rx->active_descq)) + p_pkt = list_first_entry(&p_rx->active_descq, + struct qed_ll2_rx_packet, list_entry); + if (!p_pkt) { + DP_NOTICE(p_hwfn, + "GSI Rx completion but active_descq is empty\n"); + return -EIO; + } + + list_del(&p_pkt->list_entry); + parse_flags = le16_to_cpu(p_cqe->rx_cqe_gsi.parse_flags.flags); + packet_length = le16_to_cpu(p_cqe->rx_cqe_gsi.data_length); + vlan = le16_to_cpu(p_cqe->rx_cqe_gsi.vlan); + src_mac_addrhi = le32_to_cpu(p_cqe->rx_cqe_gsi.src_mac_addrhi); + src_mac_addrlo = le16_to_cpu(p_cqe->rx_cqe_gsi.src_mac_addrlo); + if (qed_chain_consume(&p_rx->rxq_chain) != p_pkt->rxq_bd) + DP_NOTICE(p_hwfn, + "Mismatch between active_descq and the LL2 Rx chain\n"); + list_add_tail(&p_pkt->list_entry, &p_rx->free_descq); + + spin_unlock_irqrestore(&p_rx->lock, lock_flags); + qed_ll2b_complete_rx_gsi_packet(p_hwfn, + p_ll2_info->my_id, + p_pkt->cookie, + p_pkt->rx_buf_addr, + packet_length, + p_cqe->rx_cqe_gsi.data_length_error, + parse_flags, + vlan, + src_mac_addrhi, + src_mac_addrlo, b_last_cqe); + spin_lock_irqsave(&p_rx->lock, lock_flags); + + return 0; +} + static int qed_ll2_rxq_completion_reg(struct qed_hwfn *p_hwfn, struct qed_ll2_info *p_ll2_conn, union core_rx_cqe_union *p_cqe, @@ -429,6 +498,10 @@ static int qed_ll2_rxq_completion(struct qed_hwfn *p_hwfn, void *cookie) DP_NOTICE(p_hwfn, "LL2 - unexpected Rx CQE slowpath\n"); rc = -EINVAL; break; + case CORE_RX_CQE_TYPE_GSI_OFFLOAD: + rc = qed_ll2_rxq_completion_gsi(p_hwfn, p_ll2_conn, + cqe, flags, b_last_cqe); + break; case CORE_RX_CQE_TYPE_REGULAR: rc = qed_ll2_rxq_completion_reg(p_hwfn, p_ll2_conn, cqe, flags, b_last_cqe); @@ -527,6 +600,7 @@ static int qed_sp_ll2_rx_queue_start(struct qed_hwfn *p_hwfn, } p_ramrod->action_on_error.error_type = action_on_error; + p_ramrod->gsi_offload_flag = p_ll2_conn->gsi_enable; return qed_spq_post(p_hwfn, p_ent, NULL); } @@ -589,6 +663,7 @@ static int qed_sp_ll2_tx_queue_start(struct qed_hwfn *p_hwfn, DP_NOTICE(p_hwfn, "Unknown connection type: %d\n", conn_type); } + p_ramrod->gsi_offload_flag = p_ll2_conn->gsi_enable; return qed_spq_post(p_hwfn, p_ent, NULL); } @@ -775,6 +850,7 @@ int qed_ll2_acquire_connection(struct qed_hwfn *p_hwfn, p_ll2_info->tx_dest = p_params->tx_dest; p_ll2_info->ai_err_packet_too_big = p_params->ai_err_packet_too_big; p_ll2_info->ai_err_no_buf = p_params->ai_err_no_buf; + p_ll2_info->gsi_enable = p_params->gsi_enable; rc = qed_ll2_acquire_connection_rx(p_hwfn, p_ll2_info, rx_num_desc); if (rc) @@ -1026,6 +1102,7 @@ static void qed_ll2_prepare_tx_packet_set_bd(struct qed_hwfn *p_hwfn, u16 vlan, u8 bd_flags, u16 l4_hdr_offset_w, + enum core_roce_flavor_type type, dma_addr_t first_frag, u16 first_frag_len) { @@ -1046,6 +1123,9 @@ static void qed_ll2_prepare_tx_packet_set_bd(struct qed_hwfn *p_hwfn, DMA_REGPAIR_LE(start_bd->addr, first_frag); start_bd->nbytes = cpu_to_le16(first_frag_len); + SET_FIELD(start_bd->bd_flags.as_bitfield, CORE_TX_BD_FLAGS_ROCE_FLAV, + type); + DP_VERBOSE(p_hwfn, (NETIF_MSG_TX_QUEUED | QED_MSG_LL2), "LL2 [q 0x%02x cid 0x%08x type 0x%08x] Tx Producer at [0x%04x] - set with a %04x bytes %02x BDs buffer at %08x:%08x\n", @@ -1137,11 +1217,13 @@ int qed_ll2_prepare_tx_packet(struct qed_hwfn *p_hwfn, u16 vlan, u8 bd_flags, u16 l4_hdr_offset_w, + enum qed_ll2_roce_flavor_type qed_roce_flavor, dma_addr_t first_frag, u16 first_frag_len, void *cookie, u8 notify_fw) { struct qed_ll2_tx_packet *p_curp = NULL; struct qed_ll2_info *p_ll2_conn = NULL; + enum core_roce_flavor_type roce_flavor; struct qed_ll2_tx_queue *p_tx; struct qed_chain *p_tx_chain; unsigned long flags; @@ -1174,6 +1256,15 @@ int qed_ll2_prepare_tx_packet(struct qed_hwfn *p_hwfn, goto out; } + if (qed_roce_flavor == QED_LL2_ROCE) { + roce_flavor = CORE_ROCE; + } else if (qed_roce_flavor == QED_LL2_RROCE) { + roce_flavor = CORE_RROCE; + } else { + rc = -EINVAL; + goto out; + } + /* Prepare packet and BD, and perhaps send a doorbell to FW */ qed_ll2_prepare_tx_packet_set(p_hwfn, p_tx, p_curp, num_of_bds, first_frag, @@ -1181,6 +1272,7 @@ int qed_ll2_prepare_tx_packet(struct qed_hwfn *p_hwfn, qed_ll2_prepare_tx_packet_set_bd(p_hwfn, p_ll2_conn, p_curp, num_of_bds, CORE_TX_DEST_NW, vlan, bd_flags, l4_hdr_offset_w, + roce_flavor, first_frag, first_frag_len); qed_ll2_tx_packet_notify(p_hwfn, p_ll2_conn); @@ -1476,6 +1568,7 @@ static int qed_ll2_start(struct qed_dev *cdev, struct qed_ll2_params *params) ll2_info.rx_vlan_removal_en = params->rx_vlan_stripping; ll2_info.tx_tc = 0; ll2_info.tx_dest = CORE_TX_DEST_NW; + ll2_info.gsi_enable = 1; rc = qed_ll2_acquire_connection(QED_LEADING_HWFN(cdev), &ll2_info, QED_LL2_RX_SIZE, QED_LL2_TX_SIZE, @@ -1625,8 +1718,8 @@ static int qed_ll2_start_xmit(struct qed_dev *cdev, struct sk_buff *skb) rc = qed_ll2_prepare_tx_packet(QED_LEADING_HWFN(cdev), cdev->ll2->handle, 1 + skb_shinfo(skb)->nr_frags, - vlan, flags, 0, mapping, - skb->len, skb, 1); + vlan, flags, 0, 0 /* RoCE FLAVOR */, + mapping, skb->len, skb, 1); if (rc) goto err; diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.h b/drivers/net/ethernet/qlogic/qed/qed_ll2.h index a037c4845928..80a5dc2d652d 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.h +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.h @@ -24,6 +24,12 @@ #define QED_MAX_NUM_OF_LL2_CONNECTIONS (4) +enum qed_ll2_roce_flavor_type { + QED_LL2_ROCE, + QED_LL2_RROCE, + MAX_QED_LL2_ROCE_FLAVOR_TYPE +}; + enum qed_ll2_conn_type { QED_LL2_TYPE_RESERVED, QED_LL2_TYPE_ISCSI, @@ -119,6 +125,7 @@ struct qed_ll2_info { u8 tx_stats_en; struct qed_ll2_rx_queue rx_queue; struct qed_ll2_tx_queue tx_queue; + u8 gsi_enable; }; /** @@ -199,6 +206,7 @@ int qed_ll2_prepare_tx_packet(struct qed_hwfn *p_hwfn, u16 vlan, u8 bd_flags, u16 l4_hdr_offset_w, + enum qed_ll2_roce_flavor_type qed_roce_flavor, dma_addr_t first_frag, u16 first_frag_len, void *cookie, u8 notify_fw); @@ -285,5 +293,24 @@ void qed_ll2_setup(struct qed_hwfn *p_hwfn, */ void qed_ll2_free(struct qed_hwfn *p_hwfn, struct qed_ll2_info *p_ll2_connections); - +void qed_ll2b_complete_rx_gsi_packet(struct qed_hwfn *p_hwfn, + u8 connection_handle, + void *cookie, + dma_addr_t rx_buf_addr, + u16 data_length, + u8 data_length_error, + u16 parse_flags, + u16 vlan, + u32 src_mac_addr_hi, + u16 src_mac_addr_lo, bool b_last_packet); +void qed_ll2b_complete_tx_gsi_packet(struct qed_hwfn *p_hwfn, + u8 connection_handle, + void *cookie, + dma_addr_t first_frag_addr, + bool b_last_fragment, bool b_last_packet); +void qed_ll2b_release_tx_gsi_packet(struct qed_hwfn *p_hwfn, + u8 connection_handle, + void *cookie, + dma_addr_t first_frag_addr, + bool b_last_fragment, bool b_last_packet); #endif diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c index 8b4854d3b395..23430059471c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_roce.c +++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c @@ -64,6 +64,7 @@ #include "qed_reg_addr.h" #include "qed_sp.h" #include "qed_roce.h" +#include "qed_ll2.h" void qed_async_roce_event(struct qed_hwfn *p_hwfn, struct event_ring_entry *p_eqe) @@ -2611,6 +2612,306 @@ void qed_rdma_remove_user(void *rdma_cxt, u16 dpi) spin_unlock_bh(&p_hwfn->p_rdma_info->lock); } +void qed_ll2b_complete_tx_gsi_packet(struct qed_hwfn *p_hwfn, + u8 connection_handle, + void *cookie, + dma_addr_t first_frag_addr, + bool b_last_fragment, bool b_last_packet) +{ + struct qed_roce_ll2_packet *packet = cookie; + struct qed_roce_ll2_info *roce_ll2 = p_hwfn->ll2; + + roce_ll2->cbs.tx_cb(roce_ll2->cb_cookie, packet); +} + +void qed_ll2b_release_tx_gsi_packet(struct qed_hwfn *p_hwfn, + u8 connection_handle, + void *cookie, + dma_addr_t first_frag_addr, + bool b_last_fragment, bool b_last_packet) +{ + qed_ll2b_complete_tx_gsi_packet(p_hwfn, connection_handle, + cookie, first_frag_addr, + b_last_fragment, b_last_packet); +} + +void qed_ll2b_complete_rx_gsi_packet(struct qed_hwfn *p_hwfn, + u8 connection_handle, + void *cookie, + dma_addr_t rx_buf_addr, + u16 data_length, + u8 data_length_error, + u16 parse_flags, + u16 vlan, + u32 src_mac_addr_hi, + u16 src_mac_addr_lo, bool b_last_packet) +{ + struct qed_roce_ll2_info *roce_ll2 = p_hwfn->ll2; + struct qed_roce_ll2_rx_params params; + struct qed_dev *cdev = p_hwfn->cdev; + struct qed_roce_ll2_packet pkt; + + DP_VERBOSE(cdev, + QED_MSG_LL2, + "roce ll2 rx complete: bus_addr=%p, len=%d, data_len_err=%d\n", + (void *)(uintptr_t)rx_buf_addr, + data_length, data_length_error); + + memset(&pkt, 0, sizeof(pkt)); + pkt.n_seg = 1; + pkt.payload[0].baddr = rx_buf_addr; + pkt.payload[0].len = data_length; + + memset(¶ms, 0, sizeof(params)); + params.vlan_id = vlan; + *((u32 *)¶ms.smac[0]) = ntohl(src_mac_addr_hi); + *((u16 *)¶ms.smac[4]) = ntohs(src_mac_addr_lo); + + if (data_length_error) { + DP_ERR(cdev, + "roce ll2 rx complete: data length error %d, length=%d\n", + data_length_error, data_length); + params.rc = -EINVAL; + } + + roce_ll2->cbs.rx_cb(roce_ll2->cb_cookie, &pkt, ¶ms); +} + +static int qed_roce_ll2_set_mac_filter(struct qed_dev *cdev, + u8 *old_mac_address, + u8 *new_mac_address) +{ + struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev); + struct qed_ptt *p_ptt; + int rc = 0; + + if (!hwfn->ll2 || hwfn->ll2->handle == QED_LL2_UNUSED_HANDLE) { + DP_ERR(cdev, + "qed roce mac filter failed - roce_info/ll2 NULL\n"); + return -EINVAL; + } + + p_ptt = qed_ptt_acquire(QED_LEADING_HWFN(cdev)); + if (!p_ptt) { + DP_ERR(cdev, + "qed roce ll2 mac filter set: failed to acquire PTT\n"); + return -EINVAL; + } + + mutex_lock(&hwfn->ll2->lock); + if (old_mac_address) + qed_llh_remove_mac_filter(QED_LEADING_HWFN(cdev), p_ptt, + old_mac_address); + if (new_mac_address) + rc = qed_llh_add_mac_filter(QED_LEADING_HWFN(cdev), p_ptt, + new_mac_address); + mutex_unlock(&hwfn->ll2->lock); + + qed_ptt_release(QED_LEADING_HWFN(cdev), p_ptt); + + if (rc) + DP_ERR(cdev, + "qed roce ll2 mac filter set: failed to add mac filter\n"); + + return rc; +} + +static int qed_roce_ll2_start(struct qed_dev *cdev, + struct qed_roce_ll2_params *params) +{ + struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev); + struct qed_roce_ll2_info *roce_ll2; + struct qed_ll2_info ll2_params; + int rc; + + if (!params) { + DP_ERR(cdev, "qed roce ll2 start: failed due to NULL params\n"); + return -EINVAL; + } + if (!params->cbs.tx_cb || !params->cbs.rx_cb) { + DP_ERR(cdev, + "qed roce ll2 start: failed due to NULL tx/rx. tx_cb=%p, rx_cb=%p\n", + params->cbs.tx_cb, params->cbs.rx_cb); + return -EINVAL; + } + if (!is_valid_ether_addr(params->mac_address)) { + DP_ERR(cdev, + "qed roce ll2 start: failed due to invalid Ethernet address %pM\n", + params->mac_address); + return -EINVAL; + } + + /* Initialize */ + roce_ll2 = kzalloc(sizeof(*roce_ll2), GFP_ATOMIC); + if (!roce_ll2) { + DP_ERR(cdev, "qed roce ll2 start: failed memory allocation\n"); + return -ENOMEM; + } + memset(roce_ll2, 0, sizeof(*roce_ll2)); + roce_ll2->handle = QED_LL2_UNUSED_HANDLE; + roce_ll2->cbs = params->cbs; + roce_ll2->cb_cookie = params->cb_cookie; + mutex_init(&roce_ll2->lock); + + memset(&ll2_params, 0, sizeof(ll2_params)); + ll2_params.conn_type = QED_LL2_TYPE_ROCE; + ll2_params.mtu = params->mtu; + ll2_params.rx_drop_ttl0_flg = true; + ll2_params.rx_vlan_removal_en = false; + ll2_params.tx_dest = CORE_TX_DEST_NW; + ll2_params.ai_err_packet_too_big = LL2_DROP_PACKET; + ll2_params.ai_err_no_buf = LL2_DROP_PACKET; + ll2_params.gsi_enable = true; + + rc = qed_ll2_acquire_connection(QED_LEADING_HWFN(cdev), &ll2_params, + params->max_rx_buffers, + params->max_tx_buffers, + &roce_ll2->handle); + if (rc) { + DP_ERR(cdev, + "qed roce ll2 start: failed to acquire LL2 connection (rc=%d)\n", + rc); + goto err; + } + + rc = qed_ll2_establish_connection(QED_LEADING_HWFN(cdev), + roce_ll2->handle); + if (rc) { + DP_ERR(cdev, + "qed roce ll2 start: failed to establish LL2 connection (rc=%d)\n", + rc); + goto err1; + } + + hwfn->ll2 = roce_ll2; + + rc = qed_roce_ll2_set_mac_filter(cdev, NULL, params->mac_address); + if (rc) { + hwfn->ll2 = NULL; + goto err2; + } + ether_addr_copy(roce_ll2->mac_address, params->mac_address); + + return 0; + +err2: + qed_ll2_terminate_connection(QED_LEADING_HWFN(cdev), roce_ll2->handle); +err1: + qed_ll2_release_connection(QED_LEADING_HWFN(cdev), roce_ll2->handle); +err: + kfree(roce_ll2); + return rc; +} + +static int qed_roce_ll2_stop(struct qed_dev *cdev) +{ + struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev); + struct qed_roce_ll2_info *roce_ll2 = hwfn->ll2; + int rc; + + if (!cdev) { + DP_ERR(cdev, "qed roce ll2 stop: invalid cdev\n"); + return -EINVAL; + } + + if (roce_ll2->handle == QED_LL2_UNUSED_HANDLE) { + DP_ERR(cdev, "qed roce ll2 stop: cannot stop an unused LL2\n"); + return -EINVAL; + } + + /* remove LL2 MAC address filter */ + rc = qed_roce_ll2_set_mac_filter(cdev, roce_ll2->mac_address, NULL); + eth_zero_addr(roce_ll2->mac_address); + + rc = qed_ll2_terminate_connection(QED_LEADING_HWFN(cdev), + roce_ll2->handle); + if (rc) + DP_ERR(cdev, + "qed roce ll2 stop: failed to terminate LL2 connection (rc=%d)\n", + rc); + + qed_ll2_release_connection(QED_LEADING_HWFN(cdev), roce_ll2->handle); + + roce_ll2->handle = QED_LL2_UNUSED_HANDLE; + + kfree(roce_ll2); + + return rc; +} + +static int qed_roce_ll2_tx(struct qed_dev *cdev, + struct qed_roce_ll2_packet *pkt, + struct qed_roce_ll2_tx_params *params) +{ + struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev); + struct qed_roce_ll2_info *roce_ll2 = hwfn->ll2; + enum qed_ll2_roce_flavor_type qed_roce_flavor; + u8 flags = 0; + int rc; + int i; + + if (!cdev || !pkt || !params) { + DP_ERR(cdev, + "roce ll2 tx: failed tx because one of the following is NULL - drv=%p, pkt=%p, params=%p\n", + cdev, pkt, params); + return -EINVAL; + } + + qed_roce_flavor = (pkt->roce_mode == ROCE_V1) ? QED_LL2_ROCE + : QED_LL2_RROCE; + + if (pkt->roce_mode == ROCE_V2_IPV4) + flags |= BIT(CORE_TX_BD_FLAGS_IP_CSUM_SHIFT); + + /* Tx header */ + rc = qed_ll2_prepare_tx_packet(QED_LEADING_HWFN(cdev), roce_ll2->handle, + 1 + pkt->n_seg, 0, flags, 0, + qed_roce_flavor, pkt->header.baddr, + pkt->header.len, pkt, 1); + if (rc) { + DP_ERR(cdev, "roce ll2 tx: header failed (rc=%d)\n", rc); + return QED_ROCE_TX_HEAD_FAILURE; + } + + /* Tx payload */ + for (i = 0; i < pkt->n_seg; i++) { + rc = qed_ll2_set_fragment_of_tx_packet(QED_LEADING_HWFN(cdev), + roce_ll2->handle, + pkt->payload[i].baddr, + pkt->payload[i].len); + if (rc) { + /* If failed not much to do here, partial packet has + * been posted * we can't free memory, will need to wait + * for completion + */ + DP_ERR(cdev, + "roce ll2 tx: payload failed (rc=%d)\n", rc); + return QED_ROCE_TX_FRAG_FAILURE; + } + } + + return 0; +} + +static int qed_roce_ll2_post_rx_buffer(struct qed_dev *cdev, + struct qed_roce_ll2_buffer *buf, + u64 cookie, u8 notify_fw) +{ + return qed_ll2_post_rx_buffer(QED_LEADING_HWFN(cdev), + QED_LEADING_HWFN(cdev)->ll2->handle, + buf->baddr, buf->len, + (void *)(uintptr_t)cookie, notify_fw); +} + +static int qed_roce_ll2_stats(struct qed_dev *cdev, struct qed_ll2_stats *stats) +{ + struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev); + struct qed_roce_ll2_info *roce_ll2 = hwfn->ll2; + + return qed_ll2_get_stats(QED_LEADING_HWFN(cdev), + roce_ll2->handle, stats); +} + static const struct qed_rdma_ops qed_rdma_ops_pass = { .common = &qed_common_ops_pass, .fill_dev_info = &qed_fill_rdma_dev_info, @@ -2638,6 +2939,12 @@ static const struct qed_rdma_ops qed_rdma_ops_pass = { .rdma_free_tid = &qed_rdma_free_tid, .rdma_register_tid = &qed_rdma_register_tid, .rdma_deregister_tid = &qed_rdma_deregister_tid, + .roce_ll2_start = &qed_roce_ll2_start, + .roce_ll2_stop = &qed_roce_ll2_stop, + .roce_ll2_tx = &qed_roce_ll2_tx, + .roce_ll2_post_rx_buffer = &qed_roce_ll2_post_rx_buffer, + .roce_ll2_set_mac_filter = &qed_roce_ll2_set_mac_filter, + .roce_ll2_stats = &qed_roce_ll2_stats, }; const struct qed_rdma_ops *qed_get_rdma_ops() diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.h b/drivers/net/ethernet/qlogic/qed/qed_roce.h index b8ddda456101..2f091e8a0f40 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_roce.h +++ b/drivers/net/ethernet/qlogic/qed/qed_roce.h @@ -42,6 +42,7 @@ #include "qed.h" #include "qed_dev_api.h" #include "qed_hsi.h" +#include "qed_ll2.h" #define QED_RDMA_MAX_FMR (RDMA_MAX_TIDS) #define QED_RDMA_MAX_P_KEY (1) diff --git a/include/linux/qed/qed_roce_if.h b/include/linux/qed/qed_roce_if.h index 0b6df6eedcf1..53047d3fa678 100644 --- a/include/linux/qed/qed_roce_if.h +++ b/include/linux/qed/qed_roce_if.h @@ -39,6 +39,16 @@ #include #include #include +#include + +enum qed_roce_ll2_tx_dest { + /* Light L2 TX Destination to the Network */ + QED_ROCE_LL2_TX_DEST_NW, + + /* Light L2 TX Destination to the Loopback */ + QED_ROCE_LL2_TX_DEST_LB, + QED_ROCE_LL2_TX_DEST_MAX +}; #define QED_RDMA_MAX_CNQ_SIZE (0xFFFF) @@ -461,6 +471,61 @@ struct qed_rdma_counters_out_params { #define QED_ROCE_TX_HEAD_FAILURE (1) #define QED_ROCE_TX_FRAG_FAILURE (2) +struct qed_roce_ll2_header { + void *vaddr; + dma_addr_t baddr; + size_t len; +}; + +struct qed_roce_ll2_buffer { + dma_addr_t baddr; + size_t len; +}; + +struct qed_roce_ll2_packet { + struct qed_roce_ll2_header header; + int n_seg; + struct qed_roce_ll2_buffer payload[RDMA_MAX_SGE_PER_SQ_WQE]; + int roce_mode; + enum qed_roce_ll2_tx_dest tx_dest; +}; + +struct qed_roce_ll2_tx_params { + int reserved; +}; + +struct qed_roce_ll2_rx_params { + u16 vlan_id; + u8 smac[ETH_ALEN]; + int rc; +}; + +struct qed_roce_ll2_cbs { + void (*tx_cb)(void *pdev, struct qed_roce_ll2_packet *pkt); + + void (*rx_cb)(void *pdev, struct qed_roce_ll2_packet *pkt, + struct qed_roce_ll2_rx_params *params); +}; + +struct qed_roce_ll2_params { + u16 max_rx_buffers; + u16 max_tx_buffers; + u16 mtu; + u8 mac_address[ETH_ALEN]; + struct qed_roce_ll2_cbs cbs; + void *cb_cookie; +}; + +struct qed_roce_ll2_info { + u8 handle; + struct qed_roce_ll2_cbs cbs; + u8 mac_address[ETH_ALEN]; + void *cb_cookie; + + /* Lock to protect ll2 */ + struct mutex lock; +}; + enum qed_rdma_type { QED_RDMA_TYPE_ROCE, }; @@ -518,6 +583,20 @@ struct qed_rdma_ops { int (*rdma_deregister_tid)(void *rdma_cxt, u32 itid); int (*rdma_alloc_tid)(void *rdma_cxt, u32 *itid); void (*rdma_free_tid)(void *rdma_cxt, u32 itid); + int (*roce_ll2_start)(struct qed_dev *cdev, + struct qed_roce_ll2_params *params); + int (*roce_ll2_stop)(struct qed_dev *cdev); + int (*roce_ll2_tx)(struct qed_dev *cdev, + struct qed_roce_ll2_packet *packet, + struct qed_roce_ll2_tx_params *params); + int (*roce_ll2_post_rx_buffer)(struct qed_dev *cdev, + struct qed_roce_ll2_buffer *buf, + u64 cookie, u8 notify_fw); + int (*roce_ll2_set_mac_filter)(struct qed_dev *cdev, + u8 *old_mac_address, + u8 *new_mac_address); + int (*roce_ll2_stats)(struct qed_dev *cdev, + struct qed_ll2_stats *stats); }; const struct qed_rdma_ops *qed_get_rdma_ops(void); From edfc23ee3e0ebbb6713d7574ab1b00abff178f6c Mon Sep 17 00:00:00 2001 From: Guilherme G Piccoli Date: Mon, 3 Oct 2016 00:31:12 -0700 Subject: [PATCH 1702/1715] i40e: avoid NULL pointer dereference and recursive errors on early PCI error Although rare, it's possible to hit PCI error early on device probe, meaning possibly some structs are not entirely initialized, and some might even be completely uninitialized, leading to NULL pointer dereference. The i40e driver currently presents a "bad" behavior if device hits such early PCI error: firstly, the struct i40e_pf might not be attached to pci_dev yet, leading to a NULL pointer dereference on access to pf->state. Even checking if the struct is NULL and avoiding the access in that case isn't enough, since the driver cannot recover from PCI error that early; in our experiments we saw multiple failures on kernel log, like: [549.664] i40e 0007:01:00.1: Initial pf_reset failed: -15 [549.664] i40e: probe of 0007:01:00.1 failed with error -15 [...] [871.644] i40e 0007:01:00.1: The driver for the device stopped because the device firmware failed to init. Try updating your NVM image. [871.644] i40e: probe of 0007:01:00.1 failed with error -32 [...] [872.516] i40e 0007:01:00.0: ARQ: Unknown event 0x0000 ignored Between the first probe failure (error -15) and the second (error -32) another PCI error happened due to the first bad probe. Also, driver started to flood console with those ARQ event messages. This patch will prevent these issues by allowing error recovery mechanism to remove the failed device from the system instead of trying to recover from early PCI errors during device probe. CC: Signed-off-by: Guilherme G Piccoli Acked-by: Jacob Keller Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/i40e/i40e_main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 0044c29ca31e..ac1faee2a5b8 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -11400,6 +11400,12 @@ static pci_ers_result_t i40e_pci_error_detected(struct pci_dev *pdev, dev_info(&pdev->dev, "%s: error %d\n", __func__, error); + if (!pf) { + dev_info(&pdev->dev, + "Cannot recover - error happened during device probe\n"); + return PCI_ERS_RESULT_DISCONNECT; + } + /* shutdown all operations */ if (!test_bit(__I40E_SUSPENDED, &pf->state)) { rtnl_lock(); From 277964e19e1416ca31301e113edb2580c81a8b66 Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Mon, 3 Oct 2016 10:47:50 +0800 Subject: [PATCH 1703/1715] vmxnet3: Wake queue from reset work vmxnet3_reset_work() expects tx queues to be stopped (via vmxnet3_quiesce_dev -> netif_tx_disable). However, this races with the netif_wake_queue() call in netif_tx_timeout() such that the driver's start_xmit routine may be called unexpectedly, triggering one of the BUG_ON in vmxnet3_map_pkt with a stack trace like this: RIP: 0010:[] vmxnet3_map_pkt+0x3ac/0x4c0 [vmxnet3] [] vmxnet3_tq_xmit+0x210/0x4e0 [vmxnet3] [] dev_hard_start_xmit+0x2e4/0x4c0 [] sch_direct_xmit+0x17e/0x1e0 [] __qdisc_run+0xd7/0x130 [] net_tx_action+0x10a/0x200 [] __do_softirq+0x11f/0x260 [] call_softirq+0x1c/0x30 [] do_softirq+0x65/0xa0 [] local_bh_enable_ip+0x99/0xa0 [] destroy_conntrack+0x96/0x110 [nf_conntrack] [] nf_conntrack_destroy+0x12/0x20 [] skb_release_head_state+0xb5/0xf0 [] skb_release_all+0x9/0x20 [] __kfree_skb+0x9/0x90 [] vmxnet3_quiesce_dev+0x209/0x340 [vmxnet3] [] vmxnet3_reset_work+0x6a/0xa0 [vmxnet3] [] process_one_work+0x16c/0x350 [] worker_thread+0x17a/0x410 [] kthread+0x96/0xa0 [] kernel_thread_helper+0x4/0x10 Signed-off-by: Benjamin Poirier Signed-off-by: David S. Miller --- drivers/net/vmxnet3/vmxnet3_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 2fd93b4c759a..b5554f2ebee4 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -3186,7 +3186,6 @@ vmxnet3_tx_timeout(struct net_device *netdev) netdev_err(adapter->netdev, "tx hang\n"); schedule_work(&adapter->work); - netif_wake_queue(adapter->netdev); } @@ -3213,6 +3212,7 @@ vmxnet3_reset_work(struct work_struct *data) } rtnl_unlock(); + netif_wake_queue(adapter->netdev); clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state); } From a4cc96d1f0170b779c32c6b2cc58764f5d2cdef0 Mon Sep 17 00:00:00 2001 From: Raju Lakkaraju Date: Mon, 3 Oct 2016 12:53:13 +0530 Subject: [PATCH 1704/1715] net: phy: Add Edge-rate driver for Microsemi PHYs. Edge-rate: As system and networking speeds increase, a signal's output transition, also know as the edge rate or slew rate (V/ns), takes on greater importance because high-speed signals come with a price. That price is an assortment of interference problems like ringing on the line, signal overshoot and undershoot, extended signal settling times, crosstalk noise, transmission line reflections, false signal detection by the receiving device and electromagnetic interference (EMI) -- all of which can negate the potential gains designers are seeking when they try to increase system speeds through the use of higher performance logic devices. The fact is, faster signaling edge rates can cause a higher level of electrical noise or other type of interference that can actually lead to slower line speeds and lower maximum system frequencies. This parameter allow the board designers to change the driving strange, and thereby change the EMI behavioral. Edge-rate parameters (vddmac, edge-slowdown) get from Device Tree. Tested on Beaglebone Black with VSC 8531 PHY. Signed-off-by: Raju Lakkaraju Signed-off-by: David S. Miller --- .../bindings/net/mscc-phy-vsc8531.txt | 58 ++++++++ drivers/net/phy/mscc.c | 125 ++++++++++++++++++ include/dt-bindings/net/mscc-phy-vsc8531.h | 21 +++ 3 files changed, 204 insertions(+) create mode 100644 Documentation/devicetree/bindings/net/mscc-phy-vsc8531.txt create mode 100644 include/dt-bindings/net/mscc-phy-vsc8531.h diff --git a/Documentation/devicetree/bindings/net/mscc-phy-vsc8531.txt b/Documentation/devicetree/bindings/net/mscc-phy-vsc8531.txt new file mode 100644 index 000000000000..99c7eb0a00c8 --- /dev/null +++ b/Documentation/devicetree/bindings/net/mscc-phy-vsc8531.txt @@ -0,0 +1,58 @@ +* Microsemi - vsc8531 Giga bit ethernet phy + +Required properties: +- compatible : Should contain phy id as "ethernet-phy-idAAAA.BBBB" + The PHY device uses the binding described in + Documentation/devicetree/bindings/net/phy.txt + +Optional properties: +- vsc8531,vddmac : The vddmac in mV. +- vsc8531,edge-slowdown : % the edge should be slowed down relative to + the fastest possible edge time. Native sign + need not enter. + Edge rate sets the drive strength of the MAC + interface output signals. Changing the drive + strength will affect the edge rate of the output + signal. The goal of this setting is to help + reduce electrical emission (EMI) by being able + to reprogram drive strength and in effect slow + down the edge rate if desired. Table 1 shows the + impact to the edge rate per VDDMAC supply for each + drive strength setting. + Ref: Table:1 - Edge rate change below. + +Note: see dt-bindings/net/mscc-phy-vsc8531.h for applicable values + +Table: 1 - Edge rate change +----------------------------------------------------------------| +| Edge Rate Change (VDDMAC) | +| | +| 3300 mV 2500 mV 1800 mV 1500 mV | +|---------------------------------------------------------------| +| Default Deafult Default Default | +| (Fastest) (recommended) (recommended) | +|---------------------------------------------------------------| +| -2% -3% -5% -6% | +|---------------------------------------------------------------| +| -4% -6% -9% -14% | +|---------------------------------------------------------------| +| -7% -10% -16% -21% | +|(recommended) (recommended) | +|---------------------------------------------------------------| +| -10% -14% -23% -29% | +|---------------------------------------------------------------| +| -17% -23% -35% -42% | +|---------------------------------------------------------------| +| -29% -37% -52% -58% | +|---------------------------------------------------------------| +| -53% -63% -76% -77% | +| (slowest) | +|---------------------------------------------------------------| + +Example: + + vsc8531_0: ethernet-phy@0 { + compatible = "ethernet-phy-id0007.0570"; + vsc8531,vddmac = <3300>; + vsc8531,edge-slowdown = <21>; + }; diff --git a/drivers/net/phy/mscc.c b/drivers/net/phy/mscc.c index d350debd174a..a17573e3bd8a 100644 --- a/drivers/net/phy/mscc.c +++ b/drivers/net/phy/mscc.c @@ -11,6 +11,8 @@ #include #include #include +#include +#include enum rgmii_rx_clock_delay { RGMII_RX_CLK_DELAY_0_2_NS = 0, @@ -37,6 +39,10 @@ enum rgmii_rx_clock_delay { #define MII_VSC85XX_INT_MASK_MASK 0xa000 #define MII_VSC85XX_INT_STATUS 26 +#define MSCC_PHY_WOL_MAC_CONTROL 27 +#define EDGE_RATE_CNTL_POS 5 +#define EDGE_RATE_CNTL_MASK 0x00E0 + #define MSCC_EXT_PAGE_ACCESS 31 #define MSCC_PHY_PAGE_STANDARD 0x0000 /* Standard registers */ #define MSCC_PHY_PAGE_EXTENDED_2 0x0002 /* Extended reg - page 2 */ @@ -50,6 +56,23 @@ enum rgmii_rx_clock_delay { #define PHY_ID_VSC8531 0x00070570 #define PHY_ID_VSC8541 0x00070770 +struct edge_rate_table { + u16 vddmac; + int slowdown[MSCC_SLOWDOWN_MAX]; +}; + +struct edge_rate_table edge_table[MSCC_VDDMAC_MAX] = { + {3300, { 0, -2, -4, -7, -10, -17, -29, -53} }, + {2500, { 0, -3, -6, -10, -14, -23, -37, -63} }, + {1800, { 0, -5, -9, -16, -23, -35, -52, -76} }, + {1500, { 0, -6, -14, -21, -29, -42, -58, -77} }, +}; + +struct vsc8531_private { + u8 edge_slowdown; + u16 vddmac; +}; + static int vsc85xx_phy_page_set(struct phy_device *phydev, u8 page) { int rc; @@ -58,6 +81,51 @@ static int vsc85xx_phy_page_set(struct phy_device *phydev, u8 page) return rc; } +static u8 edge_rate_magic_get(u16 vddmac, + int slowdown) +{ + int rc = (MSCC_SLOWDOWN_MAX - 1); + u8 vdd; + u8 sd; + + for (vdd = 0; vdd < MSCC_VDDMAC_MAX; vdd++) { + if (edge_table[vdd].vddmac == vddmac) { + for (sd = 0; sd < MSCC_SLOWDOWN_MAX; sd++) { + if (edge_table[vdd].slowdown[sd] <= slowdown) { + rc = (MSCC_SLOWDOWN_MAX - sd - 1); + break; + } + } + } + } + + return rc; +} + +static int vsc85xx_edge_rate_cntl_set(struct phy_device *phydev, + u8 edge_rate) +{ + int rc; + u16 reg_val; + + mutex_lock(&phydev->lock); + rc = vsc85xx_phy_page_set(phydev, MSCC_PHY_PAGE_EXTENDED_2); + if (rc != 0) + goto out_unlock; + reg_val = phy_read(phydev, MSCC_PHY_WOL_MAC_CONTROL); + reg_val &= ~(EDGE_RATE_CNTL_MASK); + reg_val |= (edge_rate << EDGE_RATE_CNTL_POS); + rc = phy_write(phydev, MSCC_PHY_WOL_MAC_CONTROL, reg_val); + if (rc != 0) + goto out_unlock; + rc = vsc85xx_phy_page_set(phydev, MSCC_PHY_PAGE_STANDARD); + +out_unlock: + mutex_unlock(&phydev->lock); + + return rc; +} + static int vsc85xx_mac_if_set(struct phy_device *phydev, phy_interface_t interface) { @@ -116,9 +184,45 @@ out_unlock: return rc; } +#ifdef CONFIG_OF_MDIO +static int vsc8531_of_init(struct phy_device *phydev) +{ + int rc; + struct vsc8531_private *vsc8531 = phydev->priv; + struct device *dev = &phydev->mdio.dev; + struct device_node *of_node = dev->of_node; + + if (!of_node) + return -ENODEV; + + rc = of_property_read_u16(of_node, "vsc8531,vddmac", + &vsc8531->vddmac); + if (rc == -EINVAL) + vsc8531->vddmac = MSCC_VDDMAC_3300; + rc = of_property_read_u8(of_node, "vsc8531,edge-slowdown", + &vsc8531->edge_slowdown); + if (rc == -EINVAL) + vsc8531->edge_slowdown = 0; + + rc = 0; + return rc; +} +#else +static int vsc8531_of_init(struct phy_device *phydev) +{ + return 0; +} +#endif /* CONFIG_OF_MDIO */ + static int vsc85xx_config_init(struct phy_device *phydev) { int rc; + struct vsc8531_private *vsc8531 = phydev->priv; + u8 edge_rate; + + rc = vsc8531_of_init(phydev); + if (rc) + return rc; rc = vsc85xx_default_config(phydev); if (rc) @@ -128,6 +232,12 @@ static int vsc85xx_config_init(struct phy_device *phydev) if (rc) return rc; + edge_rate = edge_rate_magic_get(vsc8531->vddmac, + -(int)vsc8531->edge_slowdown); + rc = vsc85xx_edge_rate_cntl_set(phydev, edge_rate); + if (rc) + return rc; + rc = genphy_config_init(phydev); return rc; @@ -160,6 +270,19 @@ static int vsc85xx_config_intr(struct phy_device *phydev) return rc; } +static int vsc85xx_probe(struct phy_device *phydev) +{ + struct vsc8531_private *vsc8531; + + vsc8531 = devm_kzalloc(&phydev->mdio.dev, sizeof(*vsc8531), GFP_KERNEL); + if (!vsc8531) + return -ENOMEM; + + phydev->priv = vsc8531; + + return 0; +} + /* Microsemi VSC85xx PHYs */ static struct phy_driver vsc85xx_driver[] = { { @@ -177,6 +300,7 @@ static struct phy_driver vsc85xx_driver[] = { .config_intr = &vsc85xx_config_intr, .suspend = &genphy_suspend, .resume = &genphy_resume, + .probe = &vsc85xx_probe, }, { .phy_id = PHY_ID_VSC8541, @@ -193,6 +317,7 @@ static struct phy_driver vsc85xx_driver[] = { .config_intr = &vsc85xx_config_intr, .suspend = &genphy_suspend, .resume = &genphy_resume, + .probe = &vsc85xx_probe, } }; diff --git a/include/dt-bindings/net/mscc-phy-vsc8531.h b/include/dt-bindings/net/mscc-phy-vsc8531.h new file mode 100644 index 000000000000..2383dd20ff43 --- /dev/null +++ b/include/dt-bindings/net/mscc-phy-vsc8531.h @@ -0,0 +1,21 @@ +/* + * Device Tree constants for Microsemi VSC8531 PHY + * + * Author: Nagaraju Lakkaraju + * + * License: Dual MIT/GPL + * Copyright (c) 2016 Microsemi Corporation + */ + +#ifndef _DT_BINDINGS_MSCC_VSC8531_H +#define _DT_BINDINGS_MSCC_VSC8531_H + +/* MAC interface Edge rate control VDDMAC in milli Volts */ +#define MSCC_VDDMAC_3300 3300 +#define MSCC_VDDMAC_2500 2500 +#define MSCC_VDDMAC_1800 1800 +#define MSCC_VDDMAC_1500 1500 +#define MSCC_VDDMAC_MAX 4 +#define MSCC_SLOWDOWN_MAX 8 + +#endif From 93409033ae653f1c9a949202fb537ab095b2092f Mon Sep 17 00:00:00 2001 From: Andrew Collins Date: Mon, 3 Oct 2016 13:43:02 -0600 Subject: [PATCH 1705/1715] net: Add netdev all_adj_list refcnt propagation to fix panic This is a respin of a patch to fix a relatively easily reproducible kernel panic related to the all_adj_list handling for netdevs in recent kernels. The following sequence of commands will reproduce the issue: ip link add link eth0 name eth0.100 type vlan id 100 ip link add link eth0 name eth0.200 type vlan id 200 ip link add name testbr type bridge ip link set eth0.100 master testbr ip link set eth0.200 master testbr ip link add link testbr mac0 type macvlan ip link delete dev testbr This creates an upper/lower tree of (excuse the poor ASCII art): /---eth0.100-eth0 mac0-testbr- \---eth0.200-eth0 When testbr is deleted, the all_adj_lists are walked, and eth0 is deleted twice from the mac0 list. Unfortunately, during setup in __netdev_upper_dev_link, only one reference to eth0 is added, so this results in a panic. This change adds reference count propagation so things are handled properly. Matthias Schiffer reported a similar crash in batman-adv: https://github.com/freifunk-gluon/gluon/issues/680 https://www.open-mesh.org/issues/247 which this patch also seems to resolve. Signed-off-by: Andrew Collins Signed-off-by: David S. Miller --- net/core/dev.c | 68 +++++++++++++++++++++++++++----------------------- 1 file changed, 37 insertions(+), 31 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index c0c291f721d6..f1fe26f66458 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5589,6 +5589,7 @@ static inline bool netdev_adjacent_is_neigh_list(struct net_device *dev, static int __netdev_adjacent_dev_insert(struct net_device *dev, struct net_device *adj_dev, + u16 ref_nr, struct list_head *dev_list, void *private, bool master) { @@ -5598,7 +5599,7 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev, adj = __netdev_find_adj(adj_dev, dev_list); if (adj) { - adj->ref_nr++; + adj->ref_nr += ref_nr; return 0; } @@ -5608,7 +5609,7 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev, adj->dev = adj_dev; adj->master = master; - adj->ref_nr = 1; + adj->ref_nr = ref_nr; adj->private = private; dev_hold(adj_dev); @@ -5647,6 +5648,7 @@ free_adj: static void __netdev_adjacent_dev_remove(struct net_device *dev, struct net_device *adj_dev, + u16 ref_nr, struct list_head *dev_list) { struct netdev_adjacent *adj; @@ -5659,10 +5661,10 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev, BUG(); } - if (adj->ref_nr > 1) { - pr_debug("%s to %s ref_nr-- = %d\n", dev->name, adj_dev->name, - adj->ref_nr-1); - adj->ref_nr--; + if (adj->ref_nr > ref_nr) { + pr_debug("%s to %s ref_nr-%d = %d\n", dev->name, adj_dev->name, + ref_nr, adj->ref_nr-ref_nr); + adj->ref_nr -= ref_nr; return; } @@ -5681,21 +5683,22 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev, static int __netdev_adjacent_dev_link_lists(struct net_device *dev, struct net_device *upper_dev, + u16 ref_nr, struct list_head *up_list, struct list_head *down_list, void *private, bool master) { int ret; - ret = __netdev_adjacent_dev_insert(dev, upper_dev, up_list, private, - master); + ret = __netdev_adjacent_dev_insert(dev, upper_dev, ref_nr, up_list, + private, master); if (ret) return ret; - ret = __netdev_adjacent_dev_insert(upper_dev, dev, down_list, private, - false); + ret = __netdev_adjacent_dev_insert(upper_dev, dev, ref_nr, down_list, + private, false); if (ret) { - __netdev_adjacent_dev_remove(dev, upper_dev, up_list); + __netdev_adjacent_dev_remove(dev, upper_dev, ref_nr, up_list); return ret; } @@ -5703,9 +5706,10 @@ static int __netdev_adjacent_dev_link_lists(struct net_device *dev, } static int __netdev_adjacent_dev_link(struct net_device *dev, - struct net_device *upper_dev) + struct net_device *upper_dev, + u16 ref_nr) { - return __netdev_adjacent_dev_link_lists(dev, upper_dev, + return __netdev_adjacent_dev_link_lists(dev, upper_dev, ref_nr, &dev->all_adj_list.upper, &upper_dev->all_adj_list.lower, NULL, false); @@ -5713,17 +5717,19 @@ static int __netdev_adjacent_dev_link(struct net_device *dev, static void __netdev_adjacent_dev_unlink_lists(struct net_device *dev, struct net_device *upper_dev, + u16 ref_nr, struct list_head *up_list, struct list_head *down_list) { - __netdev_adjacent_dev_remove(dev, upper_dev, up_list); - __netdev_adjacent_dev_remove(upper_dev, dev, down_list); + __netdev_adjacent_dev_remove(dev, upper_dev, ref_nr, up_list); + __netdev_adjacent_dev_remove(upper_dev, dev, ref_nr, down_list); } static void __netdev_adjacent_dev_unlink(struct net_device *dev, - struct net_device *upper_dev) + struct net_device *upper_dev, + u16 ref_nr) { - __netdev_adjacent_dev_unlink_lists(dev, upper_dev, + __netdev_adjacent_dev_unlink_lists(dev, upper_dev, ref_nr, &dev->all_adj_list.upper, &upper_dev->all_adj_list.lower); } @@ -5732,17 +5738,17 @@ static int __netdev_adjacent_dev_link_neighbour(struct net_device *dev, struct net_device *upper_dev, void *private, bool master) { - int ret = __netdev_adjacent_dev_link(dev, upper_dev); + int ret = __netdev_adjacent_dev_link(dev, upper_dev, 1); if (ret) return ret; - ret = __netdev_adjacent_dev_link_lists(dev, upper_dev, + ret = __netdev_adjacent_dev_link_lists(dev, upper_dev, 1, &dev->adj_list.upper, &upper_dev->adj_list.lower, private, master); if (ret) { - __netdev_adjacent_dev_unlink(dev, upper_dev); + __netdev_adjacent_dev_unlink(dev, upper_dev, 1); return ret; } @@ -5752,8 +5758,8 @@ static int __netdev_adjacent_dev_link_neighbour(struct net_device *dev, static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev, struct net_device *upper_dev) { - __netdev_adjacent_dev_unlink(dev, upper_dev); - __netdev_adjacent_dev_unlink_lists(dev, upper_dev, + __netdev_adjacent_dev_unlink(dev, upper_dev, 1); + __netdev_adjacent_dev_unlink_lists(dev, upper_dev, 1, &dev->adj_list.upper, &upper_dev->adj_list.lower); } @@ -5806,7 +5812,7 @@ static int __netdev_upper_dev_link(struct net_device *dev, list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) { pr_debug("Interlinking %s with %s, non-neighbour\n", i->dev->name, j->dev->name); - ret = __netdev_adjacent_dev_link(i->dev, j->dev); + ret = __netdev_adjacent_dev_link(i->dev, j->dev, i->ref_nr); if (ret) goto rollback_mesh; } @@ -5816,7 +5822,7 @@ static int __netdev_upper_dev_link(struct net_device *dev, list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) { pr_debug("linking %s's upper device %s with %s\n", upper_dev->name, i->dev->name, dev->name); - ret = __netdev_adjacent_dev_link(dev, i->dev); + ret = __netdev_adjacent_dev_link(dev, i->dev, i->ref_nr); if (ret) goto rollback_upper_mesh; } @@ -5825,7 +5831,7 @@ static int __netdev_upper_dev_link(struct net_device *dev, list_for_each_entry(i, &dev->all_adj_list.lower, list) { pr_debug("linking %s's lower device %s with %s\n", dev->name, i->dev->name, upper_dev->name); - ret = __netdev_adjacent_dev_link(i->dev, upper_dev); + ret = __netdev_adjacent_dev_link(i->dev, upper_dev, i->ref_nr); if (ret) goto rollback_lower_mesh; } @@ -5843,7 +5849,7 @@ rollback_lower_mesh: list_for_each_entry(i, &dev->all_adj_list.lower, list) { if (i == to_i) break; - __netdev_adjacent_dev_unlink(i->dev, upper_dev); + __netdev_adjacent_dev_unlink(i->dev, upper_dev, i->ref_nr); } i = NULL; @@ -5853,7 +5859,7 @@ rollback_upper_mesh: list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) { if (i == to_i) break; - __netdev_adjacent_dev_unlink(dev, i->dev); + __netdev_adjacent_dev_unlink(dev, i->dev, i->ref_nr); } i = j = NULL; @@ -5865,7 +5871,7 @@ rollback_mesh: list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) { if (i == to_i && j == to_j) break; - __netdev_adjacent_dev_unlink(i->dev, j->dev); + __netdev_adjacent_dev_unlink(i->dev, j->dev, i->ref_nr); } if (i == to_i) break; @@ -5945,16 +5951,16 @@ void netdev_upper_dev_unlink(struct net_device *dev, */ list_for_each_entry(i, &dev->all_adj_list.lower, list) list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) - __netdev_adjacent_dev_unlink(i->dev, j->dev); + __netdev_adjacent_dev_unlink(i->dev, j->dev, i->ref_nr); /* remove also the devices itself from lower/upper device * list */ list_for_each_entry(i, &dev->all_adj_list.lower, list) - __netdev_adjacent_dev_unlink(i->dev, upper_dev); + __netdev_adjacent_dev_unlink(i->dev, upper_dev, i->ref_nr); list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) - __netdev_adjacent_dev_unlink(dev, i->dev); + __netdev_adjacent_dev_unlink(dev, i->dev, i->ref_nr); call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev, &changeupper_info.info); From d8cedaabe71236d27da1ff03d32ab1da06ed041f Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 4 Oct 2016 11:25:47 +1100 Subject: [PATCH 1706/1715] net/ncsi: Avoid unused-value build warning from ia64-linux-gcc xchg() is used to set NCSI channel's state in order for consistent access to the state. xchg()'s return value should be used. Otherwise, one build warning will be raised (with -Wunused-value) as below message indicates. It is reported by ia64-linux-gcc (GCC) 4.9.0. net/ncsi/ncsi-manage.c: In function 'ncsi_channel_monitor': arch/ia64/include/uapi/asm/cmpxchg.h:56:2: warning: value computed is \ not used [-Wunused-value] ((__typeof__(*(ptr))) __xchg((unsigned long) (x), (ptr), sizeof(*(ptr)))) ^ net/ncsi/ncsi-manage.c:202:3: note: in expansion of macro 'xchg' xchg(&nc->state, NCSI_CHANNEL_INACTIVE); This removes the atomic access to NCSI channel's state avoid the above build warning. We have to hold the channel's lock when its state is readed or updated. No functional changes introduced. Signed-off-by: Gavin Shan Reviewed-by: Joel Stanley Signed-off-by: David S. Miller --- net/ncsi/ncsi-aen.c | 37 ++++++++++++++++------ net/ncsi/ncsi-manage.c | 71 ++++++++++++++++++++++++++++++++---------- 2 files changed, 81 insertions(+), 27 deletions(-) diff --git a/net/ncsi/ncsi-aen.c b/net/ncsi/ncsi-aen.c index d463468442ae..b41a6617d498 100644 --- a/net/ncsi/ncsi-aen.c +++ b/net/ncsi/ncsi-aen.c @@ -53,7 +53,9 @@ static int ncsi_aen_handler_lsc(struct ncsi_dev_priv *ndp, struct ncsi_aen_lsc_pkt *lsc; struct ncsi_channel *nc; struct ncsi_channel_mode *ncm; - unsigned long old_data; + bool chained; + int state; + unsigned long old_data, data; unsigned long flags; /* Find the NCSI channel */ @@ -62,20 +64,27 @@ static int ncsi_aen_handler_lsc(struct ncsi_dev_priv *ndp, return -ENODEV; /* Update the link status */ - ncm = &nc->modes[NCSI_MODE_LINK]; lsc = (struct ncsi_aen_lsc_pkt *)h; + + spin_lock_irqsave(&nc->lock, flags); + ncm = &nc->modes[NCSI_MODE_LINK]; old_data = ncm->data[2]; - ncm->data[2] = ntohl(lsc->status); + data = ntohl(lsc->status); + ncm->data[2] = data; ncm->data[4] = ntohl(lsc->oem_status); - if (!((old_data ^ ncm->data[2]) & 0x1) || - !list_empty(&nc->link)) + + chained = !list_empty(&nc->link); + state = nc->state; + spin_unlock_irqrestore(&nc->lock, flags); + + if (!((old_data ^ data) & 0x1) || chained) return 0; - if (!(nc->state == NCSI_CHANNEL_INACTIVE && (ncm->data[2] & 0x1)) && - !(nc->state == NCSI_CHANNEL_ACTIVE && !(ncm->data[2] & 0x1))) + if (!(state == NCSI_CHANNEL_INACTIVE && (data & 0x1)) && + !(state == NCSI_CHANNEL_ACTIVE && !(data & 0x1))) return 0; if (!(ndp->flags & NCSI_DEV_HWA) && - nc->state == NCSI_CHANNEL_ACTIVE) + state == NCSI_CHANNEL_ACTIVE) ndp->flags |= NCSI_DEV_RESHUFFLE; ncsi_stop_channel_monitor(nc); @@ -97,13 +106,21 @@ static int ncsi_aen_handler_cr(struct ncsi_dev_priv *ndp, if (!nc) return -ENODEV; + spin_lock_irqsave(&nc->lock, flags); if (!list_empty(&nc->link) || - nc->state != NCSI_CHANNEL_ACTIVE) + nc->state != NCSI_CHANNEL_ACTIVE) { + spin_unlock_irqrestore(&nc->lock, flags); return 0; + } + spin_unlock_irqrestore(&nc->lock, flags); ncsi_stop_channel_monitor(nc); + spin_lock_irqsave(&nc->lock, flags); + nc->state = NCSI_CHANNEL_INVISIBLE; + spin_unlock_irqrestore(&nc->lock, flags); + spin_lock_irqsave(&ndp->lock, flags); - xchg(&nc->state, NCSI_CHANNEL_INACTIVE); + nc->state = NCSI_CHANNEL_INACTIVE; list_add_tail_rcu(&nc->link, &ndp->channel_queue); spin_unlock_irqrestore(&ndp->lock, flags); diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index ef017b871857..a26ce5132549 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -132,6 +132,7 @@ static void ncsi_report_link(struct ncsi_dev_priv *ndp, bool force_down) struct ncsi_dev *nd = &ndp->ndev; struct ncsi_package *np; struct ncsi_channel *nc; + unsigned long flags; nd->state = ncsi_dev_state_functional; if (force_down) { @@ -142,14 +143,21 @@ static void ncsi_report_link(struct ncsi_dev_priv *ndp, bool force_down) nd->link_up = 0; NCSI_FOR_EACH_PACKAGE(ndp, np) { NCSI_FOR_EACH_CHANNEL(np, nc) { + spin_lock_irqsave(&nc->lock, flags); + if (!list_empty(&nc->link) || - nc->state != NCSI_CHANNEL_ACTIVE) + nc->state != NCSI_CHANNEL_ACTIVE) { + spin_unlock_irqrestore(&nc->lock, flags); continue; + } if (nc->modes[NCSI_MODE_LINK].data[2] & 0x1) { + spin_unlock_irqrestore(&nc->lock, flags); nd->link_up = 1; goto report; } + + spin_unlock_irqrestore(&nc->lock, flags); } } @@ -163,20 +171,22 @@ static void ncsi_channel_monitor(unsigned long data) struct ncsi_package *np = nc->package; struct ncsi_dev_priv *ndp = np->ndp; struct ncsi_cmd_arg nca; - bool enabled; + bool enabled, chained; unsigned int timeout; unsigned long flags; - int ret; + int state, ret; spin_lock_irqsave(&nc->lock, flags); + state = nc->state; + chained = !list_empty(&nc->link); timeout = nc->timeout; enabled = nc->enabled; spin_unlock_irqrestore(&nc->lock, flags); - if (!enabled || !list_empty(&nc->link)) + if (!enabled || chained) return; - if (nc->state != NCSI_CHANNEL_INACTIVE && - nc->state != NCSI_CHANNEL_ACTIVE) + if (state != NCSI_CHANNEL_INACTIVE && + state != NCSI_CHANNEL_ACTIVE) return; if (!(timeout % 2)) { @@ -195,11 +205,15 @@ static void ncsi_channel_monitor(unsigned long data) if (timeout + 1 >= 3) { if (!(ndp->flags & NCSI_DEV_HWA) && - nc->state == NCSI_CHANNEL_ACTIVE) + state == NCSI_CHANNEL_ACTIVE) ncsi_report_link(ndp, true); + spin_lock_irqsave(&nc->lock, flags); + nc->state = NCSI_CHANNEL_INVISIBLE; + spin_unlock_irqrestore(&nc->lock, flags); + spin_lock_irqsave(&ndp->lock, flags); - xchg(&nc->state, NCSI_CHANNEL_INACTIVE); + nc->state = NCSI_CHANNEL_INACTIVE; list_add_tail_rcu(&nc->link, &ndp->channel_queue); spin_unlock_irqrestore(&ndp->lock, flags); ncsi_process_next_channel(ndp); @@ -508,6 +522,7 @@ static void ncsi_suspend_channel(struct ncsi_dev_priv *ndp) struct ncsi_package *np = ndp->active_package; struct ncsi_channel *nc = ndp->active_channel; struct ncsi_cmd_arg nca; + unsigned long flags; int ret; nca.ndp = ndp; @@ -556,7 +571,9 @@ static void ncsi_suspend_channel(struct ncsi_dev_priv *ndp) break; case ncsi_dev_state_suspend_done: - xchg(&nc->state, NCSI_CHANNEL_INACTIVE); + spin_lock_irqsave(&nc->lock, flags); + nc->state = NCSI_CHANNEL_INACTIVE; + spin_unlock_irqrestore(&nc->lock, flags); ncsi_process_next_channel(ndp); break; @@ -574,6 +591,7 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp) struct ncsi_channel *nc = ndp->active_channel; struct ncsi_cmd_arg nca; unsigned char index; + unsigned long flags; int ret; nca.ndp = ndp; @@ -675,10 +693,12 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp) goto error; break; case ncsi_dev_state_config_done: + spin_lock_irqsave(&nc->lock, flags); if (nc->modes[NCSI_MODE_LINK].data[2] & 0x1) - xchg(&nc->state, NCSI_CHANNEL_ACTIVE); + nc->state = NCSI_CHANNEL_ACTIVE; else - xchg(&nc->state, NCSI_CHANNEL_INACTIVE); + nc->state = NCSI_CHANNEL_INACTIVE; + spin_unlock_irqrestore(&nc->lock, flags); ncsi_start_channel_monitor(nc); ncsi_process_next_channel(ndp); @@ -707,18 +727,25 @@ static int ncsi_choose_active_channel(struct ncsi_dev_priv *ndp) found = NULL; NCSI_FOR_EACH_PACKAGE(ndp, np) { NCSI_FOR_EACH_CHANNEL(np, nc) { + spin_lock_irqsave(&nc->lock, flags); + if (!list_empty(&nc->link) || - nc->state != NCSI_CHANNEL_INACTIVE) + nc->state != NCSI_CHANNEL_INACTIVE) { + spin_unlock_irqrestore(&nc->lock, flags); continue; + } if (!found) found = nc; ncm = &nc->modes[NCSI_MODE_LINK]; if (ncm->data[2] & 0x1) { + spin_unlock_irqrestore(&nc->lock, flags); found = nc; goto out; } + + spin_unlock_irqrestore(&nc->lock, flags); } } @@ -987,11 +1014,14 @@ int ncsi_process_next_channel(struct ncsi_dev_priv *ndp) goto out; } - old_state = xchg(&nc->state, NCSI_CHANNEL_INVISIBLE); list_del_init(&nc->link); - spin_unlock_irqrestore(&ndp->lock, flags); + spin_lock_irqsave(&nc->lock, flags); + old_state = nc->state; + nc->state = NCSI_CHANNEL_INVISIBLE; + spin_unlock_irqrestore(&nc->lock, flags); + ndp->active_channel = nc; ndp->active_package = nc->package; @@ -1006,7 +1036,7 @@ int ncsi_process_next_channel(struct ncsi_dev_priv *ndp) break; default: netdev_err(ndp->ndev.dev, "Invalid state 0x%x on %d:%d\n", - nc->state, nc->package->id, nc->id); + old_state, nc->package->id, nc->id); ncsi_report_link(ndp, false); return -EINVAL; } @@ -1151,6 +1181,8 @@ int ncsi_start_dev(struct ncsi_dev *nd) struct ncsi_dev_priv *ndp = TO_NCSI_DEV_PRIV(nd); struct ncsi_package *np; struct ncsi_channel *nc; + unsigned long flags; + bool chained; int old_state, ret; if (nd->state != ncsi_dev_state_registered && @@ -1166,8 +1198,13 @@ int ncsi_start_dev(struct ncsi_dev *nd) /* Reset channel's state and start over */ NCSI_FOR_EACH_PACKAGE(ndp, np) { NCSI_FOR_EACH_CHANNEL(np, nc) { - old_state = xchg(&nc->state, NCSI_CHANNEL_INACTIVE); - WARN_ON_ONCE(!list_empty(&nc->link) || + spin_lock_irqsave(&nc->lock, flags); + chained = !list_empty(&nc->link); + old_state = nc->state; + nc->state = NCSI_CHANNEL_INACTIVE; + spin_unlock_irqrestore(&nc->lock, flags); + + WARN_ON_ONCE(chained || old_state == NCSI_CHANNEL_INVISIBLE); } } From bc7e0f50aa6958676115bffc1e5e58703579e04b Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 4 Oct 2016 11:25:48 +1100 Subject: [PATCH 1707/1715] net/ncsi: Introduce NCSI_RESERVED_CHANNEL This defines NCSI_RESERVED_CHANNEL as the reserved NCSI channel ID (0x1f). No logical changes introduced. Signed-off-by: Gavin Shan Reviewed-by: Joel Stanley Signed-off-by: David S. Miller --- net/ncsi/internal.h | 1 + net/ncsi/ncsi-manage.c | 14 +++++++------- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h index 33738c060547..66dc851d49ee 100644 --- a/net/ncsi/internal.h +++ b/net/ncsi/internal.h @@ -170,6 +170,7 @@ struct ncsi_package; #define NCSI_PACKAGE_SHIFT 5 #define NCSI_PACKAGE_INDEX(c) (((c) >> NCSI_PACKAGE_SHIFT) & 0x7) +#define NCSI_RESERVED_CHANNEL 0x1f #define NCSI_CHANNEL_INDEX(c) ((c) & ((1 << NCSI_PACKAGE_SHIFT) - 1)) #define NCSI_TO_CHANNEL(p, c) (((p) << NCSI_PACKAGE_SHIFT) | (c)) diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index a26ce5132549..97c99bee8b68 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -542,7 +542,7 @@ static void ncsi_suspend_channel(struct ncsi_dev_priv *ndp) nca.package = np->id; if (nd->state == ncsi_dev_state_suspend_select) { nca.type = NCSI_PKT_CMD_SP; - nca.channel = 0x1f; + nca.channel = NCSI_RESERVED_CHANNEL; if (ndp->flags & NCSI_DEV_HWA) nca.bytes[0] = 0; else @@ -559,7 +559,7 @@ static void ncsi_suspend_channel(struct ncsi_dev_priv *ndp) nd->state = ncsi_dev_state_suspend_deselect; } else if (nd->state == ncsi_dev_state_suspend_deselect) { nca.type = NCSI_PKT_CMD_DP; - nca.channel = 0x1f; + nca.channel = NCSI_RESERVED_CHANNEL; nd->state = ncsi_dev_state_suspend_done; } @@ -608,7 +608,7 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp) else nca.bytes[0] = 1; nca.package = np->id; - nca.channel = 0x1f; + nca.channel = NCSI_RESERVED_CHANNEL; ret = ncsi_xmit_cmd(&nca); if (ret) goto error; @@ -834,7 +834,7 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) /* Deselect all possible packages */ nca.type = NCSI_PKT_CMD_DP; - nca.channel = 0x1f; + nca.channel = NCSI_RESERVED_CHANNEL; for (index = 0; index < 8; index++) { nca.package = index; ret = ncsi_xmit_cmd(&nca); @@ -850,7 +850,7 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) /* Select all possible packages */ nca.type = NCSI_PKT_CMD_SP; nca.bytes[0] = 1; - nca.channel = 0x1f; + nca.channel = NCSI_RESERVED_CHANNEL; for (index = 0; index < 8; index++) { nca.package = index; ret = ncsi_xmit_cmd(&nca); @@ -903,7 +903,7 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) nca.type = NCSI_PKT_CMD_SP; nca.bytes[0] = 1; nca.package = ndp->active_package->id; - nca.channel = 0x1f; + nca.channel = NCSI_RESERVED_CHANNEL; ret = ncsi_xmit_cmd(&nca); if (ret) goto error; @@ -960,7 +960,7 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) /* Deselect the active package */ nca.type = NCSI_PKT_CMD_DP; nca.package = ndp->active_package->id; - nca.channel = 0x1f; + nca.channel = NCSI_RESERVED_CHANNEL; ret = ncsi_xmit_cmd(&nca); if (ret) goto error; From 55e02d0837fb4cf023832252847bfbff453603cc Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 4 Oct 2016 11:25:49 +1100 Subject: [PATCH 1708/1715] net/ncsi: Don't probe on the reserved channel ID (0x1f) We needn't send CIS (Clear Initial State) command to the NCSI reserved channel (0x1f) in the enumeration. We shouldn't receive a valid response from CIS on NCSI channel 0x1f. Signed-off-by: Gavin Shan Reviewed-by: Joel Stanley Signed-off-by: David S. Miller --- net/ncsi/ncsi-manage.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index 97c99bee8b68..8c5e0160d578 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -911,12 +911,12 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) nd->state = ncsi_dev_state_probe_cis; break; case ncsi_dev_state_probe_cis: - ndp->pending_req_num = 32; + ndp->pending_req_num = NCSI_RESERVED_CHANNEL; /* Clear initial state */ nca.type = NCSI_PKT_CMD_CIS; nca.package = ndp->active_package->id; - for (index = 0; index < 0x20; index++) { + for (index = 0; index < NCSI_RESERVED_CHANNEL; index++) { nca.channel = index; ret = ncsi_xmit_cmd(&nca); if (ret) From a15af54f8f2a32d629781417503843bfbd02a004 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 4 Oct 2016 11:25:50 +1100 Subject: [PATCH 1709/1715] net/ncsi: Rework request index allocation The NCSI request index (struct ncsi_request::id) is put into instance ID (IID) field while sending NCSI command packet. It was designed the available IDs are given in round-robin fashion. @ndp->request_id was introduced to represent the next available ID, but it has been used as number of successively allocated IDs. It breaks the round-robin design. Besides, we shouldn't put 0 to NCSI command packet's IID field, meaning ID#0 should be reserved according section 6.3.1.1 in NCSI spec (v1.1.0). This fixes above two issues. With it applied, the available IDs will be assigned in round-robin fashion and ID#0 won't be assigned. Signed-off-by: Gavin Shan Reviewed-by: Joel Stanley Signed-off-by: David S. Miller --- net/ncsi/internal.h | 1 + net/ncsi/ncsi-manage.c | 17 +++++++++-------- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h index 66dc851d49ee..c956fe8d80c3 100644 --- a/net/ncsi/internal.h +++ b/net/ncsi/internal.h @@ -259,6 +259,7 @@ struct ncsi_dev_priv { struct list_head packages; /* List of packages */ struct ncsi_request requests[256]; /* Request table */ unsigned int request_id; /* Last used request ID */ +#define NCSI_REQ_START_IDX 1 unsigned int pending_req_num; /* Number of pending requests */ struct ncsi_package *active_package; /* Currently handled package */ struct ncsi_channel *active_channel; /* Currently handled channel */ diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index 8c5e0160d578..00ce2c7fdb15 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -427,30 +427,31 @@ struct ncsi_request *ncsi_alloc_request(struct ncsi_dev_priv *ndp, bool driven) /* Check if there is one available request until the ceiling */ spin_lock_irqsave(&ndp->lock, flags); - for (i = ndp->request_id; !nr && i < limit; i++) { + for (i = ndp->request_id; i < limit; i++) { if (ndp->requests[i].used) continue; nr = &ndp->requests[i]; nr->used = true; nr->driven = driven; - if (++ndp->request_id >= limit) - ndp->request_id = 0; + ndp->request_id = i + 1; + goto found; } /* Fail back to check from the starting cursor */ - for (i = 0; !nr && i < ndp->request_id; i++) { + for (i = NCSI_REQ_START_IDX; i < ndp->request_id; i++) { if (ndp->requests[i].used) continue; nr = &ndp->requests[i]; nr->used = true; nr->driven = driven; - if (++ndp->request_id >= limit) - ndp->request_id = 0; + ndp->request_id = i + 1; + goto found; } - spin_unlock_irqrestore(&ndp->lock, flags); +found: + spin_unlock_irqrestore(&ndp->lock, flags); return nr; } @@ -1148,7 +1149,7 @@ struct ncsi_dev *ncsi_register_dev(struct net_device *dev, /* Initialize private NCSI device */ spin_lock_init(&ndp->lock); INIT_LIST_HEAD(&ndp->packages); - ndp->request_id = 0; + ndp->request_id = NCSI_REQ_START_IDX; for (i = 0; i < ARRAY_SIZE(ndp->requests); i++) { ndp->requests[i].id = i; ndp->requests[i].ndp = ndp; From a0509cbeef5dafbab42c42622e012bcc94c3eb9e Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 4 Oct 2016 11:25:51 +1100 Subject: [PATCH 1710/1715] net/ncsi: Allow to extend NCSI request properties There is only one NCSI request property for now: the response for the sent command need drive the workqueue or not. So we had one field (@driven) for the purpose. We lost the flexibility to extend NCSI request properties. This replaces @driven with @flags and @req_flags in NCSI request and NCSI command argument struct. Each bit of the newly introduced field can be used for one property. No functional changes introduced. Signed-off-by: Gavin Shan Reviewed-by: Joel Stanley Signed-off-by: David S. Miller --- net/ncsi/internal.h | 8 +++++--- net/ncsi/ncsi-cmd.c | 2 +- net/ncsi/ncsi-manage.c | 19 ++++++++++--------- net/ncsi/ncsi-rsp.c | 2 +- 4 files changed, 17 insertions(+), 14 deletions(-) diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h index c956fe8d80c3..26e929595b5e 100644 --- a/net/ncsi/internal.h +++ b/net/ncsi/internal.h @@ -207,7 +207,8 @@ struct ncsi_package { struct ncsi_request { unsigned char id; /* Request ID - 0 to 255 */ bool used; /* Request that has been assigned */ - bool driven; /* Drive state machine */ + unsigned int flags; /* NCSI request property */ +#define NCSI_REQ_FLAG_EVENT_DRIVEN 1 struct ncsi_dev_priv *ndp; /* Associated NCSI device */ struct sk_buff *cmd; /* Associated NCSI command packet */ struct sk_buff *rsp; /* Associated NCSI response packet */ @@ -276,7 +277,7 @@ struct ncsi_cmd_arg { unsigned char package; /* Destination package ID */ unsigned char channel; /* Detination channel ID or 0x1f */ unsigned short payload; /* Command packet payload length */ - bool driven; /* Drive the state machine? */ + unsigned int req_flags; /* NCSI request properties */ union { unsigned char bytes[16]; /* Command packet specific data */ unsigned short words[8]; @@ -315,7 +316,8 @@ void ncsi_find_package_and_channel(struct ncsi_dev_priv *ndp, unsigned char id, struct ncsi_package **np, struct ncsi_channel **nc); -struct ncsi_request *ncsi_alloc_request(struct ncsi_dev_priv *ndp, bool driven); +struct ncsi_request *ncsi_alloc_request(struct ncsi_dev_priv *ndp, + unsigned int req_flags); void ncsi_free_request(struct ncsi_request *nr); struct ncsi_dev *ncsi_find_dev(struct net_device *dev); int ncsi_process_next_channel(struct ncsi_dev_priv *ndp); diff --git a/net/ncsi/ncsi-cmd.c b/net/ncsi/ncsi-cmd.c index 21057a8ceeac..db7083bfd476 100644 --- a/net/ncsi/ncsi-cmd.c +++ b/net/ncsi/ncsi-cmd.c @@ -272,7 +272,7 @@ static struct ncsi_request *ncsi_alloc_command(struct ncsi_cmd_arg *nca) struct sk_buff *skb; struct ncsi_request *nr; - nr = ncsi_alloc_request(ndp, nca->driven); + nr = ncsi_alloc_request(ndp, nca->req_flags); if (!nr) return NULL; diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index 00ce2c7fdb15..adf5401817c2 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -194,7 +194,7 @@ static void ncsi_channel_monitor(unsigned long data) nca.package = np->id; nca.channel = nc->id; nca.type = NCSI_PKT_CMD_GLS; - nca.driven = false; + nca.req_flags = 0; ret = ncsi_xmit_cmd(&nca); if (ret) { netdev_err(ndp->ndev.dev, "Error %d sending GLS\n", @@ -419,7 +419,8 @@ void ncsi_find_package_and_channel(struct ncsi_dev_priv *ndp, * be same. Otherwise, the bogus response might be replied. So * the available IDs are allocated in round-robin fashion. */ -struct ncsi_request *ncsi_alloc_request(struct ncsi_dev_priv *ndp, bool driven) +struct ncsi_request *ncsi_alloc_request(struct ncsi_dev_priv *ndp, + unsigned int req_flags) { struct ncsi_request *nr = NULL; int i, limit = ARRAY_SIZE(ndp->requests); @@ -433,7 +434,7 @@ struct ncsi_request *ncsi_alloc_request(struct ncsi_dev_priv *ndp, bool driven) nr = &ndp->requests[i]; nr->used = true; - nr->driven = driven; + nr->flags = req_flags; ndp->request_id = i + 1; goto found; } @@ -445,7 +446,7 @@ struct ncsi_request *ncsi_alloc_request(struct ncsi_dev_priv *ndp, bool driven) nr = &ndp->requests[i]; nr->used = true; - nr->driven = driven; + nr->flags = req_flags; ndp->request_id = i + 1; goto found; } @@ -473,7 +474,7 @@ void ncsi_free_request(struct ncsi_request *nr) nr->cmd = NULL; nr->rsp = NULL; nr->used = false; - driven = nr->driven; + driven = !!(nr->flags & NCSI_REQ_FLAG_EVENT_DRIVEN); spin_unlock_irqrestore(&ndp->lock, flags); if (driven && cmd && --ndp->pending_req_num == 0) @@ -527,7 +528,7 @@ static void ncsi_suspend_channel(struct ncsi_dev_priv *ndp) int ret; nca.ndp = ndp; - nca.driven = true; + nca.req_flags = NCSI_REQ_FLAG_EVENT_DRIVEN; switch (nd->state) { case ncsi_dev_state_suspend: nd->state = ncsi_dev_state_suspend_select; @@ -596,7 +597,7 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp) int ret; nca.ndp = ndp; - nca.driven = true; + nca.req_flags = NCSI_REQ_FLAG_EVENT_DRIVEN; switch (nd->state) { case ncsi_dev_state_config: case ncsi_dev_state_config_sp: @@ -825,7 +826,7 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) int ret; nca.ndp = ndp; - nca.driven = true; + nca.req_flags = NCSI_REQ_FLAG_EVENT_DRIVEN; switch (nd->state) { case ncsi_dev_state_probe: nd->state = ncsi_dev_state_probe_deselect; @@ -1101,7 +1102,7 @@ static int ncsi_inet6addr_event(struct notifier_block *this, return NOTIFY_OK; nca.ndp = ndp; - nca.driven = false; + nca.req_flags = 0; nca.package = np->id; nca.channel = nc->id; nca.dwords[0] = nc->caps[NCSI_CAP_MC].cap; diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c index af84389a6bf1..86cdaebd8d9e 100644 --- a/net/ncsi/ncsi-rsp.c +++ b/net/ncsi/ncsi-rsp.c @@ -317,7 +317,7 @@ static int ncsi_rsp_handler_gls(struct ncsi_request *nr) ncm->data[3] = ntohl(rsp->other); ncm->data[4] = ntohl(rsp->oem_status); - if (nr->driven) + if (nr->flags & NCSI_REQ_FLAG_EVENT_DRIVEN) return 0; /* Reset the channel monitor if it has been enabled */ From 83afdc6aad9d767cae271df1ca15641b9cbe3bfe Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 4 Oct 2016 11:25:52 +1100 Subject: [PATCH 1711/1715] net/ncsi: Rework the channel monitoring The original NCSI channel monitoring was implemented based on a backoff algorithm: the GLS response should be received in the specified interval. Otherwise, the channel is regarded as dead and failover should be taken if current channel is an active one. There are several problems in the implementation: (A) On BCM5718, we found when the IID (Instance ID) in the GLS command packet changes from 255 to 1, the response corresponding to IID#1 never comes in. It means we cannot make the unfair judgement that the channel is dead when one response is missed. (B) The code's readability should be improved. (C) We should do failover when current channel is active one and the channel monitoring should be marked as disabled before doing failover. This reworks the channel monitoring to address all above issues. The fields for channel monitoring is put into separate struct and the state of channel monitoring is predefined. The channel is regarded alive if the network controller responses to one of two GLS commands or both of them in 5 seconds. Signed-off-by: Gavin Shan Reviewed-by: Joel Stanley Signed-off-by: David S. Miller --- net/ncsi/internal.h | 12 +++++++++--- net/ncsi/ncsi-manage.c | 44 ++++++++++++++++++++++++------------------ net/ncsi/ncsi-rsp.c | 2 +- 3 files changed, 35 insertions(+), 23 deletions(-) diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h index 26e929595b5e..13290a70fa71 100644 --- a/net/ncsi/internal.h +++ b/net/ncsi/internal.h @@ -187,9 +187,15 @@ struct ncsi_channel { struct ncsi_channel_mode modes[NCSI_MODE_MAX]; struct ncsi_channel_filter *filters[NCSI_FILTER_MAX]; struct ncsi_channel_stats stats; - struct timer_list timer; /* Link monitor timer */ - bool enabled; /* Timer is enabled */ - unsigned int timeout; /* Times of timeout */ + struct { + struct timer_list timer; + bool enabled; + unsigned int state; +#define NCSI_CHANNEL_MONITOR_START 0 +#define NCSI_CHANNEL_MONITOR_RETRY 1 +#define NCSI_CHANNEL_MONITOR_WAIT 2 +#define NCSI_CHANNEL_MONITOR_WAIT_MAX 5 + } monitor; struct list_head node; struct list_head link; }; diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index adf5401817c2..4742c7c6c748 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -172,15 +172,15 @@ static void ncsi_channel_monitor(unsigned long data) struct ncsi_dev_priv *ndp = np->ndp; struct ncsi_cmd_arg nca; bool enabled, chained; - unsigned int timeout; + unsigned int monitor_state; unsigned long flags; int state, ret; spin_lock_irqsave(&nc->lock, flags); state = nc->state; chained = !list_empty(&nc->link); - timeout = nc->timeout; - enabled = nc->enabled; + enabled = nc->monitor.enabled; + monitor_state = nc->monitor.state; spin_unlock_irqrestore(&nc->lock, flags); if (!enabled || chained) @@ -189,7 +189,9 @@ static void ncsi_channel_monitor(unsigned long data) state != NCSI_CHANNEL_ACTIVE) return; - if (!(timeout % 2)) { + switch (monitor_state) { + case NCSI_CHANNEL_MONITOR_START: + case NCSI_CHANNEL_MONITOR_RETRY: nca.ndp = ndp; nca.package = np->id; nca.channel = nc->id; @@ -201,12 +203,16 @@ static void ncsi_channel_monitor(unsigned long data) ret); return; } - } - if (timeout + 1 >= 3) { + break; + case NCSI_CHANNEL_MONITOR_WAIT ... NCSI_CHANNEL_MONITOR_WAIT_MAX: + break; + default: if (!(ndp->flags & NCSI_DEV_HWA) && - state == NCSI_CHANNEL_ACTIVE) + state == NCSI_CHANNEL_ACTIVE) { ncsi_report_link(ndp, true); + ndp->flags |= NCSI_DEV_RESHUFFLE; + } spin_lock_irqsave(&nc->lock, flags); nc->state = NCSI_CHANNEL_INVISIBLE; @@ -221,10 +227,9 @@ static void ncsi_channel_monitor(unsigned long data) } spin_lock_irqsave(&nc->lock, flags); - nc->timeout = timeout + 1; - nc->enabled = true; + nc->monitor.state++; spin_unlock_irqrestore(&nc->lock, flags); - mod_timer(&nc->timer, jiffies + HZ * (1 << (nc->timeout / 2))); + mod_timer(&nc->monitor.timer, jiffies + HZ); } void ncsi_start_channel_monitor(struct ncsi_channel *nc) @@ -232,12 +237,12 @@ void ncsi_start_channel_monitor(struct ncsi_channel *nc) unsigned long flags; spin_lock_irqsave(&nc->lock, flags); - WARN_ON_ONCE(nc->enabled); - nc->timeout = 0; - nc->enabled = true; + WARN_ON_ONCE(nc->monitor.enabled); + nc->monitor.enabled = true; + nc->monitor.state = NCSI_CHANNEL_MONITOR_START; spin_unlock_irqrestore(&nc->lock, flags); - mod_timer(&nc->timer, jiffies + HZ * (1 << (nc->timeout / 2))); + mod_timer(&nc->monitor.timer, jiffies + HZ); } void ncsi_stop_channel_monitor(struct ncsi_channel *nc) @@ -245,14 +250,14 @@ void ncsi_stop_channel_monitor(struct ncsi_channel *nc) unsigned long flags; spin_lock_irqsave(&nc->lock, flags); - if (!nc->enabled) { + if (!nc->monitor.enabled) { spin_unlock_irqrestore(&nc->lock, flags); return; } - nc->enabled = false; + nc->monitor.enabled = false; spin_unlock_irqrestore(&nc->lock, flags); - del_timer_sync(&nc->timer); + del_timer_sync(&nc->monitor.timer); } struct ncsi_channel *ncsi_find_channel(struct ncsi_package *np, @@ -281,8 +286,9 @@ struct ncsi_channel *ncsi_add_channel(struct ncsi_package *np, unsigned char id) nc->id = id; nc->package = np; nc->state = NCSI_CHANNEL_INACTIVE; - nc->enabled = false; - setup_timer(&nc->timer, ncsi_channel_monitor, (unsigned long)nc); + nc->monitor.enabled = false; + setup_timer(&nc->monitor.timer, + ncsi_channel_monitor, (unsigned long)nc); spin_lock_init(&nc->lock); INIT_LIST_HEAD(&nc->link); for (index = 0; index < NCSI_CAP_MAX; index++) diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c index 86cdaebd8d9e..087db775b3dc 100644 --- a/net/ncsi/ncsi-rsp.c +++ b/net/ncsi/ncsi-rsp.c @@ -322,7 +322,7 @@ static int ncsi_rsp_handler_gls(struct ncsi_request *nr) /* Reset the channel monitor if it has been enabled */ spin_lock_irqsave(&nc->lock, flags); - nc->timeout = 0; + nc->monitor.state = NCSI_CHANNEL_MONITOR_START; spin_unlock_irqrestore(&nc->lock, flags); return 0; From c0cd1ba4f8bd8b5fef43bc51a2983673b8f086ff Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 4 Oct 2016 11:25:53 +1100 Subject: [PATCH 1712/1715] net/ncsi: Introduce ncsi_stop_dev() This introduces ncsi_stop_dev(), as counterpart to ncsi_start_dev(), to stop the NCSI device so that it can be reenabled in future. This API should be called when the network device driver is going to shutdown the device. There are 3 things done in the function: Stop the channel monitoring; Reset channels to inactive state; Report NCSI link down. Signed-off-by: Gavin Shan Reviewed-by: Joel Stanley Signed-off-by: David S. Miller --- include/net/ncsi.h | 5 +++++ net/ncsi/ncsi-manage.c | 37 ++++++++++++++++++++++++------------- 2 files changed, 29 insertions(+), 13 deletions(-) diff --git a/include/net/ncsi.h b/include/net/ncsi.h index 1dbf42f79750..68680baac0fd 100644 --- a/include/net/ncsi.h +++ b/include/net/ncsi.h @@ -31,6 +31,7 @@ struct ncsi_dev { struct ncsi_dev *ncsi_register_dev(struct net_device *dev, void (*notifier)(struct ncsi_dev *nd)); int ncsi_start_dev(struct ncsi_dev *nd); +void ncsi_stop_dev(struct ncsi_dev *nd); void ncsi_unregister_dev(struct ncsi_dev *nd); #else /* !CONFIG_NET_NCSI */ static inline struct ncsi_dev *ncsi_register_dev(struct net_device *dev, @@ -44,6 +45,10 @@ static inline int ncsi_start_dev(struct ncsi_dev *nd) return -ENOTTY; } +static void ncsi_stop_dev(struct ncsi_dev *nd) +{ +} + static inline void ncsi_unregister_dev(struct ncsi_dev *nd) { } diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index 4742c7c6c748..5e509e547c2d 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -1187,11 +1187,7 @@ EXPORT_SYMBOL_GPL(ncsi_register_dev); int ncsi_start_dev(struct ncsi_dev *nd) { struct ncsi_dev_priv *ndp = TO_NCSI_DEV_PRIV(nd); - struct ncsi_package *np; - struct ncsi_channel *nc; - unsigned long flags; - bool chained; - int old_state, ret; + int ret; if (nd->state != ncsi_dev_state_registered && nd->state != ncsi_dev_state_functional) @@ -1203,9 +1199,29 @@ int ncsi_start_dev(struct ncsi_dev *nd) return 0; } - /* Reset channel's state and start over */ + if (ndp->flags & NCSI_DEV_HWA) + ret = ncsi_enable_hwa(ndp); + else + ret = ncsi_choose_active_channel(ndp); + + return ret; +} +EXPORT_SYMBOL_GPL(ncsi_start_dev); + +void ncsi_stop_dev(struct ncsi_dev *nd) +{ + struct ncsi_dev_priv *ndp = TO_NCSI_DEV_PRIV(nd); + struct ncsi_package *np; + struct ncsi_channel *nc; + bool chained; + int old_state; + unsigned long flags; + + /* Stop the channel monitor and reset channel's state */ NCSI_FOR_EACH_PACKAGE(ndp, np) { NCSI_FOR_EACH_CHANNEL(np, nc) { + ncsi_stop_channel_monitor(nc); + spin_lock_irqsave(&nc->lock, flags); chained = !list_empty(&nc->link); old_state = nc->state; @@ -1217,14 +1233,9 @@ int ncsi_start_dev(struct ncsi_dev *nd) } } - if (ndp->flags & NCSI_DEV_HWA) - ret = ncsi_enable_hwa(ndp); - else - ret = ncsi_choose_active_channel(ndp); - - return ret; + ncsi_report_link(ndp, true); } -EXPORT_SYMBOL_GPL(ncsi_start_dev); +EXPORT_SYMBOL_GPL(ncsi_stop_dev); void ncsi_unregister_dev(struct ncsi_dev *nd) { From 2c15f25b2923435515298589dcaa2eace6a948c1 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 4 Oct 2016 11:25:54 +1100 Subject: [PATCH 1713/1715] net/faraday: Stop NCSI device on shutdown This stops NCSI device when closing the network device so that the NCSI device can be reenabled later. Signed-off-by: Gavin Shan Reviewed-by: Joel Stanley Signed-off-by: David S. Miller --- drivers/net/ethernet/faraday/ftgmac100.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 90f9c5481290..262587240c86 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -1190,6 +1190,8 @@ static int ftgmac100_stop(struct net_device *netdev) napi_disable(&priv->napi); if (netdev->phydev) phy_stop(netdev->phydev); + else if (priv->use_ncsi) + ncsi_stop_dev(priv->ndev); ftgmac100_stop_hw(priv); free_irq(priv->irq, netdev); From feb7d387a6cf6c1ec66d4a2b6d4b2cc52309876e Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Tue, 4 Oct 2016 09:46:04 +0200 Subject: [PATCH 1714/1715] mlxsw: spectrum: Fix misuse of hard_header_len In order to specify that the mlxsw spectrum driver needs additional headroom for packets, there have been use of the hard_header_len field of the netdevice struct. This commit changes that to use needed_headroom instead, as this is the correct way to do that. Fixes: 56ade8fe3fe1 ("mlxsw: spectrum: Add initial support for Spectrum ASIC") Signed-off-by: Yotam Gigi Acked-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index fd74d1064ff3..1ec0a4ce3c46 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -2285,7 +2285,7 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, /* Each packet needs to have a Tx header (metadata) on top all other * headers. */ - dev->hard_header_len += MLXSW_TXHDR_LEN; + dev->needed_headroom = MLXSW_TXHDR_LEN; err = mlxsw_sp_port_system_port_mapping_set(mlxsw_sp_port); if (err) { From 251d41c58b765f00d73b1b4230cad256e25f2735 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Tue, 4 Oct 2016 09:46:05 +0200 Subject: [PATCH 1715/1715] mlxsw: switchx2: Fix misuse of hard_header_len In order to specify that the mlxsw switchx2 driver needs additional headroom for packets, there have been use of the hard_header_len field of the netdevice struct. This commit changes that to use needed_headroom instead, as this is the correct way to do that. Fixes: 31557f0f9755 ("mlxsw: Introduce Mellanox SwitchX-2 ASIC support") Signed-off-by: Yotam Gigi Acked-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/switchx2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c index 8b15bf0c744d..c0c23e2f3275 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c +++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c @@ -997,7 +997,7 @@ static int mlxsw_sx_port_create(struct mlxsw_sx *mlxsw_sx, u8 local_port) /* Each packet needs to have a Tx header (metadata) on top all other * headers. */ - dev->hard_header_len += MLXSW_TXHDR_LEN; + dev->needed_headroom = MLXSW_TXHDR_LEN; err = mlxsw_sx_port_module_check(mlxsw_sx_port, &usable); if (err) {